Example #1
0
def create_template_sentence(templates, instructions=False):
    """Build the text listing available templates for the user to pick from.

    :ref templates: list of templates coming from manifest file
    :ref instructions: add instructions
    """
    Template = namedtuple('Template', ['index', 'name', 'description'])

    # Number templates starting at 1 so the prompt matches human counting.
    rows = []
    for position, entry in enumerate(templates, start=1):
        rows.append(
            Template(
                index=position,
                name=entry['name'],
                description=entry['description'],
            )
        )

    text = tabulate(
        rows,
        headers=OrderedDict((
            ('index', 'Number'),
            ('name', 'Name'),
            ('description', 'Description'),
        )),
    )

    if instructions:
        return '{0}\nPlease choose a template by typing the number'.format(text)
    return text
Example #2
0
def tabular(client, records):
    """Format dataset files with a tabular output.

    :param client: LocalClient instance.
    :param records: Filtered collection.
    """
    # Column mapping: a None label keeps the attribute name as the header.
    headers = OrderedDict((
        ('added', None),
        ('creators_csv', 'creators'),
        ('dataset', None),
        ('full_path', 'path'),
    ))
    return tabulate(records, headers=headers)
Example #3
0
def create_template_sentence(templates, describe=False, instructions=False):
    """Create templates choice sentence.

    :ref templates: list of templates coming from manifest file
    :ref describe: include template descriptions and full variable details
    :ref instructions: add instructions
    """
    Template = namedtuple("Template",
                          ["index", "id", "description", "variables"])

    def extract_description(template_elem):
        """Extract description from template manifest."""
        if describe:
            return template_elem["description"]
        return None

    def extract_variables(template_elem):
        """Extract variables from template manifest."""
        if describe:
            # Verbose form: one "name: details" line per variable.
            return "\n".join([
                f"{variable[0]}: {variable[1]}"
                for variable in template_elem.get("variables", {}).items()
            ])

        # Compact form: comma-separated variable names only.
        return ",".join(template_elem.get("variables", {}).keys())

    templates_friendly = [
        Template(
            index=index + 1,
            id=template_elem["folder"],
            description=extract_description(template_elem),
            variables=extract_variables(template_elem),
        ) for index, template_elem in enumerate(templates)
    ]

    table_headers = OrderedDict((
        ("index", "Index"),
        ("id", "Id"),
        ("variables", "Parameters"),
    ))

    # The Description column is only shown when requested.
    if describe:
        table_headers["description"] = "Description"

    text = tabulate(templates_friendly, headers=table_headers)

    if not instructions:
        return text
    return "{0}\nPlease choose a template by typing the index".format(text)
Example #4
0
def create_template_sentence(templates, describe=False, instructions=False):
    """Create templates choice sentence.

    :ref templates: list of templates coming from manifest file
    :ref describe: include template descriptions and full variable details
    :ref instructions: add instructions
    """
    Template = namedtuple('Template',
                          ['index', 'id', 'description', 'variables'])

    def extract_description(template_elem):
        """Extract description from template manifest."""
        if describe:
            return template_elem['description']
        return None

    def extract_variables(template_elem):
        """Extract variables from template manifest."""
        if describe:
            # Verbose form: one "name: details" line per variable.
            return '\n'.join([
                f'{variable[0]}: {variable[1]}'
                for variable in template_elem.get('variables', {}).items()
            ])

        # Compact form: comma-separated variable names only.
        return ','.join(template_elem.get('variables', {}).keys())

    templates_friendly = [
        Template(
            index=index + 1,
            id=template_elem['folder'],
            description=extract_description(template_elem),
            variables=extract_variables(template_elem),
        ) for index, template_elem in enumerate(templates)
    ]

    table_headers = OrderedDict((
        ('index', 'Index'),
        ('id', 'Id'),
        ('variables', 'Parameters'),
    ))

    # The Description column is only shown when requested.
    if describe:
        table_headers['description'] = 'Description'

    text = tabulate(templates_friendly, headers=table_headers)

    if not instructions:
        return text
    return '{0}\nPlease choose a template by typing the index'.format(text)
Example #5
0
def tabular(client, datasets):
    """Format datasets with a tabular output."""
    headers = OrderedDict((
        ('uid', 'id'),
        ('created', None),
        ('short_name', None),
        ('creators_csv', 'creators'),
        ('tags_csv', 'tags'),
        ('version', None),
    ))
    # workaround for tabulate issue 181
    # https://bitbucket.org/astanin/python-tabulate/issues/181/disable_numparse-fails-on-empty-input
    numparse_override = [0, 2] if any(datasets) else False
    return tabulate(datasets, headers=headers, disable_numparse=numparse_override)
Example #6
0
def tabular(client, tags):
    """Format dataset tags with a tabular output.

    :param client: LocalClient instance.
    :param tags: Dataset tags.
    """
    headers = OrderedDict((
        ('created', None),
        ('name', None),
        ('description', None),
        ('dataset', None),
        ('commit', None),
    ))
    # workaround for tabulate issue 181
    # https://bitbucket.org/astanin/python-tabulate/issues/181/disable_numparse-fails-on-empty-input
    numparse_override = [1, 2, 4] if len(tags) > 0 else False
    return tabulate(tags, headers=headers, disable_numparse=numparse_override)
Example #7
0
def outputs(ctx, client, revision, verbose, paths):
    r"""Show output files in the repository.

    <PATHS>    Files to show. If no files are given all output files are shown.

    Exits with status 1 when ``paths`` were given but some requested output
    is missing from the graph.
    """
    # Build the provenance graph; ``filter_`` holds the nodes matching the
    # requested paths/revision and is used below for the exit-status check.
    # NOTE(review): Graph, ProcessRun, Result, HEADERS come from imports
    # outside this chunk.
    graph = Graph(client)
    filter_ = graph.build(paths=paths, revision=revision)
    output_paths = {}

    # Collect one Result per generated entity whose path is a known output.
    for activity in graph.activities.values():
        if isinstance(activity, ProcessRun):
            for entity in activity.generated:
                if entity.path not in graph.output_paths:
                    continue
                output_paths[entity.path] = Result(
                    path=entity.path, commit=entity.commit, time=activity.ended_at_time, workflow=activity.path
                )

    if not verbose:
        # Plain listing: one formatted path per line.
        click.echo("\n".join(graph._format_path(path) for path in output_paths.keys()))
    else:
        records = list(output_paths.values())
        # Sort by the first tuple field — the path.
        records.sort(key=lambda v: v[0])
        # NOTE(review): mutates the module-level HEADERS mapping in place.
        HEADERS["time"] = "generation time"
        click.echo(tabulate(collection=records, headers=HEADERS))

    if paths:
        if not output_paths:
            ctx.exit(1)

        from renku.core.models.datastructures import DirectoryTree

        tree = DirectoryTree.from_list(item.path for item in filter_)

        # Fail as soon as any collected output is not under a requested path.
        for output in output_paths:
            if tree.get(output) is None:
                ctx.exit(1)
                return
Example #8
0
def inputs(ctx, client, revision, verbose, paths):
    r"""Show inputs files in the repository.

    <PATHS>    Files to show. If no files are given all input files are shown.

    Exits with status 1 when ``paths`` were given but not all of them were
    found as inputs.
    """
    graph = Graph(client)
    paths = set(paths)
    nodes = graph.build(revision=revision)
    # Commits to inspect: each node's own commit, or its activity's commit.
    commits = {node.activity.commit if hasattr(node, "activity") else node.commit for node in nodes}
    # NOTE(review): this union appears redundant — activity commits are
    # already included by the conditional above; kept as-is.
    commits |= {node.activity.commit for node in nodes if hasattr(node, "activity")}
    # (commit, path) pairs that match the requested paths (or all of them).
    candidates = {(node.commit, node.path) for node in nodes if not paths or node.path in paths}

    input_paths = {}

    for commit in commits:
        activity = graph.activities.get(commit)
        if not activity:
            continue

        if isinstance(activity, ProcessRun):
            for usage in activity.qualified_usage:
                for entity in usage.entity.entities:
                    # Path of the used entity relative to the current project.
                    path = str((usage.client.path / entity.path).relative_to(client.path))
                    usage_key = (entity.commit, entity.path)

                    # First match wins; only record candidate usages.
                    if path not in input_paths and usage_key in candidates:
                        input_paths[path] = Result(
                            path=path, commit=entity.commit, time=activity.started_at_time, workflow=activity.path
                        )

    if not verbose:
        click.echo("\n".join(graph._format_path(path) for path in input_paths))
    else:
        records = list(input_paths.values())
        # Sort by the first tuple field — the path.
        records.sort(key=lambda v: v[0])
        # NOTE(review): mutates the module-level HEADERS mapping in place.
        HEADERS["time"] = "usage time"
        click.echo(tabulate(collection=records, headers=HEADERS))
    ctx.exit(0 if not paths or len(input_paths) == len(paths) else 1)
Example #9
0
def import_dataset(
    client,
    uri,
    short_name='',
    extract=False,
    with_prompt=False,
    yes=False,
    commit_message=None,
    progress=None,
):
    """Import data from a 3rd party provider or another renku project.

    :param client: project client passed to the provider and dataset helpers.
    :param uri: URI or DOI of the dataset to import.
    :param short_name: dataset short name; generated from the record when
        empty (non-git providers only).
    :param extract: extract downloaded archives.
    :param with_prompt: show the file table and ask for confirmation.
    :param yes: skip the confirmation prompt.
    :param commit_message: unused in this body — presumably consumed by a
        decorator; TODO confirm.
    :param progress: progress callback forwarded to ``_add_to_dataset``.
    :raises ParameterError: when the URI cannot be processed or the dataset
        has no files.
    """
    provider, err = ProviderFactory.from_uri(uri)
    if err and provider is None:
        raise ParameterError('Could not process {0}.\n{1}'.format(uri, err))

    try:
        record = provider.find_record(uri, client)
        dataset = record.as_dataset(client)
        files = dataset.files
        total_size = 0

        if with_prompt and not yes:
            # Show the file listing before asking for confirmation.
            click.echo(
                tabulate(
                    files,
                    headers=OrderedDict((
                        ('checksum', None),
                        ('filename', 'name'),
                        ('size_in_mb', 'size (mb)'),
                        ('filetype', 'type'),
                    )),
                    floatfmt='.2f'
                )
            )

            text_prompt = 'Do you wish to download this version?'
            if record.is_last_version(uri) is False:
                text_prompt = WARNING + 'Newer version found at {}\n'.format(
                    record.links.get('latest_html')
                ) + text_prompt

            click.confirm(text_prompt, abort=True)

            # Sum known file sizes; entries without a size are skipped.
            for file_ in files:
                if file_.size_in_mb is not None:
                    total_size += file_.size_in_mb

            # Convert megabytes to bytes.
            total_size *= 2**20

    except KeyError as e:
        raise ParameterError((
            'Could not process {0}.\n'
            'Unable to fetch metadata due to {1}'.format(uri, e)
        ))

    except LookupError as e:
        raise ParameterError(
            ('Could not process {0}.\n'
             'Reason: {1}'.format(uri, str(e)))
        )

    if not files:
        raise ParameterError('Dataset {} has no files.'.format(uri))

    # Record provenance: the imported dataset points back at the source URI.
    dataset.same_as = Url(url_id=remove_credentials(uri))

    if not provider.is_git_based:
        if not short_name:
            short_name = generate_default_short_name(
                dataset.name, dataset.version
            )

        # Prefer a canonical doi.org URL when the identifier is a DOI.
        if is_doi(dataset.identifier):
            dataset.same_as = Url(
                url_str=urllib.parse.
                urljoin('https://doi.org', dataset.identifier)
            )

        urls, names = zip(*[(f.url, f.filename) for f in files])

        _add_to_dataset(
            client,
            urls=urls,
            short_name=short_name,
            create=True,
            with_metadata=dataset,
            force=True,
            extract=extract,
            all_at_once=True,
            destination_names=names,
            progress=progress,
            interactive=with_prompt,
            total_size=total_size,
        )

        # Tag the imported version, sanitizing characters git refuses.
        if dataset.version:
            tag_name = re.sub('[^a-zA-Z0-9.-_]', '_', dataset.version)
            tag_dataset(
                client, short_name, tag_name,
                'Tag {} created by renku import'.format(dataset.version)
            )
    else:
        # Git-based providers: add the whole project URL with file sources.
        short_name = short_name or dataset.short_name

        _add_to_dataset(
            client,
            urls=[record.project_url],
            short_name=short_name,
            sources=[f.path for f in files],
            with_metadata=dataset,
            create=True
        )
Example #10
0
def import_dataset(
    client,
    uri,
    name="",
    extract=False,
    with_prompt=False,
    yes=False,
    commit_message=None,
    progress=None,
):
    """Import data from a 3rd party provider or another renku project.

    :param client: project client passed to the provider and dataset helpers.
    :param uri: URI or DOI of the dataset to import.
    :param name: dataset name; generated from the record when empty
        (non-git providers only).
    :param extract: extract downloaded archives.
    :param with_prompt: show the file table and ask for confirmation.
    :param yes: skip the confirmation prompt.
    :param commit_message: unused in this body — presumably consumed by a
        decorator; TODO confirm.
    :param progress: progress callback forwarded to ``_add_to_dataset``.
    :raises ParameterError: when the URI cannot be processed or the dataset
        has no files.
    :raises OperationError: for git-based datasets without a data directory.
    """
    u = urllib.parse.urlparse(uri)
    if u.scheme not in ("", "file", "git+https", "git+ssh", "doi"):
        # NOTE: Check if the url is a redirect.
        uri = requests.head(uri, allow_redirects=True).url

    provider, err = ProviderFactory.from_uri(uri)
    if err and provider is None:
        raise ParameterError("Could not process {0}.\n{1}".format(uri, err))

    try:
        record = provider.find_record(uri, client)
        dataset = record.as_dataset(client)
        files = dataset.files
        total_size = 0

        if with_prompt and not yes:
            # Show the file listing before asking for confirmation.
            click.echo(
                tabulate(
                    files,
                    headers=OrderedDict((
                        ("checksum", None),
                        ("filename", "name"),
                        ("size_in_mb", "size (mb)"),
                        ("filetype", "type"),
                    )),
                    floatfmt=".2f",
                ))

            text_prompt = "Do you wish to download this version?"
            if record.is_last_version(uri) is False:
                text_prompt = (WARNING + "Newer version found at {}\n".format(
                    record.links.get("latest_html")) + text_prompt)

            click.confirm(text_prompt, abort=True)

            # Sum known file sizes; entries without a size are skipped.
            for file_ in files:
                if file_.size_in_mb is not None:
                    total_size += file_.size_in_mb

            # Convert megabytes to bytes.
            total_size *= 2**20

    except KeyError as e:
        raise ParameterError(
            ("Could not process {0}.\n"
             "Unable to fetch metadata due to {1}".format(uri, e)))

    except LookupError as e:
        raise ParameterError(("Could not process {0}.\n"
                              "Reason: {1}".format(uri, str(e))))

    if not files:
        raise ParameterError("Dataset {} has no files.".format(uri))

    # Record provenance: the imported dataset points back at the source URI.
    dataset.same_as = Url(url_id=remove_credentials(uri))

    if not provider.is_git_based:
        if not name:
            name = generate_default_name(dataset.title, dataset.version)

        # Prefer a canonical doi.org URL when the identifier is a DOI.
        if is_doi(dataset.identifier):
            dataset.same_as = Url(url_str=urllib.parse.urljoin(
                "https://doi.org", dataset.identifier))

        urls, names = zip(*[(f.source, f.filename) for f in files])

        _add_to_dataset(
            client,
            urls=urls,
            name=name,
            create=True,
            with_metadata=dataset,
            force=True,
            extract=extract,
            all_at_once=True,
            destination_names=names,
            progress=progress,
            interactive=with_prompt,
            total_size=total_size,
        )

        # Tag the imported version, sanitizing characters git refuses.
        if dataset.version:
            tag_name = re.sub("[^a-zA-Z0-9.-_]", "_", dataset.version)
            tag_dataset(
                client, name, tag_name,
                "Tag {} created by renku import".format(dataset.version))
    else:
        name = name or dataset.name

        if not dataset.data_dir:
            raise OperationError(
                f"Data directory for dataset must be set: {dataset.name}")

        # Everything under the data directory, plus any files outside it.
        sources = [f"{dataset.data_dir}/**"]
        for file_ in dataset.files:
            try:
                Path(file_.path).relative_to(dataset.data_dir)
            except ValueError:  # Files that are not in dataset's data directory
                sources.append(file_.path)

        _add_to_dataset(
            client,
            urls=[record.project_url],
            name=name,
            sources=sources,
            with_metadata=dataset,
            create=True,
        )
Example #11
0
def import_dataset(
    client,
    uri,
    short_name='',
    extract=False,
    with_prompt=False,
    commit_message=None,
    progress=None,
):
    """Import data from a 3rd party provider.

    :param client: project client passed to the provider and dataset helpers.
    :param uri: URI or DOI of the dataset to import.
    :param short_name: dataset short name; generated from the record when
        empty.
    :param extract: extract downloaded archives.
    :param with_prompt: show the file table and ask for confirmation.
    :param commit_message: unused in this body — presumably consumed by a
        decorator; TODO confirm.
    :param progress: progress callback forwarded to ``add_to_dataset``.
    :raises ParameterError: when the URI cannot be processed.
    """
    provider, err = ProviderFactory.from_uri(uri)
    if err and provider is None:
        raise ParameterError('Could not process {0}.\n{1}'.format(uri, err))

    try:
        record = provider.find_record(uri)
        dataset = record.as_dataset(client)
        files = dataset.files

        if with_prompt:
            # Show the file listing before asking for confirmation.
            click.echo(
                tabulate(files,
                         headers=OrderedDict((
                             ('checksum', None),
                             ('filename', 'name'),
                             ('size_in_mb', 'size (mb)'),
                             ('filetype', 'type'),
                         ))))

            text_prompt = 'Do you wish to download this version?'
            if record.is_last_version(uri) is False:
                text_prompt = WARNING + 'Newer version found at {}\n'.format(
                    record.links.get('latest_html')) + text_prompt

            click.confirm(text_prompt, abort=True)

    except KeyError as e:
        raise ParameterError(
            ('Could not process {0}.\n'
             'Unable to fetch metadata due to {1}'.format(uri, e)))

    except LookupError:
        raise ParameterError(('Could not process {0}.\n'
                              'URI not found.'.format(uri)))

    # NOTE(review): unlike later variants, an empty file list is silently
    # ignored here rather than raising.
    if files:
        if not short_name:
            short_name = generate_default_short_name(dataset.name,
                                                     dataset.version)

        dataset.url = remove_credentials(dataset.url)

        add_to_dataset(
            client,
            urls=[f.url for f in files],
            short_name=short_name,
            create=True,
            with_metadata=dataset,
            force=True,
            extract=extract,
            all_at_once=True,
            progress=progress,
        )

        # Tag the imported version, sanitizing characters git refuses.
        if dataset.version:
            tag_name = re.sub('[^a-zA-Z0-9.-_]', '_', dataset.version)
            tag_dataset(
                client, short_name, tag_name,
                'Tag {} created by renku import'.format(dataset.version))