def create_template_sentence(templates, instructions=False):
    """Build the templates choice text shown to the user.

    :ref templates: list of templates coming from manifest file
    :ref instructions: add instructions
    """
    Template = namedtuple('Template', ['index', 'name', 'description'])

    # One row per template, numbered from 1 for user-facing selection.
    rows = []
    for position, entry in enumerate(templates, start=1):
        rows.append(
            Template(
                index=position,
                name=entry['name'],
                description=entry['description'],
            )
        )

    headers = OrderedDict((
        ('index', 'Number'),
        ('name', 'Name'),
        ('description', 'Description'),
    ))
    text = tabulate(rows, headers=headers)

    if instructions:
        return '{0}\nPlease choose a template by typing the number'.format(text)
    return text
def tabular(client, records):
    """Format dataset files with a tabular output.

    :param client: LocalClient instance.
    :param records: Filtered collection.
    """
    # Map record attributes to column titles; None keeps the attribute name.
    column_mapping = OrderedDict((
        ('added', None),
        ('creators_csv', 'creators'),
        ('dataset', None),
        ('full_path', 'path'),
    ))
    return tabulate(records, headers=column_mapping)
def create_template_sentence(templates, describe=False, instructions=False):
    """Build the templates choice text shown to the user.

    :ref templates: list of templates coming from manifest file
    :ref describe: include descriptions and full variable details
    :ref instructions: add instructions
    """
    Template = namedtuple("Template", ["index", "id", "description", "variables"])

    def _description_of(entry):
        """Description column value; only populated in describe mode."""
        return entry["description"] if describe else None

    def _variables_of(entry):
        """Variables column value; verbose in describe mode, CSV of names otherwise."""
        variables = entry.get("variables", {})
        if describe:
            return "\n".join(f"{key}: {value}" for key, value in variables.items())
        return ",".join(variables.keys())

    rows = [
        Template(
            index=position,
            id=entry["folder"],
            description=_description_of(entry),
            variables=_variables_of(entry),
        )
        for position, entry in enumerate(templates, start=1)
    ]

    headers = OrderedDict((
        ("index", "Index"),
        ("id", "Id"),
        ("variables", "Parameters"),
    ))
    if describe:
        headers["description"] = "Description"

    text = tabulate(rows, headers=headers)

    if instructions:
        return "{0}\nPlease choose a template by typing the index".format(text)
    return text
def create_template_sentence(templates, describe=False, instructions=False):
    """Build the templates choice text shown to the user.

    :ref templates: list of templates coming from manifest file
    :ref describe: include descriptions and full variable details
    :ref instructions: add instructions
    """
    Template = namedtuple('Template', ['index', 'id', 'description', 'variables'])

    rows = []
    for position, entry in enumerate(templates, start=1):
        variables = entry.get('variables', {})
        if describe:
            # Verbose mode: full description plus one "name: details" per line.
            description = entry['description']
            rendered_variables = '\n'.join(
                f'{key}: {value}' for key, value in variables.items()
            )
        else:
            description = None
            rendered_variables = ','.join(variables.keys())
        rows.append(
            Template(
                index=position,
                id=entry['folder'],
                description=description,
                variables=rendered_variables,
            )
        )

    headers = OrderedDict((
        ('index', 'Index'),
        ('id', 'Id'),
        ('variables', 'Parameters'),
    ))
    if describe:
        headers['description'] = 'Description'

    text = tabulate(rows, headers=headers)

    if instructions:
        return '{0}\nPlease choose a template by typing the index'.format(text)
    return text
def tabular(client, datasets):
    """Format datasets with a tabular output."""
    headers = OrderedDict((
        ('uid', 'id'),
        ('created', None),
        ('short_name', None),
        ('creators_csv', 'creators'),
        ('tags_csv', 'tags'),
        ('version', None),
    ))

    # workaround for tabulate issue 181
    # https://bitbucket.org/astanin/python-tabulate/issues/181/disable_numparse-fails-on-empty-input
    if any(datasets):
        numparse_columns = [0, 2]
    else:
        numparse_columns = False

    return tabulate(datasets, headers=headers, disable_numparse=numparse_columns)
def tabular(client, tags):
    """Format dataset tags with a tabular output.

    :param client: LocalClient instance.
    :param tags: Dataset tags.
    """
    headers = OrderedDict((
        ('created', None),
        ('name', None),
        ('description', None),
        ('dataset', None),
        ('commit', None),
    ))

    # workaround for tabulate issue 181
    # https://bitbucket.org/astanin/python-tabulate/issues/181/disable_numparse-fails-on-empty-input
    numparse_columns = [1, 2, 4] if len(tags) > 0 else False

    return tabulate(tags, headers=headers, disable_numparse=numparse_columns)
def outputs(ctx, client, revision, verbose, paths):
    r"""Show output files in the repository.

    <PATHS>    Files to show. If no files are given all output files are shown.
    """
    # Build the provenance graph; ``filter_`` holds the nodes matching PATHS.
    graph = Graph(client)
    filter_ = graph.build(paths=paths, revision=revision)

    # Map each generated path to its generation record; a later activity
    # overwrites an earlier one for the same path.
    output_paths = {}
    for activity in graph.activities.values():
        if isinstance(activity, ProcessRun):
            for entity in activity.generated:
                # Skip entities the graph does not consider outputs.
                if entity.path not in graph.output_paths:
                    continue
                output_paths[entity.path] = Result(
                    path=entity.path,
                    commit=entity.commit,
                    time=activity.ended_at_time,
                    workflow=activity.path
                )

    if not verbose:
        # Plain listing: one formatted path per line.
        click.echo("\n".join(graph._format_path(path) for path in output_paths.keys()))
    else:
        # Tabular listing sorted by path (index 0 of the Result tuple).
        records = list(output_paths.values())
        records.sort(key=lambda v: v[0])
        # NOTE: mutates the module-level HEADERS mapping.
        HEADERS["time"] = "generation time"
        click.echo(tabulate(collection=records, headers=HEADERS))

    if paths:
        # Explicit paths requested: exit non-zero when nothing matched.
        if not output_paths:
            ctx.exit(1)

        from renku.core.models.datastructures import DirectoryTree

        tree = DirectoryTree.from_list(item.path for item in filter_)

        # Exit non-zero if any reported output lies outside the filtered set.
        for output in output_paths:
            if tree.get(output) is None:
                ctx.exit(1)

    return
def inputs(ctx, client, revision, verbose, paths):
    r"""Show inputs files in the repository.

    <PATHS>    Files to show. If no files are given all input files are shown.
    """
    # Build the provenance graph for the requested revision.
    graph = Graph(client)
    paths = set(paths)
    nodes = graph.build(revision=revision)

    # Commits to inspect: prefer a node's activity commit when present,
    # falling back to the node's own commit.
    commits = {node.activity.commit if hasattr(node, "activity") else node.commit for node in nodes}
    # NOTE(review): this union appears redundant — every activity commit is
    # already added by the conditional above; confirm before removing.
    commits |= {node.activity.commit for node in nodes if hasattr(node, "activity")}

    # (commit, path) pairs matching the PATHS filter (all nodes when empty).
    candidates = {(node.commit, node.path) for node in nodes if not paths or node.path in paths}

    input_paths = {}

    for commit in commits:
        activity = graph.activities.get(commit)
        if not activity:
            continue
        if isinstance(activity, ProcessRun):
            for usage in activity.qualified_usage:
                for entity in usage.entity.entities:
                    # Path of the used entity relative to the repository root.
                    path = str((usage.client.path / entity.path).relative_to(client.path))
                    usage_key = (entity.commit, entity.path)
                    # First matching usage wins for each path.
                    if path not in input_paths and usage_key in candidates:
                        input_paths[path] = Result(
                            path=path,
                            commit=entity.commit,
                            time=activity.started_at_time,
                            workflow=activity.path
                        )

    if not verbose:
        # Plain listing: one formatted path per line.
        click.echo("\n".join(graph._format_path(path) for path in input_paths))
    else:
        # Tabular listing sorted by path (index 0 of the Result tuple).
        records = list(input_paths.values())
        records.sort(key=lambda v: v[0])
        # NOTE: mutates the module-level HEADERS mapping.
        HEADERS["time"] = "usage time"
        click.echo(tabulate(collection=records, headers=HEADERS))

    # Exit 0 only when every requested path was found as an input.
    ctx.exit(0 if not paths or len(input_paths) == len(paths) else 1)
def import_dataset(
    client,
    uri,
    short_name='',
    extract=False,
    with_prompt=False,
    yes=False,
    commit_message=None,
    progress=None,
):
    """Import data from a 3rd party provider or another renku project.

    :param client: LocalClient instance.
    :param uri: URI or DOI of the dataset to import.
    :param short_name: Short name for the imported dataset; generated when empty.
    :param extract: Whether to extract downloaded archives.
    :param with_prompt: Show a confirmation prompt with the file listing.
    :param yes: Skip the confirmation prompt.
    :param commit_message: Accepted for interface compatibility; not read here.
    :param progress: Progress-reporting callback forwarded to the download.
    :raises ParameterError: When the URI cannot be processed or has no files.
    """
    provider, err = ProviderFactory.from_uri(uri)

    if err and provider is None:
        raise ParameterError('Could not process {0}.\n{1}'.format(uri, err))

    try:
        record = provider.find_record(uri, client)
        dataset = record.as_dataset(client)
        files = dataset.files
        total_size = 0

        if with_prompt and not yes:
            click.echo(
                tabulate(
                    files,
                    headers=OrderedDict((
                        ('checksum', None),
                        ('filename', 'name'),
                        ('size_in_mb', 'size (mb)'),
                        ('filetype', 'type'),
                    )),
                    floatfmt='.2f'
                )
            )

            text_prompt = 'Do you wish to download this version?'
            if record.is_last_version(uri) is False:
                # Warn when a newer version of the record exists upstream.
                text_prompt = WARNING + 'Newer version found at {}\n'.format(
                    record.links.get('latest_html')
                ) + text_prompt

            click.confirm(text_prompt, abort=True)

            # Sum file sizes (reported in MB) and convert to bytes.
            for file_ in files:
                if file_.size_in_mb is not None:
                    total_size += file_.size_in_mb
            total_size *= 2**20
    except KeyError as e:
        # Chain the original error so the root cause stays visible.
        raise ParameterError((
            'Could not process {0}.\n'
            'Unable to fetch metadata due to {1}'.format(uri, e)
        )) from e
    except LookupError as e:
        raise ParameterError((
            'Could not process {0}.\n'
            'Reason: {1}'.format(uri, str(e))
        )) from e

    if not files:
        raise ParameterError('Dataset {} has no files.'.format(uri))

    dataset.same_as = Url(url_id=remove_credentials(uri))

    if not provider.is_git_based:
        if not short_name:
            short_name = generate_default_short_name(
                dataset.name, dataset.version
            )

        if is_doi(dataset.identifier):
            dataset.same_as = Url(
                url_str=urllib.parse.urljoin('https://doi.org', dataset.identifier)
            )

        urls, names = zip(*[(f.url, f.filename) for f in files])

        _add_to_dataset(
            client,
            urls=urls,
            short_name=short_name,
            create=True,
            with_metadata=dataset,
            force=True,
            extract=extract,
            all_at_once=True,
            destination_names=names,
            progress=progress,
            interactive=with_prompt,
            total_size=total_size,
        )

        if dataset.version:
            # BUG FIX: the previous pattern '[^a-zA-Z0-9.-_]' contained an
            # unintended character range ('.' through '_') that wrongly kept
            # characters such as '/', ':' and '?' (invalid in git tag names)
            # and wrongly replaced literal '-'. Placing '-' last makes it a
            # literal hyphen.
            tag_name = re.sub('[^a-zA-Z0-9._-]', '_', dataset.version)
            tag_dataset(
                client, short_name, tag_name,
                'Tag {} created by renku import'.format(dataset.version)
            )
    else:
        short_name = short_name or dataset.short_name
        _add_to_dataset(
            client,
            urls=[record.project_url],
            short_name=short_name,
            sources=[f.path for f in files],
            with_metadata=dataset,
            create=True
        )
def import_dataset(
    client,
    uri,
    name="",
    extract=False,
    with_prompt=False,
    yes=False,
    commit_message=None,
    progress=None,
):
    """Import data from a 3rd party provider or another renku project.

    :param client: LocalClient instance.
    :param uri: URI or DOI of the dataset to import.
    :param name: Name for the imported dataset; generated when empty.
    :param extract: Whether to extract downloaded archives.
    :param with_prompt: Show a confirmation prompt with the file listing.
    :param yes: Skip the confirmation prompt.
    :param commit_message: Accepted for interface compatibility; not read here.
    :param progress: Progress-reporting callback forwarded to the download.
    :raises ParameterError: When the URI cannot be processed or has no files.
    :raises OperationError: When a git-based dataset has no data directory.
    """
    u = urllib.parse.urlparse(uri)
    if u.scheme not in ("", "file", "git+https", "git+ssh", "doi"):
        # NOTE: Check if the url is a redirect.
        uri = requests.head(uri, allow_redirects=True).url

    provider, err = ProviderFactory.from_uri(uri)

    if err and provider is None:
        raise ParameterError("Could not process {0}.\n{1}".format(uri, err))

    try:
        record = provider.find_record(uri, client)
        dataset = record.as_dataset(client)
        files = dataset.files
        total_size = 0

        if with_prompt and not yes:
            click.echo(
                tabulate(
                    files,
                    headers=OrderedDict((
                        ("checksum", None),
                        ("filename", "name"),
                        ("size_in_mb", "size (mb)"),
                        ("filetype", "type"),
                    )),
                    floatfmt=".2f",
                ))

            text_prompt = "Do you wish to download this version?"
            if record.is_last_version(uri) is False:
                # Warn when a newer version of the record exists upstream.
                text_prompt = (WARNING + "Newer version found at {}\n".format(
                    record.links.get("latest_html")) + text_prompt)

            click.confirm(text_prompt, abort=True)

            # Sum file sizes (reported in MB) and convert to bytes.
            for file_ in files:
                if file_.size_in_mb is not None:
                    total_size += file_.size_in_mb
            total_size *= 2**20
    except KeyError as e:
        # Chain the original error so the root cause stays visible.
        raise ParameterError(
            ("Could not process {0}.\n"
             "Unable to fetch metadata due to {1}".format(uri, e))) from e
    except LookupError as e:
        raise ParameterError(("Could not process {0}.\n"
                              "Reason: {1}".format(uri, str(e)))) from e

    if not files:
        raise ParameterError("Dataset {} has no files.".format(uri))

    dataset.same_as = Url(url_id=remove_credentials(uri))

    if not provider.is_git_based:
        if not name:
            name = generate_default_name(dataset.title, dataset.version)

        if is_doi(dataset.identifier):
            dataset.same_as = Url(url_str=urllib.parse.urljoin(
                "https://doi.org", dataset.identifier))

        urls, names = zip(*[(f.source, f.filename) for f in files])

        _add_to_dataset(
            client,
            urls=urls,
            name=name,
            create=True,
            with_metadata=dataset,
            force=True,
            extract=extract,
            all_at_once=True,
            destination_names=names,
            progress=progress,
            interactive=with_prompt,
            total_size=total_size,
        )

        if dataset.version:
            # BUG FIX: the previous pattern "[^a-zA-Z0-9.-_]" contained an
            # unintended character range ('.' through '_') that wrongly kept
            # characters such as '/', ':' and '?' (invalid in git tag names)
            # and wrongly replaced literal '-'. Placing '-' last makes it a
            # literal hyphen.
            tag_name = re.sub("[^a-zA-Z0-9._-]", "_", dataset.version)
            tag_dataset(
                client, name, tag_name,
                "Tag {} created by renku import".format(dataset.version))
    else:
        name = name or dataset.name

        if not dataset.data_dir:
            raise OperationError(
                f"Data directory for dataset must be set: {dataset.name}")

        # Import the whole data directory plus any files stored outside it.
        sources = [f"{dataset.data_dir}/**"]

        for file_ in dataset.files:
            try:
                Path(file_.path).relative_to(dataset.data_dir)
            except ValueError:  # Files that are not in dataset's data directory
                sources.append(file_.path)

        _add_to_dataset(
            client,
            urls=[record.project_url],
            name=name,
            sources=sources,
            with_metadata=dataset,
            create=True,
        )
def import_dataset(
    client,
    uri,
    short_name='',
    extract=False,
    with_prompt=False,
    commit_message=None,
    progress=None,
):
    """Import data from a 3rd party provider.

    :param client: LocalClient instance.
    :param uri: URI or DOI of the dataset to import.
    :param short_name: Short name for the imported dataset; generated when empty.
    :param extract: Whether to extract downloaded archives.
    :param with_prompt: Show a confirmation prompt with the file listing.
    :param commit_message: Accepted for interface compatibility; not read here.
    :param progress: Progress-reporting callback forwarded to the download.
    :raises ParameterError: When the URI cannot be processed.
    """
    provider, err = ProviderFactory.from_uri(uri)

    if err and provider is None:
        raise ParameterError('Could not process {0}.\n{1}'.format(uri, err))

    try:
        record = provider.find_record(uri)
        dataset = record.as_dataset(client)
        files = dataset.files

        if with_prompt:
            click.echo(
                tabulate(files, headers=OrderedDict((
                    ('checksum', None),
                    ('filename', 'name'),
                    ('size_in_mb', 'size (mb)'),
                    ('filetype', 'type'),
                ))))

            text_prompt = 'Do you wish to download this version?'
            if record.is_last_version(uri) is False:
                # Warn when a newer version of the record exists upstream.
                text_prompt = WARNING + 'Newer version found at {}\n'.format(
                    record.links.get('latest_html')) + text_prompt

            click.confirm(text_prompt, abort=True)
    except KeyError as e:
        # Chain the original error so the root cause stays visible.
        raise ParameterError(
            ('Could not process {0}.\n'
             'Unable to fetch metadata due to {1}'.format(uri, e))) from e
    except LookupError as e:
        raise ParameterError(('Could not process {0}.\n'
                              'URI not found.'.format(uri))) from e

    if files:
        if not short_name:
            short_name = generate_default_short_name(dataset.name,
                                                     dataset.version)

        dataset.url = remove_credentials(dataset.url)

        add_to_dataset(
            client,
            urls=[f.url for f in files],
            short_name=short_name,
            create=True,
            with_metadata=dataset,
            force=True,
            extract=extract,
            all_at_once=True,
            progress=progress,
        )

        if dataset.version:
            # BUG FIX: the previous pattern '[^a-zA-Z0-9.-_]' contained an
            # unintended character range ('.' through '_') that wrongly kept
            # characters invalid in git tag names and wrongly replaced literal
            # '-'. Placing '-' last makes it a literal hyphen.
            tag_name = re.sub('[^a-zA-Z0-9._-]', '_', dataset.version)
            tag_dataset(
                client, short_name, tag_name,
                'Tag {} created by renku import'.format(dataset.version))