예제 #1
0
def _migrate_composite_step(client, workflow, path, commit=None):
    """Migrate a composite workflow."""
    if not commit:
        commit = client.find_previous_commit(path)
    run = Run(client=client, path=path, commit=commit)

    # The migrated workflow is written under a fresh, unique file name.
    filename = '{0}_migrated.yaml'.format(uuid.uuid4().hex)
    run.path = (client.workflow_path / filename).relative_to(client.path)

    for step in workflow.steps:
        # Inline (dict) step definitions are not migrated here.
        if isinstance(step.run, dict):
            continue

        path = client.workflow_path / step.run
        subrun = parse_cwl_cached(str(path))

        subprocess, _ = _migrate_single_step(
            client, subrun, path, commit=commit
        )
        subprocess.path = run.path
        run.add_subprocess(subprocess)

    with with_reference(run.path):
        wf = WorkflowRun.from_run(run, client, run.path, commit=commit)
        wf.to_yaml()
        client.add_to_activity_index(wf)

    return wf, run.path
예제 #2
0
    def with_workflow_storage(self):
        """Yield a workflow storage."""
        from renku.core.models.cwl.workflow import Workflow

        workflow = Workflow()
        yield workflow

        # After the caller is done, persist every recorded step to its
        # own YAML file under the workflow directory.
        for step in workflow.steps:
            filename = '{0}_{1}.yaml'.format(
                uuid.uuid4().hex,
                secure_filename('_'.join(step.run.baseCommand)),
            )

            storage_dir = self.workflow_path
            if not storage_dir.exists():
                storage_dir.mkdir()

            target = storage_dir / filename

            with with_reference(target):
                run = step.run.generate_process_run(
                    client=self,
                    commit=self.repo.head.commit,
                    path=target,
                )
                run.to_yaml()
                self.add_to_activity_index(run)
예제 #3
0
    def create_dataset(
        self,
        short_name=None,
        title=None,
        description=None,
        creators=None,
        keywords=None,
    ):
        """Create a dataset."""
        # Validate the short name before touching the filesystem.
        if not short_name:
            raise errors.ParameterError('Dataset short_name must be provided.')

        if not is_dataset_short_name_valid(short_name):
            raise errors.ParameterError(
                'Dataset short_name "{}" is not valid.'.format(short_name))

        if self.load_dataset(short_name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        # The title defaults to the short name when not supplied.
        title = title or short_name

        identifier = str(uuid.uuid4())
        metadata_path = self.renku_datasets_path / identifier / self.METADATA

        if metadata_path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(metadata_path))

        metadata_path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        with with_reference(metadata_path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                short_name=short_name,
                name=title,
                description=description,
                creator=creators,
                keywords=keywords or (),
            )

        dataset_ref = LinkReference.create(
            client=self, name='datasets/' + short_name)
        dataset_ref.set_reference(metadata_path)

        dataset.path = Path(dataset.path).relative_to(self.path)
        dataset.to_yaml()

        return dataset, metadata_path, dataset_ref
예제 #4
0
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    :param client: Local repository client.
    :param revision: Git revision the dependency graph is built from.
    :param no_output: Passed through to ``graph.build`` as ``can_be_cwl``.
    :param siblings: Callable that checks or extends sibling outputs.
    :param paths: Paths whose generating workflows should be re-examined.
    """
    graph = Graph(client)
    outputs = graph.build(revision=revision, can_be_cwl=no_output, paths=paths)
    # Keep only nodes whose inputs changed since they were generated.
    outputs = {node for node in outputs if graph.need_update(node)}
    if not outputs:
        click.secho("All files were generated from the latest inputs.", fg="green")
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Get all clean nodes.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    workflow = graph.as_workflow(input_paths=input_paths, output_paths=output_paths, outputs=outputs,)

    wf, path = CWLConverter.convert(workflow, client)
    # Don't compute paths if storage is disabled.
    if client.check_external_storage():
        # Make sure all inputs are pulled from a storage.
        paths_ = (i.consumes.path for i in workflow.inputs)
        client.pull_paths_from_storage(*paths_)

    execute(client, path, output_paths=output_paths)

    # Stage every regenerated output and commit when something changed.
    paths = [o.produces.path for o in workflow.outputs]

    client.repo.git.add(*paths)

    if client.repo.is_dirty():
        commit_msg = "renku update: committing {} newly added files".format(len(paths))

        committer = Actor("renku {0}".format(__version__), version_url)

        client.repo.index.commit(
            commit_msg, committer=committer, skip_hooks=True,
        )

    # Persist the workflow that performed the update as an activity.
    workflow_name = "{0}_update.yaml".format(uuid.uuid4().hex)

    path = client.workflow_path / workflow_name

    workflow.update_id_and_label_from_commit_path(client, client.repo.head.commit, path)

    with with_reference(path):
        # Composite workflows are recorded as WorkflowRun, single steps
        # as ProcessRun.
        cls = WorkflowRun if workflow.subprocesses else ProcessRun
        run = cls.from_run(run=workflow, client=client, path=path, update_commits=True)
        run.to_yaml()
        client.add_to_activity_index(run)
예제 #5
0
    def from_cwl(cls, data, __reference__=None):
        """Return an instance from CWL data."""
        # Resolve the concrete class from the registry via the CWL 'class'
        # entry; fall back to the current class when it is unknown.
        cls = cls.registry.get(data.get('class', None), cls)
        kwargs = {k: v for k, v in iteritems(data) if k != 'class'}

        if __reference__:
            with with_reference(__reference__):
                return cls(**kwargs)
        return cls(**kwargs)
예제 #6
0
    def create_dataset(self,
                       name,
                       short_name=None,
                       description='',
                       creators=None):
        """Create a dataset."""
        if not name:
            raise errors.ParameterError('Dataset name must be provided.')

        # Derive a short name from the full name when none is given.
        if not short_name:
            short_name = generate_default_short_name(name, None)

        if not is_dataset_name_valid(short_name):
            raise errors.ParameterError(
                'Dataset name "{}" is not valid.'.format(short_name))

        if self.load_dataset(name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        identifier = str(uuid.uuid4())
        metadata_path = self.renku_datasets_path / identifier / self.METADATA

        if metadata_path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(metadata_path))

        metadata_path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        with with_reference(metadata_path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                name=name,
                short_name=short_name,
                description=description,
                creator=creators,
            )

        dataset_ref = LinkReference.create(
            client=self, name='datasets/' + short_name)
        dataset_ref.set_reference(metadata_path)
        dataset.to_yaml()

        return dataset, metadata_path, dataset_ref
예제 #7
0
def _migrate_composite_step(client, workflow, path, commit=None):
    """Migrate a composite workflow.

    :param client: Local repository client.
    :param workflow: Parsed CWL workflow whose steps are migrated.
    :param path: Path of the composite workflow file being migrated.
    :param commit: Commit to attach; defaults to the previous commit
        touching ``path``.
    :return: Tuple of the serialized ``WorkflowRun`` and its path.
    """
    if not commit:
        commit = client.find_previous_commit(path)
    run = Run(client=client, path=path, commit=commit)
    # NOTE: Derive a deterministic id from path + commit so repeated
    # migrations don't create duplicates.
    rel_path = Path(path).relative_to(client.path)
    label = f"{rel_path}@{commit.hexsha}"
    identifier = sha1(label.encode("utf-8")).hexdigest()
    run._id = Run.generate_id(client, identifier=identifier)

    name = "{0}_migrated.yaml".format(uuid.uuid4().hex)

    run.path = (client.workflow_path / name).relative_to(client.path)

    for step in workflow.steps:
        if isinstance(step.run, dict):
            # Inline (dict) step definitions are skipped.
            continue
        else:
            path = client.workflow_path / step.run
            subrun = parse_cwl_cached(str(path))

        subprocess, _ = _migrate_single_step(client,
                                             subrun,
                                             path,
                                             parent_commit=commit)
        run.add_subprocess(subprocess)

    with with_reference(run.path):
        wf = WorkflowRun.from_run(run, client, run.path, commit=commit)

        # HACK: This fixes broken SoftwareAgent due to rebases done by users
        if isinstance(
                wf.association.agent,
                Person) or not wf.association.agent.label.startswith("renku "):
            wf.association.agent = default_missing_software_agent
        for p in wf._processes:
            if isinstance(
                    p.association.agent, Person
            ) or not p.association.agent.label.startswith("renku "):
                p.association.agent = default_missing_software_agent
        wf.to_yaml()
        client.add_to_activity_index(wf)

    return wf, run.path
예제 #8
0
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands.

    :param client: Local repository client.
    :param revision: Git revision the dependency graph is built from.
    :param roots: Starting paths; treated purely as inputs, never recreated.
    :param siblings: Callable that checks or extends sibling outputs.
    :param inputs: Callable that checks (and may adjust) workflow inputs.
    :param paths: Output paths to recreate.
    """
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Roots are inputs only — exclude them from the recreated outputs.
    output_paths -= roots
    outputs = [o for o in outputs if o.path not in roots]

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.as_workflow(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        )
    )

    wf, path = CWLConverter.convert(workflow, client)

    # Don't compute paths if storage is disabled.
    if client.check_external_storage():
        # Make sure all inputs are pulled from a storage.
        paths_ = (i.consumes.path for i in workflow.inputs)
        client.pull_paths_from_storage(*paths_)

    # Execute the workflow and relocate all output files.
    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        path,
        output_paths=output_paths,
    )

    # Stage every recreated output and commit when something changed.
    paths = [o.produces.path for o in workflow.outputs]

    client.repo.git.add(*paths)

    if client.repo.is_dirty():
        commit_msg = ('renku rerun: '
                      'committing {} newly added files').format(len(paths))

        committer = Actor('renku {0}'.format(__version__), version_url)

        client.repo.index.commit(
            commit_msg,
            committer=committer,
            skip_hooks=True,
        )

    # Persist the workflow that performed the rerun as an activity.
    workflow_name = '{0}_rerun.yaml'.format(uuid.uuid4().hex)

    path = client.workflow_path / workflow_name

    workflow.update_id_and_label_from_commit_path(
        client, client.repo.head.commit, path
    )

    with with_reference(path):
        run = WorkflowRun.from_run(workflow, client, path)
        run.to_yaml()
        client.add_to_activity_index(run)
예제 #9
0
def _migrate_single_step(client,
                         cmd_line_tool,
                         path,
                         commit=None,
                         parent_commit=None,
                         persist=False):
    """Migrate a single step workflow.

    :param client: Local repository client.
    :param cmd_line_tool: Parsed CWL ``CommandLineTool`` to migrate.
    :param path: Path of the CWL file being migrated.
    :param commit: Commit to attach; defaults to the previous commit that
        touched ``path`` (searched from ``parent_commit`` when given).
    :param parent_commit: Commit of the enclosing composite workflow.
    :param persist: When ``True``, serialize the resulting ``ProcessRun``
        to YAML and register it in the activity index.
    :return: ``(run, None)`` when not persisting, otherwise
        ``(process_run, absolute_path)``.
    """
    if not commit:
        commit = client.find_previous_commit(
            path, revision=parent_commit if parent_commit else "HEAD")

    run = Run(client=client, path=path, commit=commit)
    run.command = " ".join(cmd_line_tool.baseCommand)
    run.successcodes = cmd_line_tool.successCodes

    inputs = list(cmd_line_tool.inputs)
    outputs = list(cmd_line_tool.outputs)

    # NOTE: Make run ids deterministic to prevent duplication.
    rel_path = Path(path).relative_to(client.path)
    if parent_commit:
        label = f"{rel_path}@{parent_commit.hexsha}"
    else:
        label = f"{rel_path}@{commit.hexsha}"
    identifier = sha1(label.encode("utf-8")).hexdigest()

    base_id = Run.generate_id(client, identifier=identifier)
    run._id = base_id

    if cmd_line_tool.stdin:
        # Stdin is a "$(inputs.<name>)" reference; extract the input name.
        name = cmd_line_tool.stdin.split(".")[1]

        if name.endswith(")"):
            name = name[:-1]

        matched_input = next(i for i in inputs if i.id == name)
        inputs.remove(matched_input)

        path = client.workflow_path / Path(matched_input.default["path"])
        stdin = path.resolve().relative_to(client.path)
        id_ = CommandInput.generate_id(base_id, "stdin")

        run.inputs.append(
            CommandInput(
                id=id_,
                consumes=_entity_from_path(client, stdin, commit),
                mapped_to=MappedIOStream(client=client, stream_type="stdin"),
            ))

    if cmd_line_tool.stdout:
        run.outputs.append(
            CommandOutput(
                id=CommandOutput.generate_id(base_id, "stdout"),
                produces=_entity_from_path(client, cmd_line_tool.stdout,
                                           commit),
                mapped_to=MappedIOStream(client=client, stream_type="stdout"),
                create_folder=False,
            ))

        # FIX: supply a default so a missing "output_stdout" entry yields
        # None instead of raising StopIteration; the guard below already
        # expects that.
        matched_output = next(
            (o for o in outputs if o.id == "output_stdout"), None)

        if matched_output:
            outputs.remove(matched_output)

    if cmd_line_tool.stderr:
        run.outputs.append(
            CommandOutput(
                id=CommandOutput.generate_id(base_id, "stderr"),
                produces=_entity_from_path(client, cmd_line_tool.stderr,
                                           commit),
                mapped_to=MappedIOStream(client=client, stream_type="stderr"),
                create_folder=False,
            ))

        # FIX: same default-to-None handling as for stdout above.
        matched_output = next(
            (o for o in outputs if o.id == "output_stderr"), None)

        if matched_output:
            outputs.remove(matched_output)

    # Directories listed in an InitialWorkDirRequirement with an empty
    # Directory entry were created by the tool itself.
    created_outputs = []
    workdir_requirements = [
        r for r in cmd_line_tool.requirements
        if isinstance(r, InitialWorkDirRequirement)
    ]

    for r in workdir_requirements:
        for listing in r.listing:
            if listing.entry == '$({"listing": [], "class": "Directory"})':
                created_outputs.append(listing.entryname)

    for o in outputs:
        prefix = None
        position = None

        if o.outputBinding.glob.startswith("$(inputs."):
            # The output path is provided via an input parameter.
            name = o.outputBinding.glob.split(".")[1]

            if name.endswith(")"):
                name = name[:-1]

            matched_input = next(i for i in inputs if i.id == name)
            inputs.remove(matched_input)

            if isinstance(matched_input.default, dict):
                path = client.workflow_path / Path(
                    matched_input.default["path"])
            else:
                path = Path(matched_input.default)

            path = Path(os.path.abspath(client.path / path)).relative_to(
                client.path)

            if matched_input.inputBinding:
                prefix = matched_input.inputBinding.prefix
                position = matched_input.inputBinding.position

                if prefix and matched_input.inputBinding.separate:
                    prefix += " "
        else:
            path = Path(o.outputBinding.glob)

        create_folder = False

        # For files, the containing folder decides whether the tool had to
        # create it first.
        check_path = path
        if not (client.path / path).is_dir():
            check_path = path.parent

        if check_path != "." and str(check_path) in created_outputs:
            create_folder = True

        run.outputs.append(
            CommandOutput(
                id=CommandOutput.generate_id(base_id, position),
                position=position,
                prefix=prefix,
                produces=_entity_from_path(client, path, commit),
                create_folder=create_folder,
            ))

    # Remaining inputs become CommandInputs (files/directories) or plain
    # CommandArguments (scalar defaults).
    for i in inputs:
        prefix = None
        position = None

        if i.inputBinding:
            prefix = i.inputBinding.prefix
            position = i.inputBinding.position

            if prefix and i.inputBinding.separate:
                prefix += " "

        if isinstance(
                i.default,
                dict) and "class" in i.default and i.default["class"] in [
                    "File", "Directory"
                ]:
            path = client.workflow_path / Path(i.default["path"])
            path = Path(os.path.abspath(path)).relative_to(client.path)

            run.inputs.append(
                CommandInput(
                    id=CommandInput.generate_id(base_id, position),
                    position=position,
                    prefix=prefix,
                    consumes=_entity_from_path(client, path, commit),
                ))
        else:
            run.arguments.append(
                CommandArgument(
                    id=CommandArgument.generate_id(base_id, position),
                    position=position,
                    prefix=prefix,
                    value=str(i.default),
                ))

    for a in cmd_line_tool.arguments:
        id_ = CommandArgument.generate_id(base_id, a["position"])
        run.arguments.append(
            CommandArgument(id=id_,
                            position=a["position"],
                            value=a["valueFrom"]))

    if not persist:
        return run, None

    step_name = "{0}_{1}.yaml".format(
        uuid.uuid4().hex,
        secure_filename("_".join(cmd_line_tool.baseCommand)),
    )

    absolute_path = client.workflow_path / step_name
    path = absolute_path.relative_to(client.path)

    with with_reference(absolute_path):
        run.path = path
        process_run = ProcessRun.from_run(run, client, path, commit=commit)
        process_run.invalidated = _invalidations_from_commit(client, commit)

        # HACK: This fixes broken SoftwareAgent due to rebases done by users
        if isinstance(
                process_run.association.agent, Person
        ) or not process_run.association.agent.label.startswith("renku "):
            process_run.association.agent = default_missing_software_agent
        process_run.to_yaml()
        client.add_to_activity_index(process_run)
        return process_run, absolute_path
예제 #10
0
    def from_jsonld(
        cls,
        data,
        client=None,
        commit=None,
        __reference__=None,
        __source__=None,
    ):
        """Instantiate a JSON-LD class from data.

        :param data: JSON-LD dictionary (an existing instance of ``cls``
            is returned unchanged).
        :param client: Optional repository client passed to the constructor.
        :param commit: Optional commit passed to the constructor.
        :param __reference__: Optional reference set while constructing.
        :param __source__: Optional raw source attached to the instance.
        :raises ValueError: If ``data`` is neither an instance nor a dict.
        """
        if isinstance(data, cls):
            return data

        if not isinstance(data, dict):
            raise ValueError(data)

        if '@type' in data:
            # @type could be a string or a list - make sure it is a list
            type_ = data['@type']
            if not isinstance(type_, list):
                type_ = [type_]
            # If a json-ld class has multiple types, they are in a
            # sorted tuple. This is used as the key for the class
            # registry, so we have to match it here.
            type_ = tuple(sorted(type_))
            if type_ in cls.__type_registry__ and getattr(
                cls, '_jsonld_type', None
            ) != type_:
                # Delegate to the class registered for this @type.
                new_cls = cls.__type_registry__[type_]
                if cls != new_cls:
                    return new_cls.from_jsonld(
                        data, client=client, commit=commit
                    )

        if cls._jsonld_translate:
            # perform the translation
            data = pyld.jsonld.compact(data, cls._jsonld_translate)
            # compact using the class json-ld context
            data.pop('@context', None)
            data = pyld.jsonld.compact(data, cls._jsonld_context)

        data.setdefault('@context', cls._jsonld_context)

        if data['@context'] != cls._jsonld_context:
            # merge new context into old context to prevent properties
            # getting lost in jsonld expansion
            if isinstance(data['@context'], str):
                data['@context'] = {'@base': data['@context']}
            data['@context'].update(cls._jsonld_context)
            try:
                compacted = pyld.jsonld.compact(data, cls._jsonld_context)
            except Exception:
                # Best effort: fall back to the raw data when compaction
                # fails.
                compacted = data
        else:
            compacted = data

        fields = cls._jsonld_fields

        data_ = {}
        # `client` and `commit` are passed in optionally for some classes
        # They might be unset if the metadata is used to instantiate
        # an object outside of a repo/client context.
        if client:
            data_['client'] = client
        if commit:
            data_['commit'] = commit

        for k, v in compacted.items():
            if k in fields:
                no_value_context = isinstance(v, dict) and '@context' not in v
                has_nested_context = (
                    k in compacted['@context'] and
                    '@context' in compacted['@context'][k]
                )
                if no_value_context and has_nested_context:
                    # Propagate down context
                    v['@context'] = compacted['@context'][k]['@context']

                # Strip leading underscores so private field names map to
                # constructor keyword arguments.
                data_[k.lstrip('_')] = v

        if __reference__:
            with with_reference(__reference__):
                self = cls(**data_)
        else:
            self = cls(**data_)

        if __source__:
            setattr(self, '__source__', __source__)

        return self
예제 #11
0
    def from_cwl(cls, data, __reference__=None):
        """Return an instance from CWL data.

        :param data: CWL dictionary; its ``class`` entry selects the
            concrete class from the registry.
        :param __reference__: Optional reference set while constructing.
        :raises ValueError: If a mapped custom metadata field declares no
            type.
        """
        exclude_properties = ['class', '$namespaces', '@reverse']
        class_name = data.get('class', None)
        cls = cls.registry.get(class_name, cls)

        if '$namespaces' in data:
            # handle custom metadata
            keys = data.keys()

            # Namespaced keys contain a colon; track (key, is_reverse).
            metadata_keys = [(k, False) for k in keys if ':' in k]

            if '@reverse' in keys:
                metadata_keys.extend(
                    (k, True) for k in data['@reverse'].keys() if ':' in k)

            attrs = fields(cls)

            for a in attrs:
                # map custom metadata
                if 'cwl_metadata' not in a.metadata:
                    continue

                metadata = a.metadata['cwl_metadata']

                k = (metadata.get('property'), metadata.get('reverse', False))

                if k not in metadata_keys:
                    continue

                metadata_type = metadata.get('type')

                if not metadata_type:
                    raise ValueError('CWL metadata type not specified')

                if metadata.get('reverse', False):
                    metadata_value = data['@reverse'][metadata['property']]
                else:
                    metadata_value = data[metadata['property']]
                    # Forward properties are consumed here and must not be
                    # passed to the constructor again.
                    exclude_properties.append(metadata['property'])

                if isinstance(metadata_value, list):
                    data[a.name] = [
                        type_from_metadata(metadata_type, v)
                        for v in metadata_value
                    ]
                else:
                    data[a.name] = type_from_metadata(metadata_type,
                                                      metadata_value)

        if __reference__:
            with with_reference(__reference__):
                self = cls(
                    **{
                        k: v
                        for k, v in iteritems(data)
                        if k not in exclude_properties
                    })
        else:
            self = cls(**{
                k: v
                for k, v in iteritems(data) if k not in exclude_properties
            })
        return self
예제 #12
0
def _migrate_single_step(client,
                         cmd_line_tool,
                         path,
                         commit=None,
                         persist=False):
    """Migrate a single step workflow.

    :param client: Local repository client.
    :param cmd_line_tool: Parsed CWL ``CommandLineTool`` to migrate.
    :param path: Path of the CWL file being migrated.
    :param commit: Commit to attach; defaults to the previous commit that
        touched ``path``.
    :param persist: When ``True``, serialize the resulting ``ProcessRun``
        to YAML and register it in the activity index.
    :return: ``(run, None)`` when not persisting, otherwise
        ``(process_run, path)``.
    """
    if not commit:
        commit = client.find_previous_commit(path)

    run = Run(client=client, path=path, commit=commit)
    run.command = ' '.join(cmd_line_tool.baseCommand)
    run.successcodes = cmd_line_tool.successCodes

    inputs = list(cmd_line_tool.inputs)
    outputs = list(cmd_line_tool.outputs)

    if cmd_line_tool.stdin:
        # Stdin is a "$(inputs.<name>)" reference; extract the input name.
        name = cmd_line_tool.stdin.split('.')[1]

        if name.endswith(')'):
            name = name[:-1]

        matched_input = next(i for i in inputs if i.id == name)
        inputs.remove(matched_input)

        path = client.workflow_path / Path(matched_input.default['path'])
        stdin = path.resolve().relative_to(client.path)

        run.inputs.append(
            CommandInput(consumes=_entity_from_path(client, stdin, commit),
                         mapped_to=MappedIOStream(stream_type='stdin')))

    if cmd_line_tool.stdout:
        run.outputs.append(
            CommandOutput(produces=_entity_from_path(client,
                                                     cmd_line_tool.stdout,
                                                     commit),
                          mapped_to=MappedIOStream(stream_type='stdout'),
                          create_folder=False))

        # FIX: supply a default so a missing "output_stdout" entry yields
        # None instead of raising StopIteration; the guard below already
        # expects that.
        matched_output = next(
            (o for o in outputs if o.id == 'output_stdout'), None)

        if matched_output:
            outputs.remove(matched_output)

    if cmd_line_tool.stderr:
        run.outputs.append(
            CommandOutput(produces=_entity_from_path(client,
                                                     cmd_line_tool.stderr,
                                                     commit),
                          mapped_to=MappedIOStream(stream_type='stderr'),
                          create_folder=False))

        # FIX: same default-to-None handling as for stdout above.
        matched_output = next(
            (o for o in outputs if o.id == 'output_stderr'), None)

        if matched_output:
            outputs.remove(matched_output)

    # Directories listed in an InitialWorkDirRequirement with an empty
    # Directory entry were created by the tool itself.
    created_outputs = []
    workdir_requirements = [
        r for r in cmd_line_tool.requirements
        if isinstance(r, InitialWorkDirRequirement)
    ]

    for r in workdir_requirements:
        # Renamed the ambiguous loop variable ``l`` for readability.
        for listing in r.listing:
            if listing.entry == '$({"listing": [], "class": "Directory"})':
                created_outputs.append(listing.entryname)

    for o in outputs:
        prefix = None
        position = None

        if o.outputBinding.glob.startswith('$(inputs.'):
            # The output path is provided via an input parameter.
            name = o.outputBinding.glob.split('.')[1]

            if name.endswith(')'):
                name = name[:-1]

            matched_input = next(i for i in inputs if i.id == name)
            inputs.remove(matched_input)

            if isinstance(matched_input.default, dict):
                path = client.workflow_path / Path(
                    matched_input.default['path'])
            else:
                path = Path(matched_input.default)

            path = Path(os.path.abspath(path)).relative_to(client.path)

            if matched_input.inputBinding:
                prefix = matched_input.inputBinding.prefix
                position = matched_input.inputBinding.position

                if prefix and matched_input.inputBinding.separate:
                    prefix += ' '
        else:
            path = Path(o.outputBinding.glob)

        create_folder = False

        # For files, the containing folder decides whether the tool had to
        # create it first.
        check_path = path
        if not (client.path / path).is_dir():
            check_path = path.parent

        if check_path != '.' and str(check_path) in created_outputs:
            create_folder = True

        run.outputs.append(
            CommandOutput(position=position,
                          prefix=prefix,
                          produces=_entity_from_path(client, path, commit),
                          create_folder=create_folder))

    # Remaining inputs become CommandInputs (files/directories) or plain
    # CommandArguments (scalar defaults).
    for i in inputs:
        prefix = None
        position = None

        if i.inputBinding:
            prefix = i.inputBinding.prefix
            position = i.inputBinding.position

            if prefix and i.inputBinding.separate:
                prefix += ' '

        if (isinstance(i.default, dict) and 'class' in i.default
                and i.default['class'] in ['File', 'Directory']):
            path = client.workflow_path / Path(i.default['path'])
            path = Path(os.path.abspath(path)).relative_to(client.path)

            run.inputs.append(
                CommandInput(position=position,
                             prefix=prefix,
                             consumes=_entity_from_path(client, path, commit)))
        else:
            run.arguments.append(
                CommandArgument(position=position,
                                prefix=prefix,
                                value=str(i.default)))

    for a in cmd_line_tool.arguments:
        run.arguments.append(
            CommandArgument(position=a['position'], value=a['valueFrom']))

    if not persist:
        return run, None

    step_name = '{0}_{1}.yaml'.format(
        uuid.uuid4().hex,
        secure_filename('_'.join(cmd_line_tool.baseCommand)),
    )

    path = (client.workflow_path / step_name).relative_to(client.path)

    with with_reference(path):
        run.path = path
        process_run = ProcessRun.from_run(run, client, path, commit=commit)
        process_run.invalidated = _invalidations_from_commit(client, commit)
        process_run.to_yaml()
        client.add_to_activity_index(process_run)
        return process_run, path