Ejemplo n.º 1
0
def test_inheritance():
    """Check that ``@type`` and ``@context`` accumulate through subclasses."""
    expected_context = {'prov', 'wfprov'}

    @jsonld.s(type='prov:Activity', context={'prov': 'A'})
    class Activity:
        """Base class carrying the ``prov`` type and context."""

    @jsonld.s(type='wfprov:ProcessRun', context={'wfprov': 'B'})
    class ProcessRun(Activity):
        """Subclass adding the ``wfprov`` type and context."""

    # One level of inheritance: both types and both context keys are present.
    serialized = jsonld.asjsonld(ProcessRun())
    assert set(serialized['@type']) == {'prov:Activity', 'wfprov:ProcessRun'}
    assert set(serialized['@context'].keys()) == expected_context

    @jsonld.s(type='wfprov:WorkflowRun')
    class WorkflowRun(ProcessRun):
        """Second-level subclass that declares a type but no context."""

    # Two levels: the new type is added, the context keys stay the same.
    serialized = jsonld.asjsonld(WorkflowRun())
    assert set(serialized['@type']) == {
        'prov:Activity',
        'wfprov:ProcessRun',
        'wfprov:WorkflowRun',
    }
    assert set(serialized['@context'].keys()) == expected_context
Ejemplo n.º 2
0
def datasets(ctx, client):
    """Migrate dataset metadata."""
    from renku.models._jsonld import asjsonld
    from renku.models.datasets import Dataset
    from renku.models.refs import LinkReference

    from ._checks.location_datasets import _dataset_metadata_pre_0_3_4

    for legacy_path in _dataset_metadata_pre_0_3_4(client):
        migrated = Dataset.from_yaml(legacy_path, client=client)

        # Reference name: the old dataset directory relative to ``data``.
        data_root = client.path / 'data'
        name = str(legacy_path.parent.relative_to(data_root))

        # New metadata file lives in a directory keyed by the dataset uid.
        target = client.renku_datasets_path / migrated.uid / client.METADATA
        target.parent.mkdir(parents=True, exist_ok=True)

        def relocate(key):
            """Re-express a file key relative to the new metadata folder."""
            previous = str(legacy_path.parent / key)
            return os.path.relpath(previous, start=str(target.parent))

        migrated = migrated.rename_files(relocate)

        with target.open('w') as stream:
            yaml.dump(asjsonld(migrated), stream, default_flow_style=False)

        # Remove the old metadata file only after the new one is written.
        legacy_path.unlink()

        reference = LinkReference.create(
            client=client, name='datasets/' + name
        )
        reference.set_reference(target)
Ejemplo n.º 3
0
def _jsonld(graph, format, *args, **kwargs):
    """Serialize the graph activities through the ``pyld`` function ``format``.

    :param graph: Object whose ``activities`` mapping holds the items to
        serialize.
    :param format: Name of the attribute of ``pyld.jsonld`` to call on the
        list of serialized activities.
    :returns: The result dumped as a JSON string indented by two spaces.

    Extra positional and keyword arguments are accepted but not used.
    """
    import json

    from pyld import jsonld
    from renku.models._jsonld import asjsonld

    processor = getattr(jsonld, format)
    documents = [asjsonld(action) for action in graph.activities.values()]
    return json.dumps(processor(documents), indent=2)
Ejemplo n.º 4
0
def jsonld(client, datasets=None):
    """Print datasets as a JSON-LD list.

    :param client: Client whose ``datasets`` mapping is used when
        ``datasets`` is empty or not given.
    :param datasets: Optional mapping of metadata path to dataset.
    """
    from renku.models._json import dumps
    from renku.models._jsonld import asjsonld

    if not datasets:
        datasets = client.datasets

    documents = []
    for path, dataset in datasets.items():
        # Path of the current directory relative to the metadata folder.
        basedir = os.path.relpath('.', start=str(path.parent))
        documents.append(asjsonld(dataset, basedir=basedir))

    click.echo(dumps(documents, indent=2))
Ejemplo n.º 5
0
def jsonld(client, records):
    """Show dataset file records as JSON-LD through the pager.

    :param client: LocalClient instance.
    :param records: Filtered collection.
    """
    from renku.models._json import dumps
    from renku.models._jsonld import asjsonld

    serialized = dumps(list(map(asjsonld, records)), indent=2)
    echo_via_pager(serialized)
Ejemplo n.º 6
0
def test_project_serialization():
    """Check a project's timestamps and its JSON-LD type and context."""
    with freeze_time('2017-03-01 08:00:00') as clock:
        demo = Project(name='demo')
        assert demo.name == 'demo'
        # Both timestamps must equal the frozen time.
        assert demo.created == clock()
        assert demo.updated == clock()

    serialized = asjsonld(demo)
    assert serialized['@type'].endswith('Project')
    assert 'created' in serialized['@context']
Ejemplo n.º 7
0
def jsonld(client, datasets):
    """Print an iterable of datasets as a JSON-LD list.

    :param client: Accepted for interface compatibility; not used here.
    :param datasets: Iterable of datasets, each exposing ``__reference__``.
    """
    from renku.models._json import dumps
    from renku.models._jsonld import asjsonld

    documents = []
    for dataset in datasets:
        # Path of the current directory relative to the dataset's
        # ``__reference__`` parent folder.
        metadata_dir = dataset.__reference__.parent
        basedir = os.path.relpath('.', start=str(metadata_dir))
        documents.append(asjsonld(dataset, basedir=basedir))

    click.echo(dumps(documents, indent=2))
Ejemplo n.º 8
0
    def with_dataset(self, name=None):
        """Yield an editable metadata object for a dataset.

        The whole body runs while holding ``self.lock``. An existing dataset
        is looked up by ``name``; if none is found a new ``Dataset`` is
        created. After the generator is resumed, the yielded dataset is
        serialized and written to its metadata file.

        :param name: Dataset name used for the lookup and, for new datasets,
            for the ``datasets/<name>`` link reference. Optional.
        """
        from renku.models.refs import LinkReference

        with self.lock:
            path = None
            dataset = None

            if name:
                # First try ``name`` directly as a folder under the datasets
                # path.
                path = self.renku_datasets_path / name / self.METADATA

                if not path.exists():
                    # Otherwise resolve it through the ``datasets/<name>``
                    # link reference.
                    path = LinkReference(client=self,
                                         name='datasets/' + name).reference

                if path.exists():
                    with path.open('r') as f:
                        # NOTE(review): ``yaml.load`` is called without an
                        # explicit Loader; confirm the metadata file is
                        # trusted and the installed PyYAML accepts this.
                        source = yaml.load(f) or {}
                    dataset = Dataset.from_jsonld(source)

            if dataset is None:
                # Nothing was loaded: start a new dataset with empty metadata.
                source = {}
                dataset = Dataset(name=name)

                # New metadata is stored in a folder named after the hex form
                # of the dataset identifier.
                path = (self.renku_datasets_path / dataset.identifier.hex /
                        self.METADATA)
                path.parent.mkdir(parents=True, exist_ok=True)

                if name:
                    # Register the name so later lookups find this path.
                    LinkReference.create(client=self, name='datasets/' +
                                         name).set_reference(path)

            # Make sure the dataset's data folder exists before handing the
            # object to the caller.
            dataset_path = self.path / self.datadir / dataset.name
            dataset_path.mkdir(parents=True, exist_ok=True)

            yield dataset

            # Everything below runs only when the generator is resumed
            # normally; there is no try/finally around the yield.
            # Merge the (possibly modified) dataset into the loaded mapping.
            source.update(**asjsonld(dataset))

            # TODO
            # if path is None:
            #     path = dataset_path / self.METADATA
            #     if path.exists():
            #         raise ValueError('Dataset already exists')

            with path.open('w') as f:
                yaml.dump(source, f, default_flow_style=False)
Ejemplo n.º 9
0
    def with_dataset(self, name=None):
        """Yield an editable metadata object for a dataset.

        The whole body runs while holding ``self.lock``. Metadata is read
        from, and written back to, the ``self.METADATA`` file inside the
        dataset folder ``self.path / self.datadir / name``.

        :param name: Dataset name, also used as the folder name.
        :raises FileExistsError: If creating the dataset folder raises
            ``FileExistsError``.
        """
        with self.lock:
            from renku.models._jsonld import asjsonld
            from renku.models.datasets import Dataset
            path = None
            dataset = None

            # NOTE(review): this is evaluated before the ``if name`` check
            # below, so the default ``name=None`` reaches the path join —
            # presumably that fails; confirm callers always pass a name.
            dataset_path = self.path / self.datadir / name

            if name:
                path = dataset_path / self.METADATA
                if path.exists():
                    with open(path, 'r') as f:
                        # NOTE(review): ``yaml.load`` is called without an
                        # explicit Loader; confirm the metadata file is
                        # trusted and the installed PyYAML accepts this.
                        source = yaml.load(f) or {}
                    dataset = Dataset.from_jsonld(source)

            if dataset is None:
                # No metadata file was loaded: start a new dataset.
                source = {}
                dataset = Dataset(name=name)
                try:
                    # NOTE(review): with ``exist_ok=True`` an existing
                    # directory presumably does not raise (assuming a pathlib
                    # path), so the handler below would fire only when the
                    # path exists but is not a directory — confirm intent.
                    dataset_path.mkdir(parents=True, exist_ok=True)
                except FileExistsError:
                    raise FileExistsError('This dataset already exists.')

            yield dataset

            # Everything below runs only when the generator is resumed
            # normally; there is no try/finally around the yield.
            # Merge the serialized dataset into the loaded mapping, leaving
            # out the attribute named ``datadir``.
            source.update(
                **asjsonld(
                    dataset,
                    filter=lambda attr, _: attr.name != 'datadir',
                )
            )

            # TODO
            # if path is None:
            #     path = dataset_path / self.METADATA
            #     if path.exists():
            #         raise ValueError('Dataset already exists')

            # NOTE(review): ``path`` is still None here when ``name`` is
            # falsy — see the TODO above.
            with open(path, 'w') as f:
                yaml.dump(source, f, default_flow_style=False)
Ejemplo n.º 10
0
    def with_metadata(self):
        """Yield the project metadata for editing and save it afterwards.

        The whole body runs while holding ``self.lock``. If the metadata
        file exists it is loaded; otherwise a fresh ``Project`` is yielded.
        After the generator is resumed, the project is serialized, merged
        into the loaded mapping and written back to the metadata file.
        """
        with self.lock:
            from renku.models._jsonld import asjsonld
            from renku.models.projects import Project

            target = self.renku_metadata_path

            if not self.renku_metadata_path.exists():
                # No metadata file yet: start from an empty document.
                raw = {}
                project = Project()
            else:
                with target.open('r') as stream:
                    # NOTE(review): ``yaml.load`` is called without an
                    # explicit Loader; confirm the file is trusted.
                    raw = yaml.load(stream) or {}
                project = Project.from_jsonld(raw)

            yield project

            # Merge the edited project into the loaded mapping and save it.
            raw.update(**asjsonld(project))
            with target.open('w') as stream:
                yaml.dump(raw, stream, default_flow_style=False)
Ejemplo n.º 11
0
def move(ctx, client, sources, destination):
    """Move files and check repository for potential problems."""
    # Steps: warn about ignored destinations, rewrite dataset metadata that
    # references moved files, transfer LFS tracking to the new paths,
    # recreate symlinks by hand, then ``git mv`` whatever remains.
    import yaml

    from renku.api._git import _expand_directories
    from renku.models._jsonld import asjsonld

    dst = Path(destination)

    def fmt_path(path):
        """Format path as relative to the client path."""
        return str(Path(path).absolute().relative_to(client.path))

    # Maps every expanded source path to the command-line argument it came
    # from; both sides are relative to the client path.
    files = {
        fmt_path(source): fmt_path(file_or_dir)
        for file_or_dir in sources
        for source in _expand_directories((file_or_dir, ))
    }

    def fmt_dst(path):
        """Build a destination path for a source path."""
        # The source's position relative to its originating argument is
        # re-created under ``dst``.
        return str(dst / os.path.relpath(path, start=files[path]))

    # Maps each source path to the path it will have after the move.
    destinations = {source: fmt_dst(source) for source in files}

    # 1. Check .gitignore.
    ignored = client.find_ignored_paths(*destinations.values())
    if ignored:
        click.echo(WARNING + 'Renamed files match .gitignore.\n')
        if click.confirm(
            'Do you want to edit ".gitignore" now?', default=False
        ):
            click.edit(filename=str(client.path / '.gitignore'))

    # 2. Update dataset metadata files.
    with progressbar(
        client.datasets.items(),
        item_show_func=lambda item: str(item[1].short_id) if item else '',
        label='Updating dataset metadata',
        width=0,
    ) as bar:
        for (path, dataset) in bar:
            # Old file key -> new file key for this dataset only.
            renames = {}

            for file in dataset.files:
                # Dataset file keys are joined onto the metadata folder and
                # normalized into the same form as the keys of ``files``.
                filepath = fmt_path(os.path.normpath(str(path.parent / file)))

                if filepath in files:
                    # The new key is the destination expressed relative to
                    # the metadata folder.
                    renames[file] = os.path.relpath(
                        destinations[filepath], start=str(path.parent)
                    )

            if renames:
                # Keys without a rename are kept unchanged.
                dataset = dataset.rename_files(
                    lambda key: renames.get(key, key)
                )

                with path.open('w') as fp:
                    yaml.dump(asjsonld(dataset), fp, default_flow_style=False)

    # 3. Manage .gitattributes for external storage.
    # Only sources whose ``filter`` attribute is ``lfs`` are considered.
    tracked = tuple(
        path for path, attr in client.find_attr(*files).items()
        if attr.get('filter') == 'lfs'
    )
    client.untrack_paths_from_storage(*tracked)
    # Attributes still reported after untracking trigger the warning below.
    existing = client.find_attr(*tracked)
    if existing:
        click.echo(WARNING + 'There are custom .gitattributes.\n')
        if click.confirm(
            'Do you want to edit ".gitattributes" now?', default=False
        ):
            click.edit(filename=str(client.path / '.gitattributes'))

    # Track the same files again under their destination paths.
    client.track_paths_in_storage(*(destinations[path] for path in tracked))

    # 4. Handle symlinks.
    dst.parent.mkdir(parents=True, exist_ok=True)

    for source, target in destinations.items():
        src = Path(source)
        if src.is_symlink():
            # Create a new link at the target that points to the resolved
            # source, relative to the target's folder, then drop the old one.
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            Path(target).symlink_to(
                os.path.relpath(
                    str(src.resolve()), start=os.path.dirname(target)
                )
            )
            src.unlink()
            # Already handled, so it no longer contributes to the final
            # list of sources built from ``files``.
            del files[source]

    # Finally move the files.
    # The values of ``files`` are the originating arguments; the set removes
    # duplicates from arguments that expanded to several paths.
    final_sources = list(set(files.values()))
    if final_sources:
        run(['git', 'mv'] + final_sources + [destination], check=True)