Beispiel #1
0
def test_output_directory(runner, project, run):
    """Check that a directory written by a command is detected as output.

    The test copies ``source`` into a not yet existing ``destination``
    through ``run``, checks the copy is registered in ``.gitattributes`` and
    listed by ``git lfs ls-files``, verifies the directory can be recreated
    with ``rerun`` and appears in ``log``, and finally expects exit code 1
    when the copy targets a directory that already contains a file.
    """
    root = Path(project)
    source = root / 'source'
    data = source / 'data.txt'
    source.mkdir(parents=True)
    data.write_text('data')

    # Destination that does not exist before the command runs.
    destination = root / 'destination'
    source_wc = root / 'destination_source.wc'

    # Destination that already holds a file.
    invalid_destination = root / 'invalid_destination'
    invalid_destination.mkdir(parents=True)
    (invalid_destination / 'non_empty').touch()

    repo = git.Repo(project)
    repo.git.add('--all')
    repo.index.commit('Created source directory')

    copy_result = runner.invoke(
        cli.cli,
        ['run', 'cp', '-LRf', str(source), str(destination)],
        catch_exceptions=False,
    )
    assert 0 == copy_result.exit_code

    copied = destination / data.name
    assert copied.exists()

    # The output inside the sub-directory has to be added to LFS.
    gitattributes = (root / '.gitattributes').read_text()
    assert str(destination.relative_to(root)) + '/**' in gitattributes
    lfs_files = subprocess.check_output(['git', 'lfs', 'ls-files']).decode()
    assert copied.name in lfs_files

    assert 0 == run(args=['run', 'wc'], stdin=copied, stdout=source_wc)

    # Make sure the output directory can be recreated.
    assert 0 == run(args=('rerun', str(source_wc)))
    assert {data.name} == {entry.name for entry in destination.iterdir()}

    log_cmd = ['log', str(source_wc)]
    log_result = runner.invoke(cli.cli, log_cmd, catch_exceptions=False)
    assert str(Path('destination') / 'data.txt') in log_result.output, log_cmd
    assert ' directory)' in log_result.output

    # Copying into the non-empty directory must be refused.
    invalid_result = runner.invoke(
        cli.cli,
        ['run', 'cp', '-r', str(source), str(invalid_destination)],
        catch_exceptions=False,
    )
    assert 1 == invalid_result.exit_code
    assert not (invalid_destination / data.name).exists()
Beispiel #2
0
    def get_relative_url(self, url):
        """Return ``url`` as a relative path when it shares the remote host.

        The remote configured for the active branch is looked up in the
        repository configuration; ``origin`` is used when the branch has no
        configuration section.  If that remote does not exist a warning is
        emitted and ``url`` is returned unchanged.  When the remote and the
        submodule URL have the same hostname a relative path is returned,
        otherwise ``url`` is returned as given.
        """
        try:
            config = self.repo.config_reader()
            remote_name = config.get(
                'branch "{}"'.format(self.repo.active_branch.name),
                'remote',
            )
        except NoSectionError:
            remote_name = 'origin'

        try:
            remote = self.repo.remote(remote_name)
        except ValueError:
            warnings.warn(
                'Remote {} not found, cannot check for relative URL.'.format(
                    remote_name))
            return url

        remote_url = GitURL.parse(remote.url)
        submodule_url = GitURL.parse(url)

        # Different servers: a relative path cannot be used.
        if remote_url.hostname != submodule_url.hostname:
            return url

        # Same server: build the relative location of the submodule.
        if remote_url.owner == submodule_url.owner:
            base = Path('../../{}'.format(submodule_url.owner))
        else:
            base = Path('..')
        return str(base / submodule_url.name)
Beispiel #3
0
def test_run_in_isolation(runner, project, client, run):
    """Check that ``run --isolation`` does not see a local ``lock`` file.

    The executed Python snippet exits with 1 when a file named ``lock``
    exists in its working directory.  While the lock is held the plain run
    must fail and leave the head commit untouched; the isolated run must
    succeed and move the head commit.
    """
    import filelock

    with client.commit():
        with (Path(project) / '.gitignore').open('a') as gitignore:
            gitignore.write('lock')

    run_args = ['run', '--no-output']
    check_lock = [
        'python',
        '-S',
        '-c',
        'import os, sys; sys.exit(1 if os.path.exists("lock") else 0)',
    ]

    initial_head = client.repo.head.commit.hexsha

    with filelock.FileLock('lock'):
        assert 1 == run(args=run_args + check_lock)
        assert client.repo.head.commit.hexsha == initial_head

        assert 0 == run(run_args + ['--isolation'] + check_lock)
        assert client.repo.head.commit.hexsha != initial_head
Beispiel #4
0
def makefile(graph):
    """Print the graph activities as Makefile rules.

    Every ``ProcessRun`` activity (or every subprocess of a ``WorkflowRun``)
    is echoed as ``outputs: inputs`` followed by a tab-indented recipe line
    made of the tool arguments and its standard stream redirections.
    Activities that are not ``ProcessRun`` instances are skipped.
    """
    from renku._compat import Path
    from renku.models.provenance.activities import ProcessRun, WorkflowRun

    for activity in graph.activities.values():
        if not isinstance(activity, ProcessRun):
            continue

        if isinstance(activity, WorkflowRun):
            steps = activity.subprocesses.values()
        else:
            steps = [activity]

        for step in steps:
            # Rule header: targets on the left, prerequisites on the right.
            targets = ' '.join(step.outputs)
            prerequisites = ' '.join(step.inputs)
            click.echo(targets + ': ' + prerequisites)

            tool = step.process
            basedir = Path(step.path).parent

            # Recipe: the command followed by its stream redirections.
            command = ' '.join(tool.to_argv())
            redirections = ' '.join(
                tool.STD_STREAMS_REPR[key] + ' ' + str(path)
                for key, path in tool._std_streams(basedir=basedir).items()
            )
            click.echo('\t@' + command + ' ' + redirections)
Beispiel #5
0
def test_git_pre_commit_hook(runner, project, capsys):
    """Test detection of output edits.

    An output created through ``run`` is modified by hand; committing that
    change must be rejected by the hook and the hook output must name the
    modified file.  After the hooks are uninstalled the same commit has to
    succeed.
    """
    result = runner.invoke(cli.cli, ['githooks', 'install'])
    assert result.exit_code == 0
    assert 'Hook already exists.' in result.output

    repo = git.Repo(project)
    cwd = Path(project)
    output = cwd / 'output.txt'

    result = runner.invoke(cli.cli, ['run', 'touch', output.name])
    assert result.exit_code == 0

    with output.open('w') as f:
        f.write('hello')

    repo.git.add('--all')
    with pytest.raises(git.HookExecutionError) as error:
        repo.index.commit('hello')

    # The check has to live outside of the ``with`` block: statements placed
    # after the raising call inside the block are never executed.  The
    # raised exception is available as ``error.value``.
    assert output.name in error.value.stdout

    result = runner.invoke(cli.cli, ['githooks', 'uninstall'])
    assert result.exit_code == 0

    repo.index.commit('hello')
Beispiel #6
0
def test_base_command_detection(instance_path):
    """Check that ``tar xf`` is detected as the base command.

    The existing ``hello.tar`` must become the single file input and the
    generated tool must reproduce the original argument vector.
    """
    archive = Path(instance_path) / 'hello.tar'
    archive.touch()

    argv = ['tar', 'xf', 'hello.tar']
    factory = CommandLineToolFactory(argv, directory=instance_path)
    tool = factory.generate_tool()

    assert tool.baseCommand == ['tar', 'xf']

    file_input = tool.inputs[0]
    assert file_input.default.path == Path('hello.tar')
    assert file_input.type == 'File'
    assert file_input.inputBinding.prefix is None
    assert file_input.inputBinding.separate is True

    assert tool.to_argv() == argv
Beispiel #7
0
def test_output_directory(runner, project):
    """Check detection of an output directory created by ``cp -r``.

    Copying into a destination that does not exist yet must succeed, while
    copying into a directory that already contains a file must exit with
    code 1 and leave that directory without the copied file.
    """
    root = Path(project)
    source = root / 'source'
    data = source / 'data.txt'
    source.mkdir(parents=True)
    data.touch()

    # Destination that does not exist yet.
    destination = root / 'destination'
    # Destination that already holds a file.
    invalid_destination = root / 'invalid_destination'
    invalid_destination.mkdir(parents=True)
    (invalid_destination / 'non_empty').touch()

    repo = git.Repo(project)
    repo.git.add('--all')
    repo.index.commit('Created source directory')

    def copy_source_to(target):
        """Invoke ``run cp -r`` from ``source`` to ``target``."""
        return runner.invoke(
            cli.cli,
            ['run', 'cp', '-r', str(source), str(target)],
            catch_exceptions=False,
        )

    assert copy_source_to(destination).exit_code == 0
    assert (destination / data.name).exists()

    assert copy_source_to(invalid_destination).exit_code == 1
    assert not (invalid_destination / data.name).exists()
Beispiel #8
0
def test_03_input(instance_path):
    """Check detection of integer, string and file inputs.

    The flag ``-f`` has to stay a plain argument, while ``-i42``,
    ``--example-string hello`` and ``--file=whale.txt`` must be turned into
    typed inputs with the matching prefix and separation.
    """
    whale = Path(instance_path) / 'whale.txt'
    whale.touch()

    argv = [
        'echo',
        '-f',
        '-i42',
        '--example-string',
        'hello',
        '--file=whale.txt',
    ]
    factory = CommandLineToolFactory(
        argv, directory=instance_path, working_dir=instance_path
    )
    tool = factory.generate_tool()

    assert tool.arguments[0].to_argv() == ['-f']

    # Integer glued to its prefix.
    integer_input = tool.inputs[0]
    assert integer_input.default == 42
    assert integer_input.type == 'int'
    assert integer_input.inputBinding.prefix == '-i'
    assert integer_input.inputBinding.separate is False

    # String passed as a separate argument.
    string_input = tool.inputs[1]
    assert string_input.default == 'hello'
    assert string_input.type == 'string'
    assert string_input.inputBinding.prefix == '--example-string'
    assert string_input.inputBinding.separate is True

    # Existing file glued to its prefix.
    file_input = tool.inputs[2]
    assert file_input.default.path.samefile(whale)
    assert file_input.type == 'File'
    assert file_input.inputBinding.prefix == '--file='
    assert file_input.inputBinding.separate is False

    assert tool.to_argv() == argv
Beispiel #9
0
 def add(self, value):
     """Insert every component of ``value`` as nested nodes of the tree.

     ``value`` is converted to a path unless it already is one.  A path
     that equals its own parent (such as ``.`` or a root) is ignored.
     """
     if isinstance(value, Path):
         path = value
     else:
         path = Path(str(value))

     if not path or path == path.parent:
         return

     # Walk down the tree, creating missing nodes on the way.
     node = self
     for part in path.parts:
         node = node.setdefault(part, DirectoryTree())
Beispiel #10
0
def test_input_directory(instance_path):
    """Check that an existing directory argument becomes a directory input.

    The archive ``src/src.tar`` is created after the factory has been
    instantiated and before the tool is generated; the ``src.tar`` argument
    is expected to be detected as a plain string input.
    """
    src = Path(instance_path) / 'src'
    src.mkdir(parents=True)

    for name in map(str, range(5)):
        (src / name).touch()

    argv = ['tar', 'czvf', 'src.tar', 'src']
    factory = CommandLineToolFactory(
        argv,
        directory=instance_path,
        working_dir=instance_path,
    )

    src_tar = src / 'src.tar'
    src_tar.touch()

    tool = factory.generate_tool()
    assert tool.to_argv() == argv

    archive_input = tool.inputs[0]
    assert archive_input.type == 'string'
    assert archive_input.default == src_tar.name

    directory_input = tool.inputs[1]
    assert directory_input.type == 'Directory'
    assert directory_input.default.path.samefile(src)
Beispiel #11
0
    def track_paths_in_storage(self, *paths):
        """Track paths in the external storage.

        Nothing happens when external storage is not in use.  Paths whose
        attributes already contain ``filter=lfs`` and files with the
        ``.ipynb`` suffix are skipped; directories are tracked with a
        ``**`` pattern.  The tracking command is only executed when at
        least one path is left to track.

        :param paths: Paths to be tracked.
        :raises errors.ExternalStorageNotInstalled: If external storage
            should be used but is not installed.
        """
        if not self.use_external_storage:
            return

        if not self.external_storage_installed:
            raise errors.ExternalStorageNotInstalled(self.repo)

        track_paths = []
        attrs = self.find_attr(*paths)

        for path in paths:
            # Do not add files with filter=lfs in .gitattributes
            if attrs.get(path, {}).get('filter') == 'lfs':
                continue

            path = Path(path)
            if path.is_dir():
                track_paths.append(str(path / '**'))
            elif path.suffix != '.ipynb':
                # TODO create configurable filter and follow .gitattributes
                track_paths.append(str(path))

        # Do not spawn the tracking command without any path argument.
        if not track_paths:
            return

        call(
            self._CMD_STORAGE_TRACK + track_paths,
            stdout=PIPE,
            stderr=STDOUT,
            cwd=str(self.path),
        )
Beispiel #12
0
def test_workflow_without_outputs(runner, project, run):
    """Check status and update of a workflow that produces no outputs.

    ``status --no-output`` must exit with 0 right after the run, with 1
    once the input has been changed and committed, and with 0 again after
    ``update --no-output``.
    """
    repo = git.Repo(project)
    input_ = Path(project) / 'input.txt'

    def status_exit_code():
        """Return the exit code of ``status --no-output``."""
        return runner.invoke(cli.cli, ['status', '--no-output']).exit_code

    input_.write_text('first')
    repo.git.add('--all')
    repo.index.commit('Created input.txt')

    run_result = runner.invoke(
        cli.cli, ['run', 'cat', '--no-output', input_.name]
    )
    assert 0 == run_result.exit_code
    assert 0 == status_exit_code()

    input_.write_text('second')
    repo.git.add('--all')
    repo.index.commit('Updated input.txt')

    assert 1 == status_exit_code()

    assert 0 == run(args=('update', '--no-output'))

    assert 0 == status_exit_code()
Beispiel #13
0
    def track_paths_in_storage(self, *paths):
        """Register the given paths with the external storage.

        Paths whose attributes already contain ``filter=lfs`` and files
        with the ``.ipynb`` suffix are left out; directories are tracked
        with a ``**`` pattern.  The tracking command runs only when at
        least one path remains, and a failure to start it is reported as
        ``BadParameter``.
        """
        attrs = self.find_attr(*paths)
        track_paths = []

        for path in paths:
            # Do not add files with filter=lfs in .gitattributes
            if attrs.get(path, {}).get('filter') == 'lfs':
                continue

            candidate = Path(path)
            if candidate.is_dir():
                track_paths.append(str(candidate / '**'))
            elif candidate.suffix != '.ipynb':
                # TODO create configurable filter and follow .gitattributes
                track_paths.append(str(candidate))

        if not track_paths:
            return

        try:
            call(
                self._CMD_STORAGE_TRACK + track_paths,
                stdout=PIPE,
                stderr=STDOUT,
                cwd=str(self.path),
            )
        except (KeyboardInterrupt, OSError) as e:
            raise BadParameter('Couldn\'t run \'git lfs\':\n{0}'.format(e))
Beispiel #14
0
def test_input_directory(runner, project, capsys):
    """Check that a directory passed to a command is tracked as an input.

    The listing of ``inputs`` is captured in ``output.txt``.  After a
    second file has been added and committed, updating the output must
    produce a listing with both files.
    """
    repo = git.Repo(project)
    root = Path(project)
    output = root / 'output.txt'
    inputs = root / 'inputs'
    inputs.mkdir(parents=True)
    (inputs / 'first').touch()

    repo.git.add('--all')
    repo.index.commit('Created inputs')

    # Run the command with its standard output redirected to the file.
    with output.open('w') as stdout, contextlib.redirect_stdout(stdout):
        try:
            cli.cli.main(
                args=('run', 'ls', str(inputs)),
                prog_name=runner.get_default_prog_name(cli.cli),
            )
        except SystemExit as e:
            assert e.code in {None, 0}

    assert 'first\n' == output.read_text()

    (inputs / 'second').touch()
    repo.git.add('--all')
    repo.index.commit('Added second input')

    assert 0 == _run_update(runner, capsys, args=('update', output.name))
    assert 'first\nsecond\n' == output.read_text()
Beispiel #15
0
        def connect_file_to_directory(node):
            """Build the process run linking a file to its parent directory.

            The process is derived from ``LINK_CWL`` with the file name
            (relative to the parent directory) as the default of the
            ``filename`` input.  Every entity generated by the run is
            registered in ``nodes`` under its ``(commit, path)`` key.
            """
            filename = str(Path(node.path).relative_to(node.parent.path))
            link_inputs = {
                'input_directory': 'Directory',
                'filename': {
                    'type': 'string',
                    'default': filename,
                },
            }
            process = attr.evolve(LINK_CWL, inputs=link_inputs)

            directory_usage = Usage(
                entity=node.parent,
                role='input_directory',
            )
            process_run = ProcessRun(
                commit=node.commit,
                client=node.client,
                path=None,
                process=process,
                inputs={node.parent.path: directory_usage},
                outputs={node.path: 'output_file'},
            )

            for entity in process_run.generated:
                nodes[(entity.commit, entity.path)] = entity

            return process_run
Beispiel #16
0
def test_datasets_ls_files_correct_paths(tmpdir, runner, project):
    """Check that ``dataset ls-files`` reports paths that exist on disk."""
    # Create the dataset.
    create_result = runner.invoke(
        cli.cli, ['dataset', 'create', 'my-dataset']
    )
    assert 0 == create_result.exit_code
    assert 'OK' in create_result.output

    # Create three small data files.
    paths = []
    for index in range(3):
        data_file = tmpdir.join('file_{0}'.format(index))
        data_file.write(str(index))
        paths.append(str(data_file))

    # Add the files to the dataset.
    add_result = runner.invoke(
        cli.cli,
        ['dataset', 'add', 'my-dataset'] + paths,
        catch_exceptions=False,
    )
    assert 0 == add_result.exit_code

    # List the files as JSON-LD and check that every listed URL exists.
    list_result = runner.invoke(
        cli.cli, ['dataset', 'ls-files', '--format=json-ld']
    )
    assert 0 == list_result.exit_code

    for record in json.loads(list_result.output):
        assert Path(record['url']).exists()
Beispiel #17
0
def test_deleted_input(runner, project, capsys):
    """Check a run whose command removes its own input.

    After ``mv input.txt input.mv`` the original file must be gone and the
    renamed file must exist.
    """
    repo = git.Repo(project)
    input_ = Path(project) / 'input.txt'
    input_.write_text('first')

    repo.git.add('--all')
    repo.index.commit('Created input.txt')

    move_result = runner.invoke(
        cli.cli,
        ['run', 'mv', input_.name, 'input.mv'],
        catch_exceptions=False,
    )
    assert 0 == move_result.exit_code
    assert not input_.exists()
    assert Path('input.mv').exists()
Beispiel #18
0
def test_rerun_with_inputs(runner, project, run):
    """Check ``rerun`` with and without a fixed starting input.

    Two files are filled with random numbers and concatenated into the
    output.  A plain ``rerun`` has to change the output, while
    ``rerun --from first.txt`` must keep the content of the first file at
    the beginning of the output.
    """
    root = Path(project)
    first = root / 'first.txt'
    second = root / 'second.txt'
    output = root / 'output.txt'

    random_cmd = [
        'run', 'python', '-S', '-c', 'import random; print(random.random())'
    ]
    for target in (first, second):
        assert 0 == run(args=random_cmd, stdout=target), (
            'Random number generation.'
        )

    cat_cmd = ['run', 'cat', str(first), str(second)]
    assert 0 == run(args=cat_cmd, stdout=output)

    initial_data = output.read_text()

    assert 0 == run(args=('rerun', str(output)))
    assert output.read_text() != initial_data, (
        'The output should have changed.'
    )

    # Keep the first file unchanged.
    first_data = first.read_text()

    assert 0 == run(args=('rerun', '--from', str(first), str(output)))
    assert output.read_text().startswith(first_data)
Beispiel #19
0
def test_dataset_unlink_file(tmpdir, runner, client):
    """Test unlinking of file and check removal from dataset.

    A file is added to a freshly created dataset, checked to be part of
    it, unlinked through the command line and finally checked to be gone
    from the dataset.
    """
    # create a dataset
    result = runner.invoke(cli.cli, ['dataset', 'create', 'my-dataset'])
    assert 0 == result.exit_code
    assert 'OK' in result.output

    # create data file
    new_file = tmpdir.join('datafile.csv')
    new_file.write('1,2,3')

    # add data to dataset
    result = runner.invoke(cli.cli,
                           ['dataset', 'add', 'my-dataset',
                            str(new_file)])
    assert 0 == result.exit_code

    with client.with_dataset(name='my-dataset') as dataset:
        assert new_file.basename in {
            Path(file_.path).name
            for file_ in dataset.files
        }

    result = runner.invoke(cli.cli, [
        'dataset', 'unlink', 'my-dataset', '--include', new_file.basename, '-y'
    ])
    assert 0 == result.exit_code

    with client.with_dataset(name='my-dataset') as dataset:
        # Wrap in ``Path`` exactly as in the check above: the stored path
        # may be a plain string, which has no ``name`` attribute.
        assert new_file.basename not in {
            Path(file_.path).name
            for file_ in dataset.files
        }
Beispiel #20
0
def test_update(runner, project, run):
    """Check that outdated outputs are detected and updated.

    The output holds the byte count of the source (``wc -c``).  ``status``
    has to exit with 1 whenever the source was changed and with 0 after the
    update.  The last source change keeps the same length, so the output
    value stays the same although an update is performed.
    """
    root = Path(project)
    data_dir = root / 'data'
    data_dir.mkdir()
    source = root / 'source.txt'
    output = data_dir / 'result.txt'

    repo = git.Repo(project)

    def status_exit_code():
        """Return the exit code of the ``status`` command."""
        return runner.invoke(cli.cli, ['status']).exit_code

    def output_value():
        """Return the stripped content of the output file."""
        return output.read_text().strip()

    update_and_commit('1', source, repo)

    assert 0 == run(args=('run', 'wc', '-c'), stdin=source, stdout=output)
    assert output_value() == '1'
    assert 0 == status_exit_code()

    update_and_commit('12', source, repo)
    assert 1 == status_exit_code()

    assert 0 == run()
    assert 0 == status_exit_code()
    assert output_value() == '2'

    log_result = runner.invoke(cli.cli, ['log'], catch_exceptions=False)
    assert '(part of' in log_result.output, log_result.output

    # Source has been updated but output is unchanged.
    update_and_commit('34', source, repo)
    assert 1 == status_exit_code()

    assert 0 == run()
    assert 0 == status_exit_code()
    assert output_value() == '2'

    from renku.cli.log import FORMATS
    for output_format in FORMATS:
        # Make sure the log contains the original parent.
        format_result = runner.invoke(
            cli.cli,
            ['log', '--format', output_format],
            catch_exceptions=False,
        )
        assert 0 == format_result.exit_code, output_format
        assert source.name in format_result.output, output_format
Beispiel #21
0
class LocalClient(RepositoryApiMixin, DatasetsApiMixin):
    """A low-level client for communicating with a local Renku repository.

    Example:

        >>> import renku
        >>> client = renku.LocalClient('.')

    """

    # The given value is converted to a resolved, absolute path.
    path = attr.ib(converter=lambda arg: Path(arg).resolve().absolute())

    @path.default
    def _default_path(self):
        """Use the Git home directory when no path is given."""
        from renku.cli._git import get_git_home
        return get_git_home()

    @path.validator
    def _check_path(self, _, value):
        """Reject a path that is missing or is not a directory."""
        if not value.exists() or not value.is_dir():
            raise ValueError('Define an existing directory.')
Beispiel #22
0
def test_siblings_update(runner, project, run):
    """Check that outputs of one run can only be updated together.

    ``tee`` writes the parent content to both ``brother.txt`` and
    ``sister.txt``.  Updating a single sibling must fail, updating with
    ``--with-siblings`` must refresh both files, also after one of them
    has been removed from the repository.
    """
    root = Path(project)
    parent = root / 'parent.txt'
    brother = root / 'brother.txt'
    sister = root / 'sister.txt'
    siblings = {brother, sister}

    repo = git.Repo(project)

    def update_source(data):
        """Write ``data`` to parent.txt and commit the change."""
        parent.write_text(data)
        repo.git.add('--all')
        repo.index.commit('Updated parent.txt')

    def assert_siblings_contain(expected):
        """Check that both siblings hold the expected content."""
        for sibling in siblings:
            assert sibling.read_text().strip() == expected, sibling

    update_source('1')

    # The output files do not exist.
    assert not any(sibling.exists() for sibling in siblings)

    assert 0 == run(
        args=['run', 'tee', 'brother.txt'], stdin=parent, stdout=sister
    )

    # The output file is copied from the source.
    assert_siblings_contain('1')

    update_source('2')

    # Siblings must be updated together.
    for sibling in siblings:
        assert 1 == run(args=('update', sibling.name))

    # Update the brother together with its sibling; both get the new data.
    assert 0 == run(args=('update', '--with-siblings', brother.name))
    assert_siblings_contain('2')

    update_source('3')

    # Siblings kept together even when one is removed.
    repo.index.remove([brother.name], working_tree=True)
    repo.index.commit('Brother removed')

    assert not brother.exists()

    # Update should find also missing siblings.
    assert 1 == run(args=('update', ))
    assert 0 == run(args=('update', '--with-siblings'))
    assert_siblings_contain('3')
Beispiel #23
0
def uninstall(client):
    """Remove every hook listed in ``HOOKS`` that exists in the repository.

    Hooks that are not present are skipped.
    """
    from git.index.fun import hook_path as get_hook_path

    for hook in HOOKS:
        target = Path(get_hook_path(hook, client.repo.git_dir))
        if not target.exists():
            continue
        target.unlink()
Beispiel #24
0
def create_project_config_path(
    path, mode=0o777, parents=False, exist_ok=False
):
    """Create the ``RENKU_HOME`` folder below ``path`` and return it.

    ``mode``, ``parents`` and ``exist_ok`` are handed over to ``mkdir``.
    The absolute location of the created folder is returned as a string.
    """
    # FIXME check default directory mode
    config_dir = Path(path).absolute() / RENKU_HOME
    config_dir.mkdir(mode=mode, parents=parents, exist_ok=exist_ok)
    return str(config_dir)
Beispiel #25
0
def test_init_force_in_empty_dir(isolated_runner):
    """Check that ``init --force`` works for a directory that is missing."""
    project_name = 'test-new-project'
    assert not Path(project_name).exists()

    result = isolated_runner.invoke(
        cli.cli, ['init', '--force', project_name]
    )
    assert 0 == result.exit_code
Beispiel #26
0
 def set_reference(self, reference):
     """Point ``self.path`` to ``reference`` through a relative symlink.

     The parent directory of ``self.path`` is created when missing.  The
     link target is stored relative to that directory.
     """
     target = Path(reference).resolve().absolute()
     # Raises ValueError when the target is not below ``self.client.path``.
     target.relative_to(self.client.path)

     link_dir = self.path.parent
     link_dir.mkdir(parents=True, exist_ok=True)

     relative_target = os.path.relpath(str(target), start=str(link_dir))
     os.symlink(relative_target, str(self.path))
Beispiel #27
0
    def _add_from_url(self, dataset, path, url, nocopy=False, **kwargs):
        """Process an add from url and return the location on disk.

        Local files (empty or ``file`` scheme) are copied, or hard-linked
        when ``nocopy`` is set; local directories are processed
        recursively.  Any other supported scheme is downloaded.

        :param dataset: Dataset receiving the file; its ``name`` and
            ``authors`` are used.
        :param path: Directory in which the file is stored.
        :param url: Location of the data to be added.
        :param nocopy: Create a hard link instead of copying a local file.
        :param kwargs: Accepted but not used by this method.
        :returns: Dictionary mapping the path relative to the dataset
            directory to the created ``DatasetFile``.
        :raises NotImplementedError: If the URL scheme is not supported.
        :raises requests.HTTPError: If the download ends with an HTTP error
            status.
        """
        u = parse.urlparse(url)

        if u.scheme not in Dataset.SUPPORTED_SCHEMES:
            raise NotImplementedError('{} URLs are not supported'.format(
                u.scheme))

        dst = path.joinpath(os.path.basename(url)).absolute()

        if u.scheme in ('', 'file'):
            src = Path(u.path).absolute()

            # if we have a directory, recurse
            if src.is_dir():
                files = {}
                os.mkdir(str(dst))
                for f in src.iterdir():
                    files.update(
                        self._add_from_url(dataset,
                                           dst,
                                           f.absolute().as_posix(),
                                           nocopy=nocopy))
                return files
            if nocopy:
                try:
                    os.link(str(src), str(dst))
                except Exception as e:
                    raise Exception('Could not create hard link '
                                    '- retry without nocopy.') from e
            else:
                shutil.copy(str(src), str(dst))

            # Do not expose local paths.
            # NOTE(review): ``url`` is still stored in the DatasetFile
            # below, so this only clears the local variable - confirm the
            # intent.
            src = None
        else:
            response = requests.get(url)
            # Fail on HTTP errors instead of storing the body of an error
            # response as the content of the dataset file.
            response.raise_for_status()
            dst.write_bytes(response.content)

        # Strip the execute permission bits from the added file.
        mode = dst.stat().st_mode & 0o777
        dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

        self.track_paths_in_storage(dst.relative_to(self.path))
        dataset_path = self.path / self.datadir / dataset.name
        result = dst.relative_to(dataset_path).as_posix()
        return {
            result:
            DatasetFile(
                path=result,
                url=url,
                authors=dataset.authors,
                dataset=dataset.name,
            )
        }
Beispiel #28
0
    def _cache(cls, app_name):
        """Return the location of the state file in the user cache.

        The cache directory of ``app_name`` is created when it is missing.
        """
        from appdirs import user_cache_dir
        from renku._compat import Path

        directory = Path(user_cache_dir(app_name, None))
        directory.mkdir(parents=True, exist_ok=True)
        return directory.joinpath(cls.STATE_NAME)
Beispiel #29
0
def _expand_directories(paths):
    """Expand directory with all files it contains."""
    for path in paths:
        path_ = Path(path)
        if path_.is_dir():
            for expanded in path_.rglob('*'):
                yield str(expanded)
        else:
            yield path
    def file_candidate(self, candidate):
        """Return ``candidate`` as a path when it exists, otherwise ``None``.

        A relative candidate is looked up inside ``self.directory``.
        """
        path = Path(candidate)
        resolved = path if path.is_absolute() else self.directory / path
        return resolved if resolved.exists() else None