def test_output_directory(runner, project, run):
    """Test detection of output directory.

    Copies a source directory into an empty destination (accepted, and the
    destination is tracked in LFS), then into a non-empty destination
    (rejected with exit code 1).
    """
    cwd = Path(project)
    data = cwd / 'source' / 'data.txt'
    source = data.parent
    source.mkdir(parents=True)
    data.write_text('data')

    # Empty destination
    destination = cwd / 'destination'
    source_wc = cwd / 'destination_source.wc'
    # Non empty destination
    invalid_destination = cwd / 'invalid_destination'
    invalid_destination.mkdir(parents=True)
    (invalid_destination / 'non_empty').touch()

    repo = git.Repo(project)
    repo.git.add('--all')
    repo.index.commit('Created source directory')

    cmd = ['run', 'cp', '-LRf', str(source), str(destination)]
    result = runner.invoke(cli.cli, cmd, catch_exceptions=False)
    assert 0 == result.exit_code

    destination_source = destination / data.name
    assert destination_source.exists()

    # check that the output in subdir is added to LFS
    with (cwd / '.gitattributes').open() as f:
        gitattr = f.read()
    assert str(destination.relative_to(cwd)) + '/**' in gitattr
    assert destination_source.name in subprocess.check_output(
        ['git', 'lfs', 'ls-files']).decode()

    # Record a ``wc`` run whose input lives inside the output directory.
    cmd = ['run', 'wc']
    assert 0 == run(args=cmd, stdin=destination_source, stdout=source_wc)

    # Make sure the output directory can be recreated
    assert 0 == run(args=('rerun', str(source_wc)))
    assert {data.name} == {path.name for path in destination.iterdir()}

    # The log must reference the file inside the directory output.
    cmd = ['log', str(source_wc)]
    result = runner.invoke(cli.cli, cmd, catch_exceptions=False)
    destination_data = str(Path('destination') / 'data.txt')
    assert destination_data in result.output, cmd
    assert ' directory)' in result.output

    # Copy into a non-empty destination must fail and leave it untouched.
    cmd = ['run', 'cp', '-r', str(source), str(invalid_destination)]
    result = runner.invoke(cli.cli, cmd, catch_exceptions=False)
    assert 1 == result.exit_code
    assert not (invalid_destination / data.name).exists()
def get_relative_url(self, url): """Determine if the repo url should be relative.""" # Check if the default remote of the branch we are on is on # the same server as the submodule. If so, use a relative path # instead of an absolute URL. try: branch_remote = self.repo.config_reader().get( 'branch "{}"'.format(self.repo.active_branch.name), 'remote') except NoSectionError: branch_remote = 'origin' try: remote = self.repo.remote(branch_remote) except ValueError: warnings.warn( 'Remote {} not found, cannot check for relative URL.'.format( branch_remote)) return url remote_url = GitURL.parse(remote.url) submodule_url = GitURL.parse(url) if remote_url.hostname == submodule_url.hostname: # construct the relative path url = Path('../../{}'.format(submodule_url.owner) if remote_url. owner == submodule_url.owner else '..') url = str(url / submodule_url.name) return url
def test_run_in_isolation(runner, project, client, run):
    """Test run in isolation."""
    import filelock

    project_dir = Path(project)

    with client.commit():
        with (project_dir / '.gitignore').open('a') as gitignore:
            gitignore.write('lock')

    prefix = ['run', '--no-output']
    cmd = [
        'python', '-S', '-c',
        'import os, sys; sys.exit(1 if os.path.exists("lock") else 0)'
    ]

    head = client.repo.head.commit.hexsha

    with filelock.FileLock('lock'):
        # Without isolation the ignored lock file is visible to the
        # command, so it exits with 1 and no commit is created.
        assert 1 == run(args=prefix + cmd)
        assert client.repo.head.commit.hexsha == head

        # With --isolation the lock file is absent, the command succeeds
        # and a new commit is recorded.
        assert 0 == run(prefix + ['--isolation'] + cmd)
        assert client.repo.head.commit.hexsha != head
def makefile(graph):
    """Format graph as Makefile.

    Emits one ``outputs: inputs`` rule per process step, followed by a
    recipe line reconstructing the step's command.
    """
    from renku._compat import Path
    from renku.models.provenance.activities import ProcessRun, WorkflowRun

    for activity in graph.activities.values():
        if not isinstance(activity, ProcessRun):
            # Plain activities carry no runnable process.
            continue
        elif isinstance(activity, WorkflowRun):
            # A workflow contributes each of its subprocesses as a step.
            steps = activity.subprocesses.values()
        else:
            steps = [activity]

        for step in steps:
            # Rule line: ``outputs: inputs``.
            click.echo(' '.join(step.outputs) + ': ' + ' '.join(step.inputs))
            tool = step.process
            basedir = Path(step.path).parent
            # Recipe line: the tool argv plus std-stream redirections.
            click.echo(
                '\t@' + ' '.join(tool.to_argv()) + ' ' + ' '.join(
                    tool.STD_STREAMS_REPR[key] + ' ' + str(path)
                    for key, path in tool._std_streams(basedir=basedir).items()
                )
            )
def test_git_pre_commit_hook(runner, project, capsys):
    """Test detection of output edits.

    Installs the Renku git hooks, edits a generated output by hand and
    verifies that the pre-commit hook rejects the commit; after
    uninstalling the hooks the same commit succeeds.
    """
    result = runner.invoke(cli.cli, ['githooks', 'install'])
    assert result.exit_code == 0
    assert 'Hook already exists.' in result.output

    repo = git.Repo(project)
    cwd = Path(project)
    output = cwd / 'output.txt'

    result = runner.invoke(cli.cli, ['run', 'touch', output.name])
    assert result.exit_code == 0
    with output.open('w') as f:
        f.write('hello')

    repo.git.add('--all')
    with pytest.raises(git.HookExecutionError) as error:
        repo.index.commit('hello')
    # ``pytest.raises`` yields an ``ExceptionInfo`` wrapper; the raised
    # exception's attributes live on ``error.value``, not on the wrapper.
    assert output.name in error.value.stdout

    result = runner.invoke(cli.cli, ['githooks', 'uninstall'])
    assert result.exit_code == 0

    # Without the hook the manual edit can be committed.
    repo.index.commit('hello')
def test_base_command_detection(instance_path):
    """Test base command detection."""
    archive = Path(instance_path) / 'hello.tar'
    archive.touch()

    argv = ['tar', 'xf', 'hello.tar']
    tool = CommandLineToolFactory(
        argv, directory=instance_path
    ).generate_tool()

    # The leading command words become the base command ...
    assert ['tar', 'xf'] == tool.baseCommand

    # ... and the existing file becomes a positional File input.
    file_input = tool.inputs[0]
    assert Path('hello.tar') == file_input.default.path
    assert 'File' == file_input.type
    assert file_input.inputBinding.prefix is None
    assert file_input.inputBinding.separate is True

    # Round-trip back to the original argv.
    assert argv == tool.to_argv()
def test_output_directory(runner, project):
    """Test detection of output directory."""
    root = Path(project)
    data_file = root / 'source' / 'data.txt'
    src_dir = data_file.parent
    src_dir.mkdir(parents=True)
    data_file.touch()

    # A destination that does not exist yet is a valid output directory.
    empty_destination = root / 'destination'
    # A destination that already contains files must be rejected.
    busy_destination = root / 'invalid_destination'
    busy_destination.mkdir(parents=True)
    (busy_destination / 'non_empty').touch()

    repo = git.Repo(project)
    repo.git.add('--all')
    repo.index.commit('Created source directory')

    result = runner.invoke(
        cli.cli,
        ['run', 'cp', '-r', str(src_dir), str(empty_destination)],
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert (empty_destination / data_file.name).exists()

    result = runner.invoke(
        cli.cli,
        ['run', 'cp', '-r', str(src_dir), str(busy_destination)],
        catch_exceptions=False,
    )
    assert result.exit_code == 1
    assert not (busy_destination / data_file.name).exists()
def test_03_input(instance_path):
    """Check the essential input parameters."""
    whale = Path(instance_path) / 'whale.txt'
    whale.touch()

    argv = [
        'echo', '-f', '-i42', '--example-string', 'hello', '--file=whale.txt'
    ]
    tool = CommandLineToolFactory(
        argv, directory=instance_path, working_dir=instance_path
    ).generate_tool()

    # The bare flag stays a plain argument.
    assert ['-f'] == tool.arguments[0].to_argv()

    int_input, str_input, file_input = tool.inputs[0:3]

    # ``-i42``: attached integer value.
    assert 42 == int_input.default
    assert 'int' == int_input.type
    assert '-i' == int_input.inputBinding.prefix
    assert int_input.inputBinding.separate is False

    # ``--example-string hello``: separated string value.
    assert 'hello' == str_input.default
    assert 'string' == str_input.type
    assert '--example-string' == str_input.inputBinding.prefix
    assert str_input.inputBinding.separate is True

    # ``--file=whale.txt``: attached existing file.
    assert file_input.default.path.samefile(whale)
    assert 'File' == file_input.type
    assert '--file=' == file_input.inputBinding.prefix
    assert file_input.inputBinding.separate is False

    assert argv == tool.to_argv()
def add(self, value):
    """Create a safe directory from a value."""
    # Normalize anything path-like into a ``Path`` instance.
    if isinstance(value, Path):
        path = value
    else:
        path = Path(str(value))

    # Ignore empty paths and filesystem roots (a root is its own parent).
    if not path or path == path.parent:
        return

    # Walk/create one nested tree node per path component.
    node = self
    for component in path.parts:
        node = node.setdefault(component, DirectoryTree())
def test_input_directory(instance_path):
    """Test input directory."""
    base = Path(instance_path)
    src_dir = base / 'src'
    src_dir.mkdir(parents=True)
    for index in range(5):
        (src_dir / str(index)).touch()

    argv = ['tar', 'czvf', 'src.tar', 'src']
    factory = CommandLineToolFactory(
        argv,
        directory=instance_path,
        working_dir=instance_path,
    )

    # NOTE: the archive is created only after the factory scanned argv.
    archive = src_dir / 'src.tar'
    archive.touch()

    tool = factory.generate_tool()
    assert argv == tool.to_argv()

    name_input = tool.inputs[0]
    assert 'string' == name_input.type
    assert archive.name == name_input.default

    dir_input = tool.inputs[1]
    assert 'Directory' == dir_input.type
    assert dir_input.default.path.samefile(src_dir)
def track_paths_in_storage(self, *paths):
    """Track paths in the external storage.

    :raises errors.ExternalStorageNotInstalled: when external storage is
        enabled for the repository but not installed.
    """
    if self.use_external_storage and self.external_storage_installed:
        track_paths = []
        attrs = self.find_attr(*paths)

        for path in paths:
            # Do not add files with filter=lfs in .gitattributes
            if attrs.get(path, {}).get('filter') == 'lfs':
                continue

            path = Path(path)
            if path.is_dir():
                # Track everything below the directory.
                track_paths.append(str(path / '**'))
            elif path.suffix != '.ipynb':
                # TODO create configurable filter and follow .gitattributes
                track_paths.append(str(path))

        # Delegate the actual tracking to ``git lfs track``.
        call(
            self._CMD_STORAGE_TRACK + track_paths,
            stdout=PIPE,
            stderr=STDOUT,
            cwd=str(self.path),
        )
    elif self.use_external_storage:
        raise errors.ExternalStorageNotInstalled(self.repo)
def test_workflow_without_outputs(runner, project, run):
    """Test workflow without outputs."""
    repo = git.Repo(project)
    input_ = Path(project) / 'input.txt'

    def write_and_commit(content, message):
        """Write *content* to the input file and commit everything."""
        with input_.open('w') as f:
            f.write(content)
        repo.git.add('--all')
        repo.index.commit(message)

    write_and_commit('first', 'Created input.txt')

    result = runner.invoke(cli.cli, ['run', 'cat', '--no-output', input_.name])
    assert 0 == result.exit_code

    # Nothing changed yet: status is clean.
    result = runner.invoke(cli.cli, ['status', '--no-output'])
    assert 0 == result.exit_code

    write_and_commit('second', 'Updated input.txt')

    # Changed input makes the no-output workflow stale.
    result = runner.invoke(cli.cli, ['status', '--no-output'])
    assert 1 == result.exit_code

    assert 0 == run(args=('update', '--no-output'))

    # After the update the status is clean again.
    result = runner.invoke(cli.cli, ['status', '--no-output'])
    assert 0 == result.exit_code
def track_paths_in_storage(self, *paths): """Track paths in the external storage.""" # Calculate which paths can be tracked in lfs track_paths = [] attrs = self.find_attr(*paths) for path in paths: # Do not add files with filter=lfs in .gitattributes if attrs.get(path, {}).get('filter') == 'lfs': continue path = Path(path) if path.is_dir(): track_paths.append(str(path / '**')) elif path.suffix != '.ipynb': # TODO create configurable filter and follow .gitattributes track_paths.append(str(path)) if track_paths: try: call( self._CMD_STORAGE_TRACK + track_paths, stdout=PIPE, stderr=STDOUT, cwd=str(self.path), ) except (KeyboardInterrupt, OSError) as e: raise BadParameter('Couldn\'t run \'git lfs\':\n{0}'.format(e))
def test_input_directory(runner, project, capsys):
    """Test detection of input directory."""
    repo = git.Repo(project)
    cwd = Path(project)
    output = cwd / 'output.txt'

    inputs = cwd / 'inputs'
    inputs.mkdir(parents=True)
    (inputs / 'first').touch()

    repo.git.add('--all')
    repo.index.commit('Created inputs')

    # Run ``renku run ls <inputs>`` directly via the CLI entry point and
    # capture its stdout into ``output.txt``.
    with output.open('w') as stdout:
        with contextlib.redirect_stdout(stdout):
            try:
                cli.cli.main(
                    args=('run', 'ls', str(inputs)),
                    prog_name=runner.get_default_prog_name(cli.cli),
                )
            except SystemExit as e:
                # Click exits via ``SystemExit``; ``None``/0 mean success.
                assert e.code in {None, 0}

    with output.open('r') as f:
        assert 'first\n' == f.read()

    (inputs / 'second').touch()

    repo.git.add('--all')
    repo.index.commit('Added second input')

    # Updating must re-run ``ls`` and pick up the new directory entry.
    assert 0 == _run_update(runner, capsys, args=('update', output.name))
    with output.open('r') as f:
        assert 'first\nsecond\n' == f.read()
def connect_file_to_directory(node):
    """Return step connecting file to a directory.

    Builds a synthetic LINK process run whose input is the file's parent
    directory and whose output is the file itself.
    """
    process = attr.evolve(
        LINK_CWL,
        inputs={
            'input_directory': 'Directory',
            'filename': {
                'type': 'string',
                # Filename relative to the parent directory.
                'default': str(Path(node.path).relative_to(node.parent.path)),
            },
        })
    process_run = ProcessRun(
        commit=node.commit,
        client=node.client,
        path=None,
        process=process,
        inputs={
            node.parent.path: Usage(
                entity=node.parent,
                role='input_directory',
            ),
        },
        outputs={
            node.path: 'output_file',
        },
    )
    # Register generated entities in the enclosing ``nodes`` mapping
    # (closure variable) so later lookups resolve to these entities.
    for generated in process_run.generated:
        nodes[(generated.commit, generated.path)] = generated
    return process_run
def test_datasets_ls_files_correct_paths(tmpdir, runner, project):
    """Test listing of data within dataset and check that paths are correct."""
    # Create the target dataset first.
    result = runner.invoke(cli.cli, ['dataset', 'create', 'my-dataset'])
    assert 0 == result.exit_code
    assert 'OK' in result.output

    # Generate three small data files outside the repository.
    paths = []
    for index in range(3):
        data_file = tmpdir.join('file_{0}'.format(index))
        data_file.write(str(index))
        paths.append(str(data_file))

    # Import them into the dataset.
    result = runner.invoke(
        cli.cli,
        ['dataset', 'add', 'my-dataset'] + paths,
        catch_exceptions=False,
    )
    assert 0 == result.exit_code

    # The JSON-LD listing must expose URLs pointing at existing files.
    result = runner.invoke(
        cli.cli, ['dataset', 'ls-files', '--format=json-ld']
    )
    assert 0 == result.exit_code

    for record in json.loads(result.output):
        assert Path(record['url']).exists()
def test_deleted_input(runner, project, capsys):
    """Test deleted input."""
    repo = git.Repo(project)
    input_ = Path(project) / 'input.txt'
    with input_.open('w') as f:
        f.write('first')
    repo.git.add('--all')
    repo.index.commit('Created input.txt')

    # Record a run that moves (and thus deletes) the input file.
    result = runner.invoke(
        cli.cli,
        ['run', 'mv', input_.name, 'input.mv'],
        catch_exceptions=False,
    )
    assert 0 == result.exit_code

    # Source is gone, target exists.
    assert not input_.exists()
    assert Path('input.mv').exists()
def test_rerun_with_inputs(runner, project, run):
    """Test file recreation with specified inputs."""
    workdir = Path(project)
    first = workdir / 'first.txt'
    second = workdir / 'second.txt'
    inputs = (first, second)
    output = workdir / 'output.txt'

    # Seed both inputs with random content via recorded runs.
    random_cmd = [
        'run', 'python', '-S', '-c', 'import random; print(random.random())'
    ]
    for input_file in inputs:
        assert 0 == run(
            args=random_cmd, stdout=input_file
        ), 'Random number generation.'

    concat_cmd = ['run', 'cat'] + [str(path) for path in inputs]
    assert 0 == run(args=concat_cmd, stdout=output)

    initial_data = output.read_text()

    # A full rerun regenerates the random inputs, changing the output.
    assert 0 == run(args=('rerun', str(output)))
    assert output.read_text() != initial_data, \
        'The output should have changed.'

    # Keep the first file unchanged.
    first_data = first.read_text()
    assert 0 == run(args=('rerun', '--from', str(first), str(output)))
    assert output.read_text().startswith(first_data)
def test_dataset_unlink_file(tmpdir, runner, client):
    """Test unlinking of file and check removal from dataset."""
    # create a dataset
    result = runner.invoke(cli.cli, ['dataset', 'create', 'my-dataset'])
    assert 0 == result.exit_code
    assert 'OK' in result.output

    # create data file
    new_file = tmpdir.join('datafile.csv')
    new_file.write('1,2,3')

    # add data to dataset
    result = runner.invoke(
        cli.cli, ['dataset', 'add', 'my-dataset', str(new_file)]
    )
    assert 0 == result.exit_code

    with client.with_dataset(name='my-dataset') as dataset:
        assert new_file.basename in {
            Path(file_.path).name
            for file_ in dataset.files
        }

    result = runner.invoke(cli.cli, [
        'dataset', 'unlink', 'my-dataset', '--include', new_file.basename, '-y'
    ])
    assert 0 == result.exit_code

    with client.with_dataset(name='my-dataset') as dataset:
        # ``file_.path`` is a plain string, so wrap it in ``Path`` (as in
        # the membership check above); the original ``file_.path.name``
        # would raise ``AttributeError`` whenever any file remained.
        assert new_file.basename not in {
            Path(file_.path).name
            for file_ in dataset.files
        }
def test_update(runner, project, run):
    """Test automatic file update."""
    cwd = Path(project)
    data = cwd / 'data'
    data.mkdir()
    source = cwd / 'source.txt'
    output = data / 'result.txt'

    repo = git.Repo(project)

    update_and_commit('1', source, repo)

    # ``wc -c`` writes the byte count of the source into the output.
    assert 0 == run(args=('run', 'wc', '-c'), stdin=source, stdout=output)

    with output.open('r') as f:
        assert f.read().strip() == '1'

    result = runner.invoke(cli.cli, ['status'])
    assert 0 == result.exit_code

    update_and_commit('12', source, repo)

    # Source changed: status now reports stale outputs.
    result = runner.invoke(cli.cli, ['status'])
    assert 1 == result.exit_code

    # Default invocation of the ``run`` fixture — presumably ``update``;
    # afterwards the output is regenerated.  TODO confirm fixture default.
    assert 0 == run()

    result = runner.invoke(cli.cli, ['status'])
    assert 0 == result.exit_code

    with output.open('r') as f:
        assert f.read().strip() == '2'

    result = runner.invoke(cli.cli, ['log'], catch_exceptions=False)
    assert '(part of' in result.output, result.output

    # Source has been updated but output is unchanged.
    update_and_commit('34', source, repo)

    result = runner.invoke(cli.cli, ['status'])
    assert 1 == result.exit_code

    assert 0 == run()

    result = runner.invoke(cli.cli, ['status'])
    assert 0 == result.exit_code

    with output.open('r') as f:
        assert f.read().strip() == '2'

    from renku.cli.log import FORMATS
    for output_format in FORMATS:
        # Make sure the log contains the original parent.
        result = runner.invoke(
            cli.cli,
            ['log', '--format', output_format],
            catch_exceptions=False,
        )
        assert 0 == result.exit_code, output_format
        assert source.name in result.output, output_format
class LocalClient(
    RepositoryApiMixin,
    DatasetsApiMixin,
):
    """A low-level client for communicating with a local Renku repository.

    Example:

        >>> import renku
        >>> client = renku.LocalClient('.')

    """

    # Repository root; any input is normalized to an absolute resolved Path.
    path = attr.ib(converter=lambda arg: Path(arg).resolve().absolute())

    @path.default
    def _default_path(self):
        """Return default repository path."""
        # Imported lazily to avoid a circular import with the CLI layer.
        from renku.cli._git import get_git_home
        return get_git_home()

    @path.validator
    def _check_path(self, _, value):
        """Check the path exists and it is a directory."""
        if not (value.exists() and value.is_dir()):
            raise ValueError('Define an existing directory.')
def test_siblings_update(runner, project, run):
    """Test detection of siblings during update.

    ``brother.txt`` and ``sister.txt`` are produced by the same run
    (``tee`` duplicates stdin), so updating one requires updating both.
    """
    cwd = Path(project)
    parent = cwd / 'parent.txt'
    brother = cwd / 'brother.txt'
    sister = cwd / 'sister.txt'
    siblings = {brother, sister}

    repo = git.Repo(project)

    def update_source(data):
        """Update parent.txt."""
        with parent.open('w') as fp:
            fp.write(data)
        repo.git.add('--all')
        repo.index.commit('Updated parent.txt')

    update_source('1')

    # The output files do not exist.
    assert not any(sibling.exists() for sibling in siblings)

    cmd = ['run', 'tee', 'brother.txt']
    assert 0 == run(args=cmd, stdin=parent, stdout=sister)

    # The output file is copied from the source.
    for sibling in siblings:
        with sibling.open('r') as f:
            assert f.read().strip() == '1', sibling

    update_source('2')

    # Siblings must be updated together.
    for sibling in siblings:
        assert 1 == run(args=('update', sibling.name))

    # Update brother and check the sister has not been changed.
    assert 0 == run(args=('update', '--with-siblings', brother.name))

    for sibling in siblings:
        with sibling.open('r') as f:
            assert f.read().strip() == '2', sibling

    update_source('3')

    # Siblings kept together even when one is removed.
    repo.index.remove([brother.name], working_tree=True)
    repo.index.commit('Brother removed')
    assert not brother.exists()

    # Update should find also missing siblings.
    assert 1 == run(args=('update', ))
    assert 0 == run(args=('update', '--with-siblings'))

    for sibling in siblings:
        with sibling.open('r') as f:
            assert f.read().strip() == '3', sibling
def uninstall(client):
    """Uninstall Git hooks."""
    from git.index.fun import hook_path as get_hook_path

    # Remove every Renku-managed hook that is currently installed.
    for hook_name in HOOKS:
        installed = Path(get_hook_path(hook_name, client.repo.git_dir))
        if installed.exists():
            installed.unlink()
def create_project_config_path(
    path, mode=0o777, parents=False, exist_ok=False
):
    """Create new project configuration folder.

    Returns the absolute path of the created folder as a string.
    """
    # FIXME check default directory mode
    config_dir = Path(path).absolute() / RENKU_HOME
    config_dir.mkdir(mode=mode, parents=parents, exist_ok=exist_ok)
    return str(config_dir)
def test_init_force_in_empty_dir(isolated_runner):
    """Run init --force in empty directory."""
    target = Path('test-new-project')
    assert not target.exists()

    result = isolated_runner.invoke(
        cli.cli, ['init', '--force', 'test-new-project']
    )
    assert 0 == result.exit_code
def set_reference(self, reference):
    """Set ourselves to the given reference path."""
    target = Path(reference).resolve().absolute()
    # Raises ``ValueError`` when the target escapes the client repository.
    target.relative_to(self.client.path)

    self.path.parent.mkdir(parents=True, exist_ok=True)
    relative = os.path.relpath(str(target), start=str(self.path.parent))
    os.symlink(relative, str(self.path))
def _add_from_url(self, dataset, path, url, nocopy=False, **kwargs):
    """Process an add from url and return the location on disk.

    Returns a mapping from the file's path (relative to the dataset
    directory) to its ``DatasetFile`` metadata record; directories are
    processed recursively, merging the per-file mappings.
    """
    u = parse.urlparse(url)
    if u.scheme not in Dataset.SUPPORTED_SCHEMES:
        raise NotImplementedError('{} URLs are not supported'.format(
            u.scheme))

    # Destination inside the dataset, named after the URL's basename.
    dst = path.joinpath(os.path.basename(url)).absolute()

    if u.scheme in ('', 'file'):
        src = Path(u.path).absolute()

        # if we have a directory, recurse
        if src.is_dir():
            files = {}
            os.mkdir(dst)
            for f in src.iterdir():
                files.update(
                    self._add_from_url(
                        dataset, dst, f.absolute().as_posix(),
                        nocopy=nocopy))
            return files

        if nocopy:
            # Hard-link instead of copying; fails across filesystems.
            try:
                os.link(src, dst)
            except Exception as e:
                raise Exception('Could not create hard link '
                                '- retry without nocopy.') from e
        else:
            shutil.copy(src, dst)

        # Do not expose local paths.
        src = None
    else:
        try:
            response = requests.get(url)
            dst.write_bytes(response.content)
        except error.HTTPError as e:  # pragma nocover
            # NOTE(review): ``requests.get`` raises ``requests``
            # exceptions, not ``urllib.error.HTTPError`` — this handler
            # probably never fires; confirm the intended exception type.
            raise e

    # make the added file read-only
    mode = dst.stat().st_mode & 0o777
    dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

    self.track_paths_in_storage(dst.relative_to(self.path))
    dataset_path = self.path / self.datadir / dataset.name
    result = dst.relative_to(dataset_path).as_posix()
    return {
        result:
        DatasetFile(
            path=result,
            url=url,
            authors=dataset.authors,
            dataset=dataset.name,
        )
    }
def _cache(cls, app_name):
    """Return cache file.

    The file lives in the platform-specific user cache directory for
    *app_name*, which is created on demand.
    """
    from appdirs import user_cache_dir
    from renku._compat import Path

    cache_dir = Path(user_cache_dir(app_name, None))
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir / cls.STATE_NAME
def _expand_directories(paths): """Expand directory with all files it contains.""" for path in paths: path_ = Path(path) if path_.is_dir(): for expanded in path_.rglob('*'): yield str(expanded) else: yield path
def file_candidate(self, candidate):
    """Return a path instance if it exists in current directory.

    Relative candidates are resolved against ``self.directory``;
    ``None`` is returned when the resulting path does not exist.
    """
    path = Path(candidate)
    if not path.is_absolute():
        path = self.directory / path
    return path if path.exists() else None