def test_03_input(instance_path):
    """Check the essential input parameters."""
    # The factory needs an existing file so that ``--file=whale.txt`` is
    # classified as a File input rather than a plain string.
    whale = Path(instance_path) / 'whale.txt'
    whale.touch()

    argv = [
        'echo',
        '-f',
        '-i42',
        '--example-string',
        'hello',
        '--file=whale.txt',
    ]
    tool = CommandLineToolFactory(
        argv, directory=instance_path
    ).generate_tool()

    # ``-f`` carries no value, so it becomes a bare argument.
    assert tool.arguments[0].prefix == '-f'

    # ``-i42``: integer value glued to its prefix (separate is False).
    assert tool.inputs[0].default == 42
    assert tool.inputs[0].type == 'int'
    assert tool.inputs[0].inputBinding.prefix == '-i'
    assert tool.inputs[0].inputBinding.separate is False

    # ``--example-string hello``: string value separated from its prefix.
    assert tool.inputs[1].default == 'hello'
    assert tool.inputs[1].type == 'string'
    assert tool.inputs[1].inputBinding.prefix == '--example-string'
    assert tool.inputs[1].inputBinding.separate is True

    # ``--file=whale.txt``: existing path detected as a File input,
    # with the ``=`` kept as part of the prefix.
    assert tool.inputs[2].default.path == Path('whale.txt')
    assert tool.inputs[2].type == 'File'
    assert tool.inputs[2].inputBinding.prefix == '--file='
    assert tool.inputs[2].inputBinding.separate is False

    # The tool must round-trip back to the original command line.
    assert tool.to_argv() == argv
def track_paths_in_storage(self, *paths):
    """Track paths in the external storage."""
    # Decide which of the requested paths are eligible for LFS.
    attrs = self.find_attr(*paths)
    lfs_paths = []
    for raw_path in paths:
        # Entries already marked with filter=lfs in .gitattributes are
        # skipped so they are not registered twice.
        if attrs.get(raw_path, {}).get('filter') == 'lfs':
            continue

        candidate = Path(raw_path)
        if candidate.is_dir():
            # Track everything below the directory.
            lfs_paths.append(str(candidate / '**'))
        elif candidate.suffix != '.ipynb':
            # TODO create configurable filter and follow .gitattributes
            lfs_paths.append(str(candidate))

    # Nothing eligible: avoid invoking git lfs at all.
    if not lfs_paths:
        return

    try:
        call(
            self._CMD_STORAGE_TRACK + lfs_paths,
            stdout=PIPE,
            stderr=STDOUT,
            cwd=str(self.path),
        )
    except (KeyboardInterrupt, OSError) as e:
        raise BadParameter('Couldn\'t run \'git lfs\':\n{0}'.format(e))
def track_paths_in_storage(self, *paths):
    """Track paths in the external storage.

    Paths already carrying ``filter=lfs`` in ``.gitattributes`` are
    skipped; directories are tracked recursively via ``**``.

    :raises errors.ExternalStorageNotInstalled: When external storage is
        enabled but ``git lfs`` is not installed.
    """
    if self.use_external_storage and self.external_storage_installed:
        track_paths = []
        attrs = self.find_attr(*paths)

        for path in paths:
            # Do not add files with filter=lfs in .gitattributes
            if attrs.get(path, {}).get('filter') == 'lfs':
                continue

            path = Path(path)
            if path.is_dir():
                track_paths.append(str(path / '**'))
            elif path.suffix != '.ipynb':
                # TODO create configurable filter and follow .gitattributes
                track_paths.append(str(path))

        # Only invoke ``git lfs track`` when there is something to track;
        # calling it with an empty path list is pointless work.
        if track_paths:
            call(
                self._CMD_STORAGE_TRACK + track_paths,
                stdout=PIPE,
                stderr=STDOUT,
                cwd=str(self.path),
            )
    elif self.use_external_storage:
        raise errors.ExternalStorageNotInstalled(self.repo)
def uninstall(client):
    """Uninstall Git hooks."""
    from git.index.fun import hook_path as get_hook_path

    git_dir = client.repo.git_dir
    for name in HOOKS:
        # Remove the hook file only when it is actually installed.
        installed = Path(get_hook_path(name, git_dir))
        if installed.exists():
            installed.unlink()
def test_init_force_in_empty_dir(isolated_runner):
    """Run init --force in empty directory."""
    target = Path('test-new-project')
    assert not target.exists()

    result = isolated_runner.invoke(
        cli.cli, ['init', '--force', 'test-new-project']
    )
    assert 0 == result.exit_code
def create_project_config_path(
    path, mode=0o777, parents=False, exist_ok=False
):
    """Create new project configuration folder.

    Returns the created folder as a string path.
    """
    # FIXME check default directory mode
    config_dir = Path(path).absolute() / RENKU_HOME
    config_dir.mkdir(mode=mode, parents=parents, exist_ok=exist_ok)
    return str(config_dir)
def set_reference(self, reference):
    """Set ourselves to the given reference path."""
    target = Path(reference).resolve().absolute()
    # Containment check: raises ValueError when the target lies outside
    # the client repository.
    target.relative_to(self.client.path)

    parent = self.path.parent
    parent.mkdir(parents=True, exist_ok=True)

    relative_target = os.path.relpath(str(target), start=str(parent))
    os.symlink(relative_target, str(self.path))
def _add_from_url(self, dataset, path, url, nocopy=False, **kwargs):
    """Process an add from url and return the location on disk.

    :param dataset: Dataset the file(s) are added to.
    :param path: Destination directory inside the dataset.
    :param url: Source location; a local path/``file://`` URL or a
        remote URL with a scheme in ``Dataset.SUPPORTED_SCHEMES``.
    :param nocopy: Hard-link local files instead of copying them.
    :return: Mapping of relative destination path to ``DatasetFile``.
    :raises NotImplementedError: For unsupported URL schemes.
    """
    u = parse.urlparse(url)
    if u.scheme not in Dataset.SUPPORTED_SCHEMES:
        raise NotImplementedError('{} URLs are not supported'.format(
            u.scheme))

    dst = path.joinpath(os.path.basename(url)).absolute()

    if u.scheme in ('', 'file'):
        src = Path(u.path).absolute()

        # if we have a directory, recurse
        if src.is_dir():
            files = {}
            os.mkdir(dst)
            for f in src.iterdir():
                files.update(
                    self._add_from_url(
                        dataset, dst, f.absolute().as_posix(),
                        nocopy=nocopy))
            return files

        if nocopy:
            try:
                os.link(src, dst)
            except Exception as e:
                raise Exception('Could not create hard link '
                                '- retry without nocopy.') from e
        else:
            shutil.copy(src, dst)

        # Do not expose local paths.
        src = None
    else:
        try:
            # NOTE(review): the whole response is buffered in memory
            # before writing; confirm acceptable for large downloads.
            response = requests.get(url)
            dst.write_bytes(response.content)
        except error.HTTPError as e:  # pragma nocover
            raise e

    # make the added file read-only
    mode = dst.stat().st_mode & 0o777
    dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

    # Register the new file with the external storage (e.g. git lfs).
    self.track_paths_in_storage(dst.relative_to(self.path))

    dataset_path = self.path / self.datadir / dataset.name
    result = dst.relative_to(dataset_path).as_posix()
    return {
        result:
            DatasetFile(
                path=result,
                url=url,
                authors=dataset.authors,
                dataset=dataset.name,
            )
    }
def _cache(cls, app_name):
    """Return cache file."""
    from appdirs import user_cache_dir
    from renku._compat import Path

    # Make sure the per-application cache directory exists first.
    directory = Path(user_cache_dir(app_name, None))
    directory.mkdir(parents=True, exist_ok=True)
    return directory / cls.STATE_NAME
def file_candidate(self, candidate):
    """Return a path instance if it exists in current directory."""
    path = Path(candidate)
    # Relative candidates are resolved against the working directory.
    if not path.is_absolute():
        path = self.directory / path
    return path if path.exists() else None
def _expand_directories(paths): """Expand directory with all files it contains.""" for path in paths: path_ = Path(path) if path_.is_dir(): for expanded in path_.rglob('*'): yield str(expanded) else: yield path
def test_streams_cleanup(runner, project, run):
    """Test cleanup of standard streams."""
    source = Path(project) / 'source.txt'
    stdout = Path(project) / 'result.txt'

    with source.open('w') as fp:
        fp.write('first,second,third')

    # File outside the Git index should be deleted.
    with source.open('r') as fp:
        assert fp.read() == 'first,second,third'

    # No stray output file must be left behind.
    assert not stdout.exists()

    result = runner.invoke(cli.cli, ['status'])

    # Dirty repository check.
    assert 1 == result.exit_code

    # File from the Git index should be restored.
    repo = git.Repo(project)
    with stdout.open('w') as fp:
        fp.write('1')

    repo.index.add(['result.txt'])

    with stdout.open('r') as fp:
        assert fp.read() == '1'
def test_submodule_init(tmpdir_factory, runner, run, project):
    """Test initializing submodules."""
    # A second Renku project acts as the source of the data file.
    src_project = Path(str(tmpdir_factory.mktemp('src_project')))

    assert 0 == run(args=('init', str(src_project)))

    woop = src_project / 'woop'
    with woop.open('w') as fp:
        fp.write('woop')

    repo = git.Repo(str(src_project))
    repo.git.add('--all')
    repo.index.commit('Added woop file')

    # Import the file from the source project into a dataset.
    assert 0 == run(args=('dataset', 'create', 'foo'))
    assert 0 == run(args=('dataset', 'add', 'foo', str(woop)))

    imported_woop = Path(project) / 'data' / 'foo' / woop.name
    assert imported_woop.exists()

    # Clone the project; the data file must not be checked out yet.
    dst_project = Path(str(tmpdir_factory.mktemp('dst_project')))
    subprocess.call(['git', 'clone', project, str(dst_project)])
    subprocess.call(['git', 'lfs', 'install', '--local'],
                    cwd=str(dst_project))
    dst_woop = Path(dst_project) / 'data' / 'foo' / 'woop'
    assert not dst_woop.exists()

    # ``run`` must initialize submodules so the input becomes available.
    result = runner.invoke(
        cli.cli, [
            '--path',
            str(dst_project), 'run', '--no-output', 'wc',
            str(dst_woop.absolute())
        ],
        catch_exceptions=False)
    assert 0 == result.exit_code
def from_tool(cls, tool, existing_directories=None):
    """Create a directory structure based on tool inputs and outputs.

    :param tool: Tool whose outputs are scanned for path-valued globs.
    :param existing_directories: When given, only directories present in
        this collection are added to the requirement listing.
    :return: The populated requirement, or ``None`` when the listing
        would be empty.
    """
    directories = DirectoryTree()
    inputs = {input_.id: input_ for input_ in tool.inputs}
    # For a File output the parent directory must exist; for a
    # Directory output the directory itself must exist.
    converters = {
        'File': lambda value: Path(value).parent,
        'Directory': lambda value: Path(value),
    }

    # TODO enable for extra tool inputs when there is no inputBinding
    # for input_ in tool.inputs:
    #     # NOTE use with CWL 1.1
    #     # if intput_.type == 'stdin':
    #     #     stream = getattr(tool, input_.type)
    #     #     directories[stream]
    #     if input_.type == 'File':
    #         directories.add(input_.default.path)
    #     # TODO add Directory

    for output in tool.outputs:
        # NOTE output streams should be handled automatically
        # if output.type in {'stdout', 'stderr'}:
        #     stream = getattr(tool, output.type)
        #     directories.add(stream)
        if output.type in PATH_OBJECTS:
            glob = output.outputBinding.glob
            convert = converters[output.type]

            # TODO better support for Expression
            if glob.startswith('$(inputs.'):
                # Strip '$(inputs.' prefix and the trailing ')' to get
                # the referenced input id.
                input_id = glob[len('$(inputs.'):-1]
                input_ = inputs.get(input_id)
                if input_ is not None:
                    directories.add(convert(input_.default))
                    # TODO parametrize directory name
                    directories.add(glob)
            elif glob:
                directories.add(convert(glob))

    requirement = cls()
    for directory in directories:
        if existing_directories and directory not in existing_directories:
            # Create only existing directories.
            continue
        requirement.listing.append(
            Dirent(
                entryname=directory,
                entry=DIRECTORY_EXPRESSION,
                writable=True,
            ))

    if requirement.listing:
        return requirement
def test_file_modification_during_run(tmpdir, runner, project, client, run):
    """Test run in isolation."""
    script = client.path / 'script.py'
    output = client.path / 'output'
    # Lock file used to pause the child process until we edited the script.
    lock = Path(str(tmpdir.join('lock')))

    with client.commit():
        with script.open('w') as fp:
            # The generated script creates the lock file, then spins
            # until it disappears, then echoes stdin to stdout.
            fp.write(
                'import os, time, sys\n'
                'open("{lock}", "a")\n'
                'while os.path.exists("{lock}"):\n'
                ' time.sleep(1)\n'
                'sys.stdout.write(sys.stdin.read())\n'
                'sys.stdout.flush()\n'.format(lock=str(lock)))

    prefix = [
        sys.executable,
        '-m',
        'renku',
        'run',
        '--isolation',
    ]
    cmd = [
        'python',
        script.name,
    ]

    previous = client.repo.head.commit

    with output.open('wb') as stdout:
        process = subprocess.Popen(
            prefix + cmd, stdin=subprocess.PIPE, stdout=stdout)

        # Wait for the child to signal it has started.
        while not lock.exists():
            time.sleep(1)

        # Modify the script while the isolated run is still in flight.
        with script.open('w') as fp:
            fp.write('print("edited")')

        # Release the child and feed its stdin.
        lock.unlink()
        process.communicate(input=b'test')
        assert 0 == process.wait()

    with output.open('r') as fp:
        assert 'test' == fp.read().strip()

    # The concurrent edit must not show up as a modification in the
    # commit produced by the isolated run.
    diff = previous.diff(client.repo.head.commit)
    modifications = [
        modification for modification in diff
        if modification.change_type == 'M'
    ]
    assert 0 == len(modifications)
def file_candidate(self, candidate, ignore=None):
    """Return a path instance if it exists in current directory."""
    # Explicitly ignored candidates never resolve.
    if ignore and candidate in ignore:
        return None

    path = Path(candidate)
    if not path.is_absolute():
        path = self.directory / path

    if not path.exists():
        return None
    return path.resolve()
def find_project_config_path(path=None):
    """Find project config path."""
    start = Path(path) if path else Path.cwd()
    absolute = start.absolute()

    # Check the starting directory first, then walk up towards the root.
    for candidate in (absolute, *absolute.parents):
        found = get_project_config_path(candidate)
        if found:
            return found
    return None
def test_init(isolated_runner):
    """Test project initialization."""
    runner = isolated_runner

    # 1. the directory should be automatically created
    new_project = Path('test-new-project')
    assert not new_project.exists()
    result = runner.invoke(cli.cli, ['init', 'test-new-project'])
    assert 0 == result.exit_code
    assert new_project.exists()

    # 2. test project directory creation
    os.mkdir('test-project')
    result = runner.invoke(cli.cli, ['init', 'test-project'])
    assert 0 == result.exit_code
    assert os.stat(os.path.join('test-project', '.git'))
    assert os.stat(os.path.join('test-project', '.renku'))

    # 3. test project init from already existing renku repository
    os.chdir('test-project')
    result = runner.invoke(cli.cli, ['init'])
    assert 0 != result.exit_code

    # 4. in case of init failure because of existing .git folder
    #    .renku directory should not exist
    assert not os.path.exists(os.path.join('test-project', '.renku'))

    result = runner.invoke(cli.cli, ['init', '--force'])
    assert 0 == result.exit_code
    assert os.stat(os.path.join('.git'))
    assert os.stat(os.path.join('.renku'))

    # 5. check git lfs init options
    os.chdir('../')
    shutil.rmtree('test-project')
    os.mkdir('test-project')
    os.chdir('test-project')

    # --no-external-storage must not configure the lfs filter.
    result = runner.invoke(cli.cli, ['init', '--no-external-storage'])
    with open('.git/config') as f:
        config = f.read()
    assert 'filter "lfs"' not in config

    # -S is the short form of --no-external-storage.
    result = runner.invoke(cli.cli, ['init', '-S'])
    with open('.git/config') as f:
        config = f.read()
    assert 'filter "lfs"' not in config

    # Default init enables external storage.
    result = runner.invoke(cli.cli, ['init', '--force'])
    with open('.git/config') as f:
        config = f.read()
    assert 'filter "lfs"' in config
def test_output_directory(runner, project, run):
    """Test detection of output directory."""
    cwd = Path(project)
    data = cwd / 'source' / 'data.txt'
    source = data.parent
    source.mkdir(parents=True)
    data.write_text('data')

    # Empty destination
    destination = cwd / 'destination'
    source_wc = cwd / 'destination_source.wc'
    # Non empty destination
    invalid_destination = cwd / 'invalid_destination'
    invalid_destination.mkdir(parents=True)
    (invalid_destination / 'non_empty').touch()

    repo = git.Repo(project)
    repo.git.add('--all')
    repo.index.commit('Created source directory')

    # Copying into a missing directory creates it as an output.
    cmd = ['run', 'cp', '-LRf', str(source), str(destination)]
    result = runner.invoke(cli.cli, cmd, catch_exceptions=False)
    assert 0 == result.exit_code

    destination_source = destination / data.name
    assert destination_source.exists()

    # check that the output in subdir is added to LFS
    with (cwd / '.gitattributes').open() as f:
        gitattr = f.read()
    assert str(destination.relative_to(cwd)) + '/**' in gitattr
    assert destination_source.name in subprocess.check_output(
        ['git', 'lfs', 'ls-files']).decode()

    cmd = ['run', 'wc']
    assert 0 == run(args=cmd, stdin=destination_source, stdout=source_wc)

    # Make sure the output directory can be recreated
    assert 0 == run(args=('rerun', str(source_wc)))
    assert {data.name} == {path.name for path in destination.iterdir()}

    # The log must mention the file inside the output directory.
    cmd = ['log', str(source_wc)]
    result = runner.invoke(cli.cli, cmd, catch_exceptions=False)
    destination_data = str(Path('destination') / 'data.txt')
    assert destination_data in result.output, cmd
    assert ' directory)' in result.output

    # A non-empty destination is rejected and left untouched.
    cmd = ['run', 'cp', '-r', str(source), str(invalid_destination)]
    result = runner.invoke(cli.cli, cmd, catch_exceptions=False)
    assert 1 == result.exit_code
    assert not (invalid_destination / data.name).exists()
def test_stdin_and_stdout(argv, instance_path):
    """Test stdout mapping."""
    # Both stream files must exist for the factory to pick them up.
    input_ = Path(instance_path) / 'input.txt'
    input_.touch()
    output = Path(instance_path) / 'output.txt'
    output.touch()

    factory = CommandLineToolFactory(
        argv,
        directory=instance_path,
        working_dir=instance_path,
        stdin='input.txt',
        stdout='output.txt',
        stderr='error.log',
    )

    assert factory.stdin
    if len(argv) > 1:
        assert factory.arguments

    # stdout is mapped to the output file and exposed as a stream output.
    assert factory.stdout == 'output.txt'
    assert factory.outputs[0].type == 'stdout'

    tool = factory.generate_tool()
    assert tool.to_argv() == argv

    # The string form renders all three stream redirections.
    std_streams = ' < input.txt > output.txt 2> error.log'
    assert str(tool) == ' '.join(argv) + std_streams
def test_06_params(instance_path):
    """Test referencing input parameters in other fields."""
    hello = Path(instance_path) / 'hello.tar'
    hello.touch()

    argv = ['tar', 'xf', 'hello.tar', 'goodbye.txt']
    factory = CommandLineToolFactory(
        argv,
        directory=instance_path,
    )

    # 'goodbye.txt' does not exist yet, so it is a plain string input.
    assert factory.inputs[1].default == 'goodbye.txt'
    assert factory.inputs[1].type == 'string'
    assert factory.inputs[1].inputBinding.position == 2
    goodbye_id = factory.inputs[1].id

    # simulate run
    output = Path(instance_path) / 'goodbye.txt'
    output.touch()

    # After the run the created file is matched back to the input
    # parameter and its glob references that input by id.
    parameters = list(factory.guess_outputs([output]))

    assert parameters[0][0].type == 'File'
    assert parameters[0][0].outputBinding.glob == \
        '$(inputs.{0})'.format(goodbye_id)

    tool = factory.generate_tool()
    assert tool.to_argv() == argv
def __attrs_post_init__(self):
    """Initialize computed attributes."""
    #: Configure Renku path.
    path = Path(self.renku_home)
    if not path.is_absolute():
        path = self.path / path

    # Guard that the Renku directory lives inside the repository;
    # ``relative_to`` raises ``ValueError`` otherwise.  The previous
    # ``path.relative_to(path)`` compared the path with itself and
    # could never fail, making the check a no-op.
    path.relative_to(self.path)
    self.renku_path = path

    #: Create an instance of a Git repository for the given path.
    try:
        self.git = GitRepo(str(self.path))
    except InvalidGitRepositoryError:
        # Not a Git repository (yet); callers must handle ``git is None``.
        self.git = None
def test_workflow_without_outputs(runner, project, run):
    """Test workflow without outputs."""
    repo = git.Repo(project)
    cwd = Path(project)
    input_ = cwd / 'input.txt'

    with input_.open('w') as f:
        f.write('first')

    repo.git.add('--all')
    repo.index.commit('Created input.txt')

    # Record a no-output step consuming the input file.
    cmd = ['run', 'cat', '--no-output', input_.name]
    result = runner.invoke(cli.cli, cmd)
    assert 0 == result.exit_code

    cmd = ['status', '--no-output']
    result = runner.invoke(cli.cli, cmd)
    assert 0 == result.exit_code

    # Changing the input makes the no-output step stale.
    with input_.open('w') as f:
        f.write('second')

    repo.git.add('--all')
    repo.index.commit('Updated input.txt')

    cmd = ['status', '--no-output']
    result = runner.invoke(cli.cli, cmd)
    assert 1 == result.exit_code

    # Updating the workflow brings the status back to clean.
    assert 0 == run(args=('update', '--no-output'))

    cmd = ['status', '--no-output']
    result = runner.invoke(cli.cli, cmd)
    assert 0 == result.exit_code
def test_update(runner, project, run):
    """Test automatic file update."""
    cwd = Path(project)
    data = cwd / 'data'
    data.mkdir()
    source = cwd / 'source.txt'
    output = data / 'result.txt'

    repo = git.Repo(project)
    update_and_commit('1', source, repo)

    # Record a byte-count step: source.txt -> data/result.txt.
    assert 0 == run(args=('run', 'wc', '-c'), stdin=source, stdout=output)

    with output.open('r') as f:
        assert f.read().strip() == '1'

    result = runner.invoke(cli.cli, ['status'])
    assert 0 == result.exit_code

    # Changing the source makes the output stale.
    update_and_commit('12', source, repo)

    result = runner.invoke(cli.cli, ['status'])
    assert 1 == result.exit_code

    # A plain update regenerates the output.
    assert 0 == run()

    result = runner.invoke(cli.cli, ['status'])
    assert 0 == result.exit_code

    with output.open('r') as f:
        assert f.read().strip() == '2'

    result = runner.invoke(cli.cli, ['log'], catch_exceptions=False)
    assert '(part of' in result.output, result.output

    # Source has been updated but output is unchanged.
    update_and_commit('34', source, repo)

    result = runner.invoke(cli.cli, ['status'])
    assert 1 == result.exit_code

    assert 0 == run()

    result = runner.invoke(cli.cli, ['status'])
    assert 0 == result.exit_code

    # 'wc -c' still reports two bytes, so the content stays '2'.
    with output.open('r') as f:
        assert f.read().strip() == '2'

    from renku.cli.log import FORMATS
    for output_format in FORMATS:
        # Make sure the log contains the original parent.
        result = runner.invoke(
            cli.cli,
            ['log', '--format', output_format],
            catch_exceptions=False,
        )
        assert 0 == result.exit_code, output_format
        assert source.name in result.output, output_format
def connect_file_to_directory(node):
    """Return step connecting file to a directory.

    Builds a synthetic ``ProcessRun`` that links ``node`` (a file) to
    its parent directory via the ``LINK_CWL`` template process.
    """
    # Specialize the link template with the file name relative to the
    # parent directory.
    process = attr.evolve(
        LINK_CWL,
        inputs={
            'input_directory': 'Directory',
            'filename': {
                'type': 'string',
                'default': str(Path(node.path).relative_to(node.parent.path)),
            },
        })

    process_run = ProcessRun(
        commit=node.commit,
        client=node.client,
        path=None,
        process=process,
        inputs={
            node.parent.path: Usage(
                entity=node.parent,
                role='input_directory',
            ),
        },
        outputs={
            node.path: 'output_file',
        },
    )

    # NOTE(review): ``nodes`` is a free variable here — this function
    # appears to rely on an enclosing scope providing the node registry;
    # confirm before reusing it elsewhere.
    for generated in process_run.generated:
        nodes[(generated.commit, generated.path)] = generated

    return process_run
def test_dataset_unlink_file(tmpdir, runner, client):
    """Test unlinking of file and check removal from dataset."""
    # create a dataset
    result = runner.invoke(cli.cli, ['dataset', 'create', 'my-dataset'])
    assert 0 == result.exit_code
    assert 'OK' in result.output

    # create data file
    new_file = tmpdir.join('datafile.csv')
    new_file.write('1,2,3')

    # add data to dataset
    result = runner.invoke(cli.cli,
                           ['dataset', 'add', 'my-dataset',
                            str(new_file)])
    assert 0 == result.exit_code

    with client.with_dataset(name='my-dataset') as dataset:
        assert new_file.basename in {
            Path(file_.path).name
            for file_ in dataset.files
        }

    result = runner.invoke(cli.cli, [
        'dataset', 'unlink', 'my-dataset', '--include', new_file.basename,
        '-y'
    ])
    assert 0 == result.exit_code

    with client.with_dataset(name='my-dataset') as dataset:
        # Normalize through ``Path`` exactly like the positive check
        # above; ``file_.path`` is not guaranteed to expose ``.name``
        # itself, so the previous ``file_.path.name`` would raise
        # ``AttributeError`` whenever any file remained.
        assert new_file.basename not in {
            Path(file_.path).name
            for file_ in dataset.files
        }
def test_rerun_with_inputs(runner, project, run):
    """Test file recreation with specified inputs."""
    cwd = Path(project)
    first = cwd / 'first.txt'
    second = cwd / 'second.txt'
    inputs = (first, second)
    output = cwd / 'output.txt'

    # Each input is filled with a random number so reruns can be
    # detected by a change in the concatenated output.
    cmd = [
        'run', 'python', '-S', '-c', 'import random; print(random.random())'
    ]

    for file_ in inputs:
        assert 0 == run(args=cmd, stdout=file_), 'Random number generation.'

    cmd = ['run', 'cat'] + [str(path) for path in inputs]
    assert 0 == run(args=cmd, stdout=output)

    with output.open('r') as f:
        initial_data = f.read()

    # Full rerun regenerates both inputs, so the output changes.
    assert 0 == run(args=('rerun', str(output)))

    with output.open('r') as f:
        assert f.read() != initial_data, 'The output should have changed.'

    # Keep the first file unchanged.
    with first.open('r') as f:
        first_data = f.read()

    # Rerunning only from the first input must preserve its content at
    # the start of the output.
    assert 0 == run(args=('rerun', '--from', str(first), str(output)))

    with output.open('r') as f:
        assert f.read().startswith(first_data)
class LocalClient(
    RepositoryApiMixin,
    DatasetsApiMixin,
):
    """A low-level client for communicating with a local Renku repository.

    Example:

        >>> import renku
        >>> client = renku.LocalClient('.')

    """

    # Repository root, normalized to an absolute resolved Path by the
    # attrs converter below.
    path = attr.ib(converter=lambda arg: Path(arg).resolve().absolute())

    @path.default
    def _default_path(self):
        """Return default repository path."""
        from renku.cli._git import get_git_home
        return get_git_home()

    @path.validator
    def _check_path(self, _, value):
        """Check the path exists and it is a directory."""
        if not (value.exists() and value.is_dir()):
            raise ValueError('Define an existing directory.')
def test_datasets_ls_files_correct_paths(tmpdir, runner, project): """Test listing of data within dataset and check that paths are correct.""" # create a dataset result = runner.invoke(cli.cli, ['dataset', 'create', 'my-dataset']) assert 0 == result.exit_code assert 'OK' in result.output # create some data paths = [] for i in range(3): new_file = tmpdir.join('file_{0}'.format(i)) new_file.write(str(i)) paths.append(str(new_file)) # add data to dataset result = runner.invoke( cli.cli, ['dataset', 'add', 'my-dataset'] + paths, catch_exceptions=False, ) assert 0 == result.exit_code # check include / exclude filters result = runner.invoke( cli.cli, ['dataset', 'ls-files', '--format=json-ld'] ) assert 0 == result.exit_code output = json.loads(result.output) for record in output: assert Path(record['url']).exists()
def test_deleted_input(runner, project, capsys):
    """Test deleted input."""
    repo = git.Repo(project)
    workdir = Path(project)

    # Commit a tracked input file.
    source = workdir / 'input.txt'
    with source.open('w') as f:
        f.write('first')
    repo.git.add('--all')
    repo.index.commit('Created input.txt')

    # Run a command that consumes and removes the input.
    result = runner.invoke(
        cli.cli, ['run', 'mv', source.name, 'input.mv'],
        catch_exceptions=False
    )
    assert 0 == result.exit_code

    # The original file is gone and the moved file exists.
    assert not source.exists()
    assert Path('input.mv').exists()