def test_import_from_renku_project(tmpdir, client, runner): """Test an imported dataset from other renku repos will have metadata.""" from renku.core.management import LocalClient REMOTE = 'https://dev.renku.ch/gitlab/virginiafriedrich/datasets-test.git' path = tmpdir.strpath os.environ['GIT_LFS_SKIP_SMUDGE'] = '1' git.Repo.clone_from(REMOTE, path, recursive=True) remote_client = LocalClient(path) remote = read_dataset_file_metadata( remote_client, 'zhbikes', '2019_verkehrszaehlungen_werte_fussgaenger_velo.csv' ) result = runner.invoke( cli, [ 'dataset', 'add', '--create', 'remote-dataset', '-s', 'data/zhbikes/2019_verkehrszaehlungen_werte_fussgaenger_velo.csv', '-d', 'file', '--ref', 'b973db5', REMOTE ], catch_exceptions=False, ) assert 0 == result.exit_code metadata = read_dataset_file_metadata(client, 'remote-dataset', 'file') assert metadata.creator[0].name == remote.creator[0].name assert metadata.based_on._id == remote._id assert metadata.based_on._label == remote._label assert metadata.based_on.path == remote.path assert metadata.based_on.based_on is None assert metadata.based_on.url == REMOTE
def find_record(self, uri, client=None): """Retrieves a dataset from Renku. :raises: ``LookupError`` :param uri: URL :return: ``DataverseRecord`` """ from renku.core.management import LocalClient same_as, kg_urls = self._get_dataset_info(uri) project_url = None failed_urls = [] for kg_url in kg_urls: kg_datasets_url, ssh_url, https_url = self._get_project_urls(kg_url) # Check if the project contains the dataset if same_as is None: # Dataset is in the project dataset_id = self._extract_dataset_id(uri) else: # Dataset is sameAs one of the datasets in the project datasets = self._query_knowledge_graph(kg_datasets_url) ids = [ds["identifier"] for ds in datasets if ds["sameAs"] == same_as] if not ids: continue dataset_id = ids[0] # Check if we can clone the project for url in (ssh_url, https_url): try: repo, repo_path = client.prepare_git_repo(url) except errors.GitError: failed_urls.append(url) else: project_url = url break if project_url is not None: break if project_url is None: if failed_urls: message = "Cannot clone remote projects:\n\t" + "\n\t".join(failed_urls) else: message = "Cannot find any project for the dataset." raise errors.ParameterError(message, param_hint=uri) remote_client = LocalClient(repo_path) self._migrate_project(remote_client) datasets = [d for d in remote_client.datasets.values() if urllib.parse.quote(d.uid, safe="") == dataset_id] if len(datasets) == 0: raise errors.ParameterError( 'Cannot find dataset with id "{}" in project "{}"'.format(dataset_id, project_url) ) if len(datasets) > 1: raise errors.ParameterError('Found multiple datasets with id "{}"'.format(dataset_id)) return _RenkuRecordSerializer(datasets[0], project_url, remote_client)
def client_with_lfs_warning(project): """Return a Renku repository with lfs warnings active.""" from renku.core.management import LocalClient client = LocalClient(path=project) client.set_value('renku', 'lfs_threshold', '0b') client.repo.git.add('.renku/renku.ini') client.repo.index.commit('update renku.ini') yield client
def client_with_lfs_warning(project): """Return a Renku repository with lfs warnings active.""" from renku.core.management import LocalClient client = LocalClient(path=project) client.set_value("renku", "lfs_threshold", "0b") client.repo.git.add(".renku/renku.ini") client.repo.index.commit("update renku.ini") yield client
def test_modified_tool(runner, project, run): """Test detection of modified tool.""" from renku.core.management import LocalClient client = LocalClient(project) repo = client.repo greeting = client.path / 'greeting.txt' assert 0 == run(args=('run', 'echo', 'hello'), stdout=greeting) cmd = ['status'] result = runner.invoke(cli, cmd) assert 0 == result.exit_code # There should be only one command line tool. tools = list(client.workflow_path.glob('*_echo.cwl')) assert 1 == len(tools) tool_path = tools[0] with tool_path.open('r') as f: command_line_tool = CWLClass.from_cwl(yaml.safe_load(f)) # Simulate a manual edit. command_line_tool.inputs[0].default = 'ahoj' command_line_tool.stdout = 'pozdrav.txt' with tool_path.open('w') as f: yaml.dump(ascwl( command_line_tool, filter=lambda _, x: x is not None, basedir=client.workflow_path, ), stream=f, default_flow_style=False) repo.git.add('--all') repo.index.commit('Modified tool', skip_hooks=True) assert 0 == run() output = client.path / 'pozdrav.txt' assert output.exists() with output.open('r') as f: assert 'ahoj\n' == f.read() cmd = ['status'] result = runner.invoke(cli, cmd) assert 0 == result.exit_code
def new_func(*args, **kwargs): ctx = click.get_current_context(silent=True) if ctx is None: client = LocalClient( path=default_path(), renku_home=RENKU_HOME, external_storage_requested=True, ) ctx = click.Context(click.Command(method)) else: client = ctx.ensure_object(LocalClient) stack = contextlib.ExitStack() # Handle --isolation option: if get_git_isolation(): client = stack.enter_context(client.worktree()) if requires_migration: check_for_migration(client) transaction = client.transaction( clean=clean, commit=commit, commit_empty=commit_empty, commit_message=kwargs.get('commit_message', None), commit_only=commit_only, ignore_std_streams=ignore_std_streams, raise_if_empty=raise_if_empty, up_to_date=up_to_date, ) stack.enter_context(transaction) if lock or (lock is None and commit): stack.enter_context(client.lock) result = None if ctx: with stack: result = ctx.invoke(method, client, *args, **kwargs) return result
def client(project): """Return a Renku repository.""" from renku.core.management import LocalClient original_get_value = LocalClient.get_value def mocked_get_value(self, section, key, local_only=False, global_only=False): """We don't want lfs warnings in tests.""" if key == 'show_lfs_message': return 'False' return original_get_value(self, section, key, local_only, global_only) LocalClient.get_value = mocked_get_value yield LocalClient(path=project) LocalClient.get_value = original_get_value
def local_client(): """Add a Renku local client.""" from renku.core.management import LocalClient with tempfile.TemporaryDirectory() as tempdir: yield LocalClient(path=tempdir)
def add_client(doctest_namespace): """Add Renku client to doctest namespace.""" from renku.core.management import LocalClient doctest_namespace['client'] = LocalClient(path=tempfile.mkdtemp())
def _migrate_submodule_based_datasets(client): from renku.core.management import LocalClient from renku.core.management.migrate import is_project_unsupported submodules = client.repo.submodules if not submodules: return for s in submodules: try: s.update() except GitError: pass submodules_urls = {s.path: s.url for s in submodules} repo_paths = [] symlinks = [] for dataset in client.datasets.values(): for file_ in dataset.files: path = client.path / file_.path if not path.is_symlink(): continue target = path.resolve() if '/.renku/vendors/' not in str(target): continue repo = Repo(target.parent, search_parent_directories=True) repo_path = repo.working_dir if repo_path not in repo_paths: repo_paths.append(repo_path) symlinks.append((file_.path, target, repo_path)) if not symlinks: return remote_clients = {p: LocalClient(p) for p in repo_paths} for remote_client in remote_clients.values(): if not is_project_unsupported(remote_client): migrate(remote_client) metadata = {} for path, target, repo_path in symlinks: remote_client = remote_clients[repo_path] path_within_repo = target.relative_to(repo_path) repo_is_remote = '.renku/vendors/local' not in repo_path based_on = None submodule_path = Path(repo_path).relative_to(client.path) url = submodules_urls.get(str(submodule_path), '') if repo_is_remote: based_on = _fetch_file_metadata(remote_client, path_within_repo) if based_on: based_on.url = url based_on.based_on = None else: based_on = DatasetFile.from_revision(remote_client, path=path_within_repo, url=url) else: if url: full_path = Path(url) / path_within_repo rel_path = os.path.relpath(full_path, client.path) url = f'file://{rel_path}' metadata[path] = (based_on, url) path = client.path / path path.unlink() try: shutil.move(target, path) except FileNotFoundError: raise errors.InvalidFileOperation(f'File was not found: {target}') for s in submodules: if s.path.startswith('.renku/vendors/'): try: s.remove(force=True) except ValueError: pass for dataset in client.datasets.values(): for file_ in dataset.files: if file_.path in metadata: based_on, url = metadata[file_.path] file_.based_on = based_on file_.url = remove_credentials(url) dataset.to_yaml()
def client(project): """Return a Renku repository.""" from renku.core.management import LocalClient yield LocalClient(path=project)