def integration_repo(headers, url_components):
    """Context manager helper for the integration test repository."""
    from renku.core.utils.contexts import chdir

    with chdir(integration_repo_path(headers, url_components)):
        repo = Repo('.')
        yield repo
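# A minimal usage sketch for `integration_repo` above. Since the function
# yields, it is presumably meant to be used as a context manager (e.g. via
# `contextlib.contextmanager`; the decorator is not shown above). The
# `headers` and `url_components` arguments stand in for whatever the
# surrounding test setup provides.
from contextlib import contextmanager

integration_repo_cm = contextmanager(integration_repo)


def example_integration_repo_usage(headers, url_components):
    with integration_repo_cm(headers, url_components) as repo:
        # `repo` is a GitPython Repo opened inside the cached project clone.
        assert not repo.bare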
def dataset_add_remote_file(cache, user, user_job_id, project_id, create_dataset, commit_message, name, url):
    """Add a remote file to a specified dataset."""
    user = cache.ensure_user(user)
    worker_log.debug(f"executing dataset add remote file job for {user.user_id}:{user.fullname}")

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"checking metadata for project {project_id}")
        project = cache.get_project(user, project_id)

        with chdir(project.abs_path):
            urls = url if isinstance(url, list) else [url]

            worker_log.debug(f"adding files {urls} to dataset {name}")
            add_file(urls, name, create=create_dataset, commit_message=commit_message)

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

        user_job.complete()
        worker_log.debug("job completed")
    except (HTTPError, BaseException, GitCommandError, RenkuException) as exp:
        user_job.fail_job(str(exp))

        # Reraise exception, so we see trace in job metadata
        # and in metrics as failed job.
        raise exp
def unlink_file_view(user_data, cache):
    """Unlink a file from a dataset."""
    ctx = DatasetUnlinkRequest().load(request.json)

    include = ctx.get("include_filter")
    exclude = ctx.get("exclude_filter")

    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    if ctx.get("commit_message") is None:
        if include and exclude:
            filters = "-I {0} -X {1}".format(include, exclude)
        elif not include and exclude:
            filters = "-X {0}".format(exclude)
        else:
            filters = "-I {0}".format(include)

        ctx["commit_message"] = "service: unlink dataset {0} {1}".format(ctx["name"], filters)

    with chdir(project.abs_path):
        records = file_unlink(
            name=ctx["name"],
            # Use the filters loaded from the request above.
            include=include,
            exclude=exclude,
            yes=True,
            interactive=False,
            commit_message=ctx["commit_message"],
        )
        unlinked = [record.path for record in records]

    return result_response(DatasetUnlinkResponseRPC(), {"unlinked": unlinked})
def dataset_import(
    cache,
    user,
    user_job_id,
    project_id,
    dataset_uri,
    name=None,
    extract=False,
    timeout=None,
):
    """Job for dataset import."""
    user = cache.ensure_user(user)
    worker_log.debug(f"executing dataset import job for {user.user_id}:{user.fullname}")

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"retrieving metadata for project {project_id}")
        project = cache.get_project(user, project_id)

        with chdir(project.abs_path):
            worker_log.debug(f"project found in cache - importing dataset {dataset_uri}")
            import_dataset(
                dataset_uri,
                name,
                extract,
                commit_message=f"service: dataset import {dataset_uri}",
                progress=DatasetImportJobProcess(cache, user_job),
            )

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

        user_job.complete()
        worker_log.debug("job completed")
    except (HTTPError, ParameterError, RenkuException, GitCommandError) as exp:
        user_job.fail_job(str(exp))

        # Reraise exception, so we see trace in job metadata
        # and in metrics as failed job.
        raise exp
def unlink_file_view(user_data, cache):
    """Unlink a file from a dataset."""
    ctx = DatasetUnlinkRequest().load(request.json)

    include = ctx.get('include_filter')
    exclude = ctx.get('exclude_filter')

    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if ctx.get('commit_message') is None:
        if include and exclude:
            filters = '-I {0} -X {1}'.format(include, exclude)
        elif not include and exclude:
            filters = '-X {0}'.format(exclude)
        else:
            filters = '-I {0}'.format(include)

        ctx['commit_message'] = 'service: unlink dataset {0} {1}'.format(ctx['short_name'], filters)

    with chdir(project.abs_path):
        records = file_unlink(
            short_name=ctx['short_name'],
            # Use the filters loaded from the request above.
            include=include,
            exclude=exclude,
            yes=True,
            interactive=False,
            commit_message=ctx['commit_message'],
        )
        unlinked = [record.path for record in records]

    return result_response(DatasetUnlinkResponseRPC(), {'unlinked': unlinked})
def edit_dataset_view(user_data, cache):
    """Edit dataset metadata."""
    ctx = DatasetEditRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if ctx.get('commit_message') is None:
        ctx['commit_message'] = 'service: dataset edit {0}'.format(ctx['short_name'])

    with chdir(project.abs_path):
        edited, warnings = edit_dataset(
            ctx['short_name'],
            ctx.get('title'),
            ctx.get('description'),
            ctx.get('creators'),
            keywords=ctx.get('keywords'),
            commit_message=ctx['commit_message'],
        )

    return result_response(
        DatasetEditResponseRPC(),
        {
            'edited': {field: ctx.get(field) for field in edited},
            'warnings': warnings,
        },
    )
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id argument',
            })

    with chdir(project_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project_path):
        return jsonify(
            error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'reason': 'push to remote failed silently - try again',
            })

    return jsonify(DatasetCreateResponseRPC().load(
        {'result': DatasetCreateResponse().load(ctx, unknown=EXCLUDE)}))
def edit_dataset_view(user_data, cache):
    """Edit dataset metadata."""
    ctx = DatasetEditRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    if ctx.get("commit_message") is None:
        ctx["commit_message"] = "service: dataset edit {0}".format(ctx["name"])

    with chdir(project.abs_path):
        edited, warnings = edit_dataset(
            ctx["name"],
            ctx.get("title"),
            ctx.get("description"),
            ctx.get("creators"),
            keywords=ctx.get("keywords"),
            commit_message=ctx["commit_message"],
        )

    return result_response(
        DatasetEditResponseRPC(),
        {
            "edited": {field: ctx.get(field) for field in edited},
            "warnings": warnings,
        },
    )
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(ctx['short_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_url' in _file:
            commit_message = '{0} {1}'.format(ctx['commit_message'], _file['file_url'])

            job = cache.make_job(user)
            _file['job_id'] = job.job_id

            # Remote files are added asynchronously by a queued worker job.
            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(
                    dataset_add_remote_file,
                    user_data,
                    job.job_id,
                    project.project_id,
                    ctx['create_dataset'],
                    commit_message,
                    ctx['short_name'],
                    _file['file_url'],
                )
            continue

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path
        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        with chdir(project.abs_path):
            add_file(
                local_paths,
                ctx['short_name'],
                create=ctx['create_dataset'],
                force=ctx['force'],
                commit_message=ctx['commit_message'],
            )

            try:
                _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path), remote='origin')
            except GitCommandError:
                return error_response(INTERNAL_FAILURE_ERROR_CODE, 'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
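# A hypothetical request payload for the add-file view above, illustrating
# the three file-reference shapes its loop dispatches on (`file_id`,
# `file_path`, `file_url`); the concrete values are made up for illustration.
example_add_file_payload = {
    'project_id': 'cached-project-id',
    'short_name': 'my-dataset',
    'create_dataset': True,
    'force': False,
    'commit_message': '',
    'files': [
        {'file_id': 'abc123'},                      # previously uploaded file in the cache
        {'file_path': 'data/local.csv'},            # path relative to the project root
        {'file_url': 'https://example.com/f.csv'},  # remote file, handled by a queued job
    ],
}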
def test_unlink_default(directory_tree, client):
    """Test unlink default behaviour."""
    with chdir(client.path):
        create_dataset("dataset")
        add_file([directory_tree.join("dir2").strpath], "dataset")

    with pytest.raises(ParameterError):
        file_unlink("dataset", (), ())
def datapack_tar(directory_tree):
    """Returns dummy data folder as a tar archive."""
    from renku.core.utils.contexts import chdir

    workspace_dir = tempfile.TemporaryDirectory()
    with chdir(workspace_dir.name):
        shutil.make_archive('datapack', 'tar', str(directory_tree))

    yield Path(workspace_dir.name) / 'datapack.tar'
def test_cli_initialization_no_err_help(cmd, runner):
    """Test allowed commands within non-renku repository."""
    from renku.core.utils.contexts import chdir

    sys.argv = cmd

    with tempfile.TemporaryDirectory() as tmpdir:
        with chdir(tmpdir):
            result = runner.invoke(cli, cmd)
            assert 0 == result.exit_code
def datapack_zip(directory_tree):
    """Returns dummy data folder as a zip archive."""
    from renku.core.utils.contexts import chdir

    workspace_dir = tempfile.TemporaryDirectory()
    with chdir(workspace_dir.name):
        shutil.make_archive("datapack", "zip", str(directory_tree))

    yield Path(workspace_dir.name) / "datapack.zip"
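# A minimal sketch of a test consuming the `datapack_zip` fixture above; the
# test name is hypothetical, and `zipfile` is used only to show that the
# yielded path points at a valid zip archive.
import zipfile


def test_datapack_zip_is_valid_archive(datapack_zip):
    assert datapack_zip.exists()
    assert zipfile.is_zipfile(str(datapack_zip))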
def list_datasets_view(user, cache):
    """List all datasets in project."""
    ctx = DatasetListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        ctx['datasets'] = list_datasets()

    return result_response(DatasetListResponseRPC(), ctx)
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        ctx['files'] = list_files(datasets=[ctx['short_name']])

    return result_response(DatasetFilesListResponseRPC(), ctx)
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id: {0}'.format(ctx['project_id']),
            })

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = make_file_path(user, file)
        elif 'file_path' in _file:
            local_path = project_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            # ``local_path`` can be None here, so report the raw file record
            # rather than dereferencing it.
            return jsonify(
                error={
                    'code': INVALID_PARAMS_ERROR_CODE,
                    'message': 'invalid file reference: {0}'.format(json.dumps(_file)),
                })

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project_path):
        add_file(
            local_paths,
            ctx['dataset_name'],
            create=ctx['create_dataset'],
            commit_message=ctx['commit_message'])

    if not repo_sync(project_path):
        return jsonify(
            error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'message': 'repo sync failed',
            })

    return jsonify(DatasetAddResponseRPC().load(
        {'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)}))
def migration_check_project_view(user_data, cache):
    """Check if the specified project requires migration."""
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, request.json['project_id'])

    with chdir(project.abs_path):
        migration_required, project_supported = migrations_check()

    return result_response(
        ProjectMigrationCheckResponseRPC(),
        {
            'migration_required': migration_required,
            'project_supported': project_supported,
        },
    )
def test_cli_initialization_err(cmd, runner):
    """Test the correct error is raised within a non-renku repository."""
    from renku.core.utils.contexts import chdir

    with tempfile.TemporaryDirectory() as tmpdir:
        with chdir(tmpdir):
            result = runner.invoke(cli, ["--disable-version-check"] + cmd)
            assert 2 == result.exit_code

            expected_output = ("Error: `.` is not a renku repository.\n"
                               "To initialize this as a "
                               "renku repository use: `renku init`\n")
            assert expected_output == result.output
def subdirectory(request):
    """Runs tests in root directory and a subdirectory."""
    from renku.core.utils.contexts import chdir

    if request.param != '.':
        path = Path(request.param) / '.gitkeep'
        path.parent.mkdir(parents=True, exist_ok=True)
        path.touch()

        Repo().git.add(str(path))
        Repo().index.commit('Create subdirectory')

    with chdir(request.param):
        yield
def subdirectory(project, request):
    """Runs tests in root directory and a subdirectory."""
    from renku.core.utils.contexts import chdir

    if request.param != ".":
        path = Path(request.param) / ".gitkeep"
        path.parent.mkdir(parents=True, exist_ok=True)
        path.touch()

        Repo().git.add(str(path))
        Repo().index.commit("Create subdirectory", skip_hooks=True)

    with chdir(request.param):
        yield Path(request.param).resolve()
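# The `subdirectory` fixtures above read `request.param`, which implies they
# are registered as parametrized pytest fixtures, e.g. decorated with
# `@pytest.fixture(params=['.', 'sub', 'sub/sub'])` (the params list here is
# an assumption for illustration). A minimal sketch of a consuming test:
def test_example_in_each_directory(subdirectory):
    # Each parametrized run executes with the working directory changed into
    # the subdirectory; the second fixture above yields that resolved path.
    assert subdirectory.exists()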
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE, 'invalid project_id argument')

    with chdir(project.abs_path):
        ctx['files'] = list_files(datasets=[ctx['dataset_name']])

    return result_response(DatasetFilesListResponseRPC(), ctx)
def test_run_from_non_root(runner, client, cwd):
    """Test warning is shown when running renku commands from a subdirectory."""
    path = client.path / cwd
    path.mkdir(parents=True, exist_ok=True)

    with chdir(path):
        result = runner.invoke(cli, ['dataset'])
        assert 0 == result.exit_code
        assert 'Run CLI commands only from project\'s root' in result.output

        result = runner.invoke(cli, ['help'])
        assert 0 == result.exit_code
        assert 'Run CLI commands only from project' not in result.output

    # Running from the project root again: no warning is shown.
    result = runner.invoke(cli, ['dataset'])
    assert 0 == result.exit_code
    assert 'Run CLI commands only from project\'s root' not in result.output
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user)
    project = cache.get_project(user, ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(
            INVALID_PARAMS_ERROR_CODE,
            'invalid project_id: {0}'.format(ctx['project_id']))

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path
        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project.abs_path):
        add_file(
            local_paths,
            ctx['dataset_name'],
            create=ctx['create_dataset'],
            commit_message=ctx['commit_message'])

    if not repo_sync(project.abs_path):
        return error_response(INTERNAL_FAILURE_ERROR_CODE, 'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
def migrate_project_view(user_data, cache):
    """Migrate specified project."""
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, request.json['project_id'])

    messages = []

    def collect_message(msg):
        """Collect migration message."""
        messages.append(msg)

    with chdir(project.abs_path):
        was_migrated = migrate_project(progress_callback=collect_message)

    return result_response(
        ProjectMigrateResponseRPC(),
        {
            'messages': messages,
            'was_migrated': was_migrated,
        },
    )
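# A minimal sketch of calling the migration views above over HTTP. The base
# URL, route path, and auth header are assumptions for illustration; only the
# JSON payload shape (`project_id`) and the `result` envelope produced by
# `result_response` come from the views themselves.
import requests

SERVICE = 'https://example.com/renku/api'  # hypothetical base URL

response = requests.post(
    f'{SERVICE}/cache.migrate',  # hypothetical route for migrate_project_view
    json={'project_id': 'my-cached-project-id'},
    headers={'Authorization': 'Bearer <token>'},
)
# RPC-style responses wrap their payload under a `result` key.
print(response.json().get('result'))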
def remove_dataset_view(user, cache):
    """Remove a dataset from a project."""
    ctx = DatasetRemoveRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx["project_id"])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE, "invalid project_id argument")

    with chdir(project.abs_path):
        dataset_remove([ctx["name"]], commit_message=ctx["commit_message"])

    try:
        _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path), remote="origin")
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, "repo sync failed")

    return result_response(DatasetRemoveResponseRPC(), ctx)
def test_renku_clone_with_config(tmpdir):
    """Test cloning of a Renku repo and existence of required settings."""
    REMOTE = 'https://dev.renku.ch/gitlab/virginiafriedrich/datasets-test.git'

    with chdir(tmpdir):
        renku_clone(
            REMOTE,
            config={
                'user.name': 'sam',
                'user.email': '[email protected]',
                'filter.lfs.custom': '0',
            },
        )

        repo = git.Repo('datasets-test')
        reader = repo.config_reader()
        reader.values()

        lfs_config = dict(reader.items('filter.lfs'))
        assert '0' == lfs_config.get('custom')
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        create_dataset(
            ctx['short_name'],
            title=ctx.get('name'),
            creators=ctx.get('creator'),
            description=ctx.get('description'),
            keywords=ctx.get('keywords'),
            commit_message=ctx['commit_message'],
        )

    try:
        _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path), remote='origin')
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, 'push to remote failed silently - try again')

    return result_response(DatasetCreateResponseRPC(), ctx)
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx["project_id"])

    with chdir(project.abs_path):
        create_dataset(
            ctx["name"],
            title=ctx.get("title"),
            creators=ctx.get("creators"),
            description=ctx.get("description"),
            keywords=ctx.get("keywords"),
            commit_message=ctx["commit_message"],
        )

    try:
        _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path), remote="origin")
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, "repo sync failed")

    return result_response(DatasetCreateResponseRPC(), ctx)
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE, 'invalid project_id argument')

    with chdir(project.abs_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project.abs_path):
        return error_response(INTERNAL_FAILURE_ERROR_CODE, 'push to remote failed silently - try again')

    return result_response(DatasetCreateResponseRPC(), ctx)
def list_datasets_view(user, cache):
    """List all datasets in project."""
    req = DatasetListRequest().load(request.args)
    project = cache.get_project(user, req['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'reason': 'invalid project_id argument',
            })

    with chdir(project_path):
        datasets = [
            DatasetDetails().load(ds, unknown=EXCLUDE)
            # TODO: fix core interface to address this issue (add ticket ref)
            for ds in json.loads(dataset_parent(None, 'data', 'json-ld'))
        ]

    response = DatasetListResponse().load({'datasets': datasets})
    return jsonify(DatasetListResponseRPC().load({'result': response}))