Example #1
def dataset_add_remote_file(cache, user, user_job_id, project_id, create_dataset, commit_message, name, url):
    """Add a remote file to a specified dataset."""
    user = cache.ensure_user(user)
    worker_log.debug((f"executing dataset add remote " f"file job for {user.user_id}:{user.fullname}"))

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"checking metadata for project {project_id}")
        project = cache.get_project(user, project_id)

        with chdir(project.abs_path):
            urls = url if isinstance(url, list) else [url]

            worker_log.debug(f"adding files {urls} to dataset {name}")
            add_file(urls, name, create=create_dataset, commit_message=commit_message)

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

            user_job.complete()
            worker_log.debug("job completed")
    except BaseException as exp:
        # The broad catch is intentional: HTTPError, GitCommandError and
        # RenkuException are all subclasses of BaseException, so listing
        # them separately was redundant.
        user_job.fail_job(str(exp))

        # Re-raise, so we see the trace in the job metadata
        # and in metrics as a failed job.
        raise
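
All of the examples here run add_file inside a chdir context manager so the operation executes in the cloned project's directory. Assuming it follows the standard pattern, a minimal sketch of such a helper looks like this (an illustration, not the library's actual implementation):

import os
from contextlib import contextmanager

@contextmanager
def chdir(path):
    """Temporarily switch the working directory, restoring it on exit.

    A minimal sketch; the real renku helper may differ.
    """
    cwd = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(cwd)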
Example #2
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['short_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_url' in _file:
            commit_message = '{0}{1}'.format(ctx['commit_message'],
                                             _file['file_url'])

            job = cache.make_job(user)
            _file['job_id'] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(dataset_add_remote_file, user_data, job.job_id,
                              project.project_id, ctx['create_dataset'],
                              commit_message, ctx['short_name'],
                              _file['file_url'])
            continue

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path

        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        with chdir(project.abs_path):
            add_file(local_paths,
                     ctx['short_name'],
                     create=ctx['create_dataset'],
                     force=ctx['force'],
                     commit_message=ctx['commit_message'])

            try:
                _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path),
                                                    remote='origin')
            except GitCommandError:
                return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                      'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
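
For reference, the request body that DatasetAddRequest deserializes here looks roughly like the following. The field names come from the ctx[...] accesses above; the values are purely illustrative:

# Hypothetical payload for add_file_to_dataset_view; values are examples only.
payload = {
    'project_id': '<project-id>',      # cached project to operate on
    'short_name': 'my-dataset',        # dataset to add files to
    'create_dataset': True,            # create the dataset if missing
    'force': False,
    'commit_message': '',              # empty -> a default message is generated
    'files': [
        {'file_url': 'https://example.com/data.csv'},  # handled by an async job
        {'file_id': '<cache-file-id>'},                # previously uploaded file
        {'file_path': 'data/local.csv'},               # path inside the project
    ],
}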
Example #3
def test_unlink_default(directory_tree, client):
    """Test unlink default behaviour."""
    with chdir(client.path):
        create_dataset("dataset")
        add_file([directory_tree.join("dir2").strpath], "dataset")

    with pytest.raises(ParameterError):
        file_unlink("dataset", (), ())
Example #4
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id: {0}'.format(ctx['project_id']),
            })

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = make_file_path(user, file)

        elif 'file_path' in _file:
            local_path = project_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            # `local_path` can still be None here, so report the raw file
            # reference rather than dereferencing it.
            return jsonify(
                error={
                    'code': INVALID_PARAMS_ERROR_CODE,
                    'message':
                    'invalid file reference: {0}'.format(json.dumps(_file))
                })

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

        if not repo_sync(project_path):
            return jsonify(error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'message': 'repo sync failed'
            })

    return jsonify(DatasetAddResponseRPC().load(
        {'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)}))
Example #5
def test_list_files_default(project, tmpdir):
    """Test a default file listing."""
    create_dataset("ds1", title="", description="", creators=[], commit_message="my awesome dataset")
    data_file = tmpdir / Path("somefile")
    data_file.write_text("1,2,3", encoding="utf-8")

    add_file([str(data_file)], "ds1")
    files = list_files(datasets=["ds1"])

    assert isinstance(files, list)
    assert "somefile" in [file_.name for file_ in files]
Example #6
def add(name, urls, link, force, create, sources, destination, ref):
    """Add data to a dataset."""
    progress = partial(progressbar, label='Adding data to dataset')
    add_file(
        urls=urls,
        name=name,
        link=link,
        force=force,
        create=create,
        sources=sources,
        destination=destination,
        ref=ref,
        urlscontext=progress
    )
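
This is the body of a Click command; the decorators binding its parameters are omitted from the snippet. A plausible wiring, assumed rather than taken from the source, would be:

import click

# Assumed option wiring; the actual CLI definition may differ.
@click.command()
@click.argument('name')
@click.argument('urls', nargs=-1, required=True)
@click.option('--link', is_flag=True, help='Link files instead of copying them.')
@click.option('--force', is_flag=True, help='Allow adding otherwise ignored files.')
@click.option('--create', is_flag=True, help='Create the dataset if it does not exist.')
@click.option('--source', '-s', 'sources', multiple=True, help='Paths within a remote repository.')
@click.option('--destination', '-d', default='', help='Destination directory inside the dataset.')
@click.option('--ref', default=None, help='Git reference (branch, tag, or commit) to add from.')
def add(name, urls, link, force, create, sources, destination, ref):
    """CLI entry point; the body is shown in Example #6 above."""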
Example #7
def test_list_files_default(project, tmpdir):
    """Test a default file listing."""
    create_dataset('ds1',
                   title='',
                   description='',
                   creators=[],
                   commit_message='my awesome dataset')
    data_file = tmpdir / Path('somefile')
    data_file.write_text('1,2,3', encoding='utf-8')

    add_file([str(data_file)], 'ds1')
    files = list_files(datasets=['ds1'])

    assert isinstance(files, list)
    assert 'somefile' in [ds.name for ds in files]
Example #8
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user)
    project = cache.get_project(user, ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(
            INVALID_PARAMS_ERROR_CODE,
            'invalid project_id: {0}'.format(ctx['project_id']))

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path

        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project.abs_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

        if not repo_sync(project.abs_path):
            return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                  'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
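
The error_response and result_response helpers wrap the service's JSON envelope. Minimal stand-ins consistent with how they are called in these examples (a sketch, not the actual renku service implementations):

from flask import jsonify

def error_response(code, message):
    """Wrap an error code and message in the service's error envelope."""
    return jsonify(error={'code': code, 'message': message})

def result_response(serializer, data):
    """Serialize ``data`` under a ``result`` key using the given schema."""
    return jsonify(serializer.dump({'result': data}))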
Example #9
def add(name, urls, external, force, overwrite, create, sources, destination, ref):
    """Add data to a dataset."""
    progress = partial(progressbar, label="Adding data to dataset")
    add_file(
        urls=urls,
        name=name,
        external=external,
        force=force,
        overwrite=overwrite,
        create=create,
        sources=sources,
        destination=destination,
        ref=ref,
        urlscontext=progress,
        progress=_DownloadProgressbar,
        interactive=True,
    )
    click.secho("OK", fg="green")
Example #10
def add(
    short_name, urls, link, external, force, create, sources, destination, ref
):
    """Add data to a dataset."""
    progress = partial(progressbar, label='Adding data to dataset')
    add_file(
        urls=urls,
        short_name=short_name,
        link=link,
        external=external,
        force=force,
        create=create,
        sources=sources,
        destination=destination,
        ref=ref,
        urlscontext=progress,
        progress=_DownloadProgressbar,
        interactive=True,
    )
    click.secho('OK', fg='green')
Example #11
def dataset_add_remote_file(cache, user, user_job_id, project_id,
                            create_dataset, commit_message, short_name, url):
    """Add a remote file to a specified dataset."""
    user = cache.ensure_user(user)
    user_job = cache.get_job(user, user_job_id)
    project = cache.get_project(user, project_id)

    try:
        user_job.in_progress()

        with chdir(project.abs_path):
            urls = url if isinstance(url, list) else [url]
            add_file(urls,
                     short_name,
                     create=create_dataset,
                     commit_message=commit_message)

            _, remote_branch = repo_sync(Repo(project.abs_path),
                                         remote='origin')
            user_job.update_extras('remote_branch', remote_branch)

            user_job.complete()
    except BaseException as e:
        # Broad catch is intentional: the job must be marked as failed
        # regardless of what went wrong. (HTTPError and GitCommandError
        # are already subclasses of BaseException.)
        user_job.fail_job(str(e))
Example #12
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    if not ctx["commit_message"]:
        ctx["commit_message"] = "service: dataset add {0}".format(ctx["name"])

    local_paths = []
    for _file in ctx["files"]:
        local_path = None

        if "file_url" in _file:
            commit_message = "{0}{1}".format(ctx["commit_message"],
                                             _file["file_url"])

            job = cache.make_job(
                user,
                project=project,
                job_data={
                    "renku_op": "dataset_add_remote_file",
                    "client_extras": ctx.get("client_extras")
                },
            )
            _file["job_id"] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(
                    dataset_add_remote_file,
                    user_data,
                    job.job_id,
                    project.project_id,
                    ctx["create_dataset"],
                    commit_message,
                    ctx["name"],
                    _file["file_url"],
                )
            continue

        if "file_id" in _file:
            file = cache.get_file(user, _file["file_id"])
            local_path = file.abs_path

        elif "file_path" in _file:
            local_path = project.abs_path / Path(_file["file_path"])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                "invalid file reference: {0}".format(json.dumps(_file)))

        ctx["commit_message"] += " {0}".format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        with chdir(project.abs_path):
            add_file(
                local_paths,
                ctx["name"],
                create=ctx["create_dataset"],
                force=ctx["force"],
                commit_message=ctx["commit_message"],
            )

            try:
                _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path),
                                                    remote="origin")
            except GitCommandError:
                return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                      "repo sync failed")

    return result_response(DatasetAddResponseRPC(), ctx)