def test_dataset_import_twice_job(doi, svc_client_with_repo):
    """Test dataset import."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}
    payload = {
        'project_id': project_id,
        'dataset_uri': doi,
    }
    response = svc_client.post(
        '/datasets.import',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'job_id', 'created_at'} == set(response.json['result'].keys())

    dest = make_project_path(
        user, {
            'owner': url_components.owner,
            'name': url_components.name
        }
    )

    old_commit = Repo(dest).head.commit
    job_id = response.json['result']['job_id']

    dataset_import(
        user,
        job_id,
        project_id,
        doi,
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha

    with pytest.raises(DatasetExistsError):
        dataset_import(
            user,
            job_id,
            project_id,
            doi,
        )

    new_commit2 = Repo(dest).head.commit
    assert new_commit.hexsha == new_commit2.hexsha

    response = svc_client.get(
        f'/jobs/{job_id}',
        data=json.dumps(payload),
        headers=headers,
    )

    assert_rpc_response(response)
    extras = response.json['result']['extras']

    assert 'error' in extras
    assert 'Dataset exists' in extras['error']
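
These excerpts omit their module-level imports. A minimal set they appear to rely on, inferred from the names used throughout; the Renku-specific helpers (make_project_path, make_file_path, dataset_import, assert_rpc_response, and the schema classes) come from the Renku service code base and its test suite and are not repeated here:

# Imports inferred from the names used in these excerpts.
import json
import shutil
import uuid
from pathlib import Path

import pytest
from flask import jsonify, request
from git import Repo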
Example 2
def test_dataset_import_junk_job(doi, expected_err, svc_client_with_repo):
    """Test dataset import."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}
    payload = {
        "project_id": project_id,
        "dataset_uri": doi,
    }
    response = svc_client.post("/datasets.import", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"job_id", "created_at"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})

    old_commit = Repo(dest).head.commit
    job_id = response.json["result"]["job_id"]

    with pytest.raises(ParameterError):
        dataset_import(
            user, job_id, project_id, doi,
        )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha == new_commit.hexsha

    response = svc_client.get(f"/jobs/{job_id}", data=json.dumps(payload), headers=headers,)

    assert_rpc_response(response)
    extras = response.json["result"]["extras"]

    assert "error" in extras
    assert expected_err in extras["error"]
Example 3
def test_dataset_import_job(doi, svc_client_with_repo):
    """Test dataset import via doi."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}
    payload = {
        "project_id": project_id,
        "dataset_uri": doi,
    }
    response = svc_client.post("/datasets.import", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"job_id", "created_at"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})

    old_commit = Repo(dest).head.commit
    job_id = response.json["result"]["job_id"]

    dataset_import(
        user, job_id, project_id, doi,
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha
    assert f"service: dataset import {doi}" == new_commit.message

    response = svc_client.get(f"/jobs/{job_id}", headers=headers,)
    assert response
    assert_rpc_response(response)
    assert "COMPLETED" == response.json["result"]["state"]
Example 4
def _project_clone(cache, user_data, project_data):
    """Clones the project for a given user."""
    local_path = make_project_path(user_data, project_data)
    user = cache.ensure_user(user_data)

    if local_path.exists():
        shutil.rmtree(str(local_path))

        for project in cache.get_projects(user):
            if project.git_url == project_data["git_url"]:
                project.delete()

    local_path.mkdir(parents=True, exist_ok=True)

    repo = project_clone(
        project_data["url_with_auth"],
        local_path,
        depth=project_data["depth"] if project_data["depth"] != 0 else None,
        raise_git_except=True,
        config={
            "user.name": project_data["fullname"],
            "user.email": project_data["email"],
        },
        checkout_rev=project_data["ref"],
    )

    service_log.debug(f"project successfully cloned: {repo}")
    service_log.debug(f"project folder exists: {local_path.exists()}")

    project = cache.make_project(user, project_data)
    return project
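
For reference, a sketch of the project_data mapping this helper reads; only the keys are taken from the code above, and every value is a placeholder:

# Keys inferred from the reads in _project_clone; all values are placeholders.
project_data = {
    "git_url": "https://gitlab.example.com/owner/name.git",
    "url_with_auth": "https://oauth2:TOKEN@gitlab.example.com/owner/name.git",
    "depth": 1,  # 0 is treated as "no depth limit" (passed on as None)
    "fullname": "Jane Doe",
    "email": "jane.doe@example.com",
    "ref": "master",
}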
Example 5
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])

    project_path = make_project_path(user, project)
    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id argument',
            })

    with chdir(project_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project_path):
        return jsonify(
            error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'reason': 'push to remote failed silently - try again'
            })

    return jsonify(DatasetCreateResponseRPC().load(
        {'result': DatasetCreateResponse().load(ctx, unknown=EXCLUDE)}))
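
A sketch of the JSON body create_dataset_view accepts, inferred from the fields read out of ctx; all values are placeholders, and the exact shape of the optional fields is defined by DatasetCreateRequest rather than shown here:

# Fields read by create_dataset_view; all values are placeholders.
create_payload = {
    'project_id': '<project id from the cache>',
    'dataset_name': 'my-dataset',
    'commit_message': 'service: dataset create my-dataset',
    'creators': [],  # optional; structure defined by DatasetCreateRequest
    'description': 'An example dataset',  # optional
}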
Example 6
def project_clone(user, cache):
    """Clone a remote repository."""
    # Merge the authenticated user's fields into the request payload before
    # validating it against the schema.
    ctx = ProjectCloneContext().load(
        {**request.json, **user},
        unknown=EXCLUDE,
    )
    local_path = make_project_path(user, ctx)
    user = cache.ensure_user(user)

    if local_path.exists():
        shutil.rmtree(str(local_path))

        for project in cache.get_projects(user):
            if project.git_url == ctx['git_url']:
                project.delete()

    local_path.mkdir(parents=True, exist_ok=True)
    renku_clone(ctx['url_with_auth'],
                local_path,
                depth=ctx['depth'],
                raise_git_except=True,
                config={
                    'user.name': ctx['fullname'],
                    'user.email': ctx['email'],
                })

    project = cache.make_project(user, ctx)

    return result_response(ProjectCloneResponseRPC(), project)
Example 7
def _project_clone(cache, user_data, project_data):
    """Clones the project for a given user."""
    local_path = make_project_path(user_data, project_data)
    user = cache.ensure_user(user_data)

    if local_path.exists():
        shutil.rmtree(str(local_path))

        for project in cache.get_projects(user):
            if project.git_url == project_data['git_url']:
                project.delete()

    local_path.mkdir(parents=True, exist_ok=True)
    project_clone(
        project_data['url_with_auth'],
        local_path,
        depth=project_data['depth'] if project_data['depth'] != 0 else None,
        raise_git_except=True,
        config={
            'user.name': project_data['fullname'],
            'user.email': project_data['email'],
        },
        checkout_rev=project_data['ref'])

    project = cache.make_project(user, project_data)
    return project
def test_dataset_project_lock(doi, svc_client_with_repo):
    """Test dataset project lock."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}
    payload = {
        'project_id': project_id,
        'dataset_uri': doi,
    }
    response = svc_client.post(
        '/datasets.import',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'job_id', 'created_at'} == set(response.json['result'].keys())

    dest = make_project_path(user, {
        'owner': url_components.owner,
        'name': url_components.name
    })

    old_commit = Repo(dest).head.commit

    cache_project_cleanup()

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha == new_commit.hexsha
    assert dest.exists() and any(dest.glob('*'))
Example 9
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id: {0}'.format(ctx['project_id']),
            })

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = make_file_path(user, file)

        elif 'file_path' in _file:
            local_path = project_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            # Guard against a missing local_path (neither file_id nor
            # file_path was given) before formatting the error message.
            return jsonify(
                error={
                    'code': INVALID_PARAMS_ERROR_CODE,
                    'message': 'invalid file reference: {0}'.format(
                        local_path if local_path else _file),
                })

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

        if not repo_sync(project_path):
            return jsonify(error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'message': 'repo sync failed'
            })

    return jsonify(DatasetAddResponseRPC().load(
        {'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)}))
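
A sketch of a payload add_file_to_dataset_view accepts, based on the fields it reads; each file entry carries either the file_id of a previously uploaded file or a file_path inside the project, and all values here are placeholders:

# Fields read by add_file_to_dataset_view; all values are placeholders.
add_payload = {
    'project_id': '<project id from the cache>',
    'dataset_name': 'my-dataset',
    'create_dataset': True,
    'commit_message': '',  # empty -> the view builds 'service: dataset add <name> ...'
    'files': [
        {'file_id': '<id of a file already in the upload cache>'},
        {'file_path': 'data/my-dataset/local-file.csv'},
    ],
}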
def test_dataset_url_import_job(url, svc_client_with_repo):
    """Test dataset import via url."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}
    payload = {
        'project_id': project_id,
        'dataset_uri': url,
    }
    response = svc_client.post(
        '/datasets.import',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'job_id', 'created_at'} == set(response.json['result'].keys())

    dest = make_project_path(
        user, {
            'owner': url_components.owner,
            'name': url_components.name
        }
    )

    old_commit = Repo(dest).head.commit
    job_id = response.json['result']['job_id']

    dataset_import(
        user,
        job_id,
        project_id,
        url,
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha
    assert f'service: dataset import {url}' == new_commit.message

    response = svc_client.get(
        f'/jobs/{job_id}',
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert 'COMPLETED' == response.json['result']['state']
Example 11
def list_datasets_view(user, cache):
    """List all datasets in project."""
    req = DatasetListRequest().load(request.args)
    project = cache.get_project(user, req['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'reason': 'invalid project_id argument',
            })

    with chdir(project_path):
        datasets = [
            DatasetDetails().load(ds, unknown=EXCLUDE)
            # TODO: fix core interface to address this issue (add ticket ref)
            for ds in json.loads(dataset_parent(None, 'data', 'json-ld'))
        ]

    response = DatasetListResponse().load({'datasets': datasets})
    return jsonify(DatasetListResponseRPC().load({'result': response}))
Example 12
def test_dataset_add_remote_file(url, svc_client_with_repo):
    """Test dataset add a remote file."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}

    payload = {
        'project_id': project_id,
        'short_name': uuid.uuid4().hex,
        'create_dataset': True,
        'files': [{
            'file_url': url
        }]
    }
    response = svc_client.post(
        '/datasets.add',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'files', 'short_name',
            'project_id'} == set(response.json['result'].keys())

    dest = make_project_path(user, {
        'owner': url_components.owner,
        'name': url_components.name
    })
    old_commit = Repo(dest).head.commit
    job_id = response.json['result']['files'][0]['job_id']
    commit_message = 'service: dataset add remote file'

    dataset_add_remote_file(user, job_id, project_id, True, commit_message,
                            payload['short_name'], url)

    new_commit = Repo(dest).head.commit

    assert old_commit.hexsha != new_commit.hexsha
    assert commit_message == new_commit.message
Example 13
def test_dataset_project_lock(doi, svc_client_with_repo):
    """Test dataset project lock."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}
    payload = {
        "project_id": project_id,
        "dataset_uri": doi,
    }
    response = svc_client.post("/datasets.import", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"job_id", "created_at"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})

    old_commit = Repo(dest).head.commit

    cache_project_cleanup()

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha == new_commit.hexsha
    assert dest.exists() and any(dest.glob("*"))
Example 14
def test_dataset_add_remote_file(url, svc_client_with_repo):
    """Test dataset add a remote file."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}

    payload = {"project_id": project_id, "name": uuid.uuid4().hex, "create_dataset": True, "files": [{"file_url": url}]}
    response = svc_client.post("/datasets.add", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"files", "name", "project_id"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})
    old_commit = Repo(dest).head.commit
    job_id = response.json["result"]["files"][0]["job_id"]
    commit_message = "service: dataset add remote file"

    dataset_add_remote_file(user, job_id, project_id, True, commit_message, payload["name"], url)

    new_commit = Repo(dest).head.commit

    assert old_commit.hexsha != new_commit.hexsha
    assert commit_message == new_commit.message
Example 15
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'reason': 'invalid project_id argument',
            })

    with chdir(project_path):
        dataset_files = json.loads(
            # TODO: fix core interface to address this issue (add ticket ref)
            list_files(ctx['dataset_name'], None, None, None, 'json-ld'))
        ctx['files'] = [
            DatasetFileDetails().load(ds, unknown=EXCLUDE)
            for ds in dataset_files
        ]

    response = DatasetFilesListResponse().load(ctx, unknown=EXCLUDE)
    return jsonify(DatasetFilesListResponseRPC().load({'result': response}))
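
Unlike the POST views above, this view reads its arguments from the query string. A sketch of a matching request in the style of the tests above; the exact route for this view is not shown in these excerpts, so the path used here is an assumption:

# Hypothetical request; '/datasets.files_list' is an assumed route.
response = svc_client.get(
    '/datasets.files_list',
    query_string={'project_id': project_id, 'dataset_name': 'my-dataset'},
    headers=headers,
)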