Example #1
def upload_dataset_version_to_figshare(figshareDatasetVersionLink):
    dataset_version_id = figshareDatasetVersionLink["dataset_version_id"]
    article_name = figshareDatasetVersionLink["article_name"]
    article_description = figshareDatasetVersionLink["article_description"]
    article_license = figshareDatasetVersionLink.get("license", 0)
    article_categories = figshareDatasetVersionLink.get("categories", None)
    article_keywords = figshareDatasetVersionLink.get("keywords", None)
    article_references = figshareDatasetVersionLink.get("references", None)

    files_to_upload = figshareDatasetVersionLink["files_to_upload"]

    token = _fetch_figshare_token()
    if token is None:
        flask.abort(401)

    # Fetch the dataset version referenced by the link payload
    dataset_version = models_controller.get_dataset_version(dataset_version_id)
    figshare_dataset_version_link = figshare.create_article(
        dataset_version_id,
        article_name,
        article_description,
        article_license,
        article_categories,
        article_keywords,
        article_references,
        token,
    )

    # Imported at call time, a common way to avoid circular endpoint/task imports
    from taiga2.tasks import upload_datafile_to_figshare

    for file_to_upload in files_to_upload:
        datafile = models_controller.get_datafile(file_to_upload["datafile_id"])
        if datafile.type == "gcs":
            file_to_upload["failure_reason"] = "Cannot upload GCS pointer files"
            continue
        elif datafile.type == "virtual":
            datafile = datafile.underlying_data_file

        if datafile.compressed_s3_key is None:
            file_to_upload["failure_reason"] = (
                "Cannot upload files without compressed S3 file"
            )
            continue

        task = upload_datafile_to_figshare.delay(
            figshare_dataset_version_link.figshare_article_id,
            figshare_dataset_version_link.id,
            file_to_upload["file_name"],
            file_to_upload["datafile_id"],
            datafile.compressed_s3_key,
            datafile.original_file_md5,
            token,
        )

        file_to_upload["task_id"] = task.id

    return flask.jsonify({
        "article_id": figshare_dataset_version_link.figshare_article_id,
        "files": files_to_upload,
    })
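
The endpoint above returns immediately with Celery task IDs rather than blocking on the uploads. A minimal sketch of how a caller could wait on those IDs, assuming access to the same Celery app the tasks are registered on; the wait_for_figshare_uploads helper is hypothetical, not part of taiga2.

# Hypothetical helper; assumes the caller shares taiga2's Celery broker/backend.
from celery.result import AsyncResult

def wait_for_figshare_uploads(files, celery_app, timeout=600):
    for f in files:
        task_id = f.get("task_id")
        if task_id is None:
            continue  # file was skipped during validation; see f["failure_reason"]
        AsyncResult(task_id, app=celery_app).get(timeout=timeout)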
Example #2
def test_state_to_deleted(session: SessionBase, new_dataset,
                          new_dataset_version):
    # TODO: Could benefit from parametrizing the state of the datasetVersion in case some state switches are not allowed
    dataset_version_id = new_dataset_version.id
    mc.delete_dataset_version(dataset_version_id)

    updated_dataset_version = mc.get_dataset_version(
        dataset_version_id=dataset_version_id)

    assert updated_dataset_version.state == DatasetVersion.DatasetVersionState.deleted
Example #3
def test_state_approved_to_deprecated(session: SessionBase, new_dataset,
                                      new_dataset_version):
    assert new_dataset_version.state == DatasetVersion.DatasetVersionState.approved

    dataset_version_id = new_dataset_version.id
    mc.deprecate_dataset_version(dataset_version_id, "test deprecation")

    updated_dataset_version = mc.get_dataset_version(
        dataset_version_id=dataset_version_id)

    assert (updated_dataset_version.state ==
            DatasetVersion.DatasetVersionState.deprecated)
Example #4
def get_dataset_version(datasetVersion_id):
    dv = models_controller.get_dataset_version(
        dataset_version_id=datasetVersion_id, one_or_none=True)
    if dv is None:
        flask.abort(404)

    dataset_version_right = models_controller.get_rights(dv.id)

    dataset_version_schema = schemas.DatasetVersionSchema()
    dataset_version_schema.context["entry_user_right"] = dataset_version_right
    # marshmallow 2.x API: dump() returns a MarshalResult, hence the .data attribute
    json_dv_data = dataset_version_schema.dump(dv).data

    return flask.jsonify(json_dv_data)
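
The serialization above follows the marshmallow 2.x API, where dump() returns a result object carrying a .data attribute. An illustrative schema in that style; the field names are assumptions, not taiga2's actual DatasetVersionSchema.

# Illustrative marshmallow schema; field names are assumptions.
from marshmallow import Schema, fields

class DatasetVersionSchema(Schema):
    id = fields.Str()
    name = fields.Str()
    state = fields.Str()
    description = fields.Str()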
Example #5
def test_de_deprecate_dataset_version(session: SessionBase, new_dataset: Dataset):
    # Deprecate the dataset version first
    dataset_version_id = new_dataset.dataset_versions[0].id
    models_controller.deprecate_dataset_version(
        dataset_version_id, "Test de-deprecation"
    )

    flask_answer = endpoint.de_deprecate_dataset_version(dataset_version_id)
    ack = get_data_from_flask_jsonify(flask_answer)

    updated_dataset_version = models_controller.get_dataset_version(dataset_version_id)

    assert updated_dataset_version.state == DatasetVersion.DatasetVersionState.approved
    assert updated_dataset_version.reason_state == "Test de-deprecation"
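
The tests unwrap endpoint responses with get_data_from_flask_jsonify. One plausible implementation, assuming the endpoints return standard flask.jsonify responses; this is a guess at the helper, not taiga2's actual test utility.

# Plausible test helper; assumes a flask.Response produced by flask.jsonify.
import json

def get_data_from_flask_jsonify(flask_response):
    return json.loads(flask_response.get_data(as_text=True))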
Example #6
def test_deprecate_dataset_version(session: SessionBase, new_dataset: Dataset):
    """Check if deprecation was a success"""
    dataset_version_id = new_dataset.dataset_versions[0].id
    reason_state = "Test deprecation"
    deprecationReasonObj = {"deprecationReason": reason_state}

    flask_answer = endpoint.deprecate_dataset_version(
        dataset_version_id, deprecationReasonObj=deprecationReasonObj
    )
    ack = get_data_from_flask_jsonify(flask_answer)

    updated_dataset_version = models_controller.get_dataset_version(dataset_version_id)

    assert (
        updated_dataset_version.state == DatasetVersion.DatasetVersionState.deprecated
    )
    assert updated_dataset_version.reason_state == reason_state
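
Both state-transition tests imply controller functions that flip DatasetVersion.state and record a reason. A minimal sketch under those assumptions, using a flask_sqlalchemy-style db.session; this is hypothetical, not taiga2's models_controller.

# Hypothetical controller sketch; db.session and the model come from the
# assumptions stated above.
def deprecate_dataset_version(dataset_version_id, reason):
    dataset_version = db.session.query(DatasetVersion).get(dataset_version_id)
    dataset_version.state = DatasetVersion.DatasetVersionState.deprecated
    dataset_version.reason_state = reason
    db.session.commit()
    return dataset_version

def de_deprecate_dataset_version(dataset_version_id):
    dataset_version = db.session.query(DatasetVersion).get(dataset_version_id)
    dataset_version.state = DatasetVersion.DatasetVersionState.approved
    db.session.commit()
    return dataset_version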
Example #7
def get_figshare_links_for_client(datasetVersionId: str):
    dataset_version = models_controller.get_dataset_version(datasetVersionId)
    if not dataset_version.figshare_dataset_version_link:
        flask.abort(404)

    try:
        article_files = figshare.get_public_article_files(
            dataset_version.figshare_dataset_version_link.figshare_article_id
        )

        figshare_file_ids_to_download_url = {
            f["id"]: f["download_url"] for f in article_files
        }
        figshare_datafile_links = models_controller.get_figshare_datafile_links_for_dataset_version_link(
            dataset_version.figshare_dataset_version_link.id
        )

        taiga_figshare_map = {
            "{}.{}/{}".format(
                dataset_version.dataset.permaname,
                dataset_version.version,
                l.datafile.name,
            ): {
                "download_url": figshare_file_ids_to_download_url[l.figshare_file_id],
                "format": l.datafile.format.value,
                "encoding": l.datafile.encoding
                if l.datafile.type == "s3"
                else l.datafile.underlying_data_file.encoding,
                "column_types": l.datafile.column_types_as_json
                if l.datafile.type == "s3"
                else l.datafile.underlying_data_file.column_types_as_json,
            }
            for l in figshare_datafile_links
        }

        return flask.jsonify({"content": json.dumps(taiga_figshare_map)})
    except HTTPError:
        flask.abort(404)
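
Because the endpoint wraps the map as a JSON string inside a "content" field, a client has to decode twice: once for the response body and once for the map itself. A hedged client-side sketch with requests; the URL pattern is an assumption, not taiga2's actual routing.

# Hypothetical client for the endpoint above.
import json
import requests

dataset_version_id = "some-version-id"  # placeholder
response = requests.get(
    f"https://taiga.example.org/figshare_links/{dataset_version_id}"  # assumed URL
)
taiga_figshare_map = json.loads(response.json()["content"])
for taiga_id, entry in taiga_figshare_map.items():
    print(taiga_id, entry["download_url"])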
Example #8
def test_create_new_dataset_version(
    session: SessionBase, new_dataset, new_upload_session_file
):
    new_description = "My new description!"

    session_id = new_upload_session_file.session.id

    # TODO: Get instead the last datasetVersion
    latest_dataset_version = new_dataset.dataset_versions[0]

    # We fetch the datafiles from the first dataset_version
    assert len(latest_dataset_version.datafiles) == 1
    datafile = latest_dataset_version.datafiles[0]

    datasetVersionMetadata = {
        "sessionId": session_id,
        "datasetId": new_dataset.id,
        "newDescription": new_description,
    }

    _add_virtual_file_to_upload_session(
        session_id, datafile.name, datafile_id=datafile.id
    )

    response_json_create_new_dataset_version_id = endpoint.create_new_dataset_version(
        datasetVersionMetadata
    )
    new_dataset_version_id = get_data_from_flask_jsonify(
        response_json_create_new_dataset_version_id
    )

    _new_dataset_version = models_controller.get_dataset_version(new_dataset_version_id)

    assert _new_dataset_version.version == latest_dataset_version.version + 1
    # TODO: Test the number of uploaded files in the session + the 1 existing file
    assert len(_new_dataset_version.datafiles) == 2

    assert _new_dataset_version.description == new_description
Example #9
def update_figshare_article_with_dataset_version(figshareDatasetVersionLink):
    dataset_version_id = figshareDatasetVersionLink["dataset_version_id"]
    description = figshareDatasetVersionLink["description"]
    article_id = figshareDatasetVersionLink["article_id"]
    current_article_version = figshareDatasetVersionLink["current_article_version"]
    files_to_update = figshareDatasetVersionLink["files_to_update"]

    token = _fetch_figshare_token()
    if token is None:
        flask.abort(401)

    dataset_version = models_controller.get_dataset_version(dataset_version_id)
    figshare_dataset_version_link = models_controller.add_figshare_dataset_version_link(
        dataset_version.id, article_id, current_article_version + 1
    )

    from taiga2.tasks import upload_datafile_to_figshare

    try:
        for file_to_update in files_to_update:
            action = file_to_update["action"]

            if action == "Delete":
                figshare.delete_file(
                    article_id, file_to_update["figshare_file_id"], token
                )
            elif action == "Add":
                datafile_id = file_to_update["datafile_id"]
                if datafile_id is None:
                    file_to_update["failure_reason"] = (
                        "Cannot add or replace file without datafile ID"
                    )
                    continue

                datafile = models_controller.get_datafile(datafile_id)

                if datafile.type == "gcs":
                    file_to_update["failure_reason"] = "Cannot upload GCS pointer files"
                    continue
                elif datafile.type == "virtual":
                    datafile = datafile.underlying_data_file

                if datafile.compressed_s3_key is None:
                    file_to_update["failure_reason"] = (
                        "Cannot upload files without compressed S3 file"
                    )
                    continue

                task = upload_datafile_to_figshare.delay(
                    figshare_dataset_version_link.figshare_article_id,
                    figshare_dataset_version_link.id,
                    file_to_update["file_name"],
                    datafile_id,
                    datafile.compressed_s3_key,
                    datafile.original_file_md5,
                    token,
                )

                file_to_update["task_id"] = task.id
            else:
                raise ValueError(f"Unrecognized action: {action}")

        figshare.update_article(article_id, description, token)

        return flask.jsonify({
            "article_id": figshare_dataset_version_link.figshare_article_id,
            "files": files_to_update,
        })
    except HTTPError as error:
        models_controller.delete_figshare_dataset_version_and_datafiles(
            figshare_dataset_version_link.id
        )
        flask.abort(error.code, error.reason)
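
Examples #1 and #9 both hand the byte transfer to a Celery task. A skeleton of upload_datafile_to_figshare inferred purely from the arguments at the .delay() call sites; the body is deliberately stubbed, and this is not taiga2's actual task.

# Skeleton inferred from the .delay(...) call sites above; body is a stub.
from celery import Celery

celery_app = Celery("taiga2")

@celery_app.task(bind=True)
def upload_datafile_to_figshare(
    self,
    figshare_article_id,
    figshare_dataset_version_link_id,
    file_name,
    datafile_id,
    compressed_s3_key,
    original_file_md5,
    token,
):
    # Would stream the compressed file out of S3, push it to the figshare
    # article, verify original_file_md5, and record the resulting link.
    raise NotImplementedError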
Example #10
def test_get_dataset_version(session, new_dataset_version):
    fetched_dataset_version = mc.get_dataset_version(new_dataset_version.id)

    assert fetched_dataset_version == new_dataset_version
    assert fetched_dataset_version.id == new_dataset_version.id
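
The new_dataset and new_dataset_version arguments used throughout these tests are pytest fixtures. A hedged sketch of how they could relate, based only on how the tests index dataset_versions; the fixture body is an assumption.

# Hypothetical fixture wiring; mirrors how the tests above access versions.
import pytest

@pytest.fixture
def new_dataset_version(new_dataset):
    return new_dataset.dataset_versions[0]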