Ejemplo n.º 1
0
def tmp_app_with_dependent_data(request):
    """Return a test client for an app populated with the family datasets.

    The app uses an in-memory SQLite database and a randomly named MongoDB
    database on localhost. One user is registered and granted search and
    register permissions on the ``s3://snow-white`` base URI, after which
    every dataset info yielded by ``family_datasets(base_uri)`` is
    registered.

    :param request: pytest ``request`` object used to register the teardown
    :returns: Flask test client for the populated app
    """
    from dtool_lookup_server import create_app, mongo, sql_db
    from dtool_lookup_server.utils import (
        register_users,
        register_base_uri,
        register_dataset,
        update_permissions,
    )

    tmp_mongo_db_name = random_string()

    config = {
        "FLASK_ENV": "development",
        "SQLALCHEMY_DATABASE_URI": "sqlite:///:memory:",
        "MONGO_URI": "mongodb://localhost:27017/{}".format(tmp_mongo_db_name),
        "SQLALCHEMY_TRACK_MODIFICATIONS": False,
        "JWT_ALGORITHM": "RS256",
        "JWT_PUBLIC_KEY": JWT_PUBLIC_KEY,
        "JWT_TOKEN_LOCATION": "headers",
        "JWT_HEADER_NAME": "Authorization",
        "JWT_HEADER_TYPE": "Bearer",
    }

    app = create_app(config)

    # Ensure the sql database has been put into the context. The context
    # object is kept so that the teardown can pop it again.
    app_context = app.app_context()
    app_context.push()

    def teardown():
        try:
            mongo.cx.drop_database(tmp_mongo_db_name)
            sql_db.session.remove()
        finally:
            # Pop last: the session clean-up above may need the context.
            app_context.pop()

    # Register the clean-up before any data is written so that a failure
    # while populating the databases does not leak the temporary Mongo
    # database or the pushed application context.
    request.addfinalizer(teardown)

    # Populate the database.
    sql_db.Model.metadata.create_all(sql_db.engine)

    # Register some users.
    username = "******"
    register_users([
        dict(username=username),
    ])

    base_uri = "s3://snow-white"
    register_base_uri(base_uri)
    permissions = {
        "base_uri": base_uri,
        "users_with_search_permissions": [username],
        "users_with_register_permissions": [username]
    }
    update_permissions(permissions)

    for dataset_info in family_datasets(base_uri):
        register_dataset(dataset_info)

    return app.test_client()
Ejemplo n.º 2
0
def index_base_uri(base_uri):
    """Register all the datasets in a base URI.

    The base URI is sanitised first. If it is not registered, an error is
    written to stderr and the process exits with status 1. Datasets whose
    info cannot be generated (YAML parser/scanner errors) or that fail
    validation on registration are reported and skipped; every other
    dataset is registered and reported in green.
    """
    sanitised_uri = dtoolcore.utils.sanitise_uri(base_uri)

    if not base_uri_exists(sanitised_uri):
        not_registered = "Base URI '{}' not registered".format(sanitised_uri)
        click.secho(not_registered, fg="red", err=True)
        sys.exit(1)

    def _report_failure(ds, error, detail=None):
        # Print the failure header, an optional detail line, then the error.
        header = "Failed to register: {} {}".format(ds.name, ds.uri)
        click.secho(header, fg="red")
        if detail is not None:
            click.secho(detail, fg="red")
        click.echo(error)

    yaml_errors = (yaml.parser.ParserError, yaml.scanner.ScannerError)

    for ds in iter_datasets_in_base_uri(sanitised_uri):
        # The two steps are guarded separately so that each one only
        # swallows its own kind of error.
        try:
            info = generate_dataset_info(ds, sanitised_uri)
        except yaml_errors as error:
            _report_failure(ds, error, detail="README YAML parsing issue")
            continue

        try:
            registered = register_dataset(info)
        except dtool_lookup_server.ValidationError as error:
            _report_failure(ds, error)
            continue

        click.secho("Registered: {}".format(registered), fg="green")
def notify_create_or_update(objpath):
    """Notify the lookup server about creation of a new object or modification
    of an object's metadata.

    The request body must be JSON; otherwise the request is aborted with
    400. If the payload carries a ``metadata`` entry with both ``name`` and
    ``uuid``, the dataset URI is generated from that metadata and the base
    URI configured for the payload's ``bucket``. A payload that lacks
    ``bucket`` or names a bucket without a configured base URI is rejected
    with 400. Without ``metadata``, ``objpath`` is parsed and the dataset
    URI is retrieved only when the object is one of ``README.yml``,
    ``tags`` or ``annotations``.

    :param objpath: object path from the notification, parsed by
                    ``_parse_objpath``
    :returns: an empty JSON response
    """
    # Named ``payload`` rather than ``json`` to avoid shadowing the stdlib
    # module name.
    payload = request.get_json()
    if payload is None:
        abort(400)

    dataset_uri = None

    # The metadata is only attached to the 'dtool' object of the respective
    # UUID and finalizes creation of a dataset. We can register that dataset
    # now.
    if 'metadata' in payload:
        admin_metadata = payload['metadata']

        if 'name' in admin_metadata and 'uuid' in admin_metadata:
            try:
                base_uri = Config.BUCKET_TO_BASE_URI[payload['bucket']]
            except KeyError:
                # Either the payload has no 'bucket' entry or the bucket is
                # not configured. Both come from request data, so answer
                # with a client error rather than an unhandled exception.
                abort(400)

            dataset_uri = dtoolcore._generate_uri(admin_metadata, base_uri)

            current_app.logger.info(
                'Registering dataset with URI {}'.format(dataset_uri))
    else:
        base_uri, uuid, kind = _parse_objpath(objpath)
        # We also need to update the database if the metadata has changed.
        if kind in ['README.yml', 'tags', 'annotations']:
            dataset_uri = _retrieve_uri(base_uri, uuid)

    if dataset_uri is not None:
        try:
            dataset = dtoolcore.DataSet.from_uri(dataset_uri)
            dataset_info = generate_dataset_info(dataset, base_uri)
            register_dataset(dataset_info)
        except dtoolcore.DtoolCoreTypeError:
            # DtoolCoreTypeError is raised if this is not a dataset yet, i.e.
            # if the dataset has only partially been copied. There will be
            # another notification once everything is final. We simply
            # ignore this.
            current_app.logger.debug('DtoolCoreTypeError raised for dataset '
                                     'with URI {}'.format(dataset_uri))

    return jsonify({})
Ejemplo n.º 4
0
def register():
    """Register a dataset. The user needs to have register permissions.

    Responses:

    * 401 if the JWT identity is not a registered user, or the user has no
      register permission on the dataset's base URI
    * 409 if the submitted dataset info is invalid, its base URI cannot be
      looked up, or registration itself is rejected with a
      ``ValidationError``
    * 201 with the value returned by ``register_dataset`` on success
    """
    username = get_jwt_identity()
    dataset_info = request.get_json()

    try:
        user = get_user_obj(username)
    except AuthenticationError:
        # User not registered in system.
        abort(401)

    if not dataset_info_is_valid(dataset_info):
        abort(409)

    try:
        base_uri = get_base_uri_obj(dataset_info["base_uri"])
    except ValidationError:
        abort(409)

    if base_uri not in user.register_base_uris:
        abort(401)

    try:
        dataset_uri = register_dataset(dataset_info)
    except ValidationError:
        # Registration can still be rejected after the checks above (for
        # example when the document cannot be stored); report it like the
        # other validation failures instead of letting it surface as a 500.
        abort(409)

    return dataset_uri, 201
Ejemplo n.º 5
0
def test_register_dataset_without_created_at(tmp_app):   # NOQA
    """Registering a dataset without ``created_at`` falls back to ``frozen_at``.

    Also checks that the readme can be read back by a user with search
    permissions and that incomplete dataset info raises ``ValidationError``.
    """
    from dtool_lookup_server import ValidationError
    from dtool_lookup_server.utils import (
        register_users,
        register_base_uri,
        update_permissions,
        register_dataset,
        get_admin_metadata_from_uri,
        get_readme_from_uri_by_user,
    )

    # The registered users must be the ones referenced by the permissions
    # and by the readme lookup further down.
    register_users([
        dict(username="grumpy"),
        dict(username="sleepy"),
    ])

    base_uri = "s3://snow-white"
    register_base_uri(base_uri)

    permissions = {
        "base_uri": base_uri,
        "users_with_search_permissions": ["grumpy", "sleepy"],
        "users_with_register_permissions": ["grumpy"],
    }
    update_permissions(permissions)

    uuid = "af6727bf-29c7-43dd-b42f-a5d7ede28337"
    uri = "{}/{}".format(base_uri, uuid)
    # Deliberately has no "created_at" entry.
    # NOTE(review): creator_username looks like a redacted placeholder; it
    # only has to match the value in expected_content below.
    dataset_info = {
        "base_uri": base_uri,
        "uuid": uuid,
        "uri": uri,
        "name": "my-dataset",
        "type": "dataset",
        "readme": {"description": "test dataset"},
        "manifest": {
            "dtoolcore_version": "3.7.0",
            "hash_function": "md5sum_hexdigest",
            "items": {
                "e4cc3a7dc281c3d89ed4553293c4b4b110dc9bf3": {
                    "hash": "d89117c9da2cc34586e183017cb14851",
                    "relpath": "U00096.3.rev.1.bt2",
                    "size_in_bytes": 5741810,
                    "utc_timestamp": 1536832115.0
                }
            }
        },
        "creator_username": "******",
        "frozen_at": 1536238185.881941,
        "annotations": {"software": "bowtie2"},
        "tags": ["rnaseq"],
    }

    register_dataset(dataset_info)

    # When missing, created_at will be set to frozen_at.
    expected_content = {
        "base_uri": base_uri,
        "uuid": uuid,
        "uri": uri,
        "name": "my-dataset",
        "creator_username": "******",
        "frozen_at": 1536238185.881941,
        "created_at": 1536238185.881941,
    }
    assert get_admin_metadata_from_uri(uri) == expected_content
    assert get_readme_from_uri_by_user("sleepy", uri) == dataset_info["readme"]

    with pytest.raises(ValidationError):
        register_dataset({"name": "not-all-required-metadata"})
Ejemplo n.º 6
0
def test_register_too_large_metadata_dataset(tmp_app):  # NOQA
    """Registering a dataset with an oversized readme raises ValidationError.

    After the failed registration no admin metadata must be retrievable
    for the dataset's URI.
    """

    from dtool_lookup_server import ValidationError
    from dtool_lookup_server.utils import (
        register_users,
        register_base_uri,
        update_permissions,
        register_dataset,
        get_admin_metadata_from_uri,
    )

    # Register the users that the permissions below refer to.
    register_users([
        dict(username="grumpy"),
        dict(username="sleepy"),
    ])

    base_uri = "s3://snow-white"
    register_base_uri(base_uri)

    permissions = {
        "base_uri": base_uri,
        "users_with_search_permissions": ["grumpy", "sleepy"],
        "users_with_register_permissions": ["grumpy"],
    }
    update_permissions(permissions)

    uuid = "af6727bf-29c7-43dd-b42f-a5d7ede28337"
    uri = "{}/{}".format(base_uri, uuid)
    dataset_info = {
        "base_uri": base_uri,
        "uuid": uuid,
        "uri": uri,
        "name": "my-dataset",
        "type": "dataset",
        "manifest": {
            "dtoolcore_version": "3.7.0",
            "hash_function": "md5sum_hexdigest",
            "items": {
                "e4cc3a7dc281c3d89ed4553293c4b4b110dc9bf3": {
                    "hash": "d89117c9da2cc34586e183017cb14851",
                    "relpath": "U00096.3.rev.1.bt2",
                    "size_in_bytes": 5741810,
                    "utc_timestamp": 1536832115.0
                }
            }
        },
        "creator_username": "******",
        "frozen_at": 1536238185.881941,
        "created_at": 1536236399.19497,
        "annotations": {"software": "bowtie2"},
        "tags": ["rnaseq"],
    }

    # 100000 entries, each value being its text repeated ten times.
    dataset_info["readme"] = {
        "here_is_a_long_key_{}".format(i):
            "here_is_a_long_value_{}".format(i) * 10
        for i in range(100000)
    }

    # The dataset_info is too large and raises:
    # pymongo.errors.DocumentTooLarge: BSON document too large (28978543 bytes)
    # - the connected server supports BSON document sizes up to 16793598 bytes.
    # See https://github.com/jic-dtool/dtool-lookup-server/issues/16
    # So the code catches this and raises dtool_lookup_server.ValidationError
    # instead.
    with pytest.raises(ValidationError):
        register_dataset(dataset_info)

    assert get_admin_metadata_from_uri(dataset_info["uri"]) is None
Ejemplo n.º 7
0
def tmp_app_with_data(request):
    """Return a test client for an app populated with three datasets.

    The app uses an in-memory SQLite database and a randomly named MongoDB
    database on localhost. Three users are registered (the last one as
    admin); the first is granted search and register permissions on the
    ``s3://snow-white`` and ``s3://mr-men`` base URIs. A "bad-apples"
    dataset is registered in both base URIs and an "oranges" dataset in
    ``s3://snow-white`` only.

    :param request: pytest ``request`` object used to register the teardown
    :returns: Flask test client for the populated app
    """

    from dtool_lookup_server import create_app, mongo, sql_db
    from dtool_lookup_server.utils import (
        register_users,
        register_base_uri,
        register_dataset,
        update_permissions,
    )

    tmp_mongo_db_name = random_string()

    config = {
        "FLASK_ENV": "development",
        "SQLALCHEMY_DATABASE_URI": "sqlite:///:memory:",
        "MONGO_URI": "mongodb://localhost:27017/{}".format(tmp_mongo_db_name),
        "SQLALCHEMY_TRACK_MODIFICATIONS": False,
        "JWT_ALGORITHM": "RS256",
        "JWT_PUBLIC_KEY": JWT_PUBLIC_KEY,
        "JWT_TOKEN_LOCATION": "headers",
        "JWT_HEADER_NAME": "Authorization",
        "JWT_HEADER_TYPE": "Bearer",
    }

    app = create_app(config)

    # Ensure the sql database has been put into the context. The context
    # object is kept so that the teardown can pop it again.
    app_context = app.app_context()
    app_context.push()

    def teardown():
        try:
            mongo.cx.drop_database(tmp_mongo_db_name)
            sql_db.session.remove()
        finally:
            # Pop last: the session clean-up above may need the context.
            app_context.pop()

    # Register the clean-up before any data is written so that a failure
    # while populating the databases does not leak the temporary Mongo
    # database or the pushed application context.
    request.addfinalizer(teardown)

    # Populate the database.
    sql_db.Model.metadata.create_all(sql_db.engine)

    # Register some users.
    # NOTE(review): the username literals look like redacted placeholders
    # and all three entries share the same value - confirm the intended
    # names.
    username = "******"
    register_users([
        dict(username=username),
        dict(username="******"),
        dict(username="******", is_admin=True)
    ])

    base_uris = ("s3://snow-white", "s3://mr-men")

    # Add base URIs and update permissions
    for base_uri in base_uris:
        register_base_uri(base_uri)
        permissions = {
            "base_uri": base_uri,
            "users_with_search_permissions": [username],
            "users_with_register_permissions": [username]
        }
        update_permissions(permissions)

    # Add some data to the database: the same dataset (same UUID) is
    # registered once per base URI.
    for base_uri in base_uris:
        uuid = "af6727bf-29c7-43dd-b42f-a5d7ede28337"
        uri = "{}/{}".format(base_uri, uuid)
        dataset_info = {
            "base_uri": base_uri,
            "type": "dataset",
            "uuid": uuid,
            "uri": uri,
            "name": "bad-apples",
            # NOTE(review): the key is spelled "descripton"; kept as-is in
            # case tests query it.
            "readme": {
                "descripton": "apples from queen"
            },
            "manifest": {
                "dtoolcore_version": "3.7.0",
                "hash_function": "md5sum_hexdigest",
                "items": {
                    "e4cc3a7dc281c3d89ed4553293c4b4b110dc9bf3": {
                        "hash": "d89117c9da2cc34586e183017cb14851",
                        "relpath": "U00096.3.rev.1.bt2",
                        "size_in_bytes": 5741810,
                        "utc_timestamp": 1536832115.0
                    }
                }
            },
            "creator_username": "******",
            "frozen_at": 1536238185.881941,
            "annotations": {
                "type": "fruit"
            },
            "tags": ["evil", "fruit"],
        }
        register_dataset(dataset_info)

    # A second dataset that only exists in the snow-white base URI and has
    # an empty manifest.
    base_uri = "s3://snow-white"
    uuid = "a2218059-5bd0-4690-b090-062faf08e046"
    uri = "{}/{}".format(base_uri, uuid)
    dataset_info = {
        "base_uri": base_uri,
        "type": "dataset",
        "uuid": uuid,
        "uri": uri,
        "name": "oranges",
        "readme": {
            "descripton": "oranges from queen"
        },
        "manifest": {
            "dtoolcore_version": "3.7.0",
            "hash_function": "md5sum_hexdigest",
            "items": {}
        },
        "creator_username": "******",
        "frozen_at": 1536238185.881941,
        "annotations": {
            "type": "fruit",
            "only_here": "crazystuff"
        },
        "tags": ["good", "fruit"],
    }
    register_dataset(dataset_info)

    return app.test_client()
Ejemplo n.º 8
0
def tmp_app(request):
    """Return a test client for an app populated with four shirt datasets.

    The app uses a SQLite database located in a fresh temporary directory
    and a randomly named MongoDB database on localhost. Two users are
    registered; the first is granted search (but no register) permissions
    on the ``s3://snow-white`` and ``s3://mr-men`` base URIs. The datasets
    are built with ``generate_dataset_info`` and registered.

    :param request: pytest ``request`` object used to register the teardown
    :returns: Flask test client for the populated app
    """

    from dtool_lookup_server import create_app, mongo, sql_db
    from dtool_lookup_server.utils import (
        register_users,
        register_base_uri,
        register_dataset,
        update_permissions,
    )

    # Create temporary sqlite URI.
    d = tempfile.mkdtemp()
    # Registered immediately so that the directory is removed even if the
    # rest of the setup fails. Finalizers run in reverse registration
    # order, so this still runs after the database teardown below.
    request.addfinalizer(lambda: shutil.rmtree(d))
    sqlite_uri = randome_sqlite_uri(d)

    # Create temporary mongodb name.
    tmp_mongo_db_name = random_string()

    config = {
        "SECRET_KEY": "secret",
        "FLASK_ENV": "development",
        "SQLALCHEMY_DATABASE_URI": sqlite_uri,
        "MONGO_URI": "mongodb://localhost:27017/{}".format(tmp_mongo_db_name),
        "SQLALCHEMY_TRACK_MODIFICATIONS": False,
        "JWT_ALGORITHM": "RS256",
        "JWT_PUBLIC_KEY": JWT_PUBLIC_KEY,
        "JWT_TOKEN_LOCATION": "headers",
        "JWT_HEADER_NAME": "Authorization",
        "JWT_HEADER_TYPE": "Bearer",
    }

    app = create_app(config)

    # Ensure the sql database has been put into the context. The context
    # object is kept so that the teardown can pop it again.
    app_context = app.app_context()
    app_context.push()

    def teardown():
        try:
            mongo.cx.drop_database(tmp_mongo_db_name)
            sql_db.session.remove()
        finally:
            # Pop last: the session clean-up above may need the context.
            app_context.pop()

    # Register the clean-up before any data is written so that a failure
    # while populating the databases does not leak the temporary Mongo
    # database or the pushed application context.
    request.addfinalizer(teardown)

    # Populate the database.
    sql_db.Model.metadata.create_all(sql_db.engine)

    # Register some users.
    # NOTE(review): the username literals look like redacted placeholders
    # and both entries share the same value - confirm the intended names.
    username = "******"
    register_users([
        dict(username=username),
        dict(username="******"),
    ])

    # Register base URIs and set permissions.
    base_uri_1 = "s3://snow-white"
    base_uri_2 = "s3://mr-men"
    for base_uri in [base_uri_1, base_uri_2]:
        register_base_uri(base_uri)
        permissions = {
            "base_uri": base_uri,
            "users_with_search_permissions": [username],
            "users_with_register_permissions": [],
        }
        update_permissions(permissions)

    dataset_info = generate_dataset_info(
        base_uri_1,
        "blue-shirt",
        {"color": "blue"}
    )
    register_dataset(dataset_info)

    dataset_info = generate_dataset_info(
        base_uri_2,
        "red-wavy-shirt",
        {
            "color": "red",
            "pattern": "wavy",
            "complex_ignored": ["lists", "are", "ignored"]
        }
    )
    register_dataset(dataset_info)

    dataset_info = generate_dataset_info(
        base_uri_1,
        "stripy-shirt",
        {
            "pattern": "stripey",
            "color": ["purple", "gray"]  # Complex data type so ignored
        }
    )
    register_dataset(dataset_info)

    dataset_info = generate_dataset_info(
        base_uri_1,
        "complex-shirt",
        {
            "pattern": ["lies", "circles"],  # Complex data type so ignored
            "color": ["purple", "gray"]  # Complex data type so ignored
        }
    )
    register_dataset(dataset_info)  # Whole dataset ignored by plugin.

    return app.test_client()