Ejemplo n.º 1
0
def check_and_update_data_with_schema(data, ana):
    """
    Make sure ``data`` carries a usable ``$schema`` entry.

    A ``$schema`` already present in ``data`` takes precedence: it is passed
    to ``resolve_schema_by_url`` and ``ana`` is ignored (a notice is printed
    when both were given). Otherwise ``data['$schema']`` is set, in place, to
    ``schema_name_to_url(ana)``.

    :param data: JSON payload, may contain a ``$schema`` key
    :type data: dict
    :param ana: analysis type name (the ``--ana/-a`` option), may be empty

    :returns: ``True`` on success; ``False`` when neither a schema nor an
              analysis type was given, or when ``JSONSchemaNotFound`` was
              raised while resolving
    :rtype: bool
    """
    declared_schema = data.get('$schema')

    # Nothing to work with: neither the payload nor the CLI names a schema.
    if not (declared_schema or ana):
        click.secho(
            'You need to provide the --ana/-a parameter OR '
            'add the $schema field in your JSON',
            fg='red')
        return False

    try:
        if not declared_schema:
            # Only the analysis type is known; the guard above guarantees
            # that ``ana`` is truthy here.
            data['$schema'] = schema_name_to_url(ana)
            return True

        if ana:
            click.secho(
                "Your data already provide a $schema, --ana will not be used."
            )  # noqa
        # Called for validation only: the resolved schema itself is unused.
        resolve_schema_by_url(declared_schema)
    except JSONSchemaNotFound:
        click.secho('Provided schema is not a valid option.', fg='red')
        return False

    return True
def test_schema_name_to_url(db):
    """A bare schema name maps to the URL of its highest stored version.

    Three versions of ``my-schema`` and one unrelated schema are stored; the
    URL expected for ``my-schema`` is the one of ``2.4.3``.
    """
    stored_schemas = (
        ('my-schema', '1.5.5'),
        ('my-schema', '2.4.0'),
        ('my-schema', '2.4.3'),
        ('different-schema', '3.5.4'),
    )
    for schema_name, schema_version in stored_schemas:
        db.session.add(Schema(name=schema_name, version=schema_version))
    db.session.commit()

    expected_url = (
        'https://analysispreservation.cern.ch/schemas/deposits/records/'
        'my-schema-v2.4.3.json'
    )
    resolved_url = schema_name_to_url('my-schema')

    assert resolved_url == expected_url
Ejemplo n.º 3
0
    def _preprocess_create_data(cls, data, uuid_, owner):
        """Prepare the metadata of a deposit that is about to be created.

        The dictionary is modified in place and also returned. A
        ``$ana_type`` entry is replaced by the matching ``$schema`` URL,
        ``_experiment`` is copied from the resolved schema, and the
        ``_deposit`` / ``_access`` sections are filled in according to
        ``owner``.

        :param data: metadata, needs to contain a $schema|$ana_type field
        :type data: dict
        :param uuid_: UUID handed to ``cls.deposit_minter`` when ``data`` has
                      no ``_deposit`` section yet
        :type uuid_: `uuid.UUID`
        :param owner: owner of the new deposit (is listed for every action in
                      ``DEPOSIT_ACTIONS``); may be falsy for an ownerless
                      deposit
        :type owner: `invenio_accounts.models.User`

        :returns: processed metadata dictionary
        :rtype: dict
        :raises DepositValidationError: if ``data`` is empty or not a dict,
            if no schema is specified, or if the schema cannot be resolved
        """
        if not isinstance(data, dict) or data == {}:
            raise DepositValidationError('Empty deposit data.')

        if '$ana_type' in data:
            # Swap the analysis type shortcut for the full schema URL.
            ana_type = data.pop('$ana_type')
            try:
                data['$schema'] = schema_name_to_url(ana_type)
            except JSONSchemaNotFound:
                raise DepositValidationError(
                    f'Schema {ana_type} is not a valid deposit schema.')
        elif '$schema' not in data:
            raise DepositValidationError('Schema not specified.')

        schema_url = data['$schema']
        try:
            resolved = resolve_schema_by_url(schema_url)
            data['_experiment'] = resolved.experiment
        except JSONSchemaNotFound:
            raise DepositValidationError(
                f'Schema {schema_url} is not a valid deposit schema.')

        # minting is done by invenio on POST action preprocessing,
        # if the method is called programmatically, mint the PID here
        # (presumably the minter creates data['_deposit'], which is indexed
        # right below - confirm against deposit_minter)
        if '_deposit' not in data:
            cls.deposit_minter(uuid_, data)

        deposit_section = data['_deposit']
        if not owner:
            # Ownerless deposit: nobody owns it and access starts out empty.
            deposit_section['owners'] = []
            data['_access'] = copy.deepcopy(EMPTY_ACCESS_OBJECT)
            return data

        owner_id = owner.id
        deposit_section['owners'] = [owner_id]
        deposit_section['created_by'] = owner_id

        # Every action gets its own fresh users/roles lists.
        access = {}
        for permission in DEPOSIT_ACTIONS:
            access[permission] = {'users': [owner_id], 'roles': []}
        data['_access'] = access

        return data
Ejemplo n.º 4
0
    def _preprocess_data(cls, data):
        # data can be sent without specifying particular version of schema,
        # but just with a type, e.g. cms-analysis
        # this be resolved to the last version of deposit schema of this type
        if '$ana_type' in data:
            try:
                ana_type = data.pop('$ana_type')
                data['$schema'] = schema_name_to_url(ana_type)
            except JSONSchemaNotFound:
                raise DepositValidationError(
                    'Schema {} is not a valid deposit schema.'.format(
                        ana_type))

        return data
Ejemplo n.º 5
0
def validate(schema_url, ana_type, ana_version, compare_with, status, export,
             export_type):
    """
    Validate deposit or record metadata based on their schema. Provide the
    schema url OR ana-type and version, as well as the schema version that you
    want to compare the records you get, to.

    If you do not provide an ana-version, it will get the latest. If you do
    not provide a -c parameter, the records will compare the data to their
    own schema.

    E.g.

    cap fixtures validate -u https://analysispreservation.cern.ch/schemas/deposits/records/test-v2.0.0.json -c 1.0.0  # noqa
    cap fixtures validate -a test -c 1.0.0
    """
    # NOTE(review): the docstring above reads like CLI help text (presumably
    # rendered by click - confirm), so the parameter notes are kept here:
    #   schema_url   - schema URL, resolved with resolve_schema_by_url
    #   ana_type     - schema name, used only when schema_url is empty
    #   ana_version  - version passed along with ana_type
    #   compare_with - if set, schema version every record is validated
    #                  against instead of its own $schema
    #   status       - 'draft' selects deposits, anything else records; also
    #                  used as the _deposit.status value in the search query
    #   export       - path of the file the error report is written to;
    #                  falsy disables the export
    #   export_type  - 'md' produces a markdown report, anything else plain
    #                  text
    try:
        if schema_url:
            schema = resolve_schema_by_url(schema_url)
        elif ana_type:
            schema = resolve_schema_by_name_and_version(ana_type, ana_version)
        else:
            raise click.UsageError(
                'You need to provide the ana-type or the schema-url.')
    except JSONSchemaNotFound:
        raise click.UsageError('Schema not found.')
    except ValueError:
        raise click.UsageError(
            'Version has to be passed as string <major>.<minor>.<patch>.')

    # differentiate between drafts/published
    # (imported here rather than at module level - presumably to avoid a
    # circular import; confirm before moving it)
    from cap.modules.deposit.api import CAPDeposit
    if status == 'draft':
        search_path = 'deposits-records'
        cap_record_class = CAPDeposit
    else:
        search_path = 'records'
        cap_record_class = CAPRecord

    # get all the records for this specific schema/type combination
    # (at most 5000 hits are requested from the search client)
    records = current_search_client.search(
        search_path,
        q=f'_deposit.status: {status} AND '
        f'$schema: "{schema_name_to_url(schema.name, schema.version)}"',
        size=5000)['hits']['hits']
    pids = [rec['_id'] for rec in records]

    click.secho(f'{len(records)} record(s) of {schema.name} found.\n',
                fg='green')

    # Loop-invariant base of the links printed in the report.
    cap_host = 'https://analysispreservation.cern.ch/drafts'

    total_errors = []
    for pid in pids:
        cap_record = cap_record_class.get_record(pid)
        cap_record_pid = cap_record.get('_deposit', {}).get('id')
        cap_record_cadi_id = cap_record.get('basic_info', {}).get('cadi_id')

        # get the url of the schema version, used for validation
        if compare_with:
            cap_record['$schema'] = schema_name_to_url(schema.name,
                                                       compare_with)
        try:
            cap_record.validate()
            click.secho(f'No errors found in record {pid}', fg='green')
        except DepositValidationError as exc:
            cadi_label = cap_record_cadi_id or "?"
            record_link = f'{cap_host}/{cap_record_pid}'

            if export_type == 'md':
                # One markdown table row per validation error.
                rows = ''.join(
                    f"| ```{res.get('field')}``` |"
                    f" {res.get('message')}  | \n"
                    for res in (err.res for err in exc.errors))
                msg = (f'- [ ] Errors in **CADI ID:** {cadi_label}'
                       f' - **[link]({record_link})** :\n'
                       '\n| Field Path | Error | \n| ---------- | ----- | \n'
                       f'{rows}'
                       '----\n')
            else:
                error_list = '\n'.join(str(err.res) for err in exc.errors)
                msg = (f'Errors in {pid} - CADI id: {cadi_label}'
                       f' - {record_link} :\n{error_list}')

            click.secho(msg, fg='red')

            if export:
                total_errors.append(msg)

    # export the errors in a file
    if export:
        with open(export, 'w') as out:
            # The report is a single string, so write() is the right call;
            # writelines() would iterate over it character by character.
            out.write('\n\n'.join(total_errors))
        click.secho(f'Errors saved at {export}.', fg='red')
def test_schema_name_to_url_when_schema_doesnt_exist_raises_JSONSchemaNotFound(
        db):
    """Resolving the name of a schema that was never stored must fail.

    ``db`` is not used directly; it is presumably requested only so that the
    database fixture is set up - confirm against the fixture definition.
    """
    unknown_name = 'non-existing-schema'

    with raises(JSONSchemaNotFound):
        schema_name_to_url(unknown_name)