Example #1
0
 def _validate_record(obj, eng):
     """Validate ``obj.data`` against ``schema`` and record any failure.

     When ``validate`` raises ``ValidationError`` the validation errors and
     the resolution callback URL are stored in ``obj.extra_data``, the object
     is saved, the database session is committed, and the error is re-raised.
     ``eng`` is not used here.
     """
     try:
         validate(obj.data, schema)
     except ValidationError:
         errors = get_validation_errors(obj.data, schema)
         obj.extra_data['validation_errors'] = errors

         callback_url = get_resolve_validation_callback_url()
         obj.extra_data['callback_url'] = callback_url

         # Persist the bookkeeping before propagating the original error.
         obj.save()
         db.session.commit()
         reraise(*sys.exc_info())
Example #2
0
def test_results_from_jats():
    """Get and validate results from mocking a JATS response.

    The record parsed from the fake APS JATS response must validate against
    the ``hep`` schema, i.e. ``validate`` must return ``None``.
    """
    from scrapy.http import XmlResponse

    spider = aps_spider.APSSpider()
    fake_response = fake_response_from_file(
        'aps/PhysRevD.96.095036.xml',
        response_type=XmlResponse,
    )
    record = spider._parse_jats(fake_response).record
    # Identity comparison: ``None`` is a singleton (PEP 8), and this matches
    # the ``is None`` checks used by the other validation tests.
    assert validate(record, 'hep') is None
Example #3
0
def validate_record(obj, eng):
    """
    Check the record in ``obj.data`` against the schema it declares.

    The schema is fetched from the URL stored under the ``$schema`` key.
    ``__halt_and_notify`` is called (and nothing else happens) when that key
    is missing, when validation raises ``ValidationError``, or when it
    raises ``SchemaError``.
    """
    has_schema = '$schema' in obj.data
    if not has_schema:
        __halt_and_notify('No schema found!', eng)
        return

    # Download the schema document and decode it from JSON.
    response = requests_retry_session().get(obj.data['$schema'])
    schema_data = json.loads(response.content)

    try:
        validate(obj.data, schema_data)
    except ValidationError as validation_error:
        __halt_and_notify('Invalid record: %s' % validation_error, eng)
    except SchemaError as schema_error:
        __halt_and_notify(
            'SchemaError during record validation! %s' % schema_error, eng
        )
Example #4
0
def test_convert_new_publication_info_to_old_handles_renamed_journals():
    """'Nucl.Phys.B Proc.Suppl.' becomes 'Nucl.Phys.Proc.Suppl.' in the old format."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {
            'artid': '525',
            'journal_title': 'Nucl.Phys.B Proc.Suppl.',
            'journal_volume': '118',
            'page_start': '525',
        },
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = [
        {
            'artid': '525',
            'journal_title': 'Nucl.Phys.Proc.Suppl.',
            'journal_volume': '118',
            'page_start': '525',
        },
    ]
    converted = utils.convert_new_publication_info_to_old(new_style)

    # Input and output must both be valid publication_info.
    assert utils.validate(converted, pubinfo_schema) is None
    assert old_style == converted
Example #5
0
def test_convert_new_publication_info_to_old():
    """The trailing letter of the journal title moves in front of the volume."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [{'journal_title': 'Phys.Rev.C', 'journal_volume': '48'}]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = [{'journal_title': 'Phys.Rev.', 'journal_volume': 'C48'}]
    converted = utils.convert_new_publication_info_to_old(new_style)

    # The converted entries must still be valid publication_info.
    assert utils.validate(converted, pubinfo_schema) is None
    assert old_style == converted
Example #6
0
def test_convert_new_publication_info_to_old_handles_journals_with_already_a_letter():
    """'Kumamoto J.Sci.Ser.A' volume '13' is left untouched by the conversion."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {'journal_title': 'Kumamoto J.Sci.Ser.A', 'journal_volume': '13'},
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    # Expected output is equal to the input.
    old_style = [
        {'journal_title': 'Kumamoto J.Sci.Ser.A', 'journal_volume': '13'},
    ]
    converted = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert old_style == converted
Example #7
0
def test_convert_old_publication_info_to_new_does_not_double_letters():
    """Volume 'A120' of 'Proc.Roy.Soc.Lond.A' becomes '120'; the title keeps one 'A'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {'journal_title': 'Proc.Roy.Soc.Lond.A', 'journal_volume': 'A120'},
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = [
        {'journal_title': 'Proc.Roy.Soc.Lond.A', 'journal_volume': '120'},
    ]
    converted = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert new_style == converted
def test_add_license_doesnt_overwrite_name_if_no_url():
    """Adding a license by name only stores exactly that name."""
    license_schema = load_schema('hep')['properties']['license']

    literature = LiteratureBuilder()
    literature.add_license(license='foo')

    expected_licenses = [{'license': 'foo'}]
    licenses = literature.record['license']

    assert validate(licenses, license_schema) is None
    assert expected_licenses == licenses
def test_add_doi_normalizes_doi():
    """The 'doi.org/' prefix is dropped from the stored DOI value."""
    dois_schema = load_schema('hep')['properties']['dois']

    literature = LiteratureBuilder()
    literature.add_doi('doi.org/10.1234/foo')

    expected_dois = [{'value': '10.1234/foo'}]
    dois = literature.record['dois']

    assert validate(dois, dois_schema) is None
    assert expected_dois == dois
Example #10
0
def test_convert_new_publication_info_to_old_handles_volumes_with_letters_in_the_middle():
    """'Eur.Phys.J.A' volume '28S1' converts to 'Eur.Phys.J.' volume 'A28S1'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [{'journal_title': 'Eur.Phys.J.A', 'journal_volume': '28S1'}]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = [{'journal_title': 'Eur.Phys.J.', 'journal_volume': 'A28S1'}]
    converted = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert old_style == converted
Example #11
0
def test_convert_old_publication_info_to_new_handles_hidden_without_volume_variations():
    """Visible and hidden 'Phys.Rev.' / 'B61' entries both become 'Phys.Rev.B' / '61'.

    The expected output carries no ``journal_record`` on the first entry.
    """
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    visible_entry = {
        'artid': 'R10587',
        'journal_record': {
            '$ref': 'http://localhost:5000/api/journals/1214516',
        },
        'journal_title': 'Phys.Rev.',
        'journal_volume': 'B61',
    }
    hidden_entry = {
        'artid': '10587',
        'hidden': True,
        'journal_title': 'Phys.Rev.',
        'journal_volume': 'B61',
    }
    old_style = [visible_entry, hidden_entry]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = [
        {
            'artid': 'R10587',
            'journal_title': 'Phys.Rev.B',
            'journal_volume': '61',
        },
        {
            'artid': '10587',
            'hidden': True,
            'journal_title': 'Phys.Rev.B',
            'journal_volume': '61',
        },
    ]
    converted = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert new_style == converted
Example #12
0
def test_convert_new_publication_info_to_old_handles_the_letter_in_proc_roy_soc_lond(
):
    """'Proc.Roy.Soc.Lond.A' volume '110' converts to 'Proc.Roy.Soc.Lond.' volume 'A110'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {'journal_title': 'Proc.Roy.Soc.Lond.A', 'journal_volume': '110'},
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = [
        {'journal_title': 'Proc.Roy.Soc.Lond.', 'journal_volume': 'A110'},
    ]
    converted = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert old_style == converted
Example #13
0
def test_convert_old_publication_info_to_new_handles_year_added_to_volumes_when_no_journal_title(
):
    """An entry without ``journal_title`` passes through the conversion unchanged."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {
            'artid': '137',
            'journal_volume': '1709',
            'year': 2017,
            'page_start': '137',
        },
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    # Expected output is equal to the input.
    new_style = [
        {
            'artid': '137',
            'journal_volume': '1709',
            'year': 2017,
            'page_start': '137',
        },
    ]
    converted = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert new_style == converted
def test_publication_info_public_note():
    """A title-only publication info yields a 'Submitted to ...' public note.

    No ``publication_info`` key is added to the record in that case.
    """
    notes_schema = load_schema('hep')['properties']['public_notes']

    literature = LiteratureBuilder(source="APS")
    literature.add_publication_info(journal_title="Phys. Rev. B")

    expected_notes = [
        {
            'source': 'APS',
            'value': 'Submitted to Phys. Rev. B',
        },
    ]
    notes = literature.record['public_notes']

    assert validate(notes, notes_schema) is None
    assert expected_notes == notes
    assert 'publication_info' not in literature.record
def test_make_author_handles_none_in_id_schema():
    """An id whose schema is ``None`` is left out of the built author."""
    authors_schema = load_schema('hep')['properties']['authors']
    literature = LiteratureBuilder()

    author = literature.make_author('Smith, John', ids=[(None, 'J.Smith.1')])
    expected_author = {'full_name': 'Smith, John'}

    # The schema describes a list of authors, so wrap the single author.
    assert validate([author], authors_schema) is None
    assert expected_author == author
def test_add_keyword():
    """A keyword is stored with its value, schema and the builder's source."""
    keywords_schema = load_schema('hep')['properties']['keywords']

    literature = LiteratureBuilder(source='Publisher')
    literature.add_keyword('29.27.Fh', schema='PACS')

    expected_keywords = [
        {
            'value': '29.27.Fh',
            'schema': 'PACS',
            'source': 'Publisher',
        },
    ]
    keywords = literature.record['keywords']

    assert validate(keywords, keywords_schema) is None
    assert expected_keywords == keywords
def test_curate():
    """``curate`` sets ``curated_relation`` to ``True`` on the reference."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.curate()

    expected_references = [{'curated_relation': True}]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
Example #18
0
def test_convert_old_publication_info_to_new_does_not_double_letters_when_letter_with_volume(
):
    """'Nucl.Phys.Proc.Suppl.' volume 'B120' converts to 'Nucl.Phys.B Proc.Suppl.' volume '120'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {'journal_title': 'Nucl.Phys.Proc.Suppl.', 'journal_volume': 'B120'},
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = [
        {'journal_title': 'Nucl.Phys.B Proc.Suppl.', 'journal_volume': '120'},
    ]
    converted = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(converted, pubinfo_schema) is None
    assert new_style == converted
Example #19
0
def test_add_external_system_identifier_kwargs():
    """Keyword arguments ``schema`` and ``extid`` end up as ``schema`` and ``value``."""
    ids_schema = load_schema('hep')['properties']['external_system_identifiers']

    literature = LiteratureBuilder()
    literature.add_external_system_identifier(schema='osti', extid='12345')

    expected_ids = [{'value': '12345', 'schema': 'osti'}]
    ids = literature.record['external_system_identifiers']

    assert validate(ids, ids_schema) is None
    assert expected_ids == ids
Example #20
0
def test_make_author_sets_record():
    """The ``record`` reference passed to ``make_author`` is kept on the author."""
    authors_schema = load_schema('hep')['properties']['authors']
    literature = LiteratureBuilder()

    author_record = {'$ref': 'http://url/api/authors/1234'}
    author = literature.make_author('Smith, John', record=author_record)

    expected_author = {
        'full_name': 'Smith, John',
        'record': author_record,
    }

    # The schema describes a list of authors, so wrap the single author.
    assert validate([author], authors_schema) is None
    assert expected_author == author
Example #21
0
def test_add_url_adds_uid():
    """A DOI passed to ``add_url`` is stored under the reference's ``dois``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_url('10.1109/NSSMIC.2005.1596597')

    expected_references = [
        {'reference': {'dois': ['10.1109/NSSMIC.2005.1596597']}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_set_label():
    """``set_label`` stores the label under ``reference.label``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.set_label('Abe et al, 2008')

    expected_references = [
        {'reference': {'label': 'Abe et al, 2008'}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_add_uid_falls_back_to_isbn():
    """The uid '1449344852' is stored as the ISBN '9781449344856'."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_uid('1449344852')

    expected_references = [
        {'reference': {'isbn': '9781449344856'}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_set_pubnote_puts_incomplete_pubnote_in_misc():
    """The pubnote 'Phys.Rev.,D43,' ends up verbatim in ``reference.misc``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.set_pubnote('Phys.Rev.,D43,')

    expected_references = [
        {'reference': {'misc': ['Phys.Rev.,D43,']}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_add_uid_handles_arxiv_ids():
    """The uid 'hep-th/0603001' is stored under ``reference.arxiv_eprint``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_uid('hep-th/0603001')

    expected_references = [
        {'reference': {'arxiv_eprint': 'hep-th/0603001'}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_set_texkey():
    """``set_texkey`` stores the key under ``reference.texkey``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.set_texkey('Aaij:2016qlz')

    expected_references = [
        {'reference': {'texkey': 'Aaij:2016qlz'}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_set_pubnote_falls_back_to_misc():
    """The string 'not-a-valid-pubnote' ends up verbatim in ``reference.misc``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.set_pubnote('not-a-valid-pubnote')

    expected_references = [
        {'reference': {'misc': ['not-a-valid-pubnote']}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
Example #28
0
def test_pop_additional_pubnotes_several_pubnotes():
    """Each 'Additional pubnote' misc entry is popped into its own reference.

    The text after ' / ' in the first misc entry stays in the builder's
    ``misc``.
    """
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_misc(
        "Additional pubnote: J.Improbable Testing,453,42-47 / some other stuff"
    )
    reference_builder.add_misc("Additional pubnote: J.Testing,42,R477")

    first_pubnote = {
        'reference': {
            'publication_info': {
                'journal_title': 'J.Improbable Testing',
                'journal_volume': '453',
                'page_start': '42',
                'page_end': '47'
            },
            'misc': [
                'Additional pubnote split from previous reference',
            ],
        },
    }
    second_pubnote = {
        'reference': {
            'publication_info': {
                'journal_title': 'J.Testing',
                'journal_volume': '42',
                'page_start': 'R477',
                'artid': 'R477'
            },
            'misc': [
                'Additional pubnote split from previous reference',
            ],
        },
    }
    expected_references = [first_pubnote, second_pubnote]

    popped = list(reference_builder.pop_additional_pubnotes())

    assert validate(popped, references_schema) is None
    assert expected_references == popped
    assert reference_builder.obj['reference']['misc'] == ['some other stuff']
Example #29
0
def test_make_author():
    """``make_author`` builds a full author from all the supported arguments.

    The ``source`` argument is expected on each raw affiliation only.
    """
    authors_schema = load_schema('hep')['properties']['authors']
    literature = LiteratureBuilder()

    author = literature.make_author(
        'Smith, John',
        affiliations=['CERN', 'SLAC'],
        source='submitter',
        raw_affiliations=['CERN, 1211 Geneva', 'SLAC, Stanford'],
        emails=['*****@*****.**'],
        ids=[('INSPIRE BAI', 'J.Smith.1')],
        alternative_names=['Johnny Smith']
    )

    expected_affiliations = [
        {'value': 'CERN'},
        {'value': 'SLAC'},
    ]
    expected_raw_affiliations = [
        {
            'value': 'CERN, 1211 Geneva',
            'source': 'submitter'
        },
        {
            'value': 'SLAC, Stanford',
            'source': 'submitter',
        },
    ]
    expected_ids = [
        {
            'schema': 'INSPIRE BAI',
            'value': 'J.Smith.1',
        },
    ]
    expected_author = {
        'full_name': 'Smith, John',
        'affiliations': expected_affiliations,
        'raw_affiliations': expected_raw_affiliations,
        'emails': ['*****@*****.**'],
        'ids': expected_ids,
        'alternative_names': ['Johnny Smith'],
    }

    # The schema describes a list of authors, so wrap the single author.
    assert validate([author], authors_schema) is None
    assert expected_author == author
def test_add_collaboration():
    """``add_collaboration`` appends the name to ``reference.collaborations``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_collaboration('ALICE')

    expected_references = [
        {'reference': {'collaborations': ['ALICE']}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_add_uid_rejects_invalid_isbns():
    """The uid '123456789' is stored in ``reference.misc`` rather than as an ISBN."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_uid('123456789')

    expected_references = [
        {'reference': {'misc': ['123456789']}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_add_uid_handles_cnums():
    """The uid 'C87-11-11' is stored under ``reference.publication_info.cnum``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_uid('C87-11-11')

    expected_references = [
        {'reference': {'publication_info': {'cnum': 'C87-11-11'}}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
def test_add_uid_handles_dois():
    """A 'http://dx.doi.org/...' uid is stored as a bare DOI in ``reference.dois``."""
    references_schema = load_schema('hep')['properties']['references']

    reference_builder = ReferenceBuilder()
    reference_builder.add_uid('http://dx.doi.org/10.3972/water973.0145.db')

    expected_references = [
        {'reference': {'dois': ['10.3972/water973.0145.db']}},
    ]
    references = [reference_builder.obj]

    assert validate(references, references_schema) is None
    assert expected_references == references
Example #34
0
 def _validate_record(obj, eng):
     """Validate ``obj.data`` against ``schema``.

     Whatever ``validate`` raises propagates to the caller; ``eng`` is not
     used here.
     """
     record_data = obj.data
     validate(record_data, schema)
Example #35
0
def formdata_to_model(obj, formdata):
    """Manipulate form data to match authors data model.

    Args:
        obj: object carrying the submission; ``obj.data``, ``obj.id`` and
            ``obj.id_user`` are read.
        formdata: mapping with the submitted form fields. It is deep-copied,
            so the caller's mapping is not modified.

    Returns:
        The converted data, after ``strip_empty_values`` has been applied and
        ``validate(data, 'authors')`` has been called on it.

    Raises:
        Whatever ``validate`` raises for data that does not match the
        ``authors`` schema (presumably a validation error -- confirm against
        ``validate``).
    """
    form_fields = copy.deepcopy(formdata)

    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors',
         'websites', 'experiments']
    )
    data = updateform.do(form_fields)

    # ======
    # Schema
    # ======
    # Fall back to the schema already set on the object.
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    # Anything that does not start with 'http' is turned into a schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(data['$schema'])
        )

    # Build the "Family, Given" form of the name from the raw form fields.
    author_name = ''

    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # ``setdefault`` rather than ``get``: with ``get`` a missing 'name'
        # key made the assignment land in a throwaway dict, silently
        # dropping the author name.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if 'extra_comments' in form_fields and form_fields['extra_comments']:
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments']
        })

    data['stub'] = False

    # ==========
    # Submitter Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # ``query.get`` gave back an object without ``email`` (e.g. ``None``).
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(
            id_user=obj.id_user,
            method='orcid'
        ).one().id
    except NoResultFound:
        # No ORCID identity found for this user.
        orcid = ''
    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    strip_empty_values(data)

    validate(data, 'authors')

    return data
Example #36
0
def map_old_record(record, dry_run):
    """
    Maps the given record if needed to comply with the new schema.

    ``record.json`` is modified in place, also when ``dry_run`` is set;
    ``dry_run`` only controls whether the ``json`` attribute is flagged as
    modified at the end.

    Following fields will be mapped:
     - page_nr will be a list of integers instead of list of strings
     - arxiv id will be put to the arxiv_eprints field
     - arxiv categories will be added if not yet present
     - "arxiv:" prefix will be removed from arxiv id
     - record_creation_date will be converted to iso format

     Following fields will be deleted at the end of the process:
     - _collections
     - report_numbers
     - files
     - local_files
     - free_keywords
     - additional_files
     - file_urls
     - earliest_date

    The result won't be saved and None will be returned in the following cases:
     - the record doesn't contain a json
     - a record fails the validation after mapping
     - both report_numbers and arxiv_eprints fields are present (shouldn't happen in the existing records)
     - there is more than one value in report_numbers field (shouldn't happen in the existing records)
     - report_numbers field is present, but there is no source subfield
     - report_numbers field is present, but holds no arxiv id
     - no record_creation_date is present
     - the record has no $schema

    :param record: object whose ``json`` attribute holds the record data.
    :param dry_run: when true, ``flag_modified`` is not called.
    :return: the record if it was mapped and validated, otherwise None.
    """

    # if there is no json, the record is considered deleted
    if not record.json:
        rerror('no json', record)
        return

    # page_nr to list of integers
    if 'page_nr' in record.json:
        record.json['page_nr'] = [int(x) for x in record.json['page_nr']]

    # extract arxiv from report_numbers if present
    if "report_numbers" in record.json and "arxiv_eprints" in record.json:
        rerror('both report_numbers and arxiv_eprints are present. Skip record.', record)
        return

    if "report_numbers" in record.json:
        if len(record.json["report_numbers"]) > 1:
            rerror('report_numbers has more then one element. Skip record.', record)
            return

        arxiv_id = None
        for element in record.json.get("report_numbers", ()):
            source = element.get('source')
            if not source:
                rerror('report_numbers present, but no source. Skip record.', record)
                return

            if source.lower() == 'arxiv':
                arxiv_id = element.get('value')
                break

        if arxiv_id:
            arxiv_id = arxiv_id.lower().replace('arxiv:', '')
            record.json['arxiv_eprints'] = [{'value': arxiv_id}]
            rinfo('report_numbers -> arxiv_eprints', record)
        else:
            rerror('report_numbers present, but no arxiv id? Skip record.', record)
            return

    # add arxiv category if not yet present
    if "arxiv_eprints" in record.json:
        for element in record.json.get("arxiv_eprints", ()):
            if 'value' not in element:
                rerror('arxiv_eprints value missing', record)
                continue

            arxiv_id = element['value']

            # remove arxiv prefix if present
            if arxiv_id.lower().startswith('arxiv:'):
                rinfo('removing "arxiv:" prefix', record)
                arxiv_id = arxiv_id[len('arxiv:'):]
                # Store the stripped id back on the record; previously only
                # the local variable was updated, so the prefix was never
                # actually removed from the stored value.
                element['value'] = arxiv_id

            if 'categories' not in element:
                categories = get_arxiv_categories(arxiv_id)
                element['categories'] = categories

    # record_creation_date to isoformat
    record_creation_date = record.json.get('record_creation_date')
    if record_creation_date is None:
        rerror('no record creation date. Skip record.', record)
        return

    new_date = parse_date(record_creation_date).isoformat()
    if new_date != record_creation_date:
        rinfo('update record_creation_date: %s -> %s' % (record_creation_date, new_date), record)
        record.json['record_creation_date'] = new_date

    # delete unwanted fields
    unwanted_fields = (
        '_collections',
        'report_numbers',
        'files',
        'local_files',
        'free_keywords',
        'additional_files',
        'file_urls',
        'earliest_date',
    )
    for key in unwanted_fields:
        # Only fields that were present with a non-None value are logged.
        if record.json.pop(key, None) is not None:
            rinfo('deleted %s field' % key, record)

    # validate record against the schema downloaded from its $schema URL
    valid = False
    schema = record.json.get('$schema')
    if schema is not None:
        schema_data = requests_retry_session().get(schema).content
        schema_data = json.loads(schema_data)

        try:
            validate(record.json, schema_data)
            valid = True
        except ValidationError as err:
            rerror('Invalid record: %s' % err, record)
        except SchemaError as err:
            rerror('SchemaError during record validation! %s' % err, record)
    else:
        rerror('No schema found!', record)

    if not valid:
        return

    # mark changes if not dry_run
    if not dry_run:
        flag_modified(record, 'json')

    return record