Esempio n. 1
0
def test_create_with_source_record_with_different_control_number(isolated_app):
    """Create a record whose documents come from another record.

    A first record (control number 1) is created with one document. A second
    record (control number 2) is then created from a deep copy of the first
    record's ``documents`` while passing the first record through
    ``files_src_records``. The test asserts that the second record ends up
    with one file, stored under a key prefixed with its own control number,
    with the same content and a document url different from the first one.
    """
    expected_file_content = 'dummy body'
    rec1_expected_key = '1_Fulltext.pdf'
    rec2_expected_key = '2_Fulltext.pdf'

    record1_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
        }],
    }

    record2_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 2,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }

    record1 = InspireRecord.create(record1_json)
    # Read through a context manager so the file handle is always closed.
    with open(record1.files[rec1_expected_key].obj.file.uri) as rec1_file:
        rec1_file_content = rec1_file.read()
    assert rec1_file_content == expected_file_content

    # The second record reuses the documents as stored on the first record.
    record2_json['documents'] = copy.deepcopy(record1['documents'])

    record2 = InspireRecord.create(record2_json, files_src_records=[record1])

    assert len(record2.files) == len(record2_json['documents'])
    assert len(record2['documents']) == len(record2_json['documents'])
    assert record2['documents'][0]['url'] != record1['documents'][0]['url']
    with open(record2.files[rec2_expected_key].obj.file.uri) as rec2_file:
        rec2_file_content = rec2_file.read()
    assert rec2_file_content == expected_file_content
Esempio n. 2
0
def store_record(obj, eng):
    """Insert a new record or replace an existing one from a workflow object.

    When ``obj.extra_data['is-update']`` is truthy, the record referenced by
    ``obj.extra_data['head_uuid']`` is cleared and refilled with ``obj.data``;
    for non-author workflows this only happens when the
    ``FEATURE_FLAG_ENABLE_MERGER`` config flag is enabled, otherwise the
    function logs and returns without storing anything. When it is not an
    update, a new record is created and its control number and uuid are
    written back to the workflow object.
    """
    updating = obj.extra_data.get('is-update')
    authors_workflow = eng.workflow_definition.data_type == 'authors'

    if not updating:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = record['control_number']
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        if not (
            authors_workflow or
            current_app.config.get('FEATURE_FLAG_ENABLE_MERGER', False)
        ):
            obj.log.info(
                'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
            )
            return

        head_uuid = obj.extra_data['head_uuid']
        record = InspireRecord.get_record(head_uuid)
        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
def test_literature_citations_api_without_results(api_client):
    """Request the citations of a record and expect an empty result.

    Creates a literature record with control number 111, refreshes the
    ``records-hep`` index, queries ``/literature/111/citations`` and asserts
    a 200 response whose metadata reports zero citations. The record is
    deleted at the end.
    """
    recid = 111
    record = InspireRecord.create({
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': recid,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    })
    record.commit()

    es.indices.refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'}
    )
    payload = json.loads(response.get_data(as_text=True))

    no_citations = {"citation_count": 0, "citations": []}

    assert response.status_code == 200
    assert no_citations == payload['metadata']

    _delete_record('lit', recid)
Esempio n. 4
0
def test_create_handles_figures(isolated_app):
    """Create a record with one figure and check the attached file.

    Asserts that the created record holds exactly one file, stored under the
    figure key prefixed with the control number, that the number of
    ``figures`` entries is preserved, and that the stored file content is
    ``'dummy body'``.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        'figures': [{
            'key': 'graph.png',
            'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/graph.png;1',
        }]  # record/1628455/export/xme
    }

    record = InspireRecord.create(record_json)
    expected_file_content = 'dummy body'
    expected_key = '1_graph.png'

    assert expected_key in record.files.keys
    assert len(record.files) == 1
    assert len(record['figures']) == len(record_json['figures'])
    # Read through a context manager so the file handle is always closed.
    with open(record.files[expected_key].obj.file.uri) as figure_file:
        file_content = figure_file.read()
    assert file_content == expected_file_content
Esempio n. 5
0
def test_create_with_skip_files_param_overrides_records_skip_files_conf_and_does_not_add_documents_or_figures(isolated_app):
    """Check that ``skip_files=True`` wins over ``RECORDS_SKIP_FILES = False``.

    With the config patched to ``False`` and ``skip_files=True`` passed to
    ``create``, the record must have no files and its ``documents`` and
    ``figures`` must equal the input ones.
    """
    # record/1628455/export/xme -- with some modification
    figures = [{
        'key': 'graph.png',
        'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
    }]
    documents = [{
        'key': 'arXiv:1710.01187.pdf',
        'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/content.pdf;1',
    }]
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
        'figures': figures,
        'documents': documents,
    }

    config_override = {'RECORDS_SKIP_FILES': False}
    with patch.dict(isolated_app.config, config_override):
        record = InspireRecord.create(record_json, skip_files=True)

    assert len(record.files) == 0
    assert record['documents'] == record_json['documents']
    assert record['figures'] == record_json['figures']
Esempio n. 6
0
def test_record_with_non_valid_content_is_cleaned_and_created_properly(
        isolated_app):
    """Check that an invalid payload still yields a valid created record.

    The input json is first shown to fail ``validate``; the record created
    from it must then pass ``validate``.
    """
    # record/1628455/export/xme -- with some modification
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
        # these two fields make the record not valid
        'documents': [],
        'urls': [{'url': ''}],
    }

    rejected = False
    try:
        validate(record_json)
    except ValidationError:
        rejected = True
    assert rejected

    created = InspireRecord.create(record_json)
    validate(created)
 def test_new_record(self):
     """Commit a new record and check that one ORCID push task is queued.

     With ORCID push enabled via config and the token lookup mocked to
     return a single (orcid, token) pair, committing the record must call
     ``orcid_push.apply_async`` exactly once with the expected kwargs on the
     ``orcid_push`` queue. The record is deleted afterwards.
     """
     recid = 9999912587
     cited_ref = {'$ref': 'http://localhost:5000/api/literature/1498589'}
     inspire_record = InspireRecord.create({
         '$schema': 'http://localhost:5000/schemas/records/hep.json',
         'document_type': ['article'],
         'control_number': recid,
         'titles': [{'title': 'Jessica Jones'}],
         '_collections': ['Literature'],
         'references': [{'record': cited_ref}],
     })
     with override_config(FEATURE_FLAG_ENABLE_ORCID_PUSH=True,
                          FEATURE_FLAG_ORCID_PUSH_WHITELIST_REGEX='.*',
                          ORCID_APP_CREDENTIALS={'consumer_key': '0000-0001-8607-8906'}), \
             mock.patch('inspirehep.modules.records.receivers.push_access_tokens') as tokens_mock, \
             mock.patch('inspirehep.modules.orcid.tasks.orcid_push.apply_async') as apply_async_mock:
         tokens_mock.get_access_tokens.return_value = [('myorcid', 'mytoken')]
         inspire_record.commit()
         # The version id is read after the commit, as the task receives it.
         expected_task_kwargs = {
             'orcid': 'myorcid',
             'oauth_token': 'mytoken',
             'kwargs_to_pusher': {
                 'record_db_version': inspire_record.model.version_id,
             },
             'rec_id': recid,
         }
         apply_async_mock.assert_called_once_with(
             kwargs=expected_task_kwargs,
             queue='orcid_push')
     _delete_record('lit', recid)
def test_creating_deleted_record_and_undeleting_created_record_in_es(app):
    """Check ES visibility of a record created as deleted, then undeleted."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        'deleted': True,
        '_collections': ['Literature'],
    }

    # A record created in the DB with the deleted flag set must not be
    # found in ES.
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)

    # Once the record is undeleted, it must be found in ES.
    record['deleted'] = False
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    record._delete(force=True)
def test_deleting_record_triggers_delete_in_es(app):
    """Check that flagging a record as deleted removes it from ES."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # A freshly created and committed record must be found in ES.
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    # After setting the deleted flag and committing, the lookup must fail.
    record['deleted'] = True
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
Esempio n. 10
0
def _create_and_index_record(record):
    """Create a record from ``record``, mint its recid and refresh ES.

    Returns the created ``InspireRecord`` after committing the DB session
    and refreshing the ``records-hep`` index.
    """
    created = InspireRecord.create(record)
    inspire_recid_minter(created.id, created)

    db.session.commit()
    es.indices.refresh('records-hep')
    return created
def test_that_db_changes_are_mirrored_in_es(app):
    """Check that create, update and delete in the DB are reflected in ES."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
    }

    # When a record is created in the DB, it is also created in ES.
    record = InspireRecord.create(record_data)
    indexed = search.get_source(record.id)
    assert get_title(indexed) == 'foo'

    # When a record is updated in the DB, it is also updated in ES.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    indexed = search.get_source(record.id)
    assert get_title(indexed) == 'bar'

    # When a record is deleted in the DB, it is also deleted in ES.
    record._delete(force=True)
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
Esempio n. 12
0
def test_download_local_file(isolated_app):
    """Create a record whose document points at a local ``file://`` url.

    A temporary file with a ``;1`` suffix is used as the document source;
    the created record must expose one document and one ``_files`` entry.
    """
    with NamedTemporaryFile(suffix=';1') as temp_file:
        local_path = temp_file.name
        document = {
            'key': os.path.basename(local_path),
            'url': 'file://{0}'.format(quote(local_path)),
        }
        data = {
            '$schema': 'http://localhost:5000/schemas/records/hep.json',
            '_collections': ['Literature'],
            'document_type': ['article'],
            'titles': [{'title': 'h'}],
            'documents': [document],
        }

        record = InspireRecord.create(data)

        assert 1 == len(record['documents'])
        assert 1 == len(record['_files'])
Esempio n. 13
0
def record_insert_or_replace(json, skip_files=False):
    """Insert or replace a record.

    Looks up the persistent identifier built from the pid type of
    ``json['$schema']`` and ``json['control_number']``. If it exists, the
    matching record is cleared, refilled with ``json`` and committed;
    otherwise a new record is created and its recid is minted. When
    ``json`` carries ``legacy_creation_date`` (``YYYY-MM-DD``), it is set as
    the creation date of the record model. A record flagged ``deleted``
    without a resolvable ``new_record`` reference is deleted.

    :param json: record metadata.
    :param skip_files: forwarded to ``InspireRecord.update``/``create``.
    :return: the stored ``InspireRecord``.
    """
    def _apply_legacy_creation_date(target):
        """Copy the legacy creation date, if any, onto the record model."""
        legacy_date = json.get('legacy_creation_date')
        if legacy_date:
            target.model.created = datetime.strptime(legacy_date, '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(json['$schema'])
    control_number = json['control_number']

    try:
        pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        inspire_recid_minter(str(record.id), json)

    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
Esempio n. 14
0
def test_create_does_not_save_zombie_identifiers_if_record_creation_fails(isolated_app):
    """Check that a failed ``create`` leaves no identifiers behind.

    Creating the record below must raise ``ValidationError``, and afterwards
    neither a ``RecordIdentifier`` nor a ``PersistentIdentifier`` for its
    control number may exist.
    """
    invalid_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1936477,
    }

    with pytest.raises(ValidationError):
        InspireRecord.create(invalid_record)

    recid_query = RecordIdentifier.query.filter_by(recid=1936477)
    record_identifier = recid_query.one_or_none()
    pid_query = PersistentIdentifier.query.filter_by(pid_value='1936477')
    persistent_identifier = pid_query.one_or_none()

    assert not record_identifier
    assert not persistent_identifier
def test_index_after_commit_indexes_raises_if_cited_records_are_not_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that indexing citations of a missing cited record raises.

    The record is first committed with an unlinked reference, then updated
    so that the reference points at literature record 9999. After each
    commit the mocked indexing task must have been called with the record's
    pid type, control number and the expected version (2, then 3). Running
    the real task for the second version must raise
    ``MissingCitedRecordError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    smith = {'authors': [{'full_name': 'Smith, J.'}]}
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{"reference": smith}],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    first_call_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*first_call_args)

    # execute mocked task
    index_modified_citations_from_record(*first_call_args)

    # Same reference, now linked to a cited record.
    citing_json['references'] = [{
        "curated_relation": False,
        "record": {"$ref": "http://localhost:5000/api/literature/9999"},
        "reference": {'authors': [{'full_name': 'Smith, J.'}]},
    }]

    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    second_call_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*second_call_args)
    # execute mocked task
    with pytest.raises(MissingCitedRecordError):
        index_modified_citations_from_record(*second_call_args)

    _delete_record('lit', 8888)
Esempio n. 16
0
def test_selecting_2_facets_generates_search_with_must_query(api_client):
    """Check that two author facets restrict results to records with both.

    Two records are created: one authored by "John Doe" only, the other by
    "John Doe" and "John Doe2". Filtering by the first author must return
    both records; filtering by both authors must return only the second.
    """
    def _recids_for(url):
        """Return the control numbers of the hits returned for ``url``."""
        payload = json.loads(api_client.get(url).data)
        return [
            hit['metadata']['control_number']
            for hit in payload['hits']['hits']
        ]

    rec = InspireRecord.create(data={
        'control_number': 843386527,
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Article 1'}],
        '_collections': ['Literature'],
        'authors': [{'full_name': 'John Doe'}],
    })
    rec.commit()

    rec2 = InspireRecord.create(data={
        'control_number': 843386521,
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Article 2'}],
        '_collections': ['Literature'],
        'authors': [{'full_name': 'John Doe'}, {'full_name': 'John Doe2'}],
    })
    rec2.commit()

    db.session.commit()
    es.indices.refresh('records-hep')

    single_facet = _recids_for('/literature?q=&author=BAI_John%20Doe')
    assert rec['control_number'] in single_facet
    assert rec2['control_number'] in single_facet

    both_facets = _recids_for(
        '/literature?q=&author=BAI_John%20Doe&author=BAI_John%20Doe2')
    assert rec['control_number'] not in both_facets
    assert rec2['control_number'] in both_facets

    for recid in (843386527, 843386521):
        _delete_record('lit', recid)
    db.session.commit()
def dummy_record(workflow_app):
    """Yield a minimal thesis record and force-delete it afterwards."""
    minimal_thesis = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['thesis'],
        'titles': [{'title': 'foo'}],
    }
    record = InspireRecord.create(minimal_thesis)

    yield record

    record._delete(force=True)
Esempio n. 18
0
def record_to_merge(workflow_app):
    """Yield a committed thesis record, then remove it and its ``lit`` pid.

    The record is created without files, committed and the ``records-hep``
    index is refreshed before yielding. On teardown the record is reloaded
    by uuid, its ``lit`` persistent identifier is unassigned and deleted,
    and the record is deleted and committed.
    """
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [{'full_name': 'Jessica, Jones'}],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [{'title': 'Alias Investigations'}],
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }
    record = InspireRecord.create(record_data, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: work on a fresh copy of the record loaded from the DB.
    stored = InspireRecord.get_record(rec_uuid)
    pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=stored['control_number']
    )
    pid.unassign()
    pid.delete()

    stored.delete()
    stored.commit()
def dummy_record(workflow_app):
    """Create a minimal thesis record, yield it, then force-delete it."""
    schema_url = 'http://localhost:5000/schemas/records/hep.json'
    record = InspireRecord.create({
        '$schema': schema_url,
        '_collections': ['Literature'],
        'document_type': ['thesis'],
        'titles': [{'title': 'foo'}],
    })

    yield record

    # Teardown once the consumer is done with the record.
    record._delete(force=True)
Esempio n. 20
0
def test_create_with_skip_files_param_overrides_records_skip_files_conf_and_does_add_documents_or_figures(app):
    """Check that ``skip_files=False`` wins over ``RECORDS_SKIP_FILES = True``.

    With the config patched to ``True`` and ``fsopen`` mocked to return
    ``'dummy body'``, creating the record with ``skip_files=False`` must
    attach two files and preserve the number of ``documents`` and
    ``figures`` entries.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
        'documents': [{
            'key': 'arXiv:1710.01187.pdf',
            'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/content.pdf;1',
        }]  # record/1628455/export/xme -- with some modification
    }
    expected_document_file_content = 'dummy body'
    # NOTE(review): this is the same key as the figure below, so the
    # "document" assertions actually re-check the figure file. It presumably
    # should be derived from 'arXiv:1710.01187.pdf' -- confirm the exact key
    # the record stores before changing it.
    expected_document_key = '1_graph.png'

    expected_figure_file_content = 'dummy body'
    expected_figure_key = '1_graph.png'

    with patch.dict(app.config, {'RECORDS_SKIP_FILES': True}):
        with patch(
            'inspirehep.modules.records.api.fsopen',
            mock_open(read_data=expected_figure_file_content),
        ):
            record = InspireRecord.create(record_json, skip_files=False)

    assert len(record.files) == 2

    assert expected_document_key in record.files.keys
    assert len(record['documents']) == len(record_json['documents'])
    # Read through context managers so the file handles are always closed.
    with open(record.files[expected_document_key].obj.file.uri) as document_file:
        document_file_content = document_file.read()
    assert document_file_content == expected_document_file_content

    assert expected_figure_key in record.files.keys
    assert len(record['figures']) == len(record_json['figures'])
    with open(record.files[expected_figure_key].obj.file.uri) as figure_file:
        figure_file_content = figure_file.read()
    assert figure_file_content == expected_figure_file_content
Esempio n. 21
0
def test_create_with_skip_files_param_overrides_records_skip_files_conf_and_does_add_documents_or_figures(isolated_app):
    """Check that ``skip_files=False`` wins over ``RECORDS_SKIP_FILES = True``.

    With the config patched to ``True`` and ``fsopen`` mocked to return
    ``'dummy body'``, creating the record with ``skip_files=False`` must
    attach two files and preserve the number of ``documents`` and
    ``figures`` entries.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
        'documents': [{
            'key': 'arXiv:1710.01187.pdf',
            'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/content.pdf;1',
        }]  # record/1628455/export/xme -- with some modification
    }
    expected_document_file_content = 'dummy body'
    # NOTE(review): this is the same key as the figure below, so the
    # "document" assertions actually re-check the figure file. It presumably
    # should be derived from 'arXiv:1710.01187.pdf' -- confirm the exact key
    # the record stores before changing it.
    expected_document_key = '1_graph.png'

    expected_figure_file_content = 'dummy body'
    expected_figure_key = '1_graph.png'

    with patch.dict(isolated_app.config, {'RECORDS_SKIP_FILES': True}):
        with patch(
            'inspirehep.modules.records.api.fsopen',
            mock_open(read_data=expected_figure_file_content),
        ):
            record = InspireRecord.create(record_json, skip_files=False)

    assert len(record.files) == 2

    assert expected_document_key in record.files.keys
    assert len(record['documents']) == len(record_json['documents'])
    # Read through context managers so the file handles are always closed.
    with open(record.files[expected_document_key].obj.file.uri) as document_file:
        document_file_content = document_file.read()
    assert document_file_content == expected_document_file_content

    assert expected_figure_key in record.files.keys
    assert len(record['figures']) == len(record_json['figures'])
    with open(record.files[expected_figure_key].obj.file.uri) as figure_file:
        figure_file_content = figure_file.read()
    assert figure_file_content == expected_figure_file_content
Esempio n. 22
0
def record_from_db(workflow_app):
    """Yield a committed article record, then remove it and its ``lit`` pid.

    The record (control number 1234) is created without files, committed
    and the ``records-hep`` index is refreshed before yielding. On teardown
    the record is reloaded by uuid, its ``lit`` persistent identifier is
    unassigned and deleted, and the record is deleted and committed.
    """
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'Fancy title for a new record'}],
        'arxiv_eprints': [{'categories': ['hep-th'], 'value': '1407.7587'}],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [{'source': 'arxiv', 'value': 'A basic abstract.'}],
        'report_numbers': [{'value': 'DESY-17-036'}],
    }
    record = InspireRecord.create(record_data, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: work on a fresh copy of the record loaded from the DB.
    stored = InspireRecord.get_record(rec_uuid)
    pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=stored['control_number'],
    )
    pid.unassign()
    pid.delete()

    stored.delete()
    stored.commit()
Esempio n. 23
0
def store_record(obj, eng):
    """Insert a new record or replace an existing one from a workflow object.

    When ``obj.extra_data['is-update']`` is truthy, the record to update is
    resolved either from ``head_uuid`` or from the approved match, cleared
    and refilled with ``obj.data``; for non-author workflows this only
    happens when the ``FEATURE_FLAG_ENABLE_MERGER`` config flag is enabled,
    otherwise the function logs and returns without storing anything. When
    it is not an update, a new record is created and its control number and
    uuid are written back to the workflow object.
    """
    def _get_updated_record(obj):
        """TODO: use only head_uuid once we have the merger."""
        if 'head_uuid' not in obj.extra_data:
            # No head yet: fall back to the approved match of the workflow.
            pid_type = get_pid_type_from_schema(obj.data['$schema'])
            approved_recid = obj.extra_data['matches']['approved']
            return get_db_record(pid_type, approved_recid)

        return InspireRecord.get_record(obj.extra_data['head_uuid'])

    updating = obj.extra_data.get('is-update')
    authors_workflow = eng.workflow_definition.data_type == 'authors'

    if not updating:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = record['control_number']
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        if not (
            authors_workflow or
            current_app.config.get('FEATURE_FLAG_ENABLE_MERGER', False)
        ):
            obj.log.info(
                'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
            )
            return

        record = _get_updated_record(obj)
        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
Esempio n. 24
0
def test_update_handles_figures(app):
    """Update a file-less record with a figure and check the attached file.

    The record is created without files, cleared, and updated with the same
    metadata plus one figure whose url is served by a mocked address. The
    updated record must hold one file under the figure key prefixed with
    the control number, containing the mocked body.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {
                'title': 'foo'
            },
        ],
        '_collections': ['Literature'],  # DESY harvest
    }

    update_to_record = {
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
    }

    expected_file_content = 'dummy body'
    expected_key = '1_graph.png'

    record = InspireRecord.create(record_json)
    assert not len(record.files)

    record.clear()
    # ``updated_json`` is the same dict object as ``record_json``.
    updated_json = record_json
    updated_json.update(copy.deepcopy(update_to_record))

    mocked_addresses = [{
        'method': 'GET',
        'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        'body': StringIO.StringIO(expected_file_content),
    }]
    with mock_addresses(mocked_addresses):
        record.update(updated_json)

    assert expected_key in record.files.keys
    assert len(record.files) == len(update_to_record['figures'])
    assert len(record['figures']) == len(update_to_record['figures'])
    # Read through a context manager so the file handle is always closed.
    with open(record.files[expected_key].obj.file.uri) as figure_file:
        file_content = figure_file.read()
    assert file_content == expected_file_content
Esempio n. 25
0
def test_records_files_attached_correctly(app):
    """Attach an empty file to a record and check it is listed after commit."""
    file_key = 'fulltext.pdf'
    record = InspireRecord.create({
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    })

    record.files[file_key] = StringIO.StringIO()
    record.commit()

    assert file_key in record.files
Esempio n. 26
0
def test_receive_after_model_commit(app):
    """Test if records are correctly synced with ElasticSearch."""
    record_data = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "Hello": "World",
    }
    record = InspireRecord.create(record_data)
    search = LiteratureSearch()

    # Creation is mirrored.
    indexed = search.get_source(record.id)
    assert indexed["Hello"] == "World"

    # Updates are mirrored once committed.
    record["Hello"] = "INSPIRE"
    record.commit()
    indexed = search.get_source(record.id)
    assert indexed["Hello"] == "INSPIRE"

    # Deletion is mirrored: the lookup must fail afterwards.
    record._delete(force=True)
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
Esempio n. 27
0
def test_update_handles_documents(app):
    """Update a file-less record with a document and check the attached file.

    The record is created without files, cleared, and updated with the same
    metadata plus one document while ``fsopen`` is mocked to return
    ``'dummy body'``. The updated record must hold one file under the
    document key prefixed with the control number, with that content.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }

    update_to_record = {
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf',
        }],
    }

    expected_file_content = 'dummy body'
    expected_key = '1_Fulltext.pdf'

    record = InspireRecord.create(record_json)
    assert not len(record.files)

    record.clear()
    # ``updated_json`` is the same dict object as ``record_json``.
    updated_json = record_json
    updated_json.update(copy.deepcopy(update_to_record))

    with patch(
        'inspirehep.modules.records.api.fsopen',
        mock_open(read_data=expected_file_content),
    ):
        record.update(updated_json)

    assert expected_key in record.files.keys
    assert len(record.files) == len(update_to_record['documents'])
    assert len(record['documents']) == len(update_to_record['documents'])
    # Read through a context manager so the file handle is always closed.
    with open(record.files[expected_key].obj.file.uri) as document_file:
        file_content = document_file.read()
    assert file_content == expected_file_content
Esempio n. 28
0
def test_update_handles_figures(isolated_app):
    """Update a file-less record with a figure and check the attached file.

    The record is created without files, cleared, and updated with the same
    metadata plus one figure while ``fsopen`` is mocked to return
    ``'dummy body'``. The updated record must hold one file under the
    figure key prefixed with the control number, with that content.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }

    update_to_record = {
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
    }

    expected_file_content = 'dummy body'
    expected_key = '1_graph.png'

    record = InspireRecord.create(record_json)
    assert not len(record.files)

    record.clear()
    # ``updated_json`` is the same dict object as ``record_json``.
    updated_json = record_json
    updated_json.update(copy.deepcopy(update_to_record))

    with patch(
        'inspirehep.modules.records.api.fsopen',
        mock_open(read_data=expected_file_content),
    ):
        record.update(updated_json)

    assert expected_key in record.files.keys
    assert len(record.files) == len(update_to_record['figures'])
    assert len(record['figures']) == len(update_to_record['figures'])
    # Read through a context manager so the file handle is always closed.
    with open(record.files[expected_key].obj.file.uri) as figure_file:
        file_content = figure_file.read()
    assert file_content == expected_file_content
Esempio n. 29
0
def test_records_files_attached_correctly(isolated_app):
    """Attach an empty file to a record and check it is listed after commit."""
    attached_key = 'fulltext.pdf'
    minimal_article = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    record = InspireRecord.create(minimal_article)
    empty_stream = StringIO.StringIO()
    record.files[attached_key] = empty_stream
    record.commit()

    assert attached_key in record.files
Esempio n. 30
0
 def test_new_record(self):
     """Commit a new record and check that one ORCID push task is queued.

     With ORCID push enabled via config and the token lookup mocked to
     return a single (orcid, token) pair, committing the record must call
     ``orcid_push.apply_async`` exactly once with the expected kwargs on the
     ``orcid_push`` queue. The record is deleted afterwards.
     """
     recid = 9999912587
     record_json = {
         '$schema': 'http://localhost:5000/schemas/records/hep.json',
         'document_type': ['article'],
         'control_number': recid,
         'titles': [{'title': 'Jessica Jones'}],
         '_collections': ['Literature'],
         'references': [
             {'record': {'$ref': 'http://localhost:5000/api/literature/1498589'}},
         ],
     }
     inspire_record = InspireRecord.create(record_json)

     config_ctx = override_config(
         FEATURE_FLAG_ENABLE_ORCID_PUSH=True,
         FEATURE_FLAG_ORCID_PUSH_WHITELIST_REGEX='.*',
         ORCID_APP_CREDENTIALS={'consumer_key': '0000-0001-8607-8906'})
     with config_ctx:
         with mock.patch('inspirehep.modules.records.receivers.push_access_tokens') as tokens_mock:
             with mock.patch('inspirehep.modules.orcid.tasks.orcid_push.apply_async') as apply_async_mock:
                 tokens_mock.get_access_tokens.return_value = [
                     ('myorcid', 'mytoken')
                 ]
                 inspire_record.commit()
                 # The version id is read only after the commit.
                 db_version = inspire_record.model.version_id
                 apply_async_mock.assert_called_once_with(
                     kwargs={
                         'orcid': 'myorcid',
                         'oauth_token': 'mytoken',
                         'kwargs_to_pusher': {'record_db_version': db_version},
                         'rec_id': recid,
                     },
                     queue='orcid_push')
     _delete_record('lit', recid)
Esempio n. 31
0
def record_to_merge(workflow_app):
    """Provide a stored thesis record and clean it up afterwards.

    A literature record is created (files skipped) and committed, the DB
    session is committed and the ``records-hep`` index refreshed before the
    record is yielded. Once the consumer is done, the record is fetched
    again by UUID, its ``lit`` persistent identifier is unassigned and
    deleted, and the record itself is deleted.
    """
    thesis_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [{'full_name': 'Jessica, Jones'}],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [{'title': 'Alias Investigations'}],
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }
    stored = InspireRecord.create(thesis_data, id_=None, skip_files=True)
    stored.commit()
    stored_uuid = stored.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield stored

    # Teardown: reload the record so the PID is looked up with the control
    # number currently stored on it.
    leftover = InspireRecord.get_record(stored_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=leftover['control_number'],
    )

    lit_pid.unassign()
    lit_pid.delete()
    leftover.delete()
    leftover.commit()
def test_that_db_changes_are_mirrored_in_es(app):
    """Check that create, update and delete in the DB are reflected in ES."""
    literature_search = LiteratureSearch()
    article_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # When a record is created in the DB, it is also created in ES.
    article = InspireRecord.create(article_data)
    article.commit()
    db.session.commit()
    indexed = literature_search.get_source(article.id)

    assert get_title(indexed) == 'foo'

    # When a record is updated in the DB, it is also updated in ES.
    first_title = article['titles'][0]
    first_title['title'] = 'bar'
    article.commit()
    db.session.commit()
    indexed = literature_search.get_source(article.id)

    assert get_title(indexed) == 'bar'

    # When a record is deleted in the DB, it is also deleted in ES.
    article._delete(force=True)
    db.session.commit()

    with pytest.raises(NotFoundError):
        literature_search.get_source(article.id)
Esempio n. 33
0
def record_insert_or_replace(json):
    """Insert a record, or replace the existing one with the same PID.

    The control number is taken from ``control_number`` and falls back to
    ``recid``. When neither yields a truthy value nothing is done and
    ``None`` is returned.

    If a persistent identifier already exists for the control number, the
    matching record is cleared, refilled from ``json`` and committed;
    otherwise a new record is created and a PID is minted for it. A record
    flagged ``deleted`` with no resolvable ``new_record`` reference is
    deleted before being returned.
    """
    control_number = json.get('control_number', json.get('recid'))
    if not control_number:
        return None

    pid_type = get_pid_type_from_schema(json['$schema'])
    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None)
        # Create persistent identifier.
        inspire_recid_minter(str(record.id), json)

    # Only delete when the record is marked deleted and has no successor.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
Esempio n. 34
0
def store_record(obj, eng):
    """Insert or replace the record held by the workflow object.

    When ``obj.extra_data['is-update']`` is truthy, the record referenced by
    ``head_uuid`` is cleared and refilled from ``obj.data``. Otherwise a new
    record is created, a PID is minted, its documents and figures are
    downloaded, and the new control number and record UUID are written back
    onto ``obj``. In both cases the record is committed, ``obj`` is saved
    and the DB session is committed.
    """
    if obj.extra_data.get('is-update'):
        target = InspireRecord.get_record(obj.extra_data['head_uuid'])
        target.clear()
        target.update(obj.data, files_src_records=[obj])
    else:
        target = InspireRecord.create(obj.data, id_=None)
        # Create persistent identifier.
        minted_recid = inspire_recid_minter(str(target.id), target).pid_value
        # Now that we have a recid, we can properly download the documents
        target.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = minted_recid
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(target.id)

    target.commit()
    obj.save()
    db.session.commit()
Esempio n. 35
0
def record_from_db(workflow_app):
    """Provide a stored article record (control number 1234) and remove it.

    The record is created with files skipped and committed, the DB session
    is committed and the ``records-hep`` index refreshed before the record
    is yielded. Afterwards the record is reloaded by UUID, its ``lit`` PID
    is unassigned and deleted, and the record is deleted.
    """
    article_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [
            {'title': 'Fancy title for a new record'},
        ],
        'arxiv_eprints': [
            {
                'categories': ['hep-th'],
                'value': '1407.7587',
            },
        ],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [
            {
                'source': 'arxiv',
                'value': 'A basic abstract.',
            },
        ],
        'report_numbers': [
            {'value': 'DESY-17-036'},
        ],
    }
    stored = InspireRecord.create(article_data, id_=None, skip_files=True)
    stored.commit()
    stored_uuid = stored.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield stored

    # Teardown: look the record up again and drop both the PID and the record.
    leftover = InspireRecord.get_record(stored_uuid)
    control_number = leftover['control_number']
    lit_pid = PersistentIdentifier.get(pid_type='lit', pid_value=control_number)

    lit_pid.unassign()
    lit_pid.delete()
    leftover.delete()
    leftover.commit()
Esempio n. 36
0
def store_record(obj, eng):
    """Insert or replace the record held by the workflow object.

    For updates the existing record is located through ``head_uuid`` when
    present, else through the first entry of ``record_matches``; its control
    number is copied onto ``obj.data`` before the record is cleared and
    refilled. For new records the record is created with files skipped, a
    PID is minted, documents and figures are downloaded, and the control
    number and record UUID are stored on ``obj``. Finally the record is
    committed, ``obj`` saved and the DB session committed.
    """
    if obj.extra_data.get('is-update'):
        # TODO: use only head_uuid once we have the merger.
        if 'head_uuid' in obj.extra_data:
            record = InspireRecord.get_record(obj.extra_data['head_uuid'])
        else:
            pid_type = get_pid_type_from_schema(obj.data['$schema'])
            matched_recid = obj.extra_data['record_matches'][0]
            record = get_db_record(pid_type, matched_recid)

        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])
    else:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Create persistent identifier.
        minted_recid = inspire_recid_minter(str(record.id), record).pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = minted_recid
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)

    record.commit()
    obj.save()
    db.session.commit()
Esempio n. 37
0
def store_record(obj, eng):
    """Insert or replace the record held by the workflow object.

    When ``FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT`` is set, the work is
    delegated to ``store_record_inspirehep_api``. Otherwise everything runs
    inside a nested DB transaction: updates of non-author records are
    skipped (with a log message) unless ``FEATURE_FLAG_ENABLE_MERGER`` is
    enabled; other updates refill the record referenced by ``head_uuid``;
    new records are created with files skipped and then have their
    documents and figures downloaded.
    """
    is_update = obj.extra_data.get('is-update')
    is_authors = eng.workflow_definition.data_type == 'authors'

    if current_app.config.get("FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT"):
        store_record_inspirehep_api(obj, eng, is_update, is_authors)
        return

    with db.session.begin_nested():
        if is_update:
            if not (is_authors or current_app.config.get(
                    'FEATURE_FLAG_ENABLE_MERGER', False)):
                obj.log.info(
                    'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
                )
                return

            record = InspireRecord.get_record(obj.extra_data['head_uuid'])
            obj.data['control_number'] = record['control_number']
            record.clear()
            record.update(obj.data, files_src_records=[obj])
        else:
            # Skip the files to avoid issues in case the record has already pid
            # TODO: remove the skip files once labs becomes master
            record = InspireRecord.create(
                obj.data,
                id_=None,
                skip_files=True,
            )
            # NOTE(review): no PID is minted explicitly here; presumably the
            # control number is assigned during ``create`` - confirm.
            record.download_documents_and_figures(src_records=[obj])

            obj.data['control_number'] = record['control_number']
            # store head_uuid to store the root later
            obj.extra_data['head_uuid'] = str(record.id)

        record.commit()
        obj.save()
Esempio n. 38
0
def store_record(obj, *args, **kwargs):
    """Create a new record from ``obj.data`` in the main record space.

    The payload must contain ``$schema`` (checked with an ``assert``). The
    record is created, a PID is minted, the record is committed, its dump
    is written back to ``obj.data`` and the DB session is committed.
    """
    payload = obj.data
    obj.log.debug('Storing record: \n%s', pformat(payload))

    assert "$schema" in obj.data, "No $schema attribute found!"

    # FIXME: Do some preprocessing of obj.data before creating a record so that
    # we're sure that the schema will be validated without touching the full
    # holdingpen stack.
    new_record = InspireRecord.create(obj.data, id_=None)

    # Mint the persistent identifier, then persist the record changes.
    inspire_recid_minter(str(new_record.id), new_record)
    new_record.commit()

    # Reflect whatever the record now contains on the workflow object.
    obj.data = new_record.dumps()

    # Commit to DB before indexing
    db.session.commit()
Esempio n. 39
0
def store_record(obj, *args, **kwargs):
    """Store ``obj.data`` as a brand new record.

    Steps, in order: log the payload, assert that ``$schema`` is present,
    create the record, mint its PID, commit the record, copy the record
    dump back onto ``obj.data`` and commit the DB session.
    """
    obj.log.debug('Storing record: \n%s', pformat(obj.data))

    assert "$schema" in obj.data, "No $schema attribute found!"

    # FIXME: Do some preprocessing of obj.data before creating a record so that
    # we're sure that the schema will be validated without touching the full
    # holdingpen stack.
    created = InspireRecord.create(obj.data, id_=None)
    created_uuid = str(created.id)

    # Create persistent identifier.
    inspire_recid_minter(created_uuid, created)

    # Persist the record, then expose its dumped content on the object.
    created.commit()
    obj.data = created.dumps()

    # Commit to DB before indexing
    db.session.commit()
Esempio n. 40
0
def store_record(obj, eng):
    """Insert or replace the record held by the workflow object.

    Updates reuse the record found via ``head_uuid`` (or, failing that, the
    first ``record_matches`` entry) and copy its control number onto
    ``obj.data``. Inserts create the record with files skipped, mint a PID,
    download documents and figures, and record the control number and UUID
    on ``obj``. The record is then committed, ``obj`` saved and the DB
    session committed.
    """
    def _fetch_existing(workflow_obj):
        """TODO: use only head_uuid once we have the merger."""
        extra = workflow_obj.extra_data
        if 'head_uuid' in extra:
            return InspireRecord.get_record(extra['head_uuid'])

        pid_type = get_pid_type_from_schema(workflow_obj.data['$schema'])
        first_match = workflow_obj.extra_data['record_matches'][0]
        return get_db_record(pid_type, first_match)

    if obj.extra_data.get('is-update'):
        record = _fetch_existing(obj)
        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])
    else:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Create persistent identifier.
        new_recid = inspire_recid_minter(str(record.id), record).pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = new_recid
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)

    record.commit()
    obj.save()
    db.session.commit()
def test_record_enhanced_in_es_and_not_enhanced_in_db(app):
    """Check ``facet_author_name`` exists on the ES copy but not the DB copy."""
    cited_ref = {'$ref': 'http://localhost:5000/api/literature/1498589'}
    article_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [{'record': cited_ref}],
    }
    article = InspireRecord.create(article_data)
    article.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    from_db = get_db_record('lit', 111)
    from_es = get_es_record('lit', 111)

    assert 'facet_author_name' not in from_db
    assert 'facet_author_name' in from_es

    _delete_record('lit', 111)
Esempio n. 42
0
def test_regression_author_count_10_does_not_display_zero_facet(isolated_api_client):
    """Check the ``ac 10`` facet bucket count equals the ``ac 10`` hit count."""
    ten_authors = [
        {'full_name': 'Pincopallino' + str(index)} for index in range(10)
    ]
    article_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Article with 10 authors'}],
        '_collections': ['Literature'],
        'authors': ten_authors,
    }

    article = InspireRecord.create(data=article_data)
    article.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    facets_response = isolated_api_client.get('/literature/facets?q=ac%2010')
    records_response = isolated_api_client.get('/literature?q=ac%2010')
    # Tests are not isolated and environments are inconsistent, so the two
    # responses are compared with each other rather than to a fixed number.
    facets_payload = json.loads(facets_response.data)
    records_payload = json.loads(records_response.data)

    first_bucket = facets_payload['aggregations']['author_count']['buckets'][0]
    assert first_bucket['doc_count'] == records_payload['hits']['total']
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the citation indexing task raises when the citer is missing.

    A cited and a citing record are created and committed. The citing
    record is then deleted before the (mocked) indexing task is run by
    hand, which is expected to raise ``RecordGetterError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited_data = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited = InspireRecord.create(data=cited_data, skip_files=True)
    cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    cited_in_es = get_es_record('lit', 9999)
    assert cited_in_es['citation_count'] == 0
    assert get_citations_from_es(cited_in_es).total == 0

    citing_data = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }

    citing = InspireRecord.create(data=citing_data, skip_files=True)
    citing.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', citing['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*task_args)

    _delete_record('lit', cited['control_number'])
def test_index_after_commit_indexes_raises_if_cited_records_are_not_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that citing a record absent from the DB makes indexing raise.

    The citing record is first stored without a ``record`` link, then
    updated to reference literature record 9999, which this test never
    creates. Running the indexing task by hand afterwards must raise
    ``MissingCitedRecordError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    smith_reference = {'authors': [{'full_name': 'Smith, J.'}]}

    citing_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{"reference": smith_reference}],
    }

    citing = InspireRecord.create(data=citing_data, skip_files=True)
    citing.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)

    # execute mocked task
    index_modified_citations_from_record(*task_args)

    # Replace the references with one that links to record 9999.
    citing_data['references'] = [
        {
            "curated_relation": False,
            "record": {
                "$ref": "http://localhost:5000/api/literature/9999",
            },
            "reference": {
                'authors': [{'full_name': 'Smith, J.'}],
            },
        },
    ]
    citing.clear()
    citing.update(citing_data)
    citing.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    with pytest.raises(MissingCitedRecordError):
        index_modified_citations_from_record(*task_args)

    _delete_record('lit', 8888)
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the cited record's citation count rises when a citation is added.

    A cited record (9999) and a citing record (8888) without a ``record``
    link are created; the cited record's count is asserted to be 0. The
    citing record is then updated to reference 9999, and after the indexing
    task is run by hand the count is asserted to be 1. Both records are
    deleted at the end.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This is the record being cited'
        }],
        'control_number': 9999,
        '_collections': ['Literature']
    }
    # Note: no ``cited.commit()`` here, only the session commit; the task is
    # then expected with 1 as its last argument (presumably the record
    # version - confirm against the indexing task signature).
    cited = InspireRecord.create(data=json_data, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', cited['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    # The citing record starts with a reference that has no ``record`` link.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'Record citing the first one'
        }],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            "reference": {
                'authors': [{
                    'full_name': 'Smith, J.'
                }]
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', record['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    # New references payload: same reference, now linked to record 9999.
    references = {
        'references': [{
            "curated_relation": False,
            "record": {
                "$ref": "http://localhost:5000/api/literature/9999"
            },
            "reference": {
                'authors': [{
                    'full_name': 'Smith, J.'
                }],
            }
        }]
    }

    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', record['control_number'], 2
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert get_citations_from_es(es_rec).total == 1

    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that one citing record updates the counts of two cited records.

    Two cited records (9999 and 9998) and a citing record (8888) without
    ``record`` links are created; both counts are asserted to be 0. The
    citing record is then updated to reference both, and after the indexing
    task is run by hand both counts are asserted to be 1. All three records
    are deleted at the end.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This is the record being cited'
        }],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited1 = InspireRecord.create(data=json1, skip_files=True)
    cited1.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    # The last element is 2 after create + commit (presumably the record
    # version - confirm against the indexing task signature).
    expected_args = ('lit', cited1['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    json2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This also is the record being cited'
        }],
        'control_number': 9998,
        '_collections': ['Literature']
    }

    cited2 = InspireRecord.create(data=json2, skip_files=True)
    cited2.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', cited2['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites either record yet.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert get_citations_from_es(es_rec1).total == 0
    assert get_citations_from_es(es_rec2).total == 0

    # The citing record starts with a reference that has no ``record`` link.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'Record citing the first one'
        }],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            'reference': {
                'authors': [{
                    'full_name': 'Smith, J.'
                }],
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert get_citations_from_es(es_rec1).total == 0
    assert get_citations_from_es(es_rec2).total == 0

    # New references payload linking to both cited records.
    references = {
        'references': [{
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9998'
            },
        }, {
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9999'
            },
        }]
    }

    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 1
    assert es_rec2['citation_count'] == 1
    assert get_citations_from_es(es_rec1).total == 1
    assert get_citations_from_es(es_rec2).total == 1

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
Esempio n. 47
0
def test_index_after_commit_indexes_also_cites_record_when_citer_is_deleted(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the citation count drops back to 0 once the citer is deleted.

    The cited record's count is asserted at three points: 0 after it is
    created, 1 after a record citing it is stored and indexed, and 0 again
    after that citing record is deleted and indexed.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited_data = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited = InspireRecord.create(data=cited_data, skip_files=True)
    cited.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    cited_in_es = get_es_record('lit', 9999)
    assert cited_in_es['citation_count'] == 0
    assert LiteratureSearch.citations(cited_in_es).total == 0

    citing_data = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }

    citing = InspireRecord.create(data=citing_data, skip_files=True)
    citing.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', citing['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    cited_in_es = get_es_record('lit', 9999)
    assert cited_in_es['citation_count'] == 1
    assert LiteratureSearch.citations(cited_in_es).total == 1

    # Delete the citer and index again: the citation must disappear.
    citing.delete()
    citing.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', citing['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    cited_in_es = get_es_record('lit', 9999)
    assert cited_in_es['citation_count'] == 0
    assert LiteratureSearch.citations(cited_in_es).total == 0

    _delete_record('lit', citing['control_number'])
    _delete_record('lit', cited['control_number'])
Esempio n. 48
0
def test_update_with_only_new(app):
    """Check ``update(..., only_new=True)`` keeps stored files and adds new ones.

    A record with one document is created, then updated with two documents:
    one whose key matches the already stored file and one new
    ``Fulltext.pdf``. The stored document must keep its content and URL,
    and the new one must be stored under ``1_Fulltext.pdf_1`` with the
    content served by the patched ``fsopen``.
    """
    doc1_expected_file_content = 'doc1 body'
    doc1_expected_key = '1_Fulltext.pdf'
    doc2_expected_file_content = 'doc2 body'
    doc2_expected_key = '1_Fulltext.pdf_1'

    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
        }],
    }

    update_to_record = {
        'documents': [
            {
                'key': doc1_expected_key,
                'url': '/api/files/somebucket/somefile',
            },
            {
                'key': 'Fulltext.pdf',
                'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf',
            },
        ],
    }

    record = InspireRecord.create(record_json)

    def _read_stored_file(file_key):
        """Return the content of the record's stored file ``file_key``."""
        # ``with`` closes the handle; the bare ``open(...).read()`` used
        # before left it open until garbage collection.
        with open(record.files[file_key].obj.file.uri) as stored_file:
            return stored_file.read()

    assert doc1_expected_key in record.files.keys
    assert len(record.files) == len(record_json['documents'])
    assert len(record['documents']) == len(record_json['documents'])
    assert _read_stored_file(doc1_expected_key) == doc1_expected_file_content

    doc1_old_api_url = record['documents'][0]['url']
    record.clear()
    record_json.update(copy.deepcopy(update_to_record))

    # Only the download of the new document goes through the patched fsopen.
    with patch(
        'inspirehep.modules.records.api.fsopen',
        mock_open(read_data=doc2_expected_file_content),
    ):
        record.update(record_json, only_new=True)

    assert len(record['documents']) == len(update_to_record['documents'])
    for document in record['documents']:
        assert document['key'] in [doc1_expected_key, doc2_expected_key]
        if document['key'] == doc1_expected_key:
            file_content = _read_stored_file(doc1_expected_key)
            assert file_content == doc1_expected_file_content
            assert document['url'] == doc1_old_api_url

        elif document['key'] == doc2_expected_key:
            file_content = _read_stored_file(doc2_expected_key)
            assert file_content == doc2_expected_file_content
Esempio n. 49
0
def test_literature_citations_api_with_superseded_records(app, api_client):
    """Check that a citation from a superseded record is not reported.

    Record 222 cites record 111 but carries a ``successor`` related record;
    the citations endpoint for 111 is expected to return no citations.
    """
    cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    cited = InspireRecord.create(cited_data)
    cited.commit()

    successor_link = {
        'record': {'$ref': 'https://link-to-successor'},
        'relation': 'successor',
    }
    superseded_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'related_records': [successor_link],
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'references': [
            {'record': {'$ref': cited._get_ref()}},
        ],
        '_collections': ['Literature'],
    }
    superseded = InspireRecord.create(superseded_data)
    superseded.commit()
    db.session.commit()

    current_search.flush_and_refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'},
    )
    body = json.loads(response.get_data(as_text=True))

    expected_metadata = {"citation_count": 0, "citations": []}

    # Sort both sides so the comparison does not depend on ordering.
    expected_metadata['citations'].sort()
    body['metadata']['citations'].sort()

    assert response.status_code == 200
    assert expected_metadata == body['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
Esempio n. 50
0
def test_literature_citations_api_sorted_by_earliest_date(api_client):
    """Check that citations are listed by ``earliest_date``, newest first.

    Three records with different preprint dates cite record 111; the
    citations endpoint is expected to return them from the most recent
    date down to the oldest one.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited = InspireRecord.create({
        '$schema': hep_schema,
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    })
    cited.commit()

    # (control_number, title, preprint_date) of every citing record,
    # listed from the oldest preprint date to the newest.
    citing_specs = [
        (222, 'Frank Castle', '2013-10-08'),
        (333, 'Luke Cage', '2015-10-08'),
        (444, 'John Doe', '2015-11-08'),
    ]
    for control_number, title, preprint_date in citing_specs:
        citing = InspireRecord.create({
            '$schema': hep_schema,
            'document_type': ['article'],
            'control_number': control_number,
            'titles': [{'title': title}],
            'preprint_date': preprint_date,
            'references': [{'record': {'$ref': cited._get_ref()}}],
            '_collections': ['Literature'],
        })
        citing.commit()

    db.session.commit()
    es.indices.refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'},
    )
    payload = json.loads(response.get_data(as_text=True))

    # The endpoint is expected to answer newest first, i.e. in the reverse
    # of the creation order used above.
    expected_metadata = {
        "citation_count": 3,
        "citations": [
            {
                "control_number": control_number,
                "titles": [{"title": title}],
                "earliest_date": preprint_date,
            }
            for control_number, title, preprint_date in reversed(citing_specs)
        ],
    }

    assert response.status_code == 200
    assert expected_metadata == payload['metadata']

    for control_number in (111, 222, 333, 444):
        _delete_record('lit', control_number)
Esempio n. 51
0
def test_literature_citations_api_with_not_existing_pid_value(api_client):
    """Check that asking citations of an unknown control number gives a 404.

    Records 111, 222 and 333 are created, then the citations endpoint is
    queried for 444, which was never created.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited = InspireRecord.create({
        '$schema': hep_schema,
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    })
    cited.commit()

    # (control_number, title) of the records citing 111.
    for control_number, title in ((222, 'Frank Castle'), (333, 'Luke Cage')):
        citing = InspireRecord.create({
            '$schema': hep_schema,
            'document_type': ['article'],
            'control_number': control_number,
            'titles': [{'title': title}],
            'references': [{'record': {'$ref': cited._get_ref()}}],
            '_collections': ['Literature'],
        })
        citing.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get(
        '/literature/444/citations',
        headers={'Accept': 'application/json'},
    )

    assert response.status_code == 404

    for control_number in (111, 222, 333):
        _delete_record('lit', control_number)
Esempio n. 52
0
def test_literature_citations_api_with_full_citing_record(api_client):
    """Check the fields exposed for each citation of a record.

    Record 222 (with authors and publication info) and record 333 both cite
    record 111. The citations endpoint is expected to expose the control
    number and titles of each citing record, plus the authors when present.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': [
            'article',
        ],
        'control_number': 111,
        'titles': [
            {
                'title': 'Jessica Jones',
            },
        ],
        '_collections': ['Literature']
    }
    record = InspireRecord.create(record_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': [
            'article',
        ],
        'control_number': 222,
        'titles': [
            {
                'title': 'Frank Castle',
            },
        ],
        'authors': [{
            "full_name": "Urhan, Ahmet",
        }],
        'publication_info': [{
            "artid":
            "HAL Id : hal-01735421, https://hal.archives-ouvertes.fr/hal-01735421",
            "page_start": "1",
        }],
        'references': [{
            'record': {
                '$ref': record._get_ref()
            }
        }],
        '_collections': ['Literature']
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': [
            'article',
        ],
        'control_number': 333,
        'titles': [
            {
                'title': 'Luke Cage',
            },
        ],
        'references': [{
            'record': {
                '$ref': record._get_ref()
            }
        }],
        '_collections': ['Literature']
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get('/literature/111/citations',
                              headers={'Accept': 'application/json'})

    # Check the status before touching the payload, so that an error
    # response fails here instead of with a KeyError further down.
    assert response.status_code == 200

    result = json.loads(response.get_data(as_text=True))

    def by_control_number(citation):
        """Sort key giving a deterministic order of the citations."""
        return citation['control_number']

    # Sort on an explicit key: comparing dicts directly relies on the
    # implicit dict ordering of Python 2 and raises TypeError on Python 3.
    result['metadata']['citations'].sort(key=by_control_number)

    expected_metadata = {
        "citation_count": 2,
        "citations": [{
            'authors': [{
                "full_name": "Urhan, Ahmet",
                "first_name": "Ahmet",
                "last_name": "Urhan",
                "signature_block": "URANa",
                # The uuid is generated on creation, so it is copied from
                # the response; record 222 sorts first by control number.
                "uuid":
                result['metadata']['citations'][0]['authors'][0]['uuid']
            }],
            'control_number': 222,
            'titles': [
                {
                    'title': 'Frank Castle',
                },
            ]
        }, {
            "control_number": 333,
            "titles": [{
                "title": "Luke Cage"
            }]
        }]
    }

    expected_metadata['citations'].sort(key=by_control_number)

    assert expected_metadata == result['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
Esempio n. 53
0
def test_create_with_multiple_source_records(isolated_app):
    """Check creating a record whose documents come from two source records.

    Records 1 and 2 each hold one document with the same key. Record 3 is
    created with copies of both documents and with records 1 and 2 passed
    as ``files_src_records``; it is expected to end up with two files, under
    distinct keys, with the same content and the original descriptions.
    """
    def read_file(record, key):
        """Return the content of ``record``'s file ``key``, closing the handle."""
        with open(record.files[key].obj.file.uri) as file_obj:
            return file_obj.read()

    expected_file_content = 'dummy body'
    rec1_expected_key = '1_Fulltext.pdf'
    rec2_expected_key = '2_Fulltext.pdf'
    rec3_expected_keys = [
        '3_Fulltext.pdf',
        '3_Fulltext.pdf_1',
    ]

    record1_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record1 document',
        }],
    }

    record2_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 2,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record2 document',
        }],
    }

    record3_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 3,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }

    record1 = InspireRecord.create(record1_json)
    assert read_file(record1, rec1_expected_key) == expected_file_content

    record2 = InspireRecord.create(record2_json)
    assert read_file(record2, rec2_expected_key) == expected_file_content

    # Record 3 reuses the documents of both source records.
    record3_json['documents'] = copy.deepcopy(record1['documents'])
    record3_json['documents'].extend(copy.deepcopy(record2['documents']))
    record3 = InspireRecord.create(
        record3_json,
        files_src_records=[record1, record2],
    )

    assert len(record3.files) == (
        len(record1_json['documents']) +
        len(record2_json['documents'])
    )
    assert rec3_expected_keys == record3.files.keys
    for file_key in record3.files.keys:
        assert read_file(record3, file_key) == expected_file_content

    expected_descs = [
        orig_doc['description'] for orig_doc in record3_json['documents']
    ]
    current_descs = [
        doc['description'] for doc in record3['documents']
    ]
    assert current_descs == expected_descs
Esempio n. 54
0
def create_author(profile):
    """Create a new author profile based on a given signature.

    The method receives a dictionary representing an author and builds an
    author record out of it, which is created through
    ``InspireRecord.create``. A recid is then minted for the new record and
    stored in it together with its ``self`` reference, and everything is
    committed.

    While the record is created, ``append_new_record_to_queue`` is
    disconnected from the ``after_record_insert`` signal; it is reconnected
    afterwards, even if the creation fails.

    :param profile:
        A signature representing an author's to be created as a profile.

        Example:
            profile = {u'affiliations': [{u'value': u'Yerevan Phys. Inst.'}],
                       u'alternative_name': None,
                       u'curated_relation': False,
                       u'email': None,
                       u'full_name': u'Chatrchyan, Serguei',
                       u'inspire_id': None,
                       u'orcid': None,
                       u'profile': u'',
                       u'recid': None,
                       u'role': None,
                       u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c'}

    :return:
        A recid, where the new profile can be accessed.

        Example:
            "1234"
    """
    # Template of an initial record.
    record = {'collections': [{'primary': 'HEPNAMES'}],
              'name': {'value': profile.get('full_name')},
              '$schema': _get_author_schema()}

    # The author's email address.
    # Unfortunately the method will not correlate a given e-mail address
    # with an affiliation.
    if 'email' in profile:
        record['positions'] = [{'email': profile.get('email')}]

    # The author can be a member of more than one affiliation.
    if 'affiliations' in profile:
        positions = record.setdefault('positions', [])

        # An explicit ``None`` value is treated as "no affiliations".
        for affiliation in profile.get('affiliations') or []:
            institution = {'name': affiliation.get('value')}
            recid = affiliation.get('recid', None)

            if recid:
                institution['recid'] = recid

            positions.append({'institution': institution})

    # FIXME: The method should also collect the useful data
    #        from the publication, like category field, subject,
    #        etc.

    # Disconnect the signal on insert of a new record.
    after_record_insert.disconnect(append_new_record_to_queue)

    try:
        # Create a new author profile.
        record = InspireRecord.create(record, id_=None)

        # Create Inspire recid.
        record_pid = inspire_recid_minter(record.id, record)

        # Extend the new record with Inspire recid and self key.
        record['control_number'] = record_pid.pid_value
        record['self'] = inspire_dojson_utils.get_record_ref(
            record_pid.pid_value, 'authors')

        # Apply the changes.
        record.commit()
        db.session.commit()
    finally:
        # Reconnect the disconnected signal, also when something above
        # raised: otherwise the receiver would stay disconnected.
        after_record_insert.connect(append_new_record_to_queue)

    # Report.
    logger.info("Created profile: %s", record_pid.pid_value)

    # Return the recid of new profile to which signatures will point to.
    return record_pid.pid_value
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the citation counts of two records cited by the same record.

    Records 9999 and 9998 start without citations; record 8888 is created
    with a reference pointing to no record, and is then updated to
    reference both of them. After each commit the indexing task is checked
    to have been requested and is executed by hand.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    def commit_and_index(rec, version):
        """Commit ``rec``, then check and run the indexing task for it.

        ``version`` is the third argument the task is expected to have been
        called with (presumably the record version).
        """
        rec.commit()
        db.session.commit()
        es.indices.refresh('records-hep')

        task_args = ('lit', rec['control_number'], version)
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_citation_count_of_cited_records(expected):
        """Check both cited records report ``expected`` citations."""
        first_cited = get_es_record('lit', 9999)
        second_cited = get_es_record('lit', 9998)
        assert first_cited['citation_count'] == expected
        assert second_cited['citation_count'] == expected
        assert LiteratureSearch.citations(first_cited).total == expected
        assert LiteratureSearch.citations(second_cited).total == expected

    cited1 = InspireRecord.create(
        data={
            '$schema': hep_schema,
            'document_type': ['article'],
            'titles': [{'title': 'This is the record being cited'}],
            'control_number': 9999,
            '_collections': ['Literature'],
        },
        skip_files=True,
    )
    commit_and_index(cited1, 2)

    cited2 = InspireRecord.create(
        data={
            '$schema': hep_schema,
            'document_type': ['article'],
            'titles': [{'title': 'This also is the record being cited'}],
            'control_number': 9998,
            '_collections': ['Literature'],
        },
        skip_files=True,
    )
    commit_and_index(cited2, 2)

    assert_citation_count_of_cited_records(0)

    citing_json = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    commit_and_index(record, 2)

    # The only reference points to no record, so nothing is cited yet.
    assert_citation_count_of_cited_records(0)

    # Replace the references with links to both cited records.
    citing_json['references'] = [
        {'record': {'$ref': 'http://localhost:5000/api/literature/9998'}},
        {'record': {'$ref': 'http://localhost:5000/api/literature/9999'}},
    ]
    record.clear()
    record.update(citing_json)
    commit_and_index(record, 3)

    assert_citation_count_of_cited_records(1)

    for to_delete in (record, cited1, cited2):
        _delete_record('lit', to_delete['control_number'])
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the citation count of a record once a citation to it is added.

    Record 9999 starts without citations; record 8888 is created with a
    reference pointing to no record, and is then updated so that the
    reference links to 9999. After each DB commit the indexing task is
    checked to have been requested and is executed by hand.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'
    author_reference = {'authors': [{'full_name': 'Smith, J.'}]}

    def flush_and_index(rec, version):
        """Commit the session, then check and run the indexing task.

        ``version`` is the third argument the task is expected to have been
        called with (presumably the record version).
        """
        db.session.commit()
        es.indices.refresh('records-hep')

        task_args = 'lit', rec['control_number'], version
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_citation_count_of_cited_record(expected):
        """Check record 9999 reports ``expected`` citations."""
        cited_in_es = get_es_record('lit', 9999)
        assert cited_in_es['citation_count'] == expected
        assert LiteratureSearch.citations(cited_in_es).total == expected

    cited = InspireRecord.create(
        data={
            '$schema': hep_schema,
            'document_type': ['article'],
            'titles': [{'title': 'This is the record being cited'}],
            'control_number': 9999,
            '_collections': ['Literature'],
        },
        skip_files=True,
    )
    flush_and_index(cited, 1)
    assert_citation_count_of_cited_record(0)

    citing_json = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {"reference": author_reference},
        ],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    flush_and_index(record, 1)

    # The only reference points to no record, so nothing is cited yet.
    assert_citation_count_of_cited_record(0)

    # Link the reference to the cited record and store the new revision.
    citing_json['references'] = [
        {
            "curated_relation": False,
            "record": {
                "$ref": "http://localhost:5000/api/literature/9999"
            },
            "reference": {
                'authors': [{'full_name': 'Smith, J.'}],
            },
        },
    ]
    record.clear()
    record.update(citing_json)
    record.commit()
    flush_and_index(record, 2)

    assert_citation_count_of_cited_record(1)

    for control_number in (8888, 9999):
        _delete_record('lit', control_number)
Esempio n. 57
0
def test_create_with_multiple_source_records(app):
    """Check creating a record whose documents come from two source records.

    Records 1 and 2 each hold one document with the same key. Record 3 is
    created with copies of both documents and with records 1 and 2 passed
    as ``files_src_records``; it is expected to end up with two files, under
    distinct keys, with the same content and the original descriptions.
    """
    def stored_content(record, key):
        """Read the file ``key`` of ``record``; the handle is always closed."""
        with open(record.files[key].obj.file.uri) as file_obj:
            return file_obj.read()

    expected_file_content = 'dummy body'
    rec1_expected_key = '1_Fulltext.pdf'
    rec2_expected_key = '2_Fulltext.pdf'
    rec3_expected_keys = [
        '3_Fulltext.pdf',
        '3_Fulltext.pdf_1',
    ]

    record1_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record1 document',
        }],
    }

    record2_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 2,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record2 document',
        }],
    }

    record3_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 3,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }

    record1 = InspireRecord.create(record1_json)
    assert stored_content(record1, rec1_expected_key) == expected_file_content

    record2 = InspireRecord.create(record2_json)
    assert stored_content(record2, rec2_expected_key) == expected_file_content

    # Record 3 reuses the documents of both source records.
    record3_json['documents'] = copy.deepcopy(record1['documents'])
    record3_json['documents'].extend(copy.deepcopy(record2['documents']))
    record3 = InspireRecord.create(
        record3_json,
        files_src_records=[record1, record2],
    )

    assert len(record3.files) == (
        len(record1_json['documents']) +
        len(record2_json['documents'])
    )
    assert rec3_expected_keys == record3.files.keys
    for file_key in record3.files.keys:
        assert stored_content(record3, file_key) == expected_file_content

    expected_descs = [
        orig_doc['description'] for orig_doc in record3_json['documents']
    ]
    current_descs = [
        doc['description'] for doc in record3['documents']
    ]
    assert current_descs == expected_descs
Esempio n. 58
0
def test_literature_citations_api_with_parameter_page_2(api_client):
    """Check the second page of citations when one citation fits per page.

    Records 222 and 333 both cite record 111. With ``size=1&page=2`` the
    endpoint is expected to return a single citation together with the
    total count; either citing record is accepted on that page.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited = InspireRecord.create({
        '$schema': hep_schema,
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    })
    cited.commit()

    # (control_number, title) of the records citing 111.
    citing_specs = [
        (222, 'Frank Castle'),
        (333, 'Luke Cage'),
    ]
    for control_number, title in citing_specs:
        citing = InspireRecord.create({
            '$schema': hep_schema,
            'document_type': ['article'],
            'control_number': control_number,
            'titles': [{'title': title}],
            'references': [{'record': {'$ref': cited._get_ref()}}],
            '_collections': ['Literature'],
        })
        citing.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations?size=1&page=2',
        headers={'Accept': 'application/json'},
    )
    payload = json.loads(response.get_data(as_text=True))

    # One acceptable page per citing record.
    expected_metadata = [
        {
            "citation_count": 2,
            "citations": [
                {
                    "control_number": control_number,
                    "titles": [{"title": title}],
                },
            ],
        }
        for control_number, title in citing_specs
    ]

    assert response.status_code == 200
    assert payload['metadata'] in expected_metadata

    for control_number in (111, 222, 333):
        _delete_record('lit', control_number)
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Regression test: run the indexing task for a record missing from the DB.

    A cited record (9999) and a record citing it (8888) are created and
    committed. The citing record is then deleted before the indexing task is
    executed by hand, and the task is expected to raise ``RecordGetterError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited = InspireRecord.create(data=json_data, skip_files=True)
    cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    # Arguments the indexing task is expected to have been called with.
    expected_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites the record yet.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999'
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                }
            }
        ]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*expected_args)

    # Clean up the cited record; the citing one was already deleted above.
    _delete_record('lit', cited['control_number'])