def _delete_merged_records(pid_type, merged_pid_value, deleted_pid_value, merged_uuid, deleted_uuid):
    """Force-delete both records of a merge together with their PIDs.

    Also removes every ``Redirect`` row whose ``id`` equals the object UUID
    of the deleted record's PID, then commits the session.

    :param pid_type: PID type used to look up both persistent identifiers.
    :param merged_pid_value: PID value of the merged record.
    :param deleted_pid_value: PID value of the deleted record.
    :param merged_uuid: UUID of the merged record.
    :param deleted_uuid: UUID of the deleted record.
    """
    # Records go first, merged before deleted.
    for record_uuid in (merged_uuid, deleted_uuid):
        InspireRecord.get_record(record_uuid)._delete(force=True)

    pids = [
        PersistentIdentifier.get(pid_type, pid_value)
        for pid_value in (merged_pid_value, deleted_pid_value)
    ]
    redirected_pid = pids[1]
    Redirect.query.filter(Redirect.id == redirected_pid.object_uuid).delete()

    for stale_pid in pids:
        db.session.delete(stale_pid)
    db.session.commit()
def _delete_merged_records(pid_type, merged_pid_value, deleted_pid_value, merged_uuid, deleted_uuid):
    """Clean up after a merge: drop both records, the redirect and both PIDs.

    :param pid_type: PID type used to look up both persistent identifiers.
    :param merged_pid_value: PID value of the merged record.
    :param deleted_pid_value: PID value of the deleted record.
    :param merged_uuid: UUID of the merged record.
    :param deleted_uuid: UUID of the deleted record.
    """
    merged_record = InspireRecord.get_record(merged_uuid)
    merged_record._delete(force=True)
    deleted_record = InspireRecord.get_record(deleted_uuid)
    deleted_record._delete(force=True)

    pid_for_merged = PersistentIdentifier.get(pid_type, merged_pid_value)
    pid_for_deleted = PersistentIdentifier.get(pid_type, deleted_pid_value)

    # Redirect rows are matched on the object UUID of the deleted PID.
    redirects = Redirect.query.filter(Redirect.id == pid_for_deleted.object_uuid)
    redirects.delete()

    db.session.delete(pid_for_merged)
    db.session.delete(pid_for_deleted)
    db.session.commit()
def test_update_authors_recid_method(small_app):
    """Check that ``update_authors_recid`` stores the new recid on the signature."""
    from inspirehep.modules.disambiguation.tasks import update_authors_recid

    publication_id = str(PersistentIdentifier.get('lit', 4328).object_uuid)
    first_author = InspireRecord.get_record(publication_id)['authors'][0]
    signature = first_author['uuid']
    profile_recid = "314159265"

    update_authors_recid(publication_id, signature, profile_recid)

    # Reload the record so the assertion sees what was actually persisted.
    stored_author = InspireRecord.get_record(publication_id)['authors'][0]
    assert stored_author['recid'] == profile_recid
def sample_record(app):
    """Yield an indexed HEP record with control number 123, then purge it.

    Teardown deletes the ``lit`` PID, force-deletes the record, clears the
    versioning transaction table and commits.
    """
    payload = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "control_number": 123,
        "titles": [
            {"title": "Supersymmetric gauge field theory and string theory"},
        ],
        "collections": [
            {"primary": "HEP"},
            {"primary": "THESIS"},
        ],
    }
    created = _create_and_index_record(payload)
    created_uuid = created.id

    yield created

    db.session.delete(PersistentIdentifier.get('lit', '123'))
    InspireRecord.get_record(created_uuid)._delete(force=True)
    versioning_manager = current_app.extensions['invenio-db'].versioning_manager
    versioning_manager.transaction_cls.query.delete()
    db.session.commit()
def record_insert_or_replace(json, skip_files=False):
    """Insert or replace a record.

    If a PID already exists for the record's control number, the stored
    record is cleared and refilled from ``json``; otherwise a new record is
    created and a recid is minted for it.

    :param json: record metadata; must contain ``$schema`` and
        ``control_number``.
    :param skip_files: forwarded to ``InspireRecord.update``/``create``.
    :return: the updated or newly created record.
    """
    def _restore_legacy_creation_date(target):
        # Keep the creation date carried over from the legacy system, if any.
        if json.get('legacy_creation_date'):
            target.model.created = datetime.strptime(
                json['legacy_creation_date'], '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(json['$schema'])
    control_number = json['control_number']

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        _restore_legacy_creation_date(record)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        _restore_legacy_creation_date(record)
        inspire_recid_minter(str(record.id), json)

    # A deleted record is only removed when it does not point to a new record.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def actions():
    """Yield one ``create_index_op`` result per UUID in ``uuids``.

    ``uuids`` is not a parameter; it is resolved from the surrounding scope.
    A UUID whose record lookup raises ``NoResultFound`` is logged and
    skipped so that one missing record does not abort the whole run.
    """
    for uuid in uuids:
        try:
            record = InspireRecord.get_record(uuid)
            yield create_index_op(record, version_type='force')
        except NoResultFound as e:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning('Record %s failed to load: %s', uuid, e)
def record_insert_or_replace(json, skip_files=False):
    """Insert or replace a record.

    :param json: record metadata; must contain ``$schema`` and
        ``control_number``.
    :param skip_files: forwarded to ``InspireRecord.update``/``create``.
    :return: the record that was updated in place or freshly created.
    """
    control_number = json['control_number']
    pid_type = get_pid_type_from_schema(json['$schema'])
    date_format = '%Y-%m-%d'

    try:
        # Replace path: the PID lookup raises when the record is unknown.
        known_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(known_pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        if json.get('legacy_creation_date'):
            legacy_created = datetime.strptime(json['legacy_creation_date'], date_format)
            record.model.created = legacy_created
        record.commit()
    except PIDDoesNotExistError:
        # Insert path: create the record, then mint its recid.
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        if json.get('legacy_creation_date'):
            legacy_created = datetime.strptime(json['legacy_creation_date'], date_format)
            record.model.created = legacy_created
        inspire_recid_minter(str(record.id), json)

    if not json.get('deleted'):
        return record

    replacement_recid = get_recid_from_ref(json.get('new_record'))
    if not replacement_recid:
        record.delete()
    return record
def index_by_id(self, record_uuid):
    """Index a record by record identifier.

    Args:
        record_uuid: Record uuid.

    Returns:
        Whatever ``self.index`` returns for the loaded record.
    """
    record = InspireRecord.get_record(record_uuid)
    return self.index(record)
def store_record(obj, eng):
    """Insert or replace a record.

    For an update the head record (``extra_data['head_uuid']``) is cleared
    and refilled from ``obj.data``; otherwise a new record is created. The
    record and the workflow object are then saved and the session committed.

    Updates of non-author workflows are skipped, without saving anything,
    while ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.
    """
    updating = obj.extra_data.get('is-update')
    authors_workflow = eng.workflow_definition.data_type == 'authors'

    if not updating:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])
        obj.data['control_number'] = record['control_number']
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        if not (authors_workflow or current_app.config.get('FEATURE_FLAG_ENABLE_MERGER', False)):
            obj.log.info(
                'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
            )
            return

        record = InspireRecord.get_record(obj.extra_data['head_uuid'])
        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
def sample_record(app):
    """Yield an indexed literature record with control number 111, then purge it.

    Teardown deletes the ``lit`` PID, force-deletes the record, clears the
    versioning transaction table and commits.
    """
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'sample'}],
    }
    created = _create_and_index_record(payload)
    created_uuid = created.id

    yield created

    db.session.delete(PersistentIdentifier.get('lit', '111'))
    InspireRecord.get_record(created_uuid)._delete(force=True)
    versioning_manager = current_app.extensions['invenio-db'].versioning_manager
    versioning_manager.transaction_cls.query.delete()
    db.session.commit()
def store_record(obj, eng):
    """Insert or replace a record.

    When ``FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT`` is set, the work is
    delegated to ``store_record_inspirehep_api``. Otherwise the record is
    created or updated locally inside a nested database transaction.

    Updates of non-author workflows are skipped, without saving anything,
    while ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.
    """
    is_update = obj.extra_data.get('is-update')
    is_authors = eng.workflow_definition.data_type == 'authors'

    if current_app.config.get("FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT"):
        store_record_inspirehep_api(obj, eng, is_update, is_authors)
        return

    with db.session.begin_nested():
        if not is_update:
            # Skip the files to avoid issues in case the record has already pid
            # TODO: remove the skip files once labs becomes master
            record = InspireRecord.create(obj.data, id_=None, skip_files=True)
            # Now that we have a recid, we can properly download the documents
            record.download_documents_and_figures(src_records=[obj])
            obj.data['control_number'] = record['control_number']
            # store head_uuid to store the root later
            obj.extra_data['head_uuid'] = str(record.id)
        else:
            if not (is_authors or current_app.config.get('FEATURE_FLAG_ENABLE_MERGER', False)):
                obj.log.info(
                    'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
                )
                return

            record = InspireRecord.get_record(obj.extra_data['head_uuid'])
            obj.data['control_number'] = record['control_number']
            record.clear()
            record.update(obj.data, files_src_records=[obj])

        record.commit()
        obj.save()
def sample_record(app):
    """Provide an indexed sample literature record and remove it afterwards.

    The record carries control number 111. After the ``yield`` its ``lit``
    PID is deleted, the record is force-deleted, the versioning transaction
    table is emptied and the session is committed.
    """
    sample = dict([
        ('$schema', 'http://localhost:5000/schemas/records/hep.json'),
        ('_collections', ['Literature']),
        ('control_number', 111),
        ('document_type', ['article']),
        ('titles', [{'title': 'sample'}]),
    ])
    indexed = _create_and_index_record(sample)
    indexed_uuid = indexed.id

    yield indexed

    lit_pid = PersistentIdentifier.get('lit', '111')
    db.session.delete(lit_pid)
    stored = InspireRecord.get_record(indexed_uuid)
    stored._delete(force=True)
    transaction_cls = current_app.extensions['invenio-db'].versioning_manager.transaction_cls
    transaction_cls.query.delete()
    db.session.commit()
def test_appoint_profile_from_claimed_signature(small_app):
    """Check the module for the case where claimed signature takes everything."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    def _reindex(record_uuid, body):
        # Push the modified document to Elasticsearch and make it searchable.
        es.index(index='records-hep', doc_type='hep', id=record_uuid, body=body)
        es.indices.refresh('records-hep')

    # Signature without the curated flag, currently pointing at profile "2".
    old_record_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_signature = old_record['authors'][0]
    old_author_uuid = old_signature['uuid']
    old_signature['signature_block'] = "HAGp"
    old_signature['recid'] = "2"
    _reindex(old_record_id, old_record)

    # Claimed (curated) signature pointing at profile "314159265".
    record_id = str(PersistentIdentifier.get('lit', 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    claimed_signature = record['authors'][0]
    author_uuid = claimed_signature['uuid']
    claimed_signature['signature_block'] = "HAGp"
    claimed_signature['recid'] = "314159265"
    claimed_signature['curated_relation'] = True
    _reindex(record_id, record)

    clusters = ({"2": [old_author_uuid, author_uuid]}, {})
    delay_target = "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay"
    with patch("celery.current_app.send_task", return_value=_BeardObject(clusters)):
        with patch(delay_target, side_effect=update_authors_recid):
            disambiguation_clustering("HAGp")

    for checked_id in (old_record_id, record_id):
        assert InspireRecord.get_record(checked_id)['authors'][0]['recid'] == \
            "314159265"
def actions():
    """Yield one ``create_index_op`` result per non-deleted record in ``uuids``.

    ``uuids`` is not a parameter; it is resolved from the surrounding scope.
    Records flagged ``deleted`` are skipped with a debug message, and UUIDs
    whose lookup raises ``NoResultFound`` are logged and skipped.
    """
    for uuid in uuids:
        try:
            record = InspireRecord.get_record(uuid)
            if record.get('deleted', False):
                logger.debug("Record already %s deleted, not indexing!", uuid)
                continue
            yield create_index_op(record, version_type='force')
        except NoResultFound as e:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning('Record %s failed to load: %s', uuid, e)
def test_appoint_profile_from_claimed_signature(small_app):
    """Check the module for the case where claimed signature takes everything."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    expected_recid = "314159265"

    # First record: signature attached to profile "2", no curated flag set.
    old_record_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_author_uuid = old_record['authors'][0]['uuid']
    old_record['authors'][0].update({'signature_block': "HAGp", 'recid': "2"})
    es.index(index='records-hep', doc_type='hep', id=old_record_id, body=old_record)
    es.indices.refresh('records-hep')

    # Second record: curated signature attached to the expected profile.
    record_id = str(PersistentIdentifier.get('lit', 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record['authors'][0]['uuid']
    record['authors'][0].update({
        'signature_block': "HAGp",
        'recid': "314159265",
        'curated_relation': True,
    })
    es.index(index='records-hep', doc_type='hep', id=record_id, body=record)
    es.indices.refresh('records-hep')

    beard_result = _BeardObject(({"2": [old_author_uuid, author_uuid]}, {}))
    with patch("celery.current_app.send_task", return_value=beard_result), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    old_recid = InspireRecord.get_record(old_record_id)['authors'][0]['recid']
    assert old_recid == expected_recid
    new_recid = InspireRecord.get_record(record_id)['authors'][0]['recid']
    assert new_recid == expected_recid
def test_append_updated_record_to_queue_same_data(small_app):
    """Check if for the same record, the receiver will skip the publication."""
    publication_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    record = InspireRecord.get_record(publication_id)

    # Passing the same object twice means there is nothing new to queue.
    append_updated_record_to_queue(None, record, record, 'records-hep', 'hep')

    latest_queued = DisambiguationRecord.query.order_by(desc('id')).first()
    assert str(record.id) != latest_queued.record_id
def test_append_updated_record_to_queue_same_data(small_app):
    """Check that the receiver does not queue a record whose data is unchanged."""
    lit_pid = PersistentIdentifier.get('lit', 11883)
    record = InspireRecord.get_record(str(lit_pid.object_uuid))

    append_updated_record_to_queue(None, record, record, "records-hep", "hep")

    newest_entry = DisambiguationRecord.query.order_by(desc("id")).first()
    queued_record_id = newest_entry.record_id
    assert str(record.id) != queued_record_id
def _get_updated_record(obj):
    """Return the record that the workflow object updates.

    Uses ``extra_data['head_uuid']`` when present, otherwise looks the
    record up from the approved match.

    TODO: use only head_uuid once we have the merger.
    """
    if 'head_uuid' in obj.extra_data:
        return InspireRecord.get_record(obj.extra_data['head_uuid'])

    pid_type = get_pid_type_from_schema(obj.data['$schema'])
    approved_recid = obj.extra_data['matches']['approved']
    return get_db_record(pid_type, approved_recid)
def actions():
    """Yield one ``create_index_op`` result per non-deleted record in ``uuids``.

    ``uuids`` is not a parameter; it is resolved from the surrounding scope.
    Records flagged ``deleted`` are skipped with a debug message, and UUIDs
    whose lookup raises ``NoResultFound`` are logged and skipped.
    """
    for uuid in uuids:
        try:
            record = InspireRecord.get_record(uuid)
            if record.get('deleted', False):
                logger.debug("Record already %s deleted, not indexing!", uuid)
                continue
            yield create_index_op(record, version_type='force')
        except NoResultFound as e:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning('Record %s failed to load: %s', uuid, e)
def _get_updated_record(obj):
    """Return the record that the workflow object updates.

    Uses ``extra_data['head_uuid']`` when present, otherwise falls back to
    the first entry of ``extra_data['record_matches']``.

    TODO: use only head_uuid once we have the merger.
    """
    if 'head_uuid' in obj.extra_data:
        return InspireRecord.get_record(obj.extra_data['head_uuid'])

    pid_type = get_pid_type_from_schema(obj.data['$schema'])
    first_match = obj.extra_data['record_matches'][0]
    return get_db_record(pid_type, first_match)
def record_to_merge(workflow_app):
    """Yield a committed thesis record, then remove its PID and delete it.

    The record is created without files, committed, and the ``records-hep``
    index is refreshed before the ``yield``.
    """
    json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [
            {'full_name': 'Jessica, Jones'},
        ],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [
            {'title': 'Alias Investigations'},
        ],
        'dois': [
            {'value': '10.1007/978-3-319-15001-7'},
        ],
    }

    created = InspireRecord.create(json, id_=None, skip_files=True)
    created.commit()
    rec_uuid = created.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield created

    # Teardown: reload the record so the cleanup works on current state.
    stored = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(pid_type='lit', pid_value=stored['control_number'])
    lit_pid.unassign()
    lit_pid.delete()
    stored.delete()
    stored.commit()
def record_to_merge(workflow_app):
    """Yield a committed thesis record, then remove its PID and delete it.

    The record is created without files, committed, and the ``records-hep``
    index is flushed and refreshed before the ``yield``.
    """
    json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [
            {'full_name': 'Jessica, Jones'},
        ],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [
            {'title': 'Alias Investigations'},
        ],
        'dois': [
            {'value': '10.1007/978-3-319-15001-7'},
        ],
    }

    created = InspireRecord.create(json, id_=None, skip_files=True)
    created.commit()
    rec_uuid = created.id
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    yield created

    # Teardown: reload the record so the cleanup works on current state.
    stored = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(pid_type='lit', pid_value=stored['control_number'])
    lit_pid.unassign()
    lit_pid.delete()
    stored.delete()
    stored.commit()
def update_authors_recid(record_id, uuid, profile_recid):
    """Point a signature at a given author profile.

    Every author entry of the record whose ``uuid`` equals the given
    signature UUID gets its ``recid`` set to ``str(profile_recid)``. The
    record is committed only when at least one signature matched.

    :param record_id: A string representing UUID of a given record.

    Example:
        record_id = "a5afb151-8f75-4e91-8dc1-05e7e8e8c0b8"

    :param uuid: A string representing UUID of a given signature.

    Example:
        uuid = "c2f432bd-2f52-4c16-ac66-096f168c762f"

    :param profile_recid: A string representing author profile recid,
        that updated signature should point to.

    Example:
        profile_recid = "1"
    """
    try:
        record = InspireRecord.get_record(record_id)

        matched = False
        for signature in record['authors']:
            if signature['uuid'] != uuid:
                continue
            signature['recid'] = str(profile_recid)
            matched = True

        if matched:
            # Keep this write from being re-queued by the index signal.
            before_record_index.disconnect(append_updated_record_to_queue)

            record.commit()
            db.session.commit()
    except StaleDataError as exc:
        raise update_authors_recid.retry(exc=exc)
    finally:
        # Runs on every path, including when nothing was disconnected.
        before_record_index.connect(append_updated_record_to_queue)

    logger.info("Updated signature %s with profile %s", uuid, profile_recid)
def update_authors_recid(record_id, uuid, profile_recid):
    """Update author profile for a given signature.

    The record identified by ``record_id`` is loaded and each author whose
    ``uuid`` equals ``uuid`` receives ``str(profile_recid)`` as ``recid``.
    If any author matched, the record and the session are committed while
    the ``append_updated_record_to_queue`` receiver is disconnected.

    :param record_id: A string representing UUID of a given record.

    Example:
        record_id = "a5afb151-8f75-4e91-8dc1-05e7e8e8c0b8"

    :param uuid: A string representing UUID of a given signature.

    Example:
        uuid = "c2f432bd-2f52-4c16-ac66-096f168c762f"

    :param profile_recid: A string representing author profile recid,
        that updated signature should point to.

    Example:
        profile_recid = "1"
    """
    receiver = append_updated_record_to_queue
    try:
        record = InspireRecord.get_record(record_id)

        touched = 0
        for author in record['authors']:
            if author['uuid'] == uuid:
                author['recid'] = str(profile_recid)
                touched += 1

        if touched:
            # Disconnect the signal so this commit is not queued again.
            before_record_index.disconnect(receiver)
            # Persist the modified record.
            record.commit()
            db.session.commit()
    except StaleDataError as exc:
        raise update_authors_recid.retry(exc=exc)
    finally:
        # Reconnect the receiver whether or not it was disconnected.
        before_record_index.connect(receiver)

    # Report.
    logger.info("Updated signature %s with profile %s", uuid, profile_recid)
def test_append_updated_record_to_queue(small_app):
    """Test the receiver responsible for queuing updated HEP records."""
    publication_id = str(PersistentIdentifier.get('lit', 4328).object_uuid)
    record = InspireRecord.get_record(publication_id)

    # Work on a copy so the stored record is left untouched.
    record_to_update = deepcopy(record)
    record_to_update['authors'][0]['full_name'] = 'John Smith'

    append_updated_record_to_queue(
        None, record_to_update, record_to_update, 'records-hep', 'hep')

    latest_queued = DisambiguationRecord.query.order_by(desc('id')).first()
    assert str(record_to_update.id) == latest_queued.record_id
def test_append_updated_record_to_queue(small_app):
    """Check that a record with a changed author name ends up in the queue."""
    lit_pid = PersistentIdentifier.get('lit', 4328)
    original = InspireRecord.get_record(str(lit_pid.object_uuid))

    record_to_update = deepcopy(original)
    first_author = record_to_update['authors'][0]
    first_author['full_name'] = "John Smith"

    append_updated_record_to_queue(
        None, record_to_update, record_to_update, "records-hep", "hep")

    newest_entry = DisambiguationRecord.query.order_by(desc("id")).first()
    queued_record_id = newest_entry.record_id
    assert str(record_to_update.id) == queued_record_id
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the conflicts
    are stored in ``extra_data.conflicts``. Also, it adds a ``callback_url``
    which contains the endpoint which resolves the merge conflicts.

    The head record's UUID, version id and revision id, plus a copy of the
    root used for the merge, are recorded in ``extra_data`` as well.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False`` it
        will skip the merge.
    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    approved_recid = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit', approved_recid).object_uuid
    head_record = InspireRecord.get_record(head_uuid)

    update = obj.data
    update_source = LiteratureReader(obj.data).source
    # The root is stored per source, keyed on the lower-cased source name.
    stored_root = read_wf_record_source(
        record_uuid=head_record.id, source=update_source.lower())
    head_root = stored_root.json if stored_root else {}

    obj.extra_data['head_uuid'] = str(head_uuid)
    obj.extra_data['head_version_id'] = head_record.model.version_id
    obj.extra_data['merger_head_revision'] = head_record.revision_id
    obj.extra_data['merger_original_root'] = deepcopy(head_root)

    merged, conflicts = merge(
        head=head_record.to_dict(),
        root=head_root,
        update=update,
    )
    obj.data = merged

    if conflicts:
        conflicts_metadata = {
            'datetime': datetime.now().strftime("%b %d, %Y, %H:%M:%S %p"),
            'update_source': update_source,
        }
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['conflicts_metadata'] = conflicts_metadata
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()

    obj.save()
def record_from_db(workflow_app):
    """Yield a committed article record (control number 1234), then remove it.

    The record is created without files and the ``records-hep`` index is
    refreshed before the ``yield``. Teardown unassigns and deletes the
    ``lit`` PID, then deletes the record and commits it.
    """
    json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [
            {'title': 'Fancy title for a new record'},
        ],
        'arxiv_eprints': [
            {'categories': ['hep-th'], 'value': '1407.7587'},
        ],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [
            {'source': 'arxiv', 'value': 'A basic abstract.'},
        ],
        'report_numbers': [
            {'value': 'DESY-17-036'},
        ],
    }

    created = InspireRecord.create(json, id_=None, skip_files=True)
    created.commit()
    rec_uuid = created.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield created

    stored = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(pid_type='lit', pid_value=stored['control_number'])
    lit_pid.unassign()
    lit_pid.delete()
    stored.delete()
    stored.commit()
def test_create_author_method(small_app):
    """Test the method for generating new author profiles."""
    signature = {
        'affiliations': [{'value': 'Copenhagen U.'}],
        'curated_relation': False,
        'full_name': 'Glashow, S.L.',
        'uuid': '6a3d43be-e962-4c20-8908-a81bd39447b5',
    }

    recid = create_author(signature)
    author_uuid = PersistentIdentifier.get('aut', recid).object_uuid
    record = InspireRecord.get_record(author_uuid)

    # Checked in a fixed order so the first mismatch is the one reported.
    expected_fields = (
        ('_collections', ['Authors']),
        ('name', {'value': 'Glashow, S.L.'}),
        ('positions', [{'institution': {'name': 'Copenhagen U.'}}]),
    )
    for field, expected in expected_fields:
        assert record[field] == expected
def _is_stale_data(workflow_object):
    """Tell whether the head record changed after the workflow captured it.

    Compares the ``head_version_id`` stored in the workflow's ``extra_data``
    with the current ``model.version_id`` of the head record.

    :param workflow_object: object exposing ``extra_data`` (dict-like) and
        ``log``.
    :return: ``True`` when the versions differ; ``False`` when they match,
        when the workflow is not an update, or when no ``head_version_id``
        was stored.
    """
    extra_data = workflow_object.extra_data
    is_update = extra_data.get('is-update')
    head_version_id = extra_data.get('head_version_id')
    if not is_update or head_version_id is None:
        return False

    record = InspireRecord.get_record(extra_data.get('head_uuid'))
    current_version_id = record.model.version_id
    if current_version_id == head_version_id:
        return False

    # A single format string with lazy arguments: the previous call passed
    # the real message as a %-argument to a placeholder-free string, which
    # makes logging fail to format it. The value reported is the one that
    # was compared (``model.version_id``), not ``revision_id``.
    workflow_object.log.info(
        'Working with stale data: Expecting version %d but found %d',
        head_version_id,
        current_version_id
    )
    return True
def _is_stale_data(workflow_object):
    """Tell whether the head record changed after the workflow captured it.

    Compares the ``head_version_id`` stored in the workflow's ``extra_data``
    with the current ``model.version_id`` of the head record.

    :param workflow_object: object exposing ``extra_data`` (dict-like) and
        ``log``.
    :return: ``True`` when the versions differ; ``False`` when they match,
        when the workflow is not an update, or when no ``head_version_id``
        was stored.
    """
    extra_data = workflow_object.extra_data
    is_update = extra_data.get('is-update')
    head_version_id = extra_data.get('head_version_id')
    if not is_update or head_version_id is None:
        return False

    record = InspireRecord.get_record(extra_data.get('head_uuid'))
    current_version_id = record.model.version_id
    if current_version_id == head_version_id:
        return False

    # A single format string with lazy arguments: the previous call passed
    # the real message as a %-argument to a placeholder-free string, which
    # makes logging fail to format it. The value reported is the one that
    # was compared (``model.version_id``), not ``revision_id``.
    workflow_object.log.info(
        'Working with stale data: Expecting version %d but found %d',
        head_version_id,
        current_version_id
    )
    return True
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the conflicts
    are stored in ``extra_data.conflicts``. Also, it adds a ``callback_url``
    which contains the endpoint which resolves the merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False`` it
        will skip the merge.
    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    approved_recid = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit', approved_recid).object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = get_source(update).lower()

    # Fall back to an empty root when none is stored for this source.
    stored_root = read_wf_record_source(record_uuid=head.id, source=update_source)
    head_root = stored_root.json if stored_root else {}

    merged, conflicts = merge(
        head=head.dumps(),
        root=head_root,
        update=update,
    )
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()

    obj.save()
def _delete_action(self, payload):
    """Bulk delete action.

    Args:
        payload: Decoded message body.

    Returns:
        Dictionary defining an Elasticsearch bulk 'delete' action.
    """
    index = payload.get('index')
    doc_type = payload.get('doc_type')

    # Only load the record when the payload does not say where it lives.
    if not index or not doc_type:
        record = InspireRecord.get_record(payload['id'])
        index, doc_type = self.record_to_index(record)

    action = {'_op_type': 'delete'}
    action['_index'] = index
    action['_type'] = doc_type
    action['_id'] = payload['id']
    return action
def modify_record(pid_type, pid_value):
    """Context manager to modify metadata of a single record by PID.

    A ``dict`` with the record's metadata is yielded to the ``with`` block.
    When the block finishes, the record is cleared, refilled from that
    ``dict`` and committed.

    Example:
        >>> with modify_record('lit', 1505221) as data:
        ...     data['titles'][0] = {'title': 'My new title'}
    """
    pid_query = PersistentIdentifier.query.filter_by(
        pid_type=pid_type, pid_value=str(pid_value))
    record = InspireRecord.get_record(pid_query.one().object_uuid)

    data = record.to_dict()
    yield data

    # Write back whatever the caller left in ``data``.
    record.clear()
    record.update(data)
    record.commit()
    db.session.commit()
def store_record(obj, eng):
    """Insert or replace a record.

    For an update the head record (``extra_data['head_uuid']``) is cleared
    and refilled from ``obj.data``. Otherwise a record is created, a recid
    is minted for it and its documents and figures are downloaded. The
    record and the workflow object are then saved and the session committed.
    """
    updating = obj.extra_data.get('is-update')

    if not updating:
        record = InspireRecord.create(obj.data, id_=None)
        # Create persistent identifier.
        created_pid = inspire_recid_minter(str(record.id), record).pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = created_pid
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        record = InspireRecord.get_record(obj.extra_data['head_uuid'])
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
def _index_action(self, payload):
    """Bulk index action.

    Args:
        payload: Decoded message body.

    Returns:
        Dictionary defining an Elasticsearch bulk 'index' action.
    """
    record = InspireRecord.get_record(payload['id'])
    index, doc_type = self.record_to_index(record)

    action = {
        '_op_type': 'index',
        '_index': index,
        '_type': doc_type,
        '_id': str(record.id),
        '_version': record.revision_id,
        '_version_type': self._version_type,
    }
    # The document body is prepared last, after the metadata fields.
    action['_source'] = self._prepare_record(record, index, doc_type)
    return action
def record_insert_or_replace(json):
    """Insert or replace a record.

    The control number is read from ``control_number``, falling back to
    ``recid``. Without one, nothing is stored and ``None`` is returned.

    :param json: record metadata; ``$schema`` is required whenever a control
        number is present.
    :return: the updated or newly created record, or ``None``.
    """
    control_number = json.get('control_number', json.get('recid'))
    if not control_number:
        return None

    pid_type = get_pid_type_from_schema(json['$schema'])
    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None)
        # Create persistent identifier.
        inspire_recid_minter(str(record.id), json)

    # A deleted record is only removed when it does not point to a new record.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def record_from_db(workflow_app):
    """Provide a stored article record and clean it up afterwards.

    The record (control number 1234) is created without files, committed,
    and the ``records-hep`` index is refreshed before the ``yield``.
    Teardown unassigns and deletes the ``lit`` PID, then deletes the record
    and commits it.
    """
    title = {'title': 'Fancy title for a new record'}
    eprint = {'categories': ['hep-th'], 'value': '1407.7587'}
    abstract = {'source': 'arxiv', 'value': 'A basic abstract.'}
    json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [title],
        'arxiv_eprints': [eprint],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [abstract],
        'report_numbers': [{'value': 'DESY-17-036'}],
    }

    fresh = InspireRecord.create(json, id_=None, skip_files=True)
    fresh.commit()
    rec_uuid = fresh.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield fresh

    reloaded = InspireRecord.get_record(rec_uuid)
    pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=reloaded['control_number'],
    )
    pid.unassign()
    pid.delete()
    reloaded.delete()
    reloaded.commit()
def modify_record(pid_type, pid_value):
    """Context manager to modify metadata of a single record by PID.

    The record's metadata is handed to the ``with`` block as a ``dict``.
    Changes made to that ``dict`` are written back and committed once the
    block ends.

    Example:
        >>> with modify_record('lit', 1505221) as data:
        ...     data['titles'][0] = {'title': 'My new title'}
    """
    matching_pid = PersistentIdentifier.query.filter_by(
        pid_type=pid_type,
        pid_value=str(pid_value),
    ).one()
    record = InspireRecord.get_record(matching_pid.object_uuid)

    data = record.to_dict()
    yield data

    # Replace the stored metadata wholesale with the edited dict.
    record.clear()
    record.update(data)
    record.commit()
    db.session.commit()
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the conflicts
    are stored in ``extra_data.conflicts``. Also, it adds a ``callback_url``
    which contains the endpoint which resolves the merge conflicts.

    Note:
        For the time being the ``root`` will be ignored, and we'll rely only
        on the ``head``, hence it is a rootless implementation. Also when
        the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False`` it will
        skip the merge.
    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    approved_recid = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit', approved_recid).object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    # Rootless merge: an empty root is always passed.
    merged, conflicts = merge(head=head.dumps(), root={}, update=obj.data)
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()

    obj.save()
def test_single_signature_with_no_profile(small_app):
    """Check the module for the case with a single, new signature."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    record_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    record = get_es_record_by_uuid(record_id)
    signature = record['authors'][0]
    author_uuid = signature['uuid']

    # Add phonetic block to the record.
    signature['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep', id=record_id, body=record)
    es.indices.refresh('records-hep')

    clusters = ({}, {"0": [author_uuid]})
    delay_target = "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay"
    with patch("celery.current_app.send_task", return_value=_BeardObject(clusters)):
        with patch(delay_target, side_effect=update_authors_recid):
            disambiguation_clustering("HAGp")

    stored_author = InspireRecord.get_record(record_id)['authors'][0]
    assert stored_author['recid'] == "1"
def test_single_signature_with_no_profile(small_app):
    """Check that a lone new signature ends up with recid "1"."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    lit_pid = PersistentIdentifier.get('lit', 11883)
    record_id = str(lit_pid.object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record['authors'][0]['uuid']

    # Add phonetic block to the record.
    record['authors'][0].update({'signature_block': "HAGp"})
    es.index(index='records-hep', doc_type='hep', id=record_id, body=record)
    es.indices.refresh('records-hep')

    beard_result = _BeardObject(({}, {"0": [author_uuid]}))
    with patch("celery.current_app.send_task", return_value=beard_result), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    assigned_recid = InspireRecord.get_record(record_id)['authors'][0]['recid']
    assert assigned_recid == "1"
def test_solve_claim_conflicts(small_app):
    """Check the case where two claimed signatures share one cluster.

    Two curated signatures (recids ``"3"`` and ``"4"``) and one unclaimed
    signature are clustered together; the unclaimed one must end up with
    recid ``"4"``.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    def _reindex_first_author(control_number, claimed_recid=None):
        """Put the first author in block ``HAGp`` and reindex the record.

        With ``claimed_recid`` the signature is also marked as curated and
        bound to that recid. Returns ``(record_id, signature_uuid)``.
        """
        pid = PersistentIdentifier.get('lit', control_number)
        record_id = str(pid.object_uuid)
        es_record = get_es_record_by_uuid(record_id)
        first_author = es_record['authors'][0]
        signature_uuid = first_author['uuid']

        # Add phonetic block to the record.
        first_author['signature_block'] = "HAGp"
        if claimed_recid is not None:
            first_author['curated_relation'] = True
            first_author['recid'] = claimed_recid

        es.index(
            index='records-hep',
            doc_type='hep',
            id=record_id,
            body=es_record,
        )
        es.indices.refresh('records-hep')
        return record_id, signature_uuid

    # Claimed signature #1.
    _, glashow_claimed = _reindex_first_author(4328, claimed_recid="3")
    # Claimed signature #2.
    _, higgs_claimed = _reindex_first_author(1358492, claimed_recid="4")
    # Not claimed signature.
    unclaimed_record_id, higgs_unclaimed = _reindex_first_author(11883)

    matched_clusters = {
        "3": [glashow_claimed, higgs_claimed, higgs_unclaimed],
    }

    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject((matched_clusters, {})),
    ):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject({higgs_claimed: [higgs_unclaimed]}),
        ):
            with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                side_effect=update_authors_recid,
            ):
                disambiguation_clustering("HAGp")

    reloaded = InspireRecord.get_record(unclaimed_record_id)
    assert reloaded['authors'][0]['recid'] == "4"
def get_db_record(pid_type, recid):
    """Return the ``InspireRecord`` that the given persistent identifier points to."""
    # Imported at call time, as in the original block.
    from inspirehep.modules.records.api import InspireRecord

    record_uuid = PersistentIdentifier.get(pid_type, recid).object_uuid
    return InspireRecord.get_record(record_uuid)
def test_solve_claim_conflicts(small_app):
    """Verify conflict solving when one cluster holds two claimed signatures."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    block = "HAGp"

    # Claimed signature #1 (bound to profile "3").
    glashow_id = str(PersistentIdentifier.get('lit', 4328).object_uuid)
    glashow_doc = get_es_record_by_uuid(glashow_id)
    glashow_author = glashow_doc['authors'][0]
    glashow_signature = glashow_author['uuid']
    glashow_author['signature_block'] = block
    glashow_author['curated_relation'] = True
    glashow_author['recid'] = "3"
    es.index(index='records-hep', doc_type='hep',
             id=glashow_id, body=glashow_doc)
    es.indices.refresh('records-hep')

    # Claimed signature #2 (bound to profile "4").
    higgs_claimed_id = str(PersistentIdentifier.get('lit', 1358492).object_uuid)
    higgs_claimed_doc = get_es_record_by_uuid(higgs_claimed_id)
    higgs_claimed_author = higgs_claimed_doc['authors'][0]
    higgs_claimed_signature = higgs_claimed_author['uuid']
    higgs_claimed_author['signature_block'] = block
    higgs_claimed_author['curated_relation'] = True
    higgs_claimed_author['recid'] = "4"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_claimed_id, body=higgs_claimed_doc)
    es.indices.refresh('records-hep')

    # Not claimed signature: only gets the phonetic block.
    higgs_open_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    higgs_open_doc = get_es_record_by_uuid(higgs_open_id)
    higgs_open_author = higgs_open_doc['authors'][0]
    higgs_open_signature = higgs_open_author['uuid']
    higgs_open_author['signature_block'] = block
    es.index(index='records-hep', doc_type='hep',
             id=higgs_open_id, body=higgs_open_doc)
    es.indices.refresh('records-hep')

    # All three signatures are reported as belonging to the cluster of "3".
    clustering_patch = patch(
        "celery.current_app.send_task",
        return_value=_BeardObject((
            {
                "3": [
                    glashow_signature,
                    higgs_claimed_signature,
                    higgs_open_signature,
                ],
            },
            {},
        )),
    )
    # The conflict solver hands the unclaimed signature to the second claim.
    conflict_patch = patch(
        "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
        return_value=_ConflictObject(
            {higgs_claimed_signature: [higgs_open_signature]}
        ),
    )
    # Run the recid update inline instead of through ``.delay``.
    delay_patch = patch(
        "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
        side_effect=update_authors_recid,
    )

    with clustering_patch, conflict_patch, delay_patch:
        disambiguation_clustering("HAGp")

    stored_author = InspireRecord.get_record(higgs_open_id)['authors'][0]
    assert stored_author['recid'] == "4"
def delete_by_id(self, record_uuid):
    """Look up the record with the given UUID and pass it to ``self.delete``."""
    remove = self.delete
    record = InspireRecord.get_record(record_uuid)
    remove(record)
def get_db_record(pid_type, recid):
    """Fetch a record from the database by its PID type and value.

    The persistent identifier is resolved first; its ``object_uuid`` is then
    used to load the record through ``InspireRecord.get_record``.
    """
    # Imported at call time, as in the original block.
    from inspirehep.modules.records.api import InspireRecord

    persistent_id = PersistentIdentifier.get(pid_type, recid)
    object_uuid = persistent_id.object_uuid
    return InspireRecord.get_record(object_uuid)