def index_record(self, uuid, record_version=None, force_delete=None):
    """Record indexing.

    Args:
        self: task instance (binded automatically)
        uuid (str): UUID of the record which should be reindexed.
        record_version (int): Version of the record to reindex (will be checked).
        force_delete (bool): if set to True will delete record from es even if
            metadata says that record is not deleted.
    Returns:
        list(dict): Statistics from processing references.
    """
    LOGGER.debug("Indexing record", uuid=str(uuid), version=record_version)
    record = get_record(uuid, record_version)

    # A record leaves ES either when deletion is forced or when its own
    # metadata marks it as deleted; otherwise it is (re)indexed.
    marked_deleted = not force_delete and record.get("deleted", False)
    if force_delete or marked_deleted:
        try:
            InspireRecordIndexer().delete(record)
            LOGGER.debug("Record removed from ES", uuid=str(uuid))
        except NotFoundError:
            LOGGER.debug("Record to delete not found", uuid=str(uuid))
    else:
        InspireRecordIndexer().index(record)

    # Collect UUIDs of linked records whose ES documents are now stale.
    uuids_to_reindex = set()
    if isinstance(record, LiteratureRecord):
        uuids_to_reindex.update(record.get_linked_papers_if_reference_changed())
        if current_app.config.get("FEATURE_FLAG_ENABLE_SELF_CITATIONS"):
            uuids_to_reindex.update(
                record.get_all_connected_records_uuids_of_modified_authors()
            )
            uuids_to_reindex.update(
                record.get_all_connected_records_uuids_of_modified_collaborations()
            )
    if isinstance(record, AuthorsRecord):
        uuids_to_reindex.update(
            record.get_linked_author_records_uuids_if_author_changed_name()
        )
    if isinstance(record, ConferencesRecord):
        uuids_to_reindex.update(
            record.get_linked_literature_record_uuids_if_conference_title_changed()
        )

    if uuids_to_reindex:
        batch_index(list(uuids_to_reindex))
def test_process_bulk_record_for_index_default_values(
    record_to_index_mock, prepare_record_mock, build_alias_mocked
):
    """Verify the bulk-action payload built for an index op with defaults."""
    record = LiteratureRecord({})
    indexer = InspireRecordIndexer()

    expected_payload = {
        "_op_type": "index",
        "_index": "prefixed-index",
        "_type": "test_type",
        "_id": str(record.id),
        "_version": record.revision_id,
        "_version_type": "external_gte",
        "_source": {},
    }

    payload = indexer._process_bulk_record_for_index(record)

    # Each helper should have been consulted exactly once per record.
    assert record_to_index_mock.call_count == 1
    assert prepare_record_mock.call_count == 1
    assert payload == expected_payload
def index_record(self, uuid, record_version=None, force_delete=None):
    """Record indexing.

    Args:
        self: task instance (binded automatically)
        uuid (str): UUID of the record which should be reindexed.
        record_version (int): Version of the record to reindex (will be checked).
        force_delete (bool): if set to True will delete record from es even if
            metadata says that record is not deleted.
    Returns:
        list(dict): Statistics from processing references.
    """
    LOGGER.debug("Indexing record", uuid=str(uuid), version=record_version)
    record = InspireRecord.get_record(
        uuid, with_deleted=True, record_version=record_version
    )

    # Delete from ES when forced, or when the record's own metadata says so.
    marked_deleted = not force_delete and record.get("deleted", False)
    if force_delete or marked_deleted:
        try:
            InspireRecordIndexer().delete(record)
            LOGGER.debug("Record removed from ES", uuid=str(uuid))
        except NotFoundError:
            LOGGER.debug("Record to delete not found", uuid=str(uuid))
    else:
        try:
            InspireRecordIndexer().index(record)
        except ConflictError as err:
            # A newer version already landed in ES; log and move on.
            LOGGER.warning(
                "VersionConflict on record indexing.",
                uuid=str(uuid),
                record_version=record_version,
                force_delete=force_delete,
                error=err,
            )

    uuids_to_reindex = get_references_to_update(record)
    if uuids_to_reindex:
        batch_index(list(uuids_to_reindex))
def batch_index(self, records_uuids, request_timeout=None):
    """Process all provided references and index them in bulk.

    Be sure that uuids are not duplicated in batch.

    Args:
        records_uuids (list): list of uuids to process. All duplicates will be
            removed.
        request_timeout: Timeout in which ES should respond. Otherwise break.
    Returns:
        dict: dict with success count and failure list (with uuids of failed
            records)
    """
    # Fixed log message: the original f-string had an unbalanced backtick
    # around the task name (`batch_index without a closing backtick).
    LOGGER.info(f"Starting task `batch_index` for {len(records_uuids)} records")
    return InspireRecordIndexer().bulk_index(records_uuids, request_timeout)
def test_indexer_prepare_record(
    mixins_current_app_mock,
    record_metadata_mock,
    current_app_mock,
    receiver_mock,
    query_mock,
    mock_latex_us_display,
    mock_latex_eu_display,
    mock_bibtex_display,
    mock_referenced_authors,
):
    """Check ``_prepare_record`` matches the record's own ES serialization."""
    # Both count() chains on the mocked query return a single hit.
    filter_by_mock = query_mock.return_value.filter_by.return_value
    filter_by_mock.count.return_value = 1
    filter_by_mock.filter.return_value.count.return_value = 1

    record = LiteratureRecord({})
    indexer = InspireRecordIndexer()

    # Record serialization itself is covered by its own tests.
    expected = record.serialize_for_es()
    processed = indexer._prepare_record(record, "index_name", "document_type")

    assert receiver_mock.send.call_count == 1
    assert processed == expected
def hard_delete(self):
    """Permanently remove the record: its PIDs, DB row, and ES document."""
    recid = self["control_number"]

    with db.session.begin_nested():
        pid_query = PersistentIdentifier.query.filter(
            PersistentIdentifier.object_uuid == self.id
        )
        for pid in pid_query.all():
            # Free the recid counter entry before dropping the PID itself.
            if pid.pid_provider == "recid":
                RecordIdentifier.query.filter_by(recid=pid.pid_value).delete()
            db.session.delete(pid)
        db.session.delete(self.model)

    try:
        InspireRecordIndexer().delete(self)
    except NotFoundError:
        # Already absent from ES — nothing left to remove there.
        LOGGER.info("Record not found in ES", recid=recid, uuid=self.id)

    LOGGER.info("Record hard deleted", recid=recid)
def index_record(self, uuid, record_version=None, force_delete=None):
    """Record indexing.

    Args:
        self: task instance (binded automatically)
        uuid (str): UUID of the record which should be reindexed.
        record_version (int): Version of the record to reindex (will be checked).
        force_delete (bool): if set to True will delete record from es even if
            metadata says that record is not deleted.
    Returns:
        list(dict): Statistics from processing references.
    """
    LOGGER.debug("Indexing record", uuid=str(uuid), version=record_version)

    record = InspireRecord.get_record(
        uuid, with_deleted=True, record_version=record_version
    )
    # The indexer decides internally whether to index or delete.
    InspireRecordIndexer().index(
        record, record_version=record_version, force_delete=force_delete
    )

    stale_uuids = get_references_to_update(record)
    if stale_uuids:
        batch_index(list(stale_uuids))