예제 #1
0
def index_record(self, uuid, record_version=None, force_delete=None):
    """Index one record in ES, or remove it from ES when it is deleted.

    After the record itself is handled, the uuids of related records are
    collected (depending on the record type) and reindexed in one batch.

    Args:
        self: task instance (bound automatically).
        uuid (str): UUID of the record which should be reindexed.
        record_version (int): Version of the record to reindex; forwarded to
            ``get_record``.
        force_delete (bool): if truthy, the record is deleted from ES even if
            its metadata does not mark it as deleted.

    Returns:
        None: the function has no return statement.
    """
    LOGGER.debug("Indexing record", uuid=str(uuid), version=record_version)
    record = get_record(uuid, record_version)

    # The "deleted" flag is only consulted when deletion is not forced.
    should_delete = force_delete or record.get("deleted", False)

    if not should_delete:
        InspireRecordIndexer().index(record)
    else:
        try:
            InspireRecordIndexer().delete(record)
            LOGGER.debug("Record removed from ES", uuid=str(uuid))
        except NotFoundError:
            # Nothing to remove from ES; only worth a debug message.
            LOGGER.debug("Record to delete not found", uuid=str(uuid))

    uuids_to_reindex = set()

    if isinstance(record, LiteratureRecord):
        uuids_to_reindex |= record.get_linked_papers_if_reference_changed()
        self_citations_enabled = current_app.config.get(
            "FEATURE_FLAG_ENABLE_SELF_CITATIONS"
        )
        if self_citations_enabled:
            uuids_to_reindex |= (
                record.get_all_connected_records_uuids_of_modified_authors()
            )
            uuids_to_reindex |= (
                record.get_all_connected_records_uuids_of_modified_collaborations()
            )

    if isinstance(record, AuthorsRecord):
        uuids_to_reindex |= (
            record.get_linked_author_records_uuids_if_author_changed_name()
        )

    if isinstance(record, ConferencesRecord):
        uuids_to_reindex |= (
            record.get_linked_literature_record_uuids_if_conference_title_changed()
        )

    if not uuids_to_reindex:
        return
    batch_index(list(uuids_to_reindex))
예제 #2
0
def test_process_bulk_record_for_index_default_values(
    record_to_index_mock, prepare_record_mock, build_alias_mocked
):
    """Check the bulk "index" action built for an empty literature record.

    The three arguments are mocks supplied by the test harness; the test
    verifies that two of them are each called exactly once and that the
    returned action matches the expected dictionary.
    """
    literature = LiteratureRecord({})
    record_indexer = InspireRecordIndexer()

    record_id = str(literature.id)
    record_revision = literature.revision_id
    expected_data = {
        "_op_type": "index",
        "_index": "prefixed-index",
        "_type": "test_type",
        "_id": record_id,
        "_version": record_revision,
        "_version_type": "external_gte",
        "_source": {},
    }

    bulk_data = record_indexer._process_bulk_record_for_index(literature)

    assert record_to_index_mock.call_count == 1
    assert prepare_record_mock.call_count == 1
    assert expected_data == bulk_data
예제 #3
0
def index_record(self, uuid, record_version=None, force_delete=None):
    """Index one record in ES, or remove it from ES when it is deleted.

    The record is loaded with ``with_deleted=True``. A ``ConflictError``
    raised while indexing is logged as a warning and not re-raised. Records
    returned by ``get_references_to_update`` are then reindexed in one batch.

    Args:
        self: task instance (bound automatically).
        uuid (str): UUID of the record which should be reindexed.
        record_version (int): Version of the record to reindex; forwarded to
            ``InspireRecord.get_record``.
        force_delete (bool): if truthy, the record is deleted from ES even if
            its metadata does not mark it as deleted.

    Returns:
        None: the function has no return statement.
    """
    LOGGER.debug("Indexing record", uuid=str(uuid), version=record_version)
    record = InspireRecord.get_record(
        uuid, with_deleted=True, record_version=record_version
    )

    # The "deleted" flag is only consulted when deletion is not forced.
    should_delete = force_delete or record.get("deleted", False)

    if should_delete:
        try:
            InspireRecordIndexer().delete(record)
            LOGGER.debug("Record removed from ES", uuid=str(uuid))
        except NotFoundError:
            # Nothing to remove from ES; only worth a debug message.
            LOGGER.debug("Record to delete not found", uuid=str(uuid))
    else:
        try:
            InspireRecordIndexer().index(record)
        except ConflictError as err:
            # A version conflict is reported but does not abort the task.
            LOGGER.warning(
                "VersionConflict on record indexing.",
                uuid=str(uuid),
                record_version=record_version,
                force_delete=force_delete,
                error=err,
            )

    linked_uuids = get_references_to_update(record)
    if not linked_uuids:
        return
    batch_index(list(linked_uuids))
예제 #4
0
def batch_index(self, records_uuids, request_timeout=None):
    """Log the batch size and bulk-index the given records.

    The work is delegated to ``InspireRecordIndexer.bulk_index``; this
    function only logs how many uuids were passed in.

    Args:
        self: task instance (not used in the body).
        records_uuids (list): uuids to process. They are forwarded unchanged;
            NOTE(review): the earlier docstring said duplicates are removed,
            which is not visible here - presumably done by ``bulk_index``.
        request_timeout: forwarded to ``bulk_index`` (timeout in which ES
            should respond, per the earlier docstring).

    Returns:
        The result of ``bulk_index``; described earlier as a dict with the
        success count and a list of uuids of failed records.
    """
    LOGGER.info(f"Starting task `batch_index for {len(records_uuids)} records")
    indexer = InspireRecordIndexer()
    outcome = indexer.bulk_index(records_uuids, request_timeout)
    return outcome
예제 #5
0
def test_indexer_prepare_record(
    mixins_current_app_mock,
    record_metadata_mock,
    current_app_mock,
    receiver_mock,
    query_mock,
    mock_latex_us_display,
    mock_latex_eu_display,
    mock_bibtex_display,
    mock_referenced_authors,
):
    """Check that ``_prepare_record`` returns the record's ES serialization.

    Both count queries on ``query_mock`` are stubbed to return 1. The test
    also verifies that ``receiver_mock.send`` is called exactly once.
    """
    filtered_query = query_mock.return_value.filter_by.return_value
    filtered_query.count.return_value = 1
    filtered_query.filter.return_value.count.return_value = 1

    literature = LiteratureRecord({})
    record_indexer = InspireRecordIndexer()
    # The record's own serialization is assumed to be covered by other tests.
    expected = literature.serialize_for_es()

    processed = record_indexer._prepare_record(
        literature, "index_name", "document_type"
    )

    assert receiver_mock.send.call_count == 1
    assert expected == processed
예제 #6
0
파일: base.py 프로젝트: MJedr/inspirehep
    def hard_delete(self):
        """Delete this record's identifiers, DB model and ES document.

        Inside a nested DB transaction, every ``PersistentIdentifier`` whose
        ``object_uuid`` matches this record is deleted (together with the
        matching ``RecordIdentifier`` row for ``recid`` providers), then the
        record model, and finally the record is removed from ES. A missing
        ES document is logged and otherwise ignored.
        """
        recid = self["control_number"]
        with db.session.begin_nested():
            pid_query = PersistentIdentifier.query.filter(
                PersistentIdentifier.object_uuid == self.id
            )
            for persistent_id in pid_query.all():
                if persistent_id.pid_provider == "recid":
                    # "recid" pids also have a row in RecordIdentifier.
                    RecordIdentifier.query.filter_by(
                        recid=persistent_id.pid_value
                    ).delete()
                db.session.delete(persistent_id)
            db.session.delete(self.model)

            try:
                InspireRecordIndexer().delete(self)
            except NotFoundError:
                LOGGER.info("Record not found in ES", recid=recid, uuid=self.id)

        LOGGER.info("Record hard deleted", recid=recid)
예제 #7
0
def index_record(self, uuid, record_version=None, force_delete=None):
    """Index one record and reindex the records related to it.

    The record is loaded with ``with_deleted=True`` and handed to
    ``InspireRecordIndexer.index`` together with ``record_version`` and
    ``force_delete``. Records returned by ``get_references_to_update`` are
    then reindexed in one batch.

    Args:
        self: task instance (bound automatically).
        uuid (str): UUID of the record which should be reindexed.
        record_version (int): Version of the record to reindex; forwarded to
            both ``InspireRecord.get_record`` and the indexer.
        force_delete (bool): forwarded to the indexer; per the earlier
            docstring, True deletes the record from ES even if its metadata
            does not mark it as deleted.

    Returns:
        None: the function has no return statement.
    """
    LOGGER.debug("Indexing record", uuid=str(uuid), version=record_version)
    record = InspireRecord.get_record(
        uuid, with_deleted=True, record_version=record_version
    )

    indexer = InspireRecordIndexer()
    indexer.index(record, record_version=record_version, force_delete=force_delete)

    linked_uuids = get_references_to_update(record)
    if not linked_uuids:
        return
    batch_index(list(linked_uuids))