def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from
    Invenio-Records because, despite the name, at that point we are not yet
    sure whether the record has been really committed to the DB.

    Args:
        sender: the object that sent the signal; not used by this receiver.
        changes: iterable of ``(model_instance, change)`` pairs. Only
            ``RecordMetadata`` instances are handled, everything else is
            skipped.

    Raises:
        RuntimeError: if a ``RecordMetadata`` instance is reported with an
            operation other than ``"insert"``, ``"update"`` or ``"delete"``.
    """
    for model_instance, change in changes:
        if not isinstance(model_instance, RecordMetadata):
            continue
        if change not in ("insert", "update", "delete"):
            # Build the message explicitly: passing the values as extra
            # positional arguments to the exception never interpolates them.
            raise RuntimeError(
                f"Wrong operation `{change}` on record {model_instance.id!r}"
            )

        record_uuid = str(model_instance.id)
        LOGGER.debug("Record commited", change=change, uuid=record_uuid)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            model_instance.json.get("$schema")
        )
        # Look at the operation of *this* change. ``changes`` holds
        # ``(model_instance, change)`` tuples, so testing the bare string
        # against it can never be true and deletions were never forced.
        delete = change == "delete"

        record_class = InspireRecord.get_subclasses()[pid_type]
        arguments = record_class._record_index(
            model_instance.json, _id=record_uuid, force_delete=delete
        )
        arguments["record_version"] = model_instance.version_id

        LOGGER.debug("Record sending to index", uuid=record_uuid, delete=delete)
        current_celery_app.send_task(
            "inspirehep.records.indexer.tasks.index_record", kwargs=arguments
        )
def update_references_pointing_to_merged_record(
    refs_to_schema, merged_record_uri, new_record_uri
):
    """Rewrite references to a merged record in every record that has them.

    For each ``(index, path)`` pair in ``refs_to_schema`` the
    ``records-<index>`` index is searched for records whose ``path`` matches
    ``merged_record_uri``. Every hit is loaded from the database (selected
    ``with_for_update()``), each entry found under ``path`` without its
    trailing ``.$ref`` is passed to
    ``update_reference_if_reference_uri_matches`` together with both URIs,
    and the record is then updated with its own content.

    Args:
        refs_to_schema: iterable of ``(index, path)`` pairs; ``path`` is the
            dotted path of a ``$ref`` field.
        merged_record_uri: URI to look for in the references.
        new_record_uri: URI handed to the helper as the replacement.
    """
    ref_suffix_length = len(".$ref")

    for index, path in refs_to_schema:
        query = get_query_for_given_path(index, path, merged_record_uri)
        es_index_name = f"records-{index}"
        search = InspireSearch(index=es_index_name)
        hits = search.query(query).scan()

        # Path of the object holding the ``$ref`` key; it only depends on
        # ``path``, so it is computed once per pair.
        reference_holder_path = path[:-ref_suffix_length]

        for hit in hits:
            pid_type = current_app.config["SCHEMA_TO_PID_TYPES"][index]
            record_class = InspireRecord.get_subclasses()[pid_type]

            locked_rows = (
                db.session.query(RecordMetadata)
                .with_for_update()
                .filter_by(id=hit.meta.id)
            )
            record_model = locked_rows.first()
            record = record_class(record_model.json, model=record_model)

            reference_holders = flatten_list(
                get_value(record, reference_holder_path, [])
            )
            for reference_holder in reference_holders:
                update_reference_if_reference_uri_matches(
                    reference_holder, merged_record_uri, new_record_uri
                )

            record.update(dict(record))
            LOGGER.info("Updated reference for record", uuid=str(record.id))

    # One commit, once every pair has been processed.
    db.session.commit()
def get_config_for_given_path(index, path):
    """Return the matcher config for a reference field of ``index``.

    The PID type configured for ``index`` in ``SCHEMA_TO_PID_TYPES`` selects
    the record class. When the first segment of the dotted ``path`` is one of
    that class's ``nested_record_fields`` the nested-field generator is used,
    otherwise the plain reference-field generator.

    Args:
        index: key into ``current_app.config["SCHEMA_TO_PID_TYPES"]``.
        path: dotted path of the reference field.

    Returns:
        Whatever the selected ``generate_matcher_config_for_*`` helper returns.
    """
    pid_type = current_app.config["SCHEMA_TO_PID_TYPES"][index]
    record_class = InspireRecord.get_subclasses()[pid_type]
    nested_fields = record_class.nested_record_fields

    top_level_field = path.split(".")[0]
    if top_level_field in nested_fields:
        return generate_matcher_config_for_nested_reference_field(index, path)
    return generate_matcher_config_for_reference_field(index, path)
def get_query_for_given_path(index, path, record_ref):
    """Build the query matching ``record_ref`` at ``path``.

    A ``match`` query on ``path`` is always created. When the first segment
    of ``path`` is listed in ``nested_record_fields`` of the record class
    configured for ``index``, that query is wrapped in a ``nested`` query on
    the first segment.

    Args:
        index: key into ``current_app.config["SCHEMA_TO_PID_TYPES"]``.
        path: dotted path of the field to match on.
        record_ref: value the field has to match.

    Returns:
        The ``Q`` object for the plain or the nested match.
    """
    pid_type = current_app.config["SCHEMA_TO_PID_TYPES"][index]
    record_class = InspireRecord.get_subclasses()[pid_type]
    nested_fields = record_class.nested_record_fields

    top_level_field = path.split(".")[0]
    is_nested = top_level_field in nested_fields

    match_query = Q("match", **{path: record_ref})
    if not is_nested:
        return match_query
    return Q("nested", path=top_level_field, query=match_query)
def test_get_subclasses_from_inspire_records():
    """``InspireRecord.get_subclasses()`` maps every PID type to its class.

    ``sem`` is part of the expectation so that this test agrees with
    ``test_get_subclasses``, which compares the very same call against a
    mapping that contains ``SeminarsRecord``; with differing expectations
    the two tests could not both pass.
    """
    expected = {
        "lit": LiteratureRecord,
        "aut": AuthorsRecord,
        "job": JobsRecord,
        "jou": JournalsRecord,
        "exp": ExperimentsRecord,
        "con": ConferencesRecord,
        "dat": DataRecord,
        "ins": InstitutionsRecord,
        "sem": SeminarsRecord,
    }

    subclasses = InspireRecord.get_subclasses()

    assert subclasses == expected
def test_get_subclasses():
    """``InspireRecord.get_subclasses()`` returns the full PID type mapping."""
    pid_type_to_class = {
        "lit": LiteratureRecord,
        "aut": AuthorsRecord,
        "job": JobsRecord,
        "jou": JournalsRecord,
        "exp": ExperimentsRecord,
        "con": ConferencesRecord,
        "dat": DataRecord,
        "ins": InstitutionsRecord,
        "sem": SeminarsRecord,
    }

    found = InspireRecord.get_subclasses()

    assert found == pid_type_to_class