def match_references_by_uuids(literature_uuids):
    """Re-match the references of the selected literature records.

    Loads every ``RecordMetadata`` row whose id is in ``literature_uuids``
    and whose JSON contains a ``references`` key, and runs
    ``match_references`` on each row's references. When the result reports
    ``any_link_modified``, the matched references are written back through
    a ``LiteratureRecord``, the session is committed, and the added and
    removed recids are logged.

    Args:
        literature_uuids: ids of the ``RecordMetadata`` rows to process.
    """
    json_column = type_coerce(RecordMetadata.json, JSONB)
    candidates = RecordMetadata.query.filter(
        RecordMetadata.id.in_(literature_uuids),
        json_column.has_key("references"),  # noqa: W601
    )

    for row in candidates.all():
        outcome = match_references(row.json.get("references"))

        # Nothing to persist or report when no link was modified.
        if not outcome["any_link_modified"]:
            continue

        record = LiteratureRecord(row.json, model=row)
        record["references"] = outcome["matched_references"]
        record.update(dict(record))
        # One commit per updated record.
        db.session.commit()
        LOGGER.info("MATCHER-after-commit")

        added = outcome["added_recids"]
        removed = outcome["removed_recids"]
        LOGGER.info(
            "References are matched",
            uuid=row.id,
            recid=row.json["control_number"],
            added_recids=added,
            added_recid_count=len(added),
            removed_recids=removed,
            removed_recid_count=len(removed),
        )
def match_references_by_uuids(literature_uuids):
    """Re-match the references of the selected, non-deleted literature records.

    Loads every ``RecordMetadata`` row whose id is in ``literature_uuids``,
    whose JSON contains a ``references`` key, and which is not flagged as
    deleted. ``match_references`` is run on each row's references. When the
    result reports ``any_link_modified``, the de-duplicated matched
    references are written back through a ``LiteratureRecord``, the session
    is committed, and the added and removed recids are logged.

    Args:
        literature_uuids: ids of the ``RecordMetadata`` rows to process.
    """
    json_column = type_coerce(RecordMetadata.json, JSONB)

    # A record counts as live when it has no "deleted" key or when that key
    # is not JSON true. This guards against records being deleted after the
    # uuids were collected.
    is_live = or_(
        not_(json_column.has_key("deleted")),  # noqa: W601
        not_(json_column["deleted"] == cast(True, JSONB)),
    )
    candidates = RecordMetadata.query.filter(
        RecordMetadata.id.in_(literature_uuids),
        json_column.has_key("references"),  # noqa: W601
        is_live,
    )

    for row in candidates.all():
        outcome = match_references(row.json["references"])

        if outcome["any_link_modified"]:
            record = LiteratureRecord(row.json, model=row)
            record["references"] = dedupe_list(outcome["matched_references"])
            record.update(dict(record))
            # One commit per updated record.
            db.session.commit()

            added = outcome["added_recids"]
            removed = outcome["removed_recids"]
            LOGGER.info(
                "References are matched",
                uuid=row.id,
                recid=row.json["control_number"],
                added_recids=added,
                added_recid_count=len(added),
                removed_recids=removed,
                removed_recid_count=len(removed),
            )