def search_serial_by_title(title):
    """Return a search for serials whose title matches the given phrase.

    The title is embedded as a quoted phrase in a ``query_string`` query
    restricted to the ``title`` field, and the search is filtered on
    ``mode_of_issuance`` being ``SERIAL``.

    :param title: title to look for.
    :returns: the search object; it is not executed here.
    """
    # The title is placed between double quotes in the query string, so a
    # raw double quote would close the phrase early and a raw backslash
    # would be read as an escape character. Escape the backslash first so
    # the backslashes added for the quotes are not escaped again.
    escaped_title = str(title).replace("\\", "\\\\").replace('"', '\\"')

    series_search = current_app_ils.series_search_cls()
    search = series_search.query(
        "query_string", query=f'title:"{escaped_title}"'
    ).filter("term", mode_of_issuance="SERIAL")
    return search
def link_documents_and_serials():
    """Link documents/multiparts and serials.

    Runs three passes over search hits:

    * documents filtered on ``_migration__has_serial=True``,
    * series filtered on ``mode_of_issuance="MULTIPART_MONOGRAPH"`` and
      ``_migration__has_serial=True``,
    * documents filtered on ``_migration__has_journal=True``.

    NOTE(review): a function with this same name is defined again further
    down in this file; within one module the later definition replaces this
    one, so this body is presumably never called - confirm and remove.
    """
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()

    def link_records_and_serial(record_cls, search):
        # Create a serial relation between every hit of `search` and each
        # serial returned by get_serials_by_child_recid for its legacy recid.
        for hit in search.scan():
            # Skip linking if the hit doesn't have a legacy recid since it
            # means it's a volume of a multipart
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            check_for_special_series(record)
            for serial in get_serials_by_child_recid(hit.legacy_recid):
                volume = get_migrated_volume_by_serial_title(
                    record, serial["title"])
                create_parent_child_relation(serial, record, SERIAL_RELATION, volume)
                RecordRelationIndexer().index(record, serial)

    def link_record_and_journal(record_cls, search):
        # Create a serial relation between every hit of `search` and each
        # journal listed under _migration.journal_record_legacy_recids.
        for hit in search.scan():
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            for journal in hit["_migration"]["journal_record_legacy_recids"]:
                # NOTE(review): the later definition of this function in the
                # file passes an extra pid-type argument to this helper -
                # confirm which call matches the helper's signature.
                serial = get_record_by_legacy_recid(series_class, journal["recid"])
                create_parent_child_relation(serial, record, SERIAL_RELATION, journal["volume"])
                # NOTE(review): the source was flattened, so it could not be
                # determined whether the next three statements belong to
                # this journal loop or to the per-hit loop; kept at journal
                # level - confirm. At this level a second journal for the
                # same record would hit a missing "publication_info" key.
                del record["publication_info"]
                record.commit()
                db.session.commit()

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True),
    )
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )
def search_series_with_relations():
    """Build a search for series flagged as having related records.

    The search is filtered with a ``bool`` filter holding a single ``term``
    clause on ``_migration__has_related=True``.

    :returns: the search object; it is not executed here.
    """
    has_related = Q("term", _migration__has_related=True)
    return current_app_ils.series_search_cls().filter(
        "bool", filter=[has_related]
    )
def search_series_by_issn(issn):
    """Build a search for series carrying the given ISSN.

    Both clauses must match: an identifier scheme equal to ``ISSN`` and an
    identifier value equal to ``issn``.

    :param issn: ISSN value to look for.
    :returns: the search object; it is not executed here.
    """
    issn_clauses = [
        Q("term", identifiers__scheme="ISSN"),
        Q("term", identifiers__value=issn),
    ]
    return current_app_ils.series_search_cls().query(
        "bool", must=issn_clauses
    )
def get_serials_by_child_recid(recid):
    """Yield the serial records that list ``recid`` among their children.

    Series are searched with ``mode_of_issuance`` equal to ``SERIAL`` and
    ``_migration__children`` equal to ``recid``. Hits are scanned with a
    ``1h`` scroll parameter and each one is loaded as a series record by
    its pid.

    :param recid: legacy recid of the child record.
    :returns: generator of series records.
    """
    series_search = current_app_ils.series_search_cls()
    series_class = current_app_ils.series_record_cls

    child_of_serial = [
        Q("term", mode_of_issuance="SERIAL"),
        Q("term", _migration__children=recid),
    ]
    matching = series_search.query("bool", filter=child_of_serial)
    hits = matching.params(scroll='1h').scan()

    # The generator expression stays lazy, so records are loaded one at a
    # time as the caller consumes them.
    yield from (series_class.get_record_by_pid(hit.pid) for hit in hits)
def get_multipart_by_multipart_id(multipart_id):
    """Return the single multipart monograph with the given identifier.

    :param multipart_id: migration multipart identifier, e.g. ``vol234``.
    :returns: the series record when exactly one hit is found; ``None``
        (after printing a red message) when there is no hit.
    :raises MultipartMigrationError: when the hit count is neither zero
        nor one.
    """
    series_search = current_app_ils.series_search_cls()
    series_cls = current_app_ils.series_record_cls

    by_multipart_id = series_search.query(
        "match", _migration__multipart_id=multipart_id
    )
    multiparts_only = by_multipart_id.filter(
        "match", mode_of_issuance="MULTIPART_MONOGRAPH"
    )
    response = multiparts_only.execute()
    found = response.hits.total.value

    if found == 0:
        click.secho("no multipart found with id {}".format(multipart_id),
                    fg="red")
        return None

    if found != 1:
        raise MultipartMigrationError(
            "found more than one multipart id {}".format(multipart_id))

    return series_cls.get_record_by_pid(response.hits[0].pid)
def validate_serial_records():
    """Check the migrated serials and report anything suspicious.

    For every series with ``mode_of_issuance`` equal to ``SERIAL``:

    * a warning is logged when its title was already seen on an earlier
      serial;
    * its ``serial`` relations are compared with the legacy recids stored
      in ``_migration.children``, and mismatches are echoed.
    """

    def report_children_mismatch(serial, expected_recids):
        """Echo differences between a serial's relations and its recids."""
        record_cls = current_app_ils.document_record_cls
        serial_relations = serial.relations.get().get("serial", [])

        expected_count = len(expected_recids)
        actual_count = len(serial_relations)
        if actual_count != expected_count:
            click.echo("[Serial {}] Incorrect number of children: {} "
                       "(expected {})".format(serial["pid"],
                                              actual_count,
                                              expected_count))

        for relation in serial_relations:
            child = record_cls.get_record_by_pid(
                relation["pid"], pid_type=relation["pid_type"])
            # Children without a legacy recid cannot be compared.
            if "legacy_recid" not in child:
                continue
            if child["legacy_recid"] in expected_recids:
                continue
            click.echo("[Serial {}] Unexpected child with legacy "
                       "recid: {}".format(serial["pid"],
                                          child["legacy_recid"]))

    seen_titles = set()
    serials = current_app_ils.series_search_cls().filter(
        "term", mode_of_issuance="SERIAL")

    for hit in serials.params(scroll='1h').scan():
        # Duplicate detection is based on the title only.
        if "title" in hit:
            serial_title = hit.title
            if serial_title not in seen_titles:
                seen_titles.add(serial_title)
            else:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(serial_title))

        # Compare the relations of the stored record with the children
        # recorded during migration.
        expected_children = hit._migration.children
        serial_record = Series.get_record_by_pid(hit.pid)
        report_children_mismatch(serial_record, expected_children)

    click.echo("Serial validation check done!")
def search_series_by_title(title):
    """Build a search for series by lower-cased title.

    The title is lower-cased and used in a ``term`` filter on
    ``title__normalized_keyword``.

    :param title: title to look for.
    :returns: the search object; it is not executed here.
    """
    series_search = current_app_ils.series_search_cls()
    lowered_title = title.lower()
    return series_search.filter(
        "term", title__normalized_keyword=lowered_title
    )
def link_documents_and_serials():
    """Link documents/multiparts and serials.

    Runs three passes over search hits:

    * documents filtered on ``_migration__has_serial=True``,
    * series filtered on ``mode_of_issuance="MULTIPART_MONOGRAPH"`` and
      ``_migration__has_serial=True``,
    * documents filtered on ``_migration__has_journal=True``.

    For each hit with a legacy recid, the serial relations are created,
    the matching ``_migration`` flag is set to ``False`` on the record, and
    the record and the database session are committed.

    An exception raised while processing a hit is passed to the handler
    registered for its exact class in ``relation_exception_handlers``.

    :raises Exception: any exception raised while processing a hit that has
        no registered handler is re-raised unchanged.
    """
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()
    journal_legacy_pid_type = current_app.config[
        "CDS_ILS_SERIES_LEGACY_PID_TYPE"
    ]

    def handle_link_error(exc, hit):
        """Pass ``exc`` to its registered handler, if there is one.

        Returns ``True`` when a handler was called and ``False`` otherwise,
        so the caller can re-raise from inside its ``except`` block.
        """
        handler = relation_exception_handlers.get(exc.__class__)
        if not handler:
            return False
        # Hits without a legacy recid are reported with ``None``.
        legacy_recid = getattr(hit, "legacy_recid", None)
        handler(exc, new_pid=hit.pid, legacy_id=legacy_recid)
        return True

    def link_records_and_serial(record_cls, search):
        click.echo(f"FOUND {search.count()} serial related records.")
        for hit in search.params(scroll='1h').scan():
            try:
                click.echo(f"Processing record {hit.pid}.")
                # Skip linking if the hit doesn't have a legacy recid since
                # it means it's a volume of a multipart
                if "legacy_recid" not in hit:
                    continue
                record = record_cls.get_record_by_pid(hit.pid)
                check_for_special_series(record)
                for serial in get_serials_by_child_recid(hit.legacy_recid):
                    volume = get_migrated_volume_by_serial_title(
                        record, serial["title"])
                    create_parent_child_relation(
                        serial, record, SERIAL_RELATION, volume)
                    RecordRelationIndexer().index(record, serial)

                # mark done
                record["_migration"]["has_serial"] = False
                record.commit()
                db.session.commit()
            except Exception as exc:
                # NOTE(review): nothing is rolled back after a handled
                # failure - confirm that pending session state from the
                # failed record is acceptable here.
                if not handle_link_error(exc, hit):
                    # Bare raise keeps the original traceback intact.
                    raise

    def link_record_and_journal(record_cls, search):
        click.echo(f"FOUND {search.count()} journal related records.")
        for hit in search.params(scroll='1h').scan():
            click.echo(f"Processing record {hit.pid}.")
            try:
                if "legacy_recid" not in hit:
                    continue
                record = record_cls.get_record_by_pid(hit.pid)
                for journal in \
                        hit["_migration"]["journal_record_legacy_recids"]:
                    serial = get_record_by_legacy_recid(
                        series_class, journal_legacy_pid_type,
                        journal["recid"])
                    create_parent_child_relation(
                        serial, record, SERIAL_RELATION, journal["volume"])

                # mark done
                record["_migration"]["has_journal"] = False
                record.commit()
                db.session.commit()
            except Exception as exc:
                if not handle_link_error(exc, hit):
                    raise

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True))
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )