def fetch_most_loaned_documents(from_date, to_date, bucket_size):
    """Fetch the documents with the most loans within the date interval."""
    # Create loans aggregation
    most_loaned = get_most_loaned_documents(from_date, to_date, bucket_size)

    # Prepare the loan and extension count
    document_pids = []
    document_metadata = {}
    loan_result = most_loaned.execute()
    for bucket in loan_result.aggregations.most_loaned_documents.buckets:
        document_pid = bucket["key"]
        loan_count = bucket["doc_count"]
        loan_extensions = int(bucket["extensions"]["value"])

        document_pids.append(document_pid)
        document_metadata[document_pid] = dict(
            loans=loan_count, extensions=loan_extensions
        )

    # Enrich the document hits with the loan and extension counts
    doc_search = current_app_ils.document_search_cls()
    doc_search = doc_search.with_preference_param().params(version=True)
    doc_search = doc_search.search_by_pid(*document_pids)
    result = doc_search.execute()

    for hit in result.hits:
        pid = hit["pid"]
        hit["loan_count"] = document_metadata[pid]["loans"]
        hit["loan_extensions"] = document_metadata[pid]["extensions"]

    res = result.to_dict()
    res["hits"]["hits"] = sorted(
        res["hits"]["hits"],
        key=lambda hit: hit["_source"]["loan_count"],
        reverse=True,
    )
    return res

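# Illustrative usage sketch, not part of the original module: it assumes an
# Invenio ILS application context and that ``from_date``/``to_date`` are the
# date strings the loans aggregation expects. The helper name
# ``print_most_loaned`` is hypothetical.
def print_most_loaned(from_date, to_date, bucket_size=10):
    """Print PID, loan count and extension count of the most loaned documents."""
    res = fetch_most_loaned_documents(from_date, to_date, bucket_size)
    for hit in res["hits"]["hits"]:
        source = hit["_source"]
        print(source["pid"], source["loan_count"], source["loan_extensions"])
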
def link_documents_and_serials():
    """Link documents/multiparts and serials."""
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()

    def link_records_and_serial(record_cls, search):
        for hit in search.scan():
            # Skip linking if the hit doesn't have a legacy recid since it
            # means it's a volume of a multipart
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            check_for_special_series(record)
            for serial in get_serials_by_child_recid(hit.legacy_recid):
                volume = get_migrated_volume_by_serial_title(
                    record, serial["title"]
                )
                create_parent_child_relation(
                    serial, record, SERIAL_RELATION, volume
                )
                RecordRelationIndexer().index(record, serial)

    def link_record_and_journal(record_cls, search):
        for hit in search.scan():
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            for journal in hit["_migration"]["journal_record_legacy_recids"]:
                serial = get_record_by_legacy_recid(
                    series_class, journal["recid"]
                )
                create_parent_child_relation(
                    serial, record, SERIAL_RELATION, journal["volume"]
                )
            del record["publication_info"]
            record.commit()
            db.session.commit()

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True),
    )
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )

def search_documents_with_siblings_relations():
    """Return documents with siblings relations."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__has_related=True),
        ],
    )
    return search

def get_documents_with_external_eitems():
    """Return documents with eitems from external providers to be migrated."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__eitems_has_external=True),
        ],
    )
    return search

def get_documents_with_proxy_eitems():
    """Return documents with eitems behind proxy to be migrated."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__eitems_has_proxy=True),
        ],
    )
    return search

def get_all_documents_with_files():
    """Return all documents with files to be migrated."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__has_files=True),
        ],
    )
    return search

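# Illustrative sketch, not from the source: the helpers above only build lazy
# search objects; the caller is expected to execute or scan them. It assumes
# an Invenio ILS application context, and the function name
# ``count_documents_with_files`` is hypothetical.
def count_documents_with_files():
    """Return the PIDs of documents that still have files to migrate."""
    search = get_all_documents_with_files()
    pids = [hit.pid for hit in search.scan()]
    click.echo("{} documents with files to migrate".format(len(pids)))
    return pids
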
def search_documents_by_doi(doi):
    """Find document by DOI."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.query(
        "bool",
        must=[
            Q("term", identifiers__scheme="DOI"),
            Q("term", identifiers__value=doi),
        ],
    )
    return search

def search_document_by_title_authors(title, authors, subtitle=None):
    """Find document by title and authors."""
    document_search = current_app_ils.document_search_cls()
    if subtitle:
        search = (
            document_search.query("match", title=title)
            .filter("match", alternative_titles__value=subtitle)
            .filter("match", authors__full_name=" ".join(authors))
        )
    else:
        search = document_search.query("match", title=title).filter(
            "match", authors__full_name=" ".join(authors)
        )
    return search

def get_document_by_legacy_recid(legacy_recid):
    """Search for a document by its legacy recid."""
    document_search = current_app_ils.document_search_cls()
    document_cls = current_app_ils.document_record_cls
    search = document_search.query(
        "bool", filter=[Q("term", legacy_recid=legacy_recid)]
    )
    result = search.execute()
    hits_total = check_search_results(result, legacy_recid, "legacy recid")
    if hits_total == 1:
        return document_cls.get_record_by_pid(result.hits[0].pid)

def search_document_by_title_authors(title, authors, subtitle=None):
    """Find document by title and authors."""
    document_search = current_app_ils.document_search_cls()
    title = title.lower()
    if subtitle:
        search = (
            document_search.filter("term", title__normalized_keyword=title)
            .filter("match", alternative_titles__value=subtitle)
            .filter("match", authors__full_name__full_words=" ".join(authors))
        )
    else:
        search = (
            document_search.filter("term", title__normalized_keyword=title)
            .filter("match", authors__full_name__full_words=" ".join(authors))
        )
    return search

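# Illustrative sketch, not from the source, of how the title/authors matching
# above could be used when importing an external record. It assumes an
# Invenio ILS application context; the ``provider_record`` dict shape and the
# function name ``match_document_for_import`` are hypothetical.
def match_document_for_import(provider_record):
    """Return the matching document PID, or None when the match is not unique."""
    search = search_document_by_title_authors(
        provider_record["title"],
        provider_record.get("authors", []),
        subtitle=provider_record.get("subtitle"),
    )
    results = search.execute()
    if results.hits.total.value == 1:
        return results.hits[0].pid
    return None
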
def fuzzy_search_document(title, authors):
    """Search fuzzy matches of document title and authors."""
    # check the fuzzy search options under:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html
    document_search = current_app_ils.document_search_cls()
    search = document_search.query(
        Match(
            title__keyword={
                "fuzziness": "AUTO",
                "fuzzy_transpositions": "true",
                "query": title,
            }
        )
    ).filter(
        Match(
            authors__full_name={
                "query": " ".join(authors),
                "fuzziness": "AUTO",
                "fuzzy_transpositions": "true",
            }
        )
    )
    return search

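# Illustrative sketch, not from the source: fall back to the fuzzy query when
# the exact title/authors search finds nothing. It assumes an Invenio ILS
# application context; the fallback policy and the function name
# ``find_document_with_fallback`` are hypothetical.
def find_document_with_fallback(title, authors):
    """Return PIDs matching exactly, else PIDs from the fuzzy search."""
    results = search_document_by_title_authors(title, authors).execute()
    if results.hits.total.value == 0:
        results = fuzzy_search_document(title, authors).execute()
    return [hit.pid for hit in results.hits]
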
def get_document_by_barcode(barcode):
    """Return document from barcode search."""
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    search = document_search.query(
        "query_string", query='_migration.items.barcode:"{}"'.format(barcode)
    )
    result = search.execute()
    hits_total = result.hits.total.value
    if hits_total == 1:
        click.secho(
            "! document found with item barcode {}".format(barcode),
            fg="green",
        )
        return document_class.get_record_by_pid(result.hits[0].pid)
    elif hits_total == 0:
        click.secho(
            "no document found with barcode {}".format(barcode),
            fg="red",
        )
        raise DocumentMigrationError(
            "no document found with barcode {}".format(barcode)
        )
    else:
        click.secho(
            "found more than one document with barcode {}".format(barcode),
            fg="red",
        )
        raise DocumentMigrationError(
            "found more than one document with barcode {}".format(barcode)
        )

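# Illustrative sketch, not from the source, of calling the barcode lookup
# above while tolerating failures during a migration run. It assumes an
# Invenio ILS application context; the function name
# ``resolve_document_for_item`` is hypothetical.
def resolve_document_for_item(barcode):
    """Return the document for an item barcode, or None if the lookup fails."""
    try:
        return get_document_by_barcode(barcode)
    except DocumentMigrationError as exc:
        click.secho(str(exc), fg="yellow")
        return None
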
def link_documents_and_serials():
    """Link documents/multiparts and serials."""
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()
    journal_legacy_pid_type = \
        current_app.config["CDS_ILS_SERIES_LEGACY_PID_TYPE"]

    def link_records_and_serial(record_cls, search):
        click.echo(f"FOUND {search.count()} serial related records.")
        for hit in search.params(scroll='1h').scan():
            try:
                click.echo(f"Processing record {hit.pid}.")
                # Skip linking if the hit doesn't have a legacy recid since it
                # means it's a volume of a multipart
                if "legacy_recid" not in hit:
                    continue
                record = record_cls.get_record_by_pid(hit.pid)
                check_for_special_series(record)
                for serial in get_serials_by_child_recid(hit.legacy_recid):
                    volume = get_migrated_volume_by_serial_title(
                        record, serial["title"]
                    )
                    create_parent_child_relation(
                        serial, record, SERIAL_RELATION, volume
                    )
                    RecordRelationIndexer().index(record, serial)
                # mark done
                record["_migration"]["has_serial"] = False
                record.commit()
                db.session.commit()
            except Exception as exc:
                handler = relation_exception_handlers.get(exc.__class__)
                if handler:
                    legacy_recid = None
                    if hasattr(hit, "legacy_recid"):
                        legacy_recid = hit.legacy_recid
                    handler(exc, new_pid=hit.pid, legacy_id=legacy_recid)
                else:
                    raise exc

    def link_record_and_journal(record_cls, search):
        click.echo(f"FOUND {search.count()} journal related records.")
        for hit in search.params(scroll='1h').scan():
            click.echo(f"Processing record {hit.pid}.")
            try:
                if "legacy_recid" not in hit:
                    continue
                record = record_cls.get_record_by_pid(hit.pid)
                for journal in \
                        hit["_migration"]["journal_record_legacy_recids"]:
                    serial = get_record_by_legacy_recid(
                        series_class,
                        journal_legacy_pid_type,
                        journal["recid"],
                    )
                    create_parent_child_relation(
                        serial, record, SERIAL_RELATION, journal["volume"]
                    )
                # mark done
                record["_migration"]["has_journal"] = False
                record.commit()
                db.session.commit()
            except Exception as exc:
                handler = relation_exception_handlers.get(exc.__class__)
                if handler:
                    legacy_recid = None
                    if hasattr(hit, "legacy_recid"):
                        legacy_recid = hit.legacy_recid
                    handler(exc, new_pid=hit.pid, legacy_id=legacy_recid)
                else:
                    raise exc

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True),
    )
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )

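# Illustrative sketch, not from the source: the relation-linking step above
# could be exposed as a CLI command to run after documents and series have
# been migrated. The command name ``link-serials`` is hypothetical;
# ``with_appcontext`` is the standard Flask CLI helper for pushing the
# application context.
from flask.cli import with_appcontext


@click.command(name="link-serials")
@with_appcontext
def link_serials_cli():
    """Link migrated documents/multiparts to their serials and journals."""
    link_documents_and_serials()
    click.secho("Serial and journal relations created.", fg="green")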