def fetch_most_loaned_documents(from_date, to_date, bucket_size):
    """Fetch the documents with the most loans within the date interval."""
    # Create loans aggregation
    most_loaned = get_most_loaned_documents(from_date, to_date, bucket_size)

    # Prepare the loan and extension count
    document_pids = []
    document_metadata = {}
    loan_result = most_loaned.execute()
    for bucket in loan_result.aggregations.most_loaned_documents.buckets:
        document_pid = bucket["key"]
        loan_count = bucket["doc_count"]
        loan_extensions = int(bucket["extensions"]["value"])

        document_pids.append(document_pid)
        document_metadata[document_pid] = dict(
            loans=loan_count, extensions=loan_extensions
        )

    # Enrich the document hits with the loan and extension counts
    doc_search = current_app_ils.document_search_cls()
    doc_search = doc_search.with_preference_param().params(version=True)
    doc_search = doc_search.search_by_pid(*document_pids)
    result = doc_search.execute()

    for hit in result.hits:
        pid = hit["pid"]
        hit["loan_count"] = document_metadata[pid]["loans"]
        hit["loan_extensions"] = document_metadata[pid]["extensions"]

    res = result.to_dict()
    res["hits"]["hits"] = sorted(
        res["hits"]["hits"],
        key=lambda hit: hit["_source"]["loan_count"],
        reverse=True,
    )
    return res

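# Illustrative usage sketch, not part of the original module: it assumes an
# Invenio ILS application context and that ``from_date``/``to_date`` are the
# date strings the loans aggregation expects. The helper name
# ``print_most_loaned`` is hypothetical.
def print_most_loaned(from_date, to_date, bucket_size=10):
    """Print PID, loan count and extension count of the most loaned documents."""
    res = fetch_most_loaned_documents(from_date, to_date, bucket_size)
    for hit in res["hits"]["hits"]:
        source = hit["_source"]
        print(source["pid"], source["loan_count"], source["loan_extensions"])
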
def link_documents_and_serials():
    """Link documents/multiparts and serials."""
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()

    def link_records_and_serial(record_cls, search):
        for hit in search.scan():
            # Skip linking if the hit doesn't have a legacy recid since it
            # means it's a volume of a multipart
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            check_for_special_series(record)
            for serial in get_serials_by_child_recid(hit.legacy_recid):
                volume = get_migrated_volume_by_serial_title(
                    record, serial["title"]
                )
                create_parent_child_relation(
                    serial, record, SERIAL_RELATION, volume
                )
                RecordRelationIndexer().index(record, serial)

    def link_record_and_journal(record_cls, search):
        for hit in search.scan():
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            for journal in hit["_migration"]["journal_record_legacy_recids"]:
                serial = get_record_by_legacy_recid(
                    series_class, journal["recid"]
                )
                create_parent_child_relation(
                    serial, record, SERIAL_RELATION, journal["volume"]
                )
            del record["publication_info"]
            record.commit()
            db.session.commit()

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True),
    )
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )

def search_documents_with_siblings_relations():
    """Return documents with siblings relations."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__has_related=True),
        ],
    )
    return search

def get_documents_with_external_eitems():
    """Return documents with eitems from external providers to be migrated."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__eitems_has_external=True),
        ],
    )
    return search

def get_documents_with_proxy_eitems():
    """Return documents with eitems behind proxy to be migrated."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__eitems_has_proxy=True),
        ],
    )
    return search

def get_all_documents_with_files():
    """Return all documents with files to be migrated."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.filter(
        "bool",
        filter=[
            Q("term", _migration__has_files=True),
        ],
    )
    return search

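# Illustrative sketch, not from the source: the helpers above only build lazy
# search objects; the caller is expected to execute or scan them. It assumes
# an Invenio ILS application context, and the function name
# ``count_documents_with_files`` is hypothetical.
def count_documents_with_files():
    """Return the PIDs of documents that still have files to migrate."""
    search = get_all_documents_with_files()
    pids = [hit.pid for hit in search.scan()]
    click.echo("{} documents with files to migrate".format(len(pids)))
    return pids
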
def search_documents_by_doi(doi):
    """Find document by DOI."""
    document_search = current_app_ils.document_search_cls()
    search = document_search.query(
        "bool",
        must=[
            Q("term", identifiers__scheme="DOI"),
            Q("term", identifiers__value=doi),
        ],
    )
    return search

def search_document_by_title_authors(title, authors, subtitle=None):
    """Find document by title and authors."""
    document_search = current_app_ils.document_search_cls()
    if subtitle:
        search = (
            document_search.query("match", title=title)
            .filter("match", alternative_titles__value=subtitle)
            .filter("match", authors__full_name=" ".join(authors))
        )
    else:
        search = document_search.query("match", title=title).filter(
            "match", authors__full_name=" ".join(authors)
        )
    return search

def get_document_by_legacy_recid(legacy_recid):
    """Search for a document by its legacy recid."""
    document_search = current_app_ils.document_search_cls()
    document_cls = current_app_ils.document_record_cls
    search = document_search.query(
        "bool", filter=[Q("term", legacy_recid=legacy_recid)]
    )
    result = search.execute()
    hits_total = check_search_results(result, legacy_recid, "legacy recid")
    if hits_total == 1:
        return document_cls.get_record_by_pid(result.hits[0].pid)

def search_document_by_title_authors(title, authors, subtitle=None):
    """Find document by title and authors."""
    document_search = current_app_ils.document_search_cls()
    title = title.lower()
    if subtitle:
        search = (
            document_search.filter("term", title__normalized_keyword=title)
            .filter("match", alternative_titles__value=subtitle)
            .filter("match", authors__full_name__full_words=" ".join(authors))
        )
    else:
        search = (
            document_search.filter("term", title__normalized_keyword=title)
            .filter("match", authors__full_name__full_words=" ".join(authors))
        )
    return search

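# Illustrative sketch, not from the source, of how the title/authors matching
# above could be used when importing an external record. It assumes an
# Invenio ILS application context; the ``provider_record`` dict shape and the
# function name ``match_document_for_import`` are hypothetical.
def match_document_for_import(provider_record):
    """Return the matching document PID, or None when the match is not unique."""
    search = search_document_by_title_authors(
        provider_record["title"],
        provider_record.get("authors", []),
        subtitle=provider_record.get("subtitle"),
    )
    results = search.execute()
    if results.hits.total.value == 1:
        return results.hits[0].pid
    return None
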
def fuzzy_search_document(title, authors):
    """Search fuzzy matches of document title and authors."""
    # check the fuzzy search options under:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html
    document_search = current_app_ils.document_search_cls()
    search = document_search.query(
        Match(
            title__keyword={
                "fuzziness": "AUTO",
                "fuzzy_transpositions": "true",
                "query": title,
            }
        )
    ).filter(
        Match(
            authors__full_name={
                "query": " ".join(authors),
                "fuzziness": "AUTO",
                "fuzzy_transpositions": "true",
            }
        )
    )
    return search

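# Illustrative sketch, not from the source: fall back to the fuzzy query when
# the exact title/authors search finds nothing. It assumes an Invenio ILS
# application context; the fallback policy and the function name
# ``find_document_with_fallback`` are hypothetical.
def find_document_with_fallback(title, authors):
    """Return PIDs matching exactly, else PIDs from the fuzzy search."""
    results = search_document_by_title_authors(title, authors).execute()
    if results.hits.total.value == 0:
        results = fuzzy_search_document(title, authors).execute()
    return [hit.pid for hit in results.hits]
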
def get_document_by_barcode(barcode):
    """Return document from barcode search."""
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    search = document_search.query(
        "query_string", query='_migration.items.barcode:"{}"'.format(barcode)
    )
    result = search.execute()
    hits_total = result.hits.total.value
    if hits_total == 1:
        click.secho(
            "! document found with item barcode {}".format(barcode),
            fg="green",
        )
        return document_class.get_record_by_pid(result.hits[0].pid)
    elif hits_total == 0:
        click.secho(
            "no document found with barcode {}".format(barcode),
            fg="red",
        )
        raise DocumentMigrationError(
            "no document found with barcode {}".format(barcode)
        )
    else:
        click.secho(
            "found more than one document with barcode {}".format(barcode),
            fg="red",
        )
        raise DocumentMigrationError(
            "found more than one document with barcode {}".format(barcode)
        )

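# Illustrative sketch, not from the source, of calling the barcode lookup
# above while tolerating failures during a migration run. It assumes an
# Invenio ILS application context; the function name
# ``resolve_document_for_item`` is hypothetical.
def resolve_document_for_item(barcode):
    """Return the document for an item barcode, or None if the lookup fails."""
    try:
        return get_document_by_barcode(barcode)
    except DocumentMigrationError as exc:
        click.secho(str(exc), fg="yellow")
        return None
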
def link_documents_and_serials():
    """Link documents/multiparts and serials."""
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()
    journal_legacy_pid_type = \
        current_app.config["CDS_ILS_SERIES_LEGACY_PID_TYPE"]

    def link_records_and_serial(record_cls, search):
        click.echo(f"FOUND {search.count()} serial related records.")
        for hit in search.params(scroll='1h').scan():
            try:
                click.echo(f"Processing record {hit.pid}.")
                # Skip linking if the hit doesn't have a legacy recid since it
                # means it's a volume of a multipart
                if "legacy_recid" not in hit:
                    continue
                record = record_cls.get_record_by_pid(hit.pid)
                check_for_special_series(record)
                for serial in get_serials_by_child_recid(hit.legacy_recid):
                    volume = get_migrated_volume_by_serial_title(
                        record, serial["title"]
                    )
                    create_parent_child_relation(
                        serial, record, SERIAL_RELATION, volume
                    )
                    RecordRelationIndexer().index(record, serial)
                # mark done
                record["_migration"]["has_serial"] = False
                record.commit()
                db.session.commit()
            except Exception as exc:
                handler = relation_exception_handlers.get(exc.__class__)
                if handler:
                    legacy_recid = None
                    if hasattr(hit, "legacy_recid"):
                        legacy_recid = hit.legacy_recid
                    handler(exc, new_pid=hit.pid, legacy_id=legacy_recid)
                else:
                    raise exc

    def link_record_and_journal(record_cls, search):
        click.echo(f"FOUND {search.count()} journal related records.")
        for hit in search.params(scroll='1h').scan():
            click.echo(f"Processing record {hit.pid}.")
            try:
                if "legacy_recid" not in hit:
                    continue
                record = record_cls.get_record_by_pid(hit.pid)
                for journal in \
                        hit["_migration"]["journal_record_legacy_recids"]:
                    serial = get_record_by_legacy_recid(
                        series_class,
                        journal_legacy_pid_type,
                        journal["recid"],
                    )
                    create_parent_child_relation(
                        serial, record, SERIAL_RELATION, journal["volume"]
                    )
                # mark done
                record["_migration"]["has_journal"] = False
                record.commit()
                db.session.commit()
            except Exception as exc:
                handler = relation_exception_handlers.get(exc.__class__)
                if handler:
                    legacy_recid = None
                    if hasattr(hit, "legacy_recid"):
                        legacy_recid = hit.legacy_recid
                    handler(exc, new_pid=hit.pid, legacy_id=legacy_recid)
                else:
                    raise exc

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True),
    )
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )

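# Illustrative sketch, not from the source: the relation-linking step above
# could be exposed as a CLI command to run after documents and series have
# been migrated. The command name ``link-serials`` is hypothetical;
# ``with_appcontext`` is the standard Flask CLI helper for pushing the
# application context.
from flask.cli import with_appcontext


@click.command(name="link-serials")
@with_appcontext
def link_serials_cli():
    """Link migrated documents/multiparts to their serials and journals."""
    link_documents_and_serials()
    click.secho("Serial and journal relations created.", fg="green")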