예제 #1
0
def search_serial_by_title(title):
    """Build a search for serials whose title matches the given phrase.

    :param title: text embedded, wrapped in double quotes, in a
        ``query_string`` query against ``title``.
    :return: the unexecuted search, additionally filtered on
        ``mode_of_issuance`` being ``SERIAL``.
    """
    base_search = current_app_ils.series_search_cls()
    phrase_query = f'title:"{title}"'
    by_title = base_search.query("query_string", query=phrase_query)
    return by_title.filter("term", mode_of_issuance="SERIAL")
예제 #2
0
파일: api.py 프로젝트: kprzerwa/cds-ils
def link_documents_and_serials():
    """Link documents/multiparts and serials.

    Three passes are run, in this order:

    * documents flagged with ``_migration__has_serial``,
    * series with ``mode_of_issuance`` ``MULTIPART_MONOGRAPH`` flagged with
      ``_migration__has_serial``,
    * documents flagged with ``_migration__has_journal``.

    Each pass creates ``SERIAL_RELATION`` parent/child relations between
    the matching records and their serials.
    """
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()

    def link_records_and_serial(record_cls, search):
        """Relate every hit of ``search`` to the serials listing it.

        :param record_cls: record class used to load each hit by pid.
        :param search: search whose hits are scanned.
        """
        for hit in search.scan():
            # Skip linking if the hit doesn't have a legacy recid since it
            # means it's a volume of a multipart
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            check_for_special_series(record)
            for serial in get_serials_by_child_recid(hit.legacy_recid):
                volume = get_migrated_volume_by_serial_title(
                    record, serial["title"])
                create_parent_child_relation(serial, record, SERIAL_RELATION,
                                             volume)
                RecordRelationIndexer().index(record, serial)

    def link_record_and_journal(record_cls, search):
        """Relate every hit of ``search`` to its migrated journals.

        The journals are read from the hit's
        ``_migration.journal_record_legacy_recids`` entries, each providing
        a ``recid`` and a ``volume``.

        :param record_cls: record class used to load each hit by pid.
        :param search: search whose hits are scanned.
        """
        for hit in search.scan():
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            for journal in hit["_migration"]["journal_record_legacy_recids"]:
                serial = get_record_by_legacy_recid(series_class,
                                                    journal["recid"])
                create_parent_child_relation(serial, record, SERIAL_RELATION,
                                             journal["volume"])

                # The key is gone after the first journal; pop() with a
                # default keeps later iterations (and records that never
                # had the key) from raising KeyError.
                record.pop("publication_info", None)
                record.commit()
                db.session.commit()

    click.echo("Creating serial relations...")
    link_records_and_serial(
        document_class,
        document_search.filter("term", _migration__has_serial=True),
    )
    link_records_and_serial(
        series_class,
        series_search.filter(
            "bool",
            filter=[
                Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
                Q("term", _migration__has_serial=True),
            ],
        ),
    )
    link_record_and_journal(
        document_class,
        document_search.filter("term", _migration__has_journal=True),
    )
예제 #3
0
def search_series_with_relations():
    """Build a search for series flagged as having related records.

    :return: the unexecuted series search, filtered by a ``bool`` filter
        holding a single ``_migration__has_related=True`` term.
    """
    base_search = current_app_ils.series_search_cls()
    related_only = [Q("term", _migration__has_related=True)]
    return base_search.filter("bool", filter=related_only)
예제 #4
0
def search_series_by_issn(issn):
    """Build a search for series carrying the given ISSN.

    :param issn: value matched against ``identifiers__value``.
    :return: the unexecuted series search; both the ``ISSN`` scheme term
        and the value term must match.
    """
    base_search = current_app_ils.series_search_cls()
    issn_clauses = [
        Q("term", identifiers__scheme="ISSN"),
        Q("term", identifiers__value=issn),
    ]
    return base_search.query("bool", must=issn_clauses)
예제 #5
0
def get_serials_by_child_recid(recid):
    """Yield the serial records that list ``recid`` among their children.

    :param recid: value matched against ``_migration__children``.
    :return: generator of series records, each loaded by the pid of a
        matching hit.
    """
    base_search = current_app_ils.series_search_cls()
    record_cls = current_app_ils.series_record_cls
    criteria = [
        Q("term", mode_of_issuance="SERIAL"),
        Q("term", _migration__children=recid),
    ]
    parents = base_search.query("bool", filter=criteria)
    # Scan with a one hour scroll, as requested via ``params``.
    for match in parents.params(scroll='1h').scan():
        yield record_cls.get_record_by_pid(match.pid)
예제 #6
0
def get_multipart_by_multipart_id(multipart_id):
    """Look up the single multipart monograph with the given identifier.

    :param multipart_id: value matched against
        ``_migration__multipart_id`` (f.e. ``vol234``).
    :return: the series record when exactly one hit is found, ``None``
        (after printing a red message) when there is no hit.
    :raises MultipartMigrationError: when more than one hit is found.
    """
    base_search = current_app_ils.series_search_cls()
    record_cls = current_app_ils.series_record_cls

    by_id = base_search.query("match", _migration__multipart_id=multipart_id)
    multiparts = by_id.filter("match", mode_of_issuance="MULTIPART_MONOGRAPH")
    response = multiparts.execute()
    found = response.hits.total.value

    if found == 1:
        return record_cls.get_record_by_pid(response.hits[0].pid)
    if found != 0:
        raise MultipartMigrationError(
            "found more than one multipart id {}".format(multipart_id))

    click.secho("no multipart found with id {}".format(multipart_id),
                fg="red")
    return None
예제 #7
0
def validate_serial_records():
    """Report inconsistencies in the migrated serials.

    For every series with ``mode_of_issuance`` ``SERIAL``:

    * a warning is logged when its title was already seen on another hit;
    * its ``serial`` relations are compared with the hit's
      ``_migration.children`` and mismatches are printed.
    """
    def validate_serial_relation(serial, recids):
        """Print mismatches between ``serial`` relations and ``recids``.

        :param serial: series record whose ``serial`` relations are read.
        :param recids: legacy recids expected among the children.
        """
        document_cls = current_app_ils.document_record_cls

        relations = serial.relations.get().get("serial", [])
        expected_count = len(recids)
        actual_count = len(relations)
        if expected_count != actual_count:
            click.echo("[Serial {}] Incorrect number of children: {} "
                       "(expected {})".format(serial["pid"], actual_count,
                                              expected_count))

        for relation in relations:
            child = document_cls.get_record_by_pid(
                relation["pid"], pid_type=relation["pid_type"])
            # Children without a legacy recid cannot be checked.
            if "legacy_recid" not in child:
                continue
            if child["legacy_recid"] in recids:
                continue
            click.echo("[Serial {}] Unexpected child with legacy "
                       "recid: {}".format(serial["pid"],
                                          child["legacy_recid"]))

    seen_titles = set()
    base_search = current_app_ils.series_search_cls()
    serials = base_search.filter("term", mode_of_issuance="SERIAL")

    for hit in serials.params(scroll='1h').scan():
        # Remember each title so repeated ones can be reported
        if "title" in hit:
            current_title = hit.title
            if current_title in seen_titles:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(current_title))
            # Adding a title that is already present is a no-op.
            seen_titles.add(current_title)

        # Compare the stored relations with the expected children
        expected_children = hit._migration.children
        serial_record = Series.get_record_by_pid(hit.pid)
        validate_serial_relation(serial_record, expected_children)

    click.echo("Serial validation check done!")
예제 #8
0
def search_series_by_title(title):
    """Build a search for series by lower-cased title.

    :param title: title text; it is lower-cased before being used as the
        ``title__normalized_keyword`` term.
    :return: the unexecuted, filtered series search.
    """
    base_search = current_app_ils.series_search_cls()
    return base_search.filter(
        "term", title__normalized_keyword=title.lower())
예제 #9
0
def link_documents_and_serials():
    """Link documents/multiparts and serials.

    Three passes are run, in this order:

    * documents flagged with ``_migration__has_serial``,
    * series with ``mode_of_issuance`` ``MULTIPART_MONOGRAPH`` flagged with
      ``_migration__has_serial``,
    * documents flagged with ``_migration__has_journal``.

    Exceptions raised while processing a hit are passed to the handler
    registered for their exact class in ``relation_exception_handlers``;
    exceptions without a handler are re-raised.
    """
    document_class = current_app_ils.document_record_cls
    document_search = current_app_ils.document_search_cls()
    series_class = current_app_ils.series_record_cls
    series_search = current_app_ils.series_search_cls()
    journal_legacy_pid_type = current_app.config[
        "CDS_ILS_SERIES_LEGACY_PID_TYPE"]

    def dispatch_failure(error, hit):
        """Hand ``error`` to its registered handler, or re-raise it."""
        handler = relation_exception_handlers.get(error.__class__)
        if not handler:
            raise error
        # Hits without a legacy recid are reported with ``None``.
        legacy_recid = getattr(hit, "legacy_recid", None)
        handler(error, new_pid=hit.pid, legacy_id=legacy_recid)

    def attach_serials(record_cls, hit):
        """Relate one hit to every serial listing its legacy recid."""
        click.echo(f"Processing record {hit.pid}.")
        # Skip linking if the hit doesn't have a legacy recid since it
        # means it's a volume of a multipart
        if "legacy_recid" not in hit:
            return
        record = record_cls.get_record_by_pid(hit.pid)
        check_for_special_series(record)
        for serial in get_serials_by_child_recid(hit.legacy_recid):
            volume = get_migrated_volume_by_serial_title(
                record, serial["title"])
            create_parent_child_relation(serial, record,
                                         SERIAL_RELATION, volume)
            RecordRelationIndexer().index(record, serial)
        # mark done
        record["_migration"]["has_serial"] = False
        record.commit()
        db.session.commit()

    def attach_journals(record_cls, hit):
        """Relate one hit to each of its migrated journals."""
        if "legacy_recid" not in hit:
            return
        record = record_cls.get_record_by_pid(hit.pid)
        journals = hit["_migration"]["journal_record_legacy_recids"]
        for journal in journals:
            serial = get_record_by_legacy_recid(
                series_class, journal_legacy_pid_type, journal["recid"])
            create_parent_child_relation(serial, record,
                                         SERIAL_RELATION,
                                         journal["volume"])

            # mark done (committed after every journal)
            record["_migration"]["has_journal"] = False
            record.commit()
            db.session.commit()

    def link_records_and_serial(record_cls, search):
        """Run ``attach_serials`` over every hit of ``search``."""
        click.echo(f"FOUND {search.count()} serial related records.")
        for hit in search.params(scroll='1h').scan():
            try:
                attach_serials(record_cls, hit)
            except Exception as exc:
                dispatch_failure(exc, hit)

    def link_record_and_journal(record_cls, search):
        """Run ``attach_journals`` over every hit of ``search``."""
        click.echo(f"FOUND {search.count()} journal related records.")
        for hit in search.params(scroll='1h').scan():
            # Printed outside the ``try`` so a failure here is not
            # routed to the relation exception handlers.
            click.echo(f"Processing record {hit.pid}.")
            try:
                attach_journals(record_cls, hit)
            except Exception as exc:
                dispatch_failure(exc, hit)

    click.echo("Creating serial relations...")

    serial_documents = document_search.filter(
        "term", _migration__has_serial=True)
    link_records_and_serial(document_class, serial_documents)

    serial_multiparts = series_search.filter(
        "bool",
        filter=[
            Q("term", mode_of_issuance="MULTIPART_MONOGRAPH"),
            Q("term", _migration__has_serial=True),
        ],
    )
    link_records_and_serial(series_class, serial_multiparts)

    journal_documents = document_search.filter(
        "term", _migration__has_journal=True)
    link_record_and_journal(document_class, journal_documents)