Exemple #1
0
def validate_multipart_records():
    """Report inconsistencies in migrated multipart monographs.

    Every series with mode of issuance ``MULTIPART_MONOGRAPH`` whose
    ``_migration`` data contains ``volumes`` is compared against its
    ``multipart_monograph`` relations. Mismatches are printed with
    ``click.echo``; nothing is raised or returned.
    """
    def validate_multipart_relation(multipart, volumes):
        """Compare one multipart's relations with its migration volumes."""
        document_cls = current_app_ils.document_record_cls
        related = multipart.relations.get().get("multipart_monograph", [])
        known_titles = [vol["title"] for vol in volumes if "title" in vol]
        # Several migration entries may share one volume number, so the
        # expected amount of relations is the number of distinct volumes.
        expected = len({vol["volume"] for vol in volumes})
        found = len(related)
        if expected != found:
            click.echo(
                "[Multipart {}] Incorrect number of volumes: {} "
                "(expected {})".format(multipart["pid"], found, expected)
            )
        for rel in related:
            child = document_cls.get_record_by_pid(
                rel["pid"], pid_type=rel["pid_type"]
            )
            child_title = child["title"]
            if child_title in known_titles:
                continue
            click.echo(
                '[Multipart {}] Title "{}" does not exist in '
                "migration data".format(multipart["pid"], child_title)
            )

    multiparts = SeriesSearch().filter(
        "term", mode_of_issuance="MULTIPART_MONOGRAPH"
    )
    for hit in multiparts.scan():
        migration = hit._migration
        # Only multiparts carrying volume data can be checked.
        if "volumes" not in migration:
            continue
        volumes = migration.volumes
        record = Series.get_record_by_pid(hit.pid)
        validate_multipart_relation(record, volumes)

    click.echo("Multipart validation check done!")
Exemple #2
0
def validate_multipart_records():
    """Report inconsistencies in migrated multipart monographs.

    Every series with mode of issuance ``MULTIPART_MONOGRAPH`` whose
    ``_migration`` data contains ``volumes`` is compared against its
    ``multipart_monograph`` relations. Mismatches are printed with
    ``click.echo``; nothing is raised or returned.
    """
    def validate_multipart_relation(multipart, volumes):
        """Compare one multipart's relations with its migration volumes."""
        related = multipart.relations.get().get('multipart_monograph', [])
        known_titles = [vol['title'] for vol in volumes if 'title' in vol]
        # Distinct volume numbers give the expected amount of relations.
        expected = len({vol['volume'] for vol in volumes})
        found = len(related)
        if expected != found:
            click.echo(
                '[Multipart {}] Incorrect number of volumes: {} '
                '(expected {})'.format(multipart['pid'], found, expected)
            )
        for rel in related:
            child = Document.get_record_by_pid(
                rel['pid'], pid_type=rel['pid_type']
            )
            # Here the document title is a nested object holding "title".
            child_title = child['title']['title']
            if child_title in known_titles:
                continue
            click.echo(
                '[Multipart {}] Title "{}" does not exist in '
                'migration data'.format(multipart['pid'], child_title)
            )

    multiparts = SeriesSearch().filter(
        'term', mode_of_issuance='MULTIPART_MONOGRAPH'
    )
    for hit in multiparts.scan():
        migration = hit._migration
        # Only multiparts carrying volume data can be checked.
        if 'volumes' not in migration:
            continue
        volumes = migration.volumes
        record = Series.get_record_by_pid(hit.pid)
        validate_multipart_relation(record, volumes)

    click.echo('Multipart validation check done!')
Exemple #3
0
def get_serials_by_child_recid(recid):
    """Yield every serial whose migration children include ``recid``.

    The search applies two term filters: ``mode_of_issuance`` equal to
    ``SERIAL`` and ``_migration__children`` equal to ``recid``. Each hit is
    loaded as a ``Series`` record by its pid.
    """
    base_search = SeriesSearch()
    serial_filters = [
        Q('term', mode_of_issuance='SERIAL'),
        Q('term', _migration__children=recid),
    ]
    matches = base_search.query('bool', filter=serial_filters)
    yield from (
        Series.get_record_by_pid(match.pid) for match in matches.scan()
    )
Exemple #4
0
def test_journal_relation_from_publication_info(app):
    """Check that linking documents and serials adds a ``serial`` relation.

    A document whose ``_migration`` data references the journal's legacy
    recid and a ``SERIAL`` series with that legacy recid are created and
    indexed; after ``link_documents_and_serials`` the reloaded document is
    expected to expose ``serial`` in its relations.
    """

    document_data = {
        "$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
        "created_by": {"type": "script", "value": "test"},
        "pid": "4321",
        "legacy_recid": "1111",
        "title": "Book: A Book",
        "document_type": "BOOK",
        "authors": [{"full_name": "Author Author"}],
        "abstracts": [{"value": "This is an abstract"}],
        "language": ["it"],
        "publication_year": "2020",
        "identifiers": [{"scheme": "ISBN", "value": "0123456789"}],
        "cover_metadata": {"ISBN": "0123456789"},
        "publication_info": [{"journal_issue": "issue"}],
        "_migration": {
            "has_journal": True,
            # Points at the legacy recid of the journal created below.
            "journal_record_legacy_recids": [
                {"recid": "1234", "volume": None},
            ],
        },
    }

    journal_data = {
        "$schema": "https://127.0.0.1:5000/schemas/series/series-v1.0.0.json",
        "pid": "serid-4",
        "title": "Dispersion Forces",
        "authors": ["Buhmann, Stefan Yoshi"],
        "abstract": "This is a multipart monograph",
        "mode_of_issuance": "SERIAL",
        "legacy_recid": "1234",
    }

    # Persist the document.
    document_uuid = mint_record_pid(
        DOCUMENT_PID_TYPE, "pid", {"pid": document_data["pid"]}
    )
    document = Document.create(document_data, document_uuid)

    # Persist the journal and register its legacy recid.
    journal_uuid = mint_record_pid(
        SERIES_PID_TYPE, "pid", {"pid": journal_data["pid"]}
    )
    journal = Series.create(journal_data, journal_uuid)
    legacy_recid_minter(journal["legacy_recid"], journal_uuid)
    db.session.commit()

    # Index both records and refresh so they are searchable.
    indexer = RecordIndexer()
    indexer.index(document)
    indexer.index(journal)
    current_search.flush_and_refresh(index="*")

    link_documents_and_serials()

    reloaded = Document.get_record_by_pid(document["pid"])
    assert "serial" in reloaded.relations
Exemple #5
0
def get_serials_by_child_recid(recid):
    """Yield every serial whose migration children include ``recid``.

    Hits are restricted by two term filters (``mode_of_issuance`` equal to
    ``SERIAL`` and ``_migration__children`` equal to ``recid``) and each one
    is loaded as a ``Series`` record by its pid.
    """
    series_search = SeriesSearch()
    is_serial = Q("term", mode_of_issuance="SERIAL")
    has_child = Q("term", _migration__children=recid)
    matching = series_search.query("bool", filter=[is_serial, has_child])
    for serial_hit in matching.scan():
        serial_pid = serial_hit.pid
        yield Series.get_record_by_pid(serial_pid)
Exemple #6
0
def get_multipart_by_legacy_recid(recid):
    """Return the multipart monograph series with the given legacy recid.

    The search filters on ``mode_of_issuance`` equal to
    ``MULTIPART_MONOGRAPH`` and ``legacy_recid`` equal to ``recid``.

    :param recid: legacy record id to look up.
    :returns: the ``Series`` record loaded from the single matching hit.
    :raises MultipartMigrationError: if no hit, or more than one hit,
        matches the legacy recid.
    """
    search = SeriesSearch().query(
        'bool',
        filter=[
            Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
            Q('term', legacy_recid=recid),
        ])
    result = search.execute()
    # Elasticsearch < 7 reports ``hits.total`` as a plain integer, whereas
    # Elasticsearch 7 reports an object carrying the count in ``value``.
    # Normalise both shapes so the comparisons below never compare an
    # object with an integer.
    total = result.hits.total
    total = getattr(total, 'value', total)
    if total < 1:
        raise MultipartMigrationError(
            'no multipart found with legacy recid {}'.format(recid))
    if total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))
    return Series.get_record_by_pid(result.hits[0].pid)
Exemple #7
0
def validate_serial_records():
    """Report inconsistencies in migrated serials.

    For every series with mode of issuance ``SERIAL``:

    * a warning is logged when its title was already seen on another serial;
    * its ``serial`` relations are compared with the children listed in its
      ``_migration`` data, and mismatches are printed with ``click.echo``.
    """

    def validate_serial_relation(serial, recids):
        """Compare one serial's relations with its migrated child recids."""
        related = serial.relations.get().get("serial", [])
        expected, found = len(recids), len(related)
        if expected != found:
            message = (
                "[Serial {}] Incorrect number of children: {} "
                "(expected {})"
            )
            click.echo(message.format(serial["pid"], found, expected))
        for rel in related:
            child = Document.get_record_by_pid(
                rel["pid"], pid_type=rel["pid_type"]
            )
            # Children without a legacy recid cannot be checked.
            if "legacy_recid" not in child:
                continue
            if child["legacy_recid"] in recids:
                continue
            click.echo(
                "[Serial {}] Unexpected child with legacy "
                "recid: {}".format(serial["pid"], child["legacy_recid"])
            )

    seen_titles = set()
    serials = SeriesSearch().filter("term", mode_of_issuance="SERIAL")
    for hit in serials.scan():
        # Remember each title so that repeated ones can be reported.
        if "title" in hit:
            current_title = hit.title
            if current_title not in seen_titles:
                seen_titles.add(current_title)
            else:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(current_title)
                )
        # Compare the migrated children with the stored relations.
        child_recids = hit._migration.children
        record = Series.get_record_by_pid(hit.pid)
        validate_serial_relation(record, child_recids)

    click.echo("Serial validation check done!")
Exemple #8
0
def get_multipart_by_legacy_recid(recid):
    """Look up the multipart monograph series with the given legacy recid.

    Returns the ``Series`` record of the single matching hit. When nothing
    matches, a red message is printed and ``None`` is returned. When more
    than one hit matches, ``MultipartMigrationError`` is raised.
    """
    multipart_filters = SeriesSearch().query(
        'bool',
        filter=[
            Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
            Q('term', legacy_recid=recid),
        ])
    response = multipart_filters.execute()
    # The total is read from ``hits.total`` directly when ``lt_es7`` is set,
    # and from its ``value`` attribute otherwise.
    if lt_es7:
        total = response.hits.total
    else:
        total = response.hits.total.value

    if not response.hits or total < 1:
        click.secho('no multipart found with legacy recid {}'.format(recid),
                    fg='red')
        # TODO uncomment with cleaner data
        # raise MultipartMigrationError(
        #     'no multipart found with legacy recid {}'.format(recid))
        return None

    if total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))

    return Series.get_record_by_pid(response.hits[0].pid)
Exemple #9
0
def testdata(app, db, es_clear, system_user):
    """Load the JSON fixtures, persist and index them, and return the data.

    Locations, internal locations, documents, items, loans and series are
    created in that order. The returned dict holds the loaded JSON lists
    for documents, items, loans, locations and series.
    """
    indexer = RecordIndexer()

    # (result key, fixture file, record class, PID type), in creation order.
    fixtures = (
        ("locations", "locations.json", Location, LOCATION_PID_TYPE),
        (
            "internal_locations",
            "internal_locations.json",
            InternalLocation,
            INTERNAL_LOCATION_PID_TYPE,
        ),
        ("documents", "documents.json", Document, DOCUMENT_PID_TYPE),
        ("items", "items.json", Item, ITEM_PID_TYPE),
        ("loans", "loans.json", Loan, CIRCULATION_LOAN_PID_TYPE),
        ("series", "series.json", Series, SERIES_PID_TYPE),
    )

    loaded = {}
    for key, filename, record_cls, pid_type in fixtures:
        entries = load_json_from_datadir(filename)
        loaded[key] = entries
        for entry in entries:
            record = record_cls.create(entry)
            mint_record_pid(pid_type, "pid", record)
            record.commit()
            db.session.commit()
            indexer.index(record)

    # flush all indices after indexing, otherwise ES won't be ready for tests
    current_search.flush_and_refresh(index='*')

    # Internal locations are created but are not part of the returned data.
    returned_keys = ("documents", "items", "loans", "locations", "series")
    return {key: loaded[key] for key in returned_keys}
Exemple #10
0
 def relations_resolver(series_pid):
     """Return the relations of the series identified by ``series_pid``."""
     return Series.get_record_by_pid(series_pid).relations