Example #1
0
def validate_multipart_records():
    """Check the outcome of the multipart monograph migration.

    Every multipart monograph series whose migration data contains volumes
    is inspected, and the following problems are printed with ``click.echo``:
    * the number of ``multipart_monograph`` relations differs from the
      number of distinct migrated volume numbers
    * a related child has a title that is not among the migrated volumes
    """
    def validate_multipart_relation(multipart, volumes):
        """Compare the relations of ``multipart`` with its migrated volumes."""
        children = multipart.relations.get().get('multipart_monograph', [])
        known_titles = [vol['title'] for vol in volumes if 'title' in vol]
        # Several entries may share a volume number, so count distinct ones.
        expected = len({vol['volume'] for vol in volumes})
        if expected != len(children):
            message = ('[Multipart {}] Incorrect number of volumes: {} '
                       '(expected {})')
            click.echo(
                message.format(multipart['pid'], len(children), expected))
        for rel in children:
            child = Document.get_record_by_pid(
                rel['pid'], pid_type=rel['pid_type'])
            if child['title']['title'] in known_titles:
                continue
            message = ('[Multipart {}] Title "{}" does not exist in '
                       'migration data')
            click.echo(
                message.format(multipart['pid'], child['title']['title']))

    multiparts = SeriesSearch().filter(
        'term', mode_of_issuance='MULTIPART_MONOGRAPH')
    for hit in multiparts.scan():
        # Only multiparts that were migrated together with volumes can be
        # checked for missing children.
        if 'volumes' not in hit._migration:
            continue
        migrated_volumes = hit._migration.volumes
        record = Series.get_record_by_pid(hit.pid)
        validate_multipart_relation(record, migrated_volumes)

    click.echo('Multipart validation check done!')
Example #2
0
def get_serials_by_child_recid(recid):
    """Yield every serial record whose migration children include ``recid``.

    The lookup is lazy: the search is only built and scanned once the
    returned generator is iterated.
    """
    base_search = SeriesSearch()
    criteria = [
        Q('term', mode_of_issuance='SERIAL'),
        Q('term', _migration__children=recid),
    ]
    serial_hits = base_search.query('bool', filter=criteria)
    for serial_hit in serial_hits.scan():
        serial = Series.get_record_by_pid(serial_hit.pid)
        yield serial
Example #3
0
def link_documents_and_serials():
    """Create serial relations for migrated documents and multiparts.

    Documents and multipart monographs flagged with
    ``_migration.has_serial`` are related to every serial that lists their
    legacy recid among its migration children.
    """
    def link_records_and_serial(record_cls, search):
        """Relate each hit of ``search`` to the serials that reference it."""
        for hit in search.scan():
            # A hit without a legacy recid is a volume of a multipart and
            # is therefore never linked to a serial directly.
            if 'legacy_recid' in hit:
                child = record_cls.get_record_by_pid(hit.pid)
                parents = get_serials_by_child_recid(hit.legacy_recid)
                for parent in parents:
                    serial_title = parent['title']['title']
                    volume = get_migrated_volume_by_serial_title(
                        child, serial_title)
                    relation_type = current_app.config['SERIAL_RELATION']
                    create_parent_child_relation(
                        parent, child, relation_type, volume)

    click.echo('Creating serial relations...')

    document_search = DocumentSearch().filter(
        'term', _migration__has_serial=True)
    link_records_and_serial(Document, document_search)

    multipart_criteria = [
        Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
        Q('term', _migration__has_serial=True),
    ]
    multipart_search = SeriesSearch().filter('bool', filter=multipart_criteria)
    link_records_and_serial(Series, multipart_search)
Example #4
0
def get_multipart_by_legacy_recid(recid):
    """Return the multipart monograph whose legacy recid equals ``recid``.

    Raises ``MultipartMigrationError`` when the search matches no multipart
    or more than one multipart.
    """
    criteria = [
        Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
        Q('term', legacy_recid=recid),
    ]
    response = SeriesSearch().query('bool', filter=criteria).execute()
    hits = response.hits

    # Exactly one match is required; anything else is a migration error.
    if hits.total < 1:
        raise MultipartMigrationError(
            'no multipart found with legacy recid {}'.format(recid))
    if hits.total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))
    return Series.get_record_by_pid(hits[0].pid)
Example #5
0
def validate_serial_records():
    """Check the outcome of the serial migration.

    For every series with mode of issuance ``SERIAL``:
    * a warning is logged when its title was already seen on another serial
    * a message is printed when the number of ``serial`` relations differs
      from the number of migrated children
    * a message is printed for each related child whose legacy recid is not
      among the migrated children
    """
    def validate_serial_relation(serial, recids):
        """Compare the relations of ``serial`` with its migrated recids."""
        children = serial.relations.get().get('serial', [])
        if len(recids) != len(children):
            message = ('[Serial {}] Incorrect number of children: {} '
                       '(expected {})')
            click.echo(
                message.format(serial['pid'], len(children), len(recids)))
        for rel in children:
            child = Document.get_record_by_pid(
                rel['pid'], pid_type=rel['pid_type'])
            # Children without a legacy recid cannot be matched; skip them.
            if 'legacy_recid' not in child:
                continue
            if child['legacy_recid'] in recids:
                continue
            message = '[Serial {}] Unexpected child with legacy recid: {}'
            click.echo(message.format(serial['pid'], child['legacy_recid']))

    seen_titles = set()
    serials = SeriesSearch().filter('term', mode_of_issuance='SERIAL')
    for hit in serials.scan():
        # Remember each title so that duplicates can be reported.
        if 'title' in hit and 'title' in hit.title:
            title = hit.title.title
            if title not in seen_titles:
                seen_titles.add(title)
            else:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(title))
        # Compare the migrated children with the stored relations.
        migrated_children = hit._migration.children
        record = Series.get_record_by_pid(hit.pid)
        validate_serial_relation(record, migrated_children)

    click.echo('Serial validation check done!')
Example #6
0
def index_documents_and_series_after_keyword_indexed(keyword_pid):
    """Re-index the documents and series found for the given keyword.

    Documents are processed first, then series. Each record returned by
    ``search_by_keyword_pid(keyword_pid)`` is indexed by its pid.
    """
    def index_record(cls, search):
        """Index every ``cls`` record that ``search`` finds for the keyword."""
        log_context = {
            'origin_rec_type': 'Keyword',
            'origin_recid': keyword_pid,
            'dest_rec_type': cls.__name__,
        }
        log_func = partial(_log, **log_context)

        log_func(msg=MSG_ORIGIN)
        matches = search.search_by_keyword_pid(keyword_pid)
        for hit in matches.scan():
            _index_record_by_pid(cls, hit["pid"], log_func)

    index_record(Document, DocumentSearch())
    index_record(Series, SeriesSearch())