def validate_multipart_records():
    """Validate that multiparts were migrated successfully.

    Scans every series whose mode of issuance is ``MULTIPART_MONOGRAPH``
    and, for each hit whose ``_migration`` data contains ``volumes``:

    * compares the number of ``multipart_monograph`` relations with the
      number of distinct ``volume`` values in the migration data
    * verifies that the title of each related child appears among the
      titles found in the migration data

    Mismatches are only reported through ``click.echo``; nothing is
    returned.
    """
    def validate_multipart_relation(multipart, volumes):
        """Report volume count and title mismatches for one multipart."""
        related = multipart.relations.get().get('multipart_monograph', [])
        known_titles = [
            entry['title'] for entry in volumes if 'title' in entry
        ]
        # Only distinct ``volume`` values are counted.
        expected = len({entry['volume'] for entry in volumes})
        found = len(related)
        if expected != found:
            message = ('[Multipart {}] Incorrect number of volumes: {} '
                       '(expected {})')
            click.echo(message.format(multipart['pid'], found, expected))

        for relation in related:
            child = Document.get_record_by_pid(
                relation['pid'], pid_type=relation['pid_type'])
            child_title = child['title']['title']
            if child_title in known_titles:
                continue
            message = ('[Multipart {}] Title "{}" does not exist in '
                       'migration data')
            click.echo(message.format(multipart['pid'], child_title))

    multiparts = SeriesSearch().filter(
        'term', mode_of_issuance='MULTIPART_MONOGRAPH')
    for hit in multiparts.scan():
        migration = hit._migration
        # Hits without migrated volumes have nothing to compare against.
        if 'volumes' not in migration:
            continue
        migrated_volumes = migration.volumes
        record = Series.get_record_by_pid(hit.pid)
        validate_multipart_relation(record, migrated_volumes)

    click.echo('Multipart validation check done!')
def get_serials_by_child_recid(recid):
    """Yield the serials that list ``recid`` among their migrated children.

    :param recid: value matched against the ``_migration.children`` field
        of series whose mode of issuance is ``SERIAL``.
    :returns: a generator of records loaded with
        ``Series.get_record_by_pid`` from the pid of each search hit.
    """
    base_search = SeriesSearch()
    serial_filters = [
        Q('term', mode_of_issuance='SERIAL'),
        Q('term', _migration__children=recid),
    ]
    matching = base_search.query('bool', filter=serial_filters)
    for serial_hit in matching.scan():
        yield Series.get_record_by_pid(serial_hit.pid)
def link_documents_and_serials():
    """Link documents/multiparts and serials.

    Runs two passes, first over documents and then over multipart
    monographs, both restricted to hits flagged with
    ``_migration.has_serial``. Each record is related to every serial
    returned by ``get_serials_by_child_recid`` for its legacy recid.
    """
    def link_records_and_serial(record_cls, search):
        """Create the serial relations for every hit of ``search``."""
        for hit in search.scan():
            # A hit without a legacy recid is a volume of a multipart,
            # so there is nothing to link for it.
            if 'legacy_recid' in hit:
                child = record_cls.get_record_by_pid(hit.pid)
                serials = get_serials_by_child_recid(hit.legacy_recid)
                for serial in serials:
                    serial_title = serial['title']['title']
                    volume = get_migrated_volume_by_serial_title(
                        child, serial_title)
                    create_parent_child_relation(
                        serial,
                        child,
                        current_app.config['SERIAL_RELATION'],
                        volume,
                    )

    click.echo('Creating serial relations...')

    documents_with_serial = DocumentSearch().filter(
        'term', _migration__has_serial=True)
    link_records_and_serial(Document, documents_with_serial)

    multiparts_with_serial = SeriesSearch().filter(
        'bool',
        filter=[
            Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
            Q('term', _migration__has_serial=True),
        ],
    )
    link_records_and_serial(Series, multiparts_with_serial)
def get_multipart_by_legacy_recid(recid):
    """Return the single multipart monograph with the given legacy recid.

    :param recid: value matched against the ``legacy_recid`` field.
    :returns: the record loaded with ``Series.get_record_by_pid`` from
        the pid of the only hit.
    :raises MultipartMigrationError: if the search reports no hit or more
        than one hit.
    """
    multipart_filters = [
        Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
        Q('term', legacy_recid=recid),
    ]
    response = SeriesSearch().query(
        'bool', filter=multipart_filters).execute()
    hits = response.hits

    if hits.total < 1:
        raise MultipartMigrationError(
            'no multipart found with legacy recid {}'.format(recid))
    if hits.total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))
    return Series.get_record_by_pid(hits[0].pid)
def validate_serial_records():
    """Validate that serials were migrated successfully.

    Scans every series whose mode of issuance is ``SERIAL`` and:

    * logs a warning when a serial title has already been seen
    * compares the number of ``serial`` relations with the number of
      children in the migration data
    * reports related children whose legacy recid is not among the
      migrated children

    Relation problems are printed with ``click.echo``; duplicate titles
    go to the application logger. Nothing is returned.
    """
    def validate_serial_relation(serial, recids):
        """Report child count and unexpected children for one serial."""
        related = serial.relations.get().get('serial', [])
        expected = len(recids)
        found = len(related)
        if expected != found:
            message = ('[Serial {}] Incorrect number of children: {} '
                       '(expected {})')
            click.echo(message.format(serial['pid'], found, expected))

        for relation in related:
            child = Document.get_record_by_pid(
                relation['pid'], pid_type=relation['pid_type'])
            # Children without a legacy recid cannot be matched against
            # the migration data, so they are not reported.
            if 'legacy_recid' not in child:
                continue
            if child['legacy_recid'] in recids:
                continue
            message = ('[Serial {}] Unexpected child with legacy '
                       'recid: {}')
            click.echo(message.format(serial['pid'], child['legacy_recid']))

    seen_titles = set()
    serials = SeriesSearch().filter('term', mode_of_issuance='SERIAL')
    for hit in serials.scan():
        # Track titles so that duplicated serials can be flagged.
        if 'title' in hit and 'title' in hit.title:
            serial_title = hit.title.title
            if serial_title in seen_titles:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(serial_title))
            seen_titles.add(serial_title)

        # Compare the stored relations with the migrated children.
        migrated_children = hit._migration.children
        record = Series.get_record_by_pid(hit.pid)
        validate_serial_relation(record, migrated_children)

    click.echo('Serial validation check done!')
def index_documents_and_series_after_keyword_indexed(keyword_pid):
    """Index documents and series to re-compute keyword information.

    :param keyword_pid: pid of the keyword; it is passed to
        ``search_by_keyword_pid`` on the document and series searches.
    """
    def index_record(cls, search):
        """Index every ``cls`` record found by ``search`` for the keyword."""
        log_func = partial(
            _log,
            origin_rec_type='Keyword',
            origin_recid=keyword_pid,
            dest_rec_type=cls.__name__,
        )
        log_func(msg=MSG_ORIGIN)
        hits = search.search_by_keyword_pid(keyword_pid).scan()
        for hit in hits:
            _index_record_by_pid(cls, hit["pid"], log_func)

    # Documents are processed before series; each search object is only
    # created right before it is used.
    targets = ((Document, DocumentSearch), (Series, SeriesSearch))
    for record_cls, search_cls in targets:
        index_record(record_cls, search_cls())