def validate_multipart_records():
    """Validate that multiparts were migrated successfully.

    Performs the following checks:

    * Ensure all volumes of migrated multiparts were migrated
    """

    def check_multipart_volumes(multipart, volumes):
        # Resolve children through the configured document record class.
        document_cls = current_app_ils.document_record_cls
        relations = multipart.relations.get().get("multipart_monograph", [])
        expected_titles = [v["title"] for v in volumes if "title" in v]
        distinct_volumes = len({v["volume"] for v in volumes})
        if distinct_volumes != len(relations):
            click.echo(
                "[Multipart {}] Incorrect number of volumes: {} "
                "(expected {})".format(
                    multipart["pid"], len(relations), distinct_volumes
                )
            )
        for relation in relations:
            child = document_cls.get_record_by_pid(
                relation["pid"], pid_type=relation["pid_type"]
            )
            if child["title"] not in expected_titles:
                click.echo(
                    '[Multipart {}] Title "{}" does not exist in '
                    "migration data".format(multipart["pid"], child["title"])
                )

    search = SeriesSearch().filter(
        "term", mode_of_issuance="MULTIPART_MONOGRAPH"
    )
    for hit in search.scan():
        # Check if any child is missing; only hits carrying migration
        # volume data can be validated.
        if "volumes" in hit._migration:
            multipart = Series.get_record_by_pid(hit.pid)
            check_multipart_volumes(multipart, hit._migration.volumes)
    click.echo("Multipart validation check done!")
def validate_multipart_records():
    """Validate that multiparts were migrated successfully.

    Performs the following checks:

    * Ensure all volumes of migrated multiparts were migrated
    """

    def check_volumes(multipart, volumes):
        relations = multipart.relations.get().get('multipart_monograph', [])
        known_titles = [v['title'] for v in volumes if 'title' in v]
        n_volumes = len({v['volume'] for v in volumes})
        if n_volumes != len(relations):
            click.echo(
                '[Multipart {}] Incorrect number of volumes: {} '
                '(expected {})'.format(
                    multipart['pid'], len(relations), n_volumes
                )
            )
        for relation in relations:
            child = Document.get_record_by_pid(
                relation['pid'], pid_type=relation['pid_type']
            )
            # The document title lives under a nested "title" mapping here.
            child_title = child['title']['title']
            if child_title not in known_titles:
                click.echo(
                    '[Multipart {}] Title "{}" does not exist in '
                    'migration data'.format(multipart['pid'], child_title)
                )

    search = SeriesSearch().filter(
        'term', mode_of_issuance='MULTIPART_MONOGRAPH'
    )
    for hit in search.scan():
        # Check if any child is missing.
        if 'volumes' in hit._migration:
            record = Series.get_record_by_pid(hit.pid)
            check_volumes(record, hit._migration.volumes)
    click.echo('Multipart validation check done!')
def get_serials_by_child_recid(recid):
    """Search serials by children recid."""
    # Serials are series records whose migration data lists this child.
    filters = [
        Q('term', mode_of_issuance='SERIAL'),
        Q('term', _migration__children=recid),
    ]
    serial_search = SeriesSearch().query('bool', filter=filters)
    for hit in serial_search.scan():
        yield Series.get_record_by_pid(hit.pid)
def test_journal_relation_from_publication_info(app):
    """Test journal-document relation from publication info field."""
    document_data = {
        "$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
        "created_by": {"type": "script", "value": "test"},
        "pid": "4321",
        "legacy_recid": "1111",
        "title": "Book: A Book",
        "document_type": "BOOK",
        "authors": [{"full_name": "Author Author"}],
        "abstracts": [{"value": "This is an abstract"}],
        "language": ["it"],
        "publication_year": "2020",
        "identifiers": [{"scheme": "ISBN", "value": "0123456789"}],
        "cover_metadata": {"ISBN": "0123456789"},
        "publication_info": [{"journal_issue": "issue"}],
        "_migration": {
            "has_journal": True,
            "journal_record_legacy_recids": [
                {
                    "recid": "1234",
                    "volume": None,
                }
            ],
        },
    }
    journal_data = {
        "$schema": "https://127.0.0.1:5000/schemas/series/series-v1.0.0.json",
        "pid": "serid-4",
        "title": "Dispersion Forces",
        "authors": ["Buhmann, Stefan Yoshi"],
        "abstract": "This is a multipart monograph",
        "mode_of_issuance": "SERIAL",
        "legacy_recid": "1234",
    }

    # Mint a PID for the document, then create it.
    doc_uuid = mint_record_pid(
        DOCUMENT_PID_TYPE, "pid", {"pid": document_data["pid"]}
    )
    document = Document.create(document_data, doc_uuid)

    # Mint a PID for the journal, create it, and register its legacy recid.
    journal_uuid = mint_record_pid(
        SERIES_PID_TYPE, "pid", {"pid": journal_data["pid"]}
    )
    journal = Series.create(journal_data, journal_uuid)
    legacy_recid_minter(journal["legacy_recid"], journal_uuid)
    db.session.commit()

    indexer = RecordIndexer()
    indexer.index(document)
    indexer.index(journal)
    current_search.flush_and_refresh(index="*")

    # Linking should attach the journal to the document as a serial relation.
    link_documents_and_serials()

    document_rec = Document.get_record_by_pid(document["pid"])
    assert "serial" in document_rec.relations
def get_serials_by_child_recid(recid):
    """Search serials by children recid."""
    # Match only SERIAL series whose migration children include the recid.
    serial_filters = [
        Q("term", mode_of_issuance="SERIAL"),
        Q("term", _migration__children=recid),
    ]
    for hit in SeriesSearch().query("bool", filter=serial_filters).scan():
        yield Series.get_record_by_pid(hit.pid)
def get_multipart_by_legacy_recid(recid):
    """Search multiparts by its legacy recid."""
    result = SeriesSearch().query(
        'bool',
        filter=[
            Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
            Q('term', legacy_recid=recid),
        ],
    ).execute()
    total = result.hits.total
    # Exactly one matching multipart is expected; anything else is an error.
    if total < 1:
        raise MultipartMigrationError(
            'no multipart found with legacy recid {}'.format(recid))
    if total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))
    return Series.get_record_by_pid(result.hits[0].pid)
def validate_serial_records():
    """Validate that serials were migrated successfully.

    Performs the following checks:

    * Find duplicate serials
    * Ensure all children of migrated serials were migrated
    """

    def check_serial_children(serial, recids):
        relations = serial.relations.get().get("serial", [])
        if len(relations) != len(recids):
            click.echo(
                "[Serial {}] Incorrect number of children: {} "
                "(expected {})".format(
                    serial["pid"], len(relations), len(recids)
                )
            )
        for relation in relations:
            child = Document.get_record_by_pid(
                relation["pid"], pid_type=relation["pid_type"]
            )
            # A child whose legacy recid is not in the migration data was
            # attached to the wrong serial.
            if "legacy_recid" in child and child["legacy_recid"] not in recids:
                click.echo(
                    "[Serial {}] Unexpected child with legacy "
                    "recid: {}".format(serial["pid"], child["legacy_recid"])
                )

    seen_titles = set()
    for serial_hit in SeriesSearch().filter(
        "term", mode_of_issuance="SERIAL"
    ).scan():
        # Store titles and check for duplicates.
        if "title" in serial_hit:
            title = serial_hit.title
            if title in seen_titles:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(title)
                )
            else:
                seen_titles.add(title)
        # Check if any children are missing.
        serial = Series.get_record_by_pid(serial_hit.pid)
        check_serial_children(serial, serial_hit._migration.children)
    click.echo("Serial validation check done!")
def get_multipart_by_legacy_recid(recid):
    """Search multiparts by its legacy recid."""
    result = SeriesSearch().query(
        'bool',
        filter=[
            Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
            Q('term', legacy_recid=recid),
        ],
    ).execute()
    # ES7 wraps the total in an object; older versions expose a plain int.
    if lt_es7:
        hits_total = result.hits.total
    else:
        hits_total = result.hits.total.value
    if not result.hits or hits_total < 1:
        click.secho('no multipart found with legacy recid {}'.format(recid),
                    fg='red')
        # TODO uncomment with cleaner data
        # raise MultipartMigrationError(
        #     'no multipart found with legacy recid {}'.format(recid))
    elif hits_total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))
    else:
        return Series.get_record_by_pid(result.hits[0].pid)
def testdata(app, db, es_clear, system_user):
    """Create, index and return test data.

    Loads every fixture file from the data directory, creates the
    corresponding records (locations, internal locations, documents,
    items, loans and series), mints their PIDs, commits and indexes
    them, and returns the raw fixture dicts keyed by record type.
    """
    indexer = RecordIndexer()

    def _create_and_index(record_cls, pid_type, fixtures):
        # Create, mint, commit and index every fixture of one record type.
        # Per-record commit/index order is preserved from the original
        # hand-written loops.
        for data in fixtures:
            record = record_cls.create(data)
            mint_record_pid(pid_type, "pid", record)
            record.commit()
            db.session.commit()
            indexer.index(record)

    locations = load_json_from_datadir("locations.json")
    _create_and_index(Location, LOCATION_PID_TYPE, locations)

    internal_locations = load_json_from_datadir("internal_locations.json")
    _create_and_index(
        InternalLocation, INTERNAL_LOCATION_PID_TYPE, internal_locations
    )

    documents = load_json_from_datadir("documents.json")
    _create_and_index(Document, DOCUMENT_PID_TYPE, documents)

    items = load_json_from_datadir("items.json")
    _create_and_index(Item, ITEM_PID_TYPE, items)

    loans = load_json_from_datadir("loans.json")
    _create_and_index(Loan, CIRCULATION_LOAN_PID_TYPE, loans)

    series = load_json_from_datadir("series.json")
    _create_and_index(Series, SERIES_PID_TYPE, series)

    # flush all indices after indexing, otherwise ES won't be ready for tests
    current_search.flush_and_refresh(index="*")

    return {
        "documents": documents,
        "items": items,
        "loans": loans,
        "locations": locations,
        "series": series,
    }
def relations_resolver(series_pid):
    """Resolve record relations."""
    # Fetch the series record and expose its relations accessor.
    return Series.get_record_by_pid(series_pid).relations