def test_related_add_multiple_children(app, testdata):
    """Test adding an existing related record that was on a different node."""
    documents = testdata["documents"]
    all_series = testdata["series"]

    def _fetch_records():
        # Re-read the three records from the DB so assertions observe
        # the committed state, not in-memory objects.
        return (
            Document.get_record_by_pid(documents[0]["pid"]),
            Document.get_record_by_pid(documents[1]["pid"]),
            Series.get_record_by_pid(all_series[0]["pid"]),
        )

    def _assert_edition_counts(records, expected_counts):
        for record, expected in zip(records, expected_counts):
            assert len(record.related.editions) == expected

    doc1, doc2, ser3 = records = _fetch_records()
    # Nothing related yet.
    _assert_edition_counts(records, (0, 0, 0))

    doc1.related.add_edition(doc2)
    doc1.commit()

    doc1, doc2, ser3 = records = _fetch_records()
    # doc1 and doc2 now see each other; ser3 is still unrelated.
    _assert_edition_counts(records, (1, 1, 0))

    doc1.related.add_edition(ser3)
    doc1.commit()

    doc1, doc2, ser3 = records = _fetch_records()
    # ser3 joined the group: every member sees the other two.
    _assert_edition_counts(records, (2, 2, 2))
def related_record(testdata):
    """An example of a record with several relations."""
    document_pids = [doc["pid"] for doc in testdata["documents"]]
    doc1, doc2, doc3, doc4, doc6, doc7, doc8, doc9 = (
        Document.get_record_by_pid(pid) for pid in document_pids[:8]
    )
    ser5 = Series.get_record_by_pid(testdata["series"][0]["pid"])

    # Edition group centered on doc1: doc2, doc3, doc4 and the series.
    for edition in (doc2, doc3, doc4, ser5):
        doc1.related.add_edition(edition)
    # Language relation doc1 <-> doc6, where doc6 has its own edition group.
    doc1.related.add_language(doc6)
    doc6.related.add_edition(doc7)
    doc6.related.add_edition(doc8)
    # Second language relation on doc1.
    doc1.related.add_language(doc9)
    return doc1
def validate_multipart_records():
    """Validate that multiparts were migrated successfully.

    Performs the following checks:

    * Ensure all volumes of migrated multiparts were migrated
    """
    def validate_multipart_relation(multipart, volumes):
        # 'multipart_monograph' relations of the migrated series; empty
        # list if the relation type is absent.
        relations = multipart.relations.get().get('multipart_monograph', [])
        # Titles present in the legacy migration data (volumes without a
        # title are skipped).
        titles = [volume['title'] for volume in volumes if 'title' in volume]
        # Distinct volume identifiers expected from the migration dump.
        count = len(set(v['volume'] for v in volumes))
        if count != len(relations):
            click.echo('[Multipart {}] Incorrect number of volumes: {} '
                       '(expected {})'.format(multipart['pid'],
                                              len(relations), count))
        # Each related child document must carry a title that existed in
        # the migration data.
        for relation in relations:
            child = Document.get_record_by_pid(relation['pid'],
                                               pid_type=relation['pid_type'])
            if child['title']['title'] not in titles:
                click.echo('[Multipart {}] Title "{}" does not exist in '
                           'migration data'.format(multipart['pid'],
                                                   child['title']['title']))

    search = SeriesSearch().filter('term',
                                   mode_of_issuance='MULTIPART_MONOGRAPH')
    # scan() iterates over all matching series without pagination limits.
    for multipart_hit in search.scan():
        # Check if any child is missing
        # NOTE(review): hits without '_migration.volumes' are silently
        # skipped — presumably non-migrated multiparts; confirm.
        if 'volumes' in multipart_hit._migration:
            volumes = multipart_hit._migration.volumes
            multipart = Series.get_record_by_pid(multipart_hit.pid)
            validate_multipart_relation(multipart, volumes)
    click.echo('Multipart validation check done!')
def get_serials_by_child_recid(recid):
    """Search serials by children recid."""
    # Match serials whose migration data lists this recid as a child.
    filters = [
        Q('term', mode_of_issuance='SERIAL'),
        Q('term', _migration__children=recid),
    ]
    serial_search = SeriesSearch().query('bool', filter=filters)
    for serial_hit in serial_search.scan():
        yield Series.get_record_by_pid(serial_hit.pid)
def test_related_add_existing_child(related_record, testdata):
    """Add a related language to an existing relation graph."""
    first_doc = Document.get_record_by_pid(testdata["documents"][0]["pid"])
    language_doc = Document.get_record_by_pid(testdata["documents"][4]["pid"])
    other_series = Series.get_record_by_pid(testdata["series"][1]["pid"])

    # Should fail if trying to add a child that already has relations
    for already_related in (first_doc, language_doc):
        with pytest.raises(RelatedRecordError):
            other_series.related.add_language(already_related)
def get_multipart_by_legacy_recid(recid):
    """Search multiparts by its legacy recid."""
    filters = [
        Q('term', mode_of_issuance='MULTIPART_MONOGRAPH'),
        Q('term', legacy_recid=recid),
    ]
    hits = SeriesSearch().query('bool', filter=filters).execute().hits
    # Exactly one match is required: guard against zero or many.
    if hits.total < 1:
        raise MultipartMigrationError(
            'no multipart found with legacy recid {}'.format(recid))
    if hits.total > 1:
        raise MultipartMigrationError(
            'found more than one multipart with recid {}'.format(recid))
    return Series.get_record_by_pid(hits[0].pid)
def test_related_complex_record(related_record, testdata):
    """Test the related record fixture."""
    document_pids = [doc["pid"] for doc in testdata["documents"]]
    doc1, doc2, doc3, doc4, doc6, doc7, doc8, doc9 = (
        Document.get_record_by_pid(pid) for pid in document_pids[:8]
    )
    ser5 = Series.get_record_by_pid(testdata["series"][0]["pid"])

    # doc9 is only a language relation, never an edition.
    assert len(doc9.related.editions) == 0
    for member in (doc1, doc2, doc3, doc4, ser5):
        assert len(member.related.editions) == 4
    for member in (doc6, doc7, doc8):
        assert len(member.related.editions) == 2
    for member in (doc1, doc6, doc9):
        assert len(member.related.languages) == 2

    # Removing doc6 from the language group only detaches doc6.
    doc1.related.remove_language(doc6)
    for member in (doc1, doc9):
        assert len(member.related.languages) == 1
    assert len(doc6.related.languages) == 0
    # doc6's own edition group is untouched by the language removal.
    for member in (doc6, doc7, doc8):
        assert len(member.related.editions) == 2

    # Removing doc3 from the edition group only detaches doc3.
    ser5.related.remove_edition(doc3)
    for member in (doc1, doc2, doc4, ser5):
        assert len(member.related.editions) == 3
    assert len(doc3.related.editions) == 0
def validate_serial_records():
    """Validate that serials were migrated successfully.

    Performs the following checks:

    * Find duplicate serials
    * Ensure all children of migrated serials were migrated
    """
    def validate_serial_relation(serial, recids):
        # 'serial' relations of the migrated series; empty list when the
        # relation type is absent.
        relations = serial.relations.get().get('serial', [])
        if len(recids) != len(relations):
            click.echo('[Serial {}] Incorrect number of children: {} '
                       '(expected {})'.format(serial['pid'],
                                              len(relations), len(recids)))
        # Each related child should map back to one of the expected
        # legacy recids; children without 'legacy_recid' are skipped.
        for relation in relations:
            child = Document.get_record_by_pid(relation['pid'],
                                               pid_type=relation['pid_type'])
            if 'legacy_recid' in child and child['legacy_recid'] not in recids:
                click.echo('[Serial {}] Unexpected child with legacy '
                           'recid: {}'.format(serial['pid'],
                                              child['legacy_recid']))

    # Titles already seen, used to detect duplicate serials.
    titles = set()
    search = SeriesSearch().filter('term', mode_of_issuance='SERIAL')
    for serial_hit in search.scan():
        # Store titles and check for duplicates
        if 'title' in serial_hit and 'title' in serial_hit.title:
            title = serial_hit.title.title
            if title in titles:
                current_app.logger.warning(
                    'Serial title "{}" already exists'.format(title))
            else:
                titles.add(title)
        # Check if any children are missing
        # NOTE(review): unlike the multipart validation, '_migration.children'
        # is accessed unconditionally — presumably every migrated serial has
        # it; confirm against the migration pipeline.
        children = serial_hit._migration.children
        serial = Series.get_record_by_pid(serial_hit.pid)
        validate_serial_relation(serial, children)
    click.echo('Serial validation check done!')
def test_related_add_editions_to_child(app, testdata):
    """Test adding related editions to a child."""
    parent = Document.get_record_by_pid(testdata["documents"][0]["pid"])
    child_doc = Document.get_record_by_pid(testdata["documents"][1]["pid"])
    child_ser = Series.get_record_by_pid(testdata["series"][0]["pid"])

    parent.related.add_edition(child_doc)
    # Adding to the child must extend the whole edition group.
    child_doc.related.add_edition(child_ser)

    # Every member sees the other two, in insertion order from its view.
    for editions, expected_members in (
        (parent.related.editions, (child_doc, child_ser)),
        (child_doc.related.editions, (parent, child_ser)),
        (child_ser.related.editions, (child_doc, parent)),
    ):
        assert len(editions) == 2
        assert editions[0] == expected_members[0]
        assert editions[1] == expected_members[1]
def test_related_add_editions_to_parent(app, testdata):
    """Test adding related editions."""
    parent = Document.get_record_by_pid(testdata["documents"][0]["pid"])
    first_child = Document.get_record_by_pid(testdata["documents"][1]["pid"])
    second_child = Series.get_record_by_pid(testdata["series"][0]["pid"])

    # First relation: parent and first_child see only each other.
    parent.related.add_edition(first_child)
    for editions, expected_members in (
        (parent.related.editions, (first_child,)),
        (first_child.related.editions, (parent,)),
    ):
        assert len(editions) == 1
        assert editions[0] == expected_members[0]

    # Second relation: the new member sees the existing two.
    parent.related.add_edition(second_child)
    for editions, expected_members in (
        (parent.related.editions, (first_child, second_child)),
        (second_child.related.editions, (parent, first_child)),
    ):
        assert len(editions) == 2
        assert editions[0] == expected_members[0]
        assert editions[1] == expected_members[1]
def testdata(app, db, es_clear):
    """Create, index and return test data.

    Loads each JSON fixture file from the data directory, creates the
    corresponding records (minting a PID, committing and indexing each
    one), then flushes Elasticsearch so the records are searchable.

    :return: dict mapping fixture names to the loaded JSON record lists.
    """
    indexer = RecordIndexer()

    def _load_and_index(filename, record_cls, pid_type):
        """Create, mint, commit and index all records from one datadir file."""
        data = load_json_from_datadir(filename)
        for json_record in data:
            record = record_cls.create(json_record)
            mint_record_pid(pid_type, "pid", record)
            record.commit()
            # Commit per record, mirroring the original fixture behavior.
            db.session.commit()
            indexer.index(record)
        return data

    # Creation order preserved from the original fixture — later record
    # types presumably reference earlier ones (e.g. items -> documents).
    locations = _load_and_index("locations.json", Location, LOCATION_PID_TYPE)
    internal_locations = _load_and_index(
        "internal_locations.json", InternalLocation,
        INTERNAL_LOCATION_PID_TYPE)
    keywords = _load_and_index("keywords.json", Keyword, KEYWORD_PID_TYPE)
    series_data = _load_and_index("series.json", Series, SERIES_PID_TYPE)
    documents = _load_and_index("documents.json", Document, DOCUMENT_PID_TYPE)
    items = _load_and_index("items.json", Item, ITEM_PID_TYPE)
    # EItems are created and indexed but, as in the original fixture,
    # intentionally not part of the returned dict.
    _load_and_index("eitems.json", EItem, EITEM_PID_TYPE)
    loans = _load_and_index("loans.json", Loan, CIRCULATION_LOAN_PID_TYPE)

    # flush all indices after indexing, otherwise ES won't be ready for tests
    current_search.flush_and_refresh(index='*')

    return {
        "locations": locations,
        "internal_locations": internal_locations,
        "documents": documents,
        "items": items,
        "loans": loans,
        "keywords": keywords,
        "series": series_data,
    }
def relations_resolver(series_pid):
    """Resolve record relations."""
    # Fetch the series record and return its resolved relations mapping.
    return Series.get_record_by_pid(series_pid).relations.get()