def test_sort_by_priority(self):
    """Edition.sort_by_priority orders editions from lowest to highest priority.

    The editions are handed to the method in the reverse of the expected
    order, so the assertions can only pass if a real sort takes place
    (an implementation that returns its input unchanged would fail).
    """
    # Make editions created by the license source, the metadata
    # wrangler, and library staff.
    admin = self._edition(
        data_source_name=DataSource.LIBRARY_STAFF, with_license_pool=False
    )
    od = self._edition(
        data_source_name=DataSource.OVERDRIVE, with_license_pool=False
    )
    mw = self._edition(
        data_source_name=DataSource.METADATA_WRANGLER, with_license_pool=False
    )

    # Create an invalid edition with no data source. (This shouldn't
    # happen.)
    no_data_source = self._edition(with_license_pool=False)
    no_data_source.data_source = None

    # The invalid edition is the lowest priority. The admin
    # interface and metadata wrangler take precedence over any
    # other data sources.
    expect = [no_data_source, od, mw, admin]
    actual = Edition.sort_by_priority(list(reversed(expect)))
    assert expect == list(actual)

    # If you specify which data source is associated with the
    # license for the book, you will boost its priority above that
    # of the metadata wrangler.
    expect = [no_data_source, mw, od, admin]
    actual = Edition.sort_by_priority(list(reversed(expect)), od.data_source)
    assert expect == list(actual)
def test_missing_coverage_from(self):
    """Identifier.missing_coverage_from reports identifiers of the given
    types that lack coverage from a data source, optionally narrowed to
    a single operation.
    """
    gutenberg_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    oclc_source = DataSource.lookup(self._db, DataSource.OCLC)
    web_source = DataSource.lookup(self._db, DataSource.WEB)

    # Two Gutenberg records.
    g1, _ = Edition.for_foreign_id(
        self._db, gutenberg_source, Identifier.GUTENBERG_ID, "1"
    )
    g2, _ = Edition.for_foreign_id(
        self._db, gutenberg_source, Identifier.GUTENBERG_ID, "2"
    )

    # The first has generic coverage from OCLC Classify...
    generic_coverage = self._coverage_record(g1, oclc_source)

    # ...while the second is only covered for one specific operation.
    operation_coverage = self._coverage_record(
        g2, oclc_source, "some operation"
    )

    # A web record with no coverage at all.
    w, _ = Edition.for_foreign_id(
        self._db, web_source, Identifier.URI, "http://www.foo.com/"
    )

    # Asking for Gutenberg identifiers without generic OCLC coverage
    # yields exactly one result: the record that is only covered for
    # a specific operation. Neither the generically-covered Gutenberg
    # record nor the web record is returned.
    uncovered = Identifier.missing_coverage_from(
        self._db, [Identifier.GUTENBERG_ID], oclc_source
    ).all()
    (in_gutenberg_but_not_in_oclc,) = uncovered
    assert g2.primary_identifier == in_gutenberg_but_not_in_oclc

    # Asking about the specific operation flips the answer: now the
    # record with only generic coverage is the one that is missing.
    uncovered = Identifier.missing_coverage_from(
        self._db, [Identifier.GUTENBERG_ID], oclc_source, "some operation"
    ).all()
    (has_generic_coverage_only,) = uncovered
    assert g1.primary_identifier == has_generic_coverage_only

    # We don't put web sites into OCLC, so asking about URIs picks up
    # the web record (but not the Gutenberg records).
    uncovered = Identifier.missing_coverage_from(
        self._db, [Identifier.URI], oclc_source
    ).all()
    (in_web_but_not_in_oclc,) = uncovered
    assert w.primary_identifier == in_web_but_not_in_oclc

    # We don't use the web as a source of coverage, so this will
    # return both Gutenberg records (but not the web record).
    expected_ids = [g1.primary_identifier.id, g2.primary_identifier.id]
    without_web_coverage = Identifier.missing_coverage_from(
        self._db, [Identifier.GUTENBERG_ID], web_source
    )
    assert expected_ids == sorted(x.id for x in without_web_coverage)
def test_missing_coverage_from_with_cutoff_date(self):
    """missing_coverage_from honors the count_as_missing_before cutoff.

    A coverage record whose timestamp is earlier than the cutoff is
    treated as if the coverage were missing.
    """
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    oclc = DataSource.lookup(self._db, DataSource.OCLC)

    # Here's an Edition with a coverage record from OCLC classify.
    edition, _ = Edition.for_foreign_id(
        self._db, gutenberg, Identifier.GUTENBERG_ID, "1"
    )
    identifier = edition.primary_identifier
    coverage = self._coverage_record(edition, oclc)

    # The CoverageRecord knows when the coverage was provided.
    timestamp = coverage.timestamp

    # If we ask for Identifiers that are missing coverage records
    # as of that time, we see nothing.
    assert [] == Identifier.missing_coverage_from(
        self._db,
        [identifier.type],
        oclc,
        count_as_missing_before=timestamp,
    ).all()

    # But if we give a time one second later, the Identifier is
    # missing coverage.
    assert [identifier] == Identifier.missing_coverage_from(
        self._db,
        [identifier.type],
        oclc,
        count_as_missing_before=timestamp + datetime.timedelta(seconds=1),
    ).all()
def test_for_foreign_id(self):
    """Verify we can get a data source's view of a foreign id."""
    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    # Named so as not to shadow the `id` and `type` builtins.
    foreign_id = "549"
    identifier_type = Identifier.GUTENBERG_ID

    # The first request creates the record.
    record, was_new = Edition.for_foreign_id(
        self._db, data_source, identifier_type, foreign_id
    )
    assert data_source == record.data_source
    identifier = record.primary_identifier
    assert foreign_id == identifier.identifier
    assert identifier_type == identifier.type
    assert was_new is True
    assert [identifier] == record.equivalent_identifiers()

    # We can get the same work record by providing only the name
    # of the data source.
    record, was_new = Edition.for_foreign_id(
        self._db, DataSource.GUTENBERG, identifier_type, foreign_id
    )
    assert data_source == record.data_source
    assert identifier == record.primary_identifier
    assert was_new is False
def test_custom_lists(self):
    """A work joins its Collection's CustomLists when it is first
    presented, and is not re-added after being removed from a list.
    """
    # A Collection can be associated with one or more CustomLists.
    first_list, _ = get_one_or_create(self._db, CustomList, name=self._str)
    second_list, _ = get_one_or_create(self._db, CustomList, name=self._str)
    self.collection.customlists = [first_list, second_list]

    def entry_counts():
        """Return the current entry counts as (first_list, second_list)."""
        return len(first_list.entries), len(second_list.entries)

    assert (0, 0) == entry_counts()

    # When a new pool is added to the collection and its presentation
    # edition is calculated for the first time, it's automatically
    # added to the lists.
    work = self._work(collection=self.collection, with_license_pool=True)
    assert (1, 1) == entry_counts()
    for custom_list in (first_list, second_list):
        assert work == custom_list.entries[0].work

    # Now remove it from one of the lists. If its presentation edition
    # changes again or its pool changes works, it's not added back.
    self._db.delete(first_list.entries[0])
    self._db.commit()
    assert (0, 1) == entry_counts()

    # Change the presentation edition by adding a staff-authored title.
    pool = work.license_pools[0]
    identifier = pool.identifier
    staff_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
    staff_edition, _ = Edition.for_foreign_id(
        self._db, staff_source, identifier.type, identifier.identifier
    )
    staff_edition.title = self._str
    work.calculate_presentation()
    assert (0, 1) == entry_counts()

    # Move the pool to a different work.
    replacement_work = self._work(collection=self.collection)
    pool.work = replacement_work
    assert (0, 1) == entry_counts()
def test_author_contributors(self):
    """author_contributors lists each author once and omits non-authors."""
    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    # Named so as not to shadow the `id` and `type` builtins.
    foreign_id = self._str
    identifier_type = Identifier.GUTENBERG_ID

    edition, _ = Edition.for_foreign_id(
        self._db, data_source, identifier_type, foreign_id
    )

    # We've listed the same person as primary author and author.
    [alice], _ = Contributor.lookup(self._db, "Adder, Alice")
    edition.add_contributor(
        alice, [Contributor.AUTHOR_ROLE, Contributor.PRIMARY_AUTHOR_ROLE]
    )

    # We've listed a different person as illustrator.
    [bob], _ = Contributor.lookup(self._db, "Bitshifter, Bob")
    edition.add_contributor(bob, [Contributor.ILLUSTRATOR_ROLE])

    # Both contributors show up in .contributors.
    assert {alice, bob} == edition.contributors

    # Only the author shows up in .author_contributors, and she
    # only shows up once.
    assert [alice] == edition.author_contributors
def test_merge(self):
    """Contributor.merge_into moves names, metadata and contributions
    onto the destination Contributor and removes the merged record.
    """
    # Here's Robert.
    [robert], _ = Contributor.lookup(self._db, sort_name="Robert")

    # Here's Bob.
    [bob], _ = Contributor.lookup(self._db, sort_name="Jones, Bob")
    bob.extra["foo"] = "bar"
    bob.aliases = ["Bobby"]
    bob.viaf = "viaf"
    bob.lc = "lc"
    bob.display_name = "Bob Jones"
    bob.family_name = "Bobb"
    bob.wikipedia_name = "Bob_(Person)"

    # Each is a contributor to an Edition.
    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)

    roberts_book, _ = Edition.for_foreign_id(
        self._db, data_source, Identifier.GUTENBERG_ID, "1"
    )
    roberts_book.add_contributor(robert, Contributor.AUTHOR_ROLE)

    bobs_book, _ = Edition.for_foreign_id(
        self._db, data_source, Identifier.GUTENBERG_ID, "10"
    )
    bobs_book.add_contributor(bob, Contributor.AUTHOR_ROLE)

    # In a shocking turn of events, it transpires that "Bob" and
    # "Robert" are the same person. We merge "Bob" into Robert
    # thusly:
    bob.merge_into(robert)

    # 'Bob' is now listed as an alias for Robert, as is Bob's
    # alias.
    assert ["Jones, Bob", "Bobby"] == robert.aliases

    # The extra information associated with Bob is now associated
    # with Robert.
    assert "bar" == robert.extra["foo"]

    assert "viaf" == robert.viaf
    assert "lc" == robert.lc
    assert "Bobb" == robert.family_name
    assert "Bob Jones" == robert.display_name
    assert "Robert" == robert.sort_name
    assert "Bob_(Person)" == robert.wikipedia_name

    # The standalone 'Bob' record has been removed from the database.
    # Query by the sort name the record actually had ("Jones, Bob");
    # no Contributor was ever stored under "Bob", so filtering on that
    # value would pass regardless of whether the record was removed.
    assert [] == self._db.query(Contributor).filter(
        Contributor.sort_name == "Jones, Bob"
    ).all()

    # Bob's book is now associated with 'Robert', not the standalone
    # 'Bob' record.
    assert [robert] == bobs_book.author_contributors

    # Confirm the sort_name is propagated, if not already set in the
    # destination contributor.
    robert.sort_name = None
    [bob], _ = Contributor.lookup(self._db, sort_name="Jones, Bob")
    bob.merge_into(robert)
    assert "Jones, Bob" == robert.sort_name
def test_recursive_edition_equivalence(self):
    """Edition equivalence follows identifier equivalencies transitively,
    and Work.all_editions includes those equivalent Editions.
    """
    # An Edition for a Project Gutenberg text.
    gutenberg_edition, gutenberg_pool = self._edition(
        data_source_name=DataSource.GUTENBERG,
        identifier_type=Identifier.GUTENBERG_ID,
        identifier_id="1",
        with_open_access_download=True,
        title="Original Gutenberg text",
    )

    # An Edition for an Open Library text.
    open_library_edition, open_library_pool = self._edition(
        data_source_name=DataSource.OPEN_LIBRARY,
        identifier_type=Identifier.OPEN_LIBRARY_ID,
        identifier_id="W1111",
        with_open_access_download=True,
        title="Open Library record",
    )

    # OCLC Classify tells us the Gutenberg text is equivalent to a
    # certain OCLC Number; OCLC Linked Data tells us the Open Library
    # text is equivalent to that same OCLC Number.
    classify_source = DataSource.lookup(self._db, DataSource.OCLC)
    linked_data_source = DataSource.lookup(
        self._db, DataSource.OCLC_LINKED_DATA
    )

    shared_oclc_number, _ = Identifier.for_foreign_id(
        self._db, Identifier.OCLC_NUMBER, "22"
    )
    gutenberg_edition.primary_identifier.equivalent_to(
        classify_source, shared_oclc_number, 1
    )
    open_library_edition.primary_identifier.equivalent_to(
        linked_data_source, shared_oclc_number, 1
    )

    # An Edition for a Recovering the Classics cover.
    web_source = DataSource.lookup(self._db, DataSource.WEB)
    cover_edition, _ = Edition.for_foreign_id(
        self._db,
        web_source,
        Identifier.URI,
        "http://recoveringtheclassics.com/pride-and-prejudice.jpg",
    )
    cover_edition.title = "Recovering the Classics cover"

    # That Edition's URI has been manually associated directly with
    # the Project Gutenberg text.
    manual_source = DataSource.lookup(self._db, DataSource.MANUAL)
    gutenberg_edition.primary_identifier.equivalent_to(
        manual_source, cover_edition.primary_identifier, 1
    )

    # Finally, a completely unrelated Edition, which will not be
    # showing up.
    unrelated_edition, unrelated_pool = self._edition(
        data_source_name=DataSource.GUTENBERG,
        identifier_type=Identifier.GUTENBERG_ID,
        identifier_id="2",
        with_open_access_download=True,
        title="Unrelated Gutenberg record.",
    )

    # equivalent_editions on the Project Gutenberg Edition yields
    # three Editions: the Gutenberg record itself, the Open Library
    # record (linked through the shared OCLC Number), and the
    # Recovering the Classics record (linked directly).
    equivalents = list(gutenberg_edition.equivalent_editions())
    assert 3 == len(equivalents)
    for expected in (gutenberg_edition, open_library_edition, cover_edition):
        assert expected in equivalents

    # Here's a Work that incorporates one of the Gutenberg records.
    work = self._work()
    work.license_pools.extend([unrelated_pool])

    # Its set-of-all-editions contains only one record.
    assert 1 == work.all_editions().count()

    # If we add the other Gutenberg record to it, then its
    # set-of-all-editions is extended by that record, *plus*
    # all the Editions equivalent to that record.
    work.license_pools.extend([gutenberg_pool])
    assert 4 == work.all_editions().count()