def test_repr(self, _, identifier_type, identifier, title):
    """Test that Identifier.__repr__ works correctly with both ASCII and non-ASCII symbols.

    :param _: Name of the test case
    :type _: str

    :param identifier_type: Type of the identifier
    :type identifier_type: str

    :param identifier: Identifier's value
    :type identifier: str

    :param title: Presentation edition's title
    :type title: str
    """
    # Arrange
    identifier = Identifier(type=identifier_type, identifier=identifier)

    if title:
        edition = create_autospec(spec=Edition)
        edition.title = PropertyMock(return_value=title)

        identifier.primarily_identifies = PropertyMock(return_value=[edition])

    # Act
    # NOTE: we are not interested in the result returned by repr;
    # we just want to make sure that repr doesn't raise any unexpected exceptions.
    _ = repr(identifier)

def test_missing_coverage_from_with_cutoff_date(self):
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    oclc = DataSource.lookup(self._db, DataSource.OCLC)

    # Here's an Edition with a coverage record from OCLC Classify.
    gutenberg, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                               Identifier.GUTENBERG_ID, "1")
    identifier = gutenberg.primary_identifier
    coverage = self._coverage_record(gutenberg, oclc)

    # The CoverageRecord knows when the coverage was provided.
    timestamp = coverage.timestamp

    # If we ask for Identifiers that are missing coverage records
    # as of that time, we see nothing.
    assert ([] == Identifier.missing_coverage_from(
        self._db, [identifier.type], oclc,
        count_as_missing_before=timestamp).all())

    # But if we give a time one second later, the Identifier is
    # missing coverage.
    assert [identifier] == Identifier.missing_coverage_from(
        self._db,
        [identifier.type],
        oclc,
        count_as_missing_before=timestamp + datetime.timedelta(seconds=1),
    ).all()

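# A minimal standalone sketch (not part of the test suite) of the cutoff
# semantics the test above exercises; `_counts_as_coverage` is a hypothetical
# helper, not a real model method. A coverage record only shields an
# Identifier from missing_coverage_from when its timestamp is at or after
# the cutoff.
def _counts_as_coverage(record_timestamp, count_as_missing_before):
    # Coverage provided before the cutoff is treated as if it were missing.
    return record_timestamp >= count_as_missing_before
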
def test_missing_coverage_from(self):
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    oclc = DataSource.lookup(self._db, DataSource.OCLC)
    web = DataSource.lookup(self._db, DataSource.WEB)

    # Here are two Gutenberg records.
    g1, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                        Identifier.GUTENBERG_ID, "1")
    g2, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                        Identifier.GUTENBERG_ID, "2")

    # One of them has coverage from OCLC Classify.
    c1 = self._coverage_record(g1, oclc)

    # The other has coverage from a specific operation on OCLC Classify.
    c2 = self._coverage_record(g2, oclc, "some operation")

    # Here's a web record, just sitting there.
    w, ignore = Edition.for_foreign_id(self._db, web, Identifier.URI,
                                       "http://www.foo.com/")

    # If we run missing_coverage_from we pick up the Gutenberg
    # record with no generic OCLC coverage. It doesn't pick up the
    # other Gutenberg record, it doesn't pick up the web record,
    # and it doesn't count the OCLC coverage for a specific
    # operation as generic coverage.
    [in_gutenberg_but_not_in_oclc] = Identifier.missing_coverage_from(
        self._db, [Identifier.GUTENBERG_ID], oclc).all()
    assert g2.primary_identifier == in_gutenberg_but_not_in_oclc

    # If we ask about a specific operation, we get the Gutenberg
    # record that has only generic coverage, because it is missing
    # coverage for that operation. We don't get the record that is
    # covered for the operation.
    [has_generic_coverage_only] = Identifier.missing_coverage_from(
        self._db, [Identifier.GUTENBERG_ID], oclc, "some operation").all()
    assert g1.primary_identifier == has_generic_coverage_only

    # We don't put web sites into OCLC, so this will pick up the
    # web record (but not the Gutenberg records).
    [in_web_but_not_in_oclc] = Identifier.missing_coverage_from(
        self._db, [Identifier.URI], oclc).all()
    assert w.primary_identifier == in_web_but_not_in_oclc

    # We don't use the web as a source of coverage, so this will
    # return both Gutenberg records (but not the web record).
    assert [g1.primary_identifier.id, g2.primary_identifier.id] == sorted(
        [x.id for x in Identifier.missing_coverage_from(
            self._db, [Identifier.GUTENBERG_ID], web)])

def test_for_foreign_id(self):
    identifier_type = Identifier.ISBN
    isbn = "3293000061"

    # Getting the data automatically creates a database record.
    identifier, was_new = Identifier.for_foreign_id(
        self._db, identifier_type, isbn)
    assert Identifier.ISBN == identifier.type
    assert isbn == identifier.identifier
    assert True == was_new

    # If we get it again we get the same data, but it's no longer new.
    identifier2, was_new = Identifier.for_foreign_id(
        self._db, identifier_type, isbn)
    assert identifier == identifier2
    assert False == was_new

    # If we pass in no data we get nothing back.
    assert None == Identifier.for_foreign_id(self._db, None, None)

def test_parse_urn(self):
    # We can parse our custom URNs back into identifiers.
    identifier = self._identifier()
    self._db.commit()
    new_identifier, ignore = Identifier.parse_urn(self._db, identifier.urn)
    assert identifier == new_identifier

    # We can parse urn:isbn URNs into ISBN identifiers. ISBN-10s are
    # converted to ISBN-13s.
    identifier, ignore = Identifier.for_foreign_id(self._db, Identifier.ISBN,
                                                   "9781449358068")
    isbn_urn = "urn:isbn:1449358063"
    isbn_identifier, ignore = Identifier.parse_urn(self._db, isbn_urn)
    assert Identifier.ISBN == isbn_identifier.type
    assert "9781449358068" == isbn_identifier.identifier

    isbn_urn = "urn:isbn:9781449358068"
    isbn_identifier2, ignore = Identifier.parse_urn(self._db, isbn_urn)
    assert isbn_identifier2 == isbn_identifier

    # We can parse ordinary http: or https: URLs into URI identifiers.
    http_identifier, ignore = Identifier.parse_urn(self._db,
                                                   "http://example.com")
    assert Identifier.URI == http_identifier.type
    assert "http://example.com" == http_identifier.identifier

    https_identifier, ignore = Identifier.parse_urn(
        self._db, "https://example.com")
    assert Identifier.URI == https_identifier.type
    assert "https://example.com" == https_identifier.identifier

    # We can parse UUIDs.
    uuid_identifier, ignore = Identifier.parse_urn(
        self._db, "urn:uuid:04377e87-ab69-41c8-a2a4-812d55dc0952")
    assert Identifier.URI == uuid_identifier.type
    assert ("urn:uuid:04377e87-ab69-41c8-a2a4-812d55dc0952" ==
            uuid_identifier.identifier)

    # A URN we can't handle raises an exception.
    ftp_urn = "ftp://example.com"
    pytest.raises(ValueError, Identifier.parse_urn, self._db, ftp_urn)

    # An invalid ISBN raises an exception.
    pytest.raises(ValueError, Identifier.parse_urn, self._db,
                  "urn:isbn:notanisbn")

    # Pass in None and you get None.
    assert None == Identifier.parse_urn(self._db, None)

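# A minimal sketch (a hypothetical helper, not the real implementation) of
# the dispatch rules test_parse_urn asserts. The real parse_urn also
# normalizes ISBN-10s to ISBN-13s, validates ISBNs, and creates database
# rows; this only shows which scheme maps to which identifier type.
def _classify_urn(urn):
    if urn.startswith("urn:isbn:"):
        return "ISBN"
    if urn.startswith(("http:", "https:", "urn:uuid:")):
        return "URI"  # URLs and UUID URNs both become URI identifiers
    if urn.startswith("urn:"):
        return "custom"  # e.g. our URN_SCHEME_PREFIX identifiers
    # Anything else (ftp://..., random strings) is unparseable.
    raise ValueError("Could not parse URN: %s" % urn)
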
def test_urn(self):
    # ISBN identifiers use the ISBN URN scheme.
    identifier, ignore = Identifier.for_foreign_id(self._db, Identifier.ISBN,
                                                   "9781449358068")
    assert "urn:isbn:9781449358068" == identifier.urn

    # URI identifiers don't need a URN scheme.
    identifier, ignore = Identifier.for_foreign_id(self._db, Identifier.URI,
                                                   "http://example.com/")
    assert identifier.identifier == identifier.urn

    # Gutenberg identifiers use Gutenberg's URL-based scheme.
    identifier = self._identifier(Identifier.GUTENBERG_ID)
    assert (Identifier.GUTENBERG_URN_SCHEME_PREFIX + identifier.identifier ==
            identifier.urn)

    # All other identifiers use our custom URN scheme.
    identifier = self._identifier(Identifier.OVERDRIVE_ID)
    assert identifier.urn.startswith(Identifier.URN_SCHEME_PREFIX)

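# A minimal sketch of the per-type URN rules asserted above. The helper name
# and the default prefix value are assumptions, not the real implementation;
# the percent-encoding of the generic scheme is inferred from the
# "Overdrive%20ID/..." URNs used in test_parse_urns below. Gutenberg IDs,
# not shown here, use a third, URL-based prefix
# (Identifier.GUTENBERG_URN_SCHEME_PREFIX + identifier).
from urllib.parse import quote

def _urn_for(identifier_type, value, scheme_prefix="urn:example:id/"):
    if identifier_type == "ISBN":
        return "urn:isbn:" + value
    if identifier_type == "URI":
        return value  # the URI already is its own URN
    # All other types: percent-encode type and value under the generic scheme.
    return scheme_prefix + quote(identifier_type) + "/" + quote(value)
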
def test_for_foreign_id_without_autocreate(self):
    identifier_type = Identifier.ISBN
    isbn = self._str

    # We don't want to auto-create a database record, so we set
    # autocreate=False.
    identifier, was_new = Identifier.for_foreign_id(self._db, identifier_type,
                                                    isbn, autocreate=False)
    assert None == identifier
    assert False == was_new

def test_from_asin(self):
    isbn10 = "1449358063"
    isbn13 = "9781449358068"
    asin = "B0088IYM3C"
    isbn13_with_dashes = "978-144-935-8068"

    i_isbn10, new1 = Identifier.from_asin(self._db, isbn10)
    i_isbn13, new2 = Identifier.from_asin(self._db, isbn13)
    i_asin, new3 = Identifier.from_asin(self._db, asin)
    i_isbn13_2, new4 = Identifier.from_asin(self._db, isbn13_with_dashes)

    # The three ISBNs are equivalent, so they got turned into the same
    # Identifier, using the ISBN-13.
    assert i_isbn10 == i_isbn13
    assert i_isbn13_2 == i_isbn13
    assert Identifier.ISBN == i_isbn10.type
    assert isbn13 == i_isbn10.identifier
    assert True == new1
    assert False == new2
    assert False == new4

    assert Identifier.ASIN == i_asin.type
    assert asin == i_asin.identifier

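# A minimal sketch (not the library's implementation) of the standard
# ISBN-10 to ISBN-13 conversion that the equivalence above relies on:
# strip dashes, drop the ISBN-10 check digit, prefix "978", and recompute
# the EAN-13 check digit.
def _isbn10_to_isbn13(isbn10):
    digits = "978" + isbn10.replace("-", "")[:-1]
    # EAN-13 check digit: alternate weights 1 and 3 over the 12 digits.
    total = sum(int(d) * (1 if i % 2 == 0 else 3)
                for i, d in enumerate(digits))
    check = (10 - total % 10) % 10
    return digits + str(check)

# _isbn10_to_isbn13("1449358063") == "9781449358068", matching the
# equivalence asserted in test_from_asin.
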
def test_missing_coverage_from_with_collection(self):
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    identifier = self._identifier()
    collection1 = self._default_collection
    collection2 = self._collection()
    self._coverage_record(identifier, gutenberg, collection=collection1)

    # The Identifier has coverage in collection 1.
    assert ([] == Identifier.missing_coverage_from(
        self._db, [identifier.type], gutenberg,
        collection=collection1).all())

    # It is missing coverage in collection 2.
    assert [identifier] == Identifier.missing_coverage_from(
        self._db, [identifier.type], gutenberg,
        collection=collection2).all()

    # If no collection is specified, we look for a CoverageRecord
    # that also has no collection specified, and the Identifier is
    # not treated as covered.
    assert [identifier] == Identifier.missing_coverage_from(
        self._db, [identifier.type], gutenberg).all()

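# A sketch of the exact-match rule the assertions above imply; this helper
# is hypothetical, not a real model method. A CoverageRecord only satisfies
# a lookup for the same collection, and a record scoped to a collection
# never satisfies a collection-less lookup (None matches only None).
def _record_satisfies(record_collection_id, requested_collection_id):
    return record_collection_id == requested_collection_id
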
def test_for_foreign_id_rejects_invalid_identifiers(self):
    with pytest.raises(ValueError) as excinfo:
        Identifier.for_foreign_id(self._db, Identifier.BIBLIOTHECA_ID,
                                  "foo/bar")
    assert '"foo/bar" is not a valid Bibliotheca ID.' in str(excinfo.value)

def test_for_foreign_id_by_deprecated_type(self):
    threem_id, is_new = Identifier.for_foreign_id(self._db, "3M ID",
                                                  self._str)
    # The deprecated "3M ID" type is mapped to its modern equivalent,
    # which is a genuinely different string.
    assert Identifier.BIBLIOTHECA_ID == threem_id.type
    assert Identifier.BIBLIOTHECA_ID != "3M ID"

def test_recursively_equivalent_identifier_ids(self):
    identifier = self._identifier()
    data_source = DataSource.lookup(self._db, DataSource.MANUAL)

    strong_equivalent = self._identifier()
    identifier.equivalent_to(data_source, strong_equivalent, 0.9)

    weak_equivalent = self._identifier()
    identifier.equivalent_to(data_source, weak_equivalent, 0.2)

    level_2_equivalent = self._identifier()
    strong_equivalent.equivalent_to(data_source, level_2_equivalent, 0.5)

    level_3_equivalent = self._identifier()
    level_2_equivalent.equivalent_to(data_source, level_3_equivalent, 0.9)

    level_4_equivalent = self._identifier()
    level_3_equivalent.equivalent_to(data_source, level_4_equivalent, 0.6)

    # An unrelated identifier is never included.
    unrelated = self._identifier()

    # With a low threshold and enough levels, we find all the identifiers.
    high_levels_low_threshold = PresentationCalculationPolicy(
        equivalent_identifier_levels=5, equivalent_identifier_threshold=0.1)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=high_levels_low_threshold)
    assert (set([
        identifier.id,
        strong_equivalent.id,
        weak_equivalent.id,
        level_2_equivalent.id,
        level_3_equivalent.id,
        level_4_equivalent.id,
    ]) == set(equivs[identifier.id]))

    # If we only look at one level, we don't find the level 2, 3, or 4
    # identifiers.
    one_level = PresentationCalculationPolicy(
        equivalent_identifier_levels=1, equivalent_identifier_threshold=0.1)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=one_level)
    assert set([identifier.id, strong_equivalent.id,
                weak_equivalent.id]) == set(equivs[identifier.id])

    # If we raise the threshold, we don't find the weak identifier.
    one_level_high_threshold = PresentationCalculationPolicy(
        equivalent_identifier_levels=1, equivalent_identifier_threshold=0.4)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=one_level_high_threshold)
    assert set([identifier.id,
                strong_equivalent.id]) == set(equivs[identifier.id])

    # For deeper levels, the strength is the product of the strengths
    # of all the equivalencies in between the two identifiers.
    # In this example:
    #   identifier - level_2_equivalent = 0.9 * 0.5 = 0.45
    #   identifier - level_3_equivalent = 0.9 * 0.5 * 0.9 = 0.405
    #   identifier - level_4_equivalent = 0.9 * 0.5 * 0.9 * 0.6 = 0.243

    # With a threshold of 0.5, level 2 and all subsequent levels are
    # too weak.
    high_levels_high_threshold = PresentationCalculationPolicy(
        equivalent_identifier_levels=5, equivalent_identifier_threshold=0.5)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=high_levels_high_threshold)
    assert set([identifier.id,
                strong_equivalent.id]) == set(equivs[identifier.id])

    # With a threshold of 0.25, levels 2 and 3 are strong enough, but
    # level 4 is too weak.
    high_levels_lower_threshold = PresentationCalculationPolicy(
        equivalent_identifier_levels=5, equivalent_identifier_threshold=0.25)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=high_levels_lower_threshold)
    assert (set([
        identifier.id,
        strong_equivalent.id,
        level_2_equivalent.id,
        level_3_equivalent.id,
    ]) == set(equivs[identifier.id]))

    # It also works if we start from other identifiers.
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [strong_equivalent.id], policy=high_levels_low_threshold)
    assert (set([
        identifier.id,
        strong_equivalent.id,
        weak_equivalent.id,
        level_2_equivalent.id,
        level_3_equivalent.id,
        level_4_equivalent.id,
    ]) == set(equivs[strong_equivalent.id]))

    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [level_4_equivalent.id], policy=high_levels_low_threshold)
    assert (set([
        identifier.id,
        strong_equivalent.id,
        level_2_equivalent.id,
        level_3_equivalent.id,
        level_4_equivalent.id,
    ]) == set(equivs[level_4_equivalent.id]))

    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [level_4_equivalent.id],
        policy=high_levels_high_threshold)
    assert set([
        level_2_equivalent.id, level_3_equivalent.id, level_4_equivalent.id
    ]) == set(equivs[level_4_equivalent.id])

    # A chain of very strong equivalents can keep a high strength
    # even at deep levels. This wouldn't work if we checked the
    # strength threshold per level instead of accumulating a
    # strength product.
    another_identifier = self._identifier()
    l2 = self._identifier()
    l3 = self._identifier()
    l4 = self._identifier()
    l2.equivalent_to(data_source, another_identifier, 1)
    l3.equivalent_to(data_source, l2, 1)
    l4.equivalent_to(data_source, l3, 0.9)
    high_levels_fairly_high_threshold = PresentationCalculationPolicy(
        equivalent_identifier_levels=5,
        equivalent_identifier_threshold=0.89)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [another_identifier.id],
        policy=high_levels_fairly_high_threshold)
    assert set([another_identifier.id, l2.id, l3.id,
                l4.id]) == set(equivs[another_identifier.id])

    # We can look for multiple identifiers at once.
    two_levels_high_threshold = PresentationCalculationPolicy(
        equivalent_identifier_levels=2, equivalent_identifier_threshold=0.8)
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db,
        [identifier.id, level_3_equivalent.id],
        policy=two_levels_high_threshold,
    )
    assert set([identifier.id,
                strong_equivalent.id]) == set(equivs[identifier.id])
    assert set([level_2_equivalent.id,
                level_3_equivalent.id]) == set(equivs[level_3_equivalent.id])

    # By setting a cutoff, you can say to look deep in the tree,
    # but stop looking as soon as you have a certain number of
    # equivalents.
    with_cutoff = PresentationCalculationPolicy(
        equivalent_identifier_levels=5,
        equivalent_identifier_threshold=0.1,
        equivalent_identifier_cutoff=1,
    )
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=with_cutoff)

    # The cutoff was set to 1, but we always go at least one level
    # deep, and that gives us three equivalent identifiers. We
    # don't artificially trim it back down to 1.
    assert 3 == len(equivs[identifier.id])

    # Increase the cutoff, and we get more identifiers.
    with_cutoff.equivalent_identifier_cutoff = 5
    equivs = Identifier.recursively_equivalent_identifier_ids(
        self._db, [identifier.id], policy=with_cutoff)
    assert len(equivs[identifier.id]) > 3

    # The query() method uses the same db function, but returns
    # equivalents for all identifiers together so it can be used
    # as a subquery.
    query = Identifier.recursively_equivalent_identifier_ids_query(
        Identifier.id, policy=high_levels_low_threshold)
    query = query.where(Identifier.id == identifier.id)
    results = self._db.execute(query)
    equivalent_ids = [r[0] for r in results]
    assert (set([
        identifier.id,
        strong_equivalent.id,
        weak_equivalent.id,
        level_2_equivalent.id,
        level_3_equivalent.id,
        level_4_equivalent.id,
    ]) == set(equivalent_ids))

    query = Identifier.recursively_equivalent_identifier_ids_query(
        Identifier.id, policy=two_levels_high_threshold)
    query = query.where(
        Identifier.id.in_([identifier.id, level_3_equivalent.id]))
    results = self._db.execute(query)
    equivalent_ids = [r[0] for r in results]
    assert (set([
        identifier.id,
        strong_equivalent.id,
        level_2_equivalent.id,
        level_3_equivalent.id,
    ]) == set(equivalent_ids))

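# A minimal standalone sketch (not part of the model) of the strength
# arithmetic described in the comments of the test above: the strength
# between two identifiers N hops apart is the product of the per-hop
# equivalency strengths, and an identifier is only included while that
# product stays at or above the policy threshold.
def _transitive_strength(hop_strengths):
    strength = 1.0
    for hop in hop_strengths:
        strength *= hop
    return strength

# For the chain in the test: 0.9 * 0.5 * 0.9 * 0.6 == 0.243, so
# level_4_equivalent survives a 0.1 threshold but not a 0.25 one.
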
def test_parse_urns(self):
    identifier = self._identifier()
    fake_urn = "what_even_is_this"
    new_urn = (Identifier.URN_SCHEME_PREFIX +
               "Overdrive%20ID/nosuchidentifier")
    # Also create a different URN that would result in the same identifier.
    same_new_urn = (Identifier.URN_SCHEME_PREFIX +
                    "Overdrive%20ID/NOSUCHidentifier")
    urns = [identifier.urn, fake_urn, new_urn, same_new_urn]

    results = Identifier.parse_urns(self._db, urns, autocreate=False)
    identifiers_by_urn, failures = results

    # With autocreate=False, no new identifiers are created. All URNs
    # for identifiers that aren't in the db are included in the list
    # of failures.
    assert sorted([fake_urn, new_urn, same_new_urn]) == sorted(failures)

    # Only the existing identifier is included in the results.
    assert 1 == len(identifiers_by_urn)
    assert {identifier.urn: identifier} == identifiers_by_urn

    # By default, new identifiers are created.
    results = Identifier.parse_urns(self._db, urns)
    identifiers_by_urn, failures = results

    # Only the fake URN is returned as a failure.
    assert [fake_urn] == failures

    # The results contain two identifiers: the existing one, plus a
    # single new one -- the two case-variant Overdrive URNs resolve
    # to the same identifier.
    assert 2 == len(identifiers_by_urn)

    # One is the existing identifier.
    assert identifier == identifiers_by_urn[identifier.urn]

    # And the new identifier has been created.
    new_identifier = identifiers_by_urn[new_urn]
    assert isinstance(new_identifier, Identifier)
    assert new_identifier in self._db
    assert Identifier.OVERDRIVE_ID == new_identifier.type
    assert "nosuchidentifier" == new_identifier.identifier

    # By passing in a list of allowed_types we can stop certain
    # types of Identifiers from being looked up, even if they
    # already exist.
    isbn_urn = "urn:isbn:9781453219539"
    urns = [new_urn, isbn_urn]

    success, failure = Identifier.parse_urns(
        self._db, urns, allowed_types=[Identifier.OVERDRIVE_ID])
    assert new_urn in success
    assert isbn_urn in failure

    success, failure = Identifier.parse_urns(
        self._db, urns,
        allowed_types=[Identifier.OVERDRIVE_ID, Identifier.ISBN])
    assert new_urn in success
    assert isbn_urn in success
    assert [] == failure

    # If allowed_types is empty, no URNs can be looked up -- this is
    # most likely the caller's mistake.
    success, failure = Identifier.parse_urns(self._db, urns,
                                             allowed_types=[])
    assert new_urn in failure
    assert isbn_urn in failure

def test_calculate_presentation_cover(self):
    # Here's a cover image with a thumbnail.
    representation, ignore = get_one_or_create(self._db, Representation,
                                               url="http://cover")
    representation.media_type = Representation.JPEG_MEDIA_TYPE
    representation.mirrored_at = utc_now()
    representation.mirror_url = "http://mirror/cover"
    thumb, ignore = get_one_or_create(self._db, Representation,
                                      url="http://thumb")
    thumb.media_type = Representation.JPEG_MEDIA_TYPE
    thumb.mirrored_at = utc_now()
    thumb.mirror_url = "http://mirror/thumb"
    thumb.thumbnail_of_id = representation.id

    # Verify that a cover for the edition's primary identifier is used.
    e, pool = self._edition(with_license_pool=True)
    link, ignore = e.primary_identifier.add_link(Hyperlink.IMAGE,
                                                 "http://cover",
                                                 e.data_source)
    link.resource.representation = representation
    e.calculate_presentation()
    assert "http://mirror/cover" == e.cover_full_url
    assert "http://mirror/thumb" == e.cover_thumbnail_url

    # Verify that a cover will be used even if it's some distance
    # away along the chain of equivalent identifiers.
    e, pool = self._edition(with_license_pool=True)
    oclc_classify = DataSource.lookup(self._db, DataSource.OCLC)
    oclc_number, ignore = Identifier.for_foreign_id(
        self._db, Identifier.OCLC_NUMBER, "22")
    e.primary_identifier.equivalent_to(oclc_classify, oclc_number, 1)
    link, ignore = oclc_number.add_link(Hyperlink.IMAGE, "http://cover",
                                        oclc_classify)
    link.resource.representation = representation
    e.calculate_presentation()
    assert "http://mirror/cover" == e.cover_full_url
    assert "http://mirror/thumb" == e.cover_thumbnail_url

    # Verify that a nearby cover takes precedence over a faraway cover.
    link, ignore = e.primary_identifier.add_link(Hyperlink.IMAGE,
                                                 "http://nearby-cover",
                                                 e.data_source)
    nearby, ignore = get_one_or_create(self._db, Representation,
                                       url=link.resource.url)
    nearby.media_type = Representation.JPEG_MEDIA_TYPE
    nearby.mirrored_at = utc_now()
    nearby.mirror_url = "http://mirror/nearby-cover"
    link.resource.representation = nearby
    nearby_thumb, ignore = get_one_or_create(self._db, Representation,
                                             url="http://nearby-thumb")
    nearby_thumb.media_type = Representation.JPEG_MEDIA_TYPE
    nearby_thumb.mirrored_at = utc_now()
    nearby_thumb.mirror_url = "http://mirror/nearby-thumb"
    nearby_thumb.thumbnail_of_id = nearby.id
    e.calculate_presentation()
    assert "http://mirror/nearby-cover" == e.cover_full_url
    assert "http://mirror/nearby-thumb" == e.cover_thumbnail_url

    # Verify that a thumbnail is used even if there's no full-sized cover.
    e, pool = self._edition(with_license_pool=True)
    link, ignore = e.primary_identifier.add_link(Hyperlink.THUMBNAIL_IMAGE,
                                                 "http://thumb",
                                                 e.data_source)
    link.resource.representation = thumb
    e.calculate_presentation()
    assert None == e.cover_full_url
    assert "http://mirror/thumb" == e.cover_thumbnail_url

def test_calculate_evaluate_summary_quality_with_privileged_data_sources(self):
    e, pool = self._edition(with_license_pool=True)
    oclc = DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)
    overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)

    # There's a perfunctory description from Overdrive.
    l1, new = pool.add_link(Hyperlink.SHORT_DESCRIPTION, None, overdrive,
                            "text/plain", "F")
    overdrive_resource = l1.resource

    # There's a much better description from OCLC Linked Data.
    l2, new = pool.add_link(
        Hyperlink.DESCRIPTION,
        None,
        oclc,
        "text/plain",
        """Nothing about working with his former high school crush, Stephanie Stephens, is ideal. Still, if Aaron Caruthers intends to save his grandmother's bakery, he must. Good thing he has a lot of ideas he can't wait to implement. He never imagines Stephanie would have her own ideas for the business. Or that they would clash with his!""",
    )
    oclc_resource = l2.resource

    # In a head-to-head evaluation, the OCLC Linked Data description wins.
    ids = [e.primary_identifier.id]
    champ1, resources = Identifier.evaluate_summary_quality(self._db, ids)
    assert set([overdrive_resource, oclc_resource]) == set(resources)
    assert oclc_resource == champ1

    # But if we say that Overdrive is the privileged data source, it
    # wins automatically. The other resource isn't even considered.
    champ2, resources2 = Identifier.evaluate_summary_quality(
        self._db, ids, [overdrive])
    assert overdrive_resource == champ2
    assert [overdrive_resource] == resources2

    # If we say that some other data source is privileged, and
    # there are no descriptions from that data source, a
    # head-to-head evaluation is performed, and OCLC Linked Data
    # wins.
    threem = DataSource.lookup(self._db, DataSource.THREEM)
    champ3, resources3 = Identifier.evaluate_summary_quality(
        self._db, ids, [threem])
    assert set([overdrive_resource, oclc_resource]) == set(resources3)
    assert oclc_resource == champ3

    # If there are two privileged data sources and there's no
    # description from the first, the second is used.
    champ4, resources4 = Identifier.evaluate_summary_quality(
        self._db, ids, [threem, overdrive])
    assert [overdrive_resource] == resources4
    assert overdrive_resource == champ4

    # Even an empty string wins if it's from the most privileged
    # data source. This is not a silly example: the librarian may
    # choose to set the description to an empty string in the admin
    # interface, to override a bad Overdrive (or other) description.
    staff = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
    l3, new = pool.add_link(Hyperlink.SHORT_DESCRIPTION, None, staff,
                            "text/plain", "")
    staff_resource = l3.resource

    champ5, resources5 = Identifier.evaluate_summary_quality(
        self._db, ids, [staff, overdrive])
    assert [staff_resource] == resources5
    assert staff_resource == champ5

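# A minimal sketch (hypothetical helper, not the real implementation) of the
# privileged-source selection the test above exercises: walk the privileged
# sources in order and keep the descriptions from the first one that has
# any, falling back to a head-to-head evaluation of everything only when no
# privileged source has a description. Note that an empty-string description
# still counts as a description, which is why the staff override wins.
def _summaries_to_consider(resources_by_source, privileged_sources):
    for source in privileged_sources:
        candidates = resources_by_source.get(source)
        if candidates:
            return candidates
    # No privileged source has a description: consider them all.
    return [r for rs in resources_by_source.values() for r in rs]
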
def test_recursive_edition_equivalence(self):
    # Here's an Edition for a Project Gutenberg text.
    gutenberg, gutenberg_pool = self._edition(
        data_source_name=DataSource.GUTENBERG,
        identifier_type=Identifier.GUTENBERG_ID,
        identifier_id="1",
        with_open_access_download=True,
        title="Original Gutenberg text",
    )

    # Here's an Edition for an Open Library text.
    open_library, open_library_pool = self._edition(
        data_source_name=DataSource.OPEN_LIBRARY,
        identifier_type=Identifier.OPEN_LIBRARY_ID,
        identifier_id="W1111",
        with_open_access_download=True,
        title="Open Library record",
    )

    # We've learned from OCLC Classify that the Gutenberg text is
    # equivalent to a certain OCLC Number. We've learned from OCLC
    # Linked Data that the Open Library text is equivalent to the
    # same OCLC Number.
    oclc_classify = DataSource.lookup(self._db, DataSource.OCLC)
    oclc_linked_data = DataSource.lookup(self._db,
                                         DataSource.OCLC_LINKED_DATA)

    oclc_number, ignore = Identifier.for_foreign_id(
        self._db, Identifier.OCLC_NUMBER, "22")
    gutenberg.primary_identifier.equivalent_to(oclc_classify, oclc_number, 1)
    open_library.primary_identifier.equivalent_to(oclc_linked_data,
                                                  oclc_number, 1)

    # Here's an Edition for a Recovering the Classics cover.
    web_source = DataSource.lookup(self._db, DataSource.WEB)
    recovering, ignore = Edition.for_foreign_id(
        self._db,
        web_source,
        Identifier.URI,
        "http://recoveringtheclassics.com/pride-and-prejudice.jpg",
    )
    recovering.title = "Recovering the Classics cover"

    # We've manually associated that Edition's URI directly
    # with the Project Gutenberg text.
    manual = DataSource.lookup(self._db, DataSource.MANUAL)
    gutenberg.primary_identifier.equivalent_to(
        manual, recovering.primary_identifier, 1)

    # Finally, here's a completely unrelated Edition, which
    # will not show up.
    gutenberg2, gutenberg2_pool = self._edition(
        data_source_name=DataSource.GUTENBERG,
        identifier_type=Identifier.GUTENBERG_ID,
        identifier_id="2",
        with_open_access_download=True,
        title="Unrelated Gutenberg record.",
    )

    # When we call equivalent_editions on the Project Gutenberg
    # Edition, we get three Editions: the Gutenberg record
    # itself, the Open Library record, and the Recovering the
    # Classics record.
    #
    # We get the Open Library record because it's associated with
    # the same OCLC Number as the Gutenberg record. We get the
    # Recovering the Classics record because it's associated
    # directly with the Gutenberg record.
    results = list(gutenberg.equivalent_editions())
    assert 3 == len(results)
    assert gutenberg in results
    assert open_library in results
    assert recovering in results

    # Here's a Work that incorporates one of the Gutenberg records.
    work = self._work()
    work.license_pools.extend([gutenberg2_pool])

    # Its set-of-all-editions contains only one record.
    assert 1 == work.all_editions().count()

    # If we add the other Gutenberg record to it, then its
    # set-of-all-editions is extended by that record, *plus*
    # all the Editions equivalent to that record.
    work.license_pools.extend([gutenberg_pool])
    assert 4 == work.all_editions().count()