Example #1
    def test_repr(self, _, identifier_type, identifier, title):
        """Test that Identifier.__repr__ correctly works with both ASCII and non-ASCII symbols.

        :param _: Name of the test case
        :type _: str

        :param identifier_type: Type of the identifier
        :type identifier_type: str

        :param identifier: Identifier's value
        :type identifier: str

        :param title: Presentation edition's title
        :type title: str
        """
        # Arrange
        identifier = Identifier(type=identifier_type, identifier=identifier)

        if title:
            edition = create_autospec(spec=Edition)
            # PropertyMock acts as a property only when attached to the
            # mock's type; assigned to an instance it is returned verbatim.
            type(edition).title = PropertyMock(return_value=title)

            identifier.primarily_identifies = [edition]

        # Act
        # NOTE: we are not interested in the value returned by repr;
        # we just want to make sure repr doesn't raise an unexpected exception.
        _ = repr(identifier)
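A note on the mocking pattern above: PropertyMock acts as a property only when attached to a mock's type; assigned to an instance attribute, it is handed back verbatim. A minimal standalone illustration using plain unittest.mock, independent of this codebase:

    from unittest.mock import MagicMock, PropertyMock

    mock = MagicMock()
    type(mock).title = PropertyMock(return_value="Moby-Dick")
    assert mock.title == "Moby-Dick"  # type-level attachment works as a property

    mock.author = PropertyMock(return_value="Melville")
    assert isinstance(mock.author, PropertyMock)  # instance attachment does not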
Example #2
    def test_missing_coverage_from_with_cutoff_date(self):
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        oclc = DataSource.lookup(self._db, DataSource.OCLC)

        # Here's an Edition with a coverage record from OCLC Classify.
        edition, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                                 Identifier.GUTENBERG_ID,
                                                 "1")
        identifier = edition.primary_identifier
        coverage = self._coverage_record(edition, oclc)

        # The CoverageRecord knows when the coverage was provided.
        timestamp = coverage.timestamp

        # If we ask for Identifiers that are missing coverage records
        # as of that time, we see nothing.
        assert ([] == Identifier.missing_coverage_from(
            self._db, [identifier.type],
            oclc,
            count_as_missing_before=timestamp).all())

        # But if we give a time one second later, the Identifier is
        # missing coverage.
        assert [identifier] == Identifier.missing_coverage_from(
            self._db,
            [identifier.type],
            oclc,
            count_as_missing_before=timestamp + datetime.timedelta(seconds=1),
        ).all()
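The two assertions above pin down the boundary semantics of count_as_missing_before: coverage timestamped exactly at the cutoff still counts, while moving the cutoff one second past it makes the identifier missing. A minimal sketch of that predicate (my own illustration, not the library's internals):

    import datetime

    def counts_as_covered(coverage_timestamp, count_as_missing_before):
        # Coverage provided before the cutoff is treated as missing;
        # coverage at or after the cutoff still counts.
        return coverage_timestamp >= count_as_missing_before

    t = datetime.datetime(2020, 1, 1)
    assert counts_as_covered(t, t)
    assert not counts_as_covered(t, t + datetime.timedelta(seconds=1))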
Example #3
    def test_missing_coverage_from(self):
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        oclc = DataSource.lookup(self._db, DataSource.OCLC)
        web = DataSource.lookup(self._db, DataSource.WEB)

        # Here are two Gutenberg records.
        g1, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                            Identifier.GUTENBERG_ID, "1")

        g2, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                            Identifier.GUTENBERG_ID, "2")

        # One of them has coverage from OCLC Classify.
        c1 = self._coverage_record(g1, oclc)

        # The other has coverage from a specific operation on OCLC Classify.
        c2 = self._coverage_record(g2, oclc, "some operation")

        # Here's a web record, just sitting there.
        w, ignore = Edition.for_foreign_id(self._db, web, Identifier.URI,
                                           "http://www.foo.com/")

        # If we run missing_coverage_from we pick up the Gutenberg
        # record with no generic OCLC coverage. It doesn't pick up the
        # other Gutenberg record, it doesn't pick up the web record,
        # and it doesn't pick up the OCLC coverage for a specific
        # operation.
        [in_gutenberg_but_not_in_oclc] = Identifier.missing_coverage_from(
            self._db, [Identifier.GUTENBERG_ID], oclc).all()

        assert g2.primary_identifier == in_gutenberg_but_not_in_oclc

        # If we ask about a specific operation, the record with only
        # generic OCLC coverage counts as missing; the record that has
        # coverage for that operation does not.
        [has_generic_coverage_only] = Identifier.missing_coverage_from(
            self._db, [Identifier.GUTENBERG_ID], oclc,
            "some operation").all()
        assert g1.primary_identifier == has_generic_coverage_only

        # We don't put web sites into OCLC, so this will pick up the
        # web record (but not the Gutenberg record).
        [in_web_but_not_in_oclc] = Identifier.missing_coverage_from(
            self._db, [Identifier.URI], oclc).all()
        assert w.primary_identifier == in_web_but_not_in_oclc

        # We don't use the web as a source of coverage, so this will
        # return both Gutenberg records (but not the web record).
        assert [g1.primary_identifier.id, g2.primary_identifier.id] == sorted([
            x.id for x in Identifier.missing_coverage_from(
                self._db, [Identifier.GUTENBERG_ID], web)
        ])
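The standard way to express "identifiers with no matching coverage record" in SQLAlchemy is an outer join filtered on NULL, and the assertions above are consistent with that shape. A hypothetical sketch of the pattern (illustrative column names, not the actual implementation):

    from sqlalchemy import select

    def missing_coverage_sketch(identifier_types, data_source, operation=None):
        # LEFT JOIN coverage records for this source/operation, then keep
        # only identifiers for which no such record exists.
        join_clause = (
            (CoverageRecord.identifier_id == Identifier.id)
            & (CoverageRecord.data_source_id == data_source.id)
            & (CoverageRecord.operation == operation)
        )
        return (
            select(Identifier)
            .outerjoin(CoverageRecord, join_clause)
            .where(Identifier.type.in_(identifier_types))
            .where(CoverageRecord.id == None)  # no coverage row matched
        )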
Example #4
    def test_for_foreign_id(self):
        identifier_type = Identifier.ISBN
        isbn = "3293000061"

        # Getting the data automatically creates a database record.
        identifier, was_new = Identifier.for_foreign_id(
            self._db, identifier_type, isbn)
        assert Identifier.ISBN == identifier.type
        assert isbn == identifier.identifier
        assert True == was_new

        # If we get it again we get the same data, but it's no longer new.
        identifier2, was_new = Identifier.for_foreign_id(
            self._db, identifier_type, isbn)
        assert identifier == identifier2
        assert False == was_new

        # If we pass in no data we get nothing back.
        assert None == Identifier.for_foreign_id(self._db, None, None)
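for_foreign_id follows the classic get-or-create contract, with the second element of the returned tuple reporting whether a row was inserted. A generic sketch of that contract (schematic, not the real implementation):

    def get_or_create(db, model, **kwargs):
        instance = db.query(model).filter_by(**kwargs).one_or_none()
        if instance is not None:
            return instance, False  # existing row: was_new is False
        instance = model(**kwargs)
        db.add(instance)
        return instance, True       # freshly created: was_new is True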
Example #5
    def test_parse_urn(self):

        # We can parse our custom URNs back into identifiers.
        identifier = self._identifier()
        self._db.commit()
        new_identifier, ignore = Identifier.parse_urn(self._db, identifier.urn)
        assert identifier == new_identifier

        # We can parse urn:isbn URNs into ISBN identifiers. ISBN-10s are
        # converted to ISBN-13s.
        identifier, ignore = Identifier.for_foreign_id(self._db,
                                                       Identifier.ISBN,
                                                       "9781449358068")
        isbn_urn = "urn:isbn:1449358063"
        isbn_identifier, ignore = Identifier.parse_urn(self._db, isbn_urn)
        assert Identifier.ISBN == isbn_identifier.type
        assert "9781449358068" == isbn_identifier.identifier

        isbn_urn = "urn:isbn:9781449358068"
        isbn_identifier2, ignore = Identifier.parse_urn(self._db, isbn_urn)
        assert isbn_identifier2 == isbn_identifier

        # We can parse ordinary http: or https: URLs into URI
        # identifiers.
        http_identifier, ignore = Identifier.parse_urn(self._db,
                                                       "http://example.com")
        assert Identifier.URI == http_identifier.type
        assert "http://example.com" == http_identifier.identifier

        https_identifier, ignore = Identifier.parse_urn(
            self._db, "https://example.com")
        assert Identifier.URI == https_identifier.type
        assert "https://example.com" == https_identifier.identifier

        # We can parse UUIDs.
        uuid_identifier, ignore = Identifier.parse_urn(
            self._db, "urn:uuid:04377e87-ab69-41c8-a2a4-812d55dc0952")
        assert Identifier.URI == uuid_identifier.type
        assert ("urn:uuid:04377e87-ab69-41c8-a2a4-812d55dc0952" ==
                uuid_identifier.identifier)

        # A URN we can't handle raises an exception.
        ftp_urn = "ftp://example.com"
        pytest.raises(ValueError, Identifier.parse_urn, self._db, ftp_urn)

        # An invalid ISBN raises an exception.
        pytest.raises(ValueError, Identifier.parse_urn, self._db,
                      "urn:isbn:notanisbn")

        # Pass in None and you get None.
        assert None == Identifier.parse_urn(self._db, None)
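The ISBN-10 to ISBN-13 normalization exercised above ("1449358063" becoming "9781449358068") is the standard conversion: drop the ISBN-10 check digit, prefix "978", and recompute the check digit with alternating 1/3 weights. A self-contained version:

    def isbn10_to_isbn13(isbn10):
        core = "978" + isbn10[:-1]  # drop the old check digit, add the prefix
        total = sum((1 if i % 2 == 0 else 3) * int(d)
                    for i, d in enumerate(core))
        check = (10 - total % 10) % 10  # EAN-13 check digit
        return core + str(check)

    assert isbn10_to_isbn13("1449358063") == "9781449358068"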
Example #6
    def test_urn(self):
        # ISBN identifiers use the ISBN URN scheme.
        identifier, ignore = Identifier.for_foreign_id(self._db,
                                                       Identifier.ISBN,
                                                       "9781449358068")
        assert "urn:isbn:9781449358068" == identifier.urn

        # URI identifiers don't need a URN scheme.
        identifier, ignore = Identifier.for_foreign_id(self._db,
                                                       Identifier.URI,
                                                       "http://example.com/")
        assert identifier.identifier == identifier.urn

        # Gutenberg identifiers use Gutenberg's URL-based scheme.
        identifier = self._identifier(Identifier.GUTENBERG_ID)
        assert (Identifier.GUTENBERG_URN_SCHEME_PREFIX +
                identifier.identifier == identifier.urn)

        # All other identifiers use our custom URN scheme.
        identifier = self._identifier(Identifier.OVERDRIVE_ID)
        assert identifier.urn.startswith(Identifier.URN_SCHEME_PREFIX)
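The urn property exercised here amounts to a per-type dispatch. A schematic version, grounded only in what the assertions show (the real property also appears to percent-encode the type and value, judging by the URNs in test_parse_urns under Example #13):

    def urn_for(identifier_type, value):
        if identifier_type == Identifier.ISBN:
            return "urn:isbn:" + value
        if identifier_type == Identifier.URI:
            return value  # URIs serve as their own URNs
        if identifier_type == Identifier.GUTENBERG_ID:
            return Identifier.GUTENBERG_URN_SCHEME_PREFIX + value
        # Everything else falls back to the custom URN scheme.
        return Identifier.URN_SCHEME_PREFIX + identifier_type + "/" + value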
Example #7
    def test_for_foreign_id_without_autocreate(self):
        identifier_type = Identifier.ISBN
        isbn = self._str

        # We don't want to auto-create a database record, so we set
        # autocreate=False
        identifier, was_new = Identifier.for_foreign_id(self._db,
                                                        identifier_type,
                                                        isbn,
                                                        autocreate=False)
        assert None == identifier
        assert False == was_new
Example #8
    def test_from_asin(self):
        isbn10 = "1449358063"
        isbn13 = "9781449358068"
        asin = "B0088IYM3C"
        isbn13_with_dashes = "978-144-935-8068"

        i_isbn10, new1 = Identifier.from_asin(self._db, isbn10)
        i_isbn13, new2 = Identifier.from_asin(self._db, isbn13)
        i_asin, new3 = Identifier.from_asin(self._db, asin)
        i_isbn13_2, new4 = Identifier.from_asin(self._db, isbn13_with_dashes)

        # The three ISBNs are equivalent, so they got turned into the same
        # Identifier, using the ISBN13.
        assert i_isbn10 == i_isbn13
        assert i_isbn13_2 == i_isbn13
        assert Identifier.ISBN == i_isbn10.type
        assert isbn13 == i_isbn10.identifier
        assert True == new1
        assert False == new2
        assert False == new4

        assert Identifier.ASIN == i_asin.type
        assert asin == i_asin.identifier
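from_asin evidently distinguishes ASINs that are really ISBN-10s by checksum validation. A hedged sketch of that classification step (helper names are my own; isbn10_to_isbn13 is the conversion sketched under Example #5):

    def is_valid_isbn10(s):
        # ISBN-10 checksum: weighted digit sum divisible by 11, with "X"
        # standing for 10 in the final position.
        if not (s[:-1].isdigit() and (s[-1].isdigit() or s[-1] == "X")):
            return False
        total = sum((10 - i) * (10 if c == "X" else int(c))
                    for i, c in enumerate(s))
        return total % 11 == 0

    def classify_asin(asin):
        cleaned = asin.replace("-", "")
        if len(cleaned) == 10 and is_valid_isbn10(cleaned):
            return Identifier.ISBN, isbn10_to_isbn13(cleaned)
        if len(cleaned) == 13 and cleaned.isdigit():
            return Identifier.ISBN, cleaned
        return Identifier.ASIN, asin  # e.g. "B0088IYM3C"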
Example #9
    def test_missing_coverage_from_with_collection(self):
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        identifier = self._identifier()
        collection1 = self._default_collection
        collection2 = self._collection()
        self._coverage_record(identifier, gutenberg, collection=collection1)

        # The Identifier has coverage in collection 1.
        assert ([] == Identifier.missing_coverage_from(
            self._db, [identifier.type], gutenberg,
            collection=collection1).all())

        # It is missing coverage in collection 2.
        assert [identifier] == Identifier.missing_coverage_from(
            self._db, [identifier.type], gutenberg,
            collection=collection2).all()

        # If no collection is specified, we look for a CoverageRecord
        # that also has no collection specified, and the Identifier is
        # not treated as covered.
        assert [identifier] == Identifier.missing_coverage_from(
            self._db, [identifier.type], gutenberg).all()
Example #10
    def test_for_foreign_id_rejects_invalid_identifiers(self):
        with pytest.raises(ValueError) as excinfo:
            Identifier.for_foreign_id(self._db, Identifier.BIBLIOTHECA_ID,
                                      "foo/bar")
        assert '"foo/bar" is not a valid Bibliotheca ID.' in str(excinfo.value)
Example #11
    def test_for_foreign_id_by_deprecated_type(self):
        threem_id, is_new = Identifier.for_foreign_id(self._db, "3M ID",
                                                      self._str)
        assert Identifier.BIBLIOTHECA_ID == threem_id.type
        assert Identifier.BIBLIOTHECA_ID != "3M ID"
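Handling deprecated type names is typically a one-step lookup applied before the main get-or-create. A hypothetical sketch ("3M" was Bibliotheca's earlier branding, which is what this test relies on):

    DEPRECATED_TYPE_NAMES = {"3M ID": Identifier.BIBLIOTHECA_ID}

    def canonical_type(identifier_type):
        # Silently upgrade deprecated names before looking up or
        # creating the Identifier row.
        return DEPRECATED_TYPE_NAMES.get(identifier_type, identifier_type)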
Example #12
    def test_recursively_equivalent_identifier_ids(self):
        identifier = self._identifier()
        data_source = DataSource.lookup(self._db, DataSource.MANUAL)

        strong_equivalent = self._identifier()
        identifier.equivalent_to(data_source, strong_equivalent, 0.9)

        weak_equivalent = self._identifier()
        identifier.equivalent_to(data_source, weak_equivalent, 0.2)

        level_2_equivalent = self._identifier()
        strong_equivalent.equivalent_to(data_source, level_2_equivalent, 0.5)

        level_3_equivalent = self._identifier()
        level_2_equivalent.equivalent_to(data_source, level_3_equivalent, 0.9)

        level_4_equivalent = self._identifier()
        level_3_equivalent.equivalent_to(data_source, level_4_equivalent, 0.6)

        unrelated = self._identifier()

        # With a low threshold and enough levels, we find all the identifiers.
        high_levels_low_threshold = PresentationCalculationPolicy(
            equivalent_identifier_levels=5,
            equivalent_identifier_threshold=0.1)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=high_levels_low_threshold)
        assert (set([
            identifier.id,
            strong_equivalent.id,
            weak_equivalent.id,
            level_2_equivalent.id,
            level_3_equivalent.id,
            level_4_equivalent.id,
        ]) == set(equivs[identifier.id]))

        # If we only look at one level, we don't find the level 2, 3, or 4 identifiers.
        one_level = PresentationCalculationPolicy(
            equivalent_identifier_levels=1,
            equivalent_identifier_threshold=0.1)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=one_level)
        assert set([identifier.id, strong_equivalent.id,
                    weak_equivalent.id]) == set(equivs[identifier.id])

        # If we raise the threshold, we don't find the weak identifier.
        one_level_high_threshold = PresentationCalculationPolicy(
            equivalent_identifier_levels=1,
            equivalent_identifier_threshold=0.4)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=one_level_high_threshold)
        assert set([identifier.id,
                    strong_equivalent.id]) == set(equivs[identifier.id])

        # For deeper levels, the strength is the product of the strengths
        # of all the equivalencies in between the two identifiers.

        # In this example:
        # identifier - level_2_equivalent = 0.9 * 0.5 = 0.45
        # identifier - level_3_equivalent = 0.9 * 0.5 * 0.9 = 0.405
        # identifier - level_4_equivalent = 0.9 * 0.5 * 0.9 * 0.6 = 0.243

        # With a threshold of 0.5, level 2 and all subsequent levels are too weak.
        high_levels_high_threshold = PresentationCalculationPolicy(
            equivalent_identifier_levels=5,
            equivalent_identifier_threshold=0.5)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=high_levels_high_threshold)
        assert set([identifier.id,
                    strong_equivalent.id]) == set(equivs[identifier.id])

        # With a threshold of 0.25, level 2 is strong enough, but level
        # 4 is too weak.
        high_levels_lower_threshold = PresentationCalculationPolicy(
            equivalent_identifier_levels=5,
            equivalent_identifier_threshold=0.25)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=high_levels_lower_threshold)
        assert (set([
            identifier.id,
            strong_equivalent.id,
            level_2_equivalent.id,
            level_3_equivalent.id,
        ]) == set(equivs[identifier.id]))

        # It also works if we start from other identifiers.
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [strong_equivalent.id], policy=high_levels_low_threshold)
        assert (set([
            identifier.id,
            strong_equivalent.id,
            weak_equivalent.id,
            level_2_equivalent.id,
            level_3_equivalent.id,
            level_4_equivalent.id,
        ]) == set(equivs[strong_equivalent.id]))

        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [level_4_equivalent.id],
            policy=high_levels_low_threshold)
        assert (set([
            identifier.id,
            strong_equivalent.id,
            level_2_equivalent.id,
            level_3_equivalent.id,
            level_4_equivalent.id,
        ]) == set(equivs[level_4_equivalent.id]))

        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [level_4_equivalent.id],
            policy=high_levels_high_threshold)
        assert set([
            level_2_equivalent.id, level_3_equivalent.id, level_4_equivalent.id
        ]) == set(equivs[level_4_equivalent.id])

        # A chain of very strong equivalents can keep a high strength
        # even at deep levels. This wouldn't work if we applied the
        # threshold per level instead of to the accumulated product.
        another_identifier = self._identifier()
        l2 = self._identifier()
        l3 = self._identifier()
        l4 = self._identifier()
        l2.equivalent_to(data_source, another_identifier, 1)
        l3.equivalent_to(data_source, l2, 1)
        l4.equivalent_to(data_source, l3, 0.9)
        high_levels_fairly_high_threshold = PresentationCalculationPolicy(
            equivalent_identifier_levels=5,
            equivalent_identifier_threshold=0.89)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [another_identifier.id],
            high_levels_fairly_high_threshold)
        assert set([another_identifier.id, l2.id, l3.id,
                    l4.id]) == set(equivs[another_identifier.id])

        # We can look for multiple identifiers at once.
        two_levels_high_threshold = PresentationCalculationPolicy(
            equivalent_identifier_levels=2,
            equivalent_identifier_threshold=0.8)
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db,
            [identifier.id, level_3_equivalent.id],
            policy=two_levels_high_threshold,
        )
        assert set([identifier.id,
                    strong_equivalent.id]) == set(equivs[identifier.id])
        assert set([level_2_equivalent.id, level_3_equivalent.id
                    ]) == set(equivs[level_3_equivalent.id])

        # By setting a cutoff, you can say to look deep in the tree,
        # but stop looking as soon as you have a certain number of
        # equivalents.
        with_cutoff = PresentationCalculationPolicy(
            equivalent_identifier_levels=5,
            equivalent_identifier_threshold=0.1,
            equivalent_identifier_cutoff=1,
        )
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=with_cutoff)

        # The cutoff was set to 1, but we always go at least one level
        # deep, and that gives us three equivalent identifiers. We
        # don't artificially trim it back down to 1.
        assert 3 == len(equivs[identifier.id])

        # Increase the cutoff, and we get more identifiers.
        with_cutoff.equivalent_identifier_cutoff = 5
        equivs = Identifier.recursively_equivalent_identifier_ids(
            self._db, [identifier.id], policy=with_cutoff)
        assert len(equivs[identifier.id]) > 3

        # The recursively_equivalent_identifier_ids_query() method uses
        # the same database function, but returns equivalents for all
        # identifiers together so it can be used as a subquery.
        query = Identifier.recursively_equivalent_identifier_ids_query(
            Identifier.id, policy=high_levels_low_threshold)
        query = query.where(Identifier.id == identifier.id)
        results = self._db.execute(query)
        equivalent_ids = [r[0] for r in results]
        assert (set([
            identifier.id,
            strong_equivalent.id,
            weak_equivalent.id,
            level_2_equivalent.id,
            level_3_equivalent.id,
            level_4_equivalent.id,
        ]) == set(equivalent_ids))

        query = Identifier.recursively_equivalent_identifier_ids_query(
            Identifier.id, policy=two_levels_high_threshold)
        query = query.where(
            Identifier.id.in_([identifier.id, level_3_equivalent.id]))
        results = self._db.execute(query)
        equivalent_ids = [r[0] for r in results]
        assert (set([
            identifier.id,
            strong_equivalent.id,
            level_2_equivalent.id,
            level_3_equivalent.id,
        ]) == set(equivalent_ids))
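The accumulated-strength rule spelled out in the comments above (the strength of a path is the product of its edge strengths, compared against the threshold) can be illustrated with a tiny in-memory traversal. This sketches the semantics the assertions pin down, not the recursive database function itself:

    def equivalent_ids(graph, start, levels, threshold):
        # graph: {node: [(neighbor, strength), ...]}, treated as undirected.
        found = {start: 1.0}
        frontier = [(start, 1.0)]
        for _ in range(levels):
            next_frontier = []
            for node, strength in frontier:
                for neighbor, edge in graph.get(node, []):
                    accumulated = strength * edge
                    if accumulated >= threshold and neighbor not in found:
                        found[neighbor] = accumulated
                        next_frontier.append((neighbor, accumulated))
            frontier = next_frontier
        return set(found)

    # identifier -0.9- strong -0.5- level2 -0.9- level3 -0.6- level4
    g = {
        "id": [("strong", 0.9)],
        "strong": [("id", 0.9), ("l2", 0.5)],
        "l2": [("strong", 0.5), ("l3", 0.9)],
        "l3": [("l2", 0.9), ("l4", 0.6)],
        "l4": [("l3", 0.6)],
    }
    # Matches the 0.25-threshold case above: level 4 (0.243) falls just short.
    assert equivalent_ids(g, "id", 5, 0.25) == {"id", "strong", "l2", "l3"}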
Example #13
    def test_parse_urns(self):
        identifier = self._identifier()
        fake_urn = "what_even_is_this"
        new_urn = Identifier.URN_SCHEME_PREFIX + "Overdrive%20ID/nosuchidentifier"
        # Also create a different URN that would result in the same identifier.
        same_new_urn = Identifier.URN_SCHEME_PREFIX + "Overdrive%20ID/NOSUCHidentifier"
        urns = [identifier.urn, fake_urn, new_urn, same_new_urn]

        results = Identifier.parse_urns(self._db, urns, autocreate=False)
        identifiers_by_urn, failures = results

        # With autocreate=False, no new identifiers are created. All URNs
        # for identifiers that aren't in the db are included in the list
        # of failures.
        assert sorted([fake_urn, new_urn, same_new_urn]) == sorted(failures)

        # Only the existing identifier is included in the results.
        assert 1 == len(identifiers_by_urn)
        assert {identifier.urn: identifier} == identifiers_by_urn

        # By default, new identifiers are created, too.
        results = Identifier.parse_urns(self._db, urns)
        identifiers_by_urn, failures = results

        # Only the fake URN is returned as a failure.
        assert [fake_urn] == failures

        # The result contains only two identifiers.
        assert 2 == len(identifiers_by_urn)

        # One is the existing identifier.
        assert identifier == identifiers_by_urn[identifier.urn]

        # And the new identifier has been created.
        new_identifier = identifiers_by_urn[new_urn]
        assert isinstance(new_identifier, Identifier)
        assert new_identifier in self._db
        assert Identifier.OVERDRIVE_ID == new_identifier.type
        assert "nosuchidentifier" == new_identifier.identifier

        # By passing in a list of allowed_types we can stop certain
        # types of Identifiers from being looked up, even if they
        # already exist.
        isbn_urn = "urn:isbn:9781453219539"
        urns = [new_urn, isbn_urn]
        only_overdrive = [Identifier.OVERDRIVE_ID]
        overdrive_and_isbn = [Identifier.OVERDRIVE_ID, Identifier.ISBN]
        no_types_allowed = []

        success, failure = Identifier.parse_urns(
            self._db, urns, allowed_types=only_overdrive)
        assert new_urn in success
        assert isbn_urn in failure

        success, failure = Identifier.parse_urns(
            self._db, urns, allowed_types=overdrive_and_isbn)
        assert new_urn in success
        assert isbn_urn in success
        assert [] == failure

        # If the allowed_types is empty, no URNs can be looked up
        # -- this is most likely the caller's mistake.
        success, failure = Identifier.parse_urns(
            self._db, urns, allowed_types=no_types_allowed)
        assert new_urn in failure
        assert isbn_urn in failure
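parse_urns is effectively a bulk parse_urn that partitions its input instead of raising. A schematic of the contract the assertions rely on (parse_type_and_value is a hypothetical helper; the real method batches its database lookups):

    def parse_urns_sketch(db, urns, autocreate=True, allowed_types=None):
        identifiers_by_urn, failures = {}, []
        for urn in urns:
            try:
                urn_type, value = parse_type_and_value(urn)
            except ValueError:
                failures.append(urn)  # unparseable, e.g. "what_even_is_this"
                continue
            if allowed_types is not None and urn_type not in allowed_types:
                failures.append(urn)  # type excluded by the caller
                continue
            identifier, _ = Identifier.for_foreign_id(
                db, urn_type, value, autocreate=autocreate)
            if identifier is None:
                failures.append(urn)  # unknown and autocreate=False
            else:
                identifiers_by_urn[urn] = identifier
        return identifiers_by_urn, failures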
Example #14
    def test_calculate_presentation_cover(self):
        # Here's a cover image with a thumbnail.
        representation, ignore = get_one_or_create(self._db,
                                                   Representation,
                                                   url="http://cover")
        representation.media_type = Representation.JPEG_MEDIA_TYPE
        representation.mirrored_at = utc_now()
        representation.mirror_url = "http://mirror/cover"
        thumb, ignore = get_one_or_create(self._db,
                                          Representation,
                                          url="http://thumb")
        thumb.media_type = Representation.JPEG_MEDIA_TYPE
        thumb.mirrored_at = utc_now()
        thumb.mirror_url = "http://mirror/thumb"
        thumb.thumbnail_of_id = representation.id

        # Verify that a cover for the edition's primary identifier is used.
        e, pool = self._edition(with_license_pool=True)
        link, ignore = e.primary_identifier.add_link(Hyperlink.IMAGE,
                                                     "http://cover",
                                                     e.data_source)
        link.resource.representation = representation
        e.calculate_presentation()
        assert "http://mirror/cover" == e.cover_full_url
        assert "http://mirror/thumb" == e.cover_thumbnail_url

        # Verify that a cover will be used even if it's some distance
        # away along the chain of equivalent identifiers.
        e, pool = self._edition(with_license_pool=True)
        oclc_classify = DataSource.lookup(self._db, DataSource.OCLC)
        oclc_number, ignore = Identifier.for_foreign_id(
            self._db, Identifier.OCLC_NUMBER, "22")
        e.primary_identifier.equivalent_to(oclc_classify, oclc_number, 1)
        link, ignore = oclc_number.add_link(Hyperlink.IMAGE, "http://cover",
                                            oclc_classify)
        link.resource.representation = representation
        e.calculate_presentation()
        assert "http://mirror/cover" == e.cover_full_url
        assert "http://mirror/thumb" == e.cover_thumbnail_url

        # Verify that a nearby cover takes precedence over a
        # faraway cover.
        link, ignore = e.primary_identifier.add_link(Hyperlink.IMAGE,
                                                     "http://nearby-cover",
                                                     e.data_source)
        nearby, ignore = get_one_or_create(self._db,
                                           Representation,
                                           url=link.resource.url)
        nearby.media_type = Representation.JPEG_MEDIA_TYPE
        nearby.mirrored_at = utc_now()
        nearby.mirror_url = "http://mirror/nearby-cover"
        link.resource.representation = nearby
        nearby_thumb, ignore = get_one_or_create(self._db,
                                                 Representation,
                                                 url="http://nearby-thumb")
        nearby_thumb.media_type = Representation.JPEG_MEDIA_TYPE
        nearby_thumb.mirrored_at = utc_now()
        nearby_thumb.mirror_url = "http://mirror/nearby-thumb"
        nearby_thumb.thumbnail_of_id = nearby.id
        e.calculate_presentation()
        assert "http://mirror/nearby-cover" == e.cover_full_url
        assert "http://mirror/nearby-thumb" == e.cover_thumbnail_url

        # Verify that a thumbnail is used even if there's
        # no full-sized cover.
        e, pool = self._edition(with_license_pool=True)
        link, ignore = e.primary_identifier.add_link(Hyperlink.THUMBNAIL_IMAGE,
                                                     "http://thumb",
                                                     e.data_source)
        link.resource.representation = thumb
        e.calculate_presentation()
        assert None == e.cover_full_url
        assert "http://mirror/thumb" == e.cover_thumbnail_url
Example #15
    def test_calculate_evaluate_summary_quality_with_privileged_data_sources(
            self):
        e, pool = self._edition(with_license_pool=True)
        oclc = DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)
        overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)

        # There's a perfunctory description from Overdrive.
        l1, new = pool.add_link(Hyperlink.SHORT_DESCRIPTION, None, overdrive,
                                "text/plain", "F")

        overdrive_resource = l1.resource

        # There's a much better description from OCLC Linked Data.
        l2, new = pool.add_link(
            Hyperlink.DESCRIPTION,
            None,
            oclc,
            "text/plain",
            """Nothing about working with his former high school crush, Stephanie Stephens, is ideal. Still, if Aaron Caruthers intends to save his grandmother's bakery, he must. Good thing he has a lot of ideas he can't wait to implement. He never imagines Stephanie would have her own ideas for the business. Or that they would clash with his!""",
        )
        oclc_resource = l2.resource

        # In a head-to-head evaluation, the OCLC Linked Data description wins.
        ids = [e.primary_identifier.id]
        champ1, resources = Identifier.evaluate_summary_quality(self._db, ids)

        assert set([overdrive_resource, oclc_resource]) == set(resources)
        assert oclc_resource == champ1

        # But if we say that Overdrive is the privileged data source, it wins
        # automatically. The other resource isn't even considered.
        champ2, resources2 = Identifier.evaluate_summary_quality(
            self._db, ids, [overdrive])
        assert overdrive_resource == champ2
        assert [overdrive_resource] == resources2

        # If we say that some other data source is privileged, and
        # there are no descriptions from that data source, a
        # head-to-head evaluation is performed, and OCLC Linked Data
        # wins.
        threem = DataSource.lookup(self._db, DataSource.THREEM)
        champ3, resources3 = Identifier.evaluate_summary_quality(
            self._db, ids, [threem])
        assert set([overdrive_resource, oclc_resource]) == set(resources3)
        assert oclc_resource == champ3

        # If there are two privileged data sources and there's no
        # description from the first, the second is used.
        champ4, resources4 = Identifier.evaluate_summary_quality(
            self._db, ids, [threem, overdrive])
        assert [overdrive_resource] == resources4
        assert overdrive_resource == champ4

        # Even an empty string wins if it's from the most privileged data
        # source. This is not a silly example: a librarian may set the
        # description to an empty string in the admin interface to
        # override a bad description from Overdrive or another source.
        staff = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
        l3, new = pool.add_link(Hyperlink.SHORT_DESCRIPTION, None, staff,
                                "text/plain", "")
        staff_resource = l3.resource

        champ5, resources5 = Identifier.evaluate_summary_quality(
            self._db, ids, [staff, overdrive])
        assert [staff_resource] == resources5
        assert staff_resource == champ5
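The privilege logic exercised here reduces to: walk the privileged sources in order, and the first one with any description at all short-circuits the head-to-head comparison. A hedged sketch of that selection step (illustrative names; the quality scoring itself is omitted):

    def candidate_descriptions(descriptions_by_source, privileged_sources):
        # descriptions_by_source: {DataSource: [Resource, ...]}
        for source in privileged_sources or []:
            if descriptions_by_source.get(source):
                # The first privileged source with any description wins
                # outright, even if that description is an empty string.
                return descriptions_by_source[source]
        # No privileged source had anything: pool all descriptions and
        # let the head-to-head quality evaluation pick a champion.
        return [r for rs in descriptions_by_source.values() for r in rs]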
Example #16
    def test_recursive_edition_equivalence(self):

        # Here's an Edition for a Project Gutenberg text.
        gutenberg, gutenberg_pool = self._edition(
            data_source_name=DataSource.GUTENBERG,
            identifier_type=Identifier.GUTENBERG_ID,
            identifier_id="1",
            with_open_access_download=True,
            title="Original Gutenberg text",
        )

        # Here's an Edition for an Open Library text.
        open_library, open_library_pool = self._edition(
            data_source_name=DataSource.OPEN_LIBRARY,
            identifier_type=Identifier.OPEN_LIBRARY_ID,
            identifier_id="W1111",
            with_open_access_download=True,
            title="Open Library record",
        )

        # We've learned from OCLC Classify that the Gutenberg text is
        # equivalent to a certain OCLC Number. We've learned from OCLC
        # Linked Data that the Open Library text is equivalent to the
        # same OCLC Number.
        oclc_classify = DataSource.lookup(self._db, DataSource.OCLC)
        oclc_linked_data = DataSource.lookup(self._db,
                                             DataSource.OCLC_LINKED_DATA)

        oclc_number, ignore = Identifier.for_foreign_id(
            self._db, Identifier.OCLC_NUMBER, "22")
        gutenberg.primary_identifier.equivalent_to(oclc_classify, oclc_number,
                                                   1)
        open_library.primary_identifier.equivalent_to(oclc_linked_data,
                                                      oclc_number, 1)

        # Here's an Edition for a Recovering the Classics cover.
        web_source = DataSource.lookup(self._db, DataSource.WEB)
        recovering, ignore = Edition.for_foreign_id(
            self._db,
            web_source,
            Identifier.URI,
            "http://recoveringtheclassics.com/pride-and-prejudice.jpg",
        )
        recovering.title = "Recovering the Classics cover"

        # We've manually associated that Edition's URI directly
        # with the Project Gutenberg text.
        manual = DataSource.lookup(self._db, DataSource.MANUAL)
        gutenberg.primary_identifier.equivalent_to(
            manual, recovering.primary_identifier, 1)

        # Finally, here's a completely unrelated Edition, which
        # will not be showing up.
        gutenberg2, gutenberg2_pool = self._edition(
            data_source_name=DataSource.GUTENBERG,
            identifier_type=Identifier.GUTENBERG_ID,
            identifier_id="2",
            with_open_access_download=True,
            title="Unrelated Gutenberg record.",
        )

        # When we call equivalent_editions on the Project Gutenberg
        # Edition, we get three Editions: the Gutenberg record
        # itself, the Open Library record, and the Recovering the
        # Classics record.
        #
        # We get the Open Library record because it's associated with
        # the same OCLC Number as the Gutenberg record. We get the
        # Recovering the Classics record because it's associated
        # directly with the Gutenberg record.
        results = list(gutenberg.equivalent_editions())
        assert 3 == len(results)
        assert gutenberg in results
        assert open_library in results
        assert recovering in results

        # Here's a Work that incorporates one of the Gutenberg records.
        work = self._work()
        work.license_pools.extend([gutenberg2_pool])

        # Its set-of-all-editions contains only one record.
        assert 1 == work.all_editions().count()

        # If we add the other Gutenberg record to it, then its
        # set-of-all-editions is extended by that record, *plus*
        # all the Editions equivalent to that record.
        work.license_pools.extend([gutenberg_pool])
        assert 4 == work.all_editions().count()