def test_sort_by_priority(self):

        # Make editions created by the license source, the metadata
        # wrangler, and library staff.
        admin = self._edition(data_source_name=DataSource.LIBRARY_STAFF,
                              with_license_pool=False)
        od = self._edition(data_source_name=DataSource.OVERDRIVE,
                           with_license_pool=False)
        mw = self._edition(data_source_name=DataSource.METADATA_WRANGLER,
                           with_license_pool=False)

        # Create an invalid edition with no data source. (This shouldn't
        # happen.)
        no_data_source = self._edition(with_license_pool=False)
        no_data_source.data_source = None

        # Helper: materialize each sequence as a plain list so the
        # expected and actual orderings can be compared directly.
        def ids(l):
            return list(l)

        # The invalid edition has the lowest priority. The admin
        # interface and metadata wrangler take precedence over any
        # other data source.
        expect = [no_data_source, od, mw, admin]
        actual = Edition.sort_by_priority(expect)
        assert ids(expect) == ids(actual)

        # If you specify which data source is associated with the
        # license for the book, you will boost its priority above that
        # of the metadata wrangler.
        expect = [no_data_source, mw, od, admin]
        actual = Edition.sort_by_priority(expect, od.data_source)
        assert ids(expect) == ids(actual)
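# The ordering asserted above implies a ranking roughly like the key
# function below. This is a hypothetical sketch of that ranking, not
# Edition.sort_by_priority itself; the numeric ranks and source names
# are made up for illustration.
def priority_key(source_name, license_source_name=None):
    if source_name is None:
        return 0  # an edition with no data source sorts below everything
    if source_name == "Library Staff":
        return 4  # staff input outranks every other source
    if source_name == license_source_name:
        return 3  # the source that licensed the book outranks the wrangler
    if source_name == "Metadata Wrangler":
        return 2
    return 1      # any other source

sources = [None, "Overdrive", "Metadata Wrangler", "Library Staff"]
assert sorted(sources, key=priority_key) == sources
assert sorted(sources, key=lambda s: priority_key(s, "Overdrive")) == [
    None, "Metadata Wrangler", "Overdrive", "Library Staff"]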

    def test_missing_coverage_from(self):
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        oclc = DataSource.lookup(self._db, DataSource.OCLC)
        web = DataSource.lookup(self._db, DataSource.WEB)

        # Here are two Gutenberg records.
        g1, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                            Identifier.GUTENBERG_ID, "1")

        g2, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                            Identifier.GUTENBERG_ID, "2")

        # One of them has coverage from OCLC Classify.
        c1 = self._coverage_record(g1, oclc)

        # The other has coverage from a specific operation on OCLC Classify.
        c2 = self._coverage_record(g2, oclc, "some operation")

        # Here's a web record, just sitting there.
        w, ignore = Edition.for_foreign_id(self._db, web, Identifier.URI,
                                           "http://www.foo.com/")

        # If we run missing_coverage_from we pick up the Gutenberg
        # record with no generic OCLC coverage. It doesn't pick up the
        # other Gutenberg record, it doesn't pick up the web record,
        # and it doesn't pick up the OCLC coverage for a specific
        # operation.
        [in_gutenberg_but_not_in_oclc] = Identifier.missing_coverage_from(
            self._db, [Identifier.GUTENBERG_ID], oclc
        ).all()

        assert g2.primary_identifier == in_gutenberg_but_not_in_oclc

        # If we ask about a specific operation, we get the Gutenberg
        # record that has only generic OCLC coverage, but not the one
        # that has coverage for that operation.
        [has_generic_coverage_only] = Identifier.missing_coverage_from(
            self._db, [Identifier.GUTENBERG_ID], oclc, "some operation"
        ).all()
        assert g1.primary_identifier == has_generic_coverage_only

        # We don't put web sites into OCLC, so this will pick up the
        # web record (but not the Gutenberg record).
        [in_web_but_not_in_oclc] = Identifier.missing_coverage_from(
            self._db, [Identifier.URI], oclc
        ).all()
        assert w.primary_identifier == in_web_but_not_in_oclc

        # We don't use the web as a source of coverage, so this will
        # return both Gutenberg records (but not the web record).
        assert sorted([g1.primary_identifier.id, g2.primary_identifier.id]) == sorted([
            x.id for x in Identifier.missing_coverage_from(
                self._db, [Identifier.GUTENBERG_ID], web)
        ])
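# In other words, an identifier is "missing coverage" when there is no
# CoverageRecord from the given source (and, if one is named, for the
# given operation). The in-memory filter below is a hypothetical sketch
# of that rule with made-up identifier strings; the real method builds
# a database query.
def missing_coverage(identifiers, coverage, source, operation=None):
    # `coverage` is a list of (identifier, source, operation) tuples.
    covered = {ident for (ident, src, op) in coverage
               if src == source and op == operation}
    return [i for i in identifiers if i not in covered]

coverage = [("gutenberg/1", "OCLC", None),
            ("gutenberg/2", "OCLC", "some operation")]
gutenberg_ids = ["gutenberg/1", "gutenberg/2"]
assert missing_coverage(gutenberg_ids, coverage, "OCLC") == ["gutenberg/2"]
assert missing_coverage(gutenberg_ids, coverage, "OCLC", "some operation") == ["gutenberg/1"]
assert missing_coverage(gutenberg_ids, coverage, "WEB") == gutenberg_ids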

    def test_missing_coverage_from_with_cutoff_date(self):
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        oclc = DataSource.lookup(self._db, DataSource.OCLC)
        web = DataSource.lookup(self._db, DataSource.WEB)

        # Here's an Edition with a coverage record from OCLC Classify.
        gutenberg, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                                   Identifier.GUTENBERG_ID,
                                                   "1")
        identifier = gutenberg.primary_identifier
        coverage = self._coverage_record(gutenberg, oclc)

        # The CoverageRecord knows when the coverage was provided.
        timestamp = coverage.timestamp

        # If we ask for Identifiers that are missing coverage records
        # as of that time, we see nothing.
        assert ([] == Identifier.missing_coverage_from(
            self._db, [identifier.type],
            oclc,
            count_as_missing_before=timestamp).all())

        # But if we give a time one second later, the Identifier is
        # missing coverage.
        assert [identifier] == Identifier.missing_coverage_from(
            self._db,
            [identifier.type],
            oclc,
            count_as_missing_before=timestamp + datetime.timedelta(seconds=1),
        ).all()
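import datetime

# The two assertions above pin down the cutoff rule: coverage provided
# exactly at the cutoff still counts, while coverage older than the
# cutoff is treated as missing. A hypothetical predicate capturing that
# rule (not the actual query logic):
def counts_as_missing(coverage_timestamp, count_as_missing_before=None):
    if coverage_timestamp is None:
        return True   # no coverage at all is always missing
    if count_as_missing_before is None:
        return False  # any coverage is good enough
    return coverage_timestamp < count_as_missing_before

t = datetime.datetime(2020, 1, 1)
assert counts_as_missing(t, count_as_missing_before=t) is False
assert counts_as_missing(
    t, count_as_missing_before=t + datetime.timedelta(seconds=1)) is True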

    def test_for_foreign_id(self):
        """Verify we can get a data source's view of a foreign id."""
        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        id = "549"
        type = Identifier.GUTENBERG_ID

        record, was_new = Edition.for_foreign_id(self._db, data_source, type,
                                                 id)
        assert data_source == record.data_source
        identifier = record.primary_identifier
        assert id == identifier.identifier
        assert type == identifier.type
        assert True == was_new
        assert [identifier] == record.equivalent_identifiers()

        # We can get the same Edition by providing only the name
        # of the data source.
        record, was_new = Edition.for_foreign_id(self._db,
                                                 DataSource.GUTENBERG, type,
                                                 id)
        assert data_source == record.data_source
        assert identifier == record.primary_identifier
        assert False == was_new
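# The asserted behavior is get-or-create keyed on (data source, identifier
# type, identifier): the first call creates the record and reports it as
# new, a repeat call returns the same record, and the data source can be
# given as an object or by name. The registry and function below are a
# hypothetical stand-in for Edition.for_foreign_id, which works against
# the database.
_editions = {}

def for_foreign_id(data_source, id_type, foreign_id):
    source_name = getattr(data_source, "name", data_source)
    key = (source_name, id_type, foreign_id)
    if key in _editions:
        return _editions[key], False
    record = {"data_source": source_name, "identifier": (id_type, foreign_id)}
    _editions[key] = record
    return record, True

record, was_new = for_foreign_id("Gutenberg", "Gutenberg ID", "549")
assert was_new is True
same_record, was_new = for_foreign_id("Gutenberg", "Gutenberg ID", "549")
assert same_record is record and was_new is False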

    def test_custom_lists(self):
        # A Collection can be associated with one or more CustomLists.
        list1, ignore = get_one_or_create(self._db, CustomList, name=self._str)
        list2, ignore = get_one_or_create(self._db, CustomList, name=self._str)
        self.collection.customlists = [list1, list2]
        assert 0 == len(list1.entries)
        assert 0 == len(list2.entries)

        # When a new pool is added to the collection and its presentation edition is
        # calculated for the first time, it's automatically added to the lists.
        work = self._work(collection=self.collection, with_license_pool=True)
        assert 1 == len(list1.entries)
        assert 1 == len(list2.entries)
        assert work == list1.entries[0].work
        assert work == list2.entries[0].work

        # Now remove it from one of the lists. If its presentation edition changes
        # again or its pool changes works, it's not added back.
        self._db.delete(list1.entries[0])
        self._db.commit()
        assert 0 == len(list1.entries)
        assert 1 == len(list2.entries)

        pool = work.license_pools[0]
        identifier = pool.identifier
        staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
        staff_edition, ignore = Edition.for_foreign_id(
            self._db, staff_data_source, identifier.type, identifier.identifier
        )

        staff_edition.title = self._str
        work.calculate_presentation()
        assert 0 == len(list1.entries)
        assert 1 == len(list2.entries)

        new_work = self._work(collection=self.collection)
        pool.work = new_work
        assert 0 == len(list1.entries)
        assert 1 == len(list2.entries)

    def test_author_contributors(self):
        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        id = self._str
        type = Identifier.GUTENBERG_ID

        edition, was_new = Edition.for_foreign_id(self._db, data_source, type,
                                                  id)

        # We've listed the same person as primary author and author.
        [alice], ignore = Contributor.lookup(self._db, "Adder, Alice")
        edition.add_contributor(
            alice, [Contributor.AUTHOR_ROLE, Contributor.PRIMARY_AUTHOR_ROLE])

        # We've listed a different person as illustrator.
        [bob], ignore = Contributor.lookup(self._db, "Bitshifter, Bob")
        edition.add_contributor(bob, [Contributor.ILLUSTRATOR_ROLE])

        # Both contributors show up in .contributors.
        assert set([alice, bob]) == edition.contributors

        # Only the author shows up in .author_contributors, and she
        # only shows up once.
        assert [alice] == edition.author_contributors
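# That is, .author_contributors keeps only contributors credited in an
# author-type role and lists each person once. The sketch below is a
# hypothetical restatement of that filtering; the role strings are
# illustrative, not the real Contributor constants.
AUTHOR_ROLES = {"Primary Author", "Author"}

def author_contributors(contributions):
    # `contributions` is a list of (person, role) pairs.
    seen, authors = set(), []
    for person, role in contributions:
        if role in AUTHOR_ROLES and person not in seen:
            seen.add(person)
            authors.append(person)
    return authors

contributions = [("Adder, Alice", "Author"),
                 ("Adder, Alice", "Primary Author"),
                 ("Bitshifter, Bob", "Illustrator")]
assert author_contributors(contributions) == ["Adder, Alice"]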

    def test_merge(self):

        # Here's Robert.
        [robert], ignore = Contributor.lookup(self._db, sort_name="Robert")

        # Here's Bob.
        [bob], ignore = Contributor.lookup(self._db, sort_name="Jones, Bob")
        bob.extra["foo"] = "bar"
        bob.aliases = ["Bobby"]
        bob.viaf = "viaf"
        bob.lc = "lc"
        bob.display_name = "Bob Jones"
        bob.family_name = "Bobb"
        bob.wikipedia_name = "Bob_(Person)"

        # Each is a contributor to an Edition.
        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)

        roberts_book, ignore = Edition.for_foreign_id(self._db, data_source,
                                                      Identifier.GUTENBERG_ID,
                                                      "1")
        roberts_book.add_contributor(robert, Contributor.AUTHOR_ROLE)

        bobs_book, ignore = Edition.for_foreign_id(self._db, data_source,
                                                   Identifier.GUTENBERG_ID,
                                                   "10")
        bobs_book.add_contributor(bob, Contributor.AUTHOR_ROLE)

        # In a shocking turn of events, it transpires that "Bob" and
        # "Robert" are the same person. We merge "Bob" into Robert
        # thusly:
        bob.merge_into(robert)

        # 'Bob' is now listed as an alias for Robert, as is Bob's
        # alias.
        assert ["Jones, Bob", "Bobby"] == robert.aliases

        # The extra information associated with Bob is now associated
        # with Robert.
        assert "bar" == robert.extra["foo"]

        assert "viaf" == robert.viaf
        assert "lc" == robert.lc
        assert "Bobb" == robert.family_name
        assert "Bob Jones" == robert.display_name
        assert "Robert" == robert.sort_name
        assert "Bob_(Person)" == robert.wikipedia_name

        # The standalone 'Bob' record has been removed from the database.
        assert ([] == self._db.query(Contributor).filter(
            Contributor.sort_name == "Bob").all())

        # Bob's book is now associated with 'Robert', not the standalone
        # 'Bob' record.
        assert [robert] == bobs_book.author_contributors

        # Confirm that sort_name is propagated if it's not already set
        # on the destination Contributor.
        robert.sort_name = None
        [bob], ignore = Contributor.lookup(self._db, sort_name="Jones, Bob")
        bob.merge_into(robert)
        assert "Jones, Bob" == robert.sort_name

    def test_recursive_edition_equivalence(self):

        # Here's an Edition for a Project Gutenberg text.
        gutenberg, gutenberg_pool = self._edition(
            data_source_name=DataSource.GUTENBERG,
            identifier_type=Identifier.GUTENBERG_ID,
            identifier_id="1",
            with_open_access_download=True,
            title="Original Gutenberg text",
        )

        # Here's an Edition for an Open Library text.
        open_library, open_library_pool = self._edition(
            data_source_name=DataSource.OPEN_LIBRARY,
            identifier_type=Identifier.OPEN_LIBRARY_ID,
            identifier_id="W1111",
            with_open_access_download=True,
            title="Open Library record",
        )

        # We've learned from OCLC Classify that the Gutenberg text is
        # equivalent to a certain OCLC Number. We've learned from OCLC
        # Linked Data that the Open Library text is equivalent to the
        # same OCLC Number.
        oclc_classify = DataSource.lookup(self._db, DataSource.OCLC)
        oclc_linked_data = DataSource.lookup(self._db,
                                             DataSource.OCLC_LINKED_DATA)

        oclc_number, ignore = Identifier.for_foreign_id(
            self._db, Identifier.OCLC_NUMBER, "22")
        gutenberg.primary_identifier.equivalent_to(oclc_classify, oclc_number,
                                                   1)
        open_library.primary_identifier.equivalent_to(oclc_linked_data,
                                                      oclc_number, 1)

        # Here's an Edition for a Recovering the Classics cover.
        web_source = DataSource.lookup(self._db, DataSource.WEB)
        recovering, ignore = Edition.for_foreign_id(
            self._db,
            web_source,
            Identifier.URI,
            "http://recoveringtheclassics.com/pride-and-prejudice.jpg",
        )
        recovering.title = "Recovering the Classics cover"

        # We've manually associated that Edition's URI directly
        # with the Project Gutenberg text.
        manual = DataSource.lookup(self._db, DataSource.MANUAL)
        gutenberg.primary_identifier.equivalent_to(
            manual, recovering.primary_identifier, 1)

        # Finally, here's a completely unrelated Edition, which
        # will not show up in the results.
        gutenberg2, gutenberg2_pool = self._edition(
            data_source_name=DataSource.GUTENBERG,
            identifier_type=Identifier.GUTENBERG_ID,
            identifier_id="2",
            with_open_access_download=True,
            title="Unrelated Gutenberg record.",
        )

        # When we call equivalent_editions on the Project Gutenberg
        # Edition, we get three Editions: the Gutenberg record
        # itself, the Open Library record, and the Recovering the
        # Classics record.
        #
        # We get the Open Library record because it's associated with
        # the same OCLC Number as the Gutenberg record. We get the
        # Recovering the Classics record because it's associated
        # directly with the Gutenberg record.
        results = list(gutenberg.equivalent_editions())
        assert 3 == len(results)
        assert gutenberg in results
        assert open_library in results
        assert recovering in results

        # Here's a Work that incorporates one of the Gutenberg records.
        work = self._work()
        work.license_pools.extend([gutenberg2_pool])

        # Its set-of-all-editions contains only one record.
        assert 1 == work.all_editions().count()

        # If we add the other Gutenberg record to it, then its
        # set-of-all-editions is extended by that record, *plus*
        # all the Editions equivalent to that record.
        work.license_pools.extend([gutenberg_pool])
        assert 4 == work.all_editions().count()
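from collections import deque

# "Equivalent editions" rests on a transitive closure over identifier
# equivalencies: the Gutenberg record reaches the Open Library record
# only because both are tied to the same OCLC Number. The breadth-first
# sketch below is a hypothetical restatement with made-up identifier
# strings; the real query also weights equivalencies by strength.
def equivalent_identifiers(start, equivalencies):
    # `equivalencies` is a list of undirected (id_a, id_b) pairs.
    neighbors = {}
    for a, b in equivalencies:
        neighbors.setdefault(a, set()).add(b)
        neighbors.setdefault(b, set()).add(a)
    seen, queue = {start}, deque([start])
    while queue:
        for other in neighbors.get(queue.popleft(), ()):
            if other not in seen:
                seen.add(other)
                queue.append(other)
    return seen

equivalencies = [("gutenberg/1", "oclc/22"),           # from OCLC Classify
                 ("openlibrary/W1111", "oclc/22"),     # from OCLC Linked Data
                 ("gutenberg/1", "recovering-cover")]  # asserted manually
ids = equivalent_identifiers("gutenberg/1", equivalencies)
assert ids == {"gutenberg/1", "oclc/22", "openlibrary/W1111", "recovering-cover"}
assert "gutenberg/2" not in ids  # the unrelated record never shows up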