Esempio n. 1
0
    def test_extract_multiple_works_with_author_restriction(self):
        """Parsing can be restricted to works by a given author."""
        xml = self.sample_data("multi_work_response.xml")

        def parse_for(sort_name):
            # The list-unpacking insists that the lookup yields exactly
            # one contributor for this sort name.
            [author], _is_new = Contributor.lookup(
                self._db, sort_name=sort_name
            )
            _status, found = OCLCXMLParser.parse(
                self._db, xml, languages=["eng"], authors=[author]
            )
            return found

        # This person is not listed as an author of any work in the
        # dataset, so none of those works are picked up.
        eq_(0, len(parse_for("Wrong Author")))

        # Restricting to Herman Melville picks up 11 of the 25 works
        # in the dataset.
        swids = parse_for("Melville, Herman")
        eq_(11, len(swids))

        # The missing works (as you can verify by looking at
        # oclc_multi_work_response.xml) either don't credit Herman
        # Melville at all (the 1956 Gregory Peck movie "Moby Dick"),
        # credit him as "Associated name" rather than as an author
        # (four books about "Moby Dick"), or credit him as an author
        # but not as the primary author (academic works and adaptations).
        excluded = (
            '10798812', '13424036', '22658644', '250604212', '474972877',
            '13358012', '153927888', '13206523', '46935692', "14135019",
            "51088077", "105446800", "164732682", "26863225",
        )
        for swid in excluded:
            assert swid not in swids
Esempio n. 2
0
    def _contributor_sublanes(self, _db):
        """Yield a ContributorLane for each contributor in the chosen tier.

        The role tiers from ``Contributor.author_contributor_tiers()`` are
        tried in order, skipping the first one, until a tier matches at
        least one contribution on ``self.edition``. If no tier matches,
        nothing is yielded.
        """
        tiers = list(Contributor.author_contributor_tiers())[1:]

        chosen = []
        for tier_roles in tiers:
            chosen = [
                contribution.contributor
                for contribution in self.edition.contributions
                if contribution.role in tier_roles
            ]
            if chosen:
                # The first tier with any match wins; later tiers are
                # never consulted.
                break

        for contributor in chosen:
            # Prefer display names over sort names for easier URIs
            # at the /works/contributor/<NAME> route.
            lane_name = (
                contributor.display_name
                if contributor.display_name
                else contributor.sort_name
            )
            yield ContributorLane(_db, self.library, lane_name, parent=self)
Esempio n. 3
0
    def _contributor_sublanes(self, _db):
        """Yield a ContributorLane for each contributor in the chosen tier.

        Role tiers come from ``Contributor.author_contributor_tiers()``
        with the first tier skipped. The first remaining tier that
        matches any contribution on ``self.edition`` supplies the
        contributors; with no match, nothing is yielded.
        """
        tiers = list(Contributor.author_contributor_tiers())[1:]

        # Lazily compute the matching contributors for each tier, so that
        # evaluation stops at the first tier that yields anyone.
        matches_per_tier = (
            [entry.contributor
             for entry in self.edition.contributions
             if entry.role in tier_roles]
            for tier_roles in tiers
        )
        viable = next((found for found in matches_per_tier if found), [])

        for contributor in viable:
            # Prefer display names over sort names for easier URIs
            # at the /works/contributor/<NAME> route.
            lane_name = contributor.display_name or contributor.sort_name

            yield ContributorLane(
                self.get_library(_db),
                lane_name,
                parent=self,
                languages=self.languages,
                audiences=self.audiences,
            )
Esempio n. 4
0
    def test_to_edition_sets_sort_author_name_if_obvious(self):
        """An edition built from a list title picks up a known contributor's
        sort name and display name.
        """
        # The one-element unpacking requires that the lookup produce
        # exactly one contributor for this name.
        matches, _is_new = Contributor.lookup(self._db, u"Hawkins, Paula")
        [contributor] = matches
        contributor.display_name = u"Paula Hawkins"

        list_title = NYTBestSellerListTitle(self.one_list_title)
        edition = list_title.to_edition(self._db, self.metadata_client)

        eq_(contributor.sort_name, edition.sort_author)
        eq_(contributor.display_name, edition.author)
        assert edition.permanent_work_id is not None
Esempio n. 5
0
    def test_to_edition_sets_sort_author_name_if_obvious(self):
        """A book-medium list title converted to an edition reuses a known
        contributor's sort name and display name.
        """
        # Unpacking into a one-element list requires exactly one
        # contributor to come back from the lookup.
        found, _is_new = Contributor.lookup(self._db, u"Hawkins, Paula")
        [contributor] = found
        contributor.display_name = u"Paula Hawkins"

        list_title = NYTBestSellerListTitle(
            self.one_list_title, Edition.BOOK_MEDIUM
        )
        edition = list_title.to_edition(self._db, self.metadata_client)

        eq_(contributor.sort_name, edition.sort_author)
        eq_(contributor.display_name, edition.author)
        assert edition.permanent_work_id is not None
Esempio n. 6
0
    def test_set_equivalence(self):
        """set_equivalence only links metadata that matches an identifier's
        existing edition, unless the identifier has no editions at all.
        """
        def equivalencies_for(target):
            # All Equivalency rows recorded for a single identifier.
            return Equivalency.for_identifiers(self._db, [target]).all()

        edition = self._edition()
        edition.title = "The House on Mango Street"
        edition.add_contributor(
            Contributor(viaf="112460612"), Contributor.AUTHOR_ROLE
        )
        identifier = edition.primary_identifier

        # Metadata whose title and contributor agree with the edition.
        i1 = self._identifier()
        matching_id_data = IdentifierData(
            type=i1.type, identifier=i1.identifier
        )
        good_metadata = Metadata(
            DataSource.lookup(self._db, DataSource.GUTENBERG),
            primary_identifier=matching_id_data,
            title="The House on Mango Street",
            contributors=[Contributor(viaf="112460612")],
        )

        # Metadata with a different title and a different contributor.
        i2 = self._identifier()
        mismatched_id_data = IdentifierData(
            type=i2.type, identifier=i2.identifier
        )
        bad_metadata = Metadata(
            DataSource.lookup(self._db, DataSource.GUTENBERG),
            primary_identifier=mismatched_id_data,
            title="Calvin & Hobbes",
            contributors=[Contributor(viaf="101010")],
        )

        self.provider.set_equivalence(identifier, good_metadata)
        self.provider.set_equivalence(identifier, bad_metadata)
        recorded = equivalencies_for(identifier)

        # The identifier for the bad metadata isn't made equivalent
        eq_([i1], [row.output for row in recorded])
        eq_([1], [row.strength for row in recorded])

        # But if the existing identifier has no editions, they're made
        # equivalent.
        identifier = self._identifier()
        self.provider.set_equivalence(identifier, bad_metadata)
        recorded = equivalencies_for(identifier)
        eq_([i2], [row.output for row in recorded])
        eq_([1], [row.strength for row in recorded])
    def test_extract_multiple_works_with_author_restriction(self):
        """Only works credited to the requested author are extracted."""
        xml = self.sample_data("multi_work_response.xml")

        # First, restrict to an author who is not credited anywhere in
        # the dataset: no works should come back.
        candidates, _is_new = Contributor.lookup(
            self._db, sort_name="Wrong Author"
        )
        [wrong_author] = candidates
        _status, swids = OCLCXMLParser.parse(
            self._db, xml, languages=["eng"], authors=[wrong_author]
        )
        eq_(0, len(swids))

        # Now restrict to Herman Melville.
        candidates, _is_new = Contributor.lookup(
            self._db, sort_name="Melville, Herman"
        )
        [melville] = candidates
        _status, swids = OCLCXMLParser.parse(
            self._db, xml, languages=["eng"], authors=[melville]
        )

        # We picked up 11 of the 25 works in the dataset.
        eq_(11, len(swids))

        # The missing works (as you can verify by looking at
        # oclc_multi_work_response.xml) either don't credit Herman
        # Melville at all (the 1956 Gregory Peck movie "Moby Dick"),
        # credit him as "Associated name" rather than as an author
        # (four books about "Moby Dick"), or credit him as an author
        # but not as the primary author (academic works and adaptations).
        not_by_melville = [
            "10798812", "13424036", "22658644", "250604212",
            "474972877", "13358012", "153927888", "13206523",
            "46935692", "14135019", "51088077", "105446800",
            "164732682", "26863225",
        ]
        for missing in not_by_melville:
            assert missing not in swids
Esempio n. 8
0
    def _contributor_sublanes(self, _db):
        """Yield a ContributorLane for each contributor in the chosen tier.

        Tiers from ``Contributor.author_contributor_tiers()`` are tried in
        order, skipping the first, until one matches a contribution on
        ``self.edition``. Each matching contributor object is handed to
        ContributorLane along with this lane's languages and audiences.
        """
        selected = []
        for tier in list(Contributor.author_contributor_tiers())[1:]:
            selected = [
                entry.contributor
                for entry in self.edition.contributions
                if entry.role in tier
            ]
            if selected:
                # Stop at the first tier that matches anyone.
                break

        # Resolved once, after tier selection, and shared by every lane.
        library = self.get_library(_db)
        for contributor in selected:
            yield ContributorLane(
                library,
                contributor,
                parent=self,
                languages=self.languages,
                audiences=self.audiences,
            )
Esempio n. 9
0
    def _get_sublanes(self, _db, license_pool, novelist_api=None):
        """Collect contributor and recommendation sublanes for a license pool.

        Contributor lanes are built from the first role tier (skipping the
        initial tier of ``Contributor.author_contributor_tiers()``) that
        matches any contribution on the pool's presentation edition. A
        recommendation lane is appended only if it has recommendations.

        :param _db: Database session handed to the lane constructors.
        :param license_pool: Pool whose presentation edition and work
            drive the sublanes.
        :param novelist_api: Optional API object forwarded to
            RecommendationLane.

        NOTE(review): no ``return`` is visible in this portion of the
        method; confirm how ``sublanes`` reaches the caller.
        """
        sublanes = list()
        edition = license_pool.presentation_edition

        # Create contributor sublanes.
        viable_contributors = list()
        roles_by_priority = list(Contributor.author_contributor_tiers())[1:]

        # Walk the tiers in priority order and stop at the first one that
        # matches any contribution.
        while roles_by_priority and not viable_contributors:
            author_roles = roles_by_priority.pop(0)
            viable_contributors = [c.contributor for c in edition.contributions
                                   if c.role in author_roles]

        for contributor in viable_contributors:
            if contributor.display_name:
                # Prefer display names over sort names for easier URIs
                # at the /works/contributor/<NAME> route.
                contributor_name = contributor.display_name
            else:
                contributor_name = contributor.sort_name

            contributor_lane = ContributorLane(
                _db, contributor_name, contributor_id=contributor.id,
                parent=self
            )
            sublanes.append(contributor_lane)

        # Create a recommendations sublane.
        try:
            lane_name = "Recommendations for %s by %s" % (
                license_pool.work.title, license_pool.work.author
            )
            recommendation_lane = RecommendationLane(
                _db, license_pool, lane_name, novelist_api=novelist_api,
                parent=self
            )
            if recommendation_lane.recommendations:
                sublanes.append(recommendation_lane)
        except ValueError:
            # NoveList isn't configured.
            pass
Esempio n. 10
0
    def _parse_single_author(cls, _db, author,
                             lc=None, viaf=None,
                             existing_authors=None,
                             default_role=Contributor.AUTHOR_ROLE,
                             primary_author=None):
        """Turn one author string into a contributor and a list of roles.

        The string may end with a role list and a lifespan, e.g.
        "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]".

        :param _db: Database session passed to ``Contributor.lookup``.
        :param author: Raw author string.
        :param lc: Optional identifier passed to ``_contributor_match``
            and ``Contributor.lookup``.
        :param viaf: Optional identifier passed to ``_contributor_match``
            and ``Contributor.lookup``.
        :param existing_authors: Optional iterable of already-known
            contributors, checked with ``cls._contributor_match`` before
            falling back to a lookup. ``None`` (the default) and an empty
            list both mean "none".
        :param default_role: Role used when the string names no roles; a
            falsy value leaves the role list empty.
        :param primary_author: If given and its ``sort_name`` equals the
            parsed name, PRIMARY_AUTHOR_ROLE is put first in the role
            list and an AUTHOR_ROLE / UNKNOWN_ROLE entry is removed.
        :return: ``(contributor, roles, default_role_used)``. The
            contributor is ``None`` when no name is left after parsing.
        """
        default_role_used = False
        # First find roles if present
        # "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
        author = author.strip()
        m = cls.ROLES.search(author)
        if m:
            author = author[:m.start()].strip()
            role_string = m.groups()[0]
            roles = [x.strip() for x in role_string.split(";")]
        elif default_role:
            roles = [default_role]
            default_role_used = True
        else:
            roles = []

        # Author string now looks like
        # "Giles, Lionel, 1875-1958"
        m = cls.LIFESPAN.search(author)
        kwargs = dict()
        if m:
            author = author[:m.start()].strip()
            birth, death = m.groups()
            if birth:
                kwargs[Contributor.BIRTH_DATE] = birth
            if death:
                kwargs[Contributor.DEATH_DATE] = death

        # Author string now looks like
        # "Giles, Lionel,"
        if author.endswith(","):
            author = author[:-1]

        contributor = None
        if not author:
            # No name was given for the author.
            return None, roles, default_role_used

        if primary_author and author == primary_author.sort_name:
            if Contributor.AUTHOR_ROLE in roles:
                roles.remove(Contributor.AUTHOR_ROLE)
            if Contributor.UNKNOWN_ROLE in roles:
                roles.remove(Contributor.UNKNOWN_ROLE)
            roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

        if existing_authors:
            # Calling Contributor.lookup will result in a database
            # hit, and looking up a contributor based on name may
            # result in multiple results (see below). We'll have no
            # way of distinguishing between those results. If
            # possible, it's much more reliable to look through
            # existing_authors (the authors derived from an entry's
            # <authors> tag).
            for x in existing_authors:
                if cls._contributor_match(x, author, lc, viaf):
                    contributor = x
                    break

        if not contributor:
            # The "was it newly created" flag from the lookup is not
            # needed here.
            contributor, _was_new = Contributor.lookup(
                _db, author, viaf, lc, extra=kwargs)
        if isinstance(contributor, list):
            # We asked for an author based solely on the name, which makes
            # Contributor.lookup() return a list.
            if len(contributor) == 1:
                # Fortunately, either the database knows about only
                # one author with that name, or it didn't know about
                # any authors with that name and it just created one,
                # so we can unambiguously use it.
                contributor = contributor[0]
            else:
                # Uh-oh. The database knows about multiple authors
                # with that name.  We have no basis for deciding which
                # author we mean. But we would prefer to identify with
                # an author who has a known LC or VIAF number.
                #
                # This should happen very rarely because of our check
                # against existing_authors above. But it will happen
                # for authors that have a work in Project Gutenberg.
                with_id = [x for x in contributor if x.lc is not None
                           or x.viaf is not None]
                if with_id:
                    contributor = with_id[0]
                else:
                    contributor = contributor[0]
        return contributor, roles, default_role_used
Esempio n. 11
0
    def _parse_single_author(cls,
                             _db,
                             author,
                             lc=None,
                             viaf=None,
                             existing_authors=None,
                             default_role=Contributor.AUTHOR_ROLE,
                             primary_author=None):
        """Parse a single author string into a contributor and its roles.

        Input looks like
        "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]":
        a name, optionally followed by a lifespan and a role list.

        :param _db: Database session passed to ``Contributor.lookup``.
        :param author: Raw author string.
        :param lc: Optional identifier used for matching and lookup.
        :param viaf: Optional identifier used for matching and lookup.
        :param existing_authors: Optional iterable of contributors to
            search, via ``cls._contributor_match``, before doing a
            lookup. Defaults to ``None``, treated the same as empty.
        :param default_role: Role assigned when the string names none; a
            falsy value yields an empty role list.
        :param primary_author: When its ``sort_name`` equals the parsed
            name, PRIMARY_AUTHOR_ROLE is inserted first and an
            AUTHOR_ROLE / UNKNOWN_ROLE entry is removed.
        :return: ``(contributor, roles, default_role_used)``; the
            contributor is ``None`` if the string contains no name.
        """
        default_role_used = False
        # First find roles if present
        # "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
        author = author.strip()
        m = cls.ROLES.search(author)
        if m:
            author = author[:m.start()].strip()
            role_string = m.groups()[0]
            roles = [x.strip() for x in role_string.split(";")]
        elif default_role:
            roles = [default_role]
            default_role_used = True
        else:
            roles = []

        # Author string now looks like
        # "Giles, Lionel, 1875-1958"
        m = cls.LIFESPAN.search(author)
        kwargs = dict()
        if m:
            author = author[:m.start()].strip()
            birth, death = m.groups()
            if birth:
                kwargs[Contributor.BIRTH_DATE] = birth
            if death:
                kwargs[Contributor.DEATH_DATE] = death

        # Author string now looks like
        # "Giles, Lionel,"
        if author.endswith(","):
            author = author[:-1]

        contributor = None
        if not author:
            # No name was given for the author.
            return None, roles, default_role_used

        if primary_author and author == primary_author.sort_name:
            if Contributor.AUTHOR_ROLE in roles:
                roles.remove(Contributor.AUTHOR_ROLE)
            if Contributor.UNKNOWN_ROLE in roles:
                roles.remove(Contributor.UNKNOWN_ROLE)
            roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

        if existing_authors:
            # Calling Contributor.lookup will result in a database
            # hit, and looking up a contributor based on name may
            # result in multiple results (see below). We'll have no
            # way of distinguishing between those results. If
            # possible, it's much more reliable to look through
            # existing_authors (the authors derived from an entry's
            # <authors> tag).
            for x in existing_authors:
                if cls._contributor_match(x, author, lc, viaf):
                    contributor = x
                    break

        if not contributor:
            # Only the contributor is needed; the second element of the
            # lookup result is discarded.
            contributor, _was_new = Contributor.lookup(_db,
                                                       author,
                                                       viaf,
                                                       lc,
                                                       extra=kwargs)
        if isinstance(contributor, list):
            # We asked for an author based solely on the name, which makes
            # Contributor.lookup() return a list.
            if len(contributor) == 1:
                # Fortunately, either the database knows about only
                # one author with that name, or it didn't know about
                # any authors with that name and it just created one,
                # so we can unambiguously use it.
                contributor = contributor[0]
            else:
                # Uh-oh. The database knows about multiple authors
                # with that name.  We have no basis for deciding which
                # author we mean. But we would prefer to identify with
                # an author who has a known LC or VIAF number.
                #
                # This should happen very rarely because of our check
                # against existing_authors above. But it will happen
                # for authors that have a work in Project Gutenberg.
                with_id = [
                    x for x in contributor
                    if x.lc is not None or x.viaf is not None
                ]
                if with_id:
                    contributor = with_id[0]
                else:
                    contributor = contributor[0]
        return contributor, roles, default_role_used