def test_extract_multiple_works_with_author_restriction(self):
    """Restricting the parser to one author filters out other works."""
    sample = self.sample_data("multi_work_response.xml")

    # Nothing in the sample data lists this person as an author, so
    # restricting the parse to them yields no works at all.
    stranger_matches, _ = Contributor.lookup(
        self._db, sort_name="Wrong Author")
    [stranger] = stranger_matches
    _, stranger_swids = OCLCXMLParser.parse(
        self._db, sample, languages=["eng"], authors=[stranger])
    eq_(0, len(stranger_swids))

    melville_matches, _ = Contributor.lookup(
        self._db, sort_name="Melville, Herman")
    [melville] = melville_matches
    _, melville_swids = OCLCXMLParser.parse(
        self._db, sample, languages=["eng"], authors=[melville])

    # Of the 25 works in the dataset, 11 are accepted.
    eq_(11, len(melville_swids))

    # The rejected works fall into three groups: works that do not
    # credit Herman Melville at all (the 1956 Gregory Peck movie
    # "Moby Dick"), works that credit him as "Associated name" rather
    # than as an author (four books about "Moby Dick"), and works that
    # credit him as an author but not as the primary author (academic
    # works and adaptations).
    rejected = (
        "10798812", "13424036", "22658644", "250604212", "474972877",
        "13358012", "153927888", "13206523", "46935692", "14135019",
        "51088077", "105446800", "164732682", "26863225",
    )
    for swid in rejected:
        assert swid not in melville_swids
def _contributor_sublanes(self, _db):
    """Yield a ContributorLane for each contributor in the chosen tier.

    Every role tier after the first is tried in order; the first tier
    that matches at least one of this edition's contributions supplies
    the contributors. If no tier matches, nothing is yielded.
    """
    candidates = []
    for tier in list(Contributor.author_contributor_tiers())[1:]:
        candidates = [
            contribution.contributor
            for contribution in self.edition.contributions
            if contribution.role in tier
        ]
        if candidates:
            # Stop at the first tier that produced anyone.
            break

    for contributor in candidates:
        # A display name is preferred because it makes for a friendlier
        # URI at the contributor route; fall back to the sort name.
        lane_name = contributor.display_name or contributor.sort_name
        yield ContributorLane(_db, self.library, lane_name, parent=self)
def _contributor_sublanes(self, _db):
    """Yield a ContributorLane for each contributor in the chosen tier.

    Role tiers after the first are examined in order, and the first one
    matching any contribution on this edition decides which
    contributors get a lane. Each lane inherits this lane's languages
    and audiences.
    """
    chosen = []
    for role_tier in list(Contributor.author_contributor_tiers())[1:]:
        chosen = [
            contribution.contributor
            for contribution in self.edition.contributions
            if contribution.role in role_tier
        ]
        if chosen:
            break

    for person in chosen:
        # Display names are easier to use in contributor URIs than
        # sort names, so they win whenever one is set.
        name = person.display_name or person.sort_name
        yield ContributorLane(
            self.get_library(_db),
            name,
            parent=self,
            languages=self.languages,
            audiences=self.audiences,
        )
def test_to_edition_sets_sort_author_name_if_obvious(self):
    """to_edition() copies author names from a matching contributor.

    With a contributor on file whose sort name is "Hawkins, Paula" and
    whose display name is "Paula Hawkins", the generated edition should
    carry both names and be given a permanent work ID.
    """
    matches, _ = Contributor.lookup(self._db, u"Hawkins, Paula")
    [hawkins] = matches
    hawkins.display_name = u"Paula Hawkins"

    best_seller = NYTBestSellerListTitle(self.one_list_title)
    edition = best_seller.to_edition(self._db, self.metadata_client)

    eq_(hawkins.sort_name, edition.sort_author)
    eq_(hawkins.display_name, edition.author)
    assert edition.permanent_work_id is not None
def test_to_edition_sets_sort_author_name_if_obvious(self):
    """to_edition() copies author names from a matching contributor.

    The best-seller title is created with the book medium. The
    resulting edition should use the known contributor's sort name and
    display name, and should have a permanent work ID.
    """
    found, _ = Contributor.lookup(self._db, u"Hawkins, Paula")
    [author] = found
    author.display_name = u"Paula Hawkins"

    list_title = NYTBestSellerListTitle(
        self.one_list_title, Edition.BOOK_MEDIUM
    )
    result = list_title.to_edition(self._db, self.metadata_client)

    eq_(author.sort_name, result.sort_author)
    eq_(author.display_name, result.author)
    assert result.permanent_work_id is not None
def test_set_equivalence(self):
    """set_equivalence() only links identifiers whose metadata matches.

    Metadata that agrees with the existing edition produces an
    equivalency; metadata that disagrees does not, unless the
    identifier has no edition to compare against.
    """
    def gutenberg_metadata(target, title, viaf):
        # Build Gutenberg metadata whose primary identifier is `target`.
        identifier_data = IdentifierData(
            type=target.type, identifier=target.identifier
        )
        return Metadata(
            DataSource.lookup(self._db, DataSource.GUTENBERG),
            primary_identifier=identifier_data,
            title=title,
            contributors=[Contributor(viaf=viaf)],
        )

    edition = self._edition()
    edition.title = "The House on Mango Street"
    edition.add_contributor(
        Contributor(viaf="112460612"), Contributor.AUTHOR_ROLE
    )
    identifier = edition.primary_identifier

    i1 = self._identifier()
    good_metadata = gutenberg_metadata(
        i1, "The House on Mango Street", "112460612"
    )

    i2 = self._identifier()
    bad_metadata = gutenberg_metadata(i2, "Calvin & Hobbes", "101010")

    self.provider.set_equivalence(identifier, good_metadata)
    self.provider.set_equivalence(identifier, bad_metadata)
    found = Equivalency.for_identifiers(self._db, [identifier]).all()

    # Only the identifier from the good metadata was linked.
    eq_([i1], [equivalency.output for equivalency in found])
    eq_([1], [equivalency.strength for equivalency in found])

    # An identifier with no editions has nothing to contradict the
    # metadata, so even the bad metadata's identifier gets linked.
    identifier = self._identifier()
    self.provider.set_equivalence(identifier, bad_metadata)
    found = Equivalency.for_identifiers(self._db, [identifier]).all()
    eq_([i2], [equivalency.output for equivalency in found])
    eq_([1], [equivalency.strength for equivalency in found])
def test_extract_multiple_works_with_author_restriction(self):
    """The parser can be told to keep only one author's works."""
    xml = self.sample_data("multi_work_response.xml")

    def swids_restricted_to(sort_name):
        # Look up the single contributor with this sort name and parse
        # the sample data accepting only English works by them.
        [author], _ = Contributor.lookup(self._db, sort_name=sort_name)
        _, accepted = OCLCXMLParser.parse(
            self._db, xml, languages=["eng"], authors=[author]
        )
        return accepted

    # No work in the dataset lists this person as an author, so
    # nothing is picked up.
    eq_(0, len(swids_restricted_to("Wrong Author")))

    # 11 of the dataset's 25 works are picked up for Melville.
    swids = swids_restricted_to("Melville, Herman")
    eq_(11, len(swids))

    # The works left out either never credit Herman Melville (the 1956
    # Gregory Peck movie "Moby Dick"), credit him as "Associated name"
    # instead of as an author (four books about "Moby Dick"), or credit
    # him as an author but not the primary one (academic works and
    # adaptations).
    left_out = [
        "10798812",
        "13424036",
        "22658644",
        "250604212",
        "474972877",
        "13358012",
        "153927888",
        "13206523",
        "46935692",
        "14135019",
        "51088077",
        "105446800",
        "164732682",
        "26863225",
    ]
    for swid in left_out:
        assert swid not in swids
def _contributor_sublanes(self, _db):
    """Yield a ContributorLane for each contributor in the chosen tier.

    The role tiers after the first are tried in order, and the first
    tier matching any of this edition's contributions supplies the
    contributors. The library is fetched once and shared by every lane;
    each lane also inherits this lane's languages and audiences.
    """
    selected = []
    for tier in list(Contributor.author_contributor_tiers())[1:]:
        selected = [
            contribution.contributor
            for contribution in self.edition.contributions
            if contribution.role in tier
        ]
        if selected:
            break

    library = self.get_library(_db)
    for contributor in selected:
        yield ContributorLane(
            library,
            contributor,
            parent=self,
            languages=self.languages,
            audiences=self.audiences,
        )
def _get_sublanes(self, _db, license_pool, novelist_api=None):
    """Build the contributor and recommendation sublanes for a title.

    :param _db: passed through to the lane constructors.
    :param license_pool: supplies the presentation edition (for
        contributors) and the work (for the recommendation lane name).
    :param novelist_api: passed through to RecommendationLane.
    :return: a list of the sublanes that were created.
    """
    sublanes = list()
    edition = license_pool.presentation_edition

    # Create contributor sublanes. Try each role tier after the first,
    # in order, until one matches a contribution on the edition.
    viable_contributors = list()
    roles_by_priority = list(Contributor.author_contributor_tiers())[1:]
    while roles_by_priority and not viable_contributors:
        author_roles = roles_by_priority.pop(0)
        viable_contributors = [
            c.contributor
            for c in edition.contributions
            if c.role in author_roles
        ]

    for contributor in viable_contributors:
        # Prefer display names over sort names for easier URIs at the
        # contributor route.
        contributor_name = contributor.display_name or contributor.sort_name
        contributor_lane = ContributorLane(
            _db, contributor_name, contributor_id=contributor.id, parent=self
        )
        sublanes.append(contributor_lane)

    # Create a recommendations sublane.
    try:
        lane_name = "Recommendations for %s by %s" % (
            license_pool.work.title, license_pool.work.author
        )
        recommendation_lane = RecommendationLane(
            _db, license_pool, lane_name, novelist_api=novelist_api,
            parent=self
        )
        if recommendation_lane.recommendations:
            sublanes.append(recommendation_lane)
    except ValueError:
        # NoveList isn't configured; carry on without recommendations.
        pass

    # NOTE(review): the previous version built `sublanes` but fell off
    # the end of the function, so callers received None. Returning the
    # list is presumably the intent -- confirm against callers.
    return sublanes
def _parse_single_author(cls, _db, author, lc=None, viaf=None,
                         existing_authors=None,
                         default_role=Contributor.AUTHOR_ROLE,
                         primary_author=None):
    """Turn one author string into a contributor and a list of roles.

    The string may carry a bracketed role list and a lifespan, e.g.
    "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]".
    Both are stripped off before the remaining name is matched.

    :param _db: passed to Contributor.lookup when a lookup is needed.
    :param author: the raw author string.
    :param lc: optional identifier used when matching and looking up.
    :param viaf: optional identifier used when matching and looking up.
    :param existing_authors: optional contributors to match against
        before falling back to Contributor.lookup.
    :param default_role: role assigned when the string names no roles;
        a false value means no role is assigned.
    :param primary_author: if its sort_name equals the parsed name, the
        contributor is given the primary author role.
    :return: a 3-tuple (contributor, roles, default_role_used).
        `contributor` is None when the string contains no name.
    """
    default_role_used = False

    # First find roles if present.
    author = author.strip()
    m = cls.ROLES.search(author)
    if m:
        author = author[:m.start()].strip()
        role_string = m.groups()[0]
        roles = [x.strip() for x in role_string.split(";")]
    elif default_role:
        roles = [default_role]
        default_role_used = True
    else:
        roles = []

    # Author string now looks like "Giles, Lionel, 1875-1958".
    m = cls.LIFESPAN.search(author)
    extra = dict()
    if m:
        author = author[:m.start()].strip()
        birth, death = m.groups()
        if birth:
            extra[Contributor.BIRTH_DATE] = birth
        if death:
            extra[Contributor.DEATH_DATE] = death

    # Author string now looks like "Giles, Lionel,".
    if author.endswith(","):
        author = author[:-1]

    if not author:
        # No name was given for the author.
        return None, roles, default_role_used

    if primary_author and author == primary_author.sort_name:
        # The primary author role replaces the generic author and
        # unknown roles and goes first in the list.
        if Contributor.AUTHOR_ROLE in roles:
            roles.remove(Contributor.AUTHOR_ROLE)
        if Contributor.UNKNOWN_ROLE in roles:
            roles.remove(Contributor.UNKNOWN_ROLE)
        roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

    contributor = None
    if existing_authors:
        # Calling Contributor.lookup will result in a database hit,
        # and looking up a contributor based on name may give multiple
        # results that we cannot tell apart (see below). Matching
        # against the already-known authors is more reliable.
        for candidate in existing_authors:
            if cls._contributor_match(candidate, author, lc, viaf):
                contributor = candidate
                break

    if not contributor:
        contributor, _ = Contributor.lookup(
            _db, author, viaf, lc, extra=extra)

    if isinstance(contributor, list):
        # We asked for an author based solely on the name, which makes
        # Contributor.lookup() return a list.
        if len(contributor) == 1:
            # Either the database knows only one author with that
            # name, or it knew none and just created one, so we can
            # use it unambiguously.
            contributor = contributor[0]
        else:
            # The database knows about multiple authors with that
            # name and we have no basis for choosing between them, but
            # we prefer one with a known LC or VIAF number.
            #
            # This should be rare because of the check against
            # existing_authors above, but it will happen for authors
            # that have a work in Project Gutenberg.
            with_id = [
                x for x in contributor
                if x.lc is not None or x.viaf is not None
            ]
            contributor = with_id[0] if with_id else contributor[0]

    return contributor, roles, default_role_used
def _parse_single_author(
    cls,
    _db,
    author,
    lc=None,
    viaf=None,
    existing_authors=None,
    default_role=Contributor.AUTHOR_ROLE,
    primary_author=None,
):
    """Parse one author string into (contributor, roles, default_role_used).

    An input such as
    "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
    is split into its role list, its lifespan and the bare name. The
    name is then resolved to a contributor, first against
    `existing_authors` and otherwise through Contributor.lookup.

    :param _db: passed to Contributor.lookup when a lookup is needed.
    :param author: the raw author string.
    :param lc: optional identifier used when matching and looking up.
    :param viaf: optional identifier used when matching and looking up.
    :param existing_authors: optional contributors to check before
        doing a lookup.
    :param default_role: role used when the string lists none; a false
        value leaves the role list empty.
    :param primary_author: when its sort_name equals the parsed name,
        the primary author role is put at the front of the role list.
    :return: (contributor, roles, default_role_used); `contributor` is
        None if no name is left after stripping roles and lifespan.
    """
    default_role_used = False

    # Roles, if present, come in a trailing bracketed list separated
    # by semicolons.
    author = author.strip()
    roles_match = cls.ROLES.search(author)
    if roles_match:
        author = author[:roles_match.start()].strip()
        role_string = roles_match.groups()[0]
        roles = [role.strip() for role in role_string.split(";")]
    elif default_role:
        roles = [default_role]
        default_role_used = True
    else:
        roles = []

    # What remains looks like "Giles, Lionel, 1875-1958"; pull off the
    # lifespan and remember whichever dates were given.
    extra = dict()
    lifespan_match = cls.LIFESPAN.search(author)
    if lifespan_match:
        author = author[:lifespan_match.start()].strip()
        birth, death = lifespan_match.groups()
        if birth:
            extra[Contributor.BIRTH_DATE] = birth
        if death:
            extra[Contributor.DEATH_DATE] = death

    # What remains looks like "Giles, Lionel,"; drop the trailing comma.
    if author.endswith(","):
        author = author[:-1]

    if not author:
        # No name was given for the author.
        return None, roles, default_role_used

    if primary_author and author == primary_author.sort_name:
        # Swap the generic author/unknown roles for the primary author
        # role, listed first.
        for generic_role in (Contributor.AUTHOR_ROLE,
                             Contributor.UNKNOWN_ROLE):
            if generic_role in roles:
                roles.remove(generic_role)
        roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

    contributor = None
    if existing_authors:
        # A name-based Contributor.lookup costs a database hit and may
        # return several indistinguishable results (see below), so an
        # already-known author is preferred whenever one matches.
        for known in existing_authors:
            if cls._contributor_match(known, author, lc, viaf):
                contributor = known
                break

    if not contributor:
        contributor, _ = Contributor.lookup(
            _db, author, viaf, lc, extra=extra
        )

    if isinstance(contributor, list):
        # A lookup based solely on the name makes Contributor.lookup()
        # return a list.
        if len(contributor) == 1:
            # Only one author with that name is known (or one was just
            # created), so the choice is unambiguous.
            contributor = contributor[0]
        else:
            # Several authors share the name and nothing tells them
            # apart, but one with a known LC or VIAF number is
            # preferred.
            #
            # The existing_authors check above should make this rare,
            # though it will happen for authors that have a work in
            # Project Gutenberg.
            with_id = [
                x for x in contributor
                if x.lc is not None or x.viaf is not None
            ]
            contributor = with_id[0] if with_id else contributor[0]

    return contributor, roles, default_role_used