def test_creator_names_picks_up_contributors(self):
    """creator_names() only reports contributors when asked for that field.

    The fixture has no author entry, so the default lookup comes back
    empty; passing 'contributor' as the second argument yields the name.
    """
    raw_document = self.sample_data("no_author_only_contributor.jsonld")
    graph = json.loads(raw_document)['@graph']

    # Default lookup: neither names nor URIs are found.
    default_result = OCLCLinkedData.creator_names(graph)
    eq_(([], []), default_result)

    # Explicit 'contributor' lookup: one name, still no URIs.
    contributor_result = OCLCLinkedData.creator_names(graph, 'contributor')
    eq_((['Thug Kitchen LLC.'], []), contributor_result)
def test_creator_names_gathers_external_uris(self):
    """creator_names() returns external URIs when the graph has no names."""
    document = json.loads(
        self.sample_data("creator_includes_viaf_uris.jsonld")
    )
    expected_uris = {
        "http://id.loc.gov/authorities/names/n2013058227",
        "http://viaf.org/viaf/221233754",
        "http://viaf.org/viaf/305306689",
    }

    names, uris = OCLCLinkedData.creator_names(document['@graph'])

    eq_([], names)
    # Compare as sets: the order of the returned URIs is not checked.
    eq_(expected_uris, set(uris))
def test_book_info_to_metadata(self):
    """book_info_to_metadata() turns a book graph into a Metadata object.

    Checks the identifier, publication details, links, contributors and
    subjects for the Galapagos fixture, then verifies that a book whose
    name is not tagged as English produces a Metadata with no title.
    """
    oclc = OCLCLinkedData(self._db)
    subgraph = json.loads(self.sample_data("galapagos.jsonld"))['@graph']
    [book] = list(oclc.books(subgraph))

    metadata = OCLCLinkedData(self._db).book_info_to_metadata(subgraph, book)

    # We get a Metadata object back, carrying the right OCLC identifier.
    eq_(True, isinstance(metadata, Metadata))
    eq_(Identifier.OCLC_NUMBER, metadata.primary_identifier.type)
    eq_(u"11866009", metadata.primary_identifier.identifier)

    # Publication information and ISBNs are present.
    eq_(u"Galápagos : a novel", metadata.title)
    eq_(u'Delacorte Press/Seymour Lawrence', metadata.publisher)
    eq_(1985, metadata.published.year)

    eq_(1, len(metadata.links))
    assert "ghost of a shipbuilder" in metadata.links[0].content

    eq_(4, len(metadata.identifiers))

    eq_(1, len(metadata.contributors))
    [contributor_viaf] = [
        contributor.viaf for contributor in metadata.contributors
    ]
    eq_(u"71398958", contributor_viaf)

    eq_(10, len(metadata.subjects))

    # A book with no English title must not break anything: re-tag the
    # name on graph node 14 as French and run the conversion again.
    subgraph[14]['name']['@language'] = 'fr'
    [book] = list(oclc.books(subgraph))
    metadata = OCLCLinkedData(self._db).book_info_to_metadata(subgraph, book)

    # The resulting metadata has no title.
    eq_(None, metadata.title)
def test_extract_useful_data(self):
    """extract_useful_data() returns the expected fields for Galapagos."""
    graph = json.loads(self.sample_data('galapagos.jsonld'))['@graph']
    [book] = list(OCLCLinkedData.books(graph))

    extracted = OCLCLinkedData.extract_useful_data(graph, book)
    (oclc_id_type, oclc_id, titles, descriptions, subjects, creator_uris,
     publishers, publication_dates, example_uris) = extracted

    eq_(Identifier.OCLC_NUMBER, oclc_id_type)
    eq_(u"11866009", oclc_id)
    eq_([u"Galápagos : a novel"], titles)
    eq_(1, len(descriptions))

    # The book's "about" list holds 11 links, but
    # "http://subject.example.wo/internal_lookup" is not counted as a
    # subject because it doesn't have an internal lookup.
    expected_subject_counts = (
        (Subject.DDC, 1),
        (Subject.FAST, 1),
        (Subject.TAG, 4),
        (Subject.PLACE, 1),
    )
    for subject_type, expected_count in expected_subject_counts:
        eq_(expected_count, len(subjects[subject_type]))

    # The made-up LCSH subject also lacks an internal lookup, yet it is
    # included because its details can be parsed from the url:
    # "http://id.loc.gov/authorities/subjects/sh12345678"
    eq_(3, len(subjects[Subject.LCSH]))

    eq_(1, len(creator_uris))
    eq_(["Delacorte Press/Seymour Lawrence"], publishers)
    eq_(["1985"], publication_dates)
    eq_(2, len(example_uris))
def test_extract_contributor(self):
    """extract_contributor() pulls contributor data from a person entity.

    Each fixture is an OCLC person entity graph; the entity passed to
    extract_contributor() is the item at index 1 of its '@graph' list.

    Assertions use the eq_(expected, actual) argument order so they read
    the same way as the other tests in this file.
    """
    def person_entity(fixture_name):
        # Load a fixture and return the graph item at index 1.
        return json.loads(self.sample_data(fixture_name))['@graph'][1]

    sloane_info = person_entity('sloane_crosley.jsonld')
    result = OCLCLinkedData.extract_contributor(sloane_info)
    eq_('Crosley', result['family_name'])
    eq_('Sloane Crosley', result['display_name'])

    flanagan_info = person_entity('john_flanagan_multiname.jsonld')
    result = OCLCLinkedData.extract_contributor(flanagan_info)
    eq_('Flanagan', result['family_name'])
    eq_('John Anthony Flanagan', result['display_name'])
    eq_('1944', result['extra']['birthDate'])

    # TODO: Modify the contributor extraction to handle cases where
    # maiden names are included and/or multiple name options are the
    # same except for capitalization.
    rice_info = person_entity('anne_rice.jsonld')
    result = OCLCLinkedData.extract_contributor(rice_info)
    eq_("O'Brien Rice", result['family_name'])
    eq_("Anne O'Brien Rice", result['display_name'])
    eq_('1941', result['extra']['birthDate'])
def __init__(self, _db, oclcld=None, viaf=None):
    """Store the database handle, the two lookup clients and a logger.

    :param _db: kept on ``self._db`` and handed to any client built here.
    :param oclcld: client stored on ``self.oclcld``; when falsy, a new
        ``OCLCLinkedData(_db)`` is created instead.
    :param viaf: client stored on ``self.viaf``; when falsy, a new
        ``VIAFClient(_db)`` is created instead.
    """
    self._db = _db

    # Fall back to freshly built clients when none (or a falsy one) is given.
    if not oclcld:
        oclcld = OCLCLinkedData(_db)
    self.oclcld = oclcld

    if not viaf:
        viaf = VIAFClient(_db)
    self.viaf = viaf

    self.log = logging.getLogger("Author name canonicalizer")