Esempio n. 1
0
    def test_creator_names_picks_up_contributors(self):
        # Load the sample graph used by both lookups below.
        raw = self.sample_data("no_author_only_contributor.jsonld")
        graph = json.loads(raw)['@graph']

        # The default lookup yields no names and no URIs for this graph.
        default_result = OCLCLinkedData.creator_names(graph)
        eq_(([], []), default_result)

        # Looking at the 'contributor' field instead yields a single name.
        found = OCLCLinkedData.creator_names(graph, 'contributor')
        eq_((['Thug Kitchen LLC.'], []), found)
Esempio n. 2
0
    def test_creator_names_gathers_external_uris(self):
        raw = self.sample_data("creator_includes_viaf_uris.jsonld")
        graph = json.loads(raw)['@graph']

        # URIs the lookup is expected to report for this sample graph.
        expected_uris = set([
            "http://id.loc.gov/authorities/names/n2013058227",
            "http://viaf.org/viaf/221233754",
            "http://viaf.org/viaf/305306689",
        ])

        names, uris = OCLCLinkedData.creator_names(graph)

        # No plain names come back, only URIs.
        eq_([], names)
        # Compared as sets, so the order of the returned URIs is not checked.
        eq_(expected_uris, set(uris))
Esempio n. 3
0
    def test_book_info_to_metadata(self):
        oclc = OCLCLinkedData(self._db)
        raw = self.sample_data("galapagos.jsonld")
        subgraph = json.loads(raw)['@graph']

        # The sample graph must describe exactly one book.
        books = list(oclc.books(subgraph))
        [book] = books

        converter = OCLCLinkedData(self._db)
        metadata_obj = converter.book_info_to_metadata(subgraph, book)

        # The result is a Metadata object carrying the OCLC identifier.
        is_metadata = isinstance(metadata_obj, Metadata)
        eq_(True, is_metadata)
        primary = metadata_obj.primary_identifier
        eq_(Identifier.OCLC_NUMBER, primary.type)
        eq_(u"11866009", primary.identifier)

        # Title, publisher, publication year, description link and
        # identifiers are all filled in.
        eq_(u"Galápagos : a novel", metadata_obj.title)
        eq_(u'Delacorte Press/Seymour Lawrence', metadata_obj.publisher)
        eq_(1985, metadata_obj.published.year)
        links = metadata_obj.links
        eq_(1, len(links))
        assert "ghost of a shipbuilder" in links[0].content
        eq_(4, len(metadata_obj.identifiers))

        # There is a single contributor, identified by VIAF id.
        contributors = metadata_obj.contributors
        eq_(1, len(contributors))
        viaf_ids = [contributor.viaf for contributor in contributors]
        eq_([u"71398958"], viaf_ids)
        eq_(10, len(metadata_obj.subjects))

        # Relabel the title's language so the book no longer has an
        # English title, then make sure conversion still works.
        subgraph[14]['name']['@language'] = 'fr'
        books = list(oclc.books(subgraph))
        [book] = books

        converter = OCLCLinkedData(self._db)
        metadata_obj = converter.book_info_to_metadata(subgraph, book)

        # Without an English title, the metadata has no title at all.
        eq_(None, metadata_obj.title)
Esempio n. 4
0
    def test_extract_useful_data(self):
        raw = self.sample_data('galapagos.jsonld')
        subgraph = json.loads(raw)['@graph']

        # The sample graph must describe exactly one book.
        books = list(OCLCLinkedData.books(subgraph))
        [book] = books

        extracted = OCLCLinkedData.extract_useful_data(subgraph, book)
        (oclc_id_type, oclc_id, titles, descriptions, subjects,
         creator_uris, publishers, publication_dates,
         example_uris) = extracted

        eq_(Identifier.OCLC_NUMBER, oclc_id_type)
        eq_(u"11866009", oclc_id)
        eq_([u"Galápagos : a novel"], titles)
        eq_(1, len(descriptions))

        # The book's "about" list holds 11 links, but
        # "http://subject.example.wo/internal_lookup" is not counted as a
        # subject because it doesn't have an internal lookup.
        #
        # The made-up LCSH subject also lacks an internal lookup, yet it
        # is counted because its details can be parsed from the url:
        # "http://id.loc.gov/authorities/subjects/sh12345678"
        expected_subject_counts = [
            (Subject.DDC, 1),
            (Subject.FAST, 1),
            (Subject.TAG, 4),
            (Subject.PLACE, 1),
            (Subject.LCSH, 3),
        ]
        for subject_type, expected_count in expected_subject_counts:
            eq_(expected_count, len(subjects[subject_type]))

        eq_(1, len(creator_uris))
        eq_(["Delacorte Press/Seymour Lawrence"], publishers)
        eq_(["1985"], publication_dates)
        eq_(2, len(example_uris))
Esempio n. 5
0
    def test_extract_contributor(self):
        # It pulls relevant contributor data out of an OCLC person entity graph.
        def person_entity(filename):
            # Each sample file is parsed and the node at index 1 of its
            # '@graph' list is what gets handed to extract_contributor.
            document = json.loads(self.sample_data(filename))
            return document['@graph'][1]

        sloane = OCLCLinkedData.extract_contributor(
            person_entity('sloane_crosley.jsonld'))
        eq_(sloane['family_name'], 'Crosley')
        eq_(sloane['display_name'], 'Sloane Crosley')

        flanagan = OCLCLinkedData.extract_contributor(
            person_entity('john_flanagan_multiname.jsonld'))
        eq_(flanagan['family_name'], 'Flanagan')
        eq_(flanagan['display_name'], 'John Anthony Flanagan')
        eq_(flanagan['extra']['birthDate'], '1944')

        # TODO: Modify the contributor extraction to handle cases where
        # maiden names are included and/or multiple name options are the
        # same except for capitalization.
        rice = OCLCLinkedData.extract_contributor(
            person_entity('anne_rice.jsonld'))
        eq_(rice['family_name'], "O'Brien Rice")
        eq_(rice['display_name'], "Anne O'Brien Rice")
        eq_(rice['extra']['birthDate'], '1941')
Esempio n. 6
0
 def __init__(self, _db, oclcld=None, viaf=None):
     """Store the database handle and the two lookup clients.

     :param _db: Stored on the instance and passed to any client that
         has to be constructed here.
     :param oclcld: Client to use for `self.oclcld`; when falsy, a new
         `OCLCLinkedData(_db)` is built instead.
     :param viaf: Client to use for `self.viaf`; when falsy, a new
         `VIAFClient(_db)` is built instead.
     """
     self._db = _db

     # Fall back to default clients for anything the caller left out.
     if not oclcld:
         oclcld = OCLCLinkedData(_db)
     self.oclcld = oclcld

     if not viaf:
         viaf = VIAFClient(_db)
     self.viaf = viaf

     self.log = logging.getLogger("Author name canonicalizer")