def test_extract_metadata_from_elementtree_treats_message_as_failure(self):
    """A feed that only carries a message produces a CoverageFailure.

    Feeds the ``unrecognized_identifier.opds`` fixture to
    ``OPDSImporter.extract_metadata_from_elementtree`` and verifies that
    no metadata is returned, and that the single failure is keyed by the
    identifier URL and carries the message text from the feed.
    """
    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

    # Read the fixture through a context manager so the file handle is
    # closed deterministically rather than left for the garbage collector.
    feed_path = os.path.join(
        self.resource_path, "unrecognized_identifier.opds"
    )
    with open(feed_path) as feed_file:
        feed = feed_file.read()

    values, failures = OPDSImporter.extract_metadata_from_elementtree(
        feed, data_source
    )

    # We have no Metadata objects and one CoverageFailure.
    eq_({}, values)

    # The CoverageFailure contains the information that was in a
    # <simplified:message> tag in unrecognized_identifier.opds.
    key = 'http://www.gutenberg.org/ebooks/100'

    # Materialize the keys before comparing: on Python 3, dict.keys() is a
    # view object that never compares equal to a list.
    eq_([key], list(failures.keys()))

    failure = failures[key]
    eq_("404: I've never heard of this work.", failure.exception)
    eq_(key, failure.obj.urn)
def test_extract_metadata_from_elementtree(self):
    """Spot-check the metadata extracted from the content server feed.

    Runs ``OPDSImporter.extract_metadata_from_elementtree`` over
    ``self.content_server_feed`` and verifies the number of results, then
    inspects one book entry and one periodical entry in detail.
    """
    source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
    metadata, failures = OPDSImporter.extract_metadata_from_elementtree(
        self.content_server_feed, source
    )

    # The feed holds 76 entries; every one of them should have produced
    # metadata, and none should have failed.
    eq_(76, len(metadata))
    eq_(0, len(failures))

    # --- Spot check 1: a book -------------------------------------------
    book = metadata['urn:librarysimplified.org/terms/id/Gutenberg%20ID/1022']
    eq_(Edition.BOOK_MEDIUM, book['medium'])

    # Exactly one contributor is expected; the unpacking enforces that.
    [author] = book['contributors']
    eq_("Thoreau, Henry David", author.sort_name)
    eq_([Contributor.AUTHOR_ROLE], author.roles)

    book_subjects = book['subjects']
    book_types = ['LCSH', 'LCSH', 'LCSH', 'LCC']
    book_identifiers = ['Essays', 'Nature', 'Walking', 'PS']
    book_names = [None, None, None, 'American Literature']
    book_weights = [1, 1, 1, 10]
    eq_(book_types, [subject.type for subject in book_subjects])
    eq_(book_identifiers, [subject.identifier for subject in book_subjects])
    eq_(book_names, [subject.name for subject in book['subjects']])
    eq_(book_weights, [subject.weight for subject in book['subjects']])

    eq_([], book['measurements'])

    # Exactly one link is expected as well.
    [download] = book['links']
    eq_(Hyperlink.OPEN_ACCESS_DOWNLOAD, download.rel)
    eq_("http://www.gutenberg.org/ebooks/1022.epub.noimages", download.href)
    eq_(Representation.EPUB_MEDIA_TYPE, download.media_type)

    # --- Spot check 2: a periodical -------------------------------------
    periodical = metadata[
        'urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441'
    ]
    eq_(Edition.PERIODICAL_MEDIUM, periodical['medium'])

    periodical_subjects = periodical['subjects']
    periodical_types = [
        'LCSH',
        'LCSH',
        'LCSH',
        'LCSH',
        'LCC',
        'schema:audience',
        'schema:typicalAgeRange',
    ]
    periodical_identifiers = [
        'Courtship -- Fiction',
        'New York (N.Y.) -- Fiction',
        'Fantasy fiction',
        'Magic -- Fiction',
        'PZ',
        'Children',
        '7',
    ]
    periodical_weights = [1, 1, 1, 1, 1, 100, 100]
    eq_(periodical_types, [subject.type for subject in periodical_subjects])
    eq_(
        periodical_identifiers,
        [subject.identifier for subject in periodical_subjects],
    )
    eq_(periodical_weights, [subject.weight for subject in periodical_subjects])

    # Exactly three measurements are expected; the unpacking enforces that.
    quality, rating, popularity = periodical['measurements']
    expected_measurements = (
        (quality, Measurement.QUALITY, 0.3333),
        (rating, Measurement.RATING, 0.6),
        (popularity, Measurement.POPULARITY, 0.25),
    )
    for measurement, quantity, value in expected_measurements:
        eq_(quantity, measurement.quantity_measured)
        eq_(value, measurement.value)
        eq_(1, measurement.weight)