def test_add_summary(self):
    """A work's HTML summary is reduced to plain text in field 520."""
    summarized_work = self._work(with_license_pool=True)
    summarized_work.summary_text = "<p>Summary</p>"
    marc_record = Record()
    Annotator.add_summary(marc_record, summarized_work)
    self._check_field(marc_record, "520", {"a": b" Summary "})
def test_leader(self):
    """The leader's record-status byte depends on whether a MARC record
    has already been cached for the work."""
    work = self._work(with_license_pool=True)

    # No cached record yet: status byte is 'n' (new).
    assert Annotator.leader(work) == "00000nam 2200000 4500"

    # If there's already a marc record cached, the record status changes
    # to 'c' (corrected/revised).
    work.marc_record = "cached"
    assert Annotator.leader(work) == "00000cam 2200000 4500"
def test_add_audience(self):
    """Every known audience maps to its TLC target term in field 385."""
    for audience_value, expected_term in Annotator.AUDIENCE_TERMS.items():
        audience_work = self._work(audience=audience_value)
        marc_record = Record()
        Annotator.add_audience(marc_record, audience_work)
        self._check_field(
            marc_record,
            "385",
            {"a": expected_term, "2": "tlctarget"},
        )
def test_create_record_roundtrip(self):
    """Round-trip a record with non-ASCII characters through the DB.

    A work whose title and author contain special characters is turned
    into a MARC record, cached, and then regenerated both from the same
    session and from a freshly loaded copy of the work; all versions
    must serialize identically.
    """
    annotator = Annotator()

    # Creating the record also caches it on the work.
    work = self._work(
        title="Little Mimi\u2019s First Counting Lesson",
        authors=["Lagerlo\xf6f, Selma Ottiliana Lovisa,"],
        with_license_pool=True,
    )
    first_record = MARCExporter.create_record(work, annotator)
    second_record = MARCExporter.create_record(work, annotator)
    assert first_record.as_marc() == second_record.as_marc()

    # Reload the work through a new session and regenerate the record
    # from what was stored in the database.
    session = Session(self.connection)
    reloaded_work = get_one(session, Work, id=work.id)
    reloaded_record = MARCExporter.create_record(reloaded_work, annotator)
    assert first_record.as_marc() == reloaded_record.as_marc()
def test_add_simplified_genres(self):
    """Each genre on a work becomes a 650 field attributed to Library Simplified."""
    work = self._work(with_license_pool=True)
    fantasy, _ = Genre.lookup(self._db, "Fantasy", autocreate=True)
    romance, _ = Genre.lookup(self._db, "Romance", autocreate=True)
    work.genres = [fantasy, romance]

    marc_record = Record()
    Annotator.add_simplified_genres(marc_record, work)
    genre_fields = marc_record.get_fields("650")
    [fantasy_field, romance_field] = sorted(
        genre_fields, key=lambda field: field.get_subfields("a")[0]
    )
    for field, genre_name in ((fantasy_field, "Fantasy"), (romance_field, "Romance")):
        assert field.indicators == ["0", "7"]
        assert field.get_subfields("a")[0] == genre_name
        assert field.get_subfields("2")[0] == "Library Simplified"
def test_add_control_fields(self):
    """Control fields 001/005/006/007/008 are built from the identifier,
    license pool, and edition."""
    # This edition has one format and was published before 1900.
    edition, pool = self._edition(with_license_pool=True)
    identifier = pool.identifier
    edition.issued = datetime_utc(956, 1, 1)

    now = utc_now()
    record = Record()
    Annotator.add_control_fields(record, identifier, pool, edition)
    # 001 carries the identifier's URN.
    self._check_control_field(record, "001", identifier.urn)
    # 005 (date/time of latest transaction) contains today's date.
    assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
    self._check_control_field(record, "006", "m d ")
    # 'a' in 007 here reflects the single delivery format
    # (contrast with 'm' for the multi-format edition below).
    self._check_control_field(record, "007", "cr cn ---anuuu")
    # 008 combines creation date, zero-padded publication year, and language.
    self._check_control_field(
        record, "008", now.strftime("%y%m%d") + "s0956 xxu eng ")

    # This French edition has two formats and was published in 2018.
    edition2, pool2 = self._edition(with_license_pool=True)
    identifier2 = pool2.identifier
    edition2.issued = datetime_utc(2018, 2, 3)
    edition2.language = "fre"
    LicensePoolDeliveryMechanism.set(
        pool2.data_source,
        identifier2,
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT,
    )

    record = Record()
    Annotator.add_control_fields(record, identifier2, pool2, edition2)
    self._check_control_field(record, "001", identifier2.urn)
    assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
    self._check_control_field(record, "006", "m d ")
    # 'm' in 007 indicates multiple delivery formats.
    self._check_control_field(record, "007", "cr cn ---mnuuu")
    self._check_control_field(
        record, "008", now.strftime("%y%m%d") + "s2018 xxu fre ")
def test_add_formats(self):
    """Each delivery mechanism on the pool produces a 538 format note."""
    edition, pool = self._edition(with_license_pool=True)
    epub_no_drm, _ = DeliveryMechanism.lookup(
        self._db, Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM
    )
    pool.delivery_mechanisms[0].delivery_mechanism = epub_no_drm
    LicensePoolDeliveryMechanism.set(
        pool.data_source,
        pool.identifier,
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT,
    )

    marc_record = Record()
    Annotator.add_formats(marc_record, pool)
    format_fields = marc_record.get_fields("538")
    assert len(format_fields) == 2
    pdf_field, epub_field = sorted(
        format_fields, key=lambda field: field.get_subfields("a")[0]
    )
    assert pdf_field.get_subfields("a")[0] == "Adobe PDF eBook"
    assert pdf_field.indicators == [" ", " "]
    assert epub_field.get_subfields("a")[0] == "EPUB eBook"
    assert epub_field.indicators == [" ", " "]
def test_add_publisher(self):
    """Publisher data is written to field 264; no publisher means no field."""
    edition = self._edition()
    edition.publisher = self._str
    edition.issued = datetime_utc(1894, 4, 5)

    marc_record = Record()
    Annotator.add_publisher(marc_record, edition)
    self._check_field(
        marc_record,
        "264",
        {
            "a": "[Place of publication not identified]",
            "b": edition.publisher,
            "c": "1894",
        },
        [" ", "1"],
    )

    # With no publisher, the 264 field is omitted entirely.
    marc_record = Record()
    edition.publisher = None
    Annotator.add_publisher(marc_record, edition)
    assert marc_record.get_fields("264") == []
def test_add_title(self):
    """Title, subtitle, and author statement are written to field 245."""
    edition = self._edition()
    edition.title = "The Good Soldier"
    edition.sort_title = "Good Soldier, The"
    edition.subtitle = "A Tale of Passion"

    marc_record = Record()
    Annotator.add_title(marc_record, edition)
    [title_field] = marc_record.get_fields("245")
    self._check_field(
        marc_record,
        "245",
        {"a": edition.title, "b": edition.subtitle, "c": edition.author},
        ["0", "4"],
    )

    # A missing subtitle or author simply drops subfields $b and $c.
    edition.subtitle = None
    edition.author = None
    marc_record = Record()
    Annotator.add_title(marc_record, edition)
    [title_field] = marc_record.get_fields("245")
    self._check_field(marc_record, "245", {"a": edition.title}, ["0", "4"])
    assert title_field.get_subfields("b") == []
    assert title_field.get_subfields("c") == []
def test_add_contributors(self):
    """A lone author becomes a 100 main entry; multiple contributors
    become 700 added entries instead."""
    primary = "a"
    secondary = "b"
    translator = "c"

    # One author: a single 100 field, no 700 fields.
    edition = self._edition(authors=[primary])
    edition.sort_author = "sorted"
    marc_record = Record()
    Annotator.add_contributors(marc_record, edition)
    assert marc_record.get_fields("700") == []
    self._check_field(marc_record, "100", {"a": edition.sort_author}, ["1", " "])

    # Two authors plus a translator: three 700 fields, no 100 field.
    edition = self._edition(authors=[primary, secondary])
    edition.add_contributor(translator, Contributor.TRANSLATOR_ROLE)
    marc_record = Record()
    Annotator.add_contributors(marc_record, edition)
    assert marc_record.get_fields("100") == []

    added_entries = marc_record.get_fields("700")
    for entry in added_entries:
        assert entry.indicators == ["1", " "]

    [primary_field, secondary_field, translator_field] = sorted(
        added_entries, key=lambda field: field.get_subfields("a")[0]
    )
    assert primary_field.get_subfields("a")[0] == primary
    assert primary_field.get_subfields("e")[0] == Contributor.PRIMARY_AUTHOR_ROLE
    assert secondary_field.get_subfields("a")[0] == secondary
    assert secondary_field.get_subfields("e")[0] == Contributor.AUTHOR_ROLE
    assert translator_field.get_subfields("a")[0] == translator
    assert translator_field.get_subfields("e")[0] == Contributor.TRANSLATOR_ROLE
def test_add_isbn(self):
    """Field 020 carries the ISBN, located directly or via an equivalency."""
    isbn = self._identifier(identifier_type=Identifier.ISBN)
    marc_record = Record()
    Annotator.add_isbn(marc_record, isbn)
    self._check_field(marc_record, "020", {"a": isbn.identifier})

    # A non-ISBN identifier with an ISBN equivalency still yields 020.
    equivalent = self._identifier()
    data_source = DataSource.lookup(self._db, DataSource.OCLC)
    equivalent.equivalent_to(data_source, isbn, 1)
    marc_record = Record()
    Annotator.add_isbn(marc_record, equivalent)
    self._check_field(marc_record, "020", {"a": isbn.identifier})

    # No ISBN anywhere: the field is omitted.
    non_isbn = self._identifier()
    marc_record = Record()
    Annotator.add_isbn(marc_record, non_isbn)
    assert marc_record.get_fields("020") == []
def test_add_series(self):
    """Series name and position are written to field 490."""
    edition = self._edition()
    edition.series = self._str
    edition.series_position = 5
    marc_record = Record()
    Annotator.add_series(marc_record, edition)
    self._check_field(
        marc_record,
        "490",
        {"a": edition.series, "v": str(edition.series_position)},
        ["0", " "],
    )

    # Without a series position, the same field appears minus subfield $v.
    edition.series_position = None
    marc_record = Record()
    Annotator.add_series(marc_record, edition)
    self._check_field(marc_record, "490", {"a": edition.series}, ["0", " "])
    [series_field] = marc_record.get_fields("490")
    assert series_field.get_subfields("v") == []

    # Without a series at all, the field is omitted.
    edition.series = None
    marc_record = Record()
    Annotator.add_series(marc_record, edition)
    assert marc_record.get_fields("490") == []
def test_create_record(self):
    """create_record caches the generated MARC record on the work and
    serves later calls from that cache -- except for the distributor
    field, which is always regenerated -- unless force_create is set."""
    work = self._work(
        with_license_pool=True,
        title="old title",
        authors=["old author"],
        data_source_name=DataSource.OVERDRIVE,
    )
    annotator = Annotator()

    # The record isn't cached yet, so a new record is created and cached.
    assert None == work.marc_record
    record = MARCExporter.create_record(work, annotator)
    [title_field] = record.get_fields("245")
    assert "old title" == title_field.get_subfields("a")[0]
    [author_field] = record.get_fields("100")
    assert "author, old" == author_field.get_subfields("a")[0]
    [distributor_field] = record.get_fields("264")
    assert DataSource.OVERDRIVE == distributor_field.get_subfields("b")[0]
    cached = work.marc_record
    assert "old title" in cached
    assert "author, old" in cached
    # The distributor isn't part of the cached record.
    assert DataSource.OVERDRIVE not in cached

    # Change the title, author, and distributor behind the cache's back.
    work.presentation_edition.title = "new title"
    work.presentation_edition.sort_author = "author, new"
    new_data_source = DataSource.lookup(self._db, DataSource.BIBLIOTHECA)
    work.license_pools[0].data_source = new_data_source

    # Now that the record is cached, creating a record will
    # use the cache. Distributor will be updated since it's
    # not part of the cached record.
    record = MARCExporter.create_record(work, annotator)
    [title_field] = record.get_fields("245")
    assert "old title" == title_field.get_subfields("a")[0]
    [author_field] = record.get_fields("100")
    assert "author, old" == author_field.get_subfields("a")[0]
    [distributor_field] = record.get_fields("264")
    assert DataSource.BIBLIOTHECA == distributor_field.get_subfields(
        "b")[0]

    # But we can force an update to the cached record.
    record = MARCExporter.create_record(work, annotator, force_create=True)
    [title_field] = record.get_fields("245")
    assert "new title" == title_field.get_subfields("a")[0]
    [author_field] = record.get_fields("100")
    assert "author, new" == author_field.get_subfields("a")[0]
    [distributor_field] = record.get_fields("264")
    assert DataSource.BIBLIOTHECA == distributor_field.get_subfields(
        "b")[0]
    cached = work.marc_record
    assert "old title" not in cached
    assert "author, old" not in cached
    assert "new title" in cached
    assert "author, new" in cached

    # If we pass in an integration, it's passed along to the annotator.
    integration = self._integration()

    class MockAnnotator(Annotator):
        # Records the integration passed to annotate_work_record.
        integration = None

        def annotate_work_record(
                self, work, pool, edition, identifier, record, integration):
            self.integration = integration

    annotator = MockAnnotator()
    record = MARCExporter.create_record(
        work, annotator, integration=integration)
    assert integration == annotator.integration
def test_add_ebooks_subject(self):
    """Every record gets the genre heading 'Electronic books.' in field 655."""
    marc_record = Record()
    Annotator.add_ebooks_subject(marc_record)
    self._check_field(marc_record, "655", {"a": "Electronic books."}, [" ", "0"])
def test_records(self):
    """Exercise MARCExporter.records end to end: generating MARC output
    for a lane or worklist, mirroring it to storage, and tracking each
    run with a CachedMARCFile row."""
    integration = self._integration()
    now = utc_now()
    exporter = MARCExporter.from_config(self._default_library)
    annotator = Annotator()
    lane = self._lane("Test Lane", genres=["Mystery"])
    w1 = self._work(genre="Mystery", with_open_access_download=True)
    w2 = self._work(genre="Mystery", with_open_access_download=True)

    search_engine = MockExternalSearchIndex()
    search_engine.bulk_update([w1, w2])

    # If there's a storage protocol but no corresponding storage
    # integration, it raises an exception.
    pytest.raises(Exception, exporter.records, lane, annotator)

    # If there is a storage integration, the output file is mirrored.
    mirror_integration = self._external_integration(
        ExternalIntegration.S3,
        ExternalIntegration.STORAGE_GOAL,
        username="******",
        password="******",
    )

    mirror = MockS3Uploader()
    exporter.records(
        lane,
        annotator,
        mirror_integration,
        mirror=mirror,
        query_batch_size=1,
        upload_batch_size=1,
        search_engine=search_engine,
    )

    # The file was mirrored and a CachedMARCFile was created to track
    # the mirrored file.
    assert 1 == len(mirror.uploaded)
    [cache] = self._db.query(CachedMARCFile).all()
    assert self._default_library == cache.library
    assert lane == cache.lane
    assert mirror.uploaded[0] == cache.representation
    assert None == cache.representation.content
    # The mirror URL embeds library, fetch timestamp, and lane name.
    assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
        self._default_library.short_name,
        quote(str(cache.representation.fetched_at)),
        quote(lane.display_name),
    ) == mirror.uploaded[0].mirror_url)
    assert None == cache.start_time
    assert cache.end_time > now

    # The content was uploaded in two parts (upload_batch_size=1,
    # two works).
    assert 2 == len(mirror.content[0])
    complete_file = b"".join(mirror.content[0])
    records = list(MARCReader(complete_file))
    assert 2 == len(records)
    title_fields = [record.get_fields("245") for record in records]
    titles = [fields[0].get_subfields("a")[0] for fields in title_fields]
    assert set([w1.title, w2.title]) == set(titles)

    # Both works got their MARC records cached along the way.
    assert w1.title in w1.marc_record
    assert w2.title in w2.marc_record

    self._db.delete(cache)

    # It also works with a WorkList instead of a Lane, in which case
    # there will be no lane in the CachedMARCFile.
    worklist = WorkList()
    worklist.initialize(self._default_library, display_name="All Books")

    mirror = MockS3Uploader()
    exporter.records(
        worklist,
        annotator,
        mirror_integration,
        mirror=mirror,
        query_batch_size=1,
        upload_batch_size=1,
        search_engine=search_engine,
    )
    assert 1 == len(mirror.uploaded)
    [cache] = self._db.query(CachedMARCFile).all()
    assert self._default_library == cache.library
    assert None == cache.lane
    assert mirror.uploaded[0] == cache.representation
    assert None == cache.representation.content
    # The worklist's display name replaces the lane name in the URL.
    assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
        self._default_library.short_name,
        quote(str(cache.representation.fetched_at)),
        quote(worklist.display_name),
    ) == mirror.uploaded[0].mirror_url)
    assert None == cache.start_time
    assert cache.end_time > now

    assert 2 == len(mirror.content[0])
    complete_file = b"".join(mirror.content[0])
    records = list(MARCReader(complete_file))
    assert 2 == len(records)

    self._db.delete(cache)

    # If a start time is set, it's used in the mirror url.
    #
    # (Our mock search engine returns everything in its 'index',
    # so this doesn't test that the start time is actually used to
    # find works -- that's in the search index tests and the
    # tests of MARCExporterFacets.)
    start_time = now - datetime.timedelta(days=3)

    mirror = MockS3Uploader()
    exporter.records(
        lane,
        annotator,
        mirror_integration,
        start_time=start_time,
        mirror=mirror,
        query_batch_size=2,
        upload_batch_size=2,
        search_engine=search_engine,
    )
    [cache] = self._db.query(CachedMARCFile).all()
    assert self._default_library == cache.library
    assert lane == cache.lane
    assert mirror.uploaded[0] == cache.representation
    assert None == cache.representation.content
    # The URL now contains a "start-fetched" timestamp range.
    assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s-%s/%s.mrc" % (
        self._default_library.short_name,
        quote(str(start_time)),
        quote(str(cache.representation.fetched_at)),
        quote(lane.display_name),
    ) == mirror.uploaded[0].mirror_url)
    assert start_time == cache.start_time
    assert cache.end_time > now
    self._db.delete(cache)

    # If the search engine returns no contents for the lane,
    # nothing will be mirrored, but a CachedMARCFile is still
    # created to track that we checked for updates.
    empty_search_engine = MockExternalSearchIndex()

    mirror = MockS3Uploader()
    exporter.records(
        lane,
        annotator,
        mirror_integration,
        mirror=mirror,
        search_engine=empty_search_engine,
    )

    assert [] == mirror.content[0]
    [cache] = self._db.query(CachedMARCFile).all()
    assert cache.representation == mirror.uploaded[0]
    assert self._default_library == cache.library
    assert lane == cache.lane
    assert None == cache.representation.content
    assert None == cache.start_time
    assert cache.end_time > now

    self._db.delete(cache)
def test_add_distributor(self):
    """The license pool's data source name becomes the distributor in 264."""
    edition, pool = self._edition(with_license_pool=True)
    marc_record = Record()
    Annotator.add_distributor(marc_record, pool)
    self._check_field(marc_record, "264", {"b": pool.data_source.name}, [" ", "2"])
def test_add_physical_description(self):
    """Physical-description fields (300/336/337/338/347/380) differ
    between book and audiobook media."""
    book = self._edition()
    book.medium = Edition.BOOK_MEDIUM
    audio = self._edition()
    audio.medium = Edition.AUDIO_MEDIUM

    # A book is described as a single online text resource.
    record = Record()
    Annotator.add_physical_description(record, book)
    self._check_field(record, "300", {"a": "1 online resource"})
    self._check_field(
        record,
        "336",
        {
            "a": "text",
            "b": "txt",
            "2": "rdacontent",
        },
    )
    self._check_field(
        record,
        "337",
        {
            "a": "computer",
            "b": "c",
            "2": "rdamedia",
        },
    )
    self._check_field(
        record,
        "338",
        {
            "a": "online resource",
            "b": "cr",
            "2": "rdacarrier",
        },
    )
    self._check_field(
        record,
        "347",
        {
            "a": "text file",
            "2": "rda",
        },
    )
    # Books get a 380 work-genre term.
    self._check_field(
        record,
        "380",
        {
            "a": "eBook",
            "2": "tlcgt",
        },
    )

    # An audiobook is described as a digital sound file with
    # spoken-word content.
    record = Record()
    Annotator.add_physical_description(record, audio)
    self._check_field(
        record,
        "300",
        {
            "a": "1 sound file",
            "b": "digital",
        },
    )
    self._check_field(
        record,
        "336",
        {
            "a": "spoken word",
            "b": "spw",
            "2": "rdacontent",
        },
    )
    self._check_field(
        record,
        "337",
        {
            "a": "computer",
            "b": "c",
            "2": "rdamedia",
        },
    )
    self._check_field(
        record,
        "338",
        {
            "a": "online resource",
            "b": "cr",
            "2": "rdacarrier",
        },
    )
    self._check_field(
        record,
        "347",
        {
            "a": "audio file",
            "2": "rda",
        },
    )
    # Audiobooks don't get a 380 field.
    assert [] == record.get_fields("380")
def test_add_system_details(self):
    """Field 538 notes that the mode of access is the World Wide Web."""
    marc_record = Record()
    Annotator.add_system_details(marc_record)
    self._check_field(marc_record, "538", {"a": "Mode of access: World Wide Web."})
def test_add_marc_organization_code(self):
    """The organization code is written to control field 003."""
    marc_record = Record()
    Annotator.add_marc_organization_code(marc_record, "US-MaBoDPL")
    self._check_control_field(marc_record, "003", "US-MaBoDPL")