def test_update(self): # update() can modify the fields of a Timestamp that aren't # used to identify it. stamp = Timestamp.stamp(self._db, "service", Timestamp.SCRIPT_TYPE) start = datetime_utc(2010, 1, 2) finish = datetime_utc(2018, 3, 4) achievements = self._str counter = self._id exception = self._str stamp.update(start, finish, achievements, counter, exception) assert start == stamp.start assert finish == stamp.finish assert achievements == stamp.achievements assert counter == stamp.counter assert exception == stamp.exception # .exception is the only field update() will set to a value of # None. For all other fields, None means "don't update the existing # value". stamp.update() assert start == stamp.start assert finish == stamp.finish assert achievements == stamp.achievements assert counter == stamp.counter assert None == stamp.exception
def test_patron_activity(self): data = self.get_data("patron_response.json") self.api.queue_response(200, content=data) patron = self._patron() patron.authorization_identifier = "123" [loan] = self.api.patron_activity(patron, "pin") # An appropriate Enki API call was issued. [method, url, headers, data, params, kwargs] = self.api.requests.pop() assert "get" == method assert self.api.base_url + "UserAPI" == url assert "getSEPatronData" == params["method"] assert "123" == params["username"] assert "pin" == params["password"] # In particular, the Enki library ID associated with the # patron's library was used as the 'lib' parameter. assert "c" == params["lib"] # The result is a single LoanInfo. assert isinstance(loan, LoanInfo) assert Identifier.ENKI_ID == loan.identifier_type assert DataSource.ENKI == loan.data_source_name assert "231" == loan.identifier assert self.collection == loan.collection(self._db) assert datetime_utc(2017, 8, 15, 14, 56, 51) == loan.start_date assert datetime_utc(2017, 9, 5, 14, 56, 51) == loan.end_date
def test_checkout_acs_parser(self):
    """Test that checkout info for ACS Enki books is parsed correctly."""
    data = self.get_data("checked_out_acs.json")
    result = json.loads(data)
    loan = self.api.parse_patron_loans(
        result["result"]["checkedOutItems"][0])
    assert loan.data_source_name == DataSource.ENKI
    assert loan.identifier_type == Identifier.ENKI_ID
    assert loan.identifier == "3334"
    assert loan.start_date == datetime_utc(2017, 8, 23, 19, 42, 35, 0)
    assert loan.end_date == datetime_utc(2017, 9, 13, 19, 42, 35, 0)
def test_update(self): # Create two books that are part of series, and one book that # is not. series1 = self._edition() series1.series = "Series 1" series2 = self._edition() series2.series = "Series Two" no_series = self._edition() assert None == no_series.series update_time = datetime_utc(2015, 1, 1) # To create necessary mocked objects, # _customlist calls _work # which calls _edition, which makes an edition and a pool (through _licensepool) # then makes work through get_one_or_create custom_list, ignore = self._customlist() manager = BooksInSeries(custom_list) manager.update(update_time) [entry1] = [x for x in custom_list.entries if x.edition.series == "Series 1"] [entry2] = [x for x in custom_list.entries if x.edition.series == "Series Two"] assert update_time == entry1.first_appearance assert update_time == entry1.most_recent_appearance # In a shocking twist, one of the entries turns out not to # have a series, while the entry previously thought not to # have a series actually does. series2.series = None no_series.series = "Actually I do have a series." self._db.commit() new_update_time = datetime_utc(2016, 1, 1) manager.update(new_update_time) # Entry #2 has been removed from the list, and a new entry added. [old_entry] = [x for x in custom_list.entries if x.edition.series == "Series 1"] [new_entry] = [ x for x in custom_list.entries if x.edition.series == "Actually I do have a series." ] assert update_time == old_entry.first_appearance assert new_update_time == old_entry.most_recent_appearance assert new_update_time == new_entry.first_appearance assert new_update_time == new_entry.most_recent_appearance
def test_all_ids_without_date(self): # TODO: This tests that all_ids doesn't crash when you pass in # an empty date. It doesn't test anything about all_ids except the # return value. monitor = OdiloCirculationMonitor(self._db, self.collection, api_class=MockOdiloAPI) assert monitor, "Monitor null !!" assert ExternalIntegration.ODILO == monitor.protocol, "Wat??" records_metadata_data, records_metadata_json = self.sample_json( "records_metadata.json") monitor.api.queue_response(200, content=records_metadata_data) availability_data = self.sample_data("record_availability.json") for record in records_metadata_json: monitor.api.queue_response(200, content=availability_data) monitor.api.queue_response(200, content="[]") # No more resources retrieved updated, new = monitor.all_ids(datetime_utc(2017, 9, 1)) assert 10 == updated assert 10 == new self.api.log.info( "Odilo circulation monitor without date finished ok!!")
def test_fulfill_success(self):
    # Test the fulfill() method.
    patron = self._patron()
    patron.authorization_identifier = "123"
    pool = self._licensepool(None)
    data = self.get_data("checked_out_acs.json")
    self.api.queue_response(200, content=data)
    fulfillment = self.api.fulfill(patron, "pin", pool, "internal format")

    # An appropriate request to the "getSELink" endpoint was made.
    [method, url, headers, data, params, kwargs] = self.api.requests.pop()
    assert "get" == method
    assert self.api.base_url + "UserAPI" == url
    assert "getSELink" == params["method"]
    assert "123" == params["username"]
    assert "pin" == params["password"]

    # In particular, the Enki library ID associated with the
    # patron's library was used as the 'lib' parameter.
    assert "c" == params["lib"]

    # A FulfillmentInfo for the loan was returned.
    assert isinstance(fulfillment, FulfillmentInfo)
    assert fulfillment.identifier == pool.identifier.identifier
    assert fulfillment.collection_id == pool.collection.id
    assert DeliveryMechanism.ADOBE_DRM == fulfillment.content_type
    assert fulfillment.content_link.startswith(
        "http://afs.enkilibrary.org/fulfillment/URLLink.acsm")
    assert fulfillment.content_expires == datetime_utc(
        2017, 9, 13, 19, 42, 35, 0)
def test_checkout_success(self):
    # Test the checkout() method.
    patron = self._patron()
    patron.authorization_identifier = "123"
    pool = self._licensepool(None)
    data = self.get_data("checked_out_acs.json")
    self.api.queue_response(200, content=data)
    loan = self.api.checkout(patron, "pin", pool, "internal format")

    # An appropriate request to the "getSELink" endpoint was made.
    [method, url, headers, data, params, kwargs] = self.api.requests.pop()
    assert "get" == method
    assert self.api.base_url + "UserAPI" == url
    assert "getSELink" == params["method"]
    assert "123" == params["username"]
    assert "pin" == params["password"]

    # In particular, the Enki library ID associated with the
    # patron's library was used as the 'lib' parameter.
    assert "c" == params["lib"]

    # A LoanInfo for the loan was returned.
    assert isinstance(loan, LoanInfo)
    assert loan.identifier == pool.identifier.identifier
    assert loan.collection_id == pool.collection.id
    assert loan.start_date == None
    assert loan.end_date == datetime_utc(2017, 9, 13, 19, 42, 35, 0)
def test_monitor_lifecycle(self): monitor = MockMonitor(self._db, self._default_collection) monitor.default_start_time = datetime_utc(2010, 1, 1) # There is no timestamp for this monitor. def get_timestamp(): return get_one(self._db, Timestamp, service=monitor.service_name) assert None == get_timestamp() # Run the monitor. monitor.run() # The monitor ran once and then stopped. [progress] = monitor.run_records # The TimestampData passed in to run_once() had the # Monitor's default start time as its .start, and an empty # time for .finish. assert monitor.default_start_time == progress.start assert None == progress.finish # But the Monitor's underlying timestamp has been updated with # the time that the monitor actually took to run. timestamp = get_timestamp() assert timestamp.start > monitor.default_start_time assert timestamp.finish > timestamp.start self.time_eq(utc_now(), timestamp.start) # cleanup() was called once. assert [True] == monitor.cleanup_records
class Mock(TimelineMonitor):
    DEFAULT_START_TIME = datetime_utc(2011, 1, 1)
    SERVICE_NAME = "doomed"

    def catch_up_from(self, start, cutoff, progress):
        self.started_at = start
        progress.exception = "oops"
def test_no_import_loop(self): # We stop processing a feed's 'next' link if it links to a URL we've # already seen. data = sample_data("metadata_updates_response.opds", "opds") self.lookup.queue_response( 200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data) data = data.replace(b"http://next-link/", b"http://different-link/") self.lookup.queue_response( 200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data) # This introduces a loop. data = data.replace(b"http://next-link/", b"http://next-link/") self.lookup.queue_response( 200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data) new_timestamp = self.monitor.run_once(self.ts) # Even though all these pages had the same content, we kept # processing them until we encountered a 'next' link we had # seen before; then we stopped. first, second, third = self.monitor.imports assert (None, None) == first assert (None, "http://next-link/") == second assert (None, "http://different-link/") == third assert datetime_utc(2016, 9, 20, 19, 37, 2) == new_timestamp.finish
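# A minimal sketch of the 'next'-link bookkeeping the test above
# exercises: remember every URL already fetched and stop as soon as a
# feed points back to one of them. The function and variable names here
# are hypothetical, not the monitor's actual implementation.
def follow_next_links(fetch_feed, start_url):
    seen = set()
    url = start_url
    while url is not None and url not in seen:
        seen.add(url)
        # fetch_feed is assumed to return (entries, next_url).
        entries, url = fetch_feed(url)
        yield entries
    # Reaching this point means either the feed chain ended or a loop
    # was detected; either way no URL is processed twice.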
def test_can_insert_measurement_after_the_fact(self):
    old = datetime_utc(2011, 1, 1)
    new = datetime_utc(2012, 1, 1)
    wi = self._identifier()
    m1 = wi.add_measurement(self.source, Measurement.DOWNLOADS, 10,
                            taken_at=new)
    assert True == m1.is_most_recent

    m2 = wi.add_measurement(self.source, Measurement.DOWNLOADS, 5,
                            taken_at=old)
    assert True == m1.is_most_recent
def test_repr(self):
    patron = self._patron(external_identifier="a patron")
    patron.authorization_expires = datetime_utc(2018, 1, 2, 3, 4, 5)
    patron.last_external_sync = None
    assert (
        "<Patron authentication_identifier=None expires=2018-01-02 sync=None>"
        == repr(patron))
class MockMilleniumPatronAPI(MilleniumPatronAPI):
    """This mocks the API on a higher level than the HTTP level.

    It is not used in the tests of the MilleniumPatronAPI class.
    It is used in the Adobe Vendor ID tests, but maybe it shouldn't be.
    """

    # For expiration dates we're using UTC instead of local time for
    # convenience; the difference doesn't matter because the dates in
    # question are at least 10 days away from the current date.

    # This user's card has expired.
    user1 = PatronData(
        permanent_id="12345",
        authorization_identifier="0",
        username="******",
        authorization_expires=datetime_utc(2015, 4, 1),
    )

    # This user's card still has ten days on it.
    the_future = utc_now() + datetime.timedelta(days=10)
    user2 = PatronData(
        permanent_id="67890",
        authorization_identifier="5",
        username="******",
        authorization_expires=the_future,
    )

    users = [user1, user2]

    def __init__(self):
        pass

    def remote_authenticate(self, barcode, pin):
        """A barcode that's 14 digits long is treated as valid, no matter
        which PIN is used. That's so real barcode/PIN combos can be passed
        through to third parties.

        Otherwise, the valid test PIN is the first character of the
        barcode repeated four times.
        """
        u = self.dump(barcode)
        if "ERRNUM" in u:
            return False
        return len(barcode) == 14 or pin == barcode[0] * 4

    def remote_patron_lookup(self, patron_or_patrondata):
        # We have a couple custom barcodes.
        look_for = patron_or_patrondata.authorization_identifier
        for u in self.users:
            if u.authorization_identifier == look_for:
                return u
        return None
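# A standalone sketch of the mock's PIN rule above, for illustration
# only (it skips the self.dump()/"ERRNUM" lookup the real mock performs
# first). The function name is hypothetical.
def mock_pin_is_valid(barcode, pin):
    # 14-digit barcodes are accepted with any PIN so that real
    # barcode/PIN combos can be passed through to third parties.
    if len(barcode) == 14:
        return True
    # Otherwise the test PIN is the first character repeated four times.
    return pin == barcode[0] * 4

assert mock_pin_is_valid("12345678901234", "anything")
assert mock_pin_is_valid("55555", "5555")
assert not mock_pin_is_valid("55555", "1234")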
def test_run_once_returning_timestampdata(self):
    # If a Monitor's run_once implementation returns a TimestampData,
    # that's the data used to set the Monitor's Timestamp, even if
    # the data doesn't make sense by the standards used by the main
    # Monitor class.
    start = datetime_utc(2011, 1, 1)
    finish = datetime_utc(2012, 1, 1)

    class Mock(MockMonitor):
        def run_once(self, progress):
            return TimestampData(start=start, finish=finish, counter=-100)

    monitor = Mock(self._db, self._default_collection)
    monitor.run()

    timestamp = monitor.timestamp()
    assert start == timestamp.start
    assert finish == timestamp.finish
    assert -100 == timestamp.counter
def test_run_once(self): dummy_value = object() class Mock(EnkiImport): incremental_import_called_with = dummy_value def full_import(self): self.full_import_called = True return 10 def incremental_import(self, since): self.incremental_import_called_with = since return 4, 7 importer = Mock(self._db, self.collection, api_class=self.api) # If the incoming TimestampData makes it look like the process # has never successfully completed, full_import() is called. progress = TimestampData(start=None) importer.run_once(progress) assert True == importer.full_import_called assert ( "New or modified titles: 10. Titles with circulation changes: 0." == progress.achievements) # It doesn't call incremental_import(). assert dummy_value == importer.incremental_import_called_with # If run_once() is called with a TimestampData that indicates # an earlier successful run, a time five minutes before the # previous completion time is passed into incremental_import() importer.full_import_called = False a_while_ago = datetime_utc(2011, 1, 1) even_earlier = a_while_ago - datetime.timedelta(days=100) timestamp = TimestampData(start=even_earlier, finish=a_while_ago) new_timestamp = importer.run_once(timestamp) passed_in = importer.incremental_import_called_with expect = a_while_ago - importer.OVERLAP assert abs((passed_in - expect).total_seconds()) < 2 # full_import was not called. assert False == importer.full_import_called # The proposed new TimestampData covers the entire timespan # from the 'expect' period to now. assert expect == new_timestamp.start now = utc_now() assert (now - new_timestamp.finish).total_seconds() < 2 assert ( "New or modified titles: 4. Titles with circulation changes: 7." == new_timestamp.achievements)
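# A hedged sketch of the dispatch the test above describes: with no
# previous successful completion, run a full import; otherwise run an
# incremental import starting a little before the previous finish time
# (the importer's OVERLAP, five minutes according to the comment above).
# The function name and the hard-coded overlap are illustrative.
import datetime

def choose_import_window(previous_finish,
                         overlap=datetime.timedelta(minutes=5)):
    """Return None to request a full import, or the 'since' time for an
    incremental one."""
    if previous_finish is None:
        # The process has never successfully completed.
        return None
    # Back up by the overlap so records modified right around the
    # previous finish time are not missed.
    return previous_finish - overlap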
def test_run_once(self): # Setup authentication and Metadata Wrangler details. lp = self._licensepool(None, data_source_name=DataSource.BIBLIOTHECA, collection=self.collection) lp.identifier.type = Identifier.BIBLIOTHECA_ID isbn = Identifier.parse_urn(self._db, "urn:isbn:9781594632556")[0] lp.identifier.equivalent_to( DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1) assert [] == lp.identifier.links assert [] == lp.identifier.measurements # Queue some data to be found. responses = ( "metadata_updates_response.opds", "metadata_updates_empty_response.opds", ) for filename in responses: data = sample_data(filename, "opds") self.lookup.queue_response( 200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data) timestamp = self.ts new_timestamp = self.monitor.run_once(timestamp) # We have a new value to use for the Monitor's timestamp -- the # earliest date seen in the last OPDS feed that contained # any entries. assert datetime_utc(2016, 9, 20, 19, 37, 2) == new_timestamp.finish assert "Editions processed: 1" == new_timestamp.achievements # Normally run_once() doesn't update the monitor's timestamp, # but this implementation does, so that work isn't redone if # run_once() crashes or the monitor is killed. assert new_timestamp.finish == self.monitor.timestamp().finish # The original Identifier has information from the # mock Metadata Wrangler. mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER) assert 3 == len(lp.identifier.links) [quality] = lp.identifier.measurements assert mw_source == quality.data_source # Check the URLs we processed. url1, url2 = [x[0] for x in self.lookup.requests] # The first URL processed was the default one for the # MetadataWranglerOPDSLookup. assert self.lookup.get_collection_url( self.lookup.UPDATES_ENDPOINT) == url1 # The second URL processed was whatever we saw in the 'next' link. assert "http://next-link/" == url2
def test_add_control_fields(self): # This edition has one format and was published before 1900. edition, pool = self._edition(with_license_pool=True) identifier = pool.identifier edition.issued = datetime_utc(956, 1, 1) now = utc_now() record = Record() Annotator.add_control_fields(record, identifier, pool, edition) self._check_control_field(record, "001", identifier.urn) assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value() self._check_control_field(record, "006", "m d ") self._check_control_field(record, "007", "cr cn ---anuuu") self._check_control_field( record, "008", now.strftime("%y%m%d") + "s0956 xxu eng ") # This French edition has two formats and was published in 2018. edition2, pool2 = self._edition(with_license_pool=True) identifier2 = pool2.identifier edition2.issued = datetime_utc(2018, 2, 3) edition2.language = "fre" LicensePoolDeliveryMechanism.set( pool2.data_source, identifier2, Representation.PDF_MEDIA_TYPE, DeliveryMechanism.ADOBE_DRM, RightsStatus.IN_COPYRIGHT, ) record = Record() Annotator.add_control_fields(record, identifier2, pool2, edition2) self._check_control_field(record, "001", identifier2.urn) assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value() self._check_control_field(record, "006", "m d ") self._check_control_field(record, "007", "cr cn ---mnuuu") self._check_control_field( record, "008", now.strftime("%y%m%d") + "s2018 xxu fre ")
def test_calculate_until(self): start = datetime_utc(2010, 1, 1) # The cycle time is one week. default_loan = datetime.timedelta(days=6) default_reservation = datetime.timedelta(days=1) # I'm 20th in line for 4 books. # # After 7 days, four copies are released and I am 16th in line. # After 14 days, those copies are released and I am 12th in line. # After 21 days, those copies are released and I am 8th in line. # After 28 days, those copies are released and I am 4th in line. # After 35 days, those copies are released and get my notification. a = Hold._calculate_until(start, 20, 4, default_loan, default_reservation) assert a == start + datetime.timedelta(days=(7 * 5)) # If I am 21st in line, I need to wait six weeks. b = Hold._calculate_until(start, 21, 4, default_loan, default_reservation) assert b == start + datetime.timedelta(days=(7 * 6)) # If I am 3rd in line, I only need to wait seven days--that's when # I'll get the notification message. b = Hold._calculate_until(start, 3, 4, default_loan, default_reservation) assert b == start + datetime.timedelta(days=7) # A new person gets the book every week. Someone has the book now # and there are 3 people ahead of me in the queue. I will get # the book in 7 days + 3 weeks c = Hold._calculate_until(start, 3, 1, default_loan, default_reservation) assert c == start + datetime.timedelta(days=(7 * 4)) # I'm first in line for 1 book. After 7 days, one copy is # released and I'll get my notification. a = Hold._calculate_until(start, 1, 1, default_loan, default_reservation) assert a == start + datetime.timedelta(days=7) # The book is reserved to me. I need to hurry up and check it out. d = Hold._calculate_until(start, 0, 1, default_loan, default_reservation) assert d == start + datetime.timedelta(days=1) # If there are no licenses, I will never get the book. e = Hold._calculate_until(start, 10, 0, default_loan, default_reservation) assert e == None
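# A worked sketch of the arithmetic the comments above walk through,
# assuming one notification per full loan+reservation cycle and the
# module's datetime_utc helper. It reproduces the expected values in the
# test; it is not necessarily the exact body of Hold._calculate_until.
import datetime

def estimated_wait(start, queue_position, total_licenses,
                   default_loan, default_reservation):
    if queue_position == 0:
        # The book is already reserved to this patron.
        return start + default_reservation
    if total_licenses == 0:
        # No licenses means the hold can never be fulfilled.
        return None
    cycle = default_loan + default_reservation  # one week in this test
    # Everyone currently holding a copy must finish one cycle first...
    cycles = 1
    if queue_position > total_licenses:
        # ...and each additional batch of `total_licenses` patrons ahead
        # of us adds another cycle.
        cycles += queue_position // total_licenses
        if total_licenses > 1 and queue_position % total_licenses == 0:
            cycles -= 1
    return start + cycle * cycles

# Matches the cases above, e.g. 20th in line for 4 copies -> 5 weeks.
assert estimated_wait(
    datetime_utc(2010, 1, 1), 20, 4,
    datetime.timedelta(days=6), datetime.timedelta(days=1),
) == datetime_utc(2010, 1, 1) + datetime.timedelta(days=35)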
def test_empty_feed_stops_import(self):
    # We don't follow the 'next' link of an empty feed.
    data = sample_data("metadata_updates_empty_response.opds", "opds")
    self.lookup.queue_response(
        200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)
    new_timestamp = self.monitor.run()

    # We could have followed the 'next' link, but we chose not to.
    assert [(None, None)] == self.monitor.imports
    assert 1 == len(self.lookup.requests)

    # Since there were no <entry> tags, the timestamp's finish
    # date was set to the <updated> date of the feed itself, minus
    # one day (to avoid race conditions).
    assert datetime_utc(2016, 9, 19, 19, 37, 10) == self.monitor.timestamp().finish
def test_datetime_utc(self, time, formatted, isoformat):
    """`datetime_utc` is a wrapper around `datetime.datetime` but it
    also includes UTC information when it is created.
    """
    time_format = "%Y-%m-%dT%H:%M:%S"
    dt = datetime.datetime(*time, tzinfo=pytz.UTC)
    util_dt = datetime_utc(*time)

    # The util function is the same as the datetime function with
    # pytz UTC information.
    assert dt == util_dt

    # A datetime object is returned and works like any datetime object.
    assert util_dt.tzinfo == pytz.UTC
    assert util_dt.strftime(time_format) == formatted
    assert util_dt.isoformat() == isoformat
    assert util_dt.year == time[0]
    assert util_dt.month == time[1]
    assert util_dt.day == time[2]
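# A minimal sketch of what a `datetime_utc` helper like the one tested
# above could look like: the same positional arguments as
# datetime.datetime, with pytz UTC attached. Illustrative only; the
# real helper may differ.
import datetime
import pytz

def datetime_utc_sketch(*args, **kwargs):
    kwargs["tzinfo"] = pytz.UTC
    return datetime.datetime(*args, **kwargs)

assert datetime_utc_sketch(2020, 1, 1).tzinfo == pytz.UTC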
def test_import_one_feed(self):
    data = sample_data("metadata_updates_response.opds", "opds")
    self.lookup.queue_response(
        200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)

    next_links, editions, timestamp = self.monitor.import_one_feed(
        None, None)

    # The 'next' links found in the OPDS feed are returned.
    assert ["http://next-link/"] == next_links

    # Insofar as is possible, all <entry> tags are converted into
    # Editions.
    assert ["9781594632556"] == [
        x.primary_identifier.identifier for x in editions]

    # The earliest time found in the OPDS feed is returned as a
    # candidate for the Monitor's timestamp.
    assert datetime_utc(2016, 9, 20, 19, 37, 2) == timestamp
def test_recent_activity(self):
    now = utc_now()
    epoch = datetime_utc(1970, 1, 1)
    epoch_plus_one_hour = epoch + datetime.timedelta(hours=1)
    data = self.get_data("get_recent_activity.json")
    self.api.queue_response(200, content=data)
    activity = list(self.api.recent_activity(epoch, epoch_plus_one_hour))
    assert 43 == len(activity)
    for i in activity:
        assert isinstance(i, CirculationData)

    [method, url, headers, data, params, kwargs] = self.api.requests.pop()
    assert "get" == method
    assert "https://enkilibrary.org/API/ItemAPI" == url
    assert "getRecentActivityTime" == params["method"]
    assert "0" == params["stime"]
    assert "3600" == params["etime"]

    # Unlike some API calls, it's not necessary to pass 'lib' in here.
    assert "lib" not in params
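# The "0" and "3600" values asserted above suggest the start/end times
# are sent as whole seconds since the Unix epoch, rendered as strings.
# A hedged sketch of that conversion (the real parameter-building code
# may differ):
import datetime
import pytz

def to_epoch_seconds_param(dt):
    epoch = datetime.datetime(1970, 1, 1, tzinfo=pytz.UTC)
    return str(int((dt - epoch).total_seconds()))

assert to_epoch_seconds_param(
    datetime.datetime(1970, 1, 1, 1, 0, 0, tzinfo=pytz.UTC)) == "3600"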
def test_parser(self): """Parse an ONIX file into Metadata objects.""" file = self.sample_data("onix_example.xml") metadata_records = ONIXExtractor().parse(BytesIO(file), "MIT Press") assert 2 == len(metadata_records) record = metadata_records[0] assert "Safe Spaces, Brave Spaces" == record.title assert "Diversity and Free Expression in Education" == record.subtitle assert "Palfrey, John" == record.contributors[0].sort_name assert "John Palfrey" == record.contributors[0].display_name assert "Palfrey" == record.contributors[0].family_name assert "Head of School at Phillips Academy" in record.contributors[ 0].biography assert "The MIT Press" == record.publisher assert None == record.imprint assert "9780262343664" == record.primary_identifier.identifier assert Identifier.ISBN == record.primary_identifier.type assert "eng" == record.language assert datetime_utc(2017, 10, 6) == record.issued subjects = record.subjects assert 7 == len(subjects) assert "EDU015000" == subjects[0].identifier assert Classifier.AUDIENCE_ADULT == subjects[-1].identifier assert Classifier.BISAC == subjects[0].type assert Classification.TRUSTED_DISTRIBUTOR_WEIGHT == subjects[0].weight assert Edition.BOOK_MEDIUM == record.medium assert 2017 == record.issued.year assert 1 == len(record.links) assert ( "the essential democratic values of diversity and free expression" in record.links[0].content) record = metadata_records[1] assert Edition.AUDIO_MEDIUM == record.medium assert "The Test Corporation" == record.contributors[0].display_name assert "Test Corporation, The" == record.contributors[0].sort_name
def test_add_publisher(self):
    edition = self._edition()
    edition.publisher = self._str
    edition.issued = datetime_utc(1894, 4, 5)

    record = Record()
    Annotator.add_publisher(record, edition)
    self._check_field(
        record,
        "264",
        {
            "a": "[Place of publication not identified]",
            "b": edition.publisher,
            "c": "1894",
        },
        [" ", "1"],
    )

    # If there's no publisher, the field is left out.
    record = Record()
    edition.publisher = None
    Annotator.add_publisher(record, edition)
    assert [] == record.get_fields("264")
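# For reference, a hedged sketch of how a 264 field with those subfields
# and indicators could be built with the classic pymarc API (older
# pymarc versions take `subfields` as a flat list; newer versions use
# Subfield objects). This is illustrative, not the Annotator's code.
from pymarc import Field

def add_publication_field_sketch(record, publisher, year):
    field = Field(
        tag="264",
        indicators=[" ", "1"],
        subfields=[
            "a", "[Place of publication not identified]",
            "b", publisher,
            "c", year,
        ],
    )
    record.add_field(field)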
def test_profile_document(self): # synchronize_annotations always shows up as settable, even if # the current value is None. self.patron.authorization_identifier = "abcd" assert None == self.patron.synchronize_annotations rep = self.store.profile_document assert { "simplified:authorization_identifier": "abcd", "settings": { "simplified:synchronize_annotations": None }, } == rep self.patron.synchronize_annotations = True self.patron.authorization_expires = datetime_utc( 2016, 1, 1, 10, 20, 30) rep = self.store.profile_document assert { "simplified:authorization_expires": "2016-01-01T10:20:30Z", "simplified:authorization_identifier": "abcd", "settings": { "simplified:synchronize_annotations": True }, } == rep
def test_strptime_utc(self, expect, date_string, format):
    assert strptime_utc(date_string, format) == datetime_utc(*expect)
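# A matching sketch for `strptime_utc`: parse with
# datetime.datetime.strptime and attach UTC, so the result compares
# equal to `datetime_utc(...)`. Illustrative only.
import datetime
import pytz

def strptime_utc_sketch(date_string, format):
    return datetime.datetime.strptime(date_string, format).replace(
        tzinfo=pytz.UTC)

assert strptime_utc_sketch("2020-01-01", "%Y-%m-%d") == datetime.datetime(
    2020, 1, 1, tzinfo=pytz.UTC)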
property_value = property_value.isoformat() + "Z" if isinstance(rwpm_item, list): result.append(property_value) else: result[property_object.key] = property_value elif isinstance(rwpm_item, RegistryItem): result = rwpm_item.key return result PROQUEST_PUBLICATION_1 = OPDS2Publication( metadata=PresentationMetadata( identifier="urn:proquest.com/document-id/1", title="Publićation # 1", modified=datetime_utc(2020, 1, 31, 0, 0, 0), ), links=LinkList([ Link( href="https://feed.org/document-id/1", rels=[OPDS2LinkRelationsRegistry.ACQUISITION], ) ]), ) PROQUEST_PUBLICATION_2 = OPDS2Publication( metadata=PresentationMetadata( identifier="urn:proquest.com/document-id/2", title="Publication # 2", modified=datetime_utc(2020, 1, 30, 0, 0, 0), ),
class TestSAMLFederatedMetadataExpirationValidator(object): @parameterized.expand([ ( "incorrect_xml", utc_now(), fixtures.INCORRECT_XML, SAMLFederatedMetadataValidationError, ), ( "without_valid_until_attribute", utc_now(), fixtures.FEDERATED_METADATA_WITHOUT_VALID_UNTIL_ATTRIBUTE, SAMLFederatedMetadataValidationError, ), ( "with_expired_valid_until_attribute", fixtures.FEDERATED_METADATA_VALID_UNTIL + SAMLFederatedMetadataExpirationValidator.MAX_CLOCK_SKEW + datetime.timedelta(minutes=1), fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE, SAMLFederatedMetadataValidationError, ), ( "with_valid_until_attribute_too_far_in_the_future", fixtures.FEDERATED_METADATA_VALID_UNTIL - SAMLFederatedMetadataExpirationValidator.MAX_VALID_TIME - datetime.timedelta(minutes=1), fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE, SAMLFederatedMetadataValidationError, ), ( "with_valid_until_attribute_less_than_current_time_and_less_than_max_clock_skew", fixtures.FEDERATED_METADATA_VALID_UNTIL + SAMLFederatedMetadataExpirationValidator.MAX_CLOCK_SKEW, fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE, None, ), ( "with_valid_until_attribute_greater_than_current_time_and_less_than_max_valid_time", fixtures.FEDERATED_METADATA_VALID_UNTIL - SAMLFederatedMetadataExpirationValidator.MAX_VALID_TIME + datetime.timedelta(minutes=1), fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE, None, ), ( "with_real_incommon_metadata", datetime_utc(2020, 11, 26, 14, 32, 42), open( os.path.join( os.path.dirname(os.path.abspath(__file__)), "../../../files/saml/incommon-metadata-idp-only.xml", )).read(), None, ), ]) def test_validate(self, _, current_time, metadata, expected_exception): # Arrange validator = SAMLFederatedMetadataExpirationValidator() federation = SAMLFederation(incommon.FEDERATION_TYPE, incommon.IDP_METADATA_SERVICE_URL) # Act, assert with freeze_time(current_time): if expected_exception: with pytest.raises(expected_exception): validator.validate(federation, metadata) else: validator.validate(federation, metadata)
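# A hedged sketch of the validUntil window the parameterized cases above
# imply: metadata is rejected when the validUntil attribute is missing,
# when it lies further in the past than MAX_CLOCK_SKEW allows, or when
# it lies further in the future than MAX_VALID_TIME allows. The constant
# names come from the validator; the function itself is illustrative.
def valid_until_is_acceptable(current_time, valid_until,
                              max_clock_skew, max_valid_time):
    if valid_until is None:
        return False
    if current_time - valid_until > max_clock_skew:
        # Already expired, beyond the allowed clock skew.
        return False
    if valid_until - current_time > max_valid_time:
        # Suspiciously far in the future.
        return False
    return True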
class TestS3Uploader(S3UploaderTest): def test_names(self): # The NAME associated with this class must be the same as its # key in the MirrorUploader implementation registry, and it's # better if it's the same as the name of the external # integration. assert S3Uploader.NAME == ExternalIntegration.S3 assert (S3Uploader == MirrorUploader.IMPLEMENTATION_REGISTRY[ ExternalIntegration.S3]) def test_instantiation(self): integration = self._external_integration( ExternalIntegration.S3, goal=ExternalIntegration.STORAGE_GOAL) integration.username = "******" integration.password = "******" integration.setting( S3UploaderConfiguration.URL_TEMPLATE_KEY).value = "a transform" uploader = MirrorUploader.implementation(integration) assert True == isinstance(uploader, S3Uploader) # The URL_TEMPLATE_KEY setting becomes the .url_transform # attribute on the S3Uploader object. assert "a transform" == uploader.url_transform @parameterized.expand([ ("empty_credentials", None, None), ("empty_string_credentials", "", ""), ("non_empty_string_credentials", "username", "password"), ]) def test_initialization(self, name, username, password): # Arrange settings = {"username": username, "password": password} integration = self._external_integration( ExternalIntegration.S3, goal=ExternalIntegration.STORAGE_GOAL, settings=settings, ) client_class = MagicMock() # Act S3Uploader(integration, client_class=client_class) # Assert assert client_class.call_count == 2 service_name = client_class.call_args_list[0].args[0] region_name = client_class.call_args_list[0].kwargs["region_name"] aws_access_key_id = client_class.call_args_list[0].kwargs[ "aws_access_key_id"] aws_secret_access_key = client_class.call_args_list[0].kwargs[ "aws_secret_access_key"] config = client_class.call_args_list[0].kwargs["config"] assert service_name == "s3" assert region_name == S3UploaderConfiguration.S3_DEFAULT_REGION assert aws_access_key_id == None assert aws_secret_access_key == None assert config.signature_version == botocore.UNSIGNED assert (config.s3["addressing_style"] == S3UploaderConfiguration.S3_DEFAULT_ADDRESSING_STYLE) service_name = client_class.call_args_list[1].args[0] region_name = client_class.call_args_list[1].kwargs["region_name"] aws_access_key_id = client_class.call_args_list[1].kwargs[ "aws_access_key_id"] aws_secret_access_key = client_class.call_args_list[1].kwargs[ "aws_secret_access_key"] assert service_name == "s3" assert region_name == S3UploaderConfiguration.S3_DEFAULT_REGION assert aws_access_key_id == (username if username != "" else None) assert aws_secret_access_key == (password if password != "" else None) assert "config" not in client_class.call_args_list[1].kwargs def test_custom_client_class(self): """You can specify a client class to use instead of boto3.client.""" integration = self._integration() uploader = S3Uploader(integration, MockS3Client) assert isinstance(uploader.client, MockS3Client) def test_get_bucket(self): buckets = { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "banana", S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "bucket", } buckets_plus_irrelevant_setting = dict(buckets) buckets_plus_irrelevant_setting["not-a-bucket-at-all"] = "value" uploader = self._create_s3_uploader(**buckets_plus_irrelevant_setting) # This S3Uploader knows about the configured buckets. It # wasn't informed of the irrelevant 'not-a-bucket-at-all' # setting. 
assert buckets == uploader.buckets # get_bucket just does a lookup in .buckets uploader.buckets["foo"] = object() result = uploader.get_bucket("foo") assert uploader.buckets["foo"] == result @parameterized.expand([ ( "s3_url_with_path_without_slash", "a-bucket", "a-path", "https://a-bucket.s3.amazonaws.com/a-path", None, ), ( "s3_dummy_url_with_path_without_slash", "dummy", "dummy", "https://dummy.s3.amazonaws.com/dummy", None, ), ( "s3_path_style_url_with_path_without_slash", "a-bucket", "a-path", "https://s3.amazonaws.com/a-bucket/a-path", None, S3AddressingStyle.PATH.value, ), ( "s3_path_style_dummy_url_with_path_without_slash", "dummy", "dummy", "https://s3.amazonaws.com/dummy/dummy", None, S3AddressingStyle.PATH.value, ), ( "s3_url_with_path_with_slash", "a-bucket", "/a-path", "https://a-bucket.s3.amazonaws.com/a-path", None, ), ( "s3_path_style_url_with_path_with_slash", "a-bucket", "/a-path", "https://s3.amazonaws.com/a-bucket/a-path", None, S3AddressingStyle.PATH.value, ), ( "s3_url_with_custom_region_and_path_without_slash", "a-bucket", "a-path", "https://a-bucket.s3.us-east-2.amazonaws.com/a-path", "us-east-2", ), ( "s3_path_style_url_with_custom_region_and_path_without_slash", "a-bucket", "a-path", "https://s3.us-east-2.amazonaws.com/a-bucket/a-path", "us-east-2", S3AddressingStyle.PATH.value, ), ( "s3_url_with_custom_region_and_path_with_slash", "a-bucket", "/a-path", "https://a-bucket.s3.us-east-3.amazonaws.com/a-path", "us-east-3", ), ( "s3_path_style_url_with_custom_region_and_path_with_slash", "a-bucket", "/a-path", "https://s3.us-east-3.amazonaws.com/a-bucket/a-path", "us-east-3", S3AddressingStyle.PATH.value, ), ( "custom_http_url_and_path_without_slash", "http://a-bucket.com/", "a-path", "http://a-bucket.com/a-path", None, ), ( "custom_http_url_and_path_with_slash", "http://a-bucket.com/", "/a-path", "http://a-bucket.com/a-path", None, ), ( "custom_http_url_and_path_without_slash", "https://a-bucket.com/", "a-path", "https://a-bucket.com/a-path", None, ), ( "custom_http_url_and_path_with_slash", "https://a-bucket.com/", "/a-path", "https://a-bucket.com/a-path", None, ), ]) def test_url(self, name, bucket, path, expected_result, region=None, addressing_style=None): # Arrange uploader = self._create_s3_uploader(region=region, addressing_style=addressing_style) # Act result = uploader.url(bucket, path) # Assert assert result == expected_result @parameterized.expand([ ( "implicit_s3_url_template", "bucket", "the key", "https://bucket.s3.amazonaws.com/the%20key", ), ( "implicit_s3_url_template_with_custom_region", "bucket", "the key", "https://bucket.s3.us-east-2.amazonaws.com/the%20key", None, "us-east-2", ), ( "explicit_s3_url_template", "bucket", "the key", "https://bucket.s3.amazonaws.com/the%20key", S3UploaderConfiguration.URL_TEMPLATE_DEFAULT, ), ( "explicit_s3_url_template_with_custom_region", "bucket", "the key", "https://bucket.s3.us-east-2.amazonaws.com/the%20key", S3UploaderConfiguration.URL_TEMPLATE_DEFAULT, "us-east-2", ), ( "http_url_template", "bucket", "the këy", "http://bucket/the%20k%C3%ABy", S3UploaderConfiguration.URL_TEMPLATE_HTTP, ), ( "https_url_template", "bucket", "the këy", "https://bucket/the%20k%C3%ABy", S3UploaderConfiguration.URL_TEMPLATE_HTTPS, ), ]) def test_final_mirror_url(self, name, bucket, key, expected_result, url_transform=None, region=None): # Arrange uploader = self._create_s3_uploader(region=region) if url_transform: uploader.url_transform = url_transform # Act result = uploader.final_mirror_url(bucket, key) # Assert if not 
url_transform: assert (S3UploaderConfiguration.URL_TEMPLATE_DEFAULT == uploader.url_transform) assert result == expected_result def test_key_join(self): """Test the code used to build S3 keys from parts.""" parts = ["Gutenberg", b"Gutenberg ID", 1234, "Die Flügelmaus+.epub"] assert ("Gutenberg/Gutenberg%20ID/1234/Die%20Fl%C3%BCgelmaus%2B.epub" == S3Uploader.key_join(parts)) @parameterized.expand([ ( "with_gutenberg_cover_generator_data_source", "test-book-covers-s3-bucket", DataSource.GUTENBERG_COVER_GENERATOR, "https://test-book-covers-s3-bucket.s3.amazonaws.com/Gutenberg%20Illustrated/", ), ( "with_overdrive_data_source", "test-book-covers-s3-bucket", DataSource.OVERDRIVE, "https://test-book-covers-s3-bucket.s3.amazonaws.com/Overdrive/", ), ( "with_overdrive_data_source_and_scaled_size", "test-book-covers-s3-bucket", DataSource.OVERDRIVE, "https://test-book-covers-s3-bucket.s3.amazonaws.com/scaled/300/Overdrive/", 300, ), ( "with_gutenberg_cover_generator_data_source_and_custom_region", "test-book-covers-s3-bucket", DataSource.GUTENBERG_COVER_GENERATOR, "https://test-book-covers-s3-bucket.s3.us-east-3.amazonaws.com/Gutenberg%20Illustrated/", None, "us-east-3", ), ( "with_overdrive_data_source_and_custom_region", "test-book-covers-s3-bucket", DataSource.OVERDRIVE, "https://test-book-covers-s3-bucket.s3.us-east-3.amazonaws.com/Overdrive/", None, "us-east-3", ), ( "with_overdrive_data_source_and_scaled_size_and_custom_region", "test-book-covers-s3-bucket", DataSource.OVERDRIVE, "https://test-book-covers-s3-bucket.s3.us-east-3.amazonaws.com/scaled/300/Overdrive/", 300, "us-east-3", ), ]) def test_cover_image_root( self, name, bucket, data_source_name, expected_result, scaled_size=None, region=None, ): # Arrange uploader = self._create_s3_uploader(region=region) data_source = DataSource.lookup(self._db, data_source_name) # Act result = uploader.cover_image_root(bucket, data_source, scaled_size=scaled_size) # Assert assert result == expected_result @parameterized.expand([ ( "with_default_region", "test-open-access-s3-bucket", "https://test-open-access-s3-bucket.s3.amazonaws.com/", ), ( "with_custom_region", "test-open-access-s3-bucket", "https://test-open-access-s3-bucket.s3.us-east-3.amazonaws.com/", "us-east-3", ), ]) def test_content_root(self, name, bucket, expected_result, region=None): # Arrange uploader = self._create_s3_uploader(region=region) # Act result = uploader.content_root(bucket) # Assert assert result == expected_result @parameterized.expand([ ( "s3_url", "test-marc-s3-bucket", "SHORT", "https://test-marc-s3-bucket.s3.amazonaws.com/SHORT/", ), ( "s3_url_with_custom_region", "test-marc-s3-bucket", "SHORT", "https://test-marc-s3-bucket.s3.us-east-2.amazonaws.com/SHORT/", "us-east-2", ), ("custom_http_url", "http://my-feed/", "SHORT", "http://my-feed/SHORT/"), ("custom_https_url", "https://my-feed/", "SHORT", "https://my-feed/SHORT/"), ]) def test_marc_file_root(self, name, bucket, library_name, expected_result, region=None): # Arrange uploader = self._create_s3_uploader(region=region) library = self._library(short_name=library_name) # Act result = uploader.marc_file_root(bucket, library) # Assert assert result == expected_result @parameterized.expand([ ( "with_identifier", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK.epub", ), ( "with_custom_extension", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK.pdf", "pdf", ), ( 
"with_custom_dotted_extension", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK.pdf", ".pdf", ), ( "with_custom_data_source", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK.epub", None, DataSource.UNGLUE_IT, ), ( "with_custom_title", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK/On%20Books.epub", None, None, "On Books", ), ( "with_custom_extension_and_title_and_data_source", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/On%20Books.pdf", ".pdf", DataSource.UNGLUE_IT, "On Books", ), ( "with_custom_extension_and_title_and_data_source_and_region", { S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.us-east-3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/On%20Books.pdf", ".pdf", DataSource.UNGLUE_IT, "On Books", "us-east-3", ), ( "with_protected_access_and_custom_extension_and_title_and_data_source_and_region", { S3UploaderConfiguration.PROTECTED_CONTENT_BUCKET_KEY: "thebooks" }, "ABOOK", "https://thebooks.s3.us-east-3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/On%20Books.pdf", ".pdf", DataSource.UNGLUE_IT, "On Books", "us-east-3", False, ), ]) def test_book_url( self, name, buckets, identifier, expected_result, extension=None, data_source_name=None, title=None, region=None, open_access=True, ): # Arrange identifier = self._identifier(foreign_id=identifier) uploader = self._create_s3_uploader(region=region, **buckets) parameters = {"identifier": identifier, "open_access": open_access} if extension: parameters["extension"] = extension if title: parameters["title"] = title if data_source_name: data_source = DataSource.lookup(self._db, DataSource.UNGLUE_IT) parameters["data_source"] = data_source # Act result = uploader.book_url(**parameters) # Assert assert result == expected_result @parameterized.expand([ ( "without_scaled_size", { S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers" }, DataSource.UNGLUE_IT, "ABOOK", "filename", "https://thecovers.s3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/filename", ), ( "without_scaled_size_and_with_custom_region", { S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers" }, DataSource.UNGLUE_IT, "ABOOK", "filename", "https://thecovers.s3.us-east-3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/filename", None, "us-east-3", ), ( "with_scaled_size", { S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers" }, DataSource.UNGLUE_IT, "ABOOK", "filename", "https://thecovers.s3.amazonaws.com/scaled/601/unglue.it/Gutenberg%20ID/ABOOK/filename", 601, ), ( "with_scaled_size_and_custom_region", { S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers" }, DataSource.UNGLUE_IT, "ABOOK", "filename", "https://thecovers.s3.us-east-3.amazonaws.com/scaled/601/unglue.it/Gutenberg%20ID/ABOOK/filename", 601, "us-east-3", ), ]) def test_cover_image_url( self, name, buckets, data_source_name, identifier, filename, expected_result, scaled_size=None, region=None, ): # identifier = self._identifier(foreign_id="ABOOK") # buckets = {S3Uploader.BOOK_COVERS_BUCKET_KEY : 'thecovers'} # uploader = self._uploader(**buckets) # m = uploader.cover_image_url # # unglueit = DataSource.lookup(self._db, DataSource.UNGLUE_IT) # identifier = self._identifier(foreign_id="ABOOK") # 
eq_('https://s3.amazonaws.com/thecovers/scaled/601/unglue.it/Gutenberg+ID/ABOOK/filename', # m(unglueit, identifier, "filename", scaled_size=601)) # Arrange data_source = DataSource.lookup(self._db, data_source_name) identifier = self._identifier(foreign_id=identifier) uploader = self._create_s3_uploader(region=region, **buckets) # Act result = uploader.cover_image_url(data_source, identifier, filename, scaled_size=scaled_size) # Assert assert result == expected_result @parameterized.expand([ ( "with_s3_bucket_and_end_time", "marc", "SHORT", "Lane", datetime_utc(2020, 1, 1, 0, 0, 0), "https://marc.s3.amazonaws.com/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00/Lane.mrc", ), ( "with_s3_bucket_and_end_time_and_start_time", "marc", "SHORT", "Lane", datetime_utc(2020, 1, 2, 0, 0, 0), "https://marc.s3.amazonaws.com/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc", datetime_utc(2020, 1, 1, 0, 0, 0), ), ( "with_s3_bucket_and_end_time_and_start_time_and_custom_region", "marc", "SHORT", "Lane", datetime_utc(2020, 1, 2, 0, 0, 0), "https://marc.s3.us-east-2.amazonaws.com/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc", datetime_utc(2020, 1, 1, 0, 0, 0), "us-east-2", ), ( "with_http_bucket_and_end_time_and_start_time", "http://marc", "SHORT", "Lane", datetime_utc(2020, 1, 2, 0, 0, 0), "http://marc/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc", datetime_utc(2020, 1, 1, 0, 0, 0), ), ( "with_https_bucket_and_end_time_and_start_time", "https://marc", "SHORT", "Lane", datetime_utc(2020, 1, 2, 0, 0, 0), "https://marc/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc", datetime_utc(2020, 1, 1, 0, 0, 0), ), ]) def test_marc_file_url( self, name, bucket, library_name, lane_name, end_time, expected_result, start_time=None, region=None, ): # Arrange library = self._library(short_name=library_name) lane = self._lane(display_name=lane_name) buckets = {S3UploaderConfiguration.MARC_BUCKET_KEY: bucket} uploader = self._create_s3_uploader(region=region, **buckets) # Act result = uploader.marc_file_url(library, lane, end_time, start_time) # Assert assert result == expected_result @parameterized.expand([ ( "s3_path_style_request_without_region", "https://s3.amazonaws.com/bucket/directory/filename.jpg", ("bucket", "directory/filename.jpg"), ), ( "s3_path_style_request_with_region", "https://s3.us-east-2.amazonaws.com/bucket/directory/filename.jpg", ("bucket", "directory/filename.jpg"), ), ( "s3_virtual_hosted_style_request_with_global_endpoint", "https://bucket.s3.amazonaws.com/directory/filename.jpg", ("bucket", "directory/filename.jpg"), ), ( "s3_virtual_hosted_style_request_with_dashed_region", "https://bucket.s3-us-east-2.amazonaws.com/directory/filename.jpg", ("bucket", "directory/filename.jpg"), ), ( "s3_virtual_hosted_style_request_with_dotted_region", "https://bucket.s3.us-east-2.amazonaws.com/directory/filename.jpg", ("bucket", "directory/filename.jpg"), ), ( "http_url", "http://book-covers.nypl.org/directory/filename.jpg", ("book-covers.nypl.org", "directory/filename.jpg"), ), ( "https_url", "https://book-covers.nypl.org/directory/filename.jpg", ("book-covers.nypl.org", "directory/filename.jpg"), ), ( "http_url_with_escaped_symbols", "http://book-covers.nypl.org/directory/filename+with+spaces%21.jpg", ("book-covers.nypl.org", "directory/filename with spaces!.jpg"), ), ( "http_url_with_escaped_symbols_but_unquote_set_to_false", 
"http://book-covers.nypl.org/directory/filename+with+spaces%21.jpg", ("book-covers.nypl.org", "directory/filename+with+spaces%21.jpg"), False, ), ]) def test_split_url(self, name, url, expected_result, unquote=True): # Arrange s3_uploader = self._create_s3_uploader() # Act result = s3_uploader.split_url(url, unquote) # Assert assert result == expected_result def test_mirror_one(self): edition, pool = self._edition(with_license_pool=True) original_cover_location = "http://example.com/a-cover.png" content = open(self.sample_cover_path("test-book-cover.png"), "rb").read() cover, ignore = pool.add_link( Hyperlink.IMAGE, original_cover_location, edition.data_source, Representation.PNG_MEDIA_TYPE, content=content, ) cover_rep = cover.resource.representation assert None == cover_rep.mirrored_at original_epub_location = "https://books.com/a-book.epub" epub, ignore = pool.add_link( Hyperlink.OPEN_ACCESS_DOWNLOAD, original_epub_location, edition.data_source, Representation.EPUB_MEDIA_TYPE, content="i'm an epub", ) epub_rep = epub.resource.representation assert None == epub_rep.mirrored_at s3 = self._create_s3_uploader(client_class=MockS3Client) # Mock final_mirror_url so we can verify that it's called with # the right arguments def mock_final_mirror_url(bucket, key): return "final_mirror_url was called with bucket %s, key %s" % ( bucket, key) s3.final_mirror_url = mock_final_mirror_url book_url = "http://books-go/here.epub" cover_url = "http://s3.amazonaws.com/covers-go/here.png" s3.mirror_one(cover.resource.representation, cover_url) s3.mirror_one(epub.resource.representation, book_url) [ [data1, bucket1, key1, args1, ignore1], [data2, bucket2, key2, args2, ignore2], ] = s3.client.uploads # Both representations have had .mirror_url set and been # mirrored to those URLs. assert data1.startswith(b"\x89") assert "covers-go" == bucket1 assert "here.png" == key1 assert Representation.PNG_MEDIA_TYPE == args1["ContentType"] assert (utc_now() - cover_rep.mirrored_at).seconds < 10 assert b"i'm an epub" == data2 assert "books-go" == bucket2 assert "here.epub" == key2 assert Representation.EPUB_MEDIA_TYPE == args2["ContentType"] # In both cases, mirror_url was set to the result of final_mirror_url. assert ( "final_mirror_url was called with bucket books-go, key here.epub" == epub_rep.mirror_url) assert ( "final_mirror_url was called with bucket covers-go, key here.png" == cover_rep.mirror_url) # mirrored-at was set when the representation was 'mirrored' for rep in epub_rep, cover_rep: assert (utc_now() - rep.mirrored_at).seconds < 10 def test_mirror_failure(self): edition, pool = self._edition(with_license_pool=True) original_epub_location = "https://books.com/a-book.epub" epub, ignore = pool.add_link( Hyperlink.OPEN_ACCESS_DOWNLOAD, original_epub_location, edition.data_source, Representation.EPUB_MEDIA_TYPE, content="i'm an epub", ) epub_rep = epub.resource.representation uploader = self._create_s3_uploader(MockS3Client) # A network failure is treated as a transient error. uploader.client.fail_with = BotoCoreError() uploader.mirror_one(epub_rep, self._url) assert None == epub_rep.mirrored_at assert None == epub_rep.mirror_exception # An S3 credential failure is treated as a transient error. 
response = dict(Error=dict( Code=401, Message="Bad credentials", )) uploader.client.fail_with = ClientError(response, "SomeOperation") uploader.mirror_one(epub_rep, self._url) assert None == epub_rep.mirrored_at assert None == epub_rep.mirror_exception # Because the file was not successfully uploaded, # final_mirror_url was never called and mirror_url is # was not set. assert None == epub_rep.mirror_url # A bug in the code is not treated as a transient error -- # the exception propagates through. uploader.client.fail_with = Exception("crash!") pytest.raises(Exception, uploader.mirror_one, epub_rep, self._url) def test_svg_mirroring(self): edition, pool = self._edition(with_license_pool=True) original = self._url # Create an SVG cover for the book. svg = """<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> <svg xmlns="http://www.w3.org/2000/svg" width="100" height="50"> <ellipse cx="50" cy="25" rx="50" ry="25" style="fill:blue;"/> </svg>""" hyperlink, ignore = pool.add_link( Hyperlink.IMAGE, original, edition.data_source, Representation.SVG_MEDIA_TYPE, content=svg, ) # 'Upload' it to S3. s3 = self._create_s3_uploader(MockS3Client) s3.mirror_one(hyperlink.resource.representation, self._url) [[data, bucket, key, args, ignore]] = s3.client.uploads assert Representation.SVG_MEDIA_TYPE == args["ContentType"] assert b"svg" in data assert b"PNG" not in data def test_multipart_upload(self): class MockMultipartS3Upload(MultipartS3Upload): completed = None aborted = None def __init__(self, uploader, representation, mirror_to): self.parts = [] MockMultipartS3Upload.completed = False MockMultipartS3Upload.aborted = False def upload_part(self, content): self.parts.append(content) def complete(self): MockMultipartS3Upload.completed = True def abort(self): MockMultipartS3Upload.aborted = True rep, ignore = create( self._db, Representation, url="http://books.mrc", media_type=Representation.MARC_MEDIA_TYPE, ) s3 = self._create_s3_uploader(MockS3Client) # Successful upload with s3.multipart_upload(rep, rep.url, upload_class=MockMultipartS3Upload) as upload: assert [] == upload.parts assert False == upload.completed assert False == upload.aborted upload.upload_part("Part 1") upload.upload_part("Part 2") assert ["Part 1", "Part 2"] == upload.parts assert True == MockMultipartS3Upload.completed assert False == MockMultipartS3Upload.aborted assert None == rep.mirror_exception class FailingMultipartS3Upload(MockMultipartS3Upload): def upload_part(self, content): raise Exception("Error!") # Failed during upload with s3.multipart_upload( rep, rep.url, upload_class=FailingMultipartS3Upload) as upload: upload.upload_part("Part 1") assert False == MockMultipartS3Upload.completed assert True == MockMultipartS3Upload.aborted assert "Error!" == rep.mirror_exception class AnotherFailingMultipartS3Upload(MockMultipartS3Upload): def complete(self): raise Exception("Error!") rep.mirror_exception = None # Failed during completion with s3.multipart_upload( rep, rep.url, upload_class=AnotherFailingMultipartS3Upload) as upload: upload.upload_part("Part 1") assert False == MockMultipartS3Upload.completed assert True == MockMultipartS3Upload.aborted assert "Error!" 
== rep.mirror_exception @parameterized.expand([ ( "default_expiration_parameter", None, int(S3UploaderConfiguration.S3_DEFAULT_PRESIGNED_URL_EXPIRATION), ), ( "empty_expiration_parameter", { S3UploaderConfiguration.S3_PRESIGNED_URL_EXPIRATION: 100 }, 100, ), ]) def test_sign_url(self, name, expiration_settings, expected_expiration): # Arrange region = "us-east-1" bucket = "bucket" filename = "filename" url = "https://{0}.s3.{1}.amazonaws.com/{2}".format( bucket, region, filename) expected_url = url + "?AWSAccessKeyId=KEY&Expires=1&Signature=S" settings = expiration_settings if expiration_settings else {} s3_uploader = self._create_s3_uploader(region=region, **settings) s3_uploader.split_url = MagicMock(return_value=(bucket, filename)) s3_uploader.client.generate_presigned_url = MagicMock( return_value=expected_url) # Act result = s3_uploader.sign_url(url) # Assert assert result == expected_url s3_uploader.split_url.assert_called_once_with(url) s3_uploader.client.generate_presigned_url.assert_called_once_with( "get_object", ExpiresIn=expected_expiration, Params={ "Bucket": bucket, "Key": filename }, )
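# A hedged sketch of the two S3 URL styles exercised by the test_url
# cases above: virtual-hosted style puts the bucket in the hostname,
# path style puts it in the path, a non-default region is inserted into
# the hostname, and bucket values that are already http(s) URLs are used
# as-is. This illustrates the expected URLs, not S3Uploader.url itself.
def s3_url_sketch(bucket, path, region=None, path_style=False):
    if bucket.startswith("http://") or bucket.startswith("https://"):
        return bucket.rstrip("/") + "/" + path.lstrip("/")
    host_region = "s3.%s" % region if region else "s3"
    path = path.lstrip("/")
    if path_style:
        return "https://%s.amazonaws.com/%s/%s" % (host_region, bucket, path)
    return "https://%s.%s.amazonaws.com/%s" % (bucket, host_region, path)

assert (s3_url_sketch("a-bucket", "/a-path", region="us-east-2")
        == "https://a-bucket.s3.us-east-2.amazonaws.com/a-path")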
def test_log(self): # Basic test of CirculationEvent.log. pool = self._licensepool(edition=None) library = self._default_library event_name = CirculationEvent.DISTRIBUTOR_CHECKOUT old_value = 10 new_value = 8 start = datetime_utc(2019, 1, 1) end = datetime_utc(2019, 1, 2) location = "Westgate Branch" m = CirculationEvent.log event, is_new = m( self._db, license_pool=pool, event_name=event_name, library=library, old_value=old_value, new_value=new_value, start=start, end=end, location=location, ) assert True == is_new assert pool == event.license_pool assert library == event.library assert -2 == event.delta # calculated from old_value and new_value assert start == event.start assert end == event.end assert location == event.location # If log finds another event with the same license pool, # library, event name, and start date, that event is returned # unchanged. event, is_new = m( self._db, license_pool=pool, event_name=event_name, library=library, start=start, # These values will be ignored. old_value=500, new_value=200, end=utc_now(), location="another location", ) assert False == is_new assert pool == event.license_pool assert library == event.library assert -2 == event.delta assert start == event.start assert end == event.end assert location == event.location # If no timestamp is provided, the current time is used. This # is the most common case, so basically a new event will be # created each time you call log(). event, is_new = m( self._db, license_pool=pool, event_name=event_name, library=library, old_value=old_value, new_value=new_value, end=end, location=location, ) assert (utc_now() - event.start).total_seconds() < 2 assert True == is_new assert pool == event.license_pool assert library == event.library assert -2 == event.delta assert end == event.end assert location == event.location
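# A hedged sketch of the two behaviors the comments above call out: the
# delta is derived from old_value and new_value, and a missing start
# time defaults to the current time. It assumes the module's utc_now and
# datetime_utc helpers; names are illustrative, not CirculationEvent.log's
# actual signature.
def event_delta_and_start(old_value, new_value, start=None):
    delta = None
    if old_value is not None and new_value is not None:
        delta = new_value - old_value  # e.g. 10 -> 8 gives -2
    return delta, (start or utc_now())

assert event_delta_and_start(10, 8, datetime_utc(2019, 1, 1))[0] == -2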