Example #1
    def test_update(self):
        # update() can modify the fields of a Timestamp that aren't
        # used to identify it.
        stamp = Timestamp.stamp(self._db, "service", Timestamp.SCRIPT_TYPE)
        start = datetime_utc(2010, 1, 2)
        finish = datetime_utc(2018, 3, 4)
        achievements = self._str
        counter = self._id
        exception = self._str
        stamp.update(start, finish, achievements, counter, exception)

        assert start == stamp.start
        assert finish == stamp.finish
        assert achievements == stamp.achievements
        assert counter == stamp.counter
        assert exception == stamp.exception

        # .exception is the only field update() will set to a value of
        # None. For all other fields, None means "don't update the existing
        # value".
        stamp.update()
        assert start == stamp.start
        assert finish == stamp.finish
        assert achievements == stamp.achievements
        assert counter == stamp.counter
        assert None == stamp.exception
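
# A hedged sketch of the update() semantics the assertions above rely on:
# None means "keep the existing value" for every field except .exception,
# which is overwritten unconditionally. Illustrative only, not the actual
# Timestamp model code.
class TimestampSketch:
    def __init__(self):
        self.start = None
        self.finish = None
        self.achievements = None
        self.counter = None
        self.exception = None

    def update(self, start=None, finish=None, achievements=None,
               counter=None, exception=None):
        if start is not None:
            self.start = start
        if finish is not None:
            self.finish = finish
        if achievements is not None:
            self.achievements = achievements
        if counter is not None:
            self.counter = counter
        # .exception is always overwritten, even with None.
        self.exception = exception
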
    def test_patron_activity(self):
        data = self.get_data("patron_response.json")
        self.api.queue_response(200, content=data)
        patron = self._patron()
        patron.authorization_identifier = "123"
        [loan] = self.api.patron_activity(patron, "pin")

        # An appropriate Enki API call was issued.
        [method, url, headers, data, params, kwargs] = self.api.requests.pop()
        assert "get" == method
        assert self.api.base_url + "UserAPI" == url
        assert "getSEPatronData" == params["method"]
        assert "123" == params["username"]
        assert "pin" == params["password"]

        # In particular, the Enki library ID associated with the
        # patron's library was used as the 'lib' parameter.
        assert "c" == params["lib"]

        # The result is a single LoanInfo.
        assert isinstance(loan, LoanInfo)
        assert Identifier.ENKI_ID == loan.identifier_type
        assert DataSource.ENKI == loan.data_source_name
        assert "231" == loan.identifier
        assert self.collection == loan.collection(self._db)
        assert datetime_utc(2017, 8, 15, 14, 56, 51) == loan.start_date
        assert datetime_utc(2017, 9, 5, 14, 56, 51) == loan.end_date
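
# For illustration, a plain-HTTP version of the request the mock records
# above. The helper name and the use of `requests` are assumptions; the real
# EnkiAPI issues this call through its own HTTP layer.
import requests

def get_patron_data(base_url, username, pin, enki_library_id):
    """GET {base_url}UserAPI with the parameters asserted in the test."""
    params = {
        "method": "getSEPatronData",
        "username": username,
        "password": pin,
        "lib": enki_library_id,
    }
    return requests.get(base_url + "UserAPI", params=params)
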
    def test_checkout_acs_parser(self):
        """Test that checkout info for ACS Enki books is parsed correctly."""
        data = self.get_data("checked_out_acs.json")
        result = json.loads(data)
        loan = self.api.parse_patron_loans(
            result["result"]["checkedOutItems"][0])
        assert loan.data_source_name == DataSource.ENKI
        assert loan.identifier_type == Identifier.ENKI_ID
        assert loan.identifier == "3334"
        assert loan.start_date == datetime_utc(2017, 8, 23, 19, 42, 35, 0)
        assert loan.end_date == datetime_utc(2017, 9, 13, 19, 42, 35, 0)
Example #4
    def test_update(self):
        # Create two books that are part of series, and one book that
        # is not.
        series1 = self._edition()
        series1.series = "Series 1"

        series2 = self._edition()
        series2.series = "Series Two"

        no_series = self._edition()
        assert None == no_series.series

        update_time = datetime_utc(2015, 1, 1)

        # To create the necessary mocked objects, _customlist calls _work,
        # which calls _edition (making an edition and a pool through
        # _licensepool) and then creates the Work through get_one_or_create.
        custom_list, ignore = self._customlist()
        manager = BooksInSeries(custom_list)
        manager.update(update_time)

        [entry1] = [x for x in custom_list.entries if x.edition.series == "Series 1"]
        [entry2] = [x for x in custom_list.entries if x.edition.series == "Series Two"]

        assert update_time == entry1.first_appearance
        assert update_time == entry1.most_recent_appearance

        # In a shocking twist, one of the entries turns out not to
        # have a series, while the entry previously thought not to
        # have a series actually does.
        series2.series = None
        no_series.series = "Actually I do have a series."
        self._db.commit()

        new_update_time = datetime_utc(2016, 1, 1)

        manager.update(new_update_time)

        # Entry #2 has been removed from the list, and a new entry added.
        [old_entry] = [x for x in custom_list.entries if x.edition.series == "Series 1"]
        [new_entry] = [
            x
            for x in custom_list.entries
            if x.edition.series == "Actually I do have a series."
        ]
        assert update_time == old_entry.first_appearance
        assert new_update_time == old_entry.most_recent_appearance
        assert new_update_time == new_entry.first_appearance
        assert new_update_time == new_entry.most_recent_appearance
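
# A hedged sketch of what an update() like the one exercised above might do:
# keep every edition that belongs to a series on the custom list, drop any
# entry whose edition no longer has a series, and stamp appearances with the
# update time. The add_entry/remove_entry names are assumptions about the
# CustomList API, not a verified implementation.
def update_books_in_series(custom_list, editions, update_time):
    for entry in list(custom_list.entries):
        if not entry.edition.series:
            custom_list.remove_entry(entry.edition)
    for edition in editions:
        if edition.series:
            custom_list.add_entry(edition, first_appearance=update_time)
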
    def test_all_ids_without_date(self):
        # TODO: This tests that all_ids doesn't crash when you pass in
        # an empty date. It doesn't test anything about all_ids except the
        # return value.

        monitor = OdiloCirculationMonitor(self._db,
                                          self.collection,
                                          api_class=MockOdiloAPI)
        assert monitor, "Monitor null !!"
        assert ExternalIntegration.ODILO == monitor.protocol, "Wat??"

        records_metadata_data, records_metadata_json = self.sample_json(
            "records_metadata.json")
        monitor.api.queue_response(200, content=records_metadata_data)

        availability_data = self.sample_data("record_availability.json")
        for record in records_metadata_json:
            monitor.api.queue_response(200, content=availability_data)

        monitor.api.queue_response(200,
                                   content="[]")  # No more resources retrieved

        updated, new = monitor.all_ids(datetime_utc(2017, 9, 1))
        assert 10 == updated
        assert 10 == new

        self.api.log.info(
            "Odilo circulation monitor without date finished ok!!")
    def test_fulfill_success(self):
        # Test the fulfill() method.
        patron = self._patron()
        patron.authorization_identifier = "123"
        pool = self._licensepool(None)

        data = self.get_data("checked_out_acs.json")
        self.api.queue_response(200, content=data)
        fulfillment = self.api.fulfill(patron, "pin", pool, "internal format")

        # An appropriate request to the "getSELink" endpoint was made.
        [method, url, headers, data, params, kwargs] = self.api.requests.pop()
        assert "get" == method
        assert self.api.base_url + "UserAPI" == url
        assert "getSELink" == params["method"]
        assert "123" == params["username"]
        assert "pin" == params["password"]

        # In particular, the Enki library ID associated with the
        # patron's library was used as the 'lib' parameter.
        assert "c" == params["lib"]

        # A FulfillmentInfo for the loan was returned.
        assert isinstance(fulfillment, FulfillmentInfo)
        assert fulfillment.identifier == pool.identifier.identifier
        assert fulfillment.collection_id == pool.collection.id
        assert DeliveryMechanism.ADOBE_DRM == fulfillment.content_type
        assert fulfillment.content_link.startswith(
            "http://afs.enkilibrary.org/fulfillment/URLLink.acsm")
        assert fulfillment.content_expires == datetime_utc(
            2017, 9, 13, 19, 42, 35, 0)
    def test_checkout_success(self):
        # Test the checkout() method.
        patron = self._patron()
        patron.authorization_identifier = "123"
        pool = self._licensepool(None)

        data = self.get_data("checked_out_acs.json")
        self.api.queue_response(200, content=data)
        loan = self.api.checkout(patron, "pin", pool, "internal format")

        # An appropriate request to the "getSELink" endpoint was made.
        [method, url, headers, data, params, kwargs] = self.api.requests.pop()
        assert "get" == method
        assert self.api.base_url + "UserAPI" == url
        assert "getSELink" == params["method"]
        assert "123" == params["username"]
        assert "pin" == params["password"]

        # In particular, the Enki library ID associated with the
        # patron's library was used as the 'lib' parameter.
        assert "c" == params["lib"]

        # A LoanInfo for the loan was returned.
        assert isinstance(loan, LoanInfo)
        assert loan.identifier == pool.identifier.identifier
        assert loan.collection_id == pool.collection.id
        assert loan.start_date == None
        assert loan.end_date == datetime_utc(2017, 9, 13, 19, 42, 35, 0)
Example #8
    def test_monitor_lifecycle(self):
        monitor = MockMonitor(self._db, self._default_collection)
        monitor.default_start_time = datetime_utc(2010, 1, 1)

        # There is no timestamp for this monitor.
        def get_timestamp():
            return get_one(self._db, Timestamp, service=monitor.service_name)

        assert None == get_timestamp()

        # Run the monitor.
        monitor.run()

        # The monitor ran once and then stopped.
        [progress] = monitor.run_records

        # The TimestampData passed in to run_once() had the
        # Monitor's default start time as its .start, and an empty
        # time for .finish.
        assert monitor.default_start_time == progress.start
        assert None == progress.finish

        # But the Monitor's underlying timestamp has been updated with
        # the time that the monitor actually took to run.
        timestamp = get_timestamp()
        assert timestamp.start > monitor.default_start_time
        assert timestamp.finish > timestamp.start
        self.time_eq(utc_now(), timestamp.start)

        # cleanup() was called once.
        assert [True] == monitor.cleanup_records
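
# A minimal sketch of the kind of MockMonitor this test reads back, inferred
# from the attributes the assertions use (run_records, cleanup_records). The
# Monitor base class and its run() machinery are assumed, not shown here.
class MockMonitorSketch(Monitor):
    SERVICE_NAME = "Mock monitor"

    def __init__(self, _db, collection=None):
        super().__init__(_db, collection)
        self.run_records = []      # TimestampData passed to each run_once()
        self.cleanup_records = []  # one True appended per cleanup() call

    def run_once(self, progress):
        self.run_records.append(progress)

    def cleanup(self):
        self.cleanup_records.append(True)
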
Example #9
        class Mock(TimelineMonitor):
            DEFAULT_START_TIME = datetime_utc(2011, 1, 1)
            SERVICE_NAME = "doomed"

            def catch_up_from(self, start, cutoff, progress):
                self.started_at = start
                progress.exception = "oops"
    def test_no_import_loop(self):
        # We stop processing a feed's 'next' link if it links to a URL we've
        # already seen.

        data = sample_data("metadata_updates_response.opds", "opds")
        self.lookup.queue_response(
            200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)
        data = data.replace(b"http://next-link/", b"http://different-link/")
        self.lookup.queue_response(
            200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)

        # This introduces a loop.
        data = data.replace(b"http://next-link/", b"http://next-link/")
        self.lookup.queue_response(
            200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)
        new_timestamp = self.monitor.run_once(self.ts)

        # Even though all these pages had the same content, we kept
        # processing them until we encountered a 'next' link we had
        # seen before; then we stopped.
        first, second, third = self.monitor.imports
        assert (None, None) == first
        assert (None, "http://next-link/") == second
        assert (None, "http://different-link/") == third

        assert datetime_utc(2016, 9, 20, 19, 37, 2) == new_timestamp.finish
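
# A hedged sketch of the loop guard described above: keep following 'next'
# links, but stop as soon as a link has already been processed. The function
# and argument names here are illustrative, not the monitor's real interface.
def follow_next_links(import_one_feed, start_url=None):
    seen = set()
    url = start_url
    while url not in seen:
        seen.add(url)
        next_links, editions, timestamp = import_one_feed(None, url)
        if not next_links:
            break
        url = next_links[0]
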
    def test_can_insert_measurement_after_the_fact(self):

        old = datetime_utc(2011, 1, 1)
        new = datetime_utc(2012, 1, 1)

        wi = self._identifier()
        m1 = wi.add_measurement(self.source,
                                Measurement.DOWNLOADS,
                                10,
                                taken_at=new)
        assert True == m1.is_most_recent

        m2 = wi.add_measurement(self.source,
                                Measurement.DOWNLOADS,
                                5,
                                taken_at=old)
        assert True == m1.is_most_recent
    def test_repr(self):

        patron = self._patron(external_identifier="a patron")

        patron.authorization_expires = datetime_utc(2018, 1, 2, 3, 4, 5)
        patron.last_external_sync = None
        assert (
            "<Patron authentication_identifier=None expires=2018-01-02 sync=None>"
            == repr(patron))
Example #13
class MockMilleniumPatronAPI(MilleniumPatronAPI):
    """This mocks the API on a higher level than the HTTP level.

    It is not used in the tests of the MilleniumPatronAPI class. It
    is used in the Adobe Vendor ID tests, but maybe it shouldn't be.
    """

    # For expiration dates we're using UTC instead of local time for
    # convenience; the difference doesn't matter because the dates in
    # question are at least 10 days away from the current date.

    # This user's card has expired.
    user1 = PatronData(
        permanent_id="12345",
        authorization_identifier="0",
        username="******",
        authorization_expires=datetime_utc(2015, 4, 1),
    )

    # This user's card still has ten days on it.
    the_future = utc_now() + datetime.timedelta(days=10)
    user2 = PatronData(
        permanent_id="67890",
        authorization_identifier="5",
        username="******",
        authorization_expires=the_future,
    )

    users = [user1, user2]

    def __init__(self):
        pass

    def remote_authenticate(self, barcode, pin):
        """A barcode that's 14 digits long is treated as valid,
        no matter which PIN is used.

        That's so real barcode/PIN combos can be passed through to
        third parties.

        Otherwise, the valid test PIN is the first character of the barcode
        repeated four times.

        """
        u = self.dump(barcode)
        if "ERRNUM" in u:
            return False
        return len(barcode) == 14 or pin == barcode[0] * 4

    def remote_patron_lookup(self, patron_or_patrondata):
        # We have a couple custom barcodes.
        look_for = patron_or_patrondata.authorization_identifier
        for u in self.users:
            if u.authorization_identifier == look_for:
                return u
        return None
Example #14
    def test_run_once_returning_timestampdata(self):
        # If a Monitor's run_once implementation returns a TimestampData,
        # that's the data used to set the Monitor's Timestamp, even if
        # the data doesn't make sense by the standards used by the main
        # Monitor class.
        start = datetime_utc(2011, 1, 1)
        finish = datetime_utc(2012, 1, 1)

        class Mock(MockMonitor):
            def run_once(self, progress):
                return TimestampData(start=start, finish=finish, counter=-100)

        monitor = Mock(self._db, self._default_collection)
        monitor.run()

        timestamp = monitor.timestamp()
        assert start == timestamp.start
        assert finish == timestamp.finish
        assert -100 == timestamp.counter
    def test_run_once(self):
        dummy_value = object()

        class Mock(EnkiImport):
            incremental_import_called_with = dummy_value

            def full_import(self):
                self.full_import_called = True
                return 10

            def incremental_import(self, since):
                self.incremental_import_called_with = since
                return 4, 7

        importer = Mock(self._db, self.collection, api_class=self.api)

        # If the incoming TimestampData makes it look like the process
        # has never successfully completed, full_import() is called.
        progress = TimestampData(start=None)
        importer.run_once(progress)
        assert True == importer.full_import_called
        assert (
            "New or modified titles: 10. Titles with circulation changes: 0."
            == progress.achievements)

        # It doesn't call incremental_import().
        assert dummy_value == importer.incremental_import_called_with

        # If run_once() is called with a TimestampData that indicates
        # an earlier successful run, a time five minutes before the
        # previous completion time is passed into incremental_import().
        importer.full_import_called = False

        a_while_ago = datetime_utc(2011, 1, 1)
        even_earlier = a_while_ago - datetime.timedelta(days=100)
        timestamp = TimestampData(start=even_earlier, finish=a_while_ago)
        new_timestamp = importer.run_once(timestamp)

        passed_in = importer.incremental_import_called_with
        expect = a_while_ago - importer.OVERLAP
        assert abs((passed_in - expect).total_seconds()) < 2

        # full_import was not called.
        assert False == importer.full_import_called

        # The proposed new TimestampData covers the entire timespan
        # from the 'expect' period to now.
        assert expect == new_timestamp.start
        now = utc_now()
        assert (now - new_timestamp.finish).total_seconds() < 2
        assert (
            "New or modified titles: 4. Titles with circulation changes: 7." ==
            new_timestamp.achievements)
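
# A hedged sketch of the dispatch these two scenarios exercise: a missing
# start time triggers full_import(), otherwise incremental_import() is given
# a point slightly before the previous finish (importer.OVERLAP). The
# TimestampData bookkeeping is simplified and assumed, not the real method.
def run_once_sketch(importer, progress):
    if progress.start is None:
        new_titles = importer.full_import()
        circulation_changes = 0
        start = None
    else:
        start = progress.finish - importer.OVERLAP
        new_titles, circulation_changes = importer.incremental_import(start)
    progress.achievements = (
        "New or modified titles: %d. Titles with circulation changes: %d."
        % (new_titles, circulation_changes))
    return TimestampData(start=start, finish=utc_now(),
                         achievements=progress.achievements)
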
    def test_run_once(self):
        # Setup authentication and Metadata Wrangler details.
        lp = self._licensepool(None,
                               data_source_name=DataSource.BIBLIOTHECA,
                               collection=self.collection)
        lp.identifier.type = Identifier.BIBLIOTHECA_ID
        isbn = Identifier.parse_urn(self._db, "urn:isbn:9781594632556")[0]
        lp.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1)
        assert [] == lp.identifier.links
        assert [] == lp.identifier.measurements

        # Queue some data to be found.
        responses = (
            "metadata_updates_response.opds",
            "metadata_updates_empty_response.opds",
        )
        for filename in responses:
            data = sample_data(filename, "opds")
            self.lookup.queue_response(
                200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)

        timestamp = self.ts
        new_timestamp = self.monitor.run_once(timestamp)

        # We have a new value to use for the Monitor's timestamp -- the
        # earliest date seen in the last OPDS feed that contained
        # any entries.
        assert datetime_utc(2016, 9, 20, 19, 37, 2) == new_timestamp.finish
        assert "Editions processed: 1" == new_timestamp.achievements

        # Normally run_once() doesn't update the monitor's timestamp,
        # but this implementation does, so that work isn't redone if
        # run_once() crashes or the monitor is killed.
        assert new_timestamp.finish == self.monitor.timestamp().finish

        # The original Identifier has information from the
        # mock Metadata Wrangler.
        mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        assert 3 == len(lp.identifier.links)
        [quality] = lp.identifier.measurements
        assert mw_source == quality.data_source

        # Check the URLs we processed.
        url1, url2 = [x[0] for x in self.lookup.requests]

        # The first URL processed was the default one for the
        # MetadataWranglerOPDSLookup.
        assert self.lookup.get_collection_url(
            self.lookup.UPDATES_ENDPOINT) == url1

        # The second URL processed was whatever we saw in the 'next' link.
        assert "http://next-link/" == url2
Example #17
    def test_add_control_fields(self):
        # This edition has one format and was published before 1900.
        edition, pool = self._edition(with_license_pool=True)
        identifier = pool.identifier
        edition.issued = datetime_utc(956, 1, 1)

        now = utc_now()
        record = Record()

        Annotator.add_control_fields(record, identifier, pool, edition)
        self._check_control_field(record, "001", identifier.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m        d        ")
        self._check_control_field(record, "007", "cr cn ---anuuu")
        self._check_control_field(
            record, "008",
            now.strftime("%y%m%d") + "s0956    xxu                 eng  ")

        # This French edition has two formats and was published in 2018.
        edition2, pool2 = self._edition(with_license_pool=True)
        identifier2 = pool2.identifier
        edition2.issued = datetime_utc(2018, 2, 3)
        edition2.language = "fre"
        LicensePoolDeliveryMechanism.set(
            pool2.data_source,
            identifier2,
            Representation.PDF_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
            RightsStatus.IN_COPYRIGHT,
        )

        record = Record()
        Annotator.add_control_fields(record, identifier2, pool2, edition2)
        self._check_control_field(record, "001", identifier2.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m        d        ")
        self._check_control_field(record, "007", "cr cn ---mnuuu")
        self._check_control_field(
            record, "008",
            now.strftime("%y%m%d") + "s2018    xxu                 fre  ")
    def test_calculate_until(self):
        start = datetime_utc(2010, 1, 1)

        # The cycle time is one week.
        default_loan = datetime.timedelta(days=6)
        default_reservation = datetime.timedelta(days=1)

        # I'm 20th in line for 4 books.
        #
        # After 7 days, four copies are released and I am 16th in line.
        # After 14 days, those copies are released and I am 12th in line.
        # After 21 days, those copies are released and I am 8th in line.
        # After 28 days, those copies are released and I am 4th in line.
        # After 35 days, those copies are released and I get my notification.
        a = Hold._calculate_until(start, 20, 4, default_loan,
                                  default_reservation)
        assert a == start + datetime.timedelta(days=(7 * 5))

        # If I am 21st in line, I need to wait six weeks.
        b = Hold._calculate_until(start, 21, 4, default_loan,
                                  default_reservation)
        assert b == start + datetime.timedelta(days=(7 * 6))

        # If I am 3rd in line, I only need to wait seven days--that's when
        # I'll get the notification message.
        b = Hold._calculate_until(start, 3, 4, default_loan,
                                  default_reservation)
        assert b == start + datetime.timedelta(days=7)

        # A new person gets the book every week. Someone has the book now
        # and there are 3 people ahead of me in the queue. I will get
        # the book in 7 days + 3 weeks.
        c = Hold._calculate_until(start, 3, 1, default_loan,
                                  default_reservation)
        assert c == start + datetime.timedelta(days=(7 * 4))

        # I'm first in line for 1 book. After 7 days, one copy is
        # released and I'll get my notification.
        a = Hold._calculate_until(start, 1, 1, default_loan,
                                  default_reservation)
        assert a == start + datetime.timedelta(days=7)

        # The book is reserved to me. I need to hurry up and check it out.
        d = Hold._calculate_until(start, 0, 1, default_loan,
                                  default_reservation)
        assert d == start + datetime.timedelta(days=1)

        # If there are no licenses, I will never get the book.
        e = Hold._calculate_until(start, 10, 0, default_loan,
                                  default_reservation)
        assert e == None
    def test_empty_feed_stops_import(self):
        # We don't follow the 'next' link of an empty feed.
        data = sample_data("metadata_updates_empty_response.opds", "opds")
        self.lookup.queue_response(
            200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)

        new_timestamp = self.monitor.run()

        # We could have followed the 'next' link, but we chose not to.
        assert [(None, None)] == self.monitor.imports
        assert 1 == len(self.lookup.requests)

        # Since there were no <entry> tags, the timestamp's finish
        # date was set to the <updated> date of the feed itself, minus
        # one day (to avoid race conditions).
        assert datetime_utc(2016, 9, 19, 19, 37,
                            10) == self.monitor.timestamp().finish
    def test_datetime_utc(self, time, formatted, isoformat):
        """`datetime_utc` is a wrapper around `datetime.datetime` but it also
        includes UTC information when it is created.
        """
        time_format = "%Y-%m-%dT%H:%M:%S"
        dt = datetime.datetime(*time, tzinfo=pytz.UTC)
        util_dt = datetime_utc(*time)

        # The util function is the same as the datetime function with
        # pytz UTC information.
        assert dt == util_dt
        # A datetime object is returned and works like any datetime object.
        assert util_dt.tzinfo == pytz.UTC
        assert util_dt.strftime(time_format) == formatted
        assert util_dt.isoformat() == isoformat
        assert util_dt.year == time[0]
        assert util_dt.month == time[1]
        assert util_dt.day == time[2]
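
# A minimal sketch of a wrapper with the contract asserted above: construct a
# datetime and attach pytz UTC tzinfo. The real datetime_utc lives in the
# project's datetime helpers; this stand-in only mirrors the behavior.
import datetime
import pytz

def datetime_utc_sketch(*args):
    return datetime.datetime(*args, tzinfo=pytz.UTC)
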
    def test_import_one_feed(self):
        data = sample_data("metadata_updates_response.opds", "opds")
        self.lookup.queue_response(
            200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data)

        next_links, editions, timestamp = self.monitor.import_one_feed(
            None, None)

        # The 'next' links found in the OPDS feed are returned.
        assert ["http://next-link/"] == next_links

        # Insofar as is possible, all <entry> tags are converted into
        # Editions.
        assert ["9781594632556"
                ] == [x.primary_identifier.identifier for x in editions]

        # The earliest time found in the OPDS feed is returned as a
        # candidate for the Monitor's timestamp.
        assert datetime_utc(2016, 9, 20, 19, 37, 2) == timestamp
    def test_recent_activity(self):
        now = utc_now()
        epoch = datetime_utc(1970, 1, 1)
        epoch_plus_one_hour = epoch + datetime.timedelta(hours=1)
        data = self.get_data("get_recent_activity.json")
        self.api.queue_response(200, content=data)
        activity = list(self.api.recent_activity(epoch, epoch_plus_one_hour))
        assert 43 == len(activity)
        for i in activity:
            assert isinstance(i, CirculationData)
        [method, url, headers, data, params, kwargs] = self.api.requests.pop()
        assert "get" == method
        assert "https://enkilibrary.org/API/ItemAPI" == url
        assert "getRecentActivityTime" == params["method"]
        assert "0" == params["stime"]
        assert "3600" == params["etime"]

        # Unlike some API calls, it's not necessary to pass 'lib' in here.
        assert "lib" not in params
Example #23
    def test_parser(self):
        """Parse an ONIX file into Metadata objects."""

        file = self.sample_data("onix_example.xml")
        metadata_records = ONIXExtractor().parse(BytesIO(file), "MIT Press")

        assert 2 == len(metadata_records)

        record = metadata_records[0]
        assert "Safe Spaces, Brave Spaces" == record.title
        assert "Diversity and Free Expression in Education" == record.subtitle
        assert "Palfrey, John" == record.contributors[0].sort_name
        assert "John Palfrey" == record.contributors[0].display_name
        assert "Palfrey" == record.contributors[0].family_name
        assert "Head of School at Phillips Academy" in record.contributors[
            0].biography
        assert "The MIT Press" == record.publisher
        assert None == record.imprint
        assert "9780262343664" == record.primary_identifier.identifier
        assert Identifier.ISBN == record.primary_identifier.type
        assert "eng" == record.language
        assert datetime_utc(2017, 10, 6) == record.issued
        subjects = record.subjects
        assert 7 == len(subjects)
        assert "EDU015000" == subjects[0].identifier
        assert Classifier.AUDIENCE_ADULT == subjects[-1].identifier
        assert Classifier.BISAC == subjects[0].type
        assert Classification.TRUSTED_DISTRIBUTOR_WEIGHT == subjects[0].weight
        assert Edition.BOOK_MEDIUM == record.medium
        assert 2017 == record.issued.year

        assert 1 == len(record.links)
        assert (
            "the essential democratic values of diversity and free expression"
            in record.links[0].content)

        record = metadata_records[1]
        assert Edition.AUDIO_MEDIUM == record.medium
        assert "The Test Corporation" == record.contributors[0].display_name
        assert "Test Corporation, The" == record.contributors[0].sort_name
Example #24
    def test_add_publisher(self):
        edition = self._edition()
        edition.publisher = self._str
        edition.issued = datetime_utc(1894, 4, 5)

        record = Record()
        Annotator.add_publisher(record, edition)
        self._check_field(
            record,
            "264",
            {
                "a": "[Place of publication not identified]",
                "b": edition.publisher,
                "c": "1894",
            },
            [" ", "1"],
        )

        # If there's no publisher, the field is left out.
        record = Record()
        edition.publisher = None
        Annotator.add_publisher(record, edition)
        assert [] == record.get_fields("264")
    def test_profile_document(self):
        # synchronize_annotations always shows up as settable, even if
        # the current value is None.
        self.patron.authorization_identifier = "abcd"
        assert None == self.patron.synchronize_annotations
        rep = self.store.profile_document
        assert {
            "simplified:authorization_identifier": "abcd",
            "settings": {
                "simplified:synchronize_annotations": None
            },
        } == rep

        self.patron.synchronize_annotations = True
        self.patron.authorization_expires = datetime_utc(
            2016, 1, 1, 10, 20, 30)
        rep = self.store.profile_document
        assert {
            "simplified:authorization_expires": "2016-01-01T10:20:30Z",
            "simplified:authorization_identifier": "abcd",
            "settings": {
                "simplified:synchronize_annotations": True
            },
        } == rep
    def test_strptime_utc(self, expect, date_string, format):
        assert strptime_utc(date_string, format) == datetime_utc(*expect)
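
# A minimal sketch of what strptime_utc is asserted to do above: parse with
# datetime.strptime and attach UTC tzinfo (an assumption about the project's
# helper, shown only to make the one-line test readable).
import datetime
import pytz

def strptime_utc_sketch(date_string, format):
    return datetime.datetime.strptime(date_string, format).replace(
        tzinfo=pytz.UTC)
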
Example #27
                property_value = property_value.isoformat() + "Z"
            if isinstance(rwpm_item, list):
                result.append(property_value)
            else:
                result[property_object.key] = property_value
    elif isinstance(rwpm_item, RegistryItem):
        result = rwpm_item.key

    return result


PROQUEST_PUBLICATION_1 = OPDS2Publication(
    metadata=PresentationMetadata(
        identifier="urn:proquest.com/document-id/1",
        title="Publićation # 1",
        modified=datetime_utc(2020, 1, 31, 0, 0, 0),
    ),
    links=LinkList([
        Link(
            href="https://feed.org/document-id/1",
            rels=[OPDS2LinkRelationsRegistry.ACQUISITION],
        )
    ]),
)

PROQUEST_PUBLICATION_2 = OPDS2Publication(
    metadata=PresentationMetadata(
        identifier="urn:proquest.com/document-id/2",
        title="Publication # 2",
        modified=datetime_utc(2020, 1, 30, 0, 0, 0),
    ),
Example #28
class TestSAMLFederatedMetadataExpirationValidator(object):
    @parameterized.expand([
        (
            "incorrect_xml",
            utc_now(),
            fixtures.INCORRECT_XML,
            SAMLFederatedMetadataValidationError,
        ),
        (
            "without_valid_until_attribute",
            utc_now(),
            fixtures.FEDERATED_METADATA_WITHOUT_VALID_UNTIL_ATTRIBUTE,
            SAMLFederatedMetadataValidationError,
        ),
        (
            "with_expired_valid_until_attribute",
            fixtures.FEDERATED_METADATA_VALID_UNTIL +
            SAMLFederatedMetadataExpirationValidator.MAX_CLOCK_SKEW +
            datetime.timedelta(minutes=1),
            fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE,
            SAMLFederatedMetadataValidationError,
        ),
        (
            "with_valid_until_attribute_too_far_in_the_future",
            fixtures.FEDERATED_METADATA_VALID_UNTIL -
            SAMLFederatedMetadataExpirationValidator.MAX_VALID_TIME -
            datetime.timedelta(minutes=1),
            fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE,
            SAMLFederatedMetadataValidationError,
        ),
        (
            "with_valid_until_attribute_less_than_current_time_and_less_than_max_clock_skew",
            fixtures.FEDERATED_METADATA_VALID_UNTIL +
            SAMLFederatedMetadataExpirationValidator.MAX_CLOCK_SKEW,
            fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE,
            None,
        ),
        (
            "with_valid_until_attribute_greater_than_current_time_and_less_than_max_valid_time",
            fixtures.FEDERATED_METADATA_VALID_UNTIL -
            SAMLFederatedMetadataExpirationValidator.MAX_VALID_TIME +
            datetime.timedelta(minutes=1),
            fixtures.FEDERATED_METADATA_WITH_VALID_UNTIL_ATTRIBUTE,
            None,
        ),
        (
            "with_real_incommon_metadata",
            datetime_utc(2020, 11, 26, 14, 32, 42),
            open(
                os.path.join(
                    os.path.dirname(os.path.abspath(__file__)),
                    "../../../files/saml/incommon-metadata-idp-only.xml",
                )).read(),
            None,
        ),
    ])
    def test_validate(self, _, current_time, metadata, expected_exception):
        # Arrange
        validator = SAMLFederatedMetadataExpirationValidator()
        federation = SAMLFederation(incommon.FEDERATION_TYPE,
                                    incommon.IDP_METADATA_SERVICE_URL)

        # Act, assert
        with freeze_time(current_time):
            if expected_exception:
                with pytest.raises(expected_exception):
                    validator.validate(federation, metadata)
            else:
                validator.validate(federation, metadata)
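
# A hedged reading of the boundary rule the parameterized cases above probe:
# metadata is acceptable only while validUntil lies no further than
# MAX_CLOCK_SKEW in the past and no further than MAX_VALID_TIME in the
# future. This is inferred from the test cases, not the validator's code.
def valid_until_is_acceptable(valid_until, now, max_clock_skew, max_valid_time):
    return (now - max_clock_skew) <= valid_until <= (now + max_valid_time)
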
Example #29
class TestS3Uploader(S3UploaderTest):
    def test_names(self):
        # The NAME associated with this class must be the same as its
        # key in the MirrorUploader implementation registry, and it's
        # better if it's the same as the name of the external
        # integration.
        assert S3Uploader.NAME == ExternalIntegration.S3
        assert (S3Uploader == MirrorUploader.IMPLEMENTATION_REGISTRY[
            ExternalIntegration.S3])

    def test_instantiation(self):
        integration = self._external_integration(
            ExternalIntegration.S3, goal=ExternalIntegration.STORAGE_GOAL)
        integration.username = "******"
        integration.password = "******"
        integration.setting(
            S3UploaderConfiguration.URL_TEMPLATE_KEY).value = "a transform"
        uploader = MirrorUploader.implementation(integration)
        assert True == isinstance(uploader, S3Uploader)

        # The URL_TEMPLATE_KEY setting becomes the .url_transform
        # attribute on the S3Uploader object.
        assert "a transform" == uploader.url_transform

    @parameterized.expand([
        ("empty_credentials", None, None),
        ("empty_string_credentials", "", ""),
        ("non_empty_string_credentials", "username", "password"),
    ])
    def test_initialization(self, name, username, password):
        # Arrange
        settings = {"username": username, "password": password}
        integration = self._external_integration(
            ExternalIntegration.S3,
            goal=ExternalIntegration.STORAGE_GOAL,
            settings=settings,
        )
        client_class = MagicMock()

        # Act
        S3Uploader(integration, client_class=client_class)

        # Assert
        assert client_class.call_count == 2

        service_name = client_class.call_args_list[0].args[0]
        region_name = client_class.call_args_list[0].kwargs["region_name"]
        aws_access_key_id = client_class.call_args_list[0].kwargs[
            "aws_access_key_id"]
        aws_secret_access_key = client_class.call_args_list[0].kwargs[
            "aws_secret_access_key"]
        config = client_class.call_args_list[0].kwargs["config"]
        assert service_name == "s3"
        assert region_name == S3UploaderConfiguration.S3_DEFAULT_REGION
        assert aws_access_key_id == None
        assert aws_secret_access_key == None
        assert config.signature_version == botocore.UNSIGNED
        assert (config.s3["addressing_style"] ==
                S3UploaderConfiguration.S3_DEFAULT_ADDRESSING_STYLE)

        service_name = client_class.call_args_list[1].args[0]
        region_name = client_class.call_args_list[1].kwargs["region_name"]
        aws_access_key_id = client_class.call_args_list[1].kwargs[
            "aws_access_key_id"]
        aws_secret_access_key = client_class.call_args_list[1].kwargs[
            "aws_secret_access_key"]
        assert service_name == "s3"
        assert region_name == S3UploaderConfiguration.S3_DEFAULT_REGION
        assert aws_access_key_id == (username if username != "" else None)
        assert aws_secret_access_key == (password if password != "" else None)
        assert "config" not in client_class.call_args_list[1].kwargs

    def test_custom_client_class(self):
        """You can specify a client class to use instead of boto3.client."""
        integration = self._integration()
        uploader = S3Uploader(integration, MockS3Client)
        assert isinstance(uploader.client, MockS3Client)

    def test_get_bucket(self):
        buckets = {
            S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "banana",
            S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "bucket",
        }
        buckets_plus_irrelevant_setting = dict(buckets)
        buckets_plus_irrelevant_setting["not-a-bucket-at-all"] = "value"
        uploader = self._create_s3_uploader(**buckets_plus_irrelevant_setting)

        # This S3Uploader knows about the configured buckets.  It
        # wasn't informed of the irrelevant 'not-a-bucket-at-all'
        # setting.
        assert buckets == uploader.buckets

        # get_bucket just does a lookup in .buckets
        uploader.buckets["foo"] = object()
        result = uploader.get_bucket("foo")
        assert uploader.buckets["foo"] == result

    @parameterized.expand([
        (
            "s3_url_with_path_without_slash",
            "a-bucket",
            "a-path",
            "https://a-bucket.s3.amazonaws.com/a-path",
            None,
        ),
        (
            "s3_dummy_url_with_path_without_slash",
            "dummy",
            "dummy",
            "https://dummy.s3.amazonaws.com/dummy",
            None,
        ),
        (
            "s3_path_style_url_with_path_without_slash",
            "a-bucket",
            "a-path",
            "https://s3.amazonaws.com/a-bucket/a-path",
            None,
            S3AddressingStyle.PATH.value,
        ),
        (
            "s3_path_style_dummy_url_with_path_without_slash",
            "dummy",
            "dummy",
            "https://s3.amazonaws.com/dummy/dummy",
            None,
            S3AddressingStyle.PATH.value,
        ),
        (
            "s3_url_with_path_with_slash",
            "a-bucket",
            "/a-path",
            "https://a-bucket.s3.amazonaws.com/a-path",
            None,
        ),
        (
            "s3_path_style_url_with_path_with_slash",
            "a-bucket",
            "/a-path",
            "https://s3.amazonaws.com/a-bucket/a-path",
            None,
            S3AddressingStyle.PATH.value,
        ),
        (
            "s3_url_with_custom_region_and_path_without_slash",
            "a-bucket",
            "a-path",
            "https://a-bucket.s3.us-east-2.amazonaws.com/a-path",
            "us-east-2",
        ),
        (
            "s3_path_style_url_with_custom_region_and_path_without_slash",
            "a-bucket",
            "a-path",
            "https://s3.us-east-2.amazonaws.com/a-bucket/a-path",
            "us-east-2",
            S3AddressingStyle.PATH.value,
        ),
        (
            "s3_url_with_custom_region_and_path_with_slash",
            "a-bucket",
            "/a-path",
            "https://a-bucket.s3.us-east-3.amazonaws.com/a-path",
            "us-east-3",
        ),
        (
            "s3_path_style_url_with_custom_region_and_path_with_slash",
            "a-bucket",
            "/a-path",
            "https://s3.us-east-3.amazonaws.com/a-bucket/a-path",
            "us-east-3",
            S3AddressingStyle.PATH.value,
        ),
        (
            "custom_http_url_and_path_without_slash",
            "http://a-bucket.com/",
            "a-path",
            "http://a-bucket.com/a-path",
            None,
        ),
        (
            "custom_http_url_and_path_with_slash",
            "http://a-bucket.com/",
            "/a-path",
            "http://a-bucket.com/a-path",
            None,
        ),
        (
            "custom_http_url_and_path_without_slash",
            "https://a-bucket.com/",
            "a-path",
            "https://a-bucket.com/a-path",
            None,
        ),
        (
            "custom_http_url_and_path_with_slash",
            "https://a-bucket.com/",
            "/a-path",
            "https://a-bucket.com/a-path",
            None,
        ),
    ])
    def test_url(self,
                 name,
                 bucket,
                 path,
                 expected_result,
                 region=None,
                 addressing_style=None):
        # Arrange
        uploader = self._create_s3_uploader(region=region,
                                            addressing_style=addressing_style)

        # Act
        result = uploader.url(bucket, path)

        # Assert
        assert result == expected_result

    @parameterized.expand([
        (
            "implicit_s3_url_template",
            "bucket",
            "the key",
            "https://bucket.s3.amazonaws.com/the%20key",
        ),
        (
            "implicit_s3_url_template_with_custom_region",
            "bucket",
            "the key",
            "https://bucket.s3.us-east-2.amazonaws.com/the%20key",
            None,
            "us-east-2",
        ),
        (
            "explicit_s3_url_template",
            "bucket",
            "the key",
            "https://bucket.s3.amazonaws.com/the%20key",
            S3UploaderConfiguration.URL_TEMPLATE_DEFAULT,
        ),
        (
            "explicit_s3_url_template_with_custom_region",
            "bucket",
            "the key",
            "https://bucket.s3.us-east-2.amazonaws.com/the%20key",
            S3UploaderConfiguration.URL_TEMPLATE_DEFAULT,
            "us-east-2",
        ),
        (
            "http_url_template",
            "bucket",
            "the këy",
            "http://bucket/the%20k%C3%ABy",
            S3UploaderConfiguration.URL_TEMPLATE_HTTP,
        ),
        (
            "https_url_template",
            "bucket",
            "the këy",
            "https://bucket/the%20k%C3%ABy",
            S3UploaderConfiguration.URL_TEMPLATE_HTTPS,
        ),
    ])
    def test_final_mirror_url(self,
                              name,
                              bucket,
                              key,
                              expected_result,
                              url_transform=None,
                              region=None):
        # Arrange
        uploader = self._create_s3_uploader(region=region)

        if url_transform:
            uploader.url_transform = url_transform

        # Act
        result = uploader.final_mirror_url(bucket, key)

        # Assert
        if not url_transform:
            assert (S3UploaderConfiguration.URL_TEMPLATE_DEFAULT ==
                    uploader.url_transform)

        assert result == expected_result

    def test_key_join(self):
        """Test the code used to build S3 keys from parts."""
        parts = ["Gutenberg", b"Gutenberg ID", 1234, "Die Flügelmaus+.epub"]
        assert ("Gutenberg/Gutenberg%20ID/1234/Die%20Fl%C3%BCgelmaus%2B.epub"
                == S3Uploader.key_join(parts))
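
# A rough sketch of the behavior test_key_join asserts above: decode bytes,
# stringify every part, URL-quote it, and join with "/". Assumed logic, not
# the actual S3Uploader.key_join implementation.
from urllib.parse import quote

def key_join_sketch(parts):
    cleaned = []
    for part in parts:
        if isinstance(part, bytes):
            part = part.decode("utf-8")
        cleaned.append(quote(str(part)))
    return "/".join(cleaned)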

    @parameterized.expand([
        (
            "with_gutenberg_cover_generator_data_source",
            "test-book-covers-s3-bucket",
            DataSource.GUTENBERG_COVER_GENERATOR,
            "https://test-book-covers-s3-bucket.s3.amazonaws.com/Gutenberg%20Illustrated/",
        ),
        (
            "with_overdrive_data_source",
            "test-book-covers-s3-bucket",
            DataSource.OVERDRIVE,
            "https://test-book-covers-s3-bucket.s3.amazonaws.com/Overdrive/",
        ),
        (
            "with_overdrive_data_source_and_scaled_size",
            "test-book-covers-s3-bucket",
            DataSource.OVERDRIVE,
            "https://test-book-covers-s3-bucket.s3.amazonaws.com/scaled/300/Overdrive/",
            300,
        ),
        (
            "with_gutenberg_cover_generator_data_source_and_custom_region",
            "test-book-covers-s3-bucket",
            DataSource.GUTENBERG_COVER_GENERATOR,
            "https://test-book-covers-s3-bucket.s3.us-east-3.amazonaws.com/Gutenberg%20Illustrated/",
            None,
            "us-east-3",
        ),
        (
            "with_overdrive_data_source_and_custom_region",
            "test-book-covers-s3-bucket",
            DataSource.OVERDRIVE,
            "https://test-book-covers-s3-bucket.s3.us-east-3.amazonaws.com/Overdrive/",
            None,
            "us-east-3",
        ),
        (
            "with_overdrive_data_source_and_scaled_size_and_custom_region",
            "test-book-covers-s3-bucket",
            DataSource.OVERDRIVE,
            "https://test-book-covers-s3-bucket.s3.us-east-3.amazonaws.com/scaled/300/Overdrive/",
            300,
            "us-east-3",
        ),
    ])
    def test_cover_image_root(
        self,
        name,
        bucket,
        data_source_name,
        expected_result,
        scaled_size=None,
        region=None,
    ):
        # Arrange
        uploader = self._create_s3_uploader(region=region)
        data_source = DataSource.lookup(self._db, data_source_name)

        # Act
        result = uploader.cover_image_root(bucket,
                                           data_source,
                                           scaled_size=scaled_size)

        # Assert
        assert result == expected_result

    @parameterized.expand([
        (
            "with_default_region",
            "test-open-access-s3-bucket",
            "https://test-open-access-s3-bucket.s3.amazonaws.com/",
        ),
        (
            "with_custom_region",
            "test-open-access-s3-bucket",
            "https://test-open-access-s3-bucket.s3.us-east-3.amazonaws.com/",
            "us-east-3",
        ),
    ])
    def test_content_root(self, name, bucket, expected_result, region=None):
        # Arrange
        uploader = self._create_s3_uploader(region=region)

        # Act
        result = uploader.content_root(bucket)

        # Assert
        assert result == expected_result

    @parameterized.expand([
        (
            "s3_url",
            "test-marc-s3-bucket",
            "SHORT",
            "https://test-marc-s3-bucket.s3.amazonaws.com/SHORT/",
        ),
        (
            "s3_url_with_custom_region",
            "test-marc-s3-bucket",
            "SHORT",
            "https://test-marc-s3-bucket.s3.us-east-2.amazonaws.com/SHORT/",
            "us-east-2",
        ),
        ("custom_http_url", "http://my-feed/", "SHORT",
         "http://my-feed/SHORT/"),
        ("custom_https_url", "https://my-feed/", "SHORT",
         "https://my-feed/SHORT/"),
    ])
    def test_marc_file_root(self,
                            name,
                            bucket,
                            library_name,
                            expected_result,
                            region=None):
        # Arrange
        uploader = self._create_s3_uploader(region=region)
        library = self._library(short_name=library_name)

        # Act
        result = uploader.marc_file_root(bucket, library)

        # Assert
        assert result == expected_result

    @parameterized.expand([
        (
            "with_identifier",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK.epub",
        ),
        (
            "with_custom_extension",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK.pdf",
            "pdf",
        ),
        (
            "with_custom_dotted_extension",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK.pdf",
            ".pdf",
        ),
        (
            "with_custom_data_source",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK.epub",
            None,
            DataSource.UNGLUE_IT,
        ),
        (
            "with_custom_title",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.amazonaws.com/Gutenberg%20ID/ABOOK/On%20Books.epub",
            None,
            None,
            "On Books",
        ),
        (
            "with_custom_extension_and_title_and_data_source",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/On%20Books.pdf",
            ".pdf",
            DataSource.UNGLUE_IT,
            "On Books",
        ),
        (
            "with_custom_extension_and_title_and_data_source_and_region",
            {
                S3UploaderConfiguration.OA_CONTENT_BUCKET_KEY: "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.us-east-3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/On%20Books.pdf",
            ".pdf",
            DataSource.UNGLUE_IT,
            "On Books",
            "us-east-3",
        ),
        (
            "with_protected_access_and_custom_extension_and_title_and_data_source_and_region",
            {
                S3UploaderConfiguration.PROTECTED_CONTENT_BUCKET_KEY:
                "thebooks"
            },
            "ABOOK",
            "https://thebooks.s3.us-east-3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/On%20Books.pdf",
            ".pdf",
            DataSource.UNGLUE_IT,
            "On Books",
            "us-east-3",
            False,
        ),
    ])
    def test_book_url(
        self,
        name,
        buckets,
        identifier,
        expected_result,
        extension=None,
        data_source_name=None,
        title=None,
        region=None,
        open_access=True,
    ):
        # Arrange
        identifier = self._identifier(foreign_id=identifier)
        uploader = self._create_s3_uploader(region=region, **buckets)

        parameters = {"identifier": identifier, "open_access": open_access}

        if extension:
            parameters["extension"] = extension
        if title:
            parameters["title"] = title

        if data_source_name:
            data_source = DataSource.lookup(self._db, DataSource.UNGLUE_IT)
            parameters["data_source"] = data_source

        # Act
        result = uploader.book_url(**parameters)

        # Assert
        assert result == expected_result

    @parameterized.expand([
        (
            "without_scaled_size",
            {
                S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers"
            },
            DataSource.UNGLUE_IT,
            "ABOOK",
            "filename",
            "https://thecovers.s3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/filename",
        ),
        (
            "without_scaled_size_and_with_custom_region",
            {
                S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers"
            },
            DataSource.UNGLUE_IT,
            "ABOOK",
            "filename",
            "https://thecovers.s3.us-east-3.amazonaws.com/unglue.it/Gutenberg%20ID/ABOOK/filename",
            None,
            "us-east-3",
        ),
        (
            "with_scaled_size",
            {
                S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers"
            },
            DataSource.UNGLUE_IT,
            "ABOOK",
            "filename",
            "https://thecovers.s3.amazonaws.com/scaled/601/unglue.it/Gutenberg%20ID/ABOOK/filename",
            601,
        ),
        (
            "with_scaled_size_and_custom_region",
            {
                S3UploaderConfiguration.BOOK_COVERS_BUCKET_KEY: "thecovers"
            },
            DataSource.UNGLUE_IT,
            "ABOOK",
            "filename",
            "https://thecovers.s3.us-east-3.amazonaws.com/scaled/601/unglue.it/Gutenberg%20ID/ABOOK/filename",
            601,
            "us-east-3",
        ),
    ])
    def test_cover_image_url(
        self,
        name,
        buckets,
        data_source_name,
        identifier,
        filename,
        expected_result,
        scaled_size=None,
        region=None,
    ):
        # identifier = self._identifier(foreign_id="ABOOK")
        # buckets = {S3Uploader.BOOK_COVERS_BUCKET_KEY : 'thecovers'}
        # uploader = self._uploader(**buckets)
        # m = uploader.cover_image_url
        #
        # unglueit = DataSource.lookup(self._db, DataSource.UNGLUE_IT)
        # identifier = self._identifier(foreign_id="ABOOK")
        # eq_('https://s3.amazonaws.com/thecovers/scaled/601/unglue.it/Gutenberg+ID/ABOOK/filename',
        #     m(unglueit, identifier, "filename", scaled_size=601))

        # Arrange
        data_source = DataSource.lookup(self._db, data_source_name)
        identifier = self._identifier(foreign_id=identifier)
        uploader = self._create_s3_uploader(region=region, **buckets)

        # Act
        result = uploader.cover_image_url(data_source,
                                          identifier,
                                          filename,
                                          scaled_size=scaled_size)

        # Assert
        assert result == expected_result

    @parameterized.expand([
        (
            "with_s3_bucket_and_end_time",
            "marc",
            "SHORT",
            "Lane",
            datetime_utc(2020, 1, 1, 0, 0, 0),
            "https://marc.s3.amazonaws.com/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00/Lane.mrc",
        ),
        (
            "with_s3_bucket_and_end_time_and_start_time",
            "marc",
            "SHORT",
            "Lane",
            datetime_utc(2020, 1, 2, 0, 0, 0),
            "https://marc.s3.amazonaws.com/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc",
            datetime_utc(2020, 1, 1, 0, 0, 0),
        ),
        (
            "with_s3_bucket_and_end_time_and_start_time_and_custom_region",
            "marc",
            "SHORT",
            "Lane",
            datetime_utc(2020, 1, 2, 0, 0, 0),
            "https://marc.s3.us-east-2.amazonaws.com/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc",
            datetime_utc(2020, 1, 1, 0, 0, 0),
            "us-east-2",
        ),
        (
            "with_http_bucket_and_end_time_and_start_time",
            "http://marc",
            "SHORT",
            "Lane",
            datetime_utc(2020, 1, 2, 0, 0, 0),
            "http://marc/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc",
            datetime_utc(2020, 1, 1, 0, 0, 0),
        ),
        (
            "with_https_bucket_and_end_time_and_start_time",
            "https://marc",
            "SHORT",
            "Lane",
            datetime_utc(2020, 1, 2, 0, 0, 0),
            "https://marc/SHORT/2020-01-01%2000%3A00%3A00%2B00%3A00-2020-01-02%2000%3A00%3A00%2B00%3A00/Lane.mrc",
            datetime_utc(2020, 1, 1, 0, 0, 0),
        ),
    ])
    def test_marc_file_url(
        self,
        name,
        bucket,
        library_name,
        lane_name,
        end_time,
        expected_result,
        start_time=None,
        region=None,
    ):
        # Arrange
        library = self._library(short_name=library_name)
        lane = self._lane(display_name=lane_name)
        buckets = {S3UploaderConfiguration.MARC_BUCKET_KEY: bucket}
        uploader = self._create_s3_uploader(region=region, **buckets)

        # Act
        result = uploader.marc_file_url(library, lane, end_time, start_time)

        # Assert
        assert result == expected_result
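
The expected URLs above show how the MARC file path is built: the bucket
setting is used verbatim when it is already an http(s) URL and otherwise
becomes a virtual-hosted S3 host, the end time (or "start-end" pair) is
URL-quoted, and the lane name gets a ".mrc" suffix. A rough sketch of that
assembly -- illustrative only, assuming str() of the timestamps yields
"2020-01-01 00:00:00+00:00"-style values and taking the library and lane
names as plain strings -- might be:

    from urllib.parse import quote

    def sketch_marc_file_url(bucket, library_short_name, lane_name,
                             end_time, start_time=None, region=None):
        if bucket.startswith("http://") or bucket.startswith("https://"):
            root = bucket.rstrip("/")
        elif region is None:
            root = "https://%s.s3.amazonaws.com" % bucket
        else:
            root = "https://%s.s3.%s.amazonaws.com" % (bucket, region)
        # Quote each timestamp separately so spaces, colons and "+00:00"
        # become %20, %3A and %2B00%3A00, as in the expected URLs above.
        if start_time is not None:
            time_part = "%s-%s" % (quote(str(start_time)), quote(str(end_time)))
        else:
            time_part = quote(str(end_time))
        return "%s/%s/%s/%s.mrc" % (
            root, quote(library_short_name), time_part, quote(lane_name))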

    @parameterized.expand([
        (
            "s3_path_style_request_without_region",
            "https://s3.amazonaws.com/bucket/directory/filename.jpg",
            ("bucket", "directory/filename.jpg"),
        ),
        (
            "s3_path_style_request_with_region",
            "https://s3.us-east-2.amazonaws.com/bucket/directory/filename.jpg",
            ("bucket", "directory/filename.jpg"),
        ),
        (
            "s3_virtual_hosted_style_request_with_global_endpoint",
            "https://bucket.s3.amazonaws.com/directory/filename.jpg",
            ("bucket", "directory/filename.jpg"),
        ),
        (
            "s3_virtual_hosted_style_request_with_dashed_region",
            "https://bucket.s3-us-east-2.amazonaws.com/directory/filename.jpg",
            ("bucket", "directory/filename.jpg"),
        ),
        (
            "s3_virtual_hosted_style_request_with_dotted_region",
            "https://bucket.s3.us-east-2.amazonaws.com/directory/filename.jpg",
            ("bucket", "directory/filename.jpg"),
        ),
        (
            "http_url",
            "http://book-covers.nypl.org/directory/filename.jpg",
            ("book-covers.nypl.org", "directory/filename.jpg"),
        ),
        (
            "https_url",
            "https://book-covers.nypl.org/directory/filename.jpg",
            ("book-covers.nypl.org", "directory/filename.jpg"),
        ),
        (
            "http_url_with_escaped_symbols",
            "http://book-covers.nypl.org/directory/filename+with+spaces%21.jpg",
            ("book-covers.nypl.org", "directory/filename with spaces!.jpg"),
        ),
        (
            "http_url_with_escaped_symbols_but_unquote_set_to_false",
            "http://book-covers.nypl.org/directory/filename+with+spaces%21.jpg",
            ("book-covers.nypl.org", "directory/filename+with+spaces%21.jpg"),
            False,
        ),
    ])
    def test_split_url(self, name, url, expected_result, unquote=True):
        # Arrange
        s3_uploader = self._create_s3_uploader()

        # Act
        result = s3_uploader.split_url(url, unquote)

        # Assert
        assert result == expected_result
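
The cases above cover the three URL shapes split_url has to tell apart:
path-style S3 URLs (the bucket is the first path segment), virtual-hosted
S3 URLs (the bucket is the leading host label), and plain custom-domain
URLs (the whole host acts as the bucket). One way to reproduce exactly
those behaviours -- a sketch, not the real S3Uploader.split_url -- is:

    from urllib.parse import urlsplit, unquote_plus

    def sketch_split_url(url, unquote=True):
        parts = urlsplit(url)
        host = parts.netloc
        path = parts.path.lstrip("/")
        if host.startswith("s3.") or host.startswith("s3-"):
            # Path-style request: the bucket is the first path segment.
            bucket, _, key = path.partition("/")
        elif host.endswith(".amazonaws.com"):
            # Virtual-hosted-style request: everything before ".s3" is the bucket.
            bucket = host.split(".s3", 1)[0]
            key = path
        else:
            # Custom domain: the host itself is treated as the bucket.
            bucket, key = host, path
        if unquote:
            # unquote_plus turns "filename+with+spaces%21.jpg" into
            # "filename with spaces!.jpg", matching the last two cases above.
            key = unquote_plus(key)
        return bucket, key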

    def test_mirror_one(self):
        edition, pool = self._edition(with_license_pool=True)
        original_cover_location = "http://example.com/a-cover.png"
        with open(self.sample_cover_path("test-book-cover.png"), "rb") as fh:
            content = fh.read()
        cover, ignore = pool.add_link(
            Hyperlink.IMAGE,
            original_cover_location,
            edition.data_source,
            Representation.PNG_MEDIA_TYPE,
            content=content,
        )
        cover_rep = cover.resource.representation
        assert None == cover_rep.mirrored_at

        original_epub_location = "https://books.com/a-book.epub"
        epub, ignore = pool.add_link(
            Hyperlink.OPEN_ACCESS_DOWNLOAD,
            original_epub_location,
            edition.data_source,
            Representation.EPUB_MEDIA_TYPE,
            content="i'm an epub",
        )
        epub_rep = epub.resource.representation
        assert None == epub_rep.mirrored_at

        s3 = self._create_s3_uploader(client_class=MockS3Client)

        # Mock final_mirror_url so we can verify that it's called with
        # the right arguments
        def mock_final_mirror_url(bucket, key):
            return "final_mirror_url was called with bucket %s, key %s" % (
                bucket, key)

        s3.final_mirror_url = mock_final_mirror_url

        book_url = "http://books-go/here.epub"
        cover_url = "http://s3.amazonaws.com/covers-go/here.png"
        s3.mirror_one(cover.resource.representation, cover_url)
        s3.mirror_one(epub.resource.representation, book_url)
        [
            [data1, bucket1, key1, args1, ignore1],
            [data2, bucket2, key2, args2, ignore2],
        ] = s3.client.uploads

        # Both representations have had .mirror_url set and been
        # mirrored to those URLs.
        assert data1.startswith(b"\x89")
        assert "covers-go" == bucket1
        assert "here.png" == key1
        assert Representation.PNG_MEDIA_TYPE == args1["ContentType"]
        assert (utc_now() - cover_rep.mirrored_at).seconds < 10

        assert b"i'm an epub" == data2
        assert "books-go" == bucket2
        assert "here.epub" == key2
        assert Representation.EPUB_MEDIA_TYPE == args2["ContentType"]

        # In both cases, mirror_url was set to the result of final_mirror_url.
        assert (
            "final_mirror_url was called with bucket books-go, key here.epub"
            == epub_rep.mirror_url)
        assert (
            "final_mirror_url was called with bucket covers-go, key here.png"
            == cover_rep.mirror_url)

        # .mirrored_at was set when each representation was 'mirrored'.
        for rep in epub_rep, cover_rep:
            assert (utc_now() - rep.mirrored_at).seconds < 10

    def test_mirror_failure(self):
        edition, pool = self._edition(with_license_pool=True)
        original_epub_location = "https://books.com/a-book.epub"
        epub, ignore = pool.add_link(
            Hyperlink.OPEN_ACCESS_DOWNLOAD,
            original_epub_location,
            edition.data_source,
            Representation.EPUB_MEDIA_TYPE,
            content="i'm an epub",
        )
        epub_rep = epub.resource.representation

        uploader = self._create_s3_uploader(MockS3Client)

        # A network failure is treated as a transient error.
        uploader.client.fail_with = BotoCoreError()
        uploader.mirror_one(epub_rep, self._url)
        assert None == epub_rep.mirrored_at
        assert None == epub_rep.mirror_exception

        # An S3 credential failure is treated as a transient error.
        response = dict(Error=dict(
            Code=401,
            Message="Bad credentials",
        ))
        uploader.client.fail_with = ClientError(response, "SomeOperation")
        uploader.mirror_one(epub_rep, self._url)
        assert None == epub_rep.mirrored_at
        assert None == epub_rep.mirror_exception

        # Because the file was not successfully uploaded,
        # final_mirror_url was never called and mirror_url
        # was not set.
        assert None == epub_rep.mirror_url

        # A bug in the code is not treated as a transient error --
        # the exception propagates through.
        uploader.client.fail_with = Exception("crash!")
        pytest.raises(Exception, uploader.mirror_one, epub_rep, self._url)
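
Taken together, test_mirror_one and test_mirror_failure pin down the
error-handling contract: a successful upload sets mirror_url and mirrored_at,
BotoCoreError and ClientError are swallowed as transient (the representation
stays unmirrored so the job can be retried), and any other exception is a bug
that propagates. A minimal sketch of that shape -- illustrative only, not the
actual S3Uploader.mirror_one, with the mirror URL construction simplified --
is:

    import datetime

    from botocore.exceptions import BotoCoreError, ClientError

    def sketch_mirror_one(client, representation, bucket, key, body, media_type):
        try:
            client.put_object(Bucket=bucket, Key=key, Body=body,
                              ContentType=media_type)
        except (BotoCoreError, ClientError):
            # Transient S3 problem: leave mirrored_at and mirror_exception
            # unset; any other exception propagates to the caller.
            return
        representation.mirror_url = "https://%s.s3.amazonaws.com/%s" % (bucket, key)
        representation.mirrored_at = datetime.datetime.now(datetime.timezone.utc)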

    def test_svg_mirroring(self):
        edition, pool = self._edition(with_license_pool=True)
        original = self._url

        # Create an SVG cover for the book.
        svg = """<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">

<svg xmlns="http://www.w3.org/2000/svg" width="100" height="50">
    <ellipse cx="50" cy="25" rx="50" ry="25" style="fill:blue;"/>
</svg>"""
        hyperlink, ignore = pool.add_link(
            Hyperlink.IMAGE,
            original,
            edition.data_source,
            Representation.SVG_MEDIA_TYPE,
            content=svg,
        )

        # 'Upload' it to S3.
        s3 = self._create_s3_uploader(MockS3Client)
        s3.mirror_one(hyperlink.resource.representation, self._url)
        [[data, bucket, key, args, ignore]] = s3.client.uploads

        assert Representation.SVG_MEDIA_TYPE == args["ContentType"]
        assert b"svg" in data
        assert b"PNG" not in data

    def test_multipart_upload(self):
        class MockMultipartS3Upload(MultipartS3Upload):
            completed = None
            aborted = None

            def __init__(self, uploader, representation, mirror_to):
                self.parts = []
                MockMultipartS3Upload.completed = False
                MockMultipartS3Upload.aborted = False

            def upload_part(self, content):
                self.parts.append(content)

            def complete(self):
                MockMultipartS3Upload.completed = True

            def abort(self):
                MockMultipartS3Upload.aborted = True

        rep, ignore = create(
            self._db,
            Representation,
            url="http://books.mrc",
            media_type=Representation.MARC_MEDIA_TYPE,
        )

        s3 = self._create_s3_uploader(MockS3Client)

        # Successful upload
        with s3.multipart_upload(rep,
                                 rep.url,
                                 upload_class=MockMultipartS3Upload) as upload:
            assert [] == upload.parts
            assert False == upload.completed
            assert False == upload.aborted

            upload.upload_part("Part 1")
            upload.upload_part("Part 2")

            assert ["Part 1", "Part 2"] == upload.parts

        assert True == MockMultipartS3Upload.completed
        assert False == MockMultipartS3Upload.aborted
        assert None == rep.mirror_exception

        class FailingMultipartS3Upload(MockMultipartS3Upload):
            def upload_part(self, content):
                raise Exception("Error!")

        # Failed during upload
        with s3.multipart_upload(
                rep, rep.url, upload_class=FailingMultipartS3Upload) as upload:
            upload.upload_part("Part 1")

        assert False == MockMultipartS3Upload.completed
        assert True == MockMultipartS3Upload.aborted
        assert "Error!" == rep.mirror_exception

        class AnotherFailingMultipartS3Upload(MockMultipartS3Upload):
            def complete(self):
                raise Exception("Error!")

        rep.mirror_exception = None
        # Failed during completion
        with s3.multipart_upload(
                rep, rep.url,
                upload_class=AnotherFailingMultipartS3Upload) as upload:
            upload.upload_part("Part 1")

        assert False == MockMultipartS3Upload.completed
        assert True == MockMultipartS3Upload.aborted
        assert "Error!" == rep.mirror_exception

    @parameterized.expand([
        (
            "default_expiration_parameter",
            None,
            int(S3UploaderConfiguration.S3_DEFAULT_PRESIGNED_URL_EXPIRATION),
        ),
        (
            "empty_expiration_parameter",
            {
                S3UploaderConfiguration.S3_PRESIGNED_URL_EXPIRATION: 100
            },
            100,
        ),
    ])
    def test_sign_url(self, name, expiration_settings, expected_expiration):
        # Arrange
        region = "us-east-1"
        bucket = "bucket"
        filename = "filename"
        url = "https://{0}.s3.{1}.amazonaws.com/{2}".format(
            bucket, region, filename)
        expected_url = url + "?AWSAccessKeyId=KEY&Expires=1&Signature=S"
        settings = expiration_settings if expiration_settings else {}
        s3_uploader = self._create_s3_uploader(region=region, **settings)
        s3_uploader.split_url = MagicMock(return_value=(bucket, filename))
        s3_uploader.client.generate_presigned_url = MagicMock(
            return_value=expected_url)

        # Act
        result = s3_uploader.sign_url(url)

        # Assert
        assert result == expected_url
        s3_uploader.split_url.assert_called_once_with(url)
        s3_uploader.client.generate_presigned_url.assert_called_once_with(
            "get_object",
            ExpiresIn=expected_expiration,
            Params={
                "Bucket": bucket,
                "Key": filename
            },
        )
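
Here generate_presigned_url is mocked out, but it is the standard boto3
client method the uploader is expected to call. Against a real client the
same call would look roughly like this (bucket, key and expiration values
are illustrative):

    import boto3

    s3 = boto3.client("s3", region_name="us-east-1")
    signed_url = s3.generate_presigned_url(
        "get_object",
        Params={"Bucket": "bucket", "Key": "filename"},
        ExpiresIn=3600,  # seconds; the uploader reads this from its settings
    )
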
Exemple #30
0
    def test_log(self):
        # Basic test of CirculationEvent.log.

        pool = self._licensepool(edition=None)
        library = self._default_library
        event_name = CirculationEvent.DISTRIBUTOR_CHECKOUT
        old_value = 10
        new_value = 8
        start = datetime_utc(2019, 1, 1)
        end = datetime_utc(2019, 1, 2)
        location = "Westgate Branch"

        m = CirculationEvent.log
        event, is_new = m(
            self._db,
            license_pool=pool,
            event_name=event_name,
            library=library,
            old_value=old_value,
            new_value=new_value,
            start=start,
            end=end,
            location=location,
        )
        assert True == is_new
        assert pool == event.license_pool
        assert library == event.library
        assert -2 == event.delta  # calculated from old_value and new_value
        assert start == event.start
        assert end == event.end
        assert location == event.location

        # If log finds another event with the same license pool,
        # library, event name, and start date, that event is returned
        # unchanged.
        event, is_new = m(
            self._db,
            license_pool=pool,
            event_name=event_name,
            library=library,
            start=start,
            # These values will be ignored.
            old_value=500,
            new_value=200,
            end=utc_now(),
            location="another location",
        )
        assert False == is_new
        assert pool == event.license_pool
        assert library == event.library
        assert -2 == event.delta
        assert start == event.start
        assert end == event.end
        assert location == event.location

        # If no timestamp is provided, the current time is used. This is
        # the most common case, so in practice a new event is created each
        # time log() is called.
        event, is_new = m(
            self._db,
            license_pool=pool,
            event_name=event_name,
            library=library,
            old_value=old_value,
            new_value=new_value,
            end=end,
            location=location,
        )
        assert (utc_now() - event.start).total_seconds() < 2
        assert True == is_new
        assert pool == event.license_pool
        assert library == event.library
        assert -2 == event.delta
        assert end == event.end
        assert location == event.location