def test_ensure_coverage_changes_status(self):
    """Verify that processing an item that has a preexisting
    CoverageRecord can change the status of that CoverageRecord.
    """
    always = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source)
    persistent = NeverSuccessfulCoverageProvider(
        "Persistent failures", self.input_identifier_types,
        self.output_source)
    # Give the transient provider its own service name so the three
    # providers are distinguishable by label.
    transient = TransientFailureCoverageProvider(
        "Transient failures", self.input_identifier_types,
        self.output_source)

    # Cover the same identifier multiple times, simulating all
    # possible states of a CoverageRecord. The same CoverageRecord
    # is used every time and the status is changed appropriately
    # after every run. force=True is passed on every call so that
    # an existing record does not short-circuit the attempt.
    c1 = persistent.ensure_coverage(self.identifier, force=True)
    eq_(CoverageRecord.PERSISTENT_FAILURE, c1.status)

    c2 = transient.ensure_coverage(self.identifier, force=True)
    eq_(c2, c1)
    eq_(CoverageRecord.TRANSIENT_FAILURE, c1.status)

    c3 = always.ensure_coverage(self.identifier, force=True)
    eq_(c3, c1)
    eq_(CoverageRecord.SUCCESS, c1.status)

    # A success can be turned back into a persistent failure, too.
    c4 = persistent.ensure_coverage(self.identifier, force=True)
    eq_(c4, c1)
    eq_(CoverageRecord.PERSISTENT_FAILURE, c1.status)
def test_items_that_need_coverage(self):
    """Check how a provider's cutoff_time interacts with the timestamp
    of an existing CoverageRecord in items_that_need_coverage().
    """
    recorded_at = datetime.datetime(2016, 1, 1)
    existing = CoverageRecord.add_for(
        self.edition, self.output_source, timestamp=recorded_at
    )
    types = self.input_identifier_types
    source = self.output_source

    # Cutoff equal to the record's timestamp: nothing needs coverage.
    at_cutoff = AlwaysSuccessfulCoverageProvider(
        "Always successful", types, source, cutoff_time=recorded_at
    )
    needing = at_cutoff.items_that_need_coverage().all()
    eq_([], needing)

    # Cutoff one second later than the record: the identifier shows up.
    one_second_after = recorded_at + datetime.timedelta(seconds=1)
    past_cutoff = AlwaysSuccessfulCoverageProvider(
        "Always successful", types, source, cutoff_time=one_second_after
    )
    needing = past_cutoff.items_that_need_coverage().all()
    eq_([self.identifier], needing)

    # No cutoff given at all: nothing needs coverage.
    no_cutoff = AlwaysSuccessfulCoverageProvider(
        "Always successful", types, source
    )
    needing = no_cutoff.items_that_need_coverage().all()
    eq_([], needing)
def test_run_once_and_update_timestamp(self):
    """Exercise a full provider run.

    Identifiers with no CoverageRecord are expected to be attempted
    first, followed by identifiers whose earlier attempt ended in a
    transient failure. The run is also expected to leave a Timestamp
    behind. This doubles as a check that
    AlwaysSuccessfulCoverageProvider creates successful
    CoverageRecords.
    """
    db = self._db
    source = self.output_source

    # No CoverageRecords exist yet.
    eq_([], db.query(CoverageRecord).all())

    # An earlier attempt to cover the usual test identifier ended in
    # a transient failure.
    self._coverage_record(
        self.identifier, source, status=CoverageRecord.TRANSIENT_FAILURE
    )

    # This identifier has never been covered.
    no_coverage = self._identifier()

    # This one carries a persistent failure.
    persistent_failure = self._identifier()
    self._coverage_record(
        persistent_failure, source,
        status=CoverageRecord.PERSISTENT_FAILURE
    )

    # No Timestamp exists before the run.
    eq_([], db.query(Timestamp).all())

    provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types, source
    )
    provider.run()

    # Exactly one Timestamp exists now, named after the provider.
    (timestamp,) = db.query(Timestamp).all()
    eq_("Always successful", timestamp.service)

    # Both the formerly-transient identifier and the never-covered
    # identifier end up with a single successful record.
    (transient_failure_has_gone,) = self.identifier.coverage_records
    eq_(CoverageRecord.SUCCESS, transient_failure_has_gone.status)

    (now_has_coverage,) = no_coverage.coverage_records
    eq_(CoverageRecord.SUCCESS, now_has_coverage.status)

    # The never-covered identifier was attempted before the
    # transient-failure one, although it was created later. The
    # persistent-failure identifier does not appear in the attempts.
    expected_order = [no_coverage, self.identifier]
    eq_(expected_order, provider.attempts)
def test_irrelevant_provider_is_not_called(self):
    """Check that prepare() leaves alone a provider whose input type
    does not apply to the work being prepared.
    """
    takes_gutenberg = AlwaysSuccessfulCoverageProvider(
        "Gutenberg monitor", self.gutenberg, self.oclc
    )
    takes_oclc = NeverSuccessfulCoverageProvider(
        "OCLC monitor", self.oclc, self.overdrive
    )
    providers = [takes_gutenberg, takes_oclc]
    monitor = PresentationReadyMonitor(self._db, providers)

    failures = monitor.prepare(self.work)

    # Nothing failed.
    eq_([], failures)

    # The Gutenberg-input provider was run on the work's primary
    # identifier.
    primary = self.work.presentation_edition.primary_identifier
    eq_([primary], takes_gutenberg.attempts)

    # The OCLC-input provider was never invoked. (Had it been, it
    # would have reported a failure.)
    eq_([], takes_oclc.attempts)

    # prepare() itself does not flip the presentation-ready flag;
    # that is handled elsewhere.
    eq_(False, self.work.presentation_ready)
def test_make_batch_presentation_ready_sets_presentation_ready_on_success(
        self):
    """When the only provider succeeds, process_batch() marks the work
    presentation ready and records no exception.
    """
    only_provider = AlwaysSuccessfulCoverageProvider(
        "Provider 1", self.gutenberg, self.oclc
    )
    monitor = PresentationReadyMonitor(self._db, [only_provider])
    batch = [self.work]
    monitor.process_batch(batch)

    eq_(None, self.work.presentation_ready_exception)
    eq_(True, self.work.presentation_ready)
def test_ensure_coverage_respects_operation(self):
    """Providers that share an output source but differ in operation
    each get their own CoverageRecord, and re-running one of them
    returns its existing record untouched.
    """
    # Two providers with the same output source but different operations.
    provider1 = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source, operation="foo")
    provider2 = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source, operation="bar")

    # Ensure coverage of both providers.
    coverage1 = provider1.ensure_coverage(self.edition)
    eq_("foo", coverage1.operation)
    # Remember the timestamp so we can tell later whether the record
    # was rewritten.
    old_timestamp = coverage1.timestamp

    coverage2 = provider2.ensure_coverage(self.edition)
    eq_("bar", coverage2.operation)

    # There are now two CoverageRecords, one for each operation.
    eq_(set([coverage1, coverage2]), set(self._db.query(CoverageRecord)))

    # If we try to ensure coverage again, no work is done and we
    # get the old coverage record back.
    new_coverage = provider1.ensure_coverage(self.edition)
    eq_(new_coverage, coverage1)
    # Assert (rather than assign) that the timestamp is unchanged;
    # an assignment here would make the check vacuous.
    eq_(old_timestamp, new_coverage.timestamp)
def test_prepare_returns_failing_providers(self):
    """prepare() returns the providers that failed on the work."""
    passing = AlwaysSuccessfulCoverageProvider(
        "Monitor 1", self.gutenberg, self.oclc
    )
    failing = NeverSuccessfulCoverageProvider(
        "Monitor 2", self.gutenberg, self.overdrive
    )
    monitor = PresentationReadyMonitor(self._db, [passing, failing])

    failed_providers = monitor.prepare(self.work)

    # Only the never-successful provider is reported.
    eq_([failing], failed_providers)
def test_ensure_coverage(self):
    """ensure_coverage() on an edition yields a CoverageRecord for the
    edition's identifier and leaves no Timestamp behind.
    """
    db = self._db
    provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source
    )

    coverage = provider.ensure_coverage(self.edition)

    assert isinstance(coverage, CoverageRecord)
    eq_(self.identifier, coverage.identifier)
    eq_(self.output_source, coverage.data_source)
    eq_(None, coverage.exception)

    # The database holds exactly one CoverageRecord: the one that
    # ensure_coverage() handed back.
    (stored,) = db.query(CoverageRecord).all()
    eq_(stored, coverage)

    # ensure_coverage() is not expected to touch the provider's
    # Timestamp, so none exists.
    eq_([], db.query(Timestamp).all())
def test_make_batch_presentation_ready_sets_exception_on_failure(self):
    """When one provider fails, process_batch() leaves the work not
    presentation ready and records which provider failed.
    """
    passing = AlwaysSuccessfulCoverageProvider(
        "Provider 1", self.gutenberg, self.oclc
    )
    failing = NeverSuccessfulCoverageProvider(
        "Provider 2", self.gutenberg, self.overdrive
    )
    monitor = PresentationReadyMonitor(self._db, [passing, failing])
    batch = [self.work]
    monitor.process_batch(batch)

    eq_(False, self.work.presentation_ready)
    # The message names the failing provider only.
    eq_(
        "Provider(s) failed: Provider 2",
        self.work.presentation_ready_exception
    )
def test_run_on_specific_identifiers(self):
    """run_on_specific_identifiers() covers exactly the identifiers it
    is given, and no others.
    """
    provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source
    )
    # A workset size smaller than the number of identifiers, so the
    # run needs more than one batch.
    provider.workset_size = 3

    to_be_tested = [self._identifier() for _ in range(6)]
    not_to_be_tested = [self._identifier() for _ in range(6)]

    counts, records = provider.run_on_specific_identifiers(to_be_tested)

    # Six identifiers were covered in two batches.
    eq_((6, 0, 0), counts)
    eq_(6, len(records))

    # Only the identifiers in to_be_tested were covered.
    assert all(isinstance(rec, CoverageRecord) for rec in records)
    covered = set([rec.identifier for rec in records])
    eq_(set(to_be_tested), covered)

    for wanted in to_be_tested:
        assert wanted in provider.attempts
    for unwanted in not_to_be_tested:
        assert unwanted not in provider.attempts
def test_should_update(self):
    """should_update() is true for missing or outdated coverage and
    false for a record stamped at the cutoff time.
    """
    cutoff = datetime.datetime(2016, 1, 1)
    provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source, cutoff_time=cutoff
    )

    # No coverage record at all: update.
    missing = provider.should_update(None)
    eq_(True, missing)

    # A record stamped before the cutoff: update.
    record, ignore = CoverageRecord.add_for(
        self.identifier, self.output_source
    )
    record.timestamp = datetime.datetime(2015, 1, 1)
    outdated = provider.should_update(record)
    eq_(True, outdated)

    # A record stamped exactly at the cutoff: no update.
    record.timestamp = cutoff
    current = provider.should_update(record)
    eq_(False, current)
def test_items_that_need_coverage_respects_operation(self):
    """A CoverageRecord without an operation satisfies a provider
    without an operation, but not a provider that names one.
    """
    types = self.input_identifier_types
    source = self.output_source

    # A record created without any operation.
    record1 = CoverageRecord.add_for(self.identifier, source)

    # A provider that performs the 'foo' operation.
    foo_provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", types, source, operation='foo'
    )

    # The operation-less record does not count for it, so
    # self.identifier still needs coverage.
    needing = foo_provider.items_that_need_coverage().all()
    eq_([self.identifier], needing)

    # A provider with no operation set.
    plain_provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", types, source
    )

    # The record created at the start of the test covers
    # self.identifier for this provider.
    needing = plain_provider.items_that_need_coverage().all()
    eq_([], needing)
def test_operation_included_in_records(self):
    """The provider's operation ends up on the CoverageRecord whether
    the coverage attempt succeeds or fails.
    """
    db = self._db

    succeeding = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source, operation=CoverageRecord.SYNC_OPERATION
    )
    outcome = succeeding.ensure_coverage(self.edition)

    # On success the single stored record carries the operation.
    (success_record,) = db.query(CoverageRecord).all()
    eq_(success_record.operation, CoverageRecord.SYNC_OPERATION)

    # Remove it so the next query sees only the failure record.
    db.delete(success_record)

    failing = NeverSuccessfulCoverageProvider(
        "Never successful", self.input_identifier_types,
        self.output_source, operation=CoverageRecord.REAP_OPERATION
    )
    outcome = failing.ensure_coverage(self.edition)

    # On failure the single stored record carries the operation too.
    (failure_record,) = db.query(CoverageRecord).all()
    eq_(failure_record.operation, CoverageRecord.REAP_OPERATION)
def test_no_input_identifier_types(self):
    """The constructor accepts None for the input identifier types and
    stores it unchanged.
    """
    # None stands for "all identifier types".
    untyped_provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", None, self.output_source
    )
    stored_types = untyped_provider.input_identifier_types
    eq_(None, stored_types)
def test_process_batch_and_handle_results(self):
    """Run the same two-identifier batch through four providers and
    check the counts and CoverageRecords each one produces: success,
    transient failure (raised or by ignoring the tasks), and
    persistent failure.
    """
    types = self.input_identifier_types
    source = self.output_source

    first_edition, first_pool = self._edition(with_license_pool=True)
    i1 = first_edition.primary_identifier
    second_edition, second_pool = self._edition(with_license_pool=True)
    i2 = second_edition.primary_identifier

    success_provider = AlwaysSuccessfulCoverageProvider(
        "Success", types, source, operation="i succeed"
    )
    batch = [i1, i2]

    counts, successes = (
        success_provider.process_batch_and_handle_results(batch)
    )

    # Both items succeeded.
    eq_((2, 0, 0), counts)

    # Every result is a CoverageRecord whose status is success...
    assert all(isinstance(rec, CoverageRecord) for rec in successes)
    eq_(
        [CoverageRecord.SUCCESS, CoverageRecord.SUCCESS],
        [rec.status for rec in successes]
    )

    # ...tied to one of the two identifiers...
    eq_(set([i1, i2]), set([rec.identifier for rec in successes]))

    # ...and labelled with the provider's operation.
    eq_(['i succeed', 'i succeed'], [rec.operation for rec in successes])

    # Next, a provider that produces transient failures.
    transient_failure_provider = TransientFailureCoverageProvider(
        "Transient failure", types, source,
        operation="i fail transiently"
    )
    counts, failures = (
        transient_failure_provider.process_batch_and_handle_results(batch)
    )

    # Both items failed transiently.
    eq_((0, 2, 0), counts)

    # The failures are tracked by coverage records carrying the
    # transient-failure status and this provider's operation.
    eq_(
        [CoverageRecord.TRANSIENT_FAILURE, CoverageRecord.TRANSIENT_FAILURE],
        [rec.status for rec in failures]
    )
    eq_(
        ["i fail transiently", "i fail transiently"],
        [rec.operation for rec in failures]
    )

    # A provider that simply ignores every item it is handed also
    # ends up with transient failures.
    task_ignoring_provider = TaskIgnoringCoverageProvider(
        "Ignores all tasks", types, source, operation="i ignore"
    )
    counts, records = (
        task_ignoring_provider.process_batch_and_handle_results(batch)
    )
    eq_((0, 2, 0), counts)
    eq_(
        [CoverageRecord.TRANSIENT_FAILURE, CoverageRecord.TRANSIENT_FAILURE],
        [rec.status for rec in records]
    )
    eq_(["i ignore", "i ignore"], [rec.operation for rec in records])

    # Finally, a provider whose failures are persistent.
    persistent_failure_provider = NeverSuccessfulCoverageProvider(
        "Persistent failure", types, source,
        operation="i will always fail"
    )
    counts, results = (
        persistent_failure_provider.process_batch_and_handle_results(batch)
    )

    # Both items failed persistently.
    eq_((0, 0, 2), counts)
    assert all([isinstance(rec, CoverageRecord) for rec in results])
    eq_(
        ["What did you expect?", "What did you expect?"],
        [rec.exception for rec in results]
    )
    eq_(
        [CoverageRecord.PERSISTENT_FAILURE, CoverageRecord.PERSISTENT_FAILURE],
        [rec.status for rec in results]
    )
    eq_(
        ["i will always fail", "i will always fail"],
        [rec.operation for rec in results]
    )