def process_batch(self, identifiers):
    """Look up a batch of identifiers in Enki and record each outcome.

    Every book Enki reports that was actually requested has its metadata
    applied through ``set_metadata``. If that call returns a
    ``CoverageFailure`` the failure is kept as the result; otherwise the
    result is whatever ``handle_success`` returns. Requested identifiers
    that Enki never mentioned each get a transient ``CoverageFailure``.

    :param identifiers: The identifiers to look up.
    :return: A list of results, in the order they were produced.
    """
    requested_strings = self.api.create_identifier_strings(identifiers)
    response = self.api.availability(title_ids=requested_strings)

    outcomes = []
    reported = set()
    for metadata, _availability in self.parser.process_all(response.content):
        identifier, _is_new = metadata.primary_identifier.load(self._db)
        if identifier not in identifiers:
            # Enki told us about a book we didn't ask for. This
            # shouldn't happen, but if it does we do nothing further.
            continue

        reported.add(identifier.identifier)
        outcome = self.set_metadata(identifier, metadata)
        if isinstance(outcome, CoverageFailure):
            outcomes.append(outcome)
        else:
            outcomes.append(self.handle_success(identifier))

    # Every requested identifier that Enki did not mention becomes a
    # transient CoverageFailure.
    for missing in (s for s in requested_strings if s not in reported):
        # NOTE(review): `api` here is a bare name, not `self.api` --
        # confirm that it is actually in scope in this module.
        identifier, _is_new = Identifier.for_foreign_id(
            self._db, api.ENKI_ID, missing
        )
        outcomes.append(
            CoverageFailure(
                identifier,
                "Book not found in Enki",
                data_source=self.output_source,
                transient=True,
            )
        )
    return outcomes
def failure(self, identifier, error, transient=True):
    """Build a CoverageFailure tied to this object's data source and Collection.

    :param identifier: The object the failure is about.
    :param error: A description of what went wrong.
    :param transient: Passed through as the failure's ``transient`` flag.
    :return: The new CoverageFailure.
    """
    context = dict(
        data_source=self.data_source,
        transient=transient,
        collection=self.collection,
    )
    return CoverageFailure(identifier, error, **context)
def test_process_batch(self):
    """process_batch() knits an Edition and a LicensePool into a Work and
    passes a queued CoverageFailure through unchanged.
    """
    provider = self._provider()

    # Here are an Edition and a LicensePool for the same identifier but
    # from different data sources. We would expect this to happen
    # when talking to the open-access content server.
    edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
    identifier = edition.primary_identifier

    license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    pool, is_new = LicensePool.for_foreign_id(
        self._db, license_source, identifier.type, identifier.identifier
    )
    eq_(None, pool.work)

    # Here's a second identifier that's doomed to failure. It gets its
    # own name so the assertions below are unambiguous about which
    # identifier they check.
    doomed_identifier = self._identifier()
    messages_by_id = {
        doomed_identifier.urn: CoverageFailure(
            doomed_identifier, "201: try again later"
        )
    }

    provider.queue_import_results([edition], [pool], [], messages_by_id)

    fake_batch = [object()]
    success, failure = provider.process_batch(fake_batch)

    # The batch was provided to lookup_and_import_batch.
    eq_([fake_batch], provider.batches)

    # The Edition and LicensePool have been knitted together into
    # a Work.
    eq_(edition, pool.presentation_edition)
    assert pool.work is not None

    # The license pool was finalized.
    eq_([pool], provider.finalized)

    # The failure stayed a CoverageFailure object.
    eq_(doomed_identifier, failure.obj)
    eq_(True, failure.transient)
def process_batch(self, batch):
    """Perform a Simplified lookup and import the resulting OPDS feed.

    :param batch: Passed through unchanged to ``lookup_and_import_batch``.
    :return: A list containing, in order: the primary identifier of
        every imported Edition, a transient CoverageFailure for every
        LicensePool that arrived without a matching Edition, and every
        value of the error-message dictionary returned by the lookup.
    """
    (
        imported_editions,
        pools,
        _works,
        error_messages_by_id,
    ) = self.lookup_and_import_batch(batch)

    results = []
    leftover_identifiers = set()

    # We grant coverage if an Edition was created from the operation.
    for edition in imported_editions:
        identifier = edition.primary_identifier
        results.append(identifier)
        leftover_identifiers.add(identifier)

    # We may also have created a LicensePool from the operation.
    for pool in pools:
        identifier = pool.identifier
        if identifier in leftover_identifiers:
            # Only a pool that has a matching Edition is finalized; a
            # pool without one is reported as a failure instead.
            self.finalize_license_pool(pool)
            leftover_identifiers.remove(identifier)
        else:
            msg = "OPDS import operation imported LicensePool, but no Edition."
            results.append(
                CoverageFailure(
                    identifier,
                    msg,
                    data_source=self.output_source,
                    transient=True,
                )
            )

    # Editions that never got a LicensePool still count as covered, but
    # the mismatch is worth a log entry.
    for identifier in leftover_identifiers:
        self.log.warning(
            "OPDS import operation imported Edition for %r, but no LicensePool.",
            identifier,
        )

    results.extend(error_messages_by_id.values())
    return results
def test_process_batch(self):
    """process_batch() reports a matched Edition/LicensePool pair, a
    LicensePool with no Edition, an error message and a
    treat-as-success message, in that order.
    """
    provider = self._provider()

    # Here are an Edition and a LicensePool for the same identifier but
    # from different data sources. We would expect this to happen
    # when talking to the open-access content server.
    edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
    identifier = edition.primary_identifier

    license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    pool, is_new = LicensePool.for_foreign_id(
        self._db,
        license_source,
        identifier.type,
        identifier.identifier,
        collection=self._default_collection,
    )
    assert pool.work is None

    # Here's a second Edition/LicensePool that's going to cause a
    # problem: the LicensePool will show up in the results, but
    # the corresponding Edition will not.
    edition2, pool2 = self._edition(with_license_pool=True)

    # Here's an identifier that can't be looked up at all,
    # and an identifier that shows up in messages_by_id because
    # its simplified:message was determined to indicate success
    # rather than failure.
    error_identifier = self._identifier()
    not_an_error_identifier = self._identifier()
    messages_by_id = {
        error_identifier.urn: CoverageFailure(
            error_identifier, "500: internal error"
        ),
        not_an_error_identifier.urn: not_an_error_identifier,
    }

    # When we call CoverageProvider.process_batch(), it's going to
    # return the information we just set up: a matched
    # Edition/LicensePool pair, a mismatched LicensePool, and an
    # error message.
    provider.queue_import_results([edition], [pool, pool2], [], messages_by_id)

    # Make the CoverageProvider do its thing.
    fake_batch = [object()]
    (
        success_import,
        failure_mismatched,
        failure_message,
        success_message,
    ) = provider.process_batch(fake_batch)

    # The fake batch was provided to lookup_and_import_batch.
    assert [fake_batch] == provider.batches

    # The matched Edition/LicensePool pair was returned.
    assert success_import == edition.primary_identifier

    # The LicensePool of that pair was passed into finalize_license_pool.
    # The mismatched LicensePool was not.
    assert [pool] == provider.finalized

    # The mismatched LicensePool turned into a CoverageFailure
    # object.
    assert isinstance(failure_mismatched, CoverageFailure)
    assert (
        "OPDS import operation imported LicensePool, but no Edition."
        == failure_mismatched.exception
    )
    assert pool2.identifier == failure_mismatched.obj
    assert True == failure_mismatched.transient

    # The OPDSMessage with status code 500 was returned as a
    # CoverageFailure object.
    assert isinstance(failure_message, CoverageFailure)
    assert "500: internal error" == failure_message.exception
    assert error_identifier == failure_message.obj
    assert True == failure_message.transient

    # The identifier that had a treat-as-success OPDSMessage was returned
    # as-is.
    assert not_an_error_identifier == success_message
def test_process_batch(self):
    """process_batch() returns a success for a matched Edition/LicensePool
    pair, then a failure for a LicensePool with no Edition, then the
    queued error message.
    """
    provider = self._provider()

    # A matched pair: an Edition and a LicensePool that share an
    # identifier but come from different data sources, as we would
    # expect when talking to the open-access content server.
    matched_edition = self._edition(
        data_source_name=DataSource.OA_CONTENT_SERVER
    )
    matched_identifier = matched_edition.primary_identifier
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    matched_pool, is_new = LicensePool.for_foreign_id(
        self._db,
        gutenberg,
        matched_identifier.type,
        matched_identifier.identifier,
        collection=self._default_collection,
    )
    eq_(None, matched_pool.work)

    # A problem case: this LicensePool will show up in the results,
    # but its Edition will not.
    orphan_edition, orphan_pool = self._edition(with_license_pool=True)

    # An identifier that can't be looked up at all.
    unknown_identifier = self._identifier()
    failure_for_unknown = CoverageFailure(
        unknown_identifier, "201: try again later"
    )
    messages_by_id = {unknown_identifier.urn: failure_for_unknown}

    # Queue up what process_batch() will get back from its lookup: the
    # matched pair, the mismatched LicensePool, and the error message.
    provider.queue_import_results(
        [matched_edition], [matched_pool, orphan_pool], [], messages_by_id
    )

    # Make the CoverageProvider do its thing.
    fake_batch = [object()]
    outcomes = provider.process_batch(fake_batch)
    success, failure1, failure2 = outcomes

    # The fake batch was handed to lookup_and_import_batch.
    eq_([fake_batch], provider.batches)

    # The matched Edition/LicensePool pair was returned.
    eq_(success, matched_edition.primary_identifier)

    # Only the matched LicensePool went through finalize_license_pool;
    # the mismatched one did not.
    eq_([matched_pool], provider.finalized)

    # The mismatched LicensePool became a CoverageFailure.
    assert isinstance(failure1, CoverageFailure)
    eq_(
        'OPDS import operation imported LicensePool, but no Edition.',
        failure1.exception,
    )
    eq_(orphan_pool.identifier, failure1.obj)
    eq_(True, failure1.transient)

    # The queued failure came back as a CoverageFailure object.
    assert isinstance(failure2, CoverageFailure)
    eq_(unknown_identifier, failure2.obj)
    eq_(True, failure2.transient)