Ejemplo n.º 1
0
    def process_batch(self, identifiers):
        """Look up a batch of identifiers in Enki and record each outcome.

        The identifiers are turned into strings by the API object, their
        availability is requested in a single call, and the response body
        is handed to ``self.parser``.

        :param identifiers: The identifiers coverage was requested for.
        :return: A list holding, for every parsed book that was actually
            requested, either the CoverageFailure returned by
            ``set_metadata`` or the return value of ``handle_success``;
            followed by a transient CoverageFailure for every requested
            identifier string the response did not mention.
        """
        requested_strings = self.api.create_identifier_strings(identifiers)
        response = self.api.availability(title_ids=requested_strings)

        mentioned = set()
        outcomes = []
        parsed = self.parser.process_all(response.content)
        for metadata, _availability in parsed:
            identifier, _is_new = metadata.primary_identifier.load(self._db)
            if identifier not in identifiers:
                # Enki reported a book that was never requested. That is
                # not expected; ignore the entry entirely.
                continue
            mentioned.add(identifier.identifier)
            outcome = self.set_metadata(identifier, metadata)
            if isinstance(outcome, CoverageFailure):
                outcomes.append(outcome)
            else:
                outcomes.append(self.handle_success(identifier))

        # Every requested identifier string that never showed up in the
        # response gets a transient CoverageFailure.
        for identifier_string in requested_strings:
            if identifier_string in mentioned:
                continue
            # NOTE(review): `api` is a bare name here, not `self.api` --
            # confirm it is bound at module level.
            identifier, _ = Identifier.for_foreign_id(
                self._db, api.ENKI_ID, identifier_string
            )
            outcomes.append(
                CoverageFailure(
                    identifier,
                    "Book not found in Enki",
                    data_source=self.output_source,
                    transient=True,
                )
            )
        return outcomes
Ejemplo n.º 2
0
 def failure(self, identifier, error, transient=True):
     """Build a CoverageFailure carrying this object's data source and collection.

     :param identifier: Passed through as the first CoverageFailure argument.
     :param error: Passed through as the second CoverageFailure argument.
     :param transient: Forwarded as the ``transient`` keyword (default True).
     :return: The newly constructed CoverageFailure.
     """
     options = dict(
         data_source=self.data_source,
         transient=transient,
         collection=self.collection,
     )
     return CoverageFailure(identifier, error, **options)
Ejemplo n.º 3
0
    def test_process_batch(self):
        # Verify that process_batch() yields one result for an imported
        # Edition/LicensePool pair and passes a queued CoverageFailure
        # through as a CoverageFailure.
        provider = self._provider()

        # An Edition and a LicensePool for the same identifier, but from
        # different data sources. We would expect this to happen when
        # talking to the open-access content server.
        edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
        imported_identifier = edition.primary_identifier

        license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        pool, _ = LicensePool.for_foreign_id(
            self._db,
            license_source,
            imported_identifier.type,
            imported_identifier.identifier,
        )
        eq_(None, pool.work)

        # A second identifier that's doomed to failure.
        doomed_identifier = self._identifier()
        messages_by_id = {
            doomed_identifier.urn: CoverageFailure(
                doomed_identifier, "201: try again later"
            )
        }

        provider.queue_import_results([edition], [pool], [], messages_by_id)

        fake_batch = [object()]
        success, failure = provider.process_batch(fake_batch)

        # The batch was provided to lookup_and_import_batch.
        eq_([fake_batch], provider.batches)

        # The Edition and LicensePool have been knitted together into
        # a Work.
        eq_(edition, pool.presentation_edition)
        # Identity check: comparing to None with != is unidiomatic.
        assert pool.work is not None

        # The license pool was finalized.
        eq_([pool], provider.finalized)

        # The failure stayed a CoverageFailure object.
        eq_(doomed_identifier, failure.obj)
        eq_(True, failure.transient)
Ejemplo n.º 4
0
    def process_batch(self, batch):
        """Perform a Simplified lookup and import the resulting OPDS feed.

        :param batch: The batch to look up; handed unchanged to
            ``lookup_and_import_batch``.
        :return: A list containing, in this order: the primary identifier
            of every imported Edition, a transient CoverageFailure for
            every LicensePool whose identifier has no (remaining) imported
            Edition, and every value of the error-message dictionary
            returned by the lookup.
        """
        imported_editions, pools, _works, error_messages_by_id = (
            self.lookup_and_import_batch(batch)
        )

        # We grant coverage if an Edition was created from the operation.
        results = [edition.primary_identifier for edition in imported_editions]

        # Identifiers that have an Edition but, so far, no LicensePool.
        leftover_identifiers = set(results)

        # We may also have created a LicensePool from the operation.
        for pool in pools:
            self.finalize_license_pool(pool)
            identifier = pool.identifier
            if identifier in leftover_identifiers:
                leftover_identifiers.remove(identifier)
                continue
            msg = "OPDS import operation imported LicensePool, but no Edition."
            results.append(
                CoverageFailure(
                    identifier,
                    msg,
                    data_source=self.output_source,
                    transient=True,
                )
            )

        # Logger.warn() is a deprecated alias; use Logger.warning().
        for identifier in leftover_identifiers:
            self.log.warning(
                "OPDS import operation imported Edition for %r, but no LicensePool.",
                identifier,
            )

        results.extend(error_messages_by_id.values())
        return results
Ejemplo n.º 5
0
    def test_process_batch(self):
        # Verify the four kinds of result process_batch() can return: a
        # matched Edition/LicensePool pair, a LicensePool with no Edition,
        # a queued CoverageFailure, and a queued identifier that is
        # treated as a success.
        provider = self._provider()

        # An Edition and a LicensePool for the same identifier, but from
        # different data sources. We would expect this to happen when
        # talking to the open-access content server.
        edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
        identifier = edition.primary_identifier

        license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        pool, _ = LicensePool.for_foreign_id(
            self._db,
            license_source,
            identifier.type,
            identifier.identifier,
            collection=self._default_collection,
        )
        # Identity check: comparing to None with == is unidiomatic.
        assert pool.work is None

        # A second Edition/LicensePool that's going to cause a problem:
        # the LicensePool will show up in the results, but the
        # corresponding Edition will not.
        _, pool2 = self._edition(with_license_pool=True)

        # One identifier that can't be looked up at all, and one that
        # shows up in messages_by_id because its simplified:message was
        # determined to indicate success rather than failure.
        error_identifier = self._identifier()
        not_an_error_identifier = self._identifier()
        messages_by_id = {
            error_identifier.urn: CoverageFailure(
                error_identifier, "500: internal error"
            ),
            not_an_error_identifier.urn: not_an_error_identifier,
        }

        # When we call CoverageProvider.process_batch(), it's going to
        # return the information we just set up: a matched
        # Edition/LicensePool pair, a mismatched LicensePool, and the
        # two messages.
        provider.queue_import_results([edition], [pool, pool2], [], messages_by_id)

        # Make the CoverageProvider do its thing.
        fake_batch = [object()]
        (
            success_import,
            failure_mismatched,
            failure_message,
            success_message,
        ) = provider.process_batch(fake_batch)

        # The fake batch was provided to lookup_and_import_batch.
        assert [fake_batch] == provider.batches

        # The matched Edition/LicensePool pair was returned.
        assert success_import == edition.primary_identifier

        # The LicensePool of that pair was passed into
        # finalize_license_pool. The mismatched LicensePool was not.
        assert [pool] == provider.finalized

        # The mismatched LicensePool turned into a CoverageFailure
        # object.
        assert isinstance(failure_mismatched, CoverageFailure)
        assert (
            "OPDS import operation imported LicensePool, but no Edition."
            == failure_mismatched.exception
        )
        assert pool2.identifier == failure_mismatched.obj
        assert True == failure_mismatched.transient

        # The message with status code 500 was returned as a
        # CoverageFailure object.
        assert isinstance(failure_message, CoverageFailure)
        assert "500: internal error" == failure_message.exception
        assert error_identifier == failure_message.obj
        assert True == failure_message.transient

        # The identifier that had a treat-as-success message was returned
        # as-is.
        assert not_an_error_identifier == success_message
Ejemplo n.º 6
0
    def test_process_batch(self):
        # Verify the three kinds of result process_batch() returns here: a
        # matched Edition/LicensePool pair, a LicensePool with no Edition,
        # and a queued CoverageFailure.
        provider = self._provider()

        # Set up an Edition and a LicensePool that share an identifier
        # but come from different data sources, as we'd expect when
        # talking to the open-access content server.
        edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
        matched_identifier = edition.primary_identifier

        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        pool, is_new = LicensePool.for_foreign_id(
            self._db,
            gutenberg,
            matched_identifier.type,
            matched_identifier.identifier,
            collection=self._default_collection,
        )
        eq_(None, pool.work)

        # A second Edition/LicensePool that will be a problem: its
        # LicensePool is queued as a result, but its Edition is not.
        edition2, mismatched_pool = self._edition(with_license_pool=True)

        # An identifier that can't be looked up at all.
        failing_identifier = self._identifier()
        queued_failure = CoverageFailure(
            failing_identifier, "201: try again later"
        )
        messages_by_id = {failing_identifier.urn: queued_failure}

        # Queue what process_batch() is going to see: the matched pair,
        # the mismatched LicensePool, and the error message.
        provider.queue_import_results(
            [edition], [pool, mismatched_pool], [], messages_by_id
        )

        # Run the CoverageProvider on a stand-in batch.
        fake_batch = [object()]
        results = provider.process_batch(fake_batch)
        success, failure1, failure2 = results

        # lookup_and_import_batch received the stand-in batch.
        eq_([fake_batch], provider.batches)

        # The matched pair came back as its identifier.
        eq_(success, edition.primary_identifier)

        # Only the matched pair's LicensePool went through
        # finalize_license_pool; the mismatched one did not.
        eq_([pool], provider.finalized)

        # The mismatched LicensePool became a CoverageFailure.
        assert isinstance(failure1, CoverageFailure)
        eq_(
            'OPDS import operation imported LicensePool, but no Edition.',
            failure1.exception,
        )
        eq_(mismatched_pool.identifier, failure1.obj)
        eq_(True, failure1.transient)

        # The queued failure came back as a CoverageFailure.
        assert isinstance(failure2, CoverageFailure)
        eq_(failing_identifier, failure2.obj)
        eq_(True, failure2.transient)