Example No. 1
    def test_fetch_catalog(self):
        # Test our ability to retrieve essential information from a
        # remote registry's root catalog.
        class Mock(RemoteRegistry):
            def _extract_catalog_information(self, response):
                self.extracted_from = response
                return "Essential information"

        # The behavior of fetch_catalog() depends on what comes back
        # when we ask the remote registry for its root catalog.
        client = DummyHTTPClient()

        # If the result is a problem detail document, that document is
        # the return value of fetch_catalog().
        problem = REMOTE_INTEGRATION_FAILED.detailed("oops")
        client.responses.append(problem)
        registry = Mock(self.integration)
        result = registry.fetch_catalog(do_get=client.do_get)
        assert self.integration.url == client.requests.pop()
        assert problem == result

        # If the response looks good, it's passed into
        # _extract_catalog_information(), and the result of _that_
        # method is the return value of fetch_catalog.
        client.queue_requests_response(200, content="A root catalog")
        [queued] = client.responses
        assert "Essential information" == registry.fetch_catalog(
            "custom catalog URL", do_get=client.do_get)
        assert "custom catalog URL" == client.requests.pop()
Example No. 2
    def test_discovery_service_library_registrations_get(self):
        # Here's a discovery service.
        discovery_service, ignore = create(
            self._db,
            ExternalIntegration,
            protocol=ExternalIntegration.OPDS_REGISTRATION,
            goal=ExternalIntegration.DISCOVERY_GOAL,
        )

        # We'll be making a mock request to this URL later.
        discovery_service.setting(
            ExternalIntegration.URL).value = "http://service-url/"

        # We successfully registered this library with the service.
        succeeded, ignore = create(
            self._db,
            Library,
            name="Library 1",
            short_name="L1",
        )
        config = ConfigurationSetting.for_library_and_externalintegration
        config(self._db, "library-registration-status", succeeded,
               discovery_service).value = "success"

        config(self._db, "library-registration-stage", succeeded,
               discovery_service).value = "production"

        # We tried to register this library with the service but were
        # unsuccessful.
        failed, ignore = create(
            self._db,
            Library,
            name="Library 2",
            short_name="L2",
        )
        config(
            self._db,
            "library-registration-status",
            failed,
            discovery_service,
        ).value = "failure"
        config(
            self._db,
            "library-registration-stage",
            failed,
            discovery_service,
        ).value = "testing"

        # We've never tried to register this library with the service.
        unregistered, ignore = create(
            self._db,
            Library,
            name="Library 3",
            short_name="L3",
        )
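
        # Only the libraries that tried to register are associated with
        # the discovery service; `unregistered` is left out.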
        discovery_service.libraries = [succeeded, failed]

        # When a client sends a GET request to the controller, the
        # controller is going to call
        # RemoteRegistry.fetch_registration_document() to try to find
        # the discovery service's terms of service. That's going to
        # make one or two HTTP requests.

        # First, let's try the scenario where the discovery service is
        # working and has a terms-of-service document.
        client = DummyHTTPClient()

        # In this case we'll make two requests. The first request will
        # ask for the root catalog, where we'll look for a
        # registration link.
        root_catalog = dict(
            links=[dict(href="http://register-here/", rel="register")])
        client.queue_requests_response(200,
                                       RemoteRegistry.OPDS_2_TYPE,
                                       content=json.dumps(root_catalog))

        # The second request will fetch that registration link -- then
        # we'll look for TOS data inside.
        registration_document = dict(links=[
            dict(rel="terms-of-service", type="text/html", href="http://tos/"),
            dict(
                rel="terms-of-service",
                type="text/html",
                href=
                "data:text/html;charset=utf-8;base64,PHA+SG93IGFib3V0IHRoYXQgVE9TPC9wPg==",
            ),
        ])
        client.queue_requests_response(
            200,
            RemoteRegistry.OPDS_2_TYPE,
            content=json.dumps(registration_document))

        controller = (self.manager.
                      admin_discovery_service_library_registrations_controller)
        m = controller.process_discovery_service_library_registrations
        with self.request_context_with_admin("/", method="GET"):
            response = m(do_get=client.do_get)
            # The document we get back from the controller is a
            # dictionary with useful information on all known
            # discovery integrations -- just one, in this case.
            [service] = response["library_registrations"]
            assert discovery_service.id == service["id"]

            # The two mock HTTP requests we predicted actually
            # happened.  The target of the first request is the URL to
            # the discovery service's main catalog. The second request
            # is to the "register" link found in that catalog.
            assert ["http://service-url/",
                    "http://register-here/"] == client.requests

            # The TOS link and TOS HTML snippet were recovered from
            # the registration document served in response to the
            # second HTTP request, and included in the dictionary.
            assert "http://tos/" == service["terms_of_service_link"]
            assert "<p>How about that TOS</p>" == service[
                "terms_of_service_html"]
            assert None == service["access_problem"]

            # The dictionary includes a 'libraries' object, a list of
            # dictionaries with information about the relationships
            # between this discovery integration and every library
            # that's tried to register with it.
            info1, info2 = service["libraries"]

            # Here's the library that successfully registered.
            assert info1 == dict(short_name=succeeded.short_name,
                                 status="success",
                                 stage="production")

            # And here's the library that tried to register but
            # failed.
            assert info2 == dict(short_name=failed.short_name,
                                 status="failure",
                                 stage="testing")

            # Note that `unregistered`, the library that never tried
            # to register with this discovery service, is not included.

            # Now let's try the controller method again, except this
            # time the discovery service's web server is down. The
            # first request will return a ProblemDetail document, and
            # there will be no second request.
            client.requests = []
            client.queue_requests_response(
                502,
                content=REMOTE_INTEGRATION_FAILED,
            )
            response = m(do_get=client.do_get)

            # Everything looks good, except that there's no TOS data
            # available.
            [service] = response["library_registrations"]
            assert discovery_service.id == service["id"]
            assert 2 == len(service["libraries"])
            assert None == service["terms_of_service_link"]
            assert None == service["terms_of_service_html"]

            # The problem detail document that prevented the TOS data
            # from showing up has been converted to a dictionary and
            # included in the dictionary of information for this
            # discovery service.
            assert REMOTE_INTEGRATION_FAILED.uri == service["access_problem"][
                "type"]

            # When the user lacks the SYSTEM_ADMIN role, the
            # controller won't even start processing their GET
            # request.
            self.admin.remove_role(AdminRole.SYSTEM_ADMIN)
            self._db.flush()
            pytest.raises(AdminNotAuthorized, m)
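
One detail worth making explicit: the terms_of_service_html value asserted
above comes straight from the base64 data: URI embedded in the mocked
registration document, while the plain http href in the same document becomes
terms_of_service_link. Decoding the data: URI recovers the exact snippet the
controller exposes:

import base64

encoded = "PHA+SG93IGFib3V0IHRoYXQgVE9TPC9wPg=="
print(base64.b64decode(encoded).decode("utf-8"))
# Prints: <p>How about that TOS</p>
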
Example No. 3
    def test_fetch_registration_document(self):
        # Test our ability to retrieve terms-of-service information
        # from a remote registry, assuming the registry makes that
        # information available.

        # First, test the case where we can't even get the catalog
        # document.
        class Mock(RemoteRegistry):
            def fetch_catalog(self, do_get):
                self.fetch_catalog_called_with = do_get
                return REMOTE_INTEGRATION_FAILED

        registry = Mock(object())
        result = registry.fetch_registration_document()

        # Our mock fetch_catalog was called with a method that would
        # have made a real HTTP request.
        assert HTTP.debuggable_get == registry.fetch_catalog_called_with

        # But the fetch_catalog method returned a problem detail,
        # which became the return value of
        # fetch_registration_document.
        assert REMOTE_INTEGRATION_FAILED == result

        # Test the case where we get the catalog document but we can't
        # get the registration document.
        client = DummyHTTPClient()
        client.responses.append(REMOTE_INTEGRATION_FAILED)

        class Mock(RemoteRegistry):
            def fetch_catalog(self, do_get):
                return "http://register-here/", "vendor id"

            def _extract_registration_information(self, response):
                self._extract_registration_information_called_with = response
                return "TOS link", "TOS HTML data"

        registry = Mock(object())
        result = registry.fetch_registration_document(client.do_get)
        # A request was made to the registration URL mentioned in the catalog.
        assert "http://register-here/" == client.requests.pop()
        assert [] == client.requests

        # But the request returned a problem detail, which became the
        # return value of the method.
        assert REMOTE_INTEGRATION_FAILED == result

        # Finally, test the case where we can get both documents.

        client.queue_requests_response(200, content="a registration document")
        result = registry.fetch_registration_document(client.do_get)

        # Another request was made to the registration URL.
        assert "http://register-here/" == client.requests.pop()
        assert [] == client.requests

        # Our mock of _extract_registration_information was called
        # with the mock response to that request.
        response = registry._extract_registration_information_called_with
        assert b"a registration document" == response.content

        # The return value of _extract_registration_information was
        # propagated as the return value of
        # fetch_registration_document.
        assert ("TOS link", "TOS HTML data") == result
class TestContentCafeAPI(DatabaseTest):

    base_path = os.path.split(__file__)[0]
    resource_path = os.path.join(base_path, "files", "content_cafe")

    def data_file(self, path):
        """Return the contents of a test data file."""
        with open(os.path.join(self.resource_path, path)) as data:
            return data.read()

    def setup(self):
        super(TestContentCafeAPI, self).setup()
        self.http = DummyHTTPClient()
        self.soap = MockSOAPClient(popularity_value=5)
        self.api = ContentCafeAPI(
            self._db, 'uid', 'pw', self.soap, self.http.do_get
        )
        self.identifier = self._identifier(identifier_type=Identifier.ISBN)
        self.args = dict(userid=self.api.user_id, password=self.api.password,
                         isbn=self.identifier.identifier)

    def test_from_config(self):
        # Without an integration, an error is raised.
        assert_raises(
            CannotLoadConfiguration, ContentCafeAPI.from_config, self._db
        )

        # With incomplete integrations, an error is raised.
        integration = self._external_integration(
            ExternalIntegration.CONTENT_CAFE,
            goal=ExternalIntegration.METADATA_GOAL,
            username=u'yup'
        )
        assert_raises(
            CannotLoadConfiguration, ContentCafeAPI.from_config, self._db
        )

        integration.username = None
        integration.password = u'yurp'
        assert_raises(
            CannotLoadConfiguration, ContentCafeAPI.from_config, self._db
        )

        integration.username = u'yup'
        result = ContentCafeAPI.from_config(
            self._db, soap_client=object()
        )
        eq_(True, isinstance(result, ContentCafeAPI))

        # NOTE: We can't test the case where soap_client is not
        # mocked, because the ContentCafeSOAPClient constructor makes
        # a real HTTP request to load its WSDL file. We might be able
        # to improve this by seeing how mockable SudsClient is, or by
        # mocking ContentCafeSOAPClient.WSDL_URL as a file:// URL.

    def test_data_source(self):
        eq_(DataSource.CONTENT_CAFE, self.api.data_source.name)

    def test_create_metadata(self):

        class Mock(ContentCafeAPI):

            popularity_measurement = "a popularity measurement"
            annotate_calls = []

            def add_reviews(self, *args):
                self.add_reviews_called_with = args

            def add_descriptions(self, *args):
                self.add_descriptions_called_with = args

            def add_author_notes(self, *args):
                self.add_author_notes_called_with = args

            def add_excerpt(self, *args):
                self.add_excerpt_called_with = args

            def measure_popularity(self, *args):
                self.measure_popularity_called_with = args
                return self.popularity_measurement

            def is_suitable_image(self, image):
                self.is_suitable_image_called_with = image
                return True

        api = Mock(self._db, 'uid', 'pw', self.soap, self.http.do_get)
        m = api.create_metadata

        # First we will make a request for a cover image. If that
        # gives a 404 error, we return nothing and don't bother making
        # any more requests.
        self.http.queue_requests_response(404)
        eq_(None, m(self.identifier))
        request_url = self.http.requests.pop()
        image_url = api.image_url % self.args
        eq_(image_url, request_url)
        eq_([], self.http.requests)

        # If the cover image request succeeds, we turn it into a LinkData
        # and add it to a new Metadata object. We then pass the
        # Metadata object to a number of other methods to get additional
        # information from Content Cafe.
        #
        # We then call measure_popularity, and add its return value
        # to Metadata.measurements.
        self.http.queue_requests_response(200, 'image/png', content='an image!')

        # Here's the result.
        metadata = m(self.identifier)

        # Here's the image LinkData.
        [image] = metadata.links
        eq_(Hyperlink.IMAGE, image.rel)
        eq_(image_url, image.href)
        eq_('image/png', image.media_type)
        eq_('an image!', image.content)

        # We ran the image through our mocked version of is_suitable_image,
        # and it said it was fine.
        eq_(image.content, api.is_suitable_image_called_with)

        # Here's the popularity measurement.
        eq_([api.popularity_measurement], metadata.measurements)

        # Confirm that the mock methods were called with the right
        # arguments -- their functionality is tested individually
        # below.
        expected_args = (metadata, self.identifier, self.args)
        for called_with in (
            api.add_reviews_called_with, api.add_descriptions_called_with,
            api.add_author_notes_called_with, api.add_excerpt_called_with,
        ):
            eq_(expected_args, called_with)
        eq_((self.identifier, api.ONE_YEAR_AGO),
            api.measure_popularity_called_with)

        # If measure_popularity returns nothing, metadata.measurements
        # will be left empty.
        api.popularity_measurement = None
        self.http.queue_requests_response(200, 'image/png', content='an image!')
        metadata = m(self.identifier)
        eq_([], metadata.measurements)

    def test_annotate_with_web_resources(self):
        metadata = Metadata(DataSource.CONTENT_CAFE)
        rel = self._str

        # We're going to be grabbing this URL and
        # scraping it.
        url_template = "http://url/%(arg1)s"
        args = dict(arg1='value')

        # A couple of useful functions for scraping.
        class MockScrapers(object):
            scrape_called = False
            explode_called = False
            def scrape(self, soup):
                self.scrape_called = True
                return [soup.find('content').string]

            def explode(self, soup):
                self.explode_called = True
                raise Exception("I'll never be called")
        scrapers = MockScrapers()

        # When the result of the HTTP request contains a certain phrase,
        # we don't even bother scraping.
        m = self.api.annotate_with_web_resources
        http = self.http
        http.queue_requests_response(
            200, 'text/html', content='There is no data!'
        )
        m(metadata, self.identifier, args, url_template, "no data!", rel,
          scrapers.explode)
        # We made the request but nothing happened.
        expect_url = url_template % args
        eq_(expect_url, self.http.requests.pop())
        eq_(False, scrapers.explode_called)
        eq_(None, metadata.title)
        eq_([], metadata.links)

        # Otherwise, we try to scrape.
        good_content = '<html><span class="PageHeader2">Book title</span><content>Here you go</content>'
        http.queue_requests_response(200, 'text/html', content=good_content)
        m(metadata, self.identifier, args, url_template, "no data!", rel,
          scrapers.scrape)
        eq_(True, scrapers.scrape_called)

        # We called _extract_title and pulled the title out of the
        # Content Cafe page for the Metadata object.
        eq_("Book title", metadata.title)

        # Then we called scrapers.scrape, which gave us the content for
        # one LinkData.
        [link] = metadata.links
        eq_(rel, link.rel)
        eq_(None, link.href)
        eq_("text/html", link.media_type)
        eq_("Here you go", link.content)

    def test__extract_title(self):
        # Standalone test of the _extract_title helper method.

        def assert_title(title, expect):
            markup = '<html><span class="PageHeader2">%s</span><content>Description</content>' % title
            soup = BeautifulSoup(markup, 'lxml')
            eq_(expect, ContentCafeAPI._extract_title(soup))


        # A normal book title is successfully extracted.
        assert_title("A great book", "A great book")

        # A supposed title that's in KNOWN_BAD_TITLES is ignored.
        assert_title("No content currently exists for this item", None)

    def test_add_reviews(self):
        """Verify that add_reviews works in a real case."""
        metadata = Metadata(DataSource.CONTENT_CAFE)
        content = self.data_file("reviews.html")
        self.http.queue_requests_response(200, 'text/html', content=content)
        self.api.add_reviews(metadata, self.identifier, self.args)

        # We extracted six reviews from the sample file.
        reviews = metadata.links
        eq_(6, len(reviews))
        assert all([x.rel == Hyperlink.REVIEW for x in reviews])
        assert "isn't a myth!" in reviews[0].content

        # We incidentally figured out the book's title.
        eq_("Shadow Thieves", metadata.title)

    def test_add_author_notes(self):
        """Verify that add_author_notes works in a real case."""
        metadata = Metadata(DataSource.CONTENT_CAFE)
        content = self.data_file("author_notes.html")
        self.http.queue_requests_response(200, 'text/html', content=content)
        self.api.add_author_notes(metadata, self.identifier, self.args)

        [notes] = metadata.links
        eq_(Hyperlink.AUTHOR, notes.rel)
        assert 'Brenda researched turtles' in notes.content

        # We incidentally figured out the book's title.
        eq_("Franklin's Christmas Gift", metadata.title)

    def test_add_excerpt(self):
        """Verify that add_excerpt works in a real case."""
        metadata = Metadata(DataSource.CONTENT_CAFE)
        content = self.data_file("excerpt.html")
        self.http.queue_requests_response(200, 'text/html', content=content)
        self.api.add_excerpt(metadata, self.identifier, self.args)

        [excerpt] = metadata.links
        eq_(Hyperlink.SAMPLE, excerpt.rel)
        assert 'Franklin loved his marbles.' in excerpt.content

        # We incidentally figured out the book's title.
        eq_("Franklin's Christmas Gift", metadata.title)

    def test_measure_popularity(self):
        """Verify that measure_popularity turns the output of
        a SOAP request into a MeasurementData.
        """
        cutoff = object()

        # Call it.
        result = self.api.measure_popularity(self.identifier, cutoff)

        # The SOAP client's estimated_popularity method was called.
        expect = (self.identifier.identifier, cutoff)
        eq_(expect, self.soap.estimated_popularity_calls.pop())

        # The result was turned into a MeasurementData.
        assert isinstance(result, MeasurementData)
        eq_(Measurement.POPULARITY, result.quantity_measured)
        eq_(self.soap.popularity_value, result.value)

        # If the SOAP API doesn't return a popularity value, no
        # MeasurementData is created.
        self.soap.popularity_value = None
        result = self.api.measure_popularity(self.identifier, cutoff)
        eq_(expect, self.soap.estimated_popularity_calls.pop())
        eq_(None, result)

    def test_is_suitable_image(self):
        # Images are rejected if we can tell they are Content Cafe's
        # stand-in images.
        m = ContentCafeAPI.is_suitable_image

        content = self.data_file("stand-in-image.png")
        eq_(False, m(content))

        # Otherwise, it's fine. We don't check that the image is
        # valid, only that it's not a stand-in image.
        eq_(True, m("I'm not a stand-in image."))
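
To make the contract in test_create_metadata easier to follow, here is a
sketch of the flow it exercises. Everything below is reconstructed from the
assertions: image_url, user_id, password and ONE_YEAR_AGO appear in the test,
but the do_get attribute name, the Content-Type lookup, and the overall body
are assumptions rather than the production ContentCafeAPI code.

def create_metadata(self, identifier):
    # Hypothetical sketch reconstructed from the test assertions.
    args = dict(userid=self.user_id, password=self.password,
                isbn=identifier.identifier)
    response = self.do_get(self.image_url % args)  # assumed attribute name
    if response.status_code == 404:
        # No cover image: give up without making further requests.
        return None
    metadata = Metadata(DataSource.CONTENT_CAFE)
    if self.is_suitable_image(response.content):
        metadata.links.append(LinkData(
            rel=Hyperlink.IMAGE,
            href=self.image_url % args,
            media_type=response.headers.get("Content-Type"),  # assumed lookup
            content=response.content,
        ))
    # Scrape the other Content Cafe resources into the same Metadata.
    for annotate in (self.add_reviews, self.add_descriptions,
                     self.add_author_notes, self.add_excerpt):
        annotate(metadata, identifier, args)
    # Record a popularity measurement only when one comes back.
    measurement = self.measure_popularity(identifier, self.ONE_YEAR_AGO)
    if measurement:
        metadata.measurements.append(measurement)
    return metadata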