Ejemplo n.º 1
0
    def test_fetch_handle_download(self):
        credentials = self.univision_credentials()

        medium = self.db.create(table='media',
                                insert_hash={
                                    'name':
                                    f"Media for test feed {credentials.url}",
                                    'url': 'http://www.univision.com/',
                                })

        feed = self.db.create(table='feeds',
                              insert_hash={
                                  'name': 'feed',
                                  'type': 'univision',
                                  'url': credentials.url,
                                  'media_id': medium['media_id'],
                              })

        download = create_download_for_feed(db=self.db, feed=feed)

        handler = handler_for_download(db=self.db, download=download)
        assert isinstance(handler, DownloadFeedUnivisionHandler)

        # Recreate handler with mock configuration
        handler = DownloadFeedUnivisionHandler(
            crawler_config=self._mock_crawler_config())

        response = handler.fetch_download(db=self.db, download=download)
        assert response

        handler.store_response(db=self.db,
                               download=download,
                               response=response)

        download = self.db.find_by_id(table='downloads',
                                      object_id=download['downloads_id'])
        assert download
        assert download[
            'state'] == 'success', f"Download's state is not 'success': {download['state']}"
        assert not download[
            'error_message'], f"Download's error_message should be empty: {download['error_message']}"

        if self.expect_to_find_some_stories():
            story_downloads = self.db.query(
                """
                SELECT *
                FROM downloads
                WHERE feeds_id = %(feeds_id)s
                  AND type = 'content'
                  AND state = 'pending'
            """, {
                    'feeds_id': download['feeds_id'],
                }).hashes()
            assert story_downloads, 'One or more story downloads were derived from feed'
Ejemplo n.º 2
0
    def test_api_request(self):
        """Make an API request, see if it succeeds."""

        credentials = self.univision_credentials()

        handler = DownloadFeedUnivisionHandler(crawler_config=self._mock_crawler_config())
        api_request_url = handler._api_request_url_with_signature_from_config(api_url=credentials.url)
        assert api_request_url, 'API request URL is not empty'

        ua = UserAgent()
        ua.set_timeout(30)

        response = ua.get(api_request_url)
        assert response.is_success(), 'API request was successful'

        json_string = response.decoded_content()
        assert json_string, 'JSON response is not empty'

        json = response.decoded_json()
        assert json.get('status', None) == 'success', "JSON response was successful"
        assert 'data' in json, 'JSON response has "data" key'
    def test_api_request_signature(self):
        # Invalid inputs:

        # Empty input
        with pytest.raises(McCrawlerFetcherHardError):
            # noinspection PyTypeChecker
            DownloadFeedUnivisionHandler._api_request_url_with_signature(
                api_url=None,
                client_id=None,
                client_secret=None,
            )

        # Invalid URL
        with pytest.raises(McCrawlerFetcherHardError):
            DownloadFeedUnivisionHandler._api_request_url_with_signature(
                api_url='ftp://',
                client_id='client_id',
                client_secret='secret_key',
            )

        # URL with "client_id"
        with pytest.raises(McCrawlerFetcherHardError):
            DownloadFeedUnivisionHandler._api_request_url_with_signature(
                api_url='http://www.test.com/?client_id=a',
                client_id='client_id',
                client_secret='secret_key',
            )

        # Sanitization and query parameter sorting
        assert DownloadFeedUnivisionHandler._api_request_url_with_signature(
            api_url='http://www.test.com/',  # with slash
            client_id='client_id',
            client_secret='client_secret',
        ) == DownloadFeedUnivisionHandler._api_request_url_with_signature(
            api_url='http://www.test.com',  # without slash
            client_id='client_id',
            client_secret='client_secret',
        ), 'With and without ending slash'

        assert 'a=a&b=a&b=b&c=a&c=b&c=c' in DownloadFeedUnivisionHandler._api_request_url_with_signature(
            api_url='http://www.test.com/?c=c&c=b&c=a&b=b&b=a&a=a',
            client_id='client_id',
            client_secret='client_secret',
        ), 'Sorted query parameters'
Ejemplo n.º 4
0
def handler_for_download(db: DatabaseHandler,
                         download: dict) -> AbstractDownloadHandler:
    """Returns correct handler for download."""

    download = decode_object_from_bytes_if_needed(download)

    downloads_id = int(download['downloads_id'])
    download_type = download['type']

    if download_type == 'feed':
        feeds_id = int(download['feeds_id'])
        feed = db.find_by_id(table='feeds', object_id=feeds_id)
        feed_type = feed['type']

        if feed_type == 'syndicated':
            handler = DownloadFeedSyndicatedHandler()
        elif feed_type == 'web_page':
            handler = DownloadFeedWebPageHandler()
        elif feed_type == 'univision':
            handler = DownloadFeedUnivisionHandler()
        elif feed_type == 'podcast':
            handler = DownloadFeedPodcastHandler()
        else:
            # Unknown feed type is a hard error as we don't types that we don't know about to be there
            raise McCrawlerFetcherHardError(
                f"Unknown feed type '{feed_type}' for feed {feeds_id}, download {downloads_id}"
            )
    elif download_type == 'content':
        handler = DownloadContentHandler()
    else:
        # Unknown download type is a hard error as we don't types that we don't know about to be there
        raise McCrawlerFetcherHardError(
            f"Unknown download type '{download_type}' for download {downloads_id}"
        )

    return handler