Example #1
0
    def test_get_content_readability(self, rget):
        """Exercise get_content_readability() with and without a parser key.

        ``rget`` is the mock handed to this test (presumably injected by a
        patch decorator outside this view -- confirm). Its side effect is
        replaced with a local fake that returns canned HTML content.
        """
        target = 'http://doesnotexist/path'

        def fake_get(url):
            # The configured key is expected to show up in the requested URL.
            assert 'abc123' in url
            payload = {'content': '<p>Test content</p>'}
            return Response(payload)

        rget.side_effect = fake_get

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            text, code = scraper.get_content_readability(target)
            eq_(text, 'Test content')
            eq_(code, 200)

            # The same URL again, this time through scrape_urls().
            expected_entry = {'worked': True, 'status': 200, 'url': target}
            scraped = scraper.scrape_urls([target])
            eq_(scraped['text'], 'Test content')
            eq_(scraped['results'][0], expected_entry)

        # Without a key, a message is returned in place of a status code.
        with self.settings(READABILITY_PARSER_KEY=None):
            text, code = scraper.get_content_readability(target)
            eq_(text, None)
            eq_(code, 'No READABILITY_PARSER_KEY setting set up')
Example #2
0
    def test_get_content_readability(self, rget):
        """Check the readability scraper for a configured and a missing key.

        With the key set, the mocked GET returns '<p>Test content</p>' and
        the test expects 'Test content' with status 200, both directly and
        via scrape_urls(). With the key set to None it expects ``None``
        content and an explanatory message as the status.
        """
        def respond(url):
            # Fails the test if the key is not part of the requested URL.
            assert 'abc123' in url
            return Response({'content': '<p>Test content</p>'})

        rget.side_effect = respond
        page = 'http://doesnotexist/path'

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            outcome = scraper.get_content_readability(page)
            body, http_status = outcome
            eq_(body, 'Test content')
            eq_(http_status, 200)

            # Batch entry point: scrape_urls() takes a list of URLs.
            batch = scraper.scrape_urls([page])
            first_entry = batch['results'][0]
            eq_(batch['text'], 'Test content')
            eq_(first_entry, {
                'worked': True,
                'status': 200,
                'url': page
            })

        with self.settings(READABILITY_PARSER_KEY=None):
            outcome = scraper.get_content_readability(page)
            body, http_status = outcome
            eq_(body, None)
            eq_(http_status, 'No READABILITY_PARSER_KEY setting set up')
Example #3
0
    def test_get_content_readability(self, mocked_parser_client):
        """Exercise get_content_readability() against a stubbed parser client.

        ``mocked_parser_client`` is the mock handed to this test (presumably
        a patched parser-client class -- confirm against the decorator). It
        is set up to return a stand-in whose get_article_content() yields a
        canned ``_Parsed`` result.
        """
        def fake_article_content(url):
            return _Parsed('<p>Test content</p>')

        client = mock.Mock()
        client.get_article_content = fake_article_content
        mocked_parser_client.return_value = client

        target = 'http://doesnotexist/path'

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            text, code = scraper.get_content_readability(target)
            eq_(text, 'Test content')
            eq_(code, 200)

            # The same URL again, this time through scrape_urls().
            expected_entry = {'worked': True, 'status': 200, 'url': target}
            scraped = scraper.scrape_urls([target])
            eq_(scraped['text'], 'Test content')
            eq_(scraped['results'][0], expected_entry)

        # Without a key, a message is returned in place of a status code.
        with self.settings(READABILITY_PARSER_KEY=None):
            text, code = scraper.get_content_readability(target)
            eq_(text, None)
            eq_(code, 'No READABILITY_PARSER_KEY setting set up')
Example #4
0
    def test_get_content_readability(self, mocked_parser_client):
        """Check the readability scraper using a stubbed parser client.

        With the key set, the stub returns ``_Parsed('<p>Test content</p>')``
        and the test expects 'Test content' with status 200, both directly
        and via scrape_urls(). With the key set to None it expects ``None``
        content and an explanatory message as the status.
        """
        def canned_article(url):
            return _Parsed('<p>Test content</p>')

        # Keyword arguments to Mock() are set as attributes on the new mock.
        mocked_parser_client.return_value = mock.Mock(
            get_article_content=canned_article
        )
        page = 'http://doesnotexist/path'

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            outcome = scraper.get_content_readability(page)
            body, http_status = outcome
            eq_(body, 'Test content')
            eq_(http_status, 200)

            # Batch entry point: scrape_urls() takes a list of URLs.
            batch = scraper.scrape_urls([page])
            first_entry = batch['results'][0]
            eq_(batch['text'], 'Test content')
            eq_(first_entry, {
                'worked': True,
                'status': 200,
                'url': page
            })

        with self.settings(READABILITY_PARSER_KEY=None):
            outcome = scraper.get_content_readability(page)
            body, http_status = outcome
            eq_(body, None)
            eq_(http_status, 'No READABILITY_PARSER_KEY setting set up')
Example #5
0
    def test_get_content_readability_failed(self, rget):
        """Expect empty content and status 500 when the mocked GET gives 500.

        ``rget`` is the mock handed to this test (presumably injected by a
        patch decorator outside this view -- confirm).
        """
        def failing_get(url):
            # The configured key is expected to show up in the requested URL.
            assert 'abc123' in url
            return Response({}, status_code=500)

        rget.side_effect = failing_get

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            text, code = scraper.get_content_readability(
                'http://doesnotexist/path'
            )
            eq_(text, '')
            eq_(code, 500)
Example #6
0
    def test_get_content_readability_failed(self, rget):
        """A 500 response from the mocked GET must surface as ('', 500)."""
        page = 'http://doesnotexist/path'

        def server_error(url):
            # Fails the test if the key is not part of the requested URL.
            assert 'abc123' in url
            empty_payload = {}
            return Response(empty_payload, status_code=500)

        rget.side_effect = server_error

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            outcome = scraper.get_content_readability(page)
            body, http_status = outcome
            eq_(body, '')
            eq_(http_status, 500)
Example #7
0
    def test_get_content_readability_failed(self, mocked_parser_client):
        """Expect ('', 500) when the stubbed parser reports a 500 status.

        ``mocked_parser_client`` is the mock handed to this test (presumably
        a patched parser-client class -- confirm against the decorator).
        """
        def failing_article_content(url):
            return _Parsed(None, status=500)

        client = mock.Mock()
        client.get_article_content = failing_article_content
        mocked_parser_client.return_value = client

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            text, code = scraper.get_content_readability(
                'http://doesnotexist/path'
            )
            eq_(text, '')
            eq_(code, 500)
Example #8
0
    def test_get_content_readability_failed(self, mocked_parser_client):
        """A parser result with status 500 must surface as ('', 500)."""
        page = 'http://doesnotexist/path'

        def errored_article(url):
            return _Parsed(None, status=500)

        # Keyword arguments to Mock() are set as attributes on the new mock.
        mocked_parser_client.return_value = mock.Mock(
            get_article_content=errored_article
        )

        with self.settings(READABILITY_PARSER_KEY='abc123'):
            outcome = scraper.get_content_readability(page)
            body, http_status = outcome
            eq_(body, '')
            eq_(http_status, 500)