Exemple #1
0
def article():
    """Return the Metadoc result for a posted article as a JSON string.

    Reads two POST form fields: ``url`` (required) and ``html`` (optional).
    When ``url`` is missing or empty the request is aborted with status 404.
    Otherwise a ``Metadoc`` is built from both fields, all of its queries
    are run, and the collected result is serialized with ``json.dumps``.
    """
    response.content_type = 'application/json'

    url = request.forms.get("url")
    html = request.forms.get("html")

    if not url:
        abort(404)

    doc = Metadoc(url=url, html=html)
    doc.query_all()

    # NOTE(review): the original comment says return_ball() preserves key
    # order — confirm against the Metadoc implementation.
    return json.dumps(doc.return_ball())
Exemple #2
0
class MetadocModuleTest(asynctest.TestCase):
    """Tests for the ``Metadoc`` entry points, run against a saved article.

    Most tests are marked with ``asynctest.ignore_loop``.
    NOTE(review): presumably this silences asynctest's "loop did not run"
    check for tests that never touch the event loop — confirm in its docs.
    """

    def setUp(self):
        """Read the HTML fixture and build the ``Metadoc`` shared by the tests."""
        self.url = "https://theintercept.com/2016/11/26/laura-ingraham-lifezette/"
        article_path = "tests/fixtures/theintercept.com/laura-ingraham-lifezette.html"
        # Decode explicitly so the fixture reads the same on every platform
        # instead of depending on the locale's default encoding.
        # NOTE(review): assumes the fixture file is UTF-8 — confirm.
        with open(article_path, 'r', encoding='utf-8') as article:
            self.article_html = article.read()

        self.metadoc = Metadoc(url=self.url, html=self.article_html)

    @asynctest.ignore_loop
    def test_init(self):
        """The constructor stores the given url and html unchanged."""
        assert self.metadoc.url == self.url
        assert self.metadoc.html == self.article_html

    @asynctest.ignore_loop
    def test_query_all(self):
        """``query_all`` followed by ``return_ball`` yields a truthy result."""
        self.metadoc.query_all()
        result = self.metadoc.return_ball()
        assert result

    @asynctest.ignore_loop
    def test_extract(self):
        """``query_extract`` leaves a truthy ``extractor`` attribute."""
        self.metadoc.query_extract()
        assert self.metadoc.extractor

    @asynctest.ignore_loop
    async def test_social(self):
        """Awaiting ``query_social`` leaves a truthy ``activity`` attribute."""
        await self.metadoc.query_social()
        assert self.metadoc.activity

    @asynctest.ignore_loop
    async def test_domain(self):
        """``query_domain`` leaves a truthy ``domain`` attribute."""
        self.metadoc.query_domain()
        assert self.metadoc.domain

    @asynctest.ignore_loop
    async def test_no_url_fail(self):
        """Calling ``Metadoc()`` with no arguments raises ``AttributeError``."""
        with pytest.raises(AttributeError):
            Metadoc()

    @asynctest.ignore_loop
    async def test_invalid_url_fail(self):
        """Building a ``Metadoc`` for the ``/404/`` URL without html must raise."""
        # Only the constructor call belongs inside the ``raises`` block: an
        # import placed there could fail with ImportError and make the test
        # pass for the wrong reason. ``Metadoc`` is already in module scope
        # (setUp uses it), so no local import is needed.
        with pytest.raises(Exception):
            Metadoc(url="https://theintercept.com/404/", html=None)

    async def test_no_html(self):
        """Smoke test: ``query_all`` runs when only a url is supplied.

        Makes no assertion on the result; it only checks that nothing raises.
        """
        metadoc = Metadoc(url=self.url)
        metadoc.query_all()
Exemple #3
0
 async def test_no_html(self):
     """Smoke test: run every query on a Metadoc built from the url alone.

     No html is passed to the constructor and no assertion is made on the
     result; the test only checks that ``query_all`` does not raise.
     """
     Metadoc(url=self.url).query_all()