def test_url2wordcloud_bad_url(monkeypatch):
    """A mocked 404 response must produce an empty result list."""
    target = 'http://www.test.com'
    html = '<html><body>500 SERVER ERROR.</body></html>'
    payload = html.encode('utf-8')
    with requests_mock.Mocker() as mocker:
        # Every GET of the target is answered with the error page and a 404.
        mocker.get(target, content=payload, status_code=404)
        cloud = wordcloud.url2wordcloud(target, min_len=10)
        assert cloud == []
def test_url2wordcloud_bad_url(monkeypatch):
    """Check that url2wordcloud returns ``[]`` when the URL answers with 404."""
    page_url = 'http://www.test.com'
    body_bytes = '<html><body>500 SERVER ERROR.</body></html>'.encode('utf-8')
    with requests_mock.Mocker() as fake_http:
        fake_http.get(page_url, content=body_bytes, status_code=404)
        outcome = wordcloud.url2wordcloud(page_url, min_len=10)
        # A failing status code is expected to give no words at all.
        assert outcome == []
def test_url2wordcloud_min_len_none_one(monkeypatch):
    """With ``min_len=10`` the only reported word is 'prettylongwordfortest'."""
    target = 'http://www.test.com'
    html = '<html><body>a a prettylongwordfortest sort word</body></html>'
    payload = html.encode('utf-8')
    with requests_mock.Mocker() as mocker:
        mocker.get(target, content=payload)
        cloud = wordcloud.url2wordcloud(target, min_len=10)
        assert isinstance(cloud, list)
        # Each entry is indexed by its 'text' key to recover the word itself.
        found = [entry['text'] for entry in cloud]
        assert found == ['prettylongwordfortest']
def test_url2wordcloud_limit(monkeypatch):
    """With ``limit=2`` the result holds 'great' and 'words', in either order."""
    target = 'http://www.test.com'
    html = '<html><body>these are are great great great words</body></html>'
    payload = html.encode('utf-8')
    with requests_mock.Mocker() as mocker:
        mocker.get(target, content=payload)
        cloud = wordcloud.url2wordcloud(target, limit=2)
        assert isinstance(cloud, list)
        found = [entry['text'] for entry in cloud]
        # Ordering between the two words is not pinned down by this test.
        assert found in (['words', 'great'], ['great', 'words'])
# Example no. 5
# 0
def test_url2wordcloud_min_len_none(monkeypatch):
    """With ``min_len=10`` and only short tokens on the page, no words come back.

    The mocked body is encoded to UTF-8 bytes before being registered:
    requests_mock's ``content`` argument must be binary data, and passing a
    ``str`` makes it raise ``TypeError`` when the response is built instead of
    serving the page. The other tests in this file encode the body the same way.
    """
    url = 'http://www.test.com'
    content = '<html><body>; great words short word $</body></html>'
    with requests_mock.Mocker() as m:
        # Bytes, not str: see the docstring for why.
        m.get(url, content=content.encode('utf-8'))
        res = wordcloud.url2wordcloud(url, min_len=10)
        assert isinstance(res, list)
        words = [w['text'] for w in res]
        # Every token in the body is shorter than 10 characters.
        assert words == []
def test_url2wordcloud_min_len_none_one(monkeypatch):
    """Expect exactly one word, 'prettylongwordfortest', when ``min_len=10``."""
    page_url = 'http://www.test.com'
    body_bytes = (
        '<html><body>a a prettylongwordfortest sort word</body></html>'
    ).encode('utf-8')
    with requests_mock.Mocker() as fake_http:
        fake_http.get(page_url, content=body_bytes)
        outcome = wordcloud.url2wordcloud(page_url, min_len=10)
        assert isinstance(outcome, list)
        texts = [item['text'] for item in outcome]
        assert texts == ['prettylongwordfortest']
def test_url2wordcloud_exclude_punct(monkeypatch):
    """With ``exclude_punct=True`` punctuation tokens are absent, words remain."""
    target = 'http://www.test.com'
    html = '<html><body>; great words $ &!</body></html>'
    payload = html.encode('utf-8')
    with requests_mock.Mocker() as mocker:
        mocker.get(target, content=payload)
        cloud = wordcloud.url2wordcloud(target, exclude_punct=True)
        assert isinstance(cloud, list)
        found = [entry['text'] for entry in cloud]
        # None of the punctuation-only tokens may appear in the result...
        assert not any(symbol in found for symbol in (';', '$', '&!'))
        # ...while both real words must still be reported.
        assert all(expected in found for expected in ('great', 'words'))
def test_url2wordcloud_limit(monkeypatch):
    """Check that ``limit=2`` yields 'great' and 'words' in any order."""
    page_url = 'http://www.test.com'
    body_bytes = (
        '<html><body>these are are great great great words</body></html>'
    ).encode('utf-8')
    with requests_mock.Mocker() as fake_http:
        fake_http.get(page_url, content=body_bytes)
        outcome = wordcloud.url2wordcloud(page_url, limit=2)
        assert isinstance(outcome, list)
        texts = [item['text'] for item in outcome]
        # Either ordering of the two expected words is accepted.
        accepted_orders = (['words', 'great'], ['great', 'words'])
        assert texts in accepted_orders
def test_url2wordcloud_exclude_punct(monkeypatch):
    """Check ``exclude_punct=True`` drops ';', '$' and '&!' but keeps the words."""
    page_url = 'http://www.test.com'
    body_bytes = '<html><body>; great words $ &!</body></html>'.encode('utf-8')
    with requests_mock.Mocker() as fake_http:
        fake_http.get(page_url, content=body_bytes)
        outcome = wordcloud.url2wordcloud(page_url, exclude_punct=True)
        assert isinstance(outcome, list)
        texts = [item['text'] for item in outcome]
        unwanted = [';', '$', '&!']
        wanted = ['great', 'words']
        assert not any(token in texts for token in unwanted)
        assert all(token in texts for token in wanted)