Esempio n. 1
0
def test_seen():
    c = {'Robots': {'RobotsCacheSize': 1, 'RobotsCacheTimeout': 1}}
    config.set_config(c)
    dl = datalayer.Datalayer()
    assert not dl.seen_url(URL('http://example.com'))
    dl.add_seen_url(URL('http://example.com'))
    assert dl.seen_url(URL('http://example.com'))
Esempio n. 2
0
def test_robotscache():
    c = {'Robots': {'RobotsCacheSize': 1, 'RobotsCacheTimeout': 1}}
    config.set_config(c)
    dl = datalayer.Datalayer()
    with pytest.raises(KeyError):
        dl.read_robots_cache('http://example.com')
    dl.cache_robots('http://example.com', b'THIS IS A TEST')
    assert dl.read_robots_cache('http://example.com') == b'THIS IS A TEST'
Esempio n. 3
0
def test_summarize(capsys):
    c = {'Robots': {'RobotsCacheSize': 1, 'RobotsCacheTimeout': 1}}
    config.set_config(c)
    dl = datalayer.Datalayer()
    dl.add_seen_url(URL('http://example.com'))
    dl.add_seen_url(URL('http://example2.com'))
    dl.summarize()

    out, err = capsys.readouterr()

    assert len(err) == 0
    assert out.startswith('2 seen_urls')
Esempio n. 4
0
def test_saveload():
    tf = tempfile.NamedTemporaryFile(delete=False)
    name = tf.name

    c = {'Robots': {'RobotsCacheSize': 1, 'RobotsCacheTimeout': 1}}
    config.set_config(c)
    dl = datalayer.Datalayer()
    dl.add_seen_url(URL('http://example.com'))
    assert dl.seen_url(URL('http://example.com'))

    with open(name, 'wb') as f:
        dl.save(f)
    dl.add_seen_url(URL('http://example2.com'))
    with open(name, 'rb') as f:
        dl.load(f)

    assert dl.seen_url(URL('http://example.com'))
    assert not dl.seen_url(URL('http://example2.com'))
    os.unlink(name)
    assert not os.path.exists(name)