Beispiel #1
0
def test_crawl_basic_bookkeeping(storage, simple_feed):
    """Crawling a feed records its conditional-GET state and entry URLs.

    After one crawl, the feed directory under ``storage`` must contain a
    ``conditional-get`` file with the expected two-line content and an
    ``entries.db`` dbm database keyed by the URL of each entry.
    """
    crawl.crawl(simple_feed)

    feed_dir = join(storage, 'feeds', 'http-com-example-feed')

    # Presumably the ETag and Last-Modified values served by the
    # simple_feed fixture, one per line -- confirm against the fixture.
    with open(join(feed_dir, 'conditional-get')) as state_file:
        saved_state = state_file.read()
    assert saved_state == 'the-etag\nthe date\n'

    expected_urls = (
        'http://example.com/entry-1',
        'http://example.com/entry-2',
    )
    with dbm.open(join(feed_dir, 'entries.db')) as seen_entries:
        for url in expected_urls:
            assert url in seen_entries
Beispiel #2
0
def test_crawl_basic_file_structure(storage, simple_feed):
    """Crawling a feed lays out the expected directories and files.

    Checks, from the top down: the storage root, the ``feeds`` directory,
    the per-feed directory, its bookkeeping files, one directory per
    entry, and the ``entry.json`` file inside the first entry directory.
    """
    crawl.crawl(simple_feed)

    feeds_root = join(storage, 'feeds')
    assert isdir(storage)
    assert isdir(feeds_root)

    feed_dir = join(feeds_root, 'http-com-example-feed')
    assert isdir(feed_dir)

    # Per-feed bookkeeping files.
    for bookkeeping_file in ('conditional-get', 'entries.db'):
        assert isfile(join(feed_dir, bookkeeping_file))

    # One directory per entry; the names look like a timestamp followed
    # by a slug of the entry URL.
    entry_dirs = [
        join(feed_dir, '2015-05-10T00:00:00Z-http-com-example-entry-1'),
        join(feed_dir, '2015-05-11T00:00:00Z-http-com-example-entry-2'),
    ]
    for entry_dir in entry_dirs:
        assert isdir(entry_dir)

    # Only the first entry's contents are inspected.
    assert isfile(join(entry_dirs[0], 'entry.json'))
Beispiel #3
0
def test_crawl_conditional_get_restore(storage, simple_feed):
    """A repeat crawl of the same feed must not save anything.

    The first crawl runs normally. For the second crawl,
    ``skim.crawl.save_feed`` is replaced by a mock that raises when
    called, so any attempt to save fails loudly. The mock's call record
    is checked as well, so the test still fails if ``crawl.crawl``
    catches the raised exception internally.
    """
    crawl.crawl(simple_feed)
    with mock.patch('skim.crawl.save_feed', side_effect=Exception) as save_feed:
        # it should not attempt to save anything in the case of a 304
        crawl.crawl(simple_feed)

    # The raising side effect only helps if the exception propagates;
    # checking the call record catches a swallowed exception too.
    assert not save_feed.called