Beispiel #1
0
def check_cached_with_date(n_threads):
    '''
    Check ``download`` against ``fake_warehouse_with_date`` for two dates.

    For '2014-03-01' the getter is ``fake_get_should_not_run`` and the
    response text must be 'baz'; for '2014-03-02' the getter is ``fake_get``
    and the text must be 'lalala'.
    NOTE(review): presumably the first date is already in the warehouse so
    the getter is never called -- confirm against the fixtures.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    cases = (
        (fake_get_should_not_run, '2014-03-01', 'baz'),
        (fake_get, '2014-03-02', 'lalala'),
    )
    for getter, date, expected_text in cases:
        # A fresh URL list per call, exactly as two separate calls would pass.
        responses = download(getter, fake_warehouse_with_date,
                             ['http://foo.bar'], date,
                             n_threads=n_threads)
        n.assert_equal(next(responses).text, expected_text)
Beispiel #2
0
def check_cached(n_threads):
    '''
    Check that downloading 'http://foo.bar' through ``fake_warehouse`` gives
    a first response whose text is 'baz'.
    NOTE(review): presumably 'baz' is the warehouse's stored value rather
    than what ``fake_get`` returns -- confirm against the fixtures.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    responses = download(fake_get, fake_warehouse, ['http://foo.bar'], None,
                         n_threads=n_threads)
    first = next(responses)
    n.assert_equal(first.text, 'baz')
Beispiel #3
0
def check_bad_scheme(n_threads):
    '''
    Check that requesting 'ftp://example.com' raises ``ValueError``.

    Both the ``download`` call and the ``next`` call stay inside the
    ``assert_raises`` block, so the error may come from either of them.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    with n.assert_raises(ValueError):
        responses = download(fake_get, fake_warehouse, ['ftp://example.com'],
                             None, n_threads=n_threads)
        next(responses)
Beispiel #4
0
def sites(get=requests.get, url='http://www.craigslist.org/about/sites', cachedir='sites'):
    '''
    Return the set of craigslist site hosts linked from the page at ``url``.

    The page is fetched with ``download`` using a ``Warehouse`` built from
    ``cachedir``. Every ``<a href>`` on the page is split with ``urlsplit``
    and its network location is collected; hrefs without one are skipped.

    :param get: callable handed to ``download`` to perform the request.
    :param url: address of the page that lists the sites.
    :param cachedir: argument passed to ``Warehouse``.
    :returns: a ``set`` of non-empty network-location strings.
    '''
    warehouse = Warehouse(cachedir)
    page = download(get, warehouse, url, None)
    document = lxml.html.fromstring(page.text)

    hosts = set()
    for href in document.xpath('//a/@href'):
        host = urlsplit(href).netloc
        if host:  # relative links have an empty netloc
            hosts.add(host)
    return hosts
def _sections(get, warehouse, url):
    '''
    Yield the section identifiers linked from the page at ``url``.

    The page is fetched with ``download(get, warehouse, url, None)`` and every
    href below the element whose id is "main" is examined in document order:

    * a four-character href is yielded with trailing slashes removed;
    * an href starting with ``/i/personals?category=`` or ``/i/personals/``
      is yielded with the text ``/i/personals?category=`` removed;
    * any other href starting with ``/i/`` is not yielded, it only triggers
      a warning naming the href;
    * every other href is ignored.

    :param get: callable handed to ``download`` to perform the request.
    :param warehouse: cache object handed to ``download``.
    :param url: address of the page whose links are scanned.
    '''
    response = download(get, warehouse, url, None)
    html = lxml.html.fromstring(response.text)
    for href in map(str, html.xpath('id("main")/descendant::a/@href')):
        if len(href) == 4:
            yield href.rstrip('/')
        # The old guard tested only '/i/personals/', which can never match
        # the '/i/personals?category=' prefix stripped below, so those links
        # fell through to the warning branch. Both prefixes are accepted now.
        elif href.startswith(('/i/personals/', '/i/personals?category=')):
            yield href.replace('/i/personals?category=', '')
        elif href.startswith('/i/'):
            warnings.warn('Go to %s to see more sections.' % href)
def _sections(get, warehouse, url):
    '''
    Yield the section identifiers linked from the page at ``url``.

    The page is fetched with ``download(get, warehouse, url, None)`` and every
    href below the element whose id is "main" is examined in document order:

    * a four-character href is yielded with trailing slashes removed;
    * an href starting with ``/i/personals?category=`` or ``/i/personals/``
      is yielded with the text ``/i/personals?category=`` removed;
    * any other href starting with ``/i/`` is not yielded, it only triggers
      a warning naming the href;
    * every other href is ignored.

    :param get: callable handed to ``download`` to perform the request.
    :param warehouse: cache object handed to ``download``.
    :param url: address of the page whose links are scanned.
    '''
    response = download(get, warehouse, url, None)
    html = lxml.html.fromstring(response.text)
    for href in map(str, html.xpath('id("main")/descendant::a/@href')):
        if len(href) == 4:
            yield href.rstrip('/')
        # The old guard tested only '/i/personals/', which can never match
        # the '/i/personals?category=' prefix stripped below, so those links
        # fell through to the warning branch. Both prefixes are accepted now.
        elif href.startswith(('/i/personals/', '/i/personals?category=')):
            yield href.replace('/i/personals?category=', '')
        elif href.startswith('/i/'):
            warnings.warn('Go to %s to see more sections.' % href)
Beispiel #7
0
def test_download_one():
    '''
    Check ``download`` with a single URL string and an empty dict as the
    warehouse: the result is used directly as a response (no ``next``) and
    its text must be 'lalala'.
    '''
    empty_warehouse = {}
    response = download(fake_get, empty_warehouse, 'http://foo.bar', None)
    n.assert_equal(response.text, 'lalala')
Beispiel #8
0
def check_not_cached(n_threads):
    '''
    Check ``download`` with an initially empty dict as the warehouse.

    The first response's text must be 'lalala', and afterwards the dict must
    map 'http://foo.bar' to a value equal to ``fake_get(None)``.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    cache = {}
    responses = download(fake_get, cache, ['http://foo.bar'], None,
                         n_threads=n_threads)
    n.assert_equal(next(responses).text, 'lalala')

    # Build the expected entry only after the download has run, matching the
    # original evaluation order.
    expected_cache = {'http://foo.bar': fake_get(None)}
    n.assert_dict_equal(cache, expected_cache)
def test_download_one():
    '''
    Check that ``download`` given one URL string (not a list) and an empty
    dict warehouse returns an object whose ``text`` is 'lalala'.
    '''
    single = download(fake_get, {}, 'http://foo.bar', None)
    observed_text = single.text
    n.assert_equal(observed_text, 'lalala')
def check_not_cached(n_threads):
    '''
    Check that downloading into an empty dict warehouse yields a response
    with text 'lalala' and leaves the dict holding exactly one entry,
    'http://foo.bar', equal to ``fake_get(None)``.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    store = {}
    first = next(
        download(fake_get, store, ['http://foo.bar'], None,
                 n_threads=n_threads))
    n.assert_equal(first.text, 'lalala')
    n.assert_dict_equal(store, {'http://foo.bar': fake_get(None)})
def check_cached_with_date(n_threads):
    '''
    Check ``download`` against ``fake_warehouse_with_date`` for two dates.

    With '2014-03-01' and ``fake_get_should_not_run`` the first response's
    text must be 'baz'; with '2014-03-02' and ``fake_get`` it must be
    'lalala'.
    NOTE(review): presumably only the first date is served from the
    warehouse -- confirm against the fixtures.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    from_first_date = download(fake_get_should_not_run,
                               fake_warehouse_with_date, ['http://foo.bar'],
                               '2014-03-01', n_threads=n_threads)
    n.assert_equal(next(from_first_date).text, 'baz')

    from_second_date = download(fake_get,
                                fake_warehouse_with_date, ['http://foo.bar'],
                                '2014-03-02', n_threads=n_threads)
    n.assert_equal(next(from_second_date).text, 'lalala')
def check_cached(n_threads):
    '''
    Check that the first response for 'http://foo.bar', downloaded through
    ``fake_warehouse`` with ``fake_get`` as the getter, has the text 'baz'.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    first = next(
        download(fake_get, fake_warehouse, ['http://foo.bar'], None,
                 n_threads=n_threads))
    observed_text = first.text
    n.assert_equal(observed_text, 'baz')
def check_bad_scheme(n_threads):
    '''
    Check that asking ``download`` for 'ftp://example.com' and taking the
    first result raises ``ValueError``.

    :param n_threads: forwarded to ``download`` as ``n_threads``.
    '''
    with n.assert_raises(ValueError):
        # Keep both calls inside the block: the error may surface from either.
        pending = download(fake_get, fake_warehouse, ['ftp://example.com'],
                           None, n_threads=n_threads)
        next(pending)