def check_cached_with_date(n_threads):
    '''
    A cache entry dated on/after the request date is served as-is;
    a later request date forces a fresh fetch.
    '''
    urls = ['http://foo.bar']
    # Date matches the warehouse entry: the getter must never run.
    cached = next(download(
        fake_get_should_not_run, fake_warehouse_with_date,
        urls, '2014-03-01', n_threads=n_threads))
    n.assert_equal(cached.text, 'baz')
    # One day later: the cache is stale, so the real getter runs.
    fresh = next(download(
        fake_get, fake_warehouse_with_date,
        urls, '2014-03-02', n_threads=n_threads))
    n.assert_equal(fresh.text, 'lalala')
def check_cached(n_threads):
    '''
    A URL already present in the warehouse is answered from the cache.
    '''
    results = download(fake_get, fake_warehouse, ['http://foo.bar'], None,
                       n_threads=n_threads)
    n.assert_equal(next(results).text, 'baz')
def check_bad_scheme(n_threads):
    '''
    A URL with a non-HTTP scheme raises ValueError.
    '''
    with n.assert_raises(ValueError):
        # Both the call and the first pull stay inside the context,
        # whichever of the two actually raises.
        results = download(fake_get, fake_warehouse, ['ftp://example.com'],
                           None, n_threads=n_threads)
        next(results)
def sites(get=requests.get,
          url='http://www.craigslist.org/about/sites',
          cachedir='sites'):
    '''
    Generate craigslist sites.

    Downloads the sites index page (caching it in ``cachedir``) and
    returns the set of non-empty hostnames linked from it.
    '''
    response = download(get, Warehouse(cachedir), url, None)
    tree = lxml.html.fromstring(response.text)
    hosts = (urlsplit(href).netloc for href in tree.xpath('//a/@href'))
    # Drop empty netlocs (relative links have no hostname).
    return {host for host in hosts if host}
def _sections(get, warehouse, url):
    '''
    Yield section identifiers scraped from the links inside id("main").
    '''
    response = download(get, warehouse, url, None)
    tree = lxml.html.fromstring(response.text)
    for href in map(str, tree.xpath('id("main")/descendant::a/@href')):
        if len(href) == 4:
            # Three-character section code plus its trailing slash.
            yield href.rstrip('/')
        elif href.startswith('/i/personals/'):
            # NOTE(review): the replaced prefix ('/i/personals?category=')
            # cannot occur in an href that satisfies this guard
            # ('/i/personals/'), so the replace is a no-op and href is
            # yielded unchanged — confirm whether the guard was meant to
            # be '/i/personals?category='.
            yield href.replace('/i/personals?category=', '')
        elif href.startswith('/i/'):
            warnings.warn('Go to %s to see more sections.' % href)
def _sections(get, warehouse, url):
    '''
    Yield section identifiers found in the links under id("main").
    '''
    page = lxml.html.fromstring(download(get, warehouse, url, None).text)
    for raw in page.xpath('id("main")/descendant::a/@href'):
        href = str(raw)
        if len(href) == 4:
            # A bare section code like 'abc/' — strip the slash.
            yield href.rstrip('/')
            continue
        if href.startswith('/i/personals/'):
            # NOTE(review): the replace target does not begin with
            # '/i/personals/', so it never matches here and href is
            # yielded as-is — verify the intended guard/prefix pair.
            yield href.replace('/i/personals?category=', '')
        elif href.startswith('/i/'):
            warnings.warn('Go to %s to see more sections.' % href)
def test_download_one():
    '''
    Downloading a single URL (non-list) with an empty warehouse
    returns the freshly fetched body.
    '''
    response = download(fake_get, {}, 'http://foo.bar', None)
    n.assert_equal(response.text, 'lalala')
def check_not_cached(n_threads):
    '''
    An un-warehoused URL is fetched and then stored in the warehouse.
    '''
    warehouse = {}
    response = next(download(fake_get, warehouse, ['http://foo.bar'], None,
                             n_threads=n_threads))
    n.assert_equal(response.text, 'lalala')
    # The fetched response must now live in the warehouse under its URL.
    n.assert_dict_equal(warehouse, {'http://foo.bar': fake_get(None)})
def check_not_cached(n_threads):
    '''
    A fresh download populates the (initially empty) warehouse.
    '''
    store = {}
    results = download(fake_get, store, ['http://foo.bar'], None,
                       n_threads=n_threads)
    n.assert_equal(next(results).text, 'lalala')
    expected = {'http://foo.bar': fake_get(None)}
    n.assert_dict_equal(store, expected)
def check_cached_with_date(n_threads):
    '''
    Requests dated within the cache window hit the warehouse;
    requests dated after it re-download.
    '''
    # Within the cache date: fake_get_should_not_run guards against
    # any network call being made.
    hit = next(download(fake_get_should_not_run, fake_warehouse_with_date,
                        ['http://foo.bar'], '2014-03-01',
                        n_threads=n_threads))
    n.assert_equal(hit.text, 'baz')
    # Past the cache date: the getter is invoked for fresh content.
    miss = next(download(fake_get, fake_warehouse_with_date,
                         ['http://foo.bar'], '2014-03-02',
                         n_threads=n_threads))
    n.assert_equal(miss.text, 'lalala')
def check_cached(n_threads):
    '''
    A warehoused URL yields the cached body, not a fresh fetch.
    '''
    stream = download(fake_get, fake_warehouse, ['http://foo.bar'], None,
                      n_threads=n_threads)
    first = next(stream)
    n.assert_equal(first.text, 'baz')
def check_bad_scheme(n_threads):
    '''
    Schemes other than HTTP(S) are rejected with ValueError.
    '''
    with n.assert_raises(ValueError):
        # Keep both the call and the first pull inside the context
        # so the error is caught wherever it is raised.
        next(download(fake_get, fake_warehouse, ['ftp://example.com'],
                      None, n_threads=n_threads))