Esempio n. 1
0
def main():
    """Collect the URLs whose image file is missing and fetch them in one batch.

    Command-line arguments:
        sys.argv[1] -- path of a text file with one URL per line.
        sys.argv[2] -- directory in which the image files are looked up.

    For each URL the target file name is ``layout.ext_img_url2fn(url)``
    joined onto the images directory. URLs whose target file already exists
    are skipped; the remaining ``(url, filename)`` pairs are handed to
    ``fetchall`` as a single list.
    """
    urls_fn = sys.argv[1]
    images_dir = sys.argv[2]
    urlfns = []
    # Use a context manager so the URL list is closed deterministically
    # instead of relying on garbage collection.
    with open(urls_fn) as urls_file:
        for url in urls_file:
            url = url.strip()
            if not url:
                # Blank (e.g. trailing) lines carry no URL to fetch.
                continue
            fn = os.path.join(images_dir, layout.ext_img_url2fn(url))
            if not os.path.exists(fn):
                urlfns.append((url, fn))
    fetchall(urlfns)
Esempio n. 2
0
def main():
    """Queue every URL whose image file is missing on a Fetcher and run it.

    Command-line arguments:
        sys.argv[1] -- path of a text file with one URL per line.
        sys.argv[2] -- directory in which the image files are looked up.

    For each URL the target file name is ``layout.ext_img_url2fn(url)``
    joined onto the images directory. URLs whose target file already exists
    are skipped; the others are added to a ``fetcher.Fetcher`` (created with
    ``report_cb`` as its report callback), which is then run.
    """
    urls_fn = sys.argv[1]
    images_dir = sys.argv[2]

    bot = fetcher.Fetcher(report_cb=report_cb)
    # Use a context manager so the URL list is closed deterministically
    # instead of relying on garbage collection.
    with open(urls_fn) as urls_file:
        for url in urls_file:
            url = url.strip()
            if not url:
                # Blank (e.g. trailing) lines carry no URL to fetch.
                continue
            fn = os.path.join(images_dir, layout.ext_img_url2fn(url))
            if not os.path.exists(fn):
                bot.add(url, fn)

    bot.run()
Esempio n. 3
0
def parse_css(css, cssimagedir):
    urls = [u for u in re.findall('url\((.*?)\)', css) if not u.startswith('data:')] + extra
    for u in urls:
        url = layout.norm_ext_img_url(u)
        lurl = layout.ext_img_url2local_cssimg_url(url)
        fn = layout.ext_img_url2fn(url, keep_ext=False)
        ofn = os.path.join(cssimagedir, fn)
        #print url, lurl, fn, ofn
        try:
            open(ofn, 'w').write( urllib2.urlopen(url).read() )
            css = css.replace(u, lurl)
        except urllib2.URLError, e:
            print 'ERR', e, url