def main():
    """Read a URL list from sys.argv[1] and fetch, via fetchall(), every
    URL whose local image file (under sys.argv[2]) does not exist yet."""
    urls_path = sys.argv[1]
    images_dir = sys.argv[2]
    pending = []
    for line in open(urls_path):
        url = line.strip()
        target = os.path.join(images_dir, layout.ext_img_url2fn(url))
        if os.path.exists(target):
            continue  # already downloaded
        pending.append((url, target))
    fetchall(pending)
def main():
    """Queue every not-yet-downloaded image URL from sys.argv[1] on a
    fetcher.Fetcher (target paths under sys.argv[2]) and run the fetch."""
    urls_path = sys.argv[1]
    images_dir = sys.argv[2]
    bot = fetcher.Fetcher(report_cb=report_cb)
    for line in open(urls_path):
        url = line.strip()
        target = os.path.join(images_dir, layout.ext_img_url2fn(url))
        if not os.path.exists(target):
            bot.add(url, target)
    #print bot.jobs
    bot.run()
def parse_css(css, cssimagedir): urls = [u for u in re.findall('url\((.*?)\)', css) if not u.startswith('data:')] + extra for u in urls: url = layout.norm_ext_img_url(u) lurl = layout.ext_img_url2local_cssimg_url(url) fn = layout.ext_img_url2fn(url, keep_ext=False) ofn = os.path.join(cssimagedir, fn) #print url, lurl, fn, ofn try: open(ofn, 'w').write( urllib2.urlopen(url).read() ) css = css.replace(u, lurl) except urllib2.URLError, e: print 'ERR', e, url