def download(url, origin, image_urls): print '# debug:', url li = [] for img_url in image_urls: img_obj = Image(cfg.BASE_DIR, get_subdir_name(url), img_url) readme = """origin: {origin} fuskator: {fuskator} {urls} """.format(origin=origin, fuskator=url, urls='\n'.join([x for x in image_urls])) img_obj.readme = readme li.append(img_obj) unbuffered() shuffle(li) # randomize the order, # think of the webserver log too ;) if len(li) > 0: print '#', url print '# number of images:', len(li) print '# download dir.:', li[0].get_local_dir() for img in li: img.download() sys.stdout.write('.') if cfg.SLEEP_BETWEEN_IMAGES: scraper.sleep(3,3) print
    # --- tail of a function whose `def` line is outside this chunk ---
    # Serialize the (lxml) XML tree built above and write it back to the
    # FoxyProxy config file, but only after a backup (BAK) exists.
    # NOTE(review): original indentation was lost; the prints are assumed
    # to sit inside the `if` (write + notify only when backup present) —
    # TODO confirm against the original file.
    tree = etree.ElementTree(root)
    if os.path.exists(folder + '/' + BAK):
        tree.write(input, pretty_print=True, xml_declaration=True)
        print '# written to', XML
        print '# please restart Firefox'

def main():
    """Pick a working US proxy and point FoxyProxy at it."""
#    proxy = get_us_proxy_from_file()
    proxy = None
    if not proxy:
        try:
            proxy = get_best_us_proxy_from_web()
        except IndexError:
            # Proxy list exhausted without a working entry.
            print >>sys.stderr, 'Warning: no working proxy found.'
            sys.exit(1)
#    else
#        proxy = '97.65.200.194:8080'    # for testing only
    print '#', proxy
    # Split "ip:port" into its parts; port stays "" when absent.
    ip = proxy
    port = ""
    if ':' in proxy:
        ip, port = proxy.split(':')
    foxyproxy(ip, port)

#############################################################################

if __name__ == "__main__":
    unbuffered()    # line-buffer-free stdout so progress output appears live
    main()