import sys
from random import shuffle

# `cfg`, `scraper`, `Image`, `get_subdir_name` and `unbuffered` are
# project-level helpers, assumed to be importable from elsewhere in the repo.


def download(url, origin, image_urls):
    print '# debug:', url
    li = []
    for img_url in image_urls:
        img_obj = Image(cfg.BASE_DIR, get_subdir_name(url), img_url)
        readme = """origin:   {origin}
fuskator: {fuskator}

{urls}
""".format(origin=origin, fuskator=url, urls='\n'.join(image_urls))
        img_obj.readme = readme
        li.append(img_obj)
    unbuffered()
    # randomize the order; think of the webserver log too ;)
    shuffle(li)
    if len(li) > 0:
        print '#', url
        print '# number of images:', len(li)
        print '# download dir.:', li[0].get_local_dir()
    for img in li:
        img.download()
        sys.stdout.write('.')    # progress indicator, one dot per image
        if cfg.SLEEP_BETWEEN_IMAGES:
            scraper.sleep(3, 3)
    print    # final newline after the progress dots
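# unbuffered() is not defined in this section. A plausible Python 2 sketch
# (an assumption, not necessarily the repo's actual implementation) that
# makes the progress dots written in download() appear immediately:

import os

def unbuffered():
    # Reopen stdout with a zero-size buffer so each '.' is flushed at once.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)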
def test_sleep():
    sleep_time = scraper.sleep(fix=5, plus=5, test=True)
    assert 5.0 <= sleep_time <= 10.0
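# The scraper.sleep() helper itself is not shown in this section. A minimal
# sketch consistent with the two call sites above (the keyword names
# fix/plus/test are taken from the test; the uniform-random body is an
# assumption):

import random
import time

def sleep(fix=1, plus=0, test=False):
    """Sleep for `fix` seconds plus a random extra of at most `plus` seconds.

    Returns the chosen delay; with test=True it only computes the value
    without actually sleeping, which is what test_sleep() relies on.
    """
    sleep_time = fix + plus * random.random()
    if not test:
        time.sleep(sleep_time)
    return sleep_time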