import random
import time

# Crawler, logger, sources, factors, min_cache_imgs and
# min_cache_imgs_before_refill are module-level globals defined elsewhere.


def cache_fill_loop():
    global sources
    while True:
        # fill cache up to min_cache_imgs
        if Crawler.info()["images"] < min_cache_imgs_before_refill:
            while Crawler.info()["images"] < min_cache_imgs:
                # let a randomly chosen source crawl for new images
                random.choice(sources).crawl()

        # sleep for non-invasive threading ;)
        time.sleep(1.337)
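# A minimal sketch of how cache_fill_loop() is presumably run: in a daemon
# thread, so the cache refills in the background while the main program
# serves images. The name start_cache_fill is illustrative and not part of
# the original code.
import threading

def start_cache_fill():
    fill_thread = threading.Thread(target=cache_fill_loop, daemon=True)
    fill_thread.start()
    return fill_thread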
def cache_status():
    info = Crawler.info()
    msg = "images cached: %d (%d bytes) - already crawled: %d (%d bytes)" % \
        (info["images"], info["images_size"], info["blacklist"], info["blacklist_size"])
    logger.info(msg)

    for crawler in sources:
        for site in sources[crawler]:
            key = crawler + "_" + site
            if key in info["images_per_site"]:
                factor = 1
                if crawler in factors and site in factors[crawler]:
                    factor = factors[crawler][site]
                count = info["images_per_site"][key]

                # draw a bar: one '#' per five images below the refill
                # threshold, one '*' per five images above it
                bar = "|"
                for i in range(0, count // 5):  # integer division: range() needs an int
                    if i < min_cache_imgs_before_refill / 5:
                        bar += "#"
                    else:
                        bar += "*"

                sitestats = ("%15s - %-15s with factor %4.1f: %2d Images " + bar) % \
                    (crawler, site, factor, count)
                logger.info(sitestats)
                msg += "\r\n" + sitestats
    return msg
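# For illustration only (all values made up): with
# min_cache_imgs_before_refill = 10, a site holding 23 cached images and a
# configured factor of 2.0 would be logged roughly as
#     SomeCrawler - funny           with factor  2.0: 23 Images |##**
# where '#' marks fifths below the refill threshold and '*' fifths above it.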
def cache_fill_loop():
    global sources
    while True:
        # fill cache up to min_cache_imgs per site
        info = Crawler.info()
        for crawler in sources:
            for site in sources[crawler]:
                key = crawler + "_" + site
                if key not in info["images_per_site"] \
                        or info["images_per_site"][key] < min_cache_imgs_before_refill:
                    try:
                        sources[crawler][site].crawl()
                        info = Crawler.info()
                    except Exception as e:
                        logger.error("Error in crawler %s - %s: %s" % (crawler, site, e))
                        break  # skip the remaining sites of this crawler

        # sleep for non-invasive threading ;)
        time.sleep(1.337)
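# Illustrative sketch (not from the original code): the module-level
# `sources` and `factors` dicts are assumed to share this shape, nesting
# crawler name -> site name. _ExampleCrawler is a stand-in stub for
# whatever crawler class the project actually uses.
class _ExampleCrawler:
    def crawl(self):
        pass

example_sources = {
    "SomeCrawler": {"funny": _ExampleCrawler(), "pics": _ExampleCrawler()},
}
example_factors = {
    "SomeCrawler": {"funny": 2.0},  # sites without an entry default to factor 1
}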
def cache_status():
    info = Crawler.info()
    msg = "images cached: %d (%d bytes) - already crawled: %d (%d bytes)" % \
        (info["images"], info["images_size"], info["blacklist"], info["blacklist_size"])
    logger.info(msg)
    return msg
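# Hedged usage sketch: start the background fill loop and log a status
# report once a minute. None of this wiring appears in the original code;
# start_cache_fill is the illustrative helper sketched above.
if __name__ == "__main__":
    start_cache_fill()
    while True:
        cache_status()
        time.sleep(60)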