Code example #1
File: nichtparasoup.py  Project: makefu/dochparasoup
def cache_fill_loop():
    global sources
    while True:  # fill cache up to min_cache_imgs
        if Crawler.info()["images"] < min_cache_imgs_before_refill:
            while Crawler.info()["images"] < min_cache_imgs:
                random.choice(sources).crawl()

        # sleep for non-invasive threading ;)
        time.sleep(1.337)
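
The "non-invasive threading" comment and the endless loop with time.sleep(1.337) suggest that cache_fill_loop is meant to run in a background thread alongside the rest of the program. A minimal sketch of how such a loop could be started as a daemon thread (the thread name and the startup code are illustrative assumptions, not taken from the project):

import threading

# Hypothetical startup code: run the crawler loop in the background so it
# never blocks the caller; the daemon flag lets the process exit cleanly.
filler = threading.Thread(target=cache_fill_loop, name="cache_fill_loop")
filler.daemon = True
filler.start()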
Code example #2
File: nichtparasoup.py  Project: k4cg/nichtparasoup
def cache_status():
    info = Crawler.info()
    msg = "images cached: %d (%d bytes) - already crawled: %d (%d bytes)" % \
          (info["images"], info["images_size"], info["blacklist"], info["blacklist_size"])
    logger.info(msg)

    for crawler in sources:
        for site in sources[crawler]:
            key = crawler + "_" + site
            if key in info["images_per_site"]:

                factor = 1
                if crawler in factors and site in factors[crawler]:
                    factor = factors[crawler][site]

                count = info["images_per_site"][key]

                bar = "|"
                for i in range(0, count / 5):
                    if i < min_cache_imgs_before_refill / 5:
                        bar += "#"
                    else:
                        bar += "*"

                sitestats = ("%15s - %-15s with factor %4.1f: %2d Images " + bar) % (crawler, site, factor, count)
                logger.info(sitestats)
                msg += "\r\n" + sitestats
    return msg
Code example #3
File: nichtparasoup.py  Project: k4cg/nichtparasoup
def cache_fill_loop():
    global sources
    while True:  # fill cache up to min_cache_imgs per site

        info = Crawler.info()
        for crawler in sources:
            for site in sources[crawler]:
                key = crawler + "_" + site

                if key not in info["images_per_site"] or info["images_per_site"][key] < min_cache_imgs_before_refill:
                    try:
                        sources[crawler][site].crawl()
                        info = Crawler.info()
                    except Exception as e:
                        logger.error("Error in crawler %s - %s: %s" % (crawler, site, e))
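                        # skip this crawler's remaining sites and move on to the next crawler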
                        break

        # sleep for non-invasive threading ;)
        time.sleep(1.337)
Code example #4
File: nichtparasoup.py  Project: makefu/dochparasoup
def cache_status():
    info = Crawler.info()
    msg = "images cached: %d (%d bytes) - already crawled: %d (%d bytes)" %\
          (info["images"], info["images_size"], info["blacklist"], info["blacklist_size"])
    logger.info(msg)
    return msg
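
All four snippets are module-level functions from nichtparasoup.py and rely on globals rather than parameters. A rough sketch of the surrounding module state they assume, based only on the names used above (the concrete values and the Crawler import path are placeholders, not the project's actual configuration):

import logging
import random
import time

from crawler import Crawler  # placeholder import; the real module layout may differ

logger = logging.getLogger("nichtparasoup")

# Cache thresholds used by cache_fill_loop and cache_status (example values only).
min_cache_imgs = 50
min_cache_imgs_before_refill = 20

# Per-crawler configuration. The makefu/dochparasoup version picks from a flat
# list with random.choice(sources); the k4cg/nichtparasoup version expects a
# nested dict of crawler name -> site name -> crawler instance, plus optional
# weighting factors per site.
sources = {}   # e.g. {"Reddit": {"gifs": <crawler instance>}}
factors = {}   # e.g. {"Reddit": {"gifs": 1.0}}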