Example no. 1
0
def crawl(mod):
    cache = Cache()
    repos = mod.get_repos(test)
    i = 0
    for repo in repos:
        print str(i) + "/" + str(len(repos)), repo
        s = time.clock()
        if not last:
            repo.last_crawl = None
        last_crawl, rels = mod.crawl_repo(repo)
        total_new = downstream.add_releases(repo, rels, test, cache)
        if total_new > 0:
            cache.evict([(None, repo.distro_id)])
        downstream.set_last_crawl(repo, last_crawl, test)
        print "\t" + str(total_new), "new releases", "\t\t", time.clock() - s, "secs"
        i += 1
Example no. 2
0
def crawl(mod):
    cache = Cache()
    repos = mod.get_repos(test)
    i = 0
    for repo in repos:
        print str(i) + "/" + str(len(repos)), repo
        s = time.clock()
        if not last:
            repo.last_crawl = None
        last_crawl, rels = mod.crawl_repo(repo)
        total_new = downstream.add_releases(repo, rels, test, cache)
        if total_new > 0:
            cache.evict([(None, repo.distro_id)])
        downstream.set_last_crawl(repo, last_crawl, test)
        print "\t" + str(
            total_new), "new releases", "\t\t", time.clock() - s, "secs"
        i += 1
Example no. 3
0
total_start = time.clock()
stats = []

# Crawl every downstream (distro) target. A failure in one target must
# not abort the whole run, so errors are logged and the loop continues.
for d in downstream_targets:
    s = time.clock()
    try:
        stats.append((d, crawl(DISTROS[d])))
    except Exception:
        # Was a bare "except:", which also swallowed KeyboardInterrupt
        # and SystemExit; Exception keeps the best-effort behaviour
        # without masking a deliberate interrupt.
        print "error from distro:", d
        print traceback.format_exc()
    gc.collect()
    print time.clock() - s, "distro seconds"

# Same best-effort pass over the upstream targets.
for u in upstream_targets:
    s = time.clock()
    try:
        stats.append((u, UPSTREAM[u].crawl(test)))
    except Exception:
        print "error from upstream:", u
        print traceback.format_exc()
    gc.collect()
    print time.clock() - s, "upstream seconds"

# All targets done: flush the entire cache.
cache = Cache()
cache.evict([(None, None)])

print time.clock() - total_start, "seconds total"

# Persist the per-target stats. "with" closes the file even if dump()
# raises (the original leaked the handle on error), and pickle output is
# binary so the file must be opened "wb", not "w".
with open("crawl_stats/" + str(int(time.time())) + ".pickle", "wb") as save_to:
    pickle.dump(stats, save_to)
Example no. 4
0
total_start = time.clock()
stats = []

# Best-effort crawl of each distro target: log and continue on failure
# so one broken target cannot stop the run.
for d in downstream_targets:
    s = time.clock()
    try:
        stats.append((d, crawl(DISTROS[d])))
    except Exception:
        # Narrowed from a bare "except:" so Ctrl-C / SystemExit still
        # terminate the script instead of being logged and ignored.
        print "error from distro:", d
        print traceback.format_exc()
    gc.collect()
    print time.clock() - s, "distro seconds"

# Identical best-effort pass over the upstream targets.
for u in upstream_targets:
    s = time.clock()
    try:
        stats.append((u, UPSTREAM[u].crawl(test)))
    except Exception:
        print "error from upstream:", u
        print traceback.format_exc()
    gc.collect()
    print time.clock() - s, "upstream seconds"

# Every target crawled: drop the whole cache.
cache = Cache()
cache.evict([(None, None)])

print time.clock() - total_start, "seconds total"

# Save the stats snapshot. Binary mode ("wb") is required for pickle
# data, and the context manager closes the file even when dump() fails
# (the original opened in "w" and leaked the handle on error).
with open("crawl_stats/" + str(int(time.time())) + ".pickle", "wb") as save_to:
    pickle.dump(stats, save_to)