# Smoke test for snmweb's UsageCache: build a cache over a fresh
# "snm-cooked-test" Mongo DB, check the empty-cache invariants, then define
# per-step assertion helpers used as test packets are registered.
# NOTE(review): line structure reconstructed from a collapsed source.
import json
import pdb
import sys
from snmweb.usage_cache import UsageCache, freshDb
from pymongo import MongoClient, Connection

c = Connection()
dest = freshDb(c, "snm-cooked-test")
with open("testdata.json") as f:
    testdata = json.load(f)

# True=assume logical link between "root" non-dependent packages
# (same flag convention as the other ingest scripts in this project)
cache = UsageCache(dest, True, "TACC")

# A freshly created cache must start completely empty.
assert len(cache.apps) == 0, "Empty cache has apps"
assert cache.max_co_uses["static"] == 0, "Empty cache has static use count"
assert cache.max_co_uses["logical"] == 0, "Empty cache has logical use count"


def right_after_first(cache):
    # Invariants expected immediately after the first test packet is
    # registered: 11 apps appear, one use/user on 2014-10-14, and a
    # logical (but not static) co-occurrence between graphics and grDevices.
    assert len(cache.apps) == 11, "Did not add first app"
    assert cache.apps["datasets"]["usage"]["2014-10-14"] == 1
    assert len(cache.apps["datasets"]["user_list"]["2014-10-14"]) == 1
    assert cache.max_co_uses["static"] == 1
    assert cache.max_co_uses["logical"] == 1
    assert cache.apps["graphics"]["co_occurence"]["grDevices"]["static"] == 0
    assert cache.apps["graphics"]["co_occurence"]["grDevices"]["logical"] == 1


def right_after_second(cache):
    # NOTE(review): truncated here — the body of right_after_second
    # continues beyond this chunk (a later duplicate chunk shows its first
    # assertion).
# NOTE(review): this chunk starts mid-function — the three statements below
# are the tail of a queue-consumer `worker` loop whose header lies outside
# this view; the reconstructed indentation is a best guess.
        usecache.registerPacket(packet)
        # Flush to Mongo only when the queue has drained, batching writes.
        if queue.empty() and usecache.dirty:
            usecache.saveToMongo()
        queue.task_done()


def initializeThreads(usecache):
    # Start a single daemon worker thread that feeds `usecache` from the
    # module-level `queue`.
    t = Thread(target=worker, args=(usecache,))
    t.daemon = True
    t.start()


if __name__ == "__main__":
    c = Connection()
    # True=assume logical link between "root" non-dependent packages
    c.drop_database("snm-tacc")
    usecache = UsageCache(openOrCreate(c, "snm-tacc"), True, "TACC", useWeakDeps=True)
    initializeThreads(usecache)
    rownum = 0
    try:
        # Replay every raw scimapInfo record through the worker thread.
        # timeout=False keeps the Mongo cursor alive for long scans.
        for raw in c["snm-tacc-raw"]["scimapInfo"].find(timeout=False):
            rownum += 1
            queue.put(raw)
        # Busy-wait until the worker drains the queue.
        # NOTE(review): placement of this loop relative to the `for` above is
        # reconstructed — confirm against the original line breaks.
        while (not(queue.empty())):
            time.sleep(1)
            print "Spooling through queued packets at row#", rownum
        queue.join()
        print "Saving"
        if usecache.dirty:
            usecache.saveToMongo()
        print "Saved."
        # NOTE(review): the except/finally clause of this `try` continues
        # beyond this chunk.
# NOTE(review): this chunk starts mid-function — the try/except below is the
# tail of a packet-handling routine whose header (and the bindings of `data`,
# `ip`, `dbraw`) lie outside this view; indentation is reconstructed.
    try:
        # Parse the raw JSON packet; scrub_dots presumably rewrites dotted
        # keys (illegal in Mongo field names) — TODO confirm its definition.
        record = json.loads(data, object_hook=scrub_dots)
        record["receivedEpoch"] = int(time.time())
        registerParsed(c, record, ip, usecache, dbraw)
    except Exception as e:
        # Best-effort handler: log the error and full traceback, keep going.
        print "Error: " + str(e)
        (r1, r2, r3) = sys.exc_info()
        print traceback.format_exception(r1, r2, r3)


def initializeThreads(usecache):
    # Start a single daemon worker thread that feeds `usecache` from the
    # module-level `queue`.
    t = Thread(target=worker, args=(usecache, ))
    t.daemon = True
    t.start()


def finalizeThreads():
    """Used by taccParse and reprocess"""
    # Busy-wait until the worker thread has drained the queue.
    while (not (queue.empty())):
        time.sleep(1)


if __name__ == "__main__":
    c = Connection()
    # True=assume logical link between "root" non-dependent packages
    usecache = UsageCache(openOrCreate(c, "snm-r"), True, "R")
    initializeThreads(usecache)
    # NOTE(review): plain function call — `await` was not a keyword in
    # Python 2; its definition lies outside this view (likely the server's
    # listen loop). Confirm before porting to Python 3, where this is a
    # syntax error.
    await ()
# Rebuild the "R" usage database from scratch: scrape the sqlite repository
# DB, write the appinfo file, then replay every raw scimapInfo record through
# a fresh UsageCache, finally attaching git and publication metadata.
# NOTE(review): no __main__ guard is visible in this chunk — these may be the
# interior lines of a guard defined outside this view (a later duplicate
# chunk shows the guarded form).
c = Connection()
if len(sys.argv) < 4:
    print "Usage:", sys.argv[ 0], "<sqlite repo db>", "<mongo db>", "<appinfo file>"
    quit()
sqlitedb = sys.argv[1]
snmdb = sys.argv[2]
appinfo = sys.argv[3]
if (not os.path.isfile(sqlitedb)):
    print sqlitedb, "does not exist."
    quit()
rs = RepoScrape(sqlitedb)
rs.makeAppInfo()
rs.writeAppInfo(appinfo)
# True=assume logical link between "root" non-dependent packages
c.drop_database(snmdb)
usecache = UsageCache(openOrCreate(c, snmdb), True, "R")
initializeThreads(usecache)
# Replay all raw records through the worker thread via the shared queue.
for raw in c["snm-raw-records"]["scimapInfo"].find():
    queue.put(raw)
finalizeThreads()
# Attach repository-derived metadata once all usage packets are in.
usecache.insertGitData(rs)
usecache.insertPublicationData(rs)
def importTaccData(taccfiles):
    # Ingest TACC accounting logs. Pass 1 builds the executable-name table;
    # pass 2 reconstructs per-job time windows and temporal ("logical")
    # app-to-app links, then replays each recognised run through the usage
    # cache as a synthetic scimap packet.
    # NOTE(review): line structure reconstructed from a collapsed source —
    # the indentation of the trailing report lines is a best guess.
    c = Connection()
    c.drop_database("snm-tacc-raw")
    usecache = UsageCache(freshDb(c, "snm-tacc"), False, "TACC")
    initializeThreads(usecache)
    # Pass1: build appname table
    for jobpart in forTaccPart(taccfiles):
        buildAppnameTable(jobpart)
    fiveOnly()
    print "--------------------------------------------"
    nLabels = appExecs.keys()
    n = len(nLabels)
    #lookup = { nLabels[i]: i for i in range(0,n) }
    #dsm = numpy.zeros((n,n), dtype=numpy.int)
    jobsizeHist = defaultdict(int)  # job size -> frequency
    app_counts = defaultdict(int)   # guessed app name -> run count
    found = 0       # runs whose executable could be identified
    not_found = 0   # runs that could not be mapped to an app
    counter = 0
    for job in forTaccLongJob(taccfiles):
        jobsizeHist[len(job)] += 1
        # Make jobnum -> {start: N, end: N, appset: [a,b,c], follows: {J->delay, J->delay}}
        jobnum = defaultdict(dict)
        for j in job:
            id = j["jobID"]
            niceExec = guess1App(j)
            if id not in jobnum:
                # First record for this job id: initialise its time window,
                # app set, and link tables.
                jobnum[id]["start"] = int(j["startEpoch"])
                jobnum[id]["end"] = int(j["startEpoch"]) + int(
                    float(j["runTime"]))
                if (niceExec != ''):
                    jobnum[id]["appset"] = set([niceExec])
                else:
                    jobnum[id]["appset"] = set([])
                jobnum[id]["follows"] = defaultdict(dict)
                jobnum[id]["logical"] = defaultdict(set)
            else:
                # Widen the job's time window and accumulate its app set.
                jobnum[id]["start"] = min(int(j["startEpoch"]),
                                          jobnum[id]["start"])
                jobnum[id]["end"] = max(
                    int(j["startEpoch"]) + int(float(j["runTime"])),
                    jobnum[id]["end"])
                if (niceExec != ''):
                    jobnum[id]["appset"].add(niceExec)
        # O(J^2) pairwise pass: record that job k strictly follows job j
        # (with its delay), and credit k's apps with a "logical" link to the
        # apps that ran only in the earlier job.
        for j in jobnum:
            for k in jobnum:
                if j != k:
                    if (jobnum[j]["end"] < jobnum[k]["start"]):
                        jobnum[k]["follows"][
                            j] = jobnum[k]["start"] - jobnum[j]["end"]
                        for xc in (jobnum[k]["appset"] - jobnum[j]["appset"]):
                            jobnum[k]["logical"][xc] = jobnum[k]["logical"][
                                xc].union(jobnum[j]["appset"] - jobnum[k]["appset"])
        for j in job:
            counter = counter + 1
            app = guess1App(j)
            app_counts[app] += 1
            if (app != ''):
                found += 1
                # Build a synthetic scimap-style record for this run.
                rec = copy.copy(j)
                rec["endEpoch"] = int(float(rec["startEpoch"])) + int(
                    float(rec["runTime"]))
                rec["startTime"] = ""
                rec["dynDeps"] = []
                rec["exec"] = app
                rec["user"] = userAnonymize(rec["user"])
                rec["dynPackDeps"] = []
                rec["weakDeps"] = []
                jobinf = jobnum[rec["jobID"]]
                # Temporal follows-links become "weak" package dependencies.
                if (app in jobinf["logical"] and jobinf["logical"][app]):
                    rec["weakPackDeps"] = {app: list(jobinf["logical"][app])}
                else:
                    rec["weakPackDeps"] = {}
                rec["weakPackDeps"] = scrub_dots(rec["weakPackDeps"])
                if (isinstance(rec["pkgT"], list)):
                    rec["pkgT"] = {}
                else:
                    # commented-out version includes some less-interesting library information
                    rec["pkgT"] = {k: [] for (k, v) in rec["pkgT"].items()}
                    # rec["pkgT"] = { k: v["libA"] for (k,v) in rec["pkgT"].items() }
                rec["pkgT"][app] = rec["pkgT"].keys()
                rec["pkgT"] = scrub_dots(rec["pkgT"])
                data = json.dumps(rec)  # NOTE(review): `data` appears unused
                registerParsed(c, rec, "0.0.0.0", usecache,
                               dbraw="snm-tacc-raw")
                prevapp = app
                prevapptime = rec["endEpoch"]
            else:
                not_found += 1
    finalizeThreads()
    # Report per-app counts and the overall identification rate.
    for app in app_counts:
        print app, app_counts[app]
    print "Found", found * 100.0 / (found + not_found), "% of", (
        found + not_found), "runs"
# Rebuild the "R" usage database from scratch: scrape the sqlite repository
# DB, write the appinfo file, then replay every raw scimapInfo record through
# a fresh UsageCache, finally attaching git and publication metadata.
# NOTE(review): an earlier chunk in this source duplicates this body without
# the __main__ guard.
if __name__ == "__main__":
    c = Connection()
    if len(sys.argv) < 4:
        print "Usage:", sys.argv[0], "<sqlite repo db>", "<mongo db>", "<appinfo file>"
        quit()
    sqlitedb = sys.argv[1]
    snmdb = sys.argv[2]
    appinfo = sys.argv[3]
    if (not os.path.isfile(sqlitedb)):
        print sqlitedb, "does not exist."
        quit()
    rs = RepoScrape(sqlitedb)
    rs.makeAppInfo()
    rs.writeAppInfo(appinfo)
    # True=assume logical link between "root" non-dependent packages
    c.drop_database(snmdb)
    usecache = UsageCache(openOrCreate(c, snmdb), True, "R")
    initializeThreads(usecache)
    # Replay all raw records through the worker thread via the shared queue.
    for raw in c["snm-raw-records"]["scimapInfo"].find():
        queue.put(raw)
    finalizeThreads()
    # Attach repository-derived metadata once all usage packets are in.
    usecache.insertGitData(rs)
    usecache.insertPublicationData(rs)
# Smoke test for snmweb's UsageCache (duplicate of an earlier chunk in this
# source, extended with the first assertion of right_after_second): build a
# cache over a fresh "snm-cooked-test" Mongo DB, check the empty-cache
# invariants, then define per-step assertion helpers.
# NOTE(review): line structure reconstructed from a collapsed source.
import json
import pdb
import sys
from snmweb.usage_cache import UsageCache, freshDb
from pymongo import MongoClient, Connection

c = Connection()
dest = freshDb(c, "snm-cooked-test")
with open("testdata.json") as f:
    testdata = json.load(f)

# True=assume logical link between "root" non-dependent packages
cache = UsageCache(dest, True, "TACC")

# A freshly created cache must start completely empty.
assert len(cache.apps) == 0, "Empty cache has apps"
assert cache.max_co_uses["static"] == 0, "Empty cache has static use count"
assert cache.max_co_uses["logical"] == 0, "Empty cache has logical use count"


def right_after_first(cache):
    # Invariants expected immediately after the first test packet is
    # registered: 11 apps appear, one use/user on 2014-10-14, and a
    # logical (but not static) co-occurrence between graphics and grDevices.
    assert len(cache.apps) == 11, "Did not add first app"
    assert cache.apps["datasets"]["usage"]["2014-10-14"] == 1
    assert len(cache.apps["datasets"]["user_list"]["2014-10-14"]) == 1
    assert cache.max_co_uses["static"] == 1
    assert cache.max_co_uses["logical"] == 1
    assert cache.apps["graphics"]["co_occurence"]["grDevices"]["static"] == 0
    assert cache.apps["graphics"]["co_occurence"]["grDevices"]["logical"] == 1


def right_after_second(cache):
    # Invariants after the second test packet: the app count is unchanged.
    # NOTE(review): the rest of this function's body may continue beyond
    # this chunk.
    assert len(cache.apps) == 11, "Did not add second app"