import json
import pdb
import sys
from snmweb.usage_cache import UsageCache, freshDb

from pymongo import MongoClient, Connection

# Open a connection (Connection() is called with no arguments) and obtain the
# "snm-cooked-test" database through freshDb.
# NOTE(review): presumably freshDb returns an empty database -- confirm.
c = Connection()
dest = freshDb(c, "snm-cooked-test")

# Load the JSON test data that sits next to this script.
with open("testdata.json") as f:
    testdata = json.loads(f.read())


# A cache built on that database is expected to start with no apps and with
# both co-use maxima at zero.
cache = UsageCache(dest, True, "TACC")
assert not len(cache.apps), "Empty cache has apps"
assert 0 == cache.max_co_uses["static"], "Empty cache has static use count"
assert 0 == cache.max_co_uses["logical"], "Empty cache has logical use count"


def right_after_first(cache):
    """Assert the cache contents expected once the first app has been added.

    Reads ``cache.apps`` and ``cache.max_co_uses`` and raises
    ``AssertionError`` on the first expectation that does not hold.
    """
    day = "2014-10-14"

    apps = cache.apps
    assert len(apps) == 11, "Did not add first app"

    # "datasets" must show exactly one use, by exactly one user, on that day.
    datasets = apps["datasets"]
    assert datasets["usage"][day] == 1
    assert len(datasets["user_list"][day]) == 1

    # Both co-use maxima must be exactly one.
    peaks = cache.max_co_uses
    assert peaks["static"] == 1
    assert peaks["logical"] == 1

    # graphics -> grDevices must be recorded as a logical co-occurrence only.
    link = apps["graphics"]["co_occurence"]["grDevices"]
    assert link["static"] == 0
    assert link["logical"] == 1


def right_after_second(cache):
Example #2
0
def importTaccData(taccfiles):
    """Rebuild the "snm-tacc" usage cache from TACC job files.

    Drops the "snm-tacc-raw" database, wraps a fresh "snm-tacc" database in a
    ``UsageCache`` and then makes two passes over ``taccfiles``:

    1. Every item yielded by ``forTaccPart`` is fed to ``buildAppnameTable``;
       ``fiveOnly`` is called afterwards.
    2. Every group yielded by ``forTaccLongJob`` is scanned.  Records for
       which ``guess1App`` returns a non-empty name are copied, normalised
       and handed to ``registerParsed``; the others are only counted.

    Within one group, job ID K "follows" job ID J when J ended strictly
    before K started.  Each app that K runs and J does not gets, as logical
    (weak) package dependencies, the apps that J runs and K does not.

    A per-app record count and the percentage of records with a recognised
    app are printed at the end.

    Args:
        taccfiles: passed unchanged to ``forTaccPart`` and ``forTaccLongJob``.
    """
    c = Connection()
    c.drop_database("snm-tacc-raw")
    usecache = UsageCache(freshDb(c, "snm-tacc"), False, "TACC")
    initializeThreads(usecache)

    # Pass 1: build appname table
    for jobpart in forTaccPart(taccfiles):
        buildAppnameTable(jobpart)

    fiveOnly()
    # Single-argument print(...) gives the same output whether print is a
    # statement or a function.
    print("--------------------------------------------")

    app_counts = defaultdict(int)
    found = 0
    not_found = 0

    # Pass 2: derive logical dependencies per group and register the records.
    for job in forTaccLongJob(taccfiles):
        # job ID -> {start: N, end: N, appset: set, follows: {J: delay},
        #            logical: {app: set of apps from earlier job IDs}}
        jobnum = {}
        for j in job:
            job_id = j["jobID"]
            niceExec = guess1App(j)
            # Parsed via float() so that values such as "1413300000.0" are
            # accepted, exactly as for endEpoch further down.
            start = int(float(j["startEpoch"]))
            end = start + int(float(j["runTime"]))
            info = jobnum.get(job_id)
            if info is None:
                jobnum[job_id] = {
                    "start": start,
                    "end": end,
                    "appset": {niceExec} if niceExec != '' else set(),
                    "follows": {},
                    "logical": defaultdict(set),
                }
            else:
                # Widen the window so it covers every record of this job ID.
                info["start"] = min(start, info["start"])
                info["end"] = max(end, info["end"])
                if niceExec != '':
                    info["appset"].add(niceExec)

        # Compare every ordered pair of distinct job IDs.
        for j_id, earlier in jobnum.items():
            for k_id, later in jobnum.items():
                if j_id == k_id or not (earlier["end"] < later["start"]):
                    continue
                later["follows"][j_id] = later["start"] - earlier["end"]
                inherited = earlier["appset"] - later["appset"]
                for xc in later["appset"] - earlier["appset"]:
                    later["logical"][xc] |= inherited

        for j in job:
            app = guess1App(j)
            app_counts[app] += 1
            if app == '':
                not_found += 1
                continue

            found += 1
            rec = copy.copy(j)
            rec["endEpoch"] = (int(float(rec["startEpoch"])) +
                               int(float(rec["runTime"])))
            rec["startTime"] = ""
            rec["dynDeps"] = []
            rec["exec"] = app
            rec["user"] = userAnonymize(rec["user"])
            rec["dynPackDeps"] = []
            rec["weakDeps"] = []

            # .get() does not create an entry in the defaultdict.
            weak = jobnum[rec["jobID"]]["logical"].get(app)
            rec["weakPackDeps"] = scrub_dots(
                {app: list(weak)} if weak else {})

            if isinstance(rec["pkgT"], list):
                pkgT = {}
            else:
                # Only the package names are kept; the per-package library
                # details are dropped.
                pkgT = {k: [] for k in rec["pkgT"]}
            # Snapshot of the package names taken before `app` is inserted.
            pkgT[app] = list(pkgT)
            rec["pkgT"] = scrub_dots(pkgT)

            registerParsed(c, rec, "0.0.0.0", usecache, dbraw="snm-tacc-raw")

    finalizeThreads()
    for app in app_counts:
        print("%s %s" % (app, app_counts[app]))
    total = found + not_found
    # Guard against a run in which no record was seen at all.
    percent = found * 100.0 / total if total else 0.0
    print("Found %s %% of %s runs" % (percent, total))
Example #3
0
def _summarize_job_ids(job):
    """Collapse the records of one group into one summary dict per job ID."""
    jobnum = {}
    for j in job:
        job_id = j["jobID"]
        niceExec = guess1App(j)
        # Parsed via float() so that values such as "1413300000.0" are
        # accepted, exactly as for endEpoch in _cooked_record.
        start = int(float(j["startEpoch"]))
        end = start + int(float(j["runTime"]))
        info = jobnum.get(job_id)
        if info is None:
            jobnum[job_id] = {
                "start": start,
                "end": end,
                "appset": {niceExec} if niceExec != '' else set(),
                "follows": {},
                "logical": defaultdict(set),
            }
        else:
            # Widen the window so it covers every record of this job ID.
            info["start"] = min(start, info["start"])
            info["end"] = max(end, info["end"])
            if niceExec != '':
                info["appset"].add(niceExec)
    return jobnum


def _link_logical_deps(jobnum):
    """Fill "follows" and "logical" for every job ID that starts after another ended."""
    for j_id, earlier in jobnum.items():
        for k_id, later in jobnum.items():
            if j_id == k_id or not (earlier["end"] < later["start"]):
                continue
            later["follows"][j_id] = later["start"] - earlier["end"]
            inherited = earlier["appset"] - later["appset"]
            for xc in later["appset"] - earlier["appset"]:
                later["logical"][xc] |= inherited


def _cooked_record(j, app, jobinf):
    """Return a normalised shallow copy of record `j` for the recognised `app`."""
    rec = copy.copy(j)
    rec["endEpoch"] = (int(float(rec["startEpoch"])) +
                       int(float(rec["runTime"])))
    rec["startTime"] = ""
    rec["dynDeps"] = []
    rec["exec"] = app
    rec["user"] = userAnonymize(rec["user"])
    rec["dynPackDeps"] = []
    rec["weakDeps"] = []

    # .get() does not create an entry in the defaultdict.
    weak = jobinf["logical"].get(app)
    rec["weakPackDeps"] = scrub_dots({app: list(weak)} if weak else {})

    if isinstance(rec["pkgT"], list):
        pkgT = {}
    else:
        # Only the package names are kept; the per-package library details
        # are dropped.
        pkgT = {k: [] for k in rec["pkgT"]}
    # Snapshot of the package names taken before `app` is inserted.
    pkgT[app] = list(pkgT)
    rec["pkgT"] = scrub_dots(pkgT)
    return rec


def importTaccData(taccfiles):
    """Rebuild the "snm-tacc" usage cache from TACC job files.

    Drops the "snm-tacc-raw" database, wraps a fresh "snm-tacc" database in a
    ``UsageCache`` and then makes two passes over ``taccfiles``:

    1. Every item yielded by ``forTaccPart`` is fed to ``buildAppnameTable``;
       ``fiveOnly`` is called afterwards.
    2. Every group yielded by ``forTaccLongJob`` is scanned.  Records for
       which ``guess1App`` returns a non-empty name are copied, normalised
       and handed to ``registerParsed``; the others are only counted.

    Within one group, job ID K "follows" job ID J when J ended strictly
    before K started.  Each app that K runs and J does not gets, as logical
    (weak) package dependencies, the apps that J runs and K does not.

    A per-app record count and the percentage of records with a recognised
    app are printed at the end.

    Args:
        taccfiles: passed unchanged to ``forTaccPart`` and ``forTaccLongJob``.
    """
    c = Connection()
    c.drop_database("snm-tacc-raw")
    usecache = UsageCache(freshDb(c, "snm-tacc"), False, "TACC")
    initializeThreads(usecache)

    # Pass 1: build appname table
    for jobpart in forTaccPart(taccfiles):
        buildAppnameTable(jobpart)

    fiveOnly()
    # Single-argument print(...) gives the same output whether print is a
    # statement or a function.
    print("--------------------------------------------")

    app_counts = defaultdict(int)
    found = 0
    not_found = 0

    # Pass 2: derive logical dependencies per group and register the records.
    for job in forTaccLongJob(taccfiles):
        jobnum = _summarize_job_ids(job)
        _link_logical_deps(jobnum)

        for j in job:
            app = guess1App(j)
            app_counts[app] += 1
            if app == '':
                not_found += 1
                continue
            found += 1
            rec = _cooked_record(j, app, jobnum[j["jobID"]])
            registerParsed(c,
                           rec,
                           "0.0.0.0",
                           usecache,
                           dbraw="snm-tacc-raw")

    finalizeThreads()
    for app in app_counts:
        print("%s %s" % (app, app_counts[app]))
    total = found + not_found
    # Guard against a run in which no record was seen at all.
    percent = found * 100.0 / total if total else 0.0
    print("Found %s %% of %s runs" % (percent, total))
Example #4
0
import json
import pdb
import sys
from snmweb.usage_cache import UsageCache, freshDb

from pymongo import MongoClient, Connection

# Open a connection (Connection() is called with no arguments) and obtain the
# "snm-cooked-test" database through freshDb.
# NOTE(review): presumably freshDb returns an empty database -- confirm.
c = Connection()
dest = freshDb(c, "snm-cooked-test")

# Load the JSON test data that sits next to this script.
with open("testdata.json") as f:
    testdata = json.loads(f.read())

# A cache built on that database is expected to start with no apps and with
# both co-use maxima at zero.
cache = UsageCache(dest, True, "TACC")
assert not len(cache.apps), "Empty cache has apps"
assert 0 == cache.max_co_uses["static"], "Empty cache has static use count"
assert 0 == cache.max_co_uses["logical"], "Empty cache has logical use count"


def right_after_first(cache):
    """Assert the cache contents expected once the first app has been added.

    Reads ``cache.apps`` and ``cache.max_co_uses`` and raises
    ``AssertionError`` on the first expectation that does not hold.
    """
    day = "2014-10-14"

    apps = cache.apps
    assert len(apps) == 11, "Did not add first app"

    # "datasets" must show exactly one use, by exactly one user, on that day.
    datasets = apps["datasets"]
    assert datasets["usage"][day] == 1
    assert len(datasets["user_list"][day]) == 1

    # Both co-use maxima must be exactly one.
    peaks = cache.max_co_uses
    assert peaks["static"] == 1
    assert peaks["logical"] == 1

    # graphics -> grDevices must be recorded as a logical co-occurrence only.
    link = apps["graphics"]["co_occurence"]["grDevices"]
    assert link["static"] == 0
    assert link["logical"] == 1


def right_after_second(cache):
    assert len(cache.apps) == 11, "Did not add second app"