import json
import pdb
import sys
from snmweb.usage_cache import UsageCache, freshDb

from pymongo import MongoClient, Connection

# Test preamble: everything below runs at import time and binds the module
# globals (c, dest, testdata, cache) used by the rest of the script.

# Connection() is called with no arguments, i.e. the driver's default settings.
c = Connection()
# presumably freshDb returns an emptied "snm-cooked-test" database — confirm
# against snmweb.usage_cache before pointing this at a shared server.
dest = freshDb(c, "snm-cooked-test")

# Fixture file is resolved relative to the current working directory.
with open("testdata.json") as f:
    testdata = json.load(f)


cache = UsageCache(dest, True, "TACC")
# Sanity checks on a cache that has had nothing registered yet.
assert len(cache.apps) == 0, "Empty cache has apps"
assert cache.max_co_uses["static"] == 0, "Empty cache has static use count"
assert cache.max_co_uses["logical"] == 0, "Empty cache has logical use count"


def right_after_first(cache):
    """Assert the cache contents the test expects after the first addition.

    Reads ``cache.apps`` and ``cache.max_co_uses``; raises AssertionError on
    the first expectation that does not hold and returns None otherwise.
    """
    apps = cache.apps
    assert len(apps) == 11, "Did not add first app"

    day = "2014-10-14"
    datasets = apps["datasets"]
    assert datasets["usage"][day] == 1
    assert len(datasets["user_list"][day]) == 1

    peak = cache.max_co_uses
    assert peak["static"] == 1
    assert peak["logical"] == 1

    # Link between "graphics" and "grDevices": logical only, no static use.
    link = apps["graphics"]["co_occurence"]["grDevices"]
    assert link["static"] == 0
    assert link["logical"] == 1


def right_after_second(cache):
        # NOTE(review): this body never uses `cache`; it reads `usecache`,
        # `packet` and `queue`, none of which are bound in this function.
        # It looks like the tail of a queue-worker loop spliced under this
        # header — confirm against the original file before relying on it.
        usecache.registerPacket(packet)
        # Persist only when nothing else is waiting and the cache is dirty.
        if queue.empty() and usecache.dirty:
            usecache.saveToMongo()
        # Marks the current queue item as processed.
        queue.task_done()

def initializeThreads(usecache):
    """Start one background thread that runs ``worker(usecache)``."""
    consumer = Thread(target=worker, args=(usecache,))
    # Daemon thread: it will not keep the interpreter alive at exit.
    consumer.daemon = True
    consumer.start()

# Script entry point: drops "snm-tacc", then feeds every document of
# snm-tacc-raw.scimapInfo through the worker queue into a new UsageCache.
if __name__ == "__main__":
    c = Connection()
    
    # True=assume logical link between "root" non-dependent packages
    # (the comment above describes the UsageCache flag two lines down)
    c.drop_database("snm-tacc")
    usecache = UsageCache(openOrCreate(c, "snm-tacc"), True, "TACC", useWeakDeps=True)  

    initializeThreads(usecache)
    rownum = 0
    # NOTE(review): no except/finally clause for this try is visible in this
    # view; the block appears to be cut off after the last print.
    try:
        # timeout=False: presumably keeps the server cursor from expiring
        # during a long scan — confirm against the pymongo version in use.
        for raw in c["snm-tacc-raw"]["scimapInfo"].find(timeout=False):
            rownum += 1
            queue.put(raw)
        # Poll once a second until the queue reports empty.
        while (not(queue.empty())):
            time.sleep(1)
        # Printed after the polling loop has already seen an empty queue.
        print "Spooling through queued packets at row#", rownum
        # Blocks until every queued item has been marked done.
        queue.join()
        print "Saving"
        if usecache.dirty:
            usecache.saveToMongo()
        print "Saved."
Example #3
0
    # NOTE(review): the enclosing function header is outside this view;
    # `data`, `c`, `ip`, `usecache` and `dbraw` are bound somewhere above.
    try:
        # scrub_dots is applied to every decoded JSON object.
        record = json.loads(data, object_hook=scrub_dots)
        # Stamp the record with the current time in whole seconds.
        record["receivedEpoch"] = int(time.time())
        registerParsed(c, record, ip, usecache, dbraw)
    except Exception as e:
        # Best effort: report the failure and carry on instead of raising.
        print "Error: " + str(e)
        (r1, r2, r3) = sys.exc_info()
        # format_exception returns a list of strings, so this prints the
        # list's repr rather than a readable multi-line traceback.
        print traceback.format_exception(r1, r2, r3)


def initializeThreads(usecache):
    """Launch a single daemon thread executing ``worker`` with *usecache*."""
    background = Thread(target=worker, args=(usecache, ))
    # Must be flagged as daemon before start() so it never blocks shutdown.
    background.daemon = True
    background.start()


def finalizeThreads():
    """Used by taccParse and reprocess.

    Poll the shared queue once per second and return as soon as it reports
    empty.
    """
    # NOTE(review): empty() only says that nothing is waiting to be picked
    # up; an item a worker has already taken may still be in progress when
    # this returns.
    while True:
        if queue.empty():
            break
        time.sleep(1)


# Script entry point: opens (or creates) the "snm-r" database, wraps it in a
# UsageCache, starts the worker thread and then calls await().
if __name__ == "__main__":
    c = Connection()

    # True=assume logical link between "root" non-dependent packages
    usecache = UsageCache(openOrCreate(c, "snm-r"), True, "R")

    initializeThreads(usecache)
    # NOTE(review): `await` is an ordinary name defined outside this view;
    # it is a reserved keyword from Python 3.7 on, so this line only parses
    # on older interpreters.
    await ()
Example #4
0
    # NOTE(review): the header this indented body belongs to is outside this
    # view. Flow: validate argv, scrape app info from the sqlite repo db,
    # rebuild the Mongo usage cache from the raw records, then attach git
    # and publication data.
    c = Connection()

    if len(sys.argv) < 4:
        print "Usage:", sys.argv[
            0], "<sqlite repo db>", "<mongo db>", "<appinfo file>"
        # NOTE(review): quit() is the interactive helper added by `site`
        # and exits with status 0 even though this is an error path.
        quit()

    sqlitedb = sys.argv[1]
    snmdb = sys.argv[2]
    appinfo = sys.argv[3]

    if (not os.path.isfile(sqlitedb)):
        print sqlitedb, "does not exist."
        quit()

    rs = RepoScrape(sqlitedb)
    rs.makeAppInfo()
    rs.writeAppInfo(appinfo)

    # True=assume logical link between "root" non-dependent packages
    # (the comment above describes the UsageCache flag two lines down)
    c.drop_database(snmdb)
    usecache = UsageCache(openOrCreate(c, snmdb), True, "R")

    initializeThreads(usecache)
    # Queue every raw record for the worker thread.
    for raw in c["snm-raw-records"]["scimapInfo"].find():
        queue.put(raw)
    finalizeThreads()

    usecache.insertGitData(rs)
    usecache.insertPublicationData(rs)
Example #5
0
def importTaccData(taccfiles):
    c = Connection()
    c.drop_database("snm-tacc-raw")
    usecache = UsageCache(freshDb(c, "snm-tacc"), False, "TACC")
    initializeThreads(usecache)
    # Pass1: build appname table
    for jobpart in forTaccPart(taccfiles):
        buildAppnameTable(jobpart)

    fiveOnly()
    print "--------------------------------------------"

    nLabels = appExecs.keys()
    n = len(nLabels)
    #lookup = { nLabels[i]: i for i in range(0,n) }
    #dsm = numpy.zeros((n,n), dtype=numpy.int)

    jobsizeHist = defaultdict(int)
    app_counts = defaultdict(int)
    found = 0
    not_found = 0
    counter = 0
    for job in forTaccLongJob(taccfiles):
        jobsizeHist[len(job)] += 1

        # Make jobnum -> {start: N, end: N, appset: [a,b,c], follows: {J->delay, J->delay}}
        jobnum = defaultdict(dict)
        for j in job:
            id = j["jobID"]
            niceExec = guess1App(j)
            if id not in jobnum:
                jobnum[id]["start"] = int(j["startEpoch"])
                jobnum[id]["end"] = int(j["startEpoch"]) + int(
                    float(j["runTime"]))
                if (niceExec != ''):
                    jobnum[id]["appset"] = set([niceExec])
                else:
                    jobnum[id]["appset"] = set([])
                jobnum[id]["follows"] = defaultdict(dict)
                jobnum[id]["logical"] = defaultdict(set)
            else:
                jobnum[id]["start"] = min(int(j["startEpoch"]),
                                          jobnum[id]["start"])
                jobnum[id]["end"] = max(
                    int(j["startEpoch"]) + int(float(j["runTime"])),
                    jobnum[id]["end"])
                if (niceExec != ''):
                    jobnum[id]["appset"].add(niceExec)

        for j in jobnum:
            for k in jobnum:
                if j != k:
                    if (jobnum[j]["end"] < jobnum[k]["start"]):
                        jobnum[k]["follows"][
                            j] = jobnum[k]["start"] - jobnum[j]["end"]
                        for xc in (jobnum[k]["appset"] - jobnum[j]["appset"]):
                            jobnum[k]["logical"][xc] = jobnum[k]["logical"][
                                xc].union(jobnum[j]["appset"] -
                                          jobnum[k]["appset"])

        for j in job:
            counter = counter + 1
            app = guess1App(j)
            app_counts[app] += 1
            if (app != ''):
                found += 1
                rec = copy.copy(j)
                rec["endEpoch"] = int(float(rec["startEpoch"])) + int(
                    float(rec["runTime"]))
                rec["startTime"] = ""
                rec["dynDeps"] = []
                rec["exec"] = app
                rec["user"] = userAnonymize(rec["user"])
                rec["dynPackDeps"] = []
                rec["weakDeps"] = []
                jobinf = jobnum[rec["jobID"]]
                if (app in jobinf["logical"] and jobinf["logical"][app]):
                    rec["weakPackDeps"] = {app: list(jobinf["logical"][app])}
                else:
                    rec["weakPackDeps"] = {}
                rec["weakPackDeps"] = scrub_dots(rec["weakPackDeps"])
                if (isinstance(rec["pkgT"], list)):
                    rec["pkgT"] = {}
                else:
                    # commented-out version includes some less-interesting library information
                    rec["pkgT"] = {k: [] for (k, v) in rec["pkgT"].items()}
                    # rec["pkgT"] = { k: v["libA"] for (k,v) in rec["pkgT"].items() }
                rec["pkgT"][app] = rec["pkgT"].keys()
                rec["pkgT"] = scrub_dots(rec["pkgT"])
                data = json.dumps(rec)
                registerParsed(c,
                               rec,
                               "0.0.0.0",
                               usecache,
                               dbraw="snm-tacc-raw")
                prevapp = app
                prevapptime = rec["endEpoch"]
            else:
                not_found += 1

    finalizeThreads()
    for app in app_counts:
        print app, app_counts[app]
    print "Found", found * 100.0 / (found + not_found), "% of", (
        found + not_found), "runs"
Example #6
0
if __name__ == "__main__":
    c = Connection()
    
    if len(sys.argv) < 4:
        print "Usage:", sys.argv[0], "<sqlite repo db>", "<mongo db>", "<appinfo file>"
        quit()

    sqlitedb = sys.argv[1]
    snmdb = sys.argv[2]
    appinfo = sys.argv[3]
    
    if (not os.path.isfile(sqlitedb)):
        print sqlitedb, "does not exist."
        quit()
     
    rs = RepoScrape(sqlitedb)
    rs.makeAppInfo()
    rs.writeAppInfo(appinfo)

    # True=assume logical link between "root" non-dependent packages
    c.drop_database(snmdb)
    usecache = UsageCache(openOrCreate(c, snmdb), True, "R")  

    initializeThreads(usecache)
    for raw in c["snm-raw-records"]["scimapInfo"].find():
        queue.put(raw)
    finalizeThreads()

    usecache.insertGitData(rs)
    usecache.insertPublicationData(rs)
Example #7
0
import json
import pdb
import sys
from snmweb.usage_cache import UsageCache, freshDb

from pymongo import MongoClient, Connection

# Test preamble: runs at import time and binds the module globals
# (c, dest, testdata, cache) that the checks below rely on.

# Connection() is called with no arguments, i.e. the driver's default settings.
c = Connection()
# presumably freshDb hands back an emptied "snm-cooked-test" database —
# confirm against snmweb.usage_cache.
dest = freshDb(c, "snm-cooked-test")

# Fixture file is resolved relative to the current working directory.
with open("testdata.json") as f:
    testdata = json.load(f)

cache = UsageCache(dest, True, "TACC")
# Sanity checks on a cache that has had nothing registered yet.
assert len(cache.apps) == 0, "Empty cache has apps"
assert cache.max_co_uses["static"] == 0, "Empty cache has static use count"
assert cache.max_co_uses["logical"] == 0, "Empty cache has logical use count"


def right_after_first(cache):
    """Assert the cache contents the test expects after the first addition.

    Reads ``cache.apps`` and ``cache.max_co_uses``; raises AssertionError on
    the first expectation that does not hold and returns None otherwise.
    """
    apps = cache.apps
    assert len(apps) == 11, "Did not add first app"

    day = "2014-10-14"
    datasets = apps["datasets"]
    assert datasets["usage"][day] == 1
    assert len(datasets["user_list"][day]) == 1

    peak = cache.max_co_uses
    assert peak["static"] == 1
    assert peak["logical"] == 1

    # Link between "graphics" and "grDevices": logical only, no static use.
    link = apps["graphics"]["co_occurence"]["grDevices"]
    assert link["static"] == 0
    assert link["logical"] == 1


def right_after_second(cache):
    assert len(cache.apps) == 11, "Did not add second app"