Example #1
0
def importTaccData():
    # Pass1: build appname table
    for jobpart in forTaccPart():
        buildAppnameTable(jobpart)

    fiveOnly()
    print "--------------------------------------------"

    nLabels = appExecs.keys()
    n = len(nLabels)
    #lookup = { nLabels[i]: i for i in range(0,n) }
    #dsm = numpy.zeros((n,n), dtype=numpy.int)

    c = Connection()
    c.drop_database("snm-tacc")

    jobsizeHist = defaultdict(int)
    for job in forTaccLongJob():
        jobsizeHist[len(job)] += 1
        #logdeps = getLogicalDeps([guess1App(j) for j in job])
        prevapp = ""
        for j in job:
            app = guess1App(j)
            if (app != ''):
                rec = copy.copy(j)
                rec["endEpoch"] = rec["startEpoch"] + rec["runTime"]
                rec["startTime"] = ""
                rec["dynDeps"] = []
                rec["exec"] = app
                rec["dynPackDeps"] = []
                rec["weakDeps"] = []
                if (prevapp != "" and app != "" and app != prevapp):
                    rec["weakPackDeps"] = {app: [prevapp]}
                else:
                    rec["weakPackDeps"] = {}
                rec["weakPackDeps"] = scrub_dots(rec["weakPackDeps"])
                if (isinstance(rec["pkgT"], list)):
                    rec["pkgT"] = {}
                else:
                    rec["pkgT"] = {
                        k: v["libA"]
                        for (k, v) in rec["pkgT"].items()
                    }
                rec["pkgT"][app] = rec["pkgT"].keys()
                rec["pkgT"] = scrub_dots(rec["pkgT"])
                data = json.dumps(rec)
                registerParsed(c,
                               rec,
                               "0.0.0.0",
                               dbraw="snm-tacc-raw",
                               dbcooked="snm-tacc",
                               postponeCalc=True)
                prevapp = app
    recalcApps(c, c["snm-tacc"])
Example #2
0
def importTaccData():
    # Pass1: build appname table
    for jobpart in forTaccPart():
        buildAppnameTable(jobpart)

    fiveOnly()
    print "--------------------------------------------"

    nLabels = appExecs.keys()
    n = len(nLabels)
    #lookup = { nLabels[i]: i for i in range(0,n) }
    #dsm = numpy.zeros((n,n), dtype=numpy.int)

    c = Connection()
    c.drop_database("snm-tacc")

    jobsizeHist = defaultdict(int)
    for job in forTaccLongJob():
       jobsizeHist[len(job)] += 1
       #logdeps = getLogicalDeps([guess1App(j) for j in job])
       prevapp = ""
       for j in job:
           app = guess1App(j)
           if (app != ''):
               rec = copy.copy(j)
               rec["endEpoch"] = rec["startEpoch"] + rec["runTime"]
               rec["startTime"] = ""
               rec["dynDeps"] = []
               rec["exec"] = app
               rec["dynPackDeps"] = []
               rec["weakDeps"] = []
               if (prevapp != "" and app != "" and app != prevapp):
                   rec["weakPackDeps"] = { app: [prevapp] }
               else:
                   rec["weakPackDeps"] = { }
               rec["weakPackDeps"] = scrub_dots(rec["weakPackDeps"])
               if (isinstance(rec["pkgT"], list)):
                   rec["pkgT"] = {}
               else:
                   rec["pkgT"] = { k: v["libA"] for (k,v) in rec["pkgT"].items() }
               rec["pkgT"][app] = rec["pkgT"].keys()
               rec["pkgT"] = scrub_dots(rec["pkgT"])
               data = json.dumps(rec)
               registerParsed(c, rec, "0.0.0.0", dbraw="snm-tacc-raw", dbcooked="snm-tacc", postponeCalc = True)
               prevapp = app
    recalcApps(c, c["snm-tacc"])
Example #3
0
def importTaccData(taccfiles):
    c = Connection()
    c.drop_database("snm-tacc-raw")
    usecache = UsageCache(freshDb(c, "snm-tacc"), False, "TACC")
    initializeThreads(usecache)
    # Pass1: build appname table
    for jobpart in forTaccPart(taccfiles):
        buildAppnameTable(jobpart)

    fiveOnly()
    print "--------------------------------------------"

    nLabels = appExecs.keys()
    n = len(nLabels)
    #lookup = { nLabels[i]: i for i in range(0,n) }
    #dsm = numpy.zeros((n,n), dtype=numpy.int)

    jobsizeHist = defaultdict(int)
    app_counts = defaultdict(int)
    found = 0
    not_found = 0
    counter = 0
    for job in forTaccLongJob(taccfiles):
        jobsizeHist[len(job)] += 1

        # Make jobnum -> {start: N, end: N, appset: [a,b,c], follows: {J->delay, J->delay}}
        jobnum = defaultdict(dict)
        for j in job:
            id = j["jobID"]
            niceExec = guess1App(j)
            if id not in jobnum:
                jobnum[id]["start"] = int(j["startEpoch"])
                jobnum[id]["end"] = int(j["startEpoch"]) + int(
                    float(j["runTime"]))
                if (niceExec != ''):
                    jobnum[id]["appset"] = set([niceExec])
                else:
                    jobnum[id]["appset"] = set([])
                jobnum[id]["follows"] = defaultdict(dict)
                jobnum[id]["logical"] = defaultdict(set)
            else:
                jobnum[id]["start"] = min(int(j["startEpoch"]),
                                          jobnum[id]["start"])
                jobnum[id]["end"] = max(
                    int(j["startEpoch"]) + int(float(j["runTime"])),
                    jobnum[id]["end"])
                if (niceExec != ''):
                    jobnum[id]["appset"].add(niceExec)

        for j in jobnum:
            for k in jobnum:
                if j != k:
                    if (jobnum[j]["end"] < jobnum[k]["start"]):
                        jobnum[k]["follows"][
                            j] = jobnum[k]["start"] - jobnum[j]["end"]
                        for xc in (jobnum[k]["appset"] - jobnum[j]["appset"]):
                            jobnum[k]["logical"][xc] = jobnum[k]["logical"][
                                xc].union(jobnum[j]["appset"] -
                                          jobnum[k]["appset"])

        for j in job:
            counter = counter + 1
            app = guess1App(j)
            app_counts[app] += 1
            if (app != ''):
                found += 1
                rec = copy.copy(j)
                rec["endEpoch"] = int(float(rec["startEpoch"])) + int(
                    float(rec["runTime"]))
                rec["startTime"] = ""
                rec["dynDeps"] = []
                rec["exec"] = app
                rec["user"] = userAnonymize(rec["user"])
                rec["dynPackDeps"] = []
                rec["weakDeps"] = []
                jobinf = jobnum[rec["jobID"]]
                if (app in jobinf["logical"] and jobinf["logical"][app]):
                    rec["weakPackDeps"] = {app: list(jobinf["logical"][app])}
                else:
                    rec["weakPackDeps"] = {}
                rec["weakPackDeps"] = scrub_dots(rec["weakPackDeps"])
                if (isinstance(rec["pkgT"], list)):
                    rec["pkgT"] = {}
                else:
                    # commented-out version includes some less-interesting library information
                    rec["pkgT"] = {k: [] for (k, v) in rec["pkgT"].items()}
                    # rec["pkgT"] = { k: v["libA"] for (k,v) in rec["pkgT"].items() }
                rec["pkgT"][app] = rec["pkgT"].keys()
                rec["pkgT"] = scrub_dots(rec["pkgT"])
                data = json.dumps(rec)
                registerParsed(c,
                               rec,
                               "0.0.0.0",
                               usecache,
                               dbraw="snm-tacc-raw")
                prevapp = app
                prevapptime = rec["endEpoch"]
            else:
                not_found += 1

    finalizeThreads()
    for app in app_counts:
        print app, app_counts[app]
    print "Found", found * 100.0 / (found + not_found), "% of", (
        found + not_found), "runs"
Example #4
0
def importTaccData(taccfiles):
    c = Connection()
    c.drop_database("snm-tacc-raw")
    usecache = UsageCache(freshDb(c, "snm-tacc"), False, "TACC")
    initializeThreads(usecache)
    # Pass1: build appname table
    for jobpart in forTaccPart(taccfiles):
        buildAppnameTable(jobpart)

    fiveOnly()
    print "--------------------------------------------"

    nLabels = appExecs.keys()
    n = len(nLabels)
    #lookup = { nLabels[i]: i for i in range(0,n) }
    #dsm = numpy.zeros((n,n), dtype=numpy.int)

    jobsizeHist = defaultdict(int)
    app_counts = defaultdict(int)
    found = 0
    not_found = 0
    counter = 0
    for job in forTaccLongJob(taccfiles):
       jobsizeHist[len(job)] += 1

       # Make jobnum -> {start: N, end: N, appset: [a,b,c], follows: {J->delay, J->delay}}
       jobnum = defaultdict(dict)
       for j in job:
           id = j["jobID"]
           niceExec = guess1App(j)
           if id not in jobnum:
               jobnum[id]["start"] = int(j["startEpoch"])
               jobnum[id]["end"] = int(j["startEpoch"]) + int(float(j["runTime"]))
               if (niceExec != ''):
                   jobnum[id]["appset"] = set([niceExec])
               else:
                   jobnum[id]["appset"] = set([])
               jobnum[id]["follows"] = defaultdict(dict)
               jobnum[id]["logical"] = defaultdict(set)
           else:
               jobnum[id]["start"] = min(int(j["startEpoch"]), jobnum[id]["start"])
               jobnum[id]["end"] = max(int(j["startEpoch"]) + int(float(j["runTime"])), 
                                       jobnum[id]["end"])
               if (niceExec != ''):
                   jobnum[id]["appset"].add(niceExec)

       for j in jobnum:
           for k in jobnum:
               if j!=k:
                   if (jobnum[j]["end"] < jobnum[k]["start"]):
                       jobnum[k]["follows"][j] = jobnum[k]["start"] - jobnum[j]["end"]
                       for xc in (jobnum[k]["appset"] - jobnum[j]["appset"]):
                           jobnum[k]["logical"][xc] = jobnum[k]["logical"][xc].union(jobnum[j]["appset"] - jobnum[k]["appset"])


       for j in job:
           counter = counter + 1
           app = guess1App(j)
           app_counts[app] += 1
           if (app != ''):
               found += 1
               rec = copy.copy(j)
               rec["endEpoch"] = int(float(rec["startEpoch"])) + int(float(rec["runTime"]))
               rec["startTime"] = ""
               rec["dynDeps"] = []
               rec["exec"] = app
               rec["user"] = userAnonymize(rec["user"])
               rec["dynPackDeps"] = []
               rec["weakDeps"] = []
               jobinf = jobnum[rec["jobID"]]
               if (app in jobinf["logical"] and jobinf["logical"][app]):
                   rec["weakPackDeps"] = { app: list(jobinf["logical"][app]) }
               else:
                   rec["weakPackDeps"] = { }
               rec["weakPackDeps"] = scrub_dots(rec["weakPackDeps"])
               if (isinstance(rec["pkgT"], list)):
                   rec["pkgT"] = {}
               else:
                   # commented-out version includes some less-interesting library information
                   rec["pkgT"] = { k: [] for (k,v) in rec["pkgT"].items()  } 
                   # rec["pkgT"] = { k: v["libA"] for (k,v) in rec["pkgT"].items() }
               rec["pkgT"][app] = rec["pkgT"].keys()
               rec["pkgT"] = scrub_dots(rec["pkgT"])
               data = json.dumps(rec)
               registerParsed(c, rec, "0.0.0.0", usecache, dbraw="snm-tacc-raw")
               prevapp = app
               prevapptime = rec["endEpoch"]
           else:
               not_found += 1

    finalizeThreads()
    for app in app_counts:
        print app, app_counts[app]
    print "Found", found * 100.0/(found + not_found), "% of", (found+not_found), "runs"