def equivify(jail, usersDir, userID, projName):
    """Wrap one project's enabled event handlers in a ProjectSet and record it.

    Screen by screen, the top-level ``component_event`` blocks that are not
    disabled are collected; screens without any such block are left out.  The
    resulting ``eclasses.ProjectSet`` is appended to ``equivs`` and the
    per-user maxima stored in ``userMaxes`` are raised if this project
    exceeds them.

    ``equivs``, ``userMaxes``, ``MyNum`` and ``printMessagesEverySoOften`` are
    not parameters; they are taken from the surrounding scope.

    Args:
        jail: Parsed project, indexed by ``'*Names of Screens'`` and
            ``'screens'``.
        usersDir: Directory component; only used in the progress message.
        userID: Programmer id, passed to the ProjectSet and used as the key
            into ``userMaxes``.
        projName: Project name, passed to the ProjectSet.
    """
    usedScreens = []
    handlersPerScreen = []
    for screenName in jail['*Names of Screens']:
        bky = jail['screens'][screenName]['bky']
        if not isinstance(bky, dict):
            continue
        handlers = []
        for blk in bky['topBlocks']:
            if '*type' not in blk or blk['*type'] != "component_event":
                continue
            # [2019/08/01, lyn] Disabled event handlers are effectively
            # commented out and caused problems later, so treat them as if
            # they were not there.
            if mus.isDisabled(blk):
                continue
            handlers.append(blk)
        if handlers:
            handlersPerScreen.append(handlers)
            usedScreens.append(screenName)

    # [2019/08/01] userMaxes bookkeeping added alongside the ProjectSet.
    projSet = eclasses.ProjectSet(handlersPerScreen, usedScreens, projName,
                                  userID)
    equivs.append(projSet)

    if userID in userMaxes:
        prevSize, prevNumBlocks = userMaxes[userID]
    else:
        prevSize, prevNumBlocks = 0, 0
    projSize, projNumBlocks = projSet.maxes()
    userMaxes[userID] = (max(prevSize, projSize),
                         max(prevNumBlocks, projNumBlocks))

    MyNum.num += 1
    if MyNum.num % printMessagesEverySoOften != 0:
        return
    progress = ("equivify() in jailToEquivs()::" + str(MyNum.num) + ": "
                + os.path.join(usersDir, userID, projName))
    mus.logwrite(progress)
def equivify(usersDir, userID, projName, jailHolder, jail):
    """Wrap one project's event handlers in a ProjectSet and append it to ``equivs``.

    For every screen whose ``'bky'`` entry is a dict, the top-level blocks of
    type ``component_event`` are collected; screens without any are left out.
    ``equivs``, ``MyNum`` and ``printMessagesEverySoOften`` come from the
    surrounding scope.

    Args:
        usersDir: Directory component; only used in the progress message.
        userID: Programmer id, passed to the ProjectSet as ``programmer``.
        projName: Project name, passed to the ProjectSet as ``name``.
        jailHolder: Not used by this function.
        jail: Parsed project, indexed by ``'*Names of Screens'`` and
            ``'screens'``.
    """
    usedScreens = []
    handlersPerScreen = []
    for screenName in jail['*Names of Screens']:
        bky = jail['screens'][screenName]['bky']
        if not isinstance(bky, dict):
            continue
        handlers = []
        for blk in bky['topBlocks']:
            if '*type' in blk and blk['*type'] == "component_event":
                handlers.append(blk)
        if handlers:
            handlersPerScreen.append(handlers)
            usedScreens.append(screenName)

    projSet = eqclasses.ProjectSet(handlersPerScreen,
                                   screenNames=usedScreens,
                                   name=projName,
                                   programmer=userID)
    equivs.append(projSet)

    MyNum.num += 1
    if MyNum.num % printMessagesEverySoOften != 0:
        return
    progress = ("equivify() in jailToEquivs()::" + str(MyNum.num) + ": "
                + os.path.join(usersDir, userID, projName))
    mus.logwrite(progress)
def loopThroughJAILs(fileName, archiveName):
    """Record one archive member in ``jailFiles``.

    The recorded name is ``fileName`` joined onto ``Local.currentJail`` with
    its extension removed.  ``Local`` and ``jailFiles`` come from the
    surrounding scope; ``archiveName`` is accepted but not used.
    """
    archiveStem, _ = os.path.splitext(Local.currentJail)
    jailFiles.append(os.path.join(archiveStem, fileName))

    Local.index += 1
    if Local.index % 5000 != 0:
        return
    # Progress message every 5000 members.
    progress = ("loopThroughJAILs in compareFiles:: found "
                + str(Local.index) + " jail files.")
    mu.logwrite(progress)
def iterateThroughAllJail(jailLocation, func, jailHolder, backupFunction=None, start=None, stop=None):
    """Run a callback on every project found under ``jailLocation``.

    Two layouts are handled:

    * ``getDirectories(jailLocation)`` is non-empty: every file two directory
      levels down is reported as ``func(big, little, fileName, jailHolder)``.
    * otherwise: every file directly inside ``jailLocation`` is handed to
      ``zu.withUnzippedFiles``.  Each member is parsed as JSON, its name is
      split on ``"/"`` and reported as
      ``backupFunction("", splits[0], splits[1], jailHolder, jail)``.  If no
      ``backupFunction`` was given, ``func`` is called without the parsed
      jail and a message is logged.

    Args:
        jailLocation: Root directory to scan.
        func: Callback taking ``(usersDir, userID, projName, jailHolder)``.
        jailHolder: Passed through untouched to the callbacks.
        backupFunction: Optional callback for archive members; it receives
            the already-parsed jail as a fifth argument.
        start: Optional lower index (clamped to 0) into the top-level
            directory or file list.
        stop: Optional exclusive upper index (clamped to the list length).
            The range is applied only when both ``start`` and ``stop`` are
            given.
    """

    def selectRange(items):
        # Both layouts share this rule.  The archive layout used to slice
        # unconditionally and failed on the default start=None / stop=None.
        if start is None or stop is None:
            return items
        lower = max(0, start)
        # Never let the upper bound go negative, so the slice cannot wrap
        # around from the end of the list.
        upper = max(lower, min(stop, len(items)))
        return items[lower:upper]

    def iterateThroughArchivedFiles(fileName, fileArchive):
        jail = json.load(fileArchive)
        splits = fileName.split("/")
        if len(splits) < 2:
            mu.logwrite(
                "findmissing.py::iterateThroughAllJail::iterateThroughArchivedFiles: splits was too short!"
                + " ".join(splits))
            return
        if backupFunction is not None:
            backupFunction("", splits[0], splits[1], jailHolder, jail)
        else:
            func("", splits[0], splits[1], jailHolder)
            mu.logwrite(
                "findmissing.py::iterateThroughAllJail::iterateThroughArchivedFiles: backupFunction Was None!"
            )

    bigDirs = getDirectories(jailLocation)
    if len(bigDirs) > 0:
        mu.logwrite(
            "findmissing.py::iterateThroughAllJail: Found big directories.")
        for big in selectRange(bigDirs):
            for little in getDirectories(os.path.join(jailLocation, big)):
                for f in getFileNames(os.path.join(jailLocation, big, little)):
                    func(big, little, f, jailHolder)
    else:
        mu.logwrite(
            "findmissing.py::iterateThroughAllJAil: No big directories found! Trying to find zip directories instead."
        )
        for f in selectRange(getFileNames(jailLocation)):
            zu.withUnzippedFiles(os.path.join(jailLocation, f),
                                 iterateThroughArchivedFiles)
def iterateThroughArchivedFiles(fileName, fileArchive):
    """Parse one archive member as JSON and report it to the callbacks.

    The member name is split on ``"/"``; its first two components are passed
    on as user id and project name.  ``backupFunction``, ``func`` and
    ``jailHolder`` come from the surrounding scope.

    Args:
        fileName: Name of the member inside the archive.
        fileArchive: Open file object for the member, read with ``json.load``.
    """
    jail = json.load(fileArchive)
    parts = fileName.split("/")

    if len(parts) < 2:
        mu.logwrite(
            "findmissing.py::iterateThroughAllJail::iterateThroughArchivedFiles: splits was too short!"
            + " ".join(parts))
        return

    userID, projName = parts[0], parts[1]
    if backupFunction is None:
        # Without a backup function the parsed jail cannot be passed on.
        func("", userID, projName, jailHolder)
        mu.logwrite(
            "findmissing.py::iterateThroughAllJail::iterateThroughArchivedFiles: backupFunction Was None!"
        )
        return
    backupFunction("", userID, projName, jailHolder, jail)
def findGenerizables(equivs):
    """List every equivalence class that reports itself as genericifiable.

    Args:
        equivs: Iterable of project sets.  Each must provide ``projectName``,
            ``programmerName`` and ``getScreenEquivItems()``, which yields
            ``(screen, equivalenceClass)`` pairs.

    Returns:
        A dict with three entries: ``"equivs"`` (one row dict per
        genericifiable class), ``"cols"`` (the row keys, in column order)
        and ``"totalCount"`` (the number of rows).
    """
    columns = [
        "projectName", "programmer", "screen", "sizeOfEquivalenceClass",
        "numBlocks", "name"
    ]
    rows = []
    jailStats = {"equivs": rows, "cols": columns, "totalCount": 0}

    for projectSet in equivs:
        for screen, ec in projectSet.getScreenEquivItems():
            if not ec.isGenericifiable():
                continue

            row = {}
            row["projectName"] = projectSet.projectName
            row["programmer"] = projectSet.programmerName
            row["screen"] = screen.screenName
            row["sizeOfEquivalenceClass"] = str(ec.size())
            row["numBlocks"] = str(ec.numBlocks())
            row["name"] = ec.getName()
            rows.append(row)

            jailStats["totalCount"] += 1
            if jailStats["totalCount"] % 1000 != 0:
                continue
            # Progress message every 1000 rows.
            mus.logwrite(
                "genericizables.py::findGenerizables: {}\t{}\t{}\t{}\t{}"
                .format(jailStats["totalCount"], projectSet.projectName,
                        projectSet.programmerName, screen.screenName,
                        ec.getName()))

    return jailStats
def furtherProcess(usersDir, userID, projName, jailDict, jail):
    """Count for/while loops in one project and add them to ``jailDict``.

    Loops are counted per top-level block (global declarations excluded),
    then summed per screen, per project and per programmer.

    Entries of ``jailDict`` that are written:
        * ``"loopStats"``: one row per counted top-level block.
        * ``"forLoopsByScreen"``: one row per screen that has a ``"bky"`` key.
        * ``"loopsByProject"`` / ``"numLoopsByProj"``: one row per call, and
          the index of that row.
        * ``"loopsByProgrammer"`` / ``"numLoopsByProgrammer"``: one row per
          run of consecutive calls with the same ``userID``, and its index.
        * ``"totalCount"``: number of rows added to ``"loopStats"``.

    NOTE(review): the two ``num...`` counters are used as the index of the
    most recently appended row, so the caller presumably initialises them to
    -1 -- confirm against the code that builds ``jailDict``.

    ``mus``, ``copy`` and ``printEvery`` are not parameters; they are read
    from the surrounding scope.

    Args:
        usersDir: Directory component; only used in the progress message.
        userID: Programmer id stored in every row.
        projName: Project name stored in every row.
        jailDict: Accumulator dict described above; mutated in place.
        jail: Parsed project, indexed by ``"*Names of Screens"`` and
            ``"screens"``.
    """
    screens = jail["*Names of Screens"]
    screenJail = jail["screens"]
    userIDKey = "userID"
    projNameKey = "projectName"
    blockNameKey = "blockName"
    # Function attributes act as counters that the nested callback below can
    # update; they are reset before every top-level block.
    furtherProcess.numForLoops = 0
    furtherProcess.numWhileLoops = 0
    # Start a new programmer row when there is none yet or when the current
    # one belongs to a different user.  The emptiness test comes first so
    # that short-circuiting keeps the index lookup from running on an empty
    # list.
    if (len(jailDict["loopsByProgrammer"]) == 0) or (jailDict["loopsByProgrammer"][
            jailDict["numLoopsByProgrammer"]][userIDKey] != userID):
        jailDict["loopsByProgrammer"].append({
            userIDKey: userID,
            "numForLoops": 0,
            "numWhileLoops": 0
        })
        jailDict["numLoopsByProgrammer"] += 1
    # Every call gets its own project row.
    jailDict["loopsByProject"].append({
        userIDKey: userID,
        projNameKey: projName,
        "numForLoops": 0,
        "numWhileLoops": 0
    })
    jailDict["numLoopsByProj"] += 1

    def enumerateLoop(blk):
        # Callback handed to mus.countSomething: bumps the matching counter
        # for the two loop block types, ignores everything else.
        t = mus.getType(blk)
        if t != None:
            if t == "controls_forRange":
                furtherProcess.numForLoops += 1
            elif t == "controls_while":
                furtherProcess.numWhileLoops += 1

    for s in screens:
        if "bky" in screenJail[s]:
            jailDict["forLoopsByScreen"].append({
                "screen": s,
                userIDKey: userID,
                projNameKey: projName,
                "numForLoops": 0,
                "numWhileLoops": 0
            })
            # Index of the screen row that was just appended.
            num = len(jailDict["forLoopsByScreen"]) - 1
            if mus.isADictionary(screenJail[s]["bky"]):
                for block in screenJail[s]["bky"]["topBlocks"]:
                    if not mus.isGlobalDeclaration(block):
                        # Fresh counters for this top-level block.
                        furtherProcess.numForLoops = 0
                        furtherProcess.numWhileLoops = 0
                        mus.countSomething(block, enumerateLoop)
                        # The block name is stored wrapped in double quotes.
                        minidict = {
                            userIDKey: userID,
                            projNameKey: projName,
                            blockNameKey: "\"{}\"".format(mus.getName(block)),
                            "screen": s,
                            "numForLoops": furtherProcess.numForLoops,
                            "numWhileLoops": furtherProcess.numWhileLoops
                        }
                        # Add this block's counts to the screen row.
                        jailDict["forLoopsByScreen"][num][
                            "numForLoops"] += furtherProcess.numForLoops
                        jailDict["forLoopsByScreen"][num][
                            "numWhileLoops"] += furtherProcess.numWhileLoops
                        jailDict["loopStats"].append(
                            copy.deepcopy(minidict))
                        jailDict["totalCount"] += 1
                        # Progress message every printEvery rows.
                        if jailDict["totalCount"] % printEvery == 0:
                            mus.logwrite("{}\t{}\t{}\t{}".format(
                                jailDict["totalCount"], usersDir, userID, projName))
            # Add the finished screen row to the current project row.
            jailDict["loopsByProject"][
                jailDict["numLoopsByProj"]]["numForLoops"] += jailDict[
                    "forLoopsByScreen"][num]["numForLoops"]
            jailDict["loopsByProject"][
                jailDict["numLoopsByProj"]]["numWhileLoops"] += jailDict[
                    "forLoopsByScreen"][num]["numWhileLoops"]
    # Add the finished project row to the current programmer row.
    jailDict["loopsByProgrammer"][jailDict["numLoopsByProgrammer"]][
        "numForLoops"] += jailDict["loopsByProject"][
            jailDict["numLoopsByProj"]]["numForLoops"]
    jailDict["loopsByProgrammer"][jailDict["numLoopsByProgrammer"]][
        "numWhileLoops"] += jailDict["loopsByProject"][
            jailDict["numLoopsByProj"]]["numWhileLoops"]
# Output-file suffix: empty when args.kind == 10, otherwise "_<start>_<stop>".
# `args`, `start`, `stop`, `location` and `strKind` are defined before this
# fragment.
startStop = "" if args.kind == 10 else "_{}_{}".format(start, stop)
printEvery = args.printEvery
# `jails` is indexed with "loopStats" below.
jails = enumerateLoops(location, start=start, stop=stop, printEvery=printEvery)
# One CSV file name per report; all share the kind and range suffixes.
numLoopsLocation = "num_loops{}{}.csv".format(strKind, startStop)
forLoopsLocation = "for_loops_by_screen{}{}.csv".format(strKind, startStop)
loopsByProjLoc = "loops_by_proj{}{}.csv".format(strKind, startStop)
loopsByUserLoc = "loops_by_programmer{}{}.csv".format(strKind, startStop)
mus.logwrite("# loops location: {}".format(numLoopsLocation))
mus.logwrite("loops by screen location: {}".format(forLoopsLocation))
mus.logwrite("loops by project location: {}".format(loopsByProjLoc))
mus.logwrite("loops by programmer loc: {}".format(loopsByUserLoc))
# Per-block report: header line first, then the text that
# mus.makeCSVLines returns for the "loopStats" rows.
with open(numLoopsLocation, "w") as f:
    cols = [
        "userID", "projectName", "screen", "blockName", "numForLoops",
        "numWhileLoops"
    ]
    csvlines = mus.makeCSVLines(cols, jails["loopStats"])
    f.write(",".join(cols) + "\n")
    f.write(csvlines)
    f.flush()
# NOTE(review): the body of this `with` lies beyond the visible fragment.
with open(forLoopsLocation, "w") as f:
def jailToEquivs(jailLocation):
    """Build one ProjectSet per project found under ``jailLocation``.

    Two layouts are handled:

    * ``fm.getDirectories(jailLocation)`` is non-empty: every file two
      directory levels down is loaded with ``mus.getJail``.
    * otherwise: every file directly inside ``jailLocation`` is handed to
      ``zu.withUnzippedFiles`` and each member is parsed as JSON.  The first
      two ``"/"``-separated components of the member name are used as user
      id and project name.

    Side effects: the module globals ``totalGenerics`` and
    ``totalHandlerBlocks`` are reset to 0, and ``userMaxes`` (taken from the
    enclosing module) is updated with per-user maxima.

    Args:
        jailLocation: Root directory to scan.

    Returns:
        List of ``eclasses.ProjectSet`` objects, one per processed project.
    """
    global totalGenerics
    global totalHandlerBlocks
    global totalComponents

    printMessagesEverySoOften = 10000
    bigDirs = fm.getDirectories(jailLocation)
    equivs = []

    class MyNum:
        # Project counter kept on a class so the nested functions can update it.
        num = 0

    # The per-block statistics pass (getStatsOnGenerics) is disabled, so these
    # counters keep the values set here when they are logged below.
    totalGenerics = 0
    totalHandlerBlocks = 0
    # NOTE(review): totalComponents is declared global but never assigned in
    # this function; the final log line relies on a module-level definition.

    def unarchivedFiles(usersDir, userID, projName):
        jail = mus.getJail(os.path.join(jailLocation, usersDir, userID, projName))
        equivify(jail, usersDir, userID, projName)

    def archivedFiles(fileName, fileArchive):
        splits = fileName.split("/")
        if len(splits) < 2:
            # A member without a <user>/<project> path used to raise
            # IndexError on splits[1]; log it and skip it instead.
            mus.logwrite("jailToEquivs()::archivedFiles: splits was too short! "
                         + fileName)
            return
        jail = json.load(fileArchive)
        equivify(jail, "", splits[0], splits[1])

    def equivify(jail, usersDir, userID, projName):
        screenNames = jail['*Names of Screens']
        onlyUsedNames = []
        code = []
        for name in screenNames:
            screenCode = jail['screens'][name]['bky']
            if isinstance(screenCode, dict):
                # [2019/08/01, lyn] Disabled event handlers are effectively
                # commented out and caused problems later, so treat them as
                # if they were not there.
                blks = [b for b in screenCode['topBlocks']
                        if '*type' in b
                        and b['*type'] == "component_event"
                        and not mus.isDisabled(b)]
                if blks:
                    code.append(blks)
                    onlyUsedNames.append(name)
        # [2019/08/01] userMaxes bookkeeping added alongside the ProjectSet.
        projSet = eclasses.ProjectSet(code, onlyUsedNames, projName, userID)
        equivs.append(projSet)
        oldMaxEquivClassSize, oldMaxEquivClassNumBlocks = (
            (0, 0) if userID not in userMaxes else userMaxes[userID])
        projMaxEquivClassSize, projMaxNumBlocks = projSet.maxes()
        userMaxes[userID] = (
            max(oldMaxEquivClassSize, projMaxEquivClassSize),
            max(oldMaxEquivClassNumBlocks, projMaxNumBlocks),
        )
        MyNum.num += 1
        if MyNum.num % printMessagesEverySoOften == 0:
            mus.logwrite("equivify() in jailToEquivs()::" + str(MyNum.num) + ": "
                         + os.path.join(usersDir, userID, projName))

    if not bigDirs:
        for f in fm.getFileNames(jailLocation):
            zu.withUnzippedFiles(os.path.join(jailLocation, f), archivedFiles)
    else:
        for bigdir in bigDirs:
            for littledir in fm.getDirectories(os.path.join(jailLocation, bigdir)):
                for f in fm.getFileNames(os.path.join(jailLocation, bigdir, littledir)):
                    unarchivedFiles(bigdir, littledir, f)

    mus.logwrite("jailToEquivs():: Number of generics: " + str(totalGenerics))
    mus.logwrite("jailToEquivs():: Total number of blocks in handlers: " + str(totalHandlerBlocks))
    mus.logwrite("jailToEquivs():: Total number of component blocks: " + str(totalComponents))
    return equivs
def combThroughJails(jailLocation, start=None, stop=None):
    """Find top-level blocks that repeat several kinds of block many times.

    Each project under ``jailLocation`` is visited through
    ``fm.iterateThroughAllJail``.  Every top-level block that is not a global
    declaration is passed to ``countKindsOfBlocks``.  A block is recorded
    when at least two ``depth.kind`` entries of that count reach the limit of
    10 occurrences; depths ending in ``".5"`` or ``".0"`` are ignored.

    Args:
        jailLocation: Root directory to scan.
        start: Optional lower index, forwarded to ``fm.iterateThroughAllJail``.
        stop: Optional upper index, forwarded to ``fm.iterateThroughAllJail``.

    Returns:
        List of dicts, one per recorded block, with the keys ``programmer``,
        ``project``, ``screen``, ``handler``, ``numBlocks``, ``numGreater``,
        ``kindsGreater`` and ``kindsRatio``.  Every value except
        ``kindsGreater`` (already a ``;``-joined string) is converted with
        ``str``.
    """
    resultKeys = (
        "counts by screens",
        "num blocks by screens",
        "num greater than limit",
        "kinds greater than limit",
        "kinds ratio",
    )
    # NOTE(review): user ids share this dict with the two bookkeeping keys, so
    # a user id equal to "usersIDs" or "totalCount" would collide with them.
    jailStats = {"usersIDs": [], "totalCount": 0}

    def processJail(usersDir, userID, projName, jailHolder):
        # AKA the big directory, the little directory, and the file name.
        jail = mus.getJail(os.path.join(jailLocation, usersDir, userID, projName))
        furtherProcess(usersDir, userID, projName, jailHolder, jail)

    def furtherProcess(usersDir, userID, projName, jailHolder, jail):
        blockLimit = 10
        screens = jail["*Names of Screens"]
        screenJail = jail["screens"]
        if userID not in jailHolder:
            jailHolder[userID] = {}
            jailHolder["usersIDs"].append(userID)
        project = {"jail": jail}
        jailHolder[userID][projName] = project
        # Create the result tables once per project.  They used to be
        # re-created for every screen, which discarded the results of all
        # earlier screens and left them missing for projects with no screens.
        for resultKey in resultKeys:
            project[resultKey] = {}

        for s in screens:
            if "bky" not in screenJail[s]:
                continue
            if not mus.isADictionary(screenJail[s]["bky"]):
                continue
            for block in screenJail[s]["bky"]["topBlocks"]:
                if mus.isGlobalDeclaration(block):
                    continue
                count = countKindsOfBlocks(block)
                if "note" in count:
                    continue

                # [2019/01/16] Counts are grouped by depth.
                numGreaterThanLimit = 0
                commonRepeatedBlocks = {}
                for depth in count:
                    if depth.endswith(".5") or depth.endswith(".0"):
                        continue
                    for key in count[depth]:
                        if count[depth][key] >= blockLimit:
                            numGreaterThanLimit += 1
                            commonRepeatedBlocks[str(depth) + "." + key] = count[depth][key]

                if numGreaterThanLimit >= 2:
                    if s not in project["counts by screens"]:
                        for resultKey in resultKeys:
                            project[resultKey][s] = {}
                    blockName = mus.getName(block)
                    project["counts by screens"][s][blockName] = count
                    project["num blocks by screens"][s][blockName] = mus.countBlocksInside(block)
                    project["num greater than limit"][s][blockName] = numGreaterThanLimit
                    project["kinds greater than limit"][s][blockName] = ";".join(
                        [(k + ":" + str(v)) for k, v in commonRepeatedBlocks.items()])
                    project["kinds ratio"][s][blockName] = getAverageRatio(commonRepeatedBlocks)

                jailHolder["totalCount"] += 1
                if jailHolder["totalCount"] % 1000 == 0:
                    # %-formatting in a one-argument print call gives the same
                    # output on Python 2 and 3 and accepts unicode names.
                    print("%s %s %s %s" % (jailHolder["totalCount"], usersDir, userID, projName))

    fm.iterateThroughAllJail(jailLocation, processJail, jailStats,
                             backupFunction=furtherProcess, start=start, stop=stop)
    removeEmptyScreens(jailStats)

    print("Number of users: {}".format(len(jailStats["usersIDs"])))
    # Log the raw counts of the first five users as a sample.
    for uid in jailStats["usersIDs"][:5]:
        for proj in jailStats[uid]:
            mus.logwrite(uid + " " + proj)
            mus.logwrite(mus.prettyPrint(jailStats[uid][proj]["counts by screens"]))

    jailList = []
    numProjs = 0
    allBlocks = 0
    numHandlers = 0
    for uid in jailStats["usersIDs"]:
        for proj in jailStats[uid]:
            numProjs += 1
            project = jailStats[uid][proj]
            for screen in project["counts by screens"]:
                for handler in project["counts by screens"][screen]:
                    numHandlers += 1
                    numBlocks = project["num blocks by screens"][screen][handler]
                    numGreater = project["num greater than limit"][screen][handler]
                    kindsGreater = project["kinds greater than limit"][screen][handler]
                    kindsRatio = project["kinds ratio"][screen][handler]
                    allBlocks += numBlocks
                    jailList.append({
                        "programmer": uid,
                        "project": proj,
                        "screen": screen,
                        "handler": handler,
                        "numBlocks": str(numBlocks),
                        "numGreater": str(numGreater),
                        "kindsGreater": kindsGreater,
                        "kindsRatio": str(kindsRatio),
                    })

    mus.logwrite("numHandlers: " + str(numHandlers))
    # No recorded handler means no average; report 0.0 instead of dividing by zero.
    avgBlocks = float(allBlocks) / float(numHandlers) if numHandlers else 0.0
    mus.logwrite("avgBlocks: " + str(avgBlocks))
    mus.logwrite("numProjects: " + str(numProjs))
    return jailList
def compareFiles(aiaDir, jailDir):
    """Return the aia names that have no matching jail name.

    aia names are the extension-stripped file names in ``aiaDir``.  Jail
    names are gathered in one of two ways:

    * ``getDirectories(jailDir)`` is non-empty: the file names returned by
      ``getFileNames(jailDir)``.
    * otherwise: for every ``*.zip`` entry in ``jailDir``, each member name
      joined onto the zip file's name without its extension.

    Jail names are extension-stripped as well.  Matching is one-to-one: each
    jail name can satisfy at most one aia name, so duplicates are counted.

    Args:
        aiaDir: Directory holding the aia files.
        jailDir: Directory holding the jail files or jail zip archives.

    Returns:
        List of aia names without a jail counterpart, in ``aiaDir`` order.
    """
    from collections import Counter

    mu.logwrite("compareFiles:: beginning.")
    jailDirs = getDirectories(jailDir)
    mu.logwrite("compareFiles:: finished finding jail directory names.")
    aiaFiles = stripExtension(getFileNames(aiaDir))
    mu.logwrite("compareFiles:: finished finding aia files.")
    aiaFilesOnly = []
    jailFiles = []
    mu.logwrite("compareFiles:: about to begin.")
    if len(jailDirs) > 0:
        jailFiles = getFileNames(jailDir)
    else:
        mu.logwrite("compareFiles:: using zip directories for jail.")
        jailZips = [z for z in os.listdir(jailDir) if z.endswith(".zip")]

        class Local:
            # State shared with the callback below.
            index = 0
            currentJail = ""

        def loopThroughJAILs(fileName, archiveName):
            usersDir = os.path.splitext(Local.currentJail)[0]
            jailFiles.append(os.path.join(usersDir, fileName))
            Local.index += 1
            if Local.index % 5000 == 0:
                mu.logwrite("loopThroughJAILs in compareFiles:: found " + str(Local.index) + " jail files.")

        for z in jailZips:
            Local.currentJail = z
            zu.withUnzippedFiles(os.path.join(jailDir, z), loopThroughJAILs)
    mu.logwrite("compareFiles:: finished finding jail file names.")
    jailFiles = stripExtension(jailFiles)
    mu.logwrite("compareFiles:: finished stripping jail extensions.")
    mu.logwrite("compareFiles:: #aias: " + str(len(aiaFiles)))
    mu.logwrite("compareFiles:: #jail: " + str(len(jailFiles)))

    # Multiset of jail names still unmatched.  This keeps the "each jail name
    # matches once" behaviour of list.remove() without rescanning the whole
    # list for every aia name.
    unmatchedJails = Counter(jailFiles)
    for index, aia in enumerate(aiaFiles, 1):
        if unmatchedJails[aia] > 0:
            unmatchedJails[aia] -= 1
        else:
            aiaFilesOnly.append(aia)
        if index % 5000 == 0:
            mu.logwrite("compareFiles:: # of checked aias: " + str(index))
    return aiaFilesOnly
iterateThroughArchivedFiles)
# NOTE(review): the line above is the tail of a call whose beginning lies
# outside the visible source.


# Script entry point: compares two aia directories against their jail
# counterparts and reports the aia names that have no jail.
if __name__ == '__main__':
    # The log file has to be configured before the first mu.logwrite call.
    mu.logFileName = "*whatever*"
    mu.createLogFile()
    # NOTE(review): absolute paths from the author's machine; adjust before
    # running elsewhere.
    aia10k = "/Users/audrey/Downloads/ai2_10k_random_users_deidentified_aias"
    jail10k = "/Users/audrey/Personal/School/College/Work/summer2018/jailconversion/10kjails"
    # Disabled directory-level comparison, kept as a bare string literal.
    '''onlyaia = compareDirs(aia10k, jail10k) for d in onlyaia: print d print(len(onlyaia))'''
    # First data set: log every missing name, then the total.
    only = compareFiles(aia10k, jail10k)
    for f in only:
        mu.logwrite(f)
    mu.logwrite("main:: # of files missing: " + str(len(only)))
    # Second data set: log only the total and write the names to a file,
    # one per line.
    aia46k = "/Users/audrey/Downloads/ai2_46k_prolific_users_deidentified_aias"
    jail46kzipped = "/Users/audrey/Personal/School/College/Work/summer2018/jailconversion/46kjailzips"
    only46k = compareFiles(aia46k, jail46kzipped)
    mu.logwrite("main:: # of 46k files missing: " + str(len(only46k)))
    with open("46kaiamissings.txt", "w") as f:
        f.write("\n".join(only46k))