def runAssignment(assignment, students, args, helpers):
    """Find identical preprocessed submissions for one assignment.

    Hashes every student's preprocessed text for each configured file and
    groups students whose hashes collide into clusters, then writes the
    clusters as standard JSON results.

    :param assignment: assignment object; reads ``args["allowPartners"]``
        and ``args["files"]`` from it.
    :param students: iterable of student identifiers to check.
    :param args: tool-level config; reads ``sourceSuffix`` and
        ``resultsSuffix``.
    :param helpers: I/O helper object (preprocessed reads, printf, etc.).
    """
    clusters = {}
    allowPartners = assignment.args["allowPartners"]

    # for each file configured on the assignment
    files = assignment.args["files"]
    for filename in files:
        # the safe filename depends only on the file, not the student,
        # so compute it once per file instead of once per student
        safeFilename = common.makeFilenameSafe(filename) + args["sourceSuffix"]

        # find collisions across all students
        for student in students:
            studentText = helpers.readFromPreprocessed(student, assignment.name, safeFilename)
            if studentText is not None:
                hashedText = hashText(studentText)

                # first time we see this hash, open a new cluster
                # (score 100 = exact-match confidence)
                if hashedText not in clusters:
                    clusters[hashedText] = common.Cluster(allowPartners, filename, 100)

                member = common.Member(student, assignment.name, helpers)
                clusters[hashedText].add(member)

    # flatten the hash -> cluster map into a list
    clusterArray = []
    for key in clusters:
        clusterArray.append(clusters[key])

    # write the results for this assignment
    # clustersToStandardJSON will make sure that there are at least two people in the cluster
    common.clustersToStandardJSON(clusterArray, assignment.name, args["resultsSuffix"], helpers)

    # say we're done with this assignment
    helpers.printf("Finished assignment '{}'...\n".format(assignment.name))
def runEntry(filename, students, helpers, assignment, args, allowPartners):
    """Filter pair results for one file by standard-deviation threshold.

    Loads the pairwise comparison results for ``filename``, computes the
    mean and deviation of the scores, keeps the pairs beyond the
    configured ``threshold`` (above or below per ``args["above"]``),
    clusters them, and writes the clusters to disk as JSON.

    :param filename: logical name of the file being analyzed.
    :param students: unused here; kept for a uniform entry signature.
    :param helpers: I/O helper object (paths, printf, etc.).
    :param assignment: assignment object; reads ``threshold`` and the
        optional ``minThreshold`` from its args.
    :param args: tool-level config (``sourceSuffix``, ``resultsSuffix``,
        ``above``).
    :param allowPartners: whether declared partners are allowed to match.
    """
    # get the data
    assignName = assignment.name
    sourceSuffix = args["sourceSuffix"]
    resultsSuffix = args["resultsSuffix"]
    threshold = assignment.args["threshold"]
    above = args["above"]

    # minThreshold is optional; dict.has_key() was removed in Python 3,
    # so use the `in` operator (valid on both 2 and 3)
    minThreshold = None
    if "minThreshold" in assignment.args:
        minThreshold = assignment.args["minThreshold"]

    safeFilename = common.makeFilenameSafe(filename) + sourceSuffix
    filepath = helpers.getProcessedPath(assignName, safeFilename)

    if filepath is not None:
        rawData = common.PairResults(assignName, safeFilename, helpers)
        data = []

        # convert into python objects
        for pair in rawData.iterate():
            data.append(pair)

        # get the mean
        mean = getMean(data)

        # get the deviation
        deviation = getDeviation(data, mean)

        helpers.printf("{}/{}: mean {}, deviation {}\n".format(
            assignName, filename, mean, deviation))

        # filter out data
        filtered = filterData(data, mean, deviation, threshold, above, minThreshold)

        # create the clusters
        clusters = createClusters(filtered, filename, assignName, allowPartners, helpers)

        # flush to disk
        common.clustersToStandardJSON(
            clusters, assignName,
            common.makeFilenameSafe(filename) + resultsSuffix, helpers)

        # all done!
        helpers.printf("Finished '{}', with {} results!\n".format(
            assignName, len(clusters)))
def runEntry(filename, students, helpers, assignment, args, allowPartners):
    """Filter pair results for one file by standard-deviation threshold.

    Loads the pairwise comparison results for ``filename``, computes the
    mean and deviation of the scores, keeps the pairs beyond the
    configured ``threshold`` (above or below per ``args["above"]``),
    clusters them, and writes the clusters to disk as JSON.

    :param filename: logical name of the file being analyzed.
    :param students: unused here; kept for a uniform entry signature.
    :param helpers: I/O helper object (paths, printf, etc.).
    :param assignment: assignment object; reads ``threshold`` and the
        optional ``minThreshold`` from its args.
    :param args: tool-level config (``sourceSuffix``, ``resultsSuffix``,
        ``above``).
    :param allowPartners: whether declared partners are allowed to match.
    """
    # get the data
    assignName = assignment.name
    sourceSuffix = args["sourceSuffix"]
    resultsSuffix = args["resultsSuffix"]
    threshold = assignment.args["threshold"]
    above = args["above"]

    # minThreshold is optional; dict.has_key() was removed in Python 3,
    # so use the `in` operator (valid on both 2 and 3)
    minThreshold = None
    if "minThreshold" in assignment.args:
        minThreshold = assignment.args["minThreshold"]

    safeFilename = common.makeFilenameSafe(filename) + sourceSuffix
    filepath = helpers.getProcessedPath(assignName, safeFilename)

    if filepath is not None:
        rawData = common.PairResults(assignName, safeFilename, helpers)
        data = []

        # convert into python objects
        for pair in rawData.iterate():
            data.append(pair)

        # get the mean
        mean = getMean(data)

        # get the deviation
        deviation = getDeviation(data, mean)

        helpers.printf("{}/{}: mean {}, deviation {}\n".format(assignName, filename, mean, deviation))

        # filter out data
        filtered = filterData(data, mean, deviation, threshold, above, minThreshold)

        # create the clusters
        clusters = createClusters(filtered, filename, assignName, allowPartners, helpers)

        # flush to disk
        common.clustersToStandardJSON(clusters, assignName, common.makeFilenameSafe(filename) + resultsSuffix, helpers)

        # all done!
        helpers.printf("Finished '{}', with {} results!\n".format(assignName, len(clusters)))
def runAssignment(assignment, students, args, helpers):
    """Find identical preprocessed submissions for one assignment.

    Hashes every student's preprocessed text for each configured file and
    groups students whose hashes collide into clusters, then writes the
    clusters as standard JSON results.

    :param assignment: assignment object; reads ``args["allowPartners"]``
        and ``args["files"]`` from it.
    :param students: iterable of student identifiers to check.
    :param args: tool-level config; reads ``sourceSuffix`` and
        ``resultsSuffix``.
    :param helpers: I/O helper object (preprocessed reads, printf, etc.).
    """
    clusters = {}
    allowPartners = assignment.args["allowPartners"]

    # for each file configured on the assignment
    files = assignment.args["files"]
    for filename in files:
        # the safe filename depends only on the file, not the student,
        # so compute it once per file instead of once per student
        safeFilename = common.makeFilenameSafe(filename) + args["sourceSuffix"]

        # find collisions across all students
        for student in students:
            studentText = helpers.readFromPreprocessed(student, assignment.name, safeFilename)
            if studentText is not None:
                hashedText = hashText(studentText)

                # first time we see this hash, open a new cluster
                # (score 100 = exact-match confidence)
                if hashedText not in clusters:
                    clusters[hashedText] = common.Cluster(allowPartners, filename, 100)

                member = common.Member(student, assignment.name, helpers)
                clusters[hashedText].add(member)

    # flatten the hash -> cluster map into a list
    clusterArray = []
    for key in clusters:
        clusterArray.append(clusters[key])

    # write the results for this assignment
    # clustersToStandardJSON will make sure that there are at least two people in the cluster
    common.clustersToStandardJSON(clusterArray, assignment.name, args["resultsSuffix"], helpers)

    # say we're done with this assignment
    helpers.printf("Finished assignment '{}'...\n".format(assignment.name))
def runEntry(filename, students, helpers, assignment, args, allowPartners):
    """Take the top/bottom percent of pair results for one file.

    Loads and sorts the pairwise comparison results for ``filename``,
    prints the score mean/deviation, then walks the sorted list taking
    up to ``percent`` of the pairs (capped by optional ``maxResults``),
    skipping pairs that are declared partners. The kept pairs are
    clustered (optionally grouped) and written to disk as JSON.

    :param filename: logical name of the file being analyzed.
    :param students: unused here; kept for a uniform entry signature.
    :param helpers: I/O helper object (paths, printf, etc.).
    :param assignment: assignment object; only its name is used.
    :param args: tool-level config (``sourceSuffix``, ``resultsSuffix``,
        ``percent``, ``top``, optional ``maxResults``/``groupPairs``).
    :param allowPartners: whether declared partners are allowed to match.
    """
    # get the data
    assignName = assignment.name
    sourceSuffix = args["sourceSuffix"]
    resultsSuffix = args["resultsSuffix"]
    percent = float(args["percent"]) / 100.0
    top = args["top"]

    safeFilename = common.makeFilenameSafe(filename) + sourceSuffix
    filepath = helpers.getProcessedPath(assignName, safeFilename)

    if filepath is not None:
        rawData = common.PairResults(assignName, safeFilename, helpers)
        data = []

        # convert into python objects; the result sets can be huge, so
        # nudge the collector periodically to keep peak RSS down
        i = 0
        for pair in rawData.iterate():
            data.append(pair)
            i += 1
            if i % 100000 == 0:
                gc.collect()

        # sort them -- sortFun is a cmp-style comparator; list.sort() lost
        # its positional cmp argument in Python 3, so adapt it with
        # functools.cmp_to_key (works on 2.7+ as well)
        from functools import cmp_to_key
        data.sort(key=cmp_to_key(sortFun))

        # calculate and print stats
        mean = getMean(data)
        dev = getDeviation(data, mean)
        helpers.printf("{}/{} mean: {}, std. deviation: {}\n".format(assignName, filename, mean, dev))

        # take the top/bottom percent
        takeNum = math.floor(float(len(data)) * percent)
        if "maxResults" in args:
            takeNum = min(args["maxResults"], takeNum)

        if top:
            data = data[::-1]  # conveniently reverse

        results = []
        taken = 0
        index = 0

        # guard on index as well: partner pairs are skipped without being
        # counted, so the data can run out before takeNum is reached
        while taken < takeNum and index < len(data):
            current = data[index]
            member1 = common.Member(current.pair[0], assignName, helpers)
            member2 = common.Member(current.pair[1], assignName, helpers)
            cluster = common.Cluster(allowPartners, filename, current.score)
            cluster.add(member1)
            cluster.add(member2)

            if not cluster.hasCheating():
                # students are partners, ignore
                index += 1
                continue

            # take this entry
            taken += 1
            index += 1
            results.append(current)

            if index % 50000 == 0:
                gc.collect()

        # create the clusters
        clusters = createClusters(results, filename, assignName, allowPartners, helpers)

        # group pairs if necessary (dict.has_key() was removed in Python 3)
        if "groupPairs" in args and args["groupPairs"] == True:
            clusters = common.groupPairClusters(clusters, top)

        # free up RAM
        gc.collect()

        # flush to disk
        common.clustersToStandardJSON(clusters, assignName, common.makeFilenameSafe(filename) + resultsSuffix, helpers)

        # all done!
        helpers.printf("Finished '{}/{}', with {} results!\n".format(assignName, filename, len(clusters)))
def runEntry(filename, students, helpers, assignment, args, allowPartners):
    """Take the top/bottom percent of pair results for one file.

    Loads and sorts the pairwise comparison results for ``filename``,
    prints the score mean/deviation, then walks the sorted list taking
    up to ``percent`` of the pairs (capped by optional ``maxResults``),
    skipping pairs that are declared partners. The kept pairs are
    clustered (optionally grouped) and written to disk as JSON.

    :param filename: logical name of the file being analyzed.
    :param students: unused here; kept for a uniform entry signature.
    :param helpers: I/O helper object (paths, printf, etc.).
    :param assignment: assignment object; only its name is used.
    :param args: tool-level config (``sourceSuffix``, ``resultsSuffix``,
        ``percent``, ``top``, optional ``maxResults``/``groupPairs``).
    :param allowPartners: whether declared partners are allowed to match.
    """
    # get the data
    assignName = assignment.name
    sourceSuffix = args["sourceSuffix"]
    resultsSuffix = args["resultsSuffix"]
    percent = float(args["percent"]) / 100.0
    top = args["top"]

    safeFilename = common.makeFilenameSafe(filename) + sourceSuffix
    filepath = helpers.getProcessedPath(assignName, safeFilename)

    if filepath is not None:
        rawData = common.PairResults(assignName, safeFilename, helpers)
        data = []

        # convert into python objects; the result sets can be huge, so
        # nudge the collector periodically to keep peak RSS down
        i = 0
        for pair in rawData.iterate():
            data.append(pair)
            i += 1
            if i % 100000 == 0:
                gc.collect()

        # sort them -- sortFun is a cmp-style comparator; list.sort() lost
        # its positional cmp argument in Python 3, so adapt it with
        # functools.cmp_to_key (works on 2.7+ as well)
        from functools import cmp_to_key
        data.sort(key=cmp_to_key(sortFun))

        # calculate and print stats
        mean = getMean(data)
        dev = getDeviation(data, mean)
        helpers.printf("{}/{} mean: {}, std. deviation: {}\n".format(
            assignName, filename, mean, dev))

        # take the top/bottom percent
        takeNum = math.floor(float(len(data)) * percent)
        if "maxResults" in args:
            takeNum = min(args["maxResults"], takeNum)

        if top:
            data = data[::-1]  # conveniently reverse

        results = []
        taken = 0
        index = 0

        # guard on index as well: partner pairs are skipped without being
        # counted, so the data can run out before takeNum is reached
        while taken < takeNum and index < len(data):
            current = data[index]
            member1 = common.Member(current.pair[0], assignName, helpers)
            member2 = common.Member(current.pair[1], assignName, helpers)
            cluster = common.Cluster(allowPartners, filename, current.score)
            cluster.add(member1)
            cluster.add(member2)

            if not cluster.hasCheating():
                # students are partners, ignore
                index += 1
                continue

            # take this entry
            taken += 1
            index += 1
            results.append(current)

            if index % 50000 == 0:
                gc.collect()

        # create the clusters
        clusters = createClusters(results, filename, assignName, allowPartners, helpers)

        # group pairs if necessary (dict.has_key() was removed in Python 3)
        if "groupPairs" in args and args["groupPairs"] == True:
            clusters = common.groupPairClusters(clusters, top)

        # free up RAM
        gc.collect()

        # flush to disk
        common.clustersToStandardJSON(
            clusters, assignName,
            common.makeFilenameSafe(filename) + resultsSuffix, helpers)

        # all done!
        helpers.printf("Finished '{}/{}', with {} results!\n".format(
            assignName, filename, len(clusters)))