def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
    """
    Poll the given jobs until none of them is queued or running, or until
    the timeout has passed.

    jobs -- sequence of job handles; each one is passed to
            self.getJobStatus(), which must return one of "FINISHED",
            "QUEUED", "FAILED" or "RUNNING" (anything else raises KeyError)
    pollIntervalSeconds -- time to wait between two status polls
    timeout -- maximum total waiting time, compared against
               Timer.getElapsedTime() (presumably seconds -- confirm);
               None waits indefinitely
    verbose -- if True, a progress line is written to stderr

    Returns the status counts of the last poll as a dictionary
    {"FINISHED": n, "QUEUED": n, "FAILED": n, "RUNNING": n}.
    """
    print >> sys.stderr, "Waiting for results"
    waitTimer = Timer()
    accountName = self.account
    if accountName is None:
        accountName = "local"
    while True:
        jobStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for job in jobs:
            jobStatus[self.getJobStatus(job)] += 1
        jobStatusString = (str(jobStatus["QUEUED"]) + " queued, "
                           + str(jobStatus["RUNNING"]) + " running, "
                           + str(jobStatus["FINISHED"]) + " finished, "
                           + str(jobStatus["FAILED"]) + " failed")
        if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
            if verbose:
                print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
            break
        # waitTimer is the only timer that spans the whole wait, so both the
        # timeout test and the time-out message are based on it.
        if timeout is not None and waitTimer.getElapsedTime() >= timeout:
            if verbose:
                print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString()
            break
        # Sleep in 5-second slices so the progress bar can be redrawn
        sleepTimer = Timer()
        if verbose:
            waitMessage = "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),"
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + " [ ] ",
        while sleepTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
    return jobStatus
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports no more than targetCount jobs.

    targetCount -- number of jobs that may remain; the special value -1
                   disables waiting and returns immediately
    pollIntervalSeconds -- time to wait between two calls of getNumJobs()
    verbose -- if True, a progress line is written to stderr while waiting

    Returns None. "All jobs done" is written to stderr once the wait ends;
    nothing is written when no waiting was needed.
    """
    if targetCount == -1:
        return
    jobCount = self.getNumJobs()
    if jobCount <= targetCount:
        return

    totalTimer = Timer()
    while True:
        pollTimer = Timer()
        account = "local" if self.account == None else self.account
        if verbose:
            header = "\rWaiting for " + str(jobCount) + " on " + account + " (limit=" + str(targetCount) + ")"
            bar = " [ ] "
            print >> sys.stderr, header, totalTimer.elapsedTimeToString() + bar,
        # Sleep in 5-second slices so the progress bar can be redrawn
        while pollTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                filled = int(10 * pollTimer.getElapsedTime() / pollIntervalSeconds) + 1
                bar = " [" + filled * "." + (10 - filled) * " " + "] "
                header = "\rWaiting for " + str(jobCount) + " on " + account + " (limit=" + str(targetCount) + ")"
                print >> sys.stderr, header, totalTimer.elapsedTimeToString() + bar,
            time.sleep(5)
        jobCount = self.getNumJobs()
        if not jobCount > targetCount:
            break
    print >> sys.stderr, "\nAll jobs done"
def waitForJobs(self, scriptNames, timeout=None, pollIntervalSeconds=60):
    """
    Poll the status of the given job scripts until none of them is queued
    or running, or until the timeout has passed.

    scriptNames -- sequence of job script names; each one is passed to
                   self.getLouhiStatus(), which must return one of
                   "FINISHED", "QUEUED", "FAILED" or "RUNNING"
                   (anything else raises KeyError)
    timeout -- maximum total waiting time, compared against
               Timer.getElapsedTime() (presumably seconds -- confirm);
               None waits indefinitely
    pollIntervalSeconds -- time to wait between two status polls

    Returns True when no job is queued or running any more, False when
    the timeout was reached first.
    """
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for scriptName in scriptNames:
            processStatus[self.getLouhiStatus(scriptName)] += 1
        processStatusString = (str(processStatus["QUEUED"]) + " queued, "
                               + str(processStatus["RUNNING"]) + " running, "
                               + str(processStatus["FINISHED"]) + " finished, "
                               + str(processStatus["FAILED"]) + " failed")
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All jobs done (" + processStatusString + ")"
            return True
        if timeout is not None and louhiTimer.getElapsedTime() >= timeout:
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            return False
        # NOTE(review): assumes this object exposes the machineName attribute
        # that the connection passed to optimizeCSC provides -- confirm.
        waitMessage = "\rWaiting for " + str(len(scriptNames)) + " on " + self.machineName + "(" + processStatusString + "),"
        print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + " [ ] ",
        # Sleep in 5-second slices so the progress bar can be redrawn
        sleepTimer = Timer()
        while sleepTimer.getElapsedTime() < pollIntervalSeconds:
            steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
            sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
            print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
    """
    Poll the given jobs until none of them is queued or running, or until
    the timeout has passed.

    jobs -- sequence of job handles; each one is passed to
            self.getJobStatus(), which must return one of "FINISHED",
            "QUEUED", "FAILED" or "RUNNING" (anything else raises KeyError)
    pollIntervalSeconds -- time to wait between two status polls
    timeout -- maximum total waiting time, compared against
               Timer.getElapsedTime() (presumably seconds -- confirm);
               None waits indefinitely
    verbose -- if True, a progress line is written to stderr

    Returns the status counts of the last poll as a dictionary
    {"FINISHED": n, "QUEUED": n, "FAILED": n, "RUNNING": n}.
    """
    print >> sys.stderr, "Waiting for results"
    waitTimer = Timer()
    accountName = self.account
    if accountName is None:
        accountName = "local"
    while True:
        jobStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for job in jobs:
            jobStatus[self.getJobStatus(job)] += 1
        jobStatusString = (str(jobStatus["QUEUED"]) + " queued, "
                           + str(jobStatus["RUNNING"]) + " running, "
                           + str(jobStatus["FINISHED"]) + " finished, "
                           + str(jobStatus["FAILED"]) + " failed")
        if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
            if verbose:
                print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
            break
        # waitTimer is the only timer that spans the whole wait, so both the
        # timeout test and the time-out message are based on it.
        if timeout is not None and waitTimer.getElapsedTime() >= timeout:
            if verbose:
                print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString()
            break
        # Sleep in 5-second slices so the progress bar can be redrawn
        sleepTimer = Timer()
        if verbose:
            waitMessage = "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),"
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + " [ ] ",
        while sleepTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
    return jobStatus
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports no more than targetCount jobs.

    targetCount -- number of jobs that may remain; the special value -1
                   disables waiting and returns immediately
    pollIntervalSeconds -- time to wait between two calls of getNumJobs()
    verbose -- if True, a progress line is written to stderr while waiting

    Returns None. "All jobs done" is written to stderr once the wait ends;
    nothing is written when no waiting was needed.
    """
    if targetCount == -1:
        return
    jobCount = self.getNumJobs()
    if jobCount <= targetCount:
        return

    totalTimer = Timer()
    while True:
        pollTimer = Timer()
        account = "local" if self.account == None else self.account
        if verbose:
            header = "\rWaiting for " + str(jobCount) + " on " + account + " (limit=" + str(targetCount) + ")"
            bar = " [ ] "
            print >> sys.stderr, header, totalTimer.elapsedTimeToString() + bar,
        # Sleep in 5-second slices so the progress bar can be redrawn
        while pollTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                filled = int(10 * pollTimer.getElapsedTime() / pollIntervalSeconds) + 1
                bar = " [" + filled * "." + (10 - filled) * " " + "] "
                header = "\rWaiting for " + str(jobCount) + " on " + account + " (limit=" + str(targetCount) + ")"
                print >> sys.stderr, header, totalTimer.elapsedTimeToString() + bar,
            time.sleep(5)
        jobCount = self.getNumJobs()
        if not jobCount > targetCount:
            break
    print >> sys.stderr, "\nAll jobs done"
def waitForJobs(self, scriptNames, timeout=None, pollIntervalSeconds=60):
    """
    Poll the status of the given job scripts until none of them is queued
    or running, or until the timeout has passed.

    scriptNames -- sequence of job script names; each one is passed to
                   self.getLouhiStatus(), which must return one of
                   "FINISHED", "QUEUED", "FAILED" or "RUNNING"
                   (anything else raises KeyError)
    timeout -- maximum total waiting time, compared against
               Timer.getElapsedTime() (presumably seconds -- confirm);
               None waits indefinitely
    pollIntervalSeconds -- time to wait between two status polls

    Returns True when no job is queued or running any more, False when
    the timeout was reached first.
    """
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for scriptName in scriptNames:
            processStatus[self.getLouhiStatus(scriptName)] += 1
        processStatusString = (str(processStatus["QUEUED"]) + " queued, "
                               + str(processStatus["RUNNING"]) + " running, "
                               + str(processStatus["FINISHED"]) + " finished, "
                               + str(processStatus["FAILED"]) + " failed")
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All jobs done (" + processStatusString + ")"
            return True
        if timeout is not None and louhiTimer.getElapsedTime() >= timeout:
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            return False
        # NOTE(review): assumes this object exposes the machineName attribute
        # that the connection passed to optimizeCSC provides -- confirm.
        waitMessage = "\rWaiting for " + str(len(scriptNames)) + " on " + self.machineName + "(" + processStatusString + "),"
        print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + " [ ] ",
        # Sleep in 5-second slices so the progress bar can be redrawn
        sleepTimer = Timer()
        while sleepTimer.getElapsedTime() < pollIntervalSeconds:
            steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
            sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
            print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False): bestResult = None combinationCount = 1 combinationIds = [] assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) if Classifier.__name__ == "MultiLabelClassifier": negClass1 = True if "classifier" in combinations[0] and combinations[0][ "classifier"] == "svmperf": negClass1 = False print "negclass1", negClass1 Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1) if steps in ["BOTH", "SUBMIT"]: print >> sys.stderr, "Initializing runs" for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters " + str( combinationCount) + "/" + str( len(combinations)) + ":", str(combination) # Train combinationIds.append( Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds)) combinationCount += 1 else: for combination in combinations: idStr = "" for key in sorted(combination.keys()): idStr += "-" + str(key) + "_" + str(combination[key]) combinationIds.append(idStr) Stream.setIndent() if steps in ["BOTH", "RESULTS"]: Stream.setIndent(" ") print >> sys.stderr, "Waiting for results" finished = 0 louhiTimer = Timer() #combinationStatus = {} while (True): # count finished finished = 0 processStatus = { "FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0 } for id in combinationIds: #status = Classifier.getLouhiStatus(id, cscConnection) #combinationStatus[id] = status #processStatus[status] += 1 Classifier.getLouhiStatus(id, cscConnection, processStatus, classIds) p = processStatus processStatusString = str(p["QUEUED"]) + " queued, " + str( p["RUNNING"]) + " running, " + str( p["FINISHED"]) + " finished, " + str( p["FAILED"]) + " failed" if processStatus["QUEUED"] + processStatus["RUNNING"] == 0: print >> 
sys.stderr print >> sys.stderr, "All runs done (" + processStatusString + ")" break # decide what to do if timeout == None or louhiTimer.getElapsedTime() < timeout: sleepString = " [ ] " print >> sys.stderr, "\rWaiting for " + str( len(combinations) ) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString( ) + sleepString, #time.sleep(60) sleepTimer = Timer() while sleepTimer.getElapsedTime() < 60: steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1 sleepString = " [" + steps * "." + (10 - steps) * " " + "] " print >> sys.stderr, "\rWaiting for " + str( len(combinations) ) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString( ) + sleepString, time.sleep(5) else: print >> sys.stderr print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString( ) break print >> sys.stderr, "Evaluating results" #if type(testExamples) != types.ListType: # print >> sys.stderr, "Loading examples from file", testExamples # testExamples = ExampleUtils.readExamples(testExamples,False) bestCombinationId = None for i in range(len(combinationIds)): id = combinationIds[i] Stream.setIndent(" ") # Evaluate predictions = Classifier.getLouhiPredictions( id, cscConnection, workDir, classIds) if predictions == None: print >> sys.stderr, "No results for combination" + id else: if downloadAllModels: modelFileName = Classifier.downloadModel( id, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) print >> sys.stderr, "Evaluating results for combination" + id evaluationOutput = "evaluation" + id + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) if threshold: print >> sys.stderr, "Thresholding" evaluator.determineThreshold(testExamples, predictions) if Classifier.__name__ != 
"MultiLabelClassifier": if bestResult == None or evaluator.compare( bestResult[0] ) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [ evaluator, None, predictions, evaluationOutput, combinations[i] ] bestCombinationId = id else: assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__ if bestResult == None: bestResult = [{}, None] for className in classIds.Ids: if className != "neg" and "---" not in className: bestResult[0][className] = [ -1, None, classIds.getId(className), None ] for className in classIds.Ids: if className != "neg" and "---" not in className: fscore = evaluator.dataByClass[classIds.getId( className)].fscore if fscore > bestResult[0][className][0]: bestResult[0][className] = [ fscore, id, bestResult[0][className][2] ] if threshold: classId = classIds.getId(className, False) if classId in evaluator.thresholds: bestResult[0][className].append( evaluator.thresholds[classId]) else: bestResult[0][className].append(0.0) else: bestResult[0][className].append(None) bestCombinationId = bestResult os.remove(predictions) # remove predictions to save space Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] #if Classifier.__name__ == "MultiLabelClassifier": # evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) # Download best model and predictions modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) modelFileName = modelFileName + ".gz" #if Classifier.__name__ != "MultiLabelClassifier": #bestResult = [None, None] bestResult[1] = modelFileName return bestResult
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False): bestResult = None combinationCount = 1 combinationIds = [] assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) if Classifier.__name__ == "MultiLabelClassifier": negClass1 = True if "classifier" in combinations[0] and combinations[0]["classifier"] == "svmperf": negClass1 = False print "negclass1", negClass1 Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1) if steps in ["BOTH", "SUBMIT"]: print >> sys.stderr, "Initializing runs" for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters "+str(combinationCount)+"/"+str(len(combinations))+":", str(combination) # Train combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds) ) combinationCount += 1 else: for combination in combinations: idStr = "" for key in sorted(combination.keys()): idStr += "-" + str(key) + "_" + str(combination[key]) combinationIds.append(idStr) Stream.setIndent() if steps in ["BOTH", "RESULTS"]: Stream.setIndent(" ") print >> sys.stderr, "Waiting for results" finished = 0 louhiTimer = Timer() #combinationStatus = {} while(True): # count finished finished = 0 processStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0} for id in combinationIds: #status = Classifier.getLouhiStatus(id, cscConnection) #combinationStatus[id] = status #processStatus[status] += 1 Classifier.getLouhiStatus(id, cscConnection, processStatus, classIds) p = processStatus processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed" if processStatus["QUEUED"] + processStatus["RUNNING"] == 0: print >> sys.stderr print >> 
sys.stderr, "All runs done (" + processStatusString + ")" break # decide what to do if timeout == None or louhiTimer.getElapsedTime() < timeout: sleepString = " [ ] " print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString, #time.sleep(60) sleepTimer = Timer() while sleepTimer.getElapsedTime() < 60: steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1 sleepString = " [" + steps * "." + (10-steps) * " " + "] " print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString, time.sleep(5) else: print >> sys.stderr print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString() break print >> sys.stderr, "Evaluating results" #if type(testExamples) != types.ListType: # print >> sys.stderr, "Loading examples from file", testExamples # testExamples = ExampleUtils.readExamples(testExamples,False) bestCombinationId = None for i in range(len(combinationIds)): id = combinationIds[i] Stream.setIndent(" ") # Evaluate predictions = Classifier.getLouhiPredictions(id, cscConnection, workDir, classIds) if predictions == None: print >> sys.stderr, "No results for combination" + id else: if downloadAllModels: modelFileName = Classifier.downloadModel(id, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) print >> sys.stderr, "Evaluating results for combination" + id evaluationOutput = "evaluation" + id + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) if threshold: print >> sys.stderr, "Thresholding" evaluator.determineThreshold(testExamples, predictions) if Classifier.__name__ != "MultiLabelClassifier": if 
bestResult == None or evaluator.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [evaluator, None, predictions, evaluationOutput, combinations[i]] bestCombinationId = id else: assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__ if bestResult == None: bestResult = [{}, None] for className in classIds.Ids: if className != "neg" and "---" not in className: bestResult[0][className] = [-1, None, classIds.getId(className), None] for className in classIds.Ids: if className != "neg" and "---" not in className: fscore = evaluator.dataByClass[classIds.getId(className)].fscore if fscore > bestResult[0][className][0]: bestResult[0][className] = [fscore, id, bestResult[0][className][2]] if threshold: classId = classIds.getId(className, False) if classId in evaluator.thresholds: bestResult[0][className].append(evaluator.thresholds[classId]) else: bestResult[0][className].append(0.0) else: bestResult[0][className].append(None) bestCombinationId = bestResult os.remove(predictions) # remove predictions to save space Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] #if Classifier.__name__ == "MultiLabelClassifier": # evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) # Download best model and predictions modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) modelFileName = modelFileName + ".gz" #if Classifier.__name__ != "MultiLabelClassifier": #bestResult = [None, None] bestResult[1] = modelFileName return bestResult