def optimizeLocal(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None): bestResult = None combinationCount = 1 for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters " + str(combinationCount) + "/" + str( len(combinations)) + ":", str(combination) Stream.setIndent(" ") combinationId = getCombinationString(combination) # Train trainOutput = "model-" + combinationId if workDir != None: trainOutput = os.path.join(workDir, trainOutput) print >> sys.stderr, "Training..." timer = Timer() Classifier.train(trainExamples, combination, trainOutput) print >> sys.stderr, "Training Complete, time:", timer.toString() # Test testOutput = "classifications-" + combinationId if workDir != None: testOutput = os.path.join(workDir, testOutput) print >> sys.stderr, "Testing..." timer = Timer() Classifier.test(testExamples, trainOutput, testOutput) print >> sys.stderr, "Testing Complete, time:", timer.toString() # Evaluate evaluationOutput = "evaluation-" + combinationId + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) Stream.setIndent(" ") evaluator = Evaluator.evaluate(testExamples, testOutput, classIds, evaluationOutput) #print >> sys.stderr, evaluator.toStringConcise(" ") if bestResult == None or evaluator.compare( bestResult[0] ) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [ evaluator, trainOutput, testOutput, evaluationOutput, combination ] combinationCount += 1 Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] return bestResult
def optimizeLocal(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None): bestResult = None combinationCount = 1 for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters "+str(combinationCount)+"/"+str(len(combinations))+":", str(combination) Stream.setIndent(" ") combinationId = getCombinationString(combination) # Train trainOutput = "model-" + combinationId if workDir != None: trainOutput = os.path.join(workDir, trainOutput) print >> sys.stderr, "Training..." timer = Timer() Classifier.train(trainExamples, combination, trainOutput) print >> sys.stderr, "Training Complete, time:", timer.toString() # Test testOutput = "classifications-" + combinationId if workDir != None: testOutput = os.path.join(workDir, testOutput) print >> sys.stderr, "Testing..." timer = Timer() Classifier.test(testExamples, trainOutput, testOutput) print >> sys.stderr, "Testing Complete, time:", timer.toString() # Evaluate evaluationOutput = "evaluation-" + combinationId + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) Stream.setIndent(" ") evaluator = Evaluator.evaluate(testExamples, testOutput, classIds, evaluationOutput) #print >> sys.stderr, evaluator.toStringConcise(" ") if bestResult == None or evaluator.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [evaluator, trainOutput, testOutput, evaluationOutput, combination] combinationCount += 1 Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] return bestResult
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False): bestResult = None combinationCount = 1 combinationIds = [] assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) if Classifier.__name__ == "MultiLabelClassifier": negClass1 = True if "classifier" in combinations[0] and combinations[0][ "classifier"] == "svmperf": negClass1 = False print "negclass1", negClass1 Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1) if steps in ["BOTH", "SUBMIT"]: print >> sys.stderr, "Initializing runs" for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters " + str( combinationCount) + "/" + str( len(combinations)) + ":", str(combination) # Train combinationIds.append( Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds)) combinationCount += 1 else: for combination in combinations: idStr = "" for key in sorted(combination.keys()): idStr += "-" + str(key) + "_" + str(combination[key]) combinationIds.append(idStr) Stream.setIndent() if steps in ["BOTH", "RESULTS"]: Stream.setIndent(" ") print >> sys.stderr, "Waiting for results" finished = 0 louhiTimer = Timer() #combinationStatus = {} while (True): # count finished finished = 0 processStatus = { "FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0 } for id in combinationIds: #status = Classifier.getLouhiStatus(id, cscConnection) #combinationStatus[id] = status #processStatus[status] += 1 Classifier.getLouhiStatus(id, cscConnection, processStatus, classIds) p = processStatus processStatusString = str(p["QUEUED"]) + " queued, " + str( p["RUNNING"]) + " running, " + str( p["FINISHED"]) + " finished, " + str( p["FAILED"]) + " failed" if processStatus["QUEUED"] + processStatus["RUNNING"] == 0: print >> sys.stderr print >> sys.stderr, "All runs done (" + processStatusString + ")" break # decide what to do if timeout == None or louhiTimer.getElapsedTime() < timeout: sleepString = " [ ] " print >> sys.stderr, "\rWaiting for " + str( len(combinations) ) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString( ) + sleepString, #time.sleep(60) sleepTimer = Timer() while sleepTimer.getElapsedTime() < 60: steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1 sleepString = " [" + steps * "." + (10 - steps) * " " + "] " print >> sys.stderr, "\rWaiting for " + str( len(combinations) ) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString( ) + sleepString, time.sleep(5) else: print >> sys.stderr print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString( ) break print >> sys.stderr, "Evaluating results" #if type(testExamples) != types.ListType: # print >> sys.stderr, "Loading examples from file", testExamples # testExamples = ExampleUtils.readExamples(testExamples,False) bestCombinationId = None for i in range(len(combinationIds)): id = combinationIds[i] Stream.setIndent(" ") # Evaluate predictions = Classifier.getLouhiPredictions( id, cscConnection, workDir, classIds) if predictions == None: print >> sys.stderr, "No results for combination" + id else: if downloadAllModels: modelFileName = Classifier.downloadModel( id, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) print >> sys.stderr, "Evaluating results for combination" + id evaluationOutput = "evaluation" + id + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) if threshold: print >> sys.stderr, "Thresholding" evaluator.determineThreshold(testExamples, predictions) if Classifier.__name__ != "MultiLabelClassifier": if bestResult == None or evaluator.compare( bestResult[0] ) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [ evaluator, None, predictions, evaluationOutput, combinations[i] ] bestCombinationId = id else: assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__ if bestResult == None: bestResult = [{}, None] for className in classIds.Ids: if className != "neg" and "---" not in className: bestResult[0][className] = [ -1, None, classIds.getId(className), None ] for className in classIds.Ids: if className != "neg" and "---" not in className: fscore = evaluator.dataByClass[classIds.getId( className)].fscore if fscore > bestResult[0][className][0]: bestResult[0][className] = [ fscore, id, bestResult[0][className][2] ] if threshold: classId = classIds.getId(className, False) if classId in evaluator.thresholds: bestResult[0][className].append( evaluator.thresholds[classId]) else: bestResult[0][className].append(0.0) else: bestResult[0][className].append(None) bestCombinationId = bestResult os.remove(predictions) # remove predictions to save space Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] #if Classifier.__name__ == "MultiLabelClassifier": # evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) # Download best model and predictions modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) modelFileName = modelFileName + ".gz" #if Classifier.__name__ != "MultiLabelClassifier": #bestResult = [None, None] bestResult[1] = modelFileName return bestResult
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False): bestResult = None combinationCount = 1 combinationIds = [] assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) if Classifier.__name__ == "MultiLabelClassifier": negClass1 = True if "classifier" in combinations[0] and combinations[0]["classifier"] == "svmperf": negClass1 = False print "negclass1", negClass1 Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1) if steps in ["BOTH", "SUBMIT"]: print >> sys.stderr, "Initializing runs" for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters "+str(combinationCount)+"/"+str(len(combinations))+":", str(combination) # Train combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds) ) combinationCount += 1 else: for combination in combinations: idStr = "" for key in sorted(combination.keys()): idStr += "-" + str(key) + "_" + str(combination[key]) combinationIds.append(idStr) Stream.setIndent() if steps in ["BOTH", "RESULTS"]: Stream.setIndent(" ") print >> sys.stderr, "Waiting for results" finished = 0 louhiTimer = Timer() #combinationStatus = {} while(True): # count finished finished = 0 processStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0} for id in combinationIds: #status = Classifier.getLouhiStatus(id, cscConnection) #combinationStatus[id] = status #processStatus[status] += 1 Classifier.getLouhiStatus(id, cscConnection, processStatus, classIds) p = processStatus processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed" if processStatus["QUEUED"] + processStatus["RUNNING"] == 0: print >> sys.stderr print >> sys.stderr, "All runs done (" + processStatusString + ")" break # decide what to do if timeout == None or louhiTimer.getElapsedTime() < timeout: sleepString = " [ ] " print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString, #time.sleep(60) sleepTimer = Timer() while sleepTimer.getElapsedTime() < 60: steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1 sleepString = " [" + steps * "." + (10-steps) * " " + "] " print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString, time.sleep(5) else: print >> sys.stderr print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString() break print >> sys.stderr, "Evaluating results" #if type(testExamples) != types.ListType: # print >> sys.stderr, "Loading examples from file", testExamples # testExamples = ExampleUtils.readExamples(testExamples,False) bestCombinationId = None for i in range(len(combinationIds)): id = combinationIds[i] Stream.setIndent(" ") # Evaluate predictions = Classifier.getLouhiPredictions(id, cscConnection, workDir, classIds) if predictions == None: print >> sys.stderr, "No results for combination" + id else: if downloadAllModels: modelFileName = Classifier.downloadModel(id, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) print >> sys.stderr, "Evaluating results for combination" + id evaluationOutput = "evaluation" + id + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) if threshold: print >> sys.stderr, "Thresholding" evaluator.determineThreshold(testExamples, predictions) if Classifier.__name__ != "MultiLabelClassifier": if bestResult == None or evaluator.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [evaluator, None, predictions, evaluationOutput, combinations[i]] bestCombinationId = id else: assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__ if bestResult == None: bestResult = [{}, None] for className in classIds.Ids: if className != "neg" and "---" not in className: bestResult[0][className] = [-1, None, classIds.getId(className), None] for className in classIds.Ids: if className != "neg" and "---" not in className: fscore = evaluator.dataByClass[classIds.getId(className)].fscore if fscore > bestResult[0][className][0]: bestResult[0][className] = [fscore, id, bestResult[0][className][2]] if threshold: classId = classIds.getId(className, False) if classId in evaluator.thresholds: bestResult[0][className].append(evaluator.thresholds[classId]) else: bestResult[0][className].append(0.0) else: bestResult[0][className].append(None) bestCombinationId = bestResult os.remove(predictions) # remove predictions to save space Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] #if Classifier.__name__ == "MultiLabelClassifier": # evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) # Download best model and predictions modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) modelFileName = modelFileName + ".gz" #if Classifier.__name__ != "MultiLabelClassifier": #bestResult = [None, None] bestResult[1] = modelFileName return bestResult