def getNumNeighbors(self, trainingFile):
    #Tune k for the kNN model on a down-sampled, scaled copy of the training data.
    if type(trainingFile) is list:
        temp = ModelBuilder(trainingFile, 1)
    else:
        temp = ModelBuilder([trainingFile], 1)
    temp.downSample()
    temp.buildScaler("scalerTemp.txt")
    temp.scaleData()
    numNeighbors = temp.buildModelNeighbors("modelTemp.txt", 202, 20, 3, 1)
    return numNeighbors
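#Hedged usage sketch (not from the original source): assuming getNumNeighbors
#is a method on this module's tester class (ModelTester is a hypothetical
#name), tuning k might look like this.
#tester = ModelTester()
#k = tester.getNumNeighbors("working_data/latest/trainfeaturesBtL.csv")
#print("chosen number of neighbors: " + str(k))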
from gold.learn.ModelEvaluator import ModelEvaluator
from gold.learn.Model import ModelBuilder
from gold.learn.Model import Model

#Train and evaluate a 301-tree random forest for the BtL problem.
temp = ModelBuilder(["working_data/latest/trainfeaturesBtL.csv"], 1)
temp.downSample()
temp.buildScaler("working_data/models/trainfeaturesBtLScaler.txt")
temp.scaleData()
#temp.buildFeatureSelector("working_data/models/BtLFeatureSelector.txt",120)
#temp.setFeaturesFromSelector("working_data/models/BtLFeatureSelector.txt")
#temp.selectFeatures()
#temp.buildModelAdaBoost("working_data/models/modelRF100BtL.txt")
temp.buildModelRF("working_data/models/modelRF100BtL.txt", 301, 20, 3, 1)
temp.setData(["working_data/latest/devfeaturesBtL.csv"], 1)
temp.setScaler("working_data/models/trainfeaturesBtLScaler.txt")
temp.scaleData()
#temp.setFeaturesFromSelector("working_data/models/BtLFeatureSelector.txt")
#temp.selectFeatures()
print temp.evaluateModel("working_data/models/modelRF100BtL.txt")

#Same pipeline for the WtK problem.
temp = ModelBuilder(["working_data/latest/trainfeaturesWtK.csv"], 1)
temp.downSample()
temp.buildScaler("working_data/models/trainfeaturesWtKScaler.txt")
temp.scaleData()
#temp.buildFeatureSelector("working_data/models/WtKFeatureSelector.txt",120)
#temp.setFeaturesFromSelector("working_data/models/WtKFeatureSelector.txt")
#temp.selectFeatures()
#temp.buildModelAdaBoost("working_data/models/modelRF100WtK.txt")
temp.buildModelRF("working_data/models/modelRF100WtK.txt", 301, 20, 3, 1)
temp.setData(["working_data/latest/devfeaturesWtK.csv"], 1)
temp.setScaler("working_data/models/trainfeaturesWtKScaler.txt")
temp.scaleData()
print temp.evaluateModel("working_data/models/modelRF100WtK.txt")
def testModel(self, trainingFile, testFile, numRuns, downsample, scale, pca, model, label):
    #Average precision/recall/F-measure/accuracy over numRuns train/test cycles.
    precision = 0
    recall = 0
    fmeasure = 0
    accuracy = 0
    runType = label
    if model == 0:
        runType = runType + "LDA"
    elif model == 1:
        runType = runType + "QDA"
    elif model == 2:
        runType = runType + "LogReg"
    elif model == 3:
        runType = runType + "kNN"
    elif model == 4:
        runType = runType + "NaiveBayes"
    elif model == 5:
        runType = runType + "LinearSVM"
    elif model == 6:
        runType = runType + "RandForest"
    elif model == 7:
        runType = runType + "AdaBoost"
    if scale == 1:
        runType = runType + "-scale"
    if downsample == 1:
        runType = runType + "-down"
    if pca == 1:
        runType = runType + "-pca"
    elif pca == 2:
        runType = runType + "-pcaW"
    featureSelector = ""
    if label == "BtL":
        featureSelector = "learn/BtLFeatureSelector.txt"
    elif label == "WtK":
        featureSelector = "learn/WtKFeatureSelector.txt"
    for i in range(0, numRuns):
        temp = ModelBuilder([trainingFile], 1)
        if scale == 1:
            temp.buildScaler("scaler.txt")
            temp.scaleData()
        if downsample == 1:
            temp.downSample()
        #temp.setFeaturesFromSelector(featureSelector)
        #temp.selectFeatures()
        if pca == 1:
            try:
                temp.buildDimensionReducer("reducer.txt", False)
                temp.reduceDimensions()
            except Exception as error:
                #On PCA failure, drop this run, tag the result, and disable PCA.
                print error
                numRuns = numRuns - 1
                runType = runType + "-pcaErr"
                pca = 0
                continue
        elif pca == 2:
            try:
                temp.buildDimensionReducer("reducer.txt", True)
                temp.reduceDimensions()
            except Exception as error:
                print error
                numRuns = numRuns - 1
                runType = runType + "-pcaWErr"
                pca = 0
                continue
        if model == 0:
            temp.buildModelLDA("model.txt")
        elif model == 1:
            temp.buildModelQDA("model.txt")
        elif model == 2:
            temp.buildModelLogReg("model.txt")
        elif model == 3:
            if downsample == 1:
                temp.buildModelNeighbors("model.txt", 202, 20, 3, 6)
            else:
                temp.buildModelNeighbors("model.txt", 102, 25, 3, 6)
        elif model == 4:
            temp.buildModelNB("model.txt")
        elif model == 5:
            temp.buildModelSVM("model.txt")
        elif model == 6:
            temp.buildModelRF("model.txt", 202, 20, 3, 6)
        elif model == 7:
            temp.buildModelAdaBoost("model.txt")
        temp.setData([testFile], 1)
        if scale == 1:
            temp.setScaler("scaler.txt")
            temp.scaleData()
        #temp.setFeaturesFromSelector(featureSelector)
        #temp.selectFeatures()
        if pca == 1 or pca == 2:
            temp.setDimensionReducer("reducer.txt")
            temp.reduceDimensions()
        stats = temp.evaluateModel("model.txt")
        precision = precision + stats[0]
        recall = recall + stats[1]
        fmeasure = fmeasure + stats[2]
        accuracy = accuracy + stats[3]
    #The small epsilon guards against division by zero when every run failed.
    precision = float(precision) / float(numRuns + 0.000001)
    recall = float(recall) / float(numRuns + 0.000001)
    fmeasure = float(fmeasure) / float(numRuns + 0.000001)
    accuracy = float(accuracy) / float(numRuns + 0.000001)
    result = runType + "," + str(precision) + "," + str(recall) + "," + str(fmeasure) + "," + str(accuracy)
    print result
    return result
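#Hedged usage sketch (not from the original source): assuming testModel is a
#method on this module's tester class (ModelTester is a hypothetical name),
#a sweep over all eight model codes could look like this.
#tester = ModelTester()
#for model in range(0, 8):
#    tester.testModel("working_data/latest/trainfeaturesBtL.csv",
#                     "working_data/latest/devfeaturesBtL.csv",
#                     numRuns=3, downsample=1, scale=1, pca=0,
#                     model=model, label="BtL")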
import sys
sys.path.append("/Users/zacharydaniels/Documents/GoLD/src/")
from gold.learn.Model import ModelBuilder
from gold.learn.Model import Model

#Grid-search SVM class weights on the terminal-state data, keeping the pair
#with the best score at stats index 2 (F-measure, per the stat ordering used
#elsewhere in this repo).
bestMeasure = 0
bestClass1Weight = 1
bestClass0Weight = 1
training = "extraneous/games/terminal/trainfeaturesBtL_T.csv"
test = "extraneous/games/terminal/devfeaturesBtL_T.csv"
temp = ModelBuilder([training], 1)
temp.buildScaler("extraneous/games/train/scaler.txt")
temp.scaleData()
temp2 = ModelBuilder([test], 1)
temp2.setScaler("extraneous/games/train/scaler.txt")
temp2.scaleData()
for class1Weight in range(1, 20):
    for class0Weight in range(1, 2):
        print "(" + str(class1Weight) + "," + str(class0Weight) + ")\n"
        temp.buildModelSVM("extraneous/games/train/model.txt", weights={1: class1Weight, 0: class0Weight})
        data = temp2.evaluateModel("extraneous/games/train/model.txt")
        if data[2] > bestMeasure:
            bestMeasure = data[2]
            bestClass1Weight = class1Weight
            bestClass0Weight = class0Weight
print "Measure: " + str(bestMeasure) + ", Class 1 Weight: " + str(bestClass1Weight) + ", Class 0 Weight: " + str(bestClass0Weight) + "\n"
def buildModels():
    #Use SVM (down-sampling currently disabled)
    print('Building SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    #temp.downSample()
    temp.buildModelSVM(base+"modelSVMBtL.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    #temp.downSample()
    temp.buildModelSVM(base+"modelSVMWtK.txt")
    #Scale data and use SVM
    print('Building scaled SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    #temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler.txt")
    temp.scaleData()
    temp.buildModelSVM(base+"modelSVMsBtL.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    #temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler.txt")
    temp.scaleData()
    temp.buildModelSVM(base+"modelSVMsWtK.txt")
    #Scale data, apply PCA, and use SVM
    print('Building scaled PCA SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    #temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler.txt")
    temp.scaleData()
    temp.buildDimensionReducer(base+"train/BtLReduce.txt", False)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrBtL.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    #temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler.txt")
    temp.scaleData()
    temp.buildDimensionReducer(base+"train/WtKReduce.txt", False)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrWtK.txt")
    #Apply PCA with whitening, and use SVM
    print('Building PCA with whitening SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    #temp.downSample()
    temp.buildDimensionReducer(base+"train/BtLReduceW.txt", True)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrwBtL.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    #temp.downSample()
    temp.buildDimensionReducer(base+"train/WtKReduceW.txt", True)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrwWtK.txt")
    #Use other models
    print('Building other models...')
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    #temp.downSample()
    #Weighted SVM: Class 0 Weight: 2, Class 1 Weight: 3
    temp.buildModelSVM(base+"modelSVMcwBtL.txt", weights={0: 2, 1: 3})
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    #temp.downSample()
    #Weighted SVM: Class 0 Weight: 2, Class 1 Weight: 3
    temp.buildModelSVM(base+"modelSVMcwWtK.txt", weights={0: 2, 1: 3})
    #5-Nearest Neighbors
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    temp.buildModelNeighbors(base+"modelNeighborsBtL.txt", 5)
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    temp.buildModelNeighbors(base+"modelNeighborsWtK.txt", 5)
    #Random forests (10 and 100 trees) and Naive Bayes on scaled data
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"])
    ##temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler.txt")
    temp.scaleData()
    temp.buildModelRF(base+"modelRF10BtL.txt", 10)
    temp.buildModelRF(base+"modelRF100BtL.txt", 100)
    temp.buildModelNB(base+"modelNBBtL.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK.csv"])
    ##temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler.txt")
    temp.scaleData()
    temp.buildModelRF(base+"modelRF10WtK.txt", 10)
    temp.buildModelRF(base+"modelRF100WtK.txt", 100)
    temp.buildModelNB(base+"modelNBWtK.txt")
    print('Done!')
def evaluateModels():
    temp = ModelBuilder([base+"devfeaturesBtL.csv"])
    stats = temp.evaluateModel(base+"modelSVMBtL.txt")
    printStats(stats, 'Unscaled SVM BtL')
    temp.setScaler(base+"trainfeaturesBtLScaler.txt")
    temp.scaleData()
    stats = temp.evaluateModel(base+"modelSVMsBtL.txt")
    printStats(stats, 'Scaled SVM BtL')
    stats = temp.evaluateModel(base+"modelNeighborsBtL.txt")
    printStats(stats, 'Scaled NN BtL')
    stats = temp.evaluateModel(base+"modelRF10BtL.txt")
    printStats(stats, 'Scaled RF10 BtL')
    stats = temp.evaluateModel(base+"modelRF100BtL.txt")
    printStats(stats, 'Scaled RF100 BtL')
    stats = temp.evaluateModel(base+"modelNBBtL.txt")
    printStats(stats, 'Scaled NB BtL')
    temp.setDimensionReducer(base+"train/BtLReduce.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrBtL.txt")
    printStats(stats, 'Scaled reduced SVM BtL')
    #Reload the dev data before applying the whitened reducer.
    temp.setData([base+"devfeaturesBtL.csv"])
    temp.setScaler(base+"trainfeaturesBtLScaler.txt")
    temp.scaleData()
    temp.setDimensionReducer(base+"train/BtLReduceW.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrwBtL.txt")
    printStats(stats, 'Scaled reduced PCA whitened SVM BtL')
    # --------------------------
    temp = ModelBuilder([base+"devfeaturesWtK.csv"])
    stats = temp.evaluateModel(base+"modelSVMWtK.txt")
    printStats(stats, 'Unscaled SVM WtK')
    temp.setScaler(base+"trainfeaturesWtKScaler.txt")
    temp.scaleData()
    stats = temp.evaluateModel(base+"modelSVMsWtK.txt")
    printStats(stats, 'Scaled SVM WtK')
    stats = temp.evaluateModel(base+"modelNeighborsWtK.txt")
    printStats(stats, 'Scaled NN WtK')
    stats = temp.evaluateModel(base+"modelRF10WtK.txt")
    printStats(stats, 'Scaled RF10 WtK')
    stats = temp.evaluateModel(base+"modelRF100WtK.txt")
    printStats(stats, 'Scaled RF100 WtK')
    stats = temp.evaluateModel(base+"modelNBWtK.txt")
    printStats(stats, 'Scaled NB WtK')
    temp.setDimensionReducer(base+"train/WtKReduce.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrWtK.txt")
    printStats(stats, 'Scaled reduced SVM WtK')
    temp.setData([base+"devfeaturesWtK.csv"])
    temp.setScaler(base+"trainfeaturesWtKScaler.txt")
    temp.scaleData()
    temp.setDimensionReducer(base+"train/WtKReduceW.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrwWtK.txt")
    printStats(stats, 'Scaled reduced whitened SVM WtK')
def rebuildTermModels(downSample=False):
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler_T.txt")
    temp.scaleData()
    temp.buildModelRF(base+"modelRF10BtL_T.txt", 10)
    temp.buildModelRF(base+"modelRF5BtL_T.txt", 5)
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler_T.txt")
    temp.scaleData()
    temp.buildModelRF(base+"modelRF10WtK_T.txt", 10)
    temp.buildModelRF(base+"modelRF5WtK_T.txt", 5)
def runModelSubset(self, trainingFile, testFile, subset, featureSelectionFlag, numRuns=3):
    #Average evaluation stats over numRuns runs on the given feature subset;
    #stats[5] tracks the mean number of features actually used.
    stats = [0, 0, 0, 0, 0, 0]
    features = set()
    for i in range(numRuns):
        temp = ModelBuilder(trainingFile, 1)
        temp.setFeatures(subset)
        temp.downSample()
        temp.buildScaler("scalerXYZB.txt")
        temp.scaleData()
        featureIndices = subset
        if featureSelectionFlag:
            featureIndices = temp.buildFeatureSelectorAutomatic("selectorXYZB.txt", 5)
            temp.selectFeatures()
        temp.buildModelRF("modelXYZB.txt", 301, 20, 3, 6)
        temp.setData(testFile, 1)
        temp.setFeatures(subset)
        temp.setScaler("scalerXYZB.txt")
        temp.scaleData()
        if featureSelectionFlag:
            temp.setFeaturesFromSelector("selectorXYZB.txt")
            temp.selectFeatures()
        tempStats = temp.evaluateModel("modelXYZB.txt", plotROC=False)
        for j in range(5):
            stats[j] = stats[j] + tempStats[j]
        stats[5] = stats[5] + len(featureIndices)
        features = features.union(set(featureIndices))
    for j in range(5):
        stats[j] = stats[j] / numRuns
    stats[5] = stats[5] / float(numRuns)
    return stats + [list(features)]
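#Hedged usage sketch (not from the original source): assuming runModelSubset
#is a method on this module's tester class (ModelTester is a hypothetical
#name), evaluating a candidate list of feature indices might look like this.
#The return value holds the five averaged evaluation stats, the mean feature
#count, and the union of feature indices used across runs.
#tester = ModelTester()
#result = tester.runModelSubset(["working_data/latest/trainfeaturesBtL.csv"],
#                               ["working_data/latest/devfeaturesBtL.csv"],
#                               subset=[0, 3, 7, 12],
#                               featureSelectionFlag=True)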
def buildTermModelsN(probtype, downSample=False):
    #Parameterized variant of buildTermModels: builds all terminal-state
    #models for one problem type (1 = BtL, otherwise WtK).
    start = time.clock()
    istart = start
    if probtype == 1:
        suffix = 'BtL_T'
        ptdesc = 'BtL'
    else:
        suffix = 'WtK_T'
        ptdesc = 'WtK'
    #Down-sample and use SVM
    print('Building SVM models for {}...'.format(suffix))
    temp = ModelBuilder([base+"trainfeatures{}.csv".format(suffix)], 1)
    print('Loaded model builder {:.3f} sec...'.format(time.clock()-start))
    if downSample:
        temp.downSample()
    temp.buildModelSVM(base+"modelSVM{}.txt".format(suffix))
    print('{} model built {:.3f} sec...'.format(ptdesc, time.clock()-istart))
    print('Building scaled SVM models for {}...'.format(suffix))
    temp = ModelBuilder([base+"trainfeatures{}.csv".format(suffix)], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeatures{}Scaler_T.txt".format(ptdesc))
    temp.scaleData()
    temp.buildModelSVM(base+"modelSVMs{}.txt".format(suffix))
    #Scale data, apply PCA, and use SVM
    print('Building scaled PCA SVM models for {}...'.format(suffix))
    temp = ModelBuilder([base+"trainfeatures{}.csv".format(suffix)], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeatures{}Scaler_T.txt".format(ptdesc))
    temp.scaleData()
    temp.buildDimensionReducer(base+"train/{}Reduce_T.txt".format(ptdesc), False)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsr{}.txt".format(suffix))
    #Apply PCA with whitening, and use SVM
    print('Building PCA with whitening SVM models for {}...'.format(suffix))
    temp = ModelBuilder([base+"trainfeatures{}.csv".format(suffix)], 1)
    if downSample:
        temp.downSample()
    temp.buildDimensionReducer(base+"train/{}ReduceW_T.txt".format(ptdesc), True)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrw{}.txt".format(suffix))
    #Use other models
    print('Building other models for {}...'.format(suffix))
    temp = ModelBuilder([base+"trainfeatures{}.csv".format(suffix)], 1)
    if downSample:
        temp.downSample()
    #Weighted SVM: Class 0 Weight: 2, Class 1 Weight: 3
    temp.buildModelSVM(base+"modelSVMcw{}.txt".format(suffix), weights={0: 2, 1: 3})
    temp = ModelBuilder([base+"trainfeatures{}.csv".format(suffix)], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeatures{}Scaler_T.txt".format(ptdesc))
    temp.scaleData()
    temp.buildModelRF(base+"modelRF5{}.txt".format(suffix), 5)
    temp.buildModelRF(base+"modelRF10{}.txt".format(suffix), 10)
    temp.buildModelRF(base+"modelRF100{}.txt".format(suffix), 100)
    temp.buildModelRF(base+"modelRF200{}.txt".format(suffix), 200)
    temp.buildModelNB(base+"modelNB{}.txt".format(suffix))
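#Hedged usage sketch (not from the original source): with the filenames keyed
#to probtype, both terminal problem types can be built in turn. The
#downSample=True setting is an assumption, not taken from the source.
#for probtype in (1, 2):
#    buildTermModelsN(probtype, downSample=True)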
def buildTermModels(downSample=False):
    start = time.clock()
    istart = start
    #Down-sample and use SVM
    print('Building SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    print('Loaded model builder {:.3f} sec...'.format(time.clock()-start))
    if downSample:
        temp.downSample()
    temp.buildModelSVM(base+"modelSVMBtL_T.txt")
    print('BtL model built {:.3f} sec...'.format(time.clock()-istart))
    istart = time.clock()
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildModelSVM(base+"modelSVMWtK_T.txt")
    print('WtK model built {:.3f} sec...'.format(time.clock()-istart))
    #Scale data and use SVM
    print('Building scaled SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler_T.txt")
    temp.scaleData()
    temp.buildModelSVM(base+"modelSVMsBtL_T.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler_T.txt")
    temp.scaleData()
    temp.buildModelSVM(base+"modelSVMsWtK_T.txt")
    #Scale data, apply PCA, and use SVM
    print('Building scaled PCA SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler_T.txt")
    temp.scaleData()
    temp.buildDimensionReducer(base+"BtLReduce_T.txt", False)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrBtL_T.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler_T.txt")
    temp.scaleData()
    temp.buildDimensionReducer(base+"WtKReduce_T.txt", False)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrWtK_T.txt")
    #Apply PCA with whitening, and use SVM
    print('Building PCA with whitening SVM models...')
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildDimensionReducer(base+"BtLReduceW_T.txt", True)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrwBtL_T.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildDimensionReducer(base+"WtKReduceW_T.txt", True)
    temp.reduceDimensions()
    temp.buildModelSVM(base+"modelSVMsrwWtK_T.txt")
    #Use other models
    print('Building other models...')
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    if downSample:
        temp.downSample()
    #Weighted SVM: Class 0 Weight: 2, Class 1 Weight: 3
    temp.buildModelSVM(base+"modelSVMcwBtL_T.txt", weights={0: 2, 1: 3})
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    #Weighted SVM: Class 0 Weight: 2, Class 1 Weight: 3
    temp.buildModelSVM(base+"modelSVMcwWtK_T.txt", weights={0: 2, 1: 3})
    #5-Nearest Neighbors (currently disabled)
    #temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"],1)
    #temp.downSample()
    #temp.buildModelNeighbors(base+"modelNeighborsBtL_T.txt",5)
    #temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"],1)
    #temp.downSample()
    #temp.buildModelNeighbors(base+"modelNeighborsWtK_T.txt",5)
    #Random forests (5, 10, 100, 200 trees) and Naive Bayes
    temp = ModelBuilder([base+"trainfeaturesBtL_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler_T.txt")
    temp.scaleData()
    temp.buildModelRF(base+"modelRF10BtL_T.txt", 10)
    temp.buildModelRF(base+"modelRF5BtL_T.txt", 5)
    temp.buildModelRF(base+"modelRF100BtL_T.txt", 100)
    temp.buildModelRF(base+"modelRF200BtL_T.txt", 200)
    temp.buildModelNB(base+"modelNBBtL_T.txt")
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    if downSample:
        temp.downSample()
    temp.buildScaler(base+"trainfeaturesWtKScaler_T.txt")
    temp.scaleData()
    temp.buildModelRF(base+"modelRF10WtK_T.txt", 10)
    temp.buildModelRF(base+"modelRF5WtK_T.txt", 5)
    temp.buildModelRF(base+"modelRF100WtK_T.txt", 100)
    temp.buildModelRF(base+"modelRF200WtK_T.txt", 200)
    temp.buildModelNB(base+"modelNBWtK_T.txt")
    print('Done!')
def evaluateModel(modelType):
    temp = ModelBuilder([base+"devfeatures"+modelType+".csv"], 1)
    stats = temp.evaluateModel(base+"modelSVM"+modelType+".txt")
    printStats(stats, 'Unscaled SVM '+modelType)
    stats = temp.evaluateModel(base+"modelSVMcw"+modelType+".txt")
    printStats(stats, 'Unscaled weighted SVM '+modelType)
    stats = temp.evaluateModel(base+"modelNeighbors"+modelType+".txt")
    printStats(stats, 'Unscaled NN '+modelType)
    stats = temp.evaluateModel(base+"modelRF10"+modelType+".txt")
    printStats(stats, 'Unscaled RF10 '+modelType)
    stats = temp.evaluateModel(base+"modelRF100"+modelType+".txt")
    printStats(stats, 'Unscaled RF100 '+modelType)
    stats = temp.evaluateModel(base+"modelNB"+modelType+".txt")
    printStats(stats, 'Unscaled NB '+modelType)
    #Scaler and reducers are keyed to the problem type.
    temp.setScaler(base+"trainfeatures"+modelType+"Scaler.txt")
    temp.scaleData()
    stats = temp.evaluateModel(base+"modelSVMs"+modelType+".txt")
    printStats(stats, 'Scaled SVM '+modelType)
    temp.setDimensionReducer(base+modelType+"Reduce.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsr"+modelType+".txt")
    printStats(stats, 'Scaled reduced SVM '+modelType)
    temp = ModelBuilder([base+"devfeatures"+modelType+".csv"], 1)
    temp.setScaler(base+"trainfeatures"+modelType+"Scaler.txt")
    temp.scaleData()
    temp.setDimensionReducer(base+modelType+"ReduceW.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrw"+modelType+".txt")
    printStats(stats, 'Scaled reduced whitened SVM '+modelType)
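#Hedged usage sketch (not from the original source): evaluating both problem
#types with the parameterized helper above, assuming base and printStats are
#set up as in the rest of this module.
#for modelType in ("BtL", "WtK"):
#    evaluateModel(modelType)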
import sys
sys.path.append("/Users/zacharydaniels/Documents/GoLD/src/")
from gold.features.trainer import MoveTrainer
from gold.learn.Model import ModelBuilder
from gold.learn.Model import Model

#Extract features
temp = MoveTrainer(["extraneous/games/train/"])
temp.train()
temp = MoveTrainer(["extraneous/games/dev/"])
temp.train()

#0: good move or bad move, 1: terminal solution state or not
classType = 0

#SVM
temp = ModelBuilder(["extraneous/games/train/featuresBtL.csv"], classType)
temp.buildModelSVM("extraneous/games/train/modelSVMBtL.txt", weights='auto')
#temp.setData(["extraneous/games/dev/featuresBtL.csv"],classType)
temp.evaluateModel("extraneous/games/train/modelSVMBtL.txt")
temp = ModelBuilder(["extraneous/games/train/featuresWtK.csv"], classType)
temp.buildModelSVM("extraneous/games/train/modelSVMWtK.txt")
#temp.setData(["extraneous/games/dev/featuresWtK.csv"],classType)
temp.evaluateModel("extraneous/games/train/modelSVMWtK.txt")

#Scale data and use SVM
temp = ModelBuilder(["extraneous/games/train/featuresBtL.csv"], classType)
temp.buildScaler("extraneous/games/train/featuresBtLScaler.txt")
temp.scaleData()
temp.buildModelSVM("extraneous/games/train/modelSVM.txt")
temp.setData(["extraneous/games/dev/featuresBtL.csv"], classType)
temp.setScaler("extraneous/games/train/featuresBtLScaler.txt")
temp.scaleData()
temp.evaluateModel("extraneous/games/train/modelSVM.txt")
def testModel(self, trainingFile, testFile, numRuns, downsample, scale, pca, model, label, num_trees, num_neighbors):
    #Variant of testModel that takes explicit tree and neighbor counts.
    precision = 0
    recall = 0
    fmeasure = 0
    accuracy = 0
    runType = label
    if model == 0:
        runType = runType + "LDA"
    elif model == 1:
        runType = runType + "QDA"
    elif model == 2:
        runType = runType + "LogReg"
    elif model == 3:
        runType = runType + "kNN"
    elif model == 4:
        runType = runType + "NaiveBayes"
    elif model == 5:
        runType = runType + "LinearSVM"
    elif model == 6:
        runType = runType + "RandForest"
    elif model == 7:
        runType = runType + "AdaBoost"
    if scale == 1:
        runType = runType + "-scale"
    if downsample == 1:
        runType = runType + "-down"
    if pca == 1:
        runType = runType + "-pca"
    elif pca == 2:
        runType = runType + "-pcaW"
    featureSelector = ""
    if label == "BtL":
        featureSelector = "learn/BtLFeatureSelector.txt"
    elif label == "WtK":
        featureSelector = "learn/WtKFeatureSelector.txt"
    for i in range(0, numRuns):
        if type(trainingFile) is list:
            temp = ModelBuilder(trainingFile, 1)
        else:
            temp = ModelBuilder([trainingFile], 1)
        if scale == 1:
            temp.buildScaler("scalerFOREVAL.txt")
            temp.scaleData()
        if downsample == 1:
            temp.downSample()
        #temp.setFeaturesFromSelector(featureSelector)
        #temp.selectFeatures()
        if pca == 1:
            try:
                temp.buildDimensionReducer("reducerFOREVAL.txt", False)
                temp.reduceDimensions()
            except Exception as error:
                #On PCA failure, drop this run, tag the result, and disable PCA.
                print error
                numRuns = numRuns - 1
                runType = runType + "-pcaErr"
                pca = 0
                continue
        elif pca == 2:
            try:
                temp.buildDimensionReducer("reducerFOREVAL.txt", True)
                temp.reduceDimensions()
            except Exception as error:
                print error
                numRuns = numRuns - 1
                runType = runType + "-pcaWErr"
                pca = 0
                continue
        if model == 0:
            temp.buildModelLDA("modelFOREVAL.txt")
        elif model == 1:
            temp.buildModelQDA("modelFOREVAL.txt")
        elif model == 2:
            temp.buildModelLogReg("modelFOREVAL.txt")
        elif model == 3:
            temp.buildModelNeighbors("modelFOREVAL.txt", numNeighbors=num_neighbors)
        elif model == 4:
            temp.buildModelNB("modelFOREVAL.txt")
        elif model == 5:
            temp.buildModelSVM("modelFOREVAL.txt")
        elif model == 6:
            temp.buildModelRF("modelFOREVAL.txt", numTrees=num_trees)
        elif model == 7:
            temp.buildModelAdaBoost("modelFOREVAL.txt")
        temp.setData([testFile], 1)
        if scale == 1:
            temp.setScaler("scalerFOREVAL.txt")
            temp.scaleData()
        #temp.setFeaturesFromSelector(featureSelector)
        #temp.selectFeatures()
        if pca == 1 or pca == 2:
            temp.setDimensionReducer("reducerFOREVAL.txt")
            temp.reduceDimensions()
        stats = temp.evaluateModel("modelFOREVAL.txt")
        precision = precision + stats[0]
        recall = recall + stats[1]
        fmeasure = fmeasure + stats[2]
        accuracy = accuracy + stats[3]
    #The small epsilon guards against division by zero when every run failed.
    precision = float(precision) / float(numRuns + 0.000001)
    recall = float(recall) / float(numRuns + 0.000001)
    fmeasure = float(fmeasure) / float(numRuns + 0.000001)
    accuracy = float(accuracy) / float(numRuns + 0.000001)
    result = runType + "," + str(precision) + "," + str(recall) + "," + str(fmeasure) + "," + str(accuracy)
    print result
    return result
def evaluateTermModels():
    #Track the best first statistic seen for each problem type.
    maxB = 0.0
    maxW = 0.0
    temp = ModelBuilder([base+"devfeaturesBtL_T.csv"], 1)
    stats = temp.evaluateModel(base+"modelSVMBtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Unscaled SVM BtL')
    temp.setScaler(base+"trainfeaturesBtLScaler_T.txt")
    temp.scaleData()
    stats = temp.evaluateModel(base+"modelSVMsBtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled SVM BtL')
    #stats = temp.evaluateModel(base+"modelNeighborsBtL_T.txt")
    #printStats(stats, 'Scaled NN BtL')
    stats = temp.evaluateModel(base+"modelRF5BtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled RF5 BtL')
    stats = temp.evaluateModel(base+"modelRF10BtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled RF10 BtL')
    stats = temp.evaluateModel(base+"modelRF100BtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled RF100 BtL')
    stats = temp.evaluateModel(base+"modelRF200BtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled RF200 BtL')
    stats = temp.evaluateModel(base+"modelNBBtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled NB BtL')
    temp.setDimensionReducer(base+"BtLReduce_T.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrBtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled reduced SVM BtL')
    #Reload the dev data before applying the whitened reducer.
    temp = ModelBuilder([base+"devfeaturesBtL_T.csv"], 1)
    temp.setScaler(base+"trainfeaturesBtLScaler_T.txt")
    temp.scaleData()
    temp.setDimensionReducer(base+"BtLReduceW_T.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrwBtL_T.txt")
    maxB = max(maxB, stats[0])
    printStats(stats, 'Scaled reduced PCA whitened SVM BtL')
    # --------------------------
    #Build the 5-NN WtK model here; the build step leaves it disabled.
    temp = ModelBuilder([base+"trainfeaturesWtK_T.csv"], 1)
    temp.buildModelNeighbors(base+"modelNeighborsWtK_T.txt", 5)
    temp = ModelBuilder([base+"devfeaturesWtK_T.csv"], 1)
    stats = temp.evaluateModel(base+"modelSVMWtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Unscaled SVM WtK')
    temp.setScaler(base+"trainfeaturesWtKScaler_T.txt")
    temp.scaleData()
    stats = temp.evaluateModel(base+"modelSVMsWtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled SVM WtK')
    #stats = temp.evaluateModel(base+"modelNeighborsWtK.txt")
    #printStats(stats, 'Scaled NN WtK')
    stats = temp.evaluateModel(base+"modelRF5WtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled RF5 WtK')
    stats = temp.evaluateModel(base+"modelRF10WtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled RF10 WtK')
    stats = temp.evaluateModel(base+"modelRF100WtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled RF100 WtK')
    stats = temp.evaluateModel(base+"modelRF200WtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled RF200 WtK')
    stats = temp.evaluateModel(base+"modelNBWtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled NB WtK')
    temp.setDimensionReducer(base+"WtKReduce_T.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrWtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled reduced SVM WtK')
    temp = ModelBuilder([base+"devfeaturesWtK_T.csv"], 1)
    temp.setScaler(base+"trainfeaturesWtKScaler_T.txt")
    temp.scaleData()
    temp.setDimensionReducer(base+"WtKReduceW_T.txt")
    temp.reduceDimensions()
    stats = temp.evaluateModel(base+"modelSVMsrwWtK_T.txt")
    maxW = max(maxW, stats[0])
    printStats(stats, 'Scaled reduced whitened SVM WtK')
    return [maxB, maxW]
def __init__(self, dataFileList, outputFile):
    #Train a down-sampled SVM from the given data files and save it to outputFile.
    temp = ModelBuilder(dataFileList)
    temp.downSample()
    temp.buildModelSVM(outputFile)
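#Hedged usage sketch (not from the original source): assuming this __init__
#belongs to a small wrapper class (SVMTrainer is a hypothetical name),
#training and saving a model is a one-liner.
#SVMTrainer(["extraneous/games/train/featuresBtL.csv"],
#           "extraneous/games/train/modelSVMBtL.txt")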
def redoMF():
    start = time.clock()
    istart = start
    temp = ModelBuilder([base+"trainfeaturesBtL.csv"], 1)
    temp.downSample()
    temp.buildScaler(base+"trainfeaturesBtLScaler.txt")
    temp.scaleData()
    dev = ModelBuilder([base+'devfeaturesBtL.csv'], 1)
    dev.setScaler(base+"trainfeaturesBtLScaler.txt")
    dev.scaleData()
    #Retrain the forest until the second reported statistic (recall, in this
    #repo's stat ordering) reaches 0.92 on the dev set.
    r = 0.5
    while r < 0.92:
        temp.buildModelRF(base+"modelRF100BtL.txt", 100)
        stats = dev.evaluateModel(base+"modelRF100BtL.txt")
        r = stats[1]
        printStats(stats, 'Scaled RF100 BtL')