def run_debuging_tests():
    """Smoke-test every learner type on the CSV dataset named in sys.argv[1].

    Loads the data, splits it 60/40 into train/test, then trains and
    evaluates LinRegLearner, DTLearner, RTLearner, BagLearner and
    InsaneLearner through the shared run_learner() helper.
    """
    # Read the CSV into a float ndarray. list(map(...)) is required on
    # Python 3, where a bare map() would leave opaque map objects in the
    # array. The context manager guarantees the file is closed.
    with open(sys.argv[1]) as inf:
        data = np.array([list(map(float, line.strip().split(',')))
                         for line in inf])

    # One in-place shuffle is enough; the original shuffled twice
    # (np.random.permutation followed by np.random.shuffle), which is
    # redundant work with no statistical benefit.
    np.random.shuffle(data)

    # Compute how much of the data is training and testing (60/40 split).
    train_rows = int(0.6 * data.shape[0])
    test_rows = data.shape[0] - train_rows

    # Separate out training and testing data; the last column is the label.
    trainX = data[:train_rows, 0:-1]
    trainY = data[:train_rows, -1]
    testX = data[train_rows:, 0:-1]
    testY = data[train_rows:, -1]

    # Exercise each learner in turn through the shared train/evaluate helper.
    learner = lrl.LinRegLearner(verbose=True)
    run_learner(learner, trainX, trainY, testX, testY)

    learner = dt.DTLearner(leaf_size=1, verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)

    learner = rt.RTLearner(leaf_size=1, verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)

    learner = bl.BagLearner(learner=lrl.LinRegLearner, kwargs={},
                            bags=10, boost=False, verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)

    learner = il.InsaneLearner(verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)
def insane():
    """Smoke-test InsaneLearner: construct it, train it, and query it.

    NOTE(review): trainX, trainY and testX are neither parameters nor
    locals — this function only works if they exist as module-level
    globals when it is called; confirm they are defined by the caller.
    The query result Y is computed but never returned or checked.
    """
    import InsaneLearner as it
    learner = it.InsaneLearner(verbose=False)
    learner.addEvidence(trainX, trainY)
    Y = learner.query(testX)
# Create the learner selected by ltype and train it.
# All prints are normalized to the print() function: the original mixed
# print('...') calls with a Python 2 `print learner.author()` statement,
# which does not parse under Python 3.
if ltype == 0:
    learner = lrl.LinRegLearner(verbose=True)  # plain linear regression
elif ltype == 1:
    print('Creating DTLearner')
    learner = dtl.DTLearner(1, verbose=False)
elif ltype == 2:
    print('Creating RTLearner')
    learner = rtl.RTLearner(1, verbose=False)
elif ltype == 3:
    print('Creating BAGLearner')
    learner = bagl.BagLearner(dtl.DTLearner, kwargs={"leaf_size": 1},
                              bags=15, verbose=False)
elif ltype == 4:
    print('Creating InsaneLearner')
    learner = itl.InsaneLearner(verbose=False)
elif ltype == 5:
    print('Creating plot for DTlearner for overfitting vs leaf size')
    leafSizePlot(dtl.DTLearner, trainX, trainY, testX, testY,
                 "DTLearner", data, 100)
elif ltype == 6:
    print('Creating plot for BagLearner for overfitting vs leaf size')
    leafSizePlot(bagl.BagLearner, trainX, trainY, testX, testY,
                 "BagLearner", data, 25)
elif ltype == 7:
    print('Calculating metrics')
    calculateMetrics(data)

# Only ltypes 0-4 created a learner object that can be trained here;
# 5-7 already did their work inside the branch above.
if ltype in range(0, 5):
    learner.addEvidence(trainX, trainY)  # train it
    print(learner.author())
# evaluate in sample
# Compute how much of the data is training vs testing: 60/40 split.
# Python 2 print statements converted to print() calls so this fragment
# parses under Python 3 like the rest of the file; commented-out dead
# code removed.
train_rows = int(0.6 * data.shape[0])
test_rows = data.shape[0] - train_rows

# Separate out training and testing data; the last column is the label.
trainX = data[:train_rows, 0:-1]
trainY = data[:train_rows, -1]
testX = data[train_rows:, 0:-1]
testY = data[train_rows:, -1]
print(trainX.shape)

# Create a learner and train it.
learner = il.InsaneLearner()
learner.addEvidence(trainX, trainY)
print(learner.author())

# Evaluate in sample.
predY = learner.query(trainX)  # get the predictions
print("PRED Y Shape: " + str(predY.shape))
print("TRAINX SHAPE: " + str(trainX.shape))
rmse = math.sqrt(((trainY - predY) ** 2).sum() / trainY.shape[0])
print()
print("In sample results")
print("RMSE: ", rmse)
c = np.corrcoef(predY, y=trainY)
print("corr: ", c[0, 1])
# evaluate out of sample
# Compute how much of the data is training vs testing: 60/40 split.
# Python 2 print statements converted to print() calls so this fragment
# parses under Python 3 like the rest of the file.
train_rows = int(0.6 * data.shape[0])
test_rows = data.shape[0] - train_rows

# Separate out training and testing data; the last column is the label.
trainX = data[:train_rows, 0:-1]
trainY = data[:train_rows, -1]
testX = data[train_rows:, 0:-1]
testY = data[train_rows:, -1]
print(testX.shape)
print(testY.shape)

# Create a learner and train it; same API shape as BagLearner.
learner = it.InsaneLearner(verbose=False)
learner.addEvidence(trainX, trainY)  # train it
print(learner.author())

# Evaluate in sample.
predY = learner.query(trainX)  # get the predictions
rmse = math.sqrt(((trainY - predY) ** 2).sum() / trainY.shape[0])
print()
print("In sample results")
print("RMSE: ", rmse)
c = np.corrcoef(predY, y=trainY)
print("corr: ", c[0, 1])

# Evaluate out of sample.
predY = learner.query(testX)  # get the predictions
rmse = math.sqrt(((testY - predY) ** 2).sum() / testY.shape[0])
import numpy as np
import InsaneLearner as it

if __name__ == '__main__':
    data = np.genfromtxt('Data/best4lrr_data.csv', delimiter=',')
    # Deliberately NOT shuffled, so output can be compared against a
    # single LinRegLearner run on the same row ordering.
    split = int(0.6 * data.shape[0])  # 60-40 break into train-test sets
    trainX = data[:split, :-1]
    trainY = data[:split, -1]  # last column is labels
    testX = data[split:, :-1]
    testY = data[split:, -1]  # last column is labels

    learner = it.InsaneLearner(verbose=False)  # constructor
    # BUG FIX: the method is addEvidence (as called everywhere else in
    # this file), not add_Evidence, which raises AttributeError.
    learner.addEvidence(trainX, trainY)

    # Evaluate in sample.
    Y = learner.query(trainX)  # get the predictions
    rmse = np.sqrt(((Y - trainY) ** 2).sum() / trainY.shape[0])
    corr = np.corrcoef(Y, trainY)
    print("In sample results")
    print("RMSE: ", rmse)
    print("corr: ", corr[0, 1])

    # Evaluate out of sample.
    Y = learner.query(testX)  # get the predictions
    rmse = np.sqrt(((Y - testY) ** 2).sum() / testY.shape[0])
    corr = np.corrcoef(Y, testY)
    # BUG FIX: a bare `print` is a Python 2 leftover — under Python 3 it
    # evaluates the function object and prints nothing; print() emits
    # the intended blank line.
    print()
    print("Out of sample results")
    print("RMSE: ", rmse)
    print("corr: ", corr[0, 1])
# DTlearner evaluation fragment.
# Python 2 print statements converted to print() calls so this fragment
# parses under Python 3 like the rest of the file; commented-out dead
# code removed.

# Compute how much of the data is training vs testing: 60/40 split.
train_rows = int(0.6 * data.shape[0])
test_rows = data.shape[0] - train_rows

# Separate out training and testing data; the last column is the label.
trainX = data[:train_rows, 0:-1]
trainY = data[:train_rows, -1]
testX = data[train_rows:, 0:-1]
testY = data[train_rows:, -1]
print(testX.shape)
print(testY.shape)

# Train and query an InsaneLearner (result Y is not evaluated here).
learner = it.InsaneLearner()  # constructor
learner.addEvidence(trainX, trainY)  # training step
Y = learner.query(testX)  # query
print(learner.author())

# Train a LinRegLearner and evaluate it in sample.
learner = lrl.LinRegLearner(verbose=True)
learner.addEvidence(trainX, trainY)  # train it
pred = learner.query(trainX)
print(learner.author())

rmse = math.sqrt(((trainY - pred) ** 2).sum() / trainY.shape[0])
print()
print("In sample results")
print("RMSE: ", rmse)
c = np.corrcoef(pred, y=trainY)