def randomforest_test(filenames):
    """Score Randomforestlearner on each file for forest sizes k = 1..100.

    For every k a new ``Randomforestlearner(k=k)`` loads the file through its
    ``getflatcsv`` method. The first two columns are taken as X and the
    remaining column(s) as Y; the first 600 rows train the learner and the
    rest are queried. RMSE and correlation between the actual and predicted
    Y are collected per k.

    Args:
        filenames: iterable of file names accepted by ``getflatcsv``.

    Returns:
        Tuple ``(rmse_series, correlation_series)``, each a list with one
        entry per k, for the last file in ``filenames``. Both lists are
        empty when ``filenames`` is empty.
    """
    # NOTE(review): the original indentation was lost, so the position of the
    # final ``return`` is ambiguous. This version returns after the loop
    # (i.e. the series of the last file); for a single file both readings
    # agree.
    rmse_series_randomforest = []
    covariance_series_randomforest = []

    for filename in filenames:
        # The series are per file, so start fresh for each one.
        rmse_series_randomforest = []
        covariance_series_randomforest = []

        for k in range(1, 101):
            randomforestlearner = Randomforestlearner(k=k)
            get_set = randomforestlearner.getflatcsv(filename)

            # Columns [0, 2) are the features, everything after is the target.
            (X, Y) = numpy.split(get_set, [2], axis=1)
            # Rows [0, 600) train the learner, the remainder is held out.
            (XTrain, XTest) = numpy.split(X, [600])
            (Ytrain, YTest) = numpy.split(Y, [600])
            Y_Test = numpy.squeeze(numpy.asarray(YTest))

            randomforestlearner.addEvidence(XTrain, Ytrain)
            Y_Return = numpy.array(randomforestlearner.query(XTest))

            rmse_series_randomforest.append(get_rmse(Y_Test, Y_Return))
            covariance_series_randomforest.append(
                get_correlation(Y_Test, Y_Return))

    return (rmse_series_randomforest, covariance_series_randomforest)
def _load_training_stack(first_number, last_number):
    """Stack ``createX`` rows of ML4T-<first>..ML4T-<last> (inclusive) read from the cwd."""
    blocks = [
        createX(read_file('ML4T-{0:03d}.csv'.format(number)))
        for number in range(first_number, last_number + 1)
    ]
    # One vstack over all blocks instead of re-copying a growing array per file.
    return numpy.vstack(blocks)


def _evaluate_test_file(stack, file_number, data_dir, home_dir):
    """Train on ``stack``, predict one ML4T test file, then plot and print its scores.

    Args:
        stack: training matrix; columns [0, 5) are features, the rest is Y.
        file_number: integer id of the test file (``ML4T-<id>.csv``).
        data_dir: directory holding the CSV files, relative to ``home_dir``.
        home_dir: directory the plots are written to; it is the working
            directory again when this function returns or raises.
    """
    tag = '{0:03d}'.format(file_number)

    os.chdir(data_dir)
    try:
        testdatesandY = read_file('ML4T-' + tag + '.csv')
        test = createX(testdatesandY)

        (XTrain, YTrain) = numpy.split(stack, [5], axis=1)
        (XTest, YTest) = numpy.split(test, [5], axis=1)

        # A fresh learner is trained for every test file, as the original
        # flow did.
        randomforestlearner = Randomforestlearner(k=50)
        randomforestlearner.addEvidence(XTrain, YTrain)
        Y_Return = numpy.multiply(
            numpy.array(randomforestlearner.query(XTest)), -1)
        Y_Test = numpy.squeeze(numpy.asarray(YTest))

        print(len(Y_Return))
        print(len(Y_Test))
        print(len(testdatesandY))

        # Add the raw series value (from offset 5 up to 26 before the end) to
        # both the predicted and the actual Y, row by row.
        # NOTE(review): presumably this turns relative values back into
        # levels, and 5 / 26 mirror the windows used by createX -- confirm.
        for start, index in enumerate(range(5, len(testdatesandY) - 26)):
            Y_Return[start] = Y_Return[start] + testdatesandY[index]
            Y_Test[start] = Y_Test[start] + testdatesandY[index]
    finally:
        # Always return to the starting directory, even on failure.
        os.chdir(home_dir)

    days = numpy.arange(1, 101)

    get_graph_two_plots(days, Y_Return[:100], Y_Test[:100], "Days", "Y",
                        "YpredictvsYactual_" + tag + "_first100.jpg")

    # First 100 points of the trailing 126-row window (kept as in the
    # original, even though the output name says "last100").
    last126_test = Y_Test[-126:]
    last126_return = Y_Return[-126:]
    get_graph_two_plots(numpy.arange(1, 101), last126_return[:100],
                        last126_test[:100], "Days", "Y",
                        "YpredictvsYactual_" + tag + "_last100.jpg")

    scatter(Y_Return, Y_Test, "scatterplot_" + tag + ".jpg")

    mean_series = XTest[:, 0]
    std_series = XTest[:, 1]
    rsi_series = XTest[:, 2]
    roc_series = XTest[:, 3]
    slope_series = XTest[:, 4]
    all_feature_graph(mean_series[:100], std_series[:100], rsi_series[:100],
                      roc_series[:100], slope_series[:100],
                      numpy.arange(1, 101), "Days", "Features",
                      "Allfeature_" + tag + ".jpg")

    print("Correlation {0} is {1}".format(tag, get_correlation(Y_Test, Y_Return)))
    print("RMSE {0} is {1}".format(tag, get_rmse(Y_Test, Y_Return)))


def main():
    """Train a random forest on ML4T-000..099 and evaluate it on files 292 and 132.

    Reads the CSV files from ``proj3-data-fixed`` (relative to the current
    working directory), prints the training row count and per-file
    diagnostics, and writes the comparison, scatter and feature plots into
    the starting directory.
    """
    data_dir = 'proj3-data-fixed'
    curr_dirr = os.getcwd()

    os.chdir(data_dir)
    try:
        # The original loops covered 000, 001-009 and 011-099, silently
        # skipping ML4T-010.csv; the full 000-099 range is loaded here.
        stack = _load_training_stack(0, 99)
    finally:
        os.chdir(curr_dirr)
    print(len(stack))

    for file_number in (292, 132):
        _evaluate_test_file(stack, file_number, data_dir, curr_dirr)