Beispiel #1
0
def randomforest_test(filenames):
    """Score random-forest learners of size k = 1..100 on each data file.

    For every file and every k in 1..100 a fresh ``Randomforestlearner(k=k)``
    is created.  The array returned by its ``getflatcsv(filename)`` is split
    column-wise (first two columns are X, the remaining ones Y) and row-wise
    (first 600 rows train, the remaining rows test).  The learner is trained
    on the training part and queried on the test part.

    Args:
        filenames: iterable of file names passed to ``getflatcsv``.

    Returns:
        A tuple ``(rmse_series, correlation_series)`` of two lists with one
        entry per k.  The series are reset for every file, so only the
        results for the LAST file are returned.  When ``filenames`` is empty
        two empty lists are returned (previously this raised
        ``UnboundLocalError``).
    """
    # Bind the result lists up front so an empty `filenames` still returns.
    rmse_series_randomforest = []
    covariance_series_randomforest = []
    for filename in filenames:
        # Each file starts with fresh series (last file wins, as before).
        rmse_series_randomforest = []
        covariance_series_randomforest = []
        for k in range(1, 101):
            randomforestlearner = Randomforestlearner(k=k)
            # NOTE(review): the data is re-read for every k; it is loaded via
            # the learner instance, so it is left inside the loop in case
            # getflatcsv touches learner state -- confirm before hoisting.
            get_set = randomforestlearner.getflatcsv(filename)
            (X, Y) = numpy.split(get_set, [2], axis=1)
            (XTrain, XTest) = numpy.split(X, [600])
            (Ytrain, YTest) = numpy.split(Y, [600])
            # Flatten the test targets so they line up with the predictions.
            Y_Test = numpy.squeeze(numpy.asarray(YTest))
            randomforestlearner.addEvidence(XTrain, Ytrain)
            Y_Return = numpy.array(randomforestlearner.query(XTest))
            rmse_series_randomforest.append(get_rmse(Y_Test, Y_Return))
            # Despite the variable name, this stores get_correlation output.
            covariance_series_randomforest.append(
                get_correlation(Y_Test, Y_Return))
    return (rmse_series_randomforest, covariance_series_randomforest)
Beispiel #2
0
def _evaluate_test_file(XTrain, YTrain, symbol, data_dir, out_dir):
    """Train a 50-tree learner, predict one test file, and write its reports.

    Reads ``ML4T-<symbol>.csv`` from ``data_dir``, builds its feature matrix
    with ``createX``, trains a fresh ``Randomforestlearner(k=50)`` on
    ``XTrain``/``YTrain`` and queries it on the test features.  Plots and the
    scatter image are written after changing back to ``out_dir``; correlation
    and RMSE are printed.

    Args:
        XTrain: training feature matrix.
        YTrain: training target matrix.
        symbol: three-character file index as a string, e.g. ``'292'``.
        data_dir: directory containing the ``ML4T-*.csv`` files.
        out_dir: directory that is current while the images are written.
    """
    os.chdir(data_dir)
    try:
        testdatesandY = read_file('ML4T-' + symbol + '.csv')
        test = createX(testdatesandY)
        (XTest, YTest) = numpy.split(test, [5], axis=1)

        randomforestlearner = Randomforestlearner(k=50)
        randomforestlearner.addEvidence(XTrain, YTrain)
        # NOTE(review): predictions are negated here; the reason is not
        # visible in this file -- confirm against createX's Y definition.
        Y_Return = numpy.multiply(
            numpy.array(randomforestlearner.query(XTest)), -1)
        Y_Test = numpy.squeeze(numpy.asarray(YTest))

        print(len(Y_Return))
        print(len(Y_Test))
        print(len(testdatesandY))

        # Add the raw series value (offset by 5 rows, stopping 26 rows before
        # the end) onto both the predicted and the actual values.
        for start, index in enumerate(range(5, len(testdatesandY) - 26)):
            Y_Return[start] = Y_Return[start] + testdatesandY[index]
            Y_Test[start] = Y_Test[start] + testdatesandY[index]
    finally:
        # Always leave the data directory, even if reading/training failed.
        os.chdir(out_dir)

    days = numpy.arange(1, 101)
    get_graph_two_plots(
        days, Y_Return[:100], Y_Test[:100], "Days", "Y",
        "YpredictvsYactual_{0}_first100.jpg".format(symbol))
    # NOTE(review): the file is named "last100" but this plots the FIRST 100
    # of the final 126 values -- kept as in the original, confirm intent.
    last126_test = Y_Test[-126:]
    last126_return = Y_Return[-126:]
    get_graph_two_plots(
        days, last126_return[:100], last126_test[:100], "Days", "Y",
        "YpredictvsYactual_{0}_last100.jpg".format(symbol))
    scatter(Y_Return, Y_Test, "scatterplot_{0}.jpg".format(symbol))

    # The five feature columns, named as in the original code.
    mean_series = XTest[:, 0]
    std_series = XTest[:, 1]
    rsi_series = XTest[:, 2]
    roc_series = XTest[:, 3]
    slope_series = XTest[:, 4]
    all_feature_graph(
        mean_series[:100], std_series[:100], rsi_series[:100],
        roc_series[:100], slope_series[:100], days, "Days", "Features",
        "Allfeature_{0}.jpg".format(symbol))

    print("Correlation {0} is {1}".format(
        symbol, get_correlation(Y_Test, Y_Return)))
    print("RMSE {0} is {1}".format(symbol, get_rmse(Y_Test, Y_Return)))


def main():
    """Train on files ML4T-000..099 and report predictions for 292 and 132.

    The feature matrices of ``ML4T-000.csv`` through ``ML4T-099.csv`` (read
    from ``proj3-data-fixed`` under the current directory) are stacked into
    one training matrix whose first five columns are features and whose
    remaining columns are targets.  For each of the test files 292 and 132 a
    learner is trained and evaluated; images are written to the directory
    that was current when ``main`` was called.

    Fix over the earlier version: file ``ML4T-010.csv`` was skipped because
    the second loop started at 11; it is now part of the training set.
    """
    curr_dirr = os.getcwd()
    data_dir = os.path.join(curr_dirr, 'proj3-data-fixed')

    os.chdir(data_dir)
    try:
        # Zero-padded index covers 000..099 with a single format.
        feature_blocks = [
            createX(read_file('ML4T-{0:03d}.csv'.format(i)))
            for i in range(100)
        ]
    finally:
        os.chdir(curr_dirr)

    # Stack once instead of re-copying the growing matrix on every file.
    stack = numpy.vstack(feature_blocks)
    print(len(stack))

    (XTrain, YTrain) = numpy.split(stack, [5], axis=1)
    for symbol in ('292', '132'):
        _evaluate_test_file(XTrain, YTrain, symbol, data_dir, curr_dirr)