예제 #1
0
def _prepare_knn_split(k, filename, train_size):
    """Build a KNNLearner for ``k`` and split the rows of ``filename``.

    The data is loaded through the learner's own ``getflatcsv``.  Columns
    are split at index 2 (the first two columns become X, the remaining
    columns become Y) and rows are split at ``train_size`` (rows before it
    are the training part, the rest the test part).  The learner's
    ``build_hash`` is called with the un-split training rows before
    returning.

    Returns:
        A tuple ``(learner, x_train, y_train, x_test, y_test)``.  The
        learner has NOT been given evidence yet, so the caller can time
        ``addEvidence`` separately.
    """
    learner = KNNLearner(k=k)
    data = learner.getflatcsv(filename)
    train_rows = numpy.split(data, [train_size])[0]
    (features, targets) = numpy.split(data, [2], axis=1)
    (x_train, x_test) = numpy.split(features, [train_size])
    (y_train, y_test) = numpy.split(targets, [train_size])
    learner.build_hash(train_rows)
    return learner, x_train, y_train, x_test, y_test


def knnlearner_test(filenames, max_k=50, train_size=600):
    """Sweep k for a KNNLearner on each file and plot the results.

    For every file, and for every k in ``1..max_k``, a fresh learner is
    trained on the first ``train_size`` rows and queried on the remaining
    rows.  Training time, query time, out-of-sample RMSE, in-sample RMSE
    and the value returned by ``get_correlation`` are recorded per k.  The
    k with the lowest out-of-sample RMSE is then retrained once more to
    draw a predicted-vs-actual scatter plot, and five graphs over k are
    written (file names are derived from ``filename``).

    Args:
        filenames: iterable of data file names; each must be a string
            because it is concatenated into the output file names.
        max_k: largest k to try (default 50, the original fixed value).
        train_size: row index at which the data is split into training
            and test parts (default 600, the original fixed value).

    Returns:
        None.  Results are printed and written through ``scatter``,
        ``get_graph`` and ``get_graph_two_plots``.

    Raises:
        ValueError: if ``max_k`` is smaller than 1, since there would be
            no RMSE value to pick a best k from.
    """
    if max_k < 1:
        raise ValueError("max_k must be at least 1")

    for filename in filenames:
        train_time = []
        query_time = []
        rmse_series = []
        rmse_series_insample = []
        covariance_series = []

        for k in range(1, max_k + 1):
            (learner, x_train, y_train,
             x_test, y_test) = _prepare_knn_split(k, filename, train_size)

            # Only the addEvidence call itself is timed.
            with Timer() as t:
                learner.addEvidence(x_train, y_train)
            train_time.append(t.interval)

            # Only the out-of-sample query is timed; the first element of
            # the returned pair is not used.
            with Timer() as t:
                (_, predicted) = learner.query(x_test)
            query_time.append(t.interval)

            actual = numpy.squeeze(numpy.asarray(y_test))
            predicted = numpy.array(predicted)
            rmse_series.append(get_rmse(actual, predicted))

            # In-sample error: query the learner with its own training X.
            (_, predicted_insample) = learner.query(x_train)
            actual_insample = numpy.squeeze(numpy.asarray(y_train))
            predicted_insample = numpy.array(predicted_insample)
            rmse_series_insample.append(
                get_rmse(actual_insample, predicted_insample))

            covariance_series.append(get_correlation(actual, predicted))

        # First k with the smallest out-of-sample RMSE.  The search is done
        # on the converted list so the lookup cannot miss because of a
        # float-vs-array-scalar mismatch.
        rmse_as_float = [float(value) for value in rmse_series]
        best_k = rmse_as_float.index(min(rmse_as_float)) + 1

        # Joining with single spaces reproduces the output of the former
        # comma-separated print statement and is valid on Python 2 and 3.
        print(" ".join(["best k = ", str(best_k), " for ", filename]))

        # Retrain from scratch with the best k for the scatter plot.
        (best_learner, x_train, y_train,
         x_test, y_test) = _prepare_knn_split(best_k, filename, train_size)
        best_learner.addEvidence(x_train, y_train)
        (_, predicted) = best_learner.query(x_test)
        actual = numpy.squeeze(numpy.asarray(y_test))
        predicted = numpy.array(predicted)
        scatter(predicted, actual, "scatterplot("+filename+")(for bestk).pdf")

        k_axis = numpy.arange(1, max_k + 1)
        # NOTE(review): the trailing 4 passed for the two timing graphs is
        # an extra positional argument of get_graph whose meaning is not
        # visible here -- confirm against get_graph.
        get_graph(k_axis, train_time, "K", "Train time in seconds",
                  "KNN_Train_time("+filename+").pdf", 4)
        get_graph(k_axis, query_time, "K", "Query time in seconds",
                  "KNN_Query_time("+filename+").pdf", 4)
        get_graph(k_axis, rmse_series, "K", "RMSE Error",
                  "RMSEvsk("+filename+").pdf")
        # The axis label and file name below are kept exactly as they were
        # (including spelling) so existing outputs do not change.
        get_graph(k_axis, covariance_series, "K", "Covariance Coefficeint",
                  "Covariance Coeff vs K("+filename+").pdf")
        get_graph_two_plots(k_axis, rmse_series_insample, rmse_series,
                            "K", "RMSE",
                            "insample_error_vs_outsample_error("+filename+").pdf")
예제 #2
0
def knnlearner_test(filenames, max_k=100, train_size=600):
    """Sweep k for a KNNLearner on each file and return the error series.

    For every file, and for every k in ``1..max_k``, a fresh learner is
    built, given the first ``train_size`` rows as evidence and queried on
    the remaining rows.  Columns are split at index 2: the first two
    columns become X and the remaining columns become Y.  For each k the
    RMSE (``get_rmse``) and the value returned by ``get_correlation``
    between the actual and the predicted Y are recorded.

    Args:
        filenames: iterable of data file names passed to the learner's
            ``getflatcsv``.
        max_k: largest k to try (default 100, the original fixed value).
        train_size: row index at which the data is split into training
            and test parts (default 600, the original fixed value).

    Returns:
        A tuple ``(rmse_series, covariance_series)`` of two lists with one
        entry per k.  The series are reset for every file, so when several
        files are given only the LAST file's series are returned (this
        matches the previous behaviour).  When ``filenames`` is empty two
        empty lists are returned instead of failing on unbound names.
    """
    # Bound up front so the return below is valid for an empty `filenames`.
    rmse_series = []
    covariance_series = []

    for filename in filenames:
        rmse_series = []
        covariance_series = []

        for k in range(1, max_k + 1):
            learner = KNNLearner(k=k)
            data = learner.getflatcsv(filename)

            train_rows = numpy.split(data, [train_size])[0]
            (features, targets) = numpy.split(data, [2], axis=1)
            (x_train, x_test) = numpy.split(features, [train_size])
            (y_train, y_test) = numpy.split(targets, [train_size])

            learner.build_hash(train_rows)
            learner.addEvidence(x_train, y_train)

            # The first element of the returned pair is not used.
            (_, predicted) = learner.query(x_test)

            actual = numpy.squeeze(numpy.asarray(y_test))
            predicted = numpy.array(predicted)
            rmse_series.append(get_rmse(actual, predicted))
            covariance_series.append(get_correlation(actual, predicted))

    return (rmse_series, covariance_series)