def _knn_prepare_split(k, filename, train_rows, feature_cols):
    """Create a ``KNNLearner(k=k)`` and split ``filename`` into train/test parts.

    The data set is loaded through the learner's own ``getflatcsv``. Before
    returning, the learner's ``build_hash`` is called with the first
    ``train_rows`` rows (all columns) of that data set.

    Returns:
        ``(learner, x_train, x_test, y_train, y_test)``. The ``x_*`` arrays
        hold the first ``feature_cols`` columns and the ``y_*`` arrays the
        remaining columns; ``*_train`` are the first ``train_rows`` rows and
        ``*_test`` the rows after them.
    """
    learner = KNNLearner(k=k)
    data = learner.getflatcsv(filename)
    train_block = numpy.split(data, [train_rows])[0]
    features, targets = numpy.split(data, [feature_cols], axis=1)
    x_train, x_test = numpy.split(features, [train_rows])
    y_train, y_test = numpy.split(targets, [train_rows])
    learner.build_hash(train_block)
    return learner, x_train, x_test, y_train, y_test


# NOTE(review): a later definition named ``knnlearner_test`` exists further
# down in this file; at import time that later binding replaces this one.
def knnlearner_test(filenames, max_k=50, train_rows=600, feature_cols=2):
    """Sweep k for a KNN learner on each file and write the result plots.

    For every file and every ``k`` in ``1..max_k`` a fresh learner is built,
    trained on the first ``train_rows`` rows and queried on the remaining
    rows. Collected per k: the ``Timer`` interval of ``addEvidence``, the
    ``Timer`` interval of the test-set ``query``, the RMSE on the test rows,
    the RMSE on the training rows and the value of ``get_correlation``.
    Afterwards the k with the smallest test RMSE (first one on ties) is
    printed, a learner with that k is refit for a scatter plot, and the
    series are passed to ``get_graph`` / ``get_graph_two_plots``.

    Args:
        filenames: Iterable of data file names understood by
            ``KNNLearner.getflatcsv``.
        max_k: Largest k to evaluate (default 50, the previous fixed value).
        train_rows: Number of leading rows used for training (default 600).
        feature_cols: Number of leading columns treated as features
            (default 2).

    Raises:
        ValueError: If ``max_k`` is smaller than 1, because no model could be
            evaluated and no best k could be selected.
    """
    if max_k < 1:
        raise ValueError("max_k must be at least 1")

    k_values = numpy.arange(1, max_k + 1)

    for filename in filenames:
        train_time = []
        query_time = []
        rmse_series = []
        rmse_series_insample = []
        covariance_series = []

        for k in range(1, max_k + 1):
            learner, x_train, x_test, y_train, y_test = _knn_prepare_split(
                k, filename, train_rows, feature_cols)

            # Only the training call itself is timed.
            with Timer() as timer:
                learner.addEvidence(x_train, y_train)
            train_time.append(timer.interval)

            # Only the out-of-sample query is timed.
            with Timer() as timer:
                _, predicted = learner.query(x_test)
            query_time.append(timer.interval)

            actual = numpy.squeeze(numpy.asarray(y_test))
            predicted = numpy.array(predicted)
            rmse_series.append(get_rmse(actual, predicted))

            # In-sample error: query the learner with its own training rows.
            _, fitted = learner.query(x_train)
            actual_train = numpy.squeeze(numpy.asarray(y_train))
            fitted = numpy.array(fitted)
            rmse_series_insample.append(get_rmse(actual_train, fitted))

            covariance_series.append(get_correlation(actual, predicted))

        # Compare plain floats on both sides so the lookup cannot miss
        # because of the element type stored in rmse_series.
        errors = [float(err) for err in rmse_series]
        best_k = errors.index(min(errors)) + 1
        # Single pre-formatted string: prints the same text as the former
        # comma-separated print statement and is valid on Python 2 and 3.
        print("best k =  %s  for  %s" % (best_k, filename))

        # Refit with the best k to obtain the predictions for the scatter plot.
        best_learner, x_train, x_test, y_train, y_test = _knn_prepare_split(
            best_k, filename, train_rows, feature_cols)
        best_learner.addEvidence(x_train, y_train)
        _, predicted = best_learner.query(x_test)
        actual = numpy.squeeze(numpy.asarray(y_test))
        predicted = numpy.array(predicted)
        scatter(predicted, actual, "scatterplot(" + filename + ")(for bestk).pdf")

        # The trailing 4 is passed through unchanged; its meaning is defined
        # by get_graph.
        get_graph(k_values, train_time, "K", "Train time in seconds",
                  "KNN_Train_time(" + filename + ").pdf", 4)
        get_graph(k_values, query_time, "K", "Query time in seconds",
                  "KNN_Query_time(" + filename + ").pdf", 4)
        get_graph(k_values, rmse_series, "K", "RMSE Error",
                  "RMSEvsk(" + filename + ").pdf")
        # Label and file name are kept exactly as before (spelling included)
        # so the generated output does not change.
        get_graph(k_values, covariance_series, "K", "Covariance Coefficeint",
                  "Covariance Coeff vs K(" + filename + ").pdf")
        get_graph_two_plots(k_values, rmse_series_insample, rmse_series,
                            "K", "RMSE",
                            "insample_error_vs_outsample_error(" + filename + ").pdf")
# NOTE(review): an earlier definition named ``knnlearner_test`` exists in this
# file; being defined later, this is the binding callers actually get.
def knnlearner_test(filenames, max_k=100, train_rows=600, feature_cols=2):
    """Sweep k for a KNN learner and return the error/correlation series.

    For every file and every ``k`` in ``1..max_k`` a fresh ``KNNLearner`` is
    built, trained on the first ``train_rows`` rows and queried on the
    remaining rows. The test-set ``get_rmse`` and ``get_correlation`` values
    are collected per k.

    NOTE(review): the original indentation was lost, so the position of the
    ``return`` is reconstructed. It is assumed to follow the filename loop,
    which means the series are restarted for each file and the ones of the
    LAST file are returned -- confirm against the callers.

    Args:
        filenames: Iterable of data file names understood by
            ``KNNLearner.getflatcsv``.
        max_k: Largest k to evaluate (default 100, the previous fixed value).
        train_rows: Number of leading rows used for training (default 600).
        feature_cols: Number of leading columns treated as features
            (default 2).

    Returns:
        Tuple ``(rmse_series, covariance_series)`` of two lists with one
        entry per evaluated k. Both lists are empty when ``filenames`` is
        empty or ``max_k`` is smaller than 1.
    """
    # Bound up front so that an empty ``filenames`` yields empty series
    # instead of failing on names that were never assigned.
    rmse_series = []
    covariance_series = []

    for filename in filenames:
        # Each file starts its own series.
        rmse_series = []
        covariance_series = []

        for k in range(1, max_k + 1):
            learner = KNNLearner(k=k)
            data = learner.getflatcsv(filename)

            # First train_rows rows (all columns) feed build_hash.
            train_block = numpy.split(data, [train_rows])[0]
            # Columns before feature_cols are X, the remaining ones are Y.
            features, targets = numpy.split(data, [feature_cols], axis=1)
            x_train, x_test = numpy.split(features, [train_rows])
            y_train, y_test = numpy.split(targets, [train_rows])

            learner.build_hash(train_block)
            learner.addEvidence(x_train, y_train)

            _, predicted = learner.query(x_test)
            actual = numpy.squeeze(numpy.asarray(y_test))
            predicted = numpy.array(predicted)

            rmse_series.append(get_rmse(actual, predicted))
            covariance_series.append(get_correlation(actual, predicted))

    return (rmse_series, covariance_series)