# Leave-one-out cross-validation of a 5-nearest-neighbour regressor on the
# soil water permeability data. The dead zone radius passed to LeaveNOut is
# swept from 0 to 200 in steps of 10 and the concordance index obtained at
# each radius is plotted.

# NOTE: the import order below is kept exactly as it was. The star imports
# can rebind any name imported before them, so reordering is not safe without
# seeing those modules.
from kNN import NearestNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor
from Score import *
import matplotlib.pyplot as plt
import seaborn as sns
from legacy_script import *
from LeaveNOut import *
from sklearn.svm import SVR
import pdb

# np and pd are used below but were never imported explicitly (presumably
# they leaked in through the star imports above); import them here so the
# script does not depend on that.
import numpy as np
import pandas as pd

# NOTE(review): `input` shadows the builtin of the same name. The name is
# kept because code outside this section may reference it.
input = pd.read_csv('./Soil_water_permeability_data/INPUT.csv', header=None)
output = pd.read_csv('./Soil_water_permeability_data/OUTPUT.csv', header=None)
coordinates = pd.read_csv('./Soil_water_permeability_data/COORDINATES.csv', header=None)

# Normalization
std_input = standardize_dataset(input)

plot_data = []
for n in range(0, 201, 10):
    # A fresh model and runner are built for every radius, as before.
    model = NearestNeighborsRegressor(n_neighbors=5)
    runner = LeaveNOut(zone_radius=n)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and is also available on old pandas releases.
    predict, true = runner.run(
        data=std_input,
        model=model,
        labels=output.values,
        n_out=1,
        coordinates=coordinates.values,
    )
    score = Score()
    # Only the first column of the predictions / true values is scored.
    c_index = score.c_score(np.array(predict)[:, 0], np.array(true)[:, 0])
    plot_data.append([c_index, n, 'Concordance Index'])
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3 (the bare print statement is a SyntaxError on Python 3).
    print("epoch %d " % n)

# One plot for the whole sweep, drawn once every radius has been evaluated.
line_plot(
    np.array(plot_data),
    title="Concordance index by different Dead zone radius - Leave 1 out CV",
    x_title="Dead zone radius",
    y_title="C-Index",
)
# Leave-N-out cross-validation (N = 1 and N = 3) of a nearest-neighbour
# regressor on the water data, sweeping the number of neighbours from 1 to 29
# and plotting the concordance index of each of the three target columns.

# NOTE: the import order below is kept exactly as it was, because the star
# imports can rebind any name imported before them.
# NOTE(review): NearestNeighborsRegressor and Score are not imported in this
# section; they are expected to already be in scope (imports earlier in the
# file, or the star imports below) - confirm.
import matplotlib.pyplot as plt
import seaborn as sns
from legacy_script import *
from LeaveNOut import *
from sklearn.svm import SVR
import pdb

# np and pd are used below but were never imported explicitly (presumably
# they leaked in through the star imports above); import them here so the
# script does not depend on that.
import numpy as np
import pandas as pd

# Names attached to the first three columns of the data, in column order.
TARGET_NAMES = ('c_total', 'Cd', 'Pb')

data = pd.read_csv('Water_data.csv')

# Split data into labels and features: the first three columns are the
# labels, the remaining columns are the features. Explicit .iloc slices
# replace np.hsplit(data, [3]), which only works on a DataFrame through the
# deprecated DataFrame.swapaxes.
train_labels = data.iloc[:, :3]
train_data = data.iloc[:, 3:]

# Normalization
train_data = standardize_dataset(train_data)
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and is also available on old pandas releases.
train_labels = train_labels.values

# Try with different neighbors and different leave N out cross validation:
for n in [1, 3]:
    plot_data = []
    for i in range(1, 30):
        model = NearestNeighborsRegressor(n_neighbors=i)
        runner = LeaveNOut()
        predict, true = runner.run(data=train_data, model=model,
                                   labels=train_labels, n_out=n)
        score = Score()
        # Convert once per run instead of once per scored column.
        predicted = np.array(predict)
        actual = np.array(true)
        for column, target in enumerate(TARGET_NAMES):
            c_index = score.c_score(predicted[:, column], actual[:, column])
            plot_data.append([c_index, i, target])

    # One plot per leave-N-out setting, covering every neighbour count.
    line_plot(
        np.array(plot_data),
        title="C_index by different K Neighbors - Leave %s out CV" % n,
        x_title="K Neighbors",
        y_title="C-Index",
    )