from kNN import NearestNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor
from Score import *
import matplotlib.pyplot as plt
import seaborn as sns
from legacy_script import *
from LeaveNOut import *
from sklearn.svm import SVR
import pdb

# Experiment 1: concordance index of a 5-nearest-neighbour regressor under
# leave-one-out cross-validation, evaluated for dead-zone radii 0, 10, ..., 200.
# NOTE(review): `pd` and `np` are not imported explicitly in this file; they
# presumably arrive through one of the wildcard imports above -- confirm.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept so that any code referring to it keeps working.
input = pd.read_csv('./Soil_water_permeability_data/INPUT.csv', header=None)
output = pd.read_csv('./Soil_water_permeability_data/OUTPUT.csv', header=None)
coordinates = pd.read_csv('./Soil_water_permeability_data/COORDINATES.csv', header=None)

# Normalization
std_input = standardize_dataset(input)

# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was
# removed in pandas 1.0. Neither frame is reassigned inside the loop, so the
# conversion is done once up front instead of once per radius.
label_matrix = output.values
coordinate_matrix = coordinates.values

# Rows of [c-index, dead zone radius, series label], as passed to line_plot.
plot_data = []

for n in range(0, 201, 10):
    model = NearestNeighborsRegressor(n_neighbors=5)
    runner = LeaveNOut(zone_radius=n)
    predict, true = runner.run(data=std_input, model=model, labels=label_matrix,
                               n_out=1, coordinates=coordinate_matrix)
    score = Score()
    # Only the first column of the predictions / true values is scored.
    c_index = score.c_score(np.array(predict)[:, 0], np.array(true)[:, 0])
    plot_data.append([c_index, n, 'Concordance Index'])
    # Parenthesised form prints the same text on Python 2 and is valid Python 3
    # (the bare `print "..."` statement is a SyntaxError there).
    print("epoch %d " % n)

line_plot(np.array(plot_data), title="Concordance index by different Dead zone radius - Leave 1 out CV",
          x_title="Dead zone radius", y_title="C-Index")


import matplotlib.pyplot as plt
import seaborn as sns
from legacy_script import *
from LeaveNOut import *
from sklearn.svm import SVR
import pdb

# Experiment 2: concordance index per target for K = 1..29 neighbours, under
# leave-1-out and leave-3-out cross-validation (one plot per setting).
# NOTE(review): `pd` and `np` are not imported explicitly in this file; they
# presumably arrive through one of the wildcard imports above -- confirm.
data = pd.read_csv('Water_data.csv')
# Split data into labels and features: the first three columns are the labels,
# every remaining column is a feature. Positional slicing with `iloc` selects
# the same columns as `np.hsplit(data, [3])` without routing a DataFrame
# through numpy's array-splitting machinery.
train_labels = data.iloc[:, :3]
train_data = data.iloc[:, 3:]

# Normalization
train_data = standardize_dataset(train_data)
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was
# removed in pandas 1.0.
train_labels = train_labels.values

# (label column index, series name) pairs, in the order they are plotted.
target_series = [(0, 'c_total'), (1, 'Cd'), (2, 'Pb')]

#Try with different neighbors and different leave N out cross validation:
for n in [1, 3]:
    # Rows of [c-index, K, series name]; reset for every leave-N-out setting.
    plot_data = []

    for i in range(1, 30, 1):
        model = NearestNeighborsRegressor(n_neighbors=i)
        runner = LeaveNOut()
        predict, true = runner.run(data=train_data, model=model, labels=train_labels, n_out=n)
        score = Score()
        # Convert once per run instead of once per scored column.
        predicted = np.array(predict)
        actual = np.array(true)
        for column, series_name in target_series:
            plot_data.append([score.c_score(predicted[:, column], actual[:, column]), i, series_name])

    line_plot(np.array(plot_data), title="C_index by different K Neighbors - Leave %s out CV" % n,
              x_title="K Neighbors", y_title="C-Index")