# # Start QDA Classification
# print "Performing QDA Classification:"
# from sklearn.qda import QDA
# clf = QDA(priors=None, reg_param=0.001).fit(X_cropped, np.ravel(y_cropped[:]))
# y_validation_predicted = clf.predict(X_validation)
# print "Error rate for QDA (Validation): ", ml_aux.get_error_rate(y_validation,y_validation_predicted)



# Start Random Forest Classification
print "Performing Random Classification:"
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(n_estimators=500)
forest = forest.fit(X_cropped, np.ravel(y_cropped[:]))
y_validation_predicted = forest.predict(X_validation)
print "Error rate for Random Forest (Validation): ", ml_aux.get_error_rate(y_validation,y_validation_predicted)
# ml_aux.plot_confusion_matrix(y_validation, y_validation_predicted, "CM Random Forest (t1)")
# plt.show()

pickle.dump(forest,open('t5_random_forest.pkl','wb'))


# # Start k nearest neighbor Classification
# print "Performing kNN Classification:"
# from sklearn import neighbors
# knn_model = neighbors.KNeighborsClassifier(n_neighbors=2, algorithm='auto',leaf_size=15)
# knn_model.fit(X_cropped, y_cropped)
# # y_train_predicted = knn_model.predict(X_train)
# # print "Error Rate for kNN (Cropped): ", ml_aux.get_error_rate(y_train, y_train_predicted)
#
# y_validation_predicted =  knn_model.predict(X_validation)
X = df_merged.drop(["Genre","Song ID","Track ID"], axis = 1)
#Split
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
#Train
adaboost_model.fit(X_train,y_train)
#Predict
y_train_predicted = adaboost_model.predict(X_train)
y_test_predicted = adaboost_model.predict(X_test)

print "Number of Train Samples: ", (y_train.shape[0])
print "Number of Test Samples: ", (y_test.shape[0])

print "Train Classification Rate: ", (sum(y_train_predicted == y_train)) / float(y_train.shape[0])
print "Test Classification Rate: ", (sum(y_test_predicted == y_test)) / float(y_test.shape[0])

print ml_aux.getUniqueCount(y_train)
print ml_aux.getUniqueCount(y_test)

print "try func: ", ml_aux.get_error_rate(y_train, y_train_predicted)

print ml_aux.plot_confusion_matrix(y_train,y_train_predicted,"Train")
plt.show()


ml_aux.plot_confusion_matrix(y_test,y_test_predicted,"Test")
plt.show()



# Crop the dataset: the value returned by crop_rock.find_second_max_value is
# handed to crop_rock.drop_excess_rows together with the frame to crop.
# NOTE(review): presumably this caps the over-represented genre; confirm in crop_rock.
maxval = crop_rock.find_second_max_value(df_train_toCrop)
df_cropped = crop_rock.drop_excess_rows(df_train_toCrop, maxval)

# Features are every column except "Genre"; "Genre" itself is the label.
X_cropped = df_cropped.drop(["Genre"], axis=1)
y_cropped = df_cropped["Genre"]

# Start LDA Classification
print "Performing LDA Classification:"
from sklearn.lda import LDA

clf = LDA(solver="svd", shrinkage=None, n_components=None).fit(X_cropped, np.ravel(y_cropped[:]))

# Use X_cropped to get best model
y_train_predicted = clf.predict(X_train)
print "Error rate for LDA on Training: ", ml_aux.get_error_rate(y_train, y_train_predicted)
# ml_aux.plot_confusion_matrix(y_cropped, predicted, "CM on LDA cropped")
# plt.show()

y_validation_predicted = clf.predict(X_validation)
print "Error rate for LDA on Validation: ", ml_aux.get_error_rate(y_validation, y_validation_predicted)
# ml_aux.plot_confusion_matrix(y_validation, y_validation_predicted, "CM on LDA validation (t1)")
# plt.show()


# Start Adaboost Classification
from sklearn.ensemble import AdaBoostClassifier

# Build a 50-estimator AdaBoost model and fit it on the cropped data in one
# step; fit() returns the fitted estimator, which is what gets bound here.
adaboost_model = AdaBoostClassifier(n_estimators=50).fit(X_cropped, y_cropped)
# predicted = adaboost_model.predict(X_cropped)