X = df_merged.drop(["Genre","Song ID","Track ID"], axis = 1)
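# NOTE (assumption): the target vector y is not defined in this excerpt; since "Genre"
# is the column dropped from the features and the labels evaluated below are genres,
# it is presumably the genre column of the same merged frame.
y = df_merged["Genre"]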
#Split
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer scikit-learn
X_train, X_test, y_train, y_test = train_test_split(X, y)
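# NOTE (assumption): adaboost_model is constructed elsewhere in the original script;
# a minimal sketch using sklearn's AdaBoostClassifier (hyperparameters are illustrative only).
from sklearn.ensemble import AdaBoostClassifier
adaboost_model = AdaBoostClassifier(n_estimators=100)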
#Train
adaboost_model.fit(X_train,y_train)
#Predict
y_train_predicted = adaboost_model.predict(X_train)
y_test_predicted = adaboost_model.predict(X_test)

print "Number of Train Samples: ", (y_train.shape[0])
print "Number of Test Samples: ", (y_test.shape[0])

print "Train Classification Rate: ", (sum(y_train_predicted == y_train)) / float(y_train.shape[0])
print "Test Classification Rate: ", (sum(y_test_predicted == y_test)) / float(y_test.shape[0])

print(ml_aux.getUniqueCount(y_train))
print(ml_aux.getUniqueCount(y_test))

print "try func: ", ml_aux.get_error_rate(y_train, y_train_predicted)

ml_aux.plot_confusion_matrix(y_train, y_train_predicted, "Train")
plt.show()


ml_aux.plot_confusion_matrix(y_test,y_test_predicted,"Test")
plt.show()



__author__ = "Can Ozbek"

import pandas as pd
import numpy as np
import pylab
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import ml_aux_functions as ml_aux

#Read the files
df = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd.pkl")
df_train = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_train.pkl") # 80%
df_test = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_test.pkl") # 20%

df_train_t1 = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_train_t1.pkl")
df_train_t2 = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_train_t2.pkl")
df_train_t3 = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_train_t3.pkl")
df_train_t4 = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_train_t4.pkl")
df_train_t5 = pd.read_pickle("/Users/ahmetcanozbek/Desktop/660Stuff/msd_train_t5.pkl")
print "Reading Done."

print "Histogram: "
print ml_aux.getUniqueCount(df_train_t1["Genre"])

ml_aux.plot_histogram(ml_aux.getUniqueCount(df_train_t1["Genre"]))
plt.show()
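

# NOTE (assumption): ml_aux_functions is the author's own helper module and is not shown
# in this excerpt. The definitions below are a sketch of plausible implementations of the
# helpers used above (getUniqueCount, get_error_rate, plot_confusion_matrix, plot_histogram),
# inferred from how they are called; they are not the original code.

def getUniqueCount(labels):
    """Return per-class counts of a label Series/array."""
    return pd.Series(labels).value_counts()

def get_error_rate(y_true, y_pred):
    """Fraction of misclassified samples."""
    return float(np.mean(np.asarray(y_true) != np.asarray(y_pred)))

def plot_confusion_matrix(y_true, y_pred, title=""):
    """Plot the confusion matrix of true vs. predicted labels."""
    cm = confusion_matrix(y_true, y_pred)
    plt.matshow(cm)
    plt.title(title)
    plt.colorbar()
    plt.xlabel("Predicted label")
    plt.ylabel("True label")

def plot_histogram(counts):
    """Bar plot of per-class counts (as returned by getUniqueCount)."""
    counts.plot(kind="bar")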