# Train the AdaBoost model on a random split of df_merged and report how
# well it classifies the train and test portions.
#
# NOTE(review): `df_merged`, `y`, `adaboost_model`, `ml_aux` and `plt` are not
# defined in this part of the script; they are expected to come from earlier
# code -- confirm before running this section on its own.

# Feature matrix: df_merged without the "Genre", "Song ID" and "Track ID" columns.
X = df_merged.drop(["Genre", "Song ID", "Track ID"], axis=1)

# Split
# sklearn.cross_validation was removed in scikit-learn 0.20; use
# model_selection and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train
adaboost_model.fit(X_train, y_train)

# Predict
y_train_predicted = adaboost_model.predict(X_train)
y_test_predicted = adaboost_model.predict(X_test)

# Report
# Each print takes a single pre-formatted string so the output is the same
# under Python 2 and Python 3. The two spaces after each colon reproduce what
# the original `print "label: ", value` statements emitted.
n_train = y_train.shape[0]
n_test = y_test.shape[0]
train_rate = sum(y_train_predicted == y_train) / float(n_train)
test_rate = sum(y_test_predicted == y_test) / float(n_test)

print("Number of Train Samples:  %s" % (n_train,))
print("Number of Test Samples:  %s" % (n_test,))
print("Train Classification Rate:  %s" % (train_rate,))
print("Test Classification Rate:  %s" % (test_rate,))

print(ml_aux.getUniqueCount(y_train))
print(ml_aux.getUniqueCount(y_test))

print("try func:  %s" % (ml_aux.get_error_rate(y_train, y_train_predicted),))

# Confusion matrices
# NOTE(review): only the "Train" call has its return value printed; the
# "Test" call does not. Kept as in the original -- confirm whether the
# printed value is wanted.
print(ml_aux.plot_confusion_matrix(y_train, y_train_predicted, "Train"))
plt.show()

ml_aux.plot_confusion_matrix(y_test, y_test_predicted, "Test")
plt.show()
__author__ = "Can Ozbek"

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab
from sklearn.metrics import confusion_matrix

import ml_aux_functions as ml_aux

# Directory holding the pickled data frames; kept in one place so that the
# location only has to be changed once.
DATA_DIR = "/Users/ahmetcanozbek/Desktop/660Stuff"


def _read_frame(filename):
    """Load the pickled object stored as *filename* inside DATA_DIR."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    return pd.read_pickle(os.path.join(DATA_DIR, filename))


# Read the files
df = _read_frame("msd.pkl")
df_train = _read_frame("msd_train.pkl")  # 80%
df_test = _read_frame("msd_test.pkl")  # 20%
df_train_t1 = _read_frame("msd_train_t1.pkl")
df_train_t2 = _read_frame("msd_train_t2.pkl")
df_train_t3 = _read_frame("msd_train_t3.pkl")
df_train_t4 = _read_frame("msd_train_t4.pkl")
df_train_t5 = _read_frame("msd_train_t5.pkl")
print("Reading Done.")

# Print and plot the "Genre" counts of the first training subset.
# The count is computed once and reused for both the printout and the plot
# (assumes ml_aux.getUniqueCount has no side effects -- confirm).
print("Histogram: ")
genre_counts = ml_aux.getUniqueCount(df_train_t1["Genre"])
print(genre_counts)
ml_aux.plot_histogram(genre_counts)
plt.show()