# [4:'FOLK', 5: 'SOUL', 6: 'ROCK', 7: 'POP', 8: 'BLUES']



if __name__ == '__main__':
    # Script entry point: load the five pickled training splits, then run
    # each one through the two crop_rock helpers.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('--- started ---')

    # Training splits, in the order they are bound to input1..input5.
    # An earlier, now-disabled variant ran the same three steps on the
    # unsplit "msd_train.pkl".
    split_paths = (
        "msd_train_t1.pkl",
        "msd_train_t2.pkl",
        "msd_train_t3.pkl",
        "msd_train_t4.pkl",
        "msd_train_t5.pkl",
    )

    # Phase 1: load every split. Each file is opened in a `with` block so the
    # handle is closed as soon as unpickling finishes instead of being leaked.
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only run this on pickles produced by a trusted source.
    inputs = []
    for split_path in split_paths:
        with open(split_path, "rb") as split_file:
            inputs.append(pickle.load(split_file))
    input1, input2, input3, input4, input5 = inputs

    # Phase 2: one threshold per split.
    # NOTE(review): presumably the row count of the second-largest genre,
    # used to cap the dominant one -- confirm against crop_rock.
    maxvals = []
    for split_data in inputs:
        maxvals.append(crop_rock.find_second_max_value(split_data))
    maxval1, maxval2, maxval3, maxval4, maxval5 = maxvals

    # Phase 3: crop each split with its own threshold.
    filtered_splits = []
    for split_data, split_maxval in zip(inputs, maxvals):
        filtered_splits.append(
            crop_rock.drop_excess_rows(split_data, split_maxval))
    filtered1, filtered2, filtered3, filtered4, filtered5 = filtered_splits
# Drop the rows labelled "UNCAT" and the rows with missing values (NaN),
# then separate the "Genre" label from the feature columns.
df_full = df_full[df_full["Genre"] != "UNCAT"]
df_full = df_full.dropna()
y_full = df_full["Genre"]
# "Track ID" and "Year" are excluded from the feature matrix along with the
# label column.
X_full = df_full.drop(["Genre", "Track ID", "Year"], axis=1)

# Split df_full into 70% training and 30% validation data.
# NOTE(review): the original comment calls df_full "the 80% of data";
# presumably the remaining 20% is held out elsewhere -- confirm.
#
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20, so prefer sklearn.model_selection and fall back to the old module
# only on installations that predate it.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
X_train, X_validation, y_train, y_validation = train_test_split(
    X_full, y_full, train_size=0.7, random_state=42)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("DEBUG: Data splitted")
# Put the label column back beside the training features so the crop helpers
# receive a single frame containing "Genre".
df_train_toCrop = pd.concat([y_train, X_train], axis=1, join='inner')

# Crop the training set only; X_validation / y_validation are left as split.
# NOTE(review): maxval is presumably the row count of the second-largest
# genre, used to cap the dominant one -- confirm against crop_rock.
maxval = crop_rock.find_second_max_value(df_train_toCrop)
df_cropped = crop_rock.drop_excess_rows(df_train_toCrop, maxval)
y_cropped = df_cropped["Genre"]
X_cropped = df_cropped.drop(["Genre"], axis=1)

# # Start LDA Classification
# print "Performing LDA Classification:"
# from sklearn.lda import LDA
# clf = LDA(solver='svd', shrinkage=None, n_components=None).fit(X_cropped, np.ravel(y_cropped[:]))
#
# #Use X_cropped to get best model
# y_train_predicted = clf.predict(X_train)
# print "Error rate for LDA on Training: ", ml_aux.get_error_rate(y_train,y_train_predicted)
# # ml_aux.plot_confusion_matrix(y_cropped, predicted, "CM on LDA cropped")
# # plt.show()
#