print("Classification Number:", (c + 1))

# Generating training and testing set: for every genre, shuffle its rows and
# take the first 75 for training, the next 25 for testing.
# NOTE(review): assumes each genres[...] array has at least 100 rows of
# `feat` features — confirm against where `genres` is built.
#
# Collect the per-genre slices first and concatenate once: the previous
# np.append-in-a-loop copied the whole accumulated array on every iteration
# (accidental O(n^2)). Seeding the concatenation with the empty (0, feat)
# float array keeps the original dtype promotion and the empty-genres case.
train_parts = [np.empty((0, feat))]
test_parts = [np.empty((0, feat))]
for genere in genres.keys():
    np.random.shuffle(genres[genere])
    train_parts.append((genres[genere])[0:75, :])
    test_parts.append((genres[genere])[75:100, :])
x_train = np.concatenate(train_parts, axis=0)
x_test = np.concatenate(test_parts, axis=0)

# Predictions for each classifier.

# SVM improves with KNN
svm_predictions = classifiers.svm(x_train, y_train, x_test)
# Adaptive Boosting
ada_predictions = classifiers.boost(x_train, y_train, x_test)
# Naive Bayesian
bay_predictions = classifiers.bayes(x_train, y_train, x_test)

# Choose final prediction with rules:
# 1. If Bayesian states one of "Classical", "Jazz", "Metal", "Reggae", pick it.
# 2. Else if every classifier, or 2 out of 3, state the same, pick that choice.
# 3. If every classifier states something different, focus on Boost and SVM:
#    - if both state one of "Disco", "Jazz", "Metal", "Pop" or "Reggae", pick SVM;
#    - else if both state one of the others, pick Boost;
#    - else pick at random between the two.
final_prediction = []
# Shuffle the prepared training examples, split them into label/feature
# arrays, embed every test document as a tf-idf-weighted average of GloVe
# vectors, then report accuracy for each classifier.
random.shuffle(train_set)
Y_train = np.array([example[0] for example in train_set])
X_train = np.array([example[1] for example in train_set])

test_set = []
for key, value in test.items():
    # One bag-of-words per file; unknown words fall back to the '</unk>' slot.
    for words in value.values():
        counts = np.zeros(len(vocab))
        for word in words:
            slot = vocab[word] if word in vocab else vocab['</unk>']
            counts[slot] += 1
        # Term frequency scaled by the precomputed test-set idf weights.
        weights = (counts / len(words)) * idf_test
        # NOTE(review): assumes glove[i] is the 300-d vector for vocab index
        # i — confirm where `glove` is loaded.
        doc_vector = np.zeros(300)
        for slot in range(len(vocab)):
            doc_vector += weights[slot] * glove[slot]
        test_set.append([key, doc_vector])

print('shuffling test')
random.shuffle(test_set)
Y_test = np.array([example[0] for example in test_set])
X_test = np.array([example[1] for example in test_set])

print("Calling Classifiers\n")
print("Accuracy for Naive Bayes is : ", naive_bayes(X_train, Y_train, X_test, Y_test))
print("Accuracy for Logistic Regression is : ", logistic_regression(X_train, Y_train, X_test, Y_test))
print("Accuracy for SVM is : ", svm(X_train, Y_train, X_test, Y_test))
print("Accuracy for FF Neural Net is : ", fnn(X_train, Y_train, X_test, Y_test))
# print("Accuracy for Recurrent Neural Net is : ", rnn(X_train, Y_train, X_test, Y_t))
# Find the best hyper-parameters for each classifier (decision tree, KNN,
# MLP, SVM; Naive Bayes uses its default configuration), record one score
# per classifier, and dump 20 recorded runs to results.csv.
# NOTE(review): the `classifiers` object, the `params_*` lists, the result
# lists (dt, knn, mlp, nb, svm) and `inspect` are defined elsewhere in this
# file. The range(20) indexing implies this parameter-search section runs
# 20 times (an enclosing loop not visible here) — confirm. The nesting of
# the `with`/`for` below is reconstructed from a whitespace-mangled source.
print('Calculando Melhores Parametros\n')
bp_dt = classifiers.fbp_dt()      # best params: decision tree
params_dt.append(bp_dt)
bp_knn = classifiers.fbp_knn()    # best params: k-nearest neighbours
params_knn.append(bp_knn)
bp_mlp = classifiers.fbp_mlp()    # best params: multi-layer perceptron
params_mlp.append(bp_mlp)
print('Naive Bayes\nUsando Configuração Padrão')
bp_svm = classifiers.fbp_svm()    # best params: support vector machine
params_svm.append(bp_svm)
# Run each classifier with its best configuration and record the score.
dt.append(classifiers.dt(bp_dt))
knn.append(classifiers.knn(bp_knn))
mlp.append(classifiers.mlp(bp_mlp))
nb.append(classifiers.nb())
svm.append(classifiers.svm(bp_svm))
with open("results.csv", "w") as fp:
    # CSV header: one score + parameter columns per classifier.
    fp.write(
        "Dt, Depth, Knn, Weights, K, Mlp, Max Ite, Hidden Layers, Learning Rate, Nb, No Params, Svm, Kernel, Penalty\n"
    )
    for index in range(20):
        # The KNN weight column prints a label when a custom weighting
        # function was chosen instead of a built-in string option.
        fp.write(
            "%f, %s, %f, %s, %s, %f, %s, %s, %s, %f, %s, %f, %s,%s\n"
            % (dt[index], params_dt[index], knn[index],
               '1-Distancia Normalizada' if inspect.isfunction(
                   params_knn[index][0]) else params_knn[index][0],
               params_knn[index][1], mlp[index], params_mlp[index][0],
               params_mlp[index][1], params_mlp[index][2], nb[index],
               "Default", svm[index], params_svm[index][0],
               params_svm[index][1]))
    # NOTE(review): the statement below is truncated in the visible source
    # (its argument tuple continues past this chunk) — left as-is.
    fp.write("\n%f, %s, %f, %s, %s, %f, %s, %s, %s, %f, %s, %f, %s,%s\n" %