def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Fits a single-hidden-layer MLP on dataset2, prints the confusion matrix
    obtained on that same data and plots the first-layer weights.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    # Labels live in the second column of the target matrix.
    labels = target2[:, 1]

    # 6 tanh units in one hidden layer, adam solver, at most 200 iterations.
    model = MLPClassifier(hidden_layer_sizes=(6, ),
                          solver='adam',
                          activation='tanh',
                          max_iter=200)
    model.fit(input2, labels)

    # Confusion matrix of the predictions on the training inputs.
    print(confusion_matrix(labels, model.predict(input2)))

    # coefs_[0] is the weight matrix between the input and the hidden layer.
    plot_hidden_layer_weights(model.coefs_[0])
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Trains an MLP (tanh, 6 hidden units) on dataset2, plots the first-layer
    weights and prints the confusion matrix on the training data. Progress
    markers are printed around the fit and predict steps.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (second entry of each row is the label)
    :return: None
    """
    # Second column of every target row, collected into a plain list.
    labels = [row[1] for row in target2]

    network = MLPClassifier(activation='tanh', hidden_layer_sizes=6)

    print("===========fit started===========")
    network.fit(input2, labels)
    print("===========fit finished===========")

    print("classes_: ", network.classes_)
    print("n_layers_: ", network.n_layers_)

    plot_hidden_layer_weights(network.coefs_[0])

    print("===========predict started===========")
    predicted = network.predict(input2)
    print("===========predict finished===========")

    print(confusion_matrix(labels, predicted))
    return
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Trains a 6-unit single-hidden-layer network on dataset2, prints the
    confusion matrix on the training data and plots the hidden-layer weights.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    labels = target2[:, 1]

    classifier = MLPClassifier(activation=ACTIVATION,
                               solver='adam',
                               hidden_layer_sizes=(6, ),
                               max_iter=200)
    classifier.fit(input2, labels)

    # Only one hidden layer, so the first weight matrix is the one to plot.
    first_layer_weights = classifier.coefs_[0]

    cm = confusion_matrix(labels, classifier.predict(input2))
    print("The Confusion Matrix we obtained: \n" + str(cm))

    plot_hidden_layer_weights(first_layer_weights)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1 (pose recognition).

    Trains a feed-forward network with one hidden layer of 6 units on
    dataset2 ('adam' solver, 200 iterations), computes the confusion matrix
    with scikit-learn's confusion_matrix, and plots the weights between the
    input neurons and the hidden neurons with plot_hidden_layer_weights.

    The weights are read from the model's coefs_ attribute: a list of length
    n_layers - 1 whose i-th element is the weight matrix of layer i.

    :param input2: The input from dataset2 (normalization is already done by the caller)
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    features = input2
    labels = target2[:, 1]

    model = MLPClassifier(solver='adam',
                          activation='tanh',
                          max_iter=200,
                          hidden_layer_sizes=(6, ))
    model.fit(features, labels)

    predicted = model.predict(features)
    cm = confusion_matrix(labels, predicted)

    plot_hidden_layer_weights(model.coefs_[0])
    print(cm)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten networks (random_state 0..9) on dataset1, scores each on
    dataset1 (train) and dataset2 (test), plots the histogram of both
    accuracy lists, prints the confusion matrix of the network with the
    highest test accuracy and plots every test image that network
    misclassifies.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 0 is used as the label)
    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 0 is used as the label)
    :return: None
    """
    train_labels = target1[:, 0]
    test_labels = target2[:, 0]

    train_scores = []
    test_scores = []
    classifiers = []
    for seed in range(10):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   solver="adam",
                                   max_iter=1000,
                                   activation="tanh",
                                   random_state=seed)
        classifier.fit(input1, train_labels)
        train_scores.append(classifier.score(input1, train_labels))
        test_scores.append(classifier.score(input2, test_labels))
        classifiers.append(classifier)

    # Everything below uses the network with the best test accuracy. The
    # previous version took the misclassified images from whichever network
    # happened to be trained last.
    best_classifier = classifiers[np.argmax(test_scores)]
    best_prediction = best_classifier.predict(input2)

    # The matrix used to be computed and then dropped; report it.
    print(confusion_matrix(test_labels, best_prediction))

    plot_histogram_of_acc(train_scores, test_scores)

    for image, expected, predicted in zip(input2, test_labels,
                                          best_prediction):
        if expected != predicted:
            plot_image(image)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten independently initialised networks on dataset1 and evaluates
    each on dataset1 (train) and dataset2 (test). The network with the
    highest test accuracy is used for the returned predictions and
    confusion matrix.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 0 is used as the label)
    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 0 is used as the label)
    :return: tuple (train_acc, test_acc, y_pred, C): the two accuracy arrays
             of length 10, the best network's predictions on input2 and the
             corresponding confusion matrix
    """
    n_runs = 10
    train_labels = target1[:, 0]
    test_labels = target2[:, 0]

    networks = []
    train_acc = np.zeros(n_runs)
    test_acc = np.zeros(n_runs)
    for i in range(n_runs):
        nn = MLPClassifier(hidden_layer_sizes=(20,),
                           activation='tanh',
                           max_iter=1000,
                           random_state=None)
        nn.fit(input1, train_labels)
        networks.append(nn)
        train_acc[i] = nn.score(input1, train_labels)
        test_acc[i] = nn.score(input2, test_labels)

    # argmax returns the first index with the highest test accuracy; the
    # previous code selected the minimum, i.e. the worst network.
    i_best = int(np.argmax(test_acc))

    y_pred = networks[i_best].predict(input2)
    C = confusion_matrix(test_labels, y_pred, labels=None,
                         sample_weight=None)
    return train_acc, test_acc, y_pred, C
def trainModel():
    """
    Train an MLP and a decision tree on the eight training recordings and
    dump both models to disk ('model.pkl' and 'forest.pkl').

    Each recording is passed to add_wav_to_db together with its integer
    label; the samples it collects are flattened into one feature list and
    one label list before fitting.

    :return: None
    """
    train_list = [["comp.speech/train/s1.wav", 0],
                  ["comp.speech/train/s2.wav", 1],
                  ["comp.speech/train/s3.wav", 2],
                  ["comp.speech/train/s4.wav", 3],
                  ["comp.speech/train/s5.wav", 4],
                  ["comp.speech/train/s6.wav", 5],
                  ["comp.speech/train/s7.wav", 6],
                  ["comp.speech/train/s8.wav", 7]]

    # add_wav_to_db fills this list; every element is iterated below as a
    # group of (features, label) pairs.
    collected = []
    for wav_path, label in train_list:
        add_wav_to_db(wav_path, label, collected)

    samples = [(entry[0], entry[1]) for group in collected for entry in group]
    data = [features for features, _ in samples]
    ans = [label for _, label in samples]

    clfNeural = MLPClassifier()
    clfNeural.fit(data, ans)

    clfForest = DecisionTreeClassifier(max_depth=250)
    clfForest.fit(data, ans)

    joblib.dump(clfNeural, 'model.pkl')
    joblib.dump(clfForest, 'forest.pkl')
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten networks (random_state 0..9) on dataset1, plots the histogram
    of train/test accuracies, prints the confusion matrix of the network
    with the best test accuracy and plots misclassified test images.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 0 is used as the label)
    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 0 is used as the label)
    :return: None
    """
    hidden_units = 20
    n_seeds = 10
    n_plots = 4  # plot_random_images is called this many times

    test_face = target2[:, 0]
    train_face = target1[:, 0]

    test_accuracy = np.zeros(n_seeds)
    train_accuracy = np.zeros(n_seeds)

    best_network = None
    max_accuracy = -1.0  # below any real accuracy, so seed 0 always counts

    for seed in range(n_seeds):
        # A fresh estimator per seed. Refitting one shared object made
        # best_network an alias of the most recently trained network
        # instead of the best one.
        nn = MLPClassifier(activation=ACTIVATION,
                           solver="adam",
                           hidden_layer_sizes=(hidden_units, ),
                           max_iter=1000,
                           random_state=seed)
        nn.fit(input1, train_face)
        train_accuracy[seed] = nn.score(input1, train_face)
        test_accuracy[seed] = nn.score(input2, test_face)
        if test_accuracy[seed] > max_accuracy:
            best_network = nn
            max_accuracy = test_accuracy[seed]

    plot_histogram_of_acc(train_accuracy, test_accuracy)

    # Use the best network to calculate the confusion matrix for the test set.
    y_pred = best_network.predict(input2)
    matrix = confusion_matrix(test_face, y_pred)
    print("The Confusion Matrix we obtained: \n" + str(matrix))

    # Plot a few misclassified images. The prediction does not change
    # between plots, so the index set is computed once.
    misclassified = np.where(test_face != y_pred)
    for _ in range(n_plots):
        plot_random_images(input2[misclassified])
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Fits a 6-unit tanh network on dataset2, computes the confusion matrix on
    the training data and plots the first-layer weights.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    labels = target2[:, 1]

    model = MLPClassifier(hidden_layer_sizes=(6, ),
                          solver="adam",
                          max_iter=200,
                          activation="tanh")
    model.fit(input2, labels)

    # NOTE(review): the matrix is computed but neither printed nor returned.
    con_mat = confusion_matrix(labels, model.predict(input2))

    plot_hidden_layer_weights(model.coefs_[0])
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten networks (random_state 0..9) on dataset1, records train and
    test accuracy for each, plots one histogram over all accuracies and
    prints the confusion matrix of the network with the best test accuracy.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 0 is used as the label)
    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 0 is used as the label)
    :return: None
    """
    # Settings used for MLPClassifier.
    hidden_layers = 20
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 1000

    train_labels = target1[:, 0]
    test_labels = target2[:, 0]

    max_accuracy = 0.0
    train_accuracy = []
    test_accuracy = []
    cfn = []  # stays empty only if no network scores above 0.0

    for seed in range(10):
        cf = MLPClassifier(hidden_layer_sizes=(hidden_layers, ),
                           activation=activation_mode,
                           solver=solver_mode,
                           random_state=seed,
                           max_iter=max_iter)
        cf.fit(input1, train_labels)

        train_accuracy.append(cf.score(input1, train_labels))
        current_test_accuracy = cf.score(input2, test_labels)
        test_accuracy.append(current_test_accuracy)

        # Keep the confusion matrix of the best network seen so far.
        if current_test_accuracy > max_accuracy:
            cfn = confusion_matrix(test_labels, cf.predict(input2))
            max_accuracy = current_test_accuracy

    # One histogram over all runs. It used to be called inside the loop
    # with two scalar values per call.
    plot_histogram_of_acc(train_accuracy, test_accuracy)

    print(cfn)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Trains a 6-unit tanh network on dataset2 and plots its first-layer
    weights.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: The confusion matrix of the predictions on input2
    """
    labels = target2[:, 1]

    model = MLPClassifier(hidden_layer_sizes=(6,),
                          activation='tanh',
                          max_iter=200)
    model.fit(input2, labels)

    predicted = model.predict(input2)
    matrix = confusion_matrix(labels, predicted, labels=None,
                              sample_weight=None)

    plot_hidden_layer_weights(model.coefs_[0])
    return matrix
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten networks (random_state 0..9) on dataset1, stores each
    network's predictions on dataset2, prints the indices of the test
    samples misclassified by the reported network, plots a fixed set of
    image pairs, plots the accuracy histogram and prints the reported
    network's confusion matrix.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 0 is used as the label)
    :param input2: The input from dataset2 (2-D array, one row per image)
    :param target2: The target from dataset2 (column 0 is used as the label)
    :return: None
    """
    n = 10
    # Seed whose network is reported below; it was chosen by hand.
    # NOTE(review): presumably the best seed of an earlier run; confirm.
    reported_seed = 1
    # Hand-picked pairs of test-image indices that are plotted together.
    image_pairs = [(175, 184), (210, 134), (223, 177), (179, 186)]

    train_labels = target1[:, 0]
    test_labels = target2[:, 0]

    train_acc = np.zeros(n)
    test_acc = np.zeros(n)
    # Sized from the data instead of a hard-coded 564 test samples.
    pred_test = np.zeros((n, input2.shape[0]))

    for i in range(n):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   activation='tanh',
                                   solver='adam',
                                   max_iter=5000,
                                   random_state=i)
        classifier.fit(input1, train_labels)
        pred_test[i] = classifier.predict(input2)
        train_acc[i] = classifier.score(input1, train_labels)
        test_acc[i] = classifier.score(input2, test_labels)

    # Indices where the reported network's prediction differs from the label.
    error = pred_test[reported_seed] - test_labels
    for j in np.flatnonzero(error):
        print(j)

    for first, second in image_pairs:
        # np.vstack is the function np.row_stack was an alias for.
        plot_random_images(np.vstack((input2[first, :], input2[second, :])))

    plot_histogram_of_acc(train_acc, test_acc)

    confmat = confusion_matrix(test_labels, pred_test[reported_seed])
    print(confmat)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten networks (random_state 0..9) on dataset1, retrains a network
    with the seed that gave the highest training accuracy, prints its
    confusion matrix on dataset2 and plots the accuracy histogram.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 1 is used as the label)
    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    labels_train = target1[:, 1]
    labels_test = target2[:, 1]

    n_hidden_neurons = 20
    n_seeds = 10

    def build(seed):
        # Same configuration for every candidate; only the seed varies.
        return MLPClassifier(activation='tanh',
                             solver='adam',
                             max_iter=1000,
                             hidden_layer_sizes=(n_hidden_neurons,),
                             random_state=seed)

    accu_list_train = np.zeros((n_seeds, 1))
    accu_list_test = np.zeros((n_seeds, 1))

    # Find the best seed
    for seed in range(n_seeds):
        candidate = build(seed)
        candidate.fit(input1, labels_train)
        accu_list_train[seed] = candidate.score(input1, labels_train)
        accu_list_test[seed] = candidate.score(input2, labels_test)

    print(accu_list_train)
    print(accu_list_test)

    # Retrain with the seed that scored best on the training set.
    # NOTE(review): selection uses training accuracy, not test accuracy.
    best_seed = np.argmax(accu_list_train)
    best_nn = build(best_seed)
    best_nn.fit(input1, labels_train)

    # Confusion matrix of the retrained network on the test set.
    predictions = best_nn.predict(input2)
    C = confusion_matrix(labels_test, predictions)
    print(C)

    # Plot results
    plot_histogram_of_acc(accu_list_train, accu_list_test)
    print(accu_list_test)

    # Indices of misclassified images (computed, not used further here).
    comp_vector2 = np.nonzero(labels_test - predictions)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Trains a 6-unit tanh network on dataset2, plots the first-layer weights
    and prints the confusion matrix on the training data.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 2 is used as the label)
    :return: None
    """
    # NOTE(review): labels come from column index 2; confirm this is the
    # intended target column.
    labels = target2[:, 2]

    model = MLPClassifier(activation='tanh',
                          solver='adam',
                          max_iter=200,
                          hidden_layer_sizes=(6,))

    # Train the network, then evaluate it on the same inputs.
    model.fit(input2, labels)
    matrix = confusion_matrix(labels, model.predict(input2))

    plot_hidden_layer_weights(model.coefs_[0])
    print(matrix)
def train(self, labeledDoc):
    """
    Train the final classification model.

    Builds a multi-label indicator matrix from the tags of every document
    (the first tag of each document is skipped), turns each document's
    words into a vector with the dependence model, fits an MLP on those
    vectors, and saves both the classifier and the tag-to-index mapping.

    :param labeledDoc: labeledDoc object; iterated twice, with reloadDoc()
                       called between the two passes
    :return: True if everything went well
    :raises UnboundLocalError: if the save location or the dependence model
                               has not been set
    """
    if self.save_loc is None:
        raise UnboundLocalError("Should have set the save path <setSaveLocation>")
    if self.dependenceModel is None:
        raise UnboundLocalError("Should have set the TextProcessing.Doc2Vec model <setDependenceModel>")

    # First pass: give every tag (except each document's first) a column index.
    tags_id = {}
    for doc in labeledDoc:
        for tag in doc.tags[1:]:
            if tag not in tags_id:
                tags_id[tag] = len(tags_id)

    labeledDoc.reloadDoc()

    # Second pass: one indicator row and one feature vector per document.
    Y = []
    X = []
    for doc in labeledDoc:
        auxY = np.zeros(len(tags_id))
        for tag in doc.tags[1:]:
            auxY[tags_id[tag]] = 1.
        Y.append(auxY)
        X.append(self.dependenceModel.predict(doc.words)[0])

    Y = np.array(Y)
    X = np.array(X)

    # Released scikit-learn names this option solver='lbfgs'; the
    # algorithm='l-bfgs' spelling is not accepted there.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(15,), random_state=1)
    clf.fit(X, Y)
    print(clf.predict(X))

    joblib.dump(clf, self.save_loc)
    with open(self.save_loc + "_tags_id", "w") as fout:
        fout.write(json.dumps(tags_id))

    # The documented contract promises True on success.
    return True
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Trains a 6-unit tanh network on dataset2, prints the confusion matrix on
    the training data and plots the first-layer weights.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    labels = target2[:, 1]

    model = MLPClassifier(hidden_layer_sizes=(6, ),
                          activation='tanh',
                          solver='adam',
                          max_iter=200)
    model.fit(input2, labels)

    print(confusion_matrix(labels, model.predict(input2)))

    # First entry of coefs_ is the input-to-hidden weight matrix.
    plot_hidden_layer_weights(model.coefs_[0])
class MLPClassifierImpl():
    """
    Wrapper that records hyperparameters at construction time and builds
    the underlying SKLModel from them on every call to fit.
    """

    def __init__(self,
                 hidden_layer_sizes=(100,),
                 activation='relu',
                 solver='adam',
                 alpha=0.0001,
                 batch_size='auto',
                 learning_rate='constant',
                 learning_rate_init=0.001,
                 power_t=0.5,
                 max_iter=200,
                 shuffle=True,
                 random_state=None,
                 tol=0.0001,
                 verbose=False,
                 warm_start=False,
                 momentum=0.9,
                 nesterovs_momentum=True,
                 early_stopping=False,
                 validation_fraction=0.1,
                 beta_1=0.9,
                 beta_2=0.999,
                 epsilon=1e-08,
                 n_iter_no_change=10):
        """Store every constructor argument under its own name."""
        self._hyperparams = dict(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=activation,
            solver=solver,
            alpha=alpha,
            batch_size=batch_size,
            learning_rate=learning_rate,
            learning_rate_init=learning_rate_init,
            power_t=power_t,
            max_iter=max_iter,
            shuffle=shuffle,
            random_state=random_state,
            tol=tol,
            verbose=verbose,
            warm_start=warm_start,
            momentum=momentum,
            nesterovs_momentum=nesterovs_momentum,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            n_iter_no_change=n_iter_no_change,
        )

    def fit(self, X, y=None):
        """
        Create a new SKLModel from the stored hyperparameters and fit it.

        :param X: training inputs
        :param y: training targets; when None, the model is fitted on X only
        :return: self
        """
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is None:
            self._sklearn_model.fit(X)
        else:
            self._sklearn_model.fit(X, y)
        return self

    def predict(self, X):
        """Return the fitted model's predictions for X."""
        model = self._sklearn_model
        return model.predict(X)

    def predict_proba(self, X):
        """Return the fitted model's class probabilities for X."""
        model = self._sklearn_model
        return model.predict_proba(X)
def runTest(self, trainingFilename, startIndex, endIndex):
    """
    Train a network on the features read from trainingFilename, then run
    the tester over suspicious documents startIndex..endIndex-1 of part 1
    and append one row per evaluated document to "wyniki.csv".

    :param trainingFilename: feature file passed to InputDataReader.read_features
    :param startIndex: first document number (inclusive)
    :param endIndex: last document number (exclusive)
    :return: None
    """
    # Training pipeline.
    learn_atomizer = Atomizer('learn')
    learn_extractor = FeaturesExtractor()
    processor = InputDataProcessor(learn_atomizer, learn_extractor,
                                   (0.2, 0.8))
    reader = InputDataReader(processor)
    (X, y) = reader.read_features(trainingFilename)

    network = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(5, ), random_state=1)
    network.fit(X, y)

    # Test pipeline built around the trained network.
    tester = Tester(Atomizer('test'), FeaturesExtractor(), network, 0.99)

    for i in range(startIndex, endIndex):
        testFilename = "suspicious-document{:05d}".format(i)
        test_file = reader.get_file("dataSets/part{}/{}".format(
            1, testFilename))

        verdict = tester.is_plagiarised(test_file)
        # Documents for which the tester reports False are skipped.
        if verdict == False:  # noqa: E712 - keeps the original comparison
            continue

        system_answer = str(verdict[0])
        # Non-empty metadata is treated as the ground-truth flag.
        actual_state = str(not not test_file['metadata'])

        print('odpowiedz systemu: ' + system_answer)
        print('stan rzeczywisty: ' + actual_state)

        # The with-block closes the file after every appended row; the
        # handle used to be left open on each iteration.
        with open("wyniki.csv", 'a') as csv_file:
            csv.writer(csv_file).writerows([[
                trainingFilename, testFilename, system_answer, actual_state
            ]])
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    """
    Train an SGD-based MLP on standardized training data and print the mean
    squared error and manual accuracy for the test and validation sets.

    :param train: training inputs; the scaler is fitted on these only
    :param test: test inputs
    :param val: validation inputs
    :param train_out: training targets (cast to float before fitting)
    :param test_out: test targets
    :param val_out: validation targets
    :param BigSigma_inv: unused; kept so existing callers keep working
    :return: None
    """
    clf = MLPClassifier(solver='sgd',
                        alpha=1e-5,
                        hidden_layer_sizes=(100, 1),
                        activation='logistic',
                        batch_size=BATCH_HUMAN,
                        shuffle=True,
                        max_iter=5000)

    # Fit the scaler on the training data, then apply that same
    # transformation to every split the model sees.
    scaler = StandardScaler()
    scaler.fit(train)
    train_scaled = scaler.transform(train)
    test_scaled = scaler.transform(test)
    # The validation split used to be passed to predict() unscaled.
    val_scaled = scaler.transform(val)

    clf.fit(X=train_scaled, y=train_out.astype(float))

    predict_test = clf.predict(test_scaled)
    predict_val = clf.predict(val_scaled)

    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    # Validation accuracy now uses the validation predictions; it used to
    # compare val_out with the test predictions.
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_val))
def compute(self):
    """
    Run leave-one-out evaluation for every parameter set.

    For each entry of self._params_list, every vector is held out once: a
    fresh MLPClassifier built from those parameters is trained on all other
    vectors and its prediction for the held-out vector is stored in the
    matching result object. Each parameter set is committed once its
    leave-one-out pass finishes; the computation is completed at the end.
    """
    actual_matrix = self.get_actual_data_matrix()
    n_vectors = self.get_vector_count()
    n_features = self.get_actual_feature_count()

    for params_index, params in enumerate(self._params_list):
        params_result = self._params_result_list[params_index]

        for loo_index in range(n_vectors):
            # Every row except the held-out one, in original order.
            kept_rows = [y for y in range(n_vectors) if y != loo_index]

            train_data = [[actual_matrix[y][x] for x in range(n_features)]
                          for y in kept_rows]
            train_labels = [self._labels[y] for y in kept_rows]

            # A single-row matrix holding the left-out vector.
            test_data = [[actual_matrix[loo_index][x]
                          for x in range(n_features)]]

            clf = MLPClassifier(**params)
            clf.fit(train_data, train_labels)
            params_result.predicted_labels[loo_index] = \
                clf.predict(test_data)[0]

        self._commit_params_computation(params_index)

    self._complete_computation()
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1

    Trains an 8-unit tanh network on dataset2, prints the confusion matrix
    on the training data and plots the first-layer weights.

    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 1 is used as the label)
    :return: None
    """
    # Row 1 of the transposed targets is column 1 of the original matrix.
    labels = np.transpose(target2)[1]

    nn = MLPClassifier(hidden_layer_sizes=(8, ),
                       activation='tanh',
                       solver='adam',
                       max_iter=200)
    model = nn.fit(input2, labels)

    y_predict = model.predict(input2)

    # confusion_matrix expects (y_true, y_pred). With the arguments swapped
    # the printed matrix was the transpose of the conventional one.
    C = confusion_matrix(labels, y_predict)
    print(C)

    plot_hidden_layer_weights(model.coefs_[0])
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2

    Trains ten networks (random_state 0..9) on dataset1 and keeps the one
    with the highest test accuracy. For that network it plots up to 20
    misclassified test images and the first-layer weights, then plots the
    accuracy histogram and prints the confusion matrix.

    :param input1: The input from dataset1
    :param target1: The target from dataset1 (column 0 is used as the label)
    :param input2: The input from dataset2
    :param target2: The target from dataset2 (column 0 is used as the label)
    :return: None
    """
    max_images = 20

    # Row 0 of the transposed targets is column 0 of the original matrix.
    train_labels = np.transpose(target1)[0]
    test_labels = np.transpose(target2)[0]

    acc_train = np.zeros((10, ))
    acc_test = np.zeros((10, ))

    best_acc = -1  # was named `max`, which shadowed the builtin
    best_model = None
    for i in range(10):
        nn = MLPClassifier(random_state=i,
                           hidden_layer_sizes=(20, ),
                           activation='tanh',
                           solver='adam',
                           max_iter=1000)
        model = nn.fit(input1, train_labels)
        acc_train[i] = model.score(input1, train_labels)
        acc_test[i] = model.score(input2, test_labels)
        if acc_test[i] > best_acc:
            best_acc = acc_test[i]
            best_model = model
            y_predict = model.predict(input2)
            C = confusion_matrix(test_labels, y_predict)

    # Plot at most max_images misclassified test images.
    shown = 0
    for image, expected, predicted in zip(input2, test_labels, y_predict):
        if shown >= max_images:
            break
        if expected != predicted:
            plot_image(image)
            shown += 1

    # Weights of the best network. The previous code plotted the weights of
    # whichever network was trained last.
    plot_hidden_layer_weights(best_model.coefs_[0])

    plot_histogram_of_acc(acc_train, acc_test)
    print(C)
a = Atomizer('learn') e = FeaturesExtractor() p = InputDataProcessor(a, e, (0.2, 0.8)) r = InputDataReader(p) r.read(part, start, end) print_time_interval("feature extraction") (X, y) = r.read_features('part{}_{}_{}.csv'.format(part, start, end)) print_time_interval("reading serialized features") n = MLPClassifier(solver=solver, hidden_layer_sizes=(hidden, hidden), verbose=True, activation='tanh', tol = 0.0) print(n) n = pickle.load( open( "network.bin", "rb" ) ) n.fit(X, y) print_time_interval("network learning") save(n) #a = Atomizer('test') #e = FeaturesExtractor() #t = Tester(a, e, n, 0.8) #test_file = r.get_file("dataSets/part{}/suspicious-document{:05d}".format(8, 500 * (8 - 1) + 1)) #b = t.is_plagiarised(test_file) #print('odpowiedz systemu: ' + str(b[0])) #print('stan rzeczywisty: ' + str(not not test_file['metadata'])) #print_time_interval() end finally:
# Fetch MNIST (70k images).
# NOTE(review): fetch_mldata is no longer shipped by recent scikit-learn
# releases; confirm which version this script targets.
dataset = fetch_mldata("MNIST original")

X = np.array(dataset.data)  # our features
y = np.array(dataset.target)  # our labels

X = X.astype('float32')

# Split the dataset: the first 60k instances are for training and the
# remaining 10k are for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# Normalize the features into the range 0..1.
X_train = X_train / 255
X_test = X_test / 255

# Neural network with one hidden layer of 512 units (784-512-10).
# (512, ) is an explicit one-element tuple; plain (512) is just the int 512.
mlp = MLPClassifier(hidden_layer_sizes=(512, ), max_iter=500, verbose=True)

# Fit the model. MLPClassifier.fit takes only X and y; the number of
# training iterations is controlled by max_iter above, so the unsupported
# `epoch=50` keyword was removed.
mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))  # output: 0.99
print("Test set score: %f" % mlp.score(X_test, y_test))  # output: 0.98

# Save the model.
joblib.dump(mlp, "model.pkl")
# Prepare the features: encode, drop unused columns, split off the label
# and scale the remaining columns.
df = encode_data(df)
df = delete_columns(df)
df, label = seperate_label(df)
df, scaler = scale_columns(df)

# Persist the fitted scaler. The with-block closes the file handle, which
# was previously left open.
with open('./scaler.model', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

x_train, x_test, y_train, y_test = train_test_split(df, label, test_size=.5)

# A tree.DecisionTreeClassifier() was tried here before the MLP.
classifier = MLPClassifier()
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)
print("Accuracy:", accuracy_score(y_test, predictions))

with open("model.model", 'wb') as model_file:
    pickle.dump(classifier, model_file)
print(
    "Training completed. \nModel dumped succesfully..\n -----------------------"
)

############### Evaluating #################
data = pd.read_csv("ITData_eval-unlabeled.csv")
data.columns = header
df2 = data.drop(['Satisfaction'], axis=1)
if mode == 'train': print "training" obj = ExerciseDataProvider(".") X = obj.x[:,0:125] y = obj.t Xt = obj.xt[:,0:125] yt = obj.tt print "input vec shape: ", X.shape # print y.shape # print X.shape[-1] clf_t = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(X.shape[-1], 19), random_state=1, spectral_mode='fft') clf_t.fit(X, y) with open('/afs/inf.ed.ac.uk/user/s12/s1235260/model_spec3.pkl', 'wb') as m: p.dump((clf_t, Xt, yt) , m) else: with open('/afs/inf.ed.ac.uk/user/s12/s1235260/model_spec3.pkl', 'rb') as m: clf, Xt, yt = p.load(m) y2 = clf.predict(Xt) print clf.coefs_[0].shape #.shape print y2, yt print len(y2), len(yt) acc = sum(y2==yt) / float(len(y2)) print acc #""" else:
# MLPClassifier is imported from the public package; the
# sklearn.neural_network.multilayer_perceptron module path is private.
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
data = iris.data
labels = iris.target

# We add max_iter=1000 because the default is max_iter=200 and
# it is not enough for full convergence.
mlp = MLPClassifier(random_state=1, max_iter=1000)
mlp.fit(data, labels)

# Accuracy is measured on the same samples the model was trained on.
pred = mlp.predict(data)

print()
print('Accuracy: %.2f' % accuracy_score(labels, pred))
def classify_mlp(data_path):
    """
    Train an MLP on the embeddings stored under data_path, append accuracy
    and weighted F-scores to mlp_results.txt and plot the normalized
    confusion matrix of the test split.

    The results file lives two directory levels above the parent directory
    of data_path. If it already mentions data_path, or if
    train_labels.csv is missing, nothing is computed.

    :param data_path: directory containing {train,val,test}_labels.csv and
                      {train,val,test}_embeddings.csv
    :return: True when the work is skipped, otherwise None
    """
    result_path = '%s/mlp_results.txt' % os.path.abspath(
        os.path.join(os.path.dirname(data_path),
                     os.path.join(os.pardir, os.pardir)))

    # Skip data sets that were already evaluated. The with-block closes the
    # results file, which was previously left open.
    if os.path.exists(result_path):
        with open(result_path) as results:
            if data_path in results.read():
                return True

    print(data_path)

    if not os.path.exists("{}/train_labels.csv".format(data_path)):
        return True

    def load(name):
        # Load "<data_path>/<name>.csv" as a numeric array.
        return np.loadtxt("{}/{}.csv".format(data_path, name))

    tr_labels = load("train_labels")
    tr_embeddings = load("train_embeddings")
    val_labels = load("val_labels")
    val_embeddings = load("val_embeddings")
    te_labels = load("test_labels")
    te_embeddings = load("test_embeddings")

    clf = MLPClassifier(random_state=2,
                        max_iter=200000000,
                        hidden_layer_sizes=(64, ))
    clf.fit(tr_embeddings, tr_labels)

    tr_score = clf.score(tr_embeddings, tr_labels)
    val_score = clf.score(val_embeddings, val_labels)
    te_score = clf.score(te_embeddings, te_labels)

    tr_predictions = clf.predict(tr_embeddings)
    val_predictions = clf.predict(val_embeddings)
    te_predictions = clf.predict(te_embeddings)

    # f1_score expects (y_true, y_pred). The "weighted" average weights each
    # class by its support in y_true, so the previous swapped order weighted
    # by predicted counts instead.
    tr_fscore = f1_score(tr_labels, tr_predictions, average="weighted")
    val_fscore = f1_score(val_labels, val_predictions, average="weighted")
    te_fscore = f1_score(te_labels, te_predictions, average="weighted")

    print("tr_score %s" % tr_score)
    print("val_score %s" % val_score)
    print("te_score %s" % te_score)

    with open(result_path, mode='a') as f:
        f.write(
            'Data Path: %s\tTrain Accuracy:%s\tVal Accuracy:%s\tTest Accuracy:%s\tTrain FScore:%s\tVal FScore:%s\tTest FScore:%s\n'
            % (data_path, tr_score, val_score, te_score, tr_fscore,
               val_fscore, te_fscore))

    conf_mat = confusion_matrix(te_labels, te_predictions)
    labels = sorted(set(te_labels))
    plot_confusion_matrix(conf_mat,
                          classes=labels,
                          normalize=True,
                          title='Normalized confusion matrix',
                          output=data_path,
                          path_name='mlp_confusion_matrix',
                          alg='mlp')
X = X.astype('float32') #splitting Dataset into Training and Testing dataset #First 60k instances are for Training and last 10k are for testing X_train, X_test = X[:60000], X[60000:] y_train, y_test = y[:60000], y[60000:] #Normalizing Our Features in range 0 and 1 X_train = X_train / 255 X_test = X_test / 255 #creating Neural Network # Neural Network has one hidden layer with 240 units # Neural NetWork is of size 784-240-10 mlp = MLPClassifier(hidden_layer_sizes=(240), max_iter=500, verbose=True) #fitting our model mlp.fit(X_train, y_train) ''' Final Output: Iteration 33, loss = 0.00299869 ''' print("Training set score: %f" % mlp.score(X_train, y_train)) #output : 0.99 print("Test set score: %f" % mlp.score(X_test, y_test)) #output :0.98 #saving our model joblib.dump(mlp, "model.pkl")
print(type(data))
print(type(row))
print(type(col))

# Sparse feature matrix built from (value, (row, column)) triplets.
fea_data_set = csr_matrix((data, (row, col)), shape=(row_index, max_col + 1))

# Reduce to 30 components. fit_transform fits the model itself, so the
# separate svd.fit() call that preceded it was redundant work.
svd = TruncatedSVD(30)
x_new = svd.fit_transform(fea_data_set)
# Alternative that was tried:
# pca = PCA(n_components=30)
# pca.fit(fea_data_set)
# x_new = pca.transform(fea_data_set)

xtrain, xtest, ytrain, ytest = train_test_split(x_new, label, test_size=0.2)

lg.fit(xtrain, ytrain)
nb.fit(xtrain, ytrain)
forest.fit(xtrain, ytrain)
SVM.fit(xtrain, ytrain)
mlp.fit(xtrain, ytrain)

# For every model: score, then mean squared error of the predictions.
# The lg and nb lines used np.mean(diff)**2 (the squared mean error); they
# now use np.mean(diff**2) like the forest, SVM and mlp lines.
print("------------")
print(lg.score(xtest, ytest))
print(np.mean((lg.predict(xtest) - ytest) ** 2))
print(lg.score(xtrain, ytrain))
print(np.mean((lg.predict(xtrain) - ytrain) ** 2))
print("------------")
print(nb.score(xtest, ytest))
print(np.mean((nb.predict(xtest) - ytest) ** 2))
print(forest.score(xtest, ytest))
print(np.mean((forest.predict(xtest) - ytest) ** 2))
print(SVM.score(xtest, ytest))
print(np.mean((SVM.predict(xtest) - ytest) ** 2))
print(mlp.score(xtest, ytest))
print(np.mean((mlp.predict(xtest) - ytest) ** 2))
# Four models were trained; results are for test-set sizes of
# 80%, 70%, 50% and 30% respectively.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Fit only to the training data.
scaler.fit(X)

# Now apply the same transformation to both splits.
x_train_nn = scaler.transform(X)
x_test_nn = scaler.transform(X_test)

nn = MLPClassifier(solver='lbfgs',
                   alpha=1e-5,
                   hidden_layer_sizes=(5, 2),
                   random_state=1)
print(nn.fit(x_train_nn, y))

print('Neural network model:')
nn_pred_test = nn.predict(x_test_nn)

# Compute the confusion matrix on the test split.
from sklearn import metrics
cnf_matrix = metrics.confusion_matrix(y_test, nn_pred_test)
print(cnf_matrix)

# Compute the ROC curve.
import matplotlib.pyplot as plt
# Probabilities must come from the scaled test features, because the
# network was trained on scaled inputs; the raw X_test was used before.
y_pred_proba = nn.predict_proba(x_test_nn)[::, 1]
# Ground truth for the ROC curve is the test labels ('N' -> 0, anything
# else -> 1). The training labels `y` were used before, which do not line
# up with the test-set probabilities.
y_binary = np.where(np.asarray(y_test) == 'N', 0, 1)
fpr, tpr, _ = metrics.roc_curve(y_binary, y_pred_proba)
def classify(data_path, path=None, counter=None, alg='svm'):
    """
    Cross-validate one classifier on the embeddings stored under data_path
    and write accuracy / F-score summaries to log files.

    Python 2 code (print statements, list-returning map, builtin reduce).

    :param data_path: directory containing labels.csv and reps.csv
    :param path: tag used in the names of the output files
    :param counter: value written next to every score in the output files
    :param alg: classifier to use: 'knn', 'svm', 'nn', 'nnd', 'poly' or 'rf'
    :return: True if the confusion image for (alg, path) already exists,
             otherwise None
    """
    # Skip the whole run when the result image is already there.
    out = os.path.join(data_path, '%s_%s_%s' % (alg, path, 'confusion.png'))
    if os.path.exists(out):
        return True
    # Labels: second column of labels.csv, reduced to a bare file name.
    fname = "{}/labels.csv".format(data_path)
    paths = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    paths = map(os.path.basename, paths)  # Get the filename.
    # Remove the extension.
    paths = map(lambda x: x.split(".")[0], paths)
    paths = np.array(map(lambda path: os.path.splitext(path)[0], paths))
    # Features: the full contents of reps.csv.
    fname = "{}/reps.csv".format(data_path)
    rawEmbeddings = pd.read_csv(fname, header=None).as_matrix()
    # print(rawEmbeddings.shape, paths.shape)
    # 10-fold shuffled cross-validation with a fixed seed.
    folds = cross_validation.KFold(n=len(rawEmbeddings),
                                   random_state=1,
                                   n_folds=10,
                                   shuffle=True)
    scores = []
    fscores_weighted, fscores_macro, fscores_micro = [], [], []
    for idx, (train, test) in enumerate(folds):
        print idx, alg
        # NOTE(review): an unrecognised `alg` leaves `clf` unassigned and
        # the fit below fails with a NameError on the first fold.
        if alg == 'knn':
            clf = neighbors.KNeighborsClassifier(1)
        elif alg == 'svm':
            clf = svm.SVC(kernel='linear', C=1, max_iter=200000000)
            # clf = svm.LinearSVC()
            # clf = svm.SVC(kernel="poly", degree=5, C=1, verbose=10)
        elif alg == 'nn':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2,
                                max_iter=200000000,
                                hidden_layer_sizes=(96, 64, 32))
        elif alg == 'nnd':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2, max_iter=200000000)
        elif alg == 'poly':
            clf = svm.SVC(kernel="poly", max_iter=200000000)
        elif alg == 'rf':
            clf = RandomForestClassifier()
        clf.fit(rawEmbeddings[train], paths[train])
        gc.collect()
        # Accuracy and the three F-score averages for this fold.
        score = clf.score(rawEmbeddings[test], paths[test])
        # print score, alg
        scores.append(score)
        prediction = clf.predict(rawEmbeddings[test])
        fscore_weighted = f1_score(paths[test], prediction, average="weighted")
        fscores_weighted.append(fscore_weighted)
        fscore_macro = f1_score(paths[test], prediction, average="macro")
        fscores_macro.append(fscore_macro)
        fscore_micro = f1_score(paths[test], prediction, average="micro")
        fscores_micro.append(fscore_micro)
    # Per-fold accuracies, one "score,counter" line each (file is overwritten).
    accuracy_dir = os.path.abspath(
        os.path.join(data_path, 'accuracies_%s.txt' % alg))
    with open(accuracy_dir, "wb") as file:
        for i in scores:
            file.writelines("%s,%s\n" % (str(i), str(counter)))
    # print "KNN Avg. score %s" % (reduce(operator.add, scores) / len(folds))
    # print "MLP Avg. score %s" % (reduce(operator.add, scores3) / len(folds))
    # Mean accuracy: sum of the fold scores divided by len(folds).
    print "Avg. score %s" % (reduce(operator.add, scores) / len(folds)), data_path
    # The four summary logs below live two directory levels above
    # data_path and are appended to, one line per call.
    result_path = "{}/{}_{}.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, scores) / len(folds))), str(counter), alg))
    fscores_weighted_result_path = "{}/{}_{}_fscores_weighted.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_weighted_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_weighted) / len(folds))),
                                      str(counter), alg))
    fscores_macro_result_path = "{}/{}_{}_fscores_macro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_macro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_macro) / len(folds))),
                                      str(counter), alg))
    fscores_micro_result_path = "{}/{}_{}_fscores_micro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_micro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_micro) / len(folds))),
                                      str(counter), alg))