def neuralNetPredicter(train, target, test):
    """Fit an MLP on ``train``/``target`` and return probabilities for ``test``.

    The network uses the L-BFGS solver, two hidden layers of 244 and 50
    units, logistic activation, ``alpha=1e-5`` and a fixed random seed.

    Parameters
    ----------
    train : training samples handed to ``MLPClassifier.fit``.
    target : training labels handed to ``MLPClassifier.fit``.
    test : samples handed to ``MLPClassifier.predict_proba``.

    Returns
    -------
    The value returned by ``predict_proba(test)``.
    """
    model = MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(244, 50),
        activation='logistic',
        verbose=True,
        random_state=1,
    )
    model.fit(train, target)
    return model.predict_proba(test)
def test_unsupported_activation():
    """Converting a model with an unknown activation must fail loudly.

    A tiny classifier is fitted (training warnings silenced), its
    ``activation`` is overwritten with a bogus name, and the error raised
    by ``emlearn.convert`` is expected to mention both the problem and the
    offending name.
    """
    bogus_activation = 'fake22'

    # Only the fitting step is noisy; keep the warning filter scoped to it.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = MLPClassifier(hidden_layer_sizes=(2, ), max_iter=10)
        model.fit([[1.0]], [True])

    with pytest.raises(Exception) as excinfo:
        model.activation = bogus_activation
        emlearn.convert(model)

    message = str(excinfo.value)
    assert 'Unsupported activation' in message
    assert bogus_activation in message
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=10) XtrS, params = ml.rescale(X_train) Xvas, _ = ml.rescale(X_test, params) from imblearn.under_sampling import RandomUnderSampler rus = RandomUnderSampler(random_state=42) X_res, y_res = rus.fit_resample(XtrS, y_train) mlp = MLPClassifier(solver='sgd', max_iter=2000) mlp.hidden_layer_sizes = (100, 100, 100) mlp.activation = 'logistic' mlp.learning_rate_init = 0.1 mlp.learning_rate = 'adaptive' mlp.verbose = True mlp.fit(X_res, y_res) print(mlp.score(Xvas, y_test)) Xte = np.genfromtxt( 'C:\\Users\\radad\\OneDrive\\Desktop\\cs178\\CS178-Kaggle-Competition\\X_test.txt', delimiter=None) Yte = np.vstack((np.arange(Xte.shape[0]), mlp.predict_proba(Xte)[:, 1])).T np.savetxt( 'C:\\Users\\radad\\OneDrive\\Desktop\\cs178\\CS178-Kaggle-Competition\\Y_submit.txt', Yte,
[maxIter - 100, maxIter - 50, maxIter, maxIter + 50, maxIter + 100] } CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, n_jobs=-1) CV_rfc.fit(X_train, y_train) print(CV_rfc.best_params_) print("--- %s seconds ---" % (time.time() - start_time)) # In[98]: # Intializing the parameters to fit the classifier on test dataset hlSize = CV_rfc.best_params_['hidden_layer_sizes'] maxIter = CV_rfc.best_params_['max_iter'] start_time = time.time() clf = MLPClassifier(solver='lbfgs') clf.hidden_layer_sizes = (hlSize[0], ) clf.activation = 'relu' clf.max_iter = maxIter clf.fit(X_train, y_train) print("--- %s seconds ---" % (time.time() - start_time)) # In[99]: y_pred = clf.predict(X_test) print(y_pred) # In[101]: #Import scikit-learn metrics module for accuracy calculation from sklearn import metrics # Model Accuracy, how often is the classifier correct?
def neural_network(df_images, df_labels, no_of_neurons):
    '''Train a single-hidden-layer MLP, report its accuracy and persist it.

    Steps, in order: split the data 80/20 with a fixed seed, fit the
    classifier, print overall and class-wise accuracies (delegated to
    ``calculate_accuracy`` / ``classwise_accuracy``), plot the loss curve,
    dump the fitted model with joblib, append one row to
    ``report_regul_mnist.csv`` and finally call
    ``compute_confusion_matrix``.

    Parameters
    ----------
    df_images : samples; passed to ``train_test_split``.
    df_labels : labels aligned with ``df_images``; also forwarded to
        ``compute_confusion_matrix``.
    no_of_neurons : number of units in the single hidden layer.

    Returns
    -------
    None
    '''
    classifier = MLPClassifier(
        solver="adam",
        hidden_layer_sizes=(no_of_neurons, ),
        activation="relu",
        alpha=0.05,
        learning_rate_init=0.1,
        max_iter=1000,
        early_stopping=False,
        verbose=True,
    )

    # splitting the dataset
    train_X, test_X, train_y, test_y = train_test_split(
        df_images,
        df_labels,
        test_size=0.2,
        random_state=1,
    )

    # fit the model
    classifier.fit(train_X, train_y)

    # for calculating accuracy manually
    print('\n------------------------accuracies for ' + str(no_of_neurons) +
          ' neurons----------------------------\n')
    train_accuracy, test_accuracy = calculate_accuracy(classifier, test_X,
                                                       test_y, train_X,
                                                       train_y)
    print(
        '\n---------------------Class wise accuracies-------------------------\n'
    )
    classwise_accuracy(classifier, test_X, test_y, train_X, train_y)

    # plotting the loss curve vs iterations
    plt.plot(classifier.loss_curve_)
    plt.title('loss vs iterations for : ' +
              str(classifier.learning_rate_init) +
              ' learning rate and no of neurons ' + str(no_of_neurons))
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.show()

    # save the model to disk
    # The '/' separators of the date are replaced once, up front, so that the
    # name written to disk, the console message and the CSV report all refer
    # to the same (existing) file. Previously only the dump used the
    # sanitised name while the report recorded a path that was never created.
    # NOTE(review): the time part still contains ':'; confirm this is
    # acceptable on every target filesystem before changing the format.
    current_date_time = time.strftime("%d/%m/%Y_%H:%M:%S")
    filename = ('model_for_' + str(no_of_neurons) + '_' + current_date_time +
                '.sav').replace('/', '_')
    joblib.dump(classifier, filename)
    print('\nmodel saved in file ' + filename + '\n')

    # writing on csv file for reporting
    # newline='' lets the csv module control line endings itself.
    with open('report_regul_mnist.csv', 'a', newline='') as report_file:
        csv.writer(report_file).writerow([
            filename,
            str(no_of_neurons),
            str(train_accuracy),
            str(test_accuracy),
        ])

    compute_confusion_matrix(classifier, df_labels, test_X, test_y)
def neuralnet(dataset=None,
              nn_settings=None,
              train=False,
              save_settings=True,
              print_acc=False):
    """
    Train an MLP on, or apply an existing one to, the items of ``dataset``.

    For every item, every third entry of ``item.heights`` is taken and its
    first element becomes one feature; ``item.count`` is the label. When no
    classifier is supplied a new one is built with the L-BFGS solver, one
    hidden layer of 100 units and tanh activation.

    Parameters
    ----------
    dataset : array[object] : items exposing ``heights`` and ``count``;
                              ``None`` is treated as an empty list
    nn_settings : object : classifier to use; ``None`` builds a new one
    train : bool : fit the classifier before predicting
    save_settings : bool : when training, store the classifier through
                           ``save_objects``; ignored when not training
    print_acc : bool : forwarded to ``accuracy()`` as ``output``

    Return
    ------
    count : the result of ``classifier.predict`` on the extracted features
            when ``train`` is False; ``None`` when ``train`` is True
    """
    dataset = [] if dataset is None else dataset

    # Nothing is persisted unless a training run actually happened.
    save_settings = save_settings and train

    assert (nn_settings is not None or train), "Neural Network should be training " \
                                               "if there is no settings inputed"

    if nn_settings is not None:
        classifier = nn_settings
    else:
        classifier = MLPClassifier(solver="lbfgs",
                                   hidden_layer_sizes=(100, ),
                                   activation="tanh")

    # Feature row per item: first element of every third height entry.
    train_x = [[
        item.heights[idx][0]
        for idx in range(0, 3 * (len(item.heights) // 3), 3)
    ] for item in dataset]
    train_y = [item.count for item in dataset]

    if train:
        classifier.fit(train_x, train_y)
        if save_settings:
            save_objects([classifier], name='classifier_info.dat')

    count = classifier.predict(train_x)
    # Called for its reporting side effect; the returned value is not used.
    accuracy(train_x, count, classifier, output=print_acc)

    if not train:
        return count

    print("Training complete")
    return
import os
from random import shuffle

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier

from loading_dataset import load_dataset

# --- training -------------------------------------------------------------
# NOTE: the training set is loaded into the *_test names and later
# overwritten by the real test set below.
images_test, labels_test = load_dataset("../training_dataset/", 160)
# Flatten every image into a single feature vector.
data = images_test.reshape((len(images_test), -1))

classifier = MLPClassifier(
    solver="sgd",
    activation="relu",
    learning_rate_init=0.001,
    learning_rate="adaptive",
)
print(classifier)
classifier.fit(data, labels_test)

# --- evaluation -----------------------------------------------------------
images_test, labels_test = load_dataset("../test_dataset/", 160, False)
data_test = images_test.reshape((len(images_test), -1))

expected = labels_test
predicted = classifier.predict(data_test)
accuracy = accuracy_score(expected, predicted)
print("****************** average_score : " + str(accuracy))
#! /usr/bin/env python3
# Train an SGD-based MLP on the handwritten-digit CSV data and plot the
# training loss recorded after each iteration.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier
#import snips

X = pd.read_csv('HandWrittenDigit.csv')
# NOTE(review): y is loaded as a DataFrame; if Labels.csv has a single
# column, confirm whether it should be flattened before fitting.
y = pd.read_csv('Labels.csv')

# The parameter is spelled `hidden_layer_sizes`. The previous code assigned
# to `hidden_layer_size`, which only created an unused attribute and left
# the network on its default architecture instead of one 40-unit layer.
classifier = MLPClassifier(
    solver='sgd',
    hidden_layer_sizes=(40, ),
    activation='logistic',
)
classifier.fit(X, y)

plt.plot(classifier.loss_curve_)
plt.show()