class BCISignal():
    """End-to-end BCI signal pipeline: band-pass prefilter -> per-state CSP
    decomposition -> instantaneous-variance features -> MLP classifier.

    Constructor parameters:
        fs: sampling frequency in Hz.
        bands: frequency bands handed to each spatial-decomposition pool.
        ch_names: EEG channel names (their count sets the filter width).
        states_labels: class labels; one CSP pool is fitted per label.
        indexes: component indexes kept from each decomposition.
    """
    def __init__(self, fs, bands, ch_names, states_labels, indexes):
        self.states_labels = states_labels
        self.bands = bands
        # Fixed 0.5-45 Hz Butterworth band-pass applied before spatial filtering.
        self.prefilter = FilterSequence([ButterFilter((0.5, 45), fs, len(ch_names))])
        # One CSP pool per target state (one-vs-rest decomposition).
        self.csp_pools = [SpatialDecompositionPool(ch_names, fs, bands, 'csp', indexes) for _label in states_labels]
        self.csp_transformer = None  # assembled in fit() from the fitted pools
        # One variance channel per (band, index, state) combination; half a
        # second (fs//2 taps) smoothing window.
        self.var_detector = InstantaneousVarianceFilter(len(bands)*len(indexes)*len(states_labels), n_taps=fs//2)
        # No hidden layer => effectively a linear (softmax) classifier.
        self.classifier = MLPClassifier(hidden_layer_sizes=(), early_stopping=True, verbose=True)
        #self.classifier = RandomForestClassifier(max_depth=3, min_samples_leaf=100)

    def fit(self, X, y=None):
        """Fit CSP pools and the classifier on raw signal X with labels y."""
        X = self.prefilter.apply(X)
        for csp_pool, label in zip(self.csp_pools, self.states_labels):
            # One-vs-rest: boolean target marks samples of this state.
            csp_pool.fit(X, y == label)
        self.csp_transformer = FilterStack([pool.get_filter_stack() for pool in self.csp_pools])
        X = self.csp_transformer.apply(X)
        X = self.var_detector.apply(X)
        self.classifier.fit(X, y)
        # Resubstitution (training-set) accuracy — optimistic estimate only.
        print('Fit accuracy {}'.format(sum(self.classifier.predict(X) == y)/len(y)))

    def apply(self, chunk: np.ndarray):
        """Run the fitted pipeline on a chunk of samples; returns labels."""
        chunk = self.prefilter.apply(chunk)
        chunk = self.csp_transformer.apply(chunk)
        chunk = self.var_detector.apply(chunk)
        predicted_labels = self.classifier.predict(chunk)
        return predicted_labels
def mlp_cv_architecture(X, Y):
    """10-fold cross-validate several MLP layer layouts on (X, Y).

    For each architecture, records mean/std of training accuracy, test
    accuracy and fit time across folds, then pickles the result dict to
    ../results/res_nncv_architecture.pkl.

    Uses the pre-0.18 sklearn API (KFold(n, n_folds=...), 'algorithm' kwarg).
    """
    kfold = KFold(X.shape[0], n_folds=10)
    architectures = (
        (500, 2),
        (400, 2),
        (400, 100, 2),
        (400, 200, 2),
        (400, 100, 50, 2),
        (400, 200, 50, 2),
    )
    res_dict = {}
    for architecture in architectures:
        mlp = MLPClassifier(algorithm='sgd', learning_rate='adaptive',
                            hidden_layer_sizes=architecture, random_state=1)
        train_times = []
        train_accuracy = []
        test_accuracy = []
        for train, test in kfold:
            t_tr = time.time()
            mlp.fit(X[train], Y[train])
            train_times.append(time.time() - t_tr)
            acc_train = np.sum(np.equal(mlp.predict(X[train]), Y[train])) / float(X[train].shape[0])
            acc_test = np.sum(np.equal(mlp.predict(X[test]), Y[test])) / float(X[test].shape[0])
            train_accuracy.append(acc_train)
            test_accuracy.append(acc_test)
        res_dict[str(architecture)] = (np.mean(train_accuracy), np.std(train_accuracy),
                                       np.mean(test_accuracy), np.std(test_accuracy),
                                       np.mean(train_times), np.std(train_times))
    # BUG FIX: pickle needs a binary-mode handle; text mode ('w') breaks
    # pickle.dump on Python 3 and corrupts binary protocols on Windows.
    with open('./../results/res_nncv_architecture.pkl', 'wb') as f:
        pickle.dump(res_dict, f)
def naviBayes(train_X, train_y, test_X, test_y):
    """Train an MLP on the BTM features, persist it, and print accuracies.

    NOTE(review): despite the name, the active model is an MLP, not naive
    Bayes — the Bayes/KNN/SVC variants are all commented out below.
    Python 2 code (print statements, cPickle).
    """
    # print train_y
    # print test_y
    # model = tfMultyPerceptron(train_X, train_y, test_X, test_y)
    # model.run()
    time_start = time.time()
    model = MLPClassifier(hidden_layer_sizes=(128, 32, 32, 128), max_iter=100, early_stopping=False,
                          learning_rate_init=0.001, verbose=True)
    # model = MultinomialNB()
    # model = BernoulliNB()
    # model = KNeighborsClassifier()
    # model = DecisionTreeClassifier(max_depth=20, min_samples_leaf=0.01)
    # model = LinearSVC(random_state=0)
    # model.fit(X, y)
    model.fit(train_X, train_y)
    # model_1.fit(train_X, train_y)
    # model_2.fit(train_X, train_y)
    # model_3.fit(train_X, train_y)
    # model_4.fit(train_X, train_y)
    # model_5.fit(train_X, train_y)
    # All_model = [model, model_1, model_2, model_3, model_4, model_5]
    # train_pre = predct_all(All_model, train_X, train_y)
    # test_pre = predct_all(All_model, test_X, test_y)
    time_end = time.time()
    print "perceptron training cost time:{}".format(time_end - time_start)
    # model = OneVsRestClassifier(SVC(kernel='linear'))
    # model.fit(train_X, train_y)
    # save the fitted model for later reuse
    with open(config.BTMData + 'BayesModel/BTM_perceptron.model', 'wb') as fp:
        cPickle.dump(model, fp)
    # load model
    # model = None
    # with open(config.BTMData + 'BayesModel/bayes_BTM.model', 'rb') as fp:
    #     model = cPickle.load(fp)
    # print 'train data set size:', len(train_y)
    # result = metrics.accuracy_score(train_pre, train_y)  # index of the class assigned to each document
    # print"Predicting random boost train result: ", result
    # print 'train data set size:', len(train_y)
    # result = metrics.accuracy_score(test_pre, test_y)  # index of the class assigned to each document
    # print "Predicting random boost test result:", result
    print 'train data set size:', len(train_y)
    result = model.score(train_X, train_y)  # index of the class assigned to each document
    print"Predicting train result: ", result
    test_result = model.score(test_X, test_y)
    print "Predicting test set result: ", test_result
    # Top-3 accuracy from the class-probability matrix.
    top_train_result = model.predict_proba(train_X)
    print "top 3 predict train data accuracy rate: {}".format(cal_topThreeScore(model, top_train_result, train_y))
    top_test_result = model.predict_proba(test_X)
    print "top 3 predict test data accuracy rate: {}".format(cal_topThreeScore(model, top_test_result, test_y))
class NeuralLearner(Learner.Learner):
    """MLP-based learner over per-point force/filter features of a FeatureMask.

    Uses the pre-0.18 sklearn API ('algorithm' rather than 'solver').
    """

    def __init__(self, FeatureMask):
        super(NeuralLearner, self).__init__(FeatureMask)
        self.expected = FeatureMask.LabelsForAllPoints
        #self.model = MLPClassifier(algorithm='sgd', hidden_layer_sizes=(64,32))
        self.model = MLPClassifier(algorithm='sgd',
                                   learning_rate='constant',
                                   momentum=.9,
                                   nesterovs_momentum=True,
                                   learning_rate_init=0.2)

    def FitAndPredict(self, mask):
        """Fit on the mask, then return predictions for the same mask.

        BUG FIX: the old body was `return self.Predict(self.Fit(mask))`;
        Fit() returns None, so None was passed into Predict() and crashed in
        SetupInputActivations. Fit and Predict must each receive the mask.
        """
        self.Fit(mask)
        return self.Predict(mask)

    def SetupInputActivations(self, FeatureMask):
        """Stack the three per-point features into an (n, 3) input array."""
        arr = np.hstack([FeatureMask.ForceStd.reshape(-1, 1),
                         FeatureMask.ForceMinMax.reshape(-1, 1),
                         FeatureMask.CannyFilter.reshape(-1, 1)])
        expected = FeatureMask.LabelsForAllPoints
        return arr, expected

    def Fit(self, mask):
        """Train the MLP on the mask's features and labels."""
        arr, expected = self.SetupInputActivations(mask)
        self.model.fit(arr, expected)

    def Predict(self, mask):
        """Return column-vector predictions for the mask's features."""
        arr, expected = self.SetupInputActivations(mask)
        return self.model.predict(arr).reshape(-1, 1)
def train_on_source(X,Y):
    """Pre-train an MLP on the source-domain data (X, Y) and return it.

    Python 2 code using the pre-0.18 sklearn API ('algorithm' kwarg).
    warm_start=1 keeps the learned weights so later fit() calls (e.g. on
    target-domain data) continue from this initialization.
    """
    print "Start Learning Net on source"
    clf = MLPClassifier( algorithm = 'l-bfgs',
                         alpha = 1e-5,
                         hidden_layer_sizes = (500,2),
                         random_state = 1,
                         warm_start = 1,
                         max_iter = 400)
    clf.fit(X,Y)
    # Manual convergence loop kept for reference; superseded by max_iter above.
    #new_loss = 0
    #old_loss = 10000
    #for step in range(200):
    #    clf.fit(X,Y)
    #    new_loss = clf.loss_
    #    # stop training, if improvement is small
    #    improvement = abs(new_loss - old_loss)
    #    print "Step:", step, "Loss:", new_loss, "Improvement:", improvement
    #    if improvement < 1.e-5:
    #        print "Training converged!"
    #        break
    #    old_loss = new_loss
    print "Pretrained CLF on Source with num_iter:", clf.n_iter_
    return clf
def main():
    """Load layer/label rows from SQLite, one-hot encode the six layer
    columns, train an Adam MLP on half the data and report test metrics.
    """
    enc = OneHotEncoder(n_values=[7, 7, 7, 7, 7, 7])
    conn = sqlite3.connect('server.db')
    cursor = conn.cursor()
    all_ = pandas.read_sql_query('SELECT layers.burger, labels.output, layers.layer0, layers.layer1, layers.layer2, layers.layer3, layers.layer4, layers.layer5 FROM layers,labels WHERE layers.burger = labels.burger', conn, index_col='burger')
    X = all_.drop(['output'], axis=1)
    y = all_['output']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
    clf = MLPClassifier(solver='adam', activation='relu', verbose=False,
                        max_iter=10000, tol=1e-9, random_state=1)
    X_train_categoricals = X_train[column_names]
    tX_train_categoricals = enc.fit_transform(X_train_categoricals)
    clf.fit(tX_train_categoricals, y_train.as_matrix().astype(int))
    X_test_categoricals = X_test[column_names]
    # BUG FIX: the encoder must not be re-fitted on the test split —
    # fit_transform() here would relearn categories from test data and could
    # produce columns inconsistent with what the classifier was trained on.
    tX_test_categoricals = enc.transform(X_test_categoricals)
    prediction = clf.predict(tX_test_categoricals)
    print(classification_report(y_test, prediction))
    print_eval(y_test, prediction)
def neuralNetworkIteration():
    """Record per-epoch learning curves for a range of alpha values.

    For each regularization strength in ``alphalist`` an MLP (one hidden
    layer of 200 logistic units, warm_start=True, max_iter=1) is trained one
    epoch at a time; after each epoch the training and validation error
    percentages are appended to a CSV named after the alpha value.

    Improvements over the original: loop-invariant imports hoisted out of the
    loops, unused ``pydotplus`` import removed, output file managed with
    ``with`` so it is closed even on error.
    """
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score

    a, b, c, d, e, f = traing_test_data_set()
    alphalist = [.00001, .00003, .0001, .0003, .001, .003, .01, .03, 1, 10]
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        for new_alpha in alphalist:
            iteration_output = "Iteration,Training Error,Validation Error\n"
            # max_iter=1 + warm_start=True => each fit() call runs one more epoch.
            clf = MLPClassifier(alpha=new_alpha, hidden_layer_sizes=(200,), random_state=1,
                                activation='logistic', warm_start=True, max_iter=1)
            for iteration in range(1, 500):
                clf.fit(train_data, train_label)
                prediction = clf.predict(validation_data)
                row = str(str(iteration) + "," + str(100 - clf.score(train_data, train_label) * 100.0)
                          + "," + str(100 - accuracy_score(validation_label, prediction) * 100.0))
                iteration_output += row
                iteration_output += "\n"
                print(row)
            file_name = "For All Feature. Alpha = " + str(new_alpha) + " " + " Iteration data" + ".csv"
            print(file_name)
            with open(file_name, "w", encoding="utf-8") as datafile:
                datafile.write(iteration_output)
def neuralNetwork():
    """Train an lbfgs MLP for each of the five feature sets and print
    test-set accuracy, precision, F1 and error rate.

    Fixes over the original: the counting loop called
    ``clf.predict([test_data[i]])`` per sample — a full model evaluation per
    point — even though the batch ``prediction`` was already computed; the
    count now reuses it. The unused ``pydotplus`` import is removed and the
    metric imports are hoisted out of the loop.
    """
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, precision_score, f1_score

    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        clf = MLPClassifier(solver='lbfgs', alpha=.003, hidden_layer_sizes=(10,),
                            random_state=1, activation='relu')
        clf.fit(train_data, train_label)
        tot = len(test_label)
        prediction = clf.predict(test_data)
        # Misclassification count from the batch prediction (same values as
        # per-sample predict calls, without re-running the model).
        cnt = sum(1 for i in range(len(test_data)) if prediction[i] != test_label[i])
        print("Complete for Feature :" + str(feature_number))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
def neuralNetworkIterationLogistic():
    """Per-epoch learning curves (training vs validation error) for each of
    the five feature sets, written to one CSV per feature.

    Fixes over the original: the inner loop computed a misclassification
    count via per-sample ``clf.predict([x])`` calls whose result (``cnt``)
    was never used — pure dead work, removed; metric/classifier imports are
    hoisted out of the loops; the unused ``pydotplus`` import is removed;
    the output file is managed with ``with``.
    """
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score

    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        iteration_output = "Iteration,Training Error,Validation Error\n"
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        # max_iter=1 + warm_start=True => each fit() call runs one more epoch.
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1,
                            activation='logistic', warm_start=True, max_iter=1)
        for iteration in range(1, 350):
            clf.fit(train_data, train_label)
            prediction = clf.predict(validation_data)
            row = str(str(iteration) + "," + str(100 - clf.score(train_data, train_label) * 100.0)
                      + "," + str(100 - accuracy_score(validation_label, prediction) * 100.0))
            iteration_output += row
            iteration_output += "\n"
            print(row)
        file_name = "Feature No " + str(feature_number) + " Iteration data" + ".csv"
        print(file_name)
        with open(file_name, "w", encoding="utf-8") as datafile:
            datafile.write(iteration_output)
def train():
    """Load the 'arnold' dataset, scale and one-hot encode it, train an
    Adam MLP and print per-class prediction counts plus train/test scores.
    """
    utl.print_title('Getting data...')
    X, Tc, X_test, Tc_test = dpp.getdata_arnold()
    #X, Tc, X_test, Tc_test = dpp.getdata_mnist()

    utl.print_title('Preparing data...')
    X, X_test = dpp.scale_data(X, X_test)
    T = dpp.one_hot_encode(Tc)
    T_test = dpp.one_hot_encode(Tc_test)

    utl.print_title('Sanity checks...')
    print('Shape X:', X.shape)
    print('Shape Tc:', Tc.shape)
    print('Shape T:', T.shape)
    print('Shape X_test:', X_test.shape)
    print('Shape Tc_test:', Tc_test.shape)
    print('Shape T_test:', T_test.shape)

    utl.print_title('Training the network...')
    # NOTE(review): hidden_layer_sizes=(100) is the int 100, not a 1-tuple;
    # sklearn treats it as a single hidden layer of 100 units, same as (100,).
    classifier = MLPClassifier(solver='adam', learning_rate_init=1e-3,
                               hidden_layer_sizes=(100), verbose=True, max_iter=200)
    classifier.fit(X, T)
    train_score, Pc = get_results(classifier, X, T)
    test_score, Pc_test = get_results(classifier, X_test, T_test)

    utl.print_title('Results:')
    # bincount over flattened class indices gives per-class sample counts.
    print('Classification counts train (target): ', np.bincount(Tc.reshape(-1)))
    print('Classification counts train (prediction): ', np.bincount(Pc))
    print('\nClassification counts test (target): ', np.bincount(Tc_test.reshape(-1)))
    print('\nClassification counts test (prediction): ', np.bincount(Pc_test)) if False else print('Classification counts test (prediction): ', np.bincount(Pc_test))
    print('\nTrain score: ', train_score)
    print('Test score: ', test_score)
def test_gradient():
    # Test gradient.
    # This makes sure that the activation functions and their derivatives
    # are correct. The numerical and analytical computation of the gradient
    # should be close.
    for n_labels in [2, 3]:
        n_samples = 5
        n_features = 10
        X = np.random.random((n_samples, n_features))
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)

        for activation in ACTIVATION_TYPES:
            # One tiny fitting step just to initialize coefs_/intercepts_.
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                solver='lbfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)

            # Flatten all weights and biases into one parameter vector theta.
            theta = np.hstack([l.ravel() for l in mlp.coefs_ + mlp.intercepts_])

            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])

            # Pre-allocate the work buffers _loss_grad_lbfgs writes into.
            activations = []
            deltas = []
            coef_grads = []
            intercept_grads = []

            activations.append(X)
            for i in range(mlp.n_layers_ - 1):
                activations.append(np.empty((X.shape[0], layer_units[i + 1])))
                deltas.append(np.empty((X.shape[0], layer_units[i + 1])))

                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            [value, grad] = loss_grad_fun(theta)
            numgrad = np.zeros(np.size(theta))
            n = np.size(theta, 0)
            E = np.eye(n)
            epsilon = 1e-5
            # numerically compute the gradients (central finite differences)
            for i in range(n):
                dtheta = E[:, i] * epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                               loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)
def mlp_train(self, x_train, y_train):
    """Standardize the training features, fit an MLP, and return it."""
    # fit_transform is equivalent to fit() followed by transform().
    normalizer = StandardScaler()
    scaled = normalizer.fit_transform(x_train)
    model = MLPClassifier(max_iter=500, alpha=1e-5,
                          hidden_layer_sizes=(40, 100, 80),
                          warm_start=True, random_state=0)
    model.fit(scaled, y_train)
    return model
def test_tolerance():
    """A loose tol must make the SGD solver stop well before max_iter."""
    X, y = [[3, 2], [1, 6]], [1, 0]
    model = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd')
    model.fit(X, y)
    # Convergence happened: iterations actually run < configured maximum.
    assert_greater(model.max_iter, model.n_iter_)
def test_adaptive_learning_rate():
    """Adaptive SGD should converge early and shrink its learning rate."""
    X, y = [[3, 2], [1, 6]], [1, 0]
    model = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd',
                          learning_rate='adaptive')
    model.fit(X, y)
    assert_greater(model.max_iter, model.n_iter_)
    # The adaptive schedule must have decayed the rate below 1e-6.
    assert_greater(1e-6, model._optimizer.learning_rate)
def train(classes, y_samples, feature_dict, classes_dict):
    """Fit a two-hidden-layer MLP on (y_samples, classes) and return it.

    Uses the pre-0.18 sklearn API ('algorithm' rather than 'solver').
    """
    # Using dev version of slearn, 1.9
    from sklearn.neural_network import MLPClassifier
    model = MLPClassifier(algorithm='l-bfgs',
                          alpha=1e-5,
                          hidden_layer_sizes=(50, 25),
                          random_state=1,
                          verbose=True)
    model.fit(y_samples, classes)
    return model
def fitMLPs(trainIndexes, datasets):
    """Fit one MLP per (x, y) dataset, restricted to trainIndexes.

    Returns the fitted classifiers in the same order as `datasets`.
    Uses the pre-0.18 sklearn API ('algorithm' keyword).
    """
    fitted = []
    for features, labels in datasets:
        net = MLPClassifier(algorithm='l-bfgs', alpha=1e-4,
                            hidden_layer_sizes=(76, 30),
                            random_state=1, momentum=0.8)
        train_x, train_y = listToData(trainIndexes, features, labels)
        net.fit(train_x, train_y)
        fitted.append(net)
    return fitted
def main():
    """Train an MLP on the iris dataset and print its held-out accuracy."""
    dataset = datasets.load_iris()
    train_x, test_x, train_y, test_y = train_test_split(dataset.data,
                                                        dataset.target)
    model = MLPClassifier(max_iter=1000)
    model.fit(train_x, train_y)
    accuracy = model.score(test_x, test_y)
    print(accuracy)
def do_mlp(x_train, x_test, y_train, y_test):
    """Fit a small lbfgs MLP and print a test-set classification report."""
    model = MLPClassifier(solver='lbfgs',
                          alpha=1e-5,
                          hidden_layer_sizes=(10, 4),
                          random_state=1)
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    print(classification_report(y_test, predictions))
def do_mlp(x_train, x_test, y_train, y_test):
    """Fit a tiny lbfgs MLP and hand predictions to do_metrics()."""
    model = MLPClassifier(solver='lbfgs',
                          alpha=1e-5,
                          hidden_layer_sizes=(5, 2),
                          random_state=1)
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    do_metrics(y_test, predictions)
def test2():
    """Fit a tiny relu MLP on two points, print predictions and class
    probabilities for two unseen samples, then export via output_mlp().

    Python 2 code (print statements).
    """
    X = [[0., 0.], [1., 1.]]
    y = [0, 1]
    # hidden_layer_sizes=(3) is the int 3 (one hidden layer of 3 units).
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(3), random_state=1, activation='relu')
    clf.fit(X,y)
    test_sample = [[2., 2.], [-1., -2.]]
    print clf.predict(test_sample)
    print clf.predict_proba(test_sample)
    output_mlp(clf)
def mlpTest(self):
    """Fit an SGD MLP on the stored train split and print test-set reports."""
    net = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=1000,
                        alpha=1e-4, solver='sgd', verbose=10, tol=1e-4,
                        random_state=1)
    net.fit(self.X_train, self.Y_train)
    predicted = net.predict(self.X_test)
    report = metrics.classification_report(self.Y_test, predicted)
    print("Classification report for classifier %s:\n%s\n" % (net, report))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(self.Y_test, predicted))
def fit_and_score_ann(x_train, y_train, x_test, y_test, config):
    """Train an MLP from the run configuration; return test-set accuracy."""
    hidden = (config.ann.hidden_neurons,)
    model = MLPClassifier(solver=config.ann.solver,
                          max_iter=Configuration.ANN_MAX_ITERATIONS,
                          alpha=config.ann.alpha,
                          hidden_layer_sizes=hidden,
                          learning_rate='adaptive')
    model.fit(x_train, y_train)
    return model.score(x_test, y_test)
def MLP_classifier(train_x, train_y):
    """Fit an Adam MLP with early stopping and return the fitted model.

    All hyper-parameters are spelled out explicitly; note this uses the
    pre-0.18 sklearn API ('algorithm' keyword rather than 'solver').
    """
    hyperparams = dict(
        activation='relu', algorithm='adam', alpha=0.0001,
        batch_size='auto', beta_1=0.9, beta_2=0.999,
        early_stopping=True, epsilon=1e-08,
        hidden_layer_sizes=([50, 50]), learning_rate='constant',
        learning_rate_init=0.01, max_iter=3000, momentum=0.9,
        nesterovs_momentum=True, power_t=0.5, random_state=0,
        shuffle=True, validation_fraction=0.1, verbose=False,
        warm_start=False,
    )
    model = MLPClassifier(**hyperparams)
    model.fit(train_x, train_y)
    return model
def test_early_stopping_stratified():
    # Make sure data splitting for early stopping is stratified
    features = [[1, 2], [2, 3], [3, 4], [4, 5]]
    labels = [0, 0, 0, 1]
    model = MLPClassifier(early_stopping=True)
    # A class with a single member cannot be stratified into a validation
    # split, so fit() must refuse with this error.
    expected = 'The least populated class in y has only 1 member'
    with pytest.raises(ValueError, match=expected):
        model.fit(features, labels)
def do_mlp(x_train, x_test, y_train, y_test):
    """Fit a tiny lbfgs MLP, print classification report and confusion matrix.

    Python 2 code (note the statement-form print on the last line).
    """
    # Building deep neural network
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes = (5, 2),
                        random_state = 1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred))
    print metrics.confusion_matrix(y_test, y_pred)
def neuralNetwork():
    """Train a logistic MLP on standardized features and print metrics for
    both the test and validation splits (misclassified samples are listed).
    """
    import pydotplus
    a,b,c,d,e,f = traing_test_data_set();
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number));
        train_data, train_label = a[feature_number - 1], b[feature_number - 1];
        test_data, test_label = c[feature_number - 1], d[feature_number - 1];
        validation_data,validation_label = e[feature_number-1],f[feature_number-1];
        # Standardize using statistics of the training split only.
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler();
        scaler.fit(train_data);
        train_data = scaler.transform(train_data);
        test_data = scaler.transform(test_data);
        validation_data = scaler.transform(validation_data);
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(100,), random_state=1, activation='logistic', max_iter=1000);
        clf.fit(train_data, train_label)
        # --- test-set evaluation ---
        tot = len(test_label);
        cnt = 0;
        prediction = clf.predict(test_data);
        for i in range(0, len(test_data)):
            if prediction[i] != test_label[i]:
                # Print each misclassified index with predicted vs true label.
                print(str(i)+str(prediction[i])+" "+str(test_label[i]));
                cnt += 1;
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        from sklearn.metrics import f1_score
        print("Complete for Feature :" + str(feature_number));
        print("Train data set size : " + str(len(train_data)));
        print("Train Score : " + str(clf.score(train_data, train_label)));
        print("Total test set size : " + str(len(test_label)));
        print("Correct prediction : " + str(tot - cnt));
        print("Incorrect Prediction : " + str(cnt));
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0));
        print("---------------------------------------\n");
        # --- validation-set evaluation (same reporting) ---
        tot = len(validation_label);
        cnt = 0;
        prediction = clf.predict(validation_data);
        for i in range(0, len(validation_label)):
            if prediction[i] != validation_label[i]:
                print(str(i)+str(prediction[i])+" "+str(validation_label[i]));
                cnt += 1;
        print("Total validation set size : " + str(len(validation_label)));
        print("Correct prediction : " + str(tot - cnt));
        print("Incorrect Prediction : " + str(cnt));
        print("Accuracy : " + str(accuracy_score(validation_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(validation_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(validation_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0));
        print("---------------------------------------\n");
def neural_network_voting_systemLogistic():
    """Majority-vote ensemble: one logistic MLP per feature set votes on
    each test sample; the most common predicted label wins (via the
    module-level ``most_common`` helper). Prints ensemble metrics.

    NOTE(review): the ``iterations`` list is defined but never used —
    presumably leftover from a per-model early-stop experiment.
    """
    import pydotplus
    a,b,c,d,e,f = traing_test_data_set();
    iterations = [75, 60, 90, 95, 95];
    # One vote list per test sample; filled with each model's prediction.
    voting_pred = list();
    for i in range(0, len(d[0])):
        voting_pred.append([]);
    import random
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number));
        train_data, train_label = a[feature_number - 1], b[feature_number - 1];
        test_data, test_label = c[feature_number - 1], d[feature_number - 1];
        # use feature scaling for rbf kernel
        # from sklearn.preprocessing import StandardScaler
        # scaler = StandardScaler();
        # scaler.fit(train_data);
        # train_data = scaler.transform(train_data);
        # test_data = scaler.transform(test_data);
        #rnd = list(zip(train_data,train_label));
        #random.shuffle(rnd);
        #train_data, train_label = zip(*rnd)
        # Standardize with training-split statistics only.
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler();
        scaler.fit(train_data);
        train_data = scaler.transform(train_data);
        test_data = scaler.transform(test_data);
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1, activation='logistic',max_iter =1000,early_stopping=False)
        clf.fit(train_data, train_label)
        tot = len(test_label);
        cnt = 0;
        print(clf.n_iter_);
        # Collect this model's vote for every test sample.
        for i in range(0, len(test_data)):
            voting_pred[i].append(clf.predict([test_data[i]])[0]);
    # Majority vote across the five models, then standard metrics.
    tot = len(test_label);
    cnt = 0;
    prediction = list();
    for i in range(0, len(test_data)):
        prediction.append(most_common(voting_pred[i]));
        if prediction[i] != test_label[i]:
            print(str(i) + " " + str(prediction[i]) + " " + str(test_label[i]));
            cnt += 1;
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import precision_score
    from sklearn.metrics import f1_score
    print("Complete for Voting system :");
    print("Total test set size : " + str(len(test_label)));
    print("Correct prediction : " + str(tot - cnt));
    print("Incorrect Prediction : " + str(cnt));
    print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
    print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
    print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
    print("Error Rate : " + str(cnt / tot * 100.0));
    print("---------------------------------------\n");
def test_bool_and(self):
    """A no-hidden-layer logistic MLP must learn the boolean AND truth table.

    Uses the pre-0.18 sklearn API ('algorithm' keyword).
    """
    x = ((0, 0), (1, 1), (1, 0), (0, 1))
    y = ( 0,      1,      0,      0)
    mlp = MLPClassifier(hidden_layer_sizes=(),
                        activation='logistic', max_iter=2, alpha=1e-4,
                        algorithm='l-bfgs', verbose=False, tol=1e-4,
                        random_state=1, learning_rate_init=.1)
    mlp.fit(x, y)
    # BUG FIX: predict() expects a 2D (n_samples, n_features) array.
    # ((0, 0)) is just the 1D tuple (0, 0) — redundant parentheses — which
    # sklearn deprecates/rejects. Wrap each sample in a list.
    assert mlp.predict([(0, 0)]) == 0
    assert mlp.predict([(0, 1)]) == 0
    assert mlp.predict([(1, 0)]) == 0
    assert mlp.predict([(1, 1)]) == 1
def Neural_network(self, X_train, Y_train, X_test, Y_test):
    """Fit a default MLP, print its classification report and ROC AUC."""
    from sklearn import metrics
    from sklearn.neural_network import MLPClassifier
    model = MLPClassifier()
    model.fit(X_train, Y_train)
    expected = Y_test
    predicted = model.predict(X_test)
    # ROC curve from hard labels (not probabilities), then AUC.
    fpr, tpr, thresholds = metrics.roc_curve(expected, predicted)
    print(metrics.classification_report(expected, predicted))
    # print(metrics.confusion_matrix(expected, predicted))
    print(metrics.auc(fpr, tpr))
class AnnClassifier(AbstractClassifier):
    """MLP classifier wrapper conforming to the AbstractClassifier interface."""

    def __init__(self, features, target, solver='lbfgs', alpha=1e-5,
                 hidden_layer_sizes=(15,), random_state=1):
        # Store hyper-parameters before calling the base __init__, in case
        # the base constructor triggers fitting.
        self.solver = solver
        self.alpha = alpha
        self.hidden_layer_sizes = hidden_layer_sizes
        self.random_state = random_state
        super(AnnClassifier, self).__init__(features, target)

    # NOTE(review): the double leading underscore name-mangles this to
    # _AnnClassifier__fit; confirm AbstractClassifier really dispatches to a
    # mangled per-subclass hook rather than expecting a single-underscore
    # _fit override.
    def __fit(self, features):
        """Build and fit the underlying MLPClassifier on `features`."""
        self.clf = MLPClassifier(solver=self.solver,
                                 alpha=self.alpha,
                                 hidden_layer_sizes=self.hidden_layer_sizes,
                                 random_state=self.random_state)
        self.clf.fit(features, self.target)
while 1: with open(f_name, 'r') as f: threads = f.readlines() x_train, y_train = [], [] for t in threads: tr = list(map(int, t.replace('\n', '').split(','))) x_train.append(tr[:-1]) y_train.append(tr[-1]) x_train, y_train = np.array(x_train), np.array(y_train) zero, one = x_train[y_train == 0], x_train[y_train == 1] print('#', len(threads), 'y_train', dict(collections.Counter(y_train))) model = MLPClassifier(max_iter=100000, random_state=42) model.fit(x_train, y_train) pred_p = pred_model(model)[:, 0] Xr = X[np.logical_and(ppl[0] < pred_p, pred_p < ppl[1])] print(len(Xr), end=' ') point = get_closest(Xr, zero, one) """ grid0 = get_segments(point, min_max_X, 0) pred_p = model.predict_proba(grid0)[:, 0] Xr = grid0[np.logical_and(ppl[0] < pred_p, pred_p < ppl[1])] point = get_closest(Xr, zero, one) grid1 = get_segments(point, get_min_max(grid0), 1) pred_p = model.predict_proba(grid1)[:, 0]
# gp = GaussianProcessClassifier() # gp.fit(Xtrain, Ytrain) # # print("importances=", gp.feature_importances_) # # y_predict = gp.predict(Xtest) # print(f"Accuracy score for Gaussian Process Classifier Classifier is: {accuracy_score(Ytest, y_predict)}") # # print("GP importances=", gp.feature_importances_) # # cm = ConfusionMatrix(gp, classes=[0,1]) # cm.fit(Xtrain, Ytrain) # cm.score(Xtest, Ytest) # cm.show() # MLP Classifier ############################################# mlp = MLPClassifier() mlp.fit(Xtrain, Ytrain) # print("importances=", gp.feature_importances_) y_predict = mlp.predict(Xtest) print( f"\nAccuracy score for MLP Classifier Classifier is: {accuracy_score(Ytest, y_predict)}" ) # print("GP importances=", gp.feature_importances_) cm = ConfusionMatrix(mlp, classes=[0, 1]) cm.fit(Xtrain, Ytrain) cm.score(Xtest, Ytest) cm.show() # AdaBoostClassifier ####################################### ada = AdaBoostClassifier()
clf = svm.SVC()#calling the function clf.fit(X_train, y_train)# just does a simple fit of the data we seperated out for training pred_clf = clf.predict(X_test)# predect the test values #How the CLF model preformes print("SVM Classification") print(classification_report(y_test, pred_clf))# how the test data compares to the predected values print(confusion_matrix(y_test, pred_clf))# this give us a matrix on the mislabels between good and bad #================================= ##Neural Network #hidden layers is the nodes in the NN #Good for text based code or big data sets, picture processing #================================== #object = Classifier(how many nodes in each layer, max many iterations mlpc = MLPClassifier(hidden_layer_sizes=(11,11,11),max_iter=500) mlpc.fit(X_train, y_train) pred_mlpc = mlpc.predict(X_test) #How the NN model preformes print("Neural Network") print(classification_report(y_test, pred_mlpc))# how the test data compares to the predected values print(confusion_matrix(y_test, pred_mlpc))# this give us a matrix on the mislabels between good and bad #Score the AI from sklearn.metrics import accuracy_score #Test scrore bn = accuracy_score(y_test, pred_rfc) #Labelling code for printing dm = accuracy_score(y_test, pred_clf) #Labelling code for printing cm = accuracy_score(y_test, pred_mlpc) #Labelling code for printing print(bn, ' is the Forest score') print(dm, ' is the SVM Classification score') print(cm, ' is the Neural Network score')
def mp(X_train, y_train, X_test, hid=(100, 100)):
    """Fit an Adam MLP with hidden layout `hid`; return test predictions."""
    from sklearn.neural_network import MLPClassifier
    model = MLPClassifier(solver='adam', hidden_layer_sizes=hid)
    model.fit(X_train, y_train)
    return model.predict(X_test)
data_for_predicting_Y_batch_2 = data_for_predicting_Y[order[(N / 3):( 2 * N / 3)], :] data_for_predicting_A_batch_2 = data_for_predicting_A[order[(N / 3):( 2 * N / 3)], :] pr_attr_batch_2 = pr_attr[order[(N / 3):(2 * N / 3)]] label_batch_2 = label[order[(N / 3):(2 * N / 3)]] data_for_predicting_Y_batch_3 = data_for_predicting_Y[order[(2 * N / 3):], :] data_for_predicting_A_batch_3 = data_for_predicting_A[order[(2 * N / 3):], :] pr_attr_batch_3 = pr_attr[order[(2 * N / 3):]] label_batch_3 = label[order[(2 * N / 3):]] #train classifiers on Batch 1 clf_for_Y.fit(data_for_predicting_Y_batch_1, label_batch_1) clf_for_A.fit(data_for_predicting_A_batch_1, pr_attr_batch_1) #make predictions on Batch 2 and 3 label_batch_2_PREDICTED = clf_for_Y.predict( data_for_predicting_Y_batch_2) label_batch_3_PREDICTED = clf_for_Y.predict( data_for_predicting_Y_batch_3) pr_attr_batch_2_PREDICTED = clf_for_A.predict( data_for_predicting_A_batch_2) pr_attr_batch_3_PREDICTED = clf_for_A.predict( data_for_predicting_A_batch_3) #run equalized odds (training on Batch 2 and predicting on Batch 3) with predicted attribute EO_PREDICTION_batch_3 = equalized_odds_pred(label_batch_2, label_batch_2_PREDICTED,
data = np.array(data)
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size = 0.1)
class_weight = "balanced"

print('Training Features Shape:', train_data.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_data.shape)
print('Testing Labels Shape:', test_labels.shape)
print('Sample weights: '+ str(compute_sample_weight(class_weight=class_weight, y=train_labels)))

# STEP 1 Training
# try diff activation function, solver, hhidden layers, etc
nn = MLPClassifier(activation="relu", solver="adam", alpha=1e-5)
# BUG FIX: MLPClassifier.fit() accepts only (X, y) — unlike tree/ensemble
# estimators it has no sample_weight parameter, so the original call
# `nn.fit(..., sample_weight=...)` raised TypeError. To handle imbalance
# with an MLP, resample the training set instead.
nn.fit(train_data, train_labels)

# STEP 2 Errors
print("TRAINING ACCURACY: "+str(nn.score(train_data, train_labels)))
print("TESTING ACCURACY: "+str(nn.score(test_data, test_labels)))
predictions = nn.predict(test_data)
conf_mat = confusion_matrix(test_labels, predictions)
print(conf_mat)
#print(rf.feature_importances_)

# STEP 3 Save Ensemble
#filename = 'LogReg_1.sav'
#pickle.dump(ada, open(filename, 'wb'))
train_data.append(data[1:]) print('Loaded ' + str(len(train_label))) # step 2: PCA reduction + svm print('PCA Reduction and ANN fitting...') train_label = np.array(train_label) train_data = np.array(train_data) pca = PCA(n_components=COMPONENT_NUM, whiten=True) pca.fit(train_data) train_data = pca.transform(train_data) clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=RANOM_STATE) clf.fit(train_data, train_label) # step 3: plot PCAs into 2D plot # reference 1: https://jakevdp.github.io/PythonDataScienceHandbook/05.09-principal-component-analysis.html # ref 2: http://scikit-learn.org/0.17/auto_examples/svm/plot_iris.html def getcolor(index): if index % CIGTOTAL == 0: return 'r' elif index % CIGTOTAL == 1: return 'b' else: return 'g'
def mlp(self):
    """Fit an MLP on the (module-level) train split and return test predictions.

    BUG FIX: the original constructed MLPClassifier(C=1000, penalty='l2');
    C and penalty are LogisticRegression parameters — MLPClassifier rejects
    them with a TypeError. The MLP's L2 strength is `alpha` (roughly 1/C),
    so the equivalent setting is alpha=1e-3.
    """
    mlp = MLPClassifier(alpha=1e-3)
    mlp = mlp.fit(x_train, y_train)
    pred = mlp.predict(x_test)
    print("MLP's Accuracy score:", accuracy_score(pred, y_test))
    return pred
image_size = 28  # width and length
no_of_different_labels = 10  # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

# create MLP: single hidden layer of 100 units, plain SGD.
mlp = MLPClassifier(hidden_layer_sizes=(100, ), max_iter=480, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)

# train MLP — fit() wants a flat 1D label vector, so drop the trailing axis.
train_labels = train_labels.reshape(train_labels.shape[0],)
print(train_imgs.shape, train_labels.shape)
mlp.fit(train_imgs, train_labels)
print("Training set score: %f" % mlp.score(train_imgs, train_labels))
print("Test set score: %f" % mlp.score(test_imgs, test_labels))
# BUG FIX: removed stray debugging statement `help(mlp.fit)`, which dumped
# the fit() docstring mid-run (and blocks in interactive pagers).

# plots results: first-layer weight vectors rendered as 28x28 images.
fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())
plt.show()
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the raw samples; data2.csv carries no header row.
frame = pd.read_csv('data2.csv', header=None)
rows = frame.values
base = 1
# Numeric code for the wind-direction string in column 4; any direction
# other than S/SW/SE stays 0, matching the original behaviour.
direction_code = {'S': base, 'SW': 2 * base, 'SE': 3 * base}

data_X = []
data_Y = []
for row in rows:
    # First four columns pass through untouched; the fifth is encoded.
    data_X.append([row[0], row[1], row[2], row[3],
                   direction_code.get(row[4], 0)])
    data_Y.append(row[5])
# data_Y = data[:,-1]

# 80/20 split with a fixed seed, then a small two-layer MLP.
X_train, X_test, Y_train, Y_test = train_test_split(
    data_X, data_Y, test_size=0.2, random_state=0)
model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                      hidden_layer_sizes=(5, 5), random_state=1)
model.fit(X_train, Y_train)
res = model.predict(X_test)
print(res)
print(Y_test)
print "prediction using support vector machine"
print prediction

# Gaussian Naive Bayes on the same [height, weight, shoe-size] sample.
from sklearn.naive_bayes import GaussianNB
clf3 = GaussianNB()
clf3.fit(X, Y)
prediction = clf3.predict([[167.64, 76.43, 36.5]])
print "prediction using Gaussian Naive Bayes"
print prediction

# Small MLP (hidden layers of 5 and 2 units) on the same sample.
from sklearn.neural_network import MLPClassifier
clf4 = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                     random_state=1)
clf4.fit(X, Y)
prediction = clf4.predict([[167.64, 76.43, 36.5]])
print "prediction using Neural Network"
print prediction
learning_rate_init=learningRateInit, max_iter=maxIter,
                     tol=tolRate, momentum=momentumRate,
                     batch_size=batchSize, verbose=talkative)
# Fifth head: identical hyper-parameters, trained against target a45.
mlp5 = MLPClassifier(activation=activationMode, solver=solverMode,
                     alpha=alphaParameter,
                     learning_rate_init=learningRateInit, max_iter=maxIter,
                     tol=tolRate, momentum=momentumRate,
                     batch_size=batchSize, verbose=talkative)

# One independent MLP per target column (a41..a45).
mlp1.fit(dfToTrain, a41)
print("Fit 1")
mlp2.fit(dfToTrain, a42)
print("Fit 2")
mlp3.fit(dfToTrain, a43)
print("Fit 3")
mlp4.fit(dfToTrain, a44)
print("Fit 4")
mlp5.fit(dfToTrain, a45)
print("Fit 5")

# Data to Predict
df2 = openCSV('data/test3.csv')
# Drop rows missing any of the exam-score columns the models need.
df2 = df2.dropna(subset=[
    'NU_NOTA_LC', 'NU_NOTA_CH', 'NU_NOTA_CN', 'NU_NOTA_REDACAO',
    'NU_INSCRICAO'
])
X, y = list(zip(*features)) X = np.array(X) y = (np.array(y) == '1-0').astype(np.int) X = csr_matrix(X) X_tr, X_te, y_tr, y_te = train_test_split(X, y) model_lr = LogisticRegression() model_lr.fit(X_tr, y_tr) pred_lr = model_lr.predict_proba(X_te)[:, 1] print("auc lr", roc_auc_score(y_te, pred_lr)) model_nn = MLPClassifier(hidden_layer_sizes=(5, ), verbose=True) model_nn.fit(X_tr, y_tr) pred_nn = model_lr.predict_proba(X_te)[:, 1] print("auc lr", roc_auc_score(y_te, pred_nn)) model_lgbm = LGBMClassifier(n_estimators=100) model_lgbm.fit(X_tr.astype(np.float64), y_tr) pred_lgbm = model_lgbm.predict_proba(X_te.astype(np.float64))[:, 1] print("auc lgbm", roc_auc_score(y_te, pred_lgbm)) print("auc", roc_auc_score(y_te, pred_lr + pred_lgbm)) with open("lgbm_if_else.c", "wt") as out: out.write( parseAllTrees(model_lgbm.booster_.dump_model()['tree_info']).replace( "1.0000000180025095e-35", "1")) """
x = []
y = []
print(xx)
# Every file in testsdc/ is a sample; the label is the filename prefix
# before the first underscore.
files_name = [f for f in listdir('testsdc') if isfile(join('testsdc', f))]
for name in files_name:
    img = cv2.imread(join('testsdc', name))
    cv2.imshow('Learning Image', img)
    # BUG FIX: cv2.waitkey / cv2.desstroyAllWindows were misspelled and
    # raised AttributeError; OpenCV's names are waitKey/destroyAllWindows.
    cv2.waitKey(100)
    cv2.destroyAllWindows()
    # Blur -> hard threshold -> downscale to 24x24, then flatten into one
    # feature row per image.
    img = cv2.blur(img, (5, 5))
    retval, img = cv2.threshold(img, 201, 255, cv2.THRESH_BINARY)
    img = cv2.resize(img, (24, 24))
    image_as_array = numpy.ndarray.flatten(numpy.array(img))
    x.append(image_as_array)
    y.append(name.split('_')[0])

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2,
                                                random_state=42)
# Standardise with statistics from the training split only.
scaler = StandardScaler()
scaler.fit(xtrain)
xtrain = scaler.transform(xtrain)
xtest = scaler.transform(xtest)

alg = MLPClassifier(solver='lbfgs', alpha=100.0, random_state=1,
                    hidden_layer_sizes=50, verbose=True)
alg.fit(xtrain, ytrain)
print(alg.score(xtest, ytest))
joblib.dump(alg, 'model.pkl')
print(xx)
(9, 14, 14, 2), # 9 input, 14-14 neuron in 2 layers,1 output layer 'random_state': [1] } # Type of scoring to compare parameter combos acc_scorer = make_scorer(accuracy_score) # Run grid search grid_obj = GridSearchCV(ann_clf, parameters, scoring=acc_scorer) grid_obj = grid_obj.fit(X_train, y_train) # Pick the best combination of parameters ann_clf = grid_obj.best_estimator_ # Fit the best algorithm to the data ann_clf.fit(X_train, y_train) y_pred_ann = ann_clf.predict(X_test) # Making the Confusion Matrix from sklearn.metrics import confusion_matrix cm_ann = confusion_matrix(y_test, y_pred_ann) print(cm_ann) ann_result = accuracy_score(y_test, y_pred_ann) print(ann_result) recall_ann = cm_ann[0][0] / (cm_ann[0][0] + cm_ann[0][1]) precision_ann = cm_ann[0][0] / (cm_ann[0][0] + cm_ann[1][1]) print(recall_ann, precision_ann)
# Train MLPclassifier ### # Take 33% of the data for testing X_train, X_test, y_train, y_test = train_test_split( data_tf, labels, test_size=TEST_SIZE, random_state=42) # Note that another 10% of the taining data is used as validation data for early_stopping # Doing so allows the usage of an adaptive learning rate print("Creating MLPClassifier...") clf = MLPClassifier(solver=SOLVER, activation='tanh', verbose=True, early_stopping=False, hidden_layer_sizes=LAYER, max_iter=ITERATIONS, alpha=L2_PENALTY, learning_rate_init=LEARNING_RATE_INIT) print("Training ANN (max. " + str(ITERATIONS) + " itr.)...") clf.fit(X_train, y_train) ### # Export data ### print("\nExporting data structures:") print(" -> CountVectorizer") with open("export/export_count.dat", "wb+") as handle: pickle.dump(count_vect, handle) print(" -> Tf-idf Transformer") with open("export/export_tfidf.dat", "wb+") as handle: pickle.dump(tf_transformer, handle)
'27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60' }] y_test = data2['61'] mlp = MLPClassifier(hidden_layer_sizes=3, activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, max_iter=300) mlp.fit(x_train, y_train) print('MLP accuracy with 3 hidden layers:', mlp.score(x_test, y_test)) mlp = MLPClassifier(hidden_layer_sizes=5, activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, max_iter=300) mlp.fit(x_train, y_train) print('MLP accuracy with 5 hidden layers:', mlp.score(x_test, y_test)) mlp = MLPClassifier(hidden_layer_sizes=100, activation='relu',
def run(train=[], test=[], leafsize=5, bag=10):
    # 10-fold comparison of five learners on pre-built CV splits.
    # `train`/`test` are lists of 10 arrays whose last column is the label.
    # NOTE(review): mutable default arguments ([]) are shared across calls;
    # harmless here because they are never mutated, but worth cleaning up.
    print
    print
    #overall accuracy
    RFACCout = 0.0
    DTACCout = 0.0
    SVMACCout = 0.0
    RFCACCout = 0.0
    MLPACCout = 0.0
    for cv in range(0, 10):
        traindata = train[cv]
        testdata = test[cv]
        # Features are all but the last column; the label is the last one.
        trainX = traindata[:, 0:-1]
        trainY = traindata[:, -1]
        testX = testdata[:, 0:-1]
        testY = testdata[:, -1]
        sizeTrainSet = len(trainX)  #how many data in this train set
        sizeTestSet = len(testX)  #how many data in this test set
        # Fraction of positive labels: accuracy of the constant predictor.
        baselineTrain = np.float(np.sum(trainY)) / sizeTrainSet
        baselineTest = np.float(np.sum(testY)) / sizeTestSet
        #print np.sum(inSamY==trainY)
        # ========================
        #Random Forest
        learner = rf.RandomForest(learner=rt.RandomTree,
                                  kwargs={"leaf_size": leafsize},
                                  bags=bag,
                                  boost=False,
                                  verbose=False)
        learner.addEvidence(trainX, trainY)
        #inSamY = learner.query(trainX)#in sample test
        outSamY = learner.query(testX)  #out sample test
        #inSamACC=np.float(np.sum(inSamY==trainY))/sizeTrainSet
        outSamACC = np.float(np.sum(outSamY == testY)) / sizeTestSet
        #RFACCin = RFACCin + inSamACC
        RFACCout = RFACCout + outSamACC
        # ========================
        #Random Forest - SKLEARN
        rfc = RandomForestClassifier(n_estimators=bag)
        rfc.fit(trainX, trainY)
        RFCoutSamY = rfc.predict(testX)
        RFCoutSamACC = np.float(np.sum(RFCoutSamY == testY)) / sizeTestSet
        RFCACCout = RFCACCout + RFCoutSamACC
        # ========================
        #Decision Tree
        clf = tree.DecisionTreeClassifier()
        clf = clf.fit(trainX, trainY)
        #DTinSamY = clf.predict(trainX)
        DToutSamY = clf.predict(testX)
        #DTinSamACC = np.float(np.sum(DTinSamY == trainY)) / sizeTrainSet
        DToutSamACC = np.float(np.sum(DToutSamY == testY)) / sizeTestSet
        #DTACCin = DTACCin + DTinSamACC
        DTACCout = DTACCout + DToutSamACC
        # ========================
        #SVM
        svm = SVC()
        svm.fit(trainX, trainY)
        #SVMinSamY = svm.predict(trainX)
        SVMoutSamY = svm.predict(testX)
        #SVMinSamACC = np.float(np.sum(SVMinSamY == trainY)) / sizeTrainSet
        SVMoutSamACC = np.float(np.sum(SVMoutSamY == testY)) / sizeTestSet
        #SVMACCin = SVMACCin + SVMinSamACC
        SVMACCout = SVMACCout + SVMoutSamACC
        # ========================
        # feed forward - neural net
        clf = MLPClassifier(solver='lbfgs',
                            alpha=1e-5,
                            hidden_layer_sizes=(100, ),
                            random_state=1)
        clf.fit(trainX, trainY)
        MLPoutSamY = clf.predict(testX)
        MLPoutSamACC = np.float(np.sum(MLPoutSamY == testY)) / sizeTestSet
        MLPACCout = MLPACCout + MLPoutSamACC
        # Per-fold report (out-of-sample only; in-sample lines commented out).
        print "================"
        print "doing cross-valid " + str(cv + 1) + ":"
        #print "in-sample Accuracy baseline: "+str(max(baselineTrain,1-baselineTrain))
        #print "in-sample Accuracy - Random Forest: " + str(inSamACC)
        #print "in-sample Accuracy - Decision Tree: " + str(DTinSamACC)
        #print "in-sample Accuracy - SVM: " + str(SVMinSamACC)
        print "out-sample Accuracy baseline: " + str(
            max(baselineTest, 1 - baselineTest))
        print "out-sample Accuracy - Random Forest: " + str(outSamACC)
        print "out-sample Accuracy - Random Forest - SKLEARN: " + str(
            RFCoutSamACC)
        print "out-sample Accuracy - Decision Tree: " + str(DToutSamACC)
        print "out-sample Accuracy - SVM: " + str(SVMoutSamACC)
        print "out-sample Accuracy - Neural Net MLP: " + str(MLPoutSamACC)
        print
    print
    # Final averages over the 10 folds.
    print "================================"
    print "cross validation done"
    print "Out-sample accuracy: "
    print "Random Forest: " + str(RFACCout / 10)
    print "Random Forest - SKLEARN: " + str(RFCACCout / 10)
    print "Decision Tree: " + str(DTACCout / 10)
    print "SVM: " + str(SVMACCout / 10)
    print "Neural Net MLP: " + str(MLPACCout / 10)
def evaluation_models():
    """Evaluate the tuned classifiers on a separate test set.

    The hyper-parameters below were obtained from training(), which prints
    them; they are entered manually here.  Expects ./training_set.csv for
    training and the 'testset' file for testing.
    """
    dataset_path = './training_set.csv'  # path of the file to test
    testset_path = 'testset'
    # 1) with a single dataset, comment out 1)-5) and un-comment 6)
    dataset = pd.read_csv(dataset_path)
    testsetdata = pd.read_csv(testset_path)
    #2)
    label = read_label(dataset)
    # No 80/20 split: we assume two separate files, one for training and
    # one for testing.  Fill missing test-set values with the training-set
    # column mean.
    for count in label:
        media = dataset[count].mean()
        testsetdata[count] = testsetdata[count].fillna(media)
        #3)
        dataset[count] = dataset[count].fillna(media)
    # Split attributes (columns 0-19) from the label (column 20).
    training_x = dataset.iloc[:, 0:20].values
    training_y = dataset.iloc[:, 20].values
    test_x = testsetdata.iloc[:, 0:20].values
    #4)
    test_y = testsetdata.iloc[:, 20].values
    #5)
    #training_x, test_x, training_y, test_y = model.train_test_split(training_x, training_y, test_size=0.2, random_state=0)
    #6)
    # Normalise the data (scaler fitted on training data only).  NOTE: the
    # local name StandardScaler actually holds a MinMaxScaler instance.
    StandardScaler = preprocessing.MinMaxScaler()
    StandardScaler.fit(training_x)
    training_x = StandardScaler.transform(training_x)
    test_x = StandardScaler.transform(test_x)
    # feature selection
    test_x = featureSelection(test_x)
    training_x = featureSelection(training_x)
    # Model evaluation.
    classifier = MLPClassifier(max_iter=10000, activation='relu',
                               hidden_layer_sizes=(100, 50),
                               learning_rate='adaptive',
                               learning_rate_init=0.01, solver='sgd')
    classifier.fit(training_x, training_y)
    print('Risultati MLP')
    evaluation(classifier, test_x, test_y)
    classifier1 = RandomForestClassifier(criterion='entropy', max_depth=100,
                                         max_features='log2',
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         n_estimators=400)
    classifier1.fit(training_x, training_y)
    print('Risultati RandomForest')
    evaluation(classifier1, test_x, test_y)
    classifier2 = SVC(C=10, decision_function_shape='ovo', gamma=10,
                      kernel='rbf')
    classifier2.fit(training_x, training_y)
    print('Risultati SVC')
    evaluation(classifier2, test_x, test_y)
    classifier3 = DecisionTreeClassifier(criterion='entropy', max_depth=100,
                                         max_features=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         splitter='best')
    classifier3.fit(training_x, training_y)
    print('Risultati DecisionTree')
    evaluation(classifier3, test_x, test_y)
    classifier4 = GaussianNB(priors=None, var_smoothing=1e-9)
    classifier4.fit(training_x, training_y)
    print('Risultati NaiveBayes')
    evaluation(classifier4, test_x, test_y)
    classifier5 = KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                       n_neighbors=10, p=3,
                                       weights='distance')
    classifier5.fit(training_x, training_y)
    print('Risultati KNeighbors')
    # BUG FIX: previously evaluated `classifier` (the MLP) again here, so
    # the "Risultati KNeighbors" figures were actually the MLP's.
    evaluation(classifier5, test_x, test_y)
# Metrics for the SVM predictions computed above this chunk.
print(confusion_matrix(y_test, y_pred_svm))
print('==== PRECISION ====')
print(precision_score(y_test, y_pred_svm) * 100, '%')
print('==== ACCURACY ==== ')
print(accuracy_score(y_test, y_pred_svm) * 100, '%')
print('==== RECALL ==== ')
print(recall_score(y_test, y_pred_svm, average='binary') * 100, '%')

# ===============================================================================================
# NEURAL NETWORK
# ===============================================================================================
# Moment at we start building the model
time_ini_rn = time()
mlp = MLPClassifier(solver='adam', activation='relu',
                    hidden_layer_sizes=(6, 4), max_iter=1000)
mlp.fit(X_train, y_train)
# Moment at we end building the model
time_fin_rn = time()
# Time spent in build the model
t_rn = time_fin_rn - time_ini_rn
print('==== Tiempo de construccion de la RED NEURONAL ==== ')
print(t_rn)
# Predictions
y_pred_rn = mlp.predict(X_test)
# Time to classify:
# Moment at we start
time_ini_rn = time()
# classify
# NOTE(review): scored with 2-fold CV over the FULL X/y, not only the
# held-out split above -- confirm that is intentional.
scores_rn = cross_val_score(mlp, X, y, cv=2, scoring='accuracy')
        # RBF response of this training image to cluster centre n.
        gauss = np.append(gauss, gaussian(image, clusterCenters[i][n], betaVals[i][n]))
    gaussiansTrain = np.vstack((gaussiansTrain, gauss))
# Drop the zero seed row that vstack was initialised with.
gaussiansTrain = gaussiansTrain[1:]

#computes hidden layer neuron values for testing images input
gaussiansTest = np.zeros(KM)
for image in test:
    gauss = np.array([])
    for n in range(KM):
        gauss = np.append(gauss, gaussian(image, clusterCenters[i][n], betaVals[i][n]))
    gaussiansTest = np.vstack((gaussiansTest, gauss))
gaussiansTest = gaussiansTest[1:]

#MLP Classifier model which takes gaussian neurons as input and predicts output
clf = MLPClassifier(solver="lbfgs", alpha=1e-5, hidden_layer_sizes=(KM,), activation="identity")
clf.fit(gaussiansTrain, trainLabels)  #compares output to label then backpropagates to adjust weights
predicted = clf.predict(gaussiansTest)  #prediction using test images to determine accuracy
correct = 0
for t in range(size):
    if (np.array_equal(predicted[t], testLabels[t])):  #checks for correctness
        correct += 1
percent = round(100 * correct / size, 2)
print("Accuracy = ", percent, "%")
percentages = np.append(percentages, percent)
# Running average across the outer experiment loop.
avg = round(np.mean(percentages), 2)
averages = np.append(averages, avg)
print("")
# Manual metrics from the TP/TN/FP/FN counts computed above this chunk.
print("accuracy: %s" % ((TP + TN) * 1.0 / sum([TP, TN, FP, FN])))
print("precision: %s " % ((TP) * 1.0 / (TP + FP)))
print("recall: %s" % ((TP) * 1.0 / (TP + FN)))
"""
output:
[ True  True  True  True  True  True  True]
accuracy: 1.0
precision: 1.0
recall: 1.0
"""
# Build the same perceptron with sklearn's MLPClassifier (sklearn trains
# with a cross-entropy loss).
clf = MLPClassifier(solver="lbfgs", alpha=1e-1, hidden_layer_sizes=5, random_state=1)
clf.fit(X, Y)
pred2 = clf.predict(X)
# (Chinese message: "evaluation of the sklearn-built MLP on this binary
# classification task:")
print("使用sklearn构建的MLP模型在二分类问题上的评估结果如下:")
print(pred2 == Y)
[TP, TN, FP, FN] = evaluate(Y, pred2)
print("accuracy: %s" % ((TP + TN) * 1.0 / sum([TP, TN, FP, FN])))
print("precision: %s " % ((TP) * 1.0 / (TP + FP)))
print("recall: %s" % ((TP) * 1.0 / (TP + FN)))
"""
output:
[ True  True  True  True  True  True  True]
accuracy: 1.0
precision: 1.0
recall: 1.0
"""
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier(max_depth=2, random_state=0)
clf2.fit(X_train, y_train)
clf2.predict(X_test)
score2 = clf2.score(X_test, y_test)

from sklearn.naive_bayes import GaussianNB
clf3 = GaussianNB()
clf3.fit(X_train, y_train)
clf3.predict(X_test)
score3 = clf3.score(X_test, y_test)

from sklearn.neural_network import MLPClassifier
clf4 = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                     random_state=1)
clf4.fit(X_train, y_train)
clf4.predict(X_test)
score4 = clf4.score(X_test, y_test)

from sklearn import linear_model
clf5 = linear_model.LogisticRegression()
clf5.fit(X_train, y_train)
clf5.predict(X_test)
score5 = clf5.score(X_test, y_test)

# BUG FIX: plt.plot(clf1, clf2, ...) passed estimator OBJECTS to
# matplotlib, which raises.  Plot the test accuracies instead (clf1 is
# fitted above this chunk).
plt.plot([clf1.score(X_test, y_test), score2, score3, score4, score5])
plt.show()
    # Count matches between this fold's predictions and the truth.
    if (target_test[x] == targets[x]):
        corrects += 1
print("Accuracy: {}".format(corrects / len(target_test)))
my_classifier_accuracy += corrects / len(target_test)

# Accuracy-per-loop curve for the hand-rolled classifier on Iris.
plt.plot(graph)
plt.ylabel('Accuracy')
plt.xlabel('Loop')
plt.title('Iris')
plt.show()

# sklearn baseline: one hidden layer of 4 units.
mlp = MLPClassifier(hidden_layer_sizes=(4), learning_rate_init=0.08,
                    max_iter=1000)
mlp.fit(data_train, target_train)
predictions = mlp.predict(data_test)
corrects = 0
for x in range(len(target_test)):
    if (target_test[x] == predictions[x]):
        corrects += 1
scikit_classifier_accuracy += corrects / len(target_test)

# NOTE(review): the /10 implies both accuracies were accumulated over a
# 10-iteration loop above this chunk -- confirm.
print("My accuracy: {}".format(my_classifier_accuracy / 10))
print("Scikits accuracy: {}".format(scikit_classifier_accuracy / 10))

# Pima Indian Diabetes
headers = [
    'times_pregnant', 'glucose', 'blood_pressure', 'triceps', 'insulin',
    'bmi',
# print(dataset.head())
# Summary statistics of the loaded dataset.
print(dataset.describe().transpose())

# Columns HEADERS[1:-1] are features; the last header is the target.
features = dataset[HEADERS[1:-1]]
target = dataset[HEADERS[-1]]
train_x, test_x, train_y, test_y = train_test_split(features, target)

# Standardise features using training-set statistics only.
scaler = StandardScaler()
scaler.fit(train_x)
train_x = scaler.transform(train_x)
test_x = scaler.transform(test_x)

# min = None
# for i in range(10):
clf = MLPClassifier(activation="identity", learning_rate="invscaling")
# clf = MLPClassifier(activation="logistic")
# clf = MLPClassifier(activation="tanh")
# clf = MLPClassifier(hidden_layer_sizes=(13,13),activation="relu", max_iter=300)
clf.fit(train_x, train_y)

print("Training Accuracy :", clf.score(train_x, train_y))
print("Test Accuracy :", clf.score(test_x, test_y))
print()
    # Preprocess and collect one image array per input file.
    l = preprocess_input(x)
    image_list += [l]
    # One-hot label row over the 6 classes; dim advances with each file.
    row = [0] * 6  #6 different labels
    row[dim] = 1
    label_list += [row]
    dim += 1

x_train = np.concatenate((image_list))
y_train = np.array(label_list)
# Flatten each image to a single feature row (samples x H*W*C).
sh = x_train.shape
col_dim = sh[1] * sh[2] * sh[3]
xx = x_train.reshape([sh[0], col_dim])
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10), random_state=1)
clf.fit(xx, y_train)


def claim_prediction(picture_file):
    """Classify one image file with the module-level clf.

    Returns [label_name, label_index] where the index is the argmax over
    the one-hot prediction.
    """
    #turning raw data into array:
    img = image.load_img(picture_file, target_size=(64, 64))
    x_claim = image.img_to_array(img)
    x_claim = np.expand_dims(x_claim, axis=0)
    x_claim = preprocess_input(x_claim)
    #reshaping array:
    sh = x_claim.shape
    col_dim = sh[1] * sh[2] * sh[3]
    xx = x_claim.reshape([sh[0], col_dim])
    y = clf.predict(xx.reshape(1, -1))
    position = np.argmax(y)
    return ([labels[position], position])
solver="sgd",
                               learning_rate_init=0.001,
                               max_iter=len(train_df) * 2,
                               shuffle=True,
                               random_state=0,
                               tol=1e-6,
                               verbose=True,
                               early_stopping=False,
                               batch_size=BATCH_SIZE)

# All columns but the last are features; the target is the crime category.
X = train_df.iloc[:, :-1]
y = train_df['Descript']
print("Starting training, batch size: %i, training samples: %i" %
      (BATCH_SIZE, len(X)))
mlp_classifier.fit(X, y)
print('Writing classifier to file. Time: {:.2f}'.format(time.time() -
                                                        start_time))
print('Accuracy of classifier on train set: {:.2f},'
      ' time: {:.2f}, test set size: {:.2f}'.format(
          mlp_classifier.score(X, y), time.time() - start_time, len(X)))

test_df = pd.read_csv('categoriacal/some_test.csv', header=0, index_col=0)
test_df = test_df.drop(DROP, axis=1)
test_df = test_df.sample(frac=1)
# NOTE(review): fit_transform on the test set RE-FITS the shared
# LabelEncoder, so test encodings need not match the training encodings.
# Consider one encoder per column, fit on train and only transform here.
test_df['PdDistrict'] = le.fit_transform(test_df['PdDistrict'])
test_df['DayOfWeek'] = le.fit_transform(test_df['DayOfWeek'])
test_df['Date'] = le.fit_transform(test_df['Date'])
# Build the i-th fold's train/test datasets.
train_dataset, test_dataset = getKFoldDatasets(i)
dataset = ClassifierDataset(train_dataset, test_dataset)

# Creating tf-idf training and test matrix
tfIdfVectorizer = TfidfVectorizer(use_idf=True)
train_matrix = tfIdfVectorizer.fit_transform(dataset.training_data).toarray()
test_matrix = tfIdfVectorizer.transform(dataset.test_data).toarray()

# Training MLP model (the previous comment said "KNN", but the estimator
# here is an MLPClassifier with three hidden layers of 20 units).
print('Starting fit')
clf = MLPClassifier(random_state=1, max_iter=300, hidden_layer_sizes=(20, 20, 20))
clf.fit(train_matrix, dataset.training_target)
print('Finished fit')

# Predicting test dataset classification
print('Starting prediction')
test_result = clf.predict(test_matrix)
dataset.setTestResult(test_result)
print('Finished prediction')

# Printing metrics
print('\n------------------------------------------------')
print(f'Classification results {i} for 80% of full dataset')
print('------------------------------------------------')
precision_score, error, confusion_matrix = dataset.getResultMetrics()
def main():
    """Interactive question-type classifier for the Hamlet question set.

    Builds word-frequency features from hamlet_all.txt, optionally runs
    leave-one-out cross-validation comparing an MLP against 1-NN, then
    answers typed questions in a REPL loop until 'n' is entered.
    """
    quesSet = "hamlet_all.txt"
    labelSet = "labels_all.txt"
    # Vocabulary, global counts and per-line word-frequency features.
    words = getTextWords(quesSet)
    items, counts = getFreq(words)
    #printFreq(items)
    #print(len(items))
    #print(counts)
    lines = getLineWords(quesSet)
    lineFreq = getLineFreq(lines, counts)
    print("Loading...")
    train_dataSet, train_hwLabels = readDataSet(labelSet, len(lines),
                                                len(items), lineFreq)
    QuesNum = len(train_dataSet)
    # Single hidden layer of 50 logistic units, Adam optimiser.
    clf = MLPClassifier(hidden_layer_sizes=(50, ),
                        activation='logistic',
                        solver='adam',
                        learning_rate_init=0.001,
                        max_iter=1000)
    knn_hwLabels = readDataSet_K(labelSet, len(lines), len(items), lineFreq)
    knn = neighbors.KNeighborsClassifier(algorithm='kd_tree', n_neighbors=1)
    # Separate NearestNeighbors index used later to surface similar questions.
    Knn = neighbors.NearestNeighbors(n_neighbors=3)
    #print(clf,'\n',knn)
    op = input("Do you want to do Cross-Validation? ('y' to confirm) ")
    if op == 'y':
        Error_M = 0
        Error_K = 0
        print("LOOCV initiated.\n")
        # Leave-one-out CV: accumulate error counts for MLP and KNN.
        for j in range(QuesNum):
            Error_M, Error_K = validate(train_dataSet, train_hwLabels,
                                        knn_hwLabels, clf, knn, QuesNum, j,
                                        Error_M, Error_K)
        print("\nLOOCV complete.")
        print("Error (MLP Neural Network):", Error_M,
              "\nError (KNN Algorithm):", Error_K)
        print("Accuracy (MLP Neural Network):", 1 - Error_M / QuesNum,
              "\nAccuracy (KNN Algorithm):", 1 - Error_K / QuesNum)
    print("\nTraining with whole dataset...")
    clf.fit(train_dataSet, train_hwLabels)
    knn.fit(train_dataSet, knn_hwLabels)
    Knn.fit(train_dataSet, knn_hwLabels)
    print("Training complete.")
    # Interactive loop: classify each typed question until 'n' is entered.
    op = input("\nEnter your questions ('n' to quit): ")
    while op != 'n':
        a = getQuesFreq(counts, op)
        res = clf.predict(a)
        Res = knn.predict(a)
        # All-zero feature row means no vocabulary word matched.
        for v in a:
            if sum(v) == 0:
                print("Unknown question type")
                break
        Class, Classifier = getQuesClass(res, int(Res))
        print("Question Type:", Class, Classifier)
        dist, neigh = Knn.kneighbors(a)
        #print(dist,neigh)
        # If the nearest neighbours are far away, show similar questions as
        # a sanity check for the user.
        for v in dist:
            maxDist = max(v)
            if maxDist >= 2:
                print("\nResults may compromise. Consider more questions:\n")
                for v in neigh:
                    for qNum in v:
                        print('\t', getQues("hamlet_all.txt", qNum), end='')
        op = input("\nEnter your questions ('n' to quit): ")
correct = 0 for x in range(0, len(classes)): if classes[x] == y[x]: correct += 1 print(str((correct / 150.0) * 100) + "% accurate") # clf classifier from skLearn clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=3) y = y.ravel() train_y = np.array(y).astype(int) clf.fit(normalized_X, train_y) clf.predict(normalized_X) print(clf.score(normalized_X, y)) print("\nPima-indians:\n") array = np.genfromtxt( "/Users/jeremy/Documents/cs450/pima-indians-diabetes.csv", delimiter=",") X = array[:, :-1] Y = array[:, -1:] normalized_X = preprocessing.normalize(X) normalized_X = np.insert(normalized_X, normalized_X.shape[1], -1, axis=1) num_cols = normalized_X.shape[1] neuralNet = NeuralNet(num_cols, [4, 3], normalized_X) for i in range(0, 200):