def naviBayes(train_X, train_y, test_X, test_y): # print train_y # print test_y # model = tfMultyPerceptron(train_X, train_y, test_X, test_y) # model.run() time_start = time.time() model = MLPClassifier(hidden_layer_sizes=(128, 32, 32, 128), max_iter=100, early_stopping=False, learning_rate_init=0.001, verbose=True) # model = MultinomialNB() # model = BernoulliNB() # model = KNeighborsClassifier() # model = DecisionTreeClassifier(max_depth=20, min_samples_leaf=0.01) # model = LinearSVC(random_state=0) # model.fit(X, y) model.fit(train_X, train_y) # model_1.fit(train_X, train_y) # model_2.fit(train_X, train_y) # model_3.fit(train_X, train_y) # model_4.fit(train_X, train_y) # model_5.fit(train_X, train_y) # All_model = [model, model_1, model_2, model_3, model_4, model_5] # train_pre = predct_all(All_model, train_X, train_y) # test_pre = predct_all(All_model, test_X, test_y) time_end = time.time() print "perceptron training cost time:{}".format(time_end - time_start) # model = OneVsRestClassifier(SVC(kernel='linear')) # model.fit(train_X, train_y) # save with open(config.BTMData + 'BayesModel/BTM_perceptron.model', 'wb') as fp: cPickle.dump(model, fp) # load model # model = None # with open(config.BTMData + 'BayesModel/bayes_BTM.model', 'rb') as fp: # model = cPickle.load(fp) # print 'train data set size:', len(train_y) # result = metrics.accuracy_score(train_pre, train_y) # 返回各自文本的所被分配到的类索引 # print"Predicting random boost train result: ", result # print 'train data set size:', len(train_y) # result = metrics.accuracy_score(test_pre, test_y) # 返回各自文本的所被分配到的类索引 # print "Predicting random boost test result:", result print 'train data set size:', len(train_y) result = model.score(train_X, train_y) # 返回各自文本的所被分配到的类索引 print"Predicting train result: ", result test_result = model.score(test_X, test_y) print "Predicting test set result: ", test_result top_train_result = model.predict_proba(train_X) print "top 3 predict train data accuracy rate: 
{}".format(cal_topThreeScore(model, top_train_result, train_y)) top_test_result = model.predict_proba(test_X) print "top 3 predict test data accuracy rate: {}".format(cal_topThreeScore(model, top_test_result, test_y))
def neuralNetworkIteration():
    """Sweep the alpha regularizer for a warm-started MLP, logging per-epoch
    train/validation error for each alpha into its own CSV file."""
    import pydotplus
    a, b, c, d, e, f = traing_test_data_set()
    alphalist = [.00001, .00003, .0001, .0003, .001, .003, .01, .03, 1, 10]
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        for new_alpha in alphalist:
            iteration_output = "Iteration,Training Error,Validation Error\n"
            from sklearn.neural_network import MLPClassifier
            # warm_start + max_iter=1: each fit() call below runs one more epoch.
            clf = MLPClassifier(alpha=new_alpha, hidden_layer_sizes=(200,),
                                random_state=1, activation='logistic',
                                warm_start=True, max_iter=1)
            for iteration in range(1, 500):
                clf.fit(train_data, train_label)
                prediction = clf.predict(validation_data)
                from sklearn.metrics import accuracy_score
                row = str(str(iteration) + "," +
                          str(100 - clf.score(train_data, train_label) * 100.0) + "," +
                          str(100 - accuracy_score(validation_label, prediction) * 100.0))
                iteration_output += row
                iteration_output += "\n"
                print(row)
            file_name = "For All Feature. Alpha = " + str(new_alpha) + " " + " Iteration data" + ".csv"
            print(file_name)
            with open(file_name, "w", encoding="utf-8") as datafile:
                datafile.write(iteration_output)
def neuralNetworkIterationLogistic():
    """Per-feature-set epoch sweep of a logistic-activation MLP, writing
    per-epoch train/validation error rates to a CSV per feature set.

    BUG FIX (performance): the inner mismatch count previously re-ran
    clf.predict([sample]) once per validation sample inside a 350-epoch loop,
    even though the batch `prediction` had already been computed. Reusing
    prediction[i] gives identical counts with one predict call per epoch.
    """
    import pydotplus
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        iteration_output = "Iteration,Training Error,Validation Error\n"
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        from sklearn.neural_network import MLPClassifier
        # warm_start + max_iter=1: each fit() below advances training by one epoch.
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1,
                            activation='logistic', warm_start=True, max_iter=1)
        for iteration in range(1, 350):
            clf.fit(train_data, train_label)
            tot = len(validation_data)
            prediction = clf.predict(validation_data)
            # Count validation mismatches from the batch prediction.
            cnt = 0
            for i in range(0, len(validation_data)):
                if prediction[i] != validation_label[i]:
                    cnt += 1
            from sklearn.metrics import accuracy_score
            from sklearn.metrics import precision_score
            from sklearn.metrics import f1_score
            row = str(str(iteration) + "," +
                      str(100 - clf.score(train_data, train_label) * 100.0) + "," +
                      str(100 - accuracy_score(validation_label, prediction) * 100.0))
            iteration_output += row
            iteration_output += "\n"
            print(row)
        file_name = "Feature No " + str(feature_number) + " Iteration data" + ".csv"
        print(file_name)
        with open(file_name, "w", encoding="utf-8") as datafile:
            datafile.write(iteration_output)
def neuralNetwork():
    """Train an lbfgs MLP per feature set and print test-set metrics.

    BUG FIX (performance): the mismatch loop previously called
    clf.predict([test_data[i]]) once per sample even though the batch
    `prediction` was already computed; comparing prediction[i] directly
    yields the same count with a single predict call.
    """
    import pydotplus
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(solver='lbfgs', alpha=.003, hidden_layer_sizes=(10,),
                            random_state=1, activation='relu')
        clf.fit(train_data, train_label)
        tot = len(test_label)
        prediction = clf.predict(test_data)
        # Count test mismatches from the batch prediction.
        cnt = 0
        for i in range(0, len(test_data)):
            if prediction[i] != test_label[i]:
                cnt += 1
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        from sklearn.metrics import f1_score
        print("Complete for Feature :" + str(feature_number))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
def main():
    """Train an MLP on the iris dataset and print its test accuracy."""
    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)
    model = MLPClassifier(max_iter=1000)
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    print(accuracy)
def test_multilabel_classification():
    """Multi-label classification should work with both fit and partial_fit."""
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    # Full-batch lbfgs fit must reach a perfect training score.
    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    clf.fit(X, y)
    assert_equal(clf.score(X, y), 1)
    # Incremental sgd training via partial_fit.
    clf = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for _ in range(100):
        clf.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert_greater(clf.score(X, y), 0.9)
def test_partial_fit_unseen_classes():
    """Non-regression test for bug 6994: a class declared in `classes` but not
    present until a later partial_fit call must not corrupt the labeling."""
    clf = MLPClassifier(random_state=0)
    clf.partial_fit([[1], [2], [3]], ["a", "b", "c"],
                    classes=["a", "b", "c", "d"])
    clf.partial_fit([[4]], ["d"])
    assert_greater(clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]), 0)
def fit_and_score_ann(x_train, y_train, x_test, y_test, config):
    """Fit an MLP parameterized by *config* and return its test-set accuracy."""
    network = MLPClassifier(
        solver=config.ann.solver,
        max_iter=Configuration.ANN_MAX_ITERATIONS,
        alpha=config.ann.alpha,
        hidden_layer_sizes=(config.ann.hidden_neurons,),
        learning_rate='adaptive',
    )
    network.fit(x_train, y_train)
    return network.score(x_test, y_test)
def neuralNetwork():
    """Standardize the data, train a logistic MLP, and print full metrics for
    both the test and the validation splits."""
    import pydotplus
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        # Fit the scaler on the training split only, then apply to all splits.
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        scaler.fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
        validation_data = scaler.transform(validation_data)
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(100,), random_state=1,
                            activation='logistic', max_iter=1000)
        clf.fit(train_data, train_label)
        # --- Test split report ---
        tot = len(test_label)
        cnt = 0
        prediction = clf.predict(test_data)
        for i in range(0, len(test_data)):
            if prediction[i] != test_label[i]:
                print(str(i) + str(prediction[i]) + " " + str(test_label[i]))
                cnt += 1
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        from sklearn.metrics import f1_score
        print("Complete for Feature :" + str(feature_number))
        print("Train data set size : " + str(len(train_data)))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
        # --- Validation split report ---
        tot = len(validation_label)
        cnt = 0
        prediction = clf.predict(validation_data)
        for i in range(0, len(validation_label)):
            if prediction[i] != validation_label[i]:
                print(str(i) + str(prediction[i]) + " " + str(validation_label[i]))
                cnt += 1
        print("Total validation set size : " + str(len(validation_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(validation_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(validation_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(validation_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
def scikit_method(X, y, q_z, l_r, beta=0.9, k_f=10):
    """K-fold-train a logistic-activation SGD MLP and return the mean train
    and test accuracies across the folds.

    BUG FIX: the original overwrote train_acc/test_acc on every fold and then
    divided only the *last* fold's score by k_f, so the returned values were
    last_fold_accuracy / k_f rather than averages. The per-fold scores are now
    accumulated before dividing.

    Parameters: q_z — hidden layer width; l_r — initial learning rate;
    beta — SGD momentum; k_f — number of folds.
    """
    train_acc = 0
    test_acc = 0
    clf = MLPClassifier(hidden_layer_sizes=(q_z,), activation='logistic',
                        algorithm='sgd', learning_rate_init=l_r, momentum=beta)
    kf = KFold(len(X), k_f)  # old-style KFold(n, n_folds) API, as used file-wide
    for train, test in kf:
        clf.fit(X[train], y[train])
        fold_train = clf.score(X[train], y[train])
        fold_test = clf.score(X[test], y[test])
        print("Train accuracy for scikit method: %f" % fold_train)
        print("Test accuracy for scikit method: %f" % fold_test)
        # Accumulate so the return values are true k-fold means.
        train_acc += fold_train
        test_acc += fold_test
    return train_acc / k_f, test_acc / k_f
def test_bool_onehot(self):
    """The MLP should learn the 'exactly one True of nine' one-hot predicate."""
    X = list(itertools.combinations_with_replacement([True, False], 9))
    y = [sum(a) == 1 for a in X]
    X_r = repeat_data(X)
    y_r = repeat_data(y)
    net = MLPClassifier(hidden_layer_sizes=(2), activation='logistic',
                        max_iter=10000, alpha=1e-4, algorithm='l-bfgs',
                        verbose=False, tol=1e-4, random_state=1,
                        learning_rate_init=.1)
    net.fit(X_r, y_r)
    assert (net.score(X, y) > 0.9)
    # Every individual input must be classified by the one-hot rule.
    for x in X:
        self.assertEqual(net.predict(x), (sum(x) == 1))
def test_multilabel_classification():
    """Multi-label classification should work with fit, partial_fit, and
    early stopping."""
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    # Full-batch lbfgs fit should score near-perfectly on the training set.
    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    clf.fit(X, y)
    assert_greater(clf.score(X, y), 0.97)
    # Incremental sgd training via partial_fit.
    clf = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for _ in range(100):
        clf.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert_greater(clf.score(X, y), 0.9)
    # Early stopping must still work now that the default validation split is
    # stratified (stratification is disabled for multilabel targets).
    clf = MLPClassifier(early_stopping=True)
    clf.fit(X, y).predict(X)
def test_ski_learn_mnist(self):
    """Two SGD epochs on MNIST should already exceed 90% test accuracy."""
    mnist = fetch_mldata("MNIST original")
    # Rescale pixels to [0, 1]; use the traditional 60k/10k train/test split.
    X, y = mnist.data / 255., mnist.target
    X_train, X_test = X[:60000], X[60000:]
    y_train, y_test = y[:60000], y[60000:]
    net = MLPClassifier(hidden_layer_sizes=(50,), activation='logistic',
                        max_iter=2, alpha=1e-4, algorithm='sgd',
                        verbose=False, tol=1e-4, random_state=1,
                        learning_rate_init=.1)
    net.fit(X_train, y_train)
    assert net.score(X_test, y_test) > 0.9
def test_partial_fit_classification():
    """`partial_fit` should reproduce `fit` on binary and multi-class data."""
    for X, y in classification_datasets:
        # Reference: one-shot fit.
        ref = MLPClassifier(algorithm='sgd', max_iter=100, random_state=1,
                            tol=0, alpha=1e-5, learning_rate_init=0.2)
        ref.fit(X, y)
        pred1 = ref.predict(X)
        # Incremental: 100 partial_fit passes with the same hyperparameters.
        inc = MLPClassifier(algorithm='sgd', random_state=1, alpha=1e-5,
                            learning_rate_init=0.2)
        for _ in range(100):
            inc.partial_fit(X, y, classes=np.unique(y))
        pred2 = inc.predict(X)
        assert_array_equal(pred1, pred2)
        assert_greater(inc.score(X, y), 0.95)
def test_lbfgs_classification():
    """lbfgs should exceed 0.95 training accuracy for every activation on the
    binary and multi-class digits datasets, with predictions of the right
    shape and dtype kind."""
    for X, y in classification_datasets:
        X_train, y_train = X[:150], y[:150]
        X_test = X[150:]
        expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)
        for activation in ACTIVATION_TYPES:
            clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
                                max_iter=150, shuffle=True, random_state=1,
                                activation=activation)
            clf.fit(X_train, y_train)
            y_predict = clf.predict(X_test)
            assert_greater(clf.score(X_train, y_train), 0.95)
            assert_equal((y_predict.shape[0], y_predict.dtype.kind),
                         expected_shape_dtype)
def plot_on_dataset(X, y, ax, name):
    """Fit every (label, param) learning strategy on one dataset and plot each
    resulting loss curve on *ax*."""
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    # digits is larger but converges fairly quickly, so cap its iterations.
    max_iter = 15 if name == "digits" else 400
    mlps = []
    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0, max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)
def main():
    """Compare a plain MLP against a DBM-wrapped MLP on the 'australian'
    dataset and print both test accuracies."""
    from sklearn.neural_network import MLPClassifier
    from sklearn import preprocessing, model_selection
    from sklearn.datasets import fetch_mldata
    db_name = 'australian'
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.scale(data_set.data)
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        data_set.data, data_set.target, test_size=0.4)
    # Baseline: plain MLP.
    mlp = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(2,),
                        activation='logistic', learning_rate_init=0.5)
    mlp = mlp.fit(X_train, y_train)
    print("MLP Accuracy %0.3f " % mlp.score(X_test, y_test))
    # Same architecture wrapped in a DBM.
    mlp = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(2,),
                        activation='logistic', learning_rate_init=0.5)
    dbm = DBM(mlp).fit(X_train, y_train)
    print("DBM-MLP Accuracy %0.3f " % dbm.score(X_test, y_test))
def rede_neural(X, y):
    """Train an MLP over 3 cross-validation folds on normalized X and return
    the classifier after the last fold."""
    print("Iniciando treinamento da Rede Neural")
    X2 = normalize(X)
    clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='tanh',
                        algorithm='adam', alpha=1e-5,
                        learning_rate='constant', tol=1e-8,
                        learning_rate_init=0.0002,
                        early_stopping=True, validation_fraction=0.2)
    kf = KFold(len(y), n_folds=3)  # old-style KFold(n, n_folds) API
    fold = 0
    for train, test in kf:
        start = time.time()
        fold = fold + 1
        print("Treinamento", fold)
        # Split into this fold's train/test partitions.
        X_train, X_test, y_train, y_test = X2[train], X2[test], y[train], y[test]
        clf.fit(X_train, y_train)
        print("score:", clf.score(X_test, y_test), "(", (time.time() - start) / 60.0, "minutos )")
    return clf
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Standardize features with statistics from the training split only.
scaler = StandardScaler()
scaler.fit(X_train, y_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# Three hidden layers of 20 units; very small learning rate, huge iteration cap.
clf = MLPClassifier(hidden_layer_sizes=(20, 20, 20),
                    learning_rate_init=0.0001,
                    max_iter=200000,
                    momentum=0.5)
clf.fit(X_train_std, y_train)

print("Training Set Accuracy: {:.3f}".format(clf.score(X_train_std, y_train)))
print("Test Set Accuracy: {:.3f}".format(clf.score(X_test_std, y_test)))

predictions = clf.predict(X_test_std)
print("\nConfusion Matrix")
print(confusion_matrix(y_test, predictions))
print("\nAcurracy")
print(accuracy_score(y_test, predictions))

# Side-by-side view of ground truth vs. predictions.
df_ = pd.DataFrame()
df_['y_test'] = y_test
df_['predictions'] = predictions
print(df_)
# To speed up training, only a small slice (~10%) of the samples is used.
X_train_lite = X_train[0:5999, :]
y_train_lite = y_train[0:5999]
X_test_lite = X_test[0:999, :]
y_test_lite = y_test[0:999]

# Keep enough principal components to explain 90% of the variance.
pca = PCA(n_components=0.9)
pca.fit(X_train_lite)
X_train_pca = pca.transform(X_train_lite)
X_test_pca = pca.transform(X_test_lite)
print(X_train_lite.shape, X_test_lite.shape)
print(X_train_pca.shape, X_test_pca.shape)

# Train an lbfgs MLP on the PCA-reduced features and time the run.
start = time.time()
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=[100, 100],
                    activation='relu', alpha=1e-5, random_state=62, verbose=2)
mlp.fit(X_train_pca, y_train_lite)
score_train = mlp.score(X_train_pca, y_train_lite)
score_test = mlp.score(X_test_pca, y_test_lite)
print('训练结束,用时{:.2f}s.'.format(time.time() - start))
print('训练集得分: {:.4f}, 测试集得分: {:.4f}'.format(score_train, score_test))
# NOTE(review): fragment — these tokens continue an MLPClassifier(...) constructor
# call whose opening lies before this chunk. The tail trains, pickles ("NN.pickle"),
# and scores the model, then builds a next-day log-return column on `dataset`.
# Left byte-identical; reflow against the full file before editing.
validation_fraction=0.1, batch_size=200, verbose=True) mlp.fit(X_train_scaled, y_train) # Save model as a pickle save_mlp = open("NN.pickle", "wb") pickle.dump(mlp, save_mlp) save_mlp.close() predictions_test = mlp.predict(X_test) from sklearn.metrics import classification_report, confusion_matrix print(confusion_matrix(y_test, predictions_test)) print(classification_report(y_test, predictions_test)) print("Accuracy on training set: {:.3f}".format(mlp.score(X_train, y_train))) print("Accuracy on test set: {:.3f}".format(mlp.score(X_test, y_test))) print("Accuracy on entire data set: {:.3f}".format(mlp.score(X, y))) # Compare strategy and market returns y_pred = mlp.predict(X) dataset['y_pred'] = np.NaN dataset.iloc[(len(dataset) - len(y_pred)):, -1:] = y_pred trade_dataset = dataset.dropna() trade_dataset['Tomorrows Returns'] = 0. trade_dataset['Tomorrows Returns'] = np.log(trade_dataset['Close'] / trade_dataset['Close'].shift(1)) trade_dataset['Tomorrows Returns'] = trade_dataset['Tomorrows Returns'].shift( -1)
# NOTE(review): fragment — continuation of a RandomForest-style constructor call
# (oob_score=True) begun before this chunk. It reports the OOB score, trains an
# SGD MLP on the same features, then engineers Fare/FamilySize features on the
# Titanic test frame. Left byte-identical; reflow against the full file first.
oob_score=True) Model.fit(train_feature, train_labels) # oob採用未被選用的data來做validation print('Base oob score :%.5f' % (Model.oob_score_)) # 使用其他模型 mlp = MLPClassifier(solver='sgd', activation='relu', alpha=1e-4, hidden_layer_sizes=(50, 50), random_state=1, max_iter=10, verbose=10, learning_rate_init=.1) mlp.fit(train_feature, train_labels) print(mlp.score(train_feature, train_labels)) # 處理測試資料 # fare有空值 test.loc[(test['Fare'].isnull()), 'Fare'] = test['Fare'].dropna().median() # 選feature test_feature = test[['Sex', 'Pclass']].copy() # 新特徵 test['FamilySize'] = test['SibSp'] + test['Parch'] test.loc[test['FamilySize'] == 0, 'Family'] = 'alone' test.loc[(test['FamilySize'] > 0) & (test['FamilySize'] <= 3), 'Family'] = 'small' test.loc[(test['FamilySize'] > 3) & (test['FamilySize'] <= 6), 'Family'] = 'medium' test.loc[test['FamilySize'] > 6, 'Family'] = 'large'
# Accuracy of the logistic model on the held-out split.
model.score(X_test, y_test)

# In[54]: wrap the logit predictions in a pandas Series.
p_lm = pd.Series(predicted_logit)
# NOTE(review): a Series has no `columns`; this only sets an ad-hoc attribute.
# Probably `p_lm.name = 'p_lm'` or a DataFrame was intended — confirm.
p_lm.columns = ['p_lm']
print(p_lm)

# In[60]: MLP predictions on the same split.
predicted_MLP = mlp.predict(X_test)
print(predicted_MLP)
mlp.score(X_test, y_test)
p_MLP = pd.Series(predicted_MLP)
p_MLP.columns = ['p_MLP']
print(p_MLP)

# In[56]: SVC predictions on the same split.
predicted_SVC = SVC.predict(X_test)
print(predicted_SVC)
SVC.score(X_test, y_test)
p_SVC = pd.Series(predicted_SVC)
p_SVC.columns = ['p_SVC']
print(p_SVC)
from data_read import load

# Load training features/labels (X, Y) and test features/labels (P, Q).
X, Y, P, Q = load()

clf = RandomForestClassifier(n_estimators=1000, random_state=0).fit(X, Y.values.ravel())
print ("Accuracy of Random Forest Classifier: " + str(clf.score(P, Q)))

clf2 = SVC(kernel='rbf', C=10, gamma=0.001, random_state=0).fit(X, Y.values.ravel())
print ("Accuracy of SVM: " + str(clf2.score(P, Q)))

clf3 = GradientBoostingClassifier(n_estimators=1000, learning_rate=1,
                                  max_depth=10, random_state=0,
                                  min_samples_split=5).fit(X, Y.values.ravel())
print ("Accuracy of Gradient Boosting Classifier: " + str(clf3.score(P, Q)))

clf4 = GaussianNB().fit(X, Y.values.ravel())
print ("Accuracy of Gaussian Naive Bayes Classifier: " + str(clf4.score(P, Q)))

# For the MLP, algorithm, learning_rate_init, alpha, hidden_layer_sizes and
# activation are the settings with the largest impact on accuracy.
clf6 = MLPClassifier(algorithm='adam', alpha=0.01, max_iter=500,
                     learning_rate='constant', hidden_layer_sizes=(400,),
                     random_state=0, learning_rate_init=1e-2,
                     activation='logistic').fit(X, Y.values.ravel())
print ("Accuracy of Multi-layer Perceptron Classifier: " + str(clf6.score(P, Q)))
# Collapse rare honorifics into a single 'Rare' bucket and normalize synonyms.
for dataset in combine:
    dataset['Title'] = dataset['Title'].replace([
        'Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir',
        'Jonkheer', 'Dona'
    ], 'Rare')
    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')

# Encode titles numerically; anything unmapped becomes 0.
title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
for dataset in combine:
    dataset['Title'] = dataset['Title'].map(title_mapping)
    dataset['Title'] = dataset['Title'].fillna(0)

# Drop columns no longer used as features.
train_df = train_df.drop(['Name', 'PassengerId'], axis=1)
test_df = test_df.drop(['Name'], axis=1)
train_df = train_df.drop(['Ticket', 'Cabin'], axis=1)
test_df = test_df.drop(['Ticket', 'Cabin'], axis=1)
combine = [train_df, test_df]

# Split into features and target; keep PassengerId out of the test features.
Y_train = train_df["Survived"]
X_train = train_df.drop("Survived", axis=1)
X_test = test_df.drop("PassengerId", axis=1).copy()
print(X_train.head())

clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)
acc_log = round(clf.score(X_train, Y_train) * 100, 2)
print(acc_log)
# coding: utf-8
# ======================================
# Breast cancer tumor classification
# with sklearn MLPClassifier module
# (c) Keishi Ishihara
# ======================================
from __future__ import print_function

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from load_csv_data import load_data

# Load the train/test split from CSV.
X_train, X_test, y_train, y_test = load_data()

# One hidden layer of 30 ReLU units, SGD with a constant learning rate.
clf = MLPClassifier(hidden_layer_sizes=(30), activation='relu', alpha=1e-5,
                    learning_rate_init=0.001, learning_rate='constant',
                    solver='sgd', random_state=0, verbose=True, tol=1e-4,
                    max_iter=10000)
clf.fit(X_train, y_train)

print('Training set score: {}'.format(clf.score(X_train, y_train)))
print('Test set score: {}'.format(clf.score(X_test, y_test)))
# Second network: an input layer (R^4), two hidden layers (R^5 and R^3),
# and an output layer (R^3).
mlp_multi_hidden_layer = MLPClassifier(hidden_layer_sizes=(5, 3),
                                       activation="tanh", solver="adam",
                                       max_iter=500, batch_size=10,
                                       verbose=True)

# Train both networks on the same data.
mlp_single_hidden_layer.fit(X_train, y_train.ravel())
mlp_multi_hidden_layer.fit(X_train, y_train.ravel())

print("--------------------------------\n")
print("Result of training (Single hidden layer): %5.3f" % mlp_single_hidden_layer.score(X_train, y_train))
print("Result of training (Multiple hidden layers): %5.3f" % mlp_multi_hidden_layer.score(X_train, y_train))
print("--------------------------------\n")

# Evaluate both models on the held-out test data.
pred_single = mlp_single_hidden_layer.predict(X_test)
pred_multi = mlp_multi_hidden_layer.predict(X_test)
print("Confusion Matrix (Single hidden layer):")
print(confusion_matrix(y_test, pred_single))
print("\nClassification Report (Single hidden layer):")
print(classification_report(y_test, pred_single))
print("\nConfusion Matrix (Multiple hidden layers):")
print(confusion_matrix(y_test, pred_multi))
print("\nClassification Report (Multiple hidden layers):")
# NOTE(review): fragment — the first statements (through `return bolog_f, carbo_f`)
# are the tail of a load_data() definition whose `def` line lies before this chunk;
# the rest is top-level script that trains an lbfgs MLP and pickles it as
# 'pasta_model.sav'. Left byte-identical; reflow against the full file first.
carbo_vc = np.array([1 for i in range(len(carbo_imgs))]) bolog_f = np.concatenate([bolog_imgs, carbo_imgs], axis=0) np.random.seed(1) np.random.shuffle(bolog_f) carbo_f = np.concatenate([bolog_vc, carbo_vc], axis=0) np.random.seed(1) np.random.shuffle(carbo_f) return bolog_f, carbo_f clf = svm.SVC(gamma=0.001, C=100.) feature, target = load_data() feature = feature.reshape((len(feature), -1)) X_train, X_test, y_train, y_test = train_test_split(feature, target, test_size=0.5, random_state=0) clf = MLPClassifier(solver="lbfgs", random_state=0, activation='relu', hidden_layer_sizes=[100, 100], alpha=0.0001) clf.fit(X_train, y_train) pred_x = np.array(X_test[:3]) print(pred_x) print(clf.predict(pred_x)) print("predict:", clf.score(X_test, y_test)) filename = 'pasta_model.sav' pickle.dump(clf, open(filename, 'wb'))
scaler.fit(X) X = scaler.transform(X) mlp = MLPClassifier(solver='sgd', alpha=0.0001, learning_rate='adaptive', hidden_layer_sizes=(11, 30), max_iter=300000, activation='logistic', verbose=True) mlp.fit(X, y) predictions_probabilities = mlp.predict_proba( [[53, 0, 49, 31, 17, 4, 18, 9, 7, 5, 7, 12, 19, 12, 8, 8, 4, 10, 15, 12]]) predictions = mlp.predict( [[53, 0, 49, 31, 17, 4, 18, 9, 7, 5, 7, 12, 19, 12, 8, 8, 4, 10, 15, 12]]) scoring = mlp.score(X, y, sample_weight=None) # Probability of each class for the prediction input print predictions_probabilities # max(predictions_probabilities) print predictions # Score print scoring # print X # print y # #Frequency # for x in xrange(0,10): # print X[x]
# Earlier architectures tried:
#   hidden_layer_sizes=(90, 90, 90)    -> 0.9518095238095238
#   hidden_layer_sizes=(783, 783, 783) -> 0.9575238095238096 (also with sgd)
mlp = MLPClassifier(verbose=True, solver='adam', activation='relu',
                    learning_rate='constant', hidden_layer_sizes=(783, ))
mlp.fit(x_train, y_train)

predictions = mlp.predict(x_test)
print("\nConfusion matrix: ")
print(confusion_matrix(y_test, predictions))
print("\nClassification report: ")
print(classification_report(y_test, predictions))
print("\nAccuracy score: ")
print(accuracy_score(y_test, predictions))
print("Training set score: %f" % mlp.score(x_train, y_train))
print("Test set score: %f" % mlp.score(x_test, y_test))
def do_machinea_leaning_stuff(train_X, train_Y, test_X, test_Y):
    """Fit a battery of sklearn classifiers on the training split and evaluate
    each on the test split.

    Returns a list of dicts, one per classifier, with keys:
      'name', 'score' (model.score on the test split), 'accuracy_naive'
      (fraction of mismatched predictions), 'accuracy_score', and
      'classification_report'.

    BUG FIX: the LinearSVC entry previously reported
    label_prop_model.score(...) instead of the LinearSVC's own score
    (copy-paste error). The 14x-duplicated metrics dict is now built by a
    single helper, which also removes that class of mistake.
    """

    def _report(name, model):
        # Build the metrics dict for an already-fitted model.
        pred = model.predict(test_X)
        return {
            'name': name,
            'score': model.score(test_X, test_Y),
            'accuracy_naive': (test_Y != pred).sum() * 1.0 / len(pred),
            'accuracy_score': accuracy_score(test_Y, pred),
            'classification_report': classification_report(test_Y, pred),
        }

    returnValue = []

    rfc = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
    rfc.fit(train_X, train_Y)
    returnValue.append(_report("RandomForestClassifier", rfc))

    etc = ExtraTreesClassifier()
    etc.fit(train_X, train_Y)
    returnValue.append(_report("ExtraTreesClassifier", etc))

    gpc = GaussianProcessClassifier(random_state=0)
    gpc.fit(train_X, train_Y)
    # TODO: the class probabilities are only printed; consider returning them.
    print(gpc.predict_proba(test_X))
    returnValue.append(_report("GaussianProcessClassifier", gpc))

    pac = PassiveAggressiveClassifier(max_iter=1000, random_state=0, tol=1e-3)
    pac.fit(train_X, train_Y)
    returnValue.append(_report("PassiveAggressiveClassifier", pac))

    rc = RidgeClassifier()
    rc.fit(train_X, train_Y)
    returnValue.append(_report("RidgeClassifier", rc))

    sgdc = SGDClassifier(max_iter=1000, tol=1e-3)
    sgdc.fit(train_X, train_Y)
    returnValue.append(_report("SGDClassifier", sgdc))

    bnb = BernoulliNB()
    bnb.fit(train_X, train_Y)
    returnValue.append(_report("BernoulliNB", bnb))

    knnc = KNeighborsClassifier(n_neighbors=3)
    knnc.fit(train_X, train_Y)
    returnValue.append(_report("KNeighborsClassifier", knnc))

    mlpc = MLPClassifier()
    mlpc.fit(train_X, train_Y)
    returnValue.append(_report("MLPClassifier", mlpc))

    # Semi-supervised baseline: hide ~30% of the labels (-1 marks unlabeled).
    label_prop_model = LabelPropagation()
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(train_Y)) < 0.3
    labels = np.copy(train_Y)
    labels[random_unlabeled_points] = -1
    label_prop_model.fit(train_X, labels)
    returnValue.append(_report("LabelPropagation", label_prop_model))

    lsvc = LinearSVC(random_state=0, tol=1e-5)
    lsvc.fit(train_X, train_Y)
    # Fixed: report lsvc's own score (was label_prop_model.score by mistake).
    returnValue.append(_report("LinearSVC", lsvc))

    svc = SVC(gamma='auto')
    svc.fit(train_X, train_Y)
    returnValue.append(_report("SVC", svc))

    dtc = DecisionTreeClassifier(random_state=0)
    dtc.fit(train_X, train_Y)
    returnValue.append(_report("DecisionTreeClassifier", dtc))

    cccv = CalibratedClassifierCV()
    cccv.fit(train_X, train_Y)
    returnValue.append(_report("CalibratedClassifierCV", cccv))

    return returnValue
#initialize the taret classifier and train it # clf = neighbors.KNeighborsClassifier(n_neighbors=3) #clf=SVC() #clf=GaussianProcessClassifier(1.0 * RBF(1.0)) #clf = DecisionTreeClassifier(max_depth=5) clf = MLPClassifier(alpha=1) clf.fit(X_train, y_train) #Store the predicted values y_pred = clf.predict(X_test) #Calculate global accuracy accuracy = accuracy_score(y_test, y_pred) #accuracy = clf.score(X_test, y_test) accuracy = clf.score(X_test, y_test) minority_y_test_index = [] minority_y_test_index1 = np.where(y_test == 1) total_indexes = np.where(y_test >= 0) minority_y_test_index1_list1 = minority_y_test_index1[0].tolist() minority_y_test_index = minority_y_test_index1_list1 y_pred_minority = [] y_test_minority = [] majority_test_index = total_indexes for item in minority_y_test_index: y_test_minority.append(y_test[item])
#print(trainY) #avgmlp = [] trainData = [data[j] for j in range(int(len(data)*.9))] testData = [data[u] for u in range(int(len(data)*.9),(int(len(data))))] trainText, trainY = [d[0] for d in trainData], [d[1] for d in trainData] #print(trainText) testText, testY = [d[0] for d in testData], [d[1] for d in testData] min_df = 1 max_features = 15000 countVec = CountVectorizer(ngram_range=(1,3), min_df = min_df, max_features = max_features) trainX = countVec.fit_transform(trainText) testX = countVec.transform(testText) mlp = MLPClassifier(hidden_layer_sizes=(100,), alpha = .0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5,max_iter=2000,shuffle=True, random_state=None, tol=0.0001, momentum=0.9) mlp.fit(trainX, trainY) score = mlp.score(testX,testY) #predictions = mlp.predict(testX) #confused_matrix = confusion_matrix(testY,predictions) print(score) #print(confused_matrix) # falpos = 0 # falneg = 0 # truepos = 0 # trueneg = 0 # testPredict = [] # accactual = [] # truepostotal = [] # truenegtotal = [] # falpostotal = [] # falnegtotal = []
verbose=showProgress, # Show weight update by loss, # The loss function for classification is Cross-Entropy activation='logistic', # Activation function for the hidden layer: returns f(x) = 1 / (1 + exp(-x)) tol=1e-5, # Tolerance for the optimization when loss decreases random_state=1, # seed for random values learning_rate_init=.001, # learning rate for weight update learning_rate='adaptive') # Adapts the learning rate when reaching loss improvement boundary mlp.fit(X_train, y_train) # Train # Show size of train and test sets print("\nTrain split: " + str(percent)) print("Train set size: %i" % len(y_train)) print("Test set size: %i" % len(y_test)) # score: Returns the mean accuracy on the given test data and labels print("\nMLP Classifier Training set score: %f" % mlp.score(X_train, y_train)) print("MLP Classifier Test set score: %f" % mlp.score(X_test, y_test)) # DECISION TREE SECTION STARTS dtc = tree.DecisionTreeClassifier() dtc.fit(X_train, y_train) # score: Returns the mean accuracy on the given test data and labels print("DTC Classifier Training set score: %f" % dtc.score(X_train, y_train)) print("DTC Classifier Test set score: %f" % dtc.score(X_test, y_test)) ############################################################################## # PREDICTING UNSEEN EARTHQUAKES # In order to verify them, we will get new samples that actually happened right after our last sample. # Our last sample happened on 2016-05-09 01:22:44.740. # So, our unseen data starts on 2016-05-09 01:22:44.741 # Source: http://earthquake.usgs.gov/earthquakes/search/
'SVM decision', 'MLP decision', 'RF decision', 'Boosting decision', 'Stacked LR', 'Stacked Decision Tree' ] classifiers = [svm, mlp, forest, boosting, stacked_lr, stacked_dt] for clf, ax, title in zip(classifiers, sub.flatten(), titles): plot_classifier_decision(ax, clf, X_test, mode='filled', alpha=0.4) plot_dataset(X_test, y_test, ax=ax) ax.set_xlim(np.min(X[:, 0]), np.max(X[:, 0])) ax.set_ylim(np.min(X[:, 1]), np.max(X[:, 1])) ax.set_title(title, fontsize=15) plt.show() plt.tight_layout() ############################################################################### # Evaluation on the test set # -------------------------- # # Finally, let's evaluate the baselines and the Dynamic Selection methods on # the test set: print('KNORAE score = {}'.format(knora_e.score(X_test, y_test))) print('DESP score = {}'.format(desp.score(X_test, y_test))) print('OLA score = {}'.format(ola.score(X_test, y_test))) print('Rank score = {}'.format(rank.score(X_test, y_test))) print('SVM score = {}'.format(svm.score(X_test, y_test))) print('MLP score = {}'.format(mlp.score(X_test, y_test))) print('RF score = {}'.format(forest.score(X_test, y_test))) print('Boosting score = {}'.format(boosting.score(X_test, y_test))) print('Stacking LR score = {}'.format(stacked_lr.score(X_test, y_test))) print('Staking Decision Tree = {}'.format(stacked_dt.score(X_test, y_test)))
# # the notes for this class can be found at: # https://deeplearningcourses.com/c/data-science-deep-learning-in-python # https://www.udemy.com/data-science-deep-learning-in-python from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import sys sys.path.append('../ann_logistic_extra') from process import get_data from sklearn.neural_network import MLPClassifier from sklearn.utils import shuffle # get the data Xtrain, Ytrain, Xtest, Ytest = get_data() # create the neural network model = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000) # train the neural network model.fit(Xtrain, Ytrain) # print the train and test accuracy train_accuracy = model.score(Xtrain, Ytrain) test_accuracy = model.score(Xtest, Ytest) print("train accuracy:", train_accuracy, "test accuracy:", test_accuracy)
print "Training Score Decision Tree: %s" % score_DT elapsed_time_g = time.time() - start_time_b print "Training Baseline Decision Tree took: %s" % str(elapsed_time_g) start_time_b = time.time() adb = AdaBoostClassifier() adb.fit(X, y) score_AD = adb.score(X, y) print "Training Score AdaBoost: %s" % score_AD elapsed_time_g = time.time() - start_time_b print "Training Baseline AdaBoost took: %s" % str(elapsed_time_g) start_time_b = time.time() mlp_g = MLPClassifier() mlp_g.fit(X_train, y_train) score_MLP = mlp_g.score(X_train, y_train) print "Training Score Neural Network: %s" % score_MLP elapsed_time_g = time.time() - start_time_b print "Training Baseline Neural Network took: %s" % str(elapsed_time_g) print "#" *150 print "Starting Random Forest Experiments..." # if isinstance(rfc, RandomForestClassifier): # print "test" if paramOptim: rfc = RandomForestClassifier(n_jobs=-1, max_features='sqrt', n_estimators=50, oob_score=True) param_grid = { 'n_estimators': [200, 700], 'max_features': ['auto', 'sqrt', 'log2'],
for i, row in enumerate (data): for j, attr in enumerate (row): train [i, j] = attr max_i = 40 fig, ax = plt.subplots (1, 1, figsize = (18, 10)) max_iter = 3000 for j, i in enumerate (np.linspace (0, max_i, 3)): X = train[int (i / 2 + i % 2):sample_nu - int(i / 2), 0:9] y = train[int (i / 2 + i % 2):sample_nu - int(i / 2), 9] X = MinMaxScaler().fit_transform(X) mlps = [] mlp = MLPClassifier(verbose=0, random_state=0, max_iter=max_iter) mlp.fit(X, y) mlps.append(mlp) print ("Training set score: %f" % mlp.score (X, y)) print ("Training set loss: %f" % mlp.loss_) ax.plot(mlp.loss_curve_, label = labels[j], **plot_args[j]) for l_n, swap_nu in enumerate ({5, 15, 40}): X = train[0:sample_nu, 0:9] y = train[0:sample_nu, 9] for i in range (1, swap_nu): ind1 = int (np.random.uniform(0, sample_nu - 1)) ind2 = int (np.random.uniform(0, sample_nu - 1)) j1 = int (np.random.uniform(1, 8)) j2 = int (np.random.uniform(1, 8)) tmp = X[ind1, j1] X[ind2, j2] = tmp X[ind1, j1] = X[ind2, j2] X = MinMaxScaler().fit_transform(X) mlp = MLPClassifier(verbose=0, random_state=0, max_iter=max_iter)
from GloVe_helper import GloVeLoader
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
import pickle

if __name__ == '__main__':
    # Load the GloVe embedding helper and the raw IMDB review dataset.
    gl = GloVeLoader()
    df = pd.read_csv('IMDB_Dataset.csv')
    df = df.values
    # Column 0: review text, column 1: sentiment label.
    x, y = df.T[0], df.T[1]
    # Encode sentiment as 0 (negative) / 1 (anything else, i.e. positive).
    y = np.array([0 if i == 'negative' else 1 for i in y])
    # Map each review to its GloVe embedding vector.
    x = np.array([gl.pull_glove_embed(i) for i in x])
    print("starting training")
    clf = MLPClassifier(random_state=1, max_iter=500, verbose=True).fit(x, y)
    clf.score(x, y)  # NOTE(review): training accuracy is computed but the result is discarded
    # Fix: use a context manager so the file handle is flushed and closed
    # even if pickling raises (original opened the file without closing it).
    with open('sentiment_model.sav', 'wb') as fh:
        pickle.dump(clf, fh)
cancer = load_breast_cancer() ''' TRAIN - TEST SPLIT ''' X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, stratify=cancer.target, random_state=0) ''' MLP CLASSIFIER 1000 EPOCHS ( 200 MAX-ITER CONVERGENCE ERROR ) NO SCALING OF DATA ''' mlp = MLPClassifier(max_iter=1000, random_state=42) mlp.fit(X_train, y_train) print("Training Accuracy (before scaling) : ", mlp.score(X_train, y_train)) print("Test Accuracy (before scaling) : ", mlp.score(X_test, y_test)) ''' ACCURACY ON TRAINING AND TEST DATASETS IS NOT GOOD ENOUGH WHICH MAY BE DUE TO SCALING OF DATA. FEATURES OF EACH SAMPLE OF DATASET ARE NOT AT SAME SCALE. ''' scaler = StandardScaler() X_train_scaled = scaler.fit(X_train).transform(X_train) X_test_scaled = scaler.fit(X_test).transform(X_test) ''' ACCURACIES AFTER SCALING OF DATA. PERFORMANCE IS MUCH BETTER WITH SCALED PARAMETERS ''' mlp = MLPClassifier(max_iter=1000, random_state=42) mlp.fit(X_train_scaled, y_train) print("Training Accuracy (after scaling) : ",
fpr, tpr, _ = roc_curve(Y_test, y_score) roc_auc_SVM = auc(fpr, tpr) plt.plot(fpr, tpr, color='navy', lw=lw) plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.legend([ 'ROC Curve for MLP (area = %0.3f)' % roc_auc_MLP, 'ROC Curve for SVM (area = %0.3f)' % roc_auc_SVM, 'Standard Curve' ], loc="lower right") # Output the metrics of the models print("Neural Network Model Test Set Metrics:") print("\tCross Val Acc:\t%f" % mlp_score) print("\tAccuracy:\t%f" % mlp.score(norm_X_test, Y_test)) print("\tPrecision:\t%f" % mlp_precision) print("\tRecall:\t\t%f" % mlp_recall) print("\tF1:\t\t%f" % mlp_f1) print("") print("Support Vector Machine Model Test Set Metrics:") print("\tCross Val Acc:\t%f" % svm_score) print("\tAccuracy:\t%f" % svm.score(norm_X_test, Y_test)) print("\tPrecision:\t%f" % svm_precision) print("\tRecall:\t\t%f" % svm_recall) print("\tF1:\t\t%f" % svm_f1) # Shows the ROC Curve plots that were made earlier plt.show() ################################################################################
"""Neuronal Network with scikit """ #""" neuralstart = timeit.default_timer() scaler = StandardScaler() X1_train_std = scaler.fit_transform(X1_train) X1_test_std = scaler.fit_transform(X1_test) neuralloop = timeit.default_timer() neural_fit = MLPClassifier(activation = "logistic", learning_rate = "adaptive", random_state = 0).fit(X1_train_std, y1_train) neural_score = neural_fit.score(X1_test_std, y1_test) neuralend = timeit.default_timer() print("Time for Neural network: ", neuralend-neuralloop) print(neural_score) neuralfinish = timeit.default_timer() print("The time used is ", neuralfinish-neuralstart) print("The maximal score is ") print(np.max(saver)) #""" # In[11]: KNN
max_iter=1000, activation='relu') random_forest_clf = RandomForestClassifier( criterion='entropy', min_samples_split=7, min_samples_leaf=2, n_estimators=14) neural_network_sum = 0 random_forest_sum = 0 neural_network_wins = 0 for i in range(10): neural_network_clf.fit(x_train, y_train) neural_network_score = neural_network_clf.score(x_val, y_val) random_forest_clf.fit(x_train, y_train) random_forest_score = random_forest_clf.score(x_val, y_val) print("random forest: ", random_forest_score) print("neural network: ", neural_network_score) if (neural_network_score > random_forest_score): neural_network_wins += 1 neural_network_sum += neural_network_score random_forest_sum += random_forest_score random_forest_avg = random_forest_sum / 10 neural_network_avg = neural_network_sum / 10 print("random forest avg: ", random_forest_avg) print("neural network avg: ", neural_network_avg) print("neural network wins: ", neural_network_wins)
scaler = StandardScaler().fit(X) # for later scaling of test data X_std = StandardScaler().fit_transform(X) # split the training set mask_nn = [True,]*int(0.8*X_std.shape[0]) mask_nn.extend([False,]*(X_std.shape[0] - int(0.8*X_std.shape[0]))) mask_nn = np.random.permutation(mask_nn) X_train, Y_train = X_std[mask_nn], Y[mask_nn] X_xv, Y_xv = X_std[~mask_nn], Y[~mask_nn] mlp = MLPClassifier(hidden_layer_sizes=(50), activation='logistic', max_iter=100, alpha=NN_para[count], solver='lbfgs', verbose=False, tol=1e-4, random_state=1, learning_rate_init=.1) mlp.fit(X_train, Y_train) print("Training Accurancy : {:<10}".format(mlp.score(X_train, Y_train))) print('x-validation Accurancy: {:<10}'.format(mlp.score(X_xv, Y_xv))) NN_scores.append(mlp.score(X_xv, Y_xv)) print('Time spent for NN: {:6.3f}s'.format(time.time() - start)) print('The logloss is: {}'.format(logloss(X_xv['result'], mlp.predict_proba(X_xv[features])[:, 1]))) import matplotlib.pylab as plt plt.figure() plt.subplot(2,1,1) plt.title('Scores of Random Forest') if RF: plt.plot(RF_para, RF_scores, 'o-') plt.subplot(2,1,2) plt.title('Scores of Neural Network') if NN:
for idx,val in enumerate(cm_x0[choice_src]): x0[choice_des,idx] = val if DEBUG: print(x0) print(x1) ########### ## Build training set for the model nn = MLPClassifier(solver='sgd',alpha=1e-5,activation='logistic',hidden_layer_sizes=(100,50,10,1),random_state=1,max_iter=10000) X = np.concatenate((x0,x1),axis=0) Y = np.concatenate((y0,y1),axis=0) Y = Y.ravel() print(X) print(Y) nn.fit(X,Y) validate_X = validate_x1.copy() validate_Y = validate_y1.copy() validate_X.resize((validate_X.shape[0]+validate_x0.shape[0],validate_x0.shape[1])) validate_Y.resize((validate_Y.shape[0]+validate_y0.shape[0],validate_y0.shape[1])) for idx in range(validate_X.shape[0],validate_X.shape[0]): for idcol in range(0,validate_X.shape[1]): validate_X[idx,idcol] = validate_x0[idx-validate_X.shape[0],idcol] validate_Y[idx,0] = validate_y0[idx-validate_X.shape[0],0] y_valid = nn.predict(validate_X) print(" accuracy => ",accuracy_score(y_valid.ravel(),validate_Y.ravel())) score = nn.score( validate_X, validate_Y.ravel() ) print("Score => ") print(score)
# In[84]: ss = StandardScaler() x_train = pd.DataFrame(ss.fit_transform(x_train)) test_feat = pd.DataFrame(ss.fit_transform(test_feat)) # In[85]: ann = MLPClassifier(solver='lbfgs', alpha=0.000000001, hidden_layer_sizes=(20, ), random_state=1) ann.fit(x_train, y_train) # 模型效果获取 r = ann.score(x_train, y_train) print("R值(准确率):", r) # In[86]: y_submission = ann.predict_proba(test_feat)[:, 0] pred = {'id': test_id, 'prob': y_submission} pred = pd.DataFrame.from_dict(pred) pred = pd.concat([pred, test_feat], axis=1) pred.sort_values(by='prob', ascending=[0], inplace=True) pred = pred.reset_index(drop=True) pred1 = pred.iloc[:20000] right = len(pred1[pred1.id.isin(df_9982.id.unique())]) right = len(pred1[pred1.id.isin(df_9982.id.unique())]) wrong = 20000 - right
def neuralNetworkHiddenLayerNumberAnalysis():
    """Sweep the width of a single-hidden-layer MLP (5..395 nodes, step 5)
    across a list of alpha (L2 penalty) values, printing per-model train /
    test / validation metrics and writing one CSV of scores per alpha value.
    """
    import pydotplus  # NOTE(review): imported but never used in this function
    # traing_test_data_set() appears to return six parallel lists indexed by
    # feature-set number: train data/labels, test data/labels, validation
    # data/labels -- confirm against its definition.
    a,b,c,d,e,f = traing_test_data_set();
    for feature_number in range(1, 2):  # only feature set #1 is analysed
        print("Feature Number : " + str(feature_number));
        train_data, train_label = a[feature_number - 1], b[feature_number - 1];
        test_data, test_label = c[feature_number - 1], d[feature_number - 1];
        validation_data,validation_label = e[feature_number-1],f[feature_number-1];
        print("Train data set size : " + str(len(train_data)));
        print("Test set size : " + str(len(test_label)));
        print("Validation set size : " + str(len(validation_label)));
        print("------------------------------------------------");
        # Candidate L2 regularisation strengths to sweep.
        alphalist = [.00001, .00003, .0001, .0003, .001, .003, .01, .03, 1, 10]
        from sklearn.neural_network import MLPClassifier
        for new_alpha in alphalist:
            # CSV accumulator: header row now, one data row per hidden width.
            hiddenLayerAnalysisResult = "Number of node in 1st hidden layer,train score,validation score,test score,iterations\n";
            for hiddenNode in range(5,400,5):
                clf = MLPClassifier(alpha=new_alpha,tol = 1e-5, hidden_layer_sizes=(hiddenNode,), random_state=1, activation='logistic', max_iter=1000);
                clf.fit(train_data, train_label)
                # Count test-set misclassifications by hand.
                tot = len(test_label);
                cnt = 0;
                prediction = clf.predict(test_data);
                for i in range(0, len(test_data)):
                    if prediction[i] != test_label[i]:
                        #print(str(i)+str(clf.predict([test_data[i]]))+" "+str(test_label[i]));
                        cnt += 1;
                from sklearn.metrics import accuracy_score
                from sklearn.metrics import precision_score
                from sklearn.metrics import f1_score
                print("Number of node in first hidden layer :" + str(hiddenNode));
                print("Train Score : " + str(clf.score(train_data, train_label)));
                train_score = str(clf.score(train_data, train_label));
                test_score = str(accuracy_score(test_label, prediction) * 100.0);
                print("On test set");
                print("Correct prediction : " + str(tot - cnt));
                print("Incorrect Prediction : " + str(cnt));
                print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
                print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
                print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
                print("Error Rate : " + str(cnt / tot * 100.0));
                print("---------------------------------------\n");
                # Repeat the misclassification count on the validation set.
                # NOTE(review): tot is re-set to len(test_label) here, not
                # len(validation_label) -- the counts and error rate below are
                # wrong whenever the two splits differ in size; confirm and fix.
                tot = len(test_label);
                cnt = 0;
                prediction = clf.predict(validation_data);
                for i in range(0, len(validation_data)):
                    # NOTE(review): the per-sample predict() call is redundant
                    # (and slow); `prediction[i]` already holds the same value.
                    if clf.predict([validation_data[i]])[0] != validation_label[i]:
                        #print(str(i)+str(clf.predict([test_data[i]]))+" "+str(test_label[i]));
                        cnt += 1;
                print("On validation set");
                validation_score = str(accuracy_score(validation_label, prediction) * 100.0);
                print("Correct prediction : " + str(tot - cnt));
                print("Incorrect Prediction : " + str(cnt));
                print("Accuracy : " + str(accuracy_score(validation_label, prediction) * 100.0))
                print("Precision : " + str(precision_score(validation_label, prediction, average='weighted') * 100.0))
                print("F1 Score : " + str(f1_score(validation_label, prediction, average='weighted') * 100.0))
                print("Error Rate : " + str(cnt / tot * 100.0));
                print("---------------------------------------\n");
                # Append this width's CSV row.
                hiddenLayerAnalysisResult+= str(hiddenNode)+","+train_score+","+validation_score+","+test_score+","+str(clf.n_iter_)+"\n";
            # One CSV file written per alpha value.
            file_name = "hiddenLayerAnalysisResult " + " With alpha = [ " + str(new_alpha) + "] .csv";
            fw = open(file_name, "w", encoding="utf-8");
            fw.write(hiddenLayerAnalysisResult);
            fw.close();
#print(X) #classle = LabelEncoder() y = df['do'] #print(X, y) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, test_size=0.3) #model = GaussianNB() # 60左右 model = MLPClassifier(hidden_layer_sizes=(13,13,13),max_iter=999) # ma20 67左右 #model = DTC() # #model = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski') model = LogisticRegression(C=1000, random_state=0) # 太差 # 以下都有UndefinedMetricWarning #model = MultinomialNB() #model = SVC(kernel='linear', C=1.0, random_state=1) # ma20 76左右 #model = SVC(kernel='rbf', C=1.0, random_state=0, gamma=0.1) # ma20 67左右 model.fit(X_train, y_train) print(model.score(X_train, y_train)) predicted = model.predict(X_test) expected = y_test report = metrics.classification_report(predicted, expected) print(report) # 混淆矩阵 cm = metrics.confusion_matrix(predicted, expected) print(cm)
def variando_camadas_escondidas():
    """Vary the hidden-layer width (100 down to 20, step -10) across 5 random
    seeds, training each MLP manually with partial_fit mini-batches, and log
    confusion matrices / accuracies to a CSV via write_csv.

    Relies on module-level names: X_train, y_train, X_test, y_test, acordes,
    write_csv, np, pd, MLPClassifier, confusion_matrix, classification_report.
    """
    max_score = 0
    max_score_neurons = 0  # NOTE(review): never updated; max_score_execution is assigned instead
    csv_name = "test_neuronios.csv"
    range_neuronios = list(range(100, 10, -10))  # 100, 90, ..., 20
    for i in range(5):  # 5 executions, each seeding the MLP with random_state=i
        mlps = []
        corretudes = []
        for neuronios in range_neuronios:
            # max_iter=1 because training is driven manually via partial_fit below.
            mlp = MLPClassifier(hidden_layer_sizes=(neuronios, ),
                                max_iter=1, alpha=1e-4,
                                solver='adam', tol=1e-4,
                                random_state=i)
            N_TRAIN_SAMPLES = X_train.shape[0]
            N_EPOCHS = 2000
            N_BATCH = min(128, N_TRAIN_SAMPLES)
            N_CLASSES = np.unique(y_train)  # full class list required by partial_fit
            scores_train = []
            scores_test = []
            # EPOCH
            epoch = 0
            print(i)
            while epoch < N_EPOCHS:
                # print('epoch: ', epoch)
                # SHUFFLING
                random_perm = np.random.permutation(X_train.shape[0])
                mini_batch_index = 0
                while True:
                    # MINI-BATCH
                    indices = random_perm[mini_batch_index:mini_batch_index + N_BATCH]
                    mlp.partial_fit(X_train[indices], y_train[indices], classes=N_CLASSES)
                    mini_batch_index += N_BATCH
                    if mini_batch_index >= N_TRAIN_SAMPLES:
                        break
                # SCORE TRAIN
                score_train = mlp.score(X_train, y_train)
                scores_train.append(score_train)
                # SCORE TEST
                score_test = mlp.score(X_test, y_test)
                scores_test.append(score_test)
                # Early stop: NOTE(review): _no_improvement_count is a private
                # sklearn attribute and may change between versions.
                if mlp._no_improvement_count > mlp.n_iter_no_change:
                    break
                epoch += 1
            # Track the best test score seen across all runs/widths.
            if score_test > max_score:
                max_score = score_test
                max_score_execution = neuronios
            mlps.append(mlp)
            # PREDICT
            y_predict = mlp.predict(X_test)
            mat_confusao = confusion_matrix(y_test, y_predict)
            # mat_confusao_list.append(mat_confusao)
            corretude = score_test * 100
            corretudes.append(corretude)
            # Log this width's results to the CSV.
            write_csv(csv_name, [["neuronios", neuronios]])
            write_csv(csv_name, [acordes])
            write_csv(csv_name, mat_confusao)
            write_csv(csv_name, [["Corretude", str(corretude)]])
            # write_csv(csv_name, [["Relatorio classificador"]])
            report = classification_report(y_test, y_predict, output_dict=True)
            df_report = pd.DataFrame(report).transpose()  # NOTE(review): built but never written out
            # with open(csv_name, 'a') as f:
            #     df_report.to_csv(f)
            """ Plot train_test"""
            # mlps.append(mlp)
            # plt.figure()
            # plt.plot(scores_train, color='green', alpha=0.8, label='Treino')
            # plt.plot(scores_test, color='magenta', alpha=0.8, label='Teste')
            # plt.title("Acurácia ao longo das épocas", fontsize=14)
            # plt.xlabel('Épocas')
            # plt.legend(loc='upper left')
            # plt.show()
            # plt.savefig('acuracia_treino_teste_execucao{}_{}neuronios_escondidos'.format(i, neuronios))
            # plt.figure()
            # plt.title("Função de perda ao longo das épocas", fontsize=14)
            # plt.xlabel('Épocas')
            # plt.plot(mlp.loss_curve_)
            # plt.show()
            # plt.savefig('funcao_perda_execucao{}_9attr'.format(i))
        # Per-execution LaTeX-style accuracy table appended to the same CSV.
        write_csv(csv_name, [[
            "Tabela de acuracias - latex format",
            "execucao {}".format(str(i))
        ]])
        n_iter_list = [mlp.n_iter_ for mlp in mlps]
        tabela_acuracias = []
        for j, qtd_neuronios in enumerate(range_neuronios):
            execucao = str(i)
            neuronio_info = str(qtd_neuronios)
            acuracia = str(round(corretudes[j], 2))
            epocas = str(n_iter_list[j])
            row = "\#{execucao} && {neuronios} && {epocas} && {acuracia}".format(
                execucao=execucao, neuronios=neuronio_info,
                epocas=epocas, acuracia=acuracia)
            tabela_acuracias.append([row])
        write_csv(csv_name, tabela_acuracias)
X.append(list(tmp)) y.append(0) X = np.array(X) y = np.array(y) x_train_all, x_test, y_train_all, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42) x_train, x_val, y_train, y_val = train_test_split(x_train_all, y_train_all, stratify=y_train_all, test_size=0.2, random_state=42) scaler = StandardScaler() scaler.fit(x_train) x_train_scaled = scaler.transform(x_train) x_val_scaled = scaler.transform(x_val) # simple model mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', \ solver='lbfgs', alpha=0.01, batch_size=32, \ max_iter=500) mlp.fit(x_train_scaled, y_train) print(mlp.score(x_val_scaled, y_val))
y_train = train_labels X_test = test_images.reshape(10000, 28 * 28) / 255 y_test = test_labels # 为了提高训练速度,我们只提取10%的样本进行演示 X_train_lite = X_train[0:5999, :] y_train_lite = y_train[0:5999] X_test_lite = X_test[0:999, :] y_test_lite = y_test[0:999] # TODO: 3.使用原始数据进行预测 # 导入多层感知机MLP神经网络 start = time.time() mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=[100, 100], activation='relu', alpha=1e-5, random_state=62, verbose=0) mlp.fit(X_train_lite, y_train_lite) score_ori_train = mlp.score(X_train_lite, y_train_lite) score_ori_test = mlp.score(X_test_lite, y_test_lite) print('训练结束,用时{:.2f}s.'.format(time.time() - start)) print('训练集得分: {:.4f}, 测试集得分: {:.4f}'.format( mlp.score(X_train_lite, y_train_lite), mlp.score(X_test_lite, y_test_lite)))
def run_mlp(X_train, X_test, y_train, y_test):
    """Train an SGD-based MLP on the training split and return its mean
    accuracy on the test split."""
    model = MLPClassifier(random_state=0,
                          hidden_layer_sizes=(100,),
                          solver='sgd',
                          max_iter=700)
    # fit() returns the estimator itself, so the call chain is equivalent
    # to fitting first and scoring afterwards.
    return model.fit(X_train, y_train).score(X_test, y_test)
fileName = 'final_model.sav' #pickle.dump(ml, open(fileName, 'wb')) #ml = pickle.load(open(fileName, 'rb')) # --------------------- # 분류 예측 y_pred = ml.predict(x_test) print('예측값 : ', y_pred) print('실제값 : ', y_test) print('분류 정확도 : ', accuracy_score(y_test, y_pred)) # 0.9777777777 # confusion_matrix con_mat = pd.crosstab(y_test, y_pred, rownames=['예측값'], colnames=['관측값']) print(con_mat) print((con_mat[0][0] + con_mat[1][1] + con_mat[2][2]) / len(y_test)) # 0.9777777777 print(ml.score(x_test, y_test)) # 0.977777777 # 시각화 import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap from matplotlib import font_manager, rc font_name = font_manager.FontProperties( fname="c:/Windows/Fonts/malgun.ttf").get_name() plt.rc('font', family=font_name) #그래프에서 한글깨짐 방지용 def plot_decision_region(X, y,
labels = np.unique(all_charts.chart).tolist() num_labels = len(labels) class_mapping = {label:idx for idx,label in enumerate(labels)} y = all_charts.chart.map(class_mapping) for i in range(input_feature_size): for j in range(hidden_nodes): # split into train and test X_train, X_test, y_train, y_test = train_test_split(X[1:input_feature_size(i)], y, test_size=0.3) # scale std_scaler = StandardScaler() X_train_std = std_scaler.fit_transform(X_train) X_test_std = std_scaler.transform(X_test) model = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 50), activation='logistic', batch_size=10, learning_rate_init=0.01, learning_rate='constant') model.fit(X_train_std, y_train) # evaluate model train_acc = model.score(X_train_std, y_train) test_acc = model.score(X_test_std, y_test) print('Train accuracy: {}'.format(train_acc)) print('Test accuracy: {}'.format(test_acc))
plt.pcolormesh(xx, yy, Z_1, cmap=cmap_light) plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y, cmap=cmap_bold) plt.xlim(xx.min(), xx.max()) plt.ylim(yy.min(), yy.max()) plt.title("MLP classification Result with Regularization") plt.xlabel("Symestry") plt.ylabel("Density") plt.show() ''' #Question C clf =MLPClassifier(algorithm='l-bfgs',alpha=0,hidden_layer_sizes=(10,), early_stopping=True,validation_fraction=0.1,max_iter=500) clf.fit(train_x,train_y) x1_min, x1_max = train_x[:,0].min() - 0.05, train_x[:,0].max() + 0.05 x2_min, x2_max = train_x[:,1].min() - 0.05, train_x[:,1].max() + 0.05 xx, yy = np.meshgrid(np.arange(x1_min, x1_max,0.01),np.arange(x2_min,x2_max,0.01)) Z_1 = clf.predict(np.c_[xx.ravel(), yy.ravel()]) Z_1 = Z_1.reshape(xx.shape) print "The testing error is: ", 1-clf.score(test_x,test_y) plt.figure(1) plt.pcolormesh(xx, yy, Z_1, cmap=cmap_light) plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y, cmap=cmap_bold) plt.xlim(xx.min(), xx.max()) plt.ylim(yy.min(), yy.max()) plt.title("MLP classification Result with Early Stopping and Validation") plt.xlabel("Symestry") plt.ylabel("Density") plt.show()
# TODO: Look at docs and add additional params here to try to increase # accuracy after you go through the demo end-to-end. classifier = MLPClassifier(random_state=seed, shuffle=True, learning_rate="constant", max_iter=10000, warm_start=False, hidden_layer_sizes=(10000)) # This fit() function is how we train the classifier classifier.fit(train_X, train_y) # Now that we've finished training, get the test and train errors. print("==================== AFTER TRAINING ====================") train_error = classifier.score(train_X, train_y) print("Train Accuracy: {}".format(train_error)) test_error = classifier.score(test_X, test_y) print("Test Accuracy: {}".format(test_error)) # Allow the classifier to make predictions on the test set using only the # features, not the labels predicted = classifier.predict(test_X) # TODO: Look up what precision, recall, and f1-score are. # Explain why recall for 3 and 8 might be so low. print("Classification report for classifier %s:\n%s\n" % (classifier, metrics.classification_report(test_y, predicted))) # TODO: Try to interpret what this might be.
# =========================================== if clf is None: sys.exit('Neural network model not initialized') else: print(clf) print('Neural network structure') print([coef.shape for coef in clf.coefs_]) print('Number of iterations used: %f' % clf.n_iter_) raw_input('Program paused. Press enter to continue') # =================== Part 3: Visualize Weights =================== print('Visualizing Neural Network...') plt.figure() displayData(np.transpose(clf.coefs_[0])) raw_input('Program paused. Press enter to continue') # =================== Part 4: Implement Predict =================== print('Training Set Accuracy: %f' % clf.score(X, y.ravel())); raw_input('Program paused. Press enter to continue')
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import sys
sys.path.append('../ann_logistic_extra')

from process import get_data
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle

# Load the full dataset and shuffle before splitting.
X, Y = get_data()
X, Y = shuffle(X, Y)

# 70/30 train/test split.
Ntrain = int(0.7 * len(X))
Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

# Fit an MLP with two hidden layers of 20 units each.
net = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000)
net.fit(Xtrain, Ytrain)

# Report mean accuracy on both splits.
train_accuracy = net.score(Xtrain, Ytrain)
test_accuracy = net.score(Xtest, Ytest)
print("train accuracy:", train_accuracy, "test accuracy:", test_accuracy)
#%% X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=4) from sklearn.neural_network import MLPClassifier mlp=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08, hidden_layer_sizes=(10), learning_rate='constant', learning_rate_init=0.1, max_iter=300, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, solver='sgd', tol=0.0001, validation_fraction=0.1, verbose=10, warm_start=False) mlp.fit(X_train,y_train) print("Training set score: %f" % mlp.score(X_train, y_train)) print("Test set score: %f" % mlp.score(X_test, y_test)) predictions = mlp.predict(X_test) #%% ''' confusion matrix ''' from sklearn.metrics import classification_report,confusion_matrix import csv print(confusion_matrix(y_test,predictions)) print(classification_report(y_test,predictions)) cm=confusion_matrix(y_test,predictions)
mlp = MLPClassifier(random_state=42) mlp.fit(X_train,y_train) # 훈련 세트 각 특성의 평균을 구한다 mean_on_train = X_train.mean(axis=0) # 훈련 세트 각 특성의 표준 편차를 계산한다 std_on_train = X_train.std(axis=0) # 데이터에서 평균을 빼고 표준편차로 나누면 # 평균 0, 표준 편차 1인 데이터로 변형된다 X_train_scaled = (X_train-mean_on_train) / std_on_train # (훈련 데이터의 평균과 표준 편차를 이용해해 같은 변환을 테스트 세트에도 합니다 X_test_scaled = (X_test-mean_on_train)/std_on_train mlp = MLPClassifier(random_state=0).fit(X_train_scaled,y_train) print("훈련 세트 정확도: {:.3f}".format(mlp.score(X_train_scaled,y_train))) print("테스트 세트 정확도: {:.3f}".format(mlp.score(X_test_scaled,y_test))) # 여기까지 하면 최대 반복횟수에 도달했다고 경고가 뜬다 따라서 max_iter을 증가시켜줘야한다 mlp = MLPClassifier(random_state=0,max_iter=1000).fit(X_train_scaled,y_train) print("훈련 세트 정확도: {:.3f}".format(mlp.score(X_train_scaled,y_train))) print("테스트 세트 정확도: {:.3f}".format(mlp.score(X_test_scaled,y_test))) # 일반화를 더 올리기 위해ㅔ alpha 매개변수를 1로 올리면 된다 mlp = MLPClassifier(random_state=0,max_iter=1000,alpha=1).fit(X_train_scaled,y_train) print("훈련 세트 정확도: {:.3f}".format(mlp.score(X_train_scaled,y_train)))