Example #1
def naviBayes(train_X, train_y, test_X, test_y):
	# Note: despite its name, this function trains an MLP classifier.
	# print(train_y)
	# print(test_y)
	# model = tfMultyPerceptron(train_X, train_y, test_X, test_y)
	# model.run()
	time_start = time.time()
	model = MLPClassifier(hidden_layer_sizes=(128, 32, 32, 128), max_iter=100, early_stopping=False,
	                      learning_rate_init=0.001, verbose=True)
	# model = MultinomialNB()
	# model = BernoulliNB()
	# model = KNeighborsClassifier()
	# model = DecisionTreeClassifier(max_depth=20, min_samples_leaf=0.01)
	# model = LinearSVC(random_state=0)
	# model.fit(X, y)
	model.fit(train_X, train_y)
	# model_1.fit(train_X, train_y)
	# model_2.fit(train_X, train_y)
	# model_3.fit(train_X, train_y)
	# model_4.fit(train_X, train_y)
	# model_5.fit(train_X, train_y)
	# All_model = [model, model_1, model_2, model_3, model_4, model_5]

	# train_pre = predct_all(All_model, train_X, train_y)
	# test_pre = predct_all(All_model, test_X, test_y)
	time_end = time.time()
	print("perceptron training cost time: {}".format(time_end - time_start))
	# model = OneVsRestClassifier(SVC(kernel='linear'))
	# model.fit(train_X, train_y)
	# save the trained model
	with open(config.BTMData + 'BayesModel/BTM_perceptron.model', 'wb') as fp:
		pickle.dump(model, fp)

	# load model
	# model = None
	# with open(config.BTMData + 'BayesModel/bayes_BTM.model', 'rb') as fp:
	# 	model = pickle.load(fp)

	# print('train data set size:', len(train_y))
	# result = metrics.accuracy_score(train_pre, train_y)
	# # returns the class index assigned to each text
	# print("Predicting random boost train result: ", result)
	# print('train data set size:', len(train_y))
	# result = metrics.accuracy_score(test_pre, test_y)
	# print("Predicting random boost test result:", result)

	print('train data set size:', len(train_y))
	result = model.score(train_X, train_y)
	# returns the class index assigned to each text
	print("Predicting train result: ", result)

	test_result = model.score(test_X, test_y)
	print("Predicting test set result: ", test_result)

	top_train_result = model.predict_proba(train_X)
	print("top 3 predict train data accuracy rate: {}".format(cal_topThreeScore(model, top_train_result, train_y)))

	top_test_result = model.predict_proba(test_X)
	print("top 3 predict test data accuracy rate: {}".format(cal_topThreeScore(model, top_test_result, test_y)))
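The helper cal_topThreeScore is not defined in this snippet. A minimal sketch of what it likely computes (top-3 accuracy), assuming it takes the fitted model, the predict_proba matrix, and the true labels:

import numpy as np

def cal_topThreeScore(model, proba, y_true):
	# labels of the three highest-probability classes for each sample
	top3_labels = model.classes_[np.argsort(proba, axis=1)[:, -3:]]
	hits = sum(y in row for y, row in zip(y_true, top3_labels))
	return hits / float(len(y_true))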
def neuralNetworkIteration():
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score
    a, b, c, d, e, f = traing_test_data_set()
    alphalist = [.00001, .00003, .0001, .0003, .001, .003, .01, .03, 1, 10]
    for feature_number in range(1, 2):

        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        for new_alpha in alphalist:
            iteration_output = "Iteration,Training Error,Validation Error\n"
            # warm_start with max_iter=1 lets us record the error after every epoch
            clf = MLPClassifier(alpha=new_alpha, hidden_layer_sizes=(200,), random_state=1, activation='logistic',
                                warm_start=True, max_iter=1)
            for iteration in range(1, 500):
                clf.fit(train_data, train_label)
                prediction = clf.predict(validation_data)
                train_error = 100 - clf.score(train_data, train_label) * 100.0
                validation_error = 100 - accuracy_score(validation_label, prediction) * 100.0
                row = "{},{},{}".format(iteration, train_error, validation_error)
                iteration_output += row + "\n"
                print(row)
            file_name = "For All Feature. Alpha = " + str(new_alpha) + " Iteration data.csv"
            print(file_name)
            with open(file_name, "w", encoding="utf-8") as datafile:
                datafile.write(iteration_output)
def neuralNetworkIterationLogistic():
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        iteration_output = "Iteration,Training Error,Validation Error\n"
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        # warm_start with max_iter=1 lets us record the error after every epoch
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1, activation='logistic',
                            warm_start=True, max_iter=1)
        for iteration in range(1, 350):
            clf.fit(train_data, train_label)
            prediction = clf.predict(validation_data)
            train_error = 100 - clf.score(train_data, train_label) * 100.0
            validation_error = 100 - accuracy_score(validation_label, prediction) * 100.0
            row = "{},{},{}".format(iteration, train_error, validation_error)
            iteration_output += row + "\n"
            print(row)
        file_name = "Feature No " + str(feature_number) + " Iteration data.csv"
        print(file_name)
        with open(file_name, "w", encoding="utf-8") as datafile:
            datafile.write(iteration_output)
Example #4
def neuralNetwork():
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, precision_score, f1_score
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        clf = MLPClassifier(solver='lbfgs', alpha=.003, hidden_layer_sizes=(10,), random_state=1, activation='relu')
        clf.fit(train_data, train_label)

        tot = len(test_label)
        prediction = clf.predict(test_data)
        # count misclassified test samples
        cnt = sum(1 for i in range(len(test_data)) if prediction[i] != test_label[i])
        print("Complete for Feature :" + str(feature_number))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
def main():
    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

    classifier = MLPClassifier(max_iter=1000)
    classifier.fit(X_train, y_train)
    s = classifier.score(X_test, y_test)
    print(s)
Example #6
def test_multilabel_classification():
    # Test that multi-label classification works as expected.
    # test fit method
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert_equal(mlp.score(X, y), 1)

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for i in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert_greater(mlp.score(X, y), 0.9)
Example #7
def test_partial_fit_unseen_classes():
    # Non regression test for bug 6994
    # Tests for labeling errors in partial fit

    clf = MLPClassifier(random_state=0)
    clf.partial_fit([[1], [2], [3]], ["a", "b", "c"],
                    classes=["a", "b", "c", "d"])
    clf.partial_fit([[4]], ["d"])
    assert_greater(clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]), 0)
def fit_and_score_ann(x_train, y_train, x_test, y_test, config):
    ann = MLPClassifier(solver=config.ann.solver,
                        max_iter=Configuration.ANN_MAX_ITERATIONS,
                        alpha=config.ann.alpha,
                        hidden_layer_sizes=(config.ann.hidden_neurons,),
                        learning_rate='adaptive')

    ann.fit(x_train, y_train)
    return ann.score(x_test, y_test)
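fit_and_score_ann depends on a config object with an `ann` namespace and on a Configuration constant, neither of which is shown here. A hedged usage sketch with stand-in values (the SimpleNamespace and the constant below are assumptions, not the project's real types):

from types import SimpleNamespace
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

class Configuration:
    ANN_MAX_ITERATIONS = 500  # stand-in for the project's constant

config = SimpleNamespace(ann=SimpleNamespace(solver='adam', alpha=1e-4, hidden_neurons=32))
x_train, x_test, y_train, y_test = train_test_split(*load_iris(return_X_y=True), random_state=0)
print(fit_and_score_ann(x_train, y_train, x_test, y_test, config))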
def neuralNetwork():
    from sklearn.preprocessing import StandardScaler
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, precision_score, f1_score
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        # fit the scaler on the training data only, then apply it everywhere
        scaler = StandardScaler()
        scaler.fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
        validation_data = scaler.transform(validation_data)
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(100,), random_state=1, activation='logistic', max_iter=1000)
        clf.fit(train_data, train_label)

        tot = len(test_label)
        cnt = 0
        prediction = clf.predict(test_data)
        for i in range(len(test_data)):
            if prediction[i] != test_label[i]:
                print(str(i) + str(prediction[i]) + " " + str(test_label[i]))
                cnt += 1
        print("Complete for Feature :" + str(feature_number))
        print("Train data set size : " + str(len(train_data)))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")

        tot = len(validation_label)
        cnt = 0
        prediction = clf.predict(validation_data)
        for i in range(len(validation_label)):
            if prediction[i] != validation_label[i]:
                print(str(i) + str(prediction[i]) + " " + str(validation_label[i]))
                cnt += 1
        print("Total validation set size : " + str(len(validation_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(validation_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(validation_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(validation_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
Example #10
def scikit_method(X, y, q_z, l_r, beta=0.9, k_f=10):
	from sklearn.model_selection import KFold
	from sklearn.neural_network import MLPClassifier

	train_acc = 0
	test_acc = 0

	# 'algorithm' was renamed to 'solver' in scikit-learn 0.18
	clf = MLPClassifier(hidden_layer_sizes=(q_z,), activation='logistic', solver='sgd',
		learning_rate_init=l_r, momentum=beta)

	# accumulate per-fold accuracies so dividing by k_f below yields averages
	kf = KFold(n_splits=k_f)
	for train, test in kf.split(X):
		clf.fit(X[train], y[train])
		train_acc += clf.score(X[train], y[train])
		test_acc += clf.score(X[test], y[test])

	print("Train accuracy for scikit method: %f" % (train_acc / k_f))
	print("Test accuracy for scikit method: %f" % (test_acc / k_f))

	return train_acc/k_f, test_acc/k_f
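A usage sketch for scikit_method on synthetic data (shapes and hyperparameter values are illustrative only):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=20, random_state=0)
train_acc, test_acc = scikit_method(X, y, q_z=16, l_r=0.01, k_f=5)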
Example #11
    def test_bool_onehot(self):
        X = [x for x in itertools.combinations_with_replacement([True, False], 9)]
        y = [True if sum(a) == 1 else False for a in X]
        X_r = repeat_data(X)
        y_r = repeat_data(y)
        # 'algorithm' was renamed to 'solver' in scikit-learn 0.18
        mlp = MLPClassifier(hidden_layer_sizes=(2,), activation='logistic', max_iter=10000, alpha=1e-4,
                            solver='lbfgs', verbose=False, tol=1e-4, random_state=1,
                            learning_rate_init=.1)
        mlp.fit(X_r, y_r)
        assert mlp.score(X, y) > 0.9
        for x in X:
            # predict expects a 2-D array, so wrap the single sample
            self.assertEqual(mlp.predict([list(x)])[0], (sum(x) == 1))
Example #12
def test_multilabel_classification():
    # Test that multi-label classification works as expected.
    # test fit method
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.97)

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for i in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert_greater(mlp.score(X, y), 0.9)

    # Make sure early stopping still works now that splitting is stratified by
    # default (it is disabled for multilabel classification)
    mlp = MLPClassifier(early_stopping=True)
    mlp.fit(X, y).predict(X)
Example #13
    def test_ski_learn_mnist(self):

        # fetch_mldata was removed from scikit-learn (mldata.org is defunct);
        # fetch_openml is the maintained replacement
        from sklearn.datasets import fetch_openml
        X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
        # rescale the data, use the traditional train/test split
        X = X / 255.
        X_train, X_test = X[:60000], X[60000:]
        y_train, y_test = y[:60000], y[60000:]

        # 'algorithm' was renamed to 'solver' in scikit-learn 0.18
        mlp = MLPClassifier(hidden_layer_sizes=(50,), activation='logistic', max_iter=2, alpha=1e-4,
                            solver='sgd', verbose=False, tol=1e-4, random_state=1,
                            learning_rate_init=.1)

        mlp.fit(X_train, y_train)
        assert mlp.score(X_test, y_test) > 0.9
Example #14
def test_partial_fit_classification():
    # Test partial_fit on classification.
    # `partial_fit` should yield the same results as `fit` for binary and
    # multi-class classification.
    for X, y in classification_datasets:
        # 'algorithm' was renamed to 'solver' in scikit-learn 0.18
        mlp = MLPClassifier(solver='sgd', max_iter=100, random_state=1,
                            tol=0, alpha=1e-5, learning_rate_init=0.2)

        mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPClassifier(solver='sgd', random_state=1, alpha=1e-5,
                            learning_rate_init=0.2)
        for i in range(100):
            mlp.partial_fit(X, y, classes=np.unique(y))
        pred2 = mlp.predict(X)
        assert_array_equal(pred1, pred2)
        assert_greater(mlp.score(X, y), 0.95)
Example #15
def test_lbfgs_classification():
    # Test lbfgs on classification.
    # It should achieve a score higher than 0.95 for the binary and multi-class
    # versions of the digits dataset.
    for X, y in classification_datasets:
        X_train = X[:150]
        y_train = y[:150]
        X_test = X[150:]

        expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
                                max_iter=150, shuffle=True, random_state=1,
                                activation=activation)
            mlp.fit(X_train, y_train)
            y_predict = mlp.predict(X_test)
            assert_greater(mlp.score(X_train, y_train), 0.95)
            assert_equal((y_predict.shape[0], y_predict.dtype.kind),
                         expected_shape_dtype)
def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot learning for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400

    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0,
                            max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)
Example #17
def main():
    from sklearn.neural_network import MLPClassifier
    from sklearn import preprocessing, model_selection
    from sklearn.datasets import fetch_mldata

    db_name = 'australian'

    # fetch_mldata has been removed from recent scikit-learn releases
    # (mldata.org is defunct); fetch_openml(name=...) is the replacement there,
    # though the dataset name on OpenML may differ
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.scale(data_set.data)

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    mlp = MLPClassifier(solver='sgd', alpha=1e-5,
                        hidden_layer_sizes=(2,), activation='logistic', learning_rate_init=0.5)

    mlp = mlp.fit(X_train, y_train)
    print("MLP Accuracy %0.3f " % mlp.score(X_test, y_test))

    mlp = MLPClassifier(solver='sgd', alpha=1e-5,
                        hidden_layer_sizes=(2,), activation='logistic', learning_rate_init=0.5)

    dbm = DBM(mlp).fit(X_train, y_train)
    print("DBM-MLP Accuracy %0.3f " % dbm.score(X_test, y_test))
Example #18
def rede_neural(X, y):
	print("Starting neural network training")

	X2 = normalize(X)

	# 'algorithm' was renamed to 'solver' in scikit-learn 0.18
	clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='tanh', solver='adam', alpha=1e-5,
						learning_rate='constant', tol=1e-8, learning_rate_init=0.0002,
						early_stopping=True, validation_fraction=0.2)

	# the old KFold(n, n_folds=...) API was replaced by KFold(n_splits=...).split(X)
	kf = KFold(n_splits=3)
	i = 0
	for train, test in kf.split(X2):
		start = time.time()
		i = i + 1
		print("Training fold", i)

		# split the dataset into train and test
		#X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=1)
		X_train, X_test, y_train, y_test = X2[train], X2[test], y[train], y[test]

		# fit
		clf.fit(X_train, y_train)
		print("score:", clf.score(X_test, y_test), "(", (time.time() - start) / 60.0, "minutes )")
	return clf
Example #19
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)


clf = MLPClassifier(hidden_layer_sizes = (20,20,20), learning_rate_init = 0.0001, max_iter = 200000, momentum = 0.5)

# clf = DecisionTreeClassifier(criterion = 'gini', max_depth = 25, random_state = 42)

# clf = SVC(C=1.0000, gamma=0.10000, max_iter=100)

clf.fit(X_train_std, y_train)
print("Training Set Accuracy: {:.3f}".format(clf.score(X_train_std, y_train)))
print("Test Set Accuracy: {:.3f}".format(clf.score(X_test_std, y_test)))

predictions = clf.predict(X_test_std)

print("\nConfusion Matrix")
print(confusion_matrix(y_test, predictions)) 
print("\nAccuracy")
print(accuracy_score(y_test, predictions))

df_ = pd.DataFrame()
df_['y_test'] = y_test
df_['predictions'] = predictions
print(df_)
Example #20
# To speed up training, use only about 10% of the samples for this demo
X_train_lite = X_train[0:5999, :]
y_train_lite = y_train[0:5999]
X_test_lite = X_test[0:999, :]
y_test_lite = y_test[0:999]

# Keep enough principal components to explain 90% of the variance
pca = PCA(n_components=0.9)
pca.fit(X_train_lite)
X_train_pca = pca.transform(X_train_lite)
X_test_pca = pca.transform(X_test_lite)
print(X_train_lite.shape, X_test_lite.shape)
print(X_train_pca.shape, X_test_pca.shape)

start = time.time()

mlp = MLPClassifier(solver='lbfgs',
                    hidden_layer_sizes=[100, 100],
                    activation='relu',
                    alpha=1e-5,
                    random_state=62,
                    verbose=2)
mlp.fit(X_train_pca, y_train_lite)

score_train = mlp.score(X_train_pca, y_train_lite)
score_test = mlp.score(X_test_pca, y_test_lite)

print('Training finished in {:.2f}s.'.format(time.time() - start))
print('Training set score: {:.4f}, test set score: {:.4f}'.format(score_train, score_test))
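Since n_components=0.9 keeps just enough components to explain 90% of the variance, the actual number retained can be inspected after fitting:

print(pca.n_components_)                    # number of components kept
print(pca.explained_variance_ratio_.sum())  # cumulative ratio, >= 0.9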
Example #21
# (reconstructed opener; the original snippet begins mid-call)
mlp = MLPClassifier(validation_fraction=0.1,
                    batch_size=200,
                    verbose=True)
mlp.fit(X_train_scaled, y_train)

# Save model as a pickle
save_mlp = open("NN.pickle", "wb")
pickle.dump(mlp, save_mlp)
save_mlp.close()

# note: the model was fit on X_train_scaled, so X_test should be transformed
# with the same scaler before predicting (kept as in the original snippet)
predictions_test = mlp.predict(X_test)

from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, predictions_test))
print(classification_report(y_test, predictions_test))
print("Accuracy on training set: {:.3f}".format(mlp.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test, y_test)))
print("Accuracy on entire data set: {:.3f}".format(mlp.score(X, y)))

# Compare strategy and market returns
y_pred = mlp.predict(X)
dataset['y_pred'] = np.nan
dataset.iloc[(len(dataset) - len(y_pred)):, -1:] = y_pred
trade_dataset = dataset.dropna()

trade_dataset['Tomorrows Returns'] = 0.
trade_dataset['Tomorrows Returns'] = np.log(trade_dataset['Close'] /
                                            trade_dataset['Close'].shift(1))
trade_dataset['Tomorrows Returns'] = trade_dataset['Tomorrows Returns'].shift(
    -1)
# (reconstructed opener; the original snippet begins mid-call)
Model = RandomForestClassifier(oob_score=True)
Model.fit(train_feature, train_labels)
# OOB uses the samples each tree did not draw (out-of-bag) as validation data
print('Base oob score :%.5f' % (Model.oob_score_))

# Try another model
mlp = MLPClassifier(solver='sgd',
                    activation='relu',
                    alpha=1e-4,
                    hidden_layer_sizes=(50, 50),
                    random_state=1,
                    max_iter=10,
                    verbose=10,
                    learning_rate_init=.1)
mlp.fit(train_feature, train_labels)
print(mlp.score(train_feature, train_labels))

# Prepare the test data
# Fare has missing values
test.loc[(test['Fare'].isnull()), 'Fare'] = test['Fare'].dropna().median()
# select features
test_feature = test[['Sex', 'Pclass']].copy()
# new feature
test['FamilySize'] = test['SibSp'] + test['Parch']
test['FamilySize'] = test['SibSp'] + test['Parch']
test.loc[test['FamilySize'] == 0, 'Family'] = 'alone'
test.loc[(test['FamilySize'] > 0) & (test['FamilySize'] <= 3),
         'Family'] = 'small'
test.loc[(test['FamilySize'] > 3) & (test['FamilySize'] <= 6),
         'Family'] = 'medium'
test.loc[test['FamilySize'] > 6, 'Family'] = 'large'
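The same family-size binning can be written in one call with pd.cut; a sketch assuming the thresholds above (0 alone, 1-3 small, 4-6 medium, 7+ large):

import pandas as pd

test['Family'] = pd.cut(test['FamilySize'],
                        bins=[-1, 0, 3, 6, float('inf')],
                        labels=['alone', 'small', 'medium', 'large'])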
Example #23
model.score(X_test, y_test)


# In[54]:

p_lm = pd.Series(predicted_logit, name='p_lm')
print(p_lm)


# In[60]:

predicted_MLP = mlp.predict(X_test)
print (predicted_MLP)
mlp.score(X_test, y_test)
p_MLP = pd.Series(predicted_MLP, name='p_MLP')
print(p_MLP)


# In[56]:

predicted_SVC = SVC.predict(X_test)
print (predicted_SVC)
SVC.score(X_test, y_test)
p_SVC = pd.Series(predicted_SVC, name='p_SVC')
print(p_SVC)

Example #24
from data_read import load

# Load data
X, Y, P, Q = load()


clf = RandomForestClassifier(
	n_estimators=1000,random_state=0).fit(X, Y.values.ravel())
print ("Accuracy of Random Forest Classifier: "+str(clf.score(P,Q)))

clf2 = SVC(kernel='rbf',C=10,
	gamma=0.001,random_state=0).fit(X, Y.values.ravel())
print ("Accuracy of SVM: "+str(clf2.score(P,Q)))


clf3 = GradientBoostingClassifier(n_estimators=1000, learning_rate=1,
max_depth=10, random_state=0, min_samples_split=5).fit(X, Y.values.ravel())
print ("Accuracy of Gradient Boosting Classifier: "+str(clf3.score(P,Q)))

clf4 = GaussianNB().fit(X, Y.values.ravel())
print ("Accuracy of Gaussian Naive Bayes Classifier: "+str(clf4.score(P,Q)))


# solver, learning_rate_init, alpha, hidden_layer_sizes
# and activation have impact
# ('algorithm' was renamed to 'solver' in scikit-learn 0.18)
clf6 = MLPClassifier(solver='adam', alpha=0.01, max_iter=500,
	learning_rate='constant', hidden_layer_sizes=(400,),
	random_state=0, learning_rate_init=1e-2,
	activation='logistic').fit(X, Y.values.ravel())
print ("Accuracy of Multi-layer Perceptron Classifier: "+str(clf6.score(P,Q)))
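The comment above names the hyperparameters that matter most; a hedged GridSearchCV sketch over those parameters (the grid values are illustrative, not tuned):

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

param_grid = {
    'solver': ['adam', 'sgd'],
    'learning_rate_init': [1e-3, 1e-2],
    'alpha': [1e-4, 1e-2],
    'hidden_layer_sizes': [(100,), (400,)],
    'activation': ['logistic', 'relu'],
}
search = GridSearchCV(MLPClassifier(max_iter=500), param_grid, cv=3)
search.fit(X, Y.values.ravel())
print(search.best_params_, search.best_score_)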
Example #25
for dataset in combine:
    dataset['Title'] = dataset['Title'].replace([
        'Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir',
        'Jonkheer', 'Dona'
    ], 'Rare')

    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')

title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
for dataset in combine:
    dataset['Title'] = dataset['Title'].map(title_mapping)
    dataset['Title'] = dataset['Title'].fillna(0)

train_df = train_df.drop(['Name', 'PassengerId'], axis=1)
test_df = test_df.drop(['Name'], axis=1)

train_df = train_df.drop(['Ticket', 'Cabin'], axis=1)
test_df = test_df.drop(['Ticket', 'Cabin'], axis=1)
combine = [train_df, test_df]

Y_train = train_df["Survived"]
X_train = train_df.drop("Survived", axis=1)
X_test = test_df.drop("PassengerId", axis=1).copy()

print(X_train.head())
# clf is defined earlier in the original script (not shown in this excerpt)
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)
acc_log = round(clf.score(X_train, Y_train) * 100, 2)
print(acc_log)
Example #26
# coding: utf-8
#======================================
#  Breast cancer tumor classification
#  with sklearn MLPClassifier module
#   (c) Keishi Ishihara
#======================================
from __future__ import print_function

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from load_csv_data import load_data

X_train, X_test, y_train, y_test = load_data()

clf = MLPClassifier(hidden_layer_sizes=(30),
                    activation='relu',
                    alpha=1e-5,
                    learning_rate_init=0.001,
                    learning_rate='constant',
                    solver='sgd',
                    random_state=0,
                    verbose=True,
                    tol=1e-4,
                    max_iter=10000)

clf.fit(X_train, y_train)

print('Training set score: {}'.format(clf.score(X_train, y_train)))
print('Test set score: {}'.format(clf.score(X_test, y_test)))
Example #27
# an input layer (R^4),
# two hidden layers (R^5 and R^3)
# and an output layer (R^3).
# (mlp_single_hidden_layer is not defined in this snippet; a stand-in
# single-hidden-layer counterpart is assumed below)
mlp_single_hidden_layer = MLPClassifier(hidden_layer_sizes=(5,),
                                        activation="tanh",
                                        solver="adam",
                                        max_iter=500,
                                        batch_size=10,
                                        verbose=True)
mlp_multi_hidden_layer = MLPClassifier(hidden_layer_sizes=(5, 3),
                                       activation="tanh",
                                       solver="adam",
                                       max_iter=500,
                                       batch_size=10,
                                       verbose=True)

# Training both networks
mlp_single_hidden_layer.fit(X_train, y_train.ravel())
mlp_multi_hidden_layer.fit(X_train, y_train.ravel())
print("--------------------------------\n")
print("Result of training (Single hidden layer): %5.3f" %
      mlp_single_hidden_layer.score(X_train, y_train))
print("Result of training (Multiple hidden layers): %5.3f" %
      mlp_multi_hidden_layer.score(X_train, y_train))
print("--------------------------------\n")

# Evaluating the model using the test data
pred_single = mlp_single_hidden_layer.predict(X_test)
pred_multi = mlp_multi_hidden_layer.predict(X_test)

print("Confusion Matrix (Single hidden layer):")
print(confusion_matrix(y_test, pred_single))
print("\nClassification Report (Single hidden layer):")
print(classification_report(y_test, pred_single))
print("\nConfusion Matrix (Multiple hidden layers):")
print(confusion_matrix(y_test, pred_multi))
print("\nClassification Report (Multiple hidden layers):")
Example #28
    carbo_vc = np.array([1 for i in range(len(carbo_imgs))])
    bolog_f = np.concatenate([bolog_imgs, carbo_imgs], axis=0)
    np.random.seed(1)
    np.random.shuffle(bolog_f)
    carbo_f = np.concatenate([bolog_vc, carbo_vc], axis=0)
    np.random.seed(1)
    np.random.shuffle(carbo_f)
    return bolog_f, carbo_f


clf = svm.SVC(gamma=0.001, C=100.)
feature, target = load_data()
feature = feature.reshape((len(feature), -1))
X_train, X_test, y_train, y_test = train_test_split(feature,
                                                    target,
                                                    test_size=0.5,
                                                    random_state=0)

clf = MLPClassifier(solver="lbfgs",
                    random_state=0,
                    activation='relu',
                    hidden_layer_sizes=[100, 100],
                    alpha=0.0001)
clf.fit(X_train, y_train)
pred_x = np.array(X_test[:3])
print(pred_x)
print(clf.predict(pred_x))
print("predict:", clf.score(X_test, y_test))
filename = 'pasta_model.sav'
pickle.dump(clf, open(filename, 'wb'))
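Loading the pickled model back is the mirror image of the dump above:

import pickle

with open('pasta_model.sav', 'rb') as f:
    loaded_clf = pickle.load(f)
print("reloaded score:", loaded_clf.score(X_test, y_test))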
# (the scaler is not defined in this snippet; a StandardScaler is assumed)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

mlp = MLPClassifier(solver='sgd',
                    alpha=0.0001,
                    learning_rate='adaptive',
                    hidden_layer_sizes=(11, 30),
                    max_iter=300000,
                    activation='logistic',
                    verbose=True)
mlp.fit(X, y)
predictions_probabilities = mlp.predict_proba(
    [[53, 0, 49, 31, 17, 4, 18, 9, 7, 5, 7, 12, 19, 12, 8, 8, 4, 10, 15, 12]])
predictions = mlp.predict(
    [[53, 0, 49, 31, 17, 4, 18, 9, 7, 5, 7, 12, 19, 12, 8, 8, 4, 10, 15, 12]])
scoring = mlp.score(X, y, sample_weight=None)

# Probability of each class for the prediction input
print(predictions_probabilities)

# max(predictions_probabilities)
print(predictions)

# Score
print(scoring)

# print(X)
# print(y)
# #Frequency
# for x in range(0, 10):
# 	print(X[x])
Example #30
# mlp = MLPClassifier(hidden_layer_sizes=(90, 90, 90), verbose=True)

# as 0.9518095238095238
# mlp = MLPClassifier(hidden_layer_sizes=(783, 783, 783), verbose=True)

# as 0.9575238095238096
# mlp = MLPClassifier(hidden_layer_sizes=(783, 783, 783), verbose=True, solver='sgd')

mlp = MLPClassifier(verbose=True,
                    solver='adam',
                    activation='relu',
                    learning_rate='constant',
                    hidden_layer_sizes=(783, ))

mlp.fit(x_train, y_train)

predictions = mlp.predict(x_test)

print("\nConfusion matrix: ")
print(confusion_matrix(y_test, predictions))

print("\nClassification report: ")
print(classification_report(y_test, predictions))

print("\nAccuracy score: ")
print(accuracy_score(y_test, predictions))

print("Training set score: %f" % mlp.score(x_train, y_train))
print("Test set score: %f" % mlp.score(x_test, y_test))
Example #31
def do_machine_learning_stuff(train_X, train_Y, test_X, test_Y):
    returnValue = []
    test_predict_Y = []

    # TODO: do something with this result
    #f_classif(X, y)

    # Classification algorithms
    rfc = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
    rfc.fit(train_X, train_Y)
    test_predict_Y = rfc.predict(test_X)
    returnValue.append({
        'name':
        "RandomForestClassifier",
        'score':
        rfc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    etc = ExtraTreesClassifier()
    etc.fit(train_X, train_Y)
    test_predict_Y = etc.predict(test_X)
    returnValue.append({
        'name':
        "ExtraTreesClassifier",
        'score':
        etc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    gpc = GaussianProcessClassifier(random_state=0)
    gpc.fit(train_X, train_Y)
    test_predict_Y = gpc.predict(test_X)
    # TODO: maybe also use print(gpc.predict_proba(test_X))
    returnValue.append({
        'name':
        "GaussianProcessClassifier",
        'score':
        gpc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    pac = PassiveAggressiveClassifier(max_iter=1000, random_state=0, tol=1e-3)
    pac.fit(train_X, train_Y)
    test_predict_Y = pac.predict(test_X)
    returnValue.append({
        'name':
        "PassiveAggressiveClassifier",
        'score':
        pac.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    rc = RidgeClassifier()
    rc.fit(train_X, train_Y)
    test_predict_Y = rc.predict(test_X)
    returnValue.append({
        'name':
        "RidgeClassifier",
        'score':
        rc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    sgdc = SGDClassifier(max_iter=1000, tol=1e-3)
    sgdc.fit(train_X, train_Y)
    test_predict_Y = sgdc.predict(test_X)
    returnValue.append({
        'name':
        "SGDClassifier",
        'score':
        sgdc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    bnb = BernoulliNB()
    bnb.fit(train_X, train_Y)
    test_predict_Y = bnb.predict(test_X)
    returnValue.append({
        'name':
        "BernoulliNB",
        'score':
        bnb.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    knnc = KNeighborsClassifier(n_neighbors=3)
    knnc.fit(train_X, train_Y)
    test_predict_Y = knnc.predict(test_X)
    returnValue.append({
        'name':
        "KNeighborsClassifier",
        'score':
        knnc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    mlpc = MLPClassifier()
    mlpc.fit(train_X, train_Y)
    test_predict_Y = mlpc.predict(test_X)
    returnValue.append({
        'name':
        "MLPClassifier",
        'score':
        mlpc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    label_prop_model = LabelPropagation()
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(train_Y)) < 0.3
    labels = np.copy(train_Y)
    labels[random_unlabeled_points] = -1
    label_prop_model.fit(train_X, labels)
    test_predict_Y = label_prop_model.predict(test_X)
    returnValue.append({
        'name':
        "LabelPropagation",
        'score':
        label_prop_model.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    lsvc = LinearSVC(random_state=0, tol=1e-5)
    lsvc.fit(train_X, train_Y)
    test_predict_Y = lsvc.predict(test_X)
    returnValue.append({
        'name':
        "LinearSVC",
        'score':
        lsvc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    svc = SVC(gamma='auto')
    svc.fit(train_X, train_Y)
    test_predict_Y = svc.predict(test_X)
    returnValue.append({
        'name':
        "SVC",
        'score':
        svc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    dtc = DecisionTreeClassifier(random_state=0)
    dtc.fit(train_X, train_Y)
    test_predict_Y = dtc.predict(test_X)
    returnValue.append({
        'name':
        "DecisionTreeClassifier",
        'score':
        dtc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    cccv = CalibratedClassifierCV()
    cccv.fit(train_X, train_Y)
    test_predict_Y = cccv.predict(test_X)
    returnValue.append({
        'name':
        "CalibratedClassifierCV",
        'score':
        cccv.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    return returnValue
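Every block above repeats the same fit/predict/report pattern; a sketch collapsing it into one loop over (name, estimator) pairs (note that the 'accuracy_naive' field above actually computes the error rate, so it is renamed here):

def evaluate_all(estimators, train_X, train_Y, test_X, test_Y):
    results = []
    for name, est in estimators:
        est.fit(train_X, train_Y)
        pred = est.predict(test_X)
        results.append({
            'name': name,
            'score': est.score(test_X, test_Y),
            'error_rate': (test_Y != pred).sum() * 1.0 / len(pred),
            'accuracy_score': accuracy_score(test_Y, pred),
            'classification_report': classification_report(test_Y, pred),
        })
    return results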
    # initialize the target classifier and train it
    # clf = neighbors.KNeighborsClassifier(n_neighbors=3)
    #clf = SVC()
    #clf = GaussianProcessClassifier(1.0 * RBF(1.0))
    #clf = DecisionTreeClassifier(max_depth=5)
    clf = MLPClassifier(alpha=1)
    clf.fit(X_train, y_train)

    #Store the predicted values
    y_pred = clf.predict(X_test)

    #Calculate global accuracy (clf.score is equivalent to
    #accuracy_score(y_test, y_pred) here)
    accuracy = clf.score(X_test, y_test)

    minority_y_test_index = []

    minority_y_test_index1 = np.where(y_test == 1)
    total_indexes = np.where(y_test >= 0)
    minority_y_test_index1_list1 = minority_y_test_index1[0].tolist()

    minority_y_test_index = minority_y_test_index1_list1
    y_pred_minority = []
    y_test_minority = []

    majority_test_index = total_indexes

    for item in minority_y_test_index:
        y_test_minority.append(y_test[item])
#print(trainY)

#avgmlp = []
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier

# manual 90/10 train/test split
trainData = [data[j] for j in range(int(len(data)*.9))]
testData = [data[u] for u in range(int(len(data)*.9), int(len(data)))]
trainText, trainY = [d[0] for d in trainData], [d[1] for d in trainData]
#print(trainText)
testText, testY = [d[0] for d in testData], [d[1] for d in testData]
min_df = 1
max_features = 15000
countVec = CountVectorizer(ngram_range=(1, 3), min_df=min_df, max_features=max_features)
trainX = countVec.fit_transform(trainText)
testX = countVec.transform(testText)
mlp = MLPClassifier(hidden_layer_sizes=(100,), alpha=.0001, batch_size='auto', learning_rate='constant',
                    learning_rate_init=0.001, power_t=0.5, max_iter=2000, shuffle=True, random_state=None,
                    tol=0.0001, momentum=0.9)
mlp.fit(trainX, trainY)
score = mlp.score(testX, testY)
#predictions = mlp.predict(testX)
#confused_matrix = confusion_matrix(testY, predictions)

print(score)
#print(confused_matrix)
    # falpos = 0
    # falneg = 0
    # truepos = 0
    # trueneg = 0
    # testPredict = []
    # accactual = []
    # truepostotal = []
    # truenegtotal = []
    # falpostotal = []
    # falnegtotal = []
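The manual 90/10 slice earlier in this example assumes the rows are already shuffled; train_test_split does the shuffle and the split in one call. A sketch over the same `data` list:

from sklearn.model_selection import train_test_split

texts = [d[0] for d in data]
labels = [d[1] for d in data]
trainText, testText, trainY, testY = train_test_split(texts, labels, test_size=0.1, random_state=0)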
Example #34
# (reconstructed opener; the original snippet begins mid-call)
mlp = MLPClassifier(verbose=showProgress,  # show weight updates by loss;
                    # the loss function for classification is cross-entropy
                    activation='logistic',  # hidden-layer activation: f(x) = 1 / (1 + exp(-x))
                    tol=1e-5,  # tolerance for the optimization when loss decreases
                    random_state=1,  # seed for random values
                    learning_rate_init=.001,  # learning rate for weight updates
                    learning_rate='adaptive')  # adapt the learning rate when improvement stalls
mlp.fit(X_train, y_train)  # Train

# Show size of train and test sets
print("\nTrain split: " + str(percent))
print("Train set size: %i" % len(y_train))
print("Test set size: %i" % len(y_test))

# score: Returns the mean accuracy on the given test data and labels
print("\nMLP Classifier Training set score: %f" % mlp.score(X_train, y_train))
print("MLP Classifier Test set score: %f" % mlp.score(X_test, y_test))

# DECISION TREE SECTION STARTS
dtc = tree.DecisionTreeClassifier()
dtc.fit(X_train, y_train)
# score: Returns the mean accuracy on the given test data and labels
print("DTC Classifier Training set score: %f" % dtc.score(X_train, y_train))
print("DTC Classifier Test set score: %f" % dtc.score(X_test, y_test))

##############################################################################
# PREDICTING UNSEEN EARTHQUAKES
# In order to verify them, we will get new samples that actually happened right after our last sample.
# Our last sample happened on 2016-05-09 01:22:44.740.
# So, our unseen data starts on 2016-05-09 01:22:44.741
# Source: http://earthquake.usgs.gov/earthquakes/search/
# (reconstructed opener; the original snippet begins mid-list)
titles = [
    'SVM decision', 'MLP decision', 'RF decision', 'Boosting decision',
    'Stacked LR', 'Stacked Decision Tree'
]
classifiers = [svm, mlp, forest, boosting, stacked_lr, stacked_dt]
for clf, ax, title in zip(classifiers, sub.flatten(), titles):
    plot_classifier_decision(ax, clf, X_test, mode='filled', alpha=0.4)
    plot_dataset(X_test, y_test, ax=ax)
    ax.set_xlim(np.min(X[:, 0]), np.max(X[:, 0]))
    ax.set_ylim(np.min(X[:, 1]), np.max(X[:, 1]))
    ax.set_title(title, fontsize=15)

# tight_layout must be called before show() to take effect
plt.tight_layout()
plt.show()

###############################################################################
# Evaluation on the test set
# --------------------------
#
# Finally, let's evaluate the baselines and the Dynamic Selection methods on
# the test set:

print('KNORAE score = {}'.format(knora_e.score(X_test, y_test)))
print('DESP score = {}'.format(desp.score(X_test, y_test)))
print('OLA score = {}'.format(ola.score(X_test, y_test)))
print('Rank score = {}'.format(rank.score(X_test, y_test)))
print('SVM score = {}'.format(svm.score(X_test, y_test)))
print('MLP score = {}'.format(mlp.score(X_test, y_test)))
print('RF score = {}'.format(forest.score(X_test, y_test)))
print('Boosting score = {}'.format(boosting.score(X_test, y_test)))
print('Stacking LR score = {}'.format(stacked_lr.score(X_test, y_test)))
print('Stacking Decision Tree score = {}'.format(stacked_dt.score(X_test, y_test)))
#
# the notes for this class can be found at: 
# https://deeplearningcourses.com/c/data-science-deep-learning-in-python
# https://www.udemy.com/data-science-deep-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import sys
sys.path.append('../ann_logistic_extra')
from process import get_data

from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle

# get the data
Xtrain, Ytrain, Xtest, Ytest = get_data()

# create the neural network
model = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000)

# train the neural network
model.fit(Xtrain, Ytrain)

# print the train and test accuracy
train_accuracy = model.score(Xtrain, Ytrain)
test_accuracy = model.score(Xtest, Ytest)
print("train accuracy:", train_accuracy, "test accuracy:", test_accuracy)
Example #37
print("Training Score Decision Tree: %s" % score_DT)
elapsed_time_g = time.time() - start_time_b
print("Training Baseline Decision Tree took: %s" % str(elapsed_time_g))

start_time_b = time.time()
adb = AdaBoostClassifier()
adb.fit(X, y)
score_AD = adb.score(X, y)
print("Training Score AdaBoost: %s" % score_AD)
elapsed_time_g = time.time() - start_time_b
print("Training Baseline AdaBoost took: %s" % str(elapsed_time_g))

start_time_b = time.time()
mlp_g = MLPClassifier()
mlp_g.fit(X_train, y_train)
score_MLP = mlp_g.score(X_train, y_train)

print("Training Score Neural Network: %s" % score_MLP)
elapsed_time_g = time.time() - start_time_b
print("Training Baseline Neural Network took: %s" % str(elapsed_time_g))

print("#" * 150)
print("Starting Random Forest Experiments...")

# if isinstance(rfc, RandomForestClassifier):
#     print "test"
if paramOptim:
    rfc = RandomForestClassifier(n_jobs=-1, max_features='sqrt', n_estimators=50, oob_score=True)
    param_grid = {
        'n_estimators': [200, 700],
        'max_features': ['auto', 'sqrt', 'log2'],
Example #38
    for i, row in enumerate (data):
        for j, attr in enumerate (row):
            train [i, j] = attr    
max_i = 40
fig, ax = plt.subplots (1, 1, figsize = (18, 10))
max_iter = 3000
for j, i in enumerate (np.linspace (0, max_i, 3)):
    X = train[int (i / 2 + i % 2):sample_nu - int(i / 2), 0:9]
    y = train[int (i / 2 + i % 2):sample_nu - int(i / 2), 9]

    X = MinMaxScaler().fit_transform(X)
    mlps = []
    mlp = MLPClassifier(verbose=0, random_state=0, max_iter=max_iter)
    mlp.fit(X, y)
    mlps.append(mlp)
    print ("Training set score: %f" % mlp.score (X, y))
    print ("Training set loss: %f" % mlp.loss_)
    ax.plot(mlp.loss_curve_, label = labels[j], **plot_args[j])
for l_n, swap_nu in enumerate([5, 15, 40]):  # use a list: set iteration order is not guaranteed
    X = train[0:sample_nu, 0:9]
    y = train[0:sample_nu, 9]
    # randomly swap pairs of feature values to inject noise
    for i in range(1, swap_nu):
        ind1 = int(np.random.uniform(0, sample_nu - 1))
        ind2 = int(np.random.uniform(0, sample_nu - 1))
        j1 = int(np.random.uniform(1, 8))
        j2 = int(np.random.uniform(1, 8))
        tmp = X[ind1, j1]
        X[ind1, j1] = X[ind2, j2]
        X[ind2, j2] = tmp
    X = MinMaxScaler().fit_transform(X)
    mlp = MLPClassifier(verbose=0, random_state=0, max_iter=max_iter)
Example #39
from GloVe_helper import GloVeLoader
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
import pickle

if __name__ == '__main__':
    gl = GloVeLoader()
    df = pd.read_csv('IMDB_Dataset.csv')
    df = df.values

    x, y = df.T[0], df.T[1]
    y = np.array([0 if i == 'negative' else 1 for i in y])

    x = np.array([gl.pull_glove_embed(i) for i in x])
    print("starting training")
    clf = MLPClassifier(random_state=1, max_iter=500, verbose=True).fit(x, y)
    print("train accuracy:", clf.score(x, y))
    pickle.dump(clf, open('sentiment_model.sav', 'wb'))
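GloVeLoader.pull_glove_embed is not shown; a typical implementation averages the GloVe vectors of a document's words. A sketch assuming a word-to-vector dict self.embeddings and a fixed dimension self.dim (both hypothetical):

def pull_glove_embed(self, text):
    # average the embeddings of known words; zero vector if none match
    vecs = [self.embeddings[w] for w in text.lower().split() if w in self.embeddings]
    return np.mean(vecs, axis=0) if vecs else np.zeros(self.dim)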
Example #40
cancer = load_breast_cancer()
'''
    TRAIN - TEST SPLIT
'''
X_train, X_test, y_train, y_test = train_test_split(cancer.data,
                                                    cancer.target,
                                                    stratify=cancer.target,
                                                    random_state=0)
'''
    MLP CLASSIFIER
    1000 EPOCHS ( 200 MAX-ITER CONVERGENCE ERROR )
    NO SCALING OF DATA
'''
mlp = MLPClassifier(max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)
print("Training Accuracy (before scaling) : ", mlp.score(X_train, y_train))
print("Test Accuracy (before scaling) : ", mlp.score(X_test, y_test))
'''
    ACCURACY ON THE TRAINING AND TEST SETS IS NOT GOOD ENOUGH, LIKELY
    BECAUSE THE FEATURES ARE NOT ON THE SAME SCALE.
'''
scaler = StandardScaler()
# fit the scaler on the training data only, then reuse it for the test data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
'''
    ACCURACIES AFTER SCALING OF DATA.
    PERFORMANCE IS MUCH BETTER WITH SCALED PARAMETERS
'''
mlp = MLPClassifier(max_iter=1000, random_state=42)
mlp.fit(X_train_scaled, y_train)
print("Training Accuracy (after scaling) : ", mlp.score(X_train_scaled, y_train))
print("Test Accuracy (after scaling) : ", mlp.score(X_test_scaled, y_test))
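Fitting the scaler inside a Pipeline keeps preprocessing and the estimator together and guarantees the test set is only transformed, never fitted; a sketch with the same estimator settings:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

pipe = make_pipeline(StandardScaler(), MLPClassifier(max_iter=1000, random_state=42))
pipe.fit(X_train, y_train)
print("Pipeline test accuracy:", pipe.score(X_test, y_test))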
Example #41
fpr, tpr, _ = roc_curve(Y_test, y_score)
roc_auc_SVM = auc(fpr, tpr)
plt.plot(fpr, tpr, color='navy', lw=lw)
plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.legend([
    'ROC Curve for MLP (area = %0.3f)' % roc_auc_MLP,
    'ROC Curve for SVM (area = %0.3f)' % roc_auc_SVM, 'Standard Curve'
],
           loc="lower right")

# Output the metrics of the models
print("Neural Network Model Test Set Metrics:")
print("\tCross Val Acc:\t%f" % mlp_score)
print("\tAccuracy:\t%f" % mlp.score(norm_X_test, Y_test))
print("\tPrecision:\t%f" % mlp_precision)
print("\tRecall:\t\t%f" % mlp_recall)
print("\tF1:\t\t%f" % mlp_f1)
print("")
print("Support Vector Machine Model Test Set Metrics:")
print("\tCross Val Acc:\t%f" % svm_score)
print("\tAccuracy:\t%f" % svm.score(norm_X_test, Y_test))
print("\tPrecision:\t%f" % svm_precision)
print("\tRecall:\t\t%f" % svm_recall)
print("\tF1:\t\t%f" % svm_f1)

# Shows the ROC Curve plots that were made earlier
plt.show()

################################################################################
Example #42
"""Neural network with scikit-learn"""

#"""
neuralstart = timeit.default_timer()

scaler = StandardScaler()
X1_train_std = scaler.fit_transform(X1_train)
# transform (not refit) the test set with the scaler fitted on the training data
X1_test_std = scaler.transform(X1_test)


neuralloop = timeit.default_timer()

neural_fit = MLPClassifier(activation = "logistic", learning_rate = "adaptive", random_state = 0).fit(X1_train_std, y1_train)

neural_score = neural_fit.score(X1_test_std, y1_test)

neuralend = timeit.default_timer()

print("Time for Neural network: ", neuralend-neuralloop)
print(neural_score)

neuralfinish = timeit.default_timer()

print("The time used is ", neuralfinish-neuralstart)
print("The maximal score is ")
print(np.max(saver))
#"""

# In[11]: KNN
Example #43
    # (reconstructed opener; the original snippet begins mid-call)
    neural_network_clf = MLPClassifier(max_iter=1000,
                                       activation='relu')

    random_forest_clf = RandomForestClassifier(
                                                criterion='entropy',
                                                min_samples_split=7,
                                                min_samples_leaf=2,
                                                n_estimators=14)

    neural_network_sum = 0
    random_forest_sum = 0
    neural_network_wins = 0

    for i in range(10):
        neural_network_clf.fit(x_train, y_train)
        neural_network_score = neural_network_clf.score(x_val, y_val)
        random_forest_clf.fit(x_train, y_train)
        random_forest_score = random_forest_clf.score(x_val, y_val)
        print("random forest: ", random_forest_score)
        print("neural network: ", neural_network_score)
        if (neural_network_score > random_forest_score):
            neural_network_wins += 1
        neural_network_sum += neural_network_score
        random_forest_sum += random_forest_score

    random_forest_avg = random_forest_sum / 10
    neural_network_avg = neural_network_sum / 10

    print("random forest avg: ", random_forest_avg)
    print("neural network avg: ", neural_network_avg)
    print("neural network wins: ", neural_network_wins)
Example #44
    scaler = StandardScaler().fit(X)  # for later scaling of test data
    X_std = StandardScaler().fit_transform(X)
    
    # split the training set
    mask_nn = [True,]*int(0.8*X_std.shape[0])
    mask_nn.extend([False,]*(X_std.shape[0] - int(0.8*X_std.shape[0])))
    mask_nn = np.random.permutation(mask_nn)
    X_train, Y_train = X_std[mask_nn], Y[mask_nn] 
    X_xv, Y_xv = X_std[~mask_nn], Y[~mask_nn]
    
    mlp = MLPClassifier(hidden_layer_sizes=(50), activation='logistic',
                        max_iter=100, alpha=NN_para[count],
                        solver='lbfgs', verbose=False, tol=1e-4, random_state=1,
                        learning_rate_init=.1)
    mlp.fit(X_train, Y_train)
    print("Training Accuracy : {:<10}".format(mlp.score(X_train, Y_train)))
    print('x-validation Accuracy: {:<10}'.format(mlp.score(X_xv, Y_xv)))
    NN_scores.append(mlp.score(X_xv, Y_xv))
    print('Time spent for NN: {:6.3f}s'.format(time.time() - start))
    # X_xv is a plain array after scaling, so use Y_xv as the true labels
    print('The logloss is: {}'.format(logloss(Y_xv,
          mlp.predict_proba(X_xv)[:, 1])))
    
import matplotlib.pyplot as plt
plt.figure()
plt.subplot(2,1,1)
plt.title('Scores of Random Forest')
if RF:
  plt.plot(RF_para, RF_scores, 'o-')
plt.subplot(2,1,2)
plt.title('Scores of Neural Network')
if NN:
    plt.plot(NN_para, NN_scores, 'o-')
    for idx,val in enumerate(cm_x0[choice_src]):
        x0[choice_des,idx] = val

if DEBUG:
    print(x0)
    print(x1)

###########
## Build training set for the model
nn = MLPClassifier(solver='sgd',alpha=1e-5,activation='logistic',hidden_layer_sizes=(100,50,10,1),random_state=1,max_iter=10000)
X = np.concatenate((x0,x1),axis=0)
Y = np.concatenate((y0,y1),axis=0)
Y = Y.ravel()
print(X)
print(Y)
nn.fit(X,Y)
validate_X = validate_x1.copy()
validate_Y = validate_y1.copy()
validate_X.resize((validate_X.shape[0]+validate_x0.shape[0],validate_x0.shape[1]))
validate_Y.resize((validate_Y.shape[0]+validate_y0.shape[0],validate_y0.shape[1]))
# fill the rows appended after validate_x1 (the original range here was empty)
for idx in range(validate_x1.shape[0], validate_X.shape[0]):
    for idcol in range(0,validate_X.shape[1]):
        validate_X[idx,idcol] = validate_x0[idx-validate_X.shape[0],idcol]
    validate_Y[idx,0] = validate_y0[idx-validate_X.shape[0],0]
    
y_valid = nn.predict(validate_X)
print(" accuracy => ",accuracy_score(y_valid.ravel(),validate_Y.ravel()))
score = nn.score( validate_X, validate_Y.ravel() )
print("Score => ")
print(score)
Example #46
# In[84]:

ss = StandardScaler()
x_train = pd.DataFrame(ss.fit_transform(x_train))
# transform (not refit) the test features with the scaler fitted on the training data
test_feat = pd.DataFrame(ss.transform(test_feat))

# In[85]:

ann = MLPClassifier(solver='lbfgs',
                    alpha=0.000000001,
                    hidden_layer_sizes=(20, ),
                    random_state=1)
ann.fit(x_train, y_train)
# evaluate the fitted model
r = ann.score(x_train, y_train)
print("R value (accuracy):", r)

# In[86]:

y_submission = ann.predict_proba(test_feat)[:, 0]
pred = {'id': test_id, 'prob': y_submission}
pred = pd.DataFrame.from_dict(pred)
pred = pd.concat([pred, test_feat], axis=1)
pred.sort_values(by='prob', ascending=[0], inplace=True)
pred = pred.reset_index(drop=True)

pred1 = pred.iloc[:20000]
right = len(pred1[pred1.id.isin(df_9982.id.unique())])
wrong = 20000 - right
def neuralNetworkHiddenLayerNumberAnalysis():
    import pydotplus
    a,b,c,d,e,f = traing_test_data_set();
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number));
        train_data, train_label = a[feature_number - 1], b[feature_number - 1];
        test_data, test_label = c[feature_number - 1], d[feature_number - 1];
        validation_data,validation_label = e[feature_number-1],f[feature_number-1];
        print("Train data set size : " + str(len(train_data)));
        print("Test set size : " + str(len(test_label)));
        print("Validation set size : " + str(len(validation_label)));
        print("------------------------------------------------");
        alphalist = [.00001, .00003, .0001, .0003, .001, .003, .01, .03, 1, 10]
        from sklearn.neural_network import MLPClassifier
        for new_alpha in alphalist:
            hiddenLayerAnalysisResult = "Number of node in 1st hidden layer,train score,validation score,test score,iterations\n";
            for hiddenNode in range(5,400,5):
                clf = MLPClassifier(alpha=new_alpha,tol = 1e-5, hidden_layer_sizes=(hiddenNode,), random_state=1, activation='logistic', max_iter=1000);
                clf.fit(train_data, train_label)

                tot = len(test_label);
                cnt = 0;
                prediction = clf.predict(test_data);
                for i in range(0, len(test_data)):
                    if prediction[i] != test_label[i]:
                        #print(str(i)+str(clf.predict([test_data[i]]))+" "+str(test_label[i]));
                        cnt += 1;
                from sklearn.metrics import accuracy_score
                from sklearn.metrics import precision_score
                from sklearn.metrics import f1_score

                print("Number of node in first hidden layer :" + str(hiddenNode));
                print("Train Score : " + str(clf.score(train_data, train_label)));
                train_score = str(clf.score(train_data, train_label));
                test_score = str(accuracy_score(test_label, prediction) * 100.0);
                print("On test set");
                print("Correct prediction : " + str(tot - cnt));
                print("Incorrect Prediction : " + str(cnt));
                print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
                print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
                print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
                print("Error Rate : " + str(cnt / tot * 100.0));
                print("---------------------------------------\n");
                tot = len(validation_label);
                cnt = 0;
                prediction = clf.predict(validation_data);

                for i in range(0, len(validation_data)):
                    if prediction[i] != validation_label[i]:
                        cnt += 1;
                print("On validation set");
                validation_score = str(accuracy_score(validation_label, prediction) * 100.0);
                print("Correct prediction : " + str(tot - cnt));
                print("Incorrect Prediction : " + str(cnt));
                print("Accuracy : " + str(accuracy_score(validation_label, prediction) * 100.0))
                print("Precision : " + str(precision_score(validation_label, prediction, average='weighted') * 100.0))
                print("F1 Score : " + str(f1_score(validation_label, prediction, average='weighted') * 100.0))
                print("Error Rate : " + str(cnt / tot * 100.0));
                print("---------------------------------------\n");
                hiddenLayerAnalysisResult+= str(hiddenNode)+","+train_score+","+validation_score+","+test_score+","+str(clf.n_iter_)+"\n";

            file_name = "hiddenLayerAnalysisResult " + " With alpha = [ " + str(new_alpha) + "] .csv";
            fw = open(file_name, "w", encoding="utf-8");
            fw.write(hiddenLayerAnalysisResult);
            fw.close();
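The CSVs written by this function are easy to inspect afterwards; a sketch, assuming pandas and matplotlib are available and using the file-name pattern built above:

import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("hiddenLayerAnalysisResult  With alpha = [ 0.001] .csv")
nodes = df.iloc[:, 0]  # the "Number of node in 1st hidden layer" column
for col in ("train score", "validation score", "test score"):
    plt.plot(nodes, df[col], label=col)
plt.xlabel("Nodes in first hidden layer")
plt.legend()
plt.show()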
Example #48
0
File: ma.py Project: wzgdavid/ana
#print(X)
#classle = LabelEncoder()
y = df['do']

#print(X, y)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, test_size=0.3)

#model = GaussianNB() # around 60
model = MLPClassifier(hidden_layer_sizes=(13,13,13),max_iter=999)  # ma20: around 67
#model = DTC() #
#model = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
#model = LogisticRegression(C=1000, random_state=0)                # too poor
# All of the models below raise UndefinedMetricWarning
#model = MultinomialNB() 
#model = SVC(kernel='linear', C=1.0, random_state=1) # ma20: around 76
#model = SVC(kernel='rbf', C=1.0, random_state=0, gamma=0.1) # ma20: around 67

model.fit(X_train, y_train)

print(model.score(X_train, y_train))
predicted = model.predict(X_test)
expected = y_test
report = metrics.classification_report(expected, predicted)
print(report)


# Confusion matrix
cm = metrics.confusion_matrix(expected, predicted)
print(cm)
Example #49
0
def variando_camadas_escondidas():
    max_score = 0
    max_score_neurons = 0
    csv_name = "test_neuronios.csv"
    range_neuronios = list(range(100, 10, -10))
    for i in range(5):
        mlps = []
        corretudes = []
        for neuronios in range_neuronios:
            mlp = MLPClassifier(hidden_layer_sizes=(neuronios, ),
                                max_iter=1,
                                alpha=1e-4,
                                solver='adam',
                                tol=1e-4,
                                random_state=i)
            N_TRAIN_SAMPLES = X_train.shape[0]
            N_EPOCHS = 2000
            N_BATCH = min(128, N_TRAIN_SAMPLES)
            N_CLASSES = np.unique(y_train)  # full label set, required by the first partial_fit call

            scores_train = []
            scores_test = []

            # EPOCH
            epoch = 0
            print(i)
            while epoch < N_EPOCHS:
                # print('epoch: ', epoch)
                # SHUFFLING
                random_perm = np.random.permutation(X_train.shape[0])
                mini_batch_index = 0
                while True:
                    # MINI-BATCH
                    indices = random_perm[mini_batch_index:mini_batch_index +
                                          N_BATCH]
                    mlp.partial_fit(X_train[indices],
                                    y_train[indices],
                                    classes=N_CLASSES)
                    mini_batch_index += N_BATCH

                    if mini_batch_index >= N_TRAIN_SAMPLES:
                        break

                # SCORE TRAIN
                score_train = mlp.score(X_train, y_train)
                scores_train.append(score_train)

                # SCORE TEST
                score_test = mlp.score(X_test, y_test)
                scores_test.append(score_test)

                if mlp._no_improvement_count > mlp.n_iter_no_change:
                    break

                epoch += 1
            if score_test > max_score:
                max_score = score_test
                max_score_neurons = neuronios

            mlps.append(mlp)

            # PREDICT
            y_predict = mlp.predict(X_test)
            mat_confusao = confusion_matrix(y_test, y_predict)
            # mat_confusao_list.append(mat_confusao)

            corretude = score_test * 100
            corretudes.append(corretude)
            write_csv(csv_name, [["neuronios", neuronios]])
            write_csv(csv_name, [acordes])
            write_csv(csv_name, mat_confusao)
            write_csv(csv_name, [["Corretude", str(corretude)]])
            # write_csv(csv_name, [["Relatorio classificador"]])

            report = classification_report(y_test, y_predict, output_dict=True)

            df_report = pd.DataFrame(report).transpose()
            # with open(csv_name, 'a') as f:
            #     df_report.to_csv(f)
            """ Plot train_test"""
            # mlps.append(mlp)
            # plt.figure()
            # plt.plot(scores_train, color='green', alpha=0.8, label='Treino')
            # plt.plot(scores_test, color='magenta', alpha=0.8, label='Teste')
            # plt.title("Acurácia ao longo das épocas", fontsize=14)
            # plt.xlabel('Épocas')
            # plt.legend(loc='upper left')
            # plt.show()
            # plt.savefig('acuracia_treino_teste_execucao{}_{}neuronios_escondidos'.format(i, neuronios))
            # plt.figure()
            # plt.title("Função de perda ao longo das épocas", fontsize=14)
            # plt.xlabel('Épocas')
            # plt.plot(mlp.loss_curve_)
            # plt.show()
            # plt.savefig('funcao_perda_execucao{}_9attr'.format(i))
        write_csv(csv_name, [[
            "Accuracy table - latex format", "execution {}".format(str(i))
        ]])
        n_iter_list = [mlp.n_iter_ for mlp in mlps]

        tabela_acuracias = []

        for j, qtd_neuronios in enumerate(range_neuronios):
            execucao = str(i)
            neuronio_info = str(qtd_neuronios)
            acuracia = str(round(corretudes[j], 2))
            epocas = str(n_iter_list[j])
            row = "\#{execucao} && {neuronios} && {epocas} && {acuracia}".format(
                execucao=execucao,
                neuronios=neuronio_info,
                epocas=epocas,
                acuracia=acuracia)
            tabela_acuracias.append([row])
        write_csv(csv_name, tabela_acuracias)
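The core of the loop above is sklearn's partial_fit protocol: the full label set must be passed on the first call, after which each call performs one gradient update on a mini-batch. A standalone sketch of that pattern, assuming X_train and y_train are numpy arrays:

import numpy as np
from sklearn.neural_network import MLPClassifier

def train_minibatch(X_train, y_train, n_epochs=10, batch_size=128):
    clf = MLPClassifier(hidden_layer_sizes=(50,), random_state=0)
    classes = np.unique(y_train)  # required on the first partial_fit call
    for _ in range(n_epochs):
        order = np.random.permutation(len(X_train))  # reshuffle every epoch
        for start in range(0, len(X_train), batch_size):
            batch = order[start:start + batch_size]
            clf.partial_fit(X_train[batch], y_train[batch], classes=classes)
    return clf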
Example #50
0
    X.append(list(tmp))
    y.append(0)

X = np.array(X)
y = np.array(y)

x_train_all, x_test, y_train_all, y_test = train_test_split(X,
                                                            y,
                                                            stratify=y,
                                                            test_size=0.2,
                                                            random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train_all,
                                                  y_train_all,
                                                  stratify=y_train_all,
                                                  test_size=0.2,
                                                  random_state=42)

scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_val_scaled = scaler.transform(x_val)

# simple model (note: batch_size is ignored when solver='lbfgs')
mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', \
                    solver='lbfgs', alpha=0.01, batch_size=32, \
                    max_iter=500)

mlp.fit(x_train_scaled, y_train)
print(mlp.score(x_val_scaled, y_val))
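The snippet stops at the validation score and never touches the held-out x_test/y_test from the first split; a natural follow-up, reusing the fitted scaler:

x_test_scaled = scaler.transform(x_test)
print(mlp.score(x_test_scaled, y_test))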
Example #51
0
y_train = train_labels
X_test = test_images.reshape(10000, 28 * 28) / 255
y_test = test_labels

# To speed up training, use only about 10% of the samples for this demo
X_train_lite = X_train[0:5999, :]
y_train_lite = y_train[0:5999]
X_test_lite = X_test[0:999, :]
y_test_lite = y_test[0:999]

# TODO: 3. Predict using the raw data
# Import the MLP (multilayer perceptron) neural network

start = time.time()

mlp = MLPClassifier(solver='lbfgs',
                    hidden_layer_sizes=[100, 100],
                    activation='relu',
                    alpha=1e-5,
                    random_state=62,
                    verbose=0)
mlp.fit(X_train_lite, y_train_lite)

score_ori_train = mlp.score(X_train_lite, y_train_lite)
score_ori_test = mlp.score(X_test_lite, y_test_lite)

print('Training finished in {:.2f}s.'.format(time.time() - start))
print('Train score: {:.4f}, test score: {:.4f}'.format(
    score_ori_train, score_ori_test))
Example #52
0
def run_mlp(X_train, X_test, y_train, y_test):
    clf = MLPClassifier(random_state=0, hidden_layer_sizes=(100,), solver='sgd', max_iter=700)
    clf.fit(X_train, y_train)
    return clf.score(X_test, y_test)
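A hypothetical call, assuming a feature matrix X and labels y are already loaded and train_test_split is imported:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
print("MLP test accuracy:", run_mlp(X_train, X_test, y_train, y_test))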
Example #53
0
fileName = 'final_model.sav'
#pickle.dump(ml, open(fileName, 'wb'))
#ml = pickle.load(open(fileName, 'rb'))
# ---------------------

# Classification prediction
y_pred = ml.predict(x_test)
print('Predicted : ', y_pred)
print('Actual : ', y_test)
print('Classification accuracy : ', accuracy_score(y_test, y_pred))  # 0.9777777777
# confusion_matrix (rows: actual values, columns: predictions)
con_mat = pd.crosstab(y_test, y_pred, rownames=['actual'], colnames=['predicted'])
print(con_mat)
print((con_mat[0][0] + con_mat[1][1] + con_mat[2][2]) /
      len(y_test))  # 0.9777777777
print(ml.score(x_test, y_test))  # 0.977777777

# Visualization
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.colors import ListedColormap
from matplotlib import font_manager, rc

font_name = font_manager.FontProperties(
    fname="c:/Windows/Fonts/malgun.ttf").get_name()
plt.rc('font', family=font_name)  # prevent broken Korean glyphs in plots


def plot_decision_region(X,
                         y,
Example #54
0
labels = np.unique(all_charts.chart).tolist()
num_labels = len(labels)
class_mapping = {label:idx for idx,label in enumerate(labels)}
y = all_charts.chart.map(class_mapping)

# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# scale
std_scaler = StandardScaler()
X_train_std = std_scaler.fit_transform(X_train)
X_test_std = std_scaler.transform(X_test)

model = MLPClassifier(alpha=1e-5,
                      hidden_layer_sizes=(100, 50),
                      activation='logistic',
                      batch_size=10,
                      learning_rate_init=0.01,
                      learning_rate='constant')
model.fit(X_train_std, y_train)


# evaluate model
train_acc = model.score(X_train_std, y_train)
test_acc = model.score(X_test_std, y_test)
print('Train accuracy: {}'.format(train_acc))
print('Test accuracy: {}'.format(test_acc))
plt.pcolormesh(xx, yy, Z_1, cmap=cmap_light)
plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("MLP classification Result with Regularization")
plt.xlabel("Symestry")
plt.ylabel("Density")
plt.show()

'''
#Question C 
clf = MLPClassifier(solver='lbfgs', alpha=0, hidden_layer_sizes=(10,),
	early_stopping=True, validation_fraction=0.1, max_iter=500)
clf.fit(train_x,train_y)
x1_min, x1_max = train_x[:,0].min() - 0.05, train_x[:,0].max() + 0.05
x2_min, x2_max = train_x[:,1].min() - 0.05, train_x[:,1].max() + 0.05
xx, yy = np.meshgrid(np.arange(x1_min, x1_max,0.01),np.arange(x2_min,x2_max,0.01))
Z_1 = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z_1 = Z_1.reshape(xx.shape)

print "The testing error is: ", 1-clf.score(test_x,test_y)
plt.figure(1)
plt.pcolormesh(xx, yy, Z_1, cmap=cmap_light)
plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("MLP classification Result with Early Stopping and Validation")
plt.xlabel("Symestry")
plt.ylabel("Density")
plt.show()
Example #56
0
# TODO: Look at docs and add additional params here to try to increase
# accuracy after you go through the demo end-to-end.
classifier = MLPClassifier(random_state=seed,
                           shuffle=True,
                           learning_rate="constant",
                           max_iter=10000,
                           warm_start=False,
                           hidden_layer_sizes=(10000,))

# This fit() function is how we train the classifier
classifier.fit(train_X, train_y)

# Now that we've finished training, get the test and train errors.
print("==================== AFTER TRAINING ====================")

train_accuracy = classifier.score(train_X, train_y)
print("Train Accuracy: {}".format(train_accuracy))

test_accuracy = classifier.score(test_X, test_y)
print("Test Accuracy: {}".format(test_accuracy))

# Allow the classifier to make predictions on the test set using only the
# features, not the labels
predicted = classifier.predict(test_X)

# TODO: Look up what precision, recall, and f1-score are.
# Explain why recall for 3 and 8 might be so low.
print("Classification report for classifier %s:\n%s\n" %
      (classifier, metrics.classification_report(test_y, predicted)))
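
# For the TODO above: precision = TP / (TP + FP), recall = TP / (TP + FN),
# and F1 is their harmonic mean. Recall for 3 and 8 is often low because the
# two digits share most of their strokes and are easily confused.
# A tiny worked check with made-up counts (illustrative numbers only):
#   tp, fp, fn = 80, 10, 20
#   precision = tp / (tp + fp)                            # ~0.889
#   recall = tp / (tp + fn)                               # 0.800
#   f1 = 2 * precision * recall / (precision + recall)    # ~0.842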

# TODO: Try to interpret what this might be.
Example #57
0
    # ===========================================

    if clf is None:
        sys.exit('Neural network model not initialized')
    else:
        print(clf)

    print('Neural network structure')
    print([coef.shape for coef in clf.coefs_])

    print('Number of iterations used: %d' % clf.n_iter_)

    raw_input('Program paused. Press enter to continue')

    # =================== Part 3: Visualize Weights ===================

    print('Visualizing Neural Network...')

    plt.figure()
    displayData(np.transpose(clf.coefs_[0]))

    raw_input('Program paused. Press enter to continue')

    # =================== Part 4: Implement Predict ===================

    print('Training Set Accuracy: %f' % clf.score(X, y.ravel()));

    raw_input('Program paused. Press enter to continue')


Example #58
0
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import sys
sys.path.append('../ann_logistic_extra')
from process import get_data

from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle

# get the data
X, Y = get_data()

# split into train and test
X, Y = shuffle(X, Y)
Ntrain = int(0.7 * len(X))
Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

# create the neural network
model = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000)

# train the neural network
model.fit(Xtrain, Ytrain)

# print the train and test accuracy
train_accuracy = model.score(Xtrain, Ytrain)
test_accuracy = model.score(Xtest, Ytest)
print("train accuracy:", train_accuracy, "test accuracy:", test_accuracy)
#%%

X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=4)

from sklearn.neural_network import MLPClassifier

mlp=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10,), learning_rate='constant',
       learning_rate_init=0.1, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='sgd', tol=0.0001, validation_fraction=0.1, verbose=10,
       warm_start=False)

mlp.fit(X_train,y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))

print("Test set score: %f" % mlp.score(X_test, y_test))
predictions = mlp.predict(X_test)

#%%
'''
confusion matrix
'''
from sklearn.metrics import classification_report,confusion_matrix
import csv

print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

cm=confusion_matrix(y_test,predictions)
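The csv import above is otherwise unused, so the matrix was presumably meant to be written out; a minimal sketch, with a hypothetical output file name:

with open('confusion_matrix.csv', 'w', newline='') as f:  # hypothetical file name
    writer = csv.writer(f)
    writer.writerows(cm)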
Example #60
0
mlp = MLPClassifier(random_state=42)
mlp.fit(X_train,y_train)
# Compute the mean of each feature over the training set
mean_on_train = X_train.mean(axis=0)
# Compute the standard deviation of each feature over the training set
std_on_train = X_train.std(axis=0)

# Subtracting the mean and dividing by the standard deviation
# transforms the data to zero mean and unit standard deviation
X_train_scaled = (X_train-mean_on_train) / std_on_train
# Apply the same transformation to the test set, using the training mean and std
X_test_scaled = (X_test-mean_on_train)/std_on_train
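# (Equivalently, this standardization is what sklearn's StandardScaler does;
# a sketch of the same transformation:
#     from sklearn.preprocessing import StandardScaler
#     scaler = StandardScaler().fit(X_train)
#     X_train_scaled = scaler.transform(X_train)
#     X_test_scaled = scaler.transform(X_test)
# )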

mlp = MLPClassifier(random_state=0).fit(X_train_scaled,y_train)

print("훈련 세트 정확도: {:.3f}".format(mlp.score(X_train_scaled,y_train)))
print("테스트 세트 정확도: {:.3f}".format(mlp.score(X_test_scaled,y_test)))

# This run warns that the maximum number of iterations was reached, so max_iter has to be increased


mlp = MLPClassifier(random_state=0,max_iter=1000).fit(X_train_scaled,y_train)

print("훈련 세트 정확도: {:.3f}".format(mlp.score(X_train_scaled,y_train)))
print("테스트 세트 정확도: {:.3f}".format(mlp.score(X_test_scaled,y_test)))

# To improve generalization further, raise the alpha parameter to 1

mlp = MLPClassifier(random_state=0,max_iter=1000,alpha=1).fit(X_train_scaled,y_train)

print("훈련 세트 정확도: {:.3f}".format(mlp.score(X_train_scaled,y_train)))