Example no. 1
class BCISignal:
    def __init__(self, fs, bands, ch_names, states_labels, indexes):
        self.states_labels = states_labels
        self.bands = bands
        self.prefilter = FilterSequence([ButterFilter((0.5, 45), fs, len(ch_names))])
        self.csp_pools = [SpatialDecompositionPool(ch_names, fs, bands, 'csp', indexes) for _label in states_labels]
        self.csp_transformer = None
        self.var_detector = InstantaneousVarianceFilter(len(bands)*len(indexes)*len(states_labels), n_taps=fs//2)
        self.classifier = MLPClassifier(hidden_layer_sizes=(), early_stopping=True, verbose=True)
        #self.classifier = RandomForestClassifier(max_depth=3, min_samples_leaf=100)

    def fit(self, X, y=None):
        X = self.prefilter.apply(X)
        for csp_pool, label in zip(self.csp_pools, self.states_labels):
            csp_pool.fit(X, y == label)
        self.csp_transformer = FilterStack([pool.get_filter_stack() for pool in self.csp_pools])
        X = self.csp_transformer.apply(X)
        X = self.var_detector.apply(X)
        self.classifier.fit(X, y)
        print('Fit accuracy {}'.format(sum(self.classifier.predict(X) == y)/len(y)))

    def apply(self, chunk: np.ndarray):
        chunk = self.prefilter.apply(chunk)
        chunk = self.csp_transformer.apply(chunk)
        chunk = self.var_detector.apply(chunk)
        predicted_labels = self.classifier.predict(chunk)
        return predicted_labels
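
A hedged usage sketch for the class above, on synthetic data; FilterSequence, SpatialDecompositionPool and friends are project-specific classes assumed importable, and every parameter value here is illustrative rather than taken from the original project:

import numpy as np

fs = 250                                   # sampling rate, illustrative
bands = [(8, 12), (16, 24)]                # frequency bands, illustrative
ch_names = ['C3', 'Cz', 'C4']
states_labels = ['rest', 'left', 'right']
indexes = [0, 1]                           # CSP component indexes, illustrative

X = np.random.randn(fs * 60, len(ch_names))        # 60 s of synthetic "EEG"
y = np.random.choice(states_labels, size=len(X))   # one state label per sample

bci = BCISignal(fs, bands, ch_names, states_labels, indexes)
bci.fit(X, y)
print(bci.apply(X[:fs]))  # classify a 1-second chunk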
def mlp_cv_architecture(X, Y):
    # uses sklearn.model_selection.KFold (the old cross_validation API took the sample count and n_folds)
    kfold = KFold(n_splits=10)

    architectures = ( (500,2), (400,2), (400,100,2), (400,200,2), (400,100,50,2), (400,200,50,2) )

    res_dict = {}

    for architecture in architectures:
        mlp = MLPClassifier(solver='sgd',
                learning_rate='adaptive',
                hidden_layer_sizes=architecture,
                random_state=1)

        train_times    = []
        train_accuracy = []
        test_accuracy  = []

        for train, test in kfold.split(X):
            t_tr = time.time()
            mlp.fit( X[train], Y[train] )
            train_times.append( time.time() - t_tr )
            acc_train = np.sum( np.equal( mlp.predict( X[train]), Y[train] ) ) / float(X[train].shape[0])
            acc_test  = np.sum( np.equal( mlp.predict( X[test]), Y[test] ) ) / float(X[test].shape[0])
            train_accuracy.append( acc_train )
            test_accuracy.append(  acc_test )

        res_dict[str(architecture)] = (np.mean(train_accuracy), np.std(train_accuracy),
                          np.mean(test_accuracy), np.std(test_accuracy),
                          np.mean(train_times), np.std(train_times))

    with open('./../results/res_nncv_architecture.pkl', 'wb') as f:
        pickle.dump(res_dict, f)
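
For reference, a more compact sketch of the same architecture sweep using scikit-learn's built-in cross-validation (cross_validate also reports fit times); the synthetic data is only there to make the sketch self-contained:

import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPClassifier

X = np.random.rand(200, 20)
Y = np.random.randint(0, 2, 200)

for architecture in ((500, 2), (400, 2), (400, 100, 2)):
    mlp = MLPClassifier(solver='sgd', learning_rate='adaptive',
                        hidden_layer_sizes=architecture, random_state=1)
    res = cross_validate(mlp, X, Y, cv=10, return_train_score=True)
    print(architecture, res['test_score'].mean(), res['fit_time'].mean())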
Example no. 3
def naviBayes(train_X, train_y, test_X, test_y):
    # print(train_y)
    # print(test_y)
    # model = tfMultyPerceptron(train_X, train_y, test_X, test_y)
    # model.run()
    time_start = time.time()
    model = MLPClassifier(hidden_layer_sizes=(128, 32, 32, 128), max_iter=100, early_stopping=False, learning_rate_init=0.001,
                          verbose=True)
    # model = MultinomialNB()
    # model = BernoulliNB()
    # model = KNeighborsClassifier()
    # model = DecisionTreeClassifier(max_depth=20, min_samples_leaf=0.01)
    # model = LinearSVC(random_state=0)
    # model.fit(X, y)
    model.fit(train_X, train_y)
    # model_1.fit(train_X, train_y)
    # model_2.fit(train_X, train_y)
    # model_3.fit(train_X, train_y)
    # model_4.fit(train_X, train_y)
    # model_5.fit(train_X, train_y)
    # All_model = [model, model_1, model_2, model_3, model_4, model_5]

    # train_pre = predct_all(All_model, train_X, train_y)
    # test_pre = predct_all(All_model, test_X, test_y)
    time_end = time.time()
    print("perceptron training cost time: {}".format(time_end - time_start))
    # model = OneVsRestClassifier(SVC(kernel='linear'))
    # model.fit(train_X, train_y)
    # save the trained model
    with open(config.BTMData + 'BayesModel/BTM_perceptron.model', 'wb') as fp:
        pickle.dump(model, fp)

    # load model
    # model = None
    # with open(config.BTMData + 'BayesModel/bayes_BTM.model', 'rb') as fp:
    #     model = pickle.load(fp)

    # print('train data set size:', len(train_y))
    # result = metrics.accuracy_score(train_pre, train_y)
    # returns the class index assigned to each text
    # print("Predicting random boost train result: ", result)
    # print('train data set size:', len(train_y))
    # result = metrics.accuracy_score(test_pre, test_y)
    # returns the class index assigned to each text
    # print("Predicting random boost test result:", result)

    print('train data set size:', len(train_y))
    result = model.score(train_X, train_y)
    # returns the class index assigned to each text
    print("Predicting train result: ", result)

    test_result = model.score(test_X, test_y)
    print("Predicting test set result: ", test_result)

    top_train_result = model.predict_proba(train_X)
    print("top 3 predict train data accuracy rate: {}".format(cal_topThreeScore(model, top_train_result, train_y)))

    top_test_result = model.predict_proba(test_X)
    print("top 3 predict test data accuracy rate: {}".format(cal_topThreeScore(model, top_test_result, test_y)))
Example no. 4
class NeuralLearner(Learner.Learner):
    def __init__(self, FeatureMask):
        super(NeuralLearner, self).__init__(FeatureMask)
        self.expected = FeatureMask.LabelsForAllPoints
        #self.model = MLPClassifier(solver='sgd', hidden_layer_sizes=(64,32))
        self.model = MLPClassifier(solver='sgd',
                                   learning_rate='constant',
                                   momentum=.9,
                                   nesterovs_momentum=True,
                                   learning_rate_init=0.2)

    def FitAndPredict(self, mask):
        # Fit returns None, so fit first and then predict on the same mask
        self.Fit(mask)
        return self.Predict(mask)

    def SetupInputActivations(self, FeatureMask):
        arr = np.hstack([FeatureMask.ForceStd.reshape(-1, 1),
                         FeatureMask.ForceMinMax.reshape(-1, 1),
                         FeatureMask.CannyFilter.reshape(-1, 1)])
        expected = FeatureMask.LabelsForAllPoints
        return arr, expected

    def Fit(self, mask):
        arr, expected = self.SetupInputActivations(mask)
        self.model.fit(arr, expected)

    def Predict(self, mask):
        arr, expected = self.SetupInputActivations(mask)
        return self.model.predict(arr).reshape(-1, 1)
def train_on_source(X, Y):

    print("Start Learning Net on source")

    clf = MLPClassifier(solver='lbfgs',
            alpha=1e-5,
            hidden_layer_sizes=(500, 2),
            random_state=1,
            warm_start=True,
            max_iter=400)

    clf.fit(X, Y)
    #new_loss = 0
    #old_loss = 10000
    #for step in range(200):
    #    clf.fit(X, Y)
    #    new_loss = clf.loss_
    #    # stop training, if improvement is small
    #    improvement = abs(new_loss - old_loss)
    #    print("Step:", step, "Loss:", new_loss, "Improvement:", improvement)
    #    if improvement < 1.e-5:
    #        print("Training converged!")
    #        break
    #    old_loss = new_loss
    print("Pretrained CLF on Source with num_iter:", clf.n_iter_)
    return clf
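
The commented-out block above sketches incremental training with warm_start; a runnable version of that pattern on synthetic data (the 1e-5 threshold is kept from the comment, the data is illustrative):

import numpy as np
from sklearn.neural_network import MLPClassifier

X = np.random.rand(100, 10)
Y = np.random.randint(0, 2, 100)

clf = MLPClassifier(solver='lbfgs', max_iter=20, warm_start=True, random_state=1)
old_loss = np.inf
for step in range(200):
    clf.fit(X, Y)  # with warm_start=True each fit resumes from the current weights
    improvement = abs(clf.loss_ - old_loss)
    print("Step:", step, "Loss:", clf.loss_, "Improvement:", improvement)
    if improvement < 1.e-5:
        print("Training converged!")
        break
    old_loss = clf.loss_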
Example no. 6
def main():
    # the original snippet leaves column_names undefined; these are the columns selected in the query below
    column_names = ['layer0', 'layer1', 'layer2', 'layer3', 'layer4', 'layer5']
    enc = OneHotEncoder(categories=[list(range(7))] * 6)
    conn = sqlite3.connect('server.db')
    cursor = conn.cursor()
    all_ = pandas.read_sql_query('SELECT layers.burger, labels.output, layers.layer0, layers.layer1, layers.layer2, layers.layer3, layers.layer4, layers.layer5 FROM layers,labels WHERE layers.burger = labels.burger', conn, index_col='burger')
    
    X = all_.drop(['output'], axis=1)
    y = all_['output']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

               
    clf = MLPClassifier(solver='adam',  activation='relu',
                        verbose=False,
                        max_iter=10000,
                        tol=1e-9,
                        random_state=1)
    
    X_train_categoricals = X_train[column_names]
    tX_train_categoricals = enc.fit_transform(X_train_categoricals)
    clf.fit(tX_train_categoricals, y_train.to_numpy().astype(int))

    X_test_categoricals = X_test[column_names]
    # use transform (not fit_transform) so the test set reuses the training encoding
    tX_test_categoricals = enc.transform(X_test_categoricals)
    prediction = clf.predict(tX_test_categoricals)
    
    print(classification_report(y_test, prediction))
    
    print_eval(y_test, prediction)
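
A hedged alternative sketch for the same flow: bundling the encoder and the network in a Pipeline guarantees the test set is transformed with the encoding learned on the training set (column names as in the query above):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier

pipe = make_pipeline(
    OneHotEncoder(categories=[list(range(7))] * 6),
    MLPClassifier(solver='adam', max_iter=10000, tol=1e-9, random_state=1),
)
# pipe.fit(X_train[column_names], y_train)
# prediction = pipe.predict(X_test[column_names])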
def neuralNetworkIteration():
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score
    a, b, c, d, e, f = traing_test_data_set()
    alphalist = [.00001, .00003, .0001, .0003, .001, .003, .01, .03, 1, 10]
    for feature_number in range(1, 2):

        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        for new_alpha in alphalist:
            iteration_output = "Iteration,Training Error,Validation Error\n"
            clf = MLPClassifier(alpha=new_alpha, hidden_layer_sizes=(200,), random_state=1, activation='logistic',
                                warm_start=True, max_iter=1)
            for iteration in range(1, 500):
                clf.fit(train_data, train_label)
                prediction = clf.predict(validation_data)
                train_error = 100 - clf.score(train_data, train_label) * 100.0
                validation_error = 100 - accuracy_score(validation_label, prediction) * 100.0
                row = "{},{},{}".format(iteration, train_error, validation_error)
                iteration_output += row + "\n"
                print(row)
            file_name = "For All Feature. Alpha = " + str(new_alpha) + " Iteration data.csv"
            print(file_name)
            with open(file_name, "w", encoding="utf-8") as datafile:
                datafile.write(iteration_output)
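
When solver is 'sgd' or 'adam', scikit-learn already records the training loss per epoch in loss_curve_, so a learning curve can be logged without re-fitting under warm_start; a minimal sketch on synthetic data:

import numpy as np
from sklearn.neural_network import MLPClassifier

X = np.random.rand(200, 10)
y = np.random.randint(0, 2, 200)

clf = MLPClassifier(hidden_layer_sizes=(200,), activation='logistic',
                    solver='adam', max_iter=500, random_state=1)
clf.fit(X, y)
for epoch, loss in enumerate(clf.loss_curve_, start=1):
    print("{},{}".format(epoch, loss))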
Example no. 8
def neuralNetwork():
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, precision_score, f1_score
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        clf = MLPClassifier(solver='lbfgs', alpha=.003, hidden_layer_sizes=(10,), random_state=1, activation='relu')
        clf.fit(train_data, train_label)

        tot = len(test_label)
        prediction = clf.predict(test_data)
        # count misclassified test samples from the batch prediction
        cnt = sum(1 for i in range(len(test_data)) if prediction[i] != test_label[i])
        print("Complete for Feature :" + str(feature_number))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
def neuralNetworkIterationLogistic():
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 6):
        iteration_output = "Iteration,Training Error,Validation Error\n"
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1, activation='logistic',
                            warm_start=True, max_iter=1)
        for iteration in range(1, 350):
            clf.fit(train_data, train_label)
            prediction = clf.predict(validation_data)
            # count misclassified validation samples (not used further)
            cnt = sum(1 for i in range(len(validation_data)) if prediction[i] != validation_label[i])
            train_error = 100 - clf.score(train_data, train_label) * 100.0
            validation_error = 100 - accuracy_score(validation_label, prediction) * 100.0
            row = "{},{},{}".format(iteration, train_error, validation_error)
            iteration_output += row + "\n"
            print(row)
        file_name = "Feature No " + str(feature_number) + " Iteration data.csv"
        print(file_name)
        with open(file_name, "w", encoding="utf-8") as datafile:
            datafile.write(iteration_output)
def train():
    utl.print_title('Getting data...')
    X, Tc, X_test, Tc_test = dpp.getdata_arnold()
    #X, Tc, X_test, Tc_test = dpp.getdata_mnist()

    utl.print_title('Preparing data...')
    X, X_test = dpp.scale_data(X, X_test)
    T = dpp.one_hot_encode(Tc)
    T_test = dpp.one_hot_encode(Tc_test)

    utl.print_title('Sanity checks...')
    print('Shape X:', X.shape)
    print('Shape Tc:', Tc.shape)
    print('Shape T:', T.shape)
    print('Shape X_test:', X_test.shape)
    print('Shape Tc_test:', Tc_test.shape)
    print('Shape T_test:', T_test.shape)

    utl.print_title('Training the network...')
    classifier = MLPClassifier(solver='adam', learning_rate_init=1e-3, hidden_layer_sizes=(100,), verbose=True, max_iter=200)
    classifier.fit(X, T)

    train_score, Pc = get_results(classifier, X, T)
    test_score, Pc_test = get_results(classifier, X_test, T_test)

    utl.print_title('Results:')
    print('Classification counts train (target):     ',  np.bincount(Tc.reshape(-1)))
    print('Classification counts train (prediction): ',  np.bincount(Pc))

    print('\nClassification counts test (target):     ',  np.bincount(Tc_test.reshape(-1)))
    print('Classification counts test (prediction): ',  np.bincount(Pc_test))

    print('\nTrain score: ', train_score)
    print('Test score:  ', test_score)
Example no. 11
def test_gradient():
    # Test gradient.

    # This makes sure that the activation functions and their derivatives
    # are correct. The numerical and analytical computation of the gradient
    # should be close.
    for n_labels in [2, 3]:
        n_samples = 5
        n_features = 10
        X = np.random.random((n_samples, n_features))
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                solver='lbfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)

            theta = np.hstack([l.ravel() for l in mlp.coefs_ +
                               mlp.intercepts_])

            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])

            activations = []
            deltas = []
            coef_grads = []
            intercept_grads = []

            activations.append(X)
            for i in range(mlp.n_layers_ - 1):
                activations.append(np.empty((X.shape[0],
                                             layer_units[i + 1])))
                deltas.append(np.empty((X.shape[0],
                                        layer_units[i + 1])))

                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            [value, grad] = loss_grad_fun(theta)
            numgrad = np.zeros(np.size(theta))
            n = np.size(theta, 0)
            E = np.eye(n)
            epsilon = 1e-5
            # numerically compute the gradients
            for i in range(n):
                dtheta = E[:, i] * epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                              loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)
Example no. 12
def mlp_train(self, x_train, y_train):
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    clf = MLPClassifier(max_iter=500, alpha=1e-5, hidden_layer_sizes=(40, 100, 80), warm_start=True, random_state=0)
    clf.fit(x_train, y_train)

    return clf
Example no. 13
def test_tolerance():
    # Test tolerance.
    # It should force the solver to exit the loop when it converges.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd')
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
Example no. 14
def test_adaptive_learning_rate():
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd',
                        learning_rate='adaptive')
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
    assert_greater(1e-6, clf._optimizer.learning_rate)
Example no. 15
def train(classes, y_samples, feature_dict, classes_dict):
    # requires a scikit-learn version that ships MLPClassifier (0.18+)
    from sklearn.neural_network import MLPClassifier

    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(50, 25), random_state=1, verbose=True)
    clf.fit(y_samples, classes)

    return clf
Example no. 16
def fitMLPs(trainIndexes, datasets):
    classifiers = []
    for (x, y) in datasets:
        # note: momentum only applies to the 'sgd' solver and is ignored by 'lbfgs'
        cl = MLPClassifier(solver='lbfgs', alpha=1e-4, hidden_layer_sizes=(76, 30), random_state=1, momentum=0.8)
        data, target = listToData(trainIndexes, x, y)
        cl.fit(data, target)
        classifiers.append(cl)
    return classifiers
Example no. 17
def main():
    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

    classifier = MLPClassifier(max_iter=1000)
    classifier.fit(X_train, y_train)
    s = classifier.score(X_test, y_test)
    print(s)
Example no. 18
def do_mlp(x_train, x_test, y_train, y_test):

    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(10, 4),
                        random_state=1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred))
Example no. 19
def do_mlp(x_train, x_test, y_train, y_test):
    #mlp
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(5, 2),
                        random_state=1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    do_metrics(y_test,y_pred)
def test2():
    X = [[0., 0.], [1., 1.]]
    y = [0, 1]
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(3,), random_state=1, activation='relu')
    clf.fit(X, y)
    test_sample = [[2., 2.], [-1., -2.]]
    print(clf.predict(test_sample))
    print(clf.predict_proba(test_sample))
    output_mlp(clf)
Example no. 21
def mlpTest(self):
    mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=1000, alpha=1e-4,
                        solver='sgd', verbose=10, tol=1e-4, random_state=1)
    mlp.fit(self.X_train, self.Y_train)
    predicted = mlp.predict(self.X_test)
    print("Classification report for classifier %s:\n%s\n"
          % (mlp, metrics.classification_report(self.Y_test, predicted)))
    print("Confusion matrix:\n%s"
          % metrics.confusion_matrix(self.Y_test, predicted))
def fit_and_score_ann(x_train, y_train, x_test, y_test, config):
    ann = MLPClassifier(solver=config.ann.solver,
                        max_iter=Configuration.ANN_MAX_ITERATIONS,
                        alpha=config.ann.alpha,
                        hidden_layer_sizes=(config.ann.hidden_neurons,),
                        learning_rate='adaptive')

    ann.fit(x_train, y_train)
    return ann.score(x_test, y_test)
Example no. 23
def MLP_classifier(train_x, train_y):
    clf = MLPClassifier(activation='relu', solver='adam', alpha=0.0001,
               batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=True,
               epsilon=1e-08, hidden_layer_sizes=(50, 50), learning_rate='constant',
               learning_rate_init=0.01, max_iter=3000, momentum=0.9,
               nesterovs_momentum=True, power_t=0.5, random_state=0, shuffle=True,
                validation_fraction=0.1, verbose=False,
               warm_start=False)
    clf.fit(train_x, train_y)
    return clf
Example no. 24
def test_early_stopping_stratified():
    # Make sure data splitting for early stopping is stratified
    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
    y = [0, 0, 0, 1]

    mlp = MLPClassifier(early_stopping=True)
    with pytest.raises(
            ValueError,
            match='The least populated class in y has only 1 member'):
        mlp.fit(X, y)
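
For contrast, the same call succeeds once every class has at least two members and the validation split is large enough to contain each class; a minimal sketch (validation_fraction=0.5 is chosen so the 4-sample toy set can be split):

from sklearn.neural_network import MLPClassifier

X = [[1, 2], [2, 3], [3, 4], [4, 5]]
y = [0, 0, 1, 1]

mlp = MLPClassifier(early_stopping=True, validation_fraction=0.5, max_iter=50)
mlp.fit(X, y)  # the internal stratified split now sees both classes in each part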
Example no. 25
def do_mlp(x_train, x_test, y_train, y_test):
    # Building deep neural network
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes = (5, 2),
                        random_state = 1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred))
    print(metrics.confusion_matrix(y_test, y_pred))
def neuralNetwork():
    from sklearn.preprocessing import StandardScaler
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, precision_score, f1_score
    a, b, c, d, e, f = traing_test_data_set()
    for feature_number in range(1, 2):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        validation_data, validation_label = e[feature_number - 1], f[feature_number - 1]
        # scale features; the scaler is fit on the training data only
        scaler = StandardScaler()
        scaler.fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
        validation_data = scaler.transform(validation_data)
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(100,), random_state=1, activation='logistic', max_iter=1000)
        clf.fit(train_data, train_label)

        tot = len(test_label)
        cnt = 0
        prediction = clf.predict(test_data)
        for i in range(0, len(test_data)):
            if prediction[i] != test_label[i]:
                print(str(i) + str(prediction[i]) + " " + str(test_label[i]))
                cnt += 1
        print("Complete for Feature :" + str(feature_number))
        print("Train data set size : " + str(len(train_data)))
        print("Train Score : " + str(clf.score(train_data, train_label)))
        print("Total test set size : " + str(len(test_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")

        tot = len(validation_label)
        cnt = 0
        prediction = clf.predict(validation_data)
        for i in range(0, len(validation_label)):
            if prediction[i] != validation_label[i]:
                print(str(i) + str(prediction[i]) + " " + str(validation_label[i]))
                cnt += 1
        print("Total validation set size : " + str(len(validation_label)))
        print("Correct prediction : " + str(tot - cnt))
        print("Incorrect Prediction : " + str(cnt))
        print("Accuracy : " + str(accuracy_score(validation_label, prediction) * 100.0))
        print("Precision : " + str(precision_score(validation_label, prediction, average='weighted') * 100.0))
        print("F1 Score : " + str(f1_score(validation_label, prediction, average='weighted') * 100.0))
        print("Error Rate : " + str(cnt / tot * 100.0))
        print("---------------------------------------\n")
def neural_network_voting_systemLogistic():
    from sklearn.preprocessing import StandardScaler
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, precision_score, f1_score
    a, b, c, d, e, f = traing_test_data_set()
    iterations = [75, 60, 90, 95, 95]
    voting_pred = list()
    for i in range(0, len(d[0])):
        voting_pred.append([])
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        #rnd = list(zip(train_data,train_label))
        #random.shuffle(rnd)
        #train_data, train_label = zip(*rnd)
        # use feature scaling; the scaler is fit on the training data only
        scaler = StandardScaler()
        scaler.fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1, activation='logistic', max_iter=1000, early_stopping=False)
        clf.fit(train_data, train_label)
        print(clf.n_iter_)
        # collect this feature set's prediction for each test sample
        feature_prediction = clf.predict(test_data)
        for i in range(0, len(test_data)):
            voting_pred[i].append(feature_prediction[i])

    tot = len(test_label)
    cnt = 0
    prediction = list()
    for i in range(0, len(test_data)):
        prediction.append(most_common(voting_pred[i]))
        if prediction[i] != test_label[i]:
            print(str(i) + " " + str(prediction[i]) + " " + str(test_label[i]))
            cnt += 1
    print("Complete for Voting system :")
    print("Total test set size : " + str(len(test_label)))
    print("Correct prediction : " + str(tot - cnt))
    print("Incorrect Prediction : " + str(cnt))
    print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
    print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
    print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
    print("Error Rate : " + str(cnt / tot * 100.0))
    print("---------------------------------------\n")
Example no. 28
def test_bool_and(self):
    x = ((0, 0), (1, 1), (1, 0), (0, 1))
    y = (0, 1, 0, 0)
    mlp = MLPClassifier(hidden_layer_sizes=(), activation='logistic', max_iter=2, alpha=1e-4,
                        solver='lbfgs', verbose=False, tol=1e-4, random_state=1,
                        learning_rate_init=.1)
    mlp.fit(x, y)
    # predict expects a 2D array: one row per sample
    assert mlp.predict([[0, 0]]) == 0
    assert mlp.predict([[0, 1]]) == 0
    assert mlp.predict([[1, 0]]) == 0
    assert mlp.predict([[1, 1]]) == 1
def Neural_network(self, X_train, Y_train, X_test, Y_test):
    from sklearn import metrics
    from sklearn.neural_network import MLPClassifier
    model = MLPClassifier()
    model.fit(X_train, Y_train)
    expected = Y_test
    predicted = model.predict(X_test)
    fpr, tpr, thres = metrics.roc_curve(expected, predicted)
    print(metrics.classification_report(expected, predicted))
    # print(metrics.confusion_matrix(expected, predicted))
    print(metrics.auc(fpr, tpr))
Example no. 30
class AnnClassifier(AbstractClassifier):
    def __init__(self, features, target, solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1):
        self.solver = solver
        self.alpha = alpha
        self.hidden_layer_sizes = hidden_layer_sizes
        self.random_state = random_state
        super(AnnClassifier, self).__init__(features, target)

    def __fit(self, features):
        self.clf = MLPClassifier(solver=self.solver, alpha=self.alpha, hidden_layer_sizes=self.hidden_layer_sizes,
                                 random_state=self.random_state)
        self.clf.fit(features, self.target)
Example no. 31
while True:
    with open(f_name, 'r') as f:
        threads = f.readlines()

    x_train, y_train = [], []
    for t in threads:
        tr = list(map(int, t.replace('\n', '').split(',')))
        x_train.append(tr[:-1])
        y_train.append(tr[-1])
    x_train, y_train = np.array(x_train), np.array(y_train)
    zero, one = x_train[y_train == 0], x_train[y_train == 1]

    print('#', len(threads), 'y_train', dict(collections.Counter(y_train)))

    model = MLPClassifier(max_iter=100000, random_state=42)
    model.fit(x_train, y_train)

    pred_p = pred_model(model)[:, 0]

    Xr = X[np.logical_and(ppl[0] < pred_p, pred_p < ppl[1])]
    print(len(Xr), end=' ')

    point = get_closest(Xr, zero, one)
    """
    grid0 = get_segments(point, min_max_X, 0)
    pred_p = model.predict_proba(grid0)[:, 0]
    Xr = grid0[np.logical_and(ppl[0] < pred_p, pred_p < ppl[1])]
    point = get_closest(Xr, zero, one)

    grid1 = get_segments(point, get_min_max(grid0), 1)
    pred_p = model.predict_proba(grid1)[:, 0]
Example no. 32
# gp = GaussianProcessClassifier()
# gp.fit(Xtrain, Ytrain)
# # print("importances=", gp.feature_importances_)
#
# y_predict = gp.predict(Xtest)
# print(f"Accuracy score for Gaussian Process Classifier Classifier is: {accuracy_score(Ytest, y_predict)}")
# # print("GP importances=", gp.feature_importances_)
#
# cm = ConfusionMatrix(gp, classes=[0,1])
# cm.fit(Xtrain, Ytrain)
# cm.score(Xtest, Ytest)
# cm.show()

# MLP Classifier #############################################
mlp = MLPClassifier()
mlp.fit(Xtrain, Ytrain)
# print("importances=", gp.feature_importances_)

y_predict = mlp.predict(Xtest)
print(
    f"\nAccuracy score for MLP Classifier is: {accuracy_score(Ytest, y_predict)}"
)
# print("GP importances=", gp.feature_importances_)

cm = ConfusionMatrix(mlp, classes=[0, 1])
cm.fit(Xtrain, Ytrain)
cm.score(Xtest, Ytest)
cm.show()

# AdaBoostClassifier #######################################
ada = AdaBoostClassifier()
Example no. 33
clf = svm.SVC()  # instantiate the classifier
clf.fit(X_train, y_train)  # a simple fit on the data we separated out for training
pred_clf = clf.predict(X_test)  # predict the test values
# How the CLF model performs
print("SVM Classification")
print(classification_report(y_test, pred_clf))  # how the test data compares to the predicted values
print(confusion_matrix(y_test, pred_clf))  # a matrix of the mislabels between good and bad

#=================================
## Neural Network
# hidden_layer_sizes gives the number of nodes in each layer of the NN
# Good for text-based data, big data sets, picture processing
#==================================
# object = Classifier(nodes in each layer, max iterations)
mlpc = MLPClassifier(hidden_layer_sizes=(11, 11, 11), max_iter=500)
mlpc.fit(X_train, y_train)
pred_mlpc = mlpc.predict(X_test)
# How the NN model performs
print("Neural Network")
print(classification_report(y_test, pred_mlpc))  # how the test data compares to the predicted values
print(confusion_matrix(y_test, pred_mlpc))  # a matrix of the mislabels between good and bad

# Score the AI
from sklearn.metrics import accuracy_score  # test score
bn = accuracy_score(y_test, pred_rfc)  # labelling for printing
dm = accuracy_score(y_test, pred_clf)  # labelling for printing
cm = accuracy_score(y_test, pred_mlpc)  # labelling for printing
print(bn, ' is the Forest score')
print(dm, ' is the SVM Classification score')
print(cm, ' is the Neural Network score')
Example no. 34
def mp(X_train, y_train, X_test, hid=(100, 100)):
    from sklearn.neural_network import MLPClassifier
    mlp = MLPClassifier(solver='adam', hidden_layer_sizes=hid)
    mlp.fit(X_train, y_train)
    y_predict = mlp.predict(X_test)
    return y_predict
Example no. 35
        # use integer (floor) division so the slice bounds are valid indices
        data_for_predicting_Y_batch_2 = data_for_predicting_Y[order[(N // 3):(
            2 * N // 3)], :]
        data_for_predicting_A_batch_2 = data_for_predicting_A[order[(N // 3):(
            2 * N // 3)], :]
        pr_attr_batch_2 = pr_attr[order[(N // 3):(2 * N // 3)]]
        label_batch_2 = label[order[(N // 3):(2 * N // 3)]]

        data_for_predicting_Y_batch_3 = data_for_predicting_Y[order[(2 * N //
                                                                     3):], :]
        data_for_predicting_A_batch_3 = data_for_predicting_A[order[(2 * N //
                                                                     3):], :]
        pr_attr_batch_3 = pr_attr[order[(2 * N // 3):]]
        label_batch_3 = label[order[(2 * N // 3):]]

        #train classifiers on Batch 1
        clf_for_Y.fit(data_for_predicting_Y_batch_1, label_batch_1)
        clf_for_A.fit(data_for_predicting_A_batch_1, pr_attr_batch_1)

        #make predictions on Batch 2 and 3
        label_batch_2_PREDICTED = clf_for_Y.predict(
            data_for_predicting_Y_batch_2)
        label_batch_3_PREDICTED = clf_for_Y.predict(
            data_for_predicting_Y_batch_3)
        pr_attr_batch_2_PREDICTED = clf_for_A.predict(
            data_for_predicting_A_batch_2)
        pr_attr_batch_3_PREDICTED = clf_for_A.predict(
            data_for_predicting_A_batch_3)

        #run equalized odds (training on Batch 2 and predicting on Batch 3) with predicted attribute
        EO_PREDICTION_batch_3 = equalized_odds_pred(label_batch_2,
                                                    label_batch_2_PREDICTED,
Example no. 36
data = np.array(data)

train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size = 0.1)

class_weight = "balanced"

print('Training Features Shape:', train_data.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_data.shape)
print('Testing Labels Shape:', test_labels.shape)
print('Sample weights: '+ str(compute_sample_weight(class_weight=class_weight, y=train_labels)))

# STEP 1 Training

# try different activation functions, solvers, hidden layers, etc.
# NOTE: MLPClassifier.fit does not accept a sample_weight argument, so the
# computed weights cannot be passed here; see the resampling sketch below
nn = MLPClassifier(activation="relu", solver="adam", alpha=1e-5)
nn.fit(train_data, train_labels)

# STEP 2 Errors
print("TRAINING ACCURACY: "+str(nn.score(train_data, train_labels)))
print("TESTING ACCURACY: "+str(nn.score(test_data, test_labels)))

predictions = nn.predict(test_data)
conf_mat = confusion_matrix(test_labels, predictions)
print(conf_mat)

#print(rf.feature_importances_)

# STEP 3 Save Ensemble
#filename = 'LogReg_1.sav'
#pickle.dump(ada, open(filename, 'wb'))
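
Since MLPClassifier cannot consume sample weights directly, one workaround is to rebalance by resampling before fitting; a hedged sketch with sklearn.utils.resample, assuming binary labels held in NumPy arrays:

import numpy as np
from sklearn.utils import resample

train_data = np.asarray(train_data)
train_labels = np.asarray(train_labels)

# upsample the minority class until both classes have equal counts
classes, counts = np.unique(train_labels, return_counts=True)
minority_mask = train_labels == classes[np.argmin(counts)]
X_up, y_up = resample(train_data[minority_mask], train_labels[minority_mask],
                      n_samples=counts.max(), random_state=42)
train_data_bal = np.concatenate([train_data[~minority_mask], X_up])
train_labels_bal = np.concatenate([train_labels[~minority_mask], y_up])
# nn.fit(train_data_bal, train_labels_bal)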
Example no. 37
        train_data.append(data[1:])
print('Loaded ' + str(len(train_label)))

# step 2: PCA reduction + ANN
print('PCA Reduction and ANN fitting...')
train_label = np.array(train_label)
train_data = np.array(train_data)
pca = PCA(n_components=COMPONENT_NUM, whiten=True)
pca.fit(train_data)
train_data = pca.transform(train_data)

clf = MLPClassifier(solver='lbfgs',
                    alpha=1e-5,
                    hidden_layer_sizes=(5, 2),
                    random_state=RANOM_STATE)
clf.fit(train_data, train_label)

# step 3: plot PCAs into 2D plot
# reference 1: https://jakevdp.github.io/PythonDataScienceHandbook/05.09-principal-component-analysis.html
# ref 2: http://scikit-learn.org/0.17/auto_examples/svm/plot_iris.html


def getcolor(index):
    if index % CIGTOTAL == 0:
        return 'r'
    elif index % CIGTOTAL == 1:
        return 'b'
    else:
        return 'g'

Example no. 38
def mlp(self):
    # MLPClassifier has no C/penalty parameters (those belong to LogisticRegression);
    # its L2 regularization strength is controlled by alpha
    mlp = MLPClassifier(alpha=1e-3)
    mlp = mlp.fit(x_train, y_train)
    pred = mlp.predict(x_test)
    print("MLP's Accuracy score:", accuracy_score(pred, y_test))
    return pred
image_size = 28 # width and length
no_of_different_labels = 10 #  i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

# create MLP
mlp = MLPClassifier(hidden_layer_sizes=(100, ), 
                    max_iter=480, alpha=1e-4,
                    solver='sgd', verbose=10, 
                    tol=1e-4, random_state=1,
                    learning_rate_init=.1)

# train MLP
train_labels = train_labels.reshape(train_labels.shape[0],)
print(train_imgs.shape, train_labels.shape)

mlp.fit(train_imgs, train_labels)
print("Training set score: %f" % mlp.score(train_imgs, train_labels))
print("Test set score: %f" % mlp.score(test_imgs, test_labels))
help(mlp.fit)

# plots results
fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

plt.show()
Example no. 40
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
file = pd.read_csv('data2.csv',header=None)
data = file.values
base = 1
data_X = [[0 for col in range(5)] for row in range(len(data))]
data_Y = [0 for b in range(len(data))]

for i in range(len(data)):
    data_X[i][0] = data[i][0]
    data_X[i][1] = data[i][1]
    data_X[i][2] = data[i][2]
    data_X[i][3] = data[i][3]
    if data[i][4] == 'S':
        data_X[i][4] = base
    elif data[i][4] == 'SW':
        data_X[i][4] = 2*base
    elif data[i][4] == 'SE':
        data_X[i][4] = 3*base
    data_Y[i] = data[i][5]
# data_Y = data[:,-1]
X_train, X_test, Y_train, Y_test = train_test_split(data_X, data_Y, test_size=0.2, random_state=0)

model = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 5), random_state=1)
model.fit(X_train, Y_train)

res = model.predict(X_test)
print(res)
print(Y_test)
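
The S/SW/SE mapping above turns wind directions into ordered integers (1, 2, 3), which imposes an artificial ordering; a hedged alternative sketch that one-hot encodes the direction column with pandas instead (column 4 holds the direction and column 5 the label, as in the loop above):

import pandas as pd

df = pd.DataFrame(data)
encoded = pd.get_dummies(df, columns=[4])        # one-hot encode the direction column
data_X = encoded.drop(columns=[5]).values.astype(float)
data_Y = encoded[5].values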
Example no. 41
print "prediction using support vector machine"

print prediction

from sklearn.naive_bayes import GaussianNB

clf3 = GaussianNB()

clf3.fit(X, Y)

prediction = clf3.predict([[167.64, 76.43, 36.5]])

print "prediction using Gaussian Naive Bayes"

print prediction

from sklearn.neural_network import MLPClassifier

clf4 = MLPClassifier(solver='lbfgs',
                     alpha=1e-5,
                     hidden_layer_sizes=(5, 2),
                     random_state=1)

clf4.fit(X, Y)

prediction = clf4.predict([[167.64, 76.43, 36.5]])

print "prediction using Neural Network"

print prediction
Example no. 42
                     learning_rate_init=learningRateInit,
                     max_iter=maxIter,
                     tol=tolRate,
                     momentum=momentumRate,
                     batch_size=batchSize,
                     verbose=talkative)
mlp5 = MLPClassifier(activation=activationMode,
                     solver=solverMode,
                     alpha=alphaParameter,
                     learning_rate_init=learningRateInit,
                     max_iter=maxIter,
                     tol=tolRate,
                     momentum=momentumRate,
                     batch_size=batchSize,
                     verbose=talkative)
mlp1.fit(dfToTrain, a41)
print("Fit 1")
mlp2.fit(dfToTrain, a42)
print("Fit 2")
mlp3.fit(dfToTrain, a43)
print("Fit 3")
mlp4.fit(dfToTrain, a44)
print("Fit 4")
mlp5.fit(dfToTrain, a45)
print("Fit 5")

# Data to Predict
df2 = openCSV('data/test3.csv')
df2 = df2.dropna(subset=[
    'NU_NOTA_LC', 'NU_NOTA_CH', 'NU_NOTA_CN', 'NU_NOTA_REDACAO', 'NU_INSCRICAO'
])
Example no. 43
X, y = list(zip(*features))

X = np.array(X)
y = (np.array(y) == '1-0').astype(int)

X = csr_matrix(X)

X_tr, X_te, y_tr, y_te = train_test_split(X, y)

model_lr = LogisticRegression()
model_lr.fit(X_tr, y_tr)
pred_lr = model_lr.predict_proba(X_te)[:, 1]
print("auc lr", roc_auc_score(y_te, pred_lr))

model_nn = MLPClassifier(hidden_layer_sizes=(5, ), verbose=True)
model_nn.fit(X_tr, y_tr)
pred_nn = model_nn.predict_proba(X_te)[:, 1]
print("auc nn", roc_auc_score(y_te, pred_nn))

model_lgbm = LGBMClassifier(n_estimators=100)
model_lgbm.fit(X_tr.astype(np.float64), y_tr)
pred_lgbm = model_lgbm.predict_proba(X_te.astype(np.float64))[:, 1]
print("auc lgbm", roc_auc_score(y_te, pred_lgbm))

print("auc", roc_auc_score(y_te, pred_lr + pred_lgbm))

with open("lgbm_if_else.c", "wt") as out:
    out.write(
        parseAllTrees(model_lgbm.booster_.dump_model()['tree_info']).replace(
            "1.0000000180025095e-35", "1"))
"""
x=[]
y=[]
print(xx)
files_name=[f for f in listdir('testsdc') if isfile(join('testsdc',f))]

for name in files_name:
    img=cv2.imread(join('testsdc',name))
    cv2.imshow('Learning Image',img)
    cv2.waitKey(100)
    cv2.destroyAllWindows()
    img=cv2.blur(img,(5,5))
    retval,img=cv2.threshold(img,201,255,cv2.THRESH_BINARY)
    img=cv2.resize(img,(24,24))
    image_as_array=numpy.ndarray.flatten(numpy.array(img))
    x.append(image_as_array)
    y.append(name.split('_')[0])

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=42)

scaler=StandardScaler()
scaler.fit(xtrain)
xtrain=scaler.transform(xtrain)
xtest=scaler.transform(xtest)

alg=MLPClassifier(solver='lbfgs',alpha=100.0,random_state=1,hidden_layer_sizes=50,verbose=True)
alg.fit(xtrain,ytrain)
print(alg.score(xtest,ytest))
joblib.dump(alg,'model.pkl')
print(xx)
Example no. 45
    (9, 14, 14, 2),  # intended as 9 inputs, two 14-neuron hidden layers, 1 output; note that every tuple entry becomes a hidden layer, since input/output sizes are inferred from the data
    'random_state': [1]
}

# Type of scoring to compare parameter combos
acc_scorer = make_scorer(accuracy_score)

# Run grid search
grid_obj = GridSearchCV(ann_clf, parameters, scoring=acc_scorer)
grid_obj = grid_obj.fit(X_train, y_train)

# Pick the best combination of parameters
ann_clf = grid_obj.best_estimator_

# Fit the best algorithm to the data
ann_clf.fit(X_train, y_train)

y_pred_ann = ann_clf.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix

cm_ann = confusion_matrix(y_test, y_pred_ann)
print(cm_ann)

ann_result = accuracy_score(y_test, y_pred_ann)
print(ann_result)

# with labels [0, 1], row 0 of the confusion matrix holds the true-0 samples;
# precision for class 0 divides by the column total cm[0][0] + cm[1][0]
recall_ann = cm_ann[0][0] / (cm_ann[0][0] + cm_ann[0][1])
precision_ann = cm_ann[0][0] / (cm_ann[0][0] + cm_ann[1][0])
print(recall_ann, precision_ann)
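
Equivalently, and less error-prone than indexing the confusion matrix by hand, sklearn's metric functions compute the same numbers with class 0 treated as the positive label:

from sklearn.metrics import precision_score, recall_score

recall_ann = recall_score(y_test, y_pred_ann, pos_label=0)
precision_ann = precision_score(y_test, y_pred_ann, pos_label=0)
print(recall_ann, precision_ann)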
Example no. 46
# Train MLPClassifier
###

# Take 33% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(
    data_tf, labels, test_size=TEST_SIZE, random_state=42)

# Note that another 10% of the training data would be used as validation data if early_stopping were enabled
# (it is disabled below); early stopping also allows the usage of an adaptive learning rate

print("Creating MLPClassifier...")
clf = MLPClassifier(solver=SOLVER, activation='tanh', verbose=True, early_stopping=False,
                    hidden_layer_sizes=LAYER, max_iter=ITERATIONS, alpha=L2_PENALTY, learning_rate_init=LEARNING_RATE_INIT)

print("Training ANN (max. " + str(ITERATIONS) + " itr.)...")
clf.fit(X_train, y_train)

###
# Export data
###

print("\nExporting data structures:")

print(" -> CountVectorizer")
with open("export/export_count.dat", "wb+") as handle:
    pickle.dump(count_vect, handle)

print(" -> Tf-idf Transformer")
with open("export/export_tfidf.dat", "wb+") as handle:
    pickle.dump(tf_transformer, handle)
Example no. 47
    '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38',
    '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50',
    '51', '52', '53', '54', '55', '56', '57', '58', '59', '60'
}]

y_test = data2['61']

mlp = MLPClassifier(hidden_layer_sizes=3,
                    activation='relu',
                    solver='adam',
                    alpha=0.0001,
                    batch_size='auto',
                    learning_rate='constant',
                    learning_rate_init=0.001,
                    max_iter=300)
mlp.fit(x_train, y_train)
print('MLP accuracy with one hidden layer of 3 neurons:', mlp.score(x_test, y_test))

mlp = MLPClassifier(hidden_layer_sizes=5,
                    activation='relu',
                    solver='adam',
                    alpha=0.0001,
                    batch_size='auto',
                    learning_rate='constant',
                    learning_rate_init=0.001,
                    max_iter=300)
mlp.fit(x_train, y_train)
print('MLP accuracy with one hidden layer of 5 neurons:', mlp.score(x_test, y_test))

mlp = MLPClassifier(hidden_layer_sizes=100,
                    activation='relu',
Example no. 48
def run(train=[], test=[], leafsize=5, bag=10):
    print()
    print()
    #overall accuracy
    RFACCout = 0.0
    DTACCout = 0.0
    SVMACCout = 0.0
    RFCACCout = 0.0
    MLPACCout = 0.0
    for cv in range(0, 10):
        traindata = train[cv]
        testdata = test[cv]
        trainX = traindata[:, 0:-1]
        trainY = traindata[:, -1]
        testX = testdata[:, 0:-1]
        testY = testdata[:, -1]
        sizeTrainSet = len(trainX)  #how many data in this train set
        sizeTestSet = len(testX)  #how many data in this test set
        baselineTrain = float(np.sum(trainY)) / sizeTrainSet
        baselineTest = float(np.sum(testY)) / sizeTestSet
        #print np.sum(inSamY==trainY)

        # ========================
        #Random Forest
        learner = rf.RandomForest(learner=rt.RandomTree,
                                  kwargs={"leaf_size": leafsize},
                                  bags=bag,
                                  boost=False,
                                  verbose=False)
        learner.addEvidence(trainX, trainY)
        #inSamY = learner.query(trainX)#in sample test
        outSamY = learner.query(testX)  #out sample test
        #inSamACC=np.float(np.sum(inSamY==trainY))/sizeTrainSet
        outSamACC = float(np.sum(outSamY == testY)) / sizeTestSet
        #RFACCin = RFACCin + inSamACC
        RFACCout = RFACCout + outSamACC

        # ========================
        #Random Forest - SKLEARN
        rfc = RandomForestClassifier(n_estimators=bag)
        rfc.fit(trainX, trainY)
        RFCoutSamY = rfc.predict(testX)
        RFCoutSamACC = float(np.sum(RFCoutSamY == testY)) / sizeTestSet
        RFCACCout = RFCACCout + RFCoutSamACC

        # ========================
        #Decision Tree
        clf = tree.DecisionTreeClassifier()
        clf = clf.fit(trainX, trainY)
        #DTinSamY = clf.predict(trainX)
        DToutSamY = clf.predict(testX)
        #DTinSamACC = np.float(np.sum(DTinSamY == trainY)) / sizeTrainSet
        DToutSamACC = float(np.sum(DToutSamY == testY)) / sizeTestSet
        #DTACCin = DTACCin + DTinSamACC
        DTACCout = DTACCout + DToutSamACC

        # ========================
        #SVM
        svm = SVC()
        svm.fit(trainX, trainY)
        #SVMinSamY = svm.predict(trainX)
        SVMoutSamY = svm.predict(testX)
        #SVMinSamACC = np.float(np.sum(SVMinSamY == trainY)) / sizeTrainSet
        SVMoutSamACC = float(np.sum(SVMoutSamY == testY)) / sizeTestSet
        #SVMACCin = SVMACCin + SVMinSamACC
        SVMACCout = SVMACCout + SVMoutSamACC

        # ========================
        # feed forward - neural net
        clf = MLPClassifier(solver='lbfgs',
                            alpha=1e-5,
                            hidden_layer_sizes=(100, ),
                            random_state=1)
        clf.fit(trainX, trainY)
        MLPoutSamY = clf.predict(testX)
        MLPoutSamACC = float(np.sum(MLPoutSamY == testY)) / sizeTestSet
        MLPACCout = MLPACCout + MLPoutSamACC

        print "================"
        print "doing cross-valid " + str(cv + 1) + ":"
        #print "in-sample Accuracy baseline: "+str(max(baselineTrain,1-baselineTrain))
        #print "in-sample Accuracy - Random Forest: " + str(inSamACC)
        #print "in-sample Accuracy - Decision Tree: " + str(DTinSamACC)
        #print "in-sample Accuracy - SVM: " + str(SVMinSamACC)
        print "out-sample Accuracy baseline: " + str(
            max(baselineTest, 1 - baselineTest))
        print "out-sample Accuracy - Random Forest: " + str(outSamACC)
        print "out-sample Accuracy - Random Forest - SKLEARN: " + str(
            RFCoutSamACC)
        print "out-sample Accuracy - Decision Tree: " + str(DToutSamACC)
        print "out-sample Accuracy - SVM: " + str(SVMoutSamACC)
        print "out-sample Accuracy - Neural Net MLP: " + str(MLPoutSamACC)
        print

    print()
    print("================================")
    print("cross validation done")
    print("Out-sample accuracy: ")
    print("Random Forest: " + str(RFACCout / 10))
    print("Random Forest - SKLEARN: " + str(RFCACCout / 10))
    print("Decision Tree: " + str(DTACCout / 10))
    print("SVM: " + str(SVMACCout / 10))
    print("Neural Net MLP: " + str(MLPACCout / 10))
Example no. 49
def evaluation_models():
    # method to evaluate the classifiers after training
    # the parameters of the various classifiers come from the training() method, which prints them; they are entered here manually
    dataset_path = './training_set.csv'
    # insert the path of the file to test here
    testset_path = 'testset'  # 1)

    # if there were only one dataset, comment out 1), 2), 3), 4), 5) and uncomment 6)

    dataset = pd.read_csv(dataset_path)
    testsetdata = pd.read_csv(testset_path)  # 2)
    label = read_label(dataset)
    # I skip the 80/20 split of the dataset because I assume two datasets, one for training and one for testing
    # replace missing values in the test set with the column means computed on the training set
    for count in label:
        media = dataset[count].mean()
        testsetdata[count] = testsetdata[count].fillna(media)  # 3)
        dataset[count] = dataset[count].fillna(media)

    # separate the attributes from the dataset
    training_x = dataset.iloc[:, 0:20].values
    training_y = dataset.iloc[:, 20].values
    test_x = testsetdata.iloc[:, 0:20].values  # 4)
    test_y = testsetdata.iloc[:, 20].values  # 5)
    #training_x, test_x, training_y, test_y = model.train_test_split(training_x, training_y, test_size=0.2, random_state=0) # 6)

    # normalize the data (the scaler is fit on the training data only)
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(training_x)
    training_x = scaler.transform(training_x)
    test_x = scaler.transform(test_x)

    # feature selection
    test_x = featureSelection(test_x)
    training_x = featureSelection(training_x)

    # model evaluation
    classifier = MLPClassifier(max_iter=10000,
                               activation='relu',
                               hidden_layer_sizes=(100, 50),
                               learning_rate='adaptive',
                               learning_rate_init=0.01,
                               solver='sgd')
    classifier.fit(training_x, training_y)
    print('MLP results')
    evaluation(classifier, test_x, test_y)
    classifier1 = RandomForestClassifier(criterion='entropy',
                                         max_depth=100,
                                         max_features='log2',
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         n_estimators=400)
    classifier1.fit(training_x, training_y)
    print('RandomForest results')
    evaluation(classifier1, test_x, test_y)

    classifier2 = SVC(C=10,
                      decision_function_shape='ovo',
                      gamma=10,
                      kernel='rbf')
    classifier2.fit(training_x, training_y)
    print('SVC results')
    evaluation(classifier2, test_x, test_y)

    classifier3 = DecisionTreeClassifier(criterion='entropy',
                                         max_depth=100,
                                         max_features=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         splitter='best')
    classifier3.fit(training_x, training_y)
    print('DecisionTree results')
    evaluation(classifier3, test_x, test_y)

    classifier4 = GaussianNB(priors=None, var_smoothing=1e-9)
    classifier4.fit(training_x, training_y)
    print('NaiveBayes results')
    evaluation(classifier4, test_x, test_y)

    classifier5 = KNeighborsClassifier(algorithm='auto',
                                       leaf_size=30,
                                       n_neighbors=10,
                                       p=3,
                                       weights='distance')
    classifier5.fit(training_x, training_y)
    print('KNeighbors results')
    evaluation(classifier5, test_x, test_y)
print(confusion_matrix(y_test, y_pred_svm))
print('==== PRECISION ====')
print(precision_score(y_test, y_pred_svm) * 100, '%')
print('==== ACCURACY ==== ')
print(accuracy_score(y_test, y_pred_svm) * 100, '%')
print('==== RECALL ==== ')
print(recall_score(y_test, y_pred_svm, average='binary') * 100, '%')

# ===============================================================================================
# NEURAL NETWORK
# ===============================================================================================

# Time when we start building the model
time_ini_rn = time()
mlp = MLPClassifier(solver='adam', activation='relu', hidden_layer_sizes=(6, 4), max_iter=1000)
mlp.fit(X_train, y_train)
# Time when we finish building the model
time_fin_rn = time()
# Time spent building the model
t_rn = time_fin_rn - time_ini_rn
print('==== NEURAL NETWORK build time ====')
print(t_rn)

# Predictions
y_pred_rn = mlp.predict(X_test)

# Time to classify:
# Time when we start
time_ini_rn = time()
# classify
scores_rn = cross_val_score(mlp, X, y, cv=2, scoring='accuracy')
Example no. 51
             gauss = np.append(gauss,gaussian(image,clusterCenters[i][n],betaVals[i][n]))
         gaussiansTrain = np.vstack((gaussiansTrain,gauss))
     gaussiansTrain = gaussiansTrain[1:]
     
     #computes hidden layer neuron values for testing images input
     gaussiansTest = np.zeros(KM)
     for image in test:
         gauss = np.array([])
         for n in range(KM):
             gauss = np.append(gauss,gaussian(image,clusterCenters[i][n],betaVals[i][n]))
         gaussiansTest = np.vstack((gaussiansTest,gauss))
     gaussiansTest = gaussiansTest[1:]
     
     #MLP Classifier model which takes gaussian neurons as input and predicts output
     clf = MLPClassifier(solver = "lbfgs", alpha=1e-5, hidden_layer_sizes=(KM,), activation = "identity")
     clf.fit(gaussiansTrain, trainLabels)    #compares output to label then backpropagates to adjust weights
     predicted = clf.predict(gaussiansTest)  #prediciton using test images to determine accuracy
     
     correct = 0
     for t in range(size):
         if (np.array_equal(predicted[t], testLabels[t])):   #checks for correctness
             correct += 1
     
     percent = round(100*correct/size,2)
     print("Accuracy = ", percent, "%")
     
     percentages = np.append(percentages, percent)
     
 avg = round(np.mean(percentages),2)
 averages = np.append(averages,avg)
 print("")
    print("accuracy: %s" % ((TP + TN) * 1.0 / sum([TP, TN, FP, FN])))
    print("precision: %s " % ((TP) * 1.0 / (TP + FP)))
    print("recall: %s" % ((TP) * 1.0 / (TP + FN)))
    """
    output:
    [ True  True  True  True  True  True  True]
    accuracy: 1.0
    precision: 1.0 
    recall: 1.0
    """

    # Implement the perceptron model with sklearn; sklearn uses a cross-entropy loss
    clf = MLPClassifier(solver="lbfgs",
                        alpha=1e-1,
                        hidden_layer_sizes=5,
                        random_state=1)
    clf.fit(X, Y)
    pred2 = clf.predict(X)
    print("使用sklearn构建的MLP模型在二分类问题上的评估结果如下:")
    print(pred2 == Y)
    [TP, TN, FP, FN] = evaluate(Y, pred2)
    print("accuracy: %s" % ((TP + TN) * 1.0 / sum([TP, TN, FP, FN])))
    print("precision: %s " % ((TP) * 1.0 / (TP + FP)))
    print("recall: %s" % ((TP) * 1.0 / (TP + FN)))
    """
    output:
    [ True  True  True  True  True  True  True]
    accuracy: 1.0
    precision: 1.0
    recall: 1.0
    """
Esempio n. 53
0
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier(max_depth=2, random_state=0)
clf2.fit(X_train, y_train)
clf2.predict(X_test)
clf2.score(X_test, y_test)

from sklearn.naive_bayes import GaussianNB
clf3 = GaussianNB()
clf3.fit(X_train, y_train)
clf3.predict(X_test)
clf3.score(X_test, y_test)

from sklearn.neural_network import MLPClassifier
clf4 = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf4.fit(X_train, y_train)
clf4.predict(X_test)
clf4.score(X_test, y_test)

from sklearn import linear_model
clf5 = linear_model.LogisticRegression()
clf5.fit(X_train,y_train)
clf5.predict(X_test)
clf5.score(X_test,y_test)


# compare the fitted classifiers on the held-out test set
# (plt.plot cannot take classifier objects, and clf1 is not defined in this excerpt)
import matplotlib.pyplot as plt
scores = [clf.score(X_test, y_test) for clf in (clf2, clf3, clf4, clf5)]
plt.bar(['RandomForest', 'GaussianNB', 'MLP', 'LogisticRegression'], scores)
plt.show()
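
A single split can be noisy; a hedged alternative compares the same models with cross-validation, assuming `X` and `y` hold the full, unsplit data:

from sklearn.model_selection import cross_val_score

for name, model in [('RandomForest', clf2), ('GaussianNB', clf3),
                    ('MLP', clf4), ('LogisticRegression', clf5)]:
    scores = cross_val_score(model, X, y, cv=5)
    print(name, round(scores.mean(), 3))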


Esempio n. 54
0
        if (target_test[x] == targets[x]):
            corrects += 1

    print("Accuracy: {}".format(corrects / len(target_test)))

    my_classifier_accuracy += corrects / len(target_test)
    plt.plot(graph)
    plt.ylabel('Accuracy')
    plt.xlabel('Loop')
    plt.title('Iris')
    plt.show()

    mlp = MLPClassifier(hidden_layer_sizes=(4,),
                        learning_rate_init=0.08,
                        max_iter=1000)
    mlp.fit(data_train, target_train)
    predictions = mlp.predict(data_test)

    corrects = 0
    for x in range(len(target_test)):
        if (target_test[x] == predictions[x]):
            corrects += 1

    scikit_classifier_accuracy += corrects / len(target_test)

print("My accuracy: {}".format(my_classifier_accuracy / 10))
print("Scikits accuracy: {}".format(scikit_classifier_accuracy / 10))

# Pima Indian Diabetes
headers = [
    'times_pregnant', 'glucose', 'blood_pressure', 'triceps', 'insulin', 'bmi',
Esempio n. 55
0
# print(dataset.head())

print(dataset.describe().transpose())

train_x, test_x, train_y, test_y = train_test_split(dataset[HEADERS[1:-1]],
                                                    dataset[HEADERS[-1]])

scaler = StandardScaler()

scaler.fit(train_x)

train_x = scaler.transform(train_x)

test_x = scaler.transform(test_x)

# min = None

# for i in range(10):
clf = MLPClassifier(activation="identity", learning_rate="invscaling")
# clf = MLPClassifier(activation="logistic")
# clf = MLPClassifier(activation="tanh")
# clf = MLPClassifier(hidden_layer_sizes=(13,13),activation="relu", max_iter=300)

clf.fit(train_x, train_y)

print("Training Accuracy  :", clf.score(train_x, train_y))

print("Test Accuracy      :", clf.score(test_x, test_y))

print()
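
Rather than commenting activations in and out, the search can be automated; a sketch with GridSearchCV, where the grid values are illustrative only:

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

param_grid = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'hidden_layer_sizes': [(13, 13), (100,)],
}
search = GridSearchCV(MLPClassifier(max_iter=300), param_grid, cv=3)
search.fit(train_x, train_y)
print("Best params   :", search.best_params_)
print("Best CV score :", search.best_score_)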
Esempio n. 56
0
        l = preprocess_input(x)
        image_list += [l]
        row = [0] * 6  #6 different labels
        row[dim] = 1
        label_list += [row]
    dim += 1
x_train = np.concatenate((image_list))
y_train = np.array(label_list)
sh = x_train.shape
col_dim = sh[1] * sh[2] * sh[3]
xx = x_train.reshape([sh[0], col_dim])
clf = MLPClassifier(solver='lbfgs',
                    alpha=1e-5,
                    hidden_layer_sizes=(10,),
                    random_state=1)
clf.fit(xx, y_train)


def claim_prediction(picture_file):
    #turning raw data into array:
    img = image.load_img(picture_file, target_size=(64, 64))
    x_claim = image.img_to_array(img)
    x_claim = np.expand_dims(x_claim, axis=0)
    x_claim = preprocess_input(x_claim)
    #reshaping array:
    sh = x_claim.shape
    col_dim = sh[1] * sh[2] * sh[3]
    xx = x_claim.reshape([sh[0], col_dim])
    y = clf.predict(xx.reshape(1, -1))
    position = np.argmax(y)
    return ([labels[position], position])
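
A usage sketch (the image path here is hypothetical):

label, position = claim_prediction('claim_photo.jpg')  # 'claim_photo.jpg' is a placeholder path
print(label, position)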
                               solver="sgd",
                               learning_rate_init=0.001,
                               max_iter=len(train_df) * 2,
                               shuffle=True,
                               random_state=0,
                               tol=1e-6,
                               verbose=True,
                               early_stopping=False,
                               batch_size=BATCH_SIZE)

X = train_df.iloc[:, :-1]
y = train_df['Descript']
print("Starting training, batch size: %i, training samples: %i" %
      (BATCH_SIZE, len(X)))

mlp_classifier.fit(X, y)

print('Writing classifier to file. Time: {:.2f}'.format(time.time() -
                                                        start_time))
print('Accuracy of classifier on train set: {:.2f},'
      ' time: {:.2f}, test set size: {:.2f}'.format(mlp_classifier.score(X, y),
                                                    time.time() - start_time,
                                                    len(X)))

test_df = pd.read_csv('categoriacal/some_test.csv', header=0, index_col=0)
test_df = test_df.drop(DROP, axis=1)
test_df = test_df.sample(frac=1)

test_df['PdDistrict'] = le.fit_transform(test_df['PdDistrict'])
test_df['DayOfWeek'] = le.fit_transform(test_df['DayOfWeek'])
test_df['Date'] = le.fit_transform(test_df['Date'])
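
Calling `fit_transform` on the test frame re-learns each mapping, so the integer codes need not match those used in training; a safer sketch, assuming the raw training columns are still available when the encoders are fitted:

from sklearn.preprocessing import LabelEncoder

# one encoder per categorical column, fitted on the training data only,
# so train and test share the same label-to-integer mapping
encoders = {col: LabelEncoder().fit(train_df[col])
            for col in ('PdDistrict', 'DayOfWeek', 'Date')}
for col, enc in encoders.items():
    test_df[col] = enc.transform(test_df[col])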
Esempio n. 58
0
  train_dataset, test_dataset = getKFoldDatasets(i)

  dataset = ClassifierDataset(train_dataset, test_dataset)


  # Creating tf-idf training and test matrix
  tfIdfVectorizer=TfidfVectorizer(use_idf=True)
  train_matrix = tfIdfVectorizer.fit_transform(dataset.training_data).toarray()
  test_matrix = tfIdfVectorizer.transform(dataset.test_data).toarray()


  # Training MLP model
  print('Starting fit')
  clf = MLPClassifier(random_state=1, max_iter=300, hidden_layer_sizes=(20, 20, 20))
  clf.fit(train_matrix, dataset.training_target)
  print('Finished fit')


  # Predicting test dataset classification
  print('Starting prediction')
  test_result = clf.predict(test_matrix)
  dataset.setTestResult(test_result)
  print('Finished prediction')


  # Printing metrics
  print('\n------------------------------------------------')
  print(f'Classification results for fold {i} on 80% of the full dataset')
  print('------------------------------------------------')
  precision_score, error, confusion_matrix = dataset.getResultMetrics()
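
The unpacked metrics are never displayed in the excerpt; a minimal sketch printing them:

  print(f'Precision: {precision_score}')
  print(f'Error: {error}')
  print('Confusion matrix:')
  print(confusion_matrix)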
Esempio n. 59
0
def main():
    quesSet = "hamlet_all.txt"
    labelSet = "labels_all.txt"

    words = getTextWords(quesSet)
    items, counts = getFreq(words)
    #printFreq(items)
    #print(len(items))
    #print(counts)
    lines = getLineWords(quesSet)
    lineFreq = getLineFreq(lines, counts)

    print("Loading...")
    train_dataSet, train_hwLabels = readDataSet(labelSet, len(lines),
                                                len(items), lineFreq)
    QuesNum = len(train_dataSet)

    clf = MLPClassifier(hidden_layer_sizes=(50, ),
                        activation='logistic',
                        solver='adam',
                        learning_rate_init=0.001,
                        max_iter=1000)

    knn_hwLabels = readDataSet_K(labelSet, len(lines), len(items), lineFreq)
    knn = neighbors.KNeighborsClassifier(algorithm='kd_tree', n_neighbors=1)
    Knn = neighbors.NearestNeighbors(n_neighbors=3)

    #print(clf,'\n',knn)

    op = input("Do you want to do Cross-Validation? ('y' to confirm) ")

    if op == 'y':
        Error_M = 0
        Error_K = 0

        print("LOOCV initiated.\n")

        for j in range(QuesNum):

            Error_M, Error_K = validate(train_dataSet, train_hwLabels,
                                        knn_hwLabels, clf, knn, QuesNum, j,
                                        Error_M, Error_K)

        print("\nLOOCV complete.")
        print("Error (MLP Neural Network):", Error_M,
              "\nError (KNN Algorithm):", Error_K)
        print("Accuracy (MLP Neural Network):", 1 - Error_M / QuesNum,
              "\nAccuracy (KNN Algorithm):", 1 - Error_K / QuesNum)

    print("\nTraining with whole dataset...")
    clf.fit(train_dataSet, train_hwLabels)
    knn.fit(train_dataSet, knn_hwLabels)
    Knn.fit(train_dataSet, knn_hwLabels)
    print("Training complete.")

    op = input("\nEnter your questions ('n' to quit): ")
    while op != 'n':
        a = getQuesFreq(counts, op)
        res = clf.predict(a)
        Res = knn.predict(a)
        for v in a:
            if sum(v) == 0:
                print("Unknown question type")
                break
            Class, Classifier = getQuesClass(res, int(Res))
            print("Question Type:", Class, Classifier)

            dist, neigh = Knn.kneighbors(a)
            #print(dist,neigh)
            for v in dist:
                maxDist = max(v)
            if maxDist >= 2:
                print("\nResults may compromise. Consider more questions:\n")
                for v in neigh:
                    for qNum in v:
                        print('\t', getQues("hamlet_all.txt", qNum), end='')

        op = input("\nEnter your questions ('n' to quit): ")
Esempio n. 60
0
        correct = 0
        for x in range(0, len(classes)):
            if classes[x] == y[x]:
                correct += 1
        print(str((correct / 150.0) * 100) + "% accurate")

    # clf classifier from skLearn
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(5, 2),
                        random_state=3)

    y = y.ravel()
    train_y = np.array(y).astype(int)

    clf.fit(normalized_X, train_y)
    clf.predict(normalized_X)
    print(clf.score(normalized_X, y))

    print("\nPima-indians:\n")

    array = np.genfromtxt(
        "/Users/jeremy/Documents/cs450/pima-indians-diabetes.csv",
        delimiter=",")
    X = array[:, :-1]
    Y = array[:, -1:]
    normalized_X = preprocessing.normalize(X)
    normalized_X = np.insert(normalized_X, normalized_X.shape[1], -1, axis=1)
    num_cols = normalized_X.shape[1]
    neuralNet = NeuralNet(num_cols, [4, 3], normalized_X)
    for i in range(0, 200):