Example #1
 def __init__(self, methodname='linear regression', trainingpart=0.9):
     """
     Class initializer.
         :param self: the instance itself
         :param methodname: model type; one of 'linear regression', 'svc', 'neural network'
         :param trainingpart: fraction of the data used for training, 0.9 by default
     """
     if trainingpart <= 0 or trainingpart >= 1:
         raise ValueError("trainingpart must lie in the open interval (0, 1)")
     # choose the model
     if methodname == 'linear regression':
         self.model = LinearRegression()
     elif methodname == 'svc':
         self.model = SVC()
         print("Warning: your y data's type needs to be int!")
     elif methodname == 'neural network':
         self.model = MLPClassifier()
         print("Warning: your y data's type needs to be int!")
     else:
         methodname = 'linear regression'
         self.model = LinearRegression()
     # set the remaining attributes
     self.trainingpart = trainingpart
     self.methodname = methodname
     self.X = None
     self.y = None
     self.train_X = None
     self.test_X = None
     self.train_y = None
     self.test_y = None
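A minimal usage sketch for the wrapper above, assuming the enclosing class is named ModelWrapper (hypothetical; the snippet omits the class statement) and that LinearRegression, SVC, and MLPClassifier come from sklearn:

# 'ModelWrapper' is a stand-in name for the class this __init__ belongs to.
wrapper = ModelWrapper(methodname='neural network', trainingpart=0.8)
print(wrapper.methodname)  # -> 'neural network'
print(wrapper.model)       # -> an untrained MLPClassifier instance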
Example #2
class BCISignal:
    def __init__(self, fs, bands, ch_names, states_labels, indexes):
        self.states_labels = states_labels
        self.bands = bands
        self.prefilter = FilterSequence([ButterFilter((0.5, 45), fs, len(ch_names))])
        self.csp_pools = [SpatialDecompositionPool(ch_names, fs, bands, 'csp', indexes) for _label in states_labels]
        self.csp_transformer = None
        self.var_detector = InstantaneousVarianceFilter(len(bands)*len(indexes)*len(states_labels), n_taps=fs//2)
        self.classifier = MLPClassifier(hidden_layer_sizes=(), early_stopping=True, verbose=True)
        #self.classifier = RandomForestClassifier(max_depth=3, min_samples_leaf=100)

    def fit(self, X, y=None):
        X = self.prefilter.apply(X)
        for csp_pool, label in zip(self.csp_pools, self.states_labels):
            csp_pool.fit(X, y == label)
        self.csp_transformer = FilterStack([pool.get_filter_stack() for pool in self.csp_pools])
        X = self.csp_transformer.apply(X)
        X = self.var_detector.apply(X)
        self.classifier.fit(X, y)
        print('Fit accuracy {}'.format(sum(self.classifier.predict(X) == y)/len(y)))

    def apply(self, chunk: np.ndarray):
        chunk = self.prefilter.apply(chunk)
        chunk = self.csp_transformer.apply(chunk)
        chunk = self.var_detector.apply(chunk)
        predicted_labels = self.classifier.predict(chunk)
        return predicted_labels
Example #3
def init_Q():
    # make some dummy training set
    board = init_board()
    board_vec = board2vec(board)
    X = np.array([board_vec])
    y = [(BOARD_SIZE-1)**2]
    board_vec = np.invert(board_vec)
    X = np.append(X,np.array([board_vec]),axis=0)
    y.append(0)
    
    edges = get_potential_moves(board) # all the edges, since the board is empty
    for edge in edges:
        i = edge2ind(edge)
        board_vec[i] = False
        X = np.append(X,np.array([board_vec]),axis=0)
        y.append(check_surrounding_squares(board,edge,0))
        board_vec[i] = True

    Q = MLPClassifier(warm_start=True,
                      hidden_layer_sizes=(BOARD_SIZE, 10*BOARD_SIZE, BOARD_SIZE),
                      tol=1e-10)
    # Q = DecisionTreeRegressor()

    # shf = range(len(y))
    # for j in range(100):
    #     random.shuffle(shf)
    #     Xshf = [X[i] for i in shf]
    #     yshf = [y[i] for i in shf]
    triedy = range((BOARD_SIZE-1)**2 + 1)
    Q.partial_fit(np.repeat(X, 100, axis=0), np.repeat(y, 100, axis=0), classes=triedy)
    print(Q.predict(X))
    return Q
Example #4
def main():
    enc = OneHotEncoder(n_values=[7,7,7,7,7,7])
    conn = sqlite3.connect('server.db')
    cursor = conn.cursor()
    all_ = pandas.read_sql_query('SELECT layers.burger, labels.output, layers.layer0, layers.layer1, layers.layer2, layers.layer3, layers.layer4, layers.layer5 FROM layers,labels WHERE layers.burger = labels.burger', conn, index_col='burger')
    
    X = all_.drop(['output'], axis=1)
    y = all_['output']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = MLPClassifier(solver='adam', activation='relu',
                        verbose=False,
                        max_iter=10000,
                        tol=1e-9,
                        random_state=1)

    X_train_categoricals = X_train[column_names]
    tX_train_categoricals = enc.fit_transform(X_train_categoricals)
    clf.fit(tX_train_categoricals, y_train.values.astype(int))

    # reuse the fitted encoder on the test set: transform, not fit_transform
    X_test_categoricals = X_test[column_names]
    tX_test_categoricals = enc.transform(X_test_categoricals)
    prediction = clf.predict(tX_test_categoricals)
    
    print(classification_report(y_test, prediction))
    
    print_eval(y_test, prediction)
Example #5
def test_gradient():
    # Test gradient.

    # This makes sure that the activation functions and their derivatives
    # are correct. The numerical and analytical computation of the gradient
    # should be close.
    for n_labels in [2, 3]:
        n_samples = 5
        n_features = 10
        X = np.random.random((n_samples, n_features))
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                solver='lbfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)

            theta = np.hstack([l.ravel() for l in mlp.coefs_ +
                               mlp.intercepts_])

            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])

            activations = []
            deltas = []
            coef_grads = []
            intercept_grads = []

            activations.append(X)
            for i in range(mlp.n_layers_ - 1):
                activations.append(np.empty((X.shape[0],
                                             layer_units[i + 1])))
                deltas.append(np.empty((X.shape[0],
                                        layer_units[i + 1])))

                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            [value, grad] = loss_grad_fun(theta)
            numgrad = np.zeros(np.size(theta))
            n = np.size(theta, 0)
            E = np.eye(n)
            epsilon = 1e-5
            # numerically compute the gradients
            for i in range(n):
                dtheta = E[:, i] * epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                              loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)
def mlp_cv_architecture(X,Y):
    kfold = KFold(n_splits=10)

    architectures = ( (500,2), (400,2), (400,100,2), (400,200,2), (400,100,50,2), (400,200,50,2) )

    res_dict = {}

    for architecture in architectures:
        mlp = MLPClassifier(solver='sgd',
                learning_rate='adaptive',
                hidden_layer_sizes=architecture,
                random_state=1)

        train_times    = []
        train_accuracy = []
        test_accuracy  = []

        for train, test in kfold.split(X):
            t_tr = time.time()
            mlp.fit( X[train], Y[train] )
            train_times.append( time.time() - t_tr )
            acc_train = np.sum( np.equal( mlp.predict( X[train]), Y[train] ) ) / float(X[train].shape[0])
            acc_test  = np.sum( np.equal( mlp.predict( X[test]), Y[test] ) ) / float(X[test].shape[0])
            train_accuracy.append( acc_train )
            test_accuracy.append(  acc_test )

        res_dict[str(architecture)] = (np.mean(train_accuracy), np.std(train_accuracy),
                          np.mean(test_accuracy), np.std(test_accuracy),
                          np.mean(train_times), np.std(train_times))

    with open('./../results/res_nncv_architecture.pkl', 'wb') as f:
        pickle.dump(res_dict, f)
def train():
    utl.print_title('Getting data...')
    X, Tc, X_test, Tc_test = dpp.getdata_arnold()
    #X, Tc, X_test, Tc_test = dpp.getdata_mnist()

    utl.print_title('Preparing data...')
    X, X_test = dpp.scale_data(X, X_test)
    T = dpp.one_hot_encode(Tc)
    T_test = dpp.one_hot_encode(Tc_test)

    utl.print_title('Sanity checks...')
    print('Shape X:', X.shape)
    print('Shape Tc:', Tc.shape)
    print('Shape T:', T.shape)
    print('Shape X_test:', X_test.shape)
    print('Shape Tc_test:', Tc_test.shape)
    print('Shape T_test:', T_test.shape)

    utl.print_title('Training the network...')
    classifier = MLPClassifier(solver='adam', learning_rate_init=1e-3, hidden_layer_sizes=(100,), verbose=True, max_iter=200)
    classifier.fit(X, T)

    train_score, Pc = get_results(classifier, X, T)
    test_score, Pc_test = get_results(classifier, X_test, T_test)

    utl.print_title('Results:')
    print('Classification counts train (target):     ',  np.bincount(Tc.reshape(-1)))
    print('Classification counts train (prediction): ',  np.bincount(Pc))

    print('\nClassification counts test (target):     ',  np.bincount(Tc_test.reshape(-1)))
    print('Classification counts test (prediction): ',  np.bincount(Pc_test))

    print('\nTrain score: ', train_score)
    print('Test score:  ', test_score)
  def create(self):
    csvPath = self.sourceCsvFile

    dataset = np.loadtxt( csvPath, dtype='int', delimiter=",", skiprows=1,converters={ \
        4: convertCell, \
        5: convertCell, \
        6: convertCell, \
        7: convertCell, \
        8: convertCell, \
        9: convertCell, \
        10: convertCell, \
        11: convertCell, \
        12: convertCell, \
        13: convertCell, \
        14: convertCell, \
        15: convertCell \
        } )

    non_cat_data = dataset[:, [0,1,2] ]
    cat_data = dataset[:, [4,5,6,7,8,9,10,11,12,13,14,15] ]

    output_data = dataset[:, 3]

    enc =  preprocessing.OneHotEncoder()
    enc.fit(cat_data)
    cat_out = enc.transform(cat_data).toarray() 
    merge_data = np.concatenate((non_cat_data, cat_out), axis=1)
    d(merge_data[0])

    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    #clf = tree.DecisionTreeClassifier()
    clf = clf.fit(merge_data, output_data)

    s = pickle.dumps(clf)
    dtFileName = "%s\\save.pkl" % self.outDir
    dtFile = open(dtFileName, 'wb')
    print(dtFileName)
    dtFile.write(s)
    dtFile.close()

    choicesFile = open("%s\\choices.pkl" % self.outDir, 'wb')
    s = pickle.dumps(choiceArr)
    choicesFile.write(s)
    choicesFile.close()

    sample_inputs = []
    for i in range(100):
      sample_inputs.append(merge_data[i*500])
    file = open("%s\\sampleInputs.pkl" % self.outDir, 'wb')
    file.write(pickle.dumps(sample_inputs))
    file.close()

    file = open("%s\\def.txt" % self.outDir, 'w')
    file.write("input file: %s\n" % self.sourceCsvFile)
    file.close()

    print(dataset[722])
    print(merge_data[722])
    print(output_data[722])
    print(clf.predict(sample_inputs))
Example #9
class NeuralLearner(Learner.Learner):
    def __init__(self, FeatureMask):
        super(NeuralLearner, self).__init__(FeatureMask)
        self.expected = FeatureMask.LabelsForAllPoints
        #self.model = MLPClassifier(solver='sgd', hidden_layer_sizes=(64, 32))
        self.model = MLPClassifier(solver='sgd',
                                   learning_rate='constant',
                                   momentum=.9,
                                   nesterovs_momentum=True,
                                   learning_rate_init=0.2)

    def FitAndPredict(self, mask):
        self.Fit(mask)
        return self.Predict(mask)

    def SetupInputActivations(self, FeatureMask):
        arr = np.hstack([FeatureMask.ForceStd.reshape(-1, 1),
                         FeatureMask.ForceMinMax.reshape(-1, 1),
                         FeatureMask.CannyFilter.reshape(-1, 1)])
        expected = FeatureMask.LabelsForAllPoints
        return arr, expected

    def Fit(self, mask):
        arr, expected = self.SetupInputActivations(mask)
        self.model.fit(arr, expected)

    def Predict(self, mask):
        arr, expected = self.SetupInputActivations(mask)
        return self.model.predict(arr).reshape(-1, 1)
def train_on_source(X, Y):

    print("Start Learning Net on source")

    clf = MLPClassifier(solver='lbfgs',
            alpha=1e-5,
            hidden_layer_sizes=(500, 2),
            random_state=1,
            warm_start=True,
            max_iter=400)

    clf.fit(X, Y)
    #new_loss = 0
    #old_loss = 10000
    #for step in range(200):
    #    clf.fit(X, Y)
    #    new_loss = clf.loss_
    #    # stop training, if improvement is small
    #    improvement = abs(new_loss - old_loss)
    #    print("Step:", step, "Loss:", new_loss, "Improvement:", improvement)
    #    if improvement < 1.e-5:
    #        print("Training converged!")
    #        break
    #    old_loss = new_loss
    print("Pretrained CLF on Source with num_iter:", clf.n_iter_)
    return clf
Example #11
def test_partial_fit_classes_error():
    # Tests that passing different classes to partial_fit raises an error
    X = [[3, 2]]
    y = [0]
    clf = MLPClassifier(solver='sgd')
    clf.partial_fit(X, y, classes=[0, 1])
    assert_raises(ValueError, clf.partial_fit, X, y, classes=[1, 2])
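For contrast, a small sketch of the intended incremental-learning pattern (toy data, not from the test suite): declare the full label set on the first partial_fit call, after which later calls may omit classes.

from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='sgd')
clf.partial_fit([[3, 2]], [0], classes=[0, 1])  # declare every class up front
clf.partial_fit([[1, 4]], [1])                  # subsequent calls reuse the declared classes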
Example #12
def train(classes, y_samples, feature_dict, classes_dict):
    # Requires sklearn >= 0.18 for MLPClassifier
    from sklearn.neural_network import MLPClassifier

    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(50, 25), random_state=1, verbose=True)
    clf.fit(y_samples, classes)

    return clf
Example #13
def test_adaptive_learning_rate():
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd',
                        learning_rate='adaptive')
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
    assert_greater(1e-6, clf._optimizer.learning_rate)
Example #14
 def mlp_train(self,x_train,y_train):
     scaler = StandardScaler()
     scaler.fit(x_train)
     x_train = scaler.transform(x_train)
     clf = MLPClassifier(max_iter=500, alpha=1e-5, hidden_layer_sizes=(40, 100, 80), warm_start=True, random_state=0)
     clf.fit(x_train,y_train)
     
     return clf
Example #15
def test_tolerance():
    # Test tolerance.
    # It should force the solver to exit the loop when it converges.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd')
    clf.fit(X, y)
    assert_greater(clf.max_iter, clf.n_iter_)
def main():
    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

    classifier = MLPClassifier(max_iter=1000)
    classifier.fit(X_train, y_train)
    s = classifier.score(X_test, y_test)
    print(s)
Example #17
def fitMLPs(trainIndexes, datasets):
	classifiers = []
	for (x,y) in datasets:
		cl = MLPClassifier(solver='lbfgs', alpha=1e-4, hidden_layer_sizes=(76, 30), random_state=1, momentum=0.8)
		data, target = listToData(trainIndexes, x, y)
		cl.fit(data, target)
		classifiers.append(cl)
	return classifiers 
def fit_and_score_ann(x_train, y_train, x_test, y_test, config):
    ann = MLPClassifier(solver=config.ann.solver,
                        max_iter=Configuration.ANN_MAX_ITERATIONS,
                        alpha=config.ann.alpha,
                        hidden_layer_sizes=(config.ann.hidden_neurons,),
                        learning_rate='adaptive')

    ann.fit(x_train, y_train)
    return ann.score(x_test, y_test)
Example #19
 def mlpTest(self):
     mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=1000, alpha=1e-4,
         solver='sgd', verbose=10, tol=1e-4, random_state=1)
     mlp.fit(self.X_train,self.Y_train)
     predicted = mlp.predict(self.X_test)
     print("Classification report for classifier %s:\n%s\n"
         % (mlp, metrics.classification_report(self.Y_test, predicted)))
     print("Confusion matrix:\n%s"
         % metrics.confusion_matrix(self.Y_test, predicted))
Example #20
def do_mlp(x_train, x_test, y_train, y_test):

    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes = (10, 4),
                        random_state = 1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred))
Example #21
def do_mlp(x_train, x_test, y_train, y_test):
    #mlp
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(5, 2),
                        random_state=1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    do_metrics(y_test,y_pred)
Example #22
def main():
    np.random.seed(RANDOM_STATE)
    pd.set_option('display.width', 0)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    
    data = pd.read_csv('data/train.csv')
    
    #test_data = pd.read_csv('data/test.csv')
    
    records = []
    
    #n = 42000*0.8
    n = 10000
    X, y = extract_data(data, n)
    activation = 'tanh'
    param_dict = {'batch_size': [100, 200], 'momentum': [0.9, 0.99 ], 'learning_rate_init':[0.001, 0.01, 0.1]}
    #param_dict = {'batch_size': [200], 'momentum': [0.9], 'learning_rate_init':[0.1]}
    for param in ParameterGrid(param_dict):       
        nn = MLPClassifier(solver='sgd',
                           tol=float('-inf'),
                           warm_start=True,
                           max_iter=1,
                           hidden_layer_sizes=[200],
                           random_state=RANDOM_STATE)
        #nn_params = {'algorithm': 'sgd', 'tol': float
        nn_params = nn.get_params()
        nn_params.update(param)
        nn.set_params(**nn_params)
        #nn = MLPClassifier(**nn_params)
        time_limits = list(range(1, 60, 60))
        try:
            evaluation_list = trainer_by_time(X, y, time_limits, nn)
        except Exception:
            evaluation_list = [{}]
            
        for i in range(len(evaluation_list)):
            evaluation = evaluation_list[i]
            record = {}
            record['n'] = n
            record['time limit'] = time_limits[i]
            record.update(evaluation)  
            record.update(param)
            records.append(record)
        
        
    df = pd.DataFrame(records)
    cols = list(df.columns)
    keys = evaluation_list[0].keys()
    cols = [item for item in cols if item not in keys]
    cols += keys
    df = df.reindex(columns=cols)
    now = datetime.datetime.now()    
    result_file = open('result.txt', 'a')
    print(now,file=result_file)
    print(df)
    print(df,file=result_file)
Example #23
def MLP_classifier(train_x, train_y):
    clf = MLPClassifier(activation='relu', solver='adam', alpha=0.0001,
               batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=True,
               epsilon=1e-08, hidden_layer_sizes=(50, 50), learning_rate='constant',
               learning_rate_init=0.01, max_iter=3000, momentum=0.9,
               nesterovs_momentum=True, power_t=0.5, random_state=0, shuffle=True,
               validation_fraction=0.1, verbose=False,
               warm_start=False)
    clf.fit(train_x, train_y)
    return clf
Example #24
def test_early_stopping_stratified():
    # Make sure data splitting for early stopping is stratified
    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
    y = [0, 0, 0, 1]

    mlp = MLPClassifier(early_stopping=True)
    with pytest.raises(
            ValueError,
            match='The least populated class in y has only 1 member'):
        mlp.fit(X, y)
Example #25
def do_mlp(x_train, x_test, y_train, y_test):
    # Building deep neural network
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes = (5, 2),
                        random_state = 1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred))
    print(metrics.confusion_matrix(y_test, y_pred))
def neural_network_voting_systemLogistic():
    import pydotplus
    a, b, c, d, e, f = traing_test_data_set()
    iterations = [75, 60, 90, 95, 95]
    voting_pred = list()
    for i in range(0, len(d[0])):
        voting_pred.append([])
    import random
    for feature_number in range(1, 6):
        print("Feature Number : " + str(feature_number))
        train_data, train_label = a[feature_number - 1], b[feature_number - 1]
        test_data, test_label = c[feature_number - 1], d[feature_number - 1]
        #rnd = list(zip(train_data, train_label))
        #random.shuffle(rnd)
        #train_data, train_label = zip(*rnd)
        # use feature scaling (also needed for rbf kernels)
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        scaler.fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
        from sklearn.neural_network import MLPClassifier
        clf = MLPClassifier(alpha=1, hidden_layer_sizes=(15,), random_state=1, activation='logistic', max_iter=1000, early_stopping=False)
        clf.fit(train_data, train_label)
        tot = len(test_label)
        cnt = 0
        print(clf.n_iter_)
        for i in range(0, len(test_data)):
            voting_pred[i].append(clf.predict([test_data[i]])[0])

    tot = len(test_label)
    cnt = 0
    prediction = list()
    for i in range(0, len(test_data)):
        prediction.append(most_common(voting_pred[i]))
        if prediction[i] != test_label[i]:
            print(str(i) + " " + str(prediction[i]) + " " + str(test_label[i]))
            cnt += 1
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import precision_score
    from sklearn.metrics import f1_score
    print("Complete for Voting system :")
    print("Total test set size : " + str(len(test_label)))
    print("Correct prediction : " + str(tot - cnt))
    print("Incorrect Prediction : " + str(cnt))
    print("Accuracy : " + str(accuracy_score(test_label, prediction) * 100.0))
    print("Precision : " + str(precision_score(test_label, prediction, average='weighted') * 100.0))
    print("F1 Score : " + str(f1_score(test_label, prediction, average='weighted') * 100.0))
    print("Error Rate : " + str(cnt / tot * 100.0))
    print("---------------------------------------\n")
Example #27
 def test_bool_and(self):
     x = ((0, 0), (1, 1), (1, 0), (0, 1))
     y = (0, 1, 0, 0)
     mlp = MLPClassifier(hidden_layer_sizes=(), activation='logistic', max_iter=2, alpha=1e-4,
                         solver='lbfgs', verbose=False, tol=1e-4, random_state=1,
                         learning_rate_init=.1)
     mlp.fit(x, y)
     assert mlp.predict([[0, 0]]) == 0
     assert mlp.predict([[0, 1]]) == 0
     assert mlp.predict([[1, 0]]) == 0
     assert mlp.predict([[1, 1]]) == 1
 def Neural_network(self, X_train, Y_train, X_test, Y_test):
     from sklearn import metrics
     from sklearn.neural_network import MLPClassifier
     model = MLPClassifier()
     model.fit(X_train, Y_train)
     expected = Y_test
     predicted = model.predict(X_test)
     fpr, tpr, thres = metrics.roc_curve(expected, predicted)
     print(metrics.classification_report(expected, predicted))
     # print(metrics.confusion_matrix(expected, predicted))
     print(metrics.auc(fpr, tpr))
Example #29
 def test_bool_onehot(self):
     X = [x for x in itertools.combinations_with_replacement([True, False], 9)]
     y = [True if sum(a) == 1 else False for a in X]
     X_r = repeat_data(X)
     y_r = repeat_data(y)
     mlp = MLPClassifier(hidden_layer_sizes=(2,), activation='logistic', max_iter=10000, alpha=1e-4,
                         solver='lbfgs', verbose=False, tol=1e-4, random_state=1,
                         learning_rate_init=.1)
     mlp.fit(X_r, y_r)
     assert mlp.score(X, y) > 0.9
     for x in X:
         self.assertEqual(mlp.predict([x])[0], (sum(x) == 1))
Example #30
class AnnClassifier(AbstractClassifier):
    def __init__(self, features, target, solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1):
        self.solver = solver
        self.alpha = alpha
        self.hidden_layer_sizes = hidden_layer_sizes
        self.random_state = random_state
        super(AnnClassifier, self).__init__(features, target)

    def __fit(self, features):
        self.clf = MLPClassifier(solver=self.solver, alpha=self.alpha, hidden_layer_sizes=self.hidden_layer_sizes,
                                 random_state=self.random_state)
        self.clf.fit(features, self.target)
x = pd.get_dummies(cf)
categorical = ['token', 'next', 'previous']
x = x.to_numpy()
#x = pd.get_dummies(df[columns=categorical])
#print(x)
"""
print(type(y))
print(df)
print("0th token = ", token_exp[0])
print("rest")
print(df.shape)
print(df['class'])
print("Type of dummy is : ", type(dummy))"""
print(type(y))
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.25, random_state=40)
classifier = MLPClassifier(activation="logistic", solver='sgd', alpha=0.1, hidden_layer_sizes=(5, 15))
classification = classifier.fit(X_train, Y_train)
# Save the model as a pickle in a file 
joblib.dump(classifier, 'NeuralNet.pkl') 
  
# Load the model from the file 
nn_from_joblib = joblib.load('NeuralNet.pkl')
Y_pred = nn_from_joblib.predict(X_test)
confusion = confusion_matrix(Y_test,Y_pred)
print("confusion matrix : \n",confusion)
accuracy = accuracy_score(Y_test,Y_pred)*100
print("System accuracy = ",accuracy)
c = precision_score(Y_test, Y_pred, average='macro')*100
print("Precision of the system = ", c)
d = recall_score(Y_test,Y_pred,average='micro')*100
print("Recall of the system = ",d)
Example #32

df_train, df_test, X_train, Y_train, X_test, Y_test = get_train_test(
    df_glass, y_col_glass, x_cols_glass, train_test_ratio)

#%% CREATE DICTIONARY OF VARIOUS CLASSIFIERS TO TRY

dict_classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Nearest Neighbors": KNeighborsClassifier(),
    "Linear SVM": SVC(),
    "Gradient Boosting Classifier":
    GradientBoostingClassifier(n_estimators=1000),
    "Decision Tree": tree.DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=1000),
    "Neural Net": MLPClassifier(alpha=1),
    "Naive Bayes": GaussianNB(),
    "AdaBoost": AdaBoostClassifier(),
    "QDA": QuadraticDiscriminantAnalysis(),
    "Gaussian Process": GaussianProcessClassifier()
}

#%% BATCH CLASSIFIER


def batch_classify(X_train,
                   Y_train,
                   X_test,
                   Y_test,
                   no_classifiers=11,
                   verbose=True):
Example #33
def mplc(x_train, x_test, y_train, y_test, solver):
	clf = MLPClassifier(solver = solver)
	clf.fit(x_train, y_train.values.ravel())

	predict(clf, x_train, x_test, y_train, y_test)
Example #34
## print training accuracy
print('train acc: ', accuracy_score(rsvc.predict(X_train), y_train))
## print test accuracy
print('test acc: ', accuracy_score(rsvc.predict(X_test), y_test))

# Isn't that just amazing accuracy?

# ## Basic Neural Network

# You should never do neural networks in sklearn. Use Keras (which we will teach you later in this class), TensorFlow, PyTorch, etc. However, in an effort to keep this homework somewhat cohesive, let us proceed.

# Basic neural networks proceed in layers. Each layer has a certain number of nodes, representing how expressive that layer can be. Below is a sample network, with an input layer, one hidden (middle) layer of 50 neurons, and finally the output layer.

# In[ ]:

nn = MLPClassifier(hidden_layer_sizes=(50, ), solver='adam', verbose=1)
## fit the nn on the training split
nn.fit(X_train, y_train)
print('Basic Neural Network Accuracy')
## print training accuracy
print('train acc: ', accuracy_score(nn.predict(X_train), y_train))
## print test accuracy
print('test acc: ', accuracy_score(nn.predict(X_test), y_test))

# Fiddle around with the hidden layers. Change the number of neurons, add more layers, experiment. You should be able to hit 98% accuracy.

# Neural networks are optimized with a technique called gradient descent (a neural net is just one big function - so we can take the gradient with respect to all its parameters, then just go opposite the gradient to try and find the minimum). This is why it requires many iterations to converge.
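# To make "go opposite the gradient" concrete, here is a minimal sketch of
# gradient descent on the toy loss f(w) = (w - 3)^2 (illustrative only, not
# part of the homework): each step moves w against the derivative 2*(w - 3).

# In[ ]:

w = 0.0
learning_rate = 0.1
for _ in range(100):
    grad = 2 * (w - 3)         # analytic gradient of the toy loss
    w -= learning_rate * grad  # step opposite the gradient
print('w after descent:', w)   # approaches the minimum at w = 3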

# ## Turning In

# Convert this notebook to a PDF (file -> download as -> pdf via latex) and submit to Gradescope.





classifier1 =  neighbors.KNeighborsClassifier(weights='distance')
classifier1.fit(trainX, trainY)

prediction_clf1 = classifier1.predict(testX)

print(prediction_clf1)
print(metrics.accuracy_score(testY, prediction_clf1))


classifier2 = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 6), random_state=1,
                            learning_rate='invscaling', max_iter=200)
classifier2.fit(trainX, trainY)

prediction_clf2 = classifier2.predict(testX)

print(prediction_clf2)
print(metrics.accuracy_score(testY, prediction_clf2))





#print(trainX)


from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np

from util import getKaggleMNIST
from datetime import datetime
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

# get the data: https://www.kaggle.com/c/digit-recognizer
Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()

# scale first
pipeline = Pipeline([
  # ('scaler', StandardScaler()),
  ('mlp', MLPClassifier(hidden_layer_sizes=(500,), activation='tanh')),
])



t0 = datetime.now()
pipeline.fit(Xtrain, Ytrain)
print("train duration:", datetime.now() - t0)
t0 = datetime.now()
print("train score:", pipeline.score(Xtrain, Ytrain), "duration:", datetime.now() - t0)
t0 = datetime.now()
print("test score:", pipeline.score(Xtest, Ytest), "duration:", datetime.now() - t0)
Example #37
        cross = 10
        test_size = (1 / cross)
        X_train, X_test, y_train, y_test = train_test_split(
            datanew, target, stratify=target, test_size=test_size)

        rf = RandomForestClassifier()
        rf.fit(X_train, y_train)
        predrf = rf.predict_proba(X_test)
        #print("rf: ",rf.score(X_test,y_test))

        knn = KNeighborsClassifier(n_neighbors=10)
        knn.fit(X_train, y_train)
        predknn = knn.predict_proba(X_test)
        #print("knn: ",knn.score(X_test,y_test))

        mlp = MLPClassifier(hidden_layer_sizes=(50, 25, 10))
        mlp.fit(X_train, y_train)
        predmlp = mlp.predict_proba(X_test)
        #print("mlp : ",mlp.score(X_test,y_test))

        y_pred = []
        for i in range(len(predrf)):
            l1 = predrf[i]
            l2 = predknn[i]
            l3 = predmlp[i]

            n1 = np.array(l1)
            n2 = np.array(l2)
            n3 = np.array(l3)

            pr = n1 + n2 + n3
Example #38
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
precision_rf, recall_rf, _ = precision_recall_curve(y_test, y_pred_rf)
roc_auc_rf = auc(fpr_rf, tpr_rf)

dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict_proba(X_test)[:, 1]
fpr_dt, tpr_dt, _ = roc_curve(y_test, y_pred_dt)
precision_dt, recall_dt, _ = precision_recall_curve(y_test, y_pred_dt)
roc_auc_dt = auc(fpr_dt, tpr_dt)

mlp = MLPClassifier()
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict_proba(X_test)[:, 1]
fpr_mlp, tpr_mlp, _ = roc_curve(y_test, y_pred_mlp)
precision_mlp, recall_mlp, _ = precision_recall_curve(y_test, y_pred_mlp)
roc_auc_mlp = auc(fpr_mlp, tpr_mlp)

svm_clf = svm.SVC(probability=True)  # avoid shadowing the svm module
svm_clf.fit(X_train, y_train)
y_pred_svm = svm_clf.predict_proba(X_test)[:, 1]
fpr_svm, tpr_svm, _ = roc_curve(y_test, y_pred_svm)
precision_svm, recall_svm, _ = precision_recall_curve(y_test, y_pred_svm)
roc_auc_svm = auc(fpr_svm, tpr_svm)

sgd = SGDClassifier(loss='log')
sgd.fit(X_train, y_train)
Example #39
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

X = df.headline
y = df.label

cv = ShuffleSplit(n_splits=20, test_size=0.2)

models = [
    MultinomialNB(),
    BernoulliNB(),
    LogisticRegression(),
    SGDClassifier(),
    LinearSVC(),
    RandomForestClassifier(),
    MLPClassifier()
]

sm = SMOTE()

# Init a dictionary for storing results of each run for each model
results = {
    model.__class__.__name__: {
        'accuracy': [],
        'f1_score': [],
        'confusion_matrix': []
    }
    for model in models
}

for train_index, test_index in cv.split(X):
base.loc[base.age < 0, 'age'] = 40.92

previsores = base.iloc[:, 1:4].values
classe = base.iloc[:, 4].values

from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(previsores[:, 1:4])
previsores[:, 1:4] = imputer.transform(previsores[:, 1:4])

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
previsores = scaler.fit_transform(previsores)

from sklearn.model_selection import train_test_split
previsores_treinameto, previsores_teste, classe_treinamento, classe_teste = train_test_split(
    previsores, classe, test_size=0.25, random_state=0)

from sklearn.neural_network import MLPClassifier
classificador = MLPClassifier(verbose=True, max_iter=1000)
classificador.fit(previsores_treinameto, classe_treinamento)
previsoes = classificador.predict(previsores_teste)

from sklearn.metrics import confusion_matrix, accuracy_score
precisao = accuracy_score(classe_teste, previsoes)
matriz = confusion_matrix(classe_teste, previsoes)

from collections import Counter
Counter(classe_teste)
Example #41
predicted_class_name = ['prediction']

df = pd.read_csv(r"C:\Python27\heart2.csv")

x = df[feature_column_names].values
y = df[predicted_class_name].values

split_test_size = .2

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=split_test_size, random_state=42)

mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(500,), random_state=42)

mlp.fit(x_train, y_train.ravel())
expected = y_test
predicted_mlp = mlp.predict(x_test)
accuracy_mlp = mlp.score(x_test, y_test)
print(accuracy_mlp)
print(metrics.classification_report(expected,predicted_mlp))
print(metrics.confusion_matrix(expected,predicted_mlp))
cm_mlp=metrics.confusion_matrix(expected,predicted_mlp)
cm_mlp_list=cm_mlp.tolist()
cm_mlp_list[0].insert(0,'Real True')
cm_mlp_list[1].insert(0,'Real False')
print(tabulate(cm_mlp_list, headers=['Real/Pred', 'Pred True', 'Pred False']))

imgToDigit.convert_to_bw(thr)
digits = imgToDigit.split()
imgToDigit.to_32_32()
X, Y = imgToDigit.featureExtract_new()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
# Scale each feature to [0, 1] so that dimensions with large values do not dominate the prediction
ma = X.max(0)  # per-column maximum
ma[ma == 0] = 1  # many columns are all zeros; avoid dividing by zero
mi = X.min(0)
X = X - mi
X = X / ma
clf = MLPClassifier(solver='lbfgs',
                    hidden_layer_sizes=(200, 4),
                    alpha=1e-5,
                    random_state=1,
                    max_iter=40000)  # maximum number of iterations
clf.fit(X, Y)  # train the model
import pickle
with open('handWriting.bin', 'wb') as f:
    rs = pickle.dumps(clf)
    f.write(rs)

# %%
from PIL import Image
import pickle
import numpy as np
from ImageDigit import ImageDigit
with open('handWriting.bin', 'rb') as f:
    clf = pickle.load(f)
Example #43
from sklearn import datasets, svm, metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

digits = datasets.load_digits()
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1)) #flattens the image

NBclass = GaussianNB()
print("Running NB...")
NBclass.fit(data[:int(n_samples*2/3)], digits.target[:int(n_samples*2/3)])
NBpred = NBclass.predict(data[int(n_samples*2/3):])  # predict on the held-out final third

MLPclass = MLPClassifier(alpha=1, hidden_layer_sizes=(25, 15), random_state=1)
print("Running MLP...")
MLPclass.fit(data[:int(n_samples*2/3)], digits.target[:int(n_samples*2/3)])
MLPpred = MLPclass.predict(data[int(n_samples*2/3):])

SVCclass = svm.SVC(gamma=1)
print("Running SVC...")
SVCclass.fit(data[:int(n_samples*2/3)], digits.target[:int(n_samples*2/3)])
SVCpred = SVCclass.predict(data[int(n_samples*2/3):])

KNEIclass = KNeighborsClassifier(3)
print("Running KNEI...")
KNEIclass.fit(data[:int(n_samples*2/3)], digits.target[:int(n_samples*2/3)])
KNEIpred = KNEIclass.predict(data[int(n_samples*2/3):])

print("Calculating means..."); predicted = []
"""

import pandas as pd
import numpy as np

train = pd.read_csv(
    "/Users/jaynanda/Desktop/Assignments/660/Project/Numeric Data/art_foreign_numeric.csv"
)

feature = pd.DataFrame(train['Genre'])
train = train.drop('Genre', axis=1)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train,
                                                    feature,
                                                    test_size=0.30)

from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='lbfgs',
                    alpha=1e-5,
                    hidden_layer_sizes=(5, 2),
                    random_state=1)
clf.fit(X_train, y_train)

res = clf.predict(X_test)

from sklearn.metrics import accuracy_score

print(accuracy_score(res, y_test))
Example #45
    matrix = np.array(data, dtype=int)

# For the model selection part, we will not work on all the data set, this could take too much time.
training_length = 40200
training_set = matrix[:training_length, 1:] / 255.0
labels_training = matrix[:training_length, 0]
validation_length = 19800
validation_set = matrix[training_length:training_length + validation_length,
                        1:] / 255.0
labels_validation = matrix[training_length:training_length + validation_length,
                           0]

X, y = training_set, labels_training

# Now, since our data set is ready, we can find the best random weights
#We manually evaluate the accuracy and zero-one-loss after 10 iterations for both the training and the validation set
for seed in [1, 26, 42, 67, 123]:
    mlp = MLPClassifier(hidden_layer_sizes=(100,),
                        alpha=0.05,
                        max_iter=200,
                        random_state=seed)
    mlp.fit(X, y)

    labels_training_pred = mlp.predict(X)

    labels_validation_pred = mlp.predict(validation_set)

    print("Seed: {}, accuracy training: {}, accuracy validation: {}".format(
        seed, accuracy_score(y, labels_training_pred),
        accuracy_score(labels_validation, labels_validation_pred)))
Example #46
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_validate
from load_data import DATA

nn = MLPClassifier(
    hidden_layer_sizes=(144,),
    activation="relu",
    max_iter=1000,
    early_stopping=True,
    learning_rate_init=0.01,
    random_state=0,
)

# raw data
print("Running NN on raw data...")
X, y = DATA["credit"]
cv_results = cross_validate(
    nn,
    X,
    y,
    scoring=["accuracy", "balanced_accuracy", "precision", "recall", "f1", "roc_auc"],
    cv=10,
    return_train_score=True,
)
res_mean = {key: [] for key in cv_results}
res_std = {key: [] for key in cv_results}
for key in cv_results:
    res_mean[key].append(cv_results[key].mean())
def Cross_Validation(X, y):
    model1 = MultinomialNB()
    scores1 = cross_validation.cross_val_score(model1,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Naive  Bayes with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores1)))))

    model2 = GradientBoostingClassifier()
    scores2 = cross_validation.cross_val_score(model2,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Gradient Boost with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores2)))))

    model3 = SVC()
    scores3 = cross_validation.cross_val_score(model3,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("SVC with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores3)))))

    model4 = LogisticRegression()
    scores4 = cross_validation.cross_val_score(model4,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Logistic Regression with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores4)))))

    model5 = KNeighborsClassifier(n_neighbors=2)
    scores5 = cross_validation.cross_val_score(model5,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("K-Neighbours-Classifier with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores5)))))

    model6 = ExtraTreesClassifier()
    scores6 = cross_validation.cross_val_score(model6,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Tree-Classifier with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores6)))))

    model7 = DecisionTreeClassifier()
    scores7 = cross_validation.cross_val_score(model7,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Decision-Tree-Classifier with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores7)))))

    model8 = MLPClassifier(solver='adam',
                           alpha=0.01,
                           hidden_layer_sizes=(10, 10))
    scores8 = cross_validation.cross_val_score(model8,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("MLP classifier's with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores8)))))
Example #48
    keras.layers.Dense(60, activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid),
])
model.summary()
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32)
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

from sklearn.svm import SVC
from sklearn import metrics
svc=SVC() #Default hyperparameters
svc.fit(X_train,y_train)
y_pred=svc.predict(X_test)
print('Accuracy Score:')
print(metrics.accuracy_score(y_test,y_pred))


clf = MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=50, alpha=0.0001,
                    solver='sgd', verbose=10, random_state=21, tol=1e-9)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
cm
sns.heatmap(cm, center=True)
plt.show()
Example #49
    titles_ser = pd.Series(clustering_tree[cluster]["child_titles"], dtype=str)
    cluster_label_enc = title_encoding.index(
        titles_ser.value_counts().index[0])
    # Matrix
    cluster_matrix = data_tfidf_matrix[clustering_tree[cluster]
                                       ["child_indices"]]
    output_column = np.array([[cluster_label_enc] * cluster_matrix.shape[0]
                              ]).reshape(-1, 1)
    cluster_matrix = np.concatenate((cluster_matrix, output_column), axis=1)
    np.random.shuffle(cluster_matrix)
    training_data_list.append(cluster_matrix[:int(cfg.train_test_frac *
                                                  cluster_matrix.shape[0]), :])

training_data_matrix = np.concatenate(
    [matrix for matrix in training_data_list], axis=0)
np.random.shuffle(training_data_matrix)
X_train, y_train = training_data_matrix[:, :-1], training_data_matrix[:, -1]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

print("Fitting model")
print(X_train.shape)
mlp = MLPClassifier(
    hidden_layer_sizes=(X_train.shape[1], int((2 / 3) * X_train.shape[1]),
                        len(data_pipeline.label_encoder.classes_)),
    max_iter=1000,
    verbose=True)

mlp.fit(X_train, y_train)
dump(mlp, cfg.binary_path + "MLPClassifier_model.joblib")
X['Created'] = X['Created'].map(gettime)
X['dFollowers'] = (X['Followers at Posting'].diff(
    periods=-3)) / (X['Created'].diff(periods=-3))
X['Sentiment'] = X['Description'].map(getsent)
X['Punctuation'] = X['Description'].str.count('!!!|ebron|rving|urry|iannis|arden|Why') \
                   + 2*X['Description'].str.count('@|#|ames')
X['Description'] = X['Description'].str.len()
X['dTime'] = X['Created'].diff(periods=-3)

# Splits the data into training and testing sets, and resolves NaNs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
X_train = X_train.fillna(X_train.mean())
y_train = y_train.fillna(y_train.mean())
X_test = X_test.fillna(X_test.mean())
y_test = y_test.fillna(y_test.mean())

# Scales the feature set for MLP sensitivity
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Creates the Classifier and fits it to training data
mlp = MLPClassifier(hidden_layer_sizes=(100, 100, 100, 100, 100),
                    max_iter=10000,
                    learning_rate='adaptive')
mlp.fit(X_train, y_train.values.ravel())

# Pickles MLP for use with MLP_Creator.py
pickle.dump(mlp, open('MLP_EC', 'wb'))
Example #51
predicted_test_knn = knn_clf.predict(test_data)

# Train SVM classifier
svc_clf = svm.SVC(gamma='auto',
                  kernel='rbf',
                  decision_function_shape='ovo',
                  max_iter=-1,
                  probability=False,
                  random_state=None,
                  shrinking=True,
                  tol=0.001,
                  verbose=False).fit(train_data, train_labels)
predicted_test_svc = svc_clf.predict(test_data)

nn_clf = MLPClassifier(solver='lbfgs',
                       alpha=1e-5,
                       hidden_layer_sizes=(15, ),
                       random_state=1).fit(train_data, train_labels)
predicted_test_nn = nn_clf.predict(test_data)

fpr_rf, recall_rf = metrics_cal(test_labels, predicted_test_rf)
fpr_dt, recall_dt = metrics_cal(test_labels, predicted_test_dt)
fpr_knn, recall_knn = metrics_cal(test_labels, predicted_test_knn)
fpr_svc, recall_svc = metrics_cal(test_labels, predicted_test_svc)
fpr_nn, recall_nn = metrics_cal(test_labels, predicted_test_nn)

print('Detection rate  |   False alarm rate  ')
print(recall_dt, fpr_dt)
print(recall_rf, fpr_rf)
print(recall_knn, fpr_knn)
print(recall_svc, fpr_svc)
print(recall_nn, fpr_nn)
Example #52
def classBonus(filename):
    ''' This function performs the bonus experiment: it explores all
        the classifiers.

    Parameters
       filename : string, the name of the npz file from Task 2

    '''
    decisionTreeResult = []
    AdaBoostResult = []
    MLPResult = []
    RandomForResult = []
    # load data
    data = np.load(filename)
    data = data['arr_0']

    # getting y value
    X = data[:, :-1]
    y = data[:, -1]

    # splitting data into test and training 20%,80%
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=66)

    maxDep = range(1, 16)
    # Random Forest performance may be different for each train
    for depth in maxDep:
        print("Depth: " + str(depth))

        model = RandomForestClassifier(max_depth=depth, n_estimators=10)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        C = confusion_matrix(y_test, y_pred)
        print(C)
        output = ["RandomForestClassifier"] + [depth] + [
            accuracy(C)
        ] + recall(C) + precision(C) + np.ravel(C).tolist()
        RandomForResult.append(output)

    aList = [1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.025, 0.01]
    # MLP performance may be different for each train
    for alpha in aList:
        print("Alpha: " + str(alpha))

        model = MLPClassifier(alpha=alpha)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        C = confusion_matrix(y_test, y_pred)
        print(C)
        output = ["MLPClassifier"] + [alpha] + [
            accuracy(C)
        ] + recall(C) + precision(C) + np.ravel(C).tolist()
        MLPResult.append(output)

    learnRate = [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]
    # AdaBoost
    for rate in learnRate:
        print("learnRate: " + str(learnRate))

        model = AdaBoostClassifier(learning_rate=rate)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        C = confusion_matrix(y_test, y_pred)
        print(C)
        output = ["AdaBoost"] + [rate] + [
            accuracy(C)
        ] + recall(C) + precision(C) + np.ravel(C).tolist()
        AdaBoostResult.append(output)

    maxFeatList = ['log2', 'sqrt', None]
    # Decision Tree
    for feat in maxFeatList:
        print("max Feats: " + str(feat))

        model = DecisionTreeClassifier(random_state=66, max_features=feat)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        C = confusion_matrix(y_test, y_pred)
        print(C)
        output = ["Decision Tree"] + [feat] + [
            accuracy(C)
        ] + recall(C) + precision(C) + np.ravel(C).tolist()
        decisionTreeResult.append(output)

    bestAccuracy = -1

    result = [decisionTreeResult, AdaBoostResult, MLPResult, RandomForResult]
    with open('a1_bonus.csv', 'w', newline='') as csvFile:
        csvWriter = csv.writer(csvFile, delimiter=',')
        for r in result:
            csvWriter.writerows(r)
Example #53
y = to_categorical(y)

# scaling of features to fit range 0-1
x = MinMaxScaler().fit_transform(x)

# shuffle and split into training and test
x, x_test, y, y_test = train_test_split(x,
                                        y,
                                        test_size=0.25,
                                        shuffle=True,
                                        random_state=42)

# fit multi-layer perceptron classifier
mlp = MLPClassifier(hidden_layer_sizes=(64, 48, 10),
                    activation='relu',
                    solver='adam',
                    max_iter=250,
                    random_state=42,
                    verbose=True)
mlp = mlp.fit(x, y)

# output mean mlp accuracy on test data
mlp_accuracy = mlp.score(x_test, y_test)
print('sklearn', mlp_accuracy)

# calculcate confusion matrix for predicted labels
label_pred = np.argmax(mlp.predict(x_test), axis=1)
label_true = np.argmax(y_test, axis=1)
cf_matrix = confusion_matrix(label_true, label_pred)
print(cf_matrix)

# construct multi-layer keras network
Example #54
def main(num):
    import pandas as pd
    col_names = [
        'DNS', 'TCP', 'HTTP', 'BROWSER', 'IGMPv3', 'SSDP', 'NBSS', 'NBNS',
        'SMB', 'LANMAN', 'IRC', 'SSL', 'SSLv2', 'SSLv3', 'TLSv1', 'SMTP',
        'SMTP|IMF', 'VICP', 'HTTP/XML', 'ICMP', 'Packets Sent',
        'Packets Received', 'Bytes Sent', 'Bytes Received', 'Country', 'Label'
    ]

    file = pd.read_csv("final_features.csv")

    #feature_cols = [ 'TCP', 'HTTP', 'SSL', 'Country'] num = 25
    feature_cols = [
        'DNS', 'TCP', 'HTTP', 'ICMP', 'Packets Sent', 'Packets Received',
        'Bytes Sent', 'Bytes Received', 'Country'
    ]

    X = file[feature_cols]
    Y = file.Label
    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.25,
                                                        random_state=num)

    # Baysian Stuff

    from sklearn.neural_network import MLPClassifier

    model = MLPClassifier(hidden_layer_sizes=(100, 4), random_state=num)

    model.fit(X_train, Y_train)

    Y_pred = model.predict(X_test)

    from sklearn.metrics import classification_report, confusion_matrix

    matrix = confusion_matrix(Y_test, Y_pred)
    print(matrix)
    print(classification_report(Y_test, Y_pred))

    from sklearn import metrics
    print("Accuracy:", metrics.accuracy_score(Y_test, Y_pred))
    print("Precision:", metrics.precision_score(Y_test, Y_pred))
    print("Recall:", metrics.recall_score(Y_test, Y_pred))

    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    class_names = [0, 1]
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)

    cnf_matrix = confusion_matrix(Y_test, Y_pred)
    sns.heatmap(pd.DataFrame(cnf_matrix), annot=True, cmap="YlGnBu", fmt='g')
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.text(0.5, 257.44, 'Predicted label')
Example #55
# split the data, define an MLP classifier, then load a previously saved model

# split training and testing data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=3)

# Create MLP classifier and define hyperameters
model = MLPClassifier(verbose=1,
                      learning_rate_init=0.5,
                      hidden_layer_sizes=(
                          344,
                          172,
                      ),
                      batch_size=500,
                      learning_rate='adaptive',
                      activation='relu',
                      solver='sgd',
                      max_iter=200)
#
# model = OneVsRestClassifier(MLPClassifier(verbose=1, learning_rate_init=0.01, hidden_layer_sizes=(256, 256, 256), batch_size=200,
#                          learning_rate='adaptive', activation='sigmoid', solver='sgd', max_iter=500))
# # #model = TPOTClassifier(generations=5, population_size=50, verbosity=3)
# #
# #
# # # Fit the classifier to the data
# model.fit(X_train,y_train)

model = joblib.load('saved_model_3.pkl')
Example #56
import re
import tqdm
import jieba
import json
import chardet

ban_word = open('ban_word.txt', encoding='utf8').read().split('\n')

classifiers = [
    KNeighborsClassifier(),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]


def cal_tfidf(data):
    # segment the text with jieba; the joined tokens are what the
    # vectorizer (and the caller) consume
    res1 = ' '.join(jieba.lcut(data))
    corpus = [res1]
    vector = TfidfVectorizer(stop_words=ban_word)
    try:
        # fit only to check that the text vectorizes at all (e.g. that it
        # is not entirely stop words); the matrix itself is not used here
        vector.fit_transform(corpus)
    except ValueError:
        return 0
    return res1
    
Example #57
0
for i in range(7, 11):
    img1 = np.column_stack((img1, gimg(i)))

img = 1 - np.row_stack((img, img1))

plt.imshow(img)
plt.show()

# use 75% of the source as training data
train_size = int(X.shape[0] * .75)
# perform principal component analysis, keeping 50 components
pca = PCA(n_components=50)
training_data = pca.fit_transform(X[:train_size])  # PCA is unsupervised; y is not needed
# create simple neural network and train
clf = MLPClassifier(solver='lbfgs',
                    alpha=1e-5,
                    activation='relu',
                    max_iter=3000,
                    hidden_layer_sizes=(30, ),
                    random_state=1)
clf.fit(training_data, y[:train_size].ravel())

# get predictions for the data not used in the classifier
predicted = clf.predict(pca.transform(X[train_size:]))
actual = y[train_size:]
print(metrics.classification_report(actual, predicted))
print(metrics.confusion_matrix(actual, predicted))

joblib.dump(pca, '../trained/sklearn_pca.pkl')
joblib.dump(clf, '../trained/sklearn_neural_network.pkl')
Example #58
0
    def _build_estimator(Y_train, method, cv, cv_scoring, cv_n_folds,
                         **options):
        if cv:
            #from sklearn.cross_validation import StratifiedKFold
            #cv_obj = StratifiedKFold(n_splits=cv_n_folds, shuffle=False)
            cv_obj = cv_n_folds  # temporary hack (due to pickling issues otherwise; this needs to be fixed)
        else:
            cv_obj = None

        _rename_main_thread()

        if method == 'LinearSVC':
            from sklearn.svm import LinearSVC
            if cv is None:
                cmod = LinearSVC(**options)
            else:
                try:
                    from freediscovery_extra import make_linearsvc_cv_model
                except ImportError:
                    raise OptionalDependencyMissing('freediscovery_extra')
                cmod = make_linearsvc_cv_model(cv_obj, cv_scoring, **options)
        elif method == 'LogisticRegression':
            from sklearn.linear_model import LogisticRegression
            if cv is None:
                cmod = LogisticRegression(**options)
            else:
                try:
                    from freediscovery_extra import make_logregr_cv_model
                except ImportError:
                    raise OptionalDependencyMissing('freediscovery_extra')
                cmod = make_logregr_cv_model(cv_obj, cv_scoring, **options)
        elif method == 'NearestCentroid':
            cmod = NearestCentroidRanker()
        elif method == 'NearestNeighbor':
            cmod = NearestNeighborRanker()
        elif method == 'xgboost':
            try:
                import xgboost as xgb
            except ImportError:
                raise OptionalDependencyMissing('xgboost')
            if cv is None:
                try:
                    from freediscovery_extra import make_xgboost_model
                except ImportError:
                    raise OptionalDependencyMissing('freediscovery_extra')
                cmod = make_xgboost_model(cv_obj, cv_scoring, **options)
            else:
                try:
                    from freediscovery_extra import make_xgboost_cv_model
                except ImportError:
                    raise OptionalDependencyMissing('freediscovery_extra')
                cmod = make_xgboost_cv_model(cv, cv_obj, cv_scoring, **options)
        elif method == 'MLPClassifier':
            if cv is not None:
                raise NotImplementedFD('CV not supported with MLPClassifier')
            from sklearn.neural_network import MLPClassifier
            cmod = MLPClassifier(solver='adam',
                                 hidden_layer_sizes=10,
                                 max_iter=200,
                                 activation='identity',
                                 verbose=0)
        else:
            raise WrongParameter('Method {} not implemented!'.format(method))
        return cmod
Example #59
0
def Baselin_predict(mask):
    df_pred_x = df_test.drop(["label", 0], axis=1)
    df_pred_x = df_pred_x.loc[:, mask]
    x_pred = np.array(df_pred_x)

    CU_X, Y = feature_selection(mask)

    # rbfsvm = svm.SVC()
    # lsvm = svm.LinearSVC()
    mlp = MLPClassifier(hidden_layer_sizes=(100, 100, 100, 100), max_iter=2000)

    skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
    fold_accuracy = []

    scaler = StandardScaler()
    tfidf = TfidfTransformer(norm=None)
    dense = Data_Utils.DenseTransformer()

    for train, test in skf.split(CU_X, Y):
        # train split
        CU_train_data = CU_X[train]
        train_labels = Y[train]

        # test split
        CU_eval_data = CU_X[test]
        eval_labels = Y[test]

        # tf-idf
        tfidf.fit(CU_train_data)
        CU_train_data = dense.transform(tfidf.transform(CU_train_data))
        CU_eval_data = dense.transform(tfidf.transform(CU_eval_data))

        # standardization
        scaler.fit(CU_train_data)
        CU_train_data = scaler.transform(CU_train_data)
        CU_eval_data = scaler.transform(CU_eval_data)

        # normalization
        CU_train_data = normalize(CU_train_data)
        CU_eval_data = normalize(CU_eval_data)

        train_data = CU_train_data
        eval_data = CU_eval_data

        # evaluation
        # rbfsvm.fit(train_data, train_labels)
        # lsvm.fit(train_data, train_labels)
        mlp.fit(train_data, train_labels)

        # rbfsvm_acc = rbfsvm.score(eval_data, eval_labels)
        # lsvm_acc = lsvm.score(eval_data, eval_labels)
        mlp_acc = mlp.score(eval_data, eval_labels)

        fold_accuracy.append(mlp_acc)
    print("accuracy Measure", np.mean(fold_accuracy))
    CU_pred_data = dense.transform(tfidf.transform(x_pred))

    CU_pred_data = scaler.transform(CU_pred_data)
    CU_pred_data = normalize(CU_pred_data)
    pred = mlp.predict(CU_pred_data)  # vectorized predict over all rows
    df_test["pred"] = pred
    df_out = df_test[[0, "pred"]]
    df_res = df_out.sort_values(by=[0])
    df_res.to_csv("AdversarialTestResults.txt",
                  header=None,
                  index=None,
                  sep=' ')
Example #60
0
def class33(X_train, X_test, y_train, y_test, i, X_1k, y_1k):
    ''' This function performs experiment 3.3
    
    Parameters:
       X_train: NumPy array, with the selected training features
       X_test: NumPy array, with the selected testing features
       y_train: NumPy array, with the selected training classes
       y_test: NumPy array, with the selected testing classes
       i: int, the index of the supposed best classifier (from task 3.1)  
       X_1k: NumPy array, just 1K rows of X_train (from task 3.2)
       y_1k: NumPy array, just 1K rows of y_train (from task 3.2)
    '''
    kList = [5, 10, 20, 30, 40, 50]  # a list keeps the k values in a deterministic order
    csvResult = []
    pval1 = []
    pval32 = []

    # find the best k features p values for 1k and 32k
    for k in kList:  # use `k`, not `i`, so the best-classifier index is not shadowed
        selector = SelectKBest(f_classif, k=k)
        X_new = selector.fit_transform(X_1k, y_1k)
        pp = sorted(selector.pvalues_)
        pval1.append(pp[:k])
    print(pval1)

    for k in kList:
        selector = SelectKBest(f_classif, k=k)
        X_new = selector.fit_transform(X_train, y_train)
        pp = sorted(selector.pvalues_)
        pval32.append(pp[:k])
        csvResult.append([k] + pp)
    print(pval32)

    # 1k and 32k with 5 features
    selector = SelectKBest(f_classif, k=5)
    X_train1k = selector.fit_transform(X_1k, y_1k)
    X_test1k = selector.transform(X_test)
    print(X_train1k)

    selector = SelectKBest(f_classif, k=5)
    X_train32k = selector.fit_transform(X_train, y_train)
    X_test32k = selector.transform(X_test)
    print(X_train32k)

    if i == 1:  # `i` is the best-classifier index passed in (was an undefined `iBest`)
        model = LinearSVC(max_iter=10000)
    elif i == 2:
        model = SVC(max_iter=10000, gamma=2)
    elif i == 3:
        model = RandomForestClassifier(max_depth=5, n_estimators=10)
    elif i == 4:
        model = MLPClassifier(alpha=0.05)
    else:
        model = AdaBoostClassifier()

    accuracies = []
    model.fit(X_train1k, y_1k)
    y_predict1k = model.predict(X_test1k)
    accuracies.append(
        accuracy(confusion_matrix(y_test, y_predict1k, labels=[0, 1, 2, 3])))

    model.fit(X_train32k, y_train)
    y_predict32k = model.predict(X_test32k)
    accuracies.append(
        accuracy(confusion_matrix(y_test, y_predict32k, labels=[0, 1, 2, 3])))

    csvResult.append(accuracies)

    with open("a1_3.3.csv", "w", newline="") as csvFile:
        csvWriter = csv.writer(csvFile)
        csvWriter.writerows(csvResult)