Example #1
def mlp(train_x, train_y, name, results):
    """Generate an MLP for each model."""
    f = open(results + '/all_model_params.txt', 'a')
    f.write(name + ' for MLP: \n')
    # relu activation MLP
    relu_clf = MLPClassifier(hidden_layer_sizes=(100,),
                             activation='relu',
                             random_state=1,
                             max_iter=10000).fit(train_x, train_y)
    pickle.dump(relu_clf, open(results + '/params/' + name + '/relu.txt',
                               'wb'))
    params = relu_clf.get_params()
    f.write(str(params))
    f.write('\n')

    # logistic activation MLP
    log_clf = MLPClassifier(hidden_layer_sizes=(100,),
                            activation='logistic',
                            random_state=1,
                            max_iter=10000).fit(train_x, train_y)
    pickle.dump(log_clf, open(results + '/params/' + name + '/log.txt', 'wb'))
    params = log_clf.get_params()
    f.write(str(params))
    f.write('\n\n')
    f.close()
Example #2
def first_task(filename: str):
    print(filename)
    X = list()
    y = list()

    with open(filename, "r") as f:
        lines = f.readlines()[1:]
        for line in lines:
            arr = line.strip('\n').split(",")
            X.append(list(map(float, arr[:2])))
            y.append(int(arr[2]))

    X00 = [X[i][0] for i in range(len(X)) if y[i] == 1]
    X10 = [X[i][1] for i in range(len(X)) if y[i] == 1]
    X01 = [X[i][0] for i in range(len(X)) if y[i] == -1]
    X11 = [X[i][1] for i in range(len(X)) if y[i] == -1]
    plt.scatter(X00, X10, label="class 1")
    plt.scatter(X01, X11, label="class -1")
    plt.legend(loc="best")
    plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

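    # NOTE: hidden_layer_sizes=() means no hidden layers, so this MLP reduces
    # to a linear (logistic-regression-like) model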
    mlp = MLPClassifier(random_state=1, hidden_layer_sizes=(), max_iter=10000)
    mlp.fit(X_train, y_train)
    print("standard:")
    print(mlp.get_params())
    print("n_iter:", mlp.n_iter_)
    print("best_loss", mlp.best_loss_)
    pred = mlp.predict(X_test)
    print("test accuracy:", end=" ")
    print(metrics.accuracy_score(y_test, pred))
    print(metrics.confusion_matrix(y_test, pred))
    pred = mlp.predict(X_train)
    print("train accuracy:", end=" ")
    print(metrics.accuracy_score(y_train, pred))
    print(metrics.confusion_matrix(y_train, pred))
    for activation_func in ("relu", "identity", "logistic", "tanh"):
        for solver in ("lbfgs", "sgd", "adam"):
            mlp = MLPClassifier(random_state=1, hidden_layer_sizes=(), max_iter=10000,
                                activation=activation_func, solver=solver)
            mlp.fit(X_train, y_train)
            print("------------------------")
            print("activation:", activation_func, ", solver:", solver)
            print(mlp.get_params())
            print("n_iter:", mlp.n_iter_)
            try:
                print("best_loss:", mlp.best_loss_)
            except AttributeError:
                # best_loss_ is not tracked by every solver (e.g. lbfgs)
                print("best_loss: None")
            pred = mlp.predict(X_test)
            print("test accuracy:", end=" ")
            print(metrics.accuracy_score(y_test, pred))
            print(metrics.confusion_matrix(y_test, pred))
            pred = mlp.predict(X_train)
            print("train accuracy:", end=" ")
            print(metrics.accuracy_score(y_train, pred))
            print(metrics.confusion_matrix(y_train, pred))
Example #3
def main():
    #small_grid()
    train, test, train_t, test_t = loadHand()
    test_t = test_t.to_numpy()

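    # NOTE: learning_rate='invscaling' only takes effect with solver='sgd';
    # the default solver here is 'adam', so this setting is ignored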
    classifier = MLPClassifier(hidden_layer_sizes=(25, 25),
                               max_iter=500,
                               activation='logistic',
                               learning_rate='invscaling')
    classifier.fit(train, train_t)
    prediction = classifier.predict(test)

    correct = 0
    for i in range(len(prediction)):
        if test_t[i] == prediction[i]:
            correct += 1

    print("You got", correct, "out of", len(prediction), "total datapoints")
    print(correct / len(prediction) * 100)

    train, test, train_t, test_t = loadHand()
    test_t = test_t.to_numpy()
    prediction = classifier.predict(test)
    correct = 0
    for i in range(len(prediction)):
        if test_t[i] == prediction[i]:
            correct += 1

    print("You got", correct, "out of", len(prediction), "total datapoints1")
    print(correct / len(prediction) * 100)

    print(classifier.get_params())
Example #4
 def __call__(self, config_id):
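     # pbar is assumed to be a progress bar (e.g. tqdm) defined in the enclosing scope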
     pbar.update(1)
     config = self.configs[config_id]
     # train
     x, y = self.dataset['train']
     clf = MLPClassifier(**config).fit(x, y)
     # test
     x, y = self.dataset['test']
     t_accuracy, t_f_mac, t_f_mic = run_test(clf, x, y)
     report = self.shared_config.copy()
     report.update({
         'metric/test/accuracy': t_accuracy,
         'metric/test/f1_macro': t_f_mac,
         'metric/test/f1_micro': t_f_mic,
         'classifier_config': clf.get_params()
     })
     if 'val' in self.dataset:
         x, y = self.dataset['val']
         v_accuracy, v_f_mac, v_f_mic = run_test(clf, x, y)
         report.update({
             'metric/val/accuracy': v_accuracy,
             'metric/val/f1_macro': v_f_mac,
             'metric/val/f1_micro': v_f_mic
         })
     return report
Example #5
def evaluate_sk(emb, labels):
    print("evaluating with classifier")
    X = pd.DataFrame(emb)

    # X.columns = [str(col) for col in X.columns.get_values()]

    y = labels

    X = StandardScaler().fit_transform(X)
    train_x, test_x, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=.4,
                                                        random_state=42)

    # print("train_x ", train_x)
    # print("train_y ", train_y)

    clf = MLPClassifier(solver='sgd',
                        activation='tanh',
                        learning_rate_init=0.001,
                        alpha=1e-5,
                        hidden_layer_sizes=(30, 30),
                        max_iter=10000,
                        batch_size=X.shape[0],
                        random_state=0)

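    # NOTE: trailing-underscore attributes are set by fit(), so the two manual
    # assignments below are overwritten as soon as fit() runs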
    clf.n_outputs_ = 6
    clf.out_activation_ = "softmax"
    print(clf.get_params())
    clf.fit(train_x, train_y)

    mean_acc = clf.score(test_x, test_y)
    print(mean_acc)
Example #6
 def run(self, input_file_path='output.json', output_file_path='predicted.json', num_max_train_instance=-1):
     X_train = []
     y_train = []
     train_indexes = []
     with open(input_file_path, 'r', encoding='utf-8') as f:
         data = json.loads(f.read())
     for frame in data['frames']:
         if (frame['learningData'] and frame['barcode']['isRandom']
                 and 'random' in frame and 'truthValue' in frame['random']):
             if num_max_train_instance == -1 or frame['random']['index'] < num_max_train_instance:
                 X_train.extend(frame['learningData'])
                 y_train.extend(frame['random']['truthValue'])
                 train_indexes.append(frame['image']['index'])
     # model = RandomForestClassifier(n_jobs=-1)
     model = MLPClassifier()
     model.fit(X_train, y_train)
     print(model.get_params())
     test_indexes = []
     result = []
     for frame in data['frames']:
         if frame['learningData'] and not frame['barcode']['isRandom']:
             predicted = model.predict(frame['learningData'])
             predicted = predicted.tolist()
             result.append({
                 'index': frame['image']['index'],
                 'value': predicted
             })
             test_indexes.append(frame['image']['index'])
     data['learning'] = {}
     data['learning']['trainIndexes'] = train_indexes
     data['learning']['testIndexes'] = test_indexes
     with open(input_file_path, 'w', encoding='utf-8') as f:
         f.write(json.dumps(data))
     with open(output_file_path, 'w', encoding='utf-8') as f:
         f.write(json.dumps(result))
Example #7
File: MLP.py  Project: xiaolei-lab/ISwine
def main():
    ##Load data
    df = pd.read_csv('./train_data.txt', sep='\t', header=None)
    df.columns = [
        'case', 'gene', 'upstream_snp', 'downstream_snp', 'intron_snp',
        'synonymous_snp', 'nonsynonymous_snp', 'upstream_indel',
        'downstream_indel', 'intron_indel', 'synonymous_indel',
        'nonsynonymous_indel', 'module', 'expression', 'QTN', 'QTL', 'QTG',
        'type', 'label'
    ]
    selected_columns = [
        'upstream_snp', 'downstream_snp', 'intron_snp', 'synonymous_snp',
        'nonsynonymous_snp', 'upstream_indel', 'downstream_indel',
        'intron_indel', 'synonymous_indel', 'nonsynonymous_indel', 'module',
        'expression', 'QTN', 'QTL'
    ]
    X = df[selected_columns]
    y = df['label']
    print('Positive(N): %d' % df['label'].sum())
    print('Negative(N): %d' % (len(y) - df['label'].sum()))
    ##Scale
    X = preprocessing.scale(X)
    ##Train set and Test set
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=8)
    ##Grid search
    model = MLPClassifier(early_stopping=True)
    param_grid = [
        {
            'activation': ['identity', 'logistic', 'tanh', 'relu'],
            'solver': ['sgd', 'adam'],
            'hidden_layer_sizes': [(64, 64), (32, 32), (16, 16)],
            'tol': [1e-4, 1e-5],
            'max_iter': [50, 100, 200],
        },
        {
            'activation': ['identity', 'logistic', 'tanh', 'relu'],
            'solver': ['lbfgs'],
            'hidden_layer_sizes': [(8, 8), (8, 4), (4, 4)],
            'tol': [1e-4, 1e-5],
            'max_iter': [10, 20, 40, 60],
        },
    ]
    gridsearch = GridSearchCV(model, param_grid, scoring='f1', n_jobs=-1, cv=4)
    gridsearch.fit(X_train, y_train)
    model = gridsearch.best_estimator_
    ##Print the best parameters
    best_parameters = model.get_params()
    for param_name in sorted(best_parameters.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))
    ##Print the model scores
    preds = model.predict(X_test)
    print('Accuracy: %.4f' % accuracy_score(y_test, preds))
    print('Precision: %.4f' % precision_score(y_test, preds))
    print('Recall: %.4f' % recall_score(y_test, preds))
    print('F1: %.4f' % f1_score(y_test, preds))
Example #8
def main():
    np.random.seed(RANDOM_STATE)
    pd.set_option('display.width', 0)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    
    data = pd.read_csv('data/train.csv')
    
    #test_data = pd.read_csv('data/test.csv')
    
    records = []
    
    #n = 42000*0.8
    n = 10000
    X, y = extract_data(data, n)
    activation = 'tanh'
    param_dict = {'batch_size': [100, 200], 'momentum': [0.9, 0.99 ], 'learning_rate_init':[0.001, 0.01, 0.1]}
    #param_dict = {'batch_size': [200], 'momentum': [0.9], 'learning_rate_init':[0.1]}
    for param in ParameterGrid(param_dict):       
        nn = MLPClassifier(solver='sgd',
                           tol=float('-inf'),
                           warm_start=True,
                           max_iter=1,
                           hidden_layer_sizes=[200],
                           random_state=RANDOM_STATE)
        nn_params = nn.get_params()
        nn_params.update(param)
        nn.set_params(**nn_params)
        #nn = MLPClassifier(**nn_params)
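        # NOTE: range(1, 60, 60) evaluates to [1], so only a single time limit is used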
        time_limits = list(range(1, 60, 60))
        try:
            evaluation_list = trainer_by_time(X, y, time_limits, nn)
        except Exception:
            evaluation_list = [{}]
            
        for i in range(len(evaluation_list)):
            evaluation = evaluation_list[i]
            record = {}
            record['n'] = n
            record['time limit'] = time_limits[i]
            record.update(evaluation)  
            record.update(param)
            records.append(record)
        
        
    df = pd.DataFrame(records)
    cols = list(df.columns)
    keys = evaluation_list[0].keys()
    cols = [item for item in cols if item not in keys]
    cols += keys
    df = df.reindex(columns=cols)
    now = datetime.datetime.now()    
    result_file = open('result.txt', 'a')
    print(now, file=result_file)
    print(df)
    print(df, file=result_file)
    result_file.close()
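
trainer_by_time is not included in this excerpt. A minimal sketch of what such a
helper might look like, assuming the warm_start=True / max_iter=1 setup above
(one solver iteration per fit() call) and a hypothetical evaluation dict per
time limit; this is an illustration, not the project's actual implementation:

import time

def trainer_by_time(X, y, time_limits, nn):
    # Hypothetical helper: keep training one iteration at a time until each
    # time limit (in seconds) is reached, then snapshot a score.
    evaluations = []
    start = time.time()
    for limit in time_limits:
        while time.time() - start < limit:
            nn.fit(X, y)  # warm_start=True: resumes from the current weights
        evaluations.append({'train accuracy': nn.score(X, y)})
    return evaluations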
Example #9
def main():
    np.set_printoptions(suppress=True)
    narac_file_path = "../../tigress/arburton/plink_data/narac_rf"
    csv_data = []
    for chunk in pd.read_csv(narac_file_path,
                             delim_whitespace=True,
                             index_col=0,
                             chunksize=20000):
        csv_data.append(chunk)
    samples = pd.concat(csv_data, axis=0)
    del csv_data
    # TODO: pull out affection column as y
    affection = samples["Affection"]
    samples = samples.drop([
        "Affection", "Sex", "DRB1_1", "DRB1_2", "SENum", "SEStatus", "AntiCCP",
        "RFUW"
    ],
                           axis=1)
    samples = pd.get_dummies(samples, columns=samples.columns[samples.columns != "ID"])
    sample_train, sample_test, affection_train, affection_test = train_test_split(
        samples, affection, test_size=0.8)
    # TODO: potentially make sample weights percentage of non ?? SNPs

    # RANDOM FOREST CLASSIFIER

    rf = RandomForestClassifier(n_estimators=5000, max_features=40, n_jobs=2)
    rf.fit(sample_train, affection_train)
    print("Random forest accuracy: {}".format(
        rf.score(sample_test, affection_test)))
    print("Random forest feature importances:")
    print(rf.feature_importances_)
    print("Random forest parameters:")
    print(rf.get_params())

    # LASSO (note: Lasso is a regressor, so score() here reports R^2, not accuracy)
    lasso = Lasso()
    lasso.fit(sample_train, affection_train)
    print("LASSO accuracy: {}".format(lasso.score(sample_test,
                                                  affection_test)))
    print("LASSO parameters:")
    print(lasso.get_params())

    # LOG REGRESSION
    log_reg = LogisticRegression(n_jobs=2)
    log_reg.fit(sample_train, affection_train)
    print("Log regression accuracy: {}".format(
        log_reg.score(sample_test, affection_test)))
    print("Log regression parameters:")
    print(log_reg.get_params())

    # NEURAL NETS
    mlp_classifier = MLPClassifier()
    mlp_classifier.fit(sample_train, affection_train)
    print("MLP Classifier accuracy: {}".format(
        mlp_classifier.score(sample_test, affection_test)))
    print("MLP Classifier parameters:")
    print(mlp_classifier.get_params())
Example #10
def CreateClassifier(dop):
    cls = MLPClassifier(hidden_layer_sizes=dop['hidden_layer_sizes'],
                        activation=dop['activation'],
                        solver=dop['solver'],
                        learning_rate=dop['learning_rate'],
                        learning_rate_init=dop['learning_rate_init'],
                        max_iter=dop['max_iter'])
    print(cls.get_params())
    return cls
Example #11
def MLP_normal(x_train, y_train, x_test, y_test):
    # Use the MLP (DNN) model from sklearn
    from sklearn.neural_network import MLPClassifier
    clf = MLPClassifier(solver='sgd', hidden_layer_sizes=(100, 500, 100), warm_start=True)
    print(clf.get_params())
    # Train the model; classes must be supplied on the first partial_fit call
    clf.partial_fit(x_train, y_train, classes=np.unique(y_train))

    return clf
Example #12
    def __init__(self, classifier=None, max_iter=1500):
        if not classifier:
            classifier = MLPClassifier(activation='relu',
                                       alpha=1e-5,
                                       hidden_layer_sizes=(2048, ),
                                       random_state=1,
                                       max_iter=max_iter)

        print("Selected classifier: ", classifier.get_params())
        self.pipeline = make_pipeline(MinMaxScaler(), classifier)
Example #13
    def training(self, training, testing):
        train_images = training[0]
        x_training = training[0] / 255.0
        y_training = training[1]
        x_test = testing[0] / 255.0
        y_test = testing[1]
        mlp = MLPClassifier(hidden_layer_sizes=(100, ),
                            max_iter=100,
                            alpha=1e-4,
                            solver='sgd',
                            verbose=10,
                            tol=0.0001,
                            random_state=1,
                            learning_rate_init=0.1,
                            learning_rate="adaptive")
        mlp.fit(x_training, y_training)
        print(mlp.get_params())
        with open(self.filename, 'wb') as output:
            pickle.dump(mlp, output, pickle.HIGHEST_PROTOCOL)
        print("[+] Saving Completed")
Example #14
def neural_network(X,y):
    # takes X and y as parameters
    # returns plot of yhat, train and test scores, parameters 
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
    model = MLPClassifier()
    model.fit(Xtrain, ytrain)
    model.predict(Xtest)
    train_score = model.score(Xtrain, ytrain)
    test_score = model.score(Xtest, ytest)
    params = model.get_params()

    print(f"train score: {train_score:.3f} | test_score: {test_score:.3f} | params: {params}")
Example #15
class myMlp():
    def __init__(self, train_data, train_label, test_data, test_label):
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label
        self.predict_label = None
        self.train_time = 0
        self.test_time = 0
        self.clf = None

    def setActivationFunction(self, fun=0):
        if fun == 1:  # relu
            self.clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', alpha=0.0001, max_iter=300)
        elif fun == 2:  # tanh
            self.clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='tanh', solver='adam', alpha=0.0001, max_iter=300)
        elif fun == 3:  # identity
            self.clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='identity', solver='adam', alpha=0.0001, max_iter=300)
        # NOTE: with the default fun=0 no branch matches and self.clf stays None,
        # so call this with fun in {1, 2, 3} before train()
         
    def train(self):
        print("Start train")
        time_start = time.time()
        self.clf.fit(self.train_data, self.train_label)
        time_end = time.time() - time_start
        print("End train", time_end)
        self.train_time = time_end
        return self.train_time

    def test(self):
        print("Start test")
        time_start = time.time()
        self.predict_label = self.clf.predict(self.test_data)
        time_end = time.time() - time_start
        print("End test", time_end)
        self.test_time = time_end
        return self.test_label, self.test_time

    def getTestLabel(self):
        return self.test_label

    def getPredictLabel(self):
        return self.predict_label
    
    def getTrainTime(self):
        return self.train_time

    def getTestTime(self):
        return self.test_time
    
    def getParams(self):
        return self.clf.get_params()
Example #16
    def trainNN(self, training_data, output_data):
        e1 = cv2.getTickCount()
        print("Building Perceptron...")
        # Creating MultiLayer Perceptrons.
        mlp = MLPClassifier(hidden_layer_sizes=(32, 16), activation='logistic', solver='sgd',
                            learning_rate_init=0.1, alpha=0.1,
                            random_state=1, max_iter=20000,
                            momentum=0)
        # NOTE: out_activation_ is reset by fit(), so this assignment has no lasting effect
        mlp.out_activation_ = 'identity'

        print("Training MLP.............")
        mlp.fit(training_data, output_data)
        e2 = cv2.getTickCount()
        time_taken = (e2 - e1) / cv2.getTickFrequency()
        print("Time taken to train : ", time_taken)
        print(mlp.get_params())
        # print("Training set score: %f" % mlp.score(training_data, output_data))
        # print("Test set score: %f" % mlp.score(training_data[0].reshape(1,-1), output_data[0].reshape(1,-1)))
        # print(mlp.predict(training_data[3].reshape(1,-1)))
        # self.testProcess(mlp)
        # save the model to disk
        filename = 'finalized_model.sav'
        pickle.dump(mlp, open(filename, 'wb'))
Example #17
def ann_factory(dsname):
    hidden_layers = {
        'musk': (200, 200),
        'shoppers': (20, 20, 20),
        'cancer': (10, )
    }

    ann = MLPClassifier(solver='adam',
                        early_stopping=True,
                        shuffle=True,
                        random_state=10,
                        learning_rate='adaptive',
                        hidden_layer_sizes=hidden_layers[dsname])
    logging.info('Created MLPClassifier with parameters: {}'.format(
        ann.get_params()))
    return ann
Example #18
def Run(X_train, X_test, y_train, y_test):
    vect = CountVectorizer(stop_words='english', min_df=0.0035).fit(X_train)

    X_train = Format_inputs(vect, X_train)
    X_test = Format_inputs(vect, X_test)

    model = MLPClassifier()
    parameters = [{'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]}]
    print(model.get_params().keys())
    
    model = GridSearchCV(model, parameters, cv=10)
    model.fit(X_train, y_train)
    best_accuracy = model.best_score_
    best_parameters = model.best_params_
    print(best_accuracy)
    print(best_parameters)
    """
Example #19
def model_fitting(x, y, test_size=0.33, seed=7, pfi_fitted_models=''):
    """ Save the model fitted on the input data """

    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=test_size, random_state=seed)

    model = MLPClassifier()
    model.fit(x_train, y_train)

    if not os.path.exists(pfi_fitted_models):
        raise ValueError('pfi_fitted_models path does not exist: %s' % pfi_fitted_models)

    # NOTE: only the hyperparameters (get_params()) are pickled, not the fitted model
    pickle.dump(model.get_params(), open(pfi_fitted_models, 'wb'))

    scored_model_assessment = model.score(x_test, y_test)

    return scored_model_assessment
Example #20
class BP_FFNN:

	def __init__(self):

		self.params = {'solver':'sgd', 'learning_rate': 'constant', 'learning_rate_init':0.01, 'activation':'logistic',
						'max_iter':1000, 'hidden_layer_sizes':(100,)}


		self.mlp = MLPClassifier(**self.params)

	def read_PGM(self, filepath):

		img = Image.open(filepath)
		return list(img.getdata())

	def load_data_set(self, file_list):

		with open(file_list, 'r') as f:
			data = f.read().splitlines()

		data_list  = [d for d in data]
		
		x_data = []
		y_data = []
		for t in data_list:
			y_data.append(1 if 'down' in t else 0)
			x_data.append(self.read_PGM(t))
		return x_data, y_data


	def run(self):

		train_x_data, train_y_data = self.load_data_set('downgesture_train.list')
		test_x_data, test_y_data = self.load_data_set('downgesture_test.list')

		for i in range(3):
			self.mlp.fit(train_x_data, train_y_data)
			print('round{}'.format(i+1))
			print('train scores:', self.mlp.score(train_x_data, train_y_data))
			print('test scores:', self.mlp.score(test_x_data, test_y_data))
			print('parameters:', self.mlp.get_params())
			print('')
Example #21
 def __call__(self, config_id):
     config = self.configs[config_id]
     report = self.shared_config.copy()
     # train
     x, y = self.dataset['train']
     clf = MLPClassifier(**config).fit(x, y)
     report.update({'classifier_config': clf.get_params()})
     # test
     x, y = self.dataset['test']
     tmp = self.run_test(clf, x, y, per_class_metric=True)
     tmp = {'test/{}'.format(k): v for k, v in tmp.items()}
     report.update(tmp)
     if 'val' in self.dataset:
         x, y = self.dataset['val']
         tmp = self.run_test(clf, x, y, per_class_metric=True)
         tmp = {'val/{}'.format(k): v for k, v in tmp.items()}
         report.update(tmp)
     return report
Example #22
def MLPClassifier_Model(X_train, y_train, X_test, y_test, max_iter):

    model = MLPClassifier(max_iter=max_iter)
    classifier = model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    testing_model = model.predict(X_test)
    cv_scores = cross_val_score(classifier, X_test, y_test, cv=3)
        
    print(' ')
    print('===== MLP Classifier Model =====')
    print('score:', score)
    print('cross validation scores:', cv_scores) 
      
    # Visualize parameters in a table.
    visualize_params(model.get_params())
    
    # Display confusion matrix.
    visualize_heatmap(y_test, testing_model, 'MLP Classifier')       
    
    return score
Example #23
class NeuralNet:
    """
    An attempt to interface MLPClassifier in a way that makes the hyperparameters related to width/depth more clear.
    We intercept the parameters fetched from MLPClassifier, and translates the parameters into our own
    Before going back to the same as before
    """
    def __init__(self, **params):
        self.neuralnet = MLPClassifier(**params)

    def fit(self, X, y, **params):
        fit = self.neuralnet.fit(X, y)
        return fit

    def predict(self, X):
        return self.neuralnet.predict(X)

    def predict_log_proba(self, X):
        return self.neuralnet.predict_log_proba(X)

    def predict_proba(self, X):
        return self.neuralnet.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.neuralnet.score(X, y, sample_weight)

    def set_params(self, **params):
        n_hidden_neurons = params['n_hidden_neurons']
        n_hidden_layers = params['n_hidden_layers']
        hidden_layer_sizes = tuple(
            [n_hidden_neurons for i in range(n_hidden_layers)])

        params['hidden_layer_sizes'] = hidden_layer_sizes
        del params['n_hidden_neurons']
        del params['n_hidden_layers']

        self.neuralnet.set_params(**params)

    def get_params(self, deep=True):
        params = self.neuralnet.get_params(deep)
        del params['hidden_layer_sizes']
        return params
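
A brief usage sketch (not from the original project) of the wrapper above, on
toy data; note that set_params expects both width/depth keys together:

import numpy as np

# Hypothetical usage of the NeuralNet wrapper defined above
X = np.random.rand(20, 4)          # toy data for illustration only
y = np.random.randint(0, 2, 20)
net = NeuralNet(max_iter=500, random_state=0)
# three hidden layers of 50 neurons each -> hidden_layer_sizes=(50, 50, 50)
net.set_params(n_hidden_neurons=50, n_hidden_layers=3)
net.fit(X, y)
print(net.get_params())            # hidden_layer_sizes is hidden from the caller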
Example #24
def neural_network(X, y, predict):
    # takes X and y as parameters
    # returns plot of yhat, train and test scores, parameters
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
    model = MLPClassifier()
    model.fit(Xtrain, ytrain)
    model.predict(Xtrain)
    train_score = model.score(Xtrain, ytrain)
    test_score = model.score(Xtest, ytest)
    params = model.get_params()

    xpts = range(len(model.predict(Xtrain)))
    fig, ax = plt.subplots(1, 1, figsize=(20, 8))

    ax.plot(xpts, model.predict(Xtrain), color='b')
    #ax.plot(xpts, y_hat_test_lst, color='r')
    ax.set_ylabel("yhat")
    ax.set_xlabel("over xpts")

    #plt.legend([f"{gdbr.__class__.__name__} Train - learning rate 0.1", f"{model.__class__.__name__} Test - learning rate 0.1"])
    plt.show()
Example #25
def get_mlp(x_train, t_train, x_val, t_val, search=False):
    # {'activation': 'relu', 'alpha': 0.1, 'learning_rate': 'constant', 'solver': 'adam'}
    # {'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': (100,), 'alpha': 0.06, 'activation': 'tanh'}
    # mlp validated at (array([0.8940068 , 0.78879874, 0.71866004, 0.99057592, 0.74764398]), 0.9286105369755633)
    # MLP tested at (array([0.72152429, 0.7118928 , 0.91457286, 0.71602094, 0.70136126]), 0.9242268552514312)
    # mlp validated at (array([0.8940068 , 0.78879874, 0.71866004, 0.99057592, 0.74764398]), 0.9833533999895304)
    # MLP tested at (array([0.72152429, 0.7118928 , 0.91457286, 0.71602094, 0.70136126]), 0.9823687075969512)

    # {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'solver': 'adam'}
    # mlp validated at (array([0.98953154, 0.99188694, 0.99188694, 0.98848168, 0.98612565]), 0.9537539956508365)
    # MLP tested at (array([0.98680905, 0.98848409, 0.98911223, 0.98910995, 0.99036649]), 0.9467248198442366)
    if search:

        mlp_params = param_sel(
            x_train, t_train, MLPClassifier(max_iter=1000), {
                'alpha': [
                    0.06,
                    0.1,
                ],
                'hidden_layer_sizes': [(20, 20, 10), (100, )],
                'activation': ['relu', 'tanh'],
                'solver': ['sgd', 'adam'],
                'learning_rate': ['constant', 'adaptive'],
            })
    else:
        mlp_params = {
            'activation': 'relu',
            'alpha': 0.01,
            'learning_rate': 'adaptive',
            'solver': 'adam',
            'hidden_layer_sizes': (100, )
        }

    mlp_classifier = MLPClassifier(**mlp_params, max_iter=6000)
    mlp_classifier.fit(x_train, t_train)
    print("MLP params:", mlp_classifier.get_params())
    print("MLP validated at", validate(mlp_classifier, x_val, t_val))
    return mlp_classifier
Example #26
def test_serialize_model():
    instance = HostFootprint()
    model = MLPClassifier()
    label_binarizer = LabelBinarizer()
    label_binarizer.neg_label = 0
    label_binarizer.pos_label = 1
    label_binarizer.sparse_output = False
    label_binarizer.y_type_ = "binary"
    label_binarizer.sparse_input_ = False
    label_binarizer.classes_ = np.array([0])

    parameters = {'hidden_layer_sizes': [(64, 32)]}
    GridSearchCV(model, parameters, cv=5, n_jobs=-1, scoring='f1_weighted')

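    # Manually populate the fitted attributes so the untrained model
    # looks like a trained one for the serialization round-trip below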
    model.coefs_ = np.array([[1], [2]])
    model.loss_ = 42
    model.intercepts_ = np.array([[3], [4]])
    model.classes_ = np.array([[5], [6]])
    model.n_iter_ = 42
    model.n_layers_ = 2
    model.n_outputs_ = 1
    model.out_activation_ = "logistic"
    model._label_binarizer = label_binarizer
    model.features = ['test_1', 'test_2', 'test_3']

    with tempfile.TemporaryDirectory() as tmpdir:
        model_file = os.path.join(tmpdir, 'host_footprint.json')
        instance.serialize_model(model, model_file)
        new_model = instance.deserialize_model(model_file)
        assert model.features == new_model.features
        print(f"model params: {model.get_params()}")
        print(f"new_model params: {new_model.get_params()}")
        assert len(model.get_params()['hidden_layer_sizes']) == len(
            new_model.get_params()['hidden_layer_sizes'])
        assert model._label_binarizer.y_type_ == new_model._label_binarizer.y_type_
        assert len(model.coefs_) == len(new_model.coefs_)
        assert len(model.intercepts_) == len(new_model.intercepts_)
Example #27
    def learn(self, column, params):
        """
        Learn dataframe.

        Args:
            column: column you want to predict.
            params: parameter for Grid Search.
        """
        if dfc.DataFrameChecker.is_df_num(self._df) is False:
            return False

        #predict data
        y = self._df[column]
        self._y = y

        #learning data
        X = self._df.drop([column], axis=1)
        self._X = X

        (X_train, X_test, y_train, y_test) = train_test_split(X,
                                                              y,
                                                              test_size=0.3,
                                                              random_state=0)

        model = MLPClassifier()

        phelper.PrintHelper.print_title('Default Params')
        print(model.get_params())

        phelper.PrintHelper.print_title('Params from a file')
        if params is None:
            print('Settings Params file is None.')
            params = {}
        else:
            print(params)

        print('...Doing Grid Search...')
        cv = GridSearchCV(model,
                          params,
                          cv=10,
                          scoring='neg_mean_squared_error',
                          n_jobs=1,
                          refit=True)

        cv.fit(X_train, y_train)

        self._best_params = cv.best_params_
        self._learned_model = cv.best_estimator_

        phelper.PrintHelper.print_title('Best Params')
        print(cv.best_params_)

        self._learned_model = cv

        # Accuracy Score
        print('...Predicting Test Data...')
        predicted_result = self._learned_model.predict(X_test).astype(int)

        phelper.PrintHelper.print_title('Accuracy Score')
        print(accuracy_score(y_test, predicted_result))

        return True
Example #28
targetNames = data['target_names']

X = pd.DataFrame(data.data, columns=featureNames)
Y = data.target

xTrain, xTest, yTrain, yTest = train_test_split(X,
                                                Y,
                                                shuffle=True,
                                                test_size=0.2,
                                                random_state=1234)

#Default Neural Network model without any tuning - base metric
MLPmodelDefault = MLPClassifier()
MLPmodelDefault.fit(xTrain, yTrain)

MLPmodelDefault.get_params()

# train
yPredTrainMLPDefault = MLPmodelDefault.predict(xTrain)
print(
    f'Train Error - Default Network: {1 - metrics.accuracy_score(yTrain, yPredTrainMLPDefault)}'
)
# test
yPredTestMLPDefault = MLPmodelDefault.predict(xTest)
print(
    f'Test Error - Default Network: {1 - metrics.accuracy_score(yTest, yPredTestMLPDefault)}'
)

#Parameter tuning with GridSearchCV

Example #29
        print('Xtrain size:{}, X_test  size:{}, y_train size:{}, y_test size:{}'.format(X_train.shape, X_test.shape, y_train.shape, y_test.shape))
        from sklearn.neural_network import MLPClassifier
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        # transform (not fit_transform) so the test set reuses training statistics
        X_test = scaler.transform(X_test)
        mlp = MLPClassifier(alpha=1e-5, hidden_layer_sizes=[256, 256], random_state=0, max_iter=1, warm_start=True)
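        # warm_start=True with max_iter=1: each fit() call below resumes from
        # the current weights and runs one more iteration instead of restarting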
        for i in range(10):
            mlp.fit(X_train, y_train)
        mlp.max_iter = 100000
        mlp.fit(X_train, y_train)
        print(classification_report(y_test,
                                    mlp.predict(X_test),
                                    target_names=list(conf['input']['target_List'].keys())))
        print(mlp.get_params())
    # exit()
    estimator = None
    #GA Hyperparameters..
    train_json.GA = set_config(**conf['GA'])
    ##GA training-result JSON init
    train_json.Result = ac()
    train_json.Result.FeatureSelection = ac()

    feature_names = np.array(feature_names)
    while True:
        #Genetic Algorithm variables
        print("Genetic Algorithm hyperparameters are as follows")
        for k, v in conf['GA'].items():
            print(f'{k}: {v}')
Example #30
File: ml_nn.py  Project: azeus404/module6
df = shuffle(df).reset_index(drop=True)
f.write('Dataset %s\n' % path)
"""
Properties of the dataset
"""
data_total = df.shape
print('Total llds %d' % data_total[0])
f.write('Total llds %d \n' % data_total[0])
"""
Neural network
"""
print("[+] Applying Neural Network")

mlpc = MLPClassifier()

print(mlpc.get_params())

x = df.drop(['label', 'lld'], axis=1).values
y = df['label'].values

#create a test set of size of about 20% of the dataset
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.20,
                                                    random_state=42,
                                                    stratify=y)

#Standardizing numeric variables.
scale = preprocessing.StandardScaler()
scale.fit(x_train)
x_train = scale.transform(x_train)
x_test = scale.transform(x_test)
# train the model
mlpc.fit(x_train, y_train)

# looking at the attributes
classes = mlpc.classes_  # target column classes
loss = mlpc.loss_  # the loss computed with the loss function
coefs = mlpc.coefs_  # a list of the weight matrix for each layer
intercepts = mlpc.intercepts_  # a list of the bias vector for each layer
n_iter = mlpc.n_iter_  # the number of iterations the solver has run
n_layers = mlpc.n_layers_  # the number of layers of the model
n_outputs = mlpc.n_outputs_  # the number of output neurons (1 for binary, n_classes for multiclass)
out_activation = mlpc.out_activation_  # the name of the output activation function used

# looking at the methods
get_params = mlpc.get_params()  # returning the parameters for the model
prediction_array = mlpc.predict(
    x_test
)  # running the test dataset through the model, giving an array of predicted values
predict_log_proba = mlpc.predict_log_proba(
    x_test)  # log of probability estimate for each class
predict_proba = mlpc.predict_proba(x_test)  # the probability for each class
train_score = mlpc.score(
    x_train, y_train)  # returns the mean accuracy of the training set
test_score = mlpc.score(x_test,
                        y_test)  # returns the mean accuracy of the test set

print(
    'Using the standard neural network model the accuracy score for the train dataset is: %.3f and the accuracy for the test dataset is: %.3f'
    % (train_score, test_score))