def mlp(train_x, train_y, name, results):
    """Train a ReLU and a logistic MLP on the same data and persist both.

    Each fitted classifier is pickled under ``<results>/params/<name>/`` and
    its hyper-parameters (``get_params()``) are appended to
    ``<results>/all_model_params.txt``.

    Args:
        train_x: Training features passed to ``MLPClassifier.fit``.
        train_y: Training targets passed to ``MLPClassifier.fit``.
        name: Model name; written as the log header and used as the
            sub-directory below ``<results>/params``.
        results: Root output directory. The directories are not created
            here, so they must already exist.
    """
    # (activation, pickle file name, separator written after the params dump)
    variants = (
        ('relu', '/relu.txt', '\n'),
        ('logistic', '/log.txt', '\n\n'),
    )

    # Context managers guarantee the log and the pickle files are flushed and
    # closed even if training or serialisation raises.
    with open(results + '/all_model_params.txt', 'a') as log:
        log.write(name + ' for MLP: \n')
        for activation, pickle_name, separator in variants:
            # The original spelled the size as ``(100)``, which is the int 100
            # (not a 1-tuple); the value is kept so the logged params match.
            clf = MLPClassifier(hidden_layer_sizes=100,
                                activation=activation,
                                random_state=1,
                                max_iter=10000).fit(train_x, train_y)
            with open(results + '/params/' + name + pickle_name, 'wb') as out:
                pickle.dump(clf, out)
            log.write(str(clf.get_params()))
            log.write(separator)
def first_task(filename: str):
    """Plot a two-feature dataset and compare MLP activation/solver combinations.

    The file is read as comma-separated text: the first line is skipped as a
    header, the first two fields of each row are the features and the third is
    an integer label (``1`` and ``-1`` are the two classes that get plotted).

    A default network without hidden layers is trained first, followed by one
    network per (activation, solver) pair; parameters, iteration count, best
    loss and train/test accuracy plus confusion matrices are printed for each.

    Args:
        filename: Path of the CSV file to load.
    """

    def _print_scores(model):
        """Print accuracy and confusion matrix on the test and train splits."""
        for title, features, targets in (
            ("test accuracy:", X_test, y_test),
            ("train accuracy:", X_train, y_train),
        ):
            pred = model.predict(features)
            print(title, end=" ")
            print(metrics.accuracy_score(targets, pred))
            print(metrics.confusion_matrix(targets, pred))

    print(filename)
    X = []
    y = []
    with open(filename, "r") as f:
        next(f, None)  # skip the header row
        for line in f:
            if not line.strip():
                # A trailing blank line would otherwise crash the float parse.
                continue
            arr = line.strip('\n').split(",")
            X.append(list(map(float, arr[:2])))
            y.append(int(arr[2]))

    positives = [point for point, label in zip(X, y) if label == 1]
    negatives = [point for point, label in zip(X, y) if label == -1]
    plt.scatter([p[0] for p in positives], [p[1] for p in positives],
                label="one class")
    plt.scatter([p[0] for p in negatives], [p[1] for p in negatives],
                label="minus one class")
    plt.legend(loc="best")
    plt.show()

    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

    mlp = MLPClassifier(random_state=1, hidden_layer_sizes=(), max_iter=10000)
    mlp.fit(X_train, y_train)
    print("standard:")
    print(mlp.get_params())
    print("n_iter:", mlp.n_iter_)
    print("best_loss", mlp.best_loss_)
    _print_scores(mlp)

    for activation_func in ("relu", "identity", "logistic", "tanh"):
        for solver in ("lbfgs", "sgd", "adam"):
            mlp = MLPClassifier(random_state=1, hidden_layer_sizes=(),
                                max_iter=10000, activation=activation_func,
                                solver=solver)
            mlp.fit(X_train, y_train)
            print("------------------------")
            print("activation:", activation_func, ", solver:", solver)
            print(mlp.get_params())
            print("n_iter:", mlp.n_iter_)
            # The attribute may be missing for some solver/version
            # combinations; only that case is tolerated (the original used a
            # bare ``except`` that also hid unrelated errors).
            print("best_loss:", getattr(mlp, "best_loss_", None))
            _print_scores(mlp)
def main():
    """Train a logistic MLP on the hand dataset and report accuracy twice.

    The classifier is fitted on the first ``loadHand()`` split and scored on
    that split's test part, then scored again on the test part of a second
    ``loadHand()`` call. The fitted hyper-parameters are printed at the end.
    """

    def _report(features, targets, suffix):
        """Print the hit count and percentage accuracy for one test set."""
        # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
        # pandas 1.0; both yield the underlying ndarray.
        targets = targets.values
        prediction = classifier.predict(features)
        correct = sum(1 for truth, guess in zip(targets, prediction)
                      if truth == guess)
        print("You got", correct, "out of", len(prediction),
              "total datapoints" + suffix)
        print(correct / len(prediction) * 100)

    # small_grid()
    train, test, train_t, test_t = loadHand()
    classifier = MLPClassifier(hidden_layer_sizes=(25, 25), max_iter=500,
                               activation='logistic',
                               learning_rate='invscaling')
    classifier.fit(train, train_t)
    _report(test, test_t, "")

    # Second draw: only the test part is used; the model is not refitted.
    train, test, train_t, test_t = loadHand()
    _report(test, test_t, "1")

    print(classifier.get_params())
def __call__(self, config_id):
    """Fit one MLP configuration and return its evaluation report.

    Advances the module-level progress bar, trains on the ``'train'`` split,
    scores on ``'test'`` (and on ``'val'`` when the dataset has one), and
    returns a copy of the shared config extended with the metrics and the
    classifier's parameters.
    """
    pbar.update(1)

    train_x, train_y = self.dataset['train']
    clf = MLPClassifier(**self.configs[config_id]).fit(train_x, train_y)

    test_x, test_y = self.dataset['test']
    test_scores = run_test(clf, test_x, test_y)

    report = self.shared_config.copy()
    report.update(
        dict(
            zip(
                ('metric/test/accuracy',
                 'metric/test/f1_macro',
                 'metric/test/f1_micro'),
                test_scores,
            ),
            classifier_config=clf.get_params(),
        ) if False else {
            'metric/test/accuracy': test_scores[0],
            'metric/test/f1_macro': test_scores[1],
            'metric/test/f1_micro': test_scores[2],
            'classifier_config': clf.get_params(),
        }
    )

    if 'val' not in self.dataset:
        return report

    val_x, val_y = self.dataset['val']
    val_accuracy, val_f_macro, val_f_micro = run_test(clf, val_x, val_y)
    report.update({
        'metric/val/accuracy': val_accuracy,
        'metric/val/f1_macro': val_f_macro,
        'metric/val/f1_micro': val_f_micro,
    })
    return report
def evaluate_sk(emb, labels):
    """Score embeddings with a small tanh MLP on a 60/40 train/test split.

    Args:
        emb: Embedding matrix; wrapped in a ``pandas.DataFrame``.
        labels: Target label per embedding row.

    Returns:
        Mean accuracy of the fitted classifier on the held-out 40 % split
        (the same value that is printed).
    """
    print("evaluating with classifier")
    X = pd.DataFrame(emb)
    y = labels

    train_x, test_x, train_y, test_y = train_test_split(
        X, y, test_size=.4, random_state=42)

    # Fit the scaler on the training rows only so no statistics of the test
    # rows leak into training (the original scaled before splitting).
    scaler = StandardScaler().fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # Full-batch gradient descent: the batch is the whole training split.
    # The original passed the size of the unsplit data, which exceeds the
    # number of training samples.
    clf = MLPClassifier(solver='sgd',
                        activation='tanh',
                        learning_rate_init=0.001,
                        alpha=1e-5,
                        hidden_layer_sizes=(30, 30),
                        max_iter=10000,
                        batch_size=train_x.shape[0],
                        random_state=0)
    # NOTE: the original assigned ``n_outputs_`` and ``out_activation_`` here.
    # Both are fitted attributes that ``fit`` recomputes from the targets, so
    # the assignments had no effect and were dropped.
    print(clf.get_params())
    clf.fit(train_x, train_y)

    mean_acc = clf.score(test_x, test_y)
    print(mean_acc)
    return mean_acc
def run(self,
        input_file_path='output.json',
        output_file_path='predicted.json',
        num_max_train_instance=-1):
    """Train an MLP on the "random" frames and predict the remaining frames.

    Frames are read from ``input_file_path``. Frames with learning data whose
    barcode is random and that carry a truth value form the training set
    (optionally limited to those whose random index is below
    ``num_max_train_instance``; ``-1`` means no limit). Every non-random frame
    with learning data is predicted. The train/test frame indexes are written
    back into ``input_file_path`` and the predictions to ``output_file_path``.
    """
    with open(input_file_path, 'r', encoding='utf-8') as source:
        data = json.load(source)

    features, targets, train_indexes = [], [], []
    unlimited = num_max_train_instance == -1
    for frame in data['frames']:
        if not frame['learningData']:
            continue
        if not frame['barcode']['isRandom']:
            continue
        if 'random' not in frame or 'truthValue' not in frame['random']:
            continue
        if not (unlimited
                or frame['random']['index'] < num_max_train_instance):
            continue
        features.extend(frame['learningData'])
        targets.extend(frame['random']['truthValue'])
        train_indexes.append(frame['image']['index'])

    # model = RandomForestClassifier(n_jobs=-1)
    model = MLPClassifier()
    model.fit(features, targets)
    print(model.get_params())

    test_indexes, result = [], []
    for frame in data['frames']:
        if frame['learningData'] and not frame['barcode']['isRandom']:
            frame_index = frame['image']['index']
            result.append({
                'index': frame_index,
                'value': model.predict(frame['learningData']).tolist(),
            })
            test_indexes.append(frame_index)

    data['learning'] = {
        'trainIndexes': train_indexes,
        'testIndexes': test_indexes,
    }

    # The input file is rewritten in place with the added 'learning' section.
    for path, payload in ((input_file_path, data), (output_file_path, result)):
        with open(path, 'w', encoding='utf-8') as sink:
            sink.write(json.dumps(payload))
def main():
    """Grid-search an MLP on ``./train_data.txt`` and print test-set scores.

    Loads the tab-separated data, scales the selected feature columns, holds
    out 20 % for testing, runs a 4-fold F1 grid search over two parameter
    grids, then prints the best estimator's parameters and its accuracy,
    precision, recall and F1 on the held-out split.
    """
    ## Load data
    selected_columns = [
        'upstream_snp', 'downstream_snp', 'intron_snp', 'synonymous_snp',
        'nonsynonymous_snp', 'upstream_indel', 'downstream_indel',
        'intron_indel', 'synonymous_indel', 'nonsynonymous_indel',
        'module', 'expression', 'QTN', 'QTL',
    ]
    df = pd.read_csv('./train_data.txt', sep='\t', header=None)
    # The feature columns sit between the two id columns and the trailing
    # annotation/label columns.
    df.columns = ['case', 'gene'] + selected_columns + ['QTG', 'type', 'label']

    y = df['label']
    print('Positive(N): %d' % df['label'].sum())
    print('Negative(N): %d' % (len(y) - df['label'].sum()))

    ## Scale
    X = preprocessing.scale(df[selected_columns])

    ## Train set and Test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=8)

    ## Grid search
    activations = ['identity', 'logistic', 'tanh', 'relu']
    tolerances = [1e-4, 1e-5]
    param_grid = [
        {
            'activation': activations,
            'solver': ['sgd', 'adam'],
            'hidden_layer_sizes': [(64, 64), (32, 32), (16, 16)],
            'tol': tolerances,
            'max_iter': [50, 100, 200],
        },
        {
            'activation': activations,
            'solver': ['lbfgs'],
            'hidden_layer_sizes': [(8, 8), (8, 4), (4, 4)],
            'tol': tolerances,
            'max_iter': [10, 20, 40, 60],
        },
    ]
    gridsearch = GridSearchCV(MLPClassifier(early_stopping=True), param_grid,
                              scoring='f1', n_jobs=-1, cv=4)
    gridsearch.fit(X_train, y_train)
    model = gridsearch.best_estimator_

    ## Print the best parameters
    for param_name, value in sorted(model.get_params().items(),
                                    key=lambda item: item[0]):
        print('\t%s: %r' % (param_name, value))

    ## Print the model scores
    preds = model.predict(X_test)
    for template, scorer in (
        ('Accuracy: %.4f', accuracy_score),
        ('Precision: %.4f', precision_score),
        ('Recall: %.4f', recall_score),
        ('F1: %.4f', f1_score),
    ):
        print(template % scorer(y_test, preds))
def main():
    """Run timed MLP training over a small hyper-parameter grid and log results.

    For every combination of batch size, momentum and initial learning rate an
    SGD ``MLPClassifier`` (one 200-unit hidden layer, warm start, one
    iteration per ``fit`` call) is handed to ``trainer_by_time`` together with
    the time limits. One record per returned evaluation is collected; the
    resulting table is printed and appended, with a timestamp, to
    ``result.txt``.
    """
    import traceback

    np.random.seed(RANDOM_STATE)
    pd.set_option('display.width', 0)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)

    data = pd.read_csv('data/train.csv')
    # test_data = pd.read_csv('data/test.csv')
    records = []
    # n = 42000*0.8
    n = 10000
    X, y = extract_data(data, n)
    # NOTE(review): the original defined ``activation = 'tanh'`` here but never
    # passed it to the classifier, so the default activation is what runs.
    param_dict = {'batch_size': [100, 200],
                  'momentum': [0.9, 0.99],
                  'learning_rate_init': [0.001, 0.01, 0.1]}
    # param_dict = {'batch_size': [200], 'momentum': [0.9], 'learning_rate_init':[0.1]}

    time_limits = list(range(1, 60, 60))  # loop-invariant; evaluates to [1]
    metric_keys = []  # evaluation keys in first-seen order, across all runs

    for param in ParameterGrid(param_dict):
        # ``solver`` is the keyword accepted by released scikit-learn; the
        # original ``algorithm=`` spelling is rejected with a TypeError.
        nn = MLPClassifier(solver='sgd', tol=float('-inf'), warm_start=True,
                           max_iter=1, hidden_layer_sizes=[200],
                           random_state=RANDOM_STATE)
        nn.set_params(**param)

        try:
            evaluation_list = trainer_by_time(X, y, time_limits, nn)
        except Exception:
            # Keep the sweep going, but make the failure visible instead of
            # silently recording an empty row.
            traceback.print_exc()
            evaluation_list = [{}]

        for time_limit, evaluation in zip(time_limits, evaluation_list):
            record = {'n': n, 'time limit': time_limit}
            record.update(evaluation)
            record.update(param)
            records.append(record)
            metric_keys.extend(k for k in evaluation if k not in metric_keys)

    # Put the evaluation columns last. Using keys seen in any run (rather than
    # only the last one) keeps the layout stable when the final run failed.
    df = pd.DataFrame(records)
    cols = [item for item in df.columns if item not in metric_keys]
    cols += metric_keys
    df = df.reindex(columns=cols)

    now = datetime.datetime.now()
    with open('result.txt', 'a') as result_file:
        print(now, file=result_file)
        print(df)
        print(df, file=result_file)
def main():
    """Fit four baseline models on the NARAC genotype table and print scores.

    The whitespace-delimited file is read in chunks, the ``Affection`` column
    is taken as the target, several phenotype columns are dropped, the
    remaining columns are one-hot encoded, and a random forest, LASSO,
    logistic regression and MLP are each fitted and scored on a train/test
    split.
    """
    np.set_printoptions(suppress=True)
    narac_file_path = "../../tigress/arburton/plink_data/narac_rf"

    # ``sep=r"\s+"`` is the documented equivalent of the deprecated
    # ``delim_whitespace=True``.
    reader = pd.read_csv(narac_file_path, sep=r"\s+", index_col=0,
                         chunksize=20000)
    samples = pd.concat(list(reader), axis=0)

    # Pull out the affection column as y. The original built
    # ``pd.DataFrame(samples, columns="Affection")``, which passes a bare
    # string where a list of column labels is required.
    affection = samples["Affection"]
    samples = samples.drop([
        "Affection", "Sex", "DRB1_1", "DRB1_2", "SENum", "SEStatus",
        "AntiCCP", "RFUW"
    ], axis=1)

    # One-hot encode every column except "ID". ``columns`` expects column
    # labels; the original passed the boolean mask ``samples.columns != "ID"``.
    encode_columns = [col for col in samples.columns if col != "ID"]
    samples = pd.get_dummies(samples, columns=encode_columns)

    # NOTE(review): test_size=0.8 trains on only 20 % of the rows -- confirm
    # this is intended.
    sample_train, sample_test, affection_train, affection_test = train_test_split(
        samples, affection, test_size=0.8)
    # TODO: potentially make sample weights percentage of non ?? SNPs

    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=5000, max_features=40, n_jobs=2)
    rf.fit(sample_train, affection_train)
    print("Random forest accuracy: {}".format(
        rf.score(sample_test, affection_test)))
    print("Random forest feature importances:")
    print(rf.feature_importances_)
    print("Random forest parameters:")
    print(rf.get_params())

    # LASSO
    # NOTE(review): Lasso is a regressor, so ``score`` is presumably R^2 rather
    # than classification accuracy despite the label -- confirm.
    lasso = Lasso()
    lasso.fit(sample_train, affection_train)
    print("LASSO accuracy: {}".format(lasso.score(sample_test, affection_test)))
    print("LASSO parameters:")
    print(lasso.get_params())

    # LOG REGRESSION
    log_reg = LogisticRegression(n_jobs=2)
    log_reg.fit(sample_train, affection_train)
    print("Log regression accuracy: {}".format(
        log_reg.score(sample_test, affection_test)))
    print("Log regression parameters:")
    print(log_reg.get_params())

    # NEURAL NETS
    mlp_classifier = MLPClassifier()
    mlp_classifier.fit(sample_train, affection_train)
    print("MLP Classifier accuracy: {}".format(
        mlp_classifier.score(sample_test, affection_test)))
    print("MLP Classifier parameters:")
    print(mlp_classifier.get_params())
def CreateClassifier(dop):
    """Build an unfitted MLPClassifier from the option mapping ``dop``.

    ``dop`` must provide the keys listed in ``option_names``; each is passed
    through as the keyword argument of the same name. The resulting
    parameters are printed and the classifier is returned.
    """
    option_names = (
        'hidden_layer_sizes',
        'activation',
        'solver',
        'learning_rate',
        'learning_rate_init',
        'max_iter',
    )
    settings = {option: dop[option] for option in option_names}
    classifier = MLPClassifier(**settings)
    print(classifier.get_params())
    return classifier
def MLP_normal(x_train,y_train, x_test,y_test):
    """Create an SGD MLP, run one ``partial_fit`` pass on the training data and return it.

    ``x_test`` and ``y_test`` are accepted but not used by this function.
    """
    # Use the neural-network ("DNN") model shipped with scikit-learn.
    from sklearn.neural_network import MLPClassifier

    model = MLPClassifier(
        warm_start=True,
        hidden_layer_sizes=(100, 500, 100),
        solver='sgd',
    )
    print(model.get_params())

    # Train the model; partial_fit needs the full set of class labels.
    model.partial_fit(x_train, y_train, classes=np.unique(y_train))
    return model
def __init__(self, classifier=None, max_iter=1500):
    """Wrap a classifier in a ``MinMaxScaler`` pipeline.

    Args:
        classifier: Estimator used as the final pipeline step. When ``None``,
            a ReLU ``MLPClassifier`` with one 2048-unit hidden layer is built.
        max_iter: Iteration cap for the default MLP; not applied when a
            ``classifier`` is supplied.
    """
    # Compare against None explicitly. Truth-testing an estimator falls back
    # to ``__len__`` where one is defined (scikit-learn ensembles and
    # pipelines define it), which can raise on an unfitted ensemble or
    # mistake a valid estimator for "not provided".
    if classifier is None:
        classifier = MLPClassifier(activation='relu',
                                   alpha=1e-5,
                                   hidden_layer_sizes=(2048, ),
                                   random_state=1,
                                   max_iter=max_iter)
    print("Selected classifier: ", classifier.get_params())
    self.pipeline = make_pipeline(MinMaxScaler(), classifier)
def training(self, training, testing):
    """Fit an SGD MLP on the training images and pickle it to ``self.filename``.

    Args:
        training: Pair ``(images, labels)``; the images are divided by 255.0
            before fitting.
        testing: Not used by this method; kept so existing callers continue
            to work.
    """
    x_training = training[0] / 255.0
    y_training = training[1]

    mlp = MLPClassifier(hidden_layer_sizes=(100, ), max_iter=100, alpha=1e-4,
                        solver='sgd', verbose=10, tol=0.0001, random_state=1,
                        learning_rate_init=.1, learning_rate="adaptive")
    mlp.fit(x_training, y_training)

    # Single-argument ``print(...)`` behaves the same on Python 2 and 3; the
    # original print statements are a syntax error on Python 3.
    print(mlp.get_params())
    with open(self.filename, 'wb') as output:
        pickle.dump(mlp, output, pickle.HIGHEST_PROTOCOL)
    print("[+] Saving Completed")
def neural_network(X,y):
    """Fit a default MLPClassifier on a random split and report its scores.

    Args:
        X: Feature matrix.
        y: Target labels.

    Returns:
        Tuple ``(train_score, test_score, params)``: mean accuracy on the
        train and test splits and the classifier's parameter dict. The same
        values are printed.
    """
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
    model = MLPClassifier()
    model.fit(Xtrain, ytrain)
    # The original also called ``model.predict(Xtest)`` and discarded the
    # result; ``score`` predicts internally, so that call was dropped.
    train_score = model.score(Xtrain, ytrain)
    test_score = model.score(Xtest, ytest)
    params = model.get_params()
    print(f"train score: {train_score:.3f} | test_score: {test_score:.3f} | params: {params}")
    return train_score, test_score, params
class myMlp():
    """Holds a train/test dataset and an MLPClassifier, and times fit/predict."""

    def __init__(self, train_data, train_label, test_data, test_label):
        """Store the four data arrays; no classifier exists until an activation is chosen."""
        self.train_data, self.train_label = train_data, train_label
        self.test_data, self.test_label = test_data, test_label
        self.predict_label = None
        self.train_time = 0
        self.test_time = 0
        self.clf = None

    def setActivationFunction(self, fun = 0):
        """Create the classifier for selector 1 (relu), 2 (tanh) or 3 (identity).

        Any other selector leaves ``self.clf`` untouched.
        """
        for selector, activation in ((1, 'relu'), (2, 'tanh'), (3, 'identity')):
            if fun == selector:
                self.clf = MLPClassifier(hidden_layer_sizes=(100, 50,),
                                         activation=activation,
                                         solver='adam',
                                         alpha=0.0001,
                                         max_iter=300)
                break

    def train(self):
        """Fit the classifier on the training data and return the elapsed seconds."""
        print("Start train")
        began = time.time()
        self.clf.fit(self.train_data, self.train_label)
        elapsed = time.time() - began
        print("End train", elapsed)
        self.train_time = elapsed
        return self.train_time

    def test(self):
        """Predict the test data; return ``(test_label, elapsed seconds)``."""
        print("Start test")
        began = time.time()
        self.predict_label = self.clf.predict(self.test_data)
        elapsed = time.time() - began
        print("End test", elapsed)
        self.test_time = elapsed
        return self.test_label, self.test_time

    def getTestLabel(self):
        """Return the true test labels."""
        return self.test_label

    def getPredictLabel(self):
        """Return the labels predicted by the last ``test`` call (``None`` before it)."""
        return self.predict_label

    def getTrainTime(self):
        """Return the duration of the last ``train`` call."""
        return self.train_time

    def getTestTime(self):
        """Return the duration of the last ``test`` call."""
        return self.test_time

    def getParams(self):
        """Return the classifier's parameter dict."""
        return self.clf.get_params()
def trainNN(self, training_data, output_data):
    """Train a logistic SGD MLP, print timing and parameters, and pickle it.

    The fitted model is written to ``finalized_model.sav`` in the current
    working directory.

    Args:
        training_data: Feature matrix passed to ``MLPClassifier.fit``.
        output_data: Targets passed to ``MLPClassifier.fit``.
    """
    e1 = cv2.getTickCount()
    print("Building Perceptron...")

    # Creating the multi-layer perceptron.
    mlp = MLPClassifier(hidden_layer_sizes=(32, 16), activation='logistic',
                        solver='sgd', learning_rate_init=0.1, alpha=0.1,
                        random_state=1, max_iter=20000, momentum=0)
    # NOTE: the original set ``mlp.out_activation_ = 'identity'`` here. It is
    # a fitted attribute that ``fit`` recomputes, so the assignment had no
    # effect and was removed.

    print("Training MLP.............")
    mlp.fit(training_data, output_data)

    e2 = cv2.getTickCount()
    time_taken = (e2 - e1) / cv2.getTickFrequency()
    # Formatted into one string so the output is identical on Python 2 and 3
    # (the two spaces reproduce the Python 2 ``print a, b`` separator).
    print("Time taken to train :  %s" % (time_taken,))
    print(mlp.get_params())

    # Save the model to disk; the context manager closes the file handle.
    filename = 'finalized_model.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(mlp, model_file)
def ann_factory(dsname):
    """Return an unfitted Adam MLPClassifier sized for the named dataset.

    ``dsname`` must be ``'musk'``, ``'shoppers'`` or ``'cancer'``; any other
    name raises ``KeyError``. The chosen parameters are logged at INFO level.
    """
    layers_by_dataset = dict(
        musk=(200, 200),
        shoppers=(20, 20, 20),
        cancer=(10, ),
    )
    layer_sizes = layers_by_dataset[dsname]

    ann = MLPClassifier(
        solver='adam',
        early_stopping=True,
        shuffle=True,
        random_state=10,
        learning_rate='adaptive',
        hidden_layer_sizes=layer_sizes,
    )
    message = 'Created MLPClassifier with parameters: {}'.format(
        ann.get_params())
    logging.info(message)
    return ann
def Run(X_train, X_test, y_train, y_test):
    """Grid-search the MLP ``alpha`` penalty on bag-of-words features.

    A ``CountVectorizer`` is fitted on the training text, both splits are
    converted with ``Format_inputs``, and a 10-fold ``GridSearchCV`` over six
    ``alpha`` values is fitted on the training split. The available parameter
    names, the best cross-validation score and the best parameters are
    printed. ``y_test`` is not used here.
    """
    vectorizer = CountVectorizer(stop_words = 'english', min_df = 0.0035)
    vect = vectorizer.fit(X_train)
    X_train = Format_inputs(vect, X_train)
    X_test = Format_inputs(vect, X_test)

    base_model = MLPClassifier()
    alpha_grid = [{'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]}]
    print(base_model.get_params().keys())

    search = GridSearchCV(base_model, alpha_grid, cv = 10)
    search.fit(X_train, y_train)

    for outcome in (search.best_score_, search.best_params_):
        print(outcome)
def model_fitting(x, y, test_size=0.33, seed=7, pfi_fitted_models=''):
    """Fit a default MLP on a train split and pickle its hyper-parameters.

    Args:
        x: Feature matrix.
        y: Target labels.
        test_size: Fraction of the data held out for scoring.
        seed: Random state used for the train/test split.
        pfi_fitted_models: Path of the pickle file to write. It must already
            exist on disk.

    Returns:
        Mean accuracy of the fitted model on the held-out split.

    Raises:
        ValueError: If ``pfi_fitted_models`` does not exist.
    """
    # Validate the output path before training so a bad path fails fast
    # instead of after the fit.
    if not os.path.exists(pfi_fitted_models):
        raise ValueError(
            'Output path does not exist: {!r}'.format(pfi_fitted_models))

    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=test_size, random_state=seed)
    model = MLPClassifier()
    model.fit(x_train, y_train)

    # NOTE(review): ``get_params()`` holds constructor hyper-parameters only,
    # not the learned weights. The payload is unchanged to stay compatible
    # with existing readers -- confirm whether the fitted model was intended.
    with open(pfi_fitted_models, 'wb') as out:
        pickle.dump(model.get_params(), out)

    return model.score(x_test, y_test)
class BP_FFNN:
    """Feed-forward network (scikit-learn MLP) for the "down gesture" image lists."""

    def __init__(self):
        """Create the classifier from a fixed hyper-parameter set kept in ``self.params``."""
        self.params = dict(
            solver='sgd',
            learning_rate='constant',
            learning_rate_init=0.01,
            activation='logistic',
            max_iter=1000,
            hidden_layer_sizes=(100,),
        )
        self.mlp = MLPClassifier(**self.params)

    def read_PGM(self, filepath):
        """Open the image at ``filepath`` and return its pixel data as a list."""
        return list(Image.open(filepath).getdata())

    def load_data_set(self, file_list):
        """Load every image named in ``file_list`` (one path per line).

        Returns ``(x_data, y_data)``: the pixel list of each image and a label
        that is 1 when the path contains ``'down'`` and 0 otherwise.
        """
        with open(file_list, 'r') as listing:
            paths = listing.read().splitlines()

        x_data, y_data = [], []
        for path in paths:
            y_data.append(int('down' in path))
            x_data.append(self.read_PGM(path))
        return x_data, y_data

    def run(self):
        """Fit on the training list three times, printing scores and parameters each round."""
        train_x_data, train_y_data = self.load_data_set('downgesture_train.list')
        test_x_data, test_y_data = self.load_data_set('downgesture_test.list')

        for round_number in range(1, 4):
            self.mlp.fit(train_x_data, train_y_data)
            print('round{}'.format(round_number))
            print('train scores:', self.mlp.score(train_x_data, train_y_data))
            print('test scores:', self.mlp.score(test_x_data, test_y_data))
            print('parameters:', self.mlp.get_params())
            print('')
def __call__(self, config_id):
    """Fit the MLP for ``config_id`` and return its report dict.

    The report starts as a copy of the shared config, gains the classifier's
    parameters, and then the ``run_test`` metrics of the ``'test'`` split
    (prefixed ``test/``) and, when present, of the ``'val'`` split (prefixed
    ``val/``).
    """

    def _prefixed_metrics(split, key_template):
        """Run the per-class evaluation on one split and prefix its metric names."""
        features, targets = self.dataset[split]
        scores = self.run_test(clf, features, targets, per_class_metric=True)
        return {key_template.format(name): value
                for name, value in scores.items()}

    hyper_params = self.configs[config_id]
    report = self.shared_config.copy()

    # train
    train_x, train_y = self.dataset['train']
    clf = MLPClassifier(**hyper_params).fit(train_x, train_y)
    report.update({'classifier_config': clf.get_params()})

    # test
    report.update(_prefixed_metrics('test', 'test/{}'))

    # validation split is optional
    if 'val' in self.dataset:
        report.update(_prefixed_metrics('val', 'val/{}'))

    return report
def MLPClassifier_Model(X_train, y_train, X_test, y_test, max_iter):
    """Fit an MLP, print its test score and 3-fold CV scores, and visualise it.

    The parameters are shown with ``visualize_params`` and the test
    predictions with ``visualize_heatmap``. Returns the mean accuracy on the
    test split.
    """
    model = MLPClassifier(max_iter = max_iter)
    fitted = model.fit(X_train, y_train)

    score = model.score(X_test, y_test)
    predictions = model.predict(X_test)
    # Cross-validation is run on the test split, as in the original.
    cv_scores = cross_val_score(fitted, X_test, y_test, cv = 3)

    for line in (
        (' ',),
        ('===== MLP Classifier Model =====',),
        ('score:', score),
        ('cross validation scores:', cv_scores),
    ):
        print(*line)

    # Visualize parameters in a table.
    visualize_params(model.get_params())
    # Display confusion matrix.
    visualize_heatmap(y_test, predictions, 'MLP Classifier')
    return score
class NeuralNet:
    """Thin wrapper around ``MLPClassifier`` with explicit width/depth knobs.

    ``set_params`` accepts ``n_hidden_neurons`` (units per hidden layer) and
    ``n_hidden_layers`` (number of hidden layers) and translates them into the
    wrapped classifier's ``hidden_layer_sizes``. ``get_params`` returns the
    wrapped classifier's parameters without ``hidden_layer_sizes``.
    """

    def __init__(self, **params):
        """Create the wrapped classifier; ``params`` go to ``MLPClassifier`` unchanged."""
        self.neuralnet = MLPClassifier(**params)

    def fit(self, X, y, **params):
        """Fit the wrapped classifier and return the fitted estimator.

        Extra keyword arguments are accepted but not used.
        """
        return self.neuralnet.fit(X, y)

    def predict(self, X):
        """Return the wrapped classifier's class predictions for ``X``."""
        return self.neuralnet.predict(X)

    def predict_log_proba(self, X):
        """Return the wrapped classifier's log-probability estimates for ``X``."""
        return self.neuralnet.predict_log_proba(X)

    def predict_proba(self, X):
        """Return the wrapped classifier's probability estimates for ``X``."""
        return self.neuralnet.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        """Return the wrapped classifier's score on ``(X, y)``."""
        return self.neuralnet.score(X, y, sample_weight)

    def set_params(self, **params):
        """Set parameters on the wrapped classifier.

        When either ``n_hidden_neurons`` or ``n_hidden_layers`` is given, both
        are required (``KeyError`` otherwise) and are replaced by the
        equivalent ``hidden_layer_sizes`` tuple. Calls that touch neither key
        are passed straight through; the original raised ``KeyError`` for
        those, which made it impossible to set any other parameter alone.

        Returns:
            ``self``, following the scikit-learn ``set_params`` convention.
        """
        if 'n_hidden_neurons' in params or 'n_hidden_layers' in params:
            n_hidden_neurons = params.pop('n_hidden_neurons')
            n_hidden_layers = params.pop('n_hidden_layers')
            params['hidden_layer_sizes'] = (n_hidden_neurons,) * n_hidden_layers
        self.neuralnet.set_params(**params)
        return self

    def get_params(self, deep=True):
        """Return the wrapped classifier's parameters minus ``hidden_layer_sizes``."""
        params = self.neuralnet.get_params(deep)
        del params['hidden_layer_sizes']
        return params
def neural_network(X, y, predict):
    """Fit a default MLPClassifier and plot its predictions on the training split.

    Args:
        X: Feature matrix.
        y: Target labels.
        predict: Not used by this function; kept for existing callers.

    Returns:
        Tuple ``(train_score, test_score, params)``: mean accuracy on the
        train and test splits and the classifier's parameter dict. The
        original computed these values and then discarded them.
    """
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
    model = MLPClassifier()
    model.fit(Xtrain, ytrain)

    # Predict once and reuse the result (the original predicted three times).
    yhat_train = model.predict(Xtrain)
    train_score = model.score(Xtrain, ytrain)
    test_score = model.score(Xtest, ytest)
    params = model.get_params()

    xpts = range(len(yhat_train))
    fig, ax = plt.subplots(1, 1, figsize=(20, 8))
    ax.plot(xpts, yhat_train, color='b')
    ax.set_ylabel("yhat")
    ax.set_xlabel("over xpts")
    plt.show()
    return train_score, test_score, params
def get_mlp(x_train, t_train, x_val, t_val, search=False):
    """Fit an MLP (optionally after a parameter search) and print its validation result.

    With ``search=True`` the hyper-parameters come from ``param_sel`` run over
    a small grid; otherwise a fixed parameter set is used. The final model is
    trained with ``max_iter=6000``, its parameters and the outcome of
    ``validate`` on the validation data are printed, and the fitted classifier
    is returned.
    """
    # Results recorded from earlier runs:
    # {'activation': 'relu', 'alpha': 0.1, 'learning_rate': 'constant', 'solver': 'adam'}
    # {'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': (100,), 'alpha': 0.06, 'activation': 'tanh'}
    # mlp validated at (array([0.8940068 , 0.78879874, 0.71866004, 0.99057592, 0.74764398]), 0.9286105369755633)
    # MLP tested at (array([0.72152429, 0.7118928 , 0.91457286, 0.71602094, 0.70136126]), 0.9242268552514312)
    # mlp validated at (array([0.8940068 , 0.78879874, 0.71866004, 0.99057592, 0.74764398]), 0.9833533999895304)
    # MLP tested at (array([0.72152429, 0.7118928 , 0.91457286, 0.71602094, 0.70136126]), 0.9823687075969512)
    # {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'solver': 'adam'}
    # mlp validated at (array([0.98953154, 0.99188694, 0.99188694, 0.98848168, 0.98612565]), 0.9537539956508365)
    # MLP tested at (array([0.98680905, 0.98848409, 0.98911223, 0.98910995, 0.99036649]), 0.9467248198442366)
    if not search:
        mlp_params = dict(
            activation='relu',
            alpha=0.01,
            learning_rate='adaptive',
            solver='adam',
            hidden_layer_sizes=(100, ),
        )
    else:
        search_space = {
            'alpha': [0.06, 0.1],
            'hidden_layer_sizes': [(20, 20, 10), (100, )],
            'activation': ['relu', 'tanh'],
            'solver': ['sgd', 'adam'],
            'learning_rate': ['constant', 'adaptive'],
        }
        mlp_params = param_sel(x_train, t_train,
                               MLPClassifier(max_iter=1000), search_space)

    mlp_classifier = MLPClassifier(max_iter=6000, **mlp_params)
    mlp_classifier.fit(x_train, t_train)

    print("MLP params:", mlp_classifier.get_params())
    print("MLP validated at", validate(mlp_classifier, x_val, t_val))
    return mlp_classifier
def test_serialize_model():
    """Round-trip a hand-populated MLPClassifier through HostFootprint's (de)serialisation.

    Fitted attributes are assigned by hand instead of training, the model is
    serialised to a temporary JSON file and read back, and selected attributes
    of both models are compared.
    """
    instance = HostFootprint()
    model = MLPClassifier()

    label_binarizer = LabelBinarizer()
    for attribute, value in (
        ('neg_label', 0),
        ('pos_label', 1),
        ('sparse_output', False),
        ('y_type_', "binary"),
        ('sparse_input_', False),
        ('classes_', np.array([0])),
    ):
        setattr(label_binarizer, attribute, value)

    parameters = {'hidden_layer_sizes': [(64, 32)]}
    GridSearchCV(model, parameters, cv=5, n_jobs=-1, scoring='f1_weighted')

    for attribute, value in (
        ('coefs_', np.array([[1], [2]])),
        ('loss_', 42),
        ('intercepts_', np.array([[3], [4]])),
        ('classes_', np.array([[5], [6]])),
        ('n_iter_', 42),
        ('n_layers_', 2),
        ('n_outputs_', 1),
        ('out_activation_', "logistic"),
        ('_label_binarizer', label_binarizer),
        ('features', ['test_1', 'test_2', 'test_3']),
    ):
        setattr(model, attribute, value)

    with tempfile.TemporaryDirectory() as tmpdir:
        model_file = os.path.join(tmpdir, 'host_footprint.json')
        instance.serialize_model(model, model_file)
        new_model = instance.deserialize_model(model_file)

        assert model.features == new_model.features

        original_params = model.get_params()
        print(f"model params: {original_params}")
        print(f"new_model params: {new_model.get_params()}")

        assert len(model.get_params()['hidden_layer_sizes']) == len(
            new_model.get_params()['hidden_layer_sizes'])
        assert model._label_binarizer.y_type_ == new_model._label_binarizer.y_type_
        assert len(model.coefs_) == len(new_model.coefs_)
        assert len(model.intercepts_) == len(new_model.intercepts_)
def learn(self, column, params):
    """Grid-search an MLPClassifier that predicts ``column`` from the other columns.

    The target and features are stored in ``self._y`` / ``self._X``, the data
    is split 70/30, and a 10-fold ``GridSearchCV`` is fitted on the training
    part. The best parameters are stored in ``self._best_params`` and the
    fitted search object in ``self._learned_model``. Accuracy on the test part
    is printed.

    Args:
        column: Name of the column to predict.
        params: Parameter grid for the search; ``None`` means an empty grid
            (default parameters only).

    Returns:
        ``False`` when the data frame check fails, otherwise ``True``.
    """
    if dfc.DataFrameChecker.is_df_num(self._df) is False:
        return False

    # predict data
    y = self._df[column]
    self._y = y

    # learning data
    X = self._df.drop([column], axis=1)
    self._X = X

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)

    model = MLPClassifier()
    phelper.PrintHelper.print_title('Default Params')
    print(model.get_params())

    phelper.PrintHelper.print_title('Params from a file')
    # Identity test: ``== None`` would be evaluated element-wise for
    # array-like grids.
    if params is None:
        print('Settings Params file is None.')
        params = {}
    else:
        print(params)

    print('...Doing Grid Search...')
    # NOTE(review): a regression metric is used to rank a classifier here --
    # confirm that 'neg_mean_squared_error' is intended.
    cv = GridSearchCV(model, params, cv=10,
                      scoring='neg_mean_squared_error', n_jobs=1, refit=True)
    cv.fit(X_train, y_train)

    self._best_params = cv.best_params_
    phelper.PrintHelper.print_title('Best Params')
    print(cv.best_params_)

    # The original first stored ``cv.best_estimator_`` and then immediately
    # overwrote it with ``cv``; only the effective assignment is kept.
    self._learned_model = cv

    # Accuracy Score
    print('...Predicting Test Data...')
    predicted_result = self._learned_model.predict(X_test).astype(int)
    phelper.PrintHelper.print_title('Accuracy Score')
    print(accuracy_score(y_test, predicted_result))
    return True
# Script fragment: build the feature frame, hold out 20 % of the rows and
# measure the error of an untuned MLPClassifier as a baseline.
targetNames = data['target_names']
X = pd.DataFrame(data.data, columns=featureNames)
Y = data.target
xTrain, xTest, yTrain, yTest = train_test_split(X, Y, shuffle=True, test_size=0.2, random_state=1234)
# Default neural network model without any tuning - base metric
MLPmodelDefault = MLPClassifier()
MLPmodelDefault.fit(xTrain, yTrain)
# The return value is not stored; presumably this line displayed the
# parameters in an interactive session -- verify.
MLPmodelDefault.get_params()
# train: error is 1 - accuracy on the training split
yPredTrainMLPDefault = MLPmodelDefault.predict(xTrain)
print(
    f'Train Error - Default Network: {1- metrics.accuracy_score(yTrain, yPredTrainMLPDefault)}'
)
# test: error is 1 - accuracy on the held-out split
yPredTestMLPDefault = MLPmodelDefault.predict(xTest)
print(
    f'Test Error - Default Network: {1- metrics.accuracy_score(yTest, yPredTestMLPDefault)}'
)
# Parameter tuning with GridSearchCV #######################
# Script fragment: scale the data, warm-start an MLP baseline, report it, then
# initialise the genetic-algorithm (GA) configuration and result containers.
print('Xtrain size:{}, X_test size:{}, y_train size:{}, y_test size:{}'.format(X_train.shape, X_test.shape, y_train.shape, y_test.shape))
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
# Reuse the statistics learned from the training data. The original called
# ``fit_transform`` here, which re-fitted the scaler on the test set and so
# scaled train and test differently.
X_test = scalar.transform(X_test)
mlp = MLPClassifier(alpha=1e-5, hidden_layer_sizes=[256, 256], random_state=0, max_iter=1, warm_start=True)
# Ten single-iteration warm-start fits, then one long fit from that state.
for i in range(10):
    mlp.fit(X_train, y_train)
mlp.max_iter = 100000
mlp.fit(X_train, y_train)
# ``classification_report`` takes (y_true, y_pred); the original passed them
# swapped, which exchanges precision and recall in the printed report.
print(classification_report(y_test, mlp.predict(X_test), target_names=list(conf['input']['target_List'].keys())))
print(mlp.get_params())
# exit()
estimator = None
# GA hyperparameters
train_json.GA = set_config(**conf['GA'])
## Initialise the GA result JSON
## Initialise the training-result JSON
train_json.Result=ac()
train_json.Result.FeatureSelection = ac()
feature_names = np.array(feature_names)
while True:
    # Print the genetic-algorithm settings.
    print("Generic Algorithm Hyperparametor as followed")
    for k, v in conf['GA'].items():
        print(f'{k}: {v}')
# Script fragment: shuffle the data frame, log its size, create a default
# MLPClassifier and prepare a standardised training split.
df = shuffle(df).reset_index(drop=True)
f.write('Dataset %s\n' % path)
""" Properties of the dataset """
data_total = df.shape
# The row count is reported both on stdout and in the log file ``f``.
print('Total llds %d' % data_total[0])
f.write('Total llds %d \n' % data_total[0])
""" Neural network """
print("[+] Applying Neural Network")
model = MLPClassifier()
print(model.get_params())
# Features are every column except 'label' and 'lld'; 'label' is the target.
x = df.drop(['label', 'lld'], axis=1).values
y = df['label'].values
# Create a stratified test set of about 20% of the dataset.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42, stratify=y)
# Standardise the numeric variables; the scaler is fitted on the training
# split only.
scale = preprocessing.StandardScaler()
scale.fit(x_train)
x_train = scale.transform(x_train)
# train the model mlpc.fit(x_train, y_train) # looking at the attributes classes = mlpc.classes_ # target column classes loss = mlpc.loss_ # the loss computed with the loss function coefs = mlpc.coefs_ # a list of the weight matrix for each layer intercepts = mlpc.intercepts_ # a list of the bias vector for each layer n_iter = mlpc.n_iter_ # the number of interations the solver has run n_layers = mlpc.n_layers_ # the number of layers of the model n_outputs = mlpc.n_outputs_ # the number of outputs, maybe corresponding to the number of classes out_activation = mlpc.out_activation_ # the name of the output activation function used # looking at the methods get_params = mlpc.get_params() # returning the parameters for the model prediction_array = mlpc.predict( x_test ) # running the test dataset through the model, giving an array of predicted values predict_log_proba = mlpc.predict_log_proba( x_test) # log of probability estimate for each class predict_proba = mlpc.predict_proba(x_test) # the probability for each class train_score = mlpc.score( x_train, y_train) # returns the mean accuracy of the training set test_score = mlpc.score(x_test, y_test) # returns the mean accuracy of the test set print( 'Using the standard neural network model the accuracy score for the train dataset is: %.3f and the accuracy for the test dataset is: %.3f' % (train_score, test_score))