def MLP(data_directory, model_dir, features):
    # pre() is a project-specific loader; mlp and gs appear to alias
    # sklearn's MLPRegressor and GridSearchCV, judging by usage.
    X_train, X_test, y_train, y_test, predict_X, features = pre(
        data_directory, features)
    os.chdir(model_dir)
    model = mlp(random_state=1, max_iter=10000)
    grid = gs(estimator=model,
              param_grid={
                  'hidden_layer_sizes': [(500, 500)],
                  'activation': ['logistic', 'tanh', 'relu'],
                  # exp(2.303 * k) ~ 10**k, i.e. alphas from 1e-8 to 1e-1
                  'alpha': np.exp(2.303 * np.arange(-8, 0)),
                  'learning_rate': ['constant']
              },
              cv=5, n_jobs=6)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    # score the tuned model once on the held-out split and reuse the value
    test_score = grid.best_estimator_.score(X_test, y_test)
    print(test_score)
    joblib.dump(grid.best_estimator_,
                'mlp_%d_%.4f.m' % (len(features), test_score))
    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = grid.best_estimator_.predict(X_test)
    print(df)
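# A minimal reload sketch for the artifact dumped above: joblib round-trips
# the fitted best_estimator_. The file name is illustrative (the real one
# embeds the feature count and test score), and X_test/y_test are assumed
# to still be in scope, as they are inside MLP().
import joblib

reloaded = joblib.load('mlp_24_0.8512.m')  # illustrative file name
print(reloaded.score(X_test, y_test))      # should match the dumped score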
def train_model():
    print("Loading Data...")
    pd.set_option('display.max_columns', 20)
    # forward slashes keep the paths portable and avoid "\K"-style
    # invalid escape sequences
    f = open("NSL_KDD-master/KDDTrain+.csv")
    cnfile = open("NSL_KDD-master/Field Names.csv")
    column_names = pd.read_csv(cnfile, header=None)
    column_names_list = column_names[0].tolist()
    col_list = list(range(0, 42))
    column_names_list.append("labels")
    data = pd.read_csv(f, header=None, names=column_names_list,
                       usecols=col_list)
    # filter out the service values and label classes excluded from
    # this experiment
    excluded_services = ["harvest", "urh_i", "red_i", "ftp_u", "tftp_u",
                         "aol", "http_8001", "http_2784", "pm_dump"]
    data = data[~data.service.isin(excluded_services)]
    data = data[~data.labels.isin(["spy", "warezclient"])]
    labels = data.labels
    print(labels)
    data.drop(['labels'], axis=1, inplace=True)
    df = pd.get_dummies(data)  # one-hot encode the categorical columns
    print(df.head(20))
    print(df.shape)
    c_range = [50, 75, 100]
    gamma_range = [.0001, .0005, .00001]
    tuned_parameters = dict(kernel=['rbf'], gamma=gamma_range,
                            C=c_range, shrinking=[False])
    grid = gs(svm.SVC(), tuned_parameters, cv=3, scoring='accuracy',
              n_jobs=2, verbose=10)
    grid.fit(df, labels)
    print(grid.best_params_)
    print(grid.best_score_)
    # refit a final model; note gamma/C are hardcoded here rather than
    # taken from grid.best_params_
    clf = svm.SVC(gamma=.0001, verbose=1, shrinking=False, C=100,
                  kernel='rbf', max_iter=100000)
    clf.fit(df, labels)
    save_file = 'SVM_trained_model.sav'
    pickle.dump(clf, open(save_file, 'wb'))
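# A hedged inference sketch for the pickled SVM: pd.get_dummies() must yield
# the same columns as at training time, so new rows are read with the same
# field names and reindexed against a saved copy of df.columns. Both
# 'column_names_list' and 'train_columns' are assumed to be kept around;
# predict_new() itself is not part of the original code.
import pickle
import pandas as pd

def predict_new(csv_path, column_names_list, train_columns,
                model_path='SVM_trained_model.sav'):
    new_data = pd.read_csv(csv_path, header=None, names=column_names_list)
    new_data.drop(columns=['labels'], errors='ignore', inplace=True)
    encoded = pd.get_dummies(new_data)
    # add training-time dummy columns missing from this batch, drop extras
    encoded = encoded.reindex(columns=train_columns, fill_value=0)
    clf = pickle.load(open(model_path, 'rb'))
    return clf.predict(encoded)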
def svc_param_selection(X, y, nfolds):
    Cs = [1000]
    gammas = [0.001, 0.01, 0.1, 1]
    degrees = [0]  # 'degree' is only used by the poly kernel, so it is inert here
    param_grid = {'C': Cs, 'gamma': gammas, 'degree': degrees}
    grid_search = gs(svm.SVC(kernel='sigmoid'), param_grid,
                     cv=nfolds, verbose=2)
    grid_search.fit(X, y)
    return grid_search.best_params_
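# A quick smoke test for svc_param_selection() on synthetic data; the
# make_classification call is illustrative, not part of the original project,
# and assumes svm and gs are imported as in the snippets above.
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=200, n_features=10,
                                     random_state=0)
best = svc_param_selection(X_demo, y_demo, nfolds=5)
print(best)  # e.g. {'C': 1000, 'degree': 0, 'gamma': 0.01}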
def DecisionTree(data_directory, model_dir, features):
    # pre() is the same project-specific loader used by MLP() above; dt
    # appears to alias sklearn's DecisionTreeRegressor (the criteria below
    # are regression criteria) and gs GridSearchCV.
    X_train, X_test, y_train, y_test, predict_X, features = pre(
        data_directory, features)
    os.chdir(model_dir)
    model = dt(random_state=1)
    grid = gs(estimator=model,
              param_grid={'criterion': ['mse', 'friedman_mse', 'mae'],
                          'splitter': ['best', 'random'],
                          'max_features': ['auto', 'sqrt', 'log2']},
              cv=5)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    test_score = grid.best_estimator_.score(X_test, y_test)
    print(test_score)
    joblib.dump(grid.best_estimator_,
                'dtr_%d_%.4f.m' % (len(features), test_score))
    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = grid.best_estimator_.predict(X_test)
    print(df)
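# The criterion names above were renamed in scikit-learn 1.0 ('mse' ->
# 'squared_error', 'mae' -> 'absolute_error') and the old names were later
# removed, so a version-aware grid keeps the function portable; a sketch:
from sklearn import __version__ as skl_version

major = int(skl_version.split('.')[0])
criteria = (['squared_error', 'friedman_mse', 'absolute_error']
            if major >= 1 else ['mse', 'friedman_mse', 'mae'])
# ...then pass param_grid={'criterion': criteria, ...} to the search above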
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  # probing for the best dropout rate
no_of_neurons = [1, 5, 10, 15, 20, 25, 30]  # probing for the number of neurons in the hidden layers
param_grid = dict(batch_size=batch_Size, epochs=epochs, optimizer=optimizers,
                  learn_rate=learn_rate, momentum=momentum, init=initialization,
                  activation=activation, weight_constraint=weights,
                  dropout_rate=dropout_rate, neurons=no_of_neurons)
grid = gs(estimator=classifier, param_grid=param_grid, n_jobs=-1)
gSearch = grid.fit(input_variables, output)
best_params = gSearch.best_params_
best_accuracy = gSearch.best_score_
# summarize results
print("Best score: %f using params %s" % (gSearch.best_score_, gSearch.best_params_))
means = gSearch.cv_results_['mean_test_score']
stds = gSearch.cv_results_['std_test_score']
params = gSearch.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
# evaluate using 10-fold cross validation
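# The fragment above assumes a 'classifier' built elsewhere; for this grid to
# work, KerasClassifier's build_fn must accept every tuned key except
# batch_size and epochs. A plausible sketch of that factory (input_dim=8, the
# single hidden layer, and the SGD wiring follow the common Keras-tuning
# pattern and are assumptions, not the original code; Keras 2's legacy
# 'maxnorm' alias and 'lr' argument are assumed):
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.constraints import maxnorm
from keras.optimizers import SGD

def create_model(optimizer='adam', learn_rate=0.01, momentum=0.0,
                 init='uniform', activation='relu', weight_constraint=3,
                 dropout_rate=0.0, neurons=10):
    model = Sequential()
    model.add(Dense(neurons, input_dim=8, kernel_initializer=init,
                    activation=activation,
                    kernel_constraint=maxnorm(weight_constraint)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, kernel_initializer=init, activation='sigmoid'))
    # learn_rate/momentum only take effect through a configured optimizer
    # instance, e.g. when the tuned optimizer is SGD
    if optimizer == 'SGD':
        optimizer = SGD(lr=learn_rate, momentum=momentum)
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model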
        # ...tail of feature_detect(): the histogram call that builds the
        # per-image feature vector (its opening line is not part of this snippet)
        range=(0, max_bins), density=True)
    return sample_hist


sample_data, sample_label = load_picture()
sample_hist = feature_detect()
param_dist = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': np.arange(1, 2001, 100),
    'gamma': np.arange(0.1, 1.1, 0.1)
}
clf = SVC()
grid_search = gs(clf, param_dist, cv=3, scoring="precision")
grid_result = grid_search.fit(sample_hist, sample_label)
best_estimator = grid_result.best_estimator_
print(best_estimator)
# The grid search above replaces the manual loop below:
# result = []
# maxid = 0
# max = 0
# id = 0
# kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
# for kernel in kernel_list:
#     for c in range(1, 2001, 100):
#         gamma = 0.1
#         while gamma <= 1:
#             clf = SVC(kernel=kernel, C=c, gamma=gamma)
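# The grid above evaluates roughly 4 * 20 * 10 = 800 candidates per CV split;
# a cheaper, hedged alternative is RandomizedSearchCV over the same space
# (the n_iter budget and random_state are arbitrary choices, not from the
# original code; scoring="precision" still assumes binary labels):
from sklearn.model_selection import RandomizedSearchCV

random_search = RandomizedSearchCV(SVC(), param_distributions=param_dist,
                                   n_iter=60, cv=3, scoring="precision",
                                   random_state=0)
# random_result = random_search.fit(sample_hist, sample_label)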
def build_classifier(optimizer='adam'):
    # the grid passes 'optimizer' through KerasClassifier, so the build
    # function must accept it; Sequential() takes layers, not an optimizer
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform',
                         activation='relu', input_dim=11))
    classifier.add(Dense(units=6, kernel_initializer='uniform',
                         activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform',
                         activation='sigmoid'))
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


classifier = kc(build_fn=build_classifier)
params = {
    'batch_size': [25, 32],
    'epochs': [100, 500],
    'optimizer': ['adam', 'rmsprop']
}
gSearch = gs(estimator=classifier, param_grid=params,
             scoring='accuracy', cv=10)
gSearch = gSearch.fit(xTrain, yTrain)
best_params = gSearch.best_params_
best_accuracy = gSearch.best_score_
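# GridSearchCV refits the winning configuration on the full training data by
# default (refit=True), so the tuned network is ready to use; a brief usage
# sketch, assuming xTrain from the snippet above is sliceable:
print("Best accuracy: %f using %s" % (best_accuracy, best_params))
preds = gSearch.best_estimator_.predict(xTrain[:5])  # quick sanity check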