# Split the data, reserving 40% for testing
X_train, X_test, y_train, y_test = train_test_split(
    datatrain_array[:, :9], datatrain_array[:, 9], test_size=0.4)
"""
The input layer has 9 neurons because the phishing dataset has 9 features.
The hidden layer has 10 neurons.
The output layer has 3 neurons because the phishing dataset has 3 classes.
Solver: sgd (stochastic gradient descent), learning rate 0.01, max 1500 iterations.
"""
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic',
                    solver='sgd', learning_rate_init=0.01, max_iter=1500,
                    random_state=7, tol=0.001)
# Train the model
mlp.fit(X_train, y_train)
# Test the model
print("testing model score")
print(mlp.score(X_test, y_test))
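# A minimal follow-up sketch (not part of the original snippet): per-class
# precision/recall is often more informative than a single accuracy number.
# Assumes the mlp, X_test and y_test objects from the split above.
from sklearn.metrics import classification_report

y_pred = mlp.predict(X_test)
print(classification_report(y_test, y_pred))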
from mnist import MNIST
from sklearn.neural_network import MLPClassifier
import pickle

mndata = MNIST('samples')
images, labels = mndata.load_training()
classifier = MLPClassifier(solver='sgd', alpha=0.0001, verbose=True,
                           hidden_layer_sizes=(70,), random_state=1)
print('training')
classifier.fit(images, labels)
with open('network.pickle', 'wb') as f:
    pickle.dump(classifier, f)
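# A hedged usage sketch (not in the original snippet): reload the pickled
# network and score it on the MNIST test split. Assumes the same 'samples'
# directory also holds the test files, which python-mnist exposes via
# load_testing().
with open('network.pickle', 'rb') as f:
    restored = pickle.load(f)
test_images, test_labels = mndata.load_testing()
print('test accuracy:', restored.score(test_images, test_labels))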
def ann(train_x, train_y, test_x, test_y, msno_df):
    print("ANN")
    clf = MLPClassifier(hidden_layer_sizes=(100, 150, 100, 50),
                        activation="relu", solver="lbfgs", alpha=1.0,
                        max_iter=500)
    checkResult(clf, "ANN", train_x, train_y, test_x, test_y, msno_df)
def first_generation(X, y, seed=None):
    mlp_parameters = list(
        itertools.product([1, 2, 4, 8, 16], [0, 0.2, 0.5, 0.9], [0.3, 0.6]))
    mlp_clf = [
        MLPClassifier(hidden_layer_sizes=(h,), momentum=m,
                      learning_rate_init=a) for (h, m, a) in mlp_parameters
    ]
    mlp_name = ['mlp_{0}_{1}_{2}'.format(*param) for param in mlp_parameters]

    neighbors_number = [int(i) for i in np.linspace(1, X.shape[0], 20)]
    weighting_methods = ['uniform', 'distance', lambda x: abs(1 - x)]
    knn_clf = [
        KNeighborsClassifier(n_neighbors=nn, weights=w)
        for (nn, w) in itertools.product(neighbors_number, weighting_methods)
    ]
    # the lambda weighting is labelled 'similarity' in the names
    knn_name = [
        'knn_{0}_{1}'.format(*param) for param in itertools.product(
            neighbors_number, ['uniform', 'distance', 'similarity'])
    ]

    C = [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]
    degree = [2, 3]
    gamma = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2]
    svm_clf_poly = [
        SVC(C=c, kernel='poly', degree=d)
        for (c, d) in itertools.product(C, degree)
    ]
    svm_clf_poly_name = [
        'svm_poly_{0}_{1}'.format(*param)
        for param in itertools.product(C, degree)
    ]
    svm_clf_rbf = [
        SVC(C=c, kernel='rbf', gamma=g)
        for (c, g) in itertools.product(C, gamma)
    ]
    svm_clf_rbf_name = [
        'svm_rbf_{0}_{1}'.format(*param)
        for param in itertools.product(C, gamma)
    ]

    dt_max_depth_params = list(
        itertools.product(['gini', 'entropy'], [1, 2, 3, 4, None]))
    dt_max_depth_clf = [
        DecisionTreeClassifier(criterion=c, max_depth=d)
        for (c, d) in dt_max_depth_params
    ]
    dt_max_depth_name = [
        'dt_max_depth_{0}_{1}'.format(*param) for param in dt_max_depth_params
    ]
    dt_max_features_params = list(
        itertools.product(['gini', 'entropy'], [None, 'sqrt', 'log2', 0.5]))
    dt_max_features_clf = [
        DecisionTreeClassifier(criterion=c, max_features=f)
        for (c, f) in dt_max_features_params
    ]
    dt_max_features_name = [
        'dt_max_features_{0}_{1}'.format(*param)
        for param in dt_max_features_params
    ]
    dt_min_leaf_params = [2, 3]
    dt_min_leaf_clf = [
        DecisionTreeClassifier(min_samples_leaf=l) for l in dt_min_leaf_params
    ]
    dt_min_leaf_name = [
        'dt_min_leaf_{0}'.format(param) for param in dt_min_leaf_params
    ]

    pool = (mlp_clf + knn_clf + svm_clf_poly + svm_clf_rbf +
            dt_max_depth_clf + dt_max_features_clf + dt_min_leaf_clf)
    pool_name = (mlp_name + knn_name + svm_clf_poly_name + svm_clf_rbf_name +
                 dt_max_depth_name + dt_max_features_name + dt_min_leaf_name)

    ensemble = VotingClassifier(estimators=list(zip(pool_name, pool)))
    ensemble.fit(X, y)
    estimators = ensemble.estimators_
    return estimators, pool_name
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

mnist = datasets.load_digits()
X = mnist['data']
y = mnist['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=3116)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Initialize the neural network, set up parameters for the grid, and run a
# random search over them
ann = MLPClassifier()
grid_parameters = {
    'hidden_layer_sizes': list(range(100, 450, 50)),
    'activation': ['relu', 'identity', 'logistic', 'tanh'],
    'learning_rate': ['constant', 'adaptive', 'invscaling']
}
ann_grid_search = RandomizedSearchCV(ann, grid_parameters, cv=5, n_iter=10)
ann_grid_search.fit(X_train, y_train)

# Accuracy score
y_pred = ann_grid_search.predict(X_test)
print('Accuracy Score:', accuracy_score(y_test, y_pred))
# Best hyperparameters
print(ann_grid_search.best_estimator_)
x_data = x_data.loc[:, x_data.columns != "Sequence"]
y_data = data.loc[:, "Type"]

random_state = 100
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
                                                    test_size=0.7,
                                                    random_state=100,
                                                    stratify=y_data)

scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

mlp = MLPClassifier()
mlp.fit(x_train, y_train)
y_pred_train = mlp.predict(x_train)
y_pred_test = mlp.predict(x_test)
print("classifier", mlp)
print("MLP Classifier")
print("Accuracy on Train Set")
print(mlp.score(x_train, y_train))
print("Accuracy on Test Set")
print(mlp.score(x_test, y_test))
print("Report")
print(classification_report(y_test, y_pred_test))

param_grid = {
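# The snippet breaks off at `param_grid`; what follows is only an illustrative
# guess at how such a tuning step might continue, not the original code.
illustrative_param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50)],
    'alpha': [1e-4, 1e-3, 1e-2],
}
from sklearn.model_selection import GridSearchCV
search = GridSearchCV(MLPClassifier(max_iter=1000), illustrative_param_grid,
                      cv=5)
search.fit(x_train, y_train)
print("best params:", search.best_params_)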
def second_generation(X, y, seed=None):
    # note: `3 * seed**2` below assumes a numeric seed, so the default
    # seed=None would raise a TypeError
    features = []

    ### 25 x 2 bagged trees
    bag_gini = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='gini'),
        n_estimators=25, random_state=seed)
    bag_gini.fit(X, y)
    bag_gini_names = ['bag_gini_' + str(i) for i in range(25)]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(bag_gini_names))])

    bag_entropy = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='entropy'),
        n_estimators=25, random_state=3 * seed**2)
    bag_entropy.fit(X, y)
    bag_entropy_names = ['bag_entropy_' + str(i) for i in range(25)]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(bag_entropy_names))])

    ### 25 x 2 random subspaces
    rs_gini = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='gini'),
        n_estimators=25, max_features=int(np.sqrt(X.shape[1])),
        bootstrap=False, random_state=seed)
    rs_gini.fit(X, y)
    rs_gini_names = ['rs_gini_' + str(i) for i in range(25)]
    features.extend(rs_gini.estimators_features_)

    rs_entropy = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='entropy'),
        n_estimators=25, max_features=int(np.sqrt(X.shape[1])),
        bootstrap=False, random_state=3 * seed**2)
    rs_entropy.fit(X, y)
    rs_entropy_names = ['rs_entropy_' + str(i) for i in range(25)]
    features.extend(rs_entropy.estimators_features_)

    ### 14 AdaBoost stump ensembles
    nb_stumps = [2, 4, 8, 16, 32, 64, 128]
    ada_st_gini = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='gini', max_depth=1),
                           n_estimators=st, random_state=seed)
        for st in nb_stumps
    ]
    ada_st_gini_names = ['ada_st_gini_' + str(i) for i in nb_stumps]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_st_gini_names))])
    for clf in ada_st_gini:
        clf.fit(X, y)

    ada_st_entropy = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='entropy', max_depth=1),
                           n_estimators=st, random_state=3 * seed**2)
        for st in nb_stumps
    ]
    ada_st_entropy_names = ['ada_st_entropy_' + str(i) for i in nb_stumps]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_st_entropy_names))])
    for clf in ada_st_entropy:
        clf.fit(X, y)

    ### 8 AdaBoost decision-tree ensembles
    nb_dt = [2, 4, 8, 16]
    ada_dt_gini = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='gini', max_depth=3),
                           n_estimators=dt, random_state=seed) for dt in nb_dt
    ]
    ada_dt_gini_names = ['ada_dt_gini_' + str(i) for i in nb_dt]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_dt_gini_names))])
    for clf in ada_dt_gini:
        clf.fit(X, y)

    ada_dt_entropy = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='entropy', max_depth=3),
                           # was `n_estimators=st`, a NameError: `st` is not
                           # in scope inside this comprehension
                           n_estimators=dt, random_state=3 * seed**2)
        for dt in nb_dt
    ]
    ada_dt_entropy_names = ['ada_dt_entropy_' + str(i) for i in nb_dt]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_dt_entropy_names))])
    for clf in ada_dt_entropy:
        clf.fit(X, y)

    ### 24 ANN
    mlp_parameters = list(
        itertools.product([1, 2, 4, 8, 32, 128], [0, 0.2, 0.5, 0.9]))
    mlp_clf = [
        MLPClassifier(hidden_layer_sizes=(h,), momentum=m)
        for (h, m) in mlp_parameters
    ]
    for clf in mlp_clf:
        clf.fit(X, y)
    mlp_name = ['mlp_{0}_{1}'.format(*param) for param in mlp_parameters]
    features.extend([np.arange(X.shape[1]) for _ in range(len(mlp_name))])

    ### 54 SVM
    C = np.logspace(-3, 2, num=6)
    gamma = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2]
    svm_linear = [SVC(C=c, kernel='poly', degree=1) for c in C]
    for clf in svm_linear:
        clf.fit(X, y)
    svm_linear_names = ['svm_linear_' + str(c) for c in C]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(svm_linear_names))])

    svm_rbf = [SVC(C=c, gamma=g) for c, g in itertools.product(C, gamma)]
    for clf in svm_rbf:
        clf.fit(X, y)
    svm_rbf_names = [
        'svm_rbf_{0}_{1}'.format(*param)
        for param in itertools.product(C, gamma)
    ]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(svm_rbf_names))])

    pool = (bag_gini.estimators_ + bag_entropy.estimators_ +
            rs_gini.estimators_ + rs_entropy.estimators_ + ada_st_gini +
            ada_st_entropy + ada_dt_gini + ada_dt_entropy + mlp_clf +
            svm_linear + svm_rbf)
    pool_name = (bag_gini_names + bag_entropy_names + rs_gini_names +
                 rs_entropy_names + ada_st_gini_names + ada_st_entropy_names +
                 ada_dt_gini_names + ada_dt_entropy_names + mlp_name +
                 svm_linear_names + svm_rbf_names)
    return pool, pool_name, features
X, Y = nudge_dataset(X, digits.target)
X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling
X = 2 * X - 1                                     # [-1, 1] scaling
# plot_sample(X[0, :])
# plot_sample(-X[0, :])
# exit()

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,
                                                    random_state=0)
model1 = MLPClassifier(solver='lbfgs', alpha=1e-5, activation='tanh',
                       hidden_layer_sizes=(20, 10), random_state=1)
# ConstrainedMLPClassifier appears to be a project-local variant (it is not
# part of scikit-learn); fit_intercepts=False drops the bias terms
model2 = ConstrainedMLPClassifier(solver='lbfgs', alpha=1e-5,
                                  activation='tanh',
                                  hidden_layer_sizes=(20, 10), random_state=2,
                                  fit_intercepts=False)
model1.fit(X_train, Y_train)
model2.fit(X_train, Y_train)

result_1 = "NN model with biases test results:\n{}\n".format(
    metrics.classification_report(Y_test, model1.predict(X_test)))
## With k-fold CV the training data is divided into 5 parts: the model is fit
## on 4 parts and tested on the 5th
kfold = KFold(n_splits=5, shuffle=True, random_state=82089)  # recent scikit-learn requires shuffle=True when random_state is set
cv_results = cross_val_score(logreg, x_train, y_train, cv=kfold)
print(cv_results.mean() * 100, "%")

## Define the regularization parameter
## The lower the value of C, the more we penalize the coefficients of the
## logistic regression
param_grid = {
    "C": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
}
grid = GridSearchCV(estimator=logreg, param_grid=param_grid, cv=kfold)
grid.fit(x_train, y_train)
print(grid.best_estimator_.C)
print(grid.best_score_ * 100, "%")

## Build the neural-network model
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='lbfgs', random_state=1, activation='logistic',
                    hidden_layer_sizes=(100,))
kfold = KFold(n_splits=5, shuffle=True, random_state=82089)
cv_results = cross_val_score(clf, x_train, y_train, cv=kfold)
print(cv_results.mean() * 100, "%")

## Find the regularization parameter
param_grid = {"alpha": 10.0 ** -np.arange(-4, 7)}
grid = GridSearchCV(estimator=clf, param_grid=param_grid, cv=kfold)
grid.fit(x_train, y_train)
print(grid.best_estimator_.alpha)
print(grid.best_score_ * 100, "%")

## Now that we know the optimal alpha and C values, let's check the accuracy.
## For logistic regression:
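## Hedged sketch of the elided continuation: refit each model with its
## selected hyperparameter and score it on held-out data. x_test/y_test and
## best_C (the C printed above, which would need to be saved before `grid` is
## reused) are assumptions, not part of the original.
final_logreg = LogisticRegression(C=best_C)
final_logreg.fit(x_train, y_train)
print(final_logreg.score(x_test, y_test) * 100, "%")
final_mlp = MLPClassifier(solver='lbfgs', random_state=1,
                          activation='logistic', hidden_layer_sizes=(100,),
                          alpha=grid.best_estimator_.alpha)
final_mlp.fit(x_train, y_train)
print(final_mlp.score(x_test, y_test) * 100, "%")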
# reseed before each shuffle so X_train and Y_train are permuted in the same
# order
np.random.seed(seed)
np.random.shuffle(X_train)
np.random.seed(seed)
np.random.shuffle(Y_train)

models = []
models.append(('GLM', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('BY', GaussianNB()))
models.append(('SVM', SVC(probability=True)))
models.append(('BAG', BaggingClassifier()))
models.append(('NNet', MLPClassifier()))
models.append(('RF', RandomForestClassifier()))
models.append(('BST', AdaBoostClassifier()))

seed = 7  # note: `seed` is already used above before this assignment

# cross-validate on the train set, select the best model on the validation
# set, and test on the test set
import xlwt

sel = []
sel.append(('CHSQ', SelectKBest(chi2, k=num_fea)))
sel.append(('ANOVA', SelectKBest(f_classif, k=num_fea)))
sel.append(('TSCR', SelectKBest(t_score.t_score, k=num_fea)))
sel.append(('FSCR', SelectKBest(fisher_score.fisher_score, k=num_fea)))
sel.append(('RELF', SelectKBest(reliefF.reliefF, k=num_fea)))
# Now we check the accuracy, and that's it!
predicciones = logreg.predict(data_x_test)
print("Accuracy (logistic regression): {0}".format(
    np.mean(predicciones == data_y_test) * 100))

######################################################
# Neural network
######################################################
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

modelo = MLPClassifier(random_state=1, verbose=False)
param_grid = [
    {
        'hidden_layer_sizes': [(2,), (4,), (8,), (16,)],
        'solver': ['lbfgs'],
        # 'alpha': 10.0 ** -np.arange(1, 7),
        # 'max_iter': [500, 1000, 1500]
    },
    {
        'hidden_layer_sizes': [(2,), (4,), (8,), (16,)],
        'solver': ['adam'],
        # 'alpha': 10.0 ** -np.arange(1, 7),
        # 'max_iter': [500, 1000, 1500]
    }
]
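# The excerpt stops after defining param_grid; a minimal sketch of how it
# would presumably be used. data_x_train/data_y_train are assumed to exist
# from the earlier split.
grid = GridSearchCV(modelo, param_grid, cv=5)
grid.fit(data_x_train, data_y_train)
print("Best configuration:", grid.best_params_)
print("Accuracy (MLP): {0}".format(grid.score(data_x_test, data_y_test) * 100))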
import support
import pandas as pd
from sklearn.model_selection import KFold, cross_validate
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

if __name__ == '__main__':  # run only when executed directly
    # The benchmark algorithms and the models implementing them, with the
    # arguments for each algorithm.
    models = [
        ('SVM', SVC(random_state=1), SVR()),
        ('GaussianProcess', GaussianProcessClassifier(random_state=1),
         GaussianProcessRegressor(normalize_y=True, alpha=1, random_state=1)),
        ('KNeighbors', KNeighborsClassifier(), KNeighborsRegressor()),
        ('MLP', MLPClassifier(random_state=1),
         MLPRegressor(hidden_layer_sizes=(5,), solver='lbfgs',
                      random_state=1)),
    ]
    # Validation dataset files, plus each file's delimiter, header row
    # position, and index column position.
    classifier_files = ['iris.data', 'sonar.all-data', 'glass.data']
    classifier_params = [(',', None, None), (',', None, None), (',', None, 0)]
    regressor_files = [
        'airfoil_self_noise.dat', 'winequality-red.csv',
        'winequality-white.csv'
    ]
    regressor_params = [(r'\t', None, None), (';', 0, None), (';', 0, None)]
    # Table that stores the evaluation score per dataset file and algorithm.
    result = pd.DataFrame(
        columns=['target', 'function'] + [m[0] for m in models])
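    # Hedged sketch of the loop that presumably follows (the excerpt ends at
    # the result table): read each file with its paired parameters,
    # cross-validate each classifier, and collect the mean test score. The
    # file parsing and column handling here are guesses, not the original.
    for fname, (sep, header, index_col) in zip(classifier_files,
                                               classifier_params):
        df = pd.read_csv(fname, sep=sep, header=header, index_col=index_col)
        X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values
        row = {'target': fname, 'function': 'classification'}
        for name, clf, _ in models:
            cv = KFold(n_splits=5, shuffle=True, random_state=1)
            scores = cross_validate(clf, X, y, cv=cv)
            row[name] = scores['test_score'].mean()
        result = pd.concat([result, pd.DataFrame([row])], ignore_index=True)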
# how you can make predictions
predictions = model.predict(X_test)

# what did we get?
predictions

# manually check the accuracy of your predictions
N = len(y_test)
np.sum(predictions == y_test) / N  # can also just call np.mean()

# we can even use deep learning to solve the same problem!
from sklearn.neural_network import MLPClassifier

# you'll learn why scaling is needed in a later course
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)

model = MLPClassifier(max_iter=500)
model.fit(X_train2, y_train)

# evaluate the model's performance
model.score(X_train2, y_train)
model.score(X_test2, y_test)
train_index_list = pickle.load(mfile)
test_index_list = pickle.load(mfile)
mfile.close()

y = yb
cls_names = [
    'SVC', 'LogReg', 'GradBoost', 'NeuralNet', 'RandForest', 'NaiveBayes',
    'K-NN'
]
clss = [
    SVC(gamma='scale'),
    LogisticRegression(solver='lbfgs', max_iter=500),
    GradientBoostingClassifier(),
    MLPClassifier(),
    RandomForestClassifier(),
    GaussianNB(),
    KNeighborsClassifier()
]

accuracy_score_log = {}
for cn, cls in zip(cls_names, clss):
    print("\n\nEvaluating %s now......" % cn)
    accuracy_score_log[cn] = []
    fold_i = 0
    for train_index, test_index in zip(train_index_list, test_index_list):
        fold_i += 1
        print("\t\tfold %d: " % fold_i, end='')
        X_train, X_test = X[train_index], X[test_index]
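        # Hedged completion of the truncated fold body (the original
        # statements are not shown): fit on the training fold, score on the
        # test fold, and log the accuracy.
        y_train, y_test = y[train_index], y[test_index]
        cls.fit(X_train, y_train)
        acc = cls.score(X_test, y_test)
        accuracy_score_log[cn].append(acc)
        print("accuracy = %.4f" % acc)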
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", type=str, default="knn",
                help="type of python machine learning model to use")
args = vars(ap.parse_args())

# define the dictionary of models our script can use, where the key
# to the dictionary is the name of the model (supplied via command
# line argument) and the value is the model itself
models = {
    "knn": KNeighborsClassifier(n_neighbors=1),
    "naive_bayes": GaussianNB(),
    "logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
    "svm": SVC(kernel="rbf", gamma="auto"),
    "decision_tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(n_estimators=100),
    "mlp": MLPClassifier()
}

# load the Iris dataset and perform a training and testing split,
# using 75% of the data for training and 25% for evaluation
print("[INFO] loading data...")
dataset = load_iris()
(trainX, testX, trainY, testY) = train_test_split(dataset.data,
                                                  dataset.target,
                                                  random_state=3,
                                                  test_size=0.25)

# train the model
print("[INFO] using '{}' model".format(args["model"]))
model = models[args["model"]]
model.fit(trainX, trainY)

# make predictions on our data and show a classification report
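# Hedged completion of the step the final comment announces (the original
# statements are not shown; assumes classification_report is imported from
# sklearn.metrics):
predictions = model.predict(testX)
print(classification_report(testY, predictions,
                            target_names=dataset.target_names))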
accuracy = 0
best = 0  # `best` is compared below but was never initialized in the excerpt
hl = [1, 3]
act = ['logistic', 'tanh', 'relu']
sol = ['lbfgs', 'sgd', 'adam']
al = [0.0001, 0.0005]
bs = [64, 128]
lr = ['constant', 'invscaling', 'adaptive']
best_params = [0, 0, 0, 0, 0, 0]
params = [0, 0, 0, 0, 0, 0]
for h in hl:
    for a in act:
        for s in sol:
            for a1 in al:
                for b in bs:
                    for l in lr:
                        classifier = MLPClassifier(hidden_layer_sizes=h,
                                                   activation=a, solver=s,
                                                   alpha=a1, batch_size=b,
                                                   learning_rate=l)
                        classifier.fit(train_inp, train_label)
                        ypred = classifier.predict(test_inp)
                        # acc = classifier.score(test_inp, test_label)
                        # count correct predictions by hand; the original
                        # never advanced the index and divided by a
                        # hard-coded 10
                        score = 0
                        for x, pred in enumerate(ypred):
                            if pred == test_label[x]:
                                score += 1
                        accuracy = score / len(test_label)
                        params = [h, a, s, a1, b, l]
                        print("=========")
                        print('Accuracy:', accuracy)
                        print('Params:', params)
                        print("=========")
                        if best <= accuracy:
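                            # Hedged completion of the truncated update;
                            # the original body is not shown.
                            best = accuracy
                            best_params = params
print('Best accuracy:', best)
print('Best params:', best_params)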
from pylab import rcParams
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix, accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

url = 'C:/Users/Camila/Documents/Tesis/csv/relative/data.csv'
eeg_dataset = pd.read_csv(url, error_bad_lines=False)
eeg_dataset.head()

X = eeg_dataset[['alpha', 'betha', 'delta', 'gamma', 'theta']].values
y = eeg_dataset[['class']].values.ravel()

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(25, 2),
                    random_state=1, max_iter=2500)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7,
                                                    test_size=.3,
                                                    random_state=25)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Don't cheat - fit only on training data
scaler.fit(X_train)
X_train = scaler.transform(X_train)
# apply same transformation to test data
X_test = scaler.transform(X_test)

clf.fit(X_train, y_train)
h = .02  # step size in the mesh

# note: `names` has 10 entries but only 9 classifiers are defined below
# (there is no matching QDA estimator)
names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
    "Naive Bayes", "QDA"
]

classifiers = [
    KNeighborsClassifier(2),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=50),
    RandomForestClassifier(max_depth=50, n_estimators=100, max_features=10),
    MLPClassifier(alpha=1, max_iter=10000),
    AdaBoostClassifier(),
    GaussianNB()
]

import scipy.io

dataset = scipy.io.loadmat('train_data.mat')
train_data = dataset['train_data']
X_train = train_data[:, 0:-1]
y_train = train_data[:, -1]

dataset = scipy.io.loadmat('test_data.mat')
test_data = dataset['test_data']
X_test = test_data[:, 0:-1]
y_test = test_data[:, -1]
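# Hedged sketch of the usual continuation of this classifier-comparison
# pattern: fit each model and report its test accuracy. The original loop is
# not shown in the excerpt.
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    print("{}: {:.3f}".format(name, clf.score(X_test, y_test)))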
yeast.loc[:, 'SequenceName'] = enc.fit_transform(yeast.loc[:, 'SequenceName'])
yeast.set_index(['SequenceName'], inplace=True)
yeast.loc[:, 'ClassDist'] = enc.fit_transform(yeast.loc[:, 'ClassDist'])

# -- DV/IV Splitting --
X_adult = adult.drop('income_bin', axis=1)
Y_adult = adult.loc[:, 'income_bin']
X_yeast = yeast.drop('ClassDist', axis=1)
Y_yeast = yeast.loc[:, 'ClassDist']

# -- Classifier setup --
default_adult_DTree = DecisionTreeClassifier(random_state=13)
default_adult_knn = KNeighborsClassifier()
default_adult_RFC = RandomForestClassifier(random_state=13)
default_adult_MLP = MLPClassifier(max_iter=5000, random_state=13)
default_adult_SVC = SVC(random_state=13)

default_yeast_DTree = DecisionTreeClassifier(random_state=13)
default_yeast_knn = KNeighborsClassifier()
default_yeast_RFC = RandomForestClassifier(random_state=13)
default_yeast_MLP = MLPClassifier(max_iter=5000, random_state=13)
default_yeast_SVC = SVC(random_state=13)

adult_DTree = DecisionTreeClassifier(criterion="entropy", max_depth=10,
                                     min_samples_leaf=50,
                                     min_samples_split=500, random_state=13)
adult_knn = KNeighborsClassifier(n_neighbors=30, n_jobs=4)
adult_RFC = RandomForestClassifier(max_depth=20, min_samples_split=50,
def set_mlp(self, hidden_layer_sizes):
    self.mlp_hidden_layer_sizes_list.append(hidden_layer_sizes)
    self.mlp_clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                                 random_state=1)
def third_generation(X, y, size=200, seed=None):
    mlp_parameters = list(
        itertools.product([1, 2, 4, 8, 32, 128], [0, 0.2, 0.5, 0.9],
                          [0.1, 0.3, 0.6]))
    mlp_clf = [
        MLPClassifier(hidden_layer_sizes=(h,), momentum=m,
                      learning_rate_init=a) for (h, m, a) in mlp_parameters
    ]
    mlp_name = ['mlp_{0}_{1}_{2}'.format(*param) for param in mlp_parameters]

    neighbors_number = [int(i) for i in np.linspace(1, X.shape[0], 40)]
    weighting_methods = ['uniform', 'distance']
    knn_clf = [
        KNeighborsClassifier(n_neighbors=nn, weights=w)
        for (nn, w) in itertools.product(neighbors_number, weighting_methods)
    ]
    knn_name = [
        'knn_{0}_{1}'.format(*param)
        for param in itertools.product(neighbors_number, weighting_methods)
    ]

    C = np.logspace(-3, 7, num=11)
    degree = [2, 3, 4]
    gamma = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2]
    svm_clf_poly = [
        SVC(C=c, kernel='poly', degree=d)
        for (c, d) in itertools.product(C, degree)
    ]
    svm_clf_poly_name = [
        'svm_poly_{0}_{1}'.format(*param)
        for param in itertools.product(C, degree)
    ]
    svm_clf_rbf = [
        SVC(C=c, kernel='rbf', gamma=g)
        for (c, g) in itertools.product(C, gamma)
    ]
    svm_clf_rbf_name = [
        'svm_rbf_{0}_{1}'.format(*param)
        for param in itertools.product(C, gamma)
    ]

    dt_params = list(
        itertools.product(['gini', 'entropy'], [1, 2, 3, 4, 5, None],
                          [None, 'sqrt', 'log2'], ['best', 'random']))
    dt_clf = [
        DecisionTreeClassifier(criterion=c, max_depth=d, max_features=f,
                               splitter=s) for (c, d, f, s) in dt_params
    ]
    dt_name = ['dt_{0}_{1}_{2}_{3}'.format(*param) for param in dt_params]
    et_clf = [
        ExtraTreeClassifier(criterion=c, max_depth=d, max_features=f,
                            splitter=s) for (c, d, f, s) in dt_params
    ]
    et_name = ['et_{0}_{1}_{2}_{3}'.format(*param) for param in dt_params]

    ada_params = list(
        itertools.product([2**i for i in range(1, 14)], [1, 2, 3]))
    ada_dt_clf = [
        AdaBoostClassifier(n_estimators=n,
                           base_estimator=DecisionTreeClassifier(max_depth=m))
        for (n, m) in ada_params
    ]
    ada_et_clf = [
        AdaBoostClassifier(n_estimators=n,
                           base_estimator=ExtraTreeClassifier(max_depth=m))
        for (n, m) in ada_params
    ]
    ada_dt_name = ['ada_dt_{0}_{1}'.format(*param) for param in ada_params]
    ada_et_name = ['ada_et_{0}_{1}'.format(*param) for param in ada_params]

    nb_bag_est = 50
    nb_bag_stumps = 200
    bag_dt = BaggingClassifier(n_estimators=nb_bag_est,
                               base_estimator=DecisionTreeClassifier())
    bag_et = BaggingClassifier(n_estimators=nb_bag_est,
                               base_estimator=ExtraTreeClassifier())
    bag_stumps = BaggingClassifier(
        n_estimators=nb_bag_stumps,
        base_estimator=DecisionTreeClassifier(max_depth=1))
    bag_dt.fit(X, y)
    bag_et.fit(X, y)
    bag_stumps.fit(X, y)
    dt_bag_clf = bag_dt.estimators_
    et_bag_clf = bag_et.estimators_
    stump_bag_clf = bag_stumps.estimators_
    dt_bag_name = ['dt_bag_{0}'.format(nb_est) for nb_est in range(nb_bag_est)]
    et_bag_name = ['et_bag_{0}'.format(nb_est) for nb_est in range(nb_bag_est)]
    stump_bag_name = [
        'stump_bag_{0}'.format(nb_est) for nb_est in range(nb_bag_stumps)
    ]
    bag_dt_clf = [bag_dt]
    bag_et_clf = [bag_et]  # was [bag_dt], which duplicated the tree bagger
    bag_stump_clf = [bag_stumps]
    bag_dt_name = ['bag_dt_{0}'.format(nb_bag_est)]
    bag_et_name = ['bag_et_{0}'.format(nb_bag_est)]
    bag_stump_name = ['bag_stump_{0}'.format(nb_bag_stumps)]

    nb_rf = 15
    rf = RandomForestClassifier(n_estimators=nb_rf)
    rf.fit(X, y)
    dt_rf_clf = rf.estimators_
    dt_rf_name = ['dt_rf_{0}'.format(nb_est) for nb_est in range(nb_rf)]

    log_parameters = list(
        itertools.product(['l1', 'l2'], np.logspace(-5, 9, num=15),
                          [True, False]))
    log_clf = [
        LogisticRegression(penalty=l, C=c, fit_intercept=f)
        for (l, c, f) in log_parameters
    ]
    log_name = ['log_{0}_{1}_{2}'.format(*param) for param in log_parameters]

    sgd_parameters = list(
        itertools.product([
            'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
            'squared_loss', 'huber', 'epsilon_insensitive',
            'squared_epsilon_insensitive'
        ], ['elasticnet'], [True, False], np.arange(0, 1.1, 0.1)))
    sgd_clf = [
        SGDClassifier(loss=l, penalty=p, fit_intercept=f, l1_ratio=l1)
        for (l, p, f, l1) in sgd_parameters
    ]
    sgd_name = [
        'sgd_{0}_{1}_{2}_{3}'.format(*param) for param in sgd_parameters
    ]

    pool = (mlp_clf + knn_clf + svm_clf_poly + svm_clf_rbf + dt_clf + et_clf +
            ada_dt_clf + ada_et_clf + dt_bag_clf + et_bag_clf +
            stump_bag_clf + bag_dt_clf + bag_et_clf + bag_stump_clf +
            dt_rf_clf + log_clf + sgd_clf)
    pool_name = (mlp_name + knn_name + svm_clf_poly_name + svm_clf_rbf_name +
                 dt_name + et_name + ada_dt_name + ada_et_name + dt_bag_name +
                 et_bag_name + stump_bag_name + bag_dt_name + bag_et_name +
                 bag_stump_name + dt_rf_name + log_name + sgd_name)

    for model in pool:
        if not check_model_is_fitted(model, X[0, :].reshape((1, -1))):
            model.fit(X, y)

    np.random.seed(seed)
    order = np.random.permutation(range(len(pool)))
    estimators = [pool[i] for i in order[:size]]
    return estimators, pool_name
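# `check_model_is_fitted` is not defined in this excerpt; a minimal sketch of
# what it presumably does (the real helper may differ): try to predict one
# sample and treat a NotFittedError as "not fitted".
from sklearn.exceptions import NotFittedError

def check_model_is_fitted(model, one_sample):
    try:
        model.predict(one_sample)
        return True
    except NotFittedError:
        return False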
from sklearn.linear_model import RidgeClassifierCV
model = RidgeClassifierCV()
classifier(X_train_rare, y_train, X_test_rare, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.svm import SVC
model = SVC()
classifier(X_train_rare, y_train, X_test_rare, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.neural_network import MLPClassifier
model = MLPClassifier()
classifier(X_train, y_train, X_test, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.svm import LinearSVC
model = LinearSVC()
classifier(X_train, y_train, X_test, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
classifier(X_train, y_train, X_test, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)
data1 = scipy.io.loadmat('NN_ex4/ex4data1.mat')
data2 = scipy.io.loadmat('NN_ex4/ex4weights.mat')
X = np.array(data1["X"])  # 5000 samples with 400 features (20x20 grayscale)
y = data1["y"]            # targets are the digits 0-9
y[y == 10] = 0            # convert 10s to 0s

# create train and test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model
clf = MLPClassifier(solver="lbfgs", activation="relu",
                    hidden_layer_sizes=(20, 20))
# train the model
clf.fit(X_train, y_train)

# accuracy
predicts = clf.predict(X_test)
cm = confusion_matrix(y_test, predicts)  # was misleadingly named `acc`

def accuracy(cm):
    diagonal = cm.trace()
    elements = np.sum(cm)
    return diagonal / elements
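# Usage of the helper above (not part of the original excerpt): the trace of
# the confusion matrix counts the correct predictions.
print("test accuracy:", accuracy(cm))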
# ])
# Y = pd.DataFrame([
#     1, 1, 1, 1, 0, 0, 0, 0
# ])
train_X = pd.DataFrame(train_X.transpose())
tr_X = train_X.iloc[:, :21600]
tr_Y = train_Y.iloc[:21600, :]
ts_X = train_X.iloc[:, 21600:]
ts_Y = train_Y.iloc[21600:, :]  # was [:21600, :], which reused the training labels
print(tr_X.shape, tr_Y.shape, ts_X.shape, ts_Y.shape)

# mini_batch, step_size, load_from_file, dump_file and file_root are not
# standard MLPClassifier arguments; this appears to rely on a locally
# modified MLPClassifier
mlpc = MLPClassifier(verbose=False, hidden_layer_sizes=(200,),
                     activation='relu', max_iter=20000, learning_rate_init=.2,
                     warm_start=True, mini_batch='auto', step_size=50,
                     load_from_file=True, dump_file=True,
                     file_root='nn-relu-very-wide')
mlpc.fit(tr_X, tr_Y)
# r = mlpc.predict(ts_X)
# correct = 0
# for i in zip(r, ts_Y.values.reshape(-1)):
#     left, truth = i
#     pre, _ = left
#     if pre == int(truth):
#         correct += 1
#     else:
#         print(i)
import pickle
from sklearn.neural_network import MLPClassifier

train = pickle.load(open('train_pca.pickle', 'rb'))
test = pickle.load(open('test_pca.pickle', 'rb'))

train_num = 200
test_num = 50
X_train = train[0][:train_num]
y_train = train[1][:train_num]
X_test = test[0][:test_num]
y_test = test[1][:test_num]

mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=50, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)
mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))
def main():
    data, targets = loadData()
    norm_data = preprocessing.normalize(data)
    train, test, train_t, test_t = processIris(norm_data, targets)

    file_reader = FileReader()
    data_processor = DataProcessor()
    raw_data = file_reader.read_file("health.txt")
    h_data, h_data_norm, p_targets = data_processor.process_health(raw_data)
    p_train, p_test, p_train_t, p_test_t = processIris(h_data, p_targets)

    iris_network = NeuralNet()
    iris_network.create_layer(3)
    iris_network.train_network(train, train_t)
    iris_predictions = iris_network.predict(test)
    correct = 0
    for i in range(len(test_t)):
        if iris_predictions[i] == test_t[i]:
            correct += 1
    print("Iris with 3 nodes in hidden layer")
    print("Iris prediction correct = ", correct, "out of", len(test_t),
          "\nAccuracy = ", (correct / len(test_t) * 100))

    iris2_network = NeuralNet()
    iris2_network.create_layer(6)
    iris2_network.train_network(train, train_t)
    iris2_predictions = iris2_network.predict(test)
    correct = 0
    for i in range(len(test_t)):
        if iris2_predictions[i] == test_t[i]:
            correct += 1
    print("Iris with 6 nodes in hidden layer")
    print("Iris prediction correct = ", correct, "out of", len(test_t),
          "\nAccuracy = ", (correct / len(test_t) * 100))

    mlp_class = MLPClassifier(solver='lbfgs', alpha=1e-5,
                              hidden_layer_sizes=(6,))
    mlp_class.fit(train, train_t)
    mlp_iris_predict = mlp_class.predict(test)
    correct = 0
    for i in range(len(test_t)):
        if mlp_iris_predict[i] == test_t[i]:
            correct += 1
    print("Iris with 6 nodes in hidden layer SKLEARN MODEL")
    print("Iris prediction correct = ", correct, "out of", len(test_t),
          "\nAccuracy = ", (correct / len(test_t) * 100))

    pima_network = NeuralNet()
    pima_network.create_layer(4)
    pima_network.train_network(p_train, p_train_t)
    pima_predictions = pima_network.predict(p_test)
    correct = 0
    for i in range(len(p_test_t)):
        if pima_predictions[i] == p_test_t[i]:
            correct += 1
    print("Pima with 4 nodes in hidden layer")
    print("Pima prediction correct = ", correct, "out of", len(p_test_t),
          "\nAccuracy = ", (correct / len(p_test_t) * 100))

    pima2_network = NeuralNet()
    pima2_network.create_layer(6)
    pima2_network.train_network(p_train, p_train_t)
    pima2_predictions = pima2_network.predict(p_test)
    correct = 0
    for i in range(len(p_test_t)):
        if pima2_predictions[i] == p_test_t[i]:
            correct += 1
    print("Pima with 6 nodes in hidden layer")
    print("Pima prediction correct = ", correct, "out of", len(p_test_t),
          "\nAccuracy = ", (correct / len(p_test_t) * 100))

    mlp_pima_class = MLPClassifier(solver='lbfgs', alpha=1e-5,
                                   hidden_layer_sizes=(6,))
    mlp_pima_class.fit(p_train, p_train_t.ravel())
    mlp_pima_predict = mlp_pima_class.predict(p_test)
    correct = 0
    for i in range(len(p_test_t)):
        if mlp_pima_predict[i] == p_test_t[i]:
            correct += 1
    print("Pima with 6 nodes in hidden layer SKLEARN MODEL")
    print("Pima prediction correct = ", correct, "out of", len(p_test_t),
          "\nAccuracy = ", (correct / len(p_test_t) * 100))
data_content = data.content
data_label = data.label.tolist()
count_vect = CountVectorizer(stop_words='english')

csv_file = open(output, "w", newline='')
writer = csv.writer(csv_file, delimiter=',')

for clf_name in clf_names:
    if clf_name == 'lr':
        clf = LogisticRegression()
    elif clf_name == 'svm':
        # the kernel can also be 'linear', 'rbf', 'polynomial', 'sigmoid', etc.
        clf = svm.SVC(kernel='linear', probability=True)
    elif clf_name == 'mlp':
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(5, 2), random_state=1)
    elif clf_name == 'nb':
        clf = MultinomialNB()
    elif clf_name == 'rf':
        clf = RandomForestClassifier(oob_score=True, n_estimators=30)
    else:
        print("The classifier name must be one of 'lr, svm, mlp, nb, rf'")

    # the input data needs to be iterable
    data_content_matrix = count_vect.fit_transform(data_content)
    # data_content_matrix_dmr = dr.selectFromLinearSVC(data_content, data_label)
    # data_content_matrix_dmr = dr.selectFromLinearSVC(data_content_matrix, data_label)
    # train_content_matrix_input_dmr_smt, train_label_input_smt = get_smote_standard(train_content_matrix_input_dmr, train_label_input)
    # data_content_matrix_dmr_smt, data_label_smt = get_smoteenn(data_content_matrix_dmr, data_label)
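    # Hedged sketch of the evaluation step the loop presumably ends with (the
    # original statements are not shown; assumes cross_val_score is imported
    # from sklearn.model_selection): cross-validate the classifier on the
    # bag-of-words matrix and log the mean accuracy to the CSV.
    scores = cross_val_score(clf, data_content_matrix, data_label, cv=5)
    writer.writerow([clf_name, scores.mean()])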
print(np.asarray((unique_elements, counts_elements)))

kf = KFold(n_splits=10)  # split the dataset into 10 parts: 9 for training, 1 for testing
a = 0
f = 0
p = 0
r = 0
i = 0
for train_index, test_index in kf.split(X):
    i += 1
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    clf = MLPClassifier(solver='lbfgs').fit(X_train, y_train.ravel())
    y_pred = clf.predict(X_test)
    a += clf.score(X_test, y_test)
    f += metrics.f1_score(y_test, y_pred, average='macro')
    p += metrics.precision_score(y_test, y_pred, average='macro')
    r += metrics.recall_score(y_test, y_pred, average='macro')

average_accuracy = a / i
average_f1_score = f / i
average_precision = p / i
average_recall = r / i
print('Accuracy: ')
print(average_accuracy)
print('F1 - Score:')
def class34(filename, i):
    '''
    This function performs experiment 3.4.

    Parameters
        filename : string, the name of the npz file from Task 2
        i : int, the index of the supposedly best classifier (from Task 3.1)
    '''
    i = i - 1
    data = np.load(filename)["arr_0"]
    X = []
    y = []
    for d in data:
        X.append(d[0:173])
        y.append(d[173])
    X = np.array(X)
    y = np.array(y)

    classifiers = [
        SVC(kernel='linear', max_iter=1000),
        SVC(gamma=2, max_iter=1000),
        RandomForestClassifier(max_depth=5, n_estimators=10),
        MLPClassifier(alpha=0.05),
        AdaBoostClassifier()
    ]
    kf = KFold(n_splits=5, shuffle=True)

    # global lists to store results
    fold_test_result_list = []
    p_values = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        accuracy_list = []
        for clf in classifiers:
            classifier = clone(clf)
            classifier.fit(X_train, y_train)
            prediction = classifier.predict(X_test)
            c_m = confusion_matrix(y_test, prediction)
            accuracy_list.append(accuracy(c_m))
        fold_test_result_list.append(accuracy_list)

    vertical_result = np.transpose(fold_test_result_list)
    # compare each classifier's results with the best classifier's
    for j in range(len(classifiers)):
        if i != j:
            S = stats.ttest_rel(vertical_result[i], vertical_result[j])
            p_values.append(S[1])

    with open('a1_3.4.csv', 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        for result in fold_test_result_list:
            spamwriter.writerow(result)
        spamwriter.writerow(p_values)
        spamwriter.writerow([
            "The cross-validation accuracy may differ from the Part 3.1 "
            "result. This could be caused by the variance of the data: in "
            "3.1 there is only one training/testing split, and the makeup "
            "of that training set may introduce bias."
        ])
# data.append(tuple(["Dataset-" + str(c),"","","","","","","","","","","","","","","","","","","",""])) # data2.append(tuple(["Dataset-" + str(c),"","","","","","","","","","","","","","","","","","","",""])) row = ["$P$"] # print(" &","$P$", end="") for sampler in samplers_array_all: t = "" # precision, recall, f1, rocauc, kappa, gmean = evalSampling(sampler, RandomForestClassifier(max_depth=2, random_state=0), Xtrain, Xtest, ytrain, ytest) # print(precision) try: precision, recall, f1, rocauc, kappa, gmean = evalSampling( sampler, MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(15, 10), batch_size=18, max_iter=300, random_state=1), Xtrain, Xtest, ytrain, ytest) # print(" &", round(precision,3), end="") t = str(round(precision, 3)) except: # print(" &", "N/A", end="") t = "N/A" row.append(t) # print(row) data.append(tuple(row)) # print("\\\\")