def selectParametersForMLPC(a, b, c, d):
    """Grid-search the MLP classifier's hyperparameters.

    http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    http://scikit-learn.org/stable/modules/grid_search.html#grid-search
    """
    model = MLPC()
    parameters = {
        'verbose': [False],
        'activation': ['logistic', 'relu'],
        'max_iter': [1000, 2000],
        'learning_rate': ['constant', 'adaptive']
    }
    accuracy_scorer = make_scorer(accuracy_score)
    grid_obj = GridSearchCV(model, parameters, scoring=accuracy_scorer)
    grid_obj = grid_obj.fit(a, b)
    model = grid_obj.best_estimator_
    model.fit(a, b)
    print('Selected Parameters for Multi-Layer Perceptron NN:\n')
    print(model)
    print('')
    # predictions = model.predict(c)
    # print(accuracy_score(d, predictions))
    # print('Logistic Regression - Training set accuracy: %s' % accuracy_score(d, predictions))
    kfold = model_selection.KFold(n_splits=10)
    accuracy = model_selection.cross_val_score(model, a, b, cv=kfold,
                                               scoring='accuracy')
    mean = accuracy.mean()
    stdev = accuracy.std()
    print('SKlearn Multi-Layer Perceptron - Training set accuracy: %s (%s)'
          % (mean, stdev))
    print('')
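# --- Hedged usage sketch (not from the original source) ---
# selectParametersForMLPC expects a=X_train, b=y_train, c=X_test, d=y_test and
# relies on module-level imports. The aliases below and the iris data are
# assumptions for illustration only.
from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn import model_selection
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
# train_test_split returns (X_train, X_test, y_train, y_test), so map accordingly:
a, c, b, d = train_test_split(X, y, test_size=0.2, random_state=0)
selectParametersForMLPC(a, b, c, d)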
def train(self):
    classifiers = [
        ["Rfc", Rfc(criterion="entropy", n_estimators=100)],
        ["knn", Knn(10, algorithm="auto")],
        ["svc", SVC(kernel="linear", C=0.025, verbose=True)],
        ["MLPC", MLPC(activation='identity', learning_rate_init=0.01,
                      hidden_layer_sizes=(3, 2, 2), learning_rate='adaptive',
                      solver='adam', verbose=True, max_iter=100)],
    ]

    def dump_Data(fileName, model):
        # Pickle each fitted model into the prediction directory.
        try:
            with open(PathFile.PREDICRFILE + fileName + ".pkl", "wb") as f:
                pickle.dump(model, f)
            print(fileName, 'Dump_file OK...')
        except IOError as e:
            print(e)

    for name, model in classifiers:
        model.fit(self.X, self.z.values.ravel())
        dump_Data(name, model)
    return model  # returns the last fitted model (the MLPC)
def MLPC_pack(xtrain, xtest, ytrain):
    model = MLPC(hidden_layer_sizes=(5, 2), solver='adam', alpha=1e-5,
                 random_state=11)
    model.fit(xtrain, ytrain)
    ypre = model.predict(xtest)
    return ypre
def fit_predict(self, dfit, dpre, tournament):
    clf = MLPC(hidden_layer_sizes=self.p['layers'],
               alpha=self.p['alpha'],
               activation=self.p['activation'],
               learning_rate_init=self.p['learn'],
               random_state=self.p['seed'],
               max_iter=200)
    clf.fit(dfit.x, dfit.y[tournament])
    yhat = clf.predict_proba(dpre.x)[:, 1]  # probability of the positive class
    return dpre.ids, yhat
def get_models(dataset):
    if dataset in ["mnist12", "mnist28"]:
        return [
            (DTC(max_depth=30, class_weight='balanced'),
             "Decision Tree (max_depth=30)"),
            (LRC(solver='lbfgs', n_jobs=2, multi_class="auto",
                 class_weight='balanced', max_iter=50), "Logistic Regression"),
            (MLPC((100,), max_iter=50), "MLP (100)"),
        ]
    if dataset in ['adult']:
        return [
            (DTC(max_depth=15, class_weight='balanced'),
             "Decision Tree (max_depth=15)"),  # label now matches max_depth=15
            (ABC(), "Adaboost (estimator=50)"),
            (LRC(solver='lbfgs', n_jobs=2, class_weight='balanced',
                 max_iter=50), "Logistic Regression"),
            (MLPC((50,), max_iter=50), "MLP (50)"),
        ]
    if dataset in ['census', 'credit']:
        return [
            (DTC(max_depth=30, class_weight='balanced'),
             "Decision Tree (max_depth=30)"),
            (ABC(), "Adaboost (estimator=50)"),
            (MLPC((100,), max_iter=50), "MLP (100)"),
        ]
    if dataset in ['intrusion', 'covtype']:
        return [
            (DTC(max_depth=30, class_weight='balanced'),
             "Decision Tree (max_depth=30)"),
            (MLPC((100,), max_iter=50), "MLP (100)"),
        ]
    if dataset in ['news']:
        return [(LRR(), "Linear Regression"),
                (MLPR((100,), max_iter=50), "MLP (100)")]
    raise ValueError("unknown dataset: %s" % dataset)  # replaces the bare assert 0
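# --- Hedged usage sketch (not from the original source) ---
# get_models returns (estimator, label) pairs built from the DTC/ABC/LRC/MLPC
# aliases above; the synthetic stand-in data here is an assumption for
# illustration only.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
for clf, label in get_models('adult'):
    clf.fit(X_train, y_train)
    print(label, clf.score(X_test, y_test))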
def MLPClassifier(trainData, trainLabel, testData, testLabel):
    clf = MLPC(solver='adam', activation='relu', alpha=1e-4, random_state=1,
               max_iter=200, learning_rate_init=.1)
    clf.fit(trainData, trainLabel)
    with open('./models/MLPClassifier.pkl', 'wb') as f:
        pickle.dump(clf, f)
    predict = clf.predict(testData)
    return calculateScores(testLabel, predict)
def runVotingClassifier(a, b, c, d):
    """http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html
    http://scikit-learn.org/stable/modules/ensemble.html#voting-classifier"""
    # Eventually this global should be replaced with a class; for now the
    # submission function still needs votingC. (The original also declared
    # mean and stdev global, but never assigned them here.)
    global votingC
    votingC = VotingClassifier(
        estimators=[
            ('LSVM', LinearSVC(C=0.0001, class_weight=None, dual=True,
                               fit_intercept=True, intercept_scaling=1,
                               loss='squared_hinge', max_iter=1000,
                               multi_class='ovr', penalty='l2',
                               random_state=None, tol=0.0001, verbose=0)),
            ('MLPC', MLPC(activation='logistic', alpha=0.0001,
                          batch_size='auto', beta_1=0.9, beta_2=0.999,
                          early_stopping=False, epsilon=1e-08,
                          hidden_layer_sizes=(100,), learning_rate='constant',
                          learning_rate_init=0.001, max_iter=2000,
                          momentum=0.9, nesterovs_momentum=True, power_t=0.5,
                          random_state=None, shuffle=True, solver='adam',
                          tol=0.0001, validation_fraction=0.1, verbose=False,
                          warm_start=False)),
        ],
        voting='hard')  # 'hard' is required: LinearSVC has no predict_proba
    votingC = votingC.fit(a, b)
    kfold = model_selection.KFold(n_splits=10)
    accuracy = model_selection.cross_val_score(votingC, a, b, cv=kfold,
                                               scoring='accuracy')
    meanC = accuracy.mean()
    stdevC = accuracy.std()
    print('Ensemble Voting Method - Training set accuracy: %s (%s)'
          % (meanC, stdevC))
    print('')
    return votingC, meanC, stdevC
def learnData(xData, yData, f_obj, MLtype):
    f_obj.write('Accuracy for {}:\n'.format(MLtype))
    for test in [0.10, 0.15, 0.20, 0.25]:
        xData_train, xData_test, yData_train, yData_test = tts(
            xData, yData, test_size=test, random_state=42)
        # The original if/if/if+else chain let the final else overwrite the
        # LSVC and LR choices with MLPC; an elif chain fixes the dispatch.
        if MLtype == 'LSVC':
            clf = LSVC()
        elif MLtype == 'LR':
            clf = LR()
        elif MLtype == 'MNB':
            clf = MNB()
        else:
            clf = MLPC()
        clf.fit(xData_train, yData_train)
        score = clf.score(xData_test, yData_test)
        f_obj.write('\ttest partition {} yields {} accuracy\n'.format(test, score))
    f_obj.write('\n')
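# --- Hedged usage sketch (not from the original source) ---
# learnData writes its accuracy table to an open file object. The aliases it
# relies on (tts, LSVC, LR, MNB, MLPC) and the synthetic data below are
# assumptions for illustration only.
import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=400, random_state=42)
X = np.abs(X)  # MNB requires non-negative features
with open('accuracy_report.txt', 'w') as f_obj:
    for ml_type in ['LSVC', 'LR', 'MNB', 'MLPC']:
        learnData(X, y, f_obj, ml_type)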
def runMLPC(a, b, c, d):
    classifier = MLPC(activation='relu', max_iter=1000)
    classifier.fit(a, b)
    kfold = model_selection.KFold(n_splits=10)
    accuracy = model_selection.cross_val_score(classifier, a, b, cv=kfold,
                                               scoring='accuracy')
    mean = accuracy.mean()
    stdev = accuracy.std()
    print('SKlearn Multi-layer Perceptron NN - Training set accuracy: %s (%s)'
          % (mean, stdev))
    print('')
def __init__(
        self,
        hidden_layer_sizes=(100,),
        activation='relu',         # 'identity', 'logistic', 'tanh', 'relu'
        solver="adam",             # 'lbfgs', 'sgd', 'adam'
        alpha=0.0001,              # L2 penalty parameter
        batch_size="auto",
        learning_rate="constant",  # 'constant', 'invscaling', 'adaptive'
        learning_rate_init=0.001,
        power_t=0.5,               # exponent for inverse-scaling lr decay
        max_iter=200,              # max epochs
        shuffle=True,              # shuffle samples each iteration
        tol=1e-4,                  # tolerance for loss/score improvement
        momentum=0.9,              # momentum for sgd
        nesterovs_momentum=True,
        early_stopping=False,      # stop when n_iter_no_change epochs yield no improvement
        n_iter_no_change=10,
        validation_fraction=0.1,   # fraction of training data held out for validation
        beta_1=0.9,                # beta1 for adam
        beta_2=0.999,              # beta2 for adam
        epsilon=1e-8):             # adam numerical stability
    self._clf = MLPC(hidden_layer_sizes=hidden_layer_sizes,
                     activation=activation,
                     solver=solver,
                     alpha=alpha,
                     batch_size=batch_size,
                     learning_rate=learning_rate,
                     learning_rate_init=learning_rate_init,
                     power_t=power_t,
                     max_iter=max_iter,
                     shuffle=shuffle,
                     tol=tol,
                     early_stopping=early_stopping,
                     n_iter_no_change=n_iter_no_change,
                     momentum=momentum,
                     nesterovs_momentum=nesterovs_momentum,
                     validation_fraction=validation_fraction,
                     beta_1=beta_1,
                     beta_2=beta_2,
                     epsilon=epsilon)
    self._last_score = None
    self._model_path = None
def __init__(self, hidden_layer_sizes=100, activation='relu', solver='adam',
             alpha=0.0001, batch_size='auto', learning_rate='constant',
             learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True,
             random_state=None, tol=0.0001, warm_start=False, momentum=0.9,
             nesterovs_momentum=True, early_stopping=False,
             validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
             n_iter_no_change=10, max_fun=15000):
    # Store every hyperparameter, then build the wrapped classifier from them.
    self.hidden_layer_sizes = hidden_layer_sizes
    self.activation = activation
    self.solver = solver
    self.alpha = alpha
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.learning_rate_init = learning_rate_init
    self.power_t = power_t
    self.max_iter = max_iter
    self.shuffle = shuffle
    self.random_state = random_state
    self.tol = tol
    self.warm_start = warm_start
    self.momentum = momentum
    self.nesterovs_momentum = nesterovs_momentum
    self.early_stopping = early_stopping
    self.validation_fraction = validation_fraction
    self.beta_1 = beta_1
    self.beta_2 = beta_2
    self.epsilon = epsilon
    self.n_iter_no_change = n_iter_no_change
    self.max_fun = max_fun
    self.model = MLPC(hidden_layer_sizes=self.hidden_layer_sizes,
                      activation=self.activation,
                      solver=self.solver,
                      alpha=self.alpha,
                      batch_size=self.batch_size,
                      learning_rate=self.learning_rate,
                      learning_rate_init=self.learning_rate_init,
                      power_t=self.power_t,
                      max_iter=self.max_iter,
                      shuffle=self.shuffle,
                      random_state=self.random_state,
                      tol=self.tol,
                      warm_start=self.warm_start,
                      momentum=self.momentum,
                      nesterovs_momentum=self.nesterovs_momentum,
                      early_stopping=self.early_stopping,
                      validation_fraction=self.validation_fraction,
                      beta_1=self.beta_1,
                      beta_2=self.beta_2,
                      epsilon=self.epsilon,
                      n_iter_no_change=self.n_iter_no_change,
                      max_fun=self.max_fun)
def __init__(self, featureset=None, target=None, mode='predict', path=''):
    if mode == 'train':
        self.__svm = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                         decision_function_shape='ovr', degree=3, gamma='auto',
                         kernel='rbf', max_iter=-1, probability=False,
                         random_state=None, shrinking=True, tol=0.001,
                         verbose=False)
        self.__svr = SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                         epsilon=0.1, gamma='auto', kernel='rbf', max_iter=-1,
                         shrinking=True, tol=0.001, verbose=False)
        self.__nusvm = NuSVC(cache_size=200, class_weight=None, coef0=0.0,
                             decision_function_shape='ovr', degree=3,
                             gamma='auto', kernel='rbf', max_iter=-1, nu=0.5,
                             probability=False, random_state=None,
                             shrinking=True, tol=0.001, verbose=False)
        self.__nusvr = NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                             gamma='auto', kernel='rbf', max_iter=-1, nu=0.5,
                             shrinking=True, tol=0.001, verbose=False)
        self.__linsvm = LinearSVC(C=1.0, class_weight=None, dual=True,
                                  fit_intercept=True, intercept_scaling=1,
                                  loss='squared_hinge', max_iter=1000,
                                  multi_class='ovr', penalty='l2',
                                  random_state=None, tol=0.0001, verbose=0)
        self.__linsvr = LinearSVR(C=1.0, dual=True, epsilon=0.0,
                                  fit_intercept=True, intercept_scaling=1.0,
                                  loss='epsilon_insensitive', max_iter=1000,
                                  random_state=None, tol=0.0001, verbose=0)
        self.__mlpc = MLPC(activation='relu', alpha=1e-05, batch_size='auto',
                           beta_1=0.9, beta_2=0.999, early_stopping=False,
                           epsilon=1e-08, hidden_layer_sizes=(100, 25),
                           learning_rate='constant', learning_rate_init=0.001,
                           max_iter=200, momentum=0.9, nesterovs_momentum=True,
                           power_t=0.5, random_state=1, shuffle=True,
                           solver='lbfgs', tol=0.0001, validation_fraction=0.1,
                           verbose=False, warm_start=False)
        self.__mlpr = MLPR(activation='relu', alpha=0.0001, batch_size='auto',
                           beta_1=0.9, beta_2=0.999, early_stopping=False,
                           epsilon=1e-08, hidden_layer_sizes=(100, 25),
                           learning_rate='constant', learning_rate_init=0.001,
                           max_iter=200, momentum=0.9, nesterovs_momentum=True,
                           power_t=0.5, random_state=None, shuffle=True,
                           solver='adam', tol=0.0001, validation_fraction=0.1,
                           verbose=False, warm_start=False)
        self.__dtc = DTC(class_weight=None, criterion='gini', max_depth=None,
                         max_features=None, max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, presort=False,
                         random_state=None, splitter='best')
        self.__dtr = DTR(criterion='mse', max_depth=None, max_features=None,
                         max_leaf_nodes=None, min_impurity_decrease=0.0,
                         min_impurity_split=None, min_samples_leaf=1,
                         min_samples_split=2, min_weight_fraction_leaf=0.0,
                         presort=False, random_state=None, splitter='best')
        self.__rfc = RFC(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=100, max_features='auto',
                         max_leaf_nodes=None, min_impurity_decrease=0.0,
                         min_impurity_split=None, min_samples_leaf=1,
                         min_samples_split=2, min_weight_fraction_leaf=0.0,
                         n_estimators=50, n_jobs=1, oob_score=False,
                         random_state=None, verbose=0, warm_start=False)
        self.__rfr = RFR(bootstrap=True, criterion='mse', max_depth=None,
                         max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=10,
                         n_jobs=1, oob_score=False, random_state=None,
                         verbose=0, warm_start=False)
        (self.__svm, self.__svr, self.__nusvm, self.__nusvr, self.__linsvm,
         self.__linsvr, self.__mlpc, self.__mlpr, self.__dtc, self.__dtr,
         self.__rfc, self.__rfr) = self.__trainAll(X=list(featureset),
                                                   Y=list(target))
        self.__saveModelsToFile(path)
    else:
        # 'predict' mode: load the previously trained and pickled models.
        self.__svm = joblib.load(path + 'Mel_SVM.pkl')
        self.__svr = joblib.load(path + 'Mel_SVR.pkl')
        self.__nusvm = joblib.load(path + 'Mel_NuSVM.pkl')
        self.__nusvr = joblib.load(path + 'Mel_NuSVR.pkl')
        self.__linsvm = joblib.load(path + 'Mel_LinSVM.pkl')
        self.__linsvr = joblib.load(path + 'Mel_LinSVR.pkl')
        self.__mlpc = joblib.load(path + 'Mel_MLPC.pkl')
        self.__mlpr = joblib.load(path + 'Mel_MLPR.pkl')
        self.__dtc = joblib.load(path + 'Mel_DTC.pkl')
        self.__dtr = joblib.load(path + 'Mel_DTR.pkl')
        self.__rfc = joblib.load(path + 'Mel_RFC.pkl')
        self.__rfr = joblib.load(path + 'Mel_RFR.pkl')
""" #####RFC pipes##### """ rfc_norm_pipe = mp(MinMaxScaler(), RFC(random_state=47)) rfc_stand_pipe = mp(StandardScaler(), RFC(random_state=47)) rfc_pca_pipe = mp(PCA(), RFC()) """ #####SVC pipes##### """ svc_norm_pipe = mp(MinMaxScaler(), SVC()) svc_stand_pipe = mp(StandardScaler(), SVC()) svc_pca_pipe = mp(PCA(), SVC()) """ #####MLPC pipes##### """ mlpc_norm_pipe = mp(MinMaxScaler(), MLPC(random_state=47)) mlpc_stand_pipe = mp(StandardScaler(), MLPC(random_state=47)) mlpc_pca_pipe = mp(PCA(), MLPC()) """ #####kNN grid##### """ kNN_param_grid = { 'kneighborsclassifier__n_neighbors': [1, 2, 3, 4, 5], 'kneighborsclassifier__weights': ['uniform', 'distance'], 'kneighborsclassifier__p': [1, 2, 3] } """ Test set score: 0.12 Best parameters: {'kneighborsclassifier__n_neighbors': 1, 'kneighborsclassifier__p': 1, 'kneighborsclassifier__weights': 'uniform'} """ kNN_norm_grid = GSCV(knn_norm_pipe, kNN_param_grid, scoring='f1', cv=5)
def fit_model(features, sumstats, train_genes, test_genes, model='logit'):
    """
    Fit classifier to train_genes and calculate RMSE on test_genes
    """
    all_genes = train_genes + test_genes

    # Join sumstats with features for logistic regression, subset to
    # genes of interest, and drop genes with NaN BFDPs
    full_df = sumstats.merge(features, how='left', left_index=True,
                             right_index=True)
    full_df = full_df.loc[full_df.index.isin(all_genes), :].dropna()
    train_df = full_df.loc[full_df.index.isin(train_genes), :].\
        drop(labels='chrom', axis=1)
    test_df = full_df.loc[full_df.index.isin(test_genes), :].\
        drop(labels='chrom', axis=1)

    # Instantiate classifier dependent on model
    if model == 'logit':
        grid_params = {
            'C': [10**x for x in range(-2, 3, 1)],
            'l1_ratio': [x / 10 for x in range(0, 11, 1)]
        }
        base_class = logit(solver='saga', penalty='elasticnet')
    elif model == 'svm':
        grid_params = {'C': [10**x for x in range(-2, 2, 1)]}
        base_class = SVC(random_state=0, probability=True, break_ties=True,
                         kernel='rbf')
    elif model == 'randomforest':
        grid_params = {
            'n_estimators': [50, 100, 500],
            'criterion': ['gini', 'entropy']
        }
        base_class = RFC(random_state=0, bootstrap=True, oob_score=True)
    elif model == 'lda':
        grid_params = {
            'shrinkage': [None, 0, 0.5, 1, 'auto'],
            'solver': ['svd', 'lsqr', 'eigen']
        }
        base_class = LDAC()
    elif model == 'naivebayes':
        grid_params = {'var_smoothing': [10**x for x in range(-4, -11, -1)]}
        base_class = GNBC()
    elif model == 'neuralnet':
        grid_params = {
            'hidden_layer_sizes': [(10, 5, 2), (20, 10, 5), (20, 10, 5, 2),
                                   (50, 20, 10), (50, 20, 10, 5),
                                   (50, 20, 10, 5, 2)],
            'alpha': [10**x for x in range(-4, 5, 1)]
        }
        base_class = MLPC(activation='relu', solver='adam',
                          early_stopping=True, random_state=0)
    elif model == 'gbdt':
        grid_params = {'n_estimators': [50, 100], 'subsample': [0.5, 1]}
        base_class = GBDT(random_state=0)
    elif model == 'knn':
        grid_params = {
            'n_neighbors': [10, 50, 100, 500],
            'weights': ['uniform', 'distance'],
            'leaf_size': [5, 10, 25, 50, 100]
        }
        base_class = KNN()

    # Learn best parameters for classifier using cross-validated grid search
    classifier = GridSearchCV(base_class, grid_params, verbose=1, n_jobs=-1)

    # Fit sklearn model & predict on test set
    # (Models parameterized by grid search need to be treated separately)
    if isinstance(classifier, GridSearchCV):
        fitted_model = classifier.fit(train_df.drop(labels='bfdp', axis=1),
                                      np.round(train_df.bfdp)).best_estimator_
    else:
        fitted_model = classifier.fit(train_df.drop(labels='bfdp', axis=1),
                                      np.round(train_df.bfdp))
    test_bfdps = pd.Series(fitted_model.predict_proba(
        test_df.drop(labels='bfdp', axis=1))[:, 1],
        name='pred', index=test_df.index)

    # Compute RMSE of bfdps for test set
    test_vals = test_df.merge(test_bfdps, left_index=True, right_index=True).\
        loc[:, 'bfdp pred'.split()]
    test_rmse = rmse(test_vals.to_records(index=False))

    return fitted_model, test_rmse
from sklearn.metrics import log_loss
# grid search cross validation
from sklearn.model_selection import GridSearchCV
# ignore ConvergenceWarning
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

##################################
## 3.1 train and test models using GridSearchCV
models = {
    'DT': DTC(),
    'LR': LR(),
    'MLP': MLPC(),
    'SVC': SVC(),
    'NB': NB(),
    'KNN': KNNC(),
    'Bagging': BaggingC(),
    'RF': RFC(),
    'AdaBoost': AdaBoostC(),
    'GB': GBC(),
    'XGB': XGB(),
}

param_dict = {
    # 0.67 {'max_depth': 1, 'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
    'DT': {
        'max_depth': [1, 2, 3, None],
        'max_leaf_nodes': [4, 6, 8, 10, None],
print(count)
X1 = X1.reshape(int(X1.shape[0] / 50), 50)
X2 = X2.reshape(int(X2.shape[0] / 50), 50)
X = np.concatenate((X1, X2), axis=1)
X = np.nan_to_num(X)
# .as_matrix() was removed from pandas; .values is the drop-in replacement
y = train_df['project_is_approved'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
                                                    random_state=47)
mlpc_norm_pipe = mp(MinMaxScaler(), MLPC(random_state=47))
# Parameter names carry the 'mlpclassifier__' prefix when searching over a
# pipeline step...
mlp_param_grid1 = {
    'mlpclassifier__hidden_layer_sizes': [10, 100, (10, 10), (100, 100)],
    'mlpclassifier__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'mlpclassifier__solver': ['lbfgs', 'sgd', 'adam']
}
# ...and stay bare when searching over the estimator itself.
mlp_param_grid2 = {
    'hidden_layer_sizes': [10, 100, (10, 10), (100, 100)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam']
}
mlp_norm_grid = GSCV(mlpc_norm_pipe, mlp_param_grid1, scoring='f1', cv=5)
mlp_norm_grid.fit(X_train, y_train)
print("Test set score: {:.2f}".format(mlp_norm_grid.score(X_test, y_test)))
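# --- Hedged sketch (not from the original source) ---
# mlp_param_grid2 goes unused above; with bare parameter names it would fit an
# unscaled MLPC directly, presumably for comparison against the scaled pipeline:
mlp_plain_grid = GSCV(MLPC(random_state=47), mlp_param_grid2, scoring='f1', cv=5)
mlp_plain_grid.fit(X_train, y_train)
print("Test set score (no scaling): {:.2f}".format(
    mlp_plain_grid.score(X_test, y_test)))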
from os.path import isfile
import pickle

from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import utils  # project-local module providing read_data and combined_embeddings

filename = '/usr/src/app/sentiment/models/pickles/MLPC.pickle'
if not isfile(filename):
    train, test = train_test_split(utils.read_data(), test_size=0.2)
    train_embeddings = utils.combined_embeddings(train['text'].tolist())
    test_embeddings = utils.combined_embeddings(test['text'].tolist())
    clf = MLPC(
        hidden_layer_sizes=(256,),  # note the comma: (256) is just the int 256
        learning_rate='adaptive',
        max_iter=1000
    )
    clf.fit(train_embeddings, train['sentiment'])
    prediction = clf.predict(test_embeddings)
    report = classification_report(test['sentiment'], prediction)
    print(report)
    with open(filename, 'wb') as f:
        pickle.dump(clf, f)
else:
    print('Already Trained!')
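# --- Hedged usage sketch (not from the original source) ---
# Once the pickle exists, the trained classifier would be loaded back for
# inference along these lines; utils.combined_embeddings is assumed to embed
# raw texts the same way as at training time.
with open(filename, 'rb') as f:
    clf = pickle.load(f)
print(clf.predict(utils.combined_embeddings(['great product, would buy again'])))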
X = data['data']
y = data['target']

# Split the data set: 20% for testing, 80% for training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

scaler = StandardScaler()
# Fit the scaler on the training attributes and convert both sets to the same standard scale
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build the neural network with 3 hidden layers of 100 neurons each, and 1000 epochs
clf = MLPC(hidden_layer_sizes=(100, 100, 100), max_iter=1000)

# Build the model by training
clf.fit(X_train, y_train)

# Classify the test attributes
predictions = clf.predict(X_test)

# Compute the mean squared error
mean = mean_squared_error(y_test, predictions)
print(mean)

# Build a confusion matrix
confusion_m = pd.DataFrame(confusion_matrix(y_test, predictions),
                           columns=['Benign', 'Malignant'],
                           index=['Benign', 'Malignant'])
for current_para in parameter_set:
    best_classifier = None
    highest_accuracy = 0

    # get current parameters for the classifier
    current_alpha = current_para[0]
    current_activation_function = current_para[1]
    current_hidden_layer = current_para[2]
    # The original read max_iteration[3], indexing a name never defined here;
    # the iteration count presumably comes from the parameter tuple like the
    # other settings.
    current_iteration = current_para[3]

    fold_num = 0
    for i in range(0, 5):
        fold_num = fold_num + 1
        # build up a classifier
        current_classifier = MLPC(hidden_layer_sizes=current_hidden_layer,
                                  activation=current_activation_function,
                                  alpha=current_alpha,
                                  max_iter=current_iteration)
        # get training and validation set
        (current_training_feature, current_training_label,
         current_validation_feature,
         current_validation_label) = five_folds(training_data, i)
        # training
        current_classifier.fit(current_training_feature, current_training_label)
        # validation (uses j so the fold index i is not shadowed, as it was
        # in the original)
        correct_num = 0
        number_of_instance_in_validation_set = len(current_validation_feature)
        for j in range(number_of_instance_in_validation_set):
            if current_classifier.predict(
                    current_validation_feature[j].reshape(1, -1)) \
                    == current_validation_label[j]:
                correct_num = correct_num + 1
        print(" Correct number for this time is ", correct_num)
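# --- Hedged alternative sketch (not from the original source) ---
# The manual fold-and-count loop above can be replaced with scikit-learn's
# built-in cross-validation; training_features/training_labels are assumed
# arrays derived from training_data.
from sklearn.model_selection import cross_val_score

scores = cross_val_score(
    MLPC(hidden_layer_sizes=current_hidden_layer,
         activation=current_activation_function,
         alpha=current_alpha,
         max_iter=current_iteration),
    training_features, training_labels,
    cv=5, scoring='accuracy')
print("5-fold accuracies:", scores)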
    dtc_people.score(X_test_people_stand, y_test_people)))

###DONE###
rfc_people = RFC(n_estimators=100, max_depth=25, bootstrap=False,
                 random_state=37).fit(X_train_people_norm, y_train_people)
print("Test set score of RFC people: {:.3f}".format(
    rfc_people.score(X_test_people_norm, y_test_people)))

svc_people = SVC(C=4, kernel='linear',
                 random_state=37).fit(X_train_people_stand, y_train_people)
print("Test set score of SVC people: {:.3f}".format(
    svc_people.score(X_test_people_stand, y_test_people)))

mlpc_people = MLPC(alpha=.1, random_state=37).fit(X_train_people_nmf,
                                                  y_train_people)
print("Test set score of MLPC people: {:.3f}".format(
    mlpc_people.score(X_test_people_nmf, y_test_people)))
print('people\n')

#Mnist
###DONE###
knc_mnist = KNC(weights='distance').fit(X_train_mnist_norm, y_train_mnist)
print("Test set score of kNN mnist: {:.3f}".format(
    knc_mnist.score(X_test_mnist_norm, y_test_mnist)))

###DONE###
dtc_mnist = DTC(criterion='entropy', max_depth=15, min_samples_split=3,
                random_state=37).fit(X_train_mnist_nmf, y_train_mnist)
#"DTC", #"GNB", "QDA"] model_types = [LR, RFC, #ABC, MLPC, KNC, SVC, #DTC, #GNB, QDA] models = [LR(), RFC(n_estimators=30), #ABC(), MLPC(), KNC(), SVC(probability=True), #DTC(), #GNB(), QDA()] models2 = copy.deepcopy(models) ### experiment bright students math finance N = 10000 ## 1000 of each group (groups S and T) minority_percent = 0.3 MIN = int(minority_percent * N) MAJ = int((1 - minority_percent) * N) # print(MIN, MAJ)
import numpy as np
from sklearn import metrics
from sklearn.neural_network import MLPClassifier as MLPC

# The data set from MNIST
images, labels = load_mnist()
images = images[:1000]
labels = labels[:1000]

# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(images)
data = images.reshape((n_samples, -1))
print(data[3])

# Create a classifier: a multi-layer perceptron classifier
# (the original comment said "support vector classifier")
classifier = MLPC()
labels = np.ravel(labels)

# We learn the digits on the first half of the digits
# (// is needed: n_samples / 2 is a float in Python 3 and cannot be a slice index)
classifier.fit(data[:n_samples // 2], labels[:n_samples // 2])

# Now predict the value of the digit on the second half:
expected = labels[n_samples // 2:]
predicted = classifier.predict(data[n_samples // 2:])

print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

"""
images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))