# Lower-case aliases are assumed to map to the scikit-learn estimators imported below;
# MulticlassSVM is a project-specific class defined elsewhere and is not imported here.
from sklearn.svm import SVC as svc, LinearSVC as lsvc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB as gnb
from sklearn.gaussian_process import GaussianProcessClassifier as gpc
from sklearn.linear_model import SGDClassifier as sgdc, LogisticRegression as lr
from sklearn.ensemble import RandomForestClassifier as rf


def get_model(choice='lr', class_weight=None):
    """Return an unfitted classifier selected by its short name."""
    if choice == 'svc':
        # SVC does not accept n_jobs, so only verbose and class_weight are passed.
        model = svc(verbose=1, class_weight=class_weight)
    elif choice == 'lsvc':
        # LinearSVC does not accept n_jobs either.
        model = lsvc(class_weight=class_weight)
    elif choice == 'knn':
        model = KNeighborsClassifier()
    elif choice == 'msvm':
        model = MulticlassSVM(C=0.1, tol=0.01, max_iter=100, random_state=0, verbose=1)
    elif choice == 'gnb':
        # GaussianNB has no class_weight parameter; use priors or sample_weight instead.
        model = gnb()
    elif choice == 'gpc':
        # GaussianProcessClassifier has no class_weight parameter.
        model = gpc()
    elif choice == 'sgdc':
        model = sgdc(class_weight=class_weight)
    elif choice == 'rf':
        model = rf(class_weight=class_weight)
    # elif choice == 'vw':
    #     model = vw()
    else:
        model = lr(class_weight=class_weight)
    return model
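# Usage sketch (not part of the original snippet): fitting one of the selectable
# models on synthetic data. The dataset and variable names below are illustrative
# assumptions, not taken from the original code.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=20, weights=[0.8, 0.2], random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

clf = get_model('rf', class_weight='balanced')  # random forest, imbalance handled via class weights
clf.fit(X_train, y_train)
print('held-out accuracy:', clf.score(X_test, y_test))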
# Assumed module-level imports for this class method (the enclosing class, which
# defines self.report_width, self.Xt_train and self.yt_train, is not shown here);
# lower-case aliases map to scikit-learn estimators.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display  # notebook-style table rendering
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier as knnc
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.linear_model import LogisticRegression as logitc, SGDClassifier as sgdc
from sklearn.svm import SVC as svc
from sklearn.naive_bayes import GaussianNB as gnbc, BernoulliNB as bnbc
from sklearn.ensemble import RandomForestClassifier as rfc, GradientBoostingClassifier as gbc


def classification(self, metric, folds, alphas, graph):
    # alphas is not used in this classification variant.
    size = 1.3 * self.report_width // 10
    models = {}
    models["K nearest neighbors classifier K2"] = knnc(n_neighbors=2)
    models["K nearest neighbors classifier K5"] = knnc(n_neighbors=5)
    models["K nearest neighbors classifier K10"] = knnc(n_neighbors=10)
    models["Decision tree classifier"] = dtc()
    models["Logistic classifier"] = logitc()
    models["SVM classifier with RBF kernel"] = svc(gamma='scale')
    models["SVM classifier with linear kernel"] = svc(kernel='linear')
    models["Gaussian naive bayes"] = gnbc()
    models["Bernoulli naive bayes"] = bnbc()
    models["SGD classifier"] = sgdc(max_iter=10000)
    models["Random forest classifier"] = rfc(n_estimators=100)
    models["Gradient boosting classifier"] = gbc()
    self.models = models

    print('\n')
    print(self.report_width * '*', '\n*')
    print('* CLASSIFICATION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
    kf = StratifiedKFold(n_splits=folds, shuffle=True)
    results = []
    names = []
    for model_name in models:
        # Score each candidate with stratified k-fold cross-validation on the
        # requested metric (the original call did not pass `scoring`, leaving
        # the `metric` argument unused).
        cv_scores = cross_val_score(models[model_name], self.Xt_train,
                                    self.yt_train.values.ravel(), cv=kf,
                                    scoring=metric, error_score=np.nan)
        results.append(cv_scores)
        names.append(model_name)
    print(self.report_width * '*', '')

    report = pd.DataFrame({'Classifier': names, 'Score': results})
    report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
    report['Score (std)'] = report.Score.apply(lambda x: x.std())
    report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
    report.sort_values(by='Score (avg)', inplace=True, ascending=False)
    report.drop('Score', axis=1, inplace=True)
    display(report)
    print('\n')

    if graph:
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Classifier Comparison')
        #ax = fig.add_subplot(111)
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.xticks(rotation=45)
        plt.subplots_adjust(hspace=0.0)
        plt.show()
    return None
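# Usage sketch (an assumption, not part of the original code): wiring the method
# above into a minimal stand-in class and running it on the iris data. The class
# name and attribute wiring are illustrative; the original class is not shown.
from sklearn.datasets import load_iris

class ModelReport:
    report_width = 80
    classification = classification  # reuse the module-level function as a method

X_iris, y_iris = load_iris(return_X_y=True, as_frame=True)
rep = ModelReport()
rep.Xt_train, rep.yt_train = X_iris, y_iris.to_frame()
rep.classification(metric='accuracy', folds=5, alphas=None, graph=False)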
# Assumed imports; `data` is a DataFrame loaded earlier with 'Sex' and 'EmoState' columns.
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.svm import SVC as svc
from sklearn.neighbors import KNeighborsClassifier as knc
from sklearn.linear_model import SGDClassifier as sgdc
from sklearn.naive_bayes import GaussianNB as gnbc
from sklearn.neural_network import MLPClassifier as mlpc

sex = 'F'
scaler = StandardScaler()
data_partial = data[data['Sex'] == sex].drop('Sex', axis=1)
# corr_matrix_f, corr_matrix_m = data_f.corr(), data_m.corr()
# plot_corr_matrices(corr_matrix_f, corr_matrix_m)
y = data_partial['EmoState']
# Note: fitting the scaler on the full dataset before splitting leaks test-set
# statistics; fitting on X_train only (or using a Pipeline) would avoid this.
X = scaler.fit_transform(data_partial.drop('EmoState', axis=1))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=71)

models = (('DTC', dtc()),
          ('SVM', svc(C=10)),
          ('KNN', knc(n_neighbors=10)),
          ('SGDC', sgdc()),
          ('GNBC', gnbc()),
          ('MLPC', mlpc(max_iter=1000, learning_rate='adaptive')))

results = []
names = []
seed = 13
scoring = 'accuracy'
for name, model in models:
    # 10-fold cross-validation on the training split for each candidate model.
    kfold = model_selection.KFold(n_splits=10, random_state=seed, shuffle=True)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)  # added to match the `names` list initialized above
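# Follow-up sketch (not in the original excerpt): summarize the cross-validation
# scores collected above, one line per model.
for name, cv_results in zip(names, results):
    print(f'{name}: {cv_results.mean():.3f} (+/- {cv_results.std():.3f})')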
# Assumed module-level imports (the enclosing class, which defines self.X, self.y,
# self.Xt_train, self.yt_train, self.graph_width, self.report_width and
# self.graphs_model, is not shown in this excerpt).
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as ldac
from sklearn.neighbors import NearestCentroid as ncc, KNeighborsClassifier as knnc
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.naive_bayes import GaussianNB as gnbc, BernoulliNB as bnbc, MultinomialNB as mnbc
from sklearn.linear_model import SGDClassifier as sgdc, RidgeClassifier as rc, LogisticRegression as logitc
from sklearn.svm import SVC as svc
from sklearn.ensemble import GradientBoostingClassifier as gbc, RandomForestClassifier as rfc
from sklearn.multiclass import OneVsRestClassifier as ovrc, OneVsOneClassifier as ovoc


def classification(self, metric, folds, printt=True, graph=False):
    size = self.graph_width
    if len(self.y.iloc[:, 0].unique()) > 2:
        struct = 'multiclass'
    else:
        struct = 'binary'

    # Significant model setup differences should be listed as different models.
    models = {}
    models["Linear discriminant analysis"] = ldac()
    models["Nearest centroid classifier euclidian"] = ncc(metric='euclidean')
    models["Nearest centroid classifier manhattan"] = ncc(metric='manhattan')
    models["K nearest neighbors classifier K2"] = knnc(n_neighbors=2)
    models["K nearest neighbors classifier K5"] = knnc(n_neighbors=5)
    models["K nearest neighbors classifier K10"] = knnc(n_neighbors=10)
    models["Decision tree classifier"] = dtc()
    models["Gaussian naive bayes"] = gnbc()
    models["Bernoulli naive bayes"] = bnbc(binarize=0.5)
    models["Multinomial naive bayes"] = mnbc()
    models["SGD classifier"] = sgdc(max_iter=10000)
    models["Ridge classifier"] = rc()
    if len(self.Xt_train) < 10000:
        # Kernel SVMs scale poorly with sample count, so add them only for smaller sets.
        models["SVM classifier RBF"] = svc(gamma='scale')
        models["SVM classifier Linear"] = svc(kernel='linear')
        models["SVM classifier Poly"] = svc(kernel='poly')
    if self.Xt_train.shape[0] < 10000 or self.Xt_train.shape[1] < 5:
        models["Gradient boosting classifier"] = gbc()
        models["Random forest classifier"] = rfc(n_estimators=100)
    if struct == 'multiclass':
        models["Logistic classifier multinomial"] = logitc(multi_class='multinomial', solver='lbfgs')
        models["Logistic classifier auto"] = logitc(multi_class='auto')
        models["Logistic One vs Rest"] = ovrc(logitc())
        models["Logistic One vs One"] = ovoc(logitc())
    if struct == 'binary':
        models["Logistic classifier"] = logitc(max_iter=2000)
    self.models = models

    kf = StratifiedKFold(n_splits=folds, shuffle=True)
    results = []
    names = []
    et = []
    for model_name in models:
        start = time.time()
        cv_scores = cross_val_score(models[model_name], self.Xt_train, self.yt_train,
                                    cv=kf, scoring=metric, error_score=np.nan)
        results.append(cv_scores)
        names.append(model_name)
        et.append(time.time() - start)
        #print(model_name, time.time() - start)

    report = pd.DataFrame({'Model': names, 'Score': results, 'Elapsed Time': et})
    report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
    report['Score (std)'] = report.Score.apply(lambda x: x.std())
    report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
    report.sort_values(by='Score (avg)', inplace=True, ascending=False)
    report.drop('Score', axis=1, inplace=True)
    report.reset_index(inplace=True, drop=True)
    self.report_performance = report

    if printt:
        print('\n')
        print(self.report_width * '*', '\n*')
        print('* CLASSIFICATION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        print(self.report_width * '*', '')
        print(report)
        print('\n')
    if graph:
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Classifier Comparison')
        #ax = fig.add_subplot(111)
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.xticks(rotation=45)
        plt.subplots_adjust(hspace=0.0, bottom=0.25)
        self.graphs_model.append(fig)
        plt.show()
    return None
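# Hypothetical follow-up sketch (not part of the original code): the banner above
# reports results "before parameters boosting"; one way such a tuning step could look
# is a grid search over the top-ranked model from self.report_performance. The method
# name and param_grid values below are illustrative assumptions, and the function is
# meant to live on the same class as classification() above.
from sklearn.model_selection import GridSearchCV

def boost_parameters(self, metric, folds):
    best_name = self.report_performance.loc[0, 'Model']  # highest average score after sorting
    base_model = self.models[best_name]
    param_grid = {}
    if best_name == "Random forest classifier":
        param_grid = {'n_estimators': [100, 300], 'max_depth': [None, 5, 10]}
    elif best_name.startswith("SVM"):
        param_grid = {'C': [0.1, 1, 10]}
    search = GridSearchCV(base_model, param_grid, scoring=metric,
                          cv=StratifiedKFold(n_splits=folds, shuffle=True))
    search.fit(self.Xt_train, self.yt_train)
    return search.best_estimator_, search.best_score_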