def callRFC():
    """Run the random-forest workflow on the shared dataset.

    Selects 9 features with ``select_k_best_features``, hands a single-step
    ``RandomForestClassifier`` pipeline plus its parameter grid to
    ``precision_n_recall`` (the grid-search step), and then passes a forest
    with fixed hyper-parameters to ``tester_prep``.

    Reads ``my_dataset`` and ``features_list`` from the enclosing scope and
    returns nothing.
    """
    ### Feature selection
    selected_features = select_k_best_features(my_dataset, features_list, 9)

    ### Classifier pipeline and the grid that is searched over it
    search_steps = [
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
    search_grid = {
        'classifier__max_features': ('sqrt', 1),
        'classifier__max_depth': np.arange(3, 8),
        'classifier__n_estimators': (10, 20),
    }
    search_pipeline = Pipeline(steps=search_steps)

    ### Grid search for the optimal parameters
    precision_n_recall(search_pipeline, search_grid, my_dataset,
                       selected_features)

    ### Fixed configuration handed to tester_prep (not read back from the
    ### grid search call above)
    final_forest = RandomForestClassifier(
        n_estimators=10,
        max_features='sqrt',
        max_depth=5,
        random_state=42,
    )
    tester_prep(final_forest, my_dataset, selected_features)
def callSVC():
    """Run the RBF support-vector workflow on the shared dataset.

    Selects 8 features with ``select_k_best_features``, hands a
    ``StandardScaler`` + ``SVC`` pipeline and a gamma/C grid to
    ``precision_n_recall`` (the grid-search step), and then passes a second
    scaler + SVC pipeline with fixed ``C`` and ``gamma`` to ``tester_prep``.

    Reads ``my_dataset`` and ``features_list`` from the enclosing scope and
    returns nothing.
    """
    ### Feature selection
    selected_features = select_k_best_features(my_dataset, features_list, 8)

    ### Classifier pipeline and the grid that is searched over it
    # NOTE(review): class_weight='auto' appears to be accepted only by older
    # scikit-learn releases (newer ones spell it 'balanced') -- confirm
    # against the pinned version before upgrading.
    search_steps = [
        ('scaler', StandardScaler()),
        ('classifier', SVC(kernel='rbf', random_state=42,
                           class_weight='auto')),
    ]
    search_grid = {
        'classifier__gamma': 10.0 ** np.arange(-4, 0),
        'classifier__C': 10.0 ** np.arange(1, 5),
    }
    search_pipeline = Pipeline(steps=search_steps)

    ### Grid search for the optimal parameters
    precision_n_recall(search_pipeline, search_grid, my_dataset,
                       selected_features)

    ### Fixed configuration handed to tester_prep (not read back from the
    ### grid search call above)
    final_steps = [
        ('scaler', StandardScaler()),
        ('classifier', SVC(kernel='rbf', C=1000, gamma=0.0001,
                           random_state=42, class_weight='auto')),
    ]
    final_pipeline = Pipeline(steps=final_steps)
    tester_prep(final_pipeline, my_dataset, selected_features)
def callNBC():
    """Run the Gaussian naive-Bayes workflow on the shared dataset.

    Selects 8 features with ``select_k_best_features`` and passes a default
    ``GaussianNB`` together with those features to ``tester_prep``. No grid
    search is performed in this function.

    Reads ``my_dataset`` and ``features_list`` from the enclosing scope and
    returns nothing.
    """
    ### Feature selection
    selected_features = select_k_best_features(my_dataset, features_list, 8)

    naive_bayes = GaussianNB()
    tester_prep(naive_bayes, my_dataset, selected_features)
def callLR():
    """Run the logistic-regression workflow on the shared dataset.

    Selects 16 features with ``select_k_best_features``, hands a
    ``StandardScaler`` + ``LogisticRegression`` pipeline and a penalty/C grid
    to ``precision_n_recall`` (the grid-search step), and then passes a
    second scaler + logistic-regression pipeline with fixed ``C`` and
    ``penalty`` to ``tester_prep``.

    Reads ``my_dataset`` and ``features_list`` from the enclosing scope and
    returns nothing.
    """
    ### Feature selection
    selected_features = select_k_best_features(my_dataset, features_list, 16)

    ### Classifier pipeline and the grid that is searched over it
    search_steps = [
        ('scaler', StandardScaler()),
        ('classifier', LogisticRegression(tol=0.001, random_state=42)),
    ]
    search_grid = {
        'classifier__penalty': ('l1', 'l2'),
        'classifier__C': 10.0 ** np.arange(-10, -3),
    }
    search_pipeline = Pipeline(steps=search_steps)

    ### Grid search for the optimal parameters
    precision_n_recall(search_pipeline, search_grid, my_dataset,
                       selected_features)

    ### Fixed configuration handed to tester_prep (not read back from the
    ### grid search call above)
    final_steps = [
        ('scaler', StandardScaler()),
        ('classifier', LogisticRegression(tol=0.001, C=10 ** -8,
                                          penalty='l2', random_state=42)),
    ]
    final_pipeline = Pipeline(steps=final_steps)
    tester_prep(final_pipeline, my_dataset, selected_features)