def test_gnb_priors(): """Test whether the class prior override is properly used""" clf = GaussianNB(priors=np.array([0.3, 0.7])).fit(X, y) assert_array_almost_equal( clf.predict_proba([[-0.1, -0.1]]), np.array([[0.825303662161683, 0.174696337838317]]), 8) assert_array_almost_equal(clf.class_prior_, np.array([0.3, 0.7]))
def test_check_accuracy_on_digits(): # Non regression test to make sure that any further refactoring / optim # of the NB models do not harm the performance on a slightly non-linearly # separable dataset X, y = load_digits(return_X_y=True) binary_3v8 = np.logical_or(y == 3, y == 8) X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8] # Multinomial NB scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10) assert scores.mean() > 0.86 scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10) assert scores.mean() > 0.94 # Bernoulli NB scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10) assert scores.mean() > 0.83 scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10) assert scores.mean() > 0.92 # Gaussian NB scores = cross_val_score(GaussianNB(), X, y, cv=10) assert scores.mean() > 0.77 scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10) assert scores.mean() > 0.89 scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10) assert scores.mean() > 0.86
def test_predict_on_toy_problem(): """Manually check predicted class labels for toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = GaussianNB() X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2], [2.1, 1.4], [3.1, 2.3]]) y = np.array([1, 1, 1, 2, 2, 2]) assert all(clf1.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) assert all(clf2.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) assert all(clf3.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard', weights=[1, 1, 1]) assert all(eclf.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', weights=[1, 1, 1]) assert all(eclf.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2])
def test_gnb_prior(): # Test whether class priors are properly set. clf = GaussianNB().fit(X, y) assert_array_almost_equal(np.array([3, 3]) / 6.0, clf.class_prior_, 8) clf.fit(X1, y1) # Check that the class priors sum to 1 assert_array_almost_equal(clf.class_prior_.sum(), 1)
def test_gnb_pfit_wrong_nb_features(): """Test whether an error is raised when the number of feature changes between two partial fit""" clf = GaussianNB() # Fit for the first time the GNB clf.fit(X, y) # Partial fit a second time with an incoherent X assert_raises(ValueError, clf.partial_fit, np.hstack((X, X)), y)
def test_gnb_priors_sum_isclose(): # test whether the class prior sum is properly tested""" X = np.array([[-1, -1], [-2, -1], [-3, -2], [-4, -5], [-5, -4], [1, 1], [2, 1], [3, 2], [4, 4], [5, 5]]) priors = np.array( [0.08, 0.14, 0.03, 0.16, 0.11, 0.16, 0.07, 0.14, 0.11, 0.0]) Y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) clf = GaussianNB(priors) # smoke test for issue #9633 clf.fit(X, Y)
def test_gnb_sample_weight(): """Test whether sample weights are properly used in GNB. """ # Sample weights all being 1 should not change results sw = np.ones(6) clf = GaussianNB().fit(X, y) clf_sw = GaussianNB().fit(X, y, sw) assert_array_almost_equal(clf.theta_, clf_sw.theta_) assert_array_almost_equal(clf.sigma_, clf_sw.sigma_) # Fitting twice with half sample-weights should result # in same result as fitting once with full weights sw = rng.rand(y.shape[0]) clf1 = GaussianNB().fit(X, y, sample_weight=sw) clf2 = GaussianNB().partial_fit(X, y, classes=[1, 2], sample_weight=sw / 2) clf2.partial_fit(X, y, sample_weight=sw / 2) assert_array_almost_equal(clf1.theta_, clf2.theta_) assert_array_almost_equal(clf1.sigma_, clf2.sigma_) # Check that duplicate entries and correspondingly increased sample # weights yield the same result ind = rng.randint(0, X.shape[0], 20) sample_weight = np.bincount(ind, minlength=X.shape[0]) clf_dupl = GaussianNB().fit(X[ind], y[ind]) clf_sw = GaussianNB().fit(X, y, sample_weight) assert_array_almost_equal(clf_dupl.theta_, clf_sw.theta_) assert_array_almost_equal(clf_dupl.sigma_, clf_sw.sigma_)
def test_transform(): """Check transform method of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = GaussianNB() X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) y = np.array([1, 1, 2, 2]) eclf1 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft').fit(X, y) eclf2 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', flatten_transform=True).fit(X, y) eclf3 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', flatten_transform=False).fit(X, y) assert_array_equal(eclf1.transform(X).shape, (4, 6)) assert_array_equal(eclf2.transform(X).shape, (4, 6)) assert_array_equal(eclf3.transform(X).shape, (3, 4, 2)) assert_array_almost_equal(eclf1.transform(X), eclf2.transform(X)) assert_array_almost_equal( eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)), eclf2.transform(X) )
def test_set_params(): """set_params should be able to set estimators""" clf1 = LogisticRegression(random_state=123, C=1.0) clf2 = RandomForestClassifier(random_state=123, max_depth=None) clf3 = GaussianNB() eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft', weights=[1, 2]) assert 'lr' in eclf1.named_estimators assert eclf1.named_estimators.lr is eclf1.estimators[0][1] assert eclf1.named_estimators.lr is eclf1.named_estimators['lr'] eclf1.fit(X, y) assert 'lr' in eclf1.named_estimators_ assert eclf1.named_estimators_.lr is eclf1.estimators_[0] assert eclf1.named_estimators_.lr is eclf1.named_estimators_['lr'] eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft', weights=[1, 2]) eclf2.set_params(nb=clf2).fit(X, y) assert not hasattr(eclf2, 'nb') assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) assert eclf2.estimators[0][1].get_params() == clf1.get_params() assert eclf2.estimators[1][1].get_params() == clf2.get_params() eclf1.set_params(lr__C=10.0) eclf2.set_params(nb__max_depth=5) assert eclf1.estimators[0][1].get_params()['C'] == 10.0 assert eclf2.estimators[1][1].get_params()['max_depth'] == 5 assert (eclf1.get_params()["lr__C"] == eclf1.get_params()["lr"].get_params()['C'])
def test_gnb_naive_bayes_scale_invariance(): # Scaling the data should not change the prediction results iris = load_iris() X, y = iris.data, iris.target labels = [ GaussianNB().fit(f * X, y).predict(f * X) for f in [1E-10, 1, 1E10] ] assert_array_equal(labels[0], labels[1]) assert_array_equal(labels[1], labels[2])
def test_majority_label_iris(): """Check classification by majority label on dataset iris.""" clf1 = LogisticRegression(solver='liblinear', random_state=123) clf2 = RandomForestClassifier(n_estimators=10, random_state=123) clf3 = GaussianNB() eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard') scores = cross_val_score(eclf, X, y, scoring='accuracy') assert_almost_equal(scores.mean(), 0.95, decimal=2)
def test_gnb_check_update_with_no_data(): """ Test when the partial fit is called without any data""" # Create an empty array prev_points = 100 mean = 0. var = 1. x_empty = np.empty((0, X.shape[1])) tmean, tvar = GaussianNB._update_mean_variance(prev_points, mean, var, x_empty) assert tmean == mean assert tvar == var
def test_weights_iris(): """Check classification by average probabilities on dataset iris.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = GaussianNB() eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', weights=[1, 2, 10]) scores = cross_val_score(eclf, X, y, scoring='accuracy') assert_almost_equal(scores.mean(), 0.93, decimal=2)
def test_set_estimator_none(drop): """VotingClassifier set_params should be able to set estimators as None or drop""" # Test predict clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(n_estimators=10, random_state=123) clf3 = GaussianNB() eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('nb', clf3)], voting='hard', weights=[1, 0, 0.5]).fit(X, y) eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('nb', clf3)], voting='hard', weights=[1, 1, 0.5]) eclf2.set_params(rf=drop).fit(X, y) assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert dict(eclf2.estimators)["rf"] is drop assert len(eclf2.estimators_) == 2 assert all(isinstance(est, (LogisticRegression, GaussianNB)) for est in eclf2.estimators_) assert eclf2.get_params()["rf"] is drop eclf1.set_params(voting='soft').fit(X, y) eclf2.set_params(voting='soft').fit(X, y) assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) msg = 'All estimators are None or "drop". At least one is required!' assert_raise_message( ValueError, msg, eclf2.set_params(lr=drop, rf=drop, nb=drop).fit, X, y) # Test soft voting transform X1 = np.array([[1], [2]]) y1 = np.array([1, 2]) eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)], voting='soft', weights=[0, 0.5], flatten_transform=False).fit(X1, y1) eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)], voting='soft', weights=[1, 0.5], flatten_transform=False) eclf2.set_params(rf=drop).fit(X1, y1) assert_array_almost_equal(eclf1.transform(X1), np.array([[[0.7, 0.3], [0.3, 0.7]], [[1., 0.], [0., 1.]]])) assert_array_almost_equal(eclf2.transform(X1), np.array([[[1., 0.], [0., 1.]]])) eclf1.set_params(voting='hard') eclf2.set_params(voting='hard') assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]])) assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
def test_gridsearch(): """Check GridSearch support.""" clf1 = LogisticRegression(random_state=1) clf2 = RandomForestClassifier(random_state=1) clf3 = GaussianNB() eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft') params = {'lr__C': [1.0, 100.0], 'voting': ['soft', 'hard'], 'weights': [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]]} grid = GridSearchCV(estimator=eclf, param_grid=params) grid.fit(iris.data, iris.target)
def test_predict_proba_on_toy_problem(): """Calculate predicted probabilities on toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = GaussianNB() X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) y = np.array([1, 1, 2, 2]) clf1_res = np.array([[0.59790391, 0.40209609], [0.57622162, 0.42377838], [0.50728456, 0.49271544], [0.40241774, 0.59758226]]) clf2_res = np.array([[0.8, 0.2], [0.8, 0.2], [0.2, 0.8], [0.3, 0.7]]) clf3_res = np.array([[0.9985082, 0.0014918], [0.99845843, 0.00154157], [0., 1.], [0., 1.]]) t00 = (2*clf1_res[0][0] + clf2_res[0][0] + clf3_res[0][0]) / 4 t11 = (2*clf1_res[1][1] + clf2_res[1][1] + clf3_res[1][1]) / 4 t21 = (2*clf1_res[2][1] + clf2_res[2][1] + clf3_res[2][1]) / 4 t31 = (2*clf1_res[3][1] + clf2_res[3][1] + clf3_res[3][1]) / 4 eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', weights=[2, 1, 1]) eclf_res = eclf.fit(X, y).predict_proba(X) assert_almost_equal(t00, eclf_res[0][0], decimal=1) assert_almost_equal(t11, eclf_res[1][1], decimal=1) assert_almost_equal(t21, eclf_res[2][1], decimal=1) assert_almost_equal(t31, eclf_res[3][1], decimal=1) with pytest.raises( AttributeError, match="predict_proba is not available when voting='hard'"): eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard') eclf.fit(X, y).predict_proba(X)
def test_gnb_partial_fit(): clf = GaussianNB().fit(X, y) clf_pf = GaussianNB().partial_fit(X, y, np.unique(y)) assert_array_almost_equal(clf.theta_, clf_pf.theta_) assert_array_almost_equal(clf.sigma_, clf_pf.sigma_) assert_array_almost_equal(clf.class_prior_, clf_pf.class_prior_) clf_pf2 = GaussianNB().partial_fit(X[0::2, :], y[0::2], np.unique(y)) clf_pf2.partial_fit(X[1::2], y[1::2]) assert_array_almost_equal(clf.theta_, clf_pf2.theta_) assert_array_almost_equal(clf.sigma_, clf_pf2.sigma_) assert_array_almost_equal(clf.class_prior_, clf_pf2.class_prior_)
def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = GaussianNB() X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) y = np.array([1, 1, 2, 2]) eclf1 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', n_jobs=1).fit(X, y) eclf2 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', n_jobs=2).fit(X, y) assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
def test_gnb(): # Gaussian Naive Bayes classification. # This checks that GaussianNB implements fit and predict and returns # correct values for a simple toy dataset. clf = GaussianNB() y_pred = clf.fit(X, y).predict(X) assert_array_equal(y_pred, y) y_pred_proba = clf.predict_proba(X) y_pred_log_proba = clf.predict_log_proba(X) assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8) # Test whether label mismatch between target y and classes raises # an Error # FIXME Remove this test once the more general partial_fit tests are merged assert_raises(ValueError, GaussianNB().partial_fit, X, y, classes=[0, 1])
names = [ "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process", "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes", "QDA" ] classifiers = [ KNeighborsClassifier(3), SVC(kernel="linear", C=0.025), SVC(gamma=2, C=1), GaussianProcessClassifier(1.0 * RBF(1.0)), DecisionTreeClassifier(max_depth=5), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), MLPClassifier(alpha=1, max_iter=1000), AdaBoostClassifier(), GaussianNB(), QuadraticDiscriminantAnalysis() ] X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1) rng = np.random.RandomState(2) X += 2 * rng.uniform(size=X.shape) linearly_separable = (X, y) datasets = [ make_moons(noise=0.3, random_state=0), make_circles(noise=0.2, factor=0.5, random_state=1), linearly_separable
X, y = datasets.make_classification(n_samples=100000, n_features=20, n_informative=2, n_redundant=2) train_samples = 100 # Samples used for training the models X_train = X[:train_samples] X_test = X[train_samples:] y_train = y[:train_samples] y_test = y[train_samples:] # Create classifiers lr = LogisticRegression() gnb = GaussianNB() svc = LinearSVC(C=1.0) rfc = RandomForestClassifier() # ############################################################################# # Plot calibration plots plt.figure(figsize=(10, 10)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [(lr, 'Logistic'), (gnb, 'Naive Bayes'), (svc, 'Support Vector Classification'), (rfc, 'Random Forest')]: clf.fit(X_train, y_train)
axes[2].set_ylabel("Score") axes[2].set_title("Performance of the model") return plt fig, axes = plt.subplots(3, 2, figsize=(10, 15)) X, y = load_digits(return_X_y=True) title = "Learning Curves (Naive Bayes)" # Cross validation with 100 iterations to get smoother mean test and train # score curves, each time with 20% data randomly selected as a validation set. cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0) estimator = GaussianNB() plot_learning_curve(estimator, title, X, y, axes=axes[:, 0], ylim=(0.7, 1.01), cv=cv, n_jobs=4) title = r"Learning Curves (SVM, RBF kernel, $\gamma=0.001$)" # SVC is more expensive so we do a lower number of CV iterations: cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0) estimator = SVC(gamma=0.001) plot_learning_curve(estimator, title,
def test_gnb_wrong_nb_priors(): """ Test whether an error is raised if the number of prior is different from the number of class""" clf = GaussianNB(priors=np.array([.25, .25, .25, .25])) assert_raises(ValueError, clf.fit, X, y)
print("\tRecall: %1.3f" % recall_score(y_test, y_pred)) print("\tF1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) plt.tight_layout() # Plot calibration curve for Gaussian Naive Bayes plot_calibration_curve(GaussianNB(), "Naive Bayes", 1) # Plot calibration curve for Linear SVC plot_calibration_curve(LinearSVC(max_iter=10000), "SVC", 2) plt.show()
def test_gnb_neg_priors(): """Test whether an error is raised in case of negative priors""" clf = GaussianNB(priors=np.array([-1., 2.])) assert_raises(ValueError, clf.fit, X, y)
def test_gnb_prior_greater_one(): """Test if an error is raised if the sum of prior greater than one""" clf = GaussianNB(priors=np.array([2., 1.])) assert_raises(ValueError, clf.fit, X, y)
def test_gnb_prior_large_bias(): """Test if good prediction when class prior favor largely one class""" clf = GaussianNB(priors=np.array([0.01, 0.99])) clf.fit(X, y) assert clf.predict([[-0.1, -0.1]]) == np.array([2])
n_features=2, cluster_std=1.0, centers=centers, shuffle=False, random_state=42) y[:n_samples // 2] = 0 y[n_samples // 2:] = 1 sample_weight = np.random.RandomState(42).rand(y.shape[0]) # split train, test for calibration X_train, X_test, y_train, y_test, sw_train, sw_test = \ train_test_split(X, y, sample_weight, test_size=0.9, random_state=42) # Gaussian Naive-Bayes with no calibration clf = GaussianNB() clf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights prob_pos_clf = clf.predict_proba(X_test)[:, 1] # Gaussian Naive-Bayes with isotonic calibration clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic') clf_isotonic.fit(X_train, y_train, sw_train) prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1] # Gaussian Naive-Bayes with sigmoid calibration clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid') clf_sigmoid.fit(X_train, y_train, sw_train) prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1] print("Brier scores: (the smaller the better)")
# License: BSD 3 clause RANDOM_STATE = 42 FIG_SIZE = (10, 7) features, target = load_wine(return_X_y=True) # Make a train/test split using 30% test size X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.30, random_state=RANDOM_STATE) # Fit to data and predict using pipelined GNB and PCA. unscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB()) unscaled_clf.fit(X_train, y_train) pred_test = unscaled_clf.predict(X_test) # Fit to data and predict using pipelined scaling, GNB and PCA. std_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB()) std_clf.fit(X_train, y_train) pred_test_std = std_clf.predict(X_test) # Show prediction accuracies in scaled and unscaled data. print('\nPrediction accuracy for the normal test dataset with PCA') print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test))) print('\nPrediction accuracy for the standardized test dataset with PCA') print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test_std)))
mean = X_train.mean(axis=0) std = X_train.std(axis=0) mean[10:] = 0.0 std[10:] = 1.0 X_train = (X_train - mean) / std X_test = (X_test - mean) / std return X_train, X_test, y_train, y_test ESTIMATORS = { 'GBRT': GradientBoostingClassifier(n_estimators=250), 'ExtraTrees': ExtraTreesClassifier(n_estimators=20), 'RandomForest': RandomForestClassifier(n_estimators=20), 'CART': DecisionTreeClassifier(min_samples_split=5), 'SGD': SGDClassifier(alpha=0.001), 'GaussianNB': GaussianNB(), 'liblinear': LinearSVC(loss="l2", penalty="l2", C=1000, dual=False, tol=1e-3), 'SAG': LogisticRegression(solver='sag', max_iter=2, C=1000) } if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--classifiers', nargs="+", choices=ESTIMATORS, type=str, default=['liblinear', 'GaussianNB', 'SGD', 'CART'],