Example #1
class RidgeClassifierImpl:
    def __init__(self,
                 alpha=1.0,
                 fit_intercept=True,
                 normalize=False,
                 copy_X=True,
                 max_iter=None,
                 tol=0.001,
                 class_weight='balanced',
                 solver='auto',
                 random_state=None):
        self._hyperparams = {
            'alpha': alpha,
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'copy_X': copy_X,
            'max_iter': max_iter,
            'tol': tol,
            'class_weight': class_weight,
            'solver': solver,
            'random_state': random_state
        }

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
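
The wrapper above just stores hyperparameters and defers to the underlying scikit-learn estimator at fit time. A minimal usage sketch, assuming SKLModel aliases sklearn.linear_model.RidgeClassifier (the import is not part of the excerpt; note that the normalize parameter was removed in newer scikit-learn releases, so this runs only on versions that still accept it):

from sklearn.datasets import load_iris
from sklearn.linear_model import RidgeClassifier as SKLModel

X, y = load_iris(return_X_y=True)
clf = RidgeClassifierImpl(alpha=0.5).fit(X, y)
print(clf.predict(X[:5]))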
Example #2
def fit(self, X, y=None):
    self._sklearn_model = SKLModel(**self._hyperparams)
    if y is not None:
        self._sklearn_model.fit(X, y)
    else:
        self._sklearn_model.fit(X)
    return self
Example #3
def get_multi_classifier():
    clf1 = RidgeClassifier()
    clf2 = RandomForestClassifier(n_estimators=10)
    clf3 = LinearDiscriminantAnalysis()
    clf4 = GaussianNB()
    classifier = MultiClassifier([clf1, clf2, clf3, clf4])
    return classifier
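
MultiClassifier is project-specific and not shown in the excerpt. A minimal majority-vote sketch of what such a class could look like; the real implementation may differ:

import numpy as np
from collections import Counter

class MultiClassifier:
    """Hypothetical majority-vote ensemble; the real class may differ."""

    def __init__(self, classifiers):
        self.classifiers = classifiers

    def fit(self, X, y):
        for clf in self.classifiers:
            clf.fit(X, y)
        return self

    def predict(self, X):
        # per-sample majority vote over the member predictions
        preds = [clf.predict(X) for clf in self.classifiers]
        return np.array([Counter(votes).most_common(1)[0][0]
                         for votes in zip(*preds)])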
Example #4
def classify(X_train, X_test, y_train, y_test):

    algorithm = None
    classification_output = None  # stays None on ranks other than 0-3
    classification_time_start = MPI.Wtime()
    if rank == 0:
        algorithm = 'ridge'
        clf0 = RidgeClassifier()
        st.fit(clf0, X_train, y_train)
        classification_output = st.predict(clf0, X_test)
    elif rank == 1:
        algorithm = 'randomForest'
        clf1 = RandomForestClassifier(n_estimators=10)
        st.fit(clf1, X_train, y_train)
        classification_output = st.predict(clf1, X_test)
    elif rank == 2:
        algorithm = 'lda'
        clf2 = LinearDiscriminantAnalysis()
        st.fit(clf2, X_train, y_train)
        classification_output = st.predict(clf2, X_test)
    elif rank == 3:
        algorithm = 'GaussianNaiveBayes'
        clf3 = GaussianNB()
        st.fit(clf3, X_train, y_train)
        classification_output = st.predict(clf3, X_test)
    classification_time_end = MPI.Wtime()
    classification_time = classification_time_end - classification_time_start
    print(
        f'[TIME] Process {rank} finished classification by {algorithm} algorithm with time: {classification_time}'
    )
    return classification_output
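
classify relies on module-level MPI state (rank) and an st helper that the excerpt does not show. A sketch of the assumed setup with mpi4py; the st stand-in is hypothetical:

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()  # one classifier per process, ranks 0-3


class st:
    """Hypothetical helper matching the st.fit/st.predict calls above."""

    @staticmethod
    def fit(clf, X, y):
        clf.fit(X, y)

    @staticmethod
    def predict(clf, X):
        return clf.predict(X)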
Example #5
def test_class_weights():
    """
    Test class weights.
    """
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                  [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = RidgeClassifier(class_weight=None)
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    clf = RidgeClassifier(class_weight={1: 0.001})
    clf.fit(X, y)

    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
Example #6
def _test_ridge_classifiers(filter_):
    n_classes = np.unique(y_iris).shape[0]
    n_features = X_iris.shape[1]
    for reg in (RidgeClassifier(), RidgeClassifierCV()):
        reg.fit(filter_(X_iris), y_iris)
        assert_equal(reg.coef_.shape, (n_classes, n_features))
        y_pred = reg.predict(filter_(X_iris))
        assert_greater(np.mean(y_iris == y_pred), .79)

    cv = KFold(5)
    reg = RidgeClassifierCV(cv=cv)
    reg.fit(filter_(X_iris), y_iris)
    y_pred = reg.predict(filter_(X_iris))
    assert_true(np.mean(y_iris == y_pred) >= 0.8)
Example #7
def test_deprecation_warning_dense_cholesky():
    """Tests if DeprecationWarning is raised at instantiation of estimators
    and when ridge_regression is called"""

    warning_class = DeprecationWarning
    warning_message = ("The name 'dense_cholesky' is deprecated."
                       " Using 'cholesky' instead")
    func1 = lambda: Ridge(solver='dense_cholesky')
    func2 = lambda: RidgeClassifier(solver='dense_cholesky')
    X = np.ones([3, 2])
    y = np.zeros(3)
    func3 = lambda: ridge_regression(X, y, alpha=1, solver='dense_cholesky')

    for func in [func1, func2, func3]:
        assert_warns_message(warning_class, warning_message, func)
Example #8
def _test_ridge_classifiers(filter_):
    n_classes = np.unique(y_iris).shape[0]
    n_features = X_iris.shape[1]
    for clf in (RidgeClassifier(), RidgeClassifierCV()):
        clf.fit(filter_(X_iris), y_iris)
        assert_equal(clf.coef_.shape, (n_classes, n_features))
        y_pred = clf.predict(filter_(X_iris))
        assert_greater(np.mean(y_iris == y_pred), .79)

    n_samples = X_iris.shape[0]
    cv = KFold(n_samples, 5)
    clf = RidgeClassifierCV(cv=cv)
    clf.fit(filter_(X_iris), y_iris)
    y_pred = clf.predict(filter_(X_iris))
    assert_true(np.mean(y_iris == y_pred) >= 0.8)
Example #9
def test_class_weights():
    """
    Test class weights.
    """
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = RidgeClassifier(class_weight=None)
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    clf = RidgeClassifier(class_weight={1: 0.001})
    clf.fit(X, y)

    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
Example #10
def __init__(self,
             alpha=1.0,
             fit_intercept=True,
             normalize=False,
             copy_X=True,
             max_iter=None,
             tol=0.001,
             class_weight='balanced',
             solver='auto',
             random_state=None):
    self._hyperparams = {
        'alpha': alpha,
        'fit_intercept': fit_intercept,
        'normalize': normalize,
        'copy_X': copy_X,
        'max_iter': max_iter,
        'tol': tol,
        'class_weight': class_weight,
        'solver': solver,
        'random_state': random_state
    }
    self._wrapped_model = SKLModel(**self._hyperparams)
Example #11
if ",ABC," in Functions:
    models.append(('ABC', AdaBoostClassifier()))
if ",GNB," in Functions:
    models.append(('GNB', GaussianNB()))
if ",QDA," in Functions:
    models.append(('QDA', QuadraticDiscriminantAnalysis()))
if ",GBC," in Functions:
    models.append(('GBC', GradientBoostingClassifier()))
if ",ETC," in Functions:
    models.append(('ETC', ExtraTreeClassifier()))
if ",BC," in Functions:
    models.append(('BC', BaggingClassifier()))
if ",SGDC," in Functions:
    models.append(('SGDC', SGDClassifier()))
if ",RC," in Functions:
    models.append(('RC', RidgeClassifier()))
if ",PAC," in Functions:
    models.append(('PAC', PassiveAggressiveClassifier()))
if ",ETSC," in Functions:
    models.append(('ETSC', ExtraTreesClassifier()))
if ",BNB," in Functions:
    models.append(('BNB', BernoulliNB()))
if ",GM," in Functions:
    models.append(('GM', GaussianMixture()))

from sklearn.model_selection import KFold
from collections import Counter

Predictii = [[] for _ in range(len(Y_Test))]

Accs = []
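
The excerpt stops before the evaluation loop that fills Predictii and Accs. A hedged sketch of how it could proceed, assuming numpy arrays X_Train/Y_Train/X_Test from the enclosing script (Y_Pred is an illustrative name):

kf = KFold(n_splits=5, shuffle=True, random_state=1)
for name, model in models:
    fold_accs = []
    for tr, va in kf.split(X_Train):
        model.fit(X_Train[tr], Y_Train[tr])
        fold_accs.append(model.score(X_Train[va], Y_Train[va]))
    Accs.append((name, sum(fold_accs) / len(fold_accs)))
    for i, p in enumerate(model.predict(X_Test)):
        Predictii[i].append(p)

# majority vote per test sample, using the imported Counter
Y_Pred = [Counter(votes).most_common(1)[0][0] for votes in Predictii]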
Example #12
		adjusted = pandas.concat((adjusted, adjusted_proba), axis = 1)
	store_csv(adjusted, name + ".csv")

build_audit(DecisionTreeClassifier(random_state = 13, min_samples_leaf = 2), "DecisionTreeAudit", compact = True)
build_audit(BaggingClassifier(DecisionTreeClassifier(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 3, max_features = 0.5), "DecisionTreeEnsembleAudit")
build_audit(DummyClassifier(strategy = "most_frequent"), "DummyAudit")
build_audit(ExtraTreesClassifier(random_state = 13, min_samples_leaf = 5), "ExtraTreesAudit")
build_audit(GradientBoostingClassifier(random_state = 13, loss = "exponential", init = None), "GradientBoostingAudit", compact = True)
build_audit(OptimalLGBMClassifier(objective = "binary", n_estimators = 37, num_iteration = 17), "LGBMAudit", compact = True)
build_audit(LinearDiscriminantAnalysis(solver = "lsqr"), "LinearDiscriminantAnalysisAudit")
build_audit(LogisticRegressionCV(), "LogisticRegressionAudit")
build_audit(BaggingClassifier(LogisticRegression(), random_state = 13, n_estimators = 3, max_features = 0.5), "LogisticRegressionEnsembleAudit")
build_audit(GaussianNB(), "NaiveBayesAudit")
build_audit(RandomForestClassifier(random_state = 13, min_samples_leaf = 3), "RandomForestAudit", compact = True)
build_audit(RidgeClassifierCV(), "RidgeAudit", with_proba = False)
build_audit(BaggingClassifier(RidgeClassifier(random_state = 13), random_state = 13, n_estimators = 3, max_features = 0.5), "RidgeEnsembleAudit")
build_audit(SVC(), "SVCAudit", with_proba = False)
build_audit(VotingClassifier([("dt", DecisionTreeClassifier(random_state = 13)), ("nb", GaussianNB()), ("lr", LogisticRegression())], voting = "soft", weights = [3, 1, 2]), "VotingEnsembleAudit")
build_audit(OptimalXGBClassifier(objective = "binary:logistic", ntree_limit = 71), "XGBAudit", compact = True)

audit_dict_X = audit_X.to_dict("records")

def build_audit_dict(classifier, name, with_proba = True):
	pipeline = PMMLPipeline([
		("dict-transformer", DictVectorizer()),
		("classifier", classifier)
	])
	pipeline.fit(audit_dict_X, audit_y)
	store_pkl(pipeline, name + ".pkl")
	adjusted = DataFrame(pipeline.predict(audit_dict_X), columns = ["Adjusted"])
	if with_proba:
Example #13
linear_support_vector_classifier = svm.LinearSVC(dual=False)
nearest_neighbor_classifier = KNeighborsClassifier()
extra_trees_classifier = ExtraTreesClassifier(n_estimators=256)
bagging_classifier = BaggingClassifier(
    base_estimator=GradientBoostingClassifier(n_estimators=200,
                                              max_features=4),
    max_features=0.5,
    n_jobs=2,
    verbose=1)
gradient_boosting_classifier = GradientBoostingClassifier(n_estimators=200,
                                                          max_features=4,
                                                          learning_rate=0.3,
                                                          verbose=0)
random_forest_classifier = RandomForestClassifier(n_estimators=2)
logistic_regression = LogisticRegression(C=0.5)
ridge_classifier = RidgeClassifier(alpha=0.1, solver='svd')
bayes = MultinomialNB()
sgd = SGDClassifier()
boundary_forest = BoundaryForestClassifier(num_trees=4)

# FEATURE UNION
feature_union = FeatureUnion(transformer_list=[('PCA', pca)])

# PIPE DEFINITION
classifier = Pipeline(steps=[(
    'selector',
    FeatureSelector()), ('minmax',
                         MinMaxScaler()), ('estimator', logistic_regression)])
print('Successfully prepared classifier pipeline!')
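
FeatureSelector and the pca used in the FeatureUnion above are project-specific and not shown. A minimal stand-in sketch, only to make the pipeline runnable; the real selector presumably picks feature columns:

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.decomposition import PCA

pca = PCA(n_components=2)  # assumption: pca above is a plain PCA instance


class FeatureSelector(BaseEstimator, TransformerMixin):
    """Hypothetical identity transformer standing in for the real selector."""

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X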
Example #14
def test_ridge_classifier_no_support_multilabel():
    X, y = make_multilabel_classification(n_samples=10, random_state=0)
    assert_raises(ValueError, RidgeClassifier().fit, X, y)
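
The test pins down that RidgeClassifier rejects multilabel targets. A common workaround, sketched here (not part of the original), is to fit one binary ridge model per label:

from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import RidgeClassifier
from sklearn.multioutput import MultiOutputClassifier

X, y = make_multilabel_classification(n_samples=10, random_state=0)
clf = MultiOutputClassifier(RidgeClassifier()).fit(X, y)
print(clf.predict(X).shape)  # (10, n_labels): one binary column per label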
Example #15
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif, chi2, mutual_info_classif, RFE, RFECV
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.datasets import make_classification

random_state = 42
classifiers = [
    RidgeClassifierCV(),
    RandomForestClassifier(),
    SVC(),
    DecisionTreeClassifier(),
    BaggingClassifier(),
    LogisticRegressionCV(),
    ExtraTreeClassifier(),
    SGDClassifier(),
    RidgeClassifier(),
    PassiveAggressiveClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    ExtraTreesClassifier(),
    LogisticRegression(),
    KNeighborsClassifier(),
    GaussianProcessClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    LinearSVC(),
    NearestCentroid(),
    NuSVC(),
    QuadraticDiscriminantAnalysis(),
]
Example #16
data = load_breast_cancer()
X, y = data.data, data.target

gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()

mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()

lr = LogisticRegression()
rc = RidgeClassifier()


def stacking(para, X, y):
    stack_lvl_0 = StackingClassifier(classifiers=para["lvl_0"],
                                     meta_classifier=para["top"])
    stack_lvl_1 = StackingClassifier(classifiers=para["lvl_1"],
                                     meta_classifier=stack_lvl_0)
    scores = cross_val_score(stack_lvl_1, X, y, cv=3)

    return scores.mean()
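
A minimal call sketch for stacking, reusing the estimators defined above; the para layout is an assumption inferred from the lvl_0/lvl_1/top keys:

para = {
    "lvl_0": [gbc, rfc, etc],  # inner stack, combined by the 'top' meta-model
    "lvl_1": [mlp, gnb, knn],  # outer stack, its meta-learner is the inner stack
    "top": lr,
}
print(stacking(para, X, y))   # mean 3-fold CV accuracy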


def get_combinations(models):
    comb = []
    for i in range(0, len(models) + 1):
Example #17
X_train, X_val, y_train, y_val = train_test_split(X,
                                                  y,
                                                  test_size=0.2,
                                                  random_state=2)

# aa=train_search(X, y, df_test)

# base learners
# C0=best_classifier
C1 = DecisionTreeClassifier(max_depth=8)
C2 = CatBoostClassifier(verbose=0)
C3 = KNeighborsClassifier()
C4 = BernoulliNB()
C5 = RandomForestClassifier()
C6 = XGBClassifier()
C7 = RidgeClassifier()
C8 = KNeighborsClassifier()
C9 = AdaBoostClassifier()
C10 = MLPClassifier(alpha=1, max_iter=1000)
C11 = RidgeClassifier()
C12 = BaggingClassifier()
C13 = ExtraTreesClassifier()
C14 = XGBRFClassifier()
C15 = GradientBoostingClassifier()
C16 = GaussianNB()
C17 = HistGradientBoostingClassifier()
C18 = KNeighborsClassifier()
C19 = SVC()
C20 = RidgeClassifierCV()
Cm = LogisticRegression(max_iter=3000, C=0.2)
Cm1 = LogisticRegression(max_iter=3000, C=0.4)
Example #18
    t_start = time()
    y_hat = model.predict(x_test)
    t_end = time()
    t_test = t_end - t_start
    print('Test time: %.3f s' % t_test)

    train_acc = metrics.accuracy_score(y_train, model.predict(x_train))
    test_acc = metrics.accuracy_score(y_test, y_hat)
    print('Training set accuracy: %.2f%%' % (100 * train_acc))
    print('Test set accuracy: %.2f%%' % (100 * test_acc))

    return t_train, t_test, 1 - train_acc, 1 - test_acc, name


# set up the classifiers and their parameters
clfs = [[RidgeClassifier(), 'Ridge'], [KNeighborsClassifier(), 'KNN'],
        [MultinomialNB(), 'MultinomialNB'], [BernoulliNB(), 'BernoulliNB'],
        [RandomForestClassifier(n_estimators=200), 'RandomForest'],
        [SVC(), 'SVM'],
        [
            LinearSVC(loss='squared_hinge', penalty='l1', dual=False,
                      tol=1e-4), 'LinearSVC-l1'
        ],
        [
            LinearSVC(loss='squared_hinge', penalty='l2', dual=False,
                      tol=1e-4), 'LinearSVC-l2'
        ]]

# start training
result = []
for clf, name in clfs:
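    # (the excerpt ends here; a plausible continuation, with benchmark() as a
    #  hypothetical name for the timing helper shown above)
    result.append(benchmark(clf, name))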
Example #19
    'LogisticRegressionCV': (True, LogisticRegressionCV(Cs=10, fit_intercept=True, cv='warn', dual=False, penalty='l2', scoring=None, solver='lbfgs', tol=0.0001, max_iter=100, class_weight=None, n_jobs=None, verbose=0, refit=True, intercept_scaling=1.0, multi_class='warn', random_state=None, l1_ratios=None) ),
    'LDA': (True, LinearDiscriminantAnalysis(solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=0.0001) ),
    'LogisticRegression': (True, LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='warn', max_iter=100, multi_class='warn', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None) ),
    'CalibratedClassifierCV': (True, CalibratedClassifierCV(base_estimator=None, method='sigmoid', cv='warn') ),
    'LinearSVC': (True, LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000) ),
    'LinearSVM': ( True, SVC(kernel='linear', C=0.025) ),  # (C=0.01, penalty='l1', dual=False) ),
    'RBF_SVM': (True, SVC(gamma='auto') ),#gamma=2, C=1) ), #
    'Nu_SVM': (True, NuSVC(gamma='auto') ),
    'GaussianProcess': (False, GaussianProcessClassifier() ), #(1.0 * RBF(1.0)) ),
    'NeuralNet': (True, MLPClassifier(alpha=1, max_iter=1000) ),
    'QDA': (True, QuadraticDiscriminantAnalysis() ),
    'NaiveBayes': (True,  GaussianNB() ),
    'RadiusNeighborsClassifier': (True, RadiusNeighborsClassifier() ),
    'SGDClassifier': (True, SGDClassifier() ),
    'RidgeClassifierCV': (True, RidgeClassifierCV() ),
    'RidgeClassifier': (True, RidgeClassifier() ),
    'PassiveAggressiveClassifier': (True, PassiveAggressiveClassifier() ),
    'LabelPropagation': (True, LabelPropagation() ),
    'LabelSpreading': (False, LabelSpreading() ),
    'MultinomialNB': (True, MultinomialNB() ),
    'NearestCentroid': (True, NearestCentroid() ),
    'Perceptron': (True, Perceptron() ),
}
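
Each value in the mapping pairs an enabled flag with an estimator; a sketch of the assumed selection loop (classifier_map is a hypothetical name, since the excerpt starts mid-dictionary, and the usual X_train/X_test split is assumed):

for name, (enabled, estimator) in classifier_map.items():
    if not enabled:
        continue  # skip classifiers that are switched off
    estimator.fit(X_train, y_train)
    print(name, estimator.score(X_test, y_test))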


# feature_set is used for manually enabling the individual features.
# NOTE: setting the boolean value enables/disables the feature.
feature_set = {
    'backers_count': True,
    'converted_pledged_amount': True,
    'goal': True,
Example #20
    GradientBoostingClassifier(random_state=13, loss="exponential", init=None),
    "GradientBoostingAudit")
build_audit(LinearDiscriminantAnalysis(solver="lsqr"),
            "LinearDiscriminantAnalysisAudit")
build_audit(LogisticRegressionCV(), "LogisticRegressionAudit")
build_audit(
    BaggingClassifier(LogisticRegression(),
                      random_state=13,
                      n_estimators=3,
                      max_features=0.5), "LogisticRegressionEnsembleAudit")
build_audit(GaussianNB(), "NaiveBayesAudit")
build_audit(RandomForestClassifier(random_state=13, min_samples_leaf=5),
            "RandomForestAudit")
build_audit(RidgeClassifierCV(), "RidgeAudit", with_proba=False)
build_audit(
    BaggingClassifier(RidgeClassifier(random_state=13),
                      random_state=13,
                      n_estimators=3,
                      max_features=0.5), "RidgeEnsembleAudit")
build_audit(
    VotingClassifier([("dt", DecisionTreeClassifier(random_state=13)),
                      ("nb", GaussianNB()), ("lr", LogisticRegression())],
                     voting="soft",
                     weights=[3, 1, 2]), "VotingEnsembleAudit")
build_audit(XGBClassifier(objective="binary:logistic"), "XGBAudit")

versicolor_df = load_csv("Versicolor.csv")

print(versicolor_df.dtypes)

versicolor_columns = versicolor_df.columns.tolist()
Example #21
			'PassiveAggressiveRegressor':PassiveAggressiveRegressor(),
			'Perceptron':Perceptron(),
			'ProjectedGradientNMF':ProjectedGradientNMF(),
			'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(),
			'RANSACRegressor':RANSACRegressor(),
			'RBFSampler':RBFSampler(),
			'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
			'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
			'RandomForestClassifier':RandomForestClassifier(),
			'RandomForestRegressor':RandomForestRegressor(),
			'RandomizedLasso':RandomizedLasso(),
			'RandomizedLogisticRegression':RandomizedLogisticRegression(),
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
			'RobustScaler':RobustScaler(),
			'SGDClassifier':SGDClassifier(),
			'SGDRegressor':SGDRegressor(),
			'SVC':SVC(),
			'SVR':SVR(),
			'SelectFdr':SelectFdr(),
			'SelectFpr':SelectFpr(),
			'SelectFwe':SelectFwe(),
			'SelectKBest':SelectKBest(),
			'SelectPercentile':SelectPercentile(),
			'ShrunkCovariance':ShrunkCovariance(),
			'SkewedChi2Sampler':SkewedChi2Sampler(),
			'SparsePCA':SparsePCA(),
			'SparseRandomProjection':SparseRandomProjection(),
Example #22
def test_class_weights():
    # Test class weights.
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                  [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = RidgeClassifier(class_weight=None)
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    clf = RidgeClassifier(class_weight={1: 0.001})
    clf.fit(X, y)

    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))

    # check if class_weight = 'balanced' can handle negative labels.
    clf = RidgeClassifier(class_weight='balanced')
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # class_weight = 'balanced', and class_weight = None should return
    # same values when y has equal number of all labels
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0]])
    y = [1, 1, -1, -1]
    clf = RidgeClassifier(class_weight=None)
    clf.fit(X, y)
    clfa = RidgeClassifier(class_weight='balanced')
    clfa.fit(X, y)
    assert_equal(len(clfa.classes_), 2)
    assert_array_almost_equal(clf.coef_, clfa.coef_)
    assert_array_almost_equal(clf.intercept_, clfa.intercept_)
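
For reference, class_weight='balanced' weights each class by n_samples / (n_classes * count). A quick check with sklearn's helper (a sketch, not part of the test):

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = np.array([1, 1, 1, -1, -1])
print(compute_class_weight('balanced', classes=np.array([-1, 1]), y=y))
# [1.25 0.8333]: 5 / (2 * 2) for class -1, 5 / (2 * 3) for class 1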
Example #23
    XTest.fillna(0, inplace=True)
    XTest.replace([np.inf, -np.inf], 0, inplace=True)

    features = XTrain.columns
    print(len(features), 'features total')
    print("\n".join(features))

    XTrain = XTrain.loc[:, features].values
    YTrain = np.array(YTrain).ravel()

    XTest = XTest.loc[:, features].values

    skf = StratifiedKFold(n_splits=N_FOLDS).split(XTrain, YTrain)
    splits = list(skf)

    base_classifiers = [RidgeClassifier()]

    print "Creating train and test sets for blending."

    df_blend_train = np.zeros((XTrain.shape[0], len(base_classifiers)))
    df_blend_test = np.zeros((XTest.shape[0], len(base_classifiers)))
    oof_loglosses = np.zeros((len(base_classifiers), len(splits)))

    for clf_id, clf in enumerate(base_classifiers):

        print "Training base classifier #{0} -- {1}".format(clf_id, clf.__class__.__name__)

        dataset_blend_test_j = np.zeros((XTest.shape[0], N_FOLDS))
        for fold_id, (train_indexes, predict_indexes) in enumerate(splits):
            print "Fold", fold_id
Example #24
def test_class_weights():
    # Test class weights.
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                  [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = RidgeClassifier(class_weight=None)
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    clf = RidgeClassifier(class_weight={1: 0.001})
    clf.fit(X, y)

    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))

    # check if class_weight = 'balanced' can handle negative labels.
    clf = RidgeClassifier(class_weight='balanced')
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # class_weight = 'balanced', and class_weight = None should return
    # same values when y has equal number of all labels
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0]])
    y = [1, 1, -1, -1]
    clf = RidgeClassifier(class_weight=None)
    clf.fit(X, y)
    clfa = RidgeClassifier(class_weight='balanced')
    clfa.fit(X, y)
    assert_equal(len(clfa.classes_), 2)
    assert_array_almost_equal(clf.coef_, clfa.coef_)
    assert_array_almost_equal(clf.intercept_, clfa.intercept_)
Example #25
def all_classifier_models():
    models = []
    metrix = []
    c_report = []
    train_accuracy = []
    test_accuracy = []
    
    models.append(('LogisticRegression', LogisticRegression(solver='liblinear', multi_class='ovr')))
    models.append(('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()))
    models.append(('KNeighborsClassifier', KNeighborsClassifier()))
    models.append(('DecisionTreeClassifier', DecisionTreeClassifier()))
    models.append(('GaussianNB', GaussianNB()))
    models.append(('RandomForestClassifier', RandomForestClassifier(n_estimators=100)))
    models.append(('SVM', SVC(gamma='auto')))
    models.append(('Linear_SVM', LinearSVC()))
    models.append(('XGB', XGBClassifier()))
    models.append(('SGD', SGDClassifier()))
    models.append(('Perceptron', Perceptron()))
    models.append(('ExtraTreeClassifier', ExtraTreeClassifier()))
    models.append(('OneClassSVM', OneClassSVM(gamma = 'auto')))
    models.append(('NuSVC', NuSVC()))
    models.append(('MLPClassifier', MLPClassifier(solver='lbfgs', alpha=1e-5, random_state=1)))
    models.append(('RadiusNeighborsClassifier', RadiusNeighborsClassifier(radius=2.0)))
    models.append(('OutputCodeClassifier', OutputCodeClassifier(estimator=RandomForestClassifier(random_state=0),random_state=0)))
    models.append(('OneVsOneClassifier', OneVsOneClassifier(estimator = RandomForestClassifier(random_state=1))))
    models.append(('OneVsRestClassifier', OneVsRestClassifier(estimator = RandomForestClassifier(random_state=1))))
    models.append(('LogisticRegressionCV', LogisticRegressionCV()))
    models.append(('RidgeClassifierCV', RidgeClassifierCV()))
    models.append(('RidgeClassifier', RidgeClassifier()))
    models.append(('PassiveAggressiveClassifier', PassiveAggressiveClassifier()))
    models.append(('GaussianProcessClassifier', GaussianProcessClassifier()))
    models.append(('HistGradientBoostingClassifier', HistGradientBoostingClassifier()))
    estimators = [('rf', RandomForestClassifier(n_estimators=10, random_state=42)),('svr', make_pipeline(StandardScaler(),LinearSVC(random_state=42)))]
    models.append(('StackingClassifier', StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())))
    clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
    clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
    clf3 = GaussianNB()
    models.append(('VotingClassifier', VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')))
    models.append(('AdaBoostClassifier', AdaBoostClassifier()))
    models.append(('GradientBoostingClassifier', GradientBoostingClassifier()))
    models.append(('BaggingClassifier', BaggingClassifier()))
    models.append(('ExtraTreesClassifier', ExtraTreesClassifier()))
    models.append(('CategoricalNB', CategoricalNB()))
    models.append(('ComplementNB', ComplementNB()))
    models.append(('BernoulliNB', BernoulliNB()))
    models.append(('MultinomialNB', MultinomialNB()))
    models.append(('CalibratedClassifierCV', CalibratedClassifierCV()))
    models.append(('LabelPropagation', LabelPropagation()))
    models.append(('LabelSpreading', LabelSpreading()))
    models.append(('NearestCentroid', NearestCentroid()))
    models.append(('QuadraticDiscriminantAnalysis', QuadraticDiscriminantAnalysis()))
    models.append(('GaussianMixture', GaussianMixture()))
    models.append(('BayesianGaussianMixture', BayesianGaussianMixture()))
    
    test_accuracy= []
    names = []
    for name, model in models:
        try:
            m = model
            m.fit(X_train, y_train)
            y_pred = m.predict(X_test)
            train_acc = round(m.score(X_train, y_train) * 100, 2)
            test_acc = metrics.accuracy_score(y_test,y_pred) *100
            c_report.append(classification_report(y_test, y_pred))
            test_accuracy.append(test_acc)
            names.append(name)
            metrix.append([name, train_acc, test_acc])
        except Exception:
            print("Exception occurred:", name)
    return metrix,test_accuracy,names
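
all_classifier_models reads X_train, X_test, y_train, y_test and the metrics helpers from the enclosing scope; a minimal driver sketch, assuming a standard split:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

metrix, test_accuracy, names = all_classifier_models()
for name, train_acc, test_acc in metrix:
    print(f'{name}: train={train_acc}% test={test_acc:.2f}%')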