Example #1
def run_cat(filename,modelname,fileout,embeddings,new_run=True,run_parse=True,
            model_type='logreg',C=10.0,
            alpha=1.0, cutoff=0.50, n_iter=1):
    # NOTE: pull relevant data and run parsing and classification
    data = db.getTransactionData()
    df = pd.DataFrame(data)
    # DEBUG: print(df)
    print(dirs, run_parse)
    if new_run: # initialize the model;
        if model_type=='logreg':
            model = linear_model.SGDClassifier(loss='log',warm_start=True,
                                           n_iter=n_iter,alpha=alpha)
        elif model_type=='passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C,warm_start=True)
        elif model_type=='naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else: # load a saved, pre-trained model
        modelFileLoad = open(modelname, 'rb')
        model = pickle.load(modelFileLoad)

    #fileCities = dirs.dataDir + 'cities_by_state.pickle'
    #us_cities = pd.read_pickle(fileCities)

    df = cat_df(df,model,embeddings,new_run,run_parse,cutoff=cutoff,
                model_type=model_type)

    df.to_csv(fileout,index=False)

    # Saving logistic regression model from training set 1
    modelFileSave = open(modelname, 'wb')
    pickle.dump(model, modelFileSave)
    modelFileSave.close()
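
Both branches above assume a model that supports incremental updates. A minimal sketch of updating such a model on a fresh batch and re-saving it, assuming the batch is already vectorized (X_new, y_new, and model.pickle are illustrative names, not from the original project):

import pickle
import numpy as np
from sklearn import linear_model

# Illustrative batch; in the real pipeline this comes from the parsed transactions.
X_new = np.random.rand(10, 5)
y_new = np.random.randint(0, 2, size=10)

model = linear_model.PassiveAggressiveClassifier(C=10.0, warm_start=True)
# partial_fit must see the full set of classes on the first call.
model.partial_fit(X_new, y_new, classes=np.array([0, 1]))

with open('model.pickle', 'wb') as f:  # context manager instead of open()/close()
    pickle.dump(model, f)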
Example #2
def model_comparison_classification(k, data):
    # type: (int, tuple) -> dict
    """
    Compares various classification models and
    their performance in analyzing a dataset
    using k-fold cross-validation.

    :param k: How many bins.
    :param data: Data of samples and their labels.
    :return: A dictionary with keys being names
             of classifiers and values being the
             k bins and their accuracy scores.
    """
    # Run k-fold cross-validation on each model and store results by name.
    models = {
        'SVM': k_fold_cv(k, data, SVC()),
        'Passive-Aggressive': k_fold_cv(k, data, linear.PassiveAggressiveClassifier()),
        'Bernoulli': k_fold_cv(k, data, bayes.BernoulliNB()),
        'Multilayered Perceptron': k_fold_cv(k, data, neural.MLPClassifier()),
    }

    return models
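
The k_fold_cv helper is not shown in this example. A minimal sketch of what it might look like, assuming data unpacks into a feature matrix and a label vector and that per-fold accuracy is the desired output:

from sklearn.model_selection import cross_val_score

def k_fold_cv(k, data, clf):
    # Hypothetical helper: `data` is assumed to be a (features, labels) tuple.
    X, y = data
    # One accuracy score per bin/fold, matching the docstring above.
    return cross_val_score(clf, X, y, cv=k, scoring='accuracy')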
Example #3
def get_algorithms():
    MLA_dict = {
        # Ensemble methods
        "ada": ensemble.AdaBoostClassifier(),
        "bc": ensemble.BaggingClassifier(),
        "etc": ensemble.ExtraTreesClassifier(),
        "gbc": ensemble.GradientBoostingClassifier(),
        "rfc": ensemble.RandomForestClassifier(),
        # Gaussian processes
        "gpc": gaussian_process.GaussianProcessClassifier(),
        # Linear models
        "lr": linear_model.LogisticRegressionCV(),
        "pac": linear_model.PassiveAggressiveClassifier(),
        "rcc": linear_model.RidgeClassifierCV(),
        "sgd": linear_model.SGDClassifier(),
        "per": linear_model.Perceptron(),
        # Naive Bayes
        "bnb": naive_bayes.BernoulliNB(),
        "gnb": naive_bayes.GaussianNB(),
        # Nearest neighbour
        "knn": neighbors.KNeighborsClassifier(),
        # SVM
        "svc": svm.SVC(probability=True),
        "nvc": svm.NuSVC(probability=True),
        "lvc": svm.LinearSVC(),
        # Trees
        "dtc": tree.DecisionTreeClassifier(),
        "ets": tree.ExtraTreeClassifier(),
        # Discriminant analysis
        "lda": discriminant_analysis.LinearDiscriminantAnalysis(),
        "qda": discriminant_analysis.QuadraticDiscriminantAnalysis(),
    }
    return MLA_dict
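
One way to consume this dictionary is to cross-validate every algorithm in a loop. A sketch on a stand-in dataset (iris here is only for illustration):

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)
for key, clf in get_algorithms().items():
    scores = cross_val_score(clf, X, y, cv=5)
    print(f"{key}: mean accuracy {scores.mean():.4f}")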
Example #4
 def test_basic(self, single_chunk_classification):
     X, y = single_chunk_classification
     a = lm.BigPassiveAggressiveClassifier(classes=[0, 1], random_state=0)
     b = lm_.PassiveAggressiveClassifier(random_state=0)
     a.fit(X, y)
     b.partial_fit(X, y, classes=[0, 1])
     assert_eq(a.coef_, b.coef_)
Example #5
def test_sk_PassiveAggressiveClassifier():
    print("Testing sklearn, PassiveAggressiveClassifier...")
    mod = linear_model.PassiveAggressiveClassifier()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "PassiveAggressiveClassifier test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example #6
 def test_basic(self, single_chunk_classification):
     X, y = single_chunk_classification
     a = lm.PartialPassiveAggressiveClassifier(
         classes=[0, 1], random_state=0, max_iter=100, tol=1e-3
     )
     b = lm_.PassiveAggressiveClassifier(random_state=0, max_iter=100, tol=1e-3)
     a.fit(X, y)
     b.partial_fit(*dask.compute(X, y), classes=[0, 1])
     assert_estimator_equal(a, b, exclude=["loss_function_"])
Example #7
 def build_sklearn(self, model_id, model_params):
     """Method that builds models implemented in sklearn"""
     if model_id == 'sklearn_LogisticRegressionCV':
         return linear_model.LogisticRegressionCV(**model_params)
     elif model_id == 'sklearn_LogisticRegression':
         return linear_model.LogisticRegression(**model_params)
     elif model_id == 'sklearn_MLPClassifier':
         return neural_network.MLPClassifier(**model_params)
     elif model_id == 'sklearn_GaussianNB':
         return naive_bayes.GaussianNB(**model_params)
     elif model_id == 'sklearn_MultinomialNB':
         return naive_bayes.MultinomialNB(**model_params)
     elif model_id == 'sklearn_BernoulliNB':
         return naive_bayes.BernoulliNB(**model_params)
     elif model_id == 'sklearn_RandomForestClassifier':
         return ensemble.RandomForestClassifier(**model_params)
     elif model_id == 'sklearn_SVC':
         return svm.SVC(**model_params)
     elif model_id == 'sklearn_AdaBoostClassifier':
         return ensemble.AdaBoostClassifier(**model_params)
     elif model_id == 'sklearn_SGDClassifier':
         return linear_model.SGDClassifier(**model_params)
     elif model_id == 'sklearn_PassiveAggressiveClassifier':
         return linear_model.PassiveAggressiveClassifier(**model_params)
     elif model_id == 'sklearn_RidgeClassifier':
         return linear_model.RidgeClassifier(**model_params)
     elif model_id == 'sklearn_DummyClassifier':
         return dummy.DummyClassifier(**model_params)
     elif model_id == 'sklearn_KNeighborsClassifier':
         return neighbors.KNeighborsClassifier(**model_params)
     elif model_id == 'sklearn_DecisionTreeClassifier':
         return tree.DecisionTreeClassifier(**model_params)
     elif model_id == 'sklearn_LinearRegression':
         return linear_model.LinearRegression(**model_params)
     elif model_id == 'sklearn_LassoCV':
         return linear_model.LassoCV(**model_params)
     elif model_id == 'sklearn_RidgeCV':
         return linear_model.RidgeCV(**model_params)
     elif model_id == 'sklearn_Ridge':
         return linear_model.Ridge(**model_params)
     elif model_id == 'sklearn_DummyRegressor':
         return dummy.DummyRegressor(**model_params)
     elif model_id == 'sklearn_RandomForestRegressor':
         return ensemble.RandomForestRegressor(**model_params)
     elif model_id == 'sklearn_GradientBoostingRegressor':
         return ensemble.GradientBoostingRegressor(**model_params)
     elif model_id == 'sklearn_MLPRegressor':
         return neural_network.MLPRegressor(**model_params)
     elif model_id == 'sklearn_KNeighborsRegressor':
         return neighbors.KNeighborsRegressor(**model_params)
     elif model_id == 'sklearn_SVR':
         return svm.SVR(**model_params)
     elif model_id == 'sklearn_SGDRegressor':
         return linear_model.SGDRegressor(**model_params)
     elif model_id == 'sklearn_DecisionTreeRegressor':
         return tree.DecisionTreeRegressor(**model_params)
     return None
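
The long if/elif chain above can also be written as a lookup table. A sketch of the same dispatch for a few of the ids (behavior-equivalent for the entries shown, not a drop-in replacement for the full method):

from sklearn import linear_model, svm

SKLEARN_BUILDERS = {
    'sklearn_LogisticRegression': linear_model.LogisticRegression,
    'sklearn_PassiveAggressiveClassifier': linear_model.PassiveAggressiveClassifier,
    'sklearn_SVC': svm.SVC,
}

def build_sklearn(model_id, model_params):
    # Look up the estimator class and instantiate it with the given params.
    builder = SKLEARN_BUILDERS.get(model_id)
    return builder(**model_params) if builder else None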
Example #8
def run_cat(filename,
            modelname,
            fileout,
            embeddings,
            new_run=True,
            run_parse=True,
            model_type='logreg',
            C=10.0,
            alpha=1.0,
            cutoff=0.50,
            n_iter=1):
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if len(df.columns) == 2:  # make sure columns have the right names
        df.columns = ['raw', 'amount']

    if new_run:  # initialize the model;
        if model_type == 'logreg':
            model = linear_model.SGDClassifier(loss='log',
                                               warm_start=True,
                                               n_iter=n_iter,
                                               alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C,
                                                             warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError(
                'model_type must be logreg, passive-aggressive, or naive-bayes'
            )
    else:  # load a saved, pre-trained model
        modelFileLoad = open(modelname, 'rb')
        model = pickle.load(modelFileLoad)

    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)

    df = cat_df(df,
                model,
                us_cities,
                embeddings,
                new_run,
                run_parse,
                cutoff=cutoff,
                model_type=model_type)

    df.to_csv(fileout, index=False)

    # Saving logistic regression model from training set 1
    modelFileSave = open(modelname, 'wb')
    pickle.dump(model, modelFileSave)
    modelFileSave.close()
Example #9
def train_test(x_tr, y_tr, x_te, y_te, name):
    algorithms = {
        'ada_boost': ensemble.AdaBoostClassifier(),
        'bagging': ensemble.BaggingClassifier(),
        'extra_trees': ensemble.ExtraTreesClassifier(),
        'random_forest': ensemble.RandomForestClassifier(),
        'logistic_regression': linear_model.LogisticRegression(),
        'passive_aggressive': linear_model.PassiveAggressiveClassifier(),
        'ridge': linear_model.RidgeClassifier(),
        'sgd': linear_model.SGDClassifier(),
        'bernoulli': naive_bayes.BernoulliNB(),
        'gaussian': naive_bayes.GaussianNB(),
        'k_neighbors': neighbors.KNeighborsClassifier(),
        'nearest_centroid': neighbors.NearestCentroid(),
        'mlp': neural_network.MLPClassifier(),
        'linear_svc': svm.LinearSVC(),
        'decision_tree': tree.DecisionTreeClassifier(),
        'extra_tree': tree.ExtraTreeClassifier(),
        'gradient_boosting': ensemble.GradientBoostingClassifier(),
        'hist_gradient_boosting': HistGradientBoostingClassifier()
    }
    res = {}
    try:
        clf = GridSearchCV(algorithms.get(name),
                           getattr(CVParameters, name),
                           cv=2,
                           n_jobs=-1)
        start = time.perf_counter()
        clf.fit(x_tr, y_tr)
        tr_time = time.perf_counter() - start
        print(tr_time)
        print(clf.best_params_)
        print(clf.best_score_)
        tr_score = clf.score(x_tr, y_tr)
        score = clf.score(x_te, y_te)
        tr_fscore = f1_score(y_tr, clf.predict(x_tr), average='weighted')
        fscore = f1_score(y_te, clf.predict(x_te), average='weighted')
        print(tr_score, score, tr_fscore, fscore)
        res = {
            name: {
                'test': score,
                'train': tr_score,
                'f1_test': fscore,
                'f1_train': tr_fscore,
                'tr_time': tr_time
            }
        }
        res[name].update(clf.best_params_)
    except Exception as e:
        print(e)
    return res
Example #10
def sklearn_linear_model_regressions(xTrain, xTest, yTrain, yTest):
    modelForConsideration: DataFrame = pd.DataFrame()
    LinearModels = \
        [
            linear_model.ARDRegression(), linear_model.BayesianRidge(), linear_model.ElasticNet(),
            linear_model.ElasticNetCV(),
            linear_model.HuberRegressor(), linear_model.Lars(), linear_model.LarsCV(), linear_model.Lasso(),
            linear_model.LassoCV(), linear_model.LassoLars(), linear_model.LassoLarsCV(), linear_model.LassoLarsIC(),
            linear_model.LinearRegression(), linear_model.MultiTaskLasso(),
            linear_model.MultiTaskElasticNet(), linear_model.MultiTaskLassoCV(), linear_model.MultiTaskElasticNetCV(),
            linear_model.OrthogonalMatchingPursuit(),
            linear_model.OrthogonalMatchingPursuitCV(), linear_model.PassiveAggressiveClassifier(),
            linear_model.PassiveAggressiveRegressor(), linear_model.Perceptron(),
            linear_model.RANSACRegressor(), linear_model.Ridge(), linear_model.RidgeClassifier(),
            linear_model.RidgeClassifierCV(),
            linear_model.RidgeCV(), linear_model.SGDClassifier(), linear_model.SGDRegressor(),
            linear_model.TheilSenRegressor(),
            # enet_path, lars_path, and lasso_path return arrays rather than
            # fitted estimators, so calling .fit() on them fails; they are
            # left out of the list.
            # linear_model.enet_path(xTrain, yTrain),
            # linear_model.lars_path(xTrain, yTrain), linear_model.lasso_path(xTrain, yTrain),
            # linear_model.LogisticRegression()
            # ,linear_model.LogisticRegressionCV(),linear_model.logistic_regression_path(xTrain, yTrain), linear_model.orthogonal_mp(xTrain, yTrain), linear_model.orthogonal_mp_gram(), linear_model.ridge_regression()
        ]
    for model in LinearModels:
        modelName: str = model.__class__.__name__
        try:
            # print(f"Preparing Model {modelName}")
            if modelName == "LogisticRegression":
                model = linear_model.LogisticRegression(random_state=0)
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)

            if errorList["Test Average Error"][0] < 30 and errorList[
                    "Train Average Error"][0] < 30:
                try:
                    modelForConsideration = modelForConsideration.append(
                        errorList)
                except (Exception) as e:
                    print(e)

        except (Exception, ArithmeticError) as e:
            print(f"Error occurred while preparing Model {modelName}: {e}")
    return modelForConsideration
Example #11
    def build(self, **kwargs):
        """
        builds and returns estimator

        Args:
            hyperparameters (dictionary): Dictionary of hyperparameters to be used for tuning the estimator.
            **kwargs (key-value arguments): Ignored in this implementation. Added for compatibility with :func:`mlaut.estimators.nn_estimators.Deep_NN_Classifier`.
        
        Returns:
            `sklearn pipeline` object: pipeline for transforming the features and training the estimator
        """
        estimator = GridSearchCV(linear_model.PassiveAggressiveClassifier(),
                                 self._hyperparameters,
                                 verbose=self._verbose,
                                 n_jobs=self._n_jobs,
                                 refit=self._refit,
                                 cv=self._num_cv_folds)
        return self._create_pipeline(estimator=estimator)
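
The grid itself comes from self._hyperparameters, which is not shown here. A plausible grid for PassiveAggressiveClassifier, given only as an illustration (the actual mlaut defaults may differ):

from sklearn import linear_model
from sklearn.model_selection import GridSearchCV

pa_grid = {
    'C': [0.01, 0.1, 1.0, 10.0, 100.0],
    'loss': ['hinge', 'squared_hinge'],
    'max_iter': [1000],
}
search = GridSearchCV(linear_model.PassiveAggressiveClassifier(), pa_grid, cv=5)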
Example #12
def ModelSelection(train_data, features, label):
    MLA = [
        ensemble.AdaBoostClassifier(),
        ensemble.BaggingClassifier(),
        ensemble.ExtraTreesClassifier(),
        ensemble.GradientBoostingClassifier(),
        ensemble.RandomForestClassifier(),
        gaussian_process.GaussianProcessClassifier(),
        linear_model.LogisticRegressionCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.SGDClassifier(),
        linear_model.Perceptron(),
        naive_bayes.BernoulliNB(),
        naive_bayes.GaussianNB(),
        neighbors.KNeighborsClassifier(),
        svm.SVC(probability=True),
        svm.NuSVC(probability=True),
        svm.LinearSVC(),
        tree.DecisionTreeClassifier(),
        tree.ExtraTreeClassifier(),
        discriminant_analysis.LinearDiscriminantAnalysis(),
        discriminant_analysis.QuadraticDiscriminantAnalysis(),
    ]

    MLA_columns = ['MLA Name', 'MLA Parameters', 'MLA Score']
    MLA_compare = pd.DataFrame(columns=MLA_columns)
    x_train, x_test, y_train, y_test = train_test_split(train_data[features],
                                                        train_data[label],
                                                        test_size=0.2)
    row_index = 0
    MLA_predict = train_data[label]
    for alg in MLA:

        MLA_name = alg.__class__.__name__
        MLA_compare.loc[row_index, 'MLA Name'] = MLA_name
        MLA_compare.loc[row_index, 'MLA Parameters'] = str(alg.get_params())
        alg.fit(x_train, y_train)
        MLA_predict[MLA_name] = alg.predict(x_test)
        MLA_compare.loc[row_index, 'MLA Score'] = alg.score(x_test, y_test)
        row_index += 1

    MLA_compare.sort_values(by=['MLA Score'], ascending=False, inplace=True)
    return MLA_compare, x_train, x_test, y_train, y_test
Example #13
def all_classifiers():
    # Model Data
    MLA = [
        # Ensemble Methods
        ensemble.AdaBoostClassifier(),
        ensemble.BaggingClassifier(),
        ensemble.ExtraTreesClassifier(),
        ensemble.GradientBoostingClassifier(),
        ensemble.RandomForestClassifier(),

        # Gaussian Processes
        gaussian_process.GaussianProcessClassifier(),

        # GLM
        linear_model.LogisticRegressionCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.SGDClassifier(),
        linear_model.Perceptron(),

        # Naive Bayes
        naive_bayes.BernoulliNB(),
        naive_bayes.GaussianNB(),

        # Nearest Neighbor
        neighbors.KNeighborsClassifier(),

        # SVM
        svm.SVC(probability=True),
        svm.NuSVC(probability=True),
        svm.LinearSVC(),

        # Trees
        tree.DecisionTreeClassifier(),
        tree.ExtraTreeClassifier(),

        # Discriminant Analysis
        discriminant_analysis.LinearDiscriminantAnalysis(),
        discriminant_analysis.QuadraticDiscriminantAnalysis(),

        # xgboost: http://xgboost.readthedocs.io/en/latest/model.html
        XGBClassifier()
    ]
    return MLA
Example #14
Methodes = [
    #Ensemble Methods
    ensemble.AdaBoostClassifier(),
    ensemble.BaggingClassifier(),
    ensemble.ExtraTreesClassifier(),
    ensemble.GradientBoostingClassifier(),
    ensemble.RandomForestClassifier(),

    #Gaussian Processes
    #gaussian_process.GaussianProcessClassifier(),

    #GLM
    linear_model.LogisticRegressionCV(),
    linear_model.LogisticRegression(C=1000, random_state=0,
                                    solver='liblinear'),
    linear_model.PassiveAggressiveClassifier(),
    linear_model.RidgeClassifierCV(),
    linear_model.SGDClassifier(),
    linear_model.Perceptron(),

    #Naive Bayes
    naive_bayes.BernoulliNB(),
    #naive_bayes.GaussianNB(),

    #Nearest Neighbor
    neighbors.KNeighborsClassifier(),

    #SVM
    svm.SVC(probability=True),
    svm.NuSVC(probability=True),
    svm.LinearSVC(),
]
Example #15
def passive_aggressive_classifiers():
	pa = OneVsRestClassifier(linear_model.PassiveAggressiveClassifier())
	return pa
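
PassiveAggressiveClassifier handles multiclass targets on its own, so the OneVsRestClassifier wrapper above is mainly useful for multilabel problems. A usage sketch on synthetic multilabel data:

from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn import linear_model

X, Y = make_multilabel_classification(n_samples=100, n_classes=3, random_state=0)
pa = OneVsRestClassifier(linear_model.PassiveAggressiveClassifier(max_iter=1000))
pa.fit(X, Y)
print(pa.predict(X[:5]))  # one 0/1 indicator per class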
Example #16
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, 
                                                    random_state=0)

shuffle_index = np.random.permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]


# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

from sklearn import linear_model
clf = linear_model.PassiveAggressiveClassifier(random_state=0)
clf.fit(X_train, y_train)

# Cross Validation
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy')
y_train_pred = cross_val_predict(clf, X_train, y_train, cv=3)
cm = confusion_matrix(y_train, y_train_pred)
print(cm)

from sklearn.metrics import precision_score, recall_score
print("precision score = {0:.4f}".format(precision_score(y_train, y_train_pred)))
print("recall score =  {0:.4f}".format(recall_score(y_train, y_train_pred)))
Example #17
 def get_skl_estimator(self, **default_parameters):
     return linear_model.PassiveAggressiveClassifier(**default_parameters)
Example #18
File: titanic.py Project: bcohen1/Titanic
def main():
    train_df = pd.read_csv("train.csv")
    test_df = pd.read_csv("test.csv")
    combine = [train_df, test_df]

    for df in combine:
        df.info()
        standardize_data(df)
        create_columns(df)
        create_bins(df)
        encode_data(df)
    # Define target (Y variable)
    target = ["Survived"]

    # Define features (X variables)
    train_df_x = [
        "Pclass",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Fare",
        "Embarked",
        "FamilySize",
        "IsAlone",
        "Title",
    ]

    # Define numerical features (binned and encoded)
    train_df_x_bin = [
        "Pclass",
        "Sex_Code",
        "AgeBin_Code",
        "FareBin_Code",
        "Embarked_Code",
        "FamilySize",
        "IsAlone",
        "Title_Code",
    ]

    # Analyze feature correlation with target
    for x in train_df_x:
        if train_df[x].dtype != "float64":
            print(train_df[[x, target[0]]].groupby(x).mean())

    # Graph individual features by survival
    fig, axis = plt.subplots(1, 3, figsize=(9, 6))
    sns.histplot(x="Fare",
                 data=train_df,
                 hue="Survived",
                 multiple="stack",
                 ax=axis[0])
    sns.histplot(x="Age",
                 data=train_df,
                 hue="Survived",
                 multiple="stack",
                 ax=axis[1])
    sns.histplot(x="FamilySize",
                 data=train_df,
                 hue="Survived",
                 multiple="stack",
                 ax=axis[2])

    fig, axis = plt.subplots(2, 3, figsize=(16, 12))
    sns.barplot(x="Pclass", y="Survived", data=train_df, ax=axis[0, 0])
    sns.barplot(x="Sex", y="Survived", data=train_df, ax=axis[0, 1])
    sns.barplot(x="Embarked", y="Survived", data=train_df, ax=axis[0, 2])
    sns.barplot(x="IsAlone", y="Survived", data=train_df, ax=axis[1, 0])
    sns.barplot(x="Title", y="Survived", data=train_df, ax=axis[1, 1])

    # Compare class with a 2nd feature
    fig, axis = plt.subplots(1, 3, figsize=(9, 6))
    sns.barplot(x="Pclass", y="Survived", data=train_df, hue="Sex", ax=axis[0])
    sns.barplot(x="Pclass",
                y="Survived",
                data=train_df,
                hue="IsAlone",
                ax=axis[1])
    sns.barplot(x="Pclass",
                y="Survived",
                data=train_df,
                hue="Embarked",
                ax=axis[2])

    # Compare Sex with a 2nd feature
    fig, axis = plt.subplots(1, 3, figsize=(9, 6))
    sns.barplot(x="Sex", y="Survived", data=train_df, hue="Pclass", ax=axis[0])
    sns.barplot(x="Sex",
                y="Survived",
                data=train_df,
                hue="IsAlone",
                ax=axis[1])
    sns.barplot(x="Sex",
                y="Survived",
                data=train_df,
                hue="Embarked",
                ax=axis[2])

    # Correlation heatmap of dataset
    fig, ax = plt.subplots(figsize=(14, 12))
    fig = sns.heatmap(
        train_df.corr(),
        cmap=sns.diverging_palette(240, 10, as_cmap=True),
        annot=True,
        ax=ax,
    )

    # Machine Learning Algorithm (MLA) selection and initialization
    mla = [
        linear_model.LogisticRegressionCV(),
        linear_model.SGDClassifier(),
        linear_model.Perceptron(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.RidgeClassifierCV(),
        svm.SVC(probability=True),
        svm.NuSVC(probability=True),
        svm.LinearSVC(dual=False),
        neighbors.KNeighborsClassifier(),
        gaussian_process.GaussianProcessClassifier(),
        naive_bayes.GaussianNB(),
        naive_bayes.BernoulliNB(),
        tree.DecisionTreeClassifier(),
        tree.ExtraTreeClassifier(),
        ensemble.BaggingClassifier(),
        ensemble.RandomForestClassifier(),
        ensemble.ExtraTreesClassifier(),
        ensemble.AdaBoostClassifier(),
        ensemble.GradientBoostingClassifier(),
    ]

    mla_compare = test_models(mla, train_df, train_df_x_bin, target)

    best_estimator = optimize_params(mla, mla_compare, train_df,
                                     train_df_x_bin, target)

    generate_submission_csv(test_df, train_df_x_bin, best_estimator)
Example #19
            print(name, '.8 - val score:', val_score)
            if val_score > best_val_score:
                print('New best val score!! image_feature_model:',
                      image_features_model_name, 'clf:', name)
                best_clf = clf
                best_val_score = val_score
                best_clf_name = name
        except Exception as e:
            print("Exception 2!", e)


best_clf_name = None
best_clf = None
best_val_score = 0.97
validate_score_clf(
    linear_model.PassiveAggressiveClassifier(max_iter=1100, loss='hinge'),
    'linear_model.PassiveAggressiveClassifier-loss-hinge')
validate_score_clf(
    linear_model.PassiveAggressiveClassifier(max_iter=700, loss='hinge'),
    'linear_model.PassiveAggressiveClassifier-loss-hinge-700')
validate_score_clf(
    linear_model.PassiveAggressiveClassifier(max_iter=700,
                                             loss='hinge',
                                             class_weight='balanced'),
    'linear_model.PassiveAggressiveClassifier-loss-hinge-700-balanced')
validate_score_clf(
    linear_model.PassiveAggressiveClassifier(max_iter=1100,
                                             loss='hinge',
                                             class_weight='balanced'),
    'linear_model.PassiveAggressiveClassifier-loss-hinge-1100-balanced')
validate_score_clf(
Example #20
def parse_param_and_get_model(param_dict):
    
    #param_dict = json.loads(j_str)
    model_name = param_dict['learning_algorithm']  # e.g. "linear_svm", "svm", "nu_svm", "logistic_regression", "passive_aggressive_classifier"
    cv = eval(param_dict['cv'])
    mode = param_dict['mode']
    api = param_dict['api']
    
    print "INFO: Learning Algorithm: ", model_name
    print "INFO: CV = ", cv
    print "INFO: mode = ", mode
    print "INFO: API use: ", api
    ### parse and print parameters ###
    print "INFO: ============ Learning Algorithm and Grid Search Parameters ============="    
    
    if model_name == "linear_svm":
        ### 1: linearSVM
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000]}]
        else:
            param_dic = [{'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}]
        print "INFO: Grid Search Parameters:"
        print "INFO: C = ", param_dic[0]['C']                       
        print "INFO: ====================1: Linear SVM============="
        clf = svm.LinearSVC()
    elif model_name == "svm":
        ### 2: SVM with kernel
        if mode == "cheap":
            param_dic = [{'C': [0.01, 1, 100], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.5]}, {'C': [0.01, 1, 100], 'kernel':['linear']}, {'C': [0.01, 1, 100], 'kernel':['poly'], 'gamma':[0.0, 0.5], 'degree':[3]}]
        else:
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.5, 1]}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['linear']}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['poly'], 'gamma':[0.0, 0.5], 'degree':[2,3]}]
            #param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.5, 1]}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['linear']}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['poly'], 'gamma':[0.0, 0.5, 1], 'degree':[2,3]}]
            #param_dic = [{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.01, 0.1, 1, 10, 100]}, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['linear']}, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['poly'], 'gamma':[0.0, 0.01, 0.1, 1, 10, 100], 'degree':[2,3,4]}]            
        print "INFO: Grid Search Parameters:"
        for p in range (0, len(param_dic)):
            print "INFO: ",
            for key in param_dic[p]:
                print key, ' = ', param_dic[p][key],
            print ""
        print "INFO: ====================2: SVM with kernel============="
        clf = svm.SVC()
    elif model_name == "nu_svm":
        ### 3: NuSVC
        if mode == "cheap":
            param_dic = [{'nu': [0.1, 0.3], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.1]}, {'nu': [0.1, 0.3], 'kernel':['linear']}, {'nu': [0.1, 0.3], 'kernel':['poly'], 'gamma':[0.0, 0.1], 'degree':[3]}]
        else:
            param_dic = [{'nu': [0.1, 0.2, 0.3], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.1, 1, 10]}, {'nu': [0.1, 0.2, 0.3], 'kernel':['linear']}, {'nu': [0.1, 0.2, 0.3], 'kernel':['poly'], 'gamma':[0.0, 0.1, 1, 10], 'degree':[2,3]}]
            #param_dic = [{'nu': [0.1, 0.2, 0.3, 0.4], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.1, 1, 10]}, {'nu': [0.1, 0.2, 0.3, 0.4], 'kernel':['linear']}, {'nu': [0.1, 0.2, 0.3, 0.4], 'kernel':['poly'], 'gamma':[0.0, 0.1, 1, 10], 'degree':[2,3]}]            
        print "INFO: Grid Search Parameters:"
        for p in range (0, len(param_dic)):
            print "INFO: ",
            for key in param_dic[p]:
                print key, ' = ', param_dic[p][key],
            print ""
        print "INFO: ====================3: NuSVC============="
        clf = svm.NuSVC()
    elif model_name == "logistic_regression":
        ### 4: Logistic Regression
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000], 'penalty':['l2']}]
        else:
            param_dic = [{'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000], 'penalty':['l2', 'l1']}]
        print "INFO: Grid Search Parameters:"
        print "INFO: C= ", param_dic[0]['C']
        print "INFO: penalty= ", param_dic[0]['penalty']                
        print "INFO: ====================4: Logistic Regression============="
        clf = linear_model.LogisticRegression()
    elif model_name == "passive_aggressive_classifier":
        ### 6: Passive Aggressive Classifier
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000]}]
        else:
            param_dic = [{'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}]
        print "INFO: Grid Search Parameters:"
        print "INFO: C= ", param_dic[0]['C']       
        print "INFO: ====================6: Passive Aggressive Classifier============="
        clf = linear_model.PassiveAggressiveClassifier()
    else:
        print "INFO: Training model selection error: no valid ML model selected!"
        return (0, "none", 0, 0, 0)
    return (clf, model_name, api, cv, param_dic)
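
A sketch of how the returned tuple might be consumed downstream (the actual caller is not shown; X_train and y_train are assumed to be prepared elsewhere, and the modern sklearn.model_selection import is used here):

from sklearn.model_selection import GridSearchCV

clf, model_name, api, cv, param_dic = parse_param_and_get_model(param_dict)
search = GridSearchCV(clf, param_dic, cv=cv)
search.fit(X_train, y_train)
print(search.best_params_)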
Example #21
def parse_param_and_get_model(param_dict):
    # get model name
    if 'learning_algorithm' in param_dict:
        model_name = param_dict['learning_algorithm']
    else:
        print "ERROR: learning_algorithm not found"
        return (0, "none")

    ### parse and print parameters ###
    print "INFO: ============ Learning Algorithm", model_name, "============="

    if model_name == "linear_svm":
        ### 1: linearSVM
        C = eval(param_dict['c'])
        C = float(C)
        print "INFO: C = ", C
        print "INFO: ==================== 1: Linear SVM ============="
        clf = svm.LinearSVC(C=C)

    elif model_name == "svm":
        ### 2: SVM with kernel
        C = eval(param_dict['c'])
        C = float(C)
        kernel_func = param_dict['kernel']
        gamma_val = eval(param_dict['gamma'])
        gamma_val = float(gamma_val)
        print "INFO: C = ", C
        print "INFO: kernel = ", kernel_func
        print "INFO: gamma = ", gamma_val
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print "degree = ", degree_num
        print "==================== 2: SVM with kernel ============="
        if kernel_func == "poly":
            clf = svm.SVC(C=C,
                          kernel=kernel_func,
                          gamma=gamma_val,
                          degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.SVC(C=C, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.SVC(C=C, kernel=kernel_func)

    elif model_name == "nu-svm":
        ### 3: NuSVC
        nu_val = eval(param_dict['nu'])
        nu_val = float(nu_val)
        kernel_func = param_dict['kernel']
        gamma_val = eval(param_dict['gamma'])
        gamma_val = float(gamma_val)
        print "INFO: nu = ", nu_val
        print "INFO: kernel = ", kernel_func
        print "INFO: gamma = ", gamma_val
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print "INFO: degree = ", degree_num
        print "INFO: ==================== 3: NuSVC ============="
        if kernel_func == "poly":
            clf = svm.NuSVC(nu=nu_val,
                            kernel=kernel_func,
                            gamma=gamma_val,
                            degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func)

    elif model_name == "logistic_regression":
        ### 4: Logistic Regression
        C = eval(param_dict['c'])
        C = float(C)
        regularization = param_dict['regularization']
        print "INFO: C = ", C
        print "INFO: penalty = ", regularization
        print "INFO: ==================== 4: Logistic Regression ============="
        clf = linear_model.LogisticRegression(C=C, penalty=regularization)

    elif model_name == "linear_svm_with_sgd":
        ### 5: Linear SVM with SGD, no parameters as input
        print "INFO: ==================== 5: Linear SVM with SGD ============="
        clf = linear_model.SGDClassifier()
    elif model_name == "passive_aggressive_classifier":
        ### 6: Passive Aggressive Classifier
        C = eval(param_dict['c'])
        C = float(C)
        print "INFO: C = ", C
        print "INFO: ==================== 6: Passive Aggressive Classifier ============="
        clf = linear_model.PassiveAggressiveClassifier(C=C)
    elif model_name == "perceptron":
        ### 7: Perceptron
        print "INFO: ==================== 7: Perceptron ============="
        clf = linear_model.Perceptron()
    else:
        print "ERROR: Training model not supported:", model_name
        return (0, "none")
    return (clf, model_name)
Example #22
#Machine Learning Algorithm (MLA) Selection and initialization
CLF = [
    #Ensemble Methods
    ('ada', ensemble.AdaBoostClassifier(tree.DecisionTreeClassifier())),
    ('bc', ensemble.BaggingClassifier()),
    ('etc', ensemble.ExtraTreesClassifier()),
    ('gbc', ensemble.GradientBoostingClassifier()),
    ('xgbc', xgb.XGBClassifier(max_depth=3)),
    ('rfc', ensemble.RandomForestClassifier(n_estimators=50)),

    #Gaussian Processes
    ('gpc', gaussian_process.GaussianProcessClassifier()),

    #GLM - remove linear models, since this is a classifier algorithm
    ('lr', linear_model.LogisticRegressionCV()),
    ('pac', linear_model.PassiveAggressiveClassifier()),
    ('rc', linear_model.RidgeClassifierCV()),
    ('sgd', linear_model.SGDClassifier()),
    ('pct', linear_model.Perceptron()),

    #Navies Bayes
    ('gnb', naive_bayes.GaussianNB()),

    #Nearest Neighbor
    ('knn', neighbors.KNeighborsClassifier(n_neighbors=3)),

    #SVM
    ('svc', svm.SVC(probability=True)),
    ('lsvc', svm.LinearSVC()),

    #Trees
]
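
The (name, estimator) tuples used here match the format sklearn's VotingClassifier expects. A sketch of feeding the list into a voting ensemble; hard voting is used because several of the linear models (e.g. PassiveAggressiveClassifier) lack predict_proba (X_train and y_train are assumed to be prepared elsewhere):

from sklearn.ensemble import VotingClassifier

vote = VotingClassifier(estimators=CLF, voting='hard')
vote.fit(X_train, y_train)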
Example #23
def label_learner_pa():
    "return a keyed instance of passive aggressive learner"
    learner = sk.PassiveAggressiveClassifier(C=LOCAL_C,
                                             n_iter=LOCAL_N_ITER,
                                             class_weight=LOCAL_CLASS_WEIGHT)
    return Keyed('pa', SklearnLabelClassifier(learner))
Example #24
df_list = ['classifier_name', 'acc_train', 'acc_test', 'loss_train', 'loss_test']
clf=[linear_model.LogisticRegression(solver='liblinear',multi_class='ovr'),
     linear_model.LogisticRegressionCV(solver='liblinear',multi_class='ovr'),
     linear_model.SGDClassifier(max_iter=1000,tol=0.00001),
     linear_model.RidgeClassifier(),linear_model.RidgeClassifierCV(),
     LinearDiscriminantAnalysis(),QuadraticDiscriminantAnalysis(),
     svm.LinearSVC(),svm.SVC(gamma='scale',C=10.0,kernel='poly'),
     svm.NuSVC(gamma='scale',kernel='poly'),
     KNeighborsClassifier(),RadiusNeighborsClassifier(radius=30),
     NearestCentroid(),
     DecisionTreeClassifier(),ExtraTreeClassifier(),GaussianNB(),
     BernoulliNB(),MultinomialNB(),
     BaggingClassifier(),RandomForestClassifier(n_estimators=64),
     AdaBoostClassifier(),GradientBoostingClassifier(),
     linear_model.Perceptron(max_iter=1000,tol=0.00001),
     linear_model.PassiveAggressiveClassifier(max_iter=1000,tol=0.00001),
     GaussianProcessClassifier(),LabelPropagation(),LabelSpreading()]

list3clf=['LogisticRegression','LogisticRegressionCV','SGDClassifier',
          'RidgeClassifier', 'RidgeClassifierCV',
          'LinearDiscriminantAnalysis','QuadraticDiscriminantAnalysis',
          'LinearSVC', 'SVC','NuSVC',
          'KNeighborsClassifier','RadiusNeighborsClassifier','NearestCentroid', 
          'DecisionTreeClassifier','ExtraTreeClassifier',
          'GaussianNB','BernoulliNB','MultinomialNB',
          'BaggingClassifier','RandomForestClassifier',
          'AdaBoostClassifier','GradientBoostingClassifier',
          'Perceptron','PassiveAggressiveClassifier']
y3clf=[]
for i in range(len(list3clf)):
    y3clf.append(classifier_fit_score(clf[i],list3clf[i],'Digits',
Example #25
def compare_algorithm(data, target):
    x_train, x_cross, y_train, y_cross = train_test_split(data, target)
    MLA = [
        # Ensemble Methods
        ensemble.AdaBoostClassifier(),
        ensemble.BaggingClassifier(),
        ensemble.ExtraTreesClassifier(),
        ensemble.GradientBoostingClassifier(),
        ensemble.RandomForestClassifier(),

        # Gaussian Processes
        gaussian_process.GaussianProcessClassifier(),

        # GLM
        linear_model.LogisticRegressionCV(),
        linear_model.PassiveAggressiveClassifier(max_iter=1000, tol=0.001),
        linear_model.RidgeClassifierCV(),
        linear_model.SGDClassifier(max_iter=1000, tol=0.001),
        linear_model.Perceptron(max_iter=1000, tol=0.001),

        # Naive Bayes
        naive_bayes.BernoulliNB(),
        naive_bayes.GaussianNB(),

        # Nearest Neighbor
        neighbors.KNeighborsClassifier(),

        # SVM
        svm.SVC(probability=True),
        svm.NuSVC(probability=True),
        svm.LinearSVC(),

        # Trees
        tree.DecisionTreeClassifier(),
        tree.ExtraTreeClassifier(),

        # Discriminant Analysis
        discriminant_analysis.LinearDiscriminantAnalysis(),
        discriminant_analysis.QuadraticDiscriminantAnalysis(),

        # xgboost: http://xgboost.readthedocs.io/en/latest/model.html
        xgb.XGBClassifier()
    ]
    MLA_columns = []
    MLA_compare = pd.DataFrame(columns=MLA_columns)

    row_index = 0
    for alg in MLA:
        predicted = alg.fit(x_train, y_train).predict(x_cross)
        fp, tp, th = roc_curve(y_cross, predicted)
        MLA_name = alg.__class__.__name__
        MLA_compare.loc[row_index, 'MLA Name'] = MLA_name
        MLA_compare.loc[row_index, 'MLA Train Accuracy'] = round(
            alg.score(x_train, y_train), 4)
        MLA_compare.loc[row_index, 'MLA Test Accuracy'] = round(
            alg.score(x_cross, y_cross), 4)
        MLA_compare.loc[row_index, 'MLA Precision'] = precision_score(
            y_cross, predicted)
        MLA_compare.loc[row_index,
                        'MLA Recall'] = recall_score(y_cross, predicted)
        MLA_compare.loc[row_index, 'MLA AUC'] = auc(fp, tp)
        row_index = row_index + 1

    MLA_compare.sort_values(by=['MLA Test Accuracy'],
                            ascending=False,
                            inplace=True)
    print(MLA_compare)
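
Note that roc_curve above is fed hard class predictions, which collapses the ROC curve to a single point. A sketch of an AUC computed from continuous scores instead (decision_function for margin classifiers, predict_proba otherwise; binary labels assumed):

from sklearn.metrics import roc_auc_score

def scored_auc(alg, x, y):
    # Prefer probabilities; fall back to decision margins (binary case).
    if hasattr(alg, 'predict_proba'):
        scores = alg.predict_proba(x)[:, 1]
    else:
        scores = alg.decision_function(x)
    return roc_auc_score(y, scores)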
Example #26
def parse_para_and_get_model(param_dict):

    #param_dict = json.loads(ml_opts_jstr)
    model_name = param_dict['learning_algorithm']  # e.g. "linear_svm", "svm", "nu_svm", ...

    ### parse and print parameters ###
    print "INFO: ============Learning Algorithm and Parameters============="
    print "INFO: param_dict=", param_dict
    if model_name == "linear_svm":
        ### 1: linearSVM
        C = eval(param_dict['c'])
        C = float(C)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: ====================1: Linear SVM============="
        clf = svm.LinearSVC(C=C)

    elif model_name == "svm":
        ### 2: SVM with kernel
        C = eval(param_dict['c'])
        C = float(C)
        kernel_func = param_dict['kernel']
        gamma_val = "0.0"
        if 'gamma' in param_dict:
            gamma_val = eval(param_dict['gamma'])
            gamma_val = float(gamma_val)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: kernel = ", kernel_func
        print "INFO: gamma = ", gamma_val
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print "degree = ", degree_num
        print "INFO: ====================2: SVM with kernel============="
        if kernel_func == "poly":
            clf = svm.SVC(C=C,
                          kernel=kernel_func,
                          gamma=gamma_val,
                          degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.SVC(C=C, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.SVC(C=C, kernel=kernel_func)

    elif model_name == "nu_svm":
        ### 3: NuSVC
        nu_val = eval(param_dict['nu'])
        nu_val = float(nu_val)
        kernel_func = param_dict['kernel']
        gamma_val = eval(param_dict['gamma'])
        gamma_val = float(gamma_val)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: nu = ", nu_val
        print "INFO: kernel = ", kernel_func
        print "INFO: gamma = ", gamma_val
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print "INFO: degree = ", degree_num
        print "INFO: ====================3: NuSVC============="
        if kernel_func == "poly":
            clf = svm.NuSVC(nu=nu_val,
                            kernel=kernel_func,
                            gamma=gamma_val,
                            degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func)

    elif model_name == "logistic_regression":
        ### 4: Logistic Regression
        C = eval(param_dict['c'])
        C = float(C)
        # penalty from CV, regularization from non-CV training
        if 'regularization' in param_dict:
            regularization = param_dict['regularization']
        elif 'penalty' in param_dict:
            regularization = param_dict['penalty']
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: penalty = ", regularization
        print "INFO: ====================4: Logistic Regression============="
        clf = linear_model.LogisticRegression(C=C, penalty=regularization)

    elif model_name == "linear_svm_with_sgd":
        ### 5: Linear SVM with SGD, no parameters as input
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: ====================5: Linear SVM with SGD============="
        clf = linear_model.SGDClassifier()
    elif model_name == "passive_aggressive_classifier":
        ### 6: Passive Aggressive Classifier
        C = eval(param_dict['c'])
        C = float(C)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: ====================6: Passive Aggressive Classifier============="
        clf = linear_model.PassiveAggressiveClassifier(C=C)
    elif model_name == "perceptron":
        ### 7: Perceptron
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: ====================7: Perceptron============="
        clf = linear_model.Perceptron()
    else:
        print "INFO: Training model selection error: no valid ML model selected!"
        return (0, "none")
    return (clf, model_name)
Example #27
# Level 2 Score: 

clf = linear_model.LogisticRegression(solver='sag', random_state=rnd, verbose=0, n_jobs=-1)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="classifier", filename = "LogReg", setused=setused)


# Level 2 Score: 

clf = linear_model.RidgeCV(cv = 5)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "RidgeCV", setused=setused)


# Level 2 Score: 

clf = linear_model.PassiveAggressiveClassifier(n_iter=100, random_state=rnd, verbose=0, n_jobs=-1)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "PasAggC", setused=setused, tag = "1")


# Level 2 Score: 

clf = linear_model.PassiveAggressiveClassifier(n_iter=100, loss='squared_hinge', random_state=rnd, verbose=0, n_jobs=-1)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "PasAggC", setused=setused, tag = "2")


# Level 2 Score: 

clf = linear_model.PassiveAggressiveRegressor(n_iter=100, random_state=rnd, verbose=0)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "PasAggR", setused=setused, tag = "1")

Example #28
    xx, yy = np.dot(R, [xx, yy])
    ## scaling
    xx /= max(np.absolute(xx))
    yy /= max(np.absolute(yy))
    ## assign into X
    X[row, ::2] = xx
    X[row, 1::2] = yy

## Split the data for later computation of 'accuracy' and the 'confusion matrix'
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.1, stratify=y)

## CREATE THE CLASSIFIER OBJECT
clf = linear_model.PassiveAggressiveClassifier(C=60.69620253164557,
                                               fit_intercept=False,
                                               max_iter=10000,
                                               n_jobs=-1)

## CROSS-VALIDATION
scores = model_selection.cross_validate(clf,
                                        X_train,
                                        y_train,
                                        return_estimator=True,
                                        n_jobs=-1)
print('The score array for test scores on each cv split:',
      scores['test_score'])
print('Mean of above:', scores['test_score'].mean())

## SELECT THE BEST ESTIMATOR AND PREDICT FOR ALL THE DATA
best_clf = scores['estimator'][np.argmax(scores['test_score'])]
print('Accuracy on final set:', best_clf.score(X_test, y_test))
Example #29
File: test_e2e.py Project: goldv/m2cgen
        # Lightning Linear Regression
        regression(light_reg.AdaGradRegressor(random_state=RANDOM_SEED)),
        regression(light_reg.CDRegressor(random_state=RANDOM_SEED)),
        regression(light_reg.FistaRegressor()),
        regression(light_reg.SAGARegressor(random_state=RANDOM_SEED)),
        regression(light_reg.SAGRegressor(random_state=RANDOM_SEED)),
        regression(light_reg.SDCARegressor(random_state=RANDOM_SEED)),

        # Sklearn Linear Classifiers
        classification(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification(
            linear_model.PassiveAggressiveClassifier(
                random_state=RANDOM_SEED)),
        classification(linear_model.Perceptron(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifierCV()),
        classification(linear_model.SGDClassifier(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.PassiveAggressiveClassifier(
                random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.Perceptron(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
Example #30
def label_learner_pa():
    "return a keyed instance of passive aggressive learner"
    learner = sk.PassiveAggressiveClassifier(n_iter=LOCAL_PA_ARGS.iterations)
    return Keyed('pa', SklearnLabelClassifier(learner))