Example #2
# X_train is read earlier in the original script; split off the target column.
y_train = X_train['interest_level']
X_train = X_train.drop('interest_level', axis=1)

gbt = GradientBoostingClassifier(learning_rate=0.005,
                                 n_estimators=args.n,
                                 max_depth=4,
                                 random_state=2018)
rf = RandomForestClassifier(n_estimators=1000,
                            criterion='gini',
                            n_jobs=-1,
                            random_state=2018)
lor = LogisticRegression(solver='newton-cg',
                         multi_class='multinomial',
                         max_iter=1000)
clf = VotingClassifier([('gbt', gbt), ('rf', rf), ('lor', lor)],
                       voting='soft',
                       weights=[3, 2, 1],
                       n_jobs=-1)

if args.s:
    clf.fit(X_train, y_train)
    joblib.dump(clf, 'checkpoint/voting.pkl')
    X_test = pd.read_csv("data/test_python.csv", encoding='utf-8')
    pred = clf.predict_proba(X_test)
    np.savetxt('submission/submission.csv',
               np.c_[X_test['listing_id'], pred[:, [2, 1, 0]]],
               delimiter=',',
               header='listing_id,high,medium,low',
               fmt='%d,%.16f,%.16f,%.16f',
               comments='')
else:
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=2018)
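    # The else branch is truncated in the source; a minimal sketch of how the
    # folds might be used (the scoring choice is an assumption; cross_val_score
    # is from sklearn.model_selection):
    scores = cross_val_score(clf, X_train, y_train, cv=cv,
                             scoring='neg_log_loss', n_jobs=-1)
    print('log loss: %.5f (+/- %.5f)' % (-scores.mean(), scores.std() * 2))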
Example #3
build_audit(
    BaggingClassifier(LogisticRegression(),
                      random_state=13,
                      n_estimators=3,
                      max_features=0.5), "LogisticRegressionEnsembleAudit")
build_audit(GaussianNB(), "NaiveBayesAudit")
build_audit(RandomForestClassifier(random_state=13, min_samples_leaf=5),
            "RandomForestAudit")
build_audit(RidgeClassifierCV(), "RidgeAudit", with_proba=False)
build_audit(
    BaggingClassifier(RidgeClassifier(random_state=13),
                      random_state=13,
                      n_estimators=3,
                      max_features=0.5), "RidgeEnsembleAudit")
build_audit(
    VotingClassifier([("dt", DecisionTreeClassifier(random_state=13)),
                      ("nb", GaussianNB()), ("lr", LogisticRegression())],
                     voting="soft",
                     weights=[3, 1, 2]), "VotingEnsembleAudit")
build_audit(XGBClassifier(objective="binary:logistic"), "XGBAudit")

versicolor_df = load_csv("Versicolor.csv")

print(versicolor_df.dtypes)

versicolor_columns = versicolor_df.columns.tolist()

versicolor_mapper = DataFrameMapper([(versicolor_columns[:-1],
                                      [ContinuousDomain(),
                                       RobustScaler()]),
                                     (versicolor_columns[-1], None)])

versicolor = versicolor_mapper.fit_transform(versicolor_df)
Example #4
from numpy import unique
from sklearn import svm
from sklearn.ensemble import (GradientBoostingClassifier,
                              RandomForestClassifier, VotingClassifier)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid


# BaseClassifier is defined elsewhere in the originating project.
class VotingEnsemble(BaseClassifier):
    def __init__(self, feature_length, num_classes, x=10):

        super().__init__(feature_length, num_classes)

        self.model = VotingClassifier(
            estimators=[
                ('gba', GradientBoostingClassifier(n_estimators=100,
                                                   learning_rate=1.0,
                                                   max_depth=1,
                                                   random_state=0)),
                ('knn', KNeighborsClassifier(metric='manhattan',
                                             weights='distance',
                                             n_neighbors=3)),
                ('Nc', NearestCentroid(metric='manhattan')),
                ('nvb', GaussianNB()),
                ('rf', RandomForestClassifier(n_estimators=10,
                                              criterion='entropy')),
                ('svmlin', svm.SVC(kernel='linear')),
                ('svmpol', svm.SVC(kernel='poly')),
                ('svmrbf', svm.SVC(kernel='rbf')),
            ],
            voting='hard')

        self.num_classes = num_classes

    def train(self, features, labels):
        """
        Using a set of features and labels, trains the classifier and returns the training accuracy.
        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to train to predict
        :return: Prediction accuracy, as a float between 0 and 1
        """

        labels = self.labels_to_categorical(labels)
        self.model.fit(features, labels)
        accuracy = self.model.score(features, labels)
        return accuracy

    # Make sure to save the model using the same library as in the machine-learning price-predictor.

    def predict(self, features, labels):
        """
        Using a set of features and labels, predicts the labels from the features,
        and returns the accuracy of predicted vs actual labels.
        :param features: An MxN matrix of features to use in prediction
        :param labels: An M row list of labels to test prediction accuracy on
        :return: Prediction accuracy, as a float between 0 and 1
        """
        label_test = self.labels_to_categorical(labels)
        accuracy = self.model.score(features, label_test)
        return accuracy

    def get_prediction(self, features):
        return self.model.predict(features)

    def reset(self):
        """
        Resets the trained weights / parameters to initial state
        :return:
        """

        pass

    def labels_to_categorical(self, labels):
        _, IDs = unique(labels, return_inverse=True)
        return IDs
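A minimal usage sketch (hypothetical, not from the original source; it assumes BaseClassifier accepts (feature_length, num_classes), as the super() call above implies):

import numpy as np
features = np.random.rand(100, 20)                    # 100 samples, 20 features
labels = np.random.choice(['a', 'b', 'c'], size=100)  # string labels
clf = VotingEnsemble(feature_length=20, num_classes=3)
print(clf.train(features, labels))                    # training accuracy in [0, 1]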
Example #5
# create the sub models
estimators = []
model1 = LogisticRegression()
estimators.append(('logistic', model1))
# model2 = DecisionTreeClassifier()
# estimators.append(('cart', model2))
# model3 = SVC()
# estimators.append(('svm', model3))
# model4 = tree.DecisionTreeClassifier()
# estimators.append(('cart2', model4))  # estimator names must be unique
# model5 = RandomForestClassifier(n_jobs=40)
# estimators.append(('rf', model5))
# model6 = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(1, 1), random_state=2)
# estimators.append(('mlp', model6))
# create the ensemble model
# create the ensemble model and score it with cross-validation
# (kfold is defined earlier in the original script)
ensemble = VotingClassifier(estimators)
results = model_selection.cross_val_score(ensemble,
                                          training,
                                          trainingTag,
                                          cv=kfold)
print(results.mean())
print(results)

# ----- Logistic Regression model -----
lg = LogisticRegression()
lg = lg.fit(training, trainingTag)
predictedValues0 = lg.predict(test)

# ----- Decision Tree model -----
# DecisionTree = tree.DecisionTreeClassifier()
# DecisionTree.fit(training, trainingTag)
Example #6
    missing = np.random.binomial(1, .1, size=X.shape)
    X[missing] = np.nan
    X = DataFrame(X, columns=['x%d' % i for i in range(n)])
    return (dict(X=X, y=y), dict(X=X), dict(X=X))


def create_boston_housing():
    # load_boston was removed in scikit-learn 1.2; this helper targets older releases.
    X, y = load_boston(return_X_y=True)
    X = DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    return (dict(X=X, y=y), dict(X=X), dict(X=X))


test_cases = [
    (VotingClassifier([('logistic', LogisticRegression()),
                       ('earth',
                        Pipeline([('earth', Earth()),
                                  ('logistic', LogisticRegression())]))],
                      voting='hard',
                      weights=[1.01, 1.01]), ['predict'],
     create_weird_classification_problem_1()),
    (GradientBoostingClassifier(max_depth=10,
                                n_estimators=10), ['predict_proba', 'predict'],
     create_weird_classification_problem_1()),
    (LogisticRegression(), ['predict_proba', 'predict'],
     create_weird_classification_problem_1()),
    (IsotonicRegression(out_of_bounds='clip'), ['predict'],
     create_isotonic_regression_problem_1()),
    (Earth(), ['predict', 'transform'], create_regression_problem_1()),
    (Earth(allow_missing=True), ['predict', 'transform'],
     create_regression_problem_with_missingness_1()),
    (ElasticNet(), ['predict'], create_regression_problem_1()),
    (ElasticNetCV(), ['predict'], create_regression_problem_1()),
Example #7
# The original line is truncated; the unmatched parenthesis indicates the
# perceptron is wrapped so it gains predict_proba (CalibratedClassifierCV is
# an assumption consistent with the closing parentheses).
model_perceptron = CalibratedClassifierCV(
    Perceptron(max_iter=100, random_state=rng)).fit(X_train, y_train)

model_svc = SVC(probability=True, gamma='auto').fit(X_train, y_train)
model_bayes = GaussianNB().fit(X_train, y_train)
model_tree = DecisionTreeClassifier(random_state=rng).fit(X_train, y_train)
model_knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

pool_classifiers = [
    model_perceptron, model_svc, model_bayes, model_tree, model_knn
]

voting_classifiers = [("perceptron", model_perceptron), ("svc", model_svc),
                      ("bayes", model_bayes), ("tree", model_tree),
                      ("knn", model_knn)]

model_voting = VotingClassifier(estimators=voting_classifiers).fit(
    X_train, y_train)

# Initializing the DS techniques
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

# Fitting the techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)
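
A hypothetical follow-up (not in the source; it assumes a held-out X_test/y_test split) comparing the fitted dynamic-selection techniques against the voting baseline:

# Accuracy of each technique on the held-out set (X_test/y_test are assumptions)
for name, technique in [('KNORA-U', knorau), ('KNORA-E', kne), ('DES-P', desp),
                        ('OLA', ola), ('MCB', mcb), ('Voting', model_voting)]:
    print(name, technique.score(X_test, y_test))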
Example #8
split3 = splits_cols[sz * 2:sz * 3]
split4 = splits_cols[sz * 3:sz * 4]
split5 = splits_cols[sz * 4:]

# create a pipeline meta-classifier (sklearn) and use a linearsvc at the end as the classifier
pipe1 = make_pipeline(ColumnSelector(cols=split1), LinearSVC())
pipe2 = make_pipeline(ColumnSelector(cols=split2), LinearSVC())
pipe3 = make_pipeline(ColumnSelector(cols=split3), LinearSVC())
pipe4 = make_pipeline(ColumnSelector(cols=split4), LinearSVC())
pipe5 = make_pipeline(ColumnSelector(cols=split5), LinearSVC())

# create the ensemble with the votingclassifier
cls = VotingClassifier([
    ('l1', pipe1),
    ('l2', pipe2),
    ('l3', pipe3),
    ('l4', pipe4),
    ('l5', pipe5),
],
                       n_jobs=4)
cls.fit(cars_train_X, cars_train_y)

# uncomment the 3 lines below if needed to see the accuracy and std-dev of the training set
# scores = cross_val_score(cls, cars_train_X, cars_train_y, cv=5, verbose=True)
# print(scores)
# print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
# this reaches about 30% acc

# create the predictions and dump to a file for plotting the heatmap
y_pred = cls.predict(cars_test_X)

# dump the predictions for later heatmap plotting
# (assumes `import pickle` at the top of the original script)
with open('5subset_linearsvm_voting.sav', 'wb') as f:
    pickle.dump(y_pred, f)
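
A hypothetical continuation (not in the source) showing how the dumped predictions could feed the heatmap mentioned in the comments above:

import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

with open('5subset_linearsvm_voting.sav', 'rb') as f:
    y_pred = pickle.load(f)
# cars_test_y comes from the surrounding script
plt.imshow(confusion_matrix(cars_test_y, y_pred), cmap='viridis')
plt.colorbar()
plt.show()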
Example #9
    def _collect_probas(self, x):
        if (self._proba_cache is None) or (self._x_cache is None) or \
                (not np.asarray(list(self._x_cache) == list(x)).all()):
            self._proba_cache = VotingClassifier._collect_probas(self, x)
            self._x_cache = x
        return self._proba_cache
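
This method overrides VotingClassifier._collect_probas (a private sklearn helper that stacks each estimator's predict_proba output) to memoize the result for repeated calls on the same input. A minimal sketch of the surrounding subclass (the class name and cache initialization are assumptions):

class CachingVotingClassifier(VotingClassifier):
    """Hypothetical wrapper: caches the stacked probabilities so repeated
    predict_proba calls on identical inputs skip re-scoring every estimator."""

    def __init__(self, estimators, **kwargs):
        super().__init__(estimators, **kwargs)
        self._proba_cache = None
        self._x_cache = None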