Example #1
0
class VotingClassifierImpl:
    """Thin adapter that forwards estimator calls to a wrapped ``SKLModel``.

    The constructor arguments are stored in ``_hyperparams`` and passed,
    unchanged and by keyword, to ``SKLModel``. The resulting object is kept
    in ``_wrapped_model`` and every public method delegates to it.
    """

    def __init__(self,
                 estimators=None,
                 voting='hard',
                 weights=None,
                 n_jobs=None,
                 flatten_transform=True):
        """Record the hyperparameters and build the wrapped model from them."""
        self._hyperparams = dict(
            estimators=estimators,
            voting=voting,
            weights=weights,
            n_jobs=n_jobs,
            flatten_transform=flatten_transform,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``; otherwise the
        wrapped model's ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's ``transform(X)``."""
        model = self._wrapped_model
        return model.transform(X)

    def predict(self, X):
        """Return the wrapped model's ``predict(X)``."""
        model = self._wrapped_model
        return model.predict(X)

    def predict_proba(self, X):
        """Return the wrapped model's ``predict_proba(X)``."""
        model = self._wrapped_model
        return model.predict_proba(X)

    def decision_function(self, X):
        """Return the wrapped model's ``decision_function(X)``."""
        model = self._wrapped_model
        return model.decision_function(X)
def voting_process(df_list, label_list, scale=False):
    """Train one soft-voting ensemble per dataset and collect its outputs.

    Each ``(x, y)`` pair taken from ``zip(df_list, label_list)`` is split
    80/20 into train and test parts. A ``VotingClassifier`` (logistic
    regression, extra trees, AdaBoost, random forest and gradient boosting,
    soft voting) is fitted on the train part and evaluated on the test part.

    Args:
        df_list: Iterable of feature sets, one per dataset.
            NOTE(review): presumably pandas DataFrames -- confirm with caller.
        label_list: Iterable of label collections paired positionally with
            ``df_list``. ``zip`` stops at the shorter of the two, so surplus
            entries on either side are silently ignored.
        scale: When true, every entry of ``df_list`` is first passed through
            ``scale_df``.

    Returns:
        dict with four lists, each holding one entry per processed dataset:
        ``'y_test'`` (held-out labels), ``'prediction'`` (``predict`` output),
        ``'probaility'`` (column 1 of ``predict_proba``; the misspelled key
        is kept because callers may already read it) and ``'y_score'``
        (``decision_function`` output when available, otherwise the same
        column-1 probabilities).

    Raises:
        ValueError: If scaling or the train/test split fails. The original
            exception is attached as ``__cause__``.
    """
    # One generator shared by every split: runs are reproducible, but each
    # successive dataset draws from the same stream rather than the same seed.
    random_state = np.random.RandomState(20180213)
    vt_results = {
        'prediction': [],
        'probaility': [],
        'y_test': [],
        'y_score': []
    }

    if scale:
        try:
            df_list = [scale_df(df) for df in df_list]
        except Exception as err:
            raise ValueError('Failed to execute DF Scaling.') from err
        print('DF Scaling successful.')

    for x, y in zip(df_list, label_list):
        try:
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=.2, random_state=random_state)
        except Exception as err:
            raise ValueError('Train/Test split failed.') from err

        vt = VotingClassifier(
            estimators=[
                ('basic_log', LogisticRegression()),
                ('et', ExtraTreesClassifier()),
                ('ada', AdaBoostClassifier()),
                ('rf', RandomForestClassifier()),
                ('gbm',
                 GradientBoostingClassifier(n_estimators=100,
                                            max_depth=5,
                                            learning_rate=0.1)),
            ],
            voting='soft')

        # Truthy labels get weight 1, falsy labels weight 50.
        # NOTE(review): presumably compensates for a rare 0/negative class
        # -- confirm the intended direction of the imbalance.
        sample_weight = [1 if label else 50 for label in y_train]
        vt.fit(x_train, y_train, sample_weight=sample_weight)

        prediction = vt.predict(x_test)
        # Computed once and reused as the y_score fallback below, so both
        # result lists may reference the same array object.
        positive_proba = vt.predict_proba(x_test)[::, 1]
        try:
            y_score = vt.decision_function(x_test)
        except Exception:
            # Best-effort: scikit-learn's VotingClassifier defines no
            # decision_function, so this fallback is presumably the path
            # normally taken.
            y_score = positive_proba

        vt_results['y_test'].append(y_test)
        vt_results['prediction'].append(prediction)
        vt_results['probaility'].append(positive_proba)
        vt_results['y_score'].append(y_score)
    return vt_results