class VotingClassifierImpl:
    """Thin wrapper exposing a scikit-learn-style VotingClassifier.

    Stores the constructor hyperparameters in ``self._hyperparams`` and
    delegates fit/transform/predict calls to an underlying ``SKLModel``
    instance built from them.
    """

    def __init__(self, estimators=None, voting='hard', weights=None,
                 n_jobs=None, flatten_transform=True):
        # Record the hyperparameters so they can be inspected later,
        # then construct the wrapped model from them.
        self._hyperparams = {
            'estimators': estimators,
            'voting': voting,
            'weights': weights,
            'n_jobs': n_jobs,
            'flatten_transform': flatten_transform,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; forward ``y`` only when provided."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Delegate to the wrapped model's ``transform``."""
        return self._wrapped_model.transform(X)

    def predict(self, X):
        """Delegate to the wrapped model's ``predict``."""
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        """Delegate to the wrapped model's ``predict_proba``."""
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        """Delegate to the wrapped model's ``decision_function``."""
        return self._wrapped_model.decision_function(X)
def voting_process(df_list, label_list, scale=False):
    """Train a soft-voting ensemble on each (df, labels) pair and collect results.

    For every dataframe/label pair, performs an 80/20 train/test split,
    fits a soft VotingClassifier (logistic regression, extra trees,
    AdaBoost, random forest, gradient boosting) with heavy sample weights
    on falsy labels, and records predictions, class-1 probabilities, the
    held-out labels, and a ranking score.

    Parameters
    ----------
    df_list : list of feature dataframes/arrays.
    label_list : list of label vectors, parallel to ``df_list``.
    scale : bool, if True each dataframe is passed through ``scale_df`` first.

    Returns
    -------
    dict with keys 'prediction', 'probaility', 'y_test', 'y_score', each a
    list with one entry per input pair.
    NOTE: the key 'probaility' is a typo but is kept as-is for backward
    compatibility with existing callers.

    Raises
    ------
    ValueError if scaling or the train/test split fails (original error
    preserved as the exception's __cause__).
    """
    # Fixed seed so repeated runs produce the same splits.
    random_state = np.random.RandomState(20180213)
    vt_results = {
        'prediction': [],
        'probaility': [],  # typo kept: callers index on this exact key
        'y_test': [],
        'y_score': [],
    }
    if scale:
        # Narrow exception handling: only wrap the scaling step, chain the
        # original error instead of swallowing it with a bare except.
        try:
            df_list = [scale_df(df) for df in df_list]
        except Exception as err:
            raise ValueError('Failed to execute DF Scaling.') from err
        print('DF Scaling successful.')

    # Falsy labels get 50x weight — presumably up-weighting the minority
    # class; TODO(review): confirm the intended class balance.
    def _weight(label):
        return 1 if label else 50

    for x, y in zip(df_list, label_list):
        try:
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=.2, random_state=random_state)
        except Exception as err:
            raise ValueError('Train/Test split failed.') from err

        vt = VotingClassifier(estimators=[
            ('basic_log', LogisticRegression()),
            ('et', ExtraTreesClassifier()),
            ('ada', AdaBoostClassifier()),
            ('rf', RandomForestClassifier()),
            ('gbm', GradientBoostingClassifier(
                n_estimators=100, max_depth=5, learning_rate=0.1)),
        ], voting='soft')
        vt.fit(x_train, y_train,
               sample_weight=[_weight(i) for i in y_train])

        vt_results['y_test'].append(y_test)
        vt_results['prediction'].append(vt.predict(x_test))
        # Probability of the positive (column-1) class.
        vt_results['probaility'].append(vt.predict_proba(x_test)[:, 1])
        try:
            # Soft-voting ensembles typically lack decision_function;
            # fall back to the class-1 probability as the ranking score.
            vt_results['y_score'].append(vt.decision_function(x_test))
        except AttributeError:
            vt_results['y_score'].append(vt.predict_proba(x_test)[:, 1])
    return vt_results