def fit(self, X, y, sample_weight=None):
    """Collect out-of-fold predictions of every sub-DAG into ``self.train``.

    The data is split into 5 stratified folds.  For each fold the initial
    DAG is fitted on the training part, the training part is pushed through
    it (``eval.test_dag(..., output='all')``) to obtain the preprocessed
    features/targets, and the held-out part is transformed by the same
    fitted workflow.  Every sub-DAG is then fitted on the preprocessed
    training data and asked to predict the preprocessed held-out data.

    Parameters
    ----------
    X, y :
        Training features and targets.  Both are indexed with ``.iloc``,
        so they must be pandas objects.
    sample_weight : optional
        Per-row weights aligned positionally with ``X``.  Only the initial
        DAG receives them; each fold gets the weights of its own training
        rows.

    Returns
    -------
    self
        ``self.train`` holds the concatenated predictions with columns
        ``p0``, ``p1``, ... (one name per sub-DAG).
        NOTE(review): the column renaming assumes each sub-DAG yields a
        single prediction column -- confirm.
    """
    # Imported at call time, as in the original; presumably this avoids a
    # circular import.  Shadows the builtin ``eval`` inside this scope only.
    import eval

    preds = [[] for _ in self.sub_dags]

    for train_idx, test_idx in cross_validation.StratifiedKFold(y, n_folds=5):
        tr_X, tr_y = X.iloc[train_idx], y.iloc[train_idx]
        # Only the held-out features are needed below.
        tst_X = X.iloc[test_idx]

        # Fix: the weights must be restricted to the rows of this fold.
        # Passing the full-length vector next to the subset tr_X/tr_y
        # misaligns (and mis-sizes) the weights.
        if sample_weight is None:
            fold_weight = None
        elif hasattr(sample_weight, 'iloc'):
            fold_weight = sample_weight.iloc[train_idx]
        else:
            fold_weight = np.asarray(sample_weight)[train_idx]

        wf_init = Workflow(self.initial_dag)
        wf_init.fit(tr_X, tr_y, sample_weight=fold_weight)

        preproc_X, preproc_y = eval.test_dag(
            self.initial_dag, wf_init.models,
            test_data=(tr_X, tr_y), output='all')
        pp_tst_X = wf_init.transform(tst_X)

        # Nothing left to predict on for this fold.
        if pp_tst_X.empty:
            continue

        for i, dag in enumerate(self.sub_dags):
            # Sub-DAG workflows are fitted without sample weights,
            # exactly as before.
            wf = Workflow(dag)
            wf.fit(preproc_X, preproc_y)
            res = wf.predict(pp_tst_X)
            # Keep the held-out index so folds line up after concatenation.
            preds[i].append(pd.DataFrame(res, index=pp_tst_X.index))

    # Stack folds row-wise per sub-DAG, then sub-DAGs side by side.
    preds = [pd.concat(ps) for ps in preds]
    self.train = pd.concat(preds, axis=1)
    self.train.columns = ['p' + str(x) for x in range(len(preds))]
    return self
def transform(self, X):
    """Run ``X`` through ``self.dag`` using the models in ``self.models``.

    Delegates to ``eval.test_dag`` with ``output='feats_only'`` and no
    targets, and returns whatever that call returns.
    """
    # Non-stdlib module named ``eval``; imported at call time and aliased
    # so the builtin ``eval`` is not shadowed locally.
    import eval as dag_eval

    unlabeled = (X, None)  # no targets are supplied
    return dag_eval.test_dag(self.dag, self.models,
                             test_data=unlabeled, output='feats_only')
def predict(self, X):
    """Return the DAG's output for ``X`` as a NumPy array.

    Calls ``eval.test_dag`` on ``self.dag`` with the models in
    ``self.models`` and no targets, then wraps the result in ``np.array``.
    """
    # TODO: Refactor to remove circular imports
    import eval as dag_eval

    raw_output = dag_eval.test_dag(self.dag, self.models, test_data=(X, None))
    return np.array(raw_output)
def predict(self, X):
    """Evaluate ``self.dag`` on ``X`` and return the result as ``np.array``.

    The evaluation is delegated to ``eval.test_dag`` with the models in
    ``self.models``; no targets are passed.
    """
    # NOTE(review): an identical ``predict`` is defined immediately before
    # this one; if both live in the same class, this later definition is
    # the one that takes effect -- confirm and deduplicate.
    # TODO: Refactor to remove circular imports
    import eval as dag_eval

    unlabeled = (X, None)
    result = dag_eval.test_dag(self.dag, self.models, test_data=unlabeled)
    return np.array(result)