예제 #1
0
    def fit(self, X, y, sample_weight=None):
        """Collect out-of-fold predictions of every sub-DAG into ``self.train``.

        The data is split with a 5-fold ``StratifiedKFold`` on ``y``.  For each
        fold the initial DAG is fitted on the training part, the training part
        is passed through it with ``eval.test_dag(..., output='all')``, and
        every sub-DAG is fitted on that result and asked to predict the
        transformed held-out part.  Folds whose transformed held-out part is
        empty are skipped.

        Args:
            X: Feature table; rows are selected positionally with ``.iloc``.
            y: Targets; rows are selected positionally with ``.iloc``.
            sample_weight: Optional weights, forwarded only to the ``fit`` of
                the initial workflow.

        Returns:
            self.  ``self.train`` holds one column per sub-DAG, named
            ``p0``, ``p1``, ...

        Raises:
            ValueError: If there are no sub-DAGs or every fold was skipped,
                so there is nothing to concatenate.
        """
        # Project module (not the builtin ``eval``), imported at call time.
        import eval

        fold_preds = [[] for _ in self.sub_dags]

        folds = cross_validation.StratifiedKFold(y, n_folds=5)
        for train_idx, test_idx in folds:
            tr_X, tr_y = X.iloc[train_idx], y.iloc[train_idx]
            tst_X = X.iloc[test_idx]

            wf_init = Workflow(self.initial_dag)
            # NOTE(review): sample_weight is passed unsliced although the
            # workflow is fitted on the training fold only -- confirm that
            # Workflow.fit aligns the weights with tr_X.
            wf_init.fit(tr_X, tr_y, sample_weight=sample_weight)
            preproc_X, preproc_y = eval.test_dag(self.initial_dag,
                                                 wf_init.models,
                                                 test_data=(tr_X, tr_y),
                                                 output='all')

            pp_tst_X = wf_init.transform(tst_X)
            if pp_tst_X.empty:
                # Nothing left to predict on for this fold.
                continue

            for i, dag in enumerate(self.sub_dags):
                wf = Workflow(dag)
                wf.fit(preproc_X, preproc_y)
                res = wf.predict(pp_tst_X)
                fold_preds[i].append(pd.DataFrame(res, index=pp_tst_X.index))

        # Every sub-DAG receives one frame per non-skipped fold, so the lists
        # are either all empty or all non-empty.  Fail with a clear message
        # instead of pandas' generic "No objects to concatenate".
        if not fold_preds or not all(fold_preds):
            raise ValueError('No out-of-fold predictions were produced: '
                             'there are no sub-DAGs or every fold was skipped.')

        stacked = [pd.concat(parts) for parts in fold_preds]

        self.train = pd.concat(stacked, axis=1)
        self.train.columns = ['p' + str(i) for i in range(len(stacked))]

        return self
예제 #2
0
    def fit(self, X, y, sample_weight=None):
        """Collect out-of-fold predictions of every sub-DAG into ``self.train``.

        ``y`` drives a 5-fold ``StratifiedKFold`` split.  Per fold, the initial
        DAG is fitted on the training rows, those rows are run through it via
        ``eval.test_dag(..., output='all')``, and each sub-DAG is fitted on the
        result and predicts the transformed held-out rows.  A fold is skipped
        when its transformed held-out part is empty.

        Args:
            X: Feature table; rows are selected positionally with ``.iloc``.
            y: Targets; rows are selected positionally with ``.iloc``.
            sample_weight: Optional weights, forwarded only to the ``fit`` of
                the initial workflow.

        Returns:
            self.  ``self.train`` holds one column per sub-DAG, named
            ``p0``, ``p1``, ...

        Raises:
            ValueError: If there are no sub-DAGs or every fold was skipped,
                so there is nothing to concatenate.
        """
        # Project module (not the builtin ``eval``), imported at call time.
        import eval

        fold_preds = [[] for _ in self.sub_dags]

        for train_idx, test_idx in cross_validation.StratifiedKFold(y, n_folds=5):
            tr_X, tr_y = X.iloc[train_idx], y.iloc[train_idx]
            tst_X = X.iloc[test_idx]

            wf_init = Workflow(self.initial_dag)
            # NOTE(review): sample_weight is passed unsliced although the
            # workflow is fitted on the training fold only -- confirm that
            # Workflow.fit aligns the weights with tr_X.
            wf_init.fit(tr_X, tr_y, sample_weight=sample_weight)
            preproc_X, preproc_y = eval.test_dag(
                self.initial_dag, wf_init.models, test_data=(tr_X, tr_y), output='all')

            pp_tst_X = wf_init.transform(tst_X)
            if pp_tst_X.empty:
                # Nothing left to predict on for this fold.
                continue

            for i, dag in enumerate(self.sub_dags):
                wf = Workflow(dag)
                wf.fit(preproc_X, preproc_y)
                res = wf.predict(pp_tst_X)
                fold_preds[i].append(pd.DataFrame(res, index=pp_tst_X.index))

        # The per-sub-DAG lists grow in lockstep, so they are either all empty
        # or all non-empty.  Raise a descriptive error rather than letting
        # pandas fail with "No objects to concatenate".
        if not fold_preds or not all(fold_preds):
            raise ValueError('No out-of-fold predictions were produced: '
                             'there are no sub-DAGs or every fold was skipped.')

        stacked = [pd.concat(parts) for parts in fold_preds]

        self.train = pd.concat(stacked, axis=1)
        self.train.columns = ['p' + str(i) for i in range(len(stacked))]

        return self
예제 #3
0
 def transform(self, X):
     """Run ``X`` through this object's DAG and return the feature output.

     Delegates to ``eval.test_dag`` with ``self.dag`` and ``self.models``,
     passing ``None`` in the target slot of ``test_data`` and requesting
     ``output='feats_only'``.

     Args:
         X: Input data to transform.

     Returns:
         Whatever ``eval.test_dag`` returns for ``output='feats_only'``.
     """
     # Project module (not the builtin ``eval``), imported at call time.
     import eval

     unlabeled = (X, None)
     features = eval.test_dag(self.dag, self.models,
                              test_data=unlabeled, output='feats_only')
     return features
예제 #4
0
 def predict(self, X):
     """Evaluate this object's DAG on ``X`` and return the result as an array.

     Delegates to ``eval.test_dag`` with ``self.dag`` and ``self.models``,
     passing ``None`` in the target slot of ``test_data``.

     Args:
         X: Input data to predict on.

     Returns:
         numpy.ndarray: The output of ``eval.test_dag`` wrapped in
         ``np.array``.
     """
     import eval  # TODO: Refactor to remove circular imports

     raw_output = eval.test_dag(self.dag, self.models, test_data=(X, None))
     return np.array(raw_output)
예제 #5
0
 def transform(self, X):
     """Return the feature output of this object's DAG for ``X``.

     Calls ``eval.test_dag`` with ``self.dag`` and ``self.models``; the
     target slot of ``test_data`` is ``None`` and ``output`` is
     ``'feats_only'``.

     Args:
         X: Input data to transform.

     Returns:
         Whatever ``eval.test_dag`` returns for ``output='feats_only'``.
     """
     # Project module (not the builtin ``eval``), imported at call time.
     import eval

     call_kwargs = {'test_data': (X, None), 'output': 'feats_only'}
     return eval.test_dag(self.dag, self.models, **call_kwargs)
예제 #6
0
 def predict(self, X):
     """Return the DAG's output for ``X`` as a NumPy array.

     Calls ``eval.test_dag`` with ``self.dag`` and ``self.models``; the
     target slot of ``test_data`` is ``None``.

     Args:
         X: Input data to predict on.

     Returns:
         numpy.ndarray: The output of ``eval.test_dag`` wrapped in
         ``np.array``.
     """
     import eval  # TODO: Refactor to remove circular imports

     unlabeled = (X, None)
     dag_output = eval.test_dag(self.dag, self.models, test_data=unlabeled)
     return np.array(dag_output)