예제 #1
0
 def test_concatenate(self):
     # Right-hand operand shared by both checks below.
     right = np.array([[1, 2], [3, 4]])

     # With None on the left, the result must compare equal to the right
     # operand.
     merged = numpy_c_concatenate(None, right)
     np.testing.assert_equal(merged, right)

     # With a real left operand, its column is expected in front of the
     # columns of the right operand.
     left = np.array([[0], [1]])
     expected = [[0, 1, 2], [1, 3, 4]]
     np.testing.assert_equal(numpy_c_concatenate(left, right), expected)
예제 #2
0
    def _make_blend_test(self, xs_test, index=None):
        """Make blend sample for test.

        For every group of stage0 learners stored in ``self.all_learner`` the
        predictions of all learners in the group are averaged; the averaged
        predictions of the groups are then joined with
        ``numpy_c_concatenate``.

        Parameters
        ----------
        xs_test : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples.

        index : optional (default=None)
            Forwarded unchanged to ``self._get_child_predict``.

        Returns
        -------
        blend_test : array of shape = [n_samples, n_stage0_models].
            Calc as the mean of the predictions of the cross validation set.
            ``None`` when ``self.all_learner`` is empty.
        """
        blend_test = None
        for clfs in self.all_learner.values():
            blend_test_j = None
            for clf in clfs:
                blend_test_j_temp = self._get_child_predict(clf, xs_test, index)
                if blend_test_j is None:
                    blend_test_j = blend_test_j_temp
                else:
                    # Add out of place: ``+=`` would write into the object
                    # returned for the first learner (the accumulator is the
                    # very same object) and can fail when the accumulator's
                    # dtype cannot hold the sum (e.g. int + float).
                    blend_test_j = blend_test_j + blend_test_j_temp
            blend_test_j = blend_test_j / len(clfs)  # convert to mean
            blend_test = numpy_c_concatenate(blend_test, blend_test_j)
        return blend_test
예제 #3
0
    def _make_blend_test(self, xs_test, index=None):
        """Make blend sample for test.

        Each entry of ``self.all_learner`` holds a group of stage0 learners.
        The predictions inside a group are averaged and the per-group means
        are joined with ``numpy_c_concatenate``.

        Parameters
        ----------
        xs_test : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples.

        index : optional (default=None)
            Passed through as-is to ``self._get_child_predict``.

        Returns
        -------
        blend_test : array of shape = [n_samples, n_stage0_models].
            Calc as the mean of the predictions of the cross validation set.
            ``None`` when ``self.all_learner`` is empty.
        """
        blend_test = None
        for clfs in self.all_learner.values():
            blend_test_j = None
            for clf in clfs:
                blend_test_j_temp = self._get_child_predict(
                    clf, xs_test, index)
                if blend_test_j is None:
                    blend_test_j = blend_test_j_temp
                else:
                    # Out-of-place addition on purpose: the accumulator
                    # aliases the first learner's prediction, so ``+=`` would
                    # overwrite that object and could also fail on a dtype
                    # mismatch (e.g. int accumulator, float addend).
                    blend_test_j = blend_test_j + blend_test_j_temp
            blend_test_j = blend_test_j / len(clfs)  # convert to mean
            blend_test = numpy_c_concatenate(blend_test, blend_test_j)
        return blend_test
예제 #4
0
    def _fit_child(self, skf, xs_train, y_train):
        """Build stage0 models from the training set (xs_train, y_train).

        Every (classifier, fold) pair becomes one task that is mapped over an
        RDD created from ``self.sc``. The fitted clones and their predictions
        for the held-out part of each fold are collected and assembled into
        ``self.all_learner`` and the returned blend matrix.

        Parameters
        ----------
        skf: StratifiedKFold-like iterator
            Use for cross validation blending.

        xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y_train : array-like, shape = [n_samples]
            The target values (class labels in classification).

        Returns
        -------
        blend_train : array-like, shape = [n_samples]
            For stage1 model training.
        blend_test : None
            This implementation never fills it; it is always ``None``.
        """

        def _fit_clf(j, i, clf, train_index, cv_index, xs_bc, y_bc, stack_by_proba):
            # Fit one clone on one fold and predict the held-out rows of that
            # fold. The data is read from the broadcast variables.
            all_learner_key = str(type(clf)) + str(j)
            xs = xs_bc.value
            y = y_bc.value

            xs_now_train = xs[train_index]
            y_now_train = y[train_index]
            xs_cv = xs[cv_index]

            clf.fit(xs_now_train, y_now_train)

            blend_train_j_i = BaseStacked._get_child_predict(
                clf, xs_cv, stack_by_proba, False, cv_index)

            return j, all_learner_key, i, cv_index, clf, blend_train_j_i

        # product() fully consumes both inputs up front, so ``skf`` is
        # iterated exactly once here.
        tasks = list(product(enumerate(self.clfs), enumerate(skf)))

        tasks_rdd = self.sc.parallelize(tasks)

        xs_train_bc = self.sc.broadcast(xs_train)
        y_train_bc = self.sc.broadcast(y_train)

        # Local copy, presumably so the task function below does not have to
        # reference ``self``.
        stack_by_proba = self.stack_by_proba

        def _run_task(task):
            # task == ((j, clf), (i, (train_index, cv_index)))
            (j, clf), (i, (train_index, cv_index)) = task
            return _fit_clf(j, i, clone(clf), train_index, cv_index,
                            xs_train_bc, y_train_bc, stack_by_proba)

        results = tasks_rdd.map(_run_task).collect()

        # groupby() only merges adjacent items, so order by classifier index
        # (then fold index) before grouping.
        results = sorted(results, key=lambda x: (x[0], x[2]))

        blend_train = None
        blend_test = None

        for (j, all_learner_key), cvs in groupby(results, lambda x: (x[0], x[1])):
            blend_train_j = None
            self.all_learner[all_learner_key] = []
            for (_, _, i, cv_index, clf, blend_train_j_i) in cvs:
                if blend_train_j is None:
                    blend_train_j = self._get_blend_init(y_train, clf)
                # Each fold fills only the rows of its own held-out indices.
                blend_train_j[cv_index] = blend_train_j_i
                self.all_learner[all_learner_key].append(clf)

            blend_train = util.numpy_c_concatenate(blend_train, blend_train_j)

        return blend_train, blend_test
예제 #5
0
    def _fit_child(self, skf, xs_train, y_train):
        """Build stage0 models from the training set (xs_train, y_train).

        For every classifier in ``self.clfs`` one clone per fold is fitted (or
        loaded from the stage0 cache when ``self.save_stage0`` is set and a
        saved model exists). The learners are stored in ``self.all_learner``
        and their predictions on the held-out part of each fold fill the
        returned blend matrix.

        Parameters
        ----------
        skf: StratifiedKFold-like iterator
            Use for cross validation blending. It is materialized once, so a
            one-shot iterator (e.g. a generator) is supported as well.

        xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y_train : array-like, shape = [n_samples]
            The target values (class labels in classification).

        Returns
        -------
        blend_train : array-like, shape = [n_samples]
            For stage1 model training.
        blend_test : None
            This implementation never fills it; it is always ``None``.
        """
        blend_train = None
        blend_test = None
        # Materialize the folds once: a one-shot iterator would be exhausted
        # after the first classifier, leaving the others without any fold.
        folds = list(skf)
        for j, clf in enumerate(self.clfs):
            self._out_to_console('Training classifier [{0}]'.format(j), 0)
            all_learner_key = str(type(clf)) + str(j)
            self.all_learner[all_learner_key] = []
            blend_train_j = None
            for i, (train_index, cv_index) in enumerate(folds):
                now_learner = clone(clf)
                self.all_learner[all_learner_key].append(now_learner)
                xs_cv = xs_train[cv_index]

                if not hasattr(now_learner, 'id'):
                    now_learner.id = self.get_stage0_id(now_learner)

                dump_file = util.get_cache_file(now_learner.id,
                                                cv_index,
                                                suffix='pkl')
                if self.save_stage0 and self._is_saved(now_learner, cv_index):
                    print('Prediction cache exists: skip fitting.')
                    # NOTE: joblib.load unpickles the file, so the cache
                    # location must be trusted.
                    now_learner = joblib.load(dump_file)
                    # Replace the unfitted clone appended above.
                    self.all_learner[all_learner_key][-1] = now_learner
                else:
                    self._out_to_console('Fold [{0}]'.format(i), 0)
                    # Slice the training rows only when a fit is needed.
                    now_learner.fit(xs_train[train_index],
                                    y_train[train_index])
                    if self.save_stage0:
                        joblib.dump(now_learner, dump_file, compress=True)

                if blend_train_j is None:
                    blend_train_j = self._get_blend_init(y_train, now_learner)
                # Each fold fills only the rows of its own held-out indices.
                blend_train_j[cv_index] = self._get_child_predict(
                    now_learner, xs_cv, cv_index)
            blend_train = numpy_c_concatenate(blend_train, blend_train_j)
        return blend_train, blend_test
예제 #6
0
    def _fit_child(self, skf, xs_train, y_train):
        """Build stage0 models from the training set (xs_train, y_train).

        One clone of each classifier in ``self.clfs`` is fitted per fold, or
        loaded from the stage0 cache when ``self.save_stage0`` is set and a
        saved model exists. The learners end up in ``self.all_learner``; their
        predictions on the held-out rows of each fold make up the returned
        blend matrix.

        Parameters
        ----------
        skf: StratifiedKFold-like iterator
            Use for cross validation blending. It is turned into a list once,
            so a one-shot iterator (e.g. a generator) works too.

        xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y_train : array-like, shape = [n_samples]
            The target values (class labels in classification).

        Returns
        -------
        blend_train : array-like, shape = [n_samples]
            For stage1 model training.
        blend_test : None
            This implementation never fills it; it is always ``None``.
        """
        blend_train = None
        blend_test = None
        # Consume ``skf`` a single time. Iterating it again for every
        # classifier would yield nothing after the first pass when it is a
        # one-shot iterator.
        folds = list(skf)
        for j, clf in enumerate(self.clfs):
            self._out_to_console('Training classifier [{0}]'.format(j), 0)
            all_learner_key = str(type(clf)) + str(j)
            self.all_learner[all_learner_key] = []
            blend_train_j = None
            for i, (train_index, cv_index) in enumerate(folds):
                now_learner = clone(clf)
                self.all_learner[all_learner_key].append(now_learner)
                xs_cv = xs_train[cv_index]

                if not hasattr(now_learner, 'id'):
                    now_learner.id = self.get_stage0_id(now_learner)

                dump_file = util.get_cache_file(now_learner.id,
                                                cv_index,
                                                suffix='pkl')
                if self.save_stage0 and self._is_saved(now_learner, cv_index):
                    print('Prediction cache exists: skip fitting.')
                    # NOTE: joblib.load unpickles the file, so only trusted
                    # cache files may be loaded here.
                    now_learner = joblib.load(dump_file)
                    # Swap the unfitted clone appended above for the loaded one.
                    self.all_learner[all_learner_key][-1] = now_learner
                else:
                    self._out_to_console('Fold [{0}]'.format(i), 0)
                    # The training slices are only needed on this branch.
                    xs_now_train = xs_train[train_index]
                    y_now_train = y_train[train_index]
                    now_learner.fit(xs_now_train, y_now_train)
                    if self.save_stage0:
                        joblib.dump(now_learner, dump_file, compress=True)

                if blend_train_j is None:
                    blend_train_j = self._get_blend_init(y_train, now_learner)
                # Only the rows of this fold's held-out indices are written.
                blend_train_j[cv_index] = self._get_child_predict(now_learner, xs_cv, cv_index)
            blend_train = numpy_c_concatenate(blend_train, blend_train_j)
        return blend_train, blend_test
예제 #7
0
 def test_concatenate(self):
     base = np.array([[1, 2], [3, 4]])
     # None on the left: the result has to compare equal to the right array.
     np.testing.assert_equal(numpy_c_concatenate(None, base), base)

     # A single-column array on the left: its column is expected first.
     extra_column = np.array([[0], [1]])
     joined = numpy_c_concatenate(extra_column, base)
     np.testing.assert_equal(joined, [[0, 1, 2], [1, 3, 4]])
예제 #8
0
 def _pre_propcess(self, blend, X):
     """Return ``blend`` and ``X`` joined by ``numpy_c_concatenate``."""
     combined = numpy_c_concatenate(blend, X)
     return combined