def test_concatenate(self):
    A = None
    B = np.array([[1, 2], [3, 4]])
    np.testing.assert_equal(numpy_c_concatenate(A, B), B)
    A = np.array([[0], [1]])
    np.testing.assert_equal(numpy_c_concatenate(A, B), [[0, 1, 2], [1, 3, 4]])
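# A minimal sketch of the numpy_c_concatenate helper exercised by the test
# above. This is inferred from the assertions, not the library's actual code:
# None is treated as "no columns yet", otherwise the arrays are column-stacked.
import numpy as np

def numpy_c_concatenate_sketch(A, B):
    """Column-wise concatenation; a None left operand means 'start with B'."""
    if A is None:
        return B
    return np.c_[A, B]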
def _make_blend_test(self, xs_test, index=None):
    """Make blend sample for test.

    Parameters
    ----------
    xs_test : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    blend_test : array of shape = [n_samples, n_stage0_models]
        Calculated as the mean of the predictions of the per-fold
        (cross-validation) learners.
    """
    blend_test = None
    for clfs in self.all_learner.values():
        blend_test_j = None
        for clf in clfs:
            blend_test_j_temp = self._get_child_predict(clf, xs_test, index)
            if blend_test_j is None:
                blend_test_j = blend_test_j_temp
            else:
                blend_test_j += blend_test_j_temp
        blend_test_j = blend_test_j / len(clfs)  # convert sum to mean
        blend_test = numpy_c_concatenate(blend_test, blend_test_j)
    return blend_test
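# Toy illustration of the averaging done in _make_blend_test (made-up
# prediction arrays; _get_child_predict is not called here). Each stage-0
# model contributes one block of columns, averaged over its per-fold learners.
import numpy as np

fold_predictions = [np.array([[0.2], [0.8]]),   # learner fit on fold 1
                    np.array([[0.4], [0.6]])]   # learner fit on fold 2
blend_test_j = sum(fold_predictions) / len(fold_predictions)   # [[0.3], [0.7]]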
def _fit_child(self, skf, xs_train, y_train):
    """Build stage0 models from the training set (xs_train, y_train).

    Parameters
    ----------
    skf : StratifiedKFold-like iterator
        Use for cross validation blending.
    xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.
    y_train : array-like, shape = [n_samples]
        The target values (class labels in classification).

    Returns
    -------
    blend_train : array-like, shape = [n_samples]
        For stage1 model training.
    blend_test : array-like, shape = [n_samples]
        If you use TwoStageKFold, blended sample for test will be prepared.
    """
    def _fit_clf(j, i, clf, train_index, cv_index, xs_bc, y_bc, stack_by_proba):
        all_learner_key = str(type(clf)) + str(j)
        xs = xs_bc.value
        y = y_bc.value
        xs_now_train = xs[train_index]
        y_now_train = y[train_index]
        xs_cv = xs[cv_index]
        clf.fit(xs_now_train, y_now_train)
        blend_train_j_i = BaseStacked._get_child_predict(
            clf, xs_cv, stack_by_proba, False, cv_index)
        return j, all_learner_key, i, cv_index, clf, blend_train_j_i

    tasks = list(product(enumerate(self.clfs), enumerate(skf)))
    tasks_rdd = self.sc.parallelize(tasks)
    xs_train_bc = self.sc.broadcast(xs_train)
    y_train_bc = self.sc.broadcast(y_train)
    stack_by_proba = self.stack_by_proba
    results = tasks_rdd.map(
        lambda task: _fit_clf(task[0][0], task[1][0], clone(task[0][1]),
                              task[1][1][0], task[1][1][1],
                              xs_train_bc, y_train_bc,
                              stack_by_proba)).collect()
    results = sorted(results, key=lambda x: (x[0], x[2]))
    blend_train = None
    blend_test = None
    for (j, all_learner_key), cvs in groupby(results, lambda x: (x[0], x[1])):
        blend_train_j = None
        self.all_learner[all_learner_key] = []
        for (_, _, i, cv_index, clf, blend_train_j_i) in cvs:
            if blend_train_j is None:
                blend_train_j = self._get_blend_init(y_train, clf)
            blend_train_j[cv_index] = blend_train_j_i
            self.all_learner[all_learner_key].append(clf)
        blend_train = util.numpy_c_concatenate(blend_train, blend_train_j)
    return blend_train, blend_test
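# Sketch of the (classifier, fold) task grid that the Spark map above iterates
# over. The classifier names and index placeholders here are invented for
# illustration only.
from itertools import product

clfs_demo = ['clfA', 'clfB']
folds_demo = [('train_idx0', 'cv_idx0'), ('train_idx1', 'cv_idx1')]
tasks_demo = list(product(enumerate(clfs_demo), enumerate(folds_demo)))
# tasks_demo[0] == ((0, 'clfA'), (0, ('train_idx0', 'cv_idx0'))); each pair is
# fit independently on the cluster, and the collected results are re-sorted
# and grouped by (classifier index, fold index) to rebuild blend_train columns.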
def _fit_child(self, skf, xs_train, y_train):
    """Build stage0 models from the training set (xs_train, y_train).

    Parameters
    ----------
    skf : StratifiedKFold-like iterator
        Use for cross validation blending.
    xs_train : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.
    y_train : array-like, shape = [n_samples]
        The target values (class labels in classification).

    Returns
    -------
    blend_train : array-like, shape = [n_samples]
        For stage1 model training.
    blend_test : array-like, shape = [n_samples]
        If you use TwoStageKFold, blended sample for test will be prepared.
    """
    blend_train = None
    blend_test = None
    for j, clf in enumerate(self.clfs):
        self._out_to_console('Training classifier [{0}]'.format(j), 0)
        all_learner_key = str(type(clf)) + str(j)
        self.all_learner[all_learner_key] = []
        blend_train_j = None
        for i, (train_index, cv_index) in enumerate(skf):
            now_learner = clone(clf)
            self.all_learner[all_learner_key].append(now_learner)
            xs_now_train = xs_train[train_index]
            y_now_train = y_train[train_index]
            xs_cv = xs_train[cv_index]
            # y_cv = y_train[cv_index]  # not used

            if not hasattr(now_learner, 'id'):
                now_learner.id = self.get_stage0_id(now_learner)
            dump_file = util.get_cache_file(now_learner.id,
                                            cv_index,
                                            suffix='pkl')
            if self.save_stage0 and self._is_saved(now_learner, cv_index):
                print('Prediction cache exists: skip fitting.')
                now_learner = joblib.load(dump_file)
                self.all_learner[all_learner_key][-1] = now_learner
            else:
                self._out_to_console('Fold [{0}]'.format(i), 0)
                now_learner.fit(xs_now_train, y_now_train)
                if self.save_stage0:
                    joblib.dump(now_learner, dump_file, compress=True)

            if blend_train_j is None:
                blend_train_j = self._get_blend_init(y_train, now_learner)
            blend_train_j[cv_index] = self._get_child_predict(
                now_learner, xs_cv, cv_index)
        blend_train = numpy_c_concatenate(blend_train, blend_train_j)
    return blend_train, blend_test
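# Standalone joblib round trip mirroring the save_stage0 caching above. The
# estimator and file name are made up for the example; the real path comes
# from util.get_cache_file.
import joblib
import numpy as np
from sklearn.linear_model import LogisticRegression

clf_demo = LogisticRegression().fit(np.array([[0.0], [1.0]]), np.array([0, 1]))
joblib.dump(clf_demo, 'stage0_cache_example.pkl', compress=True)
restored = joblib.load('stage0_cache_example.pkl')  # reused instead of refitting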
def _pre_propcess(self, blend, X):
    return numpy_c_concatenate(blend, X)
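# Toy view of the stage-1 input built by _pre_propcess, assuming
# numpy_c_concatenate column-stacks as in the sketch near the top: the stage-0
# blend columns sit alongside the original features.
import numpy as np

blend_demo = np.array([[0.3], [0.7]])          # one column per stage-0 model
X_demo = np.array([[1.0, 2.0], [3.0, 4.0]])    # original features
stage1_input = np.c_[blend_demo, X_demo]       # shape (2, 3)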