Code Example #1
File: cascade_forest.py Project: surfing1231/deep-rf
    def fit(self, feats, labels):
        self._is_fitted = False
        self._stacking_model, _, curr_acc = common_utils.get_class_distribution(
            feats=feats,
            labels=labels,
            model=self._stacking_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)

        print("Final layer average accuracy: %.5f..." % curr_acc)
        self._is_fitted = True
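
Every example on this page funnels through common_utils.get_class_distribution, whose implementation is not shown. Judging purely from the call sites (it takes a model, features, labels, the global class count, and k_cv, and returns a fitted model, per-example class-probability features, and an average accuracy), a plausible k-fold sketch could look like the one below. The body is an assumption, not the project's actual code; it presumes scikit-learn-compatible estimators and integer class labels usable as column indices, as the snippets themselves do.

    # Plausible sketch of common_utils.get_class_distribution, inferred only
    # from its call sites on this page; the real implementation may differ.
    import numpy as np
    from sklearn.base import clone
    from sklearn.model_selection import StratifiedKFold

    def get_class_distribution(feats, labels, model, num_all_classes, k_cv):
        """Return (model refit on all data, out-of-fold class-probability
        features, mean fold accuracy)."""
        proba_feats = np.zeros((feats.shape[0], num_all_classes))
        accs = []
        for tr_idx, val_idx in StratifiedKFold(n_splits=k_cv).split(feats, labels):
            fold_model = clone(model).fit(feats[tr_idx], labels[tr_idx])
            # Scatter fold probabilities into globally indexed columns: a fold
            # may not contain every class.
            proba_feats[np.ix_(val_idx, fold_model.classes_)] = \
                fold_model.predict_proba(feats[val_idx])
            accs.append(fold_model.score(feats[val_idx], labels[val_idx]))
        model.fit(feats, labels)  # final refit on the full training split
        return model, proba_feats, float(np.mean(accs))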
Code Example #2
File: cascade_forest.py Project: surfing1231/deep-rf
    def fit_predict(self, train_feats, train_labels, test_feats):
        # designed not to save the trained model

        model, _, curr_acc = common_utils.get_class_distribution(
            feats=train_feats,
            labels=train_labels,
            model=self._stacking_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)

        proba_preds = np.zeros((test_feats.shape[0], self.classes_.shape[0]))
        class_indices = model.classes_
        proba_preds[:, class_indices] = model.predict_proba(test_feats)

        return self.classes_[np.argmax(proba_preds, axis=1)]
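
The zero-initialized proba_preds buffer exists because predict_proba only returns columns for the classes the model actually saw during fitting; scattering via model.classes_ re-aligns them with the global label set. A toy demonstration with made-up data and the standard scikit-learn API:

    # Toy demonstration of scattering predict_proba columns into a globally
    # sized matrix when the training labels miss a class.
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier

    all_classes = np.array([0, 1, 2])      # global label set
    X = np.random.rand(20, 4)
    y = np.array([0, 2] * 10)              # class 1 never appears

    model = RandomForestClassifier(n_estimators=10).fit(X, y)
    proba = np.zeros((X.shape[0], all_classes.shape[0]))
    proba[:, model.classes_] = model.predict_proba(X)  # column 1 stays zero

    print(all_classes[np.argmax(proba, axis=1)])       # never predicts class 1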
Code Example #3
File: cascade_forest.py Project: surfing1231/deep-rf
    def fit_transform(self, train_feats, train_labels, test_feats):
        train_feats_crf, train_feats_rf = [], []
        test_feats_crf, test_feats_rf = [], []
        train_feats_rsf, test_feats_rsf = [], []
        train_feats_xonf, test_feats_xonf = [], []

        all_train, all_test = None, None
        layer_acc = 0.0
        print("Training cascade layer...")

        for idx_crf in range(self.n_crf):
            print("Training CRF#%d..." % idx_crf)
            curr_model = ExtraTreesClassifier(
                n_estimators=self.n_estimators_crf, max_features=1, n_jobs=-1)
            curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=train_feats,
                labels=train_labels,
                model=curr_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            curr_test_feats = np.zeros(
                (test_feats.shape[0], self.classes_.shape[0]))
            class_indices = curr_model.classes_
            curr_test_feats[:, class_indices] = curr_model.predict_proba(
                test_feats)

            layer_acc += curr_acc

            train_feats_crf.append(curr_train_feats)
            test_feats_crf.append(curr_test_feats)

        if self.n_crf > 0:
            train_feats_crf = np.hstack(train_feats_crf)
            test_feats_crf = np.hstack(test_feats_crf)

            all_train = train_feats_crf
            all_test = test_feats_crf

        for idx_rf in range(self.n_rf):
            print("Training RF#%d..." % idx_rf)
            curr_model = RandomForestClassifier(
                n_estimators=self.n_estimators_rf, n_jobs=-1)
            curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=train_feats,
                labels=train_labels,
                model=curr_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            curr_test_feats = np.zeros(
                (test_feats.shape[0], self.classes_.shape[0]))
            class_indices = curr_model.classes_
            curr_test_feats[:, class_indices] = curr_model.predict_proba(
                test_feats)

            layer_acc += curr_acc
            train_feats_rf.append(curr_train_feats)
            test_feats_rf.append(curr_test_feats)

        if self.n_rf > 0:
            train_feats_rf = np.hstack(train_feats_rf)
            test_feats_rf = np.hstack(test_feats_rf)

            if all_train is None:
                all_train = train_feats_rf
                all_test = test_feats_rf
            else:
                all_train = np.hstack((all_train, train_feats_rf))
                all_test = np.hstack((all_test, test_feats_rf))

        for idx_rsf in range(self.n_rsf):
            print("Training RSF#%d..." % idx_rsf)
            curr_model = RandomSubspaceForest(
                n_estimators=self.n_estimators_rsf,
                n_features="sqrt",
                n_jobs=-1)

            curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=train_feats,
                labels=train_labels,
                model=curr_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            curr_test_feats = np.zeros(
                (test_feats.shape[0], self.classes_.shape[0]))
            class_indices = curr_model.classes_
            curr_test_feats[:, class_indices] = curr_model.predict_proba(
                test_feats)

            layer_acc += curr_acc
            train_feats_rsf.append(curr_train_feats)
            test_feats_rsf.append(curr_test_feats)

        if self.n_rsf > 0:
            train_feats_rsf = np.hstack(train_feats_rsf)
            test_feats_rsf = np.hstack(test_feats_rsf)

            if all_train is None:
                all_train = train_feats_rsf
                all_test = test_feats_rsf
            else:
                all_train = np.hstack((all_train, train_feats_rsf))
                all_test = np.hstack((all_test, test_feats_rsf))

        for idx_xonf in range(self.n_xonf):
            print("Training XoNF#%d..." % idx_xonf)
            # TODO: `sample_size`, `max_features` parameters (maybe)
            curr_model = RandomXOfNForest(n_estimators=self.n_estimators_xonf,
                                          sample_size=0.05,
                                          n_jobs=-1)
            curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=train_feats,
                labels=train_labels,
                model=curr_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            curr_test_feats = np.zeros(
                (test_feats.shape[0], self.classes_.shape[0]))
            class_indices = curr_model.classes_
            curr_test_feats[:, class_indices] = curr_model.predict_proba(
                test_feats)

            layer_acc += curr_acc
            train_feats_xonf.append(curr_train_feats)
            test_feats_xonf.append(curr_test_feats)

        if self.n_xonf > 0:
            train_feats_xonf = np.hstack(train_feats_xonf)
            test_feats_xonf = np.hstack(test_feats_xonf)

            if all_train is None:
                all_train = train_feats_xonf
                all_test = test_feats_xonf
            else:
                all_train = np.hstack((all_train, train_feats_xonf))
                all_test = np.hstack((all_test, test_feats_xonf))

        if all_train is None:
            raise ValueError("No models were specified for this layer!")

        layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
        self.kfold_acc = layer_acc
        print("Average LAYER accuracy is %f..." % self.kfold_acc)
        print("-------------------------------")

        return all_train, all_test
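
fit_transform returns one horizontally stacked block of class-probability features per forest, for both splits. The page does not show how layers are chained; in a gcForest-style cascade the layer outputs are typically concatenated with the original features before feeding the next layer. A hedged sketch follows, in which only fit_transform and kfold_acc come from the snippets above; the make_layer factory and the early-stopping rule are assumptions:

    # Hedged sketch of chaining layers via fit_transform; make_layer is an
    # assumed user-supplied factory that builds one fresh cascade layer.
    import numpy as np

    def train_cascade(train_feats, train_labels, test_feats, make_layer,
                      max_layers=10):
        curr_train, curr_test = train_feats, test_feats
        best_acc = -np.inf
        for _ in range(max_layers):
            layer = make_layer()
            new_train, new_test = layer.fit_transform(curr_train, train_labels,
                                                      curr_test)
            if layer.kfold_acc <= best_acc:
                break  # layer accuracy stopped improving
            best_acc = layer.kfold_acc
            # gcForest-style: augment the original features with layer output
            curr_train = np.hstack((train_feats, new_train))
            curr_test = np.hstack((test_feats, new_test))
        return curr_train, curr_test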
Code Example #4
File: cascade_forest.py Project: surfing1231/deep-rf
    def train_layer(self, feats, labels):
        """
            This method is currently not the main focus because caching is not yet implemented - `fit_transform(...)`
            is therefore better suited, as it does not keep/save models in memory and does fitting and predicting
            "simultaneously".

            tl;dr: use fit_transform(...) instead at the moment.
        """

        feats_crf, feats_rf, feats_rsf, feats_xonf = [], [], [], []
        all_train = None

        layer_acc = 0.0

        # train completely random forests
        for idx_crf in range(self.n_crf):
            crf_model = ExtraTreesClassifier(
                n_estimators=self.n_estimators_crf, max_features=1, n_jobs=-1)
            curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
                feats=feats,
                labels=labels,
                model=crf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc

            if self.keep_models:
                self.crf_estimators.append(curr_model)
            feats_crf.append(curr_feats)

        if self.n_crf > 0:
            feats_crf = np.hstack(feats_crf)
            all_train = feats_crf

        # train random forests
        for idx_rf in range(self.n_rf):
            rf_model = RandomForestClassifier(
                n_estimators=self.n_estimators_rf, n_jobs=-1)
            curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
                feats=feats,
                labels=labels,
                model=rf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc

            if self.keep_models:
                self.rf_estimators.append(curr_model)
            feats_rf.append(curr_feats)

        if self.n_rf > 0:
            feats_rf = np.hstack(feats_rf)
            all_train = feats_rf if all_train is None else np.hstack(
                (all_train, feats_rf))

        # train random subspace forests
        for idx_rsf in range(self.n_rsf):
            rsf_model = RandomSubspaceForest(
                n_estimators=self.n_estimators_rsf,
                n_features="sqrt",
                n_jobs=-1)
            curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
                feats=feats,
                labels=labels,
                model=rsf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc

            if self.keep_models:
                # store the fitted model returned by get_class_distribution,
                # matching the CRF/RF loops above
                self.rsf_estimators.append(curr_model)
            feats_rsf.append(curr_feats)

        if self.n_rsf > 0:
            feats_rsf = np.hstack(feats_rsf)
            all_train = feats_rsf if all_train is None else np.hstack(
                (all_train, feats_rsf))

        # train random X-of-N forests
        for idx_xonf in range(self.n_xonf):
            xonf_model = RandomXOfNForest(n_estimators=self.n_estimators_xonf,
                                          sample_size=0.05,
                                          n_jobs=-1)

            curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
                feats=feats,
                labels=labels,
                model=xonf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc

            if self.keep_models:
                # store the fitted model returned by get_class_distribution,
                # matching the CRF/RF loops above
                self.xonf_estimators.append(curr_model)
            feats_xonf.append(curr_feats)

        if self.n_xonf > 0:
            feats_xonf = np.hstack(feats_xonf)
            all_train = feats_xonf if all_train is None else np.hstack(
                (all_train, feats_xonf))

        if all_train is None:
            raise ValueError("No models were specified for this layer!")

        layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
        self.kfold_acc = layer_acc
        print("Average LAYER accuracy is %f..." % self.kfold_acc)

        return all_train
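
With keep_models set, the fitted forests stay available in crf_estimators, rf_estimators, rsf_estimators and xonf_estimators, so a layer trained this way can later transform unseen data. Such a step is not shown on this page; a minimal sketch, assuming scikit-learn-style estimators:

    # Minimal sketch: pushing unseen data through a layer trained with
    # keep_models=True (not part of the snippets above).
    import numpy as np

    def transform_layer(layer, feats):
        blocks = []
        for model in (layer.crf_estimators + layer.rf_estimators
                      + layer.rsf_estimators + layer.xonf_estimators):
            block = np.zeros((feats.shape[0], layer.classes_.shape[0]))
            block[:, model.classes_] = model.predict_proba(feats)
            blocks.append(block)
        return np.hstack(blocks)  # same column layout as train_layer's output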
Code Example #5
    def fit_transform(self, train_feats, train_labels, test_feats):
        sliced_train = self.slice_data(train_feats)
        sliced_test = self.slice_data(test_feats)

        print(
            "Successfully sliced TRAINING data for window size %s and stride %s ----> shape of slices: %s..."
            % (str(self.wind_size), str(self.stride), str(sliced_train.shape)))
        print(
            "Successfully sliced TEST data for window size %s and stride %s ----> shape of slices: %s..."
            % (str(self.wind_size), str(self.stride), str(sliced_test.shape)))

        # slice_data does not replicate the labels for the slices, so do it here
        multiply_factor = int(sliced_train.shape[0] / train_feats.shape[0])
        train_labels = np.tile(np.reshape(train_labels, [-1, 1]),
                               (1, multiply_factor)).flatten()

        feats_crf_train, feats_crf_test = [], []
        feats_rf_train, feats_rf_test = [], []
        feats_rsf_train, feats_rsf_test = [], []
        feats_xonf_train, feats_xonf_test = [], []

        all_train, all_test = None, None

        layer_acc = 0.0

        for idx_crf in range(self.n_crf):
            print("Training CRF#%d..." % idx_crf)
            crf_model = ExtraTreesClassifier(
                n_estimators=self.n_estimators_crf,
                max_features=1,
                min_samples_leaf=10,
                max_depth=100,
                n_jobs=-1)

            # fit
            crf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=sliced_train,
                labels=train_labels,
                model=crf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            # predict
            curr_test_feats = np.zeros(
                (sliced_test.shape[0], self.classes_.shape[0]))
            class_indices = crf_model.classes_
            curr_test_feats[:, class_indices] = crf_model.predict_proba(
                sliced_test)

            # combine probabilities for slices of the same example
            feats_crf_train.append(
                curr_train_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            feats_crf_test.append(
                curr_test_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))

            layer_acc += curr_acc

        if self.n_crf > 0:
            feats_crf_train = np.hstack(feats_crf_train)
            feats_crf_test = np.hstack(feats_crf_test)

            all_train = feats_crf_train
            all_test = feats_crf_test

        for idx_rf in range(self.n_rf):
            print("Training RF#%d..." % idx_rf)
            rf_model = RandomForestClassifier(
                n_estimators=self.n_estimators_rf,
                min_samples_leaf=10,
                max_depth=100,
                n_jobs=-1)

            # fit
            rf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=sliced_train,
                labels=train_labels,
                model=rf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            # predict
            curr_test_feats = np.zeros(
                (sliced_test.shape[0], self.classes_.shape[0]))
            class_indices = rf_model.classes_
            curr_test_feats[:, class_indices] = rf_model.predict_proba(
                sliced_test)

            # combine probabilities for slices of the same example
            feats_rf_train.append(
                curr_train_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            feats_rf_test.append(
                curr_test_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))

            layer_acc += curr_acc

        if self.n_rf > 0:
            feats_rf_train = np.hstack(feats_rf_train)
            feats_rf_test = np.hstack(feats_rf_test)

            if all_train is None:
                all_train = feats_rf_train
                all_test = feats_rf_test
            else:
                all_train = np.hstack((all_train, feats_rf_train))
                all_test = np.hstack((all_test, feats_rf_test))

        for idx_rsf in range(self.n_rsf):
            print("Training RSF#%d..." % idx_rsf)
            rsf_model = RandomSubspaceForest(
                n_estimators=self.n_estimators_rsf,
                min_samples_leaf=10,
                max_depth=100,
                n_features="sqrt",
                n_jobs=-1)

            # fit
            rsf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=sliced_train,
                labels=train_labels,
                model=rsf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            # predict
            curr_test_feats = np.zeros(
                (sliced_test.shape[0], self.classes_.shape[0]))
            class_indices = rsf_model.classes_
            curr_test_feats[:, class_indices] = rsf_model.predict_proba(
                sliced_test)

            # combine probabilities for slices of the same example
            feats_rsf_train.append(
                curr_train_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            feats_rsf_test.append(
                curr_test_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))

            layer_acc += curr_acc

        if self.n_rsf > 0:
            feats_rsf_train = np.hstack(feats_rsf_train)
            feats_rsf_test = np.hstack(feats_rsf_test)

            if all_train is None:
                all_train = feats_rsf_train
                all_test = feats_rsf_test
            else:
                all_train = np.hstack((all_train, feats_rsf_train))
                all_test = np.hstack((all_test, feats_rsf_test))

        for idx_xonf in range(self.n_xonf):
            print("Training XoNF#%d..." % idx_xonf)
            xonf_model = RandomXOfNForest(n_estimators=self.n_estimators_xonf,
                                          min_samples_leaf=10,
                                          max_depth=100,
                                          sample_size=0.05,
                                          n_jobs=-1)

            # fit
            xonf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
                feats=sliced_train,
                labels=train_labels,
                model=xonf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            # predict
            curr_test_feats = np.zeros(
                (sliced_test.shape[0], self.classes_.shape[0]))
            class_indices = xonf_model.classes_
            curr_test_feats[:, class_indices] = xonf_model.predict_proba(
                sliced_test)

            # combine probabilities for slices of the same example
            feats_xonf_train.append(
                curr_train_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            feats_xonf_test.append(
                curr_test_feats.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))

            layer_acc += curr_acc

        if self.n_xonf > 0:
            feats_xonf_train = np.hstack(feats_xonf_train)
            feats_xonf_test = np.hstack(feats_xonf_test)

            if all_train is None:
                all_train = feats_xonf_train
                all_test = feats_xonf_test
            else:
                all_train = np.hstack((all_train, feats_xonf_train))
                all_test = np.hstack((all_test, feats_xonf_test))

        if all_train is None:
            raise ValueError("No models were specified for this Grain!")

        layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
        self.kfold_acc = layer_acc
        print("Average LAYER accuracy is %f..." % self.kfold_acc)

        return all_train, all_test
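
slice_data is not shown either. From the shapes used above (each input row yields multiply_factor slices, and the slices of one example occupy consecutive rows, which is what makes the label tiling and the reshape regrouping line up), a plausible sliding-window sketch for flat 1-D feature rows:

    # Plausible sketch of slice_data for flat feature rows, inferred from how
    # multiply_factor is computed above; the real implementation may differ.
    import numpy as np

    def slice_data(feats, wind_size, stride):
        slices = []
        for row in feats:
            # consecutive windows of one example stay adjacent, so a later
            # reshape([-1, multiply_factor * n_classes]) regroups per example
            for start in range(0, row.shape[0] - wind_size + 1, stride):
                slices.append(row[start:start + wind_size])
        return np.asarray(slices)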
Code Example #6
    def create(self, features, labels):
        # -----------------------------------------------------------------------------------------------
        # NOTE: prefer fit_transform(...) instead (more thoroughly tested and less memory-hungry)
        # -----------------------------------------------------------------------------------------------
        # TODO (low priority): refactor
        sliced_data = self.slice_data(features)
        print(
            "Successfully sliced data for window size %s and stride %s ----> shape of slices: %s..."
            % (str(self.wind_size), str(self.stride), str(sliced_data.shape)))

        # slice_data does not replicate the labels for the slices, so do it here
        multiply_factor = int(sliced_data.shape[0] / features.shape[0])
        labels = np.tile(np.reshape(labels, [-1, 1]),
                         (1, multiply_factor)).flatten()

        feats_crf, feats_rf, feats_rsf, feats_xonf = [], [], [], []
        all_train = None
        layer_acc = 0.0

        for idx_crf in range(self.n_crf):
            crf_model = ExtraTreesClassifier(
                n_estimators=self.n_estimators_crf,
                max_features=1,
                min_samples_leaf=10,
                max_depth=100,
                n_jobs=-1)

            print("Training CRF#%d..." % idx_crf)
            crf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
                feats=sliced_data,
                labels=labels,
                model=crf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc

            # combine predictions for slices of the same example
            feats_crf.append(
                curr_proba_preds.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            # save trained model
            self.crf_estimators.append(crf_model)

        if self.n_crf > 0:
            feats_crf = np.hstack(feats_crf)
            all_train = feats_crf

        for idx_rf in range(self.n_rf):
            rf_model = RandomForestClassifier(
                n_estimators=self.n_estimators_rf,
                min_samples_leaf=10,
                max_depth=100,
                n_jobs=-1)

            print("Training RF#%d..." % idx_rf)
            rf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
                feats=sliced_data,
                labels=labels,
                model=rf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc

            # combine predictions for slices of the same example
            feats_rf.append(
                curr_proba_preds.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            # save trained model
            self.rf_estimators.append(rf_model)

        if self.n_rf > 0:
            feats_rf = np.hstack(feats_rf)
            all_train = feats_rf if all_train is None else np.hstack(
                (all_train, feats_rf))

        for idx_rsf in range(self.n_rsf):
            rsf_model = RandomSubspaceForest(
                n_estimators=self.n_estimators_rsf,
                min_samples_leaf=10,
                max_depth=100,
                n_features="sqrt",
                n_jobs=-1)

            print("Training RSF#%d..." % idx_rsf)
            rsf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
                feats=sliced_data,
                labels=labels,
                model=rsf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc
            # combine predictions for slices of the same example
            feats_rsf.append(
                curr_proba_preds.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            # save trained model
            self.rsf_estimators.append(rsf_model)

        if self.n_rsf > 0:
            feats_rsf = np.hstack(feats_rsf)
            all_train = feats_rsf if all_train is None else np.hstack(
                (all_train, feats_rsf))

        for idx_xonf in range(self.n_xonf):
            xonf_model = RandomXOfNForest(n_estimators=self.n_estimators_xonf,
                                          min_samples_leaf=10,
                                          max_depth=100,
                                          sample_size=0.05,
                                          n_jobs=-1)

            print("Training XoNF#%d..." % idx_xonf)
            xonf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
                feats=sliced_data,
                labels=labels,
                model=xonf_model,
                num_all_classes=self.classes_.shape[0],
                k_cv=self.k_cv)

            layer_acc += curr_acc
            # combine predictions for slices of the same example
            feats_xonf.append(
                curr_proba_preds.reshape(
                    [-1, multiply_factor * self.classes_.shape[0]]))
            # save trained model
            self.xonf_estimators.append(xonf_model)

        if self.n_xonf > 0:
            feats_xonf = np.hstack(feats_xonf)
            all_train = feats_xonf if all_train is None else np.hstack(
                (all_train, feats_xonf))

        if all_train is None:
            raise ValueError("No models were specified for this Grain!")

        layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
        self.kfold_acc = layer_acc
        print("Average LAYER accuracy is %f..." % self.kfold_acc)

        return all_train
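
The reshape([-1, multiply_factor * self.classes_.shape[0]]) calls above concatenate the per-slice probability vectors of each original example into a single row. A toy illustration with 2 examples, 3 slices each, and 2 classes:

    # Toy illustration of the per-example regrouping: 2 examples x 3 slices
    # x 2 classes, i.e. a (6, 2) probability matrix becomes (2, 6).
    import numpy as np

    multiply_factor, n_classes = 3, 2
    proba = np.arange(12, dtype=float).reshape(6, 2)   # one row per slice
    grouped = proba.reshape([-1, multiply_factor * n_classes])
    print(grouped.shape)  # (2, 6)
    print(grouped[0])     # [0. 1. 2. 3. 4. 5.] -> slices 0..2 of example 0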