def fit(self, feats, labels):
    self._is_fitted = False
    self._stacking_model, _, curr_acc = common_utils.get_class_distribution(
        feats=feats,
        labels=labels,
        model=self._stacking_model,
        num_all_classes=self.classes_.shape[0],
        k_cv=self.k_cv)
    print("Final layer average accuracy: %.5f..." % curr_acc)
    self._is_fitted = True
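# The sketch below documents the contract these methods assume for
# `common_utils.get_class_distribution`; the real implementation is not shown
# in this section, so treat this as an illustrative assumption inferred from
# the call sites: k-fold cross-validation that yields out-of-fold class
# probabilities for every training row (labels assumed to be integers in
# [0, num_all_classes)), followed by a refit on the full training set.
def _sketch_get_class_distribution(feats, labels, model, num_all_classes, k_cv):
    from sklearn.base import clone
    from sklearn.model_selection import KFold

    kf = KFold(n_splits=k_cv, shuffle=True)
    oof_proba = np.zeros((feats.shape[0], num_all_classes))
    fold_accs = []
    for train_idx, val_idx in kf.split(feats):
        fold_model = clone(model)
        fold_model.fit(feats[train_idx], labels[train_idx])
        # scatter the fold's probabilities into the full class space
        oof_proba[np.ix_(val_idx, fold_model.classes_)] = \
            fold_model.predict_proba(feats[val_idx])
        fold_accs.append(fold_model.score(feats[val_idx], labels[val_idx]))
    model.fit(feats, labels)  # final refit on all training data
    return model, oof_proba, float(np.mean(fold_accs))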
def fit_predict(self, train_feats, train_labels, test_feats):
    # designed not to save the trained model
    model, _, curr_acc = common_utils.get_class_distribution(
        feats=train_feats,
        labels=train_labels,
        model=self._stacking_model,
        num_all_classes=self.classes_.shape[0],
        k_cv=self.k_cv)

    proba_preds = np.zeros((test_feats.shape[0], self.classes_.shape[0]))
    class_indices = model.classes_
    proba_preds[:, class_indices] = model.predict_proba(test_feats)

    return self.classes_[np.argmax(proba_preds, axis=1)]
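# Minimal usage sketch for the ending layer (the helper name below is
# hypothetical; only `fit` and `fit_predict` come from the code above):
# `fit` trains and keeps the stacking model on the instance, while
# `fit_predict` is the one-shot path that trains and predicts without
# retaining the model.
def _example_ending_layer_usage(ending_layer, train_feats, train_labels,
                                test_feats):
    # one-shot: cross-validate, refit, predict, discard the model
    return ending_layer.fit_predict(train_feats, train_labels, test_feats)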
def fit_transform(self, train_feats, train_labels, test_feats):
    train_feats_crf, train_feats_rf = [], []
    test_feats_crf, test_feats_rf = [], []
    train_feats_rsf, test_feats_rsf = [], []
    train_feats_xonf, test_feats_xonf = [], []
    all_train, all_test = None, None
    layer_acc = 0.0

    print("Training cascade layer...")

    for idx_crf in range(self.n_crf):
        print("Training CRF#%d..." % idx_crf)
        curr_model = ExtraTreesClassifier(
            n_estimators=self.n_estimators_crf,
            max_features=1,
            n_jobs=-1)
        curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=train_feats,
            labels=train_labels,
            model=curr_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)

        curr_test_feats = np.zeros(
            (test_feats.shape[0], self.classes_.shape[0]))
        class_indices = curr_model.classes_
        curr_test_feats[:, class_indices] = curr_model.predict_proba(
            test_feats)

        layer_acc += curr_acc
        train_feats_crf.append(curr_train_feats)
        test_feats_crf.append(curr_test_feats)

    if self.n_crf > 0:
        train_feats_crf = np.hstack(train_feats_crf)
        test_feats_crf = np.hstack(test_feats_crf)
        all_train = train_feats_crf
        all_test = test_feats_crf

    for idx_rf in range(self.n_rf):
        print("Training RF#%d..." % idx_rf)
        curr_model = RandomForestClassifier(
            n_estimators=self.n_estimators_rf,
            n_jobs=-1)
        curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=train_feats,
            labels=train_labels,
            model=curr_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)

        curr_test_feats = np.zeros(
            (test_feats.shape[0], self.classes_.shape[0]))
        class_indices = curr_model.classes_
        curr_test_feats[:, class_indices] = curr_model.predict_proba(
            test_feats)

        layer_acc += curr_acc
        train_feats_rf.append(curr_train_feats)
        test_feats_rf.append(curr_test_feats)

    if self.n_rf > 0:
        train_feats_rf = np.hstack(train_feats_rf)
        test_feats_rf = np.hstack(test_feats_rf)
        if all_train is None:
            all_train = train_feats_rf
            all_test = test_feats_rf
        else:
            all_train = np.hstack((all_train, train_feats_rf))
            all_test = np.hstack((all_test, test_feats_rf))

    for idx_rsf in range(self.n_rsf):
        print("Training RSF#%d..." % idx_rsf)
        curr_model = RandomSubspaceForest(
            n_estimators=self.n_estimators_rsf,
            n_features="sqrt",
            n_jobs=-1)
        curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=train_feats,
            labels=train_labels,
            model=curr_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)

        curr_test_feats = np.zeros(
            (test_feats.shape[0], self.classes_.shape[0]))
        class_indices = curr_model.classes_
        curr_test_feats[:, class_indices] = curr_model.predict_proba(
            test_feats)

        layer_acc += curr_acc
        train_feats_rsf.append(curr_train_feats)
        test_feats_rsf.append(curr_test_feats)

    if self.n_rsf > 0:
        train_feats_rsf = np.hstack(train_feats_rsf)
        test_feats_rsf = np.hstack(test_feats_rsf)
        if all_train is None:
            all_train = train_feats_rsf
            all_test = test_feats_rsf
        else:
            all_train = np.hstack((all_train, train_feats_rsf))
            all_test = np.hstack((all_test, test_feats_rsf))

    for idx_xonf in range(self.n_xonf):
        print("Training XoNF#%d..." % idx_xonf)
        # TODO: `sample_size`, `max_features` parameters (maybe)
        curr_model = RandomXOfNForest(
            n_estimators=self.n_estimators_xonf,
            sample_size=0.05,
            n_jobs=-1)
        curr_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=train_feats,
            labels=train_labels,
            model=curr_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)

        curr_test_feats = np.zeros(
            (test_feats.shape[0], self.classes_.shape[0]))
        class_indices = curr_model.classes_
        curr_test_feats[:, class_indices] = curr_model.predict_proba(
            test_feats)

        layer_acc += curr_acc
        train_feats_xonf.append(curr_train_feats)
        test_feats_xonf.append(curr_test_feats)

    if self.n_xonf > 0:
        train_feats_xonf = np.hstack(train_feats_xonf)
        test_feats_xonf = np.hstack(test_feats_xonf)
        if all_train is None:
            all_train = train_feats_xonf
            all_test = test_feats_xonf
        else:
            all_train = np.hstack((all_train, train_feats_xonf))
            all_test = np.hstack((all_test, test_feats_xonf))

    if all_train is None:
        raise Exception("No models were specified for this layer!")

    layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
    self.kfold_acc = layer_acc
    print("Average LAYER accuracy is %f..." % self.kfold_acc)
    print("-------------------------------")

    return all_train, all_test
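# A minimal driver sketch for growing a deep-forest cascade out of the layer
# above (everything here is hypothetical: `CascadeLayer` stands in for the
# class these methods belong to, and its constructor arguments are assumed):
# each layer's class-distribution output is concatenated with the raw
# features and handed to the next layer, and growth stops once the k-fold
# layer accuracy no longer improves.
def _example_grow_cascade(train_feats, train_labels, test_feats, k_cv=3):
    curr_train, curr_test = train_feats, test_feats
    best_acc = -1.0
    while True:
        layer = CascadeLayer(n_crf=2, n_rf=2, n_rsf=0, n_xonf=0, k_cv=k_cv)
        new_train, new_test = layer.fit_transform(curr_train, train_labels,
                                                  curr_test)
        if layer.kfold_acc <= best_acc:
            break  # accuracy stopped improving -> stop adding layers
        best_acc = layer.kfold_acc
        # the next layer sees the raw features plus this layer's output
        curr_train = np.hstack((train_feats, new_train))
        curr_test = np.hstack((test_feats, new_test))
    return curr_train, curr_test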
def train_layer(self, feats, labels):
    """
    This method is currently not the main focus because caching is not yet
    implemented. `fit_transform(...)` is therefore better suited: it does
    not keep trained models in memory and does the fitting and predicting
    "simultaneously".

    tl;dr: prefer `fit_transform(...)` for now.
    """
    feats_crf, feats_rf, feats_rsf, feats_xonf = [], [], [], []
    all_train = None
    layer_acc = 0.0

    # train completely random forests
    for idx_crf in range(self.n_crf):
        crf_model = ExtraTreesClassifier(
            n_estimators=self.n_estimators_crf,
            max_features=1,
            n_jobs=-1)
        curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
            feats=feats,
            labels=labels,
            model=crf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        if self.keep_models:
            self.crf_estimators.append(curr_model)
        feats_crf.append(curr_feats)

    if self.n_crf > 0:
        feats_crf = np.hstack(feats_crf)
        all_train = feats_crf

    # train random forests
    for idx_rf in range(self.n_rf):
        rf_model = RandomForestClassifier(
            n_estimators=self.n_estimators_rf,
            n_jobs=-1)
        curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
            feats=feats,
            labels=labels,
            model=rf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        if self.keep_models:
            self.rf_estimators.append(curr_model)
        feats_rf.append(curr_feats)

    if self.n_rf > 0:
        feats_rf = np.hstack(feats_rf)
        all_train = feats_rf if all_train is None else np.hstack(
            (all_train, feats_rf))

    # train random subspace forests
    for idx_rsf in range(self.n_rsf):
        rsf_model = RandomSubspaceForest(
            n_estimators=self.n_estimators_rsf,
            n_features="sqrt",
            n_jobs=-1)
        curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
            feats=feats,
            labels=labels,
            model=rsf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        if self.keep_models:
            # store the trained model returned by get_class_distribution
            self.rsf_estimators.append(curr_model)
        feats_rsf.append(curr_feats)

    if self.n_rsf > 0:
        feats_rsf = np.hstack(feats_rsf)
        all_train = feats_rsf if all_train is None else np.hstack(
            (all_train, feats_rsf))

    # train random X-of-N forests
    for idx_xonf in range(self.n_xonf):
        xonf_model = RandomXOfNForest(
            n_estimators=self.n_estimators_xonf,
            sample_size=0.05,
            n_jobs=-1)
        curr_model, curr_feats, curr_acc = common_utils.get_class_distribution(
            feats=feats,
            labels=labels,
            model=xonf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        if self.keep_models:
            # store the trained model returned by get_class_distribution
            self.xonf_estimators.append(curr_model)
        feats_xonf.append(curr_feats)

    if self.n_xonf > 0:
        feats_xonf = np.hstack(feats_xonf)
        all_train = feats_xonf if all_train is None else np.hstack(
            (all_train, feats_xonf))

    layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
    self.kfold_acc = layer_acc
    print("Average LAYER accuracy is %f..." % self.kfold_acc)

    return all_train
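# Hypothetical companion to `train_layer` (a sketch, not part of the class
# API): when `keep_models` is True, new data can be pushed through the stored
# estimators to produce features with the same layout as the training output,
# i.e. one aligned class-probability block per model, stacked in model order.
def _example_transform_with_kept_models(layer, test_feats):
    blocks = []
    for model in (layer.crf_estimators + layer.rf_estimators +
                  layer.rsf_estimators + layer.xonf_estimators):
        block = np.zeros((test_feats.shape[0], layer.classes_.shape[0]))
        # predict_proba only covers classes seen during fitting, so scatter
        # its columns into the full class space
        block[:, model.classes_] = model.predict_proba(test_feats)
        blocks.append(block)
    return np.hstack(blocks)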
def fit_transform(self, train_feats, train_labels, test_feats):
    sliced_train = self.slice_data(train_feats)
    sliced_test = self.slice_data(test_feats)
    print("Successfully sliced TRAINING data for window size %s and stride %s "
          "----> shape of slices: %s..."
          % (str(self.wind_size), str(self.stride), str(sliced_train.shape)))
    print("Successfully sliced TEST data for window size %s and stride %s "
          "----> shape of slices: %s..."
          % (str(self.wind_size), str(self.stride), str(sliced_test.shape)))

    # because labels do not get appended to sliced features in slice_data,
    # it is done here
    multiply_factor = int(sliced_train.shape[0] / train_feats.shape[0])
    train_labels = np.tile(np.reshape(train_labels, [-1, 1]),
                           (1, multiply_factor)).flatten()

    feats_crf_train, feats_crf_test = [], []
    feats_rf_train, feats_rf_test = [], []
    feats_rsf_train, feats_rsf_test = [], []
    feats_xonf_train, feats_xonf_test = [], []
    all_train, all_test = None, None
    layer_acc = 0.0

    for idx_crf in range(self.n_crf):
        print("Training CRF#%d..." % idx_crf)
        crf_model = ExtraTreesClassifier(
            n_estimators=self.n_estimators_crf,
            max_features=1,
            min_samples_leaf=10,
            max_depth=100,
            n_jobs=-1)
        # fit
        crf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=sliced_train,
            labels=train_labels,
            model=crf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        # predict
        curr_test_feats = np.zeros(
            (sliced_test.shape[0], self.classes_.shape[0]))
        class_indices = crf_model.classes_
        curr_test_feats[:, class_indices] = crf_model.predict_proba(
            sliced_test)
        # combine probabilities for slices of the same example together
        feats_crf_train.append(
            curr_train_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        feats_crf_test.append(
            curr_test_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        layer_acc += curr_acc

    if self.n_crf > 0:
        feats_crf_train = np.hstack(feats_crf_train)
        feats_crf_test = np.hstack(feats_crf_test)
        all_train = feats_crf_train
        all_test = feats_crf_test

    for idx_rf in range(self.n_rf):
        print("Training RF#%d..." % idx_rf)
        rf_model = RandomForestClassifier(
            n_estimators=self.n_estimators_rf,
            min_samples_leaf=10,
            max_depth=100,
            n_jobs=-1)
        # fit
        rf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=sliced_train,
            labels=train_labels,
            model=rf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        # predict
        curr_test_feats = np.zeros(
            (sliced_test.shape[0], self.classes_.shape[0]))
        class_indices = rf_model.classes_
        curr_test_feats[:, class_indices] = rf_model.predict_proba(
            sliced_test)
        # combine probabilities for slices of the same example together
        feats_rf_train.append(
            curr_train_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        feats_rf_test.append(
            curr_test_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        layer_acc += curr_acc

    if self.n_rf > 0:
        feats_rf_train = np.hstack(feats_rf_train)
        feats_rf_test = np.hstack(feats_rf_test)
        if all_train is None:
            all_train = feats_rf_train
            all_test = feats_rf_test
        else:
            all_train = np.hstack((all_train, feats_rf_train))
            all_test = np.hstack((all_test, feats_rf_test))

    for idx_rsf in range(self.n_rsf):
        print("Training RSF#%d..." % idx_rsf)
        rsf_model = RandomSubspaceForest(
            n_estimators=self.n_estimators_rsf,
            min_samples_leaf=10,
            max_depth=100,
            n_features="sqrt",
            n_jobs=-1)
        # fit
        rsf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=sliced_train,
            labels=train_labels,
            model=rsf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        # predict
        curr_test_feats = np.zeros(
            (sliced_test.shape[0], self.classes_.shape[0]))
        class_indices = rsf_model.classes_
        curr_test_feats[:, class_indices] = rsf_model.predict_proba(
            sliced_test)
        # combine probabilities for slices of the same example together
        feats_rsf_train.append(
            curr_train_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        feats_rsf_test.append(
            curr_test_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        layer_acc += curr_acc

    if self.n_rsf > 0:
        feats_rsf_train = np.hstack(feats_rsf_train)
        feats_rsf_test = np.hstack(feats_rsf_test)
        if all_train is None:
            all_train = feats_rsf_train
            all_test = feats_rsf_test
        else:
            all_train = np.hstack((all_train, feats_rsf_train))
            all_test = np.hstack((all_test, feats_rsf_test))

    for idx_xonf in range(self.n_xonf):
        print("Training XoNF#%d..." % idx_xonf)
        xonf_model = RandomXOfNForest(
            n_estimators=self.n_estimators_xonf,
            min_samples_leaf=10,
            max_depth=100,
            sample_size=0.05,
            n_jobs=-1)
        # fit
        xonf_model, curr_train_feats, curr_acc = common_utils.get_class_distribution(
            feats=sliced_train,
            labels=train_labels,
            model=xonf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        # predict
        curr_test_feats = np.zeros(
            (sliced_test.shape[0], self.classes_.shape[0]))
        class_indices = xonf_model.classes_
        curr_test_feats[:, class_indices] = xonf_model.predict_proba(
            sliced_test)
        # combine probabilities for slices of the same example together
        feats_xonf_train.append(
            curr_train_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        feats_xonf_test.append(
            curr_test_feats.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        layer_acc += curr_acc

    if self.n_xonf > 0:
        feats_xonf_train = np.hstack(feats_xonf_train)
        feats_xonf_test = np.hstack(feats_xonf_test)
        if all_train is None:
            all_train = feats_xonf_train
            all_test = feats_xonf_test
        else:
            all_train = np.hstack((all_train, feats_xonf_train))
            all_test = np.hstack((all_test, feats_xonf_test))

    if all_train is None:
        raise Exception("No models were specified for this Grain!")

    layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
    self.kfold_acc = layer_acc
    print("Average LAYER accuracy is %f..." % self.kfold_acc)

    return all_train, all_test
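# The Grain methods rely on `slice_data`, which is not shown in this section.
# The sketch below captures the sliding-window behaviour they assume (an
# inference from how `multiply_factor` is computed, not the actual
# implementation): each row of width d yields (d - wind_size) // stride + 1
# windows, so the sliced matrix has `multiply_factor` times as many rows as
# the input, with all windows of one example stored contiguously.
def _sketch_slice_data(feats, wind_size, stride):
    windows = []
    for row in feats:
        for start in range(0, row.shape[0] - wind_size + 1, stride):
            windows.append(row[start:start + wind_size])
    return np.vstack(windows)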
def create(self, features, labels):
    # ------------------------------------------------------------------
    # NOTE: preferably use fit_transform(...) instead (more thoroughly
    # tested and less memory hungry)
    # ------------------------------------------------------------------
    # TODO (low priority): refactor
    sliced_data = self.slice_data(features)
    print("Successfully sliced data for window size %s and stride %s "
          "----> shape of slices: %s..."
          % (str(self.wind_size), str(self.stride), str(sliced_data.shape)))

    # because labels do not get appended to sliced features in slice_data,
    # it is done here
    multiply_factor = int(sliced_data.shape[0] / features.shape[0])
    labels = np.tile(np.reshape(labels, [-1, 1]),
                     (1, multiply_factor)).flatten()

    feats_crf, feats_rf, feats_rsf, feats_xonf = [], [], [], []
    all_train = None
    layer_acc = 0.0

    for idx_crf in range(self.n_crf):
        crf_model = ExtraTreesClassifier(
            n_estimators=self.n_estimators_crf,
            max_features=1,
            min_samples_leaf=10,
            max_depth=100,
            n_jobs=-1)
        print("Training CRF#%d..." % idx_crf)
        crf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
            feats=sliced_data,
            labels=labels,
            model=crf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        # combine predictions for slices of the same example together
        feats_crf.append(
            curr_proba_preds.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        # save trained model
        self.crf_estimators.append(crf_model)

    if self.n_crf > 0:
        feats_crf = np.hstack(feats_crf)
        all_train = feats_crf

    for idx_rf in range(self.n_rf):
        rf_model = RandomForestClassifier(
            n_estimators=self.n_estimators_rf,
            min_samples_leaf=10,
            max_depth=100,
            n_jobs=-1)
        print("Training RF#%d..." % idx_rf)
        rf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
            feats=sliced_data,
            labels=labels,
            model=rf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        # combine predictions for slices of the same example together
        feats_rf.append(
            curr_proba_preds.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        # save trained model
        self.rf_estimators.append(rf_model)

    if self.n_rf > 0:
        feats_rf = np.hstack(feats_rf)
        all_train = feats_rf if all_train is None else np.hstack(
            (all_train, feats_rf))

    for idx_rsf in range(self.n_rsf):
        rsf_model = RandomSubspaceForest(
            n_estimators=self.n_estimators_rsf,
            min_samples_leaf=10,
            max_depth=100,
            n_features="sqrt",
            n_jobs=-1)
        print("Training RSF#%d..." % idx_rsf)
        rsf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
            feats=sliced_data,
            labels=labels,
            model=rsf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        # combine predictions for slices of the same example together
        feats_rsf.append(
            curr_proba_preds.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        # save trained model
        self.rsf_estimators.append(rsf_model)

    if self.n_rsf > 0:
        feats_rsf = np.hstack(feats_rsf)
        all_train = feats_rsf if all_train is None else np.hstack(
            (all_train, feats_rsf))

    for idx_xonf in range(self.n_xonf):
        xonf_model = RandomXOfNForest(
            n_estimators=self.n_estimators_xonf,
            min_samples_leaf=10,
            max_depth=100,
            sample_size=0.05,
            n_jobs=-1)
        print("Training XoNF#%d..." % idx_xonf)
        xonf_model, curr_proba_preds, curr_acc = common_utils.get_class_distribution(
            feats=sliced_data,
            labels=labels,
            model=xonf_model,
            num_all_classes=self.classes_.shape[0],
            k_cv=self.k_cv)
        layer_acc += curr_acc
        # combine predictions for slices of the same example together
        feats_xonf.append(
            curr_proba_preds.reshape(
                [-1, multiply_factor * self.classes_.shape[0]]))
        # save trained model
        self.xonf_estimators.append(xonf_model)

    if self.n_xonf > 0:
        feats_xonf = np.hstack(feats_xonf)
        all_train = feats_xonf if all_train is None else np.hstack(
            (all_train, feats_xonf))

    if all_train is None:
        raise Exception("No models were specified for this Grain!")

    layer_acc /= (self.n_rf + self.n_crf + self.n_rsf + self.n_xonf)
    self.kfold_acc = layer_acc
    print("Average LAYER accuracy is %f..." % self.kfold_acc)

    return all_train
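# Worked illustration of the reshape used above to merge per-slice
# predictions (a standalone sketch with made-up sizes): with 2 original
# examples, multiply_factor = 3 slices per example and 4 classes, the (6, 4)
# slice-level probability matrix becomes a (2, 12) matrix in which each row
# concatenates the 3 slice-level class distributions of one example. This
# works because slices of the same example occupy contiguous rows.
def _sketch_merge_slice_probas():
    multiply_factor, num_classes = 3, 4
    sliced_proba = np.arange(2 * multiply_factor * num_classes,
                             dtype=float).reshape(-1, num_classes)
    merged = sliced_proba.reshape([-1, multiply_factor * num_classes])
    assert merged.shape == (2, multiply_factor * num_classes)
    return merged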