class ExtraTreesRegressorImpl:
    """Adapter that forwards hyperparameters and fit/predict calls to ``Op``.

    The constructor arguments are collected into ``self._hyperparams`` and
    passed unchanged, as keyword arguments, to ``Op`` (defined elsewhere in
    this module); the resulting object is kept in ``self._wrapped_model``.
    """

    def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
                 min_samples_split=2, min_samples_leaf=1,
                 min_weight_fraction_leaf=0.0, max_features='auto',
                 max_leaf_nodes=None, min_impurity_decrease=0.0,
                 min_impurity_split=None, bootstrap=False, oob_score=False,
                 n_jobs=None, random_state=None, verbose=0, warm_start=False):
        """Record every hyperparameter and build the wrapped model from them."""
        # Keyword order mirrors the signature so the stored mapping keeps the
        # same key order as the parameters above.
        self._hyperparams = dict(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
        )
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``; otherwise the wrapped
        model's ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return whatever the wrapped model's ``predict`` returns for ``X``."""
        return self._wrapped_model.predict(X)
class ExtremelyRandomizeTreeEstimator(Estimator):
    """Value estimator backed by an ``ExtraTreesRegressor`` with 30 trees.

    Until ``train`` has completed successfully the estimator returns ``0``
    for every query; afterwards it returns the regressor's prediction for
    the concatenated ``state`` / ``action`` feature vector.
    """

    def __init__(self):
        """Create the untrained regressor and mark the estimator as unfitted."""
        self.estimator = ExtraTreesRegressor(n_estimators=30)
        self.initialized = False

    def __call__(self, state, action):
        """Return the predicted value for ``(state, action)``.

        Args:
            state: Sequence of state features; it is copied into a list, so
                lists, tuples and 1-D arrays are all concatenated the same way.
            action: Indexable whose first two entries are appended to the
                state features.

        Returns:
            ``0`` if the estimator has not been trained yet, otherwise the
            first (and only) element of the regressor's prediction.
        """
        if not self.initialized:
            return 0
        # list(state) guarantees concatenation: with a bare `state + [...]`
        # a tuple raises TypeError and a NumPy array is added element-wise.
        features = list(state) + [action[0], action[1]]
        x = np.array(features).reshape(1, -1)
        return self.estimator.predict(x)[0]

    def train(self, train_in, train_out):
        """Fit the regressor on ``train_in`` / ``train_out``.

        Args:
            train_in: Training inputs, converted with ``np.array`` before
                fitting.
            train_out: Training targets, passed to ``fit`` unchanged.
        """
        train_in_formatted = np.array(train_in)
        self.estimator.fit(train_in_formatted, train_out)
        # Flip the flag only after fit() returns, so a failed fit never leaves
        # the estimator claiming to be ready for prediction.
        self.initialized = True
# Transform the training target with a Yeo-Johnson power transform; the
# transformer is fitted on a single-column view and the result is squeezed
# back to drop the extra axis.
qtfm = PowerTransformer(method='yeo-johnson')
y_train = np.squeeze(qtfm.fit_transform(y_train_tmp.values.reshape(-1, 1)))

# Score features against the transformed target and tabulate name/score pairs.
selidx, selscore, _ = sel(X_train.values, y_train, n_selected_features=numfeat)
selscoredf = pd.DataFrame(
    data=np.vstack((X_train.columns[selidx].values, selscore)).T,
    columns=['Feature', 'Score'],
)
X_train_selected = X_train.iloc[:, selidx[:numfeat]]
print(X_train_selected.columns.values)

# Fit the regressor.
# NOTE(review): the model is fitted on the full X_train, not on
# X_train_selected -- confirm that ignoring the selection is intended.
print("Train classifier...")
clf = ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate)
clf.fit(X_train, y_train)

# Persist the fitted model so it can be reused later without retraining
# (e.g. via `clf = load(clfpath)`).
dump(clf, clfpath)
print("Training complete...")

# Validation set: load the features (indexed by "ID") and predict.
validationfeatures = pd.read_csv(
    "/media/yannick/c4a7e8d3-9ac5-463f-b6e6-92e216ae6ac0/BRATS/BraTS2020/validationfeat_normalized.csv",
    index_col="ID",
)
# NOTE(review): the model was fitted on the power-transformed target, so these
# predictions are in the transformed space unless inverted further down.
y_pred_validation = clf.predict(validationfeatures)
pred_validation_df = pd.DataFrame(
    data=zip(validationfeatures.index.values, y_pred_validation),
    columns=["ID", "Prediction"],
)