def get_regressor(x, y, n_estimators=1500, pCut=0.05, n_tries=5,
                  verbose=False):
    """Fit several ExtraTreesRegressors and keep the best one by OOB score.

    ``n_tries`` forests are fitted on the same data, differing only in
    ``random_state`` (0, 1, ..., ``n_tries - 1``). The forest with the
    highest out-of-bag score (``oob_score_``) is returned.

    Parameters
    ----------
    x : pandas.DataFrame or array-like
        Predictor matrix. When it has a ``columns`` attribute, those labels
        index the feature importances; otherwise a positional index is used.
    y : array-like
        Target values.
    n_estimators : int, optional
        Number of trees in each forest.
    pCut : float, optional
        Not used by this function; kept so existing callers that pass it
        keep working.
    n_tries : int, optional
        Number of forests to fit. Must be at least 1.
    verbose : bool, optional
        If True, write progress markers to stderr.

    Returns
    -------
    regressor : sklearn.ensemble.ExtraTreesRegressor
        The fitted forest with the highest out-of-bag score. It carries an
        extra ``feature_importances`` attribute (no trailing underscore):
        a pandas.Series of ``feature_importances_``.
    oob_scores : list of float
        Out-of-bag score of every fitted forest, in the order tried.

    Raises
    ------
    ValueError
        If ``n_tries`` is smaller than 1.
    """
    # Fail before any fitting instead of letting np.argmax choke on an
    # empty list at the very end.
    if n_tries < 1:
        raise ValueError('n_tries must be at least 1, got %r' % (n_tries,))

    if verbose:
        sys.stderr.write('getting regressor\n')

    clfs = []
    oob_scores = []
    for i in range(n_tries):
        if verbose:
            sys.stderr.write('%d.' % i)
        # The loop index doubles as the seed so every try is reproducible.
        clf = ExtraTreesRegressor(n_estimators=n_estimators,
                                  oob_score=True,
                                  bootstrap=True,
                                  max_features='sqrt',
                                  n_jobs=1,
                                  random_state=i).fit(x, y)
        clfs.append(clf)
        oob_scores.append(clf.oob_score_)

    clf = clfs[np.argmax(oob_scores)]
    # Plain arrays have no column labels; index=None makes pandas fall back
    # to a default positional index instead of raising AttributeError.
    clf.feature_importances = pd.Series(clf.feature_importances_,
                                        index=getattr(x, 'columns', None))
    return clf, oob_scores
def get_regressor(x, y, n_estimators=1500, n_tries=5, verbose=False):
    """Calculate an ExtraTreesRegressor on predictor and target variables

    The regression is attempted ``n_tries`` times, each time with a
    different ``random_state`` (the attempt number), and the attempt with
    the highest out-of-bag score is kept.

    Parameters
    ----------
    x : pandas.DataFrame or numpy.array
        Predictor matrix. Column labels, when present, are used to index
        the feature importances; a plain array gets a positional index.
    y : numpy.array
        Target vector
    n_estimators : int, optional
        Number of estimators to use
    n_tries : int, optional
        Number of attempts to calculate regression. Must be at least 1.
    verbose : bool, optional
        If True, output progress statements to stderr

    Returns
    -------
    regressor : sklearn.ensemble.ExtraTreesRegressor
        The regressor with the highest out of bag score of all the
        attempted "tries". A ``feature_importances`` attribute (a
        pandas.Series built from ``feature_importances_``) is attached
        to it.
    oob_scores : list of float
        Out of bag scores of every attempted regressor, in attempt order

    Raises
    ------
    ValueError
        If ``n_tries`` is less than 1.
    """
    # Reject an empty run up front; otherwise the failure only surfaces as
    # an opaque argmax error after the loop.
    if n_tries < 1:
        raise ValueError('n_tries must be at least 1, got %r' % (n_tries,))

    if verbose:
        sys.stderr.write('Getting regressor\n')

    clfs = []
    oob_scores = []
    for i in range(n_tries):
        if verbose:
            sys.stderr.write('%d.' % i)
        # Seeding with the attempt number keeps each attempt reproducible.
        clf = ExtraTreesRegressor(n_estimators=n_estimators,
                                  oob_score=True,
                                  bootstrap=True,
                                  max_features='sqrt',
                                  n_jobs=1,
                                  random_state=i).fit(x, y)
        clfs.append(clf)
        oob_scores.append(clf.oob_score_)

    clf = clfs[np.argmax(oob_scores)]
    # x may be a bare array without ``columns``; passing index=None lets
    # pandas assign a default positional index in that case.
    feature_index = getattr(x, 'columns', None)
    clf.feature_importances = pd.Series(clf.feature_importances_,
                                        index=feature_index)
    return clf, oob_scores