def optimize(self, train, rare_thresh=100, size=5000, tune_mode="paramwise", as_text=False, cached_params=False):
    """Pick the best classifier and hyperparameter settings for this estimator.

    :param train: training data handed through to ``self.train`` (path or raw
        text, depending on *as_text*)
    :param rare_thresh: frequency cutoff forwarded to ``self.train``
    :param size: sample size forwarded to ``self.train`` for tuning
    :param tune_mode: hyperparameter search strategy (e.g. "paramwise")
    :param as_text: whether *train* is raw text rather than a file path
    :param cached_params: if True, reuse stored best parameters instead of
        searching again
    :return: tuple ``(classifier, selected_features, best_params)``; this
        variant performs no feature selection, so the feature list is always
        empty
    """
    sys.stderr.write("o Tuning hyperparameters\n\n")

    # Either load previously stored settings, or run the search via train()
    if cached_params:
        clf, best_params, _ = get_best_params(self.corpus, self.name)
        sys.stderr.write("\no Using cached best hyperparameters\n")
    else:
        clf, best_params = self.train(train, rare_thresh=rare_thresh, tune_mode=tune_mode, size=size, as_text=as_text)
        sys.stderr.write("\no Found best hyperparameters\n")

    # Report the chosen settings on stderr, one "name<TAB>value" pair per line
    for param_name, param_val in best_params.items():
        sys.stderr.write(f"{param_name}\t{param_val}\n")
    sys.stderr.write("\n")

    return clf, [], best_params
def optimize(self, train, rare_thresh=100, size=5000, tune_mode="paramwise", cached_params=False, as_text=False):
    """Run feature selection, then tune hyperparameters for this estimator.

    :param train: training data handed through to ``self.train`` (path or raw
        text, depending on *as_text*)
    :param rare_thresh: frequency cutoff forwarded to ``self.train``
    :param size: sample size used both for feature-importance estimation and
        for hyperparameter tuning
    :param tune_mode: hyperparameter search strategy ("paramwise", "hyperopt", ...)
    :param cached_params: if True, reuse stored best parameters instead of
        searching again
    :param as_text: whether *train* is raw text rather than a file path
    :return: tuple ``(best_clf, selected_feats, best_params)``
    """
    # Estimate useful features on a random sample of |size| instances.
    # BUGFIX: forward the caller's rare_thresh/as_text to the importance pass
    # instead of hard-coding the defaults (100/False); with default arguments
    # the behavior is unchanged, but non-default settings now also govern
    # feature selection instead of being silently ignored.
    selected_cat, selected_num = self.train(train, model_path=None, rare_thresh=rare_thresh, as_text=as_text, size=size, tune_mode="importances")
    selected_feats = selected_cat + selected_num
    sys.stderr.write("o Chose " + str(len(selected_feats)) + " features: " + ",".join(selected_feats) + "\n")

    sys.stderr.write("o Tuning hyperparameters\n\n")

    # Optimize hyperparameters via grid search or hyperopt
    if cached_params:
        best_clf, best_params, _ = get_best_params(self.corpus, self.name)
        sys.stderr.write("\no Using cached best hyperparameters\n")
    else:
        best_clf, best_params = self.train(train, rare_thresh=rare_thresh, tune_mode=tune_mode, size=size, as_text=as_text)
        sys.stderr.write("\no Found best hyperparameters\n")

    # Report the chosen settings on stderr, one "name<TAB>value" pair per line
    for key, val in best_params.items():
        sys.stderr.write(key + "\t" + str(val) + "\n")
    sys.stderr.write("\n")

    return best_clf, selected_feats, best_params
chosen_feats=vars, rare_thresh=200, clf_params=best_params, as_text=False, multitrain=True) else: est.train(train, rare_thresh=200, clf_params=best_params, as_text=False, chosen_clf=clf) elif "train" in mode: if opts.best_params and est.name in [ "SubtreeSegmenter", "EnsembleSegmenter" ]: best_clf, params, feats = get_best_params(corpus, est.name) else: best_clf = None params = None feats = None if est.name == "SubtreeSegmenter": est.train(train, rare_thresh=200, as_text=False, multitrain=True, chosen_clf=best_clf, clf_params=params, chosen_feats=feats) elif est.name == "RNNSegmenter": est.train(train, as_text=False, multifolds=5) elif est.name == "EnsembleSegmenter":
# Now train on whole training set with those variables sys.stderr.write("\no Training best configuration\n") e.train(train, rare_thresh=200, clf_params=best_params, as_text=False, chosen_clf=best_clf, chosen_feats=vars, size=220000) elif "train" in opts.mode: tune_mode = None if opts.tune_mode != "hyperopt" else "hyperopt" feats = None params = None best_clf = None if opts.best_params: best_clf, params, feats = get_best_params( corpus, "EnsembleSentencer") if len(feats) == 0: feats = None e.train(train, chosen_feats=feats, as_text=False, tune_mode=tune_mode, clf_params=params, chosen_clf=best_clf, size=220000) if "test" in opts.mode: if opts.eval_test: conf_mat, prec, rec, f1 = e.predict(test, eval_gold=True, as_text=False) else:
size=5000, tune_mode=opts.tune_mode, cached_params=opts.best_params) if "best_score" in best_params: best_params.pop("best_score") # Now train on whole training set with those variables sys.stderr.write("\no Training best configuration\n") e.train(train, rare_thresh=100, clf_params=best_params, as_text=False, chosen_clf=clf) elif "train" in opts.mode: if opts.best_params: best_clf, best_params, _ = get_best_params(corpus, e.name, auto=auto) else: best_clf = None sys.stderr.write("\no Training on corpus " + corpus + "\n") tune_mode = None if opts.tune_mode != "hyperopt" else "hyperopt" e.train(train, as_text=False, tune_mode=tune_mode, chosen_clf=best_clf, clf_params=best_params) if "test" in opts.mode: if opts.eval_test: conf_mat, prec, rec, f1 = e.predict(test, eval_gold=True,