예제 #1
0
    def optimize(self,
                 train,
                 rare_thresh=100,
                 size=5000,
                 tune_mode="paramwise",
                 as_text=False,
                 cached_params=False):
        """Find the best hyperparameters for this model.

        Either reuses cached best parameters (``cached_params=True``) or
        runs a tuning pass via ``self.train`` in *tune_mode*, then reports
        the chosen settings on stderr.

        :param train: training data handed straight to ``self.train``
        :param rare_thresh: rarity threshold forwarded to ``self.train``
        :param size: sample size forwarded to ``self.train``
        :param tune_mode: tuning strategy forwarded to ``self.train``
        :param as_text: forwarded to ``self.train`` (presumably: *train* is
            raw text rather than a path — confirm against caller)
        :param cached_params: reuse stored best params instead of searching
        :return: (classifier, [], best_params) — this variant selects no
            features, so the middle element is always an empty list
        """

        sys.stderr.write("o Tuning hyperparameters\n\n")

        if cached_params:
            # Look up previously stored best settings for this corpus/model.
            clf, best_params, _ = get_best_params(self.corpus, self.name)
            sys.stderr.write("\no Using cached best hyperparameters\n")
        else:
            # Search for the best settings via the model's own train routine.
            clf, best_params = self.train(train,
                                          rare_thresh=rare_thresh,
                                          tune_mode=tune_mode,
                                          size=size,
                                          as_text=as_text)
            sys.stderr.write("\no Found best hyperparameters\n")

        # Report each chosen hyperparameter, one tab-separated line apiece.
        report = "".join(name + "\t" + str(value) + "\n"
                         for name, value in best_params.items())
        sys.stderr.write(report)
        sys.stderr.write("\n")

        return clf, [], best_params
예제 #2
0
    def optimize(self,
                 train,
                 rare_thresh=100,
                 size=5000,
                 tune_mode="paramwise",
                 cached_params=False,
                 as_text=False):
        """Select useful features, then find the best hyperparameters.

        First runs ``self.train`` in ``"importances"`` mode to pick useful
        categorical and numerical features, then either reuses cached best
        parameters (``cached_params=True``) or runs a tuning pass in
        *tune_mode*. Progress and results are reported on stderr.

        :param train: training data handed straight to ``self.train``
        :param rare_thresh: rarity threshold forwarded to ``self.train``
        :param size: sample size forwarded to ``self.train``
        :param tune_mode: tuning strategy forwarded to ``self.train``
        :param cached_params: reuse stored best params instead of searching
        :param as_text: forwarded to ``self.train`` (presumably: *train* is
            raw text rather than a path — confirm against caller)
        :return: (best_clf, selected_feats, best_params)
        """

        # Estimate useful features on a random sample of |size| instances.
        # FIX: forward rare_thresh/as_text instead of hard-coding 100/False,
        # so the importance pass reads the data with the same settings as
        # the tuning pass below.
        selected_cat, selected_num = self.train(train,
                                                model_path=None,
                                                rare_thresh=rare_thresh,
                                                as_text=as_text,
                                                size=size,
                                                tune_mode="importances")
        selected_feats = selected_cat + selected_num
        sys.stderr.write("o Chose " + str(len(selected_feats)) +
                         " features: " + ",".join(selected_feats) + "\n")
        sys.stderr.write("o Tuning hyperparameters\n\n")

        # Optimize hyperparameters via grid search or hyperopt
        if cached_params:
            # Look up previously stored best settings for this corpus/model.
            best_clf, best_params, _ = get_best_params(self.corpus, self.name)
            sys.stderr.write("\no Using cached best hyperparameters\n")
        else:
            best_clf, best_params = self.train(train,
                                               rare_thresh=rare_thresh,
                                               tune_mode=tune_mode,
                                               size=size,
                                               as_text=as_text)
            sys.stderr.write("\no Found best hyperparameters\n")
        # Report each chosen hyperparameter, one tab-separated line apiece.
        for key, val in best_params.items():
            sys.stderr.write(key + "\t" + str(val) + "\n")
        sys.stderr.write("\n")

        return best_clf, selected_feats, best_params
예제 #3
0
                   chosen_feats=vars,
                   rare_thresh=200,
                   clf_params=best_params,
                   as_text=False,
                   multitrain=True)
     else:
         est.train(train,
                   rare_thresh=200,
                   clf_params=best_params,
                   as_text=False,
                   chosen_clf=clf)
 elif "train" in mode:
     if opts.best_params and est.name in [
             "SubtreeSegmenter", "EnsembleSegmenter"
     ]:
         best_clf, params, feats = get_best_params(corpus, est.name)
     else:
         best_clf = None
         params = None
         feats = None
     if est.name == "SubtreeSegmenter":
         est.train(train,
                   rare_thresh=200,
                   as_text=False,
                   multitrain=True,
                   chosen_clf=best_clf,
                   clf_params=params,
                   chosen_feats=feats)
     elif est.name == "RNNSegmenter":
         est.train(train, as_text=False, multifolds=5)
     elif est.name == "EnsembleSegmenter":
예제 #4
0
     # Now train on whole training set with those variables
     sys.stderr.write("\no Training best configuration\n")
     e.train(train,
             rare_thresh=200,
             clf_params=best_params,
             as_text=False,
             chosen_clf=best_clf,
             chosen_feats=vars,
             size=220000)
 elif "train" in opts.mode:
     tune_mode = None if opts.tune_mode != "hyperopt" else "hyperopt"
     feats = None
     params = None
     best_clf = None
     if opts.best_params:
         best_clf, params, feats = get_best_params(
             corpus, "EnsembleSentencer")
         if len(feats) == 0:
             feats = None
     e.train(train,
             chosen_feats=feats,
             as_text=False,
             tune_mode=tune_mode,
             clf_params=params,
             chosen_clf=best_clf,
             size=220000)
 if "test" in opts.mode:
     if opts.eval_test:
         conf_mat, prec, rec, f1 = e.predict(test,
                                             eval_gold=True,
                                             as_text=False)
     else:
예제 #5
0
                                                size=5000,
                                                tune_mode=opts.tune_mode,
                                                cached_params=opts.best_params)
            if "best_score" in best_params:
                best_params.pop("best_score")
            # Now train on whole training set with those variables
            sys.stderr.write("\no Training best configuration\n")
            e.train(train,
                    rare_thresh=100,
                    clf_params=best_params,
                    as_text=False,
                    chosen_clf=clf)
        elif "train" in opts.mode:
            if opts.best_params:
                best_clf, best_params, _ = get_best_params(corpus,
                                                           e.name,
                                                           auto=auto)
            else:
                best_clf = None
            sys.stderr.write("\no Training on corpus " + corpus + "\n")
            tune_mode = None if opts.tune_mode != "hyperopt" else "hyperopt"
            e.train(train,
                    as_text=False,
                    tune_mode=tune_mode,
                    chosen_clf=best_clf,
                    clf_params=best_params)

        if "test" in opts.mode:
            if opts.eval_test:
                conf_mat, prec, rec, f1 = e.predict(test,
                                                    eval_gold=True,