Beispiel #1
0
    def search_grid(self, X, y, param_grid, verbose):
        """Cross-validate every parameter combination and keep the best one.

        For each combination produced by ``ParameterGrid`` a CRF trainer is
        configured, trained on every training fold and scored on the matching
        test fold.  When the mean fold score exceeds ``self.best_score``,
        ``self.best_score``, ``self.best_param`` and ``self.best_algorithm``
        are overwritten.  Progress is printed to stdout.

        Args:
            X: Sequences of features.  Indexed with the fold index arrays and
                queried via ``.shape[0]`` -- presumably a numpy array;
                TODO confirm against callers.
            y: Label sequences aligned with ``X``, indexed the same way.
            param_grid: Mapping of parameter name to candidate values.  Keys
                starting with ``'__'`` are control options and are not passed
                to ``ParameterGrid``:

                * ``'__algorithm'``: training algorithm to use instead of
                  ``self.best_algorithm``.
                * ``'__best_parameter'``: if truthy, ``self.param_base`` is
                  replaced by a copy of ``self.best_param`` before searching.
            verbose: Forwarded to ``crf.Trainer``.

        Returns:
            None.  Results are stored on ``self``.
        """
        algorithm = param_grid.get('__algorithm', self.best_algorithm)

        if param_grid.get('__best_parameter'):
            self.param_base = self.best_param.copy()

        grid = ParameterGrid({
            name: values
            for name, values in param_grid.items()
            if not name.startswith('__')
        })
        for param in grid:
            trainer = crf.Trainer(verbose=verbose)
            param_train = self.param_base.copy()
            param_train.update(param)
            trainer.select(algorithm, self.graphical_model)
            trainer.set_params(param_train)

            if isinstance(self.cv, int):
                # A fresh shuffled split is drawn for every parameter
                # combination (kept as in the original behaviour).
                cv = KFold(n=len(X),
                           n_folds=self.cv,
                           shuffle=True,
                           random_state=None)
            else:
                # Previously `cv` was left unbound here, which raised
                # UnboundLocalError for any non-integer `self.cv`.
                # NOTE(review): a one-shot iterator would be exhausted after
                # the first parameter combination -- confirm `self.cv` is
                # re-iterable.
                cv = self.cv

            print('Parameter: (%s) %s' % (algorithm, param_train))
            cv_score = []
            for j, (train_idx, test_idx) in enumerate(cv):
                X_train, y_train = X[train_idx], y[train_idx]
                X_test, y_test = X[test_idx], y[test_idx]

                for xseq, yseq in zip(X_train, y_train):
                    trainer.append(xseq, yseq)
                start = time.time()
                trainer.train('model')
                fit_elapsed_in_sec = time.time() - start
                # Drop the appended sequences so the next fold starts empty.
                trainer.clear()

                tagger = crf.Tagger()
                tagger.open('model')
                try:
                    start = time.time()
                    y_pred = [tagger.tag(xseq) for xseq in X_test]
                    predict_elapsed_in_sec = time.time() - start
                finally:
                    # Release the model even if tagging fails.
                    tagger.close()
                score = self.scorer(y_pred, y_test)

                print(
                    '  cv(%i): score %.4f, train size %i, test size %i, train elapsed %.4f sec, test elapsed %.4f sec'
                    % (j, score, X_train.shape[0], X_test.shape[0],
                       fit_elapsed_in_sec, predict_elapsed_in_sec))
                cv_score.append(score)

            score = np.mean(cv_score)
            if self.best_score < score:
                self.best_score = score
                # `param_train` is a fresh copy per combination, so storing
                # the reference is safe.
                self.best_param = param_train
                self.best_algorithm = algorithm