def train(self, tagged_essays, max_epochs):

        ys_by_sent = self.get_label_data(tagged_essays)

        for i in range(0, max_epochs):
            self.epoch += 1
            self.log("Epoch: {epoch}".format(epoch=self.epoch))

            # TODO - provide option for different model types here?
            parse_examples = WeightedExamples(labels=PARSE_ACTIONS,
                                              positive_value=self.positive_val)
            crel_examples = WeightedExamples(labels=None,
                                             positive_value=self.positive_val)

            pred_ys_by_sent = defaultdict(list)
            for essay_ix, essay in enumerate(tagged_essays):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    predicted_tags = essay.pred_tagged_sentences[sent_ix]
                    pred_relations = self.generate_training_data(
                        tagged_sentence, predicted_tags, parse_examples,
                        crel_examples)
                    # Store predictions for evaluation
                    self.add_cr_labels(pred_relations, pred_ys_by_sent)

            class2metrics = ResultsProcessor.compute_metrics(
                ys_by_sent, pred_ys_by_sent)
            micro_metrics = micro_rpfa(class2metrics.values())  # type: rpfa
            self.log(
                "Training Metrics: {metrics}".format(metrics=micro_metrics))

            self.train_parse_models(parse_examples)
            self.train_crel_models(crel_examples)

            self.training_datasets_parsing[self.epoch] = parse_examples
            self.training_datasets_crel[self.epoch] = crel_examples
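
micro_rpfa and rpfa are project helpers used throughout these examples but not shown here. A minimal sketch of what a micro-averaged rpfa could look like, assuming each per-class metrics object exposes raw tp/fp/fn/tn counts that can be pooled (an assumption; the real rpfa API may differ):

from collections import namedtuple

Rpfa = namedtuple("Rpfa", ["recall", "precision", "f1_score", "accuracy"])

def micro_rpfa_sketch(per_class_metrics):
    # Pool the raw counts across classes, then compute each metric once.
    tp = sum(m.tp for m in per_class_metrics)
    fp = sum(m.fp for m in per_class_metrics)
    fn = sum(m.fn for m in per_class_metrics)
    tn = sum(m.tn for m in per_class_metrics)
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    accuracy = (tp + tn) / (tp + fp + fn + tn) if (tp + fp + fn + tn) else 0.0
    return Rpfa(recall, precision, f1, accuracy)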
    def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
        '''Train a model from essays, and save it at ``save_loc``. ``nr_iter``
        controls the number of Perceptron training iterations.
        :param essay_feats: A list of essay objects whose sentences hold per-word features and tags.
        :param save_loc: If not ``None``, saves a pickled model in this location.
        :param nr_iter: Number of training iterations.
        '''

        # Copy, as we do an in-place shuffle below
        cp_essay_feats = list(essay_feats)

        tag_freq = defaultdict(int)
        for essay in cp_essay_feats:
            for tagged_sentence in essay.sentences:
                for wd in tagged_sentence:
                    fs_tags = self.__get_tags_(wd.tags)
                    tag_freq[fs_tags] += 1


        self.classes = {fs for fs, cnt in tag_freq.items() if cnt >= self.combo_freq_threshold}
        self.model = AveragedPerceptron(self.classes)
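        # The classes above form a label powerset: each observed combination of tags is one class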

        for iter_ in range(nr_iter):
            class2predictions = defaultdict(list)
            class2tags = defaultdict(list)

            for essay_ix, essay in enumerate(cp_essay_feats):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    """ Start Sentence """
                    prev = list(self.START)

                    for i, wd in enumerate(tagged_sentence):
                        # Don't mutate the feat dictionary
                        shared_features = dict(wd.features.items())
                        # get all tagger predictions for previous 2 tags
                        self._add_secondary_tag_features(shared_features, prev)

                        tagger_feats = dict(shared_features.items())
                        # add more in depth features for this tag
                        actual = self.__get_tags_(wd.tags)

                        if self.use_tag_features:
                            self._add_tag_features(tagger_feats, wd.word, prev[-1], prev[-2])

                        guess = self.model.predict(tagger_feats)
                        self.model.update(actual, guess, tagger_feats)

                        prev.append(guess)
                        for cls in self.individual_tags:
                            class2predictions[cls].append(1 if cls in guess else 0)
                            class2tags[cls].append(1 if cls in actual else 0)

            random.shuffle(cp_essay_feats)
            class2metrics = ResultsProcessor.compute_metrics(class2tags, class2predictions)
            micro_metrics = micro_rpfa(class2metrics.values())
            if verbose:
                logging.info("Iter {0}: Micro Avg Metrics: {1}".format(iter_, str(micro_metrics)))

        self.model.average_weights()
        return None
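
The AveragedPerceptron used above follows the familiar predict/update/average_weights contract. For reference, a minimal sketch in that style, modelled on the well-known averaged-perceptron formulation (the project's actual class may differ in detail):

from collections import defaultdict

class AveragedPerceptronSketch(object):
    def __init__(self, classes):
        self.classes = classes
        self.weights = {}                 # feature -> {class: weight}
        self._totals = defaultdict(int)   # accumulated weight over time, per (feature, class)
        self._tstamps = defaultdict(int)  # iteration of the last update, per (feature, class)
        self.i = 0                        # number of updates seen

    def predict(self, features):
        scores = defaultdict(float)
        for feat, value in features.items():
            if feat not in self.weights or value == 0:
                continue
            for label, weight in self.weights[feat].items():
                scores[label] += value * weight
        # Deterministic tie-break on the stringified label
        return max(self.classes, key=lambda label: (scores[label], str(label)))

    def update(self, truth, guess, features):
        def upd_feat(c, f, w, v):
            param = (f, c)
            self._totals[param] += (self.i - self._tstamps[param]) * w
            self._tstamps[param] = self.i
            self.weights[f][c] = w + v

        self.i += 1
        if truth == guess:
            return
        for f in features:
            weights = self.weights.setdefault(f, {})
            upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
            upd_feat(guess, f, weights.get(guess, 0.0), -1.0)

    def average_weights(self):
        # Replace each weight with its average over all updates seen so far
        for feat, weights in self.weights.items():
            new_feat_weights = {}
            for clas, weight in weights.items():
                param = (feat, clas)
                total = self._totals[param] + (self.i - self._tstamps[param]) * weight
                averaged = round(total / float(self.i), 3)
                if averaged:
                    new_feat_weights[clas] = averaged
            self.weights[feat] = new_feat_weights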
Example #3
    def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
        '''Train a model from essays, and save it at ``save_loc``. ``nr_iter``
        controls the number of Perceptron training iterations.
        :param essay_feats: A list of essay objects whose sentences hold per-word features and tags.
        :param save_loc: If not ``None``, saves a pickled model in this location.
        :param nr_iter: Number of training iterations.
        '''

        # Copy as we do an inplace shuffle below
        cp_essay_feats = list(essay_feats)

        for iter_ in range(nr_iter):
            class2predictions = defaultdict(list)
            class2tags = defaultdict(list)

            for essay_ix, essay in enumerate(cp_essay_feats):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    """ Start Sentence """
                    class2prev = defaultdict(list)
                    for cls in self.classes:
                        class2prev[cls] = list(self.START)

                    for wd in tagged_sentence:
                        # Don't mutate the feat dictionary
                        shared_features = dict(wd.features.items())
                        # get all tagger predictions for previous 2 tags
                        for cls in self.classes:
                            self._add_secondary_tag_features(
                                shared_features, wd.word, cls, class2prev[cls])
                        # train each binary tagger
                        for cls in self.classes:
                            tagger_feats = dict(shared_features.items())
                            # add more in depth features for this tag
                            self._add_tag_features(tagger_feats, wd.word,
                                                   class2prev[cls][-1],
                                                   class2prev[cls][-2])
                            actual = self.__get_yal_(wd, cls)
                            model = self.class2model[cls]
                            guess = model.predict(tagger_feats)
                            model.update(actual, guess, tagger_feats)

                            class2prev[cls].append(guess)

                            class2predictions[cls].append(guess)
                            class2tags[cls].append(actual)

            random.shuffle(cp_essay_feats)
            class2metrics = ResultsProcessor.compute_metrics(
                class2tags, class2predictions)
            wtd_mean = weighted_mean_rpfa(class2metrics.values())
            if verbose:
                logging.info("Iter {0}: Wtd Mean: {1}".format(
                    iter_, str(wtd_mean)))

        for cls in self.classes:
            self.class2model[cls].average_weights()
        return None
    def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
        '''Train a model from essays, and save it at ``save_loc``. ``nr_iter``
        controls the number of Perceptron training iterations.
        :param essay_feats: A list of essay objects whose sentences hold per-word features and tags.
        :param save_loc: If not ``None``, saves a pickled model in this location.
        :param nr_iter: Number of training iterations.
        '''

        # Copy as we do an inplace shuffle below
        cp_essay_feats = list(essay_feats)

        for iter_ in range(nr_iter):
            class2predictions = defaultdict(list)
            class2tags = defaultdict(list)

            for essay_ix, essay in enumerate(cp_essay_feats):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    """ Start Sentence """
                    class2prev = defaultdict(list)
                    for cls in self.classes:
                        class2prev[cls] = list(self.START)

                    for wd in tagged_sentence:
                        # Don't mutate the feat dictionary
                        shared_features = dict(wd.features.items())
                        # get all tagger predictions for previous 2 tags
                        for cls in self.classes:
                            self._add_secondary_tag_features(shared_features, wd.word, cls, class2prev[cls])
                        # train each binary tagger
                        for cls in self.classes:
                            tagger_feats = dict(shared_features.items())
                            # add more in depth features for this tag
                            self._add_tag_features(tagger_feats, wd.word, class2prev[cls][-1], class2prev[cls][-2])
                            actual = self.__get_yal_(wd, cls)
                            model = self.class2model[cls]
                            guess = model.predict(tagger_feats)
                            model.update(actual, guess, tagger_feats)

                            class2prev[cls].append(guess)

                            class2predictions[cls].append(guess)
                            class2tags[cls].append(actual)

            random.shuffle(cp_essay_feats)
            class2metrics = ResultsProcessor.compute_metrics(class2tags, class2predictions)
            wtd_mean = weighted_mean_rpfa(class2metrics.values())
            if verbose:
                logging.info("Iter {0}: Wtd Mean: {1}".format(iter_, str(wtd_mean)))

        for cls in self.classes:
            self.class2model[cls].average_weights()
        return None
Example #5
def get_wd_level_preds(essays, expected_tags):
    expected_tags = set(expected_tags)
    ysbycode = defaultdict(list)
    for e in essays:
        for sentix in range(len(e.sentences)):
            p_ccodes = e.pred_tagged_sentences[sentix]
            for wordix in range(len(p_ccodes)):
                tags = p_ccodes[wordix]
                if isinstance(tags, str):
                    ptag_set = {tags}
                elif isinstance(tags, (set, list)):
                    ptag_set = set(tags)
                else:
                    raise Exception("Unrecognized tag type")
                for exp_tag in expected_tags:
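                    # __get_label_ is private (double underscore), hence the name-mangled access below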
                    ysbycode[exp_tag].append(ResultsProcessor._ResultsProcessor__get_label_(exp_tag, ptag_set))
    return ysbycode
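
A hypothetical usage of get_wd_level_preds (the essay variable and tag set here are illustrative, not taken from the original scripts):

# pred_ys_bycode = get_wd_level_preds(pred_tagged_essays, expected_tags=set_cr_tags)
# act_ys_bycode = ResultsProcessor.get_wd_level_lbs(pred_tagged_essays, expected_tags=set_cr_tags)
# mean_metrics = ResultsProcessor.compute_mean_metrics(act_ys_bycode, pred_ys_bycode)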
    def train(self, tagged_essays, max_epochs):

        trained_with_beta0 = False
        ys_by_sent = self.get_label_data(tagged_essays)

        for i in range(0, max_epochs):
            if self.beta < 0:
                trained_with_beta0 = True

            self.epoch += 1
            print("Epoch: {epoch}".format(epoch=self.epoch))
            print("Beta:  {beta}".format(beta=self.beta))

            # TODO - provide option for different model types here?
            parse_examples = WeightedExamples(labels=PARSE_ACTIONS, positive_value=self.positive_val)
            crel_examples  = WeightedExamples(labels=None,          positive_value=self.positive_val)

            pred_ys_by_sent = defaultdict(list)
            for essay_ix, essay in enumerate(tagged_essays):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    predicted_tags = essay.pred_tagged_sentences[sent_ix]
                    pred_relations = self.generate_training_data(tagged_sentence, predicted_tags, parse_examples, crel_examples)
                    # Store predictions for evaluation
                    self.add_cr_labels(pred_relations, pred_ys_by_sent)

            class2metrics = ResultsProcessor.compute_metrics(ys_by_sent, pred_ys_by_sent)
            micro_metrics = micro_rpfa(class2metrics.values()) # type: rpfa
            print("Training Metrics: {metrics}".format(metrics=micro_metrics))

            # TODO, dictionary vectorize examples, train a weighted binary classifier for each separate parsing action
            self.train_parse_models(parse_examples)
            self.train_crel_models(crel_examples)

            self.training_datasets_parsing[self.epoch] = parse_examples
            self.training_datasets_crel[self.epoch] = crel_examples

            # Decay beta
            self.beta = self.beta_decay_fn(self.beta)
            if self.beta < 0 and trained_with_beta0:
                print("beta decayed below 0 - beta:'{beta}', stopping".format(beta=self.beta))
                break
        # end [for each epoch]
        if not trained_with_beta0:
            print("Algorithm hit max epochs without training with beta <= 0 - final_beta:{beta}".format(beta=self.beta))
    def train(self, tagged_essays, max_epochs):

        trained_with_beta0 = False
        ys_by_sent = self.get_label_data(tagged_essays)

        for i in range(0, max_epochs):
            if self.beta < 0:
                trained_with_beta0 = True

            self.epoch += 1
            print("Epoch: {epoch}".format(epoch=self.epoch))
            print("Beta:  {beta}".format(beta=self.beta))

            # TODO - provide option for different model types here?
            parse_examples = WeightedExamples(labels=PARSE_ACTIONS, positive_value=self.positive_val)
            crel_examples  = WeightedExamples(labels=None,          positive_value=self.positive_val)

            pred_ys_by_sent = defaultdict(list)
            for essay_ix, essay in enumerate(tagged_essays):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    predicted_tags = essay.pred_tagged_sentences[sent_ix]
                    pred_relations = self.generate_training_data(tagged_sentence, predicted_tags, parse_examples, crel_examples)
                    # Store predictions for evaluation
                    self.add_cr_labels(pred_relations, pred_ys_by_sent)

            class2metrics = ResultsProcessor.compute_metrics(ys_by_sent, pred_ys_by_sent)
            micro_metrics = micro_rpfa(class2metrics.values()) # type: rpfa
            print("Training Metrics: {metrics}".format(metrics=micro_metrics))

            # TODO, dictionary vectorize examples, train a weighted binary classifier for each separate parsing action
            self.train_parse_models(parse_examples)
            self.train_crel_models(crel_examples)

            self.training_datasets_parsing[self.epoch] = parse_examples
            self.training_datasets_crel[self.epoch] = crel_examples

            # Decay beta
            self.beta = self.beta_decay_fn(self.beta)
            if self.beta < 0 and trained_with_beta0:
                print("beta decayed below 0 - beta:'{beta}', stopping".format(beta=self.beta))
                break
        # end [for each epoch]
        if not trained_with_beta0:
            print("Algorithm hit max epochs without training with beta <= 0 - final_beta:{beta}".format(beta=self.beta))
def evaluate_on_test_set(test_essay_feats, out_predictions_file, out_predicted_margins_file, out_metrics_file, out_categories_file):

    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(test_x, tag2word_classifier, wd_test_tags)
    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags,
                                                                                         sent_input_interaction_tags,
                                                                                         test_essay_feats, test_x,
                                                                                         wd_test_ys_bytag,
                                                                                         tag2word_classifier,
                                                                                         SPARSE_SENT_FEATS, LOOK_BACK)
    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier,
                                                                   sent_output_train_test_tags,
                                                                   predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier,
                                                                   sent_output_train_test_tags,
                                                                   predict_fn=probability_for_tag)

    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags, output_confidence=True)
    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)
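    # NOTE: both sentence-level metric sets below are computed from the same test-set predictions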
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(train_wd_metrics, test_wd_metrics,
                                                "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics,
                                                "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print(s)
Example #9
def evaluate_ranker(model, xs, essay2crels, ys_bytag, set_cr_tags):
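    # Average weights on a clone so the model being trained keeps its raw, un-averaged weights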
    clone = model.clone()
    if hasattr(model, "average_weights"):
        clone.average_weights()
    pred_ys_bytag = defaultdict(list)
    ename2inps = dict()
    for parser_input in xs:
        ename2inps[parser_input.essay_name] = parser_input

    for ename, act_crels in essay2crels.items():
        if ename not in ename2inps:
            # no predicted crels for this essay
            highest_ranked = set()
        else:
            parser_input = ename2inps[ename]
            ixs = clone.rank(parser_input.all_feats_array)
            highest_ranked = parser_input.all_parses[ixs[0]]  # type: Tuple[str]

        add_cr_labels(set(highest_ranked), pred_ys_bytag, set_cr_tags)

    mean_metrics = ResultsProcessor.compute_mean_metrics(ys_bytag, pred_ys_bytag)
    df = get_micro_metrics(metrics_to_df(mean_metrics))
    return df
NGRAMS = 3
MIN_FEAT_FREQ = 5

BETA = 0.2
MAX_EPOCHS = 10

settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
# NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook
predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"

config = get_config(training_folder)
processor = ResultsProcessor(dbname="metrics_causal")

# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)
########################################################

fname = predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

print("Number of pred tagged essasy %i" % len(pred_tagged_essays))  # should be 902
print("Started at: " + str(datetime.datetime.now()))
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()
Example #11
from IterableFP import flatten
from collections import defaultdict
from window_based_tagger_config import get_config
from results_procesor import ResultsProcessor, compute_metrics
from nltk.classify import maxent
# END Classifiers

import Settings
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

NUM_TRAIN_ITERATIONS = 5
# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS = True
SPARSE_SENT_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
test_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)]  # type: List[Tuple[Any,Any]]
cv_folds = cross_validation(pred_tagged_essays_train, CV_FOLDS)  # type: List[Tuple[Any,Any]]

result_test_essay_level = evaluate_model_essay_level(
    folds=cv_folds,
    extractor_fn_names_lst=best_extractor_names,
    all_extractor_fns=all_extractor_fns,
    ngrams=ngrams,
    beta=beta,
    stemmed=stemmed,
    down_sample_rate=1.0,
    max_epochs=max_epochs)

models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, cv_sent_vd_ys_by_tag = result_test_essay_level

mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag)
print(get_micro_metrics(metrics_to_df(mean_metrics)))

models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = result_test_essay_level

mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag)
print(get_micro_metrics(metrics_to_df(mean_metrics)))

result_final_test = evaluate_model_essay_level(
    folds=test_folds,
    extractor_fn_names_lst=best_extractor_names,
    all_extractor_fns=all_extractor_fns,
    ngrams=ngrams,
    beta=beta,
    stemmed=stemmed,
    down_sample_rate=1.0,
    max_epochs=max_epochs)
NGRAMS = 3
MIN_FEAT_FREQ = 5

BETA = 0.2
MAX_EPOCHS = 10

settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
# NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook
predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"

config = get_config(training_folder)
processor = ResultsProcessor(dbname="metrics_causal")

# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)
########################################################

fname = predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

print("Number of pred tagged essasy %i" %
      len(pred_tagged_essays))  # should be 902
print("Started at: " + str(datetime.datetime.now()))
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
def evaluate_tagger_on_fold(kfold, wd_train_tags, tag_history, tag_plus_word, tag_ngram, avg_weights=True, split=0.2):

    # logger.info("Loading data for fold %i" % kfold)
    k_fold_data = k_fold_2data[kfold]
    essays_TD, essays_VD, essays_TD_most_freq, wd_td_ys_bytag, wd_vd_ys_bytag = k_fold_data

    """ TRAINING """
    tagger = PerceptronTaggerLabelPowerset(wd_train_tags,
                                           combo_freq_threshold=1,
                                           tag_history=tag_history,
                                           tag_plus_word=tag_plus_word,
                                           tag_ngram_size=tag_ngram)

    # Split into train and test set
    np_essays = np.asarray(essays_TD_most_freq)
    ixs = np.arange(len(essays_TD_most_freq))
    np.random.shuffle(ixs)
    split_size = int(split * len(essays_TD_most_freq))

    test, train = np_essays[ixs[:split_size]], np_essays[ixs[split_size:]]
    _, test_tags = flatten_to_wordlevel_feat_tags(test)
    class2ys = get_wordlevel_ys_by_code(test_tags, wd_train_tags)

    optimal_num_iterations = -1
    last_f1 = -1
    """ EARLY STOPPING USING TEST SET """
    for i in range(30):
        tagger.train(train, nr_iter=1, verbose=False, average_weights=False)
        wts_copy = dict(tagger.model.weights.items())
        if avg_weights:
            tagger.model.average_weights()

        class2predictions = tagger.predict(test)
        # Compute F1 score; stop early if worse than previous
        class2metrics = ResultsProcessor.compute_metrics(class2ys, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        current_f1 = micro_metrics.f1_score
        if current_f1 <= last_f1:
            optimal_num_iterations = i # i.e. this number minus 1, but 0 based
            break
        # Reset weights (as we are averaging weights)
        tagger.model.weights = wts_copy
        last_f1 = current_f1

    # print("fold %i - Optimal F1 obtained at iteration %i " % (kfold, optimal_num_iterations))
    """ Re-train model using stopping criterion on full training set """
    final_tagger = PerceptronTaggerLabelPowerset(wd_train_tags,
                                                 combo_freq_threshold=1,
                                                 tag_history=tag_history,
                                                 tag_plus_word=tag_plus_word,
                                                 tag_ngram_size=tag_ngram)

    final_tagger.train(essays_TD_most_freq, nr_iter=optimal_num_iterations, verbose=False, average_weights=avg_weights)

    """ PREDICT """
    td_wd_predictions_by_code = final_tagger.predict(essays_TD)
    vd_wd_predictions_by_code = final_tagger.predict(essays_VD)

    # logger.info("Fold %i finished" % kfold)
    """ Aggregate results """
    return kfold, td_wd_predictions_by_code, vd_wd_predictions_by_code, optimal_num_iterations
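
A hypothetical driver for the per-fold routine above (the hyper-parameter values are illustrative only):

# fold_results = [evaluate_tagger_on_fold(k, wd_train_tags, tag_history=1,
#                                         tag_plus_word=True, tag_ngram=2)
#                 for k in range(CV_FOLDS)]
# for k, td_preds, vd_preds, n_iter in fold_results:
#     logger.info("fold %i stopped after %i iterations" % (k, n_iter))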
if USE_SVM:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=decision_function_for_tag)
else:
    test_decision_functions_by_code = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags, predict_fn=probability_for_tag)

""" Write out the predicted classes """
with open(out_predictions_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_sent_predictions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags)

with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write("Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n")
    predictions_to_file(f_output_file, sent_test_ys_bycode, test_decision_functions_by_code, test_essay_feats, regular_tags + sent_output_train_test_tags, output_confidence=True)

""" Write out the accuracy metrics """
train_wd_metrics    = ResultsProcessor.compute_mean_metrics(wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics     = ResultsProcessor.compute_mean_metrics(wd_test_ys_bytag, test_wd_predictions_by_code)

train_sent_metrics  = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics   = ResultsProcessor.compute_mean_metrics(sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(train_wd_metrics,   test_wd_metrics,   "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(train_sent_metrics, test_sent_metrics, "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
    f_metrics_file.write(s)
    print(s)
    #TODO - need to add logic here for GW
    #write_categories(out_predictions_file, "CB", out_categories_file)
Example #16
    predictions_to_file(f_output_file, sent_test_ys_bycode,
                        test_sent_predictions_by_code, test_essay_feats,
                        regular_tags + sent_output_train_test_tags)

with open(out_predicted_margins_file, "w+") as f_output_file:
    f_output_file.write(
        "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
    )
    predictions_to_file(f_output_file,
                        sent_test_ys_bycode,
                        test_decision_functions_by_code,
                        test_essay_feats,
                        regular_tags + sent_output_train_test_tags,
                        output_confidence=True)
""" Write out the accuracy metrics """
train_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_td_ys_bytag, train_wd_predictions_by_code)
test_wd_metrics = ResultsProcessor.compute_mean_metrics(
    wd_test_ys_bytag, test_wd_predictions_by_code)

train_sent_metrics = ResultsProcessor.compute_mean_metrics(
    sent_test_ys_bycode, test_sent_predictions_by_code)
test_sent_metrics = ResultsProcessor.compute_mean_metrics(
    sent_test_ys_bycode, test_sent_predictions_by_code)

with open(out_metrics_file, "w+") as f_metrics_file:
    s = ""
    pad = ResultsProcessor.pad_str
    s += ResultsProcessor.metrics_to_string(
        train_wd_metrics, test_wd_metrics,
        "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
    s += ResultsProcessor.metrics_to_string(
        train_sent_metrics, test_sent_metrics,
        "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
    f_metrics_file.write(s)

    td_wd_predictions_by_code = to_flattened_binary_tags_by_code(
        td_predictions, regular_tags)
    vd_wd_predictions_by_code = to_flattened_binary_tags_by_code(
        vd_predictions, regular_tags)
    os.remove(model_filename)

    return wd_td_ys_bytag, wd_vd_ys_bytag, td_wd_predictions_by_code, vd_wd_predictions_by_code


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
logger = logging.getLogger()

# Load the Essays
# ---------------
# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
settings = Settings.Settings()
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"
training_folder = root_folder + "Training/"
    def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
        '''Train a model from essays, and save it at ``save_loc``. ``nr_iter``
        controls the number of Perceptron training iterations.
        :param essay_feats: A list of essay objects whose sentences hold per-word features and tags.
        :param save_loc: If not ``None``, saves a pickled model in this location.
        :param nr_iter: Number of training iterations.
        '''

        # Copy, as we do an in-place shuffle below
        cp_essay_feats = list(essay_feats)

        tag_freq = defaultdict(int)
        for essay in cp_essay_feats:
            for tagged_sentence in essay.sentences:
                for wd in tagged_sentence:
                    fs_tags = self.__get_tags_(wd.tags)
                    tag_freq[fs_tags] += 1

        self.classes = {
            fs for fs, cnt in tag_freq.items()
            if cnt >= self.combo_freq_threshold
        }
        self.model = AveragedPerceptron(self.classes)

        for iter_ in range(nr_iter):
            class2predictions = defaultdict(list)
            class2tags = defaultdict(list)

            for essay_ix, essay in enumerate(cp_essay_feats):
                for sent_ix, tagged_sentence in enumerate(essay.sentences):
                    """ Start Sentence """
                    prev = list(self.START)

                    for i, wd in enumerate(tagged_sentence):
                        # Don't mutate the feat dictionary
                        shared_features = dict(wd.features.items())
                        # get all tagger predictions for previous 2 tags
                        self._add_secondary_tag_features(shared_features, prev)

                        tagger_feats = dict(shared_features.items())
                        # add more in depth features for this tag
                        actual = self.__get_tags_(wd.tags)

                        if self.use_tag_features:
                            self._add_tag_features(tagger_feats, wd.word,
                                                   prev[-1], prev[-2])

                        guess = self.model.predict(tagger_feats)
                        self.model.update(actual, guess, tagger_feats)

                        prev.append(guess)
                        for cls in self.individual_tags:
                            class2predictions[cls].append(1 if cls in guess else 0)
                            class2tags[cls].append(1 if cls in actual else 0)

            random.shuffle(cp_essay_feats)
            class2metrics = ResultsProcessor.compute_metrics(
                class2tags, class2predictions)
            micro_metrics = micro_rpfa(class2metrics.values())
            if verbose:
                logging.info("Iter {0}: Micro Avg Metrics: {1}".format(
                    iter_, str(micro_metrics)))

        self.model.average_weights()
        return None
import Settings
from CrossValidation import cross_validation
from Decorators import memoize_to_disk
from IterableFP import flatten
from featureextractionfunctions import *
from featurevectorizer import FeatureVectorizer
from load_data import load_process_essays, extract_features
from results_procesor import ResultsProcessor, __MICRO_F1__
from window_based_tagger_config import get_config
from wordtagginghelper import *

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS     = True

MIN_FEAT_FREQ       = 5        # 5 best so far
CV_FOLDS            = 5

MIN_TAG_FREQ        = 5
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling

settings = Settings.Settings()
Example #20
def get_metrics_raw(essays, expected_tags, micro_only=False):
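    # NOTE: the micro_only parameter is accepted but not used in this body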
    act_ys_bycode  = ResultsProcessor.get_wd_level_lbs(essays,  expected_tags=expected_tags)
    pred_ys_bycode = get_wd_level_preds(essays, expected_tags=expected_tags)
    mean_metrics = ResultsProcessor.compute_mean_metrics(act_ys_bycode, pred_ys_bycode)
    return mean_metrics
def score_predictions(model, xs, ys_by_tag, seq_len):
    preds = model.predict_classes(xs, batch_size=batch_size, verbose=0)
    pred_ys_by_tag = collapse_results(seq_len, preds)
    class2metrics = ResultsProcessor.compute_metrics(ys_by_tag, pred_ys_by_tag)
    micro_metrics = micro_rpfa(class2metrics.values())
    return micro_metrics, pred_ys_by_tag
        vd_predictions = model.tag_sents(to_sentences(vd))
        # Delete model file now predictions obtained
        # Note, we are randomizing name above, so we need to clean up here
        os.remove(model_filename)

        td_wd_predictions_by_code[code] = to_flattened_binary_tags(td_predictions)
        vd_wd_predictions_by_code[code] = to_flattened_binary_tags(vd_predictions)
    return wd_td_ys_bytag, wd_vd_ys_bytag, td_wd_predictions_by_code, vd_wd_predictions_by_code

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Load the Essays
# ---------------
# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
settings = Settings.Settings()
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"
folder = root_folder + "Training/"
def score_predictions(model, xs, ys_by_tag, seq_len):
    preds = model.predict_classes(xs, batch_size=batch_size, verbose=0)
    pred_ys_by_tag = collapse_results(seq_len, preds)
    class2metrics = ResultsProcessor.compute_metrics(ys_by_tag, pred_ys_by_tag)
    micro_metrics = micro_rpfa(class2metrics.values())
    return micro_metrics, pred_ys_by_tag
Example #24
from IterableFP import flatten
from featureextractionfunctions import *
from featurevectorizer import FeatureVectorizer
from load_data import load_process_essays, extract_features
from predictions_to_file import predictions_to_file
from results_procesor import ResultsProcessor
from sent_feats_for_stacking import *
from window_based_tagger_config import get_config
from wordtagginghelper import *

# END Classifiers
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS     = True
SPARSE_SENT_FEATS   = True

MIN_FEAT_FREQ       = 5        # 5 best so far
CV_FOLDS            = 5

MIN_TAG_FREQ        = 5
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling

settings = Settings.Settings()
from CrossValidation import cross_validation
from Decorators import memoize_to_disk
from IterableFP import flatten
from featureextractionfunctions import *
from featurevectorizer import FeatureVectorizer
from load_data import load_process_essays, extract_features
from results_procesor import ResultsProcessor, __MICRO_F1__
from window_based_tagger_config import get_config
from wordtagginghelper import *

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor(dbname="metrics_coref_new")

# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS = True

MIN_FEAT_FREQ = 5  # 5 best so far
CV_FOLDS = 5

MIN_TAG_FREQ = 5
LOOK_BACK = 0  # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling

settings = Settings.Settings()
from wordtagginghelper import *
from IterableFP import flatten
from collections import defaultdict
from window_based_tagger_config import get_config
from results_procesor import ResultsProcessor, compute_metrics
from nltk.classify import maxent
# END Classifiers

import Settings
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# Create persister (mongo client) - fail fast if mongo service not initialized
processor = ResultsProcessor()

NUM_TRAIN_ITERATIONS = 5
# not hashed as don't affect persistence of feature processing
SPARSE_WD_FEATS     = True
SPARSE_SENT_FEATS   = True

MIN_FEAT_FREQ       = 5        # 5 best so far
CV_FOLDS            = 5

MIN_TAG_FREQ        = 5
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
Example #27
def evaluate_tagger_on_fold(kfold,
                            wd_train_tags,
                            tag_history,
                            tag_plus_word,
                            tag_ngram,
                            avg_weights=True,
                            split=0.2):

    # logger.info("Loading data for fold %i" % kfold)
    k_fold_data = k_fold_2data[kfold]
    essays_TD, essays_VD, essays_TD_most_freq, wd_td_ys_bytag, wd_vd_ys_bytag = k_fold_data
    """ TRAINING """
    tagger = PerceptronTaggerLabelPowerset(wd_train_tags,
                                           combo_freq_threshold=1,
                                           tag_history=tag_history,
                                           tag_plus_word=tag_plus_word,
                                           tag_ngram_size=tag_ngram)

    # Split into train and test set
    np_essays = np.asarray(essays_TD_most_freq)
    ixs = np.arange(len(essays_TD_most_freq))
    np.random.shuffle(ixs)
    split_size = int(split * len(essays_TD_most_freq))

    test, train = np_essays[ixs[:split_size]], np_essays[ixs[split_size:]]
    _, test_tags = flatten_to_wordlevel_feat_tags(test)
    class2ys = get_wordlevel_ys_by_code(test_tags, wd_train_tags)

    optimal_num_iterations = -1
    last_f1 = -1
    """ EARLY STOPPING USING TEST SET """
    for i in range(30):
        tagger.train(train, nr_iter=1, verbose=False, average_weights=False)
        wts_copy = dict(tagger.model.weights.items())
        if avg_weights:
            tagger.model.average_weights()

        class2predictions = tagger.predict(test)
        # Compute F1 score; stop early if worse than previous
        class2metrics = ResultsProcessor.compute_metrics(
            class2ys, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        current_f1 = micro_metrics.f1_score
        if current_f1 <= last_f1:
            optimal_num_iterations = i  # i.e. this number minus 1, but 0 based
            break
        # Reset weights (as we are averaging weights)
        tagger.model.weights = wts_copy
        last_f1 = current_f1

    # print("fold %i - Optimal F1 obtained at iteration %i " % (kfold, optimal_num_iterations))
    """ Re-train model using stopping criterion on full training set """
    final_tagger = PerceptronTaggerLabelPowerset(wd_train_tags,
                                                 combo_freq_threshold=1,
                                                 tag_history=tag_history,
                                                 tag_plus_word=tag_plus_word,
                                                 tag_ngram_size=tag_ngram)

    final_tagger.train(essays_TD_most_freq,
                       nr_iter=optimal_num_iterations,
                       verbose=False,
                       average_weights=avg_weights)
    """ PREDICT """
    td_wd_predictions_by_code = final_tagger.predict(essays_TD)
    vd_wd_predictions_by_code = final_tagger.predict(essays_VD)

    # logger.info("Fold %i finished" % kfold)
    """ Aggregate results """
    return kfold, td_wd_predictions_by_code, vd_wd_predictions_by_code, optimal_num_iterations
CV_FOLDS = 5
MIN_FEAT_FREQ = 5

# Global settings

settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
# NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook
# used as inputs to parsing model
rnn_predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"

config = get_config(training_folder)
processor = ResultsProcessor(dbname="metrics_causal")

# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)
########################################################

fname = rnn_predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" %
            len(pred_tagged_essays))  # should be 902
Example #29
def evaluate_on_test_set(test_essay_feats, out_predictions_file,
                         out_predicted_margins_file, out_metrics_file,
                         out_categories_file):

    test_feats, test_tags = flatten_to_wordlevel_feat_tags(test_essay_feats)
    wd_test_ys_bytag = get_wordlevel_ys_by_code(test_tags, wd_train_tags)
    test_x = feature_transformer.transform(test_feats)
    """ TEST Tagger """
    test_wd_predictions_by_code = test_classifier_per_code(
        test_x, tag2word_classifier, wd_test_tags)
    print "\nRunning Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    sent_test_xs, sent_test_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(
        sent_input_feat_tags, sent_input_interaction_tags, test_essay_feats,
        test_x, wd_test_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS,
        LOOK_BACK)
    """ Test Stack Classifier """
    test_sent_predictions_by_code \
        = test_classifier_per_code(sent_test_xs, tag2sent_classifier, sent_output_train_test_tags)
    if USE_SVM:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs,
            tag2sent_classifier,
            sent_output_train_test_tags,
            predict_fn=decision_function_for_tag)
    else:
        test_decision_functions_by_code = test_classifier_per_code(
            sent_test_xs,
            tag2sent_classifier,
            sent_output_train_test_tags,
            predict_fn=probability_for_tag)
    """ Write out the predicted classes """
    with open(out_predictions_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predictions\n")
        predictions_to_file(f_output_file, sent_test_ys_bycode,
                            test_sent_predictions_by_code, test_essay_feats,
                            regular_tags + sent_output_train_test_tags)
    with open(out_predicted_margins_file, "w+") as f_output_file:
        f_output_file.write(
            "Essay|Sent Number|Processed Sentence|Concept Codes|Predicted Confidence\n"
        )
        predictions_to_file(f_output_file,
                            sent_test_ys_bycode,
                            test_decision_functions_by_code,
                            test_essay_feats,
                            regular_tags + sent_output_train_test_tags,
                            output_confidence=True)
    """ Write out the accuracy metrics """
    train_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_td_ys_bytag, train_wd_predictions_by_code)
    test_wd_metrics = ResultsProcessor.compute_mean_metrics(
        wd_test_ys_bytag, test_wd_predictions_by_code)
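    # NOTE: both sentence-level metric sets below are computed from the same test-set predictions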
    train_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    test_sent_metrics = ResultsProcessor.compute_mean_metrics(
        sent_test_ys_bycode, test_sent_predictions_by_code)
    with open(out_metrics_file, "w+") as f_metrics_file:
        s = ""
        pad = ResultsProcessor.pad_str
        s += ResultsProcessor.metrics_to_string(
            train_wd_metrics, test_wd_metrics,
            "\n%s%s%s" % (pad("TAGGING"), pad("Train"), pad("Test")))
        s += ResultsProcessor.metrics_to_string(
            train_sent_metrics, test_sent_metrics,
            "\n%s%s%s" % (pad("SENTENCE"), pad("Train"), pad("Test")))
        f_metrics_file.write(s)
        write_categories(out_predictions_file, "CB", out_categories_file)
        print(s)