def train(self, tagged_essays, max_epochs):
    ys_by_sent = self.get_label_data(tagged_essays)
    for i in range(0, max_epochs):
        self.epoch += 1
        self.log("Epoch: {epoch}".format(epoch=self.epoch))

        # TODO - provide option for different model types here?
        parse_examples = WeightedExamples(labels=PARSE_ACTIONS, positive_value=self.positive_val)
        crel_examples = WeightedExamples(labels=None, positive_value=self.positive_val)

        pred_ys_by_sent = defaultdict(list)
        for essay_ix, essay in enumerate(tagged_essays):
            for sent_ix, taggged_sentence in enumerate(essay.sentences):
                predicted_tags = essay.pred_tagged_sentences[sent_ix]
                pred_relations = self.generate_training_data(
                    taggged_sentence, predicted_tags, parse_examples, crel_examples)
                # Store predictions for evaluation
                self.add_cr_labels(pred_relations, pred_ys_by_sent)

        class2metrics = ResultsProcessor.compute_metrics(ys_by_sent, pred_ys_by_sent)
        micro_metrics = micro_rpfa(class2metrics.values())  # type: rpfa
        self.log("Training Metrics: {metrics}".format(metrics=micro_metrics))

        self.train_parse_models(parse_examples)
        self.train_crel_models(crel_examples)

        self.training_datasets_parsing[self.epoch] = parse_examples
        self.training_datasets_crel[self.epoch] = crel_examples
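# The epoch loop above logs a micro-averaged recall/precision/F1 ("rpfa") over all
# classes. Below is a minimal, self-contained sketch of micro averaging over binary
# label dicts shaped like ys_by_sent / pred_ys_by_sent (tag -> list of 0/1 labels).
# It illustrates the metric only; it is not the actual ResultsProcessor / micro_rpfa code.
def micro_metrics_sketch(ys_by_tag, pred_ys_by_tag):
    # Pool raw counts across all tags, then compute the metrics once over the pool
    tp = fp = fn = 0
    for tag, ys in ys_by_tag.items():
        for y, p in zip(ys, pred_ys_by_tag[tag]):
            if p == 1 and y == 1:
                tp += 1
            elif p == 1 and y == 0:
                fp += 1
            elif p == 0 and y == 1:
                fn += 1
    recall = tp / float(tp + fn) if (tp + fn) else 0.0
    precision = tp / float(tp + fp) if (tp + fp) else 0.0
    f1 = 2 * recall * precision / (recall + precision) if (recall + precision) else 0.0
    return recall, precision, f1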
def add_mean_metrics(dict_mean_metrics, fltr=is_a_regular_code):
    mean_metrics = dict(dict_mean_metrics.items())

    # Filters to concept codes by default
    code_metrics = []
    for tag, metric in mean_metrics.items():
        if fltr(tag):
            code_metrics.append(metric)

    """ All Tags """
    mean_metric = mean_rpfa(mean_metrics.values())
    weighted_mean_metric = weighted_mean_rpfa(mean_metrics.values())
    micro_f1_metric = micro_rpfa(mean_metrics.values())

    """ Concept Codes """
    mean_metric_codes = mean_rpfa(code_metrics)
    weighted_mean_metric_codes = weighted_mean_rpfa(code_metrics)

    mean_metrics["MEAN"] = mean_metric
    mean_metrics["WEIGHTED_MEAN"] = weighted_mean_metric

    """ The default behavior is to assume codes starting with a digit are concept codes """
    mean_metrics["MEAN_CONCEPT_CODES"] = mean_metric_codes
    mean_metrics["WEIGHTED_MEAN_CONCEPT_CODES"] = weighted_mean_metric_codes

    """ Micro and Macro F1 """
    mean_metrics[__MICRO_F1__] = micro_f1_metric
    macro_f1 = ResultsProcessor.f1(mean_metric_codes.recall, mean_metric_codes.precision)

    # convert values to dicts from rpfa objects for mongodb
    return dict(
        list(map(lambda tpl: (tpl[0], tpl[1].__dict__), mean_metrics.items()))
        + [(__MACRO_F1__, macro_f1)])
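# add_mean_metrics derives macro F1 from the mean recall and mean precision over the
# concept codes. Assuming ResultsProcessor.f1 is the standard harmonic mean (a sketch,
# not taken from the source), it would look like this:
def f1_sketch(recall, precision):
    # Harmonic mean of precision and recall, guarding against division by zero
    if recall + precision == 0:
        return 0.0
    return 2.0 * recall * precision / (recall + precision)

# e.g. f1_sketch(0.5, 1.0) == 2 * 0.5 * 1.0 / 1.5 ~= 0.667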
def train(self, essay_feats, save_loc=None, nr_iter=5, verbose=True):
    '''Train a model from tagged essays, and save it at ``save_loc``.
    ``nr_iter`` controls the number of Perceptron training iterations.

    :param essay_feats: A list of essays, each a sequence of tagged sentences.
    :param save_loc: If not ``None``, saves a pickled model in this location.
    :param nr_iter: Number of training iterations.
    '''
    # NOTE: save_loc is currently unused in the body below
    cp_essay_feats = list(essay_feats)  # Copy as we do an in-place shuffle below

    # Count how often each combination (frozenset) of tags occurs
    tag_freq = defaultdict(int)
    for essay in cp_essay_feats:
        for taggged_sentence in essay.sentences:
            for wd in taggged_sentence:
                fs_tags = self.__get_tags_(wd.tags)
                tag_freq[fs_tags] += 1

    # Keep only tag combinations frequent enough to learn reliably
    self.classes = set(fs for fs, cnt in tag_freq.items()
                       if cnt >= self.combo_freq_threshold)
    self.model = AveragedPerceptron(self.classes)

    for iter_ in range(nr_iter):
        class2predictions = defaultdict(list)
        class2tags = defaultdict(list)
        for essay_ix, essay in enumerate(cp_essay_feats):
            for sent_ix, taggged_sentence in enumerate(essay.sentences):
                """ Start Sentence """
                prev = list(self.START)
                for i, wd in enumerate(taggged_sentence):
                    # Don't mutate the feat dictionary
                    shared_features = dict(wd.features.items())
                    # get all tagger predictions for previous 2 tags
                    self._add_secondary_tag_features(shared_features, prev)
                    tagger_feats = dict(shared_features.items())
                    actual = self.__get_tags_(wd.tags)
                    # add more in-depth features for this tag
                    if self.use_tag_features:
                        self._add_tag_features(tagger_feats, wd.word, prev[-1], prev[-2])
                    guess = self.model.predict(tagger_feats)
                    self.model.update(actual, guess, tagger_feats)
                    prev.append(guess)
                    for cls in self.individual_tags:
                        class2predictions[cls].append(1 if cls in guess else 0)
                        class2tags[cls].append(1 if cls in actual else 0)
        random.shuffle(cp_essay_feats)

        class2metrics = ResultsProcessor.compute_metrics(class2tags, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        if verbose:
            logging.info("Iter {0}: Micro Avg Metrics: {1}".format(iter_, str(micro_metrics)))

    self.model.average_weights()
    return None
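# The tagger above relies on an AveragedPerceptron exposing predict/update/
# average_weights and a .weights dict. A minimal sketch of that interface, modelled
# on the widely used averaged-perceptron tagger design - an assumption about this
# repo's actual implementation, not a copy of it:
from collections import defaultdict

class AveragedPerceptronSketch(object):
    def __init__(self, classes):
        self.classes = set(classes)
        self.weights = {}                   # feature -> {class: weight}
        self._totals = defaultdict(float)   # accumulated weight mass for averaging
        self._tstamps = defaultdict(int)    # update index when a weight last changed
        self.i = 0                          # number of updates seen

    def predict(self, features):
        # Score each class as the dot product of active features and their weights
        scores = defaultdict(float)
        for feat, value in features.items():
            if feat not in self.weights or value == 0:
                continue
            for clas, weight in self.weights[feat].items():
                scores[clas] += value * weight
        return max(self.classes, key=lambda c: scores[c])

    def update(self, truth, guess, features):
        def upd_feat(c, f, w, v):
            # Bank the old weight for the span it was active, then apply the change
            param = (f, c)
            self._totals[param] += (self.i - self._tstamps[param]) * w
            self._tstamps[param] = self.i
            self.weights[f][c] = w + v

        self.i += 1
        if truth == guess:
            return
        for f in features:
            weights = self.weights.setdefault(f, {})
            upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
            upd_feat(guess, f, weights.get(guess, 0.0), -1.0)

    def average_weights(self):
        # Replace each weight with its average over all updates seen so far
        for feat, weights in self.weights.items():
            for clas, weight in weights.items():
                param = (feat, clas)
                total = self._totals[param] + (self.i - self._tstamps[param]) * weight
                self.weights[feat][clas] = total / float(self.i) if self.i else weight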
def train(self, tagged_essays, max_epochs):
    trained_with_beta0 = False
    ys_by_sent = self.get_label_data(tagged_essays)
    for i in range(0, max_epochs):
        if self.beta < 0:
            trained_with_beta0 = True
        self.epoch += 1
        print("Epoch: {epoch}".format(epoch=self.epoch))
        print("Beta: {beta}".format(beta=self.beta))

        # TODO - provide option for different model types here?
        parse_examples = WeightedExamples(labels=PARSE_ACTIONS, positive_value=self.positive_val)
        crel_examples = WeightedExamples(labels=None, positive_value=self.positive_val)

        pred_ys_by_sent = defaultdict(list)
        for essay_ix, essay in enumerate(tagged_essays):
            for sent_ix, taggged_sentence in enumerate(essay.sentences):
                predicted_tags = essay.pred_tagged_sentences[sent_ix]
                pred_relations = self.generate_training_data(
                    taggged_sentence, predicted_tags, parse_examples, crel_examples)
                # Store predictions for evaluation
                self.add_cr_labels(pred_relations, pred_ys_by_sent)

        class2metrics = ResultsProcessor.compute_metrics(ys_by_sent, pred_ys_by_sent)
        micro_metrics = micro_rpfa(class2metrics.values())  # type: rpfa
        print("Training Metrics: {metrics}".format(metrics=micro_metrics))

        # TODO - dictionary vectorize examples, train a weighted binary classifier
        # for each separate parsing action
        self.train_parse_models(parse_examples)
        self.train_crel_models(crel_examples)

        self.training_datasets_parsing[self.epoch] = parse_examples
        self.training_datasets_crel[self.epoch] = crel_examples

        # Decay beta; stop once we have trained a full epoch with beta below 0
        self.beta = self.beta_decay_fn(self.beta)
        if self.beta < 0 and trained_with_beta0:
            print("beta decayed below 0 - beta:'{beta}', stopping".format(beta=self.beta))
            break
    # end [for each epoch]
    if not trained_with_beta0:
        print("Algorithm hit max epochs without training with beta <= 0 - final_beta:{beta}".format(beta=self.beta))
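# This variant anneals a beta hyperparameter each epoch (beta is consumed by
# generate_training_data; its exact role is outside this snippet) and stops after
# training one full epoch with beta below 0. A plausible decay schedule - the actual
# self.beta_decay_fn is configured elsewhere, so this helper is an illustrative
# assumption - is a fixed subtractive step:
def make_linear_beta_decay(step=0.1):
    # Hypothetical helper: subtract `step` per epoch, so beta = 1.0 reaches 0.0
    # after 10 epochs and goes negative on the next decay; one further full epoch
    # then runs with beta < 0 before the loop breaks.
    return lambda beta: beta - step

beta_decay_fn = make_linear_beta_decay(0.1)
# e.g. 1.0 -> 0.9 -> ... -> 0.0 -> -0.1 -> (one more epoch) -> stop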
def evaluate_tagger_on_fold(kfold, wd_train_tags, tag_history, tag_plus_word, tag_ngram,
                            avg_weights=True, split=0.2):
    # logger.info("Loading data for fold %i" % kfold)
    k_fold_data = k_fold_2data[kfold]
    essays_TD, essays_VD, essays_TD_most_freq, wd_td_ys_bytag, wd_vd_ys_bytag = k_fold_data

    """ TRAINING """
    tagger = PerceptronTaggerLabelPowerset(wd_train_tags, combo_freq_threshold=1,
                                           tag_history=tag_history,
                                           tag_plus_word=tag_plus_word,
                                           tag_ngram_size=tag_ngram)

    # Split into train and test set
    np_essays = np.asarray(essays_TD_most_freq)
    ixs = np.arange(len(essays_TD_most_freq))
    np.random.shuffle(ixs)
    split_size = int(split * len(essays_TD_most_freq))
    test, train = np_essays[ixs[:split_size]], np_essays[ixs[split_size:]]

    _, test_tags = flatten_to_wordlevel_feat_tags(test)
    class2ys = get_wordlevel_ys_by_code(test_tags, wd_train_tags)

    optimal_num_iterations = -1
    last_f1 = -1

    """ EARLY STOPPING USING TEST SET """
    for i in range(30):
        tagger.train(train, nr_iter=1, verbose=False, average_weights=False)
        wts_copy = dict(tagger.model.weights.items())
        if avg_weights:
            tagger.model.average_weights()

        class2predictions = tagger.predict(test)

        # Compute F1 score, stop early if worse than previous
        class2metrics = ResultsProcessor.compute_metrics(class2ys, class2predictions)
        micro_metrics = micro_rpfa(class2metrics.values())
        current_f1 = micro_metrics.f1_score
        if current_f1 <= last_f1:
            optimal_num_iterations = i  # i is 0-based, so this is the count of iterations before the drop
            break

        # Restore the pre-averaging weights so training continues from the raw weights
        tagger.model.weights = wts_copy
        last_f1 = current_f1
    else:
        # F1 never dropped within the iteration budget; use all iterations
        optimal_num_iterations = 30
    # print("fold %i - Optimal F1 obtained at iteration %i " % (kfold, optimal_num_iterations))

    """ Re-train model using stopping criterion on full training set """
    final_tagger = PerceptronTaggerLabelPowerset(wd_train_tags, combo_freq_threshold=1,
                                                 tag_history=tag_history,
                                                 tag_plus_word=tag_plus_word,
                                                 tag_ngram_size=tag_ngram)
    final_tagger.train(essays_TD_most_freq, nr_iter=optimal_num_iterations,
                       verbose=False, average_weights=avg_weights)

    """ PREDICT """
    td_wd_predictions_by_code = final_tagger.predict(essays_TD)
    vd_wd_predictions_by_code = final_tagger.predict(essays_VD)
    # logger.info("Fold %i finished" % kfold)

    """ Aggregate results """
    return kfold, td_wd_predictions_by_code, vd_wd_predictions_by_code, optimal_num_iterations
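# Hypothetical usage of the per-fold evaluation above: run every fold and report the
# early-stopped iteration counts. The argument values here are illustrative, not
# taken from the source.
fold_results = [evaluate_tagger_on_fold(k, wd_train_tags, tag_history=2,
                                        tag_plus_word=True, tag_ngram=2)
                for k in range(len(k_fold_2data))]
for kfold, td_preds, vd_preds, n_iters in fold_results:
    print("fold {0}: early stopping chose {1} iterations".format(kfold, n_iters))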
def score_predictions(model, xs, ys_by_tag, seq_len):
    # batch_size is taken from the enclosing scope
    preds = model.predict_classes(xs, batch_size=batch_size, verbose=0)
    pred_ys_by_tag = collapse_results(seq_len, preds)
    class2metrics = ResultsProcessor.compute_metrics(ys_by_tag, pred_ys_by_tag)
    micro_metrics = micro_rpfa(class2metrics.values())
    return micro_metrics, pred_ys_by_tag
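# Hypothetical usage: score a Keras Sequential model (predict_classes is the old
# Sequential API) on a validation split. x_val, y_val_by_tag and MAX_SEQ_LEN are
# illustrative names, not from the source.
micro_metrics, pred_ys_by_tag = score_predictions(model, x_val, y_val_by_tag, seq_len=MAX_SEQ_LEN)
print("Validation micro F1: {0}".format(micro_metrics.f1_score))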