def evaluate_srl_2_steps(no_repeat=False, find_preds_automatically=False, gold_file=None):
    """
    Prints the output of a 2-step SRL system in CoNLL style for evaluating.

    :param no_repeat: if True, forbid repeated argument labels for a predicate
        (passed as ``allow_repeats=not no_repeat`` to the classifier network)
    :param find_preds_automatically: if True, detect predicates with an
        SRLTagger instead of reading gold predicate positions from the reader
    :param gold_file: gold-standard file handed to the two readers
    """
    # load boundary identification network and reader
    md_boundary = Metadata.load_from_file('srl_boundary')
    nn_boundary = taggers.load_network(md_boundary)
    reader_boundary = taggers.create_reader(md_boundary, gold_file)
    itd_boundary = reader_boundary.get_inverse_tag_dictionary()

    # same for arg classification
    md_classify = Metadata.load_from_file('srl_classify')
    nn_classify = taggers.load_network(md_classify)
    reader_classify = taggers.create_reader(md_classify, gold_file)
    itd_classify = reader_classify.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(reader_boundary.predicates)

    actual_sentences = [actual_sentence
                        for actual_sentence, _ in reader_boundary.sentences]

    for sent in actual_sentences:
        if find_preds_automatically:
            pred_pos = tagger.find_predicates(sent)
        else:
            # BUG FIX: was iter_predicates.next(), which is Python 2 only and
            # raises AttributeError on Python 3; use the next() builtin.
            pred_pos = next(iter_predicates)

        verbs = [(position, sent[position].word) for position in pred_pos]
        sent_bound_codified = np.array(
            [reader_boundary.converter.convert(t) for t in sent])
        sent_class_codified = np.array(
            [reader_classify.converter.convert(t) for t in sent])

        # step 1: argument boundary detection (one answer per predicate)
        answers = nn_boundary.tag_sentence(sent_bound_codified, pred_pos)
        boundaries = [[itd_boundary[x] for x in pred_answer]
                      for pred_answer in answers]
        arg_limits = [utils.boundaries_to_arg_limits(pred_boundaries)
                      for pred_boundaries in boundaries]

        # step 2: classify the arguments inside the detected limits
        answers = nn_classify.tag_sentence(sent_class_codified, pred_pos,
                                           arg_limits,
                                           allow_repeats=not no_repeat)
        arguments = [[itd_classify[x] for x in pred_answer]
                     for pred_answer in answers]

        tags = join_2_steps(boundaries, arguments)
        print(prop_conll(verbs, tags, len(sent)))
def evaluate_srl_2_steps(no_repeat=False, find_preds_automatically=False, gold_file=None):
    """
    Prints the output of a 2-step SRL system in CoNLL style for evaluating.
    """
    # Load metadata, network, reader and inverse tag dictionary for the
    # boundary-detection stage, then the same for argument classification.
    boundary_md = Metadata.load_from_file('srl_boundary')
    boundary_net = taggers.load_network(boundary_md)
    boundary_reader = taggers.create_reader(boundary_md, gold_file)
    boundary_itd = boundary_reader.get_inverse_tag_dictionary()

    classify_md = Metadata.load_from_file('srl_classify')
    classify_net = taggers.load_network(classify_md)
    classify_reader = taggers.create_reader(classify_md, gold_file)
    classify_itd = classify_reader.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        predicate_iter = iter(boundary_reader.predicates)

    sentences = [sentence for sentence, _ in boundary_reader.sentences]

    for sentence in sentences:
        if find_preds_automatically:
            predicate_positions = tagger.find_predicates(sentence)
        else:
            predicate_positions = next(predicate_iter)

        verbs = [(pos, sentence[pos].word) for pos in predicate_positions]

        codified_bound = np.array(
            [boundary_reader.converter.convert(tok) for tok in sentence])
        codified_class = np.array(
            [classify_reader.converter.convert(tok) for tok in sentence])

        # Stage 1: detect argument boundaries; one answer per predicate.
        net_answers = boundary_net.tag_sentence(codified_bound, predicate_positions)
        boundaries = [[boundary_itd[tag] for tag in answer]
                      for answer in net_answers]
        arg_limits = [utils.boundaries_to_arg_limits(bounds)
                      for bounds in boundaries]

        # Stage 2: classify the arguments within the detected limits.
        net_answers = classify_net.tag_sentence(codified_class,
                                                predicate_positions,
                                                arg_limits,
                                                allow_repeats=not no_repeat)
        arguments = [[classify_itd[tag] for tag in answer]
                     for answer in net_answers]

        tags = join_2_steps(boundaries, arguments)
        print(prop_conll(verbs, tags, len(sentence)))
def tag_tokens(self, tokens, no_repeats=False):
    """
    Runs the SRL process on the given tokens.

    :param tokens: a list of tokens (as strings)
    :param no_repeats: whether to prevent repeated argument labels
    :returns: an SRLAnnotatedSentence wrapping the original tokens and the
        argument structures produced by _group_arguments (one per predicate
        found in the sentence).
    """
    # Portuguese text is cleaned before Token construction.
    if self.language == 'pt':
        token_objs = [attributes.Token(utils.clean_text(tok, False))
                      for tok in tokens]
    else:
        token_objs = [attributes.Token(tok) for tok in tokens]

    codified_bound = np.array(
        [self.boundary_reader.converter.convert(tok) for tok in token_objs])
    codified_class = np.array(
        [self.classify_reader.converter.convert(tok) for tok in token_objs])

    predicate_positions = self.find_predicates(token_objs)

    # Stage 1: argument boundary detection.
    # The network returns one answer per predicate.
    net_answers = self.boundary_nn.tag_sentence(codified_bound,
                                                predicate_positions)
    boundaries = [[self.boundary_itd[tag] for tag in answer]
                  for answer in net_answers]
    arg_limits = [utils.boundaries_to_arg_limits(bounds)
                  for bounds in boundaries]

    # Stage 2: classify the arguments within the detected limits.
    net_answers = self.classify_nn.tag_sentence(codified_class,
                                                predicate_positions,
                                                arg_limits,
                                                allow_repeats=not no_repeats)
    arguments = [[self.classify_itd[tag] for tag in answer]
                 for answer in net_answers]

    structures = _group_arguments(tokens, predicate_positions,
                                  boundaries, arguments)
    return SRLAnnotatedSentence(tokens, structures)