Esempio n. 1
0
def evaluate_srl_1step(find_preds_automatically=False, gold_file=None):
    """
    Evaluates the network on the SRL task performed with one step for
    id + class.

    For every sentence provided by the reader, the network output is
    converted back to tag names, passed through ``convert_iob_to_iobes``
    and the result of ``prop_conll`` is printed.

    :param find_preds_automatically: if True, predicate positions are
        obtained from ``taggers.SRLTagger().find_predicates``; otherwise
        they are taken, one entry per sentence, from the reader's
        ``predicates``.
    :param gold_file: forwarded to ``taggers.create_reader`` as
        ``gold_file``.
    """
    md = Metadata.load_from_file('srl')
    nn = taggers.load_network(md)
    r = taggers.create_reader(md, gold_file=gold_file)

    # maps the numeric network answers back to tag names
    itd = r.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(r.predicates)

    for sent in r.sentences:

        # the other elements in the list are the tags for each proposition
        actual_sent = sent[0]

        if find_preds_automatically:
            # The returned positions index into the token list (see `verbs`
            # below), so the tagger receives the tokens rather than the
            # whole reader entry.
            pred_positions = tagger.find_predicates(actual_sent)
        else:
            # next() builtin instead of the Python 2-only .next() method
            pred_positions = next(iter_predicates)

        verbs = [(position, actual_sent[position].word)
                 for position in pred_positions]
        sent_codified = np.array([r.converter.convert(token)
                                  for token in actual_sent])

        # one answer sequence per predicate
        answers = nn.tag_sentence(sent_codified, pred_positions)
        tags = [convert_iob_to_iobes([itd[x] for x in pred_answer])
                for pred_answer in answers]

        # single-argument print(...) behaves the same on Python 2 and 3
        print(prop_conll(verbs, tags, len(actual_sent)))
Esempio n. 2
0
def evaluate_srl_2_steps(no_repeat=False,
                         find_preds_automatically=False,
                         gold_file=None):
    """
    Prints the output of a 2-step SRL system in CoNLL style for evaluating.

    The first step tags argument boundaries with the 'srl_boundary'
    network; the second step classifies the delimited arguments with the
    'srl_classify' network. The outputs of both steps are merged with
    ``join_2_steps`` and printed through ``prop_conll``.

    :param no_repeat: negated and passed as ``allow_repeats`` to the
        classification network's ``tag_sentence``.
    :param find_preds_automatically: if True, predicate positions are
        obtained from ``taggers.SRLTagger().find_predicates``; otherwise
        they are taken, one entry per sentence, from the boundary reader's
        ``predicates``.
    :param gold_file: forwarded to ``taggers.create_reader`` for both
        readers.
    """
    # load boundary identification network and reader
    md_boundary = Metadata.load_from_file('srl_boundary')
    nn_boundary = taggers.load_network(md_boundary)
    reader_boundary = taggers.create_reader(md_boundary, gold_file)
    itd_boundary = reader_boundary.get_inverse_tag_dictionary()

    # same for arg classification
    md_classify = Metadata.load_from_file('srl_classify')
    nn_classify = taggers.load_network(md_classify)
    reader_classify = taggers.create_reader(md_classify, gold_file)
    itd_classify = reader_classify.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(reader_boundary.predicates)

    # keep only the first element of each reader entry (the sentence itself)
    actual_sentences = [
        actual_sentence for actual_sentence, _ in reader_boundary.sentences
    ]

    for sent in actual_sentences:

        if find_preds_automatically:
            pred_pos = tagger.find_predicates(sent)
        else:
            # next() builtin instead of the Python 2-only .next() method,
            # which raises AttributeError on Python 3 iterators
            pred_pos = next(iter_predicates)

        verbs = [(position, sent[position].word) for position in pred_pos]

        # each network has its own converter, so the sentence is encoded twice
        sent_bound_codified = np.array(
            [reader_boundary.converter.convert(t) for t in sent])
        sent_class_codified = np.array(
            [reader_classify.converter.convert(t) for t in sent])

        # step 1: boundary tags, one sequence per predicate
        answers = nn_boundary.tag_sentence(sent_bound_codified, pred_pos)
        boundaries = [[itd_boundary[x] for x in pred_answer]
                      for pred_answer in answers]

        arg_limits = [
            utils.boundaries_to_arg_limits(pred_boundaries)
            for pred_boundaries in boundaries
        ]

        # step 2: classify the arguments delimited in step 1
        answers = nn_classify.tag_sentence(sent_class_codified,
                                           pred_pos,
                                           arg_limits,
                                           allow_repeats=not no_repeat)

        arguments = [[itd_classify[x] for x in pred_answer]
                     for pred_answer in answers]
        tags = join_2_steps(boundaries, arguments)

        print(prop_conll(verbs, tags, len(sent)))