# typing and nltk imports used below; EDU, TEXT and Sentence are
# project-local node types assumed to be imported from the parser package.
from typing import List
from nltk.tree import ParentedTree

def cut_sent(self, text: str, sid=None) -> List[Sentence]:
    # Split raw text into Sentence objects at end-of-sentence punctuation
    # (a standalone sketch of this logic follows cut_edu below).
    last_cut = 0
    sentences = []
    for i in range(len(text)):
        if text[i] in self._eos:
            sentences.append(Sentence([TEXT(text[last_cut:i + 1])]))
            last_cut = i + 1
    if last_cut < len(text):
        # keep a trailing fragment that does not end with EOS punctuation
        sentences.append(Sentence([TEXT(text[last_cut:])]))
    return sentences
def parse(self, para):
    # Copy the gold EDUs (text, words, POS tags) so the original
    # paragraph is not mutated, then run the tree parser on the copies.
    edus = []
    for edu in para.edus():
        edu_copy = EDU([TEXT(edu.text)])
        setattr(edu_copy, "words", edu.words)
        setattr(edu_copy, "tags", edu.tags)
        edus.append(edu_copy)
    return self.parser.parse(edus)
def cut_edu(self, sent: Sentence) -> List[EDU]:
    if not hasattr(sent, "parse"):
        # no constituency parse attached yet, fall back to parsing the raw text
        print(sent.text)
        parse = self.parser.parse(sent.text)
    else:
        parse = getattr(sent, "parse")
    parse = ParentedTree.fromstring(parse.pformat())
    # height() == 2 subtrees are the (POS tag, word) pre-terminals
    children = list(
        parse.subtrees(
            lambda t: t.height() == 2 and t.label() != '-NONE-'))
    edus = []
    last_edu_words = []
    last_edu_tags = []
    offset = 0
    for child in children:
        # restore bracket characters escaped by the treebank convention
        if child[0] == '-LRB-':
            child[0] = '('
        if child[0] == '-RRB-':
            child[0] = ')'
        last_edu_words.append(child[0])
        last_edu_tags.append(child.label())
        # close an EDU at sentence-final punctuation, or at a candidate
        # boundary word when the segmentation model predicts a split
        if child[0] in self._eos or (child[0] in self.candidate and
                                     self.model.predict(offset, parse)):
            text = "".join(last_edu_words)
            edu = EDU([TEXT(text)])
            setattr(edu, "words", last_edu_words)
            setattr(edu, "tags", last_edu_tags)
            edus.append(edu)
            last_edu_words = []
            last_edu_tags = []
        offset += len(child[0])
    # flush any remaining words as the final EDU
    if last_edu_words:
        text = "".join(last_edu_words)
        edu = EDU([TEXT(text)])
        setattr(edu, "words", last_edu_words)
        setattr(edu, "tags", last_edu_tags)
        edus.append(edu)
    return edus
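The two helpers above can be hard to follow out of context; the sketch below is a minimal, self-contained illustration of the same two ideas (EOS-based splitting and iterating the pre-terminals of an nltk tree). The punctuation set and the example tree are assumptions for illustration, not values taken from the class.
from typing import List
from nltk.tree import ParentedTree

def split_sentences(text: str, eos=frozenset("。！？")) -> List[str]:
    # plain-string version of the cut_sent loop above
    sentences, last_cut = [], 0
    for i, ch in enumerate(text):
        if ch in eos:
            sentences.append(text[last_cut:i + 1])
            last_cut = i + 1
    if last_cut < len(text):
        sentences.append(text[last_cut:])
    return sentences

print(split_sentences("今天天气很好。我们去公园吧！好"))
# -> ['今天天气很好。', '我们去公园吧！', '好']

# cut_edu walks the height() == 2 subtrees of a ParentedTree, i.e. the
# (POS tag, word) pre-terminals, in left-to-right order:
tree = ParentedTree.fromstring(
    "(IP (NP (PN 我们)) (VP (VV 去) (NP (NN 公园))) (PU 。))")
for leaf in tree.subtrees(lambda t: t.height() == 2 and t.label() != '-NONE-'):
    print(leaf.label(), leaf[0])
# -> PN 我们 / VV 去 / NN 公园 / PU 。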
Example #4
from itertools import chain
import logging
import torch
# EDU, TEXT, CDTB, PartitionPtrParser and the eval helpers used below are
# project-local imports from the parser package and are not shown here.

def parse_and_eval(dataset, model):
    model.eval()
    parser = PartitionPtrParser(model)
    # keep only paragraphs that have a root discourse relation (a gold tree)
    golds = list(filter(lambda d: d.root_relation(), chain(*dataset)))
    num_instances = len(golds)
    # parser inputs: bare EDU copies carrying only text, words and POS tags
    strips = []
    for paragraph in golds:
        edus = []
        for edu in paragraph.edus():
            edu_copy = EDU([TEXT(edu.text)])
            setattr(edu_copy, "words", edu.words)
            setattr(edu_copy, "tags", edu.tags)
            edus.append(edu_copy)
        strips.append(edus)
    parses = []
    for edus in strips:
        parse = parser.parse(edus)
        parses.append(parse)
    return num_instances, parse_eval(parses, golds)
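For reference, parse_and_eval would typically be called on a held-out split during training, e.g. (with `validate_set` as a placeholder name, not something defined in this snippet):
#   num_instances, scores = parse_and_eval(validate_set, model)
#   logging.info("evaluated %d paragraphs", num_instances)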
def main():
    logging.basicConfig(level=logging.INFO)
    with open("data/models/treebuilder.partptr.model", "rb") as model_fd:
        model = torch.load(model_fd, map_location="cpu")
        model.eval()
        model.use_gpu = False
    parser = PartitionPtrParser(model)
    cdtb = CDTB("data/CDTB",
                "TRAIN",
                "VALIDATE",
                "TEST",
                ctb_dir="data/CTB",
                preprocess=True,
                cache_dir="data/cache")
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))

    strips = []
    for paragraph in golds:
        edus = []
        for edu in paragraph.edus():
            edu_copy = EDU([TEXT(edu.text)])
            setattr(edu_copy, "words", edu.words)
            setattr(edu_copy, "tags", edu.tags)
            edus.append(edu_copy)
        strips.append(edus)
    parses = []
    parse_sessions = []
    for edus in strips:
        parse, session = parser.parse(edus, ret_session=True)
        parses.append(parse)
        parse_sessions.append(session)

    # macro cdtb scores
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logging.info("CDTB macro (strict) scores:")
    logging.info(eval.gen_parse_report(*cdtb_macro_scores))
    # micro cdtb scores
    cdtb_micro_scores = eval.parse_eval(parses, golds, average="micro")
    logging.info("CDTB micro (strict) scores:")
    logging.info(eval.gen_parse_report(*cdtb_micro_scores))

    # micro rst scores
    rst_scores = eval.rst_parse_eval(parses, golds)
    logging.info("RST styled scores:")
    logging.info(eval.gen_parse_report(*rst_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logging.info("nuclear scores:")
    logging.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logging.info("coarse relation scores:")
    logging.info(eval.gen_category_report(ctype_scores))
    logging.info("fine relation scores:")
    logging.info(eval.gen_category_report(ftype_scores))

    # draw gold and parse tree along with decision hotmap
    for gold, parse, session in zip(golds, parses, parse_sessions):
        gold.draw()
        session.draw_decision_hotmap()
        parse.draw()
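The snippet ends without invoking main(); when run as a script it would normally be wrapped in the usual entry-point guard:
if __name__ == '__main__':
    main()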