Example #1
def eval_trees(silver_tree,
               gold_tree,
               rst_span_perf,
               rst_nuc_perf,
               rst_rel_perf,
               use_parseval=False):
    # RST-Parseval by default; original Parseval when use_parseval=True
    met = Metrics(levels=['span', 'nuclearity', 'relation'],
                  use_parseval=use_parseval)
    met.eval(gold_tree, silver_tree)
    assert met.span_perf.hit_num / met.span_num == rst_span_perf
    assert met.nuc_perf.hit_num / met.span_num == rst_nuc_perf
    assert met.rela_perf.hit_num / met.span_num == rst_rel_perf
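A minimal usage sketch for the helper above. The paths, the parser call, and the expected ratios are hypothetical; the RstTree construction pattern is borrowed from Examples #3 and #5.

# Hypothetical: build a gold tree from a .dis/.merge pair and obtain a
# predicted ("silver") tree from an already-loaded parser (cf. Example #5).
gold_rst = RstTree('./examples/test.dis', './examples/test.merge')
gold_rst.build()
silver_rst = parser.sr_parse(doc)  # doc previously read from the .merge file
eval_trees(silver_rst, gold_rst,
           rst_span_perf=1.0,
           rst_nuc_perf=0.9,
           rst_rel_perf=0.85,
           use_parseval=False)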
Example #2
def eval_trees(silver_tree,
               gold_tree,
               rst_span_perf,
               rst_nuc_perf,
               span_perf,
               nuc_perf):
    # RST-Parseval
    met = Metrics(use_parseval=False, levels=['span', 'nuclearity'])
    met.eval(gold_tree, silver_tree)
    assert met.span_perf.hit_num / met.span_num == rst_span_perf
    assert met.nuc_perf.hit_num / met.span_num == rst_nuc_perf
    
    # Parseval
    met = Metrics(use_parseval=True, levels=['span', 'nuclearity'])
    met.eval(gold_tree, silver_tree)
    assert met.span_perf.hit_num / met.span_num == span_perf
    assert met.nuc_perf.hit_num / met.span_num == nuc_perf
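The two blocks above differ only in use_parseval. Under RST-Parseval every node of the binarized tree, including single-EDU leaves, counts toward span_num, while original Parseval scores only the internal constituents, so the same tree pair normally yields higher RST-Parseval ratios (assuming this Metrics class follows the standard definitions from Morey et al., 2017).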
Example #3
    def eval_parser(self,
                    dev_data=None,
                    path='./examples',
                    use_parseval=False):
        """ Test the parsing performance"""
        # Evaluation
        met = Metrics(use_parseval, levels=['span', 'nuclearity'])
        # ----------------------------------------
        # Read all files from the given path
        if dev_data is None:
            eval_list = [
                os.path.join(path, fname) for fname in os.listdir(path)
                if fname.endswith('.merge')
            ]
        else:
            eval_list = dev_data
        total_cost = 0
        for eval_instance in eval_list:
            # ----------------------------------------
            # Read *.merge file
            doc = Doc()
            if dev_data is not None:
                gold_rst = eval_instance
                doc = eval_instance.doc
            else:
                doc.read_from_fmerge(eval_instance)
                fdis = eval_instance.replace('.merge', '.dis')
                gold_rst = RstTree(fdis, eval_instance)
                gold_rst.build()

            _, gold_action_seq = gold_rst.generate_action_samples()
            gold_action_seq = list(
                map(lambda x: self.data_helper.action_map[x], gold_action_seq))
            pred_rst, cost = self.parser.sr_parse(
                [doc], [torch.cuda.LongTensor(gold_action_seq)],
                None,
                is_train=False)
            total_cost += cost
            pred_rst = pred_rst[0]
            # ----------------------------------------
            # Evaluate with gold RST tree
            met.eval(gold_rst, pred_rst)

        print("Total cost: ", total_cost)
        met.report()
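A hypothetical invocation of the method above, assuming an evaluator object that owns a trained self.parser and a self.data_helper with an action_map:

# Evaluate on raw .merge/.dis pairs found in a directory:
evaluator.eval_parser(path='./examples', use_parseval=False)
# Or on preprocessed dev instances that expose a .doc attribute:
evaluator.eval_parser(dev_data=dev_instances)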
Example #4
    def eval_parser(self,
                    dev_data=None,
                    path='./examples',
                    save_preds=True,
                    use_parseval=False):
        """ Test the parsing performance"""
        # Evaluation
        met = Metrics(levels=['span', 'nuclearity', 'relation'],
                      use_parseval=use_parseval)
        # ----------------------------------------
        # Read all files from the given path
        if dev_data is None:
            dev_data = [
                os.path.join(path, fname) for fname in os.listdir(path)
                if fname.endswith('.dis')
            ]
        total_cost = 0
        for eval_instance in dev_data:
            # ----------------------------------------
            fmerge = eval_instance.replace('.dis', '.merge')
            doc = Doc()
            doc.read_from_fmerge(fmerge)
            gold_rst = RstTree(eval_instance, fmerge)
            gold_rst.build()

            # tok_edus = [nltk.word_tokenize(edu) for edu in doc.doc_edus]
            tok_edus = [edu.split(" ") for edu in doc.doc_edus]
            tokens = flatten(tok_edus)

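            # Wrap the RST Doc in a coreference-style Document (no coref
            # clusters, a single dummy speaker, newswire genre), presumably
            # so the parser's coreference-aware components can consume it.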
            coref_document = Document(raw_text=None,
                                      tokens=tokens,
                                      sents=tok_edus,
                                      corefs=[],
                                      speakers=["0"] * len(tokens),
                                      genre="nw",
                                      filename=None)

            coref_document.token_dict = doc.token_dict
            coref_document.edu_dict = doc.edu_dict
            doc = coref_document

            gold_action_seq, gold_rel_seq = gold_rst.decode_rst_tree()

            gold_action_seq = [action_map[x] for x in gold_action_seq]
            gold_relation_seq = [
                relation_map[x.lower()] for x in gold_rel_seq if x is not None
            ]
            pred_rst, cost = self.parser.sr_parse(
                doc, torch.cuda.LongTensor(gold_action_seq),
                torch.cuda.LongTensor(gold_relation_seq))
            total_cost += cost

            if save_preds:
                if not os.path.isdir('../data/predicted_trees'):
                    os.mkdir('../data/predicted_trees')

                filename = eval_instance.split(os.sep)[-1]
                filepath = f'../data/predicted_trees/{self.config[MODEL_NAME]}_{filename}'

                pred_brackets = pred_rst.bracketing()
                # Write brackets into file
                Evaluator.writebrackets(filepath, pred_brackets)
            # ----------------------------------------
            # Evaluate with gold RST tree
            met.eval(gold_rst, pred_rst)

        print("Total cost: ", total_cost)
        if use_parseval:
            print("Reporting original Parseval metric.")
        else:
            print("Reporting RST Parseval metric.")
        met.report()
Example #5
    def eval_parser(self,
                    path='./examples',
                    report=False,
                    bcvocab=None,
                    draw=True):
        """ Test the parsing performance"""
        # Evaluation
        met = Metrics(levels=['span', 'nuclearity', 'relation'])
        # ----------------------------------------
        # Read all files from the given path
        doclist = [
            os.path.join(path, fname) for fname in os.listdir(path)
            if fname.endswith('.merge')
        ]
        pred_forms = []
        gold_forms = []
        depth_per_relation = {}
        for fmerge in doclist:
            # ----------------------------------------
            # Read *.merge file
            doc = Doc()
            doc.read_from_fmerge(fmerge)
            # ----------------------------------------
            # Parsing
            pred_rst = self.parser.sr_parse(doc, bcvocab)
            if draw:
                pred_rst.draw_rst(fmerge.replace(".merge", ".ps"))
            # Get brackets from parsing results
            pred_brackets = pred_rst.bracketing()
            fbrackets = fmerge.replace('.merge', '.brackets')
            # Write brackets into file
            Evaluator.writebrackets(fbrackets, pred_brackets)
            # ----------------------------------------
            # Evaluate with gold RST tree
            if report:
                fdis = fmerge.replace('.merge', '.dis')
                gold_rst = RstTree(fdis, fmerge)
                gold_rst.build()
                met.eval(gold_rst, pred_rst)
                for node in pred_rst.postorder_DFT(pred_rst.tree, []):
                    pred_forms.append(node.form)
                for node in gold_rst.postorder_DFT(gold_rst.tree, []):
                    gold_forms.append(node.form)

                nodes = gold_rst.postorder_DFT(gold_rst.tree, [])
                inner_nodes = [
                    node for node in nodes
                    if node.lnode is not None and node.rnode is not None
                ]
                for node in inner_nodes:
                    relation = node.rnode.relation if node.form == 'NS' else node.lnode.relation
                    rela_class = RstTree.extract_relation(relation)
                    if rela_class in depth_per_relation:
                        depth_per_relation[rela_class].append(node.depth)
                    else:
                        depth_per_relation[rela_class] = [node.depth]
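                    # Reconstruct each child's surface text and lemmas; these
                    # are consumed only by the commented-out debug print below.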
                    lnode_text = ' '.join([
                        gold_rst.doc.token_dict[tid].word
                        for tid in node.lnode.text
                    ])
                    lnode_lemmas = ' '.join([
                        gold_rst.doc.token_dict[tid].lemma
                        for tid in node.lnode.text
                    ])
                    rnode_text = ' '.join([
                        gold_rst.doc.token_dict[tid].word
                        for tid in node.rnode.text
                    ])
                    rnode_lemmas = ' '.join([
                        gold_rst.doc.token_dict[tid].lemma
                        for tid in node.rnode.text
                    ])
                    # if rela_class == 'Topic-Change':
                    #     print(fmerge)
                    #     print(relation)
                    #     print(lnode_text)
                    #     print(rnode_text)
                    #     print()

        if report:
            met.report()
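Beyond the reported metrics, this variant also fills depth_per_relation, mapping each gold relation class to the tree depths at which it occurs. The dictionary is built but never printed, so it is presumably intended for interactive inspection or downstream analysis.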
Example #6
    def eval_parser(self,
                    data_dir,
                    output_dir='./examples',
                    report=False,
                    bcvocab=None,
                    draw=True,
                    isFlat=False):
        """ Test the parsing performance"""
        # Evaluation
        met = Metrics(levels=['span', 'nuclearity', 'relation'])
        # ----------------------------------------
        # Read all files from the given path
        with open(os.path.join(output_dir, "Treebank/TEST", "processed_data.p"), 'rb') as file:
            doclist = pickle.load(file)
        fnames = [fn for fn in os.listdir(data_dir) if fn.endswith(".out")]
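        # NOTE: the zip() below pairs each pickled document with a directory
        # entry, which silently assumes the pickle order matches os.listdir()
        # order; sorting fnames (and pickling in the same order) would make
        # the pairing deterministic.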

        pred_forms = []
        gold_forms = []
        depth_per_relation = {}
        for lines, fname in zip(doclist, fnames):
            # ----------------------------------------
            # Read *.merge file
            doc = Doc()
            doc.read_from_fmerge(lines)
            fout = os.path.join(data_dir, fname)
            print(fout)
            # ----------------------------------------
            # Parsing
            print("************************ predict rst ************************")
            pred_rst = self.parser.sr_parse(doc, isFlat, bcvocab)
            if draw:
                pred_rst.draw_rst(fout + '.ps')
            # Get brackets from parsing results
            pred_brackets = pred_rst.bracketing(isFlat)
            fbrackets = fout + '.brackets'
            # Write brackets into file
            Evaluator.writebrackets(fbrackets, pred_brackets)
            # ----------------------------------------
            # Evaluate with gold RST tree
            if report:
                print("************************ gold rst ************************")
                fdis = fout + '.dis'
                gold_rst = RstTree(fdis, lines, isFlat)
                gold_rst.build()
                met.eval(gold_rst, pred_rst, isFlat)
                if isFlat:
                    for node in pred_rst.postorder_flat_DFT(pred_rst.tree, []):
                        pred_forms.append(node.form)
                    for node in gold_rst.postorder_flat_DFT(gold_rst.tree, []):
                        gold_forms.append(node.form)
                    nodes = gold_rst.postorder_flat_DFT(gold_rst.tree, [])
                else:
                    for node in pred_rst.postorder_DFT(pred_rst.tree, []):
                        pred_forms.append(node.form)
                    for node in gold_rst.postorder_DFT(gold_rst.tree, []):
                        gold_forms.append(node.form)
                    nodes = gold_rst.postorder_DFT(gold_rst.tree, [])
                inner_nodes = [
                    node for node in nodes
                    if node.lnode is not None and node.rnode is not None
                ]
                for node in inner_nodes:
                    relation = node.rnode.relation if node.form == 'NS' else node.lnode.relation
                    rela_class = RstTree.extract_relation(relation)
                    if rela_class in depth_per_relation:
                        depth_per_relation[rela_class].append(node.depth)
                    else:
                        depth_per_relation[rela_class] = [node.depth]


        if report:
            met.report()