Beispiel #1
0
    def eval(self, gold: Discourse, parse: Discourse):
        """Run every evaluation pass on one (gold, parse) discourse pair.

        If ``self.binarize`` is truthy, both discourses are first replaced
        by the result of their ``binarize`` method, called with
        ``left_heavy=self.left_heavy``. The pair is then handed to each
        ``_eval_*`` helper in a fixed order. Nothing is returned; whatever
        the helpers record is their own side effect.

        :param gold: reference discourse.
        :param parse: discourse produced by the system under evaluation.
        """
        if self.binarize:
            gold, parse = (
                gold.binarize(left_heavy=self.left_heavy),
                parse.binarize(left_heavy=self.left_heavy),
            )

        # Order matters only in that it mirrors the fixed sequence of passes.
        passes = (
            self._eval_sent,
            self._eval_edu,
            self._eval_span,
            self._eval_nuclear,
            self._eval_relation,
            self._eval_nuclear_classes,
            self._eval_relation_classes,
        )
        for run_pass in passes:
            run_pass(gold, parse)
Beispiel #2
0
 def cut(self, label, text, start=0, end=-1, info=None):
     """Segment a text range into sentences and EDUs and wrap them in a Discourse.

     Sentences come from ``self.cut_sent(text, start, end)``; each sentence
     is appended to the sentence list and then passed to ``self.cut_edu``,
     whose results are collected in order.

     :param label: identifier forwarded to the resulting ``Discourse`` and
         used in log/error messages.
     :param text: the text to segment.
     :param start: start offset forwarded to ``cut_sent``.
     :param end: end offset; any negative value means ``len(text)``.
     :param info: extra payload attached to ``SegmentError`` on failure.
     :return: ``Discourse(label, text, (start, end), edus, sentences)``.
     :raises SegmentError: if ``SentenceParseError`` is raised while
         segmenting; the original error is chained as ``__cause__``.
     """
     if end < 0:
         end = len(text)
     sentences = []
     edus = []
     try:
         for sentence in self.cut_sent(text, start, end):
             sentences.append(sentence)
             edus.extend(self.cut_edu(sentence))
     except SentenceParseError as e:
         # Lazy logging args: formatting only happens if the record is emitted.
         logger.error("during segmenting %s, %s", label, e)
         # ``(label,)`` keeps the message correct even when label is a tuple;
         # ``from e`` records the parse error as the explicit cause.
         raise SegmentError("error segmenting %s" % (label,), label, text,
                            start, end, info) from e
     return Discourse(label, text, (start, end), edus, sentences)
Beispiel #3
0
def sr_oracle(discorse: Discourse):
    """Yield the shift-reduce transitions that reproduce a discourse tree.

    The discourse is binarized and its nodes are visited in the order
    given by ``traverse()``. Each ``EDU`` node produces
    ``(SRTransition.SHIFT, None)``; each ``RelationNode`` produces
    ``(SRTransition.REDUCE, node.nuclear)``. Nodes of any other type are
    skipped silently.
    """
    binary_tree = discorse.binarize()
    for item in binary_tree.traverse():
        if isinstance(item, EDU):
            action = (SRTransition.SHIFT, None)
        elif isinstance(item, RelationNode):
            action = (SRTransition.REDUCE, item.nuclear)
        else:
            continue
        yield action
Beispiel #4
0
    def _dom2discourse(p, label, info):
        """Convert one paragraph XML element into a ``Discourse``.

        Reads the ``RAW`` child of ``p`` for the text, sentence spans,
        sentence ids, EDU spans and annotated span, then walks the children
        of the ``Relations`` child and registers each one through
        ``discourse.add_relation``.

        :param p: element with ``RAW`` and ``Relations`` children.
        :param label: label forwarded to ``Discourse``.
        :param info: extra payload forwarded to ``Discourse``.
        :return: the populated ``Discourse``.
        """
        raw = p.find("RAW")  # type: ElementTree.Element
        marked_text = raw.get("Sentence")
        sentence_spans = CDTB._xml2pos(raw.get("SentencePosition"))

        # The "Sentence" attribute carries one extra character per preceding
        # sentence relative to the span offsets (presumably a "|" separator
        # -- confirm against the writer), so the k-th span is shifted by k.
        text = ''.join(
            marked_text[begin + skipped:stop + skipped]
            for skipped, (begin, stop) in enumerate(sentence_spans))

        sid_attr = raw.get("SID")
        sentence_ids = sid_attr.split("|") if sid_attr else []
        sentences = []
        for idx, span in enumerate(sentence_spans):
            sentence_id = sentence_ids[idx] if sentence_ids else None
            sentences.append(
                Sentence(span, text[slice(*span)], sid=sentence_id))

        edus = [
            EDU(edu_span, text[slice(*edu_span)])
            for edu_span in CDTB._xml2pos(raw.get("EduPosition"))
        ]

        annotated_span = CDTB._xml2pos(raw.get("AnnotatedPosition"))[0]
        discourse = Discourse(label, text, annotated_span, edus, sentences,
                              info)

        # Relation nodes seen so far, keyed by their "ID" attribute, so a
        # later element can look up its parent via "ParentId".
        nodes_by_id = {}
        for r in p.find("Relations"):  # type: ElementTree.Element
            rid = r.get("ID")
            parent = nodes_by_id.get(r.get("ParentId"))

            # Overall span: start of the first child span to end of the last.
            child_spans = CDTB._xml2pos(r.get("SentencePosition"))
            rspan = child_spans[0][0], child_spans[-1][1]

            center = r.get("Center")
            nuclear = nuclear_map[center] if center else None

            # Tri-state: None when unspecified, else explicit vs. not.
            connective_type = r.get("ConnectiveType")
            if connective_type:
                is_explicit = connective_type == "显式关系"
            else:
                is_explicit = None

            connective = None
            connective_span = None
            if is_explicit:
                connective = r.get("Connective").split("…")
                connective_span = CDTB._xml2pos(r.get("ConnectivePosition"))

            fine_type = r.get("RelationType") or None
            # Prefer the stored coarse type; otherwise derive it from the
            # fine type when one is present.
            coarse_type = r.get("CoarseRelationType")
            if not coarse_type:
                coarse_type = coarsemap[fine_type] if fine_type else None

            nodes_by_id[rid] = discourse.add_relation(
                rspan,
                nuclear,
                relation=Relation(explicit=is_explicit,
                                  connective=connective,
                                  connective_span=connective_span,
                                  fine=fine_type,
                                  coarse=coarse_type),
                parent=parent)
        return discourse
Beispiel #5
0
    def _discourse2dom(discourse: Discourse):
        """Serialize a ``Discourse`` into a ``P`` XML element.

        The result has two children: ``RAW`` (text, sentence/EDU spans,
        root id) and ``Relations`` (one ``R`` element per relation node).

        :param discourse: the discourse to serialize.
        :return: the ``P`` element.
        """
        text = discourse.text

        p = et.Element("P")
        p.set("ID", str(discourse.label))

        raw = et.Element("RAW")
        spans = [sent.span for sent in discourse.sentences]
        # Pad with filler spans so the spans start at 0 and end at len(text).
        leading_edge = spans[0][0]
        if leading_edge != 0:
            spans.insert(0, (0, leading_edge))
        trailing_edge = spans[-1][1]
        if trailing_edge != len(text):
            spans.append((trailing_edge, len(text)))

        raw.set("AnnotatedPosition", CDTB._pos2xml([discourse.span]))
        raw.set("SentencePosition", CDTB._pos2xml(spans))
        raw.set("SID", "")
        raw.set("EduPosition",
                CDTB._pos2xml([edu.span for edu in discourse.edus]))
        raw.set("Sentence", '|'.join(text[slice(*span)] for span in spans))
        # ROOT is only meaningful when the discourse forms a complete tree.
        if discourse.complete():
            raw.set("ROOT", str(discourse.index(discourse.tree())))
        else:
            raw.set("ROOT", "")

        relations = et.Element("Relations")
        relations.set("Nodes", str(len(discourse.relations)))
        if discourse.complete():
            relations.set("Height", str(discourse.tree().height() - 2))
        else:
            relations.set("Height", "")

        for node in discourse.relations:
            r = et.Element("R")
            r.set("ID", str(discourse.index(node)))
            r.set("Center",
                  nuclear_map_rev[node.nuclear] if node.nuclear else "")
            relations.append(r)

            # Only RelationNode children have ids; every child has a span.
            nested_ids = [
                discourse.index(kid) for kid in node
                if isinstance(kid, RelationNode)
            ]
            kid_spans = [kid.span for kid in node]
            r.set("ChildList", "|".join(str(i) for i in nested_ids))
            r.set("SentencePosition", CDTB._pos2xml(kid_spans))
            if node.parent():
                parent_id = str(discourse.index(node.parent()))
            else:
                parent_id = "-1"  # marker for "no parent"
            r.set("ParentId", parent_id)
            r.set("Sentence",
                  "|".join(text[slice(*span)] for span in kid_spans))

            # relation type
            rel = node.relation
            if rel:
                explicit = rel.explicit
                fine = rel.fine
                coarse = rel.coarse
                connective = rel.connective
                connective_span = rel.connective_span
            else:
                explicit = fine = coarse = None
                connective = connective_span = None

            if explicit is None:
                connective_type = ""
            elif explicit:
                connective_type = "显式关系"
            else:
                connective_type = "隐式关系"
            r.set("ConnectiveType", connective_type)
            r.set("RelationType", fine or "")
            # Fall back to the coarse type derived from the fine type.
            r.set("CoarseRelationType",
                  coarse or (coarsemap[fine] if fine else ""))

            # Connective details are only written for explicit relations.
            connective_text = ""
            connective_pos = ""
            if explicit:
                if connective:
                    connective_text = "…".join(connective)
                if connective_span:
                    connective_pos = CDTB._pos2xml(connective_span)
            r.set("Connective", connective_text)
            r.set("ConnectivePosition", connective_pos)

        p.append(raw)
        p.append(relations)
        return p