コード例 #1
0
def speakers(contexts, anno):
    """Collect the speakers attached to an annotation.

    When `anno` is itself an EDU (according to `stac.is_edu`) only that
    unit is consulted; otherwise every EDU among `anno.terminals()` is.

    Takes : contexts (indexable by EDU, values expose `speaker()`),
            anno (the annotation to inspect)

    Returns a frozenset of the `speaker()` results.
    """
    edus = [anno] if stac.is_edu(anno) else [
        unit for unit in anno.terminals() if stac.is_edu(unit)
    ]
    return frozenset(contexts[edu].speaker() for edu in edus)
コード例 #2
0
ファイル: rfc.py プロジェクト: eipiplusun/educe
def speakers(contexts, anno):
    """Return the set of speakers for the given annotation unit.

    An EDU contributes only itself; any other annotation contributes
    the EDUs found among its `terminals()`.

    Takes : contexts (indexable by EDU, values expose `speaker()`),
            anno (the annotation to inspect)
    """
    if stac.is_edu(anno):
        members = (anno,)
    else:
        members = tuple(t for t in anno.terminals() if stac.is_edu(t))
    return frozenset(contexts[member].speaker() for member in members)
コード例 #3
0
ファイル: test_sanity.py プロジェクト: irit-melodi/educe
    def test_innocent(self):
        """no squawking on in-dialogue relation

        Builds a relation between two EDUs of the first fixture group and
        checks that `is_cross_dialogue` does not flag its copy.
        """
        source_edu, target_edu = self.edu1_1, self.edu1_2
        relation = FakeRelInst('r', source_edu, target_edu)

        document = FakeDocument(self.edus1, [relation], [])
        contexts = Context.for_edus(document)
        copies = document.copies
        # the copied source must still look like an EDU, whether reached
        # directly or through the copied relation
        self.assertTrue(stac.is_edu(copies[source_edu]))
        self.assertTrue(stac.is_edu(copies[relation].source))
        crosses_dialogue = is_cross_dialogue(contexts)
        self.assertFalse(crosses_dialogue(copies[relation]))
コード例 #4
0
ファイル: test_sanity.py プロジェクト: moreymat/educe
    def test_innocent(self):
        """no squawking on in-dialogue relation

        A relation whose two ends come from the same fixture group must
        not be reported by `is_cross_dialogue`.
        """
        edu_from, edu_to = self.edu1_1, self.edu1_2
        link = FakeRelInst('r', edu_from, edu_to)

        fake_doc = FakeDocument(self.edus1, [link], [])
        contexts = Context.for_edus(fake_doc)
        copied = fake_doc.copies
        # sanity: the copies are still recognised as EDUs
        self.assertTrue(stac.is_edu(copied[edu_from]))
        self.assertTrue(stac.is_edu(copied[link].source))
        checker = is_cross_dialogue(contexts)
        self.assertFalse(checker(copied[link]))
コード例 #5
0
ファイル: checks.py プロジェクト: chloebt/educe
def rough_type(anno):
    """Return a coarse category name for an annotation.

    Gives 'EDU' for segments and EDUs, 'relation' for relation
    instances, and the annotation's own `type` for anything else.
    """
    if anno.type == 'Segment':
        return 'EDU'
    if stac.is_edu(anno):
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
コード例 #6
0
ファイル: checks.py プロジェクト: chloebt/educe
 def in_dialogue(x):
     """Tell whether annotation `x` lies within the enclosing `units`.

     EDUs must be in `units` themselves, relation instances need both
     their source and target in `units`, and CDUs need all of their
     terminals in `units`.  Any other annotation yields False.
     """
     if stac.is_edu(x):
         return x in units
     if stac.is_relation_instance(x):
         return x.source in units and x.target in units
     if stac.is_cdu(x):
         return all(member in units for member in x.terminals())
     return False
コード例 #7
0
ファイル: graph.py プロジェクト: arne-cl/educe
 def _add_edu(self, node):
     """Add a plaintext pydot node for the EDU identified by `node`.

     The node label is the EDU label wrapped at 30 columns.  The font is
     turned red when the label is empty or when the underlying
     annotation is not an EDU according to `stac.is_edu`.

     Takes : node (identifier passed to `self.core.annotation` and used
             as the pydot node name)
     """
     anno = self.core.annotation(node)
     label = self._edu_label(anno)
     attrs = {
         'label': textwrap.fill(label, 30),
         'shape': 'plaintext',
     }
     # reuse the label computed above instead of calling
     # self._edu_label a second time
     if not label or not stac.is_edu(anno):
         attrs['fontcolor'] = 'red'
     self.add_node(pydot.Node(node, **attrs))
コード例 #8
0
ファイル: glozz.py プロジェクト: moreymat/educe
def search_glozz_off_by_one(inputs, k):
    """
    Find EDUs which have non-whitespace (or boundary) characters
    either on their right or left.

    Looks up document `k` in `inputs.corpus` and its contexts in
    `inputs.contexts`, then returns one `OffByOneItem` per unit that is
    an EDU and for which `is_maybe_off_by_one` holds on the document
    text.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    txt = doc.text()
    suspects = []
    for unit in doc.units:
        if stac.is_edu(unit) and is_maybe_off_by_one(txt, unit):
            suspects.append(OffByOneItem(doc, contexts, unit))
    return suspects
コード例 #9
0
ファイル: graph.py プロジェクト: eipiplusun/educe
 def in_dialogue(d_annos, anno):
     """Tell whether the given annotation is in the given dialogue.

     `d_annos` is the collection of annotations making up the dialogue.
     EDUs are looked up directly, relation instances need both ends in
     `d_annos`, CDUs need every terminal in `d_annos`; anything else is
     considered outside.
     """
     if stac.is_edu(anno):
         return anno in d_annos
     if stac.is_relation_instance(anno):
         return anno.source in d_annos and anno.target in d_annos
     if stac.is_cdu(anno):
         return all(member in d_annos for member in anno.terminals())
     return False
コード例 #10
0
 def in_dialogue(d_annos, anno):
     """Return True if `anno` belongs to the dialogue `d_annos`.

     Membership depends on the kind of annotation: an EDU must be in
     `d_annos`, a relation instance must have its source and target in
     `d_annos`, a CDU must have all of its terminals in `d_annos`.
     Other annotations never belong.
     """
     if stac.is_edu(anno):
         return anno in d_annos
     if stac.is_relation_instance(anno):
         return anno.source in d_annos and anno.target in d_annos
     if not stac.is_cdu(anno):
         return False
     return all(leaf in d_annos for leaf in anno.terminals())
コード例 #11
0
ファイル: main.py プロジェクト: chloebt/educe
def search_glozz_off_by_one(inputs, k):
    """
    Report EDUs which have non-whitespace (or boundary) characters
    either on their right or left.

    The document and its contexts are read from `inputs.corpus[k]` and
    `inputs.contexts[k]`; each unit that is an EDU and satisfies
    `is_maybe_off_by_one` against the document text becomes an
    `OffByOneItem` in the returned list.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    txt = doc.text()
    suspects = (
        unit for unit in doc.units
        if stac.is_edu(unit) and is_maybe_off_by_one(txt, unit)
    )
    return [OffByOneItem(doc, contexts, unit) for unit in suspects]
コード例 #12
0
ファイル: graph.py プロジェクト: eipiplusun/educe
 def node_speaker(anno):
     """Return the designated speaker for an EDU or CDU.

     An EDU gets `edu_speaker(anno)`.  A CDU gets the speaker shared by
     all of its terminals, or None when the terminals do not agree on
     exactly one speaker.  Other annotations get None.
     """
     if stac.is_edu(anno):
         return edu_speaker(anno)
     if not stac.is_cdu(anno):
         return None
     voices = frozenset(edu_speaker(leaf) for leaf in anno.terminals())
     # only a unanimous CDU has a designated speaker
     return next(iter(voices)) if len(voices) == 1 else None
コード例 #13
0
 def node_speaker(anno):
     """Give the single speaker associated with an EDU or CDU, if any.

     EDUs resolve through `edu_speaker`.  CDUs resolve to the one
     speaker common to every terminal; a CDU whose terminals yield zero
     or several distinct speakers resolves to None, as does any other
     kind of annotation.
     """
     if stac.is_edu(anno):
         return edu_speaker(anno)
     if stac.is_cdu(anno):
         distinct = frozenset(edu_speaker(t) for t in anno.terminals())
         if len(distinct) == 1:
             (only_speaker,) = distinct
             return only_speaker
     return None
コード例 #14
0
ファイル: checks.py プロジェクト: chloebt/educe
 def node_speaker(n):
     """Return the designated speaker for an EDU or CDU node.

     An EDU gets `edu_speaker(n)`.  A CDU gets the speaker shared by all
     of its terminals, or None when they do not agree on exactly one
     speaker.  Any other annotation gets None.
     """
     if stac.is_edu(n):
         return edu_speaker(n)
     if stac.is_cdu(n):
         # n.terminals() is queried once; the former unused `terms`
         # local and the duplicate call are gone
         speakers = frozenset(edu_speaker(t) for t in n.terminals())
         if len(speakers) == 1:
             return next(iter(speakers))
     return None
コード例 #15
0
ファイル: main.py プロジェクト: chloebt/educe
def anno_code(t):
    """
    Short code providing a clue what the annotation is.

    Checks are tried in order: 'r' for a Glozz relation, 'e' for an EDU,
    'u' for a Glozz unit, 's' for a Glozz schema; '???' when none match.
    """
    if is_glozz_relation(t):
        return 'r'
    if stac.is_edu(t):
        return 'e'
    if is_glozz_unit(t):
        return 'u'
    if is_glozz_schema(t):
        return 's'
    return '???'
コード例 #16
0
ファイル: common.py プロジェクト: eipiplusun/educe
def rough_type(anno):
    """
    Classify an annotation coarsely.

    Returns one of

        * "EDU" (type is 'Segment', or `stac.is_edu` holds)
        * "relation" (`stac.is_relation_instance` holds)
        * or the annotation type
    """
    if anno.type == 'Segment':
        return 'EDU'
    if stac.is_edu(anno):
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
コード例 #17
0
ファイル: common.py プロジェクト: tjane/educe
def anno_code(anno):
    """
    Short code providing a clue what the annotation is.

    The first matching check wins: Glozz relation ('r'), EDU ('e'),
    Glozz unit ('u'), Glozz schema ('s').  Unrecognised annotations
    get '???'.
    """
    checks = (
        (is_glozz_relation, 'r'),
        (stac.is_edu, 'e'),
        (is_glozz_unit, 'u'),
        (is_glozz_schema, 's'),
    )
    for matches, code in checks:
        if matches(anno):
            return code
    return '???'
コード例 #18
0
ファイル: common.py プロジェクト: tjane/educe
def rough_type(anno):
    """
    Give a rough category for an annotation.

    Returns either

        * "EDU"
        * "relation"
        * or the annotation type
    """
    looks_like_edu = anno.type == 'Segment' or stac.is_edu(anno)
    if looks_like_edu:
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
コード例 #19
0
ファイル: checks.py プロジェクト: chloebt/educe
 def dialogue(anno):
     """Return the dialogue enclosing an EDU or CDU, or None.

     An EDU resolves to `contexts[anno].dialogue` when it is present in
     the enclosing `contexts`.  A CDU resolves to the value shared by
     all of its terminals (each resolved recursively); it resolves to
     None when it has no terminals or they disagree.  Any other
     annotation resolves to None.
     """
     if stac.is_edu(anno):
         return contexts[anno].dialogue if anno in contexts else None
     if not stac.is_cdu(anno):
         return None
     found = [dialogue(unit) for unit in anno.terminals()]
     if not found:
         return None
     first = found[0]
     return first if all(other == first for other in found[1:]) else None
コード例 #20
0
ファイル: annotation.py プロジェクト: eipiplusun/educe
 def dialogue(anno):
     """Return the enclosing dialogue for an EDU/CDU.

     EDUs are looked up in the enclosing `contexts`; an EDU missing from
     it gives None.  CDUs give the dialogue common to all their
     terminals (resolved recursively), or None if there are no
     terminals or they differ.  Other annotations give None.
     """
     if stac.is_edu(anno):
         if anno in contexts:
             return contexts[anno].dialogue
         return None
     if stac.is_cdu(anno):
         per_terminal = [dialogue(leaf) for leaf in anno.terminals()]
         if per_terminal:
             head, tail = per_terminal[0], per_terminal[1:]
             if all(d == head for d in tail):
                 return head
     return None
コード例 #21
0
ファイル: annotation.py プロジェクト: Sablayrolles/debates
 def dialogue(anno):
     """Return the enclosing dialogue for an EDU/CDU.

     An EDU found in the enclosing `contexts` yields its `dialogue`
     attribute.  A CDU yields the value its terminals all agree on
     (terminals are resolved by recursive calls).  Every other case
     yields None: EDU not in `contexts`, CDU with no terminals or with
     disagreeing terminals, any other annotation.
     """
     if stac.is_edu(anno):
         return contexts[anno].dialogue if anno in contexts else None
     if stac.is_cdu(anno):
         resolved = [dialogue(t) for t in anno.terminals()]
         unanimous = bool(resolved) and all(
             d == resolved[0] for d in resolved[1:])
         return resolved[0] if unanimous else None
     return None
コード例 #22
0
ファイル: main.py プロジェクト: chloebt/educe
def cross_check_units(inputs, k1, k2, status):
    """
    Return tuples for certain corpus[k1] units
    not present in corpus[k2].

    Only units that are structures or EDUs are considered.  A unit is
    missing when `filter_matches` finds nothing for it among the units
    of the second document.  The result maps `rough_type(unit)` to a
    list of `MissingItem`, each carrying the units of the second
    document that share the same span.

    Raises `MissingDocumentException` if either key is absent from the
    corpus.
    """
    corpus = inputs.corpus
    for key in (k1, k2):
        if key not in corpus:
            raise MissingDocumentException(key)
    doc1, doc2 = corpus[k1], corpus[k2]
    contexts1, contexts2 = inputs.contexts[k1], inputs.contexts[k2]
    missing = defaultdict(list)
    for unit in doc1.units:
        if not (stac.is_structure(unit) or stac.is_edu(unit)):
            continue
        if filter_matches(unit, doc2.units):
            continue
        rtype = rough_type(unit)
        # units of the other document covering exactly the same span
        same_span = [other for other in doc2.units
                     if other.span == unit.span]
        missing[rtype].append(
            MissingItem(status, doc1, contexts1, unit,
                        doc2, contexts2, same_span))
    return missing
コード例 #23
0
ファイル: glozz.py プロジェクト: moreymat/educe
def cross_check_units(inputs, key1, key2, status):
    """
    Return tuples for certain corpus[key1] units
    not present in corpus[key2].

    Structures and EDUs of the first document for which
    `filter_matches` finds nothing in the second document are grouped
    by `rough_type` into lists of `MissingItem`; each item also records
    the second document's units that have an identical span.

    Raises `MissingDocumentException` when a key is not in the corpus.
    """
    corpus = inputs.corpus
    for key in (key1, key2):
        if key not in corpus:
            raise MissingDocumentException(key)
    doc1, doc2 = corpus[key1], corpus[key2]
    contexts1, contexts2 = inputs.contexts[key1], inputs.contexts[key2]
    missing = defaultdict(list)
    for unit in doc1.units:
        relevant = stac.is_structure(unit) or stac.is_edu(unit)
        if not relevant or filter_matches(unit, doc2.units):
            continue
        rtype = rough_type(unit)
        lookalikes = [cand for cand in doc2.units
                      if cand.span == unit.span]
        missing[rtype].append(
            MissingItem(status, doc1, contexts1, unit,
                        doc2, contexts2, lookalikes))
    return missing
コード例 #24
0
def is_non_du(anno):
    """
    True if the annotation is neither an EDU nor a CDU.

    That is: a Glozz relation, or a Glozz unit that `stac.is_edu`
    rejects.
    """
    relation_verdict = is_glozz_relation(anno)
    if relation_verdict:
        return relation_verdict
    return is_glozz_unit(anno) and not stac.is_edu(anno)
コード例 #25
0
ファイル: tests.py プロジェクト: eipiplusun/educe
def test_fake_objs():
    # each fake fixture must be recognised by the matching stac predicate
    expectations = (
        (stac.is_edu, edu1),
        (stac.is_relation_instance, rel1),
        (stac.is_cdu, cdu1),
    )
    for recognises, fake in expectations:
        assert recognises(fake)
コード例 #26
0
ファイル: graph.py プロジェクト: arne-cl/educe
 def is_edu(self, x):
     """Tell whether `x` is an EDU for this graph.

     Both the parent class check and `stac.is_edu` on the annotation
     attached to `x` must hold; the latter is only consulted when the
     former succeeds.
     """
     parent_verdict = super(Graph, self).is_edu(x)
     if not parent_verdict:
         return parent_verdict
     return stac.is_edu(self.annotation(x))
コード例 #27
0
ファイル: checks.py プロジェクト: chloebt/educe
 def expect_dialogue(anno):
     """True if the annotation is an EDU or a CDU."""
     edu_verdict = stac.is_edu(anno)
     return edu_verdict if edu_verdict else stac.is_cdu(anno)
コード例 #28
0
ファイル: annotation.py プロジェクト: Sablayrolles/debates
 def expect_dialogue(anno):
     """True if the annotation should live in a dialogue.

     This is the case for EDUs and CDUs.
     """
     edu_verdict = stac.is_edu(anno)
     if edu_verdict:
         return edu_verdict
     return stac.is_cdu(anno)
コード例 #29
0
ファイル: tests.py プロジェクト: tjane/educe
def test_fake_objs():
    # the fake EDU, relation and CDU fixtures must each pass the
    # corresponding stac predicate
    for predicate, fixture in (
            (stac.is_edu, edu1),
            (stac.is_relation_instance, rel1),
            (stac.is_cdu, cdu1)):
        assert predicate(fixture)
コード例 #30
0
def are_single_headed_cdus(inputs, k, gra):
    """Check that each CDU has exactly one head DU.

    A CDU is reported when more than one of its direct members is not
    the target of any relation whose source lies (recursively) under
    the same CDU.

    Parameters
    ----------
    inputs : object
        Provides `corpus` and `contexts`, both indexed by `k`.
    k : key
        Key of the document in `inputs.corpus` and `inputs.contexts`.
    gra : Graph
        Graph for the discourse structure.

    Returns
    -------
    report_items : list of ReportItem
        List of report items (built as `SchemaItem`), one per faulty CDU.
    """
    report_items = []
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]

    # compute the transitive closure of DUs embedded under each CDU
    # * map each CDU to its member EDUs and CDUs, as two lists
    # keys are edge ids eg. 'e_pilot01_07_jhunter_1487683021582',
    # values are node ids eg. 'n_pilot01_07_stac_1464335440'
    cdu2mems = defaultdict(lambda: ([], []))
    for cdu_id in gra.cdus():
        # NOTE(review): `cdu` is assigned here but not used in this loop
        cdu = gra.annotation(cdu_id)
        cdu_members = set(gra.cdu_members(cdu_id))
        cdu2mems[cdu_id] = ([
            x for x in cdu_members if stac.is_edu(gra.annotation(x))
        ], [x for x in cdu_members if stac.is_cdu(gra.annotation(x))])
    # * replace each nested CDU in the second list with its member DUs
    # (to first list), and mark CDUs for exploration (to second list) ;
    # repeat until fixpoint, ie. transitive closure complete for each CDU
    # (the `k` in the generator below is local to that expression)
    while any(v[1] for k, v in cdu2mems.items()):
        for cdu_id, (mem_edus, mem_cdus) in cdu2mems.items():
            # NOTE(review): `mem_cdus` is the very list stored in
            # cdu2mems[cdu_id][1], so the extend/remove calls below
            # mutate it while it is being iterated; entries skipped in
            # one pass appear to be left for a later pass of the
            # enclosing while loop -- confirm this is intended
            for mem_cdu in mem_cdus:
                # switch between the edge and node representations of CDUs:
                # gra.mirror()
                # NOTE(review): cdu2mems is a defaultdict, so a key not
                # yet present would be inserted here while items() is
                # being iterated -- presumably every mirrored id is
                # already a key; verify
                nested_edus, nested_cdus = cdu2mems[gra.mirror(mem_cdu)]
                # add the nested CDU and its EDU members
                cdu2mems[cdu_id][0].append(mem_cdu)
                cdu2mems[cdu_id][0].extend(nested_edus)
                # store CDU members of the nested CDU for exploration
                cdu2mems[cdu_id][1].extend(nested_cdus)
                # delete current nested CDU from list of CDUs to be explored
                cdu2mems[cdu_id][1].remove(mem_cdu)
    # switch to simple dict, forget list of CDUs for exploration
    cdu2mems = {k: v[0] for k, v in cdu2mems.items()}
    # end transitive closure

    for cdu_id in gra.cdus():
        cdu = gra.annotation(cdu_id)
        # direct members of this CDU
        cdu_mems = set(gra.cdu_members(cdu_id))
        # every DU gathered under this CDU by the closure above
        cdu_rec_mems = set(cdu2mems[cdu_id])
        # maps a direct member to the source of a relation pointing at
        # it from within the CDU
        internal_head = dict()
        for cdu_mem in cdu_mems:
            for rel in gra.links(cdu_mem):
                if gra.is_relation(rel):
                    src, tgt = gra.rel_links(rel)
                    # src can be any DU under the current CDU, eg. even
                    # a member of a nested CDU ; this is probably too
                    # loose but we'll see later if we need to refine
                    if src in cdu_rec_mems and tgt in cdu_mems:
                        internal_head[tgt] = src
        # direct members without any incoming CDU-internal relation;
        # more than one of them makes the CDU faulty
        unheaded_mems = cdu_mems - set(internal_head.keys())
        if len(unheaded_mems) > 1:
            report_items.append(SchemaItem(doc, contexts, cdu, []))
    return report_items
コード例 #31
0
ファイル: annotation.py プロジェクト: eipiplusun/educe
 def expect_dialogue(anno):
     """True if the annotation should live in a dialogue.

     EDUs and CDUs are the annotations expected to live in one.
     """
     edu_verdict = stac.is_edu(anno)
     return edu_verdict if edu_verdict else stac.is_cdu(anno)
コード例 #32
0
ファイル: main.py プロジェクト: chloebt/educe
def is_non_du(anno):
    """
    True for a Glozz relation, or for a Glozz unit that is not an EDU.
    """
    relation_verdict = is_glozz_relation(anno)
    if relation_verdict:
        return relation_verdict
    return is_glozz_unit(anno) and not stac.is_edu(anno)
コード例 #33
0
ファイル: graph.py プロジェクト: irit-melodi/educe
def are_single_headed_cdus(inputs, k, gra):
    """Check that each CDU has exactly one head DU.

    A CDU is reported when more than one of its direct members is not
    the target of any relation whose source lies (recursively) under
    the same CDU.

    Parameters
    ----------
    inputs : object
        Provides `corpus` and `contexts`, both indexed by `k`.
    k : key
        Key of the document in `inputs.corpus` and `inputs.contexts`.
    gra : Graph
        Graph for the discourse structure.

    Returns
    -------
    report_items : list of ReportItem
        List of report items (built as `SchemaItem`), one per faulty CDU.
    """
    report_items = []
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]

    # compute the transitive closure of DUs embedded under each CDU
    # * map each CDU to its member EDUs and CDUs, as two lists
    # keys are edge ids eg. 'e_pilot01_07_jhunter_1487683021582',
    # values are node ids eg. 'n_pilot01_07_stac_1464335440'
    cdu2mems = defaultdict(lambda: ([], []))
    for cdu_id in gra.cdus():
        # NOTE(review): `cdu` is assigned here but not used in this loop
        cdu = gra.annotation(cdu_id)
        cdu_members = set(gra.cdu_members(cdu_id))
        cdu2mems[cdu_id] = (
            [x for x in cdu_members if stac.is_edu(gra.annotation(x))],
            [x for x in cdu_members if stac.is_cdu(gra.annotation(x))]
        )
    # * replace each nested CDU in the second list with its member DUs
    # (to first list), and mark CDUs for exploration (to second list) ;
    # repeat until fixpoint, ie. transitive closure complete for each CDU
    # (the `k` in the generator below is local to that expression)
    while any(v[1] for k, v in cdu2mems.items()):
        for cdu_id, (mem_edus, mem_cdus) in cdu2mems.items():
            # NOTE(review): `mem_cdus` is the very list stored in
            # cdu2mems[cdu_id][1], so the extend/remove calls below
            # mutate it while it is being iterated; entries skipped in
            # one pass appear to be left for a later pass of the
            # enclosing while loop -- confirm this is intended
            for mem_cdu in mem_cdus:
                # switch between the edge and node representations of CDUs:
                # gra.mirror()
                # NOTE(review): cdu2mems is a defaultdict, so a key not
                # yet present would be inserted here while items() is
                # being iterated -- presumably every mirrored id is
                # already a key; verify
                nested_edus, nested_cdus = cdu2mems[gra.mirror(mem_cdu)]
                # add the nested CDU and its EDU members
                cdu2mems[cdu_id][0].append(mem_cdu)
                cdu2mems[cdu_id][0].extend(nested_edus)
                # store CDU members of the nested CDU for exploration
                cdu2mems[cdu_id][1].extend(nested_cdus)
                # delete current nested CDU from list of CDUs to be explored
                cdu2mems[cdu_id][1].remove(mem_cdu)
    # switch to simple dict, forget list of CDUs for exploration
    cdu2mems = {k: v[0] for k, v in cdu2mems.items()}
    # end transitive closure

    for cdu_id in gra.cdus():
        cdu = gra.annotation(cdu_id)
        # direct members of this CDU
        cdu_mems = set(gra.cdu_members(cdu_id))
        # every DU gathered under this CDU by the closure above
        cdu_rec_mems = set(cdu2mems[cdu_id])
        # maps a direct member to the source of a relation pointing at
        # it from within the CDU
        internal_head = dict()
        for cdu_mem in cdu_mems:
            for rel in gra.links(cdu_mem):
                if gra.is_relation(rel):
                    src, tgt = gra.rel_links(rel)
                    # src can be any DU under the current CDU, eg. even
                    # a member of a nested CDU ; this is probably too
                    # loose but we'll see later if we need to refine
                    if src in cdu_rec_mems and tgt in cdu_mems:
                        internal_head[tgt] = src
        # direct members without any incoming CDU-internal relation;
        # more than one of them makes the CDU faulty
        unheaded_mems = cdu_mems - set(internal_head.keys())
        if len(unheaded_mems) > 1:
            report_items.append(
                SchemaItem(doc, contexts, cdu, []))
    return report_items
コード例 #34
0
ファイル: type_err.py プロジェクト: tjane/educe
def is_non_du(anno):
    """
    True if the annotation is neither an EDU nor a CDU.

    Concretely: a Glozz relation, or a Glozz unit for which
    `stac.is_edu` does not hold.
    """
    relation_verdict = is_glozz_relation(anno)
    if relation_verdict:
        return relation_verdict
    unit_verdict = is_glozz_unit(anno)
    return unit_verdict and not stac.is_edu(anno)