Beispiel #1
0
    def tgt_html(grandparent, anno, naughty=False):
        """
        Render a description of `anno` as HTML.

        A fresh span is created under `grandparent` and filled with the
        annotation code, the annotation type (tagged with the CSS class
        'naughty' when `naughty` is set), the first component returned by
        `stac.split_turn_text` for the enclosing turn (when `anno` has a
        context) and, unless `anno` is a relation instance, a snippet of
        its text followed by its text span.  The new span is returned.
        """
        parent = h.span(grandparent)
        h.span(parent, anno_code(anno))
        type_span = h.span(parent, '[%s] ' % anno.type)
        if naughty:
            type_span.attrib['class'] = 'naughty'

        if anno in contexts:
            turn_text = doc.text(contexts[anno].turn.span)
            turn_info = stac.split_turn_text(turn_text)[0]
            # whatever precedes the first colon is shown in bold
            head, colon, rest = turn_info.partition(":")
            if colon:
                bold = ET.SubElement(parent, 'b')
                bold.text = head + ":"
                h.span(parent, rest)
            else:
                h.span(parent, turn_info)

        if stac.is_relation_instance(anno):
            return parent

        t_text = text(anno)
        if stac.is_cdu(anno):
            trange = turn_range(anno)
            if trange:
                h.elem(parent, 'b', trange)
        h.span(parent, text=snippet(t_text, 100), attrib={'class': 'snippet'})
        h.span(parent, ' %s' % anno.text_span())
        return parent
Beispiel #2
0
    def tgt_html(grandparent, anno, naughty=False):
        """
        Append an HTML description of `anno` to `grandparent`.

        The description lives in a new span (the return value) holding,
        in order: the annotation code, its type (with CSS class 'naughty'
        if `naughty` is true), the turn information when `anno` is known
        to `contexts`, and, for anything but a relation instance, a text
        snippet plus the text span of the annotation.
        """
        parent = h.span(grandparent)
        h.span(parent, anno_code(anno))
        type_span = h.span(parent, '[%s] ' % anno.type)
        if naughty:
            type_span.attrib['class'] = 'naughty'

        if anno in contexts:
            turn = contexts[anno].turn
            turn_info = stac.split_turn_text(doc.text(turn.span))[0]
            if ":" in turn_info:
                # bold prefix up to the first colon, remainder as is
                prefix, remainder = turn_info.split(":", 1)
                prefix_elem = ET.SubElement(parent, 'b')
                prefix_elem.text = prefix + ":"
                h.span(parent, remainder)
            else:
                h.span(parent, turn_info)

        is_relation = stac.is_relation_instance(anno)
        if not is_relation:
            t_text = text(anno)
            if stac.is_cdu(anno):
                trange = turn_range(anno)
                if trange:
                    h.elem(parent, 'b', trange)
            snippet_attrib = {'class': 'snippet'}
            h.span(parent, text=snippet(t_text, 100), attrib=snippet_attrib)
            h.span(parent, ' %s' % anno.text_span())
        return parent
Beispiel #3
0
 def in_dialogue(x):
     "true if the annotation (EDU, relation or CDU) lies within `units`"
     if stac.is_edu(x):
         return x in units
     if stac.is_relation_instance(x):
         # both endpoints have to be inside
         return x.source in units and x.target in units
     if stac.is_cdu(x):
         # every terminal has to be inside
         for term in x.terminals():
             if term not in units:
                 return False
         return True
     return False
Beispiel #4
0
 def in_dialogue(d_annos, anno):
     "true if `anno` belongs to the dialogue whose annotations are `d_annos`"
     if stac.is_edu(anno):
         return anno in d_annos
     if stac.is_relation_instance(anno):
         # a relation belongs if both of its endpoints do
         return anno.source in d_annos and anno.target in d_annos
     if stac.is_cdu(anno):
         # a CDU belongs if all of its terminals do
         for term in anno.terminals():
             if term not in d_annos:
                 return False
         return True
     return False
Beispiel #5
0
 def in_dialogue(d_annos, anno):
     "whether the given annotation sits inside the given dialogue"
     if stac.is_edu(anno):
         return anno in d_annos
     if stac.is_relation_instance(anno):
         source_inside = anno.source in d_annos
         return source_inside and anno.target in d_annos
     if not stac.is_cdu(anno):
         return False
     # CDU: no terminal may fall outside the dialogue
     return not any(term not in d_annos for term in anno.terminals())
Beispiel #6
0
def search_cdu_overlap(inputs, k, g):
    """
    Return a CduOverlapItem for every annotation that is a member of
    more than one CDU of the graph `g`.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    # member annotation -> annotations of the CDUs that contain it
    containers = defaultdict(list)
    for cdu in g.cdus():
        cdu_anno = g.annotation(cdu)
        if stac.is_cdu(cdu_anno):
            for member in g.cdu_members(cdu):
                containers[g.annotation(member)].append(cdu_anno)
    overlaps = []
    for member_anno, parents in containers.items():
        if len(parents) > 1:
            item = CduOverlapItem(doc, contexts, member_anno, parents)
            overlaps.append(item)
    return overlaps
Beispiel #7
0
 def node_speaker(anno):
     "speaker of an EDU, or the single speaker shared by a CDU (else None)"
     if stac.is_edu(anno):
         return edu_speaker(anno)
     if not stac.is_cdu(anno):
         return None
     speakers = frozenset(edu_speaker(x) for x in anno.terminals())
     if len(speakers) != 1:
         # no terminals at all, or several distinct speakers
         return None
     (only_speaker,) = speakers
     return only_speaker
Beispiel #8
0
 def node_speaker(anno):
     "designated speaker of an EDU or CDU; None when there is no unique one"
     if stac.is_edu(anno):
         return edu_speaker(anno)
     if stac.is_cdu(anno):
         distinct = frozenset(edu_speaker(x) for x in anno.terminals())
         # a CDU only has a speaker if all its terminals agree on it
         return next(iter(distinct)) if len(distinct) == 1 else None
     return None
Beispiel #9
0
 def node_speaker(n):
     """
     Return the designated speaker for an EDU or CDU.

     For an EDU this is `edu_speaker(n)`.  For a CDU it is the speaker
     shared by all of its terminals, or None if the terminals have no
     speaker in common (or there are no terminals).  Any other kind of
     annotation yields None.
     """
     if stac.is_edu(n):
         return edu_speaker(n)
     elif stac.is_cdu(n):
         # terminals are fetched once; the distinct speakers are what matter
         speakers = frozenset(edu_speaker(t) for t in n.terminals())
         if len(speakers) == 1:
             return next(iter(speakers))
     return None
Beispiel #10
0
def has_non_du_member(anno):
    """
    Tell whether `anno` has a member for which `is_non_du` holds.

    The members considered are the source and target of a relation
    instance, or the `members` of a CDU; any other annotation gives
    False.
    """
    if stac.is_relation_instance(anno):
        candidates = (anno.source, anno.target)
    elif stac.is_cdu(anno):
        candidates = anno.members
    else:
        return False

    for member in candidates:
        if is_non_du(member):
            return True
    return False
Beispiel #11
0
def has_non_du_member(anno):
    """
    True if `is_non_du` holds for an endpoint of the relation `anno`,
    or for a member of the CDU `anno`; False for anything else.
    """
    if stac.is_relation_instance(anno):
        endpoints = (anno.source, anno.target)
        return any(is_non_du(x) for x in endpoints)
    if stac.is_cdu(anno):
        return any(is_non_du(x) for x in anno.members)
    return False
Beispiel #12
0
 def dialogue(anno):
     "enclosing dialogue of an EDU/CDU (None when unknown or not unique)"
     if stac.is_edu(anno):
         return contexts[anno].dialogue if anno in contexts else None
     if not stac.is_cdu(anno):
         return None
     dialogues = [dialogue(x) for x in anno.terminals()]
     if not dialogues:
         return None
     # all terminals must agree with the first one
     first = dialogues[0]
     for other in dialogues[1:]:
         if not other == first:
             return None
     return first
Beispiel #13
0
 def dialogue(anno):
     "dialogue containing an EDU, or shared by all terminals of a CDU"
     if stac.is_edu(anno):
         if anno in contexts:
             return contexts[anno].dialogue
         return None
     if stac.is_cdu(anno):
         dialogues = [dialogue(unit) for unit in anno.terminals()]
         # only meaningful if there is at least one terminal and all of
         # them live in the same dialogue
         if dialogues and all(d == dialogues[0] for d in dialogues[1:]):
             return dialogues[0]
     return None
Beispiel #14
0
 def dialogue(anno):
     "the dialogue an EDU/CDU lives in, or None if it cannot be determined"
     if stac.is_cdu(anno) if not stac.is_edu(anno) else False:
         found = [dialogue(x) for x in anno.terminals()]
         agree = found and all(d == found[0] for d in found[1:])
         return found[0] if agree else None
     if stac.is_edu(anno) and anno in contexts:
         return contexts[anno].dialogue
     return None
Beispiel #15
0
    def is_bad(anno):
        "true if the members of the annotation belong to several dialogues"
        if stac.is_relation_instance(anno):
            members = [anno.source, anno.target]
        elif stac.is_cdu(anno):
            members = list(anno.members)
        else:
            return False

        # members which are relations are left out of the comparison
        relevant = [m for m in members if expect_dialogue(m)]
        if not relevant:
            return False
        distinct = set(dialogue(m) for m in relevant)
        return len(distinct) > 1
Beispiel #16
0
    def is_bad(anno):
        "true if the annotation crosses a dialogue boundary"
        if stac.is_relation_instance(anno):
            candidates = [anno.source, anno.target]
        elif stac.is_cdu(anno):
            candidates = list(anno.members)
        else:
            candidates = []

        # only look at members that are expected to live in a dialogue
        # (ie. skip members which are relations)
        seen = set()
        checked_any = False
        for member in candidates:
            if expect_dialogue(member):
                checked_any = True
                seen.add(dialogue(member))
        return checked_any and len(seen) > 1
Beispiel #17
0
    def is_bad(anno):
        "true if the parts of the annotation span more than one dialogue"
        if stac.is_relation_instance(anno):
            parts = [anno.source, anno.target]
        elif stac.is_cdu(anno):
            parts = list(anno.members)
        else:
            return False

        # relations among the parts are of no concern here
        parts = list(filter(expect_dialogue, parts))
        if not parts:
            return False
        return len(frozenset(map(dialogue, parts))) > 1
Beispiel #18
0
def search_graph_cdu_overlap(inputs, k, gra):
    """
    Build one CduOverlapItem per annotation that is a member of
    more than one CDU in the graph `gra`.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    # member annotation -> annotations of the CDUs it belongs to
    containers = defaultdict(list)
    for cdu in gra.cdus():
        cdu_anno = gra.annotation(cdu)
        if stac.is_cdu(cdu_anno):
            for mem in gra.cdu_members(cdu):
                containers[gra.annotation(mem)].append(cdu_anno)
    items = []
    for mem_anno, cdu_annos in containers.items():
        if len(cdu_annos) > 1:
            items.append(CduOverlapItem(doc, contexts, mem_anno, cdu_annos))
    return items
Beispiel #19
0
    def turn_range(anno):
        """
        Given a CDU, return a string representing the turns spanned by
        that CDU (or None if no turn id could be found).

        The string is "N: " when all terminals share one turn id and
        "N-M: " (lowest to highest) otherwise.

        Raises ValueError if `anno` is not a CDU.
        """
        if not stac.is_cdu(anno):
            # format instead of concatenating: if `anno` is not a string,
            # `"..." + anno` would raise TypeError rather than ValueError
            raise ValueError("not a CDU: %s" % (anno,))

        # falsy turn ids (eg. None) are discarded
        # NOTE(review): this also drops a turn id of 0 -- confirm intended
        tids = [tid for tid in (turn_id(y) for y in anno.terminals()) if tid]
        if not tids:
            return None
        min_tid = min(tids)
        max_tid = max(tids)
        if min_tid == max_tid:
            return "%d: " % min_tid
        return "%d-%d: " % (min_tid, max_tid)
Beispiel #20
0
    def turn_range(anno):
        """
        Given a CDU, return a string representing the turns spanned by
        that CDU (or None if none of its terminals has a turn id).

        A single turn is rendered as "N: ", several as "N-M: " where N
        and M are the smallest and largest turn ids.

        Raises ValueError if `anno` is not a CDU.
        """
        if not stac.is_cdu(anno):
            # `anno` need not be a string, so it is formatted rather than
            # concatenated (which would raise TypeError, not ValueError)
            raise ValueError("not a CDU: %s" % (anno,))

        tids = [turn_id(y) for y in anno.terminals()]
        # keep truthy ids only
        # NOTE(review): a turn id of 0 is filtered out too -- confirm
        tids = [x for x in tids if x]
        if tids:
            min_tid = min(tids)
            max_tid = max(tids)
            if min_tid == max_tid:
                return "%d: " % min_tid
            return "%d-%d: " % (min_tid, max_tid)
        return None
Beispiel #21
0
def search_graph_cdu_overlap(inputs, k, gra):
    """
    Report every annotation that is a member of more than one CDU.

    Returns a list of CduOverlapItem, one per such annotation, each
    carrying the annotations of the CDUs that contain it.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    containers = defaultdict(list)
    for cdu in gra.cdus():
        cdu_anno = gra.annotation(cdu)
        if stac.is_cdu(cdu_anno):
            for mem in gra.cdu_members(cdu):
                edu_anno = gra.annotation(mem)
                containers[edu_anno].append(cdu_anno)

    report = []
    for edu_anno, cdu_annos in containers.items():
        if len(cdu_annos) <= 1:
            continue
        report.append(CduOverlapItem(doc, contexts, edu_anno, cdu_annos))
    return report
Beispiel #22
0
    def tgt_html(grandparent, t, naughty=False):
        """
        Describe the annotation `t` in HTML and append that description
        to the HTML node `grandparent`.

        The description is a new span containing the annotation code,
        its type (with CSS class 'naughty' if `naughty` is true), the
        turn information when `t` is in `contexts`, and, unless `t` is a
        relation instance, the range of turn ids (CDUs only), a snippet
        of the text and the text span.  The new span is returned.
        """
        def tid(x):
            "integer turn identifier of `x`, or None if it cannot be found"
            if x not in contexts:
                return None
            tid_str = contexts[x].turn.features['Identifier']
            return int(tid_str) if tid_str else None

        parent = html_span(grandparent)
        html_span(parent, anno_code(t))
        type_span = html_span(parent, '[%s] ' % t.type)
        if naughty:
            type_span.attrib['class'] = 'naughty'

        if t in contexts:
            turn = contexts[t].turn
            turn_info = stac.split_turn_text(doc.text(turn.span))[0]
            turn_splits = turn_info.split(":")
            if len(turn_splits) > 1:
                # deliberately not named `tid`: rebinding that name would
                # replace the helper above and break the CDU branch below
                tid_elem = ET.SubElement(parent, 'b')
                tid_elem.text = turn_splits[0] + ":"
                html_span(parent, ":".join(turn_splits[1:]))
            else:
                html_span(parent, turn_info)

        if not stac.is_relation_instance(t):
            t_span = t.text_span()
            t_text = doc.text(t_span)
            if stac.is_cdu(t):
                # falsy turn ids (eg. None) are ignored
                tids = [x for x in map(tid, t.terminals()) if x]
                if tids:
                    tspan = ET.SubElement(parent, 'b')
                    min_tid = min(tids)
                    max_tid = max(tids)
                    if min_tid == max_tid:
                        tspan.text = "%d: " % min_tid
                    else:
                        tspan.text = "%d-%d: " % (min_tid, max_tid)
            text_sp = html_span(parent, snippet(t_text, 100))
            text_sp.attrib['class'] = 'snippet'
            html_span(parent, ' %s' % t_span)
        return parent
Beispiel #23
0
 def without_cdus(self, sloppy=False):
     """
     Return a deep copy of this graph with all CDUs removed.
     Links involving these CDUs will point instead from/to
     their deep heads

     `sloppy` is passed on unchanged to `recursive_cdu_heads`, which
     decides what the deep head of each CDU is.  The graph itself is
     left untouched: all modifications are made on the copy, including
     the relations and schemas of the copied `doc`.
     """
     g2    = copy.deepcopy(self)
     heads = g2.recursive_cdu_heads(sloppy)
     # same mapping as `heads`, but from annotation to annotation
     anno_heads = dict((g2.annotation(k),g2.annotation(v))\
                       for k,v in heads.items())
     # replace all links to/from cdus with to/from their heads
     # NOTE(review): edges are deleted and re-added while looping over
     # g2.relations(); this presumably relies on relations() returning a
     # snapshot rather than a live view -- confirm
     for e_edge in g2.relations():
         links  = g2.links(e_edge)
         attrs  = g2.edge_attributes(e_edge)
         if any(g2.is_cdu(l) for l in links):
             # recreate the edge
             g2.del_edge(e_edge)
             g2.add_edge(e_edge)
             g2.add_edge_attributes(e_edge, attrs)
             for l in links:
                 # `heads` is looked up with the mirror of the CDU link
                 l2 = heads[g2.mirror(l)] if g2.is_cdu(l) else l
                 g2.link(l2, e_edge)
     # now that we've pointed everything away, nuke the CDUs
     # (both the mirror node and the edge of each CDU are deleted)
     for e_cdu in g2.cdus():
         g2.del_node(g2.mirror(e_cdu))
         g2.del_edge(e_cdu)
     # to be on the safe side, we should also do similar link-rewriting
     # but on the underlying educe.annotation objects layer
     # (symptom of a yucky design) :-(
     for r in g2.doc.relations:
         if stac.is_relation_instance(r):
             src  = r.source
             tgt  = r.target
             # endpoints that are not CDUs with a known head stay as is
             src2 = anno_heads.get(src, src)
             tgt2 = anno_heads.get(tgt, tgt)
             r.source = src2
             r.target = tgt2
             r.span   = annotation.RelSpan(src2.local_id(), tgt2.local_id())
     # remove the actual CDU objects too
     g2.doc.schemas = [ s for s in g2.doc.schemas if not stac.is_cdu(s) ]
     return g2
Beispiel #24
0
def test_fake_objs():
    """
    Check that `edu1`, `rel1` and `cdu1` (defined elsewhere) are
    accepted by the corresponding stac predicates.
    """
    edu_ok = stac.is_edu(edu1)
    assert edu_ok
    rel_ok = stac.is_relation_instance(rel1)
    assert rel_ok
    cdu_ok = stac.is_cdu(cdu1)
    assert cdu_ok
Beispiel #25
0
 def is_cdu(self, x):
     """
     True if `x` is a CDU for the parent class and its annotation is
     one for `stac.is_cdu` as well
     """
     base_verdict = super(Graph, self).is_cdu(x)
     if not base_verdict:
         return base_verdict
     return stac.is_cdu(self.annotation(x))
Beispiel #26
0
 def expect_dialogue(anno):
     "true if the annotation is an EDU or a CDU"
     edu_verdict = stac.is_edu(anno)
     if edu_verdict:
         return edu_verdict
     return stac.is_cdu(anno)
Beispiel #27
0
 def expect_dialogue(anno):
     "whether the annotation is of a kind (EDU or CDU) tied to a dialogue"
     as_edu = stac.is_edu(anno)
     if as_edu:
         return as_edu
     return stac.is_cdu(anno)
Beispiel #28
0
def test_fake_objs():
    """
    The objects `edu1`, `rel1` and `cdu1` (defined elsewhere) should be
    recognised as an EDU, a relation instance and a CDU respectively.
    """
    recognised_edu = stac.is_edu(edu1)
    assert recognised_edu
    recognised_rel = stac.is_relation_instance(rel1)
    assert recognised_rel
    recognised_cdu = stac.is_cdu(cdu1)
    assert recognised_cdu
Beispiel #29
0
def are_single_headed_cdus(inputs, k, gra):
    """Check that each CDU has exactly one head DU.

    A direct member of a CDU counts as headed when it is the target of
    a relation whose source is a DU embedded (at any depth) under that
    CDU.  A CDU is reported when more than one of its direct members is
    left unheaded.

    Parameters
    ----------
    inputs : object
        Provides `corpus` and `contexts`, both indexed by `k`.
    k : key
        Key of the document being checked.
    gra : Graph
        Graph for the discourse structure.

    Returns
    -------
    report_items : list of ReportItem
        List of report items, one per faulty CDU.
    """
    report_items = []
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]

    # compute the transitive closure of DUs embedded under each CDU
    # * map each CDU to its member EDUs and CDUs, as two lists
    # keys are edge ids eg. 'e_pilot01_07_jhunter_1487683021582',
    # values are node ids eg. 'n_pilot01_07_stac_1464335440'
    cdu2mems = defaultdict(lambda: ([], []))
    for cdu_id in gra.cdus():
        cdu_members = set(gra.cdu_members(cdu_id))
        cdu2mems[cdu_id] = (
            [x for x in cdu_members if stac.is_edu(gra.annotation(x))],
            [x for x in cdu_members if stac.is_cdu(gra.annotation(x))]
        )
    # * replace each nested CDU in the second list with its member DUs
    # (to first list), and mark CDUs for exploration (to second list) ;
    # repeat until fixpoint, ie. transitive closure complete for each CDU
    while any(todo_cdus for _, todo_cdus in cdu2mems.values()):
        for known_dus, todo_cdus in cdu2mems.values():
            # iterate over a snapshot: `todo_cdus` is modified in the loop
            for mem_cdu in list(todo_cdus):
                # switch between the edge and node representations of CDUs:
                # gra.mirror()
                nested_edus, nested_cdus = cdu2mems[gra.mirror(mem_cdu)]
                # add the nested CDU and its EDU members
                known_dus.append(mem_cdu)
                known_dus.extend(nested_edus)
                # store CDU members of the nested CDU for exploration
                todo_cdus.extend(nested_cdus)
                # delete current nested CDU from list of CDUs to be explored
                todo_cdus.remove(mem_cdu)
    # switch to simple dict, forget list of CDUs for exploration
    cdu2mems = {cdu_id: mems[0] for cdu_id, mems in cdu2mems.items()}
    # end transitive closure

    for cdu_id in gra.cdus():
        cdu = gra.annotation(cdu_id)
        cdu_mems = set(gra.cdu_members(cdu_id))
        cdu_rec_mems = set(cdu2mems[cdu_id])
        # direct members that are the target of a CDU-internal relation
        headed_mems = set()
        for cdu_mem in cdu_mems:
            for rel in gra.links(cdu_mem):
                if not gra.is_relation(rel):
                    continue
                src, tgt = gra.rel_links(rel)
                # src can be any DU under the current CDU, eg. even
                # a member of a nested CDU ; this is probably too
                # loose but we'll see later if we need to refine
                if src in cdu_rec_mems and tgt in cdu_mems:
                    headed_mems.add(tgt)
        unheaded_mems = cdu_mems - headed_mems
        if len(unheaded_mems) > 1:
            report_items.append(SchemaItem(doc, contexts, cdu, []))
    return report_items
Beispiel #30
0
 def expect_dialogue(anno):
     "true for EDUs and CDUs, ie. annotations that should sit in a dialogue"
     is_edu = stac.is_edu(anno)
     if is_edu:
         return is_edu
     return stac.is_cdu(anno)
Beispiel #31
0
def are_single_headed_cdus(inputs, k, gra):
    """Check that each CDU has exactly one head DU.

    A direct member of a CDU is considered headed if some relation
    links to it from a DU that is embedded, directly or through nested
    CDUs, under the same CDU.  CDUs with more than one unheaded direct
    member are reported.

    Parameters
    ----------
    inputs : object
        Provides `corpus` and `contexts`, both indexed by `k`.
    k : key
        Key of the document being checked.
    gra : Graph
        Graph for the discourse structure.

    Returns
    -------
    report_items : list of ReportItem
        List of report items, one per faulty CDU.
    """
    report_items = []
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]

    # compute the transitive closure of DUs embedded under each CDU
    # * map each CDU to its member EDUs and CDUs, as two lists
    # keys are edge ids eg. 'e_pilot01_07_jhunter_1487683021582',
    # values are node ids eg. 'n_pilot01_07_stac_1464335440'
    cdu2mems = defaultdict(lambda: ([], []))
    for cdu_id in gra.cdus():
        cdu_members = set(gra.cdu_members(cdu_id))
        cdu2mems[cdu_id] = (
            [x for x in cdu_members if stac.is_edu(gra.annotation(x))],
            [x for x in cdu_members if stac.is_cdu(gra.annotation(x))]
        )
    # * replace each nested CDU in the second list with its member DUs
    # (to first list), and mark CDUs for exploration (to second list) ;
    # repeat until fixpoint, ie. transitive closure complete for each CDU
    while any(pending for _, pending in cdu2mems.values()):
        for embedded, pending in cdu2mems.values():
            # work on a copy, as `pending` grows and shrinks in the loop
            for mem_cdu in list(pending):
                # switch between the edge and node representations of CDUs:
                # gra.mirror()
                nested_edus, nested_cdus = cdu2mems[gra.mirror(mem_cdu)]
                # add the nested CDU and its EDU members
                embedded.append(mem_cdu)
                embedded.extend(nested_edus)
                # store CDU members of the nested CDU for exploration
                pending.extend(nested_cdus)
                # delete current nested CDU from list of CDUs to be explored
                pending.remove(mem_cdu)
    # switch to simple dict, forget list of CDUs for exploration
    cdu2mems = {cdu_id: mems[0] for cdu_id, mems in cdu2mems.items()}
    # end transitive closure

    for cdu_id in gra.cdus():
        cdu = gra.annotation(cdu_id)
        cdu_mems = set(gra.cdu_members(cdu_id))
        cdu_rec_mems = set(cdu2mems[cdu_id])
        # direct members reached by a relation from inside the CDU
        headed_mems = set()
        for cdu_mem in cdu_mems:
            for rel in gra.links(cdu_mem):
                if not gra.is_relation(rel):
                    continue
                src, tgt = gra.rel_links(rel)
                # src can be any DU under the current CDU, eg. even
                # a member of a nested CDU ; this is probably too
                # loose but we'll see later if we need to refine
                if src in cdu_rec_mems and tgt in cdu_mems:
                    headed_mems.add(tgt)
        unheaded_mems = cdu_mems - headed_mems
        if len(unheaded_mems) > 1:
            report_items.append(
                SchemaItem(doc, contexts, cdu, []))
    return report_items