Ejemplo n.º 1
0
    def tgt_html(grandparent, anno, naughty=False):
        """
        Build an HTML description of `anno` inside a fresh span that
        is attached to `grandparent`, and return that span.

        The description holds the annotation code, its bracketed type
        (given the 'naughty' class when `naughty` is set), the turn
        prefix if `anno` has an entry in `contexts`, and, for anything
        that is not a relation instance, a text snippet and text span.
        """
        parent = h.span(grandparent)
        h.span(parent, anno_code(anno))
        type_span = h.span(parent, '[%s] ' % anno.type)
        if naughty:
            type_span.attrib['class'] = 'naughty'

        if anno in contexts:
            turn = contexts[anno].turn
            turn_info = stac.split_turn_text(doc.text(turn.span))[0]
            # everything up to the first colon is shown in bold
            head, colon, rest = turn_info.partition(":")
            if colon:
                tid = ET.SubElement(parent, 'b')
                tid.text = head + ":"
                h.span(parent, rest)
            else:
                h.span(parent, turn_info)

        if stac.is_relation_instance(anno):
            # relation instances get no snippet of their own
            return parent

        t_text = text(anno)
        if stac.is_cdu(anno):
            trange = turn_range(anno)
            if trange:
                h.elem(parent, 'b', trange)
        h.span(parent,
               text=snippet(t_text, 100),
               attrib={'class': 'snippet'})
        h.span(parent, ' %s' % anno.text_span())
        return parent
Ejemplo n.º 2
0
def rough_type(anno):
    """
    Coarse label for an annotation: 'EDU' for segments/EDUs,
    'relation' for relation instances, otherwise the annotation's
    own type
    """
    if anno.type == 'Segment' or stac.is_edu(anno):
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
Ejemplo n.º 3
0
    def tgt_html(grandparent, anno, naughty=False):
        """
        Build an HTML description of `anno` inside a fresh span that
        is attached to `grandparent`, and return that span.

        The description holds the annotation code, its bracketed type
        (given the 'naughty' class when `naughty` is set), the turn
        prefix if `anno` has an entry in `contexts`, and, for anything
        that is not a relation instance, a text snippet and text span.
        """
        parent = h.span(grandparent)
        h.span(parent, anno_code(anno))
        type_span = h.span(parent, '[%s] ' % anno.type)
        if naughty:
            type_span.attrib['class'] = 'naughty'

        if anno in contexts:
            turn = contexts[anno].turn
            turn_info = stac.split_turn_text(doc.text(turn.span))[0]
            # everything up to the first colon is shown in bold
            head, colon, rest = turn_info.partition(":")
            if colon:
                tid = ET.SubElement(parent, 'b')
                tid.text = head + ":"
                h.span(parent, rest)
            else:
                h.span(parent, turn_info)

        if stac.is_relation_instance(anno):
            # relation instances get no snippet of their own
            return parent

        t_text = text(anno)
        if stac.is_cdu(anno):
            trange = turn_range(anno)
            if trange:
                h.elem(parent, 'b', trange)
        h.span(parent,
               text=snippet(t_text, 100),
               attrib={'class': 'snippet'})
        h.span(parent, ' %s' % anno.text_span())
        return parent
Ejemplo n.º 4
0
def is_non2sided_rel(gra, _, rel):
    """
    True for a relation instance whose edge in the graph does not
    have exactly two links (a source and a target).

    The second argument is ignored.
    """
    is_rel = stac.is_relation_instance(gra.annotation(rel))
    if not is_rel:
        return is_rel
    return len(gra.links(rel)) != 2
Ejemplo n.º 5
0
 def in_dialogue(x):
     """
     Whether `x` is covered by the enclosing `units` collection:
     an EDU must be in it, a relation instance needs both its source
     and target in it, a CDU needs all of its terminals in it.
     Anything else is not covered.
     """
     if stac.is_edu(x):
         return x in units
     if stac.is_relation_instance(x):
         return x.source in units and x.target in units
     if stac.is_cdu(x):
         for terminal in x.terminals():
             if terminal not in units:
                 return False
         return True
     return False
Ejemplo n.º 6
0
def is_arrow_inversion(g, contexts, r):
    """
    Relation in a graph whose first linked node has a text span that
    compares greater than the text span of its second linked node
    (presumably: a relation pointing from textual right to left)

    :param g: graph providing `links` and `annotation`
    :param contexts: unused; kept so that the function has the same
                     signature as the other graph predicates
    :param r: the edge to examine; it must have exactly two links,
              otherwise unpacking raises ValueError

    Returns a false value if `r` is not a relation instance.
    """
    n1, n2 = g.links(r)
    is_rel = stac.is_relation_instance(g.annotation(r))
    span1 = g.annotation(n1).text_span()
    span2 = g.annotation(n2).text_span()
    return is_rel and span1 > span2
Ejemplo n.º 7
0
 def in_dialogue(d_annos, anno):
     """
     Whether `anno` belongs to the dialogue whose annotations are
     `d_annos`: an EDU must be in it, a relation instance needs both
     its source and target in it, a CDU needs all of its terminals
     in it. Anything else does not belong.
     """
     if stac.is_edu(anno):
         return anno in d_annos
     if stac.is_relation_instance(anno):
         return anno.source in d_annos and anno.target in d_annos
     if stac.is_cdu(anno):
         for terminal in anno.terminals():
             if terminal not in d_annos:
                 return False
         return True
     return False
Ejemplo n.º 8
0
def is_dupe_rel(gra, _, rel):
    """
    True if some other relation instance linked to the source of
    `rel` connects the same pair of nodes as `rel`, in either
    direction.

    The second argument is ignored.
    """
    src, tgt = gra.links(rel)
    for other in gra.links(src):
        if not stac.is_relation_instance(gra.annotation(other)):
            continue
        if other != rel and (
                gra.rel_links(other) == (src, tgt) or
                gra.rel_links(other) == (tgt, src)):
            return True
    return False
Ejemplo n.º 9
0
def is_arrow_inversion(gra, _, rel):
    """
    Relation whose first linked node has a text span comparing
    greater than that of its second linked node, ie. one running
    from textual right to left (may not be a problem).

    The second argument is ignored.
    """
    first, second = gra.links(rel)
    is_rel = stac.is_relation_instance(gra.annotation(rel))
    first_span = gra.annotation(first).text_span()
    second_span = gra.annotation(second).text_span()
    if not is_rel:
        return is_rel
    return first_span > second_span
Ejemplo n.º 10
0
def is_non2sided_rel(gra, _, rel):
    """
    True for a relation instance whose edge in the graph does not
    have exactly two links (a source and a target).

    The second argument is ignored.
    """
    is_rel = stac.is_relation_instance(gra.annotation(rel))
    if not is_rel:
        return is_rel
    return len(gra.links(rel)) != 2
Ejemplo n.º 11
0
 def in_dialogue(d_annos, anno):
     """
     Whether `anno` belongs to the dialogue whose annotations are
     `d_annos`: an EDU must be in it, a relation instance needs both
     its source and target in it, a CDU needs all of its terminals
     in it. Anything else does not belong.
     """
     if stac.is_edu(anno):
         return anno in d_annos
     if stac.is_relation_instance(anno):
         return anno.source in d_annos and anno.target in d_annos
     if stac.is_cdu(anno):
         for terminal in anno.terminals():
             if terminal not in d_annos:
                 return False
         return True
     return False
Ejemplo n.º 12
0
def is_arrow_inversion(gra, _, rel):
    """
    Relation whose first linked node has a text span comparing
    greater than that of its second linked node, ie. one running
    from textual right to left (may not be a problem).

    The second argument is ignored.
    """
    first, second = gra.links(rel)
    is_rel = stac.is_relation_instance(gra.annotation(rel))
    first_span = gra.annotation(first).text_span()
    second_span = gra.annotation(second).text_span()
    if not is_rel:
        return is_rel
    return first_span > second_span
Ejemplo n.º 13
0
def is_dupe_rel(gra, _, rel):
    """
    True if some other relation instance linked to the source of
    `rel` connects the same pair of nodes as `rel`, in either
    direction.

    The second argument is ignored.
    """
    src, tgt = gra.links(rel)
    for other in gra.links(src):
        if not stac.is_relation_instance(gra.annotation(other)):
            continue
        if other != rel and (
                gra.rel_links(other) == (src, tgt) or
                gra.rel_links(other) == (tgt, src)):
            return True
    return False
Ejemplo n.º 14
0
def is_puncture(gra, _, rel):
    """
    Relation in a graph that traverses a CDU boundary.

    The chains of CDUs containing each endpoint are compared: the
    relation is a puncture if the target's chain is longer than the
    source's, or if the tail of the source's chain (of the same
    length as the target's chain) differs from the target's chain.

    The second argument is ignored.
    """
    if not stac.is_relation_instance(gra.annotation(rel)):
        return False
    node_from, node_to = gra.links(rel)
    chain_from = gra.containing_cdu_chain(node_from)
    chain_to = gra.containing_cdu_chain(node_to)
    surplus = len(chain_from) - len(chain_to)
    if surplus < 0:
        return True
    return chain_from[surplus:] != chain_to
Ejemplo n.º 15
0
def is_puncture(gra, _, rel):
    """
    Relation in a graph that traverses a CDU boundary.

    The chains of CDUs containing each endpoint are compared: the
    relation is a puncture if the target's chain is longer than the
    source's, or if the tail of the source's chain (of the same
    length as the target's chain) differs from the target's chain.

    The second argument is ignored.
    """
    if not stac.is_relation_instance(gra.annotation(rel)):
        return False
    node_from, node_to = gra.links(rel)
    chain_from = gra.containing_cdu_chain(node_from)
    chain_to = gra.containing_cdu_chain(node_to)
    surplus = len(chain_from) - len(chain_to)
    if surplus < 0:
        return True
    return chain_from[surplus:] != chain_to
Ejemplo n.º 16
0
def has_non_du_member(anno):
    """
    True if `anno` is a relation instance with a source or target
    that satisfies `is_non_du`, or a CDU with such a member.
    Always False for any other kind of annotation.
    """
    if stac.is_relation_instance(anno):
        candidates = [anno.source, anno.target]
    elif stac.is_cdu(anno):
        candidates = anno.members
    else:
        return False

    for member in candidates:
        if is_non_du(member):
            return True
    return False
Ejemplo n.º 17
0
def has_non_du_member(anno):
    """
    True if `anno` is a relation instance with a source or target
    that satisfies `is_non_du`, or a CDU with such a member.
    Always False for any other kind of annotation.
    """
    if stac.is_relation_instance(anno):
        candidates = [anno.source, anno.target]
    elif stac.is_cdu(anno):
        candidates = anno.members
    else:
        return False

    for member in candidates:
        if is_non_du(member):
            return True
    return False
Ejemplo n.º 18
0
def rough_type(anno):
    """
    Coarse label for an annotation, one of

        * "EDU" (segments and EDUs)
        * "relation" (relation instances)
        * the annotation's own type (everything else)
    """
    if anno.type == 'Segment' or stac.is_edu(anno):
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
Ejemplo n.º 19
0
def rough_type(anno):
    """
    Coarse label for an annotation, one of

        * "EDU" (segments and EDUs)
        * "relation" (relation instances)
        * the annotation's own type (everything else)
    """
    if anno.type == 'Segment' or stac.is_edu(anno):
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
Ejemplo n.º 20
0
    def tgt_txt(t):
        """
        Short text summary of the annotation `t`: its code and
        bracketed type (omitted when `light` is set), followed, for
        anything that is not a relation instance, by a 20-character
        snippet of its text and its text span
        """
        tag = anno_code(t)
        tagged_type = '' if light else '%s[%s]' % (tag, t.type)

        if stac.is_relation_instance(t):
            return tagged_type

        sp = t.text_span()
        excerpt = snippet(doc.text(sp), 20)
        return '%s {%s} %s' % (tagged_type, excerpt, sp)
Ejemplo n.º 21
0
    def is_bad(anno):
        "true if the annotation crosses a dialogue boundary"
        if stac.is_relation_instance(anno):
            candidates = [anno.source, anno.target]
        elif stac.is_cdu(anno):
            candidates = list(anno.members)
        else:
            candidates = []

        # only members for which a dialogue is expected count
        relevant = [x for x in candidates if expect_dialogue(x)]
        if not relevant:
            return False
        # more than one distinct dialogue among the members
        return len(frozenset(dialogue(x) for x in relevant)) > 1
Ejemplo n.º 22
0
    def is_bad(anno):
        "true if the annotation crosses a dialogue boundary"
        if stac.is_relation_instance(anno):
            candidates = [anno.source, anno.target]
        elif stac.is_cdu(anno):
            candidates = list(anno.members)
        else:
            candidates = []

        # only members for which a dialogue is expected count
        relevant = [x for x in candidates if expect_dialogue(x)]
        if not relevant:
            return False
        # more than one distinct dialogue among the members
        return len(frozenset(dialogue(x) for x in relevant)) > 1
Ejemplo n.º 23
0
    def is_bad(anno):
        "true if the annotation crosses a dialogue boundary"
        if stac.is_relation_instance(anno):
            candidates = [anno.source, anno.target]
        elif stac.is_cdu(anno):
            candidates = list(anno.members)
        else:
            candidates = []

        # only members for which a dialogue is expected count
        relevant = [x for x in candidates if expect_dialogue(x)]
        if not relevant:
            return False
        # more than one distinct dialogue among the members
        return len(frozenset(dialogue(x) for x in relevant)) > 1
Ejemplo n.º 24
0
    def tgt_txt(anno):
        """
        Short text summary of the given annotation: its code and
        bracketed type (omitted when `light` is set), followed, for
        anything that is not a relation instance, by a 20-character
        snippet of its text and its text span
        """
        tag = anno_code(anno)
        tagged_type = '' if light else '%s[%s]' % (tag, anno.type)

        if stac.is_relation_instance(anno):
            return tagged_type

        span = anno.text_span()
        excerpt = snippet(doc.text(span), 20)
        return '%s {%s} %s' % (tagged_type, excerpt, span)
Ejemplo n.º 25
0
    def tgt_txt(anno):
        """
        Short text summary of the given annotation: its code and
        bracketed type (omitted when `light` is set), followed, for
        anything that is not a relation instance, by a 20-character
        snippet of its text and its text span
        """
        tag = anno_code(anno)
        tagged_type = '' if light else '%s[%s]' % (tag, anno.type)

        if stac.is_relation_instance(anno):
            return tagged_type

        span = anno.text_span()
        excerpt = snippet(doc.text(span), 20)
        return '%s {%s} %s' % (tagged_type, excerpt, span)
Ejemplo n.º 26
0
    def tgt_html(grandparent, t, naughty=False):
        """
        Describe the annotation `t` in HTML and append that
        description to the HTML node `grandparent`.

        The description is a new span holding the annotation code,
        the bracketed annotation type (given the 'naughty' class when
        `naughty` is set), the turn prefix if `t` has an entry in
        `contexts`, and, unless `t` is a relation instance, a snippet
        of its text and its text span. For a CDU, the range of turn
        ids covered by its terminals is shown in bold before the
        snippet.

        Returns the newly created span.
        """
        def turn_id(x):
            """
            Integer turn identifier for `x`, or None if `x` has no
            context or its turn has an empty 'Identifier' feature
            """
            if x not in contexts:
                return None
            tid_str = contexts[x].turn.features['Identifier']
            return int(tid_str) if tid_str else None

        parent = html_span(grandparent)
        html_span(parent, anno_code(t))
        type_span = html_span(parent, '[%s] ' % t.type)
        if naughty:
            type_span.attrib['class'] = 'naughty'

        if t in contexts:
            turn = contexts[t].turn
            turn_info = stac.split_turn_text(doc.text(turn.span))[0]
            turn_splits = turn_info.split(":")
            if len(turn_splits) > 1:
                # the element must not share a name with the turn_id
                # helper: rebinding the helper here would break the
                # CDU branch below, which still needs to call it
                tid_elem = ET.SubElement(parent, 'b')
                tid_elem.text = turn_splits[0] + ":"
                html_span(parent, ":".join(turn_splits[1:]))
            else:
                html_span(parent, turn_info)

        if not stac.is_relation_instance(t):
            t_span = t.text_span()
            t_text = doc.text(t_span)
            if stac.is_cdu(t):
                # falsy ids (None, but also 0) are left out
                tids = [x for x in map(turn_id, t.terminals()) if x]
                if tids:
                    tspan = ET.SubElement(parent, 'b')
                    min_tid = min(tids)
                    max_tid = max(tids)
                    if min_tid == max_tid:
                        tspan.text = "%d: " % min_tid
                    else:
                        tspan.text = "%d-%d: " % (min_tid, max_tid)
            text_sp = html_span(parent, snippet(t_text, 100))
            text_sp.attrib['class'] = 'snippet'
            html_span(parent, ' %s' % t_span)
        return parent
Ejemplo n.º 27
0
 def without_cdus(self, sloppy=False):
     """
     Return a deep copy of this graph with all CDUs removed.
     Links involving these CDUs will point instead from/to
     their deep heads

     `sloppy` is passed on unchanged to `recursive_cdu_heads`.

     This graph itself is left alone; all changes are made to the
     copy, including to the copy's underlying document (`doc`),
     whose relation instances are re-pointed and whose CDU schemas
     are dropped.
     """
     g2    = copy.deepcopy(self)
     # looked up below by the mirror of a CDU link
     heads = g2.recursive_cdu_heads(sloppy)
     # same mapping, but between the corresponding annotation objects
     anno_heads = dict((g2.annotation(k),g2.annotation(v))\
                       for k,v in heads.items())
     # replace all links to/from cdus with to/from their heads
     for e_edge in g2.relations():
         # grab links and attributes first; the edge is deleted below
         links  = g2.links(e_edge)
         attrs  = g2.edge_attributes(e_edge)
         if any(g2.is_cdu(l) for l in links):
             # recreate the edge
             g2.del_edge(e_edge)
             g2.add_edge(e_edge)
             g2.add_edge_attributes(e_edge, attrs)
             for l in links:
                 # CDU links are swapped for their head; others kept
                 l2 = heads[g2.mirror(l)] if g2.is_cdu(l) else l
                 g2.link(l2, e_edge)
     # now that we've pointed everything away, nuke the CDUs
     for e_cdu in g2.cdus():
         # each CDU is removed both as a node (its mirror) and an edge
         g2.del_node(g2.mirror(e_cdu))
         g2.del_edge(e_cdu)
     # to be on the safe side, we should also do similar link-rewriting
     # but on the underlying educe.annotation objects layer
     # (symptom of a yucky design) :-(
     for r in g2.doc.relations:
         if stac.is_relation_instance(r):
             src  = r.source
             tgt  = r.target
             # endpoints without an entry in anno_heads stay as they are
             src2 = anno_heads.get(src, src)
             tgt2 = anno_heads.get(tgt, tgt)
             r.source = src2
             r.target = tgt2
             # keep the relation's span in step with its new endpoints
             r.span   = annotation.RelSpan(src2.local_id(), tgt2.local_id())
     # remove the actual CDU objects too
     g2.doc.schemas = [ s for s in g2.doc.schemas if not stac.is_cdu(s) ]
     return g2
Ejemplo n.º 28
0
def test_fake_objs():
    """
    The fake fixture objects should be recognised as an EDU, a
    relation instance and a CDU respectively
    """
    expectations = [
        (stac.is_edu, edu1),
        (stac.is_relation_instance, rel1),
        (stac.is_cdu, cdu1),
    ]
    for predicate, fake in expectations:
        assert predicate(fake)
Ejemplo n.º 29
0
 def is_relation(self, x):
     """
     True if the parent class considers `x` a relation and the
     annotation behind it is also a STAC relation instance
     """
     inherited = super(Graph, self).is_relation(x)
     if not inherited:
         return inherited
     return stac.is_relation_instance(self.annotation(x))
Ejemplo n.º 30
0
Archivo: tests.py Proyecto: tjane/educe
def test_fake_objs():
    """
    The fake fixture objects should be recognised as an EDU, a
    relation instance and a CDU respectively
    """
    expectations = [
        (stac.is_edu, edu1),
        (stac.is_relation_instance, rel1),
        (stac.is_cdu, cdu1),
    ]
    for predicate, fake in expectations:
        assert predicate(fake)