def speakers(contexts, anno):
    """
    Return the speakers for the given annotation unit.

    An EDU is looked up directly; for any other annotation, the EDUs
    among its terminals are looked up instead.

    Takes : contexts (Context dict), Annotation
    Returns : frozenset of the `speaker()` values of the relevant contexts
    """
    if stac.is_edu(anno):
        relevant = [anno]
    else:
        relevant = list(filter(stac.is_edu, anno.terminals()))
    return frozenset(contexts[edu].speaker() for edu in relevant)
def test_innocent(self):
    """no squawking on in-dialogue relation"""
    source_edu, target_edu = self.edu1_1, self.edu1_2
    relation = FakeRelInst('r', source_edu, target_edu)
    document = FakeDocument(self.edus1, [relation], [])
    contexts = Context.for_edus(document)
    copies = document.copies

    # sanity checks on the fixture: the copied source and the source of
    # the copied relation are both recognised as EDUs
    self.assertTrue(stac.is_edu(copies[source_edu]))
    self.assertTrue(stac.is_edu(copies[relation].source))

    # the relation must not be flagged as cross-dialogue
    crosses_dialogue = is_cross_dialogue(contexts)
    self.assertFalse(crosses_dialogue(copies[relation]))
def rough_type(anno):
    """
    Return a coarse label for an annotation:

    * "EDU" if its type is 'Segment' or stac considers it an EDU
    * "relation" if stac considers it a relation instance
    * otherwise the annotation's own type
    """
    treated_as_edu = anno.type == 'Segment' or stac.is_edu(anno)
    if treated_as_edu:
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
def in_dialogue(x):
    """
    True if the annotation lies entirely within `units`.

    `units` is not a parameter; it is taken from the enclosing scope.

    * EDU: it must itself be in `units`
    * relation instance: both its source and its target must be in `units`
    * CDU: every one of its terminals must be in `units`
    * anything else: False
    """
    if stac.is_edu(x):
        return x in units
    elif stac.is_relation_instance(x):
        # both endpoints of the relation have to be in the dialogue
        return x.source in units and x.target in units
    elif stac.is_cdu(x):
        return all(t in units for t in x.terminals())
    else:
        return False
def _add_edu(self, node):
    """
    Add a plaintext graph node for `node`, labelled with the wrapped
    EDU label of its annotation.

    The node text is coloured red when the label is empty or the
    annotation is not an EDU according to stac.
    """
    anno = self.core.annotation(node)
    wrapped_label = textwrap.fill(self._edu_label(anno), 30)
    attrs = dict(label=wrapped_label, shape='plaintext')
    # De Morgan form of "no label, or not an EDU"
    if not (self._edu_label(anno) and stac.is_edu(anno)):
        attrs['fontcolor'] = 'red'
    graph_node = pydot.Node(node, **attrs)
    self.add_node(graph_node)
def search_glozz_off_by_one(inputs, k):
    """
    EDUs which have non-whitespace (or boundary) characters
    either on their right or left

    Returns a list with one OffByOneItem per suspicious EDU among the
    units of document `inputs.corpus[k]`.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    txt = doc.text()
    found = []
    for unit in doc.units:
        if not stac.is_edu(unit):
            continue
        if is_maybe_off_by_one(txt, unit):
            found.append(OffByOneItem(doc, contexts, unit))
    return found
def in_dialogue(d_annos, anno):
    """
    True if the given annotation is in the given dialogue.

    * EDU: it must itself be in `d_annos`
    * relation instance: both its source and its target must be in
      `d_annos`
    * CDU: every one of its terminals must be in `d_annos`
    * anything else: False
    """
    if stac.is_edu(anno):
        return anno in d_annos
    elif stac.is_relation_instance(anno):
        # both endpoints of the relation have to be in the dialogue
        return anno.source in d_annos and anno.target in d_annos
    elif stac.is_cdu(anno):
        return all(t in d_annos for t in anno.terminals())
    else:
        return False
def search_glozz_off_by_one(inputs, k):
    """
    EDUs which have non-whitespace (or boundary) characters
    either on their right or left

    One OffByOneItem is produced for each unit of `inputs.corpus[k]`
    that is an EDU and is flagged by `is_maybe_off_by_one`.
    """
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    txt = doc.text()
    # lazily select the suspicious EDUs, then wrap each one
    suspects = (unit for unit in doc.units
                if stac.is_edu(unit) and is_maybe_off_by_one(txt, unit))
    return [OffByOneItem(doc, contexts, unit) for unit in suspects]
def node_speaker(anno):
    """
    Return the designated speaker for an EDU or CDU.

    An EDU yields its own speaker. A CDU yields a speaker only when
    all of its terminals share a single one. Everything else, and any
    CDU with zero or several distinct speakers, yields None.
    """
    if stac.is_edu(anno):
        return edu_speaker(anno)
    if not stac.is_cdu(anno):
        return None
    distinct = frozenset(map(edu_speaker, anno.terminals()))
    return next(iter(distinct)) if len(distinct) == 1 else None
def node_speaker(n):
    """
    Return the designated speaker for an EDU or CDU node.

    * EDU: its own speaker
    * CDU: the speaker shared by all of its terminals, or None when
      the terminals have zero or several distinct speakers
    * anything else: None
    """
    if stac.is_edu(n):
        return edu_speaker(n)
    elif stac.is_cdu(n):
        # terminals are fetched once; duplicates collapse in the set
        speakers = frozenset(edu_speaker(t) for t in n.terminals())
        if len(speakers) == 1:
            return next(iter(speakers))
        return None
    else:
        return None
def anno_code(t):
    """
    Short code providing a clue what the annotation is

    The first matching test wins: 'r' (glozz relation), 'e' (EDU),
    'u' (glozz unit), 's' (glozz schema); '???' if none match.
    """
    # order matters: an EDU must be reported as 'e' before the more
    # general unit test gets a chance to answer 'u'
    code_table = (
        (is_glozz_relation, 'r'),
        (stac.is_edu, 'e'),
        (is_glozz_unit, 'u'),
        (is_glozz_schema, 's'),
    )
    for matches, code in code_table:
        if matches(t):
            return code
    return '???'
def rough_type(anno):
    """
    Return either

    * "EDU" (type is 'Segment', or stac recognises an EDU)
    * "relation" (stac recognises a relation instance)
    * or the annotation type
    """
    if anno.type == 'Segment':
        return 'EDU'
    if stac.is_edu(anno):
        return 'EDU'
    return 'relation' if stac.is_relation_instance(anno) else anno.type
def anno_code(anno):
    """
    Short code providing a clue what the annotation is

    Tests are tried in order and the first hit decides:
    relation -> 'r', EDU -> 'e', unit -> 'u', schema -> 's',
    no hit -> '???'.
    """
    if is_glozz_relation(anno):
        return 'r'
    if stac.is_edu(anno):
        return 'e'
    if is_glozz_unit(anno):
        return 'u'
    if is_glozz_schema(anno):
        return 's'
    return '???'
def dialogue(anno):
    """
    Return the enclosing dialogue of an EDU or CDU, or None.

    `contexts` comes from the enclosing scope. An EDU maps to the
    dialogue of its context (None if it has no context). A CDU maps
    to the dialogue shared by all its terminals, or None when it has
    no terminals or they disagree. Other annotations map to None.
    """
    if stac.is_edu(anno):
        return contexts[anno].dialogue if anno in contexts else None
    if not stac.is_cdu(anno):
        return None
    found = [dialogue(term) for term in anno.terminals()]
    if not found:
        return None
    first = found[0]
    return first if all(d == first for d in found[1:]) else None
def dialogue(anno):
    """
    return the enclosing dialogue for an EDU/CDU

    Uses `contexts` from the enclosing scope. Gives None for an EDU
    without a context, for a CDU whose terminals are absent or do not
    all share one dialogue, and for any other kind of annotation.
    """
    if stac.is_edu(anno):
        if anno in contexts:
            return contexts[anno].dialogue
        return None
    if stac.is_cdu(anno):
        per_terminal = list(map(dialogue, anno.terminals()))
        unanimous = per_terminal and all(
            d == per_terminal[0] for d in per_terminal[1:])
        if unanimous:
            return per_terminal[0]
    return None
def cross_check_units(inputs, k1, k2, status):
    """
    Return tuples for certain corpus[k1] units not present in
    corpus[k2]

    Only structure units and EDUs of corpus[k1] are considered. The
    result maps each rough type to a list of MissingItem, each of
    which also carries the corpus[k2] units sharing the same span.

    Raises MissingDocumentException if either key is not in the corpus.
    """
    corpus = inputs.corpus
    for key in (k1, k2):
        if key not in corpus:
            raise MissingDocumentException(key)
    doc1, doc2 = corpus[k1], corpus[k2]
    contexts1, contexts2 = inputs.contexts[k1], inputs.contexts[k2]

    missing = defaultdict(list)
    for unit in doc1.units:
        if not (stac.is_structure(unit) or stac.is_edu(unit)):
            continue
        if filter_matches(unit, doc2.units):
            continue
        rtype = rough_type(unit)
        same_span = [other for other in doc2.units
                     if other.span == unit.span]
        item = MissingItem(status, doc1, contexts1, unit,
                           doc2, contexts2, same_span)
        missing[rtype].append(item)
    return missing
def cross_check_units(inputs, key1, key2, status):
    """
    Return tuples for certain corpus[key1] units not present in
    corpus[key2]

    The units examined are those that stac treats as structure or as
    EDUs. Results are grouped by rough type in a defaultdict of lists
    of MissingItem.

    Raises MissingDocumentException if key1 or key2 is not in the corpus.
    """
    corpus = inputs.corpus
    if key1 not in corpus:
        raise MissingDocumentException(key1)
    if key2 not in corpus:
        raise MissingDocumentException(key2)

    doc1 = corpus[key1]
    doc2 = corpus[key2]
    contexts1 = inputs.contexts[key1]
    contexts2 = inputs.contexts[key2]

    missing = defaultdict(list)
    for unit in doc1.units:
        of_interest = stac.is_structure(unit) or stac.is_edu(unit)
        if of_interest and not filter_matches(unit, doc2.units):
            rtype = rough_type(unit)
            # units of the other document covering exactly the same span
            approx = [cand for cand in doc2.units
                      if cand.span == unit.span]
            missing[rtype].append(
                MissingItem(status, doc1, contexts1, unit,
                            doc2, contexts2, approx))
    return missing
def is_non_du(anno):
    """
    True if the annotation is neither an EDU nor a CDU

    Concretely: it is a glozz relation, or it is a glozz unit that
    stac does not consider an EDU.
    """
    verdict = is_glozz_relation(anno)
    if not verdict:
        verdict = is_glozz_unit(anno) and not stac.is_edu(anno)
    return verdict
def test_fake_objs():
    """Each fake fixture object is recognised by its stac predicate."""
    expectations = (
        (stac.is_edu, edu1),
        (stac.is_relation_instance, rel1),
        (stac.is_cdu, cdu1),
    )
    for recognises, fake in expectations:
        assert recognises(fake)
def is_edu(self, x):
    """
    True if `x` is an EDU both for the parent graph class and for
    stac, the latter being asked about the annotation attached to `x`.
    """
    graph_says_edu = super(Graph, self).is_edu(x)
    # the annotation is only consulted when the parent check passes
    return graph_says_edu and stac.is_edu(self.annotation(x))
def expect_dialogue(anno):
    """True if stac considers the annotation an EDU or a CDU."""
    as_edu = stac.is_edu(anno)
    return as_edu if as_edu else stac.is_cdu(anno)
def expect_dialogue(anno):
    """
    true if the annotation should live in a dialogue

    That is the case for EDUs and CDUs, as recognised by stac.
    """
    verdict = stac.is_edu(anno)
    if not verdict:
        verdict = stac.is_cdu(anno)
    return verdict
def are_single_headed_cdus(inputs, k, gra):
    """Check that each CDU has exactly one head DU.

    A direct member of a CDU counts as headed when it is the target of
    a relation whose source lies anywhere under that CDU (nested CDUs
    included). A CDU is reported when more than one of its direct
    members is unheaded; a CDU with no unheaded member is not reported.

    Parameters
    ----------
    inputs : object
        Provides `corpus` and `contexts`, both indexed by `k`.
    k : key
        Key of the document under scrutiny.
    gra : Graph
        Graph for the discourse structure.

    Returns
    -------
    report_items : list of ReportItem
        List of report items, one per faulty CDU.
    """
    report_items = []
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    # compute the transitive closure of DUs embedded under each CDU
    # * map each CDU to its member EDUs and CDUs, as two lists
    # keys are edge ids eg. 'e_pilot01_07_jhunter_1487683021582',
    # values are node ids eg. 'n_pilot01_07_stac_1464335440'
    cdu2mems = defaultdict(lambda: ([], []))
    for cdu_id in gra.cdus():
        cdu_members = set(gra.cdu_members(cdu_id))
        cdu2mems[cdu_id] = (
            [x for x in cdu_members if stac.is_edu(gra.annotation(x))],
            [x for x in cdu_members if stac.is_cdu(gra.annotation(x))]
        )
    # * replace each nested CDU in the second list with its member DUs
    # (to first list), and mark CDUs for exploration (to second list) ;
    # repeat until fixpoint, ie. transitive closure complete for each CDU
    while any(todo_cdus for _, todo_cdus in cdu2mems.values()):
        for mem_dus, todo_cdus in cdu2mems.values():
            # iterate over a snapshot: todo_cdus is extended and pruned
            # inside the loop; CDUs added here are handled on the next
            # pass of the enclosing while loop
            for mem_cdu in list(todo_cdus):
                # switch between the edge and node representations of
                # CDUs: gra.mirror()
                nested_dus, nested_cdus = cdu2mems[gra.mirror(mem_cdu)]
                # add the nested CDU and its EDU members
                mem_dus.append(mem_cdu)
                mem_dus.extend(nested_dus)
                # store CDU members of the nested CDU for exploration
                todo_cdus.extend(nested_cdus)
                # delete current nested CDU from list of CDUs to be
                # explored
                todo_cdus.remove(mem_cdu)
    # switch to simple dict, forget list of CDUs for exploration
    cdu2mems = {cid: mems for cid, (mems, _) in cdu2mems.items()}
    # end transitive closure

    for cdu_id in gra.cdus():
        cdu = gra.annotation(cdu_id)
        cdu_mems = set(gra.cdu_members(cdu_id))
        cdu_rec_mems = set(cdu2mems[cdu_id])
        internal_head = dict()
        for cdu_mem in cdu_mems:
            for rel in gra.links(cdu_mem):
                if gra.is_relation(rel):
                    src, tgt = gra.rel_links(rel)
                    # src can be any DU under the current CDU, eg. even
                    # a member of a nested CDU ; this is probably too
                    # loose but we'll see later if we need to refine
                    if src in cdu_rec_mems and tgt in cdu_mems:
                        internal_head[tgt] = src
        unheaded_mems = cdu_mems - set(internal_head.keys())
        if len(unheaded_mems) > 1:
            report_items.append(SchemaItem(doc, contexts, cdu, []))
    return report_items
def is_non_du(anno):
    """
    True for a glozz relation, or for a glozz unit that stac does not
    consider an EDU.
    """
    as_relation = is_glozz_relation(anno)
    if as_relation:
        return as_relation
    return is_glozz_unit(anno) and not stac.is_edu(anno)
def are_single_headed_cdus(inputs, k, gra):
    """Check that each CDU has exactly one head DU.

    A direct member of a CDU counts as headed when it is the target of
    a relation whose source lies anywhere under that CDU (nested CDUs
    included). A CDU is reported when more than one of its direct
    members is unheaded; a CDU with no unheaded member is not reported.

    Parameters
    ----------
    inputs : object
        Provides `corpus` and `contexts`, both indexed by `k`.
    k : key
        Key of the document under scrutiny.
    gra : Graph
        Graph for the discourse structure.

    Returns
    -------
    report_items : list of ReportItem
        List of report items, one per faulty CDU.
    """
    report_items = []
    doc = inputs.corpus[k]
    contexts = inputs.contexts[k]
    # compute the transitive closure of DUs embedded under each CDU
    # * map each CDU to its member EDUs and CDUs, as two lists
    # keys are edge ids eg. 'e_pilot01_07_jhunter_1487683021582',
    # values are node ids eg. 'n_pilot01_07_stac_1464335440'
    cdu2mems = defaultdict(lambda: ([], []))
    for cdu_id in gra.cdus():
        cdu_members = set(gra.cdu_members(cdu_id))
        cdu2mems[cdu_id] = (
            [x for x in cdu_members if stac.is_edu(gra.annotation(x))],
            [x for x in cdu_members if stac.is_cdu(gra.annotation(x))]
        )
    # * replace each nested CDU in the second list with its member DUs
    # (to first list), and mark CDUs for exploration (to second list) ;
    # repeat until fixpoint, ie. transitive closure complete for each CDU
    while any(todo_cdus for _, todo_cdus in cdu2mems.values()):
        for mem_dus, todo_cdus in cdu2mems.values():
            # iterate over a snapshot: todo_cdus is extended and pruned
            # inside the loop; CDUs added here are handled on the next
            # pass of the enclosing while loop
            for mem_cdu in list(todo_cdus):
                # switch between the edge and node representations of
                # CDUs: gra.mirror()
                nested_dus, nested_cdus = cdu2mems[gra.mirror(mem_cdu)]
                # add the nested CDU and its EDU members
                mem_dus.append(mem_cdu)
                mem_dus.extend(nested_dus)
                # store CDU members of the nested CDU for exploration
                todo_cdus.extend(nested_cdus)
                # delete current nested CDU from list of CDUs to be
                # explored
                todo_cdus.remove(mem_cdu)
    # switch to simple dict, forget list of CDUs for exploration
    cdu2mems = {cid: mems for cid, (mems, _) in cdu2mems.items()}
    # end transitive closure

    for cdu_id in gra.cdus():
        cdu = gra.annotation(cdu_id)
        cdu_mems = set(gra.cdu_members(cdu_id))
        cdu_rec_mems = set(cdu2mems[cdu_id])
        internal_head = dict()
        for cdu_mem in cdu_mems:
            for rel in gra.links(cdu_mem):
                if gra.is_relation(rel):
                    src, tgt = gra.rel_links(rel)
                    # src can be any DU under the current CDU, eg. even
                    # a member of a nested CDU ; this is probably too
                    # loose but we'll see later if we need to refine
                    if src in cdu_rec_mems and tgt in cdu_mems:
                        internal_head[tgt] = src
        unheaded_mems = cdu_mems - set(internal_head.keys())
        if len(unheaded_mems) > 1:
            report_items.append(
                SchemaItem(doc, contexts, cdu, []))
    return report_items
def is_non_du(anno):
    """
    True if the annotation is neither an EDU nor a CDU

    Relations qualify outright; units qualify unless stac recognises
    them as EDUs.
    """
    if not is_glozz_relation(anno):
        return is_glozz_unit(anno) and not stac.is_edu(anno)
    return is_glozz_relation(anno) or False