def __init__(self, hashcode, unit, rels, schemas, text):
    """
    Initialise through Document.__init__ with the units, relations,
    schemas and text, then record `hashcode` on the instance.
    """
    Document.__init__(self, unit, rels, schemas, text)
    self.hashcode = hashcode
def set_origin(self, origin):
    """
    Delegate to Document.set_origin, then assign `origin` to the
    `origin` attribute of every schema in `self.schemas`.
    """
    Document.set_origin(self, origin)
    for schema in self.schemas:
        schema.origin = origin
def __init__(self, units, rels, schemas, txt):
    """
    Forward the units, relations, schemas and text unchanged to
    Document.__init__.
    """
    Document.__init__(self, units, rels, schemas, txt)
def _mk_doc(self):
    """
    Create an educe.annotation.Document from this graph

    Each annotation built here is also stored in `self.anno_map`:
    EDUs and CDUs under their name, relations under their
    `(source name, target name)` pair.

    Raises ValueError if a link carries a tag other than 'S' or 'C'.
    """
    def offset_of(name):
        # 'a' maps to offset 0, 'b' to 1, ...
        return ord(name) - ord('a')

    def glozz_id(name):
        return 'du_' + str(offset_of(name))

    def relation_label(tag):
        if tag == 'S':
            return 'Q-Elab'
        if tag == 'C':
            return 'Contrast'
        raise ValueError('Unknown tag {0}'.format(tag))

    units = []
    schemas = []
    relations = []

    # EDUs: every name in self.speakers that is not a CDU yields a
    # one-character 'Segment' and a 'Turn' over the same offsets
    for name, speakers in self.speakers.items():
        if name in self.cdus:
            continue

        begin = offset_of(name)
        edu_id = glozz_id(name)
        segment = Unit(edu_id, Span(begin, begin + 1), 'Segment', {})
        emitter = list(speakers)[0]
        turn = Unit('t' + edu_id,
                    Span(begin, begin + 1),
                    'Turn',
                    {'Identifier': begin, 'Emitter': emitter})

        self.anno_map[name] = segment
        units.extend([segment, turn])

    # CDUs: members are split into EDU ids and embedded CDU ids
    for name, members in self.cdus.items():
        schema_id = glozz_id(name)
        edu_members = set(glozz_id(m) for m in members
                          if m not in self.cdus)
        cdu_members = set(glozz_id(m) for m in members
                          if m in self.cdus)
        schema = Schema(schema_id,
                        edu_members,
                        set(),
                        cdu_members,
                        'Complex_discourse_unit',
                        {})
        self.anno_map[name] = schema
        schemas.append(schema)

    # Relations: ids are numbered in creation order, so the number of
    # relations built so far is the next free index
    for source in self.down:
        for target, tag in self.down[source]:
            rel_id = 'rel_' + str(len(relations))
            label = relation_label(tag)
            endpoints = RelSpan(glozz_id(source), glozz_id(target))
            relation = Relation(rel_id, endpoints, label, {})
            self.anno_map[(source, target)] = relation
            relations.append(relation)

    # a single dialogue unit running from 0 to the furthest unit end
    last_char = max(u.text_span().char_end for u in units)
    units.append(Unit('dialogue_0', Span(0, last_char), 'Dialogue', {}))

    return Document(units, relations, schemas, string.ascii_lowercase)