def get_relations_minimal_centers(ucca_passage: Passage):
    """
    Return the words of the most internal centers of the main relations in a passage.

    Every node of layer "1" tagged "FN" for which ``is_scene()`` is true is
    treated as a scene.  For each scene, the children of its 'P' and 'S' edges
    (the main relations) are followed down through nested 'C' edges until no
    further 'C' child exists; the units reached this way are the minimal
    centers.  A relation without any 'C' child contributes the scene's
    relation list itself.

    Consecutive repetitions of the same terminal are removed by comparing
    terminal positions, so a word that genuinely occurs twice in a row in the
    text (e.g. "had had") is kept.

    :param ucca_passage: the UCCA passage to analyse
    :return: a list holding one list of words (strings) per minimal center unit
    """
    scenes = [node for node in ucca_passage.layer("1").all
              if node.tag == "FN" and node.is_scene()]

    minimal_centers = []
    for scene in scenes:
        relations = [edge.child for edge in scene.outgoing if edge.tag in ('P', 'S')]
        for relation in relations:
            centers = [edge.child for edge in relation.outgoing if edge.tag == 'C']
            if not centers:
                # NOTE(review): the complete relation list is appended once per
                # center-less relation; kept as in the original implementation.
                minimal_centers.append(relations)
                continue
            innermost = centers
            while centers:
                innermost = centers
                # NOTE(review): only the children of the last center are
                # followed, exactly like the original loop, which overwrote its
                # result on every iteration - confirm whether that is intended.
                centers = [edge.child for edge in centers[-1].outgoing if edge.tag == 'C']
            minimal_centers.append(innermost)

    layer0 = ucca_passage.layer("0")
    output = []
    for units in minimal_centers:
        for unit in units:
            words = []
            previous_position = None
            for terminal in unit.get_terminals(False, True):
                position = terminal.position
                if position == previous_position:
                    # the same terminal was returned twice in a row
                    continue
                words.append(str(layer0.by_position(position)))
                previous_position = position
            output.append(words)
    return output
def get_yields(self, passage: Passage):
    """
    Yield the terminal lists of coordinated conjuncts found in ``passage``.

    Units of layer 1 with a truthy ``tag`` are handled as UCCA nodes, all
    others as UD nodes:

    * UCCA foundational node with a connector (nominal coordination):
      one yield per center.
    * UCCA foundational node without a connector (predicate coordination):
      only when at least one linker consists solely of CCONJ tokens; runs of
      adjacent linker / parallel-scene children are then yielded as one list.
    * UD node with outgoing "conj" edges: the node itself and each "conj"
      child, extracted with ``COORDINATION_UD_TAGS`` excluded.
    """
    for node in passage.layer(layer1.LAYER_ID).all:
        if not node.tag:  # UD
            conjuncts = [edge.child for edge in node if edge.tag == "conj"]
            if conjuncts:  # UD and there is a coordination
                for member in [node] + conjuncts:
                    yield self.get_terminals(member, excluded_tags=COORDINATION_UD_TAGS)
            continue

        # UCCA
        if node.tag != layer1.NodeTags.Foundational:
            continue

        if node.connector:  # nominal coordination
            for center in node.centers:
                yield self.get_terminals(center)
            continue

        # predicate coordination, expressed as linkers + parallel scenes
        conjunction_ids = set()
        for linker in node.linkers:
            if all(t.tok[Attr.POS.value] == CCONJ for t in linker.get_terminals()):
                conjunction_ids.add(linker.ID)
        if not conjunction_ids:
            continue

        span = []
        for edge in node:
            if edge.child.ID in conjunction_ids:
                if not span:
                    # a conjunction with nothing collected before it ends the scan
                    break
            elif edge.tag in (layer1.EdgeTags.Linker, layer1.EdgeTags.ParallelScene):
                starts_after_gap = (edge.tag == layer1.EdgeTags.ParallelScene and span
                                    and span[-1].position + 1 < edge.child.start_position)
                if starts_after_gap:
                    # the next parallel scene is not adjacent: flush what we have
                    yield span
                    span = []
                span += self.get_terminals(edge.child)
                continue
            # conjunction after collected material, or any other edge: flush
            if span:
                yield span
                span = []
        if span:
            yield span
def get_yields(self, passage: Passage):
    """
    Yield ``(terminals, edge tag)`` pairs for the relations of interest.

    For units with a truthy ``tag``, every outgoing edge whose tag is in
    ``self.relations`` is reported with the terminals of its child, unless the
    edge is marked "remote" or its child is marked "implicit".

    For units without a tag, only those whose ``token`` has the part of speech
    "VERB" are considered; each of their outgoing non-punctuation edges yields
    the unit itself (wrapped in a list) together with the edge tag.
    """
    for node in passage.layer(layer1.LAYER_ID).all:
        if node.tag:
            for edge in node:
                if edge.tag not in self.relations:
                    continue
                if edge.attrib.get("remote") or edge.child.attrib.get("implicit"):
                    continue
                yield self.get_terminals(edge.child), edge.tag
        elif node.token and node.token.pos == "VERB":
            for edge in node:
                if edge.tag != EdgeTags.Punctuation:
                    yield [node], edge.tag
def get_yields(self, passage: Passage):
    """
    Yield the terminals of subordinate clauses found in ``passage``.

    UCCA (units with a truthy ``tag``): for foundational nodes, the scene
    units among the node's participants are reported when ``self.relations``
    equals ``["xcomp"]``; otherwise the scene units among its elaborators.

    UD (units without a tag): the children of all outgoing edges whose tag is
    in ``self.relations`` are reported.
    """
    for node in passage.layer(layer1.LAYER_ID).all:
        if not node.tag:  # UD
            # children attached through one of the requested relations
            clauses = [edge.child for edge in node if edge.tag in self.relations]
            for clause in clauses:
                yield self.get_terminals(clause)
            continue

        # UCCA
        if node.tag != layer1.NodeTags.Foundational:
            continue
        if self.relations == ["xcomp"]:
            candidates = node.participants
        else:
            candidates = node.elaborators
        for candidate in candidates:
            if layer1.FoundationalNode.is_scene(candidate):
                yield self.get_terminals(candidate)
def get_yields(self, passage: Passage):
    """
    Yield ``(terminals, tag)`` pairs for the predicates of ``passage``.

    A unit whose ``ftag`` is Process or State is reported directly with that
    tag.  Otherwise, when ``passage.extra["format"]`` is "conllu", the unit is
    treated as a UD node: starting from the tag of its first incoming edge,
    the chain of first incoming edges is climbed while the tag is in
    ``LINK_RELATIONS``.  If the tag reached is in ``PREDICATE_RELATIONS`` or
    there is no incoming edge left, the unit is reported, after descending
    through its "head" edges, with the tag of its own first incoming edge.
    """
    for node in passage.layer(layer1.LAYER_ID).all:
        if node.ftag in (EdgeTags.Process, EdgeTags.State):
            yield self.get_terminals(node), node.ftag
            continue
        if passage.extra.get("format") != "conllu":
            continue

        # UD predicate
        relation = node.incoming[0].tag if node.incoming else None
        ancestor = node
        ancestor_relation = relation
        while ancestor_relation in LINK_RELATIONS:
            # look through linking relations to the relation of the parent
            ancestor = ancestor.incoming[0].parent
            ancestor_relation = ancestor.incoming[0].tag if ancestor.incoming else None
        if ancestor_relation not in PREDICATE_RELATIONS and ancestor_relation is not None:
            continue

        # descend to the innermost "head" child
        head = node
        while True:
            inner_heads = [edge.child for edge in head if edge.tag == "head"]
            if not inner_heads:
                break
            head = inner_heads[0]
        yield self.get_terminals(head), relation
def get_yields(self, passage: Passage):
    """
    Yield the terminal lists of conjuncts of coordinations that sit inside
    another coordination.

    UCCA (units with a truthy ``tag``): a foundational node is reported, one
    yield per center, when it has a connector and its ``fparent`` exists and
    has a connector as well.

    UD (units without a tag): a node is reported together with its "conj"
    children when it has outgoing "conj" edges and at least one of its
    incoming edges is also tagged "conj"; terminals are extracted with
    ``COORDINATION_UD_TAGS`` excluded.
    """
    for node in passage.layer(layer1.LAYER_ID).all:
        if node.tag:  # UCCA
            if node.tag != layer1.NodeTags.Foundational:
                continue
            # nominal coordination
            if node.connector and node.fparent and node.fparent.connector:
                for center in node.centers:
                    yield self.get_terminals(center)
            continue

        # UD
        conjuncts = []
        for edge in node:
            if edge.tag == "conj" and any(up.tag == "conj" for up in node.incoming):
                conjuncts.append(edge.child)
        if conjuncts:  # UD and there is a coordination
            for member in [node] + conjuncts:
                yield self.get_terminals(member, excluded_tags=COORDINATION_UD_TAGS)
def get_scenes(ucca_passage: Passage):
    """
    Return all the ucca scenes in the given text.

    A scene is a node of layer '1' tagged "FN" for which ``is_scene()`` is
    true.  Each scene is rendered as the list of the texts of its terminals,
    obtained with ``get_terminals(False, True)``.

    Iterating the terminals this way sometimes yields the same terminal more
    than once.  Such repetitions are dropped by comparing terminal positions
    rather than word texts, so a word that really occurs twice in a row in the
    text (e.g. "had had") is preserved.

    :param ucca_passage: the UCCA passage to analyse
    :return: a list with one list of words (strings) per scene
    """
    text_scenes = []
    for node in ucca_passage.layer('1').all:
        if node.tag != "FN" or not node.is_scene():
            continue
        words = []
        previous_position = None
        for terminal in node.get_terminals(False, True):
            if terminal.position == previous_position:
                # same terminal returned again, not a repeated word
                continue
            words.append(terminal.text)
            previous_position = terminal.position
        text_scenes.append(words)
    return text_scenes
def get_minimal_centers_from_relations(ucca_passage: Passage):
    """
    Return all the most internal centers of main relations in each passage.

    The scenes come from ``get_scenes_ucca`` and the groups of minimal center
    units of each scene from ``_get_minimal_centers_from_scene``.  Every unit
    is then rendered as the list of words at the positions of its terminals
    (``get_terminals(False, True)``), looked up in layer "0".

    Consecutive repetitions of the same terminal are removed by comparing
    terminal positions, so a word that genuinely occurs twice in a row in the
    text is kept.

    :param ucca_passage: the UCCA passage to analyse
    :return: a list holding one list of words (strings) per minimal center unit
    """
    scenes_minimal_centers = []
    for scene in get_scenes_ucca(ucca_passage):
        scenes_minimal_centers += _get_minimal_centers_from_scene(scene)

    layer0 = ucca_passage.layer("0")
    output = []
    for units in scenes_minimal_centers:
        for unit in units:
            words = []
            previous_position = None
            for terminal in unit.get_terminals(False, True):
                position = terminal.position
                if position == previous_position:
                    # the same terminal was returned twice in a row
                    continue
                words.append(str(layer0.by_position(position)))
                previous_position = position
            output.append(words)
    return output
def get_scenes_ucca(ucca_passage: Passage):
    """
    Collect the scene units of a UCCA passage.

    :param ucca_passage: the UCCA passage to analyse
    :return: the nodes of layer '1' that are tagged "FN" and for which
             ``is_scene()`` is true, in layer order
    """
    scene_units = []
    for node in ucca_passage.layer('1').all:
        if node.tag == "FN" and node.is_scene():
            scene_units.append(node)
    return scene_units
def get_minimal_centers_from_participants(P: Passage):
    """
    P is a ucca passage. Return all the minimal participant centers in each scene.

    The result has one entry per scene (as returned by ``get_scenes_ucca``);
    each entry is a list of spans, a span being whatever ``flatten_unit``
    returns for a minimal center unit.  Participants that resolve to several
    centers have their spans unified in a final pass.
    """

    def children_by_tag(node, tags):
        # children reached through outgoing edges carrying one of ``tags``
        return [edge.child for edge in node.outgoing if edge.tag in tags]

    def innermost_centers(start, tags=('C',)):
        # Walk down from a non-empty list of centers until no deeper level
        # exists.  At each level only the children of the LAST center survive,
        # because every pass of the inner loop overwrites the previous result.
        level = start
        while level:
            for center in level:
                below = children_by_tag(center, tags)
            deepest = level
            level = below
        return deepest

    scenes = get_scenes_ucca(P)
    n = []
    for scene in scenes:  # find participant nodes
        per_scene = []
        for participant in children_by_tag(scene, ('A',)):
            centers = children_by_tag(participant, ('C',))
            if centers:
                # also addresses center Scenes
                per_scene.append(innermost_centers(centers, ('C', 'P', 'S')))
            elif participant.is_scene():  # address the case of Participant Scenes
                relations = children_by_tag(participant, ('P', 'S'))
                for relation in relations:
                    centers = children_by_tag(relation, ('C',))
                    if centers:
                        per_scene.append(innermost_centers(centers))
                    else:
                        per_scene.append(relations)
            elif any(edge.tag == "H" for edge in participant.outgoing):
                # address the case of multiple parallel Scenes inside a participant
                heads = []
                for parallel in children_by_tag(participant, ('H',)):
                    # in case of multiple parallel scenes we generate new multiple centers
                    relations = children_by_tag(parallel, ('P', 'S'))
                    for relation in relations:
                        centers = children_by_tag(relation, ('C',))
                        if centers:
                            heads.append(innermost_centers(centers)[0])
                        else:
                            heads.append(relations[0])
                per_scene.append(heads)
            else:
                per_scene.append([participant])
        n.append(per_scene)

    # Layer "0" is looked up as in the original; its value is not used below.
    _layer0 = P.layer("0")

    # find cases of multiple centers
    output = []
    expanded = []
    multi_center_index = []  # [scene index, group index] of every multi-center group
    for per_scene in n:
        scene_idx = n.index(per_scene)
        row = []
        for group in per_scene:
            if len(group) > 1:
                group_idx = per_scene.index(group)
                # split the group into single-element lists
                row.extend(group[i:i + 1] for i in range(len(group)))
                multi_center_index.append([scene_idx, group_idx])
            else:
                row.append(group)
        expanded.append(row)

    for row in expanded:  # find the spans of the participant nodes
        spans = []
        for group in row:
            # TODO: sometimes "group" does not contain anything, which caused the original
            # implementation (without the if) to crash when unpacking
            if len(group) != 1:
                continue
            (unit,) = group
            spans.append(flatten_unit(unit))
        output.append(spans)

    # unify spans in case of multiple centers
    unified = []
    for scene_spans in output:
        merged = []
        for span in scene_spans:
            for scene_idx, first_idx in multi_center_index:
                group_size = len(n[scene_idx][first_idx])
                if span == output[scene_idx][first_idx]:
                    # first span of a multi-center group: append the first
                    # element of each following span of the group (in place)
                    for offset in range(1, group_size):
                        span.append(output[scene_idx][first_idx + offset][0])
                    merged.append(span)
                elif all(span != output[scene_idx][first_idx + offset]
                         for offset in range(1, group_size)):
                    merged.append(span)
            if not multi_center_index:
                merged.append(span)
        unified.append(merged)
    return unified
def get_participants_minimal_centers(P: Passage):
    """
    P is a ucca passage. Return all the minimal participant centers in each scene.

    The result has one entry per scene (layer "1" nodes tagged "FN" for which
    ``is_scene()`` is true); each entry is a list of spans, a span being the
    list of words of a minimal center unit.  Participants that resolve to
    several centers have their spans unified in a final pass.
    """

    def children_by_tag(node, tags):
        # children reached through outgoing edges carrying one of ``tags``
        return [edge.child for edge in node.outgoing if edge.tag in tags]

    def innermost_centers(start, tags=('C',)):
        # Walk down from a non-empty list of centers until no deeper level
        # exists.  At each level only the children of the LAST center survive,
        # because every pass of the inner loop overwrites the previous result.
        level = start
        while level:
            for center in level:
                below = children_by_tag(center, tags)
            deepest = level
            level = below
        return deepest

    scenes = [node for node in P.layer("1").all if node.tag == "FN" and node.is_scene()]
    n = []
    for scene in scenes:  # find participant nodes
        per_scene = []
        for participant in children_by_tag(scene, ('A',)):
            centers = children_by_tag(participant, ('C',))
            if centers:
                # also addresses center Scenes
                per_scene.append(innermost_centers(centers, ('C', 'P', 'S')))
            elif participant.is_scene():  # address the case of Participant Scenes
                relations = children_by_tag(participant, ('P', 'S'))
                for relation in relations:
                    centers = children_by_tag(relation, ('C',))
                    if centers:
                        per_scene.append(innermost_centers(centers))
                    else:
                        per_scene.append(relations)
            elif any(edge.tag == "H" for edge in participant.outgoing):
                # address the case of multiple parallel Scenes inside a participant
                heads = []
                for parallel in children_by_tag(participant, ('H',)):
                    # in case of multiple parallel scenes we generate new multiple centers
                    relations = children_by_tag(parallel, ('P', 'S'))
                    for relation in relations:
                        centers = children_by_tag(relation, ('C',))
                        if centers:
                            heads.append(innermost_centers(centers)[0])
                        else:
                            heads.append(relations[0])
                per_scene.append(heads)
            else:
                per_scene.append([participant])
        n.append(per_scene)

    layer0 = P.layer("0")

    # find cases of multiple centers
    output = []
    expanded = []
    multi_center_index = []  # [scene index, group index] of every multi-center group
    for per_scene in n:
        scene_idx = n.index(per_scene)
        row = []
        for group in per_scene:
            if len(group) > 1:
                group_idx = per_scene.index(group)
                # split the group into single-element lists
                row.extend(group[i:i + 1] for i in range(len(group)))
                multi_center_index.append([scene_idx, group_idx])
            else:
                row.append(group)
        expanded.append(row)

    for row in expanded:  # find the spans of the participant nodes
        spans = []
        # every group must hold exactly one unit; anything else fails to unpack
        for (unit,) in row:
            words = []
            for position in [terminal.position for terminal in unit.get_terminals(False, True)]:
                word = str(layer0.by_position(position))
                # skip a word equal to the one just added
                if not words or word != words[-1]:
                    words.append(word)
            spans.append(words)
        output.append(spans)

    # unify spans in case of multiple centers
    unified = []
    for scene_spans in output:
        merged = []
        for span in scene_spans:
            for scene_idx, first_idx in multi_center_index:
                group_size = len(n[scene_idx][first_idx])
                if span == output[scene_idx][first_idx]:
                    # first span of a multi-center group: append the first
                    # word of each following span of the group (in place)
                    for offset in range(1, group_size):
                        span.append(output[scene_idx][first_idx + offset][0])
                    merged.append(span)
                elif all(span != output[scene_idx][first_idx + offset]
                         for offset in range(1, group_size)):
                    merged.append(span)
            if not multi_center_index:
                merged.append(span)
        unified.append(merged)
    return unified
def get_num_scenes(ucca_passage: Passage):
    """
    Returns the number of scenes in the ucca_passage.

    A scene is a node of layer "1" that is tagged "FN" and for which
    ``is_scene()`` is true.
    """
    return sum(
        1
        for node in ucca_passage.layer("1").all
        if node.tag == "FN" and node.is_scene()
    )
def get_units(self, passage: Passage):
    """
    Yield ``(child, edge tag)`` for the edges of interest in layer 1.

    An edge is reported when all of the following hold:

    * ``self.relations`` is None, or the edge tag is contained in it;
    * the edge is not a Terminal edge;
    * the edge is not marked "remote";
    * the edge's child is not marked "implicit".
    """
    for node in passage.layer(layer1.LAYER_ID).all:
        for edge in node:
            if self.relations is not None and edge.tag not in self.relations:
                continue
            if edge.tag == layer1.EdgeTags.Terminal:
                continue
            if edge.attrib.get("remote") or edge.child.attrib.get("implicit"):
                continue
            yield edge.child, edge.tag