def applySL(self, d):
    """
    Shorten a two-edge chain (x, X, n), (n, Y, z) into a single edge (x, ?, z).

    Search for any node that occurs exactly once as an edge source and
    exactly once as an edge target, then combine its two edges by removing
    that node and merging the edges.  The merged edge is created as
    ``Edge('*', d.count)`` and slot ``[0]`` of the incoming edge is copied
    into it.  The merge only applies when both edges are nonterminal.

    Returns whatever ``CanonicalDerivation.derive`` returns for the first
    node that can be removed, or ``False`` when no node qualifies.
    """
    triples = d.get_triples()
    source_count = defaultdict(int)
    target_count = defaultdict(int)
    for (a, _, c) in triples:
        source_count[a] += 1
        target_count[c] += 1

    for node in source_count:
        if source_count[node] != 1 or target_count.get(node, 0) != 1:
            continue

        # Locate the edge entering the node and the edge leaving it.
        incoming = outgoing = None
        index = None
        for i, at in enumerate(triples):
            if at[0] == node and at[2] != node:
                outgoing = at
            elif at[2] == node and at[0] != node:
                incoming = at
                index = i

        if incoming is None or outgoing is None:
            # The node's single source/target occurrence is one self-loop
            # (node, X, node): there is no chain to shorten.  Previously this
            # case crashed when subscripting the placeholder value.
            continue

        if incoming[1].isNonterminal() and outgoing[1].isNonterminal():
            # we have an edge that we can shorten:
            # remove (x,X,node) and (node,Y,z) for (x,?,z)
            merged = Edge('*', d.count)
            merged[0] = incoming[1][0]
            nrf = (incoming[0], merged, outgoing[2])  # new rule from
            nrt = [incoming, outgoing]  # new rule to
            new_amr = list(triples)
            new_amr[index] = nrf
            new_amr.remove(outgoing)
            new_rule = RuleInstance(nrf, nrt, 'SL')
            return CanonicalDerivation.derive(d, new_amr, new_rule)
    return False
def applyCircle(self, d):
    """
    A->B becomes A->B->B (circle) in reverse.

    Looks for a nonterminal self-loop (a, b, a) together with a *different*
    nonterminal edge (x, y, a) that ends in the same node, and folds the
    pair into a single edge ``(x, Edge(y[0], d.count), a)``.

    Candidate parent edges are tried in triple order.  The self-loop is
    never used as its own parent: in that case the replacement edge would
    be written to the loop's slot and deleted again straight away, leaving
    a rule whose left-hand side is absent from the resulting graph.

    Returns whatever ``CanonicalDerivation.derive`` returns for the first
    match, or ``False`` when nothing can be folded.
    """
    triples = d.get_triples()

    # Positions of the edges that end in each node, in triple order.
    incoming = defaultdict(list)
    for pos, (_, _, target) in enumerate(triples):
        incoming[target].append(pos)

    for loop_pos, loop in enumerate(triples):
        (a, b, c) = loop
        if a != c or not b.isNonterminal():
            continue
        for parent_pos in incoming[c]:
            if parent_pos == loop_pos:
                continue  # the loop cannot act as its own parent edge
            parent = triples[parent_pos]
            (x, y, z) = parent
            if y.isNonterminal():
                # We found a candidate to remove (x,y,a,b,a) down to (x,y,a)
                nrf = (x, Edge(y[0], d.count), z)
                nrt = [parent, loop]
                new_amr = list(triples)
                new_amr[parent_pos] = nrf
                del new_amr[loop_pos]
                new_rule = RuleInstance(nrf, nrt, 'CC')
                return CanonicalDerivation.derive(d, new_amr, new_rule)
    return False
def applySW(self, d):
    """
    Search for any multiple edges (a-X-b) and merge two of these.

    For every (source, target) pair that occurs on more than one triple,
    the first two distinct nonterminal edges between those nodes are merged
    into ``(a, Edge(X[0], d.count), b)``, where X is the earlier edge.  The
    merged edge takes the earlier edge's position and the later edge is
    deleted.

    Returns whatever ``CanonicalDerivation.derive`` returns for the first
    merge found, or ``False`` when there is nothing to merge.
    """
    triples = d.get_triples()
    multiplicity = defaultdict(int)
    for (a, _, c) in triples:
        multiplicity[(a, c)] += 1

    for (a, c) in multiplicity:
        if multiplicity[(a, c)] <= 1:
            continue
        # We have one edge that we can remove: remove (a,X,b) and (a,Y,b) for (a,X,b)
        # If more than two, we can remove any one of these, given any other one of these
        for i, first in enumerate(triples):
            (x, y, z) = first
            if not (x == a and z == c and y.isNonterminal()):
                continue
            # Only look at later triples so each pair is considered once.
            for j in range(i + 1, len(triples)):
                second = triples[j]
                (k, label, m) = second
                if (k == x and m == z and label.isNonterminal()
                        and first != second):
                    nrf = (k, Edge(y[0], d.count), m)
                    nrt = [first, second]
                    new_amr = list(triples)
                    new_amr[i] = nrf
                    del new_amr[j]
                    new_rule = RuleInstance(nrf, nrt, 'SW')
                    return CanonicalDerivation.derive(d, new_amr, new_rule)
    return False
def applyElongate(self, d):
    """
    A->B becomes A->B->C in reverse.

    Looks for a nonterminal edge (b, x, c) whose end node c has no outgoing
    edges and exactly one incoming edge, plus a nonterminal edge (a, y, b)
    leading into b.  The two are folded into ``(a, Edge(y[0], d.count), b)``,
    which takes the position of (b, x, c); the (a, y, b) triple is removed.

    Returns whatever ``CanonicalDerivation.derive`` returns for the first
    match, or ``False`` when no such pair exists.
    """
    triples = d.get_triples()
    child = defaultdict(set)   # node -> triples leaving it
    parent = defaultdict(set)  # node -> triples entering it
    for trip in triples:
        (a, _, c) = trip
        child[a].add(trip)
        parent[c].add(trip)

    for i, tail in enumerate(triples):
        (b, x, c) = tail
        if child[c] or len(parent[c]) != 1 or not x.isNonterminal():
            continue
        for head in parent[b]:
            # head ends in b by construction of the parent map
            (a, y, _) = head
            if y.isNonterminal():
                # We found a candidate to remove (a,y,b,x,c) down to (a,y,b)
                nrf = (a, Edge(y[0], d.count), b)
                nrt = [head, tail]
                new_amr = list(triples)
                new_amr[i] = nrf
                new_amr.remove(head)
                new_rule = RuleInstance(nrf, nrt, 'LL')
                return CanonicalDerivation.derive(d, new_amr, new_rule)
    return False
def applySO(self, d):
    """
    Search for any split a-X-b, a-Y-c where c is a leaf node.
    Remove a-Y-c and let it be generated by a-X-b.

    An edge (a, Y, c) is removable when c is the target of exactly one
    edge, c is the source of none, a is the source of more than one edge
    and Y is nonterminal.  The surviving edge is the first nonterminal
    edge (a, X, z) with z != c; it is replaced in place by
    ``(a, Edge(X[0], d.count), z)``.

    Returns whatever ``CanonicalDerivation.derive`` returns for the first
    match, or ``False`` when no edge can be removed.
    """
    triples = d.get_triples()
    as_target = defaultdict(int)
    as_source = defaultdict(int)
    for (a, _, c) in triples:
        as_target[c] += 1
        as_source[a] += 1

    for i, leaf_edge in enumerate(triples):
        (a, b, c) = leaf_edge
        removable = (as_target[c] == 1 and as_source[a] > 1
                     and as_source[c] == 0 and b.isNonterminal())
        if not removable:
            continue
        for j, survivor in enumerate(triples):
            (x, y, z) = survivor
            if x == a and z != c and y.isNonterminal():
                # Depending on the grammar it would make sense to install a clause here
                # which determines the 'surviving' edge based on some implicit ordering
                nrf = (x, Edge(y[0], d.count), z)
                nrt = [survivor, leaf_edge]
                rulename = 'OL'  # short for open-left
                new_amr = list(triples)
                new_amr[j] = nrf
                del new_amr[i]
                new_rule = RuleInstance(nrf, nrt, rulename)
                return CanonicalDerivation.derive(d, new_amr, new_rule)
    return False
def get_possible_edges(self):
    """
    Build one graph of answer edges per chunk and return them as a list.

    For every ``(src, chunk_text, chunk_prob)`` yielded by
    ``self.get_chunk_prob()`` (results keep that order; presumably it is
    sorted by the probability of the chunk being a predicate -- confirm
    against get_chunk_prob):

    * collect the relations in ``self.non_minimal_rels`` whose ``src``
      contains the chunk,
    * walk their ``(dst, label)`` pairs from the shortest ``dst`` span to
      the longest, keeping each one that passes ``check_intersection`` for
      both ends and whose ends do not intersect each other,
    * append ``self.populate_graph(kept_edges)`` to the result,
    * drop the collected relations from ``self.non_minimal_rels``.

    NOTE(review): only answer edges are produced by the visible code; no
    question edges are built here.

    Returns the list of ``populate_graph`` results.  It was previously
    built but never returned.
    """
    def span_length(candidate):
        # candidate is ((start, end), label); replaces the Python-2-only
        # tuple-parameter lambda with the same unpacking.
        (start, end), _ = candidate
        return end - start

    ret = []
    for (src, chunk_text, chunk_prob) in self.get_chunk_prob():
        # collect all edges in which source participates as a possible source
        cur_edges = [rel for rel in self.non_minimal_rels if src in rel.src]

        # answer edges
        answer_edges = []
        nodes = set()
        candidates = sorted(((edge.dst, edge.label) for edge in cur_edges),
                            key=span_length)
        for dst, label in candidates:
            if (self.check_intersection(src, nodes)
                    and self.check_intersection(dst, nodes)
                    and not intersecting_spans(src, dst)):
                nodes.add(src)
                nodes.add(dst)
                answer_edges.append(
                    Edge(sent=self.sent, src=src, dst=dst, label=label))

        ret.append(self.populate_graph(answer_edges))
        self.non_minimal_rels = [
            rel for rel in self.non_minimal_rels if rel not in cur_edges
        ]
    return ret
# check_intersections(breakpoints, current_edge, edges_list)
#
# Tries to assign an end point to `current_edge` from `breakpoints`.
# Nothing is attempted unless there is more than one breakpoint.  For each
# breakpoint greater than `current_edge.start_point`, a trial Edge is built
# from that start point with the breakpoint as its `end_point`; the trial is
# marked as intersected when `passes_through(edge, trial)` is true for any
# edge in `edges_list`.  A breakpoint whose trial edge is not intersected is
# written to `current_edge.end_point` (this mutates the argument).
#
# NOTE(review): this definition has lost its line breaks and indentation, so
# the scope of the final `return current_edge` cannot be read off the text.
# It may return on the first non-intersecting breakpoint or only after all
# breakpoints were tried -- restore the layout from version control before
# relying on either reading.
def check_intersections(breakpoints, current_edge, edges_list): if len(breakpoints) > 1: for new_point in breakpoints: if new_point > current_edge.start_point: candidate_new_edge = Edge( current_edge.start_point, ) candidate_new_edge.end_point = new_point intersected = False for edge in edges_list: if passes_through( edge, candidate_new_edge, ): intersected = True if not intersected: current_edge.end_point = new_point return current_edge
def loadMR(mr):
    """
    Load a meaning representation using the Dag class.

    Parses ``mr`` with ``Dag.from_string`` and keeps only its triples, which
    are what the rest of the code works on.  Each triple from
    ``dag.triples(instances=False)`` is returned as
    ``(triple[0], Edge(triple[1], 1, 1), triple[2][0])``.

    Returns the converted triples as a list.
    """
    parsed = Dag.from_string(mr)
    return [
        (raw[0], Edge(raw[1], 1, 1), raw[2][0])
        for raw in parsed.triples(instances=False)
    ]
def applyDelex(self, d):
    """
    Replace the first terminal edge by an edge built from its label prefix.

    The first triple (a, b, c) whose edge reports ``isTerminal()`` is
    rewritten in place to ``(a, Edge(prefix, d.count), c)``, where ``prefix``
    is the part of ``b[0]`` before its first ``":"``.

    Returns whatever ``CanonicalDerivation.derive`` returns for that
    rewrite, or ``False`` when no triple carries a terminal edge.

    Raises ValueError if the terminal edge's label contains no ``":"``.
    """
    triples = d.get_triples()
    for i, triple in enumerate(triples):
        (a, b, c) = triple
        if not b.isTerminal():
            continue
        # Two-target unpacking is deliberate: a label without ":" fails
        # loudly instead of being turned into a rule silently.
        nt_label, _ = b[0].split(":", 1)
        nrf = (a, Edge(nt_label, d.count), c)
        nrt = [triple]
        new_mrt = list(triples)
        new_mrt[i] = nrf  # replace triple with new triple
        new_rule = RuleInstance(nrf, nrt, 'DL')
        return CanonicalDerivation.derive(d, new_mrt, new_rule)
    return False
def edge_builder(edge_context, pair):
    """
    Create an edge from the context's current vertex to ``pair``.

    The start vertex is read from ``edge_context.meta['vertice']``.  When the
    context's graph is weighted, the weight is requested through ``read_int``
    (default 1, a single attempt); otherwise the edge value is ``None``.  The
    new edge is appended to ``edge_context.graph.edges``.

    Returns the same ``edge_context`` that was passed in.
    """
    graph = edge_context.graph
    _logger.info('runnin-graph %s', graph)

    vertice = edge_context.meta['vertice']
    weight = (
        read_int(
            f'[{vertice}->{pair}] Edge weight',
            default=1,
            max_attempts=1,
            throlling_message='Using weight 1 by default.',
        )
        if graph.is_weighted
        else None
    )

    edge = Edge((vertice, pair), value=weight)
    _logger.info(f'Creating edge {edge}...')
    graph.edges.append(edge)
    return edge_context
def applyJointHit(self, d):
    """
    Edge A-B becomes edges A-C and B-C in reverse.

    Looks for a node c with no outgoing edges and exactly two incoming
    edges (a, x, c) and (b, y, c), both nonterminal and with a != b.  The
    pair is folded into ``(a, Edge('*', d.count), b)``, which takes the
    position of (a, x, c); the (b, y, c) triple is removed.  A pairing is
    skipped when b already has an edge into a, which attempts to avoid
    generating looped structures.

    Returns whatever ``CanonicalDerivation.derive`` returns for the first
    match, or ``False`` when no such pair exists.
    """
    triples = d.get_triples()
    child = defaultdict(set)   # node -> triples leaving it
    parent = defaultdict(set)  # node -> triples entering it
    for trip in triples:
        (a, _, c) = trip
        child[a].add(trip)
        parent[c].add(trip)

    for i, first in enumerate(triples):
        (a, x, c) = first
        if child[c] or len(parent[c]) != 2 or not x.isNonterminal():
            continue
        for second in parent[c]:
            # second ends in c by construction of the parent map
            (b, y, _) = second
            if not y.isNonterminal() or b == a:
                continue
            # optional (attempts to avoid generating looped structures)
            if any(target == a for (_, _, target) in child[b]):
                continue
            # We found a candidate to remove (a,x,c) (b,y,c) down to (a,?,b)
            nrf = (a, Edge('*', d.count), b)
            nrt = [first, second]
            new_amr = list(triples)
            new_amr[i] = nrf
            new_amr.remove(second)
            new_rule = RuleInstance(nrf, nrt, 'JH')
            return CanonicalDerivation.derive(d, new_amr, new_rule)
    return False
self.edges = [] self.nodes = set() self.amr_head = None non_minimal_rels = self.non_minimal_rels for (src, chunk_text, chunk_prob) in self.get_chunk_prob(): # collect all edges in which source participates as a possible source cur_edges = [rel for rel in non_minimal_rels if src in rel.src] for dst, label, wid, origin in sorted([(edge.dst, edge.label, edge.wid, "answer") for edge in cur_edges] +\ [(cur_src, edge.label, edge.wid, "question") for edge in cur_edges for cur_src in edge.src if cur_src != src], key = lambda ((start, end), l, w, o): end - start, reverse = True): pos_edge = Edge(sent=self.sent, src=src, dst=dst, label=label, wid=wid) if ((not projective) or self.check_projective((src, dst), self.edges)) and \ self.check_intersection(src, self.nodes) and \ self.check_intersection(dst, self.nodes) and \ (not intersecting_spans(src, dst)) and \ self.check_containment(self.edges + [pos_edge], self.nodes.union([src, dst])): if self.amr_head is None: self.amr_head = src self.total_edge_count += 1 pos_edge.label = list(