Beispiel #1
0
    def applySL(self, d):
        """
        Search for a node that occurs exactly once as an edge source and
        exactly once as an edge target, and merge the two edges through it.

        The edges (x, X, n) and (n, Y, z) are replaced by one edge (x, E, z),
        where E is created as Edge('*', d.count) and then has its item 0
        overwritten with item 0 of the incoming edge X.  The merge only
        happens when both X and Y report isNonterminal().

        A node whose only edge is a self-loop (n, X, n) also has one
        occurrence on each side, but offers no pair of edges to merge; such
        nodes are skipped instead of failing on the unset placeholders.

        Returns the result of CanonicalDerivation.derive(...) for the first
        applicable node, or False if no node qualifies.
        """
        triples = d.get_triples()
        out_degree = defaultdict(int)
        in_degree = defaultdict(int)
        for (src, _, dst) in triples:
            out_degree[src] += 1
            in_degree[dst] += 1

        for node in out_degree.keys():
            if out_degree[node] != 1 or in_degree[node] != 1:
                continue

            # Locate the unique edge entering and the unique edge leaving node.
            incoming = None
            outgoing = None
            index = None  # position of the incoming edge inside triples
            for i, at in enumerate(triples):
                if at[0] == node and at[2] != node:
                    outgoing = at
                elif at[2] == node and at[0] != node:
                    incoming = at
                    index = i

            if incoming is None or outgoing is None:
                # Both occurrences belong to a single self-loop: nothing to merge.
                continue

            if incoming[1].isNonterminal() and outgoing[1].isNonterminal():
                merged = Edge('*', d.count)
                merged[0] = incoming[1][0]  # keep the incoming edge's item 0
                nrf = (incoming[0], merged, outgoing[2])  # new rule from
                nrt = [incoming, outgoing]  # new rule to
                new_amr = list(triples)
                new_amr[index] = nrf
                new_amr.remove(outgoing)
                new_rule = RuleInstance(nrf, nrt, 'SL')
                return CanonicalDerivation.derive(d, new_amr, new_rule)
        return False
Beispiel #2
0
    def applyCircle(self, d):
        """
        A->B becomes A->B->B (circle) in reverse.

        Looks for a self-loop (b, X, b) whose edge X is a nonterminal and for
        a different edge (a, Y, b) entering the same node whose edge Y is a
        nonterminal.  The entering edge is replaced by
        (a, Edge(Y[0], d.count), b) and the self-loop is removed.

        The self-loop also ends in its own node, so it is explicitly excluded
        from the candidates for the entering edge; otherwise it could be
        paired with itself and the freshly written edge would be deleted.

        Returns the result of CanonicalDerivation.derive(...) for the first
        match, or False if nothing can be collapsed.
        """
        triples = d.get_triples()

        # incoming[n] holds (position, triple) for every edge that ends in n.
        incoming = defaultdict(set)
        for pos, trip in enumerate(triples):
            (_, _, target) = trip
            incoming[target].add((pos, trip))

        for i, loop in enumerate(triples):
            (a, b, c) = loop
            if a != c or not b.isNonterminal():
                continue
            for index, feeder in incoming[c]:
                if index == i:
                    # The loop cannot serve as its own entering edge.
                    continue
                (x, y, z) = feeder
                if y.isNonterminal():
                    # We found a candidate to remove (x,y,a,b,a) down to (x,y,a)
                    nrf = (x, Edge(y[0], d.count), z)
                    nrt = [feeder, loop]
                    new_amr = list(triples)
                    new_amr[index] = nrf
                    del new_amr[i]
                    new_rule = RuleInstance(nrf, nrt, 'CC')
                    return CanonicalDerivation.derive(d, new_amr, new_rule)
        return False
Beispiel #3
0
    def applySW(self, d):
        """
        Merge two parallel edges that share the same source and target.

        For the first (source, target) pair that carries more than one edge,
        the first two distinct triples on that pair whose edges both report
        isNonterminal() are combined: the earlier triple is replaced by
        (source, Edge(<item 0 of the earlier edge>, d.count), target) and the
        later one is deleted.

        Returns the result of CanonicalDerivation.derive(...), or False when
        no such pair of edges exists.
        """
        triples = d.get_triples()

        # Number of edges per (source, target) pair.
        pair_count = defaultdict(int)
        for (src, _, dst) in triples:
            pair_count[(src, dst)] += 1

        for (a, c), occurrences in pair_count.items():
            if occurrences <= 1:
                continue
            # Any edge on this pair can absorb any other edge on the same pair.
            for i, first in enumerate(triples):
                (x, y, z) = first
                if not (x == a and z == c and y.isNonterminal()):
                    continue
                for j in range(i + 1, len(triples)):
                    second = triples[j]
                    (k, l, m) = second
                    if (k == x and m == z and l.isNonterminal()
                            and first != second):
                        nrf = (k, Edge(y[0], d.count), m)
                        nrt = [first, second]
                        new_amr = list(triples)
                        new_amr[i] = nrf
                        del new_amr[j]
                        new_rule = RuleInstance(nrf, nrt, 'SW')
                        return CanonicalDerivation.derive(
                            d, new_amr, new_rule)
        return False
Beispiel #4
0
    def applyElongate(self, d):
        """
        A->B becomes A->B->C in reverse.

        Finds an edge (b, X, c) where c has no outgoing edges and exactly one
        incoming edge and X reports isNonterminal(), together with an edge
        (a, Y, b) entering b whose Y reports isNonterminal().  The edge
        (b, X, c) is replaced in place by (a, Edge(Y[0], d.count), b) and the
        edge (a, Y, b) is removed.

        Returns the result of CanonicalDerivation.derive(...), or False when
        no such chain exists.
        """
        triples = d.get_triples()

        # Outgoing and incoming triples per node.
        child = defaultdict(set)
        parent = defaultdict(set)
        for trip in triples:
            (src, _, dst) = trip
            child[src].add(trip)
            parent[dst].add(trip)

        for i, tail in enumerate(triples):
            (b, x, c) = tail
            if not (len(child[c]) == 0 and len(parent[c]) == 1
                    and x.isNonterminal()):
                continue
            for head in parent[b]:
                (a, y, _) = head  # the third item is b by construction
                if not y.isNonterminal():
                    continue
                # We found a candidate to remove (a,y,b,x,c) down to (a,y,b)
                nrf = (a, Edge(y[0], d.count), b)
                nrt = [head, tail]
                new_amr = list(triples)
                new_amr[i] = nrf
                new_amr.remove(head)
                new_rule = RuleInstance(nrf, nrt, 'LL')
                return CanonicalDerivation.derive(d, new_amr, new_rule)
        return False
Beispiel #5
0
    def applySO(self, d):
        """
        Search for a split a-X-b, a-Y-c where c is a leaf node.
        Remove a-Y-c and let it be generated by a-X-b.

        An edge (a, Y, c) qualifies when c has exactly one incoming edge and
        no outgoing edges, a has more than one outgoing edge, and Y reports
        isNonterminal().  The surviving edge is the first triple (a, X, z)
        with z != c whose X reports isNonterminal(); it is rewritten to
        (a, Edge(X[0], d.count), z).

        Returns the result of CanonicalDerivation.derive(...), or False when
        no such split exists.
        """
        triples = d.get_triples()

        incoming = defaultdict(int)  # edges ending in a node
        outgoing = defaultdict(int)  # edges starting at a node
        for (src, _, dst) in triples:
            incoming[dst] += 1
            outgoing[src] += 1

        for i, leaf_edge in enumerate(triples):
            (a, b, c) = leaf_edge
            if not (incoming[c] == 1 and outgoing[a] > 1
                    and outgoing[c] == 0 and b.isNonterminal()):
                continue
            for j, survivor in enumerate(triples):
                (x, y, z) = survivor
                if x == a and z != c and y.isNonterminal():
                    # Depending on the grammar it would make sense to choose
                    # the surviving edge by some implicit ordering here.
                    nrf = (x, Edge(y[0], d.count), z)
                    nrt = [survivor, leaf_edge]
                    new_amr = list(triples)
                    new_amr[j] = nrf
                    del new_amr[i]
                    # 'OL' is short for open-left
                    new_rule = RuleInstance(nrf, nrt, 'OL')
                    return CanonicalDerivation.derive(d, new_amr, new_rule)
        return False
Beispiel #6
0
 def get_possible_edges(self):
     """
     Build one graph of answer edges per chunk yielded by get_chunk_prob().

     For every source chunk, the relations in self.non_minimal_rels that
     contain the source are collected, their (dst, label) pairs are visited
     in order of increasing destination span length (end - start), and an
     Edge is kept whenever both endpoints pass check_intersection against
     the nodes chosen so far and the two spans do not intersect.  The kept
     edges are passed to populate_graph and the result is appended to a
     local list.  The relations used are then dropped from
     self.non_minimal_rels.

     NOTE(review): no return statement and no handling of question edges is
     visible in this portion of the method, so as written it returns None --
     confirm whether the remainder of the method is missing here.
     """
     ret = []
     for (src, chunk_text, chunk_prob) in self.get_chunk_prob():
         # collect all edges in which source participates as a possible source
         cur_d = {"answer_edges": [], "question_edges": []}
         cur_edges = [
             rel for rel in self.non_minimal_rels if src in rel.src
         ]

         # answer edges, shortest destination span first
         candidates = [(edge.dst, edge.label) for edge in cur_edges]
         candidates.sort(key=lambda item: item[0][1] - item[0][0])

         nodes = set()
         for dst, label in candidates:
             if not (self.check_intersection(src, nodes) and
                     self.check_intersection(dst, nodes) and
                     (not intersecting_spans(src, dst))):
                 continue
             nodes.add(src)
             nodes.add(dst)
             cur_d["answer_edges"].append(
                 Edge(sent=self.sent, src=src, dst=dst, label=label))

         ret.append(self.populate_graph(cur_d["answer_edges"]))

         # relations consumed for this source are not offered again
         self.non_minimal_rels = [
             rel for rel in self.non_minimal_rels if rel not in cur_edges
         ]
Beispiel #7
0
def check_intersections(breakpoints, current_edge, edges_list):
    """
    Extend current_edge to a breakpoint that no existing edge blocks.

    When more than one breakpoint is given, each breakpoint greater than
    current_edge.start_point is tried in turn: a trial Edge from the start
    point to that breakpoint is built and tested with passes_through against
    every edge in edges_list.  If no edge passes through the trial edge,
    current_edge.end_point is set to that breakpoint, so the last
    unobstructed breakpoint in iteration order wins.

    Returns current_edge (the same object, possibly modified).
    """
    if len(breakpoints) <= 1:
        return current_edge

    for new_point in breakpoints:
        if new_point > current_edge.start_point:
            trial_edge = Edge(current_edge.start_point)
            trial_edge.end_point = new_point
            # Every edge is checked; the trial is rejected if any one crosses.
            blockers = [
                edge for edge in edges_list
                if passes_through(edge, trial_edge)
            ]
            if not blockers:
                current_edge.end_point = new_point
    return current_edge
Beispiel #8
0
def loadMR(mr):
    """
    Load a meaning representation using the Dag class.

    The string is parsed with Dag.from_string and only its triples (obtained
    with instances=False) are kept.  Each triple is converted to
    (triple[0], Edge(triple[1], 1, 1), triple[2][0]).

    Returns the converted triples as a list.
    """
    dag = Dag.from_string(mr)
    return [
        (triple[0], Edge(triple[1], 1, 1), triple[2][0])
        for triple in dag.triples(instances=False)
    ]
Beispiel #9
0
    def applyDelex(self, d):
        """
        Replace the first terminal edge by an edge carrying only its prefix.

        The first triple whose edge reports isTerminal() has item 0 of that
        edge split once at ":"; the part before the colon becomes the label
        of a new Edge(label, d.count) that replaces the old edge in place.
        The split result is unpacked into two values, so a label without a
        ":" raises ValueError.

        Returns the result of CanonicalDerivation.derive(...), or False when
        no triple has a terminal edge.
        """
        triples = d.get_triples()
        for i, triple in enumerate(triples):
            (a, b, c) = triple
            if not b.isTerminal():
                continue

            ntLabel, _ = b[0].split(":", 1)
            nrf = (a, Edge(ntLabel, d.count), c)

            # copy the triples and swap in the new triple
            new_mrt = list(triples)
            new_mrt[i] = nrf

            new_rule = RuleInstance(nrf, [triple], 'DL')
            return CanonicalDerivation.derive(d, new_mrt, new_rule)
        return False
Beispiel #10
0
def edge_builder(edge_context, pair):
    """
    Create an edge from the context's current vertex to pair and store it.

    The source vertex is read from edge_context.meta['vertice'].  When the
    graph reports is_weighted, the weight is requested through read_int
    (default 1, a single attempt); otherwise the edge value is None.  The
    new Edge is appended to edge_context.graph.edges.

    Returns edge_context.
    """
    _logger.info('runnin-graph %s', edge_context.graph)
    vertice = edge_context.meta['vertice']

    # Only weighted graphs ask for a weight.
    weight = (
        read_int(f'[{vertice}->{pair}] Edge weight',
                 default=1,
                 max_attempts=1,
                 throlling_message='Using weight 1 by default.')
        if edge_context.graph.is_weighted
        else None
    )

    edge = Edge((vertice, pair), value=weight)
    _logger.info(f'Creating edge {edge}...')
    edge_context.graph.edges.append(edge)
    return edge_context
Beispiel #11
0
    def applyJointHit(self, d):
        """
        Edge A-B becomes edges A-C and B-C in reverse.

        Finds a node c with no outgoing edges and exactly two incoming edges
        (a, X, c) and (b, Y, c), with a != b and both X and Y reporting
        isNonterminal().  Unless b already has an edge pointing at a, the
        edge (a, X, c) is replaced in place by (a, Edge('*', d.count), b)
        and the edge (b, Y, c) is removed.

        Returns the result of CanonicalDerivation.derive(...), or False when
        no such pair of edges exists.
        """
        triples = d.get_triples()

        # Outgoing and incoming triples per node.
        child = defaultdict(set)
        parent = defaultdict(set)
        for trip in triples:
            (src, _, dst) = trip
            child[src].add(trip)
            parent[dst].add(trip)

        for i, first in enumerate(triples):
            (a, x, c) = first
            if not (len(child[c]) == 0 and len(parent[c]) == 2
                    and x.isNonterminal()):
                continue
            for second in parent[c]:
                (b, y, _) = second  # the third item is c by construction
                if not (y.isNonterminal() and b != a):
                    continue
                # optional (attempts to avoid generating looped structures)
                if any(target == a for (_, _, target) in child[b]):
                    continue
                # We found a candidate to remove (a,x,c) (b,y,c) down to (a,?,b)
                nrf = (a, Edge('*', d.count), b)
                nrt = [first, second]
                new_amr = list(triples)
                new_amr[i] = nrf
                new_amr.remove(second)
                new_rule = RuleInstance(nrf, nrt, 'JH')
                return CanonicalDerivation.derive(d, new_amr, new_rule)
        return False
Beispiel #12
0
        self.edges = []
        self.nodes = set()
        self.amr_head = None
        non_minimal_rels = self.non_minimal_rels
        for (src, chunk_text, chunk_prob) in self.get_chunk_prob():
            # collect all edges in which source participates as a possible source
            cur_edges = [rel for rel in non_minimal_rels if src in rel.src]
            for dst, label, wid, origin in sorted([(edge.dst, edge.label, edge.wid, "answer") for edge in cur_edges] +\
                                                  [(cur_src, edge.label, edge.wid, "question")
                                                   for edge in cur_edges
                                                   for cur_src in edge.src if cur_src != src],
                                                  key = lambda ((start, end), l, w, o): end - start,
                                            reverse = True):
                pos_edge = Edge(sent=self.sent,
                                src=src,
                                dst=dst,
                                label=label,
                                wid=wid)
                if ((not projective) or self.check_projective((src, dst), self.edges)) and \
                   self.check_intersection(src, self.nodes) and \
                   self.check_intersection(dst, self.nodes) and \
                   (not intersecting_spans(src, dst)) and \
                   self.check_containment(self.edges +
                                          [pos_edge],
                                          self.nodes.union([src, dst])):

                    if self.amr_head is None:
                        self.amr_head = src
                    self.total_edge_count += 1

                    pos_edge.label = list(