Example #1
0
    def apply(self, chart, grammar, edge):
        """Predict edges for the symbol that ``edge`` expects next.

        Nothing is produced when ``edge`` is complete or when its next
        symbol is not a nonterminal.  Otherwise every production
        returned by ``grammar.productions(lhs=edge.nextsym())`` is
        considered: a production whose first right-hand-side symbol is
        a terminal is skipped unless that terminal equals the chart
        leaf at ``edge.end()``.  If the production's left-hand side
        unifies with ``edge.next_with_bindings()``, an edge built with
        ``FeatureTreeEdge.from_production(prod, edge.end())`` is
        inserted into ``chart``.

        Completed work is remembered in ``self._done`` so that the same
        (symbol, position) pair is not expanded twice for the same
        chart and grammar objects.

        :return: generator over the edges for which ``chart.insert``
            returned a true value.
        """
        if edge.is_complete():
            return
        expected = edge.nextsym()
        position = edge.end()
        if not is_nonterminal(expected):
            return

        # The cache is keyed on the bound symbol and the position; an
        # entry only counts when it was recorded for this very chart
        # and grammar (identity comparison, not equality).
        expected_bound = edge.next_with_bindings()
        cache_key = (expected_bound, position)
        previous = self._done.get(cache_key, (None, None))
        if previous[0] is chart and previous[1] is grammar:
            return

        for production in grammar.productions(lhs=expected):
            # A terminal left corner has to agree with the input leaf
            # at this position, otherwise the prediction is useless.
            if production.rhs():
                left_corner = production.rhs()[0]
                if is_terminal(left_corner) and (
                    position >= chart.num_leaves()
                    or left_corner != chart.leaf(position)
                ):
                    continue

            # Variables are renamed during unification so that the two
            # productions cannot accidentally share variable names.
            if not unify(production.lhs(), expected_bound, rename_vars=True):
                continue
            predicted = FeatureTreeEdge.from_production(production, edge.end())
            if chart.insert(predicted, ()):
                yield predicted

        # Only reached when the generator is run to exhaustion.
        self._done[cache_key] = (chart, grammar)
Example #2
0
    def apply(self, chart, grammar, edge):
        """Build parent edges whose first child is the complete ``edge``.

        For each production returned by
        ``grammar.productions(rhs=edge.lhs())`` an edge is created with
        ``FeatureTreeEdge.from_production`` at ``edge.start()``, its dot
        is moved forward to ``edge.end()``, and the result is inserted
        into ``chart`` with ``edge`` as its only child.

        When ``edge`` is a ``FeatureTreeEdge`` the production is used
        only if its first right-hand-side symbol is a nonterminal that
        unifies with the (variable-renamed) left-hand side of ``edge``;
        the bindings from that unification are passed on to the new
        edge.  For other edge types the bindings stay empty.

        :return: generator over the edges for which ``chart.insert``
            returned a true value.
        """
        if edge.is_incomplete():
            return

        completed = edge.lhs()
        for production in grammar.productions(rhs=completed):
            bindings = {}
            if isinstance(edge, FeatureTreeEdge):
                first_rhs = production.rhs()[0]
                if not is_nonterminal(first_rhs):
                    continue

                # Rename the variables of the completed category so
                # they cannot clash with those of this production.
                # Note that the renamed value is carried over to the
                # following iterations.
                taken = find_variables(
                    (production.lhs(),) + production.rhs(),
                    fs_class=FeatStruct,
                )
                completed = completed.rename_variables(used_vars=taken)

                unified = unify(first_rhs, completed, bindings,
                                rename_vars=False)
                if unified is None:
                    continue

            seed = FeatureTreeEdge.from_production(production, edge.start())
            advanced = seed.move_dot_forward(edge.end(), bindings)
            if chart.insert(advanced, (edge,)):
                yield advanced
Example #3
0
    def apply(self, chart, grammar, left_edge, right_edge):
        """Combine an incomplete left edge with an adjacent complete one.

        The rule only fires when ``left_edge`` ends where ``right_edge``
        starts, ``left_edge`` is an incomplete ``FeatureTreeEdge`` and
        ``right_edge`` is complete.

        * If ``right_edge`` is a ``FeatureTreeEdge``, the symbol that
          ``left_edge`` expects must be a nonterminal of the same
          ``TYPE`` as ``right_edge.lhs()`` and must unify with it (after
          renaming the variables of the right-hand category).
        * Otherwise the expected symbol must simply equal
          ``right_edge.lhs()``.

        On success the dot of ``left_edge`` is moved forward to
        ``right_edge.end()`` and the new edge is added to ``chart`` via
        ``insert_with_backpointer``.

        :return: generator yielding the new edge if
            ``chart.insert_with_backpointer`` returned a true value.
        """
        # Applicability checks, in the original evaluation order.
        if left_edge.end() != right_edge.start():
            return
        if not (left_edge.is_incomplete() and right_edge.is_complete()):
            return
        if not isinstance(left_edge, FeatureTreeEdge):
            return

        completed = right_edge.lhs()
        expected = left_edge.nextsym()

        if not isinstance(right_edge, FeatureTreeEdge):
            if expected != completed:
                return
            # bindings() hands back a copy (per the original comment).
            bindings = left_edge.bindings()
        else:
            if not is_nonterminal(expected):
                return
            if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]:
                return
            bindings = left_edge.bindings()
            # Keep the variables of the two edges apart before unifying.
            completed = completed.rename_variables(
                used_vars=left_edge.variables())
            unified = unify(expected, completed, bindings, rename_vars=False)
            if unified is None:
                return

        extended = left_edge.move_dot_forward(right_edge.end(), bindings)
        if chart.insert_with_backpointer(extended, left_edge, right_edge):
            yield extended
Example #4
0
    def apply_iter(self, chart, grammar, left_edge, right_edge):
        """Combine an incomplete left edge with an adjacent complete one.

        The rule applies only if ``left_edge`` ends where ``right_edge``
        starts, ``left_edge`` is incomplete, ``right_edge`` is complete,
        both are ``TreeEdge`` instances, and the ``TYPE`` of
        ``left_edge.next()`` equals the ``TYPE`` of ``right_edge.lhs()``.
        The two categories are then unified using a copy of the left
        edge's bindings; when ``unify`` returns ``None`` nothing is
        produced.

        The new ``FeatureTreeEdge`` spans both edges, has its dot moved
        one position to the right, and is inserted once for every child
        pointer list of ``left_edge`` (each extended by ``right_edge``).

        :return: generator yielding the new edge if at least one of the
            insertions returned a true value.
        """
        if left_edge.end() != right_edge.start():
            return
        if not left_edge.is_incomplete() or not right_edge.is_complete():
            return
        if not (isinstance(left_edge, TreeEdge)
                and isinstance(right_edge, TreeEdge)):
            return
        if left_edge.next()[TYPE] != right_edge.lhs()[TYPE]:
            return

        # bindings() creates a copy (per the original comment), so the
        # left edge itself is not modified by the unification.
        bindings = left_edge.bindings()
        unified = unify(left_edge.next(), right_edge.lhs(),
                        bindings, rename_vars=False)
        if unified is None:
            return

        combined = FeatureTreeEdge(
            span=(left_edge.start(), right_edge.end()),
            lhs=left_edge.lhs(),
            rhs=left_edge.rhs(),
            dot=left_edge.dot() + 1,
            bindings=bindings)

        # Every child pointer list must be inserted, so the results are
        # collected eagerly instead of stopping at the first success.
        outcomes = [
            chart.insert(combined, pointers + (right_edge,))
            for pointers in chart.child_pointer_lists(left_edge)
        ]
        if any(outcomes):
            yield combined
Example #5
0
    def apply(self, chart, grammar, edge):
        """Build parent edges whose first child is the complete ``edge``.

        Each production from ``grammar.productions(rhs=edge.lhs())`` is
        turned into an edge starting at ``edge.start()`` whose dot is
        advanced to ``edge.end()``; that edge is inserted into ``chart``
        with ``edge`` as its single child.

        For a ``FeatureTreeEdge`` the production is skipped unless its
        first right-hand-side symbol is a nonterminal and unifies with
        the variable-renamed left-hand side of ``edge``; the resulting
        bindings are handed to the new edge.  For any other edge type
        the bindings remain an empty dict.

        :return: generator over the edges for which ``chart.insert``
            returned a true value.
        """
        if edge.is_incomplete():
            return

        completed = edge.lhs()
        for production in grammar.productions(rhs=completed):
            bindings = {}
            if isinstance(edge, FeatureTreeEdge):
                first_rhs = production.rhs()[0]
                if not is_nonterminal(first_rhs):
                    continue

                # Avoid variable capture between the completed category
                # and this production.  The renamed category is kept
                # for the remaining iterations as well.
                taken = find_variables(
                    (production.lhs(),) + production.rhs(),
                    fs_class=FeatStruct,
                )
                completed = completed.rename_variables(used_vars=taken)

                unified = unify(first_rhs, completed, bindings,
                                rename_vars=False)
                if unified is None:
                    continue

            seed = FeatureTreeEdge.from_production(production, edge.start())
            advanced = seed.move_dot_forward(edge.end(), bindings)
            if chart.insert(advanced, (edge,)):
                yield advanced
    def apply(self, chart, grammar, left_edge, right_edge):
        """Extend ``left_edge`` over the adjacent complete ``right_edge``.

        Preconditions checked here: the edges are adjacent
        (``left_edge.end() == right_edge.start()``), ``left_edge`` is an
        incomplete ``FeatureTreeEdge`` and ``right_edge`` is complete.

        When ``right_edge`` is itself a ``FeatureTreeEdge`` the expected
        symbol of ``left_edge`` has to be a nonterminal with the same
        ``TYPE`` as ``right_edge.lhs()`` and the two must unify;
        otherwise plain equality of the expected symbol and
        ``right_edge.lhs()`` is required.

        :return: generator yielding the edge obtained by moving the dot
            of ``left_edge`` to ``right_edge.end()``, provided
            ``chart.insert_with_backpointer`` returned a true value.
        """
        if left_edge.end() != right_edge.start():
            return
        if not (left_edge.is_incomplete() and right_edge.is_complete()):
            return
        if not isinstance(left_edge, FeatureTreeEdge):
            return

        completed = right_edge.lhs()
        expected = left_edge.nextsym()

        if not isinstance(right_edge, FeatureTreeEdge):
            if expected != completed:
                return
            # bindings() hands back a copy (per the original comment).
            bindings = left_edge.bindings()
        else:
            if not is_nonterminal(expected):
                return
            if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]:
                return
            bindings = left_edge.bindings()
            # Rename first so variables of the two edges stay distinct.
            completed = completed.rename_variables(
                used_vars=left_edge.variables())
            unified = unify(expected, completed, bindings, rename_vars=False)
            if unified is None:
                return

        extended = left_edge.move_dot_forward(right_edge.end(), bindings)
        if chart.insert_with_backpointer(extended, left_edge, right_edge):
            yield extended
Example #7
0
    def apply_iter(self, chart, grammar, left_edge, right_edge):
        """Combine an incomplete left edge with an adjacent complete one.

        The rule applies only if ``left_edge`` ends where ``right_edge``
        starts, ``left_edge`` is incomplete, ``right_edge`` is complete
        and both are ``TreeEdge`` instances.  ``left_edge.next()`` is
        then unified with ``right_edge.lhs()`` using a copy of the left
        edge's bindings; nothing is produced when ``unify`` returns
        ``None``.

        The resulting ``FeatureTreeEdge`` covers both spans with the dot
        advanced by one, and is inserted once per child pointer list of
        ``left_edge`` (each extended by ``right_edge``).

        :return: generator yielding the new edge if at least one of the
            insertions returned a true value.
        """
        if left_edge.end() != right_edge.start():
            return
        if not left_edge.is_incomplete() or not right_edge.is_complete():
            return
        if not (isinstance(left_edge, TreeEdge)
                and isinstance(right_edge, TreeEdge)):
            return

        # bindings() creates a copy (per the original comment).
        bindings = left_edge.bindings()
        unified = unify(left_edge.next(), right_edge.lhs(),
                        bindings, rename_vars=False)
        if unified is None:
            return

        combined = FeatureTreeEdge(
            span=(left_edge.start(), right_edge.end()),
            lhs=left_edge.lhs(),
            rhs=left_edge.rhs(),
            dot=left_edge.dot() + 1,
            bindings=bindings)

        # All child pointer lists are inserted; no early exit.
        outcomes = [
            chart.insert(combined, pointers + (right_edge,))
            for pointers in chart.child_pointer_lists(left_edge)
        ]
        if any(outcomes):
            yield combined
    def apply(self, chart, grammar, edge):
        """Predict edges for the nonterminal that ``edge`` expects next.

        Returns immediately for complete edges and for edges whose next
        symbol is not a nonterminal.  Each production obtained from
        ``grammar.productions(lhs=edge.nextsym())`` is filtered: if its
        first right-hand-side symbol is a terminal it has to equal the
        chart leaf at ``edge.end()``, and its left-hand side has to
        unify with ``edge.next_with_bindings()``.  Surviving productions
        become edges via ``FeatureTreeEdge.from_production`` and are
        inserted into ``chart``.

        ``self._done`` maps (bound symbol, position) to the chart and
        grammar the rule was last applied with, so repeated applications
        with the same objects are skipped.

        :return: generator over the edges for which ``chart.insert``
            returned a true value.
        """
        if edge.is_complete():
            return
        expected = edge.nextsym()
        position = edge.end()
        if not is_nonterminal(expected):
            return

        # Identity (not equality) of chart and grammar decides whether
        # a cached entry is still valid.
        expected_bound = edge.next_with_bindings()
        cache_key = (expected_bound, position)
        previous = self._done.get(cache_key, (None, None))
        if previous[0] is chart and previous[1] is grammar:
            return

        for production in grammar.productions(lhs=expected):
            # A terminal left corner must match the input at this
            # position; beyond the last leaf nothing can match.
            if production.rhs():
                left_corner = production.rhs()[0]
                if is_terminal(left_corner) and (
                    position >= chart.num_leaves()
                    or left_corner != chart.leaf(position)
                ):
                    continue

            # rename_vars=True keeps variables of the two productions
            # from being identified with each other.
            if not unify(production.lhs(), expected_bound, rename_vars=True):
                continue
            predicted = FeatureTreeEdge.from_production(production, edge.end())
            if chart.insert(predicted, ()):
                yield predicted

        # Only reached when the generator is run to exhaustion.
        self._done[cache_key] = (chart, grammar)
Example #9
0
    def compute_children(self) -> List["FeatureGrammarNode"]:
        """Expand each non-string symbol of this node by one production.

        For every symbol in ``self.symbols`` that is not a ``str`` and
        every production returned by
        ``self.feature_grammar.productions(lhs=symbol)``, the
        production's variables are renamed away from the variables
        already used in this derivation, its left-hand side is unified
        with the symbol, and the resulting bindings are substituted
        into both the sibling symbols and the production's right-hand
        side.  The child node holds the siblings with the expanded
        symbol replaced by that right-hand side.

        :return: the list of child nodes, or a single
            ``FeatureGrammarNode("DEAD_END", None)`` when no production
            could be applied.
        """
        # Collect the variables already in use so renamed production
        # variables cannot collide with them.
        taken_vars: Set[Variable] = set()
        for sym in self.symbols:
            if not isinstance(sym, str):
                taken_vars |= find_variables(sym)

        children: List["FeatureGrammarNode"] = []
        for position, sym in enumerate(self.symbols):
            if isinstance(sym, str):
                continue

            for production in self.feature_grammar.productions(lhs=sym):
                # One shared renaming map keeps lhs and rhs consistent.
                # TODO: postpone the renaming until it is known to be
                # necessary.
                renaming = {}
                head = rename_variables(
                    production.lhs(), used_vars=taken_vars, new_vars=renaming
                )
                body = [
                    rename_variables(
                        part, used_vars=taken_vars, new_vars=renaming
                    )
                    for part in production.rhs()
                ]

                bindings = {}
                head = unify(head, sym, bindings=bindings)
                if head is None:
                    # The production does not fit this symbol.
                    continue

                # Apply the new bindings to the whole derivation ...
                siblings = [
                    substitute_bindings(other, bindings=bindings)
                    for other in self.symbols
                ]
                # ... and to the symbols that replace the expanded one.
                expansion = [
                    substitute_bindings(part, bindings=bindings)
                    for part in body
                ]

                replaced = (
                    siblings[:position] + expansion + siblings[position + 1:]
                )
                children.append(
                    FeatureGrammarNode(tuple(replaced), self.feature_grammar)
                )

        if children:
            return children
        return [FeatureGrammarNode("DEAD_END", None)]
Example #10
0
 def _parses(self, chart, start, tree_class):
     """Collect the trees of all full-span edges matching ``start``.

     Every edge selected with ``span=(0, chart.num_leaves())`` whose
     left-hand side unifies with ``start`` contributes the trees
     returned by ``chart.trees(edge, complete=True, ...)``.

     :return: list of trees built with ``tree_class``.
     """
     results = []
     full_span = (0, chart.num_leaves())
     for candidate in chart.select(span=full_span):
         if not unify(candidate.lhs(), start, rename_vars=True):
             continue
         results.extend(
             chart.trees(candidate, complete=True, tree_class=tree_class))
     return results
 def parses(self, start, tree_class=Tree):
     """Generate the trees of all full-span edges matching ``start``.

     Edges are selected with ``start=0`` and ``end=self._num_leaves``.
     An edge is used only if it is a ``FeatureTreeEdge``, its
     left-hand side has the same ``TYPE`` as ``start``, and that
     left-hand side unifies with ``start``.

     :return: generator over the trees from
         ``self.trees(edge, complete=True, tree_class=tree_class)``.
     """
     for candidate in self.select(start=0, end=self._num_leaves):
         if not isinstance(candidate, FeatureTreeEdge):
             continue
         if candidate.lhs()[TYPE] != start[TYPE]:
             continue
         if not unify(candidate.lhs(), start, rename_vars=True):
             continue
         for tree in self.trees(candidate, complete=True,
                                tree_class=tree_class):
             yield tree
Example #12
0
 def parses(self, start, tree_class=Tree):
     """Yield every tree rooted in a full-span edge that matches ``start``.

     Only ``FeatureTreeEdge`` instances selected with ``start=0`` and
     ``end=self._num_leaves`` are considered.  The cheap ``TYPE``
     comparison is done before the full unification of the edge's
     left-hand side with ``start``.

     :return: generator over the trees from
         ``self.trees(edge, complete=True, tree_class=tree_class)``.
     """
     for candidate in self.select(start=0, end=self._num_leaves):
         if not isinstance(candidate, FeatureTreeEdge):
             continue
         if candidate.lhs()[TYPE] != start[TYPE]:
             continue
         if not unify(candidate.lhs(), start, rename_vars=True):
             continue
         for tree in self.trees(candidate, complete=True,
                                tree_class=tree_class):
             yield tree
Example #13
0
 def _parses(self, chart, start, tree_class):
     """Return the complete parses found in ``chart``.

     An edge counts when it is selected with
     ``span=(0, chart.num_leaves())`` and its left-hand side unifies
     with ``start``; its trees come from
     ``chart.trees(edge, complete=True, tree_class=tree_class)``.

     :return: list of all such trees.
     """
     results = []
     full_span = (0, chart.num_leaves())
     for candidate in chart.select(span=full_span):
         if not unify(candidate.lhs(), start, rename_vars=True):
             continue
         results.extend(
             chart.trees(candidate, complete=True, tree_class=tree_class))
     return results
Example #14
0
 def _parses(self, chart, start, tree_class):
     """Return the complete parses found in ``chart``.

     Edges are selected with ``span=(0, chart.num_leaves())``.
     ``LeafEdge`` instances are ignored; the remaining edges must have
     a left-hand side with the same ``TYPE`` as ``start`` that also
     unifies with ``start``.

     :return: list of the trees from
         ``chart.trees(edge, complete=True, tree_class=tree_class)``.
     """
     results = []
     full_span = (0, chart.num_leaves())
     for candidate in chart.select(span=full_span):
         if isinstance(candidate, LeafEdge):
             continue
         if candidate.lhs()[TYPE] != start[TYPE]:
             continue
         if not unify(candidate.lhs(), start, rename_vars=True):
             continue
         results.extend(
             chart.trees(candidate, complete=True, tree_class=tree_class))
     return results
Example #15
0
 def apply_iter(self, chart, grammar, edge):
     """Predict edges for whatever the incomplete ``edge`` expects next.

     All productions of ``grammar`` are scanned.  For each one whose
     left-hand side unifies with ``edge.next_with_bindings()`` a
     ``FeatureTreeEdge`` with ``dot=0`` and the span
     ``(edge.end(), edge.end())`` is inserted into ``chart``.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_complete():
         return
     for production in grammar.productions():
         # rename_vars=True: variables of the two productions must
         # not be identified with each other.
         if not unify(production.lhs(), edge.next_with_bindings(),
                      rename_vars=True):
             continue
         origin = edge.end()
         predicted = FeatureTreeEdge(span=(origin, origin),
                                     lhs=production.lhs(),
                                     rhs=production.rhs(),
                                     dot=0)
         if chart.insert(predicted, ()):
             yield predicted
 def apply_iter(self, chart, grammar, edge):
     """Predict edges for whatever the incomplete ``edge`` expects next.

     Only the productions returned by
     ``grammar.productions(lhs=edge.next())`` are examined.  For each
     one whose left-hand side unifies with
     ``edge.next_with_bindings()`` a ``FeatureTreeEdge`` with ``dot=0``
     and the span ``(edge.end(), edge.end())`` is inserted into
     ``chart``.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_complete():
         return
     for production in grammar.productions(lhs=edge.next()):
         # rename_vars=True: variables of the two productions must
         # not be identified with each other.
         if not unify(production.lhs(), edge.next_with_bindings(),
                      rename_vars=True):
             continue
         origin = edge.end()
         predicted = FeatureTreeEdge(span=(origin, origin),
                                     lhs=production.lhs(),
                                     rhs=production.rhs(),
                                     dot=0)
         if chart.insert(predicted, ()):
             yield predicted
Example #17
0
 def apply_iter(self, chart, gramar, edge):
     """Scan the input leaf that follows the incomplete ``edge``.

     The leaf at ``edge.end()`` is looked up in ``self._word_to_pos``.
     For every category listed there that unifies with
     ``edge.next_with_bindings()``, a ``LeafEdge`` for the word and a
     complete ``FeatureTreeEdge`` (category over that single leaf) are
     inserted into ``chart``.

     The ``gramar`` parameter (spelling kept for callers) is not used.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_complete():
         return
     if edge.end() >= chart.num_leaves():
         # Nothing left in the input to scan.
         return

     position = edge.end()
     word = chart.leaf(position)
     for category in self._word_to_pos.get(word, []):
         if not unify(category, edge.next_with_bindings(),
                      rename_vars=True):
             continue

         leaf_edge = LeafEdge(word, position)
         if chart.insert(leaf_edge, ()):
             yield leaf_edge

         category_edge = FeatureTreeEdge((position, position + 1),
                                         category, [word], 1)
         if chart.insert(category_edge, (leaf_edge,)):
             yield category_edge
Example #18
0
 def apply_iter(self, chart, gramar, edge):
     """Scan the input leaf that follows the incomplete ``edge``.

     Candidate categories for the leaf at ``edge.end()`` come from
     ``self._word_to_pos``.  Each candidate that unifies with
     ``edge.next_with_bindings()`` leads to two insertions: a
     ``LeafEdge`` for the word, and a ``FeatureTreeEdge`` spanning
     that one leaf with the dot at position 1.

     The ``gramar`` parameter (spelling kept for callers) is not used.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_complete():
         return
     if edge.end() >= chart.num_leaves():
         # Nothing left in the input to scan.
         return

     position = edge.end()
     word = chart.leaf(position)
     for category in self._word_to_pos.get(word, []):
         if not unify(category, edge.next_with_bindings(),
                      rename_vars=True):
             continue

         leaf_edge = LeafEdge(word, position)
         if chart.insert(leaf_edge, ()):
             yield leaf_edge

         category_edge = FeatureTreeEdge((position, position + 1),
                                         category, [word], 1)
         if chart.insert(category_edge, (leaf_edge,)):
             yield category_edge
Example #19
0
 def apply_iter(self, chart, gramar, edge):
     """Scan the input leaf that follows the incomplete ``edge``.

     Candidate categories are the left-hand sides of the productions
     returned by ``gramar.productions(rhs=leaf)`` for the leaf at
     ``edge.end()``.  A candidate is used when its ``TYPE`` equals the
     ``TYPE`` of ``edge.next()`` and it unifies with
     ``edge.next_with_bindings()``; then a ``LeafEdge`` for the word
     and a ``FeatureTreeEdge`` over that single leaf are inserted
     into ``chart``.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_complete():
         return
     if edge.end() >= chart.num_leaves():
         # Nothing left in the input to scan.
         return

     position = edge.end()
     word = chart.leaf(position)
     # Materialise the candidates before the chart is modified.
     categories = [production.lhs()
                   for production in gramar.productions(rhs=word)]
     for category in categories:
         if category[TYPE] != edge.next()[TYPE]:
             continue
         if not unify(category, edge.next_with_bindings(),
                      rename_vars=True):
             continue

         leaf_edge = LeafEdge(word, position)
         if chart.insert(leaf_edge, ()):
             yield leaf_edge

         category_edge = FeatureTreeEdge((position, position + 1),
                                         category, [word], 1)
         if chart.insert(category_edge, (leaf_edge,)):
             yield category_edge
Example #20
0
 def apply_iter(self, chart, grammar, edge):
     """Predict non-lexical edges for what the incomplete ``edge`` expects.

     All productions of ``grammar`` are scanned, except those whose
     right-hand side is a single ``str`` (the original comment leaves
     these lexical productions to the scanner rule).  A production is
     used when the ``TYPE`` of its left-hand side equals the ``TYPE``
     of ``edge.next()`` and the left-hand side unifies with
     ``edge.next_with_bindings()``; a ``FeatureTreeEdge`` with
     ``dot=0`` and span ``(edge.end(), edge.end())`` is then inserted
     into ``chart``.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_complete():
         return
     for production in grammar.productions():
         is_lexical = (len(production.rhs()) == 1
                       and isinstance(production.rhs()[0], str))
         if is_lexical:
             continue
         if production.lhs()[TYPE] != edge.next()[TYPE]:
             continue
         # rename_vars=True: variables of the two productions must
         # not be identified with each other.
         if not unify(production.lhs(), edge.next_with_bindings(),
                      rename_vars=True):
             continue
         origin = edge.end()
         predicted = FeatureTreeEdge(span=(origin, origin),
                                     lhs=production.lhs(),
                                     rhs=production.rhs(),
                                     dot=0)
         if chart.insert(predicted, ()):
             yield predicted
Example #21
0
 def apply_iter(self, chart, grammar, edge):
     """Build parent edges on top of the complete ``edge``.

     Productions are taken from ``grammar.productions(rhs=edge.lhs())``.

     * If ``edge`` is a ``FeatureTreeEdge``, a production is used only
       when its first right-hand-side symbol is a
       ``FeatStructNonterminal`` that unifies with ``edge.lhs()``; the
       bindings collected by that unification are stored on the new
       edge.
     * Otherwise (a leaf edge, per the original comment) every
       production is used and no bindings are passed.

     Each new ``FeatureTreeEdge`` has the span of ``edge``, the dot at
     position 1, and ``edge`` as its only child.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_incomplete():
         return

     if not isinstance(edge, FeatureTreeEdge):
         for production in grammar.productions(rhs=edge.lhs()):
             parent = FeatureTreeEdge(edge.span(), production.lhs(),
                                      production.rhs(), 1)
             if chart.insert(parent, (edge,)):
                 yield parent
         return

     for production in grammar.productions(rhs=edge.lhs()):
         first_symbol = production.rhs()[0]
         if not isinstance(first_symbol, FeatStructNonterminal):
             continue
         bindings = {}
         if not unify(first_symbol, edge.lhs(), bindings):
             continue
         parent = FeatureTreeEdge(edge.span(), production.lhs(),
                                  production.rhs(), 1, bindings)
         if chart.insert(parent, (edge,)):
             yield parent
 def apply_iter(self, chart, grammar, edge):
     """Build parent edges on top of the complete ``edge``.

     For every production from ``grammar.productions(rhs=edge.lhs())``
     a ``FeatureTreeEdge`` with the span of ``edge`` and the dot at
     position 1 is inserted into ``chart`` with ``edge`` as its child.

     When ``edge`` is a ``FeatureTreeEdge`` the production's first
     right-hand-side symbol must be a ``FeatStructNonterminal`` and
     must unify with ``edge.lhs()``; the unification bindings are
     attached to the new edge.  For other edges (leaf edges, per the
     original comment) no such check is made.

     :return: generator over the edges for which ``chart.insert``
         returned a true value.
     """
     if edge.is_incomplete():
         return

     if not isinstance(edge, FeatureTreeEdge):
         for production in grammar.productions(rhs=edge.lhs()):
             parent = FeatureTreeEdge(edge.span(), production.lhs(),
                                      production.rhs(), 1)
             if chart.insert(parent, (edge,)):
                 yield parent
         return

     for production in grammar.productions(rhs=edge.lhs()):
         first_symbol = production.rhs()[0]
         if not isinstance(first_symbol, FeatStructNonterminal):
             continue
         bindings = {}
         if not unify(first_symbol, edge.lhs(), bindings):
             continue
         parent = FeatureTreeEdge(edge.span(), production.lhs(),
                                  production.rhs(), 1, bindings)
         if chart.insert(parent, (edge,)):
             yield parent
Example #23
0
 def generate_from(self, x):
     """Randomly generate a tree whose root is derived from ``x``.

     One of the expansions returned by ``self.expansions(x)`` is
     picked with ``random.choice``.  String symbols on the chosen
     rule's right-hand side become leaves; every other symbol has the
     current bindings substituted and is generated recursively, after
     which it is unified with the label of the generated subtree so
     that ``bindings`` is updated for the following symbols.

     :return: a ``Tree`` whose label is the unified category with the
         bindings substituted and its variables renamed.
     :raise Failure: if ``self.expansions(x)`` is empty.
     :raise Exception: if a generated subtree's label fails to unify
         with the symbol it was generated from.
     """
     candidates = self.expansions(x)
     if not candidates:
         raise Failure

     rule, unified, bindings = random.choice(candidates)
     subtrees = []
     for symbol in rule.rhs():
         if isinstance(symbol, str):
             subtrees.append(symbol)
             continue

         bound = symbol.substitute_bindings(bindings)
         subtree = self.generate_from(bound)
         subtrees.append(subtree)
         # Called for its side effect on ``bindings``; it is expected
         # to succeed because the subtree was generated from ``bound``.
         if not unify(bound, subtree.label(), bindings, rename_vars=False):
             raise Exception("This can't happen")

     label = unified.substitute_bindings(bindings).rename_variables()
     return Tree(label, subtrees)
Example #24
0
def iter_expansions(x, g):
    """Yield the productions of ``g`` that can expand ``x``.

    Each production from ``g.productions(lhs=x)`` has its left-hand
    side unified with ``x`` using a fresh bindings dict.  Productions
    for which the unification result is falsy are skipped.

    :return: generator of ``(production, unified_lhs, bindings)``
        tuples.
    """
    for production in g.productions(lhs=x):
        bindings = {}
        unified = unify(x, production.lhs(), bindings, rename_vars=False)
        if not unified:
            continue
        yield (production, unified, bindings)
Example #25
0
def checkSentence(iii,s,corrlist,rec=0):
	"""
	Apply the corrections in corrlist to the sentence s and return the result.

	iii: index of sentence in the sentence list (only used in the debug
		output and handed on to the recursive calls)
	s: sentence in nodedic format; used here as a mapping from token
		index to a token entry, where an entry may carry a "gov" mapping
		whose values are strings
	corrlist: iterable of (matchdic, insdic) pairs; a token entry that
		unifies with matchdic is corrected using insdic
	rec: recursion level; tokens are only matched while rec is below 3

	Returns the (possibly replaced) sentence. Whenever a correction is
	applied, glue/integrate and the recursive call each receive a deep
	copy of s.
	"""
	if debug: print "checking sentence",iii,"rec",rec,"len(s)",len(s)
	
	
	# TODO: kick out:
	# On the outermost call only: relabel every governor relation that
	# ends with "_invisible" so that it reads "_inherited" instead.
	if rec<1:
		
		for i in s:
			if "gov" in s[i]:
				for g in s[i]["gov"]:
					if s[i]["gov"][g].endswith("_invisible"):
						s[i]["gov"][g]=s[i]["gov"][g].replace("_invisible","_inherited")
	
	
	# The recursion depth is capped: from level 3 on, s is returned as is.
	if rec<3:
				
		
		# sorted(s) is a snapshot of the indices; s itself may be replaced
		# inside the loop, so an index can disappear before it is visited.
		for i in sorted(s):
			
			if debug>1:
				try:
					print "checking:",i,s[i][tokenname]
					
				# NOTE(review): bare except; presumably meant to catch the
				# lookup failure for a vanished index - confirm and narrow.
				except:
					print "index",i,"is gone"
			
			
			if i in s:
				for matchdic,insdic in corrlist:
					# NOTE(review): s can be replaced by an earlier pair of
					# this loop, while "i in s" is only tested once above;
					# verify that s[i] cannot be missing here.
					if unify(s[i],matchdic):
						
						
						if debug>1:
							print "èèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèè"
							print s[i]
							print "matched oooooooooooooooooooooooooooooooo"
							print matchdic
							print "èèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèè"
						if isinstance(insdic,int):# gluing: an int insdic holds the direction of the token gluing
							s = glue(copy.deepcopy(s),i,insdic)
							# the glued sentence is always checked again
							s = checkSentence(iii,copy.deepcopy(s),corrlist,rec+1)
						else:
							s = integrate(copy.deepcopy(s),i,insdic)
							# re-check only when insdic has more than one entry
							if len(insdic)>1:
								s=checkSentence(iii,copy.deepcopy(s),corrlist,rec+1)
			
			
	return s			
Example #26
0
 def iter_expansions(self, x):
     """Yield the grammar productions that can expand ``x``.

     Each production from ``self.__grammar.productions(lhs=x)`` has its
     left-hand side unified with ``x`` using a fresh bindings dict.
     Productions for which the unification result is falsy are
     skipped.

     :return: generator of ``(production, unified_lhs, bindings)``
         tuples.
     """
     for production in self.__grammar.productions(lhs=x):
         bindings = {}
         unified = unify(x, production.lhs(), bindings, rename_vars=False)
         if not unified:
             continue
         yield (production, unified, bindings)