Exemple #1
0
 def add_n_ary_rule(self, rule):
     """
     Add a rule whose RHS has more than two elements by binarizing it.

     The first RHS element is split off, and a freshly named intermediate
     category stands for "everything after the first element".  Two rules
     are registered through self.add_rule in place of the original:

       * one that rewrites the intermediate category to the remaining
         RHS elements, passing their semantics through unchanged, and
       * one that combines the first element with the intermediate
         category to produce the original LHS.

     If the remainder still has more than two elements, self.add_rule is
     expected to route it back here, so the split repeats.

     Example: for the original rule
         Rule('$Z', '$A $B $C $D')
     the intermediate category is '$Z_$A' (roughly, "$Z missing $A to the
     left"), and the rules added are
         Rule('$Z_$A', '$B $C $D')
         Rule('$Z', '$A $Z_$A')
     """
     head, tail = rule.rhs[0], rule.rhs[1:]

     # Derive a category name that is not in use yet; underscores are
     # appended until the name is unique among self.categories.
     new_cat = '%s_%s' % (rule.lhs, head)
     assert is_cat(new_cat)
     while new_cat in self.categories:
         new_cat += '_'
     self.categories.add(new_cat)

     # Intermediate rule: its semantics is simply the list of the
     # semantics of the remaining elements.
     self.add_rule(Rule(new_cat, tail, lambda sems: sems))

     # Top rule: splice the head's semantics in front of the list produced
     # by the intermediate rule, then apply the original rule's semantics.
     self.add_rule(Rule(
         rule.lhs,
         (head, new_cat),
         lambda sems: rule.apply_semantics([sems[0]] + sems[1]),
     ))
Exemple #2
0
 def add_rule_containing_optional(self, rule):
     """
     Add a rule whose RHS contains at least one optional element.

     The leftmost optional element is expanded into two variants of the
     rule, both registered through self.add_rule in place of the original:
     one where that element is required, and one where it is dropped.
     Optional elements further to the right are left as they are; adding
     the variants is expected to bring them back here in turn.

     Example: for the original rule
         Rule('$Z', '$A ?$B ?$C $D')
     the rules added are
         Rule('$Z', '$A $B ?$C $D')
         Rule('$Z', '$A ?$C $D')
     """
     # Position of the leftmost optional element (-1 if there is none).
     first = -1
     for idx, elt in enumerate(rule.rhs):
         if is_optional(elt):
             first = idx
             break
     assert first >= 0
     assert len(rule.rhs) > 1, 'Entire RHS is optional: %s' % rule

     before = rule.rhs[:first]
     after = rule.rhs[first + 1:]

     # Variant 1: keep the element but strip its leading optional marker.
     required = rule.rhs[first][1:]
     self.add_rule(Rule(rule.lhs, before + (required,) + after, rule.sem))

     # Variant 2: drop the element.  A semantics that is a plain value is
     # reused untouched; a semantics that is a function still expects one
     # argument per original RHS element, so a None placeholder is inserted
     # where the dropped element's semantics would have been.
     sem = (
         (lambda sems: rule.sem(sems[:first] + [None] + sems[first:]))
         if isinstance(rule.sem, FunctionType)
         else rule.sem
     )
     self.add_rule(Rule(rule.lhs, before + after, sem))
Exemple #3
0
 def apply_annotators(self, chart, tokens, i, j):
     """
     Add parses to chart cell (i, j) for every annotation of tokens[i:j].

     Each annotator in self.annotators is asked to annotate the token span;
     every (category, semantics) pair it yields becomes a Parse appended to
     chart[(i, j)].  Does nothing when there are no annotators.
     """
     if not self.annotators:
         return

     # Each token is indexed by its 'word' key to get the surface form.
     words = [token['word'] for token in tokens]
     for annotator in self.annotators:
         annotations = annotator.annotate(tokens[i:j])
         for category, semantics in annotations:
             span = words[i:j]
             rule = Rule(category, tuple(span), semantics)
             # The cell is looked up only when there is something to add.
             chart[(i, j)].append(Parse(rule, span))
Exemple #4
0
 def apply_aliases(self, chart, words, i, j):
     """
     Add a '$UserList' parse to chart cell (i, j) if the span is an alias.

     The words in words[i:j] are joined with single spaces; when the
     resulting phrase is a key of self.aliases, a Parse carrying the
     semantics ('.alias', ('.string', phrase)) is appended to chart[(i, j)].
     Does nothing when there are no aliases or the phrase is not one.
     """
     if not self.aliases:
         return

     span = words[i:j]
     phrase = ' '.join(span)
     if phrase not in self.aliases:
         return

     rule = Rule(
         '$UserList',
         tuple(phrase.split()),
         ('.alias', ('.string', phrase)),
     )
     chart[(i, j)].append(Parse(rule, span))
Exemple #5
0
    def __init__(self, bases, entity_names=None, aliases=None,
        beam_width=10, top_k=-1, start_symbol='$ROOT'):
        """
        Build a grammar from one or more bases plus per-instance additions.

        Args:
            bases: a single base object or a list of them.  Each base must
                expose `rules`, `ops`, `helpers`, `annotators` and
                `translate_ops`; their contents are merged in list order,
                with later bases overriding earlier ones on key clashes in
                the dict-valued attributes.
            entity_names: optional iterable of names.  The name at position
                i adds the rule
                Rule('$ArgX', name, ('.arg', ('.int', i + 1))).
                Defaults to no names.
            aliases: optional container stored as `self.aliases`.  A
                caller-supplied object is stored by reference (not copied);
                when omitted, each instance gets its own empty dict.
            beam_width: stored as `self.beam_width`.
            top_k: stored as `self.top_k`.
            start_symbol: stored as `self.start_symbol`.
        """
        # None sentinels instead of mutable defaults: a `{}` default would be
        # one dict shared by every instance built without `aliases`, so a
        # mutation through one instance's `self.aliases` would leak into all.
        if entity_names is None:
            entity_names = []
        if aliases is None:
            aliases = {}

        # Extract from bases
        bases = bases if isinstance(bases, list) else [bases]
        rules = []
        self.ops = {}
        self.helpers = {}
        self.annotators = []
        self.translate_ops = {}
        for base in bases:
            rules += base.rules
            self.ops.update(base.ops)
            self.helpers.update(base.helpers)
            self.annotators += base.annotators
            self.translate_ops.update(base.translate_ops)

        # Add aliases and candidate-specific rules
        self.aliases = aliases
        for i, arg in enumerate(entity_names):
            # Argument indices in the semantics are 1-based.
            rules.append(Rule('$ArgX', arg, ('.arg', ('.int', i + 1))))

        # Set parameters
        self.beam_width = beam_width
        self.top_k = top_k

        # Initialize the rule indexes before any rule is added, since
        # add_rule is called for every collected rule below.
        self.categories = set()
        self.lexical_rules = defaultdict(list)
        self.unary_rules = defaultdict(list)
        self.binary_rules = defaultdict(list)
        self.start_symbol = start_symbol
        self.parser = Spacy()
        for rule in rules:
            self.add_rule(rule)
        print('Grammar construction complete.')