def add_n_ary_rule(self, rule):
    """
    Add a rule whose RHS has three or more non-terminals by binarizing it.

    A new category is introduced that stands for every RHS element except
    the first. The original rule is then replaced by two rules: one that
    rewrites the new category to the remaining RHS elements, and one that
    combines the first element with the new category to yield the original
    LHS. If the first of these still has more than two RHS elements, adding
    it is expected to trigger further binarization.

    For example, given

        Rule('$Z', '$A $B $C $D')

    a category '$Z_$A' (roughly, "$Z missing $A to the left") is created
    and these rules are added instead:

        Rule('$Z_$A', '$B $C $D')
        Rule('$Z', '$A $Z_$A')
    """
    # Pick a category name that is not yet in use, appending underscores
    # until it is unique, and register it.
    new_cat = '%s_%s' % (rule.lhs, rule.rhs[0])
    assert is_cat(new_cat)
    while new_cat in self.categories:
        new_cat += '_'
    self.categories.add(new_cat)

    # The remainder rule simply passes the list of child semantics upward.
    remainder_rule = Rule(new_cat, rule.rhs[1:], lambda sems: sems)
    self.add_rule(remainder_rule)

    # The head rule re-flattens [first, [rest...]] into one list so the
    # original rule's semantics sees the arguments it was written for.
    head_rule = Rule(
        rule.lhs,
        (rule.rhs[0], new_cat),
        lambda sems: rule.apply_semantics([sems[0]] + sems[1]),
    )
    self.add_rule(head_rule)
def add_rule_containing_optional(self, rule):
    """
    Add a rule that has at least one optional element on its RHS.

    The leftmost optional element is located and the rule is replaced by
    two variants: one where that element is required, and one where it is
    dropped. Any optional elements further to the right are left in place,
    so adding the variants is expected to expand them in turn.

    For example, given

        Rule('$Z', '$A ?$B ?$C $D')

    these rules are added instead:

        Rule('$Z', '$A $B ?$C $D')
        Rule('$Z', '$A ?$C $D')
    """
    # Locate the leftmost optional element (-1 if there is none).
    first = -1
    for position, element in enumerate(rule.rhs):
        if is_optional(element):
            first = position
            break
    assert first >= 0
    assert len(rule.rhs) > 1, 'Entire RHS is optional: %s' % rule

    before = rule.rhs[:first]
    after = rule.rhs[(first + 1):]

    # Variant 1: strip the leading optional marker so the element is required.
    required = (rule.rhs[first][1:],)
    self.add_rule(Rule(rule.lhs, before + required + after, rule.sem))

    # Variant 2: drop the element. A non-function semantics is reused as is;
    # a function semantics gets a None placeholder inserted where the
    # removed element's semantics would have been.
    sem = (
        (lambda sems: rule.sem(sems[:first] + [None] + sems[first:]))
        if isinstance(rule.sem, FunctionType)
        else rule.sem
    )
    self.add_rule(Rule(rule.lhs, before + after, sem))
def apply_annotators(self, chart, tokens, i, j):
    """
    Add parses to chart cell (i, j) produced by the configured annotators.

    Each annotator is given the token span tokens[i:j]; for every
    (category, semantics) pair it yields, a rule rewriting that category to
    the span's words is built and a corresponding parse is appended to
    chart[(i, j)]. Does nothing when there are no annotators.
    """
    if not self.annotators:
        return

    all_words = [token['word'] for token in tokens]
    for annotator in self.annotators:
        for category, semantics in annotator.annotate(tokens[i:j]):
            span_rule = Rule(category, tuple(all_words[i:j]), semantics)
            chart[(i, j)].append(Parse(span_rule, all_words[i:j]))
def apply_aliases(self, chart, words, i, j):
    """
    Add a parse to chart cell (i, j) if the span matches a user-list alias.

    The words in words[i:j] are joined with single spaces and looked up in
    self.aliases. On a hit, a '$UserList' rule over the span is built with
    semantics ('.alias', ('.string', <joined span>)) and a corresponding
    parse is appended to chart[(i, j)]. Does nothing when there are no
    aliases or the span is not an alias.
    """
    if not self.aliases:
        return

    phrase = ' '.join(words[i:j])
    if phrase not in self.aliases:
        return

    alias_rule = Rule(
        '$UserList',
        tuple(phrase.split()),
        ('.alias', ('.string', phrase)),
    )
    chart[(i, j)].append(Parse(alias_rule, words[i:j]))
def __init__(self, bases, entity_names=None, aliases=None, beam_width=10,
             top_k=-1, start_symbol='$ROOT'):
    """
    Build a grammar from one or more bases plus candidate-specific rules.

    Args:
        bases: a single base or a list of bases. Each base must expose
            `rules`, `ops`, `helpers`, `annotators` and `translate_ops`;
            these are merged in order, with later bases overriding earlier
            ones for the dict-valued attributes.
        entity_names: optional iterable of entity names. The k-th name
            (1-based) yields a rule Rule('$ArgX', name, ('.arg', ('.int', k))).
            Defaults to no entities.
        aliases: optional mapping stored as `self.aliases`; spans whose
            space-joined words are keys of it are parsed as '$UserList'.
            Defaults to a fresh empty dict.
        beam_width: stored as `self.beam_width`.
        top_k: stored as `self.top_k`.
        start_symbol: stored as `self.start_symbol`.
    """
    # Use None sentinels rather than mutable defaults: a literal `{}`
    # default would be created once and then shared (via self.aliases) by
    # every instance constructed without an explicit `aliases` argument.
    entity_names = [] if entity_names is None else entity_names
    aliases = {} if aliases is None else aliases

    # Extract from bases
    bases = bases if isinstance(bases, list) else [bases]
    rules = []
    self.ops = {}
    self.helpers = {}
    self.annotators = []
    self.translate_ops = {}
    for base in bases:
        rules += base.rules
        self.ops.update(base.ops)
        self.helpers.update(base.helpers)
        self.annotators += base.annotators
        self.translate_ops.update(base.translate_ops)

    # Add aliases and candidate-specific rules
    self.aliases = aliases
    for i, arg in enumerate(entity_names):
        rules.append(Rule('$ArgX', arg, ('.arg', ('.int', i + 1))))

    # Set parameters
    self.beam_width = beam_width
    self.top_k = top_k

    # Initialize the rule indexes before any rule is added, since add_rule
    # is called for every collected rule below.
    self.categories = set()
    self.lexical_rules = defaultdict(list)
    self.unary_rules = defaultdict(list)
    self.binary_rules = defaultdict(list)
    self.start_symbol = start_symbol
    self.parser = Spacy()
    for rule in rules:
        self.add_rule(rule)
    print('Grammar construction complete.')