def induce_entity_grammar(self, start_grammar):
    """Induce an entity-swapping grammar.

    Entity rules are harvested from aligned spans in the original
    dataset; template rules come from start_grammar with each aligned
    span replaced by a category-labelled hole.
    """
    induced = Grammar()
    # Entity rules: one rule per aligned (x, y) span pair in the data.
    for utterance, logical_form in self.dataset:
        for cat, x_span, y_span in self.domain.get_entity_alignments(
                utterance, logical_form):
            induced.add_rule(cat,
                             utterance[x_span[0]:x_span[1]],
                             logical_form[y_span[0]:y_span[1]])
    # Root/template rules: punch holes where entities were aligned.
    for cat, x_str, y_str in start_grammar.rule_list:
        # Anchor each hole name on the single x mention, so a
        # one-to-many x-to-y mapping reuses the same hole on the y side.
        x_holes = set()
        y_holes = []
        for inner_cat, x_span, y_span in self.domain.get_entity_alignments(
                x_str, y_str):
            hole = '%s_%d' % (inner_cat, x_span[0])
            x_holes.add((x_span, hole))
            y_holes.append((y_span, hole))
        induced.add_rule(cat,
                         self.splice(x_str, list(x_holes)),
                         self.splice(y_str, y_holes))
    # induced.print_self()
    return induced
def induce_nesting_grammar(self, start_grammar):
    """Induce a nesting grammar.

    Everything is derived from start_grammar: nested fragments become
    replacement rules, and each original rule becomes a template with
    category-labelled holes where the fragments were.
    """
    induced = Grammar()
    for cat, x_str, y_str in start_grammar.rule_list:
        alignments, productions = self.domain.get_nesting_alignments(
            x_str, y_str)
        # Replacement rules for the nested fragments.
        for prod_cat, prod_x, prod_y in productions:
            induced.add_rule(prod_cat, prod_x, prod_y)
        # Template rule: hole names are anchored on the x span start so
        # the x and y sides agree on which hole is which.
        x_holes = set()
        y_holes = []
        for inner_cat, x_span, y_span in alignments:
            hole = '%s_%d' % (inner_cat, x_span[0])
            x_holes.add((x_span, hole))
            y_holes.append((y_span, hole))
        induced.add_rule(cat,
                         self.splice(x_str, list(x_holes)),
                         self.splice(y_str, y_holes))
    induced.print_self()
    return induced
def new_c():
    """Build a Grammar from the enclosing ``rules`` mapping.

    Nonterminals are the rule keys; terminals are every truthy symbol
    on a right-hand side that is callable or a non-'$' string.
    """
    g = Grammar()
    g.add_nonterminals(set(rules))
    for expansions in rules.values():
        for symbols in expansions:
            terminals = set()
            for sym in symbols:
                if not sym:
                    continue
                # '$'-prefixed strings are nonterminal references.
                if callable(sym) or (isinstance(sym, str) and sym[0] != "$"):
                    terminals.add(sym)
            g.add_terminals(terminals)
    for lhs, rhs in rules.items():
        g.add_rule(lhs, rhs)
    return g
def separate_prefixes(g: grammar.Grammar, layer: grammar.NonTerminal,
                      prefix: grammar.Derivation, root: PrefixNode,
                      common_depth: int, nterm_sequence: Iterator):
    """Split derivations sharing common prefixes into tree-shaped rules.

    Recursive; recursion depth is at most the prefix-tree depth plus 1.

    :param g: grammar that receives the generated rules.
    :param layer: non-terminal the current derivations belong to.
    :param prefix: common prefix accumulated so far.
    :param root: current prefix-tree node (None marks a leaf).
    :param common_depth: length of the unbranched prefix run
        (-1 only on the initial call).
    :param nterm_sequence: iterator yielding fresh non-terminals.
    :return: None.
    """
    # A None node is a leaf: emit the accumulated derivation and stop.
    if root is None:
        g.add_rule(layer, prefix)
        return
    # Update the unbranched-run length; -1 happens only at the start.
    if common_depth == -1:
        common_depth = 1
    elif len(root) == 1:
        common_depth += 1
    else:
        common_depth = 0
    if common_depth >= 1:
        # Still on a single chain: keep extending the same layer.
        new_layer = layer
    else:
        # Fork: emit  Layer -> prefix NewLayer  and let NewLayer carry
        # the symbols of the fork.
        new_layer = next(nterm_sequence)
        g.add_rule(layer, prefix + (new_layer, ))
    for symb, child in root.items():
        # The EmptyWord is already stored as a tuple; wrap other symbols.
        step = symb if type(symb) is tuple else (symb, )
        next_prefix = prefix + step if common_depth >= 1 else step
        separate_prefixes(g, new_layer, next_prefix, child, common_depth,
                          nterm_sequence)
def make_grammar(self):
    """Build a one-rule grammar: NP -> ART N with AGR agreement.

    All three symbols share the agreement variable ``?a``.

    :return: the constructed Grammar.
    """
    grammar = Grammar()
    r1 = Rule(
        Symbol("NP", {"AGR": "?a"}),
        [Symbol("ART", {"AGR": "?a"}),
         Symbol("N", {"AGR": "?a"})])
    # -1 should be the default code for any undefined variable that is
    # referenced while constructing.
    # FIX: the original used the Python 2 long literal ``-1L``, which is
    # a SyntaxError in Python 3; plain -1 has the same value.
    r1.set_variable_code("?a", -1)
    grammar.add_rule(r1)
    return grammar
def rulelist(self, start, end, item, tail):
    """Assemble a Grammar from a parsed rulelist.

    ``item`` is a (rulename, alternation) pair and ``tail`` is a linked
    list of further (item, tail) pairs, possibly containing None
    placeholders. Repetition elements are tuples ``(min, max, *body)``;
    nested Alternations are lifted into auxiliary rules so each rule
    body is a flat element sequence.

    :return: the populated Grammar.
    """
    def process_elements(grammar, rulename, elements):
        """Flatten repetitions and alternations inside one rule body."""
        new_elements = []
        for element in elements:
            element_type = type(element)
            if element_type is tuple:
                # Repetition (min, max, *body): emit `min` mandatory
                # copies, then one bounded or unbounded optional group.
                # FIX: renamed locals so builtins min/max aren't shadowed.
                rep_min, rep_max = element[0], element[1]
                rest = process_elements(grammar, rulename, element[2:])
                new_elements.extend(rep_min * rest)
                if rep_max is None:
                    new_elements.append((None,) + rest)
                else:
                    remaining = rep_max - rep_min
                    if remaining > 0:
                        new_elements.append((remaining,) + rest)
            elif element_type is Alternation:
                # Lift the nested alternation into an auxiliary rule.
                aux = grammar.get_internal_rulename(rulename)
                for alt in element:
                    grammar.add_rule(aux, *alt)
                new_elements.append(aux)
            else:
                new_elements.append(element)
        if type(elements) is tuple:
            return tuple(new_elements)
        return new_elements

    grammar = Grammar()
    # FIX: the first item was processed by a verbatim copy of the tail
    # loop body; collect all items first and process them in one place.
    items = [item]
    while tail:
        item, tail = tail
        if item is not None:
            items.append(item)
    for rulename, alternation in items:
        for elements in alternation:
            elements = process_elements(grammar, rulename, elements)
            grammar.add_rule(rulename, *elements)
    return grammar
def rulelist(self, start, end, item, tail):
    """Assemble a Grammar from a parsed rulelist.

    ``item`` is a (rulename, alternation) pair and ``tail`` is a linked
    list of further (item, tail) pairs, possibly containing None
    placeholders. Repetition elements are tuples ``(min, max, *body)``;
    nested Alternations are lifted into auxiliary rules so each rule
    body is a flat element sequence.

    :return: the populated Grammar.
    """
    def process_elements(grammar, rulename, elements):
        """Flatten repetitions and alternations inside one rule body."""
        new_elements = []
        for element in elements:
            element_type = type(element)
            if element_type is tuple:
                # Repetition (min, max, *body): emit `min` mandatory
                # copies, then one bounded or unbounded optional group.
                # FIX: renamed locals so builtins min/max aren't shadowed.
                rep_min, rep_max = element[0], element[1]
                rest = process_elements(grammar, rulename, element[2:])
                new_elements.extend(rep_min * rest)
                if rep_max is None:
                    new_elements.append((None, ) + rest)
                else:
                    remaining = rep_max - rep_min
                    if remaining > 0:
                        new_elements.append((remaining, ) + rest)
            elif element_type is Alternation:
                # Lift the nested alternation into an auxiliary rule.
                aux = grammar.get_internal_rulename(rulename)
                for alt in element:
                    grammar.add_rule(aux, *alt)
                new_elements.append(aux)
            else:
                new_elements.append(element)
        if type(elements) is tuple:
            return tuple(new_elements)
        return new_elements

    grammar = Grammar()
    # FIX: the first item was processed by a verbatim copy of the tail
    # loop body; collect all items first and process them in one place.
    items = [item]
    while tail:
        item, tail = tail
        if item is not None:
            items.append(item)
    for rulename, alternation in items:
        for elements in alternation:
            elements = process_elements(grammar, rulename, elements)
            grammar.add_rule(rulename, *elements)
    return grammar
def induce_concat_grammar(self, start_grammar, concat_num):
    """Build a grammar whose root concatenates ``concat_num`` sentences.

    Rules are copied from start_grammar, with its root category renamed
    to '$sentence' so the new root can chain indexed copies of it,
    separated by the end-of-sentence token.
    """
    induced = Grammar()
    for cat, x_str, y_str in start_grammar.rule_list:
        target_cat = '$sentence' if cat == start_grammar.ROOT else cat
        induced.add_rule(target_cat, x_str, y_str)
    separator = ' %s ' % Vocabulary.END_OF_SENTENCE
    placeholders = ['$sentence_%d' % i for i in range(concat_num)]
    root_str = separator.join(placeholders)
    induced.add_rule(induced.ROOT, root_str, root_str)
    # induced.print_self()
    return induced
def induce_concat_grammar(self, start_grammar, concat_num):
    """Build a grammar whose root joins ``concat_num`` sentences with [SEP].

    Rules are copied from start_grammar, with its root category renamed
    to '$sentence' so the new root can reference indexed copies of it.
    """
    induced = Grammar()
    for cat, x_str, y_str in start_grammar.rule_list:
        target_cat = '$sentence' if cat == start_grammar.ROOT else cat
        induced.add_rule(target_cat, x_str, y_str)
    separator = ' %s ' % '[SEP]'
    root_str = separator.join('$sentence_%d' % i
                              for i in range(concat_num))
    induced.add_rule(induced.ROOT, root_str, root_str)
    return induced
# NOTE(review): the process exits right here, so everything below this
# point is unreachable dead code -- this looks like a debug
# short-circuit left in place; confirm before removing. Also,
# response_str is not defined in this fragment -- presumably set
# earlier in the file; verify.
print(response_str)
print()
exit()
# logf = open('log.txt', "w", encoding='utf8')
word_list = req_dict['word_list']
m_list = [Monomial(d) for d in word_list]
sys.stderr.write(str(m_list) + '\n')
grammar = Grammar()
rule_list = req_dict['grammar']
for rule in rule_list:
    # Rebuild "LHS RHS0 rhs1 , rhs2 , ..." rule text from the list form.
    rule_str = ' '.join(rule[:2]) + ' ' + ' , '.join(rule[2:])
    # logf.write(str(rule_str) + '\n\n')
    try:
        grammar.add_rule(rule_from_string(rule_str))
        response_str = 'OK'
    except Exception as ex:
        # NOTE(review): response_str is overwritten each iteration, so
        # only the last rule's status survives -- confirm intended.
        response_str = str(ex)
cyk = CYK_Parser(m_list, grammar)
cyk.parse()
dict_list, parse_table = cyk.table_to_plain_data()
print('Content-type:text/html')
print()  # blank line, end of headers
print(json.dumps({'dict_list': dict_list, 'parse_table': parse_table}))
# logf.close()
def induce(parsed_sents):
    """Induce a Grammar from bracketed parse-tree strings.

    Each parsed sentence is rendered to text and re-parsed character by
    character into a Node/Leaf tree; production rules (non-leaf) and
    lexical rules (leaf) are then collected per sentence and the final
    grammar is deduplicated.
    """
    def get_rule_from_node(node):
        # Visitor: for an internal node, leaf children contribute
        # lexical entries under node.tag; non-leaf children form the
        # right-hand-side tag sequence. `leafs` and `nodes` are bound
        # late from the enclosing scope at call time.
        if not node.is_leaf():
            child_tags = []
            for child in node.children:
                if child.is_leaf():
                    leafs[node.tag].add(child.text)
                else:
                    child_tags.append(child.tag)
            if len(child_tags):
                key = slugify(child_tags)
                nodes[node.tag].add(key)
    g = Grammar()
    for parsed_sent in parsed_sents:
        # Character-level parse of the bracketed tree text.
        root = Node()
        current = root
        for char in str(parsed_sent):
            if char == '(':
                # Open a new child node and descend into it.
                child = Node()
                child.parent = current
                current.children.append(child)
                current = child
            elif char == ')':
                # A Leaf is closed together with its enclosing node.
                if isinstance(current, Leaf):
                    current = current.parent
                current = current.parent
            elif re.match(r'\s', char):
                # Whitespace ends the current node's tag.
                current.tag_parsed = True
            else:
                if isinstance(current, Leaf):
                    current.text += char
                elif current.tag_parsed:
                    # First non-space char after the tag starts a Leaf.
                    leaf = Leaf()
                    leaf.parent = current
                    leaf.text += char
                    current.children.append(leaf)
                    current = leaf
                else:
                    current.tag += char
        # Fresh accumulators per sentence; filled by the visitor.
        leafs = defaultdict(set)
        nodes = defaultdict(set)
        root.children[0].descend(get_rule_from_node)
        for (src, tar_set) in nodes.items():
            for slugged_tars in tar_set:
                tars = deslugify(slugged_tars)
                g.add_rule(src, tars, False)
        for (src, tars) in leafs.items():
            g.add_rule(src, tars, True)
    g.dedup()
    return g
from grammar import Grammar
from rule_builder import rule_from_string

# Build a one-rule demo grammar and display it.
g = Grammar()
g.add_rule(rule_from_string('S -> subj:NP , verb:VP'))
print(g)
}, "$NP": { ("$A", "$NP"): Fraction(3, 10), ("$N",): Fraction(7, 10), }, "$VP": { ("$V", "$NP"): Fraction(3, 4), ("$V",): Fraction(1, 4), }, "$N": { ('ideas',): .5, ('linguists',): .5, }, "$V": { ("hate",): .5, ("generate",): .5, }, "$A": { ("great",): .5, ("green",): .5, } } english = Grammar() english.add_terminals({'generate', 'hate', 'great', 'green', 'ideas', 'linguists'}) english.add_nonterminals({"$S", '$NP', '$VP', '$N', '$V', '$A'}) for k, v in rules.items(): english.add_rule(k, v) print(english.expand("$S"))