def test_parsing(self):
    # Ignoring semantics for now...
    numeral_rules = [
        Rule('$E', 'one'),
        Rule('$E', 'two'),
        Rule('$E', 'three'),
        Rule('$E', 'four'),
    ]
    operator_rules = [
        Rule('$UnOp', 'minus'),
        Rule('$BinOp', 'plus'),
        Rule('$BinOp', 'minus'),
        Rule('$BinOp', 'times'),
    ]
    compositional_rules = [
        Rule('$E', '$UnOp $E'),
        Rule('$EBO', '$E $BinOp'),
        Rule('$E', '$EBO $E'),
    ]
    arithmetic_rules = numeral_rules + operator_rules + compositional_rules
    arithmetic_grammar = Grammar(arithmetic_rules)
    for example in self.one_parse_examples:
        self.assertEqual(1, len(arithmetic_grammar.parse(example.input)), example)
        # print(arithmetic_grammar.parse(example.input)[0])
    for example in self.two_parse_examples:
        self.assertEqual(2, len(arithmetic_grammar.parse(example.input)), example)
def test_operator_precedence_features(self):
    """See if a count of operator precedence patterns is a good feature for ranking parses."""
    arithmetic_grammar = Grammar(self.arithmetic_rules)
    parses = arithmetic_grammar.parse("two times two plus three")
    self.assertEqual(2, len(parses))
    # Look at Parse.operator_precedence_features().  It generates different
    # results for the two parses.
    parse0_features = parses[0].operator_precedence_features()
    parse1_features = parses[1].operator_precedence_features()
    # In the first parse, + precedes * once.
    self.assertEqual(parse0_features, {('+', '*'): 1.0})
    # In the second parse, * precedes + once.
    self.assertEqual(parse1_features, {('*', '+'): 1.0})
    # Look at Parse.score().
    parse0_score = parses[0].score(Parse.operator_precedence_features, self.weights)
    parse1_score = parses[1].score(Parse.operator_precedence_features, self.weights)
    # Parse.operator_precedence_features() is good at distinguishing parses.
    self.assertEqual(-1.0, parse0_score)
    self.assertEqual(1.0, parse1_score)
def delete_nonderivable_nonterminals(grammar):
    new_grammar = Grammar()
    new_grammar.axiom = grammar.axiom
    new_grammar.terminals = grammar.terminals
    unwatched = [new_grammar.axiom]
    watched = set()
    while unwatched:
        # list.remove() returns None, so the original
        # "unwatched = unwatched.remove(nonterminal) or []" emptied the
        # worklist after the first iteration; pop(0) keeps the remaining symbols.
        nonterminal = unwatched.pop(0)
        watched.add(nonterminal)
        rules = find_rules_for_nonterminal(grammar.rules, nonterminal)
        for rule in rules:
            for symbol in rule.right_side:
                if isinstance(symbol, Nonterminal):
                    if symbol not in watched and symbol not in unwatched:
                        unwatched.append(symbol)
    new_grammar.nonterminals = watched
    new_rules = []
    for rule in grammar.rules:
        if rule.left_side[0] in watched:
            new_rules.append(rule)
    new_grammar.rules = new_rules
    return new_grammar
def lookahead(prod: Sequence[str], dotpos: int, grammar: Grammar) -> Set[str]:
    if dotpos >= (len(prod) - 1):
        return {"$$"}
    epsilons = grammar.epsilon_nonterms()
    nonterms = grammar.nonterms()
    ret = set()
    nt_passed = set()
    stk = deque()
    stk.append(prod[dotpos + 1:])
    while len(stk) > 0:
        cur = stk.popleft()
        hit = False
        for token in cur:
            if token in nonterms:
                if token in nt_passed:
                    if token in epsilons:
                        continue
                    else:
                        hit = True
                        break
                else:
                    nt_passed.add(token)
                    # Renamed from `prod` to avoid shadowing the parameter.
                    for production in grammar[token]:
                        stk.append(production)
            else:
                ret.add(token)
                if token not in epsilons:
                    hit = True
                    break
        if not hit:
            ret.add("$$")
    return ret
class ShellParser(object):

    services = None

    def __init__(self):
        self.tokens = None

    def __call__(self, command_string):
        self.parse(command_string)

    def buildGrammar(self):
        self.grammar = Grammar()
        self.grammar.parser = self
        self.grammar.makeBNF()
        return self.grammar

    def parse(self, cmd):
        # The original referenced a bare `grammar` name, which is undefined here;
        # use the grammar built by buildGrammar().
        tokens = self.grammar.parseString(cmd)
        out = """
        command: %s
        service type: %s
        action: %s
        """ % (tokens.commandtype, tokens.servicetype, tokens.action)
        self.tokens = tokens
        return out
def get_energy(self, simulation_case):
    case_name = simulation_case.case_name
    configuration.configurations_dict["case_name"] = case_name
    if isinstance(simulation_case.hmm_dict, HMM):
        hmm = simulation_case.hmm_dict
    else:
        hmm = HMM(simulation_case.hmm_dict)
    if isinstance(simulation_case.flat_rule_set_list, RuleSet):
        rule_set = simulation_case.flat_rule_set_list
    else:
        rule_set_list = []
        for flat_rule in simulation_case.flat_rule_set_list:
            rule_set_list.append(Rule(*flat_rule))
        rule_set = RuleSet(rule_set_list)
    grammar = Grammar(hmm, rule_set)
    self.write_to_dot_to_file(hmm, "hmm_" + case_name)
    self.write_to_dot_to_file(grammar.get_nfa(), "grammar_nfa_" + case_name)
    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()
    if self.target_energy:
        print("{}: {} distance from target: {}".format(
            case_name, hypothesis.get_recent_energy_signature(), energy - self.target_energy))
    else:
        print("{}: {}".format(case_name, hypothesis.get_recent_energy_signature()))
    return energy
def test_parser_kleene(self):
    hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['at', 'attstktttt', 'st']),
        'q2': ([FINAL_STATE], ['o'])
    })
    hmm_transducer = hmm.get_transducer()
    self.write_to_dot_to_file(hmm_transducer, "test_hmm_transducer_kleene")
    assimilation_rule_with_kleene = Rule(
        [{"cons": "-"}],
        [{"low": "+"}],
        [{"cons": "-"}, {"cons": "+", "kleene": True}],
        [],
        obligatory=True)
    rule_set_with_kleene = RuleSet([assimilation_rule_with_kleene])
    grammar = Grammar(hmm, rule_set_with_kleene)
    nfa = grammar.get_nfa()
    self.write_to_dot_to_file(nfa, "test_parser_nfa_kleene")
def test_sequitur(self):
    """Check the grammar printout after training Sequitur on a short string."""
    g = Grammar()
    g.train_string("Hello, world!")
    self.assertEqual("0 --(0)--> H e l l o , _ w o r l d ! \n", g.print_grammar())
def generate_samples(grammar_dir, outfiles):
    """Generates a set of samples and writes them to the output files.

    Args:
        grammar_dir: directory to load grammar files from.
        outfiles: A list of output filenames.
    """
    with open(os.path.join(grammar_dir, 'ox_template.html')) as f:
        template = f.read()
    jsgrammar = Grammar()
    err = jsgrammar.parse_from_file(os.path.join(grammar_dir, 'oxjs.txt'))
    if err > 0:
        print('There were errors parsing grammar')
        return
    for outfile in outfiles:
        result = generate_new_sample(template, jsgrammar)
        if result is not None:
            print('Writing a sample to ' + outfile)
            try:
                with open(outfile, 'w') as f:
                    f.write(result)
            except IOError:
                print('Error writing to output')
def get_energy(self, hmm, rule_set_list, case_name):
    grammar = Grammar(hmm, RuleSet(rule_set_list))
    self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()
    print("{}: {}".format(case_name, hypothesis.get_recent_energy_signature()))
    return energy
class UI:
    def __init__(self):
        self.__grammar = Grammar()

    @staticmethod
    def print_menu():
        print('Options are:')
        print(' 0 - Exit')
        print(' 1 - See non-terminals')
        print(' 2 - See terminals')
        print(' 3 - See productions')
        print(' 4 - See start')

    def run(self):
        while True:
            UI.print_menu()
            option = input('Enter option: ')
            if option == '0':
                break
            elif option == '1':
                print(self.__grammar.get_non_terminals_string())
            elif option == '2':
                print(self.__grammar.get_terminals_string())
            elif option == '3':
                print(self.__grammar.get_productions_string())
            elif option == '4':
                print(self.__grammar.start)
            else:
                print('Incorrect option')
def test_morphology_only(self):
    self.initialise_segment_table("plural_english_segment_table.txt")
    data = [u'tozat', u'tozgoat', u'tozgo', u'tozdoat', u'tozdo', u'tozzoat', u'tozzo', u'toz',
            u'dagat', u'daggoat', u'daggo', u'dagdoat', u'dagdo', u'dagzoat', u'dagzo', u'dag',
            u'gasat', u'gasgoat', u'gasgo', u'gasdoat', u'gasdo', u'gaszoat', u'gaszo', u'gas',
            u'kodat', u'kodgoat', u'kodgo', u'koddoat', u'koddo', u'kodzoat', u'kodzo', u'kod',
            u'katat', u'katgoat', u'katgo', u'katdoat', u'katdo', u'katzoat', u'katzo', u'kat',
            u'dotat', u'dotgoat', u'dotgo', u'dotdoat', u'dotdo', u'dotzoat', u'dotzo', u'dot']

    # target
    hmm = {'q0': ['q1'],
           'q1': (['q2', 'q3', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz']),
           'q2': (['q3', 'qf'], ['zo', 'go', 'do']),
           'q3': (['qf'], ['at'])}
    self.configurations.simulation_data = data
    self.assertLess(Hypothesis(Grammar(hmm, [])).get_energy(), 5190)

    # single state
    hmm = HMM({'q0': ['q1'],
               'q1': (['q1', 'qf'],
                      ['dag', 'kat', 'dot', 'kod', 'gas', 'toz'] + ['zo', 'go', 'do'] + ['at'])})
    self.assertLess(Hypothesis(Grammar(hmm, [])).get_energy(), 6430)

    # two state
    hmm = {'q0': ['q1'],
           'q1': (['q1', 'q2', 'qf'],
                  ['dag', 'kat', 'dot', 'kod', 'gas', 'toz'] + ['zo', 'go', 'do']),
           'q2': (['qf'], ['at'])}
    self.assertLess(Hypothesis(Grammar(hmm, [])).get_energy(), 6010)

    # from simulation
    hmm = HMM({'q0': ['q1'],
               'q1': (['q1', 'qf'],
                      ['toz', 'do', 'zo', 'gas', 'kod', 'dag', 'at', 'zoat', 'kat', 'go', 'dot'])})
def setUp(self):
    rules = [
        'S -> VP | NP VP',
        'VP -> V',
        'NP -> Det N | N',
        'V -> walk | fly | book',
        'N -> I | you | cows | book',
        'Det -> the',
        'Det -> a'
    ]
    self.grammar = Grammar(rules)
def teardown(self, g: grammar.Grammar):
    """We have introduced new nodes, so we need to recalculate min tokens.

    FIXME: Increasingly min tokens looks like it shouldn't be part of
    initial grammar creation.
    """
    g._calc_min_tokens()
def generate_samples(grammar_dir, outfiles):
    """Generates a set of samples and writes them to the output files.

    Args:
        grammar_dir: directory to load grammar files from.
        outfiles: A list of output filenames.
    """
    with open(os.path.join(grammar_dir, 'template.html')) as f:
        template = f.read()
    jsgrammar = Grammar()
    err = jsgrammar.parse_from_file(os.path.join(grammar_dir, 'jscript.txt'))
    if err > 0:
        print('There were errors parsing grammar')
        return
    for outfile in outfiles:
        result = GenerateNewSample(template, jsgrammar)
        if result is not None:
            print('Writing a sample to ' + outfile)
            try:
                with open(outfile, 'w') as f:
                    f.write(result)
            except IOError:
                print('Error writing to output')
def construct_table(grammar: Grammar, states: list, algo_suit):
    final_item = LR0Item(grammar.get_start_prodctions()[0], 1)
    table = list()  # table[src_state][sym] = set(LRAction)
    for state in states:
        actions = defaultdict(set)
        table.append(actions)
        for sym, edge in state.edges.items():
            if sym == '':
                continue
            if grammar.is_terminal(sym):
                # Terminal: shift
                actions[sym].add(LRAction.new_shift(edge.dst_state))
            else:
                # Nonterminal: goto
                actions[sym].add(LRAction.new_goto(edge.dst_state))
        if '' in state.edges:
            edge = state.edges['']
            current_item = tuple(edge.src_items)[0]
            if final_item.prod == current_item.prod and \
                    final_item.pos == current_item.pos:
                # Accept
                actions['$'].add(LRAction.new_accept())
            else:
                # Reduce
                algo_suit.build_reduce(actions, edge)
    return table
def generate(input_str):
    '''
    Parses an input string and returns another one containing the
    generated program skeleton.
    '''
    HEADER, L, ENDCODE = parser.parse(input_str)
    result = 'from skel import Grammar\n'
    if HEADER is not None:
        result += HEADER + '\n'
    result = result + """
def generate(self):
"""
    # The original reset `result = ''` here, which silently discarded the
    # header and skeleton built above; keep the accumulated text instead.
    grammar = Grammar(Parser.START_SYMBOL)
    if L:
        for T in L:
            grammar.addRule(T)
    result += grammar.generate(Parser.START_SYMBOL)
    if ENDCODE is not None:
        result += ENDCODE + '\n'
    return result
def buildgrammar(self):
    g = Grammar()
    g.nonterminals = self.states
    g.terminals = self.symbols
    g.startsymbol = str(self.initialstate)
    tf = self.transitions
    if len(tf) > 0:
        for t in tf:
            if len(t) == 3:
                g.add_production(t[0], t[1] + t[2])
                if t[2] in self.finalstates:
                    g.add_production(t[0], t[1])
    # Fixed typo: the attribute set above is `startsymbol`, not `starsymbol`.
    if g.startsymbol in self.finalstates:
        g.add_production(g.startsymbol, 'e')
    self.grammar = g
    print('Nonterminals: ', self.grammar.nonterminals)
    print('Terminals: ', self.grammar.terminals)
    print('Start symbol: ', self.grammar.startsymbol)
    print('Productions: ', self.grammar.productions)
    return
def loss(self, rec_input, program, request):
    variables, productions = self._run(rec_input)
    g = Grammar(
        variables,
        [(productions[k].view(1), t, prog)
         for k, (_, t, prog) in enumerate(self.grammar.productions)])
    return -g.logLikelihood(request, program)
def __init__(self): self.context = Context() self.grammar = Grammar([ SyntaxRule('S', 'VP', 'NP', 0.125), SyntaxRule('S', 'VP', 'Noun', 0.125), SyntaxRule('S', 'Verb', 'NP', 0.125), SyntaxRule('S', 'Verb', 'Noun', 0.125), SyntaxRule('S', 'NP', 'VP', 0.125), SyntaxRule('S', 'Noun', 'VP', 0.125), SyntaxRule('S', 'Pronoun', 'VP', 0.125), SyntaxRule('S', 'S', 'ConjClause', 0.125), SyntaxRule('ConjClause', 'Conj', 'S', 1), SyntaxRule('VP', 'Verb', 'Pronoun', 0.2), SyntaxRule('VP', 'Verb', 'PP', 0.2), SyntaxRule('VP', 'VP', 'PP', 0.2), SyntaxRule('VP', 'Adverb', 'Verb', 0.2), SyntaxRule('VP', 'Adverb', 'VP', 0.2), SyntaxRule('NP', 'NP', 'PP', 1 / 3), SyntaxRule('NP', 'Noun', 'PP', 1 / 3), SyntaxRule('NP', 'Article', 'Noun', 1 / 3), SyntaxRule('PP', 'Preposition', 'NP', 1 / 3), SyntaxRule('PP', 'Preposition', 'Noun', 1 / 3), SyntaxRule('PP', 'Preposition', 'Pronoun', 1 / 3), ], [ LexicalRule('Preposition', 'to', 0.2), LexicalRule('Preposition', 'inside', 0.2), LexicalRule('Preposition', 'in', 0.2), LexicalRule('Preposition', 'from', 0.2), LexicalRule('Preposition', 'of', 0.2), LexicalRule('Article', 'the', 1), LexicalRule('Noun', 'contents', 0.25), LexicalRule('Noun', 'everything', 0.25), LexicalRule('Noun', CommandInputToken.placeholder(), 0.25), LexicalRule('Noun', 'there', 0.25), LexicalRule('Pronoun', 'me', 0.5), LexicalRule('Pronoun', 'what', 0.5), LexicalRule('Adverb', 'recursively', 1), LexicalRule('Verb', 'run', 1 / 17), LexicalRule('Verb', 'execute', 1 / 17), LexicalRule('Verb', 'do', 1 / 17), LexicalRule('Verb', 'show', 1 / 17), LexicalRule('Verb', 'list', 1 / 17), LexicalRule('Verb', 'tell', 1 / 17), LexicalRule('Verb', 'move', 1 / 17), LexicalRule('Verb', 'rename', 1 / 17), LexicalRule('Verb', 'place', 1 / 17), LexicalRule('Verb', 'copy', 1 / 17), LexicalRule('Verb', 'duplicate', 1 / 17), LexicalRule('Verb', 'delete', 1 / 17), LexicalRule('Verb', 'remove', 1 / 17), LexicalRule('Verb', 'is', 1 / 17), LexicalRule('Verb', 'put', 1 / 17), LexicalRule('Verb', 'display', 1 / 17), LexicalRule('Verb', 'find', 1 / 17), LexicalRule('Conj', 'and', 1 / 3), LexicalRule('Conj', 'then', 1 / 3), LexicalRule('Conj', ConjunctorToken(), 1 / 3), ])
def __init__(self, canvas, namespace=None):
    Grammar.__init__(self, canvas, namespace)
    self._autoclosepath = True
    self._path = None
    self._canvas.size = None
    self._frame = 1
    self._set_initial_defaults()  # TODO: Look at these
def test_parser2(self):
    hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
        'q2': ([FINAL_STATE], ['z'])
    })
    grammar = Grammar(hmm, self.plural_english_rule_set)
    nfa = grammar.get_nfa()
def __init__(self, grammar: Grammar, resolver: Callable[[Item, Item], Item]):
    self.__grammar = grammar
    old_start = grammar.start()
    new_start = old_start + "'"
    new_start_rule = new_start + " -> " + old_start
    g = Grammar([new_start_rule] + str(grammar).split("\n"))
    start_item = Item(new_start, (old_start,), {"$"}, 0)
    self.__sets = ItemSet.generate(start_item.closure(g), g, resolver)
def test_terminal(self):
    x = Grammar([
        "S -> A B",
        "A -> c | d",
        "B -> ef | g S",
    ])
    self.assertRaises(CFGException, lambda: x.lex(["d A"]))
def test_sequitur_base(self):
    """Check the Sequitur grammar built from a string with repeated digrams."""
    g = Grammar()
    g.train_string("abcabdabcabd")
    self.assertEqual(
        "0 --(0)--> 1 1 \n1 --(2)--> 2 c 2 d abcabd\n2 --(2)--> a b ab\n",
        g.print_grammar())
def test_get_calc():
    grammar = Grammar()
    # get_calc() is expected to reject an unknown symbol.
    try:
        grammar.get_calc('¬')
        assert False
    except Exception:
        assert True
def __init__(self):
    """ Load the shared BIN grammar if not already there, then
        initialize the Parser parent class """
    g = BIN_Parser._grammar
    if g is None:
        g = Grammar()
        g.read("Reynir.grammar")
        BIN_Parser._grammar = g
    Parser.__init__(self, g)
def test_crossover(self): self.initialise_segment_table("dag_zook_segments_new.txt") rule_set_1 = RuleSet([ Rule(*[[{ "cons": "+" }], [{ "voice": "-" }], [{ "low": "+" }], [{ "cont": "-" }], True]) ]) rule_set_2 = RuleSet([ Rule(*[[{ "cons": "+" }], [{ "low": "-" }], [{ "voice": "-" }], [], False]) ]) plural_english_data = 1 * ['kats', 'dogz', 'kat', 'dog'] hmm_1 = HMM({ INITIAL_STATE: ['q1'], 'q1': (['q2', FINAL_STATE], ['dag', 'kot']), 'q2': ([FINAL_STATE], ['z']) }) hmm_2 = HMM({ INITIAL_STATE: ['q1'], 'q1': (['q2'], ['dog', 'kat']), 'q2': (['q3'], ['s']), 'q3': ([FINAL_STATE], ['z']) }) grammar_1 = Grammar(hmm_1, rule_set_1) grammar_2 = Grammar(hmm_2, rule_set_2) hypothesis_1 = Hypothesis(grammar_1, plural_english_data) hypothesis_2 = Hypothesis(grammar_2, plural_english_data) offspring_1, offspring_2 = GeneticAlgorithm.crossover( hypothesis_1, hypothesis_2) print("*** Parents:\n") GeneticAlgorithm.log_hypothesis(hypothesis_1) GeneticAlgorithm.log_hypothesis(hypothesis_2) print("\n\n*** Offspring:\n") GeneticAlgorithm.log_hypothesis(offspring_1) GeneticAlgorithm.log_hypothesis(offspring_2) offspring_3, offspring_4 = GeneticAlgorithm.crossover( offspring_1, offspring_2) print("\n\n*** 2nd gen offspring:\n") GeneticAlgorithm.log_hypothesis(offspring_3) GeneticAlgorithm.log_hypothesis(offspring_4)
def inicia(self):
    case = 0
    lex = Lex()

    # 1. RE: reserved words
    a1 = lex.lexer('reservado', case)
    dict = a1.getDictAutomato()
    case += len(dict)

    # 2. RE: identifiers
    a2 = lex.lexer('identificadores', case)

    # 3. Grammar for special symbols
    terminais = ['+', '-', '=', '/', '*', '>', '<', '!']
    nTerminais = ['S']
    producoes = {'S': ['+', '-', '=', '/', '*', '>', '<', '!']}
    inicial = 'S'
    g = Grammar(producoes, terminais, nTerminais, inicial)
    s, i, f = g.convertGtoAF()
    a3 = Automato(s, i, f)
    a3.determina()
    a3.printAtomato()
    print("\n")
    dict = a2.getDictAutomato()
    case += len(dict)
    a3 = lex.renameState(a3, case)

    # 4. Grammar for separators
    terminais2 = [':', ';', ' ', '(', ')', '[', ']', ',', '\n']
    nTerminais2 = ['S']
    producoes2 = {'S': [':', ';', ' ', '(', ')', '[', ']', ',', '\n']}
    inicial2 = 'S'
    g = Grammar(producoes2, terminais2, nTerminais2, inicial2)
    s2, i2, f2 = g.convertGtoAF()
    a4 = Automato(s2, i2, f2)
    a4.determina()
    a4.printAtomato()
    print("\n")
    dict = a3.getDictAutomato()
    case += len(dict)
    a4 = lex.renameState(a4, case)

    # RE: constants
    dict = a4.getDictAutomato()
    case += len(dict)
    a5 = lex.lexer('constantes', case)

    # Union of all automata (the original dead assignment `r = a5` was removed,
    # since it was immediately overwritten).
    r = a1.oU([a2, a3, a4, a5])
    print("\n")
    r.determina()
    r.printAtomato()
    with open('automato.pkl', 'wb') as output:
        pickle.dump(r, output, pickle.HIGHEST_PROTOCOL)
def separate_prefixes(g: grammar.Grammar, layer: grammar.NonTerminal, prefix: grammar.Derivation, root: PrefixNode, common_depth: int, nterm_sequence: Iterator): """ Separate written into tree derivations by common prefixes. Uses recursion, maximal depth of it can be as big as depth of tree plus 1. :param g: Grammar, to which derivations will be recorded. :param layer: non-terminal symbol to which the derivation belong. :param prefix: common prefix. :param root: prefix tree. :param common_depth: depth of common prefix. :param nterm_sequence: sequence of new non-terminals. :return: none. """ # Root in None means that it's leaf. if root is None: g.add_rule(layer, prefix) return # Common depth can be only in beginning. if common_depth == -1: common_depth = 1 else: if len(root) == 1: common_depth += 1 else: common_depth = 0 if common_depth >= 1: new_layer = layer else: # If there is fork, we have to write # production of form # Layer --> prefixNewLayer # where NewLayer non-terminal # will keep symbols of the fork. new_layer = next(nterm_sequence) g.add_rule(layer, prefix + (new_layer, )) for symb, next_node in root.items(): # Handling case of the EmptyWord. if type(symb) == tuple: t_symb = symb else: t_symb = (symb, ) # Prefix assembling. if common_depth >= 1: new_prefix = prefix + t_symb else: new_prefix = t_symb separate_prefixes(g, new_layer, new_prefix, next_node, common_depth, nterm_sequence)
def test_domato(grammar, start_symbol, tries):
    grammar = "grammars/" + grammar + ".txt"
    start = time.time()
    my_grammar = Grammar()
    my_grammar.parse_from_file(grammar)
    for i in range(tries):
        result = my_grammar.generate_symbol(start_symbol)
    end = time.time()
    runtime = end - start
    return runtime
def make_grammar(self):
    grammar = Grammar()
    r1 = Rule(
        Symbol("NP", {"AGR": "?a"}),
        [Symbol("ART", {"AGR": "?a"}), Symbol("N", {"AGR": "?a"})])
    # -1 should be the default for any undefined variable that is referenced
    # while constructing (the original used the Python 2 literal -1L).
    r1.set_variable_code("?a", -1)
    grammar.add_rule(r1)
    return grammar
def __init__(self, transcript, probs, state2idx={}, transition=0.5): """Viterbi decoding for given likelihoods and transcript. Note: Leave state2idx empty if state value == corresponding index in probs Args: transcript (list): states of the transcript probs (np.ndarray): (n_frames, n_states) grid of minus loglikelihoods for each frame for each state state2idx (dict): keys are states of the transcript, values are indexes in the loglikelihood table (probs) for the corresponding states. transition: default=0.5, means that transition to the next state and decision to stay at the current one is the same, in this case decoding is based just on frames probabilities. Examples: >>> probs = [[-1, -2, -3], >>> [-2, -1, -3], >>> [-4, -2, -1], >>> [-1, -4, -2]] >>> transcript = [1, 0, 2] >>> v = Viterbi(transcript, probs) or >>> transcript = [10, 3, 5] >>> state2idx = {10: 1, 3: 0, 5: 2} >>> v = Viterbi(transcript, probs, state2idx=state2idx) >>> alignment = v.inference() >>> print(alignment) [1, 0, 0, 2] """ self._grammar = Grammar(transcript) self._state2idx = state2idx self._transition_self = -np.log(transition) self._transition_next = -np.log(1 - transition) self._transitions = np.array([self._transition_self, self._transition_next]) self._probs = probs self._state = self._probs[0, 0] self._number_frames = self._probs.shape[0] # probabilities matrix self._T1 = np.zeros((len(self._grammar), self._number_frames)) + np.inf self._T1[0, 0] = self._state # argmax matrix self._T2 = np.zeros((len(self._grammar), self._number_frames)) + np.inf self._T2[0, 0] = 0 self._frame_idx = 1
def test_morpheme_boundary(self):
    self.configurations["MORPHEME_BOUNDARY_FLAG"] = True
    self.initialise_segment_table("plural_english_segment_table.txt")
    hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
        'q2': ([FINAL_STATE], ['z'])
    })
    grammar = Grammar(hmm)
    self.assertCountEqual(['dog', 'kat', 'dogz', 'katz'], grammar.get_all_outputs())
def test_plural_english_grammar(self):
    self.initialise_segment_table("plural_english_segment_table.txt")
    rule_set = self.get_rule_set("plural_english_rule_set.json")
    hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
        'q2': ([FINAL_STATE], ['z'])
    })
    grammar = Grammar(hmm, rule_set)
    grammar_transducer = grammar.get_transducer()
def test_grammar_productions(self):
    grammar = Grammar("grammars/grammar1.json")
    # Check start variable productions.
    rules = grammar.produces("S")
    self.assertEqual(rules, ["aAb"])
    rules = grammar.produces("A")
    self.assertEqual(rules, ["aAb", "#"])
    # Check nonexistent variable productions.
    rules = grammar.produces("N")
    self.assertFalse(rules)
def test2(): print("\n\n------ Test 2 ---------") # Test grammar 2 - read from file g = Grammar() g.read("Reynir.test.grammar") #print("Grammar:") #print(str(g)) #print() # s = "Villi leit út eða Anna og köttur komu beint heim og kona eða maður fóru snemma inn" s = "kona með kött myrti mann með hálsbindi með hund og Páll fór út" # s = "kona með kött myrti mann með hund og Villi fór út" # s = "Villi leit út" class NameToken(Token): NÖFN_NF = ["Villi", "Anna", "Hlín", "Páll"] NÖFN_ÞF = ["Villa", "Önnu", "Hlín", "Pál"] NÖFN_ÞGF = ["Villa", "Önnu", "Hlín", "Páli"] def matches(self, terminal): """ Does this token match the given terminal? """ if not terminal.name().startswith("nafn_"): return False if terminal.name().endswith("_nf"): return self._val in NameToken.NÖFN_NF if terminal.name().endswith("_þf"): return self._val in NameToken.NÖFN_ÞF if terminal.name().endswith("_þgf"): return self._val in NameToken.NÖFN_ÞGF return False def make_token(w): if w[0].isupper(): return NameToken('nafn', w) return Token('orð', w) toklist = [make_token(w) for w in s.split()] p = Parser.for_grammar(g) forest = p.go(toklist) print("Parse combinations: {0}".format(Parser.num_combinations(forest))) Parser.print_parse_forest(forest)
def generate_samples(grammar_dir, outfiles): """Generates a set of samples and writes them to the output files. Args: grammar_dir: directory to load grammar files from. outfiles: A list of output filenames. """ f = open(os.path.join(grammar_dir, 'template.html')) template = f.read() f.close() htmlgrammar = Grammar() err = htmlgrammar.parse_from_file(os.path.join(grammar_dir, 'html.txt')) # CheckGrammar(htmlgrammar) if err > 0: print('There were errors parsing grammar') return cssgrammar = Grammar() err = cssgrammar.parse_from_file(os.path.join(grammar_dir, 'css.txt')) # CheckGrammar(cssgrammar) if err > 0: print('There were errors parsing grammar') return jsgrammar = Grammar() err = jsgrammar.parse_from_file(os.path.join(grammar_dir, 'js.txt')) # CheckGrammar(jsgrammar) if err > 0: print('There were errors parsing grammar') return # JS and HTML grammar need access to CSS grammar. # Add it as import htmlgrammar.add_import('cssgrammar', cssgrammar) jsgrammar.add_import('cssgrammar', cssgrammar) for outfile in outfiles: result = generate_new_sample(template, htmlgrammar, cssgrammar, jsgrammar) if result is not None: print('Writing a sample to ' + outfile) try: f = open(outfile, 'w') f.write(result) f.close() except IOError: print('Error writing to output')
def test(string='balance.txt', spec='g1.txt'):
    from balance import BalanceLexer
    G = Grammar()
    source = open(spec, 'r')
    G.generate(source)
    G.bnf2cnf()
    print("grammar ==", G)
    lexer = BalanceLexer()
    balance = open(string, 'r')
    lexer.scanFile(balance)
    S = lexer.getStream()
    print("stream ==", S)
    C = CYKChart()
    C.Build_CYK_Chart(G, S)
    print(C)
def btn_parser_clicked(self): if(self.verify_grammar_ll1()): g = Grammar.text_to_grammar(self.ui.text_grammar.toPlainText()) r = RecursiveDescentParser(g) self._current_parser = r self.ui.text_parser.setText(r.parser_code(self.log).strip().replace('\t',' ')) QMessageBox.information(self,'Geração do parser descendente recursivo','O parser foi gerado!')
def main():
    webStr = None
    queryString = None
    opts, args = getopt.getopt(sys.argv[1:], "i:q:", ["input", "query"])
    for o, a in opts:
        if o == "-i":
            webStr = a
        elif o == "-q":
            queryString = a
    if webStr is None or queryString is None:
        print("Incorrect usage")
        sys.exit(-1)
    xsb = XSB()
    try:
        webStr = webStr.replace("<newline>", "\n")
        polStr = "\n".join([l for l in webStr.split("\n") if ":-" in l])
        policy = Policy.fromString(escapeCharacters(polStr))
        query = Atom.fromElements(Grammar.parseAtom(escapeCharacters(queryString)))
        policy.processPolicy()
        policy.checkQuery(query)
        xsb.loadPolicy(policy)
        print(xsb.query(query))
        xsb.close()
    except Exception as e:
        print("Error:", e)
        xsb.close()
        sys.exit(-1)
def test_parser_code_nonterminal(self): s = "S -> A B C\n" s +="A -> a A | &\n" s +="B -> b B | A C d\n" s +="C -> c C | D\n" s +="D -> &" g = Grammar.text_to_grammar(s) r = RecursiveDescentParser(g) c = '''\ def S(): global current_symbol if current_symbol in ['a', 'b', 'c', 'd']: A() B() C() \t else: raise Exception('S',['a', 'b', 'c', 'd'],current_symbol)''' self.assertEqual(c.strip(),r._parser_code_nonterminal('S').strip()) c = '''\ def A(): global current_symbol if current_symbol in ['a']: if current_symbol == 'a': next_lexic_symbol() else: raise Exception('A','a',current_symbol) A()''' self.assertEqual(c.strip(),r._parser_code_nonterminal('A').strip())
def verify_grammar_ll1(self): self.log('Verificando se a gramática é LL(1)') try: g = Grammar.text_to_grammar(self.ui.text_grammar.toPlainText()) except Exception: QMessageBox.critical(self,'Erro durante criação da gramática','O texto que foi tentado a conversão para gramática não é válido') raise Exception('Erro durante criação da gramática','O texto que foi tentado a conversão para gramática não é válido') try: g.is_ll1(self.log) self.log('A gramática é LL(1)!') return True except Exception as err: if err.args[0] == 'LEFT_RECURSION': nts = ', '.join(err.args[1]) QMessageBox.critical(self,'Recursão à esquerda','Os seguintes não terminais levam a uma recursão à esquerda:\n\t%s'%(nts)) self.log('Recursão a esquerda encontrada encontrada nos não terminais: %s'%(nts)) raise Exception('Recursão à esquerda','Os seguintes não terminais levam a uma recursão à esquerda: %s'%(nts)) elif err.args[0] == 'LEFT_FACTORING': nts = ', '.join(err.args[1]) QMessageBox.critical(self,'Fatoração à esquerda','Os seguintes não terminais não estão fatorados à esquerda:\n\t%s'%(nts)) self.log('Não fatoração encontrada nos não terminais: %s'%(nts)) raise Exception('Fatoração à esquerda','Os seguintes não terminais não estão fatorados à esquerda: %s'%(nts)) elif err.args[0] == 'FIRST_FOLLOW_CONFLICT': nts = ', '.join(err.args[1]) QMessageBox.critical(self,'Conflito first/follow','Houve conflito entre o first e o follow dos seguintes não terminais:\n\t%s'%(nts)) self.log('Conflito first/follow encontrado nos não terminais: %s'%(nts)) raise Exception('Conflito first/follow','Houve conflito entre o first e o follow dos seguintes não terminais: %s'%(nts)) else: QMessageBox.critical(self,'Erro inesperado durante verificação LL(1)',err.__repr__()) raise Exception('Erro inesperado durante verificação LL(1)',err.__repr__())
def __init__(self, code = "zxx"): """ Create a lect object. A I{lect} is language variety; it can either be a spoken or a written form, and a colloquial, mediatic or standard form, and so on. It wraps serialization and high-level features. It contains three independent internal members: - L{lexicon<lexicon>} - L{grammar<grammar>} - L{inflections<inflection>} @type code: str @param code: A language code according to U{ISO<http://www.iso.org>} standard. For the language codes, refer to 639-3 specifications. A country/variety code and a representation system might be added: C{eng-US}, C{esp:ERG}, C{por-BR:IPA} """ self.code = code self.name = u"" self.english_name = "" self.__p_o_s = () self.__lemma_categories = {} self.__categories = {} self.grammar = Grammar(code) self.lexicon = Lexicon() self.inflections = Inflections() self.properties = {"separator" : " ", "capitalization" : "3"} #Lexical and Initials
def test_parser_code_production(self): s = "S -> A B C\n" s +="A -> a A | &\n" s +="B -> b B | A C d\n" s +="C -> c C | &" g = Grammar.text_to_grammar(s) r = RecursiveDescentParser(g) c = '''\ A() B() C()''' self.assertEqual(c.strip(),r._parser_code_production(Production('S','A B C'),'S').strip()) c = '''\ if current_symbol == 'a': next_lexic_symbol() else: raise Exception('A','a',current_symbol) A()''' self.assertEqual(c.strip(),r._parser_code_production(Production('A','a A'),'A').strip()) c = '''\ A() C() if current_symbol == 'd': next_lexic_symbol() else: raise Exception('B','d',current_symbol)''' self.assertEqual(c.strip(),r._parser_code_production(Production('B','A C d'),'B').strip())
def fromString(self, string):
    policy = Policy()
    elements = Grammar.parsePolicy(string)
    for ruleElements in elements:
        rule = Rule.fromElements(ruleElements)
        policy.rules.append(rule)
    return policy
def test_grammar_rules(self):
    grammar = Grammar("grammars/grammar1.json")
    # Check that the correct rules are returned.
    rule = grammar.get_rule("S", "a")
    self.assertEqual(rule, "aAb")
    rule = grammar.get_rule("A", "#")
    self.assertEqual(rule, "#")
    # Check nonexistent input symbol.
    rule = grammar.get_rule("S", "k")
    self.assertFalse(rule)
    # Check nonexistent variable.
    rule = grammar.get_rule("N", "a")
    self.assertFalse(rule)
def solve():
    G = Grammar()
    source = open("cky.txt", 'r')
    G.generate(source)
    G.bnf2cnf()
    print("grammar ==", G)
    lexer = Telescope()
    balance = open('telescope', 'r')
    lexer.scanFile(balance)
    S = lexer.getStream()
    print("stream ==", S)
    C = CYKChart()
    C.Build_CYK_Chart(G, S)
    print(C)
    genDot(C, "cky.dot")
    system("dot -Tjpg cky.dot -o cky.jpg")
    print("cky.jpg created")
def main(args):
    lexarname = None
    gramarspec = None
    inputfile = None
    outputtype = None
    outputfile = None
    argc = len(args)
    if argc == 1:
        print("usage: main.py lexarname, gramarspec, inputfile, [output-type] [output-file]\n")
        return
    if argc > 1:
        lexarname = args[1]
    if argc > 2:
        gramarspec = args[2]
    if argc > 3:
        inputfile = args[3]
    if argc > 4:
        outputtype = args[4]
    else:
        outputtype = "dot"
    if argc > 5:
        outputfile = args[5]
    else:
        outputfile = inputfile
    G = Grammar()
    source = open(gramarspec, 'r')
    G.generate(source)
    G.bnf2cnf()
    print("grammar ==", G)
    if sep in lexarname:
        lexarname = lexarname.replace(sep, ".")
    lexerclass = __import__(lexarname)
    lexer = lexerclass.Lexer()
    lexer.scan(inputfile)
    S = lexer.getStream()
    print("stream ==", S)
    C = CYKChart()
    C.Build_CYK_Chart(G, S)
    print(C)
    print(C.graph)
    if outputtype == "dot":
        genDot(C, outputfile)
        # TODO: see if dot takes STDIN so this can be piped to it.
        system("dot -Tjpg %s -o %s " % (outputfile, outputfile))
        print("%s generated" % (outputfile))
    elif outputtype == "js":
        genVIZ(C, outputfile)
def gramaticaAutomato(input):
    input = entrytext.get()
    l = LeitorG(input)
    dict, termi, nonter, ini = l.ler()
    g = Grammar(dict, termi, nonter, ini)
    s, inicial, final = g.convertGtoAF()
    a = Automato(s, inicial, final)
    a.printAtomato()
    a.writeAutomataToFile(input)
    input = input.replace('.in', '')
    data_file = open('../testes/' + input + '.out')
    data = data_file.read()
    data_file.close()
    test = Tk.Tk()
    Results = Tk.Label(test, text=data)
    Results.grid(row=20, column=3, sticky=Tk.W)
def automatoGramatica(input):
    l = Leitor(input)
    dict, ini, final = l.ler()
    a = Automato(dict, ini, final)
    prod, terminais, nonTerminais, inicial = a.automataToGrammar()
    g = Grammar(prod, terminais, nonTerminais, inicial)
    g.printGrammar()
    g.writeGrammarToFile(input)
    input = input.replace('.in', '')
    data_file = open('../testes/' + input + '.out')
    data = data_file.read()
    data_file.close()
    test = Tk.Tk()
    Results = Tk.Label(test, text=data)
    Results.grid(row=20, column=3, sticky=Tk.W)
def get_grammar(self):
    print('----------------- BEGIN regex_to_grammar -----------------\n')
    binary_op = ['|', '.']
    terminals = set(list(self.re))
    if '+' in terminals:
        terminals.remove('+')
    if '*' in terminals:
        terminals.remove('*')
    if '|' in terminals:
        terminals.remove('|')
    if '.' in terminals:
        terminals.remove('.')
    for i in self.re:
        if i in terminals:
            self.stack.append(i)
        else:
            if i not in binary_op:
                # Unary operator: rewrite the single operand on top of the stack.
                self.stack.append(self.create_new_rules(i, self.stack.pop()))
            else:
                # Binary operator: rewrite the two topmost operands.
                dummy = self.create_new_rules(i, self.stack[-2:])
                a = self.stack.pop()
                b = self.stack.pop()
                self.stack.append(dummy)
    self.dictionary['S'] = [('S', ''.join(self.stack.pop()))]
    grammar_ = Grammar(
        set(self.dictionary.keys()), terminals, self.dictionary, 'S')
    print("Grammar: ", grammar_.print_grammar())
    print(grammar_.grammar[2])
    print('\n----------------- END regex_to_grammar -----------------\n\n')
    return grammar_
def main():
    pygame.midi.init()
    player = pygame.midi.Output(0)
    player.set_instrument(25, 1)
    sounds = {'C': 48, 'F': 53, 'G': 55}
    nonterminals = ['q', 'w']
    productions = {'q': [("CCCq", 1), ("FGw", 1)], 'w': [("FCCq", 1)]}
    gr = Grammar("q", nonterminals, productions)
    while True:
        gr.make_production()
        music = gr.show_word()
        for note in music:
            chord(player, sounds[note])
def gr_fa_btn_clicked(self, table):
    st = self.ui.gr_text.toPlainText()
    if st == '':
        st = "S -> aS | a | bS | b"
    gr = Grammar.text_to_grammar(st)
    fa = gr.to_finite_automaton()
    self.add_fa_on_list("GR => FA", fa)
    self.set_fa_on_table(fa, table)
def test_text_to_grammar(self):
    s = "S -> a"
    g = Grammar.text_to_grammar(s)
    self.assertEqual(g._productions, {Production('S', 'a')})

    s = "S -> b S | a"
    g = Grammar.text_to_grammar(s)
    self.assertEqual(g._productions, {Production('S', 'a'), Production('S', ['b', 'S'])})

    s = "S1 -> b S1 a | a"
    g = Grammar.text_to_grammar(s)
    self.assertEqual(g._productions, {Production('S1', 'a'), Production('S1', ['b', 'S1', 'a'])})

    s = "C -> if E then C C' | comando\n"
    s += "C' -> else C | &\n"
    s += "E -> exp"
    g = Grammar.text_to_grammar(s)
    self.assertEqual(g._productions, {Production("C", "if E then C C'"), Production("C", "comando"),
                                      Production("C'", "else C"), Production("C'", "&"), Production("E", "exp")})
def solve():
    from os import system
    G = Grammar()
    source = open("infix.txt", 'r')
    G.generate(source)
    G.bnf2cnf()
    print("grammar ==", G)
    lexer = Infix()
    balance = open('input.txt', 'r')
    lexer.scanFile(balance)
    S = lexer.getStream()
    print("stream ==", S)
    C = CYKChart()
    C.Build_CYK_Chart(G, S)
    print(C)
    print(C.graph)
    genDot(C, "infix.dot")
    system("dot -Tjpg infix.dot -o infix.jpg")
    print("infix.jpg created")
def delete_useless_nonterminals(grammar):
    new_rules = list(grammar.rules)
    new_nonterminals = set(grammar.nonterminals)
    while True:
        useless_nonterminals = find_useless_nonterminals(new_rules)
        if not useless_nonterminals:
            break
        new_rules = delete_useless_nonterminals_from_rules(new_rules, useless_nonterminals)
        new_rules = delete_useless_nonterminals_rules(new_rules, useless_nonterminals)
        new_nonterminals.difference_update(useless_nonterminals)
    new_grammar = Grammar()
    new_grammar.axiom = grammar.axiom
    new_grammar.terminals = set(grammar.terminals)
    new_grammar.nonterminals = new_nonterminals
    new_grammar.rules = new_rules
    return new_grammar