Example #1
    def test_parsing(self):
        # Ignoring semantics for now...
        numeral_rules = [
            Rule('$E', 'one'),
            Rule('$E', 'two'),
            Rule('$E', 'three'),
            Rule('$E', 'four'),
        ]

        operator_rules = [
            Rule('$UnOp', 'minus'),
            Rule('$BinOp', 'plus'),
            Rule('$BinOp', 'minus'),
            Rule('$BinOp', 'times'),
        ]

        compositional_rules = [
            Rule('$E', '$UnOp $E'),
            Rule('$EBO', '$E $BinOp'),
            Rule('$E', '$EBO $E')
        ]

        arithmetic_rules = numeral_rules + operator_rules + compositional_rules

        arithmetic_grammar = Grammar(arithmetic_rules)
        for example in self.one_parse_examples:
            self.assertEqual(1, len(arithmetic_grammar.parse(example.input)),
                             example)
            # print(arithmetic_grammar.parse(example.input)[0])
        for example in self.two_parse_examples:
            self.assertEqual(2, len(arithmetic_grammar.parse(example.input)),
                             example)
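With the binarized compositional rules above ($EBO covering '$E $BinOp'), any input containing two binary operators is structurally ambiguous, which is what the two-parse examples exercise. A hedged sketch of that, assuming the SippyCup-style Rule and Grammar classes used in the test are importable; the concrete input string is only an illustration:

rules = [
    Rule('$E', 'two'), Rule('$E', 'three'), Rule('$E', 'four'),
    Rule('$BinOp', 'plus'),
    Rule('$EBO', '$E $BinOp'),
    Rule('$E', '$EBO $E'),
]
grammar = Grammar(rules)
parses = grammar.parse('two plus three plus four')
# Expect two parses: (two plus three) plus four vs. two plus (three plus four).
print(len(parses))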
Example #2
    def test_operator_precedence_features(self):
        """
        See if a count of operator precedence patterns is a good feature for 
        ranking parses.
        """
        arithmetic_grammar = Grammar(self.arithmetic_rules)
        parses = arithmetic_grammar.parse("two times two plus three")
        self.assertEqual(2, len(parses))
        # Look at Parse.operator_precedence_features(). It generates different
        # results for the two parses
        parse0_features = parses[0].operator_precedence_features()
        parse1_features = parses[1].operator_precedence_features()
        # In the first parse, + precedes * once
        self.assertEqual(parse0_features, {('+', '*'): 1.0})
        # In the second parse, * precedes + once
        self.assertEqual(parse1_features, {('*', '+'): 1.0})

        # Look at Parse.score()
        parse0_score = parses[0].score(Parse.operator_precedence_features,
                                       self.weights)
        parse1_score = parses[1].score(Parse.operator_precedence_features,
                                       self.weights)
        # Parse.operator_precedence_features() is good at distinguishing parses
        self.assertEqual(-1.0, parse0_score)
        self.assertEqual(1.0, parse1_score)
def delete_nonderivable_nonterminals(grammar):
	new_grammar = Grammar()
	new_grammar.axiom = grammar.axiom
	new_grammar.terminals = grammar.terminals

	unwatched = [new_grammar.axiom]
	watched = set()
	while unwatched:
		# take the next nonterminal off the worklist
		nonterminal = unwatched.pop(0)
		watched.add(nonterminal)

		rules = find_rules_for_nonterminal(grammar.rules, nonterminal)
		for rule in rules:
			for symbol in rule.right_side:
				if isinstance(symbol, Nonterminal):
					if symbol not in watched and symbol not in unwatched:
						unwatched.append(symbol)

	new_grammar.nonterminals = watched

	new_rules = []
	for rule in grammar.rules:
		if rule.left_side[0] in watched:
			new_rules.append(rule)

	new_grammar.rules = new_rules

	return new_grammar	 
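The snippet above relies on a find_rules_for_nonterminal helper that is not shown. A minimal sketch of it, assuming each rule exposes a left_side list whose first element is the defining nonterminal (the same convention the filtering loop above uses):

def find_rules_for_nonterminal(rules, nonterminal):
    # Hypothetical helper: collect every rule whose left-hand side
    # starts with the given nonterminal.
    return [rule for rule in rules if rule.left_side[0] == nonterminal]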
Example #4
def lookahead(prod: Sequence[str], dotpos: int, grammar: Grammar) -> Set[str]:
    if dotpos >= (len(prod) - 1):
        return {"$$"}

    epsilons = grammar.epsilon_nonterms()
    nonterms = grammar.nonterms()
    ret = set()
    nt_passed = set()
    stk = deque()
    stk.append(prod[dotpos + 1:])

    while len(stk) > 0:
        cur = stk.popleft()
        hit = False
        for token in cur:
            if token in nonterms:
                if token in nt_passed:
                    if token in epsilons:
                        continue
                    else:
                        hit = True
                        break
                else:
                    nt_passed.add(token)
                    for nt_production in grammar[token]:
                        stk.append(nt_production)
            else:
                ret.add(token)
            if token not in epsilons:
                hit = True
                break
        if not hit:
            ret.add("$$")

    return ret
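lookahead() computes the terminals that can begin the tail prod[dotpos + 1:], looking through nullable nonterminals, with "$$" as the marker when that tail can be empty. A small usage sketch, assuming the function above (and its deque import) is in scope; the StubGrammar class below is a hypothetical stand-in for the real Grammar, offering only the three members lookahead() touches:

class StubGrammar:
    def __init__(self, productions, nullable):
        self._productions = productions  # dict: nonterminal -> list of RHS tuples
        self._nullable = nullable        # set of nullable nonterminals

    def nonterms(self):
        return set(self._productions)

    def epsilon_nonterms(self):
        return self._nullable

    def __getitem__(self, nonterminal):
        return self._productions[nonterminal]


# Grammar fragment: B -> b, with B registered as nullable.
g = StubGrammar({"B": [("b",)]}, nullable={"B"})

# For the production "a B c" with the dot at position 0, the tail is "B c";
# because B is nullable, both 'b' and the following 'c' are possible.
print(lookahead(("a", "B", "c"), 0, g))  # {'b', 'c'}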
Example #5
class ShellParser(object):

    services = None

    def __init__(self):
        self.tokens = None

    def __call__(self, command_string):
        self.parse(command_string)

    def buildGrammar(self):
        self.grammar = Grammar()
        self.grammar.parser = self
        self.grammar.makeBNF()
        return self.grammar

    def parse(self, cmd):
        # self.grammar is created by buildGrammar(), which must run first
        tokens = self.grammar.parseString(cmd)
        out = """
            command: %s
            service type: %s
            action: %s
        """ % (tokens.commandtype, tokens.servicetype, tokens.action)
        self.tokens = tokens
        return out
 def get_energy(self, simulation_case):
     case_name = simulation_case.case_name
     configuration.configurations_dict["case_name"] = case_name
     if isinstance(simulation_case.hmm_dict, HMM):
         hmm = simulation_case.hmm_dict
     else:
         hmm = HMM(simulation_case.hmm_dict)
     if isinstance(simulation_case.flat_rule_set_list, RuleSet):
         rule_set = simulation_case.flat_rule_set_list
     else:
         rule_set_list = []
         for flat_rule in simulation_case.flat_rule_set_list:
             rule_set_list.append(Rule(*flat_rule))
         rule_set = RuleSet(rule_set_list)
     grammar = Grammar(hmm, rule_set)
     self.write_to_dot_to_file(hmm, "hmm_" + case_name)
     self.write_to_dot_to_file(grammar.get_nfa(),
                               "grammar_nfa_" + case_name)
     hypothesis = Hypothesis(grammar, self.data)
     energy = hypothesis.get_energy()
     if self.target_energy:
         print("{}: {} distance from target: {}".format(
             case_name, hypothesis.get_recent_energy_signature(),
             energy - self.target_energy))
     else:
         print("{}: {}".format(case_name,
                               hypothesis.get_recent_energy_signature()))
     return energy
    def test_parser_kleene(self):
        hmm = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2', FINAL_STATE], ['at', 'attstktttt', 'st']),
            'q2': ([FINAL_STATE], ['o'])
        })

        hmm_transducer = hmm.get_transducer()
        self.write_to_dot_to_file(hmm_transducer, "test_hmm_transducer_kleene")

        assimilation_rule_with_kleene = Rule(
            [{"cons": "-"}],
            [{"low": "+"}],
            [{"cons": "-"}, {"cons": "+", "kleene": True}],
            [],
            obligatory=True)

        rule_set_with_kleene = RuleSet([assimilation_rule_with_kleene])
        grammar = Grammar(hmm, rule_set_with_kleene)

        nfa = grammar.get_nfa()
        self.write_to_dot_to_file(nfa, "test_parser_nfa_kleene")
Example #8
    def test_sequitur(self):
        """docstring for test_sequitur"""
        g = Grammar()
        g.train_string("Hello, world!")

        self.assertEqual("0 --(0)--> H e l l o , _ w o r l d ! \n",
                         g.print_grammar())
Example #9
def generate_samples(grammar_dir, outfiles):
    """Generates a set of samples and writes them to the output files.

    Args:
      grammar_dir: directory to load grammar files from.
      outfiles: A list of output filenames.
    """

    f = open(os.path.join(grammar_dir, 'ox_template.html'))
    template = f.read()
    f.close()

    jsgrammar = Grammar()
    err = jsgrammar.parse_from_file(os.path.join(grammar_dir, 'oxjs.txt'))

    if err > 0:
        print('There were errors parsing grammar')
        return

    for outfile in outfiles:
        result = generate_new_sample(template, jsgrammar)

        if result is not None:
            print('Writing a sample to ' + outfile)
            try:
                f = open(outfile, 'w')
                f.write(result)
                f.close()
            except IOError:
                print('Error writing to output')
 def get_energy(self, hmm, rule_set_list, case_name):
     grammar = Grammar(hmm, RuleSet(rule_set_list))
     self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
     hypothesis = Hypothesis(grammar, self.data)
     energy = hypothesis.get_energy()
     print("{}: {}".format(case_name, hypothesis.get_recent_energy_signature()))
     return energy
Example #11
class UI:
    def __init__(self):
        self.__grammar = Grammar()

    @staticmethod
    def print_menu():
        print('Options are:')
        print('     0 - Exit')
        print('     1 - See non-terminals')
        print('     2 - See terminals')
        print('     3 - See productions')
        print('     4 - See start')

    def run(self):
        while True:
            UI.print_menu()
            option = input('Enter option: ')
            if option == '0':
                break
            elif option == '1':
                print(self.__grammar.get_non_terminals_string())
            elif option == '2':
                print(self.__grammar.get_terminals_string())
            elif option == '3':
                print(self.__grammar.get_productions_string())
            elif option == '4':
                print(self.__grammar.start)
            else:
                print('Incorrect option')
    def test_morphology_only(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        data = [u'tozat', u'tozgoat', u'tozgo', u'tozdoat', u'tozdo', u'tozzoat', u'tozzo', u'toz', u'dagat', u'daggoat', u'daggo', u'dagdoat', u'dagdo', u'dagzoat', u'dagzo', u'dag', u'gasat', u'gasgoat', u'gasgo', u'gasdoat', u'gasdo', u'gaszoat', u'gaszo', u'gas', u'kodat', u'kodgoat', u'kodgo', u'koddoat', u'koddo', u'kodzoat', u'kodzo', u'kod', u'katat', u'katgoat', u'katgo', u'katdoat', u'katdo', u'katzoat', u'katzo', u'kat', u'dotat', u'dotgoat', u'dotgo', u'dotdoat', u'dotdo', u'dotzoat', u'dotzo', u'dot']

        #target
        hmm = {'q0': ['q1'],
              'q1': (['q2', 'q3', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz']),
              'q2': (['q3','qf'], ['zo', 'go', 'do']),
              'q3': (['qf'], ['at'])}
        self.configurations.simulation_data = data
        self.assertLess(Hypothesis(Grammar(hmm, [])).get_energy(), 5190)

        #single_state
        hmm = HMM({'q0': ['q1'],
              'q1': (['q1', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz'] + ['zo', 'go', 'do'] + ['at'])
                })
        self.assertLess(Hypothesis(Grammar(hmm, [])).get_energy(), 6430)


        #two state
        hmm = {'q0': ['q1'],
              'q1': (['q1', 'q2', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz'] + ['zo', 'go', 'do']),
              'q2': (['qf'], ['at'])
                }
        self.assertLess(Hypothesis(Grammar(hmm, [])).get_energy(), 6010)

        #from simulation
        hmm = HMM({'q0': ['q1'],
      'q1': (['q1', 'qf'], ['toz', 'do', 'zo', 'gas', 'kod', 'dag', 'at', 'zoat', 'kat', 'go', 'dot'])
        })
Example #13
 def setUp(self):
     rules = [
         'S -> VP | NP VP', 'VP -> V', 'NP -> Det N | N',
         'V -> walk | fly | book', 'N -> I | you | cows | book',
         'Det -> the', 'Det -> a'
     ]
     self.grammar = Grammar(rules)
Example #14
 def teardown(self, g: grammar.Grammar):
     """We have introduced new nodes, so we need
     to recalculate min tokens.
     FIXME: Increasingly min tokens looks like it shouldn't
            be part of initial grammar creation.
     """
     g._calc_min_tokens()
Example #15
def generate_samples(grammar_dir, outfiles):
    """Generates a set of samples and writes them to the output files.

    Args:
      grammar_dir: directory to load grammar files from.
      outfiles: A list of output filenames.
    """

    f = open(os.path.join(grammar_dir, 'template.html'))
    template = f.read()
    f.close()

    jsgrammar = Grammar()
    err = jsgrammar.parse_from_file(os.path.join(grammar_dir, 'jscript.txt'))
    if err > 0:
        print('There were errors parsing grammar')
        return

    for outfile in outfiles:
        result = GenerateNewSample(template, jsgrammar)

        if result is not None:
            print('Writing a sample to ' + outfile)
            try:
                f = open(outfile, 'w')
                f.write(result)
                f.close()
            except IOError:
                print('Error writing to output')
Example #16
def construct_table(grammar: Grammar, states: list, algo_suit):
    final_item = LR0Item(grammar.get_start_prodctions()[0], 1)
    table = list()  # table[src_state][sym] = set(LRAction)
    for state in states:
        actions = defaultdict(set)
        table.append(actions)
        for sym, edge in state.edges.items():
            if sym == '':
                continue
            if grammar.is_terminal(sym):
                # Terminal, shift
                actions[sym].add(LRAction.new_shift(edge.dst_state))
            else:
                # Nonterminal, goto
                actions[sym].add(LRAction.new_goto(edge.dst_state))
        if '' in state.edges:
            edge = state.edges['']
            current_item = tuple(edge.src_items)[0]
            if final_item.prod == current_item.prod and \
               final_item.pos == current_item.pos:
                # Accept
                actions['$'].add(LRAction.new_accept())
            else:
                # Reduce
                algo_suit.build_reduce(actions, edge)
    return table
Example #17
def generate(input_str):
    ''' Parses an input string and returns another one
    containing the generated program skeleton.
    '''
    HEADER, L, ENDCODE = parser.parse(input_str)

    result = 'from skel import Grammar\n'

    if HEADER is not None:
        result += HEADER + '\n'

    result = result + """

def generate(self):
    
    """


    grammar = Grammar(Parser.START_SYMBOL)
    if L:
        for T in L:
            grammar.addRule(T)

    result += grammar.generate(Parser.START_SYMBOL)

    if ENDCODE is not None:
        result += ENDCODE + '\n'

    return result
Example #18
    def buildgrammar(self):
        g = Grammar()
        g.nonterminals = self.states
        g.terminals = self.symbols
        g.startsymbol = str(self.initialstate)
        
        tf = self.transitions
        if len(tf) > 0:
            for t in tf:
                if len(t) == 3:
                    g.add_production(t[0], t[1] + t[2])
                    if t[2] in self.finalstates:
                        g.add_production(t[0], t[1] + '')
        
        if g.startsymbol in self.finalstates:
            g.add_production(g.startsymbol, 'e')
        
        self.grammar = g

        print 'Nonterminals: ', self.grammar.nonterminals
        print 'Terminals: ', self.grammar.terminals
        print 'Start symbol: ', self.grammar.startsymbol
        print 'Productions: ', self.grammar.productions

        return
Example #19
 def loss(self, rec_input, program, request):
     variables, productions = self._run(rec_input)
     g = Grammar(
         variables,
         [(productions[k].view(1), t, prog)
          for k, (_, t, prog) in enumerate(self.grammar.productions)])
     return -g.logLikelihood(request, program)
Example #20
    def __init__(self):
        self.context = Context()

        self.grammar = Grammar([
            SyntaxRule('S', 'VP', 'NP', 0.125),
            SyntaxRule('S', 'VP', 'Noun', 0.125),
            SyntaxRule('S', 'Verb', 'NP', 0.125),
            SyntaxRule('S', 'Verb', 'Noun', 0.125),
            SyntaxRule('S', 'NP', 'VP', 0.125),
            SyntaxRule('S', 'Noun', 'VP', 0.125),
            SyntaxRule('S', 'Pronoun', 'VP', 0.125),
            SyntaxRule('S', 'S', 'ConjClause', 0.125),
            SyntaxRule('ConjClause', 'Conj', 'S', 1),
            SyntaxRule('VP', 'Verb', 'Pronoun', 0.2),
            SyntaxRule('VP', 'Verb', 'PP', 0.2),
            SyntaxRule('VP', 'VP', 'PP', 0.2),
            SyntaxRule('VP', 'Adverb', 'Verb', 0.2),
            SyntaxRule('VP', 'Adverb', 'VP', 0.2),
            SyntaxRule('NP', 'NP', 'PP', 1 / 3),
            SyntaxRule('NP', 'Noun', 'PP', 1 / 3),
            SyntaxRule('NP', 'Article', 'Noun', 1 / 3),
            SyntaxRule('PP', 'Preposition', 'NP', 1 / 3),
            SyntaxRule('PP', 'Preposition', 'Noun', 1 / 3),
            SyntaxRule('PP', 'Preposition', 'Pronoun', 1 / 3),
        ], [
            LexicalRule('Preposition', 'to', 0.2),
            LexicalRule('Preposition', 'inside', 0.2),
            LexicalRule('Preposition', 'in', 0.2),
            LexicalRule('Preposition', 'from', 0.2),
            LexicalRule('Preposition', 'of', 0.2),
            LexicalRule('Article', 'the', 1),
            LexicalRule('Noun', 'contents', 0.25),
            LexicalRule('Noun', 'everything', 0.25),
            LexicalRule('Noun', CommandInputToken.placeholder(), 0.25),
            LexicalRule('Noun', 'there', 0.25),
            LexicalRule('Pronoun', 'me', 0.5),
            LexicalRule('Pronoun', 'what', 0.5),
            LexicalRule('Adverb', 'recursively', 1),
            LexicalRule('Verb', 'run', 1 / 17),
            LexicalRule('Verb', 'execute', 1 / 17),
            LexicalRule('Verb', 'do', 1 / 17),
            LexicalRule('Verb', 'show', 1 / 17),
            LexicalRule('Verb', 'list', 1 / 17),
            LexicalRule('Verb', 'tell', 1 / 17),
            LexicalRule('Verb', 'move', 1 / 17),
            LexicalRule('Verb', 'rename', 1 / 17),
            LexicalRule('Verb', 'place', 1 / 17),
            LexicalRule('Verb', 'copy', 1 / 17),
            LexicalRule('Verb', 'duplicate', 1 / 17),
            LexicalRule('Verb', 'delete', 1 / 17),
            LexicalRule('Verb', 'remove', 1 / 17),
            LexicalRule('Verb', 'is', 1 / 17),
            LexicalRule('Verb', 'put', 1 / 17),
            LexicalRule('Verb', 'display', 1 / 17),
            LexicalRule('Verb', 'find', 1 / 17),
            LexicalRule('Conj', 'and', 1 / 3),
            LexicalRule('Conj', 'then', 1 / 3),
            LexicalRule('Conj', ConjunctorToken(), 1 / 3),
        ])
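The weights above are intended to be normalized per left-hand side (eight S rules at 0.125 each, seventeen Verb entries at 1/17, and so on). A small self-contained sanity check of that property, with the counts copied from the listing; the check itself is illustrative and not part of the original class:

rule_probabilities = {
    "S": [0.125] * 8,
    "ConjClause": [1],
    "VP": [0.2] * 5,
    "NP": [1 / 3] * 3,
    "PP": [1 / 3] * 3,
    "Preposition": [0.2] * 5,
    "Article": [1],
    "Noun": [0.25] * 4,
    "Pronoun": [0.5] * 2,
    "Adverb": [1],
    "Verb": [1 / 17] * 17,
    "Conj": [1 / 3] * 3,
}

for lhs, probabilities in rule_probabilities.items():
    total = sum(probabilities)
    # Each left-hand side should sum to 1 up to floating-point error.
    assert abs(total - 1.0) < 1e-9, (lhs, total)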
Example #21
File: bot.py Project: msarch/py
    def __init__(self, canvas, namespace=None):
        Grammar.__init__(self, canvas, namespace)
        self._autoclosepath = True
        self._path = None

        self._canvas.size = None
        self._frame = 1
        self._set_initial_defaults()  ### TODO Look at these
 def test_parser2(self):
     hmm = HMM({
         INITIAL_STATE: ['q1'],
         'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
         'q2': ([FINAL_STATE], ['z'])
     })
     grammar = Grammar(hmm, self.plural_english_rule_set)
     nfa = grammar.get_nfa()
Example #24
 def __init__(self, grammar: Grammar, resolver: Callable[[Item, Item], Item]):
     self.__grammar = grammar
     old_start = grammar.start()
     new_start = old_start + "'"
     new_start_rule = new_start + " -> " + old_start
     g = Grammar([new_start_rule] + str(grammar).split("\n"))
     start_item = Item(new_start, (old_start,), {"$"}, 0)
     self.__sets = ItemSet.generate(start_item.closure(g), g, resolver)
Example #25
    def test_terminal(self):
        x = Grammar([
            "S -> A B",
            "A -> c | d",
            "B -> ef | g S",
        ])

        self.assertRaises(CFGException, lambda: x.lex(["d A"]))
Example #26
    def test_sequitur_base(self):
        """docstring for test_sequitur_base"""
        g = Grammar()
        g.train_string("abcabdabcabd")

        self.assertEqual(
            "0 --(0)--> 1 1 \n1 --(2)--> 2 c 2 d                                       abcabd\n2 --(2)--> a b                                           ab\n",
            g.print_grammar())
Example #27
def test_get_calc():
    grammar = Grammar()

    # get_calc('¬') is expected to raise; a bare try/except around an
    # `assert False` would swallow the AssertionError and always pass.
    raised = False
    try:
        grammar.get_calc('¬')
    except Exception:
        raised = True
    assert raised
Example #28
 def __init__(self):
     """ Load the shared BIN grammar if not already there, then initialize
         the Parser parent class """
     g = BIN_Parser._grammar
     if g is None:
         g = Grammar()
         g.read("Reynir.grammar")
         BIN_Parser._grammar = g
     Parser.__init__(self, g)
Example #29
    def test_crossover(self):
        self.initialise_segment_table("dag_zook_segments_new.txt")
        rule_set_1 = RuleSet([
            Rule([{"cons": "+"}], [{"voice": "-"}], [{"low": "+"}],
                 [{"cont": "-"}], True)
        ])
        rule_set_2 = RuleSet([
            Rule([{"cons": "+"}], [{"low": "-"}], [{"voice": "-"}], [], False)
        ])
        plural_english_data = 1 * ['kats', 'dogz', 'kat', 'dog']
        hmm_1 = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2', FINAL_STATE], ['dag', 'kot']),
            'q2': ([FINAL_STATE], ['z'])
        })
        hmm_2 = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2'], ['dog', 'kat']),
            'q2': (['q3'], ['s']),
            'q3': ([FINAL_STATE], ['z'])
        })

        grammar_1 = Grammar(hmm_1, rule_set_1)
        grammar_2 = Grammar(hmm_2, rule_set_2)

        hypothesis_1 = Hypothesis(grammar_1, plural_english_data)
        hypothesis_2 = Hypothesis(grammar_2, plural_english_data)
        offspring_1, offspring_2 = GeneticAlgorithm.crossover(
            hypothesis_1, hypothesis_2)

        print("*** Parents:\n")
        GeneticAlgorithm.log_hypothesis(hypothesis_1)
        GeneticAlgorithm.log_hypothesis(hypothesis_2)

        print("\n\n*** Offspring:\n")
        GeneticAlgorithm.log_hypothesis(offspring_1)
        GeneticAlgorithm.log_hypothesis(offspring_2)

        offspring_3, offspring_4 = GeneticAlgorithm.crossover(
            offspring_1, offspring_2)

        print("\n\n*** 2nd gen offspring:\n")
        GeneticAlgorithm.log_hypothesis(offspring_3)
        GeneticAlgorithm.log_hypothesis(offspring_4)
Example #30
	def inicia(self):
		case = 0
		lex = Lex()

		# # 1 RE FOR RESERVED WORDS
		a1 = lex.lexer('reservado', case)
		dict = a1.getDictAutomato()
		case += len(dict)

		# # 2 RE FOR IDENTIFIERS
		a2 = lex.lexer('identificadores', case)

		# # 3 GRAMMAR FOR SPECIAL SYMBOLS
		terminais = ['+', '-', '=', '/', '*', '>', '<', '!']
		nTerminais = ['S']
		producoes = {'S': ['+', '-', '=', '/', '*', '>', '<', '!']}
		inicial = 'S'
		g = Grammar(producoes,terminais, nTerminais, inicial)
		s, i, f = g.convertGtoAF()
		a3 = Automato(s, i, f)
		a3.determina()
		a3.printAtomato()
		print("\n")

		dict = a2.getDictAutomato()
		case += len(dict)
		a3 = lex.renameState(a3, case)

		# # 4 GRAMMAR FOR SEPARATORS
		terminais2 = [':',';', ' ', '(', ')', '[', ']', ',', '\n']
		nTerminais2 = ['S']
		producoes2 = {'S': [':',';', ' ', '(', ')', '[', ']', ',', '\n']}
		inicial2 = 'S'
		g = Grammar(producoes2,terminais2, nTerminais2, inicial2)
		s2, i2, f2 = g.convertGtoAF()
		a4 = Automato(s2, i2, f2)
		a4.determina()
		a4.printAtomato()
		print("\n")

		dict = a3.getDictAutomato()
		case += len(dict)
		a4 = lex.renameState(a4, case)

		# RE FOR CONSTANTS
		dict = a4.getDictAutomato()
		case += len(dict)
		a5 = lex.lexer('constantes', case)
		r = a5

		r = a1.oU([a2, a3, a4, a5])
		print ("\n")
		r.determina()
		r.printAtomato()

		with open('automato.pkl', 'wb') as output:
		    pickle.dump(r, output, pickle.HIGHEST_PROTOCOL)
Example #31
def separate_prefixes(g: grammar.Grammar, layer: grammar.NonTerminal,
                      prefix: grammar.Derivation, root: PrefixNode,
                      common_depth: int, nterm_sequence: Iterator):
    """
    Separate derivations, stored in a prefix tree, by their common prefixes.

    Uses recursion; its maximum depth can be as large as the
    depth of the tree plus 1.

    :param g: Grammar, to which derivations will be recorded.
    :param layer: non-terminal symbol to which the derivation belong.
    :param prefix: common prefix.
    :param root: prefix tree.
    :param common_depth: depth of common prefix.
    :param nterm_sequence: sequence of new non-terminals.
    :return: none.
    """
    # A root of None means this node is a leaf.
    if root is None:
        g.add_rule(layer, prefix)
        return

    # common_depth is -1 only on the initial call.
    if common_depth == -1:
        common_depth = 1
    else:
        if len(root) == 1:
            common_depth += 1
        else:
            common_depth = 0

    if common_depth >= 1:
        new_layer = layer
    else:
        # If there is a fork, we have to write a
        # production of the form
        # Layer --> prefix NewLayer
        # where the NewLayer non-terminal
        # will keep the symbols of the fork.
        new_layer = next(nterm_sequence)
        g.add_rule(layer, prefix + (new_layer, ))

    for symb, next_node in root.items():
        # Handling case of the EmptyWord.
        if type(symb) == tuple:
            t_symb = symb
        else:
            t_symb = (symb, )
        # Prefix assembling.
        if common_depth >= 1:
            new_prefix = prefix + t_symb
        else:
            new_prefix = t_symb

        separate_prefixes(g, new_layer, new_prefix, next_node, common_depth,
                          nterm_sequence)
Example #32
def test_domato(grammar, start_symbol, tries):
    grammar = "grammars/" + grammar + ".txt"
    start = time.time()
    my_grammar = Grammar()
    my_grammar.parse_from_file(grammar)
    for i in range(tries):
        result = my_grammar.generate_symbol(start_symbol)
    end = time.time()
    runtime = end - start
    return runtime
Example #33
 def make_grammar(self):
     grammar = Grammar()
     r1 = Rule(
         Symbol("NP", {"AGR": "?a"}), [
             Symbol("ART", {"AGR": "?a"}), Symbol("N", {"AGR": "?a"})])
     r1.set_variable_code("?a", -1L)
     # -1L should be default for any undefined variable
     # that is referenced while constructing
     grammar.add_rule(r1)
     return grammar
Example #34
    def __init__(self, transcript, probs, state2idx={}, transition=0.5):
        """Viterbi decoding for given likelihoods and transcript.

        Note:
            Leave state2idx empty if state value == corresponding index in probs

        Args:
            transcript (list): states of the transcript
            probs (np.ndarray):  (n_frames, n_states)
                grid of minus loglikelihoods for each frame for each state
            state2idx (dict): keys are states of the transcript, values are
                indexes in the loglikelihood table (probs) for the corresponding
                states.
            transition: default=0.5, meaning that moving to the next state and
                staying in the current one are equally likely; in that case
                decoding is based purely on the frame probabilities.

        Examples:
            >>> probs = [[-1, -2, -3],
            >>>          [-2, -1, -3],
            >>>          [-4, -2, -1],
            >>>          [-1, -4, -2]]

            >>> transcript = [1, 0, 2]
            >>> v = Viterbi(transcript, probs)

            or

            >>> transcript = [10, 3, 5]
            >>> state2idx = {10: 1, 3: 0, 5: 2}
            >>> v = Viterbi(transcript, probs, state2idx=state2idx)

            >>> alignment = v.inference()
            >>> print(alignment)
            [1, 0, 0, 2]

        """
        self._grammar = Grammar(transcript)
        self._state2idx = state2idx
        self._transition_self = -np.log(transition)
        self._transition_next = -np.log(1 - transition)
        self._transitions = np.array([self._transition_self, self._transition_next])

        self._probs = probs
        self._state = self._probs[0, 0]
        self._number_frames = self._probs.shape[0]

        # probabilities matrix
        self._T1 = np.zeros((len(self._grammar), self._number_frames)) + np.inf
        self._T1[0, 0] = self._state
        # argmax matrix
        self._T2 = np.zeros((len(self._grammar), self._number_frames)) + np.inf
        self._T2[0, 0] = 0

        self._frame_idx = 1
Example #35
 def test_morpheme_boundary(self):
     self.configurations["MORPHEME_BOUNDARY_FLAG"] = True
     self.initialise_segment_table("plural_english_segment_table.txt")
     hmm = HMM({
         INITIAL_STATE: ['q1'],
         'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
         'q2': ([FINAL_STATE], ['z'])
     })
     grammar = Grammar(hmm)
     self.assertCountEqual(['dog', 'kat', 'dogz', 'katz'],
                           grammar.get_all_outputs())
Example #36
    def test_plural_english_grammar(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        rule_set = self.get_rule_set("plural_english_rule_set.json")

        hmm = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
            'q2': ([FINAL_STATE], ['z'])
        })

        grammar = Grammar(hmm, rule_set)
        grammar_transducer = grammar.get_transducer()
Example #37
	def test_grammar_productions(self):
		grammar = Grammar("grammars/grammar1.json")

		# Check start variable productions.
		rules = grammar.produces("S")
		self.assertEqual(rules, ["aAb"])

		rules = grammar.produces("A")
		self.assertEqual(rules, ["aAb", "#"])

		# Check nonexistent variable productions.
		rules = grammar.produces("N")
		self.assertFalse(rules)
Example #38
def test2():

    print("\n\n------ Test 2 ---------")

    # Test grammar 2 - read from file

    g = Grammar()
    g.read("Reynir.test.grammar")

    #print("Grammar:")
    #print(str(g))
    #print()

    # s = "Villi leit út eða Anna og köttur komu beint heim og kona eða maður fóru snemma inn"
    s = "kona með kött myrti mann með hálsbindi með hund og Páll fór út"
    # s = "kona með kött myrti mann með hund og Villi fór út"
    # s = "Villi leit út"

    class NameToken(Token):

        NÖFN_NF = ["Villi", "Anna", "Hlín", "Páll"]
        NÖFN_ÞF = ["Villa", "Önnu", "Hlín", "Pál"]
        NÖFN_ÞGF = ["Villa", "Önnu", "Hlín", "Páli"]

        def matches(self, terminal):
            """ Does this token match the given terminal? """
            if not terminal.name().startswith("nafn_"):
                return False
            if terminal.name().endswith("_nf"):
                return self._val in NameToken.NÖFN_NF
            if terminal.name().endswith("_þf"):
                return self._val in NameToken.NÖFN_ÞF
            if terminal.name().endswith("_þgf"):
                return self._val in NameToken.NÖFN_ÞGF
            return False

    def make_token(w):
        if w[0].isupper():
            return NameToken('nafn', w)
        return Token('orð', w)

    toklist = [make_token(w) for w in s.split()]

    p = Parser.for_grammar(g)

    forest = p.go(toklist)

    print("Parse combinations: {0}".format(Parser.num_combinations(forest)))

    Parser.print_parse_forest(forest)
Example #39
def generate_samples(grammar_dir, outfiles):
    """Generates a set of samples and writes them to the output files.

    Args:
      grammar_dir: directory to load grammar files from.
      outfiles: A list of output filenames.
    """

    f = open(os.path.join(grammar_dir, 'template.html'))
    template = f.read()
    f.close()

    htmlgrammar = Grammar()
    err = htmlgrammar.parse_from_file(os.path.join(grammar_dir, 'html.txt'))
    # CheckGrammar(htmlgrammar)
    if err > 0:
        print('There were errors parsing grammar')
        return

    cssgrammar = Grammar()
    err = cssgrammar.parse_from_file(os.path.join(grammar_dir, 'css.txt'))
    # CheckGrammar(cssgrammar)
    if err > 0:
        print('There were errors parsing grammar')
        return

    jsgrammar = Grammar()
    err = jsgrammar.parse_from_file(os.path.join(grammar_dir, 'js.txt'))
    # CheckGrammar(jsgrammar)
    if err > 0:
        print('There were errors parsing grammar')
        return

    # JS and HTML grammar need access to CSS grammar.
    # Add it as import
    htmlgrammar.add_import('cssgrammar', cssgrammar)
    jsgrammar.add_import('cssgrammar', cssgrammar)

    for outfile in outfiles:
        result = generate_new_sample(template, htmlgrammar, cssgrammar,
                                     jsgrammar)

        if result is not None:
            print('Writing a sample to ' + outfile)
            try:
                f = open(outfile, 'w')
                f.write(result)
                f.close()
            except IOError:
                print('Error writing to output')
Example #40
def test(string='balance.txt', spec='g1.txt'):
    from balance import BalanceLexer
    G = Grammar()
    source = open(spec,'r')
    G.generate(source)
    G.bnf2cnf()
    print "grammer==",G
    lexer= BalanceLexer()
    balance=open(string,'r')
    lexer.scanFile(balance)
    S=lexer.getStream()
    print "stream ===",S
    C=CYKChart()
    C.Build_CYK_Chart(G,S)
    print C
	def btn_parser_clicked(self):
		if(self.verify_grammar_ll1()):
			g = Grammar.text_to_grammar(self.ui.text_grammar.toPlainText())
			r = RecursiveDescentParser(g)
			self._current_parser = r
			self.ui.text_parser.setText(r.parser_code(self.log).strip().replace('\t','    '))
			QMessageBox.information(self,'Geração do parser descendente recursivo','O parser foi gerado!')
Example #42
def main():
    webStr = None
    queryString = None
    opts, args = getopt.getopt(sys.argv[1:], "i:q:", ["input", "query"])
    for o, a in opts:
        if o == "-i":
            webStr = a
        elif o == "-q":
            queryString = a

    if webStr is None or queryString is None:
        print "Incorrect usage"
        sys.exit(-1)

    xsb = XSB()
    try:
        webStr = webStr.replace("<newline>", "\n")
        polStr = "\n".join([l for l in webStr.split("\n") if ":-" in l])
        policy = Policy.fromString(escapeCharacters(polStr))
        query = Atom.fromElements(Grammar.parseAtom(escapeCharacters(queryString)))
        policy.processPolicy()
        policy.checkQuery(query)
        xsb.loadPolicy(policy)
        print xsb.query(query)
        xsb.close()
    except Exception as e:
        print "Error:", e
        xsb.close()
        sys.exit(-1)
	def test_parser_code_nonterminal(self):
		s = "S -> A B C\n"
		s +="A -> a A | &\n"
		s +="B -> b B | A C d\n"
		s +="C -> c C | D\n"
		s +="D -> &"
		g = Grammar.text_to_grammar(s)
		r = RecursiveDescentParser(g)

		c = '''\
def S():
	global current_symbol
	if current_symbol in ['a', 'b', 'c', 'd']:
		A()
		B()
		C()
\t
	else:
		raise Exception('S',['a', 'b', 'c', 'd'],current_symbol)'''
		self.assertEqual(c.strip(),r._parser_code_nonterminal('S').strip())

		c = '''\
def A():
	global current_symbol
	if current_symbol in ['a']:
		if current_symbol == 'a':
			next_lexic_symbol()
		else:
			raise Exception('A','a',current_symbol)
		A()'''
		self.assertEqual(c.strip(),r._parser_code_nonterminal('A').strip())
	def verify_grammar_ll1(self):
		self.log('Verificando se a gramática é LL(1)')

		try:
			g = Grammar.text_to_grammar(self.ui.text_grammar.toPlainText())
		except Exception:
			QMessageBox.critical(self,'Erro durante criação da gramática','O texto que foi tentado a conversão para gramática não é válido')
			raise Exception('Erro durante criação da gramática','O texto que foi tentado a conversão para gramática não é válido')

		try:
			g.is_ll1(self.log)
			self.log('A gramática é LL(1)!')
			return True
		except Exception as err:
			if err.args[0] == 'LEFT_RECURSION':
				nts = ', '.join(err.args[1])
				QMessageBox.critical(self,'Recursão à esquerda','Os seguintes não terminais levam a uma recursão à esquerda:\n\t%s'%(nts))
				self.log('Recursão a esquerda encontrada encontrada nos não terminais: %s'%(nts))
				raise Exception('Recursão à esquerda','Os seguintes não terminais levam a uma recursão à esquerda: %s'%(nts))
			elif err.args[0] == 'LEFT_FACTORING':
				nts = ', '.join(err.args[1])
				QMessageBox.critical(self,'Fatoração à esquerda','Os seguintes não terminais não estão fatorados à esquerda:\n\t%s'%(nts))
				self.log('Não fatoração encontrada nos não terminais: %s'%(nts))
				raise Exception('Fatoração à esquerda','Os seguintes não terminais não estão fatorados à esquerda: %s'%(nts))
			elif err.args[0] == 'FIRST_FOLLOW_CONFLICT':
				nts = ', '.join(err.args[1])
				QMessageBox.critical(self,'Conflito first/follow','Houve conflito entre o first e o follow dos seguintes não terminais:\n\t%s'%(nts))
				self.log('Conflito first/follow encontrado nos não terminais: %s'%(nts))
				raise Exception('Conflito first/follow','Houve conflito entre o first e o follow dos seguintes não terminais: %s'%(nts))
			else:
				QMessageBox.critical(self,'Erro inesperado durante verificação LL(1)',err.__repr__())
				raise Exception('Erro inesperado durante verificação LL(1)',err.__repr__())
Example #45
	def __init__(self, code = "zxx"):
		"""
		Create a lect object.
		A I{lect} is a language variety; it can be a spoken or a written form, a colloquial, mediatic or standard form, and so on.

		It wraps serialization and high-level features.

		It contains three independent internal members:
			- L{lexicon<lexicon>}
			- L{grammar<grammar>}
			- L{inflections<inflection>}

		@type code: str
		@param code:
			A language code according to U{ISO<http://www.iso.org>} standard.

			For the language codes, refer to 639-3 specifications.

			A country/variety code and a representation system might be added: C{eng-US}, C{esp:ERG}, C{por-BR:IPA}
		"""
		self.code = code
		self.name = u""
		self.english_name = ""
		self.__p_o_s = ()
		self.__lemma_categories = {}
		self.__categories = {}
		self.grammar = Grammar(code)
		self.lexicon = Lexicon()
		self.inflections = Inflections()
		self.properties = {"separator" : " ", "capitalization" : "3"} #Lexical and Initials
	def test_parser_code_production(self):
		s = "S -> A B C\n"
		s +="A -> a A | &\n"
		s +="B -> b B | A C d\n"
		s +="C -> c C | &"
		g = Grammar.text_to_grammar(s)
		r = RecursiveDescentParser(g)

		c = '''\
	A()
	B()
	C()'''
		self.assertEqual(c.strip(),r._parser_code_production(Production('S','A B C'),'S').strip())

		c = '''\
	if current_symbol == 'a':
		next_lexic_symbol()
	else:
		raise Exception('A','a',current_symbol)
	A()'''
		self.assertEqual(c.strip(),r._parser_code_production(Production('A','a A'),'A').strip())

		c = '''\
	A()
	C()
	if current_symbol == 'd':
		next_lexic_symbol()
	else:
		raise Exception('B','d',current_symbol)'''
		self.assertEqual(c.strip(),r._parser_code_production(Production('B','A C d'),'B').strip())
Example #47
 def fromString(self, string):
     policy=Policy()
     elements = Grammar.parsePolicy(string)
     for ruleElements in elements:
         rule = Rule.fromElements(ruleElements)
         policy.rules.append(rule)
     return policy
Example #48
	def test_grammar_rules(self):
		grammar = Grammar("grammars/grammar1.json")

		# Check that the correct rules are returned.
		rule = grammar.get_rule("S", "a")
		self.assertEqual(rule, "aAb")

		rule = grammar.get_rule("A", "#")
		self.assertEqual(rule, "#")

		# Check nonexistent input symbol.
		rule = grammar.get_rule("S", "k")
		self.assertFalse(rule)

		# Check nonexistent variable.
		rule = grammar.get_rule("N", "a")
		self.assertFalse(rule)
def solve():
    G = Grammar()
    source = open("cky.txt",'r')
    G.generate(source)
    G.bnf2cnf()
    print "grammer==",G
    lexer= Telescope()
    balance=open('telescope','r')
    lexer.scanFile(balance)
    S=lexer.getStream()
    print "stream ===",S
    C=CYKChart()
    C.Build_CYK_Chart(G,S)
    print C
    genDot(C,"cky.dot")
    system("dot -Tjpg cky.dot -o cky.jpg")
    print "cky.jpg created"
Example #50
def main(args):
    lexarname =None
    gramarspec =None
    inputfile =None
    outputtype =None
    outputfile =None
    argc= len(args)
    if argc == 1: 
        print "usage: main.py  lexarname, gramarspec, inputfile,[output-type] [output-file ]\n"
        return
    if argc > 1 : 
        lexarname = args[1]
    if argc > 2 :
        gramarspec = args[2]
    if argc > 3 : 
        inputfile = args[3]
    if argc > 4 :
        outputtype=args[4]
    else:
        outputtype="dot"
    if argc > 5 :
        outputfile = args[5]
    else:
        outputfile = inputfile
    G = Grammar()
    source = open(gramarspec,'r')
    G.generate(source)
    G.bnf2cnf()
    print "grammer==",G
    if sep in lexarname:
        lexarname = lexarname.replace(sep,".")
    lexerclass=__import__(lexarname)
    lexer=lexerclass.Lexer()
    lexer.scan(inputfile)
    S=lexer.getStream()
    print "stream ===",S
    C=CYKChart()
    C.Build_CYK_Chart(G,S)
    print C
    print C.graph
    if outputtype=="dot":
        genDot(C,outputfile)
        system("dot -Tjpg %s  -o %s "%(outputfile, outputfile)) # todo, see if dot takes STDIN so I can pipe this to it 
        print "%s generated"%(outputfile)
    elif outputtype=="js":
       genVIZ(C,outputfile)
Example #51
def gramaticaAutomato(input):
    input = entrytext.get()
    l = LeitorG(input)
    dict, termi,nonter,ini = l.ler()
    g = Grammar(dict,termi,nonter,ini)
    s, inicial, final = g.convertGtoAF()
    a = Automato(s,inicial,final)
    a.printAtomato()
    a.writeAutomataToFile(input)

    input = input.replace('.in', '')
    data_file = open('../testes/'+input+'.out')
    data = data_file.read()
    data_file.close()
    test = Tk.Tk()
    Results = Tk.Label(test, text = data)
    Results.grid(row = 20, column = 3, sticky= Tk.W)
Example #52
def automatoGramatica(input):
    l = Leitor(input)
    dict, ini, final = l.ler()
    a = Automato(dict,ini,final)

    prod,terminais,nonTerminais,inicial = a.automataToGrammar()
    g = Grammar(prod,terminais,nonTerminais,inicial)
    g.printGrammar()
    g.writeGrammarToFile(input)

    input = input.replace('.in', '')
    data_file = open('../testes/'+input+'.out')
    data = data_file.read()
    data_file.close()
    test = Tk.Tk()
    Results = Tk.Label(test, text = data)
    Results.grid(row = 20, column = 3, sticky= Tk.W)
    def get_grammar(self):
        print '-----------------  BEGIN regex_to_grammar -----------------\n'

        binary_op = ['|', '.']
        terminals = set(list(self.re))
        if '+' in terminals:
            terminals.remove('+')
        if '*' in terminals:
            terminals.remove('*')
        if '|' in terminals:
            terminals.remove('|')
        if '.' in terminals:
            terminals.remove('.')

        for i in self.re:

            if i in terminals:
                # print 'Stack=',self.stack ,'char', i
                self.stack.append(i)
            else:
                if i not in binary_op:
                    # print 'Stack=',self.stack ,'char', i
                    self.stack.append(
                        self.create_new_rules(i, self.stack.pop()))
                else:
                    # print 'Stack=',self.stack ,'char', i
                    dummy = self.create_new_rules(i, self.stack[-2:])
                    a = self.stack.pop()
                    b = self.stack.pop()

                    self.stack.append(dummy)

        self.dictionary['S'] = [('S', ''.join(self.stack.pop()))]
        # print "Dictionary/Grammar:", self.dictionary
        # print 'Terminals:', list(terminals)
        # print 'Non Terminals:', self.dictionary.keys()
        # print 'Number of rules:', len(self.dictionary)
        grammar_ = Grammar(
            set(self.dictionary.keys()), terminals, self.dictionary, 'S')
        
        print "Grammar: ",
        grammar_.print_grammar()
        print grammar_.grammar[2]
        print '\n-----------------  END regex_to_grammar -----------------\n\n'
        return grammar_
Example #54
def main():
    pygame.midi.init()
    player= pygame.midi.Output(0)
    player.set_instrument(25,1)

    sounds = {'C': 48, 'F': 53,'G': 55}

    nonterminals = ['q', 'w']

    productions = {'q': [("CCCq", 1), ("FGw", 1)], 'w': [("FCCq", 1)]}

    gr = Grammar("q", nonterminals, productions)

    while True:
        gr.make_production()
        music = gr.show_word()
        for note in music:
            chord(player, sounds[note])
	def gr_fa_btn_clicked(self, table):
		st = self.ui.gr_text.toPlainText()
		if st == '':
			st = "S -> aS | a | bS | b"

		gr = Grammar.text_to_grammar(st)
		fa = gr.to_finite_automaton()

		self.add_fa_on_list("GR => FA", fa)
		self.set_fa_on_table(fa, table)
	def test_text_to_grammar(self):
		s = "S -> a"
		g = Grammar.text_to_grammar(s)
		self.assertEqual(g._productions, {Production('S','a')})

		s = "S -> b S | a"
		g = Grammar.text_to_grammar(s)
		self.assertEqual(g._productions, {Production('S','a'), Production('S',['b','S'])})

		s = "S1 -> b S1 a | a"
		g = Grammar.text_to_grammar(s)
		self.assertEqual(g._productions, {Production('S1','a'), Production('S1',['b','S1','a'])})

		s = "C -> if E then C C' | comando\n"
		s +="C' -> else C | &\n"
		s +="E -> exp"
		g = Grammar.text_to_grammar(s)
		self.assertEqual(g._productions, {Production("C","if E then C C'"), Production("C","comando"),\
		 Production("C'","else C"), Production("C'","&"), Production("E","exp")})
def solve():
    from os import system
    G = Grammar()
    source = open("infix.txt",'r')
    G.generate(source)
    G.bnf2cnf()
    print "grammer==",G
    lexer= Infix()
    balance=open('input.txt','r')
    lexer.scanFile(balance)
    S=lexer.getStream()
    print "stream ===",S
    C=CYKChart()
    C.Build_CYK_Chart(G,S)
    print C
    print C.graph
    genDot(C,"infix.dot")
    system("dot -Tjpg infix.dot -o infix.jpg")
    print "infix.jpg created"
def delete_useless_nonterminals(grammar):
	new_rules = list(grammar.rules)
	new_nonterminals = set(grammar.nonterminals)

	while True:
		useless_nonterminals = find_useless_nonterminals(new_rules)
		if not useless_nonterminals:
			break

		new_rules = delete_useless_nonterminals_from_rules(new_rules, useless_nonterminals)
		new_rules = delete_useless_nonterminals_rules(new_rules, useless_nonterminals)
		new_nonterminals.difference_update(useless_nonterminals) 

	new_grammar = Grammar()
	new_grammar.axiom = grammar.axiom
	new_grammar.terminals = set(grammar.terminals)
	new_grammar.nonterminals = new_nonterminals
	new_grammar.rules = new_rules
	return new_grammar
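The helpers called here (find_useless_nonterminals and the two delete_* filters) are not shown. One plausible reading, assuming the same rule conventions as the delete_nonderivable_nonterminals example earlier (left_side/right_side lists and a Nonterminal class in scope), is that a "useless" nonterminal is a non-productive one, i.e. one that can never derive a string of terminals. A sketch under that assumption:

def find_useless_nonterminals(rules):
    # Mark a nonterminal productive if some rule for it has a right-hand
    # side made only of terminals and already-productive nonterminals.
    productive = set()
    changed = True
    while changed:
        changed = False
        for rule in rules:
            lhs = rule.left_side[0]
            if lhs in productive:
                continue
            if all(not isinstance(symbol, Nonterminal) or symbol in productive
                   for symbol in rule.right_side):
                productive.add(lhs)
                changed = True
    defined = {rule.left_side[0] for rule in rules}
    return defined - productive


def delete_useless_nonterminals_from_rules(rules, useless):
    # Drop every rule that mentions a useless nonterminal on its right-hand side.
    return [rule for rule in rules
            if not any(symbol in useless for symbol in rule.right_side)]


def delete_useless_nonterminals_rules(rules, useless):
    # Drop every rule that defines a useless nonterminal.
    return [rule for rule in rules if rule.left_side[0] not in useless]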