def main(args): for input_str in args.input: fsa = make_linear_fsa(input_str) cfg = make_grammar(fsa) parser = Earley(cfg, fsa) forest = parser.do('[S]', '[GOAL]') if not forest: print 'NO PARSE FOUND' continue new_rules = [] for rule in forest: if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)): new_rules.append( Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)) [forest.add(rule) for rule in new_rules] print '# FOREST' print forest print if args.show_permutations: counts = count_derivations(forest, '[GOAL]') total = 0 for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k): print p, n total += n print len(counts['p'].keys()), total
def main(args): for input_str in args.input: fsa = make_linear_fsa(input_str) cfg = make_grammar(fsa) parser = Earley(cfg, fsa) forest = parser.do('[S]', '[GOAL]') if not forest: print 'NO PARSE FOUND' continue new_rules = [] for rule in forest: if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)): new_rules.append(Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)) [forest.add(rule) for rule in new_rules] print '# FOREST' print forest print if args.show_permutations: counts = count_derivations(forest, '[GOAL]') total = 0 for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k): print p, n total += n print len(counts['p'].keys()), total
def main(args): wcfg = WCFG(read_grammar_rules(args.grammar)) #print 'GRAMMAR' #print wcfg for input_str in args.input: wfsa = make_linear_fsa(input_str) #print 'FSA' #print wfsa parser = Earley(wcfg, wfsa) forest = parser.do('[S]', '[GOAL]') if not forest: print 'NO PARSE FOUND' continue new_rules = [] for rule in forest: if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)): new_rules.append( Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)) [forest.add(rule) for rule in new_rules] print '# FOREST' print forest print if args.show_permutations: print '# PERMUTATIONS' counts = count_derivations(forest, '[GOAL]') total = 0 for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k): print 'permutation=(%s) derivations=%d' % (' '.join( str(i) for i in p), n) total += n print 'permutations=%d derivations=%d' % (len( counts['p'].keys()), total) print
def earley(sentence):
    """Build an Earley parser from the rules file, parse *sentence*,
    print the chart states, and trace the root of the parse."""
    # Renamed the local from 'earley' to 'parser': the original shadowed
    # this function's own name.
    parser = Earley()
    parser.readCFGRules(FilePath.ROOT + "rules.txt")
    parser.parse(sentence)
    parser.showStates()
    print("")
    parser.traceRoot()
def test_predict():
    """predict(0) must add the situation (S -> a, 0, 0) to chart cell 0."""
    earley = Earley('a')
    earley.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    earley.predict(0)
    expected = Situation('S', 'a', 0, 0)
    assert any(sit == expected for sit in earley.situations_dict[0])
def test_scan():
    """scan(0, 'a') must advance (S -> a, 0, 0) into chart cell 1."""
    earley = Earley('a')
    earley.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    earley.situations_dict[0].add(Situation('S', 'a', 0, 0))
    earley.scan(0, 'a')
    expected = Situation('S', 'a', 0, 1)
    assert any(sit == expected for sit in earley.situations_dict[1])
def test_intersection_weights(): # Load the grammar grammar = "/home/cxiao/pcfg_sampling/examples/cfg" grammarfmt = "bar" wcfg = load_grammar(grammar, grammarfmt, transform=float) # Construct the wdfsa wfsa = WDFSA() for word in wcfg.terminals: wfsa.add_arc(0, 0, make_terminal(word), 0.0) wfsa.add_arc(0, 0, make_terminal('dog'), -0.5) wfsa.make_initial(0) wfsa.make_final(0) # Intersection parser = Earley(wcfg, wfsa) forest = parser.do('[S]', '[GOAL]') if forest.get('[NN,0-0]')[1].log_prob == -1.7039: print "Succeed, the earley intersection correctly changes the weight for a unigram automata"
def get_forest(input_str, wcfg):
    """Intersect *wcfg* with the linear FSA for *input_str* and return the
    resulting forest, augmented with reversed (permutation) copies of every
    rule whose RHS is two or more nonterminals.

    Returns the string 'NO PARSE FOUND' when the intersection is empty
    (kept as-is for backward compatibility with existing callers).
    """
    wfsa = make_linear_fsa(input_str)
    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if not forest:
        return 'NO PARSE FOUND'
    # Materialise reversed() as a list: the iterator it returns is
    # single-pass and would break later len()/iteration on the rule RHS.
    new_rules = [Rule(rule.lhs, list(reversed(rule.rhs)), rule.log_prob)
                 for rule in forest
                 if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
    # Plain loop instead of a throwaway list comprehension: add() is called
    # purely for its side effect.
    for rule in new_rules:
        forest.add(rule)
    return forest
def initialise(wcfg, wfsa, root, goal, intersection):
    """
    Compute a first derivation from a smaller, faster-to-parse restriction of
    the grammar, and return its conditions as the initial state for sampling.

    Only applicable with the 'milos' grammar format, i.e. non-terminals of
    the form '[P1234*2_1]' (permutation_length() parses that encoding).

    :param wcfg: the full weighted CFG
    :param wfsa: the sentence automaton to intersect with
    :param root: root symbol of the grammar (e.g. '[S]')
    :param goal: goal symbol of the intersected forest (e.g. '[GOAL]')
    :param intersection: 'nederhof' or 'earley' — which intersection parser to use
    :returns: conditions of a sampled derivation, or {} when no parse is found
    :raises NotImplementedError: for an unknown *intersection* value
    """
    # Restrict the grammar to short permutations (length 1-2), plus the root
    # and unknown-word rules, so the initial parse stays small and fast.
    smaller = WCFG([])
    logging.debug('Creating a smaller grammar for initial conditions...')
    for line in wcfg:
        if 0 < permutation_length(line.lhs) <= 2:
            smaller.add(line)
        elif line.lhs == root or line.lhs == '[UNK]':
            smaller.add(line)
    if intersection == 'nederhof':
        init_parser = Nederhof(smaller, wfsa)
    elif intersection == 'earley':
        init_parser = Earley(smaller, wfsa)
    else:
        raise NotImplementedError('I do not know this algorithm: %s' % intersection)
    logging.debug('Init Parsing...')
    init_forest = init_parser.do(root, goal)
    if not init_forest:
        print 'NO PARSE FOUND'
        return {}
    else:
        logging.debug('Forest: rules=%d', len(init_forest))
        logging.debug('Init Topsorting...')
        # sort the forest (topological order, needed by the inside pass)
        sorted_nodes = top_sort(init_forest)
        # calculate the inside weight of the sorted forest
        logging.debug('Init Inside...')
        init_inside_prob = inside(init_forest, sorted_nodes)
        # sample one derivation w.r.t. the inside-weight distribution
        logging.debug('Init Sampling...')
        gen_sampling = GeneralisedSampling(init_forest, init_inside_prob)
        init_d = gen_sampling.sample(goal)
        return get_conditions(init_d)
def main():
    """Interactively read a word and a grammar, then report whether the
    Earley recognizer accepts the word (prints YES or NO)."""
    recognizer = Earley(input('Введите распознаваемое слово\n'))
    n = int(input('Введите количество правил в грамматике: '))
    print('Введите правила грамматики в формате S -> aB')
    for i in range(n):
        parts = input(f'Правило {i + 1} из {n}: ').split()
        if len(parts) != 3:
            print("Неправильное правило")
            exit(1)
        recognizer.add_rule(Rule(parts[0], parts[2]))
    # Augment with the start rule S# -> S before recognition.
    recognizer.add_rule(Rule('S#', 'S'))
    print("YES\n" if recognizer.get_answer() else "NO\n")
def test_final_weights(): # Load the grammar grammar = "/home/cxiao/pcfg_sampling/examples/cfg" grammarfmt = "bar" wcfg = load_grammar(grammar, grammarfmt, transform=float) # Construct the wdfsa sentence = "the dog barks" wfsa = make_linear_fsa(sentence) # Intersection parser1 = Earley(wcfg, wfsa) forest1 = parser1.do('[S]', '[GOAL]') parser2 = Nederhof(wcfg, wfsa) forest2 = parser2.do('[S]', '[GOAL]') if forest1.get('[GOAL]')[0].log_prob == forest2.get( '[GOAL]')[0].log_prob == 0.0: print "Succeed, default final weight is 0.0 in log semiring" wfsa.make_final(len(sentence.split()), -0.5) parser1 = Earley(wcfg, wfsa) forest1 = parser1.do('[S]', '[GOAL]') parser2 = Nederhof(wcfg, wfsa) forest2 = parser2.do('[S]', '[GOAL]') if forest1.get('[GOAL]')[0].log_prob == forest2.get( '[GOAL]')[0].log_prob == -0.5: print "Succeed, change final weight to -0.5 in log semiring"
from earley import Earley from bottomup import BottomUp from topdown import TopDown import nltk import matplotlib.pyplot as plt sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt') sentences = nltk.parse.util.extract_test_sentences(sentences) #sentences.sort(lambda x,y: cmp(len(x[0]), len(y[0]))) #print sentences len_array = [] edges_array = [] time_earley_array = [] time_topdown_array = [] time_bottomup_array = [] for i in range(8): print "Iteration--->", i testsentence = sentences[i][0] len_array.append(len(testsentence)) ep = Earley() edges_array.append(ep.parse(testsentence)) time_earley_array.append(ep.time(testsentence)) bu = BottomUp() time_bottomup_array.append(bu.time(testsentence)) td = TopDown() time_topdown_array.append(td.time(testsentence)) plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array, 'b--') plt.show()
def test_get_answer():
    """get_answer() accepts words generated by the grammar, rejects others."""
    cases = [
        ('a', [Rule('U', 'S'), Rule('S', 'a')], True),
        ('ab', [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')], True),
        ('ac', [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')], False),
    ]
    for word, rules, expected in cases:
        earley = Earley(word)
        earley.rules_list = rules
        assert bool(earley.get_answer()) == expected
from earley import Earley if __name__ == '__main__': import sys import json if len(sys.argv) < 3: print 'Program needs to be invoked like this:\n' print 'python earleyParser.py grammar.json "sentence to parse"\n' print 'you can turn on the debug mode changing the debug variable to 1' print 'to see the whole process' print '\nExamples of phrases the program recognises:' print '--->I want to know about the subjects' print '--->tell me about the content' print '--->about prizing' print '--->enrollment' print '--->tell me about subjects' sys.exit() grammar_file = open(sys.argv[1]) words = sys.argv[2].split(' ') grammar = json.load(grammar_file) e = Earley() debug = 0 chart = e.earleyParse(words, grammar,debug)
def exact_sample(wcfg, wfsa, root='[S]', goal='[GOAL]', n=1, intersection='nederhof'): """ Sample a derivation given a wcfg and a wfsa, with exact sampling, a form of MC-sampling """ samples = [] if intersection == 'nederhof': parser = Nederhof(wcfg, wfsa) logging.info('Using Nederhof parser') elif intersection == 'earley': parser = Earley(wcfg, wfsa) logging.info('Using Earley parser') else: raise NotImplementedError('I do not know this algorithm: %s' % intersection) logging.debug('Parsing...') forest = parser.do(root, goal) if not forest: print 'NO PARSE FOUND' return False else: logging.debug('Forest: rules=%d', len(forest)) logging.debug('Topsorting...') # sort the forest sorted_nodes = top_sort(forest) # calculate the inside weight of the sorted forest logging.debug('Inside...') inside_prob = inside(forest, sorted_nodes) gen_sampling = GeneralisedSampling(forest, inside_prob) logging.debug('Sampling...') it = 0 while len(samples) < n: it += 1 if it % 10 == 0: logging.info('%d/%d', it, n) # retrieve a random derivation, with respect to the inside weight distribution d = gen_sampling.sample(goal) samples.append(d) counts = Counter(tuple(d) for d in samples) for d, n in counts.most_common(): score = sum(r.log_prob for r in d) prob = math.exp(score - inside_prob[goal]) print '# n=%s estimate=%s prob=%s score=%s' % ( n, float(n) / len(samples), prob, score) tree = make_nltk_tree(d) inline_tree = inlinetree(tree) print inline_tree, "\n"