예제 #1
0
def main(args):
    """Parse each input sentence, augment the forest with reversed binary
    rules, and print the result.

    ``args.input`` is an iterable of sentences; ``args.show_permutations``
    toggles the permutation-count report.
    """
    for input_str in args.input:
        fsa = make_linear_fsa(input_str)
        cfg = make_grammar(fsa)
        parser = Earley(cfg, fsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print('NO PARSE FOUND')
            continue
        # Collect first, add second: mutating the forest while iterating
        # over it would be unsafe.
        new_rules = [Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)
                     for rule in forest
                     if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
        for rule in new_rules:
            forest.add(rule)
        print('# FOREST')
        print(forest)
        print()

        if args.show_permutations:
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            # Sort by permutation key (plain lambda replaces the
            # Python 2-only tuple-unpacking lambda).
            for p, n in sorted(counts['p'].items(), key=lambda kv: kv[0]):
                print(p, n)
                total += n
            print(len(counts['p']), total)
def main(args):
    """Parse each sentence in ``args.input``, add a reversed copy of every
    all-nonterminal rule of arity > 1 to the forest, and print it.

    When ``args.show_permutations`` is set, also print the permutation
    counts derived from the forest.
    """
    for input_str in args.input:
        fsa = make_linear_fsa(input_str)
        cfg = make_grammar(fsa)
        parser = Earley(cfg, fsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print('NO PARSE FOUND')
            continue
        # Gather the new rules before adding them, so the forest is not
        # mutated during iteration.
        new_rules = [Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)
                     for rule in forest
                     if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
        for rule in new_rules:
            forest.add(rule)
        print('# FOREST')
        print(forest)
        print()

        if args.show_permutations:
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            # Sorted by permutation key; avoids the Python 2-only
            # tuple-parameter lambda.
            for p, n in sorted(counts['p'].items(), key=lambda kv: kv[0]):
                print(p, n)
                total += n
            print(len(counts['p']), total)
예제 #3
0
def main(args):
    """Intersect the weighted CFG read from ``args.grammar`` with each
    sentence in ``args.input`` and print the resulting forest.

    When ``args.show_permutations`` is set, also print per-permutation
    derivation counts.
    """
    wcfg = WCFG(read_grammar_rules(args.grammar))

    for input_str in args.input:
        wfsa = make_linear_fsa(input_str)
        parser = Earley(wcfg, wfsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print('NO PARSE FOUND')
            continue
        # Collect first, add second: do not mutate the forest mid-iteration.
        new_rules = [Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)
                     for rule in forest
                     if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
        for rule in new_rules:
            forest.add(rule)
        print('# FOREST')
        print(forest)
        print()

        if args.show_permutations:
            print('# PERMUTATIONS')
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            # Sort by permutation key (plain lambda instead of the
            # Python 2-only tuple-unpacking form).
            for p, n in sorted(counts['p'].items(), key=lambda kv: kv[0]):
                print('permutation=(%s) derivations=%d'
                      % (' '.join(str(i) for i in p), n))
                total += n
            print('permutations=%d derivations=%d' % (len(counts['p']), total))
            print()
예제 #4
0
파일: main.py 프로젝트: Rigeru/NLP
def earley(sentence):
    """Run the Earley parser over *sentence* using the project rule file,
    printing the chart states and the trace from the root."""
    parser = Earley()
    parser.readCFGRules(FilePath.ROOT + "rules.txt")
    parser.parse(sentence)
    parser.showStates()
    print("")
    parser.traceRoot()
예제 #5
0
def test_predict():
    """predict() must register the situation (S -> a, 0, 0) at level 0."""
    recognizer = Earley('a')
    recognizer.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    recognizer.predict(0)

    expected = Situation('S', 'a', 0, 0)
    assert any(sit == expected for sit in recognizer.situations_dict[0])
예제 #6
0
def test_scan():
    """scan() over 'a' must advance (S -> a, 0, 0) to (S -> a, 0, 1) at level 1."""
    recognizer = Earley('a')
    recognizer.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    recognizer.situations_dict[0].add(Situation('S', 'a', 0, 0))
    recognizer.scan(0, 'a')

    expected = Situation('S', 'a', 0, 1)
    assert any(sit == expected for sit in recognizer.situations_dict[1])
예제 #7
0
def test_intersection_weights():
    """Check that Earley intersection rescales rule weights by FSA arc weights.

    Builds a single-state automaton accepting every grammar terminal at
    weight 0.0, except 'dog' which also gets an arc at -0.5 (log semiring),
    then inspects the reweighted unigram rule.
    """
    # Load the grammar
    grammar = "/home/cxiao/pcfg_sampling/examples/cfg"
    grammarfmt = "bar"
    wcfg = load_grammar(grammar, grammarfmt, transform=float)
    # Construct the wdfsa
    wfsa = WDFSA()
    for word in wcfg.terminals:
        wfsa.add_arc(0, 0, make_terminal(word), 0.0)
    wfsa.add_arc(0, 0, make_terminal('dog'), -0.5)
    wfsa.make_initial(0)
    wfsa.make_final(0)
    # Intersection
    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if forest.get('[NN,0-0]')[1].log_prob == -1.7039:
        print("Succeed, the earley intersection correctly changes the weight for a unigram automata")
예제 #8
0
def get_forest(input_str, wcfg):
    """Intersect *wcfg* with the linear FSA built from *input_str*.

    Returns the forest augmented with a reversed copy of every
    all-nonterminal rule of arity > 1, or the string 'NO PARSE FOUND' when
    the intersection is empty (kept as-is for backward compatibility with
    existing callers).
    """
    wfsa = make_linear_fsa(input_str)

    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if not forest:
        return 'NO PARSE FOUND'

    # Collect the reversed rules first, then add them: a plain loop instead
    # of a side-effect list comprehension, and no mutation while iterating.
    new_rules = [Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)
                 for rule in forest
                 if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
    for rule in new_rules:
        forest.add(rule)

    return forest
예제 #9
0
def initialise(wcfg, wfsa, root, goal, intersection):
    """
    Calculate a first derivation based on a simpler (thus smaller/faster) version of the grammar
    Thereby determining the initial conditions.
    Only applicable with the 'milos' grammar format, i.e. non-terminals have the form: '[P1234*2_1]'
    """
    smaller = WCFG([])

    logging.debug('Creating a smaller grammar for initial conditions...')
    # Keep only short-permutation rules plus the root and unknown-word rules.
    for line in wcfg:
        if 0 < permutation_length(line.lhs) <= 2:
            smaller.add(line)
        elif line.lhs == root or line.lhs == '[UNK]':
            smaller.add(line)

    if intersection == 'nederhof':
        init_parser = Nederhof(smaller, wfsa)
    elif intersection == 'earley':
        init_parser = Earley(smaller, wfsa)
    else:
        raise NotImplementedError('I do not know this algorithm: %s' %
                                  intersection)

    logging.debug('Init Parsing...')
    init_forest = init_parser.do(root, goal)

    if not init_forest:
        print('NO PARSE FOUND')
        return {}

    logging.debug('Forest: rules=%d', len(init_forest))

    logging.debug('Init Topsorting...')
    # sort the forest
    sorted_nodes = top_sort(init_forest)

    # calculate the inside weight of the sorted forest
    logging.debug('Init Inside...')
    init_inside_prob = inside(init_forest, sorted_nodes)

    logging.debug('Init Sampling...')
    gen_sampling = GeneralisedSampling(init_forest, init_inside_prob)
    init_d = gen_sampling.sample(goal)

    return get_conditions(init_d)
예제 #10
0
def main():
    """Interactively read a word and a grammar, then report whether the
    Earley recognizer accepts the word."""
    word = input('Введите распознаваемое слово\n')
    recognizer = Earley(word)

    rule_count = int(input('Введите количество правил в грамматике: '))
    print('Введите правила грамматики в формате S -> aB')
    for i in range(rule_count):
        parts = input(f'Правило {i + 1} из {rule_count}: ').split()
        # Expect exactly "LHS -> RHS" (three whitespace-separated tokens).
        if len(parts) != 3:
            print("Неправильное правило")
            exit(1)
        recognizer.add_rule(Rule(parts[0], parts[2]))
    # Augment the grammar with the synthetic start rule.
    recognizer.add_rule(Rule('S#', 'S'))

    print("YES\n" if recognizer.get_answer() else "NO\n")
예제 #11
0
def test_final_weights():
    """Check that Earley and Nederhof agree on the goal weight, both with the
    default final weight (0.0 in the log semiring) and after changing it."""
    # Load the grammar
    grammar = "/home/cxiao/pcfg_sampling/examples/cfg"
    grammarfmt = "bar"
    wcfg = load_grammar(grammar, grammarfmt, transform=float)
    # Construct the wdfsa
    sentence = "the dog barks"
    wfsa = make_linear_fsa(sentence)
    # Intersection with the default final weight.
    parser1 = Earley(wcfg, wfsa)
    forest1 = parser1.do('[S]', '[GOAL]')
    parser2 = Nederhof(wcfg, wfsa)
    forest2 = parser2.do('[S]', '[GOAL]')
    if forest1.get('[GOAL]')[0].log_prob == forest2.get(
            '[GOAL]')[0].log_prob == 0.0:
        print("Succeed, default final weight is 0.0 in log semiring")
    # Change the final-state weight and re-run both intersections.
    wfsa.make_final(len(sentence.split()), -0.5)
    parser1 = Earley(wcfg, wfsa)
    forest1 = parser1.do('[S]', '[GOAL]')
    parser2 = Nederhof(wcfg, wfsa)
    forest2 = parser2.do('[S]', '[GOAL]')
    if forest1.get('[GOAL]')[0].log_prob == forest2.get(
            '[GOAL]')[0].log_prob == -0.5:
        print("Succeed, change final weight to -0.5 in log semiring")
예제 #12
0
파일: test.py 프로젝트: SaptakS/nlp-demo
from earley import Earley
from bottomup import BottomUp
from topdown import TopDown
import nltk
import matplotlib.pyplot as plt

# Benchmark three parsers (Earley, bottom-up, top-down) on the first eight
# ATIS test sentences shipped with NLTK, then plot sentence length vs time.
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)

len_array = []
edges_array = []
time_earley_array = []
time_topdown_array = []
time_bottomup_array = []
for i in range(8):
    print("Iteration--->", i)
    testsentence = sentences[i][0]
    len_array.append(len(testsentence))
    ep = Earley()
    edges_array.append(ep.parse(testsentence))
    time_earley_array.append(ep.time(testsentence))
    bu = BottomUp()
    time_bottomup_array.append(bu.time(testsentence))
    td = TopDown()
    time_topdown_array.append(td.time(testsentence))

plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array,
         'b--')
plt.show()
예제 #13
0
파일: test.py 프로젝트: SaptakS/nlp-demo
from earley import Earley
from bottomup import BottomUp
from topdown import TopDown
import nltk
import matplotlib.pyplot as plt

# Time Earley, bottom-up, and top-down parsers on eight ATIS sentences and
# plot sentence length against parse time for Earley and top-down.
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)

len_array = []
edges_array = []
time_earley_array = []
time_topdown_array = []
time_bottomup_array = []
for i in range(8):
    print("Iteration--->", i)
    testsentence = sentences[i][0]
    len_array.append(len(testsentence))
    ep = Earley()
    edges_array.append(ep.parse(testsentence))
    time_earley_array.append(ep.time(testsentence))
    bu = BottomUp()
    time_bottomup_array.append(bu.time(testsentence))
    td = TopDown()
    time_topdown_array.append(td.time(testsentence))

plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array, 'b--')
plt.show()
예제 #14
0
def test_get_answer():
    """get_answer() accepts words derivable from the grammar and rejects others."""
    # 'a' is derivable via S -> a
    recognizer = Earley('a')
    recognizer.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    assert recognizer.get_answer()

    # 'ab' is derivable via S -> aA, A -> b
    recognizer = Earley('ab')
    recognizer.rules_list = [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')]
    assert recognizer.get_answer()

    # 'ac' is not derivable by the same grammar
    recognizer = Earley('ac')
    recognizer.rules_list = [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')]
    assert not recognizer.get_answer()
예제 #15
0
from earley import Earley

if __name__ == '__main__':
    import sys
    import json

    if len(sys.argv) < 3:
        print('Program needs to be invoked like this:\n')
        print('python earleyParser.py grammar.json "sentence to parse"\n')
        print('you can turn on the debug mode changing the debug variable to 1')
        print('to see the whole process')
        print('\nExamples of phrases the program recognises:')
        print('--->I want to know about the subjects')
        print('--->tell me about the content')
        print('--->about prizing')
        print('--->enrollment')
        print('--->tell me about subjects')
        sys.exit()

    # Use a context manager so the grammar file is closed after loading
    # (the original left the handle open).
    with open(sys.argv[1]) as grammar_file:
        grammar = json.load(grammar_file)
    words = sys.argv[2].split(' ')

    e = Earley()
    debug = 0
    chart = e.earleyParse(words, grammar, debug)

예제 #16
0
def exact_sample(wcfg,
                 wfsa,
                 root='[S]',
                 goal='[GOAL]',
                 n=1,
                 intersection='nederhof'):
    """
    Sample a derivation given a wcfg and a wfsa, with exact sampling, a
    form of MC-sampling.

    Draws *n* derivations from the intersection of *wcfg* and *wfsa*
    according to the inside-weight distribution, then prints each distinct
    derivation with its empirical estimate, exact probability, and score.
    Returns False when no parse is found.
    """
    samples = []

    if intersection == 'nederhof':
        parser = Nederhof(wcfg, wfsa)
        logging.info('Using Nederhof parser')
    elif intersection == 'earley':
        parser = Earley(wcfg, wfsa)
        logging.info('Using Earley parser')
    else:
        raise NotImplementedError('I do not know this algorithm: %s' %
                                  intersection)

    logging.debug('Parsing...')
    forest = parser.do(root, goal)

    if not forest:
        print('NO PARSE FOUND')
        return False

    logging.debug('Forest: rules=%d', len(forest))

    logging.debug('Topsorting...')
    # sort the forest
    sorted_nodes = top_sort(forest)

    # calculate the inside weight of the sorted forest
    logging.debug('Inside...')
    inside_prob = inside(forest, sorted_nodes)

    gen_sampling = GeneralisedSampling(forest, inside_prob)

    logging.debug('Sampling...')
    it = 0
    while len(samples) < n:
        it += 1
        if it % 10 == 0:
            logging.info('%d/%d', it, n)

        # retrieve a random derivation, with respect to the inside weight distribution
        samples.append(gen_sampling.sample(goal))

    # 'count' replaces the original loop variable 'n', which shadowed the
    # sample-size parameter.
    counts = Counter(tuple(d) for d in samples)
    for d, count in counts.most_common():
        score = sum(r.log_prob for r in d)
        prob = math.exp(score - inside_prob[goal])
        print('# n=%s estimate=%s prob=%s score=%s' % (
            count, float(count) / len(samples), prob, score))
        tree = make_nltk_tree(d)
        inline_tree = inlinetree(tree)
        print(inline_tree, "\n")