def main():
    """Induce a PCFG from the Penn Treebank sample and Viterbi-parse a sentence.

    Prints the number of treebank files, then for the test sentence prints
    the number of parses found, the root label of the best parse, and the
    total probability mass of all parses rooted at S.
    """
    print(len(treebank.fileids()))

    # Collect CNF-transformed productions from every parsed sentence.
    # (The original commented out this initialization, which would raise
    # NameError at `productions += ...`.)
    productions = []
    for item in treebank.fileids():
        for tree in treebank.parsed_sents(item):
            # Remove branches A-B-C into A-B+C
            tree.collapse_unary(collapsePOS=False)
            # Remove A->(B,C,D) into A->B,C+D->D (2nd-order horizontal markovization)
            tree.chomsky_normal_form(horzMarkov=2)
            productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)  # This is a PCFG

    parser = ViterbiParser(grammar)
    parser.trace(0)

    sent = "John will join the board"
    tokens = sent.split()
    try:
        # check_coverage raises ValueError when a token has no lexical
        # production; narrowed from the original bare `except:` so real
        # parser bugs are no longer silently swallowed.
        grammar.check_coverage(tokens)
    except ValueError:
        print("Some words not covered")
        return

    print("All words covered")
    parses = parser.parse_all(tokens)
    if parses:
        lp = len(parses)
        print(lp)
        print(parses[0].label())
        # Total probability of the parses whose root label is S.
        p = reduce(lambda a, b: a + b.prob(),
                   list(filter(lambda x: x.label() == 'S', parses)), 0.0)
    else:
        p = 0
    print("Probability:", p)
def test():
    """A test to check if the changes I made have the intended effect.

    Parses a toy sentence with the Viterbi parser and prints the parses.
    Uses ``parse_all`` because ``nbest_parse`` was removed in NLTK 3.
    """
    import nltk
    from nltk.parse import ViterbiParser

    sent = 'I saw the man with my telescope'
    tokens = sent.split()
    grammar = nltk.toy_pcfg1
    parser = ViterbiParser(grammar)
    parser.trace(3)
    parses = parser.parse_all(tokens)
    print(parses)
def test():
    """A test to check if the changes I made have the intended effect.

    Parses a toy sentence with the Viterbi parser and prints the parses.
    ``nbest_parse`` no longer exists in NLTK 3; ``parse_all`` is the
    supported replacement.
    """
    import nltk
    from nltk.parse import ViterbiParser

    sent = 'I saw the man with my telescope'
    tokens = sent.split()
    grammar = nltk.toy_pcfg1
    parser = ViterbiParser(grammar)
    parser.trace(3)
    parses = parser.parse_all(tokens)
    print(parses)
def parse_sentence(self, sent): """ Parse sent using induced grammar Visualize the most likely parse tree for sent :return: None. Save parsing results to pcfg.txt """ if self.grammar is None: raise ValueError("PCFG hasn't been induced yet.") # other parser option(s): e.g., parser = pchart.InsideChartParser(self.grammar) parser = ViterbiParser(self.grammar) parser.trace(3) # http://www.nltk.org/api/nltk.parse.html sys.stdout = open('pcfg.txt', 'w') parses = parser.parse(sent) for parse in parses: print(parse) # visualize the tree: print(parse.draw())
def Parser_Section():
    """Parse a demo sentence with the Viterbi parser, reporting the time
    taken and the summed probability of all parses found."""
    demos = [('I saw John through the telescope', toy_pcfg1)]
    sent, grammar = demos[0]
    # print(grammar)

    # Tokenize the sentence.
    tokens = sent.split()

    viterbi = ViterbiParser(grammar)
    viterbi.trace(0)  # Use this to change verbosity

    started = time.time()
    found = viterbi.parse_all(tokens)
    print("Time:", time.time() - started)

    if found:
        lp = len(found)
        p = reduce(lambda acc, t: acc + t.prob(), found, 0.0)
    else:
        p = 0
    print("Probability:", p)
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is prompted to
    select which demo to run, and how many parses should be found; and then
    each parser is run on the same demo, and a summary of the results are
    displayed.
    """
    import sys, time

    from nltk import tokenize
    from nltk.parse import ViterbiParser
    from nltk.grammar import toy_pcfg1, toy_pcfg2

    # Define two demos. Each demo has a sentence and a grammar.
    demos = [
        ('I saw the man with my telescope', toy_pcfg1),
        ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2),
    ]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i + 1, demos[i][0]))
        print(' %r' % demos[i][1])
    print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except (ValueError, IndexError):
        # Narrowed from a bare except: int() raises ValueError on bad input
        # and demos[snum] raises IndexError on an out-of-range choice.
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    # Renamed from `time` so the time module is not shadowed.
    elapsed = time.time() - t
    average = (reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
               if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs) # Parses Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (elapsed, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees
        print(' please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is prompted to
    select which demo to run, and how many parses should be found; and then
    each parser is run on the same demo, and a summary of the results are
    displayed.

    Ported from Python 2 (print statements) to Python 3, and from the
    removed ``nbest_parse`` API to ``parse_all``.
    """
    import sys, time
    import nltk
    from nltk import tokenize
    from nltk.parse import ViterbiParser

    # Define two demos. Each demo has a sentence and a grammar.
    demos = [('I saw the man with my telescope', nltk.toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope',
              nltk.toy_pcfg2)]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i + 1, demos[i][0]))
        print(' %r' % demos[i][1])
    print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except (ValueError, IndexError):
        # Narrowed from a bare except: bad int -> ValueError,
        # out-of-range choice -> IndexError.
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
    parser.trace(3)
    t = time.time()
    # nbest_parse() was removed in NLTK 3; parse_all() is the replacement.
    parses = parser.parse_all(tokens)
    elapsed = time.time() - t  # renamed so the time module is not shadowed
    if parses:
        average = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        average = 0
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs) # Parses Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (elapsed, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees
        print(' please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
def create_viterbi_parser(grammar, pickle_it=False, filename="viterbi"):
    """Build a ViterbiParser for ``grammar``, optionally pickling it.

    :param grammar: the PCFG the parser will use.
    :param pickle_it: if True, dump the parser to ``<var_dir><filename>-parser.p``.
    :param filename: basename for the pickle file.
    :return: the configured ViterbiParser (tracing disabled).
    """
    parser = ViterbiParser(grammar)
    parser.trace(0)
    if pickle_it:
        # Use a context manager so the file handle is closed even if
        # pickling fails (the original left the handle open).
        with open("%s%s-parser.p" % (var_dir, filename), "wb") as fh:
            pickle.dump(parser, fh)
    return parser
def demo():
    """
    Parse every matched robot command with a PCFG learned for the current
    scene, then render an example parse tree to a PostScript file per
    sentence.

    Fixes over the original: the Python-2-only ``unicode()`` call is
    replaced by decoding at the I/O boundary, the commands file is closed
    via a context manager, and the scene-number padding no longer leaves
    ``sc`` undefined for ``max_scene >= 10000``.
    """
    import sys, time
    from nltk import tokenize
    from nltk.parse import ViterbiParser
    from nltk.grammar import toy_pcfg1, toy_pcfg2
    from nltk.draw.tree import draw_trees
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget

    max_scene = 1
    # Zero-pad the scene number to five digits (replaces the if/elif chain,
    # which produced the same widths but failed above 9999).
    sc = str(max_scene).zfill(5)
    g = 'grammar_' + sc + '.txt'
    learned_pcfg = load('/home/omari/Dropbox/robot_modified/AR/grammar/' + g)
    grammar = learned_pcfg

    # Decode the commands file as UTF-8 at the I/O boundary instead of the
    # Python-2-only unicode() call, and close it deterministically.
    with open('/home/omari/Dropbox/robot_modified/AR/hypotheses/matched_commands.txt',
              'r', encoding='utf-8') as file1:
        g1 = [i for i in file1.readlines()]

    for line in g1:
        # Lines look like "<scene>-<sent_num>-<sentence>".
        sent = line.split('\n')[0].split('-')[-1]
        scene = line.split('\n')[0].split('-')[0]
        sent_num = line.split('\n')[0].split('-')[1]
        print(line)
        if scene == '239' and sent_num == '0':
            continue

        # Tokenize the sentence.
        tokens = sent.split()

        parser = ViterbiParser(grammar)
        all_parses = {}
        parser.trace(3)
        parses = parser.parse_all(tokens)
        average = (reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
                   if parses else 0)
        num_parses = len(parses)
        for p in parses:
            all_parses[p.freeze()] = 1

        parses = all_parses.keys()
        if parses:
            p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
        else:
            p = 0

        # Render a fixed example tree for this sentence to a .ps file.
        cf = CanvasFrame()
        t = Tree.fromstring(
            '(S (CH_POS_PREPOST move) (PRE_POST (PRE (the the) '
            '(_entity (F_HSV green) (F_SHAPE sphere))) '
            '(PREPOST_connect (to to) (the the)) '
            '(POST (_F_POS (F_POS (_bottom_left (bottom bottom) (left left)))) '
            '(corner corner))))')
        tc = TreeWidget(cf.canvas(), t, draggable=1,
                        node_font=('helvetica', -14),
                        leaf_font=('helvetica', -12),
                        roof_fill='white', roof_color='black',
                        leaf_color='green4', node_color='blue4')
        cf.add_widget(tc, 10, 10)
        cf.print_to_file('/home/omari/Dropbox/robot_modified/trees/scene-' +
                         scene + '-' + sent_num + '.ps')
        cf.destroy()
############ create PCFG from the productions ####### from nltk import Nonterminal from nltk import induce_pcfg S = Nonterminal('SENT') grammar = induce_pcfg(S, productions) print(grammar) ######### Parser with CYK dynamic algorithm ######## from nltk.parse import pchart from nltk.parse import ViterbiParser from nltk.treetransforms import un_chomsky_normal_form parser = ViterbiParser(grammar) parser.trace(2) parses_bank = [] test_file = open(args.test_dir, 'wb') test_output_file = open(args.output_dir, 'wb') for i in range(valid_idx, test_idx + 1): # take the leaves of each tree of testset and store # them in the test file tokens = treebank[i][0].leaves() sentence = u" ".join(tokens) test_file.write((sentence + u"\n").encode('utf-8')) print 'parsing :', sentence # we will use lexicon knowledge to replace the
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is prompted to
    select which demo to run, and how many parses should be found; and then
    each parser is run on the same demo, and a summary of the results are
    displayed.
    """
    import sys
    import time

    from nltk import tokenize
    from nltk.grammar import PCFG
    from nltk.parse import ViterbiParser

    toy_pcfg1 = PCFG.fromstring(
        """
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """
    )

    toy_pcfg2 = PCFG.fromstring(
        """
    S -> NP VP [1.0]
    VP -> V NP [.59]
    VP -> V [.40]
    VP -> VP PP [.01]
    NP -> Det N [.41]
    NP -> Name [.28]
    NP -> NP PP [.31]
    PP -> P NP [1.0]
    V -> 'saw' [.21]
    V -> 'ate' [.51]
    V -> 'ran' [.28]
    N -> 'boy' [.11]
    N -> 'cookie' [.12]
    N -> 'table' [.13]
    N -> 'telescope' [.14]
    N -> 'hill' [.5]
    Name -> 'Jack' [.52]
    Name -> 'Bob' [.48]
    P -> 'with' [.61]
    P -> 'under' [.39]
    Det -> 'the' [.41]
    Det -> 'a' [.31]
    Det -> 'my' [.28]
    """
    )

    # Define two demos. Each demo has a sentence and a grammar.
    demos = [
        ("I saw the man with my telescope", toy_pcfg1),
        ("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2),
    ]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print(f"{i + 1:>3}: {demos[i][0]}")
        print(" %r" % demos[i][1])
    print()
    print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except (ValueError, IndexError):
        # Narrowed from a bare except: bad int -> ValueError,
        # out-of-range choice -> IndexError.
        print("Bad sentence number")
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print(f"\nsent: {sent}\nparser: {parser}\ngrammar: {grammar}")
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    # Renamed from `time` so the time module is not shadowed.
    elapsed = time.time() - t
    average = (reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
               if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print("Time (secs) # Parses Average P(parse)")
    print("-----------------------------------------")
    print("%11.4f%11d%19.14f" % (elapsed, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print("------------------------------------------")
    print("%11s%11d%19.14f" % ("n/a", len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print("Draw parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        from nltk.draw.tree import draw_trees

        print(" please wait...")
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print("Print parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        for parse in parses:
            print(parse)
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is prompted to
    select which demo to run, and how many parses should be found; and then
    each parser is run on the same demo, and a summary of the results are
    displayed.
    """
    import sys, time

    from nltk import tokenize
    from nltk.parse import ViterbiParser
    from nltk.grammar import toy_pcfg1, toy_pcfg2

    # Define the demos. Each demo has a sentence and a grammar.
    # NOTE(review): `learned_pcfg` is defined elsewhere in this module.
    demos = [('حرك الكرة الخضراء في أسفل الزاوية اليسرى', learned_pcfg),
             ('حرك الكرة', learned_pcfg),
             ('take the green pyramid and put it in the top left corner', learned_pcfg),
             ('move the pink triangle on top of the black square', learned_pcfg),
             ('move the red block and place it on top of the blue block that is on top of a green block', learned_pcfg),
             ('move the green block on top of the blue block', learned_pcfg)]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i + 1, demos[i][0]))
    print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except (ValueError, IndexError):
        # Narrowed from a bare except: bad int -> ValueError,
        # out-of-range choice -> IndexError.
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()
    print(grammar)

    parser = ViterbiParser(grammar)
    all_parses = {}
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    # Renamed from `time` so the time module is not shadowed.
    elapsed = time.time() - t
    average = (reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
               if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs) # Parses Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (elapsed, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees
        print(' please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
def parseAllTestXmls(fileList, grammar, allTestSolutionsDict, verbose=False, displayTrees=False):
    """Viterbi-parse the interval strings of each test file and score the
    resulting trees against the known solution trees.

    :param fileList: paths of the test files to parse.
    :param grammar: induced PCFG used by the ViterbiParser.
    :param allTestSolutionsDict: maps filepath -> solution tree string.
    :param verbose: print intermediate interval lists while parsing.
    :param displayTrees: pop up drawings of successful parses.
    :return: (totalCorrect, totalCorrectNonN, totalProductions, totalLeaves,
              parseTreeStrings). The no-parse early exit now returns the same
              5-tuple shape (the original returned a bare (0, 0), which broke
              callers that unpack five values).
    """
    testPitchLists = []
    testIntervalLists = []
    totalCorrect = 0
    totalCorrectNonN = 0
    totalProductions = 0
    totalLeaves = 0
    parseTreeStrings = {}
    for filepath in fileList:
        curPitchList = getPitchListFromFile(filepath)
        testPitchLists.append(curPitchList)
        testIntervalLists.append(getIntervalStringsFromPitchList(curPitchList, verbose))
        if verbose:
            print(testIntervalLists[-1])
        # (The original also did `print(tree)` here with `tree` undefined,
        # which raised NameError whenever verbose was set — removed.)
        parser = ViterbiParser(grammar)
        parser.trace(0)
        # Pre-bind so a parse failure below cannot leave `parses` undefined.
        parses = []
        try:
            parses = parser.parse_all(testIntervalLists[-1])
        except Exception as errorMsg:
            print("error parsing file " + filepath)
            print(errorMsg)
        numTrees = len(parses)
        if numTrees > 0 and displayTrees == True:
            from nltk.draw.tree import draw_trees
            draw_trees(*parses)
        if numTrees == 0:
            print("Couldn't find a valid parse, this is bad, very very bad")
            return 0, 0, totalProductions, totalLeaves, parseTreeStrings
        numCorrect = 0
        numCorrectNonN = 0
        bottomCorrect = 0
        bottomCorrectNonN = 0
        solutionTree = None
        try:
            solutionTreeStr = allTestSolutionsDict[filepath]
            solutionTree = Tree.fromstring(solutionTreeStr)
        except Exception as errorMsg:
            print("couldn't find solution for file " + filepath)
            print(errorMsg)
        if solutionTree != None and solutionTree != '':
            parseTreeStrings[filepath] = str(parses[0])
            numCorrect, numCorrectNonN = validate_tree.compareTrees(solutionTree, parses[0])
            numProductions = len(solutionTree.productions())
            totalProductions += numProductions
            # compareTreesBottomUp may relabel nodes, so snapshot afterwards.
            bottomCorrect, bottomCorrectNonN = validate_tree.compareTreesBottomUp(solutionTree, parses[0])
            parseTreeStrings[filepath + '_afterComparison'] = str(parses[0])
            totalLeaves += len(solutionTree.leaves())
        totalCorrect += bottomCorrect
        totalCorrect += numCorrect
        totalCorrectNonN += numCorrectNonN
        totalCorrectNonN += bottomCorrectNonN
    return totalCorrect, totalCorrectNonN, totalProductions, totalLeaves, parseTreeStrings
print(t) ### try RandomChartParser, UnsortedChartParser, LongestChartParser # In[ ]: parser = nltk.parse.EarleyChartParser(grammar) for t in parser.parse(tokens): print(t) # In[ ]: ### CYK parser gets the most probable parse from nltk.parse import ViterbiParser parser = ViterbiParser(grammar) parser.trace(3) parsed_sent = list(parser.parse_all(tokens)) # to convert generator to list parsed_sent[0].draw() for t in parsed_sent: print(t) # In[ ]: ### CYK parser gets the most probable parse from nltk.parse import ViterbiParser parser = ViterbiParser(grammar) parser.trace(3) parsed_sent = list(parser.parse(tokens)) # to convert generator to list parsed_sent[0].draw() for t in parsed_sent:
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is prompted to
    select which demo to run, and how many parses should be found; and then
    each parser is run on the same demo, and a summary of the results are
    displayed.
    """
    import sys
    import time

    from nltk import tokenize
    from nltk.grammar import toy_pcfg1, toy_pcfg2
    from nltk.parse import ViterbiParser

    # Define two demos. Each demo has a sentence and a grammar.
    demos = [
        ("I saw the man with my telescope", toy_pcfg1),
        ("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2),
    ]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print(f"{i + 1:>3}: {demos[i][0]}")
        print(" %r" % demos[i][1])
    print()
    print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except (ValueError, IndexError):
        # Narrowed from a bare except: bad int -> ValueError,
        # out-of-range choice -> IndexError.
        print("Bad sentence number")
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print(f"\nsent: {sent}\nparser: {parser}\ngrammar: {grammar}")
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    # Renamed from `time` so the time module is not shadowed.
    elapsed = time.time() - t
    average = (reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
               if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print("Time (secs) # Parses Average P(parse)")
    print("-----------------------------------------")
    print("%11.4f%11d%19.14f" % (elapsed, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print("------------------------------------------")
    print("%11s%11d%19.14f" % ("n/a", len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print("Draw parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        from nltk.draw.tree import draw_trees

        print(" please wait...")
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print("Print parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        for parse in parses:
            print(parse)