def compute_sentence_probability(grammar, tokens):
    """Return the probability of *tokens* under the PCFG *grammar*.

    A complete sentence gets the average probability over all Viterbi
    parses.  An incomplete sentence gets the summed probability of every
    Earley edge that consumes all tokens and still expects a terminal.
    Returns 0 when the tokens are not covered by the grammar at all.
    """
    earley_parser = nltk.EarleyChartParser(grammar, trace=0)
    viterbi_parser = nltk.ViterbiParser(grammar)
    try:
        e_chart = earley_parser.chart_parse(tokens)
    except ValueError:
        # Tokens outside the grammar's coverage cannot be parsed at all.
        return 0

    # If the sentence is complete, return the averaged Viterbi likelihood.
    v_parses = viterbi_parser.parse_all(tokens)
    if v_parses:
        return sum(t.prob() for t in v_parses) / len(v_parses)

    # If the sentence is incomplete, sum the probabilities of all edges
    # that end at the last token and whose next expected symbol is a
    # terminal (terminals are plain strings in nltk grammars).
    prob = 0
    for edge in e_chart.edges():
        if edge.end() == len(tokens) and isinstance(edge.nextsym(), str):
            prob += get_edge_prob(edge, e_chart, grammar)
    return prob
Exemple #2
0
def pcfg_parser():
    """Demo: build a small toy PCFG and Viterbi-parse one sentence.

    Rewritten for Python 3 / current NLTK: ``nltk.parse_pcfg`` was removed
    in favour of ``nltk.PCFG.fromstring``, and ``ViterbiParser.parse`` now
    returns an iterator of trees rather than a single tree.
    """
    # Alternatives for one left-hand side share a single rule line; their
    # bracketed probabilities must sum to 1.
    grammar = nltk.PCFG.fromstring("""
    S -> NP VP         [1.0]
    VP -> TV NP [0.4] | IV [0.3] | DatV NP NP [0.3]
    TV -> 'saw'        [1.0]
    IV -> 'ate'        [1.0]
    DatV -> 'gave'     [1.0]
    NP -> 'telescopes' [0.8]
    NP -> 'Jack'       [0.2]
  """)
    print(grammar)
    viterbi_parser = nltk.ViterbiParser(grammar)
    for tree in viterbi_parser.parse("Jack saw telescopes".split()):
        print(tree)
Exemple #3
0
def Viterbi_fromfile(grammarfile):
    """Read a PCFG from *grammarfile* and return a ViterbiParser for it.

    Modernized: Python 3 prints, context-managed file handling (the file
    is closed even if reading raises), and ``nltk.PCFG.fromstring``
    instead of the removed ``nltk.parse_pcfg``.
    """
    print('Build a parser from ', grammarfile)
    with open(grammarfile) as f:
        grammarstring = f.read()
    grammar = nltk.PCFG.fromstring(grammarstring)

    print('Grammar size: ', len(grammar.productions()))
    return nltk.ViterbiParser(grammar)
Exemple #4
0
def validate(text):
  """Return ``(valid, location_name)`` for *text* under the module grammar.

  *text* is parsed character-by-character (``list(text)``) with a Viterbi
  parser built from the module-level ``grammar_str``.  ``valid`` is True
  iff at least one parse exists; ``location_name`` is the joined leaves of
  the subtree selected by ``tree_iterator`` for the last parse found.
  """
  grammar = nltk.PCFG.fromstring(grammar_str)
  parser = nltk.ViterbiParser(grammar)
  valid, location_name = False, None
  for candidate in parser.parse(list(text)):
    leaves = tree_iterator(candidate).leaves()
    location_name = ''.join(leaves)
    valid = True
  return (valid, location_name)
Exemple #5
0
    def viterbi_parser(self, include_edgelabels=True):
        """Return a ViterbiParser built from this corpus' PCFG (cached).

        The PCFG itself is materialized lazily through ``_cached`` using
        the corpus' storage location; the parser instance is memoized on
        ``self._pcfg_parser`` so repeated calls are cheap.
        """
        if self._pcfg_parser is None:
            cache_path = (TigerCorpusReader.STORAGE_ROOT + u"/" +
                          TigerCorpusReader.PCFG_FILE_SUFFIX)
            # Build (or load) the PCFG on first use only.
            self._pcfg = _cached(self._pcfg, cache_path,
                                 lambda: self.pcfg(include_edgelabels))
            self._pcfg_parser = nltk.ViterbiParser(self._pcfg)
        return self._pcfg_parser
Exemple #6
0
    def _parser(self, tokens: List[str]):
        """Generate a parse tree from a list of tokens provided by the Lexer.

        Args:
            tokens: A tokenized list of commands and Entities,
                e.g. ['control_play', 'query_similar_entities', 'Justin Bieber']

        Returns:
            An nltk parse tree, as defined by the PCFG built below.
        """
        # TODO: improve the CFG to handle queries such as
        #   - Play songs faster than despicito
        #   - Play something similar to despicito but faster
        #   - Play something similar to u2 and justin bieber

        def gen_lexing_patterns(vals: List[str]):
            # TODO: entries containing an apostrophe are dropped because
            #       ' is the terminal-quoting character in the grammar
            #       below; they should eventually be escaped instead.
            usable = [v for v in vals if "\'" not in v]
            return "' | '".join(usable) or "NONE"

        # A Probabilistic Context Free Grammar (PCFG) lets us simulate
        # "operator precedence", which removes ambiguity from the grammar.
        grammar = nltk.PCFG.fromstring("""
        Root -> Terminal_Command Result         [0.6]
        Root -> Terminal_Command                [0.4]
        Result -> Entity                        [0.5]
        Result -> Unary_Command Result          [0.1]
        Result -> Result Binary_Command Result  [0.4]
        Entity -> '{}'                          [1.0]
        Unary_Command -> '{}'                   [1.0]
        Terminal_Command -> '{}'                [1.0]
        Binary_Command -> '{}'                  [1.0]
        """.format(
            gen_lexing_patterns(self.kb_named_entities),
            gen_lexing_patterns(self.keywords.get("unary").keys()),
            gen_lexing_patterns(self.keywords.get("terminal").keys()),
            gen_lexing_patterns(self.keywords.get("binary").keys()),
        ))

        # TODO: only the first tree is returned; an ambiguous grammar may
        #       yield more than one.
        return next(nltk.ViterbiParser(grammar).parse(tokens))
def sample_complete_sentence(grammar, tokens):
    """Greedily extend *tokens* until they form a complete sentence of *grammar*.

    Repeatedly appends the most probable next symbol (per
    ``predict_next_symbols``) until the Viterbi parser accepts the token
    sequence.  *tokens* should not exceed the longest possible sentence of
    the grammar — set truncate=True when calling find_closest_tokens() to
    obtain a matched prefix first.
    """
    complete_tokens = tokens[:]
    viterbi_parser = nltk.ViterbiParser(grammar)
    while not viterbi_parser.parse_all(complete_tokens):
        symbols, probs = predict_next_symbols(grammar, complete_tokens)
        try:
            complete_tokens.append(symbols[np.argmax(probs)])
        except ValueError:
            # np.argmax raises on an empty probs list: no continuation
            # exists although the sentence is incomplete.  The original
            # code only printed here and re-entered the while loop,
            # spinning forever — bail out instead.
            print(tokens)
            print(complete_tokens)
            break
    return complete_tokens
Exemple #8
0
def calc_logprior(grammar, tokens):
    """Return the log-probability of *tokens* under the PCFG *grammar*.

    The probability is the average over all Viterbi parses.  Falls back to
    ``log(1e-20)`` when the tokens cannot be parsed (no parse found, or
    tokens outside the grammar's coverage).

    Fixed for Python 3: the original used the ``print`` statement and the
    bare ``reduce`` builtin, both of which fail on Python 3.
    """
    invalid_prob = 1e-20
    viterbi_parser = nltk.ViterbiParser(grammar)
    try:
        v_parses = viterbi_parser.parse_all(tokens)
    except ValueError:
        # Tokens not covered by the grammar at all.
        return math.log(invalid_prob)

    if not v_parses:
        print('fail parse')
        return math.log(invalid_prob)

    prob = sum(t.prob() for t in v_parses) / len(v_parses)
    return math.log(prob)
 def get_processed_trees(self, grammar=None, trace=False):
     """Parse the question extract and return the resulting parse trees.

     When *grammar* names a file it is loaded from the grammars/
     directory; otherwise the grammar for the question's category under
     grammars/generic_question is used.  Set *trace* to enable parser
     tracing output.
     """
     import os
     if grammar:
         # An explicitly requested grammar takes priority.
         grammar_path = os.path.join(os.path.dirname(__file__),
                                     '../../../grammars', grammar)
     else:
         grammar_path = os.path.join(os.path.dirname(__file__),
                                     '../../../grammars/generic_question',
                                     self.question.category)
     with open(grammar_path, 'r') as grammar_fh:
         grammar_text = grammar_fh.read()
     parser = nltk.ViterbiParser(nltk.PCFG.fromstring(grammar_text))
     if trace:
         parser.trace()
     return parser.parse(self.question.question_extract)
Exemple #10
0
def main():
    """
    main function of the second and third part of PCL2 exercise 6

    call of the script via the command line. Example call:

        $ python aufgabe02.py -g grammar.txt -s sentences.txt -o out.tex

    required arguments:
     -g / --grammar: txt-file containing either a CFG or PCFG
    optional arguments:
     -s / --sents:   txt-file containing sentences (one per line)
     -o / --out:     tex-file where the trees should get written to

    Unless you set an output file, the parsed sentences are only displayed
    on the command line. Otherwise, the trees are written qtree-conform to
    the declared tex-file.

    Assuming you have LaTeX installed, you can create a pdf file with your
    trees by typing the following command in your command line:

        $ pdflatex outfile.tex

    If no sentences(-s) given, you can write your sentences directly on the command
    line. To finish the input-mode, press 'ctrl+D'.

    """
    # NOTE(review): Python 2 script (print statements). DEBUG,
    # build_tree_variations() and write_out_to_tex() are defined elsewhere
    # in the original file.
    # setting up the arguments with argparse
    argparser = argparse.ArgumentParser()
    argparser.add_argument('-o','--out', type=argparse.FileType('w'),\
        metavar='FILE', help='output file')
    argparser.add_argument('-g','--grammar', type=argparse.FileType('r'),\
        metavar='FILE', help='grammar file')
    argparser.add_argument('-s','--sents', type=argparse.FileType('r'), \
        default=sys.stdin, metavar='FILE', help='sentence file')
    args = argparser.parse_args()

    # try to form a string from the data of the grammar file
    try:
        grammar_string = "".join(args.grammar)
    # if no grammar is given, exit the script
    # (assuming that nobody wants to write the same grammar over and over again)
    except TypeError:
        print "try:\t$ python aufgabe02.py -g [grammar.txt] -s [sentence.txt] -o "\
        "[outfile.tex]\n\t(where '-s' and '-o' are optional arguments)"
        exit()

    # parsing grammar from string
    try:
        grammar = nltk.CFG.fromstring(grammar_string)
        parser = nltk.ChartParser(grammar)
    except ValueError:
        # Part of Ex03
        # if the grammar contains probabilities, take the PCFG-method
        # (and use the ViterbiParser)
        grammar = nltk.PCFG.fromstring(grammar_string)
        parser = nltk.ViterbiParser(grammar)

    if DEBUG: print "parser used:", type(parser)

    # collecting input from the given file (or: stdin by default)
    sent_file = args.sents
    # assigning the output file to a new variable
    out_file = args.out

    # list containing all possible trees for every sentence
    all_trees_from_all_sentences = []

    # parse all sentences inside the sentence file
    for sent in sent_file:
        if DEBUG: print "\nparsing sentence: ", sent, type(sent)
        sent = sent.split()
        try:
            # parsing the sentence with the given parser
            # and appending it to the bigger list (all_trees_from_all_sentences)
            all_trees_from_all_sentences.append(
                build_tree_variations(parser, sent))
            if DEBUG: print "parsing of sentence: ", sent, "DONE"
        except ValueError as e:
            print "\nERROR:", sent, "couldn't be parsed\nReason: %s" % e

    # printing the trees onto the command line with pretty print
    for tree_variations in all_trees_from_all_sentences:
        # extracting the data from the given tuple
        (sent, tree_variations) = tree_variations
        sent = " ".join(sent)
        print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
        print "sentence: '%s'" % sent
        v_enum = 1
        for tree in tree_variations:
            print "version %i" % v_enum
            # http://www.nltk.org/howto/tree.html
            tree.pretty_print(unicodelines=True, nodedist=4)
            v_enum += 1

    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    # compiling the .tex file
    if out_file:
        write_out_to_tex(all_trees_from_all_sentences, out_file, grammar)
    else:
        print "(no output file selected. write '-o outfile.tex' as an argument when " \
            "running the script if you want to generate a .tex file)"
  S -> NP VP [0.9]| VP  [0.1]
  VP -> TranV NP [0.3]
  VP -> InV  [0.3]
  VP -> DatV NP PP  [0.4]
  PP -> P NP   [1.0]
  TranV -> "saw" [0.2] | "ate" [0.2] | "walked" [0.2] | "shot" [0.2] | "book" [0.2]
  InV -> "ate" [0.5] | "walked" [0.5]
  DatV -> "gave" [0.2] | "ate" [0.2] | "saw" [0.2] | "walked" [0.2] | "shot" [0.2]
  NP -> Prop [0.2]| Det N [0.4] | Det N PP [0.4]
  Prop -> "John" [0.25]| "Mary" [0.25] | "Bob" [0.25] | "I" [0.25] 
  Det -> "a" [0.2] | "an" [0.2] | "the" [0.2] | "my" [0.2] | "that" [0.2]
  N -> "man" [0.15] | "dog" [0.15] | "cat" [0.15] | "park" [0.15] | "telescope" [0.1] | "flight" [0.1] | "elephant" [0.1] | "pajamas" [0.1]
  P -> "in" [0.2] | "on" [0.2] | "by" [0.2] | "with" [0.2] | "through" [0.2]
  """)

# Demo: Viterbi-parse one sentence with the probabilistic grammar
# (prob_grammar) defined just above this excerpt.
viterbi_parser = nltk.ViterbiParser(prob_grammar)
for tree in viterbi_parser.parse(['John', 'saw', 'a', 'telescope']):
    print(tree)
## Last week's Exercise
# Define sentences for the exercise (the last sentence is newly added here)

sentex1 = "I want a flight through Houston".split()
sentex2 = "Jack walked with the dog".split()
sentex3 = "I want to book that flight".split()
sentex4 = "John gave the dog a bone".split()

# extend the flight grammar:
flight_grammar = nltk.CFG.fromstring("""
  S -> NP VP | VP
  VP -> V NP | V NP PP
  PP -> P NP
Exemple #12
0
def pcfg_chartparser(grammarfile):
    """Build a ViterbiParser from the PCFG grammar stored in *grammarfile*."""
    # The with-statement guarantees the file is closed even if parsing
    # the grammar raises (the original leaked the handle in that case).
    with open(grammarfile) as f:
        grammar = nltk.PCFG.fromstring(f.read())
    return nltk.ViterbiParser(grammar)
# A plain (non-probabilistic) context-free grammar over a toy lexicon,
# usable with the chart-based parsers below.
grammar1 = nltk.CFG.fromstring("""
 S -> NP VP
 VP -> V NP | V NP PP
 PP -> P NP
 V -> "saw" | "ate" | "walked"
 NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
 Det -> "a" | "an" | "the" | "my"
 N -> "man" | "dog" | "cat" | "telescope" | "park"
 P -> "in" | "on" | "by" | "with"
 """)

#Using different NLTK parsers with the same grammar
# NOTE(review): groucho_grammar is defined elsewhere in the original file.
parser1 = nltk.ChartParser(groucho_grammar)
parser2 = nltk.RecursiveDescentParser(groucho_grammar)
parser3 = nltk.ShiftReduceParser(groucho_grammar)
# NOTE(review): ViterbiParser expects a probabilistic grammar; if
# groucho_grammar is a plain CFG this will fail at parse time — confirm.
parser4 = nltk.ViterbiParser(groucho_grammar)

sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']

'''print("Output according to Chart parser")
for tree in parser1.parse(sent):
  print(tree)

print("Output according to RD parser")
for tree in parser2.parse(sent):
  print(tree)

print("Output according to SR parser")
for tree in parser3.parse(sent):
  print(tree)
Exemple #14
0
# Peek at the first few productions (no effect outside a REPL/notebook).
# NOTE(review): treebank_productions, treebank and `sentence` are defined
# earlier in the original file, outside this excerpt.
treebank_productions[0:10]

# add productions for each word, POS tag
for word, tag in treebank.tagged_words():
    t = nltk.Tree.fromstring("(" + tag + " " + word + ")")
    for production in t.productions():
        treebank_productions.append(production)

# build the PCFG based grammar
treebank_grammar = nltk.grammar.induce_pcfg(
    Nonterminal('S'),
    treebank_productions
)

# build the parser
viterbi_parser = nltk.ViterbiParser(treebank_grammar)

# get sample sentence tokens
tokens = nltk.word_tokenize(sentence)

# get parse tree for sample sentence
result = list(viterbi_parser.parse(tokens))

# get tokens and their POS tags
# NOTE(review): pattern.en is a third-party package, and the bare
# `print tagged_sent` below is Python 2 syntax.
from pattern.en import tag as pos_tagger
tagged_sent = pos_tagger(sentence)

print tagged_sent

# extend productions for sample sentence tokens
Exemple #15
0
import nltk

# Load a PCFG from pcfg2.cfg via nltk's data loader ('pcfg' selects the
# grammar format) and Viterbi-parse the classic ambiguous sentence.
grammar3 = nltk.data.load('file:pcfg2.cfg', 'pcfg')
parser1 = nltk.ViterbiParser(grammar3)

sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']

# ViterbiParser yields the most probable parse(s).
for tree in parser1.parse(sent):
    print(tree)
Exemple #16
0
def Viterbi_fromgrammar(grammar):
    """Return an nltk ViterbiParser for the given probabilistic grammar."""
    parser = nltk.ViterbiParser(grammar)
    return parser
# PCFG for the assignment sentences below.  Alternatives for one
# left-hand side are separated by '|'; the bracketed numbers are rule
# probabilities and must sum to ~1 per nonterminal (nltk tolerates the
# small rounding in the V rules: 6 * .166 = .996).
P_assignment_grammar = nltk.PCFG.fromstring("""
S -> NP VP [1]
VP -> V NP NP [0.25]| V TO VP ADVP [0.25]| V ADVP [0.50]
ADVP -> ADV [0.25]| V ADV [0.25]| ADV ADVP [0.25]| ADV ADJ [0.25]
NP -> Prop [0.43]| Prop N [0.1425]| NU N [0.1425]| Det ADJ N [0.1425]| N [0.1425]
ADV -> "now" [.25]| "ago" [.25]| "not" [.25]| "always" [.25]
ADJ -> "naive" [.5]| "nice" [.5]
V -> "go" [.166]| "had" [.166]| "came" [.166]| "visit" [.166]| "may" [.166]| "are" [.166]
Prop -> "We" [.2]| "She" [.2]| "me" [.2]| "You" [.2]| "Their" [.2]
Det -> "a" [1]
N -> "yesterday" [.25]| "days" [.25]| "kids" [.25]| "party" [.25]
TO -> "to" [1]
NU -> "two" [1]
""")

P_parser = nltk.ViterbiParser(P_assignment_grammar)

# test sentences
P_sent1 = 'We had a nice party yesterday'.split()
P_sent2 = 'She came to visit me two days ago'.split()
P_sent3 = 'You may go now'.split()
P_sent4 = 'Their kids are not always naive'.split()

# additional test sentences
P_sent5 = 'Their kids had a nice party yesterday'.split()
P_sent6 = 'You are not always nice'.split()
P_sent7 = 'Their kids may go now'.split()
P_sent8 = 'Their party had kids yesterday'.split()

for treeP1 in P_parser.parse(P_sent1):
    print(treeP1)

for treeP2 in P_parser.parse(P_sent2):
Exemple #18
0
def parse(grammar, raw_sents, goldenStandard):
    """
    Parses the raw text with the provided grammar and compares chunking result to the golden standard.
    Counts false positives and false negatives of chunked noun phrases.

    NP-like constituents are those whose label contains 'NP' but is not
    'NNP'/'NNPS' (proper-noun tags).  Two constituents match when their
    leaf sequences are equal.
    """
    parser = nltk.ViterbiParser(grammar)
    falsePositives = 0
    falseNegative = 0
    amountPosTest = 0
    amountNegTest = 0
    posSucess = 0
    negSucess = 0
    for i in range(0, len(goldenStandard)):
        # Skip sentences longer than 12 tokens to keep parsing time manageable.
        if len(raw_sents[i]) > 12:
            continue
        print("==== Parsing sentence " + str(i), flush=True)
        # This will raise an exception if the tokens in the test_sentence
        # are not covered by the grammar; should not happen.
        grammar.check_coverage(raw_sents[i])
        # Test prints for seeing each parsed sentenced
        '''
        print(raw_test_set[i])
        print("[" + str(i) + "] Reference parse:")
        print(test_set[i])
        print("[" + str(i) + "] Parse trees:")'''
        for tree in parser.parse(raw_sents[i]):
            #print(tree)
            # Precision side: each NP-like constituent in the parse must
            # have a leaf-identical NP in the gold tree.
            for parsedTree in tree.subtrees():
                if 'NP' in parsedTree.label() and parsedTree.label(
                ) != 'NNP' and parsedTree.label() != 'NNPS':
                    amountPosTest += 1
                    checkSuccess = posSucess
                    for goldTree in goldenStandard[i].subtrees():
                        if 'NP' in goldTree.label() and goldTree.label(
                        ) != 'NNP' and goldTree.label() != 'NNPS':
                            if parsedTree.leaves() == goldTree.leaves():
                                posSucess += 1
                                break
                    # posSucess unchanged -> no gold NP matched this one.
                    if checkSuccess == posSucess:
                        falsePositives += 1
                        print(
                            "FALSE POSITIVE, Noun phrase not in golden standard:",
                            parsedTree.leaves())

            # Recall side: each NP-like constituent in the gold tree must
            # have a leaf-identical NP in the parse.
            for goldTree in goldenStandard[i].subtrees():
                if 'NP' in goldTree.label(
                ) and goldTree.label() != 'NNP' and goldTree.label() != 'NNPS':
                    amountNegTest += 1
                    checkSuccess = negSucess
                    for parsedTree in tree.subtrees():
                        if 'NP' in parsedTree.label() and parsedTree.label(
                        ) != 'NNP' and parsedTree.label() != 'NNPS':
                            if parsedTree.leaves() == goldTree.leaves():
                                negSucess += 1
                                break
                    if checkSuccess == negSucess:
                        falseNegative += 1
                        print(
                            "FALSE NEGATIVE, Noun phrase not in parsed tree:",
                            goldTree.leaves())
    print("false positives: ", falsePositives, "out of", amountPosTest,
          "tests")
    print("false negatives: ", falseNegative, "out of", amountNegTest, "tests")
    print("correctly parsed noun phrases:", posSucess, "out of",
          posSucess + falseNegative, "in gold standard")
Exemple #19
0
def main(sentences, grammarfile, pcfg_grammar, algo, output, \
	 to_keeps, percent_discard, beam=0):
    """Parse *sentences* with the selected algorithm and emit pruned grammars.

    *algo* selects the PCFG parser: 'viterbi', 'inside', 'random',
    'longest', 'unsorted' (beam-limited chart variants) or 'chart'
    (plain Earley chart parsing).  Returns a nonzero status on failure.

    NOTE(review): Python 2 code with mixed tab/space indentation —
    do not reindent.
    """

    grammar = nltk.data.load("file:%s" %(grammarfile))
    chart_parser = ChartParser(grammar,strategy=EARLEY_STRATEGY,trace=0)

    f = open(pcfg_grammar)
    pcfgrammar = f.read()
    f.close()

    if algo == "viterbi":
	pcfg_parser = nltk.ViterbiParser(nltk.parse_pcfg(pcfgrammar))
    elif algo == "inside":
	pcfg_parser = pchart.InsideChartParser(nltk.parse_pcfg(pcfgrammar),\
					       beam_size=beam)
    elif algo == "random":
	pcfg_parser = pchart.RandomChartParser(nltk.parse_pcfg(pcfgrammar),\
					       beam_size=beam)
    elif algo == "longest":
	pcfg_parser = pchart.LongestChartParser(nltk.parse_pcfg(pcfgrammar),\
						beam_size=beam)
    elif algo == "unsorted":
	pcfg_parser = pchart.UnsortedChartParser(nltk.parse_pcfg(pcfgrammar),\
						 beam_size=beam)	
    elif algo == "chart":
	pass
    else:
	print "unrecognized algorithm: %s" %(algo)
	return 1
	
    forest = []
    for sentence in sentences:
	parsed_sent = sentence.split()
	print "parsed_sent: %s" %(parsed_sent)
	start = datetime.now()

	if algo == "chart":
	    trees = chart_parser.nbest_parse(parsed_sent)
	else:
	    trees = pcfg_parser.nbest_parse(parsed_sent)
	    
	end = datetime.now()
	elapsed = end - start
	print "parsing time elapsed: %s" %(elapsed)
	print "parsing time elapsed: %d us" %(elapsed.microseconds)

	if (len(trees) == 0):
	    print "failed to parse: %s" %(sentence)
	    return 1;
	forest.append(trees)

    all_productions = grammar.productions()
    # randomly shuffle the productions
    all_productions = all_productions[0:len(all_productions)]
    random.shuffle(all_productions)
    random.shuffle(all_productions)

    status = 0
    for keep in to_keeps:
	for discard in percent_discard:
	    status += create_pruned_grammar(forest, all_productions, keep,\
					    discard, output)
    return status
Exemple #20
0
def pcfg_test(pcfg, trees, correct_trees, vocab):
    """Parse the sentences of *trees* with an nltk.ViterbiParser built from
    *pcfg* and report Recall, Precision and F1 of the most probable parses
    against the reference parses in *correct_trees*.

    Words absent from the grammar's lexicon are replaced by "<UNK>" before
    parsing (the grammar is expected to provide an <UNK> terminal).
    *vocab* is accepted for interface compatibility but not used here.
    """
    # Terminal vocabulary of the grammar, collected from its lexical
    # productions.  The previous implementation tested substring membership
    # in str(pcfg.productions()), so short words like "a" were never
    # flagged as missing because they matched inside longer tokens.
    lexicon = {str(prod.rhs()[0]) for prod in pcfg.productions()
               if prod.is_lexical()}

    missing = []
    edited_trees = []
    for t in trees:
        t_string = str(t)
        for word in t.leaves():
            if word not in lexicon:
                print(word)
                missing.append(word)
                # re.escape guards against regex metacharacters in the
                # token; \b keeps words that merely *contain* it intact
                # (the original passed the raw word as a pattern).
                t_string = re.sub(r"\b" + re.escape(word) + r"\b",
                                  "<UNK>", t_string)
        new = nltk.Tree.fromstring(t_string)
        edited_trees.append(new)

    print(
        str(len(missing)) +
        " words from testset missing from grammar vocabulary during testing. \n"
    )

    # Instantiate parser
    vp = nltk.ViterbiParser(pcfg)

    ## Accuracy stats
    #    A hypothesized constituent is correct if the reference contains a
    #    constituent with the same wordwise start, wordwise end and
    #    nonterminal symbol.
    #    Recall    = correct constituents / constituents in reference.
    #    Precision = correct constituents / constituents in hypothesis.
    #    F1 = 2PR / (P + R) is the stat commonly reported.

    for i in range(len(edited_trees)):
        t = edited_trees[i]
        correct_terminals = correct_trees[i].leaves()
        print("Analyzing sentence: " + str(correct_terminals) + "\n")
        sent = t.leaves()
        print("Finding most probable parse for tokens: " + str(sent) + "\n")
        parses = vp.parse(sent)
        for p in parses:
            print("Predicted most likely parse tree: \n")
            print(p)
            print(p.leaves())
            print("Recall :" + str(Recall(correct_terminals, p.leaves())))
            print("Precision :" +
                  str(Precision(correct_terminals, p.leaves())))
            print("F1 Score :" + str(F1Score(correct_terminals, p.leaves())))
            print("\n\n")
Exemple #21
0
def pcfg_chartparser(grammarfile):
    """Build a ViterbiParser from the PCFG grammar stored in *grammarfile*."""
    # with-statement closes the file even if reading/parsing raises;
    # nltk.PCFG.fromstring replaces the removed nltk.parse_pcfg API.
    with open(grammarfile) as f:
        grammar = nltk.PCFG.fromstring(f.read())
    return nltk.ViterbiParser(grammar)
Exemple #22
0
# Save the manually estimated PCFG into a text file.
manual_pcfg = []
# Iterate rule/probability pairs directly instead of indexing by key.
for rule, rule_prob in rules_prob.items():
    manual_pcfg.append(str(rule) + ' [' + str(rule_prob) + ']')
manual_pcfg = "\n".join(manual_pcfg)

# with-statement closes the file even if the write fails (the original
# left the handle open in that case).
with open("pcfg_manual.txt", "w") as pcfg_file:
    pcfg_file.write(manual_pcfg)

# In order to validate our results we'll use the NLTK package to
# induce a PCFG from the same rules and parse with it.
from nltk import induce_pcfg

S = Nonterminal('S')
grammar = induce_pcfg(S, gram_rules)

sent = "show me the meals on the flight from Phoenix".split()
inside_parser = nltk.InsideChartParser(grammar)

i = 1
for tree in inside_parser.parse(sent):
    print('Tree number ' + str(i) + ":")
    print(tree)
    i += 1

print(
    "Using the Viterbi parser from NLTK to determine which tree is most likely"
)
viterbi_parser = nltk.ViterbiParser(grammar)
for tree in viterbi_parser.parse(sent):
    print(tree)
    VP -> V NP [0.25]| V P VP [0.25]| Aux V [0.25]| V Adv Adv Adj [0.25] 
    NP -> N [0.143]| Det Adj N [0.143]| Prop [0.571]| Prop N [0.143]
    PP -> Adj N Adv [0.666]| Adv [0.333]
    V -> "had" [0.2]| "came" [0.2]| "go" [0.2]| "visit" [0.2]| "are" [0.2] 
    Prop -> "We" [0.2]| "She" [0.2]| "You" [0.2]| "Their" [0.2]| "me" [0.2] 
    Det -> "a" [1]
    N -> "party" [0.25]| "kids" [0.25]| "yesterday" [0.25]| "days" [0.25]
    P -> "to" [1]
    Adj -> "nice" [0.333]| "naive" [0.333]| "two" [0.333]
    Adv -> "always" [0.25]| "ago" [0.25]| "now" [0.25]| "not" [0.25]
    Aux -> "may" [1]
""")

# In[3]:

# Build a Viterbi parser from the HW3 grammar defined just above this excerpt.
HW3_parser = nltk.ViterbiParser(HW3_grammar)

# In[4]:

# HW3_parser = nltk.RecursiveDescentParser(HW3_grammar)

# In[5]:

# Parse and print the most probable tree(s) for the first sentence.
sen1 = "We had a nice party yesterday"
tree1 = HW3_parser.parse(sen1.split())
for tree in list(tree1):
    print(tree)

# In[6]:

sen2 = "She came to visit me two days ago"