Example #1
def pcfg_parser():
    """Build a toy PCFG, print it, and print the parse of a sample sentence.

    The sentence "Jack saw telescopes" is split on whitespace and handed to
    an ``nltk.ViterbiParser`` built from the grammar.
    """
    # The same grammar written with one production per line:
    #    S -> NP VP         [1.0]
    #    VP -> TV NP        [0.4]
    #    VP -> IV           [0.3]
    #    VP -> DatV NP NP   [0.3]
    #    TV -> 'saw'        [1.0]
    #    IV -> 'ate'        [1.0]
    #    DatV -> 'gave'     [1.0]
    #    NP -> 'telescopes' [0.8]
    #    NP -> 'Jack'       [0.2]
    #
    # Alternative representation using "|"; the probabilities of the
    # alternatives of one left-hand side must sum to 1.
    grammar = nltk.parse_pcfg("""
    S -> NP VP         [1.0]
    VP -> TV NP [0.4] | IV [0.3] | DatV NP NP [0.3]
    TV -> 'saw'        [1.0]
    IV -> 'ate'        [1.0]
    DatV -> 'gave'     [1.0]
    NP -> 'telescopes' [0.8]
    NP -> 'Jack'       [0.2]
    """)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(grammar)
    viterbi_parser = nltk.ViterbiParser(grammar)
    print(viterbi_parser.parse("Jack saw telescopes".split()))
Example #2
def pcfg_parser():
    """Print a small hand-written PCFG and the parse of "Jack saw telescopes".

    The grammar text is parsed with ``nltk.parse_pcfg`` and the tokenized
    sentence is parsed with an ``nltk.ViterbiParser`` built from it.
    """
    # One-production-per-line form of the grammar used below:
    #    S -> NP VP         [1.0]
    #    VP -> TV NP        [0.4]
    #    VP -> IV           [0.3]
    #    VP -> DatV NP NP   [0.3]
    #    TV -> 'saw'        [1.0]
    #    IV -> 'ate'        [1.0]
    #    DatV -> 'gave'     [1.0]
    #    NP -> 'telescopes' [0.8]
    #    NP -> 'Jack'       [0.2]
    #
    # Alternative representation using "|"; the probabilities of the
    # alternatives of one left-hand side must sum to 1.
    # The grammar must be passed as a (triple-quoted) string literal.
    grammar_text = """
    S -> NP VP         [1.0]
    VP -> TV NP [0.4] | IV [0.3] | DatV NP NP [0.3]
    TV -> 'saw'        [1.0]
    IV -> 'ate'        [1.0]
    DatV -> 'gave'     [1.0]
    NP -> 'telescopes' [0.8]
    NP -> 'Jack'       [0.2]
    """
    grammar = nltk.parse_pcfg(grammar_text)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(grammar)
    viterbi_parser = nltk.ViterbiParser(grammar)
    print(viterbi_parser.parse("Jack saw telescopes".split()))
Example #3
def Viterbi_fromfile(grammarfile):
    """Build a Viterbi parser from a PCFG stored in a text file.

    Args:
        grammarfile: Path of the file whose full contents are handed to
            ``nltk.parse_pcfg``.

    Returns:
        An ``nltk.ViterbiParser`` built from the parsed grammar.
    """
    # Two spaces after the label reproduce the output of the former
    # comma-separated print statement.
    print('Build a parser from  %s' % (grammarfile,))
    # The context manager closes the handle even if reading fails.
    with open(grammarfile) as f:
        grammarstring = f.read()
    grammar = nltk.parse_pcfg(grammarstring)

    print('Grammar size:  %s' % (len(grammar.productions()),))
    return nltk.ViterbiParser(grammar)
def Viterbi_fromfile(grammarfile):
    """Read a PCFG from *grammarfile* and return a Viterbi parser for it.

    Args:
        grammarfile: Path of a text file containing the grammar in the
            format accepted by ``nltk.parse_pcfg``.

    Returns:
        An ``nltk.ViterbiParser`` for the grammar read from the file.
    """
    # Two spaces after the label match what the former comma-separated
    # print statement emitted.
    print('Build a parser from  %s' % (grammarfile,))
    # Close the file as soon as its contents have been read.
    with open(grammarfile) as grammar_source:
        grammarstring = grammar_source.read()
    grammar = nltk.parse_pcfg(grammarstring)
    print('Grammar size:  %s' % (len(grammar.productions()),))
    return nltk.ViterbiParser(grammar)
Example #5
def main(sentences, grammarfile, pcfg_grammar, algo, output,
         to_keeps, percent_discard, beam=0):
    """Parse every sentence, then create pruned grammars for each setting.

    Args:
        sentences: Iterable of sentence strings; each is split on whitespace
            before parsing.
        grammarfile: Path of the grammar loaded through ``nltk.data.load``.
        pcfg_grammar: Path of the PCFG text used by the probabilistic parsers.
        algo: Parser selector: "viterbi", "inside", "random", "longest",
            "unsorted" or "chart".
        output: Passed through unchanged to ``create_pruned_grammar``.
        to_keeps: Iterable of "keep" values passed to
            ``create_pruned_grammar``.
        percent_discard: Iterable of "discard" values passed to
            ``create_pruned_grammar``.
        beam: Value forwarded to the ``pchart`` parsers (see note below).

    Returns:
        1 when *algo* is not recognized or a sentence yields no parse;
        otherwise the sum of the values returned by
        ``create_pruned_grammar``.
    """
    grammar = nltk.data.load("file:%s" % (grammarfile,))
    chart_parser = ChartParser(grammar, strategy=EARLEY_STRATEGY, trace=0)
    # Close the grammar file once its text has been read.
    with open(pcfg_grammar) as f:
        pcfgrammar = f.read()

    pchart_parsers = {
        "inside": pchart.InsideChartParser,
        "random": pchart.RandomChartParser,
        "longest": pchart.LongestChartParser,
        "unsorted": pchart.UnsortedChartParser,
    }

    if algo == "viterbi":
        pcfg_parser = nltk.ViterbiParser(nltk.parse_pcfg(pcfgrammar))
    elif algo in pchart_parsers:
        # NOTE(review): the second argument of these calls was cut off in
        # the original source; forwarding `beam` as `beam_size` is an
        # assumption based on the otherwise unused `beam` parameter.
        pcfg_parser = pchart_parsers[algo](nltk.parse_pcfg(pcfgrammar),
                                           beam_size=beam)
    elif algo == "chart":
        # The non-probabilistic chart parser built above is used instead.
        pcfg_parser = None
    else:
        print("unrecognized algorithm: %s" % (algo,))
        return 1

    forest = []
    for sentence in sentences:
        parsed_sent = sentence.split()
        print("parsed_sent: %s" % (parsed_sent,))
        start = datetime.now()

        if algo == "chart":
            trees = chart_parser.nbest_parse(parsed_sent)
        else:
            trees = pcfg_parser.nbest_parse(parsed_sent)
        elapsed = datetime.now() - start
        print("parsing time elapsed: %s" % (elapsed,))
        print("parsing time elapsed: %d us" % (elapsed.microseconds,))

        if not trees:
            print("failed to parse: %s" % (sentence,))
            return 1
        # NOTE(review): the original never filled `forest`; collecting the
        # trees as a flat list is an assumption about what
        # create_pruned_grammar expects -- confirm against its definition.
        forest.extend(trees)

    # Work on a copy of the production list.
    # NOTE(review): the original comment announced a random shuffle, but no
    # shuffle is performed in the visible code.
    all_productions = list(grammar.productions())

    status = 0
    for keep in to_keeps:
        for discard in percent_discard:
            status += create_pruned_grammar(forest, all_productions, keep,
                                            discard, output)
    return status
Example #6
def pcfg_chartparser(grammarfile):
    """Return a Viterbi parser for the PCFG stored in *grammarfile*.

    Args:
        grammarfile: Path of a text file whose contents are handed to
            ``nltk.parse_pcfg``.

    Returns:
        An ``nltk.ViterbiParser`` built from the parsed grammar.
    """
    # The context manager closes the handle once the text has been read.
    with open(grammarfile) as f:
        grammar = f.read()
    return nltk.ViterbiParser(nltk.parse_pcfg(grammar))
Example #7
    def give(t):
        """Tell whether *t* is a VP headed by give/gave with two complements.

        The node must be labelled 'VP', have more than two children, have an
        'NP' as second child and a 'PP-DTV' or 'NP' as third child, and the
        leaves of its first child must contain 'give' or 'gave'.
        """
        if t.node != 'VP' or len(t) <= 2:
            return False
        if t[1].node != 'NP':
            return False
        if t[2].node not in ('PP-DTV', 'NP'):
            return False
        # Only inspect the verb once the shape of the phrase has matched.
        verb_words = t[0].leaves()
        return 'give' in verb_words or 'gave' in verb_words

    def sent(t):
        """Join the leaves of *t* with spaces, skipping marker tokens.

        A leaf is skipped when its first character is '*', '-' or '0'.
        """
        kept = []
        for word in t.leaves():
            if word[0] in '*-0':
                continue
            kept.append(word)
        return ' '.join(kept)

    def print_node(t, width):
        """Print a one-line summary of the first three children of *t*.

        The line shows the text of the first child, then the label and text
        of the second and third children. When the line is longer than
        *width* it is cut to *width* characters and "..." is appended.
        """
        fields = (sent(t[0]), t[1].node, sent(t[1]), t[2].node, sent(t[2]))
        line = "%s %s: %s / %s: %s" % fields
        if len(line) > width:
            line = line[:width] + "..."
        print(line)
    # Walk the parsed treebank sentences and print every subtree that the
    # `give` filter accepts, limited to 72 characters per line.
    for parsed in nltk.corpus.treebank.parsed_sents():
        matching_phrases = parsed.subtrees(give)
        for phrase in matching_phrases:
            print_node(phrase, 72)

    # Toy PCFG; the probabilities of the productions sharing a left-hand
    # side (VP, NP) sum to 1.
    grammar = nltk.parse_pcfg("""
        S    -> NP VP              [1.0]
        VP   -> TV NP              [0.4]
        VP   -> IV                 [0.3]
        VP   -> DatV NP NP         [0.3]
        TV   -> 'saw'              [1.0]
        IV   -> 'ate'              [1.0]
        DatV -> 'gave'             [1.0]
        NP   -> 'telescopes'       [0.8]
        NP   -> 'Jack'             [0.2]
        """)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(grammar)

Example #8
def pcfg_chartparser(grammarfile):
    """Build a Viterbi parser from the PCFG text found in *grammarfile*.

    Args:
        grammarfile: Path of the grammar file; its whole content is parsed
            with ``nltk.parse_pcfg``.

    Returns:
        An ``nltk.ViterbiParser`` for that grammar.
    """
    # Read the grammar text and release the file handle right away.
    with open(grammarfile) as grammar_source:
        grammar = grammar_source.read()
    return nltk.ViterbiParser(nltk.parse_pcfg(grammar))
Example #9

pcfg = []

# Build the PCFG rule list by appending each rule's probability.
for p in probs:
    # parse_pcfg does not accept a number of characters, nor non-terminals
    # made of non-alphanumeric characters, so some replacements are needed.
    # NOTE(review): those replacements and the assignment of `wf` are not
    # present in this source; the unmodified rule text is used as a
    # stand-in -- restore the original sanitizing step here.
    wf = str(p)
    if not wf.startswith('"'):
        pcfg.append("%s [%s]" % (wf, '{0:.10f}'.format(probs[str(p)])))

# Build the grammar.
grammar = nltk.parse_pcfg(pcfg)
viterbi_parser = nltk.ViterbiParser(grammar)
# Test sentence.
sent = 'I can finally drink a beer now'
print(sent)

# Pass a list of tokens: a bare string would be iterated character by
# character instead of word by word.
for tree in viterbi_parser.nbest_parse(sent.split(), 3):
    print(tree)

##        print "%-8s\t %-16s\t %s" %("WORD", "FWD_PROB", "SURPRISAL")
##        print "-"*50
##        for i in range(len(pre_probs)):
##            if i > 0:
##                print "%-8s\t %-16s\t %s" %(pre_probs[i][0], str(pre_probs[i][1]),\
##                                            str(math.log(pre_probs[i-1][1]/pre_probs[i][1], 2)))
##            else:
##                print "%-8s\t %-16s\t %s" %(pre_probs[i][0], str(pre_probs[i][1]),\
##                                            str(math.log(1./pre_probs[i][1], 2)))
if __name__ == "__main__":
    ftext = open('allsents.pcfg.txt').read() + open('allsents.lexicon.txt').read()

    PROB_RE = re.compile(r'( \[ [+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)? \] ) \s*', re.VERBOSE)
    nltk.grammar._PROBABILITY_RE = PROB_RE
    gram = nltk.parse_pcfg(ftext)

    pparser = PrefixParser(gram)
    sentences = '''The actor who was impressed by the critic humiliated the director.
The actor who the critic was impressed by humiliated the director.
The actor who impressed the critic humiliated the director.
The actor who the critic impressed humiliated the director.
The director humiliated the actor who impressed the critic.
The director humiliated the actor who the critic impressed.
The activist began the rebellion by organizing the strike.
The actress was praised by the director filming the movie.
The babysitter grounded the child and called the parents.
The dictator was loved by the people and hated by the world.
The crowd admired the vocalist of the band.
The dog was attacked by the leopard from the zoo.