Example #1
0
def drawing(tree):
    """Call ``tree.draw()``; do nothing when ``tree`` is ``None``."""
    if tree is None:
        return
    tree.draw()
Example #2
0
def drawing(tree):
    """Call ``tree.draw()`` on the given tree.

    ``tree`` must provide a ``draw()`` method; no ``None`` check is made here.
    """
    tree.draw()
# Demo script: parse two example sentences with a chart parser and a Viterbi
# parser built from the same grammar, printing and drawing every tree found.
# NOTE(review): str_sentence1, str_sentence2 and grammar are not defined in
# this excerpt; they must be set up earlier in the file.
print("Example sentences")
print(str_sentence1)
print(str_sentence2)
# Split each sentence into the token list the parsers consume.
tokens1 = nltk.word_tokenize(str_sentence1)
tokens2 = nltk.word_tokenize(str_sentence2)

# Create the Chart and Viterbi parsers, with the input grammar
chart_parser = nltk.ChartParser(grammar)
viterbi_parser = nltk.ViterbiParser(grammar)

# Results for the Chart Parser
print("Parse trees obtained with the Chart parser")
print("Sentence 1")
# parse() is iterated directly, so each tree is printed and drawn in turn.
for tree in chart_parser.parse(tokens1):
    print(tree)
    tree.draw()

print("Sentence 2")
for tree in chart_parser.parse(tokens2):
    print(tree)
    tree.draw()

# Results for the Viterbi Parser
print("Parse trees obtained with the Viterbi parser")
print("Sentence 1")
for tree in viterbi_parser.parse(tokens1):
    print(tree)
    tree.draw()

print("Sentence 2")
for tree in viterbi_parser.parse(tokens2):
Example #4
0
	def namedEntityRecognition(self,sentence):
		"""Collect candidate titles (proper-noun phrases) from ``sentence``.

		The sentence is tokenized and POS-tagged with NLTK, then chunked with a
		regexp grammar that groups an optional determiner/possessive, any
		adjectives and one or more proper nouns into ``NP`` chunks.  Two kinds
		of candidates are collected while walking the chunked tree:

		* the text of every ``NP`` chunk that holds more than one token
		  (text of directly adjacent chunks accumulates, because the buffer
		  is only cleared by a token outside any chunk), and
		* the running text of ``NP`` chunks joined across in-between words,
		  as long as no more than ``numWrdDelimiter`` such words separate two
		  chunks.

		NOTE(review): ``numWrdDelimiter`` is not defined in this method; it is
		presumably a module-level setting -- confirm against the rest of the
		file.

		Args:
			sentence: text to analyse.

		Returns:
			A list of candidate title strings.  Text after a "." is cut off
			and " 's" is re-attached as "'s".  The list is empty when any
			candidate is written entirely in upper case.
		"""
		tokens = nltk.word_tokenize(sentence)
		pos_tags = nltk.pos_tag(tokens)

		# Raw string so that "\$" reaches the chunk parser unchanged instead of
		# being read as an (invalid) Python escape sequence.
		# Alternative grammar kept for reference:
		#   NP: {<DT|PP\$>?<JJ>*<NN>}   # determiner/possessive, adjectives and noun
		#       {<NNP>+}                # sequences of proper nouns
		grammar = r"NP: {<DT|PP\$>?<JJ>*<NNP>+}"

		cp = nltk.RegexpParser(grammar)
		tree = cp.parse(pos_tags)

		possibleTitles = []
		possibleTitle = ""
		possibleDelimTitle = ""
		delimCounter = 0
		delimBool = False
		# NOTE(review): drawing the tree here looks like a debugging aid; it is
		# kept so the method's observable behaviour does not change.
		tree.draw()
		for subtree in tree:
			# isinstance works on Python 2 and 3; comparing against the text
			# "<type 'tuple'>" only ever matched on Python 2.
			if isinstance(subtree, tuple):
				# A plain (word, tag) pair: a token outside any chunk.
				if delimBool:
					delimCounter += 1
					possibleDelimTitle += subtree[0] + " "

				if delimCounter > numWrdDelimiter:
					# Too many words since the last chunk: abandon the joined title.
					delimBool = False
					delimCounter = 0
					possibleDelimTitle = ""
				possibleTitle = ""
				continue

			# NLTK 3 exposes the chunk label through label(); older releases
			# used the ``node`` attribute instead.
			if hasattr(subtree, "label"):
				chunkLabel = subtree.label()
			else:
				chunkLabel = subtree.node
			if chunkLabel != "NP":
				continue

			if subtree.leaves()[0][0].endswith("."):
				# When a proper noun begins a sentence, the token that closes
				# the previous sentence is sometimes pulled into the chunk;
				# drop that leading token.
				subtree.pop(0)

			# Every word of the chunk followed by a single space.
			chunkText = "".join(leaf[0] + " " for leaf in subtree.leaves())

			if len(subtree) > 1:
				possibleTitle += chunkText
				possibleTitles.append(possibleTitle.strip())

			if len(subtree) >= 1:
				# Try to join a title that is separated by non-nouns.
				possibleDelimTitle += chunkText

				if delimCounter > 0:
					# The tail end of the split proper noun.
					possibleTitles.append(possibleDelimTitle.strip())

				# Reset the counter.
				delimBool = True
				delimCounter = 0

		# Build the cleaned result in a fresh list.  The previous version
		# rebound/mutated the list it was iterating and then looked entries up
		# with index(), which raised ValueError once an entry had been
		# replaced or the list had been emptied.
		cleanedTitles = []
		for title in possibleTitles:
			# An all-caps candidate means the result cannot be trusted, so
			# everything is discarded (as the original reset did).
			if re.sub(r'\W+', ' ', title).isupper():
				return []

			if "." in title:
				# Sometimes NLTK yields "Jump Street. I" instead of
				# "Jump Street"; keep only the part before the period.
				title = title.split(".")[0]

			if " 's" in title:
				title = title.replace(" 's", "'s")

			cleanedTitles.append(title)
		return cleanedTitles
Example #5
0
def drawTree():
    """Build a small hard-coded example parse tree and draw it.

    The tree is read from a bracketed string and shown via its ``draw()``
    method.  Nothing is returned.
    """
    bracketed = '(Tweet (Adj old) (NP (N men) (Conj and) (N women)))'

    # NLTK 3 renamed Tree.parse to Tree.fromstring; prefer the new name and
    # fall back to the old one so older installations keep working.
    fromString = getattr(nltk.Tree, "fromstring", None) or nltk.Tree.parse

    # The original also parsed the same string into an unused second tree
    # (``cmutree``); that dead duplicate has been dropped.
    tree = fromString(bracketed)
    tree.draw()
Example #6
0
def main(tigerFile):
    """Convert ``tigerFile`` with ``ptbToTree`` and draw the resulting tree."""
    ptbToTree(tigerFile).draw()