Exemple #1
0
def findSyllableWord(word, syllableSize):
	"""Find a WordNet synonym of ``word`` that has ``syllableSize`` syllables.

	Every lemma name of every synset returned by ``wn.synsets(word)`` is
	checked with ``syllables_en.count``; the first one that differs from
	``word`` and has the requested syllable count wins.

	Args:
		word: the word to find a synonym for.
		syllableSize: the syllable count the synonym must have.

	Returns:
		A dict ``{'word': ..., 'syllable': ...}``.  On success it holds the
		matching synonym and ``syllableSize``; otherwise it holds the
		original word and its own syllable count.
	"""
	# NOTE(review): ``lemma_names`` is read as an attribute, which matches
	# NLTK 2.x; NLTK 3 exposes it as a method -- confirm the NLTK version.
	for syns in wn.synsets(word):
		for wordstring in syns.lemma_names:
			if wordstring != word and syllables_en.count(wordstring) == syllableSize:
				# Return the synonym that was found.  The previous code
				# returned the original word here, labelled with a syllable
				# count that was never checked against it.
				return {'word': wordstring, 'syllable': syllableSize}
	return {'word': word, 'syllable': syllables_en.count(word)}
Exemple #2
0
def is_haiku(poem):
    """Return True when ``poem`` is three lines of 5, 7 and 5 syllables.

    Args:
        poem: a sequence of line strings (its length is compared with 3 and
            each element is tokenized as one line).

    Returns:
        True if the per-line syllable counts equal ``[5, 7, 5]``; False
        otherwise, including when the poem contains any digit or does not
        have exactly three lines.
    """
    import re

    # TODO: rejecting every poem that contains a digit is overly strict.
    # ``any`` is used instead of ``filter`` because ``filter`` returns an
    # always-truthy iterator on Python 3.
    if any(ch.isdigit() for ch in str(poem)):
        return False

    haiku_format = [5, 7, 5]

    # TODO: check for n*3 lines?
    if len(poem) != 3:
        return False

    syl_count = []

    for line in poem:
        # TODO: this removes all punctuation, but since we're doing this on
        # code, we don't necessarily want that.
        words = nltk.wordpunct_tokenize(re.sub('[^a-zA-Z_ ]', '', line))
        count = 0
        for word in words:
            word = word.lower()
            count += syllables_en.count(word)
            # Running total for the line; parenthesised so the statement is
            # valid on both Python 2 and Python 3.
            print('%s: %d' % (word, count))

        syl_count.append(count)

    return syl_count == haiku_format
Exemple #3
0
def find_synset_word(word):
    """Pick a WordNet-related replacement for ``word``.

    The input is split by ``strip_punctuation_bool`` into a bare word and a
    ``'punct'`` value; that value is appended to whichever replacement is
    chosen.  Synsets are visited in random order and each is searched in two
    passes:

    1. Lemma names, hyponyms and antonyms of the first lemma, shuffled.  A
       candidate is accepted when it is ``approx_equal`` to the word while
       being longer and different, or has the same length but differs, or
       when neither string contains the other (case-insensitively).
    2. Attributes, similar-tos, substance meronyms and entailments, shortest
       first.  A candidate is accepted when it is not an English stopword,
       not in ``personal_pronouns`` and not a substring of the word.

    Args:
        word: the token to replace.

    Returns:
        The accepted candidate with the ``'punct'`` value appended, or the
        ``word`` argument exactly as passed in when nothing qualifies.
    """
    # Keep the untouched input for the escape case.  The previous code reused
    # one variable for this and for the loop candidates, so the escape case
    # returned the last rejected candidate instead of the input.
    original = word

    word_punct = strip_punctuation_bool(word)
    word = word_punct['word']
    punct = word_punct['punct']
    word_lower = word.lower()

    synsets = wn.synsets(word)
    shuffle(synsets)

    replacement_candidates = []

    for syns in synsets:
        # NOTE(review): after the first synset this list still holds the
        # rejected second-pass candidates of the previous synset, so they
        # get re-tested by the first pass.  Kept as in the original code.
        for lemma_name in syns.lemma_names:
            replacement_candidates.append(lemma_name)
        for related in syns.hyponyms():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.lemmas[0].antonyms():
            replacement_candidates.append(related.name.split(".")[0])

        shuffle(replacement_candidates)

        for candidate in replacement_candidates:
            candidate_lower = candidate.lower()
            differs = candidate_lower != word_lower
            # Approximate match that is longer than the word.
            if approx_equal(candidate, word) and differs and len(candidate) > len(word):
                return candidate + punct
            # Same length, different word.
            if len(candidate) == len(word) and differs:
                return candidate + punct
            # Neither string is contained in the other.
            if word_lower not in candidate_lower and candidate_lower not in word_lower:
                return candidate + punct

        # Nothing found yet: fall back to looser relations of this synset.
        replacement_candidates = []
        for related in syns.attributes():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.similar_tos():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.substance_meronyms():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.entailments():
            replacement_candidates.append(related.name.split(".")[0])

        replacement_candidates.sort(key=len)
        if replacement_candidates:
            # Fetch the stopword list once per synset rather than once per
            # candidate, and only when there is something to test.
            english_stopwords = stopwords.words('english')
            for candidate in replacement_candidates:
                if (candidate not in english_stopwords
                        and candidate not in personal_pronouns
                        and candidate not in word):
                    return candidate + punct

    # Escape case: return the original input unchanged.
    return original
Exemple #4
0
	
# Append the sample passage to the text accumulated so far.
textchunk += '''
	They want to deliver vast amounts of information over the Internet.
	And again, the Internet is not something that you just dump something on. 
	It's not a big truck. It's a series of tubes. And if you don't understand, 
	those tubes can be filled and if they are filled, when you put your message in, 
	it gets in line and it's going to be delayed by anyone that puts into that tube 
	enormous amounts of material, enormous amounts of material
	'''

poem = ''
wordmap = [] # a list that will contain a tuple (word,syllable_count)
words = nltk.word_tokenize(textchunk)
# Iterate the tokens directly: the enumerate index was unused and was bound
# to the name ``iter``, shadowing the builtin at module scope.
for word in words:
	# A trailing space is attached to the token before it is counted and stored.
	word += " "
	syls = syllables_en.count(word)
	wordmap.append((word,syls))
	
	
	
def findSyllableWord(word, syllableSize):
	"""Find a WordNet synonym of ``word`` that has ``syllableSize`` syllables.

	Every lemma name of every synset returned by ``wn.synsets(word)`` is
	checked with ``syllables_en.count``; the first one that differs from
	``word`` and has the requested syllable count wins.

	Args:
		word: the word to find a synonym for.
		syllableSize: the syllable count the synonym must have.

	Returns:
		A dict ``{'word': ..., 'syllable': ...}``.  On success it holds the
		matching synonym and ``syllableSize``; otherwise it holds the
		original word and its own syllable count.
	"""
	# NOTE(review): ``lemma_names`` is read as an attribute, which matches
	# NLTK 2.x; NLTK 3 exposes it as a method -- confirm the NLTK version.
	for syns in wn.synsets(word):
		for wordstring in syns.lemma_names:
			if wordstring != word and syllables_en.count(wordstring) == syllableSize:
				# Return the synonym that was found.  The previous code
				# returned the original word here, labelled with a syllable
				# count that was never checked against it.
				return {'word': wordstring, 'syllable': syllableSize}
	return {'word': word, 'syllable': syllables_en.count(word)}

def find_synset_word(word):
    """Pick a WordNet-related replacement for ``word``.

    The input is split by ``strip_punctuation`` into a bare word and a
    ``'punct'`` value; that value is appended to whichever replacement is
    chosen.  Synsets are visited in random order.  For each one, its lemma
    names, hyponyms, hypernyms and member holonyms are added to a candidate
    list that keeps growing across synsets, and the whole list is scanned.

    A candidate is accepted when any of these holds:

    * it is ``approx_equal`` to the word, differs from it
      (case-insensitively) and is longer;
    * it has the same length as the word but differs from it;
    * it has the same syllable count as the word but differs from it.

    Args:
        word: the token to replace.

    Returns:
        The accepted candidate with the ``'punct'`` value appended, or the
        ``word`` argument exactly as passed in when nothing qualifies.
    """
    # Keep the untouched input for the escape case.  The previous code reused
    # one variable for this and for the loop candidates, so the escape case
    # returned the last rejected candidate instead of the input.
    original = word

    word_punct = strip_punctuation(word)
    word = word_punct['word']
    punct = word_punct['punct']
    word_lower = word.lower()

    syllableSize = syllables_en.count(word)

    synsets = wn.synsets(word)
    shuffle(synsets)

    replacement_candidates = []

    for syns in synsets:
        for lemma_name in syns.lemma_names:
            replacement_candidates.append(lemma_name)
        for related in syns.hyponyms():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.hypernyms():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.member_holonyms():
            replacement_candidates.append(related.name.split(".")[0])

        for candidate in replacement_candidates:
            differs = candidate.lower() != word_lower
            # Approximate match that is longer than the word.
            if (approx_equal(candidate, word)
                    and differs
                    and len(candidate) > len(word)):
                return candidate + punct
            # Same length, different word.
            if len(candidate) == len(word) and differs:
                return candidate + punct
            # Same syllable count, different word.
            if syllables_en.count(candidate) == syllableSize and differs:
                return candidate + punct

    # Escape case: return the original input unchanged.
    return original
Exemple #6
0
def synset_creeley(word):
    """Return the shortest WordNet relative of ``word`` from one random synset.

    The input is first reduced to the ``'word'`` value returned by
    ``strip_punctuation_bool``.  The synsets of that word are shuffled and
    only the first one is used: its lemma names plus the names of its
    hyponyms, hypernyms, member holonyms, member meronyms, first-lemma
    antonyms, attributes, similar-tos, substance meronyms and entailments
    form the candidate list.  The shortest candidate wins, with ties going
    to the earliest one collected.

    Args:
        word: the token to replace.

    Returns:
        The shortest candidate, or the stripped word when WordNet has no
        synsets (or no candidates) for it.  The ``'punct'`` value is not
        re-attached in either case.
    """
    word = strip_punctuation_bool(word)['word']

    synsets = wn.synsets(word)
    shuffle(synsets)

    # Previously an unknown word fell out of the loop and returned None.
    if not synsets:
        return word

    # The original loop always returned during its first iteration, so only
    # the first synset after shuffling is ever consulted.
    syns = synsets[0]

    replacement_candidates = list(syns.lemma_names)
    relation_groups = (
        syns.hyponyms(),
        syns.hypernyms(),
        syns.member_holonyms(),
        syns.member_meronyms(),
        syns.lemmas[0].antonyms(),
        syns.attributes(),
        syns.similar_tos(),
        syns.substance_meronyms(),
        syns.entailments(),
    )
    for group in relation_groups:
        for related in group:
            replacement_candidates.append(related.name.split(".")[0])

    # Guard the empty case explicitly; indexing [0] on an empty list would
    # raise IndexError.
    if not replacement_candidates:
        return word

    # min() returns the first shortest item, matching a stable sort + [0].
    return min(replacement_candidates, key=len)
Exemple #7
0
def find_synset_word(word):
    """Pick a WordNet-related replacement for ``word``.

    The input is split by ``strip_punctuation_bool`` into a bare word and a
    ``'punct'`` value; that value is appended to whichever replacement is
    chosen.  Synsets are visited in random order and each is searched in two
    passes:

    1. Lemma names, hyponyms and antonyms of the first lemma, shuffled.  A
       candidate is accepted when it is ``approx_equal`` to the word while
       being longer and different, or has the same length but differs, or
       when neither string contains the other (case-insensitively).
    2. Attributes, similar-tos, substance meronyms and entailments, shortest
       first.  A candidate is accepted when it is not an English stopword,
       not in ``personal_pronouns`` and not a substring of the word.

    Args:
        word: the token to replace.

    Returns:
        The accepted candidate with the ``'punct'`` value appended, or the
        ``word`` argument exactly as passed in when nothing qualifies.
    """
    # Keep the untouched input for the escape case.  The previous code reused
    # one variable for this and for the loop candidates, so the escape case
    # returned the last rejected candidate instead of the input.
    original = word

    word_punct = strip_punctuation_bool(word)
    word = word_punct['word']
    punct = word_punct['punct']
    word_lower = word.lower()

    synsets = wn.synsets(word)
    shuffle(synsets)

    replacement_candidates = []

    for syns in synsets:
        # NOTE(review): after the first synset this list still holds the
        # rejected second-pass candidates of the previous synset, so they
        # get re-tested by the first pass.  Kept as in the original code.
        for lemma_name in syns.lemma_names:
            replacement_candidates.append(lemma_name)
        for related in syns.hyponyms():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.lemmas[0].antonyms():
            replacement_candidates.append(related.name.split(".")[0])

        shuffle(replacement_candidates)

        for candidate in replacement_candidates:
            candidate_lower = candidate.lower()
            differs = candidate_lower != word_lower
            # Approximate match that is longer than the word.
            if approx_equal(candidate, word) and differs and len(candidate) > len(word):
                return candidate + punct
            # Same length, different word.
            if len(candidate) == len(word) and differs:
                return candidate + punct
            # Neither string is contained in the other.
            if word_lower not in candidate_lower and candidate_lower not in word_lower:
                return candidate + punct

        # Nothing found yet: fall back to looser relations of this synset.
        replacement_candidates = []
        for related in syns.attributes():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.similar_tos():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.substance_meronyms():
            replacement_candidates.append(related.name.split(".")[0])
        for related in syns.entailments():
            replacement_candidates.append(related.name.split(".")[0])

        replacement_candidates.sort(key=len)
        if replacement_candidates:
            # Fetch the stopword list once per synset rather than once per
            # candidate, and only when there is something to test.
            english_stopwords = stopwords.words('english')
            for candidate in replacement_candidates:
                if (candidate not in english_stopwords
                        and candidate not in personal_pronouns
                        and candidate not in word):
                    return candidate + punct

    # Escape case: return the original input unchanged.
    return original
    def get_total_syl(self):
        """Add the syllable count of every entry in ``self.words_array`` to ``self.syl_count``.

        The running total is updated in place, one word at a time; nothing
        is returned.
        """
        for token in self.words_array:
            self.syl_count += syllables_en.count(token)
Exemple #9
0
	def make_syll(self,python=True):
		"""
		Tokenize ``self.text`` and pair each alphabetic token with a syllable count.

		Tokens that are not purely alphabetic are skipped.  Every kept token
		is stored with one trailing space appended, in original order.

		Args:
			python: when true, every count comes from ``syllables_en.count``.
				When false, the count is looked up in ``syll_dic`` under the
				singularized, lower-cased token, and ``syllables_en.count`` is
				used only if that lookup raises KeyError.

		Returns:
			A list of ``[word, syllable_count]`` pairs.  When exactly one
			token was kept, that single pair is returned on its own (handy
			for finding the syllable count of one word).

		Known issue (21/1/2015): ``word_tokenize(self.text)`` has raised
		UnicodeDecodeError on some input; that is not handled here.
		"""
		wording = []
		for word in word_tokenize(self.text):
			# isalpha() guarantees the token has no whitespace, so the
			# strip() calls the old code made on it were no-ops.
			if not word.isalpha():
				continue
			if python:
				num_syll = syllables_en.count(word + " ")
			else:
				# Keep the try body to the one lookup that can miss.
				try:
					num_syll = syll_dic[singularize(word.lower())]
				except KeyError:
					# Dictionary lookup failed; fall back to the Python counter.
					num_syll = syllables_en.count(word + " ")
			wording.append([word + " ", num_syll])

		if len(wording) == 1:
			return wording[0]
		return wording