def getEntities(parser, tweet, xEntities):
    """Collect entity-like tokens (NNP / NN / PRP) from three taggers.

    Runs the supplied spaCy pipeline, TextBlob's pattern parser and
    pattern.en's ``tag`` over *tweet*, and records every token tagged as a
    proper noun, common noun or personal pronoun into *xEntities*
    (token -> tag string), keeping the first tag seen for each token.

    :param parser: callable spaCy pipeline; ``parser(text)`` yields tokens
        exposing ``.tag_``.
    :param tweet: text to analyse.
    :param xEntities: dict accumulator, mutated in place.
    :return: the updated *xEntities* dict on success, or the caught
        exception object on failure (kept for backward compatibility —
        existing callers check the return value's type).
    """
    ENTITY_TAGS = ("NNP", "NN", "PRP")

    def _record(token, pos):
        # Normalise the token text and register it unless already known.
        key = str(token).strip()
        if key not in xEntities:
            xEntities[key] = str(pos)

    try:
        spacyParsedObject = parser(tweet)
        sentence = TextBlob(tweet)
        # parse().split() -> sentences of [word, tag, chunk, pnp] lists.
        textblobTaggedObject = sentence.parse().split()
        # pattern.en tagger -> (word, tag) pairs.
        patterntaggedObject = tag(tweet, tokenize=True)
        for word, wordtag in patterntaggedObject:
            if wordtag in ENTITY_TAGS:
                _record(word, wordtag)
        for taggedObject in textblobTaggedObject:
            for token in taggedObject:
                if token[1] in ENTITY_TAGS:
                    _record(token[0], token[1])
        for word in spacyParsedObject:
            if word.tag_ in ENTITY_TAGS:
                _record(word, word.tag_)
        return xEntities
    except Exception as e:
        # NOTE(review): returning the exception instead of raising is the
        # pre-existing contract; preserved so callers do not break.
        return e
def test_get_np_for_default(self):
    """Print noun phrases and the shallow parse for every sample text."""
    for sample in self.text_list:
        blob = TextBlob(sample)
        print(blob.noun_phrases)
        print(blob.parse())
def tag_documents_text(client):
    """POS-tag each cornell document with the perceptron tagger and store
    the resulting parse string under 'parsed_perceptron'."""
    collection = client['cornell']['documents']
    for doc in collection.find():
        parsed = TextBlob(doc['text'], pos_tagger=PerceptronTagger()).parse()
        collection.update({'name': doc['name']},
                          {'$set': {'parsed_perceptron': parsed}})
def getEntities(parser, tweet, xEntities):
    """Harvest NNP/NN/PRP tokens from the spaCy, TextBlob and pattern
    taggers into *xEntities* (token -> tag). Returns the dict, or the
    exception object if any stage fails."""
    wanted = ("NNP", "NN", "PRP")
    try:
        spacy_doc = parser(tweet)
        blob = TextBlob(tweet)
        blob_sentences = blob.parse().split()
        pattern_tokens = tag(tweet, tokenize=True)
        # pattern.en output: (word, tag) pairs.
        for token, pos in pattern_tokens:
            if pos in wanted:
                key = str(token).strip()
                if key not in xEntities:
                    xEntities[key] = str(pos)
        # TextBlob parse output: sentences of [word, tag, chunk, pnp] lists.
        for sent in blob_sentences:
            for tok in sent:
                pos = tok[1]
                if pos in wanted:
                    key = str(tok[0]).strip()
                    if key not in xEntities:
                        xEntities[key] = str(pos)
        # spaCy tokens carry their own tag attribute.
        for tok in spacy_doc:
            if tok.tag_ in wanted:
                key = str(tok).strip()
                if key not in xEntities:
                    xEntities[key] = str(tok.tag_)
        return xEntities
    except Exception as e:
        return e
def extract_trigrams(client):
    """Compute the valid trigrams of each cornell document (built from the
    parse string of every sentence) and persist them under 'trigrams'."""
    collection = client['cornell']['documents']
    for doc in collection.find():
        blob = TextBlob(doc['text'])
        valid_trigrams = []
        for s in blob.sentences:
            raw_sentence = TextBlob(s.dict['raw'])
            # Re-wrap the parse string so ngrams() runs over tagged tokens.
            parsed_sentence = TextBlob(raw_sentence.parse())
            valid_trigrams += get_valid_trigrams(parsed_sentence.ngrams(n=3))
        collection.update({'name': doc['name']},
                          {'$set': {'trigrams': valid_trigrams}})
def get_structure():
    """Build one structural feature row per sentence in the module-level
    ``sentences`` list: [noun-phrase count, count of 'O' in the parse
    string, *match_tag features over the POS-tag sequence]."""
    train = []
    for sent in sentences:
        blob = TextBlob(sent)
        # number of noun phrases
        np_count = len(blob.noun_phrases)
        # keep only the tag of every (word, tag) pair
        pos_tags = [t for (_, t) in blob.tags]
        # occurrences of 'O' (outside-chunk marker) in the parse output
        o_count = blob.parse().count('O')
        row = [np_count, o_count]
        row.extend(match_tag(pos_tags))
        train.append(row)
    return train
def nlp_run(label, command):
    """Run the TextBlob analysis selected by *command* on *label*.

    Supported commands: "pol" (polarity), "spel" (spelling correction),
    "tag" (POS tags), "parse" (shallow parse), "noun" (noun phrases),
    "sub" (subjectivity).

    :return: a result string ("spel" returns the corrected TextBlob);
        None for an unrecognised command (unchanged behaviour).
    """
    text = TextBlob(label)
    sentvalue = text.sentiment.polarity
    subvalue = text.sentiment.subjectivity
    tags = text.tags
    parse = text.parse()
    nouns = text.noun_phrases
    correct = text.correct()
    if command == "pol":
        if sentvalue < 0:
            return "The polarity is " + str(sentvalue) + " ," "which means the text is negative"
        elif sentvalue > 0:
            return "The polarity is " + str(sentvalue) + " ," "which means the text is positive"
        elif sentvalue == 0:
            return "Polarity cannot be detected :("
    elif command == "spel":
        return correct
    elif command == "tag":
        return str(tags)
    elif command == "parse":
        return str(parse)
    elif command == "noun":
        return str(nouns)
    elif command == "sub":
        # BUG FIX: the subjectivity messages previously embedded
        # str(sentvalue) (the polarity); they now report str(subvalue).
        if subvalue > 0.5:
            return "The subjectivity is " + str(subvalue) + " ," "which means the text is subjective"
        elif subvalue < 0.5:
            return "The subjectivity is " + str(subvalue) + " ," "which means the text is objective"
        elif subvalue == 0.5:
            return "subjectivity cannot be detected :("
def nlp_parse():
    """Flask/Chalice endpoint: shallow-parse the 'text' field of a JSON
    request body and return the JSON-encoded list of parsed sentences;
    on any failure, log and return a JSON error payload."""
    try:
        payload = request.get_json()
        if payload is None:
            return jsonify(error='this service require A JSON request')
        # Guard clause replaces the original nested else-branch.
        if 'text' not in payload:
            raise Exception('Missing mandatory paramater "text"')
        parsed = TextBlob(payload['text']).parse().split()
        return json.JSONEncoder().encode(parsed)
    except Exception as ex:
        app.log.error(type(ex))
        app.log.error(ex.args)
        app.log.error(ex)
        return jsonify(error=str(ex))
# maybe need more than two headlines # print sys.argv[1] # print sys.argv[2] # headlines 1 and 2 - analyze, mix and send back to node # blob = TextBlob(sys.argv[1]) # # print blob.tags # blob2 = TextBlob(sys.argv[2]) # print blob2.tags for i, val in enumerate(news): headline = news[i]['title'] headlines.append(headline) headblob = TextBlob(headline, np_extractor=extractor) headblobs.append(headblob.noun_phrases) parsed = headblob.parse() headParsed.append(parsed) # for item in headParsed: # print item # get the first noun phrase from each headline and swap them # grab a random noun phrase from each headline h1i = int(random.random()*20) h1 = headlines[h1i] r1 = int(random.random()*len(headblobs[h1i])) np1 = headblobs[h1i][r1] # capitalize the noun phrase # np1 = ' '.join(word[0].upper() + word[1:] for word in np1.split())
def analyze(self, text):
    """Return the shallow parse of *text* (word/POS/chunk/PNP tag string)."""
    return TextBlob(text).parse()
# **************** "iNNovationMerge DailyCodeHub" **************** # Visit https://www.innovationmerge.com/ # Theme : Natural Language Processing using TextBlob in Python # NLP - Parsing the Text from textblob import TextBlob text = 'INNovationMerge is an online learning platform. \ developed for the users who wants to learn and practice \ technologies with the respective environments.' blob = TextBlob(text) print(blob.parse()) # Output: # INNovationMerge/NN/B-NP/O is/VBZ/B-VP/O an/DT/O/O online/JJ/B-ADJP/O learning/VBG/B-VP/O platform/NN/B-NP/O ././O/O developed/VBN/B-VP/O for/IN/B-PP/B-PNP the/DT/B-NP/I-PNP users/NNS/I-NP/I-PNP who/WP/O/O wants/VBZ/B-VP/O to/TO/B-PP/O learn/VB/B-VP/O and/CC/O/O practice/NN/B-NP/O technologies/NNS/I-NP/O with/IN/B-PP/B-PNP the/DT/B-NP/I-PNP respective/JJ/I-NP/I-PNP environments/NNS/I-NP/I-PNP ././O/O
# NOTE(review): this chunk begins mid-function — the `try` matching the
# first `except` (and the enclosing def) are above the visible region.
# The cascade tries three answer strategies, then falls back to the first
# noun phrase, then to a literal "Yes" guess. Python 2 syntax (raw_input,
# print statements) throughout.
except:
    first_attempt = ""
if first_attempt != "":
    return first_attempt
try:
    second_attempt = parse_second(q, bigblob, uncommon, mode)
except:
    second_attempt = ""
if second_attempt != "":
    return second_attempt
third_attempt = b.backup_answer(q, n.nps, raw)
if third_attempt != "":
    return third_attempt
if len(n.nps) > 0:
    return n.nps[0]
else:
    return "Yes" #guess

if __name__ == "__main__":
    # Interactive driver: parse one typed question and dump its analyses.
    q = raw_input("Ask a question\n")
    q = TextBlob(q, np_extractor=extractor)
    print q.noun_phrases
    noun_phrases, idxs = n.get_nps_from_blob(q)
    print noun_phrases
    print q.words
    # NOTE(review): assumes at least one noun phrase — IndexError otherwise.
    first = noun_phrases[0]
    print n.get_np_tags(first, q)
    print q.tags
    print q.parse()
    #print p.extract_generic_relations(q)
# Spelling correction: whole blob vs. a single Word.
sent = TextBlob("I haawve goood speling")
correct_sent = sent.correct()
w = Word("haave")
spellcheck = w.spellcheck()  # list of (candidate, confidence) pairs

#Get Word and Noun Phrase Frequencies
words = TextBlob('We are no longer together. We are enemies now.')
word_counts = words.word_counts
#You can specify whether or not the search should be case-sensitive (default is False).

#Translation and Language Detection
# NOTE(review): translate()/detect_language() call an external Google API
# and were removed in newer textblob releases — confirm the pinned version.
en_blob = TextBlob("You are my best friend")
pl_blob = en_blob.translate(to='pl')
blob = TextBlob("Mam na imię Piotr")
detected_lang = blob.detect_language()

#Parsing
text = TextBlob('I know You')
text_parse = text.parse()  # word/POS/chunk/PNP tag string

#string
# TextBlob supports str-like operations directly.
text = TextBlob("Hello World")
upper_text = text.upper()
find_world = text.find("World")

#ngrams
blob = TextBlob("Now is better than never.")
ngram = blob.ngrams(n=3)
def check_sarc(tweet):
    """Heuristic sarcasm check on *tweet*.

    Shallow-parses the tweet with the pattern parser, groups tokens into
    chunk phrases (NP, VP, ADJP, ADVP, ...), builds noun/adjective-centred
    (SF) and verb-centred (sf) phrase lists (including common two- and
    three-chunk combinations), scores each phrase's polarity, and returns
    1 when a positive/negative sentiment contrast between the two groups
    is found (a classic sarcasm cue), else 0.
    """
    blob = TextBlob(tweet, parser=PatternParser())
    tokens = blob.parse().split(' ')
    dic = defaultdict(list)  # all phrases keyed by chunk category
    temp = ''
    phrases = []  # (chunk-tag, phrase) pairs in sentence order
    for t in tokens:
        word, chunk = t.split('/')[0], t.split('/')[2]
        if chunk == 'O':
            if temp:
                phrases.append((ctag, temp))
            dic[chunk].append(temp)
            temp = word + ' '
            ctag = chunk
        elif 'B-' in chunk:
            if temp:
                phrases.append((ctag, temp))
            temp = word + ' '
            dic[chunk.split('-')[1]].append(temp)
            ctag = chunk.split('-')[1]
        elif 'I-' in chunk:
            # Continuation token: extend the current phrase.
            dic[chunk.split('-')[1]][-1] += word + ' '
            temp += word + ' '
            ctag = chunk.split('-')[1]
    if temp:
        phrases.append((ctag, temp))
    SF = []  # noun/adjective-centred phrases
    sf = []  # verb-centred phrases
    for tag_, phrase in phrases:
        # FIX: was 'ADjP' (typo) which can never match the 'ADJP' chunk tag.
        if tag_ in ['NP', 'ADJP']:
            SF.append(phrase)
        elif tag_ == 'VP':
            sf.append(phrase)
    for i in range(len(phrases) - 1):
        if phrases[i][0] == 'NP' and phrases[i + 1][0] == 'VP':
            SF.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'ADVP' and phrases[i + 1][0] == 'VP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'VP' and phrases[i + 1][0] == 'ADVP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'ADJP' and phrases[i + 1][0] == 'VP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'VP' and phrases[i + 1][0] == 'NP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
    for i in range(len(phrases) - 2):
        if phrases[i][0] == 'VP' and phrases[i + 1][0] == 'ADVP' and phrases[i + 2][0] == 'ADJP':
            # FIX: the third component was phrases[i + 1][1] (middle phrase
            # duplicated); it now appends the third chunk as intended.
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1] + ' ' + phrases[i + 2][1])
        elif phrases[i][0] == 'VP' and phrases[i + 1][0] == 'ADJP' and phrases[i + 2][0] == 'NP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1] + ' ' + phrases[i + 2][1])
        elif phrases[i][0] == 'ADVP' and phrases[i + 1][0] == 'ADJP' and phrases[i + 2][0] == 'NP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1] + ' ' + phrases[i + 2][1])
    print(SF)
    print(sf)
    PSF = []  # positive noun/adjective phrases
    NSF = []  # negative noun/adjective phrases
    psf = []  # positive verb phrases
    nsf = []  # negative verb phrases
    for phrase in SF:
        polarity = TextBlob(phrase).polarity
        if polarity > 0:
            PSF.append(phrase)
        elif polarity < 0:
            NSF.append(phrase)
    for phrase in sf:
        polarity = TextBlob(phrase).polarity
        if polarity > 0:
            psf.append(phrase)
        elif polarity < 0:
            # FIX: negative verb phrases were appended to psf, so nsf stayed
            # empty and half of the contrast test below could never fire.
            nsf.append(phrase)
    print(PSF)
    print(NSF)
    print(psf)
    print(nsf)
    # Sarcasm signal: opposite polarities between the two phrase families.
    if (PSF and nsf) or (psf and NSF):
        return 1
    else:
        return 0
)
# NOTE(review): this fragment starts mid-statement (the lone `)` closes a
# call begun above) and uses `monty`, `wiki` and `zen` TextBlobs defined
# earlier in the original tutorial script.
print(monty.word_counts['ekki'])  # through the word_counts dictionary
print(monty.words.count('ekki'))  # using the count() method
print(monty.words.count('ekki', case_sensitive=True))  # specify case sensitivity
print(wiki.noun_phrases.count('python'))
# translation and language detection
# en_blob = TextBlob(u'Simple is better than complex.')
# print(en_blob.translate(to='es'))
# chinese_blob = TextBlob(u"美丽优于丑陋")
# print(chinese_blob.translate(from_lang="zh-CN", to='en'))
# b = TextBlob(u"بسيط هو أفضل من مجمع")
# print(b.detect_language())
# parsing
b = TextBlob("And now for something completely different.")
print(b.parse())
# textblobs are like python strings!
print(zen[0:19])
print(zen.upper())
print(zen.find("Simple"))
apple_blob = TextBlob('apples')
banana_blob = TextBlob('bananas')
print(apple_blob < banana_blob)
print(apple_blob == 'apples')
# NOTE(review): the next two expressions build blobs but discard them.
apple_blob + ' and ' + banana_blob
TextBlob("apples and bananas")
print("{0} and {1}".format(apple_blob, banana_blob))
# n-grams
blob = TextBlob("Now is better than never.")
print(blob.ngrams(n=3))
# getting start and end indices of sentences
from textblob.wordnet import VERB

# Python 2 demo script: dump every TextBlob analysis for a fixed query.
raw_query = "Physics is a better subject to study than Mathematics. I like Physics more than I like Mathematics. Physicists are more intelligent than Mathematicians."
# Get input ready for use
query = TextBlob(raw_query)
print 'Query: ', query
tags = query.tags
print 'Tags: ', tags
nouns = query.noun_phrases
print 'Nouns: ', nouns
sentiment = query.sentiment
print 'Sentiment: ', sentiment
words = query.words
print 'Words: ', words
sentences = query.sentences
print 'Sentences: ', sentences
parse = query.parse()
print 'Parse: ', parse
# NOTE(review): detect_language() needs network access and was removed in
# newer textblob releases — confirm the pinned version.
language = query.detect_language()
print 'Language: ', language
# TODO : add spelling checks to correct the input sentences for better searches
corrected = query.correct()
print 'Corrected: ', corrected
# Search for results: WordNet lookups for a sample word.
w = Word('Octopus')
print '\nSynsets: ', w.synsets
print '\nDefinitions: ', w.definitions
print Word("hack").get_synsets(pos=VERB)
def get_value_instruction(sent):
    """Decode an underscore-joined key phrase from a natural-language
    instruction, using TextBlob POS tags.

    Two pipelines: if an adjective (JJ) is present, the phrase is built
    from the adjective and the noun/verb/adverb run that follows it;
    otherwise nouns/adverbs/participles (and words around prepositions)
    are concatenated. Returns the phrase with '_' separators (or the
    second word verbatim for two-word inputs).
    """
    # Text blob part of speech identification algorithm
    blob = TextBlob(sent)
    blob.parse()
    # isolating tags of words in instruction
    tags = blob.tags
    decoded = ""
    # if an adjective is present then truth is set to True to activate the
    # correct pipeline
    truth = False
    for x in range(len(tags)):
        if "JJ" in tags[x]:
            truth = True
            break
    # when an adjective exists this pipeline is run
    if truth:
        try:
            for x in range(len(tags)):
                if "JJ" in tags[x]:
                    q = x + 1
                    decoded += sent.split()[x] + "_"
                    # while the word after the adjective is any of these parts
                    # of speech they're added to the instruction final
                    while ("VBN" in tags[q] or "VBG" in tags[q] or "NN" in tags[q]
                           or "NNS" in tags[q] or "RB" in tags[q]
                           or ("NNS" in tags[q] and "IN" in tags[q + 1])):
                        decoded += sent.split()[q] + "_"
                        # if an interjection is present then you want to skip
                        # over it
                        if ("IN" in tags[q + 1]):
                            decoded += sent.split()[q + 1] + "_"
                            q += 2
                            continue
                        q += 1
                        if q >= len(tags):
                            break
        # NOTE(review): bare except-pass relies on IndexError to terminate
        # the scan at the end of the sentence — deliberate but fragile.
        except BaseException:
            pass
    # if there's no adjective present you want to run this pipeline
    else:
        try:
            # you iterate through the tags and identify certain parts of speech
            for x in range(len(tags)):
                if x < len(tags) - 1:
                    if "IN" in tags[x + 1]:
                        decoded += sent.split()[x] + "_"
                        decoded += sent.split()[x + 1] + "_"
                        # NOTE(review): reassigning the for-loop variable does
                        # NOT skip iterations in Python — the preposition pair
                        # is processed again on the next pass; likely a bug,
                        # left unchanged here.
                        x = x + 2
                        continue
                # if any of these parts of speech are in the instruction then
                # you want to extract them. The parts of speech can be found:
                # https://repository.upenn.edu/cgi/viewcontent.cgi?article=1603&context=cis_reports
                if "NN" in tags[x] or "NNS" in tags[x] or "RB" in tags[x] \
                        or "VBG" in tags[x] or "VBN" in tags[x]:
                    decoded += sent.split()[x] + "_"
                else:
                    continue
        except BaseException:
            print(x)
            print(tags[x])
            print("Please try re-typing your sentence")
    # drop the trailing underscore
    decoded = decoded[:-1]
    # If it's two words then you just choose the second word: we're assuming two words = predict apples, even if this is false in the example apples red
    # similarity identificatin will still pick up on the right column
    if len(sent.split()) == 2:
        decoded = sent.split()[1]
    return decoded
def process_single_question(q):
    """
    Check EverNote 180127 for detail
    :param q: input question
    :return: token_list, a list of raw words
             chunk_pos_list, [(st, ed)] indicating the position [st, ed)
    """
    blob = TextBlob(q)
    # Shallow parse -> "word/POS/chunk/..." items, one per token.
    shallow_parse = blob.parse().replace('\n', ' ').split(' ')
    LogInfo.logs(shallow_parse)
    chunk_tup_list = []
    for item in shallow_parse:
        spt = item.split('/')
        token = spt[0]
        chunk_tag = spt[2]
        chunk_tup_list.append([token, chunk_tag])
    while True:     # deal with .
        # NOTE(review): tups_len is unused in this loop (only the quote loop
        # below needs it); left unchanged.
        tups_len = len(chunk_tup_list)
        dot_idx = -1
        for idx, tup in enumerate(chunk_tup_list):
            if tup[0] == u".":
                dot_idx = idx       # capture the index of '
                break
        if dot_idx == -1:
            break
        assert dot_idx > 0
        # Merge a detached "." back onto the preceding token.
        chunk_tup_list[dot_idx-1][0] += chunk_tup_list[dot_idx][0]
        del chunk_tup_list[dot_idx]
    while True:     # deal with '
        tups_len = len(chunk_tup_list)
        quote_idx = -1
        for idx, tup in enumerate(chunk_tup_list):
            if tup[0] == u"'":
                quote_idx = idx     # capture the index of '
                break
        if quote_idx == -1:
            break
        assert quote_idx > 0
        if quote_idx < tups_len - 1 and chunk_tup_list[quote_idx+1][0] == u"s":
            # possessive: glue "'" + "s" into "'s"
            chunk_tup_list[quote_idx][0] += chunk_tup_list[quote_idx+1][0]
            del chunk_tup_list[quote_idx+1]
        else:
            # NOTE(review): this branch merges the quote AND the following
            # token into the preceding one; it indexes quote_idx+1, so a
            # sentence-final "'" would raise IndexError — confirm intended.
            chunk_tup_list[quote_idx-1][0] += chunk_tup_list[quote_idx][0]
            chunk_tup_list[quote_idx-1][0] += chunk_tup_list[quote_idx+1][0]
            del chunk_tup_list[quote_idx+1]
            del chunk_tup_list[quote_idx]
    token_list = [tup[0] for tup in chunk_tup_list]
    chunk_pos_list = []
    st = -1
    # Scan for maximal NP chunks ([st, ed) spans), skipping wh-words.
    for idx in range(len(chunk_tup_list)):
        tag = chunk_tup_list[idx][1]
        if tag in ('B-NP', 'I-NP'):
            if st != -1:
                continue
            else:
                st = idx
        else:
            if st != -1 and token_list[st].lower() not in wh_set:
                chunk_pos_list.append((st, idx))
            st = -1
        # if tag 'I-NP':
        #     continue
        # else:
        #     if st != -1 and token_list[st].lower() not in wh_set:
        #         chunk_pos_list.append((st, idx))
        #         st = -1
        # if tag == 'B-NP':
        #     st = idx
    # Close an NP chunk that runs to the end of the question.
    if st != -1 and token_list[st].lower() not in wh_set:
        chunk_pos_list.append((st, len(chunk_tup_list)))
    return token_list, chunk_pos_list
################################### # 对于文本特征构建组成新的训练文件 # ################################# from textblob import TextBlob with open('f1.txt', 'a', encoding='utf-8') as f1in: with open('f2.txt', 'a', encoding='utf-8') as f2in: with open('f3.txt', 'a', encoding='utf-8') as f3in: with open('ctrain.txt', encoding='utf-8') as f: count = 0 for line in f.readlines(): count += 1 text = line.split('\n')[0].split('\t')[1] tag = TextBlob(text) # print(tag.tags) print(tag.parse().split(' ')) f = tag.parse().split(' ') for i in f: i = i.split('/') f1in.write(i[1] + ' ') f2in.write(i[2] + ' ') f3in.write(i[3] + ' ') f1in.write('\n') f2in.write('\n') f3in.write('\n') # print(line) # f count==10:break from textblob import TextBlob with open('ftmp_test.txt', 'w', encoding='utf-8') as fin: with open('tmp_test', encoding='utf-8') as f:
# Demo 1: POS tags for three punctuation variants of the same sentence.
for sent in [
    "A woman without her man is nothing. ",
    "A woman, without her man, is nothing.",
    "A woman: without her, man is nothing."
]:
    wiki = TextBlob(sent)
    print("Wiki tags: %s : " % sent, wiki.tags)

# Demo 2: full TextBlob analysis of driver-licensing guide sentences.
for sent in [
    "For more information see the Commercial Driver Guide available at www.dol.wa.gov or at any driver licensing office.",
    "You can get an instruction permit or a driver license at any driver licensing office.",
    "Some offices do not offer testing so before you come in be sure the one you plan to visit offers the testing you need.",
    "In an effort to reduce wait times legislation was passed to allow driver training schools licensed by the Department of Licensing and school districts that offer a traffic safety education program under the supervision of the Office of the Superintendent of Public Instruction to administer driver licensing examinations.",
    "A list of approved schools as well as driver licensing offices can be found on our website.",
    "Please contact an approved school for their specific testing requirements.",
    "To be issued an instruction permit you must: ** be at least 15-1/2 years old (or 15 years old if enrolled in an approved driver-training course); ** pass the knowledge test (unless enrolled in an approved driver-training course); ** complete the vision and medical screenings and; ** pay an application/examination fee.",
    "If you pay an application/examination fee and are ( at least 15-1/2 years old or ( 15 years old and enrolled in an approved driver-training course) ) and ( pass the knowledge test or are enrolled in an approved driver-training course) and complete the ( vision and medical screenings) then you will be issued an instruction permit.",
    "If you are under 18 you must also bring your parent or guardian with you to the licensing office when you apply."
    # They must show proof of identity and proof of relationship to you and must also sign a Parental Authorization Affidavit.
    # When last names are different we require more documents 1-3 proving relationship.
    # The permit is valid for one year and you can renew it.
    # If you are enrolled in an approved driver-training course you can get an instruction permit at age 15.
    # You will need a waiver from your school allowing you to apply for the permit up to 10 days before the class starts.
]:
    wiki = TextBlob(sent)
    print("\nWiki tags: %s : " % sent, wiki.tags)
    print("Noun phrase extraction", wiki.noun_phrases)
    print("Parsed", wiki.parse())
    print("n-grams", wiki.ngrams(n=3))
def main(argv):
    """Voice-driven NLP assistant loop.

    Repeatedly: calibrate the microphone, record a request, transcribe it
    with both Google and IBM Watson, let the user pick the better
    transcript by voice, then run POS tagging, parsing and sentiment
    analysis (TextBlob) over the chosen transcript and speak the results.
    Loops until the user says they are done.

    NOTE(review): this chunk's original indentation was lost; the nesting
    below is a best-effort reconstruction — verify against the original
    script before relying on control flow at the block boundaries.
    """
    #with sr.Microphone() as source:
    finalTranscript = ""
    translationSuccess = 0      # NOTE(review): assigned but never used here
    optionChoosen = "0"
    exitOption = "0"
    welcomeTextSpeak = 0        # one-shot flag: speak welcome only once
    threeOptionAware = 0        # one-shot flag: explain the options once
    # NOTE(review): `in ("0")` is a substring test on the string "0",
    # not tuple membership — works here but fragile.
    while exitOption in ("0"):
        m = sr.Microphone()
        optionChoosen = "0"
        while optionChoosen not in ("1", "2", "3"):
            if welcomeTextSpeak == 0:
                welcomeTextSpeak = welcomeTextSpeak + 1
                welcomeText = "Hello user. Welcome to Shriram's Voice based Natural Language Processing Tool"
                print(welcomeText)
                Text2SpeechRequest("for_welcome", welcomeText)
                silenceText = "Please wait while I analyze your environment and self adjust myself to better undersand what you say"
                print(silenceText)
                Text2SpeechRequest("for_silence", silenceText)
                r = sr.Recognizer()
                # Calibrate the energy threshold against ambient noise.
                with m as source:
                    r.adjust_for_ambient_noise(source)
                thresholdText = "Optimal Energy Threshold set to: {}".format(r.energy_threshold)
                print(thresholdText)
                if (r.energy_threshold > 500):
                    environmentNoisyText = "Your environment is noisy so please help me by speaking a bit loud"
                    print(environmentNoisyText)
                    Text2SpeechRequest("for_notifying_noisy_environment", environmentNoisyText)
            with sr.Microphone(device_index=None, sample_rate=48000) as source:
                #----------------------------------------------------------------ASSISTING FOR OTHER OPTIONS--------------------------------------------------#
                sayCommandText = "Can you please say your request now"
                print(sayCommandText)
                Text2SpeechRequest("for_requesting_command", sayCommandText)
                audio = r.listen(source)
                waitForUnderstandingText = "Please wait while I try to understand what you just said"
                print(waitForUnderstandingText)
                Text2SpeechRequest("for_waiting_to_undersanding_input", waitForUnderstandingText)
                if threeOptionAware == 0:
                    threeOptionAware = threeOptionAware + 1
                    learningRequestText = "I am learning to understand how you speak, so I am going to provide you with two different options that represents what I understood."
                    print(learningRequestText)
                    Text2SpeechRequest("for_notifying_about_2_options", learningRequestText)
                # Transcribe the same audio with both recognisers.
                transcriptGoogle = GetGoogleTranscripts(r, sr, audio)
                if not transcriptGoogle == 'None':
                    print("Option # 1: Did you spoke the following words? : " + transcriptGoogle)
                else:
                    print("Sorry I cannot recognize what you said. Please try again")
                    finalTranscript = ""
                transcriptIbm = GetIBMWatsonTranscripts(r, sr, audio)
                if not transcriptIbm == 'None':
                    print("Option # 2: Did you spoke the following words? : " + transcriptIbm)
                else:
                    print("Sorry I cannot recognize what you said. Please try again")
                    finalTranscript = ""
                choiceText = 'Please say the option number which closely relates to what you just said. If you are not satisfied please say restart.'
                print(choiceText)
                Text2SpeechRequest("for_getting_transcript_choice", choiceText)
                # Let the user choose a transcript by voice.
                audioListenTranscriptChoice = r.listen(source)
                transcriptGoogleListenTranscriptChoice = GetGoogleTranscripts(r, sr, audioListenTranscriptChoice)
                if re.search("1", transcriptGoogleListenTranscriptChoice.lower()):
                    optionChoosen = "1"
                    finalTranscript = transcriptGoogle
                elif re.search("2", transcriptGoogleListenTranscriptChoice.lower()):
                    optionChoosen = "2"
                    finalTranscript = transcriptIbm
                else:
                    print("Restarting to get your request again and understand your request thoroughly")
                    optionChoosen = "0"
                if optionChoosen in ("1", "2"):
                    if len(finalTranscript) > 0:
                        nltkOpsText = "Lets perform some natural language analysis on the text you had just talked "
                        print(nltkOpsText)
                        Text2SpeechRequest("for_notifying_nltk_operations", nltkOpsText)
                        print("Parts Of Speech Tags in your sentence are :")
                        print(str(nltk.pos_tag(nltk.word_tokenize(finalTranscript))))
                        blob = TextBlob(finalTranscript)
                        print("Grammatical structure of your sentence is :")
                        print(blob.parse())
                        # Sum per-sentence polarity for an overall score.
                        overAllSentiment = 0
                        for sentence in blob.sentences:
                            overAllSentiment = overAllSentiment + sentence.sentiment.polarity
                        print("Sentiment score of your sentence is :")
                        print(overAllSentiment)
                        if overAllSentiment > 0.0:
                            positiveText = "You have spoken a positive sentence. Seems you are happy !"
                            print(positiveText)
                            Text2SpeechRequest("for_notifying_positive_sentiment", positiveText)
                        elif overAllSentiment < 0.0:
                            negativeText = "You have spoken a negative sentence. Seems you are not happy."
                            print(negativeText)
                            Text2SpeechRequest("for_notifying_negative_sentiment", negativeText)
                        else:
                            neutralText = "You have spoken a neutral sentence."
                            print(neutralText)
                            Text2SpeechRequest("for_notifying_neutral_text", neutralText)
                        exitOptionText = "Are you satisfied with the results that I had presented to you? Do you want me to help you with anything else?"
                        print(exitOptionText)
                        Text2SpeechRequest("for_exit_choice_getting", exitOptionText)
                        audioListenExit = r.listen(source)
                        transcriptGoogleListenExit = GetGoogleTranscripts(r, sr, audioListenExit)
                        if re.search("yes|yeah|fine|yup|up|ok|satisfied", transcriptGoogleListenExit.lower()):
                            exitOption = "0"
                        elif re.search("no|thats ok|i am good|done|thanks", transcriptGoogleListenExit.lower()):
                            exitOption = "1"
                        else:
                            optionChoosen = "0"
    # NOTE(review): attribute access without a call — this does NOT close
    # the microphone stream; probably meant to be invoked.
    sr.Microphone.MicrophoneStream.close
    goodByeText = "Thank you for using Shriram's NLP Application!. Hope I was of help to you. Have a great day"
    print(goodByeText)
    Text2SpeechRequest("for_goodbye_note", goodByeText)
def check_sarc(tweet):
    """Return 1 if *tweet* shows a positive/negative phrase-polarity
    contrast (sarcasm heuristic), else 0.

    Chunks the pattern-parser output into phrases, groups noun/adjective
    phrases (SF) and verb phrases (sf) including common two- and
    three-chunk combinations, then compares the sentiment polarity of the
    two groups.
    """
    blob = TextBlob(tweet, parser=PatternParser())
    tokens = blob.parse().split(' ')
    dic = defaultdict(list) # stores all phrases by category
    temp = ''
    phrases = [] # list of all phrases
    for t in tokens:
        if t.split('/')[2] == 'O':
            if temp:
                phrases.append((ctag,temp))
            dic[t.split('/')[2]].append(temp)
            temp = t.split('/')[0]+' '
            ctag = t.split('/')[2]
        elif 'B-' in t.split('/')[2]:
            if temp:
                phrases.append((ctag,temp))
            temp = t.split('/')[0]+' '
            dic[t.split('/')[2].split('-')[1]].append(temp)
            ctag = t.split('/')[2].split('-')[1]
        elif 'I-' in t.split('/')[2]:
            # continuation token: extend the open phrase
            dic[t.split('/')[2].split('-')[1]][-1] += t.split('/')[0]+' '
            temp += t.split('/')[0]+' '
            ctag = t.split('/')[2].split('-')[1]
        else:
            pass
    if temp:
        phrases.append((ctag,temp))
    SF = []  # noun/adjective-centred phrases
    sf = []  # verb-centred phrases
    for i in phrases:
        # FIX: was 'ADjP' (typo) which can never match the 'ADJP' chunk tag.
        if i[0] in ['NP','ADJP']:
            SF.append(i[1])
        elif i[0]=='VP':
            sf.append(i[1])
    for i in range(len(phrases)-1):
        if phrases[i][0]=='NP' and phrases[i+1][0]=='VP':
            SF.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='ADVP' and phrases[i+1][0]=='VP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='VP' and phrases[i+1][0]=='ADVP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='ADJP' and phrases[i+1][0]=='VP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='VP' and phrases[i+1][0]=='NP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
    for i in range(len(phrases)-2):
        if phrases[i][0]=='VP' and phrases[i+1][0]=='ADVP' and phrases[i+2][0]=='ADJP':
            # FIX: third component was phrases[i+1][1] (middle phrase doubled).
            sf.append(phrases[i][1]+' '+phrases[i+1][1]+' '+phrases[i+2][1])
        elif phrases[i][0]=='VP' and phrases[i+1][0]=='ADJP' and phrases[i+2][0]=='NP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1]+' '+phrases[i+2][1])
        elif phrases[i][0]=='ADVP' and phrases[i+1][0]=='ADJP' and phrases[i+2][0]=='NP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1]+' '+phrases[i+2][1])
    print(SF)
    print(sf)
    PSF = []
    NSF = []
    psf = []
    nsf = []
    for i in SF:
        blob = TextBlob(i)
        if blob.polarity > 0:
            PSF.append(i)
        elif blob.polarity < 0:
            NSF.append(i)
    for i in sf:
        blob = TextBlob(i)
        if blob.polarity > 0:
            psf.append(i)
        elif blob.polarity < 0:
            # FIX: negative verb phrases were appended to psf, leaving nsf
            # always empty and disabling half of the contrast check below.
            nsf.append(i)
    print(PSF)
    print(NSF)
    print(psf)
    print(nsf)
    if (PSF and nsf) or (psf and NSF):
        return 1
    else:
        return 0
from nltk import Tree
from nltk.grammar import CFG
from nltk.parse.generate import generate, demo_grammar
# NOTE(review): CFG is imported twice (nltk.grammar above and nltk here).
from nltk import CFG
import string , re
import wordpolarity

s = "I do bad things for good people."
# NOTE(review): this is the Python-2 str.translate signature (deletechars);
# under Python 3 this call raises TypeError.
mystring = s.translate(None , string.punctuation)
#print s.parse()
b = TextBlob(mystring)
print b.sentiment.polarity
# Split the parse string into word/POS/chunk/PNP fields per token.
g = str(b.parse())
x = g.split()
word_list = []
mystr = mystring.split()
space_list = x
main_list = []
#print (space_list)
for word in space_list:
    new_list = word.split("/")
    main_list.append(new_list)
# NOTE(review): chunk appears truncated — `a` is used below this region.
a={}
__author__ = 'cloudera'
from senticnet.senticnet import Senticnet
from textblob import TextBlob

# Python 2 demo: compare Senticnet concept lookups with TextBlob analysis.
sentence = "One of the very first Apple 1 computers, worth about 500,000, goes on sale later this month at Christie's auction house, the latest vintage tech sale."
sn = Senticnet()
concept_info = sn.concept('love')
print 'sn.concept(love) = ', concept_info
polarity = sn.polarity('love')
print 'polarity(love) = ', polarity
semantics = sn.semantics('love')
print 'semantics = ', semantics
sentics = sn.sentics('love')
print 'sentics = ', sentics
sentenceBlob = TextBlob(sentence)
print sentenceBlob.parse()
print sentenceBlob.sentiment
# NOTE(review): Senticnet concept() expects a single concept string; passing
# a whole sentence likely fails or returns nothing useful — confirm.
sentenceConcept = sn.concept(sentence)
print sentenceConcept
# Spelling correction demo.
b = TextBlob("I havv goood speling!")
print(b.correct())
#w = Word('havv')
#print(w.spellcheck())

# Word frequency lookups.
monty = TextBlob("We are no longer the Knights who say Ni. "
                 "We are now the Knights who say Ekki ekki ekki PTANG.")
print(monty.word_counts['ekki'])
print(monty.words.count('ekki'))
print(monty.words.count('ekki', case_sensitive=True)) # case_sensitive --> match upper/lower case exactly

# Translation — NOTE(review): needs network access; removed in newer
# textblob releases, confirm the pinned version.
en_blob = TextBlob(u'The sooner, the better')
print(en_blob.translate(to='fr'))
b = TextBlob(u"今年の夏の間に日本に行きました")
print(b.translate(from_lang='ja',to='fr'))

# Shallow parsing.
c = TextBlob("And now for something completely different.")
print(c.parse())

# n-grams (result discarded — demo only).
blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

# NOTE(review): `zen` is a TextBlob defined elsewhere in the original file.
for s in zen.sentences:
    print(s)
    print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))
# Tokenization — NOTE(review): `blob` and `Blobber` come from earlier in the
# original tutorial file.
blob.tokens #This is an alternative way
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

# Noun phrase chunkers
from textblob.np_extractors import ConllExtractor
extractor = ConllExtractor()
blob = TextBlob("Python is a high-level programming language.", np_extractor=extractor)
blob.noun_phrases

# POS taggers
from textblob.taggers import NLTKTagger
nltk_tagger = NLTKTagger()
blob = TextBlob("Tag! You're It!", pos_tagger=nltk_tagger)
blob.pos_tags

# Parser
from textblob.parsers import PatternParser
blob = TextBlob("Parsing is fun.", parser=PatternParser())
blob.parse()

# TextBlobs that share the same models, built through a Blobber factory.
# FIX: was "rom textblob.taggers import NLTKTagger" — a syntax error
# (missing the 'f' of "from").
from textblob.taggers import NLTKTagger
tb = Blobber(pos_tagger=NLTKTagger())
blob1 = tb("This is a blob.")
blob2 = tb("This is another blob.")
blob1.pos_tagger is blob2.pos_tagger
#Words can be lemmatized by the lemmatize method, but notice that the TextBlog lemmatize method is # inherited from NLTK Word Lemmatizer, and the default POS Tag is "n", if you want lemmatize other #pos tag words, you need specify it: nlpblob.words[138].pluralize().lemmatize() nlpblob.words[21].pluralize().lemmatize() #9)Spelling Correction #TextBlob Spelling correction is based on Peter Norvig"s "How to Write a Spelling Corrector", which is # implemented in the pattern library: b = TextBlob("I havv good speling!") b.correct() #Word objects also have a spellcheck() method that returns a list of (word, confidence) tuples with spelling suggestions: #9) Parsing: TextBlob parse method is based on pattern parser: nlpblob.parse() #10) Translation and Language Detection: By Google"s API: #Detect nlpblob.detect_language() nlpblob.translate(to="hi") nlpblob.translate(to="kn") # es fr nlpblob.translate(to="fr") # es fr nlpblob.translate(to="zh") # Few more example. How to get keyword for any particular language non_eng_blob = TextBlob("हिन्दी समाचार की आधिकारिक वेबसाइट. पढ़ें देश और दुनिया की ताजा ख़बरें") non_eng_blob.detect_language() non_eng_blob = TextBlob("ಮುಖ್ಯ ವಾರ್ತೆಗಳು ಜನಪ್ರಿಯ")