def tokenize(text, spell=False, stem=False, lemma=False, lower=False, stop=False):
    """Split *text* into UTF-8 encoded, purely alphabetic word tokens.

    The optional steps run in a fixed order: spelling correction,
    lower-casing, lemmatisation, stemming and finally stop-word removal.

    :param text: byte string to tokenize; it is decoded as UTF-8
    :param spell: run TextBlob's spelling correction on the whole text first
    :param stem: stem every word with the module-level ``stemmer``
    :param lemma: lemmatize the word list
    :param lower: lower-case the word list
    :param stop: drop words that appear in the module-level ``stopwords``
    :return: list of UTF-8 encoded tokens
    """
    blob = TextBlob(unicode(text, 'utf8'))
    word_list = (blob.correct() if spell else blob).words
    if lower:
        word_list = word_list.lower()
    if lemma:
        word_list = word_list.lemmatize()
    if stem:
        word_list = [stemmer.stem(word) for word in word_list]

    tokens = []
    for word in word_list:
        # Non-alphabetic tokens (numbers, punctuation remnants) are dropped.
        if not word.isalpha():
            continue
        if stop and word in stopwords:
            continue
        tokens.append(word.encode('utf-8'))
    return tokens
def correctSpelling(text):
    '''
    Run TextBlob's spelling correction over a piece of text.

    :param text: the input text
    :return: the object returned by TextBlob.correct() for that text
    '''
    return TextBlob(text).correct()
def correction():
    """Return a best-guess spelling correction for the ``text`` query parameter.

    URLs take the form '/correction?text=some%20textt%20to%20corect'.
    The response is a JSON object that looks like
    {text: "some text to correct"}. A missing parameter is treated as an
    empty string.
    """
    query = request.args.get('text', '')
    corrected = TextBlob(query).correct()
    return jsonify(text=unicode(corrected))
def post_process_review(review_id):
    """Post-process a stored review: profanity, sentiment and spell check.

    Looks the review up by ``review_id`` and returns without doing anything
    when it does not exist. Otherwise the review is updated in place and
    written back through ``database.add`` / ``database.push``.

    :param review_id: id passed to ``database.Review.get_one_by``
    :return: None
    """
    review = database.Review.get_one_by(id=review_id)
    if not review:
        return

    raw_body = review.body

    # Profanity: flag the review and, if flagged, keep the untouched text
    # alongside a censored body.
    review.profanity = profanity.contains_profanity(raw_body)
    if review.profanity:
        review.profanity_not_removed_body = raw_body
        review.body = profanity.censor(raw_body)

    # Sentiment and spelling are always computed from the uncensored text.
    blob = TextBlob(raw_body)
    sentiment = blob.sentiment
    review.sentiment_polarity = sentiment.polarity
    review.sentiment_subjectivity = sentiment.subjectivity
    review.spell_checked_body = unicode(blob.correct())

    # Persist the enriched review.
    database.add(review)
    database.push()
def articleSearch():
    '''Main function that searches for an article, based on keyword(s)

    Interactive loop. Each round:
      1. reads a search term and, when TextBlob suggests a different
         spelling, asks whether to use the suggestion;
      2. queries the NYTimes Article Search API for the term;
      3. writes the numbered headlines (with the number of noun phrases
         equal to the term) to gamingnews.txt and prints the file back;
      4. asks whether to search again.

    Returns None; all results go to stdout and gamingnews.txt.
    '''
    key = 'NYTimes_Article_Search_key'
    endpoint = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'

    menuRepeat = True
    while menuRepeat:
        query = input("What game would you like to search?\n")

        # correct() is expensive, so run it once per query and reuse it.
        corrected = str(TextBlob(query).correct())
        if corrected != query:
            # The prompt state is local to this search, so every new query
            # gets its own "Did you mean" question (previously the flag was
            # only initialised once, before the outer loop).
            while True:
                answer = input("Did you mean " + '"' + corrected + '"' +
                               "? (1 for yes, 2 for no)\n")
                if answer == '1':
                    query = corrected
                    break
                if answer == '2':
                    break
                print("\nTry again. (1 for yes, 2 for no)")
        print(query)

        # Let requests build the query string so spaces and '&' in the
        # search term are encoded correctly.
        data = requests.get(endpoint, params={'q': query, 'api-key': key})
        data = data.json()

        try:
            # Write
            with open('gamingnews.txt', 'w') as myfile:
                for i, article in enumerate(data['response']['docs'], start=1):
                    headline = article['headline']['main']
                    count = TextBlob(headline).noun_phrases.count(query)
                    myfile.write(
                        str(i) + ': ' + headline + " n = " + str(count) + '\n')
            # Read
            with open('gamingnews.txt', 'r') as myfile:
                for line in myfile:
                    print(line)
        except Exception:
            # Best effort: any malformed or empty response is reported as
            # "no news" instead of aborting the session. Unlike a bare
            # except, Ctrl-C still interrupts the program.
            print("\nNo news found on this game. Sorry. :'[\n")

        # Ask the user if they want to do another search
        while True:
            menuAnswer = input(
                "Would you like to search again? (1 for yes, 2 for no)\n")
            if menuAnswer == '1':
                break
            if menuAnswer == '2':
                print(
                    "\nThanks for using my Video Game Search API. Goodbye!\n")
                menuRepeat = False
                break
            print("\nTry again. (1 for yes, 2 for no)")
def main():
    """Print TextBlob's spelling correction of a fixed sample sentence."""
    sent = TextBlob("my name pras tean is")
    var = sent.correct()
    # The call form of print runs on both Python 2 and Python 3; the bare
    # statement form is a SyntaxError on Python 3.
    print(var)
adj_ctr += 1 adj.append(pair[0]) elif tag == 'NN' or tag == 'NNS': noun_ctr += 1 noun.append(pair[0]) print('\nTotal number of adjectives in tweet collection= ', adj_ctr) print("List of adjectives in tweet collection: ", adj) print('\nTotal number of nouns in tweet collection= ', noun_ctr, noun) print("List of nouns in tweet collection: ", noun) #working on string entered manually st = "I amm an ostrich and nobody can see me, not even I myself." st2 = "It's so sad that turtles can only walk slow." st3 = "Not being smart and amazing is not worst thing in the world." blob = TextBlob(st) blob2 = TextBlob(st2) blob3 = TextBlob(st3) print('\nOriginal string: ', st) print('Spell checked string: ', blob.correct()) print("Detecting language in above sentence...", blob.detect_language()) print(blob.translate(to='hi')) if blob2.sentiment.polarity > 0: print("\n'", st2, "'", 'is positive') else: print("\n'", st2, ",", 'is negative') if blob3.sentiment.polarity > 0: print("\n'", st3, "'", 'is positive') else: print("\n'", st3, "'", 'is negative')
from textblob import TextBlob

# Prompt for a text, run TextBlob's spelling correction on it and print the
# corrected text as a plain string.
user_text = input("Enter a text that needs spelling correction: ")
print(TextBlob(user_text).correct().string)
def translate(english_word):
    """Spell-correct *english_word* and translate it with TextBlob.

    :param english_word: English text to translate
    :return: unicode string of the translation from 'en' to 'ar'
    """
    corrected = TextBlob(english_word).correct()
    # Positional arguments are (from_lang, to).
    translated = corrected.translate('en', 'ar')
    return unicode(translated)
from textblob import TextBlob
import sys

# Command-line spell checker: prints the corrected form of the first argument.
if len(sys.argv) < 2:
    # Fail with a usage message instead of an IndexError traceback.
    sys.exit("usage: %s TEXT" % sys.argv[0])

string_tocheck = TextBlob(sys.argv[1])
# The call form of print runs on both Python 2 and Python 3.
print(string_tocheck.correct())
horizontal_img = cv2.flip(img, 0) plt.imshow(horizontal_img) vertical_img = cv2.flip(img, 1) plt.imshow(vertical_img) both_img = cv2.flip(img,-1) plt.imshow(both_img) #................................................................................... import nltk nltk.download() from textblob import TextBlob # string stin = TextBlob('heloo this is new prograaming languaage') stin # correcting the textblob stin.correct() stin.word_counts TextBlob('æˆ‘çˆ±ä½ ').detect_language() TextBlob('').translate() TextBlob('').translate.lower() stin.pos_tags #..................................................................................... a= 'hello python' b ='hello python.learning' # getting unique words set(a) # sorted return all keys in sorted
from textblob import TextBlob  # correcter library.
import enchant  # word suggester library.

# The dictionary does not depend on the input, so build it once instead of
# on every loop iteration.
c = enchant.Dict("en_US")

# Endless prompt: print TextBlob's correction and enchant's suggestions.
while True:
    UserInput = input("Enter: ")
    if not UserInput:
        # enchant rejects an empty word with ValueError; just ask again.
        continue
    A = TextBlob(UserInput)
    B = A.correct()
    print(B)
    print(c.suggest(UserInput))

# source
# textblob from textblob website
# enchant from geeksforgeeks
def correct():
    """Return the spelling-corrected ``text`` query parameter as JSON.

    The response has the form {"correct": "<corrected text>"}. A missing
    ``text`` parameter is treated as an empty string instead of failing on
    ``None.strip()``.
    """
    # Keep the value as text: TextBlob works on strings, and handing it
    # UTF-8 encoded bytes is not usable on Python 3.
    text = request.args.get('text', '').strip()
    blob = TextBlob(text)
    return jsonify({'correct': str(blob.correct())})
import argparse
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")
args = vars(ap.parse_args())

# load the input image and convert it from BGR to RGB channel
# ordering
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread returns None (it does not raise) when the file is missing
    # or unreadable; stop with a clear message instead of a cvtColor error.
    ap.error("could not read image: {}".format(args["image"]))
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# use Tesseract to OCR the image
text = pytesseract.image_to_string(rgb)

# show the text *before* ocr-spellchecking has been applied
print("BEFORE SPELLCHECK")
print("=================")
print(text)
print("\n")

# apply spell checking to the OCR'd text
tb = TextBlob(text)
corrected = tb.correct()

# show the text after ocr-spellchecking has been applied
print("AFTER SPELLCHECK")
print("================")
print(corrected)
w.lemmatize() # Finding the nearest word w = Word("went") w.lemmatize("v") # Pass in WordNet part of speech (verb) w = Word("left") w.lemmatize("v") # Pass in WordNet part of speech (verb) ############################################################################### """ Spelling Correction Use the correct() method to attempt spelling correction. """ b = TextBlob("I havv goood speling!") print(b.correct()) """ Word objects have a spellcheck() Word.spellcheck() method that returns a list of (word, confidence) tuples with spelling suggestions. """ from textblob import Word w = Word('falibility') w.spellcheck() w = Word('carrer') w.spellcheck() ###############################################################################
words = movie_reviews.words(fileid) pos.append((create_word_features(words), "positive")) #print("Training") train_set = neg[:round(0.7 * len(neg))] + pos[:round(0.7 * len(pos))] test_set = neg[round((1 - 0.7) * len(neg)):] + pos[round((1 - 0.7) * len(pos)):] classifier = NaiveBayesClassifier.train(train_set) accuracy = nltk.classify.util.accuracy(classifier, test_set) print("Accuracy for the classifier is : ", round(accuracy * 100, 4)) print("") x = "The movie was ok in the begining, but got bad later." # str(input()) a = TextBlob(x) x = str(a.correct()) print(x) ans = classifier.classify(create_word_features(x)) print("The comment is : ", ans) sentiment_dictionary = {} for line in open('/word-score.txt'): word, score = line.split('\t') sentiment_dictionary[word] = int(score) words = word_tokenize(x.lower()) print("Rating given to the review is : ", sum(sentiment_dictionary.get(word, 0) for word in words)) print("Polarity of the comment is : ", round(a.polarity, 4))
comment_dict[i].append(sentence[k]) #delete unwanted '' words for j in range(len(comment_dict)): comment_dict[j] = [comment_dict[j][i] for i in range(len(comment_dict[j])) if comment_dict[j][i] not in ''] for i in range(len(comment_dict)): reviewText[i] = ('. '.join(comment_dict[i][j] for j in range(len(comment_dict[i])))) # spelling correction for i in range(len(reviewText)): b = TextBlob(reviewText[i]) reviewText[i] = b.correct() dataset_corrected = DataFrame({ 'user_id': userID, 'p_id': productID, 'p_name': productname, 'rating': rating, 'age': age, 'reviewText': reviewText}) # creating corpus corpus = defaultdict(set) for i in range(len(reviewText)): wiki = reviewText[i] corpus[i] = wiki.sentences # print(corpus)
# Interactive search helper: fetches the Google and Craigslist home pages,
# asks the user for a query, prints TextBlob's spelling correction of it and
# checks the corrected words against AutoGroup ('auto', 'car', 'vehicle')
# before asking follow-up questions; a Chrome webdriver then opens cars.com
# (presumably inside that same branch - the collapsed line does not show it).
# NOTE(review): correct() is evaluated twice (once for the print, once for the
# assignment), and "so you're looking for" echoes the uncorrected BaseQuery -
# confirm both are intended.
Goolge = 'https://google.com' Craigs = 'https://craigslist.org' Cars = 'https://cars.com' response = requests.get(Goolge) response2 = requests.get(Craigs) GoogleHtml = response.content CraigsHtml = response2.content #dont really have a need for the raw html tho BaseQuery = input("What exactly are you searching for ?") NewBaseQuery = TextBlob(BaseQuery)#Have to make query a blob before we correct it #BaseQuery.noun_phrases print(NewBaseQuery.correct()) #attempts to correct any spelling errors NewBaseQuery = NewBaseQuery.correct() print("so you're looking for " + BaseQuery) print(NewBaseQuery.words) AutoGroup = ['auto', 'car', 'vehicle'] if any(word in AutoGroup for word in NewBaseQuery.words): #checks Autogroup against NewBaseQuery.words MakeQuery = input("Do you have a preference of Make?") #ModelQuery = input("Do you have a Model preference?") #CashValue = input("And exactly how much are you willing to spend?") Zip = input("And Lastly what is your zipcode?") Ibrowser = webdriver.Chrome() Ibrowser.get(Cars)
# Streamlit page for the "Nimbus Words" app: a sidebar select box picks one of
# six activities (Summary, Tokenizer, Synonyms, Translator, Search, Spell
# Correction) and the if/elif chain renders the widgets of the chosen one.
# Relies on names defined outside this function: st, summary, summarize, nlp,
# spacy_streamlit, wordnet, TextBlob, Google, pluralize, singularize,
# comparative and superlative.
# NOTE(review): the Summary branch creates two text areas with the same label
# "Input Text For Summary" - confirm Streamlit does not treat them as
# duplicate widgets.
# NOTE(review): a.correct() / translate() results are passed to st.success
# without str() - confirm Streamlit renders them as intended.
def calc_main(): st.write("Nimbus Words") st.sidebar.header("Input Options") activites = ["Summary", "Tokenizer","Synonyms","Translator","Search","Spell Correction"] choice = st.sidebar.selectbox("Select Activity",activites) if choice == "Summary": st.title('AI Text Summarizer') text = st.text_area("Input Text For Summary",height=300) if st.button("summarize"): st.success(summary(text)) text_range= st.sidebar.slider("Summarize words Range",25,500) text = st.text_area("Input Text For Summary",height=250) if st.button("custom summarization"): st.warning(summarize(text,word_count=text_range)) # Tokenizer elif choice == "Tokenizer": st.title('Text Tokenizer') row_data = st.text_area("write Text For Tokenizer") docx= nlp(row_data) if st.button("Tokenizer"): spacy_streamlit.visualize_tokens(docx,attrs=['text','pos_','dep_','ent_type_']) if st.button("NER"): spacy_streamlit.visualize_ner(docx,labels=nlp.get_pipe('ner').labels) if st.button("Text Relationship"): spacy_streamlit.visualize_parser(docx) # synonyms elif choice == "Synonyms": st.title('Synonym Generator') text = st.text_area("Enter Text") if st.button("Find"): for syn in wordnet.synsets(text): for i in syn.lemmas(): st.success(i.name()) if st.checkbox("Defination"): for syn in wordnet.synsets(text): st.warning(syn.definition()) if st.checkbox("Example"): for syn in wordnet.synsets(text): st.success(syn.examples()) # Translator elif choice == "Translator": st.title('Speech Tranlation') row_text = st.text_area("Enter Your Text For Translation",height=300) translation_text = TextBlob(row_text) list1 = ["en","ta","pa","gu","hi","ur","kn","bn","te"] a= st.selectbox("select",list1) if st.button("search"): #input1 = TextBlob("Simple is better than complex") st.success(translation_text.translate(to=a)) #Search Bar elif choice == "Search": st.title('Web Search') row_text= st.text_input("Search Anything") google = Google(license=None) if st.button("search"): for search_result in google.search(row_text): 
st.write(search_result.text) st.warning(search_result.url) elif choice == "Spell Correction": st.title('AI Spell Correction') text_data = st.text_area("Enter Text Here") a = TextBlob(text_data) if st.button("Correct"): st.success(a.correct()) st.title('Pluralize & Singularize') text_data1 = st.text_input("Enter a word For pluralize / singularize") if st.checkbox("pluralize"): st.warning(pluralize(text_data1)) if st.checkbox("singularize"): st.warning(singularize(text_data1)) st.title('Compartitive & Superlative') text2 = st.text_input("Enter Text For comparative & superlative") if st.checkbox("comparative"): st.success(comparative(text2)) if st.checkbox("superlative"): st.success(superlative(text2))
#def correct_learn(): #correcnt and learn here polarity_corr["cant"] = -0.25 polarity_corr["crashes"] = -0.25 print "test\n", polarity_corr["cant"] input=TextBlob(raw_input("Statement goes here:\n")) print "tag start ***" for tag in input.tags: tag_list.append(tag[0]) print(tag[0]) print "*** tag end" input=input.correct() #print "corrected-> ", input, "\n" for sentence in input.sentences: print(sentence.sentiment) pol=(sentence.sentiment.polarity) sub=sentence.sentiment.subjectivity if(sentence.sentiment.polarity<0.0): print "negative" for lst in tag_list: #print lst if((lst.lower()) in bag2): print "\nKey area : ",lst, "\n" #bag2.index(lst.lower()) #return 'a' in bag2 ''''
text_file.write(content) return (num, boxes) cont, boxes = get_boxes(cont, method="top-to-bottom") count_coord = 0 #counting the number of pages / files in extracted_text directory for i in range(n): if os.path.exists("extracted_text\\img" + str(i) + ".txt"): with open( "extracted_text\\img" + str(i) + ".txt", "r") as f: # Opening the test file with the intention to read text1 = f.read() # Reading the file textBlb = TextBlob(text1) # Making our first textblob textCorrected = textBlb.correct() # Correcting the text # corrected_text = remove_special_characters(str(textCorrected)) os.makedirs("../BDRP_Project/after_spelling_correction", exist_ok=True) text_file = open( "after_spelling_correction\\text_corrected" + str(i) + ".txt", "w") text2 = str(textCorrected) text_file.write(text2) #evaluating originalCompCorrected1 = compare(text1, text2) # print("",originalCompCorrected1) print( "Percentage of fixed mistakes in extracted text\t " + str(i) + "\t after spelling correction:", percentageOfFixedMistakes(originalCompCorrected1), "%")
from textblob import TextBlob

# Echo the entered text, then print its spelling-corrected form.
a = input('enter the text : ')
print(a)
corrected = TextBlob(a).correct()
print(str(corrected))
# Scratch experiment with TextBlob: rebinds `text` to three sample sentences
# (only the last assignment is in effect afterwards), builds a TextBlob from
# it, spell-corrects it and evaluates tags, noun phrases and sentiment.
# NOTE(review): the tqdm loop of 100 iterations only rebuilds the blob; it
# looks like a timing check - confirm before removing.
# TODO check the following # > hwo to use .correct() so that the text are corrected # >does it deal with hashtags? from textblob import TextBlob import nltk from tqdm import tqdm nltk.download('punkt') text = "i am 😞" # emoji sentiment.polarity = 0 text = "I hate going to school" text = 'I am tyyping it wrong on purrpose' for i in tqdm(range(100)): wiki = TextBlob(text) wiki = TextBlob(text) wiki = wiki.correct() # correcct the misspelling wiki.tags wiki.noun_phrases wiki.sentiment wiki.sentiment.polarity
# You can access the synsets for a Word via the synsets property or the
# get_synsets method, optionally passing in a part of speech.
# (print is written in call form so the snippet runs on Python 2 and 3.)
word = Word("good")
print(word.synsets)
print(Word("hack").get_synsets(pos=VERB))
print(Word("octopus").definitions[1])
print(Word("octopus").synsets)
#word_dictinary('project')

# WordLists (A WordList is just a Python list with additional methods.)
animals = TextBlob("cat dog octopus")
print(animals.words)
print(animals.words.pluralize())

# Spelling Correction (Use the correct() method to attempt spelling correction.)
b = TextBlob("I havv goood speling!")
print(b.correct())
w = Word('falibility')
print(w.spellcheck())

# Get Word and Noun Phrase Frequencies
monty = TextBlob("We are no longer the Knights who say Ni. "
                 "We are now the Knights who say Ekki ekki ekki PTANG.")
print(monty.word_counts['ekki'])
# The second way is to use the count() method.
print(monty.words.count('ekki'))
print(monty.words.count('Ekki', case_sensitive=True))

# TextBlobs Are Like Python Strings
print(zen.upper())

# You can make comparisons between TextBlobs and strings.
apple_blob = TextBlob('apples')
print(text.words.count('you')) # In[10]: print(text.tags) # In[11]: print(text[:10]) # ### Pre-processing # In[12]: # check spelling text = text.correct() # In[13]: # convert to lower case text = text.lower() # In[14]: # form back sentence clean_text = ' '.join(text.words) # In[15]: print(clean_text)
def correct_spell():
    """Prompt for text and print its spelling-corrected form.

    :return: None (the result of the print call)
    """
    input_stat = input("Enter Text here: ")
    corrected = TextBlob(input_stat).correct()
    result = print(corrected)
    return result
def correct(self, msg):
    """Return the spelling correction of *msg*, or None when nothing changed.

    :param msg: text to check
    :return: the corrected TextBlob if it differs from *msg*, otherwise None
    """
    corr = TextBlob(msg).correct()
    if msg != corr:
        return corr
    return None
from textblob import TextBlob

a = TextBlob("intreset")
# correct() is expensive: run it once and reuse the result for printing,
# language detection and translation.
b = a.correct()
print(b)
print(b.detect_language())
print(b.translate(to="fr"))
def spellcheck(message):
    """Return the spelling-corrected *message* as JSON.

    :param message: text to correct
    :return: JSON response of the form {"crt": "<corrected text>"}
    """
    text = TextBlob(message)
    # Convert explicitly: jsonify needs a plain string, and prefixing '' to a
    # TextBlob does not reliably produce one.
    cc = str(text.correct())
    response = {'crt': cc}
    return jsonify(response)
# Reads reviews from "TwentySeven.csv" (stopping at row index 28), keeps those
# with at least two tokens, spell-corrects them with TextBlob and annotates
# each with the Stanford CoreNLP server at http://localhost:9000. When a noun
# or verb tag sets `flag`, the result of textBllob(Review) is passed, together
# with columns 1-7 of the matching CSV row, to FinalWrite (presumably once per
# review - the collapsed line does not show the exact nesting).
# NOTE(review): IndexError and TypeError are silently swallowed, so rows with
# missing columns or failed annotations are skipped without any message.
# NOTE(review): t0, i, onReviewNumber, ReviewsJoinedToMakeASentence and
# stringNone are assigned but never read in the visible code.
def AnalyseSentiments(): nlp = StanfordCoreNLP('http://localhost:9000') Reviews = [] operations = {'annotators': 'tokenize,lemma,pos,sentiment', 'outputFormat': 'json'} allReviewData = [] InitialWriteToFile() l = -1 fileName = "TwentySeven.csv" with open(fileName, 'r') as f: reader = csv.reader(f, delimiter=',') for row in reader: l += 1 try: if l == 28: #Break at the last row. Avoids errors break tokens = nltk.word_tokenize(row[0]) Reviews.append(row[0]) if not len(tokens)<2: allReviewData.append(row) print(str(l)) except(IndexError): pass filtered_Reviews = [] print("Finished reading") for Review in Reviews: tokens = nltk.word_tokenize(Review) if not len(tokens) < 2: filtered_Reviews.append(tokens) allReviewData_Final = allReviewData onReviewNumber = 0 ReviewsJoinedToMakeASentence = "" filtered_ReviewsAfterProcess1 = [] print("Finished Filtering Reviews with len less than 2\n") print("Starting spell check\n") t0 = time.time() spellCount = 0 for Review in filtered_Reviews: JoinedTokens = ' '.join(word for word in Review) filtered_review = TextBlob(str(JoinedTokens)) JoinedTokens = str(filtered_review.correct()) filtered_ReviewsAfterProcess1.append(JoinedTokens) spellCount += 1 if spellCount%50 == 0: print("Spellchecked: "+str(spellCount)) flag = 0 i = 0 pos = [] reviewsWithSentiment = [] print("Starting Sentiment Analysis") counter = -1 for Review in filtered_ReviewsAfterProcess1: res = nlp.annotate(Review,operations) counter += 1 try: for s in res["sentences"]: for token in s["tokens"]: stringNone = str(token["pos"]) pos.append(token["pos"]) # Flags if a review has a verb or noun. 
if str(token["pos"]) == "NN" or str(token["pos"]) == "NNS" or str(token["pos"]) == "NNP" or str(token["pos"]) == "NNPS": flag = 1 if str(token["pos"]) == "VB" or str(token["pos"]) == "VBG" or str(token["pos"]) == "VBD" or str(token["pos"]) == "VBN" or str(token["pos"]) == "VBP" or str(token["pos"]) == "VBZ": flag = 1 if flag == 1: pos = [] flag = 0 reviewWithoutStopWords, classification = textBllob(Review) if counter %50 == 0: print ("Sentiment Analysed: "+str(counter)) try: FinalWrite(str(reviewWithoutStopWords), str(allReviewData_Final[counter][1]), str(allReviewData_Final[counter][2]), str(allReviewData_Final[counter][3]), str(allReviewData_Final[counter][4]), str(allReviewData_Final[counter][5]), str(allReviewData_Final[counter][6]), str(allReviewData_Final[counter][7]), str(classification)) time.sleep(1) except(IndexError): pass except(TypeError): pass
def spell_corrector(sentence):
    """Return *sentence* with TextBlob's spelling correction applied.

    :param sentence: text to correct
    :return: the ``string`` attribute of the corrected blob
    """
    return TextBlob(sentence).correct().string
def correct(self, phrase):
    """Return the spelling-corrected form of *phrase* as a ``str``."""
    return str(TextBlob(phrase).correct())
def correction():
    """Return the spelling-corrected ``text`` query parameter as JSON.

    A missing parameter is treated as an empty string; the response has the
    form {"text": "<corrected text>"}.
    """
    query = request.args.get('text', '')
    corrected = TextBlob(query).correct()
    return jsonify(text=unicode(corrected))
# Take the first field of every (word, ...) entry.
toxic = [element[0] for element in toxic_words]
non_toxic = [element[0] for element in non_toxic_words]

# Keep the words that only occur on the toxic side; a set makes each
# membership test O(1) instead of a scan of the whole list.
non_toxic_lookup = set(non_toxic)
toxic_list = [word for word in toxic if word not in non_toxic_lookup]

from gensim.models import KeyedVectors
en_model = KeyedVectors.load_word2vec_format('../crawl-300d-2M.vec')

# Keep words known to the embedding model; for unknown words try their
# spelling-corrected form instead.
toxic_correct = []
for word in toxic_list:
    if word in en_model.vocab:
        toxic_correct.append(word)
        continue
    # correct() returns a new blob rather than modifying in place, so its
    # result must be used. Convert it to a plain string so the vocabulary
    # lookup compares strings and the pickle stores strings, not TextBlobs.
    corrected = str(TextBlob(word).correct())
    if corrected in en_model.vocab:
        toxic_correct.append(corrected)

import pickle
with open('toxic_words', "wb") as f:
    pickle.dump(toxic_correct, f)
# Section 12.2.9 Self Check snippets # Exercise 2 from textblob import TextBlob sentence = TextBlob('I canot beleive I misspeled thees werds') sentence.correct() ########################################################################## # (C) Copyright 2019 by Deitel & Associates, Inc. and # # Pearson Education, Inc. All Rights Reserved. # # # # DISCLAIMER: The authors and publisher of this book have used their # # best efforts in preparing the book. These efforts include the # # development, research, and testing of the theories and programs # # to determine their effectiveness. The authors and publisher make # # no warranty of any kind, expressed or implied, with regard to these # # programs or to the documentation contained in these books. The authors # # and publisher shall not be liable in any event for incidental or # # consequential damages in connection with, or arising out of, the # # furnishing, performance, or use of these programs. # ##########################################################################
from textblob import TextBlob

# Print the spelling-corrected article, then its Spanish translation (the
# translation is made from the original, uncorrected blob).
article = "good mornind,always be the reson that someone smiles today"
blob = TextBlob(article)

corrected = blob.correct()
print(corrected)

spanish = blob.translate(to="es")
print(spanish)
def spelling_correction(self, document):
    """Spell-correct *document* through ``TextBlobModel``.

    :param document: text handed to ``TextBlobModel``
    :return: dict with the corrected text as a string under ``'correct'``
    """
    corrected = TextBlobModel(document).correct()
    return {'correct': str(corrected)}
blob.words blob.noun_phrases # sentiment analysis blob = TextBlob('I hate this horrible movie. This movie is not very good.') blob.sentences blob.sentiment.polarity [sent.sentiment.polarity for sent in blob.sentences] # singularize and pluralize blob = TextBlob('Put away the dishes.') [word.singularize() for word in blob.words] [word.pluralize() for word in blob.words] # spelling correction blob = TextBlob('15 minuets late') blob.correct() # spellcheck Word('parot').spellcheck() # definitions Word('bank').define() Word('bank').define('v') # translation and language identification blob = TextBlob('Welcome to the classroom.') blob.translate(to='es') blob = TextBlob('Hola amigos') blob.detect_language()
text = text.replace(' ', ' ') text= text.replace(' ', ' ') text_blob = TextBlob(text) #removing stop words text_blob = TextBlob(text).lower() for d in stopwords.words('english'): text_blob = text_blob.replace(d.lower() + ' ', ' ') #cleaning to remove extra spaces text_blob = text_blob.replace(' ', ' ') #correcting spelling text_blob=text_blob.correct() #lemmatization text_blob=Word(text_blob).lemmatize() else: text_blob = TextBlob(text).lower() #sentiment analysis score_vader = analyser.polarity_scores(text_blob) score_textblot = text_blob.sentiment #appending in the file text_cleaned.append(text_blob) score_neg.append(score_vader["neg"]) score_neu.append(score_vader["neu"]) score_pos.append(score_vader["pos"])
def clean_text(self, text):
    """Lower-case *text*, spell-correct it and return the result as ``str``."""
    lowered = text.lower()
    corrected = TextBlob(lowered).correct()
    return str(corrected)
from textblob import TextBlob

# Show a fixed sample sentence before and after spelling correction.
a = "whate was youre namey?"
print("original text: " + a)

corrected = str(TextBlob(a).correct())
print("corrected text: " + corrected)
text=pytesseract.image_to_string(Image.open('final.jpg')) #Getting the text from the image using pytesseract if len(text)!=0: print(text) token=nltk.word_tokenize(text) l=len(token) list_sugg=[] for i in range(0,l): print("...................") t_line=TextBlob(token[i]) w_line=Word(token[i]) l=w_line.spellcheck() length=len(l) print("are you looking for") for i in range(0,length): print(str(i+1)+"->"+str(l[i][0])) print("according to me :"+str(t_line.correct())) list_sugg.append(str(t_line.correct())) print("according to me......") print(" ".join(list_sugg)) #'q' for exit if cv2.waitKey(1) &0xFF == ord('q'): break except: break #Exiting cam.release() cv2.destroyAllWindows()
def __init__(self, text):
    """Store the sentiment of the spelling-corrected *text* on the instance."""
    corrected_text = str(TextBlob(text).correct())
    # Sentiment is taken from a fresh blob built from the corrected string.
    self.__senti = TextBlob(corrected_text).sentiment
def correct_spelling(string):
    """Decode a UTF-8 byte string and return its spelling-corrected blob.

    :param string: UTF-8 encoded text
    :return: the result of TextBlob.correct() on the decoded text
    """
    decoded = unicode(string, 'utf-8')
    return TextBlob(decoded).correct()
subjective sentences hold sentiments while objective sentences are facts and figures ''' wiki = TextBlob( "Python is a high-level, general-purpose programming language.") print(wiki.tags) sentence = TextBlob( "Très bonnes ambiance et les plats sont délicieux \nPour ceux qui demandent les prix... pour les plats ça vari entre 1200 et 1300 da" ) print(sentence.words) print(sentence.tags) #Donesn't work with french print(sentence.words[2].pluralize()) #Works print(sentence.correct()) print(sentence.detect_language()) ''' commentProcessing is a function that we use to classify comments. We first translate the comment (since it's in french) and then tokennize it We then extract the polarity which is a float within the range [-1.0, 1.0] -1 is suposed to be super bad. ''' def commentProcessing(text): initBlob = TextBlob(text) lang = initBlob.detect_language() textBlob = initBlob.translate(from_lang=lang, to='en') return textBlob
import pandas as pd
from textblob import TextBlob
from textblob import Word

mon_ami_photos = pd.read_pickle(r'C:\Users\LauraM\Desktop\mon_ami_gabi_photos.pkl')
mon_ami_reviews = pd.read_pickle(r'C:\Users\LauraM\Desktop\mon_ami_gabi_reviews.pkl')
#print(mon_ami_photos['caption'])
#print(mon_ami_reviews['text'])

# For every review: spell-correct it, print its sentiment and collect its
# noun phrases (singularized and lemmatized) as the review's entities.
# print is written in call form so the script runs on Python 2 and 3.
reviewsEntities = []
counter = 0
totalRev = len(mon_ami_reviews['text'])
for review in mon_ami_reviews['text']:
    print('%d : %s' % (len(reviewsEntities), review))
    # correct() returns a new blob; the result must be kept, otherwise the
    # sentiment and noun phrases are computed on the uncorrected text.
    textTB = TextBlob(review).correct()
    print(textTB.sentiment)
    entities = []
    for word in textTB.noun_phrases:
        # singularize() and lemmatize() also return new values instead of
        # modifying the Word, so chain them and keep the final result.
        w = Word(Word(word).singularize().lemmatize())
        entities.append(w)
        print(w)
    reviewsEntities.append(entities)
    print('%d/%d : %s' % (len(reviewsEntities), totalRev, entities))