from textblob import TextBlob
from fuzzywuzzy import fuzz

import CFG  # project-local grammar module


def get_best_syntax_tree(text, cfg):
    blob = TextBlob(text)
    ph_list = []
    # Find the tokens and generate the simplified rules, then prune the
    # tree at various depths and generate candidate sentences.
    # cfg.print_grammar()
    # print()
    for depth in range(4):
        cfg_copy = CFG.CFG(cfg.prod)
        cfg_copy.prune(blob.upper().words, depth)
        if "S" in cfg_copy.prod:
            # Generate a set of sentences
            for i in range(10):
                sent, tree = cfg_copy.gen_random_convergent('S')
                ph_list.append({"S": sent, "Tree": tree, "Score": 0})
        else:
            break
    if not ph_list:
        # No start symbol survived pruning, so nothing could be generated
        return None
    # Score each generated sentence against the uppercased input text
    for ph in ph_list:
        ph["Score"] = fuzz.ratio(ph["S"], str(blob.upper()))
    # Keep the highest-scoring sentence
    best_ph = max(ph_list, key=lambda ph: ph["Score"])
    # print(best_ph)
    return best_ph
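# A minimal usage sketch (not in the original file): CFG is the project-local
# module imported above, and the production dict passed to CFG.CFG below is a
# hypothetical stand-in for a real grammar.
if __name__ == "__main__":
    grammar = CFG.CFG({"S": [["FIND", "NP"]]})  # hypothetical production rules
    best = get_best_syntax_tree("find the nearest station", grammar)
    if best is not None:
        print(best["S"], best["Score"])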
def on_status(self, status):
    if from_creator(status):
        try:
            # Extended (280-character) tweets expose their text via
            # status.extended_tweet; accessing it raises AttributeError
            # on regular-length tweets.
            text = status.extended_tweet['full_text']
        except AttributeError:
            text = status.text
        # Classify sentiment from the TextBlob polarity score
        polarity = TextBlob(text).sentiment.polarity
        if polarity > 0:
            analysis = 'positive'
        elif polarity == 0:
            analysis = 'neutral'
        else:
            analysis = 'negative'
        message = analysis.upper() + " " + status.user.screen_name + " " + text
        print(message)
        # Publish the labelled tweet to SNS
        client.publish(
            TargetArn="",  # Input AWS ARN for SNS
            Message=message
        )
        # Save the tweet to a file
        with open("Test.txt", 'a') as file:
            file.write(analysis.upper() + text)
    return True
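# A minimal sketch (not in the original file) of the surrounding listener:
# on_status above is written as a method, so it would live inside a tweepy v3
# StreamListener subclass like this one. The API keys, the user id, and the
# from_creator() helper below are placeholders/assumptions.
import tweepy
import boto3

client = boto3.client('sns')  # SNS client assumed by on_status

def from_creator(status):
    # Placeholder: the original source defines this filter elsewhere;
    # here we simply skip retweets.
    return not hasattr(status, 'retweeted_status')

class SentimentListener(tweepy.StreamListener):
    on_status = on_status  # reuse the handler defined above

if __name__ == "__main__":
    auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")  # placeholders
    auth.set_access_token("ACCESS_TOKEN", "ACCESS_SECRET")
    stream = tweepy.Stream(auth=auth, listener=SentimentListener())
    stream.filter(follow=["USER_ID"])  # hypothetical creator's user id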
from textblob import TextBlob


def chatbot_extractor(document):
    blob = TextBlob(document)
    words = blob.upper().words
    # print(words)
    feat = {}
    # question words
    for w in ("WHERE", "WHAT", "HOW", "WHICH"):
        feat[f"contain({w})"] = w in words
    # punctuation (blob.words strips punctuation, so check the raw text)
    feat["contain(?)"] = "?" in document
    feat["contain(.)"] = "." in document
    # commands
    for w in ("DO", "FIND", "GET", "TELL", "SEARCH"):
        feat[f"contain({w})"] = w in words
    # approval
    for w in ("YES", "NO", "OK", "GOOD"):
        feat[f"contain({w})"] = w in words
    # greetings
    for w in ("HELLO", "HI", "BYE", "HEY"):
        feat[f"contain({w})"] = w in words
    # statements
    for w in ("THINK", "LIKE", "FEEL"):
        feat[f"contain({w})"] = w in words
    return feat
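# A minimal sketch (not in the original file) showing how chatbot_extractor
# can feed an NLTK Naive Bayes intent classifier; the tiny labelled corpus is
# made up for illustration.
import nltk

train_data = [
    ("Where is the station?", "question"),
    ("Find me a restaurant", "command"),
    ("Yes, that is good", "approval"),
    ("Hello there!", "greeting"),
    ("I think I like this", "statement"),
]
train_set = [(chatbot_extractor(text), label) for text, label in train_data]
classifier = nltk.NaiveBayesClassifier.train(train_set)
print(classifier.classify(chatbot_extractor("How do I get home?")))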
"Simple is better than complex.") print(zen.words,zen.sentences) w=Word('Cats') v=Word('went') print(w.lemmatize()) print(v.lemmatize('v')) for sentence in zen.sentences: print(sentence.sentiment) b = TextBlob("I havv goood speling!") print(b.correct()) w1 = Word('falibility') print(w1.spellcheck()) monty = TextBlob("We are no longer the Knights who say Ni. " "We are now the Knights who say Ekki ekki ekki PTANG.") print(monty.word_counts['ekki']) print(monty.words.count('ekki')) print(monty.words.count('ekki',case_sensitive=True)) en_blob = TextBlob(u'Simple is better than complex.') print(en_blob.translate(to='es')) chinese_blob = TextBlob(u"美丽优于丑陋") print(chinese_blob.translate(from_lang="zh-CN", to='en')) b = TextBlob(u"بسيط هو أفضل من مجمع") print(b.detect_language()) print(zen[0:19]) print(zen.upper()) apple_blob = TextBlob('apples') banana_blob = TextBlob('bananas') print(apple_blob + ' and ' + banana_blob) print("{0} and {1}".format(apple_blob, banana_blob))
from collections import Counter

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from matplotlib import font_manager
from nltk.corpus import stopwords
from textblob import TextBlob


def cooc_graph(search_term, tweets_dataframe, save_path, NUM_OF_COOCS=5):
    def dict_value_sort(dictionary):
        return dict(sorted(dictionary.items(), key=lambda item: item[1], reverse=True))

    def dice_significance(cooc_array, word, word_list):
        """Dice statistical significance for "word" against all other words
        in "word_list" using the corresponding 2d array of co-occurrences.
        """
        word_index = word_list.index(word.upper())
        k = len(cooc_array)
        ki = sum(cooc_array[:, word_index])
        kj = np.sum(cooc_array, axis=0)
        kij = cooc_array[word_index, :]
        dice_stat = 2 * kij / (ki + kj)
        stat_dict = {word: d for word, d in zip(word_list, dice_stat)}
        stat_dict.pop(word.upper())
        return stat_dict

    ALL_SEARCH_TERMS = TextBlob(search_term).words
    tweets = tweets_dataframe["Tweet"].dropna().values

    # Sort out fonts
    font_files = font_manager.findSystemFonts(
        fontpaths="/Users/jamesashford/Documents/Projects/Hackathons/Oxford Hack 2020/OxHack-2020/TCARS/rsc")
    font_list = font_manager.createFontList(font_files)
    font_manager.fontManager.ttflist.extend(font_list)

    # Extract and clean words
    all_words = TextBlob(" ".join(tweets).upper()).words.lemmatize()
    # Get stop-words
    stop_words = list(set(stopwords.words('english'))) + ['thi']
    # Remove stop and short words
    words = [w for w in all_words if len(w) > 3 and w.lower() not in stop_words]
    # Remove words that only occur once
    counts = dict(Counter(words))
    key_words = [word for word in counts if counts[word] > 1]

    # Create the document-term matrix
    dtm = np.array([[1 if tweet.upper().count(word) > 0 else 0 for word in key_words]
                    for tweet in tweets])
    # Co-occurrences
    cooc = np.dot(dtm.T, dtm)

    graph_data = []
    layer1_names = []
    layer2_names = []
    for term in ALL_SEARCH_TERMS:
        # Statistical significance of search_term
        term_dice_stats = dice_significance(cooc, term, key_words)
        term_dice_stats = dict_value_sort(term_dice_stats)
        # Get the NUM_OF_COOCS most similar words
        most_similar = list(term_dice_stats.keys())[0:NUM_OF_COOCS]
        layer1_names += most_similar
        # Create a structure to hold the node links
        graph_data += [{"from": term.upper(), "to": set_name,
                        "stat": term_dice_stats[set_name]}
                       for set_name in most_similar]
        # Iterate over each of the chosen co-occurrences and find their closest
        for word in most_similar:
            # Find stats for this word
            word_dice_stats = dice_significance(cooc, word, key_words)
            word_dice_stats = dict_value_sort(word_dice_stats)
            # Choose top nearby matches
            top_neighbours = list(word_dice_stats.keys())[0:10]
            layer2_names += top_neighbours
            new_graph_data = [{"from": word.upper(), "to": set_name,
                               "stat": word_dice_stats[set_name]}
                              for set_name in top_neighbours]
            # Add to existing graph data
            graph_data += new_graph_data

    # Convert graph data to a pandas dataframe
    gd = pd.DataFrame.from_dict(graph_data)

    # Create the co-occurrence graph
    # G = nx.from_numpy_matrix(cooc)
    G = nx.from_pandas_edgelist(gd, "from", "to", "stat")

    # Generate colours and sizes per layer
    colours, sizes = [], []
    l0, l1, l2 = {}, {}, {}
    for node in G:
        if node in ALL_SEARCH_TERMS.upper():
            col = 'darkblue'  # 'red'
            size = min(counts[node] * 100, 5000)  # 5000
            l0[node] = node
        elif node in layer1_names:
            col = 'lightblue'  # 'orange'
            size = min(counts[node] * 100, 3000)  # 2500
            l1[node] = node
        else:
            col = 'cyan'  # 'blue'
            size = counts[node] * 10  # 1000
            l2[node] = node
        colours.append(col)
        sizes.append(size)

    # Visualisation
    fig = plt.figure(figsize=(5, 3), dpi=200)
    pos = nx.spring_layout(G)
    if len(ALL_SEARCH_TERMS) == 1:
        pos = nx.nx_agraph.graphviz_layout(G, prog='twopi')
    # Draw edges
    edges = nx.draw_networkx_edges(G, pos, alpha=1, width=1, edge_color='white')
    # width=gd["stat"].values*10)
    # Draw nodes, once in white for the background, then again with colour + alpha
    nodes = nx.draw_networkx_nodes(G, pos, alpha=1, node_color='white', node_size=sizes)
    nodes = nx.draw_networkx_nodes(G, pos, alpha=0.8, node_color=colours, node_size=sizes)
    nodes.set_edgecolor('black')
    # Draw labels for each layer, with proportional sizes
    labels0 = nx.draw_networkx_labels(G, pos, labels=l0, font_family="Swiss911 UCm BT", font_size=30)
    labels1 = nx.draw_networkx_labels(G, pos, labels=l1, font_family="Swiss911 UCm BT", font_size=15)
    labels2 = nx.draw_networkx_labels(G, pos, labels=l2, font_family="Swiss911 UCm BT", font_size=11, font_color="white")
    labels2 = nx.draw_networkx_labels(G, pos, labels=l2, font_family="Swiss911 UCm BT", font_size=10, font_color="black")
    # Save
    file_name = f"{save_path}/{search_term}_coocgraph.png"
    plt.savefig(file_name, transparent=True)
print "---------------------------------------------------------------" sentence = TextBlob('Use 4 spaces per indentation level.') print sentence.words print sentence.words[2].singularize() print sentence.words[-1].pluralize() animals = TextBlob("cat dog octopus") print animals.words print animals.words.pluralize() # TextBlobs Are Like Python Strings print animals[0:10] # You can use common string methods. print animals.upper() print animals.find('dog') print "---------------------------------------------------------------" from textblob import Word w = Word("octopi") print w.lemmatize() w = Word("went") print w.lemmatize("v") # Pass in WordNet part of speech (verb) print "---------------------------------------------------------------" b = TextBlob("I havv goood speling!") print(b.correct())
""" Created on Wed Jun 13 16:07:51 2018 @author: akansal2 """ #importing libraies from textblob import TextBlob #TextBlob Strings Str1 = TextBlob('Amazing') Str2 = TextBlob('Spider Man') #Textblob string operations Str1.lower() Str1.upper() Str1[1:4] Str1 + " " + Str2 Str1.detect_language() #Paragraph and sentence operations para = TextBlob("My name is aditya. \n I live is Modinagar.\n My apples id is [email protected]") para.sentences # distinguish sentences with combination of . and \n para.sentences[0] para.sentences[1] para.sentences[2] para.sentences[0].words for n in para.sentences[1].noun_phrases: print(n)
sent = TextBlob("I haawve goood speling") correct_sent = sent.correct() w = Word("haave") spellcheck = w.spellcheck() #Get Word and Noun Phrase Frequencies words = TextBlob('We are no longer together. We are enemies now.') word_counts = words.word_counts #You can specify whether or not the search should be case-sensitive (default is False). #Translation and Language Detection en_blob = TextBlob("You are my best friend") pl_blob = en_blob.translate(to='pl') blob = TextBlob("Mam na imię Piotr") detected_lang = blob.detect_language() #Parsing text = TextBlob('I know You') text_parse = text.parse() #string text = TextBlob("Hello World") upper_text = text.upper() find_world = text.find("World") #ngrams blob = TextBlob("Now is better than never.") ngram = blob.ngrams(n=3)
from textblob import TextBlob, Word

# WordLists (a WordList is just a Python list with additional methods)
animals = TextBlob("cat dog octopus")
print(animals.words)
print(animals.words.pluralize())

# Spelling correction (use the correct() method to attempt spelling correction)
b = TextBlob("I havv goood speling!")
print(b.correct())
w = Word('falibility')
print(w.spellcheck())

# Get word and noun phrase frequencies
monty = TextBlob("We are no longer the Knights who say Ni. "
                 "We are now the Knights who say Ekki ekki ekki PTANG.")
print(monty.word_counts['ekki'])
# The second way is to use the count() method.
print(monty.words.count('ekki'))
print(monty.words.count('Ekki', case_sensitive=True))

# TextBlobs are like Python strings
print(zen.upper())

# You can make comparisons between TextBlobs and strings.
apple_blob = TextBlob('apples')
banana_blob = TextBlob('bananas')
print(apple_blob < banana_blob)

# You can concatenate and interpolate TextBlobs and strings.
print(apple_blob + ' and ' + banana_blob)
print("{0} and {1}".format(apple_blob, banana_blob))

# n-grams (the TextBlob.ngrams() method returns a list of tuples of n successive words)
blob = TextBlob("Now is better than never.")
print(blob.ngrams(n=3))
from textblob import TextBlob

text = "I love to watch football, but I have never played it"
text_blob_object = TextBlob(text)

text2 = text_blob_object.upper()
text3 = text_blob_object.lower()

print(text)
print(text2)
print(text3)
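# Expected output (not in the original file): upper()/lower() return new
# TextBlob objects and leave the original text unchanged.
# I love to watch football, but I have never played it
# I LOVE TO WATCH FOOTBALL, BUT I HAVE NEVER PLAYED IT
# i love to watch football, but i have never played it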
###############################################################################
""" Basic """
from textblob import TextBlob

## creating a TextBlob object
blob = TextBlob("Hyderabad is a great place to learn software courses.")

## textblobs are like python strings
blob[0:5]
blob.upper()
blob.lower()

blob2 = TextBlob("It contains a lot of institutes to learn and get the job.")

## concatenation
blob + " And " + blob2

###############################################################################
""" Tokenization
blob = TextBlob("Hyderabad is a great place to learn software courses. \n It contains a lot of institutes to learn and get the job.")
blob.sentences
blob.sentences[0]
"""
###############################################################################
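# A small live version (not in the original file) of the tokenization shown in
# the commented block above: sentences are split on '.' boundaries, and each
# Sentence exposes its own .words list.
tok_blob = TextBlob("Hyderabad is a great place to learn software courses. \n"
                    "It contains a lot of institutes to learn and get the job.")
print(tok_blob.sentences)           # two Sentence objects
print(tok_blob.sentences[0].words)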