def sentiment(nounchunk):
    sa = SentimentAnalysis()
    positive_sentences, negative_sentences, neutral_sentences = sa.get_sentence_orientation(nounchunk)
    print(positive_sentences)
    print(neutral_sentences)
    positive_neutral_sen = positive_sentences + neutral_sentences
    return positive_neutral_sen
def scrape_submission(submission_url):
    """
    Receives a submission URL. Scrapes the data and inserts it into a submission,
    comment, and sentiment table in the database.
    :param submission_url:
    """
    ''' Scrape Data '''
    # Get submission dict
    submission_dict = reddit.extract_post_data(submission_url=submission_url)
    # Get list of comments_dicts
    submission_object = submission_dict.get('submission_object')
    comments_dict = reddit.extract_post_comments_data(submission_object)

    ''' Exit if no comments were extracted from the submission '''
    if not len(comments_dict.get('data')) > 0:
        logger.info('Data extraction yielded zero comments. Aborting sentiment analysis and database insertion.')
        return

    ''' Analyze Sentiment '''
    # Call sentimentanalysis to analyze the comments and append the dicts
    SentimentAnalysis.list_parser(comments_dict)

    ''' Insert to Database '''
    # Create instance of database_manager
    database_manager = DatabaseManager()
    # Check if submission exists
    if database_manager.check_submission_exists(submission_dict):
        # Delete the submission and associated data if it exists
        database_manager.delete_submission(submission_dict)
    # Insert new submission info into database
    new_submission = database_manager.insert_submission(submission_dict)
    # Insert comments if submission inserted successfully
    if new_submission is not None:
        database_manager.insert_comments(comments_dict, new_submission)
        database_manager.insert_sentiment(comments_dict)
    # Returns submission_id
    return submission_dict.get('id')
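# A minimal driver sketch for scrape_submission, assuming a hypothetical submission URL
# and that the reddit, SentimentAnalysis, and DatabaseManager helpers above are already
# configured; it is illustrative, not part of the original module.
if __name__ == "__main__":
    inserted_id = scrape_submission("https://www.reddit.com/r/Python/comments/abc123/example_post/")  # hypothetical URL
    print("Inserted submission:", inserted_id)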
def sentiment():
    senti = SentimentAnalysis()
    input_msg = request.form.get('sentiment_text')
    prediction = senti.sentiment(input_msg)
    if prediction > 0.80:
        message = 'It is a positive message, having a score of {}'.format(prediction)
    else:
        message = 'It is a negative message, having a score of {}'.format(prediction)
    return render_template('result.html', prediction=message, input=input_msg)
def _load_model():
    global sentiment_analysis
    global conversation
    with graph.as_default():
        sentiment_analysis = SentimentAnalysis(SA_TOKENIZER, SA_MODEL)
        conversation = Conversation(CONVERSATION_MODEL)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--bert-model', default=None, type=str, required=True,
                        help='path to BERT model directory')
    parser.add_argument('--fine-tuned-model', default=None, type=str, required=True,
                        help='path to fine-tuned PosNeg classifier model file')
    parser.add_argument('--jumanpp-command', type=str, action='store',
                        default="/mnt/violet/share/tool/juman++v2/bin/jumanpp")
    parser.add_argument("--server", default=None, type=str, required=True,
                        help="server IP address.")
    parser.add_argument("--port", default=None, type=int, required=True,
                        help="server port.")
    args = parser.parse_args()

    server = xmlrpc_server.SimpleXMLRPCServer((args.server, args.port), allow_none=True)
    sa_model = SentimentAnalysis(args.bert_model, args.fine_tuned_model, args.jumanpp_command)
    server.register_function(sa_model.get_prediction, 'get_prediction')
    print("loading done.", file=sys.stderr)
    server.serve_forever()
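# A minimal client sketch for the XML-RPC service registered above, assuming the server
# runs on 127.0.0.1:8000 and that get_prediction accepts a raw text argument; the exact
# input format depends on the SentimentAnalysis implementation, so treat this as illustrative.
import xmlrpc.client

proxy = xmlrpc.client.ServerProxy("http://127.0.0.1:8000", allow_none=True)  # assumed host/port
result = proxy.get_prediction("この映画は素晴らしかった。")  # assumed: a single text string
print(result)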
def frp_multi(fr):
    zz = fr['reviews']
    multi_list = []
    for yy in zz:
        xx = yy['summary']
        multi_list.append(xx)

    abc = SentimentAnalysis()
    list_tds = []
    list_qc = []
    list_c = []
    list_dict = []
    for ss in multi_list:
        ss_a = list(jieba.cut(ss, cut_all=False))
        ss_b = " ".join(ss_a)
        # print(ss_b)
        result = abc.analyze(ss_b)
        # print(result)
        for item in result:
            t = item[0]
            d = item[1]
            s = item[4]
            tds = [t, d, s]
            list_tds.append(tds)

    # count each unique (target, descriptor, polarity) triple
    for tri in list_tds:
        if tri not in list_qc:
            list_qc.append(tri)
            tri_count = list_tds.count(tri)
            list_c.append(tri_count)

    for ww in list_qc:
        vc_index = list_qc.index(ww)
        vc = list_c[vc_index]
        vt = ww[0]
        vd = ww[1]
        vs = ww[2]
        # columns: 对象 (target), 评价极性 (sentiment polarity), 描述词 (descriptor), 评论数 (review count)
        dict_a = {"对象": vt, "评价极性": vs, "描述词": vd, "评论数": vc}
        list_dict.append(dict_a)

    df = pd.DataFrame(list_dict, columns=["对象", "评价极性", "描述词", "评论数"])
    df.to_csv("./ndetails.csv", index=False)
def frp_single(sentence):
    abc = SentimentAnalysis()
    result = abc.analyze(sentence)
    str_a = []
    jsonlist_a = []
    for item in result:
        aspect = item[0]
        opinion = item[1]
        relation = item[4]
        #t = ***
        a = {'target': aspect, 'description': opinion, 'sentiment': relation}
        str_a.append(a)
    for i in str_a:
        json_info = json.dumps(i, default=set_default, ensure_ascii=False)
        jsonlist_a.append(json_info)
    return jsonlist_a
def run_process(access_key, secret_access_key, opinions, output_path):
    pos = neg = neutral = mixed = 0
    # run sentiment analysis
    for i in opinions:
        obj = SentimentAnalysis(access_key, secret_access_key, i)
        data = obj.run_single_sentiment_analysis()
        pos += data['SentimentScore']['Positive']
        neg += data['SentimentScore']['Negative']
        neutral += data['SentimentScore']['Neutral']
        mixed += data['SentimentScore']['Mixed']
    cnt = len(opinions)
    dict_result = {
        'positive': pos / cnt,
        'negative': neg / cnt,
        'neutral': neutral / cnt,
        'mixed': mixed / cnt
    }
    create_pieplot_percent(dict_result, output_path)
    return json.dumps(dict_result)
def analysis_status(self, statusText):
    s = SentimentAnalysis(statusText)
    return s.sentiments
def update(self, text):
    sa = SentimentAnalysis()
    cleaned_text = sa.clean_text_tweet_from_mails_and_rubbish(text)
    return sa.get_tweet_sentiment(cleaned_text)
class instascraper():
    def __init__(self, username=None, password=None, session_user=None):
        self.L = instaloader.Instaloader(dirname_pattern="posts/{profile}/{date}")
        self.analizer = SentimentAnalysis()
        if session_user is None and username and password is not None:
            print('logging in ... ')
            try:
                self.L.login(username, password)
                print('logged in -> ', username)
                self.Logged = 'logged'
            except Exception as ex:
                print(ex)
                self.Logged = ex
        elif session_user is not None:
            self.L.load_session_from_file(session_user)
        else:
            pass

    # SET PROFILE FOR THE CURRENT SESSION
    def set_profile(self, username_profile):
        self.profile = instaloader.Profile.from_username(self.L.context, username_profile)
        print("PROFILE -->", self.profile)
        return self.profile

    # RETURN PROFILE DATA
    def get_profile_data(self, profile=None):
        if profile is None:
            profile = self.profile
        if profile.is_private == True:
            print('PRIVINFO NOT AVAILABLE')
            return
        temp_vect = []
        try:
            temp_has_public_story = profile.has_public_story
            temp_has_viewable_story = profile.has_viewable_story
        except Exception as ex:
            print("EXCEPTION -->", ex)
            temp_has_public_story = None
            temp_has_viewable_story = None
        dict_profile_data = {
            'user_id': profile.userid,
            'username': profile.username,
            'followed_by_viewer': profile.followed_by_viewer,
            'post_count': profile.mediacount,
            'igtv_count': profile.igtvcount,
            'n_follower': profile.followers,
            'n_followees': profile.followees,
            'external_url': profile.external_url,
            'is_bussines': profile.is_business_account,
            'business_Category': profile.business_category_name,
            'biography': profile.biography,
            'blocked_by_viewer': profile.blocked_by_viewer,
            'follows_viewer': profile.follows_viewer,
            'full_name': profile.full_name,
            'has_blocked_viewer': profile.has_blocked_viewer,
            'has_public_story': temp_has_public_story,
            'has_viewable_story': temp_has_viewable_story,
            'has_requested_viewer': profile.has_requested_viewer,
            'is_verified': profile.is_verified,
            'requested_by_viewer': profile.requested_by_viewer,
            'profile_pic_url': profile.profile_pic_url,
            'has_higlighted_reels': profile.has_highlight_reels,
        }
        temp_vect.append(dict_profile_data)
        profile_data = pd.DataFrame(temp_vect, index=['data_profile'])
        return profile_data

    # GET DATA POST FROM SHORTCODE
    def get_post_from_shortcode(self, SHORTCODE: str, MAX_COMMENT: int):
        post = Post.from_shortcode(self.L.context, SHORTCODE)
        try:
            accessibility_caption = str(post._asdict()['accessibility_caption'])
        except Exception as ex:
            print(ex)
            accessibility_caption = None
        try:
            location = post.location
        except Exception as ex:
            print(ex)
            location = None

        # INFORMATION OF THE POST GOING INTO THE CSV
        post_info_dict = {
            'title': post.title,
            'owner_username': post.owner_username,
            'date_and_time': post.date,
            'type_of_post': post.typename,
            'mediacount': post.mediacount,
            'caption': post.caption,
            'n_caption_hashatags': len(post.caption_hashtags),
            'caption_hashtags': post.caption_hashtags,
            'n_mentions_post': len(post.caption_mentions),
            'n_tagged_users': len(post.tagged_users),
            'is_video': post.is_video,
            'n_likes': post.likes,
            'n_comments': post.comments,
            'is_sponsored': post.is_sponsored,
            'sponsors': post.sponsor_users,
            'location': location,
            'url_link': post.url,
            'url_insta': 'instagram.com/p/{}/'.format(post.shortcode),
            'description_of_post': accessibility_caption,
        }

        comments_vect = []
        # DOWNLOAD AND STORE COMMENTS
        print('Start Comments', end='')
        comment_count = 0
        for comment in post.get_comments():
            answer_count = 0
            for answer in comment.answers:
                answer_count += 1
                if answer_count == 50:
                    break
            analisys, score = self.analizer.return_sentiment(str(comment.text).strip())
            comment_info_dict = {
                'date_and_time': comment.created_at_utc,
                'profile': comment.owner.username,
                'text': str(comment.text).strip(),
                'n_likes': comment.likes_count,
                'answer_count': answer_count,
                'sentiment_analysis': analisys,
                'score': score
            }
            comments_vect.append(comment_info_dict)
            if comment_count == MAX_COMMENT:
                break
            comment_count += 1
            print('.', end='')
        print('End Comments')
        comment_df = pd.DataFrame(comments_vect)
        post_df = pd.DataFrame([post_info_dict])
        return post_df, comment_df

    # GET POSTS OF THE SET PROFILE, OR PASS profile TO SET A NEW ONE
    def get_post_and_comment(self, MAX_COMMENT: int, L=None, MAX_POST=5, profile=None):
        if profile is None:
            profile = self.profile
        if L is None:
            L = self.L
        counter_post = 1
        post_profile = {"profile": profile.username, 'posts': []}
        for post in profile.get_posts():
            print("POST n:", counter_post, "MAX_COMMENT_SET:", MAX_COMMENT)
            comments_vect = []
            try:
                accessibility_caption = str(post._asdict()['accessibility_caption'])
            except Exception as ex:
                print(ex)
                accessibility_caption = None
            try:
                location = post.location
            except Exception as ex:
                print(ex)
                location = None

            # INFORMATION OF THE POST GOING INTO THE CSV
            post_info_dict = {
                'title': post.title,
                'owner_username': post.owner_username,
                'date_and_time': post.date,
                'type_of_post': post.typename,
                'mediacount': post.mediacount,
                'caption': post.caption,
                'n_caption_hashatags': len(post.caption_hashtags),
                'caption_hashtags': post.caption_hashtags,
                'n_mentions_post': len(post.caption_mentions),
                'n_tagged_users': len(post.tagged_users),
                'is_video': post.is_video,
                'n_likes': post.likes,
                'n_comments': post.comments,
                'is_sponsored': post.is_sponsored,
                'sponsors': post.sponsor_users,
                'location': location,
                'url_link': post.url,
                'url_insta': 'instagram.com/p/{}/'.format(post.shortcode),
                'description_of_post': accessibility_caption,
                'engagement_rate': ((int(post.likes) + int(post.comments)) / int(profile.followers)) * 100
            }

            # DOWNLOAD AND STORE COMMENTS
            print('Start Comments')
            comment_count = 0
            for comment in post.get_comments():
                answer_count = 0
                """
                for answer in comment.answers:
                    answer_count += 1
                    if answer_count == 5:
                        break
                """
                analisys, score = self.analizer.return_sentiment(str(comment.text).strip())
                comment_info_dict = {
                    'date_and_time': comment.created_at_utc,
                    'profile': comment.owner.username,
                    'text': str(comment.text).strip(),
                    'n_likes': comment.likes_count,
                    # 'answer_count': answer_count,
                    'sentiment_analysis': analisys,
                    'score': score
                }
                comments_vect.append(comment_info_dict)
                if comment_count == MAX_COMMENT:
                    print("MAX COMMENT")
                    break
                comment_count += 1
                print(comment_count, '.', end='')
            print('End Comments')
            #L.download_pic(path_pic_jpg, post.url, post.date_utc)

            # STORING DATA SCRAPED AND UPLOAD RELATIVE CSVs
            comment_df = pd.DataFrame(comments_vect)
            post_df = pd.DataFrame([post_info_dict], index=['post_data'])
            post = {'post_info': post_df, 'comments': comment_df}
            post_profile['posts'].append(post)
            print("END__POST")
            # IF MAX POST DOWNLOADED, BREAK
            if counter_post % MAX_POST == 0:
                print('Post Reached')
                break
            counter_post += 1
        return post_profile
    else:
        break

ls = [str(code)]
numberInsta = numberInsta + 9
# call the mongo class with the geotag code (which is used to name the Mongo collection)
m = mongo(ls[0])
# clear previous data in the MongoDB collection
m.clearDB()
# run instagram scraping
s = InstaScraper("/Users/Ed/eclipse/chromedriver", ls, numberInsta)
s.runScraper()
print('Scraping Complete')
# connect to database with webscraped data
db = m.getdatabase()
# run sentiment analysis
c = SentimentAnalysis(db, ls[0])
captions, captionText = c.getCaptions()
c.sentimentIntensity(captions)
# run frequency distribution analysis
freq = WordFrequency(captionText)
freq.getFreqDist()
print('Analysis Complete')
def _load_model():
    global sentiment_analysis
    global conversation
    sentiment_analysis = SentimentAnalysis(SA_TOKENIZER, SA_MODEL)
    conversation = Conversation(CONV_TOKENIZER, CONV_MODEL)
class RecommendFromDialogue(nn.Module):
    """
    Recommender system that takes a dialogue as input.
    Runs sentiment analysis on all mentioned movies, then uses autorec to provide
    movie recommendations at each stage of the conversation.
    """

    def __init__(self,
                 train_vocab,
                 n_movies,
                 params,
                 autorec_path=os.path.join(config.AUTOREC_MODEL, "model_best"),
                 sentiment_analysis_path=os.path.join(config.SENTIMENT_ANALYSIS_MODEL, "model_best"),
                 cuda=None,
                 gensen=True):
        super(RecommendFromDialogue, self).__init__()
        self.n_movies = n_movies
        if cuda is None:
            self.cuda_available = torch.cuda.is_available()
        else:
            self.cuda_available = cuda
        self.sentiment_analysis = SentimentAnalysis(
            params=params['sentiment_analysis_params'],
            train_vocab=train_vocab,
            gensen=gensen,
            resume=sentiment_analysis_path)
        self.autorec = AutoRec(params=params['autorec_params'],
                               n_movies=self.n_movies,
                               resume=autorec_path)
        # freeze sentiment analysis
        for param in self.sentiment_analysis.parameters():
            param.requires_grad = False

    def forward(self, dialogue, senders, lengths, conversation_lengths,
                movie_occurrences, recommend_new_movies, user_representation=None):
        """
        :param dialogue: (batch, max_conv_length, max_utt_length) Variable containing the dialogue
        :param movie_occurrences: Array where each element corresponds to a conversation and is a dictionary
            {movieId: (max_conv_length, max_utt_length) array containing the movie mentions}
        :param recommend_new_movies: If true, zero out the movies already mentioned in the output
        :param user_representation: optional prior user representation (obtained from language)
        :return: (batch_size, max_conv_length, n_movies_total) movie preferences
            (ratings not necessarily between 0 and 1)
        """
        batch_size, max_conv_length = dialogue.data.shape[:2]
        if not movie_occurrences or len(movie_occurrences) == 0:
            tt = torch.cuda.FloatTensor if self.cuda_available else torch.FloatTensor
            return Variable(torch.zeros(batch_size, max_conv_length, self.n_movies, out=tt()))

        # indices to retrieve original order
        indices = [(i, movieId)
                   for (i, conv_movie_occurrences) in enumerate(movie_occurrences)
                   for movieId in conv_movie_occurrences]
        batch_indices = Variable(torch.LongTensor([i[0] for i in indices]))
        # flatten movie occurrences to shape (total_num_mentions_in_batch, max_conv_length, max_utt_length)
        flattened_movie_occurrences = [
            conv_movie_occurrences[movieId]
            for conv_movie_occurrences in movie_occurrences
            for movieId in conv_movie_occurrences
        ]
        flattened_movie_occurrences = Variable(torch.FloatTensor(flattened_movie_occurrences))
        if self.cuda_available:
            batch_indices = batch_indices.cuda()
            flattened_movie_occurrences = flattened_movie_occurrences.cuda()

        # select the dialogues following the movie mentions
        dialogue = torch.index_select(dialogue, 0, batch_indices)
        senders = torch.index_select(senders, 0, batch_indices)
        lengths = lengths[[i[0] for i in indices]]
        conversation_lengths = conversation_lengths[[i[0] for i in indices]]
        # print("senders shape", senders.data.shape)  # (total_num_mentions, max_conv_length)
        sentiment_analysis_input = {
            "dialogue": dialogue,
            "movie_occurrences": flattened_movie_occurrences,
            "senders": senders,
            "lengths": lengths,
            "conversation_lengths": conversation_lengths
        }
        # (total_num_mentions_in_batch, max_conv_length)
        movie_likes = self.sentiment_analysis(sentiment_analysis_input,
                                              return_liked_probability=True)

        # populate ratings input using the movie likes
        # (batch_size, max_conv_length, n_movies_total)
        tt = torch.cuda.FloatTensor if self.cuda_available else torch.FloatTensor
        autorec_input = Variable(torch.zeros(batch_size, max_conv_length, self.n_movies, out=tt()))
        # mask that tells from which utterance the movies have appeared in the conversation
        mask = flattened_movie_occurrences.sum(dim=2) > 0
        mask = mask.cumsum(dim=1) > 0  # (total_num_mentions_in_batch, max_conv_length)
        # only use movie preferences after movies are mentioned
        movie_likes = movie_likes * mask.float()
        # print("movie likes shape", movie_likes.data.shape)  # (total_num_mentions_in_batch, max_conv_length)
        for i, (batchId, movieId) in enumerate(indices):
            autorec_input[batchId, :, movieId] = movie_likes[i]

        # run recommendation model
        # (batch_size, max_conv_length, n_movies_total)
        output = self.autorec(autorec_input,
                              additional_context=user_representation,
                              range01=False)

        # use this at generation time: lower probability for movies already mentioned
        if recommend_new_movies:
            for batchId, movieId in indices:
                # (max_conv_length) mask that zeros out once the movie has been mentioned
                mask = np.sum(movie_occurrences[batchId][movieId], axis=1) > 0
                mask = Variable(torch.from_numpy((mask.cumsum(axis=0) == 0).astype(float))).float()
                if self.cuda_available:
                    mask = mask.cuda()
                output[batchId, :, movieId] = mask * output[batchId, :, movieId]
        return output
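# A minimal usage sketch for RecommendFromDialogue, using hypothetical sizes and only
# illustrating the input shapes described in the forward() docstring; the constructor and
# forward calls are left commented because they require a real params dict and checkpoints.
import numpy as np
import torch
from torch.autograd import Variable

batch_size, max_conv_length, max_utt_length, n_movies = 2, 4, 12, 6924  # assumed sizes

dialogue = Variable(torch.zeros(batch_size, max_conv_length, max_utt_length).long())
senders = Variable(torch.zeros(batch_size, max_conv_length))
lengths = np.full((batch_size, max_conv_length), max_utt_length)
conversation_lengths = np.full(batch_size, max_conv_length)
# one dict per conversation: movieId -> binary (max_conv_length, max_utt_length) mention mask
movie_occurrences = [{123: np.zeros((max_conv_length, max_utt_length))},
                     {456: np.zeros((max_conv_length, max_utt_length))}]

# recommender = RecommendFromDialogue(train_vocab=train_vocab, n_movies=n_movies, params=params)
# preferences = recommender(dialogue, senders, lengths, conversation_lengths,
#                           movie_occurrences, recommend_new_movies=True)
# preferences has shape (batch_size, max_conv_length, n_movies)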
def run(input_filepath):
    result_dict = {}
    print("Processing: {}".format(input_filepath))
    file_df = pd.read_csv(input_filepath)

    # Read one review at a time
    for index, row in file_df.iterrows():
        temp_review_dict = {}
        review_id = row['review_id']
        review = row['text']
        golden_truth_text = row['Dishnames, Sentiment']
        golden_truth_dish_sentiment_dict = Preprocessing.preprocess_golden_truth_text(golden_truth_text)
        if golden_truth_dish_sentiment_dict is None:
            continue
        print(review_id)
        review = Preprocessing.preprocess_review_text(review)

        # Check the grammaticality scores
        readability_dict = GrammarScore.get_scores(review)

        # Coreference resolution of the original text
        resolved_review = CoreferenceResolution.run(review)

        # Divide the text into sentences
        sentence_tokens = SentenceTokenization.get_sentence_tokens(resolved_review, 'spacy')

        # Create dish buckets and assign valid sentences to each dish
        predicted_truth_dish_sentiment_dict = {}
        for dish_names in golden_truth_dish_sentiment_dict:
            predicted_truth_dish_sentiment_dict[dish_names] = {'found': False, 'sentiment': -100, 'sentences': []}
        # Add a key to handle sentences which do not match any dish name
        predicted_truth_dish_sentiment_dict['UNK'] = {'sentiment': -100, 'sentences': []}

        # Feed the review sentence by sentence to the client to detect dish names
        sentence_analysis_list = []
        false_positive_list = []  # Initialize false positive list
        for sentence in sentence_tokens:
            try:
                intent, dish_names = Factory.sclient.detect_intent_for_text(sentence)
                sentence_analysis_list.append((sentence, intent, dish_names))
                if intent == 'Default Fallback Intent':
                    predicted_truth_dish_sentiment_dict['UNK']['sentences'].append(sentence)
                elif intent == 'menu_item_identification_intent' and len(dish_names) == 0:
                    predicted_truth_dish_sentiment_dict['UNK']['sentences'].append(sentence)
                else:
                    for dish in dish_names:
                        if dish in predicted_truth_dish_sentiment_dict:
                            predicted_truth_dish_sentiment_dict[dish]['found'] = True
                            predicted_truth_dish_sentiment_dict[dish]['sentences'].append(sentence)
                        else:
                            false_positive_list.append(dish)
                time.sleep(1)
            except Exception as e:
                print(e)
                continue
        predicted_truth_dish_sentiment_dict['UNK']['False Positives'] = false_positive_list

        # Get the sentiment of each dish
        for dish in predicted_truth_dish_sentiment_dict.keys():
            if dish == 'UNK':
                continue
            # Dish name was found and at least one sentence exists
            if predicted_truth_dish_sentiment_dict[dish]['found'] == True:
                # Join all the sentences and get the sentiment
                sentiment_text = ""
                for sentence in predicted_truth_dish_sentiment_dict[dish]['sentences']:
                    sentiment_text = sentiment_text + sentence
                predicted_truth_dish_sentiment_dict[dish]['sentiment'] = SentimentAnalysis.run(sentiment_text, lib='paralleldot')

        temp_review_dict = {
            'text': review,
            'readability_scores': readability_dict,
            'golden_truth': golden_truth_dish_sentiment_dict,
            'predicted_truth': predicted_truth_dish_sentiment_dict,
            'sentences': sentence_analysis_list
        }
        #pp.pprint(temp_review_dict)
        # Save it in the dictionary
        result_dict[review_id] = temp_review_dict

    # Save the overall dictionary
    output_filepath = '../output/' + input_filepath[input_filepath.rfind('/') + 1:input_filepath.rfind('.csv')] + '.p'
    with open(output_filepath, 'wb') as pickle_dump_file_pointer:
        pickle.dump(result_dict, pickle_dump_file_pointer, protocol=pickle.HIGHEST_PROTOCOL)
                                          index_column='tweeted_date')
    headline_df = mysql_client.get_dataframe(query=get_headline_data_query,
                                             index_column='published_date')
except DatabaseQueryError as error:
    print(error)

# Close the connection
mysql_client.close()

# Process data
merged_data = bitcoin_price_df
sentiment_columns = ['price', 'flair', 'positive', 'negative', 'subjectivity', 'polarity']
merged_data = merged_data.reindex(columns=sentiment_columns)

sa = SentimentAnalysis()
for index, row in merged_data.iterrows():
    tweets = tweet_df.loc[tweet_df.index == index][:30]
    headlines = headline_df.loc[headline_df.index == index][:20]
    # Combine tweets with headlines
    merged_text = tweets['tweet'].tolist()
    merged_text.extend(headlines['headline'].tolist())
    sentiments = sa.get_sentiments(text=' '.join(merged_text))
    # Append new columns
    i = merged_data.index.get_loc(index)
    merged_data['price'][i] = bitcoin_price_df['price'][i]
    merged_data['flair'][i] = sentiments[0]
    merged_data['positive'][i] = sentiments[1]
    merged_data['negative'][i] = sentiments[2]
    merged_data['subjectivity'][i] = sentiments[3]
    merged_data['polarity'][i] = sentiments[4]
    print(df)

    # Summarization
    text = ''
    for sent in df['original_sents'].values:
        text += '.' + sent
    summarize = Summarization(text, None, senti)
    final_summary1, final_summary2, final_summary3, neg_final_summary1, neg_final_summary2, neg_final_summary3, counts, eigen_explo = summarize.get_summaries()
    print(final_summary1, final_summary2, final_summary3, neg_final_summary1, neg_final_summary2, neg_final_summary3, counts, eigen_explo)


if __name__ == "__main__":
    d = spell.request_dict("en_US")
    nlp = spacy.load('en')
    senti = SentimentAnalysis()
    start_time = time.time()
    filenames = [f for f in listdir(input_entity_files) if isfile(join(input_entity_files, f))]
    # Parallel on CPU cores
    #Parallel(n_jobs=cpu_count() - 1, verbose=10, backend="multiprocessing", batch_size="auto")(delayed(processFiles)(fileName, input_entity_files) for fileName in filenames)
    #for (dirpath, dirnames, filenames) in walk(input_entity_files):
    for file in filenames:
        processFiles(file, input_entity_files)
    print("Time taken --- %s seconds ---" % (time.time() - start_time))
from sentiment_analysis import SentimentAnalysis

sa = SentimentAnalysis()
sentiment = sa.analyze_sentiment("This is best film i seen")
print(sa.model.layers[1].output)
print(sentiment)

intermediate_out = sa.debug_hidden_layer_out("This is best film i seen")

sentiment = sa.analyze_sentiment("Wonderful but suck")
print(sentiment)

sentiment = sa.analyze_sentiment("Ha ha i laught")
print(sentiment)
def generate_training_data(database, collection, query):
    """Generate a training data set.

    The data set is generated in order to find the best parameters for
    sentiment classification; specifically, the best combination of negation
    handling and polarity lexicon.

    Params:
        database: str
            MongoDB database to use.
        collection:
            Collection containing the labeled training tweets.
        query:
            Filter used to retrieve the training tweets. Note that the tweets
            must have a "polarity" field.

    Example:
        >>> generate_training_data(database='tass_2017', collection='intertass',
                                   query={"$or": [{"dataset": "train"}, {"dataset": "development"}]})
    """
    four_label_homologation = {u'N+': 0, u'N': 0, u'NEU': 1, u'P': 2, u'P+': 2, u'NONE': 3}

    client = pymongo.MongoClient()
    coll = client[database][collection]
    tweets = coll.find(filter=query,
                       projection=['tweet_id', 'content', 'polarity'],
                       sort=[('polarity', pymongo.ASCENDING), ])
    tweets_ = [[_to_str(tweet['tweet_id']),
                _to_unicode(tweet['content']),
                _to_unicode(tweet['polarity']).upper()]
               for tweet in tweets
               if (tweet['content'] and len(tweet['content']) > 0)]
    client.close()
    tweets = tweets_
    tweets_ = None

    output_path = DATA_PATH + '/train/' + collection
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    for negation_id in NEGATION_SETTINGS:
        lexicons = np.random.choice(np.arange(1, 7), size=3, replace=False).tolist() +\
            np.random.choice(np.arange(7, 16), size=3, replace=False).tolist() +\
            np.random.choice(np.arange(16, 26), size=4, replace=False).tolist() +\
            np.random.choice(np.arange(26, 31), size=2, replace=False).tolist()
        lexicons = np.random.choice(lexicons, size=6, replace=False).tolist()
        if np.random.choice(range(2), p=[.9, .1]) == 1:
            lexicons.append(31)

        negation_path = output_path + '/%s' % negation_id
        if not os.path.isdir(negation_path):
            os.mkdir(negation_path)

        for lexicon_id in lexicons:
            output_fname = negation_path + '/metafeatures-lexicon-%s.tsv' % lexicon_id
            if os.path.isfile(output_fname):
                continue

            clf = SentimentAnalysis(negation_id=negation_id,
                                    lexicon='lexicon-%i' % lexicon_id)
            documents = []
            four_label_polarities = []
            metafeatures_list = []
            for j, (tweet_id, content, polarity) in enumerate(tweets):
                try:
                    text, metafeatures = clf.preprocess_tweet(content)
                except Exception:
                    _write_in_file(fname=negation_path + '/errors-1.log',
                                   content=tweet_id + '\n', mode='a')
                    continue
                metafeatures = metafeatures.reshape(1, metafeatures.shape[0])
                if j == 0:
                    metafeatures_list = metafeatures
                else:
                    if metafeatures_list.shape[1] == metafeatures.shape[1]:
                        metafeatures_list = np.vstack((metafeatures_list, metafeatures))
                    else:
                        _write_in_file(fname=negation_path + '/errors-2.log',
                                       content=tweet_id + '\n', mode='a')
                        continue
                documents.append(_to_str(text))
                four_label_polarities.append(four_label_homologation[polarity])

            if not os.path.isfile(negation_path + '/tweets.txt'):
                np.savetxt(negation_path + '/tweets.txt',
                           np.array(documents, dtype=str), fmt='%s')
            if not os.path.isfile(negation_path + '/target-labels.dat'):
                np.savetxt(negation_path + '/target-labels.dat',
                           np.array(four_label_polarities, dtype=int), fmt='%i')
            np.savetxt(output_fname, metafeatures_list, fmt='%i', delimiter='\t')
            clf = None
from sanic import Sanic
from sanic_jinja2 import SanicJinja2  # pip install sanic_jinja2
from sanic import response

from sentiment_analysis import SentimentAnalysis

app = Sanic()
jinja = SanicJinja2(app)
sa = None

# ----------------------
# Serve files from the ./img folder at the URL /img
app.static('/img', './img')


@app.route('/')
@jinja.template('index.html')
async def index(request):
    return


@app.route("/analyze_review", methods=['POST'])
def analyze_review(request):
    sentiment = sa.analyze_sentiment(request.form['review'][0])
    return response.json(str(sentiment[0]))


if __name__ == "__main__":
    sa = SentimentAnalysis()
    # --- Start the Sanic web server on port 9000
    app.run(host="0.0.0.0", port=9000)
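# A quick way to exercise the /analyze_review endpoint above once the server is running.
# This client sketch assumes the requests library is available and uses an arbitrary
# sample review; it is illustrative and not part of the original application.
import requests

resp = requests.post("http://localhost:9000/analyze_review",
                     data={"review": "Great movie, loved it!"})
print(resp.json())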
import argparse

from preprocess import WordSet, WordEmbedding, KnowledgeBase
from sentiment_analysis import SentimentAnalysis

#sentence = '外观 漂亮'
#sentence = '外观 不 太 漂亮'
#sentence = '高 规格 的 用料 和 精致 的 做工'
#sentence = '炫酷 的 造型 、 充沛 的 动力 再 加上 本田 家族 运动 基因 的 传承'

parser = argparse.ArgumentParser()
parser.add_argument('-s', required=True)
args = parser.parse_args()
sentence = args.s

abc = SentimentAnalysis()
result = abc.analyze(sentence)

print('--------------------')
print('%s\n' % (sentence))
for item in result:
    aspect = item[0]
    opinion = item[1]
    relation = item[4]
    print('%s\t%s\t%s' % (aspect, opinion, relation))
print('--------------------')

for item in result:
    aspect = item[0]
    opinion = item[1]
    relation = item[4]
class ChatBot:
    sentiment = SentimentAnalysis()
    #sentiment.makemodel()
    knowledge = {}
    historybuffer = list()  # contains the most recent messages

    # The Kernel object is the public interface to the AIML interpreter.
    aimlBot = aiml.Kernel()
    # Use the 'learn' method to load the contents of an AIML file into the Kernel.
    aimlBot.learn(os.path.dirname(os.path.abspath(__file__)) + '/botdata/*/*.aiml')
    status = False

    def read_input(self, input, latitude, longitude):
        input = input.lower()
        self.historybuffer.insert(0, input)
        if len(self.historybuffer) == 100:
            self.historybuffer.pop()
        self.knowledge = self.sentiment.analyse_text(self.knowledge, input)

        if "@bot" in input or "@Bot" in input:
            input_list = input.split(' ')
            keyword = 'on' if len(input_list) == 1 else input_list[1]
            if keyword == 'off':
                self.status = False
            elif keyword == 'topic':
                return self.reply(input, 'topic', latitude, longitude)
            else:
                self.status = True
                return self.reply(input, "location", latitude, longitude)
        if self.status is True:
            return self.reply(input, "aiml", latitude, longitude)
        return None

    def reply(self, message, case, latitude, longitude):
        if case == "aiml":
            # group = self.classify(self.historytext())
            return {
                'message': self.aimlresponse(message),
            }
        elif case == "location":
            if len(self.historytext().split()) < 20:
                return {
                    'message': 'I am not sure about where you should go. Try talking more.',
                }
            group = self.classify(self.historytext())
            if len(group) == 0:
                return {
                    'message': 'I am not sure about where you should go. Try talking more.',
                }
            best_match = group[0]
            place_type = None
            query = ''
            if 'Restaurants' in best_match.name:
                place_type = PlaceType.restaurant
                category_split = best_match.name.strip('/').split('/')
                if len(category_split) > 2:
                    query = category_split[2]  # Fast Food or Pizzeria
            results = search_places(latitude, longitude, place_type=place_type, keyword=query)
            results = results[:4]
            return {
                'message': 'How about one of these?',
                'options': [{
                    'label': place['name'],
                    'link': f"https://www.google.com/maps/dir/?api=1&destination={place['geometry']['location']['lat']},{place['geometry']['location']['lng']}",
                } for place in results],
            }
        elif case == 'topic':
            interests = self.returnInterests()
            return {
                'message': 'How about one of these topics?',
                'options': [interest.name for interest in interests],
            }