Example #2
def sentiment(nounchunk):
    sa = SentimentAnalysis()
    positive_sentences, negative_sentences, neutral_sentences = sa.get_sentence_orientation(
        nounchunk)
    print(positive_sentences)
    print(neutral_sentences)
    positive_neutral_sen = positive_sentences + neutral_sentences
    return positive_neutral_sen
def scrape_submission(submission_url):
    """
	Receives a submission URL. Scrapes the data and inserts it into a submission, comment, and sentiment table in the
	database.

	:param submission_url:
	"""
    '''
	Scrape Data
	'''

    # Get submission dict
    submission_dict = reddit.extract_post_data(submission_url=submission_url)

    # Get list of comments_dicts
    submission_object = submission_dict.get('submission_object')
    comments_dict = reddit.extract_post_comments_data(submission_object)
    # Exit if no comments were extracted from the submission
    if not comments_dict.get('data'):
        logger.info(
            'Data extraction yielded zero comments. Aborting sentiment analysis and database insertion.'
        )
        return
    # Analyze Sentiment

    # Call sentimentanalysis to analyze the comments and append the dicts
    SentimentAnalysis.list_parser(comments_dict)
    # Insert to Database

    # Create instance of database_manager
    database_manager = DatabaseManager()

    # Check if submission exists
    if database_manager.check_submission_exists(submission_dict):
        # Delete the submission and associated data if exists
        database_manager.delete_submission(submission_dict)

    # Insert new submission info into database
    new_submission = database_manager.insert_submission(submission_dict)

    # Insert comments if submission inserted successfully
    if new_submission is not None:
        database_manager.insert_comments(comments_dict, new_submission)
        database_manager.insert_sentiment(comments_dict)

    # Returns submission_id
    return submission_dict.get('id')
Example #4
def sentiment():
    senti = SentimentAnalysis()
    input_msg = request.form.get('sentiment_text')
    prediction = senti.sentiment(input_msg)
    if prediction > 0.80:
        message = 'It is a positive message, having a score of {}'.format(
            prediction)
    else:
        message = 'It is a negative message, having a score of {}'.format(
            prediction)
    return render_template('result.html', prediction=message, input=input_msg)
Example #5
def _load_model():
    global sentiment_analysis
    global conversation

    with graph.as_default():
        sentiment_analysis = SentimentAnalysis(SA_TOKENIZER, SA_MODEL)
        conversation = Conversation(CONVERSATION_MODEL)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--bert-model',
                        default=None,
                        type=str,
                        required=True,
                        help='path to BERT model directory')
    parser.add_argument('--fine-tuned-model',
                        default=None,
                        type=str,
                        required=True,
                        help='path to fine-tuned PosNeg classifier model file')
    parser.add_argument('--jumanpp-command',
                        type=str,
                        action='store',
                        default="/mnt/violet/share/tool/juman++v2/bin/jumanpp")
    parser.add_argument("--server",
                        default=None,
                        type=str,
                        required=True,
                        help="server IP address.")
    parser.add_argument("--port",
                        default=None,
                        type=int,
                        required=True,
                        help="server port.")
    args = parser.parse_args()
    server = xmlrpc_server.SimpleXMLRPCServer((args.server, args.port),
                                              allow_none=True)

    sa_model = SentimentAnalysis(args.bert_model, args.fine_tuned_model,
                                 args.jumanpp_command)
    server.register_function(sa_model.get_prediction, 'get_prediction')
    print("loading done.", file=sys.stderr)
    server.serve_forever()
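
# A hedged client-side sketch for the XML-RPC server above: the host, port, and the exact input
# format expected by get_prediction are assumptions; only the registered method name comes from
# the code above.
def example_client(host="127.0.0.1", port=8000):
    import xmlrpc.client
    proxy = xmlrpc.client.ServerProxy("http://{}:{}".format(host, port), allow_none=True)
    # 'get_prediction' is the name registered via server.register_function above
    return proxy.get_prediction("この映画は本当に素晴らしかった。")
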
def frp_multi(fr):

    zz = fr['reviews']
    multi_list = []

    for yy in zz:
        xx = yy['summary']
        multi_list.append(xx)

    abc = SentimentAnalysis()
    list_tds = []
    list_qc = []
    list_c = []
    list_dict = []

    for ss in multi_list:
        ss_a = list(jieba.cut(ss, cut_all=False))
        ss_b = " ".join(ss_a)
        #print (ss_b)
        result = abc.analyze(ss_b)
        #print (result)
        for item in result:
            t = item[0]
            d = item[1]
            s = item[4]
            tds = [t, d, s]
            list_tds.append(tds)

    for tri in list_tds:
        if tri not in list_qc:
            list_qc.append(tri)
            tri_count = list_tds.count(tri)
            list_c.append(tri_count)

    for ww in list_qc:
        vc_index = list_qc.index(ww)
        vc = list_c[vc_index]
        vt = ww[0]
        vd = ww[1]
        vs = ww[2]
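        # CSV column names below are Chinese: 对象 = target, 评价极性 = sentiment polarity,
        # 描述词 = descriptor, 评论数 = number of reviews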
        dict_a = {"对象": vt, "评价极性": vs, "描述词": vd, "评论数": vc}
        list_dict.append(dict_a)

    df = pd.DataFrame(list_dict, columns=["对象", "评价极性", "描述词", "评论数"])
    df.to_csv("./ndetails.csv", index=False)
def frp_single(sentence):

    abc = SentimentAnalysis()
    result = abc.analyze(sentence)
    str_a = []
    jsonlist_a = []

    for item in result:
        aspect = item[0]
        opinion = item[1]
        relation = item[4]
        #t = ***
        a = {'target': aspect, 'description': opinion, 'sentiment': relation}
        str_a.append(a)

    for i in str_a:
        json_info = json.dumps(i, default=set_default, ensure_ascii=False)
        jsonlist_a.append(json_info)

    return jsonlist_a
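
# Usage sketch for frp_single (illustration only): the pre-segmented sentence is a placeholder
# taken from the commented examples further below, and the exact aspect/opinion/polarity values
# returned depend entirely on the model behind SentimentAnalysis.analyze.
if __name__ == '__main__':
    for item_json in frp_single('外观 漂亮'):
        # each element is a JSON string with "target", "description" and "sentiment" keys
        print(item_json)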
Example #10
def run_process(access_key, secret_access_key, opinions, output_path):
    pos = neg = neutral = mixed = 0

    # run sentiment analysis
    for i in opinions:
        obj = SentimentAnalysis(access_key, secret_access_key, i)
        data = obj.run_single_sentiment_analysis()
        pos += data['SentimentScore']['Positive']
        neg += data['SentimentScore']['Negative']
        neutral += data['SentimentScore']['Neutral']
        mixed += data['SentimentScore']['Mixed']

    cnt = len(opinions)
    dict_result = {
        'positive': pos / cnt,
        'negative': neg / cnt,
        'neutral': neutral / cnt,
        'mixed': mixed / cnt
    }

    create_pieplot_percent(dict_result, output_path)

    return json.dumps(dict_result)
    def analysis_status(self, statusText):
        s = SentimentAnalysis(statusText)
        return s.sentiments
Example #12
    def update(self, text):
        sa = SentimentAnalysis()
        cleaned_text = sa.clean_text_tweet_from_mails_and_rubbish(text)
        return sa.get_tweet_sentiment(cleaned_text)
Example #13
class instascraper():
    def __init__(self, username=None, password=None, session_user=None):
        self.L = instaloader.Instaloader(
            dirname_pattern="posts/{profile}/{date}")
        self.analizer = SentimentAnalysis()
        if session_user is None and username and password is not None:
            print('logging in ... ')
            try:
                self.L.login(username, password)
                print('logged in -> ', username)
                self.Logged = 'logged'
            except Exception as ex:
                print(ex)
                self.Logged = ex
        elif session_user is not None:
            self.L.load_session_from_file(session_user)
        else:
            pass

    #SET PROFILE FOR THE CURRENT SESSION
    def set_profile(self, username_profile):
        self.profile = instaloader.Profile.from_username(
            self.L.context, username_profile)
        print("PROFILE -->", self.profile)
        return self.profile

    #RETURN PROFILE DATA
    def get_profile_data(self, profile=None):
        if profile is None:
            profile = self.profile
        if profile.is_private:
            print('PRIVINFO NOT AVAILABLE')
            return

        temp_vect = []

        try:
            temp_has_public_story = profile.has_public_story
            temp_has_viewable_story = profile.has_viewable_story
        except Exception as ex:
            print("EXECPTION -->", ex)
            temp_has_public_story = None
            temp_has_viewable_story = None
        dict_profile_data = {
            'user_id': profile.userid,
            'username': profile.username,
            'followed_by_viewer': profile.followed_by_viewer,
            'post_count': profile.mediacount,
            'igtv_count': profile.igtvcount,
            'n_follower': profile.followers,
            'n_followees': profile.followees,
            'external_url': profile.external_url,
            'is_bussines': profile.is_business_account,
            'business_Category': profile.business_category_name,
            'biography': profile.biography,
            'blocked_by_viewer': profile.blocked_by_viewer,
            'follows_viewer': profile.follows_viewer,
            'full_name': profile.full_name,
            'has_blocked_viewer': profile.has_blocked_viewer,
            'has_public_story': temp_has_public_story,
            'has_viewable_story': temp_has_viewable_story,
            'has_requested_viewer': profile.has_requested_viewer,
            'is_verified': profile.is_verified,
            'requested_by_viewer': profile.requested_by_viewer,
            'profile_pic_url': profile.profile_pic_url,
            'has_highlighted_reels': profile.has_highlight_reels
        }
        temp_vect.append(dict_profile_data)
        profile_data = pd.DataFrame(temp_vect, index=['data_profile'])

        return profile_data

    # GET DATA POST FROM SHORTCODE
    def get_post_from_shortcode(self, SHORTCODE: str, MAX_COMMENT: int):
        post = Post.from_shortcode(self.L.context, SHORTCODE)
        try:
            accessibility_caption = str(
                post._asdict()['accessibility_caption'])
        except Exception as ex:
            print(ex)
            accessibility_caption = None
        try:
            location = post.location
        except Exception as ex:
            print(ex)
            location = None
        # INFORMATION OF THE POST GOING INTO THE CSV
        post_info_dict = {
            'title': post.title,
            'owner_username': post.owner_username,
            'date_and_time': post.date,
            'type_of_post': post.typename,
            'mediacount': post.mediacount,
            'caption': post.caption,
            'n_caption_hashtags': len(post.caption_hashtags),
            'caption_hashtags': post.caption_hashtags,
            'n_mentions_post': len(post.caption_mentions),
            'n_tagged_users': len(post.tagged_users),
            'is_video': post.is_video,
            'n_likes': post.likes,
            'n_comments': post.comments,
            'is_sponsored': post.is_sponsored,
            'sponsors': post.sponsor_users,
            'location': location,
            'url_link': post.url,
            'url_insta': 'instagram.com/p/{}/'.format(post.shortcode),
            'description_of_post': accessibility_caption,
        }
        comments_vect = []
        # DOWNLOAD AND STORE COMMENTS
        print('Start Comments', end='')

        comment_count = 0
        for comment in post.get_comments():
            answer_count = 0
            for answer in comment.answers:
                answer_count += 1
                if answer_count == 50:
                    break
            analisys, score = self.analizer.return_sentiment(
                str(comment.text).strip())
            comment_info_dict = {
                'date_and_time': comment.created_at_utc,
                'profile': comment.owner.username,
                'text': str(comment.text).strip(),
                'n_likes': comment.likes_count,
                'answer_count': answer_count,
                'sentiment_analysis': analisys,
                'score': score
            }

            comments_vect.append(comment_info_dict)
            if comment_count == MAX_COMMENT:
                break
            comment_count += 1
            print('.', end='')
        print('End Comments')
        comment_df = pd.DataFrame(comments_vect)
        post_df = pd.DataFrame([post_info_dict])

        return post_df, comment_df

    # GET POSTS OF THE PROFILE SET EARLIER, OR PASS profile TO SET A NEW ONE
    def get_post_and_comment(self,
                             MAX_COMMENT: int,
                             L=None,
                             MAX_POST=5,
                             profile=None):
        if profile is None:
            profile = self.profile
        if L is None:
            L = self.L
        counter_post = 1
        post_profile = {"profile": profile.username, 'posts': []}
        for post in profile.get_posts():
            print("POST n:", counter_post, "MAX_COMMENT_SET:", MAX_COMMENT)
            comments_vect = []
            try:
                accessibility_caption = str(
                    post._asdict()['accessibility_caption'])
            except Exception as ex:
                print(ex)
                accessibility_caption = None
            try:
                location = post.location
            except Exception as ex:
                print(ex)
                location = None
            #INFORMATION OF THE POST GOING INTO THE CSV
            post_info_dict = {
                'title': post.title,
                'owner_username': post.owner_username,
                'date_and_time': post.date,
                'type_of_post': post.typename,
                'mediacount': post.mediacount,
                'caption': post.caption,
                'n_caption_hashtags': len(post.caption_hashtags),
                'caption_hashtags': post.caption_hashtags,
                'n_mentions_post': len(post.caption_mentions),
                'n_tagged_users': len(post.tagged_users),
                'is_video': post.is_video,
                'n_likes': post.likes,
                'n_comments': post.comments,
                'is_sponsored': post.is_sponsored,
                'sponsors': post.sponsor_users,
                'location': location,
                'url_link': post.url,
                'url_insta': 'instagram.com/p/{}/'.format(post.shortcode),
                'description_of_post': accessibility_caption,
                # engagement rate = (likes + comments) / followers, as a percentage
                'engagement_rate': ((int(post.likes) + int(post.comments)) /
                                    int(profile.followers)) * 100
            }

            # DOWNLOAD AND STORE COMMENT
            print('Start Comments')

            comment_count = 0
            for comment in post.get_comments():
                answer_count = 0
                """
                for answer in comment.answers:
                    answer_count += 1
                    if answer_count == 5:
                        break
                """
                analisys, score = self.analizer.return_sentiment(
                    str(comment.text).strip())
                comment_info_dict = {
                    'date_and_time': comment.created_at_utc,
                    'profile': comment.owner.username,
                    'text': str(comment.text).strip(),
                    'n_likes': comment.likes_count,
                    # 'answer_count': answer_count,
                    'sentiment_analysis': analisys,
                    'score': score
                }

                comments_vect.append(comment_info_dict)
                if comment_count == MAX_COMMENT:
                    print("MAX COMMENT")
                    break
                comment_count += 1
                print(comment_count, '.', end='')

            print('End Comments')

            #L.download_pic(path_pic_jpg, post.url, post.date_utc)

            #STORING DATA SCRAPED AND UPLOAD RELATIVE CSVs
            comment_df = pd.DataFrame(comments_vect)
            post_df = pd.DataFrame([post_info_dict], index=['post_data'])

            post = {'post_info': post_df, 'comments': comment_df}
            post_profile['posts'].append(post)
            print("END__POST")
            #IF MAX POST DOWNLOADED BREAK
            if counter_post % MAX_POST == 0:
                print('Post Reached')
                break
            counter_post += 1

        return post_profile
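
# A minimal usage sketch for the instascraper class above. All names are placeholders and an
# existing instaloader session file is assumed, so treat this as illustrative rather than
# runnable as-is.
def example_scrape():
    scraper = instascraper(session_user='my_session_user')  # hypothetical saved session
    scraper.set_profile('some_public_profile')              # hypothetical public profile name
    profile_df = scraper.get_profile_data()
    posts = scraper.get_post_and_comment(MAX_COMMENT=20, MAX_POST=2)
    return profile_df, posts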
Example #14
    else:
        break

ls = [str(code)]
numberInsta = numberInsta + 9

#call on mongo class with the geotag code (which is used to name Mongo collection)
m = mongo(ls[0])

#clear previous data in MongoDB the collection
m.clearDB()

#run instagram scraping
s = InstaScraper("/Users/Ed/eclipse/chromedriver", ls, numberInsta)
s.runScraper()
print('Scraping Complete')

#connect to database with webscraped data
db = m.getdatabase()

#run sentiment analysis
c = SentimentAnalysis(db, ls[0])
captions, captionText = c.getCaptions()
c.sentimentIntensity(captions)

#run frequency distribution analysis
freq = WordFrequency(captionText)
freq.getFreqDist()

print('Analysis Complete')
Example #15
def _load_model():
    global sentiment_analysis
    global conversation

    sentiment_analysis = SentimentAnalysis(SA_TOKENIZER, SA_MODEL)
    conversation = Conversation(CONV_TOKENIZER, CONV_MODEL)
class RecommendFromDialogue(nn.Module):
    """
    Recommender system that takes a dialogue as input. Runs sentiment analysis on all mentioned movies,
    then uses autorec to provide movie recommendations at each stage of the conversation
    """
    def __init__(
        self,
        train_vocab,
        n_movies,
        params,
        autorec_path=os.path.join(config.AUTOREC_MODEL, "model_best"),
        sentiment_analysis_path=os.path.join(config.SENTIMENT_ANALYSIS_MODEL,
                                             "model_best"),
        cuda=None,
        gensen=True,
    ):
        super(RecommendFromDialogue, self).__init__()
        self.n_movies = n_movies
        if cuda is None:
            self.cuda_available = torch.cuda.is_available()
        else:
            self.cuda_available = cuda

        self.sentiment_analysis = SentimentAnalysis(
            params=params['sentiment_analysis_params'],
            train_vocab=train_vocab,
            gensen=gensen,
            resume=sentiment_analysis_path)
        self.autorec = AutoRec(params=params['autorec_params'],
                               n_movies=self.n_movies,
                               resume=autorec_path)

        # freeze sentiment analysis
        for param in self.sentiment_analysis.parameters():
            param.requires_grad = False

    def forward(self,
                dialogue,
                senders,
                lengths,
                conversation_lengths,
                movie_occurrences,
                recommend_new_movies,
                user_representation=None):
        """

        :param dialogue: (batch, max_conv_length, max_utt_length) Variable containing the dialogue
        :param movie_occurrences: Array where each element corresponds to a conversation and is a dictionary
        {movieId: (max_conv_length, max_utt_length) array containing the movie mentions}
        :param recommend_new_movies: If true, zero out the movies already mentioned in the output
        :param user_representation: optional prior user representation (obtained from language)
        :return: (batch_size, max_conv_length, n_movies_total) movie preferences
        (ratings not necessarily between 0 and 1)
        """
        batch_size, max_conv_length = dialogue.data.shape[:2]
        if not movie_occurrences or len(movie_occurrences) == 0:
            tt = torch.cuda.FloatTensor if self.cuda_available else torch.FloatTensor
            return Variable(
                torch.zeros(batch_size,
                            max_conv_length,
                            self.n_movies,
                            out=tt()))
        # indices to retrieve original order
        indices = [(i, movieId)
                   for (i,
                        conv_movie_occurrences) in enumerate(movie_occurrences)
                   for movieId in conv_movie_occurrences]
        batch_indices = Variable(torch.LongTensor([i[0] for i in indices]))

        # flatten movie occurrences to shape (total_num_mentions_in_batch, max_conv_length, max_utt_length)
        flattened_movie_occurrences = [
            conv_movie_occurrences[movieId]
            for conv_movie_occurrences in movie_occurrences
            for movieId in conv_movie_occurrences
        ]
        flattened_movie_occurrences = Variable(
            torch.FloatTensor(flattened_movie_occurrences))
        if self.cuda_available:
            batch_indices = batch_indices.cuda()
            flattened_movie_occurrences = flattened_movie_occurrences.cuda()

        # select the dialogues following the movie mentions
        dialogue = torch.index_select(dialogue, 0, batch_indices)
        senders = torch.index_select(senders, 0, batch_indices)
        lengths = lengths[[i[0] for i in indices]]
        conversation_lengths = conversation_lengths[[i[0] for i in indices]]
        # print("senders shape", senders.data.shape) # (total_num_mentions, max_conv_length)

        sentiment_analysis_input = {
            "dialogue": dialogue,
            "movie_occurrences": flattened_movie_occurrences,
            "senders": senders,
            "lengths": lengths,
            "conversation_lengths": conversation_lengths
        }

        # (total_num_mentions_in_batch, max_conv_length)
        movie_likes = self.sentiment_analysis(sentiment_analysis_input,
                                              return_liked_probability=True)

        # populate ratings input using the movie likes
        # (batch_size, max_conv_length, n_movies_total)
        tt = torch.cuda.FloatTensor if self.cuda_available else torch.FloatTensor
        autorec_input = Variable(
            torch.zeros(batch_size, max_conv_length, self.n_movies, out=tt()))
        # mask that tells from which utterance the movies have appeared in conversation
        mask = flattened_movie_occurrences.sum(dim=2) > 0
        mask = mask.cumsum(
            dim=1) > 0  # (total_num_mentions_in_batch, max_conv_length)
        # only use movie preferences after movies are mentioned
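        # e.g. a movie mentioned only in the second of four utterances has per-utterance
        # occurrences [0, 1, 0, 0]; after the cumulative sum the mask is [0, 1, 1, 1], so its
        # predicted rating is kept from the first mention onward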
        movie_likes = movie_likes * mask.float()
        # print("movie likes shape", movie_likes.data.shape) # (total_num_mentions_in_batch, max_conv_length)
        for i, (batchId, movieId) in enumerate(indices):
            autorec_input[batchId, :, movieId] = movie_likes[i]

        # run recommendation model
        # (batch_size, max_conv_length, n_movies_total)
        output = self.autorec(autorec_input,
                              additional_context=user_representation,
                              range01=False)
        # use this at generation time: lower probability for movies already mentioned
        if recommend_new_movies:
            for batchId, movieId in indices:
                # (max_conv_length) mask that zeros out once the movie has been mentioned
                mask = np.sum(movie_occurrences[batchId][movieId], axis=1) > 0
                mask = Variable(
                    torch.from_numpy(
                        (mask.cumsum(axis=0) == 0).astype(float))).float()
                if self.cuda_available:
                    mask = mask.cuda()
                output[batchId, :,
                       movieId] = mask * output[batchId, :, movieId]
        return output
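
# A hedged usage sketch for RecommendFromDialogue: the params structure, catalogue size, and the
# pre-batched inputs are assumptions; input/output shapes follow the forward() docstring above.
def example_recommend(train_vocab, params, batch, n_movies):
    recommender = RecommendFromDialogue(
        train_vocab=train_vocab,  # vocabulary used to train the sentiment-analysis model
        n_movies=n_movies,        # total number of movies in the catalogue (placeholder)
        params=params,            # expects 'sentiment_analysis_params' and 'autorec_params' keys
    )
    # batch is assumed to hold inputs shaped as described in forward():
    # dialogue (batch, max_conv_length, max_utt_length), plus senders, lengths,
    # conversation_lengths, and one movie_occurrences dict per conversation
    preferences = recommender(batch['dialogue'], batch['senders'], batch['lengths'],
                              batch['conversation_lengths'], batch['movie_occurrences'],
                              recommend_new_movies=True)
    # preferences: (batch_size, max_conv_length, n_movies) unnormalized preference scores
    return preferences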
Example #17
    def run(input_filepath):
        result_dict = {}
        print("Processing: {}".format(input_filepath))
        file_df = pd.read_csv(input_filepath)

        # Read one review
        for index, row in file_df.iterrows():

            temp_review_dict = {}

            review_id = row['review_id']
            review = row['text']
            golden_truth_text = row['Dishnames, Sentiment']

            golden_truth_dish_sentiment_dict = Preprocessing.preprocess_golden_truth_text(golden_truth_text)
            if golden_truth_dish_sentiment_dict is None:
                continue
            print(review_id)
            review = Preprocessing.preprocess_review_text(review)

            # Check the grammaticality scores
            readability_dict = GrammarScore.get_scores(review)

            # Coreference resolution of the original text
            resolved_review = CoreferenceResolution.run(review)

            # Divide text into sentences
            sentence_tokens = SentenceTokenization.get_sentence_tokens(resolved_review, 'spacy')

            # Create dish buckets and assign valid sentences to dish
            predicted_truth_dish_sentiment_dict = {}
            for dish_names in golden_truth_dish_sentiment_dict:
                predicted_truth_dish_sentiment_dict[dish_names] = {'found':False, 'sentiment': -100, 'sentences':[]}
            # Add a key to handle sentences which do not match to any dish name
            predicted_truth_dish_sentiment_dict['UNK'] = {'sentiment': -100, 'sentences':[]}

            # Feed line by line to the client to detect dish names
            sentence_analysis_list = []
            false_positive_list = []
            #Initialize false positive list
            for sentence in sentence_tokens:
                try:
                    intent, dish_names = Factory.sclient.detect_intent_for_text(sentence)
                    sentence_analysis_list.append((sentence, intent, dish_names))
                    if intent == 'Default Fallback Intent':
                        predicted_truth_dish_sentiment_dict['UNK']['sentences'].append(sentence)
                    elif (intent == 'menu_item_identification_intent') & (len(dish_names) == 0):
                        predicted_truth_dish_sentiment_dict['UNK']['sentences'].append(sentence)
                    else:
                        for dish in dish_names:
                            if dish in predicted_truth_dish_sentiment_dict:
                                predicted_truth_dish_sentiment_dict[dish]['found'] = True
                                predicted_truth_dish_sentiment_dict[dish]['sentences'].append(sentence)
                            else:
                                false_positive_list.append(dish)
                    time.sleep(1)
                except Exception as e:
                    print(e)
                    continue

            predicted_truth_dish_sentiment_dict['UNK']['False Positives'] = false_positive_list

            # Get the sentiment of each dish
            for dish in predicted_truth_dish_sentiment_dict.keys():
                if dish == 'UNK':
                    continue
                else:
                    if dish not in predicted_truth_dish_sentiment_dict:
                        continue
                    else:
                        # Dish name was found and at least one sentence exists
                        if predicted_truth_dish_sentiment_dict[dish]['found'] == True:
                            # Join all the sentences and get the sentiment
                            sentiment_text = ""
                            for sentence in predicted_truth_dish_sentiment_dict[dish]['sentences']:
                                sentiment_text = sentiment_text + sentence
                            predicted_truth_dish_sentiment_dict[dish]['sentiment'] = SentimentAnalysis.run(sentiment_text,lib='paralleldot')

            temp_review_dict = {
                'text': review,
                'readability_scores': readability_dict,
                'golden_truth': golden_truth_dish_sentiment_dict,
                'predicted_truth': predicted_truth_dish_sentiment_dict,
                'sentences': sentence_analysis_list
            }

            #pp.pprint(temp_review_dict)

            # Save it in the dictionary
            result_dict[review_id] = temp_review_dict

        # Save the overall dictionary
        output_filepath = '../output/' + input_filepath[input_filepath.rfind('/')+1:input_filepath.rfind('.csv')] + '.p'
        with open(output_filepath, 'wb') as pickle_dump_file_pointer:
            pickle.dump(result_dict, pickle_dump_file_pointer, protocol=pickle.HIGHEST_PROTOCOL)
Example #18
                                              index_column='tweeted_date')
        headline_df = mysql_client.get_dataframe(query=get_headline_data_query,
                                                 index_column='published_date')
    except DatabaseQueryError as error:
        print(error)

# Close the connection
mysql_client.close()

# Process data
merged_data = bitcoin_price_df
sentiment_columns = [
    'price', 'flair', 'positive', 'negative', 'subjectivity', 'polarity'
]
merged_data = merged_data.reindex(columns=sentiment_columns)
sa = SentimentAnalysis()
for index, row in merged_data.iterrows():
    tweets = tweet_df.loc[tweet_df.index == index][:30]
    headlines = headline_df.loc[headline_df.index == index][:20]
    # Combine tweets with headlines
    merged_text = tweets['tweet'].tolist()
    merged_text.extend(headlines['headline'].tolist())
    sentiments = sa.get_sentiments(text=' '.join(merged_text))
    # Append new columns
    i = merged_data.index.get_loc(index)
    merged_data['price'][i] = bitcoin_price_df['price'][i]
    merged_data['flair'][i] = sentiments[0]
    merged_data['positive'][i] = sentiments[1]
    merged_data['negative'][i] = sentiments[2]
    merged_data['subjectivity'][i] = sentiments[3]
    merged_data['polarity'][i] = sentiments[4]
    print(df)
    # Summarization
    text = ''
    for sent in df['original_sents'].values:
        text += '.' + sent

    summarize = Summarization(text, None, senti)
    final_summary1, final_summary2, final_summary3, neg_final_summary1, neg_final_summary2, neg_final_summary3, counts, eigen_explo = summarize.get_summaries(
    )
    print(final_summary1, final_summary2, final_summary3, neg_final_summary1, neg_final_summary2, neg_final_summary3, counts, eigen_explo)


if __name__ == "__main__":
    d = spell.request_dict("en_US")
    nlp = spacy.load('en')

    senti = SentimentAnalysis()

    start_time = time.time()
    filenames = [
        f for f in listdir(input_entity_files)
        if isfile(join(input_entity_files, f))
    ]
    #Parellel on CPU cores
    #Parallel(n_jobs=cpu_count() - 1, verbose=10, backend="multiprocessing", batch_size="auto")(delayed(processFiles)(fileName,input_entity_files) for fileName in filenames)
    #for (dirpath, dirnames, filenames) in walk(input_entity_files):
    for file in filenames:
        processFiles(file, input_entity_files)

    print("Time taken --- %s seconds ---" % (time.time() - start_time))
Example #20
from sentiment_analysis import SentimentAnalysis

sa = SentimentAnalysis()
sentiment = sa.analyze_sentiment("This is best film i seen")
print(sa.model.layers[1].output)
print(sentiment)

intermediate_out = sa.debug_hidden_layer_out("This is best film i seen")

sentiment = sa.analyze_sentiment("Wonderful but suck")
print(sentiment)

sentiment = sa.analyze_sentiment("Ha ha i laught")
print(sentiment)
Example #21
def generate_training_data(database, collection, query):
    """Genera un conjunto de datos de entrenamiento.

    Esta generación se realiza con el propósito de encontrar los mejores
    parámetros para la clasificación de sentimientos. En específico,
    se intenta encontrar la mejor combinación de la negación y el
    léxicon de polaridad.

    paráms:
        database: str
            Base de datos de MongoDB que se utilizará.
        collection:
            Colección donde se encuentran los tweets etiquetados para
            entrenamiento.
        query:
            Filtro que se utilizará para recuperar los tweets de entrenamiento.
            Nótese que los tweets deben tener el campo "polarity".

    Example:

        >>> generate_training_data(database='tass_2017',
                                   collection='intertass',
                                   query={"$or": [{"dataset": "train"},
                                                  {"dataset": "development"}]})
    """
    four_label_homologation = {u'N+': 0, u'N': 0,
                               u'NEU': 1,
                               u'P': 2, u'P+': 2,
                               u'NONE': 3}

    client = pymongo.MongoClient()
    coll = client[database][collection]

    tweets = coll.find(filter=query,
                       projection=['tweet_id', 'content', 'polarity'],
                       sort=[('polarity', pymongo.ASCENDING),])

    tweets_ = [[_to_str(tweet['tweet_id']), _to_unicode(tweet['content']),
                _to_unicode(tweet['polarity']).upper()]
               for tweet in tweets if (tweet['content'] and len(tweet['content']) > 0)]

    client.close()

    tweets = None
    tweets = tweets_
    tweets_ = None

    output_path = DATA_PATH + '/train/' + collection
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    for negation_id in NEGATION_SETTINGS:

        lexicons = np.random.choice(np.arange(1, 7),
                                    size=3, replace=False).tolist() +\
                   np.random.choice(np.arange(7, 16),
                                    size=3, replace=False).tolist() +\
                   np.random.choice(np.arange(16, 26),
                                    size=4, replace=False).tolist() +\
                   np.random.choice(np.arange(26, 31),
                                    size=2, replace=False).tolist()

        lexicons = np.random.choice(lexicons, size=6, replace=False).tolist()

        if np.random.choice(range(2), p=[.9, .1]) == 1:
            lexicons.append(31)

        negation_path = output_path + '/%s' % negation_id
        if not os.path.isdir(negation_path):
            os.mkdir(negation_path)

        for lexicon_id in lexicons:

            output_fname = negation_path +\
                           '/metafeatures-lexicon-%s.tsv' % lexicon_id
            if os.path.isfile(output_fname):
                continue

            clf = SentimentAnalysis(negation_id=negation_id,
                                    lexicon='lexicon-%i' % lexicon_id)

            documents = []
            four_label_polarities = []
            metafeatures_list = []

            for j, (tweet_id, content, polarity) in enumerate(tweets):
                try:
                    text, metafeatures = clf.preprocess_tweet(content)
                except:
                    _write_in_file(fname=negation_path + '/errors-1.log',
                                   content=tweet_id + '\n', mode='a')
                    continue

                metafeatures = metafeatures.reshape(1, metafeatures.shape[0])

                if j == 0:
                    metafeatures_list = metafeatures
                else:
                    if metafeatures_list.shape[1] == metafeatures.shape[1]:
                        metafeatures_list = np.vstack((metafeatures_list,
                                                       metafeatures))
                    else:
                        _write_in_file(fname=negation_path + '/errors-2.log',
                                       content=tweet_id + '\n', mode='a')
                        continue

                documents.append(_to_str(text))
                four_label_polarities.append(four_label_homologation[polarity])

            if not os.path.isfile(negation_path + '/tweets.txt'):
                np.savetxt(negation_path + '/tweets.txt',
                           np.array(documents, dtype=str), fmt='%s')

            if not os.path.isfile(negation_path + '/target-labels.dat'):
                np.savetxt(negation_path + '/target-labels.dat',
                           np.array(four_label_polarities, dtype=int), fmt='%i')

            np.savetxt(output_fname, metafeatures_list, fmt='%i', delimiter='\t')

            clf = None
Example #22
from sanic import Sanic
from sanic_jinja2 import SanicJinja2  # pip install sanic_jinja2
from sanic import response
from sentiment_analysis import SentimentAnalysis

app = Sanic()
jinja = SanicJinja2(app)
sa = None

# ----------------------
# Serves files from the static folder to the URL /static
app.static('/img', './img')


@app.route('/')
@jinja.template('index.html')  # decorator method is static method
async def index(request):
    return


@app.route("/analyze_review", methods=['POST'])
def analyze_review(request):
    sentiment = sa.analyze_sentiment(request.form['review'][0])
    return response.json(str(sentiment[0]))


if __name__ == "__main__":
    sa = SentimentAnalysis()
    # --- Start Sanic Web Server at port 9000
    app.run(host="0.0.0.0", port=9000)
Example #23
import argparse

from preprocess import WordSet, WordEmbedding, KnowledgeBase
from sentiment_analysis import SentimentAnalysis

#sentence = '外观 漂亮'  # "the styling is beautiful"
#sentence = '外观 不 太 漂亮'  # "the styling is not very beautiful"
#sentence = '高 规格 的 用料 和 精致 的 做工'  # "high-grade materials and refined workmanship"
#sentence = '炫酷 的 造型 、 充沛 的 动力 再 加上 本田 家族 运动 基因 的 传承'  # "cool styling, ample power, plus the Honda family's sporting DNA"

parser = argparse.ArgumentParser()
parser.add_argument('-s', required=True)
args = parser.parse_args()

sentence = args.s

abc = SentimentAnalysis()
result = abc.analyze(sentence)

print('--------------------')
print('%s\n' % (sentence))
for item in result:
    aspect = item[0]
    opinion = item[1]
    relation = item[4]
    print('%s\t%s\t%s' % (aspect, opinion, relation))
print('--------------------')

for item in result:
    aspect = item[0]
    opinion = item[1]
    relation = item[4]
Example #24
class ChatBot:
    sentiment = SentimentAnalysis()
    #sentiment.makemodel()
    knowledge = {}
    historybuffer = list()  # contains the most recent messages
    # The Kernel object is the public interface to
    # the AIML interpreter.
    aimlBot = aiml.Kernel()
    # Use the 'learn' method to load the contents
    # of an AIML file into the Kernel.
    aimlBot.learn(os.path.dirname(os.path.abspath(__file__)) + '/botdata/*/*.aiml')
    status = False

    def read_input(self, input, latitude, longitude):
        input = input.lower()
        self.historybuffer.insert(0, input)
        if len(self.historybuffer) >= 100:
            self.historybuffer.pop()
        self.knowledge = self.sentiment.analyse_text(self.knowledge, input)
        if "@bot" in input or "@Bot" in input:
            input_list = input.split(' ')
            keyword = 'on' if len(input_list) == 1 else input_list[1]

            if keyword == 'off':
                self.status = False
            elif keyword == 'topic':
                return self.reply(input, 'topic', latitude, longitude)
            else:
                self.status = True
                return self.reply(input, "location", latitude, longitude)
        if self.status is True:
            return self.reply(input, "aiml", latitude, longitude)
        return None

    def reply(self, message, case, latitude, longitude):
        if case == "aiml":
            # group = self.classify(self.historytext())
            return {
                'message': self.aimlresponse(message),
            }
        elif case == "location":
            if len(self.historytext().split()) < 20:
                return {
                    'message': 'I am not sure about where you should go. Try talking more.',
                }

            group = self.classify(self.historytext())

            if len(group) == 0:
                return {
                    'message': 'I am not sure about where you should go. Try talking more.',
                }

            best_match = group[0]
            place_type = None
            query = ''
            if 'Restaurants' in best_match.name:
                place_type = PlaceType.restaurant
                category_split = best_match.name.strip('/').split('/')
                if len(category_split) > 2:
                    query = category_split[2]  # Fast Food or Pizzeria

            results = search_places(latitude, longitude, place_type=place_type, keyword=query)
            results = results[:4]

            return {
                'message': 'How about one of these?',
                'options': [{
                    'label': place['name'],
                    'link': f"https://www.google.com/maps/dir/?api=1&destination={place['geometry']['location']['lat']},{place['geometry']['location']['lng']}",
                } for place in results],
            }
        elif case == 'topic':
            interests = self.returnInterests()
            return {
                'message': 'How about one of these topics?',
                'options': [interest.name for interest in interests],
            }