def predict_models_header_sentiment(text_data_list):
    """Score each header with the NLTK ensemble, word2vec and vectorizer pipelines.

    Args:
        text_data_list: Rows of ``(id, header, sub_header)``, in any form
            ``pd.DataFrame`` accepts for those three columns.

    Returns:
        A DataFrame holding the three input columns plus:

        * ``sentiment_for`` -- always ``"header"``.
        * ``nltk_classify`` -- 1 for a ``"pos"`` label, 0 for ``"neg"``,
          2 for any other label.
        * ``nltk_confidence`` -- confidence returned by the NLTK analyzer.
        * ``word2vec_classify`` -- value returned by the word2vec predictor.
        * ``count_vectorizer_classify`` / ``tfidf_vectorizer_classify`` --
          first prediction of each pipeline, converted to ``int``.
    """
    text_data_df = pd.DataFrame(
        text_data_list, columns=['id', 'header', 'sub_header'])

    # Result columns, pre-filled with placeholders. Every row of this frame
    # is a header row, so the marker column is set once rather than being
    # reassigned for the whole frame on each loop iteration.
    text_data_df["sentiment_for"] = "header"
    text_data_df["nltk_classify"] = -1
    text_data_df["nltk_confidence"] = 0.0
    text_data_df["word2vec_classify"] = -1
    text_data_df["count_vectorizer_classify"] = -1
    text_data_df["tfidf_vectorizer_classify"] = -1

    # Nothing to score: skip loading the models altogether.
    if text_data_df.empty:
        return text_data_df

    # --------- news nltk load -------------
    word_features = nltk_classifier.load_save_dataset('word_features.pickle')
    ensemble_clf = nltk_classifier.get_ensemble_models(None)

    # --------- news word2vec load -------------
    model, w2vmodel, tfidf = (
        word2vec_classifier_model.load_prediction_model_parameters())

    # --------- spacy model load -------------
    pipe_count_vectorizer, pipe_tfidf_vectorizer = get_spacy_model()

    # NLTK label -> numeric code; any label not listed here maps to 2.
    label_codes = {"pos": 1, "neg": 0}

    # Iterate WITH the index label: the writes below go through ``.at`` and
    # need the row label (previously ``index`` was never bound here).
    for row in text_data_df.itertuples():
        index = row.Index
        text = row.header

        # --------- nltk -------------
        classify, nltk_confidence = nltk_classifier.sentiment_analyzer(
            text, ensemble_clf, word_features)
        text_data_df.at[index, 'nltk_classify'] = label_codes.get(classify, 2)
        text_data_df.at[index, 'nltk_confidence'] = nltk_confidence

        # --------- news word2vec -------------
        text_data_df.at[index, 'word2vec_classify'] = (
            word2vec_classifier_model.predict(model, w2vmodel, tfidf, text))

        # --------- spacy -------------
        # The pipelines are called with a list of documents, hence the
        # one-element list; only the first prediction is kept.
        text_list = [text]
        count_vectorizer_classify = pipe_count_vectorizer.predict(text_list)
        tfidf_vectorizer_classify = pipe_tfidf_vectorizer.predict(text_list)
        text_data_df.at[index, 'count_vectorizer_classify'] = int(
            count_vectorizer_classify[0])
        text_data_df.at[index, 'tfidf_vectorizer_classify'] = int(
            tfidf_vectorizer_classify[0])

    return text_data_df
"I feel amazing!", "thanks for lyft credit i can not use because they do not offer wheelchair vans in pdx disapointed getthanked", "Intel surges 8% on an earnings beat and better-than-expected forecast"] print("\n--------nltk-------------") # print(pre_processing.common_pre_processing_steps(tweet)) # print("***** loading word features *****") word_features = nltk_classifier.load_save_dataset('word_features.pickle') # print("***** save train models to a ensemble *****") ensemble_clf = nltk_classifier.get_ensemble_models(None) # print("***** predict tweets sentiment *****") for tokens in range(len(tweet_list)): classify, confidence = nltk_classifier.sentiment_analyzer(tweet_list[tokens], ensemble_clf, word_features) print("classify - {} , confidence - {}".format(classify, confidence)) print("\n--------news_word2vec-------------") model, w2vmodel, tfidf = word2vec_classifier_model.load_prediction_model_parameters() for tokens in range(len(tweet_list)): prediction = word2vec_classifier_model.predict(model, w2vmodel, tfidf, tweet_list[tokens]) print(" prediction - {}".format(prediction)) # --------spacy model load------------- print("\n--------spacy-------------") pipe_count_vectorizer, pipe_tfidf_vectorizer = get_spacy_model() text_list = []
def predict_models_header_sentiment(text_data_list):
    """Score each usable header with the NLTK ensemble and both vectorizer models.

    Rows whose header is not a string, is empty, or is the literal text
    ``"NaN"`` are skipped. A row whose scoring raises is logged through
    ``database_log.error_log`` and left out of the result, so one bad row
    does not abort the batch.

    Args:
        text_data_list: Rows of ``(id, header, sub_header)``, in any form
            ``pd.DataFrame`` accepts for those three columns.

    Returns:
        A DataFrame with one row per successfully scored header and the
        columns ``id``, ``header``, ``sub_header``, ``sentiment_for``
        (always ``"header"``), ``nltk_classify`` (1 for ``"pos"``, 0 for
        ``"neg"``, 2 for any other label), ``nltk_confidence``,
        ``count_vectorizer_classify``, ``count_vectorizer_confidence``,
        ``tfidf_vectorizer_classify`` and ``tfidf_vectorizer_confidence``.
        The frame is empty, but keeps these columns, when nothing was scored.
    """
    # Note the comma after 'tfidf_vectorizer_classify': without it Python
    # joins the two adjacent literals into a single bogus column name.
    result_columns = [
        'id', 'header', 'sub_header', 'sentiment_for',
        'nltk_classify', 'nltk_confidence',
        'count_vectorizer_classify', 'count_vectorizer_confidence',
        'tfidf_vectorizer_classify', 'tfidf_vectorizer_confidence',
    ]

    text_data_df = pd.DataFrame(
        text_data_list, columns=['id', 'header', 'sub_header'])

    # Keep only rows with a usable header. The isinstance check also filters
    # None / float NaN headers, which would otherwise fail on len().
    scorable_rows = [
        row for row in text_data_df.itertuples(index=False)
        if isinstance(row.header, str) and row.header not in ("", "NaN")
    ]

    # Nothing to score: skip loading the models altogether.
    if not scorable_rows:
        return pd.DataFrame(columns=result_columns)

    # --------- news nltk load -------------
    word_features = nltk_classifier.load_save_dataset('word_features.pickle')
    ensemble_clf = nltk_classifier.get_ensemble_models()

    # NOTE: word2vec scoring is disabled in this version of the function.

    # NLTK label -> numeric code; any label not listed here maps to 2.
    label_codes = {"pos": 1, "neg": 0}

    sentiment_data_list = []
    for row in scorable_rows:
        text = row.header
        try:
            # --------- nltk -------------
            classify, nltk_confidence = nltk_classifier.sentiment_analyzer(
                text, ensemble_clf, word_features)

            # --------- spacy: CountVectorizer model -------------
            classify_cVector, confidence_cVector = (
                news_spacy_countVectorizer_model.sentiment_analyzer(text))

            # --------- spacy: TfidfVectorizer model -------------
            classify_tfidf, confidence_tfidf = (
                news_spacy_tfidfVectorizer_model.sentiment_analyzer(text))

            sentiment_data_list.append({
                'id': row.id,
                'header': row.header,
                'sub_header': row.sub_header,
                'sentiment_for': "header",
                'nltk_classify': label_codes.get(classify, 2),
                'nltk_confidence': nltk_confidence,
                'count_vectorizer_classify': int(classify_cVector),
                'count_vectorizer_confidence': confidence_cVector,
                'tfidf_vectorizer_classify': int(classify_tfidf),
                'tfidf_vectorizer_confidence': confidence_tfidf,
            })
        except Exception as error:
            # Deliberate per-row boundary: record the failure and carry on
            # with the remaining headers.
            print(error)
            database_log.error_log(
                "run_news_sentiment_analyzer - predict_models_header_sentiment",
                error)

    # Build the frame in one step; DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(sentiment_data_list, columns=result_columns)