def run_news_scraper():
    try:
        with multiprocessing.Manager() as manager:
            # create one scraper process per news source
            p1 = multiprocessing.Process(target=mint.start_process)
            p2 = multiprocessing.Process(target=business_standard.start_process)
            p3 = multiprocessing.Process(target=money_control.start_process)
            p4 = multiprocessing.Process(target=economic_times.start_process)
            p5 = multiprocessing.Process(target=reuters.start_process)
            p6 = multiprocessing.Process(target=infoline.start_process)
            p7 = multiprocessing.Process(target=financial_express.start_process)
            p8 = multiprocessing.Process(target=bloombergquint.start_process)
            p9 = multiprocessing.Process(target=businesstoday.start_process)
            # start all scraper processes
            p1.start()
            p2.start()
            p3.start()
            p4.start()
            p5.start()
            p6.start()
            p7.start()
            p8.start()
            p9.start()
    except Exception as error:
        database_log.error_log("run_news_scraper", error)

def read_news_data():
    try:
        query = """
            CREATE TEMPORARY TABLE temp_news_headlines
            (
                id INT,
                header TEXT NULL,
                sub_header TEXT NULL
            );

            -- headlines that have not been scored yet and have a non-empty sub_header
            INSERT INTO temp_news_headlines(id, header, sub_header)
            SELECT ndump.id, ndump.header, ndump.sub_header
            FROM news_feeds_dump ndump
            WHERE ndump.id NOT IN (SELECT DISTINCT news_id FROM news_feeds_sentiment)
              AND (ndump.sub_header IS NOT NULL AND ndump.sub_header != '')
            ORDER BY id;

            -- drop duplicate sub_headers/headers, keeping the row with the highest id
            DELETE FROM temp_news_headlines temp1
            USING temp_news_headlines temp2
            WHERE temp1.id < temp2.id AND temp1.sub_header = temp2.sub_header;

            DELETE FROM temp_news_headlines temp1
            USING temp_news_headlines temp2
            WHERE temp1.id < temp2.id AND temp1.header = temp2.header;

            SELECT ndump.id, ndump.header, ndump.sub_header
            FROM temp_news_headlines ndump
            WHERE ndump.header NOT IN (SELECT DISTINCT header
                                       FROM news_feeds_dump nfdump
                                       JOIN news_feeds_sentiment nsentiment
                                         ON nfdump.id = nsentiment.news_id)
            ORDER BY id;
        """
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute(query)
            news_data = cursor.fetchall()
            return news_data
    except Exception as error:
        database_log.error_log("read_news_data", error)

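# Illustrative wiring sketch (an assumption, not part of the original source): the rows returned by
# read_news_data() are (id, header, sub_header) tuples, which matches the text_data_list expected by
# predict_models_header_sentiment() further below, so a scoring run could presumably look like this.
def _example_score_unprocessed_headlines():
    news_data_list = read_news_data()
    if news_data_list:
        news_sentiment_data = predict_models_header_sentiment(news_data_list)
        return news_sentiment_data
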
def load_data_v4():
    try:
        train_dataset_path = '../TrainingData/TwitterData/tweets.csv'
        test_dataset_path = '../TrainingData/TwitterData/tweetstest.csv'
        tweets_train = pd.read_csv(train_dataset_path, encoding='latin-1')
        tweets_test = pd.read_csv(test_dataset_path, encoding='latin-1')
        tweets_train = clean_dataset_v3(tweets_train)
        tweets_test = clean_dataset_v3(tweets_test)
        # keep a balanced training sample: 30,000 positive and 30,000 negative tweets
        df_tweets_train_pos = tweets_train.loc[tweets_train['label'] == 1].head(30000)
        df_tweets_train_neg = tweets_train.loc[tweets_train['label'] == 0].head(30000)
        # print(" train -{} ; {}".format(df_tweets_train_pos.shape, df_tweets_train_neg.shape))
        tweets_train = pd.concat([df_tweets_train_pos, df_tweets_train_neg])
        return tweets_train, tweets_test
    except Exception as error:
        database_log.error_log("data_load : load_data_v4", error)

def load_data_v6():
    try:
        tweets_train, tweets_test = load_data_v4()
        return tweets_train
    except Exception as error:
        database_log.error_log("data_load : load_data_v6", error)

def read_source_link(lookup_value):
    try:
        web_config_list = read_website_configuration(lookup_value)
        df_web_config = pd.DataFrame(web_config_list,
                                     columns=['website', 'website_category', 'website_link'])
        return df_web_config
    except Exception as error:
        database_log.error_log("read_source_link", error)

def load_data_v1():
    try:
        train_dataset_path = '../TrainingData/TwitterData/Sentiment_Analysis_Dataset.csv'
        tweets_train = pd.read_csv(train_dataset_path)
        return tweets_train
    except Exception as error:
        database_log.error_log("data_load : load_data_v1", error)

def read_website_configuration(lookup_value):
    try:
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute("SELECT website,website_category,website_link FROM website_configuration "
                           "WHERE is_active=true and website = %s", (lookup_value,))
            web_config_list = cursor.fetchall()
            return web_config_list
    except Exception as error:
        database_log.error_log("read_website_configuration", error)

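# Assumption / sketch (not part of the original source): CursorFromConnectionFromPool is used throughout
# as a context manager that hands out a cursor from a connection pool and commits on a clean exit.
# A minimal psycopg2-based version could look roughly like this; the class-name suffix and the
# connection settings below are placeholders only.
from psycopg2 import pool as pg_pool

class CursorFromConnectionFromPoolSketch:
    _pool = None

    @classmethod
    def _get_pool(cls):
        if cls._pool is None:
            # hypothetical connection settings
            cls._pool = pg_pool.SimpleConnectionPool(1, 10, host='localhost', dbname='marketdb',
                                                     user='postgres', password='postgres')
        return cls._pool

    def __enter__(self):
        self.connection = self._get_pool().getconn()   # borrow a connection from the pool
        self.cursor = self.connection.cursor()
        return self.cursor

    def __exit__(self, exc_type, exc_value, exc_tb):
        if exc_type is None:
            self.connection.commit()                   # commit only when no exception occurred
        self.cursor.close()
        self._get_pool().putconn(self.connection)      # hand the connection back to the pool
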
def bulk_insert_indices_data(records):
    try:
        sql_insert_query = """ INSERT INTO indices_data (index_id,high,low,open,close,adj_close,entry_date)
                               VALUES (%s,%s,%s,%s,%s,%s,%s) """
        with CursorFromConnectionFromPool() as cursor:
            cursor.executemany(sql_insert_query, records)
    except Exception as error:
        database_log.error_log("bulk_insert_indices_data", error)

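# Illustrative call with hypothetical values: `records` is a sequence of 7-tuples matching
# (index_id, high, low, open, close, adj_close, entry_date) in the INSERT statement above.
def _example_bulk_insert_indices_data():
    sample_records = [
        (1, 15800.25, 15620.10, 15700.00, 15750.55, 15750.55, '2021-06-01'),
        (1, 15810.00, 15640.00, 15755.00, 15790.30, 15790.30, '2021-06-02'),
    ]
    bulk_insert_indices_data(sample_records)
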
def load_data():
    try:
        train_dataset_path = '../TrainingData/TwitterData/tweets_train.csv'
        test_dataset_path = '../TrainingData/TwitterData/tweets_test.csv'
        tweets_train = pd.read_csv(train_dataset_path)
        tweets_test = pd.read_csv(test_dataset_path)
        return tweets_train, tweets_test
    except Exception as error:
        database_log.error_log("data_load : load_data", error)

def load_data_v5():
    try:
        twitter_training_data_path = '../TrainingData/TwitterData/twitter_training_data.csv'
        twitter_training_df = pd.read_csv(twitter_training_data_path)
        tweets_train, tweets_test = load_data_v4()
        # note: the concatenated frame is built but not returned in the current version
        train_data_v1 = pd.concat([twitter_training_df, tweets_train])
        # return twitter_training_df
        return tweets_train
    except Exception as error:
        database_log.error_log("data_load : load_data_v5", error)

def read_indices():
    try:
        query = """
            SELECT id, index_symbol, yahoo_symbol, start_date
            FROM indices
            WHERE is_active = true
            ORDER BY id
        """
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute(query)
            indices_list = cursor.fetchall()
            return indices_list
    except Exception as error:
        database_log.error_log("read_indices", error)

def read_twitter_data():
    try:
        query = """
            SELECT tdump.id, tdump.tweet_id, tdump.screen_id, tdump.tweet_message, tdump.tweet_date
            FROM twitter_data_dump tdump
            WHERE tdump.id NOT IN (SELECT DISTINCT tweet_id FROM twitter_sentiment)
              AND tweet_message != 'NaN'
              AND tweet_message != ''
        """
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute(query)
            twitter_data = cursor.fetchall()
            return twitter_data
    except Exception as error:
        database_log.error_log("read_twitter_data", error)

def read_twitter_account():
    try:
        query = """
            -- accounts with no stored tweets default to tweet_id '1' (later used as since_id)
            SELECT ta.id,
                   ta.screen_id,
                   CASE WHEN max(tweet_id) IS NULL THEN '1' ELSE max(tweet_id) END tweet_id
            FROM twitter_account ta
            LEFT JOIN twitter_data_dump td ON ta.screen_id = td.screen_id
            WHERE ta.is_active = true
            GROUP BY ta.id
            ORDER BY ta.id
        """
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute(query)
            twitter_account_list = cursor.fetchall()
            return twitter_account_list
    except Exception as error:
        database_log.error_log("read_twitter_account", error)

def load_data_v3():
    try:
        train_dataset_path = '../TrainingData/TwitterData/tweets.csv'
        test_dataset_path = '../TrainingData/TwitterData/tweetstest.csv'
        tweets_train = pd.read_csv(train_dataset_path, encoding='latin-1')
        tweets_test = pd.read_csv(test_dataset_path, encoding='latin-1')
        tweets_train = clean_dataset_v3(tweets_train)
        tweets_test = clean_dataset_v3(tweets_test)
        return tweets_train, tweets_test
    except Exception as error:
        database_log.error_log("data_load : load_data_v3", error)

def read_daily_indices():
    try:
        query = """
            SELECT indices.id,
                   indices.yahoo_symbol,
                   indices.time_zone,
                   CASE WHEN MAX(indices_data.entry_date) IS NULL
                        THEN indices.start_date
                        ELSE MAX(indices_data.entry_date) + INTERVAL '1 day'
                   END AS last_date
            FROM indices
            LEFT JOIN indices_data ON indices.id = indices_data.index_id
            WHERE indices.is_active = true
            GROUP BY indices.id
            ORDER BY id
        """
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute(query)
            news_data = cursor.fetchall()
            return news_data
    except Exception as error:
        database_log.error_log("read_daily_indices", error)

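# Hypothetical consumer sketch (an assumption: the original loader for indices_data is not shown in this
# dump). read_daily_indices() yields (id, yahoo_symbol, time_zone, last_date); one plausible way to fill
# indices_data is to pull daily candles from Yahoo Finance with the yfinance package and reuse
# bulk_insert_indices_data() above. Column names follow yfinance's download() output.
import yfinance as yf

def _example_fetch_and_store_index_history():
    for index_id, yahoo_symbol, time_zone, last_date in read_daily_indices():
        history = yf.download(yahoo_symbol, start=str(last_date), progress=False)
        records = [
            (index_id, row['High'], row['Low'], row['Open'], row['Close'], row['Adj Close'], entry_date.date())
            for entry_date, row in history.iterrows()
        ]
        if records:
            bulk_insert_indices_data(records)
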
def start_process():
    try:
        print("***********Initiate Process - mint**************")
        database_log.process_log("India - mint : start_process", "Initiate Process")
        while True:
            scheduled_sleeping_seconds = app_config.india_market_scheduled_task_sleeping
            loader.start_load_process(lookup_value)
            database_log.process_log("India - mint : start_process", "Re-Run Process")
            print("Last run was successful for India - mint, next run in {} seconds."
                  .format(scheduled_sleeping_seconds))
            time.sleep(scheduled_sleeping_seconds)
    except Exception as error:
        database_log.error_log("India - mint : start_process", error)

def init():
    # create the API object
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    try:
        api.verify_credentials()
        print("Authentication OK")
    except Exception as error:
        database_log.error_log("twitter_scheduler - init - Error creating API", error)
        # auth_status = False
    return api

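# Assumption / sketch: the module-level `auth` handler used by init() is presumably built from the
# application's Twitter credentials, roughly as below. The app_config attribute names are placeholders.
import tweepy

auth = tweepy.OAuthHandler(app_config.twitter_consumer_key, app_config.twitter_consumer_secret)
auth.set_access_token(app_config.twitter_access_token, app_config.twitter_access_token_secret)
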
def clean_dataset_v3(dataset):
    try:
        dataset.columns = ["label", "ItemID", "Date", "Blank", "SentimentSource", "tweet"]
        dataset.drop(['ItemID', 'Date', 'Blank', 'SentimentSource'], axis=1, inplace=True)
        dataset = dataset[dataset.label.isnull() == False]
        dataset['label'] = dataset['label'].map({4: 1, 0: 0})  # converting label 4 to 1
        dataset = dataset[dataset['tweet'].isnull() == False]
        dataset = dataset[dataset['label'].isnull() == False]  # remove NaN rows
        dataset.reset_index(inplace=True)
        dataset.drop('index', axis=1, inplace=True)
        # print('dataset loaded with shape', dataset.shape)
        return dataset
    except Exception as error:
        database_log.error_log("data_load : clean_dataset_v3", error)

def read_configuration(lookup_value):
    try:
        with CursorFromConnectionFromPool() as cursor:
            cursor.execute("SELECT website,scheduled_task_sleeping,market_time_zone,market_start_time,market_end_time,"
                           "market_hours_delay,market_off_hours_delay,market_weekend_hours_delay,market_location "
                           "FROM configuration WHERE is_active=true and website = %s", (lookup_value,))
            config_list = cursor.fetchall()
            df_web_config = pd.DataFrame(config_list,
                                         columns=['website', 'scheduled_task_sleeping', 'market_time_zone',
                                                  'market_start_time', 'market_end_time', 'market_hours_delay',
                                                  'market_off_hours_delay', 'market_weekend_hours_delay',
                                                  'market_location'])
            return df_web_config
    except Exception as error:
        database_log.error_log("read_configuration", error)

def start_process():
    try:
        print("***********Initiate Process - business_today**************")
        database_log.process_log("India - business_today : start_process", "Initiate Process")
        while True:
            # run the loader, then sleep until the next scheduled run
            scheduled_sleeping_seconds = app_config.india_market_scheduled_task_sleeping
            loader.start_load_process(lookup_value)
            database_log.process_log("India - business_today : start_process", "Re-Run Process")
            print("Last run was successful for India - business_today, next run in {} seconds."
                  .format(scheduled_sleeping_seconds))
            time.sleep(scheduled_sleeping_seconds)
    except Exception as error:
        database_log.error_log("India - business_today : start_process", error)

def read_information(df_web_config):
    try:
        # create an empty data frame with the expected columns
        df_news_data = pd.DataFrame(columns=['website', 'website_category', 'website_link', 'news_link',
                                             'header', 'sub_header', 'timestamp'])
        for row in df_web_config.itertuples(index=False):
            url_link = row.website_link
            # fetch the page with a GET request
            resp = requests.get(url_link, headers={'User-Agent': 'Mozilla/5.0'})
            # http_response 200 means OK status
            if resp.status_code == 200:
                soup = BeautifulSoup(resp.text, 'html.parser')
                website = 'india_reuters'
                categories = row.website_category
                news_link = ''
                header = ''
                sub_header = ''
                timestamp = ''
                for level_1 in soup.findAll("div", {"class": "news-headline-list"}):
                    for level_2 in level_1.findAll("div", {"class": "story-content"}):
                        for level_3 in level_2.findAll("a"):
                            news_link = level_3['href']
                            header = level_3.text.strip()
                        for level_3 in level_2.findAll("p"):
                            sub_header = level_3.text
                        for level_3 in level_2.findAll("time", {"class": "article-time"}):
                            timestamp = level_3.text.strip()
                        if len(header) > 0:
                            # keep English headlines only
                            text_lang = detect(header)
                            if text_lang == "en":
                                df_news_data = df_news_data.append({'website': website,
                                                                    'website_link': url_link,
                                                                    'website_category': categories,
                                                                    'news_link': news_link,
                                                                    'header': header,
                                                                    'sub_header': sub_header,
                                                                    'timestamp': timestamp},
                                                                   ignore_index=True)
            else:
                database_log.error_log("India - Loader - india_reuters : read_information", resp.status_code)
        if df_news_data.empty is not True:
            df_news_feed_list = [tuple(r) for r in df_news_data[['website', 'website_link', 'website_category',
                                                                 'news_link', 'header', 'sub_header',
                                                                 'timestamp']].values.tolist()]
            sql_execute.bulk_insert_news_feeds(df_news_feed_list)
        else:
            database_log.error_log("India - Loader - india_reuters : read_information", "no record found")
    except Exception as error:
        database_log.error_log("India - Loader - india_reuters : read_information", error)

def bulk_insert_twitter_sentiment(twitter_data_sentiment_df):
    try:
        # convert the data frame into lists of tuples
        twitter_data_list = [tuple(r) for r in twitter_data_sentiment_df[['id', 'tweet_id', 'screen_id',
                                                                          'tweet_message',
                                                                          'tweet_date']].values.tolist()]
        twitter_data_sentiment_list = [tuple(r) for r in twitter_data_sentiment_df[[
            'id', 'nltk_classify', 'nltk_confidence',
            'count_vectorizer_classify', 'count_vectorizer_confidence',
            'tfidf_vectorizer_classify', 'tfidf_vectorizer_confidence',
            'word2vec_classify']].values.tolist()]
        # sql_twitter_data_query = """ INSERT INTO twitter_data (id,tweet_id,screen_id,tweet_message,tweet_date)
        #                              VALUES (%s,%s,%s,%s,%s) """
        sql_twitter_sentiment_query = """ INSERT INTO twitter_sentiment (tweet_id,nltk_classify,nltk_confidence,
                                          spacy_count_vectorizer_classify,spacy_count_vectorizer_confidence,
                                          spacy_tfidf_vectorizer_classify,spacy_tfidf_vectorizer_confidence,word2vec_classify)
                                          VALUES (%s,%s,%s,%s,%s,%s,%s,%s) """
        # with CursorFromConnectionFromPool() as cursor:
        #     cursor.executemany(sql_twitter_data_query, twitter_data_list)
        with CursorFromConnectionFromPool() as cursor:
            cursor.executemany(sql_twitter_sentiment_query, twitter_data_sentiment_list)
        # delete processed records
        # max_tweet_id = twitter_data_sentiment_df['id'].max()
        # max_tweet_id = int(max_tweet_id)
        # sql_delete_query = "DELETE FROM twitter_preprocessing_data WHERE id <= %s"
        # with CursorFromConnectionFromPool() as cursor:
        #     cursor.execute(sql_delete_query, (max_tweet_id,))
    except Exception as error:
        database_log.error_log("bulk_insert_twitter_sentiment", error)

def read_information(df_web_config):
    try:
        # create an empty data frame with the expected columns
        df_news_data = pd.DataFrame(columns=['website', 'website_category', 'website_link', 'news_link',
                                             'header', 'sub_header', 'timestamp'])
        for index, row in df_web_config.iterrows():
            url_link = row["website_link"]
            # fetch the page with a GET request
            resp = requests.get(url_link, headers={'User-Agent': 'Mozilla/5.0'})
            # http_response 200 means OK status
            if resp.status_code == 200:
                soup = BeautifulSoup(resp.text, 'html.parser')
                website = 'mint'
                categories = row["website_category"]
                news_link = ''
                header = ''
                sub_header = ''
                timestamp = ''
                for level_1 in soup.findAll("div", {"class": "headlineSec"}):
                    for level_2 in level_1.find_all('h2', {"class": "headline"}):
                        for level_3 in level_2.find_all('a', href=True):
                            news_link = level_3['href']
                            header = level_3.text
                    for level_2 in level_1.find_all('span', {"class": "fl date"}):
                        for level_3 in level_2.find_all('span'):
                            timestamp = level_3.text
                    df_news_data = df_news_data.append({'website': website,
                                                        'website_link': url_link,
                                                        'website_category': categories,
                                                        'news_link': news_link,
                                                        'header': header,
                                                        'sub_header': sub_header,
                                                        'timestamp': timestamp},
                                                       ignore_index=True)
            else:
                database_log.error_log("India - Loader - mint : read_information", resp.status_code)
        if df_news_data.empty is not True:
            df_news_feed_list = [tuple(r) for r in df_news_data[['website', 'website_link', 'website_category',
                                                                 'news_link', 'header', 'sub_header',
                                                                 'timestamp']].values.tolist()]
            sql_execute.bulk_insert_news_feeds(df_news_feed_list)
        else:
            database_log.error_log("India - Loader - mint : read_information", "no record found")
    except Exception as error:
        database_log.error_log("India - Loader - mint : read_information", error)

def read_user_timeline(api):
    try:
        # get the configured twitter accounts
        df_twitter_account = read_twitter_account()
        for row in df_twitter_account.itertuples(index=False):
            # print(row["id"], row["screen_id"], row['tweet_id'])
            tweets = api.user_timeline(screen_name=row.screen_id,
                                       include_rts=False,
                                       since_id=int(row.tweet_id))
            tweet_list = []
            for tweet in tweets:
                tweet_id = tweet.id
                tweet_message = tweet.text
                tweet_source = tweet.source
                retweet_count = tweet.retweet_count
                likes_count = tweet.favorite_count
                tweet_date = tweet.created_at
                try:
                    # check if the text is in English
                    text_lang = detect(tweet_message)
                    if text_lang == "en":
                        tweet_list.append({'tweet_id': tweet_id,
                                           'screen_id': row.screen_id,
                                           'tweet_message': tweet_message,
                                           'tweet_source': tweet_source,
                                           'retweet_count': retweet_count,
                                           'likes_count': likes_count,
                                           'tweet_date': tweet_date})
                except Exception as error:
                    database_log.error_log("twitter_scheduler - read_user_timeline - language error", error)
                    tweet_list.append({'tweet_id': tweet_id,
                                       'screen_id': row.screen_id,
                                       'tweet_message': tweet_message,
                                       'tweet_source': tweet_source,
                                       'retweet_count': retweet_count,
                                       'likes_count': likes_count,
                                       'tweet_date': tweet_date})
            if len(tweet_list) > 0:
                tweet_data_frame = pd.DataFrame(tweet_list,
                                                columns=['tweet_id', 'screen_id', 'tweet_message', 'tweet_source',
                                                         'retweet_count', 'likes_count', 'tweet_date'])
                clean_twitter_data_frame = pre_processing.clean_twitter_data(tweet_data_frame)
                if len(tweet_list) > 0 and clean_twitter_data_frame.empty is not True:
                    tweet_data_list = [tuple(r) for r in clean_twitter_data_frame[['tweet_id', 'screen_id',
                                                                                   'tweet_message', 'tweet_source',
                                                                                   'retweet_count', 'likes_count',
                                                                                   'tweet_date']].values.tolist()]
                    sql_execute.bulk_insert_twitter_feeds(tweet_data_list)
            else:
                database_log.error_log("twitter_scheduler - read_user_timeline", "no record found")
                print("No Record Found.")
    except Exception as error:
        database_log.error_log("twitter_scheduler - read_user_timeline", error)
        print("twitter_scheduler - read_user_timeline - {}".format(error))

if __name__ == '__main__':
    try:
        lookup_value = "twitter"  # search website name
        df_config = sql_database_execute.read_configuration(lookup_value)  # read config file setting
        row_id = df_config.index[0]
        scheduled_task_sleeping = df_config["scheduled_task_sleeping"][row_id]
        # initialize twitter api
        api = init()
        while True:
            scheduled_task_sleeping = app_config.twitter_data_scheduled_task_sleeping
            read_user_timeline(api)
            print("Last run was successful for Twitter API, next run in {} seconds."
                  .format(scheduled_task_sleeping))
            time.sleep(scheduled_task_sleeping)
    except Exception as error:
        database_log.error_log("twitter_scheduler - main", error)

def predict_models_sentiment():
    # read tweets from the database
    twitter_data_list = sql_execute.read_twitter_data()
    twitter_df = pd.DataFrame(twitter_data_list,
                              columns=['id', 'tweet_id', 'screen_id', 'tweet_message', 'tweet_date'])
    # --------- twitter nltk load ---------
    word_features = twitter_nltk_classifier.load_save_dataset('word_features.pickle')
    ensemble_nltk_clf = twitter_nltk_classifier.get_ensemble_models()
    # --------- twitter word2vec load ---------
    model, w2vmodel, tfidf = twitter_word2vec_classifier_model.load_prediction_model_parameters()
    sentiment_data_list = []
    for row in twitter_df.itertuples(index=False):
        tweet = row.tweet_message
        try:
            # --------- nltk ---------
            classify, nltk_confidence = twitter_nltk_classifier.sentiment_analyzer(tweet, ensemble_nltk_clf)
            nltk_classify = 2
            if classify == "Positive":
                nltk_classify = 1
            elif classify == "Negative":
                nltk_classify = 0
            else:
                nltk_classify = 2
            # --------- twitter word2vec ---------
            word2vec_classify = twitter_word2vec_classifier_model.predict(model, w2vmodel, tfidf, tweet)
            # word2vec_classify = 0
            # predict using CountVectorizer
            classify_cVector, confidence_cVector = twitter_spacy_countVectorizer_model.sentiment_analyzer(tweet)
            # classify_cVector = 2
            # confidence_cVector = 1
            # predict using TfidfVectorizer
            classify_tfidf, confidence_tfidf = twitter_spacy_tfidfVectorizer_model.sentiment_analyzer(tweet)
            # classify_tfidf = 2
            # confidence_tfidf = 1
            sentiment_data_list.append({'id': row.id,
                                        'tweet_id': row.tweet_id,
                                        'screen_id': row.screen_id,
                                        'tweet_message': row.tweet_message,
                                        'tweet_date': row.tweet_date,
                                        'nltk_classify': nltk_classify,
                                        'nltk_confidence': nltk_confidence,
                                        'word2vec_classify': word2vec_classify,
                                        'count_vectorizer_classify': int(classify_cVector),
                                        'count_vectorizer_confidence': confidence_cVector,
                                        'tfidf_vectorizer_classify': int(classify_tfidf),
                                        'tfidf_vectorizer_confidence': confidence_tfidf})
        except Exception as error:
            database_log.error_log("run_twitter_sentiment_analyzer - predict_models_sentiment", error)
            print("run_twitter_sentiment_analyzer - predict_models_sentiment - {}".format(error))
    tweet_sentiment_data = pd.DataFrame(columns=['id', 'tweet_id', 'screen_id', 'tweet_message', 'tweet_date',
                                                 'nltk_classify', 'nltk_confidence', 'word2vec_classify',
                                                 'count_vectorizer_classify', 'count_vectorizer_confidence',
                                                 'tfidf_vectorizer_classify', 'tfidf_vectorizer_confidence'])
    if len(sentiment_data_list) > 0:
        tweet_sentiment_data = tweet_sentiment_data.append(sentiment_data_list)
    return tweet_sentiment_data

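# Illustrative wiring sketch (an assumption about the calling scheduler, which is not shown here):
# the frame returned above carries exactly the columns that bulk_insert_twitter_sentiment() reads,
# so a driver could presumably persist the scores like this.
def _example_run_twitter_sentiment_cycle():
    tweet_sentiment_data = predict_models_sentiment()
    if tweet_sentiment_data.empty is not True:
        sql_execute.bulk_insert_twitter_sentiment(tweet_sentiment_data)
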
if __name__ == '__main__':
    try:
        run_news_scraper()
    except Exception as error:
        database_log.error_log("run_news_scraper", error)

def predict_models_header_sentiment(text_data_list):
    text_data_df = pd.DataFrame(text_data_list, columns=['id', 'header', 'sub_header'])
    # delete records with an empty or "NaN" header
    text_data_df.drop(text_data_df[text_data_df['header'] == "NaN"].index, inplace=True)
    text_data_df.drop(text_data_df[text_data_df['header'] == ""].index, inplace=True)
    # add sentiment analyzer columns to store the sentiment values
    text_data_df["sentiment_for"] = ""
    text_data_df["nltk_classify"] = ""
    text_data_df["nltk_confidence"] = ""
    # text_data_df["word2vec_classify"] = -1
    text_data_df["count_vectorizer_classify"] = ""
    text_data_df["count_vectorizer_confidence"] = ""
    text_data_df["tfidf_vectorizer_classify"] = ""
    text_data_df["tfidf_vectorizer_confidence"] = ""
    # --------- news nltk load ---------
    word_features = nltk_classifier.load_save_dataset('word_features.pickle')
    ensemble_clf = nltk_classifier.get_ensemble_models()
    # --------- news word2vec load ---------
    # model, w2vmodel, tfidf = word2vec_classifier_model.load_prediction_model_parameters()
    sentiment_data_list = []
    for row in text_data_df.itertuples(index=False):
        text = row.header
        if len(text) > 0 and text != "NaN":
            try:
                # --------- nltk ---------
                classify, nltk_confidence = nltk_classifier.sentiment_analyzer(text, ensemble_clf, word_features)
                nltk_classify = 2
                if classify == "pos":
                    nltk_classify = 1
                elif classify == "neg":
                    nltk_classify = 0
                else:
                    nltk_classify = 2
                # --------- news word2vec ---------
                # word2vec_classify = word2vec_classifier_model.predict(model, w2vmodel, tfidf, text)
                # text_data_df['word2vec_classify'] = word2vec_classify
                # --------- spacy ---------
                # predict using CountVectorizer
                classify_cVector, confidence_cVector = news_spacy_countVectorizer_model.sentiment_analyzer(text)
                # predict using TfidfVectorizer
                classify_tfidf, confidence_tfidf = news_spacy_tfidfVectorizer_model.sentiment_analyzer(text)
                sentiment_data_list.append({'id': row.id,
                                            'header': row.header,
                                            'sub_header': row.sub_header,
                                            'sentiment_for': "header",
                                            'nltk_classify': nltk_classify,
                                            'nltk_confidence': nltk_confidence,
                                            'count_vectorizer_classify': int(classify_cVector),
                                            'count_vectorizer_confidence': confidence_cVector,
                                            'tfidf_vectorizer_classify': int(classify_tfidf),
                                            'tfidf_vectorizer_confidence': confidence_tfidf})
            except Exception as error:
                print(error)
                database_log.error_log("run_news_sentiment_analyzer - predict_models_header_sentiment", error)
    news_sentiment_data = pd.DataFrame(columns=['id', 'header', 'sub_header', 'sentiment_for',
                                                'nltk_classify', 'nltk_confidence',
                                                'count_vectorizer_classify', 'count_vectorizer_confidence',
                                                'tfidf_vectorizer_classify', 'tfidf_vectorizer_confidence'])
    if len(sentiment_data_list) > 0:
        news_sentiment_data = news_sentiment_data.append(sentiment_data_list)
    return news_sentiment_data