def get_indices_data(cls):
    """Return the latest price row per tracked index (ids 1 and 2).

    Builds a temp table of de-duplicated, rounded prices so the lag()
    window functions can compute day-over-day and percent change, then
    keeps the most recent row for each index symbol.

    Returns:
        list of pandas namedtuples with fields: country, index_symbol,
        entry_date, lat, lon, open, adj_close, adj_close_change,
        percent_change — sorted by (entry_date, country) descending.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        query = """ SELECT DISTINCT ind.index_symbol FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id WHERE ind.id in(1,2) ORDER BY index_symbol DESC """
        db_cursor.execute(query)
        unique_indices_list = db_cursor.fetchall()
        columns = ['country', 'index_symbol', 'entry_date', 'lat', 'lon',
                   'open', 'adj_close', 'adj_close_change', 'percent_change']
        # Create and populate the temp table ONCE.  The original re-created
        # it inside the per-symbol loop, so the second iteration failed with
        # "relation indices_data_temp already exists".
        setup_query = """ CREATE TEMPORARY TABLE indices_data_temp ( index_id INT, index_name VARCHAR(100) NOT NULL, index_symbol VARCHAR(50) NOT NULL, entry_date DATE, lat VARCHAR(30) NOT NULL, lon VARCHAR(30) NOT NULL, open DECIMAL, adj_close DECIMAL ); INSERT INTO indices_data_temp SELECT DISTINCT idata.index_id,ind.country,ind.index_symbol,idata.entry_date,ind.lat,ind.lon,ROUND(idata.open,2) open,ROUND(idata.adj_close,2) adj_close FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id;"""
        db_cursor.execute(setup_query)
        select_query = """ select ind.country,ind.index_symbol,idata.entry_date,ind.lat,ind.lon,ROUND(idata.open,2) open, ROUND(idata.adj_close,2) adj_close, ROUND(idata.adj_close,2) - lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol) as adj_close_change, round(((ROUND(idata.adj_close,2) - lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))/lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))*100,2) as percent_change FROM indices ind JOIN indices_data_temp idata ON ind.id = idata.index_id WHERE idata.index_symbol = %s ORDER BY ind.index_symbol,idata.entry_date DESC;"""
        indices_data_list = []
        for row in unique_indices_list:
            db_cursor.execute(select_query, (row[0],))
            indices_data = db_cursor.fetchall()
            if db_cursor.rowcount > 0:
                # First row is the most recent entry for this symbol
                # (results are ordered by entry_date DESC).
                indices_data_list.append(indices_data[0])
        db_cursor.execute("drop table indices_data_temp;")
        indices_map_data = pd.DataFrame(indices_data_list, columns=columns)
        indices_map_data['adj_close'] = indices_map_data['adj_close'].astype(float)
        indices_map_data['adj_close_change'] = indices_map_data['adj_close_change'].astype(float)
        indices_map_data['percent_change'] = indices_map_data['percent_change'].astype(float)
        indices_map_data.sort_values(by=['entry_date', 'country'], inplace=True, ascending=False)
        return list(indices_map_data.itertuples(index=False))
    finally:
        # The original leaked the connection; the temp table (if any
        # remains after an error) dies with the session.
        db_conn.close()
def set_user_session_data(self, user_id, session_key, session_data):
    """Replace the session value stored for (user_id, session_key).

    Deletes any existing row, then inserts the new value unless
    session_data is the empty string (which simply clears the key).

    Returns:
        Row count of the last statement executed.

    Raises:
        UserNotFoundError: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # delete existing session entry
        query = """DELETE FROM user_session_data WHERE user_id = %s and session_key = %s"""
        db_cursor.execute(query, (user_id, session_key))
        result = db_cursor.rowcount
        # insert session data (skipped when clearing the key)
        if session_data != "":
            query = """ INSERT INTO user_session_data (user_id,session_key,session_data) VALUES (%s,%s,%s)"""
            record_to_insert = (user_id, session_key, session_data)
            db_cursor.execute(query, record_to_insert)
            result = db_cursor.rowcount
        db_conn.commit()  # commit only after both statements succeed
        return result
    except (Exception, psycopg2.Error) as error:
        # The original committed in `finally`, persisting the DELETE even
        # when the INSERT failed (silent data loss); roll back instead.
        db_conn.rollback()
        raise UserNotFoundError(error)
    finally:
        db_conn.close()
def add_website_url(cls, website_url, user_id):
    """Register (or re-activate) a monitored news-site URL for a user.

    If the (website_url, user_id) pair already exists it is re-activated;
    otherwise a new row is inserted.

    Returns:
        Row count of the last statement executed.

    Raises:
        ErrorFound: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # check if url already exists for this user
        query = """SELECT id FROM user_monitor_newssite WHERE website_url = %s and user_id=%s"""
        db_cursor.execute(query, (website_url, user_id))
        record = db_cursor.fetchone()
        result = db_cursor.rowcount
        if result > 0:
            query = """UPDATE user_monitor_newssite SET is_active = True WHERE website_url = %s and user_id=%s"""
            db_cursor.execute(query, (website_url, user_id))
            result = db_cursor.rowcount
        else:
            query = """ INSERT INTO user_monitor_newssite (user_id,website_url) VALUES (%s,%s) """
            db_cursor.execute(query, (user_id, website_url))
            result = db_cursor.rowcount
        db_conn.commit()  # commit only on success
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally` even after a failure; roll back.
        db_conn.rollback()
        raise ErrorFound(error)
    finally:
        db_conn.close()
def map_user_website(cls, selected_website_url, user_id):
    """Replace the user's news-site mappings with the given site ids.

    Deletes all existing mappings for the user, then inserts one row per
    id in selected_website_url (an iterable of news_account ids).

    Returns:
        Row count of the last statement executed (the delete count when
        selected_website_url is empty).

    Raises:
        ErrorFound: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # delete existing mapping records
        query = """DELETE FROM user_newssite_account_mapping WHERE user_id = %s"""
        db_cursor.execute(query, (user_id, ))
        result = db_cursor.rowcount
        if len(selected_website_url) > 0:
            # insert the new site mappings one by one
            query = """ INSERT INTO user_newssite_account_mapping (user_id,news_account_id) VALUES (%s,%s) """
            for website_id in selected_website_url:
                db_cursor.execute(query, (user_id, website_id))
                result = db_cursor.rowcount
        db_conn.commit()  # commit only when delete + all inserts succeeded
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally`, persisting the delete even when
        # an insert failed (user would lose all mappings); roll back.
        db_conn.rollback()
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_twitter_account(cls, user_id):
    """List all active Twitter accounts, flagging those mapped to the user.

    The UNION pairs the accounts the user follows (flag True) with the
    remaining accounts (flag False), mapped rows first.
    """
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """ SELECT ta.id, screen_id, screen_name, country,(CASE WHEN twitter_account_id IS NOT NULL THEN True ELSE False END) "twitter_account_id" FROM twitter_account ta LEFT JOIN user_twitter_account_mapping tam ON ta.id = tam.twitter_account_id WHERE ta.is_active = True AND tam.user_id=%s UNION SELECT ta.id, screen_id, screen_name, country,False "twitter_account_id" FROM twitter_account ta WHERE ta.id NOT IN(SELECT DISTINCT ta.id FROM twitter_account ta LEFT JOIN user_twitter_account_mapping tam ON ta.id = tam.twitter_account_id WHERE ta.is_active = True AND tam.user_id=%s) ORDER BY twitter_account_id DESC"""
    try:
        cursor.execute(sql, (user_id, user_id))
        return cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        connection.close()
def get_website_urls(cls, user_id):
    """List configured news sites, flagging those mapped to the user.

    Mirrors get_twitter_account: mapped sites (flag True) UNION unmapped
    sites (flag False), mapped rows first.
    """
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """ SELECT DISTINCT ta.id, website_category,website_link, (CASE WHEN news_account_id IS NOT NULL THEN True ELSE False END) "news_account_id" FROM website_configuration ta LEFT JOIN user_newssite_account_mapping tam ON ta.id = tam.news_account_id WHERE ta.is_active = True AND tam.user_id=%s UNION SELECT ta.id, website_category, website_link, False "news_account_id" FROM website_configuration ta WHERE ta.id NOT IN(SELECT DISTINCT ta.id FROM website_configuration ta LEFT JOIN user_newssite_account_mapping tam ON ta.id = tam.news_account_id WHERE ta.is_active = True AND tam.user_id=%s) ORDER BY news_account_id DESC"""
    try:
        cursor.execute(sql, (user_id, user_id))
        return cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        connection.close()
def verify_user(cls, email, password):
    """Authenticate a user by email and password.

    Returns the (password_hash, first_name, last_name, email, id) row on
    success, False when the email is unknown/inactive or the password
    does not match.
    """
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    try:
        cursor.execute(
            """SELECT password, first_name, last_name, email,id FROM users WHERE is_active = True and email = %s""",
            (email, ))
        user_row = cursor.fetchone()
        # Guard clause: no active account, or hash mismatch -> False.
        if cursor.rowcount > 0 and check_password_hash(user_row[0], password):
            return user_row
        return False
    except (Exception, psycopg2.Error) as error:
        raise UserNotFoundError(error)
    finally:
        connection.close()
def get_news_article_count(cls, sentiment_type_id, user_id):
    """Return [(count,)] of analysed news articles for a sentiment filter.

    sentiment_type_id: 2 => all sentiments; 0/1 => only that nltk class.
    Each branch falls back to a site-unfiltered count when the user's
    configured news sites yield (almost) nothing.  Implicitly returns
    None for any other sentiment_type_id value.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        if int(sentiment_type_id) == 2:
            # All sentiments, restricted to the user's mapped news sites.
            query = """ SELECT count(*) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND ndump.website_link in ( select DISTINCT website_link from website_configuration wconfig JOIN user_newssite_account_mapping uconfigmap ON wconfig.id = uconfigmap.news_account_id WHERE user_id=%s) """
            db_cursor.execute(query, (user_id, ))
            records_list = db_cursor.fetchall()
            if records_list[0][0] <= 0:
                # Fallback: header-level sentiment across ALL sites.
                query = """ SELECT count(*) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('header') """
                db_cursor.execute(query)
                records_list = db_cursor.fetchall()
            return records_list
        elif int(sentiment_type_id) == 0 or int(sentiment_type_id) == 1:
            # One sentiment class, restricted to the user's mapped sites.
            query = """ SELECT count(*) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nsentiment.nltk_classify = %s AND ndump.website_link in ( select DISTINCT website_link from website_configuration wconfig JOIN user_newssite_account_mapping uconfigmap ON wconfig.id = uconfigmap.news_account_id WHERE user_id=%s) """
            db_cursor.execute(query, (sentiment_type_id, user_id))
            records_list = db_cursor.fetchall()
            # NOTE(review): fallback threshold here is <= 1 while the
            # branch above uses <= 0 — confirm the asymmetry is intended.
            if records_list[0][0] <= 1:
                # Fallback: same class, but across ALL sites.
                query = """ SELECT count(*) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nsentiment.nltk_classify = %s """
                db_cursor.execute(query, (sentiment_type_id, ))
                records_list = db_cursor.fetchall()
            return records_list
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_twitter_plot_data(cls, sentiment_type):
    """Count tweets per day (since 2020-03-01) with the given sentiment.

    Only tweets from active accounts with more than 10 whitespace-separated
    words are counted.

    Returns:
        list of (record_date, count) tuples ordered by date ascending.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    # Raw string: '\s' is an invalid Python escape (SyntaxWarning on
    # 3.12+); the regex must reach Postgres as a literal backslash-s.
    query = r""" SELECT TO_DATE(TO_CHAR(tweet_date :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') "record_date",COUNT(tsentiment.nltk_classify) FROM twitter_data_dump tdump JOIN twitter_sentiment tsentiment ON tdump.id = tsentiment.tweet_id WHERE TO_DATE(TO_CHAR(tweet_date :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01' AND tsentiment.nltk_classify = %s AND array_length(regexp_split_to_array(tdump.tweet_message, '\s'),1) > 10 AND tdump.screen_id in (SELECT DISTINCT screen_id FROM twitter_account WHERE is_active = true) GROUP BY record_date ORDER BY record_date ASC """
    try:
        db_cursor.execute(query, (sentiment_type, ))
        records_list = db_cursor.fetchall()
        return records_list
    finally:
        # The original never closed the connection.
        db_conn.close()
def get_latest_twitter_article(cls, last_date, sentiment_type_id, user_id):
    """Fetch tweets newer than last_date for the user's followed accounts.

    sentiment_type_id: 2 => all sentiments; 0/1 => only that nltk class.
    Questions (trailing '?') and tweets of 10 words or fewer are skipped.
    Implicitly returns None for any other sentiment_type_id value.

    Raises:
        ErrorFound: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # Queries use raw strings: '\s' is an invalid Python escape
        # (SyntaxWarning on 3.12+) and must reach Postgres verbatim.
        if int(sentiment_type_id) == 2:
            query = r""" SELECT td.id,td.screen_id,td.tweet_date,td.tweet_message,ts.nltk_classify as "classifier" FROM twitter_data_dump td JOIN twitter_sentiment ts ON td.id = ts.tweet_id WHERE td.tweet_date > %s AND td.screen_id in (SELECT DISTINCT screen_id FROM twitter_account ta LEFT JOIN user_twitter_account_mapping tam ON ta.id = tam.twitter_account_id WHERE ta.is_active = True AND tam.user_id=%s) AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10 AND RIGHT(td.tweet_message, 1) != '?' ORDER BY td.tweet_date DESC """
            db_cursor.execute(query, (last_date, user_id))
            records_list = db_cursor.fetchall()
            return records_list
        elif int(sentiment_type_id) == 0 or int(sentiment_type_id) == 1:
            query = r""" SELECT td.id,td.screen_id,td.tweet_date,td.tweet_message,ts.nltk_classify as "classifier" FROM twitter_data_dump td JOIN twitter_sentiment ts ON td.id = ts.tweet_id WHERE td.tweet_date > %s AND nltk_classify = %s AND td.screen_id in (SELECT DISTINCT screen_id FROM twitter_account ta LEFT JOIN user_twitter_account_mapping tam ON ta.id = tam.twitter_account_id WHERE ta.is_active = True AND tam.user_id=%s) AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10 AND RIGHT(td.tweet_message, 1) != '?' ORDER BY td.tweet_date DESC"""
            db_cursor.execute(query, (last_date, sentiment_type_id, user_id))
            records_list = db_cursor.fetchall()
            return records_list
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_latest_news_article(cls, last_date, sentiment_type_id, user_id):
    """Fetch news articles newer than last_date for the user's sites.

    sentiment_type_id: 2 => all sentiments; 0/1 => only that nltk class.
    Implicitly returns None for any other sentiment_type_id value.

    The "web_ink" alias (sic) builds an absolute link: an article link
    already starting with 'http' is used as-is, otherwise it is appended
    to the site's base URL.  Fixed: the original used LIKE 'http' (exact
    match on the 4-char string) so absolute links were always re-prefixed;
    it is now LIKE 'http%%' (%% = literal % in a parameterized psycopg2
    query).

    Raises:
        ErrorFound: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        if int(sentiment_type_id) == 2:
            query = """ SELECT ndump.id,ndump.header,case when coalesce(ndump.sub_header, '') != '' AND ndump.sub_header != 'NaN' THEN ndump.sub_header ELSE 'no_value' END "sub_header", ndump.website, CASE WHEN ndump.news_link LIKE 'http%%' THEN ndump.news_link ELSE CONCAT(ndump.website_link,ndump.news_link) END AS "web_ink", nsentiment.nltk_classify as "classifier",TO_CHAR(nsentiment.entry_time :: DATE, 'yyyy-mm-dd') AS "entry_time" FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nsentiment.entry_time > %s AND ndump.website_link in (select DISTINCT website_link from website_configuration wconfig JOIN user_newssite_account_mapping uconfigmap ON wconfig.id = uconfigmap.news_account_id WHERE user_id=%s) ORDER BY nsentiment.entry_time DESC """
            db_cursor.execute(query, (last_date, user_id))
            records_list = db_cursor.fetchall()
            return records_list
        elif int(sentiment_type_id) == 0 or int(sentiment_type_id) == 1:
            query = """ SELECT ndump.id,ndump.header,case when coalesce(ndump.sub_header, '') != '' AND ndump.sub_header != 'NaN' THEN ndump.sub_header ELSE 'no_value' END "sub_header" ,ndump.website, CASE WHEN ndump.news_link LIKE 'http%%' THEN ndump.news_link ELSE CONCAT(ndump.website_link,ndump.news_link) END AS "web_ink", nsentiment.nltk_classify as "classifier",TO_CHAR(nsentiment.entry_time :: DATE, 'yyyy-mm-dd') AS "entry_time" FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nsentiment.entry_time > %s AND nltk_classify = %s AND ndump.website_link in (select DISTINCT website_link from website_configuration wconfig JOIN user_newssite_account_mapping uconfigmap ON wconfig.id = uconfigmap.news_account_id WHERE user_id=%s) ORDER BY nsentiment.entry_time DESC """
            db_cursor.execute(query, (last_date, sentiment_type_id, user_id))
            records_list = db_cursor.fetchall()
            return records_list
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_autocomplete_records(cls):
    """Return "SYMBOL_Name" search strings for all active index components."""
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """ select CONCAT(symbol,'_', name) AS "search" from indices_componets WHERE is_active = True """
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        connection.close()
def get_monitor_newssite(cls, user_id):
    """Return (id, website_url) rows the user has asked to monitor."""
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """ select id, website_url from user_monitor_newssite where user_id=%s"""
    try:
        cursor.execute(sql, (user_id, ))
        return cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        connection.close()
def get_user_session_data(cls, user_id, session_key):
    """Return the (session_data,) row for the key, or None if absent."""
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """SELECT session_data FROM user_session_data WHERE user_id = %s and session_key = %s"""
    try:
        cursor.execute(sql, (user_id, session_key))
        return cursor.fetchone()
    except (Exception, psycopg2.Error) as error:
        raise UserNotFoundError(error)
    finally:
        connection.close()
def get_user_details(cls, user_id):
    """Return the (first_name, last_name, email, id) row, or None."""
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """SELECT first_name, last_name, email,id FROM users WHERE id = %s"""
    try:
        cursor.execute(sql, (user_id, ))
        return cursor.fetchone()
    except (Exception, psycopg2.Error) as error:
        raise UserNotFoundError(error)
    finally:
        connection.close()
def get_map_user_companies_list(cls, user_id):
    """Return the user's watch-list entries joined with component details."""
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """ select t2.id, t1.exchange,t1.country,t1.symbol,t1.name,t2.search_criteria from indices_componets t1 JOIN user_companies_watch_list t2 ON t1.id = t2.indices_componets_id AND user_id=%s """
    try:
        cursor.execute(sql, (user_id, ))
        return cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        connection.close()
def save_user_message(self):
    """Persist this instance's contact-us fields to the contact_us table.

    Reads self.first_name, self.last_name, self.email, self.message.

    Returns:
        Row count of the insert (1 on success).

    Raises:
        UserNotFoundError: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        query = """ INSERT INTO contact_us (first_name, last_name, email, message) VALUES (%s,%s,%s,%s)"""
        record_to_insert = (self.first_name, self.last_name, self.email, self.message)
        db_cursor.execute(query, record_to_insert)
        result = db_cursor.rowcount
        db_conn.commit()  # commit only on success
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally` even after a failure; roll back.
        db_conn.rollback()
        raise UserNotFoundError(error)
    finally:
        db_conn.close()
def get_indices(cls):
    """Return the distinct symbols of indices that have price data."""
    connection = DBConnection.get_connection()
    cursor = connection.cursor()
    sql = """ SELECT DISTINCT ind.index_symbol FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id ORDER BY index_symbol ASC"""
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        connection.close()
def add_user_companies_watch_list(cls, search_company, search_criteria, user_id):
    """Add a company (given as "SYMBOL_Name") to the user's watch list.

    Looks up the active component matching search_company, removes any
    existing watch-list row for that (user, component) pair, then inserts
    a fresh one with the given search_criteria.

    Returns:
        Row count of the last statement (0 when no component matched).

    Raises:
        ErrorFound: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        query = """ select id from indices_componets WHERE is_active = True and CONCAT(symbol,'_', name) = %s """
        db_cursor.execute(query, (search_company, ))
        search_record = db_cursor.fetchone()
        result = db_cursor.rowcount
        if result > 0:
            indices_componets_id = search_record[0]
            # delete existing mapping record for this component
            query = """DELETE FROM user_companies_watch_list WHERE user_id = %s and indices_componets_id=%s"""
            db_cursor.execute(query, (user_id, indices_componets_id))
            result = db_cursor.rowcount
            # insert the fresh watch-list entry
            query = """ INSERT INTO user_companies_watch_list (user_id,indices_componets_id,search_criteria) VALUES (%s,%s,%s) """
            db_cursor.execute(
                query, (user_id, indices_componets_id, search_criteria))
            result = db_cursor.rowcount
        db_conn.commit()  # commit only on success
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally`, persisting the delete even when
        # the insert failed (entry silently dropped); roll back instead.
        db_conn.rollback()
        raise ErrorFound(error)
    finally:
        db_conn.close()
def update_user_companies_watch_list(cls, selected_list):
    """Delete the given watch-list rows by primary-key id.

    Returns:
        Row count of the last delete (0 when selected_list is empty —
        the original raised UnboundLocalError in that case).

    Raises:
        ErrorFound: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        result = 0  # defined up front so an empty list returns 0
        query = """DELETE FROM user_companies_watch_list WHERE id = %s"""
        for site_id in selected_list:
            db_cursor.execute(query, (site_id, ))
            result = db_cursor.rowcount
        db_conn.commit()  # commit only when every delete succeeded
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally` even after a failure; roll back.
        db_conn.rollback()
        raise ErrorFound(error)
    finally:
        db_conn.close()
def create_account(self):
    """Insert a new user row with a hashed password.

    Reads self.first_name, self.last_name, self.email, self.password;
    the password is hashed with werkzeug's sha256 scheme before storage.

    Returns:
        Row count of the insert (1 on success).

    Raises:
        UserNotFoundError: wraps any database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        password_hash = generate_password_hash(self.password, "sha256")
        query = """ INSERT INTO users (first_name, last_name, email, password) VALUES (%s,%s,%s,%s)"""
        record_to_insert = (self.first_name, self.last_name, self.email, password_hash)
        db_cursor.execute(query, record_to_insert)
        result = db_cursor.rowcount
        db_conn.commit()  # commit only on success
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally` even after a failure; roll back.
        db_conn.rollback()
        raise UserNotFoundError(error)
    finally:
        db_conn.close()
def get_news_plot_data(cls, sentiment_type):
    """Count news articles per day (since 2020-03-01) for one sentiment.

    The CASE expression derives a record date: relative timestamps
    ('hours', 'ist', 'ago') and empty timestamps fall back to the row's
    entry_time, otherwise the scraped timestamp itself is used.

    Returns:
        list of (record_date, count) tuples ordered by date ascending.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    query = """ SELECT CASE WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('hours' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ist' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ago' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' THEN TO_DATE(TO_CHAR(timestamp:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') ELSE TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') END AS "record_date",COUNT(tsentiment.nltk_classify) FROM news_feeds_dump tdump JOIN news_feeds_sentiment tsentiment ON tdump.id = tsentiment.news_id WHERE TO_DATE(TO_CHAR(tdump.entry_time :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01' AND tsentiment.nltk_classify = %s AND sentiment_for='sub_header' GROUP BY record_date ORDER BY record_date ASC """
    try:
        db_cursor.execute(query, (sentiment_type, ))
        records_list = db_cursor.fetchall()
        return records_list
    finally:
        # The original never closed the connection.
        db_conn.close()
def get_indices_plot_data(cls, user_id):
    """Return a DataFrame of adj_close per index for plotting.

    Only dates present for ALL of the hard-coded index ids (1-7, 9, 13,
    14) are kept, via a temp table of intersected entry dates, so the
    plotted series line up.  If the user stored a "selected_indices"
    session value ("SYM1_SYM2_..."), the result is filtered to those
    symbols.

    Returns:
        pandas.DataFrame with columns index_symbol, entry_date (string
        'dd/mm/yyyy'), adj_close (float).
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # Multi-statement batch: build temp_table of common dates, then
        # select prices restricted to those dates.  The temp table is
        # never dropped; it dies with the connection on close.
        query = """ CREATE TEMPORARY TABLE temp_table( entry_date DATE ); insert into temp_table select entry_date from indices_data where index_id=1 intersect select entry_date from indices_data where index_id=2 intersect select entry_date from indices_data where index_id=3 intersect select entry_date from indices_data where index_id=4 intersect select entry_date from indices_data where index_id=5 intersect select entry_date from indices_data where index_id=6 intersect select entry_date from indices_data where index_id=7 intersect select entry_date from indices_data where index_id=9 intersect select entry_date from indices_data where index_id=13 intersect select entry_date from indices_data where index_id=14; SELECT ind.index_symbol,TO_CHAR(idata.entry_date :: DATE, 'dd/mm/yyyy') AS "entry_date", ROUND(idata.adj_close,2) AS "adj_close" FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id WHERE idata.entry_date IN (SELECT DISTINCT entry_date FROM temp_table) ORDER BY index_symbol, idata.entry_date ASC """
        db_cursor.execute(query)
        records_list = db_cursor.fetchall()
        df_indices_data = pd.DataFrame(
            records_list, columns=["index_symbol", "entry_date", "adj_close"])
        df_indices_data['adj_close'] = df_indices_data['adj_close'].astype(
            float)
        # get user session data (the user's chosen subset of indices)
        user_session_list = User.get_user_session_data(
            user_id, "selected_indices")
        if user_session_list is not None and len(user_session_list) > 0:
            # session value is an underscore-joined symbol list
            selected_indices_list = user_session_list[0]
            selected_indices_list = selected_indices_list.split("_")
            df_indices_data = df_indices_data.loc[df_indices_data[
                'index_symbol'].isin(selected_indices_list)]
        return df_indices_data
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_indices_vs_sentiment_plot_data(cls):
    """Build a per-day comparison of news sentiment vs index direction.

    Produces a DataFrame merging, on Record_Date: positive/negative news
    counts, a Day_Sentiment flag (1 when positives >= negatives), and an
    Indices_Sentiment flag (1 when, over history up to that date, days
    with non-negative percent change outnumber negative ones).

    NOTE(review): several issues to confirm with the team —
      * DataFrame.append is deprecated and removed in pandas 2.0; this
        needs pd.concat (or list accumulation) to run on modern pandas.
      * indices_data_temp is CREATEd on every inner-loop iteration but
        only dropped after the loop; with two symbols the second CREATE
        should fail with "relation already exists".
      * the connection is never closed, and `data_list` is never used.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    df_data_positive = pd.DataFrame(columns=['Record_Date','News_Positive_Sentiment_Count'])
    df_data_negative = pd.DataFrame(columns=['Record_Date','News_Negative_Sentiment_Count'])
    df_data_indices = pd.DataFrame(columns=['Record_Date','Indices_Sentiment'])
    # Daily counts of POSITIVE (nltk_classify = 1) news since 2020-03-01.
    # The CASE derives a record date: relative timestamps fall back to
    # entry_time, otherwise the scraped timestamp is used.
    query=""" SELECT CASE WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('hours' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ist' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ago' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' THEN TO_DATE(TO_CHAR(timestamp:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') ELSE TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') END AS "record_date",COUNT(tsentiment.nltk_classify) FROM news_feeds_dump tdump JOIN news_feeds_sentiment tsentiment ON tdump.id = tsentiment.news_id WHERE TO_DATE(TO_CHAR(tdump.entry_time :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01' AND tsentiment.nltk_classify = 1 AND sentiment_for='sub_header' GROUP BY record_date ORDER BY record_date ASC """
    # AND tdump.website in ('businessinsider','economic_times')
    db_cursor.execute(query)
    positive_sentiment_data = db_cursor.fetchall()
    data_list = []
    for row in positive_sentiment_data:
        df_data_positive = df_data_positive.append({'Record_Date': row[0],
            'News_Positive_Sentiment_Count': row[1]},ignore_index=True)
    df_data_positive['News_Positive_Sentiment_Count'] = df_data_positive['News_Positive_Sentiment_Count'].astype(int)
    # Same query for NEGATIVE (nltk_classify = 0) news.
    query=""" SELECT CASE WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('hours' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ist' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ago' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') WHEN timestamp IS NOT NULL AND tdump.timestamp !='' THEN TO_DATE(TO_CHAR(timestamp:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') ELSE TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') END AS "record_date",COUNT(tsentiment.nltk_classify) FROM news_feeds_dump tdump JOIN news_feeds_sentiment tsentiment ON tdump.id = tsentiment.news_id WHERE TO_DATE(TO_CHAR(tdump.entry_time :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01' AND tsentiment.nltk_classify = 0 AND sentiment_for='sub_header' GROUP BY record_date ORDER BY record_date ASC """
    db_cursor.execute(query)
    negative_sentiment_data = db_cursor.fetchall()
    data_list = []
    for row in negative_sentiment_data:
        df_data_negative = df_data_negative.append({'Record_Date': row[0],
            'News_Negative_Sentiment_Count': row[1]},ignore_index=True)
    df_data_negative['News_Negative_Sentiment_Count'] = df_data_negative['News_Negative_Sentiment_Count'].astype(int)
    # merge sentiment data: inner join drops dates present on one side only
    sentiment_data = pd.merge(df_data_positive,df_data_negative[['Record_Date','News_Negative_Sentiment_Count']], on='Record_Date')
    sentiment_data['Day_Sentiment'] = np.where(sentiment_data['News_Positive_Sentiment_Count'] >= sentiment_data['News_Negative_Sentiment_Count'], 1, 0)
    # indices data: only index ids 1 and 2 are considered
    query = """ SELECT DISTINCT ind.index_symbol FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id WHERE ind.id in (1,2) ORDER BY index_symbol DESC"""
    db_cursor.execute(query)
    unique_indices_list = db_cursor.fetchall()
    columns = ['entry_date','percent_change']
    query = """ select DISTINCT entry_date from indices_data order by entry_date desc; """
    db_cursor.execute(query)
    unique_indices_date = db_cursor.fetchall()
    # For every trading date, rebuild the percent-change history up to
    # that date and derive a 0/1 market sentiment for it.
    for row_date in unique_indices_date:
        indices_data_list = []
        for row in unique_indices_list:
            # NOTE(review): CREATE runs per iteration but DROP only after
            # the loop — the second symbol's CREATE should fail; confirm.
            query = """ CREATE TEMPORARY TABLE indices_data_temp ( index_id INT, index_name VARCHAR(100) NOT NULL, index_symbol VARCHAR(50) NOT NULL, entry_date DATE, lat VARCHAR(30) NOT NULL, lon VARCHAR(30) NOT NULL, open DECIMAL, adj_close DECIMAL ); INSERT INTO indices_data_temp SELECT DISTINCT idata.index_id,ind.country,ind.index_symbol,idata.entry_date,ind.lat,ind.lon,ROUND(idata.open,2) open,ROUND(idata.adj_close,2) adj_close FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id; select idata.entry_date, round(((ROUND(idata.adj_close,2) - lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))/lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))*100,2) as percent_change FROM indices ind JOIN indices_data_temp idata ON ind.id = idata.index_id WHERE idata.index_symbol = %s AND idata.entry_date <= %s ORDER BY ind.index_symbol,idata.entry_date DESC;"""
            db_cursor.execute(query,(row[0],row_date[0]))
            indices_data = db_cursor.fetchall()
            row_count = db_cursor.rowcount
            if row_count > 0:
                # keep only the most recent row per symbol
                indices_data_list.append(indices_data[0])
        indices_map_data = pd.DataFrame(indices_data_list,columns=columns)
        indices_map_data['percent_change'] = indices_map_data['percent_change'].astype(float)
        indices_map_data.sort_values(by=['entry_date'], inplace=True, ascending=False)
        query = "drop table indices_data_temp;"
        db_cursor.execute(query)
        # price_change_sentiment = np.where(indices_map_data[["percent_change"]].mean() > 0, 1, 0)
        # Majority vote: 1 when non-negative changes >= negative ones.
        seriesObj = indices_map_data.apply(lambda x: True if x['percent_change'] >= 0 else False , axis=1)
        numOfRows = len(seriesObj[seriesObj == True].index)
        numOfRows1 = len(seriesObj[seriesObj == False].index)
        price_change_sentiment = 0
        if numOfRows >= numOfRows1:
            price_change_sentiment = 1
        indices_map_data.sort_values(by=['entry_date'], inplace=True, ascending=False)
        # record the flag only when this date actually has index data
        df_data = indices_map_data[indices_map_data['entry_date'] == row_date[0]]
        if df_data.empty == False:
            df_data_indices = df_data_indices.append({'Record_Date': df_data['entry_date'].iloc[0],
                'Indices_Sentiment': price_change_sentiment},ignore_index=True)
        # df_data_indices = df_data_indices.append({'Record_Date': indices_map_data['entry_date'][0],
        #     'Indices_Sentiment': price_change_sentiment[0]},ignore_index=True)
    # inner join keeps only dates with both news and index sentiment
    indices_vs_sentiment_data = pd.merge(sentiment_data,df_data_indices[['Record_Date','Indices_Sentiment']], on='Record_Date')
    return indices_vs_sentiment_data
def get_news_bar_plot_data(cls, sentiment_type_id, user_id, type):
    """Return per-entry_time counts for a news sentiment bar chart.

    sentiment_type_id: 2 => all sentiments; 0/1 => only that nltk class;
    any other value implicitly returns None.  `type` (shadows the builtin;
    kept for caller compatibility) is the nltk_classify value counted.
    Each branch retries without the user's site filter when the filtered
    query returns no rows.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        if int(sentiment_type_id) == 2:
            # Counts restricted to the user's mapped news sites.
            query = """ SELECT 'Sentiment' AS "entry_time",COUNT(nsentiment.nltk_classify) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nsentiment.nltk_classify = %s AND ndump.website_link in (select DISTINCT website_link from website_configuration wconfig JOIN user_newssite_account_mapping uconfigmap ON wconfig.id = uconfigmap.news_account_id WHERE user_id = %s) GROUP BY nsentiment.entry_time ORDER BY nsentiment.entry_time ASC """
            db_cursor.execute(query, (type, user_id))
            result = db_cursor.rowcount
            if result <= 0:
                # Fallback: header-level counts across ALL sites.
                query = """ SELECT 'Sentiment' AS "entry_time",COUNT(nsentiment.nltk_classify) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('header') AND nsentiment.nltk_classify = %s GROUP BY nsentiment.entry_time ORDER BY nsentiment.entry_time ASC"""
                db_cursor.execute(query, (type, ))
            records_list = db_cursor.fetchall()
            return records_list
        elif int(sentiment_type_id) == 0 or int(sentiment_type_id) == 1:
            # Single class, restricted to the user's mapped sites.
            query = """ SELECT 'Sentiment' AS "entry_time",COUNT(nsentiment.nltk_classify) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nltk_classify = %s AND nsentiment.nltk_classify = %s AND ndump.website_link in (select DISTINCT website_link from website_configuration wconfig JOIN user_newssite_account_mapping uconfigmap ON wconfig.id = uconfigmap.news_account_id WHERE user_id = %s) GROUP BY nsentiment.entry_time ORDER BY nsentiment.entry_time ASC """
            db_cursor.execute(query, (sentiment_type_id, type, user_id))
            result = db_cursor.rowcount
            if result <= 0:
                # Fallback: same class, across ALL sites.
                query = """ SELECT 'Sentiment' AS "entry_time",COUNT(nsentiment.nltk_classify) FROM news_feeds_dump ndump JOIN news_feeds_sentiment nsentiment ON ndump.id = nsentiment.news_id WHERE sentiment_for in('sub_header') AND nltk_classify = %s AND nsentiment.nltk_classify = %s GROUP BY nsentiment.entry_time ORDER BY nsentiment.entry_time ASC """
                db_cursor.execute(query, (sentiment_type_id, type))
            records_list = db_cursor.fetchall()
            return records_list
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()
def add_twitter_account(cls, screen_id):
    """Look up a Twitter user and upsert it into twitter_account.

    Fetches the profile via the Twitter API, then re-activates an
    existing row for that screen_id or inserts a new one.

    Returns:
        Row count of the last statement executed.

    Raises:
        ErrorFound: wraps any API or database error.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # SECURITY: hard-coded API credentials are checked into source
        # control — rotate these keys and load them from environment/
        # configuration instead of literals.
        CONSUMER_KEY = "9NfQodgml2Io3uUFslehvzZXd"
        CONSUMER_SECRET = "cF1dN1SNK4X4VShvzBPQSb872opJeHLI2oQ8W8fGaBrHZ5KKsb"
        ACCESS_TOKEN = "1143151270913552386-nTV6DXH8ri21Kdzbmjqbv167RfMS1V"
        ACCESS_TOKEN_SECRET = "l2DraSWLyqr2YNq4aK2dbwmVEmP3SLC205pTodNVDU5SA"
        # Authenticate to Twitter
        auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        api = tweepy.API(auth, wait_on_rate_limit=True,
                         wait_on_rate_limit_notify=True)
        # get user details (note: description fills both profile/details)
        twitter_user_details = api.get_user(screen_name=screen_id)
        screen_id = twitter_user_details.screen_name
        screen_name = twitter_user_details.name
        profile = twitter_user_details.description
        country = twitter_user_details.location
        details = twitter_user_details.description
        # (removed the original's unused `tweet_list` set accumulator)
        # check if the account already exists
        query = """SELECT id, screen_id, screen_name, country FROM twitter_account WHERE screen_id = %s"""
        db_cursor.execute(query, (screen_id, ))
        record = db_cursor.fetchone()
        result = db_cursor.rowcount
        if result > 0:
            query = """UPDATE twitter_account SET is_active = True WHERE screen_id = %s"""
            db_cursor.execute(query, (screen_id, ))
            result = db_cursor.rowcount
        else:
            query = """ INSERT INTO twitter_account (screen_id,screen_name,profile,country,details) VALUES (%s,%s,%s,%s,%s) """
            db_cursor.execute(
                query, (screen_id, screen_name, profile, country, details))
            result = db_cursor.rowcount
        db_conn.commit()  # commit only on success
        return result
    except (Exception, psycopg2.Error) as error:
        # Original committed in `finally` even after a failure; roll back.
        db_conn.rollback()
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_twitter_article(cls, limit, offset, sentiment_type_id, user_id):
    """Fetch tweets joined with their NLTK sentiment classification.

    ``sentiment_type_id == 2`` returns tweets of any sentiment; 0 or 1
    restricts to that ``nltk_classify`` value.  The first query is scoped
    to the Twitter accounts mapped to ``user_id``; if it yields no rows,
    the query is retried without the per-user account filter.  Tweets of
    10 words or fewer, or ending in '?', are always excluded.

    :param limit: max rows to return (SQL LIMIT).
    :param offset: rows to skip (SQL OFFSET).
    :param sentiment_type_id: 0, 1 (filter by sentiment) or 2 (all).
    :param user_id: user whose mapped accounts scope the first attempt.
    :return: list of (id, screen_id, tweet_date, tweet_message, classifier)
        rows, or ``None`` when ``sentiment_type_id`` is not 0, 1 or 2.
    :raises ErrorFound: wraps any database error; the connection is
        always closed.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # NOTE: all queries are raw strings so the regex '\s' reaches
        # PostgreSQL unchanged.  A plain string literal would carry an
        # invalid escape sequence (SyntaxWarning on Python 3.12+).
        if int(sentiment_type_id) == 2:
            query = r"""
                SELECT td.id,td.screen_id,td.tweet_date,td.tweet_message,ts.nltk_classify as "classifier"
                FROM twitter_data_dump td
                JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                WHERE td.screen_id in (
                        SELECT DISTINCT screen_id
                        FROM twitter_account ta
                        LEFT JOIN user_twitter_account_mapping tam
                            ON ta.id = tam.twitter_account_id
                        WHERE ta.is_active = True AND tam.user_id=%s)
                    AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                    AND RIGHT(td.tweet_message, 1) != '?'
                ORDER BY td.tweet_date DESC
                LIMIT %s OFFSET %s ROWS"""
            db_cursor.execute(query, (user_id, limit, offset))
            if db_cursor.rowcount <= 0:
                # Fallback: the user has no mapped accounts (or none
                # matched) -> search across all tweets.
                query = r"""
                    SELECT td.id,td.screen_id,td.tweet_date,td.tweet_message,ts.nltk_classify as "classifier"
                    FROM twitter_data_dump td
                    JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                    WHERE array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                        AND RIGHT(td.tweet_message, 1) != '?'
                    ORDER BY td.tweet_date DESC
                    LIMIT %s OFFSET %s ROWS"""
                db_cursor.execute(query, (limit, offset))
            records_list = db_cursor.fetchall()
            return records_list
        elif int(sentiment_type_id) == 0 or int(sentiment_type_id) == 1:
            query = r"""
                SELECT td.id,td.screen_id,td.tweet_date,td.tweet_message,ts.nltk_classify as "classifier"
                FROM twitter_data_dump td
                JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                WHERE nltk_classify = %s
                    AND td.screen_id in (
                        SELECT DISTINCT screen_id
                        FROM twitter_account ta
                        LEFT JOIN user_twitter_account_mapping tam
                            ON ta.id = tam.twitter_account_id
                        WHERE ta.is_active = True AND tam.user_id=%s)
                    AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                    AND RIGHT(td.tweet_message, 1) != '?'
                ORDER BY td.tweet_date DESC
                LIMIT %s OFFSET %s ROWS"""
            db_cursor.execute(query,
                              (sentiment_type_id, user_id, limit, offset))
            if db_cursor.rowcount <= 0:
                # Same fallback as above, keeping the sentiment filter.
                query = r"""
                    SELECT td.id,td.screen_id,td.tweet_date,td.tweet_message,ts.nltk_classify as "classifier"
                    FROM twitter_data_dump td
                    JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                    WHERE nltk_classify = %s
                        AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                        AND RIGHT(td.tweet_message, 1) != '?'
                    ORDER BY td.tweet_date DESC
                    LIMIT %s OFFSET %s ROWS"""
                db_cursor.execute(query, (sentiment_type_id, limit, offset))
            records_list = db_cursor.fetchall()
            return records_list
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()
def get_indices_vs_sentiment_twitter_plot_data(cls):
    """Build a per-day comparison of Twitter sentiment vs. index movement.

    Computes, for each day since 2020-03-01:
      * ``News_Positive_Sentiment_Count`` / ``News_Negative_Sentiment_Count``
        from classified tweets,
      * ``Day_Sentiment`` = 1 when positives >= negatives else 0,
      * ``Indices_Sentiment`` = 1 when at least half of the tracked
        indices had a non-negative percent change up to that date.

    :return: merged DataFrame keyed on ``Record_Date``.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    # Accumulator frames for the three per-day series.
    df_data_positive = pd.DataFrame(
        columns=['Record_Date', 'News_Positive_Sentiment_Count'])
    df_data_negative = pd.DataFrame(
        columns=['Record_Date', 'News_Negative_Sentiment_Count'])
    df_data_indices = pd.DataFrame(
        columns=['Record_Date', 'Indices_Sentiment'])
    # Daily counts of POSITIVE tweets (nltk_classify = 1), restricted to
    # a hard-coded whitelist of finance-related accounts and to tweets
    # longer than 10 whitespace-separated tokens.
    query = """ SELECT TO_DATE(TO_CHAR(tweet_date :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') "record_date",COUNT(tsentiment.nltk_classify)
    FROM twitter_data_dump tdump
    JOIN twitter_sentiment tsentiment ON tdump.id = tsentiment.tweet_id
    WHERE TO_DATE(TO_CHAR(tweet_date :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01'
    AND tsentiment.nltk_classify = 1
    AND array_length(regexp_split_to_array(tdump.tweet_message, '\s'),1) > 10
    AND tdump.screen_id in (SELECT DISTINCT screen_id FROM twitter_account WHERE is_active = true and screen_id in('Investingcom','business','markets','FinancialTimes','stevenmnuchin1','Lagarde',
    'federalreserve','bankofcanada','bankofengland','RBI','ecb','banquedefrance','tsx_tsxv','bse_sensex',
    'NSEIndia','S&P 500','Nasdaq','DowJones','NikkeiMarket','GTAI_com','CAC_40_Trading'))
    GROUP BY record_date ORDER BY record_date ASC """
    # AND tdump.website in ('businessinsider','economic_times')
    db_cursor.execute(query)
    positive_sentiment_data = db_cursor.fetchall()
    data_list = []  # NOTE(review): unused — left in place.
    for row in positive_sentiment_data:
        # NOTE(review): DataFrame.append is deprecated and removed in
        # pandas 2.0 — this code requires pandas < 2.
        df_data_positive = df_data_positive.append(
            {
                'Record_Date': row[0],
                'News_Positive_Sentiment_Count': row[1]
            },
            ignore_index=True)
    df_data_positive['News_Positive_Sentiment_Count'] = df_data_positive[
        'News_Positive_Sentiment_Count'].astype(int)
    # Daily counts of NEGATIVE tweets (nltk_classify = 0).
    # NOTE(review): unlike the positive query, this one filters only on
    # is_active and NOT on the account whitelist — confirm intended.
    query = """ SELECT TO_DATE(TO_CHAR(tweet_date :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') "record_date",COUNT(tsentiment.nltk_classify)
    FROM twitter_data_dump tdump
    JOIN twitter_sentiment tsentiment ON tdump.id = tsentiment.tweet_id
    WHERE TO_DATE(TO_CHAR(tweet_date :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01' AND
    tsentiment.nltk_classify = 0
    AND array_length(regexp_split_to_array(tdump.tweet_message, '\s'),1) > 10
    AND tdump.screen_id in (SELECT DISTINCT screen_id FROM twitter_account WHERE is_active = true)
    GROUP BY record_date ORDER BY record_date ASC """
    db_cursor.execute(query)
    negative_sentiment_data = db_cursor.fetchall()
    data_list = []  # NOTE(review): unused — left in place.
    for row in negative_sentiment_data:
        df_data_negative = df_data_negative.append(
            {
                'Record_Date': row[0],
                'News_Negative_Sentiment_Count': row[1]
            },
            ignore_index=True)
    df_data_negative['News_Negative_Sentiment_Count'] = df_data_negative[
        'News_Negative_Sentiment_Count'].astype(int)
    # merge sentiment data (inner join on Record_Date: only days with
    # both a positive and a negative count survive).
    sentiment_data = pd.merge(
        df_data_positive,
        df_data_negative[['Record_Date', 'News_Negative_Sentiment_Count']],
        on='Record_Date')
    sentiment_data['Day_Sentiment'] = np.where(
        sentiment_data['News_Positive_Sentiment_Count'] >=
        sentiment_data['News_Negative_Sentiment_Count'], 1, 0)
    # indices data: one percent-change series per tracked index symbol.
    query = """ SELECT DISTINCT ind.index_symbol FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id ORDER BY index_symbol DESC"""
    db_cursor.execute(query)
    unique_indices_list = db_cursor.fetchall()
    columns = ['entry_date', 'percent_change']
    query = """ select DISTINCT entry_date from indices_data order by entry_date desc; """
    db_cursor.execute(query)
    unique_indices_date = db_cursor.fetchall()
    for row_date in unique_indices_date:
        indices_data_list = []
        for row in unique_indices_list:
            # Stage the joined index data in a temp table, then take the
            # latest row (<= row_date) per symbol with its day-over-day
            # percent change (LAG window function).
            # NOTE(review): the temp table is created on EVERY symbol
            # iteration but dropped only after the loop — a second
            # CREATE would normally fail with "relation already exists";
            # verify against the connection's transaction settings.
            query = """ CREATE TEMPORARY TABLE indices_data_temp (
            index_id INT,
            index_name VARCHAR(100) NOT NULL,
            index_symbol VARCHAR(50) NOT NULL,
            entry_date DATE,
            lat VARCHAR(30) NOT NULL,
            lon VARCHAR(30) NOT NULL,
            open DECIMAL,
            adj_close DECIMAL );
            INSERT INTO indices_data_temp SELECT DISTINCT idata.index_id,ind.country,ind.index_symbol,idata.entry_date,ind.lat,ind.lon,ROUND(idata.open,2) open,ROUND(idata.adj_close,2) adj_close
            FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id;
            select idata.entry_date, round(((ROUND(idata.adj_close,2) -
            lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))/lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))*100,2) as percent_change
            FROM indices ind JOIN indices_data_temp idata ON ind.id = idata.index_id
            WHERE idata.index_symbol = %s AND idata.entry_date <= %s
            ORDER BY ind.index_symbol,idata.entry_date DESC;"""
            db_cursor.execute(query, (row[0], row_date[0]))
            indices_data = db_cursor.fetchall()
            row_count = db_cursor.rowcount
            if row_count > 0:
                # First row = most recent entry for this symbol.
                indices_data_list.append(indices_data[0])
        indices_map_data = pd.DataFrame(indices_data_list, columns=columns)
        indices_map_data['percent_change'] = indices_map_data[
            'percent_change'].astype(float)
        indices_map_data.sort_values(by=['entry_date'],
                                     inplace=True,
                                     ascending=False)
        query = "drop table indices_data_temp;"
        db_cursor.execute(query)
        # price_change_sentiment = np.where(indices_map_data[["percent_change"]].mean() > 0, 1, 0)
        # Majority vote across indices: 1 when at least as many indices
        # moved up (percent_change >= 0) as moved down.
        seriesObj = indices_map_data.apply(
            lambda x: True if x['percent_change'] >= 0 else False, axis=1)
        numOfRows = len(seriesObj[seriesObj == True].index)
        numOfRows1 = len(seriesObj[seriesObj == False].index)
        price_change_sentiment = 0
        if numOfRows >= numOfRows1:
            price_change_sentiment = 1
        indices_map_data.sort_values(by=['entry_date'],
                                     inplace=True,
                                     ascending=False)
        df_data = indices_map_data[indices_map_data['entry_date'] ==
                                   row_date[0]]
        df_data_indices = df_data_indices.append(
            {
                'Record_Date': df_data['entry_date'].iloc[0],
                'Indices_Sentiment': price_change_sentiment
            },
            ignore_index=True)
        # df_data_indices = df_data_indices.append({'Record_Date': indices_map_data['entry_date'][0],
        # 'Indices_Sentiment': price_change_sentiment[0]},ignore_index=True)
    # Inner-join sentiment and index series on the shared date key.
    indices_vs_sentiment_data = pd.merge(
        sentiment_data,
        df_data_indices[['Record_Date', 'Indices_Sentiment']],
        on='Record_Date')
    return indices_vs_sentiment_data
def get_indices_vs_sentiment_plot_data_v1(cls):
    """Build a per-day comparison of NEWS sentiment vs. index movement.

    Like the Twitter variant, but sources sentiment from
    ``news_feeds_dump``/``news_feeds_sentiment`` and takes a per-article
    majority vote across three classifiers (NLTK, spaCy count-vectorizer,
    spaCy tf-idf).  The index side is restricted to dates present for a
    fixed list of index ids (intersection in a temp table).

    :return: merged DataFrame keyed on ``Record_Date``.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    df_data = pd.DataFrame(columns=[
        'Record_Date', 'nltk_classify', 'spacy_count_vectorizer_classify',
        'spacy_tfidf_vectorizer_classify'
    ])
    # df_data_positive = pd.DataFrame(columns=['Record_Date','News_Positive_Sentiment_Count'])
    # df_data_negative = pd.DataFrame(columns=['Record_Date','News_Negative_Sentiment_Count'])
    df_data_indices = pd.DataFrame(
        columns=['Record_Date', 'Indices_Sentiment'])
    # Per-article classifier outputs, dated by the article's own
    # ``timestamp`` when it parses as a date, otherwise by the row's
    # ``entry_time`` (relative timestamps like 'hours'/'ago'/'ist' fall
    # back to entry_time).
    query = """ SELECT CASE
    WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('hours' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy')
    WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ist' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy')
    WHEN timestamp IS NOT NULL AND tdump.timestamp !='' AND position('ago' in LOWER(tdump.timestamp)) > 0 THEN TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy')
    WHEN timestamp IS NOT NULL AND tdump.timestamp !='' THEN TO_DATE(TO_CHAR(timestamp:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy')
    ELSE TO_DATE(TO_CHAR(tdump.entry_time:: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy')
    END AS "record_date",
    tsentiment.nltk_classify,tsentiment.spacy_count_vectorizer_classify,tsentiment.spacy_tfidf_vectorizer_classify
    FROM news_feeds_dump tdump
    JOIN news_feeds_sentiment tsentiment ON tdump.id = tsentiment.news_id
    WHERE TO_DATE(TO_CHAR(tdump.entry_time :: DATE, 'dd/mm/yyyy'),'dd/mm/yyyy') >= '2020-03-01' """
    db_cursor.execute(query)
    sentiment_data_list = db_cursor.fetchall()
    for row in sentiment_data_list:
        # NOTE(review): DataFrame.append is deprecated and removed in
        # pandas 2.0 — this code requires pandas < 2.
        df_data = df_data.append(
            {
                'Record_Date': row[0],
                'nltk_classify': row[1],
                'spacy_count_vectorizer_classify': row[2],
                'spacy_tfidf_vectorizer_classify': row[3]
            },
            ignore_index=True)
    df_data['nltk_classify'] = df_data['nltk_classify'].astype(int)
    df_data['spacy_count_vectorizer_classify'] = df_data[
        'spacy_count_vectorizer_classify'].astype(int)
    df_data['spacy_tfidf_vectorizer_classify'] = df_data[
        'spacy_tfidf_vectorizer_classify'].astype(int)
    df_data['Record_Date'] = pd.to_datetime(df_data['Record_Date'])
    # Row-wise mode over the three classifier columns = majority vote
    # (0 or 1) per article.
    df_data['majority_vote'] = df_data.mode(axis=1, numeric_only=True)[0]
    df_postive_sentiment_data = df_data.loc[df_data['majority_vote'] == 1]
    df_negative_sentiment_data = df_data.loc[df_data['majority_vote'] == 0]
    # Count votes per day for each sentiment.
    df_postive_group_data = df_postive_sentiment_data.groupby(
        'Record_Date')['majority_vote'].value_counts().reset_index(
            name="News_Positive_Sentiment_Count")
    df_negative_group_data = df_negative_sentiment_data.groupby(
        'Record_Date')['majority_vote'].value_counts().reset_index(
            name="News_Negative_Sentiment_Count")
    df_data_positive = df_postive_group_data[[
        'Record_Date', 'News_Positive_Sentiment_Count'
    ]]
    df_data_negative = df_negative_group_data[[
        'Record_Date', 'News_Negative_Sentiment_Count'
    ]]
    # merge sentiment data (inner join: only days with both counts survive).
    sentiment_data = pd.merge(
        df_data_positive,
        df_data_negative[['Record_Date', 'News_Negative_Sentiment_Count']],
        on='Record_Date')
    sentiment_data['Day_Sentiment'] = np.where(
        sentiment_data['News_Positive_Sentiment_Count'] >=
        sentiment_data['News_Negative_Sentiment_Count'], 1, 0)
    # indices data
    query = """ SELECT DISTINCT ind.index_symbol FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id ORDER BY index_symbol DESC"""
    db_cursor.execute(query)
    unique_indices_list = db_cursor.fetchall()
    columns = ['entry_date', 'percent_change']
    # Dates present for ALL of the hard-coded index ids (intersection),
    # staged in a session temp table.
    query = """ CREATE TEMPORARY TABLE temp_table( entry_date DATE );
    insert into temp_table
    select entry_date from indices_data where index_id=1
    intersect select entry_date from indices_data where index_id=2
    intersect select entry_date from indices_data where index_id=3
    intersect select entry_date from indices_data where index_id=4
    intersect select entry_date from indices_data where index_id=5
    intersect select entry_date from indices_data where index_id=6
    intersect select entry_date from indices_data where index_id=7
    intersect select entry_date from indices_data where index_id=9
    intersect select entry_date from indices_data where index_id=13
    intersect select entry_date from indices_data where index_id=14;
    select DISTINCT entry_date from temp_table order by entry_date desc; """
    db_cursor.execute(query)
    unique_indices_date = db_cursor.fetchall()
    for row_date in unique_indices_date:
        indices_data_list = []
        for row in unique_indices_list:
            # Latest percent change per symbol up to row_date, via a
            # LAG window over a staged temp table.
            # NOTE(review): the temp table is created on EVERY symbol
            # iteration but dropped only after the loop — a second
            # CREATE would normally fail with "relation already exists";
            # verify against the connection's transaction settings.
            query = """ CREATE TEMPORARY TABLE indices_data_temp (
            index_id INT,
            index_name VARCHAR(100) NOT NULL,
            index_symbol VARCHAR(50) NOT NULL,
            entry_date DATE,
            lat VARCHAR(30) NOT NULL,
            lon VARCHAR(30) NOT NULL,
            open DECIMAL,
            adj_close DECIMAL );
            INSERT INTO indices_data_temp SELECT DISTINCT idata.index_id,ind.country,ind.index_symbol,idata.entry_date,ind.lat,ind.lon,ROUND(idata.open,2) open,ROUND(idata.adj_close,2) adj_close
            FROM indices ind JOIN indices_data idata ON ind.id = idata.index_id;
            select idata.entry_date, round(((ROUND(idata.adj_close,2) - lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))/lag(ROUND(idata.adj_close,2)) over (order by idata.entry_date, ind.index_symbol))*100,2) as percent_change
            FROM indices ind JOIN indices_data_temp idata ON ind.id = idata.index_id
            WHERE idata.index_symbol = %s AND idata.entry_date <= %s
            ORDER BY ind.index_symbol,idata.entry_date DESC;"""
            db_cursor.execute(query, (row[0], row_date[0]))
            indices_data = db_cursor.fetchall()
            row_count = db_cursor.rowcount
            if row_count > 0:
                # First row = most recent entry for this symbol.
                indices_data_list.append(indices_data[0])
        indices_map_data = pd.DataFrame(indices_data_list, columns=columns)
        indices_map_data['percent_change'] = indices_map_data[
            'percent_change'].astype(float)
        indices_map_data.sort_values(by=['entry_date'],
                                     inplace=True,
                                     ascending=False)
        query = "drop table indices_data_temp;"
        db_cursor.execute(query)
        # Index sentiment: 1 when the mean percent change across the
        # tracked indices is positive, else 0 (np.where on the 1-col
        # frame yields a 1-element array; [0] unwraps it below).
        price_change_sentiment = np.where(
            indices_map_data[["percent_change"]].mean() > 0, 1, 0)
        indices_map_data.sort_values(by=['entry_date'],
                                     inplace=True,
                                     ascending=False)
        # NOTE(review): df_data is re-bound here, shadowing the earlier
        # sentiment frame of the same name — confusing but harmless,
        # since the sentiment side was already folded into sentiment_data.
        df_data = indices_map_data[indices_map_data['entry_date'] ==
                                   row_date[0]]
        df_data_indices = df_data_indices.append(
            {
                'Record_Date': df_data['entry_date'].iloc[0],
                'Indices_Sentiment': price_change_sentiment[0]
            },
            ignore_index=True)
    # Inner-join sentiment and index series on the shared date key.
    indices_vs_sentiment_data = pd.merge(
        sentiment_data,
        df_data_indices[['Record_Date', 'Indices_Sentiment']],
        on='Record_Date')
    return indices_vs_sentiment_data
def get_twitter_bar_plot_data(cls, sentiment_type_id, user_id, type):
    """Count classified tweets for a sentiment bar chart.

    ``sentiment_type_id == 2`` counts tweets whose ``nltk_classify``
    equals ``type``; 0 or 1 additionally filters on that value.  The
    first query is scoped to the Twitter accounts mapped to ``user_id``;
    if it yields no rows, it is retried without the per-user filter.
    Tweets of 10 words or fewer, or ending in '?', are excluded.

    :param sentiment_type_id: 0, 1 (filter by sentiment) or 2 (all).
    :param user_id: user whose mapped accounts scope the first attempt.
    :param type: nltk_classify value to count (name shadows the builtin
        but is part of the public signature, so it is kept).
    :return: list of ('Sentiment', count) rows, or ``None`` when
        ``sentiment_type_id`` is not 0, 1 or 2.
    :raises ErrorFound: wraps any database error; the connection is
        always closed.
    """
    db_conn = DBConnection.get_connection()
    db_cursor = db_conn.cursor()
    try:
        # NOTE: all queries are raw strings so the regex '\s' reaches
        # PostgreSQL unchanged.  A plain string literal would carry an
        # invalid escape sequence (SyntaxWarning on Python 3.12+).
        if int(sentiment_type_id) == 2:
            # NOTE(review): ``ts.nltk_classify = %s`` sits inside the
            # account subquery's WHERE (a correlated reference) rather
            # than the outer WHERE — kept as-is, but verify intent.
            query = r"""
                SELECT 'Sentiment' AS "entry_time",COUNT(ts.nltk_classify)
                FROM twitter_data_dump td
                JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                WHERE td.screen_id in (
                        SELECT DISTINCT screen_id
                        FROM twitter_account ta
                        LEFT JOIN user_twitter_account_mapping tam
                            ON ta.id = tam.twitter_account_id
                        WHERE ta.is_active = True AND ts.nltk_classify = %s AND tam.user_id=%s)
                    AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                    AND RIGHT(td.tweet_message, 1) != '?'
                GROUP BY ts.entry_time
                ORDER BY ts.entry_time ASC"""
            db_cursor.execute(query, (type, user_id))
            if db_cursor.rowcount <= 0:
                # Fallback: no rows for this user's accounts -> count
                # across all tweets.
                query = r"""
                    SELECT 'Sentiment' AS "entry_time",COUNT(ts.nltk_classify)
                    FROM twitter_data_dump td
                    JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                    WHERE ts.nltk_classify = %s
                        AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                        AND RIGHT(td.tweet_message, 1) != '?'
                    GROUP BY ts.entry_time
                    ORDER BY ts.entry_time ASC"""
                db_cursor.execute(query, (type, ))
            records_list = db_cursor.fetchall()
            return records_list
        elif int(sentiment_type_id) == 0 or int(sentiment_type_id) == 1:
            query = r"""
                SELECT 'Sentiment' AS "entry_time",COUNT(ts.nltk_classify)
                FROM twitter_data_dump td
                JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                WHERE nltk_classify = %s AND ts.nltk_classify = %s
                    AND td.screen_id in (
                        SELECT DISTINCT screen_id
                        FROM twitter_account ta
                        LEFT JOIN user_twitter_account_mapping tam
                            ON ta.id = tam.twitter_account_id
                        WHERE ta.is_active = True AND tam.user_id=%s)
                    AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                    AND RIGHT(td.tweet_message, 1) != '?'
                GROUP BY ts.entry_time
                ORDER BY ts.entry_time ASC"""
            db_cursor.execute(query, (sentiment_type_id, type, user_id))
            if db_cursor.rowcount <= 0:
                # Same fallback, keeping both sentiment filters.
                query = r"""
                    SELECT 'Sentiment' AS "entry_time",COUNT(ts.nltk_classify)
                    FROM twitter_data_dump td
                    JOIN twitter_sentiment ts ON td.id = ts.tweet_id
                    WHERE nltk_classify = %s AND ts.nltk_classify = %s
                        AND array_length(regexp_split_to_array(td.tweet_message, '\s'),1) > 10
                        AND RIGHT(td.tweet_message, 1) != '?'
                    GROUP BY ts.entry_time
                    ORDER BY ts.entry_time ASC"""
                db_cursor.execute(query, (sentiment_type_id, type))
            records_list = db_cursor.fetchall()
            return records_list
    except (Exception, psycopg2.Error) as error:
        raise ErrorFound(error)
    finally:
        db_conn.close()