def calc_predicted_direction(tickerVal):
    """Classify each date as buy (1), sell (-1), or hold (0) for *tickerVal*.

    A date's mention ratio (ticker mentions / total comments that date) is
    compared with the average ratio across all dates; a deviation of 25% or
    more from the average becomes a +1/-1 signal, anything closer is 0.

    NOTE(review): the LIKE pattern is built with str.format, which is
    SQL-injection-prone if tickerVal is untrusted; parameterize once the
    db.run_command API supports bound parameters.
    """
    # totalByDate.json maps dateVal -> total comment count for that date.
    # 'with' closes the file deterministically (the old code leaked it).
    with open("dataset/totalByDate.json") as f:
        totals_by_date = json.load(f)
    # Dict comprehension replaces the Python-2-only .iteritems() zeroing loop.
    mentions = {key: 0 for key in totals_by_date}
    sql_command = """SELECT dateVal, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    for val in db.run_command(sql_command):
        dateVal = val[0]
        # tickers is a comma-separated string like "F,TSLA"; the LIKE
        # prefilter can over-match, so exact membership is re-checked here.
        tickers = [x.upper() for x in val[1].split(",") if len(x) > 0]
        if tickerVal.upper() in tickers:
            mentions[dateVal] += 1
    totalRatio = 0.0
    totalCount = 0
    for key in mentions:
        totalRatio += float(mentions[key]) / float(totals_by_date[key])
        totalCount += 1
    averageVal = totalRatio / float(totalCount)
    trades = {key: 0 for key in totals_by_date}
    for key in mentions:
        thisAvg = float(mentions[key]) / float(totals_by_date[key])
        diffVal = thisAvg - averageVal
        # Within 25% of the average ratio -> no trade signal.
        if ((abs(float(diffVal)) / averageVal) * 100) < 25:
            trades[key] = 0
        elif diffVal > 0:
            trades[key] = 1
        else:
            trades[key] = -1
    return trades
def get_yolo_comments():
    """Collect every ticker mentioned in comments whose body contains 'yolo'.

    Returns a flat (possibly repeating) list of upper-cased ticker symbols.
    """
    sql_command = """SELECT tickers FROM comments WHERE body LIKE '%yolo%' AND tickers not NULL;"""
    tickers = []
    for row in db.run_command(sql_command):
        # Each row holds a comma-separated ticker string such as "F,TSLA".
        tickers.extend(part.upper() for part in row[0].split(",") if len(part) > 0)
    return tickers
def time_it(test_dir, style, nth):
    """Run test_queries/<test_dir>/<style>.sql once and return a one-row
    DataFrame recording the elapsed wall-clock time and run index *nth*.
    """
    query_path = os.path.join("test_queries", test_dir, style + ".sql")
    print("Evaluating {}".format(query_path))
    with open(query_path, "r") as handle:
        query = handle.read()
    # Snowflake caches results per session; switch that off before each run.
    if db_type == "snowflake":
        db.run_command(db_type, conn, disable_snowflake_cache)
    started = time.time()
    # Redshift's cache is disabled by prefixing the statement itself.
    if db_type == "redshift":
        query = disable_redshift_cache + query
    db.get_query_results(db_type, conn, query)
    elapsed = time.time() - started
    print("Completed in: {}".format(elapsed))
    record = {"test": test_dir, "style": style, "time": elapsed, "nth": nth}
    return pd.DataFrame.from_records([record])
def get_dates():
    """Return the distinct dateVal values found in the comments table.

    Order follows first appearance in the query results (the old
    implementation iterated a set(), so its order was arbitrary anyway).
    """
    sql_command = """SELECT dateVal FROM comments;"""
    seen = set()
    dates = []
    for val in db.run_command(sql_command):
        dateVal = val[0]
        # Set membership is O(1); the old `dateVal not in dates` list scan
        # was O(n) per row. The unused totalCount counter is gone too.
        if dateVal not in seen:
            seen.add(dateVal)
            dates.append(dateVal)
    return dates
def get_count_by_ticker(tickerVal):
    """Count comments that mention *tickerVal*.

    The tickers column stores a comma-separated string such as "F,TSLA",
    so a LIKE prefilter narrows the rows and exact membership is
    re-checked in Python (the LIKE match alone could over-count).
    """
    sql_command = """SELECT tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    wanted = tickerVal.upper()
    totalCount = 0
    for row in db.run_command(sql_command):
        symbols = [s.upper() for s in row[0].split(",") if len(s) > 0]
        if wanted in symbols:
            totalCount += 1
    return totalCount
def get_first_comment_with_ticker(tickerVal):
    """Return the largest day-difference among comments mentioning *tickerVal*.

    Presumably this is the age in days of the earliest matching comment —
    confirm against get_day_difference_between_utc. Returns 0 when no
    comment matches.
    """
    sql_command = """SELECT created_utc, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    wanted = tickerVal.upper()
    largest_num = 0
    for row in db.run_command(sql_command):
        symbols = [s.upper() for s in row[1].split(",") if len(s) > 0]
        # Guard clause: skip rows where the LIKE prefilter over-matched.
        if wanted not in symbols:
            continue
        age = get_day_difference_between_utc(str(row[0]))
        largest_num = max(largest_num, age)
    return largest_num
def get_total_ticker_count_dates(tickerVal):
    """Return {dateVal: mention count} for *tickerVal*.

    Every date present in dataset/totalByDate.json appears in the result,
    with 0 for dates where the ticker was never mentioned.
    """
    # 'with' closes the dataset file deterministically (was leaked before).
    with open("dataset/totalByDate.json") as f:
        totals_by_date = json.load(f)
    # Dict comprehension replaces the Python-2-only .iteritems() zeroing loop.
    info = {key: 0 for key in totals_by_date}
    sql_command = """SELECT dateVal, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    for val in db.run_command(sql_command):
        dateVal = val[0]
        # Re-check exact membership; the LIKE prefilter can over-match.
        tickers = [x.upper() for x in val[1].split(",") if len(x) > 0]
        if tickerVal.upper() in tickers:
            info[dateVal] += 1
    return info
def get_average_by_ticker(tickerVal):
    """Return {weekday: comment count} for comments mentioning *tickerVal*.

    NOTE(review): despite the name, this returns raw per-weekday counts,
    not an average — the old totalVal/totalCount accumulators were never
    used and have been removed.
    """
    # This is super hacky because the tickers are stored as a string like F,TSLA,ETC.
    info = {}
    sql_command = """SELECT weekday, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    for val in db.run_command(sql_command):
        weekday = str(val[0])
        tickers = [x.upper() for x in val[1].split(",") if len(x) > 0]
        if tickerVal.upper() in tickers:
            # get() with a default folds the "key missing" branch into one line.
            info[weekday] = info.get(weekday, 0) + 1
    return info
def get_average_upvotes_by_ticker(tickerVal):
    """Return the mean 'ups' value over comments mentioning *tickerVal*,
    or 0 when no matching comment has a non-NULL ups value.
    """
    # This is super hacky because the tickers are stored as a string like F,TSLA,ETC.
    sql_command = """SELECT ups, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    totalVal = 0
    totalCount = 0
    for val in db.run_command(sql_command):
        ups = val[0]  # renamed from the misleading local name 'sentiment'
        tickers = [x.upper() for x in val[1].split(",") if len(x) > 0]
        if tickerVal.upper() in tickers:
            # 'is not None' (identity) instead of '!= None' skips NULL rows
            # without depending on the value's __eq__ semantics.
            if ups is not None:
                totalVal += ups
                totalCount += 1
    if totalCount == 0:
        return 0
    return float(totalVal) / float(totalCount)
def calc_ratio_info(tickerVal):
    """Return {"average": mean mention ratio, "dates": {dateVal: ratio}}.

    ratio = mentions of *tickerVal* on that date / total comments that date
    (totals come from dataset/totalByDate.json).
    """
    # 'with' closes the dataset file deterministically (was leaked before).
    with open("dataset/totalByDate.json") as f:
        totals_by_date = json.load(f)
    # Dict comprehension replaces the Python-2-only .iteritems() zeroing loop.
    info = {key: 0 for key in totals_by_date}
    sql_command = """SELECT dateVal, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    for val in db.run_command(sql_command):
        dateVal = val[0]
        tickers = [x.upper() for x in val[1].split(",") if len(x) > 0]
        if tickerVal.upper() in tickers:
            info[dateVal] += 1
    totalRatio = 0.0
    totalCount = 0
    # Replacing values while iterating keys is safe: the key set is unchanged.
    for key in info:
        ratio = float(info[key]) / float(totals_by_date[key])
        totalRatio += ratio
        info[key] = ratio
        totalCount += 1
    # Guard the empty-dataset case: the old code raised ZeroDivisionError.
    average = totalRatio / float(totalCount) if totalCount else 0.0
    return {"average": average, "dates": info}
def get_total_count_dates(dateVal):
    """Count comment bodies posted on *dateVal*; returns db.run_command's
    raw result, not an unwrapped int.
    """
    # NOTE(review): str.format into SQL — injection-prone if dateVal is
    # untrusted; parameterize once db.run_command supports bound parameters.
    sql_command = """SELECT count(body) FROM comments WHERE dateVal = '{}';""".format(dateVal)
    return db.run_command(sql_command)
import datetime  # was missing: datetime.datetime.today() below raised NameError
import os        # used by time_it
import sys
import time      # used by time_it

import pandas as pd  # used by the complete time_it defined elsewhere in the file

import db

# Backend name ("redshift" or "snowflake") comes from the command line.
db_type = sys.argv[1]
print(db_type)

today_str = datetime.datetime.today().strftime("%Y-%m-%d")

# Session statements that disable result caching so timings measure real work.
disable_redshift_cache = """SET enable_result_cache_for_session TO OFF;"""
disable_snowflake_cache = """ALTER SESSION SET USE_CACHED_RESULT=false;"""

conn = db.get_connection(db_type)

# Disable cache
if db_type == "redshift":
    db.run_command(db_type, conn, disable_redshift_cache)
if db_type == "snowflake":
    db.run_command(db_type, conn, "USE DATABASE DEMO_DB;")
    db.run_command(db_type, conn, disable_snowflake_cache)


def time_it(test_dir, style, nth):
    # NOTE(review): this definition is truncated as it appears here; a
    # complete version of the same function exists elsewhere in the file.
    # Reproduced unchanged (it is still syntactically valid, returning None).
    fn = os.path.join("test_queries", test_dir, style + ".sql")
    print("Evaluating {}".format(fn))
    with open(fn, "r") as f:
        query = f.read()
    if db_type == "snowflake":
        db.run_command(db_type, conn, disable_snowflake_cache)
    start_time = time.time()
    if db_type == "redshift":
        query = disable_redshift_cache + query