예제 #1
0
def calc_predicted_direction(tickerVal):
    """Classify each date by how a ticker's mention share compares to its mean.

    For every date key in ``dataset/totalByDate.json`` the share of that
    day's comments mentioning ``tickerVal`` is computed (mentions divided by
    the day's total) and compared with the mean share across all dates.

    Args:
        tickerVal: Ticker symbol to look for. It is upper-cased before being
            compared with the (upper-cased) symbols stored on each comment.

    Returns:
        A dict mapping every date key of the totals file to ``1`` (share is
        at least 25% above the mean), ``-1`` (at least 25% below the mean) or
        ``0`` (within 25% of the mean, or no mean can be computed because the
        ticker is never mentioned). An empty totals file yields ``{}``.

    Raises:
        KeyError: If a matching comment carries a date that is not a key of
            the totals file.
    """
    # Use a context manager so the totals file is closed deterministically.
    with open("dataset/totalByDate.json") as totals_file:
        totals = json.load(totals_file)

    # NOTE(review): tickerVal is interpolated directly into the SQL text; if
    # it can ever come from untrusted input, switch to a parameterized query.
    sql_command = """SELECT dateVal, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)

    mentions = dict.fromkeys(totals, 0)
    for row in db.run_command(sql_command):
        # LIKE also matches substrings of other symbols, so confirm an exact
        # match inside the comma-separated ticker list.
        symbols = [x.upper() for x in row[1].split(",") if len(x) > 0]
        if tickerVal.upper() in symbols:
            mentions[row[0]] += 1

    trades = dict.fromkeys(totals, 0)
    if not totals:
        # No dates at all: nothing to average, nothing to classify.
        return trades

    ratios = {}
    total_ratio = 0.0
    for date_key, count in mentions.items():
        day_total = float(totals[date_key])
        # A day with no comments cannot mention the ticker; treat it as 0.0
        # instead of dividing by zero.
        ratio = float(count) / day_total if day_total else 0.0
        ratios[date_key] = ratio
        total_ratio += ratio

    average = total_ratio / float(len(ratios))
    if average == 0:
        # The ticker is never mentioned, so a relative deviation is undefined.
        return trades

    for date_key, ratio in ratios.items():
        deviation = ratio - average
        if (abs(deviation) / average) * 100 < 25:
            continue  # within 25% of the mean: leave the neutral 0
        trades[date_key] = 1 if deviation > 0 else -1
    return trades
예제 #2
0
def get_yolo_comments():
    """Collect the ticker symbols attached to comments whose body matches 'yolo'.

    Returns:
        A flat list of upper-cased ticker symbols, one entry per symbol per
        matching comment (duplicates are kept).
    """
    sql_command = """SELECT tickers FROM comments WHERE body LIKE '%yolo%' AND tickers not NULL;"""

    # Each row holds one comma-separated ticker string; empty pieces are
    # dropped and the rest are flattened into a single list.
    return [
        symbol.upper()
        for row in db.run_command(sql_command)
        for symbol in row[0].split(",")
        if len(symbol) > 0
    ]
예제 #3
0
def time_it(test_dir, style, nth):
    """Run one benchmark query file and report how long it took.

    The SQL text is read from ``test_queries/<test_dir>/<style>.sql`` and
    executed through ``db.get_query_results`` on the module-level connection.

    Args:
        test_dir: Sub-directory of ``test_queries`` that holds the query.
        style: Base name of the ``.sql`` file (without the extension).
        nth: Caller-supplied value copied unchanged into the result row.

    Returns:
        A one-row ``pandas.DataFrame`` with the fields ``test``, ``style``,
        ``time`` (elapsed seconds measured with ``time.time``) and ``nth``.
    """
    sql_path = os.path.join("test_queries", test_dir, style + ".sql")
    print("Evaluating {}".format(sql_path))
    with open(sql_path, "r") as sql_file:
        query = sql_file.read()

    # Snowflake: re-issue the cache-disabling statement before timing starts.
    if db_type == "snowflake":
        db.run_command(db_type, conn, disable_snowflake_cache)

    started = time.time()
    # Redshift: the cache-disabling statement is prepended to the query text.
    if db_type == "redshift":
        query = disable_redshift_cache + query
    db.get_query_results(db_type, conn, query)
    elapsed = time.time() - started

    print("Completed in: {}".format(elapsed))
    record = {"test": test_dir, "style": style, "time": elapsed, "nth": nth}
    return pd.DataFrame.from_records([record])
예제 #4
0
def get_dates():
    """Return every distinct ``dateVal`` found in the ``comments`` table.

    Returns:
        A list of unique date values, in the order the database first yields
        them.
    """
    sql_command = """SELECT dateVal FROM comments;"""
    seen = set()
    dates = []
    for row in db.run_command(sql_command):
        date_val = row[0]
        # The set gives O(1) duplicate checks; the list keeps first-seen order.
        if date_val not in seen:
            seen.add(date_val)
            dates.append(date_val)
    return dates
예제 #5
0
def get_count_by_ticker(tickerVal):
    """Count the comments whose ticker list contains ``tickerVal``.

    Args:
        tickerVal: Ticker symbol to look for; compared upper-cased against
            the upper-cased symbols stored on each comment.

    Returns:
        The number of matching comments as an int.
    """
    # Tickers are stored as one comma-separated string (e.g. "F,TSLA"), so the
    # LIKE filter only narrows the candidates; the exact match happens here.
    query = """SELECT tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    hits = 0
    for row in db.run_command(query):
        target = tickerVal.upper()
        if any(part.upper() == target for part in row[0].split(",") if part):
            hits += 1
    return hits
예제 #6
0
def get_first_comment_with_ticker(tickerVal):
    """Return the largest day difference among comments mentioning a ticker.

    For each comment whose ticker list contains ``tickerVal``, the comment's
    ``created_utc`` value is passed (as a string) to
    ``get_day_difference_between_utc`` and the maximum result is kept.

    Args:
        tickerVal: Ticker symbol to look for; compared upper-cased.

    Returns:
        The largest value produced for a matching comment, or ``0`` when no
        comment matched or no value was greater than zero.
    """
    query = """SELECT created_utc, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    largest = 0
    for row in db.run_command(query):
        symbols = [piece.upper() for piece in row[1].split(",") if len(piece) > 0]
        if tickerVal.upper() not in symbols:
            continue
        day_diff = get_day_difference_between_utc(str(row[0]))
        if day_diff > largest:
            largest = day_diff
    return largest
예제 #7
0
def get_total_ticker_count_dates(tickerVal):
    """Count, per date, the comments whose ticker list contains ``tickerVal``.

    Args:
        tickerVal: Ticker symbol to look for; compared upper-cased against
            the upper-cased symbols stored on each comment.

    Returns:
        A dict with one entry per date key of ``dataset/totalByDate.json``,
        mapping the date to the number of matching comments (``0`` when there
        are none).

    Raises:
        KeyError: If a matching comment carries a date that is not a key of
            the totals file.
    """
    # Use a context manager so the totals file is closed deterministically.
    with open("dataset/totalByDate.json") as totals_file:
        totals = json.load(totals_file)

    # NOTE(review): tickerVal is interpolated directly into the SQL text; if
    # it can ever come from untrusted input, switch to a parameterized query.
    sql_command = """SELECT dateVal, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)

    # Only the date keys of the totals file are needed, not its values.
    info = dict.fromkeys(totals, 0)
    for row in db.run_command(sql_command):
        # LIKE also matches substrings of other symbols, so confirm an exact
        # match inside the comma-separated ticker list.
        symbols = [x.upper() for x in row[1].split(",") if len(x) > 0]
        if tickerVal.upper() in symbols:
            info[row[0]] += 1
    return info
예제 #8
0
def get_average_by_ticker(tickerVal):
    """Count comments mentioning ``tickerVal``, grouped by weekday.

    Despite the name, no average is computed: the result is a plain tally.

    Args:
        tickerVal: Ticker symbol to look for; compared upper-cased against
            the upper-cased symbols stored on each comment.

    Returns:
        A dict mapping ``str(weekday)`` to the number of matching comments.
        Weekdays without any match are absent from the dict.
    """
    # Tickers are stored as one comma-separated string (e.g. "F,TSLA"), so the
    # LIKE filter only narrows the candidates; the exact match happens here.
    query = """SELECT weekday, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    per_weekday = {}
    for row in db.run_command(query):
        symbols = [piece.upper() for piece in row[1].split(",") if len(piece) > 0]
        if tickerVal.upper() not in symbols:
            continue
        day = str(row[0])
        per_weekday[day] = per_weekday.get(day, 0) + 1
    return per_weekday
예제 #9
0
def get_average_upvotes_by_ticker(tickerVal):
    """Return the mean ``ups`` value of comments mentioning ``tickerVal``.

    Comments whose ``ups`` column is NULL are ignored.

    Args:
        tickerVal: Ticker symbol to look for; compared upper-cased against
            the upper-cased symbols stored on each comment.

    Returns:
        The mean as a float, or the int ``0`` when no matching comment has a
        non-NULL ``ups`` value.
    """
    # Tickers are stored as one comma-separated string (e.g. "F,TSLA"), so the
    # LIKE filter only narrows the candidates; the exact match happens here.
    # NOTE(review): tickerVal is interpolated directly into the SQL text; if
    # it can ever come from untrusted input, switch to a parameterized query.
    sql_command = """SELECT ups, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)
    total_ups = 0
    counted = 0
    for row in db.run_command(sql_command):
        ups = row[0]
        symbols = [x.upper() for x in row[1].split(",") if len(x) > 0]
        # Identity check is the correct way to test for NULL/None.
        if tickerVal.upper() in symbols and ups is not None:
            total_ups += ups
            counted += 1
    if counted == 0:
        return 0
    return float(total_ups) / float(counted)
예제 #10
0
def calc_ratio_info(tickerVal):
    """Compute a ticker's share of comments per date and its mean share.

    For every date key in ``dataset/totalByDate.json`` the number of comments
    mentioning ``tickerVal`` is divided by that date's total.

    Args:
        tickerVal: Ticker symbol to look for; compared upper-cased against
            the upper-cased symbols stored on each comment.

    Returns:
        A dict with two keys: ``"dates"`` maps each date key to its ratio
        (a float; ``0.0`` when the day's total is zero) and ``"average"`` is
        the mean of those ratios (``0.0`` when the totals file is empty).

    Raises:
        KeyError: If a matching comment carries a date that is not a key of
            the totals file.
    """
    # Use a context manager so the totals file is closed deterministically.
    with open("dataset/totalByDate.json") as totals_file:
        totals = json.load(totals_file)

    # NOTE(review): tickerVal is interpolated directly into the SQL text; if
    # it can ever come from untrusted input, switch to a parameterized query.
    sql_command = """SELECT dateVal, tickers FROM comments WHERE tickers LIKE '%{}%';""".format(
        tickerVal)

    mentions = dict.fromkeys(totals, 0)
    for row in db.run_command(sql_command):
        # LIKE also matches substrings of other symbols, so confirm an exact
        # match inside the comma-separated ticker list.
        symbols = [x.upper() for x in row[1].split(",") if len(x) > 0]
        if tickerVal.upper() in symbols:
            mentions[row[0]] += 1

    ratios = {}
    total_ratio = 0.0
    for date_key, count in mentions.items():
        day_total = float(totals[date_key])
        # A day with no comments cannot mention the ticker; treat it as 0.0
        # instead of dividing by zero.
        ratio = float(count) / day_total if day_total else 0.0
        ratios[date_key] = ratio
        total_ratio += ratio

    # Guard the mean against an empty totals file.
    average = total_ratio / float(len(ratios)) if ratios else 0.0
    return {"average": average, "dates": ratios}
예제 #11
0
def get_total_count_dates(dateVal):
    """Run a ``count(body)`` query for the comments stored under ``dateVal``.

    Args:
        dateVal: Date value substituted into the query's WHERE clause.

    Returns:
        Whatever ``db.run_command`` returns for the query; the result is
        passed through without being unwrapped.
    """
    template = """SELECT count(body) FROM comments WHERE dateVal = '{}';"""
    count_query = template.format(dateVal)
    result = db.run_command(count_query)
    return result
예제 #12
0
import db
import sys

# Script setup: pick the target database from the command line, open a
# connection, and switch off result caching for the session.

# First command-line argument names the database backend; the code below
# special-cases the values "redshift" and "snowflake".
db_type = sys.argv[1]
print(db_type)

# Today's date formatted as YYYY-MM-DD.
# NOTE(review): `datetime` is not imported in the visible part of this file —
# confirm it is imported elsewhere.
today_str = datetime.datetime.today().strftime("%Y-%m-%d")

# Statements that turn off result caching, one per backend.
disable_redshift_cache = """SET enable_result_cache_for_session TO OFF;"""
disable_snowflake_cache = """ALTER SESSION SET USE_CACHED_RESULT=false;"""

# Module-level connection used by the statements below.
conn = db.get_connection(db_type)

# Disable result caching on the freshly opened session.
if db_type == "redshift":
    db.run_command(db_type, conn, disable_redshift_cache)
if db_type == "snowflake":
    # Select the database first, then disable caching.
    db.run_command(db_type, conn, "USE DATABASE DEMO_DB;")
    db.run_command(db_type, conn, disable_snowflake_cache)


def time_it(test_dir, style, nth):
    fn = os.path.join("test_queries", test_dir, style + ".sql")
    print("Evaluating {}".format(fn))
    with open(fn, "r") as f:
        query = f.read()
    if db_type == "snowflake":
        db.run_command(db_type, conn, disable_snowflake_cache)
    start_time = time.time()
    if db_type == "redshift":
        query = disable_redshift_cache + query