Ejemplo n.º 1
0
def assure_required_table():
    print("checking")
    if not table_exist("ff_scan_symbols"):
        print("table ff_scan_symbols not exist; creating...")
        runsql(
            "create table ff_scan_symbols (symbol varchar(10), last_updated datetime, last_scanned datetime, security varchar(100), sector varchar(100), subsec varchar(200), hq varchar(200), first_added datetime)"
        )
Ejemplo n.º 2
0
def refresh_sp500():
    assure_required_table()
    print("fetching sp500 list...")
    resp = requests.get(
        'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'html5lib')
    table = soup.find('table', {'class': 'wikitable sortable'})
    for row in table.findAll('tr')[1:]:
        r = row.findAll('td')
        #ticker = r[1].text.rstrip()
        ticker = r[0].text.rstrip()
        #print("working on "+ticker+"...")
        Log.info("SP500", "working on " + ticker + "...")
        check = fetch_rawsql(
            "select symbol from ff_scan_symbols where symbol='" + ticker + "'")
        if len(check) > 0:
            rawsql = "update ff_scan_symbols set last_updated=now() WHERE symbol='" + ticker + "'"
            runsql(rawsql)
        else:
            rawsql = "insert into ff_scan_symbols (symbol, last_updated, security, sector, subsec, hq, first_added) values (%s, %s, %s, %s, %s, %s, %s)"
            val = (ticker,
                   datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                   r[0].text, r[3].text, r[4].text, r[5].text,
                   (r[6].text if r[6].text and len(r[6].text) == 10 else None))
            runsql(rawsql, val)
    dbcommit()
Ejemplo n.º 3
0
def assure_required_tables():
    if not table_exist("ff_tweets"):
        runsql(FF_TWEETS_DDL)
    if not table_exist("ff_scan_symbols"):
        refresh_sp500()
    r = fetch_rawsql(
        "select min(datediff(now(),last_updated)) d from ff_scan_symbols")[0]
    if r['d'] > 7:
        refresh_sp500()
Ejemplo n.º 4
0
def assure_required_tables():
    if not table_exist("ff_finnews_iex"):
        runsql(FF_FINNEWS_IEX_DDL)
        runsql(
            "create index ff_finnews_iex_i1 on ff_finnews_iex (symbol, iex_url)"
        )
        runsql("create index ff_finnews_iex_i2 on ff_finnews_iex (iex_id)")
        runsql(
            "create index ff_finnews_iex_i3 on ff_finnews_iex (last_updated)")
        runsql("create index ff_finnews_iex_i4 on ff_finnews_iex (published)")
Ejemplo n.º 5
0
def assure_tables():
    if not table_exist("ff_status_scan"):
        runsql(FF_STATUS_SCAN)
        runsql("create index ff_status_scan_i1 on ff_status_scan (batched)")
        runsql("create index ff_status_scan_i2 on ff_status_scan (status)")
    else:
        a = fetch_rawsql("show table status like 'ff_status_scan'")
        a = a[0]['Collation']
        if a != 'utf8_unicode_ci':
            runsql(
                "ALTER TABLE ff_status_scan CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci"
            )
    # maintenance related on collation correction
    if table_exist("ff_scan_symbols"):
        a = fetch_rawsql("show table status like 'ff_scan_symbols'")
        a = a[0]['Collation']
        if a != 'utf8_unicode_ci':
            runsql(
                "ALTER TABLE ff_scan_symbols CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci"
            )
Ejemplo n.º 6
0
def do_scan():
    status_scan_starttime = datetime.datetime.now()
    assure_tables()

    symbols = fetch_rawsql(
        "select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<14"
    )
    s = [i['symbol'] for i in symbols]
    r = requests.get(FF_SCAN_HOST + "/api/v1/misc/nonsp/")
    if r.status_code != 200:
        print("Failed to retrieve list of non-SP500 symbols")
    else:
        r = r.json()
        for i in r['symbol_list']:
            if i in s or i in NON_SCAN_LIST:
                continue
            symbols += [{'symbol': i, 'sector': ''}]

    # stop when:
    # 1. data not stablized, before 9:45am
    # 2. more than 1 hour after trading stopped
    # 3. none-trading days
    enow = utc.localize(datetime.datetime.utcnow()).astimezone(eastern)
    etime = enow.hour + enow.minute / 60
    if enow.weekday() < 5 and etime > 9.75 and etime < 17.5:
        has_recent_scans = fetch_rawsql(
            "select count(1) cnt from ff_status_scan where timestampdiff(minute, scanned, '"
            + get_eastern() + "') < " + str(MINUTES_TRADING_SCANS))[0]['cnt']
        if has_recent_scans:
            print("Already scanned status in the last " +
                  str(MINUTES_TRADING_SCANS) +
                  " minutes. Cooldown even if FORCE_RUN")
            dbclose()
            sys.exit()
    elif not FORCE_RUN:
        print("Not during trading hours - skipped status scan")
        dbclose()
        sys.exit()

    # Clear scan cache only when scan actually runs
    if table_exist("ff_status_scan_a1"):
        runsql("drop table ff_status_scan_a1")
    runsql(FF_STATUS_SCAN_A1)

    n_rotten = 0
    for s0 in symbols:
        s = s0['symbol']
        print("Scanning " + s + " ...")
        try:
            q = requests.get("https://api.iextrading.com/1.0/stock/" + s +
                             "/quote")
            q = q.json()
        except:
            print("- bad IEX call for " + s + " ... skipped")
            continue
            """
            print("Unexpected error:", sys.exec_info()[0])
            raise
    		"""
        # check required keys
        if not ('calculationPrice' in q and 'latestPrice' in q
                and 'changePercent' in q and 'marketCap' in q
                and 'latestUpdate' in q and 'primaryExchange' in q):
            print("- bad IEX return format for " + s + " ... skipped")
            continue
        if 'Arca' in q['primaryExchange']:
            print("- not scanning ETF " + s + " ... skipped")
            continue
        if q['marketCap'] < MIN_MARKETCAP:
            print("- MarketCap too low to scan,  for " + s + " ... skipped")
            continue

        # double-check if IEX actually returns the latest quote
        updated = utc.localize(
            datetime.datetime.utcfromtimestamp(q['latestUpdate'] //
                                               1000)).astimezone(eastern)
        print(enow, updated)
        diff = enow - updated
        dmin = (diff.days * 86400 + diff.seconds) // 60
        if dmin > MINUTES_FRESH_SCANS and not FORCE_RUN:
            print("- rotten/older IEX return data for " + s + " ... skipped")
            n_rotten += 1
            if n_rotten >= MAX_ROTTEN:
                sys.exit()
            continue

        # Upon the first IEX fresh data of today, swap out yesterday's benchmark ceil-floor tables (from get_stock_adj)
        # *The benchmark tables should only be updated at the time market opens
        if s == symbols[0]['symbol'] and q['calculationPrice'] == 'tops':
            if table_exist("ff_stock_cf_w00") and table_exist(
                    "ff_stock_cf_w0"):
                runsql("drop table ff_stock_cf_w0")
                runsql(
                    "create table ff_stock_cf_w0 as select * from ff_stock_cf_w00"
                )
                runsql(
                    "create index ff_stock_cf_w0_i1 on ff_stock_cf_w0 (symbol)"
                )
                runsql("drop table ff_stock_cf_w00")

        # FF_scans - Volume
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/vol/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'high' in r['rarity']:
                msg = VOL_TEMPLATE[np.random.choice(
                    len(VOL_TEMPLATE),
                    p=[e['prob'] for e in VOL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&VOLUME", r['volume_human'])
                msg = msg.replace("&Z", "{0:.1f}".format(r['z']))
                msg = msg.replace("&URL", FF_SCAN_HOST + "fb/vol/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"v": r['volume_human'], "z": "{0:.1f}".format(r['z'])})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'vol', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF vol scan for " + s + " ... skipped")
            print(r)

        # FF_scans - Volatility + Fluctuation
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/pfl/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = PFL_TEMPLATE[np.random.choice(
                    len(PFL_TEMPLATE),
                    p=[e['prob'] for e in PFL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/vol/" + s + "/")
                msg = msg.replace("&FLUX", str(r['hl_ratio']) + "%")
                msg = msg.replace("&L", str(r['low']))
                msg = msg.replace("&H", str(r['high']))
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"f": r['hl_ratio'], "l": r['low'], "h": r['high']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'pfl', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF pfl scan for " + s + " ... skipped")
            print(r)

        # FF_scans - Single Day Change
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/chg/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if s0['sector']:
                val = (s, s0['sector'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"),
                       enow.strftime("%Y-%m-%d %H:%M:%S"))
                runsql(INSERT_A1, val)
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = CHG_TEMPLATE[np.random.choice(
                    len(CHG_TEMPLATE),
                    p=[e['prob'] for e in CHG_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&CHG", str(abs(round(r['chg'] * 100, 2))))
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/chg/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                msg = msg.replace(
                    "&HAVE", ("is having" if r['status'] == "live" else "had"))
                msg = msg.replace(
                    "&JS", (("roar" if "extreme" in r['rarity'] else
                             ("jump" if "very" in r['rarity'] else "gain"))
                            if r['chg'] > 0 else
                            ("crash" if "extreme" in r['rarity'] else
                             ("dive" if "very" in r['rarity'] else "drop"))))
                #ctx = str({"c": r['chg'], "m": r['move'], "js": r['js']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'chg', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF chg scan for " + s + " ... skipped")
            print(r)

        # FF_scans - Ceiling Floor Penetration
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/cfp/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and r['target'] == 'quoted':
                msg = CFP_TEMPLATE[np.random.choice(
                    len(CFP_TEMPLATE),
                    p=[e['prob'] for e in CFP_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&TARGET", r['target'])
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/dme/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"t": r['target'], "m": r['move']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'cfp', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF cfp scan for " + s + " ... skipped")
            print(r)

    runsql("drop table if exists ff_status_scan_w1")
    runsql(
        "create table ff_status_scan_w1 as select a.*, @rn:=@rn+1 rn from (select batched, count(1) n_item, count(distinct symbol) n_symbol from ff_status_scan group by batched order by batched desc) a, (select @rn:=0) b"
    )
    runsql(
        "create index ff_status_scan_w1_i1 on ff_status_scan_w1 (n_symbol, rn)"
    )
    runsql("drop table if exists ff_status_scan_w2")
    runsql(
        "create table ff_status_scan_w2 as select stype, if(changePct>=0,'u','d') ch, count(1) n from ff_status_scan a, (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b where a.batched=b.max_batched and a.marketcap>"
        + str(MIN_MARKETCAP) + " group by stype, if(changePct>=0,'u','d')")
    runsql("drop table if exists ff_status_scan_w3")
    runsql("create table ff_status_scan_w3 as select * from ff_status_scan_a1")
    runsql("drop table if exists ff_status_scan_w4")
    runsql(
        "create table ff_status_scan_w4 as select max(batched) batched, sector, count(1) cnt, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc"
    )
    if not table_exist("ff_status_scan_a2"):
        runsql(
            "create table ff_status_scan_a2 as select * from ff_status_scan_w4"
        )
        runsql(
            "create index ff_status_scan_a2_i1 on ff_status_scan_a2 (batched)")
    else:
        runsql("insert into ff_status_scan_a2 select * from ff_status_scan_w4")
    runsql("drop table if exists ff_status_scan_w5")
    runsql(
        "create table ff_status_scan_w5 as select d.sector, stype, count(1) n, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, any_value(d.cnt) n_sector, any_value(round(count(1)/d.cnt,4)) s_ratio from ff_status_scan a join (select batched as max_batched from ff_status_scan_w1 order by rn limit 1) b on a.batched=b.max_batched join (select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<60) c on a.symbol=c.symbol right join ff_status_scan_w4 d on c.sector=d.sector group by d.sector, a.stype order by sector, s_ratio desc"
    )
    runsql(
        "create index ff_status_scan_w5_i1 on ff_status_scan_w5 (sector, s_ratio)"
    )
    runsql("drop table if exists ff_status_scan_w6")
    runsql(
        "create table ff_status_scan_w6 as select max(batched) batched, sector, any_value(if(locate(' ',sector)>0, concat(substr(sector,1,1),substr(sector,locate(' ',sector)+1,2)), substr(sector,1,3))) sector3, count(1) cnt, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, round(sum(abs(changePct)*marketcap)) am, round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc"
    )
    runsql("drop table if exists ff_status_scan_w7")
    runsql(
        "create table ff_status_scan_w7 as select a.security, a.subsec, a.hq, b.*, changePct*marketcap marketeff, abs(changePct*marketcap) marketabs from (select symbol, security, subsec, hq from ff_scan_symbols where datediff(now(),last_updated)<60) a, ff_status_scan_w3 b where a.symbol=b.symbol order by marketabs"
    )
    runsql(
        "create index ff_status_scan_w7_i1 on ff_status_scan_w7 (sector, marketabs)"
    )
    runsql("drop table if exists ff_status_scan_w8")
    runsql(
        "create table ff_status_scan_w8 as select b.batched_date, a.* from ff_status_scan_a2 a,(select date(batched) batched_date, max(batched) batched_time from ff_status_scan_a2 group by date(batched)) b where a.batched=b.batched_time"
    )
    runsql(
        "create index ff_status_scan_w8_i1 on ff_status_scan_w8 (sector, batched_date)"
    )
    runsql(
        "create index ff_status_scan_w8_i2 on ff_status_scan_w8 (batched_date)"
    )
    cache_baking()

    status_scan_endtime = datetime.datetime.now()

    timedetail = "status_scan job is running from {0} to {1}, and total runging time is {2}".format(
        status_scan_starttime, status_scan_endtime,
        (status_scan_endtime - status_scan_starttime).seconds)
    print(timedetail)
    Log.info("status_scan.py", timedetail)
Ejemplo n.º 7
0
def cache_baking():
    print("Caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_status_scan_o1")
    rawsql = "create table ff_status_scan_o1 as select symbol, stype, infov, w_micd from \
          (select a.*, td+w_mic*md w_micd, @rn:=if(@s=symbol,@rn+1,0) rn, @s:=symbol s from \
          (select a.symbol, marketCap, updated, scanned, stype, rarity, context, infov, if(stype='vol',if(md>1,1,md),1) md, if(td>" + str(
        AGING_TIME_FACTOR
    ) + "," + str(AGING_TIME_FACTOR) + ",td)*rand()*" + str(
        AGING_RAND_SCALE
    ) + " td, (log10(marketCap)-9)*(infov-90)*(log10(abs(changePct*10000)+1)) w_mic from \
            (select a.*, (hour(updated)*60+minute(updated))/" + str(
        AGING_MATURE_HOUR * 60
    ) + " md, if(timestampdiff(minute,batched,utc_timestamp())<200,0,(timestampdiff(minute,batched,utc_timestamp())-200)/480) td from ff_status_scan a) a, \
              (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b where a.batched=b.max_batched order by w_mic desc limit " + str(
        N_OVERALL_CACHE * 4) + ") a, \
            (select @s:='',@rn:=0) b order by marketCap desc, w_micd desc \
          ) a where rn=0 order by w_micd desc limit " + str(N_OVERALL_CACHE)
    runsql(rawsql)
    runsql("drop table if exists ff_status_scan_o2")
    runsql(
        "create table ff_status_scan_o2 as select a.*, c.w_micd from ff_status_scan a, (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b, ff_status_scan_o1 c where a.batched=b.max_batched and a.symbol=c.symbol order by w_micd desc"
    )
    runsql("drop table if exists ff_status_scan_o3")
    runsql(
        "create table ff_status_scan_o3 as select (@rn:=if(@s=symbol,@rn,@rn+1))*1 rn, @s:=symbol symbol, quote, changePct, marketcap, stype, context, infov, w_micd from ff_status_scan_o2 a, (select @rn:=0, @s:='') b"
    )
    S = fetch_rawsql(
        "select rn, symbol, max(quote) quote, max(changePct) changePct, max(marketcap) marketcap, max(w_micd) w_micd from ff_status_scan_o3 group by rn, symbol"
    )
    if S:
        for s in S:
            r = requests.get(FF_SCAN_HOST + "/api/v1/fame66/w52/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'datasets' in r:
                    ctx = r['datasets'][0]
                    ctx = str(ctx)
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f6w', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 support of " +
                              s['symbol'])
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/f66/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'bits' in r:
                    a = [i['pr'] for i in r['bits']]
                    b = [i['c_diff'] for i in r['bits']]
                    ctx = str({"pr": a, "c_diff": b})
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f66', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 chart of " +
                              s['symbol'])
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/ahd/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'target' in r:
                    if r['target'] == 'self':
                        ctx = str(r)
                        if len(ctx) <= 255:
                            runsql(
                                "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                (s['rn'], s['symbol'], s['quote'],
                                 s['changePct'], s['marketcap'], 'ahd', ctx,
                                 r['I'], s['w_micd']))
                        else:
                            print("ctx longer than 255 for Fame Ahead of " +
                                  s['symbol'])
    runsql("drop table if exists ff_status_scan_o4")
    runsql(
        "create table ff_status_scan_o4 as select * from ff_status_scan_o3 order by rn"
    )
    runsql("create index ff_status_scan_o4_i1 on ff_status_scan_o4 (rn)")
    runsql(
        "create index ff_status_scan_o4_i2 on ff_status_scan_o4 (stype, infov, marketcap)"
    )
    print("Done caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
Ejemplo n.º 8
0
def final_sense(is_incremental=False):
    if is_incremental:
        runsql("drop table if exists ff_finnews_f2")
        runsql(
            "create table ff_finnews_f2 as select a.*, b.iex_source, b.iex_title, b.iex_related, b.article_url from (select * from ff_finnews_b12 union select * from ff_finnews_d8) a, (select iex_id, any_value(iex_source) iex_source, any_value(iex_title) iex_title, any_value(iex_related) iex_related, any_value(article_url) article_url from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id) b where a.iex_id=b.iex_id"
        )
        runsql(
            "create index ff_finnews_f2_i1 on ff_finnews_f2 (symbol, dest_date)"
        )
        runsql(
            "create index ff_finnews_f2_i2 on ff_finnews_f2 (symbol, published)"
        )
        runsql(
            "create index ff_finnews_f2_i3 on ff_finnews_f2 (symbol, iex_id)")
        runsql("drop table if exists ff_finnews_f3")
        runsql(
            "create table ff_finnews_f3 as select * from ff_finnews_c1 union select * from ff_finnews_c11"
        )
        runsql("create index ff_finnews_f3_i1 on ff_finnews_f3 (iex_id)")
        runsql("drop table if exists ff_finnews_f4")
        runsql(
            "create table ff_finnews_f4 as select * from ff_finnews_c0 union select * from ff_finnews_c00"
        )
        runsql("create index ff_finnews_f4_i1 on ff_finnews_f4 (iex_id)")
        print("Finalized Fame Sense incremental run ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    else:
        runsql("drop table if exists ff_finnews_f0")
        runsql("create table ff_finnews_f0 as select * from ff_finnews_b19")
        runsql(
            "create index ff_finnews_f0_i1 on ff_finnews_f0 (symbol, dest_date)"
        )
        runsql("create index ff_finnews_f0_i2 on ff_finnews_f0 (dest_date)")
        runsql("drop table if exists ff_finnews_f1")
        runsql(
            "create table ff_finnews_f1 as select a.sector, ifnull(marketcap,if(@s=symbol,@m,null)) marketcap, @s:=symbol symbol, dest_date, dense_sense, condense_sense, dense_diff, condense_diff, dcon_bin, dense_diff*ifnull(marketcap,if(@s=symbol,@m,null)) denseeff, abs(dense_diff*ifnull(marketcap,if(@s=symbol,@m,null))) denseabs, @m:=ifnull(marketcap,@m) dummy_mc from (select c.sector, round(c.marketcap*cr) marketcap, d.* from ff_finnews_f0 d inner join ff_status_scan_w3 c on d.symbol=c.symbol left join (select a.symbol, a.close_date, a.close/b.close cr from ff_stock a, (select symbol, close from ff_stock where close_date=(select max(close_date) from ff_stock)) b where a.close_date>=date_sub(curdate(), interval 6 month) and a.symbol=b.symbol) a on d.symbol=a.symbol and d.dest_date=a.close_date order by symbol, dest_date ) a, (select @s:='',@m:=null) b"
        )
        runsql(
            "create index ff_finnews_f1_i1 on ff_finnews_f1 (sector, dest_date, dense_diff)"
        )
        runsql("create index ff_finnews_f1_i2 on ff_finnews_f1 (dest_date)")
        runsql(
            "create index ff_finnews_f1_i3 on ff_finnews_f1 (symbol, dest_date)"
        )
        runsql("drop table if exists ff_finnews_f2")
        runsql(
            "create table ff_finnews_f2 as select a.*, b.iex_source, b.iex_title, b.iex_related, b.article_url from ff_finnews_b12 a, (select iex_id, any_value(iex_source) iex_source, any_value(iex_title) iex_title, any_value(iex_related) iex_related, any_value(article_url) article_url from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id) b where a.iex_id=b.iex_id"
        )
        runsql(
            "create index ff_finnews_f2_i1 on ff_finnews_f2 (symbol, dest_date)"
        )
        runsql(
            "create index ff_finnews_f2_i2 on ff_finnews_f2 (symbol, published)"
        )
        runsql(
            "create index ff_finnews_f2_i3 on ff_finnews_f2 (symbol, iex_id)")
        runsql("drop table if exists ff_finnews_f3")
        runsql("create table ff_finnews_f3 as select * from ff_finnews_c1")
        runsql("create index ff_finnews_f3_i1 on ff_finnews_f3 (iex_id)")
        runsql("drop table if exists ff_finnews_f4")
        runsql("create table ff_finnews_f4 as select * from ff_finnews_c0")
        runsql("create index ff_finnews_f4_i1 on ff_finnews_f4 (iex_id)")
        runsql("drop table if exists ff_finnews_f5")
        runsql("create table ff_finnews_f5 as select * from ff_finnews_w7")
        runsql(
            "create index ff_finnews_f5_i1 on ff_finnews_f5 (symbol, iex_id)")
        print("Finalized Fame Sense training run ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
Ejemplo n.º 9
0
def incremental_sense():
    print("Started incrementals " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_finnews_d0")
    runsql(
        "create table ff_finnews_d0 as select iex_id, any_value(published) published, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 1 week) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    runsql("create index ff_finnews_d0_i1 on ff_finnews_d0 (iex_id)")
    runsql("drop table if exists ff_finnews_d1")
    runsql(
        "create table ff_finnews_d1 as select a.iex_id, a.title, a.article, b.iex_id dummy from ff_finnews_d0 a left join ff_finnews_b1 b on a.iex_id=b.iex_id having dummy is null"
    )
    runsql("create index ff_finnews_d1_i1 on ff_finnews_d1 (iex_id)")
    A = fetch_rawsql("select iex_id, title, article from ff_finnews_d1")
    print("Done fetching incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    TV = joblib.load(FILE_GLOBAL_TV_LST)
    tv = TV.transform(documents)
    T = TV.get_feature_names()
    print("Done vectorizing incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))

    dig_docs(A, IEX_ids, TV, tv, is_incremental=True)

    runsql("drop table if exists ff_finnews_d2")
    runsql(
        "create table ff_finnews_d2 (symbol varchar(10), iex_id varchar(30), term varchar(10), model_type varchar(4), raw_sense float)"
    )
    IEX_SYMBOL = fetch_rawsql(
        "select a.symbol, a.iex_id, a.published from ff_finnews_iex a, ff_finnews_d1 b where a.iex_id=b.iex_id"
    )
    ALL_LOCAL, _X, _y = focus_prep(IEX_SYMBOL, IEX_ids, tv, is_train=False)
    print("Done preparing incremental articles for scoring " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    for term in TERMS:
        focus_models(term, ALL_LOCAL, [], [], is_train=False)
        print("Done scoring incremental articles for " + term + " : " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    runsql("create index ff_finnews_d2_i1 on ff_finnews_d2 (iex_id)")
    runsql("drop table if exists ff_finnews_d3")
    runsql(
        "create table ff_finnews_d3 as select a.*, published, title from ff_finnews_d2 a, ff_finnews_d0 b where a.iex_id=b.iex_id"
    )
    runsql(
        "create index ff_finnews_d3_i1 on ff_finnews_d3 (symbol, iex_id, model_type, term)"
    )
    runsql("drop table if exists ff_finnews_d4")
    runsql(
        "create table ff_finnews_d4 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
           , max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
           , avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
           from ff_finnews_d3 group by symbol, iex_id, model_type")
    runsql(
        "create index ff_finnews_d4_i1 on ff_finnews_d4 (symbol, iex_id, model_type)"
    )
    runsql("drop table if exists ff_finnews_d5")
    runsql(
        "create table ff_finnews_d5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
           , sum( if(model_type='L', 1/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense, 1/log10(ifnull(b.n_doc,0)+2)/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense) ) con_sense \
           , if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
           , sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
           from ff_finnews_d4 a left join ff_finnews_b4 b on a.symbol = b.symbol group by symbol, iex_id"
    )
    runsql("drop table if exists ff_finnews_d6")
    runsql(
        "create table ff_finnews_d6 as select ifnull(b.bin,if(con_sense>max_max,99,0)) bin , a.* from ff_finnews_d5 a inner join (select max(max_con) max_max, min(min_con) min_min from ff_finnews_b7) c left join ff_finnews_b7 b on a.con_sense>b.min_con and a.con_sense<=b.max_con"
    )
    runsql("drop table if exists ff_finnews_d7")
    runsql(
        "create table ff_finnews_d7 as select a.*, date(published) published_date, (con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_d6 a, ff_finnews_b7 b where a.bin=b.bin"
    )
    runsql("drop table if exists ff_finnews_d8")
    runsql(
        "create table ff_finnews_d8 as select a.symbol, iex_id, published, dest_date, n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, n_l_up, n_l_down, bin+bin_float bin, dense_sense, bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff from ff_finnews_d7 a, ff_finnews_b11 b where a.symbol=b.symbol"
    )
    print("Done incremental sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=True)
Ejemplo n.º 10
0
def build_sense():
    print("Started Fame Sense building ... " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    A, IEX_ids, TV, tv = gather_sense()
    dig_docs(A, IEX_ids, TV, tv)
    profile_training()

    print("Started focus models " + time.strftime('%Y-%m-%d %H:%M:%S'))
    A = fetch_rawsql(
        "select iex_id, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 1 year) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    print("Done fetching focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    tv = TV.transform(documents)
    print("Done vectorizing focus articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))

    runsql("drop table if exists ff_finnews_b0")
    runsql(
        "create table ff_finnews_b0 (symbol varchar(10), iex_id varchar(30), term varchar(10), model_type varchar(4), y float, chg_term float, raw_sense float)"
    )
    for term in TERMS:
        IEX_SYMBOL = fetch_rawsql(
            "select iex_id, symbol, published, date_same, chg_" + term +
            " chg from ff_finnews_w7")
        ALL_LOCAL, OVERALL_X, OVERALL_y = focus_prep(IEX_SYMBOL, IEX_ids, tv)
        print("Done preparing focus articles for " + term + " training " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

        focus_models(term, ALL_LOCAL, OVERALL_X, OVERALL_y, is_train=True)
        print("Finished local sense on term " + term + " ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    runsql("create index ff_finnews_b0_i1 on ff_finnews_b0 (iex_id)")
    runsql("drop table if exists ff_finnews_b1")
    runsql(
        "create table ff_finnews_b1 as select iex_id, any_value(published) published, any_value(date(published)) published_date, any_value(iex_source) iex_source, any_value(iex_title) iex_title from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    runsql("create index ff_finnews_b1_i1 on ff_finnews_b1 (iex_id)")
    runsql("drop table if exists ff_finnews_b2")
    runsql(
        "create table ff_finnews_b2 as select a.*, b.published, b.published_date, b.iex_source, b.iex_title from ff_finnews_b0 a, ff_finnews_b1 b where a.iex_id=b.iex_id"
    )
    runsql(
        "create index ff_finnews_b2_i1 on ff_finnews_b2 (symbol, iex_id, model_type, term)"
    )
    runsql("drop table if exists ff_finnews_b3")
    runsql(
        "create table ff_finnews_b3 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
           , max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
           , avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
           from ff_finnews_b2 group by symbol, iex_id, model_type")
    runsql(
        "create index ff_finnews_b3_i1 on ff_finnews_b3 (symbol, iex_id, model_type)"
    )
    runsql("drop table if exists ff_finnews_b4")
    runsql(
        "create table ff_finnews_b4 as select symbol, count(distinct iex_id) n_doc from ff_finnews_b3 group by symbol"
    )
    runsql("create index ff_finnews_b4_i1 on ff_finnews_b4 (symbol)")
    runsql("drop table if exists ff_finnews_b5")
    runsql(
        "create table ff_finnews_b5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
           , sum( if(model_type='L', 1/(1/log10(b.n_doc+2)+1) * con_sense, 1/log10(b.n_doc+2)/(1/log10(b.n_doc+2)+1) * con_sense) ) con_sense \
           , if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
           , sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
           from ff_finnews_b3 a, ff_finnews_b4 b where a.symbol = b.symbol group by symbol, iex_id"
    )
    runsql("drop table if exists ff_finnews_b6")
    runsql(
        "create table ff_finnews_b6 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) bin, a.* from (select a.*, cnt from (select * from ff_finnews_b5) a, (select count(1) cnt from ff_finnews_b5) b order by con_sense) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b6_i1 on ff_finnews_b6 (bin)")
    runsql("drop table if exists ff_finnews_b7")
    runsql(
        "create table ff_finnews_b7 select a.bin, a.min_con, ifnull(b.min_con,(select max(con_sense) from ff_finnews_b6)) max_con from (select bin bin, min(con_sense) min_con from ff_finnews_b6 group by bin) a left join (select bin-1 bin, min(con_sense) min_con from ff_finnews_b6 group by bin-1) b on a.bin=b.bin"
    )
    runsql("drop table if exists ff_finnews_b8")
    runsql(
        "create table ff_finnews_b8 as select a.*, date(published) published_date, (con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_b6 a, ff_finnews_b7 b where a.bin=b.bin"
    )
    runsql(
        "create index ff_finnews_b8_i1 on ff_finnews_b8 (symbol, published)")
    runsql(
        "create index ff_finnews_b8_i2 on ff_finnews_b8 (symbol, published_date)"
    )
    runsql("drop table if exists ff_stock_date6m")
    runsql("create table ff_stock_date6m as select * from \
           (select adddate('1970-01-01',t4.i*10000 + t3.i*1000 + t2.i*100 + t1.i*10 + t0.i) dest_date from \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t0, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t1, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t2, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t3, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t4) v \
           where dest_date between date_sub(curdate(), interval 6 month) and curdate()"
           )
    runsql("drop table if exists ff_finnews_b9")
    runsql(
        "create table ff_finnews_b9 as select distinct(symbol) symbol, dest_date from ff_finnews_b6, ff_stock_date6m"
    )
    runsql(
        "create index ff_finnews_b9_i1 on ff_finnews_b9 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b10")
    runsql(
        "create table ff_finnews_b10 as select symbol, dest_date, sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50 dense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 dense_base \
           , ifnull((sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50)/(sum(power(0.8,greatest(dlag,0.01)))+0.26),50) dense_sense \
           , sum(power(0.8,greatest(dlag,0.01))*con_sense) condense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 condense_base \
           , ifnull((sum(power(0.8,greatest(dlag,0.01))*con_sense))/(sum(power(0.8,greatest(dlag,0.01)))+0.26),0) condense_sense \
           from (select a.symbol, a.dest_date, b.bin, b.bin_float, round(TIMESTAMPDIFF(HOUR,published,dest_date)/24,2) dlag, con_sense from ff_finnews_b9 a left join ff_finnews_b8 b on a.symbol=b.symbol and DATEDIFF(dest_date, b.published) between 0 and 13) a group by symbol, dest_date"
    )
    runsql(
        "create index ff_finnews_b10_i1 on ff_finnews_b10 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b11")
    runsql(
        "create table ff_finnews_b11 as select a.*, a.dense_sense-b.dense_sense dense_diff, a.condense_sense-b.condense_sense condense_diff from \
           (select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10) ) a, \
           (select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10 where dest_date<(select max(dest_date) from ff_finnews_b10))) b \
           where a.symbol=b.symbol order by abs(dense_diff) desc")
    runsql("create index ff_finnews_b11_i1 on ff_finnews_b11 (symbol)")
    runsql("drop table if exists ff_finnews_b12")
    runsql(
        "create table ff_finnews_b12 as select a.symbol, iex_id, published, dest_date, n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, n_l_up, n_l_down, bin+bin_float bin, dense_sense, bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff from ff_finnews_b8 a, ff_finnews_b10 b where a.symbol=b.symbol and date_add(dest_date,interval 1 day)=published_date"
    )
    runsql(
        "create index ff_finnews_b12_i1 on ff_finnews_b12 (symbol, dest_date)")
    runsql(
        "create index ff_finnews_b12_i2 on ff_finnews_b12 (symbol, published)")
    runsql("create index ff_finnews_b12_i3 on ff_finnews_b12 (symbol, iex_id)")
    runsql("drop table if exists ff_finnews_b13")
    runsql(
        "create table ff_finnews_b13 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dbin, a.* from (select a.*, cnt from (select * from ff_finnews_b12) a, (select count(1) cnt from ff_finnews_b12) b order by bin_diff) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b13_i1 on ff_finnews_b13 (dbin)")
    runsql("drop table if exists ff_finnews_b14")
    runsql(
        "create table ff_finnews_b14 select a.dbin, a.min_con, ifnull(b.min_con,(select max(bin_diff) from ff_finnews_b13)) max_con from (select dbin dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin) a left join (select dbin-1 dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin-1) b on a.dbin=b.dbin"
    )
    runsql("drop table if exists ff_finnews_b15")
    runsql(
        "create table ff_finnews_b15 as select @rn:=@rn+1 rn, dest_date from (select * from ff_stock_date6m order by dest_date desc) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b15_i1 on ff_finnews_b15 (dest_date)")
    runsql("drop table if exists ff_finnews_b16")
    runsql(
        "create table ff_finnews_b16 as select a.*, c.dest_date dest_ref, a.dense_sense-c.dense_sense dense_diff, a.condense_sense-c.condense_sense condense_diff from ff_finnews_b10 a, ff_finnews_b15 b, ff_finnews_b10 c, ff_finnews_b15 d where a.symbol=c.symbol and a.dest_date=b.dest_date and c.dest_date=d.dest_date and b.rn+1=d.rn"
    )
    runsql(
        "create index ff_finnews_b16_i1 on ff_finnews_b16 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b17")
    runsql(
        "create table ff_finnews_b17 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dcon_bin, a.* from (select a.*, cnt from (select * from ff_finnews_b16) a, (select count(1) cnt from ff_finnews_b16) b order by condense_diff) a, (select @rn:=0) b"
    )
    runsql("drop table if exists ff_finnews_b18")
    runsql(
        "create table ff_finnews_b18 select a.dcon_bin, a.min_con, ifnull(b.min_con,(select max(condense_diff) from ff_finnews_b17)) max_con from (select dcon_bin dcon_bin, min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin) a left join (select dcon_bin-1 dcon_bin, min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin-1) b on a.dcon_bin=b.dcon_bin"
    )
    runsql("drop table if exists ff_finnews_b19")
    runsql(
        "create table ff_finnews_b19 as select rn, symbol, dest_date, dense_sense, condense_sense, dest_ref, dense_diff, condense_diff, a.dcon_bin+(condense_diff-b.min_con)/(b.max_con-b.min_con) dcon_bin from ff_finnews_b17 a, ff_finnews_b18 b where a.dcon_bin=b.dcon_bin"
    )
    print("Done focus sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=False)
    file_cleanup()
Ejemplo n.º 11
0
def dig_docs(A, IEX_ids, TV, tv, is_incremental=False):
    if is_incremental:
        keyword_table = 'ff_finnews_c00'
        keysent_table = 'ff_finnews_c11'
    else:
        keyword_table = 'ff_finnews_c0'
        keysent_table = 'ff_finnews_c1'
    runsql("drop table if exists " + keyword_table)
    runsql(
        "create table " + keyword_table +
        " (iex_id varchar(30), key_id int, key_weight float, word varchar(100))"
    )
    runsql("drop table if exists " + keysent_table)
    runsql(
        "create table " + keysent_table +
        " (iex_id varchar(30), key_id int, key_weight float, sentence varchar(1000))"
    )

    T = TV.get_feature_names()
    WORD_BUFFER = []
    SENT_BUFFER = []
    for i, d in enumerate([a['article'] for a in A]):
        dense_vector = tv[i].toarray()[0]
        keywords = [{
            'word': T[j],
            'tfidf': dense_vector[j]
        } for j in np.argsort(dense_vector)[::-1][:N_KEYWORDS]]
        WORD_BUFFER += [(IEX_ids[i], k, w['tfidf'], w['word'])
                        for k, w in enumerate(keywords)]
        sentences = d.replace(';', '.').split('. ')
        sentence_importance = []
        for idx, s in enumerate(sentences):
            s_weight = sum(
                [k['tfidf'] for k in keywords if k['word'] in s.lower()])
            s_weight /= np.log2(max(8, len(s.split())))
            if len(s) < 1000:
                sentence_importance.append({
                    'sentence': s,
                    'weight': s_weight,
                    'idx': idx
                })
        important_sentences = sorted(sentence_importance,
                                     key=lambda k: k['weight'])
        important_sentences = important_sentences[::-1][:N_SENTENCES]
        ordered_sentences = sorted(important_sentences, key=lambda k: k['idx'])
        SENT_BUFFER += [(IEX_ids[i], k, s['weight'], s['sentence'][:1000])
                        for k, s in enumerate(ordered_sentences)]
        if len(WORD_BUFFER) >= 10000:
            runsqlmany(
                "insert into " + keyword_table +
                " (iex_id, key_id, key_weight, word) values (%s,%s,%s,%s)",
                WORD_BUFFER)
            WORD_BUFFER = []
        if len(SENT_BUFFER) >= 10000:
            runsqlmany(
                "insert into " + keysent_table +
                " (iex_id, key_id, key_weight, sentence) values (%s,%s,%s,%s)",
                SENT_BUFFER)
            SENT_BUFFER = []
    if WORD_BUFFER:
        runsqlmany(
            "insert into " + keyword_table +
            " (iex_id, key_id, key_weight, word) values (%s,%s,%s,%s)",
            WORD_BUFFER)
    if SENT_BUFFER:
        runsqlmany(
            "insert into " + keysent_table +
            " (iex_id, key_id, key_weight, sentence) values (%s,%s,%s,%s)",
            SENT_BUFFER)
    print("Done sentence weighing ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
Ejemplo n.º 12
0
def assure_required_tables():
    if table_exist("ff_stock_adj"):
        runsql("drop table ff_stock_adj")
    runsql(FF_STOCK_ADJ_DDL)
    refresh_sp500()
    if not table_exist("ff_stock_summary_hist"):
        runsql(FF_STOCK_SUMMARY_HIST_DDL)
        runsql(FF_STOCK_SUMMARY_HIST_I1)
        runsql(FF_STOCK_SUMMARY_HIST_I2)
    if not table_exist("ff_stock_summary_term"):
        runsql(FF_STOCK_SUMMARY_TERM_DDL)
        runsql(FF_STOCK_SUMMARY_TERM_I1)
Ejemplo n.º 13
0
    sys.exit()

# check data size
cur = getcur()
cur.execute("show tables like 'ff_stock_________'")
T = [t[0] for t in cur.fetchall()]
if T:
    c_adj = fetch_rawsql("select count(1) cnt from ff_stock_adj")[0]['cnt']
    c_last = fetch_rawsql("select count(1) cnt from " + T[-1])[0]['cnt']
    if c_adj < c_last * 0.5:
        print("Weak ingestion")
        sys.exit()

# swap prod table ff_stock with ff_stock_adj
if table_exist(tname):
    runsql("drop table " + tname)
runsql("create table " + tname + " as select * from ff_stock_adj")
runsql("create index " + tname + "_i1 on " + tname + " (symbol, close_date)")
runsql("create index " + tname + "_i2 on " + tname + " (close_date)")
runsql("drop table if exists ff_stock")
runsql("alter table ff_stock_adj rename to ff_stock")
runsql("create index ff_stock_i1 on ff_stock (symbol, close_date)")
runsql("create index ff_stock_i2 on ff_stock (close_date)")
print("Finished daily backup " + tname + " ... " +
      time.strftime('%Y-%m-%d %H:%M:%S'))

tname = "ff_stock_" + cdate.strftime('%Yw%W')
if table_exist(tname):
    runsql("drop table " + tname)
runsql("create table " + tname + " as select * from ff_stock")
print("Finished weekly backup " + tname + " ... " +
Ejemplo n.º 14
0
 for tw in tq['statuses']:
     tid = tw['id']
     sid = max(sid, tid)
     print("    Checking tweet #" + str(tid) + ". " +
           time.strftime('%Y-%m-%d %H:%M:%S'))
     url_list = [i['url'] for i in tw['entities']['urls']]
     # no article
     if len(url_list) == 0:
         print("      No article")
         continue
     chk = fetch_rawsql("select count(1) cnt from ff_tweets where tid = " +
                        str(tid))[0]['cnt']
     # old tweet
     if chk:
         runsql("update ff_tweets set last_updated=now(), favorites=" +
                str(tw['favorite_count']) + ", retweets=" +
                str(tw['retweet_count']) + " where tid=" + str(tid))
         print("      Old tweet")
         continue
     tweet_text = re_pattern.sub(u'\uFFFD', tw['text'])
     #chk = fetch_rawsql("select count(1) cnt from ff_tweets where tweet_text='" + tweet_text + "'")[0]['cnt']
     # same tweet text
     #if chk:
     #    print("      Duplicate")
     #    continue
     for u in url_list:
         chk = fetch_rawsql(
             "select count(1) cnt from ff_tweets where tweet_url = '" + u +
             "'")[0]['cnt']
         if chk:
             break
Ejemplo n.º 15
0
def backup_table(tb, d=3, w=2, m=2):
    if not table_exist(tb):
        print("Backup source " + tb + " does not exist")
        return False
    cdate = utc.localize(datetime.datetime.utcnow()).astimezone(eastern).date()
    if d:
        tname = tb + "_bck" + cdate.strftime('%Y%m%d')
        runsql("drop table if exists " + tname)
        runsql("create table " + tname + " as select * from " + tb)
        cur = getcur()
        cur.execute("show tables like '" + tb + "_bck________'")
        T = [t[0] for t in cur.fetchall()]
        for t in T[:-d]:
            runsql("drop table " + t)
        print("..Backup + cleanup " + tb + " daily ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    if w:
        tname = tb + "_bck" + cdate.strftime('%Yw%W')
        runsql("drop table if exists " + tname)
        runsql("create table " + tname + " as select * from " + tb)
        cur = getcur()
        cur.execute("show tables like '" + tb + "_bck____w__'")
        T = [t[0] for t in cur.fetchall()]
        for t in T[:-w]:
            runsql("drop table " + t)
        print("..Backup + cleanup " + tb + " weekly ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    if m:
        tname = tb + "_bck" + cdate.strftime('%Ym%m')
        runsql("drop table if exists " + tname)
        runsql("create table " + tname + " as select * from " + tb)
        cur = getcur()
        cur.execute("show tables like '" + tb + "_bck____m__'")
        T = [t[0] for t in cur.fetchall()]
        for t in T[:-m]:
            runsql("drop table " + t)
        print("..Backup + cleanup " + tb + " monthly ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    dbcommit()
    print("Successfully backed up " + tb)
    return True
Ejemplo n.º 16
0
def profile_training():
    runsql("drop table if exists ff_stock_1y")
    runsql(
        "create table ff_stock_1y as select * from ff_stock where close_date>=date_sub(now(), interval 1 year)"
    )
    runsql("create index ff_stock_1y_i1 on ff_stock_1y (symbol, close_date)")
    runsql("drop table if exists ff_finnews_w1")
    runsql(
        "create table ff_finnews_w1 as select symbol, published pt, date(published) published, iex_id, iex_source from ff_finnews_iex where greatest(length(article),length(iex_summary))>500"
    )
    runsql("drop table if exists ff_finnews_w2")
    runsql(
        "create table ff_finnews_w2 as select a.*, (select min(rn) from ff_stock_w40 where close_date<published) rn from ff_finnews_w1 a"
    )
    runsql("create index ff_finnews_w2_i1 on ff_finnews_w2 (symbol, rn)")
    runsql("drop table if exists ff_finnews_w3")
    runsql(
        "create table ff_finnews_w3 as select a.*, bp.close_date date_previous, bs.close_date date_same, b1.close_date date_1d, b2.close_date date_2d from ff_finnews_w2 a left join ff_stock_w40 bp on a.rn=bp.rn left join ff_stock_w40 bs on a.rn=bs.rn+1 left join ff_stock_w40 b1 on a.rn=b1.rn+2 left join ff_stock_w40 b2 on a.rn=b2.rn+3"
    )
    runsql("create index ff_finnews_w3_i1 on ff_finnews_w3 (published)")
    runsql("drop table if exists ff_finnews_w4")
    runsql("create table ff_finnews_w4 as select a.* \
       ,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published , interval 1 week)) date_1w \
       ,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published , interval 2 week)) date_2w \
       ,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published , interval 1 month)) date_1m \
       ,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published , interval 2 month)) date_2m \
       from ff_finnews_w3 a")
    runsql(
        "create index ff_finnews_w4_i1 on ff_finnews_w4 (symbol, date_previous)"
    )
    runsql(
        "create index ff_finnews_w4_i2 on ff_finnews_w4 (symbol, date_same)")
    runsql("create index ff_finnews_w4_i3 on ff_finnews_w4 (symbol, date_1d)")
    runsql("create index ff_finnews_w4_i4 on ff_finnews_w4 (symbol, date_2d)")
    runsql("drop table if exists ff_finnews_w5")
    runsql(
        "create table ff_finnews_w5 as select a.*, bp.close close_p, bs.close close_s, b1d.close close_1d, b2d.close close_2d from ff_finnews_w4 a \
       left join ff_stock_1y bp on a.symbol=bp.symbol and a.date_previous=bp.close_date \
       left join ff_stock_1y bs on a.symbol=bs.symbol and a.date_same=bs.close_date \
       left join ff_stock_1y b1d on a.symbol=b1d.symbol and a.date_1d=b1d.close_date \
       left join ff_stock_1y b2d on a.symbol=b2d.symbol and a.date_2d=b2d.close_date"
    )
    runsql("create index ff_finnews_w5_i1 on ff_finnews_w5 (symbol, date_1w)")
    runsql("create index ff_finnews_w5_i2 on ff_finnews_w5 (symbol, date_2w)")
    runsql("create index ff_finnews_w5_i3 on ff_finnews_w5 (symbol, date_1m)")
    runsql("create index ff_finnews_w5_i4 on ff_finnews_w5 (symbol, date_2m)")
    runsql("drop table if exists ff_finnews_w6")
    runsql(
        "create table ff_finnews_w6 as select a.*, b1w.close close_1w, b2w.close close_2w, b1m.close close_1m, b2m.close close_2m from ff_finnews_w5 a \
       left join ff_stock_1y b1w on a.symbol=b1w.symbol and a.date_1w=b1w.close_date \
       left join ff_stock_1y b2w on a.symbol=b2w.symbol and a.date_2w=b2w.close_date \
       left join ff_stock_1y b1m on a.symbol=b1m.symbol and a.date_1m=b1m.close_date \
       left join ff_stock_1y b2m on a.symbol=b2m.symbol and a.date_2m=b2m.close_date"
    )
    runsql("drop table if exists ff_finnews_w7")
    runsql(
        "create table ff_finnews_w7 as select a.*, (close_s-close_p)/close_p chg_sameday, (close_1d-close_s)/close_s chg_1d, (close_2d-close_s)/close_s chg_2d , (close_1w-close_s)/close_s chg_1w, (close_2w-close_s)/close_s chg_2w, (close_1m-close_s)/close_s chg_1m, (close_2m-close_s)/close_s chg_2m from ff_finnews_w6 a"
    )
    runsql("create index ff_finnews_w7_i1 on ff_finnews_w7 (published)")
    print("Done profiling ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
Ejemplo n.º 17
0
     #runsql("update ff_finnews_iex set last_updated=now() where symbol='"+s+"' and iex_url='"+post['url'][:100]+"'")
     continue
 # news previously fetched
 if pi:
     A = fetch_rawsql(
         "select article, article_url, article_title from ff_finnews_iex where iex_id='"
         + pi + "' and article<>''")
     if A:
         A = A[0]
         rawsql = "insert into ff_finnews_iex (symbol, published, last_updated, iex_id, iex_url, iex_source, iex_title, iex_image, iex_summary, iex_related, article_title, article_url, article)"
         rawsql += "values (%s, %s, now(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
         val = (s, pt, pi, post['url'][:100], post['source'][:20],
                post['headline'][:255], post['image'][:100],
                post['summary'][:65000], post['related'][:255],
                A['article_title'], A['article_url'], A['article'])
         runsql(rawsql, val)
         print("  [i] Recycled entry (" + pi + "): " + post['headline'])
         continue
 # news previously failed retrieving from source
 f_refetch = False
 chk = fetch_rawsql(
     "select count(1) cnt from ff_finnews_iex where symbol='" + s +
     "' and iex_url='" + post['url'][:100] +
     "' and ( article='' or length(article)<length(iex_summary) )"
 )[0]['cnt']
 if chk:
     if F_REFETCH_IN_SCAN:
         f_refetch = True
         print("  [i] Refetching old entry", post['headline'])
     else:
         continue