def assure_required_table():
    print("checking")
    if not table_exist("ff_scan_symbols"):
        print("table ff_scan_symbols does not exist; creating...")
        runsql(
            "create table ff_scan_symbols (symbol varchar(10), last_updated datetime, "
            "last_scanned datetime, security varchar(100), sector varchar(100), "
            "subsec varchar(200), hq varchar(200), first_added datetime)")

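# The DB helpers used throughout this module (table_exist, runsql, fetch_rawsql,
# runsqlmany, dbcommit, dbclose, getcur) come from a shared module that is not
# part of this file. Below is a minimal sketch of the assumed contract, using
# mysql.connector; the *_sketch names and connection settings are hypothetical,
# for illustration only.
def _connect_sketch():
    import mysql.connector
    # placeholder credentials; the real module manages its own connection
    return mysql.connector.connect(user='ff', password='***', database='ff')

def runsql_sketch(cnx, rawsql, val=None):
    # execute one statement, optionally with a parameter tuple, matching the
    # runsql(rawsql) and runsql(rawsql, val) call patterns in this file
    cur = cnx.cursor()
    if val is None:
        cur.execute(rawsql)
    else:
        cur.execute(rawsql, val)
    cur.close()

def fetch_rawsql_sketch(cnx, rawsql):
    # return all rows as dicts keyed by column name, matching the
    # fetch_rawsql(...)[0]['cnt'] access pattern in this file
    cur = cnx.cursor(dictionary=True)
    cur.execute(rawsql)
    rows = cur.fetchall()
    cur.close()
    return rows
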
def refresh_sp500():
    assure_required_table()
    print("fetching sp500 list...")
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'html5lib')
    table = soup.find('table', {'class': 'wikitable sortable'})
    for row in table.findAll('tr')[1:]:
        r = row.findAll('td')
        #ticker = r[1].text.rstrip()
        ticker = r[0].text.rstrip()
        #print("working on "+ticker+"...")
        Log.info("SP500", "working on " + ticker + "...")
        check = fetch_rawsql(
            "select symbol from ff_scan_symbols where symbol='" + ticker + "'")
        if len(check) > 0:
            rawsql = "update ff_scan_symbols set last_updated=now() WHERE symbol='" + ticker + "'"
            runsql(rawsql)
        else:
            rawsql = ("insert into ff_scan_symbols (symbol, last_updated, security, "
                      "sector, subsec, hq, first_added) values (%s, %s, %s, %s, %s, %s, %s)")
            val = (ticker,
                   datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                   r[0].text, r[3].text, r[4].text, r[5].text,
                   (r[6].text if r[6].text and len(r[6].text) == 10 else None))
            runsql(rawsql, val)
    dbcommit()

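# refresh_sp500 interpolates the ticker into its select/update strings but uses
# %s placeholders for the insert. A sketch of the existence check done with a
# placeholder instead, assuming fetch_rawsql forwards a parameter tuple to
# cursor.execute the way runsql(rawsql, val) does (that signature is an
# assumption here, and symbol_known_sketch is a hypothetical helper):
def symbol_known_sketch(ticker):
    # parameterized variant of the lookup in refresh_sp500
    return len(fetch_rawsql("select symbol from ff_scan_symbols where symbol=%s",
                            (ticker,))) > 0
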
def assure_required_tables():
    if not table_exist("ff_tweets"):
        runsql(FF_TWEETS_DDL)
    if not table_exist("ff_scan_symbols"):
        refresh_sp500()
    r = fetch_rawsql(
        "select min(datediff(now(),last_updated)) d from ff_scan_symbols")[0]
    if r['d'] > 7:
        refresh_sp500()

def assure_required_tables():
    if not table_exist("ff_finnews_iex"):
        runsql(FF_FINNEWS_IEX_DDL)
        runsql("create index ff_finnews_iex_i1 on ff_finnews_iex (symbol, iex_url)")
        runsql("create index ff_finnews_iex_i2 on ff_finnews_iex (iex_id)")
        runsql("create index ff_finnews_iex_i3 on ff_finnews_iex (last_updated)")
        runsql("create index ff_finnews_iex_i4 on ff_finnews_iex (published)")

def assure_tables():
    if not table_exist("ff_status_scan"):
        runsql(FF_STATUS_SCAN)
        runsql("create index ff_status_scan_i1 on ff_status_scan (batched)")
        runsql("create index ff_status_scan_i2 on ff_status_scan (status)")
    else:
        a = fetch_rawsql("show table status like 'ff_status_scan'")
        a = a[0]['Collation']
        if a != 'utf8_unicode_ci':
            runsql("ALTER TABLE ff_status_scan CONVERT TO CHARACTER SET utf8 "
                   "COLLATE utf8_unicode_ci")
    # maintenance related to collation correction
    if table_exist("ff_scan_symbols"):
        a = fetch_rawsql("show table status like 'ff_scan_symbols'")
        a = a[0]['Collation']
        if a != 'utf8_unicode_ci':
            runsql("ALTER TABLE ff_scan_symbols CONVERT TO CHARACTER SET utf8 "
                   "COLLATE utf8_unicode_ci")

def do_scan():
    status_scan_starttime = datetime.datetime.now()
    assure_tables()
    symbols = fetch_rawsql(
        "select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<14")
    s = [i['symbol'] for i in symbols]
    r = requests.get(FF_SCAN_HOST + "/api/v1/misc/nonsp/")
    if r.status_code != 200:
        print("Failed to retrieve list of non-SP500 symbols")
    else:
        r = r.json()
        for i in r['symbol_list']:
            if i in s or i in NON_SCAN_LIST:
                continue
            symbols += [{'symbol': i, 'sector': ''}]
    # stop when:
    # 1. data not stabilized, before 9:45am
    # 2. more than 1 hour after trading stopped
    # 3. non-trading days
    enow = utc.localize(datetime.datetime.utcnow()).astimezone(eastern)
    etime = enow.hour + enow.minute / 60
    if enow.weekday() < 5 and etime > 9.75 and etime < 17.5:
        has_recent_scans = fetch_rawsql(
            "select count(1) cnt from ff_status_scan where timestampdiff(minute, scanned, '"
            + get_eastern() + "') < " + str(MINUTES_TRADING_SCANS))[0]['cnt']
        if has_recent_scans:
            print("Already scanned status in the last " + str(MINUTES_TRADING_SCANS)
                  + " minutes. Cooldown even if FORCE_RUN")
            dbclose()
            sys.exit()
    elif not FORCE_RUN:
        print("Not during trading hours - skipped status scan")
        dbclose()
        sys.exit()
    # Clear the scan cache only when the scan actually runs
    if table_exist("ff_status_scan_a1"):
        runsql("drop table ff_status_scan_a1")
    runsql(FF_STATUS_SCAN_A1)
    n_rotten = 0
    for s0 in symbols:
        s = s0['symbol']
        print("Scanning " + s + " ...")
        try:
            q = requests.get("https://api.iextrading.com/1.0/stock/" + s + "/quote")
            q = q.json()
        except:
            print("- bad IEX call for " + s + " ... skipped")
            continue
            """
            print("Unexpected error:", sys.exc_info()[0])
            raise
            """
        # check required keys
        if not ('calculationPrice' in q and 'latestPrice' in q
                and 'changePercent' in q and 'marketCap' in q
                and 'latestUpdate' in q and 'primaryExchange' in q):
            print("- bad IEX return format for " + s + " ... skipped")
            continue
        if 'Arca' in q['primaryExchange']:
            print("- not scanning ETF " + s + " ... skipped")
            continue
        if q['marketCap'] < MIN_MARKETCAP:
            print("- MarketCap too low to scan, for " + s + " ... skipped")
            continue
        # double-check that IEX actually returned the latest quote
        updated = utc.localize(
            datetime.datetime.utcfromtimestamp(q['latestUpdate'] // 1000)).astimezone(eastern)
        print(enow, updated)
        diff = enow - updated
        dmin = (diff.days * 86400 + diff.seconds) // 60
        if dmin > MINUTES_FRESH_SCANS and not FORCE_RUN:
            print("- rotten/older IEX return data for " + s + " ... skipped")
            n_rotten += 1
            if n_rotten >= MAX_ROTTEN:
                sys.exit()
            continue
        # Upon the first fresh IEX data of the day, swap out yesterday's benchmark
        # ceiling-floor tables (from get_stock_adj).
        # *The benchmark tables should only be updated at the time the market opens
        if s == symbols[0]['symbol'] and q['calculationPrice'] == 'tops':
            if table_exist("ff_stock_cf_w00") and table_exist("ff_stock_cf_w0"):
                runsql("drop table ff_stock_cf_w0")
                runsql("create table ff_stock_cf_w0 as select * from ff_stock_cf_w00")
                runsql("create index ff_stock_cf_w0_i1 on ff_stock_cf_w0 (symbol)")
                runsql("drop table ff_stock_cf_w00")
        # FF_scans - Volume
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/vol/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'high' in r['rarity']:
                msg = VOL_TEMPLATE[np.random.choice(
                    len(VOL_TEMPLATE), p=[e['prob'] for e in VOL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&VOLUME", r['volume_human'])
                msg = msg.replace("&Z", "{0:.1f}".format(r['z']))
                msg = msg.replace("&URL", FF_SCAN_HOST + "fb/vol/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"v": r['volume_human'], "z": "{0:.1f}".format(r['z'])})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'vol', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF vol scan for " + s + " ... skipped")
            print(r)
        # FF_scans - Volatility + Fluctuation
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/pfl/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = PFL_TEMPLATE[np.random.choice(
                    len(PFL_TEMPLATE), p=[e['prob'] for e in PFL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/vol/" + s + "/")
                msg = msg.replace("&FLUX", str(r['hl_ratio']) + "%")
                msg = msg.replace("&L", str(r['low']))
                msg = msg.replace("&H", str(r['high']))
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"f": r['hl_ratio'], "l": r['low'], "h": r['high']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'pfl', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF pfl scan for " + s + " ... skipped")
            print(r)
        # FF_scans - Single Day Change
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/chg/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if s0['sector']:
                val = (s, s0['sector'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"),
                       enow.strftime("%Y-%m-%d %H:%M:%S"))
                runsql(INSERT_A1, val)
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = CHG_TEMPLATE[np.random.choice(
                    len(CHG_TEMPLATE), p=[e['prob'] for e in CHG_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&CHG", str(abs(round(r['chg'] * 100, 2))))
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/chg/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                msg = msg.replace("&HAVE",
                                  ("is having" if r['status'] == "live" else "had"))
                msg = msg.replace("&JS",
                                  (("roar" if "extreme" in r['rarity'] else
                                    ("jump" if "very" in r['rarity'] else "gain"))
                                   if r['chg'] > 0 else
                                   ("crash" if "extreme" in r['rarity'] else
                                    ("dive" if "very" in r['rarity'] else "drop"))))
                #ctx = str({"c": r['chg'], "m": r['move'], "js": r['js']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'chg', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF chg scan for " + s + " ... skipped")
            print(r)
        # FF_scans - Ceiling Floor Penetration
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/cfp/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and r['target'] == 'quoted':
                msg = CFP_TEMPLATE[np.random.choice(
                    len(CFP_TEMPLATE), p=[e['prob'] for e in CFP_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&TARGET", r['target'])
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/dme/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"t": r['target'], "m": r['move']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'cfp', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF cfp scan for " + s + " ... skipped")
            print(r)
    runsql("drop table if exists ff_status_scan_w1")
    runsql(
        "create table ff_status_scan_w1 as select a.*, @rn:=@rn+1 rn from "
        "(select batched, count(1) n_item, count(distinct symbol) n_symbol "
        "from ff_status_scan group by batched order by batched desc) a, "
        "(select @rn:=0) b")
    runsql("create index ff_status_scan_w1_i1 on ff_status_scan_w1 (n_symbol, rn)")
    runsql("drop table if exists ff_status_scan_w2")
    runsql(
        "create table ff_status_scan_w2 as select stype, if(changePct>=0,'u','d') ch, "
        "count(1) n from ff_status_scan a, (select batched as max_batched from "
        "ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b "
        "where a.batched=b.max_batched and a.marketcap>" + str(MIN_MARKETCAP) +
        " group by stype, if(changePct>=0,'u','d')")
    runsql("drop table if exists ff_status_scan_w3")
    runsql("create table ff_status_scan_w3 as select * from ff_status_scan_a1")
    runsql("drop table if exists ff_status_scan_w4")
    runsql(
        "create table ff_status_scan_w4 as select max(batched) batched, sector, "
        "count(1) cnt, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, "
        "round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct "
        "from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc")
    if not table_exist("ff_status_scan_a2"):
        runsql("create table ff_status_scan_a2 as select * from ff_status_scan_w4")
        runsql("create index ff_status_scan_a2_i1 on ff_status_scan_a2 (batched)")
    else:
        runsql("insert into ff_status_scan_a2 select * from ff_status_scan_w4")
    runsql("drop table if exists ff_status_scan_w5")
    runsql(
        "create table ff_status_scan_w5 as select d.sector, stype, count(1) n, "
        "sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, "
        "any_value(d.cnt) n_sector, any_value(round(count(1)/d.cnt,4)) s_ratio "
        "from ff_status_scan a join (select batched as max_batched from "
        "ff_status_scan_w1 order by rn limit 1) b on a.batched=b.max_batched "
        "join (select symbol, sector from ff_scan_symbols where "
        "datediff(now(),last_updated)<60) c on a.symbol=c.symbol "
        "right join ff_status_scan_w4 d on c.sector=d.sector "
        "group by d.sector, a.stype order by sector, s_ratio desc")
    runsql("create index ff_status_scan_w5_i1 on ff_status_scan_w5 (sector, s_ratio)")
    runsql("drop table if exists ff_status_scan_w6")
    runsql(
        "create table ff_status_scan_w6 as select max(batched) batched, sector, "
        "any_value(if(locate(' ',sector)>0, concat(substr(sector,1,1),"
        "substr(sector,locate(' ',sector)+1,2)), substr(sector,1,3))) sector3, "
        "count(1) cnt, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, "
        "sum(marketcap) sm, round(sum(changePct*marketcap)) cm, "
        "round(sum(abs(changePct)*marketcap)) am, "
        "round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct "
        "from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc")
    runsql("drop table if exists ff_status_scan_w7")
    runsql(
        "create table ff_status_scan_w7 as select a.security, a.subsec, a.hq, b.*, "
        "changePct*marketcap marketeff, abs(changePct*marketcap) marketabs from "
        "(select symbol, security, subsec, hq from ff_scan_symbols where "
        "datediff(now(),last_updated)<60) a, ff_status_scan_w3 b "
        "where a.symbol=b.symbol order by marketabs")
    runsql("create index ff_status_scan_w7_i1 on ff_status_scan_w7 (sector, marketabs)")
    runsql("drop table if exists ff_status_scan_w8")
    runsql(
        "create table ff_status_scan_w8 as select b.batched_date, a.* from "
        "ff_status_scan_a2 a,(select date(batched) batched_date, "
        "max(batched) batched_time from ff_status_scan_a2 group by date(batched)) b "
        "where a.batched=b.batched_time")
    runsql("create index ff_status_scan_w8_i1 on ff_status_scan_w8 (sector, batched_date)")
    runsql("create index ff_status_scan_w8_i2 on ff_status_scan_w8 (batched_date)")
    cache_baking()
    status_scan_endtime = datetime.datetime.now()
    timedetail = "status_scan job ran from {0} to {1}; total running time was {2} seconds".format(
        status_scan_starttime, status_scan_endtime,
        (status_scan_endtime - status_scan_starttime).seconds)
    print(timedetail)
    Log.info("status_scan.py", timedetail)

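# do_scan repeats the same back-off loop for each famebits endpoint (vol, pfl,
# chg, cfp). A sketch of that pattern factored into a helper; the name
# get_with_backoff_sketch is hypothetical, and the retry behavior mirrors the
# inline loops above exactly (retry only on HTTP 429, sleeping 5s between tries):
def get_with_backoff_sketch(url, headers=None, wait=5):
    while True:
        r = requests.get(url, headers=headers)
        if r.status_code != 429:  # anything but "too many requests" is returned as-is
            return r
        time.sleep(wait)
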
def cache_baking():
    print("Caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_status_scan_o1")
    rawsql = "create table ff_status_scan_o1 as select symbol, stype, infov, w_micd from \
(select a.*, td+w_mic*md w_micd, @rn:=if(@s=symbol,@rn+1,0) rn, @s:=symbol s from \
(select a.symbol, marketCap, updated, scanned, stype, rarity, context, infov, \
if(stype='vol',if(md>1,1,md),1) md, \
if(td>" + str(AGING_TIME_FACTOR) + "," + str(AGING_TIME_FACTOR) + ",td)*rand()*" + str(AGING_RAND_SCALE) + " td, \
(log10(marketCap)-9)*(infov-90)*(log10(abs(changePct*10000)+1)) w_mic from \
(select a.*, (hour(updated)*60+minute(updated))/" + str(AGING_MATURE_HOUR * 60) + " md, \
if(timestampdiff(minute,batched,utc_timestamp())<200,0,(timestampdiff(minute,batched,utc_timestamp())-200)/480) td \
from ff_status_scan a) a, \
(select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b \
where a.batched=b.max_batched order by w_mic desc limit " + str(N_OVERALL_CACHE * 4) + ") a, \
(select @s:='',@rn:=0) b order by marketCap desc, w_micd desc \
) a where rn=0 order by w_micd desc limit " + str(N_OVERALL_CACHE)
    runsql(rawsql)
    runsql("drop table if exists ff_status_scan_o2")
    runsql(
        "create table ff_status_scan_o2 as select a.*, c.w_micd from ff_status_scan a, "
        "(select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 "
        "order by rn limit 1) b, ff_status_scan_o1 c where a.batched=b.max_batched "
        "and a.symbol=c.symbol order by w_micd desc")
    runsql("drop table if exists ff_status_scan_o3")
    runsql(
        "create table ff_status_scan_o3 as select (@rn:=if(@s=symbol,@rn,@rn+1))*1 rn, "
        "@s:=symbol symbol, quote, changePct, marketcap, stype, context, infov, w_micd "
        "from ff_status_scan_o2 a, (select @rn:=0, @s:='') b")
    S = fetch_rawsql(
        "select rn, symbol, max(quote) quote, max(changePct) changePct, "
        "max(marketcap) marketcap, max(w_micd) w_micd from ff_status_scan_o3 "
        "group by rn, symbol")
    if S:
        for s in S:
            r = requests.get(FF_SCAN_HOST + "/api/v1/fame66/w52/" + s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'datasets' in r:
                    ctx = r['datasets'][0]
                    ctx = str(ctx)
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, "
                            "marketcap, stype, context, infov, w_micd) values "
                            "(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f6w', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 support of " + s['symbol'])
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/f66/" + s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'bits' in r:
                    a = [i['pr'] for i in r['bits']]
                    b = [i['c_diff'] for i in r['bits']]
                    ctx = str({"pr": a, "c_diff": b})
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, "
                            "marketcap, stype, context, infov, w_micd) values "
                            "(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f66', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 chart of " + s['symbol'])
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/ahd/" + s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'target' in r:
                    if r['target'] == 'self':
                        ctx = str(r)
                        if len(ctx) <= 255:
                            runsql(
                                "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, "
                                "marketcap, stype, context, infov, w_micd) values "
                                "(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                (s['rn'], s['symbol'], s['quote'], s['changePct'],
                                 s['marketcap'], 'ahd', ctx, r['I'], s['w_micd']))
                        else:
                            print("ctx longer than 255 for Fame Ahead of " + s['symbol'])
    runsql("drop table if exists ff_status_scan_o4")
    runsql("create table ff_status_scan_o4 as select * from ff_status_scan_o3 order by rn")
    runsql("create index ff_status_scan_o4_i1 on ff_status_scan_o4 (rn)")
    runsql("create index ff_status_scan_o4_i2 on ff_status_scan_o4 (stype, infov, marketcap)")
    print("Done caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))

def final_sense(is_incremental=False):
    if is_incremental:
        runsql("drop table if exists ff_finnews_f2")
        runsql(
            "create table ff_finnews_f2 as select a.*, b.iex_source, b.iex_title, "
            "b.iex_related, b.article_url from (select * from ff_finnews_b12 union "
            "select * from ff_finnews_d8) a, (select iex_id, "
            "any_value(iex_source) iex_source, any_value(iex_title) iex_title, "
            "any_value(iex_related) iex_related, any_value(article_url) article_url "
            "from ff_finnews_iex where published>=date_sub(now(), interval 6 month) "
            "and greatest(length(article),length(iex_summary))>500 group by iex_id) b "
            "where a.iex_id=b.iex_id")
        runsql("create index ff_finnews_f2_i1 on ff_finnews_f2 (symbol, dest_date)")
        runsql("create index ff_finnews_f2_i2 on ff_finnews_f2 (symbol, published)")
        runsql("create index ff_finnews_f2_i3 on ff_finnews_f2 (symbol, iex_id)")
        runsql("drop table if exists ff_finnews_f3")
        runsql(
            "create table ff_finnews_f3 as select * from ff_finnews_c1 union "
            "select * from ff_finnews_c11")
        runsql("create index ff_finnews_f3_i1 on ff_finnews_f3 (iex_id)")
        runsql("drop table if exists ff_finnews_f4")
        runsql(
            "create table ff_finnews_f4 as select * from ff_finnews_c0 union "
            "select * from ff_finnews_c00")
        runsql("create index ff_finnews_f4_i1 on ff_finnews_f4 (iex_id)")
        print("Finalized Fame Sense incremental run ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    else:
        runsql("drop table if exists ff_finnews_f0")
        runsql("create table ff_finnews_f0 as select * from ff_finnews_b19")
        runsql("create index ff_finnews_f0_i1 on ff_finnews_f0 (symbol, dest_date)")
        runsql("create index ff_finnews_f0_i2 on ff_finnews_f0 (dest_date)")
        runsql("drop table if exists ff_finnews_f1")
        runsql(
            "create table ff_finnews_f1 as select a.sector, "
            "ifnull(marketcap,if(@s=symbol,@m,null)) marketcap, @s:=symbol symbol, "
            "dest_date, dense_sense, condense_sense, dense_diff, condense_diff, dcon_bin, "
            "dense_diff*ifnull(marketcap,if(@s=symbol,@m,null)) denseeff, "
            "abs(dense_diff*ifnull(marketcap,if(@s=symbol,@m,null))) denseabs, "
            "@m:=ifnull(marketcap,@m) dummy_mc from (select c.sector, "
            "round(c.marketcap*cr) marketcap, d.* from ff_finnews_f0 d "
            "inner join ff_status_scan_w3 c on d.symbol=c.symbol "
            "left join (select a.symbol, a.close_date, a.close/b.close cr from ff_stock a, "
            "(select symbol, close from ff_stock where close_date=(select max(close_date) "
            "from ff_stock)) b where a.close_date>=date_sub(curdate(), interval 6 month) "
            "and a.symbol=b.symbol) a on d.symbol=a.symbol and d.dest_date=a.close_date "
            "order by symbol, dest_date ) a, (select @s:='',@m:=null) b")
        runsql("create index ff_finnews_f1_i1 on ff_finnews_f1 (sector, dest_date, dense_diff)")
        runsql("create index ff_finnews_f1_i2 on ff_finnews_f1 (dest_date)")
        runsql("create index ff_finnews_f1_i3 on ff_finnews_f1 (symbol, dest_date)")
        runsql("drop table if exists ff_finnews_f2")
        runsql(
            "create table ff_finnews_f2 as select a.*, b.iex_source, b.iex_title, "
            "b.iex_related, b.article_url from ff_finnews_b12 a, (select iex_id, "
            "any_value(iex_source) iex_source, any_value(iex_title) iex_title, "
            "any_value(iex_related) iex_related, any_value(article_url) article_url "
            "from ff_finnews_iex where published>=date_sub(now(), interval 6 month) "
            "and greatest(length(article),length(iex_summary))>500 group by iex_id) b "
            "where a.iex_id=b.iex_id")
        runsql("create index ff_finnews_f2_i1 on ff_finnews_f2 (symbol, dest_date)")
        runsql("create index ff_finnews_f2_i2 on ff_finnews_f2 (symbol, published)")
        runsql("create index ff_finnews_f2_i3 on ff_finnews_f2 (symbol, iex_id)")
        runsql("drop table if exists ff_finnews_f3")
        runsql("create table ff_finnews_f3 as select * from ff_finnews_c1")
        runsql("create index ff_finnews_f3_i1 on ff_finnews_f3 (iex_id)")
        runsql("drop table if exists ff_finnews_f4")
        runsql("create table ff_finnews_f4 as select * from ff_finnews_c0")
        runsql("create index ff_finnews_f4_i1 on ff_finnews_f4 (iex_id)")
        runsql("drop table if exists ff_finnews_f5")
        runsql("create table ff_finnews_f5 as select * from ff_finnews_w7")
        runsql("create index ff_finnews_f5_i1 on ff_finnews_f5 (symbol, iex_id)")
        print("Finalized Fame Sense training run ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

def incremental_sense():
    print("Started incrementals " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_finnews_d0")
    runsql(
        "create table ff_finnews_d0 as select iex_id, any_value(published) published, "
        "any_value(iex_title) title, "
        "any_value(if(length(article)>length(iex_summary),article,iex_summary)) article "
        "from ff_finnews_iex where published>=date_sub(now(), interval 1 week) "
        "and greatest(length(article),length(iex_summary))>500 group by iex_id")
    runsql("create index ff_finnews_d0_i1 on ff_finnews_d0 (iex_id)")
    runsql("drop table if exists ff_finnews_d1")
    runsql(
        "create table ff_finnews_d1 as select a.iex_id, a.title, a.article, "
        "b.iex_id dummy from ff_finnews_d0 a left join ff_finnews_b1 b "
        "on a.iex_id=b.iex_id having dummy is null")
    runsql("create index ff_finnews_d1_i1 on ff_finnews_d1 (iex_id)")
    A = fetch_rawsql("select iex_id, title, article from ff_finnews_d1")
    print("Done fetching incremental articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning incremental articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    TV = joblib.load(FILE_GLOBAL_TV_LST)
    tv = TV.transform(documents)
    T = TV.get_feature_names()
    print("Done vectorizing incremental articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    dig_docs(A, IEX_ids, TV, tv, is_incremental=True)
    runsql("drop table if exists ff_finnews_d2")
    runsql(
        "create table ff_finnews_d2 (symbol varchar(10), iex_id varchar(30), "
        "term varchar(10), model_type varchar(4), raw_sense float)")
    IEX_SYMBOL = fetch_rawsql(
        "select a.symbol, a.iex_id, a.published from ff_finnews_iex a, ff_finnews_d1 b "
        "where a.iex_id=b.iex_id")
    ALL_LOCAL, _X, _y = focus_prep(IEX_SYMBOL, IEX_ids, tv, is_train=False)
    print("Done preparing incremental articles for scoring " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    for term in TERMS:
        focus_models(term, ALL_LOCAL, [], [], is_train=False)
        print("Done scoring incremental articles for " + term + " : " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("create index ff_finnews_d2_i1 on ff_finnews_d2 (iex_id)")
    runsql("drop table if exists ff_finnews_d3")
    runsql(
        "create table ff_finnews_d3 as select a.*, published, title from ff_finnews_d2 a, "
        "ff_finnews_d0 b where a.iex_id=b.iex_id")
    runsql("create index ff_finnews_d3_i1 on ff_finnews_d3 (symbol, iex_id, model_type, term)")
    runsql("drop table if exists ff_finnews_d4")
    runsql("create table ff_finnews_d4 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
, max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
, avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
from ff_finnews_d3 group by symbol, iex_id, model_type")
    runsql("create index ff_finnews_d4_i1 on ff_finnews_d4 (symbol, iex_id, model_type)")
    runsql("drop table if exists ff_finnews_d5")
    runsql("create table ff_finnews_d5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
, sum( if(model_type='L', 1/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense, 1/log10(ifnull(b.n_doc,0)+2)/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense) ) con_sense \
, if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
, sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
from ff_finnews_d4 a left join ff_finnews_b4 b on a.symbol = b.symbol group by symbol, iex_id")
    runsql("drop table if exists ff_finnews_d6")
    runsql(
        "create table ff_finnews_d6 as select ifnull(b.bin,if(con_sense>max_max,99,0)) bin, "
        "a.* from ff_finnews_d5 a inner join (select max(max_con) max_max, "
        "min(min_con) min_min from ff_finnews_b7) c left join ff_finnews_b7 b "
        "on a.con_sense>b.min_con and a.con_sense<=b.max_con")
    runsql("drop table if exists ff_finnews_d7")
    runsql(
        "create table ff_finnews_d7 as select a.*, date(published) published_date, "
        "(con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_d6 a, "
        "ff_finnews_b7 b where a.bin=b.bin")
    runsql("drop table if exists ff_finnews_d8")
    runsql(
        "create table ff_finnews_d8 as select a.symbol, iex_id, published, dest_date, "
        "n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, "
        "n_l_up, n_l_down, bin+bin_float bin, dense_sense, "
        "bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff "
        "from ff_finnews_d7 a, ff_finnews_b11 b where a.symbol=b.symbol")
    print("Done incremental sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=True)

def build_sense():
    print("Started Fame Sense building ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    A, IEX_ids, TV, tv = gather_sense()
    dig_docs(A, IEX_ids, TV, tv)
    profile_training()
    print("Started focus models " + time.strftime('%Y-%m-%d %H:%M:%S'))
    A = fetch_rawsql(
        "select iex_id, any_value(iex_title) title, "
        "any_value(if(length(article)>length(iex_summary),article,iex_summary)) article "
        "from ff_finnews_iex where published>=date_sub(now(), interval 1 year) "
        "and greatest(length(article),length(iex_summary))>500 group by iex_id")
    print("Done fetching focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    tv = TV.transform(documents)
    print("Done vectorizing focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_finnews_b0")
    runsql(
        "create table ff_finnews_b0 (symbol varchar(10), iex_id varchar(30), "
        "term varchar(10), model_type varchar(4), y float, chg_term float, raw_sense float)")
    for term in TERMS:
        IEX_SYMBOL = fetch_rawsql(
            "select iex_id, symbol, published, date_same, chg_" + term +
            " chg from ff_finnews_w7")
        ALL_LOCAL, OVERALL_X, OVERALL_y = focus_prep(IEX_SYMBOL, IEX_ids, tv)
        print("Done preparing focus articles for " + term + " training " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
        focus_models(term, ALL_LOCAL, OVERALL_X, OVERALL_y, is_train=True)
        print("Finished local sense on term " + term + " ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("create index ff_finnews_b0_i1 on ff_finnews_b0 (iex_id)")
    runsql("drop table if exists ff_finnews_b1")
    runsql(
        "create table ff_finnews_b1 as select iex_id, any_value(published) published, "
        "any_value(date(published)) published_date, any_value(iex_source) iex_source, "
        "any_value(iex_title) iex_title from ff_finnews_iex where "
        "published>=date_sub(now(), interval 6 month) and "
        "greatest(length(article),length(iex_summary))>500 group by iex_id")
    runsql("create index ff_finnews_b1_i1 on ff_finnews_b1 (iex_id)")
    runsql("drop table if exists ff_finnews_b2")
    runsql(
        "create table ff_finnews_b2 as select a.*, b.published, b.published_date, "
        "b.iex_source, b.iex_title from ff_finnews_b0 a, ff_finnews_b1 b "
        "where a.iex_id=b.iex_id")
    runsql("create index ff_finnews_b2_i1 on ff_finnews_b2 (symbol, iex_id, model_type, term)")
    runsql("drop table if exists ff_finnews_b3")
    runsql("create table ff_finnews_b3 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
, max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
, avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
from ff_finnews_b2 group by symbol, iex_id, model_type")
    runsql("create index ff_finnews_b3_i1 on ff_finnews_b3 (symbol, iex_id, model_type)")
    runsql("drop table if exists ff_finnews_b4")
    runsql(
        "create table ff_finnews_b4 as select symbol, count(distinct iex_id) n_doc "
        "from ff_finnews_b3 group by symbol")
    runsql("create index ff_finnews_b4_i1 on ff_finnews_b4 (symbol)")
    runsql("drop table if exists ff_finnews_b5")
    runsql("create table ff_finnews_b5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
, sum( if(model_type='L', 1/(1/log10(b.n_doc+2)+1) * con_sense, 1/log10(b.n_doc+2)/(1/log10(b.n_doc+2)+1) * con_sense) ) con_sense \
, if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
, sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
from ff_finnews_b3 a, ff_finnews_b4 b where a.symbol = b.symbol group by symbol, iex_id")
    runsql("drop table if exists ff_finnews_b6")
    runsql(
        "create table ff_finnews_b6 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) bin, "
        "a.* from (select a.*, cnt from (select * from ff_finnews_b5) a, "
        "(select count(1) cnt from ff_finnews_b5) b order by con_sense) a, (select @rn:=0) b")
    runsql("create index ff_finnews_b6_i1 on ff_finnews_b6 (bin)")
    runsql("drop table if exists ff_finnews_b7")
    runsql(
        "create table ff_finnews_b7 select a.bin, a.min_con, "
        "ifnull(b.min_con,(select max(con_sense) from ff_finnews_b6)) max_con from "
        "(select bin bin, min(con_sense) min_con from ff_finnews_b6 group by bin) a "
        "left join (select bin-1 bin, min(con_sense) min_con from ff_finnews_b6 "
        "group by bin-1) b on a.bin=b.bin")
    runsql("drop table if exists ff_finnews_b8")
    runsql(
        "create table ff_finnews_b8 as select a.*, date(published) published_date, "
        "(con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_b6 a, "
        "ff_finnews_b7 b where a.bin=b.bin")
    runsql("create index ff_finnews_b8_i1 on ff_finnews_b8 (symbol, published)")
    runsql("create index ff_finnews_b8_i2 on ff_finnews_b8 (symbol, published_date)")
    runsql("drop table if exists ff_stock_date6m")
    runsql("create table ff_stock_date6m as select * from \
(select adddate('1970-01-01',t4.i*10000 + t3.i*1000 + t2.i*100 + t1.i*10 + t0.i) dest_date from \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t0, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t1, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t2, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t3, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t4) v \
where dest_date between date_sub(curdate(), interval 6 month) and curdate()")
    runsql("drop table if exists ff_finnews_b9")
    runsql(
        "create table ff_finnews_b9 as select distinct(symbol) symbol, dest_date "
        "from ff_finnews_b6, ff_stock_date6m")
    runsql("create index ff_finnews_b9_i1 on ff_finnews_b9 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b10")
    runsql("create table ff_finnews_b10 as select symbol, dest_date, sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50 dense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 dense_base \
, ifnull((sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50)/(sum(power(0.8,greatest(dlag,0.01)))+0.26),50) dense_sense \
, sum(power(0.8,greatest(dlag,0.01))*con_sense) condense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 condense_base \
, ifnull((sum(power(0.8,greatest(dlag,0.01))*con_sense))/(sum(power(0.8,greatest(dlag,0.01)))+0.26),0) condense_sense \
from (select a.symbol, a.dest_date, b.bin, b.bin_float, round(TIMESTAMPDIFF(HOUR,published,dest_date)/24,2) dlag, con_sense \
from ff_finnews_b9 a left join ff_finnews_b8 b on a.symbol=b.symbol and DATEDIFF(dest_date, b.published) between 0 and 13) a group by symbol, dest_date")
    runsql("create index ff_finnews_b10_i1 on ff_finnews_b10 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b11")
    runsql("create table ff_finnews_b11 as select a.*, a.dense_sense-b.dense_sense dense_diff, a.condense_sense-b.condense_sense condense_diff from \
(select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10) ) a, \
(select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10 where dest_date<(select max(dest_date) from ff_finnews_b10))) b \
where a.symbol=b.symbol order by abs(dense_diff) desc")
    runsql("create index ff_finnews_b11_i1 on ff_finnews_b11 (symbol)")
    runsql("drop table if exists ff_finnews_b12")
    runsql(
        "create table ff_finnews_b12 as select a.symbol, iex_id, published, dest_date, "
        "n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, "
        "n_l_up, n_l_down, bin+bin_float bin, dense_sense, "
        "bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff "
        "from ff_finnews_b8 a, ff_finnews_b10 b where a.symbol=b.symbol "
        "and date_add(dest_date,interval 1 day)=published_date")
    runsql("create index ff_finnews_b12_i1 on ff_finnews_b12 (symbol, dest_date)")
    runsql("create index ff_finnews_b12_i2 on ff_finnews_b12 (symbol, published)")
    runsql("create index ff_finnews_b12_i3 on ff_finnews_b12 (symbol, iex_id)")
    runsql("drop table if exists ff_finnews_b13")
    runsql(
        "create table ff_finnews_b13 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dbin, "
        "a.* from (select a.*, cnt from (select * from ff_finnews_b12) a, "
        "(select count(1) cnt from ff_finnews_b12) b order by bin_diff) a, (select @rn:=0) b")
    runsql("create index ff_finnews_b13_i1 on ff_finnews_b13 (dbin)")
    runsql("drop table if exists ff_finnews_b14")
    runsql(
        "create table ff_finnews_b14 select a.dbin, a.min_con, "
        "ifnull(b.min_con,(select max(bin_diff) from ff_finnews_b13)) max_con from "
        "(select dbin dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin) a "
        "left join (select dbin-1 dbin, min(bin_diff) min_con from ff_finnews_b13 "
        "group by dbin-1) b on a.dbin=b.dbin")
    runsql("drop table if exists ff_finnews_b15")
    runsql(
        "create table ff_finnews_b15 as select @rn:=@rn+1 rn, dest_date from "
        "(select * from ff_stock_date6m order by dest_date desc) a, (select @rn:=0) b")
    runsql("create index ff_finnews_b15_i1 on ff_finnews_b15 (dest_date)")
    runsql("drop table if exists ff_finnews_b16")
    runsql(
        "create table ff_finnews_b16 as select a.*, c.dest_date dest_ref, "
        "a.dense_sense-c.dense_sense dense_diff, "
        "a.condense_sense-c.condense_sense condense_diff from ff_finnews_b10 a, "
        "ff_finnews_b15 b, ff_finnews_b10 c, ff_finnews_b15 d where a.symbol=c.symbol "
        "and a.dest_date=b.dest_date and c.dest_date=d.dest_date and b.rn+1=d.rn")
    runsql("create index ff_finnews_b16_i1 on ff_finnews_b16 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b17")
    runsql(
        "create table ff_finnews_b17 as select @rn:=@rn+1 rn, "
        "floor(@rn/(cnt+1)*100) dcon_bin, a.* from (select a.*, cnt from "
        "(select * from ff_finnews_b16) a, (select count(1) cnt from ff_finnews_b16) b "
        "order by condense_diff) a, (select @rn:=0) b")
    runsql("drop table if exists ff_finnews_b18")
    runsql(
        "create table ff_finnews_b18 select a.dcon_bin, a.min_con, "
        "ifnull(b.min_con,(select max(condense_diff) from ff_finnews_b17)) max_con from "
        "(select dcon_bin dcon_bin, min(condense_diff) min_con from ff_finnews_b17 "
        "group by dcon_bin) a left join (select dcon_bin-1 dcon_bin, "
        "min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin-1) b "
        "on a.dcon_bin=b.dcon_bin")
    runsql("drop table if exists ff_finnews_b19")
    runsql(
        "create table ff_finnews_b19 as select rn, symbol, dest_date, dense_sense, "
        "condense_sense, dest_ref, dense_diff, condense_diff, "
        "a.dcon_bin+(condense_diff-b.min_con)/(b.max_con-b.min_con) dcon_bin "
        "from ff_finnews_b17 a, ff_finnews_b18 b where a.dcon_bin=b.dcon_bin")
    print("Done focus sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=False)
    file_cleanup()

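# ff_finnews_b10 computes a time-decayed sentiment average per symbol and date.
# A sketch of the dense_sense arithmetic in Python (dense_sense_sketch is a
# hypothetical name; the constants 0.8, 0.01, 0.26 and the neutral bin 50 are
# copied from the SQL above): each article's bin is weighted 0.8**days_old, and
# a pseudo-observation of weight 0.26 at bin 50 pulls thin histories toward
# neutral.
def dense_sense_sketch(bins, dlags):
    # bins: per-article sentiment bins (0..100); dlags: days since publication
    num = sum(0.8 ** max(d, 0.01) * b for b, d in zip(bins, dlags)) + 0.26 * 50
    den = sum(0.8 ** max(d, 0.01) for d in dlags) + 0.26
    return num / den
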
def dig_docs(A, IEX_ids, TV, tv, is_incremental=False):
    if is_incremental:
        keyword_table = 'ff_finnews_c00'
        keysent_table = 'ff_finnews_c11'
    else:
        keyword_table = 'ff_finnews_c0'
        keysent_table = 'ff_finnews_c1'
    runsql("drop table if exists " + keyword_table)
    runsql("create table " + keyword_table +
           " (iex_id varchar(30), key_id int, key_weight float, word varchar(100))")
    runsql("drop table if exists " + keysent_table)
    runsql("create table " + keysent_table +
           " (iex_id varchar(30), key_id int, key_weight float, sentence varchar(1000))")
    T = TV.get_feature_names()
    WORD_BUFFER = []
    SENT_BUFFER = []
    for i, d in enumerate([a['article'] for a in A]):
        dense_vector = tv[i].toarray()[0]
        keywords = [{'word': T[j], 'tfidf': dense_vector[j]}
                    for j in np.argsort(dense_vector)[::-1][:N_KEYWORDS]]
        WORD_BUFFER += [(IEX_ids[i], k, w['tfidf'], w['word'])
                        for k, w in enumerate(keywords)]
        sentences = d.replace(';', '.').split('. ')
        sentence_importance = []
        for idx, s in enumerate(sentences):
            s_weight = sum([k['tfidf'] for k in keywords if k['word'] in s.lower()])
            s_weight /= np.log2(max(8, len(s.split())))
            if len(s) < 1000:
                sentence_importance.append({'sentence': s, 'weight': s_weight, 'idx': idx})
        important_sentences = sorted(sentence_importance, key=lambda k: k['weight'])
        important_sentences = important_sentences[::-1][:N_SENTENCES]
        ordered_sentences = sorted(important_sentences, key=lambda k: k['idx'])
        SENT_BUFFER += [(IEX_ids[i], k, s['weight'], s['sentence'][:1000])
                        for k, s in enumerate(ordered_sentences)]
        if len(WORD_BUFFER) >= 10000:
            runsqlmany("insert into " + keyword_table +
                       " (iex_id, key_id, key_weight, word) values (%s,%s,%s,%s)",
                       WORD_BUFFER)
            WORD_BUFFER = []
        if len(SENT_BUFFER) >= 10000:
            runsqlmany("insert into " + keysent_table +
                       " (iex_id, key_id, key_weight, sentence) values (%s,%s,%s,%s)",
                       SENT_BUFFER)
            SENT_BUFFER = []
    if WORD_BUFFER:
        runsqlmany("insert into " + keyword_table +
                   " (iex_id, key_id, key_weight, word) values (%s,%s,%s,%s)",
                   WORD_BUFFER)
    if SENT_BUFFER:
        runsqlmany("insert into " + keysent_table +
                   " (iex_id, key_id, key_weight, sentence) values (%s,%s,%s,%s)",
                   SENT_BUFFER)
    print("Done sentence weighting ... " + time.strftime('%Y-%m-%d %H:%M:%S'))

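# dig_docs ranks each article's terms by TF-IDF weight, then scores sentences
# by the keywords they contain. A self-contained sketch of the keyword half on
# a toy corpus; the vectorizer settings here are assumptions, and newer
# scikit-learn spells the vocabulary accessor get_feature_names_out rather than
# the get_feature_names used above:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def top_keywords_sketch(documents, n_keywords=10):
    # n_keywords stands in for N_KEYWORDS above
    tv = TfidfVectorizer(stop_words='english')
    X = tv.fit_transform(documents)
    terms = tv.get_feature_names_out()
    out = []
    for i in range(X.shape[0]):
        row = X[i].toarray()[0]  # dense TF-IDF vector for document i
        out.append([(terms[j], row[j]) for j in np.argsort(row)[::-1][:n_keywords]])
    return out
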
def assure_required_tables():
    if table_exist("ff_stock_adj"):
        runsql("drop table ff_stock_adj")
    runsql(FF_STOCK_ADJ_DDL)
    refresh_sp500()
    if not table_exist("ff_stock_summary_hist"):
        runsql(FF_STOCK_SUMMARY_HIST_DDL)
        runsql(FF_STOCK_SUMMARY_HIST_I1)
        runsql(FF_STOCK_SUMMARY_HIST_I2)
    if not table_exist("ff_stock_summary_term"):
        runsql(FF_STOCK_SUMMARY_TERM_DDL)
        runsql(FF_STOCK_SUMMARY_TERM_I1)

    sys.exit()

# check data size
cur = getcur()
cur.execute("show tables like 'ff_stock_________'")
T = [t[0] for t in cur.fetchall()]
if T:
    c_adj = fetch_rawsql("select count(1) cnt from ff_stock_adj")[0]['cnt']
    c_last = fetch_rawsql("select count(1) cnt from " + T[-1])[0]['cnt']
    if c_adj < c_last * 0.5:
        print("Weak ingestion")
        sys.exit()

# swap prod table ff_stock with ff_stock_adj
if table_exist(tname):
    runsql("drop table " + tname)
runsql("create table " + tname + " as select * from ff_stock_adj")
runsql("create index " + tname + "_i1 on " + tname + " (symbol, close_date)")
runsql("create index " + tname + "_i2 on " + tname + " (close_date)")
runsql("drop table if exists ff_stock")
runsql("alter table ff_stock_adj rename to ff_stock")
runsql("create index ff_stock_i1 on ff_stock (symbol, close_date)")
runsql("create index ff_stock_i2 on ff_stock (close_date)")
print("Finished daily backup " + tname + " ... " + time.strftime('%Y-%m-%d %H:%M:%S'))

tname = "ff_stock_" + cdate.strftime('%Yw%W')
if table_exist(tname):
    runsql("drop table " + tname)
runsql("create table " + tname + " as select * from ff_stock")
print("Finished weekly backup " + tname + " ... " + time.strftime('%Y-%m-%d %H:%M:%S'))

for tw in tq['statuses']:
    tid = tw['id']
    sid = max(sid, tid)
    print(" Checking tweet #" + str(tid) + ". " + time.strftime('%Y-%m-%d %H:%M:%S'))
    url_list = [i['url'] for i in tw['entities']['urls']]
    # no article
    if len(url_list) == 0:
        print(" No article")
        continue
    chk = fetch_rawsql("select count(1) cnt from ff_tweets where tid = " + str(tid))[0]['cnt']
    # old tweet
    if chk:
        runsql("update ff_tweets set last_updated=now(), favorites=" +
               str(tw['favorite_count']) + ", retweets=" +
               str(tw['retweet_count']) + " where tid=" + str(tid))
        print(" Old tweet")
        continue
    tweet_text = re_pattern.sub(u'\uFFFD', tw['text'])
    #chk = fetch_rawsql("select count(1) cnt from ff_tweets where tweet_text='" + tweet_text + "'")[0]['cnt']
    # same tweet text
    #if chk:
    #    print(" Duplicate")
    #    continue
    for u in url_list:
        chk = fetch_rawsql(
            "select count(1) cnt from ff_tweets where tweet_url = '" + u + "'")[0]['cnt']
        if chk:
            break

def backup_table(tb, d=3, w=2, m=2):
    if not table_exist(tb):
        print("Backup source " + tb + " does not exist")
        return False
    cdate = utc.localize(datetime.datetime.utcnow()).astimezone(eastern).date()
    if d:
        tname = tb + "_bck" + cdate.strftime('%Y%m%d')
        runsql("drop table if exists " + tname)
        runsql("create table " + tname + " as select * from " + tb)
        cur = getcur()
        cur.execute("show tables like '" + tb + "_bck________'")
        T = [t[0] for t in cur.fetchall()]
        for t in T[:-d]:
            runsql("drop table " + t)
        print("..Backup + cleanup " + tb + " daily ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    if w:
        tname = tb + "_bck" + cdate.strftime('%Yw%W')
        runsql("drop table if exists " + tname)
        runsql("create table " + tname + " as select * from " + tb)
        cur = getcur()
        cur.execute("show tables like '" + tb + "_bck____w__'")
        T = [t[0] for t in cur.fetchall()]
        for t in T[:-w]:
            runsql("drop table " + t)
        print("..Backup + cleanup " + tb + " weekly ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    if m:
        tname = tb + "_bck" + cdate.strftime('%Ym%m')
        runsql("drop table if exists " + tname)
        runsql("create table " + tname + " as select * from " + tb)
        cur = getcur()
        cur.execute("show tables like '" + tb + "_bck____m__'")
        T = [t[0] for t in cur.fetchall()]
        for t in T[:-m]:
            runsql("drop table " + t)
        print("..Backup + cleanup " + tb + " monthly ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    dbcommit()
    print("Successfully backed up " + tb)
    return True

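# Example usage: keep the default 3 daily / 2 weekly / 2 monthly copies of the
# scan table, and fewer copies of the bulkier news table (the second call's
# retention counts are illustrative, not taken from the original source):
#   backup_table("ff_status_scan")
#   backup_table("ff_finnews_iex", d=2, w=1, m=1)
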
def profile_training():
    runsql("drop table if exists ff_stock_1y")
    runsql(
        "create table ff_stock_1y as select * from ff_stock "
        "where close_date>=date_sub(now(), interval 1 year)")
    runsql("create index ff_stock_1y_i1 on ff_stock_1y (symbol, close_date)")
    runsql("drop table if exists ff_finnews_w1")
    runsql(
        "create table ff_finnews_w1 as select symbol, published pt, "
        "date(published) published, iex_id, iex_source from ff_finnews_iex "
        "where greatest(length(article),length(iex_summary))>500")
    runsql("drop table if exists ff_finnews_w2")
    runsql(
        "create table ff_finnews_w2 as select a.*, (select min(rn) from ff_stock_w40 "
        "where close_date<published) rn from ff_finnews_w1 a")
    runsql("create index ff_finnews_w2_i1 on ff_finnews_w2 (symbol, rn)")
    runsql("drop table if exists ff_finnews_w3")
    runsql(
        "create table ff_finnews_w3 as select a.*, bp.close_date date_previous, "
        "bs.close_date date_same, b1.close_date date_1d, b2.close_date date_2d "
        "from ff_finnews_w2 a left join ff_stock_w40 bp on a.rn=bp.rn "
        "left join ff_stock_w40 bs on a.rn=bs.rn+1 "
        "left join ff_stock_w40 b1 on a.rn=b1.rn+2 "
        "left join ff_stock_w40 b2 on a.rn=b2.rn+3")
    runsql("create index ff_finnews_w3_i1 on ff_finnews_w3 (published)")
    runsql("drop table if exists ff_finnews_w4")
    runsql("create table ff_finnews_w4 as select a.* \
,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published, interval 1 week)) date_1w \
,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published, interval 2 week)) date_2w \
,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published, interval 1 month)) date_1m \
,(select min(close_date) from ff_stock_w40 where close_date>=date_add(published, interval 2 month)) date_2m \
from ff_finnews_w3 a")
    runsql("create index ff_finnews_w4_i1 on ff_finnews_w4 (symbol, date_previous)")
    runsql("create index ff_finnews_w4_i2 on ff_finnews_w4 (symbol, date_same)")
    runsql("create index ff_finnews_w4_i3 on ff_finnews_w4 (symbol, date_1d)")
    runsql("create index ff_finnews_w4_i4 on ff_finnews_w4 (symbol, date_2d)")
    runsql("drop table if exists ff_finnews_w5")
    runsql("create table ff_finnews_w5 as select a.*, bp.close close_p, bs.close close_s, b1d.close close_1d, b2d.close close_2d from ff_finnews_w4 a \
left join ff_stock_1y bp on a.symbol=bp.symbol and a.date_previous=bp.close_date \
left join ff_stock_1y bs on a.symbol=bs.symbol and a.date_same=bs.close_date \
left join ff_stock_1y b1d on a.symbol=b1d.symbol and a.date_1d=b1d.close_date \
left join ff_stock_1y b2d on a.symbol=b2d.symbol and a.date_2d=b2d.close_date")
    runsql("create index ff_finnews_w5_i1 on ff_finnews_w5 (symbol, date_1w)")
    runsql("create index ff_finnews_w5_i2 on ff_finnews_w5 (symbol, date_2w)")
    runsql("create index ff_finnews_w5_i3 on ff_finnews_w5 (symbol, date_1m)")
    runsql("create index ff_finnews_w5_i4 on ff_finnews_w5 (symbol, date_2m)")
    runsql("drop table if exists ff_finnews_w6")
    runsql("create table ff_finnews_w6 as select a.*, b1w.close close_1w, b2w.close close_2w, b1m.close close_1m, b2m.close close_2m from ff_finnews_w5 a \
left join ff_stock_1y b1w on a.symbol=b1w.symbol and a.date_1w=b1w.close_date \
left join ff_stock_1y b2w on a.symbol=b2w.symbol and a.date_2w=b2w.close_date \
left join ff_stock_1y b1m on a.symbol=b1m.symbol and a.date_1m=b1m.close_date \
left join ff_stock_1y b2m on a.symbol=b2m.symbol and a.date_2m=b2m.close_date")
    runsql("drop table if exists ff_finnews_w7")
    runsql("create table ff_finnews_w7 as select a.*, (close_s-close_p)/close_p chg_sameday, \
(close_1d-close_s)/close_s chg_1d, (close_2d-close_s)/close_s chg_2d, \
(close_1w-close_s)/close_s chg_1w, (close_2w-close_s)/close_s chg_2w, \
(close_1m-close_s)/close_s chg_1m, (close_2m-close_s)/close_s chg_2m from ff_finnews_w6 a")
    runsql("create index ff_finnews_w7_i1 on ff_finnews_w7 (published)")
    print("Done profiling ... " + time.strftime('%Y-%m-%d %H:%M:%S'))

#runsql("update ff_finnews_iex set last_updated=now() where symbol='"+s+"' and iex_url='"+post['url'][:100]+"'") continue # news previously fetched if pi: A = fetch_rawsql( "select article, article_url, article_title from ff_finnews_iex where iex_id='" + pi + "' and article<>''") if A: A = A[0] rawsql = "insert into ff_finnews_iex (symbol, published, last_updated, iex_id, iex_url, iex_source, iex_title, iex_image, iex_summary, iex_related, article_title, article_url, article)" rawsql += "values (%s, %s, now(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" val = (s, pt, pi, post['url'][:100], post['source'][:20], post['headline'][:255], post['image'][:100], post['summary'][:65000], post['related'][:255], A['article_title'], A['article_url'], A['article']) runsql(rawsql, val) print(" [i] Recycled entry (" + pi + "): " + post['headline']) continue # news previously failed retrieving from source f_refetch = False chk = fetch_rawsql( "select count(1) cnt from ff_finnews_iex where symbol='" + s + "' and iex_url='" + post['url'][:100] + "' and ( article='' or length(article)<length(iex_summary) )" )[0]['cnt'] if chk: if F_REFETCH_IN_SCAN: f_refetch = True print(" [i] Refetching old entry", post['headline']) else: continue