def gather_sense():
    """Pull six months of finnews articles and build their tf-idf matrix.

    Returns a 4-tuple ``(rows, iex_ids, vectorizer, matrix)``: the raw query
    rows, the document ids kept by ``doc_to_vec``, the fitted or reloaded
    ``TfidfVectorizer``, and the tf-idf matrix over the cleaned documents.
    """
    rows = fetch_rawsql(
        "select iex_id, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    print("Done fetching finnews articles ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, iex_ids = doc_to_vec(rows)
    print("Done cleaning finnews articles ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    if not RETRAIN_GLOBAL_TV:
        # Reuse the most recently persisted vectorizer.
        #TV = joblib.load(FILE_GLOBAL_TV)
        vectorizer = joblib.load(FILE_GLOBAL_TV_LST)
        matrix = vectorizer.transform(documents)
    else:
        # Retrain from scratch and persist both the canonical and the
        # "latest" copies of the fitted vectorizer.
        vectorizer = TfidfVectorizer(tokenizer=LemmaTokenizer(),
                                     max_df=0.3,
                                     min_df=2,
                                     max_features=1000,
                                     stop_words='english')
        matrix = vectorizer.fit_transform(documents)
        assure_folder(FILE_GLOBAL_TV)
        joblib.dump(vectorizer, FILE_GLOBAL_TV)
        joblib.dump(vectorizer, FILE_GLOBAL_TV_LST)
    print("Done corpus vectorizing ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    return rows, iex_ids, vectorizer, matrix
def refresh_sp500():
    """Scrape the S&P 500 constituent table from Wikipedia and upsert it
    into ``ff_scan_symbols``.

    Symbols already present only get their ``last_updated`` stamp refreshed;
    new symbols are inserted with the descriptive columns scraped from the
    row. A single commit is issued at the end.
    """
    # NOTE(review): the rest of this file defines assure_required_tables()
    # (plural) — confirm assure_required_table() is defined elsewhere.
    assure_required_table()
    print("fetching sp500 list...")
    resp = requests.get(
        'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'html5lib')
    table = soup.find('table', {'class': 'wikitable sortable'})
    # First row is the header; data rows follow.
    for row in table.findAll('tr')[1:]:
        r = row.findAll('td')
        #ticker = r[1].text.rstrip()
        ticker = r[0].text.rstrip()
        #print("working on "+ticker+"...")
        Log.info("SP500", "working on " + ticker + "...")
        # NOTE(review): ticker originates from scraped HTML; the SELECT below
        # still splices it into SQL text — confirm fetch_rawsql supports a
        # parameter form and switch to it.
        check = fetch_rawsql(
            "select symbol from ff_scan_symbols where symbol='" + ticker + "'")
        if len(check) > 0:
            # Parameterized to avoid splicing scraped text into the statement
            # (runsql's (sql, val) form is already used for the insert below).
            runsql("update ff_scan_symbols set last_updated=now() WHERE symbol=%s",
                   (ticker,))
        else:
            rawsql = "insert into ff_scan_symbols (symbol, last_updated, security, sector, subsec, hq, first_added) values (%s, %s, %s, %s, %s, %s, %s)"
            # NOTE(review): `security` is stored from r[0] (the same cell as
            # the ticker). The commented-out `ticker = r[1]` suggests the
            # column layout shifted; the security name is likely r[1] now —
            # verify against the live ff_scan_symbols data.
            val = (ticker,
                   datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                   r[0].text, r[3].text, r[4].text, r[5].text,
                   # first_added kept only when it looks like YYYY-MM-DD
                   (r[6].text if r[6].text and len(r[6].text) == 10 else None))
            runsql(rawsql, val)
    dbcommit()
def assure_required_tables():
    """Create ``ff_tweets`` if missing and keep ``ff_scan_symbols`` fresh.

    The symbol table is (re)built via ``refresh_sp500()`` when absent, and
    refreshed again whenever its most recent row is over a week old.
    """
    if not table_exist("ff_tweets"):
        runsql(FF_TWEETS_DDL)
    if not table_exist("ff_scan_symbols"):
        refresh_sp500()
    # Age of the newest symbol row, in days.
    staleness = fetch_rawsql(
        "select min(datediff(now(),last_updated)) d from ff_scan_symbols")
    if staleness[0]['d'] > 7:
        refresh_sp500()
def assure_tables():
    """Ensure ``ff_status_scan`` exists with its indexes, and force the
    ``utf8_unicode_ci`` collation on both scan tables when it has drifted."""
    if table_exist("ff_status_scan"):
        status = fetch_rawsql("show table status like 'ff_status_scan'")
        collation = status[0]['Collation']
        if collation != 'utf8_unicode_ci':
            runsql(
                "ALTER TABLE ff_status_scan CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci"
            )
    else:
        runsql(FF_STATUS_SCAN)
        runsql("create index ff_status_scan_i1 on ff_status_scan (batched)")
        runsql("create index ff_status_scan_i2 on ff_status_scan (status)")
    # maintenance related on collation correction
    if table_exist("ff_scan_symbols"):
        status = fetch_rawsql("show table status like 'ff_scan_symbols'")
        collation = status[0]['Collation']
        if collation != 'utf8_unicode_ci':
            runsql(
                "ALTER TABLE ff_scan_symbols CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci"
            )
def do_scan():
    """Run one full status scan over the symbol universe.

    Flow: gate on trading hours, quote every symbol from IEX, run the four
    FF scans (volume, volatility/fluctuation, single-day change, ceiling/
    floor penetration) against the FF service, record alerts, then rebuild
    the ff_status_scan_w* summary tables and rebake the display cache.
    Exits the process (sys.exit) outside trading hours or when cooling down.
    """
    status_scan_starttime = datetime.datetime.now()
    assure_tables()
    # Symbols updated in the last two weeks, plus any non-SP500 symbols the
    # FF service knows about (added with an empty sector).
    symbols = fetch_rawsql(
        "select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<14"
    )
    s = [i['symbol'] for i in symbols]
    r = requests.get(FF_SCAN_HOST + "/api/v1/misc/nonsp/")
    if r.status_code != 200:
        print("Failed to retrieve list of non-SP500 symbols")
    else:
        r = r.json()
        for i in r['symbol_list']:
            if i in s or i in NON_SCAN_LIST:
                continue
            symbols += [{'symbol': i, 'sector': ''}]
    # stop when:
    # 1. data not stablized, before 9:45am
    # 2. more than 1 hour after trading stopped
    # 3. none-trading days
    enow = utc.localize(datetime.datetime.utcnow()).astimezone(eastern)
    etime = enow.hour + enow.minute / 60  # fractional Eastern hour of day
    if enow.weekday() < 5 and etime > 9.75 and etime < 17.5:
        # Within trading window: enforce a cooldown between scans.
        has_recent_scans = fetch_rawsql(
            "select count(1) cnt from ff_status_scan where timestampdiff(minute, scanned, '"
            + get_eastern() + "') < " + str(MINUTES_TRADING_SCANS))[0]['cnt']
        if has_recent_scans:
            print("Already scanned status in the last " +
                  str(MINUTES_TRADING_SCANS) +
                  " minutes. Cooldown even if FORCE_RUN")
            dbclose()
            sys.exit()
    elif not FORCE_RUN:
        print("Not during trading hours - skipped status scan")
        dbclose()
        sys.exit()
    # Clear scan cache only when scan actually runs
    if table_exist("ff_status_scan_a1"):
        runsql("drop table ff_status_scan_a1")
    runsql(FF_STATUS_SCAN_A1)
    n_rotten = 0  # count of stale IEX quotes seen so far
    for s0 in symbols:
        s = s0['symbol']
        print("Scanning " + s + " ...")
        try:
            q = requests.get("https://api.iextrading.com/1.0/stock/" + s +
                             "/quote")
            q = q.json()
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt.
            print("- bad IEX call for " + s + " ... skipped")
            continue
        """
        print("Unexpected error:", sys.exec_info()[0])
        raise
        """
        # check required keys
        if not ('calculationPrice' in q and 'latestPrice' in q
                and 'changePercent' in q and 'marketCap' in q
                and 'latestUpdate' in q and 'primaryExchange' in q):
            print("- bad IEX return format for " + s + " ... skipped")
            continue
        # NYSE Arca listings are treated as ETFs and skipped.
        if 'Arca' in q['primaryExchange']:
            print("- not scanning ETF " + s + " ... skipped")
            continue
        if q['marketCap'] < MIN_MARKETCAP:
            print("- MarketCap too low to scan, for " + s + " ... skipped")
            continue
        # double-check if IEX actually returns the latest quote
        updated = utc.localize(
            datetime.datetime.utcfromtimestamp(
                q['latestUpdate'] // 1000)).astimezone(eastern)
        print(enow, updated)
        diff = enow - updated
        dmin = (diff.days * 86400 + diff.seconds) // 60  # quote age, minutes
        if dmin > MINUTES_FRESH_SCANS and not FORCE_RUN:
            print("- rotten/older IEX return data for " + s + " ... skipped")
            n_rotten += 1
            # Too many stale quotes in a row: the feed is down, bail out.
            if n_rotten >= MAX_ROTTEN:
                sys.exit()
            continue
        # Upon the first IEX fresh data of today, swap out yesterday's benchmark ceil-floor tables (from get_stock_adj)
        # *The benchmark tables should only be updated at the time market opens
        if s == symbols[0]['symbol'] and q['calculationPrice'] == 'tops':
            if table_exist("ff_stock_cf_w00") and table_exist(
                    "ff_stock_cf_w0"):
                runsql("drop table ff_stock_cf_w0")
                runsql(
                    "create table ff_stock_cf_w0 as select * from ff_stock_cf_w00"
                )
                runsql(
                    "create index ff_stock_cf_w0_i1 on ff_stock_cf_w0 (symbol)"
                )
                runsql("drop table ff_stock_cf_w00")
        # FF_scans - Volume
        # Retry loop: 429 means rate-limited, wait and retry.
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/vol/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'high' in r['rarity']:
                # Pick a tweet template weighted by its configured probability.
                msg = VOL_TEMPLATE[np.random.choice(
                    len(VOL_TEMPLATE),
                    p=[e['prob'] for e in VOL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&VOLUME", r['volume_human'])
                msg = msg.replace("&Z", "{0:.1f}".format(r['z']))
                # NOTE(review): no "/" before "fb/" here, unlike the other
                # scans — confirm FF_SCAN_HOST ends with a slash.
                msg = msg.replace("&URL", FF_SCAN_HOST + "fb/vol/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"v": r['volume_human'], "z": "{0:.1f}".format(r['z'])})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'vol', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF vol scan for " + s + " ... skipped")
            print(r)
        # FF_scans - Volatility + Fluctuation
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/pfl/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = PFL_TEMPLATE[np.random.choice(
                    len(PFL_TEMPLATE),
                    p=[e['prob'] for e in PFL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                # NOTE(review): pfl links to the "vol" page — confirm intended.
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/vol/" + s + "/")
                msg = msg.replace("&FLUX", str(r['hl_ratio']) + "%")
                msg = msg.replace("&L", str(r['low']))
                msg = msg.replace("&H", str(r['high']))
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"f": r['hl_ratio'], "l": r['low'], "h": r['high']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'pfl', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF pfl scan for " + s + " ... skipped")
            print(r)
        # FF_scans - Single Day Change
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/chg/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            # Sector roll-up row (only for symbols with a known sector).
            if s0['sector']:
                val = (s, s0['sector'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"),
                       enow.strftime("%Y-%m-%d %H:%M:%S"))
                runsql(INSERT_A1, val)
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = CHG_TEMPLATE[np.random.choice(
                    len(CHG_TEMPLATE),
                    p=[e['prob'] for e in CHG_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&CHG", str(abs(round(r['chg'] * 100, 2))))
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/chg/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                msg = msg.replace(
                    "&HAVE", ("is having" if r['status'] == "live" else "had"))
                # Verb strength scales with rarity; direction from sign of chg.
                msg = msg.replace(
                    "&JS",
                    (("roar" if "extreme" in r['rarity'] else
                      ("jump" if "very" in r['rarity'] else "gain"))
                     if r['chg'] > 0 else
                     ("crash" if "extreme" in r['rarity'] else
                      ("dive" if "very" in r['rarity'] else "drop"))))
                #ctx = str({"c": r['chg'], "m": r['move'], "js": r['js']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'chg', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF chg scan for " + s + " ... skipped")
            print(r)
        # FF_scans - Ceiling Floor Penetration
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/cfp/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and r['target'] == 'quoted':
                msg = CFP_TEMPLATE[np.random.choice(
                    len(CFP_TEMPLATE),
                    p=[e['prob'] for e in CFP_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&TARGET", r['target'])
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/dme/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"t": r['target'], "m": r['move']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'cfp', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF cfp scan for " + s + " ... skipped")
            print(r)
    # --- Post-scan summary tables (w1..w8). Order matters: each wN feeds the
    # next, and @rn is a MySQL session variable used for row numbering. ---
    # w1: per-batch item/symbol counts, newest batch first (rn=1 is newest).
    runsql("drop table if exists ff_status_scan_w1")
    runsql(
        "create table ff_status_scan_w1 as select a.*, @rn:=@rn+1 rn from (select batched, count(1) n_item, count(distinct symbol) n_symbol from ff_status_scan group by batched order by batched desc) a, (select @rn:=0) b"
    )
    runsql(
        "create index ff_status_scan_w1_i1 on ff_status_scan_w1 (n_symbol, rn)"
    )
    # w2: up/down counts per scan type for the latest "full" batch (>=20 symbols).
    runsql("drop table if exists ff_status_scan_w2")
    runsql(
        "create table ff_status_scan_w2 as select stype, if(changePct>=0,'u','d') ch, count(1) n from ff_status_scan a, (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b where a.batched=b.max_batched and a.marketcap>"
        + str(MIN_MARKETCAP) + " group by stype, if(changePct>=0,'u','d')")
    # w3: snapshot of this run's per-symbol rows.
    runsql("drop table if exists ff_status_scan_w3")
    runsql("create table ff_status_scan_w3 as select * from ff_status_scan_a1")
    # w4: cap-weighted change per sector (sectors with >20 members).
    runsql("drop table if exists ff_status_scan_w4")
    runsql(
        "create table ff_status_scan_w4 as select max(batched) batched, sector, count(1) cnt, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc"
    )
    # a2: append-only history of the w4 sector summaries.
    if not table_exist("ff_status_scan_a2"):
        runsql(
            "create table ff_status_scan_a2 as select * from ff_status_scan_w4"
        )
        runsql(
            "create index ff_status_scan_a2_i1 on ff_status_scan_a2 (batched)")
    else:
        runsql("insert into ff_status_scan_a2 select * from ff_status_scan_w4")
    # w5: scan-type mix per sector for the latest batch.
    runsql("drop table if exists ff_status_scan_w5")
    runsql(
        "create table ff_status_scan_w5 as select d.sector, stype, count(1) n, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, any_value(d.cnt) n_sector, any_value(round(count(1)/d.cnt,4)) s_ratio from ff_status_scan a join (select batched as max_batched from ff_status_scan_w1 order by rn limit 1) b on a.batched=b.max_batched join (select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<60) c on a.symbol=c.symbol right join ff_status_scan_w4 d on c.sector=d.sector group by d.sector, a.stype order by sector, s_ratio desc"
    )
    runsql(
        "create index ff_status_scan_w5_i1 on ff_status_scan_w5 (sector, s_ratio)"
    )
    # w6: sector summary with a 3-letter display abbreviation (sector3).
    runsql("drop table if exists ff_status_scan_w6")
    runsql(
        "create table ff_status_scan_w6 as select max(batched) batched, sector, any_value(if(locate(' ',sector)>0, concat(substr(sector,1,1),substr(sector,locate(' ',sector)+1,2)), substr(sector,1,3))) sector3, count(1) cnt, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, round(sum(abs(changePct)*marketcap)) am, round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc"
    )
    # w7: per-symbol detail joined with company metadata, ordered by impact.
    runsql("drop table if exists ff_status_scan_w7")
    runsql(
        "create table ff_status_scan_w7 as select a.security, a.subsec, a.hq, b.*, changePct*marketcap marketeff, abs(changePct*marketcap) marketabs from (select symbol, security, subsec, hq from ff_scan_symbols where datediff(now(),last_updated)<60) a, ff_status_scan_w3 b where a.symbol=b.symbol order by marketabs"
    )
    runsql(
        "create index ff_status_scan_w7_i1 on ff_status_scan_w7 (sector, marketabs)"
    )
    # w8: last batch of each calendar day from the a2 history.
    runsql("drop table if exists ff_status_scan_w8")
    runsql(
        "create table ff_status_scan_w8 as select b.batched_date, a.* from ff_status_scan_a2 a,(select date(batched) batched_date, max(batched) batched_time from ff_status_scan_a2 group by date(batched)) b where a.batched=b.batched_time"
    )
    runsql(
        "create index ff_status_scan_w8_i1 on ff_status_scan_w8 (sector, batched_date)"
    )
    runsql(
        "create index ff_status_scan_w8_i2 on ff_status_scan_w8 (batched_date)"
    )
    cache_baking()
    status_scan_endtime = datetime.datetime.now()
    timedetail = "status_scan job is running from {0} to {1}, and total runging time is {2}".format(
        status_scan_starttime, status_scan_endtime,
        (status_scan_endtime - status_scan_starttime).seconds)
    print(timedetail)
    Log.info("status_scan.py", timedetail)
def cache_baking():
    """Rebuild the ff_status_scan_o* cache tables for the display layer.

    o1 ranks the latest full batch by a composite weight (w_micd) mixing
    market cap, info value and change magnitude with time-aging; o2/o3
    expand it back to full rows, then extra context rows (fame66 history,
    fame66 chart, fame-ahead) are fetched from the FF service per symbol
    and appended; o4 is the final ordered cache.
    """
    print("Caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_status_scan_o1")
    # Innermost select computes per-row maturity (md) and time-decay (td);
    # w_mic weighs cap/infov/change; outer @rn/@s keep only the best row per
    # symbol (rn=0) and cap the list at N_OVERALL_CACHE.
    rawsql = "create table ff_status_scan_o1 as select symbol, stype, infov, w_micd from \
(select a.*, td+w_mic*md w_micd, @rn:=if(@s=symbol,@rn+1,0) rn, @s:=symbol s from \
(select a.symbol, marketCap, updated, scanned, stype, rarity, context, infov, if(stype='vol',if(md>1,1,md),1) md, if(td>" + str(
        AGING_TIME_FACTOR
    ) + "," + str(AGING_TIME_FACTOR) + ",td)*rand()*" + str(
        AGING_RAND_SCALE
    ) + " td, (log10(marketCap)-9)*(infov-90)*(log10(abs(changePct*10000)+1)) w_mic from \
(select a.*, (hour(updated)*60+minute(updated))/" + str(
        AGING_MATURE_HOUR * 60
    ) + " md, if(timestampdiff(minute,batched,utc_timestamp())<200,0,(timestampdiff(minute,batched,utc_timestamp())-200)/480) td from ff_status_scan a) a, \
(select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b where a.batched=b.max_batched order by w_mic desc limit " + str(
        N_OVERALL_CACHE * 4) + ") a, \
(select @s:='',@rn:=0) b order by marketCap desc, w_micd desc \
) a where rn=0 order by w_micd desc limit " + str(N_OVERALL_CACHE)
    runsql(rawsql)
    # o2: all scan rows of the latest batch for the symbols selected in o1.
    runsql("drop table if exists ff_status_scan_o2")
    runsql(
        "create table ff_status_scan_o2 as select a.*, c.w_micd from ff_status_scan a, (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b, ff_status_scan_o1 c where a.batched=b.max_batched and a.symbol=c.symbol order by w_micd desc"
    )
    # o3: number symbols consecutively (rn) via session variables.
    runsql("drop table if exists ff_status_scan_o3")
    runsql(
        "create table ff_status_scan_o3 as select (@rn:=if(@s=symbol,@rn,@rn+1))*1 rn, @s:=symbol symbol, quote, changePct, marketcap, stype, context, infov, w_micd from ff_status_scan_o2 a, (select @rn:=0, @s:='') b"
    )
    S = fetch_rawsql(
        "select rn, symbol, max(quote) quote, max(changePct) changePct, max(marketcap) marketcap, max(w_micd) w_micd from ff_status_scan_o3 group by rn, symbol"
    )
    if S:
        for s in S:
            # 52-week history context ('f6w' rows); context column is
            # varchar(255), hence the length guards below.
            r = requests.get(FF_SCAN_HOST + "/api/v1/fame66/w52/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'datasets' in r:
                    ctx = r['datasets'][0]
                    ctx = str(ctx)
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f6w', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 support of " +
                              s['symbol'])
            # Fame66 chart bits ('f66' rows).
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/f66/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'bits' in r:
                    a = [i['pr'] for i in r['bits']]
                    b = [i['c_diff'] for i in r['bits']]
                    ctx = str({"pr": a, "c_diff": b})
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f66', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 chart of " +
                              s['symbol'])
            # Fame-ahead ('ahd' rows), only when the target is the symbol itself.
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/ahd/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'target' in r:
                    if r['target'] == 'self':
                        ctx = str(r)
                        if len(ctx) <= 255:
                            runsql(
                                "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                (s['rn'], s['symbol'], s['quote'],
                                 s['changePct'], s['marketcap'], 'ahd', ctx,
                                 r['I'], s['w_micd']))
                        else:
                            print("ctx longer than 255 for Fame Ahead of " +
                                  s['symbol'])
    # o4: final ordered cache table served to the front end.
    runsql("drop table if exists ff_status_scan_o4")
    runsql(
        "create table ff_status_scan_o4 as select * from ff_status_scan_o3 order by rn"
    )
    runsql("create index ff_status_scan_o4_i1 on ff_status_scan_o4 (rn)")
    runsql(
        "create index ff_status_scan_o4_i2 on ff_status_scan_o4 (stype, infov, marketcap)"
    )
    print("Done caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
def incremental_sense():
    """Score only the newest (last week, not-yet-seen) articles and merge
    them into the sense pipeline.

    Builds the ff_finnews_d0..d8 chain: d0/d1 select new articles, the
    pre-trained vectorizer + focus models score them into d2, and d3..d8
    aggregate per symbol/article and map onto the bins built by the full
    build_sense() run (ff_finnews_b4/b7/b11). Ends with final_sense().
    """
    print("Started incrementals " + time.strftime('%Y-%m-%d %H:%M:%S'))
    # d0: last week's articles, one row per iex_id, longest text wins.
    runsql("drop table if exists ff_finnews_d0")
    runsql(
        "create table ff_finnews_d0 as select iex_id, any_value(published) published, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 1 week) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    runsql("create index ff_finnews_d0_i1 on ff_finnews_d0 (iex_id)")
    # d1: anti-join against b1 to keep only articles not already processed.
    runsql("drop table if exists ff_finnews_d1")
    runsql(
        "create table ff_finnews_d1 as select a.iex_id, a.title, a.article, b.iex_id dummy from ff_finnews_d0 a left join ff_finnews_b1 b on a.iex_id=b.iex_id having dummy is null"
    )
    runsql("create index ff_finnews_d1_i1 on ff_finnews_d1 (iex_id)")
    A = fetch_rawsql("select iex_id, title, article from ff_finnews_d1")
    print("Done fetching incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    # Reuse the latest persisted global vectorizer — never retrain here.
    TV = joblib.load(FILE_GLOBAL_TV_LST)
    tv = TV.transform(documents)
    T = TV.get_feature_names()
    print("Done vectorizing incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    dig_docs(A, IEX_ids, TV, tv, is_incremental=True)
    # d2: raw per-term sense scores written by focus_models below.
    runsql("drop table if exists ff_finnews_d2")
    runsql(
        "create table ff_finnews_d2 (symbol varchar(10), iex_id varchar(30), term varchar(10), model_type varchar(4), raw_sense float)"
    )
    IEX_SYMBOL = fetch_rawsql(
        "select a.symbol, a.iex_id, a.published from ff_finnews_iex a, ff_finnews_d1 b where a.iex_id=b.iex_id"
    )
    ALL_LOCAL, _X, _y = focus_prep(IEX_SYMBOL, IEX_ids, tv, is_train=False)
    print("Done preparing incremental articles for scoring " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    for term in TERMS:
        # Inference only: empty training X/y.
        focus_models(term, ALL_LOCAL, [], [], is_train=False)
        print("Done scoring incremental articles for " + term + " : " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("create index ff_finnews_d2_i1 on ff_finnews_d2 (iex_id)")
    # d3: scores joined back to article metadata.
    runsql("drop table if exists ff_finnews_d3")
    runsql(
        "create table ff_finnews_d3 as select a.*, published, title from ff_finnews_d2 a, ff_finnews_d0 b where a.iex_id=b.iex_id"
    )
    runsql(
        "create index ff_finnews_d3_i1 on ff_finnews_d3 (symbol, iex_id, model_type, term)"
    )
    # d4: aggregate raw term scores per symbol/article/model into con_sense.
    runsql("drop table if exists ff_finnews_d4")
    runsql(
        "create table ff_finnews_d4 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
, max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
, avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
from ff_finnews_d3 group by symbol, iex_id, model_type")
    runsql(
        "create index ff_finnews_d4_i1 on ff_finnews_d4 (symbol, iex_id, model_type)"
    )
    # d5: blend local (L) and global (G) models, weighted by the symbol's
    # document count from the full build (ff_finnews_b4).
    runsql("drop table if exists ff_finnews_d5")
    runsql(
        "create table ff_finnews_d5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
, sum( if(model_type='L', 1/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense, 1/log10(ifnull(b.n_doc,0)+2)/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense) ) con_sense \
, if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
, sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
from ff_finnews_d4 a left join ff_finnews_b4 b on a.symbol = b.symbol group by symbol, iex_id"
    )
    # d6: assign each article to the percentile bins built in b7; out-of-range
    # values clamp to bin 99 (above max) or 0 (below min).
    runsql("drop table if exists ff_finnews_d6")
    runsql(
        "create table ff_finnews_d6 as select ifnull(b.bin,if(con_sense>max_max,99,0)) bin , a.* from ff_finnews_d5 a inner join (select max(max_con) max_max, min(min_con) min_min from ff_finnews_b7) c left join ff_finnews_b7 b on a.con_sense>b.min_con and a.con_sense<=b.max_con"
    )
    # d7: interpolate within the bin (bin_float in [0,1)).
    runsql("drop table if exists ff_finnews_d7")
    runsql(
        "create table ff_finnews_d7 as select a.*, date(published) published_date, (con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_d6 a, ff_finnews_b7 b where a.bin=b.bin"
    )
    # d8: compare each article's bin against the symbol's dense baseline (b11).
    runsql("drop table if exists ff_finnews_d8")
    runsql(
        "create table ff_finnews_d8 as select a.symbol, iex_id, published, dest_date, n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, n_l_up, n_l_down, bin+bin_float bin, dense_sense, bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff from ff_finnews_d7 a, ff_finnews_b11 b where a.symbol=b.symbol"
    )
    print("Done incremental sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=True)
def build_sense():
    """Full (from-scratch) Fame Sense build.

    Trains/loads the global vectorizer via gather_sense(), trains per-term
    focus models over a year of articles, then builds the ff_finnews_b0..b19
    table chain: raw scores (b0), article metadata (b1/b2), per-article
    aggregates (b3..b5), percentile binning (b6..b8), a 6-month date spine
    (ff_stock_date6m, b9), time-decayed per-day sense (b10/b11), article vs.
    baseline diffs and their bins (b12..b19). Ends with final_sense() and
    file_cleanup().
    """
    print("Started Fame Sense building ... " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    A, IEX_ids, TV, tv = gather_sense()
    dig_docs(A, IEX_ids, TV, tv)
    profile_training()
    print("Started focus models " + time.strftime('%Y-%m-%d %H:%M:%S'))
    # One year of articles for focus-model training (vs. 6 months above).
    A = fetch_rawsql(
        "select iex_id, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 1 year) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    print("Done fetching focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    tv = TV.transform(documents)
    print("Done vectorizing focus articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    # b0: raw per-term training scores written by focus_models below.
    runsql("drop table if exists ff_finnews_b0")
    runsql(
        "create table ff_finnews_b0 (symbol varchar(10), iex_id varchar(30), term varchar(10), model_type varchar(4), y float, chg_term float, raw_sense float)"
    )
    for term in TERMS:
        # Per-term price-change labels come from ff_finnews_w7 (chg_<term>).
        IEX_SYMBOL = fetch_rawsql(
            "select iex_id, symbol, published, date_same, chg_" + term +
            " chg from ff_finnews_w7")
        ALL_LOCAL, OVERALL_X, OVERALL_y = focus_prep(IEX_SYMBOL, IEX_ids, tv)
        print("Done preparing focus articles for " + term + " training " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
        focus_models(term, ALL_LOCAL, OVERALL_X, OVERALL_y, is_train=True)
        print("Finished local sense on term " + term + " ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("create index ff_finnews_b0_i1 on ff_finnews_b0 (iex_id)")
    # b1: article metadata for the 6-month window.
    runsql("drop table if exists ff_finnews_b1")
    runsql(
        "create table ff_finnews_b1 as select iex_id, any_value(published) published, any_value(date(published)) published_date, any_value(iex_source) iex_source, any_value(iex_title) iex_title from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    runsql("create index ff_finnews_b1_i1 on ff_finnews_b1 (iex_id)")
    # b2: scores joined with metadata.
    runsql("drop table if exists ff_finnews_b2")
    runsql(
        "create table ff_finnews_b2 as select a.*, b.published, b.published_date, b.iex_source, b.iex_title from ff_finnews_b0 a, ff_finnews_b1 b where a.iex_id=b.iex_id"
    )
    runsql(
        "create index ff_finnews_b2_i1 on ff_finnews_b2 (symbol, iex_id, model_type, term)"
    )
    # b3: aggregate term scores per symbol/article/model into con_sense.
    runsql("drop table if exists ff_finnews_b3")
    runsql(
        "create table ff_finnews_b3 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
, max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
, avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
from ff_finnews_b2 group by symbol, iex_id, model_type")
    runsql(
        "create index ff_finnews_b3_i1 on ff_finnews_b3 (symbol, iex_id, model_type)"
    )
    # b4: documents per symbol (used to weight local vs. global models).
    runsql("drop table if exists ff_finnews_b4")
    runsql(
        "create table ff_finnews_b4 as select symbol, count(distinct iex_id) n_doc from ff_finnews_b3 group by symbol"
    )
    runsql("create index ff_finnews_b4_i1 on ff_finnews_b4 (symbol)")
    # b5: blend local (L) and global (G) model scores per symbol/article.
    runsql("drop table if exists ff_finnews_b5")
    runsql(
        "create table ff_finnews_b5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
, sum( if(model_type='L', 1/(1/log10(b.n_doc+2)+1) * con_sense, 1/log10(b.n_doc+2)/(1/log10(b.n_doc+2)+1) * con_sense) ) con_sense \
, if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
, sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
from ff_finnews_b3 a, ff_finnews_b4 b where a.symbol = b.symbol group by symbol, iex_id"
    )
    # b6: percentile rank (bin 0..99) of con_sense via @rn row numbering.
    runsql("drop table if exists ff_finnews_b6")
    runsql(
        "create table ff_finnews_b6 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) bin, a.* from (select a.*, cnt from (select * from ff_finnews_b5) a, (select count(1) cnt from ff_finnews_b5) b order by con_sense) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b6_i1 on ff_finnews_b6 (bin)")
    # b7: [min_con, max_con) edges for each bin.
    runsql("drop table if exists ff_finnews_b7")
    runsql(
        "create table ff_finnews_b7 select a.bin, a.min_con, ifnull(b.min_con,(select max(con_sense) from ff_finnews_b6)) max_con from (select bin bin, min(con_sense) min_con from ff_finnews_b6 group by bin) a left join (select bin-1 bin, min(con_sense) min_con from ff_finnews_b6 group by bin-1) b on a.bin=b.bin"
    )
    # b8: interpolated position inside the bin (bin_float).
    runsql("drop table if exists ff_finnews_b8")
    runsql(
        "create table ff_finnews_b8 as select a.*, date(published) published_date, (con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_b6 a, ff_finnews_b7 b where a.bin=b.bin"
    )
    runsql(
        "create index ff_finnews_b8_i1 on ff_finnews_b8 (symbol, published)")
    runsql(
        "create index ff_finnews_b8_i2 on ff_finnews_b8 (symbol, published_date)"
    )
    # Date spine: every calendar day of the last 6 months, generated from a
    # 5-digit cross join of 0..9 offsets against 1970-01-01.
    runsql("drop table if exists ff_stock_date6m")
    runsql("create table ff_stock_date6m as select * from \
(select adddate('1970-01-01',t4.i*10000 + t3.i*1000 + t2.i*100 + t1.i*10 + t0.i) dest_date from \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t0, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t1, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t2, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t3, \
(select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t4) v \
where dest_date between date_sub(curdate(), interval 6 month) and curdate()")
    # b9: every (symbol, day) pair.
    runsql("drop table if exists ff_finnews_b9")
    runsql(
        "create table ff_finnews_b9 as select distinct(symbol) symbol, dest_date from ff_finnews_b6, ff_stock_date6m"
    )
    runsql(
        "create index ff_finnews_b9_i1 on ff_finnews_b9 (symbol, dest_date)")
    # b10: exponentially time-decayed (0.8^days, 14-day window) daily sense;
    # the +0.26*50 / +0.26 terms pull sparse symbols toward the neutral 50.
    runsql("drop table if exists ff_finnews_b10")
    runsql(
        "create table ff_finnews_b10 as select symbol, dest_date, sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50 dense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 dense_base \
, ifnull((sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50)/(sum(power(0.8,greatest(dlag,0.01)))+0.26),50) dense_sense \
, sum(power(0.8,greatest(dlag,0.01))*con_sense) condense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 condense_base \
, ifnull((sum(power(0.8,greatest(dlag,0.01))*con_sense))/(sum(power(0.8,greatest(dlag,0.01)))+0.26),0) condense_sense \
from (select a.symbol, a.dest_date, b.bin, b.bin_float, round(TIMESTAMPDIFF(HOUR,published,dest_date)/24,2) dlag, con_sense from ff_finnews_b9 a left join ff_finnews_b8 b on a.symbol=b.symbol and DATEDIFF(dest_date, b.published) between 0 and 13) a group by symbol, dest_date"
    )
    runsql(
        "create index ff_finnews_b10_i1 on ff_finnews_b10 (symbol, dest_date)")
    # b11: latest day vs. previous day deltas per symbol.
    runsql("drop table if exists ff_finnews_b11")
    runsql(
        "create table ff_finnews_b11 as select a.*, a.dense_sense-b.dense_sense dense_diff, a.condense_sense-b.condense_sense condense_diff from \
(select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10) ) a, \
(select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10 where dest_date<(select max(dest_date) from ff_finnews_b10))) b \
where a.symbol=b.symbol order by abs(dense_diff) desc")
    runsql("create index ff_finnews_b11_i1 on ff_finnews_b11 (symbol)")
    # b12: each article's bin vs. the prior day's dense baseline.
    runsql("drop table if exists ff_finnews_b12")
    runsql(
        "create table ff_finnews_b12 as select a.symbol, iex_id, published, dest_date, n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, n_l_up, n_l_down, bin+bin_float bin, dense_sense, bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff from ff_finnews_b8 a, ff_finnews_b10 b where a.symbol=b.symbol and date_add(dest_date,interval 1 day)=published_date"
    )
    runsql(
        "create index ff_finnews_b12_i1 on ff_finnews_b12 (symbol, dest_date)")
    runsql(
        "create index ff_finnews_b12_i2 on ff_finnews_b12 (symbol, published)")
    runsql("create index ff_finnews_b12_i3 on ff_finnews_b12 (symbol, iex_id)")
    # b13/b14: percentile-bin the bin_diff values (same @rn technique as b6/b7).
    runsql("drop table if exists ff_finnews_b13")
    runsql(
        "create table ff_finnews_b13 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dbin, a.* from (select a.*, cnt from (select * from ff_finnews_b12) a, (select count(1) cnt from ff_finnews_b12) b order by bin_diff) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b13_i1 on ff_finnews_b13 (dbin)")
    runsql("drop table if exists ff_finnews_b14")
    runsql(
        "create table ff_finnews_b14 select a.dbin, a.min_con, ifnull(b.min_con,(select max(bin_diff) from ff_finnews_b13)) max_con from (select dbin dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin) a left join (select dbin-1 dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin-1) b on a.dbin=b.dbin"
    )
    # b15: day index (rn=1 is the most recent day).
    runsql("drop table if exists ff_finnews_b15")
    runsql(
        "create table ff_finnews_b15 as select @rn:=@rn+1 rn, dest_date from (select * from ff_stock_date6m order by dest_date desc) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b15_i1 on ff_finnews_b15 (dest_date)")
    # b16: day-over-day sense deltas for every day in the spine.
    runsql("drop table if exists ff_finnews_b16")
    runsql(
        "create table ff_finnews_b16 as select a.*, c.dest_date dest_ref, a.dense_sense-c.dense_sense dense_diff, a.condense_sense-c.condense_sense condense_diff from ff_finnews_b10 a, ff_finnews_b15 b, ff_finnews_b10 c, ff_finnews_b15 d where a.symbol=c.symbol and a.dest_date=b.dest_date and c.dest_date=d.dest_date and b.rn+1=d.rn"
    )
    runsql(
        "create index ff_finnews_b16_i1 on ff_finnews_b16 (symbol, dest_date)")
    # b17/b18/b19: percentile-bin the condense_diff deltas and interpolate.
    runsql("drop table if exists ff_finnews_b17")
    runsql(
        "create table ff_finnews_b17 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dcon_bin, a.* from (select a.*, cnt from (select * from ff_finnews_b16) a, (select count(1) cnt from ff_finnews_b16) b order by condense_diff) a, (select @rn:=0) b"
    )
    runsql("drop table if exists ff_finnews_b18")
    runsql(
        "create table ff_finnews_b18 select a.dcon_bin, a.min_con, ifnull(b.min_con,(select max(condense_diff) from ff_finnews_b17)) max_con from (select dcon_bin dcon_bin, min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin) a left join (select dcon_bin-1 dcon_bin, min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin-1) b on a.dcon_bin=b.dcon_bin"
    )
    runsql("drop table if exists ff_finnews_b19")
    runsql(
        "create table ff_finnews_b19 as select rn, symbol, dest_date, dense_sense, condense_sense, dest_ref, dense_diff, condense_diff, a.dcon_bin+(condense_diff-b.min_con)/(b.max_con-b.min_con) dcon_bin from ff_finnews_b17 a, ff_finnews_b18 b where a.dcon_bin=b.dcon_bin"
    )
    print("Done focus sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=False)
    file_cleanup()
# NOTE(review): this chunk appears to be top-level script code from a separate
# job (a historical-price loader); it is truncated at the end of this chunk —
# the try: block below has no visible except clause. Indentation of the
# single-statement if bodies was reconstructed; confirm against the original.
# Rebuild the adjusted-price table from scratch and refresh the symbol list.
if table_exist("ff_stock_adj"):
    runsql("drop table ff_stock_adj")
runsql(FF_STOCK_ADJ_DDL)
refresh_sp500()
if not table_exist("ff_stock_summary_hist"):
    runsql(FF_STOCK_SUMMARY_HIST_DDL)
    runsql(FF_STOCK_SUMMARY_HIST_I1)
    runsql(FF_STOCK_SUMMARY_HIST_I2)
if not table_exist("ff_stock_summary_term"):
    runsql(FF_STOCK_SUMMARY_TERM_DDL)
    runsql(FF_STOCK_SUMMARY_TERM_I1)
assure_required_tables()
symbols = fetch_rawsql(
    "select symbol from ff_scan_symbols where datediff(now(),last_updated)<60")
symbols = [i['symbol'] for i in symbols]
#symbols = [symbols[0]]
for s in symbols:
    try:
        # ~10 years + 2 months of daily bars from Google Finance.
        # NOTE(review): `cdate` and `data` (pandas-datareader) are defined
        # outside this chunk — cannot verify from here.
        d = data.DataReader(
            s, 'google', cdate - relativedelta(years=10) -
            relativedelta(months=2), datetime.datetime.now())
        # Replace NaN with None so values insert cleanly as SQL NULLs.
        d = d.where((pd.notnull(d)), None)
        t1 = d['Close'].values.tolist()
        t2 = d.index.tolist()
        t3 = d['High'].values.tolist()
        t4 = d['Low'].values.tolist()
        t5 = d['Volume'].values.tolist()
        # (truncated at chunk boundary)
def assure_required_tables():
    """Create ff_tweets if missing and keep ff_scan_symbols fresh (refreshed
    when absent or when its newest row is over a week old).

    NOTE(review): duplicate of the identical definition earlier in this
    concatenated file — likely the same helper copied into a second script.
    """
    if not table_exist("ff_tweets"):
        runsql(FF_TWEETS_DDL)
    if not table_exist("ff_scan_symbols"):
        refresh_sp500()
    r = fetch_rawsql(
        "select min(datediff(now(),last_updated)) d from ff_scan_symbols")[0]
    if r['d'] > 7:
        refresh_sp500()


# Start of task
assure_required_tables()
# Pick the single least-recently-scanned symbol (>= 24h since last scan).
symbols = fetch_rawsql(
    "select symbol from ff_scan_symbols where datediff(now(),last_updated) < 60 and ifnull(TIMESTAMPDIFF(minute, last_scanned, now()),9999) > 1440 order by last_scanned desc"
)
if len(symbols) == 0:
    sys.exit()
symbol = symbols[0]['symbol']
print("Working on " + symbol + " : " + time.strftime('%Y-%m-%d %H:%M:%S'))
sid = 0
current_time = datetime.datetime.now(datetime.timezone.utc)
#current_time = datetime.datetime.now()
# Walk backwards over the last six days of tweets for this cashtag.
for d in range(6, 0, -1):
    dt = current_time - datetime.timedelta(days=d)
    print(" Fetching up to " + dt.strftime('%Y-%m-%d'))
    # NOTE(review): `t` (Twitter client) is created outside this chunk; the
    # call below is truncated at the chunk boundary.
    tq = t.search.tweets(q="$" + symbol, lang="en",
# NOTE(review): chunk truncated at both ends — these index creations likely
# belong to a table-setup block that starts before this chunk.
runsql("create index ff_finnews_iex_i3 on ff_finnews_iex (last_updated)")
runsql("create index ff_finnews_iex_i4 on ff_finnews_iex (published)")

# Start of task
assure_required_tables()
# Browser-like UA for outbound article fetches.
headers = {
    'User-Agent':
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.90 Safari/537.36'
}
C = {}
symbols = fetch_rawsql("select symbol from ff_scan_symbols")
symbols = [i['symbol'] for i in symbols]
#symbols = ['AAPL','AMD','GOOG','NVDA','QCOM']
#symbols = symbols[:5]
n_insert_attempt = 0
n_insert_success = 0
for s in symbols:
    # IEX uses "." where the symbol table stores "-" (e.g. BRK-B -> BRK.B).
    s = s.replace("-", ".")
    try:
        # Up to 100 most recent news items per symbol.
        P = requests.get("https://api.iextrading.com/1.0/stock/" + s +
                         "/news/last/100")
        P = P.json()
    except:
        # (truncated at chunk boundary)
        print("[e] IEX error and skipped '" + s + "' ... " +