def getTkLst(tkLst=None, tgtk='AAPL', dbname='ara', tablename='yh_quote_curr', days=-7):
    '''Utility function.

    Get a ticker list from MDB ``dbname::tablename`` with a lookback cutoff.

    Args:
        tkLst: optional pre-supplied ticker list; if non-empty it is returned
            as-is together with the computed cutoff date.
        tgtk: reference ticker used to locate the latest 'pbdate' in the table.
        dbname: mongo database name.
        tablename: mongo collection name.
        days: lookback offset (negative = past) applied via next_date().

    Returns:
        (tickerList, mxdate) where mxdate is an int date in YYYYMMDD form.
    '''
    if tkLst is not None and len(tkLst) > 0:
        mxdate = int(next_date(days=days, dformat=('%Y%m%d'), dtTF=False))
        return tkLst, mxdate
    mdb = conn2mgdb(dbname=dbname)
    mCur = mdb[tablename]
    try:
        # cutoff is 'days' relative to the latest pbdate seen for 'tgtk'
        mxdate = max(mCur.find({"ticker": tgtk}).distinct("pbdate"))
        mxdate = int(next_date(mxdate, days=days, dformat=('%Y%m%d'), dtTF=False))
    except Exception:
        # narrowed from a bare `except:` (which also swallowed SystemExit/KeyboardInterrupt);
        # fall back to a cutoff relative to today
        mxdate = int(next_date(days=days, dformat=('%Y%m%d'), dtTF=False))
    # exclude pink-sheet exchange and any ticker containing '^', '=' or '0';
    # raw string avoids the invalid '\^' escape warning of the original literal
    jo = {
        'exchange': {'$ne': 'PNK'},
        'ticker': {'$regex': r'^((?![\^|=|0]).)*$'},
        'pbdate': {'$gt': mxdate},
    }
    r = mCur.find(jo, {"_id": 0, "ticker": 1}).distinct("ticker")
    # normalize class-share tickers, e.g. BRK-B -> BRK.B
    tkLst = [x.replace('-', '.') for x in r]
    return sorted(tkLst), mxdate
def useWeb(jobj={'ticker': '^GSPC'}, colx='pbdt', dbname='ara', tablename='yh_quote_curr', mmGap=30, **optx):
    '''Return True|False action for using web or DB based on 'tablename' 'colx' field.

    True means the cached row in MDB is older than 'mmGap' minutes (or absent),
    so the caller should pull fresh data from the web.

    Args:
        jobj: mongo query object locating the reference row.
        colx: timestamp field used for freshness (sorted descending).
        mmGap: staleness threshold in minutes.
        optx: may carry 'webTF' (explicit bool override), 'cdt' (reference
            datetime, defaults to now) and 'debugTF'.
    '''
    from _alan_calc import conn2mgdb
    debugTF = getKeyVal(optx, 'debugTF', False)
    webTF = optx.pop('webTF', None)
    if webTF is not None and isinstance(webTF, bool):
        return webTF
    cdt = getKeyVal(optx, 'cdt', datetime.datetime.now())
    mgDB = conn2mgdb(dbname=dbname)
    dc = mgDB[tablename].find_one(
        {"$query": jobj, "$orderby": {colx: -1}},
        {colx: 1, "_id": 0})
    # BUGFIX: find_one() returns None (not an empty dict) when nothing matches;
    # the original `len(dc) < 1` raised TypeError in that case.
    if not dc:
        return True
    pdt = dc[colx]
    mmPassed = pd.Timedelta(cdt - pdt).total_seconds() / 60.0
    webTF = mmPassed > mmGap
    if debugTF is True:
        sys.stderr.write("{}|{}|{}|{}|{}\n".format("webTF", "cdt", "pdt", "mmPassed", "mmGap"))
        sys.stderr.write("{}|{}|{}|{:.0f}|{}\n".format(webTF, cdt, pdt, mmPassed, mmGap))
    return webTF
def yh_financials_batch(tkLst=None, **optx):
    ''' pull financials from yahoo and save to 'tablename'
    e.g.,
    python3 -c "from yh_chart import yh_financials_batch as yf;yf(['AAPL']);"
    OR
    python3 -c "from yh_chart import yh_financials_batch as yf;yf(modules='incomeStatementHistoryQuarterly',tablename='yh_IS_Q',zpk={'ticker','pbdate'});"

    Note: default for tkLst changed from a mutable [] to None (the body
    already treats None and empty identically), avoiding the shared
    mutable-default pitfall.
    '''
    useDB = optx.pop('useDB', False)
    tablename = optx.pop('tablename', '')
    modules = optx.pop('modules', 'incomeStatementHistory')
    if len(tablename) > 0 or useDB is True:
        dbname = optx.pop('dbname', 'ara')
        dbM = conn2mgdb(dbname=dbname)
        if useDB is True and (tkLst is None or len(tkLst) < 1):
            # NOTE(review): collection name 'yh+<modules>' looks unusual
            # (vs the 'yh_' prefix used elsewhere) — confirm against DB.
            tkLst = dbM['yh+' + modules].distinct("ticker")
    else:
        dbM = None
    if tkLst is None or len(tkLst) < 1:
        tkLst = list(pull_act_tickers()['ticker'])
    sys.stderr.write("===Batch list:{}, tablename: {}, useDB:{}\n".format(
        tkLst, tablename, useDB))
    dd = []
    for tk in tkLst:
        try:
            jd = yh_financials(tk, modules=modules, tablename=tablename,
                               clientM=dbM, useDB=useDB, **optx)
        except Exception as e:
            # best-effort batch: log the failing ticker and keep going
            sys.stderr.write("**ERROR:{}:{} @{}\n".format(
                tk, str(e), 'yh_financials'))
            continue
        dd.extend(jd)
    return dd
def batch_daily_comment(tkLst=None, **kwargs):
    '''Generate daily OHLC comments for a batch of tickers and write them
    to DB / file via iteritem_daily_comment().

    BUGFIX: the original unpacked kwargs via exec("{}=va".format(ky)).
    In Python 3, exec() cannot rebind function locals, so names like
    'dbname'/'lang' were never created and the body raised NameError.
    kwargs are now unpacked explicitly.
    '''
    # TODO(review): defaults below are best guesses from usage — confirm
    # against the actual callers of this function.
    dbname = kwargs.get('dbname', 'ara')
    hostname = kwargs.get('hostname', 'localhost')
    tablename = kwargs.get('tablename')
    lang = kwargs.get('lang', 'en')
    mp3YN = kwargs.get('mp3YN', False)
    filename = kwargs.get('filename')
    wmode = kwargs.get('wmode', 'replace')
    saveDB = kwargs.get('saveDB', True)
    dirname = kwargs.get('dirname')
    debugTF = kwargs.get('debugTF', False)
    pgDB = conn2pgdb(dbname=dbname, hostname=hostname)
    mgDB = conn2mgdb(dbname=dbname.replace('.', '_'), hostname=hostname)
    #====OHLC Comment===========================
    pqint("====OHLC Comment===========================", file=sys.stderr)
    if tablename is None:
        tablename = "{}_{}".format("ohlc_daily_comment", lang)
    if mp3YN is True:
        tablename = "{}_{}".format(tablename, "mp3")
    else:
        tablename = tablename
    # Initial assignments
    (tempS, daily_hdr) = assign_ts_ohlc(lang=lang)
    if filename is None:
        fp = None
    elif filename == "stdout" or tablename == "-":
        # NOTE(review): `tablename == "-"` looks like it was meant to be
        # `filename == "-"` — kept as-is, confirm intent.
        fp = sys.stdout
    elif filename == "stderr":
        fp = sys.stderr
    else:
        fp = open(filename, "w")
    # Get batch list
    sqr = "SELECT s.*,m.sector,m.company{} as company FROM mapping_ticker_cik m right join (select ticker,min(pbdate) as mndt,max(pbdate) as mxdt from ohlc_pnl group by ticker) as s ON m.ticker=s.ticker order by s.ticker".format(
        "" if lang == "en" else "_" + lang)
    dataM = pd.read_sql(sqr, pgDB)
    if tkLst is not None:
        dx = dataM[dataM['ticker'].isin(tkLst)]
        if dx.empty is True:
            pqint("{} not found!".format(tkLst), file=sys.stderr)
            return None
        else:
            dataM = dx.reset_index(drop=True)
    pqint(
        "START batch_daily_comment-------------------------------------------",
        file=sys.stderr)
    for j, tRw in dataM.iterrows():
        (tkX, mndt, mxdt, sector, label) = tRw.values
        iteritem_daily_comment(j, tempS, tkX, mndt, mxdt, sector, label,
                               daily_hdr, tablename, pgDB, mgDB, fp,
                               wmode=wmode, lang=lang, saveDB=saveDB,
                               mp3YN=mp3YN, dirname=dirname, debugTF=debugTF)
        # first iteration may replace, subsequent ones append
        wmode = 'append'
    pqint("END batch_daily_comment-------------------------------------------",
          file=sys.stderr)
    print(kwargs.keys())
    print(kwargs.values())
def yh_batchSpark(tkLst=None, filter='*', types='spark', nchunk=100, saveDB=True, dbname='ara', tablename='yh_spark_hist', zpk={'ticker', 'epochs'}, t2='yh_quote_curr', t2zpk={'ticker'}, isEoD=False, **optx):
    ''' pull data from types=[spark|quote]
    then save to mgDB 'dbname':: 'tablename' and 't2' respectively
    Note,
    if isEoD=True: quote data save to both mgDB 'dbname':: 'tablename' and 't2'
    if tablename or t2 ='' , nothing will be saved in 'tablename' or 't2'
    quote data will also be saved to pgDB 'dbname'::'t2' if isEoD=True and types='quote'

    Fixes vs original: a string literal that was split by a raw newline
    (syntax error) is reassembled, and the mutable default tkLst=[] is
    replaced by None (the body already treats None and empty identically).
    '''
    debugTF = getKeyVal(optx, 'debugTF', False)
    if debugTF:
        sys.stderr.write("===LOCALS: {}\noptx: {}\n".format(locals(), optx))
    dbM = conn2mgdb(dbname=dbname)
    if tkLst is None or len(tkLst) < 1:
        tkLst = list(pull_act_tickers()['ticker'])
    elif isinstance(tkLst, str):
        tkLst = [tkLst]
    chunkLst = list2chunk(tkLst, nchunk)
    jdN = []
    colX = [
        'ticker', 'open', 'high', 'low', 'close', 'xclose', 'volume', 'pchg',
        'change', 'pbdate', 'epochs', 'pbdt', 'hhmm'
    ]
    for j, tkM in enumerate(chunkLst):
        jdTmp = yh_batchRaw(tkM, types=types, **optx)
        jdQC = {}
        if types in ['spark', 'chart']:
            jdLst = jdTmp[types]['result']
            if types == 'chart':
                jdM = chart2df(jdLst, **optx)
            else:
                jdM = batchSpark_sparkConv(jdLst, **optx)
            if len(jdM) < 1:
                continue
            if saveDB is True:
                m, dbM, err = insert_mdb(jdM, clientM=dbM, tablename=tablename, **optx)
                if debugTF:
                    sys.stderr.write("=== {} of {} saved to {}\n".format(
                        tkM, jdM[-1], tablename))
            jdN.extend(jdM)
        else:  # for case of types.lower()=='quote'
            jdLst = jdTmp['quoteResponse']['result']
            jdQC = batchSpark_quoteConv(jdLst, **optx)
            if len(jdQC) < 1:
                continue
            jdM = subDict(jdQC, colX)
            if saveDB is True and len(jdQC) > 0:
                if len(t2) > 0:
                    qc, dbM, erq = upsert_mdb(jdQC, clientM=dbM, tablename=t2,
                                              zpk=t2zpk, **optx)
                    sys.stderr.write("=== {} of {} saved to {}\n".format(
                        tkM, jdQC[-1], t2))
                if isEoD is True:
                    m, dbM, err = insert_mdb(jdM, clientM=dbM,
                                             tablename=tablename, zpk=zpk, **optx)
                    sys.stderr.write("=== {} of {} saved to {}\n".format(
                        tkM, jdM[-1], tablename))
            jdN.extend(jdQC)
        if debugTF:
            sys.stderr.write("=== tkM:{}[{}/{}], last:\n{}\n".format(
                tkM, len(jdM), len(jdN), jdN[-1]))
    # End-of-day quote data additionally mirrored to pgDB
    if saveDB is True and len(t2) > 0 and len(jdN) > 0 and types.lower() == 'quote' and isEoD is True:
        try:
            df = pd.DataFrame(jdN)
            df = df.drop(['_id'], axis=1)
            if debugTF:
                sys.stderr.write(
                    "=== Save to pgDB::{} of {} rows(1st-last)\n{}\n{}\n".
                    format(t2, len(df), df.iloc[:1], df.iloc[-1:]))
            from _alan_calc import save2pgdb
            save2pgdb(df, dbname, tablename=t2)
        except Exception as e:
            sys.stderr.write("**ERROR: {}:{}\n".format("save2pgdb", str(e)))
    return jdN
def yh_quote_curr(tkLst=None, screenerTF=False, dbname='ara', tablename='yh_quote_curr', zpk={'ticker'}, m2pTF=False, webTF=None, **optx):
    ''' create yh_quote_curr table '''
    # Resolve the batch list from pgDB when none was supplied.
    if tkLst is None or len(tkLst) < 1:
        from _alan_calc import sqlQuery
        tkDF = sqlQuery(
            'select ticker from mapping_ticker_cik where act_code=1')
        tkLst = list(tkDF['ticker'])
    try:
        # Freshness check on the first ticker decides web-vs-DB unless the
        # caller already forced 'webTF'.
        jobj = dict(ticker=tkLst[0])
        if webTF is None:
            webTF = useWeb(jobj=jobj, dbname=dbname, tablename=tablename, **optx)
        if not webTF:
            # Cached path: return rows straight from MDB.
            jobj = {'ticker': {'$in': tkLst}}
            d, _, _ = find_mdb(jobj, dbname=dbname, tablename=tablename, dfTF=True)
            sys.stderr.write("===Using data from {}::{}\n".format(
                dbname, tablename))
            return d
        d = yh_quote_comparison(tkLst, screenerTF=screenerTF, dbname=dbname,
                                tablename=tablename, zpk=zpk, **optx)
    except Exception as e:
        sys.stderr.write("**ERROR: {}:{}:{}\n".format("yh_quote_curr()",
                                                      "@ yh_quote_comparison", str(e)))
        d = []
    saveDB = optx.pop('saveDB', False)
    if not saveDB:
        return d
    try:
        # Optional MDB -> pgSQL mirror.
        if m2pTF:
            from mongo2psql import mongo2psql
            mongo2psql(tablename, dbname)
            # NOTE: this message pulls 'dbname'/'tablename' via locals() —
            # the local variable names are load-bearing here.
            sys.stderr.write(
                "===Running mongo2psql()...{dbname}::{tablename}".format(
                    **locals()))
    except Exception as e:
        sys.stderr.write("**ERROR: {}:{}:{}\n".format("yh_quote_curr()",
                                                      "mongo2psql ...", str(e)))
    # Save yh_quote_curr To yh_quote_hist
    try:
        dbM = conn2mgdb(dbname=dbname)
        tablename = tablename.replace('curr', 'hist')
        # assumes 'd' is a DataFrame here (to_dict); on the error path d=[]
        # this raises and is swallowed as best-effort — TODO confirm intent
        ret = dbM[tablename].insert_many(d.to_dict(orient='records'),
                                         ordered=False)
        sys.stderr.write(
            "===Saving history: {dbname}::{tablename}".format(**locals()))
    except Exception as e:
        sys.stderr.write("**ERROR: {}:{}:{}\n".format("yh_quote_curr()",
                                                      "MDB saving...", str(e)))
    return d
from _alan_str import write2mdb, find_mdb
import pandas as pd
import datetime
# Script section: collect today's earnings tickers and subtract those whose
# EPS news was already recorded in MDB::ara:iex_news_eps.
sdate = sys.argv[1] if len(
    sys.argv) > 1 else datetime.datetime.now().strftime('%Y%m%d')
endOfDayTF = int(sys.argv[2]) if len(sys.argv) > 2 else 1
from macro_event_yh import search_earnings
df = search_earnings(sdate)
vs = df['ticker']
#if endOfDayTF :
#	vs= df['ticker']
#else:
#	vs= df[df['CallTime']!='After Market Close']['ticker']
clientM = conn2mgdb()
pbdate = int(sdate)
print(pbdate)
# Tickers already covered for this pbdate.
xg, _, _ = find_mdb(clientM=clientM, dbname='ara', tablename='iex_news_eps',
                    jobj={"pbdate": pbdate}, field={"ticker"})
from peers_chart import eps_news_grabber
finalvs = vs
if len(xg) > 0:
    if len(xg[0]) > 0:
        vg = pd.DataFrame(xg)
        # keep only tickers NOT yet covered
        finalvs = list(set(vs) - set(vg['ticker'].values))
# NOTE(review): this unconditional override discards the set-difference
# computed above, making it dead code — presumably a leftover debug line;
# confirm before removing.
finalvs = vs
def upd_mapping_ticker(lkbk=-7, mxcap=10**9, debugTF=False, saveDB=True, **optx):
    '''Update pgDB table 'mapping_ticker_cik' with newly-seen tickers.

    Candidates come from the SP500 component list and recent MDB
    madmoney_hist entries; only those with a quote within 'lkbk' days and
    marketCap > 'mxcap' are kept.

    Returns:
        0 when there is nothing to add, otherwise the array of new tickers.
    '''
    sys.stderr.write("===STARTING to update ticker list\n")
    #1. UPDATE sp500 / Dow30 list
    from theme_list import sp500_component
    sp500 = sp500_component(saveDB=False)
    if len(sp500) < 1:
        sp500 = sqlQuery("SELECT * FROM sp500_component")
    sLst = sp500['ticker'].values
    #2. PULL MDB::ara:madmoney_hist
    dt6mo = int(next_date(months=-6, dformat='%Y%m01', dtTF=False))
    mdb = conn2mgdb(dbname='ara')
    mLst = mdb['madmoney_hist'].find({"pbdate": {"$gt": dt6mo}}).distinct("ticker")
    mLst = [x.replace('.', '-') for x in mLst]
    #3. COMPILE a new list based on madmoney and SP500 new tickers
    tkCIK = sqlQuery(
        "SELECT * FROM mapping_ticker_cik WHERE act_code=1 and ticker not similar to '%%(^|=)%%'"
    )
    tLst = tkCIK['ticker'].values
    n1Lst = set(sLst) - set(tLst)
    sys.stderr.write("New SP500 List:{}\n".format(n1Lst))
    n2Lst = set(mLst) - set(tLst)
    sys.stderr.write("New madMoney List:{}\n".format(n2Lst))
    nLst = list(n1Lst.union(n2Lst))
    #4. PULL only valid quotes within last 7 days and marketCap > 1B
    dt7d = int(next_date(days=lkbk, dformat='%Y%m%d', dtTF=False))
    df = yqc(nLst, tablename=None, screenerTF=False, dfTF=True)
    xqr = 'not(pbdate<{}|marketCap<{})'.format(dt7d, mxcap)
    dg = df.query(xqr)
    newLst = dg['ticker'].values
    if len(newLst) < 1:
        sys.stderr.write("No Additions\n")
        return (0)
    else:
        sys.stderr.write("New Additional List:{}\n".format(newLst))
    #5. PULL new list 'newLst' with cik/sic/sector info
    if saveDB:
        newDf = tkmp(newLst)
        pgDB = conn2pgdb(dbname='ara')
        newDf.to_sql('ticker_mapping_temp', pgDB, schema='public',
                     index=False, if_exists='replace')
        xqr = """ DELETE FROM mapping_ticker_cik B USING ticker_mapping_temp C WHERE B.ticker = C.ticker; INSERT INTO mapping_ticker_cik SELECT cik,ticker,company,sic,sector,company_cn,sector_cn,1::int as act_code FROM (SELECT a.*,b.sector,b.sector_cn from ticker_mapping_temp a LEFT JOIN spdr_sector b ON a.sector_alias=b.sector_alias) as s """
        # BUGFIX: was pgDB.execute(xqr, pgDB) — the engine object was passed
        # as SQL parameters; execute the statement without bogus params.
        pgDB.execute(xqr)
    return newLst
def iex_quote_short(tkLst=None, filter='', types='quote', dbname='ara', tablename='iex_quote_short', saveDB=True, debugTF=False, **optx):
    ''' Pull in-the-fly IEX quotes and save to MDB::ara:iex_quote_short
    mapping {
    "symbol":"ticker","changePercent":"pchg","latestPrice":"close",
    "latestUpdate":"epochs","previousClose":"xclose","avgTotalVolume":"volume",
    "previousVolume":"xvolume"
    }

    Fixes vs original: 'coninue' typo (NameError), str(e()) which *called*
    the exception object, and mutable default tkLst=[] (getTkLst handles None).
    '''
    if not filter:
        filter = 'iexMarketPercent,previousClose,avgTotalVolume,previousVolume,week52High,peRatio,iexLastUpdated,companyName,calculationPrice,latestPrice,isUSMarketOpen,week52Low,lastTradeTime,primaryExchange,ytdChange,symbol,latestTime,change,marketCap,changePercent,latestSource,latestUpdate'
    isNew = getKeyVal(optx, 'isNew', False)
    tkLst, mxdate = getTkLst(tkLst)
    jdTmp = iex_batchTypes(tkLst, filter=filter, types=types, **optx)
    # IEX field name -> internal column name
    dicX = {
        "symbol": "ticker",
        "changePercent": "pchg",
        "latestPrice": "close",
        "latestUpdate": "epochs",
        "previousClose": "xclose",
        "avgTotalVolume": "volume",
        "previousVolume": "xvolume"
    }
    dm = []
    for ticker in tkLst:
        try:
            if ticker not in jdTmp:
                continue
            elif types not in jdTmp[ticker]:
                continue
            elif len(jdTmp[ticker][types]) < 1:
                continue  # BUGFIX: was misspelled 'coninue'
            dx = jdTmp[ticker][types]
            renameDict(dx, dicX)
            pbdt = epoch_parser(dx['epochs'])
            pbdate = int(pbdt.strftime('%Y%m%d'))
            dx.update(ticker=ticker, pbdt=pbdt, pbdate=pbdate)
            dm.append(dx)
        except Exception as e:
            # BUGFIX: was str(e()) — calling the exception raised a second error
            sys.stderr.write("**ERROR: {}:{}\n".format(ticker, str(e)))
            continue
    if len(dm) < 1:
        return {}
    if saveDB is True:
        zpk = getKeyVal(optx, 'zpk', ['ticker', 'epochs'])
        ordered = getKeyVal(optx, 'ordered', False)
        optx.pop('zpk', None)
        optx.pop('ordered', None)
        mdb = conn2mgdb(dbname=dbname)
        if isNew is True:
            mdb[tablename].drop()
        insert_mdb(dm, clientM=mdb, zpk=zpk, tablename=tablename,
                   ordered=ordered, **optx)
    return dm
def iex_spark_batch(tkLst=None, filter='*', types='chart', range='1d', chartLast=-999, nchunk=100, **optx):
    ''' Utility function to process data in iex_spark_hist()

    Fixes vs original: the empty-list error message had two format
    placeholders but one argument (IndexError when it fired), and the
    mutable default tkLst=[] is replaced by None (getTkLst handles None).
    '''
    dbname = getKeyVal(optx, 'dbname', 'ara')
    tablename = getKeyVal(optx, 'tablename', 'iex_spark_hist')
    tabq = 'yh_quote_curr'
    zpk = getKeyVal(optx, 'zpk', ['ticker', 'epochs'])
    ordered = getKeyVal(optx, 'ordered', False)
    optx.pop('tablename', None)
    optx.pop('zpk', None)
    optx.pop('ordered', None)
    saveDB = getKeyVal(optx, 'saveDB', False)
    dfTF = getKeyVal(optx, 'dfTF', False)
    tkLst, mxdate = getTkLst(tkLst)
    chunkLst = list2chunk(tkLst, nchunk)
    mdb = conn2mgdb(dbname=dbname)
    mCur = mdb[tabq]
    jdM = []
    if len(tkLst) < 1:
        #dQuotes=iex_quote_short(tkLst,isNew=True)
        # BUGFIX: format string matched to its single argument
        sys.stderr.write("**ERROR: quotes date not update @ {}\n".format(mxdate))
        return []
    for j, tkM in enumerate(chunkLst):
        try:
            jdTmp = iex_batchRaw(tkM, filter=filter, types=types, range=range,
                                 chartLast=chartLast, **optx)
            # pull quote info
            jobj = {'ticker': {'$in': tkM}}
            res = mCur.find(jobj)
            jqTmp = [x for x in res]
            if not isinstance(jdTmp, dict) or len(jdTmp) < 1:
                continue
            dx = iex_spark_process(jdTmp, dq=jqTmp, **optx)
            if len(dx) < 1:
                continue
            if saveDB is True:
                insert_mdb(dx, zpk=zpk, tablename=tablename, ordered=ordered, **optx)
            if not dfTF:
                jdM.extend(dx)
        except Exception as e:
            # best-effort batch: log this chunk and continue with the next
            sys.stderr.write("**ERROR: {}\n{}\n".format(tkM, str(e)))
            continue
    sys.stderr.write("tkLst{}:\n{}\n".format(tkLst[:5], jdM[:5]))
    if dfTF is True:
        jdM = pd.DataFrame(jdM)
    return jdM
import pprint #%matplotlib inline #import matplotlib.pyplot as plt from hashlib import md5 if sys.version_info.major == 2: reload(sys) sys.setdefaultencoding('utf8') dtCvt = lambda x: str(x) if isinstance(x, (ObjectId,datetime.datetime)) else x # set pandas global display formula pd.set_option('display.max_colwidth', -1) pd.options.display.float_format='{:,.2f}'.format # set mongoDB database connection as globals() mgDB=conn2mgdb(dbname='ara') pgDB=conn2pgdb(dbname='ara') currDate=datetime.datetime.now() def verify_apikey(ak): if ak=='beyondbond_internal_testing_only': return 1 ht=pd.read_csv("/apps/fafa/webDir/loginDir/.htpasswd.dat",sep="\t") aLst=list(ht['api_key']) return 1 if ak.lower() in aLst else 0 class myPrettyPrinter(pprint.PrettyPrinter): def format(self, object, context, maxlevels, level): if isinstance(object, unicode): #return (object.encode('utf8'), True, False) return (object, True, False)