def create_topic_theme_media(start=20200101, dbname="ara", tablename="topic_theme_media", tablesrc="madmoney_hist", **optx):
    """
    Build the 'topic_theme_media' theme table.

    Pulls madmoney buy calls (Call in {'4','5'}) newer than `start` from MDB
    `dbname`::`tablesrc`, aggregates per-ticker buy count / latest buy date,
    merges live yahoo quotes, and upserts the result into `dbname`::`tablename`.

    Returns the merged DataFrame.
    """
    # FIX: upsert_mdb is called below but was never imported in this function
    from _alan_str import find_mdb, upsert_mdb
    from yh_chart import yh_quote_comparison as yqc
    # FIX: sortLst was a set ({'ticker','pbdate'}) whose iteration order is
    # undefined; use a list so the sort specification is deterministic
    dtmp, mDB, errmsg = find_mdb(
        {'pbdate': {'$gt': start}, 'Call': {'$in': ['4', '5']}},
        sortLst=['ticker', 'pbdate'],
        dbname=dbname, tablename=tablesrc, dfTF=True)
    # per-ticker number of buy calls and most recent call date
    dg = dtmp.groupby(['ticker']).apply(lambda x: pd.Series(
        [x.Call.count(), x.pbdate.max()], index=['buyCount', 'buyDate']))
    renameDict(dtmp, {'pbdate': 'buyDate', 'Price': 'buyPrice'})
    # keep only the row matching each ticker's latest buy date
    mediaD = dtmp.merge(dg, on=['ticker', 'buyDate'])
    colX = ['ticker', 'buyCount', 'buyDate', 'buyPrice', 'sector', 'industry']
    mediaD = subDict(mediaD, colX)
    # attach live quote info
    quoLst = yqc(mediaD['ticker'].values)
    quoD = pd.DataFrame(quoLst)
    colX = ['ticker', 'close', 'fiftyTwoWeekRange', 'marketCap', 'pbdate',
            'shortName', 'changePercent', 'epsTrailingTwelveMonths', 'pbdt']
    quoD = subDict(quoD, colX)
    quoD = renameDict(quoD, dict(epsTrailingTwelveMonths='EPS', close='closePrice',
                                 shortName='Company', fiftyTwoWeekRange='Range52Week',
                                 changePercent='dayChg%', change='Chg', pbdt='pubDate'))
    df = mediaD.merge(quoD, on='ticker')  # - remove no-quote rows # ,how='left')
    df.dropna(subset=['marketCap'], inplace=True)
    # percent change since the buy call
    df['buyChg%'] = (df['closePrice'] / df['buyPrice'].astype(float) - 1) * 100
    colX = ['ticker', 'buyCount', 'buyDate', 'marketCap', 'buyPrice', 'closePrice',
            'buyChg%', 'dayChg%', 'EPS', 'Company', 'Range52Week', 'pbdate', 'pubDate',
            'sector', 'industry']
    #df=subDict(df,colX)
    # use pqint for stderr reporting, consistent with create_topic_theme_ipo
    pqint(" --media DF:\n{}".format(df), file=sys.stderr)
    zpk = optx.pop('zpk', {'ticker'})
    upsert_mdb(df, dbname=dbname, tablename=tablename, zpk=zpk)
    sys.stderr.write(" --DF:\n{}\n".format(df.head().to_string(index=False)))
    return df
def create_topic_theme_ipo(updTF=False, **opts):
    """
    Create 'topic_theme_ipo' from 'nasdaq_ipos' joined with yahoo live quotes.

    Recent IPO rows are pulled from MDB 'ara'::'nasdaq_ipos', merged with
    current quote data, enriched with the IPO-to-date percent change, and
    upserted into `tablename` (default 'topic_theme_ipo').

    Returns the resulting DataFrame.
    """
    from _alan_calc import renameDict, subDict
    from _alan_str import find_mdb, upsert_mdb
    from yh_chart import yh_quote_comparison as yqc
    # Note: 500 limit may cause close prices of certain tickers not get updated,
    # need further debugging
    nRows = opts.pop('limit', 500)
    ipoLst, _, _ = find_mdb(tablename='nasdaq_ipos', dbname='ara',
                            sortLst=['pbdate'], limit=nRows, dfTF=True)
    ipoLst = renameDict(ipoLst, dict(pbdate='ipoDate', price='ipoPrice'))
    ipoD = subDict(ipoLst, ['ticker', 'ipoDate', 'ipoPrice', 'sector', 'industry'])
    # live quotes for the IPO tickers
    quoteDF = pd.DataFrame(yqc(ipoD['ticker'].values))
    quoteCols = ['ticker', 'close', 'fiftyTwoWeekRange', 'marketCap', 'pbdate',
                 'shortName', 'changePercent', 'epsTrailingTwelveMonths', 'pbdt']
    quoteDF = subDict(quoteDF, quoteCols)
    quoteDF = renameDict(quoteDF,
                         dict(epsTrailingTwelveMonths='EPS',
                              close='closePrice',
                              shortName='Company',
                              fiftyTwoWeekRange='Range52Week',
                              changePercent='dayChg%',
                              change='Chg',
                              pbdt='pubDate'))
    df = ipoD.merge(quoteDF, on='ticker')  # - remove no-quote rows # ,how='left')
    # drop tickers that returned no quote
    df.dropna(subset=['marketCap'], inplace=True)
    # total return since IPO, in percent
    df['ipoChg%'] = (df['closePrice'] / df['ipoPrice'].astype(float) - 1) * 100
    keepCols = ['ticker', 'ipoDate', 'marketCap', 'ipoPrice', 'closePrice',
                'ipoChg%', 'dayChg%', 'EPS', 'Company', 'Range52Week',
                'pbdate', 'pubDate', 'sector', 'industry']
    df = subDict(df, keepCols)
    pqint(" --ipo DF:\n{}".format(df), file=sys.stderr)
    upsert_mdb(df,
               dbname=opts.pop('dbname', 'ara'),
               tablename=opts.pop('tablename', 'topic_theme_ipo'),
               zpk=opts.pop('zpk', {'ticker'}))
    sys.stderr.write(" --DF:\n{}\n".format(df.head().to_string(index=False)))
    return df
def topic_theme_majorplayer(**opts):
    """
    Fetch major-player theme rows from MDB; 'themename' doubles as the table name.

    Returns a DataFrame filtered by `fund`, trimmed to display columns and
    renamed to presentation-friendly headers.
    """
    from _alan_str import find_mdb
    fund = getKeyVal(opts, 'fund', 'renaissance-technologies-llc')
    clientM = getKeyVal(opts, 'clientM', None)
    dbname = getKeyVal(opts, 'dbname', 'ara')
    themename = getKeyVal(opts, 'themename', 'topic_theme_majorplayer')
    dfTF = getKeyVal(opts, 'dfTF', True)
    df, mDB, errmsg = find_mdb(dict(fund=fund), tablename=themename,
                               dbname=dbname, dfTF=dfTF)
    keepCols = ['ticker', 'close', 'marketCap', 'changePercent', 'CurrentShares',
                'percentPos', 'SharesChangePercent', 'fiftyTwoWeekRange',
                'pbdate', 'shortName', 'fund', 'funddate', 'pbdt']
    headerMap = {'close': 'closePrice',
                 'pbdate': 'closeDate',
                 'shortName': 'company',
                 'SharesChangePercent': 'SharesChg%',
                 'changePercent': 'Chg%',
                 'percentPos': 'Position%'}
    # subset first, then rename for display
    df = renameDict(subDict(df, keepCols), headerMap)
    return df
def search_comment(tkLst, fdLst, **opts):
    """
    Dispatch a comment search for tickers `tkLst` and fields `fdLst`.

    If opts['topic'] names a known topic with a matching '<topic>_comment'
    function in globals(), delegate to it; otherwise iterate tickers through
    geteach_comment(). Returns the collected data, or None when the topic is
    unknown or nothing is found.
    """
    topicLst = 'hourly|news|report|theme|peers|industry|MFRM'.split('|')
    topic = getKeyVal(opts, 'topic', 'MFRM')
    if topic not in topicLst:
        return None
    argName = "{}_comment".format(topic)
    if topic in topicLst and argName in globals():
        pqint("==RUNNING {}() Inputs:{}".format(argName, opts), file=sys.stderr)
        # FIX: 'data' was unbound when the delegated call raised, causing a
        # NameError on the return below; initialize it first
        data = None
        try:
            data = globals()[argName](tkLst, fdLst, **opts)
        except Exception as e:
            pqint("**ERROR:{} to run {}".format(str(e), argName), file=sys.stderr)
        return data
    output = opts.pop('output', None)
    data = pd.DataFrame()
    optx = subDict(opts, ['tablename', 'lang', 'dbname', 'hostname', 'topic', 'subtopic', 'factor'])
    # '*' means one bulk query instead of per-ticker loops
    if tkLst[0] == '*':
        data = geteach_comment('*', fdLst, **optx)
        # data = data_output(data,output)
        return data
    for ticker in tkLst:
        ret = geteach_comment(ticker, fdLst, **optx)
        if ret is not None and len(ret) > 0:
            # NOTE(review): DataFrame.append is removed in pandas>=2;
            # would need pd.concat if the runtime pandas is upgraded
            data = data.append(ret, ignore_index=True)
        else:
            continue
    # data = data_output(data,output)
    if len(data) < 1:
        return None
    return data
def topic_theme_ipo(**opts):
    """
    Query the precomputed 'topic_theme_ipo' MDB table (built daily via
    create_topic_theme_ipo in crontab) with an optional sector filter and
    return the display columns. subtab='conflict' keeps only rows with
    ipoChg% > 10.
    """
    from _alan_str import find_mdb
    subtab = getKeyVal(opts, 'subtab', '')
    opts.pop('output', None)  # not output type assigned
    updTF = opts.pop('updTF', False)
    jobj = json.loads(opts.pop('jobj', '{}'))
    sector = opts.pop('sector', '')
    # a comma-separated sector list becomes a $in filter
    if len(sector) > 5:
        sLst = sector.split(',')
        jobj.update(sector={"$in": sLst})
    #df = get_ipoReport(updTF=updTF,**opts) # DEPRECATED
    #df = create_topic_theme_ipo(updTF=updTF,**opts) # SETUP in crontab
    tablename = opts.pop('tablename', None)
    if tablename is None:
        tablename = 'topic_theme_ipo'
    df, _, _ = find_mdb(jobj, dbname='ara', tablename=tablename, dfTF=True)
    colX = ['ticker', 'ipoDate', 'marketCap', 'ipoPrice', 'Price', 'closePrice',
            'ipoChg%', 'dayChg%', 'EPS', 'Range52Week', 'Company', 'sector',
            'industry', 'pubDate']
    df = subDict(df, colX)
    #-onTheFly run, not used
    #-run daily crontab to create 'topic_theme_ipo' table via
    #-python3 -c "from ipoReport import create_topic_theme_ipo as ctt;ctt()"
    if subtab.lower() == 'conflict':
        # FIX: '"ipoChg%">10' compared the *string literal* "ipoChg%" against 10
        # (a TypeError at runtime); backticks reference the column whose name
        # contains special characters
        df = df.query('`ipoChg%` > 10')
    return df
def find_lsi2nlg_info(jobj=None, fieldLst=None, dbname='ara', tablename='lsi2nlg'):
    """
    Look up one lsi2nlg template record.

    Filters `jobj` down to `fieldLst` keys and queries `dbname`::`tablename`.
    Returns the first matching document, or [] when nothing matches.
    """
    # FIX: mutable default arguments ({} and []) are shared across calls;
    # use None sentinels instead (same effective defaults)
    if jobj is None:
        jobj = {}
    if fieldLst is None:
        fieldLst = ['username', 'category', 'tmplname', 'lang']
    findDct = subDict(jobj, fieldLst)
    mobj, clientM, err_msg = find_mdb(jobj=findDct, dbname=dbname, tablename=tablename)
    # first match or empty list when nothing is found
    return mobj[0] if len(mobj) > 0 else []
def run_j2ts(optGet=None, optPost=None):
    """
    Render the 'j2ts' jinja2 template string from merged GET/POST parameters.

    POST values are applied first, then GET values override them. The 'j2ts'
    key (if present) is the template; remaining keys form the render context.
    Returns the rendered string, or a usage hint when no template is given.
    """
    # FIX: mutable default arguments ({}) are shared across calls; use None
    pqint(optGet, optPost, file=sys.stderr)
    dd = {}
    if optPost is not None:
        dd.update(optPost)
    if optGet is not None:
        dd.update(optGet)
    if 'j2ts' in dd:
        j2ts = dd['j2ts']
        # drop the template itself from the render context
        dd = subDict(dd, ['j2ts'], reverseTF=True)
    else:
        j2ts = 'Usage of /?key1=value1&key2=value2 ...'
    return jj_fmt(j2ts, dd, j2onlyTF=True)
def search_quote(tkLst, fdLst, **opts):
    """
    Collect latest quotes for tickers/series in `tkLst`.

    Stocks and index-like symbols (containing '=', '^' or '.') are read from
    MDB spark/quote tables; other series come from pgDB via geteach_quote().
    Returns a DataFrame (or raw list when outTF is False), or None when
    nothing is found.
    """
    tkLst = get_sector_etfname(tkLst, **opts)
    sys.stderr.write("---tkLst: {} @ search_quote\n".format(tkLst))
    instrument = getKeyVal(opts, 'instrument', 'stock')
    outTF = getKeyVal(opts, 'outTF', True)
    hostname, dbname, tablename, lang = getKeyVal(opts, ['hostname', 'dbname', 'tablename', 'lang'], ['localhost', 'ara', None, None])
    # pgDB lookup key column depends on the instrument type
    colx = 'ticker' if instrument == 'stock' else 'series'
    data = []
    opts.pop('ticker', None)
    for ticker in tkLst:
        try:
            # get quotes from MDB::"yh_quote_curr" for yahoo source indices setup in the PGDB::'mapping_series_label'
            if instrument == 'stock' or re.search(r'[=^.]', ticker):
                # choose the MDB table by symbol class:
                # major market indices / other index-like symbols / plain stocks
                mktLst = ['^GSPC', '^DJI', '^IXIC', '^SOX']
                if ticker.upper() in mktLst:
                    tablename = "market_indicator_quote"
                elif re.search(r'[=^.]', ticker):
                    tablename = "yh_spark_hist"
                else:
                    tablename = "iex_spark_hist"
                jobj = {"ticker": ticker}
                # latest record only (sorted by epochs, descending)
                ret = list(mgDB[tablename].find(jobj, {"_id": 0}, sort=[("epochs", -1)]).limit(1))
                #ret,_,_=find_mdb(jobj,tablename=tablename,dbname="ara")
                ret = subDict(ret, ['ticker', 'close', 'change', 'pchg', 'xclose', 'epochs', 'pbdate', 'pbdt'])
                ret = renameDict(ret, {'pchg': 'changePercent', 'xclose': 'prevClose'})
            else:
                # get quotes for all fields from pgDB
                ret = geteach_quote(ticker, fdLst='*', tablename=tablename, lang=lang, dbname=dbname, hostname=hostname, colx=colx)
            if ret is not None and len(ret) > 0:
                #data=data.append(ret,ignore_index=True)
                data.extend(ret)
            else:
                continue
        except Exception as e:
            pqint("**ERROR:{} @ {}".format(str(e), search_quote), file=sys.stderr)
            continue
    if len(data) < 1:
        return None
    if not outTF:
        return data
    data = pd.DataFrame(data)
    # fdLst None or 'all' keeps every column; otherwise trim to the standard set
    if fdLst is None:
        pass
    elif len(fdLst) > 2 and fdLst.lower() == 'all':
        pass
    else:
        colx = ['ticker', 'epochs', 'open', 'high', 'low', 'close', 'volume', 'xclose', 'change', 'pchg', 'pbdt', 'hhmm', 'pbdate', 'changePercent', 'prevClose', 'marketCap']
        data = subDF(data, colx)
    # data = data_output(data,output)
    return data
def run_fcs(ticker, debugTF=False, funcName='rForecast', **optx):
    """
    Run R-based forecast `funcName` (from _alan_ohlc_fcs.r) on the closing
    prices of `ticker`.

    Returns (dd, dwm, datax): the forecast table, the day/week/month forecast
    table (empty DataFrame when dwmTF is False), and the pulled price data.

    Raises KeyError when `funcName` is not defined in the R global environment.
    """
    # get data
    datax = pull_stock_data(ticker)
    asof = int(datax['pbdate'].iloc[-1])
    # idxtm=map(lambda x:datetime.datetime.strptime(str(x),"%Y%m%d"),datax['pbdate'])
    # datax.set_index(pd.DatetimeIndex(idxtm),inplace=True)
    if debugTF is True:
        # FIX: was python2 'print datax.tail()'; converted to py3 style
        # consistent with the rest of the file
        pqint(datax.tail(), file=sys.stderr)
    # get r-code
    pandas2ri.activate()
    rstring = 'source("./_alan_ohlc_fcs.r")'
    r(rstring)
    # convert to r-data
    #df=pandas2ri.py2ri(datax[['pbdate','close']])
    df = pandas2ri.py2ri(datax['close'][:])
    # run r-function with defaults, overridable via **optx
    opts = {'nfcs': 30, 'dwmTF': True, 'autoArima': False, 'difTF': True,
            'funcname': 'rAR', 'logTF': True, 'plevel': 0.7, 'freq': 'W'}
    opts.update(optx)
    optR = subDict(opts, ['nfcs', 'plevel', 'funcname', 'autoArima', 'logTF',
                          'difTF', 'freq', 'fcsLst', 'dwmTF'])
    if debugTF:
        # FIX: python2 'print >> sys.stderr' statements converted to py3
        pqint("==Input Args:{}".format(optR), file=sys.stderr)
        pqint("==asof {},df:\n{}".format(asof, datax['close'][-5:]), file=sys.stderr)
    # FIX: funcArg could previously be unbound when funcName was missing;
    # fail fast with an explicit error instead of a NameError
    if funcName not in robj.globalenv:
        raise KeyError("{} not found in R globalenv".format(funcName))
    funcArg = robj.globalenv[funcName]
    ret = funcArg(df, asof, debugTF=debugTF, **optR)
    if opts['dwmTF'] is True:
        dwm = pandas2ri.ri2py(ret[1])
        dwm['ticker'] = ticker
    else:
        dwm = pd.DataFrame()
    dd = pandas2ri.ri2py(ret[0])
    dd['ticker'] = ticker
    return (dd, dwm, datax)
def batch_yh_hist(tkLst=[], opts=None, **optx):
    """
    Batch-run yahoo history pulls for `tkLst` via the function named by
    opts['funcArg'] (default yh_hist).

    When opts['output'] is empty, results accumulate into one DataFrame
    (returned); when output is 'csv'/'html'/'json', each ticker's frame is
    streamed to stdout instead.
    """
    #- Set input parameters
    if opts is None or len(opts) < 1:
        opts, _ = opt_yh_hist([])
    if optx is not None:
        opts.update(optx)
    # option keys forwarded to the per-ticker fetch function
    kys = ['gap', 'ranged', 'tsTF', 'pchgTF', 'debugTF']
    debugTF = getKeyVal(opts, 'debugTF', False)
    indexTF = getKeyVal(opts, 'indexTF', True)
    output = getKeyVal(opts, 'output', None)
    sep = getKeyVal(opts, 'sep', '|')
    #for ky,va in opts.items():
    #	exec('{}=va'.format(ky))
    hdrTF = True
    # resolve the fetch function by name, falling back to yh_hist
    if 'funcArg' in opts and opts['funcArg'] in globals():
        funcArg = globals()[opts['funcArg']]
    else:
        funcArg = yh_hist
    # '-' means read the ticker list from stdin
    if len(tkLst) > 0 and tkLst[0] == '-':
        tkLst = sys.stdin.read().split("\n")
    dm = pd.DataFrame()
    for j, ticker in enumerate(tkLst):
        # header only for the first ticker (csv streaming)
        hdrTF = True if j < 1 else False
        try:
            df = funcArg(ticker, hdrTF=hdrTF, **subDict(opts, kys))
            if len(df) < 1:
                continue
            if output is None or len(output) < 1:
                dm = pd.concat([dm, df])
        except Exception as e:
            pqint("**ERROR: {}.{}\n{}".format(j, ticker, str(e)), file=sys.stderr)
            continue
        if output is not None and 'ticker' not in df:
            df['ticker'] = ticker
        if output == 'csv':
            # unescape separator (e.g. '\t') in a py2/py3 compatible way
            sep = sep.encode().decode('unicode_escape') if sys.version_info.major == 3 else sep.decode("string_escape")
            sys.stdout.write(df.to_csv(sep=sep, index=indexTF, header=hdrTF))
        elif output == 'html':
            sys.stdout.write(df.to_html(index=indexTF))
        elif output == 'json':
            sys.stdout.write(df.to_json(orient='records'))
        hdrTF = False
    return dm
def run_pppscf(ticker, opts=None, pgDB=None, stdinTF=False, debugTF=False):
    """ Calc Past Price Performance Summary & CashFlow
        Return dictionary of {'data','dfdr'} where
        data: dataframe data of historical prices applied
        dfdr: dataframe the PPP Summary of data
    """
    if opts is None:
        opts, _ = opt_pppscf([])
    #python3 compatible
    #for ky,va in opts.items():
    #	exec("{}=va".format(ky))
    npar = opts['npar']
    optx = subDict(opts, ['sep', 'start', 'end', 'src', 'days'])
    if stdinTF is True:
        pqint("==RUNNING get_csvdata({sep},{start},{end},{src},{days})".format(**optx), file=sys.stderr)
        data = get_csvdata('-', pgDB=pgDB, **optx)
    else:
        data = pullStockHistory(ticker, pgDB=pgDB, **optx)
    # FIX: isinstance(data.index, (datetime.date, datetime.datetime)) is always
    # False for a pandas Index object; test for a DatetimeIndex instead
    if isinstance(data.index, pd.DatetimeIndex) is False:
        if 'pbdate' in data:
            idxpt = [ymd_parser(x, fmt="%Y%m%d") for x in data['pbdate']]
            data.set_index(pd.DatetimeIndex(idxpt), inplace=True)
        elif 'epochs' in data:
            idxpt = [epoch_parser(x) for x in data['epochs']]
            data.set_index(pd.DatetimeIndex(idxpt), inplace=True)
    prc = data['close']
    prc = prc.dropna(axis=0, how='all')
    # polyfit the data at [npar] polynomial
    _, dfdr = vertex_locator(prc, npar=npar, debugTF=debugTF)
    pqint(dfdr, file=sys.stderr)
    pqint(data.head(5), file=sys.stderr)
    # calc additional stats
    # using last 3-month as z-score benchmark rather than entire period
    vx = prc[-62:]  # vx=prc
    stdev = vx.std()
    avg = vx.mean()
    zs = avg / stdev
    dfdr = calc_pppchgs(dfdr, zs=zs, stdev=stdev, avg=avg)
    # FIX: positional axis argument to drop() was removed in pandas 2.0
    dfdr = dfdr.drop(columns=['date'])
    dfdr['ticker'] = ticker
    return {"data": data, "dfdr": dfdr}
def quote_dx2dd(jdTmp, dicX, colX):
    """
    Normalize a yahoo quote dict in place.

    Renames keys via `dicX`, derives epochs/pbdt/hhmm/pbdate from
    'regularMarketTime', and returns the subset of keys in `colX`
    (the whole dict when colX is empty). Returns {} for quotes that
    have no market time.
    """
    if "regularMarketTime" not in jdTmp:
        # FIX: 'ticker' was an undefined name here (NameError on the warning
        # path); pull it from the quote dict instead
        ticker = jdTmp.get("ticker", jdTmp.get("symbol", ""))
        sys.stderr.write("**WARNING:{} {}\n".format(ticker, "is invalid."))
        return {}
    renameDict(jdTmp, dicX)
    #keep "regularMarketPreviousClose" for backward compatibility
    if "regularMarketPreviousClose" in jdTmp:
        jdTmp['xclose'] = jdTmp["regularMarketPreviousClose"]
    if "changePercent" in jdTmp:
        jdTmp['pchg'] = jdTmp["changePercent"] / 100.0
    epoch = int(jdTmp["regularMarketTime"])
    jdTmp['epochs'] = epoch * 1000  # milliseconds
    jdTmp['pbdt'] = pbdt = datetime.datetime.fromtimestamp(epoch)
    jdTmp['hhmm'] = pbdt.strftime('%H%M')
    jdTmp['pbdate'] = int(pbdt.strftime('%Y%m%d'))
    dd = subDict(jdTmp, colX) if len(colX) > 0 else jdTmp
    return dd
def add_sector_industry(df=[]):
    """
    Left-join 'sector'/'industry' columns onto `df` (requires a 'ticker'
    column) using cached yahoo summaryProfile data.

    Returns [] on empty input, `df` unchanged when no profile data is found,
    otherwise the merged DataFrame.
    """
    from yh_chart import runOTF
    from _alan_calc import subDict
    if len(df) < 1:
        return []
    # FIX: the ticker list was computed twice (once into an unused local);
    # compute it once and reuse
    tkLst = list(df['ticker'].values)
    datax = runOTF('yh_financials', tkLst,
                   modules="summaryProfile",
                   dbname='ara', tablename="yh_summaryProfile", zpk={'ticker'},
                   deltaTolerance=8640000)  # cache tolerance in seconds (~100 days)
    if len(datax) < 1:
        return df
    dg = subDict(pd.DataFrame(datax), ['ticker', 'sector', 'industry'])
    df = df.merge(dg, on='ticker', how='left')
    return df
def theme_comment(tkLst, fdLst, subtopic='ipo', **opts):
    """
    Build theme commentary data for `subtopic` ('majorplayer'|'media'|'ipo')
    via _alan_rmc.run_topic_theme, optionally trimmed to the comma-separated
    fields in `fdLst`. Returns the resulting data, or {}/[] when empty.
    """
    from _alan_rmc import run_topic_theme
    outTF = opts.pop('outTF', True)
    # FIX: df was unbound (NameError below) when subtopic did not match
    df = pd.DataFrame()
    if subtopic.lower() in ['majorplayer', 'media', 'ipo']:
        funcName = "topic_{}_{}".format("theme", subtopic)
        sys.stderr.write("===RUNNING {} with OPTS:{}\n".format(funcName, opts))
        df = run_topic_theme(funcName=funcName, subtopic=subtopic, outTF=outTF, **opts)
    # FIX: len(df)<0 can never be true; use <1 to catch empty results
    if len(df) < 1:
        return {}
    if fdLst not in ['*', ''] and fdLst is not None:
        fields = fdLst.split(',')
        df = subDict(df, fields)
        if len(df) < 1:  # FIX: was len(df)<0 (never true)
            return []
    if isinstance(df, pd.DataFrame):
        sys.stderr.write("==OPTS:{},FIELDS:{}, DATA:\n{}".format(opts, fdLst, df.tail(2)))
    data = df
    # data = data_output(df,**opts)
    return data
def run_ohlc_fcs(ticker, opts=None, debugTF=False, pgDB=None, **kwargs):
    """ forecast 'nfcs' periods based on raw data 'datax'
        return (dd,dwm,datax) where
        dd: forecast values
        dwm: forecast values of next day, week, month (optional)
        datax: data used for forecast calculation
        Note, dwm can be optional if dwmTF is False
    """
    if opts is None:
        (opts, _) = opt_ohlc_fcs([])
    if len(kwargs) > 0:
        opts.update(kwargs)
    if debugTF:
        pqint(opts, file=sys.stderr)
    days = getKeyVal(opts, 'days', 730)
    freq = getKeyVal(opts, 'freq', 'D')
    # get data: `ticker` may already be a price DataFrame
    if isinstance(ticker, pd.DataFrame):
        datax = ticker
        ticker = ''
    else:
        # get data
        optx = subDict(opts, ['src', 'days', 'start', 'end'])
        datax = pull_stock_data(ticker, pgDB=pgDB, **optx)
        if 'ticker' in datax:
            ticker = datax['ticker'].iloc[0]
    if datax is None or len(datax) < 1:
        return (None, None, None)
    #idxtm=map(lambda x:datetime.datetime.strptime(str(x),"%Y%m%d"),datax['pbdate'])
    #datax.set_index(pd.DatetimeIndex(idxtm),inplace=True)
    if debugTF is True:
        pqint(opts, file=sys.stderr)
        pqint(datax.tail(), file=sys.stderr)
    nobs = days
    # derive the as-of date and forecast horizons from the data granularity:
    # epoch-indexed (intraday) vs pbdate-indexed (daily) data
    if 'epochs' in datax:
        asof = int(datetime.datetime.fromtimestamp(int(datax['epochs'].iloc[-1]) / 1000).strftime('%Y%m%d'))
        fcsLst = np.array([5, 10, 30])
    else:
        asof = int(datax['pbdate'].iloc[-1])
        fcsLst = np.array([1, 5, 23])
    vprc = datax['close'][-nobs:]
    # get r-code (sourced from this file's directory)
    pandas2ri.activate()
    fpath = os.path.dirname(__file__)
    if len(fpath) < 1:
        fpath = '.'
    rstring = 'source("{}/{}")'.format(fpath, "_alan_ohlc_fcs.r")
    if debugTF:
        pqint(rstring, file=sys.stderr)
    r(rstring)
    # convert to r-data
    df = pandas2ri.py2ri(vprc)
    # run r-function [rGARCH|rAR]
    optx = subDict(opts, ['nfcs', 'plevel', 'funcname', 'autoArima', 'logTF', 'difTF', 'freq', 'fcsLst', 'dwmTF'])
    if debugTF:
        pqint("==Input Args:{}".format(optx), file=sys.stderr)
        pqint("==df\n:{}".format(vprc.tail()), file=sys.stderr)
    ret = robj.globalenv['rForecast'](df, asof, debugTF=debugTF, **optx)
    #ret=robj.globalenv['rForecast'](df,asof,plevel=plevel,funcname=funcname,autoArima=autoArima,debugTF=debugTF,logTF=logTF,difTF=difTF,freq=freq,fcsLst=fcsLst)
    # ret[1] holds the day/week/month forecast table when dwmTF is on
    if opts['dwmTF'] is True:
        dwm = pandas2ri.ri2py(ret[1])
        dwm['ticker'] = ticker
    else:
        dwm = pd.DataFrame()
    dd = pandas2ri.ri2py(ret[0])
    dd['ticker'] = ticker
    dd['freq'] = freq
    return (dd, dwm, datax)
def pn2mp4(_pn_={}, zpk=[], debugTF=False, **optx):
    '''
    convert _pn_ that contail svg, comment, mp3comment into relevant mp3 and mp4 files
    and save file locations and save to MDB table: 'mkt_briefing_media' or 'daily_single_stock_media'
    '''
    if 'tmplLst' not in _pn_:
        return {}
    tmplLst = _pn_['tmplLst']
    title = _pn_['title']
    # keep the key fields plus headline/comment metadata for the MDB record
    dpn = subDict(_pn_, zpk + ['title', 'pbdt', 'headTitle', 'intraday_headline', 'daily_headline', 'comment', 'mp3comment'])
    videoLst = []
    j = 0  # count of successfully rendered segments
    for tname in tmplLst:
        if tname not in _pn_:
            sys.stderr.write("===NotFound: @ {}\n".format(tname))
            continue
        sys.stderr.write("===Running: {}\n{}\n".format(tname, _pn_[tname]))
        try:
            dx = _pn_[tname]
            if 'chartpath' not in dx or dx['chartpath'] is None:
                continue
            chartpath = dx['chartpath']
            # derive output dir and basename from the chart path sans .svg
            xtmp = re.sub("(.svg)?", "", chartpath)
            outdir, mpname = os.path.dirname(xtmp), os.path.basename(xtmp)
            if len(mpname) < 1:
                sys.stderr.write("**WARNING: @ {}\t{}\n".format(tname, "Filename not exists"))
                continue
            dx.pop('title', None)
            # drop non-scalar values before passing dx as a template object
            vk = [k for k in dx]
            for k in vk:
                v = dx[k]
                if isinstance(v, (dict, tuple, list, pd.DataFrame)):
                    dx.pop(k, None)
                    #dx.update(k=None)
            # render the segment's audio/subtitle/video files
            txtChart2audioSrtVideo(tmplname=tname, mpname=mpname, tmplobj=dx, debugTF=debugTF, **optx)
            dpn.update({tname: dx})
            videoLst.append(dx['videoPath'])
            sys.stderr.write("\n==={}:{} successfully created!\n".format(tname, dx['videoPath']))
        except Exception as e:
            sys.stderr.write("**ERROR: @ {}\n\t{}\n".format(tname, str(e)))
            continue
        j = j + 1
    if j < 1:
        return {}
    sys.stderr.write("===Total:{}: {}\n".format(j, videoLst))
    try:
        # epoch suffix taken from the last rendered segment's name
        xpoch = mpname.split("_")[-1]
        if title == 'daily_single_stock':
            ticker = getKeyVal(_pn_, 'ticker', None)
            videoPath = '{}_{}_{}_ALL.mp4'.format(title, ticker, xpoch)
        else:
            videoPath = '{}_{}_ALL.mp4'.format(title, xpoch)
        # concatenate all segment videos into one combined mp4
        run_concat(videoLst=videoLst, outdir=outdir, videoPath=videoPath)
        #dpn.update(comment=rptTxt)
        dpn.update(videoPath=videoPath)
        sys.stderr.write("===Combine: {} To {}\n".format(videoLst, videoPath))
        if len(glob(outdir + "/" + videoPath)) > 0:
            sys.stderr.write("===videoLst: {}\n".format(videoLst))
            sys.stderr.write("===videopath: {} successfully created!\n".format(videoPath))
        else:
            sys.stderr.write("**ERROR: {} not created!\n".format(videoPath))
        # save to MDB 'dbname'::'tablename'
        dbname = optx.pop('dbname', 'ara')
        tablename = optx.pop('tablename', '')
        # table name is derived from the briefing title unless given explicitly
        if title[-9:] == '_briefing':
            tablename = 'mkt_briefing_media'
        elif title == 'daily_single_stock':
            tablename = title + '_media'
        if len(tablename) > 5:
            ret, _, emsg = upsert_mdb([dpn], zpk=zpk, dbname=dbname, tablename=tablename, ordered=False)
            sys.stderr.write("==errmsg:SAVE TO {} @ {}, STATUS:{}\n".format(tablename, dpn['pbdt'], str(emsg)))
        sys.stderr.write("==SUCCESS videoLst:{}, mp4:{}\n".format(videoLst, mpname))
    except Exception as e:
        sys.stderr.write("**ERROR: @ {}:{}\n\t{}\n".format(videoLst, mpname, str(e)))
    return dpn
def yh_hist_query(tkLst=[], filter='*', types='quote', nchunk=50, rawTF=False, screenerTF=False, dfTF=False, debugTF=False, dbname=None, tablename=None, **optx):
    '''
    Pull minute ohlc pricing data from Yahoo
    but use marketVolume as volume
    since market data has 15-minute delay, latest 15 marketVolumes become 0
    '''
    if len(tkLst) < 1:
        tkLst = list(pull_act_tickers()['ticker'])
    jdLst = yh_batchTypes(tkLst, filter=filter, types=types, nchunk=nchunk, debugTF=debugTF, **optx)
    colX = ["ticker", "open", "high", "low", "close", "volume", "xclose", "change", "pchg", "epochs", 'hhmm', "pbdate", "pbdt"]
    dLst = []
    df = pd.DataFrame()
    clientM = None
    tablename = 'yh_{}_temp'.format(types) if tablename is None else tablename
    for j, jdTmp in enumerate(jdLst):
        try:
            jdX = {}
            # three payload shapes: 'response' (spark), 'meta' (chart),
            # otherwise a flat quote dict
            if 'response' in jdTmp:
                ticker = jdTmp['symbol']
                jdd = jdTmp['response'][0]
            elif 'meta' in jdTmp:
                ticker = jdTmp['meta']['symbol']
                jdd = jdTmp
            else:
                #- for 'quote' parsing
                if "regularMarketPrice" not in jdTmp:
                    continue
                if "regularMarketTime" not in jdTmp:
                    continue
                # derive epoch-ms and publish date fields from market time
                jdTmp['epochs'] = jdTmp['regularMarketTime'] * 1000
                jdTmp['pbdt'] = datetime.datetime.fromtimestamp(jdTmp['regularMarketTime'])
                jdTmp['pbdate'] = int(jdTmp['pbdt'].strftime('%Y%m%d'))
                newNames = {"symbol": "ticker",
                            "regularMarketPrice": "close",
                            "regularMarketChange": "change",
                            "regularMarketChangePercent": "changePercent",
                            "regularMarketOpen": "open",
                            "regularMarketDayHigh": "high",
                            "regularMarketDayLow": "low",
                            "regularMarketVolume": "volume",
                            "regularMarketPreviousClose": "xclose",
                            "regularMarketTime": "epoch"}
                if rawTF:
                    renameDict(jdTmp, mapper=newNames)
                    if debugTF:
                        sys.stderr.write("{}\n".format(jdTmp))
                    # screener-style column subsets
                    if screenerTF == True:
                        colx = ["change", "changePercent", "company", "marketCap", "close", "ticker", "volume", "epochs", "pbdt"]
                        #ds=raw2screener_output_1(jdTmp)
                        ds = subDict(jdTmp, colx)
                        renameDict(ds, {"close": "price"})
                    elif screenerTF > 0:  # original False case
                        colx = list(set(newNames.values()).union(['epochs', 'pbdt', 'hhmm', 'pbdate', 'marketCap']))
                        ds = subDict(jdTmp, colx)
                    else:
                        ds = jdTmp
                    # optionally persist each raw quote dict to MDB
                    if all([dbname, tablename]):
                        zpk = getKeyVal(optx, 'zpk', ['ticker', 'epochs'])
                        #mobj,clientM,err_msg = write2mdb(ds,clientM,dbname=dbname,tablename=tablename,zpk=zpk)
                        mobj, clientM, err_msg = insert_mdb(ds, clientM, dbname=dbname, tablename=tablename, zpk=zpk)
                        if debugTF:
                            sys.stderr.write("{}\nSave to {}::{}\n".format(ds, dbname, tablename))
                    dLst.append(ds)
                    continue
                #- for 'spark' and 'chart' parsing
                dx = pd.DataFrame([jdTmp])
                dx.rename(newNames, axis='columns', inplace=True)
                if 'volume' not in dx:
                    continue
                dx.dropna(subset=['volume'], inplace=True)
                if len(dx) < 1:
                    continue
                colX = [x for x in colX if x in dx.columns]
                dm = dx[colX]
                if debugTF:
                    sys.stderr.write("quote:\n{}".format(dm.tail()))
                df = pd.concat([df, dm])
                continue
            # --- time-series payloads (spark/chart) ---
            xClose = None
            if 'meta' in jdd and 'previousClose' in jdd['meta']:
                xClose = jdd['meta']['previousClose']
            epoch = jdd['timestamp']
            for x, y in jdd['indicators']['quote'][0].items():
                jdX[x] = y
            jdX['epochs'] = np.array(epoch) * 1000
            dx = pd.DataFrame(jdX)
            dx['ticker'] = ticker
            # pchgTF=False: return raw series without change columns
            if 'pchgTF' in optx and optx['pchgTF'] is False:
                df = pd.concat([df, dx])
                continue
            elif 'pchgTF' in optx and optx['pchgTF'] and jdd['meta']['dataGranularity'][:1] != 'm':
                # non-minute granularity: compute bar-over-bar changes,
                # seeding the first bar from chartPreviousClose
                dx['pchg'] = dx['close'].pct_change()
                dx['change'] = dx['close'].diff()
                xChartClose = jdd['meta']['chartPreviousClose']
                dx.loc[dx.index[0], 'pchg'] = dx.loc[dx.index[0], 'close'] / xChartClose - 1
                dx.loc[dx.index[0], 'change'] = dx.loc[dx.index[0], 'close'] - xChartClose
            # derive datetime columns from the epoch vector
            pbdt = [datetime.datetime.fromtimestamp(int(x)) for x in epoch]
            dx['hhmm'] = [x.strftime('%H%M') for x in pbdt]
            dx['pbdate'] = [x.strftime('%Y%m%d') for x in pbdt]
            dx['pbdate'] = dx['pbdate'].astype(int)
            dx['pbdt'] = pbdt
            dx = dx.dropna()
            # with a known previous close, recompute changes against it
            if xClose is not None and xClose > 0:
                dx['pchg'] = dx['close'] / xClose - 1
                dx['change'] = dx['close'] - xClose
                dx['xclose'] = xClose
            colX = [x for x in colX if x in dx.columns]
            dm = dx[colX]
            if debugTF:
                sys.stderr.write("{}".format(dm.tail()))
            if dfTF:
                df = pd.concat([df, dm])
            else:
                dLst.extend(dm.to_dict(orient='records'))
        except Exception as e:
            sys.stderr.write("**ERROR: {}:{}:{}\n".format(j, jdTmp, str(e)))
            continue
    if len(df) > 0:
        df.reset_index(drop=True, inplace=True)
    # record-list results take precedence; convert to DataFrame when dfTF
    if len(dLst) > 0:
        if dfTF:
            dLst = pd.DataFrame(dLst)
        return dLst
    return df
def yh_batchSpark(tkLst=[], filter='*', types='spark', nchunk=100, saveDB=True, dbname='ara', tablename='yh_spark_hist', zpk={'ticker', 'epochs'}, t2='yh_quote_curr', t2zpk={'ticker'}, isEoD=False, **optx):
    '''
    pull data from types=[spark|quote] then save to mgDB 'dbname':: 'tablename' and 't2' respectively
    Note,
    if isEoD=True: quote data save to both mgDB 'dbname':: 'tablename' and 't2'
    if tablename or t2 ='' , nothing will be saved in 'tablename' or 't2'
    quote data will also be saved to pgDB 'dbname'::'t2' if isEoD=True and types='quote'
    '''
    debugTF = getKeyVal(optx, 'debugTF', False)
    if debugTF:
        sys.stderr.write("===LOCALS: {}\noptx: {}\n".format(locals(), optx))
    dbM = conn2mgdb(dbname=dbname)
    # default to the active-ticker universe; accept a single ticker string
    if tkLst is None or len(tkLst) < 1:
        tkLst = list(pull_act_tickers()['ticker'])
    elif isinstance(tkLst, str):
        tkLst = [tkLst]
    chunkLst = list2chunk(tkLst, nchunk)
    jdN = []
    colX = ['ticker', 'open', 'high', 'low', 'close', 'xclose', 'volume', 'pchg', 'change', 'pbdate', 'epochs', 'pbdt', 'hhmm']
    for j, tkM in enumerate(chunkLst):
        jdTmp = yh_batchRaw(tkM, types=types, **optx)
        jdQC = {}
        if types in ['spark', 'chart']:
            jdLst = jdTmp[types]['result']
            # convert the raw payload into row records
            if types == 'chart':
                jdM = chart2df(jdLst, **optx)
            else:
                jdM = batchSpark_sparkConv(jdLst, **optx)
            if len(jdM) < 1:
                continue
            if saveDB is True:
                m, dbM, err = insert_mdb(jdM, clientM=dbM, tablename=tablename, **optx)
                if debugTF:
                    sys.stderr.write("=== {} of {} saved to {}\n".format(tkM, jdM[-1], tablename))
            jdN.extend(jdM)
        else:  # for case of types.lower()=='quote'
            jdLst = jdTmp['quoteResponse']['result']
            jdQC = batchSpark_quoteConv(jdLst, **optx)
            if len(jdQC) < 1:
                continue
            jdM = subDict(jdQC, colX)
            if saveDB is True and len(jdQC) > 0:
                # current quotes upserted into 't2'
                if len(t2) > 0:
                    qc, dbM, erq = upsert_mdb(jdQC, clientM=dbM, tablename=t2, zpk=t2zpk, **optx)
                    sys.stderr.write("=== {} of {} saved to {}\n".format(tkM, jdQC[-1], t2))
                # end-of-day: also append the trimmed rows to the history table
                if isEoD is True:
                    m, dbM, err = insert_mdb(jdM, clientM=dbM, tablename=tablename, zpk=zpk, **optx)
                    sys.stderr.write("=== {} of {} saved to {}\n".format(tkM, jdM[-1], tablename))
            jdN.extend(jdQC)
        if debugTF:
            sys.stderr.write("=== tkM:{}[{}/{}], last:\n{}\n".format(tkM, len(jdM), len(jdN), jdN[-1]))
    # EoD quote runs are mirrored into pgDB as well
    if saveDB is True and len(t2) > 0 and len(jdN) > 0 and types.lower() == 'quote' and isEoD is True:
        try:
            df = pd.DataFrame(jdN)
            df = df.drop(['_id'], axis=1)
            if debugTF:
                sys.stderr.write("=== Save to pgDB::{} of {} rows(1st-last)\n{}\n{}\n".format(t2, len(df), df.iloc[:1], df.iloc[-1:]))
            from _alan_calc import save2pgdb
            save2pgdb(df, dbname, tablename=t2)
        except Exception as e:
            sys.stderr.write("**ERROR: {}:{}\n".format("save2pgdb", str(e)))
    return jdN
def mainTst(description="Regressing TICKER1 on TICKER2",
            optkys=['deg', 'method', 'start', 'freq', 'src', 'pct_chg_prd', 'debugTF', 'pngname', 'titlename', 'lagd', 'pct_chg_prd2', 'freq2', 'src2', 'logTF', 'log2TF', 'filename', 'monthlyTF', 'days']):
    """
    Parse CLI args, regress/polyfit TICKER1 on TICKER2, plot, and run
    OLS/curve_fit comparisons.

    Returns (titlename, dh, pprm, zroots, zvtx, opts).
    """
    # ASSIGN options & arguments
    options, ns_args = parse_args(version="0.1", description=description, nargs='*')
    opts = subDict(options, optkys)
    # FIX: exec("{}=va".format(ky)) cannot create function locals in python3,
    # so the variables below were NameErrors; unpack the options explicitly
    deg = opts.get('deg')
    method = opts.get('method')
    start = opts.get('start')
    freq = opts.get('freq')
    src = opts.get('src')
    pct_chg_prd = opts.get('pct_chg_prd')
    debugTF = opts.get('debugTF')
    pngname = opts.get('pngname')
    titlename = opts.get('titlename')
    lagd = opts.get('lagd')
    pct_chg_prd2 = opts.get('pct_chg_prd2')
    freq2 = opts.get('freq2')
    src2 = opts.get('src2')
    logTF = opts.get('logTF')
    log2TF = opts.get('log2TF')
    filename = opts.get('filename')
    monthlyTF = opts.get('monthlyTF')
    days = opts.get('days')
    # ASSIGN customed variables & parameters
    ticker = '^GSPC'
    series = 'T5YIFR'
    args = options['tkLst']
    argc = len(args)
    if argc > 1:
        ticker, series = args[:2]
    elif argc > 0:
        # FIX: 'ticker = args[0],' had a trailing comma making it a 1-tuple
        ticker = args[0]
    if titlename is None:
        titlename = "{} / {}".format(ticker, series)
    opts['titlename'] = titlename
    if debugTF:
        # FIX: python2 'print >> sys.stderr' statements converted to py3
        sys.stderr.write("{}\n".format(opts))
        sys.stderr.write("series:{}, ticker:{}, start:{}, debug:{}\n".format(series, ticker, start, debugTF))
    # REGRESSING [ydata] on [xdata] in polyfit() with [deg] power
    #(dh, pprm, zroots, zvtx) = polyfit_XY(series, ticker, **opts)
    #return (titlename, dh, pprm, zroots, zvtx)
    # GRAB data
    dh = get_dataXY(series, ticker, start=start, debugTF=debugTF, method=method,
                    src2=src2, src=src, freq2=freq2, freq=freq,
                    pct_chg_prd2=pct_chg_prd2, pct_chg_prd=pct_chg_prd, lagd=lagd,
                    log2TF=log2TF, logTF=logTF, filename=filename,
                    monthlyTF=monthlyTF, days=days)
    if debugTF:
        sys.stderr.write("{}\n".format(dh))
    # RUN polyfit of [deg] degree of power
    pprm, zroots, zvtx, dh['fitted'] = run_polyfit(dh['xData'], dh['yData'], deg, debugTF=debugTF)
    # PLOTTIG
    plot_XY(dh[['xData', 'yData', 'fitted']], pngname=pngname, debugTF=debugTF,
            titlename=titlename, labels=[series, ticker, series + 'Fit'])
    # RUN OLS for compare with np.polyfit of deg=1
    oret = run_OLS(dh['xData'], dh['yData'], api=sm, debugTF=debugTF)
    # RUN curve_fit for nls-fitting to compare with np.polyfit
    cprm, ccov = curve_fit(polyFnc, dh['xData'], dh['yData'], p0=np.full(deg + 1, 1))
    if debugTF:
        sys.stderr.write("{} {}\n".format(pprm, cprm))
    return (titlename, dh, pprm, zroots, zvtx, opts)
def run_api(jobj, engine=None):
    """
    Topic-dispatching web API entry point.

    Routes on jobj['topic'] (case-insensitive) to theme/pricerange/top10/
    earnings/dailymed/mongo_search/file/write2file/test handlers and returns
    the handler's result (often HTML). Unknown topics return the usage text.
    """
    pd.options.display.float_format = '{:,.2f}'.format
    sty = "<style>.dataframe {text-align:right;}</style>\n"
    # default usage text, returned when no handler produces a result
    ret = """Usage of:
<H3> ?topic='TOPIC' </H3>
Where
<PRE>
TOPIC = [ipo|theme|priceRange|top10|utdEarningsList|mongo_search|file|write2file|test]
"""
    topic = getKeyVal(jobj, 'topic', '').lower()
    if topic is None:
        return ret
    if topic == 'theme':
        subtopic = getKeyVal(jobj, 'subtopic', 'majorplayer')
        if topic == 'theme' and subtopic in ['majorplayer', 'media', 'ipo']:
            jobj.update(subtopic=subtopic)
            return run_topic_theme(dfTF=False, **jobj)
    elif topic == 'pricerange':
        # join live quotes with company mapping for tickers within a price band
        xqTmp = '''select * from (select ticker,close as price,"trailingPE" as "peRatio","marketCap"::float/1000000 as "marketCapMM","changePercent" as "change%%",change from yh_quote_curr where close>={} and close<={}) as a, (select ticker,company_cn,company,sector_cn from mapping_ticker_cik where act_code>0) as b where a.ticker=b.ticker order by price'''
        try:
            vr = jobj['range'].split(",") if 'range' in jobj else [60, 70]
            if len(vr) >= 2:
                vr = np.array(vr, dtype=float)
                lb, ub = (vr.min(), vr.max())
            xqr = xqTmp.format(lb, ub)
            df = sqlQuery(xqr)
            pd.options.display.float_format = '{:,.2f}'.format
            cfm = {'marketCapMM': "{:,.0f}".format}
            ret = df.to_html(formatters=cfm)
            return sty + ret
        except Exception as e:
            pqint(str(e))
            return str(e)
    elif topic == 'top10':
        return run_top10(jobj, engine=None)
    elif topic in ['utd_earnings_list', 'utdearningslist']:
        from utd_earnings_list import utd_earnings_list
        # defaults overridable via jobj's sector/pbdate
        dd = dict(sector='Technology', pbdate='20190101')
        dd.update(subDict(jobj, ['sector', 'pbdate']))
        df = utd_earnings_list(**dd).sort_values(by=['pbdate', 'marketCap'], ascending=[False, False])
        df.rename(columns={'quarter': 'epochs'}, inplace=True)
        cfm = {'marketCap': "{:,.0f}".format, 'recdate': "{:.0f}".format, 'epochs': "{:.0f}".format}
        ret = df.to_html(formatters=cfm)
        return sty + ret
    elif topic in ['daily_med', 'dailymed']:
        from dailyMed_api import drug2labelInfo as dli
        try:
            dd = dli(jobj['drug_name'])
        except Exception as e:
            sys.stderr.write("**ERROR:{}\n".format(str(e)))
            return (str(e))
        #return dd
        ts = """{{drug_name}} <P> {{ sec_cn|join('</P><P>\n') }} </P> """
        return jj_fmt(ts, **dd)
    elif topic == 'mongo_search':
        d = dict(dbname='ara', tablename='lsi2nlg', username='******', field='tmplname', ticker='AAPL')
        if len(jobj) > 0:
            d.update(jobj)
        findDct = {'username': d.get('username')}
        # 'field' is a comma-separated projection list
        fieldLst = d.get('field').split(',')
        fieldDct = {x: 1 for x in fieldLst}
        xg, _, _ = find_mdb(dbname=d['dbname'], tablename=d['tablename'], jobj=findDct, field=fieldDct)
        return xg
    elif topic == 'file':
        # serve a template or image file from 'dirname'
        try:
            dirname = jobj['dirname'] if 'dirname' in jobj else "/apps/fafa/pyx/tst"
            if 'j2name' in jobj:
                fname = "{}/{}".format(dirname, jobj['j2name'])
                ret = open(fname).read()
            elif 'image' in jobj:
                fname = "{}/{}".format(dirname, jobj['image'])
                ret = open(fname).read()
        except Exception as e:
            sys.stderr.write("**ERROR:{}".format(str(e)))
    elif topic == 'write2file':
        # save 'j2name' to 'dirName'
        pqint(jobj)
        try:
            dirName = "/apps/fafa/pyx/flask/rmc/templates"
            if 'j2name' in jobj and 'j2ts' in jobj:
                if 'dirname' in jobj:
                    dirName = jobj['dirname']
                fname = "{}/{}".format(dirName, jobj['j2name'])
                fp = open(fname, 'w+')
                fp.write(jobj['j2ts'])
                fp.close()
                pqint("===Save {} To {}".format(jobj['j2ts'], fname))
                ret = "Successfully save to {}".format(fname)
        except Exception as e:
            sys.stderr.write("**ERROR:{}".format(str(e)))
            ret = str(e)
    elif topic == 'test':
        if 'tmplrpt' in jobj:
            ret = jobj['tmplrpt']
    return ret
def headline_calc(tkLead='^GSPC', idxLst=None, eqtLst=None, np=3, xCol='changePercent', colLst=[], thd=0.05):
    '''
    return object of {'topLst', 'indexOrder', 'topIndex', 'indexLst', 'topUpDn'}
    Where
    topIndex: ticker name of lead index defined as 'tkLead'='^GSPC'
    topUpDn: sign in string UP/FLAT/DOWN within the range of 'thd'=[0.05,-0.05]
    allUpDn: 1,0,-1 indecis all up/TBD/down
    topLst: selected 'eqtLst' stock quote info ranked via 'changePercent' w.r.t. the 'sign'/'topUpDn' of 'topIndex'
    bttmLst: selected 'eqtLst' stock quote info oppsite to topLst
    indexLst: 'idxLst' stock quote info listed in the order of 'indexOrder'
    Note aht topIndex quote info should be in the 'indexLst'
    '''
    from _alan_str import udfStr, find_mdb
    # NOTE(review): parameter 'np' (top-N count) shadows the usual numpy alias
    if eqtLst is None or len(eqtLst) < 1:
        eqtLst = get_eqtLst()
    if idxLst is None or len(idxLst) < 1:
        idxLst = ['^GSPC', '^DJI', '^IXIC']  #,'^SOX']
    if colLst is None or len(colLst) < 1:
        #colLst=['open','high','low','close','volume','ticker','change','changePercent','pbdate']
        colLst = ['close', 'volume', 'ticker', 'change', 'changePercent', 'pbdate', 'pbdt']
    #xqTmp="SELECT * from yh_quote_curr WHERE ticker in ('{}')"
    # get selected equities quote performance
    #tkStr = "','".join(eqtLst)
    #eqtRtn = sqlQuery(xqTmp.format(tkStr))[colLst]
    jobj = dict(ticker={'$in': eqtLst})
    eqtRtn = find_mdb(jobj, dbname='ara', tablename='yh_quote_curr', dfTF=True)[0][colLst]
    # get indices quote performance
    #tkStr = "','".join(idxLst)
    #idxRtn = sqlQuery(xqTmp.format(tkStr))[colLst]
    jobj = dict(ticker={'$in': idxLst})
    idxRtn = find_mdb(jobj, dbname='ara', tablename='yh_quote_curr', dfTF=True)[0][colLst]
    # calc 'topLst' w.r.t. the 'sign'/'topUpDn' of 'topIndex'
    pbdate = idxRtn.query("ticker=='{}'".format(tkLead))['pbdate'].iloc[0]
    chgPct = idxRtn.query("ticker=='{}'".format(tkLead))[xCol].iloc[0]
    topUpDn = udfStr(chgPct, ['UP', 'DOWN', 'FLAT'], thd)
    topSign = udfStr(chgPct, [1, 0, -1], thd)
    # rank equities in the lead index's direction (descending on up days)
    sign = False if chgPct >= 0 else True
    xd = eqtRtn.sort_values(by=[xCol], ascending=sign)
    leadLst = xd.iloc[:np]
    # keep an opposite-direction row only when moves straddle zero
    if (xd['changePercent'].iloc[0] * xd['changePercent'].iloc[-1]) < 0:
        bttmLst = xd.iloc[-1:]
    else:
        bttmLst = []
    # update my lead index in the top level
    dd = dict(topIndex=tkLead)
    dd.update(topUpDn=topUpDn)
    # add all indices info to idxLst
    dd.update(indexLst=idxRtn[colLst].to_dict(orient='records'))
    indexOrder = [x['ticker'] for x in dd['indexLst']]
    dd.update(indexOrder=indexOrder)
    # determine if indices are all Up/Undetermined/Down
    if all([x['changePercent'] < 0 for x in dd['indexLst']]):
        allUpDn = -1
    elif all([x['changePercent'] > 0 for x in dd['indexLst']]):
        allUpDn = 1
    else:
        allUpDn = 0
    dd.update(allUpDn=allUpDn)
    # add topLst
    if len(leadLst) > 0:
        dd.update(topLst=leadLst[colLst].to_dict(orient='records'))
    else:
        dd.update(topLst=[])
    if len(bttmLst) > 0:
        dd.update(bttmLst=subDict(bttmLst, colLst).to_dict(orient='records'))
    else:
        dd.update(bttmLst=[])
    # get hiloRecord (based on past 1-year daily change since end date)
    hiloRecord = find_hiloRecord(ticker=tkLead, end=pbdate, days=366)
    dd.update(hiloRecord=hiloRecord)
    dd.update(start=pbdate)
    dd.update(mp3YN=False)
    return dd