def topic_theme_media_OLD(**opts):
    '''Return a view of the MongoDB table 'ara.topic_theme_media' (legacy version).

    The view is selected by the 'subtab' option (case-insensitive,
    default 'blacklist'):

    - 'whitelist': rows with rrt > 10, sorted by 'rrt' descending, first 100.
    - 'conflict' : all rows whose ticker has at least one row with
      rrt >= 1.0 and at least one row with rrt <= -1.0, sorted by 'ticker'.
    - anything else: rows with rrt < -10, sorted by 'pbdate' descending,
      first 100.

    The result is passed through subDF() with a fixed column list before
    being returned (presumably subDF keeps only the listed columns --
    confirm against its definition).
    '''
    from _alan_str import find_mdb

    def _fetch(criteria):
        # Only the DataFrame of the (frame, client, error) triple is used.
        frame, _, _ = find_mdb(criteria, tablename='topic_theme_media',
                               dbname='ara', dfTF=True)
        return frame

    mode = getKeyVal(opts, 'subtab', 'blacklist').lower()
    if mode == 'whitelist':
        hits = _fetch({"rrt": {"$gt": 10}})
        picked = hits.sort_values(by='rrt', ascending=False).iloc[:100]
    elif mode == 'conflict':
        everything = _fetch({})
        up_side = set(everything.query("rrt>=1.0")['ticker'].unique())
        down_side = set(everything.query("rrt<=-1.0")['ticker'].unique())
        # Tickers that appear on both sides of the threshold.
        both_sides = up_side.intersection(down_side)
        in_conflict = everything['ticker'].isin(both_sides)
        picked = everything.loc[in_conflict].sort_values(by='ticker')
    else:
        hits = _fetch({"rrt": {"$lt": -10}})
        picked = hits.sort_values(by='pbdate', ascending=False).iloc[:100]

    wanted = [
        "ticker", "pbdate", "start", "end", "sPrice", "ePrice", "rrt",
        "Price", "SegmentDscript", "CallDscript", "Company",
    ]
    return subDF(picked, wanted)
def search_quote(tkLst,fdLst,**opts):
    '''Collect the latest quote for each ticker in tkLst.

    tkLst is first passed through get_sector_etfname(). For each ticker:

    - if instrument == 'stock', or the ticker contains one of '=', '^', '.',
      the most recent document (by 'epochs') is read from a MongoDB table:
      'market_indicator_quote' for the four market indices in mktLst,
      'yh_spark_hist' for tickers containing '=', '^' or '.', and
      'iex_spark_hist' otherwise;
    - otherwise the quote is fetched through geteach_quote() using the
      caller-supplied hostname/dbname/tablename/lang options.

    Options read from opts: instrument ('stock'), outTF (True),
    hostname ('localhost'), dbname ('ara'), tablename (None), lang (None).
    The 'ticker' key is removed from opts.

    Returns None when nothing was found, the raw list of records when
    outTF is false, otherwise a DataFrame. Unless fdLst is None or the
    string 'all' (case-insensitive), the DataFrame is passed through
    subDF() with a fixed quote-column list.

    A failure on one ticker is logged and that ticker is skipped.
    '''
    tkLst = get_sector_etfname(tkLst, **opts)
    sys.stderr.write("---tkLst: {} @ search_quote\n".format(tkLst))
    instrument = getKeyVal(opts, 'instrument', 'stock')
    outTF = getKeyVal(opts, 'outTF', True)
    hostname, dbname, tablename, lang = getKeyVal(
        opts,
        ['hostname', 'dbname', 'tablename', 'lang'],
        ['localhost', 'ara', None, None])
    colx = 'ticker' if instrument == 'stock' else 'series'
    mktLst = ['^GSPC', '^DJI', '^IXIC', '^SOX']
    data = []
    opts.pop('ticker', None)
    for ticker in tkLst:
        try:
            # get quotes from MDB for yahoo source indices setup in the
            # PGDB::'mapping_series_label'
            if instrument == 'stock' or re.search(r'[=^.]', ticker):
                # Keep the MongoDB table name in its own variable: reusing
                # `tablename` here would leak into the geteach_quote() call
                # of a later ticker and override the caller's option.
                if ticker.upper() in mktLst:
                    mdbTable = "market_indicator_quote"
                elif re.search(r'[=^.]', ticker):
                    mdbTable = "yh_spark_hist"
                else:
                    mdbTable = "iex_spark_hist"
                jobj = {"ticker": ticker}
                ret = list(mgDB[mdbTable].find(jobj, {"_id": 0}, sort=[("epochs", -1)]).limit(1))
                ret = subDict(ret, ['ticker', 'close', 'change', 'pchg', 'xclose', 'epochs', 'pbdate', 'pbdt'])
                ret = renameDict(ret, {'pchg': 'changePercent', 'xclose': 'prevClose'})
            else:
                # get quotes for all fields from pgDB
                ret = geteach_quote(ticker, fdLst='*', tablename=tablename, lang=lang,
                                    dbname=dbname, hostname=hostname, colx=colx)
            if ret is not None and len(ret) > 0:
                data.extend(ret)
        except Exception as e:
            # Best-effort: log and move on to the next ticker.
            pqint("**ERROR:{} @ {}".format(str(e), 'search_quote'), file=sys.stderr)
            continue
    if len(data) < 1:
        return None
    if not outTF:
        return data
    data = pd.DataFrame(data)
    # Only a *string* 'all' means "keep every column"; fdLst may also be a
    # list of field names, which has no .lower().
    keepAll = fdLst is None or (isinstance(fdLst, str) and fdLst.lower() == 'all')
    if not keepAll:
        colx = ['ticker', 'epochs', 'open', 'high', 'low', 'close', 'volume', 'xclose',
                'change', 'pchg', 'pbdt', 'hhmm', 'pbdate', 'changePercent', 'prevClose',
                'marketCap']
        data = subDF(data, colx)
    return data
def topic_theme_media(**opts):
    '''Return rows of 'ara.topic_theme_media' with buyCount > 5.

    Rows are sorted by 'buyCount' descending; 'marketCap', when present,
    is cast to float. The frame is then passed through subDF() with a
    fixed column list. `opts` is accepted but not read here.
    '''
    from _alan_str import find_mdb

    criteria = {"buyCount": {"$gt": 5}}
    # find_mdb yields (frame, client, error); only the frame is needed.
    found, _, _ = find_mdb(criteria, tablename='topic_theme_media',
                           dbname='ara', dfTF=True)
    ranked = found.sort_values(by='buyCount', ascending=False)
    if 'marketCap' in ranked:
        ranked['marketCap'] = ranked['marketCap'].astype(float)
    wanted = [
        'ticker', 'buyDate', 'marketCap', 'buyPrice', 'closePrice',
        'buyChg%', 'buyCount', 'dayChg%', 'EPS', 'pbdate', 'Range52Week',
        'Company', 'sector', 'industry', 'pubDate',
    ]
    return subDF(ranked, wanted)
def batch_fintel_majorplayer(fundLst=None,
                             dbname='ara',
                             tablename='topic_fintel_hist',
                             themename='topic_theme_majorplayer',
                             saveDB=False,
                             debugTF=False):
    '''Fetch holdings+quotes for each fund and optionally upsert them.

    For every fund name in fundLst, get_fund_quote() is called with
    {"fund": name} against dbname.tablename, the result is passed through
    subDF() with a fixed column list and, when saveDB is true, upserted
    into dbname.themename keyed on ("fund", "ticker", "funddate").

    fundLst  : iterable of fund names; None or empty selects a built-in
               list of five funds.
    saveDB   : write each fund's rows to `themename` when true.
    debugTF  : accepted for interface compatibility; not used here.

    Returns the frame of the LAST fund processed, or {} if that fund
    failed. Errors on individual funds are logged to stderr and do not
    stop the batch.
    '''
    if fundLst is None or len(fundLst) < 1:
        fundLst = [
            'berkshire-hathaway', 'blackrock', 'bridgewater-associates-lp',
            'goldman-sachs-group', 'renaissance-technologies-llc'
        ]
    zpk = ["fund", "ticker", "funddate"]
    colX = [
        'fund', 'funddate', 'pbdate', 'pbdt', 'ticker', 'shortName', 'change',
        'changePercent', 'sector_cn', 'company_cn', 'marketCap', 'close',
        'peRatio', 'CurrentShares', 'percentPos', 'SharesChangePercent',
        'fiftyTwoWeekRange'
    ]
    clientM = None  # reused across funds once get_fund_quote() returns it
    df = {}
    for fn in fundLst:
        sys.stderr.write("==GET fund:{}\n".format(fn))
        jobj = {"fund": fn}
        try:
            df, clientM, _, _ = get_fund_quote(jobj,
                                               clientM=clientM,
                                               dbname=dbname,
                                               tablename=tablename)
            df = subDF(df, colX)
            if saveDB:
                sys.stderr.write("==SAVING {} of\n{}\n".format(
                    fn, df.iloc[:10]))
                dg = df.to_dict(orient='records')
                upsert_mdb(dg,
                           clientM=clientM,
                           dbname=dbname,
                           tablename=themename,
                           zpk=zpk)
        except Exception as e:
            # Best-effort batch: report and continue with the next fund.
            sys.stderr.write("**ERROR:{}:{}\n".format(fn, str(e)))
            df = {}
    return df
def geteach_financials_history(ticker,fdLst,**opts):
    '''Return financial history of `ticker` for the requested subtopic.

    Options read from opts: debugTF (False), subtopic (''), freq ('Q',
    only for subtopic 'roe').

    subtopic == 'eps':
        reads 'earnings_yh' documents of the ticker whose 'actualEPS' is
        not NaN, newest 'pbdate' first, and adds 'EPSReportDate' built
        from the characters of 'pbdate' as YYYY-MM-DD.
    subtopic == 'roe':
        reads the income-statement table 'qS_IS_A' (freq == 'A') or
        'qS_IS_Q' (any other freq), inner-merges it with the matching
        balance-sheet table 'qS_BS_*' on ticker/pbdate/endDate when the
        latter has rows, computes roe = netIncome / totalStockholderEquity
        when both columns exist, and passes the frame through subDF().
    any other subtopic:
        returns [].

    Returns a DataFrame, or [] when no source rows exist. `fdLst` is
    accepted but not used here.
    '''
    debugTF = getKeyVal(opts, 'debugTF', False)
    subtopic = getKeyVal(opts, 'subtopic', '')
    jobj = {'ticker': ticker}
    if subtopic == 'eps':
        tablename = 'earnings_yh'
        jobj.update({'actualEPS': {'$ne': np.nan}})
        jd = list(mgDB[tablename].find(jobj, {'_id': 0}, sort=[("pbdate", -1)]))
        if len(jd) < 1:
            return []
        for rec in jd:
            # pbdate is expected to be an 8-character YYYYMMDD value.
            rec['EPSReportDate'] = "{}{}{}{}-{}{}-{}{}".format(*str(rec['pbdate']))
        datax = pd.DataFrame(jd)
    elif subtopic == 'roe':
        freq = getKeyVal(opts, 'freq', 'Q')
        suffix = 'A' if freq == 'A' else 'Q'
        tablename = 'qS_IS_{}'.format(suffix)
        jd = list(mgDB[tablename].find(jobj, {'_id': 0}))
        if len(jd) < 1:
            return []
        datax = pd.DataFrame(jd)
        tablename = 'qS_BS_{}'.format(suffix)
        jd = list(mgDB[tablename].find(jobj, {'_id': 0}))
        if len(jd) > 0:
            for rec in jd:
                # Coerce to int first: '{:02d}' rejects floats, and pbdate
                # may come back from the store as e.g. 20200131.0.
                a = int(rec['pbdate'])
                rec['reportDate'] = '{:04d}-{:02d}-{:02d}'.format(a // 10000, a // 100 % 100, a % 100)
                rec['freq'] = freq
            d2 = pd.DataFrame(jd)
            datax = datax.merge(d2, on=['ticker', 'pbdate', 'endDate'], how='inner')
            if debugTF:
                sys.stderr.write("=====geteach_financials_history() d2:{}".format(d2))
        if 'netIncome' in datax.columns and 'totalStockholderEquity' in datax.columns:
            datax['roe'] = datax['netIncome'] / datax['totalStockholderEquity']
        if debugTF:
            sys.stderr.write("=====geteach_financials_history() datax:{}".format(datax))
        datax = subDF(datax, ['ticker', 'roe', 'pbdate', 'reportDate', 'freq', 'endDate',
                              'netIncome', 'totalStockholderEquity'])
        sys.stderr.write(" ---datax:{}\n{}\n".format(ticker, datax))
    else:
        return []
    return datax
def create_ipoReport(dbname='ara',
                     tablesrc='ipoData',
                     topRow=50,
                     tablename='ipoReport',
                     saveDB=True):
    '''Build the IPO report from `tablesrc` and optionally store it.

    Reads dbname.tablesrc (default 'ipoData') as a DataFrame, derives
    per-IPO columns, keeps the `topRow` rows with the largest 'marketCap'
    and, when saveDB is True, replaces the content of dbname.tablename
    with them.

    Derived columns:
      daysSinceIPO   ymd_diff(pbdate_x, pbdate_y), falling back to the
                     largest pbdate_y in the table when a row has none
      Range52Week    "low - high" text (only if 'fiftyTwoWeekLow' exists)
      marketCapMM    marketCap / 10**6
      currDate       largest pbdate_y in the table
      changeSinceIPO currPrice - ipoPrice
      changePercent  currPrice / ipoPrice - 1
    where pbdate_x/price/close are renamed to ipoDate/ipoPrice/currPrice.

    Returns the report DataFrame (index reset).
    '''
    import sys
    from _alan_date import ymd_diff
    dds, clientM, _ = find_mdb(dbname=dbname, tablename=tablesrc, dfTF=True)
    # Use None (not NaN) for missing values so the "is not None" tests work.
    dds = dds.astype(object).where(dds.notna(), None)
    pbdate = int(dds['pbdate_y'].max())
    dds['daysSinceIPO'] = [
        ymd_diff(x, int(y)) if y is not None else ymd_diff(x, pbdate)
        for x, y in dds[['pbdate_x', 'pbdate_y']].values
    ]
    if 'fiftyTwoWeekLow' in dds:
        # Both ends must be present; formatting None with '{:.2f}' raises.
        dds['Range52Week'] = [
            "{:.2f} - {:.2f}".format(x, y)
            if x is not None and y is not None and y < 9999 else ""
            for x, y in dds[['fiftyTwoWeekLow', 'fiftyTwoWeekHigh']].values
        ]
    dds['marketCapMM'] = dds['marketCap'] / 10**6
    dds['currDate'] = pbdate
    renCol = {'pbdate_x': 'ipoDate', 'price': 'ipoPrice', 'close': 'currPrice'}
    dds.rename(columns=renCol, inplace=True)
    dds['changeSinceIPO'] = dds['currPrice'] - dds['ipoPrice']
    dds['changePercent'] = dds['currPrice'] / dds['ipoPrice'] - 1
    colX = [
        'ticker', 'ipoDate', 'ipoPrice', 'currPrice', 'currDate',
        'changeSinceIPO', 'changePercent', 'Range52Week', 'fiftyDayAverage',
        'daysSinceIPO', 'marketCapMM', 'trailingPE', 'shortName'
    ]
    df = subDF(
        dds.sort_values(by=['marketCap'], ascending=False).iloc[:topRow], colX)
    df.reset_index(drop=True, inplace=True)
    if saveDB is True:
        records = df.to_dict(orient='records')
        if records:
            collection = clientM[dbname][tablename]
            collection.delete_many({})
            collection.insert_many(records)
        else:
            # insert_many() rejects an empty list; do not wipe the existing
            # report when there is nothing to replace it with.
            sys.stderr.write("**WARNING: empty ipoReport, {}.{} left unchanged\n".format(dbname, tablename))
    return df
def search_hist(tkLst,fdLst,**opts):
    '''Collect price history for every ticker in tkLst into one DataFrame.

    geteach_history() is called per ticker with fdLst and the remaining
    opts (the 'ticker' key is removed from opts first). It may return a
    DataFrame or a list of records; empty/other results are skipped.
    If any list-type records were returned, the result is built from
    those records only; otherwise the returned frames are concatenated.

    Option read from opts: topic ('daily'). For topics other than 'daily'
    and 'minute' the combined frame is returned as is; otherwise column
    'name' is renamed to 'ticker' and the frame is passed through subDF()
    with a fixed OHLC column list.
    '''
    topic = getKeyVal(opts, 'topic', 'daily')
    opts.pop('ticker', None)
    frames = []
    dd = []
    for ticker in tkLst:
        df = geteach_history(ticker, fdLst, **opts)
        sys.stderr.write(" --DF: {}\n{}\n".format(ticker, type(df)))
        if isinstance(df, pd.DataFrame) and len(df) > 0:
            frames.append(df)
        elif isinstance(df, list) and len(df) > 0:
            # list.extend() works in place and returns None -- never
            # assign its result back to the list.
            dd.extend(df)
    if len(dd) > 0:
        data = pd.DataFrame(dd)
    elif frames:
        # DataFrame.append() no longer exists in pandas 2.x.
        data = pd.concat(frames, ignore_index=True)
    else:
        data = pd.DataFrame()
    sys.stderr.write(" --DATA[{}] tail:\n{}\n".format(len(data), data.tail()))
    if topic not in ['daily', 'minute']:
        return data
    # Other call sites use renameDict()'s return value, so take it here as
    # well; keep `data` if the helper renames in place and returns None.
    renamed = renameDict(data, {"name": "ticker"})
    if renamed is not None:
        data = renamed
    colx = ['ticker', 'epochs', 'open', 'high', 'low', 'close', 'volume', 'xclose',
            'change', 'pchg', 'pbdt', 'hhmm', 'pbdate']
    data = subDF(data, colx)
    return data
def plot_csvdata(df, nbins=6,rsiYN=False,title=None,pivot_value=None,pivot_group=None,pngname=None,x_fmt="%b-%d-%y",interpolateYN=True,backend="tkAgg",npar=15,tsTF=True,xaxis=None,trendTF=False,debugTF=False,ohlcTF=False,ohlcComboTF=False,lang='en',**kwargs):
    '''Plot a DataFrame as a line, candlestick or candlestick-combo chart.

    Processing steps, in order:
      1. optional column rename (kwargs 'renColumns');
      2. build a DatetimeIndex from column `xaxis` (digits > 123456789 are
         parsed with epoch_parser, other digits as %Y%m%d, anything else
         with `x_fmt`) or from a 'date' column; otherwise reset the index;
      3. split off the columns named in kwargs 'columns2' (comma separated)
         into a second frame drawn on a right-hand axis;
      4. optional pivot on `pivot_group` / `pivot_value`;
      5. optional gap filling via extrapolate_series (`interpolateYN`);
      6. optional conversion to percentage return since the first row
         (`rsiYN`);
      7. plotting, unless kwargs 'plotTF' is False.

    Parameters
      nbins         target number of x-axis tick bins
      title         chart title; defaults to the joined column names
      pngname       output file; the chart is shown on screen instead when
                    it is None or 4 characters or shorter
      backend       matplotlib backend switched to when showing on screen
      npar          passed to the trend-line / candlestick helpers
      tsTF          if False the index is reset before plotting
      trendTF       compute and draw a trend line via vertex_locator
      ohlcTF        draw a candlestick chart
      ohlcComboTF   draw the candlestick combo chart (kwargs 'sma' gives the
                    two moving-average windows, default [5, 30])
      lang          'cn' swaps the up/down candle colours
      kwargs        also read: pltStyle, figsize, ylabel, fontpath

    Returns
      (df, fig, [ax])      normal line / candlestick chart
      (df, {}, {})         when kwargs 'plotTF' is False
      (datax, fig, axes)   combo chart
      (frame, None, None)  when the OHLC columns needed are missing

    NOTE: when the index is built from `xaxis` or 'date', the caller's
    frame is modified in place by set_index(..., inplace=True).
    '''
    if debugTF:
        sys.stderr.write("===plot_csvdata VARS:\t{}\n".format(locals()))
    import matplotlib.dates as mdates
    import matplotlib.ticker as mticker
    pltStyle = getKeyVal(kwargs, 'pltStyle', 'classic')
    figsize = getKeyVal(kwargs, 'figsize', (11, 6))
    ylabel = getKeyVal(kwargs, 'ylabel', None)
    fontpath = getKeyVal(kwargs, 'fontpath', None)
    if fontpath is not None:
        prop.set_file(fontpath)
    if pltStyle in plt.style.available:
        plt.style.use(pltStyle)
    #- Use backend to 'tkAgg' for cronjob
    if pngname is None or len(pngname) <= 4:
        plt.switch_backend(backend)

    #- Rename columns
    renColumns = getKeyVal(kwargs, 'renColumns', {})
    if len(renColumns) > 0:
        df = renameDict(df, mapper=renColumns)

    #- Create datetime index
    idxname = 'date'
    pbname = xaxis
    if isinstance(df.index, pd.DatetimeIndex):
        pass
    elif pbname in df.columns:
        sdate = str(df[pbname].iloc[0])
        if sdate.isdigit():
            if int(sdate) > 123456789:
                idxpt = [epoch_parser(x) for x in df[pbname]]
            else:
                idxpt = [ymd_parser(x, fmt="%Y%m%d") for x in df[pbname]]
        else:
            idxpt = [ymd_parser(x, fmt=x_fmt) for x in df[pbname]]
        df.set_index(pd.DatetimeIndex(idxpt), inplace=True)
        df.index.rename(idxname, inplace=True)
        # axis must be given by keyword; the positional form is gone in pandas 2.x
        df = df.drop(pbname, axis=1)
    elif idxname in df.columns:
        df[idxname] = pd.to_datetime(df[idxname])
        df.set_index(idxname, inplace=True)
    else:
        df = df.reset_index(drop=True)

    #- Create 2nd dataframe 'df2' for 2-Yaxis plot
    from _alan_calc import subDF
    dux = getKeyVal(kwargs, 'columns2', None)
    if dux is not None:
        colLst2 = dux.split(',')
        df2 = subDF(df, colLst2)
        df = subDF(df, colLst2, reverseTF=True)
    else:
        df2 = {}

    #- Create a pivot table
    trendName = None
    if pivot_group in df.columns and pivot_value in df.columns:
        trendName = df[pivot_group].iloc[0]
        df = df.pivot_table(index='date', columns=pivot_group, values=pivot_value)
        #- Rename columns
        renColumns = getKeyVal(kwargs, 'renColumns', {})
        if len(renColumns) > 0:
            df = renameDict(df, mapper=renColumns)

    #- Create linear-interpolation for missing data
    if interpolateYN is True:
        df = df.apply(extrapolate_series, axis=0)

    #- Create return since inception
    if rsiYN is True:
        de = []
        for j in range(df.shape[1]):
            # Base value is floored at 1 to avoid dividing by tiny/zero starts.
            inix = df.iloc[0, j] if df.iloc[0, j] > 1 else 1
            de.append(df.iloc[:, j] / inix * 100. - 100)
        df = pd.concat(de, axis=1)

    #- NO PLOTTING, just return data
    if 'plotTF' in kwargs and kwargs['plotTF'] is False:
        return df, {}, {}

    #- Create trend curve
    dg = None  # stays None when the trend line cannot be computed
    if trendTF is True:
        try:
            from _alan_pppscf import vertex_locator
            if trendName is None:
                trendName = df._get_numeric_data().columns[0]
            dg, dh = vertex_locator(df[trendName], npar=npar, debugTF=debugTF)
            if debugTF is True:
                sys.stderr.write("{}\n{}\n".format("Trendline dh:", dh))
        except Exception as e:
            sys.stderr.write("**ERROR: {} @ {}\n".format(str(e), 'trendline'))

    if title is None:
        title = "/".join(df.columns).upper()
        if rsiYN is True:
            title += " Return Since Inception"

    #- plot simple line plot
    if tsTF is False:
        df = df.reset_index(drop=True)

    if debugTF is True:
        sys.stderr.write("{}\n".format(df.head()))
        sys.stderr.write("{}\n".format(df.tail()))
    nobs = len(df.index)
    nsp = (nobs / nbins) if nobs > nbins * 2 else nobs
    colorUD = ['red', 'green'] if lang == 'cn' else ['green', 'red']

    if ohlcComboTF is True:
        from alan_plot import plot_candlestickCombo
        from _alan_calc import run_tech
        chartType = 'minute' if pbname == 'epochs' else 'chart'
        sma = getKeyVal(kwargs, 'sma', [5, 30])
        ma1, ma2 = sma
        datax = run_tech(df, pcol='close', winLst=sma, debugTF=debugTF, nanTF=True)
        if 'open' not in datax:
            return datax, None, None
        fig, axes = plot_candlestickCombo(datax,title,ma1,ma2,block=False,chartType=chartType,trendTF=trendTF,npar=npar,debugTF=debugTF,colorUD=colorUD,title=title)
        if pngname is not None and len(pngname) > 4 and '.' in pngname:
            ghLst = plt.gcf().canvas.get_supported_filetypes().keys()
            ghx = pngname.split('.')[-1]
            fmt = ghx.lower()
            if fmt not in ghLst:
                # Unsupported extension: fall back to svg, replacing only
                # the trailing extension (not every occurrence in the path).
                fmt = 'svg'
                pngname = pngname[:len(pngname) - len(ghx)] + 'svg'
            plt.savefig(pngname, format=fmt)
        # skip the plot if pngname='noshow'
        elif pngname is None:
            # `block` is keyword-only in current matplotlib
            plt.show(block=True)
        return datax, fig, axes

    fig, ax = plt.subplots(figsize=figsize)
    if ohlcTF is True:
        if 'marketVolume' in df:
            df.rename(columns={'marketVolume': 'volume'}, inplace=True)
        if 'open' not in df and 'close' in df:
            # Only closes available: draw flat candles.
            df['open'] = df['high'] = df['low'] = df['close']
        elif 'open' not in df:
            return df, None, None
        from alan_plot import plot_candlestick
        chartType = 'minute' if pbname == 'epochs' else 'chart'
        ax = plot_candlestick(df,tsidx=df.index,chartType=chartType,title=title,block=False,debugTF=debugTF,ax=ax,trendTF=trendTF,npar=npar,colorUD=colorUD)
        x_fmt = "%H:%M" if chartType == 'minute' else x_fmt
    else:
        colorLst = ['blue', 'red', 'green', 'salmon', 'lightgray', 'cyan']
        df.plot(ax=ax, grid=True, color=colorLst)
        ax.set_ylabel(df.columns[0])
        if trendTF is True and dg is not None:
            dg.plot(ax=ax)
        if len(df2) > 0:
            if debugTF:
                sys.stderr.write("DF2:\n{}\n{}\n".format(df2.tail(), df2.shape))
            axv = ax.twinx()
            df2.plot(ax=axv, kind='area', alpha=0.4, legend=False)
            axv.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x)))
    if rsiYN is True:  # calc Returns Since Incept
        ax.set_ylabel("Returns Since Inception (%)")
    if ylabel is not None and len(ylabel) > 0:
        ax.set_ylabel(ylabel, fontproperties=fontProp(size=12))
    ax.grid(linestyle='dotted', linewidth=0.5)

    if isinstance(df.index, pd.DatetimeIndex):
        mddfmt = mdates.DateFormatter(x_fmt)
        ax.xaxis.set_major_formatter(mddfmt)
        # The tick locator is chosen from the spacing of the first two
        # rows, so it needs at least two observations.
        if nobs > 1:
            xtinterval = (df.index[1] - df.index[0])
            if xtinterval.days < 7 and xtinterval.days >= 1:  # daily data
                ax.set_xlim(df.index[0], df.index[-1])
                bymd = [1, 5, 10, 15, 20, 25] if nobs < 50 else [1, 15] if nobs < 120 else [1]
                itv = 1 if nobs < 160 else int(nsp / 30. + 0.97)
                xlocator = mdates.MonthLocator(bymonthday=bymd, interval=itv)
                ax.xaxis.set_major_locator(xlocator)
                # check if min/max of xaxis should be included major ticks
                if debugTF is True:
                    sys.stderr.write("{} {}\n".format(ax.get_xticks(), ax.get_xlim()))
                xtcks = list(ax.get_xticks())
                if len(xtcks) >= 2:
                    x1, x2 = xtcks[:2]
                    xmin, xmax = ax.get_xlim()
                    if (x1 - xmin) > (x2 - x1) * 0.6:
                        xtcks = [xmin] + xtcks
                    if (xmax - xtcks[-1]) > (x2 - x1) * 0.6:
                        xtcks = xtcks + [xmax]
                    ax.set_xticks(xtcks)
                ax.xaxis.set_minor_locator(mdates.MonthLocator(interval=1))
                if debugTF is True:
                    sys.stderr.write("{}\n".format(ax.get_xticks()))
                sys.stderr.write("{}\n".format("Daily data use MonthLocator"))
            # NOTE(review): .seconds is the seconds *component* only, so a
            # spacing of whole days >= 7 also lands here -- confirm intent.
            elif xtinterval.seconds < 30:  # second data
                locator = mdates.AutoDateLocator()
                locator.intervald[5] = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 55]
                mddfmt = mdates.AutoDateFormatter(locator)
                mddfmt.scaled[1 / (24. * 60.)] = '%M:%S'
                ax.xaxis.set_major_locator(locator)
                ax.xaxis.set_major_formatter(mddfmt)
                sys.stderr.write("{} {}\n".format("Second data use AutoDateLocator", xtinterval.seconds))
            elif xtinterval.seconds < 100:  # minute data
                bym = [0, 15, 30, 45] if nobs <= 120 else [0, 30] if nobs <= 360 else [0]
                xlocator = mdates.MinuteLocator(byminute=bym, interval=1)
                ax.xaxis.set_major_locator(xlocator)
                sys.stderr.write("{} {}\n".format("Minute data use MinuteLocator", xtinterval.days))
            else:  # periodic data
                sys.stderr.write("{}\n".format("Periodic data use DayLocator"))
                # DayLocator needs a positive integer interval; nsp is a
                # float under true division.
                ax.xaxis.set_major_locator(mdates.DayLocator(interval=max(1, int(nsp))))
    ax.xaxis.label.set_visible(False)
    plt.title(title, fontsize=30, fontproperties=prop)
    # rotation must be numeric (or 'vertical'/'horizontal')
    plt.xticks(rotation=20, fontsize=12)
    if len(df.columns) > 1 and ohlcTF is False:
        ax.legend(loc="upper left", prop=prop)
    plt.subplots_adjust(left=0.1, bottom=0.30)
    if pngname is not None and len(pngname) > 4:
        plt.savefig(pngname)
        sys.stderr.write("Save chart {} to {}\n".format(title, pngname))
    else:
        plt.show(block=True)
    return df, fig, [ax]