def _ops(curOp, startDate, endDate, entDB):
    """
    Process a single op spec on behalf of the ops function; the actual
    computation is delegated to the matching handler in the theOps module.

    An op spec has the shape "[dataDst=]opName(dataSrc)". When no explicit
    destination key is given, one is synthesised from the op name, the
    source key and the date range.
    """
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    startDateIndex, endDateIndex = entDB.daterange2index(startDate, endDate)
    curOpFull = curOp
    # Peel off an explicit destination key, when one is present.
    if '=' in curOp:
        dataDst, curOp = curOp.split('=')
    else:
        dataDst = ''
    op, dataSrc = curOp.split('(', 1)
    dataSrc = dataSrc[:-1]  # strip the trailing ')'
    if dataDst == '':
        dataDst = "{}({}[{}:{}])".format(op, dataSrc, startDate, endDate)
    print("DBUG:ops:op[{}]:dst[{}]".format(curOpFull, dataDst))
    #### Op specific things to do before getting into individual records
    # NOTE: the prefix tests below are order sensitive.
    if op == 'srel':
        theOps.srel(dataDst, dataSrc, entDB)
    elif op.startswith("rel"):
        # "relYYYYMMDD" => relative to that date; bare "rel" => first date.
        baseSpec = op[3:]
        baseDate = int(baseSpec) if baseSpec != '' else entDB.dates[0]
        theOps.relto(dataDst, dataSrc, baseDate, entDB)
    elif op.startswith("ma"):
        # "ma<kind><duration>", e.g. "mas50": kind char at op[2], window after.
        windowDays = hlpr.days_in(op[3:], entDB.bSkipWeekends)
        theOps.movavg(dataDst, dataSrc, windowDays, op[2], entDB)
    elif op.startswith("roll"):
        # The first rollDays entries cant carry rolling return data, as
        # there arent enough earlier days to fill the requested window.
        rollDays = hlpr.days_in(op[4:].split('_')[0], entDB.bSkipWeekends)
        rollKind = op.split('_')[1] if '_' in op else 'retpa'
        theOps.rollret(dataDst, dataSrc, rollDays, rollKind, entDB)
    elif op.startswith("block"):
        blockDays = hlpr.days_in(op[5:], entDB.bSkipWeekends)
        theOps.blockstats(dataDst, dataSrc, blockDays, entDB)
    elif op.startswith("reton"):
        # "reton[YYYYMMDD][_type]"; bare "reton" anchors on the end date.
        if '_' in op:
            retonT, retonType = op.split('_')
        else:
            retonT, retonType = op, 'safe'
        if retonT == "reton":
            retonDateIndex = endDateIndex
        else:
            retonDateIndex = entDB.datesD[int(retonT[5:])]
        theOps.reton(dataDst, dataSrc, retonDateIndex, retonType, None, entDB)
    update_metas(op, dataSrc, dataDst)
def relto(dataDst, dataSrc, baseDate, entDB=None): """ Calculate the absolute return for all dates wrt/relative_to a given base date. """ # Get generic things required dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst) entDB = _entDB(entDB) entDB.data[dataDstMT] = 'relto' daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends) startDateIndex, endDateIndex = entDB.daterange2index(-1, -1) # Start on relto specific logic baseDateIndex = entDB.datesD[baseDate] dBase = entDB.data[dataSrc][:, baseDateIndex].reshape(entDB.nxtEntIndex,1) dEnd = entDB.data[dataSrc][:, endDateIndex] tResult = ((entDB.data[dataSrc]/dBase)-1)*100 entDB.data[dataDst] = tResult # Start on MetaData/Label dLatestAbsRet = tResult[:, -1] durationInYears = hlpr.days2year(endDateIndex-baseDateIndex+1, entDB.bSkipWeekends) dLatestRetPA = ((((dLatestAbsRet/100)+1)**(1/durationInYears))-1)*100 entDB.data[dataDstMD] = numpy.zeros([entDB.nxtEntIndex,5]) entDB.data[dataDstMD][:,0] = dLatestAbsRet entDB.data[dataDstMD][:,1] = dLatestRetPA entDB.data[dataDstMD][:,2] = durationInYears entDB.data[dataDstMD][:,3] = dBase.transpose() entDB.data[dataDstMD][:,4] = dEnd entDB.data[dataDstML] = [] for md in entDB.data[dataDstMD]: entDB.data[dataDstML].append(relto_md2str(md))
def rollret(dataDst, dataSrc, rollDays, rollType, entDB=None): """ Calculate the rolling return corresponding to the given rollDays, for each day in the database. rollDays: Calculate the returns got after the specified time duration. rollType: Whether to keep the returns as AbsoluteReturn or ReturnPerAnnum. 'absret' | 'retpa' """ # Get generic things required dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst) entDB = _entDB(entDB) entDB.data[dataDstMT] = 'rollret' daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends) startDateIndex, endDateIndex = entDB.daterange2index(-1, -1) # Rolling ret related logic starts durationForPA = rollDays/daysInAYear if rollType == 'absret': durationForPA = 1 tResult = numpy.zeros(entDB.data[dataSrc].shape) tResult[:,rollDays:] = (entDB.data[dataSrc][:,rollDays:]/entDB.data[dataSrc][:,:-rollDays])**(1/durationForPA) if not gbRetDataAsFloat: tResult = (tResult - 1)*100 tResult[:,:rollDays] = numpy.nan entDB.data[dataDst] = tResult # Create the meta datas entDB.data[dataDstMD] = numpy.zeros([entDB.nxtEntIndex, 5]) trValid = numpy.ma.masked_invalid(tResult) # The Avgs trAvg = numpy.mean(trValid, axis=1) trAvg.set_fill_value(numpy.nan) entDB.data[dataDstMD][:,0] = trAvg.filled() # The Stds trStd = numpy.std(trValid, axis=1) trStd.set_fill_value(numpy.nan) entDB.data[dataDstMD][:,1] = trStd.filled() # The BelowMinRetPA trValidBelowMinRetPA = numpy.count_nonzero(trValid < gfMinRetPA, axis=1)*1.0 trValidBelowMinRetPA.set_fill_value(numpy.nan) trValidLens = numpy.count_nonzero(trValid, axis=1)*1.0 trValidLens.set_fill_value(numpy.nan) trBelowMinRetPA = (trValidBelowMinRetPA.filled()/trValidLens.filled())*100 entDB.data[dataDstMD][:,2] = trBelowMinRetPA # The MaSharpeMinT trMaSharpeMinT = (trAvg-gfMinRetPA)/trStd trMaSharpeMinT.set_fill_value(numpy.nan) entDB.data[dataDstMD][:,3] = trMaSharpeMinT.filled() # The Years alive trYears = ((entDB.meta['lastSeenDI'] - entDB.meta['firstSeenDI'])+1)/daysInAYear entDB.data[dataDstMD][:,4] = 
trYears # Meta label and Years entDB.data[dataDstML] = [] for md in entDB.data[dataDstMD]: entDB.data[dataDstML].append(rollret_md2str(md))
def blockstats(dataDst, dataSrc, blockDays, entDB=None):
    """
    Calculate stats like Avg,STD,Qnts wrt each block of data.

    The data in the specified dataSrc is divided into blocks of blockDays
    duration and the statistics calculated for each resultant block.

    NOTE: Any Inf or NaN value will be converted to 0, before Avgs are
    calculated.
    NOTE: Any Inf or NaN value are masked before Stds are calculated, so
    that they dont impact the result.

    TODO2: Add a skipBlocksAtBegin argument, to skip any blocks at the
    begining of the chain of blocks, if so specified by the user. Could be
    used to skip Non Data blocks/duration at begining of RollRet op.
    """
    # Get generic things required
    dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dataDstMT] = 'blockstats'
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)  # NOTE(review): not used below
    startDateIndex, endDateIndex = entDB.daterange2index(-1, -1)
    # Prepare the job specific params
    blockTotalDays = endDateIndex - startDateIndex + 1
    blockCnt = int(blockTotalDays/blockDays)  # partial block at the very beginning is dropped
    dataDstAvgs = "{}Avgs".format(dataDst)
    dataDstStds = "{}Stds".format(dataDst)
    dataDstQntls = "{}Qntls".format(dataDst)
    entDB.data[dataDstAvgs] = numpy.zeros([entDB.nxtEntIndex,blockCnt])
    entDB.data[dataDstStds] = numpy.zeros([entDB.nxtEntIndex,blockCnt])
    entDB.data[dataDstQntls] = numpy.zeros([entDB.nxtEntIndex,blockCnt,5])
    entDB.data[dataDstMD] = numpy.empty([entDB.nxtEntIndex,4], dtype=object)
    # Calc the stats: walk blocks backwards from the last date, filling the
    # destination arrays right-to-left so blocks stay in chronological order.
    iEnd = endDateIndex+1
    lAvgs = []  # NOTE(review): lAvgs/lStds are never used below
    lStds = []
    for i in range(blockCnt):
        iDst = blockCnt-i-1
        iStart = iEnd-blockDays
        tBlockData = entDB.data[dataSrc][:,iStart:iEnd].copy()
        # Avg/quantiles treat non-finite values as 0 (see NOTE above).
        tBlockData[~numpy.isfinite(tBlockData)] = 0
        entDB.data[dataDstAvgs][:,iDst] = numpy.mean(tBlockData,axis=1)
        entDB.data[dataDstQntls][:,iDst] = numpy.quantile(tBlockData,[0,0.25,0.5,0.75,1],axis=1).transpose()
        # Stds mask non-finite values instead (see NOTE above).
        tBlockData = numpy.ma.masked_invalid(entDB.data[dataSrc][:,iStart:iEnd])
        tStds = numpy.std(tBlockData,axis=1)
        tStds.set_fill_value(numpy.nan)
        entDB.data[dataDstStds][:,iDst] = tStds.filled()
        iEnd = iStart
    # Do the needful wrt MetaData/Label
    # Per entity: [blockAvgs array, mean of avgs, blockStds array, mean of stds]
    entDB.data[dataDstML] = []
    for i in range(entDB.nxtEntIndex):
        entDB.data[dataDstMD][i,0] = entDB.data[dataDstAvgs][i]
        entDB.data[dataDstMD][i,1] = numpy.mean(entDB.data[dataDstAvgs][i])
        entDB.data[dataDstMD][i,2] = entDB.data[dataDstStds][i]
        entDB.data[dataDstMD][i,3] = numpy.nanmean(entDB.data[dataDstStds][i])
        entDB.data[dataDstML].append(blockstats_md2str(entDB.data[dataDstMD][i]))
def infoset1_prep(entDB=None):
    """
    Run a standard batch of ops over the loaded data, generating the set of
    derived data keys (srel, reton, rolling returns, block stats and their
    moving averages) that the infoset1 result helpers later consume.
    """
    entDB = _entDB(entDB)
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    warnings.filterwarnings('ignore')
    opBatches = [
        ['srel=srel(data)', 'mas50Srel=mas50(srel)'],
        ['roabs=reton_absret(data)', 'rosaf=reton(data)'],
        ['roll3Y=roll3Y(data)', 'mas50Roll3Y=mas50(roll3Y)'],
        ['roll5Y=roll5Y(data)', 'mas50Roll5Y=mas50(roll5Y)'],
    ]
    for batch in opBatches:
        ops(batch, entDB=entDB)
    # Split whatever data remains beyond the 3Y rolling window into 5 blocks.
    blockDays = int((entDB.nxtDateIndex - daysInAYear * 3) / 5)
    ops(['blockNRoll3Y=block{}(roll3Y)'.format(blockDays)], entDB=entDB)
    warnings.filterwarnings('default')
def _blocky_view(dataSrcs, modes, blockDays, destKeyNameTmpl, entDB=None):
    """
    Generate data(s) which provide a blocks based view of the passed source
    data(s). For each block of days, within the overall dataset, a single
    representative value is identified, as defined by the mode.

    dataSrcs: list of source data keys for which blocks based view needs
        to be created.
    modes: Specifies how to generate the blocks based view, one per dataSrc.
        'M': Use the max value from among all the data wrt each of the blocks.
        'm': Use the min value from among all the data wrt each of the blocks.
        's': Use the values belonging to the first day from each of the blocks.
        'e': Use the values belonging to the last day from each of the blocks.
        'a': Use the average value of all the data wrt each of the blocks.
    blockDays: The size of each block wrt the blocks the overall data is
        divided into. May be an int or a duration string (e.g. '1M').
    destKeyNameTmpl: A template which specifies how the destination dataKeys
        should be named.
    entDB: entities database to work on; falls back to the module default.

    NOTE: The blocks are assumed starting from the lastday in the data set,
    as the last day of the last block, irrespective of which calender day
    it may be. Any leftover days at the very beginning that dont fill a
    whole block are ignored.
    """
    entDB = _entDB(entDB)
    if isinstance(blockDays, str):
        blockDays = hlpr.days_in(blockDays, entDB.bSkipWeekends)
    if isinstance(dataSrcs, str):
        dataSrcs = [ dataSrcs ]
    srcShape = entDB.data[dataSrcs[0]].shape
    dstShape = list(srcShape)
    dstShape[1] = int(dstShape[1]/blockDays)
    dataDsts = hlpr.derive_keys(dataSrcs, destKeyNameTmpl)
    for dDst in dataDsts:
        entDB.data[dDst] = numpy.zeros(dstShape)
    # Walk blocks backwards from the last date, filling destinations
    # right-to-left (iDst -1, -2, ...) so blocks stay chronological.
    endI = entDB.nxtDateIndex
    startI = endI - blockDays
    iDst = -1
    # FIX: use >= 0, not > 0. With the old test, when the total number of
    # days was an exact multiple of blockDays, the oldest complete block
    # (startI == 0) was skipped and its destination column stayed all zeros.
    while startI >= 0:
        for dSrc, mode, dDst in zip(dataSrcs, modes, dataDsts):
            if mode == 'M':
                entDB.data[dDst][:,iDst] = numpy.max(entDB.data[dSrc][:,startI:endI], axis=1)
            elif mode == 'm':
                entDB.data[dDst][:,iDst] = numpy.min(entDB.data[dSrc][:,startI:endI], axis=1)
            elif mode == 's':
                entDB.data[dDst][:,iDst] = entDB.data[dSrc][:,startI]
            elif mode == 'e':
                entDB.data[dDst][:,iDst] = entDB.data[dSrc][:,endI-1]
            elif mode == 'a':
                entDB.data[dDst][:,iDst] = numpy.average(entDB.data[dSrc][:,startI:endI], axis=1)
        endI = startI
        startI = endI - blockDays
        iDst -= 1
def _plot(entCodes, bPivotPoints=True, bVolumes=True, bRSI=True, bLinRegress=False):
    """
    Plot a bundle of views for the given entCodes: the close related raw,
    mas200 and mae9/26/50 data, and optionally

    * PivotPoints: day based pivot lines drawn across 2 weeks, week based
      across 6 weeks (1.5 months) and month based across 12 weeks (3 months).
    * Volumes traded along with their 10 day moving average.
    * RSI.
    * Linear regression based fit lines wrt 3M, 6M, 1Y and 3Y.

    Even thou entCodes can be passed as a list, passing a single entCode may
    be more practically useful. Also plot_pivotpoints currently supports a
    single entCode only.
    """
    entDB = edb.gEntDB
    weekDays = hlpr.days_in('1W', entDB.bSkipWeekends)
    eplot._data(['data', 'mas200', 'mae9', 'mae26', 'mae50'], entCodes)
    if bPivotPoints:
        # (pivot data key, span in weeks over which its line is drawn)
        for ppKey, spanWeeks in (('pp', 1), ('ppW', 3), ('ppM', 6)):
            ops.plot_pivotpoints(ppKey, entCodes, plotRange=weekDays * spanWeeks, axes=eplot._axes())
    if bVolumes:
        _plot_volume('volume', 'mas10Vol', entCodes, 1)
    if bRSI:
        _plot_rsi('rsi', entCodes, 0)
    if bLinRegress:
        eplot.linregress('data', entCodes, days=['3M', '6M', '1Y', '3Y'])
def linregress(dataKeys, entCodes, days=[7, '1M', '6M', '1Y', '3Y', '5Y'], entDB=None, axes=None):
    """
    For the given dataKeys and entCodes, plot the corresponding data as well
    as curve fitting lines based on linear regression, one per requested
    duration in days.

    days: durations to fit over; each entry is either an int day count or a
        duration string understood by hlpr.days_in (e.g. '1Y').
    entDB: entities database to work on; falls back to the module default.
    axes: passed through to the underlying _fit call.

    Durations longer than the loaded data are skipped.
    """
    entDB = _entDB(entDB)
    startDateIndex, endDateIndex = entDB.daterange2index(-1, -1)
    endDate = entDB.dates[endDateIndex]  # invariant, hoisted out of the loop
    for d in days:
        if isinstance(d, str):
            d = hlpr.days_in(d, entDB.bSkipWeekends)
        # FIX: guard against d > endDateIndex. Previously entDB.dates[endDateIndex - d]
        # wrapped around via Python negative indexing, yielding a bogus (but
        # valid-looking) startDate that could pass the datesD check below.
        if d > endDateIndex:
            continue
        startDate = entDB.dates[endDateIndex - d]
        if entDB.datesD.get(startDate, -1) >= 0:
            _fit(dataKeys, entCodes, startDate, endDate, 'linregress', entDB, axes)
def srel(dataDst, dataSrc, entDB): """ Calculate the absolute return for all dates wrt/relative_to start date. NOTE: If a entity was active from day 1 or rather 0th day wrt database, then the return is calculated wrt that. However if the entity started later than start date, then calculate relative to the start date of that given entity. """ # Get generic things required dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst) entDB = _entDB(entDB) entDB.data[dataDstMT] = 'srel' daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends) startDateIndex, endDateIndex = entDB.daterange2index(-1, -1) # Rolling ret related logic starts iStart = entDB.meta['firstSeenDI'] dStart = entDB.data[dataSrc][range(entDB.nxtEntIndex), iStart] dStartT = dStart.reshape(entDB.nxtEntIndex,1) dEnd = entDB.data[dataSrc][:, endDateIndex] tResult = entDB.data[dataSrc]/dStartT if not gbRetDataAsFloat: tResult = (tResult - 1)*100 entDB.data[dataDst] = tResult # Work on the meta data, also set NaN for No data zone entDB.data[dataDstMD] = numpy.zeros([entDB.nxtEntIndex,5]) dAbsRet = tResult[:, -1] totalDays = endDateIndex-startDateIndex+1 durationInYears = (totalDays - iStart)/daysInAYear dRetPA = (((dEnd/dStart)**(1/durationInYears))-1)*100 entDB.data[dataDstMD][:,0] = dAbsRet entDB.data[dataDstMD][:,1] = dRetPA entDB.data[dataDstMD][:,2] = durationInYears entDB.data[dataDstMD][:,3] = dStart entDB.data[dataDstMD][:,4] = dEnd entDB.data[dataDstML] = [] for i in range(entDB.nxtEntIndex): entDB.data[dataDst][i, :iStart[i]] = numpy.nan md = entDB.data[dataDstMD][i] entDB.data[dataDstML].append(srel_md2str(md))
def reton(dataDst, dataSrc, retOnDateIndex, retOnType, historicGaps=None, entDB=None):
    """
    Calculate the absolute returns and or returnsPerAnnum as on the given
    date(index) wrt/relative_to all the other dates.

    dataDst: key under which the per-date returns are stored.
    dataSrc: key of the source data (rows = entities, cols = dates).
    retOnDateIndex: date index whose value the returns are computed for.
    retOnType: 'absret' (absolute %), 'retpa' (% per annum), anything else
        gives the 'safe' mix - absolute returns within the most recent year
        of data, per-annum returns for everything older.
    historicGaps: day-offsets (from the retOn date) at which metadata
        snapshots are captured; when None the module default is used.
    entDB: entities database to work on; falls back to the module default.
    """
    # Get generic things required
    dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dataDstMT] = 'reton'
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    startDateIndex, endDateIndex = entDB.daterange2index(-1, -1)
    # Work on retOn
    if historicGaps is None:  # FIX: identity test for None, not ==
        historicGaps = _gHistoricGaps(entDB)
    validHistoric = historicGaps[historicGaps < (retOnDateIndex+1)]
    # Distance in days of each date from the retOn date. It is 0 at the
    # retOn date itself, so the per-annum exponent divides by zero there,
    # producing inf/nan for that one column.
    histDays = abs(numpy.arange(endDateIndex+1)-retOnDateIndex)
    retOnData = entDB.data[dataSrc][:, retOnDateIndex].reshape(entDB.nxtEntIndex,1)
    tROAbs = ((retOnData/entDB.data[dataSrc])-1)*100
    tRORPA = (((retOnData/entDB.data[dataSrc])**(daysInAYear/histDays))-1)*100
    if retOnType == 'absret':
        tResult = tROAbs
    elif retOnType == 'retpa':
        tResult = tRORPA
    else:
        # 'safe' mode: absolute returns for the most recent year, per-annum
        # returns for the older dates.
        # FIX: compare the number of date columns (shape[1]) to a year;
        # the previous len(tROAbs) counted rows i.e. entities, which is
        # unrelated to the available date duration.
        if tROAbs.shape[1] > daysInAYear:
            tResult = tROAbs
            tResult[:, :-daysInAYear] = tRORPA[:, :-daysInAYear]
        else:
            tResult = tROAbs
    entDB.data[dataDst] = tResult
    # Handle meta data: snapshot the result at each historic gap offset,
    # NaN for gaps that fall before the data begins.
    entDB.data[dataDstMD] = numpy.ones([entDB.nxtEntIndex,historicGaps.shape[0]])*numpy.nan
    retOnDateDelta = endDateIndex-retOnDateIndex
    entDB.data[dataDstMD][:, :validHistoric.shape[0]] = tResult[:, -(validHistoric+1+retOnDateDelta)]
    entDB.data[dataDstML] = []
    for md in entDB.data[dataDstMD]:
        entDB.data[dataDstML].append(reton_md2str(md))
def infoset1_result1_entcodes(entCodes, bPrompt=False, numEntities=-1, entDB=None):
    """
    Print data generated by processing the loaded data, wrt the specified
    entities, to the user.

    NOTE: As 2nd part of the result dump, where it prints data across all
    specified entities, wrt each data aspect that was processed during prep,
    it tries to sort them based on the average meta data info from roll3Y
    (3Y). And entities which are less than 3 years will get collated to the
    end of the sorted list, based on the last RetPA from srel operation.
    If there are entities which get dropped by both the sort operations,
    then they will get collated to the end.

    numEntities if greater than 0, will limit the number of entities that
    are shown in the comparitive print wrt each processed data type.
    """
    entDB = _entDB(entDB)
    # [short name for printing, MetaLabel data key] pairs produced by infoset1_prep.
    dataSrcs = [
        ['srel', 'srel.MetaLabel'],
        ['absRet', 'roabs.MetaLabel'],
        ['retOn', 'rosaf.MetaLabel'],
        ['roll3Y', 'roll3Y.MetaLabel'],
        ['roll5Y', 'roll5Y.MetaLabel'],
        ]
    # Part 1: per entity, dump each processed aspect's meta label.
    for entCode in entCodes:
        entIndex = entDB.meta['codeD'][entCode]
        print("Name:", entDB.meta['name'][entIndex])
        for dataSrc in dataSrcs:
            print("\t{:16}: {}".format(dataSrc[0], entDB.data[dataSrc[1]][entIndex]))
    # Cap the minimum-data-years filter at 1.5, or lower if less data is loaded.
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    dateDuration = entDB.nxtDateIndex / daysInAYear
    if dateDuration > 1.5:
        dateDuration = 1.5
    print("INFO:InfoSet1:AnalSimpleDateDurationSetTo:", dateDuration)
    # Rank by roll3Y average; entities it drops are then ranked by srel RetPA;
    # whatever both drop is appended unsorted at the end.
    analR3Y = anal_simple('roll3Y', 'roll_avg', 'top', entCodes=entCodes, numEntities=len(entCodes), minDataYears=dateDuration, entDB=entDB)
    analR3YEntCodes = [x[0] for x in analR3Y]
    s1 = set(entCodes)
    s2 = set(analR3YEntCodes)
    otherEntCodes = s1 - s2
    analSRelRPA = anal_simple('srel', 'srel_retpa', 'top', entCodes=otherEntCodes, numEntities=len(otherEntCodes), minDataYears=dateDuration, entDB=entDB)
    analSRelRPAEntCodes = [x[0] for x in analSRelRPA]
    s3 = set(analSRelRPAEntCodes)
    entCodes = analR3YEntCodes + analSRelRPAEntCodes + list(s1 - (s2.union(s3)))
    anal_simple('blockNRoll3Y', 'block_ranked', 'top', entCodes=entCodes, numEntities=len(entCodes), minDataYears=dateDuration, entDB=entDB)
    totalEntities = len(entCodes)
    if numEntities > totalEntities:
        numEntities = totalEntities
    # Part 2: per processed aspect, dump all entities in the rank order above.
    printFmt = "\t{:<20}:{:24}:"
    for dataSrc in dataSrcs:
        print("DataSrc:{}: >>showing {} of {} entities<<".format(
            dataSrc, numEntities, totalEntities))
        if dataSrc[0] in ['absRet', 'retOn']:
            print((printFmt + " {}").format("code", "name", theOps.reton_mdhdr()))
        elif dataSrc[0] == 'srel':
            print((printFmt + " {}").format("code", "name", theOps.srel_mdhdr()))
        elif dataSrc[0].startswith('roll'):
            print((printFmt + " {}").format("code", "name", theOps.rollret_mdhdr()))
        # x/y/c collect avg/std/code for the roll* scatter plot below.
        x = []
        y = []
        c = []
        dataSrcMetaData = dataSrc[1].replace('Label', 'Data')
        entCount = 0
        for entCode in entCodes:
            entIndex = entDB.meta['codeD'][entCode]
            entName = entDB.meta['name'][entIndex][:24]
            if dataSrc[0].startswith('roll'):
                x.append(entDB.data[dataSrcMetaData][entIndex, 0])
                y.append(entDB.data[dataSrcMetaData][entIndex, 1])
                c.append(entCode)
            print((printFmt + " {}").format(entCode, entName, entDB.data[dataSrc[1]][entIndex]))
            entCount += 1
            # NOTE(review): with the post-increment `>` test this prints
            # numEntities+1 entries before breaking - confirm if intended.
            if (numEntities > 0) and (entCount > numEntities):
                break
        if dataSrc[0].startswith('roll'):
            # Scatter of rolling-return Avg vs StD, annotated with ent codes.
            plt.scatter(x, y)
            for i, txt in enumerate(c):
                plt.annotate(txt, (x[i], y[i]))
            plt.xlabel('RollRet Avg')
            plt.ylabel('RollRet StD')
            plt.show()
    if bPrompt:
        input("Press any key to continue...")
def anal_simple(dataSrc, analType='normal', order="top", theDate=None, theIndex=None, numEntities=10, entCodes=None, minDataYears=1.5, bCurrentEntitiesOnly=True, bDebug=False, iClipNameWidth=64, entDB=None):
    """
    Look at specified data in dataSrc, and find top/bottom N entities.

    The rows of the dataSrc represent the entities and the cols represent
    the data associated with the individual entities. One can specify the
    data one wants to look at by using
    * [For Normal] the date one is interested in or
    * [For Normal] the index of the data
    * [For Others] op specific attribute/meta data that one is interested in.

    order: could be either 'top' or 'bottom'

    analType: could be one of 'normal', 'srel_absret', 'srel_retpa',
        'roll_avg', 'block_ranked', 'block_avg'
        normal: Look at data corresponding to identified date or index, in
            the given dataSrc, to decide on entities to select.
        srel_absret: Look at the Absolute Returns data associated with the
            given dataSrc (which should be generated using srel operation),
            to decide on entities.
        srel_retpa: Look at the Returns PerAnnum data associated with the
            given dataSrc (which should be generated using srel operation),
            to decide on entities.
        roll_avg: look at Average ReturnsPerAnnum, calculated using rolling
            return (dataSrc specified should be generated using roll
            operation), to decide on entities ordering/selection.
        block_ranked: look at the Avgs calculated by block op, for each sub
            date periods(i.e blocks), rank them and average over all the sub
            date periods to calculate the rank for full date Range. Use this
            final rank to decide on entities ordering. (dataSrc should have
            been generated using block operation).
        block_avg: Look at Avg of Averages calculated by block op, to order
            the entities. User can also do a equivalent anal by running
            anal_simple('blockOpDst.MetaData', 'normal', theIndex=1)

    theDate and theIndex: [Used by normal analType]
        If both are None, then the logic will try to find a date which
        contains atleast some valid data, starting from the lastDate and
        moving towards startDate wrt given dataSrc.
        NOTE: ValidData: Any Non Zero, Non NaN, Non Inf data
        If theDate is a date, then values in dataSrc corresponding to this
        date will be used for sorting.
        If theDate is -1, then the lastDate wrt the currently loaded
        dataset, is used as the date from which values should be used to
        identify the entities.
        If theIndex is set and theDate is None, then the values in dataSrc
        corresponding to given index, is used for sorting.
        NOTE: date follows YYYYMMDD format.

    entCodes: One can restrict the logic to look at data belonging to the
        specified list of entities. If None, then all entities in the loaded
        dataset will be considered, for ranking.

    minDataYears: This sorting logic will ignore entities for which the
        available data duration in the entities database is less than that
        specified here.
        NOTE: The default is 1.5 years, If you have loaded less than that
        amount of data, then remember to set this to a smaller value, if
        required.
        NOTE: It expects the info about duration for which data is available
        for each entity, to be available under 'srel.MetaData' key. If this
        is not the case, it will trigger a generic 'srel' operation through
        ops to generate the same. It also means that the check is done wrt
        overall amount of data available for a given entity in the loaded
        dataset, While a dataSrc key which corresponds to a view of partial
        data from the loaded dataset, which is less than specified
        minDataYears, can still be done.

    bCurrentEntitiesOnly: Will drop entities which have not been seen in the
        last 1 week, wrt the dateRange currently loaded.

    iClipNameWidth: If None, the program prints full name, with space
        alloted by default for 64 chars. Else the program limits the Name to
        specified width.

    entDB: The data set from which to pick the data to work with. If
        specified, it will be used. Else what ever default data set was
        previously initialised by the program will be used.

    Returns a list of [code, name, value, ...] entries for the selected
    entities, in rank order.
    """
    entDB = _entDB(entDB)
    theAnal = "{}_{}".format(analType, order)
    #print("DBUG:AnalSimple:{}:{}".format(theAnal, dataSrc))
    printFmts = gAnalSimpleBasePrintFormats.copy()
    printWidths = gAnalSimpleBasePrintWidths.copy()
    printHdr = ["Code", "Name"]
    # iSkip is a sentinel that always sorts to the far (ignored) end for the
    # requested order: -inf when picking the top, +inf when picking the bottom.
    if order == 'top':
        iSkip = -numpy.inf
    else:
        iSkip = numpy.inf
    theSaneArray = None
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    # Build theSaneArray: one sortable value per entity, with inf/nan
    # replaced by the iSkip sentinel.
    if analType == 'normal':
        printHdr.extend(['Value'])
        if (type(theDate) == type(None)) and (type(theIndex) == type(None)):
            # No date/index given: scan backwards for the latest column with
            # at least some valid data.
            for i in range(-1, -entDB.nxtDateIndex, -1):
                if bDebug:
                    print("DBUG:AnalSimple:{}:findDateIndex:{}".format(
                        theAnal, i))
                theSaneArray = entDB.data[dataSrc][:, i].copy()
                theSaneArray[numpy.isinf(theSaneArray)] = iSkip
                theSaneArray[numpy.isnan(theSaneArray)] = iSkip
                if not numpy.all(
                        numpy.isinf(theSaneArray) | numpy.isnan(theSaneArray)):
                    dateIndex = entDB.nxtDateIndex + i
                    print("INFO:AnalSimple:{}:DateIndex:{}".format(
                        theAnal, dateIndex))
                    break
        else:
            if (type(theIndex) == type(None)) and (type(theDate) != type(None)):
                startDateIndex, theIndex = entDB.daterange2index(
                    theDate, theDate)
            #print("DBUG:AnalSimple:{}:theIndex:{}".format(theAnal, theIndex))
            theSaneArray = entDB.data[dataSrc][:, theIndex].copy().astype(float)
            theSaneArray[numpy.isinf(theSaneArray)] = iSkip
            theSaneArray[numpy.isnan(theSaneArray)] = iSkip
    elif analType.startswith("srel"):
        dataSrcMetaType, dataSrcMetaData, dataSrcMetaLabel = hlpr.data_metakeys(
            dataSrc)
        if analType == 'srel_absret':
            printHdr.extend(['AbsRet'])
            theSaneArray = entDB.data[dataSrcMetaData][:, 0].copy()
        elif analType == 'srel_retpa':
            printHdr.extend(['RetPA'])
            theSaneArray = entDB.data[dataSrcMetaData][:, 1].copy()
        else:
            input(
                "ERRR:AnalSimple:{}:dataSrc[{}]: unknown srel anal subType, returning..."
                .format(theAnal, dataSrc))
            return None
    elif analType.startswith("roll"):
        dataSrcMetaType, dataSrcMetaData, dataSrcMetaLabel = hlpr.data_metakeys(
            dataSrc)
        if analType == 'roll_avg':
            printHdr.extend(['Avg', 'Std', '<minT', 'MaSha', 'Yrs'])
            theSaneArray = entDB.data[dataSrcMetaData][:, 0].copy()
            theSaneArray[numpy.isinf(theSaneArray)] = iSkip
            theSaneArray[numpy.isnan(theSaneArray)] = iSkip
            # Extra numeric columns for Std, <minT, MaSha, Yrs.
            # NOTE(review): 5 header fields added above vs 4 formats here -
            # presumably the base formats already cover the first value
            # column; confirm against gAnalSimpleBasePrintFormats.
            printFmts.extend([{
                'num': "{:{width}.2f}",
                'str': '{:{width}}'
            }, {
                'num': "{:{width}.2f}",
                'str': '{:{width}}'
            }, {
                'num': "{:{width}.2f}",
                'str': '{:{width}}'
            }, {
                'num': "{:{width}.1f}",
                'str': '{:{width}}'
            }])
            printWidths.extend([7, 7, 7, 4])
    elif analType == "block_avg":
        dataSrcMetaType, dataSrcMetaData, dataSrcMetaLabel = hlpr.data_metakeys(
            dataSrc)
        printHdr.extend(['AvgRank', 'blockAvgs', 'blockStds'])
        metaDataAvgs = "{}Avgs".format(dataSrc)
        metaDataStds = "{}Stds".format(dataSrc)
        theSaneArray = entDB.data[dataSrcMetaData][:, 1].astype(float).copy()
        theSaneArray[numpy.isinf(theSaneArray)] = iSkip
        theSaneArray[numpy.isnan(theSaneArray)] = iSkip
        iValidBlockAtBegin = 0
    elif analType == "block_ranked":
        printHdr.extend(['AvgRank', 'blockRanks', 'blockAvgs'])
        metaDataAvgs = "{}Avgs".format(dataSrc)
        tNumEnts, tNumBlocks = entDB.data[metaDataAvgs].shape
        # One rank column per block plus a final average-rank column.
        theRankArray = numpy.zeros([tNumEnts, tNumBlocks + 1])
        iValidBlockAtBegin = 0
        bValidBlockFound = False
        for b in range(tNumBlocks):
            tArray = entDB.data[metaDataAvgs][:, b]
            tValidArray = tArray[numpy.isfinite(tArray)]
            tSaneArray = hlpr.sane_array(tArray, iSkip)
            if len(tValidArray) != 0:
                # Rank each entity into quintile buckets for this block.
                tQuants = numpy.quantile(tValidArray,
                                         [0, 0.2, 0.4, 0.6, 0.8, 1])
                theRankArray[:, b] = numpy.digitize(tSaneArray, tQuants, True)
                bValidBlockFound = True
            else:
                # All-invalid leading blocks are excluded from the average.
                if not bValidBlockFound:
                    iValidBlockAtBegin = b + 1
                theRankArray[:, b] = numpy.zeros(len(theRankArray[:, b]))
        theRankArray[:, tNumBlocks] = numpy.average(
            theRankArray[:, iValidBlockAtBegin:tNumBlocks], axis=1)
        theRankArray = theRankArray[:, iValidBlockAtBegin:tNumBlocks + 1]
        tNumBlocks = tNumBlocks - iValidBlockAtBegin
        theSaneArray = theRankArray[:, tNumBlocks]
    else:
        input("ERRR:AnalSimple:{}:dataSrc[{}]: unknown analType, returning...".
              format(theAnal, dataSrc))
        return None
    if type(theSaneArray) == type(None):
        print("WARN:DBUG:AnalSimple:{}:{}: No SaneArray????".format(
            theAnal, dataSrc))
        breakpoint()
    # Mark entities outside the requested entCodes set with the sentinel.
    theSaneArray = _forceval_entities(theSaneArray, entCodes, iSkip, 'invert', entDB=entDB)
    if minDataYears > 0:
        # Drop entities with less than minDataYears of overall data, based
        # on srel MetaData column 2 (years); generate srel if missing.
        dataYearsAvailable = entDB.nxtDateIndex / daysInAYear
        if (dataYearsAvailable < minDataYears):
            print(
                "WARN:AnalSimple:{}: dataYearsAvailable[{}] < minDataYears[{}]"
                .format(theAnal, dataYearsAvailable, minDataYears))
        srelMetaType, srelMetaData, srelMetaLabel = hlpr.data_metakeys('srel')
        theSRelMetaData = entDB.data.get(srelMetaData, None)
        if type(theSRelMetaData) == type(None):
            ops('srel=srel(data)')
        if bDebug:
            tNames = numpy.array(entDB.meta['name'])
            tDroppedNames = tNames[entDB.data[srelMetaData][:, 2] < minDataYears]
            print(
                "INFO:AnalSimple:{}:{}:Dropping if baby Entity".format(
                    theAnal, dataSrc), tDroppedNames)
        theSaneArray[entDB.data[srelMetaData][:, 2] < minDataYears] = iSkip
    if bCurrentEntitiesOnly:
        # Drop entities not seen within the last week of the loaded range.
        oldEntities = numpy.nonzero(
            entDB.meta['lastSeenDI'] < (entDB.nxtDateIndex - 1 - 7))[0]
        if bDebug:
            #aNames = numpy.array(entDB.meta['name'])
            #print(aNames[oldEntities])
            for index in oldEntities:
                print("DBUG:AnalSimple:{}:IgnoringOldEntity:{}, {}".format(
                    theAnal, entDB.meta['name'][index],
                    entDB.dates[entDB.meta['lastSeenDI'][index]]))
        theSaneArray[oldEntities] = iSkip
    #theRows=numpy.argsort(theSaneArray)[-numEntities:]
    theRows = numpy.argsort(theSaneArray)
    rowsLen = len(theRows)
    if numEntities > rowsLen:
        print(
            "WARN:AnalSimple:{}:RankContenders[{}] < numEntities[{}] requested, adjusting"
            .format(theAnal, rowsLen, numEntities))
        numEntities = rowsLen
    # Walk argsort output from the appropriate end for top/bottom selection.
    if order == 'top':
        lStart = -1
        lStop = -(numEntities + 1)
        lDelta = -1
    elif order == 'bottom':
        lStart = 0
        lStop = numEntities
        lDelta = 1
    theSelected = []
    print("INFO:AnalSimple:{}:{}".format(theAnal, dataSrc))
    hlpr.printl(printFmts, printHdr, " ", "\t", "", printWidths)
    for i in range(lStart, lStop, lDelta):
        index = theRows[i]
        # Stop as soon as a sentinel (or 0-rank for block_ranked) shows up;
        # argsort guarantees everything beyond is also skippable.
        if (theSaneArray[index] == iSkip) or ((analType == 'block_ranked') and (theSaneArray[index] == 0)):
            print(" WARN:AnalSimple:{}:No more valid elements".format(
                theAnal))
            break
        curEntry = [
            entDB.meta['codeL'][index], entDB.meta['name'][index],
            theSaneArray[index]
        ]
        if analType == "roll_avg":
            curEntry.extend(entDB.data[dataSrcMetaData][index, 1:])
        # NOTE(review): curEntry is appended first and mutated afterwards,
        # so the returned entries carry the clipped/rounded display values.
        theSelected.append(curEntry)
        if iClipNameWidth == None:
            curEntry[1] = "{:64}".format(curEntry[1])
        else:
            curEntry[1] = "{:{width}}".format(curEntry[1][:iClipNameWidth],
                                              width=iClipNameWidth)
        curEntry[2] = numpy.round(curEntry[2], 2)
        if analType == "roll_avg":
            curEntry[3:] = numpy.round(curEntry[3:], 2)
        elif analType == "block_avg":
            extra = "{}:{}".format(
                hlpr.array_str(
                    entDB.data[metaDataAvgs][index, iValidBlockAtBegin:], 6, 2),
                hlpr.array_str(
                    entDB.data[metaDataStds][index, iValidBlockAtBegin:], 6, 2))
            curEntry.append(extra)
        elif analType == "block_ranked":
            theSelected[-1] = theSelected[-1] + [theRankArray[index]]
            extra = "{}:{}".format(
                hlpr.array_str(theRankArray[index], 4, "A0L1"),
                hlpr.array_str(
                    entDB.data[metaDataAvgs][index, iValidBlockAtBegin:], 6, 2))
            curEntry.append(extra)
        #print(" {} {}".format(extra, curEntry))
        hlpr.printl(printFmts, curEntry, " ", "\t", "", printWidths)
    return theSelected