def _ops(curOp, startDate, endDate, entDB):
    """
    The helper which processes an individual op spec for the ops function.
    This inturn depends on the ops module (theOps) to do the actual op.

    curOp: a string of the form "[dataDst=]op(dataSrc)", where op is one of
        srel, rel<BaseDate>, ma<Type><Days>, roll<Days>[_<Type>],
        block<Days>, reton[<Date>][_<Type>].
    startDate/endDate: date range the op applies to; also used to build a
        default dataDst key when none is given explicitly.
    entDB: the entities database to operate on.
    """
    _, endDateIndex = entDB.daterange2index(startDate, endDate)
    curOpFull = curOp
    # Split "dst=op(src)" into dst and op(src). maxsplit=1 so any further
    # '=' characters remain part of the op spec instead of raising.
    if '=' in curOp:
        dataDst, curOp = curOp.split('=', 1)
    else:
        dataDst = ''
    op, dataSrc = curOp.split('(', 1)
    dataSrc = dataSrc[:-1]  # drop the trailing ')'
    if dataDst == '':
        # No explicit destination: synthesize a descriptive key.
        dataDst = "{}({}[{}:{}])".format(op, dataSrc, startDate, endDate)
    print("DBUG:ops:op[{}]:dst[{}]".format(curOpFull, dataDst))
    #### Op specific things to do before getting into individual records
    if op == 'srel':
        theOps.srel(dataDst, dataSrc, entDB)
    elif op.startswith("rel"):
        # rel<BaseDate>: returns relative to the given base date, or the
        # first date in the database when no base date is embedded in op.
        baseDate = op[3:]
        if baseDate != '':
            baseDate = int(baseDate)
        else:
            baseDate = entDB.dates[0]
        theOps.relto(dataDst, dataSrc, baseDate, entDB)
    elif op.startswith("ma"):
        # ma<Type><Days>, e.g. "mas50" -> type 's' (op[2]), 50 days (op[3:]).
        maDays = hlpr.days_in(op[3:], entDB.bSkipWeekends)
        theOps.movavg(dataDst, dataSrc, maDays, op[2], entDB)
    elif op.startswith("roll"):
        # RollWindowSize number of days at beginning will not have
        # Rolling ret data, bcas there arent enough days to calculate
        # rolling ret while satisfying the RollingRetWIndowSize requested.
        rollDays = hlpr.days_in(op[4:].split('_')[0], entDB.bSkipWeekends)
        if '_' in op:
            opType = op.split('_')[1]
        else:
            opType = 'retpa'
        theOps.rollret(dataDst, dataSrc, rollDays, opType, entDB)
    elif op.startswith("block"):
        blockDays = hlpr.days_in(op[5:], entDB.bSkipWeekends)
        theOps.blockstats(dataDst, dataSrc, blockDays, entDB)
    elif op.startswith("reton"):
        # reton[<Date>][_<Type>]: plain "reton" targets the end date of the
        # requested range; "reton<Date>" targets that explicit date.
        if '_' in op:
            retonT, retonType = op.split('_')
        else:
            retonT = op
            retonType = 'safe'
        if retonT == "reton":
            retonDateIndex = endDateIndex
        else:
            retonDate = int(retonT[5:])
            retonDateIndex = entDB.datesD[retonDate]
        theOps.reton(dataDst, dataSrc, retonDateIndex, retonType, None, entDB)
    update_metas(op, dataSrc, dataDst)
# --- Example 2 (scraped-example separator, kept as a comment so the file parses) ---
def relto(dataDst, dataSrc, baseDate, entDB=None):
    """
    Calculate the absolute return for all dates wrt/relative_to a given base date.

    dataDst: key under which the result (and its MetaData/Label) is stored.
    dataSrc: key of the source data to work on.
    baseDate: the base date; must be a valid key into entDB.datesD.
    entDB: entities database; when None, the module default is used.
    """
    # Get generic things required
    dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dataDstMT] = 'relto'
    _, endDateIndex = entDB.daterange2index(-1, -1)
    # Start on relto specific logic
    baseDateIndex = entDB.datesD[baseDate]
    # Column of base-date values, shaped (nEnt, 1) so the division below
    # broadcasts across all dates.
    dBase = entDB.data[dataSrc][:, baseDateIndex].reshape(entDB.nxtEntIndex,1)
    dEnd = entDB.data[dataSrc][:, endDateIndex]
    tResult = ((entDB.data[dataSrc]/dBase)-1)*100
    entDB.data[dataDst] = tResult
    # Start on MetaData/Label:
    # [latestAbsRet, latestRetPA, durationYears, baseValue, endValue]
    dLatestAbsRet = tResult[:, -1]
    durationInYears = hlpr.days2year(endDateIndex-baseDateIndex+1, entDB.bSkipWeekends)
    dLatestRetPA = ((((dLatestAbsRet/100)+1)**(1/durationInYears))-1)*100
    entDB.data[dataDstMD] = numpy.zeros([entDB.nxtEntIndex,5])
    entDB.data[dataDstMD][:,0] = dLatestAbsRet
    entDB.data[dataDstMD][:,1] = dLatestRetPA
    entDB.data[dataDstMD][:,2] = durationInYears
    entDB.data[dataDstMD][:,3] = dBase.transpose()
    entDB.data[dataDstMD][:,4] = dEnd
    entDB.data[dataDstML] = []
    for md in entDB.data[dataDstMD]:
        entDB.data[dataDstML].append(relto_md2str(md))
# --- Example 3 (scraped-example separator, kept as a comment so the file parses) ---
def rollret(dataDst, dataSrc, rollDays, rollType, entDB=None):
    """
    Calculate the rolling return corresponding to the given rollDays,
    for each day in the database.
    rollDays: Calculate the returns got after the specified time duration.
    rollType: Whether to keep the returns as AbsoluteReturn or ReturnPerAnnum.
        'absret' | 'retpa'

    Stores into entDB.data:
        dataDst: per-day rolling returns; the first rollDays columns are NaN
            (not enough history there for a full window).
        dataDst MetaData: per entity [avg, std, %-below-gfMinRetPA,
            sharpe-like ratio wrt gfMinRetPA, years alive].
        dataDst MetaLabel: string form of the MetaData, via rollret_md2str.
    """
    # Get generic things required
    dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dataDstMT] = 'rollret'
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    startDateIndex, endDateIndex = entDB.daterange2index(-1, -1)
    # Rolling ret related logic starts.
    # For 'retpa' the window growth ratio is annualised by raising it to
    # 1/durationForPA; for 'absret' the exponent is forced to 1 (no
    # annualisation).
    durationForPA = rollDays/daysInAYear
    if rollType == 'absret':
        durationForPA = 1
    tResult = numpy.zeros(entDB.data[dataSrc].shape)
    tResult[:,rollDays:] = (entDB.data[dataSrc][:,rollDays:]/entDB.data[dataSrc][:,:-rollDays])**(1/durationForPA)
    if not gbRetDataAsFloat:
        # Convert growth factor into a percentage return.
        tResult = (tResult - 1)*100
    # No full lookback window for the first rollDays days.
    tResult[:,:rollDays] = numpy.nan
    entDB.data[dataDst] = tResult
    # Create the meta datas
    entDB.data[dataDstMD] = numpy.zeros([entDB.nxtEntIndex, 5])
    # Mask NaN/Inf entries so they dont skew the stats below.
    trValid = numpy.ma.masked_invalid(tResult)
    # The Avgs
    trAvg = numpy.mean(trValid, axis=1)
    trAvg.set_fill_value(numpy.nan)
    entDB.data[dataDstMD][:,0] = trAvg.filled()
    # The Stds
    trStd = numpy.std(trValid, axis=1)
    trStd.set_fill_value(numpy.nan)
    entDB.data[dataDstMD][:,1] = trStd.filled()
    # The BelowMinRetPA: fraction (in %) of counted days whose rolling
    # return is below the configured gfMinRetPA threshold.
    trValidBelowMinRetPA = numpy.count_nonzero(trValid < gfMinRetPA, axis=1)*1.0
    trValidBelowMinRetPA.set_fill_value(numpy.nan)
    # NOTE(review): count_nonzero here also drops exact-zero returns from
    # the denominator (not just masked entries) — confirm that is intended.
    trValidLens = numpy.count_nonzero(trValid, axis=1)*1.0
    trValidLens.set_fill_value(numpy.nan)
    trBelowMinRetPA = (trValidBelowMinRetPA.filled()/trValidLens.filled())*100
    entDB.data[dataDstMD][:,2] = trBelowMinRetPA
    # The MaSharpeMinT: sharpe-like ratio of excess return over gfMinRetPA.
    trMaSharpeMinT = (trAvg-gfMinRetPA)/trStd
    trMaSharpeMinT.set_fill_value(numpy.nan)
    entDB.data[dataDstMD][:,3] = trMaSharpeMinT.filled()
    # The Years alive
    trYears = ((entDB.meta['lastSeenDI'] - entDB.meta['firstSeenDI'])+1)/daysInAYear
    entDB.data[dataDstMD][:,4] = trYears
    # Meta label and Years
    entDB.data[dataDstML] = []
    for md in entDB.data[dataDstMD]:
        entDB.data[dataDstML].append(rollret_md2str(md))
# --- Example 4 (scraped-example separator, kept as a comment so the file parses) ---
def blockstats(dataDst, dataSrc, blockDays, entDB=None):
    """
    Calculate stats like Avg,STD,Qnts wrt each block of data.
    The data in the specified dataSrc is divided into blocks of blockDays duration
    and the statistics calculated for each resultant block.
    NOTE: Any Inf or NaN value will be converted to 0, before Avgs are calculated.
    NOTE: Any Inf or NaN value are masked before Stds are calculated, so that they
        dont impact the result.
    TODO2: Add a skipBlocksAtBegin argument, to skip any blocks at the begining of
        the chain of blocks, if so specified by the user.
        Could be used to skip Non Data blocks/duration at begining of RollRet op.
    """
    # Generic book-keeping keys and database handle.
    dstMT, dstMD, dstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dstMT] = 'blockstats'
    startDateIndex, endDateIndex = entDB.daterange2index(-1, -1)
    # How many whole blocks fit into the available date range.
    numBlocks = int((endDateIndex - startDateIndex + 1)/blockDays)
    nEnts = entDB.nxtEntIndex
    keyAvgs = "{}Avgs".format(dataDst)
    keyStds = "{}Stds".format(dataDst)
    keyQntls = "{}Qntls".format(dataDst)
    entDB.data[keyAvgs] = numpy.zeros([nEnts, numBlocks])
    entDB.data[keyStds] = numpy.zeros([nEnts, numBlocks])
    entDB.data[keyQntls] = numpy.zeros([nEnts, numBlocks, 5])
    entDB.data[dstMD] = numpy.empty([nEnts, 4], dtype=object)
    # Walk the blocks backwards from the last date, filling destination
    # columns right-to-left so partial days at the start are dropped.
    hi = endDateIndex + 1
    for blk in range(numBlocks):
        dstCol = numBlocks - blk - 1
        lo = hi - blockDays
        blockData = entDB.data[dataSrc][:, lo:hi].copy()
        # Zero-out Inf/NaN before computing averages and quantiles.
        blockData[~numpy.isfinite(blockData)] = 0
        entDB.data[keyAvgs][:, dstCol] = numpy.mean(blockData, axis=1)
        entDB.data[keyQntls][:, dstCol] = numpy.quantile(blockData, [0, 0.25, 0.5, 0.75, 1], axis=1).transpose()
        # For Stds, mask (rather than zero) the invalid entries.
        maskedBlock = numpy.ma.masked_invalid(entDB.data[dataSrc][:, lo:hi])
        blockStds = numpy.std(maskedBlock, axis=1)
        blockStds.set_fill_value(numpy.nan)
        entDB.data[keyStds][:, dstCol] = blockStds.filled()
        hi = lo
    # MetaData per entity: [blockAvgs, mean of avgs, blockStds, mean of stds];
    # MetaLabel is its string rendering.
    entDB.data[dstML] = []
    for iEnt in range(nEnts):
        entDB.data[dstMD][iEnt, 0] = entDB.data[keyAvgs][iEnt]
        entDB.data[dstMD][iEnt, 1] = numpy.mean(entDB.data[keyAvgs][iEnt])
        entDB.data[dstMD][iEnt, 2] = entDB.data[keyStds][iEnt]
        entDB.data[dstMD][iEnt, 3] = numpy.nanmean(entDB.data[keyStds][iEnt])
        entDB.data[dstML].append(blockstats_md2str(entDB.data[dstMD][iEnt]))
def infoset1_prep(entDB=None):
    """
    Run a common set of operations, which can be used to get some amount of
    potentially useful info, on the loaded data,
    """
    entDB = _entDB(entDB)
    yearDays = hlpr.days_in('1Y', entDB.bSkipWeekends)
    warnings.filterwarnings('ignore')
    # The standard prep pipeline: srel, reton variants, 3Y/5Y rolling
    # returns, each followed by its 50-day simple moving average.
    prepOps = [
        ['srel=srel(data)', 'mas50Srel=mas50(srel)'],
        ['roabs=reton_absret(data)', 'rosaf=reton(data)'],
        ['roll3Y=roll3Y(data)', 'mas50Roll3Y=mas50(roll3Y)'],
        ['roll5Y=roll5Y(data)', 'mas50Roll5Y=mas50(roll5Y)'],
    ]
    for opList in prepOps:
        ops(opList, entDB=entDB)
    # Block stats over roll3Y: 5 blocks covering the data beyond the first
    # 3 years (which hold no valid 3Y rolling returns).
    blockDays = int((entDB.nxtDateIndex - yearDays * 3) / 5)
    ops(['blockNRoll3Y=block{}(roll3Y)'.format(blockDays)], entDB=entDB)
    warnings.filterwarnings('default')
# --- Example 6 (scraped-example separator, kept as a comment so the file parses) ---
def _blocky_view(dataSrcs, modes, blockDays, destKeyNameTmpl, entDB=None):
    """
    Generate data(s) which provide a blocks based view of the passed source data(s).
        For each block of days, within the overall dataset, a single representative
        value is identified, as defined by the mode.
    dataSrcs: list of source data keys for which blocks based view needs to be created.
    modes: Specifies how to generate the blocks based view. It could be one of
        'M': Use the max value from among all the data wrt each of the blocks.
        'm': Use the min value from among all the data wrt each of the blocks.
        's': Use the values belonging to the first day from each of the blocks.
        'e': Use the values belonging to the last day from each of the blocks.
        'a': Use the average value of all the data wrt each of the blocks.
    blockDays: The size of each block wrt the blocks the overall data is divided into.
    destKeyNameTmpl: A template which specifies how the destination dataKeys
        should be named.
    NOTE: The blocks are assumed starting from the lastday in the data set,
        as the last day of the last block, irrespective of which calender day
        it may be.
    """
    entDB = _entDB(entDB)
    # Normalise the flexible argument forms.
    if type(blockDays) == str:
        blockDays = hlpr.days_in(blockDays, entDB.bSkipWeekends)
    if type(dataSrcs) == str:
        dataSrcs = [ dataSrcs ]
    # Destination arrays hold one column per whole block.
    dstShape = list(entDB.data[dataSrcs[0]].shape)
    dstShape[1] = int(dstShape[1]/blockDays)
    dataDsts = hlpr.derive_keys(dataSrcs, destKeyNameTmpl)
    for dstKey in dataDsts:
        entDB.data[dstKey] = numpy.zeros(dstShape)
    # Fill destination columns right-to-left, walking blocks back from the
    # last date; any partial block at the very beginning is dropped.
    hi = entDB.nxtDateIndex
    lo = hi - blockDays
    col = -1
    while lo > 0:
        for srcKey, mode, dstKey in zip(dataSrcs, modes, dataDsts):
            block = entDB.data[srcKey][:, lo:hi]
            if mode == 'M':
                entDB.data[dstKey][:, col] = numpy.max(block, axis=1)
            elif mode == 'm':
                entDB.data[dstKey][:, col] = numpy.min(block, axis=1)
            elif mode == 's':
                entDB.data[dstKey][:, col] = block[:, 0]
            elif mode == 'e':
                entDB.data[dstKey][:, col] = block[:, -1]
            elif mode == 'a':
                entDB.data[dstKey][:, col] = numpy.average(block, axis=1)
        hi = lo
        lo = hi - blockDays
        col -= 1
# --- Example 7 (scraped-example separator, kept as a comment so the file parses) ---
def _plot(entCodes,
          bPivotPoints=True,
          bVolumes=True,
          bRSI=True,
          bLinRegress=False):
    """
    Plot data related to the given set of entCodes.

    This includes
        the close related
            raw, mas50 and mas200 data as well as
            linear regression based lines wrt 3M, 6M, 1Y and 3Y.
        Volumes traded and its 10day moving average.
        PivotPoints.
            day based pivot line drawn across 2 weeks
            week based pivot line drawn across 6 weeks (1.5 months)
            month based pivot line drawn across 12 weeks (3 months)

    Even thou entCodes can be passed as a list, passing a single
    entCode may be more practically useful. Also plot_pivotpoints
    currently supports a single entCode only.
    """
    entDB = edb.gEntDB
    weekDays = hlpr.days_in('1W', entDB.bSkipWeekends)
    eplot._data(['data', 'mas200', 'mae9', 'mae26', 'mae50'], entCodes)
    if bPivotPoints:
        # Day/week/month pivot points, plotted across 1, 3 and 6 weeks.
        for ppKey, numWeeks in (('pp', 1), ('ppW', 3), ('ppM', 6)):
            ops.plot_pivotpoints(ppKey,
                                 entCodes,
                                 plotRange=weekDays * numWeeks,
                                 axes=eplot._axes())
    if bVolumes:
        _plot_volume('volume', 'mas10Vol', entCodes, 1)
    if bRSI:
        _plot_rsi('rsi', entCodes, 0)
    if bLinRegress:
        eplot.linregress('data', entCodes, days=['3M', '6M', '1Y', '3Y'])
# --- Example 8 (scraped-example separator, kept as a comment so the file parses) ---
def linregress(dataKeys,
               entCodes,
               days=(7, '1M', '6M', '1Y', '3Y', '5Y'),
               entDB=None,
               axes=None):
    """
    For the given dataKeys and entCodes, plot the corresponding data
    as well as curve fitting lines based on linear regression, over
    each of the requested trailing durations.

    days: the durations to fit over; each entry is either a day count
        (int) or a duration spec string understood by hlpr.days_in
        (e.g. '1M', '1Y'). Default is a tuple (immutable, to avoid the
        shared-mutable-default pitfall) covering 7 days to 5 years.
    """
    entDB = _entDB(entDB)
    _, endDateIndex = entDB.daterange2index(-1, -1)
    for d in days:
        if isinstance(d, str):
            d = hlpr.days_in(d, entDB.bSkipWeekends)
        endDate = entDB.dates[endDateIndex]
        startDate = entDB.dates[endDateIndex - d]
        # Fit only when the computed start date actually exists in the
        # loaded dataset (i.e. enough history is available).
        if entDB.datesD.get(startDate, -1) >= 0:
            _fit(dataKeys, entCodes, startDate, endDate, 'linregress', entDB,
                 axes)
# --- Example 9 (scraped-example separator, kept as a comment so the file parses) ---
def srel(dataDst, dataSrc, entDB=None):
    """
    Calculate the absolute return for all dates wrt/relative_to start date.
    NOTE: If a entity was active from day 1 or rather 0th day wrt database,
        then the return is calculated wrt that.
        However if the entity started later than start date, then calculate
        relative to the start date of that given entity.

    entDB: entities database; defaults to None (resolved via _entDB), for
        consistency with the sibling ops like relto/rollret/blockstats.
    """
    # Get generic things required
    dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dataDstMT] = 'srel'
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    startDateIndex, endDateIndex = entDB.daterange2index(-1, -1)
    # Each entity's own first-seen day value is its base; reshaped to a
    # column so the division broadcasts across all dates.
    iStart = entDB.meta['firstSeenDI']
    dStart = entDB.data[dataSrc][range(entDB.nxtEntIndex), iStart]
    dStartT = dStart.reshape(entDB.nxtEntIndex,1)
    dEnd = entDB.data[dataSrc][:, endDateIndex]
    tResult = entDB.data[dataSrc]/dStartT
    if not gbRetDataAsFloat:
        # Convert growth factor into a percentage return.
        tResult = (tResult - 1)*100
    entDB.data[dataDst] = tResult
    # Work on the meta data, also set NaN for No data zone
    # MetaData per entity: [absRet, retPA, durationYears, startVal, endVal]
    entDB.data[dataDstMD] = numpy.zeros([entDB.nxtEntIndex,5])
    dAbsRet = tResult[:, -1]
    totalDays = endDateIndex-startDateIndex+1
    # Duration counted from each entity's own first-seen day.
    durationInYears = (totalDays - iStart)/daysInAYear
    dRetPA = (((dEnd/dStart)**(1/durationInYears))-1)*100
    entDB.data[dataDstMD][:,0] = dAbsRet
    entDB.data[dataDstMD][:,1] = dRetPA
    entDB.data[dataDstMD][:,2] = durationInYears
    entDB.data[dataDstMD][:,3] = dStart
    entDB.data[dataDstMD][:,4] = dEnd
    entDB.data[dataDstML] = []
    for i in range(entDB.nxtEntIndex):
        # Dates before the entity's first-seen day carry no data -> NaN.
        entDB.data[dataDst][i, :iStart[i]] = numpy.nan
        md = entDB.data[dataDstMD][i]
        entDB.data[dataDstML].append(srel_md2str(md))
# --- Example 10 (scraped-example separator, kept as a comment so the file parses) ---
def reton(dataDst, dataSrc, retOnDateIndex, retOnType, historicGaps=None, entDB=None):
    """
    Calculate the absolute returns and or returnsPerAnnum as on endDate wrt/relative_to
    all the other dates.

    retOnType: 'absret' | 'retpa' | anything else acts as 'safe' mode,
        which uses absolute returns for the most recent year of dates and
        per-annum returns for anything older.
    historicGaps: day-offsets (from the retOn date) at which the MetaData
        snapshot values are picked; when None, the module default from
        _gHistoricGaps is used.
    """
    # Get generic things required
    dataDstMT, dataDstMD, dataDstML = hlpr.data_metakeys(dataDst)
    entDB = _entDB(entDB)
    entDB.data[dataDstMT] = 'reton'
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    _, endDateIndex = entDB.daterange2index(-1, -1)
    # Work on retOn
    if historicGaps is None:
        historicGaps = _gHistoricGaps(entDB)
    # Only gaps that fit within the available history before retOnDateIndex.
    validHistoric = historicGaps[historicGaps < (retOnDateIndex+1)]
    # Distance in days between each date and the retOn date.
    histDays = abs(numpy.arange(endDateIndex+1)-retOnDateIndex)
    retOnData = entDB.data[dataSrc][:, retOnDateIndex].reshape(entDB.nxtEntIndex,1)
    tROAbs = ((retOnData/entDB.data[dataSrc])-1)*100
    tRORPA = (((retOnData/entDB.data[dataSrc])**(daysInAYear/histDays))-1)*100
    if retOnType == 'absret':
        tResult = tROAbs
    elif retOnType == 'retpa':
        tResult = tRORPA
    else:
        # 'safe' mode: absret within the last year, retpa beyond it.
        # NOTE: the year-threshold check must look at the number of date
        # columns (shape[1]); len() would give the number of entity rows.
        if tROAbs.shape[1] > daysInAYear:
            tResult = tROAbs
            tResult[:, :-daysInAYear] = tRORPA[:, :-daysInAYear]
        else:
            tResult = tROAbs
    entDB.data[dataDst] = tResult
    # Handle meta data: snapshot the result at each valid historic gap,
    # padding the remaining (unavailable) gap slots with NaN.
    entDB.data[dataDstMD] = numpy.ones([entDB.nxtEntIndex,historicGaps.shape[0]])*numpy.nan
    retOnDateDelta = endDateIndex-retOnDateIndex
    entDB.data[dataDstMD][:, :validHistoric.shape[0]] = tResult[:, -(validHistoric+1+retOnDateDelta)]
    entDB.data[dataDstML] = []
    for md in entDB.data[dataDstMD]:
        entDB.data[dataDstML].append(reton_md2str(md))
# --- Example 11 (scraped-example separator, kept as a comment so the file parses) ---
def infoset1_result1_entcodes(entCodes,
                              bPrompt=False,
                              numEntities=-1,
                              entDB=None):
    """
    Print data generated by processing the loaded data, wrt the specified entities,
    to the user.

    NOTE: As 2nd part of the result dump, where it prints data across all specified
    entities, wrt each data aspect that was processed during prep, it tries to sort
    them based on the average meta data info from roll3Y (3Y). And entities which
    are less than 3 years will get collated to the end of the sorted list, based on
    the last RetPA from srel operation. If there are entities which get dropped by
    both the sort operations, then they will get collated to the end.

    numEntities if greater than 0, will limit the number of entities that are shown
    in the comparitive print wrt each processed data type.
    """
    entDB = _entDB(entDB)
    # (display name, MetaLabel key) pairs to dump, per entity and then
    # per data aspect across entities.
    dataSrcs = [
        ['srel', 'srel.MetaLabel'],
        ['absRet', 'roabs.MetaLabel'],
        ['retOn', 'rosaf.MetaLabel'],
        ['roll3Y', 'roll3Y.MetaLabel'],
        ['roll5Y', 'roll5Y.MetaLabel'],
    ]
    # Part 1: per-entity dump of every data aspect's MetaLabel.
    for entCode in entCodes:
        entIndex = entDB.meta['codeD'][entCode]
        print("Name:", entDB.meta['name'][entIndex])
        for dataSrc in dataSrcs:
            print("\t{:16}: {}".format(dataSrc[0],
                                       entDB.data[dataSrc[1]][entIndex]))

    # Part 2: order the entities. minDataYears is capped at 1.5 so short
    # loaded datasets dont filter everything out.
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    dateDuration = entDB.nxtDateIndex / daysInAYear
    if dateDuration > 1.5:
        dateDuration = 1.5
    print("INFO:InfoSet1:AnalSimpleDateDurationSetTo:", dateDuration)
    # Primary ordering: 3Y rolling-return average.
    analR3Y = anal_simple('roll3Y',
                          'roll_avg',
                          'top',
                          entCodes=entCodes,
                          numEntities=len(entCodes),
                          minDataYears=dateDuration,
                          entDB=entDB)
    analR3YEntCodes = [x[0] for x in analR3Y]
    s1 = set(entCodes)
    s2 = set(analR3YEntCodes)
    # Entities dropped by the roll3Y sort get a secondary ordering based
    # on srel RetPA; anything dropped by both goes to the very end.
    otherEntCodes = s1 - s2
    analSRelRPA = anal_simple('srel',
                              'srel_retpa',
                              'top',
                              entCodes=otherEntCodes,
                              numEntities=len(otherEntCodes),
                              minDataYears=dateDuration,
                              entDB=entDB)
    analSRelRPAEntCodes = [x[0] for x in analSRelRPA]
    s3 = set(analSRelRPAEntCodes)
    entCodes = analR3YEntCodes + analSRelRPAEntCodes + list(s1 -
                                                            (s2.union(s3)))

    # Printed for its side-effect output only; result is not used here.
    anal_simple('blockNRoll3Y',
                'block_ranked',
                'top',
                entCodes=entCodes,
                numEntities=len(entCodes),
                minDataYears=dateDuration,
                entDB=entDB)

    # Part 3: comparative dump per data aspect, in the order decided above.
    totalEntities = len(entCodes)
    if numEntities > totalEntities:
        numEntities = totalEntities
    printFmt = "\t{:<20}:{:24}:"
    for dataSrc in dataSrcs:
        print("DataSrc:{}: >>showing {} of {} entities<<".format(
            dataSrc, numEntities, totalEntities))
        # Header line depends on which op generated the data aspect.
        if dataSrc[0] in ['absRet', 'retOn']:
            print((printFmt + " {}").format("code", "name",
                                            theOps.reton_mdhdr()))
        elif dataSrc[0] == 'srel':
            print((printFmt + " {}").format("code", "name",
                                            theOps.srel_mdhdr()))
        elif dataSrc[0].startswith('roll'):
            print((printFmt + " {}").format("code", "name",
                                            theOps.rollret_mdhdr()))
            # For roll aspects, also collect Avg/Std points for a scatter plot.
            x = []
            y = []
            c = []
            dataSrcMetaData = dataSrc[1].replace('Label', 'Data')
        entCount = 0
        for entCode in entCodes:
            entIndex = entDB.meta['codeD'][entCode]
            entName = entDB.meta['name'][entIndex][:24]
            if dataSrc[0].startswith('roll'):
                x.append(entDB.data[dataSrcMetaData][entIndex, 0])
                y.append(entDB.data[dataSrcMetaData][entIndex, 1])
                c.append(entCode)
            print((printFmt + " {}").format(entCode, entName,
                                            entDB.data[dataSrc[1]][entIndex]))
            entCount += 1
            # NOTE(review): '>' (not '>=') means numEntities+1 entities get
            # printed before the break — confirm whether that is intended.
            if (numEntities > 0) and (entCount > numEntities):
                break
        if dataSrc[0].startswith('roll'):
            # Scatter of RollRet Avg vs StD, annotated with entity codes.
            plt.scatter(x, y)
            for i, txt in enumerate(c):
                plt.annotate(txt, (x[i], y[i]))
            plt.xlabel('RollRet Avg')
            plt.ylabel('RollRet StD')
            plt.show()
        if bPrompt:
            input("Press any key to continue...")
# --- Example 12 (scraped-example separator, kept as a comment so the file parses) ---
def anal_simple(dataSrc,
                analType='normal',
                order="top",
                theDate=None,
                theIndex=None,
                numEntities=10,
                entCodes=None,
                minDataYears=1.5,
                bCurrentEntitiesOnly=True,
                bDebug=False,
                iClipNameWidth=64,
                entDB=None):
    """
    Look at specified data in dataSrc, and find top/bottom N entities.
    The rows of the dataSrc represent the entities and
    the cols represent the data associated with the individual entities.
    One can specify the data one wants to look at by using
    * [For Normal] the date one is interested in or
    * [For Normal] the index of the data
    * [For Others] op specific attribute/meta data that
      one is interested in.

    order: could be either 'top' or 'bottom'

    analType: could be one of 'normal', 'srel_absret', 'srel_retpa',
        'roll_avg', 'block_ranked', 'block_avg'

        normal: Look at data corresponding to identified date or index,
        in the given dataSrc, to decide on entities to select.

        srel_absret: Look at the Absolute Returns data associated
        with the given dataSrc (which should be generated using
        srel operation), to decide on entities.

        srel_retpa: Look at the Returns PerAnnum data associated
        with the given dataSrc (which should be generated using
        srel operation), to decide on entities.

        roll_avg: look at Average ReturnsPerAnnum, calculated using
        rolling return (dataSrc specified should be generated using
        roll operation), to decide on entities ordering/selection.

        block_ranked: look at the Avgs calculated by block op,
        for each sub date periods(i.e blocks), rank them and average
        over all the sub date periods to calculate the rank for
        full date Range. Use this final rank to decide on entities
        ordering. (dataSrc should have been generated using block
        operation).

        block_avg: Look at Avg of Averages calculated by block op,
        to order the entities. User can also do a equivalent anal
        by running

            anal_simple('blockOpDst.MetaData', 'normal', theIndex=1)

    theDate and theIndex: [Used by normal analType]
        If both are None, then the logic will try to find a date
        which contains atleast some valid data, starting from the
        lastDate and moving towards startDate wrt given dataSrc.

        NOTE: ValidData: Any Non Zero, Non NaN, Non Inf data

        If theDate is a date, then values in dataSrc corresponding
        to this date will be used for sorting.

        If theDate is -1, then the lastDate wrt the currently
        loaded dataset, is used as the date from which values
        should be used to identify the entities.

        If theIndex is set and theDate is None, then the values
        in dataSrc corresponding to given index, is used for
        sorting.

        NOTE: date follows YYYYMMDD format.

    entCodes: One can restrict the logic to look at data belonging to
        the specified list of entities. If None, then all entities
        in the loaded dataset will be considered, for ranking.

    minDataYears: This sorting logic will ignore entities for which the
        available data duration in the entities database is less than
        that specified here.

        NOTE: The default is 1.5 years, If you have loaded less than that
        amount of data, then remember to set this to a smaller value,
        if required.

        NOTE: It expects the info about duration for which data is
        available for each entity, to be available under 'srel.MetaData'
        key. If this is not the case, it will trigger a generic 'srel'
        operation through ops to generate the same.

            It also means that the check is done wrt overall amount
            of data available for a given entity in the loaded dataset,
            While a dataSrc key which corresponds to a view of partial
            data from the loaded dataset, which is less than specified
            minDataYears, can still be done.

    bCurrentEntitiesOnly: Will drop entities which have not been seen
        in the last 1 week, wrt the dateRange currently loaded.

    iClipNameWidth:
        If None, the program prints full name, with space alloted by
        default for 64 chars.
        Else the program limits the Name to specified width.

    entDB: The data set from which to pick the data to work with.
        If specified, it will be used. Else what ever default
        data set was previously initialised by the program
        will be used.
    """
    entDB = _entDB(entDB)
    theAnal = "{}_{}".format(analType, order)
    #print("DBUG:AnalSimple:{}:{}".format(theAnal, dataSrc))
    printFmts = gAnalSimpleBasePrintFormats.copy()
    printWidths = gAnalSimpleBasePrintWidths.copy()
    printHdr = ["Code", "Name"]
    if order == 'top':
        iSkip = -numpy.inf
    else:
        iSkip = numpy.inf
    theSaneArray = None
    daysInAYear = hlpr.days_in('1Y', entDB.bSkipWeekends)
    if analType == 'normal':
        printHdr.extend(['Value'])
        if (type(theDate) == type(None)) and (type(theIndex) == type(None)):
            for i in range(-1, -entDB.nxtDateIndex, -1):
                if bDebug:
                    print("DBUG:AnalSimple:{}:findDateIndex:{}".format(
                        theAnal, i))
                theSaneArray = entDB.data[dataSrc][:, i].copy()
                theSaneArray[numpy.isinf(theSaneArray)] = iSkip
                theSaneArray[numpy.isnan(theSaneArray)] = iSkip
                if not numpy.all(
                        numpy.isinf(theSaneArray) | numpy.isnan(theSaneArray)):
                    dateIndex = entDB.nxtDateIndex + i
                    print("INFO:AnalSimple:{}:DateIndex:{}".format(
                        theAnal, dateIndex))
                    break
        else:
            if (type(theIndex)
                    == type(None)) and (type(theDate) != type(None)):
                startDateIndex, theIndex = entDB.daterange2index(
                    theDate, theDate)
            #print("DBUG:AnalSimple:{}:theIndex:{}".format(theAnal, theIndex))
            theSaneArray = entDB.data[dataSrc][:,
                                               theIndex].copy().astype(float)
            theSaneArray[numpy.isinf(theSaneArray)] = iSkip
            theSaneArray[numpy.isnan(theSaneArray)] = iSkip
    elif analType.startswith("srel"):
        dataSrcMetaType, dataSrcMetaData, dataSrcMetaLabel = hlpr.data_metakeys(
            dataSrc)
        if analType == 'srel_absret':
            printHdr.extend(['AbsRet'])
            theSaneArray = entDB.data[dataSrcMetaData][:, 0].copy()
        elif analType == 'srel_retpa':
            printHdr.extend(['RetPA'])
            theSaneArray = entDB.data[dataSrcMetaData][:, 1].copy()
        else:
            input(
                "ERRR:AnalSimple:{}:dataSrc[{}]: unknown srel anal subType, returning..."
                .format(theAnal, dataSrc))
            return None
    elif analType.startswith("roll"):
        dataSrcMetaType, dataSrcMetaData, dataSrcMetaLabel = hlpr.data_metakeys(
            dataSrc)
        if analType == 'roll_avg':
            printHdr.extend(['Avg', 'Std', '<minT', 'MaSha', 'Yrs'])
            theSaneArray = entDB.data[dataSrcMetaData][:, 0].copy()
            theSaneArray[numpy.isinf(theSaneArray)] = iSkip
            theSaneArray[numpy.isnan(theSaneArray)] = iSkip
            printFmts.extend([{
                'num': "{:{width}.2f}",
                'str': '{:{width}}'
            }, {
                'num': "{:{width}.2f}",
                'str': '{:{width}}'
            }, {
                'num': "{:{width}.2f}",
                'str': '{:{width}}'
            }, {
                'num': "{:{width}.1f}",
                'str': '{:{width}}'
            }])
            printWidths.extend([7, 7, 7, 4])
    elif analType == "block_avg":
        dataSrcMetaType, dataSrcMetaData, dataSrcMetaLabel = hlpr.data_metakeys(
            dataSrc)
        printHdr.extend(['AvgRank', 'blockAvgs', 'blockStds'])
        metaDataAvgs = "{}Avgs".format(dataSrc)
        metaDataStds = "{}Stds".format(dataSrc)
        theSaneArray = entDB.data[dataSrcMetaData][:, 1].astype(float).copy()
        theSaneArray[numpy.isinf(theSaneArray)] = iSkip
        theSaneArray[numpy.isnan(theSaneArray)] = iSkip
        iValidBlockAtBegin = 0
    elif analType == "block_ranked":
        printHdr.extend(['AvgRank', 'blockRanks', 'blockAvgs'])
        metaDataAvgs = "{}Avgs".format(dataSrc)
        tNumEnts, tNumBlocks = entDB.data[metaDataAvgs].shape
        theRankArray = numpy.zeros([tNumEnts, tNumBlocks + 1])
        iValidBlockAtBegin = 0
        bValidBlockFound = False
        for b in range(tNumBlocks):
            tArray = entDB.data[metaDataAvgs][:, b]
            tValidArray = tArray[numpy.isfinite(tArray)]
            tSaneArray = hlpr.sane_array(tArray, iSkip)
            if len(tValidArray) != 0:
                tQuants = numpy.quantile(tValidArray,
                                         [0, 0.2, 0.4, 0.6, 0.8, 1])
                theRankArray[:, b] = numpy.digitize(tSaneArray, tQuants, True)
                bValidBlockFound = True
            else:
                if not bValidBlockFound:
                    iValidBlockAtBegin = b + 1
                theRankArray[:, b] = numpy.zeros(len(theRankArray[:, b]))
        theRankArray[:, tNumBlocks] = numpy.average(
            theRankArray[:, iValidBlockAtBegin:tNumBlocks], axis=1)
        theRankArray = theRankArray[:, iValidBlockAtBegin:tNumBlocks + 1]
        tNumBlocks = tNumBlocks - iValidBlockAtBegin
        theSaneArray = theRankArray[:, tNumBlocks]
    else:
        input("ERRR:AnalSimple:{}:dataSrc[{}]: unknown analType, returning...".
              format(theAnal, dataSrc))
        return None
    if type(theSaneArray) == type(None):
        print("WARN:DBUG:AnalSimple:{}:{}: No SaneArray????".format(
            theAnal, dataSrc))
        breakpoint()
    theSaneArray = _forceval_entities(theSaneArray,
                                      entCodes,
                                      iSkip,
                                      'invert',
                                      entDB=entDB)
    if minDataYears > 0:
        dataYearsAvailable = entDB.nxtDateIndex / daysInAYear
        if (dataYearsAvailable < minDataYears):
            print(
                "WARN:AnalSimple:{}: dataYearsAvailable[{}] < minDataYears[{}]"
                .format(theAnal, dataYearsAvailable, minDataYears))
        srelMetaType, srelMetaData, srelMetaLabel = hlpr.data_metakeys('srel')
        theSRelMetaData = entDB.data.get(srelMetaData, None)
        if type(theSRelMetaData) == type(None):
            ops('srel=srel(data)')
        if bDebug:
            tNames = numpy.array(entDB.meta['name'])
            tDroppedNames = tNames[entDB.data[srelMetaData][:,
                                                            2] < minDataYears]
            print(
                "INFO:AnalSimple:{}:{}:Dropping if baby Entity".format(
                    theAnal, dataSrc), tDroppedNames)
        theSaneArray[entDB.data[srelMetaData][:, 2] < minDataYears] = iSkip
    if bCurrentEntitiesOnly:
        oldEntities = numpy.nonzero(
            entDB.meta['lastSeenDI'] < (entDB.nxtDateIndex - 1 - 7))[0]
        if bDebug:
            #aNames = numpy.array(entDB.meta['name'])
            #print(aNames[oldEntities])
            for index in oldEntities:
                print("DBUG:AnalSimple:{}:IgnoringOldEntity:{}, {}".format(
                    theAnal, entDB.meta['name'][index],
                    entDB.dates[entDB.meta['lastSeenDI'][index]]))
        theSaneArray[oldEntities] = iSkip
    #theRows=numpy.argsort(theSaneArray)[-numEntities:]
    theRows = numpy.argsort(theSaneArray)
    rowsLen = len(theRows)
    if numEntities > rowsLen:
        print(
            "WARN:AnalSimple:{}:RankContenders[{}] < numEntities[{}] requested, adjusting"
            .format(theAnal, rowsLen, numEntities))
        numEntities = rowsLen
    if order == 'top':
        lStart = -1
        lStop = -(numEntities + 1)
        lDelta = -1
    elif order == 'bottom':
        lStart = 0
        lStop = numEntities
        lDelta = 1
    theSelected = []
    print("INFO:AnalSimple:{}:{}".format(theAnal, dataSrc))
    hlpr.printl(printFmts, printHdr, " ", "\t", "", printWidths)
    for i in range(lStart, lStop, lDelta):
        index = theRows[i]
        if (theSaneArray[index] == iSkip) or ((analType == 'block_ranked') and
                                              (theSaneArray[index] == 0)):
            print("    WARN:AnalSimple:{}:No more valid elements".format(
                theAnal))
            break
        curEntry = [
            entDB.meta['codeL'][index], entDB.meta['name'][index],
            theSaneArray[index]
        ]
        if analType == "roll_avg":
            curEntry.extend(entDB.data[dataSrcMetaData][index, 1:])
        theSelected.append(curEntry)
        if iClipNameWidth == None:
            curEntry[1] = "{:64}".format(curEntry[1])
        else:
            curEntry[1] = "{:{width}}".format(curEntry[1][:iClipNameWidth],
                                              width=iClipNameWidth)
        curEntry[2] = numpy.round(curEntry[2], 2)
        if analType == "roll_avg":
            curEntry[3:] = numpy.round(curEntry[3:], 2)
        elif analType == "block_avg":
            extra = "{}:{}".format(
                hlpr.array_str(
                    entDB.data[metaDataAvgs][index, iValidBlockAtBegin:], 6,
                    2),
                hlpr.array_str(
                    entDB.data[metaDataStds][index, iValidBlockAtBegin:], 6,
                    2))
            curEntry.append(extra)
        elif analType == "block_ranked":
            theSelected[-1] = theSelected[-1] + [theRankArray[index]]
            extra = "{}:{}".format(
                hlpr.array_str(theRankArray[index], 4, "A0L1"),
                hlpr.array_str(
                    entDB.data[metaDataAvgs][index, iValidBlockAtBegin:], 6,
                    2))
            curEntry.append(extra)
        #print("    {} {}".format(extra, curEntry))
        hlpr.printl(printFmts, curEntry, " ", "\t", "", printWidths)
    return theSelected