Example #1
0
def get_btm2():
    size = get_df('size')  #TODO:流通市值对应所有者权益?,总市值?
    totshe = get_df('totshe')
    size, totshe = pandasu.get_inter_frame(size, totshe)
    btm = totshe / size
    btm.index = [str(int(ind[:4]) + 1) + '-06' for ind in btm.index]
    save_df(btm, 'btm2')
Example #2
0
def get_portRet():
    portId = get_df('portId_5_5')
    portId = portId.T

    ret = get_df('ret')
    weight = get_df('weight')

    ports = np.sort(
        [p for p in portId.iloc[:, -1].unique() if not np.isnan(p)])

    portRet = pd.DataFrame()
    for month in portId.columns.tolist():
        year = month[:4]
        validmonths = [year + '-0' + str(i) for i in range(7, 10)]
        validmonths += [year + '-1' + str(i) for i in range(3)]
        validmonths += [
            str(int(year) + 1) + '-0' + str(i) for i in range(1, 7)
        ]

        for port in ports:
            stocks = portId[portId[month] == port].index.tolist()
            for validmonth in validmonths:
                if validmonth in ret.index.tolist():
                    tmp = pd.DataFrame()
                    tmp['ret'] = ret.loc[validmonth, stocks]
                    tmp['weight'] = weight.loc[validmonth, stocks]
                    tmp = tmp.dropna(axis=0, how='any')
                    pr = pandasu.mean_self(tmp, 'ret', 'weight')
                    portRet.loc[validmonth, port] = pr
                else:
                    portRet.loc[validmonth, port] = np.NaN
        print month
    portRet = portRet.dropna(axis=0, how='any')
    save_df(portRet, 'portRet')
Example #3
0
def get_rp():
    rf = get_df('rf')
    rm = get_df('rm')

    rp = rm['rm'] - rf['rf']

    rp = rp.to_frame()
    rp.columns = ['rp']
    save_df(rp, 'rp')
Example #4
0
def get_portEret_ts():
    portRet_rs_tmv = get_df('portRet_rs_tmv')
    portRet_rs_mc = get_df('portRet_rs_mc')
    rf = get_df('rf')

    portEret_rs_tmv = portRet_rs_tmv.sub(rf['rf'], axis=0)
    portEret_rs_mc = portRet_rs_mc.dropna(axis=0, how='any')
    save_df(portEret_rs_tmv, 'portEret_rs_tmv')
    save_df(portEret_rs_mc, 'portEret_rs_mc')
Example #5
0
def validate_rm():
    rm = get_df('rm')
    rm1 = get_df('rm1')

    df = pd.DataFrame()
    df['rm'] = rm['rm']
    df['rm1'] = rm1['rm']
    df.cumsum().plot()

    (df['rm'] - df['rm1']).cumsum().plot()
Example #6
0
def get_op():
    operIncom = get_df('operIncom')
    admExp = get_df('admExp').fillna(method='ffill')
    finanExp = get_df('finanExp').fillna(method='ffill')
    operExp = get_df('operExp').fillna(method='ffill')

    op = operIncom - admExp - finanExp - operExp
    op.index = [
        str(int(ind[:4]) + 1) + '-06' for ind in op.index
    ]  #TODO:In june of year t,op comes from fiscal year ending in year t-1
    save_df(op, 'op')
Example #7
0
def get_portId(sizeNum, btmNum):
    sizeId = get_df('sizeId_%s' % sizeNum)
    if btmNum == 3:
        btmId = get_df('btmId_[0.3, 0.7]')  #TODO: change the file name
    else:
        btmId = get_df('btmId_%s' % btmNum)

    sizeId, btmId = pandasu.get_inter_frame(sizeId, btmId)

    portId = sizeId * 10 + btmId
    save_df(portId, 'portId_%s_%s' % (sizeNum, btmNum))
Example #8
0
def get_smb_hml():
    portId = get_df('portId_2_3')
    portId = portId.T  #TODO: transpose
    ret = get_df('ret')
    weight = get_df('weight')

    def _get_portRet(validmonth, stocks):
        tmp = pd.DataFrame()
        if validmonth in ret.index.tolist():
            tmp['ret'] = ret.loc[validmonth,
                                 stocks]  # TODO: validmonth rathar than month
            tmp['weight'] = weight.loc[validmonth, stocks]
            tmp = tmp.dropna(axis=0, how='any')
            portRet = pandasu.mean_self(tmp, 'ret', 'weight')
            return portRet
        else:
            return np.NaN

    factor = pd.DataFrame(columns=['smb', 'hml'])
    months = portId.columns.tolist()
    for month in months:
        year = month[:4]
        validmonths = [year + '-0' + str(i) for i in range(7, 10)]
        validmonths += [year + '-1' + str(i) for i in range(3)]
        validmonths += [
            str(int(year) + 1) + '-0' + str(i) for i in range(1, 7)
        ]

        aa = portId[portId[month] == 11].index.tolist()
        ab = portId[portId[month] == 12].index.tolist()
        ac = portId[portId[month] == 13].index.tolist()

        ba = portId[portId[month] == 21].index.tolist()
        bb = portId[portId[month] == 22].index.tolist()
        bc = portId[portId[month] == 23].index.tolist()

        for validmonth in validmonths:
            aaret = _get_portRet(validmonth, aa)
            abret = _get_portRet(validmonth, ab)
            acret = _get_portRet(validmonth, ac)

            baret = _get_portRet(validmonth, ba)
            bbret = _get_portRet(validmonth, bb)
            bcret = _get_portRet(validmonth, bc)

            smb = (aaret + abret + acret) / 3.0 - (baret + bbret + bcret) / 3.0
            hml = (acret + bcret) / 2.0 - (aaret + baret) / 2.0
            factor.loc[validmonth] = [smb, hml]
            print validmonth
    save_df(factor, 'smb_hml')
Example #9
0
def validate_smb_hml():
    smb_hml = get_df('smb_hml')

    ff3 = get_df('ff3')

    com = pd.concat([smb_hml, ff3], axis=1)
    com = com.dropna(axis=0, how='any')
    cols1 = ['smb', 'SMB1', 'SMB2']
    cols2 = ['hml', 'HML1', 'HML2']

    com[cols1][24:].cumsum().plot().get_figure().savefig(
        r'D:\quantDb\researchTopics\crossSection\data\observe\smb.png')
    com[cols2][24:].cumsum().plot().get_figure().savefig(
        r'D:\quantDb\researchTopics\crossSection\data\observe\hml.png')
Example #10
0
def get_inv():
    totAsset = get_df('totAsset')
    inv = totAsset.pct_change()
    inv.index = [
        str(int(ind[:4]) + 1) + '-06' for ind in inv.index
    ]  #TODO:In june of year t,inv comes from fiscal year ending in year t-1
    save_df(inv, 'inv')
Example #11
0
def validate_portRet():
    df = pd.read_csv(r'D:\quantDb\resset\PMONRET_FF.csv', index_col=0)
    q1 = 'Exchflg == 0'
    q2 = 'Mktflg == A'
    df = filterDf(df, [q1, q2])

    mypr = get_df('portRet')
    mypr.columns = [int(float(col)) for col in mypr.columns]
    pr = pd.DataFrame()
    for date in sorted(df['Date'].unique().tolist()):
        month = date[:-3]
        for i in range(1, 6):
            for j in range(1, 6):
                try:
                    pr.loc[month, i * 10 +
                           j] = df[(df['Sizeflg'] == i) & (df['BMflg'] == j) &
                                   (df['Date']
                                    == date)]['Pmonret_tmv'].values[0]
                except IndexError:
                    pr.loc[month, i * 10 + j] = np.NaN
                    pass

        print month

    pr = pr.dropna(axis=0, how='any')

    for i in range(1, 6):
        for j in range(1, 6):
            port = i * 10 + j
            tmp = pd.DataFrame()
            tmp['mypr'] = mypr[port]
            tmp['pr'] = pr[port]
            tmp.cumsum().plot().get_figure().savefig(
                r'D:\quantDb\researchTopics\crossSection\data\observe\portRet\%s.png'
                % port)
Example #12
0
def get_btm1():
    bv = get_df('bv')
    mv = get_df('mv')

    inds = sorted(
        list(set(bv.index.tolist()).intersection(set(mv.index.tolist()))))
    cols = sorted(
        list(set(bv.columns.tolist()).intersection(set(mv.columns.tolist()))))

    bv = bv.loc[inds, cols]
    mv = mv.loc[inds, cols]

    btm = bv / mv

    btm.index = [str(int(ind[:4]) + 1) + '-06' for ind in btm.index]
    #TODO: notice that we can only use the data 6 months later
    save_df(btm, 'btm')
Example #13
0
def get_factorId(tbname, param):
    '''
    :param tbname:
    :param param:an int number or a list of breakpoints such as [0.3,0.7]
    :return:
    '''
    df = get_df(tbname)
    nfc = get_df('nfc')['Stkcd'].values
    nfc = [str(n) for n in nfc]
    cols = [s for s in df.columns if s in nfc]
    df = df[cols]

    ids = pd.DataFrame(columns=df.columns)
    for month in df.index.tolist():
        sub = df.loc[month].to_frame()
        sub = sub.dropna()
        sub['id'] = mathu.getPortId(sub, param)
        ids.loc[month] = sub['id']
        print month
    save_df(ids, '%sId_%s' % (tbname, param))
Example #14
0
def get_rm1():
    Mretwd = get_df('ret')
    weight = get_df('weight')

    Mretwd.head()
    rm = pd.DataFrame()

    for month in Mretwd.index.tolist():
        tmpdf = pd.DataFrame(columns=Mretwd.columns)
        tmpdf.loc['ret'] = Mretwd.loc[month]
        tmpdf.loc['weight'] = weight.loc[month]
        tmpdf = tmpdf.T
        tmpdf = tmpdf.dropna(axis=0, how='any')
        if tmpdf.shape[0] > 0:
            ret = np.average(tmpdf['ret'], weights=tmpdf['weight'])
            rm.loc[month, 'rm'] = ret
        else:
            rm.loc[month, 'rm'] = np.NaN
        print month

    save_df(rm, 'rm1')
Example #15
0
def portDescribe():
    portId = get_df('portId_2_3')
    size = pd.DataFrame(columns=[11, 12, 13, 21, 22, 23])
    for month in portId.index.tolist():
        tmp = portId.loc[month]
        size.loc[month] = tmp.value_counts()
    size = size.fillna(0)
    describe = size.copy()
    describe['min'] = size.min(axis=1)
    describe['max'] = size.max(axis=1)
    describe['sum'] = size.sum(axis=1)

    describe.to_csv(
        r'D:\quantDb\researchTopics\crossSection\data\observe\portDescribe.csv'
    )
Example #16
0
def validate_rp():
    myrp = get_df('rp')
    myrp.columns = ['myrp']

    rp = pd.read_csv(os.path.join(sp, 'STK_MKT_FivefacMonth.csv'))
    rp = rp[rp['Portfolios'] == 1]

    rp = rp[rp['MarkettypeID'] == 'P9709']

    colnames = ['TradingMonth', 'RiskPremium1', 'RiskPremium2']
    rp = rp[colnames]
    rp = rp.set_index('TradingMonth')

    com = pd.concat([myrp, rp], axis=1)
    com = com.dropna(axis=0, how='any')

    ax = com.cumsum().plot()
    fig = ax.get_figure()
    fig.savefig(r'D:\quantDb\researchTopics\crossSection\data\observe\rp.png')
Example #17
0
def get_weight():
    size = get_df('size')
    weight = size.shift(1)
    save_df(weight, 'weight')
Example #18
0
def get_portEret():
    portRet = get_df('portRet')
    rf = get_df('rf')
    portEret = portRet.sub(rf['rf'], axis=0)
    portEret = portEret.dropna(axis=0, how='any')
    save_df(portEret, 'portEret')