Beispiel #1
0
    def _get_panel_stk_avg(self, comb, indicator, gcol):
        """Average 'stockEretM' within every ('t', gcol) group, two ways.

        :param comb: table holding 'stockEretM', the grouping columns and the
            weighting column (used through ``groupby``).
        :param indicator: name of the sort variable; it doubles as the
            weighting column when ``self.factor`` is 'size'.
        :param gcol: name of the column holding the portfolio id.
        :return: tuple ``(equal_weighted, weighted)``, one value per
            ('t', gcol) group.
        """
        keys = ['t', gcol]
        equal_avg = comb.groupby(keys)['stockEretM'].mean()

        # For the size factor the sort variable itself is the weight (the
        # original note refers to page 159). Otherwise 'weight' is used: per
        # the original note the index denotes t+1 while the weight comes from
        # time t, because the weight was shifted forward in the dataset.
        if self.factor == 'size':
            weight_col = indicator
        else:
            weight_col = 'weight'

        def _weighted(group):
            return my_average(group, 'stockEretM', wname=weight_col)

        weighted_avg = comb.groupby(keys).apply(_weighted)
        return equal_avg, weighted_avg
Beispiel #2
0
def construct_playingField(vars, model):
    """Build the panel of test assets for the requested sorting model.

    :param vars: sort variable names; two for '5x5', three for '2x4x4'
        (the original note says the first one must belong to the size
        category in the '2x4x4' case).
    :param model: one of {'5x5', '2x4x4'}.
    :return: 'weight'-weighted average 'stockEretM' per portfolio, with the
        group levels unstacked.
    :raises MyError: if ``model`` is not one of the supported names.
    """

    def _weighted_return(group):
        return my_average(group, 'stockEretM', wname='weight')

    def _assign(frame, keys, column, n_groups):
        # Label the rows of every `keys` group with ids 1..n_groups.
        ids = range(1, n_groups + 1)
        return frame.groupby(keys, group_keys=False).apply(
            lambda part: assign_port_id(part[column], n_groups, ids))

    if model == '5x5':
        v1, v2 = tuple(vars)
        comb = data_for_bivariate(v1, v2, 5, 5, independent=True)
        levels = ['g1', 'g2']
    elif model == '2x4x4':
        v1, v2, v3 = tuple(vars)
        comb = combine_with_datalagged([v1, v2, v3]).dropna()
        comb['g1'] = _assign(comb, 't', v1, 2)
        # Both the second and the third sort are done within ('t', 'g1').
        comb['g2'] = _assign(comb, ['t', 'g1'], v2, 4)
        comb['g3'] = _assign(comb, ['t', 'g1'], v3, 4)
        levels = ['g1', 'g2', 'g3']
    else:
        raise MyError('Model "{}" is not supported currently'.format(model))

    stacked = comb.groupby(['t'] + levels).apply(_weighted_return)
    assets = stacked.unstack(level=levels)
    return assets
Beispiel #3
0
def get_25assets(v1, v2):
    """Return the 5x5 portfolio panel built from two sort variables.

    Each variable is taken from ``Database`` when it is listed in
    ``all_indicators``; otherwise it is read from ``<dirFI>/<name>.pkl`` and
    stacked. Sort variables and 'weight' are shifted by one row per 'sid'
    before being joined with 'stockEretM'.

    :param v1: name of the first sort variable (group column 'g1').
    :param v2: name of the second sort variable (group column 'g2').
    :return: 'weight'-weighted average 'stockEretM' per ('t', 'g1', 'g2'),
        with 'g1' and 'g2' unstacked.
    """
    sample_control = False
    n_groups = 5

    def _load(name):
        if name in Database(sample_control=sample_control).all_indicators:
            return Database(sample_control=sample_control).by_indicators(
                [name])
        # NOTE: read_pickle must only ever be pointed at trusted files.
        series = pd.read_pickle(os.path.join(dirFI, name + '.pkl')).stack()
        series.name = name
        return series

    sort_vars = [_load(name) for name in (v1, v2)]

    # Lagged side: sort variables plus weight, shifted one row per stock.
    weight = Database(sample_control=sample_control).by_indicators(['weight'])
    lagged = pd.concat(sort_vars + [weight], axis=1)
    lagged = lagged.groupby('sid').shift(1)

    # Unshifted side: the return itself.
    returns = Database(sample_control=sample_control).by_indicators(
        ['stockEretM'])

    comb = pd.concat([lagged, returns], axis=1).dropna()

    def _rank_by(column):
        return comb.groupby('t', group_keys=False).apply(
            lambda month: assign_port_id(month[column], n_groups))

    comb['g1'] = _rank_by(v1)
    comb['g2'] = _rank_by(v2)

    stacked = comb.groupby(['t', 'g1', 'g2']).apply(
        lambda cell: my_average(cell, 'stockEretM', wname='weight'))
    return stacked.unstack(level=['g1', 'g2'])
Beispiel #4
0
def indicator2factor(indicator):
    """Turn an indicator into a top-minus-bottom factor and pickle it.

    The indicator (read through ``_read`` and stacked) and 'weight' are
    shifted by one row per 'sid', joined with 'stockEretM', and the rows of
    every 't' are assigned to 5 groups. The factor is the 'weight'-weighted
    return of group 5 minus that of group 1, written to
    ``<factorPath>/<indicator>.pkl``.

    :param indicator: name of the indicator to convert.
    :return: None; the result is only written to disk.
    """
    sample_control = False
    n_groups = 5

    # Lagged side: indicator plus weight, shifted one row per stock.
    signal = _read(indicator).stack()
    signal.name = indicator
    weight = Database(sample_control=sample_control).by_indicators(['weight'])
    lagged = pd.concat([signal, weight], axis=1).groupby('sid').shift(1)

    # Unshifted side: the return itself.
    returns = Database(sample_control=sample_control).by_indicators(
        ['stockEretM'])
    comb = pd.concat([lagged, returns], axis=1).dropna()

    def _rank(month):
        return assign_port_id(month[indicator], n_groups)

    def _weighted_return(cell):
        return my_average(cell, 'stockEretM', wname='weight')

    comb['g'] = comb.groupby('t', group_keys=False).apply(_rank)
    panel = comb.groupby(['t', 'g']).apply(_weighted_return)
    panel = panel.unstack(level=['g'])

    factor = panel[n_groups] - panel[1]
    factor.name = indicator
    target = os.path.join(factorPath, '{}.pkl'.format(indicator))
    factor.to_pickle(target)
Beispiel #5
0
def get_bivariate_panel(v1, v2='size__size'):
    """Return the 5x5 portfolio panel for two sort variables.

    Each variable comes from ``Database`` when it is listed in
    ``all_indicators``; otherwise it is loaded with ``_read`` and stacked.
    Sort variables and 'weight' are shifted by one row per 'sid' before being
    joined with 'stockEretM'. ``v1`` is printed before returning.

    :param v1: name of the first sort variable (group column 'g1').
    :param v2: name of the second sort variable (group column 'g2').
    :return: 'weight'-weighted average 'stockEretM' per ('t', 'g1', 'g2'),
        with 'g1' and 'g2' unstacked.
    """
    sample_control = False
    n_groups = 5

    def _load(name):
        if name in Database(sample_control=sample_control).all_indicators:
            return Database(sample_control=sample_control).by_indicators(
                [name])
        series = _read(name).stack()
        series.name = name
        return series

    sort_vars = [_load(name) for name in (v1, v2)]

    # Lagged side: sort variables plus weight, shifted one row per stock.
    weight = Database(sample_control=sample_control).by_indicators(['weight'])
    lagged = pd.concat(sort_vars + [weight], axis=1)
    lagged = lagged.groupby('sid').shift(1)

    # Unshifted side: the return itself.
    returns = Database(sample_control=sample_control).by_indicators(
        ['stockEretM'])
    comb = pd.concat([lagged, returns], axis=1).dropna()

    def _rank_by(column):
        return comb.groupby('t', group_keys=False).apply(
            lambda month: assign_port_id(month[column], n_groups))

    comb['g1'] = _rank_by(v1)
    comb['g2'] = _rank_by(v2)

    stacked = comb.groupby(['t', 'g1', 'g2']).apply(
        lambda cell: my_average(cell, 'stockEretM', wname='weight'))
    panel = stacked.unstack(level=['g1', 'g2'])
    print(v1)
    return panel
def single_sorting_factor(indicator, q, weight=False):
    """Construct a long-short factor from a single sort on ``indicator``.

    Per the original description: every month stocks are grouped into
    portfolios by the rank of ``indicator``, the next month's portfolio
    return is computed, and the factor is the return of the top portfolio
    minus that of the bottom portfolio.

    :param indicator: name of the sort variable.
    :param q: an int (number of portfolios) or a list/tuple whose length is
        one more than the number of portfolios.
    :param weight: if true, average 'stockEretM' with ``my_average`` using
        the 'weight' column; otherwise use the plain mean.
    :return: Series, last-label portfolio minus first-label portfolio.
    :raises MyError: if ``q`` is neither an int nor a list/tuple.
    """
    if isinstance(q, int):
        n_ports = q
    elif isinstance(q, (list, tuple)):
        n_ports = len(q) - 1
    else:
        raise MyError('q:"{}"  is wrong!'.format(repr(q)))
    labels = ['g{}'.format(i) for i in range(1, n_ports + 1)]

    comb = combine_with_datalagged([indicator])

    def _rank(month):
        return assign_port_id(month[indicator], q, labels)

    comb['g'] = comb.groupby('t', group_keys=False).apply(_rank)

    by_port = comb.groupby(['t', 'g'])
    if weight:
        stacked = by_port.apply(
            lambda cell: my_average(cell, 'stockEretM', wname='weight'))
    else:
        stacked = by_port['stockEretM'].mean()
    panel = stacked.unstack(level=['g'])

    top, bottom = labels[-1], labels[0]
    return panel[top] - panel[bottom]
Beispiel #7
0
    def _one_indicator(self, indicator):
        """Build equal- and weight-averaged spread tables for one indicator.

        Rows are assigned to ``self.q`` quantile groups of ``indicator``
        within every 't'. For each horizon ``n`` in 1..12 a shifted return
        column is added, averaged per ('t', 'g') group (plain mean and
        ``my_average`` with 'weight'), reduced to a top-minus-bottom spread
        per 't' and passed to ``risk_adjust``.

        :param indicator: name of the indicator column to sort on.
        :return: tuple ``(eq_table, vw_table)``; each concatenates the twelve
            ``risk_adjust`` results along axis 1, keyed by the horizon column
            names.
        """
        ns = range(1, 13)
        all_indicators = [indicator, 'weight', 'stockEretM']
        comb = DATA.by_indicators(all_indicators)
        comb = comb.dropna()
        try:
            comb['g'] = comb.groupby(
                't', group_keys=False).apply(lambda df: pd.qcut(
                    df[indicator],
                    self.q,
                    labels=[indicator + str(i) for i in range(1, self.q + 1)],
                    duplicates='raise'))
        except ValueError:
            # Fallback when qcut fails with ValueError (the original note
            # cites non-unique bin edges): cut on first-occurrence ranks
            # instead of raw values, see
            # https://stackoverflow.com/questions/20158597/how-to-qcut-with-non-unique-bin-edges
            comb['g'] = comb.groupby(
                't', group_keys=False).apply(lambda df: pd.qcut(
                    df[indicator].rank(method='first'),
                    self.q,
                    labels=[indicator + str(i) for i in range(1, self.q + 1)]))

        def _one_indicator_one_weight_type(group_ts, indicator):
            """Return ``risk_adjust`` of the per-'t' top-minus-bottom spread."""
            def _big_minus_small(s, ind):
                """Value of group ``ind + str(self.q)`` minus group ``ind + '1'``."""
                # All rows of `s` belong to one 't', so the first index value
                # identifies the period.
                time = s.index.get_level_values('t')[0]
                return s[(time, ind + str(self.q))] - s[(time, ind + '1')]

            spread_data = group_ts.groupby('t').apply(
                lambda series: _big_minus_small(series, indicator))
            s = risk_adjust(spread_data)
            return s

        # NOTE(review): 'eret' is not among the columns requested in
        # `all_indicators` ('stockEretM' is) — confirm that
        # DATA.by_indicators really returns an 'eret' column, otherwise this
        # raises KeyError.
        eret = comb['eret'].unstack()

        s_es = []
        s_ws = []
        eret_names = []
        for n in ns:
            # NOTE(review): the column is named with n + 1 while the data is
            # shifted by -n — confirm the intended horizon labelling.
            eret_name = 'eret_ahead%s' % (n + 1)
            # presumably the rows of `eret` are 't', so shift(-n) aligns each
            # row with the value n rows later — TODO confirm index layout
            comb[eret_name] = eret.shift(-n).stack()

            group_eavg_ts = comb.groupby(['t', 'g'])[eret_name].mean()
            group_wavg_ts = comb.groupby(
                ['t',
                 'g']).apply(lambda df: my_average(df, eret_name, 'weight'))
            # An earlier variant used np.average(df[eret_name],
            # weights=df['weight']) here; the original fixme asked what
            # happens when there are NaN values.
            # TODO: when analysing size, the weights should be the indicator
            # being analysed rather than 'weight'.
            s_e = _one_indicator_one_weight_type(group_eavg_ts, indicator)
            s_w = _one_indicator_one_weight_type(group_wavg_ts, indicator)
            s_es.append(s_e)
            s_ws.append(s_w)
            eret_names.append(eret_name)
        eq_table = pd.concat(s_es, axis=1, keys=eret_names)
        vw_table = pd.concat(s_ws, axis=1, keys=eret_names)
        return eq_table, vw_table
Beispiel #8
0
    def _get_panel_stk_avg(self, comb, indicator, gcol):
        """Average 'eretM' within every ('t', gcol) group, two ways.

        :param comb: table holding 'eretM', the grouping columns and the
            weighting column (used through ``groupby``).
        :param indicator: name of the sort variable; it doubles as the
            weighting column when ``self.factor`` is 'size'.
        :param gcol: name of the column holding the portfolio id.
        :return: tuple ``(equal_weighted, weighted)``, one value per
            ('t', gcol) group.
        """
        keys = ['t', gcol]
        equal_avg = comb.groupby(keys)['eretM'].mean()

        # For the size factor the sort variable itself is the weight (the
        # original note refers to page 159). Otherwise 'capM' is used: per
        # the original note the index denotes t+1 while capM comes from
        # time t, because capM was shifted forward in the dataset.
        if self.factor == 'size':
            weight_col = indicator
        else:
            weight_col = 'capM'

        def _weighted(group):
            return my_average(group, 'eretM', wname=weight_col)

        weighted_avg = comb.groupby(keys).apply(_weighted)
        return equal_avg, weighted_avg
Beispiel #9
0
def get_hxz4():
    '''
    Calculate the hxz4 factors and save them under the name 'hxz4M'.
    Refer to din.py for details about the indicators.

    Rows are split within every 't' into 2 groups on 'size__size', then,
    within every ('t', 'g1') group, into 3 groups on 'inv__inv' and into
    3 groups on 'roe__roe' (the list [0, 0.3, 0.7, 1.0] is handed to
    assign_port_id, presumably as quantile breakpoints). The factors are
    differences between simple averages of the 'weight'-weighted portfolio
    returns:

        smb = size group 1 - size group 2
        ria = inv group 3  - inv group 1
        roe = roe group 3  - roe group 1

    References:
        Hou, K., Mo, H., Xue, C., and Zhang, L. (2018). Motivating Factors (Rochester, NY: Social Science Research Network).

    Returns:
        None. The combined frame (columns 'rp', 'smb', 'ria', 'roe', rows
        with missing values dropped) is persisted through ``save``.
    '''
    v1 = 'size__size'
    v2 = 'inv__inv'  #I/A
    v3 = 'roe__roe'  # ROE

    comb = combine_with_datalagged([v1, v2, v3], sample_control=True)
    comb = comb.dropna()

    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], 2, range(1, 3)))

    # Both the investment and the ROE sort are done within ('t', 'g1').
    comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v2], [0, 0.3, 0.7, 1.0], range(1, 4)))

    comb['g3'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], [0, 0.3, 0.7, 1.0], range(1, 4)))

    assets = comb.groupby(
        ['t', 'g1', 'g2',
         'g3']).apply(lambda df: my_average(df, 'stockEretM', wname='weight'))

    df1 = assets.groupby(['t', 'g1']).mean().unstack(level='g1')
    smb = df1[1] - df1[2]

    # NOTE(review): this is group 3 minus group 1 on I/A; the q-factor papers
    # describe the investment factor as low-minus-high — confirm the label
    # orientation of assign_port_id / the sign convention of 'inv__inv'.
    df2 = assets.groupby(['t', 'g2']).mean().unstack(level='g2')
    ria = df2[3] - df2[1]

    # Both legs must come from the ROE panel (df3). The previous code
    # subtracted df2[1], i.e. the low-investment portfolio.
    df3 = assets.groupby(['t', 'g3']).mean().unstack(level='g3')
    roe = df3[3] - df3[1]

    rp = load_data('rpM')
    hxz4 = pd.concat([rp, smb, ria, roe],
                     axis=1,
                     keys=['rp', 'smb', 'ria', 'roe'])
    hxz4.columns.name = 'type'
    hxz4 = hxz4.dropna()
    save(hxz4, 'hxz4M')
def get_single_sorting_assets(indicator, q, weight=True):
    """Return the portfolio return panel from a single sort on ``indicator``.

    :param indicator: name of the sort variable.
    :param q: an int (number of portfolios) or a list/tuple whose length is
        one more than the number of portfolios.
    :param weight: if true, average 'stockEretM' with ``my_average`` using
        the 'weight' column; otherwise use the plain mean.
    :return: average 'stockEretM' per ('t', 'g') with 'g' unstacked; the
        group labels are 'g1', 'g2', ...
    :raises MyError: if ``q`` is neither an int nor a list/tuple.
    """
    if isinstance(q, int):
        n_ports = q
    elif isinstance(q, (list, tuple)):
        n_ports = len(q) - 1
    else:
        raise MyError('q:"{}"  is wrong!'.format(repr(q)))
    labels = ['g{}'.format(i) for i in range(1, n_ports + 1)]

    comb = combine_with_datalagged([indicator])

    def _rank(month):
        return assign_port_id(month[indicator], q, labels)

    comb['g'] = comb.groupby('t', group_keys=False).apply(_rank)

    by_port = comb.groupby(['t', 'g'])
    if weight:
        stacked = by_port.apply(
            lambda cell: my_average(cell, 'stockEretM', wname='weight'))
    else:
        stacked = by_port['stockEretM'].mean()
    return stacked.unstack(level=['g'])
def three_sorting_factor(v1, v2, v3, q1, q2, q3, weight=True):
    """Construct three long-short factors from a triple sort.

    v1 and v2 are sorted independently within every 't'; v3 is sorted
    conditionally, within every ('t', 'g1', 'g2') cell.

    Reference:
        page 18 of Pan, L., Tang, Y., and Xu, J. (2016).
        Speculative Trading and Stock Returns. Review of Finance 20, 1835-1865.

    :param v1: first sort variable (groups 1..q1, column 'g1').
    :param v2: second sort variable (groups 1..q2, column 'g2').
    :param v3: third sort variable (groups 1..q3, column 'g3').
    :param q1: number of groups for v1.
    :param q2: number of groups for v2.
    :param q3: number of groups for v3.
    :param weight: if true, average 'stockEretM' with ``my_average`` using
        the 'weight' column; otherwise use the plain mean.
    :return: tuple of three Series, each the top group minus group 1 of the
        simple average of the cell returns along one sort dimension.
    """
    comb = combine_with_datalagged([v1, v2, v3]).dropna()

    def _assign(keys, column, n_groups):
        # Label the rows of every `keys` group with ids 1..n_groups.
        ids = range(1, n_groups + 1)
        return comb.groupby(keys, group_keys=False).apply(
            lambda part: assign_port_id(part[column], n_groups, ids))

    comb['g1'] = _assign('t', v1, q1)
    comb['g2'] = _assign('t', v2, q2)
    comb['g3'] = _assign(['t', 'g1', 'g2'], v3, q3)

    cells = comb.groupby(['t', 'g1', 'g2', 'g3'])
    if weight:
        cell_returns = cells.apply(
            lambda cell: my_average(cell, 'stockEretM', wname='weight'))
    else:
        cell_returns = cells['stockEretM'].mean()

    def _spread(level, top):
        panel = cell_returns.groupby(['t', level]).mean()
        panel = panel.unstack(level=level)
        return panel[top] - panel[1]

    factor1 = _spread('g1', q1)
    factor2 = _spread('g2', q2)
    factor3 = _spread('g3', q3)
    return factor1, factor2, factor3
def two_sorting_factor(v1,
                       v2,
                       q1,
                       q2,
                       independent=True,
                       weight=True,
                       **kwargs):
    """Construct two long-short factors from a double sort.

    The original note likens this to the way SMB and HML are constructed.

    :param v1: first sort variable (group column 'g1').
    :param v2: second sort variable (group column 'g2').
    :param q1: grouping spec for v1, forwarded to ``data_for_bivariate``.
    :param q2: grouping spec for v2, forwarded to ``data_for_bivariate``.
    :param independent: sort independently or not (forwarded).
    :param weight: if true, average 'stockEretM' with ``my_average`` using
        the 'weight' column; otherwise use the plain mean.
    :param kwargs: extra keyword arguments for ``data_for_bivariate``.
    :return: a tuple of two Series, each the largest group label minus
        group 1 along one sort dimension.
    """
    comb = data_for_bivariate(
        v1, v2, q1, q2, independent=independent, **kwargs)

    cells = comb.groupby(['t', 'g1', 'g2'])
    if weight:
        cell_returns = cells.apply(
            lambda cell: my_average(cell, 'stockEretM', wname='weight'))
    else:
        cell_returns = cells['stockEretM'].mean()

    def _spread(level):
        panel = cell_returns.groupby(['t', level]).mean()
        panel = panel.unstack(level=level)
        top = panel.columns.max()
        return panel[top] - panel[1]

    factor1 = _spread('g1')
    factor2 = _spread('g2')
    return factor1, factor2
Beispiel #13
0
def indicatorDf_to_10_assets(indicatorDf, indicatorName):
    """Build 10 sorted portfolios from an indicator table.

    The stacked indicator and 'weight' are shifted by one row per 'sid',
    joined with 'stockEretM', and the rows of every 't' are assigned to
    10 groups by the indicator.

    :param indicatorDf: indicator table; it is stacked into a Series.
    :param indicatorName: name given to the stacked indicator column.
    :return: 'weight'-weighted average 'stockEretM' per ('t', 'g') with 'g'
        unstacked.
    """
    sample_control = False
    n_groups = 10

    # Lagged side: indicator plus weight, shifted one row per stock.
    signal = indicatorDf.stack()
    signal.name = indicatorName
    weight = Database(sample_control=sample_control).by_indicators(['weight'])
    lagged = pd.concat([signal, weight], axis=1).groupby('sid').shift(1)

    # Unshifted side: the return itself.
    returns = Database(sample_control=sample_control).by_indicators(
        ['stockEretM'])
    comb = pd.concat([lagged, returns], axis=1).dropna()

    def _rank(month):
        return assign_port_id(month[indicatorName], n_groups)

    def _weighted_return(cell):
        return my_average(cell, 'stockEretM', wname='weight')

    comb['g'] = comb.groupby('t', group_keys=False).apply(_rank)
    stacked = comb.groupby(['t', 'g']).apply(_weighted_return)
    return stacked.unstack(level=['g'])
Beispiel #14
0
 def _get_eret(self, comb):
     """Return equal- and 'weight'-averaged 'stockEretM' per ('g1', 'g2', 't').

     :param comb: table with the columns 'g1', 'g2', 't', 'stockEretM' and
         'weight' (used through ``groupby``).
     :return: tuple ``(equal_weighted, weighted)``.
     """
     keys = ['g1', 'g2', 't']

     def _weighted(group):
         return my_average(group, 'stockEretM', 'weight')

     equal_avg = comb.groupby(keys)['stockEretM'].mean()
     weighted_avg = comb.groupby(keys).apply(_weighted)
     return equal_avg, weighted_avg