def single_sorting_factor(indicator, q, weight=False):
    # method1 independent way
    '''
    This function is used to construct a new factor by a given indicator.
    We first group stocks into "q" portfolios based on the rank of "indicator"
    every month.Then,at the next month we calculate the corresponding monthly
    value-weighted (if weight is True) portfolio return.The factor return is
    the spread between the return of the top portfolio and bottom portfolio.

    :param indicator:
    :param q:
    :param weight:
    :return:Series
    '''
    if isinstance(q, int):
        labels = ['g{}'.format(i) for i in range(1, q + 1)]
    elif isinstance(q, (list, tuple)):
        labels = ['g{}'.format(i) for i in range(1, len(q))]
    else:
        raise MyError('q:"{}"  is wrong!'.format(repr(q)))

    comb = combine_with_datalagged([indicator])
    comb['g'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[indicator], q, labels))

    if weight:
        panel=comb.groupby(['t','g']).apply(
            lambda df:my_average(df,'stockEretM',wname='weight'))\
            .unstack(level=['g'])
    else:
        panel = comb.groupby(['t',
                              'g'])['stockEretM'].mean().unstack(level=['g'])

    factor = panel[labels[-1]] - panel[labels[0]]
    return factor
Exemple #2
0
def construct_playingField(vars, model):
    '''
    :param vars: list
    :param model: belong to {'5x5','2x4x4'}
    :return:
    '''
    if model == '5x5':
        v1, v2 = tuple(vars)
        comb = data_for_bivariate(v1, v2, 5, 5, independent=True)
        assets=comb.groupby(['t','g1','g2']).apply(
            lambda df:my_average(df,'stockEretM',wname='weight'))\
            .unstack(level=['g1','g2'])
    elif model == '2x4x4':
        #v1 must belong to size category
        v1, v2, v3 = tuple(vars)
        comb = combine_with_datalagged([v1, v2, v3])
        comb = comb.dropna()
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[v1], 2, range(1, 3)))

        comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
            lambda df: assign_port_id(df[v2], 4, range(1, 5)))

        comb['g3'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
            lambda df: assign_port_id(df[v3], 4, range(1, 5)))

        assets=comb.groupby(['t','g1','g2','g3']).apply(
            lambda df: my_average(df, 'stockEretM', wname='weight')) \
            .unstack(level=['g1', 'g2','g3'])
    else:
        raise MyError('Model "{}" is not supported currently'.format(model))

    return assets
Exemple #3
0
def get_hxz4():
    '''
    calculate hxz4 factors,refer to din.py for details about the indicators

    References:
        Hou, K., Mo, H., Xue, C., and Zhang, L. (2018). Motivating Factors (Rochester, NY: Social Science Research Network).

    Returns:

    '''
    v1 = 'size__size'
    v2 = 'inv__inv'  #I/A
    v3 = 'roe__roe'  # ROE

    comb = combine_with_datalagged([v1, v2, v3], sample_control=True)
    comb = comb.dropna()

    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], 2, range(1, 3)))

    comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v2], [0, 0.3, 0.7, 1.0], range(1, 4)))

    comb['g3'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], [0, 0.3, 0.7, 1.0], range(1, 4)))

    assets = comb.groupby(
        ['t', 'g1', 'g2',
         'g3']).apply(lambda df: my_average(df, 'stockEretM', wname='weight'))

    df1 = assets.groupby(['t', 'g1']).mean().unstack(level='g1')
    smb = df1[1] - df1[2]

    df2 = assets.groupby(['t', 'g2']).mean().unstack(level='g2')
    ria = df2[3] - df2[1]

    df3 = assets.groupby(['t', 'g3']).mean().unstack(level='g3')
    roe = df3[3] - df2[1]

    rp = load_data('rpM')
    hxz4 = pd.concat([rp, smb, ria, roe],
                     axis=1,
                     keys=['rp', 'smb', 'ria', 'roe'])
    hxz4.columns.name = 'type'
    hxz4 = hxz4.dropna()
    save(hxz4, 'hxz4M')
def get_single_sorting_assets(indicator, q, weight=True):
    if isinstance(q, int):
        labels = ['g{}'.format(i) for i in range(1, q + 1)]
    elif isinstance(q, (list, tuple)):
        labels = ['g{}'.format(i) for i in range(1, len(q))]
    else:
        raise MyError('q:"{}"  is wrong!'.format(repr(q)))

    comb = combine_with_datalagged([indicator])
    comb['g'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[indicator], q, labels))

    if weight:
        assets=comb.groupby(['t','g']).apply(
            lambda df:my_average(df,'stockEretM',wname='weight'))\
            .unstack(level=['g'])
    else:
        assets = comb.groupby(['t',
                               'g'])['stockEretM'].mean().unstack(level=['g'])
    return assets
def three_sorting_factor(v1, v2, v3, q1, q2, q3, weight=True):
    '''

    v1 and v2 are independent,v3 is conditional on v1 and v2

    reference:
        page 18 of Pan, L., Tang, Y., and Xu, J. (2016).
        Speculative Trading and Stock Returns. Review of Finance 20, 1835–1865.


    '''

    comb = combine_with_datalagged([v1, v2, v3])
    comb = comb.dropna()

    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], q1, range(1, q1 + 1)))

    comb['g2'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v2], q2, range(1, q2 + 1)))

    # g3 is conditional on g1 and g2
    comb['g3'] = comb.groupby(['t', 'g1', 'g2'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], q3, range(1, q3 + 1)))

    if weight:
        s = comb.groupby([
            't', 'g1', 'g2', 'g3'
        ]).apply(lambda df: my_average(df, 'stockEretM', wname='weight'))
    else:
        s = comb.groupby(['t', 'g1', 'g2', 'g3'])['stockEretM'].mean()

    panel1 = s.groupby(['t', 'g1']).mean().unstack(level='g1')
    factor1 = panel1[q1] - panel1[1]

    panel2 = s.groupby(['t', 'g2']).mean().unstack(level='g2')
    factor2 = panel2[q2] - panel2[1]

    panel3 = s.groupby(['t', 'g3']).mean().unstack(level='g3')
    factor3 = panel3[q3] - panel3[1]
    return factor1, factor2, factor3
def data_for_bivariate(v1, v2, q1, q2, independent=True, **kwargs):
    comb = combine_with_datalagged([v1, v2], **kwargs)
    comb = comb.dropna()

    if independent:
        comb['g1'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(df[v1], q1))

        comb['g2'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(df[v2], q2))
    else:  #dependent
        '''
        v2 is conditional on v1,that is,we first group stocks into n1 portfolios
        based on the rank of v1,and then we will group each of the n1 portfolios
        into n2 portfolios based on v2
        '''
        comb['g1'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(df[v1], q1))

        comb['g2'] = comb.groupby(
            ['t', 'g1'],
            group_keys=False).apply(lambda df: assign_port_id(df[v2], q2))

    return comb