def single_sorting_factor(indicator, q, weight=False):
    # method1 independent way
    '''
    Construct a long-short factor from a single indicator.

    Stocks are grouped every month ("t") into portfolios according to
    "indicator". The return of every portfolio is then computed, either as
    a weighted average (weight is True) or as a plain mean of 'stockEretM'.
    The factor is the spread between the last-labelled and the
    first-labelled portfolio.

    :param indicator: name of the sorting indicator
    :param q: int (number of portfolios) or list/tuple of breakpoints, in
        which case there are len(q) - 1 portfolios
    :param weight: if True use my_average with the 'weight' column,
        otherwise use the simple mean
    :return: Series, top-portfolio return minus bottom-portfolio return
    :raises MyError: if q is neither an int nor a list/tuple
    '''
    # Work out how many portfolios "q" describes.
    if isinstance(q, int):
        n_ports = q
    elif isinstance(q, (list, tuple)):
        n_ports = len(q) - 1
    else:
        raise MyError('q:"{}" is wrong!'.format(repr(q)))
    labels = ['g{}'.format(k) for k in range(1, n_ports + 1)]

    def _assign(df):
        return assign_port_id(df[indicator], q, labels)

    def _weighted_ret(df):
        return my_average(df, 'stockEretM', wname='weight')

    comb = combine_with_datalagged([indicator])
    comb['g'] = comb.groupby('t', group_keys=False).apply(_assign)

    if weight:
        by_port = comb.groupby(['t', 'g']).apply(_weighted_ret)
    else:
        by_port = comb.groupby(['t', 'g'])['stockEretM'].mean()
    panel = by_port.unstack(level=['g'])

    top, bottom = labels[-1], labels[0]
    factor = panel[top] - panel[bottom]
    return factor
def construct_playingField(vars, model):
    '''
    Build the panel of sorted-portfolio returns for a given sorting model.

    :param vars: list of indicator names; two names for '5x5', three names
        for '2x4x4'
    :param model: belong to {'5x5','2x4x4'}
    :return: weighted average of 'stockEretM' per (t, group...) cell,
        unstacked on the group levels
    :raises MyError: if model is not one of the supported values
    '''
    def _weighted_ret(df):
        return my_average(df, 'stockEretM', wname='weight')

    if model == '5x5':
        v1, v2 = tuple(vars)
        comb = data_for_bivariate(v1, v2, 5, 5, independent=True)
        cells = comb.groupby(['t', 'g1', 'g2']).apply(_weighted_ret)
        return cells.unstack(level=['g1', 'g2'])

    if model == '2x4x4':
        # v1 must belong to size category
        v1, v2, v3 = tuple(vars)
        comb = combine_with_datalagged([v1, v2, v3])
        comb = comb.dropna()
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[v1], 2, range(1, 3)))
        # g2 and g3 are both sorted within each (t, g1) bucket.
        for col, var in (('g2', v2), ('g3', v3)):
            comb[col] = comb.groupby(['t', 'g1'], group_keys=False).apply(
                lambda df, var=var: assign_port_id(df[var], 4, range(1, 5)))
        cells = comb.groupby(['t', 'g1', 'g2', 'g3']).apply(_weighted_ret)
        return cells.unstack(level=['g1', 'g2', 'g3'])

    raise MyError('Model "{}" is not supported currently'.format(model))
def get_hxz4():
    '''
    Calculate hxz4 factors; refer to din.py for details about the indicators.

    Stocks are split each month into 2 groups on size and, within each size
    group, into 3 groups on I/A and 3 groups on ROE using the breakpoints
    [0, 0.3, 0.7, 1.0]. The weighted average of 'stockEretM' is computed
    for every (t, g1, g2, g3) cell; each factor is the difference between
    the averages of the extreme groups along one sorting dimension.

    References:
        Hou, K., Mo, H., Xue, C., and Zhang, L. (2018). Motivating Factors
        (Rochester, NY: Social Science Research Network).

    Returns:
        None. The resulting DataFrame (columns 'rp', 'smb', 'ria', 'roe')
        is stored via save(hxz4, 'hxz4M').
    '''
    v1 = 'size__size'
    v2 = 'inv__inv'  # I/A
    v3 = 'roe__roe'  # ROE

    comb = combine_with_datalagged([v1, v2, v3], sample_control=True)
    comb = comb.dropna()

    # Size is sorted unconditionally; I/A and ROE are sorted within each
    # (t, size-group) bucket.
    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], 2, range(1, 3)))
    comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v2], [0, 0.3, 0.7, 1.0], range(1, 4)))
    comb['g3'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], [0, 0.3, 0.7, 1.0], range(1, 4)))

    assets = comb.groupby(['t', 'g1', 'g2', 'g3']).apply(
        lambda df: my_average(df, 'stockEretM', wname='weight'))

    # Size factor: group 1 minus group 2 of the size sort.
    df1 = assets.groupby(['t', 'g1']).mean().unstack(level='g1')
    smb = df1[1] - df1[2]

    # Investment factor: group 3 minus group 1 of the I/A sort.
    # NOTE(review): the q-factor literature defines the investment factor
    # as low-minus-high I/A; this is high-minus-low if group 3 holds the
    # highest I/A values - confirm the intended sign.
    df2 = assets.groupby(['t', 'g2']).mean().unstack(level='g2')
    ria = df2[3] - df2[1]

    # ROE factor: group 3 minus group 1 of the ROE sort. Both legs must come
    # from df3 (the ROE panel); the previous code subtracted df2[1], i.e. the
    # first I/A group, which mixed two different sorts.
    df3 = assets.groupby(['t', 'g3']).mean().unstack(level='g3')
    roe = df3[3] - df3[1]

    rp = load_data('rpM')
    hxz4 = pd.concat([rp, smb, ria, roe], axis=1,
                     keys=['rp', 'smb', 'ria', 'roe'])
    hxz4.columns.name = 'type'
    hxz4 = hxz4.dropna()
    save(hxz4, 'hxz4M')
def get_single_sorting_assets(indicator, q, weight=True):
    '''
    Return the panel of portfolio returns from a single sort on "indicator".

    :param indicator: name of the sorting indicator
    :param q: int (number of portfolios) or list/tuple of breakpoints, in
        which case there are len(q) - 1 portfolios
    :param weight: if True use my_average with the 'weight' column,
        otherwise use the simple mean of 'stockEretM'
    :return: per-(t, g) portfolio returns unstacked on 'g'
    :raises MyError: if q is neither an int nor a list/tuple
    '''
    # Work out how many portfolios "q" describes.
    if isinstance(q, int):
        n_ports = q
    elif isinstance(q, (list, tuple)):
        n_ports = len(q) - 1
    else:
        raise MyError('q:"{}" is wrong!'.format(repr(q)))
    labels = ['g{}'.format(k) for k in range(1, n_ports + 1)]

    def _assign(df):
        return assign_port_id(df[indicator], q, labels)

    def _weighted_ret(df):
        return my_average(df, 'stockEretM', wname='weight')

    comb = combine_with_datalagged([indicator])
    comb['g'] = comb.groupby('t', group_keys=False).apply(_assign)

    if weight:
        by_port = comb.groupby(['t', 'g']).apply(_weighted_ret)
    else:
        by_port = comb.groupby(['t', 'g'])['stockEretM'].mean()
    assets = by_port.unstack(level=['g'])
    return assets
def three_sorting_factor(v1, v2, v3, q1, q2, q3, weight=True):
    '''
    Build three long-short factors from a triple sort.

    v1 and v2 are independent, v3 is conditional on v1 and v2.

    reference:
        page 18 of Pan, L., Tang, Y., and Xu, J. (2016).
        Speculative Trading and Stock Returns. Review of Finance 20, 1835–1865.

    :param v1, v2, v3: names of the sorting indicators
    :param q1, q2, q3: number of groups for each sort (used as
        range(1, q + 1) labels)
    :param weight: if True use my_average with the 'weight' column,
        otherwise use the simple mean of 'stockEretM'
    :return: tuple (factor1, factor2, factor3); each is the top group minus
        group 1 along the corresponding sort
    '''
    comb = combine_with_datalagged([v1, v2, v3])
    comb = comb.dropna()

    # Independent sorts on v1 and v2 within each period.
    for col, var, n in (('g1', v1, q1), ('g2', v2, q2)):
        comb[col] = comb.groupby('t', group_keys=False).apply(
            lambda df, var=var, n=n: assign_port_id(df[var], n, range(1, n + 1)))

    # g3 is conditional on g1 and g2
    comb['g3'] = comb.groupby(['t', 'g1', 'g2'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], q3, range(1, q3 + 1)))

    cells = comb.groupby(['t', 'g1', 'g2', 'g3'])
    if weight:
        s = cells.apply(
            lambda df: my_average(df, 'stockEretM', wname='weight'))
    else:
        s = cells['stockEretM'].mean()

    def _spread(level, top):
        # Average the cells along one sort, then take top minus group 1.
        panel = s.groupby(['t', level]).mean().unstack(level=level)
        return panel[top] - panel[1]

    factor1 = _spread('g1', q1)
    factor2 = _spread('g2', q2)
    factor3 = _spread('g3', q3)
    return factor1, factor2, factor3
def data_for_bivariate(v1, v2, q1, q2, independent=True, **kwargs):
    '''
    Prepare the data for a bivariate sort by adding group ids 'g1' and 'g2'.

    :param v1: name of the first sorting indicator
    :param v2: name of the second sorting indicator
    :param q1: grouping specification passed to assign_port_id for v1
    :param q2: grouping specification passed to assign_port_id for v2
    :param independent: if True, v1 and v2 are both sorted within each
        period; otherwise v2 is conditional on v1, that is, stocks are first
        grouped on v1 and each of those groups is then grouped on v2
    :param kwargs: forwarded to combine_with_datalagged
    :return: the combined data with rows containing NaN dropped and the
        'g1' and 'g2' columns added
    '''
    comb = combine_with_datalagged([v1, v2], **kwargs)
    comb = comb.dropna()

    def _rank_first(df):
        return assign_port_id(df[v1], q1)

    def _rank_second(df):
        return assign_port_id(df[v2], q2)

    # The first sort is the same for both modes.
    comb['g1'] = comb.groupby('t', group_keys=False).apply(_rank_first)

    # Only the bucket in which v2 is ranked differs between the two modes.
    second_keys = 't' if independent else ['t', 'g1']
    comb['g2'] = comb.groupby(second_keys, group_keys=False).apply(_rank_second)
    return comb