def get_bm():
    '''
    This function can be bookmarked as a snippet of how to manipulate a date
    index in Pandas.

    A little different from the book: here we use BE and ME per share, while
    the data in the book is for all floating shares. However, this does not
    affect BM.

    :return:
    '''
    be = read_unfiltered('bps')
    be = be[be.index.month == 12]
    me = read_unfiltered('stockCloseY')
    be, me = get_inter_frame([be, me])
    bm = be / me
    bm[bm <= 0] = np.nan  # drop samples with bm <= 0
    bm = quaterly2monthly(bm, shift='6M')
    logbm = np.log(bm)
    bm = bm.stack()
    logbm = logbm.stack()
    x = pd.concat([bm, logbm], axis=1, keys=['bm', 'logbm'])
    x.index.names = ['t', 'sid']
    x.columns.name = 'type'
    save(x, 'value')
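# A minimal sketch of what `quaterly2monthly(bm, shift='6M')` is assumed to do
# here (the real helper lives elsewhere in this project and may cap how far it
# forward-fills): spread the December observations onto a monthly grid and lag
# them by six months, so December book equity is only used from the following
# June onward and no look-ahead is introduced.
def _quaterly2monthly_sketch(df, shift='6M'):
    months = int(shift.rstrip('M'))      # '6M' -> 6
    monthly = df.resample('M').ffill()   # spread annual/quarterly rows onto month-ends
    return monthly.shift(months)         # make each value usable only `months` later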
def _get_comb():
    # page 123
    combs = []
    for freq in ['D', 'M']:
        eret = read_unfiltered('stockEret' + freq).stack()
        eret.name = 'eret'
        rp = read_unfiltered('rp' + freq)
        rp.name = 'rp'
        comb = eret.to_frame().join(rp)
        combs.append(comb)
    return tuple(combs)
def get_arg_list():
    arg_list = []
    for freq in ['D', 'M']:
        eret = read_unfiltered('stockEret' + freq)  # TODO: filtered or unfiltered?
        rp = read_unfiltered('rp' + freq)
        comb = pd.concat([rp, eret], axis=1)
        if freq == 'D':
            windows = [20, 60, 120, 240, 480]
        else:
            windows = [12, 24, 40, 60]
        for w in windows:
            arg_list.append(Arg(comb, _cal_beta1, w, freq))
    return arg_list
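# A hedged sketch of the rolling-beta estimator assumed behind `_cal_beta1`
# above (and behind `task` in calculate_beta below). `Arg` is taken to be a
# simple container for (comb, func, window, freq); how the driver slices `comb`
# into one column per stock is an assumption. For a single stock it regresses
# the excess return on the market risk premium over the trailing `w`
# observations and keeps the slope, using the same `sm.ols` formula interface
# used elsewhere in this module.
def _cal_beta1_sketch(subdf, w):
    # subdf: columns ['rp', 'eret'] for a single stock, sorted by time
    window = subdf.iloc[-w:].dropna()
    if window.shape[0] < w // 2:  # require at least half the window
        return np.nan
    return sm.ols(formula='eret ~ rp', data=window).fit().params['rp']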
def control_sid(conditions):
    '''
    Supported conditions:
        is_sz
        is_sh
        is_gem          (ChiNext, the growth enterprise market)
        is_cross
        not_financial
        is_industry

    :param conditions: a condition name, or a list of condition names
    :return: a list of stock codes
    '''
    # TODO: is_gem, is_industry
    condition_set = ['is_sz', 'is_sh', 'not_cross', 'not_financial']
    info = read_unfiltered('listInfo')

    def _one_condition(condition):
        if condition in condition_set:
            sids = info[info[condition]].index.tolist()
            return sids
        else:
            raise ValueError('The "conditions" should be one of {}'.format(
                repr(condition_set)))

    if isinstance(conditions, str):
        return _one_condition(conditions)
    elif isinstance(conditions, list):
        l_sids = [_one_condition(con) for con in conditions]
        return sorted(list(set.intersection(*map(set, l_sids))))
    else:
        raise MyError('no such condition as {}'.format(conditions))
def __init__(self, sample_control=True):
    if sample_control:
        self._data = read_filtered('data_controlled')
    else:
        self._data = read_filtered('data')
    self.info = read_unfiltered('info')
    self.all_indicators = [ele for l in self.info.values() for ele in l]
def cal_sizes():
    mktCap = read_unfiltered('capM')
    mktCap[mktCap <= 0] = np.nan
    size = np.log(mktCap)

    junes = [m for m in mktCap.index.tolist() if m.month == 6]
    newIndex = pd.date_range(start=junes[0], end=mktCap.index[-1], freq='M')
    junesDf = mktCap.loc[junes]
    mktCap_ff = junesDf.reindex(index=newIndex)
    mktCap_ff = mktCap_ff.ffill(limit=11)  # limit=11 is required, or it will fill all NaNs forward
    size_ff = np.log(mktCap_ff)

    size = size.stack()
    size.name = 'size'
    mktCap_ff = mktCap_ff.stack()
    mktCap_ff.name = 'mktCap_ff'
    size_ff = size_ff.stack()
    size_ff.name = 'size_ff'
    mktCap = mktCap.stack()
    mktCap.name = 'mktCap'

    # combine
    x = pd.concat([mktCap, mktCap_ff, size, size_ff], axis=1)
    x.index.names = ['t', 'sid']
    x.columns.name = 'type'
    save(x, 'size')
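# A toy illustration (made-up values) of the reindex + ffill(limit=11) step in
# cal_sizes: each June market cap is carried forward for at most 11 further
# months, i.e. until the following May, so a delisted stock does not keep a
# stale June size forever.
def _demo_june_ffill():
    toy = pd.Series([10.0, 12.0],
                    index=pd.to_datetime(['1995-06-30', '1996-06-30']))
    monthly = toy.reindex(pd.date_range('1995-06-30', '1996-12-31', freq='M'))
    return monthly.ffill(limit=11)  # 10.0 from 1995-06 to 1996-05, then 12.0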
def cross_year_after_list(freq='M'):
    '''
    Keep only observations for stocks that have been listed for at least one year.

    :return: DataFrame filled with True or False
    '''
    listInfo = read_unfiltered('listInfo')
    listInfo['year_later'] = listInfo['listDate'] + pd.offsets.DateOffset(years=1)
    if freq == 'M':
        # MonthEnd(1) rather than MonthEnd(0): exclude the first month, since
        # most of the year_later dates will not fall on a month end.
        listInfo['year_later'] = listInfo['year_later'] + MonthEnd(1)
    else:
        listInfo['year_later'] = listInfo['year_later'] + \
            pd.offsets.DateOffset(days=1)

    mask = listInfo[['year_later']].copy()
    mask.columns = ['t']
    mask['bool'] = True
    mask = mask.reset_index().set_index(['t', 'sid'])['bool']
    mask = mask.unstack()
    mask = mask.reindex(index=pd.Index(pd.date_range(
        mask.index[0], mask.index[-1], freq=freq), name=mask.index.name))
    mask = mask.ffill()
    mask = mask.fillna(value=False)  # replace NaN or None with False
    return mask
def cross_size_groups(freq='M'):
    '''
    'All-but-tiny' stocks are those larger than the NYSE 20th percentile and
    'large' stocks are those larger than the NYSE 50th percentile, based on
    market equity at the beginning of the month. Fama and French (2008) suggest
    using these groups as a simple way to check whether predictability is
    driven by micro-cap stocks or also exists among the economically more
    important population of large stocks.

    references:
        Lewellen, J. (2015). The Cross-section of Expected Stock Returns.
        Critical Finance Review 4, 1-44.

    :return: three DataFrames filled with True or False
    '''
    p1 = 0.3
    p2 = 0.7
    size = read_unfiltered('capM')
    floors = size.quantile(p1, axis=1)
    roofs = size.quantile(p2, axis=1)
    small = []
    medium = []
    big = []
    for t, s in size.iterrows():
        f = floors[t]
        r = roofs[t]
        small.append(s < f)
        medium.append((s >= f) & (s < r))
        big.append(s >= r)
    small = pd.concat(small, axis=1, keys=size.index).T
    medium = pd.concat(medium, axis=1, keys=size.index).T
    big = pd.concat(big, axis=1, keys=size.index).T
    return small, medium, big
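# Hedged usage sketch (the function name and `signal` argument are
# illustrative): rerun an analysis inside the non-micro-cap universe by masking
# a stock-by-date signal with the group indicators returned above, in the
# spirit of Fama and French (2008) / Lewellen (2015).
def _signal_ex_micro(signal):
    small, medium, big = cross_size_groups('M')
    mask = (medium | big).reindex_like(signal).fillna(False).astype(bool)
    return signal.where(mask)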
def select_a_model():
    sharpe = pd.read_pickle(os.path.join(direc, 'sharpe.pkl'))
    indicator = sharpe['indicator'][0]
    factor = pd.read_pickle(os.path.join(factorPath, indicator + '.pkl'))
    ff3 = read_unfiltered('ff3M')
    model = pd.concat([ff3[['rp', 'smb']], factor], axis=1)
    model = model.dropna()
    return model
def calculate_beta():
    args_list = []
    for freq in ['D', 'M']:
        eret = read_unfiltered('stockEret' + freq)  # TODO: filtered or unfiltered?
        rp = read_unfiltered('rp' + freq)
        comb = pd.concat([rp, eret], axis=1)
        if freq == 'D':
            windows = [20, 60, 120, 240, 480]
        else:
            windows = [12, 24, 40, 60]
        for w in windows:
            args_list.append((freq, comb, w))

    ss = multiprocessing.Pool(4).map(task, args_list)
    df = pd.concat(ss, axis=1,
                   keys=['{}{}'.format(a[0], a[2]) for a in args_list])
    df = df.unstack('sid').resample('M').last().stack()  # convert to monthly
    save(df, 'beta', sort_axis=False)
def cross_closePrice_floor(clsPrice=5.0, freq='M'):
    '''
    Delete penny stocks: keep only observations where the close price is above
    the floor (5.0 by default).

    :param clsPrice: minimum close price
    :param freq:
    :return: DataFrame filled with True or False
    '''
    stockClose = read_unfiltered('stockClose' + freq)
    return stockClose > clsPrice
def compare_wind_gta_bps():
    '''
    The two sources differ a lot!

    :return:
    '''
    bps_wind = read_unfiltered('bps_wind')
    bps = read_unfiltered('bps')
    # Renaming with a plain list, as in
    #   bps_wind.columns = [str(int(col[:-3])) for col in bps_wind.columns]
    # would lose columns.name, so build a new Index that keeps the name.
    bps_wind.columns = pd.Index(
        [str(int(col[:-3])) for col in bps_wind.columns],
        name=bps_wind.columns.name)
    bps_wind = bps_wind.sort_index(axis=1)
    bps = bps.sort_index(axis=1)
    bps_wind, bps = get_inter_frame([bps_wind, bps])
    detect_outliers(bps_wind, 'a1')
    detect_outliers(bps, 'a2')
def cal_market_states():
    '''
    Market states: search for 'market state' in Zotero.

    1. Cheema and Nartea, "Momentum Returns, Market States, and Market Dynamics."
       Chapter 3.1: Following Chui et al. (2010), we set stocks with monthly
       returns greater (lower) than 100 (-95) percent equal to 100 (-95) percent
       to avoid the influence of extreme returns and any possible data recording
       errors.

    :return:
    '''
    upDown = read_unfiltered('upDown')
    pass
def cross_is_normal(freq='M'):
    '''
    Whether the trading status is normal or not.

    Args:
        freq:

    Returns:
        DataFrame, contains True or False, denoting the trading status of the
        stocks at a given time.
    '''
    status = read_unfiltered('tradingStatus{}'.format(freq))
    return status.fillna(value=False)
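# Hedged example of how the cross_* masks above are meant to compose (the
# function name is illustrative, and get_inter_frame is only assumed to align a
# list of frames on their common dates and stocks, as it is used elsewhere in
# this project): a typical sample filter keeps stocks that are at least one
# year past listing, above the price floor, and trading normally.
def _sample_filter(freq='M'):
    masks = get_inter_frame([cross_year_after_list(freq),
                             cross_closePrice_floor(freq=freq),
                             cross_is_normal(freq)])
    combined = masks[0] & masks[1] & masks[2]
    return combined.fillna(value=False)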
def get_ff6():
    v1 = 'size__size'
    v2 = 'momentum__r12'
    smb, mom = two_sorting_factor(v1, v2, 2, [0, 0.3, 0.7, 1.0],
                                  sample_control=False, independent=True)
    mom.index.name = 't'
    mom.name = 'mom'

    ff5 = read_unfiltered('ff5M')
    ff6 = pd.concat([ff5, mom], axis=1)
    ff6 = ff6.dropna()
    ff6.columns.name = 'type'
    save(ff6, 'ff6M')
def _get_comb():
    '''
    page 321

    :return:
    '''
    retD = read_unfiltered('stockRetD')
    retD = retD.stack()
    retD.index.names = ['t', 'sid']
    retD.name = 'ret'
    eretD = read_unfiltered('stockEretD')
    eretD = eretD.stack()
    eretD.index.names = ['t', 'sid']
    eretD.name = 'eret'
    ff3D = read_unfiltered('ff3D')
    mktD = read_unfiltered('mktRetD').to_frame()
    mktD.columns = ['mkt']
    mktD['mkt_square'] = mktD['mkt'] ** 2
    multi_comb_D = pd.concat([eretD, retD], axis=1)
    single_comb_D = pd.concat([mktD, ff3D], axis=1)
    combD = multi_comb_D.join(single_comb_D)

    # monthly
    retM = read_unfiltered('stockRetM')
    retM = retM.stack()
    retM.index.names = ['t', 'sid']
    retM.name = 'ret'
    eretM = read_unfiltered('stockEretM')
    eretM = eretM.stack()
    eretM.index.names = ['t', 'sid']
    eretM.name = 'eret'
    ff3M = read_unfiltered('ff3M')
    mktM = read_unfiltered('mktRetM').to_frame()
    mktM.columns = ['mkt']
    mktM['mkt_square'] = mktM['mkt'] ** 2
    multi_comb_M = pd.concat([eretM, retM], axis=1)
    single_comb_M = pd.concat([mktM, ff3M], axis=1)
    combM = multi_comb_M.join(single_comb_M)

    return combD, combM
def get_upDown():
    '''
    2. Cooper Michael J., Gutierrez Roberto C., and Hameed Allaudeen,
       "Market States and Momentum."

    :return:
    '''
    mktRetM = read_unfiltered('mktRetM')
    windows = [12, 24, 36]
    series = []
    for window in windows:
        s = mktRetM.rolling(window=window).sum()
        s = s.shift(1)  # month t's state only uses returns through month t-1
        s[s > 0] = 1
        s[s < 0] = -1
        series.append(s)
    upDown = pd.concat(series, axis=1, keys=['{}M'.format(w) for w in windows])
    upDown.columns.name = 'type'
    save(upDown, 'marketStates')
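# Toy illustration (made-up returns) of the labels produced by get_upDown: the
# trailing 12-month market return, shifted by one month, decides the state, so
# month t's label never uses month t's own return. 1 = UP, -1 = DOWN.
def _demo_market_state():
    ret = pd.Series(0.01, index=pd.date_range('2005-01-31', periods=14, freq='M'))
    ret.iloc[-2] = -0.20  # one large crash near the end flips the state to DOWN
    state = ret.rolling(12).sum().shift(1)
    return state.apply(lambda v: 1 if v > 0 else (-1 if v < 0 else np.nan))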
def _get_comb():
    eretD = read_unfiltered('stockEretD')
    eretD = eretD.stack()
    eretD.index.names = ['t', 'sid']
    eretD.name = 'ret'
    ff3D = read_unfiltered('ff3D')
    mktD = read_unfiltered('mktRetD')
    mktD.columns = ['mkt']
    combD = eretD.to_frame().join(ff3D)
    combD = combD.join(mktD)

    eretM = read_unfiltered('stockEretM')
    eretM = eretM.stack()
    eretM.index.names = ['t', 'sid']
    eretM.name = 'ret'
    ffcM = read_unfiltered('ffcM')
    mktM = read_unfiltered('mktRetM')
    mktM.columns = ['mkt']
    combM = eretM.to_frame().join(ffcM)
    combM = combM.join(mktM)

    return combD, combM
def get_rev():
    stockRetM = read_unfiltered('stockRetM')
    rev = stockRetM * 100  # reversal signal: the monthly return expressed in percent
    save(rev, 'reversal')
def get_liquidity_ps():
    df = read_gta('Liq_PSM_M')
    # MarketType == 21: A-share market and ChiNext combined.
    # AggPS_os is weighted by floating (tradable) market cap, but on page 310
    # Bali uses total market capitalization.
    condition1 = (df['MarketType'] == 21)
    condition2 = (df['ST'] == 1)  # delete the ST stocks
    df = df[condition1 & condition2][['Trdmnt', 'AggPS_os']]
    df.columns = ['t', 'rm']
    df = df.set_index('t')
    df.index = freq_end(df.index, 'M')
    df = df.sort_index()
    df['rm_ahead'] = df['rm'].shift(1)
    df['delta_rm'] = df['rm'] - df['rm'].shift(1)
    df['delta_rm_ahead'] = df['rm_ahead'] - df['rm_ahead'].shift(1)

    # TODO: we don't know the length of the window to regress. Here we use a
    # five-year history.
    def regr(df):
        if df.shape[0] > 30:
            return sm.ols(formula='delta_rm ~ delta_rm_ahead + rm_ahead',
                          data=df).fit().resid[0]
        else:
            return np.NaN

    window = 60  # not exactly 5 years
    lm = pd.Series(
        [regr(df.loc[:month][-window:].dropna()) for month in df.index],
        index=df.index)
    lm.name = 'lm'

    ret = read_unfiltered('stockRetM')
    rf = read_unfiltered('rfM')
    eret = ret.sub(rf['rf'], axis=0)
    eret = eret.stack()
    eret.index.names = ['t', 'sid']
    eret.name = 'eret'

    ff3 = read_unfiltered('ff3M')
    factors = pd.concat([ff3, lm], axis=1)
    comb = eret.to_frame().join(factors)

    def _for_one_month(df):
        if df.shape[0] >= 30:
            return sm.ols(formula='eret ~ rp + smb + hml + lm',
                          data=df).fit().params['lm']
        else:
            return np.NaN

    def _get_result(df):
        thresh = 30  # 30 months
        if df.shape[0] > thresh:
            values = []
            sid = df.index[0][1]
            df = df.reset_index(level='sid', drop=True)
            months = df.index.tolist()[thresh:]
            for month in months:
                subdf = df.loc[:month][-60:]
                subdf = subdf.dropna()
                values.append(_for_one_month(subdf))
            print(sid)
            return pd.Series(values, index=months)

    result = comb.groupby('sid').apply(_get_result).unstack('sid')
    save(result, 'liqBeta')
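# The two regression steps in get_liquidity_ps, written out (a restatement of
# the code above, not a new method):
#   innovation step, estimated on a trailing 60-month window ending at month t:
#       delta_rm_t = a + b * delta_rm_{t-1} + c * rm_{t-1} + e_t,
#     with lm taken from the residuals of that fit (the liquidity innovation);
#   loading step, also on a trailing 60-month window per stock:
#       eret_it = alpha + beta_rp * rp_t + beta_smb * smb_t + beta_hml * hml_t
#                 + beta_lm * lm_t + u_it,
#     with liqBeta storing the estimated beta_lm.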
def handle_outliers(tbname):
    x = read_unfiltered(tbname)
    detect_outliers(x, tbname)
    x1 = delete_outliers(x, 'mad', 6)
    detect_outliers(x1, 'filtered_' + tbname)
    save_to_filtered(x1, tbname)
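# A minimal sketch of what `delete_outliers(x, 'mad', 6)` is assumed to do
# (the project's real helper may winsorize rather than drop): set values more
# than 6 median-absolute-deviations away from each cross-section's (row's)
# median to NaN, where rows are dates and columns are stocks.
def _delete_outliers_mad_sketch(df, k=6):
    med = df.median(axis=1)
    mad = df.sub(med, axis=0).abs().median(axis=1)
    lower = med - k * mad
    upper = med + k * mad
    return df.where(df.ge(lower, axis=0) & df.le(upper, axis=0))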