def get_bm():
    '''
    This function can be bookmarked as a snippet of how to manipulate a date
    index in Pandas.

    A little different from the book: here we use BE and ME per share, while
    the data in the book is for all floating shares. However, this does not
    affect BM.

    :return:
    '''
    be = read_unfiltered('bps')
    be = be[be.index.month == 12]
    me = read_unfiltered('stockCloseY')
    be, me = get_inter_frame([be, me])
    bm = be / me
    bm[bm <= 0] = np.nan  # drop samples with bm <= 0
    bm = quaterly2monthly(bm, shift='6M')
    logbm = np.log(bm)
    bm = bm.stack()
    logbm = logbm.stack()
    x = pd.concat([bm, logbm], axis=1, keys=['bm', 'logbm'])
    x.index.names = ['t', 'sid']
    x.columns.name = 'type'
    save(x, 'value')
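# A minimal sketch of what `quaterly2monthly(bm, shift='6M')` is assumed to do
# here (the real helper lives elsewhere in this project and may cap how far it
# forward-fills): spread the December observations onto a monthly grid and lag
# them by six months, so December book equity is only used from the following
# June onward and no look-ahead is introduced.
def _quaterly2monthly_sketch(df, shift='6M'):
    months = int(shift.rstrip('M'))      # '6M' -> 6
    monthly = df.resample('M').ffill()   # spread annual/quarterly rows onto month-ends
    return monthly.shift(months)         # make each value usable only `months` later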
def _get_comb():
    # page 123
    combs = []
    for freq in ['D', 'M']:
        eret = read_unfiltered('stockEret' + freq).stack()
        eret.name = 'eret'
        rp = read_unfiltered('rp' + freq)
        rp.name = 'rp'
        comb = eret.to_frame().join(rp)
        combs.append(comb)
    return tuple(combs)
def get_arg_list():
    arg_list = []
    for freq in ['D', 'M']:
        eret = read_unfiltered('stockEret' + freq)  # TODO: filtered or unfiltered?
        rp = read_unfiltered('rp' + freq)
        comb = pd.concat([rp, eret], axis=1)
        if freq == 'D':
            windows = [20, 60, 120, 240, 480]
        else:
            windows = [12, 24, 40, 60]
        for w in windows:
            arg_list.append(Arg(comb, _cal_beta1, w, freq))
    return arg_list
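# A hedged sketch of the rolling-beta estimator assumed behind `_cal_beta1`
# above (and behind `task` in calculate_beta below). `Arg` is taken to be a
# simple container for (comb, func, window, freq); how the driver slices `comb`
# into one column per stock is an assumption. For a single stock it regresses
# the excess return on the market risk premium over the trailing `w`
# observations and keeps the slope, using the same `sm.ols` formula interface
# used elsewhere in this module.
def _cal_beta1_sketch(subdf, w):
    # subdf: columns ['rp', 'eret'] for a single stock, sorted by time
    window = subdf.iloc[-w:].dropna()
    if window.shape[0] < w // 2:  # require at least half the window
        return np.nan
    return sm.ols(formula='eret ~ rp', data=window).fit().params['rp']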
def control_sid(conditions):
    '''
    Supported conditions:
        is_sz
        is_sh
        is_gem          (ChiNext, the growth enterprise market)
        is_cross
        not_financial
        is_industry

    :param conditions: a condition name, or a list of condition names
    :return: a list of stock codes
    '''
    # TODO: is_gem, is_industry
    condition_set = ['is_sz', 'is_sh', 'not_cross', 'not_financial']
    info = read_unfiltered('listInfo')

    def _one_condition(condition):
        if condition in condition_set:
            sids = info[info[condition]].index.tolist()
            return sids
        else:
            raise ValueError('The "conditions" should be one of {}'.format(
                repr(condition_set)))

    if isinstance(conditions, str):
        return _one_condition(conditions)
    elif isinstance(conditions, list):
        l_sids = [_one_condition(con) for con in conditions]
        return sorted(list(set.intersection(*map(set, l_sids))))
    else:
        raise MyError('no such condition as {}'.format(conditions))
def __init__(self, sample_control=True):
    if sample_control:
        self._data = read_filtered('data_controlled')
    else:
        self._data = read_filtered('data')
    self.info = read_unfiltered('info')
    self.all_indicators = [ele for l in self.info.values() for ele in l]
def cal_sizes():
    mktCap = read_unfiltered('capM')
    mktCap[mktCap <= 0] = np.nan
    size = np.log(mktCap)

    junes = [m for m in mktCap.index.tolist() if m.month == 6]
    newIndex = pd.date_range(start=junes[0], end=mktCap.index[-1], freq='M')
    junesDf = mktCap.loc[junes]
    mktCap_ff = junesDf.reindex(index=newIndex)
    mktCap_ff = mktCap_ff.ffill(limit=11)  # limit=11 is required, or it will fill all NaNs forward
    size_ff = np.log(mktCap_ff)

    size = size.stack()
    size.name = 'size'
    mktCap_ff = mktCap_ff.stack()
    mktCap_ff.name = 'mktCap_ff'
    size_ff = size_ff.stack()
    size_ff.name = 'size_ff'
    mktCap = mktCap.stack()
    mktCap.name = 'mktCap'

    # combine
    x = pd.concat([mktCap, mktCap_ff, size, size_ff], axis=1)
    x.index.names = ['t', 'sid']
    x.columns.name = 'type'
    save(x, 'size')
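# A toy illustration (made-up values) of the reindex + ffill(limit=11) step in
# cal_sizes: each June market cap is carried forward for at most 11 further
# months, i.e. until the following May, so a delisted stock does not keep a
# stale June size forever.
def _demo_june_ffill():
    toy = pd.Series([10.0, 12.0],
                    index=pd.to_datetime(['1995-06-30', '1996-06-30']))
    monthly = toy.reindex(pd.date_range('1995-06-30', '1996-12-31', freq='M'))
    return monthly.ffill(limit=11)  # 10.0 from 1995-06 to 1996-05, then 12.0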
def cross_year_after_list(freq='M'):
    '''
    Keep only observations for stocks that have been listed for at least one year.

    :return: DataFrame filled with True or False
    '''
    listInfo = read_unfiltered('listInfo')
    listInfo['year_later'] = listInfo['listDate'] + pd.offsets.DateOffset(years=1)
    if freq == 'M':
        # MonthEnd(1) rather than MonthEnd(0): exclude the first month, since
        # most of the year_later dates will not fall on a month end.
        listInfo['year_later'] = listInfo['year_later'] + MonthEnd(1)
    else:
        listInfo['year_later'] = listInfo['year_later'] + \
            pd.offsets.DateOffset(days=1)

    mask = listInfo[['year_later']].copy()
    mask.columns = ['t']
    mask['bool'] = True
    mask = mask.reset_index().set_index(['t', 'sid'])['bool']
    mask = mask.unstack()
    mask = mask.reindex(index=pd.Index(pd.date_range(
        mask.index[0], mask.index[-1], freq=freq), name=mask.index.name))
    mask = mask.ffill()
    mask = mask.fillna(value=False)  # replace NaN or None with False
    return mask
def cross_size_groups(freq='M'):
    '''
    'All-but-tiny' stocks are those larger than the NYSE 20th percentile and
    'large' stocks are those larger than the NYSE 50th percentile, based on
    market equity at the beginning of the month. Fama and French (2008) suggest
    using these groups as a simple way to check whether predictability is
    driven by micro-cap stocks or also exists among the economically more
    important population of large stocks.

    references:
        Lewellen, J. (2015). The Cross-section of Expected Stock Returns.
        Critical Finance Review 4, 1-44.

    :return: three DataFrames filled with True or False
    '''
    p1 = 0.3
    p2 = 0.7
    size = read_unfiltered('capM')
    floors = size.quantile(p1, axis=1)
    roofs = size.quantile(p2, axis=1)
    small = []
    medium = []
    big = []
    for t, s in size.iterrows():
        f = floors[t]
        r = roofs[t]
        small.append(s < f)
        medium.append((s >= f) & (s < r))
        big.append(s >= r)
    small = pd.concat(small, axis=1, keys=size.index).T
    medium = pd.concat(medium, axis=1, keys=size.index).T
    big = pd.concat(big, axis=1, keys=size.index).T
    return small, medium, big
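# Hedged usage sketch (the function name and `signal` argument are
# illustrative): rerun an analysis inside the non-micro-cap universe by masking
# a stock-by-date signal with the group indicators returned above, in the
# spirit of Fama and French (2008) / Lewellen (2015).
def _signal_ex_micro(signal):
    small, medium, big = cross_size_groups('M')
    mask = (medium | big).reindex_like(signal).fillna(False).astype(bool)
    return signal.where(mask)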
def select_a_model():
    sharpe = pd.read_pickle(os.path.join(direc, 'sharpe.pkl'))
    indicator = sharpe['indicator'][0]
    factor = pd.read_pickle(os.path.join(factorPath, indicator + '.pkl'))
    ff3 = read_unfiltered('ff3M')
    model = pd.concat([ff3[['rp', 'smb']], factor], axis=1)
    model = model.dropna()
    return model
def calculate_beta():
    args_list = []
    for freq in ['D', 'M']:
        eret = read_unfiltered('stockEret' + freq)  # TODO: filtered or unfiltered?
        rp = read_unfiltered('rp' + freq)
        comb = pd.concat([rp, eret], axis=1)
        if freq == 'D':
            windows = [20, 60, 120, 240, 480]
        else:
            windows = [12, 24, 40, 60]
        for w in windows:
            args_list.append((freq, comb, w))

    ss = multiprocessing.Pool(4).map(task, args_list)
    df = pd.concat(ss, axis=1,
                   keys=['{}{}'.format(a[0], a[2]) for a in args_list])
    df = df.unstack('sid').resample('M').last().stack()  # convert to monthly
    save(df, 'beta', sort_axis=False)
def cross_closePrice_floor(clsPrice=5.0, freq='M'):
    '''
    Delete penny stocks: keep only observations where the close price is above
    the floor (5.0 by default).

    :param clsPrice: minimum close price
    :param freq:
    :return: DataFrame filled with True or False
    '''
    stockClose = read_unfiltered('stockClose' + freq)
    return stockClose > clsPrice
def compare_wind_gta_bps():
    '''
    The two sources differ a lot!

    :return:
    '''
    bps_wind = read_unfiltered('bps_wind')
    bps = read_unfiltered('bps')
    # Renaming with a plain list, as in
    #   bps_wind.columns = [str(int(col[:-3])) for col in bps_wind.columns]
    # would lose columns.name, so build a new Index that keeps the name.
    bps_wind.columns = pd.Index(
        [str(int(col[:-3])) for col in bps_wind.columns],
        name=bps_wind.columns.name)
    bps_wind = bps_wind.sort_index(axis=1)
    bps = bps.sort_index(axis=1)
    bps_wind, bps = get_inter_frame([bps_wind, bps])
    detect_outliers(bps_wind, 'a1')
    detect_outliers(bps, 'a2')
def cal_market_states():
    '''
    Market states: search for 'market state' in Zotero.

    1. Cheema and Nartea, "Momentum Returns, Market States, and Market Dynamics."
       Chapter 3.1: Following Chui et al. (2010), we set stocks with monthly
       returns greater (lower) than 100 (-95) percent equal to 100 (-95) percent
       to avoid the influence of extreme returns and any possible data recording
       errors.

    :return:
    '''
    upDown = read_unfiltered('upDown')
    pass
def cross_is_normal(freq='M'):
    '''
    Whether the trading status is normal or not.

    Args:
        freq:

    Returns:
        DataFrame, contains True or False, denoting the trading status of the
        stocks at a given time.
    '''
    status = read_unfiltered('tradingStatus{}'.format(freq))
    return status.fillna(value=False)
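# Hedged example of how the cross_* masks above are meant to compose (the
# function name is illustrative, and get_inter_frame is only assumed to align a
# list of frames on their common dates and stocks, as it is used elsewhere in
# this project): a typical sample filter keeps stocks that are at least one
# year past listing, above the price floor, and trading normally.
def _sample_filter(freq='M'):
    masks = get_inter_frame([cross_year_after_list(freq),
                             cross_closePrice_floor(freq=freq),
                             cross_is_normal(freq)])
    combined = masks[0] & masks[1] & masks[2]
    return combined.fillna(value=False)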
def get_ff6():
    v1 = 'size__size'
    v2 = 'momentum__r12'
    smb, mom = two_sorting_factor(v1, v2, 2, [0, 0.3, 0.7, 1.0],
                                  sample_control=False, independent=True)
    mom.index.name = 't'
    mom.name = 'mom'

    ff5 = read_unfiltered('ff5M')
    ff6 = pd.concat([ff5, mom], axis=1)
    ff6 = ff6.dropna()
    ff6.columns.name = 'type'
    save(ff6, 'ff6M')
def _get_comb():
    '''
    page 321

    :return:
    '''
    retD = read_unfiltered('stockRetD')
    retD = retD.stack()
    retD.index.names = ['t', 'sid']
    retD.name = 'ret'
    eretD = read_unfiltered('stockEretD')
    eretD = eretD.stack()
    eretD.index.names = ['t', 'sid']
    eretD.name = 'eret'
    ff3D = read_unfiltered('ff3D')
    mktD = read_unfiltered('mktRetD').to_frame()
    mktD.columns = ['mkt']
    mktD['mkt_square'] = mktD['mkt'] ** 2
    multi_comb_D = pd.concat([eretD, retD], axis=1)
    single_comb_D = pd.concat([mktD, ff3D], axis=1)
    combD = multi_comb_D.join(single_comb_D)

    # monthly
    retM = read_unfiltered('stockRetM')
    retM = retM.stack()
    retM.index.names = ['t', 'sid']
    retM.name = 'ret'
    eretM = read_unfiltered('stockEretM')
    eretM = eretM.stack()
    eretM.index.names = ['t', 'sid']
    eretM.name = 'eret'
    ff3M = read_unfiltered('ff3M')
    mktM = read_unfiltered('mktRetM').to_frame()
    mktM.columns = ['mkt']
    mktM['mkt_square'] = mktM['mkt'] ** 2
    multi_comb_M = pd.concat([eretM, retM], axis=1)
    single_comb_M = pd.concat([mktM, ff3M], axis=1)
    combM = multi_comb_M.join(single_comb_M)

    return combD, combM
def get_upDown():
    '''
    2. Cooper Michael J., Gutierrez Roberto C., and Hameed Allaudeen,
       "Market States and Momentum."

    :return:
    '''
    mktRetM = read_unfiltered('mktRetM')
    windows = [12, 24, 36]
    series = []
    for window in windows:
        s = mktRetM.rolling(window=window).sum()
        s = s.shift(1)  # month t's state only uses returns through month t-1
        s[s > 0] = 1
        s[s < 0] = -1
        series.append(s)
    upDown = pd.concat(series, axis=1, keys=['{}M'.format(w) for w in windows])
    upDown.columns.name = 'type'
    save(upDown, 'marketStates')
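# Toy illustration (made-up returns) of the labels produced by get_upDown: the
# trailing 12-month market return, shifted by one month, decides the state, so
# month t's label never uses month t's own return. 1 = UP, -1 = DOWN.
def _demo_market_state():
    ret = pd.Series(0.01, index=pd.date_range('2005-01-31', periods=14, freq='M'))
    ret.iloc[-2] = -0.20  # one large crash near the end flips the state to DOWN
    state = ret.rolling(12).sum().shift(1)
    return state.apply(lambda v: 1 if v > 0 else (-1 if v < 0 else np.nan))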
def _get_comb():
    eretD = read_unfiltered('stockEretD')
    eretD = eretD.stack()
    eretD.index.names = ['t', 'sid']
    eretD.name = 'ret'
    ff3D = read_unfiltered('ff3D')
    mktD = read_unfiltered('mktRetD')
    mktD.columns = ['mkt']
    combD = eretD.to_frame().join(ff3D)
    combD = combD.join(mktD)

    eretM = read_unfiltered('stockEretM')
    eretM = eretM.stack()
    eretM.index.names = ['t', 'sid']
    eretM.name = 'ret'
    ffcM = read_unfiltered('ffcM')
    mktM = read_unfiltered('mktRetM')
    mktM.columns = ['mkt']
    combM = eretM.to_frame().join(ffcM)
    combM = combM.join(mktM)

    return combD, combM
def get_rev():
    stockRetM = read_unfiltered('stockRetM')
    rev = stockRetM * 100  # reversal signal: the monthly return expressed in percent
    save(rev, 'reversal')
def get_liquidity_ps():
    df = read_gta('Liq_PSM_M')
    # MarketType == 21: A-share market and ChiNext combined.
    # AggPS_os is weighted by floating (tradable) market cap, but on page 310
    # Bali uses total market capitalization.
    condition1 = (df['MarketType'] == 21)
    condition2 = (df['ST'] == 1)  # delete the ST stocks
    df = df[condition1 & condition2][['Trdmnt', 'AggPS_os']]
    df.columns = ['t', 'rm']
    df = df.set_index('t')
    df.index = freq_end(df.index, 'M')
    df = df.sort_index()
    df['rm_ahead'] = df['rm'].shift(1)
    df['delta_rm'] = df['rm'] - df['rm'].shift(1)
    df['delta_rm_ahead'] = df['rm_ahead'] - df['rm_ahead'].shift(1)

    # TODO: we don't know the length of the window to regress. Here we use a
    # five-year history.
    def regr(df):
        if df.shape[0] > 30:
            return sm.ols(formula='delta_rm ~ delta_rm_ahead + rm_ahead',
                          data=df).fit().resid[0]
        else:
            return np.NaN

    window = 60  # not exactly 5 years
    lm = pd.Series(
        [regr(df.loc[:month][-window:].dropna()) for month in df.index],
        index=df.index)
    lm.name = 'lm'

    ret = read_unfiltered('stockRetM')
    rf = read_unfiltered('rfM')
    eret = ret.sub(rf['rf'], axis=0)
    eret = eret.stack()
    eret.index.names = ['t', 'sid']
    eret.name = 'eret'

    ff3 = read_unfiltered('ff3M')
    factors = pd.concat([ff3, lm], axis=1)
    comb = eret.to_frame().join(factors)

    def _for_one_month(df):
        if df.shape[0] >= 30:
            return sm.ols(formula='eret ~ rp + smb + hml + lm',
                          data=df).fit().params['lm']
        else:
            return np.NaN

    def _get_result(df):
        thresh = 30  # 30 months
        if df.shape[0] > thresh:
            values = []
            sid = df.index[0][1]
            df = df.reset_index(level='sid', drop=True)
            months = df.index.tolist()[thresh:]
            for month in months:
                subdf = df.loc[:month][-60:]
                subdf = subdf.dropna()
                values.append(_for_one_month(subdf))
            print(sid)
            return pd.Series(values, index=months)

    result = comb.groupby('sid').apply(_get_result).unstack('sid')
    save(result, 'liqBeta')
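# The two regression steps in get_liquidity_ps, written out (a restatement of
# the code above, not a new method):
#   innovation step, estimated on a trailing 60-month window ending at month t:
#       delta_rm_t = a + b * delta_rm_{t-1} + c * rm_{t-1} + e_t,
#     with lm taken from the residuals of that fit (the liquidity innovation);
#   loading step, also on a trailing 60-month window per stock:
#       eret_it = alpha + beta_rp * rp_t + beta_smb * smb_t + beta_hml * hml_t
#                 + beta_lm * lm_t + u_it,
#     with liqBeta storing the estimated beta_lm.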
def handle_outliers(tbname):
    x = read_unfiltered(tbname)
    detect_outliers(x, tbname)
    x1 = delete_outliers(x, 'mad', 6)
    detect_outliers(x1, 'filtered_' + tbname)
    save_to_filtered(x1, tbname)
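# A minimal sketch of what `delete_outliers(x, 'mad', 6)` is assumed to do
# (the project's real helper may winsorize rather than drop): set values more
# than 6 median-absolute-deviations away from each cross-section's (row's)
# median to NaN, where rows are dates and columns are stocks.
def _delete_outliers_mad_sketch(df, k=6):
    med = df.median(axis=1)
    mad = df.sub(med, axis=0).abs().median(axis=1)
    lower = med - k * mad
    upper = med + k * mad
    return df.where(df.ge(lower, axis=0) & df.le(upper, axis=0))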