Ejemplo n.º 1
0
def get_torate(universe, start_time, end_time):
    '''
    Compute the turnover rate as traded volume divided by float shares.

    Parameter
    ---------
    universe: iterable
        Column labels to keep (presumably stock codes); the output columns
        follow their sorted order.
    start_time: datetime like
        Start of the queried period.
    end_time: datetime like
        End of the queried period.

    Return
    ------
    out:
        'TO_VOLUME' data divided by 'FLOAT_SHARE' data, restricted to the
        sorted universe columns.

    Raises
    ------
    AssertionError
        If checkdata_completeness rejects the result.
    '''
    period = (start_time, end_time)
    turnover = query('TO_VOLUME', period) / query('FLOAT_SHARE', period)
    turnover = turnover.loc[:, sorted(universe)]
    assert checkdata_completeness(turnover, start_time, end_time), "Error, data missed!"
    return turnover
Ejemplo n.º 2
0
 def inner(universe, start_time, end_time):
     '''
     Multiply the 'ADJ_FACTOR' data by the data queried under `price_type`.

     `price_type` is taken from the enclosing scope. Both query results must
     have the same length; the product is restricted to the sorted universe
     columns and checked with checkdata_completeness before being returned.
     '''
     period = (start_time, end_time)
     factor = query('ADJ_FACTOR', period)
     prices = query(price_type, period)
     n_factor = len(factor)
     n_prices = len(prices)
     assert n_factor == n_prices, (
         "Error, basic data length does not  match! " +
         "adj_factor data = {sd_len}, while close data = {cd_len}".format(sd_len=n_factor,
                                                                          cd_len=n_prices)
     )
     adjusted = factor * prices
     adjusted = adjusted.loc[:, sorted(universe)]
     assert checkdata_completeness(adjusted, start_time, end_time), "Error, data missed!"
     return adjusted
Ejemplo n.º 3
0
 def _inner(universe, start_time, end_time):
     '''
     Multiply the data queried under `share_factor_name` by the 'CLOSE' data.

     `share_factor_name` is taken from the enclosing scope. Both query results
     must have the same length; the product is restricted to the sorted
     universe columns and checked with checkdata_completeness before being
     returned.
     '''
     period = (start_time, end_time)
     shares = query(share_factor_name, period)
     closes = query('CLOSE', period)
     n_shares = len(shares)
     n_closes = len(closes)
     assert n_shares == n_closes, (
         "Error, basic data length does not  match! " +
         "share data = {sd_len}, while close data = {cd_len}".format(sd_len=n_shares,
                                                                     cd_len=n_closes)
     )
     value = shares * closes
     value = value.loc[:, sorted(universe)]
     assert checkdata_completeness(value, start_time, end_time), "Error, data missed!"
     return value
Ejemplo n.º 4
0
def get_valid_mask(start_time, end_time):
    '''
    Build a mask telling whether data is valid within the given period
    (the original documentation states the start time is included).

    Parameter
    ---------
    start_time: datetime like
        Start of the period.
    end_time: datetime like
        End of the period.

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are stock codes.

    Notes
    -----
    An entry is True only where LIST_STATUS equals 1 or 2. According to the
    original documentation, statuses 3 and 4 (delisted or listing terminated)
    mark invalid data; with this implementation any value other than 1 or 2
    yields False.
    '''
    from fmanager.factors.query import query
    status = query('LIST_STATUS', (start_time, end_time))
    is_status_one = status == 1
    is_status_two = status == 2
    return np.logical_or(is_status_one, is_status_two)
Ejemplo n.º 5
0
def get_lntotalmktv(universe, start_time, end_time):
    '''
    Natural logarithm of total market value.

    Queries 'TOTAL_MKTVALUE' for the period, applies np.log, keeps the sorted
    universe columns and asserts completeness via checkdata_completeness.
    '''
    log_cap = np.log(query('TOTAL_MKTVALUE', (start_time, end_time)))
    log_cap = log_cap.loc[:, sorted(universe)]
    assert checkdata_completeness(log_cap, start_time, end_time), "Error, data missed!"
    return log_cap
Ejemplo n.º 6
0
def get_lnfloatmktv(universe, start_time, end_time):
    '''
    Natural logarithm of float market value.

    Queries 'FLOAT_MKTVALUE' for the period, applies np.log, keeps the sorted
    universe columns and asserts completeness via checkdata_completeness.
    '''
    log_float_cap = np.log(query('FLOAT_MKTVALUE', (start_time, end_time)))
    log_float_cap = log_float_cap.loc[:, sorted(universe)]
    assert checkdata_completeness(log_float_cap, start_time, end_time), "Error, data missed!"
    return log_float_cap
Ejemplo n.º 7
0
def get_dailyret(universe, start_time, end_time):
    '''
    Daily returns computed from the 'ADJ_CLOSE' data (described in the
    original documentation as the back-adjusted close price).

    The query starts 30 calendar days before start_time; rows earlier than
    start_time are dropped after the percentage change is taken. The result
    is restricted to the sorted universe columns and checked with
    checkdata_completeness.
    '''
    # pct_change needs a preceding row, hence the earlier query start.
    lookback_start = pd.to_datetime(start_time) - pd.Timedelta('30 day')
    returns = query('ADJ_CLOSE', (lookback_start, end_time)).pct_change()
    in_window = returns.index >= start_time
    returns = returns.loc[in_window, sorted(universe)]
    assert checkdata_completeness(returns, start_time, end_time), "Error, data missed!"
    return returns
Ejemplo n.º 8
0
def get_avgtorate(universe, start_time, end_time):
    '''
    Average turnover rate over the past 20 trading days.

    Takes a 20-row rolling mean (all 20 observations required) of the
    'TO_RATE' data queried from 60 calendar days before start_time, drops
    rows that are entirely NaN, then keeps the rows inside
    [start_time, end_time] and the sorted universe columns.
    '''
    start_time = pd.to_datetime(start_time)
    lookback_start = start_time - pd.Timedelta('60 day')
    rolling_mean = query('TO_RATE', (lookback_start, end_time)).rolling(20, min_periods=20).mean()
    rolling_mean = rolling_mean.dropna(how='all')
    dates = rolling_mean.index
    in_window = (dates >= start_time) & (dates <= end_time)
    result = rolling_mean.loc[in_window, sorted(universe)]
    # The very first update starts at START_TIME, where missing data is
    # unavoidable, so completeness is only enforced for later start times.
    if start_time > pd.to_datetime(START_TIME):
        assert checkdata_completeness(result, start_time, end_time), "Error, data missed!"
    return result
Ejemplo n.º 9
0
 def inner(universe, start_time, end_time):
     '''
     Log of the rolling turnover sum, scaled by `month_num`.

     `offset` and `month_num` are taken from the enclosing scope. The
     'TO_RATE' data is summed over a rolling window of `offset` rows (all
     rows required), sums at or below the threshold are replaced with NaN so
     the logarithm is never taken of a (near-)zero value, the sums are divided
     by `month_num`, and the natural logarithm is applied.

     Parameter
     ---------
     universe: iterable
         Column labels to keep; the output columns follow their sorted order.
     start_time: datetime like
         Start of the requested period.
     end_time: datetime like
         End of the requested period.

     Return
     ------
     out:
         Rows within [start_time, end_time], restricted to the sorted
         universe columns.

     Raises
     ------
     AssertionError
         If start_time is later than START_TIME and checkdata_completeness
         rejects the result.
     '''
     start_time = pd.to_datetime(start_time)
     threshold = 10e-6      # note: 10e-6 equals 1e-5
     # presumably moves the start back by `offset` trading days so the first
     # requested row has a full window -- confirm against dateshandle.tds_shift
     new_start = dateshandle.tds_shift(start_time, offset)
     daily_torate = query('TO_RATE', (new_start, end_time))
     data = daily_torate.rolling(offset, min_periods=offset).sum().dropna(how='all')
     # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
     data[data <= threshold] = np.nan
     data = data / month_num
     data = np.log(data)
     mask = (data.index >= start_time) & (data.index <= end_time)
     data = data.loc[mask, sorted(universe)]
     # The very first update starts at START_TIME, where missing data is
     # unavoidable, so completeness is only enforced for later start times.
     if start_time > pd.to_datetime(START_TIME):
         assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
     return data
Ejemplo n.º 10
0
def get_nonlinearmktv(universe, start_time, end_time):
    '''
    Non-linear market value: the cube of the 'LN_TMKV' data, without
    orthogonalising it against market value.

    The result is restricted to the sorted universe columns and checked with
    checkdata_completeness.
    '''
    log_cap = query('LN_TMKV', (start_time, end_time))

    # A previous, now disabled, variant regressed the cubed values on the log
    # values (with a constant) row by row via OLS and kept the residuals; the
    # plain cube is used instead.
    cubed = np.power(log_cap, 3)
    cubed = cubed.loc[:, sorted(universe)]
    assert checkdata_completeness(cubed, start_time, end_time), "Error, data missed!"
    return cubed