def inner(start_time, end_time):
    """
    Build a (time x symbol) panel of one share-capital column.

    The column queried from ``LC_ShareStru`` is named by ``share_type``, a
    free variable supplied by the enclosing scope (not visible here).

    Parameter
    ---------
    start_time: datetime like
        start of the requested period
    end_time: datetime like
        end of the requested period

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are the symbols of the 'UNIVERSE' cross
        section taken at the latest trading day not after end_time

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    def _first_symbol(frame):
        return frame.symbol.iloc[0]

    query = (
        ' SELECT S.{share_type}, S.EndDate, M.SecuCode'
        ' FROM SecuMain M, LC_ShareStru S'
        ' WHERE M.CompanyCode = S.CompanyCode'
        ' AND M.SecuMarket in (83, 90)'
        ' AND M.SecuCategory = 1'
        ' AND M.ListedState != 9'
        ' ORDER BY M.SecuCode ASC, S.EndDate ASC '
    ).format(share_type=share_type)
    shares = fetch_db_data(jydb, query, ['data', 'time', 'symbol'],
                           dtypes={'data': 'float64'})
    shares.symbol = shares.symbol.apply(add_stock_suffix)
    # Rows sharing symbol and time are treated as the same record; share
    # capital data is normally not restated.
    shares = shares.drop_duplicates(['symbol', 'time'])
    grouped = shares.groupby('symbol')
    trading_days = get_calendar('stock.sse').get_tradingdays(start_time,
                                                             end_time)
    # map2td is a project helper; presumably it aligns each symbol's records
    # to the trading-day axis -- TODO confirm.
    daily = grouped.apply(map2td, days=trading_days, timecol='time',
                          fillna={'symbol': _first_symbol})
    panel = daily.pivot_table('data', index='time', columns='symbol')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    panel = panel.reindex(columns=universe)
    if check_completeness(panel.index, start_time, end_time):
        return panel
    raise ValueError('Error, data missed!')
def get_liststatus(start_time, end_time):
    '''
    Get the listing status of listed companies.

    Output codes: 1 = normally listed, 2 = listing suspended,
    3 = delisting arrangement period, 4 = listing terminated,
    NA = not in a normal listing state.

    Parameter
    ---------
    start_time: datetime like
    end_time: datetime like

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are the symbols of the 'UNIVERSE' cross
        section taken at the latest trading day not after end_time

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    '''
    def _first_symbol(frame):
        return frame.symbol.iloc[0]

    query = (
        ' SELECT M.Secucode, S.ChangeType, S.changeDate'
        ' FROM SECUMAIN M, LC_ListStatus S'
        ' WHERE M.INNERCODE = S.INNERCODE'
        ' AND M.SecuCategory = 1'
        ' AND S.SecuMarket in (90, 83)'
        ' AND S.ChangeType != 9'
        ' ORDER BY M.SECUCODE ASC, S.changeDate ASC '
    )
    status = fetch_db_data(jydb, query, ['symbol', 'data', 'time'],
                           dtypes={'data': 'int'})
    # Source database codes: 1 = listed, 2 = listing suspended,
    # 3 = listing resumed, 4 = delisted, 6 = delisting arrangement period.
    code_map = {1: 1, 2: 2, 3: 1, 4: 4, 6: 3}
    status['data'] = status.data.map(code_map)
    status['symbol'] = status.symbol.apply(add_stock_suffix)
    grouped = status.groupby('symbol')
    trading_days = get_calendar('stock.sse').get_tradingdays(start_time,
                                                             end_time)
    daily = grouped.apply(map2td, days=trading_days, timecol='time',
                          fillna={'symbol': _first_symbol})
    panel = daily.pivot_table('data', index='time', columns='symbol')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    panel = panel.reindex(columns=universe)
    panel = panel.sort_index(ascending=True)
    if check_completeness(panel.index, start_time, end_time):
        return panel
    raise ValueError('Error, data missed!')
def inner(start_time, end_time):
    """
    Divide the result of one data function by the result of another.

    ``denominator_func``, ``numerator_func`` and their keyword-argument
    dicts are free variables supplied by the enclosing scope.

    NOTE(review): the quotient computed is ``denominator / numerator``,
    which reads as if the operands were swapped relative to their names.
    The enclosing factory is not visible here -- confirm the intended
    direction before changing it.

    Return
    ------
    out: pandas.DataFrame (presumably; whatever the division yields)

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    # Call order (denominator first) is kept as in the original.
    first_operand = denominator_func(start_time, end_time,
                                     **denominator_kwargs)
    second_operand = numerator_func(start_time, end_time,
                                    **numerator_kwargs)
    quotient = first_operand / second_operand
    if check_completeness(quotient.index, start_time, end_time):
        return quotient
    raise ValueError('Data missed!')
def get_padjfactor(start_time, end_time):
    '''
    Get price adjusting factors from the database.

    Missing factors are filled with 1, both while mapping to trading days
    and after reindexing to the universe.

    Parameter
    ---------
    start_time: datetime like
    end_time: datetime like

    Return
    ------
    out: pandas.DataFrame
        index is the ex-dividend date axis ('exdivdate'), columns are the
        symbols of the 'UNIVERSE' cross section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    '''
    query = (
        ' SELECT A.ExDiviDate, A.RatioAdjustingFactor, M.SecuCode'
        ' FROM QT_AdjustingFactor A, SecuMain M'
        ' WHERE A.InnerCode = M.InnerCode'
        ' AND M.secuMarket in (83, 90)'
        ' AND M.SECUCATEGORY = 1'
        ' ORDER BY M.SecuCode ASC, A.ExDiviDate ASC '
    )
    factors = fetch_db_data(jydb, query, ['exdivdate', 'data', 'symbol'],
                            dtypes={'data': 'float64'})
    factors.symbol = factors.symbol.apply(add_stock_suffix)
    grouped = factors.groupby('symbol')
    trading_days = get_calendar('stock.sse').get_tradingdays(start_time,
                                                             end_time)
    fill_rules = {
        'data': lambda frame: 1,
        'symbol': lambda frame: frame.symbol.iloc[0],
    }
    daily = grouped.apply(map2td, days=trading_days, timecol='exdivdate',
                          fillna=fill_rules)
    panel = daily.pivot_table('data', index='exdivdate', columns='symbol')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    panel = panel.reindex(columns=universe)
    panel = panel.sort_index(ascending=True)
    panel = panel.fillna(1)
    if check_completeness(panel.index, start_time, end_time):
        return panel
    raise ValueError('Data missed!')
def inner(start_time, end_time):
    """
    Build a (time x symbol) panel of industry classification labels.

    ``class_level``, ``class_standard`` and ``translate_table`` are free
    variables supplied by the enclosing scope; ``INDUSTRY_LEVEL_MAP``,
    ``NaS`` and ``np_unicode`` are names defined elsewhere in the file.

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are the symbols of the 'UNIVERSE' cross
        section; missing cells hold NaS and values are cast with np_unicode

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    level_column = INDUSTRY_LEVEL_MAP[class_level]
    query = (
        ' SELECT S.{level}, S.InfoPublDate, M.SecuCode'
        ' FROM LC_exgIndustry S, SecuMain M'
        ' WHERE S.CompanyCOde = M.CompanyCode'
        ' AND S.Standard = {standard}'
        ' AND M.SecuCategory = 1'
        ' AND M.SecuMarket in (90, 83)'
        ' ORDER BY M.Secucode, S.InfoPublDate ASC '
    ).format(level=level_column, standard=class_standard)
    records = fetch_db_data(jydb, query, ['data', 'time', 'symbol'])
    records['data'] = records.data.map(translate_table)
    records['symbol'] = records.symbol.apply(add_stock_suffix)
    grouped = records.groupby('symbol')
    trading_days = get_calendar('stock.sse').get_tradingdays(start_time,
                                                             end_time)
    fill_rules = {
        'data': lambda frame: NaS,
        'symbol': lambda frame: frame.symbol.iloc[0],
    }
    daily = grouped.apply(map2td, days=trading_days, timecol='time',
                          fillna=fill_rules)
    # Labels that collide on the same (time, symbol) cell are joined with
    # ';'; check_agg_error then inspects the pivoted table.
    panel = daily.pivot_table('data', index='time', columns='symbol',
                              aggfunc=lambda labels: ';'.join(labels))
    check_agg_error(panel)
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    panel = panel.reindex(columns=universe)
    panel = panel.fillna(NaS).astype(np_unicode)
    if check_completeness(panel.index, start_time, end_time):
        return panel
    raise ValueError('Error, data missed!')
def inner(start_time, end_time):
    """
    Return the natural log of a cached market-value series.

    ``mkvdata_name`` is a free variable supplied by the enclosing scope.
    Values that np_isclose reports as equal to zero are replaced by NaN
    before taking the log, so they yield NaN rather than -inf.

    Fix: the original built the masked frame and then took the log of the
    unmasked ``mkv_data``, discarding the mask.

    Return
    ------
    out: same container type as the cached data (presumably a
        pandas.DataFrame -- confirm against pitcache_getter)

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    mkv_data = pitcache_getter(mkvdata_name, 50).get_tsdata(start_time,
                                                           end_time)
    near_zero = np_isclose(mkv_data, 0, 0.01)
    masked = mkv_data.where(~near_zero)
    data = np_log(masked)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Data missed!')
    return data
def shift_processor(raw_data, cols, start_time, end_time, offset, freq,
                    is_single_season=False):
    """
    Offset-data processor: for each date, compute the data visible on that
    date at the given offset. Supports both flow and stock (balance) data;
    supported offset frequencies are [quarterly, semi-annual, annual].
    Stock data supports every frequency. The original note says "quarterly
    data" supports only [quarterly, annual]; it presumably means flow data
    -- TODO confirm.

    Parameter
    ---------
    raw_data: pandas.DataFrame
        raw financial report data (quarterly, semi-annual and annual
        reports); it is not modified by this function
    cols: iterable
        column names, in order [symbol, data, update time, report period]
    start_time: datetime like
        start of the result period
    end_time: datetime like
        end of the result period
    offset: int
        number of periods to look back; offset=1 is the latest period,
        offset=2 the one before it, and so on
    freq: int
        offset frequency, only [1, 2, 4] are supported, meaning quarterly,
        semi-annual and annual
    is_single_season: boolean, default False
        whether to compute single-quarter data; only takes effect when freq
        is 1. Meant for quarterly offsets of flow data where the shifted
        single-quarter value is wanted

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are symbols

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    if freq == 1 and is_single_season:
        # Convert the reports into single-quarter values first.
        raw_data = calc_seasonly_data(raw_data, cols)
    # Use assign() so the caller's DataFrame is left untouched. The original
    # assigned to raw_data.symbol in place, so passing the same frame twice
    # would apply add_stock_suffix twice.
    raw_data = raw_data.assign(
        symbol=raw_data.symbol.apply(add_stock_suffix))
    data = process_fundamental_data(raw_data, cols, start_time, end_time,
                                    offset * freq + 3, calc_offsetdata,
                                    data_col=cols[1],
                                    period_flag_col=cols[3],
                                    offset=offset, multiple=freq)
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    data = data.reindex(columns=universe)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Data missed!')
    return data
def inner(start_time, end_time):
    """
    Multiply a cached share series by a cached price series.

    ``share_name`` and ``price_name`` are free variables supplied by the
    enclosing scope; both are read through pitcache_getter with a cache
    size of 10.

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    cache_size = 10
    shares = pitcache_getter(share_name, cache_size).get_tsdata(start_time,
                                                               end_time)
    prices = pitcache_getter(price_name, cache_size).get_tsdata(start_time,
                                                               end_time)
    product = shares * prices
    if check_completeness(product.index, start_time, end_time):
        return product
    raise ValueError('Data missed!')
def get_trade_status(start_time, end_time):
    '''
    Get the trading status of stocks.

    Notes
    -----
    Zero volume, or a high price equal to the low price, is treated as not
    tradable. A value of 1 means normal trading, 0 means not tradable, NA
    means not tradable because the stock is not listed.
    '''
    template = (
        ' SELECT S.TradingDay, S.TurnoverVolume, S.HighPrice, S.LowPrice,'
        ' M.Secucode'
        ' FROM QT_DailyQuote S, SecuMain M'
        ' WHERE S.InnerCode = M.InnerCode'
        ' AND M.SecuMarket in (83, 90)'
        " AND S.TradingDay <= '{end_time:%Y-%m-%d}'"
        " AND S.TradingDay >= '{start_time:%Y-%m-%d}'"
        ' AND M.SecuCategory = 1'
        ' ORDER BY M.Secucode ASC, S.TradingDay ASC '
    )
    start_time, end_time = trans_date(start_time, end_time)
    window = 30
    # Query extra history so the rolling mean has a full window at
    # start_time.
    query_start = get_calendar('stock.sse').shift_tradingdays(
        start_time, -window - 10)
    query = template.format(start_time=query_start, end_time=end_time)
    float_columns = {'vol': 'float64', 'high': 'float64', 'low': 'float64'}
    quotes = fetch_db_data(jydb, query,
                           ['time', 'vol', 'high', 'low', 'symbol'],
                           dtypes=float_columns)
    quotes.symbol = quotes.symbol.apply(add_stock_suffix)

    # Per-symbol moving average of volume over `window` rows.
    avg_vol = quotes.groupby('symbol', as_index=False).vol.rolling(
        window, min_periods=window).mean()
    avg_vol = avg_vol.reset_index(level=0, drop=True)
    quotes = quotes.assign(ma_vol=avg_vol).assign(flag=1)

    # Moving-average volume too low: blank it so the ratio below is NaN.
    quotes.loc[np.isclose(quotes.ma_vol, 0, 0.1), 'ma_vol'] = np.nan
    # From here on ma_vol holds volume relative to its moving average.
    quotes['ma_vol'] = quotes.vol / quotes.ma_vol

    # Not tradable when: high equals low, volume too low, or the relative
    # volume is too low / undefined.
    untradable = (
        (quotes.high == quotes.low)
        | np.isclose(quotes.vol, 0, 0.1)
        | np.isclose(quotes.ma_vol, 0, 0.05)
        | pd.isnull(quotes.ma_vol)
    )
    quotes.loc[untradable, 'flag'] = 0

    flags = quotes.pivot_table('flag', index='time', columns='symbol')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    in_window = (flags.index >= start_time) & (flags.index <= end_time)
    flags = flags.loc[in_window].reindex(columns=universe)
    if check_completeness(flags.index, start_time, end_time):
        return flags
    raise ValueError('Error, data missed!')
def inner(start_time, end_time):
    """
    Compute a growth measure per date from a numbered group of datasets.

    ``data_name_format``, ``period`` and ``calc_growth`` are free variables
    supplied by the enclosing scope. Dataset names are produced by
    formatting ``data_name_format`` with i = 1 .. period.

    Return
    ------
    out: pandas.DataFrame
        result of applying calc_growth to each level-0 group, with columns
        reindexed to the 'UNIVERSE' cross section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    names = []
    for idx in range(1, period + 1):
        names.append(data_name_format.format(i=idx))
    raw = query_group(names, start_time, end_time)
    growth = raw.groupby(level=0).apply(calc_growth)
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    growth = growth.reindex(columns=universe)
    if check_completeness(growth.index, start_time, end_time):
        return growth
    raise ValueError('Data missed!')
def inner(start_time, end_time):
    """
    Rolling standard deviation of a cached return series.

    ``ret_source`` and ``lag`` are free variables supplied by the enclosing
    scope. Data is read from lag + 1 trading days before start_time so the
    first requested date already has a full window of ``lag`` observations.

    Return
    ------
    out: pandas.DataFrame
        rows restricted to [start_time, end_time], columns reindexed to the
        'UNIVERSE' cross section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    start_time, end_time = trans_date(start_time, end_time)
    read_from = get_calendar('stock.sse').shift_tradingdays(start_time,
                                                            -(lag + 1))
    returns = pitcache_getter(ret_source, lag + 1).get_tsdata(read_from,
                                                              end_time)
    volatility = returns.rolling(lag, min_periods=lag).std()
    latest_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(latest_td).index)
    volatility = volatility.reindex(columns=universe)
    in_window = ((volatility.index >= start_time)
                 & (volatility.index <= end_time))
    volatility = volatility.loc[in_window]
    if check_completeness(volatility.index, start_time, end_time):
        return volatility
    raise ValueError('Data missed!')
def inner(start_time, end_time):
    """
    Apply a rolling-window function to the cached 'CLOSE_DRET' series.

    ``period`` (window length) and ``func`` (the function applied to each
    window) are free variables supplied by the enclosing scope. Data is
    read from period + 1 trading days before start_time.

    Return
    ------
    out: pandas.DataFrame
        rows restricted to [start_time, end_time], columns reindexed to the
        'UNIVERSE' cross section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    start_time, end_time = trans_date(start_time, end_time)
    read_from = get_calendar('stock.sse').shift_tradingdays(start_time,
                                                            -period - 1)
    returns = pitcache_getter('CLOSE_DRET', 10).get_tsdata(read_from,
                                                          end_time)
    rolled = returns.rolling(period, min_periods=period).apply(func)
    in_window = (rolled.index >= start_time) & (rolled.index <= end_time)
    rolled = rolled.loc[in_window]
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    rolled = rolled.reindex(columns=universe)
    if check_completeness(rolled.index, start_time, end_time):
        return rolled
    raise ValueError('Data missed!')
def inner(start_time, end_time):
    """
    Percentage change of a cached price series over ``freq`` rows.

    ``price_name`` and ``freq`` are free variables supplied by the
    enclosing scope. Prices are read from freq + 10 trading days before
    start_time so the change is defined at start_time.

    Return
    ------
    out: pandas.DataFrame
        rows restricted to [start_time, end_time]

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    read_from = get_calendar('stock.sse').shift_tradingdays(start_time,
                                                            -freq - 10)
    prices = pitcache_getter(price_name, 50).get_tsdata(read_from, end_time)
    change = prices.pct_change(freq)
    in_window = (change.index >= start_time) & (change.index <= end_time)
    change = change.loc[in_window]
    if check_completeness(change.index, start_time, end_time):
        return change
    raise ValueError('Data missed!')
def inner(start_time, end_time):
    """
    Percentage change of 'CLOSE_ADJ' between two lags.

    ``start_t`` and ``end_t`` are free variables supplied by the enclosing
    scope. The price series is shifted by ``end_t`` rows and the percentage
    change over ``start_t - end_t`` rows is taken. Prices are read from
    start_t + 1 trading days before start_time.

    Return
    ------
    out: pandas.DataFrame
        rows restricted to [start_time, end_time], columns reindexed to the
        'UNIVERSE' cross section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    start_time, end_time = trans_date(start_time, end_time)
    read_from = get_calendar('stock.sse').shift_tradingdays(start_time,
                                                            -start_t - 1)
    span = start_t - end_t
    prices = pitcache_getter('CLOSE_ADJ', 10).get_tsdata(read_from, end_time)
    lagged = prices.shift(end_t)
    change = lagged.pct_change(span)
    in_window = (change.index >= start_time) & (change.index <= end_time)
    change = change.loc[in_window]
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    change = change.reindex(columns=universe)
    if check_completeness(change.index, start_time, end_time):
        return change
    raise ValueError('Data missed!')
def get_suspend_state(start_time, end_time):
    """
    Get the suspension state of stocks.

    Notes
    -----
    0 means not suspended, 1 means suspended for the whole day, 2 means
    suspended for part of the trading hours within a day.

    Parameter
    ---------
    start_time: datetime like
    end_time: datetime like

    Return
    ------
    out: pandas.DataFrame
        columns are the symbols of the 'UNIVERSE' cross section; cells with
        no suspension information are filled with 0

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    start_time, end_time = trans_date(start_time, end_time)
    # The format specs carry no leading space (the original wrote
    # '{start_time: %Y-%m-%d}', which rendered the date literal as
    # ' 2020-01-01'); this matches the other queries in this file.
    sql = (
        ' SELECT M.SecuCode, S.SuspendDate, S.ResumptionDate'
        ' FROM LC_SuspendResumption S, SecuMain M'
        ' WHERE S.InnerCode = M.InnerCode'
        ' AND M.SecuMarket in (83, 90)'
        " AND (S.ResumptionDate >= '{start_time:%Y-%m-%d}'"
        " OR s.ResumptionDate = '1900-01-01')"
        " AND S.suspenddate <= '{end_time:%Y-%m-%d}'"
        ' AND M.SecuCategory = 1'
        ' ORDER BY M.Secucode ASC, S.SuspendDate ASC '
    ).format(start_time=start_time, end_time=end_time)
    data = fetch_db_data(jydb, sql,
                         ['symbol', 'suspend_date', 'resumption_date'])
    data.symbol = data.symbol.apply(add_stock_suffix)
    tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time)

    # The database stores 1900-01-01 as the resumption date of a suspension
    # that has not ended yet.
    not_resumed = pd.to_datetime('1900-01-01')

    def process_per_symbol(df):
        """Turn one symbol's suspension records into a date -> state Series."""
        state = {}
        # Columns are read by name so this does not depend on whether
        # groupby.apply passes the grouping column to the function.
        for start, end in zip(df['suspend_date'], df['resumption_date']):
            if start < end:
                # Ordinary suspension lasting more than one trading day.
                state[start] = 1
                state[end] = 0
            elif end == not_resumed:
                # Suspended and not yet resumed as of the computation date.
                state[start] = 1
            else:
                # Intraday suspension.
                state[start] = 2
        return pd.Series(state).sort_index()

    data = data.groupby('symbol').apply(process_per_symbol).unstack().T
    # Carry each state forward until the next state change; .ffill() replaces
    # the deprecated fillna(method='ffill').
    data = data.ffill()
    data = map2td(data, tds)
    latest_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(latest_td).index)
    data = data.reindex(columns=universe).fillna(0)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Data missed!')
    return data
def inner(start_time, end_time):
    """
    Build a (time x symbol) panel of index constituent weights.

    ``sql`` (a query template with ``code``, ``start_time`` and
    ``end_time`` placeholders) and ``index_symbol`` are free variables
    supplied by the enclosing scope. The query starts 180 trading days
    before start_time; presumably so that map2td has an earlier record to
    carry forward -- TODO confirm.

    Fix: the original declared ``nonlocal sql`` and overwrote the template
    with its formatted result, so every later call reused the first call's
    dates. The template is now only read, and the formatted query lives in
    a local variable.

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are the symbols of the 'UNIVERSE' cross
        section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    start_time, end_time = trans_date(start_time, end_time)
    start_time_shift = get_calendar('stock.sse').shift_tradingdays(
        start_time, -180)
    query = sql.format(code=index_symbol, start_time=start_time_shift,
                       end_time=end_time)
    data = fetch_db_data(jydb, query, ['symbol', 'weight', 'time'],
                         dtypes={'weight': 'float64'})
    data.symbol = data.symbol.apply(add_stock_suffix)
    tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time)
    data = data.pivot_table('weight', index='time', columns='symbol')
    data = map2td(data, tds)
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    data = data.reindex(columns=universe)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Error, data missed!')
    return data
def inner(start_time, end_time):
    """
    Build a (time x symbol) panel from a templated quote query.

    ``sql`` (a query template with ``start_time`` and ``end_time``
    placeholders), ``cols`` and ``dtypes`` are free variables supplied by
    the enclosing scope. When ``cols`` has four entries, zero values in
    'data' are replaced with the 'prevclose' column, which is then dropped.

    Fix: the original declared ``nonlocal sql`` and overwrote the template
    with its formatted result, so every later call reused the first call's
    dates. The template is now only read, and the formatted query lives in
    a local variable.

    Return
    ------
    out: pandas.DataFrame
        index is time (ascending), columns are the symbols of the
        'UNIVERSE' cross section

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    start_time, end_time = trans_date(start_time, end_time)
    query = sql.format(start_time=start_time, end_time=end_time)
    data = fetch_db_data(jydb, query, cols, dtypes=dtypes)
    data.symbol = data.symbol.apply(add_stock_suffix)
    if len(cols) == 4:
        # This dataset needs zero values filled with the previous close.
        data.loc[data.data == 0, 'data'] = data['prevclose']
        data = data.drop('prevclose', axis=1)
    data = data.pivot_table('data', index='time', columns='symbol')
    latest_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(latest_td).index)
    data = data.reindex(columns=universe).sort_index(ascending=True)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Data missed!')
    return data
def inner(start_time, end_time):
    """
    Log of the rolling turnover-rate sum, scaled by ``major_mul``.

    ``major_mul`` and ``minor_mul`` are free variables supplied by the
    enclosing scope; the rolling window is their product. Sums not above
    1e-5 are replaced by NaN before the log is taken. Completeness is only
    checked when start_time is later than DATA_START_DATE.

    Return
    ------
    out: pandas.DataFrame
        rows restricted to [start_time, end_time]

    Raise
    -----
    ValueError
        if the completeness check is performed and fails
    """
    start_time, end_time = trans_date(start_time, end_time)
    cache_size = 100
    window = major_mul * minor_mul
    threshold = 1e-5
    calendar = get_calendar('stock.sse')
    read_from = calendar.shift_tradingdays(start_time, -window - 20)
    turnover = pitcache_getter('TO_RATE', cache_size).get_tsdata(read_from,
                                                                end_time)
    summed = turnover.rolling(window, min_periods=window).sum()
    summed = summed.dropna(how='all')
    # Drop sums too small to take a meaningful log of.
    summed[summed <= threshold] = np_nan
    result = np_log(summed / major_mul)
    in_window = (result.index >= start_time) & (result.index <= end_time)
    result = result.loc[in_window]
    if (start_time > trans_date(DATA_START_DATE)
            and not check_completeness(result.index, start_time, end_time)):
        raise ValueError('Data missed!')
    return result
def get_torate(start_time, end_time):
    """
    Compute the turnover rate, defined as the day's trading volume divided
    by the number of floating shares.

    Parameter
    ---------
    start_time: datetime like
    end_time: datetime like

    Return
    ------
    out: pandas.DataFrame

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    volume = pitcache_getter('TO_VOLUME', 50).get_tsdata(start_time,
                                                        end_time)
    floating = pitcache_getter('FLOAT_SHARES', 50).get_tsdata(start_time,
                                                             end_time)
    turnover_rate = volume / floating
    if check_completeness(turnover_rate.index, start_time, end_time):
        return turnover_rate
    raise ValueError('Data missed!')
def inner(start_time, end_time):
    """
    Fetch one quote field of an index as a time-indexed series.

    ``index_symbol`` and ``field`` are free variables supplied by the
    enclosing scope. Completeness is not checked when start_time equals
    DATA_START_DATE.

    Return
    ------
    out: pandas.Series
        the 'data' column indexed by 'time'

    Raise
    -----
    ValueError
        if check_jydb_update_state(end_time) is falsy, or if the
        completeness check is performed and fails
    """
    start_time, end_time = trans_date(start_time, end_time)
    if not check_jydb_update_state(end_time):
        raise ValueError('JYDB has not been updated!')
    query = (
        ' SELECT S.{field}, S.TradingDay'
        ' FROM QT_IndexQuote S, SecuMain M'
        ' WHERE S.InnerCode = M.InnerCode'
        " AND M.SecuCode = '{symbol}'"
        ' AND M.SecuCategory = 4'
        " AND S.TradingDay >= '{start_time:%Y-%m-%d}'"
        " AND S.TradingDay <= '{end_time:%Y-%m-%d}'"
        ' ORDER BY S.TradingDay ASC '
    ).format(symbol=index_symbol, start_time=start_time, end_time=end_time,
             field=field)
    frame = fetch_db_data(jydb, query, ['data', 'time'],
                          dtypes={'data': 'float64'})
    series = frame.set_index('time').data
    must_verify = start_time != trans_date(DATA_START_DATE)
    if must_verify and not check_completeness(series.index, start_time,
                                              end_time):
        raise ValueError('Data Missed!')
    return series
def ttm_processor(raw_data, cols, start_time, end_time):
    """
    Turn raw report data into TTM values. Only flow data is supported,
    e.g. income statement and cash flow statement items.

    Parameter
    ---------
    raw_data: pandas.DataFrame
        raw report data (quarterly, semi-annual and annual reports)
    cols: iterable
        column names, in order [symbol, data, update time, report period]
    start_time: datetime like
        start of the result period
    end_time: datetime like
        end of the result period

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are symbols

    Raise
    -----
    ValueError
        if check_completeness rejects the resulting index
    """
    seasonal = calc_seasonly_data(raw_data, cols)
    seasonal.symbol = seasonal.symbol.apply(add_stock_suffix)
    ttm = process_fundamental_data(seasonal, cols, start_time, end_time, 6,
                                   calc_tnm, data_col='data',
                                   period_flag_col='rpt_date')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    ttm = ttm.reindex(columns=universe)
    if check_completeness(ttm.index, start_time, end_time):
        return ttm
    raise ValueError('Data missed!')
def get_st_status(start_time, end_time):
    '''
    Get the special-treatment status of stocks.

    Tag values produced by the mapping below: 0 = no special treatment,
    1 = ST, 2 = *ST, 3 = delisting arrangement period,
    4 = high risk warning, 5 = PT.

    Parameter
    ---------
    start_time: datetime like
    end_time: datetime like

    Return
    ------
    out: pandas.DataFrame
        index is time, columns are the symbols of the 'UNIVERSE' cross
        section; cells with no record are filled with 0

    Raise
    -----
    AssertionError
        if a record carries a tag that is neither in the mapping nor
        '撤销PT'
    ValueError
        if check_completeness rejects the resulting index
    '''
    sql = (
        ' SELECT S.SpecialTradeTime, S.SecurityAbbr, C.MS, M.SecuCode'
        ' FROM LC_SpecialTrade S, SecuMain M, CT_SystemConst C'
        ' WHERE S.InnerCode = M.InnerCode'
        ' AND M.SecuMarket in (83, 90)'
        ' AND S.SpecialTradeType = C.DM'
        ' AND C.LB = 1185'
        ' AND M.SecuCategory = 1 '
    )
    data = fetch_db_data(jydb, sql, ['time', 'abbr', 'ms', 'symbol'])

    # Built once here rather than once per row.
    map_dict = {
        'ST': 1.,
        'PT': 5.,
        '撤销ST': 0.,
        '*ST': 2.,
        '撤消*ST并实行ST': 1.,
        '从ST变为*ST': 2.,
        '撤销*ST': 0.,
        '退市整理期': 3.,
        '高风险警示': 4.,
    }

    def _assign_st(row):
        """Map one record to its numeric tag."""
        if row.ms in map_dict:
            return map_dict[row.ms]
        if row.ms != '撤销PT':
            # Raised explicitly so the check is not stripped under -O.
            raise AssertionError(
                "Error, cannot handle tag '{tag}'".format(tag=row.ms))
        # PT revoked: the remaining status is read off the abbreviation.
        # '*ST' must be tested before 'ST' because every '*ST' abbreviation
        # also contains 'ST'; the original order made the '*ST' branch
        # unreachable.
        if '*ST' in row.abbr:
            return 2
        if 'ST' in row.abbr:
            return 1
        return 0

    data = data.assign(tag=data.apply(_assign_st, axis=1))
    data['symbol'] = data.symbol.apply(add_stock_suffix)
    # Drop duplicate dates: a larger number means higher risk, so only the
    # largest tag per (symbol, time) is kept.
    data = data.sort_values(['symbol', 'time', 'tag'])
    data = data.groupby(['symbol', 'time']).tail(1)
    data = data.reset_index(drop=True)
    tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time)
    by_symbol = data.groupby('symbol')
    data = by_symbol.apply(map2td, days=tds, timecol='time',
                           fillna={'symbol': lambda x: x.symbol.iloc[0]})
    data = data.pivot_table('tag', index='time',
                            columns='symbol').dropna(axis=0, how='all')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    data = data.reindex(columns=universe).fillna(0)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Error, data missed!')
    return data