def test_sql_engine_fetch_factor_range_forward(self):
    ref_dates = makeSchedule(
        advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
        self.ref_date,
        '60b',
        'china.sse')
    ref_dates = ref_dates + [
        advanceDateByCalendar('china.sse', ref_dates[-1], '60b').strftime('%Y-%m-%d')
    ]
    universe = Universe('zz500') + Universe('zz1000')
    factor = 'ROE'

    factor_data = self.engine.fetch_factor_range_forward(universe, factor, dates=ref_dates)
    codes = self.engine.fetch_codes_range(universe, dates=ref_dates[:-1])
    groups = codes.groupby('trade_date')

    for ref_date, g in groups:
        forward_ref_date = advanceDateByCalendar(
            'china.sse', ref_date, '60b').strftime('%Y-%m-%d')
        query = select([Uqer.code, Uqer.ROE]).where(
            and_(Uqer.trade_date == forward_ref_date,
                 Uqer.code.in_(g.code.unique().tolist())))
        df = pd.read_sql(query, con=self.engine.engine)
        calculated_factor = factor_data[factor_data.trade_date == ref_date]
        calculated_factor.set_index('code', inplace=True)
        calculated_factor = calculated_factor.loc[df.code]
        np.testing.assert_array_almost_equal(calculated_factor.dx.values, df.ROE.values)
def test_sql_engine_fetch_dx_return_with_universe_adjustment(self):
    ref_dates = makeSchedule(
        advanceDateByCalendar('china.sse', '2017-01-26', '-6m'),
        '2017-01-26',
        '60b',
        'china.sse')
    universe = Universe('zz500')

    dx_return = self.engine.fetch_dx_return_range(universe,
                                                  dates=ref_dates,
                                                  horizon=4,
                                                  offset=1)
    codes = self.engine.fetch_codes_range(universe, dates=ref_dates)
    groups = codes.groupby('trade_date')

    for ref_date, g in groups:
        start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
        end_date = advanceDateByCalendar('china.sse', ref_date, '6b')

        query = select([Market.code, Market.chgPct]).where(
            and_(Market.trade_date.between(start_date, end_date),
                 Market.code.in_(g.code.unique().tolist())))
        df = pd.read_sql(query, con=self.engine.engine)
        res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())

        calculated_return = dx_return[dx_return.trade_date == ref_date]
        np.testing.assert_array_almost_equal(calculated_return.dx.values, res.chgPct.values)
def get_nffund_idx_etf_component(date, index):
    date = dt.datetime.strptime(date, '%Y%m%d')
    if not isBizDay('China.SSE', date):
        date = advanceDateByCalendar('China.SSE', date, '-1b')
    pre_trading_date = advanceDateByCalendar('China.SSE', date, '-1b').strftime('%Y%m%d')

    if index == 'zz500':
        date = date.strftime('%Y%m%d')
        url = "http://www.nffund.com/etf/bulletin/ETF500/510500{date}.txt".format(date=date)
        html_text = requests.get(url)._content.decode('gbk').split('TAGTAG\r')[1]
        res = []
        col_name = ['Code', 'drop', 'Volume', 'drop', 'drop', 'drop', 'drop']
        for line in html_text.split('\r'):
            res.append(line.replace(' ', '').replace('\n', '').split('|'))
        res = pd.DataFrame(res, columns=col_name)
        res = res.drop('drop', axis=1).iloc[:500]
    elif index == 'hs300':
        url = "http://www.huatai-pb.com/etf-web/etf/index?fundcode=510300&beginDate={date}".format(
            date=date.strftime('%Y-%m-%d'))
        html_text = requests.get(url)._content.decode('utf8')
        soup = BeautifulSoup(html_text, "lxml")
        res = []
        for item in soup.find_all('tr', {'align': 'center'})[1:]:
            sub_item = item.find_all('td')
            res.append([sub_item[0].text, sub_item[2].text])
        col_name = ['Code', 'Volume']
        res = pd.DataFrame(res, columns=col_name)
    elif index == 'sz50':
        url = "http://fund.chinaamc.com/product/fundShengoushuhuiqingdan.do"
        html_text = requests.post(url,
                                  data={'querryDate': date.strftime('%Y-%m-%d'),
                                        'fundcode': '510050'})._content.decode('utf8')
        soup = BeautifulSoup(html_text, "lxml")
        res = []
        for item in soup.find_all('tr', '')[17:]:
            sub_item = item.find_all('td')
            res.append([sub_item[0].text, sub_item[2].text])
        col_name = ['Code', 'Volume']
        res = pd.DataFrame(res, columns=col_name)
    else:
        raise KeyError('Do not have source for index %s yet...' % index)

    # convert string code to int code
    res['Code'] = res['Code'].apply(int)

    # fetch eod close price
    engine = create_engine(
        'mssql+pymssql://sa:[email protected]/MultiFactor?charset=utf8')
    sql = 'select [Code], [Close] as PreClose from TradingInfo1 where Date = %s' % pre_trading_date
    close_data = pd.read_sql(sql, engine)
    res = res.merge(close_data, on='Code', how='left')

    # calculate weight
    res['weight'] = res['PreClose'].apply(float) * res['Volume'].apply(float)
    res['weight'] = res['weight'] / res['weight'].sum()
    res = res[['Code', 'weight']]
    return res
def update_factor_performance_big_universe_top_100(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b')
    ref_date = ref_date.strftime('%Y-%m-%d')
    previous_date = advanceDateByCalendar('china.sse', ref_date, '-1b')

    this_day_pos, total_data = create_ond_day_pos(ref_date,
                                                  source_db,
                                                  big_universe=True,
                                                  risk_neutral=False)
    last_day_pos, _ = create_ond_day_pos(previous_date,
                                         source_db,
                                         big_universe=True,
                                         risk_neutral=False)

    return_table = settlement(ref_date,
                              this_day_pos,
                              total_data['bm'].values,
                              total_data['D1LogReturn'].values,
                              type='top_100')

    pos_diff_dict = {}
    for name in this_day_pos.columns.difference(['industry']):
        for ind in this_day_pos.industry.unique():
            pos_series = this_day_pos.loc[this_day_pos.industry == ind, name]
            if name in last_day_pos:
                last_series = last_day_pos.loc[last_day_pos.industry == ind, name]
                pos_diff = pos_series.sub(last_series, fill_value=0)
            else:
                pos_diff = pos_series
            pos_diff_dict[(name, ind)] = pos_diff.abs().sum()

        pos_series = this_day_pos[name]
        if name in last_day_pos:
            last_series = last_day_pos[name]
            pos_diff = pos_series.sub(last_series, fill_value=0)
        else:
            pos_diff = pos_series
        pos_diff_dict[(name, 'total')] = pos_diff.abs().sum()

    pos_diff_series = pd.Series(pos_diff_dict, name='turn_over')
    pos_diff_series.index.names = ['portfolio', 'industry']
    pos_diff_series = pos_diff_series.reset_index()

    return_table = pd.merge(return_table, pos_diff_series, on=['portfolio', 'industry'])
    return_table['source'] = 'tiny'
    return_table['universe'] = 'zz500_expand'
    upload(ref_date, return_table, destination_db, 'performance')
def fetch_dx_return_index(self,
                          ref_date: str,
                          index_code: int,
                          expiry_date: str = None,
                          horizon: int = 0,
                          offset: int = 0) -> pd.DataFrame:
    start_date = ref_date

    if not expiry_date:
        end_date = advanceDateByCalendar(
            'china.sse', ref_date,
            str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
    else:
        end_date = expiry_date

    stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')

    query = select([IndexMarket.trade_date,
                    IndexMarket.indexCode.label('code'),
                    stats]).where(
        and_(
            IndexMarket.trade_date.between(start_date, end_date),
            IndexMarket.indexCode == index_code
        )
    )

    df = pd.read_sql(query, self.session.bind).dropna()
    df = df[df.trade_date == ref_date]
    return df[['code', 'dx']]
def calc_factor(self, packet_name, class_name, mkt_df, trade_date):
    result = pd.DataFrame()
    class_method = importlib.import_module(packet_name).__getattribute__(class_name)
    alpha_max_window = 0
    func_sets = self._func_sets(class_method)
    start_time = time.time()
    for func in func_sets:
        print(func)
        func_method = getattr(class_method, func)
        fun_param = inspect.signature(func_method).parameters
        dependencies = fun_param['dependencies'].default
        max_window = fun_param['max_window'].default
        begin = advanceDateByCalendar('china.sse', trade_date, '-%sb' % (max_window))
        data = {}
        for dep in dependencies:
            if dep not in ['indu']:
                data[dep] = mkt_df[dep].loc[begin.strftime("%Y-%m-%d"):trade_date]
            else:
                data['indu'] = mkt_df['indu']
        res = getattr(class_method(), func)(data)
        res = pd.DataFrame(res)
        res.columns = [func]
        # res = res.reset_index().sort_values(by='code', ascending=True)
        res = res.reset_index().sort_values(by='security_code', ascending=True)
        result[func] = res[func]
        # result['symbol'] = res['code']
        result['security_code'] = res['security_code']
    result['trade_date'] = trade_date
    print(time.time() - start_time)
    return result.replace([np.inf, -np.inf], np.nan)
def fetch_dx_return_index(self,
                          ref_date: str,
                          index_code: int,
                          expiry_date: str = None,
                          horizon: int = 0,
                          offset: int = 0) -> pd.DataFrame:
    start_date = ref_date

    if not expiry_date:
        end_date = advanceDateByCalendar(
            'china.sse', ref_date,
            str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
    else:
        end_date = expiry_date

    stats = func.sum(self.ln_func(1. + IndexMarket.chgPct)).over(
        partition_by=IndexMarket.indexCode,
        order_by=IndexMarket.trade_date,
        rows=(1 + DAILY_RETURN_OFFSET + offset,
              1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')

    query = select([
        IndexMarket.trade_date,
        IndexMarket.indexCode.label('code'),
        stats
    ]).where(
        and_(IndexMarket.trade_date.between(start_date, end_date),
             IndexMarket.indexCode == index_code))

    df = pd.read_sql(query, self.session.bind).dropna()
    df = df[df.trade_date == ref_date]
    return df[['code', 'dx']]
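# A minimal sketch of the forward-return window implied by the rows=(...) frame above,
# assuming DAILY_RETURN_OFFSET == 0 (its value is not shown in this section) and that
# advanceDateByCalendar comes from Finance-Python (PyFin.api), as the surrounding code suggests.
# With offset=1 and horizon=4, 'dx' accumulates log(1 + chgPct) over the five business days
# from ref_date + 2b through ref_date + 6b, the same window the dx_return tests in this
# section rebuild explicitly.
from PyFin.api import advanceDateByCalendar

ref_date, offset, horizon, DAILY_RETURN_OFFSET = '2017-06-29', 1, 4, 0
window_start = advanceDateByCalendar('china.sse', ref_date, '%db' % (1 + DAILY_RETURN_OFFSET + offset))
window_end = advanceDateByCalendar('china.sse', ref_date, '%db' % (1 + horizon + DAILY_RETURN_OFFSET + offset))
# window_start is ref_date + 2 business days, window_end is ref_date + 6 business days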
def update_return_data_300(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    start_date = advanceDateByCalendar('china.sse', ref_date, '-30b')

    for date in bizDatesList('china.sse', start_date, ref_date):
        date = date.strftime('%Y-%m-%d')

        conn1 = create_ms_engine('PortfolioManagements300')
        df = fetch_date('StockReturns', date, conn1)

        conn2 = create_my_engine()
        delete_data('return_300', date, conn2)
        insert_data('return_300', df, conn2)

        conn3 = create_my_engine2()
        delete_data('return_300', date, conn3)
        insert_data('return_300', df, conn3)

    return 0
def fetch_dx_return_index_range(self,
                                index_code,
                                start_date: str = None,
                                end_date: str = None,
                                dates: Iterable[str] = None,
                                horizon: int = 0,
                                offset: int = 0) -> pd.DataFrame:
    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    end_date = advanceDateByCalendar(
        'china.sse', end_date,
        str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')

    stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')

    query = select([IndexMarket.trade_date,
                    IndexMarket.indexCode.label('code'),
                    stats]) \
        .where(
        and_(
            IndexMarket.trade_date.between(start_date, end_date),
            IndexMarket.indexCode == index_code
        )
    )

    df = pd.read_sql(query, self.session.bind).dropna()

    if dates:
        df = df[df.trade_date.isin(dates)]
    return df
def update_uqer_universe_ashare_ex(ds, **kwargs):
    ref_date, this_date = process_date(ds)
    flag = check_holiday(this_date)

    if not flag:
        return

    query = delete(Universe).where(
        and_(Universe.trade_date == this_date,
             Universe.universe == 'ashare_ex'))
    engine.execute(query)

    ex_date = advanceDateByCalendar('china.sse', this_date, '-3m')

    query = select([SecurityMaster.code]).where(
        and_(
            SecurityMaster.listDate <= ex_date,
            or_(SecurityMaster.listStatusCD == "L",
                SecurityMaster.delistDate > this_date)))
    df = pd.read_sql(query, engine)

    if df.empty:
        return

    df['universe'] = 'ashare_ex'
    df['trade_date'] = this_date

    data_info_log(df, Universe)
    df.to_sql(Universe.__table__.name, engine, index=False, if_exists='append')
def fetch_dx_return_index_range(self,
                                index_code,
                                start_date: str = None,
                                end_date: str = None,
                                dates: Iterable[str] = None,
                                horizon: int = 0,
                                offset: int = 0) -> pd.DataFrame:
    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    index_code = _map_index_codes[index_code]

    end_date = advanceDateByCalendar(
        'china.sse', end_date,
        str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')

    query = select([IndexMarket.trade_date,
                    IndexMarket.indexCode.label('code'),
                    IndexMarket.chgPct.label("chgPct")]) \
        .where(
        and_(
            IndexMarket.trade_date.between(start_date, end_date),
            IndexMarket.indexCode == index_code,
            IndexMarket.flag == 1
        )
    ).order_by(IndexMarket.trade_date, IndexMarket.indexCode)

    df = pd.read_sql(query, self.session.bind).dropna().drop_duplicates(["trade_date", "code"])
    df = self._create_stats(df, horizon, offset)

    if dates:
        df = df[df.trade_date.isin(dates)]
    return df
def fetch_trade_status_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             offset=0):
    codes = universe.query(self, start_date, end_date, dates)

    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    end_date = advanceDateByCalendar('china.sse', end_date,
                                     str(offset) + 'b').strftime('%Y-%m-%d')

    stats = func.lead(Market.isOpen, offset).over(
        partition_by=Market.code,
        order_by=Market.trade_date).label('is_open')

    cte = select([Market.trade_date, Market.code, stats]).where(
        and_(
            Market.trade_date.between(start_date, end_date),
            Market.code.in_(codes.code.unique().tolist())
        )
    ).cte('cte')

    query = select([cte]).select_from(cte).order_by(cte.columns['trade_date'],
                                                    cte.columns['code'])
    df = pd.read_sql(query, self.engine)
    return pd.merge(df, codes[['trade_date', 'code']], on=['trade_date', 'code'])
def fetch_dx_return(self,
                    ref_date: str,
                    codes: Iterable[int],
                    expiry_date: str = None,
                    horizon: int = 0,
                    offset: int = 0) -> pd.DataFrame:
    start_date = ref_date

    if not expiry_date:
        end_date = advanceDateByCalendar(
            'china.sse', ref_date,
            str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
    else:
        end_date = expiry_date

    stats = self._create_stats(Market, horizon, offset)

    query = select([Market.trade_date, Market.code, stats]).where(
        and_(Market.trade_date.between(start_date, end_date),
             Market.code.in_(codes)))

    df = pd.read_sql(query, self.session.bind).dropna()
    df = df[df.trade_date == ref_date]
    return df[['code', 'dx']]
def handle_data(self):
    for s in self.tradableAssets:
        if s[:2] != 'if':
            if self.signal[s] > 0 and self.secPos[s] == 0:
                self.order(s, 1, 200)
            elif self.signal[s] < 0 and self.secPos[s] != 0:
                self.order(s, -1, 200)

    # find the dominant (front-month) futures contract to use
    current_time = self.current_datetime
    year = current_time.year
    month = current_time.month
    delDay = nthWeekDay(3, 6, month, year)
    changeContractDay = advanceDateByCalendar('China.SSE', delDay, '-1b')
    contract_month = month
    if current_time.date() >= changeContractDay:
        contract_month = month + 1
    ifc = 'if15%02d.cffex' % contract_month
    ifcOld = 'if15%02d.cffex' % month

    if month < contract_month and self.secPos[ifcOld] != 0:
        # roll the position: close out the old contract
        self.order_to(ifcOld, 1, 0)
        self.order_to(ifc, -1, 1)
def create_factor_analysis(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b').strftime('%Y-%m-%d')
    factor_name = kwargs['factor_name']
    logger.info("updating '{0}' on {1}".format(factor_name, ref_date))

    # small universe, risk_neutral
    return_table = common_500_analysis(factor_name,
                                       ref_date,
                                       use_only_index_components=True,
                                       risk_neutral=True)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance', factor_name, 'risk_neutral')

    # small universe, top_100
    return_table = common_500_analysis(factor_name,
                                       ref_date,
                                       use_only_index_components=True,
                                       risk_neutral=False)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance', factor_name, 'top_100')

    # big universe, risk_neutral
    return_table = common_500_analysis(factor_name,
                                       ref_date,
                                       use_only_index_components=False,
                                       risk_neutral=True)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance_big_universe', factor_name, 'risk_neutral')

    # big universe, top_100
    return_table = common_500_analysis(factor_name,
                                       ref_date,
                                       use_only_index_components=False,
                                       risk_neutral=False)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance_big_universe', factor_name, 'top_100')
def process_calc_factor(self, packet_name, class_name, mkt_df, trade_date):
    calc_factor_list = []
    cpus = multiprocessing.cpu_count()
    class_method = importlib.import_module(packet_name).__getattribute__(class_name)
    alpha_max_window = 0
    func_sets = self._func_sets(class_method)
    start_time = time.time()
    for func in func_sets:
        func_method = getattr(class_method, func)
        fun_param = inspect.signature(func_method).parameters
        dependencies = fun_param['dependencies'].default
        max_window = fun_param['max_window'].default
        begin = advanceDateByCalendar('china.sse', trade_date, '-%sb' % (max_window - 1))
        data = {}
        for dep in dependencies:
            if dep not in ['indu']:
                data[dep] = mkt_df[dep].loc[begin.strftime("%Y-%m-%d"):trade_date]
            else:
                data['indu'] = mkt_df['indu']
        calc_factor_list.append([class_name, packet_name, func, data])

    with multiprocessing.Pool(processes=cpus) as p:
        res = p.map(self.process_calc, calc_factor_list)
    print(time.time() - start_time)

    result = pd.concat(res, axis=1).reset_index().rename(columns={'index': 'security_code'})
    result = result.replace([np.inf, -np.inf], np.nan)
    result['trade_date'] = trade_date
    return result
def loadon_data(self, trade_date):
    db_polymerize = DBPolymerize(self._name)
    max_windows = self._maximization_windows()
    begin_date = advanceDateByCalendar('china.sse', trade_date, '-%sb' % (max_windows + 1))
    total_data = db_polymerize.fetch_data(begin_date, trade_date, '1b')
    return total_data
def get_continuous_future_contract(universe, current_date, del_rule, contract_prefix_len=2):
    """
    :param universe: dict, key = maturity month, value = contract id
    :param current_date: datetime, current datetime
    :param del_rule: dict, termination rule of fut contract
    :param contract_prefix_len: int, optional, length of the prefix of contract ids
    :return: the contract id to use, given current date
    """
    year = current_date.year
    month = current_date.month
    del_day = nthWeekDay(nth=del_rule['nth'],
                         dayOfWeek=del_rule['day_of_week'],
                         month=month,
                         year=year)
    del_date = advanceDateByCalendar(holidayCenter='China.SSE',
                                     referenceDate=del_day,
                                     period='-1b')
    contract_month = month if current_date < del_date else month + 1
    contract_year = year - 2000
    if contract_month > 12:
        contract_month -= 12
        contract_year += 1
    suffix = universe[0].split('.')[1]
    prefix = universe[0].split('.')[0][:contract_prefix_len]
    contract_id = '%s%02d%02d.%s' % (prefix, contract_year, contract_month, suffix)
    return contract_id
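# Hypothetical usage sketch for get_continuous_future_contract: the contract ids and the
# del_rule values below are illustrative assumptions, not taken from the original code.
# del_rule here encodes "third Friday of the delivery month" (nth=3, day_of_week=6, matching
# the nthWeekDay(3, 6, ...) convention used by the handle_data strategies in this section).
import datetime as dt

universe = ['if1705.cffex', 'if1706.cffex']   # any contract id supplies the prefix/suffix
del_rule = {'nth': 3, 'day_of_week': 6}       # assumed index-futures expiry rule
contract = get_continuous_future_contract(universe, dt.datetime(2017, 6, 14), del_rule)
# expected to yield 'if1706.cffex' before the June roll date and 'if1707.cffex' on or after it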
def fetch_dx_return_range(self,
                          universe,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None,
                          horizon: int = 0) -> pd.DataFrame:
    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    end_date = advanceDateByCalendar('china.sse', end_date,
                                     str(horizon) + 'b').strftime('%Y-%m-%d')

    cond = universe.query_range(start_date, end_date)

    big_table = join(
        DailyReturn, UniverseTable,
        and_(DailyReturn.trade_date == UniverseTable.trade_date,
             DailyReturn.code == UniverseTable.code,
             cond))

    stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
        partition_by=DailyReturn.code,
        order_by=DailyReturn.trade_date,
        rows=(0, horizon)).label('dx')

    query = select([DailyReturn.trade_date, DailyReturn.code, stats]) \
        .select_from(big_table)

    df = pd.read_sql(query, self.session.bind)

    if dates:
        df = df[df.trade_date.isin(dates)]
    return df
def fetch_dx_return_range(self,
                          universe,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None,
                          horizon: int = 0,
                          offset: int = 0) -> pd.DataFrame:
    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    end_date = advanceDateByCalendar(
        'china.sse', end_date,
        str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')

    stats = self._create_stats(Market, horizon, offset)

    codes = universe.query(self.engine, start_date, end_date, dates)

    t = select([Market.trade_date, Market.code, stats]).where(
        and_(
            Market.trade_date.between(start_date, end_date),
            Market.code.in_(codes.code.unique().tolist())
        )
    ).cte('t')

    cond = universe._query_statements(start_date, end_date, dates)

    query = select([t]).where(
        and_(t.columns['trade_date'] == UniverseTable.trade_date,
             t.columns['code'] == UniverseTable.code,
             cond)
    )

    df = pd.read_sql(query, self.session.bind).dropna()
    return df.sort_values(['trade_date', 'code'])
def loadon_data(self, trade_date):
    db_polymerize = DBPolymerize(self._name)
    max_windows = self._maximization_windows()
    begin_date = advanceDateByCalendar('china.sse', trade_date, '-%sb' % (max_windows + 1))
    market_data, index_data = db_polymerize.fetch_volatility_value_data(
        begin_date, trade_date, '1b')
    # market_data, index_data = db_polymerize.fetch_volatility_value_data('2018-08-15', trade_date, '1b')
    return market_data, index_data
def test_sql_engine_fetch_dx_return(self):
    horizon = 4
    offset = 1
    ref_date = self.ref_date
    universe = Universe('zz500') + Universe('zz1000')
    codes = self.engine.fetch_codes(ref_date, universe)

    dx_return = self.engine.fetch_dx_return(ref_date,
                                            codes,
                                            horizon=horizon,
                                            offset=offset)

    start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
    end_date = advanceDateByCalendar('china.sse', ref_date, '6b')

    query = select([Market.code, Market.chgPct]).where(
        and_(Market.trade_date.between(start_date, end_date),
             Market.code.in_(dx_return.code.unique().tolist())))
    df = pd.read_sql(query, con=self.engine.engine)
    res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
    np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values)

    horizon = 4
    offset = 0
    ref_date = self.ref_date
    universe = Universe('zz500') + Universe('zz1000')
    codes = self.engine.fetch_codes(ref_date, universe)

    dx_return = self.engine.fetch_dx_return(ref_date,
                                            codes,
                                            horizon=horizon,
                                            offset=offset)

    start_date = advanceDateByCalendar('china.sse', ref_date, '1b')
    end_date = advanceDateByCalendar('china.sse', ref_date, '5b')

    query = select([Market.code, Market.chgPct]).where(
        and_(Market.trade_date.between(start_date, end_date),
             Market.code.in_(dx_return.code.unique().tolist())))
    df = pd.read_sql(query, con=self.engine.engine)
    res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
    np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values)
def update_uqer_daily_return(ds, **kwargs):
    ref_date, this_date = process_date(ds)
    previous_date = advanceDateByCalendar('china.sse', this_date, '-1b').strftime('%Y-%m-%d')

    table = 'daily_return'
    df = pd.read_sql("select Code, chgPct as d1 from market where Date = '{0}'".format(this_date), engine2)
    df['Date'] = previous_date

    engine2.execute("delete from {0} where Date = '{1}'".format(table, previous_date))
    df.to_sql(table, engine2, index=False, if_exists='append')
def fetch_factor(self,
                 ref_date: str,
                 factors: Iterable[object],
                 codes: Iterable[int],
                 warm_start: int = 0,
                 used_factor_tables=None) -> pd.DataFrame:
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    dependency = transformer.dependency

    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)

    start_date = advanceDateByCalendar('china.sse', ref_date,
                                       str(-warm_start) + 'b').strftime('%Y-%m-%d')
    end_date = ref_date

    big_table = Market
    joined_tables = set()
    joined_tables.add(Market.__table__.name)

    for t in set(factor_cols.values()):
        if t.__table__.name not in joined_tables:
            big_table = outerjoin(
                big_table, t,
                and_(Market.trade_date == t.trade_date,
                     Market.code == t.code))
            joined_tables.add(t.__table__.name)

    query = select(
        [Market.trade_date, Market.code, Market.chgPct, Market.secShortName] + list(factor_cols.keys())) \
        .select_from(big_table).where(and_(Market.trade_date.between(start_date, end_date),
                                           Market.code.in_(codes)))

    df = pd.read_sql(query, self.engine) \
        .replace([-np.inf, np.inf], np.nan) \
        .sort_values(['trade_date', 'code']) \
        .set_index('trade_date')
    res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)

    res['chgPct'] = df.chgPct
    res['secShortName'] = df['secShortName']
    res = res.loc[ref_date:ref_date, :]
    res.index = list(range(len(res)))
    return res
def fetch_dx_return(self,
                    ref_date: str,
                    codes: Iterable[int],
                    expiry_date: str = None,
                    horizon: int = 0,
                    offset: int = 0,
                    neutralized_risks: list = None,
                    pre_process=None,
                    post_process=None,
                    benchmark: int = None) -> pd.DataFrame:
    start_date = ref_date

    if not expiry_date:
        end_date = advanceDateByCalendar(
            'china.sse', ref_date,
            str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
    else:
        end_date = expiry_date

    query = select([
        Market.trade_date,
        Market.code.label("code"),
        Market.chgPct.label("chgPct")
    ]).where(
        and_(Market.trade_date.between(start_date, end_date),
             Market.code.in_(codes),
             Market.flag == 1)).order_by(Market.trade_date, Market.code)

    df = pd.read_sql(query, self.session.bind).dropna()
    df = self._create_stats(df, horizon, offset)
    df = df[df.trade_date == ref_date]

    if benchmark:
        benchmark = _map_index_codes[benchmark]
        query = select(
            [IndexMarket.trade_date,
             IndexMarket.chgPct.label("chgPct")]).where(
            and_(IndexMarket.trade_date.between(start_date, end_date),
                 IndexMarket.indexCode == benchmark,
                 IndexMarket.flag == 1))
        df2 = pd.read_sql(query, self.session.bind).dropna()
        df2 = self._create_stats(df2, horizon, offset, no_code=True)
        ind_ret = df2[df2.trade_date == ref_date]['dx'].values[0]
        df['dx'] = df['dx'] - ind_ret

    if neutralized_risks:
        _, risk_exp = self.fetch_risk_model(ref_date, codes)
        df = pd.merge(df, risk_exp, on='code').dropna()
        df[['dx']] = factor_processing(
            df[['dx']].values,
            pre_process=pre_process,
            risk_factors=df[neutralized_risks].values,
            post_process=post_process)

    return df[['code', 'dx']]
def test_sql_engine_fetch_dx_return_index(self):
    horizon = 4
    offset = 1
    ref_date = self.ref_date
    dx_return = self.engine.fetch_dx_return_index(ref_date,
                                                  905,
                                                  horizon=horizon,
                                                  offset=offset)

    start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
    end_date = advanceDateByCalendar('china.sse', ref_date, '6b')

    query = select([IndexMarket.indexCode, IndexMarket.chgPct]).where(
        and_(IndexMarket.trade_date.between(start_date, end_date),
             IndexMarket.indexCode == 905))
    df = pd.read_sql(query, con=self.engine.engine)
    res = df.groupby('indexCode').apply(lambda x: np.log(1. + x).sum())
    np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values)
def factor_uqer_analysis(factor_name, ref_date, use_only_index_components=False, risk_neutral=True):
    previous_day = advanceDateByCalendar('china.sse', ref_date, '-1b').strftime('%Y-%m-%d')

    weights, analysis = factor_uqer_one_day(factor_name, ref_date, use_only_index_components, risk_neutral)
    if weights is None:
        logger.warning("No data for '{0}' on {1}".format(factor_name, ref_date))
        return

    previous_weight, _ = factor_uqer_one_day(factor_name, previous_day, use_only_index_components, risk_neutral)

    pos_diff_dict = {}

    if weights is not None:
        for ind in weights.industry.unique():
            pos_series = weights.loc[weights.industry == ind, 'weight']
            if previous_weight is not None:
                last_series = previous_weight.loc[previous_weight.industry == ind, 'weight']
                pos_diff = pos_series.sub(last_series, fill_value=0)
            else:
                pos_diff = pos_series
            pos_diff_dict[ind] = pos_diff.abs().sum()

        pos_diff_dict['total'] = sum(pos_diff_dict.values())

    inds = list(pos_diff_dict.keys())
    pos_diff_series = pd.DataFrame(
        {'turn_over': [pos_diff_dict[ind] for ind in inds]}, index=inds)

    return_table = pd.merge(analysis, pos_diff_series, left_index=True, right_index=True)
    return_table.index.name = 'industry'
    return_table['Date'] = dt.datetime.strptime(ref_date, '%Y-%m-%d')
    return_table['portfolio'] = factor_name
    if risk_neutral:
        return_table['type'] = 'risk_neutral'
    else:
        return_table['type'] = 'top_100'
    return_table.reset_index(inplace=True)
    return return_table
def fetch_dx_return_range(self,
                          universe,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None,
                          horizon: int = 0,
                          offset: int = 0,
                          benchmark: int = None) -> pd.DataFrame:
    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    end_date = advanceDateByCalendar(
        'china.sse', end_date,
        str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')

    codes = universe.query(self.engine, start_date, end_date, dates)

    t1 = select([
        Market.trade_date,
        Market.code.label("code"),
        Market.chgPct.label("chgPct")
    ]).where(
        and_(Market.trade_date.between(start_date, end_date),
             Market.code.in_(codes.code.unique().tolist()),
             Market.flag == 1))
    df1 = pd.read_sql(t1, self.session.bind).dropna()
    df1 = self._create_stats(df1, horizon, offset)

    df2 = self.fetch_codes_range(universe, start_date, end_date, dates)
    df2["trade_date"] = pd.to_datetime(df2["trade_date"])

    df = pd.merge(df1, df2, on=["trade_date", "code"])
    df = df.set_index("trade_date")

    if benchmark:
        benchmark = _map_index_codes[benchmark]
        query = select(
            [IndexMarket.trade_date,
             IndexMarket.chgPct.label("chgPct")]).where(
            and_(IndexMarket.trade_date.between(start_date, end_date),
                 IndexMarket.indexCode == benchmark,
                 IndexMarket.flag == 1))
        df2 = pd.read_sql(query, self.session.bind).dropna().drop_duplicates(["trade_date"])
        df2 = self._create_stats(df2, horizon, offset, no_code=True).set_index("trade_date")
        df['dx'] = df['dx'].values - df2.loc[df.index]['dx'].values

    if dates:
        df = df[df.index.isin(dates)]
    return df.reset_index().sort_values(['trade_date', 'code'])
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date, p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)

    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")
    return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    alpha_logger.info("return data loading finished")
    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")
    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)

    return dates, df[['trade_date', 'code', 'dx']], df[[
        'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'
    ] + transformer.names]
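# Illustrative sketch of the warm-start shift performed at the top of prepare_data; the
# imports are assumptions about where these helpers live in Finance-Python (PyFin) and are
# not shown in this section. With frequency='1w' and warm_start=2, Period('1w') is flipped
# into a negative two-week period, the start date is pulled back on the china.sse calendar,
# and makeSchedule then emits the rebalance dates rolled forward to business days.
from PyFin.api import advanceDateByCalendar, makeSchedule, BizDayConventions, DateGeneration
from PyFin.DateUtilities import Period

frequency, warm_start = '1w', 2
p = Period(frequency)
p = Period(length=-warm_start * p.length(), units=p.units())
shifted_start = advanceDateByCalendar('china.sse', '2017-06-01', p).strftime('%Y-%m-%d')
dates = makeSchedule(shifted_start, '2017-08-01', frequency,
                     calendar='china.sse',
                     dateRule=BizDayConventions.Following,
                     dateGenerationRule=DateGeneration.Forward)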
def fetch_factor(self,
                 ref_date: str,
                 factors: Iterable[object],
                 codes: Iterable[int],
                 warm_start: int = 0,
                 used_factor_tables=None) -> pd.DataFrame:
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    dependency = transformer.dependency

    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)

    start_date = advanceDateByCalendar('china.sse', ref_date,
                                       str(-warm_start) + 'b').strftime('%Y-%m-%d')
    end_date = ref_date

    big_table = FullFactor

    for t in set(factor_cols.values()):
        if t.__table__.name != FullFactor.__table__.name:
            big_table = outerjoin(
                big_table, t,
                and_(FullFactor.trade_date == t.trade_date,
                     FullFactor.code == t.code))

    query = select(
        [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
        .select_from(big_table).where(and_(FullFactor.trade_date.between(start_date, end_date),
                                           FullFactor.code.in_(codes)))

    df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).set_index('trade_date')
    res = transformer.transform('code', df)

    for col in res.columns:
        if col not in set(['code', 'isOpen']) and col not in df.columns:
            df[col] = res[col].values

    df['isOpen'] = df.isOpen.astype(bool)
    df = df.loc[ref_date]
    df.index = list(range(len(df)))
    return df
def handle_data(self):
    # compute the index indicator scores
    closeDLastCloseScore = self.closeDLastClose['000016.zicn', '000300.zicn', '000905.zicn'].dot(self.indexWeights)
    closeDOpenScore = self.closeDOpen['000016.zicn', '000300.zicn', '000905.zicn'].dot(self.indexWeights)
    closeDLowScore = self.closeDLow['000016.zicn', '000300.zicn', '000905.zicn'].dot(self.indexWeights)
    highDCloseScore = self.highDClose['000016.zicn', '000300.zicn', '000905.zicn'].dot(self.indexWeights)

    # find the dominant (front-month) futures contracts to use
    current_time = self.current_datetime
    year = current_time.year
    month = current_time.month
    delDay = nthWeekDay(3, 6, month, year)
    changeContractDay = advanceDateByCalendar('China.SSE', delDay, '-1b')
    contract_month = month
    if current_time.date() >= changeContractDay:
        contract_month = month + 1
    ihc, ifc, icc = 'ih15%02d' % contract_month, 'if15%02d' % contract_month, 'ic15%02d' % contract_month
    ihcOld, ifcOld, iccOld = 'ih15%02d' % month, 'if15%02d' % month, 'ic15%02d' % month

    if month < contract_month and self.secPos[ihcOld] != 0:
        # roll the positions: close out the old contracts
        self.order_to(ihcOld, 1, 0)
        self.order_to(ifcOld, 1, 0)
        self.order_to(iccOld, 1, 0)

    # define the basis
    ihBasis = self.closes[ihc] / self.closes['000016.zicn'] - 1.
    ihPreBasis = self.preCloses[ihc] / self.preCloses['000016.zicn'] - 1.
    ifBasis = self.closes[ifc] / self.closes['000300.zicn'] - 1.
    ifPreBasis = self.preCloses[ifc] / self.preCloses['000300.zicn'] - 1.
    icBasis = self.closes[icc] / self.closes['000905.zicn'] - 1.
    icPreBasis = self.preCloses[icc] / self.preCloses['000905.zicn'] - 1.

    # compute the basis scores
    ihBasisDiff = ihPreBasis - ihBasis
    ifBasisDiff = ifPreBasis - ifBasis
    icBasisDiff = icPreBasis - icBasis
    basis = ihBasis - 2. * ifBasis + icBasis
    basisScore = ihBasisDiff - 2. * ifBasisDiff + icBasisDiff

    # combine the five signals into an open/close decision
    elects = 0
    for signal in [closeDLastCloseScore, closeDOpenScore, closeDLowScore, highDCloseScore, basisScore]:
        if basis < 0. < signal:
            elects += 1
        elif signal < 0. < basis:
            elects -= 1

    # position opening/closing logic
    if elects > 0:
        # long signal
        self.order_to(ihc, 1, 1)
        self.order_to(ifc, -1, 2)
        self.order_to(icc, 1, 1)
    elif elects < 0:
        # short signal
        self.order_to(ihc, -1, 1)
        self.order_to(ifc, 1, 2)
        self.order_to(icc, -1, 1)
    else:
        # flat signal: close all positions
        self.order_to(ihc, 1, 0)
        self.order_to(ifc, 1, 0)
        self.order_to(icc, 1, 0)

    # record information for post-backtest inspection
    # index prices
    self.keep("000016.zicn_open", self.openes['000016.zicn'])
    self.keep("000016.zicn_high", self.highes['000016.zicn'])
    self.keep("000016.zicn_low", self.lowes['000016.zicn'])
    self.keep("000016.zicn_close", self.closes['000016.zicn'])

    self.keep("000300.zicn_open", self.openes['000300.zicn'])
    self.keep("000300.zicn_high", self.highes['000300.zicn'])
    self.keep("000300.zicn_low", self.lowes['000300.zicn'])
    self.keep("000300.zicn_close", self.closes['000300.zicn'])

    self.keep("000905.zicn_open", self.openes['000905.zicn'])
    self.keep("000905.zicn_high", self.highes['000905.zicn'])
    self.keep("000905.zicn_low", self.lowes['000905.zicn'])
    self.keep("000905.zicn_close", self.closes['000905.zicn'])

    # futures prices
    self.keep("IH_ID", ihc)
    self.keep("IH_CLOSER", self.closes[ihc])
    self.keep("IF_ID", ifc)
    self.keep("IF_CLOSER", self.closes[ifc])
    self.keep("IC_ID", icc)
    self.keep("IC_CLOSER", self.closes[icc])

    # factor values
    self.keep("C/LC-1", closeDLastCloseScore)
    self.keep("C/O-1", closeDOpenScore)
    self.keep("C/L-1", closeDLowScore)
    self.keep("1-H/C", highDCloseScore)

    # basis values
    self.keep("BASIS_DIFF", basisScore)
    self.keep("BASIS", basis)

    # position direction signal
    self.keep("POSITION_SIGNAL", elects)
    self.keep("Released_PnL", self.realizedHoldings['pnl'])
    self.keep("PnL", self.holdings['pnl'])
def createPerformanceTearSheet(prices=None, returns=None, benchmark=None, benchmarkReturns=None, plot=True):

    if prices is not None and not isinstance(prices, pd.Series):
        raise TypeError("prices series should be a pandas time series.")
    elif returns is not None and prices is not None:
        raise ValueError("prices series and returns series can't be both set.")

    if benchmark is not None and not (isinstance(benchmark, pd.Series) or isinstance(benchmark, str)):
        raise TypeError("benchmark series should be a pandas time series or a string ticker.")

    if returns is None:
        returns = np.log(prices / prices.shift(1))
        returns.fillna(0, inplace=True)
        returns = returns[~np.isinf(returns)]

    if benchmark is not None and isinstance(benchmark, str) and benchmarkReturns is None:
        startDate = advanceDateByCalendar("China.SSE", prices.index[0], '-1b', BizDayConventions.Preceding)
        benchmarkPrices = get_benchmark_data(benchmark,
                                             start_date=startDate.strftime('%Y-%m-%d'),
                                             end_data=returns.index[-1].strftime("%Y-%m-%d"))

        # do the linear interpolation on the target time line
        date_index = prices.index
        new_index = benchmarkPrices.index.union(date_index)
        benchmarkPrices = benchmarkPrices.reindex(new_index)
        benchmarkPrices = benchmarkPrices.interpolate().ix[date_index].dropna()

        benchmarkReturns = np.log(benchmarkPrices['closePrice'] / benchmarkPrices['closePrice'].shift(1))
        benchmarkReturns.name = benchmark
        benchmarkReturns.fillna(0, inplace=True)
        benchmarkReturns.index = pd.to_datetime(benchmarkReturns.index.date)
    elif benchmark is not None and isinstance(benchmark, pd.Series):
        benchmarkReturns = np.log(benchmark / benchmark.shift(1))
        try:
            benchmarkReturns.name = benchmark.name
        except AttributeError:
            benchmarkReturns.name = "benchmark"
        benchmarkReturns.dropna(inplace=True)
        benchmarkReturns.index = pd.to_datetime(benchmarkReturns.index.date)

    aggregateDaily = aggregateReturns(returns)
    drawDownDaily = drawDown(aggregateDaily)

    # perf metric
    annualRet = annualReturn(aggregateDaily)
    annualVol = annualVolatility(aggregateDaily)
    sortino = sortinoRatio(aggregateDaily)
    sharp = sharpRatio(aggregateDaily)
    maxDrawDown = np.min(drawDownDaily['draw_down'])
    winningDays = np.sum(aggregateDaily > 0.)
    lossingDays = np.sum(aggregateDaily < 0.)

    perf_metric = pd.DataFrame([annualRet, annualVol, sortino, sharp, maxDrawDown, winningDays, lossingDays],
                               index=['annual_return',
                                      'annual_volatiltiy',
                                      'sortino_ratio',
                                      'sharp_ratio',
                                      'max_draw_down',
                                      'winning_days',
                                      'lossing_days'],
                               columns=['metrics'])

    perf_df = pd.DataFrame(index=aggregateDaily.index)
    perf_df['daily_return'] = aggregateDaily
    perf_df['daily_cum_return'] = np.exp(aggregateDaily.cumsum()) - 1.0
    perf_df['daily_draw_down'] = drawDownDaily['draw_down']

    if benchmarkReturns is not None:
        perf_df['benchmark_return'] = benchmarkReturns
        perf_df['benchmark_cum_return'] = benchmarkReturns.cumsum()
        perf_df.dropna(inplace=True)
        perf_df['benchmark_cum_return'] = np.exp(perf_df['benchmark_cum_return']
                                                 - perf_df['benchmark_cum_return'][0]) - 1.0
        perf_df['access_return'] = aggregateDaily - benchmarkReturns
        perf_df['access_cum_return'] = (1.0 + perf_df['daily_cum_return']) \
                                       / (1.0 + perf_df['benchmark_cum_return']) - 1.0
        perf_df.fillna(0.0, inplace=True)
        accessDrawDownDaily = drawDown(perf_df['access_return'])
    else:
        accessDrawDownDaily = None

    if 'benchmark_cum_return' in perf_df:
        benchmarkCumReturns = perf_df['benchmark_cum_return']
        benchmarkCumReturns.name = benchmarkReturns.name
        accessCumReturns = perf_df['access_cum_return']
        accessReturns = perf_df['access_return']
        index = perf_df.index
        length1 = len(bizDatesList('China.SSE', index[0], index[-1]))
        length2 = len(perf_df)
        factor = length1 / float(length2)
        rb = RollingBeta(perf_df['daily_return'], perf_df['benchmark_return'], [1, 3, 6], factor=factor)
        rs = RollingSharp(perf_df['daily_return'], [1, 3, 6], factor=factor)
    else:
        benchmarkCumReturns = None
        accessReturns = None
        accessCumReturns = None

    if len(perf_df['daily_return']) > APPROX_BDAYS_PER_MONTH and benchmarkCumReturns is not None:
        rollingRisk = pd.concat([pd.concat(rs, axis=1), pd.concat(rb, axis=1)], axis=1)
    else:
        rollingRisk = None

    if plot:
        verticalSections = 2
        plt.figure(figsize=(16, 7 * verticalSections))
        gs = gridspec.GridSpec(verticalSections, 3, wspace=0.5, hspace=0.5)

        axRollingReturns = plt.subplot(gs[0, :])
        axDrawDown = plt.subplot(gs[1, :], sharex=axRollingReturns)

        plottingRollingReturn(perf_df['daily_cum_return'], benchmarkCumReturns, axRollingReturns)
        plottingDrawdownPeriods(perf_df['daily_cum_return'], drawDownDaily, 5, axDrawDown)

        if rollingRisk is not None:
            plt.figure(figsize=(16, 7 * verticalSections))
            gs = gridspec.GridSpec(verticalSections, 3, wspace=0.5, hspace=0.5)

            axRollingBeta = plt.subplot(gs[0, :])
            axRollingSharp = plt.subplot(gs[1, :])

            bmName = benchmarkReturns.name
            plottingRollingBeta(rb, bmName, ax=axRollingBeta)
            plottingRollingSharp(rs, ax=axRollingSharp)

        plt.figure(figsize=(16, 7 * verticalSections))
        gs = gridspec.GridSpec(verticalSections, 3, wspace=0.5, hspace=0.5)

        axUnderwater = plt.subplot(gs[0, :])
        axMonthlyHeatmap = plt.subplot(gs[1, 0])
        axAnnualReturns = plt.subplot(gs[1, 1])
        axMonthlyDist = plt.subplot(gs[1, 2])

        plottingUnderwater(drawDownDaily['draw_down'], axUnderwater)
        plottingMonthlyReturnsHeapmap(returns, axMonthlyHeatmap)
        plottingAnnualReturns(returns, axAnnualReturns)
        plottingMonthlyRetDist(returns, axMonthlyDist)

    if accessReturns is not None and plot:
        plt.figure(figsize=(16, 7 * verticalSections))
        gs = gridspec.GridSpec(verticalSections, 3, wspace=0.5, hspace=0.5)

        axRollingAccessReturns = plt.subplot(gs[0, :])
        axAccessDrawDown = plt.subplot(gs[1, :], sharex=axRollingAccessReturns)

        plottingRollingReturn(accessCumReturns, None, axRollingAccessReturns,
                              title='Access Cumulative Returns w.r.t. ' + benchmarkReturns.name)
        plottingDrawdownPeriods(accessCumReturns, accessDrawDownDaily, 5, axAccessDrawDown,
                                title=('Top 5 Drawdown periods w.r.t. ' + benchmarkReturns.name))

        plt.figure(figsize=(16, 7 * verticalSections))
        gs = gridspec.GridSpec(verticalSections, 3, wspace=0.5, hspace=0.5)

        axAccessUnderwater = plt.subplot(gs[0, :])
        axAccessMonthlyHeatmap = plt.subplot(gs[1, 0])
        axAccessAnnualReturns = plt.subplot(gs[1, 1])
        axAccessMonthlyDist = plt.subplot(gs[1, 2])

        plottingUnderwater(accessDrawDownDaily['draw_down'], axAccessUnderwater,
                           title='Underwater Plot w.r.t. ' + benchmarkReturns.name)
        plottingMonthlyReturnsHeapmap(accessReturns, ax=axAccessMonthlyHeatmap,
                                      title='Monthly Access Returns (%)')
        plottingAnnualReturns(accessReturns, ax=axAccessAnnualReturns, title='Annual Access Returns')
        plottingMonthlyRetDist(accessReturns, ax=axAccessMonthlyDist,
                               title='Distribution of Monthly Access Returns')

    return perf_metric, perf_df, rollingRisk