def test_sql_engine_fetch_risk_model(self):
    """Check fetch_risk_model against direct reads of the risk tables."""
    ref_date = self.ref_date
    universe = Universe('zz500') + Universe('zz1000')
    codes = self.engine.fetch_codes(ref_date, universe)
    risk_cov, risk_exp = self.engine.fetch_risk_model(ref_date, codes, risk_model='short')
    # The covariance frame must list factors in the same order as its columns.
    self.assertListEqual(risk_cov.Factor.tolist(), risk_cov.columns[2:].tolist())
    query = select([RiskCovShort]).where(RiskCovShort.trade_date == ref_date)
    cov_df = pd.read_sql(query, con=self.engine.engine).sort_values('FactorID')
    factors = cov_df.Factor.tolist()
    np.testing.assert_array_almost_equal(risk_cov[factors].values,
                                         cov_df[factors].values)
    query = select([RiskExposure]).where(
        and_(RiskExposure.trade_date == ref_date,
             RiskExposure.code.in_(codes)))
    exp_df = pd.read_sql(query, con=self.engine.engine)
    # NOTE(review): relies on both frames sharing the same row order — confirm.
    np.testing.assert_array_almost_equal(exp_df[factors].values,
                                         risk_exp[factors].values)
def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw',
                         level: int = 1):
    """Fetch per-date industry labels for every member of *universe*.

    Returns a frame with trade_date, code, industry_code and industry,
    de-duplicated on (trade_date, code).
    """
    industry_category_name = _map_industry_category(category)
    cond = universe._query_statements(start_date, end_date, dates)
    # Restrict the industry table to rows that are also universe members
    # on the same trade date and belong to the requested classification.
    big_table = join(
        Industry, UniverseTable,
        and_(Industry.trade_date == UniverseTable.trade_date,
             Industry.code == UniverseTable.code,
             Industry.industry == industry_category_name, cond))
    # Column names encode the classification level, e.g. industryID1 / industryName1.
    code_name = 'industryID' + str(level)
    category_name = 'industryName' + str(level)
    query = select([
        Industry.trade_date, Industry.code,
        getattr(Industry, code_name).label('industry_code'),
        getattr(Industry, category_name).label('industry')
    ]).select_from(big_table).distinct()
    df = pd.read_sql(query, self.engine).dropna()
    if universe.is_filtered:
        # A filtered universe narrows membership further; keep the intersection.
        codes = universe.query(self, start_date, end_date, dates)
        df = pd.merge(df, codes, how='inner',
                      on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
    return df.drop_duplicates(['trade_date', 'code'])
def test_sql_engine_fetch_dx_return_range(self):
    """fetch_dx_return_range(horizon=4, offset=1) must equal summed log
    returns over business days +2b..+6b after each schedule date."""
    ref_dates = makeSchedule(
        advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
        self.ref_date, '60b', 'china.sse')
    universe = Universe('zz500') + Universe('zz1000')
    dx_return = self.engine.fetch_dx_return_range(universe,
                                                  dates=ref_dates,
                                                  horizon=4,
                                                  offset=1)
    codes = self.engine.fetch_codes_range(universe, dates=ref_dates)
    groups = codes.groupby('trade_date')
    for ref_date, g in groups:
        # Window bounds are hard-coded to match horizon=4, offset=1 above.
        start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
        end_date = advanceDateByCalendar('china.sse', ref_date, '6b')
        query = select([Market.code, Market.chgPct]).where(
            and_(Market.trade_date.between(start_date, end_date),
                 Market.code.in_(g.code.unique().tolist())))
        df = pd.read_sql(query, con=self.engine.engine)
        # Compound daily pct changes into one log return per code.
        res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
        calculated_return = dx_return[dx_return.trade_date == ref_date]
        np.testing.assert_array_almost_equal(calculated_return.dx.values,
                                             res.chgPct.values)
def test_sql_engine_fetch_factor_range_forward(self):
    """fetch_factor_range_forward should surface the factor value 60b ahead."""
    ref_dates = makeSchedule(
        advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
        self.ref_date, '60b', 'china.sse')
    # Append one extra schedule date so the last ref_date has a forward value.
    ref_dates = ref_dates + [
        advanceDateByCalendar('china.sse', ref_dates[-1],
                              '60b').strftime('%Y-%m-%d')
    ]
    universe = Universe('zz500') + Universe('zz1000')
    factor = 'ROE'
    factor_data = self.engine.fetch_factor_range_forward(universe,
                                                         factor,
                                                         dates=ref_dates)
    codes = self.engine.fetch_codes_range(universe, dates=ref_dates[:-1])
    groups = codes.groupby('trade_date')
    for ref_date, g in groups:
        forward_ref_date = advanceDateByCalendar(
            'china.sse', ref_date, '60b').strftime('%Y-%m-%d')
        query = select([Uqer.code, Uqer.ROE]).where(
            and_(Uqer.trade_date == forward_ref_date,
                 Uqer.code.in_(g.code.unique().tolist())))
        df = pd.read_sql(query, con=self.engine.engine)
        calculated_factor = factor_data[factor_data.trade_date == ref_date]
        # Align on code before comparing the forward ('dx') values.
        calculated_factor.set_index('code', inplace=True)
        calculated_factor = calculated_factor.loc[df.code]
        np.testing.assert_array_almost_equal(calculated_factor.dx.values,
                                             df.ROE.values)
def test_universe_equal(self):
    """Universes built from the same index name compare equal; different names do not."""
    self.assertEqual(Universe('zz500'), Universe('zz500'))
    self.assertNotEqual(Universe('zz500'), Universe('zz800'))
def fetch_risk_model_range(
        self,
        universe: Universe,
        start_date: str = None,
        end_date: str = None,
        dates: Iterable[str] = None,
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch the factor covariance matrix and per-stock risk exposures.

    Returns a (risk_cov, risk_exp) tuple of data frames for the window.
    """
    risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)
    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]
    # Use an explicit date list when given, otherwise a closed date range.
    cond = risk_cov_table.trade_date.in_(
        dates) if dates else risk_cov_table.trade_date.between(
        start_date, end_date)
    query = select([
        risk_cov_table.trade_date, risk_cov_table.FactorID,
        risk_cov_table.Factor
    ] + cov_risk_cols).where(cond)
    risk_cov = pd.read_sql(query, self.engine).sort_values(
        ['trade_date', 'FactorID'])
    if not excluded:
        excluded = []
    # Exposure columns may be trimmed by the caller via *excluded*.
    risk_exposure_cols = [
        FullFactor.__table__.columns[f] for f in total_risk_factors
        if f not in set(excluded)
    ]
    cond = universe._query_statements(start_date, end_date, dates)
    big_table = join(
        FullFactor, UniverseTable,
        and_(FullFactor.trade_date == UniverseTable.trade_date,
             FullFactor.code == UniverseTable.code, cond))
    query = select(
        [FullFactor.trade_date, FullFactor.code, special_risk_col]
        + risk_exposure_cols).select_from(big_table) \
        .distinct()
    risk_exp = pd.read_sql(query, self.engine)
    if universe.is_filtered:
        # Apply the universe's extra filter by inner-joining its member codes.
        codes = universe.query(self, start_date, end_date, dates)
        risk_exp = pd.merge(risk_exp, codes, how='inner',
                            on=['trade_date', 'code'
                                ]).sort_values(['trade_date', 'code'])
    return risk_cov, risk_exp
def test_sdl_engine_fetch_codes_with_exclude_universe(self):
    """Subtracting a universe should drop its members from the fetched codes."""
    ref_date = self.ref_date
    fetched = self.engine.fetch_codes(ref_date,
                                      Universe('zz500') - Universe('cyb'))
    expected_query = select([UniverseTable.code]).where(
        and_(UniverseTable.trade_date == ref_date,
             UniverseTable.zz500 == 1,
             UniverseTable.cyb == 0))
    expected = pd.read_sql(expected_query,
                           con=self.engine.engine).sort_values('code')
    self.assertListEqual(fetched, expected.code.values.tolist())
def test_sql_engine_fetch_factor(self):
    """fetch_factor must agree with a direct read of the factor table."""
    ref_date = self.ref_date
    codes = self.engine.fetch_codes(ref_date,
                                    Universe('zz500') + Universe('zz1000'))
    factor = 'ROE'
    factor_data = self.engine.fetch_factor(ref_date, factor, codes)
    expected_query = select([Uqer.code, Uqer.ROE]).where(
        and_(Uqer.trade_date == ref_date,
             Uqer.code.in_(codes)))
    expected = pd.read_sql(expected_query,
                           con=self.engine.engine).sort_values('code')
    np.testing.assert_array_almost_equal(factor_data.ROE.values,
                                         expected.ROE.values)
def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw',
                         level: int = 1):
    """Fetch industry code/name at *level* for universe members per date."""
    # Column names encode the classification level, e.g. industry_code1.
    code_name = 'industry_code' + str(level)
    category_name = 'industry_name' + str(level)
    cond = universe._query_statements(start_date, end_date, dates)
    query = select(
        [Industry.code.label("code"),
         Industry.trade_date,
         getattr(Industry, code_name).label('industry_code'),
         getattr(Industry, category_name).label('industry')]).where(
        and_(
            *cond,
            Industry.code == UniverseTable.code,
            Industry.trade_date == UniverseTable.trade_date,
            # presumably flag == 1 marks the active/valid record — TODO confirm
            Industry.flag == 1
        )
    ).distinct()
    df = pd.read_sql(query, self.session.bind)
    # Normalize dates to pandas timestamps for downstream merging.
    df["trade_date"] = pd.to_datetime(df["trade_date"])
    return df
def test_composer_persistence(self):
    """A Composer must survive a save/load round trip."""
    meta_kwargs = dict(
        freq='5b',
        universe=Universe('zz800'),
        batch=4,
        neutralized_risk=['SIZE'],
        risk_model='long',
        pre_process=['standardize', 'winsorize_normal'],
        post_process=['standardize', 'winsorize_normal'],
        warm_start=2,
        data_source='postgresql://*****:*****@server/dummy')
    data_meta = DataMeta(**meta_kwargs)
    alpha_model = XGBClassifier(features={'f1': 'closePrice',
                                          'f2': 'openPrice'})
    composer = Composer(alpha_model=alpha_model, data_meta=data_meta)
    restored = Composer.load(composer.save())
    self._assert_composer_equal(composer, restored)
def test_data_meta_persistence(self):
    """Every DataMeta field must survive a save/load round trip."""
    data_meta = DataMeta(freq='5b',
                         universe=Universe('zz800'),
                         batch=4,
                         neutralized_risk=['SIZE'],
                         risk_model='long',
                         pre_process=['standardize', 'winsorize_normal'],
                         post_process=['standardize', 'winsorize_normal'],
                         warm_start=2,
                         data_source='postgresql://*****:*****@server/dummy')
    loaded_data = DataMeta.load(data_meta.save())
    # Compare every persisted attribute in one sweep.
    for attr in ('freq', 'universe', 'batch', 'neutralized_risk',
                 'risk_model', 'pre_process', 'post_process',
                 'warm_start', 'data_source'):
        self.assertEqual(getattr(data_meta, attr),
                         getattr(loaded_data, attr))
def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw',
                         level: int = 1):
    """Fetch SW level-*level* industry labels for universe members per date.

    The original duplicated the whole join construction in both the
    ``dates`` and the date-range branches; only the trade-date condition
    differed, so it is factored out here.

    :param universe: universe whose membership restricts the result
    :param start_date: inclusive range start (used when *dates* is empty)
    :param end_date: inclusive range end (used when *dates* is empty)
    :param dates: explicit trade dates; takes precedence over the range
    :param category: industry classification scheme name (e.g. 'sw')
    :param level: classification level, selects industryID{level}/industryName{level}
    :return: DataFrame with trade_date, code, industry_code, industry
    """
    industry_category_name = _map_industry_category(category)
    cond = universe.query_range(start_date, end_date, dates)
    # The date filter is the only difference between the two original branches.
    if dates:
        date_cond = Industry.trade_date.in_(dates)
    else:
        date_cond = Industry.trade_date.between(start_date, end_date)
    big_table = join(
        Industry, UniverseTable,
        and_(Industry.trade_date == UniverseTable.trade_date,
             Industry.code == UniverseTable.code,
             Industry.industry == industry_category_name,
             date_cond,
             cond))
    # Generalized from the hard-coded level-1 columns; level=1 preserves
    # the original behavior (industryID1 / industryName1).
    code_name = 'industryID' + str(level)
    category_name = 'industryName' + str(level)
    query = select([
        Industry.trade_date, Industry.code,
        getattr(Industry, code_name).label('industry_code'),
        getattr(Industry, category_name).label('industry')
    ]).select_from(big_table).distinct()
    return pd.read_sql(query, self.engine)
def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
    """Return the sorted, de-duplicated codes in *universe* on *ref_date*."""
    membership_cond = universe.query(ref_date)
    query = select([UniverseTable.trade_date,
                    UniverseTable.code]).distinct().where(membership_cond)
    rows = self.engine.execute(query).fetchall()
    # The code is the second column of each fetched row.
    return sorted({row[1] for row in rows})
def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw',
                         level: int = 1):
    """Fetch per-date industry labels (at *level*) for universe members,
    ordered by trade date and code."""
    industry_category_name = _map_industry_category(category)
    cond = universe._query_statements(start_date, end_date, dates)
    # Join industry rows to universe membership on the same trade date.
    big_table = join(Industry, UniverseTable,
                     and_(
                         Industry.trade_date == UniverseTable.trade_date,
                         Industry.code == UniverseTable.code,
                         Industry.industry == industry_category_name,
                         cond
                     ))
    # Column names encode the level, e.g. industryID1 / industryName1.
    code_name = 'industryID' + str(level)
    category_name = 'industryName' + str(level)
    query = select([Industry.trade_date,
                    Industry.code,
                    getattr(Industry, code_name).label('industry_code'),
                    getattr(Industry, category_name).label('industry')]).select_from(big_table) \
        .order_by(Industry.trade_date, Industry.code)
    return pd.read_sql(query, self.engine).dropna()
def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Fetch raw factor columns for *universe* over the window and apply
    the transformer, returning one row per (trade_date, code)."""
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)
    dependency = transformer.dependency
    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)
    # Market is the base table; outer-join each extra factor table once.
    big_table = Market
    joined_tables = set()
    joined_tables.add(Market.__table__.name)
    for t in set(factor_cols.values()):
        if t.__table__.name not in joined_tables:
            if dates is not None:
                big_table = outerjoin(big_table, t,
                                      and_(Market.trade_date == t.trade_date,
                                           Market.code == t.code,
                                           Market.trade_date.in_(dates)))
            else:
                big_table = outerjoin(big_table, t,
                                      and_(Market.trade_date == t.trade_date,
                                           Market.code == t.code,
                                           Market.trade_date.between(start_date, end_date)))
            joined_tables.add(t.__table__.name)
    universe_df = universe.query(self, start_date, end_date, dates)
    query = select(
        [Market.trade_date, Market.code, Market.chgPct]
        + list(factor_cols.keys())) \
        .select_from(big_table).where(
        and_(
            Market.code.in_(universe_df.code.unique().tolist()),
            Market.trade_date.in_(dates) if dates is not None else
            Market.trade_date.between(start_date, end_date)
        )
    ).distinct()
    # Infinities would poison downstream statistics; treat them as missing.
    df = pd.read_sql(query, self.engine).replace([-np.inf, np.inf], np.nan)
    if external_data is not None:
        df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()
    df.sort_values(['trade_date', 'code'], inplace=True)
    df.set_index('trade_date', inplace=True)
    res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)
    # Carry the raw pct change alongside the transformed factor columns.
    res['chgPct'] = df.chgPct
    res = res.reset_index()
    # Keep only actual universe members and drop duplicate (date, code) rows.
    return pd.merge(res, universe_df[['trade_date', 'code']],
                    how='inner').drop_duplicates(['trade_date', 'code'])
def fetch_trade_status_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             offset=0):
    """Fetch the trading status (isOpen) for universe members, optionally
    shifted *offset* trading days forward via a window function."""
    codes = universe.query(self, start_date, end_date, dates)
    if dates:
        start_date = dates[0]
        end_date = dates[-1]
    # Extend the window so lead() below has data for the final dates.
    end_date = advanceDateByCalendar('china.sse', end_date,
                                     str(offset) + 'b').strftime('%Y-%m-%d')
    # is_open for date d is Market.isOpen *offset* rows ahead within each code.
    stats = func.lead(Market.isOpen, offset).over(
        partition_by=Market.code,
        order_by=Market.trade_date).label('is_open')
    cte = select([Market.trade_date, Market.code, stats]).where(
        and_(
            Market.trade_date.between(start_date, end_date),
            Market.code.in_(codes.code.unique().tolist())
        )
    ).cte('cte')
    query = select([cte]).select_from(cte).order_by(cte.columns['trade_date'],
                                                    cte.columns['code'])
    df = pd.read_sql(query, self.engine)
    # Keep only (date, code) pairs that are actual universe members.
    return pd.merge(df, codes[['trade_date', 'code']],
                    on=['trade_date', 'code'])
def test_sql_engine_fetch_industry_matrix(self):
    """fetch_industry_matrix should match one-hot encoded SW level-1 labels."""
    ref_date = self.ref_date
    universe = Universe('zz500') + Universe('zz1000')
    codes = self.engine.fetch_codes(ref_date, universe)
    ind_matrix = self.engine.fetch_industry_matrix(ref_date, codes, 'sw', 1)
    query = select([Industry.code, Industry.industryName1]).where(
        and_(Industry.trade_date == ref_date,
             Industry.industry == '申万行业分类',
             Industry.code.in_(codes)))
    df = pd.read_sql(query, con=self.engine.engine)
    # One-hot encode the industry names without any column prefix.
    df = pd.get_dummies(df, prefix="", prefix_sep="")
    self.assertEqual(len(ind_matrix), len(df))
    # Skip the first two columns (trade_date/code) when comparing dummies.
    np.testing.assert_array_almost_equal(df[ind_matrix.columns[2:]].values,
                                         ind_matrix.iloc[:, 2:].values)
def fetch_codes_range(self,
                      universe: Universe,
                      start_date: str = None,
                      end_date: str = None,
                      dates: Iterable[str] = None) -> pd.DataFrame:
    """Return distinct (trade_date, code) pairs of *universe* members
    over the requested window."""
    membership_cond = universe.query_range(start_date, end_date, dates)
    membership_query = select(
        [UniverseTable.trade_date,
         UniverseTable.code]).distinct().where(membership_cond)
    return pd.read_sql(membership_query, self.engine)
def test_sql_engine_fetch_dx_return(self):
    """fetch_dx_return must equal the summed log returns over the
    offset-dependent business-day window.

    The original body was copy-pasted twice (offset=1 then offset=0);
    the shared logic is extracted into a local helper.
    """
    horizon = 4

    def check(offset, start_tenor, end_tenor):
        # One sub-check per offset; the window tenors are hard-coded to
        # match the offsets exactly as in the original test.
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)
        dx_return = self.engine.fetch_dx_return(ref_date, codes,
                                                horizon=horizon,
                                                offset=offset)
        start_date = advanceDateByCalendar('china.sse', ref_date, start_tenor)
        end_date = advanceDateByCalendar('china.sse', ref_date, end_tenor)
        query = select([Market.code, Market.chgPct]).where(
            and_(Market.trade_date.between(start_date, end_date),
                 Market.code.in_(dx_return.code.unique().tolist())))
        df = pd.read_sql(query, con=self.engine.engine)
        # Compound daily pct changes into one log return per code.
        res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
        np.testing.assert_array_almost_equal(dx_return.dx.values,
                                             res.chgPct.values)

    check(offset=1, start_tenor='2b', end_tenor='6b')
    check(offset=0, start_tenor='1b', end_tenor='5b')
def test_sql_engine_fetch_codes_range(self):
    """fetch_codes_range must agree with a direct universe-table query
    for every schedule date."""
    ref_dates = makeSchedule(
        advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
        self.ref_date, '60b', 'china.sse')
    universe = Universe('zz500') + Universe('zz1000')
    codes = self.engine.fetch_codes_range(universe, dates=ref_dates)
    query = select([UniverseTable.trade_date, UniverseTable.code]).where(
        and_(UniverseTable.trade_date.in_(ref_dates),
             or_(UniverseTable.zz500 == 1, UniverseTable.zz1000 == 1)))
    df = pd.read_sql(query, con=self.engine.engine).sort_values('code')
    # Compare the sorted code lists date by date.
    for ref_date in ref_dates:
        calculated_codes = list(
            sorted(codes[codes.trade_date == ref_date].code.values))
        expected_codes = list(
            sorted(df[df.trade_date == ref_date].code.values))
        self.assertListEqual(calculated_codes, expected_codes)
def fetch_risk_model_range(self,
                           universe: Universe,
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           risk_model: str = 'short',
                           excluded: Iterable[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch factor covariances plus per-stock exposures and specific risk."""
    risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
    cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
    # An explicit date list takes precedence over a [start, end] range.
    cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date, end_date)
    query = select([risk_cov_table.trade_date,
                    risk_cov_table.FactorID,
                    risk_cov_table.Factor] + cov_risk_cols).where(
        cond
    )
    risk_cov = pd.read_sql(query, self.engine).sort_values(['trade_date', 'FactorID'])
    if not excluded:
        excluded = []
    # Exposure columns may be trimmed by the caller via *excluded*.
    risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
    cond = universe._query_statements(start_date, end_date, dates)
    # First join exposures to universe membership ...
    big_table = join(RiskExposure, UniverseTable,
                     and_(
                         RiskExposure.trade_date == UniverseTable.trade_date,
                         RiskExposure.code == UniverseTable.code,
                         cond
                     )
                     )
    # ... then attach the matching specific-risk rows on (code, trade_date).
    big_table = join(special_risk_table, big_table,
                     and_(
                         RiskExposure.code == special_risk_table.code,
                         RiskExposure.trade_date == special_risk_table.trade_date,
                     ))
    query = select(
        [RiskExposure.trade_date,
         RiskExposure.code,
         special_risk_table.SRISK.label('srisk')] + risk_exposure_cols).select_from(big_table) \
        .distinct()
    risk_exp = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).dropna()
    return risk_cov, risk_exp
def test_sql_engine_fetch_codes(self):
    """A custom universe over several indices returns the union of their codes."""
    ref_date = self.ref_date
    fetched = self.engine.fetch_codes(ref_date,
                                      Universe('custom', ['zz500', 'zz1000']))
    expected_query = select([UniverseTable.code]).where(
        and_(UniverseTable.trade_date == ref_date,
             UniverseTable.universe.in_(['zz500', 'zz1000']))).distinct()
    expected = pd.read_sql(expected_query,
                           con=self.engine.engine).sort_values('code')
    self.assertListEqual(fetched, expected.code.values.tolist())
def fetch_factor_range_forward(self,
                               universe: Universe,
                               factors: Union[Transformer, object],
                               start_date: str = None,
                               end_date: str = None,
                               dates: Iterable[str] = None):
    """Fetch the first dependent factor shifted one period forward as 'dx'."""
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)
    dependency = transformer.dependency
    factor_cols = _map_factors(dependency, factor_tables)
    codes = universe.query(self, start_date, end_date, dates)
    total_codes = codes.code.unique().tolist()
    total_dates = codes.trade_date.astype(str).unique().tolist()
    # Market is the base table; outer-join each factor table exactly once.
    big_table = Market
    joined_tables = set()
    joined_tables.add(Market.__table__.name)
    for t in set(factor_cols.values()):
        if t.__table__.name not in joined_tables:
            if dates is not None:
                big_table = outerjoin(
                    big_table, t,
                    and_(Market.trade_date == t.trade_date,
                         Market.code == t.code,
                         Market.trade_date.in_(dates)))
            else:
                big_table = outerjoin(
                    big_table, t,
                    and_(Market.trade_date == t.trade_date,
                         Market.code == t.code,
                         Market.trade_date.between(start_date, end_date)))
            joined_tables.add(t.__table__.name)
    # lag(col, -1) pulls the NEXT row's value within each code partition.
    stats = func.lag(list(factor_cols.keys())[0],
                     -1).over(partition_by=Market.code,
                              order_by=Market.trade_date).label('dx')
    query = select([Market.trade_date, Market.code, Market.chgPct,
                    stats]).select_from(big_table).where(
        and_(Market.trade_date.in_(total_dates),
             Market.code.in_(total_codes)))
    df = pd.read_sql(query, self.engine) \
        .replace([-np.inf, np.inf], np.nan) \
        .sort_values(['trade_date', 'code'])
    # Keep only universe members; drop duplicate (date, code) rows.
    return pd.merge(df, codes[['trade_date', 'code']],
                    how='inner').drop_duplicates(['trade_date', 'code'])
def test_sql_engine_fetch_factor_by_categories(self):
    """CSRank/CSQuantiles grouped by sw1 must match manual rank/quantile math."""
    ref_date = self.ref_date
    universe = Universe('zz500') + Universe('zz1000')
    codes = self.engine.fetch_codes(ref_date, universe)
    factor1 = {'f': CSRank('ROE', groups='sw1')}
    factor2 = {'f': CSQuantiles('ROE', groups='sw1')}
    raw_factor = 'ROE'
    df1 = self.engine.fetch_factor(ref_date, factor1, codes)
    df2 = self.engine.fetch_factor(ref_date, factor2, codes)
    df3 = self.engine.fetch_factor(ref_date, raw_factor, codes)
    ind_matrix = self.engine.fetch_industry_matrix(ref_date, codes, 'sw', 1)
    cols = sorted(ind_matrix.columns[2:].tolist())
    # Collapse the one-hot industry matrix into one category id per stock.
    series = (ind_matrix[cols] * np.array(range(1, len(cols) + 1))).sum(axis=1)
    df3['cat'] = series.values
    # Zero-based within-category rank; NaN where the raw factor is NaN.
    expected_rank = df3[[
        'ROE', 'cat'
    ]].groupby('cat').transform(lambda x: rankdata(x.values) - 1.)
    expected_rank[np.isnan(df3.ROE)] = np.nan
    df3['rank'] = expected_rank['ROE'].values
    np.testing.assert_array_almost_equal(df3['rank'].values,
                                         df1['f'].values)
    # Quantiles scale the rank into [0, 1]; guard the single-element group.
    expected_quantile = df3[[
        'ROE', 'cat'
    ]].groupby('cat').transform(
        lambda x: (rankdata(x.values) - 1.) /
                  ((len(x) - 1.) if len(x) > 1 else 1.))
    expected_quantile[np.isnan(df3.ROE)] = np.nan
    df3['quantile'] = expected_quantile['ROE'].values
    np.testing.assert_array_almost_equal(df3['quantile'].values,
                                         df2['f'].values)
def test_sql_engine_fetch_factor_range(self):
    """fetch_factor_range should equal direct per-date factor-table reads."""
    ref_dates = makeSchedule(
        advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
        self.ref_date, '60b', 'china.sse')
    universe = Universe('zz500') + Universe('zz1000')
    factor = 'ROE'
    factor_data = self.engine.fetch_factor_range(universe,
                                                 factor,
                                                 dates=ref_dates)
    codes = self.engine.fetch_codes_range(universe, dates=ref_dates)
    groups = codes.groupby('trade_date')
    for ref_date, g in groups:
        query = select([Uqer.code, Uqer.ROE]).where(
            and_(Uqer.trade_date == ref_date,
                 Uqer.code.in_(g.code.unique().tolist())))
        df = pd.read_sql(query, con=self.engine.engine)
        calculated_factor = factor_data[factor_data.trade_date == ref_date]
        # NOTE(review): relies on both frames sharing the same code order — confirm.
        np.testing.assert_array_almost_equal(calculated_factor.ROE.values,
                                             df.ROE.values)
def test_universe_equal(self):
    """Custom universes compare by base list and by their filter condition."""
    self.assertEqual(Universe('custom', ['zz500']),
                     Universe('custom', ['zz500']))
    self.assertNotEqual(Universe('custom', ['zz500']),
                        Universe('custom', ['zz800']))
    shared_cond = LAST('x') > 1.
    self.assertEqual(Universe('custom', ['zz500'], filter_cond=shared_cond),
                     Universe('custom', ['zz500'], filter_cond=shared_cond))
    self.assertNotEqual(
        Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.),
        Universe('custom', ['zz500'], filter_cond=LAST('x') > 2.))
def test_universe_persistence(self):
    """Universe.save/load must round-trip name, base list and filter condition."""
    plain = Universe('custom', ['zz500'])
    restored = Universe.load(plain.save())
    self.assertEqual(plain.name, restored.name)
    self.assertListEqual(plain.base_universe, restored.base_universe)
    filtered = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
    restored = Universe.load(filtered.save())
    self.assertEqual(filtered.name, restored.name)
    self.assertListEqual(filtered.base_universe, restored.base_universe)
    # Filter conditions have no direct equality; compare their repr.
    self.assertEqual(str(filtered.filter_cond), str(restored.filter_cond))
def load(cls, data_desc: dict):
    """Rebuild an instance from the description dict produced by save()."""
    # All scalar fields pass straight through from the description.
    kwargs = {key: data_desc[key]
              for key in ('freq', 'batch', 'neutralized_risk', 'risk_model',
                          'pre_process', 'post_process', 'warm_start',
                          'data_source')}
    # The universe is itself persisted via its own save/load protocol.
    kwargs['universe'] = Universe.load(data_desc['universe'])
    return cls(**kwargs)
def fetch_codes_range(self,
                      universe: Universe,
                      start_date: str = None,
                      end_date: str = None,
                      dates: Iterable[str] = None) -> pd.DataFrame:
    """Return the (trade_date, code) membership frame by delegating the
    ranged lookup straight to the *universe* object."""
    return universe.query(self, start_date, end_date, dates)
def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Fetch and transform factor values (FullFactor-based variant)."""
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)
    dependency = transformer.dependency
    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)
    cond = universe.query_range(start_date, end_date, dates)
    big_table = FullFactor
    # Outer-join every non-FullFactor dependency table on (trade_date, code).
    for t in set(factor_cols.values()):
        if t.__table__.name != FullFactor.__table__.name:
            if dates is not None:
                big_table = outerjoin(
                    big_table, t,
                    and_(FullFactor.trade_date == t.trade_date,
                         FullFactor.code == t.code,
                         FullFactor.trade_date.in_(dates)))
            else:
                big_table = outerjoin(
                    big_table, t,
                    and_(
                        FullFactor.trade_date == t.trade_date,
                        FullFactor.code == t.code,
                        FullFactor.trade_date.between(
                            start_date, end_date)))
    # Restrict to universe membership on the same trade date.
    big_table = join(
        big_table, UniverseTable,
        and_(FullFactor.trade_date == UniverseTable.trade_date,
             FullFactor.code == UniverseTable.code, cond))
    query = select(
        [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen]
        + list(factor_cols.keys())) \
        .select_from(big_table).distinct()
    df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code'])
    if external_data is not None:
        df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()
    df.set_index('trade_date', inplace=True)
    res = transformer.transform('code', df)
    # Copy only newly produced columns back; keep code/isOpen untouched.
    for col in res.columns:
        if col not in set(['code', 'isOpen']) and col not in df.columns:
            df[col] = res[col].values
    df['isOpen'] = df.isOpen.astype(bool)
    return df.reset_index()