def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw'):
    """Load industry classifications for a universe over a set of dates.

    Rows of ``Industry`` are joined to ``UniverseTable`` on trade date and
    code, restricted to the industry category mapped from ``category``, to
    the requested dates, and to the condition returned by
    ``universe.query_range``.

    :param universe: universe object supplying the membership condition.
    :param start_date: lower bound of the range, used when ``dates`` is empty.
    :param end_date: upper bound of the range, used when ``dates`` is empty.
    :param dates: explicit trade dates; when non-empty it takes precedence
        over ``start_date`` / ``end_date``.
    :param category: key passed to ``_map_industry_category``.
    :return: distinct rows with columns ``trade_date``, ``code``,
        ``industry_code`` and ``industry``, as read by ``pd.read_sql``.
    """
    category_name = _map_industry_category(category)
    universe_filter = universe.query_range(start_date, end_date, dates)

    # Only the date predicate differs between the two query modes.
    if dates:
        date_filter = Industry.trade_date.in_(dates)
    else:
        date_filter = Industry.trade_date.between(start_date, end_date)

    on_clause = and_(
        Industry.trade_date == UniverseTable.trade_date,
        Industry.code == UniverseTable.code,
        Industry.industry == category_name,
        date_filter,
        universe_filter,
    )
    joined = join(Industry, UniverseTable, on_clause)

    selected = [
        Industry.trade_date,
        Industry.code,
        Industry.industryID1.label('industry_code'),
        Industry.industryName1.label('industry'),
    ]
    statement = select(selected).select_from(joined).distinct()
    return pd.read_sql(statement, self.engine)
def fetch_codes_range(self,
                      universe: Universe,
                      start_date: str = None,
                      end_date: str = None,
                      dates: Iterable[str] = None) -> pd.DataFrame:
    """Load the distinct (trade_date, code) pairs selected by a universe.

    :param universe: universe object; its ``query_range`` result is used as
        the WHERE clause.
    :param start_date: forwarded to ``universe.query_range``.
    :param end_date: forwarded to ``universe.query_range``.
    :param dates: forwarded to ``universe.query_range``.
    :return: frame with the ``trade_date`` and ``code`` columns of
        ``UniverseTable``.
    """
    universe_filter = universe.query_range(start_date, end_date, dates)
    columns = [UniverseTable.trade_date, UniverseTable.code]
    statement = select(columns).distinct().where(universe_filter)
    return pd.read_sql(statement, self.engine)
def fetch_risk_model_range(
        self,
        universe: Universe,
        start_date: str = None,
        end_date: str = None,
        dates: Iterable[str] = None,
        risk_model: str = 'short',
        excluded: Iterable[str] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load risk covariance data and risk exposures over a set of dates.

    Two queries are issued against ``self.engine``:

    1. The covariance table chosen by ``_map_risk_model_table(risk_model)``,
       filtered by date only and sorted by ``trade_date`` and ``FactorID``.
    2. ``FullFactor`` joined to ``UniverseTable`` on trade date and code and
       restricted by ``universe.query_range``, selecting the special-risk
       column plus one exposure column per risk factor that is not excluded.

    :param universe: universe object supplying the membership condition for
        the exposure query.
    :param start_date: lower bound of the range, used when ``dates`` is empty.
    :param end_date: upper bound of the range, used when ``dates`` is empty.
    :param dates: explicit trade dates; when non-empty it takes precedence
        over ``start_date`` / ``end_date``.
    :param risk_model: key passed to ``_map_risk_model_table``.
    :param excluded: names in ``total_risk_factors`` to leave out of the
        exposure frame; ``None`` or empty excludes nothing. The covariance
        frame is not affected by this argument.
    :return: ``(risk_cov, risk_exp)`` data frames.
    """
    risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

    cov_risk_cols = [
        risk_cov_table.__table__.columns[f] for f in total_risk_factors
    ]

    if dates:
        cond = risk_cov_table.trade_date.in_(dates)
    else:
        cond = risk_cov_table.trade_date.between(start_date, end_date)

    query = select([
        risk_cov_table.trade_date,
        risk_cov_table.FactorID,
        risk_cov_table.Factor
    ] + cov_risk_cols).where(cond)

    risk_cov = pd.read_sql(query, self.engine).sort_values(
        ['trade_date', 'FactorID'])

    # Materialise the exclusion set exactly once. Rebuilding it inside the
    # comprehension repeats the work per factor and, for a one-shot iterable
    # (e.g. a generator), leaves every factor after the first checked against
    # an empty set.
    excluded_factors = set(excluded) if excluded else set()

    risk_exposure_cols = [
        FullFactor.__table__.columns[f]
        for f in total_risk_factors
        if f not in excluded_factors
    ]

    cond = universe.query_range(start_date, end_date, dates)
    big_table = join(
        FullFactor, UniverseTable,
        and_(FullFactor.trade_date == UniverseTable.trade_date,
             FullFactor.code == UniverseTable.code,
             cond))

    query = select(
        [FullFactor.trade_date, FullFactor.code, special_risk_col]
        + risk_exposure_cols
    ).select_from(big_table).distinct()

    risk_exp = pd.read_sql(query, self.engine)

    return risk_cov, risk_exp
def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Load and transform factor data for a universe over a set of dates.

    The raw columns required by the transformer are read from ``FullFactor``
    (outer-joined with any other factor tables they live in), restricted to
    the universe, optionally merged with ``external_data``, and then passed
    through ``transformer.transform``.

    :param universe: universe object supplying the membership condition.
    :param factors: a ``Transformer``, or anything accepted by the
        ``Transformer`` constructor.
    :param start_date: lower bound of the range, used when ``dates`` is None.
    :param end_date: upper bound of the range, used when ``dates`` is None.
    :param dates: explicit trade dates; when not None it takes precedence
        over ``start_date`` / ``end_date`` for the factor-table joins.
    :param external_data: optional frame merged on ``trade_date`` and
        ``code``; rows with missing values are dropped after the merge.
    :param used_factor_tables: tables to resolve factor columns against;
        falls back to the module-level ``factor_tables`` when empty.
    :return: frame with ``trade_date``, ``code``, a boolean ``isOpen``, the
        raw dependency columns and the transformed factor columns.
    """
    transformer = factors if isinstance(factors, Transformer) \
        else Transformer(factors)
    dependency = transformer.dependency

    factor_cols = _map_factors(dependency,
                               used_factor_tables or factor_tables)

    universe_filter = universe.query_range(start_date, end_date, dates)

    base_table_name = FullFactor.__table__.name
    joined = FullFactor
    for table in set(factor_cols.values()):
        # FullFactor is already the root of the join.
        if table.__table__.name == base_table_name:
            continue

        if dates is not None:
            date_filter = FullFactor.trade_date.in_(dates)
        else:
            date_filter = FullFactor.trade_date.between(start_date, end_date)

        joined = outerjoin(
            joined, table,
            and_(FullFactor.trade_date == table.trade_date,
                 FullFactor.code == table.code,
                 date_filter))

    joined = join(
        joined, UniverseTable,
        and_(FullFactor.trade_date == UniverseTable.trade_date,
             FullFactor.code == UniverseTable.code,
             universe_filter))

    base_columns = [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen]
    statement = select(base_columns + list(factor_cols.keys())) \
        .select_from(joined).distinct()

    raw = pd.read_sql(statement, self.engine)
    df = raw.sort_values(['trade_date', 'code'])

    if external_data is not None:
        merged = pd.merge(df, external_data, on=['trade_date', 'code'])
        df = merged.dropna()

    df = df.set_index('trade_date')
    res = transformer.transform('code', df)

    # Copy over only the newly computed columns; identifiers and anything
    # already present in the raw frame are left untouched.
    reserved = {'code', 'isOpen'}
    for name in res.columns:
        if name in reserved or name in df.columns:
            continue
        df[name] = res[name].values

    df['isOpen'] = df['isOpen'].astype(bool)
    return df.reset_index()