def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Load transformed factor values for a universe, driven by ``Market``.

    Args:
        universe: object whose ``query(self, start_date, end_date, dates)``
            returns a frame with ``trade_date`` and ``code`` columns.
        factors: a ``Transformer`` or anything ``Transformer(...)`` accepts.
        start_date: lower bound used with ``between`` when ``dates`` is None.
        end_date: upper bound used with ``between`` when ``dates`` is None.
        dates: explicit dates; when given they are used with ``in_`` instead
            of the ``start_date``/``end_date`` range.
        external_data: optional frame merged on ``['trade_date', 'code']``;
            rows with any NaN are dropped after that merge.
        used_factor_tables: tables handed to ``_map_factors``; the
            module-level ``factor_tables`` is used when this is falsy.

    Returns:
        The transformed frame plus ``chgPct``, inner-merged with the universe
        membership and de-duplicated on ``['trade_date', 'code']``.
    """
    transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
    factor_cols = _map_factors(transformer.dependency,
                               used_factor_tables or factor_tables)

    def date_filter():
        # A fresh clause is built for every use, as the inline code did.
        if dates is not None:
            return Market.trade_date.in_(dates)
        return Market.trade_date.between(start_date, end_date)

    # Outer-join each distinct factor table onto Market exactly once.
    source = Market
    seen = {Market.__table__.name}
    for table in set(factor_cols.values()):
        table_name = table.__table__.name
        if table_name in seen:
            continue
        source = outerjoin(
            source, table,
            and_(Market.trade_date == table.trade_date,
                 Market.code == table.code,
                 date_filter()))
        seen.add(table_name)

    universe_df = universe.query(self, start_date, end_date, dates)
    universe_codes = universe_df.code.unique().tolist()

    columns = [Market.trade_date, Market.code, Market.chgPct] + list(factor_cols.keys())
    query = select(columns) \
        .select_from(source) \
        .where(and_(Market.code.in_(universe_codes), date_filter())) \
        .distinct()

    # Infinite values are treated as missing both before and after transform.
    df = pd.read_sql(query, self.engine).replace([-np.inf, np.inf], np.nan)

    if external_data is not None:
        df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

    df = df.sort_values(['trade_date', 'code']).set_index('trade_date')

    res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)
    res['chgPct'] = df.chgPct

    merged = pd.merge(res.reset_index(),
                      universe_df[['trade_date', 'code']],
                      how='inner')
    return merged.drop_duplicates(['trade_date', 'code'])
def fetch_factor(self,
                 ref_date: str,
                 factors: Iterable[object],
                 codes: Iterable[int],
                 warm_start: int = 0,
                 used_factor_tables=None) -> pd.DataFrame:
    """Load transformed factor values at ``ref_date``, driven by ``Market``.

    Data is read from ``warm_start`` 'b' periods before ``ref_date`` (on the
    'china.sse' calendar) up to ``ref_date`` so the transformer has history
    to work with; only the ``ref_date`` rows are returned.

    Args:
        ref_date: the date whose rows are returned.
        factors: a ``Transformer`` or anything ``Transformer(...)`` accepts.
        codes: security codes used in the ``Market.code.in_`` filter.
        warm_start: number of 'b' periods of history to load before
            ``ref_date``.
        used_factor_tables: tables handed to ``_map_factors``; the
            module-level ``factor_tables`` is used when this is falsy.

    Returns:
        The transformed frame with ``chgPct`` and ``secShortName`` added,
        restricted to ``ref_date`` and re-indexed ``0..n-1``.
    """
    transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
    factor_cols = _map_factors(transformer.dependency,
                               used_factor_tables or factor_tables)

    history_offset = str(-warm_start) + 'b'
    start_date = advanceDateByCalendar('china.sse', ref_date, history_offset).strftime('%Y-%m-%d')
    end_date = ref_date

    # Outer-join each distinct factor table onto Market exactly once.
    source = Market
    seen = {Market.__table__.name}
    for table in set(factor_cols.values()):
        table_name = table.__table__.name
        if table_name in seen:
            continue
        source = outerjoin(
            source, table,
            and_(Market.trade_date == table.trade_date,
                 Market.code == table.code))
        seen.add(table_name)

    columns = [Market.trade_date, Market.code, Market.chgPct, Market.secShortName] \
        + list(factor_cols.keys())
    row_filter = and_(Market.trade_date.between(start_date, end_date),
                      Market.code.in_(codes))
    query = select(columns).select_from(source).where(row_filter)

    raw = pd.read_sql(query, self.engine)
    cleaned = raw.replace([-np.inf, np.inf], np.nan)
    df = cleaned.sort_values(['trade_date', 'code']).set_index('trade_date')

    res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)
    res['chgPct'] = df.chgPct
    res['secShortName'] = df['secShortName']

    # The label slice keeps a DataFrame whatever the number of matching rows.
    res = res.loc[ref_date:ref_date, :]
    res.index = list(range(len(res)))
    return res
def fetch_factor(self,
                 ref_date: str,
                 factors: Iterable[object],
                 codes: Iterable[int],
                 warm_start: int = 0,
                 used_factor_tables=None) -> pd.DataFrame:
    """Load factor values at ``ref_date``, driven by ``FullFactor``.

    Data is read from ``warm_start`` 'b' periods before ``ref_date`` (on the
    'china.sse' calendar) up to ``ref_date`` so the transformer has history
    to work with; only the ``ref_date`` rows are returned.

    Args:
        ref_date: the date whose rows are returned.
        factors: a ``Transformer`` or anything ``Transformer(...)`` accepts.
        codes: security codes used in the ``FullFactor.code.in_`` filter.
        warm_start: number of 'b' periods of history to load before
            ``ref_date``.
        used_factor_tables: tables handed to ``_map_factors``; the
            module-level ``factor_tables`` is used when this is falsy.

    Returns:
        A DataFrame of the queried columns plus any transformer output
        columns not already present, with ``isOpen`` cast to bool, restricted
        to ``ref_date`` and re-indexed ``0..n-1``. The frame is empty when no
        row matches ``ref_date``.
    """
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    dependency = transformer.dependency

    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)

    start_date = advanceDateByCalendar('china.sse', ref_date, str(-warm_start) + 'b').strftime('%Y-%m-%d')
    end_date = ref_date

    big_table = FullFactor

    for t in set(factor_cols.values()):
        if t.__table__.name != FullFactor.__table__.name:
            big_table = outerjoin(
                big_table, t,
                and_(FullFactor.trade_date == t.trade_date,
                     FullFactor.code == t.code))

    query = select(
        [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
        .select_from(big_table).where(and_(FullFactor.trade_date.between(start_date, end_date),
                                           FullFactor.code.in_(codes)))

    df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).set_index('trade_date')
    res = transformer.transform('code', df)

    # Copy over only the columns the transformer created; values are assigned
    # positionally, so this relies on ``res`` keeping the row order of ``df``.
    reserved = {'code', 'isOpen'}
    for col in res.columns:
        if col not in reserved and col not in df.columns:
            df[col] = res[col].values

    df['isOpen'] = df.isOpen.astype(bool)

    # Slice by label instead of a scalar lookup: ``df.loc[ref_date]`` returns
    # a Series when exactly one row matches (so the re-indexing below would
    # relabel columns, not rows) and raises KeyError when nothing matches.
    # The index is sorted by trade_date above, so the slice is well defined.
    df = df.loc[ref_date:ref_date, :]
    df.index = list(range(len(df)))
    return df
def query(self, engine, start_date: str = None, end_date: str = None, dates=None) -> pd.DataFrame:
    """Return the ``(trade_date, code)`` pairs that belong to this universe.

    Args:
        engine: object exposing the database connection as ``engine.engine``.
        start_date: forwarded to ``_query_statements``; also the lower bound
            of the factor join when ``dates`` is falsy.
        end_date: forwarded to ``_query_statements``; also the upper bound of
            the factor join when ``dates`` is falsy.
        dates: explicit dates; forwarded to ``_query_statements`` and used
            with ``in_`` in the factor join when truthy.

    Returns:
        A DataFrame with ``trade_date`` and ``code`` columns. When
        ``filter_cond`` is set, only rows whose single filter field equals 1
        after transformation are kept.

    Raises:
        ValueError: via ``pyFinAssert`` when the filter transformer exposes
            anything other than exactly one output name.
    """
    universe_cond = self._query_statements(start_date, end_date, dates)

    if self.filter_cond is None:
        # Plain membership query. This branch previously ran only when
        # ``exclude_universe`` was also None; with exclusions but no filter
        # the method never built a result, despite the DataFrame annotation.
        # NOTE(review): this assumes exclusions are already encoded in
        # ``universe_cond`` by ``_query_statements`` (the filtered branch
        # below relies on the same condition) - confirm.
        query = select([UniverseTable.trade_date, UniverseTable.code]) \
            .where(universe_cond).distinct()
        return pd.read_sql(query, engine.engine)

    if isinstance(self.filter_cond, Transformer):
        transformer = self.filter_cond
    else:
        transformer = Transformer(self.filter_cond)

    dependency = transformer.dependency
    factor_cols = _map_factors(dependency, factor_tables)
    big_table = Market

    for t in set(factor_cols.values()):
        if t.__table__.name != Market.__table__.name:
            big_table = outerjoin(
                big_table, t,
                and_(Market.trade_date == t.trade_date,
                     Market.code == t.code,
                     Market.trade_date.in_(dates) if dates else Market.trade_date.between(
                         start_date, end_date)))

    # Restrict the factor rows to universe members via an inner join.
    big_table = join(
        big_table, UniverseTable,
        and_(Market.trade_date == UniverseTable.trade_date,
             Market.code == UniverseTable.code,
             universe_cond))

    query = select(
        [Market.trade_date, Market.code] + list(factor_cols.keys())) \
        .select_from(big_table).distinct()

    df = pd.read_sql(query, engine.engine).sort_values(['trade_date', 'code']).dropna()
    df.set_index('trade_date', inplace=True)

    filter_fields = transformer.names
    pyFinAssert(len(filter_fields) == 1, ValueError, "filter fields can only be 1")

    df = transformer.transform('code', df)
    # Keep the rows where the filter expression evaluates to 1.
    df = df[df[filter_fields[0]] == 1].reset_index()[['trade_date', 'code']]
    return df
def fetch_factor_range_forward(self,
                               universe: Universe,
                               factors: Union[Transformer, object],
                               start_date: str = None,
                               end_date: str = None,
                               dates: Iterable[str] = None):
    """Load a shifted factor column (``dx``) for a universe.

    The first mapped factor column is passed through the SQL window function
    ``lag`` with offset ``-1``, partitioned by ``Market.code`` and ordered by
    ``Market.trade_date``; presumably this yields the next available value
    per code - verify against the database dialect.

    Args:
        universe: object whose ``query(self, start_date, end_date, dates)``
            returns a frame with ``trade_date`` and ``code`` columns.
        factors: a ``Transformer`` or anything ``Transformer(...)`` accepts;
            only its first mapped dependency column is queried.
        start_date: lower bound used with ``between`` when ``dates`` is None.
        end_date: upper bound used with ``between`` when ``dates`` is None.
        dates: explicit dates; when given they are used with ``in_``.

    Returns:
        A frame with ``trade_date``, ``code``, ``chgPct`` and ``dx``,
        inner-merged with the universe membership and de-duplicated on
        ``['trade_date', 'code']``.
    """
    transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
    factor_cols = _map_factors(transformer.dependency, factor_tables)

    codes = universe.query(self, start_date, end_date, dates)
    total_codes = codes.code.unique().tolist()
    total_dates = codes.trade_date.astype(str).unique().tolist()

    def date_filter():
        # A fresh clause is built for every join, as the inline code did.
        if dates is not None:
            return Market.trade_date.in_(dates)
        return Market.trade_date.between(start_date, end_date)

    # Outer-join each distinct factor table onto Market exactly once.
    source = Market
    seen = {Market.__table__.name}
    for table in set(factor_cols.values()):
        table_name = table.__table__.name
        if table_name in seen:
            continue
        source = outerjoin(
            source, table,
            and_(Market.trade_date == table.trade_date,
                 Market.code == table.code,
                 date_filter()))
        seen.add(table_name)

    first_factor = list(factor_cols.keys())[0]
    stats = func.lag(first_factor, -1).over(
        partition_by=Market.code,
        order_by=Market.trade_date).label('dx')

    row_filter = and_(Market.trade_date.in_(total_dates),
                      Market.code.in_(total_codes))
    query = select([Market.trade_date, Market.code, Market.chgPct, stats]) \
        .select_from(source) \
        .where(row_filter)

    raw = pd.read_sql(query, self.engine)
    df = raw.replace([-np.inf, np.inf], np.nan).sort_values(['trade_date', 'code'])

    merged = pd.merge(df, codes[['trade_date', 'code']], how='inner')
    return merged.drop_duplicates(['trade_date', 'code'])
def fetch_factor_range(self,
                       universe: Universe,
                       factors: Union[Transformer, Iterable[object]],
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None,
                       external_data: pd.DataFrame = None,
                       used_factor_tables=None) -> pd.DataFrame:
    """Load factor values for a universe, driven by ``FullFactor``.

    Args:
        universe: object exposing ``is_filtered`` and
            ``query(self, start_date, end_date, dates)``; the query result
            must carry ``trade_date`` and ``code`` columns.
        factors: a ``Transformer`` or anything ``Transformer(...)`` accepts.
        start_date: lower bound used with ``between`` when ``dates`` is None.
        end_date: upper bound used with ``between`` when ``dates`` is None.
        dates: explicit dates; when given they are used with ``in_`` instead
            of the ``start_date``/``end_date`` range.
        external_data: optional frame merged on ``['trade_date', 'code']``;
            rows with any NaN are dropped after that merge.
        used_factor_tables: tables handed to ``_map_factors``; the
            module-level ``factor_tables`` is used when this is falsy.

    Returns:
        A DataFrame of the queried columns plus any transformer output
        columns not already present, with ``isOpen`` cast to bool,
        inner-merged with the universe membership.
    """
    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    dependency = transformer.dependency

    if used_factor_tables:
        factor_cols = _map_factors(dependency, used_factor_tables)
    else:
        factor_cols = _map_factors(dependency, factor_tables)

    # Outer-join each distinct factor table onto FullFactor exactly once.
    big_table = FullFactor
    joined_tables = set()
    joined_tables.add(FullFactor.__table__.name)

    for t in set(factor_cols.values()):
        if t.__table__.name not in joined_tables:
            if dates is not None:
                big_table = outerjoin(
                    big_table, t,
                    and_(FullFactor.trade_date == t.trade_date,
                         FullFactor.code == t.code,
                         FullFactor.trade_date.in_(dates)))
            else:
                big_table = outerjoin(
                    big_table, t,
                    and_(FullFactor.trade_date == t.trade_date,
                         FullFactor.code == t.code,
                         FullFactor.trade_date.between(start_date, end_date)))
            joined_tables.add(t.__table__.name)

    universe_df = universe.query(self, start_date, end_date, dates)

    query = select(
        [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
        .select_from(big_table).where(
        and_(
            FullFactor.code.in_(universe_df.code.unique().tolist()),
            FullFactor.trade_date.in_(dates) if dates is not None
            else FullFactor.trade_date.between(start_date, end_date)
        )
    ).distinct()

    df = pd.read_sql(query, self.engine)

    if universe.is_filtered:
        # Trim to the filtered membership before transforming. The membership
        # was already fetched above with the same arguments, so it is reused
        # instead of issuing an identical second query.
        df = pd.merge(df, universe_df, how='inner', on=['trade_date', 'code'])

    if external_data is not None:
        df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

    df.sort_values(['trade_date', 'code'], inplace=True)
    df.set_index('trade_date', inplace=True)
    res = transformer.transform('code', df)

    # Copy over only the columns the transformer created; values are assigned
    # positionally, so this relies on ``res`` keeping the row order of ``df``.
    reserved = {'code', 'isOpen'}
    for col in res.columns:
        if col not in reserved and col not in df.columns:
            df[col] = res[col].values

    df['isOpen'] = df.isOpen.astype(bool)
    df = df.reset_index()
    return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')