def prepare_data(engine: SqlEngine, factors: Union[Transformer, Iterable[object]], start_date: str, end_date: str, frequency: str, universe: Universe, benchmark: int, warm_start: int = 0): if warm_start > 0: p = Period(frequency) p = Period(length=-warm_start * p.length(), units=p.units()) start_date = advanceDateByCalendar('china.sse', start_date, p).strftime('%Y-%m-%d') dates = makeSchedule(start_date, end_date, frequency, calendar='china.sse', dateRule=BizDayConventions.Following, dateGenerationRule=DateGeneration.Forward) dates = [d.strftime('%Y-%m-%d') for d in dates] horizon = map_freq(frequency) if isinstance(factors, Transformer): transformer = factors else: transformer = Transformer(factors) factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates).sort_values( ['trade_date', 'code']) alpha_logger.info("factor data loading finished") return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon) alpha_logger.info("return data loading finished") industry_df = engine.fetch_industry_range(universe, dates=dates) alpha_logger.info("industry data loading finished") benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates) alpha_logger.info("benchmark data loading finished") df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna() df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left') df = pd.merge(df, industry_df, on=['trade_date', 'code']) df['weight'] = df['weight'].fillna(0.) return dates, df[['trade_date', 'code', 'dx']], df[[ 'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry' ] + transformer.names]
def prepare_data(engine: SqlEngine, factors: Union[Transformer, Iterable[object]], start_date: str, end_date: str, frequency: str, universe: Universe, benchmark: int, warm_start: int = 0, fit_target: Union[Transformer, object] = None): if warm_start > 0: p = Period(frequency) p = Period(length=-warm_start * p.length(), units=p.units()) start_date = advanceDateByCalendar('china.sse', start_date, p).strftime('%Y-%m-%d') dates = makeSchedule(start_date, end_date, frequency, calendar='china.sse', dateRule=BizDayConventions.Following, dateGenerationRule=DateGeneration.Forward) dates = [d.strftime('%Y-%m-%d') for d in dates] horizon = map_freq(frequency) if isinstance(factors, Transformer): transformer = factors else: transformer = Transformer(factors) factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates).sort_values( ['trade_date', 'code']) alpha_logger.info("factor data loading finished") if fit_target is None: target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon) else: one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency) target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date]) target_df = target_df[target_df.trade_date.isin(dates)] target_df = target_df.groupby('code').apply( lambda x: x.fillna(method='pad')) alpha_logger.info("fit target data loading finished") industry_df = engine.fetch_industry_range(universe, dates=dates) alpha_logger.info("industry data loading finished") benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates) alpha_logger.info("benchmark data loading finished") df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna() df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left') df = pd.merge(df, industry_df, on=['trade_date', 'code']) df['weight'] = df['weight'].fillna(0.) df.dropna(inplace=True) return dates, df[[ 'trade_date', 'code', 'dx' ]], df[['trade_date', 'code', 'weight', 'industry_code', 'industry'] + transformer.names]