def test_fit_execute_multi_target(self, data_loader):
    """Fit and execute a pipeline with two targets and default output names.

    Two scenarios are checked:

    * two different targets -- the ``y_0`` and ``y_1`` outputs must differ
      on at least one row;
    * the same target passed twice -- both outputs must be equal on every
      row.

    ``data_loader`` is passed through to the pipeline's ``fit`` and
    ``execute`` calls.
    """
    f1, target, model = self._create_base_components()
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)

    # Scenario 1: two distinct targets.
    pipeline = BasePipeline(feature=f1,
                            target=[target, target1],
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)
    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    assert not (df['y_0'] == df['y_1']).all()

    # Scenario 2: the same target twice.
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)
    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    # ``.all()`` is vacuously true on an empty frame, so require rows first.
    assert len(df) > 0
    assert (df['y_0'] == df['y_1']).all()
def test_execute_simple(self, data_loader):
    """Check that ``ExecuteMergePipeline`` merges three pipelines' outputs.

    Two fitted ``BasePipeline`` instances (outputs ``p1`` and ``p2``) and a
    ``QuarterlyLoadPipeline`` are merged on ``['ticker', 'date']``. The
    merged frame must have as many rows as the first pipeline's own output,
    expose the columns ``ticker, date, p1, p2, marketcap`` in that order,
    and carry the same ``p1`` / ``p2`` values as the standalone runs.

    ``data_loader`` is passed through to every ``fit`` and ``execute`` call.
    """
    columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
    f1 = QuarterlyFeatures(columns=columns,
                           quarter_counts=[2, 10],
                           max_back_quarter=1)
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=0)
    target2 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
    model = lgbm.sklearn.LGBMRegressor()

    pipeline1 = BasePipeline(feature=f1,
                             target=target1,
                             model=model,
                             metric=median_absolute_relative_error,
                             out_name='p1')
    pipeline2 = BasePipeline(feature=f1,
                             target=target2,
                             model=model,
                             metric=median_absolute_relative_error,
                             out_name='p2')
    pipeline3 = QuarterlyLoadPipeline(['ticker', 'date', 'marketcap'])

    pipeline1.fit(data_loader, tickers)
    pipeline2.fit(data_loader, tickers)

    merge1 = ExecuteMergePipeline(
        pipeline_list=[pipeline1, pipeline2, pipeline3],
        on=['ticker', 'date'])

    df1 = pipeline1.execute(data_loader, tickers)
    df2 = pipeline2.execute(data_loader, tickers)
    # The standalone result of pipeline3 is not compared against anything;
    # the call is kept so the pipeline is still run on its own.
    pipeline3.execute(data_loader, tickers)
    df = merge1.execute(data_loader, tickers)

    assert isinstance(df, pd.DataFrame)
    assert len(df) == len(df1)
    np.testing.assert_array_equal(
        df.columns, ['ticker', 'date', 'p1', 'p2', 'marketcap'])
    np.testing.assert_array_equal(df1['p1'], df['p1'])
    np.testing.assert_array_equal(df2['p2'], df['p2'])
def test_fit_execute_simple(self, data_loader):
    """Fit and execute a single-target pipeline with the default output name.

    ``fit`` must return a dict with a positive ``metric_y_0`` entry, and
    ``execute`` must return a DataFrame whose ``y_0`` column has a positive
    mean. ``data_loader`` is passed through to both calls.
    """
    f1, target, model = self._create_base_components()
    pipeline = BasePipeline(feature=f1,
                            target=target,
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
def test_export_load(self, data_loader, tmpdir):
    """Round-trip a fitted pipeline through ``export_core`` and ``load``.

    The pipeline is fitted and executed, exported under ``tmpdir``, loaded
    back from the ``.pickle`` file and executed again. The ``y_0`` values
    of both runs must match element-wise.
    """
    feature, target, model = self._create_base_components()
    export_path = '{}/pipeline'.format(str(tmpdir))
    pickle_path = '{}/pipeline.pickle'.format(str(tmpdir))

    fitted = BasePipeline(feature=feature,
                          target=target,
                          model=model,
                          metric=median_absolute_relative_error,
                          out_name=None)
    fitted.fit(data_loader, tickers)
    expected = fitted.execute(data_loader, tickers)
    fitted.export_core(export_path)

    restored = BasePipeline.load(pickle_path)
    # NOTE(review): the reference run uses all of ``tickers`` while the
    # reloaded pipeline runs on ``tickers[:100]``; the comparison presumably
    # relies on there being at most 100 tickers -- confirm.
    actual = restored.execute(data_loader, tickers[:100])

    np.testing.assert_array_equal(expected['y_0'].values,
                                  actual['y_0'].values)
def test_fit_execute_multi_names(self, data_loader):
    """Fit and execute a two-target pipeline with explicit output names.

    The same target is passed twice with ``out_name=['name1', 'name2']``.
    The fit result must contain positive ``metric_name1`` / ``metric_name2``
    entries, and the executed frame must have ``name1`` / ``name2`` columns
    with positive means that are equal on every row.

    ``data_loader`` is passed through to ``fit`` and ``execute``.
    """
    f1, target, model = self._create_base_components()
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=['name1', 'name2'])

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_name1'] > 0
    assert res['metric_name2'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['name1'].mean() > 0
    assert df['name2'].mean() > 0
    assert (df['name1'] == df['name2']).all()
def test_fit_execute_multi_target_metric(self, data_loader):
    """Fit a two-target pipeline configured with a list of two metrics.

    The pipeline receives two different targets together with
    ``[median_absolute_relative_error, mean_absolute_relative_error]``.
    The fit result must be a dict with positive ``metric_y_0`` and
    ``metric_y_1`` entries, and the former must be smaller than the latter.

    ``data_loader`` is passed through to ``fit``.
    """
    f1, target, model = self._create_base_components()
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
    pipeline = BasePipeline(feature=f1,
                            target=[target, target1],
                            model=model,
                            metric=[
                                median_absolute_relative_error,
                                mean_absolute_relative_error,
                            ],
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0
    assert res['metric_y_0'] < res['metric_y_1']
def test_fit_execute_multi_target_model(self, data_loader):
    """Fit and execute a pipeline that uses a list of two models.

    The same target is passed twice, paired with the base model and a
    ``GroupedOOFModel`` wrapping a CatBoost regressor. Both metrics and
    both output means must be positive, and the ``y_0`` / ``y_1`` outputs
    must differ on at least one row.

    ``data_loader`` is passed through to ``fit`` and ``execute``.
    """
    f1, target, model = self._create_base_components()
    # NOTE(review): target1 is created but never used below -- confirm
    # whether the pipeline was meant to receive it.
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
    model1 = GroupedOOFModel(ctb.CatBoostRegressor(verbose=False),
                             group_column='ticker',
                             fold_cnt=4)
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=[model, model1],
                            metric=median_absolute_relative_error,
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    assert not (df['y_0'] == df['y_1']).all()
agg_day_counts=pipeline_config['agg_day_counts'], max_back_quarter=pipeline_config['max_back_quarter']) feature = FeatureMerger(fc1, fc2, on='ticker') feature = FeatureMerger(feature, fc3, on=['ticker', 'date']) feature = FeatureMerger(feature, fc4, on=['ticker', 'date']) target = DailyAggTarget(col='marketcap', horizon=pipeline_config['target_horizon'], foo=down_std_norm) base_models = [ lgbm.sklearn.LGBMRegressor(), ctb.CatBoostRegressor(verbose=False) ] ansamble = AnsambleModel(base_models=base_models, bagging_fraction=0.7, model_cnt=20) model = TimeSeriesOOFModel(ansamble, time_column='date', fold_cnt=20) pipeline = BasePipeline(feature=feature, target=target, model=model, metric=median_absolute_relative_error) result = pipeline.fit(data_loader, ticker_list) print(result) pipeline.export_core('models_data/marketcap_down_std')
fc2 = BaseCompanyFeatures(cat_columns=pipeline_config['cat_columns']) fc3 = QuarterlyDiffFeatures( columns=pipeline_config['quarter_columns'], compare_quarter_idxs=pipeline_config['compare_quarter_idxs'], max_back_quarter=pipeline_config['max_back_quarter']) feature = FeatureMerger(fc1, fc2, on='ticker') feature = FeatureMerger(feature, fc3, on=['ticker', 'date']) target = QuarterlyDiffTarget(col='marketcap') base_models = [ lgbm.sklearn.LGBMRegressor(), ctb.CatBoostRegressor(verbose=False) ] ansamble = AnsambleModel(base_models=base_models, bagging_fraction=0.7, model_cnt=20) model = GroupedOOFModel(ansamble, group_column='ticker', fold_cnt=5) pipeline = BasePipeline(feature=feature, target=target, model=model, metric=median_absolute_relative_error) pipeline.fit(data_loader, ticker_list) pipeline.export_core('models_data/marketcap_diff')