def test_fit_execute_multi_target(self, data_loader):
    """Fit and execute a ``BasePipeline`` configured with two targets.

    Two scenarios are covered:

    * two different targets -- both metrics and both prediction column
      means must be positive, and the ``y_0`` / ``y_1`` columns must
      differ in at least one row;
    * the same target passed twice -- both metrics must be positive and
      the ``y_0`` / ``y_1`` columns must match in every row.
    """
    f1, target, model = self._create_base_components()
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)

    # Scenario 1: two distinct targets.
    pipeline = BasePipeline(feature=f1,
                            target=[target, target1],
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)
    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    # At least one row must differ between the two prediction columns.
    assert not (df['y_0'] == df['y_1']).all()

    # Scenario 2: the same target supplied twice.
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)
    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    # ``.all()`` is vacuously true on an empty frame, so require rows first.
    assert len(df) > 0
    assert (df['y_0'] == df['y_1']).all()
def test_fit_execute_simple(self, data_loader):
    """Fit and execute a single-target ``BasePipeline``.

    Checks that ``fit`` returns a dict with a positive ``metric_y_0``
    entry and that ``execute`` returns a DataFrame whose ``y_0`` column
    has a positive mean.
    """
    f1, target, model = self._create_base_components()
    pipeline = BasePipeline(feature=f1,
                            target=target,
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
def test_fit_execute_multi_names(self, data_loader):
    """Fit and execute a two-target pipeline with explicit output names.

    The same target is passed twice with ``out_name=['name1', 'name2']``.
    The metric keys and the output columns are expected to carry those
    names, and the two output columns must be equal in every row.
    """
    f1, target, model = self._create_base_components()
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=['name1', 'name2'])

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_name1'] > 0
    assert res['metric_name2'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    # The positive-mean checks also guarantee the frame is not empty.
    assert df['name1'].mean() > 0
    assert df['name2'].mean() > 0
    assert (df['name1'] == df['name2']).all()
def test_fit_execute_multi_target_metric(self, data_loader):
    """Fit a two-target pipeline that is given a list of two metrics.

    Both reported metrics must be positive and ``metric_y_0`` must be
    smaller than ``metric_y_1``.
    """
    f1, target, model = self._create_base_components()
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
    # NOTE(review): presumably the metrics are applied position-wise to the
    # targets (median -> y_0, mean -> y_1) -- confirm against BasePipeline.
    pipeline = BasePipeline(feature=f1,
                            target=[target, target1],
                            model=model,
                            metric=[
                                median_absolute_relative_error,
                                mean_absolute_relative_error,
                            ],
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0
    assert res['metric_y_0'] < res['metric_y_1']
def test_execute_simple(self, data_loader):
    """Execute an ``ExecuteMergePipeline`` built from three pipelines.

    Two fitted ``BasePipeline`` instances (outputs ``p1`` and ``p2``) and
    a ``QuarterlyLoadPipeline`` are merged on ``['ticker', 'date']``. The
    merged frame must have as many rows as the first pipeline's output,
    the expected column order, and the same ``p1`` / ``p2`` values as the
    standalone pipeline outputs.
    """
    columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
    f1 = QuarterlyFeatures(columns=columns,
                           quarter_counts=[2, 10],
                           max_back_quarter=1)
    target1 = QuarterlyTarget(col='marketcap', quarter_shift=0)
    target2 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
    model = lgbm.sklearn.LGBMRegressor()

    pipeline1 = BasePipeline(feature=f1,
                             target=target1,
                             model=model,
                             metric=median_absolute_relative_error,
                             out_name='p1')
    pipeline2 = BasePipeline(feature=f1,
                             target=target2,
                             model=model,
                             metric=median_absolute_relative_error,
                             out_name='p2')
    pipeline3 = QuarterlyLoadPipeline(['ticker', 'date', 'marketcap'])

    pipeline1.fit(data_loader, tickers)
    pipeline2.fit(data_loader, tickers)

    merge1 = ExecuteMergePipeline(
        pipeline_list=[pipeline1, pipeline2, pipeline3],
        on=['ticker', 'date'])

    # Standalone outputs used as the reference for the merged frame.
    df1 = pipeline1.execute(data_loader, tickers)
    df2 = pipeline2.execute(data_loader, tickers)
    df = merge1.execute(data_loader, tickers)

    assert isinstance(df, pd.DataFrame)
    assert len(df) == len(df1)
    np.testing.assert_array_equal(
        df.columns, ['ticker', 'date', 'p1', 'p2', 'marketcap'])
    np.testing.assert_array_equal(df1['p1'], df['p1'])
    np.testing.assert_array_equal(df2['p2'], df['p2'])
def test_fit_execute_multi_target_model(self, data_loader):
    """Fit and execute a pipeline with one model per target.

    The same target is passed twice, paired with two different models
    (the base model and a CatBoost-backed ``GroupedOOFModel``). Both
    metrics and both prediction column means must be positive, and the
    ``y_0`` / ``y_1`` columns must differ in at least one row.
    """
    f1, target, model = self._create_base_components()
    model1 = GroupedOOFModel(ctb.CatBoostRegressor(verbose=False),
                             group_column='ticker',
                             fold_cnt=4)
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=[model, model1],
                            metric=median_absolute_relative_error,
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    # Same target, different models: at least one prediction must differ.
    assert not (df['y_0'] == df['y_1']).all()
def test_export_load(self, data_loader, tmpdir):
    """Check that an exported and re-loaded pipeline reproduces its output.

    The pipeline is fitted and executed, exported with ``export_core``
    into ``tmpdir``, loaded back from the resulting ``.pickle`` file and
    executed again. The ``y_0`` values of both runs must be identical.
    """
    f1, target, model = self._create_base_components()
    pipeline = BasePipeline(feature=f1,
                            target=target,
                            model=model,
                            metric=median_absolute_relative_error,
                            out_name=None)
    pipeline.fit(data_loader, tickers)
    df = pipeline.execute(data_loader, tickers)

    pipeline.export_core('{}/pipeline'.format(str(tmpdir)))
    restored = BasePipeline.load('{}/pipeline.pickle'.format(str(tmpdir)))

    # Run the restored pipeline on the same tickers as the reference run;
    # executing on a different subset would make the element-wise
    # comparison below fail on shape alone.
    df1 = restored.execute(data_loader, tickers)
    np.testing.assert_array_equal(df['y_0'].values, df1['y_0'].values)
def test_export_load(self, tmpdir):
    """Check that an exported and re-loaded pipeline reproduces its output.

    Loads SF1 base data for large-cap USD tickers, fits and executes the
    pipeline from ``_create_pipeline`` on the first 100 tickers, exports
    it into ``tmpdir``, loads it back and executes it again on the same
    tickers. The ``y`` values of both runs must be identical.
    """
    data_loader = SF1Data(config['sf1_data_path'])
    tickers_df = data_loader.load_base_data(currency='USD',
                                            scalemarketcap=['5 - Large'])
    # Use one fixed subset for fit and both executes.
    ticker_subset = tickers_df['ticker'].unique().tolist()[:100]

    pipeline = self._create_pipeline()
    pipeline.fit(data_loader, ticker_subset)
    df = pipeline.execute(data_loader, ticker_subset)

    pipeline.export_core('{}/pipeline'.format(str(tmpdir)))
    restored = BasePipeline.load('{}/pipeline.pickle'.format(str(tmpdir)))

    df1 = restored.execute(data_loader, ticker_subset)
    np.testing.assert_array_equal(df['y'].values, df1['y'].values)
def _create_pipeline(self):
    """Build the ``BasePipeline`` used by the tests in this class.

    The pipeline combines quarterly features over six fundamental
    columns, a ``marketcap`` target with ``quarter_shift=0`` and a
    LightGBM regressor wrapped in a ``GroupedOOFModel`` grouped by
    ``ticker`` with four folds, scored by
    ``median_absolute_relative_error``.
    """
    quarterly_features = QuarterlyFeatures(
        columns=['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf'],
        quarter_counts=[2, 10],
        max_back_quarter=1,
    )
    marketcap_target = QuarterlyTarget(col='marketcap', quarter_shift=0)
    oof_model = GroupedOOFModel(
        lgbm.sklearn.LGBMRegressor(),
        group_column='ticker',
        fold_cnt=4,
    )
    return BasePipeline(
        quarterly_features,
        marketcap_target,
        oof_model,
        metric=median_absolute_relative_error,
    )