def test_fit_execute_multi_target(self, data_loader):
        """Fit and execute a pipeline configured with a list of two targets.

        The first pipeline pairs the target returned by
        ``_create_base_components()`` with a ``marketcap`` target using
        ``quarter_shift=-1`` and expects the ``y_0`` / ``y_1`` outputs to
        differ somewhere.  The second pipeline passes the same target twice
        and expects both outputs to be equal on every row.
        """
        f1, target, model = self._create_base_components()
        target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)

        # Case 1: two different targets.
        pipeline = BasePipeline(feature=f1,
                                target=[target, target1],
                                model=model,
                                metric=median_absolute_relative_error,
                                out_name=None)

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_y_0'] > 0
        assert res['metric_y_1'] > 0
        df = pipeline.execute(data_loader, tickers)
        assert isinstance(df, pd.DataFrame)
        assert df['y_0'].mean() > 0
        assert df['y_1'].mean() > 0
        # At least one row must differ between the two outputs.
        assert not (df['y_0'] == df['y_1']).all()

        # Case 2: the same target given twice.
        pipeline = BasePipeline(feature=f1,
                                target=[target, target],
                                model=model,
                                metric=median_absolute_relative_error,
                                out_name=None)

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_y_0'] > 0
        assert res['metric_y_1'] > 0
        df = pipeline.execute(data_loader, tickers)
        # Every row must match between the two outputs.
        assert (df['y_0'] == df['y_1']).all()
    def test_execute_simple(self, data_loader):
        """Merge two fitted pipelines and a load pipeline on ticker/date.

        Checks that ``ExecuteMergePipeline.execute`` returns a DataFrame
        with as many rows as the first pipeline's own output, with columns
        ``ticker, date, p1, p2, marketcap`` in that order, and that its
        ``p1`` / ``p2`` columns equal those obtained by executing each
        pipeline directly.
        """
        columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
        f1 = QuarterlyFeatures(columns=columns,
                               quarter_counts=[2, 10],
                               max_back_quarter=1)

        target1 = QuarterlyTarget(col='marketcap', quarter_shift=0)
        target2 = QuarterlyTarget(col='marketcap', quarter_shift=-1)

        # NOTE(review): both pipelines below receive this same regressor
        # instance; confirm BasePipeline does not let the second fit
        # overwrite the model used by the first.
        model = lgbm.sklearn.LGBMRegressor()

        pipeline1 = BasePipeline(feature=f1,
                                 target=target1,
                                 model=model,
                                 metric=median_absolute_relative_error,
                                 out_name='p1')

        pipeline2 = BasePipeline(feature=f1,
                                 target=target2,
                                 model=model,
                                 metric=median_absolute_relative_error,
                                 out_name='p2')

        pipeline3 = QuarterlyLoadPipeline(['ticker', 'date', 'marketcap'])

        pipeline1.fit(data_loader, tickers)
        pipeline2.fit(data_loader, tickers)

        merge1 = ExecuteMergePipeline(
            pipeline_list=[pipeline1, pipeline2, pipeline3],
            on=['ticker', 'date'])

        df1 = pipeline1.execute(data_loader, tickers)
        df2 = pipeline2.execute(data_loader, tickers)
        # Run on its own only as a smoke check; its result is not inspected.
        pipeline3.execute(data_loader, tickers)

        df = merge1.execute(data_loader, tickers)
        assert isinstance(df, pd.DataFrame)
        assert len(df) == len(df1)
        np.testing.assert_array_equal(
            df.columns, ['ticker', 'date', 'p1', 'p2', 'marketcap'])

        # The merged columns must match the standalone pipeline outputs.
        np.testing.assert_array_equal(df1['p1'], df['p1'])
        np.testing.assert_array_equal(df2['p2'], df['p2'])
    def test_fit_execute_simple(self, data_loader):
        """Fit and execute a single-target pipeline with default naming.

        With ``out_name=None`` the fit result is expected to expose the
        metric under ``metric_y_0`` and the executed frame to expose the
        prediction under ``y_0``; both are checked to be positive.
        """
        f1, target, model = self._create_base_components()
        pipeline = BasePipeline(feature=f1,
                                target=target,
                                model=model,
                                metric=median_absolute_relative_error,
                                out_name=None)

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_y_0'] > 0
        df = pipeline.execute(data_loader, tickers)
        assert isinstance(df, pd.DataFrame)
        assert df['y_0'].mean() > 0
    def test_export_load(self, data_loader, tmpdir):
        """Check that an exported and re-loaded pipeline reproduces ``y_0``.

        Fits a pipeline, records its predictions, exports it under
        ``tmpdir``, loads it back from the ``.pickle`` file and compares the
        loaded pipeline's ``y_0`` values with the recorded ones.
        """
        f1, target, model = self._create_base_components()
        pipeline = BasePipeline(feature=f1,
                                target=target,
                                model=model,
                                metric=median_absolute_relative_error,
                                out_name=None)
        pipeline.fit(data_loader, tickers)
        df = pipeline.execute(data_loader, tickers)

        # export_core receives the path without extension, while load
        # receives the same path with ".pickle" appended.
        core_path = '{}/pipeline'.format(str(tmpdir))
        pipeline.export_core(core_path)
        pipeline = BasePipeline.load(core_path + '.pickle')
        # NOTE(review): the reference frame is built from all of `tickers`
        # but the reloaded pipeline only sees `tickers[:100]`; the
        # comparison below presumably relies on `tickers` having at most
        # 100 entries -- confirm.
        df1 = pipeline.execute(data_loader, tickers[:100])

        np.testing.assert_array_equal(df['y_0'].values, df1['y_0'].values)
    def test_fit_execute_multi_names(self, data_loader):
        """Fit and execute a two-target pipeline with explicit output names.

        The same target is passed twice with ``out_name=['name1', 'name2']``.
        The metrics are expected under ``metric_name1`` / ``metric_name2``,
        the predictions under ``name1`` / ``name2``, and the two prediction
        columns are expected to be equal on every row.
        """
        f1, target, model = self._create_base_components()
        pipeline = BasePipeline(feature=f1,
                                target=[target, target],
                                model=model,
                                metric=median_absolute_relative_error,
                                out_name=['name1', 'name2'])

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_name1'] > 0
        assert res['metric_name2'] > 0
        df = pipeline.execute(data_loader, tickers)
        assert isinstance(df, pd.DataFrame)
        assert df['name1'].mean() > 0
        assert df['name2'].mean() > 0
        # Identical targets must give identical predictions on every row.
        assert (df['name1'] == df['name2']).all()
    def test_fit_execute_multi_target_metric(self, data_loader):
        """Fit a two-target pipeline that is given a list of two metrics.

        The median-based metric is listed first and the mean-based metric
        second; the test expects both reported values to be positive and
        ``metric_y_0`` to be strictly smaller than ``metric_y_1``.
        """
        f1, target, model = self._create_base_components()
        target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
        pipeline = BasePipeline(feature=f1,
                                target=[target, target1],
                                model=model,
                                metric=[
                                    median_absolute_relative_error,
                                    mean_absolute_relative_error
                                ],
                                out_name=None)

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_y_0'] > 0
        assert res['metric_y_1'] > 0
        assert res['metric_y_0'] < res['metric_y_1']
    def test_fit_execute_multi_target_model(self, data_loader):
        """Fit and execute a pipeline that is given a list of two models.

        The same target is passed twice, paired with the model returned by
        ``_create_base_components()`` and with a ``GroupedOOFModel`` wrapping
        a CatBoost regressor.  Both metrics and both prediction means must
        be positive, and the two prediction columns must differ somewhere.
        """
        f1, target, model = self._create_base_components()
        # NOTE(review): `target1` is created but never used in this test --
        # confirm whether it was meant to be one of the pipeline targets.
        target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
        model1 = GroupedOOFModel(ctb.CatBoostRegressor(verbose=False),
                                 group_column='ticker',
                                 fold_cnt=4)
        pipeline = BasePipeline(feature=f1,
                                target=[target, target],
                                model=[model, model1],
                                metric=median_absolute_relative_error,
                                out_name=None)

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_y_0'] > 0
        assert res['metric_y_1'] > 0
        df = pipeline.execute(data_loader, tickers)
        assert isinstance(df, pd.DataFrame)
        assert df['y_0'].mean() > 0
        assert df['y_1'].mean() > 0
        # Different models must not produce identical predictions everywhere.
        assert not (df['y_0'] == df['y_1']).all()
Esempio n. 8
0
        agg_day_counts=pipeline_config['agg_day_counts'],
        max_back_quarter=pipeline_config['max_back_quarter'])

    # Chain the feature calculators: fc1 and fc2 are merged on 'ticker',
    # then fc3 and fc4 are merged in turn on ['ticker', 'date'].
    feature = FeatureMerger(fc1, fc2, on='ticker')
    feature = FeatureMerger(feature, fc3, on=['ticker', 'date'])
    feature = FeatureMerger(feature, fc4, on=['ticker', 'date'])

    # Target built from the 'marketcap' column over the configured horizon,
    # with down_std_norm passed as the `foo` callable.
    target = DailyAggTarget(col='marketcap',
                            horizon=pipeline_config['target_horizon'],
                            foo=down_std_norm)

    # One LightGBM and one CatBoost regressor serve as the base models.
    base_models = [
        lgbm.sklearn.LGBMRegressor(),
        ctb.CatBoostRegressor(verbose=False)
    ]

    ansamble = AnsambleModel(base_models=base_models,
                             bagging_fraction=0.7,
                             model_cnt=20)

    # Wrap the ensemble in TimeSeriesOOFModel keyed on the 'date' column
    # with 20 folds (presumably out-of-fold fitting ordered by time --
    # confirm against TimeSeriesOOFModel).
    model = TimeSeriesOOFModel(ansamble, time_column='date', fold_cnt=20)

    pipeline = BasePipeline(feature=feature,
                            target=target,
                            model=model,
                            metric=median_absolute_relative_error)

    # Fit on the given tickers, print the fit result and export the pipeline.
    result = pipeline.fit(data_loader, ticker_list)
    print(result)
    pipeline.export_core('models_data/marketcap_down_std')
Esempio n. 9
0
    # Company-level features built from the configured categorical columns.
    fc2 = BaseCompanyFeatures(cat_columns=pipeline_config['cat_columns'])

    # Quarterly difference features over the configured columns and
    # quarter indexes to compare.
    fc3 = QuarterlyDiffFeatures(
        columns=pipeline_config['quarter_columns'],
        compare_quarter_idxs=pipeline_config['compare_quarter_idxs'],
        max_back_quarter=pipeline_config['max_back_quarter'])

    # Chain the feature calculators: fc1 and fc2 are merged on 'ticker',
    # then fc3 is merged on ['ticker', 'date'].
    feature = FeatureMerger(fc1, fc2, on='ticker')
    feature = FeatureMerger(feature, fc3, on=['ticker', 'date'])

    target = QuarterlyDiffTarget(col='marketcap')

    # One LightGBM and one CatBoost regressor serve as the base models.
    base_models = [
        lgbm.sklearn.LGBMRegressor(),
        ctb.CatBoostRegressor(verbose=False)
    ]

    ansamble = AnsambleModel(base_models=base_models,
                             bagging_fraction=0.7,
                             model_cnt=20)

    # Wrap the ensemble in GroupedOOFModel grouped by the 'ticker' column
    # with 5 folds (presumably out-of-fold fitting with tickers kept within
    # one fold -- confirm against GroupedOOFModel).
    model = GroupedOOFModel(ansamble, group_column='ticker', fold_cnt=5)

    pipeline = BasePipeline(feature=feature,
                            target=target,
                            model=model,
                            metric=median_absolute_relative_error)

    # Fit on the given tickers and export the fitted pipeline.
    pipeline.fit(data_loader, ticker_list)
    pipeline.export_core('models_data/marketcap_diff')