def test_fit_execute_multi_target(self, data):
    '''
    Fit and execute a pipeline with two targets and one shared model.

    Two scenarios are covered:

    1. the base target plus a marketcap target with ``quarter_shift=-1``:
       the ``y_0`` and ``y_1`` output columns must not be identical;
    2. the base target passed twice: ``y_0`` and ``y_1`` must be identical.
    '''
    f1, target, model = self._create_base_components()
    target1 = QuarterlyTarget(data_key='quarterly',
                              col='marketcap',
                              quarter_shift=-1)

    # Scenario 1: two different targets.
    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=[target, target1],
                        model=model,
                        out_name=None)
    res = pipeline.fit(tickers, metric=median_absolute_relative_error)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    # At least one row has to differ between the two outputs.
    assert not (df['y_0'] == df['y_1']).all()

    # Scenario 2: the same target twice.
    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=[target, target],
                        model=model,
                        out_name=None)
    res = pipeline.fit(tickers, metric=median_absolute_relative_error)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(tickers)
    # Guard against a vacuous pass: ``.all()`` is True on an empty frame.
    assert not df.empty
    assert (df['y_0'] == df['y_1']).all()
def FairMarketcapDiffSF1(pretrained: bool = True) -> Pipeline:
    '''
    Model is used to evaluate quarter-to-quarter(q2q) company
    fundamental progress. Model uses
    :class:`~ml_investment.features.QuarterlyDiffFeatures`
    (q2q results progress, e.g. 30% revenue increase,
    decrease in debt by 15% etc),
    :class:`~ml_investment.features.BaseCompanyFeatures`,
    :class:`~ml_investment.features.QuarterlyFeatures`,
    :class:`~ml_investment.features.CommoditiesAggQuarterFeatures`
    and tries to predict the real q2q marketcap difference
    ( :class:`~ml_investment.targets.QuarterlyDiffTarget` ).
    So the model prediction may be interpreted as the "fair" marketcap
    change according to this q2q fundamental change.

    :mod:`~ml_investment.data_loaders.sf1`
    is used for loading data.

    Note:
        SF1 dataset is paid, so to use this model you need
        to subscribe and paste your quandl token into
        `~/.ml_investment/secrets.json` ``quandl_api_key``

    Parameters
    ----------
    pretrained:
        use pretrained weights or not.
        If so, `fair_marketcap_diff_sf1.pickle` will be downloaded
        unless it is already present locally.
        Downloading directory path can be changed in
        `~/.ml_investment/config.json` ``models_path``

    Returns
    -------
    Pipeline
        pipeline assembled from this module's data, feature, target and
        model factories; its core is loaded from the pickle file when
        ``pretrained`` is true.
    '''
    _check_download_data()
    data = _create_data()
    feature = _create_feature()
    target = _create_target()
    model = _create_model()

    pipeline = Pipeline(feature=feature,
                        target=target,
                        model=model,
                        data=data,
                        out_name=OUT_NAME)

    core_path = '{}/{}.pickle'.format(config['models_path'], OUT_NAME)

    if pretrained:
        if not os.path.exists(core_path):
            print('Downloading pretrained model')
            os.makedirs(os.path.dirname(core_path), exist_ok=True)
            # Download next to the final location and rename afterwards, so
            # an interrupted transfer never leaves a truncated file at
            # core_path that later calls would mistake for a valid model.
            part_path = core_path + '.part'
            try:
                urlretrieve(URL, part_path)
                os.replace(part_path, core_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
        # NOTE(review): the core is a downloaded .pickle file; loading it is
        # only safe while URL points at a trusted source.
        pipeline.load_core(core_path)

    return pipeline
def test_fit_execute_simple(self, data):
    '''
    Fit and execute a single-target pipeline with default output naming.

    With ``out_name=None`` the metric is expected under ``metric_y_0``
    and the prediction column under ``y_0``.
    '''
    f1, target, model = self._create_base_components()
    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=target,
                        model=model,
                        out_name=None)

    res = pipeline.fit(tickers, metric=median_absolute_relative_error)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0

    df = pipeline.execute(tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
def FairMarketcapSF1(pretrained: bool = True) -> Pipeline:
    '''
    Model is used to estimate fair company marketcap for
    the last several quarters. Pipeline uses features from
    :class:`~ml_investment.features.BaseCompanyFeatures`,
    :class:`~ml_investment.features.QuarterlyFeatures`,
    :class:`~ml_investment.features.DailyAggQuarterFeatures`,
    :class:`~ml_investment.features.CommoditiesAggQuarterFeatures`
    and is trained to predict real market capitalizations
    ( using :class:`~ml_investment.targets.QuarterlyTarget` ).
    Since some companies are overvalued and some are undervalued,
    the model makes an average "fair" prediction.

    :mod:`~ml_investment.data_loaders.sf1` and
    :mod:`~ml_investment.data_loaders.quandl_commodities`
    are used for loading data.

    Note:
        SF1 dataset is paid, so to use this model you need
        to subscribe and paste your quandl token into
        `~/.ml_investment/secrets.json` ``quandl_api_key``

    Parameters
    ----------
    pretrained:
        use pretrained weights or not.
        If so, `fair_marketcap_sf1.pickle` will be downloaded
        unless it is already present locally.
        Downloading directory path can be changed in
        `~/.ml_investment/config.json` ``models_path``

    Returns
    -------
    Pipeline
        pipeline assembled from this module's data, feature, target and
        model factories; its core is loaded from the pickle file when
        ``pretrained`` is true.
    '''
    _check_download_data()
    data = _create_data()
    feature = _create_feature()
    target = _create_target()
    model = _create_model()

    pipeline = Pipeline(feature=feature,
                        target=target,
                        model=model,
                        data=data,
                        out_name=OUT_NAME)

    core_path = '{}/{}.pickle'.format(config['models_path'], OUT_NAME)

    if pretrained:
        if not os.path.exists(core_path):
            print('Downloading pretrained model')
            os.makedirs(os.path.dirname(core_path), exist_ok=True)
            # Download next to the final location and rename afterwards, so
            # an interrupted transfer never leaves a truncated file at
            # core_path that later calls would mistake for a valid model.
            part_path = core_path + '.part'
            try:
                urlretrieve(URL, part_path)
                os.replace(part_path, core_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
        # NOTE(review): the core is a downloaded .pickle file; loading it is
        # only safe while URL points at a trusted source.
        pipeline.load_core(core_path)

    return pipeline
def test_fit_execute_multi_names(self, data):
    '''
    Fit and execute a two-target pipeline with explicit output names.

    The same target is passed twice with ``out_name=['name1', 'name2']``;
    metrics and prediction columns must be keyed by those names and the
    two prediction columns must be identical.
    '''
    f1, target, model = self._create_base_components()
    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=[target, target],
                        model=model,
                        out_name=['name1', 'name2'])

    res = pipeline.fit(tickers, metric=median_absolute_relative_error)
    assert isinstance(res, dict)
    assert res['metric_name1'] > 0
    assert res['metric_name2'] > 0

    df = pipeline.execute(tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['name1'].mean() > 0
    assert df['name2'].mean() > 0
    assert (df['name1'] == df['name2']).all()
def FairMarketcapDiffYahoo(pretrained: bool = True) -> Pipeline:
    '''
    Model is used to evaluate quarter-to-quarter(q2q) company
    fundamental progress. Model uses
    :class:`~ml_investment.features.QuarterlyDiffFeatures`
    (q2q results progress, e.g. 30% revenue increase,
    decrease in debt by 15% etc),
    :class:`~ml_investment.features.BaseCompanyFeatures`,
    :class:`~ml_investment.features.QuarterlyFeatures`
    and tries to predict the smoothed real q2q marketcap difference
    ( :class:`~ml_investment.targets.DailySmoothedQuarterlyDiffTarget` ).
    So the model prediction may be interpreted as the "fair" marketcap
    change according to this q2q fundamental change.

    :mod:`~ml_investment.data_loaders.yahoo` and
    :mod:`~ml_investment.data_loaders.daily_bars`
    are used for loading data.

    Parameters
    ----------
    pretrained:
        use pretrained weights or not.
        If so, `fair_marketcap_diff_yahoo.pickle` will be downloaded
        unless it is already present locally.
        Downloading directory path can be changed in
        `~/.ml_investment/config.json` ``models_path``

    Returns
    -------
    Pipeline
        pipeline assembled from this module's data, feature, target and
        model factories; its core is loaded from the pickle file when
        ``pretrained`` is true.
    '''
    _check_download_data()
    data = _create_data()
    feature = _create_feature()
    target = _create_target()
    model = _create_model()

    pipeline = Pipeline(feature=feature,
                        target=target,
                        model=model,
                        data=data,
                        out_name=OUT_NAME)

    core_path = '{}/{}.pickle'.format(config['models_path'], OUT_NAME)

    if pretrained:
        if not os.path.exists(core_path):
            print('Downloading pretrained model')
            os.makedirs(os.path.dirname(core_path), exist_ok=True)
            # Download next to the final location and rename afterwards, so
            # an interrupted transfer never leaves a truncated file at
            # core_path that later calls would mistake for a valid model.
            part_path = core_path + '.part'
            try:
                urlretrieve(URL, part_path)
                os.replace(part_path, core_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
        # NOTE(review): the core is a downloaded .pickle file; loading it is
        # only safe while URL points at a trusted source.
        pipeline.load_core(core_path)

    return pipeline
def test_export_load(self, data, tmpdir):
    '''
    Round-trip a fitted pipeline core through export and load.

    Predictions made after ``load_core`` must equal the predictions made
    right after fitting.
    '''
    f1, target, model = self._create_base_components()
    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=target,
                        model=model,
                        out_name=None)
    pipeline.fit(tickers, metric=median_absolute_relative_error)
    before = pipeline.execute(tickers)

    # export_core is given the path without extension, load_core the
    # same path with '.pickle' appended.
    export_path = '{}/pipeline'.format(str(tmpdir))
    pipeline.export_core(export_path)
    pipeline.load_core(export_path + '.pickle')

    after = pipeline.execute(tickers)
    np.testing.assert_array_equal(before['y_0'].values,
                                  after['y_0'].values)
def MarketcapDownStdSF1(pretrained: bool = True) -> Pipeline:
    '''
    Model is used to predict future down-std value.
    Pipeline consists of time-series model training
    ( :class:`~ml_investment.models.TimeSeriesOOFModel` )
    and validation on real marketcap down-std values
    ( :class:`~ml_investment.targets.DailyAggTarget` ).
    Model prediction may be interpreted as "risk" for the next quarter.

    :mod:`~ml_investment.data_loaders.sf1`
    is used for loading data.

    Note:
        SF1 dataset is paid, so to use this model you need
        to subscribe and paste your quandl token into
        `~/.ml_investment/secrets.json` ``quandl_api_key``

    Parameters
    ----------
    pretrained:
        use pretrained weights or not.
        If so, `marketcap_down_std_sf1.pickle` will be downloaded
        unless it is already present locally.
        Downloading directory path can be changed in
        `~/.ml_investment/config.json` ``models_path``

    Returns
    -------
    Pipeline
        pipeline assembled from this module's data, feature, target and
        model factories; its core is loaded from the pickle file when
        ``pretrained`` is true.
    '''
    _check_download_data()
    data = _create_data()
    feature = _create_feature()
    target = _create_target()
    model = _create_model()

    pipeline = Pipeline(feature=feature,
                        target=target,
                        model=model,
                        data=data,
                        out_name=OUT_NAME)

    core_path = '{}/{}.pickle'.format(config['models_path'], OUT_NAME)

    if pretrained:
        if not os.path.exists(core_path):
            print('Downloading pretrained model')
            os.makedirs(os.path.dirname(core_path), exist_ok=True)
            # Download next to the final location and rename afterwards, so
            # an interrupted transfer never leaves a truncated file at
            # core_path that later calls would mistake for a valid model.
            part_path = core_path + '.part'
            try:
                urlretrieve(URL, part_path)
                os.replace(part_path, core_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
        # NOTE(review): the core is a downloaded .pickle file; loading it is
        # only safe while URL points at a trusted source.
        pipeline.load_core(core_path)

    return pipeline
def test_fit_execute_multi_target_metric(self, data):
    '''
    Fit a two-target pipeline with a list of metrics.

    The base target and a marketcap target with ``quarter_shift=-1`` are
    fitted with ``[median_absolute_relative_error,
    mean_absolute_relative_error]``; both reported metrics must be
    positive and ``metric_y_0`` must be smaller than ``metric_y_1``.
    '''
    f1, target, model = self._create_base_components()
    target1 = QuarterlyTarget(data_key='quarterly',
                              col='marketcap',
                              quarter_shift=-1)
    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=[target, target1],
                        model=model,
                        out_name=None)

    # NOTE(review): presumably the i-th metric is applied to the i-th
    # target - confirm against Pipeline.fit.
    res = pipeline.fit(tickers,
                       metric=[median_absolute_relative_error,
                               mean_absolute_relative_error])

    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0
    assert res['metric_y_0'] < res['metric_y_1']
def FairMarketcapYahoo(pretrained: bool = True) -> Pipeline:
    '''
    Model is used to estimate fair company marketcap for
    the `last` quarter. Pipeline uses features from
    :class:`~ml_investment.features.BaseCompanyFeatures`,
    :class:`~ml_investment.features.QuarterlyFeatures`
    and is trained to predict real market capitalizations
    ( using :class:`~ml_investment.targets.QuarterlyTarget` ).
    Since some companies are overvalued and some are undervalued,
    the model makes an average "fair" prediction.

    :mod:`~ml_investment.data_loaders.yahoo`
    is used for loading data.

    Parameters
    ----------
    pretrained:
        use pretrained weights or not.
        If so, `fair_marketcap_yahoo.pickle` will be downloaded
        unless it is already present locally.
        Downloading directory path can be changed in
        `~/.ml_investment/config.json` ``models_path``

    Returns
    -------
    Pipeline
        pipeline assembled from this module's data, feature, target and
        model factories; its core is loaded from the pickle file when
        ``pretrained`` is true.
    '''
    _check_download_data()
    data = _create_data()
    feature = _create_feature()
    target = _create_target()
    model = _create_model()

    pipeline = Pipeline(feature=feature,
                        target=target,
                        model=model,
                        data=data,
                        out_name=OUT_NAME)

    core_path = '{}/{}.pickle'.format(config['models_path'], OUT_NAME)

    if pretrained:
        if not os.path.exists(core_path):
            print('Downloading pretrained model')
            os.makedirs(os.path.dirname(core_path), exist_ok=True)
            # Download next to the final location and rename afterwards, so
            # an interrupted transfer never leaves a truncated file at
            # core_path that later calls would mistake for a valid model.
            part_path = core_path + '.part'
            try:
                urlretrieve(URL, part_path)
                os.replace(part_path, core_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
        # NOTE(review): the core is a downloaded .pickle file; loading it is
        # only safe while URL points at a trusted source.
        pipeline.load_core(core_path)

    return pipeline
def test_fit_execute_multi_target_model(self, data):
    '''
    Fit and execute a pipeline with one model per target.

    The same target is passed twice, paired with the base model and a
    ``GroupedOOFModel`` wrapping a CatBoost regressor. Both metrics and
    both prediction columns must be positive, and the two prediction
    columns must not be identical.
    '''
    f1, target, model = self._create_base_components()
    model1 = GroupedOOFModel(ctb.CatBoostRegressor(verbose=False),
                             group_column='ticker',
                             fold_cnt=4)

    pipeline = Pipeline(data=data,
                        feature=f1,
                        target=[target, target],
                        model=[model, model1],
                        out_name=None)

    res = pipeline.fit(tickers, metric=median_absolute_relative_error)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    # Same target, different models: at least one prediction must differ.
    assert not (df['y_0'] == df['y_1']).all()
def test_fit_execute_simple(self, data):
    '''
    Check that MergePipeline output agrees with its member pipelines.

    Two prediction pipelines (``p1``, ``p2``) and a LoadingPipeline over
    the quarterly ``marketcap`` column are merged on ``ticker``/``date``.
    The merged output is compared for three cases: a merge fitted as a
    whole (``merge1``), a merge built from separately fitted pipelines
    (``merge2``), and ``merge1`` executed with a second positional
    argument of 2.
    '''
    columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
    expected_columns = ['ticker', 'date', 'p1', 'p2', 'marketcap']

    f1 = QuarterlyFeatures(data_key='quarterly',
                           columns=columns,
                           quarter_counts=[2, 10],
                           max_back_quarter=1)
    target1 = QuarterlyTarget(data_key='quarterly',
                              col='marketcap',
                              quarter_shift=0)
    target2 = QuarterlyTarget(data_key='quarterly',
                              col='marketcap',
                              quarter_shift=-1)
    model = lgbm.sklearn.LGBMRegressor()

    pipeline1 = Pipeline(data=data, feature=f1, target=target1,
                         model=model, out_name='p1')
    pipeline2 = Pipeline(data=data, feature=f1, target=target2,
                         model=model, out_name='p2')
    pipeline3 = LoadingPipeline(data['quarterly'],
                                ['ticker', 'date', 'marketcap'])

    # Case 1: fit through the merge pipeline itself.
    merge1 = MergePipeline(pipeline_list=[pipeline1, pipeline2, pipeline3],
                           execute_merge_on=['ticker', 'date'])
    merge1.fit(tickers)
    df_m1 = merge1.execute(tickers)

    # Case 2: fit the members separately, then merge them.
    pipeline1.fit(tickers)
    pipeline2.fit(tickers)
    merge2 = MergePipeline(pipeline_list=[pipeline1, pipeline2, pipeline3],
                           execute_merge_on=['ticker', 'date'])
    df1 = pipeline1.execute(tickers)
    df2 = pipeline2.execute(tickers)
    # Run the loading pipeline standalone; its result is not compared.
    pipeline3.execute(tickers)

    # The original executed merge1 here, leaving merge2 untested.
    # NOTE(review): assumes MergePipeline.execute needs no prior
    # merge2.fit call - confirm against MergePipeline.
    df_m2 = merge2.execute(tickers)
    # NOTE(review): the second argument is presumably a batch size -
    # confirm against MergePipeline.execute.
    df_m3 = merge1.execute(tickers, 2)

    assert isinstance(df_m1, pd.DataFrame)
    assert isinstance(df_m2, pd.DataFrame)
    assert len(df_m1) == len(df1)
    assert len(df_m2) == len(df1)

    np.testing.assert_array_equal(df_m1.columns, expected_columns)
    np.testing.assert_array_equal(df_m2.columns, expected_columns)
    np.testing.assert_array_equal(df_m3.columns, expected_columns)

    np.testing.assert_array_equal(df1['p1'], df_m1['p1'])
    np.testing.assert_array_equal(df2['p2'], df_m1['p2'])
    np.testing.assert_array_equal(df_m1['p1'], df_m2['p1'])
    np.testing.assert_array_equal(df_m1['p2'], df_m2['p2'])
    np.testing.assert_array_equal(df_m2['p1'], df_m3['p1'])
    np.testing.assert_array_equal(df_m2['p2'], df_m3['p2'])