def _create_feature():
    """Build the merged feature calculator used by this pipeline.

    Quarterly features are joined with base company features on
    ``ticker``; quarterly diff features and commodity daily aggregates
    are then joined, in that order, on ``(ticker, date)``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # Company data has no date dimension, so the first join is on ticker only.
    merged = FeatureMerger(quarterly, company, on='ticker')
    # Remaining calculators are date-aware and join on both keys.
    for extra in (quarterly_diff, commodities):
        merged = FeatureMerger(merged, extra, on=['ticker', 'date'])
    return merged
def _create_feature():
    """Build the merged feature calculator used by this pipeline.

    Quarterly features are joined with base company features on
    ``ticker``; daily aggregates over ``DAILY_AGG_COLUMNS`` and commodity
    price aggregates are then joined, in that order, on ``(ticker, date)``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)

    # NOTE (from the original author): daily aggregates on marketcap and pe
    # are acceptable here because the values are normalized, so they do not
    # introduce leakage.
    daily_aggs = DailyAggQuarterFeatures(
        daily_data_key='daily',
        quarterly_data_key='quarterly',
        columns=DAILY_AGG_COLUMNS,
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # Company data has no date dimension, so the first join is on ticker only.
    merged = FeatureMerger(quarterly, company, on='ticker')
    # Remaining calculators are date-aware and join on both keys.
    for extra in (daily_aggs, commodities):
        merged = FeatureMerger(merged, extra, on=['ticker', 'date'])
    return merged
def test_calculate(self, data, tickers, cat_columns):
    """Check ``BaseCompanyFeatures.calculate`` output and repeatability.

    Verifies that the result is a DataFrame whose index includes
    ``ticker``, that every categorical column has as many distinct values
    as the base data restricted to ``tickers``, and that a second
    ``calculate`` call on the same instance yields identical categorical
    columns.

    Args:
        data: Mapping of data keys to loaders; ``data['base']`` must
            provide ``load(tickers)``.
        tickers: Collection of ticker symbols to compute features for.
        cat_columns: Names of the categorical columns to check.
    """
    fc = BaseCompanyFeatures(data_key='base', cat_columns=cat_columns)
    X = fc.calculate(data, tickers)

    assert isinstance(X, pd.DataFrame)
    assert 'ticker' in X.index.names

    base_data = data['base'].load(tickers)
    # Keep only the requested tickers, whatever the loader returned.
    base_data = base_data[base_data['ticker'].isin(tickers)]

    for col in cat_columns:
        assert len(base_data[col].unique()) == len(X[col].unique())

    # Reuse fitted after first calculate fc
    new_X = fc.calculate(data, tickers)
    for col in cat_columns:
        assert (new_X[col] == X[col]).all()
def _create_feature():
    """Build the merged feature calculator used by this pipeline.

    Quarterly features (looking back a single quarter) are joined with
    base company features on ``ticker``.

    Returns:
        A ``FeatureMerger`` combining the two calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=1,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    return FeatureMerger(quarterly, company, on='ticker')
def test_calculate(self, data, tickers):
    """Check that ``FeatureMerger`` preserves the left-hand features.

    Two mergers are built with the same left calculator (``fc1``): one
    joined with quarterly diff features on ``(ticker, date)`` and one
    joined with base company features on ``ticker``. For each merged
    result the test asserts that:

    * the row count equals that of the left features,
    * the column count is the sum of both inputs' column counts,
    * the index equals the left features' index, and
    * the leading columns equal the left features' columns value by value.

    Args:
        data: Data source passed through to every ``calculate`` call.
        tickers: Collection of ticker symbols to compute features for.
    """
    fc1 = QuarterlyFeatures(data_key='quarterly',
                            columns=['ebit'],
                            quarter_counts=[2],
                            max_back_quarter=10)
    fc2 = QuarterlyDiffFeatures(data_key='quarterly',
                                columns=['ebit', 'debt'],
                                compare_quarter_idxs=[1, 4],
                                max_back_quarter=10)
    fc3 = BaseCompanyFeatures(data_key='base',
                              cat_columns=['sector', 'sicindustry'])

    X1 = fc1.calculate(data, tickers)
    X2 = fc2.calculate(data, tickers)
    X3 = fc3.calculate(data, tickers)

    fm1 = FeatureMerger(fc1, fc2, on=['ticker', 'date'])
    Xm1 = fm1.calculate(data, tickers)

    fm2 = FeatureMerger(fc1, fc3, on='ticker')
    Xm2 = fm2.calculate(data, tickers)

    left_width = X1.shape[1]
    # Same invariants for both mergers: the merge must behave like a left
    # join that leaves the left-hand rows and values untouched.
    for merged, right in ((Xm1, X2), (Xm2, X3)):
        assert merged.shape[0] == X1.shape[0]
        assert merged.shape[1] == left_width + right.shape[1]
        assert (merged.index == X1.index).all()

        # Merged column names may differ from the originals, so the
        # columns are matched by position rather than by name.
        for nc, oc in zip(merged.columns[:left_width], X1.columns):
            assert (merged[nc] == X1[oc]).all()
def test_calculate(self, data_loader, tickers, cat_columns):
    """Check ``BaseCompanyFeatures.calculate`` output and encoder reuse.

    Verifies that the result is a DataFrame whose index includes
    ``ticker``, that each categorical column's encoder has as many classes
    as the base data has distinct values, and that later ``calculate``
    calls (on the raw loader and on a ``WrapData`` wrapper) yield the same
    categorical columns as the first call.

    Args:
        data_loader: Loader exposing ``load_base_data()``.
        tickers: Collection of ticker symbols to compute features for.
        cat_columns: Names of the categorical columns to check.
    """
    fc = BaseCompanyFeatures(cat_columns=cat_columns)
    X = fc.calculate(data_loader, tickers)

    assert isinstance(X, pd.DataFrame)
    assert 'ticker' in X.index.names

    base_data = data_loader.load_base_data()
    for col in cat_columns:
        n_classes = len(fc.col_to_encoder[col].classes_)
        assert len(base_data[col].unique()) == n_classes

    # Reuse fitted after first calculate fc
    for col in cat_columns:
        assert col in fc.col_to_encoder

    # The second call uses the raw loader; WrapData is only constructed
    # after that call, matching the original statement order.
    new_X = fc.calculate(data_loader, tickers)
    for col in cat_columns:
        assert (new_X[col] == X[col]).all()

    wd = WrapData(data_loader, tickers)
    new_X = fc.calculate(wd, tickers)
    for col in cat_columns:
        assert (new_X[col] == X[col]).all()
def _create_feature():
    """Build the merged feature calculator used by this pipeline.

    Quarterly features are joined with base company features on
    ``ticker``; quarterly diff features are then joined on
    ``(ticker, date)``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all three calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )

    # Company data has no date dimension, so the first join is on ticker only.
    with_company = FeatureMerger(quarterly, company, on='ticker')
    return FeatureMerger(with_company, quarterly_diff, on=['ticker', 'date'])
args = parser.parse_args() config = load_json(args.config_path) data_loader = SF1Data(config['sf1_data_path']) tickers_df = data_loader.load_base_data( currency=CURRENCY, scalemarketcap=SCALE_MARKETCAP) ticker_list = tickers_df['ticker'].unique().tolist() fc1 = QuarterlyFeatures( columns=QUARTER_COLUMNS, quarter_counts=QUARTER_COUNTS, max_back_quarter=MAX_BACK_QUARTER) fc2 = BaseCompanyFeatures(cat_columns=CAT_COLUMNS) fc3 = QuarterlyDiffFeatures( columns=QUARTER_COLUMNS, compare_quarter_idxs=COMPARE_QUARTER_IDXS, max_back_quarter=MAX_BACK_QUARTER) feature = FeatureMerger(fc1, fc2, on='ticker') feature = FeatureMerger(feature, fc3, on=['ticker', 'date']) target = QuarterlyDiffTarget(col='marketcap') base_models = [lgbm.sklearn.LGBMRegressor(), ctb.CatBoostRegressor(verbose=False)] ensemble = EnsembleModel(base_models=base_models,