def data_v3():
    """Build the v3 feature set: encoded object columns, split by obs_type.

    Loads the combined feature frame and training target via ``load_data()``,
    replaces every object-dtype column with the output of
    ``encode_with_observation_counts``, and splits the rows on the
    ``'obs_type'`` index level.

    Returns:
        A ``(X_train, y_train, X_test)`` tuple. ``X_train`` holds the rows
        whose ``'obs_type'`` level equals ``'train'``; ``X_test`` holds every
        other row.
    """
    X, y_train = load_data()

    # Decide which columns to encode up front, before any column is replaced.
    object_columns = [name for name in X.columns if X[name].dtype == 'O']
    for name in object_columns:
        # NOTE(review): presumably a count/frequency encoding, judging by the
        # helper's name -- confirm against its definition.
        X[name] = encode_with_observation_counts(X[name])

    # Boolean mask over rows, derived from the 'obs_type' index level.
    train_mask = X.index.get_level_values('obs_type') == 'train'
    X_train = X[train_mask]
    X_test = X[~train_mask]
    return X_train, y_train, X_test
def data_v8():
    """Build the v8 feature set: encoded columns expanded to polynomial terms.

    Loads the combined feature frame and training target via ``load_data()``,
    replaces every object-dtype column with the output of
    ``encode_with_observation_counts``, expands the result with
    ``PolynomialFeatures(include_bias=False)``, and splits the rows on the
    ``'obs_type'`` index level.

    The expanded columns are named ``poly_feat_0``, ``poly_feat_1``, ... in
    the order the transformer emits them; the original column names are not
    kept.

    Returns:
        A ``(X_train, y_train, X_test)`` tuple. ``X_train`` holds the rows
        whose ``'obs_type'`` level equals ``'train'``; ``X_test`` holds every
        other row.
    """
    X, y_train = load_data()

    # Decide which columns to encode up front, before any column is replaced.
    object_columns = [name for name in X.columns if X[name].dtype == 'O']
    for name in object_columns:
        # NOTE(review): presumably a count/frequency encoding, judging by the
        # helper's name -- confirm against its definition.
        X[name] = encode_with_observation_counts(X[name])

    # NOTE(review): assumes this is scikit-learn's PolynomialFeatures, whose
    # default degree is 2 -- confirm against the file's imports.
    expander = PolynomialFeatures(include_bias=False)
    expanded = expander.fit_transform(X)

    # Re-wrap the transformer output, keeping the original row index so the
    # 'obs_type' level is still available for the split below.
    X = pd.DataFrame(expanded, index=X.index)
    X.columns = ['poly_feat_' + str(position) for position in range(X.shape[1])]

    # Boolean mask over rows, derived from the 'obs_type' index level.
    train_mask = X.index.get_level_values('obs_type') == 'train'
    X_train = X[train_mask]
    X_test = X[~train_mask]
    return X_train, y_train, X_test