def XY1():
    """Build transform pipeline 1 and apply it to the data from ``load_xy()``.

    The pipeline chains, in order: an ``"imputer"`` step, whatever steps
    ``ft.wrapStep`` yields for ``"add_returns"`` and for ``"grouper"``, and a
    final ``"dfta"`` step. The grouper is configured with the dummy columns
    ``Weekday`` / ``DepartmentDescription``, the keep columns ``ScanCount`` /
    ``Returns`` and the functions ``np.sum`` / ``np.count_nonzero``.

    Before any fitting happens, the pipeline start is announced through
    ``kh.start_pipeline()`` and its string form is logged with
    ``kh.record_metric``.

    Returns:
        dict with the keys ``"X"`` (pipeline ``fit_transform`` of X),
        ``"y"`` (passed through untouched), ``"X_test"`` (pipeline
        ``transform`` of the test data) and ``"X_test_index"`` (passed
        through untouched).
    """
    features, target, test_features, test_index = load_xy()

    # ---- Variables
    columns_to_dummy = ['Weekday', 'DepartmentDescription']
    columns_to_keep = ['ScanCount', 'Returns']
    group_funcs = [np.sum, np.count_nonzero]

    to_array = ft.DataFrameToArray()
    returns_adder = ft.NGAddReturns()
    grouper = ft.GDummyAndKeepTransform(
        columns_to_dummy, columns_to_keep, group_funcs
    )

    # NOTE(review): the original carried a bare "Doesn't work!" remark at this
    # point; it is unclear which statement it refers to -- confirm with the
    # author before relying on this pipeline.
    steps = [
        ("imputer", ft.NGNAImputer()),
        *ft.wrapStep(("add_returns", returns_adder)),
        *ft.wrapStep(('grouper', grouper)),
        ("dfta", to_array),
    ]
    pipe = Pipeline(steps=steps)

    # Log the (still unfitted) pipeline description before doing any work.
    kh.start_pipeline()
    kh.record_metric(
        "validation", "start", "NA", "transform_pipeline", str(pipe), "NA"
    )

    # Fit on X first; the test data is only ever transformed.
    transformed_features = pipe.fit_transform(features)
    transformed_test = pipe.transform(test_features)

    return {
        "X": transformed_features,
        "y": target,
        "X_test": transformed_test,
        "X_test_index": test_index,
    }
def XY7():
    """Build transform pipeline 7 and apply it to the data from ``load_xy()``.

    The pipeline chains, in order: an ``"imputer"`` step, whatever steps
    ``ft.wrapStep`` yields for ``"grouper"``, and a final ``"dfta"`` step.
    The grouper is an ``ft.GDummyKeepAndMultiplierTransform`` configured with
    the dummy column ``DepartmentDescription``, the multiplier column
    ``ScanCount`` and the keep column ``Weekday``.

    Before any fitting happens, the pipeline start is announced through
    ``kh.start_pipeline()`` and its string form is logged with
    ``kh.record_metric``.

    Returns:
        dict with the keys ``"X"`` (pipeline ``fit_transform`` of X),
        ``"y"`` (passed through untouched), ``"X_test"`` (pipeline
        ``transform`` of the test data) and ``"X_test_index"`` (passed
        through untouched).
    """
    features, target, test_features, test_index = load_xy()

    #### DON'T CHANGE BEFORE
    columns_to_dummy = ['DepartmentDescription']
    columns_to_keep = ['Weekday']
    multiplier_column = 'ScanCount'

    to_array = ft.DataFrameToArray()
    grouper = ft.GDummyKeepAndMultiplierTransform(
        columns_to_dummy, multiplier_column, columns_to_keep
    )

    steps = [
        ("imputer", ft.NGNAImputer()),
        *ft.wrapStep(('grouper', grouper)),
    ]
    ### DON'T CHANGE AFTER

    # The array conversion always goes last, after the configurable steps.
    steps.append(("dfta", to_array))
    pipe = Pipeline(steps=steps)

    # Log the (still unfitted) pipeline description before doing any work.
    kh.start_pipeline()
    kh.record_metric(
        "validation", "start", "NA", "transform_pipeline", str(pipe), "NA"
    )

    # Fit on X first; the test data is only ever transformed.
    transformed_features = pipe.fit_transform(features)
    transformed_test = pipe.transform(test_features)

    return {
        "X": transformed_features,
        "y": target,
        "X_test": transformed_test,
        "X_test_index": test_index,
    }