Ejemplo n.º 1
0
def XY1():
    """Assemble the XY1 feature set from the data returned by ``load_xy``.

    Builds a ``Pipeline`` out of an NA imputer, the add-returns step, a
    dummy-and-keep grouper and a DataFrame-to-array step, records the
    pipeline's string form through ``kh``, then fits the pipeline on ``X``
    and applies the fitted pipeline to ``X_test``.

    Returns:
        dict with the keys ``"X"`` (result of ``fit_transform`` on ``X``),
        ``"y"`` (passed through untouched), ``"X_test"`` (result of
        ``transform`` on ``X_test``) and ``"X_test_index"`` (passed through
        untouched).
    """
    X, y, X_test, X_test_index = load_xy()

    # Configuration handed to the grouper.
    indicator_columns = ['Weekday', 'DepartmentDescription']
    passthrough_columns = ['ScanCount', 'Returns']
    aggregations = [np.sum, np.count_nonzero]

    to_array = ft.DataFrameToArray()
    returns_step = ft.NGAddReturns()
    # NOTE(review): the original author flagged this grouper with
    # "Doesn't work!" -- not verifiable from this function; confirm.
    grouper = ft.GDummyAndKeepTransform(
        indicator_columns, passthrough_columns, aggregations)

    # ft.wrapStep yields one or more (name, step) pairs per wrapped step,
    # so its output is spliced into the step list rather than appended.
    steps = [("imputer", ft.NGNAImputer())]
    steps.extend(ft.wrapStep(("add_returns", returns_step)))
    steps.extend(ft.wrapStep(('grouper', grouper)))
    steps.append(("dfta", to_array))
    pipeline = Pipeline(steps=steps)

    kh.start_pipeline()
    kh.record_metric(
        "validation", "start", "NA", "transform_pipeline",
        str(pipeline), "NA")

    # Fit on X first; X_test must go through the already-fitted pipeline.
    X_transformed = pipeline.fit_transform(X)
    X_test_transformed = pipeline.transform(X_test)

    return {
        "X": X_transformed,
        "y": y,
        "X_test": X_test_transformed,
        "X_test_index": X_test_index,
    }
Ejemplo n.º 2
0
def XY7():
    """Assemble the XY7 feature set from the data returned by ``load_xy``.

    Builds a ``Pipeline`` out of an NA imputer, a
    dummy/keep/multiplier grouper and a DataFrame-to-array step, records
    the pipeline's string form through ``kh``, then fits the pipeline on
    ``X`` and applies the fitted pipeline to ``X_test``.

    Returns:
        dict with the keys ``"X"`` (result of ``fit_transform`` on ``X``),
        ``"y"`` (passed through untouched), ``"X_test"`` (result of
        ``transform`` on ``X_test``) and ``"X_test_index"`` (passed through
        untouched).
    """
    X, y, X_test, X_test_index = load_xy()

    # --- DON'T CHANGE BEFORE: variant-specific configuration starts here ---
    indicator_columns = ['DepartmentDescription']
    passthrough_columns = ['Weekday']
    multiplier_column = 'ScanCount'
    to_array = ft.DataFrameToArray()

    # Argument order is (dummy columns, multiplier column, keep columns).
    grouper = ft.GDummyKeepAndMultiplierTransform(
        indicator_columns, multiplier_column, passthrough_columns)

    # ft.wrapStep yields one or more (name, step) pairs, so its output is
    # spliced into the step list rather than appended as a single item.
    steps = [("imputer", ft.NGNAImputer())]
    steps.extend(ft.wrapStep(('grouper', grouper)))

    # --- DON'T CHANGE AFTER: shared tail of the pipeline ---
    steps.append(("dfta", to_array))
    pipeline = Pipeline(steps=steps)

    kh.start_pipeline()
    kh.record_metric(
        "validation", "start", "NA", "transform_pipeline",
        str(pipeline), "NA")

    # Fit on X first; X_test must go through the already-fitted pipeline.
    X_transformed = pipeline.fit_transform(X)
    X_test_transformed = pipeline.transform(X_test)

    return {
        "X": X_transformed,
        "y": y,
        "X_test": X_test_transformed,
        "X_test_index": X_test_index,
    }