Example #1
0
def scale_dataframe(train, test, cols, replace=True):
    """Standardize selected columns of a train/test pair of DataFrames.

    A ``StandardScaler`` is fitted on ``train[cols]`` only; the same fitted
    scaler is then applied to ``test[cols]``, so the test data never
    contributes to the scaling statistics.

    Args:
        train: DataFrame used to fit the scaler.
        test: DataFrame transformed with the scaler fitted on ``train``.
        cols: Column label (a single string) or iterable of column labels
            to scale. Every label must exist in both frames.
        replace: If truthy (default), the returned frames have ``cols``
            replaced by their scaled versions. The scaled columns are
            appended after the untouched columns, so column order may
            change. If falsy, ``train`` and ``test`` are returned as
            passed in and only the scaler is fitted.

    Returns:
        A 3-tuple ``(train, test, scaler)``. The input frames are never
        modified in place.
    """
    # Accept a single label as well as any iterable of labels; a list is
    # required so that ``frame[cols]`` always selects a 2-D sub-frame.
    cols = [cols] if isinstance(cols, str) else list(cols)
    scaler = StandardScaler()

    if not replace:
        # Caller only wants the fitted scaler; skip the unused transforms.
        scaler.fit(train[cols])
        return (train, test, scaler)

    # The scaler keeps column order, so the output can be labelled with
    # ``cols`` directly. (StandardScaler has no ``get_feature_names``
    # method, so asking it for names would raise AttributeError.)
    scaled_train = pd.DataFrame(
        scaler.fit_transform(train[cols]), columns=cols, index=train.index
    )
    scaled_test = pd.DataFrame(
        scaler.transform(test[cols]), columns=cols, index=test.index
    )

    # Both pieces share an identical index, so concatenate column-wise
    # instead of ``join``-ing: a join on a non-unique index would duplicate
    # rows for every repeated label.
    train = pd.concat([train.drop(columns=cols), scaled_train], axis=1)
    test = pd.concat([test.drop(columns=cols), scaled_test], axis=1)
    return (train, test, scaler)