def clean_up1(ds, rs):
    """Run clean-up recipe 1 on the house-sales data and log its regression.

    Steps, applied to a new frame (the ``ds`` passed in is left untouched):

    1. Drop ``id``, ``date``, ``sqft_living``, ``sqft_lot``, ``view``,
       ``lat`` and ``long``.
    2. Add ``age`` = 2018 minus ``yr_renovated`` (or ``yr_built`` where
       ``yr_renovated`` is 0), then drop both year columns.
    3. Replace ``bathrooms`` with ``bathrooms * bedrooms``.
    4. Pass ``bedrooms``, ``bathrooms`` and ``floors`` through
       ``hskc_functions.createGroups``, ``zipcode`` through
       ``hskc_functions.dummifyField`` and ``sqft_living15`` through
       ``hskc_functions.createQuantile``.
    5. Call ``hskc_functions.runRegression`` and add its result, tagged with
       this recipe's id and a description, as a new row of ``rs``.

    Args:
        ds: pandas DataFrame containing the columns named above.
        rs: pandas DataFrame accumulating one row per clean-up run.

    Returns:
        Tuple ``(ds, rs)``: the transformed data and the results frame with
        one additional row.

    Raises:
        KeyError: if one of the expected columns is missing from ``ds``.
    """
    # drop() returns a new frame, so the caller's data is not modified.
    ds = ds.drop(
        columns=["id", "date", "sqft_living", "sqft_lot", "view", "lat",
                 "long"])

    # yr_renovated == 0 falls back to yr_built as the reference year.
    reference_year = np.where(ds["yr_renovated"] == 0, ds["yr_built"],
                              ds["yr_renovated"])
    ds["age"] = 2018 - reference_year
    ds = ds.drop(columns=["yr_built", "yr_renovated"])

    # bathrooms must be rescaled before it is grouped below.
    ds["bathrooms"] = ds["bathrooms"] * ds["bedrooms"]
    for column in ("bedrooms", "bathrooms", "floors"):
        ds = hskc_functions.createGroups(ds, column)
    ds = hskc_functions.dummifyField(ds, "zipcode")
    ds = hskc_functions.createQuantile(ds, "sqft_living15")

    result = hskc_functions.runRegression(ds)
    result.update({
        "Clean Up Id": "1",
        "Comments": (
            "Removed Id, date, sqft_living, sqft_lot,view, lat, long, yr_built, year_renovated\n"
            "Created age\n"
            "Updated bathrooms=bathrooms*bedrooms\nGrouped bedrooms,bathrooms,floors,\n"
            "Dummify zipcode\n"
            "Quantiled sqft_living15"
        ),
    })
    # DataFrame.append was removed in pandas 2.0; concat works on every
    # pandas version and adds the result as a single new row.
    rs = pd.concat([rs, pd.DataFrame([result])], ignore_index=True)
    return ds, rs
def clean_up2(ds, rs, zipcode_csv=None):
    """Run clean-up recipe 2 (zipcode replaced by city) and log its regression.

    Steps, applied to a new frame (the ``ds`` passed in is left untouched):

    1. Drop ``id``, ``date``, ``sqft_living``, ``sqft_lot``, ``view``,
       ``lat`` and ``long``.
    2. Read a zipcode lookup CSV, merge its ``City`` column onto the data by
       ``zipcode``, drop ``zipcode`` and pass ``City`` through
       ``hskc_functions.dummifyField``.
    3. Add ``age`` = 2018 minus ``yr_renovated`` (or ``yr_built`` where
       ``yr_renovated`` is 0), then drop both year columns.
    4. Replace ``bathrooms`` with ``bathrooms * bedrooms``.
    5. Pass ``bedrooms``, ``bathrooms`` and ``floors`` through
       ``hskc_functions.createGroups`` and ``sqft_living15`` through
       ``hskc_functions.createQuantile``.
    6. Call ``hskc_functions.runRegression`` and add its result, tagged with
       this recipe's id and a description, as a new row of ``rs``.

    Args:
        ds: pandas DataFrame containing the columns named above.
        rs: pandas DataFrame accumulating one row per clean-up run.
        zipcode_csv: Path of the CSV holding ``zipcode`` and ``City``
            columns. When ``None`` the original hard-coded location is used,
            which only exists on the original author's machine.

    Returns:
        Tuple ``(ds, rs)``: the transformed data and the results frame with
        one additional row.

    Raises:
        KeyError: if one of the expected columns is missing.
        FileNotFoundError: if the zipcode CSV cannot be found.
    """
    if zipcode_csv is None:
        zipcode_csv = "C:\\Users\\manoj\\Documents\\Acadgild DSB\\Kaggle Project\\House Sales in King County\\Data\\zipcode.csv"

    # drop() returns a new frame, so the caller's data is not modified.
    ds = ds.drop(
        columns=["id", "date", "sqft_living", "sqft_lot", "view", "lat",
                 "long"])

    # Replace zipcode with its city. merge() defaults to an inner join, so
    # rows whose zipcode is absent from the lookup are dropped.
    # NOTE(review): duplicate zipcodes in the lookup would duplicate rows --
    # confirm the CSV has one row per zipcode.
    zip_lookup = pd.read_csv(zipcode_csv)
    ds = ds.merge(zip_lookup[["zipcode", "City"]], on="zipcode")
    ds = ds.drop(columns=["zipcode"])
    ds = hskc_functions.dummifyField(ds, "City")

    # yr_renovated == 0 falls back to yr_built as the reference year.
    reference_year = np.where(ds["yr_renovated"] == 0, ds["yr_built"],
                              ds["yr_renovated"])
    ds["age"] = 2018 - reference_year
    ds = ds.drop(columns=["yr_built", "yr_renovated"])

    # bathrooms must be rescaled before it is grouped below.
    ds["bathrooms"] = ds["bathrooms"] * ds["bedrooms"]
    for column in ("bedrooms", "bathrooms", "floors"):
        ds = hskc_functions.createGroups(ds, column)
    ds = hskc_functions.createQuantile(ds, "sqft_living15")

    result = hskc_functions.runRegression(ds)
    result.update({
        "Clean Up Id": "2",
        "Comments": (
            "Removed Id, date, sqft_living, sqft_lot,view, lat, long, yr_built, year_renovated, zipcode\n"
            "Append City\n"
            "Created age\n"
            "Updated bathrooms=bathrooms*bedrooms\nGrouped bedrooms,bathrooms,floors,\n"
            "Dummify City\n"
            "Quantiled sqft_living15"
        ),
    })
    # DataFrame.append was removed in pandas 2.0; concat works on every
    # pandas version and adds the result as a single new row.
    rs = pd.concat([rs, pd.DataFrame([result])], ignore_index=True)
    return ds, rs
def clean_up3(ds, rs):
    """Run clean-up recipe 3 (recipe 1 plus min-max scaling) and log its regression.

    Steps, applied to a new frame (the ``ds`` passed in is left untouched):

    1. Drop ``id``, ``date``, ``sqft_living``, ``sqft_lot``, ``view``,
       ``lat`` and ``long``.
    2. Add ``age`` = 2018 minus ``yr_renovated`` (or ``yr_built`` where
       ``yr_renovated`` is 0), then drop both year columns.
    3. Replace ``bathrooms`` with ``bathrooms * bedrooms``.
    4. Pass ``bedrooms``, ``bathrooms`` and ``floors`` through
       ``hskc_functions.createGroups``, ``zipcode`` through
       ``hskc_functions.dummifyField`` and ``sqft_living15`` through
       ``hskc_functions.createQuantile``.
    5. Min-max scale every column and rebuild the frame from the scaled
       array.
    6. Call ``hskc_functions.runRegression(ds, True)`` and add its result,
       tagged with this recipe's id and a description, as a new row of
       ``rs``.

    Args:
        ds: pandas DataFrame containing the columns named above.
        rs: pandas DataFrame accumulating one row per clean-up run.

    Returns:
        Tuple ``(ds, rs)``: the scaled data (columns labelled ``0..n-1``,
        not by name) and the results frame with one additional row.

    Raises:
        KeyError: if one of the expected columns is missing from ``ds``.
    """
    # drop() returns a new frame, so the caller's data is not modified.
    ds = ds.drop(
        columns=["id", "date", "sqft_living", "sqft_lot", "view", "lat",
                 "long"])

    # yr_renovated == 0 falls back to yr_built as the reference year.
    reference_year = np.where(ds["yr_renovated"] == 0, ds["yr_built"],
                              ds["yr_renovated"])
    ds["age"] = 2018 - reference_year
    ds = ds.drop(columns=["yr_built", "yr_renovated"])

    # bathrooms must be rescaled before it is grouped below.
    ds["bathrooms"] = ds["bathrooms"] * ds["bedrooms"]
    for column in ("bedrooms", "bathrooms", "floors"):
        ds = hskc_functions.createGroups(ds, column)
    ds = hskc_functions.dummifyField(ds, "zipcode")
    ds = hskc_functions.createQuantile(ds, "sqft_living15")

    # Scale the entire frame. Rebuilding the DataFrame from the bare array
    # discards the column names and the index.
    # NOTE(review): the True flag passed to runRegression presumably tells it
    # to expect this unlabelled layout -- confirm before restoring names.
    scaler = preprocessing.MinMaxScaler()
    scaled_values = scaler.fit_transform(ds.values)
    ds = pd.DataFrame(scaled_values)

    result = hskc_functions.runRegression(ds, True)
    result.update({
        "Clean Up Id": "3",
        "Comments": (
            "Removed Id, date, sqft_living, sqft_lot,view, lat, long, yr_built, year_renovated\n"
            "Created age\n"
            "Updated bathrooms=bathrooms*bedrooms\nGrouped bedrooms,bathrooms,floors,\n"
            "Dummify zipcode\n"
            "Quantiled sqft_living15\nData is scaled"
        ),
    })
    # DataFrame.append was removed in pandas 2.0; concat works on every
    # pandas version and adds the result as a single new row.
    rs = pd.concat([rs, pd.DataFrame([result])], ignore_index=True)
    return ds, rs