Exemple #1
0
def predict_mlp(df, save_name):
    """Predict with the Keras model stored at ``save_name``.

    The model is loaded from ``save_name`` and applied to the first element
    returned by ``get_inputs(df)``, using a batch size of 1024.

    Returns whatever ``model.predict`` returns for those inputs.
    """
    network = keras.models.load_model(save_name)
    features = get_inputs(df)[0]
    return network.predict(features, batch_size=1024)


# Entry point of the per-meter MLP prediction script: loads the test frame,
# allocates the prediction buffer and starts looping over the sub-models.
if __name__ == "__main__":
    """
    python scripts/04_predict_mlp_meter.py --normalize_target
    python scripts/04_predict_mlp_meter.py
    """

    # `parser` is defined outside this section.
    args = parser.parse_args()

    with timer("Loading data"):
        if args.normalize_target:
            test = load_data("test_nn_target_normalized_meter")
            # square_feet of the cleaned test set as a NumPy array; its use
            # is not visible in this section.
            test_square_feet = load_data("test_clean")["square_feet"].values
        else:
            test = load_data("test_nn_meter")
        # Constant placeholder target column
        # (presumably expected by the input pipeline -- TODO confirm).
        test["target"] = -1

    with timer("Predicting"):
        # One zero-initialised slot per test row.
        test_preds = np.zeros(len(test))

        # One pass per m in 0..3 (presumably one per meter type -- confirm).
        for m in range(4):
            print(m)
            # get base file name
            # NOTE(review): the f-string below has no placeholders, and both
            # statements are loop-invariant.
            model_name = f"mlp-split_meter"
            make_dir(f"{MODEL_PATH}/{model_name}")

            # create sub model path
    "month_x",
    "month_y",
    "building_month",  #"month", 
    "gte_meter_building_id_month"
]

# Entry point of the per-site CatBoost prediction script: loads and prepares
# the cleaned test frame, then allocates the prediction buffer.
if __name__ == "__main__":
    """
    python scripts/04_predict_cb_site.py --normalize_target
    python scripts/04_predict_cb_site.py
    """

    # `parser` is defined outside this section.
    args = parser.parse_args()

    with timer("Loading data"):
        test = load_data("test_clean")
        # Remove the columns listed in DROP_COLS, in place.
        test.drop(DROP_COLS, axis=1, inplace=True)

    with timer("Preprocesing"):
        # Cast every column named in CAT_COLS to pandas' category dtype.
        for x in CAT_COLS:
            test[x] = test[x].astype("category")

        if args.normalize_target:
            # Columns whose name contains "gte" are divided by
            # log1p(square_feet). The double-bracket selection yields a
            # single-column 2-D array, so the division broadcasts across all
            # selected columns.
            target_encode_cols = [x for x in test.columns if "gte" in x]
            test[target_encode_cols] = test[target_encode_cols] / np.log1p(
                test[["square_feet"]].values)

    # allocate one zero-initialised prediction slot per test row
    test_preds = np.zeros(len(test))
    for s in range(16):
Exemple #3
0
import glob
import numpy as np
import pandas as pd
from functools import partial
from sklearn.metrics import mean_squared_error
from ashrae.blenders import load_preds, GeneralizedMeanBlender
from ashrae.utils import OUTPUT_PATH, load_data, rmsle, timer

if __name__ == "__main__":
    """
    python scripts/05_blend_predictions.py
    """

    # Load the cleaned test frame and the "is_leak" data; the meter_reading
    # column of the latter is kept as a NumPy array in `target`.
    with timer("load test and leak"):
        test = load_data("test_clean")
        leak = load_data("is_leak")
        target = leak["meter_reading"].values

    # Load every saved prediction vector: .npy files first, then .csv files.
    with timer("load predictions"):
        # glob returns matches in file-system order; sort so that the column
        # order of the matrix is reproducible across machines and runs.
        npy_paths = sorted(glob.glob(f"{OUTPUT_PATH}/*.npy"))
        csv_paths = sorted(glob.glob(f"{OUTPUT_PATH}/*.csv"))
        if not npy_paths and not csv_paths:
            # np.vstack on an empty list fails with an opaque message; say
            # what is actually missing instead.
            raise FileNotFoundError(
                f"no .npy or .csv prediction files found in {OUTPUT_PATH}")

        preds_matrix = [np.load(path) for path in npy_paths]
        preds_matrix += [
            pd.read_csv(path).meter_reading.values for path in csv_paths
        ]
        # Stack the vectors as rows, then transpose so that each prediction
        # file becomes one column.
        preds_matrix = np.vstack(preds_matrix).T
        # Replace negative predictions with zero.
        preds_matrix[preds_matrix < 0] = 0

    # initialize data
    df["building_month"] = bm_ + df.month.astype(str)
    df["building_hour"] = bm_ + df.hour.astype(str)
    df["building_meter"] = bm_

    # get holidays
    dates_range = pd.date_range(start="2015-12-31", end="2019-01-01")
    us_holidays = calendar().holidays(start=dates_range.min(),
                                      end=dates_range.max())
    df["is_holiday"] = (
        df.ts.dt.date.astype("datetime64").isin(us_holidays)).astype(np.int8)


if __name__ == "__main__":
    # Entry point of the preprocessing script: load the raw frames, then run
    # the timestamp and weather processing steps on them.

    print("Loading data")
    train, test = load_data("input")
    building_meta = load_data("meta")
    train_weather, test_weather = load_data("weather")

    print("Process timestamp")
    # Parse the raw timestamp column of both meter frames into "ts".
    for frame in (train, test):
        frame["ts"] = pd.to_datetime(frame.timestamp)

    # Meter frames first, then weather frames.
    for frame in (train, test, train_weather, test_weather):
        process_timestamp(frame)

    print("Process weather")
    for weather_frame, split_name in ((train_weather, "train"),
                                      (test_weather, "test")):
        process_weather(weather_frame, split_name)
    "month_x",
    "month_y",
    "building_month",  #"month", 
    "gte_meter_building_id_month"
]

# Entry point of the per-site LightGBM training script: loads the cleaned
# training frame, filters it and prepares the features and target.
if __name__ == "__main__":
    """
    python scripts/03_train_lgb_site.py --normalize_target
    python scripts/03_train_lgb_site.py 
    """

    # `parser` is defined outside this section.
    args = parser.parse_args()

    with timer("Loading data"):
        train = load_data("train_clean")
        # Remove the columns listed in DROP_COLS, in place.
        train.drop(DROP_COLS, axis=1, inplace=True)
        # Keep only rows with is_bad_meter_reading == 0 and renumber the
        # index from zero.
        train = train.loc[train.is_bad_meter_reading == 0].reset_index(
            drop=True)

    with timer("Preprocesing"):
        # Cast every column named in CAT_COLS to pandas' category dtype.
        for x in CAT_COLS:
            train[x] = train[x].astype("category")

        if args.normalize_target:
            # Columns whose name contains "gte" are divided by
            # log1p(square_feet); the single-column 2-D array broadcasts
            # across all selected columns.
            target_encode_cols = [x for x in train.columns if "gte" in x]
            train[target_encode_cols] = train[target_encode_cols] / np.log1p(
                train[["square_feet"]].values)
            # Target is log1p(meter_reading) divided by log1p(square_feet).
            train["target"] = np.log1p(train["meter_reading"]) / np.log1p(
                train["square_feet"])
        else:
    #             for col in CAT_COLS:

    #                 x = np.concatenate([train.loc[train_indices, col], test.loc[test_indices, col]])
    #                 encoder = FastLabelEncoder()
    #                 encoder.fit(x)

    #                 train.loc[train_indices, col] = encoder.transform(train.loc[train_indices, col])
    #                 test.loc[test_indices, col] = encoder.transform(test.loc[test_indices, col])

    #     with timer("Save Data"):
    #         train.to_pickle(f"{DATA_PATH}/preprocessed/train_nn_target_normalized_site.pkl")
    #         test.to_pickle(f"{DATA_PATH}/preprocessed/test_nn_target_normalized_site.pkl")

    # meter site no normalization
    with timer("Loading data"):
        train, test = load_data("clean")

    with timer("Standardize Numeric Features"):
        for s in range(16):
            train_indices = train.site_id == s
            test_indices = test.site_id == s

            X = np.concatenate([
                train.loc[train_indices, NUM_COLS].values,
                test.loc[test_indices, NUM_COLS].values
            ])
            mu = X.mean(0)
            sig = X.std(0)

            train.loc[train_indices,
                      NUM_COLS] = (train.loc[train_indices, NUM_COLS] -
                    monitor='val_loss',
                    mode='min')
            ])
    return


# Entry point of the per-meter MLP training script: loads and filters the
# training frame, then starts the seed / n_months training loops.
if __name__ == "__main__":
    """
    python scripts/03_train_mlp_meter.py --normalize_target
    python scripts/03_train_mlp_meter.py
    """

    # `parser` is defined outside this section.
    args = parser.parse_args()

    with timer("Loading data"):
        train = load_data("train_nn_meter")
        # Keep only rows with is_bad_meter_reading == 0 and renumber the
        # index from zero.
        train = train.loc[train.is_bad_meter_reading == 0].reset_index(
            drop=True)

    with timer("Preprocesing"):
        # For each meter value, the number of distinct values in every
        # CAT_COLS column.
        meter_cat_counts = train.groupby(
            ["meter"])[CAT_COLS].agg(lambda x: len(np.unique(x)))

    # get base file name
    # NOTE(review): the f-string below has no placeholders.
    model_name = f"mlp-split_meter"
    make_dir(f"{MODEL_PATH}/{model_name}")

    for seed in range(3):
        for n_months in [1, 2, 3, 4, 5, 6]:
            # NOTE(review): the argument is the constant 6, not n_months, and
            # n_months is unused in the visible code -- confirm whether
            # get_validation_months(n_months) was intended.
            validation_months_list = get_validation_months(6)
def predict_mlp(df, save_name):
    """Predict with the Keras model stored at ``save_name``.

    The model is loaded from ``save_name`` and applied to the first element
    returned by ``get_inputs(df)``, using a batch size of 1024.

    Returns whatever ``model.predict`` returns for those inputs.
    """
    network = keras.models.load_model(save_name)
    features = get_inputs(df)[0]
    return network.predict(features, batch_size=1024)


# Entry point of the per-meter MLP prediction script: loads the test frame,
# allocates the prediction buffer and starts looping over the sub-models.
if __name__ == "__main__":
    """
    python scripts/04_predict_mlp_meter.py --normalize_target
    python scripts/04_predict_mlp_meter.py
    """

    # `parser` is defined outside this section.
    args = parser.parse_args()

    with timer("Loading data"):
        if args.normalize_target:
            # Dataset key spelled "target" (was "tareget"), matching the
            # naming of the other preprocessed datasets.
            test = load_data("test_nn_target_normalized_meter")
        else:
            test = load_data("test_nn_meter")

    # One zero-initialised prediction slot per test row.
    test_preds = np.zeros(len(test))

    # One pass per m in 0..3 (presumably one per meter type -- confirm).
    for m in range(4):
        print(m)
        # get base file name
        model_name = "mlp-split_meter"
        make_dir(f"{MODEL_PATH}/{model_name}")

        # create sub model path
        if args.normalize_target:
            sub_model_path = f"{MODEL_PATH}/mlp-split_meter/target_normalization/meter_{m}"
        else:
                    mode='min'
                )
            ]
        )
    return

# Entry point of the per-meter MLP training script: loads and filters the
# training frame, builds the log-transformed target and creates the model
# directory.
if __name__ == "__main__":
    """
    python scripts/03_train_mlp_meter.py --normalize_target
    python scripts/03_train_mlp_meter.py
    """
    
    # `parser` is defined outside this section.
    args = parser.parse_args()
    
    with timer("Loading data"):
        train = load_data("train_nn_meter")
        # Keep only rows with is_bad_meter_reading == 0 and renumber the
        # index from zero.
        train = train.loc[train.is_bad_meter_reading==0].reset_index(drop=True)
        # Scale meter_reading by 0.2931 for meter 0 at site 0 (presumably a
        # unit conversion -- confirm).
        # NOTE(review): "target" below is derived from the existing "target"
        # column, not from "meter_reading", so this rescaling does not reach
        # "target" in the visible code -- confirm that is intended.
        train.loc[(train.meter == 0) & (train.site_id == 0), "meter_reading"] *= 0.2931
        if args.normalize_target:
            # NOTE(review): square_feet comes from the unfiltered
            # "train_clean" data, while `train` was filtered and re-indexed
            # above. Series division aligns on index labels -- verify that
            # the rows still correspond.
            square_feet = load_data("train_clean")["square_feet"]
            train["target"] = np.log1p(train["target"]/square_feet)
        else:
            train["target"] = np.log1p(train["target"])
        
    with timer("Preprocesing"):
        # For each meter value, the number of distinct values in every
        # CAT_COLS column.
        meter_cat_counts = train.groupby(["meter"])[CAT_COLS].agg(lambda x: len(np.unique(x)))

    # get base file name
    # NOTE(review): the f-string below has no placeholders.
    model_name = f"mlp-split_meter"
    make_dir(f"{MODEL_PATH}/{model_name}")