def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train the requested model on the hourly data and save it with joblib.

    Args:
        model_dir: Forwarded to ``get_model_path`` to locate the output file.
        hour_path: Forwarded to ``read_data`` to locate the input data.
        model: Name of the model to train, ``"xgboost"`` or ``"ridge"``.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    supported = {"xgboost", "ridge"}

    # Guard clause: reject unknown names before any data is loaded.
    if model not in supported:
        raise ValueError("results: model must be one of %s." % supported)

    # Feature pipeline, one stage per statement so the order stays explicit.
    frame = read_data(hour_path)
    frame = preprocess(frame)
    frame = dummify(frame)
    frame = postprocess(frame)

    # After validation, anything that is not "xgboost" must be "ridge".
    trainer = train_xgboost if model == "xgboost" else train_ridge
    fitted = trainer(frame)

    destination = get_model_path(model, model_dir)
    joblib.dump(fitted, destination)
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train a model on the hourly data, persist it, and return its train score.

    Args:
        model_dir: Forwarded to ``get_model_path`` together with ``model``.
        hour_path: Forwarded to ``read_data`` to locate the input data.
        model: ``"ridge"`` selects ``train_ridge``; every other value
            selects ``train_xgboost``.

    Returns:
        The second element of the tuple returned by the training function
        (named ``score_train`` by the trainers' callers).
    """
    frame = read_data(hour_path)
    frame = preprocess(frame)
    frame = dummify(frame)
    frame = postprocess(frame)

    # Resolve the output location before training, as the pipeline always has.
    destination = get_model_path(model_dir, model)

    # Only "ridge" is special-cased; any other name falls back to XGBoost.
    trainer = train_ridge if model == "ridge" else train_xgboost
    fitted, train_score = trainer(frame)

    joblib.dump(fitted, destination)
    return train_score
Example #3
0
def train_and_persist(model_dir=None, hour_path=None, model_name="xgboost"):
    """Train the requested model on the hourly data and save it with joblib.

    Args:
        model_dir: Forwarded to ``get_model_path`` together with
            ``model_name`` to locate the output file.
        hour_path: Forwarded to ``read_data`` to locate the input data.
        model_name: One of ``"xgboost"``, ``"ridge"`` or ``"lasso"``.

    Raises:
        ValueError: If ``model_name`` is not a supported model. Previously
            this case only printed a message and then crashed with
            ``UnboundLocalError`` when the untrained model was dumped.
    """
    # Fail fast: validate the name before the (potentially slow) data
    # pipeline runs, and never reach joblib.dump without a trained model.
    if model_name not in ("xgboost", "ridge", "lasso"):
        raise ValueError(
            "model should be equal to 'xgboost' or 'ridge' or 'lasso'"
        )

    hour = read_data(hour_path)
    hour = preprocess(hour)
    hour = dummify(hour)
    hour = postprocess(hour)

    # TODO: Implement other models?
    if model_name == "xgboost":
        model = train_xgboost(hour)
    elif model_name == "ridge":
        model = train_ridge(hour)
    else:  # "lasso" -- the only remaining name after validation
        model = train_lasso(hour)

    model_path = get_model_path(model_dir, model_name)
    joblib.dump(model, model_path)
Example #4
0
def get_predict():
    """Serve a prediction for the parameters given in the URL query string.

    Query parameters:
        date: ISO-format date/time (required).
        weathersit, temperature_C, feeling_temperature_C, humidity,
        windspeed: optional; each falls back to the dataset mean.
        model: optional model name, defaults to ``"xgboost"``.

    Returns:
        A dict with the prediction under ``"result"`` and the elapsed
        prediction time in seconds under ``"computation time"``.

    Raises:
        KeyError: If ``date`` is missing from the query string.
        ValueError: If ``date`` or a numeric parameter cannot be parsed.
    """
    df = read_data()

    # Fallback values for omitted parameters. get_input_dict() turns the
    # request parameters into dataset columns by dividing by 41 / 50 / 100 /
    # 67, so the column means are scaled back by the same factors to be
    # expressed in the units the request uses.
    # NOTE(review): assumes predict() builds its input via get_input_dict().
    weather_avg = df["weathersit"].mean()
    temp_avg = df["temp"].mean() * 41.0
    feeling_avg = df["atemp"].mean() * 50.0
    humidity_avg = df["hum"].mean() * 100.0
    windspeed_avg = df["windspeed"].mean() * 67.0

    parameters = dict(request.args)

    parameters["date"] = dt.datetime.fromisoformat(parameters["date"])
    parameters["weathersit"] = int(parameters.get("weathersit", weather_avg))
    parameters["temperature_C"] = float(
        parameters.get("temperature_C", temp_avg))
    parameters["feeling_temperature_C"] = float(
        parameters.get("feeling_temperature_C", feeling_avg))
    parameters["humidity"] = float(parameters.get("humidity", humidity_avg))
    parameters["windspeed"] = float(parameters.get("windspeed", windspeed_avg))

    # A missing "model" used to raise KeyError; fall back to the default
    # model name used by the training functions instead.
    model = str(parameters.get("model", "xgboost"))

    start_prediction = dt.datetime.now()
    result = predict(parameters, model=model)
    end_prediction = dt.datetime.now() - start_prediction

    return {
        "result": result,
        "computation time": end_prediction.total_seconds()
    }
def get_input_dict(parameters):
    """Build the single-row feature dict for one prediction request.

    A one-row frame shaped like the training data is assembled from
    ``parameters`` and pushed through ``preprocess``, ``dummify`` (with the
    column set of the dummified original data) and ``postprocess``.

    Args:
        parameters: Mapping with ``"date"`` (a datetime), ``"weathersit"``,
            ``"temperature_C"``, ``"feeling_temperature_C"``, ``"humidity"``
            and ``"windspeed"``.

    Returns:
        The processed row as a dict, without the ``dteday``, ``atemp``,
        ``casual``, ``registered`` and ``cnt`` columns.
    """
    hour_original = read_data()
    # "yr" is expressed as an offset from the first year in the dataset.
    first_year = pd.to_datetime(hour_original["dteday"]).min().year

    when = parameters["date"]

    holiday = when in US_HOLIDAYS
    day_index = when.weekday()  # Monday == 0 ... Sunday == 6
    weekend = day_index in (5, 6)

    # Key order is kept as-is: it determines the column order of the frame.
    record = {
        "dteday": when.strftime("%Y-%m-%d"),
        "season": get_season(when),
        "yr": when.year - first_year,
        "mnth": when.month,
        "hr": when.hour,
        "holiday": int(holiday),
        "weekday": (day_index + 1) % 7,
        "workingday": int(not (holiday or weekend)),
        "weathersit": parameters["weathersit"],
        "temp": parameters["temperature_C"] / 41.0,
        "atemp": parameters["feeling_temperature_C"] / 50.0,
        "hum": parameters["humidity"] / 100.0,
        "windspeed": parameters["windspeed"] / 67.0,
        "cnt": 1,  # Dummy, unused for prediction
    }
    row = pd.Series(record)

    # Columns of the dummified training data, so the single row receives the
    # same set of dummy columns.
    reference_columns = dummify(preprocess(hour_original)).columns

    frame = pd.DataFrame([row])
    frame = preprocess(frame)
    frame = dummify(frame, reference_columns)
    frame = postprocess(frame)

    unused = ["dteday", "atemp", "casual", "registered", "cnt"]
    frame = frame.drop(columns=unused)

    assert len(frame) == 1

    return frame.iloc[0].to_dict()
def get_predict():
    """Serve a prediction for the parameters given in the URL query string.

    Query parameters (all optional):
        date: ISO-format date/time; defaults to this time tomorrow.
        weathersit: defaults to the dataset median for the date's month.
        temperature_C, feeling_temperature_C, humidity, windspeed: default
            to the dataset mean for the date's month, scaled by 41 / 50 /
            100 / 67 respectively.
        model: model name, defaults to ``"xgboost"``.

    Returns:
        A dict with the prediction (``"result"``), the elapsed prediction
        time (``"prediction time (seconds)"``) and the parsed ``"date"``.

    Raises:
        ValueError: If ``date`` or a numeric parameter cannot be parsed.
    """
    tomorrow = dt.datetime.now() + dt.timedelta(days=1)

    parameters = dict(request.args)
    # Parse the date first: the fallback weather values below must describe
    # the month being predicted, not tomorrow's month, whenever the caller
    # supplies an explicit date.
    parameters["date"] = dt.datetime.fromisoformat(
        parameters.get("date", tomorrow.isoformat()))

    hour_original = read_data()
    same_month = hour_original[hour_original.mnth == parameters["date"].month]

    weathersit_avg = same_month["weathersit"].median()
    temperature_C_avg = same_month.temp.mean() * 41.0
    feeling_temperature_C_avg = same_month.atemp.mean() * 50.0
    humidity_avg = same_month.hum.mean() * 100.0
    windspeed_avg = same_month.windspeed.mean() * 67.0

    parameters["weathersit"] = int(parameters.get("weathersit",
                                                  weathersit_avg))
    parameters["temperature_C"] = float(
        parameters.get("temperature_C", temperature_C_avg))
    parameters["feeling_temperature_C"] = float(
        parameters.get("feeling_temperature_C", feeling_temperature_C_avg))
    parameters["humidity"] = float(parameters.get("humidity", humidity_avg))
    parameters["windspeed"] = float(parameters.get("windspeed", windspeed_avg))

    start = dt.datetime.now()

    result = predict(parameters, model=parameters.get("model", "xgboost"))

    prediction_time = dt.datetime.now() - start

    return {
        "result": result,
        "prediction time (seconds)": prediction_time.total_seconds(),
        "date": parameters["date"],
    }