def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train the requested model on the hourly data and persist it with joblib.

    Args:
        model_dir: Passed to ``get_model_path`` to locate the output file.
        hour_path: Passed to ``read_data`` to locate the input data.
        model: Name of the model to train, either ``"xgboost"`` or
            ``"ridge"``.

    Raises:
        ValueError: If ``model`` is not one of the supported names. The check
            happens before any data is read.
    """
    supported = {"xgboost", "ridge"}
    if model not in supported:
        raise ValueError("results: model must be one of %s." % supported)

    # Same pipeline order as before: read -> preprocess -> dummify -> postprocess.
    features = postprocess(dummify(preprocess(read_data(hour_path))))

    trainer = train_xgboost if model == "xgboost" else train_ridge
    fitted = trainer(features)

    # NOTE(review): the arguments are passed as (model, model_dir); confirm
    # this matches the parameter order of get_model_path.
    destination = get_model_path(model, model_dir)
    joblib.dump(fitted, destination)
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train a model on the hourly data, save it with joblib, return its score.

    Args:
        model_dir: Passed to ``get_model_path`` together with ``model``.
        hour_path: Passed to ``read_data`` to locate the input data.
        model: ``"ridge"`` selects ``train_ridge``; every other value
            (including the default ``"xgboost"``) selects ``train_xgboost``.

    Returns:
        The second element of the pair returned by the selected trainer
        (the training score).
    """
    features = postprocess(dummify(preprocess(read_data(hour_path))))
    destination = get_model_path(model_dir, model)

    # Implement other models?
    trainer = train_ridge if model == "ridge" else train_xgboost
    fitted, score_train = trainer(features)

    joblib.dump(fitted, destination)
    return score_train
def train_and_persist(model_dir=None, hour_path=None, model_name="xgboost"):
    """Train the requested model on the hourly data and persist it with joblib.

    Args:
        model_dir: Passed to ``get_model_path`` together with ``model_name``.
        hour_path: Passed to ``read_data`` to locate the input data.
        model_name: One of ``"xgboost"``, ``"ridge"`` or ``"lasso"``.

    Raises:
        ValueError: If ``model_name`` is not supported. Previously the
            function only printed a message and then failed with an
            ``UnboundLocalError`` once it tried to dump the never-assigned
            model, after the whole data pipeline had already run.
    """
    supported = ("xgboost", "ridge", "lasso")
    # Validate first so a bad name fails fast, before any data is loaded.
    if model_name not in supported:
        raise ValueError(
            "model should be equal to 'xgboost' or 'ridge' or 'lasso', got %r"
            % (model_name,)
        )

    hour = read_data(hour_path)
    hour = preprocess(hour)
    hour = dummify(hour)
    hour = postprocess(hour)

    # TODO: Implement other models?
    if model_name == "xgboost":
        model = train_xgboost(hour)
    elif model_name == "ridge":
        model = train_ridge(hour)
    else:  # "lasso" -- the only remaining supported name
        model = train_lasso(hour)

    model_path = get_model_path(model_dir, model_name)
    joblib.dump(model, model_path)
def get_predict():
    """Run a prediction for the current request, defaulting weather inputs.

    Query parameters (read from ``request.args``):
        date: Required, ISO-8601 string parsed with
            ``datetime.fromisoformat``.
        weathersit, temperature_C, feeling_temperature_C, humidity,
        windspeed: Optional; when missing, the dataset mean is used.
        model: Optional model name, defaults to ``"xgboost"``.

    Returns:
        dict with the prediction under ``"result"`` and the elapsed
        prediction time in seconds under ``"computation time"``.

    Raises:
        KeyError: If ``date`` is missing from the query string.
    """
    df = read_data()

    # The dataset columns are stored normalized, while the request parameters
    # are raw units (get_input_dict in this file divides them by the same
    # factors). Scale the means back so defaults and user-supplied values
    # share one unit; otherwise a default temperature would be ~0.5 "degrees".
    weather_avg = df["weathersit"].mean()
    temp_avg = df["temp"].mean() * 41.0
    feeling_avg = df["atemp"].mean() * 50.0
    humidity_avg = df["hum"].mean() * 100.0
    windspeed_avg = df["windspeed"].mean() * 67.0

    parameters = dict(request.args)
    parameters["date"] = dt.datetime.fromisoformat(parameters["date"])
    # int() truncates the mean to a whole weather-situation code.
    parameters["weathersit"] = int(parameters.get("weathersit", weather_avg))
    parameters["temperature_C"] = float(
        parameters.get("temperature_C", temp_avg)
    )
    parameters["feeling_temperature_C"] = float(
        parameters.get("feeling_temperature_C", feeling_avg)
    )
    parameters["humidity"] = float(parameters.get("humidity", humidity_avg))
    parameters["windspeed"] = float(parameters.get("windspeed", windspeed_avg))

    # A missing "model" used to raise KeyError; fall back to the default model.
    model = str(parameters.get("model", "xgboost"))

    start_prediction = dt.datetime.now()
    result = predict(parameters, model=model)
    elapsed = dt.datetime.now() - start_prediction

    return {
        "result": result,
        "computation time": elapsed.total_seconds(),
    }
def get_input_dict(parameters):
    """Turn request parameters into the one-row feature dict for prediction.

    Args:
        parameters: Mapping with a datetime under ``"date"`` and numeric
            values under ``"weathersit"``, ``"temperature_C"``,
            ``"feeling_temperature_C"``, ``"humidity"`` and ``"windspeed"``.

    Returns:
        dict mapping feature name to value for the single constructed row,
        after preprocessing, dummification against the training columns and
        postprocessing.
    """
    training_frame = read_data()
    first_year = pd.to_datetime(training_frame["dteday"]).min().year

    when = parameters["date"]
    holiday = when in US_HOLIDAYS
    weekend = when.weekday() in (5, 6)

    # Key order matters: it becomes the column order of the one-row frame.
    record = {
        "dteday": when.strftime("%Y-%m-%d"),
        "season": get_season(when),
        "yr": when.year - first_year,
        "mnth": when.month,
        "hr": when.hour,
        "holiday": int(holiday),
        # Shift Python's Monday=0 convention to Sunday=0.
        "weekday": (when.weekday() + 1) % 7,
        "workingday": int(not (holiday or weekend)),
        "weathersit": parameters["weathersit"],
        # Raw units are scaled down by fixed divisors.
        "temp": parameters["temperature_C"] / 41.0,
        "atemp": parameters["feeling_temperature_C"] / 50.0,
        "hum": parameters["humidity"] / 100.0,
        "windspeed": parameters["windspeed"] / 67.0,
        "cnt": 1,  # Dummy, unused for prediction
    }
    row = pd.Series(record)

    # Dummify the training data too, so the single row can be aligned to the
    # same set of columns.
    reference_columns = dummify(preprocess(training_frame)).columns

    frame = pd.DataFrame([row])
    frame = preprocess(frame)
    frame = dummify(frame, reference_columns)
    frame = postprocess(frame)
    frame = frame.drop(
        columns=["dteday", "atemp", "casual", "registered", "cnt"]
    )

    assert len(frame) == 1

    return frame.iloc[0].to_dict()
def get_predict():
    """Run a prediction for the current request, defaulting missing inputs.

    Query parameters (read from ``request.args``):
        date: Optional ISO-8601 string; defaults to the same time tomorrow.
        weathersit, temperature_C, feeling_temperature_C, humidity,
        windspeed: Optional; when missing, a typical value for the month of
            the prediction date is used (median for ``weathersit``, mean
            scaled back to raw units for the others).
        model: Optional model name, defaults to ``"xgboost"``.

    Returns:
        dict with the prediction under ``"result"``, the elapsed prediction
        time under ``"prediction time (seconds)"`` and the datetime used
        under ``"date"``.
    """
    parameters = dict(request.args)

    requested_date = parameters.get("date")
    if requested_date is None:
        parameters["date"] = dt.datetime.now() + dt.timedelta(days=1)
    else:
        parameters["date"] = dt.datetime.fromisoformat(requested_date)

    # Derive defaults from the month being predicted. Previously tomorrow's
    # month was always used, even when the caller asked for a date in a
    # different month.
    hour_original = read_data()
    same_month = hour_original[hour_original.mnth == parameters["date"].month]

    weathersit_avg = same_month["weathersit"].median()
    # Dataset columns are normalized; scale back to the raw units that the
    # request parameters use.
    temperature_C_avg = same_month.temp.mean() * 41.0
    feeling_temperature_C_avg = same_month.atemp.mean() * 50.0
    humidity_avg = same_month.hum.mean() * 100.0
    windspeed_avg = same_month.windspeed.mean() * 67.0

    parameters["weathersit"] = int(
        parameters.get("weathersit", weathersit_avg)
    )
    parameters["temperature_C"] = float(
        parameters.get("temperature_C", temperature_C_avg)
    )
    parameters["feeling_temperature_C"] = float(
        parameters.get("feeling_temperature_C", feeling_temperature_C_avg)
    )
    parameters["humidity"] = float(parameters.get("humidity", humidity_avg))
    parameters["windspeed"] = float(parameters.get("windspeed", windspeed_avg))

    start = dt.datetime.now()
    result = predict(parameters, model=parameters.get("model", "xgboost"))
    prediction_time = dt.datetime.now() - start

    return {
        "result": result,
        "prediction time (seconds)": prediction_time.total_seconds(),
        "date": parameters["date"],
    }