Exemple #1
0
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train the requested model, persist it and return its test-split score.

    The hour data is read from ``hour_path`` and passed through
    ``preprocess``, ``dummify`` and ``postprocess``. The selected trainer is
    called on the resulting frame and the fitted model is dumped with
    ``joblib`` to ``<model>.pkl`` inside the location returned by
    ``get_model_path(model_dir)``.

    Args:
        model_dir: Forwarded to ``get_model_path``.
        hour_path: Forwarded to ``read_data``.
        model: Name of the model to train: ``"xgboost"``, ``"ridge"`` or
            ``"lasso"``.

    Returns:
        The value of the fitted model's ``score`` method evaluated on the
        test split produced by ``split_train_test``.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    import os  # local import so the block does not rely on file-level imports

    supported = ("xgboost", "ridge", "lasso")
    if model not in supported:
        # Fail fast instead of silently returning None after doing all the
        # preprocessing work.
        raise ValueError(
            "model must be one of %s, got %r" % (", ".join(supported), model)
        )

    hour = read_data(hour_path)
    hour = preprocess(hour)
    hour = dummify(hour)
    hour = postprocess(hour)
    # Only the test split is needed for scoring.
    _, test_X, _, test_y = split_train_test(hour)

    # NOTE(review): the trainers receive the full `hour` frame, not only the
    # train split -- confirm that they split the data internally.
    # TODO: Implement other models?
    if model == "xgboost":
        fitted = train_xgboost(hour)
    elif model == "ridge":
        fitted = train_ridge(hour)
    else:
        fitted = train_lasso(hour)

    model_path = get_model_path(model_dir)
    test_score = fitted.score(test_X, test_y)
    joblib.dump(fitted, os.path.join(model_path, model + ".pkl"))
    return test_score
Exemple #2
0
def score(model_dir=None, hour_path=None, model_name="xgboost"):
    """Score the persisted model on the train and test splits of the hour data.

    The model file is located with ``get_model_path``; when that path does
    not exist, ``train_and_persist`` is called first to create it.

    Args:
        model_dir: Forwarded to ``get_model_path`` and ``train_and_persist``.
        hour_path: Forwarded to ``read_data``.
        model_name: Name of the model to load (and train if missing).

    Returns:
        A ``(train, test)`` tuple with the model's ``score`` on each split
        returned by ``split_train_test``.
    """
    persisted_path = get_model_path(model_dir, model=model_name)
    if not os.path.exists(persisted_path):
        train_and_persist(model_dir, model=model_name)
    estimator = joblib.load(persisted_path)

    frame = read_data(hour_path)
    for stage in (preprocess, dummify, postprocess):
        frame = stage(frame)
    encoded = pd.get_dummies(frame)

    # Replace "[", "]" and "<" in column names with "_".
    # Presumably the model rejects these characters in feature names -- confirm.
    forbidden = re.compile(r"\[|\]|<", re.IGNORECASE)
    renamed = []
    for column in encoded.columns.values:
        label = str(column)
        if "[" in label or "]" in label or "<" in label:
            renamed.append(forbidden.sub("_", column))
        else:
            renamed.append(column)
    encoded.columns = renamed

    encoded = encoded.select_dtypes(exclude="category")

    x_train, x_test, y_train, y_test = split_train_test(encoded)

    train_result = estimator.score(x_train, y_train)
    test_result = estimator.score(x_test, y_test)
    return train_result, test_result
def predict(parameters, model_dir=None, model=None):
    """Return the model prediction for ``parameters``.

    Args:
        parameters: Raw input, converted to a feature dict by
            ``get_input_dict``.
        model_dir: Forwarded to ``get_model_path`` and ``train_and_persist``.
        model: ``"ridge"`` selects the ridge model; any other value
            (including the default ``None``) selects xgboost.

    Returns:
        The squared model output converted to ``int``.
    """
    import os  # local import so the block does not rely on file-level imports

    model_name = "ridge" if model == "ridge" else "xgboost"
    model_path = get_model_path(model_dir, model_al=model_name)

    # Train only when no persisted model is available; retraining on every
    # prediction call is needlessly expensive.
    if not os.path.exists(model_path):
        train_and_persist(model_dir=model_dir, model=model_name)

    estimator = joblib.load(model_path)

    input_dict = get_input_dict(parameters)
    # Single-row frame built from the feature dict.
    X_input = pd.DataFrame([pd.Series(input_dict)])

    result = estimator.predict(X_input)

    # Undo np.sqrt(hour["cnt"])
    return int(result ** 2)
Exemple #4
0
def train_and_persist(model_dir=None, hour_path=None):
    """Train the xgboost model on the hour data and persist it.

    The data read from ``hour_path`` goes through ``preprocess``,
    ``dummify`` and ``postprocess`` before training. The fitted model is
    dumped with ``joblib`` to the path returned by
    ``get_model_path(model_dir)``.
    """
    prepared = read_data(hour_path)
    for stage in (preprocess, dummify, postprocess):
        prepared = stage(prepared)

    # TODO: Implement other models?
    fitted = train_xgboost(prepared)

    joblib.dump(fitted, get_model_path(model_dir))
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train the requested model on the hour data and persist it.

    Args:
        model_dir: Forwarded to ``get_model_path``.
        hour_path: Forwarded to ``read_data``.
        model: ``"xgboost"`` or ``"ridge"``.

    Raises:
        ValueError: If ``model`` is not a supported name.
    """
    if model not in ("xgboost", "ridge"):
        # Without this check an unknown name fell through both branches and
        # crashed with UnboundLocalError only after all the data work.
        raise ValueError("model must be 'xgboost' or 'ridge', got %r" % (model,))

    hour = read_data(hour_path)
    hour = preprocess(hour)
    hour = dummify(hour)
    hour = postprocess(hour)

    # TODO: Implement other models?
    if model == "xgboost":
        train_model = train_xgboost(hour)
    else:
        train_model = train_ridge(hour)

    model_path = get_model_path(model_dir, model)

    joblib.dump(train_model, model_path)
def predict(parameters, model_dir=None, model="xgboost"):
    """Return the model prediction for ``parameters``.

    Args:
        parameters: Raw input, converted to a feature dict by
            ``get_input_dict``.
        model_dir: Forwarded to ``get_model_path`` and ``train_and_persist``.
        model: Name of the model to use; it is trained and persisted first
            when its file does not exist yet.

    Returns:
        The squared model output converted to ``int``.
    """
    # Resolve the path for the *requested* model; using the default path
    # would check and load a different file than the one train_and_persist
    # writes for a non-default model.
    # NOTE(review): assumes get_model_path accepts the model name as its
    # second positional argument -- confirm against its definition.
    model_path = get_model_path(model_dir, model)
    if not os.path.exists(model_path):
        train_and_persist(model_dir=model_dir, model=model)

    estimator = joblib.load(model_path)

    input_dict = get_input_dict(parameters)
    # Single-row frame built from the feature dict.
    X_input = pd.DataFrame([pd.Series(input_dict)])

    result = estimator.predict(X_input)

    # Undo np.sqrt(hour["cnt"])
    return int(result**2)
Exemple #7
0
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """
    Fit a model on the hour data and dump it with joblib.

    ``model="ridge"`` trains the ridge model; every other value trains
    xgboost. The output path comes from ``get_model_path(model_dir, model)``.
    """
    prepared = read_data(hour_path)
    for stage in (preprocess, dummify, postprocess):
        prepared = stage(prepared)

    # Only the selected trainer name is looked up.
    trainer = train_ridge if model == "ridge" else train_xgboost
    fitted = trainer(prepared)

    destination = get_model_path(model_dir, model)
    joblib.dump(fitted, destination)
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train the requested model, persist it and return its score.

    Args:
        model_dir: Forwarded to ``get_model_path``.
        hour_path: Forwarded to ``read_data``.
        model: ``"xgboost"`` or ``"ridge"``.

    Returns:
        The score returned by the trainer alongside the fitted model.

    Raises:
        ValueError: If ``model`` is not a supported name.
    """
    if model not in ("xgboost", "ridge"):
        # Without this check an unknown name fell through both branches and
        # crashed with UnboundLocalError at joblib.dump.
        raise ValueError("model must be 'xgboost' or 'ridge', got %r" % (model,))

    hour = read_data(hour_path)
    hour = preprocess(hour)
    hour = dummify(hour)
    hour = postprocess(hour)

    model_path = get_model_path(model_dir, model)

    # Each trainer returns a (fitted model, score) pair.
    if model == "xgboost":
        scikit_model, score = train_xgboost(hour)
    else:
        scikit_model, score = train_ridge(hour)

    joblib.dump(scikit_model, model_path)

    return score
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train the requested model on the hour data and persist it.

    Args:
        model_dir: Forwarded to ``get_model_path``.
        hour_path: Forwarded to ``read_data``.
        model: ``"xgboost"`` or ``"ridge"``.

    Raises:
        ValueError: If ``model`` is neither ``"xgboost"`` nor ``"ridge"``.
    """
    allowed = {"xgboost", "ridge"}
    # Guard clause: reject unknown names before any data is read.
    if model not in allowed:
        raise ValueError("results: model must be one of %s." % allowed)

    prepared = read_data(hour_path)
    for stage in (preprocess, dummify, postprocess):
        prepared = stage(prepared)

    trainer = train_xgboost if model == "xgboost" else train_ridge
    fitted = trainer(prepared)

    destination = get_model_path(model, model_dir)
    joblib.dump(fitted, destination)
def train_and_persist(model_dir=None, hour_path=None, model="xgboost"):
    """Train a model on the hour data, persist it and return its score.

    ``model="ridge"`` trains the ridge model; every other value trains
    xgboost. The fitted model is dumped with joblib to the path returned by
    ``get_model_path(model_dir, model)``.

    Returns:
        The score returned by the trainer alongside the fitted model.
    """
    prepared = read_data(hour_path)
    for stage in (preprocess, dummify, postprocess):
        prepared = stage(prepared)

    destination = get_model_path(model_dir, model)

    # Implement other models?
    trainer = train_ridge if model == "ridge" else train_xgboost
    fitted, score_train = trainer(prepared)

    joblib.dump(fitted, destination)
    return score_train
def train_and_persist(model_dir=None, hour_path=None, model_name="xgboost"):
    """Train the requested model on the hour data and persist it.

    Args:
        model_dir: Forwarded to ``get_model_path``.
        hour_path: Forwarded to ``read_data``.
        model_name: ``"xgboost"``, ``"ridge"`` or ``"lasso"``.

    Raises:
        ValueError: If ``model_name`` is not a supported name.
    """
    if model_name not in ("xgboost", "ridge", "lasso"):
        # Previously this case only printed the message and then crashed
        # with UnboundLocalError at joblib.dump.
        raise ValueError(
            "model should be equal to 'xgboost' or 'ridge' or 'lasso'"
        )

    hour = read_data(hour_path)
    hour = preprocess(hour)
    hour = dummify(hour)
    hour = postprocess(hour)

    # TODO: Implement other models?
    if model_name == "xgboost":
        model = train_xgboost(hour)
    elif model_name == "ridge":
        model = train_ridge(hour)
    else:
        model = train_lasso(hour)

    model_path = get_model_path(model_dir, model_name)
    joblib.dump(model, model_path)