Example #1
0
def predict(data):
    """Load the persisted model and return the prediction for the first sample.

    Args:
        data: Feature matrix (2-D array-like) accepted by the model's
            ``predict`` method.

    Returns:
        The model's prediction for the first row of ``data``.
    """
    # NOTE(review): the original read `read_params(params_path)` into an
    # unused `config` variable; `params_path` is not defined in this scope,
    # so that line could raise NameError at call time — removed as dead code.
    model_path = os.path.join(
        "prediction_service", "model", "model.joblib")
    model = joblib.load(model_path)
    prediction = model.predict(data)
    return prediction[0]
def log_production_model(config_path):
    """Promote the MLflow model version with the lowest MAE to Production.

    Every other registered version of the model is transitioned to Staging.
    The winning version's artifact is then loaded and dumped to the web-app
    model path from the config.

    Args:
        config_path: Path to the params file readable by ``read_params``.

    Raises:
        RuntimeError: If no registered model version matches the best run.
    """
    config = read_params(config_path)
    mlflow_config = config["mlflow_config"]
    model_name = mlflow_config["registered_model_name"]
    remote_server_uri = mlflow_config["remote_server_uri"]

    mlflow.set_registry_uri(remote_server_uri)
    runs = mlflow.search_runs(experiment_ids=1)
    # BUG FIX: the original used `[0]` after sort_values / boolean filtering,
    # which is *label*-based indexing — it only works when the row labelled 0
    # happens to survive. Use idxmin/loc to select the best run positionally.
    best_idx = runs["metrics.mae"].idxmin()
    lowest_run_id = runs.loc[best_idx, "run_id"]

    client = MlflowClient()
    logged_model = None
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)
        current_version = mv["version"]

        if mv["run_id"] == lowest_run_id:
            logged_model = mv["source"]
            pprint(mv, indent=4)
            client.transition_model_version_stage(name=model_name,
                                                  version=current_version,
                                                  stage="Production")
        else:
            client.transition_model_version_stage(name=model_name,
                                                  version=current_version,
                                                  stage="Staging")

    if logged_model is None:
        # BUG FIX: the original fell through to a NameError on `logged_model`
        # when no version matched the best run — fail explicitly instead.
        raise RuntimeError(
            f"no registered version of '{model_name}' matches run {lowest_run_id}")

    loaded_model = mlflow.pyfunc.load_model(logged_model)
    model_path = config["webapp_model_dir"]

    joblib.dump(loaded_model, model_path)
Example #3
0
def train_and_evaluate(config_path):
    """Train an ElasticNet regressor and persist metrics, params, and model.

    Reads the train/test splits and hyper-parameters from the params file,
    fits the model, writes evaluation scores and parameters as JSON reports,
    and dumps the fitted estimator with joblib.

    Args:
        config_path: Path to the params file readable by
            ``get_data.read_params``.
    """
    config = get_data.read_params(config_path)

    train_path = config["split_data"]["train_path"]
    test_path = config["split_data"]["test_path"]
    seed = config["base"]["random_state"]
    model_dir = config["model_dir"]

    enet_params = config["estimators"]["ElasticNet"]["params"]
    alpha = enet_params["alpha"]
    l1_ratio = enet_params["l1_ratio"]

    # Target kept as a one-element list so selection yields a DataFrame,
    # matching the original behavior.
    target = [config["base"]["target_col"]]

    train_df = pd.read_csv(train_path, sep=",")
    test_df = pd.read_csv(test_path, sep=",")

    train_y = train_df[target]
    test_y = test_df[target]
    train_x = train_df.drop(target, axis=1)
    test_x = test_df.drop(target, axis=1)

    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=seed)
    model.fit(train_x, train_y)

    predictions = model.predict(test_x)
    rmse, mae, r2 = eval_metrics(test_y, predictions)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    scores_file = config["reports"]["scores"]
    params_file = config["reports"]["params"]

    with open(scores_file, "w") as fh:
        json.dump({"rmse": rmse, "mae": mae, "r2": r2}, fh, indent=4)

    with open(params_file, "w") as fh:
        json.dump({"alpha": alpha, "l1_ratio": l1_ratio}, fh, indent=4)

    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(model, os.path.join(model_dir, "model.joblib"))
Example #4
0
def split_and_saved_data(config_path):
    """Split the raw dataset into train/test CSV files.

    Input/output paths, the split ratio, and the random seed are all taken
    from the params file.

    Args:
        config_path: Path to the params file readable by
            ``get_data.read_params``.
    """
    config = get_data.read_params(config_path)

    split_cfg = config['split_data']
    test_data_path = split_cfg['test_path']
    train_data_path = split_cfg['train_path']
    raw_data_path = config['load_data']['raw_dataset_csv']
    split_ratio = split_cfg['test_size']
    random_state = config['base']['random_state']

    raw_df = pd.read_csv(raw_data_path, sep=",")
    train_df, test_df = train_test_split(
        raw_df, test_size=split_ratio, random_state=random_state)

    # Write both splits with identical CSV options.
    for frame, out_path in ((train_df, train_data_path),
                            (test_df, test_data_path)):
        frame.to_csv(out_path, sep=",", index=False, encoding='utf-8')
Example #5
0
def split_and_saved_data(config_path):
    """Read the raw dataset, split it, and write train/test CSVs.

    Paths, split ratio, and random seed come from the params file.

    Args:
        config_path: Path to the params file readable by
            ``get_data.read_params``.
    """
    config = get_data.read_params(config_path)
    split_cfg = config["split_data"]

    df = pd.read_csv(config["load_data"]["raw_dataset_csv"], sep=",")
    train, test = train_test_split(
        df,
        test_size=split_cfg["test_size"],
        random_state=config["base"]["random_state"],
    )
    train.to_csv(split_cfg["train_path"], sep=",", index=False,
                 encoding="utf-8")
    test.to_csv(split_cfg["test_path"], sep=",", index=False,
                encoding="utf-8")
Example #6
0
def load_and_save(config_path):
    """Fetch the source dataset, normalise column names, and save the raw CSV.

    Spaces in column names are replaced with underscores before writing to
    the raw-dataset path from the params file.

    Args:
        config_path: Path to the params file readable by
            ``get_data.read_params``.
    """
    config = get_data.read_params(config_path)
    df = get_data.get_data(config_path)
    sanitized_cols = [column.replace(" ", "_") for column in df.columns]
    out_path = config['load_data']['raw_dataset_csv']
    df.to_csv(out_path, sep=',', index=False, header=sanitized_cols)