def predict(data):
    config = read_params(params_path)
    model_dir_path = os.path.join("prediction_service", "model", "model.joblib")
    model = joblib.load(model_dir_path)
    prediction = model.predict(data)
    return prediction[0]
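# predict() relies on a read_params helper and a module-level params_path
# that are not shown above. A minimal sketch of both, assuming the pipeline
# configuration is a YAML file named params.yaml read with PyYAML:
import os
import joblib
import yaml

params_path = "params.yaml"  # assumed default location of the config file

def read_params(config_path):
    # Load the pipeline configuration from a YAML file into a dict.
    with open(config_path) as yaml_file:
        config = yaml.safe_load(yaml_file)
    return config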
from pprint import pprint

import joblib
import mlflow
from mlflow.tracking import MlflowClient


def log_production_model(config_path):
    config = read_params(config_path)

    mlflow_config = config["mlflow_config"]
    model_name = mlflow_config["registered_model_name"]
    remote_server_uri = mlflow_config["remote_server_uri"]
    mlflow.set_registry_uri(remote_server_uri)

    # Pick the run with the lowest MAE from the tracked experiment.
    runs = mlflow.search_runs(experiment_ids=["1"])
    lowest = runs["metrics.mae"].sort_values(ascending=True).iloc[0]
    lowest_run_id = runs[runs["metrics.mae"] == lowest]["run_id"].iloc[0]

    client = MlflowClient()
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)
        if mv["run_id"] == lowest_run_id:
            # Promote the best-performing version to Production.
            current_version = mv["version"]
            logged_model = mv["source"]
            pprint(mv, indent=4)
            client.transition_model_version_stage(
                name=model_name,
                version=current_version,
                stage="Production"
            )
        else:
            # Move every other registered version to Staging.
            current_version = mv["version"]
            client.transition_model_version_stage(
                name=model_name,
                version=current_version,
                stage="Staging"
            )

    loaded_model = mlflow.pyfunc.load_model(logged_model)

    model_path = config["webapp_model_dir"]
    joblib.dump(loaded_model, model_path)
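# A sketch of how log_production_model might be exposed as a pipeline stage;
# the --config flag and the params.yaml default are assumptions, not part of
# the code above:
import argparse

if __name__ == "__main__":
    args = argparse.ArgumentParser()
    args.add_argument("--config", default="params.yaml")
    parsed_args = args.parse_args()
    log_production_model(config_path=parsed_args.config)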
def train_and_evaluate(config_path):
    config = get_data.read_params(config_path)
    test_data_path = config["split_data"]["test_path"]
    train_data_path = config["split_data"]["train_path"]
    random_state = config["base"]["random_state"]
    model_dir = config["model_dir"]

    alpha = config["estimators"]["ElasticNet"]["params"]["alpha"]
    l1_ratio = config["estimators"]["ElasticNet"]["params"]["l1_ratio"]

    target = [config["base"]["target_col"]]

    train = pd.read_csv(train_data_path, sep=",")
    test = pd.read_csv(test_data_path, sep=",")

    train_y = train[target]
    test_y = test[target]

    train_x = train.drop(target, axis=1)
    test_x = test.drop(target, axis=1)

    lr = ElasticNet(
        alpha=alpha,
        l1_ratio=l1_ratio,
        random_state=random_state
    )
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print(" RMSE: %s" % rmse)
    print(" MAE: %s" % mae)
    print(" R2: %s" % r2)

    scores_file = config["reports"]["scores"]
    params_file = config["reports"]["params"]

    with open(scores_file, "w") as f:
        scores = {
            "rmse": rmse,
            "mae": mae,
            "r2": r2
        }
        json.dump(scores, f, indent=4)

    with open(params_file, "w") as f:
        params = {
            "alpha": alpha,
            "l1_ratio": l1_ratio
        }
        json.dump(params, f, indent=4)

    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, "model.joblib")
    joblib.dump(lr, model_path)
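# train_and_evaluate calls an eval_metrics helper that is not shown above.
# A minimal sketch, assuming the usual scikit-learn regression metrics:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def eval_metrics(actual, pred):
    # Root mean squared error, mean absolute error and R^2 on the test split.
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2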
def split_and_saved_data(config_path):
    config = get_data.read_params(config_path)
    test_data_path = config["split_data"]["test_path"]
    train_data_path = config["split_data"]["train_path"]
    raw_data_path = config["load_data"]["raw_dataset_csv"]
    split_ratio = config["split_data"]["test_size"]
    random_state = config["base"]["random_state"]

    df = pd.read_csv(raw_data_path, sep=",")
    train, test = train_test_split(
        df,
        test_size=split_ratio,
        random_state=random_state
    )
    train.to_csv(train_data_path, sep=",", index=False, encoding="utf-8")
    test.to_csv(test_data_path, sep=",", index=False, encoding="utf-8")
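# split_and_saved_data depends on pandas, scikit-learn's train_test_split and
# the local get_data module; a sketch of the imports it assumes, plus an
# example call (the params.yaml path is an assumption):
import pandas as pd
from sklearn.model_selection import train_test_split
import get_data  # assumed local module providing read_params()

# split_and_saved_data(config_path="params.yaml")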
def load_and_save(config_path):
    config = get_data.read_params(config_path)
    df = get_data.get_data(config_path)
    # Replace spaces in column names with underscores before saving.
    new_cols = [col.replace(" ", "_") for col in df.columns]
    raw_data_path = config["load_data"]["raw_dataset_csv"]
    df.to_csv(raw_data_path, sep=",", index=False, header=new_cols)
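# load_and_save calls get_data.get_data, which is not shown above. A minimal
# sketch, assuming the source CSV path is configured under
# data_source.s3_source (a hypothetical key; adjust to the actual params file)
# and that read_params is the YAML loader sketched earlier:
import pandas as pd

def get_data(config_path):
    # Read the raw CSV referenced in the configuration into a DataFrame.
    config = read_params(config_path)
    data_path = config["data_source"]["s3_source"]
    df = pd.read_csv(data_path, sep=",", encoding="utf-8")
    return df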