def main():
    """Train the configured pipeline and export it in the requested format.

    Steps, in order:

    1. Ensure ``DOWNSTREAM_DIR`` and ``MODEL_DIR`` exist.
    2. Read the run parameters via ``get_params()`` and load the training
       arrays from ``X_TRAIN_NPY`` / ``Y_TRAIN_NPY``.
    3. Build the pipeline selected by ``params['ml_model']`` (``'svc'`` or
       ``'tree'``) and train it.
    4. Depending on ``params['save_format']`` (``'sklearn'`` or ``'onnx'``),
       write the model file and call ``save_helper.save_interface`` with a
       ``.yaml`` path, both under ``MODEL_DIR``. Any other save format
       writes nothing.
    5. Copy the label file into ``MODEL_DIR``.

    Raises:
        ValueError: If ``params['ml_model']`` is neither ``'svc'`` nor
            ``'tree'``. Previously this case fell through and failed later
            with an unbound ``pipeline`` variable.
    """
    os.makedirs(DOWNSTREAM_DIR, exist_ok=True)
    os.makedirs(MODEL_DIR, exist_ok=True)

    params = get_params()
    x_train = np.load(X_TRAIN_NPY)
    y_train = np.load(Y_TRAIN_NPY)

    model_kind = params['ml_model']
    if model_kind == 'svc':
        pipeline = define_svc_pipeline()
    elif model_kind == 'tree':
        pipeline = define_tree_pipeline()
    else:
        # Fail loudly here instead of hitting an unbound `pipeline` below.
        raise ValueError(
            f"Unsupported ml_model {model_kind!r}; expected 'svc' or 'tree'."
        )

    train_model(pipeline, x_train, y_train)

    modelname = params['save_model_name']
    label_target = os.path.join(MODEL_DIR, LABEL_FILENAME)

    # NOTE(review): [1, 4] / [1, 3] are presumably the input and output
    # shapes expected by save_interface -- confirm against save_helper.
    if params['save_format'] == 'sklearn':
        model_filename = f'{modelname}.pkl'
        sklearn_interface_filename = f'{modelname}_sklearn.yaml'
        save_helper.dump_sklearn(
            pipeline, os.path.join(MODEL_DIR, model_filename))
        save_helper.save_interface(
            modelname,
            os.path.join(MODEL_DIR, sklearn_interface_filename),
            [1, 4],
            str(x_train.dtype).split('.')[-1],
            [1, 3],
            'float32',
            DATA_TYPE.ARRAY,
            [{model_filename: MODEL_RUNTIME.SKLEARN}],
            PREDICTION_TYPE.CLASSIFICATION,
            'src.app.ml.iris.iris_predictor_sklearn',
            label_filepath=label_target,
        )
    elif params['save_format'] == 'onnx':
        onnx_filename = f'{modelname}.onnx'
        onnx_interface_filename = f'{modelname}_onnx_runtime.yaml'
        save_helper.save_onnx(
            pipeline, os.path.join(MODEL_DIR, onnx_filename))
        save_helper.save_interface(
            modelname,
            os.path.join(MODEL_DIR, onnx_interface_filename),
            [1, 4],
            str(x_train.dtype).split('.')[-1],
            [1, 3],
            'float32',
            DATA_TYPE.ARRAY,
            [{onnx_filename: MODEL_RUNTIME.ONNX_RUNTIME}],
            PREDICTION_TYPE.CLASSIFICATION,
            'src.app.ml.iris.iris_predictor_onnx',
            label_filepath=label_target,
        )
    # Any other save_format intentionally writes no model (unchanged
    # behaviour); only the label file below is produced.

    shutil.copy2(LABEL_FILEPATH, label_target)
def main():
    """Train, evaluate and export both the SVC and the tree pipeline.

    Loads the labels and the raw data through ``save_helper``, splits each
    row into its first four values (features) and its fifth value (target),
    and hands them to ``split_dataset``. For ``iris_svc`` and then
    ``iris_tree`` the pipeline is trained, evaluated, dumped as ``.pkl`` and
    as ``.onnx`` under ``MODEL_DIR``, with one ``save_interface`` call per
    exported file.
    """
    os.makedirs(MODEL_DIR, exist_ok=True)

    # The labels are loaded here but not referenced again in this function.
    labels = save_helper.load_labels(LABEL_FILEPATH)

    rows = save_helper.load_data(DATA_FILEPATH)
    features = [row[:4] for row in rows]
    targets = [row[4] for row in rows]
    data = split_dataset(features, targets)

    def write_interface(modelname, interface_filename, model_filename,
                        runtime, predictor):
        # Shared save_interface call; only the file names, runtime and
        # predictor module differ between the exports.
        save_helper.save_interface(
            modelname,
            os.path.join(MODEL_DIR, interface_filename),
            [1, 4],
            str(data["x_train"].dtype).split(".")[-1],
            [1, 3],
            "float32",
            DATA_TYPE.ARRAY,
            [{model_filename: runtime}],
            PREDICTION_TYPE.CLASSIFICATION,
            predictor,
            label_filepath=LABEL_FILEPATH,
        )

    candidates = (
        ("iris_svc", define_svc_pipeline),
        ("iris_tree", define_tree_pipeline),
    )
    for modelname, build_pipeline in candidates:
        pipeline = build_pipeline()
        train_model(pipeline, data["x_train"], data["y_train"])
        evaluate_model(pipeline, data["x_test"], data["y_test"])

        # scikit-learn export
        pickle_filename = f"{modelname}.pkl"
        save_helper.dump_sklearn(
            pipeline, os.path.join(MODEL_DIR, pickle_filename))
        write_interface(
            modelname,
            f"{modelname}_sklearn.yaml",
            pickle_filename,
            MODEL_RUNTIME.SKLEARN,
            "src.app.ml.iris.iris_predictor_sklearn",
        )

        # ONNX export
        onnx_filename = f"{modelname}.onnx"
        save_helper.save_onnx(
            pipeline, os.path.join(MODEL_DIR, onnx_filename))
        write_interface(
            modelname,
            f"{modelname}_onnx_runtime.yaml",
            onnx_filename,
            MODEL_RUNTIME.ONNX_RUNTIME,
            "src.app.ml.iris.iris_predictor_onnx",
        )
def train_and_save(
    model,
    modelname: str,
    filename: str,
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_test: np.ndarray,
    y_test: np.ndarray,
):
    """Train *model*, evaluate it, then dump it under ``MODEL_DIR``.

    Args:
        model: Object passed through to ``train_model``, ``evaluate_model``
            and ``save_helper.dump_sklearn``.
        modelname: Accepted for the caller's convenience; not used here.
        filename: File name (joined onto ``MODEL_DIR``) for the dumped model.
        x_train: Training inputs forwarded to ``train_model``.
        y_train: Training targets forwarded to ``train_model``.
        x_test: Evaluation inputs forwarded to ``evaluate_model``.
        y_test: Evaluation targets forwarded to ``evaluate_model``.
    """
    train_model(model, x_train, y_train)
    evaluate_model(model, x_test, y_test)

    destination = os.path.join(MODEL_DIR, filename)
    save_helper.dump_sklearn(model, destination)
def main():
    """Train the configured pipeline and export it for downstream use.

    Steps, in order:

    1. Ensure ``DOWNSTREAM_DIR`` and ``MODEL_DIR`` exist.
    2. Read the run parameters via ``get_params()`` and load the training
       arrays from ``X_TRAIN_NPY`` / ``Y_TRAIN_NPY``.
    3. Build the pipeline selected by ``params["ml_model"]`` (``"svc"`` or
       ``"tree"``) and train it.
    4. Depending on ``params["save_format"]`` (``"sklearn"`` or ``"onnx"``),
       write the model file to both ``DOWNSTREAM_DIR`` and ``MODEL_DIR``,
       then call ``save_helper.save_interface`` with a ``.yaml`` path under
       ``MODEL_DIR``. That call is given the ``DOWNSTREAM_DIR`` copy of the
       model and the ``DOWNSTREAM_DIR`` label path. Any other save format
       writes nothing.
    5. Copy the label file into ``DOWNSTREAM_DIR``.

    Raises:
        ValueError: If ``params["ml_model"]`` is neither ``"svc"`` nor
            ``"tree"``. Previously this case fell through and failed later
            with an unbound ``pipeline`` variable.
    """
    os.makedirs(DOWNSTREAM_DIR, exist_ok=True)
    # Files are also written into MODEL_DIR below, so make sure it exists
    # (matches the sibling entry point that creates both directories).
    os.makedirs(MODEL_DIR, exist_ok=True)

    params = get_params()
    x_train = np.load(X_TRAIN_NPY)
    y_train = np.load(Y_TRAIN_NPY)

    model_kind = params["ml_model"]
    if model_kind == "svc":
        pipeline = define_svc_pipeline()
    elif model_kind == "tree":
        pipeline = define_tree_pipeline()
    else:
        # Fail loudly here instead of hitting an unbound `pipeline` below.
        raise ValueError(
            f"Unsupported ml_model {model_kind!r}; expected 'svc' or 'tree'."
        )

    train_model(pipeline, x_train, y_train)

    modelname = params["save_model_name"]
    label_target = os.path.join(DOWNSTREAM_DIR, LABEL_FILENAME)

    # NOTE(review): [1, 4] / [1, 3] are presumably the input and output
    # shapes expected by save_interface -- confirm against save_helper.
    if params["save_format"] == "sklearn":
        sklearn_filename = f"{modelname}.pkl"
        sklearn_filepath = os.path.join(DOWNSTREAM_DIR, sklearn_filename)
        sklearn_modelpath = os.path.join(MODEL_DIR, sklearn_filename)
        save_helper.dump_sklearn(pipeline, sklearn_filepath)
        save_helper.dump_sklearn(pipeline, sklearn_modelpath)

        sklearn_interface_filename = f"{modelname}_sklearn.yaml"
        save_helper.save_interface(
            modelname,
            os.path.join(MODEL_DIR, sklearn_interface_filename),
            [1, 4],
            str(x_train.dtype).split(".")[-1],
            [1, 3],
            "float32",
            DATA_TYPE.ARRAY,
            [{sklearn_filepath: MODEL_RUNTIME.SKLEARN}],
            PREDICTION_TYPE.CLASSIFICATION,
            "src.app.ml.iris.iris_predictor_sklearn",
            label_filepath=label_target,
        )
    elif params["save_format"] == "onnx":
        onnx_filename = f"{modelname}.onnx"
        onnx_filepath = os.path.join(DOWNSTREAM_DIR, onnx_filename)
        onnx_modelpath = os.path.join(MODEL_DIR, onnx_filename)
        save_helper.save_onnx(pipeline, onnx_filepath)
        save_helper.save_onnx(pipeline, onnx_modelpath)

        onnx_interface_filename = f"{modelname}_onnx_runtime.yaml"
        save_helper.save_interface(
            modelname,
            os.path.join(MODEL_DIR, onnx_interface_filename),
            [1, 4],
            str(x_train.dtype).split(".")[-1],
            [1, 3],
            "float32",
            DATA_TYPE.ARRAY,
            [{onnx_filepath: MODEL_RUNTIME.ONNX_RUNTIME}],
            PREDICTION_TYPE.CLASSIFICATION,
            "src.app.ml.iris.iris_predictor_onnx",
            label_filepath=label_target,
        )
    # Any other save_format intentionally writes no model (unchanged
    # behaviour); only the label file below is produced.

    shutil.copy2(LABEL_FILEPATH, label_target)