def main():
    """Split the loaded dataset and write every split to its own ``.npy`` file.

    Each record returned by ``save_helper.load_data`` contributes its first
    four fields as the feature vector and its fifth field as the target.
    The arrays produced by ``split_dataset`` are saved under
    ``DOWNSTREAM_DIR`` as ``<key>.npy``, one file per key of the result.
    """
    # Make sure the output directory exists before anything is written.
    os.makedirs(DOWNSTREAM_DIR, exist_ok=True)
    params = get_params()

    records = save_helper.load_data(DATA_FILEPATH)
    features = [record[:4] for record in records]
    targets = [record[4] for record in records]

    splits = split_dataset(features, targets, params['test_rate'])
    for split_name, values in splits.items():
        destination = os.path.join(DOWNSTREAM_DIR, f'{split_name}.npy')
        np.save(destination, values)
def _train_and_export(pipeline, modelname, data):
    """Fit and evaluate ``pipeline``, then persist it in both export formats.

    For each format (sklearn pickle first, then ONNX) the model file is
    written into ``MODEL_DIR`` and immediately followed by its interface
    YAML, which is the same order the steps were originally performed in.

    Args:
        pipeline: Estimator/pipeline object accepted by ``train_model``,
            ``evaluate_model`` and the ``save_helper`` writers.
        modelname: Base name used for every generated file
            (``<modelname>.pkl``, ``<modelname>_sklearn.yaml``,
            ``<modelname>.onnx``, ``<modelname>_onnx_runtime.yaml``).
        data: Mapping with ``x_train``, ``y_train``, ``x_test`` and
            ``y_test`` entries, as returned by ``split_dataset``.
    """
    train_model(pipeline, data["x_train"], data["y_train"])
    evaluate_model(pipeline, data["x_test"], data["y_test"])

    # Short dtype name of the training features (e.g. "float64"); computed
    # once instead of once per interface file.
    input_dtype = str(data["x_train"].dtype).split(".")[-1]

    # (model file, interface file, writer, runtime, predictor module path)
    exports = (
        (
            f"{modelname}.pkl",
            f"{modelname}_sklearn.yaml",
            save_helper.dump_sklearn,
            MODEL_RUNTIME.SKLEARN,
            "src.app.ml.iris.iris_predictor_sklearn",
        ),
        (
            f"{modelname}.onnx",
            f"{modelname}_onnx_runtime.yaml",
            save_helper.save_onnx,
            MODEL_RUNTIME.ONNX_RUNTIME,
            "src.app.ml.iris.iris_predictor_onnx",
        ),
    )

    for model_filename, interface_filename, write_model, runtime, predictor in exports:
        write_model(pipeline, os.path.join(MODEL_DIR, model_filename))
        save_helper.save_interface(
            modelname,
            os.path.join(MODEL_DIR, interface_filename),
            [1, 4],  # presumably the input shape: one row of four features
            input_dtype,
            [1, 3],  # presumably the output shape: one row of three scores
            "float32",
            DATA_TYPE.ARRAY,
            [{model_filename: runtime}],
            PREDICTION_TYPE.CLASSIFICATION,
            predictor,
            label_filepath=LABEL_FILEPATH,
        )


def main():
    """Train, evaluate and export the ``iris_svc`` and ``iris_tree`` models.

    Loads the dataset, uses the first four fields of every record as
    features and the fifth as the target, splits it with ``split_dataset``
    (default arguments), and then runs the full train/evaluate/export cycle
    for the SVC pipeline followed by the tree pipeline. All artifacts are
    written into ``MODEL_DIR``.
    """
    os.makedirs(MODEL_DIR, exist_ok=True)

    # The returned labels are not used here. The call is kept so that any
    # error raised while loading the label file still occurs before training.
    # NOTE(review): drop this call if load_labels has no useful side effect.
    save_helper.load_labels(LABEL_FILEPATH)

    records = save_helper.load_data(DATA_FILEPATH)
    features = [record[:4] for record in records]
    targets = [record[4] for record in records]
    data = split_dataset(features, targets)

    # Each pipeline is built only when its turn comes, so the tree pipeline
    # is still constructed after the SVC model has been fully exported.
    model_specs = (
        ("iris_svc", define_svc_pipeline),
        ("iris_tree", define_tree_pipeline),
    )
    for modelname, build_pipeline in model_specs:
        _train_and_export(build_pipeline(), modelname, data)