def main():
    """Split the loaded dataset and save every split as a ``.npy`` file.

    Creates ``DOWNSTREAM_DIR`` if needed, loads the records from
    ``DATA_FILEPATH``, takes the first four values of each record as the
    input and the fifth value as the target, and hands both to
    ``split_dataset`` together with the ``test_rate`` entry of the
    parameters returned by ``get_params``. Each item of the resulting
    mapping is written to ``DOWNSTREAM_DIR/<key>.npy``.
    """
    os.makedirs(DOWNSTREAM_DIR, exist_ok=True)
    params = get_params()

    records = save_helper.load_data(DATA_FILEPATH)
    # Columns 0-3 are the inputs, column 4 is the target.
    inputs = [record[:4] for record in records]
    targets = [record[4] for record in records]
    splits = split_dataset(inputs, targets, params['test_rate'])

    for split_name, split_values in splits.items():
        destination = os.path.join(DOWNSTREAM_DIR, f'{split_name}.npy')
        np.save(destination, split_values)
Esempio n. 2
0
def _save_iris_interface(modelname, interface_filename, model_filename,
                         runtime, predictor, input_dtype):
    """Write one interface file for an exported iris model.

    Thin wrapper around ``save_helper.save_interface`` that fixes the
    arguments shared by every export in this script and varies only the
    file names, the runtime and the predictor module.

    Args:
        modelname: Model name passed through to ``save_interface``.
        interface_filename: File name of the interface file, created
            inside ``MODEL_DIR``.
        model_filename: File name of the serialized model the interface
            refers to.
        runtime: ``MODEL_RUNTIME`` member associated with the model file.
        predictor: Dotted module path of the predictor implementation.
        input_dtype: String name of the training-input dtype.
    """
    save_helper.save_interface(
        modelname,
        os.path.join(MODEL_DIR, interface_filename),
        # NOTE(review): presumably the input shape (1 sample x 4 features)
        # -- confirm against save_helper.save_interface.
        [1, 4],
        input_dtype,
        # NOTE(review): presumably the output shape (1 sample x 3 classes)
        # and the output dtype -- confirm against save_interface.
        [1, 3],
        "float32",
        DATA_TYPE.ARRAY,
        [{
            model_filename: runtime
        }],
        PREDICTION_TYPE.CLASSIFICATION,
        predictor,
        label_filepath=LABEL_FILEPATH,
    )


def _train_and_export(pipeline, modelname, data):
    """Train and evaluate ``pipeline``, then export it in both formats.

    The pipeline is fitted on ``data["x_train"]``/``data["y_train"]`` and
    evaluated on ``data["x_test"]``/``data["y_test"]``. It is then written
    to ``MODEL_DIR`` as ``<modelname>.pkl`` (via
    ``save_helper.dump_sklearn``) and ``<modelname>.onnx`` (via
    ``save_helper.save_onnx``), each followed by its interface file.

    Args:
        pipeline: Model pipeline accepted by ``train_model``,
            ``evaluate_model`` and the ``save_helper`` exporters.
        modelname: Base name used for every file written for this model.
        data: Mapping with ``x_train``, ``y_train``, ``x_test`` and
            ``y_test`` entries; ``x_train`` must expose ``.dtype``.
    """
    train_model(pipeline, data["x_train"], data["y_train"])
    evaluate_model(pipeline, data["x_test"], data["y_test"])

    # Keep only the last dotted component of the dtype's string form.
    input_dtype = str(data["x_train"].dtype).split(".")[-1]

    sklearn_filename = f"{modelname}.pkl"
    save_helper.dump_sklearn(pipeline,
                             os.path.join(MODEL_DIR, sklearn_filename))
    _save_iris_interface(
        modelname,
        f"{modelname}_sklearn.yaml",
        sklearn_filename,
        MODEL_RUNTIME.SKLEARN,
        "src.app.ml.iris.iris_predictor_sklearn",
        input_dtype,
    )

    onnx_filename = f"{modelname}.onnx"
    save_helper.save_onnx(pipeline,
                          os.path.join(MODEL_DIR, onnx_filename))
    _save_iris_interface(
        modelname,
        f"{modelname}_onnx_runtime.yaml",
        onnx_filename,
        MODEL_RUNTIME.ONNX_RUNTIME,
        "src.app.ml.iris.iris_predictor_onnx",
        input_dtype,
    )


def main():
    """Train the iris SVC and tree pipelines and export them to MODEL_DIR.

    Loads the dataset from ``DATA_FILEPATH``, uses the first four values of
    every record as inputs and the fifth as the target, splits it with
    ``split_dataset``, and then trains, evaluates and exports the
    ``iris_svc`` pipeline followed by the ``iris_tree`` pipeline.
    """
    os.makedirs(MODEL_DIR, exist_ok=True)
    # NOTE(review): the loaded labels are not used in this function; the
    # call is kept in case load_labels has side effects or validates the
    # label file -- confirm and remove if it does not.
    save_helper.load_labels(LABEL_FILEPATH)

    _full_data = save_helper.load_data(DATA_FILEPATH)
    _data = [d[:4] for d in _full_data]
    _target = [d[4] for d in _full_data]
    data = split_dataset(_data, _target)

    # Each pipeline is built only after the previous one is fully exported,
    # matching the original sequential order.
    _train_and_export(define_svc_pipeline(), "iris_svc", data)
    _train_and_export(define_tree_pipeline(), "iris_tree", data)