def init():
    """Load the serving model and its transform JSON into module globals.

    Loaders are tried in order, each one being the fallback for the previous:

    1. ``PipelineModel.load(local_model_path)``
    2. ``H2OMOJOModel.createFromMojo(local_model_path + '/mojo_model', ...)``
    3. ``load_xgb_model`` for both the XGBoost classification model and the
       accompanying pipeline model (stored in ``pipeline_model``).

    Afterwards the JSON file returned by ``get_jsonfile_fullname()`` is parsed
    into ``model_json``.

    Globals written: ``model``, ``pipeline_model`` (XGBoost path only),
    ``final_transform_json_path``, ``model_json``.
    """
    global model, pipeline_model, final_transform_json_path, model_json

    try:
        print("尝试加载PipelineModel")
        model = PipelineModel.load(local_model_path)
    except Exception:
        # `except Exception` (not a bare `except`) so that KeyboardInterrupt
        # and SystemExit still propagate; the traceback is logged so the
        # reason for falling back is not lost.
        logging.warning("PipelineModel.load failed; trying H2OMOJOModel",
                        exc_info=True)
        try:
            # H2O models must be loaded through this path.
            from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel
            print("从加载PipelineModel的try中跳出")
            print("在except的try中尝试加载H2OMOJOModel")
            settings = H2OMOJOSettings(withDetailedPredictionCol=True)
            model = H2OMOJOModel.createFromMojo(
                local_model_path + '/mojo_model', settings)
        except Exception:
            logging.warning("H2OMOJOModel load failed; trying XGBoost model",
                            exc_info=True)
            print("从加载H2OMOJOModel的try中跳出")
            print("尝试加载XGBModel")
            model = load_xgb_model(local_model_path,
                                   m_type='XGBoostClassificationModel')
            if not model:
                logging.error('XGBoostClassificationModel没有加载成功')
            pipeline_model = load_xgb_model(local_model_path, "PipelineModel")
            if not pipeline_model:
                logging.error('XGB需要的pipelinemodel没有加载成功')
                logging.error(pipeline_model)

    final_transform_json_path = get_jsonfile_fullname()

    # Read the JSON stored alongside the model into `model_json`.
    with open(final_transform_json_path, encoding='utf-8') as f:
        model_json = json.load(f)
예제 #2
0
def func():
    """Load the model via a chain of fallbacks and return it.

    Loaders are tried in order: ``PipelineModel.load``, then
    ``H2OMOJOModel.createFromMojo`` on ``local_model_path + '/mojo_model'``,
    then ``load_xgb_model`` (which also loads the companion pipeline model).

    Globals written: ``model`` and, on the XGBoost path, ``pipeline_model``.

    Returns:
        A ``(model, pipeline_model)`` tuple. ``pipeline_model`` is only
        assigned on the XGBoost path; on the other paths the current
        module-level value is returned, or ``None`` when no such global
        exists (previously this raised ``NameError``).
    """
    global model, pipeline_model

    try:
        print("尝试加载PipelineModel")
        model = PipelineModel.load(local_model_path)
        print("加载pipeline模型成功")
    except Exception:
        # `except Exception` (not a bare `except`) so that KeyboardInterrupt
        # and SystemExit still propagate; log the cause of the fallback.
        logging.warning("PipelineModel.load failed; trying H2OMOJOModel",
                        exc_info=True)
        try:
            # H2O models must be loaded through this path.
            from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel
            print("从加载PipelineModel的try中跳出")
            print("在except的try中尝试加载H2OMOJOModel")
            settings = H2OMOJOSettings(withDetailedPredictionCol=True)
            model = H2OMOJOModel.createFromMojo(
                local_model_path + '/mojo_model', settings)
        except Exception:
            logging.warning("H2OMOJOModel load failed; trying XGBoost model",
                            exc_info=True)
            print("从加载H2OMOJOModel的try中跳出")
            print("尝试加载XGBModel")
            model = load_xgb_model(local_model_path,
                                   m_type='XGBoostClassificationModel')
            if not model:
                logging.error('XGBoostClassificationModel没有加载成功')
            pipeline_model = load_xgb_model(local_model_path, "PipelineModel")
            if not pipeline_model:
                logging.error('XGB需要的pipelinemodel没有加载成功')
                logging.error(pipeline_model)

    # `pipeline_model` may never have been bound when an earlier loader
    # succeeded; look it up defensively instead of raising NameError.
    return model, globals().get("pipeline_model")
예제 #3
0
    def test_h2o_mojo_pipeline_predictions(self):
        """Score the prostate CSV with a MOJO pipeline and pin the first 5 values.

        The pipeline MOJO is loaded directly from disk (the original note says
        this is done without starting an H2O Context). The same five expected
        values are checked twice: through the ``prediction.preds`` column and
        through ``selectPredictionUDF("AGE")``.
        """
        expected_values = (
            65.36320409515132,
            64.96902128114817,
            64.96721023747583,
            65.78772654671035,
            66.11327967814829,
        )

        mojo_uri = "file://" + os.path.abspath(
            "../ml/src/test/resources/mojo2data/pipeline.mojo")
        mojo_settings = H2OMOJOSettings(namedMojoOutputColumns=False)
        pipeline = H2OMOJOPipelineModel.createFromMojo(mojo_uri, mojo_settings)

        prostate_df = self._spark.read.csv(
            "file://" + unit_test_utils.locate("smalldata/prostate/prostate.csv"),
            header=True)
        scored = pipeline.transform(prostate_df).repartition(1)

        # Access through the nested prediction column.
        nested_rows = scored.select("prediction.preds").take(5)
        for idx, expected in enumerate(expected_values):
            assert nested_rows[idx][0][0] == expected

        # Access through the UDF-based selector.
        udf_rows = scored.select(pipeline.selectPredictionUDF("AGE")).take(5)
        for idx, expected in enumerate(expected_values):
            assert udf_rows[idx][0] == expected
예제 #4
0
 def h2o_model_load(self, path):
     """
     Load an H2O MOJO model.

     :param path: base path, joined with ``self.model_key`` via
         ``self.concat_path``; the MOJO is read from ``<joined>/mojo_model``.
     :return: the object returned by ``H2OMOJOModel.createFromMojo``, created
         with ``withDetailedPredictionCol=False``.
     """
     model_dir = self.concat_path(path, self.model_key)
     # Imported lazily, as in the original, so pysparkling is only required
     # when an H2O model is actually loaded.
     from pysparkling.ml import H2OMOJOModel, H2OMOJOSettings
     mojo_settings = H2OMOJOSettings(withDetailedPredictionCol=False)
     return H2OMOJOModel.createFromMojo(model_dir + "/mojo_model", mojo_settings)
예제 #5
0
def testMojoPredictionsUnseenCategoricals(spark):
    """Score one row whose ``class`` value is ``"Missing_categorical"``.

    The MOJO is loaded with ``convertUnknownCategoricalLevelsToNa=True``
    (presumably the level is absent from the training data — inferred from
    the test name). The test checks that the input columns come back
    unchanged and that the prediction equals the pinned value.
    """
    mojo_uri = "file://" + os.path.abspath(
        "../ml/src/test/resources/deep_learning_airlines_categoricals.zip")
    mojo_settings = H2OMOJOSettings(convertUnknownCategoricalLevelsToNa=True)
    mojo = H2OMOJOModel.createFromMojo(mojo_uri, mojo_settings)

    make_row = Row("sepal_len", "sepal_wid", "petal_len", "petal_wid", "class")
    records = [(5.1, 3.5, 1.4, 0.2, "Missing_categorical")]
    row_rdd = spark.sparkContext.parallelize(records).map(
        lambda rec: make_row(*rec))
    scored = mojo.transform(spark.createDataFrame(row_rdd)).collect()[0]

    # Checked in the same order as the original individual asserts.
    expected_columns = (
        ("class", "Missing_categorical"),
        ("petal_len", 1.4),
        ("petal_wid", 0.2),
        ("sepal_len", 5.1),
        ("sepal_wid", 3.5),
        ("prediction", 5.240174068202646),
    )
    for column, expected in expected_columns:
        assert scored[column] == expected
예제 #6
0
def init():
    """Download the model and load it, trying each supported format in turn.

    After downloading/unpacking via ``download_model.download_model``, the
    loaders are tried in this order and ``model_tag`` records which one won:

    1. PMML (``model_tag = 1``): a ``*.pmml`` file directly under
       ``local_model_path``, or a ``part*`` file under
       ``local_model_path/model``. Sets ``pmmlModel`` and ``pmmlFields``.
    2. ``PipelineModel.load`` (``model_tag = 2``).
    3. ``H2OMOJOModel.createFromMojo`` (``model_tag = 3``).
    4. ``load_xgb_model`` (``model_tag = 4``); also sets ``pipeline_model``.

    For formats 2-4 the JSON file returned by ``get_jsonfile_fullname()`` is
    parsed into ``model_json``.

    Globals written: ``model_tag``, ``pmmlFields``, ``pmmlModel``, ``model``,
    ``pipeline_model``, ``final_transform_json_path``, ``model_json``.
    """
    global model_tag, pmmlFields, pmmlModel, model, pipeline_model
    global final_transform_json_path, model_json

    # Download the model archive and unpack it.
    download_model.download_model(download_model_zip_path, unzip_path)
    try:
        # If a PMML file exists under the model path, load it directly.
        # The PMML archive layout is model/xxx.pmml.
        full_path = None
        model_path_childs = os.listdir(local_model_path)
        logging.info(f'模型文件夹下的文件有:{model_path_childs}')
        for child in model_path_childs:
            if child.endswith(".pmml"):
                full_path = os.path.join(local_model_path, child)
                break
            elif child == "model":
                # Or a PMML model stored as model/model/part-00000.
                for file in os.listdir(os.path.join(local_model_path, "model")):
                    if file.startswith("part"):
                        full_path = local_model_path + "/model/" + file
                        break

        if full_path is None:
            # Previously an accidental UnboundLocalError on `full_path`
            # triggered the fallback; raise an explicit error instead.
            raise FileNotFoundError(
                f"no PMML file found under {local_model_path}")

        logging.info(f'获取到的模型路径是:{full_path}')
        print("模型大小是:", os.path.getsize(full_path))
        pmmlModel = loadPmml.fromFile(full_path)
        pmmlFields = parse_xml(full_path)
        logging.info(f'成功加载pmml模型')
        model_tag = 1
    except Exception:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit still propagate; the traceback records why PMML loading
        # was abandoned.
        logging.info("从pmml模型的加载处理中跳出", exc_info=True)
        # Resolve the model path.
        get_model_path(local_model_path)
        try:
            logging.info("尝试加载PipelineModel")
            model = PipelineModel.load(local_model_path)
            model_tag = 2
        except Exception:
            logging.warning("PipelineModel.load failed; trying H2OMOJOModel",
                            exc_info=True)
            try:
                # H2O models must be loaded through this path.
                from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel
                logging.info("从加载PipelineModel的try中跳出")
                print("在except的try中尝试加载H2OMOJOModel")
                settings = H2OMOJOSettings(withDetailedPredictionCol=True)
                model = H2OMOJOModel.createFromMojo(
                    local_model_path + '/mojo_model', settings)
                model_tag = 3
            except Exception:
                logging.warning(
                    "H2OMOJOModel load failed; trying XGBoost model",
                    exc_info=True)
                print("从加载H2OMOJOModel的try中跳出")
                print("尝试加载XGBModel")
                model = load_xgb_model(local_model_path,
                                       m_type='XGBoostClassificationModel')
                if not model:
                    logging.error('XGBoostClassificationModel没有加载成功')
                pipeline_model = load_xgb_model(local_model_path,
                                                "PipelineModel")
                if not pipeline_model:
                    logging.error('XGB需要的pipelinemodel没有加载成功')
                    logging.error(pipeline_model)

                model_tag = 4

        # NOTE(review): as in the original, the transform JSON is only loaded
        # on the non-PMML path — confirm that PMML models never need it.
        final_transform_json_path = get_jsonfile_fullname()

        # Read the JSON stored alongside the model into `model_json`.
        with open(final_transform_json_path, encoding='utf-8') as f:
            model_json = json.load(f)