    def test_lgbm_02(self):

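        # Fit an LGBMRegressor on the auto-mpg data, export the pipeline with
        # lgb_to_pmml and compare the exported trees with the booster dump.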
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [
            name for name in auto.columns if name not in ('mpg', 'car name')
        ]
        target_name = 'mpg'
        f_name = "lgbmr_pmml.pmml"
        model = LGBMRegressor()
        pipeline_obj = Pipeline([('lgbmr', model)])

        pipeline_obj.fit(auto[feature_names], auto[target_name])

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

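        # Walk each Segment's TreeModel and record every split value and leaf
        # score found in the PMML.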
        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

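        # Dump the trained booster and collect the same values and scores from
        # its native tree structures via the create_node helper.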
        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
Example #2
 def test_04_plain_text_script(self):
     model = applications.MobileNet(weights='imagenet',
                                    include_top=False,
                                    input_shape=(224, 224, 3))
     x = model.output
     x = Flatten()(x)
     x = Dense(1024, activation="relu")(x)
     predictions = Dense(2, activation='sigmoid')(x)
     model_final = Model(inputs=model.input,
                         outputs=predictions,
                         name='predictions')
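     # Embed nyoka/tests/preprocess.py as a plain-text pre-processing script
     # (encode=False) inside the generated PMML.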
     script_content = open("nyoka/tests/preprocess.py", 'r').read()
     pmml_obj = KerasToPmml(model_final,
                            dataSet='image',
                            predictedClasses=['cat', 'dog'],
                            script_args={
                                "content": script_content,
                                "def_name": "getBase64EncodedString",
                                "return_type": "string",
                                "encode": False
                            })
     pmml_obj.export(open("script_with_keras.pmml", 'w'), 0)
     self.assertEqual(os.path.isfile("script_with_keras.pmml"), True)
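     # Re-parse the PMML and verify that the stored script text and the layer
     # count survive the round trip.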
     reconPmmlObj = pml.parse("script_with_keras.pmml", True)
     content = reconPmmlObj.TransformationDictionary.DefineFunction[
         0].Apply.Extension[0].anytypeobjs_
     content[0] = content[0].replace("\t", "")
     content = "\n".join(content)
     self.assertEqual(script_content, content)
     self.assertEqual(len(model_final.layers),
                      len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
Example #3
    def test_sklearn_01(self):

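        # Fit an SVC on the Iris data, export it with skl_to_pmml and check that
        # the intercepts and the RBF gamma are preserved in the PMML.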
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "svc_pmml.pmml"
        model = SVC()
        pipeline_obj = Pipeline([('svm', model)])

        pipeline_obj.fit(irisd[features], irisd[target])
        skl_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)
        ## 1
        svms = pmml_obj.SupportVectorMachineModel[0].SupportVectorMachine
        for mod_val, recon_val in zip(model.intercept_, svms):
            self.assertEqual(
                "{:.16f}".format(mod_val),
                "{:.16f}".format(recon_val.Coefficients.absoluteValue))

        ## 2
        svm = pmml_obj.SupportVectorMachineModel[0]
        self.assertEqual(svm.RadialBasisKernelType.gamma, model._gamma)
Example #4
    def test_xgboost_02(self):
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [
            name for name in auto.columns if name not in ('mpg', 'car name')
        ]
        target_name = 'mpg'
        f_name = "xgbr_pmml.pmml"
        model = XGBRegressor()
        pipeline_obj = Pipeline([('xgbr', model)])

        pipeline_obj.fit(auto[feature_names], auto[target_name])
        xgboost_to_pmml(pipeline_obj,
                        feature_names,
                        target_name,
                        f_name,
                        description="A test model")
        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

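        # Dump every booster tree as JSON and collect its split values and leaf
        # scores via the create_node helper.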
        get_nodes_in_json_format = []
        for i in range(model.n_estimators):
            get_nodes_in_json_format.append(
                json.loads(model._Booster.get_dump(dump_format='json')[i]))

        for i in range(len(get_nodes_in_json_format)):
            list_score_temp = []
            list_val_temp = []
            node_list = get_nodes_in_json_format[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
Example #5
    def test_sklearn_02(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "knn_pmml.pmml"

        pipeline_obj = Pipeline([('scaling', StandardScaler()),
                                 ('knn', KNeighborsClassifier(n_neighbors=5))])

        pipeline_obj.fit(irisd[features], irisd[target])

        skl_to_pmml(pipeline_obj, features, target, f_name)

        pmml_obj = pml.parse(f_name, True)
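        # The k-NN export should use a euclidean ComparisonMeasure of kind
        # "distance" and preserve the fitted n_neighbors.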
        ##1
        self.assertIsNotNone(
            pmml_obj.NearestNeighborModel[0].ComparisonMeasure.euclidean)

        ##2
        self.assertEqual(
            pmml_obj.NearestNeighborModel[0].ComparisonMeasure.kind,
            "distance")

        ##3
        self.assertEqual(pipeline_obj.steps[-1][-1].n_neighbors,
                         pmml_obj.NearestNeighborModel[0].numberOfNeighbors)
    def test_sklearn_06(self):
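        # Plain LinearRegression on auto-mpg: the RegressionTable in the PMML
        # must carry the fitted intercept and coefficients.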
        df = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = df.drop(['mpg','car name'],axis=1)
        y = df['mpg']

        features = X.columns
        target = 'mpg'
        f_name = "linearregression_pmml.pmml"
        model = LinearRegression()

        pipeline_obj = Pipeline([
            ('model',model)
        ])

        pipeline_obj.fit(X,y)
        skl_to_pmml(pipeline_obj,features,target,f_name)
        pmml_obj = pml.parse(f_name, True)

        ## 1
        reg_tab = pmml_obj.RegressionModel[0].RegressionTable[0]
        self.assertEqual(reg_tab.intercept,model.intercept_)

        ## 2
        for model_val, pmml_val in zip(model.coef_, reg_tab.NumericPredictor):
            self.assertEqual("{:.16f}".format(model_val),"{:.16f}".format(pmml_val.coefficient))
Example #7
 def __init__(self, pmml):
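     # Parse the PMML file once and rebuild the model from it via _build_model().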
     self.nyoka_pmml = ny.parse(pmml, True)
     self.image_input = None
     self.layer_input = None
     self.model = None
     self.layers_outputs = {}
     self.model = self._build_model()
    def test_sklearn_04(self):
        titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
        features = titanic.columns
        target = 'Survived'
        f_name = "gb_pmml.pmml"

        pipeline_obj = Pipeline([
            ("imp", Imputer(strategy="median")),
            ("gbc", GradientBoostingClassifier(n_estimators = 10))
        ])

        pipeline_obj.fit(titanic[features],titanic[target])

        skl_to_pmml(pipeline_obj, features, target, f_name)

        pmml_obj = pml.parse(f_name,True)

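        # A binary GradientBoostingClassifier exports as a two-segment modelChain
        # whose second segment is a logit-normalized RegressionModel.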
        ##1
        self.assertEqual(pmml_obj.MiningModel[0].Segmentation.multipleModelMethod, "modelChain")

        ##2
        self.assertEqual(pmml_obj.MiningModel[0].Segmentation.Segment.__len__(), 2)

        ##3
        self.assertEqual(pmml_obj.MiningModel[0].Segmentation.Segment[1].RegressionModel.normalizationMethod, "logit")
Example #9
    def test_sklearn_03(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "rf_pmml.pmml"
        model = RandomForestClassifier(n_estimators=100)

        pipeline_obj = Pipeline([
            ("mapping",
             DataFrameMapper([(['sepal length (cm)',
                                'sepal width (cm)'], StandardScaler()),
                              (['petal length (cm)',
                                'petal width (cm)'], Imputer())])),
            ("rfc", model)
        ])

        pipeline_obj.fit(irisd[features], irisd[target])
        skl_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

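        # One PMML segment per tree in the forest, combined by majorityVote.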
        ## 1
        self.assertEqual(
            model.n_estimators,
            pmml_obj.MiningModel[0].Segmentation.Segment.__len__())

        ##2
        self.assertEqual(
            pmml_obj.MiningModel[0].Segmentation.multipleModelMethod,
            "majorityVote")
Example #10
 def test_keras_02(self):
     boston = load_boston()
     data = pd.DataFrame(boston.data)
     features = list(boston.feature_names)
     target = 'PRICE'
     data.columns = features
     data['PRICE'] = boston.target
     x_train, x_test, y_train, y_test = train_test_split(data[features],
                                                         data[target],
                                                         test_size=0.20,
                                                         random_state=42)
     model = Sequential()
     model.add(
         Dense(13,
               input_dim=13,
               kernel_initializer='normal',
               activation='relu'))
     model.add(Dense(23))
     model.add(Dense(1, kernel_initializer='normal'))
     model.compile(loss='mean_squared_error', optimizer='adam')
     model.fit(x_train, y_train, epochs=1000, verbose=0)
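     # The exported PMML carries one extra NetworkLayer (presumably the explicit
     # input layer), hence the -1 in the layer-count check below.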
     pmmlObj = KerasToPmml(model)
     pmmlObj.export(open('sequentialModel.pmml', 'w'), 0)
     reconPmmlObj = ny.parse('sequentialModel.pmml', True)
     self.assertEqual(os.path.isfile("sequentialModel.pmml"), True)
     self.assertEqual(len(model.layers),
                      len(reconPmmlObj.DeepNetwork[0].NetworkLayer) - 1)
Example #11
    def test_keras_01(self):

        cnn_pmml = KerasToPmml(self.model_final,model_name="MobileNet",description="Demo",\
            copyright="Internal User",dataSet='image',predictedClasses=['cats','dogs'])
        cnn_pmml.export(open('2classMBNet.pmml', "w"), 0)
        reconPmmlObj = ny.parse('2classMBNet.pmml', True)
        self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
        self.assertEqual(len(self.model_final.layers),
                         len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
Example #12
    def test_keras_01(self):

        model = applications.MobileNet(weights='imagenet', include_top=False,input_shape = (224, 224,3))
        activType='sigmoid'
        x = model.output
        x = Flatten()(x)
        x = Dense(1024, activation="relu")(x)
        predictions = Dense(2, activation=activType)(x)
        model_final = Model(inputs =model.input, outputs = predictions,name='predictions')
        cnn_pmml = KerasToPmml(model_final,model_name="MobileNet",description="Demo",\
            copyright="Internal User",dataSet='image',predictedClasses=['cats','dogs'])
        cnn_pmml.export(open('2classMBNet.pmml', "w"), 0)
        reconPmmlObj=ny.parse('2classMBNet.pmml',True)
        self.assertEqual(os.path.isfile("2classMBNet.pmml"),True)
        self.assertEqual(len(model_final.layers), len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
Example #13
 def test_construction_vgg(self):
     model = applications.VGG16(weights = "imagenet", include_top=False,input_shape = (224, 224, 3))
     x = model.output
     x = layers.Flatten()(x)
     x = layers.Dense(1024, activation="relu")(x)
     x = layers.Dropout(0.5)(x)
     x = layers.Dense(1024, activation="relu")(x)
     predictions = layers.Dense(2, activation="softmax")(x)
     model_final = models.Model(input = model.input, output = predictions)
     model_final.compile(loss = "binary_crossentropy", optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])
     pmmlObj=KerasToPmml(model_final,model_name="VGG16",dataSet='image')
     pmmlObj.export(open('vgg.pmml','w'),0)
     reconPmmlObj=ny.parse('vgg.pmml',True)
     self.assertEqual(os.path.isfile("vgg.pmml"),True)
     self.assertEqual(len(model_final.layers), len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
Example #14
 def test_05(self):
     backbone = 'resnet'
     script_content = open("nyoka/tests/preprocess.py", 'r').read()
     RetinanetToPmml(self.model,
                     input_shape=(224, 224, 3),
                     backbone_name=backbone,
                     pmml_file_name="retinanet_with_coco_2.pmml",
                     script_args={
                         "content": script_content,
                         "def_name": "getBase64EncodedString",
                         "return_type": "string",
                         "encode": True
                     })
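     # The base64-encoded script stored in the PMML must decode back to the
     # original source file.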
     recon_pmml_obj = pml.parse("retinanet_with_coco_2.pmml", True)
     content = recon_pmml_obj.TransformationDictionary.DefineFunction[
         0].Apply.Extension[0].anytypeobjs_[0]
     content = base64.b64decode(content).decode()
     self.assertEqual(script_content, content)
Example #15
    def test_02(self):

        backbone = 'mobilenet'
        RetinanetToPmml(self.model,
                        input_shape=(224, 224, 3),
                        backbone_name=backbone,
                        pmml_file_name="retinanet_with_coco_2.pmml")
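        # Without script_args the DataField is marked as binary-buffered and the
        # DeepNetwork applies the KerasRetinaNet base64 helper with 'tf' scaling.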
        recon_pmml_obj = pml.parse("retinanet_with_coco_2.pmml", True)
        binary_buffered = recon_pmml_obj.DataDictionary.DataField[0].Extension[
            0].value
        self.assertEqual(binary_buffered, 'true')
        function = recon_pmml_obj.DeepNetwork[
            0].LocalTransformations.DerivedField[0].Apply.function
        self.assertEqual(function,
                         'KerasRetinaNet:getBase64StringFromBufferedInput')
        scaling = recon_pmml_obj.DeepNetwork[
            0].LocalTransformations.DerivedField[0].Apply.Constant[0].valueOf_
        self.assertEqual(scaling, 'tf')
Example #16
    def test_sklearn_07(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "logisticregression_pmml.pmml"
        model = LogisticRegression()

        pipeline_obj = Pipeline([
            ("mapping",
             DataFrameMapper([(['sepal length (cm)',
                                'sepal width (cm)'], StandardScaler()),
                              (['petal length (cm)',
                                'petal width (cm)'], Imputer())])),
            ("lr", model)
        ])

        pipeline_obj.fit(irisd[features], irisd[target])
        skl_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

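        # Multiclass LogisticRegression exports as a modelChain: one logit-normalized
        # segment per class (each carrying the fitted intercept) plus a final
        # simplemax segment.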
        ## 1
        segmentation = pmml_obj.MiningModel[0].Segmentation
        self.assertEqual(segmentation.Segment.__len__(),
                         model.classes_.__len__() + 1)

        ## 2
        self.assertEqual(segmentation.multipleModelMethod, "modelChain")

        ##3
        self.assertEqual(
            segmentation.Segment[-1].RegressionModel.normalizationMethod,
            "simplemax")

        ##4
        for i in range(model.classes_.__len__()):
            self.assertEqual(
                segmentation.Segment[i].RegressionModel.normalizationMethod,
                "logit")
            self.assertEqual("{:.16f}".format(model.intercept_[i]),\
                 "{:.16f}".format(segmentation.Segment[i].RegressionModel.RegressionTable[0].intercept))
Example #17
 def test_03_encoded_script(self):
     script_content = open("nyoka/tests/preprocess.py", 'r').read()
     pmml_obj = KerasToPmml(self.model_final,
                            dataSet='image',
                            predictedClasses=['cat', 'dog'],
                            script_args={
                                "content": script_content,
                                "def_name": "getBase64EncodedString",
                                "return_type": "string",
                                "encode": True
                            })
     pmml_obj.export(open("script_with_keras.pmml", 'w'), 0)
     self.assertEqual(os.path.isfile("script_with_keras.pmml"), True)
     reconPmmlObj = pml.parse("script_with_keras.pmml", True)
     content = reconPmmlObj.TransformationDictionary.DefineFunction[
         0].Apply.Extension[0].anytypeobjs_[0]
     content = base64.b64decode(content).decode()
     self.assertEqual(script_content, content)
     self.assertEqual(len(self.model_final.layers),
                      len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
Example #18
    def test_sklearn_04(self):
        titanic = pd.read_csv("nyoka/tests/titanic_train.csv")

        titanic['Embarked'] = titanic['Embarked'].fillna('S')

        features = list(
            titanic.columns.drop(
                ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived']))
        target = 'Survived'
        f_name = "gb_pmml.pmml"

        pipeline_obj = Pipeline([
            ("mapping",
             DataFrameMapper([(['Sex'], LabelEncoder()),
                              (['Embarked'], LabelEncoder())])),
            ("imp", Imputer(strategy="median")),
            ("gbc", GradientBoostingClassifier(n_estimators=10))
        ])

        pipeline_obj.fit(titanic[features], titanic[target])

        skl_to_pmml(pipeline_obj, features, target, f_name)

        pmml_obj = pml.parse(f_name, True)

        ##1
        self.assertEqual(
            pmml_obj.MiningModel[0].Segmentation.multipleModelMethod,
            "modelChain")

        ##2
        self.assertEqual(
            pmml_obj.MiningModel[0].Segmentation.Segment.__len__(), 2)

        ##3
        self.assertEqual(
            pmml_obj.MiningModel[0].Segmentation.Segment[1].RegressionModel.
            normalizationMethod, "logit")
    def test_lgbm_03(self):

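        # Multiclass LGBMClassifier behind a StandardScaler: the first three PMML
        # segments are per-class MiningModels whose trees are compared with the
        # booster dump.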
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "lgbmc_pmml_preprocess.pmml"
        model = LGBMClassifier(n_estimators=5)

        pipeline_obj = Pipeline([('scaling', StandardScaler()),
                                 ('LGBMC', model)])

        pipeline_obj.fit(irisd[features], irisd[target])
        lgb_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        list_seg_score1 = []
        list_seg_score2 = []
        list_seg_score3 = []

        list_seg_val1 = []
        list_seg_val2 = []
        list_seg_val3 = []

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            if int(seg.id) <= 3:
                for segment in seg.MiningModel.Segmentation.Segment:
                    node_tab = segment.TreeModel.Node.Node
                    if not node_tab:
                        pmml_score_list.append(segment.TreeModel.Node.score)
                    else:
                        for node in node_tab:
                            varlen = node.get_Node().__len__()
                            if varlen > 0:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                self.extractValues(node, pmml_value_list,
                                                   pmml_score_list)
                            else:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                pmml_score_list.append(node.score)

        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

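        # LightGBM dumps the per-class trees interleaved (class 0, 1, 2, repeating),
        # so regroup them into per-class lists to match the PMML segment order.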
        n = 1
        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            if (n == 1):
                n = 2
                self.create_node(node_list, list_score_temp, list_val_temp)
                list_seg_score1 = list_seg_score1 + list_score_temp
                list_seg_val1 = list_seg_val1 + list_val_temp
                list_val_temp.clear()
                list_score_temp.clear()
            elif (n == 2):
                n = 3
                self.create_node(node_list, list_score_temp, list_val_temp)
                list_seg_score2 = list_seg_score2 + list_score_temp
                list_seg_val2 = list_seg_val2 + list_val_temp
                list_val_temp.clear()
                list_score_temp.clear()
            elif (n == 3):
                n = 1
                self.create_node(node_list, list_score_temp, list_val_temp)
                list_seg_score3 = list_seg_score3 + list_score_temp
                list_seg_val3 = list_seg_val3 + list_val_temp
                list_val_temp.clear()
                list_score_temp.clear()

        model_score_list = list_seg_score1 + list_seg_score2 + list_seg_score3
        model_value_list = list_seg_val1 + list_seg_val2 + list_seg_val3

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
Example #20
    def test_xgboost_03(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "xgbc_pmml_preprocess.pmml"
        model = XGBClassifier(n_estimators=5)
        pipeline_obj = Pipeline([('scaling', StandardScaler()),
                                 ('xgbc', model)])

        pipeline_obj.fit(irisd[features], irisd[target])
        xgboost_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        list_seg_score1 = []
        list_seg_score2 = []
        list_seg_score3 = []

        list_seg_val1 = []
        list_seg_val2 = []
        list_seg_val3 = []

        get_nodes_in_json_format = []
        for i in range(model.n_estimators * model.n_classes_):
            get_nodes_in_json_format.append(
                json.loads(model._Booster.get_dump(dump_format='json')[i]))

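        # XGBoost dumps the per-class boosters interleaved, so split them into
        # three per-class lists that line up with the PMML segments.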
        n = 1
        for i in range(len(get_nodes_in_json_format)):
            list_score_temp = []
            list_val_temp = []
            node_list = get_nodes_in_json_format[i]
            if n == 1:
                n = 2
                self.create_node(node_list, list_score_temp, list_val_temp)
                list_seg_score1 = list_seg_score1 + list_score_temp
                list_seg_val1 = list_seg_val1 + list_val_temp
                list_val_temp.clear()
                list_score_temp.clear()
            elif n == 2:
                n = 3
                self.create_node(node_list, list_score_temp, list_val_temp)
                list_seg_score2 = list_seg_score2 + list_score_temp
                list_seg_val2 = list_seg_val2 + list_val_temp
                list_val_temp.clear()
                list_score_temp.clear()
            elif n == 3:
                n = 1
                self.create_node(node_list, list_score_temp, list_val_temp)
                list_seg_score3 = list_seg_score3 + list_score_temp
                list_seg_val3 = list_seg_val3 + list_val_temp
                list_val_temp.clear()
                list_score_temp.clear()

        model_score_list = list_seg_score1 + list_seg_score2 + list_seg_score3
        model_value_list = list_seg_val1 + list_seg_val2 + list_seg_val3

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            if int(seg.id) <= 3:
                for segment in seg.MiningModel.Segmentation.Segment:
                    node_tab = segment.TreeModel.Node.Node
                    if not node_tab:
                        pmml_score_list.append(segment.TreeModel.Node.score)
                    else:
                        for node in node_tab:
                            varlen = node.get_Node().__len__()
                            if varlen > 0:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                self.extractValues(node, pmml_value_list,
                                                   pmml_score_list)
                            else:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                pmml_score_list.append(node.score)

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
Example #21
    def test_xgboost_04(self):
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop(['mpg'], axis=1)
        y = auto['mpg']

        feature_names = [name for name in auto.columns if name not in ('mpg',)]
        f_name = "xgbr_pmml_preprocess2.pmml"
        target_name = 'mpg'
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.33,
                                                            random_state=101)
        model = XGBRegressor()
        pipeline_obj = Pipeline([
            ('mapper',
             DataFrameMapper([('car name', CountVectorizer()),
                              (['displacement'], [StandardScaler()])])),
            ('xgbr', model)
        ])

        pipeline_obj.fit(x_train, y_train)
        xgboost_to_pmml(pipeline_obj, feature_names, target_name, f_name)
        pmml_obj = pml.parse(f_name, True)

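        # The tree comparison mirrors test_xgboost_02: collect split values and
        # leaf scores from the PMML segments and from the JSON booster dump.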
        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment

        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

        get_nodes_in_json_format = []
        for i in range(model.n_estimators):
            get_nodes_in_json_format.append(
                json.loads(model._Booster.get_dump(dump_format='json')[i]))

        for i in range(len(get_nodes_in_json_format)):
            list_score_temp = []
            list_val_temp = []
            node_list = get_nodes_in_json_format[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
Example #22
    def test_xgboost_05(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['target'] = [i % 2 for i in range(iris.data.shape[0])]

        features = irisd.columns.drop('target')
        target = 'target'
        f_name = "xgbc_bin_pmml.pmml"
        model = XGBClassifier(min_child_weight=6,
                              n_estimators=10,
                              scale_pos_weight=10,
                              deterministic_histogram=False)
        pipeline_obj = Pipeline([('xgbc', model)])

        pipeline_obj.fit(irisd[features], irisd[target])
        xgboost_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

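        # For the binary classifier only the segment with id 1 is walked; its
        # split values and leaf scores are compared with the booster dump.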
        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            if int(seg.id) == 1:
                for segment in seg.MiningModel.Segmentation.Segment:
                    node_tab = segment.TreeModel.Node.Node
                    if not node_tab:
                        pmml_score_list.append(segment.TreeModel.Node.score)
                    else:
                        for node in node_tab:
                            varlen = node.get_Node().__len__()
                            if varlen > 0:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                self.extractValues(node, pmml_value_list,
                                                   pmml_score_list)
                            else:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                pmml_score_list.append(node.score)

        get_nodes_in_json_format = []
        for i in range(model.n_estimators):
            get_nodes_in_json_format.append(
                json.loads(model._Booster.get_dump(dump_format='json')[i]))

        for i in range(len(get_nodes_in_json_format)):
            list_score_temp = []
            list_val_temp = []
            node_list = get_nodes_in_json_format[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
    def test_lgbm_04(self):

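        # Same regression comparison as test_lgbm_02, but with a DataFrameMapper
        # applying CountVectorizer to 'car name' and StandardScaler to 'displacement'.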
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop(['mpg'], axis=1)
        y = auto['mpg']

        feature_names = [name for name in auto.columns if name not in ('mpg',)]

        target_name = 'mpg'
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.33,
                                                            random_state=101)
        f_name = "lgbmr_pmml_preprocess2.pmml"
        model = LGBMRegressor()
        pipeline_obj = Pipeline([
            ('mapper',
             DataFrameMapper([('car name', CountVectorizer()),
                              (['displacement'], [StandardScaler()])])),
            ('lgbmr', model)
        ])
        pipeline_obj.fit(x_train, y_train)

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)
    def test_lgbm_05(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['target'] = [i % 2 for i in range(iris.data.shape[0])]

        features = irisd.columns.drop('target')
        target = 'target'
        f_name = "lgbc_bin_pmml.pmml"
        model = LGBMClassifier()
        pipeline_obj = Pipeline([('lgbmc', model)])

        pipeline_obj.fit(irisd[features], irisd[target])

        lgb_to_pmml(pipeline_obj, features, target, f_name)

        # self.assertEqual(os.path.isfile("lgbc_bin_pmml.pmml"), True)

        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

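        # Binary LGBMClassifier: only the trees inside the segment with id 1 are
        # walked and compared with the booster dump.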
        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            if int(seg.id) == 1:
                for segment in seg.MiningModel.Segmentation.Segment:
                    node_tab = segment.TreeModel.Node.Node
                    if not node_tab:
                        pmml_score_list.append(segment.TreeModel.Node.score)
                    else:
                        for node in node_tab:
                            varlen = node.get_Node().__len__()
                            if varlen > 0:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                self.extractValues(node, pmml_value_list,
                                                   pmml_score_list)
                            else:
                                pmml_value_list.append(
                                    node.SimplePredicate.value)
                                pmml_score_list.append(node.score)

        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3
        self.assertEqual(os.path.isfile(f_name), True)