def test_04_lgbm_regressor(self):
        """Score an LGBM regression pipeline with preprocessing on the server.

        A ``DataFrameMapper`` + ``LGBMRegressor`` pipeline is fitted on a
        train split of the auto-mpg data and exported with ``lgb_to_pmml``.
        The PMML is uploaded and scored through ``self.adapa_utility`` using
        the held-out rows written to ``test.csv``; the scored predictions are
        then compared with the pipeline's own predictions on those rows.
        """
        print("\ntest 04 (lgbm regressor with preprocessing)\n")
        target_name = 'mpg'
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop([target_name], axis=1)
        y = auto[target_name]

        # Compare by equality: ``name not in ('mpg')`` is a substring test
        # against the string 'mpg' (the parentheses do not make a tuple), so
        # a column called e.g. 'm' or 'pg' would have been dropped as well.
        feature_names = [name for name in auto.columns if name != target_name]

        # y_test is not needed: the reference values come from predict().
        x_train, x_test, y_train, _ = train_test_split(X,
                                                       y,
                                                       test_size=0.33,
                                                       random_state=101)
        # Persist the held-out rows; this file is what gets scored below.
        pd.DataFrame(data=x_test, columns=feature_names).to_csv("test.csv",
                                                                index=False)
        pipeline_obj = Pipeline([
            ('mapper',
             DataFrameMapper([('car name', CountVectorizer()),
                              (['displacement'], [StandardScaler()])])),
            ('lgbmr', LGBMRegressor())
        ])
        pipeline_obj.fit(x_train, y_train)

        file_name = "test04lgbm.pmml"
        lgb_to_pmml(pipeline_obj, feature_names, target_name, file_name)
        model_name = self.adapa_utility.upload_to_zserver(file_name)
        predictions, _ = self.adapa_utility.score_in_zserver(
            model_name, "test.csv")
        predictions = numpy.array(predictions)
        model_pred = pipeline_obj.predict(x_test)
        self.assertEqual(
            self.adapa_utility.compare_predictions(predictions, model_pred),
            True)
    def test_lgbm_02(self):
        """Compare the PMML export of a plain LGBM regressor with its dump.

        The regressor is fitted on the auto-mpg columns other than 'mpg' and
        'car name', exported with ``lgb_to_pmml`` and parsed back. Split
        values and leaf scores read from the PMML segments are compared
        pairwise with those collected from ``model.booster_.dump_model()``.
        """
        target_name = 'mpg'
        f_name = "lgbmr_pmml.pmml"
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [
            col for col in auto.columns if col not in ('mpg', 'car name')
        ]

        model = LGBMRegressor()
        pipeline_obj = Pipeline([('lgbmr', model)])
        pipeline_obj.fit(auto[feature_names], auto[target_name])
        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)
        pmml_obj = pml.parse(f_name, True)

        # PMML side: walk the first-level nodes of every segment's tree.
        pmml_value_list, pmml_score_list = [], []
        for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
            for node in seg.TreeModel.Node.Node:
                has_children = len(node.get_Node()) > 0
                pmml_value_list.append(node.SimplePredicate.value)
                if has_children:
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_score_list.append(node.score)

        # Model side: one 'tree_structure' entry per dumped tree.
        model_value_list, model_score_list = [], []
        tree_info = model.booster_.dump_model()['tree_info']
        for tree in [entry['tree_structure'] for entry in tree_info]:
            tree_scores, tree_values = [], []
            self.create_node(tree, tree_scores, tree_values)
            model_score_list.extend(tree_scores)
            model_value_list.extend(tree_values)

        # 1: leaf scores; the PMML score is passed through float() first.
        # NOTE(review): zip() stops at the shorter list, so a length mismatch
        # (or two empty lists) is not detected by these loops.
        for expected, actual in zip(model_score_list, pmml_score_list):
            self.assertEqual(expected, float(actual))

        # 2: split values
        for expected, actual in zip(model_value_list, pmml_value_list):
            self.assertEqual(expected, actual)

        # 3: the PMML file exists on disk
        self.assertTrue(os.path.isfile(f_name))
    def test_lgbm_04(self):
        """Export an LGBM regression pipeline with preprocessing to PMML.

        A ``DataFrameMapper`` (count-vectorised 'car name', scaled
        'displacement') feeds an ``LGBMRegressor``. The pipeline is fitted on
        a train split of the auto-mpg data and exported with ``lgb_to_pmml``;
        the test passes when the PMML file exists afterwards.
        """
        target_name = 'mpg'
        f_name = "lgbmr_pmml_preprocess.pmml"
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop([target_name], axis=1)
        y = auto[target_name]

        # Compare by equality: ``name not in ('mpg')`` is a substring test
        # against the string 'mpg' (the parentheses do not make a tuple), so
        # a column called e.g. 'm' or 'pg' would have been dropped as well.
        feature_names = [name for name in auto.columns if name != target_name]

        # Only the training part of the split is used by this test.
        x_train, _, y_train, _ = train_test_split(X,
                                                  y,
                                                  test_size=0.33,
                                                  random_state=101)
        pipeline_obj = Pipeline([
            ('mapper',
             DataFrameMapper([('car name', CountVectorizer()),
                              (['displacement'], [StandardScaler()])])),
            ('lgbmr', LGBMRegressor())
        ])
        pipeline_obj.fit(x_train, y_train)

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        self.assertTrue(os.path.isfile(f_name))
Beispiel #4
0
 def test_03_lgbm_regressor(self):
     """Score an LGBM regressor without preprocessing on the server.

     Fits a single-step pipeline on ``self.X`` / ``self.Y``, exports it with
     ``lgb_to_pmml``, uploads and scores it through ``self.adapa_utility``
     using ``self.test_file``, and checks the comparison of scored versus
     local predictions.
     """
     print("\ntest 03 (lgbm regressor without preprocessing)\n")
     file_name = "test03lgbm.pmml"
     pipeline_obj = Pipeline([("model", LGBMRegressor())])
     pipeline_obj.fit(self.X, self.Y)
     lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)

     model_name = self.adapa_utility.upload_to_zserver(file_name)
     predictions, _ = self.adapa_utility.score_in_zserver(
         model_name, self.test_file)
     model_pred = pipeline_obj.predict(self.X)

     # NOTE(review): the helper's result is compared against 0 here; confirm
     # against compare_predictions' return contract that 0 means "match".
     outcome = self.adapa_utility.compare_predictions(predictions, model_pred)
     self.assertEqual(outcome, 0)
    def test_lgbm_06(self):
        """Expect TypeError when a bare classifier is given to lgb_to_pmml.

        The fitted ``LGBMClassifier`` is handed over directly instead of
        being wrapped in a ``Pipeline``.
        """
        iris = datasets.load_iris()
        target = 'target'
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        # Alternating 0/1 labels, one per row of the iris data.
        irisd[target] = [row % 2 for row in range(iris.data.shape[0])]
        features = irisd.columns.drop(target)

        model = LGBMClassifier()
        model.fit(irisd[features], irisd[target])

        self.assertRaises(TypeError, lgb_to_pmml, model, features, target,
                          "lgbc_bin_pmml.pmml")
    def test_lgbm_07(self):
        """Export a scaled LGBM classifier using custom feature names.

        The pipeline is fitted on plain arrays (``.values``), so the column
        names 'f1'..'f4' reach the exporter only through the ``abc`` list
        passed to ``lgb_to_pmml``. The export is called with an explicit
        ``model_name`` and the test checks that the PMML file was written.
        """
        # Local import so this check does not depend on a file-level import.
        import os

        iris = datasets.load_iris()
        abc = ['f1', 'f2', 'f3', 'f4']
        irisd = pd.DataFrame(iris.data, columns=abc)
        irisd['Species'] = iris.target

        features = irisd.columns.drop('Species')
        target = 'Species'
        f_name = "lgbmc_pmml_with_f_column_names.pmml"
        pipeline_obj = Pipeline([
            ('scaler', StandardScaler()),
            ('lgbmc', LGBMClassifier())
        ])
        pipeline_obj.fit(irisd[features].values, irisd[target].values)
        lgb_to_pmml(pipeline_obj, abc, target, f_name, model_name="MyLGBM")

        # Previously the test asserted nothing and could only fail by raising.
        self.assertTrue(os.path.isfile(f_name))
    def test_lgbm_01(self):
        """Export a multi-class LGBM classifier pipeline and check the file.

        A single-step pipeline is fitted on the iris data with the numeric
        ``iris.target`` as 'Species', exported with ``lgb_to_pmml``, and the
        resulting PMML file must exist.
        """
        iris = datasets.load_iris()
        target = 'Species'
        pmml_path = "lgbmc_pmml.pmml"

        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd[target] = iris.target
        features = irisd.columns.drop(target)

        pipeline_obj = Pipeline([('lgbmc', LGBMClassifier())])
        pipeline_obj.fit(irisd[features], irisd[target])
        lgb_to_pmml(pipeline_obj, features, target, pmml_path)

        self.assertTrue(os.path.isfile(pmml_path))
Beispiel #8
0
    def test_lgbm_05(self):
        """Export a two-class LGBM classifier pipeline and check the file.

        The iris rows get alternating 0/1 labels, a single-step pipeline is
        fitted and exported with ``lgb_to_pmml``, and the resulting PMML file
        must exist.
        """
        iris = datasets.load_iris()
        target = 'target'
        pmml_path = "lgbc_bin_pmml.pmml"

        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd[target] = [idx % 2 for idx in range(iris.data.shape[0])]
        features = irisd.columns.drop(target)

        pipeline_obj = Pipeline([('lgbmc', LGBMClassifier())])
        pipeline_obj.fit(irisd[features], irisd[target])
        lgb_to_pmml(pipeline_obj, features, target, pmml_path)

        self.assertTrue(os.path.isfile(pmml_path))
Beispiel #9
0
 def test_02_lgbm_classifier(self):
     """Score a scaled multi-class LGBM classifier on the server.

     A ``MaxAbsScaler`` + ``LGBMClassifier`` pipeline is fitted on
     ``self.X`` / ``self.Y``, exported with ``lgb_to_pmml``, then uploaded
     and scored through ``self.adapa_utility`` using ``self.test_file``.
     Both the predictions and the probabilities are compared with the
     pipeline's local output.
     """
     print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")
     file_name = "test02lgbm.pmml"
     steps = [('scaler', MaxAbsScaler()), ("model", LGBMClassifier())]
     pipeline_obj = Pipeline(steps)
     pipeline_obj.fit(self.X, self.Y)
     lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)

     model_name = self.adapa_utility.upload_to_zserver(file_name)
     predictions, probabilities = self.adapa_utility.score_in_zserver(
         model_name, self.test_file)
     model_pred = pipeline_obj.predict(self.X)
     model_prob = pipeline_obj.predict_proba(self.X)

     # NOTE(review): both helper results are compared against 0; confirm
     # against the helpers' return contract that 0 means "match".
     pred_check = self.adapa_utility.compare_predictions(
         predictions, model_pred)
     self.assertEqual(pred_check, 0)
     prob_check = self.adapa_utility.compare_probability(
         probabilities, model_prob)
     self.assertEqual(prob_check, 0)
Beispiel #10
0
    def test_lgbm_02(self):
        """Export a plain LGBM regressor pipeline and check the file exists.

        The regressor is fitted on every auto-mpg column except 'mpg' (the
        target) and 'car name', then exported with ``lgb_to_pmml``.
        """
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')

        # The former ``X`` / ``y`` locals were computed but never used; the
        # fit below selects the same columns through ``feature_names``.
        target_name = 'mpg'
        feature_names = [
            name for name in auto.columns if name not in ('mpg', 'car name')
        ]
        f_name = "lgbmr_pmml.pmml"

        pipeline_obj = Pipeline([
            ('lgbmr', LGBMRegressor())
        ])
        pipeline_obj.fit(auto[feature_names], auto[target_name])

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        self.assertTrue(os.path.isfile(f_name))
Beispiel #11
0
    def test_lgbm_03(self):
        """Export a scaled multi-class LGBM classifier and check the file.

        A ``StandardScaler`` + ``LGBMClassifier(n_estimators=5)`` pipeline is
        fitted on the iris data, exported with ``lgb_to_pmml``, and the
        resulting PMML file must exist.
        """
        iris = datasets.load_iris()
        target = 'Species'
        pmml_path = "lgbmc_pmml_preprocess.pmml"

        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd[target] = iris.target
        features = irisd.columns.drop(target)

        steps = [
            ('scaling', StandardScaler()),
            ('LGBMC_preprocess', LGBMClassifier(n_estimators=5)),
        ]
        pipeline_obj = Pipeline(steps)
        pipeline_obj.fit(irisd[features], irisd[target])
        lgb_to_pmml(pipeline_obj, features, target, pmml_path)

        self.assertTrue(os.path.isfile(pmml_path))
    def test_lgbm_05(self):
        """Compare the PMML export of a two-class LGBM classifier with its dump.

        The iris rows get alternating 0/1 labels; the fitted pipeline is
        exported with ``lgb_to_pmml`` and parsed back. Split values and leaf
        scores are read from the nested mining model of the segment whose id
        is 1 and compared pairwise with those collected from
        ``model.booster_.dump_model()``.
        """
        iris = datasets.load_iris()
        target = 'target'
        f_name = "lgbc_bin_pmml.pmml"

        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd[target] = [idx % 2 for idx in range(iris.data.shape[0])]
        features = irisd.columns.drop(target)

        model = LGBMClassifier()
        pipeline_obj = Pipeline([('lgbmc', model)])
        pipeline_obj.fit(irisd[features], irisd[target])
        lgb_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

        # PMML side: only the segment with id 1 is inspected.
        pmml_value_list, pmml_score_list = [], []
        for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
            if int(seg.id) != 1:
                continue
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Root without child nodes: only its score is recorded.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                    continue
                for node in node_tab:
                    has_children = len(node.get_Node()) > 0
                    pmml_value_list.append(node.SimplePredicate.value)
                    if has_children:
                        self.extractValues(node, pmml_value_list,
                                           pmml_score_list)
                    else:
                        pmml_score_list.append(node.score)

        # Model side: one 'tree_structure' entry per dumped tree.
        model_value_list, model_score_list = [], []
        tree_info = model.booster_.dump_model()['tree_info']
        for tree in [entry['tree_structure'] for entry in tree_info]:
            tree_scores, tree_values = [], []
            self.create_node(tree, tree_scores, tree_values)
            model_score_list.extend(tree_scores)
            model_value_list.extend(tree_values)

        # 1: leaf scores; the PMML score is passed through float() first.
        # NOTE(review): zip() stops at the shorter list, so a length mismatch
        # (or two empty lists) is not detected by these loops.
        for expected, actual in zip(model_score_list, pmml_score_list):
            self.assertEqual(expected, float(actual))

        # 2: split values
        for expected, actual in zip(model_value_list, pmml_value_list):
            self.assertEqual(expected, actual)

        # 3: the PMML file exists on disk
        self.assertTrue(os.path.isfile(f_name))
    def test_lgbm_04(self):
        """Compare the PMML export of a preprocessed LGBM regressor with its dump.

        A ``DataFrameMapper`` (count-vectorised 'car name', scaled
        'displacement') feeds an ``LGBMRegressor``. The pipeline is fitted on
        a train split of the auto-mpg data, exported with ``lgb_to_pmml`` and
        parsed back. Split values and leaf scores read from the PMML segments
        are compared pairwise with those collected from
        ``model.booster_.dump_model()``.
        """
        target_name = 'mpg'
        f_name = "lgbmr_pmml_preprocess2.pmml"
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = auto.drop([target_name], axis=1)
        y = auto[target_name]

        # Compare by equality: ``name not in ('mpg')`` is a substring test
        # against the string 'mpg' (the parentheses do not make a tuple), so
        # a column called e.g. 'm' or 'pg' would have been dropped as well.
        feature_names = [name for name in auto.columns if name != target_name]

        # Only the training part of the split is used by this test.
        x_train, _, y_train, _ = train_test_split(X,
                                                  y,
                                                  test_size=0.33,
                                                  random_state=101)
        model = LGBMRegressor()
        pipeline_obj = Pipeline([
            ('mapper',
             DataFrameMapper([('car name', CountVectorizer()),
                              (['displacement'], [StandardScaler()])])),
            ('lgbmr', model)
        ])
        pipeline_obj.fit(x_train, y_train)

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)
        pmml_obj = pml.parse(f_name, True)

        # PMML side: walk the first-level nodes of every segment's tree.
        pmml_value_list = []
        pmml_score_list = []
        for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
            for node in seg.TreeModel.Node.Node:
                has_children = len(node.get_Node()) > 0
                pmml_value_list.append(node.SimplePredicate.value)
                if has_children:
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_score_list.append(node.score)

        # Model side: one 'tree_structure' entry per dumped tree.
        model_value_list = []
        model_score_list = []
        for tree_entry in model.booster_.dump_model()['tree_info']:
            tree_scores = []
            tree_values = []
            self.create_node(tree_entry['tree_structure'], tree_scores,
                             tree_values)
            model_score_list.extend(tree_scores)
            model_value_list.extend(tree_values)

        # 1: leaf scores; the PMML score is passed through float() first.
        # NOTE(review): zip() stops at the shorter list, so a length mismatch
        # (or two empty lists) is not detected by these loops.
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        # 2: split values
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        # 3: the PMML file exists on disk
        self.assertTrue(os.path.isfile(f_name))
    def test_lgbm_03(self):
        """Compare the PMML export of a scaled 3-class LGBM classifier with its dump.

        A ``StandardScaler`` + ``LGBMClassifier(n_estimators=5)`` pipeline is
        fitted on the iris data, exported with ``lgb_to_pmml`` and parsed
        back. Split values and leaf scores are read from the nested mining
        models of the segments with id <= 3. On the model side the dumped
        trees are dealt round-robin into three buckets, which are then
        concatenated so that both sides can be compared pairwise.
        """
        iris = datasets.load_iris()
        target = 'Species'
        f_name = "lgbmc_pmml_preprocess.pmml"

        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd[target] = iris.target
        features = irisd.columns.drop(target)

        model = LGBMClassifier(n_estimators=5)
        pipeline_obj = Pipeline([('scaling', StandardScaler()),
                                 ('LGBMC', model)])
        pipeline_obj.fit(irisd[features], irisd[target])
        lgb_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

        # PMML side: segments with an id above 3 are ignored.
        pmml_value_list, pmml_score_list = [], []
        for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
            if int(seg.id) > 3:
                continue
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Root without child nodes: only its score is recorded.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                    continue
                for node in node_tab:
                    has_children = len(node.get_Node()) > 0
                    pmml_value_list.append(node.SimplePredicate.value)
                    if has_children:
                        self.extractValues(node, pmml_value_list,
                                           pmml_score_list)
                    else:
                        pmml_score_list.append(node.score)

        # Model side: tree i goes to bucket i % 3 (presumably one bucket per
        # class, mirroring the three PMML segments read above -- confirm).
        tree_info = model.booster_.dump_model()['tree_info']
        trees = [entry['tree_structure'] for entry in tree_info]
        score_buckets = ([], [], [])
        value_buckets = ([], [], [])
        for position, tree in enumerate(trees):
            slot = position % 3
            tree_scores, tree_values = [], []
            self.create_node(tree, tree_scores, tree_values)
            score_buckets[slot].extend(tree_scores)
            value_buckets[slot].extend(tree_values)

        model_score_list = (score_buckets[0] + score_buckets[1] +
                            score_buckets[2])
        model_value_list = (value_buckets[0] + value_buckets[1] +
                            value_buckets[2])

        # 1: leaf scores; the PMML score is passed through float() first.
        # NOTE(review): zip() stops at the shorter list, so a length mismatch
        # (or two empty lists) is not detected by these loops.
        for expected, actual in zip(model_score_list, pmml_score_list):
            self.assertEqual(expected, float(actual))

        # 2: split values
        for expected, actual in zip(model_value_list, pmml_value_list):
            self.assertEqual(expected, actual)

        # 3: the PMML file exists on disk
        self.assertTrue(os.path.isfile(f_name))