def test_lgbm_02(self):
    """LGBMRegressor export: PMML node values/scores must match the booster dump.

    Fixes: removed pointless ``.clear()`` calls on temp lists that were about
    to go out of scope, replaced ``range(len())`` index loops with direct
    iteration, and dropped a redundant intermediate list.
    """
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    feature_names = [
        name for name in auto.columns if name not in ('mpg', 'car name')
    ]
    target_name = 'mpg'
    f_name = "lgbmr_pmml.pmml"
    model = LGBMRegressor()
    pipeline_obj = Pipeline([('lgbmr', model)])
    pipeline_obj.fit(auto[feature_names], auto[target_name])
    lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)
    pmml_obj = pml.parse(f_name, True)

    # Collect predicate values and leaf scores from the generated PMML.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        for node in seg.TreeModel.Node.Node:
            pmml_value_list.append(node.SimplePredicate.value)
            if node.get_Node():
                # Internal node: recurse via the helper.
                self.extractValues(node, pmml_value_list, pmml_score_list)
            else:
                # Leaf node: record its score directly.
                pmml_score_list.append(node.score)

    # Collect the same information straight from the LightGBM booster dump.
    model_value_list = []
    model_score_list = []
    lgb_dump = model.booster_.dump_model()
    for tree_info in lgb_dump['tree_info']:
        list_score_temp = []
        list_val_temp = []
        self.create_node(tree_info['tree_structure'], list_score_temp,
                         list_val_temp)
        model_score_list += list_score_temp
        model_value_list += list_val_temp

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_04_plain_text_script(self):
    """Embed a plain-text preprocessing script into a Keras PMML and read it back.

    Fixes: the script file and the exported PMML file are now opened via
    context managers, so the handles are flushed and closed deterministically
    instead of relying on garbage collection.
    """
    model = applications.MobileNet(weights='imagenet',
                                   include_top=False,
                                   input_shape=(224, 224, 3))
    x = model.output
    x = Flatten()(x)
    x = Dense(1024, activation="relu")(x)
    predictions = Dense(2, activation='sigmoid')(x)
    model_final = Model(inputs=model.input,
                        outputs=predictions,
                        name='predictions')
    with open("nyoka/tests/preprocess.py", 'r') as fp:
        script_content = fp.read()
    pmml_obj = KerasToPmml(model_final,
                           dataSet='image',
                           predictedClasses=['cat', 'dog'],
                           script_args={
                               "content": script_content,
                               "def_name": "getBase64EncodedString",
                               "return_type": "string",
                               "encode": False
                           })
    with open("script_with_keras.pmml", 'w') as fp:
        pmml_obj.export(fp, 0)
    self.assertEqual(os.path.isfile("script_with_keras.pmml"), True)
    reconPmmlObj = pml.parse("script_with_keras.pmml", True)
    # The plain-text script comes back as a list of lines; strip the tab
    # indentation added by the XML writer and re-join before comparing.
    content = reconPmmlObj.TransformationDictionary.DefineFunction[
        0].Apply.Extension[0].anytypeobjs_
    content[0] = content[0].replace("\t", "")
    content = "\n".join(content)
    self.assertEqual(script_content, content)
    self.assertEqual(len(model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_sklearn_01(self):
    """Export an SVC pipeline to PMML and verify intercepts and gamma survive."""
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
    iris_df['Species'] = iris.target
    feature_cols = iris_df.columns.drop('Species')
    target_col = 'Species'
    out_file = "svc_pmml.pmml"
    model = SVC()
    pipe = Pipeline([('svm', model)])
    pipe.fit(iris_df[feature_cols], iris_df[target_col])
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    ## 1: each intercept equals the reconstructed absolute coefficient value
    recon_svms = recon.SupportVectorMachineModel[0].SupportVectorMachine
    for intercept, svm_elem in zip(model.intercept_, recon_svms):
        self.assertEqual(
            "{:.16f}".format(intercept),
            "{:.16f}".format(svm_elem.Coefficients.absoluteValue))
    ## 2: RBF kernel gamma is preserved
    self.assertEqual(
        recon.SupportVectorMachineModel[0].RadialBasisKernelType.gamma,
        model._gamma)
def test_xgboost_02(self):
    """XGBRegressor export: PMML node values/scores must match the booster dump.

    Fixes: ``get_dump()`` was called once per tree (O(n^2) work); it is now
    hoisted out of the loop. Also removed dead ``.clear()`` calls and the
    ``range(len())`` index loop.
    """
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    feature_names = [
        name for name in auto.columns if name not in ('mpg', 'car name')
    ]
    target_name = 'mpg'
    f_name = "xgbr_pmml.pmml"
    model = XGBRegressor()
    pipeline_obj = Pipeline([('xgbr', model)])
    pipeline_obj.fit(auto[feature_names], auto[target_name])
    xgboost_to_pmml(pipeline_obj,
                    feature_names,
                    target_name,
                    f_name,
                    description="A test model")
    pmml_obj = pml.parse(f_name, True)

    # Gather predicate values and leaf scores from the generated PMML.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        for node in seg.TreeModel.Node.Node:
            pmml_value_list.append(node.SimplePredicate.value)
            if node.get_Node():
                self.extractValues(node, pmml_value_list, pmml_score_list)
            else:
                pmml_score_list.append(node.score)

    # Gather the same data from the booster itself (dump fetched once).
    model_value_list = []
    model_score_list = []
    booster_dump = model._Booster.get_dump(dump_format='json')
    for i in range(model.n_estimators):
        list_score_temp = []
        list_val_temp = []
        self.create_node(json.loads(booster_dump[i]), list_score_temp,
                         list_val_temp)
        model_score_list += list_score_temp
        model_value_list += list_val_temp

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_sklearn_02(self):
    """KNN pipeline export: distance measure, kind, and k must round-trip."""
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
    iris_df['Species'] = iris.target
    feature_cols = iris_df.columns.drop('Species')
    target_col = 'Species'
    out_file = "knn_pmml.pmml"
    pipe = Pipeline([('scaling', StandardScaler()),
                     ('knn', KNeighborsClassifier(n_neighbors=5))])
    pipe.fit(iris_df[feature_cols], iris_df[target_col])
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    knn_model = recon.NearestNeighborModel[0]
    ##1
    self.assertIsNotNone(knn_model.ComparisonMeasure.euclidean)
    ##2
    self.assertEqual(knn_model.ComparisonMeasure.kind, "distance")
    ##3
    self.assertEqual(pipe.steps[-1][-1].n_neighbors,
                     knn_model.numberOfNeighbors)
def test_sklearn_06(self):
    """LinearRegression export: intercept and coefficients must round-trip."""
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    X = auto.drop(['mpg', 'car name'], axis=1)
    y = auto['mpg']
    feature_cols = X.columns
    target_col = 'mpg'
    out_file = "linearregression_pmml.pmml"
    model = LinearRegression()
    pipe = Pipeline([('model', model)])
    pipe.fit(X, y)
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    reg_tab = recon.RegressionModel[0].RegressionTable[0]
    ## 1
    self.assertEqual(reg_tab.intercept, model.intercept_)
    ## 2: coefficients compared at 16-decimal precision
    for coef, predictor in zip(model.coef_, reg_tab.NumericPredictor):
        self.assertEqual("{:.16f}".format(coef),
                         "{:.16f}".format(predictor.coefficient))
def __init__(self, pmml):
    """Parse the given PMML and rebuild the model from it.

    Args:
        pmml: PMML source accepted by ``ny.parse`` (presumably a file
            path — confirm against callers).

    Fixes: removed the redundant ``self.model = None`` that was immediately
    overwritten by ``self._build_model()``.
    """
    self.nyoka_pmml = ny.parse(pmml, True)
    # Populated later while the network is rebuilt/executed.
    self.image_input = None
    self.layer_input = None
    self.layers_outputs = {}
    self.model = self._build_model()
def test_sklearn_04(self):
    """GradientBoosting export (all columns as features): model-chain checks."""
    titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
    # NOTE(review): every column — including the target 'Survived' — is fed
    # in as a feature here; presumably intentional for this structural test,
    # but worth confirming.
    feature_cols = titanic.columns
    target_col = 'Survived'
    out_file = "gb_pmml.pmml"
    pipe = Pipeline([("imp", Imputer(strategy="median")),
                     ("gbc", GradientBoostingClassifier(n_estimators=10))])
    pipe.fit(titanic[feature_cols], titanic[target_col])
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    segmentation = recon.MiningModel[0].Segmentation
    ##1
    self.assertEqual(segmentation.multipleModelMethod, "modelChain")
    ##2
    self.assertEqual(len(segmentation.Segment), 2)
    ##3
    self.assertEqual(
        segmentation.Segment[1].RegressionModel.normalizationMethod, "logit")
def test_sklearn_03(self):
    """RandomForest export: one PMML segment per tree, majority-vote method."""
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
    iris_df['Species'] = iris.target
    feature_cols = iris_df.columns.drop('Species')
    target_col = 'Species'
    out_file = "rf_pmml.pmml"
    model = RandomForestClassifier(n_estimators=100)
    pipe = Pipeline([
        ("mapping",
         DataFrameMapper([(['sepal length (cm)', 'sepal width (cm)'],
                           StandardScaler()),
                          (['petal length (cm)', 'petal width (cm)'],
                           Imputer())])),
        ("rfc", model)
    ])
    pipe.fit(iris_df[feature_cols], iris_df[target_col])
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    ## 1: one segment per fitted tree
    self.assertEqual(model.n_estimators,
                     len(recon.MiningModel[0].Segmentation.Segment))
    ##2
    self.assertEqual(recon.MiningModel[0].Segmentation.multipleModelMethod,
                     "majorityVote")
def test_keras_02(self):
    """Sequential Keras regressor export: file exists and layer counts match.

    Fixes: the PMML output file is written via a context manager so the
    handle is flushed and closed before the file is re-parsed (the original
    passed an unclosed ``open()`` handle to ``export``).
    """
    boston = load_boston()
    data = pd.DataFrame(boston.data)
    features = list(boston.feature_names)
    target = 'PRICE'
    data.columns = features
    data['PRICE'] = boston.target
    x_train, x_test, y_train, y_test = train_test_split(data[features],
                                                        data[target],
                                                        test_size=0.20,
                                                        random_state=42)
    model = Sequential()
    model.add(
        Dense(13,
              input_dim=13,
              kernel_initializer='normal',
              activation='relu'))
    model.add(Dense(23))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=1000, verbose=0)
    pmmlObj = KerasToPmml(model)
    with open('sequentialModel.pmml', 'w') as fp:
        pmmlObj.export(fp, 0)
    reconPmmlObj = ny.parse('sequentialModel.pmml', True)
    self.assertEqual(os.path.isfile("sequentialModel.pmml"), True)
    # PMML adds an explicit input layer, hence the "- 1".
    self.assertEqual(len(model.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer) - 1)
def test_keras_01(self):
    """MobileNet classifier export: file exists and layer counts match.

    Fixes: the PMML file is written via a context manager so the handle is
    flushed and closed before ``ny.parse`` reads it back.
    """
    cnn_pmml = KerasToPmml(self.model_final,
                           model_name="MobileNet",
                           description="Demo",
                           copyright="Internal User",
                           dataSet='image',
                           predictedClasses=['cats', 'dogs'])
    with open('2classMBNet.pmml', "w") as fp:
        cnn_pmml.export(fp, 0)
    reconPmmlObj = ny.parse('2classMBNet.pmml', True)
    self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
    self.assertEqual(len(self.model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_keras_01(self):
    """Build a 2-class MobileNet classifier, export it, and re-parse the PMML.

    Fixes: the PMML file is written via a context manager so the handle is
    flushed and closed before ``ny.parse`` reads it back.
    """
    model = applications.MobileNet(weights='imagenet',
                                   include_top=False,
                                   input_shape=(224, 224, 3))
    activType = 'sigmoid'
    x = model.output
    x = Flatten()(x)
    x = Dense(1024, activation="relu")(x)
    predictions = Dense(2, activation=activType)(x)
    model_final = Model(inputs=model.input,
                        outputs=predictions,
                        name='predictions')
    cnn_pmml = KerasToPmml(model_final,
                           model_name="MobileNet",
                           description="Demo",
                           copyright="Internal User",
                           dataSet='image',
                           predictedClasses=['cats', 'dogs'])
    with open('2classMBNet.pmml', "w") as fp:
        cnn_pmml.export(fp, 0)
    reconPmmlObj = ny.parse('2classMBNet.pmml', True)
    self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
    self.assertEqual(len(model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_construction_vgg(self):
    """VGG16-based classifier export: file exists and layer counts match.

    Fixes: the PMML output file is written via a context manager so the
    handle is flushed and closed before re-parsing.
    """
    model = applications.VGG16(weights="imagenet",
                               include_top=False,
                               input_shape=(224, 224, 3))
    x = model.output
    x = layers.Flatten()(x)
    x = layers.Dense(1024, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(1024, activation="relu")(x)
    predictions = layers.Dense(2, activation="softmax")(x)
    # NOTE(review): `input=`/`output=` are legacy Keras kwarg names; kept
    # as-is for compatibility with the pinned Keras version — confirm before
    # modernising to `inputs=`/`outputs=`.
    model_final = models.Model(input=model.input, output=predictions)
    model_final.compile(loss="binary_crossentropy",
                        optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                        metrics=["accuracy"])
    pmmlObj = KerasToPmml(model_final, model_name="VGG16", dataSet='image')
    with open('vgg.pmml', 'w') as fp:
        pmmlObj.export(fp, 0)
    reconPmmlObj = ny.parse('vgg.pmml', True)
    self.assertEqual(os.path.isfile("vgg.pmml"), True)
    self.assertEqual(len(model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_05(self):
    """RetinaNet export with a base64-encoded preprocessing script.

    Fixes: the script file is read via a context manager instead of leaking
    the ``open()`` handle.
    """
    backbone = 'resnet'
    with open("nyoka/tests/preprocess.py", 'r') as fp:
        script_content = fp.read()
    RetinanetToPmml(self.model,
                    input_shape=(224, 224, 3),
                    backbone_name=backbone,
                    pmml_file_name="retinanet_with_coco_2.pmml",
                    script_args={
                        "content": script_content,
                        "def_name": "getBase64EncodedString",
                        "return_type": "string",
                        "encode": True
                    })
    recon_pmml_obj = pml.parse("retinanet_with_coco_2.pmml", True)
    # The embedded script is stored base64-encoded; decode and compare.
    content = recon_pmml_obj.TransformationDictionary.DefineFunction[
        0].Apply.Extension[0].anytypeobjs_[0]
    content = base64.b64decode(content).decode()
    self.assertEqual(script_content, content)
def test_02(self):
    """RetinaNet (mobilenet backbone) export: buffered-input transformation metadata."""
    backbone = 'mobilenet'
    RetinanetToPmml(self.model,
                    input_shape=(224, 224, 3),
                    backbone_name=backbone,
                    pmml_file_name="retinanet_with_coco_2.pmml")
    recon = pml.parse("retinanet_with_coco_2.pmml", True)
    # The input DataField must be flagged as binary-buffered.
    binary_buffered = recon.DataDictionary.DataField[0].Extension[0].value
    self.assertEqual(binary_buffered, 'true')
    derived = recon.DeepNetwork[0].LocalTransformations.DerivedField[0]
    self.assertEqual(derived.Apply.function,
                     'KerasRetinaNet:getBase64StringFromBufferedInput')
    # First Apply constant carries the scaling mode.
    self.assertEqual(derived.Apply.Constant[0].valueOf_, 'tf')
def test_sklearn_07(self):
    """LogisticRegression export: one-vs-rest model-chain structure and intercepts."""
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
    iris_df['Species'] = iris.target
    feature_cols = iris_df.columns.drop('Species')
    target_col = 'Species'
    out_file = "logisticregression_pmml.pmml"
    model = LogisticRegression()
    pipe = Pipeline([
        ("mapping",
         DataFrameMapper([(['sepal length (cm)', 'sepal width (cm)'],
                           StandardScaler()),
                          (['petal length (cm)', 'petal width (cm)'],
                           Imputer())])),
        ("lr", model)
    ])
    pipe.fit(iris_df[feature_cols], iris_df[target_col])
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    segmentation = recon.MiningModel[0].Segmentation
    n_classes = len(model.classes_)
    ## 1: one segment per class plus the final normalisation segment
    self.assertEqual(len(segmentation.Segment), n_classes + 1)
    ## 2
    self.assertEqual(segmentation.multipleModelMethod, "modelChain")
    ##3
    self.assertEqual(
        segmentation.Segment[-1].RegressionModel.normalizationMethod,
        "simplemax")
    ##4: each per-class regression uses logit and keeps its intercept
    for idx in range(n_classes):
        reg_model = segmentation.Segment[idx].RegressionModel
        self.assertEqual(reg_model.normalizationMethod, "logit")
        self.assertEqual(
            "{:.16f}".format(model.intercept_[idx]),
            "{:.16f}".format(reg_model.RegressionTable[0].intercept))
def test_03_encoded_script(self):
    """Embed a base64-encoded script into a Keras PMML and verify it decodes back.

    Fixes: both the script read and the PMML write now use context managers,
    so neither file handle is leaked.
    """
    with open("nyoka/tests/preprocess.py", 'r') as fp:
        script_content = fp.read()
    pmml_obj = KerasToPmml(self.model_final,
                           dataSet='image',
                           predictedClasses=['cat', 'dog'],
                           script_args={
                               "content": script_content,
                               "def_name": "getBase64EncodedString",
                               "return_type": "string",
                               "encode": True
                           })
    with open("script_with_keras.pmml", 'w') as fp:
        pmml_obj.export(fp, 0)
    self.assertEqual(os.path.isfile("script_with_keras.pmml"), True)
    reconPmmlObj = pml.parse("script_with_keras.pmml", True)
    content = reconPmmlObj.TransformationDictionary.DefineFunction[
        0].Apply.Extension[0].anytypeobjs_[0]
    content = base64.b64decode(content).decode()
    self.assertEqual(script_content, content)
    self.assertEqual(len(self.model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_sklearn_04(self):
    """GradientBoosting with label-encoded features: model-chain structure checks."""
    titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
    titanic['Embarked'] = titanic['Embarked'].fillna('S')
    feature_cols = list(
        titanic.columns.drop(
            ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived']))
    target_col = 'Survived'
    out_file = "gb_pmml.pmml"
    pipe = Pipeline([
        ("mapping",
         DataFrameMapper([(['Sex'], LabelEncoder()),
                          (['Embarked'], LabelEncoder())])),
        ("imp", Imputer(strategy="median")),
        ("gbc", GradientBoostingClassifier(n_estimators=10))
    ])
    pipe.fit(titanic[feature_cols], titanic[target_col])
    skl_to_pmml(pipe, feature_cols, target_col, out_file)
    recon = pml.parse(out_file, True)
    segmentation = recon.MiningModel[0].Segmentation
    ##1
    self.assertEqual(segmentation.multipleModelMethod, "modelChain")
    ##2
    self.assertEqual(len(segmentation.Segment), 2)
    ##3
    self.assertEqual(
        segmentation.Segment[1].RegressionModel.normalizationMethod, "logit")
def test_lgbm_03(self):
    """Multiclass LGBMClassifier with scaling: PMML trees must match the booster dump.

    The booster interleaves trees across the 3 classes (tree i belongs to
    class ``i % 3``), while the PMML groups them per class segment, so the
    booster trees are bucketed per class before flattening for comparison.

    Fixes: the three near-identical rotation branches (``n`` cycling 1→2→3)
    are collapsed into ``i % 3`` bucket indexing; dead ``.clear()`` calls on
    about-to-be-discarded temp lists are removed.
    """
    iris = datasets.load_iris()
    irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
    irisd['Species'] = iris.target
    features = irisd.columns.drop('Species')
    target = 'Species'
    f_name = "lgbmc_pmml_preprocess.pmml"
    model = LGBMClassifier(n_estimators=5)
    pipeline_obj = Pipeline([('scaling', StandardScaler()), ('LGBMC', model)])
    pipeline_obj.fit(irisd[features], irisd[target])
    lgb_to_pmml(pipeline_obj, features, target, f_name)
    pmml_obj = pml.parse(f_name, True)

    # PMML side: the first three segments hold the per-class tree ensembles.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        if int(seg.id) <= 3:
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Stump: a single leaf directly under the root.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                else:
                    for node in node_tab:
                        pmml_value_list.append(node.SimplePredicate.value)
                        if node.get_Node():
                            self.extractValues(node, pmml_value_list,
                                               pmml_score_list)
                        else:
                            pmml_score_list.append(node.score)

    # Booster side: bucket trees by class, then flatten in class order.
    score_buckets = [[], [], []]
    value_buckets = [[], [], []]
    lgb_dump = model.booster_.dump_model()
    for i, tree_info in enumerate(lgb_dump['tree_info']):
        list_score_temp = []
        list_val_temp = []
        self.create_node(tree_info['tree_structure'], list_score_temp,
                         list_val_temp)
        score_buckets[i % 3] += list_score_temp
        value_buckets[i % 3] += list_val_temp
    model_score_list = score_buckets[0] + score_buckets[1] + score_buckets[2]
    model_value_list = value_buckets[0] + value_buckets[1] + value_buckets[2]

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_xgboost_03(self):
    """Multiclass XGBClassifier with scaling: PMML trees must match the booster dump.

    The booster interleaves trees across the 3 classes (tree i belongs to
    class ``i % 3``); the PMML groups them per class segment, so the booster
    trees are bucketed per class before flattening for comparison.

    Fixes: ``get_dump()`` was called once per tree (O(n^2)); it is hoisted
    out of the loop. The three near-identical rotation branches (``n``
    cycling 1→2→3) are collapsed into ``i % 3`` indexing, and dead
    ``.clear()`` calls are removed.
    """
    iris = datasets.load_iris()
    irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
    irisd['Species'] = iris.target
    features = irisd.columns.drop('Species')
    target = 'Species'
    f_name = "xgbc_pmml_preprocess.pmml"
    model = XGBClassifier(n_estimators=5)
    pipeline_obj = Pipeline([('scaling', StandardScaler()), ('xgbc', model)])
    pipeline_obj.fit(irisd[features], irisd[target])
    xgboost_to_pmml(pipeline_obj, features, target, f_name)
    pmml_obj = pml.parse(f_name, True)

    # Booster side: bucket trees by class, then flatten in class order.
    score_buckets = [[], [], []]
    value_buckets = [[], [], []]
    booster_dump = model._Booster.get_dump(dump_format='json')
    for i in range(model.n_estimators * model.n_classes_):
        list_score_temp = []
        list_val_temp = []
        self.create_node(json.loads(booster_dump[i]), list_score_temp,
                         list_val_temp)
        score_buckets[i % 3] += list_score_temp
        value_buckets[i % 3] += list_val_temp
    model_score_list = score_buckets[0] + score_buckets[1] + score_buckets[2]
    model_value_list = value_buckets[0] + value_buckets[1] + value_buckets[2]

    # PMML side: the first three segments hold the per-class tree ensembles.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        if int(seg.id) <= 3:
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Stump: a single leaf directly under the root.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                else:
                    for node in node_tab:
                        pmml_value_list.append(node.SimplePredicate.value)
                        if node.get_Node():
                            self.extractValues(node, pmml_value_list,
                                               pmml_score_list)
                        else:
                            pmml_score_list.append(node.score)

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_xgboost_04(self):
    """XGBRegressor with text + scaling preprocessing: PMML vs booster dump.

    Fixes: ``get_dump()`` hoisted out of the per-tree loop (was O(n^2)); dead
    ``.clear()`` calls and ``range(len())`` index loops removed.
    """
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    X = auto.drop(['mpg'], axis=1)
    y = auto['mpg']
    # NOTE(review): `name not in 'mpg'` is a substring test, not membership
    # in a collection; it happens to exclude only the 'mpg' column here, but
    # `name != 'mpg'` would be the explicit form — confirm before changing.
    feature_names = [name for name in auto.columns if name not in 'mpg']
    f_name = "xgbr_pmml_preprocess2.pmml"
    target_name = 'mpg'
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=101)
    model = XGBRegressor()
    pipeline_obj = Pipeline([
        ('mapper',
         DataFrameMapper([('car name', CountVectorizer()),
                          (['displacement'], [StandardScaler()])])),
        ('xgbr', model)
    ])
    pipeline_obj.fit(x_train, y_train)
    xgboost_to_pmml(pipeline_obj, feature_names, target_name, f_name)
    pmml_obj = pml.parse(f_name, True)

    # PMML-side predicate values and leaf scores.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        for node in seg.TreeModel.Node.Node:
            pmml_value_list.append(node.SimplePredicate.value)
            if node.get_Node():
                self.extractValues(node, pmml_value_list, pmml_score_list)
            else:
                pmml_score_list.append(node.score)

    # Booster-side values (dump fetched once).
    model_value_list = []
    model_score_list = []
    booster_dump = model._Booster.get_dump(dump_format='json')
    for i in range(model.n_estimators):
        list_score_temp = []
        list_val_temp = []
        self.create_node(json.loads(booster_dump[i]), list_score_temp,
                         list_val_temp)
        model_score_list += list_score_temp
        model_value_list += list_val_temp

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_xgboost_05(self):
    """Binary XGBClassifier export: PMML trees (segment 1) vs booster dump.

    Fixes: ``get_dump()`` hoisted out of the per-tree loop (was O(n^2)); dead
    ``.clear()`` calls and ``range(len())`` index loops removed.
    """
    iris = datasets.load_iris()
    irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
    # Synthetic alternating 0/1 target for a binary problem.
    irisd['target'] = [i % 2 for i in range(iris.data.shape[0])]
    features = irisd.columns.drop('target')
    target = 'target'
    f_name = "xgbc_bin_pmml.pmml"
    model = XGBClassifier(min_child_weight=6,
                          n_estimators=10,
                          scale_pos_weight=10,
                          deterministic_histogram=False)
    pipeline_obj = Pipeline([('xgbc', model)])
    pipeline_obj.fit(irisd[features], irisd[target])
    xgboost_to_pmml(pipeline_obj, features, target, f_name)
    pmml_obj = pml.parse(f_name, True)

    # PMML side: only the first segment carries the boosted-tree ensemble.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        if int(seg.id) == 1:
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Stump: a single leaf directly under the root.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                else:
                    for node in node_tab:
                        pmml_value_list.append(node.SimplePredicate.value)
                        if node.get_Node():
                            self.extractValues(node, pmml_value_list,
                                               pmml_score_list)
                        else:
                            pmml_score_list.append(node.score)

    # Booster side (dump fetched once).
    model_value_list = []
    model_score_list = []
    booster_dump = model._Booster.get_dump(dump_format='json')
    for i in range(model.n_estimators):
        list_score_temp = []
        list_val_temp = []
        self.create_node(json.loads(booster_dump[i]), list_score_temp,
                         list_val_temp)
        model_score_list += list_score_temp
        model_value_list += list_val_temp

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_lgbm_04(self):
    """LGBMRegressor with text + scaling preprocessing: PMML vs booster dump.

    Fixes: dead ``.clear()`` calls and ``range(len())`` index loops removed;
    redundant intermediate tree list dropped.
    """
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    X = auto.drop(['mpg'], axis=1)
    y = auto['mpg']
    # NOTE(review): `name not in ('mpg')` is a substring test against the
    # string 'mpg' (the parentheses do not make a tuple); it happens to
    # exclude only 'mpg' here, but `name != 'mpg'` would be explicit.
    feature_names = [name for name in auto.columns if name not in ('mpg')]
    target_name = 'mpg'
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=101)
    f_name = "lgbmr_pmml_preprocess2.pmml"
    model = LGBMRegressor()
    pipeline_obj = Pipeline([
        ('mapper',
         DataFrameMapper([('car name', CountVectorizer()),
                          (['displacement'], [StandardScaler()])])),
        ('lgbmr', model)
    ])
    pipeline_obj.fit(x_train, y_train)
    lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)
    pmml_obj = pml.parse(f_name, True)

    # PMML-side predicate values and leaf scores.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        for node in seg.TreeModel.Node.Node:
            pmml_value_list.append(node.SimplePredicate.value)
            if node.get_Node():
                self.extractValues(node, pmml_value_list, pmml_score_list)
            else:
                pmml_score_list.append(node.score)

    # Booster-side values straight from the LightGBM dump.
    model_value_list = []
    model_score_list = []
    lgb_dump = model.booster_.dump_model()
    for tree_info in lgb_dump['tree_info']:
        list_score_temp = []
        list_val_temp = []
        self.create_node(tree_info['tree_structure'], list_score_temp,
                         list_val_temp)
        model_score_list += list_score_temp
        model_value_list += list_val_temp

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)
def test_lgbm_05(self):
    """Binary LGBMClassifier export: PMML trees (segment 1) vs booster dump.

    Fixes: dead ``.clear()`` calls and ``range(len())`` index loops removed;
    a commented-out duplicate assertion was deleted (the same check is made
    at the end via ``f_name``).
    """
    iris = datasets.load_iris()
    irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
    # Synthetic alternating 0/1 target for a binary problem.
    irisd['target'] = [i % 2 for i in range(iris.data.shape[0])]
    features = irisd.columns.drop('target')
    target = 'target'
    f_name = "lgbc_bin_pmml.pmml"
    model = LGBMClassifier()
    pipeline_obj = Pipeline([('lgbmc', model)])
    pipeline_obj.fit(irisd[features], irisd[target])
    lgb_to_pmml(pipeline_obj, features, target, f_name)
    pmml_obj = pml.parse(f_name, True)

    # PMML side: only the first segment carries the tree ensemble.
    pmml_value_list = []
    pmml_score_list = []
    for seg in pmml_obj.MiningModel[0].Segmentation.Segment:
        if int(seg.id) == 1:
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Stump: a single leaf directly under the root.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                else:
                    for node in node_tab:
                        pmml_value_list.append(node.SimplePredicate.value)
                        if node.get_Node():
                            self.extractValues(node, pmml_value_list,
                                               pmml_score_list)
                        else:
                            pmml_score_list.append(node.score)

    # Booster side.
    model_value_list = []
    model_score_list = []
    lgb_dump = model.booster_.dump_model()
    for tree_info in lgb_dump['tree_info']:
        list_score_temp = []
        list_val_temp = []
        self.create_node(tree_info['tree_structure'], list_score_temp,
                         list_val_temp)
        model_score_list += list_score_temp
        model_value_list += list_val_temp

    ##1
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3
    self.assertEqual(os.path.isfile(f_name), True)