def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name, mining_imp_val, categoric_values,
                          model_name):
    """
    Assemble the list of segments that describes a LightGBM classifier.

    For a two-class model the result holds two entries: one segmentation
    built from every tree of the booster, followed by a regression-model
    segment (id 2) whose normalization method is LOGISTIC.

    For any other class count, one segmentation is created per class from
    the trees found at positions ``index, index + n_classes_, ...`` of the
    booster dump, and a closing regression-model segment
    (id ``n_classes_ + 1``) with SOFTMAX normalization is appended.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    list
        All the segments of the LGB model, in the order described above.
    """
    is_binary = model.n_classes_ == 2

    # Raw tree structures, in the order the booster reports them.
    tree_structures = [
        tree_info['tree_structure']
        for tree_info in model.booster_.dump_model()['tree_info']
    ]

    if is_binary:
        first_schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        raw_value_field = pml.OutputField(name="lgbValue",
                                          optype=OPTYPE.CONTINUOUS,
                                          dataType=DATATYPE.DOUBLE,
                                          feature=RESULT_FEATURE.PREDICTED_VALUE,
                                          isFinalResult="false")
        tree_output = pml.Output(OutputField=[raw_value_field])
        value_names = ["lgbValue"]
        per_tree_segments = generate_Segments_Equal_To_Estimators(
            tree_structures, derived_col_names, feature_names)
        ensemble_segment = xgboostToPmml.add_segmentation(
            model, per_tree_segments, first_schema, tree_output, 1)
        # The same name list is handed over twice, exactly as before.
        regression = sklToPmml.get_regrs_models(model, value_names, value_names, target_name,
                                                mining_imp_val, categoric_values, model_name)[0]
        regression.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC
        closing_segment = pml.Segment(True_=pml.True_(), id=2, RegressionModel=regression)
        return [ensemble_segment, closing_segment]

    class_count = model.n_classes_
    segments = []
    value_names = []
    for class_index in range(class_count):
        # Every class_count-th tree, starting at this class' offset.
        class_trees = tree_structures[class_index::class_count]
        first_schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        value_name = 'lgbValue(' + str(class_index) + ')'
        class_output = pml.Output(OutputField=[
            pml.OutputField(name=value_name,
                            optype=OPTYPE.CONTINUOUS,
                            feature=RESULT_FEATURE.PREDICTED_VALUE,
                            dataType=DATATYPE.FLOAT,
                            isFinalResult="true")
        ])
        value_names.append(value_name)
        per_tree_segments = generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        # The class index goes in as the last add_segmentation argument
        # (presumably the segment id - confirm against add_segmentation).
        segments.append(xgboostToPmml.add_segmentation(
            model, per_tree_segments, first_schema, class_output, class_index))

    regression = sklToPmml.get_regrs_models(model, value_names, value_names, target_name,
                                            mining_imp_val, categoric_values, model_name)[0]
    regression.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SOFTMAX
    segments.append(pml.Segment(True_=pml.True_(), id=class_count + 1, RegressionModel=regression))
    return segments
def get_segments_for_xgbc(model, derived_col_names, feature_names, target_name, mining_imp_val, categoric_values,
                          model_name):
    """
    It returns all the segments of the Xgboost classifier.

    For a two-class model the first ``n_estimators`` trees of the booster
    dump form one segmentation, followed by a regression-model segment
    (id 2) with LOGISTIC normalization.

    Otherwise the first ``n_estimators * n_classes_`` trees are split per
    class (positions ``index, index + n_classes_, ...``), one segmentation
    is created for every class, and a regression-model segment
    (id ``n_classes_ + 1``) with SOFTMAX normalization closes the list.

    Parameters
    ----------
    model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    list
        The segments of the model, in the order described above.

    Raises
    ------
    IndexError
        If the booster dump holds fewer trees than the count derived from
        ``n_estimators`` (and ``n_classes_``).
    """
    segments = list()
    is_binary = model.n_classes_ == 2
    if is_binary:
        tree_count = model.n_estimators
    else:
        tree_count = model.n_estimators * model.n_classes_

    # Serialise the booster exactly once. Calling get_dump() inside the loop
    # re-dumped every tree on every iteration, which is quadratic in the
    # number of trees. Indexing (rather than slicing) keeps the IndexError
    # when the dump is shorter than expected.
    booster_dump = model._Booster.get_dump(dump_format='json')
    get_nodes_in_json_format = [json.loads(booster_dump[i]) for i in range(tree_count)]

    if is_binary:
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        outputField = [
            pml.OutputField(name="xgbValue",
                            optype=OPTYPE.CONTINUOUS.value,
                            dataType=DATATYPE.FLOAT.value,
                            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
                            isFinalResult="true")
        ]
        out = pml.Output(OutputField=outputField)
        oField = ['xgbValue']
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            get_nodes_in_json_format, derived_col_names, feature_names)
        First_segment = add_segmentation(model, segments_equal_to_estimators,
                                         mining_schema_for_1st_segment, out, 1)
        reg_model = sklToPmml.get_regrs_models(model, oField, oField, target_name,
                                               mining_imp_val, categoric_values, model_name)[0]
        reg_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC.value
        last_segment = pml.Segment(True_=pml.True_(), id=2, RegressionModel=reg_model)
        segments.append(First_segment)
        segments.append(last_segment)
        return segments

    oField = list()
    for index in range(0, model.n_classes_):
        # Trees of one class sit n_classes_ positions apart in the dump.
        inner_segment = get_nodes_in_json_format[index::model.n_classes_]
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        field_name = 'xgbValue(' + str(index) + ')'
        outputField = [
            pml.OutputField(name=field_name,
                            optype=OPTYPE.CONTINUOUS.value,
                            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
                            dataType=DATATYPE.FLOAT.value,
                            isFinalResult="true")
        ]
        out = pml.Output(OutputField=outputField)
        oField.append(field_name)
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            inner_segment, derived_col_names, feature_names)
        segments_equal_to_class = add_segmentation(model, segments_equal_to_estimators,
                                                   mining_schema_for_1st_segment, out, index)
        segments.append(segments_equal_to_class)

    reg_model = sklToPmml.get_regrs_models(model, oField, oField, target_name,
                                           mining_imp_val, categoric_values, model_name)[0]
    reg_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SOFTMAX.value
    last_segment = pml.Segment(True_=pml.True_(), id=model.n_classes_ + 1, RegressionModel=reg_model)
    segments.append(last_segment)
    return segments
def get_segments_for_xgbc(skl_model, derived_col_names, feature_names, target_name, mining_imp_val,
                          categoric_values):
    """
    It returns all the segments of the Xgboost classifier.

    The JSON dump of the booster is parsed, converted with
    ``generate_main_Key_Value`` and then turned into segments.  A two-class
    model yields one segmentation over the first ``n_estimators`` trees plus
    a regression-model segment with id 2.  Any other class count yields one
    segmentation per class (entries ``index, index + n_classes_, ...`` of
    the converted trees) plus a regression-model segment with id
    ``n_classes_ + 1``.

    Parameters
    ----------
    skl_model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    list
        All the segments of the xgboost model, in the order described above.

    Raises
    ------
    IndexError
        If the booster dump holds fewer trees than the count derived from
        ``n_estimators`` (and ``n_classes_``).
    """
    segments = list()
    is_binary = skl_model.n_classes_ == 2
    if is_binary:
        tree_count = skl_model.n_estimators
    else:
        tree_count = skl_model.n_estimators * skl_model.n_classes_

    # Dump the booster a single time. The dump used to be regenerated for
    # every tree inside the loop, making the load quadratic in the number of
    # trees. Indexing keeps the IndexError for a dump that is too short.
    booster_dump = skl_model._Booster.get_dump(dump_format='json')
    get_nodes_in_json_format = [json.loads(booster_dump[i]) for i in range(tree_count)]
    main_key_value = generate_main_Key_Value(get_nodes_in_json_format)

    if is_binary:
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        outputField = [
            pml.OutputField(name="xgbValue",
                            optype="continuous",
                            dataType="float",
                            feature="predictedValue",
                            isFinalResult="true")
        ]
        out = pml.Output(OutputField=outputField)
        oField = ['xgbValue']
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            main_key_value, derived_col_names, feature_names)
        First_segment = add_segmentation(skl_model, segments_equal_to_estimators,
                                         mining_schema_for_1st_segment, out, 1)
        last_segment = pml.Segment(True_=pml.True_(),
                                   id=2,
                                   RegressionModel=sklToPmml.get_regrs_models(
                                       skl_model, oField, oField, target_name,
                                       mining_imp_val, categoric_values)[0])
        segments.append(First_segment)
        segments.append(last_segment)
        return segments

    oField = list()
    for index in range(0, skl_model.n_classes_):
        # Explicit indexing: the container type returned by
        # generate_main_Key_Value is not visible here, so no slicing.
        inner_segment = [
            main_key_value[in_seg]
            for in_seg in range(index, len(main_key_value), skl_model.n_classes_)
        ]
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        field_name = 'xgbValue(' + str(index) + ')'
        outputField = [
            pml.OutputField(name=field_name,
                            optype="continuous",
                            feature="predictedValue",
                            isFinalResult="true")
        ]
        out = pml.Output(OutputField=outputField)
        oField.append(field_name)
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            inner_segment, derived_col_names, feature_names)
        segments_equal_to_class = add_segmentation(skl_model, segments_equal_to_estimators,
                                                   mining_schema_for_1st_segment, out, index)
        segments.append(segments_equal_to_class)

    last_segment = pml.Segment(True_=pml.True_(),
                               id=skl_model.n_classes_ + 1,
                               RegressionModel=sklToPmml.get_regrs_models(
                                   skl_model, oField, oField, target_name,
                                   mining_imp_val, categoric_values)[0])
    segments.append(last_segment)
    return segments
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name, mining_imp_val,
                          categoric_values):
    """
    Assemble the list of segments that describes a LightGBM classifier.

    Every tree structure of the booster dump is first converted with
    ``generate_structure_for_lgb``.  A two-class model then produces one
    segmentation over all converted trees plus a regression-model segment
    with id 2.  Any other class count produces one segmentation per class
    (converted trees at positions ``index, index + n_classes_, ...``) plus
    a regression-model segment with id ``n_classes_ + 1``.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    list
        All the segments of the LGB model, in the order described above.
    """
    is_binary = model.n_classes_ == 2

    # One converted structure per tree; each conversion gets its own fresh
    # node list as the second argument.
    converted_trees = [
        generate_structure_for_lgb(tree_info['tree_structure'], [], derived_col_names)
        for tree_info in model.booster_.dump_model()['tree_info']
    ]

    if is_binary:
        first_schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        tree_output = pml.Output(OutputField=[
            pml.OutputField(name="lgbValue",
                            optype="continuous",
                            dataType="float",
                            feature="predictedValue",
                            isFinalResult="true")
        ])
        value_names = ['lgbValue']
        per_tree_segments = xgboostToPmml.generate_Segments_Equal_To_Estimators(
            converted_trees, derived_col_names, feature_names)
        ensemble_segment = xgboostToPmml.add_segmentation(
            model, per_tree_segments, first_schema, tree_output, 1)
        # Build the predicate before the regression model to keep the
        # original evaluation order of the Segment arguments.
        always_true = pml.True_()
        regression = sklToPmml.get_regrs_models(model, value_names, value_names, target_name,
                                                mining_imp_val, categoric_values)[0]
        closing_segment = pml.Segment(True_=always_true, id=2, RegressionModel=regression)
        return [ensemble_segment, closing_segment]

    class_count = model.n_classes_
    segments = []
    value_names = []
    for class_index in range(class_count):
        # Every class_count-th converted tree, starting at this class' offset.
        class_trees = converted_trees[class_index::class_count]
        first_schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        value_name = 'lgbValue(' + str(class_index) + ')'
        class_output = pml.Output(OutputField=[
            pml.OutputField(name=value_name,
                            optype="continuous",
                            feature="predictedValue",
                            isFinalResult="true")
        ])
        value_names.append(value_name)
        per_tree_segments = xgboostToPmml.generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(xgboostToPmml.add_segmentation(
            model, per_tree_segments, first_schema, class_output, class_index))

    always_true = pml.True_()
    regression = sklToPmml.get_regrs_models(model, value_names, value_names, target_name,
                                            mining_imp_val, categoric_values)[0]
    segments.append(pml.Segment(True_=always_true, id=class_count + 1, RegressionModel=regression))
    return segments