def add_segmentation(model,segments_equal_to_estimators,mining_schema_for_1st_segment,out,id):
    """
    Wrap a list of per-estimator segments in one Segment holding a summing MiningModel.

    Parameters
    ----------
    model:
        Xgboost model object; only its ``n_classes_`` attribute is read here.
    segments_equal_to_estimators: List
        Segments (one per estimator) that are combined with the "sum" method.
    mining_schema_for_1st_segment:
        Mining schema attached to the wrapping MiningModel.
    out:
        Output element attached to the wrapping MiningModel.
    id: Integer
        Base index of the segment. It is used unchanged when the model has
        exactly two classes and is incremented by one otherwise.

    Returns:
    -------
    segment:
        A single Segment whose MiningModel sums the given segments.
    """
    summed_trees = pml.Segmentation(multipleModelMethod="sum",
                                    Segment=segments_equal_to_estimators)
    wrapper_model = pml.MiningModel(functionName='regression',
                                    modelName="MiningModel",
                                    MiningSchema=mining_schema_for_1st_segment,
                                    Output=out,
                                    Segmentation=summed_trees)
    # Both cases build the same Segment; only the id differs.
    is_binary = model.n_classes_ == 2
    return pml.Segment(True_=pml.True_(),
                       id=id if is_binary else id + 1,
                       MiningModel=wrapper_model)
def generate_Segments_Equal_To_Estimators(val, derived_col_names, col_names):
    """
    Build one TreeModel segment for every entry of ``val``.

    Parameters
    ----------
    val: List
        Tree structures, one per estimator (for multi-class models, the trees
        belonging to one class).
    derived_col_names: List
        Column names after preprocessing; forwarded to ``create_node``.
    col_names: List
        Feature/column names; each becomes a MiningField of every tree's
        MiningSchema.

    Returns:
    -------
    segments_equal_to_estimators:
        List of Segments, with ids starting at 1, in the same order as ``val``.
    """
    tree_segments = []
    for position, tree in enumerate(val):
        root = pml.Node(True_=pml.True_())
        # create_node is defined elsewhere; presumably it fills `root` in place
        # from the tree structure (verify against its definition).
        create_node(tree, root, derived_col_names)
        # Every segment gets its own MiningField objects.
        schema_fields = [pml.MiningField(name=column) for column in col_names]
        tree_model = pml.TreeModel(
            functionName="regression",
            modelName="DecisionTreeModel",
            missingValueStrategy="none",
            noTrueChildStrategy="returnLastPrediction",
            splitCharacteristic="multiSplit",
            Node=root,
            MiningSchema=pml.MiningSchema(MiningField=schema_fields))
        # Segment ids are 1-based.
        tree_segments.append(
            pml.Segment(id=position + 1, True_=pml.True_(), TreeModel=tree_model))
    return tree_segments
def get_segments_for_xgbc(model, derived_col_names, feature_names, target_name, mining_imp_val,categoric_values):
    """
    Build all the segments of the Xgboost classifier.

    Parameters
    ----------
    model :
        Xgboost model object. ``n_classes_``, ``n_estimators`` and the
        underlying ``_Booster`` are read.
    derived_col_names : List
        Column names after preprocessing.
    feature_names : List
        Feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains Categorical attribute names and their values.

    Returns
    -------
    segments : List
        For a two-class model: a segment summing all trees (output
        ``xgbValue``) followed by a regression segment with ``logit``
        normalization. Otherwise: one summing segment per class (output
        ``xgbValue(<class index>)``) followed by one regression segment.

    Raises
    ------
    IndexError
        If the booster dump holds fewer trees than ``n_estimators``
        (times ``n_classes_`` for the multi-class case).
    """
    segments = []
    # Dump the booster once: get_dump serializes every tree on each call, so
    # calling it per tree made this function quadratic in the number of trees.
    tree_dumps = model._Booster.get_dump(dump_format='json')

    if model.n_classes_ == 2:
        # Indexed access (not slicing) so a short dump still raises IndexError.
        trees = [json.loads(tree_dumps[i]) for i in range(model.n_estimators)]
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        out = pml.Output(OutputField=[
            pml.OutputField(name="xgbValue", optype="continuous", dataType="float",
                            feature="predictedValue", isFinalResult="true")])
        output_names = ['xgbValue']
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            trees, derived_col_names, feature_names)
        first_segment = add_segmentation(model, segments_equal_to_estimators,
                                         mining_schema_for_1st_segment, out, 1)
        # The summed tree output feeds a regression model, set here to logit.
        reg_model = sklToPmml.get_regrs_models(model, output_names, output_names, target_name,
                                               mining_imp_val, categoric_values)[0]
        reg_model.normalizationMethod = 'logit'
        last_segment = pml.Segment(True_=pml.True_(), id=2, RegressionModel=reg_model)
        segments.append(first_segment)
        segments.append(last_segment)
    else:
        n_classes = model.n_classes_
        trees = [json.loads(tree_dumps[i])
                 for i in range(model.n_estimators * n_classes)]
        output_names = []
        for index in range(n_classes):
            # Class `index` takes every n_classes-th tree starting at `index`.
            class_trees = trees[index::n_classes]
            mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
            field_name = 'xgbValue(' + str(index) + ')'
            out = pml.Output(OutputField=[
                pml.OutputField(name=field_name, optype="continuous",
                                feature="predictedValue", dataType="float",
                                isFinalResult="true")])
            output_names.append(field_name)
            segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
                class_trees, derived_col_names, feature_names)
            # add_segmentation shifts the id by one when n_classes_ != 2.
            segments.append(add_segmentation(model, segments_equal_to_estimators,
                                             mining_schema_for_1st_segment, out, index))
        reg_model = sklToPmml.get_regrs_models(model, output_names, output_names, target_name,
                                               mining_imp_val, categoric_values)[0]
        segments.append(pml.Segment(True_=pml.True_(), id=n_classes + 1,
                                    RegressionModel=reg_model))
    return segments