Esempio n. 1
0
def add_segmentation(model, segments_equal_to_estimators, mining_schema_for_1st_segment, out, id):
    """
    Wrap a list of tree segments in a summing MiningModel and return it as one Segment.

    The given segments are combined with ``multipleModelMethod="sum"`` inside a
    regression MiningModel, which is then placed in a Segment whose predicate is
    always true.

    Parameters
    ----------
    model:
        Xgboost model object; only its ``n_classes_`` attribute is read here.
    segments_equal_to_estimators: List
        Segments to be summed (one per estimator, as built by the caller).
    mining_schema_for_1st_segment:
        Mining schema attached to the wrapping MiningModel.
    out:
        Output element attached to the wrapping MiningModel.
    id: Integer
        Base index of the segment. Used unchanged when the model has exactly
        two classes, otherwise ``id + 1`` is used.

    Returns:
    -------
    segment:
         A single Segment containing the summing MiningModel.
    """
    summed_trees = pml.Segmentation(
        multipleModelMethod="sum",
        Segment=segments_equal_to_estimators,
    )
    wrapping_model = pml.MiningModel(
        functionName='regression',
        modelName="MiningModel",
        MiningSchema=mining_schema_for_1st_segment,
        Output=out,
        Segmentation=summed_trees,
    )

    # Binary models use the id as given; multi-class callers pass a 0-based
    # class index, which is shifted to a 1-based segment id.
    segment_id = id if model.n_classes_ == 2 else id + 1
    return pml.Segment(True_=pml.True_(), id=segment_id, MiningModel=wrapping_model)
Esempio n. 2
0
def generate_Segments_Equal_To_Estimators(val, derived_col_names, col_names):
    """
    Build one regression TreeModel segment for every entry of ``val``.

    Parameters
    ----------
    val: List
        Tree descriptions (nodes for binary classification / inner segments for
        multi-class classification); each entry is handed to ``create_node``.
    derived_col_names: List
        Column names after preprocessing, forwarded to ``create_node``.
    col_names: List
        Feature/column names; each becomes a MiningField of every segment.
    Returns:
    -------
    segments_equal_to_estimators:
         List with one Segment per entry of ``val``, with ids starting at 1.
    """
    tree_segments = []
    for segment_id, tree in enumerate(val, start=1):
        # Root node with an always-true predicate; create_node fills it in place.
        root = pml.Node(True_=pml.True_())
        create_node(tree, root, derived_col_names)

        schema = pml.MiningSchema(
            MiningField=[pml.MiningField(name=column) for column in col_names]
        )
        tree_model = pml.TreeModel(
            functionName="regression",
            modelName="DecisionTreeModel",
            missingValueStrategy="none",
            noTrueChildStrategy="returnLastPrediction",
            splitCharacteristic="multiSplit",
            Node=root,
            MiningSchema=schema,
        )
        tree_segments.append(
            pml.Segment(id=segment_id, True_=pml.True_(), TreeModel=tree_model)
        )

    return tree_segments
Esempio n. 3
0
def get_segments_for_xgbc(model, derived_col_names, feature_names, target_name, mining_imp_val,categoric_values):
    """
    It returns all the segments of the Xgboost classifier.

    For a binary model the result holds two segments: the summed tree ensemble
    (id 1) followed by a regression model (id 2) whose normalization method is
    set to ``logit``. For a multi-class model the result holds one summed tree
    ensemble per class followed by a final regression model segment with id
    ``n_classes_ + 1``.

    Parameters
    ----------
    model :
        Contains Xgboost model object. ``n_classes_``, ``n_estimators`` and
        ``_Booster`` are read from it.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    segments : List
        Returns all the segments of the xgboost model.
    """
    segments = list()

    n_classes = model.n_classes_
    is_binary = n_classes == 2
    # Multi-class boosters hold one tree per class per boosting round.
    tree_count = model.n_estimators if is_binary else model.n_estimators * n_classes

    # Dump the booster once: get_dump serialises every tree on each call, so
    # calling it inside the loop repeats the whole dump for every tree.
    # NOTE(review): _Booster is a private attribute of the sklearn wrapper;
    # get_booster() looks like the public accessor - confirm before switching.
    tree_dumps = model._Booster.get_dump(dump_format='json')
    # Indexing (rather than slicing) keeps the IndexError raised when the
    # booster holds fewer trees than tree_count.
    get_nodes_in_json_format = [json.loads(tree_dumps[i]) for i in range(tree_count)]

    if is_binary:
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        outputField = [pml.OutputField(name="xgbValue", optype="continuous", dataType="float",
                                       feature="predictedValue", isFinalResult="true")]
        out = pml.Output(OutputField=outputField)
        oField = ['xgbValue']
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(get_nodes_in_json_format,
                                                                             derived_col_names,
                                                                             feature_names)
        First_segment = add_segmentation(model, segments_equal_to_estimators,
                                         mining_schema_for_1st_segment, out, 1)
        reg_model = sklToPmml.get_regrs_models(model, oField, oField, target_name,
                                               mining_imp_val, categoric_values)[0]
        # The summed tree output is a raw margin; the regression model is
        # marked to apply the logit normalization to it.
        reg_model.normalizationMethod = 'logit'
        last_segment = pml.Segment(True_=pml.True_(), id=2, RegressionModel=reg_model)
        segments.append(First_segment)
        segments.append(last_segment)
    else:
        oField = list()
        for index in range(n_classes):
            # Trees are interleaved by class: every n_classes-th tree starting
            # at `index` belongs to class `index`.
            inner_segment = get_nodes_in_json_format[index::n_classes]
            mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
            field_name = 'xgbValue(' + str(index) + ')'
            outputField = [pml.OutputField(name=field_name, optype="continuous",
                                           feature="predictedValue", dataType="float",
                                           isFinalResult="true")]
            out = pml.Output(OutputField=outputField)

            oField.append(field_name)
            segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(inner_segment,
                                                                                 derived_col_names,
                                                                                 feature_names)
            segments_equal_to_class = add_segmentation(model, segments_equal_to_estimators,
                                                       mining_schema_for_1st_segment, out, index)
            segments.append(segments_equal_to_class)
        reg_model = sklToPmml.get_regrs_models(model, oField, oField, target_name,
                                               mining_imp_val, categoric_values)[0]
        last_segment = pml.Segment(True_=pml.True_(), id=n_classes + 1,
                                   RegressionModel=reg_model)
        segments.append(last_segment)
    return segments