Ejemplo n.º 1
0
 def create_right_node(obj, derived_col_names):
     """
     Builds the right-hand child Node for one split of a tree dump.

     Parameters
     ----------
     obj :
         Split description; the keys 'split_feature', 'threshold' and
         'right_child' are read.
     derived_col_names : list
         Column names after preprocessing; indexed by int(obj['split_feature']).

     Returns
     -------
     right_node :
         Node carrying a SIMPLE_PREDICATE_OPERATOR.GREATER_THAN predicate on the
         split threshold, filled from obj['right_child'] by create_node.
     """
     right_node = pml.Node()
     field_name = xgboostToPmml.replace_name_with_derivedColumnNames(
         derived_col_names[int(obj['split_feature'])], derived_col_names)
     predicate = pml.SimplePredicate(
         field=field_name,
         operator=SIMPLE_PREDICATE_OPERATOR.GREATER_THAN.value,
         # The threshold is serialised with 16 decimal places.
         value="{:.16f}".format(obj['threshold']))
     right_node.set_SimplePredicate(predicate)
     create_node(obj['right_child'], right_node, derived_col_names)
     return right_node
Ejemplo n.º 2
0
def get_targets(model, target_name):

    """
    It returns the Targets element of the model.

    Parameters
    ----------
    model :
        An Xgboost model instance.
    target_name : String
        Name of the Target column.

    Returns
    -------
    targets :
        Nyoka's Targets object when the model class is named 'XGBRegressor',
        otherwise None.
    """
    # Only the regressor gets a Targets element; returning None explicitly
    # avoids an UnboundLocalError for every other model class.
    if model.__class__.__name__ != 'XGBRegressor':
        return None

    # A missing base_score falls back to 0.5.
    base_score = model.base_score if model.base_score is not None else 0.5
    return pml.Targets(
        Target=[
            pml.Target(
                field=target_name,
                rescaleConstant="{:.16f}".format(base_score)
            )
        ]
    )
Ejemplo n.º 3
0
 def create_right_node(obj, derived_col_names):
     """
     Builds the right-hand child Node for one split of a tree dump.

     Parameters
     ----------
     obj :
         Split description; the keys 'split', 'split_condition' and
         'children' are read.
     derived_col_names : list
         Column names after preprocessing.

     Returns
     -------
     right_node :
         Node carrying a 'greaterOrEqual' predicate on the split condition,
         filled from obj['children'][1] by create_node.
     """
     right_node = pml.Node()
     field_name = replace_name_with_derivedColumnNames(obj['split'],
                                                       derived_col_names)
     # The split condition is serialised with 16 decimal places.
     split_value = "{:.16f}".format(obj['split_condition'])
     right_node.set_SimplePredicate(
         pml.SimplePredicate(field=field_name,
                             operator='greaterOrEqual',
                             value=split_value))
     create_node(obj['children'][1], right_node, derived_col_names)
     return right_node
Ejemplo n.º 4
0
 def get_data_dictionary():
     """
     Builds the DataDictionary from the hulls of the enclosing object.

     `self` is not a parameter here; it is taken from the enclosing scope.
     When self._use_lag is falsy each hull contributes one field per
     fingerprint position (hull name, _UNDERSCORE, position); otherwise each
     hull contributes a single field named after the hull.

     Returns
     -------
     data_dict :
         DataDictionary whose DataFields all use OPTYPE.CONTINUOUS and
         DATATYPE.DOUBLE.
     """
     if self._use_lag:
         field_names = [hull["name"] for hull in self._hulls]
     else:
         field_names = [
             hull["name"] + _UNDERSCORE + str(position)
             for hull in self._hulls
             for position in range(self._length_of_fingerprint)
         ]

     data_fields = [
         pml.DataField(name=field_name,
                       optype=OPTYPE.CONTINUOUS,
                       dataType=DATATYPE.DOUBLE)
         for field_name in field_names
     ]
     return pml.DataDictionary(numberOfFields=len(data_fields),
                               DataField=data_fields)
Ejemplo n.º 5
0
 def create_right_node(obj, derived_col_names):
     """
     Builds the right-hand child Node for one split of a tree dump.

     Parameters
     ----------
     obj :
         Split description; the keys 'split_feature', 'threshold' and
         'right_child' are read.
     derived_col_names : list
         Column names after preprocessing; indexed by int(obj['split_feature']).

     Returns
     -------
     right_node :
         Node carrying a 'greaterOrEqual' predicate on the split threshold,
         filled from obj['right_child'] by create_node.
     """
     right_node = pml.Node()
     split_column = derived_col_names[int(obj['split_feature'])]
     field_name = xgboostToPmml.replace_name_with_derivedColumnNames(
         split_column, derived_col_names)
     # The threshold is serialised with 16 decimal places.
     threshold_text = "{:.16f}".format(obj['threshold'])
     predicate = pml.SimplePredicate(field=field_name,
                                     operator='greaterOrEqual',
                                     value=threshold_text)
     right_node.set_SimplePredicate(predicate)
     create_node(obj['right_child'], right_node, derived_col_names)
     return right_node
Ejemplo n.º 6
0
 def create_right_node(obj, derived_col_names):
     """
     Builds the right-hand child Node for one split of a tree dump.

     Parameters
     ----------
     obj :
         Split description; the keys 'split', 'split_condition' and
         'children' are read.
     derived_col_names : list
         Column names after preprocessing.

     Returns
     -------
     right_node :
         Node carrying a SIMPLE_PREDICATE_OPERATOR.GREATER_OR_EQUAL predicate
         on the split condition, filled from obj['children'][1] by create_node.
     """
     right_node = pml.Node()
     field_name = replace_name_with_derivedColumnNames(obj['split'],
                                                       derived_col_names)
     predicate = pml.SimplePredicate(
         field=field_name,
         operator=SIMPLE_PREDICATE_OPERATOR.GREATER_OR_EQUAL,
         # The split condition is serialised with 16 decimal places.
         value="{:.16f}".format(obj['split_condition']))
     right_node.set_SimplePredicate(predicate)
     create_node(obj['children'][1], right_node, derived_col_names)
     return right_node
Ejemplo n.º 7
0
 def __init__(self, predictedClasses=None):
     """
     Initialises the Output element and registers its OutputFields.

     Parameters
     ----------
     predictedClasses :
         When truthy, the fields 'predicted_label', 'top1_prob' and
         'top5_prob' are added; otherwise the single continuous field
         'predicted_predictions' is added.
     """
     ny.Output.__init__(self)

     if predictedClasses:
         field_specs = (
             dict(name="predicted_label",
                  feature="predictedValue",
                  dataType="string",
                  optype="categorical"),
             dict(name="top1_prob",
                  feature="probability",
                  dataType="double"),
             dict(name="top5_prob",
                  feature="topCategories",
                  numTopCategories="5",
                  dataType="string",
                  optype="categorical"),
         )
     else:
         field_specs = (
             dict(name="predicted_predictions",
                  feature="predictedValue",
                  dataType="double",
                  optype="continuous"),
         )

     # Fields are created and attached one by one, in declaration order.
     for spec in field_specs:
         ny.Output.add_OutputField(self, ny.OutputField(**spec))
Ejemplo n.º 8
0
def lag(trfm, col_names):
    """
    Generates pre-processing elements for Nyoka's Lag

    Parameters
    ----------
    trfm :
        Contains the Nyoka's Lag instance; its `aggregation` and `value`
        attributes are read.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Dictionary with the derived fields under 'der_fld' and their names
        under 'der_col_names'.

    """
    derived_colnames = get_derived_colnames(trfm.aggregation, col_names)

    derived_flds = []
    for position, column in enumerate(col_names):
        # One Lag element per input column, wrapped in its own DerivedField.
        lag_element = pml.Lag(field=column,
                              n=trfm.value,
                              aggregate=trfm.aggregation)
        derived_flds.append(
            pml.DerivedField(name=derived_colnames[position],
                             Lag=lag_element,
                             optype=OPTYPE.CONTINUOUS.value,
                             dataType=DATATYPE.DOUBLE.value))

    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
Ejemplo n.º 9
0
    def __init__(self, dataSet, script_args):
        """
        Builds a TransformationDictionary holding one 'customFunc'
        DefineFunction that wraps a user supplied Python script.

        Parameters
        ----------
        dataSet :
            Name used for the ParameterField and the FieldRef of the function.
        script_args :
            Mapping with the keys:
            'content'     - the script source as a string, or a function
                            object whose source and name are read via inspect;
            'def_name'    - function name, required when 'content' is a string;
            'return_type' - return data type, lower-cased before use;
            'encode'      - optional, defaults to True; when truthy the source
                            is base64 encoded.
        """
        script = script_args['content']
        if isinstance(script, str):
            content = script
            def_name = script_args['def_name']
        else:
            import inspect
            content = inspect.getsource(script)
            def_name = script.__name__

        encode = script_args['encode'] if "encode" in script_args else True
        if encode:
            content = base64.b64encode(content.encode()).decode()

        return_type = script_args['return_type'].lower()
        extension = [
            ny.Extension(extender='ADAPA',
                         name=def_name,
                         value=return_type,
                         anytypeobjs_=[content])
        ]
        def_func = ny.DefineFunction(
            name='customFunc',
            # PMML's OPTYPE is spelled 'continuous'; the former 'continous'
            # was not a valid value.
            optype='categorical' if return_type == 'string' else 'continuous',
            dataType=return_type,
            ParameterField=[
                ny.ParameterField(name=dataSet, dataType='binary')
            ],
            Apply=ny.Apply(function='python',
                           Extension=extension,
                           FieldRef=[ny.FieldRef(field=dataSet)]),
        )
        ny.TransformationDictionary.__init__(self)
        ny.TransformationDictionary.add_DefineFunction(self, def_func)
Ejemplo n.º 10
0
def lbl_binarizer(trfm, col_names, **kwargs):
    """
    Generates pre-processing elements for a Label Binarizer

    Parameters
    ----------
    trfm :
        Contains the Sklearn's Label Binarizer preprocessing instance.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.
    **kwargs :
        Must contain the key 'model'; the value is looked up but not used
        inside this function.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to Label Binarizer preprocessing.

    """
    categoric_lbls = trfm.classes_.tolist()
    # The lookup is kept so that a missing 'model' keyword still raises KeyError.
    _ = kwargs['model']

    derived_flds = []
    derived_colnames = []
    # NOTE(review): with more than one entry in col_names the loops below emit
    # overlapping fields and only the last column's names are returned;
    # presumably col_names always holds a single column - confirm with callers.
    for col_name in col_names:
        name_prefix = "labelBinarizer(" + str(col_name)
        if len(categoric_lbls) == 2:
            # Two classes: a single indicator for the last class label.
            derived_colnames = get_derived_colnames(name_prefix,
                                                    [categoric_lbls[-1]], ")")
            indicator = pml.NormDiscrete(field=str(col_names[-1]),
                                         value=str(categoric_lbls[-1]))
            derived_flds.append(
                pml.DerivedField(NormDiscrete=indicator,
                                 name=derived_colnames[-1],
                                 optype="categorical",
                                 dataType="double"))
            continue

        # Otherwise: one indicator per class label.
        derived_colnames = get_derived_colnames(name_prefix, categoric_lbls,
                                                ")")
        for attribute_name in col_names:
            for class_idx, class_name in enumerate(categoric_lbls):
                indicator = pml.NormDiscrete(field=str(attribute_name),
                                             value=str(class_name))
                derived_flds.append(
                    pml.DerivedField(NormDiscrete=indicator,
                                     name=derived_colnames[class_idx],
                                     optype="categorical",
                                     dataType="double"))

    return {
        'der_fld': derived_flds,
        'der_col_names': derived_colnames,
        'pp_feat_class_lbl': categoric_lbls,
        'pp_feat_name': col_names[0],
    }
Ejemplo n.º 11
0
 def get_output_for_regression_model(index):
     """
     Builds the Output element of one regression model.

     Parameters
     ----------
     index :
         Value appended (after _UNDERSCORE) to the output field name.

     Returns
     -------
     output :
         Output holding the single field 'normalizedDistance' + _UNDERSCORE + index.
     """
     distance_field = pml.OutputField(
         name="normalizedDistance" + _UNDERSCORE + str(index),
         optype=OPTYPE.CONTINUOUS.value,
         dataType=DATATYPE.DOUBLE.value)
     return pml.Output(OutputField=[distance_field])
Ejemplo n.º 12
0
 def get_header():
     """
     Builds the Header element.

     `self` is taken from the enclosing scope; its _fingerprint_description
     becomes the header description.

     Returns
     -------
     header :
         Header with the application name and version from HEADER_INFO.
     """
     application = pml.Application(name=HEADER_INFO.APPLICATION_NAME,
                                   version=HEADER_INFO.APPLICATION_VERSION)
     return pml.Header(Application=application,
                       description=self._fingerprint_description)
Ejemplo n.º 13
0
 def __init__(self, description, copyright):
     """
     Initialises the Header element.

     Parameters
     ----------
     description :
         Header description; a falsy value is replaced by
         "Keras Model in PMML".
     copyright :
         Copyright notice; a falsy value is replaced by
         "Copyright (c) 2018 Software AG".
     """
     description = description or "Keras Model in PMML"
     copyright = copyright or "Copyright (c) 2018 Software AG"
     # The timestamp is the string form of the current local time.
     timestamp = ny.Timestamp(str(datetime.datetime.now()))
     application = ny.Application(name="Nyoka", version=metadata.__version__)
     ny.Header.__init__(self,
                        copyright=copyright,
                        description=description,
                        Timestamp=timestamp,
                        Application=application)
Ejemplo n.º 14
0
 def create_left_node(obj, derived_col_names):
     """
     Builds the left-hand child Node for one split of a tree dump.

     Parameters
     ----------
     obj :
         Split description; the keys 'split', 'split_condition' and
         'children' are read.
     derived_col_names : list
         Column names after preprocessing.

     Returns
     -------
     left_node :
         Node carrying a 'lessThan' predicate on the split condition, filled
         from obj['children'][0] by create_node.
     """
     left_node = pml.Node()
     field_name = replace_name_with_derivedColumnNames(obj['split'],
                                                       derived_col_names)
     # The split condition is passed through unformatted.
     predicate = pml.SimplePredicate(field=field_name,
                                     operator='lessThan',
                                     value=obj['split_condition'])
     left_node.set_SimplePredicate(predicate)
     create_node(obj['children'][0], left_node, derived_col_names)
     return left_node
Ejemplo n.º 15
0
def lbl_binarizer(trfm, col_names, **kwargs):
    """
    Generates pre-processing elements for Scikit-Learn's LabelBinarizer

    Parameters
    ----------
    trfm :
        Contains the Sklearn's Label Binarizer preprocessing instance.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.
    **kwargs :
        Accepted but not read by this function.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to Label Binarizer preprocessing.

    """
    categoric_lbls = trfm.classes_.tolist()

    derived_flds = []
    derived_colnames = []
    # NOTE(review): with more than one entry in col_names the loops below emit
    # overlapping fields and only the last column's names are returned;
    # presumably col_names always holds a single column - confirm with callers.
    for col_name in col_names:
        name_prefix = "labelBinarizer(" + str(col_name)
        if len(categoric_lbls) == 2:
            # Two classes: a single indicator for the last class label.
            derived_colnames = get_derived_colnames(name_prefix,
                                                    [categoric_lbls[-1]], ")")
            indicator = pml.NormDiscrete(field=str(col_names[-1]),
                                         value=str(categoric_lbls[-1]))
            derived_flds.append(
                pml.DerivedField(NormDiscrete=indicator,
                                 name=derived_colnames[-1],
                                 optype=OPTYPE.CATEGORICAL.value,
                                 dataType=DATATYPE.DOUBLE.value))
            continue

        # Otherwise: one indicator per class label.
        derived_colnames = get_derived_colnames(name_prefix, categoric_lbls,
                                                ")")
        for attribute_name in col_names:
            for class_idx, class_name in enumerate(categoric_lbls):
                indicator = pml.NormDiscrete(field=str(attribute_name),
                                             value=str(class_name))
                derived_flds.append(
                    pml.DerivedField(NormDiscrete=indicator,
                                     name=derived_colnames[class_idx],
                                     optype=OPTYPE.CATEGORICAL.value,
                                     dataType=DATATYPE.DOUBLE.value))

    return {
        'der_fld': derived_flds,
        'der_col_names': derived_colnames,
        'pp_feat_class_lbl': categoric_lbls,
        'pp_feat_name': col_names[0],
    }
Ejemplo n.º 16
0
    def __init__(self, dataSet, predictedClasses):
        """
        Initialises the MiningSchema with one active and one target field.

        Parameters
        ----------
        dataSet :
            Name of the active MiningField.
        predictedClasses :
            When truthy the target field is named "labels", otherwise
            "predictions".
        """
        ny.MiningSchema.__init__(self)
        target_name = "labels" if predictedClasses else "predictions"
        for field_name, usage in ((dataSet, "active"), (target_name, "target")):
            ny.MiningSchema.add_MiningField(
                self,
                ny.MiningField(name=field_name,
                               usageType=usage,
                               invalidValueTreatment="asIs"))
Ejemplo n.º 17
0
 def create_left_node(obj, derived_col_names):
     """
     Builds the left-hand child Node for one split of a tree dump.

     Parameters
     ----------
     obj :
         Split description; the keys 'split_feature', 'threshold' and
         'left_child' are read.
     derived_col_names : list
         Column names after preprocessing; indexed by int(obj['split_feature']).

     Returns
     -------
     left_node :
         Node carrying a 'lessThan' predicate on the split threshold, filled
         from obj['left_child'] by create_node.
     """
     left_node = pml.Node()
     split_column = derived_col_names[int(obj['split_feature'])]
     field_name = xgboostToPmml.replace_name_with_derivedColumnNames(
         split_column, derived_col_names)
     # The threshold is passed through unformatted.
     predicate = pml.SimplePredicate(field=field_name,
                                     operator='lessThan',
                                     value=obj['threshold'])
     left_node.set_SimplePredicate(predicate)
     create_node(obj['left_child'], left_node, derived_col_names)
     return left_node
Ejemplo n.º 18
0
def tfidf_vectorizer(trfm, col_names):
    """
    Generates pre-processing elements for Scikit-Learn's TfIdfVectorizer

    Parameters
    ----------
    trfm :
        Contains the Sklearn's TfIdfVectorizer preprocessing instance
    col_names : list
        Contains list of feature/column names.
        Only the first entry is used as the text column.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to TfIdfVectorizer preprocessing.

    """
    text_column = col_names[0]
    # Encode to UTF-8 and strip the b'...' wrapper of the bytes repr, so
    # non-ASCII characters appear as escape sequences.
    features = [
        str(feat.encode("utf8"))[2:-1] for feat in trfm.get_feature_names()
    ]
    idfs = trfm.idf_
    extra_features = list(trfm.vocabulary_.keys())
    derived_flds = list()
    derived_colnames = get_derived_colnames('tfidf@[' + text_column + ']',
                                            features)
    if trfm.lowercase:
        text_field = 'lowercase(' + text_column + ')'
        derived_flds.append(
            pml.DerivedField(name=text_field,
                             optype=OPTYPE.CATEGORICAL.value,
                             dataType=DATATYPE.STRING.value,
                             Apply=pml.Apply(
                                 function=FUNCTION.LOWERCASE.value,
                                 FieldRef=[pml.FieldRef(field=text_column)])))
    else:
        # No lower-casing field is emitted in this case, so the TextIndex
        # must read the raw column instead of a field that does not exist.
        text_field = text_column

    for feat_idx, idf in zip(range(len(features)), idfs):
        # Each term's derived field multiplies its TextIndex by its idf weight.
        text_index = pml.TextIndex(
            textField=text_field,
            wordSeparatorCharacterRE='\\s+',
            tokenize='true',
            Constant=pml.Constant(valueOf_=features[feat_idx]),
            Extension=[pml.Extension(value=extra_features[feat_idx])])
        derived_flds.append(
            pml.DerivedField(
                name=derived_colnames[feat_idx],
                optype=OPTYPE.CONTINUOUS.value,
                dataType=DATATYPE.DOUBLE.value,
                Apply=pml.Apply(
                    function=FUNCTION.MULTIPLICATION.value,
                    TextIndex=[text_index],
                    Constant=[pml.Constant(valueOf_="{:.16f}".format(idf))])))

    pp_dict = dict()
    pp_dict['der_fld'] = derived_flds
    pp_dict['der_col_names'] = derived_colnames
    pp_dict['pp_feat_name'] = text_column
    pp_dict['pp_feat_class_lbl'] = list()
    return pp_dict
Ejemplo n.º 19
0
 def get_mining_model():
     """
     Builds the top-level MiningModel.

     `self` is taken from the enclosing scope. The model name is
     self._model_name, or self._fingerprint_name when that is None.

     Returns
     -------
     mining_model :
         Regression MiningModel whose segments are combined with
         MULTIPLE_MODEL_METHOD.SUM.
     """
     output = get_output_for_mining_model()
     if self._model_name is None:
         model_name = self._fingerprint_name
     else:
         model_name = self._model_name
     mining_schema = pml.MiningSchema(
         MiningField=get_mining_fields_for_mining_model())
     segmentation = pml.Segmentation(
         multipleModelMethod=MULTIPLE_MODEL_METHOD.SUM.value,
         Segment=get_segments())
     return pml.MiningModel(functionName=MINING_FUNCTION.REGRESSION.value,
                            modelName=model_name,
                            MiningSchema=mining_schema,
                            Output=output,
                            Segmentation=segmentation)
Ejemplo n.º 20
0
        def get_mining_fields_for_regression_model(index):
            """
            Builds the active MiningFields of one regression model.

            `self` is taken from the enclosing scope.

            Parameters
            ----------
            index :
                Position of the hull in self._hulls.

            Returns
            -------
            mining_fields : list
                One field named after the hull when self._use_lag is truthy;
                otherwise one field per fingerprint position, named hull name
                + _UNDERSCORE + position.
            """
            if self._use_lag:
                field_names = [self._hulls[index]["name"]]
            else:
                field_names = [
                    self._hulls[index]["name"] + _UNDERSCORE + str(position)
                    for position in range(self._length_of_fingerprint)
                ]
            return [
                pml.MiningField(name=field_name, usageType="active")
                for field_name in field_names
            ]
Ejemplo n.º 21
0
def get_output(model, target_name):

    """
    It returns the output element of the model.

    Parameters
    ----------
    model :
        An Xgboost model instance.
    target_name : String
        Name of the Target column.

    Returns
    -------
    Output :
        Nyoka's Output object

    """
    mining_func = get_mining_func(model)

    def predicted_value_field(name, optype, data_type):
        # All branches end with one predictedValue field; only these differ.
        return pml.OutputField(name=name,
                               feature=RESULT_FEATURE.PREDICTED_VALUE,
                               optype=optype,
                               dataType=data_type)

    # Model without a target: a single generic continuous field.
    if not has_target(model):
        return pml.Output(OutputField=[
            predicted_value_field('predicted', OPTYPE.CONTINUOUS,
                                  DATATYPE.DOUBLE)
        ])

    alt_target_name = 'predicted_' + target_name

    # Non-classification: a single continuous prediction of the target.
    if mining_func != MINING_FUNCTION.CLASSIFICATION:
        return pml.Output(OutputField=[
            predicted_value_field(alt_target_name, OPTYPE.CONTINUOUS,
                                  DATATYPE.DOUBLE)
        ])

    # Classification: one probability per class, then the predicted label.
    output_fields = [
        pml.OutputField(name='probability_' + str(cls),
                        feature=RESULT_FEATURE.PROBABILITY,
                        optype=OPTYPE.CONTINUOUS,
                        dataType=DATATYPE.DOUBLE,
                        value=str(cls))
        for cls in model.classes_
    ]
    output_fields.append(
        predicted_value_field(alt_target_name, OPTYPE.CATEGORICAL,
                              get_dtype(model.classes_[0])))
    return pml.Output(OutputField=output_fields)
Ejemplo n.º 22
0
def polynomial_features(trfm, col_names):
    """
    Generates pre-processing elements for Scikit-Learn's PolynomialFeatures

    Parameters
    ----------
    trfm :
        Contains the Sklearn's PolynomialFeatures preprocessing instance.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to PolynomialFeatures preprocessing.

    """
    # The counter lives on the function object and is incremented on every
    # call; it is part of every derived field name generated below.
    polynomial_features.poly_ctr += 1
    name_prefix = "poly" + str(polynomial_features.poly_ctr) + '-' + "x"

    derived_flds = []
    derived_colnames = []
    for polyfeat_idx in range(trfm.powers_.shape[0]):
        exponents = trfm.powers_[polyfeat_idx]
        # One pow(column, exponent) term per input column ...
        power_terms = [
            pml.Apply(function='pow',
                      Constant=[
                          pml.Constant(dataType="integer",
                                       valueOf_=int(exponents[col_idx]))
                      ],
                      FieldRef=[pml.FieldRef(field=col_name)])
            for col_idx, col_name in enumerate(col_names)
        ]
        # ... combined by a single product.
        derived_fld = pml.DerivedField(
            Apply=pml.Apply(function="product", Apply_member=power_terms),
            dataType="double",
            optype="continuous",
            name=name_prefix + str(polyfeat_idx))
        derived_flds.append(derived_fld)
        derived_colnames.append(derived_fld.get_name())

    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
Ejemplo n.º 23
0
 def get_output_for_mining_model():
     """
     Builds the Output element of the top-level mining model.

     Returns
     -------
     output :
         Output with two continuous double fields: 'totalDistance' (predicted
         value) and 'finalResult' (transformed value computed by the Apply
         from get_normalization_function).
     """
     total_distance = pml.OutputField(
         name="totalDistance",
         optype=OPTYPE.CONTINUOUS.value,
         dataType=DATATYPE.DOUBLE.value,
         feature=RESULT_FEATURE.PREDICTED_VALUE.value)
     final_result = pml.OutputField(
         name="finalResult",
         optype=OPTYPE.CONTINUOUS.value,
         dataType=DATATYPE.DOUBLE.value,
         feature=RESULT_FEATURE.TRANSFORMED_VALUE.value,
         Apply=get_normalization_function())
     return pml.Output(OutputField=[total_distance, final_result])
Ejemplo n.º 24
0
def lbl_encoder(trfm, col_names):
    """
    Generates pre-processing elements for a LabelEncoder

    Parameters
    ----------
    trfm :
        Contains the Sklearn's LabelEncoder preprocessing instance
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to LabelEncoder preprocessing.

    """
    categoric_lbls = trfm.classes_.tolist()
    categoric_lbls_num = trfm.transform(trfm.classes_.tolist()).tolist()
    derived_colnames = get_derived_colnames('labelEncoder', col_names)

    # One table row per class: original label -> encoded number (as string).
    rows = []
    for label, encoded in zip(categoric_lbls, categoric_lbls_num):
        table_row = pml.row()
        table_row.elementobjs_ = ['input', 'output']
        table_row.input = label
        table_row.output = str(encoded)
        rows.append(table_row)

    map_values = pml.MapValues(
        outputColumn="output",
        FieldColumnPair=[
            pml.FieldColumnPair(field=str(col_names[0]), column="input")
        ],
        InlineTable=pml.InlineTable(row=rows))
    derived_flds = [
        pml.DerivedField(MapValues=map_values,
                         name=derived_colnames[0],
                         optype="continuous",
                         dataType="double")
    ]

    return {
        'der_fld': derived_flds,
        'der_col_names': derived_colnames,
        'pp_feat_class_lbl': categoric_lbls,
        'pp_feat_name': col_names[0],
    }
Ejemplo n.º 25
0
    def __init__(self, dataSet=None):
        """
        Initialises the MiningSchema with one active and one target field.

        Parameters
        ----------
        dataSet :
            Name of the active MiningField; a falsy value is replaced by
            "dataSet". The target field is always named "predictions".
        """
        ny.MiningSchema.__init__(self)
        input_name = dataSet if dataSet else "dataSet"
        for field_name, usage in ((input_name, "active"),
                                  ("predictions", "target")):
            ny.MiningSchema.add_MiningField(
                self,
                ny.MiningField(name=field_name,
                               usageType=usage,
                               invalidValueTreatment="asIs"))
Ejemplo n.º 26
0
def get_ensemble_models(model, derived_col_names, col_names, target_name,
                        mining_imp_val, categoric_values):
    """
    It returns the Mining Model element of the model

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    mining_models : list
        List holding the single MiningModel ("LightGBModel") built for the
        given model.
    """
    # Shared model attributes come from the sklearn exporter.
    model_kwargs = sklToPmml.get_model_kwargs(model, col_names, target_name,
                                              mining_imp_val)
    segmentation = get_outer_segmentation(model, derived_col_names, col_names,
                                          target_name, mining_imp_val,
                                          categoric_values)
    return [
        pml.MiningModel(modelName="LightGBModel",
                        Segmentation=segmentation,
                        **model_kwargs)
    ]
Ejemplo n.º 27
0
def get_segments_for_lgbr(model, derived_col_names, feature_names, target_name, mining_imp_val,categorical_values):
    """
    It returns the Segmentation element of the model

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : List
        Name of the Target column. Not read by this function.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
        Not read by this function.
    categorical_values : tuple
        Contains Categorical attribute names and its values.
        Not read by this function.

    Returns
    -------
    segmentation :
        Segmentation (multipleModelMethod "sum") whose Segments come from
        generate_Segments_Equal_To_Estimators.

    """
    # Collect the 'tree_structure' of every entry in the booster dump.
    tree_structures = [
        tree_info['tree_structure']
        for tree_info in model.booster_.dump_model()['tree_info']
    ]
    segments = generate_Segments_Equal_To_Estimators(tree_structures,
                                                     derived_col_names,
                                                     feature_names)
    return pml.Segmentation(multipleModelMethod="sum", Segment=segments)
Ejemplo n.º 28
0
def get_outer_segmentation(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values):
    """
    It returns the Segmentation element of the model.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    segmentation :
        Get the outer most Segmentation of an LGB model

    """
    segment_args = (model, derived_col_names, col_names, target_name,
                    mining_imp_val, categoric_values)

    # For a regressor the result of get_segments is returned unwrapped.
    if 'LGBMRegressor' in str(model.__class__):
        return get_segments(*segment_args)

    # Any other model: wrap the segments in an outer Segmentation.
    combine_method = get_multiple_model_method(model)
    return pml.Segmentation(multipleModelMethod=combine_method,
                            Segment=get_segments(*segment_args))
Ejemplo n.º 29
0
def get_segments_for_xgbr(model, derived_col_names, feature_names, target_name, mining_imp_val,categorical_values):
    """
    It returns the Segmentation element of the model

    Parameters
    ----------
    model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : List
        Name of the Target column. Not read by this function.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
        Not read by this function.
    categorical_values : tuple
        Contains Categorical attribute names and its values.
        Not read by this function.

    Returns
    -------
    segmentation :
        Segmentation (MULTIPLE_MODEL_METHOD.SUM) whose Segments come from
        generate_Segments_Equal_To_Estimators.

    """
    # The booster dump is requested in JSON format.
    tree_dumps = model._Booster.get_dump(dump_format='json')
    combine_method = MULTIPLE_MODEL_METHOD.SUM
    segments = generate_Segments_Equal_To_Estimators(tree_dumps,
                                                     derived_col_names,
                                                     feature_names)
    return pml.Segmentation(multipleModelMethod=combine_method,
                            Segment=segments)
Ejemplo n.º 30
0
    def _get_layer_weights_n_biases(self, layer):
        """
        Pulls out the Weights and Bias matrix from a given Keras layer

        Parameters
        ----------
        layer : Keras layer object
            A Keras Layer

        Returns
        -------
        layer_weights :
            ny.LayerWeights built from the flattened weights, or None when the
            layer has no weights.
        layer_biases :
            ny.LayerBias built from the last weight array when the layer uses
            a bias, otherwise None.

        """
        layer_all_weights = layer.get_weights()
        if not layer_all_weights:
            return None, None

        # With a bias, the last array is the bias and the rest are weights.
        if hasattr(layer, 'use_bias') and layer.use_bias:
            biases = layer_all_weights[-1]
            weight_arrays = layer_all_weights[0:-1]
        else:
            biases = None
            weight_arrays = layer_all_weights

        weights, w_shape = self._get_flatten_weights(weight_arrays)
        layer_weights = ny.LayerWeights(content=weights,
                                        floatsPerLine=0,
                                        weightsShape=w_shape,
                                        weightsFlattenAxis="0")
        if biases is None:
            return layer_weights, None

        # A one-dimensional bias shape (n,) is reported as (n, 1).
        bs_shape = biases.shape
        if len(bs_shape) == 1:
            final_bs_shape = str((bs_shape[0], 1))
        else:
            final_bs_shape = str(bs_shape)
        layer_biases = ny.LayerBias(content=biases,
                                    biasShape=final_bs_shape,
                                    biasFlattenAxis="0",
                                    floatsPerLine=0)
        return layer_weights, layer_biases