Example #1
0
def lbl_binarizer(trfm, col_names, **kwargs):
    """
    Build the PMML derived fields for a fitted Label Binarizer.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's Label Binarizer preprocessing instance.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.
    **kwargs :
        Must contain ``model``. When the class name of that object is one of
        the names in the exception list below, the derived fields are moved
        to ``hidden_lb_der_flds`` and the derived column names are appended
        to the module-level ``exception_cols``.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to Label Binarizer preprocessing
        (``der_fld``, ``der_col_names``, ``pp_feat_class_lbl``, ``pp_feat_name`` and,
        for the excepted models only, ``hidden_lb_der_flds``).

    Raises
    ------
    KeyError
        If ``model`` is not supplied as a keyword argument.
    IndexError
        If ``col_names`` is empty.

    """
    derived_flds = list()
    derived_colnames = list()
    pp_dict = dict()
    categoric_lbls = trfm.classes_.tolist()
    model_exception_list = ["LinearRegression", "LogisticRegression", "SVR", "SVC"]
    model = kwargs['model']

    # With exactly two classes only the last class gets an indicator field;
    # otherwise every class gets one.
    if len(categoric_lbls) == 2:
        encoded_lbls = [categoric_lbls[-1]]
    else:
        encoded_lbls = categoric_lbls

    for col_name in col_names:
        col_derived_names = get_derived_colnames("labelBinarizer(" + str(col_name),
                                                 encoded_lbls, ")")
        # Each derived field refers to the column currently being processed,
        # so field and derived name always belong to the same column.
        for class_idx, class_name in enumerate(encoded_lbls):
            norm_descr = pml.NormDiscrete(field=str(col_name), value=str(class_name))
            derived_flds.append(pml.DerivedField(NormDiscrete=norm_descr,
                                                 name=col_derived_names[class_idx],
                                                 optype="categorical",
                                                 dataType="double"))
        # Accumulate instead of overwriting so no column's names are lost.
        derived_colnames.extend(col_derived_names)

    if any_in([model.__class__.__name__], model_exception_list):
        pp_dict['hidden_lb_der_flds'] = derived_flds
        exception_cols.extend(derived_colnames)
        derived_flds = list()

    pp_dict['der_fld'] = derived_flds
    pp_dict['der_col_names'] = derived_colnames
    pp_dict['pp_feat_class_lbl'] = categoric_lbls
    pp_dict['pp_feat_name'] = col_names[0]

    return pp_dict
Example #2
0
 def nyObjOfModel(self, pmmlObj, singMod):
     """Wrap a single model object in a new PMML document.

     ``singMod['pmmlModelObject']`` is inspected through its
     ``original_tagname_`` attribute. A ``MiningModel`` is wrapped together
     with the MiningBuildTask and DataDictionary of ``pmmlObj``; a
     ``DeepNetwork`` is wrapped with the DataDictionary only. Any other tag
     name yields ``None``.
     """
     import nyokaBase.PMML43Ext as ny
     modelObj = singMod['pmmlModelObject']
     tagName = modelObj.__dict__['original_tagname_']
     if tagName == 'MiningModel':
         return ny.PMML(MiningBuildTask=pmmlObj.MiningBuildTask,
                        DataDictionary=pmmlObj.DataDictionary,
                        MiningModel=[modelObj])
     if tagName == 'DeepNetwork':
         return ny.PMML(DataDictionary=pmmlObj.DataDictionary,
                        DeepNetwork=[modelObj])
     return None
Example #3
0
def polynomial_features(trfm, col_names):
    """
    Build one PMML derived field per row of ``trfm.powers_``.

    Each derived field is the product of ``pow(column, exponent)`` terms, one
    term per entry of ``col_names``. The function attribute ``poly_ctr`` is
    incremented on every call and is embedded in the generated field names.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's PolynomialFeatures preprocessing instance.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to PolynomialFeatures preprocessing
        (``der_fld`` and ``der_col_names``).

    """
    polynomial_features.poly_ctr += 1
    name_prefix = "poly" + str(polynomial_features.poly_ctr) + '-' + "x"

    derived_flds = []
    derived_colnames = []
    for term_idx in range(trfm.powers_.shape[0]):
        exponents = trfm.powers_[term_idx]
        factors = [
            pml.Apply(
                function='pow',
                Constant=[pml.Constant(dataType="integer",
                                       valueOf_=int(exponents[col_idx]))],
                FieldRef=[pml.FieldRef(field=col_name)])
            for col_idx, col_name in enumerate(col_names)
        ]
        term_fld = pml.DerivedField(
            Apply=pml.Apply(function="product", Apply_member=factors),
            dataType="double",
            optype="continuous",
            name=name_prefix + str(term_idx)
        )
        derived_flds.append(term_fld)
        derived_colnames.append(term_fld.get_name())

    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
 def __init__(self, pmml):
     """Parse ``pmml`` and build the model from it.

     The parsed document is stored on ``nyoka_pmml``; the remaining
     attributes are reset to empty values before ``_build_model`` runs and
     its result is stored on ``model``.
     """
     # NOTE(review): the positional True is presumably the `silence` flag of ny.parse — confirm.
     self.nyoka_pmml = ny.parse(pmml, True)
     self.image_input = self.layer_input = self.model = None
     self.layers_outputs = dict()
     self.model = self._build_model()
Example #5
0
def get_ensemble_models(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values):
    """
    Build the MiningModel element for the model and return it in a list.

    Parameters
    ----------
    model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    mining_models :
        A one-element list holding the MiningModel named "XGBoostModel".
        A ``Targets`` entry is added only when the model class name
        contains ``XGBRegressor``.
    """
    model_kwargs = sklToPmml.get_model_kwargs(model, col_names, target_name, mining_imp_val)
    is_regressor = 'XGBRegressor' in str(model.__class__)
    if is_regressor:
        model_kwargs['Targets'] = sklToPmml.get_targets(model, target_name)

    outer_segmentation = get_outer_segmentation(
        model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values)
    xgb_mining_model = pml.MiningModel(
        modelName="XGBoostModel",
        Segmentation=outer_segmentation,
        **model_kwargs
    )
    return [xgb_mining_model]
Example #6
0
def get_outer_segmentation(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values):
    """
    Build the outermost Segmentation element of the model.

    Parameters
    ----------
    model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    segmentation :
        For a model whose class name contains ``XGBRegressor`` this is the
        value returned by ``get_segments`` as-is; for every other model that
        value is wrapped in a new ``Segmentation`` element.

    """
    segment_args = (model, derived_col_names, col_names, target_name,
                    mining_imp_val, categoric_values)

    if 'XGBRegressor' not in str(model.__class__):
        combine_method = get_multiple_model_method(model)
        return pml.Segmentation(
            multipleModelMethod=combine_method,
            Segment=get_segments(*segment_args)
        )
    return get_segments(*segment_args)
Example #7
0
 def selectArchitecture(self, checkTemplateID):
     """Load the PMML template for a known architecture ID.

     Parameters
     ----------
     checkTemplateID : str
         One of ``'mobilenetArch'``, ``'vgg16Arch'`` or ``'vgg19Arch'``.

     Returns
     -------
     tuple
         ``(templateArch, pmmlObj)``: the result of ``self.pmmlToJson`` on
         the template path, and the parsed PMML object.

     Raises
     ------
     ValueError
         If ``checkTemplateID`` is not a known template ID (this used to
         surface as an ``UnboundLocalError`` at the return statement).
     """
     templateFiles = {
         'mobilenetArch': 'MobilenetArch.pmml',
         'vgg16Arch': 'vGG16Arch.pmml',
         'vgg19Arch': 'vGG19Arch.pmml',
     }
     templateFile = templateFiles.get(checkTemplateID)
     if templateFile is None:
         raise ValueError('Unknown architecture template ID: ' +
                          repr(checkTemplateID))

     templatePath = settingFilePath + templateFile
     # Close the template file once it is parsed instead of leaking the handle.
     with open(templatePath, 'r') as templateHandle:
         pmmlObj = ny.parse(templateHandle, silence=True)
     templateArch = self.pmmlToJson(templatePath)
     return templateArch, pmmlObj
Example #8
0
def get_segments_for_xgbr(model, derived_col_names, feature_names, target_name, mining_imp_val,categorical_values):
    """
    Build the Segmentation element that sums the trees of the model.

    Parameters
    ----------
    model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names : List
        Contains list of feature/column names.
    target_name : List
        Name of the Target column. Not used by this function.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
        Not used by this function.
    categorical_values : tuple
        Contains Categorical attribute names and its values.
        Not used by this function.

    Returns
    -------
    segmentation :
        Get the Segmentation element which contains inner segments, combined
        with ``multipleModelMethod="sum"``.

    """
    # Dump the booster once; the dump does not change between iterations.
    tree_dumps = model._Booster.get_dump(dump_format='json')
    get_nodes_in_json_format = [json.loads(tree_dumps[i])
                                for i in range(model.n_estimators)]
    segmentation = pml.Segmentation(multipleModelMethod="sum",
                                    Segment=generate_Segments_Equal_To_Estimators(get_nodes_in_json_format, derived_col_names,
                                                                                  feature_names))
    return segmentation
Example #9
0
def writePmml(pmmlObj, filepath, lockForPMML):
    """Write ``pmmlObj`` to ``filepath`` while holding ``lockForPMML``.

    Every script element of the PMML object is rebuilt with its code
    dedented (by the indentation of its first line) and with ``<`` replaced
    by ``&lt;`` before the document is exported.

    Any exception raised while preparing or writing the document is printed
    and swallowed (best-effort write); the lock is always released once it
    has been acquired.

    Parameters
    ----------
    pmmlObj :
        PMML object exposing ``script`` and ``export``.
    filepath : str
        Destination file, opened in text write mode.
    lockForPMML :
        Object exposing ``acquire()`` and ``release()``.
    """
    # Acquire before entering the try block so that `finally` can never
    # release a lock that was not successfully taken.
    lockForPMML.acquire()
    try:
        pmmlObj = removeExtraNewLinesFromWeights(pmmlObj)
        scrptVal2 = []
        for sc in pmmlObj.script:
            code = sc.get_valueOf_().lstrip('\n')
            # Strip the first line's indentation from every line.
            leading_spaces = len(code) - len(code.lstrip(' '))
            code = '\n'.join(line[leading_spaces:] for line in code.split('\n'))
            scriptCode = code.replace('<', '&lt;')
            scrptVal2.append(pml.script(content=scriptCode,
                                        class_=sc.class_,
                                        for_=sc.for_))
        pmmlObj.script = scrptVal2
        # Context manager guarantees the file is flushed and closed.
        with open(filepath, 'w') as pmmlFile:
            pmmlObj.export(pmmlFile, 0)
    except Exception as e:
        print('>>>>>>>>>>>> ', str(e))
    finally:
        lockForPMML.release()
Example #10
0
    def writePMML(self, model, predictedClass, fileName, dataSet):
        """Convert a Keras model to PMML and write it to ``fileName``.

        If the conversion raises, the status returned by
        ``self.upDateStatus()`` is marked as failed, extended with the error
        message and traceback, dumped as JSON to ``self.statusFile`` and
        ``-1`` is returned. On success the document is exported and the
        method returns ``None``.
        """
        try:
            from nyokaBase.keras.keras_model_to_pmml import KerasToPmml
            kerasPmml = KerasToPmml(model,
                                    model_name="TrainedModel",
                                    description="Keras Models in PMML",
                                    dataSet=dataSet,
                                    predictedClasses=predictedClass)
        except Exception as convErr:
            failure = self.upDateStatus()
            failure['status'] = 'Training Failed'
            failure['errorMessage'] = (
                'Error while converting Keras to PMML >> ' + str(convErr))
            failure['errorTraceback'] = traceback.format_exc()
            with open(self.statusFile, 'w') as statusOut:
                json.dump(failure, statusOut)
            return -1

        # Escape '<' in every script in place; an empty script list is
        # passed on as None.
        scripts = self.pmmlObj['script']
        if scripts != []:
            for scriptObj in scripts:
                escaped = scriptObj.get_valueOf_().replace('<', '&lt;')
                scriptObj.__dict__['valueOf_'] = escaped
        else:
            scripts = None

        converted = kerasPmml.__dict__
        dataDictionary = converted['DataDictionary']
        network = self.updateSectionInfo(converted['DeepNetwork'])

        header = ny.Header(
            copyright="Copyright (c) 2018 Software AG",
            Extension=[ny.Extension(value=self.hdExtDet, anytypeobjs_=[''])],
            description="Neural Network Model",
            Timestamp=ny.Timestamp(datetime.now()))

        with open(fileName, 'w') as pmmlOut:
            document = ny.PMML(version="4.3Ext",
                               DeepNetwork=network,
                               DataDictionary=dataDictionary,
                               Header=header,
                               script=scripts)
            document.export(pmmlOut, 0)
Example #11
0
def mining_Field_For_First_Segment(feature_names):
    """
    Build the Mining Schema of the first segment.

    Parameters
    ----------
    feature_names: List
        Contains list of feature/column names.

    Returns
    -------
    mining_schema_for_1st_segment:
        A MiningSchema holding one MiningField per entry of
        ``feature_names``, in the same order.
    """
    return pml.MiningSchema(
        MiningField=[pml.MiningField(name=feature) for feature in feature_names]
    )
Example #12
0
def lbl_encoder(trfm, col_names):
    """
    Build the PMML MapValues derived field for a fitted LabelEncoder.

    The inline table has one row per class: ``input`` is the class label and
    ``output`` is the string form of its encoded value. Only the first entry
    of ``col_names`` is used as the source field.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's LabelEncoder preprocessing instance
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to LabelEncoder preprocessing.

    """
    categoric_lbls = trfm.classes_.tolist()
    encoded_lbls = trfm.transform(trfm.classes_.tolist()).tolist()
    derived_colnames = get_derived_colnames('labelEncoder', col_names)

    def _make_row(label, code):
        # One <row> with an <input> label and its <output> code.
        entry = pml.row()
        entry.elementobjs_ = ['input', 'output']
        entry.input = label
        entry.output = str(code)
        return entry

    table_rows = [_make_row(categoric_lbls[idx], code)
                  for idx, code in enumerate(encoded_lbls)]

    source_pair = pml.FieldColumnPair(field=str(col_names[0]), column="input")
    lookup = pml.MapValues(outputColumn="output",
                           FieldColumnPair=[source_pair],
                           InlineTable=pml.InlineTable(row=table_rows))
    encoded_fld = pml.DerivedField(MapValues=lookup,
                                   name=derived_colnames[0],
                                   optype="continuous",
                                   dataType="double")

    return {
        'der_fld': [encoded_fld],
        'der_col_names': derived_colnames,
        'pp_feat_class_lbl': categoric_lbls,
        'pp_feat_name': col_names[0],
    }
Example #13
0
	def post(self,requests):
		"""Parse the PMML file named in the request and test-compile its model.

		Reads ``filePath`` from the POST data, parses that file and runs
		``generateAndCompileModel`` with ``compileTestOnly=True``. A dict
		result is returned to the client as-is; any other result is reported
		as a successful compilation.
		"""
		filePath = requests.POST.get('filePath')
		from nyokaBase import PMML43Ext as ny
		# Close the PMML file once it is parsed instead of leaking the handle.
		with open(filePath,'r') as pmmlFile:
			pmmlObj = ny.parse(pmmlFile,silence=True)
		nn = NeuralNetworkModelTrainer()
		nn.pmmlfileObj = pmmlObj
		returnVal = nn.generateAndCompileModel('mean_squared_error','adam',0.1,['accuracy','f1'],compileTestOnly=True)
		# isinstance also recognises dict subclasses, unlike a class-name comparison.
		if isinstance(returnVal, dict):
			return JsonResponse(returnVal)
		return JsonResponse({'status':'Model Compiled Successfully'},status=200)
Example #14
0
def xgboost_to_pmml(pipeline, col_names, target_name, pmml_f_name='from_xgboost.pmml'):
    """
    Exports xgboost pipeline object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the target column.
    pmml_f_name : String
        Name of the pmml file. (Default='from_xgboost.pmml')

    Returns
    -------
    None
        The PMML document is written to ``pmml_f_name``.

    Raises
    ------
    TypeError
        If ``pipeline`` does not expose a non-empty ``steps`` sequence,
        i.e. it is not a pipeline instance.

    """
    try:
        model = pipeline.steps[-1][1]
    except (AttributeError, IndexError, TypeError) as err:
        # Only translate "this is not a pipeline" failures; interrupts and
        # system exits must keep propagating unchanged.
        raise TypeError("Exporter expects pipeleine_instance and not an estimator_instance") from err

    if isinstance(col_names, np.ndarray):
        col_names = col_names.tolist()
    ppln_sans_predictor = pipeline.steps[:-1]
    trfm_dict_kwargs = dict()
    derived_col_names = col_names
    categoric_values = tuple()
    mining_imp_val = tuple()
    if ppln_sans_predictor:
        pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names, model)
        trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
        derived_col_names = pml_pp['derived_col_names']
        col_names = pml_pp['preprocessed_col_names']
        categoric_values = pml_pp['categorical_feat_values']
        mining_imp_val = pml_pp['mining_imp_values']
    PMML_kwargs = get_PMML_kwargs(model,
                                  derived_col_names,
                                  col_names,
                                  target_name,
                                  mining_imp_val,
                                  categoric_values)
    pmml = pml.PMML(
        version=sklToPmml.get_version(),
        Header=sklToPmml.get_header(),
        MiningBuildTask=sklToPmml.get_mining_buildtask(pipeline),
        DataDictionary=sklToPmml.get_data_dictionary(model, col_names, target_name, categoric_values),
        **trfm_dict_kwargs,
        **PMML_kwargs
    )
    # Context manager guarantees the output file is flushed and closed.
    with open(pmml_f_name, "w") as pmml_file:
        pmml.export(outfile=pmml_file, level=0)
Example #15
0
def tfidf_vectorizer(trfm, col_names):
    """
    Build the PMML derived fields for a fitted TfIdfVectorizer.

    The first derived field lower-cases the source column (``col_names[0]``).
    It is followed by one field per vocabulary feature that multiplies the
    feature's TextIndex on the lower-cased text by the feature's idf weight.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's TfIdfVectorizer preprocessing instance
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to TfIdfVectorizer preprocessing.

    """
    pp_dict = dict()
    # `get_feature_names` was removed in scikit-learn 1.2; fall back to its
    # replacement so that both old and new versions are supported.
    if hasattr(trfm, 'get_feature_names'):
        features = trfm.get_feature_names()
    else:
        features = trfm.get_feature_names_out().tolist()
    idfs = trfm.idf_
    # NOTE(review): vocabulary_ keys come in dict order, which need not match
    # the order of `features` — confirm the index pairing below is intended.
    extra_features = list(trfm.vocabulary_.keys())
    derived_flds = list()
    derived_colnames = get_derived_colnames('tfidf@[' + col_names[0] + ']', features)
    lowercase_name = 'lowercase(' + col_names[0] + ')'
    derived_flds.append(
        pml.DerivedField(name=lowercase_name,
                         optype='categorical', dataType='string',
                         Apply=pml.Apply(function='lowercase',
                                         FieldRef=[pml.FieldRef(field=col_names[0])])))
    for feat_idx, (feature, idf) in enumerate(zip(features, idfs)):
        derived_flds.append(pml.DerivedField(
            name=derived_colnames[feat_idx],
            optype='continuous',
            dataType='double',
            Apply=pml.Apply(function='*',
                            TextIndex=[pml.TextIndex(textField=lowercase_name,
                                                     # raw string: '\s' is not a valid str escape
                                                     wordSeparatorCharacterRE=r'\s+',
                                                     tokenize='true',
                                                     Constant=pml.Constant(valueOf_=feature),
                                                     Extension=[pml.Extension(anytypeobjs_=[extra_features[feat_idx]])])],
                            Constant=[pml.Constant(valueOf_=idf)])
        ))
    pp_dict['der_fld'] = derived_flds
    pp_dict['der_col_names'] = derived_colnames
    pp_dict['pp_feat_name'] = col_names[0]
    pp_dict['pp_feat_class_lbl'] = list()
    return pp_dict
Example #16
0
def max_abs_scaler(trfm, col_names):
    """
    Build the PMML derived fields for a fitted MaxabsScaler.

    Each column is divided by its ``max_abs_`` value. Columns listed in the
    module-level ``exception_cols`` get no derived field, although their
    derived names are still part of ``der_col_names``.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's MaxabsScaler preprocessing instance
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to MaxabsScaler preprocessing.

    """
    derived_colnames = get_derived_colnames('maxAbsScaler', col_names)
    derived_flds = []
    for idx, col_name in enumerate(col_names):
        if col_name in exception_cols:
            continue
        divisor = pml.Constant(dataType="double",
                               valueOf_=unround_scalers(trfm.max_abs_[idx]))
        scaled = pml.Apply(function='/',
                           Constant=[divisor],
                           FieldRef=[pml.FieldRef(field=col_name)])
        derived_flds.append(pml.DerivedField(Apply=scaled,
                                             name=derived_colnames[idx],
                                             optype="continuous",
                                             dataType="double"))
    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
Example #17
0
def add_segmentation(model,segments_equal_to_estimators,mining_schema_for_1st_segment,out,id):
    """
    Wrap the estimator segments in a regression MiningModel and return it as
    a single Segment.

    Parameters
    ----------
    model:
       Contains Xgboost model object.
    segments_equal_to_estimators: List
        Contains the list of Segments, one per estimator of the model.
    mining_schema_for_1st_segment:
        Contains Mining Schema for the First Segment
    out:
        Contains the Output element
    id: Integer
        Index of the Segment

    Returns
    -------
    Segment
        A Segment with a ``True`` predicate wrapping the MiningModel. Its id
        is ``id`` when ``model.n_classes_`` equals 2 and ``id + 1`` otherwise.
    """
    summed_trees = pml.Segmentation(multipleModelMethod="sum",
                                    Segment=segments_equal_to_estimators)
    inner_model = pml.MiningModel(functionName='regression',
                                  modelName="MiningModel",
                                  MiningSchema=mining_schema_for_1st_segment,
                                  Output=out,
                                  Segmentation=summed_trees)
    segment_id = id if model.n_classes_ == 2 else id + 1
    return pml.Segment(True_=pml.True_(), id=segment_id, MiningModel=inner_model)
Example #18
0
def binarizer(trfm, col_names):
    """
    Build the PMML derived fields for a fitted Binarizer.

    Every column gets a ``threshold`` Apply that uses ``trfm.threshold`` as
    its constant.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's Binarizer preprocessing instance.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to Binarizer preprocessing.

    """
    derived_colnames = get_derived_colnames("binarizer", col_names)
    derived_flds = []
    for idx, col_name in enumerate(col_names):
        cutoff = pml.Constant(dataType="double", valueOf_=trfm.threshold)
        thresholded = pml.Apply(function='threshold',
                                Constant=[cutoff],
                                FieldRef=[pml.FieldRef(field=col_name)])
        derived_flds.append(pml.DerivedField(Apply=thresholded,
                                             name=derived_colnames[idx],
                                             optype="continuous",
                                             dataType="double"))
    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
Example #19
0
 def dump_config(self):
     """Store the configuration and mode as an Extension on the first DeepNetwork.

     Every attribute of ``self.config`` whose name is upper-case is copied
     into a dict (values whose class is named ``ndarray`` are converted with
     ``tolist()``). That dict and ``self.mode`` are stringified into a single
     Extension named ``config`` which replaces the Extension list of
     ``self.pmml_obj.DeepNetwork[0]``.
     """
     config_dict = {}
     # __dir__() rather than dir() keeps the attribute order untouched.
     for var in self.config.__dir__():
         if not var.isupper():
             continue
         val = getattr(self.config, var)
         config_dict[var] = (val.tolist()
                             if val.__class__.__name__ == "ndarray" else val)
     info_dict = {"config": config_dict, "mode": self.mode}
     self.pmml_obj.DeepNetwork[0].Extension = [
         pml.Extension(anytypeobjs_=[str(info_dict)], name="config")
     ]
Example #20
0
 def test_8_compileModel(self):
     """Test-compiling the sklearn-exported PMML must report a failure dict."""
     logging.info("Test Case : Compile a model.(2)")
     filePath = 'testUseCase/supportdata/from_sklearn.pmml'
     from nyokaBase import PMML43Ext as ny
     # Close the fixture file once parsed so the test leaks no file handle.
     with open(filePath, 'r') as pmmlFile:
         pmmlObj = ny.parse(pmmlFile, silence=True)
     from trainModel.mergeTrainingNN import NeuralNetworkModelTrainer
     nn = NeuralNetworkModelTrainer()
     nn.pmmlfileObj = pmmlObj
     returnVal = nn.generateAndCompileModel('mean_squared_error',
                                            'adam',
                                            0.1, ['accuracy', 'f1'],
                                            compileTestOnly=True)
     # assertIn reports the missing key and the container on failure.
     self.assertIn('status', returnVal)
     self.assertIn('errorMessage', returnVal)
     self.assertIn('errorTraceback', returnVal)
     self.assertEqual(returnVal['status'], 'Model Compilation Failed')
Example #21
0
 def test_7_compileModel(self):
     """Test-compiling the iris neural-network PMML must return a GenerateKerasModel."""
     logging.info("Test Case : Compile a model.(1)")
     filePath = 'testUseCase/supportdata/irisNN.pmml'
     from nyokaBase import PMML43Ext as ny
     # Close the fixture file once parsed so the test leaks no file handle.
     with open(filePath, 'r') as pmmlFile:
         pmmlObj = ny.parse(pmmlFile, silence=True)
     from trainModel.mergeTrainingNN import NeuralNetworkModelTrainer
     nn = NeuralNetworkModelTrainer()
     nn.pmmlfileObj = pmmlObj
     returnVal = nn.generateAndCompileModel('mean_squared_error',
                                            'adam',
                                            0.1, ['accuracy', 'f1'],
                                            compileTestOnly=True)
     # assertIn reports the missing attribute and the container on failure.
     self.assertIn('nyoka_pmml', returnVal.__dict__)
     self.assertIn('model', returnVal.__dict__)
     self.assertEqual(returnVal.nyoka_pmml.__class__.__name__, 'PMML')
     self.assertEqual(returnVal.__class__.__name__, 'GenerateKerasModel')
Example #22
0
def writePmml(pmmlObj, filepath, lockForPMML):
    """Write ``pmmlObj`` to ``filepath`` while holding ``lockForPMML``.

    The first DeepNetwork is renamed to ``model1`` and its task type is set
    to ``trainAndscore`` before anything else happens. Every script element
    is then rebuilt with its code dedented (by the indentation of its first
    line) and with ``<`` replaced by ``&lt;``, and the document is exported.

    Any exception raised while preparing or writing the document is printed
    and swallowed (best-effort write); the lock is always released once it
    has been acquired.

    Parameters
    ----------
    pmmlObj :
        PMML object exposing ``DeepNetwork``, ``script`` and ``export``.
    filepath : str
        Destination file, opened in text write mode.
    lockForPMML :
        Object exposing ``acquire()`` and ``release()``.
    """
    _deepNetworkObj = pmmlObj.DeepNetwork[0]
    _deepNetworkObj.modelName = 'model1'
    _deepNetworkObj.taskType = "trainAndscore"

    pmmlObj.DeepNetwork[0] = _deepNetworkObj

    # Acquire before entering the try block so that `finally` can never
    # release a lock that was not successfully taken.
    lockForPMML.acquire()
    try:
        pmmlObj = removeExtraNewLinesFromWeights(pmmlObj)
        scrptVal2 = []
        for sc in pmmlObj.script:
            code = sc.get_valueOf_().lstrip('\n')
            # Strip the first line's indentation from every line.
            leading_spaces = len(code) - len(code.lstrip(' '))
            code = '\n'.join(line[leading_spaces:] for line in code.split('\n'))
            scriptCode = code.replace('<', '&lt;')
            scrptVal2.append(pml.script(content=scriptCode,
                                        for_=sc.for_,
                                        class_=sc.class_,
                                        scriptPurpose=sc.scriptPurpose,
                                        scriptOutput=sc.scriptOutput,
                                        filePath=sc.filePath))
        pmmlObj.script = scrptVal2
        # Context manager guarantees the file is flushed and closed.
        with open(filepath, 'w') as pmmlFile:
            pmmlObj.export(pmmlFile, 0)
    except Exception as e:
        print('>>>>>>>>>>>> ', str(e))
    finally:
        lockForPMML.release()
Example #23
0
def rbst_scaler(trfm, col_names):
    """
    Build the PMML derived fields for a fitted RobustScaler.

    Each column is transformed as ``(column - center_) / scale_``. Columns
    listed in the module-level ``exception_cols`` get no derived field,
    although their derived names are still part of ``der_col_names``.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's RobustScaler preprocessing instance
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to RobustScaler preprocessing.

    """
    derived_colnames = get_derived_colnames('robustScaler', col_names)
    derived_flds = []
    for idx, col_name in enumerate(col_names):
        if col_name in exception_cols:
            continue
        # Inner step: subtract the centre; tagged so the scaler type is recorded.
        centered = pml.Apply(
            function='-',
            Constant=[pml.Constant(dataType="double",
                                   valueOf_=unround_scalers(trfm.center_[idx]))],
            FieldRef=[pml.FieldRef(field=col_name)],
            Extension=[pml.Extension(name='scaling', anytypeobjs_=['RobustScaler'])]
        )
        # Outer step: divide the centred value by the scale.
        scaled = pml.Apply(
            Apply_member=[centered],
            function='/',
            Constant=[pml.Constant(dataType="double",
                                   valueOf_=unround_scalers(trfm.scale_[idx]))]
        )
        derived_flds.append(pml.DerivedField(Apply=scaled,
                                             name=derived_colnames[idx],
                                             optype="continuous",
                                             dataType="double"))
    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
Example #24
0
def std_scaler(trfm, col_names, **kwargs):
    """
    Build the PMML derived fields for a fitted Standard Scaler.

    Each column is transformed as ``(column - mean_) / scale_``. When
    ``is_present("labelBinarizer", col_names)`` holds, the fields passed as
    ``kwargs['derived_fld']`` are placed in front of the scaler's own fields.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's Standard Scaler preprocessing instance
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.
    **kwargs :
        ``derived_fld`` is read (and required) only in the label-binarizer
        case described above.

    Returns
    -------
    pp_dict : dictionary
        Returns a dictionary that contains attributes related to Standard Scaler preprocessing.

    """
    derived_flds = []
    if is_present("labelBinarizer", col_names):
        hidden_flds = kwargs['derived_fld']
        if hidden_flds:
            derived_flds.extend(hidden_flds)

    derived_colnames = get_derived_colnames('standardScaler', col_names)
    for idx, col_name in enumerate(col_names):
        # Inner step: subtract the mean.
        centered = pml.Apply(
            function='-',
            Constant=[pml.Constant(dataType="double",
                                   valueOf_=unround_scalers(trfm.mean_[idx]))],
            FieldRef=[pml.FieldRef(field=col_name)]
        )
        # Outer step: divide the centred value by the scale.
        scaled = pml.Apply(
            Apply_member=[centered],
            function='/',
            Constant=[pml.Constant(dataType="double",
                                   valueOf_=unround_scalers(trfm.scale_[idx]))]
        )
        derived_flds.append(pml.DerivedField(Apply=scaled,
                                             name=derived_colnames[idx],
                                             optype="continuous",
                                             dataType="double"))

    return {'der_fld': derived_flds, 'der_col_names': derived_colnames}
Example #25
0
def pca(trfm, col_names):
    """
    Build the PMML derived fields describing a fitted PCA step.

    Each principal component becomes one derived field whose expression is a
    "sum" Apply over one "*" Apply per input feature; every "*" Apply combines
    a "-" Apply (feature and its fitted mean) with the component's loading for
    that feature.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's PCA preprocessing instance. The fitted
        ``components_`` matrix and ``mean_`` vector are read.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.

    Returns
    -------
    pp_dict : dictionary
        ``'der_fld'`` holds one derived field per component and
        ``'der_col_names'`` holds their names ("PCA<counter>-<component index>").

    """
    # ``pca.counter`` numbers the PCA steps of a pipeline so the generated field
    # names stay unique; start from zero when no caller has initialised it.
    pca.counter = getattr(pca, "counter", 0) + 1
    pp_dict = dict()
    derived_flds = list()
    derived_colnames = list()
    feature_means = trfm.mean_

    # Iterate the fitted ``components_`` matrix rather than the constructor
    # argument ``n_components`` (which may be None, a float or 'mle') and the
    # ``n_features_`` attribute (absent from recent scikit-learn releases).
    for component_idx, loadings in enumerate(trfm.components_):
        weighted_terms = list()
        for feature_idx, loading in enumerate(loadings):
            centred = pml.Apply(function='-',
                                Constant=[pml.Constant(dataType="double",
                                                       valueOf_=feature_means[feature_idx])],
                                FieldRef=[pml.FieldRef(field=col_names[feature_idx])])
            # Any loading that compares equal to zero (including -0.0) is
            # written as a plain 0.0.
            weight = 0.0 if loading == 0.0 else loading
            weighted_terms.append(pml.Apply(function="*",
                                            Apply_member=[centred],
                                            Constant=[pml.Constant(dataType="double",
                                                                   valueOf_=weight)]))
        component_sum = pml.Apply(function="sum", Apply_member=weighted_terms)

        derived_flds.append(pml.DerivedField(Apply=component_sum,
                                             dataType="double",
                                             optype="continuous",
                                             name="PCA" + str(pca.counter) + "-" + str(component_idx)))
        derived_colnames.append(derived_flds[-1].get_name())

    pp_dict['der_fld'] = derived_flds
    pp_dict['der_col_names'] = derived_colnames
    return pp_dict
Example #26
0
    def getDetailsOfPMML(filepath):
        """
        Parse a PMML file and return a JSON summary of its populated sections.

        Parameters
        ----------
        filepath :
            Location of the PMML file, passed straight to ``pml.parse``.

        Returns
        -------
        JsonResponse
            Wraps the dictionary produced by ``nyokaUtilities.changeStructure``
            from the information gathered for every populated top-level
            attribute that has a known extractor.
        """
        # Top-level attribute name -> name of the nyokaUtilities function that
        # summarises it. Resolved lazily, only for attributes actually present.
        extractor_for = {
            'Header': 'getHeaderInfo',
            'DataDictionary': 'getDataFields',
            'NearestNeighborModel': 'getInfoNearestNeighborModel',
            'DeepNetwork': 'getInfoOfDeepNetwork',
            'MiningModel': 'getInfoMiningModel',
            'SupportVectorMachineModel': 'getInfoSupportVectorMachineModel',
            'TreeModel': 'getInfoTreeModel',
            'RegressionModel': 'getInfoLinearModel',
            'NaiveBayesModel': 'getInfoOfNaiveBayesModel',
        }

        parsed_attrs = pml.parse(filepath, silence=True).__dict__

        def _has_content(value):
            # None and empty lists count as "not populated".
            if value is None:
                return False
            if isinstance(value, typing.List) and len(value) == 0:
                return False
            return True

        populated_keys = [key for key in parsed_attrs.keys()
                          if _has_content(parsed_attrs[key])]

        allInfo = {}
        for key in populated_keys:
            if key == 'version':
                allInfo['Version'] = parsed_attrs['version']
                continue
            if key in extractor_for:
                extract = getattr(nyokaUtilities, extractor_for[key])
                allInfo.update(extract(parsed_attrs))

        allInfo = nyokaUtilities.changeStructure(allInfo)
        return JsonResponse(allInfo)
Example #27
0
def generate_Segments_Equal_To_Estimators(val, derived_col_names, col_names):
    """
    Build one PMML Segment per entry of ``val``.

    Parameters
    ----------
    val: List
        One tree description per estimator (the nodes for binary
        classification, or the inner segments for multi-class classification).
    derived_col_names: List
        Contains column names after preprocessing; forwarded to ``create_node``.
    col_names: List
        Contains list of feature/column names; each becomes a MiningField of
        every segment's tree model.

    Returns
    -------
    segments_equal_to_estimators: List
        Segments with ids 1..len(val), each wrapping a regression TreeModel.
    """
    segments_equal_to_estimators = []
    for segment_id, tree_description in enumerate(val, start=1):
        root_node = pml.Node(True_=pml.True_())
        # create_node receives the root; presumably it fills the node tree in
        # place from the description -- confirm against create_node.
        create_node(tree_description, root_node, derived_col_names)

        mining_fields = [pml.MiningField(name=field_name) for field_name in col_names]

        always_true = pml.True_()
        tree_model = pml.TreeModel(
            functionName="regression",
            modelName="DecisionTreeModel",
            missingValueStrategy="none",
            noTrueChildStrategy="returnLastPrediction",
            splitCharacteristic="multiSplit",
            Node=root_node,
            MiningSchema=pml.MiningSchema(MiningField=mining_fields),
        )
        segments_equal_to_estimators.append(
            pml.Segment(id=segment_id, True_=always_true, TreeModel=tree_model)
        )

    return segments_equal_to_estimators
Example #28
0
def get_preprocess_val(ppln_sans_predictor, initial_colnames, model):
    """
    Walk the preprocessing steps of a pipeline and collect their PMML data.

    Every transformer found is handed to ``get_pml_derived_flds``; the derived
    fields it returns are accumulated, and the column names it returns become
    the input names of the next transformer.

    Parameters
    ----------
    model :
        Contains an instance of Sklearn model
    ppln_sans_predictor :
        Contains an instance of Sklearn Pipeline.
        It is iterated step by step and element ``[1]`` of each step is taken
        as the transformer.
    initial_colnames : list
        Contains list of feature/column names.

    Returns
    -------
    pml_pp: dictionary
        Returns a dictionary that contains data related to pre-processing:
        ``'trfm_dict'`` (empty list, or a single-element list holding a
        TransformationDictionary), ``'derived_col_names'``,
        ``'preprocessed_col_names'``, ``'categorical_feat_values'`` (tuple of
        classes and class attributes) and ``'mining_imp_values'`` (tuple of
        mining attributes, strategies and replacement values).

    """
    pml_pp = dict()
    pml_derived_flds = list()
    # Work on a private copy so the caller's sequence is never aliased.
    initial_colnames = [col_name for col_name in initial_colnames]
    updated_colnames = initial_colnames.copy()
    dtd_feat_names = list()
    classes = list()
    class_attribute = list()
    mining_strategy = list()
    mining_replacement_val = list()
    mining_attributes = list()
    derived_flds_hidden = list()
    pml_trfm_dict = list()
    # Reset the function-attribute state shared with the individual
    # transformer handlers before processing this pipeline.
    polynomial_features.poly_ctr = 0
    pca.counter = 0
    imputer.col_names = initial_colnames

    for ppln_step in ppln_sans_predictor:
        ppln_step_inst = ppln_step[1]
        if "DataFrameMapper" == get_class_name(ppln_step_inst):
            # Each mapper feature is read as (column name(s), transformer(s)).
            dfm_steps = ppln_step_inst.features
            dfm_col_names = list()
            for dfm_step in dfm_steps:
                dfm_step_col_names = dfm_step[0]
                dfm_step_trfms = dfm_step[1]
                # Normalise single column names / single transformers to lists.
                if not hasattr(dfm_step_col_names, "__len__") or isinstance(dfm_step_col_names, str):
                    dfm_step_col_names = [dfm_step_col_names]
                if not hasattr(dfm_step_trfms, "__len__") or isinstance(dfm_step_trfms, str):
                    dfm_step_trfms = [dfm_step_trfms]
                # Record each input column once, in first-seen order.
                for name in dfm_step_col_names:
                    if name not in dtd_feat_names:
                        dtd_feat_names.append(name)

                for trfm in dfm_step_trfms:
                    pp_dict = get_pml_derived_flds(trfm, dfm_step_col_names, derived_fld=derived_flds_hidden,
                                                   model=model)
                    derived_flds = pp_dict['der_fld']
                    derived_names = pp_dict['der_col_names']
                    # Optional keys: only some transformer handlers provide them.
                    if 'pp_feat_class_lbl' in pp_dict.keys():
                        classes.append(pp_dict['pp_feat_class_lbl'])
                        class_attribute.append(pp_dict['pp_feat_name'])
                    if 'pp_feat_class_ohe' in pp_dict.keys():
                        classes.append(pp_dict['pp_feat_class_ohe'])
                        class_attribute.append(pp_dict['pp_feat_name'])
                    if 'mining_strategy' in pp_dict.keys():
                        mining_attributes.append(pp_dict['der_col_names'])
                        mining_strategy.append(pp_dict['mining_strategy'])
                        mining_replacement_val.append(pp_dict['mining_replacement_val'])
                    # Hidden fields are kept aside and passed to later
                    # transformers through the derived_fld keyword.
                    if 'hidden_lb_der_flds' in pp_dict.keys():
                        derived_flds_hidden.extend(pp_dict['hidden_lb_der_flds'])
                    if 'hidden_ohe_der_flds' in pp_dict.keys():
                        derived_flds_hidden.extend(pp_dict['hidden_ohe_der_flds'])
                    pml_derived_flds.extend(derived_flds)
                    # Chain: this transformer's outputs feed the next one.
                    dfm_step_col_names = derived_names
                # Only the names produced by the last transformer of the
                # feature are kept as the feature's output columns.
                dfm_col_names.extend(derived_names)

            updated_colnames = dfm_col_names
        else:
            # No DataFrameMapper has populated the feature list yet: the
            # step operates on the initial columns.
            if not dtd_feat_names:
                dtd_feat_names = initial_colnames
                updated_colnames = initial_colnames
            if not hasattr(ppln_step_inst, "__len__") or isinstance(ppln_step_inst, str):
                ppln_step_inst = [ppln_step_inst]
            for trfm in ppln_step_inst:
                pp_dict = get_pml_derived_flds(trfm, updated_colnames, derived_fld=derived_flds_hidden, model=model)
                derived_flds = pp_dict['der_fld']
                derived_names = pp_dict['der_col_names']
                # NOTE(review): unlike the DataFrameMapper branch, the
                # 'hidden_*_der_flds' keys are not collected here -- confirm
                # this asymmetry is intended.
                if 'pp_feat_class_lbl' in pp_dict.keys():
                    classes.append(pp_dict['pp_feat_class_lbl'])
                    class_attribute.append(pp_dict['pp_feat_name'])
                if 'pp_feat_class_ohe' in pp_dict.keys():
                    classes.append(pp_dict['pp_feat_class_ohe'])
                    class_attribute.append(pp_dict['pp_feat_name'])
                if 'mining_strategy' in pp_dict.keys():
                    mining_attributes.append(pp_dict['der_col_names'])
                    mining_strategy.append(pp_dict['mining_strategy'])
                    mining_replacement_val.append(pp_dict['mining_replacement_val'])
                pml_derived_flds.extend(derived_flds)
                updated_colnames = derived_names

    # A TransformationDictionary is emitted only when something was derived.
    if pml_derived_flds:
        pml_trfm_dict = [pml.TransformationDictionary(DerivedField=pml_derived_flds)]
    pml_pp['trfm_dict'] = pml_trfm_dict
    pml_pp['derived_col_names'] = updated_colnames
    pml_pp['preprocessed_col_names'] = dtd_feat_names
    pml_pp['categorical_feat_values'] = classes, class_attribute
    pml_pp['mining_imp_values'] = mining_attributes, mining_strategy, mining_replacement_val

    return pml_pp
Example #29
0
def get_segments_for_xgbc(model, derived_col_names, feature_names, target_name, mining_imp_val,categoric_values):
    """
    It returns all the segments of the Xgboost classifier.

    For a binary model two segments are produced: one holding every tree and
    one regression model with "logit" normalization. For a multi-class model
    one tree segment is produced per class, followed by one regression model.

    Parameters
    ----------
    model :
        Contains Xgboost model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    segments : list
        Returns all the segments of the xgboost model.
    """
    segments = list()
    # Dump the booster once; the original re-dumped every tree on each loop
    # iteration just to pick a single entry out of the result.
    tree_dumps = model._Booster.get_dump(dump_format='json')

    if model.n_classes_ == 2:
        get_nodes_in_json_format = [json.loads(tree_dumps[i]) for i in range(model.n_estimators)]
        mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
        outputField = [pml.OutputField(name="xgbValue", optype="continuous", dataType="float",
                                       feature="predictedValue", isFinalResult="true")]
        out = pml.Output(OutputField=outputField)
        oField = ['xgbValue']
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(get_nodes_in_json_format,
                                                                             derived_col_names,
                                                                             feature_names)
        first_segment = add_segmentation(model, segments_equal_to_estimators, mining_schema_for_1st_segment, out, 1)
        reg_model = sklToPmml.get_regrs_models(model, oField, oField, target_name, mining_imp_val,
                                               categoric_values)[0]
        reg_model.normalizationMethod = 'logit'
        last_segment = pml.Segment(True_=pml.True_(), id=2,
                                   RegressionModel=reg_model)
        segments.append(first_segment)
        segments.append(last_segment)
    else:
        n_classes = model.n_classes_
        get_nodes_in_json_format = [json.loads(tree_dumps[i])
                                    for i in range(model.n_estimators * n_classes)]
        oField = list()
        for index in range(0, n_classes):
            # Trees are taken with a stride of n_classes, starting at this
            # class index, so each class gets every n_classes-th tree.
            inner_segment = get_nodes_in_json_format[index::n_classes]
            mining_schema_for_1st_segment = mining_Field_For_First_Segment(feature_names)
            output_name = 'xgbValue(' + str(index) + ')'
            outputField = [pml.OutputField(name=output_name, optype="continuous",
                                           feature="predictedValue", dataType="float", isFinalResult="true")]
            out = pml.Output(OutputField=outputField)

            oField.append(output_name)
            segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(inner_segment, derived_col_names,
                                                                                 feature_names)
            segments_equal_to_class = add_segmentation(model, segments_equal_to_estimators,
                                                       mining_schema_for_1st_segment, out, index)
            segments.append(segments_equal_to_class)
        last_segment = pml.Segment(True_=pml.True_(), id=n_classes + 1,
                                   RegressionModel=sklToPmml.get_regrs_models(model, oField, oField, target_name,
                                                                              mining_imp_val, categoric_values)[0])
        segments.append(last_segment)
    return segments
Example #30
0
def imputer(trfm, col_names, **kwargs):
    """
    Build the PMML information describing an Imputer step.

    When ``any_in(imputer.col_names, col_names)`` holds, no derived field is
    created and the imputation is reported through the ``'mining_*'`` keys
    instead. Otherwise each column not listed in ``exception_cols`` gets a
    derived field made of an "if" Apply around an "isMissing" check.

    Parameters
    ----------
    trfm :
        Contains the Sklearn's Imputer preprocessing instance.
        Its ``strategy`` and ``statistics_`` attributes are read.
    col_names : list
        Contains list of feature/column names.
        The column names may represent the names of preprocessed attributes.
    **kwargs :
        Must contain ``model`` (looked up but not otherwise used here).

    Returns
    -------
    pp_dict : dictionary
        Always holds ``'der_fld'`` and ``'der_col_names'``; additionally holds
        ``'mining_strategy'``, ``'mining_replacement_val'`` and
        ``'mining_attributes'`` when no derived field is generated.

    """
    original_col_names = imputer.col_names
    model = kwargs['model']

    # Translate the sklearn strategy into the PMML treatment keyword; a
    # strategy matching none of these is passed through unchanged.
    mining_strategy = trfm.strategy
    for sk_keyword, pmml_keyword in (("mean", "asMean"),
                                     ("median", "asMedian"),
                                     ("most_frequent", "asMode")):
        if sk_keyword in mining_strategy:
            mining_strategy = pmml_keyword
            break
    mining_replacement_val = trfm.statistics_

    pp_dict = dict()
    derived_flds = list()
    derived_colnames = col_names

    if any_in(original_col_names, col_names):
        pp_dict['mining_strategy'] = mining_strategy
        pp_dict['mining_replacement_val'] = mining_replacement_val
        pp_dict['mining_attributes'] = col_names
    else:
        derived_colnames = get_derived_colnames('imputer', col_names)
        for position, source_name in enumerate(col_names):
            if source_name in exception_cols:
                continue
            missing_check = pml.Apply(function='isMissing',
                                      FieldRef=[pml.FieldRef(field=source_name)])
            replacement = pml.Constant(
                dataType="double",
                valueOf_=mining_replacement_val[position]
            )
            # The pass-through FieldRef travels in the Constant list, right
            # after the replacement value, with its tag name set explicitly.
            passthrough = pml.FieldRef(field=source_name)
            passthrough.original_tagname_ = "FieldRef"
            conditional = pml.Apply(
                Apply_member=[missing_check],
                function='if',
                Constant=[replacement, passthrough]
            )
            derived_flds.append(pml.DerivedField(
                Apply=conditional,
                name=derived_colnames[position],
                optype="continuous",
                dataType="double"
            ))

    pp_dict['der_fld'] = derived_flds
    pp_dict['der_col_names'] = derived_colnames
    return pp_dict