Beispiel #1
0
def xgboost_to_pmml(pipeline, col_names, target_name,
                    pmml_f_name='from_xgboost.pmml',
                    model_name=None, description=None):
    """
    Exports xgboost model object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names. A numpy ``ndarray`` is
        converted to a plain list before use.
    target_name : String
        Name of the target column.
    pmml_f_name : String
        Name of the pmml file. (Default='from_xgboost.pmml')
    model_name : string (optional)
        Name of the model
    description : string (optional)
        Description for the model

    Returns
    -------
    None
        Generates the PMML object and exports it to `pmml_f_name`

    Raises
    ------
    TypeError
        If the final estimator cannot be read from ``pipeline.steps``
        (e.g. a bare estimator was passed instead of a pipeline).

    """
    # Only the errors the lookup itself can produce are translated; anything
    # else (KeyboardInterrupt, SystemExit, ...) propagates untouched.
    try:
        model = pipeline.steps[-1][1]
    except (AttributeError, IndexError, KeyError, TypeError) as err:
        raise TypeError("Exporter expects pipeleine_instance and not an estimator_instance") from err

    # The array check is done by class name, so `numpy` is not referenced here.
    if col_names.__class__.__name__ == "ndarray":
        col_names = col_names.tolist()

    # Defaults used when the pipeline holds nothing but the final estimator.
    ppln_sans_predictor = pipeline.steps[:-1]
    trfm_dict_kwargs = dict()
    derived_col_names = col_names
    categoric_values = tuple()
    mining_imp_val = tuple()

    if ppln_sans_predictor:
        # Preprocessing steps exist: take the transformation dictionary and
        # the column bookkeeping from the preprocessing helper's result.
        pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names, model)
        trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
        derived_col_names = pml_pp['derived_col_names']
        col_names = pml_pp['preprocessed_col_names']
        categoric_values = pml_pp['categorical_feat_values']
        mining_imp_val = pml_pp['mining_imp_values']

    PMML_kwargs = get_PMML_kwargs(model,
                                  derived_col_names,
                                  col_names,
                                  target_name,
                                  mining_imp_val,
                                  categoric_values,
                                  model_name)
    pmml = pml.PMML(
        version=PMML_SCHEMA.VERSION,
        Header=sklToPmml.get_header(description),
        DataDictionary=sklToPmml.get_data_dictionary(model, col_names, target_name, categoric_values),
        **trfm_dict_kwargs,
        **PMML_kwargs
    )

    # The context manager guarantees the file is flushed and closed even if
    # the export raises part-way through.
    with open(pmml_f_name, "w") as outfile:
        pmml.export(outfile=outfile, level=0)
Beispiel #2
0
def xgboost_to_pmml(pipeline,
                    col_names,
                    target_name,
                    pmml_f_name='from_xgboost.pmml'):
    """
    Exports xgboost pipeline object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names. A ``numpy.ndarray`` is
        converted to a plain list before use.
    target_name : String
        Name of the target column.
    pmml_f_name : String
        Name of the pmml file. (Default='from_xgboost.pmml')

    Returns
    -------
    None
        The PMML document is written to `pmml_f_name`.

    Raises
    ------
    TypeError
        If the final estimator cannot be read from ``pipeline.steps``
        (e.g. a bare estimator was passed instead of a pipeline).

    """
    # Only the errors the lookup itself can produce are translated; anything
    # else (KeyboardInterrupt, SystemExit, ...) propagates untouched.
    try:
        model = pipeline.steps[-1][1]
    except (AttributeError, IndexError, KeyError, TypeError) as err:
        raise TypeError(
            "Exporter expects pipeleine_instance and not an estimator_instance"
        ) from err

    if isinstance(col_names, np.ndarray):
        col_names = col_names.tolist()

    # Defaults used when the pipeline holds nothing but the final estimator.
    ppln_sans_predictor = pipeline.steps[:-1]
    trfm_dict_kwargs = dict()
    derived_col_names = col_names
    categoric_values = tuple()
    mining_imp_val = tuple()

    if ppln_sans_predictor:
        # Preprocessing steps exist: take the transformation dictionary and
        # the column bookkeeping from the preprocessing helper's result.
        pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names,
                                       model)
        trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
        derived_col_names = pml_pp['derived_col_names']
        col_names = pml_pp['preprocessed_col_names']
        categoric_values = pml_pp['categorical_feat_values']
        mining_imp_val = pml_pp['mining_imp_values']

    PMML_kwargs = get_PMML_kwargs(model, derived_col_names, col_names,
                                  target_name, mining_imp_val,
                                  categoric_values)
    pmml = pml.PMML(
        version=sklToPmml.get_version(),
        Header=sklToPmml.get_header(),
        MiningBuildTask=sklToPmml.get_mining_buildtask(pipeline),
        DataDictionary=sklToPmml.get_data_dictionary(
            model, col_names, target_name, categoric_values),
        **trfm_dict_kwargs,
        **PMML_kwargs)

    # The context manager guarantees the file is flushed and closed even if
    # the export raises part-way through.
    with open(pmml_f_name, "w") as outfile:
        pmml.export(outfile=outfile, level=0)
Beispiel #3
0
def lgb_to_pmml(pipeline,
                col_names,
                target_name,
                pmml_f_name='from_lgbm.pmml'):
    """
    Exports LGBM pipeline object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names. A ``numpy.ndarray`` is
        converted to a plain list before use.
    target_name : String
        Name of the target column.
    pmml_f_name : String
        Name of the pmml file. (Default='from_lgbm.pmml')

    Returns
    -------
    None
        The PMML document is written to `pmml_f_name`, then rewritten with
        an ADAPA comment line inserted as its second line.

    Raises
    ------
    TypeError
        If the final estimator cannot be read from ``pipeline.steps``
        (e.g. a bare estimator was passed instead of a pipeline).

    """
    # Only the errors the lookup itself can produce are translated; anything
    # else (KeyboardInterrupt, SystemExit, ...) propagates untouched.
    try:
        model = pipeline.steps[-1][1]
    except (AttributeError, IndexError, KeyError, TypeError) as err:
        raise TypeError(
            "Exporter expects pipeleine_instance and not an estimator_instance"
        ) from err

    if isinstance(col_names, np.ndarray):
        col_names = col_names.tolist()

    # Defaults used when the pipeline holds nothing but the final estimator.
    ppln_sans_predictor = pipeline.steps[:-1]
    trfm_dict_kwargs = dict()
    derived_col_names = col_names
    categoric_values = tuple()
    mining_imp_val = tuple()

    if ppln_sans_predictor:
        # Preprocessing steps exist: take the transformation dictionary and
        # the column bookkeeping from the preprocessing helper's result.
        pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names)
        trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
        derived_col_names = pml_pp['derived_col_names']
        col_names = pml_pp['preprocessed_col_names']
        categoric_values = pml_pp['categorical_feat_values']
        mining_imp_val = pml_pp['mining_imp_values']

    PMML_kwargs = get_PMML_kwargs(model, derived_col_names, col_names,
                                  target_name, mining_imp_val,
                                  categoric_values)
    pmml = pml.PMML(version=sklToPmml.get_version(),
                    Header=sklToPmml.get_header(),
                    DataDictionary=sklToPmml.get_data_dictionary(
                        model, col_names, target_name, categoric_values),
                    **trfm_dict_kwargs,
                    **PMML_kwargs)

    # The context manager guarantees the export is flushed and closed before
    # the file is read back below.
    with open(pmml_f_name, "w") as outfile:
        pmml.export(outfile=outfile, level=0)

    # Re-read the exported file and insert the ADAPA comment at index 1,
    # i.e. directly after the first line (presumably the XML declaration --
    # TODO confirm against the exporter's output).
    with open(pmml_f_name, 'r') as pmml_file:
        lines = pmml_file.readlines()
    lines.insert(
        1,
        '<!--(Comment generated by ADAPA) PMML processed by ADAPA (Version : 4.3)-->\n'
    )
    with open(pmml_f_name, 'w') as pmml_file:
        pmml_file.writelines(lines)