Exemple #1
0
def create_name_for_xgbmodel(run_cfg: Dict[str, Any],
                             outer_split_num: int,
                             model: XGBModel,
                             inner_split_num: int,
                             prefix_location: str = 'logs/',
                             suffix: str = '.pkl') -> str:
    """Build the path-like name used to identify an XGBoost model of a run.

    The name is ``prefix_location`` followed by these parts joined with
    ``'_'``, followed by ``suffix``: target variable, dataset type, outer
    split number, inner split number, model representation, number of nodes
    and connectivity type.

    The model representation is the analysis type's ``value`` immediately
    followed, for each tracked hyper-parameter, by the last three characters
    of its name, an underscore and its value (no separator between
    consecutive hyper-parameters).

    Args:
        run_cfg: Run configuration. Reads ``'analysis_type'``,
            ``'dataset_type'`` and ``'param_conn_type'`` (each must expose
            ``.value``), ``'target_var'`` (a string) and ``'num_nodes'``.
        outer_split_num: Outer split index, embedded in the name.
        model: Model whose ``get_params()`` supplies the hyper-parameters.
        inner_split_num: Inner split index, embedded in the name.
        prefix_location: Text prepended to the name (default ``'logs/'``).
        suffix: Text appended to the name (default ``'.pkl'``).

    Returns:
        The assembled name.

    Raises:
        ValueError: If ``run_cfg['analysis_type']`` is not
            ``AnalysisType.FLATTEN_CORRS``; no model representation is
            defined for other analysis types.
    """
    analysis_type = run_cfg['analysis_type']
    if analysis_type != AnalysisType.FLATTEN_CORRS:
        # Previously this path fell through to the return statement and died
        # with an UnboundLocalError; fail with an explicit message instead.
        raise ValueError(
            'create_name_for_xgbmodel only supports analysis type {!r}, '
            'got {!r}'.format(AnalysisType.FLATTEN_CORRS, analysis_type))

    tracked_params = (
        'colsample_bylevel', 'colsample_bynode', 'colsample_bytree',
        'gamma', 'learning_rate', 'max_depth', 'min_child_weight',
        'n_estimators', 'subsample',
    )
    # Fetch the parameters once instead of once per tracked key.
    params = model.get_params()
    # Each parameter is abbreviated to the last three characters of its name
    # ('vel', 'ode', 'ree', ...), which are distinct for the tracked keys.
    model_str_representation = analysis_type.value + ''.join(
        key[-3:] + '_' + str(params[key]) for key in tracked_params)

    name_parts = [
        run_cfg['target_var'],
        run_cfg['dataset_type'].value,
        str(outer_split_num),
        str(inner_split_num),
        model_str_representation,
        str(run_cfg['num_nodes']),
        run_cfg['param_conn_type'].value,
    ]
    return prefix_location + '_'.join(name_parts) + suffix
Exemple #2
0
def to_mls(xgboost_model: xgboost.XGBModel, **kwargs):
    """Serialize an XGBoost model's configuration as an MLS run.

    The model's hyper-parameters (from ``get_params()``) are converted to
    JSON-compatible values and wrapped in ``Algorithm``, ``Implementation``,
    ``HyperParameter``/``HyperParameterSetting`` and ``Run`` objects; the
    run is then dumped with ``RunSchema``.

    Args:
        xgboost_model: Model whose parameters, class name and hash describe
            the run.
        **kwargs: Optional extras. When ``EVALUATION_MEASURE_KEY`` is
            present, its value is indexed at ``[0]`` and ``[1]`` and passed
            to ``evaluation_measure``; the result becomes the run's single
            output value.

    Returns:
        Whatever ``RunSchema().dumps`` returns for the assembled ``Run``.

    Raises:
        NotImplementedError: If a parameter value neither exposes
            ``get_params()`` nor can be serialized by ``json.dumps``.
    """

    def standardize_types(v):
        """Convert a single leaf value to a plain, JSON-friendly form."""
        if isinstance(v, np.ndarray):
            return [normalize_float(x) for x in v.tolist()]
        if isinstance(v, float):
            # Also covers np.float64, which subclasses float.
            return normalize_float(v)
        if isinstance(v, np.generic):
            # Other NumPy scalars (np.int64, np.float32, np.bool_, ...) are
            # not JSON-serializable as-is; unwrap to the Python equivalent
            # and run it through the same checks.
            return standardize_types(v.item())
        if callable(v):
            return str(v)  # TODO
        return v

    def deep_get_params(value):
        """Recursively convert ``value`` into JSON-compatible data."""
        if isinstance(value, (list, tuple)):
            return [deep_get_params(x) for x in value]
        if isinstance(value, dict):
            return {k: deep_get_params(v) for k, v in value.items()}

        leaf = standardize_types(value)
        # Only the get_params() lookup is guarded: an AttributeError raised
        # while converting the nested parameters must propagate rather than
        # be mistaken for "this object has no get_params".
        try:
            nested = leaf.get_params()
        except AttributeError:
            pass
        else:
            type_name = type(leaf).__module__ + "." + type(leaf).__name__
            return {
                "@value": {"type": type_name, "params": deep_get_params(nested)}
            }

        try:
            json.dumps(leaf)
        except TypeError as e:
            raise NotImplementedError(
                "can't convert sklearn model of type {} to mls: {}".format(
                    type(xgboost_model), e
                )
            ) from e
        return leaf

    params = deep_get_params(xgboost_model.get_params())
    model_hash = hash(xgboost_model)
    model_class = "{}.{}".format(
        type(xgboost_model).__module__, type(xgboost_model).__name__
    )
    algo = Algorithm(_id=model_class)

    implementation = Implementation(
        _id=generate_unique_id("http://www.w3.org/ns/mls#Implementation"),
        parameters=[HyperParameter(key, model_hash=model_hash) for key in params],
        implements=algo,
        version=xgboost.__version__,
    )

    # Parameters whose value is None are declared on the implementation
    # above but get no setting here.
    input_values = [
        HyperParameterSetting(
            value=val,
            specified_by=HyperParameter(key, model_hash=model_hash),
            model_hash=model_hash,
        )
        for key, val in params.items()
        if val is not None
    ]

    output_values = []
    if EVALUATION_MEASURE_KEY in kwargs:
        eval_measure = kwargs[EVALUATION_MEASURE_KEY]
        output_values.append(evaluation_measure(eval_measure[0], eval_measure[1]))

    run = Run(model_hash, implementation, input_values, output_values, algo)
    return RunSchema().dumps(run)