def create_name_for_xgbmodel(run_cfg: Dict[str, Any],
                             outer_split_num: int,
                             model: XGBModel,
                             inner_split_num: int,
                             prefix_location='logs/',
                             suffix='.pkl') -> str:
    """Build a file name that identifies one XGBoost model configuration.

    The result is ``prefix_location``, followed by the underscore-joined
    fields below, followed by ``suffix``:

    target variable, dataset type, outer split number, inner split number,
    model description, number of nodes, connectivity type.

    The model description is the analysis type's ``value`` immediately
    followed by one ``<last three letters of key>_<value>`` fragment per
    hyper-parameter, with no separator between fragments.

    Args:
        run_cfg: Run configuration. The keys read are ``'analysis_type'``,
            ``'target_var'``, ``'dataset_type'``, ``'num_nodes'`` and
            ``'param_conn_type'``; the type entries are accessed through
            their ``.value`` attribute.
        outer_split_num: Index of the outer split.
        model: Model whose ``get_params()`` supplies the hyper-parameters.
        inner_split_num: Index of the inner split.
        prefix_location: String prepended verbatim to the name.
        suffix: String appended verbatim to the name.

    Returns:
        The assembled name as a string.

    Raises:
        ValueError: If ``run_cfg['analysis_type']`` is not
            ``AnalysisType.FLATTEN_CORRS``. Previously this case failed
            with an ``UnboundLocalError`` at the return statement.
        KeyError: If ``run_cfg`` or ``model.get_params()`` lacks one of the
            keys listed above.
    """
    analysis_type = run_cfg['analysis_type']
    if analysis_type != AnalysisType.FLATTEN_CORRS:
        # Only flattened-correlation runs have a defined naming scheme here;
        # fail loudly instead of hitting an unbound local further down.
        raise ValueError(
            f'Cannot build an XGBoost model name for analysis type '
            f'{analysis_type!r}; only AnalysisType.FLATTEN_CORRS is supported.'
        )

    hyperparam_keys = (
        'colsample_bylevel', 'colsample_bynode', 'colsample_bytree',
        'gamma', 'learning_rate', 'max_depth', 'min_child_weight',
        'n_estimators', 'subsample',
    )
    # get_params() is loop-invariant, so fetch it once.
    params = model.get_params()

    # NOTE: fragments are deliberately concatenated without a separator so
    # that generated names stay identical to the ones produced before.
    model_str_representation = analysis_type.value + ''.join(
        key[-3:] + '_' + str(params[key]) for key in hyperparam_keys
    )

    name_parts = [
        run_cfg['target_var'],
        run_cfg['dataset_type'].value,
        str(outer_split_num),
        str(inner_split_num),
        model_str_representation,
        str(run_cfg['num_nodes']),
        run_cfg['param_conn_type'].value,
    ]
    return prefix_location + '_'.join(name_parts) + suffix
def to_mls(xgboost_model: xgboost.XGBModel, **kwargs):
    """Serialise an XGBoost model's hyper-parameters as an MLS ``Run``.

    The model's ``get_params()`` output is converted recursively into
    JSON-compatible values, wrapped into ``Algorithm`` / ``Implementation`` /
    ``HyperParameterSetting`` objects, assembled into a ``Run`` and dumped
    with ``RunSchema().dumps``.

    Args:
        xgboost_model: Model providing ``get_params()`` and ``__hash__()``.
        **kwargs: If ``EVALUATION_MEASURE_KEY`` is present, its value is
            indexed at ``[0]`` and ``[1]`` and passed to
            ``evaluation_measure``; the result becomes the run's only
            output value.

    Returns:
        Whatever ``RunSchema().dumps`` returns for the assembled ``Run``.

    Raises:
        NotImplementedError: If a parameter value has no ``get_params``
            attribute and ``json.dumps`` rejects it with ``TypeError``.
    """

    def _normalise_leaf(value):
        # Bring numpy arrays, floats and callables into a serialisable form;
        # anything else is passed through untouched.
        if isinstance(value, np.ndarray):
            return [normalize_float(item) for item in value.tolist()]
        if isinstance(value, float):
            return normalize_float(value)
        if callable(value):
            return str(value)  # TODO
        return value

    def _serialise(node):
        # Containers are walked element by element.
        if isinstance(node, (list, tuple)):
            return [_serialise(item) for item in node]
        if isinstance(node, dict):
            return {name: _serialise(item) for name, item in node.items()}

        leaf = _normalise_leaf(node)
        try:
            # Objects exposing get_params() are expanded into a typed,
            # nested parameter mapping.
            nested = leaf.get_params()
            type_name = type(leaf).__module__ + "." + type(leaf).__name__
            return {"@value": {"type": type_name, "params": _serialise(nested)}}
        except AttributeError:
            # No get_params(): accept the value only if json can encode it.
            try:
                json.dumps(leaf)
            except TypeError as e:
                raise NotImplementedError(
                    "can't convert sklearn model of type {} to mls: {}".format(
                        type(xgboost_model), e
                    )
                )
            return leaf

    hyper_params = _serialise(xgboost_model.get_params())
    model_hash = xgboost_model.__hash__()

    model_type = type(xgboost_model)
    model_class = "{}.{}".format(model_type.__module__, model_type.__name__)
    algo = Algorithm(_id=model_class)

    # The id is generated before the HyperParameter objects are built, to
    # keep the same call order as passing both inline as keyword arguments.
    implementation_id = generate_unique_id(
        "http://www.w3.org/ns/mls#Implementation"
    )
    declared_params = [
        HyperParameter(name, model_hash=model_hash) for name in hyper_params
    ]
    implementation = Implementation(
        _id=implementation_id,
        parameters=declared_params,
        implements=algo,
        version=xgboost.__version__,
    )

    # Parameters whose serialised value is None are declared on the
    # implementation but get no setting.
    input_values = []
    for name, setting in hyper_params.items():
        if setting is None:
            continue
        input_values.append(
            HyperParameterSetting(
                value=setting,
                specified_by=HyperParameter(name, model_hash=model_hash),
                model_hash=model_hash,
            )
        )

    output_values = []
    if EVALUATION_MEASURE_KEY in kwargs:
        measure = kwargs[EVALUATION_MEASURE_KEY]
        output_values.append(evaluation_measure(measure[0], measure[1]))

    run = Run(model_hash, implementation, input_values, output_values, algo)
    return RunSchema().dumps(run)