def _save_min_max_meta(self, name, namespace):
    """
    Save the min-max scale meta protobuf through model_manager.save_model

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type the meta was saved under ("<class_name>.meta")
    """
    scale_param = self.scale_param

    if scale_param.area == consts.ALL:
        LOGGER.debug("save_min_max_meta with mode is all")
        # str(None) is already "None", so unset bounds need no special casing.
        min_max_scale_meta = feature_scale_meta_pb2.MinMaxScaleMeta(
            feat_upper=str(scale_param.feat_upper),
            feat_lower=str(scale_param.feat_lower),
            out_upper=str(scale_param.out_upper),
            out_lower=str(scale_param.out_lower))
        # The single set of bounds is stored under the key "0".
        minmax_scale_meta = {"0": min_max_scale_meta}
        meta_protobuf_obj = feature_scale_meta_pb2.ScaleMeta(
            is_scale=True,
            strategy=scale_param.method,
            minmax_scale_meta=minmax_scale_meta)
    else:
        # Report the attribute this branch was actually selected on (area);
        # the previous code logged scale_param.mode here, which is unrelated
        # to the condition above.
        LOGGER.debug("save_min_max_meta with mode is {}".format(scale_param.area))
        meta_protobuf_obj = feature_scale_meta_pb2.ScaleMeta(is_scale=True)

    buffer_type = "{}.meta".format(self.class_name)
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=meta_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return buffer_type
def save_standard_scale_model(self, name, namespace):
    """
    Save StandardScaler meta and param

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_standard_scale_meta(name, namespace)

    # Only header positions listed in std_scale_column_idx get an entry.
    standard_scale_param_dict = {}
    for col_idx, col_name in enumerate(self.header):
        if col_idx not in self.std_scale_column_idx:
            continue
        standard_scale_param_dict[col_name] = feature_scale_param_pb2.StandardScaleParam(
            mean=self.mean[col_idx],
            scale=self.std[col_idx])

    param_buffer_type = "{}.param".format(self.class_name)
    model_manager.save_model(
        buffer_type=param_buffer_type,
        proto_buffer=feature_scale_param_pb2.ScaleParam(
            standard_scale_param=standard_scale_param_dict),
        name=name,
        namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def save_model(self, name, namespace):
    """
    Save the LR meta (via self._save_meta) and the LR param protobuf

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_meta(name, namespace)

    # NOTE(review): the original comment here read "In case arbiter has no
    # header", but a None header would still fail in enumerate() below -
    # confirm what the arbiter role actually stores in self.header.
    header = self.header

    # Map every header name to the coefficient at the same position.
    weight_dict = {col_name: self.coef_[pos] for pos, col_name in enumerate(header)}

    param_fields = dict(
        iters=self.n_iter_,
        loss_history=self.loss_history,
        is_converged=self.is_converged,
        weight=weight_dict,
        intercept=self.intercept_,
        header=header)
    param_protobuf_obj = lr_model_param_pb2.LRModelParam(**param_fields)

    buffer_type = "{}.param".format(self.class_name)
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=param_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return [(meta_buffer_type, buffer_type)]
def save_model(self, name, namespace):
    """
    Save the binning meta (via self._save_meta) and the IV results of
    self.iv_attrs / self.host_iv_attrs as a FeatureBinningParam

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_meta(name, namespace)

    local_iv_params = []
    for position, attr in enumerate(self.iv_attrs):
        LOGGER.debug("{}th iv attr: {}".format(position, attr.__dict__))
        local_iv_params.append(
            feature_binning_param_pb2.IVParam(**attr.result_dict()))

    # host_iv_attrs may be None, in which case the host list stays empty.
    host_attrs = self.host_iv_attrs
    if host_attrs is None:
        host_iv_params = []
    else:
        host_iv_params = [
            feature_binning_param_pb2.IVParam(**attr.result_dict())
            for attr in host_attrs
        ]

    result_obj = feature_binning_param_pb2.FeatureBinningParam(
        iv_result=local_iv_params,
        host_iv_result=host_iv_params,
        cols=self.cols)

    param_buffer_type = "HeteroFeatureBinningGuest.param"
    model_manager.save_model(buffer_type=param_buffer_type,
                             proto_buffer=result_obj,
                             name=name,
                             namespace=namespace)
    return [(meta_buffer_type, param_buffer_type)]
def save_min_max_model(self, name, namespace):
    """
    Save MinMaxScaler meta and param

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_min_max_meta(name, namespace)

    min_max_scale_param_dict = {}
    scale_values = self.cols_scale_value
    if scale_values is not None:
        for col_idx, col_name in enumerate(self.header):
            # Each row is read as [feat_lower, feat_upper, out_lower, out_upper].
            bounds = scale_values[col_idx]
            feat_lower = bounds[0]
            feat_upper = bounds[1]
            out_lower = bounds[2]
            out_upper = bounds[3]
            min_max_scale_param_dict[col_name] = feature_scale_param_pb2.MinMaxScaleParam(
                feat_upper=feat_upper,
                feat_lower=feat_lower,
                out_upper=out_upper,
                out_lower=out_lower)

    param_buffer_type = "{}.param".format(self.class_name)
    model_manager.save_model(
        buffer_type=param_buffer_type,
        proto_buffer=feature_scale_param_pb2.ScaleParam(
            minmax_scale_param=min_max_scale_param_dict),
        name=name,
        namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def _save_pipeline(self):
    """
    Save self.pipeline under the "Pipeline" buffer type, using the model
    table and model namespace configured in self.workflow_param
    """
    workflow_param = self.workflow_param
    model_manager.save_model(
        buffer_type="Pipeline",
        proto_buffer=self.pipeline,
        name=workflow_param.model_table,
        namespace=workflow_param.model_namespace)
def save_outlier_model(outlier_replace=False, outlier_replace_method=None,
                       outlier_impute=None, outlier_replace_value=None,
                       header=None, model_name="Outlier", model_table=None,
                       model_namespace=None):
    """
    Build an OutlierMeta / OutlierParam pair and save both through
    manager.save_model

    Parameters
    ----------
    outlier_replace: bool, stored as is_outlier; the remaining outlier
        settings are only recorded when it is truthy
    outlier_replace_method: stored as the meta strategy (via str) when truthy
    outlier_impute: iterable, values stringified into meta.outlier_value
        when truthy
    outlier_replace_value: iterable, values stringified and zipped with
        header into param.outlier_replace_value when truthy
    header: iterable, keys paired with outlier_replace_value
    model_name: str, prefix of the ".meta" / ".param" buffer types
    model_table: str, table name passed to manager.save_model
    model_namespace: str, namespace passed to manager.save_model
    """
    model_meta = OutlierMeta()
    model_param = OutlierParam()

    model_meta.is_outlier = outlier_replace

    # The checks below rely on truthiness, so None and empty values are skipped.
    if outlier_replace and outlier_replace_method:
        model_meta.strategy = str(outlier_replace_method)

    if outlier_replace and outlier_impute:
        model_meta.outlier_value.extend(map(str, outlier_impute))

    if outlier_replace and outlier_replace_value:
        replace_by_column = dict(zip(header, map(str, outlier_replace_value)))
        model_param.outlier_replace_value.update(replace_by_column)

    # Meta first, then param, each under "<model_name><suffix>".
    for suffix, proto in ((".meta", model_meta), (".param", model_param)):
        manager.save_model(buffer_type=model_name + suffix,
                           proto_buffer=proto,
                           name=model_table,
                           namespace=model_namespace)
def save_missing_imputer_model(missing_fill=False,
                               missing_replace_method=None,
                               missing_impute=None,
                               missing_fill_value=None,
                               header=None,
                               model_name="Imputer",
                               model_table=None,
                               model_namespace=None):
    """
    Build an ImputerMeta / ImputerParam pair and save both through
    manager.save_model

    Parameters
    ----------
    missing_fill: bool, stored as is_imputer; the remaining imputer
        settings are only recorded when it is truthy
    missing_replace_method: stored as the meta strategy (via str) when truthy
    missing_impute: iterable or None, values stringified into
        meta.missing_value when not None
    missing_fill_value: iterable or None, values stringified and zipped
        with header into param.missing_replace_value when not None
    header: iterable, keys paired with missing_fill_value
    model_name: str, prefix of the ".meta" / ".param" buffer types
    model_table: str, table name passed to manager.save_model
    model_namespace: str, namespace passed to manager.save_model
    """
    model_meta = ImputerMeta()
    model_param = ImputerParam()

    model_meta.is_imputer = missing_fill

    if missing_fill:
        if missing_replace_method:
            model_meta.strategy = str(missing_replace_method)

        # A single None check is enough; the previous code nested the very
        # same condition twice.
        if missing_impute is not None:
            model_meta.missing_value.extend(map(str, missing_impute))

        if missing_fill_value is not None:
            feature_value_dict = dict(zip(header, map(str, missing_fill_value)))
            model_param.missing_replace_value.update(feature_value_dict)

    manager.save_model(buffer_type=model_name + ".meta",
                       proto_buffer=model_meta,
                       name=model_table,
                       namespace=model_namespace)

    manager.save_model(buffer_type=model_name + ".param",
                       proto_buffer=model_param,
                       name=model_table,
                       namespace=model_namespace)
def save_data_io_model(input_format="dense", delimitor=",", data_type="str",
                       with_label=False, label_idx=0, label_type="int",
                       output_format="dense", header=None, model_name="DataIO",
                       model_table=None, model_namespace=None):
    """
    Build a DataIOMeta / DataIOParam pair from the given settings and save
    both through manager.save_model

    Parameters
    ----------
    input_format, delimitor, data_type, with_label, label_idx, label_type,
    output_format: copied onto the meta fields of the same name
    header: iterable or None, appended to param.header when not None
    model_name: str, prefix of the ".meta" / ".param" buffer types
    model_table: str, table name passed to manager.save_model
    model_namespace: str, namespace passed to manager.save_model
    """
    model_meta = DataIOMeta()
    model_param = DataIOParam()

    # Field name -> value, assigned in the same order as listed.
    meta_settings = (
        ("input_format", input_format),
        ("delimitor", delimitor),
        ("data_type", data_type),
        ("with_label", with_label),
        ("label_idx", label_idx),
        ("label_type", label_type),
        ("output_format", output_format),
    )
    for field_name, field_value in meta_settings:
        setattr(model_meta, field_name, field_value)

    if header is not None:
        model_param.header.extend(header)

    # Meta first, then param, each under "<model_name><suffix>".
    for suffix, proto in ((".meta", model_meta), (".param", model_param)):
        manager.save_model(buffer_type=model_name + suffix,
                           proto_buffer=proto,
                           name=model_table,
                           namespace=model_namespace)
def save_model(self, name, namespace):
    """
    Save every classifier of OneVsRest, then a OneVsRestParam that lists
    them. Only a model param is written here; the returned meta buffer
    type is the literal string 'None'.

    Parameters
    ----------
    name: str, table name the OneVsRestParam is saved under
    namespace: str, namespace used for every classifier and for the
        OneVsRestParam

    Returns
    ----------
    list, holding one ('None', param buffer type) tuple
    """
    # One timestamp is shared by the names of all classifiers of this call.
    timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())

    classifier_models = []
    for position, classifier in enumerate(self.models):
        classifier_name = "_".join((timestamp, str(position), self.role)) + "_name"
        classifier.save_model(classifier_name, namespace)
        classifier_models.append(
            one_vs_rest_param_pb2.ClassifierModel(name=classifier_name,
                                                  namespace=namespace))
        LOGGER.info("finish save model_{}, role:{}".format(position, self.role))

    one_vs_rest_param_obj = one_vs_rest_param_pb2.OneVsRestParam(
        classes=list(map(str, self.classes)),
        classifier_models=classifier_models)

    param_buffer_type = "{}.param".format(self.class_name)
    model_manager.save_model(buffer_type=param_buffer_type,
                             proto_buffer=one_vs_rest_param_obj,
                             name=name,
                             namespace=namespace)

    meta_buffer_type = 'None'
    LOGGER.info("finish OneVsRest save model.")
    return [(meta_buffer_type, param_buffer_type)]
def _save_meta(self, name, namespace):
    """
    Save a OneHotMeta built from self.cols

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type used ("OneHotEncoder.meta")
    """
    buffer_type = "OneHotEncoder.meta"
    model_manager.save_model(
        buffer_type=buffer_type,
        proto_buffer=onehot_meta_pb2.OneHotMeta(cols=self.cols),
        name=name,
        namespace=namespace)
    return buffer_type
def save_model(self, name, namespace):
    """
    Save the feature selection meta (via self._save_meta) and a
    FeatureSelectionParam built from self.results

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_meta(name, namespace)

    param_buffer_type = "HeteroFeatureSelectionGuest.param"
    selection_param = feature_selection_param_pb2.FeatureSelectionParam(
        results=self.results)
    model_manager.save_model(buffer_type=param_buffer_type,
                             proto_buffer=selection_param,
                             name=name,
                             namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def save_model(self, model_table, model_namespace):
    """
    Save the protobufs returned by self.get_model_meta() and
    self.get_model_param() through manager.save_model

    Parameters
    ----------
    model_table: str, table name passed to manager.save_model
    model_namespace: str, namespace passed to manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    LOGGER.info("save model")

    meta_name, meta_protobuf = self.get_model_meta()
    param_name, param_protobuf = self.get_model_param()

    # Meta is written before param.
    pending = ((meta_name, meta_protobuf), (param_name, param_protobuf))
    for buffer_type, proto in pending:
        manager.save_model(buffer_type=buffer_type,
                           proto_buffer=proto,
                           name=model_table,
                           namespace=model_namespace)

    return [(meta_name, param_name)]
def _save_standard_scale_meta(self, name, namespace):
    """
    Save the standard scale meta protobuf through model_manager.save_model

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type the meta was saved under ("<class_name>.meta")
    """
    scale_param = self.scale_param

    standard_scale_meta = feature_scale_meta_pb2.StandardScaleMeta(
        with_mean=scale_param.with_mean,
        with_std=scale_param.with_std)

    meta_protobuf_obj = feature_scale_meta_pb2.ScaleMeta(
        is_scale=True,
        strategy=scale_param.method,
        standard_scale_meta=standard_scale_meta)

    buffer_type = "{}.meta".format(self.class_name)
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=meta_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return buffer_type
def save_model(self, name, namespace):
    """
    Save the feature selection meta (via self._save_meta) and a
    FeatureSelectionParam carrying self.results plus the original/left
    column lists

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_meta(name, namespace)

    final_left_cols = feature_selection_param_pb2.LeftCols(
        original_cols=self.cols,
        left_cols=self.left_cols)
    selection_param = feature_selection_param_pb2.FeatureSelectionParam(
        results=self.results,
        final_left_cols=final_left_cols)

    # The buffer type is specific to this party's name.
    param_buffer_type = "HeteroFeatureSelection{}.param".format(self.party_name)
    model_manager.save_model(buffer_type=param_buffer_type,
                             proto_buffer=selection_param,
                             name=name,
                             namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def _save_meta(self, name, namespace):
    """
    Save a FeatureBinningMeta built from self.bin_param and self.cols

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type used ("HeteroFeatureBinning<party_name>.meta")
    """
    bin_param = self.bin_param

    meta_fields = dict(
        method=bin_param.method,
        compress_thres=bin_param.compress_thres,
        head_size=bin_param.head_size,
        error=bin_param.error,
        bin_num=bin_param.bin_num,
        cols=self.cols,
        adjustment_factor=bin_param.adjustment_factor,
        local_only=bin_param.local_only)
    meta_protobuf_obj = feature_binning_meta_pb2.FeatureBinningMeta(**meta_fields)

    buffer_type = "HeteroFeatureBinning{}.meta".format(self.party_name)
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=meta_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return buffer_type
def _save_meta(self, name, namespace):
    """
    Save a FeatureSelectionMeta built from this object's filter settings
    and per-filter meta attributes

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type used ("HeteroFeatureSelection<party_name>.meta")
    """
    meta_fields = dict(
        filter_methods=self.filter_method,
        local_only=self.params.local_only,
        cols=self.cols,
        unique_meta=self.unique_meta,
        iv_value_meta=self.iv_value_meta,
        iv_percentile_meta=self.iv_percentile_meta,
        coe_meta=self.coe_meta,
        outlier_meta=self.outlier_meta)
    meta_protobuf_obj = feature_selection_meta_pb2.FeatureSelectionMeta(**meta_fields)

    buffer_type = "HeteroFeatureSelection{}.meta".format(self.party_name)
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=meta_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return buffer_type
def _save_meta(self, name, namespace):
    """
    Save an LRModelMeta built from self.param and this object's training
    settings

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type the meta was saved under ("<class_name>.meta")
    """
    param = self.param

    meta_fields = dict(
        penalty=param.penalty,
        eps=self.eps,
        alpha=self.alpha,
        optimizer=param.optimizer,
        party_weight=param.party_weight,
        batch_size=self.batch_size,
        learning_rate=self.learning_rate,
        max_iter=self.max_iter,
        converge_func=param.converge_func,
        re_encrypt_batches=param.re_encrypt_batches)
    meta_protobuf_obj = lr_model_meta_pb2.LRModelMeta(**meta_fields)

    buffer_type = "{}.meta".format(self.class_name)
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=meta_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return buffer_type
def save_model(self, name, namespace):
    """
    Save the one-hot meta (via self._save_meta) and a OneHotParam that
    wraps every entry of self.col_maps in a ColDict

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_meta(name, namespace)

    col_map = {
        col_name: onehot_param_pb2.ColDict(encode_map=encode_map)
        for col_name, encode_map in self.col_maps.items()
    }
    one_hot_param = onehot_param_pb2.OneHotParam(col_map=col_map)

    param_buffer_type = "OneHotEncoder.param"
    model_manager.save_model(buffer_type=param_buffer_type,
                             proto_buffer=one_hot_param,
                             name=name,
                             namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def save_standard_scale_model(self, name, namespace):
    """
    Save StandardScaler meta and param; every header column gets a
    StandardScaleParam with the mean and std at the same position

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_standard_scale_meta(name, namespace)

    standard_scale_param_dict = {}
    for col_idx, col_name in enumerate(self.header):
        standard_scale_param_dict[col_name] = feature_scale_param_pb2.StandardScaleParam(
            mean=self.mean[col_idx],
            scale=self.std[col_idx])

    param_buffer_type = "{}.param".format(self.class_name)
    model_manager.save_model(
        buffer_type=param_buffer_type,
        proto_buffer=feature_scale_param_pb2.ScaleParam(
            standard_scale_param=standard_scale_param_dict),
        name=name,
        namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def _save_meta(self, name, namespace):
    """
    Save a FeatureSelectionMeta assembled from deep copies of the
    attribute dicts of self.params' unique / iv / coe / outlier params

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    str, the buffer type used ("HeteroFeatureSelectionGuest.meta")
    """
    params = self.params

    unique_param = feature_selection_meta_pb2.UniqueValueParam(
        **copy.deepcopy(params.unique_param.__dict__))

    iv_dict = copy.deepcopy(params.iv_param.__dict__)
    bin_dict = copy.deepcopy(params.iv_param.bin_param.__dict__)
    # These keys are removed from the copied binning settings; a missing
    # key raises KeyError.
    for dropped_key in ('process_method', 'result_table',
                        'result_namespace', 'display_result'):
        del bin_dict[dropped_key]
    if bin_dict['cols'] == -1:
        bin_dict['cols'] = self.cols
    # NOTE(review): bin_dict is prepared above but never handed to
    # FeatureBinningMeta, so an empty message is stored as bin_param.
    # This looks unintended - confirm before wiring bin_dict in.
    iv_dict["bin_param"] = FeatureBinningMeta()
    iv_param = feature_selection_meta_pb2.IVSelectionParam(**iv_dict)

    coe_param = feature_selection_meta_pb2.CoeffOfVarSelectionParam(
        **copy.deepcopy(params.coe_param.__dict__))

    outlier_param = feature_selection_meta_pb2.OutlierColsSelectionParam(
        **copy.deepcopy(params.outlier_param.__dict__))

    meta_fields = dict(
        filter_methods=self.filter_method,
        local_only=self.params.local_only,
        select_cols=self.header,
        unique_param=unique_param,
        iv_param=iv_param,
        coe_param=coe_param,
        outlier_param=outlier_param)
    meta_protobuf_obj = feature_selection_meta_pb2.FeatureSelectionMeta(**meta_fields)

    buffer_type = "HeteroFeatureSelectionGuest.meta"
    model_manager.save_model(buffer_type=buffer_type,
                             proto_buffer=meta_protobuf_obj,
                             name=name,
                             namespace=namespace)
    return buffer_type
def save_min_max_model(self, name, namespace):
    """
    Save MinMaxScaler meta and param

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """
    meta_buffer_type = self._save_min_max_meta(name, namespace)

    # cols_scale_res[0] holds the per-column values, cols_scale_res[1] the
    # indexes of the columns to record.
    scale_result = self.cols_scale_res
    cols_scale_value = scale_result[0]
    scale_column_idx = scale_result[1]

    min_max_scale_param_dict = {}
    if cols_scale_value is not None:
        for col_idx, col_name in enumerate(self.header):
            if col_idx not in scale_column_idx:
                continue
            # Each row is read as [feat_lower, feat_upper, out_lower, out_upper].
            bounds = cols_scale_value[col_idx]
            feat_lower = bounds[0]
            feat_upper = bounds[1]
            out_lower = bounds[2]
            out_upper = bounds[3]
            min_max_scale_param_dict[col_name] = feature_scale_param_pb2.MinMaxScaleParam(
                feat_upper=feat_upper,
                feat_lower=feat_lower,
                out_upper=out_upper,
                out_lower=out_lower)

    param_buffer_type = "{}.param".format(self.class_name)
    model_manager.save_model(
        buffer_type=param_buffer_type,
        proto_buffer=feature_scale_param_pb2.ScaleParam(
            minmax_scale_param=min_max_scale_param_dict),
        name=name,
        namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]
def save_model(self, name, namespace, binning_result=None, host_results=None):
    """
    Save the binning meta (via self._save_meta) and a FeatureBinningParam
    holding this party's binning result plus one result per host

    Parameters
    ----------
    name: str, table name passed to model_manager.save_model
    namespace: str, namespace passed to model_manager.save_model
    binning_result: dict or None, column name -> iv attr object; falls
        back to self.binning_result when None
    host_results: dict or None, host id -> (column name -> iv attr
        object); falls back to self.host_results when None

    Returns
    ----------
    list, holding one (meta buffer type, param buffer type) tuple
    """

    def _as_binning_result(iv_attr_by_col):
        # Convert a {column: iv attr} dict into a FeatureBinningResult.
        iv_params = {
            col_name: feature_binning_param_pb2.IVParam(**iv_attr.result_dict())
            for col_name, iv_attr in iv_attr_by_col.items()
        }
        return feature_binning_param_pb2.FeatureBinningResult(binning_result=iv_params)

    if binning_result is None:
        binning_result = self.binning_result
    if host_results is None:
        host_results = self.host_results

    meta_buffer_type = self._save_meta(name, namespace)

    binning_result_obj = _as_binning_result(binning_result)
    final_host_results = {
        host_id: _as_binning_result(this_host_results)
        for host_id, this_host_results in host_results.items()
    }

    result_obj = feature_binning_param_pb2.FeatureBinningParam(
        binning_result=binning_result_obj,
        host_results=final_host_results)

    param_buffer_type = "HeteroFeatureBinning{}.param".format(self.party_name)
    model_manager.save_model(buffer_type=param_buffer_type,
                             proto_buffer=result_obj,
                             name=name,
                             namespace=namespace)

    return [(meta_buffer_type, param_buffer_type)]