def feature_importance_converter(model_meta, model_param):
    """Convert a model's feature importances into an ``IsometricModel``.

    Each entry of ``model_param.feature_importances`` contributes its
    importance under the feature name looked up through
    ``model_param.feature_name_fid_mapping``. Features present in the mapping
    but not yet listed are appended afterwards with an importance of 0.

    Args:
        model_meta: Not read by this function.
        model_param: Object exposing ``feature_name_fid_mapping`` (anything
            ``dict()`` accepts, mapping fid -> feature name) and
            ``feature_importances`` (iterable of items with ``fid`` and
            ``importance`` attributes).

    Returns:
        An ``isometric_model.IsometricModel`` holding a single metric
        registered under ``consts.FEATURE_IMPORTANCE``.

    Raises:
        KeyError: If an importance entry refers to a fid that is missing
            from the mapping.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)

    col_names, importance_values = [], []
    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        col_names.append(fid_mapping[fid])
        importance_values.append(importance)

    # Set mirror of col_names so the "already listed?" test is O(1) instead
    # of a linear scan of the list for every mapped feature.
    listed_names = set(col_names)
    for feature_name in fid_mapping.values():
        if feature_name not in listed_names:
            listed_names.add(feature_name)
            col_names.append(feature_name)
            importance_values.append(0)

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(importance_values),
        col_names=col_names,
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
def convert(self, model_meta, model_param):
    """Build an ``IsometricModel`` carrying the IV metric of a binning model.

    The local IV values are read from
    ``model_param.binning_result.binning_result`` (column name -> object with
    an ``iv`` attribute). For every item in ``model_param.host_results`` the
    party id, the IV values (as a numpy array) and the column names are
    collected into parallel lists.

    Args:
        model_meta: Not read by this method.
        model_param: Object exposing ``binning_result.binning_result`` and
            ``host_results``; each host result provides ``party_id`` and
            ``binning_result``.

    Returns:
        An ``isometric_model.IsometricModel`` holding a single metric
        registered under ``consts.IV``.
    """
    local_binning = dict(model_param.binning_result.binning_result)
    col_names = list(local_binning)
    values = [col_result.iv for col_result in local_binning.values()]

    host_results = list(model_param.host_results)
    LOGGER.debug(
        f"In binning adapter convert, host_results: {host_results}")

    host_party_ids = [int(host.party_id) for host in host_results]

    # One entry per host, aligned with host_party_ids.
    host_values, host_col_names = [], []
    for host in host_results:
        host_binning = dict(host.binning_result)
        host_values.append(
            np.array([col_result.iv for col_result in host_binning.values()]))
        host_col_names.append(list(host_binning))

    LOGGER.debug(
        f"host_party_ids: {host_party_ids}, host_values: {host_values},"
        f"host_col_names: {host_col_names}")

    iv_info = isometric_model.SingleMetricInfo(
        values=np.array(values),
        col_names=col_names,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names,
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.IV, metric_info=iv_info)
    return result
def convert(self, model_meta, model_param):
    """Build an ``IsometricModel`` from a multi-class binning result.

    One metric value named ``"iv"`` is added per entry of
    ``multi_class_result.results``. The host results paired with each entry
    depend on the number of labels:

    * exactly two labels: the complete host result list is passed;
    * otherwise, when ``has_host_result`` is truthy: only the host results
      whose position ``i`` satisfies ``i % label_count == class index``;
    * otherwise: an empty list.

    Args:
        model_meta: Not read by this method.
        model_param: Object exposing ``multi_class_result`` with ``labels``,
            ``results``, ``host_results`` and ``has_host_result``.

    Returns:
        The populated ``isometric_model.IsometricModel``.
    """
    mc_result = model_param.multi_class_result
    host_available = mc_result.has_host_result
    n_labels = len(list(mc_result.labels))
    per_class_results = list(mc_result.results)
    all_host_results = list(mc_result.host_results)

    result = isometric_model.IsometricModel()
    for class_idx, class_result in enumerate(per_class_results):
        if n_labels == 2:
            matched_hosts = all_host_results
        elif host_available:
            # Keep only the host entries at positions congruent to this
            # class index modulo the label count.
            matched_hosts = [
                host
                for position, host in enumerate(all_host_results)
                if (position % n_labels) == class_idx
            ]
        else:
            matched_hosts = []

        result.add_metric_value(
            metric_name="iv",
            metric_info=self._load_one_class(class_result, matched_hosts),
        )
    return result
def feature_importance_with_anonymous_converter(model_meta, model_param):
    """Convert feature importances into an ``IsometricModel``, split by site.

    Each importance entry carries a ``sitename`` that is split on ``':'``.
    When the first part equals ``consts.HOST`` the second part is parsed as
    the integer host party id, and the feature is recorded for that host
    under the name returned by ``generate_anonymous``. Every other entry is
    recorded as a guest feature under its name from
    ``model_param.feature_name_fid_mapping``. Mapped features that are not
    yet listed on the guest side are appended with an importance of 0.

    Args:
        model_meta: Not read by this function.
        model_param: Object exposing ``feature_name_fid_mapping`` (fid ->
            feature name) and ``feature_importances`` (iterable of items with
            ``fid``, ``importance`` and ``sitename`` attributes).

    Returns:
        An ``isometric_model.IsometricModel`` holding a single metric
        registered under ``consts.FEATURE_IMPORTANCE``; the metric carries
        the guest values plus per-host party ids, values and column names.

    Raises:
        KeyError: If a non-host entry refers to a fid missing from the
            mapping.
        IndexError: If a host ``sitename`` has no ``':'``-separated id part.
        ValueError: If the id part of a host ``sitename`` is not an integer.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)

    guest_cols, guest_val = [], []
    # int host party id -> {'col_name': [...], 'value': [...]}
    host_side_data = {}

    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_parts = feat_importance.sitename.split(':')

        if site_parts[0] == consts.HOST:
            host_id = int(site_parts[1])
            host_entry = host_side_data.setdefault(
                host_id, {'col_name': [], 'value': []})
            host_entry['col_name'].append(
                generate_anonymous(fid, host_id, role=consts.HOST))
            host_entry['value'].append(importance)
        else:
            guest_cols.append(fid_mapping[fid])
            guest_val.append(importance)

    # Set mirror of guest_cols so the "already listed?" test is O(1) instead
    # of a linear scan of the list for every mapped feature.
    listed_names = set(guest_cols)
    for feature_name in fid_mapping.values():
        if feature_name not in listed_names:
            listed_names.add(feature_name)
            guest_cols.append(feature_name)
            guest_val.append(0)

    # Parallel lists, all in the insertion order of host_side_data.
    host_party_ids = list(host_side_data)
    host_values = [entry['value'] for entry in host_side_data.values()]
    host_col_names = [entry['col_name'] for entry in host_side_data.values()]

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(guest_val),
        col_names=guest_cols,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names,
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
def convert(self, model_meta, model_param):
    """Build an ``IsometricModel`` with one metric per stored value object.

    Each item of ``model_param.self_values.results`` supplies a metric name
    (``value_name``), a sequence of values and a sequence of column names.
    The two sequences must have the same length.

    Args:
        model_meta: Not read by this method.
        model_param: Object exposing ``self_values.results``.

    Returns:
        The populated ``isometric_model.IsometricModel``.

    Raises:
        ValueError: If an item's values and column names differ in length.
    """
    result = isometric_model.IsometricModel()
    stored_values = model_param.self_values

    for entry in stored_values.results:
        metric_name = entry.value_name
        entry_values = list(entry.values)
        entry_cols = list(entry.col_names)

        if len(entry_values) != len(entry_cols):
            raise ValueError(
                f"The length of values are not equal to the length"
                f" of col_names with metric_name: {metric_name}")

        result.add_metric_value(
            metric_name,
            isometric_model.SingleMetricInfo(entry_values, entry_cols),
        )
    return result
def convert(self, model_meta, model_param):
    """Build an ``IsometricModel`` carrying the PSI metric.

    ``model_param.total_score`` is read as a mapping of column name -> score;
    the names and scores are passed on in the mapping's iteration order.

    Args:
        model_meta: Not read by this method.
        model_param: Object exposing ``total_score``.

    Returns:
        An ``isometric_model.IsometricModel`` holding a single metric
        registered under ``consts.PSI``.
    """
    psi_scores = dict(model_param.total_score)

    # Keys and values of the same dict iterate in the same order, so the two
    # sequences stay aligned.
    score_names = list(psi_scores)
    score_values = list(psi_scores.values())

    psi_info = isometric_model.SingleMetricInfo(
        values=np.array(score_values),
        col_names=score_names,
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.PSI, metric_info=psi_info)
    return result
def feature_importance_with_anonymous_converter(model_meta, model_param):
    """Convert feature importances into an ``IsometricModel``, local side only.

    An importance entry is skipped when its ``sitename`` differs from
    ``consts.HOST_LOCAL`` and its part before the first ``':'`` equals
    ``consts.HOST``. Every other entry is recorded under its feature name
    from ``model_param.feature_name_fid_mapping``. Mapped features that are
    not yet listed are appended afterwards with an importance of 0.

    Args:
        model_meta: Not read by this function.
        model_param: Object exposing ``feature_name_fid_mapping`` (fid ->
            feature name) and ``feature_importances`` (iterable of items with
            ``fid``, ``importance`` and ``sitename`` attributes).

    Returns:
        An ``isometric_model.IsometricModel`` holding a single metric
        registered under ``consts.FEATURE_IMPORTANCE``.

    Raises:
        KeyError: If a recorded entry refers to a fid missing from the
            mapping.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)

    local_cols, local_val = [], []
    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_name = feat_importance.sitename

        # The HOST_LOCAL check comes first so that such an entry is always
        # kept, whatever its prefix looks like.
        if (site_name != consts.HOST_LOCAL
                and site_name.split(':')[0] == consts.HOST):
            continue

        local_cols.append(fid_mapping[fid])
        local_val.append(importance)

    # Set mirror of local_cols so the "already listed?" test is O(1) instead
    # of a linear scan of the list for every mapped feature.
    listed_names = set(local_cols)
    for feature_name in fid_mapping.values():
        if feature_name not in listed_names:
            listed_names.add(feature_name)
            local_cols.append(feature_name)
            local_val.append(0)

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(local_val),
        col_names=local_cols,
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
def convert(self, model_meta, model_param):
    """Build an ``IsometricModel`` carrying the VIF and Pearson metrics.

    ``model_param.local_corr`` is reshaped into a square matrix of side
    ``model_param.shape``. When ``model_param.corr`` is truthy it is reshaped
    to ``model_param.shapes`` and the host column names and party list are
    read as well; otherwise those three values are ``None``.

    Args:
        model_meta: Not read by this method.
        model_param: Object exposing ``local_vif``, ``names``, ``local_corr``,
            ``shape``, ``corr``, ``shapes``, ``all_names`` and ``parties``.

    Returns:
        An ``isometric_model.IsometricModel`` holding two metrics:
        ``consts.VIF`` (a ``SingleMetricInfo`` built from ``local_vif``) and
        ``consts.PEARSON`` (a ``PearsonMetricInfo``).
    """
    local_vif = model_param.local_vif
    col_names = list(model_param.names)
    local_corr = np.array(model_param.local_corr).reshape(
        model_param.shape, model_param.shape)

    if model_param.corr:
        corr = np.array(model_param.corr).reshape(*model_param.shapes)
        # NOTE(review): index 1 of all_names is hard-coded; presumably entry
        # 0 holds the local names and entry 1 the (single) host's names --
        # confirm against the producer of model_param.
        host_names = list(list(model_param.all_names)[1].names)
        parties = list(model_param.parties)
    else:
        corr = None
        host_names = None
        parties = None

    pearson_metric = PearsonMetricInfo(local_corr=local_corr,
                                       col_names=col_names,
                                       corr=corr,
                                       host_col_names=host_names,
                                       parties=parties)
    single_info = isometric_model.SingleMetricInfo(values=local_vif,
                                                   col_names=col_names)

    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.VIF, metric_info=single_info)
    result.add_metric_value(metric_name=consts.PEARSON,
                            metric_info=pearson_metric)
    return result