def feature_importance_converter(model_meta, model_param):
    # extract feature importance from model param
    fid_mapping = dict(model_param.feature_name_fid_mapping)
    feat_importance_list = list(model_param.feature_importances)
    fids = list(fid_mapping.keys())
    cols_names, importance_val = [], []
    for feat_importance in feat_importance_list:
        fid = feat_importance.fid
        importance = feat_importance.importance
        feature_name = fid_mapping[fid]
        cols_names.append(feature_name)
        importance_val.append(importance)

    # features that never appear in the importance list get importance 0
    for fid in fids:
        if fid_mapping[fid] not in cols_names:
            cols_names.append(fid_mapping[fid])
            importance_val.append(0)

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(importance_val),
        col_names=cols_names
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result

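# --- Illustrative usage sketch (not part of FATE): a duck-typed stand-in for the
# protobuf model_param, showing the fields feature_importance_converter reads.
# The names fake_param and the toy feature names are hypothetical.
#
# from types import SimpleNamespace
# fake_param = SimpleNamespace(
#     feature_name_fid_mapping={0: "x0", 1: "x1", 2: "x2"},
#     feature_importances=[
#         SimpleNamespace(fid=0, importance=0.7),
#         SimpleNamespace(fid=2, importance=0.3),
#     ],
# )
# iso = feature_importance_converter(model_meta=None, model_param=fake_param)
# # "x1" never appears in feature_importances, so it is padded with importance 0.
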
def convert(self, model_meta, model_param):
    values_dict = dict(model_param.binning_result.binning_result)
    values = []
    col_names = []
    for n, v in values_dict.items():
        values.append(v.iv)
        col_names.append(n)

    host_results = list(model_param.host_results)
    LOGGER.debug(f"In binning adapter convert, host_results: {host_results}")
    host_party_ids = [int(x.party_id) for x in host_results]
    host_values = []
    host_col_names = []
    for host_obj in host_results:
        binning_result = dict(host_obj.binning_result)
        h_values = []
        h_col_names = []
        for n, v in binning_result.items():
            h_values.append(v.iv)
            h_col_names.append(n)
        host_values.append(np.array(h_values))
        host_col_names.append(h_col_names)
    LOGGER.debug(f"host_party_ids: {host_party_ids}, host_values: {host_values}, "
                 f"host_col_names: {host_col_names}")

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(values),
        col_names=col_names,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.IV, metric_info=single_info)
    return result

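# --- Note on the SingleMetricInfo layout built above (inferred from how it is
# consumed later in _merge_iv): values / col_names describe the local side, while
# host_values is a list of numpy arrays aligned index-by-index with host_party_ids
# and host_col_names, one entry per host party.
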
def _load_one_class(self, local_result, remote_results):
    values_dict = dict(local_result.binning_result)
    values_sorted_dict = sorted(values_dict.items(), key=operator.itemgetter(0))
    values = []
    col_names = []
    for n, v in values_sorted_dict:
        values.append(v.iv)
        col_names.append(n)

    host_party_ids = [int(x.party_id) for x in remote_results]
    host_values = []
    host_col_names = []
    for host_obj in remote_results:
        binning_result = dict(host_obj.binning_result)
        h_values = []
        h_col_names = []
        for n, v in binning_result.items():
            h_values.append(v.iv)
            h_col_names.append(n)
        host_values.append(np.array(h_values))
        host_col_names.append(h_col_names)
    LOGGER.debug(f"host_party_ids: {host_party_ids}")

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(values),
        col_names=col_names,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names
    )
    return single_info

def feature_importance_with_anonymous_converter(model_meta, model_param):
    # extract feature importance from model param
    fid_mapping = dict(model_param.feature_name_fid_mapping)
    feat_importance_list = list(model_param.feature_importances)
    guest_fids = list(fid_mapping.keys())
    guest_cols, guest_val = [], []
    # key is the int host party id; value is a dict with two keys: col_name and value
    host_side_data = {}
    for feat_importance in feat_importance_list:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_name = feat_importance.sitename
        site_name = site_name.split(':')
        if site_name[0] == consts.HOST:
            host_id = int(site_name[1])
            if host_id not in host_side_data:
                host_side_data[host_id] = {'col_name': [], 'value': []}
            host_col_name = generate_anonymous(fid, host_id, role=consts.HOST)
            host_side_data[host_id]['col_name'].append(host_col_name)
            host_side_data[host_id]['value'].append(importance)
        else:
            guest_cols.append(fid_mapping[fid])
            guest_val.append(importance)

    # guest features missing from the importance list get importance 0
    for fid in guest_fids:
        if fid_mapping[fid] not in guest_cols:
            guest_cols.append(fid_mapping[fid])
            guest_val.append(0)

    host_party_ids = []
    host_values = []
    host_col_names = []
    for hid in host_side_data:
        host_party_ids.append(hid)
        host_values.append(host_side_data[hid]['value'])
        host_col_names.append(host_side_data[hid]['col_name'])

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(guest_val),
        col_names=guest_cols,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names
    )
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result

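# --- Illustrative sketch (hypothetical stand-ins, not FATE protobufs) of how this
# converter splits importances by sitename prefix: "host:<party_id>" entries are
# re-labelled with anonymous column names via generate_anonymous, everything else
# is treated as a guest feature.
#
# from types import SimpleNamespace
# fake_param = SimpleNamespace(
#     feature_name_fid_mapping={0: "x0", 1: "x1"},
#     feature_importances=[
#         SimpleNamespace(fid=0, importance=0.6, sitename="guest:9999"),
#         SimpleNamespace(fid=0, importance=0.4, sitename="host:10000"),
#     ],
# )
# iso = feature_importance_with_anonymous_converter(None, fake_param)
# # guest columns: ["x0", "x1"] ("x1" padded with 0); the host importance is stored
# # under the anonymous name returned by generate_anonymous(0, 10000, role=consts.HOST).
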
def convert(self, model_meta, model_param):
    result = isometric_model.IsometricModel()
    self_values = model_param.self_values
    for value_obj in list(self_values.results):
        metric_name = value_obj.value_name
        values = list(value_obj.values)
        col_names = list(value_obj.col_names)
        if len(values) != len(col_names):
            raise ValueError(
                f"The length of values is not equal to the length"
                f" of col_names with metric_name: {metric_name}")
        metric_info = isometric_model.SingleMetricInfo(values, col_names)
        result.add_metric_value(metric_name, metric_info)
    return result

def convert(self, model_meta, model_param):
    psi_scores = dict(model_param.total_score)
    col_names, values = [], []
    for name in psi_scores:
        col_names.append(name)
        values.append(psi_scores[name])

    single_info = isometric_model.SingleMetricInfo(values=np.array(values),
                                                   col_names=col_names)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.PSI, metric_info=single_info)
    return result

def _merge_iv(self):
    metric_infos = self.iso_model.get_all_metric_info()
    col_names = metric_infos[0].col_names
    host_party_ids = metric_infos[0].host_party_ids
    host_col_names = metric_infos[0].host_col_names
    values = metric_infos[0].values
    host_values = np.array(metric_infos[0].host_values)

    if self.merge_type == "max":
        for m in metric_infos[1:]:
            values = np.maximum(values, m.values)
            host_values = np.maximum(host_values, m.host_values)
    elif self.merge_type == "min":
        for m in metric_infos[1:]:
            values = np.minimum(values, m.values)
            host_values = np.minimum(host_values, m.host_values)
    else:
        for m in metric_infos[1:]:
            values += m.values
            host_values += m.host_values

    if self.merge_type == 'average':
        values /= len(metric_infos)
        host_values /= len(metric_infos)

    single_info = isometric_model.SingleMetricInfo(
        values=values,
        col_names=col_names,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names)
    return single_info

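# --- Toy illustration (numpy only) of the three merge strategies above, applied to
# per-feature IV vectors from multiple metric infos (for example, one per class);
# the values are made up:
#
# import numpy as np
# ivs = [np.array([0.30, 0.10]), np.array([0.20, 0.40])]
# np.maximum(ivs[0], ivs[1])      # "max"     -> [0.30, 0.40]
# np.minimum(ivs[0], ivs[1])      # "min"     -> [0.20, 0.10]
# (ivs[0] + ivs[1]) / len(ivs)    # "average" -> [0.25, 0.25]
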
def feature_importance_with_anonymous_converter(model_meta, model_param):
    # extract feature importance from model param
    fid_mapping = dict(model_param.feature_name_fid_mapping)
    feat_importance_list = list(model_param.feature_importances)
    local_fids = list(fid_mapping.keys())
    local_cols, local_val = [], []
    for feat_importance in feat_importance_list:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_name = feat_importance.sitename
        if site_name == consts.HOST_LOCAL:
            local_cols.append(fid_mapping[fid])
            local_val.append(importance)
        else:
            site_name = site_name.split(':')
            if site_name[0] == consts.HOST:
                # importances belonging to remote hosts are skipped on this side
                continue
            local_cols.append(fid_mapping[fid])
            local_val.append(importance)

    # local features missing from the importance list get importance 0
    for fid in local_fids:
        if fid_mapping[fid] not in local_cols:
            local_cols.append(fid_mapping[fid])
            local_val.append(0)

    single_info = isometric_model.SingleMetricInfo(values=np.array(local_val),
                                                   col_names=local_cols)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result

def convert(self, model_meta, model_param):
    local_vif = model_param.local_vif
    col_names = list(model_param.names)
    local_corr = np.array(model_param.local_corr).reshape(
        model_param.shape, model_param.shape)

    if model_param.corr:
        corr = np.array(model_param.corr).reshape(*model_param.shapes)
        host_names = list(list(model_param.all_names)[1].names)
        parties = list(model_param.parties)
    else:
        corr = None
        host_names = None
        parties = None

    pearson_metric = PearsonMetricInfo(local_corr=local_corr,
                                       col_names=col_names,
                                       corr=corr,
                                       host_col_names=host_names,
                                       parties=parties)
    single_info = isometric_model.SingleMetricInfo(values=local_vif,
                                                   col_names=col_names)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.VIF, metric_info=single_info)
    result.add_metric_value(metric_name=consts.PEARSON, metric_info=pearson_metric)
    return result

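# --- Toy illustration of restoring the flattened correlation payload above to a
# square matrix; the numbers are made up, the side length comes from model_param.shape:
#
# import numpy as np
# flat_corr = [1.0, 0.2, 0.2, 1.0]
# local_corr = np.array(flat_corr).reshape(2, 2)
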