Ejemplo n.º 1
0
def feature_importance_converter(model_meta, model_param):
    """Convert a model's feature importances into an ``IsometricModel``.

    Each entry of ``model_param.feature_importances`` contributes one
    (feature name, importance) pair; the name is looked up by the entry's
    ``fid`` in ``model_param.feature_name_fid_mapping``. Feature names that
    are present in the mapping but have no importance entry are appended
    afterwards with an importance of 0.

    Args:
        model_meta: Not used by this function.
        model_param: Object exposing ``feature_name_fid_mapping`` and
            ``feature_importances``.

    Returns:
        An ``isometric_model.IsometricModel`` holding a single
        ``consts.FEATURE_IMPORTANCE`` metric.

    Raises:
        KeyError: If an importance entry carries a fid that is missing
            from the mapping.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)

    cols_names, importance_val = [], []
    for feat_importance in model_param.feature_importances:
        cols_names.append(fid_mapping[feat_importance.fid])
        importance_val.append(feat_importance.importance)

    # Membership is tracked in a set so that back-filling the missing
    # features costs O(1) per lookup instead of a scan of the name list.
    seen_names = set(cols_names)
    for feature_name in fid_mapping.values():
        if feature_name not in seen_names:
            seen_names.add(feature_name)
            cols_names.append(feature_name)
            importance_val.append(0)

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(importance_val), col_names=cols_names)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)

    return result
Ejemplo n.º 2
0
 def convert(self, model_meta, model_param):
     """Build an ``IsometricModel`` carrying the ``consts.IV`` metric.

     Local values are the ``iv`` attributes of the entries in
     ``model_param.binning_result.binning_result``; the keys of that
     mapping become the column names. For every element of
     ``model_param.host_results`` the party id, the ``iv`` values and the
     column names are collected in the same way.

     Args:
         model_meta: Not used by this method.
         model_param: Object exposing ``binning_result`` and
             ``host_results``.

     Returns:
         An ``isometric_model.IsometricModel`` with one ``consts.IV``
         metric.
     """
     local_binning = dict(model_param.binning_result.binning_result)
     col_names = list(local_binning)
     values = [local_binning[name].iv for name in col_names]

     host_results = list(model_param.host_results)
     LOGGER.debug(
         f"In binning adapter convert, host_results: {host_results}")

     # Party ids are gathered first, then the per-host values and names.
     host_party_ids = []
     for host_result in host_results:
         host_party_ids.append(int(host_result.party_id))

     host_values, host_col_names = [], []
     for host_result in host_results:
         host_binning = dict(host_result.binning_result)
         host_col_names.append(list(host_binning))
         host_values.append(
             np.array([bin_obj.iv for bin_obj in host_binning.values()]))
     LOGGER.debug(
         f"host_party_ids: {host_party_ids}, host_values: {host_values},"
         f"host_col_names: {host_col_names}")

     result = isometric_model.IsometricModel()
     result.add_metric_value(
         metric_name=consts.IV,
         metric_info=isometric_model.SingleMetricInfo(
             values=np.array(values),
             col_names=col_names,
             host_party_ids=host_party_ids,
             host_values=host_values,
             host_col_names=host_col_names))
     return result
Ejemplo n.º 3
0
 def _load_one_class(self, local_result, remote_results):
     """Assemble a ``SingleMetricInfo`` from local and remote binning results.

     Local columns are emitted in ascending order of their names; the
     columns of each remote result keep the iteration order of that
     result's ``binning_result`` mapping.

     Args:
         local_result: Object whose ``binning_result`` maps a column name
             to an entry with an ``iv`` attribute.
         remote_results: Iterable of objects exposing ``party_id`` and
             ``binning_result``. It is traversed twice.

     Returns:
         An ``isometric_model.SingleMetricInfo`` with the local values
         and names plus the per-host ids, values and names.
     """
     local_bins = dict(local_result.binning_result)
     col_names = sorted(local_bins)
     values = [local_bins[name].iv for name in col_names]

     host_party_ids = []
     for remote in remote_results:
         host_party_ids.append(int(remote.party_id))

     host_values, host_col_names = [], []
     for remote in remote_results:
         remote_bins = dict(remote.binning_result)
         host_col_names.append(list(remote_bins))
         host_values.append(
             np.array([bin_obj.iv for bin_obj in remote_bins.values()]))

     LOGGER.debug(f"host_party_ids: {host_party_ids}")
     return isometric_model.SingleMetricInfo(
         values=np.array(values),
         col_names=col_names,
         host_party_ids=host_party_ids,
         host_values=host_values,
         host_col_names=host_col_names)
Ejemplo n.º 4
0
def feature_importance_with_anonymous_converter(model_meta, model_param):
    """Convert feature importances into an ``IsometricModel`` with host data.

    Each entry of ``model_param.feature_importances`` has a ``sitename``
    that is split on ``':'``. When the first part equals ``consts.HOST``
    the second part is parsed as the integer host party id, and the entry
    is stored for that host under a column name produced by
    ``generate_anonymous(fid, host_id, role=consts.HOST)``. Every other
    entry is stored on the guest side under the name found in
    ``model_param.feature_name_fid_mapping``. Guest features that are in
    the mapping but received no importance entry are appended with an
    importance of 0.

    Args:
        model_meta: Not used by this function.
        model_param: Object exposing ``feature_name_fid_mapping`` and
            ``feature_importances``.

    Returns:
        An ``isometric_model.IsometricModel`` holding a single
        ``consts.FEATURE_IMPORTANCE`` metric with guest and host values.

    Raises:
        KeyError: If a non-host entry carries a fid that is missing from
            the mapping.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)
    guest_cols, guest_val = [], []

    # key is int party id, value is a dict with two keys: col_name and value
    host_side_data = {}

    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_parts = feat_importance.sitename.split(':')
        if site_parts[0] == consts.HOST:
            host_id = int(site_parts[1])
            host_entry = host_side_data.setdefault(
                host_id, {'col_name': [], 'value': []})
            host_entry['col_name'].append(
                generate_anonymous(fid, host_id, role=consts.HOST))
            host_entry['value'].append(importance)
        else:
            guest_cols.append(fid_mapping[fid])
            guest_val.append(importance)

    # Membership is tracked in a set so that back-filling the missing
    # guest features costs O(1) per lookup instead of a list scan.
    seen_guest_cols = set(guest_cols)
    for feature_name in fid_mapping.values():
        if feature_name not in seen_guest_cols:
            seen_guest_cols.add(feature_name)
            guest_cols.append(feature_name)
            guest_val.append(0)

    # The three host lists are index-aligned, in first-seen host order.
    host_party_ids = list(host_side_data)
    host_values = [entry['value'] for entry in host_side_data.values()]
    host_col_names = [entry['col_name'] for entry in host_side_data.values()]

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(guest_val),
        col_names=guest_cols,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
Ejemplo n.º 5
0
    def convert(self, model_meta, model_param):
        """Turn every result in ``model_param.self_values`` into a metric.

        Each element of ``model_param.self_values.results`` supplies a
        metric name (``value_name``), a sequence of values and a sequence
        of column names; these are registered on a fresh
        ``IsometricModel`` one metric at a time.

        Args:
            model_meta: Not used by this method.
            model_param: Object exposing ``self_values.results``.

        Returns:
            The populated ``isometric_model.IsometricModel``.

        Raises:
            ValueError: If a result has a different number of values and
                column names.
        """
        result = isometric_model.IsometricModel()
        for stat in model_param.self_values.results:
            metric_name = stat.value_name
            stat_values = list(stat.values)
            stat_cols = list(stat.col_names)
            if len(stat_values) != len(stat_cols):
                raise ValueError(
                    f"The length of values are not equal to the length"
                    f" of col_names with metric_name: {metric_name}")
            result.add_metric_value(
                metric_name,
                isometric_model.SingleMetricInfo(stat_values, stat_cols))
        return result
Ejemplo n.º 6
0
    def convert(self, model_meta, model_param):
        """Build an ``IsometricModel`` carrying the ``consts.PSI`` metric.

        The keys of ``model_param.total_score`` become the column names
        and the associated scores become the metric values, in the
        mapping's iteration order.

        Args:
            model_meta: Not used by this method.
            model_param: Object exposing ``total_score``.

        Returns:
            An ``isometric_model.IsometricModel`` with one ``consts.PSI``
            metric.
        """
        psi_scores = dict(model_param.total_score)
        col_names = list(psi_scores)
        values = [psi_scores[name] for name in col_names]

        result = isometric_model.IsometricModel()
        result.add_metric_value(
            metric_name=consts.PSI,
            metric_info=isometric_model.SingleMetricInfo(
                values=np.array(values), col_names=col_names))
        return result
Ejemplo n.º 7
0
    def _merge_iv(self):
        """Merge all metric infos of ``self.iso_model`` into one.

        The metric infos returned by
        ``self.iso_model.get_all_metric_info()`` are combined element-wise
        according to ``self.merge_type``:

        * ``"max"``: element-wise maximum,
        * ``"min"``: element-wise minimum,
        * anything else: element-wise sum, additionally divided by the
          number of metric infos when the merge type is ``'average'``.

        Column names, host party ids and host column names are taken from
        the first metric info.

        Returns:
            An ``isometric_model.SingleMetricInfo`` with the merged local
            and host values.

        Raises:
            IndexError: If there are no metric infos to merge.
        """
        metric_infos = self.iso_model.get_all_metric_info()
        first = metric_infos[0]

        # Work on copies: accumulating in place would overwrite the value
        # array owned by the first metric info.
        values = np.array(first.values)
        host_values = np.array(first.host_values)

        if self.merge_type == "max":
            combine = np.maximum
        elif self.merge_type == "min":
            # This branch used to call np.maximum, so "min" silently
            # produced the same result as "max".
            combine = np.minimum
        else:
            combine = np.add

        for m in metric_infos[1:]:
            values = combine(values, m.values)
            host_values = combine(host_values, m.host_values)

        if self.merge_type == 'average':
            # Out-of-place division also works when the sums are integers.
            count = len(metric_infos)
            values = values / count
            host_values = host_values / count

        single_info = isometric_model.SingleMetricInfo(
            values=values,
            col_names=first.col_names,
            host_party_ids=first.host_party_ids,
            host_values=host_values,
            host_col_names=first.host_col_names)
        return single_info
Ejemplo n.º 8
0
def feature_importance_with_anonymous_converter(model_meta, model_param):
    """Convert the locally visible feature importances into an ``IsometricModel``.

    Entries of ``model_param.feature_importances`` are kept unless their
    ``sitename`` differs from ``consts.HOST_LOCAL`` and its part before
    the first ``':'`` equals ``consts.HOST``; those entries are skipped.
    Kept entries are stored under the name found in
    ``model_param.feature_name_fid_mapping``. Features that are in the
    mapping but received no importance entry are appended with an
    importance of 0.

    Args:
        model_meta: Not used by this function.
        model_param: Object exposing ``feature_name_fid_mapping`` and
            ``feature_importances``.

    Returns:
        An ``isometric_model.IsometricModel`` holding a single
        ``consts.FEATURE_IMPORTANCE`` metric.

    Raises:
        KeyError: If a kept entry carries a fid that is missing from the
            mapping.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)
    local_cols, local_val = [], []

    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_name = feat_importance.sitename
        # Only entries tagged with the HOST prefix (and not HOST_LOCAL)
        # are dropped; everything else is recorded as a local column.
        if (site_name != consts.HOST_LOCAL
                and site_name.split(':')[0] == consts.HOST):
            continue
        local_cols.append(fid_mapping[fid])
        local_val.append(importance)

    # Membership is tracked in a set so that back-filling the missing
    # features costs O(1) per lookup instead of a list scan.
    seen_local_cols = set(local_cols)
    for feature_name in fid_mapping.values():
        if feature_name not in seen_local_cols:
            seen_local_cols.add(feature_name)
            local_cols.append(feature_name)
            local_val.append(0)

    single_info = isometric_model.SingleMetricInfo(values=np.array(local_val),
                                                   col_names=local_cols)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
Ejemplo n.º 9
0
    def convert(self, model_meta, model_param):
        """Build an ``IsometricModel`` with ``consts.VIF`` and ``consts.PEARSON``.

        The VIF metric wraps ``model_param.local_vif`` together with the
        column names from ``model_param.names``. The Pearson metric wraps
        the local correlation matrix (``model_param.local_corr`` reshaped
        to ``shape x shape``) and, when ``model_param.corr`` is non-empty,
        the cross correlation matrix reshaped to ``model_param.shapes``
        plus host column names and parties; otherwise those three are
        ``None``.

        Args:
            model_meta: Not used by this method.
            model_param: Object exposing ``local_vif``, ``names``,
                ``local_corr``, ``shape``, ``corr``, ``shapes``,
                ``all_names`` and ``parties``.

        Returns:
            An ``isometric_model.IsometricModel`` with the two metrics.
        """
        # The previous version imported LOGGER locally and looped over the
        # matrices only to feed commented-out debug statements; that dead
        # code has been removed.
        local_vif = model_param.local_vif
        col_names = list(model_param.names)
        local_corr = np.array(model_param.local_corr).reshape(
            model_param.shape, model_param.shape)

        if model_param.corr:
            corr = np.array(model_param.corr).reshape(*model_param.shapes)
            # NOTE(review): entry 1 of ``all_names`` is presumably the
            # host side's name list - confirm against the producer.
            host_names = list(list(model_param.all_names)[1].names)
            parties = list(model_param.parties)
        else:
            corr = None
            host_names = None
            parties = None

        pearson_metric = PearsonMetricInfo(local_corr=local_corr,
                                           col_names=col_names,
                                           corr=corr,
                                           host_col_names=host_names,
                                           parties=parties)
        single_info = isometric_model.SingleMetricInfo(values=local_vif,
                                                       col_names=col_names)

        result = isometric_model.IsometricModel()
        result.add_metric_value(metric_name=consts.VIF,
                                metric_info=single_info)
        result.add_metric_value(metric_name=consts.PEARSON,
                                metric_info=pearson_metric)
        return result