Exemple #1
0
    def filter_one_method(self, data_instances, method):
        """Run the single filter identified by ``method`` (host-side variant).

        For the two IV-based methods this calls ``_send_select_cols`` and then
        ``_received_result_cols`` with the method constant. For the other
        methods a filter object is built locally, fitted on
        ``data_instances``, and its meta/param objects are stored on ``self``.
        A ``method`` matching none of the constants below does nothing.

        :param data_instances: data handed to the local filters' ``fit``.
        :param method: filter-method identifier compared against ``consts``.
        """
        # NOTE(review): the method constants are assumed to be pairwise
        # distinct, so at most one branch can match -- confirm in consts.
        if method == consts.IV_VALUE_THRES:
            LOGGER.debug("In host party, sending select_cols")
            self._send_select_cols(consts.IV_VALUE_THRES)
            self._received_result_cols(filter_name=consts.IV_VALUE_THRES)
            summary = (
                "[Result][FeatureSelection][Host]Finish iv value threshold filter. Current left cols are: {}"
            )
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.IV_PERCENTILE:
            self._send_select_cols(consts.IV_PERCENTILE)
            self._received_result_cols(filter_name=consts.IV_PERCENTILE)
            summary = (
                "[Result][FeatureSelection][Host]Finish iv percentile filter. Current left cols are: {}"
            )
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
            param = self.model_param.variance_coe_param
            selector = feature_selection.CoeffOfVarValueFilter(
                param, self.cols, self.static_obj)
            self._renew_final_left_cols(selector.fit(data_instances))

            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = selector.statics_obj
            self.variance_coe_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())
            summary = (
                "[Result][FeatureSelection][Host]Finish coeffiecient_of_variation value threshold filter."
                " Current left cols are: {}"
            )
            LOGGER.debug(summary.format(self.left_cols))

        elif method == consts.UNIQUE_VALUE:
            param = self.model_param.unique_param
            selector = feature_selection.UniqueValueFilter(
                param, self.cols, self.static_obj)
            self._renew_final_left_cols(selector.fit(data_instances))

            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = selector.statics_obj
            self.unique_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())
            summary = (
                "[Result][FeatureSelection][Host]Finish unique value filter. Current left cols are: {}"
            )
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.OUTLIER_COLS:
            param = self.model_param.outlier_param
            selector = feature_selection.OutlierFilter(param, self.cols)
            self._renew_final_left_cols(selector.fit(data_instances))

            self.outlier_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())
            summary = (
                "[Result][FeatureSelection][Host]Finish outlier cols filter. Current left cols are: {}"
            )
            LOGGER.info(summary.format(self.left_cols))
    def filter_one_method(self, data_instances, method):
        """Run the single filter identified by ``method`` (guest-side variant).

        Each branch builds the matching filter from ``feature_selection``,
        fits it on ``data_instances``, stores the resulting columns in
        ``self.left_cols``, records the filter's meta/param objects and calls
        ``_renew_left_col_names``. For the two IV-based methods, when
        ``self.local_only`` is false, the host's per-column results are also
        collected into ``self.host_left_cols`` and passed on through
        ``_send_host_result_cols``. A ``method`` matching none of the
        constants below does nothing.

        :param data_instances: data handed to each filter's ``fit``.
        :param method: filter-method identifier compared against ``consts``.
        """
        # NOTE(review): the method constants are assumed to be pairwise
        # distinct, so at most one branch can match -- confirm in consts.
        if method == consts.IV_VALUE_THRES:
            iv_param = self.params.iv_value_param
            iv_filter = feature_selection.IVValueSelectFilter(
                iv_param, self.left_col_names, self.binning_model)
            self.left_cols = iv_filter.fit(data_instances)

            if not self.local_only:
                host_select_cols = self._get_host_select_cols(
                    consts.IV_VALUE_THRES)
                self.host_left_cols = self._collect_host_left_cols(
                    host_select_cols, iv_filter.host_cols)
                self._send_host_result_cols(consts.IV_VALUE_THRES)
                LOGGER.info(
                    "[Result][FeatureSelection][Guest] Finish iv value threshold filter. Host left cols are: {}"
                    .format(self.host_left_cols))

            iv_filter.display_feature_result(self.party_name)
            LOGGER.info(
                "[Result][FeatureSelection][Guest] Finish iv value threshold filter. Self left cols are: {}"
                .format(self.left_cols))
            self.iv_value_meta = iv_filter.get_meta_obj()
            self.results.append(iv_filter.get_param_obj())
            self._renew_left_col_names()

        elif method == consts.IV_PERCENTILE:
            iv_param = self.params.iv_percentile_param
            with_host = not self.local_only
            if with_host:
                host_select_cols = self._get_host_select_cols(
                    consts.IV_PERCENTILE)
                host_cols = {consts.HOST: host_select_cols}
            else:
                # Local-only runs pass an empty host mapping to the filter.
                host_select_cols = None
                host_cols = {}

            iv_filter = feature_selection.IVPercentileFilter(
                iv_param, self.left_col_names, host_cols, self.binning_model)
            self.left_cols = iv_filter.fit(data_instances)

            if with_host:
                self.host_left_cols = self._collect_host_left_cols(
                    host_select_cols, iv_filter.host_cols)
                self._send_host_result_cols(consts.IV_PERCENTILE)
                # Message previously named the wrong filter and party
                # (copy-paste from the iv value threshold branch).
                LOGGER.info(
                    "[Result][FeatureSelection][Guest]Finish iv percentile filter. Host left cols are: {}"
                    .format(self.host_left_cols))

            iv_filter.display_feature_result(self.party_name)
            LOGGER.info(
                "[Result][FeatureSelection][Guest]Finish iv percentile filter. Self left cols are: {}"
                .format(self.left_cols))
            self.iv_percentile_meta = iv_filter.get_meta_obj()
            self.results.append(iv_filter.get_param_obj())
            self._renew_left_col_names()

        elif method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
            coe_param = self.params.coe_param
            coe_filter = feature_selection.CoeffOfVarValueFilter(
                coe_param, self.left_col_names, self.static_obj)
            self.left_cols = coe_filter.fit(data_instances)
            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = coe_filter.statics_obj
            self.coe_meta = coe_filter.get_meta_obj()
            self.results.append(coe_filter.get_param_obj())
            self._renew_left_col_names()
            coe_filter.display_feature_result(self.party_name)
            LOGGER.info(
                "[Result][FeatureSelection][Guest] Finish coefficient threshold filter. Self left cols are: {}"
                .format(self.left_cols))

        elif method == consts.UNIQUE_VALUE:
            unique_param = self.params.unique_param
            unique_filter = feature_selection.UniqueValueFilter(
                unique_param, self.left_col_names, self.static_obj)
            self.left_cols = unique_filter.fit(data_instances)
            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = unique_filter.statics_obj
            self.unique_meta = unique_filter.get_meta_obj()
            self.results.append(unique_filter.get_param_obj())
            self._renew_left_col_names()
            unique_filter.display_feature_result(self.party_name)
            LOGGER.info(
                "[Result][FeatureSelection][Guest]Finish unique value filter. Current left cols are: {}"
                .format(self.left_cols))

        elif method == consts.OUTLIER_COLS:
            outlier_param = self.params.outlier_param
            outlier_filter = feature_selection.OutlierFilter(
                outlier_param, self.left_col_names)
            self.left_cols = outlier_filter.fit(data_instances)
            self.outlier_meta = outlier_filter.get_meta_obj()
            self.results.append(outlier_filter.get_param_obj())
            self._renew_left_col_names()
            outlier_filter.display_feature_result(self.party_name)
            LOGGER.info(
                "[Result][FeatureSelection][Guest]Finish outlier cols filter. Current left cols are: {}"
                .format(self.left_cols))

    def _collect_host_left_cols(self, host_select_cols, host_cols):
        """Look up the filter's result for every host column that was selected.

        :param host_select_cols: iterable of host column names to look up.
        :param host_cols: mapping read from the filter's ``host_cols``; the
            entry under ``consts.HOST`` is used (only one host is handled).
        :return: dict of host column name -> the value stored for it in that
            entry. Columns absent from the entry are logged and left out.
        """
        # A missing (None) host entry is treated as empty so that every column
        # goes through the warning path instead of raising a TypeError on the
        # membership test.
        known_cols = host_cols.get(consts.HOST) or {}
        result = {}
        for host_col_name in host_select_cols:
            if host_col_name not in known_cols:
                LOGGER.warning(
                    "Host column {} has not been set in feature binning module"
                    .format(host_col_name))
                continue
            result[host_col_name] = known_cols.get(host_col_name)
        return result
Exemple #3
0
    def filter_one_method(self, data_instances, method):
        """Run the single filter identified by ``method`` and update state.

        Every branch builds a filter from ``feature_selection``, replaces
        ``self.left_cols`` with the filter's output and appends the filter's
        ``to_result()`` to ``self.results``. The IV-based branches first call
        ``_calculates_iv_attrs`` and afterwards refresh the stored IV
        attributes through ``_renew_iv_attrs``. A ``method`` matching none of
        the constants below does nothing.

        :param data_instances: data handed to ``_calculates_iv_attrs`` and to
            the filters whose ``filter`` method takes data.
        :param method: filter-method identifier compared against ``consts``.
        """
        # NOTE(review): the method constants are assumed to be pairwise
        # distinct, so at most one branch can match -- confirm in consts.
        if method == consts.IV_VALUE_THRES:
            self._calculates_iv_attrs(data_instances,
                                      flowid_postfix='iv_value')
            param = self.params.iv_param
            selector = feature_selection.IVValueSelectFilter(
                param, self.left_cols, self.guest_iv_attrs)
            kept_cols = selector.filter()

            self.results.append(selector.to_result())

            # Refresh the IV attributes against the old column list before
            # that list is overwritten.
            self.guest_iv_attrs = self._renew_iv_attrs(
                kept_cols, self.left_cols, self.guest_iv_attrs)
            self.left_cols = kept_cols

            if not self.local_only:
                self._filter_host_iv_value()
            summary = "Finish iv value threshold filter. Current left cols are: {}"
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.IV_PERCENTILE:
            self._calculates_iv_attrs(data_instances,
                                      flowid_postfix='iv_percentile')
            param = self.params.iv_param
            selector = feature_selection.IVPercentileFilter(param)
            selector.add_attrs(self.guest_iv_attrs, self.left_cols)
            if not self.local_only:
                selector.add_attrs(self.host_iv_attrs, self.host_left_cols)
            per_party_cols = selector.filter_multiple_parties()
            # Index 0 is used for this party's own columns.
            kept_cols = per_party_cols[0]
            self.results.append(selector.to_result())

            # Refresh the IV attributes against the old column list before
            # that list is overwritten.
            self.guest_iv_attrs = self._renew_iv_attrs(
                kept_cols, self.left_cols, self.guest_iv_attrs)
            self.left_cols = kept_cols

            # A second entry means the host took part: update its state and
            # send the result.
            if len(per_party_cols) > 1:
                host_kept_cols = per_party_cols[1]
                self.host_iv_attrs = self._renew_iv_attrs(
                    host_kept_cols, self.host_left_cols, self.host_iv_attrs)
                self.host_left_cols = host_kept_cols
                self._send_host_result_cols()
            summary = "Finish iv percentile filter. Current left cols are: {}"
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
            param = self.params.coe_param
            selector = feature_selection.CoeffOfVarValueFilter(
                param, self.left_cols, self.static_obj)
            self.left_cols = selector.filter(data_instances)
            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = selector.statics_obj
            self.results.append(selector.to_result())

            summary = (
                "Finish coeffiecient_of_variation value threshold filter. Current left cols are: {}"
            )
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.UNIQUE_VALUE:
            param = self.params.unique_param
            selector = feature_selection.UniqueValueFilter(
                param, self.left_cols, self.static_obj)
            self.left_cols = selector.filter(data_instances)
            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = selector.statics_obj
            self.results.append(selector.to_result())

            summary = "Finish unique value filter. Current left cols are: {}"
            LOGGER.info(summary.format(self.left_cols))

        elif method == consts.OUTLIER_COLS:
            param = self.params.outlier_param
            selector = feature_selection.OutlierFilter(param, self.left_cols)
            self.left_cols = selector.filter(data_instances)
            self.results.append(selector.to_result())
            summary = "Finish outlier cols filter. Current left cols are: {}"
            LOGGER.info(summary.format(self.left_cols))
    def filter_one_method(self, data_instances, method):
        """Run the single filter identified by ``method`` (guest-side variant).

        Every branch builds a filter over
        ``self.filter_result.this_to_select_cols_index``, fits it on
        ``data_instances``, registers the returned columns through
        ``self.filter_result.add_left_col_index`` and stores the filter's
        meta/param objects. For the two IV-based methods, when
        ``self.local_only`` is false, the host's columns are also registered
        on ``self.host_filter_result`` and ``_send_host_result_cols`` is
        called. A ``method`` matching none of the constants below does
        nothing.

        :param data_instances: data handed to each filter's ``fit``.
        :param method: filter-method identifier compared against ``consts``.
        """
        # NOTE(review): the method constants are assumed to be pairwise
        # distinct, so at most one branch can match -- confirm in consts.
        if method == consts.IV_VALUE_THRES:
            param = self.model_param.iv_value_param

            if self.local_only:
                selector = feature_selection.IVValueSelectFilter(
                    param, self.filter_result.this_to_select_cols_index,
                    self.binning_model)
                kept_cols = selector.fit(data_instances)
                self.filter_result.add_left_col_index(kept_cols)
            else:
                host_candidates = self._get_host_select_cols(
                    consts.IV_VALUE_THRES)
                LOGGER.debug("In iv value filter, host_select_cols: {}".format(
                    host_candidates))
                selector = feature_selection.IVValueSelectFilter(
                    param,
                    self.filter_result.this_to_select_cols_index,
                    self.binning_model,
                    host_select_cols=host_candidates)
                kept_cols = selector.fit(data_instances, fit_host=True)
                self.filter_result.add_left_col_index(kept_cols)

                all_host_cols = selector.host_cols
                # Keys of the host entry are converted to int before they
                # are registered.
                host_kept = {
                    int(col_key): flag
                    for col_key, flag in all_host_cols.get(consts.HOST).items()
                }
                self.host_filter_result.add_left_cols(host_kept)
                LOGGER.debug(
                    "In Guest IV filter, host_select_cols: {}, host_left_cols: {}"
                    .format(host_candidates, all_host_cols))
                self._send_host_result_cols(consts.IV_VALUE_THRES)
                LOGGER.debug(
                    "[Result][FeatureSelection][Guest] Finish iv value threshold filter. Host left cols are: {}"
                    .format(self.host_filter_result.get_left_cols()))

            LOGGER.debug(
                "[Result][FeatureSelection][Guest] Finish iv value threshold filter. Self left cols are: {}"
                .format(self.filter_result.get_left_cols()))
            self.iv_value_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())

        elif method == consts.IV_PERCENTILE:
            param = self.model_param.iv_percentile_param

            if self.local_only:
                # Local-only runs pass an empty host mapping to the filter.
                selector = feature_selection.IVPercentileFilter(
                    param, self.filter_result.this_to_select_cols_index, {},
                    self.binning_model)
                kept_cols = selector.fit(data_instances)
                self.filter_result.add_left_col_index(kept_cols)
            else:
                host_candidates = self._get_host_select_cols(
                    consts.IV_PERCENTILE)
                host_mapping = {consts.HOST: host_candidates}

                selector = feature_selection.IVPercentileFilter(
                    param, self.filter_result.this_to_select_cols_index,
                    host_mapping, self.binning_model)
                kept_cols = selector.fit(data_instances)
                self.filter_result.add_left_col_index(kept_cols)

                # Only one host: its results are read from the consts.HOST
                # entry.
                host_kept = selector.host_cols.get(consts.HOST)
                self.host_filter_result.add_left_cols(host_kept)

                self._send_host_result_cols(consts.IV_PERCENTILE)
                LOGGER.info(
                    "[Result][FeatureSelection][Host]Finish iv percentile threshold filter. "
                    "Host left cols are: {}".format(
                        self.host_filter_result.get_left_cols()))

            LOGGER.debug(
                "[Result][FeatureSelection][Guest]Finish iv percentile threshold filter. Self left cols are: {}"
                .format(self.filter_result.get_left_cols()))
            self.iv_percentile_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())

        elif method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
            param = self.model_param.variance_coe_param
            selector = feature_selection.CoeffOfVarValueFilter(
                param,
                self.filter_result.this_to_select_cols_index, self.static_obj)
            kept_cols = selector.fit(data_instances)
            self.filter_result.add_left_col_index(kept_cols)
            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = selector.statics_obj

            LOGGER.info(
                "[Result][FeatureSelection][Guest]Finish coefficient of variance filter. Self left cols are: {}"
                .format(self.filter_result.get_left_cols()))

            self.variance_coe_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())

        elif method == consts.UNIQUE_VALUE:
            param = self.model_param.unique_param
            selector = feature_selection.UniqueValueFilter(
                param, self.filter_result.this_to_select_cols_index,
                self.static_obj)
            kept_cols = selector.fit(data_instances)
            self.filter_result.add_left_col_index(kept_cols)

            # Keep the statistics object exposed by the filter for later use.
            self.static_obj = selector.statics_obj
            self.unique_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())

            LOGGER.info(
                "[Result][FeatureSelection][Guest]Finish unique value filter. Current left cols are: {}"
                .format(self.filter_result.get_left_cols()))

        elif method == consts.OUTLIER_COLS:
            param = self.model_param.outlier_param
            selector = feature_selection.OutlierFilter(
                param, self.filter_result.this_to_select_cols_index)
            kept_cols = selector.fit(data_instances)
            self.filter_result.add_left_col_index(kept_cols)

            self.outlier_meta = selector.get_meta_obj()
            self.results.append(selector.get_param_obj())
            LOGGER.info(
                "[Result][FeatureSelection][Guest]Finish outlier filter. Self left cols are: {}"
                .format(self.filter_result.get_left_cols()))