def filter_one_method(self, data_instances, method):
    """Apply the single filter identified by ``method`` (host-tagged logs).

    Every supported identifier is tested with its own independent ``if``;
    a ``method`` that matches none of them does nothing and the function
    returns ``None``.

    * ``consts.IV_VALUE_THRES`` / ``consts.IV_PERCENTILE``: no filter is
      fitted here. ``self._send_select_cols`` is called, followed by
      ``self._received_result_cols`` for the same filter name.
    * ``consts.COEFFICIENT_OF_VARIATION_VALUE_THRES``,
      ``consts.UNIQUE_VALUE`` and ``consts.OUTLIER_COLS``: a filter from
      ``feature_selection`` is fitted on ``data_instances``; the columns
      it returns are passed to ``self._renew_final_left_cols`` and its
      meta / param objects are recorded on ``self``.

    :param data_instances: data handed to each filter's ``fit``.
    :param method: filter identifier, compared against ``consts`` values.
    """
    if method == consts.IV_VALUE_THRES:
        done_msg = (
            "[Result][FeatureSelection][Host]Finish iv value threshold filter. "
            "Current left cols are: {}"
        )
        LOGGER.debug("In host party, sending select_cols")
        self._send_select_cols(consts.IV_VALUE_THRES)
        self._received_result_cols(filter_name=consts.IV_VALUE_THRES)
        LOGGER.info(done_msg.format(self.left_cols))

    if method == consts.IV_PERCENTILE:
        done_msg = (
            "[Result][FeatureSelection][Host]Finish iv percentile filter. "
            "Current left cols are: {}"
        )
        self._send_select_cols(consts.IV_PERCENTILE)
        self._received_result_cols(filter_name=consts.IV_PERCENTILE)
        LOGGER.info(done_msg.format(self.left_cols))

    if method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
        done_msg = (
            "[Result][FeatureSelection][Host]Finish coeffiecient_of_variation "
            "value threshold filter."
            " Current left cols are: {}"
        )
        cv_param = self.model_param.variance_coe_param
        cv_filter = feature_selection.CoeffOfVarValueFilter(
            cv_param, self.cols, self.static_obj)
        kept_cols = cv_filter.fit(data_instances)
        self._renew_final_left_cols(kept_cols)
        # Take the statistics object back from the filter so that a later
        # filter in the same run can be handed the same object.
        self.static_obj = cv_filter.statics_obj
        self.variance_coe_meta = cv_filter.get_meta_obj()
        self.results.append(cv_filter.get_param_obj())
        # This branch reports at debug level, unlike the others.
        LOGGER.debug(done_msg.format(self.left_cols))

    if method == consts.UNIQUE_VALUE:
        done_msg = (
            "[Result][FeatureSelection][Host]Finish unique value filter. "
            "Current left cols are: {}"
        )
        uniq_param = self.model_param.unique_param
        uniq_filter = feature_selection.UniqueValueFilter(
            uniq_param, self.cols, self.static_obj)
        kept_cols = uniq_filter.fit(data_instances)
        self._renew_final_left_cols(kept_cols)
        self.static_obj = uniq_filter.statics_obj
        self.unique_meta = uniq_filter.get_meta_obj()
        self.results.append(uniq_filter.get_param_obj())
        LOGGER.info(done_msg.format(self.left_cols))

    if method == consts.OUTLIER_COLS:
        done_msg = (
            "[Result][FeatureSelection][Host]Finish outlier cols filter. "
            "Current left cols are: {}"
        )
        out_param = self.model_param.outlier_param
        # The outlier filter is built without the shared statistics object.
        out_filter = feature_selection.OutlierFilter(out_param, self.cols)
        kept_cols = out_filter.fit(data_instances)
        self._renew_final_left_cols(kept_cols)
        self.outlier_meta = out_filter.get_meta_obj()
        self.results.append(out_filter.get_param_obj())
        LOGGER.info(done_msg.format(self.left_cols))
def filter_one_method(self, data_instances, method):
    """Apply the single filter identified by ``method`` (guest-tagged logs).

    Every supported identifier is tested with its own independent ``if``;
    a ``method`` that matches none of them does nothing and the function
    returns ``None``.

    For each filter the fitted result is stored in ``self.left_cols``, the
    filter's meta object is kept on ``self``, its param object is appended
    to ``self.results`` and ``self._renew_left_col_names()`` is called.
    For the two IV filters, when ``self.local_only`` is falsy, the
    per-column result for the host is additionally written to
    ``self.host_left_cols`` and ``self._send_host_result_cols`` is called.

    The log messages of the IV percentile branch used to say
    "iv value threshold filter" (copied from the branch above); they now
    name the percentile filter so the log reflects what actually ran.

    :param data_instances: data handed to each filter's ``fit``.
    :param method: filter identifier, compared against ``consts`` values.
    """

    def _collect_host_result(host_select_cols, host_left_cols):
        """Return {host column: flag} for the columns the host asked about."""
        # Only one host
        left_cols = host_left_cols.get(consts.HOST)
        new_result = {}
        for host_col_name in host_select_cols:
            if host_col_name not in left_cols:
                # The filter produced no entry for this column: report it
                # and leave it out of the result sent back to the host.
                LOGGER.warning(
                    "Host column {} has not been set in feature binning module"
                    .format(host_col_name))
                continue
            new_result[host_col_name] = left_cols.get(host_col_name)
        return new_result

    if method == consts.IV_VALUE_THRES:
        iv_param = self.params.iv_value_param
        iv_filter = feature_selection.IVValueSelectFilter(
            iv_param, self.left_col_names, self.binning_model)
        self.left_cols = iv_filter.fit(data_instances)

        if not self.local_only:
            host_select_cols = self._get_host_select_cols(
                consts.IV_VALUE_THRES)
            self.host_left_cols = _collect_host_result(
                host_select_cols, iv_filter.host_cols)
            self._send_host_result_cols(consts.IV_VALUE_THRES)
            LOGGER.info(
                "[Result][FeatureSelection][Guest] Finish iv value threshold filter. "
                "Host left cols are: {}".format(self.host_left_cols))

        iv_filter.display_feature_result(self.party_name)
        LOGGER.info(
            "[Result][FeatureSelection][Guest] Finish iv value threshold filter. "
            "Self left cols are: {}".format(self.left_cols))
        self.iv_value_meta = iv_filter.get_meta_obj()
        self.results.append(iv_filter.get_param_obj())
        self._renew_left_col_names()

    if method == consts.IV_PERCENTILE:
        iv_param = self.params.iv_percentile_param
        if self.local_only:
            # No host columns take part: pass an empty host mapping.
            iv_filter = feature_selection.IVPercentileFilter(
                iv_param, self.left_col_names, {}, self.binning_model)
            self.left_cols = iv_filter.fit(data_instances)
        else:
            host_select_cols = self._get_host_select_cols(
                consts.IV_PERCENTILE)
            host_cols = {consts.HOST: host_select_cols}
            iv_filter = feature_selection.IVPercentileFilter(
                iv_param, self.left_col_names, host_cols, self.binning_model)
            self.left_cols = iv_filter.fit(data_instances)
            self.host_left_cols = _collect_host_result(
                host_select_cols, iv_filter.host_cols)
            self._send_host_result_cols(consts.IV_PERCENTILE)
            LOGGER.info(
                "[Result][FeatureSelection][Host]Finish iv percentile filter. "
                "Host left cols are: {}".format(self.host_left_cols))

        iv_filter.display_feature_result(self.party_name)
        LOGGER.info(
            "[Result][FeatureSelection][Guest]Finish iv percentile filter. "
            "Self left cols are: {}".format(self.left_cols))
        self.iv_percentile_meta = iv_filter.get_meta_obj()
        self.results.append(iv_filter.get_param_obj())
        self._renew_left_col_names()

    if method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
        coe_param = self.params.coe_param
        coe_filter = feature_selection.CoeffOfVarValueFilter(
            coe_param, self.left_col_names, self.static_obj)
        self.left_cols = coe_filter.fit(data_instances)
        # Take the statistics object back from the filter so that a later
        # filter in the same run can be handed the same object.
        self.static_obj = coe_filter.statics_obj
        self.coe_meta = coe_filter.get_meta_obj()
        self.results.append(coe_filter.get_param_obj())
        self._renew_left_col_names()
        coe_filter.display_feature_result(self.party_name)
        LOGGER.info(
            "[Result][FeatureSelection][Guest] Finish coefficient threshold filter. "
            "Self left cols are: {}".format(self.left_cols))

    if method == consts.UNIQUE_VALUE:
        unique_param = self.params.unique_param
        unique_filter = feature_selection.UniqueValueFilter(
            unique_param, self.left_col_names, self.static_obj)
        self.left_cols = unique_filter.fit(data_instances)
        self.static_obj = unique_filter.statics_obj
        self.unique_meta = unique_filter.get_meta_obj()
        self.results.append(unique_filter.get_param_obj())
        self._renew_left_col_names()
        unique_filter.display_feature_result(self.party_name)
        LOGGER.info(
            "[Result][FeatureSelection][Guest]Finish unique value filter. "
            "Current left cols are: {}".format(self.left_cols))

    if method == consts.OUTLIER_COLS:
        outlier_param = self.params.outlier_param
        # The outlier filter is built without the shared statistics object.
        outlier_filter = feature_selection.OutlierFilter(
            outlier_param, self.left_col_names)
        self.left_cols = outlier_filter.fit(data_instances)
        self.outlier_meta = outlier_filter.get_meta_obj()
        self.results.append(outlier_filter.get_param_obj())
        self._renew_left_col_names()
        outlier_filter.display_feature_result(self.party_name)
        LOGGER.info(
            "[Result][FeatureSelection][Guest]Finish outlier cols filter. "
            "Current left cols are: {}".format(self.left_cols))
def filter_one_method(self, data_instances, method):
    """Apply the single filter identified by ``method``.

    Every supported identifier is tested with its own independent ``if``;
    a ``method`` that matches none of them does nothing and the function
    returns ``None``.

    Each branch appends the filter's ``to_result()`` to ``self.results``
    and replaces ``self.left_cols`` with the columns the filter kept. The
    two IV branches first call ``self._calculates_iv_attrs`` and also
    refresh ``self.guest_iv_attrs`` through ``self._renew_iv_attrs``.

    :param data_instances: data handed to the IV computation and to the
        data-driven filters.
    :param method: filter identifier, compared against ``consts`` values.
    """
    if method == consts.IV_VALUE_THRES:
        self._calculates_iv_attrs(data_instances, flowid_postfix='iv_value')
        value_param = self.params.iv_param
        value_filter = feature_selection.IVValueSelectFilter(
            value_param, self.left_cols, self.guest_iv_attrs)
        guest_kept = value_filter.filter()
        self.results.append(value_filter.to_result())

        # _renew_iv_attrs is given the *old* self.left_cols, so it has to
        # run before self.left_cols is overwritten.
        self.guest_iv_attrs = self._renew_iv_attrs(
            guest_kept, self.left_cols, self.guest_iv_attrs)
        self.left_cols = guest_kept

        if not self.local_only:
            self._filter_host_iv_value()
        LOGGER.info(
            "Finish iv value threshold filter. Current left cols are: {}"
            .format(self.left_cols))

    if method == consts.IV_PERCENTILE:
        self._calculates_iv_attrs(data_instances,
                                  flowid_postfix='iv_percentile')
        pct_param = self.params.iv_param
        pct_filter = feature_selection.IVPercentileFilter(pct_param)
        pct_filter.add_attrs(self.guest_iv_attrs, self.left_cols)
        if not self.local_only:
            pct_filter.add_attrs(self.host_iv_attrs, self.host_left_cols)

        # Index 0 is read as this party's result; index 1, when present,
        # is read as the host's.
        per_party_cols = pct_filter.filter_multiple_parties()
        guest_kept = per_party_cols[0]
        self.results.append(pct_filter.to_result())

        # Same ordering constraint as above: renew before overwriting.
        self.guest_iv_attrs = self._renew_iv_attrs(
            guest_kept, self.left_cols, self.guest_iv_attrs)
        self.left_cols = guest_kept

        # A second entry means the host took part: update its state and
        # send the result over.
        if len(per_party_cols) > 1:
            host_kept = per_party_cols[1]
            self.host_iv_attrs = self._renew_iv_attrs(
                host_kept, self.host_left_cols, self.host_iv_attrs)
            self.host_left_cols = host_kept
            self._send_host_result_cols()
        LOGGER.info(
            "Finish iv percentile filter. Current left cols are: {}"
            .format(self.left_cols))

    if method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
        cv_param = self.params.coe_param
        cv_filter = feature_selection.CoeffOfVarValueFilter(
            cv_param, self.left_cols, self.static_obj)
        self.left_cols = cv_filter.filter(data_instances)
        # Take the statistics object back from the filter so that a later
        # filter in the same run can be handed the same object.
        self.static_obj = cv_filter.statics_obj
        self.results.append(cv_filter.to_result())
        LOGGER.info(
            "Finish coeffiecient_of_variation value threshold filter. "
            "Current left cols are: {}".format(self.left_cols))

    if method == consts.UNIQUE_VALUE:
        uniq_param = self.params.unique_param
        uniq_filter = feature_selection.UniqueValueFilter(
            uniq_param, self.left_cols, self.static_obj)
        self.left_cols = uniq_filter.filter(data_instances)
        self.static_obj = uniq_filter.statics_obj
        self.results.append(uniq_filter.to_result())
        LOGGER.info(
            "Finish unique value filter. Current left cols are: {}"
            .format(self.left_cols))

    if method == consts.OUTLIER_COLS:
        out_param = self.params.outlier_param
        # The outlier filter is built without the shared statistics object.
        out_filter = feature_selection.OutlierFilter(
            out_param, self.left_cols)
        self.left_cols = out_filter.filter(data_instances)
        self.results.append(out_filter.to_result())
        LOGGER.info(
            "Finish outlier cols filter. Current left cols are: {}"
            .format(self.left_cols))
def filter_one_method(self, data_instances, method):
    """Apply the single filter identified by ``method`` (guest-tagged logs).

    Every supported identifier is tested with its own independent ``if``;
    a ``method`` that matches none of them does nothing and the function
    returns ``None``.

    Each branch builds a filter over
    ``self.filter_result.this_to_select_cols_index``, fits it on
    ``data_instances``, records the returned columns with
    ``self.filter_result.add_left_col_index``, stores the filter's meta
    object on ``self`` and appends its param object to ``self.results``.
    For the two IV filters, when ``self.local_only`` is falsy, the host's
    result is also recorded on ``self.host_filter_result`` and
    ``self._send_host_result_cols`` is called.

    :param data_instances: data handed to each filter's ``fit``.
    :param method: filter identifier, compared against ``consts`` values.
    """
    if method == consts.IV_VALUE_THRES:
        value_param = self.model_param.iv_value_param
        if self.local_only:
            iv_filter = feature_selection.IVValueSelectFilter(
                value_param,
                self.filter_result.this_to_select_cols_index,
                self.binning_model)
            kept_index = iv_filter.fit(data_instances)
            self.filter_result.add_left_col_index(kept_index)
        else:
            host_candidates = self._get_host_select_cols(
                consts.IV_VALUE_THRES)
            LOGGER.debug(
                "In iv value filter, host_select_cols: {}"
                .format(host_candidates))
            iv_filter = feature_selection.IVValueSelectFilter(
                value_param,
                self.filter_result.this_to_select_cols_index,
                self.binning_model,
                host_select_cols=host_candidates)
            kept_index = iv_filter.fit(data_instances, fit_host=True)
            self.filter_result.add_left_col_index(kept_index)

            flags_by_party = iv_filter.host_cols
            # Keys are converted to int before being recorded.
            host_flags = {
                int(col_idx): flag
                for col_idx, flag in flags_by_party.get(consts.HOST).items()
            }
            self.host_filter_result.add_left_cols(host_flags)
            LOGGER.debug(
                "In Guest IV filter, host_select_cols: {}, host_left_cols: {}"
                .format(host_candidates, flags_by_party))

            self._send_host_result_cols(consts.IV_VALUE_THRES)
            LOGGER.debug(
                "[Result][FeatureSelection][Guest] Finish iv value threshold filter. "
                "Host left cols are: {}"
                .format(self.host_filter_result.get_left_cols()))

        LOGGER.debug(
            "[Result][FeatureSelection][Guest] Finish iv value threshold filter. "
            "Self left cols are: {}"
            .format(self.filter_result.get_left_cols()))
        self.iv_value_meta = iv_filter.get_meta_obj()
        self.results.append(iv_filter.get_param_obj())

    if method == consts.IV_PERCENTILE:
        pct_param = self.model_param.iv_percentile_param
        if self.local_only:
            # No host columns take part: pass an empty host mapping.
            iv_filter = feature_selection.IVPercentileFilter(
                pct_param,
                self.filter_result.this_to_select_cols_index,
                {},
                self.binning_model)
            kept_index = iv_filter.fit(data_instances)
            self.filter_result.add_left_col_index(kept_index)
        else:
            host_candidates = self._get_host_select_cols(
                consts.IV_PERCENTILE)
            candidates_by_party = {consts.HOST: host_candidates}
            iv_filter = feature_selection.IVPercentileFilter(
                pct_param,
                self.filter_result.this_to_select_cols_index,
                candidates_by_party,
                self.binning_model)
            kept_index = iv_filter.fit(data_instances)
            self.filter_result.add_left_col_index(kept_index)

            # Only one host. Unlike the value-threshold branch, the keys
            # are recorded as they come, without int conversion.
            flags_by_party = iv_filter.host_cols
            host_flags = flags_by_party.get(consts.HOST)
            self.host_filter_result.add_left_cols(host_flags)
            self._send_host_result_cols(consts.IV_PERCENTILE)
            LOGGER.info(
                "[Result][FeatureSelection][Host]Finish iv percentile threshold filter. "
                "Host left cols are: {}"
                .format(self.host_filter_result.get_left_cols()))

        LOGGER.debug(
            "[Result][FeatureSelection][Guest]Finish iv percentile threshold filter. "
            "Self left cols are: {}"
            .format(self.filter_result.get_left_cols()))
        self.iv_percentile_meta = iv_filter.get_meta_obj()
        self.results.append(iv_filter.get_param_obj())

    if method == consts.COEFFICIENT_OF_VARIATION_VALUE_THRES:
        cv_param = self.model_param.variance_coe_param
        cv_filter = feature_selection.CoeffOfVarValueFilter(
            cv_param,
            self.filter_result.this_to_select_cols_index,
            self.static_obj)
        kept_index = cv_filter.fit(data_instances)
        self.filter_result.add_left_col_index(kept_index)
        # Take the statistics object back from the filter so that a later
        # filter in the same run can be handed the same object.
        self.static_obj = cv_filter.statics_obj
        LOGGER.info(
            "[Result][FeatureSelection][Guest]Finish coefficient of variance filter. "
            "Self left cols are: {}"
            .format(self.filter_result.get_left_cols()))
        self.variance_coe_meta = cv_filter.get_meta_obj()
        self.results.append(cv_filter.get_param_obj())

    if method == consts.UNIQUE_VALUE:
        uniq_param = self.model_param.unique_param
        uniq_filter = feature_selection.UniqueValueFilter(
            uniq_param,
            self.filter_result.this_to_select_cols_index,
            self.static_obj)
        kept_index = uniq_filter.fit(data_instances)
        self.filter_result.add_left_col_index(kept_index)
        self.static_obj = uniq_filter.statics_obj
        self.unique_meta = uniq_filter.get_meta_obj()
        self.results.append(uniq_filter.get_param_obj())
        LOGGER.info(
            "[Result][FeatureSelection][Guest]Finish unique value filter. "
            "Current left cols are: {}"
            .format(self.filter_result.get_left_cols()))

    if method == consts.OUTLIER_COLS:
        out_param = self.model_param.outlier_param
        # The outlier filter is built without the shared statistics object.
        out_filter = feature_selection.OutlierFilter(
            out_param, self.filter_result.this_to_select_cols_index)
        kept_index = out_filter.fit(data_instances)
        self.filter_result.add_left_col_index(kept_index)
        self.outlier_meta = out_filter.get_meta_obj()
        self.results.append(out_filter.get_param_obj())
        LOGGER.info(
            "[Result][FeatureSelection][Guest]Finish outlier filter. "
            "Self left cols are: {}"
            .format(self.filter_result.get_left_cols()))