def add_filter_results(self, filter_name, select_properties: SelectionProperties, host_select_properties=None):
    """Record the outcome of one filter for the guest and for every host.

    ``self.all_left_col_indexes`` is always refreshed with a copy of
    ``select_properties.all_left_col_indexes``.  When ``filter_name`` is
    ``'conclusion'`` nothing else is recorded.  Otherwise the per-column
    pass counters are updated and a ``FeatureSelectionFilterParam`` message
    describing this filter is appended to ``self.filter_results``.

    Args:
        filter_name: Name stored in the resulting message; the value
            ``'conclusion'`` short-circuits after the index refresh.
        select_properties: Guest-side selection state.  This method reads
            ``all_left_col_indexes``, ``last_left_col_names``,
            ``all_left_col_names``, ``left_col_dicts`` and
            ``feature_values`` from it.
        host_select_properties: Optional iterable of host-side selection
            states, one per host; the same attributes (except
            ``all_left_col_indexes``) are read from each.  ``None`` is
            treated as "no hosts".

    Returns:
        None.
    """
    self.all_left_col_indexes = select_properties.all_left_col_indexes.copy()
    if filter_name == 'conclusion':
        return

    if host_select_properties is None:
        host_select_properties = []

    host_feature_values = []
    host_left_cols = []
    for idx, host_result in enumerate(host_select_properties):
        LOGGER.debug(
            "In add_filter_results, idx: {}, host_all_left_col_names: {}, "
            "__host_pass_filter_nums_list: {}".format(
                idx, host_result.all_left_col_names,
                self.__host_pass_filter_nums_list))

        # Hosts are tracked by position: the first time a position is seen
        # a fresh counter dict is created and registered for it.
        if idx >= len(self.__host_pass_filter_nums_list):
            _host_pass_filter_nums = {}
            self.__host_pass_filter_nums_list.append(_host_pass_filter_nums)
        else:
            _host_pass_filter_nums = self.__host_pass_filter_nums_list[idx]

        # Build the membership set once instead of re-fetching and linearly
        # scanning ``all_left_col_names`` for every candidate column.
        host_still_left = set(host_result.all_left_col_names)
        for col_name in host_result.last_left_col_names:
            _host_pass_filter_nums.setdefault(col_name, 0)
            if col_name in host_still_left:
                _host_pass_filter_nums[col_name] += 1

        host_feature_values.append(
            feature_selection_param_pb2.FeatureValue(
                feature_values=host_result.feature_values))
        host_left_cols.append(
            feature_selection_param_pb2.LeftCols(
                original_cols=host_result.last_left_col_names,
                left_cols=host_result.left_col_dicts))

    # Same counting rule for the guest: every column that entered this
    # filter gets an entry, and the count grows only if it is still left.
    guest_still_left = set(select_properties.all_left_col_names)
    for col_name in select_properties.last_left_col_names:
        self.__guest_pass_filter_nums.setdefault(col_name, 0)
        if col_name in guest_still_left:
            self.__guest_pass_filter_nums[col_name] += 1

    left_cols_pb = feature_selection_param_pb2.LeftCols(
        original_cols=select_properties.last_left_col_names,
        left_cols=select_properties.left_col_dicts)

    this_filter_result = feature_selection_param_pb2.FeatureSelectionFilterParam(
        feature_values=select_properties.feature_values,
        host_feature_values=host_feature_values,
        left_cols=left_cols_pb,
        host_left_cols=host_left_cols,
        filter_name=filter_name)
    self.filter_results.append(this_filter_result)
def get_param_obj(self):
    """Build the protobuf result message for this filter.

    Returns:
        A ``FeatureSelectionFilterParam`` whose ``filter_name`` is
        ``"UNIQUE FILTER"``.  It carries ``self.feature_values`` and a
        ``LeftCols`` message made from the stringified ``self.cols`` and
        the mapping returned by ``self._generate_col_name_dict()``.
    """
    pb = feature_selection_param_pb2

    left_flags = self._generate_col_name_dict()
    original_names = list(map(str, self.cols))
    left_cols_msg = pb.LeftCols(original_cols=original_names,
                                left_cols=left_flags)

    return pb.FeatureSelectionFilterParam(
        feature_values=self.feature_values,
        left_cols=left_cols_msg,
        filter_name="UNIQUE FILTER",
    )
def _received_result_cols(self, filter_name):
    """Fetch the left-column indexes for ``filter_name`` and record them.

    The indexes are read from ``self.transfer_variable.result_left_cols``
    (``idx=0``, suffixed with the filter name), handed to
    ``self.filter_result.add_left_col_index`` and then a
    ``FeatureSelectionFilterParam`` message with empty ``feature_values``
    is appended to ``self.results``.

    Args:
        filter_name: Used both as the transfer suffix and as the
            ``filter_name`` of the stored message.
    """
    pb = feature_selection_param_pb2

    received_indexes = self.transfer_variable.result_left_cols.get(
        idx=0, suffix=(filter_name, ))

    # Read before add_left_col_index() is called, as in the original
    # ordering.  NOTE(review): presumably that call can change
    # this_to_select_cols - confirm before reordering.
    candidate_cols = self.filter_result.this_to_select_cols
    self.filter_result.add_left_col_index(received_indexes)

    LOGGER.info(
        "Received left columns from guest, received left_cols: {}".format(
            received_indexes))
    LOGGER.debug(f"left_cols: {self.filter_result.get_left_cols()}")

    left_cols_msg = pb.LeftCols(
        original_cols=candidate_cols,
        left_cols=self.filter_result.get_left_cols())
    self.results.append(
        pb.FeatureSelectionFilterParam(
            feature_values={},
            left_cols=left_cols_msg,
            filter_name=filter_name))
def get_param_obj(self):
    """Assemble the protobuf result message covering guest and host columns.

    Host information is reported in two places:

    * per host, in ``host_left_cols`` / ``host_feature_values``, keyed by
      host name with every column key converted to ``str``;
    * merged into the top-level ``left_cols`` / ``feature_values`` under
      the key ``"<host_name>.<column>"``.

    The mapping returned by ``self._generate_col_name_dict()`` is extended
    in place with the host entries.  The finished message is also dumped
    as JSON to the debug log.

    Returns:
        A ``FeatureSelectionFilterParam`` with ``filter_name`` set to
        ``'IV_PERCENTILE'``.
    """
    pb = feature_selection_param_pb2

    merged_left_flags = self._generate_col_name_dict()
    merged_cols = list(map(str, self.cols))

    host_left_msgs = {}
    for party, left_flags in self.host_cols.items():
        host_left_msgs[party] = pb.LeftCols(
            original_cols=[str(key) for key in left_flags.keys()],
            left_cols={str(key): flag for key, flag in left_flags.items()})
        # Mirror each host column into the merged view under a
        # host-qualified name.
        for key, flag in left_flags.items():
            qualified = '.'.join([party, str(key)])
            merged_cols.append(qualified)
            merged_left_flags[qualified] = flag

    merged_left_msg = pb.LeftCols(original_cols=merged_cols,
                                  left_cols=merged_left_flags)

    host_value_msgs = {
        party: pb.FeatureValue(
            feature_values={str(key): value for key, value in values.items()})
        for party, values in self.host_feature_values.items()
    }

    # Combine both guest and host results
    combined_values = dict(self.feature_values.items())
    for party, values in self.host_feature_values.items():
        for key, value in values.items():
            combined_values['.'.join([party, str(key)])] = value

    param = pb.FeatureSelectionFilterParam(
        feature_values=combined_values,
        host_feature_values=host_value_msgs,
        left_cols=merged_left_msg,
        host_left_cols=host_left_msgs,
        filter_name='IV_PERCENTILE')

    as_json = json_format.MessageToJson(param,
                                        including_default_value_fields=True)
    LOGGER.debug("json_result: {}".format(as_json))
    return param
def get_param_obj(self):
    """Assemble the protobuf result message covering guest and host columns.

    Each host's columns appear both in their own ``host_left_cols`` /
    ``host_feature_values`` entry (keyed by host name, column keys turned
    into ``str``) and in the top-level ``left_cols`` / ``feature_values``
    under ``"<host_name>.<column>"``.  The mapping returned by
    ``self._generate_col_name_dict()`` is extended in place with the host
    entries.

    Returns:
        A ``FeatureSelectionFilterParam`` with ``filter_name`` set to
        ``"IV_VALUE_FILTER"``.
    """
    pb = feature_selection_param_pb2

    all_left_flags = self._generate_col_name_dict()
    all_cols = list(map(str, self.cols))

    left_msg_by_host = {}
    for party, party_flags in self.host_cols.items():
        left_msg_by_host[party] = pb.LeftCols(
            original_cols=[str(key) for key in party_flags.keys()],
            left_cols={str(key): kept for key, kept in party_flags.items()})
        # Add the same columns to the merged view, qualified by host name.
        for key, kept in party_flags.items():
            qualified = '.'.join([party, str(key)])
            all_cols.append(qualified)
            all_left_flags[qualified] = kept

    all_left_msg = pb.LeftCols(original_cols=all_cols,
                               left_cols=all_left_flags)

    value_msg_by_host = {
        party: pb.FeatureValue(
            feature_values={str(key): val for key, val in party_values.items()})
        for party, party_values in self.host_feature_values.items()
    }

    # Combine both guest and host results
    all_values = dict(self.feature_values.items())
    for party, party_values in self.host_feature_values.items():
        for key, val in party_values.items():
            all_values['.'.join([party, str(key)])] = val

    return pb.FeatureSelectionFilterParam(
        feature_values=all_values,
        host_feature_values=value_msg_by_host,
        left_cols=all_left_msg,
        host_left_cols=left_msg_by_host,
        filter_name="IV_VALUE_FILTER")