def add_code_table(self, X, C_hat_ij):
    """Create a ``CT`` for this feature set and store it on ``self.CT``.

    Args:
        X: Forwarded to ``CT`` as its ``X`` argument.
        C_hat_ij: Forwarded to ``CT`` as its ``C`` argument.
    """
    code_table = CT(feature_set=self, X=X, C=C_hat_ij)
    self.CT = code_table
def merge_code_tables(self, _partition_init, merged_feature_sets=None):
    """Try candidate feature-set merges; keep the first that lowers the cost.

    Candidates are visited in the row order of ``merged_feature_sets``. For
    each one, the code tables of ``F_i`` and ``F_j`` are merged, a trial
    partition is built in which ``F_ij`` replaces both, and the merged table
    is refined with the rows returned by ``CT.unique_rows``. The method
    returns as soon as one candidate yields a total cost below the cost
    measured before that candidate was tried.

    The ``L(n)`` tags in the comments are carried over from the original
    code (presumably line numbers of the reference algorithm).

    Args:
        _partition_init: Partition to start from.
        merged_feature_sets: Table of candidate merges with the columns
            ``F_i``, ``F_j`` and ``F_ij``, accessed through ``.empty``,
            ``.shape`` and ``.iloc`` (presumably a pandas DataFrame sorted
            by decreasing normalised IG -- confirm against the caller).
            ``None`` is treated as "no candidates".

    Returns:
        tuple: ``(partition, number_of_merges)``. ``number_of_merges`` is 1
        when a merge was accepted and 0 otherwise; in the latter case the
        partition passed in is returned.
    """
    number_of_merges = 0
    # Reference (not a copy) to the starting partition; restored whenever a
    # candidate merge is rejected.
    p_copy = _partition_init

    # Fix: the default ``None`` previously crashed on ``.empty``.
    if merged_feature_sets is None or merged_feature_sets.empty:
        return _partition_init, number_of_merges

    self.logger.debug(
        'Merged feature sets are: \n {}'.format(merged_feature_sets))

    # Loop through merged feature sets in decreasing normalised IG to find
    # feature sets to combine.
    for i in range(merged_feature_sets.shape[0]):  # L(5)
        cost_init = _partition_init.calculate_total_cost(self.X)  # L(6)

        candidate = merged_feature_sets.iloc[i]
        f_i = candidate['F_i']
        f_j = candidate['F_j']
        f_ij = candidate['F_ij']
        self.logger.info('\n\n\n+++++++ F_ij: {} +++++++'.format(f_ij))

        ct_i = f_i.CT
        ct_j = f_j.CT
        c_hat_ij = CT.merge_CTs(ct_i, ct_j)
        unique_rows = CT.unique_rows(ct_i, ct_j)  # L(10)
        f_ij.add_code_table(self.X, c_hat_ij)

        # L(9), p_hat: trial partition with F_ij in place of F_i and F_j.
        new_partition_fss = (
            _partition_init.get_new_feature_sets_list_by_add_remove(
                add=f_ij, remove=[f_i, f_j]))
        new_partition = Partition(feature_set_list=new_partition_fss,
                                  X=self.X,
                                  n=self.n,
                                  logging_level=self.logging_level)

        # Calculate new rows of C_i_j before appending U to it.
        for unique_row in unique_rows:  # L(12)
            # Rebuilt every iteration because c_hat_ij may have been
            # replaced by the previous one.
            index_values = list(c_hat_ij.index)
            if (unique_row[0] in index_values
                    and unique_row[1] in index_values):
                self.logger.debug('old c_hat_ij is \n{}'.format(c_hat_ij))
                c_hat_ij = CT.update_c_hat(c_hat_ij, unique_row)
                self.logger.debug('new c_hat_ij is \n{}'.format(c_hat_ij))

            # Add new_pattern to FS and CT
            new_partition.update_feature_set(f_ij.name, self.X, c_hat_ij)
            self.logger.info('Inner loop f_ij: {} and row: {}'.format(
                f_ij, unique_row))
            self.logger.debug('C for feature set f_ij {} is \n{}'.format(
                f_ij, f_ij.CT.C))

            new_cost = new_partition.calculate_total_cost(self.X)
            old_cost = _partition_init.calculate_total_cost(self.X)
            self.logger.debug(
                'Old partition was:\n{}\nAnd new Partitions is\n{}'.format(
                    [fsp.patterns_list
                     for fsp in _partition_init.feature_sets],
                    [fsp.patterns_list
                     for fsp in new_partition.feature_sets]))

            if new_cost < old_cost:
                self.logger.debug(
                    'New partition cost was lower by: {} '.format(
                        old_cost - new_cost))
                # NOTE(review): from here on both names refer to the same
                # object, so later comparisons in this inner loop compare
                # the trial partition with itself -- confirm this is
                # intended.
                _partition_init = new_partition
            else:
                self.logger.debug('----No change to partition----\n')

        if _partition_init.calculate_total_cost(self.X) < cost_init:
            number_of_merges += 1
            self.logger.info(
                'Number of merges is: {}'.format(number_of_merges))
            return _partition_init, number_of_merges

        self.logger.info(
            'Merge is rejected and total number of merges is: {}'.format(
                number_of_merges))
        _partition_init = p_copy
        self.logger.info('=============================================')

    return _partition_init, number_of_merges
def build_code_table(self, X):
    """Build a ``CT`` for this feature set, store it on ``self.CT``, return it.

    Per the original comment, this is used when the code table is final.

    Args:
        X: Forwarded to ``CT`` as its ``X`` argument.

    Returns:
        The value of ``self.CT`` after the assignment.
    """
    level = self.logging_level
    self.CT = CT(self, X=X, logging_level=level)
    return self.CT
def _build_code_tables(partition):
    """Return a list with one ``CT`` per feature set of ``partition``.

    The list follows the order of ``partition.get_feature_sets()``.
    """
    return [CT(fs) for fs in partition.get_feature_sets()]