def bin_and(self, keys, grps, m_grp):
    """Build a gradual pattern by AND-ing segmented bins into ``m_grp``.

    ``keys[0]`` always seeds the pattern. Every later ``keys[i]`` is a
    candidate: its bins ``grps[i]['bins']`` are multiplied element-wise,
    segment by segment, with the running product stored in ``m_grp``. The
    sum of that product divided by ``n * (n - 1) / 2`` (``n`` being
    ``self.d_set.attr_size``) is the candidate's support. The candidate is
    added to the pattern when the support reaches ``self.d_set.thd_supp``.

    ``m_grp`` is only overwritten once a candidate has been accepted.
    Writing the product before the support test would let a rejected
    candidate zero out entries of the running product and so understate
    the support of every candidate evaluated after it.

    Args:
        keys: Sequence of gradual-item keys understood by ``GI.parse_gi``.
        grps: Sequence aligned with ``keys``; ``grps[i]['bins']`` is indexed
            by the segment number as a string (``'0'``, ``'1'``, ...).
        m_grp: Mapping with the same segment keys whose entries support
            ``[:]`` reads and ``[...]`` writes. Updated in place.
            NOTE(review): assumed to already hold the bins of ``keys[0]``
            when this is called -- confirm against the caller.

    Returns:
        GP: The pattern holding ``keys[0]`` plus every accepted candidate.
        Its support is the support of the last accepted candidate and is
        left untouched when no candidate is accepted.
    """
    n = self.d_set.attr_size
    min_supp = self.d_set.thd_supp
    segments = [str(k) for k in range(self.d_set.seg_count)]

    pattern = GP()
    pattern.add_gradual_item(GI.parse_gi(keys[0]))

    for i in range(1, len(keys)):
        cand_bins = grps[i]['bins']

        # Pass 1: measure the support without touching m_grp. Only one
        # segment product is alive at a time, so memory stays bounded.
        bin_sum = 0
        for seg in segments:
            bin_sum += np.sum(np.multiply(m_grp[seg][:], cand_bins[seg][:]))
        supp = float(bin_sum) / float(n * (n - 1.0) / 2.0)

        if supp >= min_supp:
            # Pass 2: commit the product now that the candidate is accepted.
            for seg in segments:
                m_grp[seg][...] = np.multiply(m_grp[seg][:], cand_bins[seg][:])
            pattern.add_gradual_item(GI.parse_gi(keys[i]))
            pattern.set_support(supp)
    return pattern
def validate_gp(self, pattern):
    """Re-validate ``pattern`` against the in-memory rank matrix.

    The first gradual item is always kept. Each following item is kept only
    if combining its rank column with the running column still gives a
    support of at least ``self.d_set.thd_supp``.

    Args:
        pattern: Object exposing ``gradual_items``; every item provides
            ``attribute_col`` and ``is_decrement()``.

    Returns:
        The rebuilt pattern when it holds more than one gradual item,
        otherwise the ``pattern`` argument itself.

    Raises:
        IndexError: If ``pattern.gradual_items`` is empty.
    """
    threshold = self.d_set.thd_supp
    n = self.d_set.attr_size
    rank_matrix = self.d_set.rank_matrix
    rebuilt = GP()

    def oriented_column(item):
        # Copy of the item's rank column. For a decreasing item the 0.5 and
        # 1 entries are swapped and every other entry becomes 0.
        column = rank_matrix[:, item.attribute_col].copy()
        if item.is_decrement():
            column = np.where(column == 0.5, 1,
                              np.where(column == 1, 0.5, 0))
        return column

    items = pattern.gradual_items
    seed = items[0]
    running = oriented_column(seed)
    rebuilt.add_gradual_item(seed)

    for candidate in items[1:]:
        other = oriented_column(candidate)
        # Keep a rank only where both columns agree; zero elsewhere.
        agreed = np.where(running == other, running, 0)
        support = float(np.count_nonzero(agreed)) / float(n * (n - 1.0) / 2.0)
        if support >= threshold:
            running = agreed.copy()
            rebuilt.add_gradual_item(candidate)
            rebuilt.set_support(support)

    return pattern if len(rebuilt.gradual_items) <= 1 else rebuilt
def validate_gp(self, pattern):
    """Re-validate ``pattern`` using the data set's valid and invalid bins.

    Items found in ``self.d_set.invalid_bins`` or missing from the first
    column of ``self.d_set.valid_bins`` are skipped. The first remaining
    item seeds the result. Each later item is kept only when
    ``GradACO.bin_and`` reports a support of at least
    ``self.d_set.thd_supp`` for it combined with the running bin.

    Args:
        pattern: Object exposing ``gradual_items``; every item provides a
            ``gradual_item`` value that is looked up in the bin tables.

    Returns:
        The rebuilt pattern when it holds more than one gradual item,
        otherwise the ``pattern`` argument itself.
    """
    d_set = self.d_set
    threshold = d_set.thd_supp
    rebuilt = GP()
    # Slot 0 holds the running bin, slot 1 the candidate's bin.
    pair = np.array([])

    for item in pattern.gradual_items:
        known_bad = d_set.invalid_bins
        if known_bad.size > 0 and np.any(np.isin(known_bad, item.gradual_item)):
            continue

        hits = np.argwhere(np.isin(d_set.valid_bins[:, 0], item.gradual_item))
        if len(hits) == 0:
            continue
        entry = d_set.valid_bins[hits[0][0]]

        if pair.size == 0:
            # First usable item: both slots start out as its bin.
            pair = np.array([entry[1], entry[1]])
            rebuilt.add_gradual_item(item)
            continue

        pair[1] = entry[1].copy()
        combined, support = GradACO.bin_and(pair, d_set.attr_size)
        if support >= threshold:
            pair[0] = combined.copy()
            rebuilt.add_gradual_item(item)
            rebuilt.set_support(support)

    return pattern if len(rebuilt.gradual_items) <= 1 else rebuilt
def validate_gp(self, pattern):
    """Re-validate ``pattern`` using temporary on-disk pairwise matrices.

    For every gradual item a boolean pairwise-comparison matrix of its
    attribute column is written to a temporary HDF5 dataset (``>`` for the
    ``'+'`` symbol, ``<`` otherwise). The first item seeds the running
    matrix. Each later item is multiplied chunk by chunk with the running
    matrix into a memory-mapped scratch file, and is kept when the sum of
    the product divided by ``n * (n - 1) / 2`` (``n`` being
    ``self.d_set.attr_size``) reaches ``self.d_set.thd_supp``.

    The HDF5 file, both temporary datasets and the scratch file are
    released even if an error occurs part-way. A leftover dataset would
    make the next ``create_dataset`` call fail on the existing name.

    Args:
        pattern: Object exposing ``gradual_items``; every item provides
            ``attribute_col`` and ``symbol``.

    Returns:
        The rebuilt pattern when it holds more than one gradual item,
        otherwise the ``pattern`` argument itself. Patterns with fewer than
        two items are returned unchanged without opening the HDF5 file.
    """
    items = pattern.gradual_items
    if len(items) < 2:
        # Nothing to combine; the result could never exceed one item.
        return pattern

    n = self.d_set.attr_size
    attr_data = self.d_set.attr_data
    min_supp = self.d_set.thd_supp
    grp1 = 'dataset/' + self.d_set.step_name + '/temp_bin1'
    grp2 = 'dataset/' + self.d_set.step_name + '/temp_bin2'
    temp_file = 'temp.dat'
    gen_pattern = GP()

    def pairwise(gi):
        # Boolean matrix comparing every value of the column with every other.
        col_data = attr_data[gi.attribute_col]
        if gi.symbol == '+':
            return col_data > col_data[:, np.newaxis]
        return col_data < col_data[:, np.newaxis]

    with h5py.File(self.d_set.h5_file, 'r+') as h5f:
        temp_bin = None
        try:
            bin_1 = h5f.create_dataset(grp1, data=pairwise(items[0]),
                                       chunks=True)
            gen_pattern.add_gradual_item(items[0])
            temp_bin = np.memmap(temp_file, dtype=bool, mode='w+',
                                 shape=bin_1.shape)

            for gi in items[1:]:
                bin_2 = h5f.create_dataset(grp2, data=pairwise(gi),
                                           chunks=True)
                bin_sum = 0
                for k in bin_1.iter_chunks():
                    temp_bin[k] = np.multiply(bin_1[k], bin_2[k])
                    bin_sum += np.sum(temp_bin[k])
                supp = float(bin_sum) / float(n * (n - 1.0) / 2.0)
                if supp >= min_supp:
                    gen_pattern.add_gradual_item(gi)
                    gen_pattern.set_support(supp)
                    # Commit the product as the new running matrix.
                    for s in bin_1.iter_chunks():
                        bin_1[s] = temp_bin[s]
                # The name is reused by the next candidate.
                del h5f[grp2]
        finally:
            for name in (grp1, grp2):
                if name in h5f:
                    del h5f[name]
            # Drop the mapping before unlinking the file backing it.
            temp_bin = None
            if os.path.exists(temp_file):
                os.remove(temp_file)

    if len(gen_pattern.gradual_items) <= 1:
        return pattern
    return gen_pattern
def validate_gp(self, pattern):
    """Re-validate ``pattern`` against the valid bins stored in HDF5.

    Each gradual item's bin is read from
    ``dataset/<step_name>/valid_bins/<gi.as_string()>`` in
    ``self.d_set.h5_file``. The first item seeds the running bin. Each later
    item's bin is multiplied with it entry by entry along the first axis,
    and the item is kept when the sum of the product divided by
    ``n * (n - 1) / 2`` (``n`` being ``self.d_set.attr_size``) reaches
    ``self.d_set.thd_supp``.

    The file is opened through a context manager so the handle is closed
    even when a lookup or a multiplication raises.

    Args:
        pattern: Object exposing ``gradual_items``; every item provides
            ``as_string()``.

    Returns:
        The rebuilt pattern when it holds more than one gradual item,
        otherwise the ``pattern`` argument itself.

    Raises:
        KeyError: If an item has no entry in the valid-bins group (raised
            by the HDF5 lookup).
    """
    min_supp = self.d_set.thd_supp
    n = self.d_set.attr_size
    gen_pattern = GP()
    grp_name = 'dataset/' + self.d_set.step_name + '/valid_bins/'

    with h5py.File(self.d_set.h5_file, 'r') as h5f:
        bin_grp = h5f[grp_name]
        running = None  # bin of the items accepted so far

        for gi in pattern.gradual_items:
            valid_bin = bin_grp[gi.as_string()]
            if running is None:
                running = valid_bin
                gen_pattern.add_gradual_item(gi)
                continue

            bin_sum = 0
            tmp_bin = []
            for k in range(len(running)):
                bin_prod = np.multiply(running[k], valid_bin[k])
                bin_sum += np.sum(bin_prod)
                tmp_bin.append(bin_prod)
            supp = float(bin_sum) / float(n * (n - 1.0) / 2.0)
            if supp >= min_supp:
                running = tmp_bin
                gen_pattern.add_gradual_item(gi)
                gen_pattern.set_support(supp)

    if len(gen_pattern.gradual_items) <= 1:
        return pattern
    return gen_pattern
def validate_gp(self, pattern):
    """Re-validate ``pattern`` against the rank matrix stored in HDF5.

    The matrix at ``dataset/<step_name>/rank_matrix`` in
    ``self.d_set.h5_file`` is read fully into memory. The first gradual
    item is always kept. Each later item is kept only if combining its rank
    column with the running column still gives a support of at least
    ``self.d_set.thd_supp``.

    The file is opened through a context manager and closed as soon as the
    matrix has been read, so the handle cannot leak when a later step
    raises (for example on an empty pattern).

    Args:
        pattern: Object exposing ``gradual_items``; every item provides
            ``attribute_col`` and ``is_decrement()``.

    Returns:
        The rebuilt pattern when it holds more than one gradual item,
        otherwise the ``pattern`` argument itself.

    Raises:
        IndexError: If ``pattern.gradual_items`` is empty.
    """
    min_supp = self.d_set.thd_supp
    n = self.d_set.attr_size
    gen_pattern = GP()
    grp_name = 'dataset/' + self.d_set.step_name + '/rank_matrix'

    with h5py.File(self.d_set.h5_file, 'r') as h5f:
        # TODO: the original author marked this full read ([:]) for removal.
        ranks = h5f[grp_name][:]

    def flip(column):
        # Swap the 0.5 and 1 entries; every other entry becomes 0.
        return np.where(column == 0.5, 1, np.where(column == 1, 0.5, 0))

    items = pattern.gradual_items
    first = items[0]
    main_bin = ranks[:, first.attribute_col]
    if first.is_decrement():
        main_bin = flip(main_bin)
    gen_pattern.add_gradual_item(first)

    for gi in items[1:]:
        bin_2 = ranks[:, gi.attribute_col]
        if gi.is_decrement():
            bin_2 = flip(bin_2)
        # Rank multiplication: keep a rank only where both columns agree.
        temp_bin = np.where(main_bin == bin_2, main_bin, 0)
        supp = float(np.count_nonzero(temp_bin)) / float(n * (n - 1.0) / 2.0)
        if supp >= min_supp:
            main_bin = temp_bin
            gen_pattern.add_gradual_item(gi)
            gen_pattern.set_support(supp)

    if len(gen_pattern.gradual_items) <= 1:
        return pattern
    return gen_pattern
def validate_gp(self, pattern):
    """Re-validate ``pattern`` against the in-memory valid bins.

    Items missing from the first column of ``self.d_set.valid_bins`` are
    skipped. The first item found seeds the running bin. Each later item's
    bin is multiplied with it entry by entry, and the item is kept when the
    sum of the product divided by ``n * (n - 1) / 2`` (``n`` being
    ``self.d_set.attr_size``) reaches ``self.d_set.thd_supp``.

    Args:
        pattern: Object exposing ``gradual_items``; every item provides a
            ``gradual_item`` value that is looked up in the bin table.

    Returns:
        The rebuilt pattern when it holds more than one gradual item,
        otherwise the ``pattern`` argument itself.
    """
    threshold = self.d_set.thd_supp
    n = self.d_set.attr_size
    rebuilt = GP()
    running = None  # bin of the items accepted so far

    for item in pattern.gradual_items:
        hits = np.argwhere(
            np.isin(self.d_set.valid_bins[:, 0], item.gradual_item))
        if len(hits) == 0:
            continue
        candidate = self.d_set.valid_bins[hits[0][0]][1]

        if running is None:
            running = candidate
            rebuilt.add_gradual_item(item)
            continue

        products = [np.multiply(running[k], candidate[k])
                    for k in range(len(running))]
        total = sum(np.sum(prod) for prod in products)
        support = float(total) / float(n * (n - 1.0) / 2.0)
        if support >= threshold:
            running = list(products)
            rebuilt.add_gradual_item(item)
            rebuilt.set_support(support)

    return pattern if len(rebuilt.gradual_items) <= 1 else rebuilt