Exemple #1
0
    def generate_aco_gp(self, p_matrix):
        """Sample a candidate gradual pattern, then evaporate the pheromones.

        For each row ``i`` of ``p_matrix`` the element-wise product of
        ``self.d[i]`` and ``p_matrix[i]`` is divided by its own sum, and the
        first column whose cumulative share exceeds a uniform random draw is
        selected. The key at that column in ``self.attr_keys`` is parsed into
        a gradual item and added unless ``pattern.contains_attr`` reports it
        is already covered.

        :param p_matrix: pheromone matrix (a NumPy array; rows are iterated).
        :return: tuple ``(pattern, p_matrix)`` where ``p_matrix`` is the input
            scaled by ``1 - self.e_factor``.
        """
        keys = self.attr_keys
        visibility = self.d
        candidate = GP()

        # 1. Pick at most one gradual item per pheromone row.
        for row in range(p_matrix.shape[0]):
            weights = np.multiply(visibility[row], p_matrix[row])
            weight_sum = np.sum(weights)
            # A zero sum produces NaN/inf shares; silence the warning and let
            # the "no column selected" path below deal with it.
            with np.errstate(divide='ignore', invalid='ignore'):
                shares = weights / weight_sum
            cumulative = np.cumsum(shares)
            draw = np.random.random_sample()
            try:
                above = np.nonzero(cumulative > draw)[0]
                col = above[0]
                item = GI.parse_gi(keys[col])
                if not candidate.contains_attr(item):
                    candidate.add_gradual_item(item)
            except IndexError:
                # Nothing exceeded the draw for this row: skip it.
                pass

        # 2. Evaporate pheromones by factor e.
        evaporated = (1 - self.e_factor) * p_matrix
        return candidate, evaporated
Exemple #2
0
    def validate_gp(self, pattern):
        """Keep the items of ``pattern`` that stay frequent when combined.

        The first gradual item seeds a working column taken from
        ``self.d_set.rank_matrix``. Each later item's column is compared
        against it: positions where both columns hold the same value keep
        that value, all others become 0. The support is the number of
        non-zero positions divided by ``n * (n - 1) / 2`` with
        ``n = self.d_set.attr_size``. An item is accepted, and the working
        column replaced by the combined one, only when the support reaches
        ``self.d_set.thd_supp``.

        :param pattern: object exposing ``gradual_items``.
        :return: the validated pattern if it holds more than one item,
            otherwise ``pattern`` itself. An empty ``pattern`` is returned
            unchanged instead of raising ``IndexError``.
        """
        items = pattern.gradual_items
        if len(items) == 0:
            # Nothing to seed the working column with.
            return pattern

        min_supp = self.d_set.thd_supp
        n = self.d_set.attr_size
        ranks = self.d_set.rank_matrix
        total_pairs = float(n * (n - 1.0) / 2.0)
        gen_pattern = GP()

        def oriented(gi):
            # Column for `gi`; a decrement item swaps the 0.5 and 1 entries
            # and zeroes everything else.
            column = ranks[:, gi.attribute_col].copy()
            if gi.is_decrement():
                return np.where(column == 0.5, 1,
                                np.where(column == 1, 0.5, 0))
            return column

        main_bin = oriented(items[0])
        gen_pattern.add_gradual_item(items[0])

        for i in range(1, len(items)):
            gi = items[i]
            bin_2 = oriented(gi)

            # Rank multiplication: keep only positions where both agree.
            temp_bin = np.where(main_bin == bin_2, main_bin, 0)
            supp = float(np.count_nonzero(temp_bin)) / total_pairs
            if supp >= min_supp:
                main_bin = temp_bin
                gen_pattern.add_gradual_item(gi)
                gen_pattern.set_support(supp)

        if len(gen_pattern.gradual_items) <= 1:
            return pattern
        return gen_pattern
    def bin_and(self, keys, grps, m_grp):
        """Build a pattern from ``keys`` by AND-ing segmented bins.

        ``m_grp`` is the running accumulator: a mapping from ``'0'`` ..
        ``str(seg_count - 1)`` to writable array-like segments. For every key
        after the first, the segment-wise product of the accumulator and
        ``grps[i]['bins']`` is measured. The support is the sum of that
        product divided by ``n * (n - 1) / 2`` with
        ``n = self.d_set.attr_size``.

        The accumulator is only overwritten when the item is accepted.
        Writing the product unconditionally would let a rejected item
        shrink the accumulator and distort the support of every later item.

        :param keys: gradual-item keys; ``keys[0]`` is always accepted.
        :param grps: per-key containers, each exposing ``['bins']``.
        :param m_grp: accumulator segments, updated in place on acceptance.
        :return: pattern holding the accepted items and the last support.
        """
        n = self.d_set.attr_size
        min_supp = self.d_set.thd_supp
        total_pairs = float(n * (n - 1.0) / 2.0)
        pattern = GP()

        pattern.add_gradual_item(GI.parse_gi(keys[0]))
        for i in range(1, len(keys)):
            bin_2 = grps[i]['bins']
            segments = [str(k) for k in range(self.d_set.seg_count)]

            # Pass 1: measure the combined support without touching m_grp.
            bin_sum = 0
            for seg in segments:
                bin_sum += np.sum(np.multiply(m_grp[seg][:], bin_2[seg][:]))
            supp = float(bin_sum) / total_pairs

            if supp >= min_supp:
                # Pass 2: commit the product one segment at a time so no
                # more than a single segment is held in memory.
                for seg in segments:
                    m_grp[seg][...] = np.multiply(m_grp[seg][:],
                                                  bin_2[seg][:])
                pattern.add_gradual_item(GI.parse_gi(keys[i]))
                pattern.set_support(supp)
        return pattern
    def validate_gp(self, pattern):
        """Filter ``pattern`` down to items whose combined bins are frequent.

        Items found in ``self.d_set.invalid_bins`` or absent from the first
        column of ``self.d_set.valid_bins`` are skipped. The first usable
        item seeds a two-slot working array; each later item is placed in
        slot 1, combined through ``GradACO.bin_and`` and, when the returned
        support reaches ``self.d_set.thd_supp``, accepted with the combined
        bin stored back into slot 0.

        :param pattern: object exposing ``gradual_items``.
        :return: the validated pattern if it holds more than one item,
            otherwise ``pattern`` itself.
        """
        threshold = self.d_set.thd_supp
        validated = GP()
        pair = np.array([])

        for item in pattern.gradual_items:
            known_invalid = self.d_set.invalid_bins.size > 0 and np.any(
                np.isin(self.d_set.invalid_bins, item.gradual_item))
            if known_invalid:
                continue

            matches = np.argwhere(
                np.isin(self.d_set.valid_bins[:, 0], item.gradual_item))
            if len(matches) == 0:
                continue

            entry = self.d_set.valid_bins[matches[0][0]]
            if pair.size <= 0:
                # No seed yet: both slots start as this item's bin.
                pair = np.array([entry[1], entry[1]])
                validated.add_gradual_item(item)
                continue

            pair[1] = entry[1].copy()
            merged, supp = GradACO.bin_and(pair, self.d_set.attr_size)
            if supp >= threshold:
                pair[0] = merged.copy()
                validated.add_gradual_item(item)
                validated.set_support(supp)

        if len(validated.gradual_items) > 1:
            return validated
        return pattern
    def validate_gp(self, pattern):
        """Validate ``pattern`` using scratch datasets in the HDF5 file.

        A pairwise comparison matrix is built for every gradual item from
        its column in ``self.d_set.attr_data`` (``>`` for symbol ``'+'``,
        ``<`` otherwise). The first item's matrix is stored as the
        ``temp_bin1`` dataset; each later item's matrix is stored as
        ``temp_bin2``, multiplied chunk by chunk into a memory-mapped
        scratch file, and summed. The support is that sum divided by
        ``n * (n - 1) / 2`` with ``n = self.d_set.attr_size``. An accepted
        item's product is copied back into ``temp_bin1``.

        The HDF5 file, both scratch datasets and the memmap file are
        released even when an exception interrupts the computation.
        Patterns with fewer than two items are returned immediately without
        opening the file.

        :param pattern: object exposing ``gradual_items``.
        :return: the validated pattern if it holds more than one item,
            otherwise ``pattern`` itself.
        """
        items = pattern.gradual_items
        if len(items) < 2:
            # Nothing to combine, so skip the HDF5 round-trip entirely.
            return pattern

        n = self.d_set.attr_size
        attr_data = self.d_set.attr_data
        min_supp = self.d_set.thd_supp
        total_pairs = float(n * (n - 1.0) / 2.0)
        gen_pattern = GP()

        temp_file = 'temp.dat'
        grp1 = 'dataset/' + self.d_set.step_name + '/temp_bin1'
        grp2 = 'dataset/' + self.d_set.step_name + '/temp_bin2'

        def pairwise_bin(gi):
            # Boolean matrix comparing every value of the item's column
            # against every other value.
            col_data = attr_data[gi.attribute_col]
            if gi.symbol == '+':
                return col_data > col_data[:, np.newaxis]
            return col_data < col_data[:, np.newaxis]

        with h5py.File(self.d_set.h5_file, 'r+') as h5f:
            temp_bin = None
            try:
                gi = items[0]
                bin_1 = h5f.create_dataset(
                    grp1, data=pairwise_bin(gi), chunks=True)
                gen_pattern.add_gradual_item(gi)
                temp_bin = np.memmap(temp_file,
                                     dtype=bool,
                                     mode='w+',
                                     shape=bin_1.shape)

                for i in range(1, len(items)):
                    gi = items[i]
                    bin_2 = h5f.create_dataset(
                        grp2, data=pairwise_bin(gi), chunks=True)

                    bin_sum = 0
                    for k in bin_1.iter_chunks():
                        temp_bin[k] = np.multiply(bin_1[k], bin_2[k])
                        bin_sum += np.sum(temp_bin[k])
                    supp = float(bin_sum) / total_pairs
                    if supp >= min_supp:
                        gen_pattern.add_gradual_item(gi)
                        gen_pattern.set_support(supp)
                        for s in bin_1.iter_chunks():
                            bin_1[s] = temp_bin[s]
                    del h5f[grp2]
            finally:
                # Drop the mapping before unlinking its backing file, then
                # remove whatever scratch datasets are still present.
                del temp_bin
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                for scratch in (grp2, grp1):
                    if scratch in h5f:
                        del h5f[scratch]

        if len(gen_pattern.gradual_items) <= 1:
            return pattern
        return gen_pattern
    def validate_gp(self, pattern):
        """Validate ``pattern`` against bins stored in the HDF5 file.

        Each gradual item's bin is looked up by ``gi.as_string()`` in the
        ``dataset/<step_name>/valid_bins/`` group. The first item seeds the
        working bin; each later bin is multiplied into it row by row, and
        the sum of the products divided by ``n * (n - 1) / 2``
        (``n = self.d_set.attr_size``) is the support. An item is accepted,
        and the working bin replaced by the product, only when the support
        reaches ``self.d_set.thd_supp``.

        The file is opened read-only through a context manager so the
        handle is released even if a lookup or computation raises.

        :param pattern: object exposing ``gradual_items``.
        :return: the validated pattern if it holds more than one item,
            otherwise ``pattern`` itself.
        :raises KeyError: propagated from the group lookup when an item has
            no stored bin.
        """
        min_supp = self.d_set.thd_supp
        n = self.d_set.attr_size
        total_pairs = float(n * (n - 1.0) / 2.0)
        gen_pattern = GP()
        grp_name = 'dataset/' + self.d_set.step_name + '/valid_bins/'

        with h5py.File(self.d_set.h5_file, 'r') as h5f:
            bin_grp = h5f[grp_name]
            main_bin = None

            for gi in pattern.gradual_items:
                valid_bin = bin_grp[gi.as_string()]
                if main_bin is None:
                    # First item: seed the working bin.
                    main_bin = valid_bin
                    gen_pattern.add_gradual_item(gi)
                    continue

                # Work row by row so only one row pair is read at a time.
                bin_sum = 0
                tmp_bin = []
                for k in range(len(main_bin)):
                    bin_prod = np.multiply(main_bin[k], valid_bin[k])
                    bin_sum += np.sum(bin_prod)
                    tmp_bin.append(bin_prod)

                supp = float(bin_sum) / total_pairs
                if supp >= min_supp:
                    main_bin = tmp_bin
                    gen_pattern.add_gradual_item(gi)
                    gen_pattern.set_support(supp)

        if len(gen_pattern.gradual_items) <= 1:
            return pattern
        return gen_pattern
 def generate_random_gp(self):
     """Build a random gradual pattern guided by ``self.p_matrix``.

     Attributes are visited in a random permutation. For each one a random
     fraction ``rand.randint(1, n * 100) / (n * 100)`` is compared with two
     cut points derived from the attribute's first three ``p_matrix``
     entries: below ``p0 / total`` a ``'+'`` item is added, below
     ``(p0 + p1) / total`` a ``'-'`` item is added, otherwise the attribute
     is left out.

     :return: the generated pattern (possibly empty).
     """
     weights = self.p_matrix
     attr_count = len(self.attr_index)
     candidate = GP()
     scale = attr_count * 100

     for idx in np.random.permutation(attr_count):
         draw = float(rand.randint(1, scale) / scale)
         row = weights[idx]
         total = row[0] + row[1] + row[2]
         plus_cut = float(row[0] / total)
         minus_cut = float((row[0] + row[1]) / total)

         if draw < plus_cut:
             symbol = '+'
         elif plus_cut <= draw < minus_cut:
             symbol = '-'
         else:
             # Falls in the remaining share: attribute is not used.
             continue
         candidate.add_gradual_item(GI(self.attr_index[idx], symbol))
     return candidate
Exemple #8
0
    def validate_gp(self, pattern):
        """Validate ``pattern`` against the rank matrix stored in HDF5.

        The ``dataset/<step_name>/rank_matrix`` dataset is read fully into
        memory and the file is closed straight away. The first gradual item
        seeds a working column. Each later item's column is compared with
        it: positions where both hold the same value keep that value, all
        others become 0. The support is the number of non-zero positions
        divided by ``n * (n - 1) / 2`` with ``n = self.d_set.attr_size``.
        An item is accepted, and the working column replaced, only when the
        support reaches ``self.d_set.thd_supp``.

        :param pattern: object exposing ``gradual_items``.
        :return: the validated pattern if it holds more than one item,
            otherwise ``pattern`` itself. An empty ``pattern`` is returned
            unchanged instead of raising ``IndexError``.
        """
        items = pattern.gradual_items
        if len(items) == 0:
            # Nothing to seed the working column with; skip the file read.
            return pattern

        min_supp = self.d_set.thd_supp
        n = self.d_set.attr_size
        total_pairs = float(n * (n - 1.0) / 2.0)
        gen_pattern = GP()

        grp_name = 'dataset/' + self.d_set.step_name + '/rank_matrix'
        with h5py.File(self.d_set.h5_file, 'r') as h5f:
            # NOTE: `[:]` loads the whole matrix (originally marked "TO BE
            # REMOVED"). If this becomes lazy, the loop below must move
            # inside the `with` block so the file is still open.
            ranks = h5f[grp_name][:]

        def oriented(gi):
            # Column for `gi`; a decrement item swaps the 0.5 and 1 entries
            # and zeroes everything else.
            column = ranks[:, gi.attribute_col].copy()
            if gi.is_decrement():
                return np.where(column == 0.5, 1,
                                np.where(column == 1, 0.5, 0))
            return column

        main_bin = oriented(items[0])
        gen_pattern.add_gradual_item(items[0])

        for i in range(1, len(items)):
            gi = items[i]
            bin_2 = oriented(gi)

            # Rank multiplication: keep only positions where both agree.
            temp_bin = np.where(main_bin == bin_2, main_bin, 0)
            supp = float(np.count_nonzero(temp_bin)) / total_pairs
            if supp >= min_supp:
                main_bin = temp_bin
                gen_pattern.add_gradual_item(gi)
                gen_pattern.set_support(supp)

        if len(gen_pattern.gradual_items) <= 1:
            return pattern
        return gen_pattern
Exemple #9
0
    def validate_gp(self, pattern):
        """Filter ``pattern`` down to items whose combined bins are frequent.

        Items absent from the first column of ``self.d_set.valid_bins`` are
        skipped. The first matching item seeds the working bin; each later
        bin is multiplied into it entry by entry along the first axis. The
        sum of the products divided by ``n * (n - 1) / 2``
        (``n = self.d_set.attr_size``) is the support. An item is accepted,
        and the working bin replaced by the product list, only when the
        support reaches ``self.d_set.thd_supp``.

        :param pattern: object exposing ``gradual_items``.
        :return: the validated pattern if it holds more than one item,
            otherwise ``pattern`` itself.
        """
        threshold = self.d_set.thd_supp
        n = self.d_set.attr_size
        validated = GP()
        working = None  # combined bin of the items accepted so far

        for item in pattern.gradual_items:
            hits = np.argwhere(
                np.isin(self.d_set.valid_bins[:, 0], item.gradual_item))
            if len(hits) == 0:
                continue

            row = self.d_set.valid_bins[hits[0][0]]
            incoming = row[1]
            if working is None:
                working = incoming
                validated.add_gradual_item(item)
                continue

            products = []
            overlap = 0
            for k in range(len(working)):
                product = np.multiply(working[k], incoming[k])
                overlap += np.sum(product)
                products.append(product)

            supp = float(overlap) / float(n * (n - 1.0) / 2.0)
            if supp >= threshold:
                working = list(products)
                validated.add_gradual_item(item)
                validated.set_support(supp)

        if len(validated.gradual_items) > 1:
            return validated
        return pattern