Esempio n. 1
0
    def insert_into_conditional_fp_tree(self, pattern_list, count_of_pattern,
                                        conditional_fp_tree,
                                        conditional_header_table):
        """Insert one conditional pattern below ``conditional_fp_tree``.

        The items of ``pattern_list`` are walked in order, starting at
        ``conditional_fp_tree``.  For each item:

        * if the current node already has a child carrying that item, the
          child's ``count`` is increased by ``count_of_pattern``;
        * otherwise, if ``conditional_header_table`` has an entry for the
          item, a new ``FPTree`` node with ``count == count_of_pattern`` is
          attached to the current node and appended to the end of that
          entry's ``next`` chain;
        * otherwise the item is skipped and the walk continues from the
          same node.

        The walk is iterative, so the pattern length is not limited by the
        interpreter's recursion limit.

        :param pattern_list: items of the pattern, in insertion order.  The
            list is emptied before returning (callers have always received
            it back consumed).
        :param count_of_pattern: amount added to every node on the path.
        :param conditional_fp_tree: node under which the pattern is inserted.
        :param conditional_header_table: header entries; each exposes
            ``item`` and a ``next`` link.
        """
        current_node = conditional_fp_tree
        for item in pattern_list:
            for child in current_node.children:
                if child.item == item:
                    # Shared prefix: reuse the node and bump its count.
                    child.count += count_of_pattern
                    current_node = child
                    break
            else:
                for header_entry in conditional_header_table:
                    if header_entry.item != item:
                        continue
                    new_node = FPTree(item)
                    new_node.count = count_of_pattern
                    new_node.parent = current_node
                    current_node.children.append(new_node)
                    # Append the node to the end of this item's node chain.
                    chain_tail = header_entry
                    while chain_tail.next is not None:
                        chain_tail = chain_tail.next
                    chain_tail.next = new_node
                    current_node = new_node
                    break
                # No header entry: the item is not tracked in this
                # conditional tree, so it is dropped and current_node stays.
        pattern_list.clear()
Esempio n. 2
0
    def insert_into_tree(self, transaction_sorted_by_frequency, fp_tree):
        """Insert one transaction as a path below ``fp_tree``.

        The items are walked in order, starting at ``fp_tree``.  When the
        current node already has a child carrying the item, that child's
        ``count`` is incremented; otherwise a new ``FPTree`` node with
        ``count == 1`` is attached and, if ``self.header_table`` has an
        entry for the item, appended to the end of that entry's ``next``
        chain.

        The walk is iterative, so the transaction length is not limited by
        the interpreter's recursion limit.

        :param transaction_sorted_by_frequency: items of the transaction in
            insertion order.  The list is emptied before returning (callers
            have always received it back consumed).
        :param fp_tree: node under which the transaction is inserted.
        """
        current_node = fp_tree
        for item in transaction_sorted_by_frequency:
            for child in current_node.children:
                if child.item == item:
                    # Shared prefix: reuse the node and bump its count.
                    child.count += 1
                    next_node = child
                    break
            else:
                next_node = FPTree(item)
                next_node.count = 1
                next_node.parent = current_node
                current_node.children.append(next_node)
                for header_entry in self.header_table:
                    if header_entry.item == item:
                        # Append the node to the end of this item's chain.
                        chain_tail = header_entry
                        while chain_tail.next is not None:
                            chain_tail = chain_tail.next
                        chain_tail.next = next_node
                        break
            current_node = next_node
        transaction_sorted_by_frequency.clear()
Esempio n. 3
0
    def insert_into_conditional_fp_tree(self, pattern_list, count_of_pattern, conditional_fp_tree,
                                        conditional_header_table):
        """Add a conditional pattern as a path under ``conditional_fp_tree``.

        Starting at ``conditional_fp_tree``, each item of ``pattern_list``
        is handled in turn: an existing child with the same item has its
        ``count`` raised by ``count_of_pattern``; failing that, an item that
        has an entry in ``conditional_header_table`` gets a fresh ``FPTree``
        child (``count == count_of_pattern``) which is also linked at the
        end of the entry's ``next`` chain; an item with no header entry is
        skipped without descending.

        Implemented as a loop rather than recursion so that long patterns
        cannot exhaust the recursion limit.

        :param pattern_list: pattern items in insertion order; emptied on
            return, as callers have always observed.
        :param count_of_pattern: amount added to each node along the path.
        :param conditional_fp_tree: node the path is inserted under.
        :param conditional_header_table: header entries exposing ``item``
            and ``next``.
        """
        parent = conditional_fp_tree
        for item in pattern_list:
            for existing in parent.children:
                if existing.item == item:
                    # The path already exists up to here; only the count grows.
                    existing.count += count_of_pattern
                    parent = existing
                    break
            else:
                for entry in conditional_header_table:
                    if entry.item != item:
                        continue
                    created = FPTree(item)
                    created.count = count_of_pattern
                    created.parent = parent
                    parent.children.append(created)
                    # Walk to the last node of this item's chain and link there.
                    last = entry
                    while last.next is not None:
                        last = last.next
                    last.next = created
                    parent = created
                    break
                # Falling through without a match means the item has no
                # header entry; it is ignored and ``parent`` is unchanged.
        pattern_list.clear()
Esempio n. 4
0
    def insert_into_tree(self, transaction_sorted_by_frequency, fp_tree):
        """Add one transaction as a path under ``fp_tree``.

        Starting at ``fp_tree``, each item is handled in turn: an existing
        child with the same item has its ``count`` incremented; otherwise a
        fresh ``FPTree`` child with ``count == 1`` is attached and, when
        ``self.header_table`` contains an entry for the item, linked at the
        end of that entry's ``next`` chain.

        Implemented as a loop rather than recursion so that long
        transactions cannot exhaust the recursion limit.

        :param transaction_sorted_by_frequency: transaction items in
            insertion order; emptied on return, as callers have always
            observed.
        :param fp_tree: node the path is inserted under.
        """
        parent = fp_tree
        for item in transaction_sorted_by_frequency:
            for existing in parent.children:
                if existing.item == item:
                    # The path already exists up to here; only the count grows.
                    existing.count += 1
                    node = existing
                    break
            else:
                node = FPTree(item)
                node.count = 1
                node.parent = parent
                parent.children.append(node)
                for entry in self.header_table:
                    if entry.item == item:
                        # Walk to the last node of the chain and link there.
                        last = entry
                        while last.next is not None:
                            last = last.next
                        last.next = node
                        break
            parent = node
        transaction_sorted_by_frequency.clear()
Esempio n. 5
0
    def fp_growth(self, base, threshold, header_table, frequent_patterns):
        """Mine patterns from ``header_table`` and record them.

        For every header entry the method

        1. forms the current pattern (``base`` followed by the entry's item,
           space separated, or just the item when ``base`` is ``None``);
        2. follows the entry's ``next`` chain, summing node counts as the
           pattern's support and collecting each node's ancestor path (up
           to, but excluding, the root) as a conditional pattern;
        3. stores the support in ``frequent_patterns`` under the tuple of
           the pattern's items;
        4. totals the items of the conditional patterns, creates a header
           entry for every item whose total is at least ``threshold``,
           builds the conditional tree, and recurses when that tree has
           children.

        :param base: space-separated pattern mined so far, or ``None``.
        :param threshold: minimum total for an item to get a header entry.
        :param header_table: iterable of header entries (``item``, ``next``).
        :param frequent_patterns: dict updated in place, tuple -> support.
        """
        for header_entry in header_table:
            prefix = "" if base is None else base
            separator = "" if base is None else " "
            current_pattern = prefix + separator + header_entry.item

            support = 0
            conditional_pattern_base = {}
            node = header_entry.next
            while node is not None:
                support += node.count

                # Collect ancestors bottom-up, then emit them top-down.
                ancestors = []
                ancestor = node.parent
                while not ancestor.is_root():
                    ancestors.append(ancestor.item)
                    ancestor = ancestor.parent

                if ancestors:
                    # Keys keep the trailing space the path has always had.
                    path_key = " ".join(reversed(ancestors)) + " "
                    conditional_pattern_base[path_key] = node.count

                node = node.next

            frequent_patterns[tuple(current_pattern.split())] = support

            # Total count of every single item across the conditional paths.
            item_totals = {}
            for path_key, path_count in conditional_pattern_base.items():
                for name in path_key.split():
                    item_totals[name] = item_totals.get(name, 0) + path_count

            # Header entries only for items reaching the threshold.
            conditional_header_table = []
            for name, total in item_totals.items():
                if total >= threshold:
                    entry = FPTree(name)
                    entry.count = total
                    conditional_header_table.append(entry)

            conditional_fp_tree = self.build_conditional_fp_tree(
                conditional_pattern_base, item_totals, threshold,
                conditional_header_table)

            # Highest counts first.
            conditional_header_table.sort(
                key=lambda entry: entry.count, reverse=True)

            if conditional_fp_tree.children:
                self.fp_growth(current_pattern, threshold,
                               conditional_header_table, frequent_patterns)
Esempio n. 6
0
    def build_fp_tree(self, _input, items_map_to_frequencies,
                      sorted_items_by_frequencies, items_to_remove):
        """Build ``self.fpTree`` and ``self.header_table`` from a file.

        Every line of ``_input`` is one transaction whose items are split on
        whitespace and commas.  Items found in ``items_to_remove`` are
        dropped, repeated items within a line are kept once, and the
        remaining items are ordered by descending frequency (ties broken by
        descending lower-cased name, then by the raw string so the order is
        total) before being inserted with ``insert_into_tree``.

        After all lines are read, each header entry's ``count`` is set to
        the sum of the counts of the nodes on its ``next`` chain and the
        header table is sorted by that count, highest first.

        :param _input: path of the transaction file.
        :param items_map_to_frequencies: mapping item -> frequency; must
            contain every item that is not in ``items_to_remove``.
        :param sorted_items_by_frequencies: items that get a header entry.
        :param items_to_remove: items to ignore while reading transactions.
        """
        # One header entry per item; its ``next`` chain links the tree nodes.
        self.header_table = [
            FPTree(item) for item in sorted_items_by_frequencies
        ]

        self.fpTree = FPTree(None)
        self.fpTree.root = True

        def ordering_key(item):
            # Used with reverse=True: frequency, then lower-cased name, then
            # the raw string, so items differing only by case still order
            # the same way in every transaction.
            return (items_map_to_frequencies[item], item.lower(), item)

        with open(_input) as input_file:
            for input_line in input_file:
                tokens = re.split(r'[\s,\r\n]+', input_line)
                # dict.fromkeys drops repeated items (and keeps first-seen
                # order), so one transaction never yields the same item
                # twice on a path.
                distinct_items = dict.fromkeys(
                    token for token in tokens
                    if token and token not in items_to_remove)
                transaction = sorted(distinct_items, key=ordering_key,
                                     reverse=True)
                self.insert_into_tree(transaction, self.fpTree)

        # The file is closed by the ``with`` block; the remaining work only
        # touches the in-memory tree.
        for header_entry in self.header_table:
            total = 0
            node = header_entry.next
            while node is not None:
                total += node.count
                node = node.next
            header_entry.count = total

        self.header_table.sort(key=lambda entry: entry.count, reverse=True)
Esempio n. 7
0
    def fp_growth(self, base, threshold, header_table, frequent_patterns):
        """Recursively mine patterns reachable from ``header_table``.

        Each header entry contributes one pattern: ``base`` plus the entry's
        item (space separated; just the item when ``base`` is ``None``).
        Its support is the sum of the counts of the nodes on the entry's
        ``next`` chain and is written to ``frequent_patterns`` keyed by the
        tuple of the pattern's items.  The ancestor paths of those nodes
        (excluding the root) form the conditional pattern base, from which a
        conditional header table and tree are built; if that tree has
        children the method recurses with the current pattern as ``base``.

        :param base: space-separated pattern mined so far, or ``None``.
        :param threshold: minimum total for an item to get a header entry.
        :param header_table: iterable of header entries (``item``, ``next``).
        :param frequent_patterns: dict updated in place, tuple -> support.
        """
        for table_entry in header_table:
            lead = "" if base is None else base
            gap = "" if base is None else " "
            current_pattern = lead + gap + table_entry.item

            pattern_support = 0
            conditional_pattern_base = {}
            linked = table_entry.next
            while linked is not None:
                pattern_support += linked.count

                # Gather the ancestor items nearest-first, reverse for output.
                upward_items = []
                walker = linked.parent
                while not walker.is_root():
                    upward_items.append(walker.item)
                    walker = walker.parent

                if upward_items:
                    # The trailing space is part of the historical key format.
                    prefix_path = " ".join(reversed(upward_items)) + " "
                    conditional_pattern_base[prefix_path] = linked.count

                linked = linked.next

            frequent_patterns[tuple(current_pattern.split())] = pattern_support

            # Per-item totals over all conditional paths.
            totals_by_item = {}
            for prefix_path, occurrences in conditional_pattern_base.items():
                for token in prefix_path.split():
                    totals_by_item[token] = totals_by_item.get(token, 0) + occurrences

            # Only items that reach the threshold get a header entry.
            conditional_header_table = []
            for token, total in totals_by_item.items():
                if total >= threshold:
                    header_node = FPTree(token)
                    header_node.count = total
                    conditional_header_table.append(header_node)

            conditional_fp_tree = self.build_conditional_fp_tree(conditional_pattern_base, totals_by_item, threshold,
                                                                 conditional_header_table)

            # Highest counts first.
            conditional_header_table.sort(key=lambda header_node: header_node.count, reverse=True)

            if conditional_fp_tree.children:
                self.fp_growth(current_pattern, threshold, conditional_header_table, frequent_patterns)
Esempio n. 8
0
    def build_conditional_fp_tree(self, conditional_pattern_base, conditional_items_map_to_frequency, threshold,
                                  conditional_header_table):
        """Create a conditional FP-tree from a conditional pattern base.

        A new root node is created, then every pattern of
        ``conditional_pattern_base`` is split on whitespace, reduced to the
        items whose frequency is at least ``threshold`` and inserted with
        the pattern's count via ``insert_into_conditional_fp_tree``.

        :param conditional_pattern_base: mapping of whitespace-separated
            pattern string -> count.
        :param conditional_items_map_to_frequency: mapping item -> frequency.
        :param threshold: minimum frequency for an item to be inserted.
        :param conditional_header_table: header table handed to the insert.
        :return: the root node of the new tree.
        """
        tree_root = FPTree(None)
        tree_root.root = True

        for path, path_count in conditional_pattern_base.items():
            # Keep only the items that meet the threshold, in path order.
            kept_items = [
                name for name in path.split()
                if conditional_items_map_to_frequency[name] >= threshold
            ]
            self.insert_into_conditional_fp_tree(kept_items, path_count, tree_root, conditional_header_table)

        return tree_root
Esempio n. 9
0
    def build_conditional_fp_tree(self, conditional_pattern_base,
                                  conditional_items_map_to_frequency,
                                  threshold, conditional_header_table):
        """Build and return the conditional FP-tree for a pattern base.

        Starts from a fresh root node.  Each key of
        ``conditional_pattern_base`` is a whitespace-separated pattern; the
        items whose frequency reaches ``threshold`` are inserted, in order,
        with the count stored under that key.

        :param conditional_pattern_base: mapping pattern string -> count.
        :param conditional_items_map_to_frequency: mapping item -> frequency.
        :param threshold: minimum frequency for an item to be inserted.
        :param conditional_header_table: header table handed to the insert.
        :return: the root node of the new tree.
        """
        root_node = FPTree(None)
        root_node.root = True

        for pattern_text, pattern_count in conditional_pattern_base.items():
            # Drop the items below the threshold; order is preserved.
            frequent_part = [
                token for token in pattern_text.split()
                if conditional_items_map_to_frequency[token] >= threshold
            ]
            self.insert_into_conditional_fp_tree(
                frequent_part, pattern_count,
                root_node, conditional_header_table)

        return root_node