Example #1
0
 def __init__(self, url, min):
     self.__minSupport = min
     self.__sorter = LetterCount(url)
     self.__header = {}
     self.__fptree = None
Example #2
0
class FPtree:
    def __init__(self, url, min):
        self.__minSupport = min
        self.__sorter = LetterCount(url)
        self.__header = {}
        self.__fptree = None

    def growTree(self, data=None):
        # read data and call sort function to sort it
        # After finish this, we are at the step 4
        # flag is used to build conditional tree
        if data is None:
            sortList, lenght = self.__sorter.method1_hash(24)
            # store data back to frequency list and calcuate min support
            self.__minSupport = math.ceil(lenght * self.__minSupport)
        else:
            sortList = self.__sortedRead(data)

        if len(sortList) == 0:
            return None, None
        header = {}
        for i in range(len(sortList)):
            if sortList[i][1] >= self.__minSupport:
                #self.__header[sortList[i][0]] = [sortList[i][1], None]
                header[sortList[i][0]] = [sortList[i][1], None]

        # create root node
        root = Node('root', 1, None)

        # read data second time
        if data is None:
            file = self.__sorter.readfile()
        else:
            file = self.__originalRead(data)

        for linedata in file: # by line
            if data is None:
                count = 1
            else:
                count = file[linedata]
            tempdict = {}
            for letter in linedata: # by letter
                if header.has_key(letter):
                    # give counters for the temp dict
                    tempdict[letter] = header[letter][0]
                    # reorder the letter
            sortletter = [v[0] for v in sorted(tempdict.items(), key=lambda p: p[1], reverse=True)]
            self.__growing(sortletter, root, header, count)

        # Step 6 done

        if data is None:
            self.__header = header
            self.__fptree = root
        else:
            return root, header

    def __sortedRead(self, data):
        temp = {}
        for trans in data:
            for item in trans:
                temp[item] = temp.get(item, 0) + data[trans]
        sorted_list = [x for x in temp.iteritems()]

        return sorted_list

    def __originalRead(self, data):
        return data


    def __growing(self, letters, tree, header, count):
        # recursive grown tree
        if len(letters) != 0:
            if tree.has_key(letters[0]):
                tree.child[letters[0]].countAdd(1)
            else:
                tree.child[letters[0]] = Node(letters[0], count, tree)
                #update header
                if header[letters[0]][1] == None: # if this is new node, header will link to it
                    header[letters[0]][1] = tree.child[letters[0]]
                else:# otherwise, new node link to header's node, and header link to the new node
                    tree.child[letters[0]].nodeLink = header[letters[0]][1]
                    header[letters[0]][1] = tree.child[letters[0]]
            self.__growing(letters[1::], tree.child[letters[0]], header, count)

    def printHeader(self):
        print self.__header

    def __nodeToroot(self, node, prefixPath): #ascends from leaf node to root
        if node.parent != None:
            prefixPath.append(node.name)
            self.__nodeToroot(node.parent, prefixPath)

    def __findPath(self, Node):
        # node comes from header table
        # return the full path
        path = {}
        while Node != None:
            prefixPath = []
            self.__nodeToroot(Node, prefixPath)
            if len(prefixPath) > 1:
                path[frozenset(prefixPath[1:])] = Node.count
            Node = Node.nodeLink
        return path

    def minTree(self,  prex = None, freqList = None, header=None):
        # recursive find the pattern until all headers are gone
        # use set store items and avoid duplicate
        if header == None:
            prex = set([])
            freqList = []
            self.minTree(prex, freqList, header= self.__header)
            return freqList
        else:
            # sort according to low to high frequence
            sortHeader = [v[0] for v in sorted(header.items(), key=lambda x: x[1])]
            for letter in sortHeader: # sorted letters
                # copy the temp set, add new letter and store back to frequent list
                conditFreq = prex.copy()
                conditFreq.add(letter)
                freqList.append(conditFreq)
                condPattBases = self.__findPath(header[letter][1])
                tree, smallheader = self.growTree(data=condPattBases)
                # if smallheader has letters, we build the tree again
                if smallheader is not None:
                    self.minTree( conditFreq, freqList, header=smallheader)