예제 #1
0
    def get_large_sets(self):
        """
        Calculates large sets from transactions. Returns dictionary where the key is arity and the value is list of item sets.

        The transactions are streamed through ``self.root`` pass after pass
        (optionally in a random order that is drawn once and reused for every
        pass). After every ``M`` processed transactions, and at the end of
        each pass, the root is asked to update its states; the passes stop as
        soon as it reports that it is finished.
        """
        total = len(self.__transactions)
        counter = self.__count_items_in_transactions(self.__transactions)
        L1 = self.__getL1(counter, self.__get_minsup_count())

        self.root = Root(self.__minsup, total, L1, self.__partial)
        self.root.update_states(0)

        if self.__randomize:
            transaction_order = random.sample(range(total), total)
        else:
            transaction_order = range(total)

        # A zero interval would make the modulo below raise ZeroDivisionError.
        M = max(1, self.__M)

        # With no transactions the root can never be driven to completion, so
        # skip the passes entirely instead of looping forever.
        finished = total == 0
        while not finished:
            trans_count = 0
            # Checkpoints are driven by the position within the pass, not by
            # the transaction index: with a randomized order every processed
            # transaction is then included in a count reported to the root,
            # and each pass ends exactly on a checkpoint.
            for position, i in enumerate(transaction_order):
                self.root.increment(self.__transactions.get(i))
                trans_count += 1
                if (position + 1) % M == 0 or position + 1 == total:
                    finished = self.root.update_states(trans_count)
                    trans_count = 0
                    if finished:
                        break

        return self.root.get_large_sets({})
예제 #2
0
    def get_large_sets(self):
        """
        Calculates large sets from transactions. Returns dictionary where the key is arity and the value is list of item sets.

        The transactions are streamed through ``self.root`` pass after pass
        (optionally in a random order that is drawn once and reused for every
        pass). After every ``M`` processed transactions, and at the end of
        each pass, the root is asked to update its states; the passes stop as
        soon as it reports that it is finished.
        """
        total = len(self.__transactions)
        counter = self.__count_items_in_transactions(self.__transactions)
        L1 = self.__getL1(counter, self.__get_minsup_count())

        self.root = Root(self.__minsup, total, L1, self.__partial)
        self.root.update_states(0)

        if self.__randomize:
            transaction_order = random.sample(range(total), total)
        else:
            transaction_order = range(total)

        # A zero interval would make the modulo below raise ZeroDivisionError.
        M = max(1, self.__M)

        # With no transactions the root can never be driven to completion, so
        # skip the passes entirely instead of looping forever.
        finished = total == 0
        while not finished:
            trans_count = 0
            # Checkpoints are driven by the position within the pass, not by
            # the transaction index: with a randomized order every processed
            # transaction is then included in a count reported to the root,
            # and each pass ends exactly on a checkpoint.
            for position, i in enumerate(transaction_order):
                self.root.increment(self.__transactions.get(i))
                trans_count += 1
                if (position + 1) % M == 0 or position + 1 == total:
                    finished = self.root.update_states(trans_count)
                    trans_count = 0
                    if finished:
                        break

        return self.root.get_large_sets({})
예제 #3
0
class Dic(object):
    """
    Finds large item sets by repeatedly streaming transactions through a
    ``Root`` counter and letting it update its states at fixed intervals.
    """

    def __init__(self, transactions, minsup, M, randomize, partial):
        """
        Stores the mining parameters and derives the checkpoint interval.

        transactions -- collection supporting ``len()``, iteration over
                        transactions and ``get(index)``
        minsup       -- minimum support as a fraction of the number of
                        transactions
        M            -- checkpoint interval: a positive value is used as is,
                        0 selects ``ceil(len(transactions) * minsup)`` and a
                        negative value selects ``-M`` times that amount
        randomize    -- when true, transactions are visited in a random order
        partial      -- flag forwarded unchanged to ``Root``
        """
        self.__minsup = minsup
        self.__transactions = transactions
        if M > 0:
            self.__M = M
        else:
            # Clamp the derived base to 1: an empty transaction collection or
            # a zero minsup would otherwise give a zero interval, which breaks
            # the modulo used for checkpointing.
            base = max(1, int(math.ceil(len(transactions) * minsup)))
            self.__M = base if M == 0 else -M * base
        self.__minsup_count = self.__get_minsup_count()
        self.__randomize = randomize
        self.__partial = partial

    def get_large_sets(self):
        """
        Calculates large sets from transactions. Returns dictionary where the key is arity and the value is list of item sets.

        The transactions are streamed through ``self.root`` pass after pass
        (optionally in a random order that is drawn once and reused for every
        pass). After every ``M`` processed transactions, and at the end of
        each pass, the root is asked to update its states; the passes stop as
        soon as it reports that it is finished.
        """
        total = len(self.__transactions)
        counter = self.__count_items_in_transactions(self.__transactions)
        L1 = self.__getL1(counter, self.__get_minsup_count())

        self.root = Root(self.__minsup, total, L1, self.__partial)
        self.root.update_states(0)

        if self.__randomize:
            transaction_order = random.sample(range(total), total)
        else:
            transaction_order = range(total)

        M = self.__M

        # With no transactions the root can never be driven to completion, so
        # skip the passes entirely instead of looping forever.
        finished = total == 0
        while not finished:
            trans_count = 0
            # Checkpoints are driven by the position within the pass, not by
            # the transaction index: with a randomized order every processed
            # transaction is then included in a count reported to the root,
            # and each pass ends exactly on a checkpoint.
            for position, i in enumerate(transaction_order):
                self.root.increment(self.__transactions.get(i))
                trans_count += 1
                if (position + 1) % M == 0 or position + 1 == total:
                    finished = self.root.update_states(trans_count)
                    trans_count = 0
                    if finished:
                        break

        return self.root.get_large_sets({})

    def get_counter(self):
        """
        Wraps the current root in a ``DicCounter``. ``self.root`` is only
        assigned by ``get_large_sets``, so that method has to run first.
        """
        return DicCounter(self.root)

    def get_large_sets_and_counter(self):
        """
        Returns the pair ``(large sets, counter)``; the large sets are
        computed first, which also creates the root the counter wraps.
        """
        return self.get_large_sets(), self.get_counter()

    @staticmethod
    def __count_items_in_transactions(transactions):
        """
        Counts items and returns dictionary with them as keys and their count as value.

        Each key is a one-element tuple ``(item,)``; every occurrence of an
        item inside a transaction adds one to its count.
        """
        counter = defaultdict(int)
        for transaction in transactions:
            for item in transaction:
                counter[(item,)] += 1
        return counter

    def __get_minsup_count(self):
        """
        Calculates and returns minimum support given in number of transactions.

        The product is truncated towards zero by ``int()``.
        """
        return int(len(self.__transactions) * self.__minsup)

    @staticmethod
    def __getL1(counter, minsup_count):
        """
        Calculates and returns first large set: the entries of ``counter``
        whose count is at least ``minsup_count``.
        """
        # items() works on both Python 2 and Python 3 dictionaries.
        return {k: v for k, v in counter.items() if v >= minsup_count}
예제 #4
0
class Dic(object):
    """
    Finds large item sets by repeatedly streaming transactions through a
    ``Root`` counter and letting it update its states at fixed intervals.
    """

    def __init__(self, transactions, minsup, M, randomize, partial):
        """
        Stores the mining parameters and derives the checkpoint interval.

        transactions -- collection supporting ``len()``, iteration over
                        transactions and ``get(index)``
        minsup       -- minimum support as a fraction of the number of
                        transactions
        M            -- checkpoint interval: a positive value is used as is,
                        0 selects ``ceil(len(transactions) * minsup)`` and a
                        negative value selects ``-M`` times that amount
        randomize    -- when true, transactions are visited in a random order
        partial      -- flag forwarded unchanged to ``Root``
        """
        self.__minsup = minsup
        self.__transactions = transactions
        if M > 0:
            self.__M = M
        else:
            # Clamp the derived base to 1: an empty transaction collection or
            # a zero minsup would otherwise give a zero interval, which breaks
            # the modulo used for checkpointing.
            base = max(1, int(math.ceil(len(transactions) * minsup)))
            self.__M = base if M == 0 else -M * base
        self.__minsup_count = self.__get_minsup_count()
        self.__randomize = randomize
        self.__partial = partial

    def get_large_sets(self):
        """
        Calculates large sets from transactions. Returns dictionary where the key is arity and the value is list of item sets.

        The transactions are streamed through ``self.root`` pass after pass
        (optionally in a random order that is drawn once and reused for every
        pass). After every ``M`` processed transactions, and at the end of
        each pass, the root is asked to update its states; the passes stop as
        soon as it reports that it is finished.
        """
        total = len(self.__transactions)
        counter = self.__count_items_in_transactions(self.__transactions)
        L1 = self.__getL1(counter, self.__get_minsup_count())

        self.root = Root(self.__minsup, total, L1, self.__partial)
        self.root.update_states(0)

        if self.__randomize:
            transaction_order = random.sample(range(total), total)
        else:
            transaction_order = range(total)

        M = self.__M

        # With no transactions the root can never be driven to completion, so
        # skip the passes entirely instead of looping forever.
        finished = total == 0
        while not finished:
            trans_count = 0
            # Checkpoints are driven by the position within the pass, not by
            # the transaction index: with a randomized order every processed
            # transaction is then included in a count reported to the root,
            # and each pass ends exactly on a checkpoint.
            for position, i in enumerate(transaction_order):
                self.root.increment(self.__transactions.get(i))
                trans_count += 1
                if (position + 1) % M == 0 or position + 1 == total:
                    finished = self.root.update_states(trans_count)
                    trans_count = 0
                    if finished:
                        break

        return self.root.get_large_sets({})

    def get_counter(self):
        """
        Wraps the current root in a ``DicCounter``. ``self.root`` is only
        assigned by ``get_large_sets``, so that method has to run first.
        """
        return DicCounter(self.root)

    def get_large_sets_and_counter(self):
        """
        Returns the pair ``(large sets, counter)``; the large sets are
        computed first, which also creates the root the counter wraps.
        """
        return self.get_large_sets(), self.get_counter()

    @staticmethod
    def __count_items_in_transactions(transactions):
        """
        Counts items and returns dictionary with them as keys and their count as value.

        Each key is a one-element tuple ``(item,)``; every occurrence of an
        item inside a transaction adds one to its count.
        """
        counter = defaultdict(int)
        for transaction in transactions:
            for item in transaction:
                counter[(item, )] += 1
        return counter

    def __get_minsup_count(self):
        """
        Calculates and returns minimum support given in number of transactions.

        The product is truncated towards zero by ``int()``.
        """
        return int(len(self.__transactions) * self.__minsup)

    @staticmethod
    def __getL1(counter, minsup_count):
        """
        Calculates and returns first large set: the entries of ``counter``
        whose count is at least ``minsup_count``.
        """
        # items() works on both Python 2 and Python 3 dictionaries.
        return {k: v for k, v in counter.items() if v >= minsup_count}