def get_large_sets(self):
    """
    Calculates large sets from transactions.

    Builds the first large set from single-item counts, creates the
    counting tree (``self.root``) and then repeatedly passes over the
    transactions. After every ``M`` processed transactions, and at the end
    of each pass, ``self.root.update_states`` is called with the number of
    transactions processed since the previous call; the passes stop as soon
    as it reports that counting is finished.

    Returns dictionary where the key is arity and the value is list of
    item sets (as produced by ``self.root.get_large_sets``).
    """
    total = len(self.__transactions)
    counter = self.__count_items_in_transactions(self.__transactions)
    L1 = self.__getL1(counter, self.__get_minsup_count())

    self.root = Root(self.__minsup, total, L1, self.__partial)
    self.root.update_states(0)

    # range() exists on both Python 2 and 3 (xrange() is Python 2 only).
    # random.sample is kept so a seeded run yields the same permutation.
    if self.__randomize:
        transaction_order = random.sample(range(total), total)
    else:
        transaction_order = range(total)

    # A zero interval would make the modulo below raise ZeroDivisionError.
    interval = max(1, self.__M)

    # With no transactions the pass below never reaches a checkpoint, so
    # the loop could never terminate; skip it entirely in that case.
    finished = total == 0
    while not finished:
        trans_count = 0
        # Checkpoints are driven by the number of transactions processed in
        # this pass, not by the transaction id, so the intervals stay
        # regular when the order is randomized and the last transaction of
        # the pass always triggers a checkpoint.
        for position, i in enumerate(transaction_order, 1):
            self.root.increment(self.__transactions.get(i))
            trans_count += 1
            if position % interval == 0 or position == total:
                finished = self.root.update_states(trans_count)
                trans_count = 0
                if finished:
                    break

    return self.root.get_large_sets({})
def get_large_sets(self):
    """
    Calculates large sets from transactions.

    Builds the first large set from single-item counts, creates the
    counting tree (``self.root``) and then repeatedly passes over the
    transactions. After every ``M`` processed transactions, and at the end
    of each pass, ``self.root.update_states`` is called with the number of
    transactions processed since the previous call; the passes stop as soon
    as it reports that counting is finished.

    Returns dictionary where the key is arity and the value is list of
    item sets (as produced by ``self.root.get_large_sets``).
    """
    total = len(self.__transactions)
    counter = self.__count_items_in_transactions(self.__transactions)
    L1 = self.__getL1(counter, self.__get_minsup_count())

    self.root = Root(self.__minsup, total, L1, self.__partial)
    self.root.update_states(0)

    # range() exists on both Python 2 and 3 (xrange() is Python 2 only).
    # random.sample is kept so a seeded run yields the same permutation.
    if self.__randomize:
        transaction_order = random.sample(range(total), total)
    else:
        transaction_order = range(total)

    # A zero interval would make the modulo below raise ZeroDivisionError.
    interval = max(1, self.__M)

    # With no transactions the pass below never reaches a checkpoint, so
    # the loop could never terminate; skip it entirely in that case.
    finished = total == 0
    while not finished:
        trans_count = 0
        # Checkpoints are driven by the number of transactions processed in
        # this pass, not by the transaction id, so the intervals stay
        # regular when the order is randomized and the last transaction of
        # the pass always triggers a checkpoint.
        for position, i in enumerate(transaction_order, 1):
            self.root.increment(self.__transactions.get(i))
            trans_count += 1
            if position % interval == 0 or position == total:
                finished = self.root.update_states(trans_count)
                trans_count = 0
                if finished:
                    break

    return self.root.get_large_sets({})
class Dic(object):
    """
    Finds large item sets by counting them in a tree (``Root``) that is
    updated at regular checkpoints while passing over the transactions.
    """

    def __init__(self, transactions, minsup, M, randomize, partial):
        """
        Stores the configuration of a run.

        transactions -- collection of transactions; must support ``len()``,
                        iteration and ``get(index)``
        minsup       -- minimum support as a fraction of all transactions
        M            -- checkpoint interval in transactions; ``0`` uses
                        ``ceil(len(transactions) * minsup)`` and a negative
                        value uses ``-M`` times that amount
        randomize    -- when true, transactions are visited in random order
        partial      -- flag passed unchanged to ``Root``
        """
        self.__minsup = minsup
        self.__transactions = transactions
        if M == 0:
            self.__M = int(math.ceil(len(transactions) * minsup))
        elif M < 0:
            self.__M = -M * int(math.ceil(len(transactions) * minsup))
        elif M > 0:
            self.__M = M
        self.__minsup_count = self.__get_minsup_count()
        self.__randomize = randomize
        self.__partial = partial

    def get_large_sets(self):
        """
        Calculates large sets from transactions.

        Builds the first large set from single-item counts, creates the
        counting tree (``self.root``) and then repeatedly passes over the
        transactions. After every ``M`` processed transactions, and at the
        end of each pass, ``self.root.update_states`` is called with the
        number of transactions processed since the previous call; the
        passes stop as soon as it reports that counting is finished.

        Returns dictionary where the key is arity and the value is list of
        item sets (as produced by ``self.root.get_large_sets``).
        """
        total = len(self.__transactions)
        counter = self.__count_items_in_transactions(self.__transactions)
        L1 = self.__getL1(counter, self.__get_minsup_count())

        self.root = Root(self.__minsup, total, L1, self.__partial)
        self.root.update_states(0)

        # range() exists on both Python 2 and 3 (xrange() is Python 2
        # only). random.sample is kept so a seeded run yields the same
        # permutation.
        if self.__randomize:
            transaction_order = random.sample(range(total), total)
        else:
            transaction_order = range(total)

        # A zero interval would make the modulo below raise
        # ZeroDivisionError.
        interval = max(1, self.__M)

        # With no transactions the pass below never reaches a checkpoint,
        # so the loop could never terminate; skip it entirely in that case.
        finished = total == 0
        while not finished:
            trans_count = 0
            # Checkpoints are driven by the number of transactions
            # processed in this pass, not by the transaction id, so the
            # intervals stay regular when the order is randomized and the
            # last transaction of the pass always triggers a checkpoint.
            for position, i in enumerate(transaction_order, 1):
                self.root.increment(self.__transactions.get(i))
                trans_count += 1
                if position % interval == 0 or position == total:
                    finished = self.root.update_states(trans_count)
                    trans_count = 0
                    if finished:
                        break

        return self.root.get_large_sets({})

    def get_counter(self):
        """
        Returns a ``DicCounter`` wrapping the current counting tree.

        ``self.root`` is only created by ``get_large_sets``, so that method
        has to be called first.
        """
        return DicCounter(self.root)

    def get_large_sets_and_counter(self):
        """
        Returns a tuple of the large sets and the counter built from the
        same run.
        """
        return self.get_large_sets(), self.get_counter()

    @staticmethod
    def __count_items_in_transactions(transactions):
        """
        Counts items and returns dictionary with them as keys and their
        count as value. Each key is a one-element tuple ``(item,)``.
        """
        counter = defaultdict(int)
        for transaction in transactions:
            for item in transaction:
                counter[(item,)] += 1
        return counter

    def __get_minsup_count(self):
        """
        Calculates and returns minimum support given in number of
        transactions (truncated towards zero).
        """
        return int(len(self.__transactions) * self.__minsup)

    @staticmethod
    def __getL1(counter, minsup_count):
        """
        Calculates and returns first large set: the entries of ``counter``
        whose count is at least ``minsup_count``.
        """
        # items() works on both Python 2 and 3 (iteritems() is Python 2
        # only).
        L1 = {}
        for k, v in counter.items():
            if v >= minsup_count:
                L1[k] = v
        return L1
class Dic(object):
    """
    Finds large item sets by counting them in a tree (``Root``) that is
    updated at regular checkpoints while passing over the transactions.
    """

    def __init__(self, transactions, minsup, M, randomize, partial):
        """
        Stores the configuration of a run.

        transactions -- collection of transactions; must support ``len()``,
                        iteration and ``get(index)``
        minsup       -- minimum support as a fraction of all transactions
        M            -- checkpoint interval in transactions; ``0`` uses
                        ``ceil(len(transactions) * minsup)`` and a negative
                        value uses ``-M`` times that amount
        randomize    -- when true, transactions are visited in random order
        partial      -- flag passed unchanged to ``Root``
        """
        self.__minsup = minsup
        self.__transactions = transactions
        if M == 0:
            self.__M = int(math.ceil(len(transactions) * minsup))
        elif M < 0:
            self.__M = -M * int(math.ceil(len(transactions) * minsup))
        elif M > 0:
            self.__M = M
        self.__minsup_count = self.__get_minsup_count()
        self.__randomize = randomize
        self.__partial = partial

    def get_large_sets(self):
        """
        Calculates large sets from transactions.

        Builds the first large set from single-item counts, creates the
        counting tree (``self.root``) and then repeatedly passes over the
        transactions. After every ``M`` processed transactions, and at the
        end of each pass, ``self.root.update_states`` is called with the
        number of transactions processed since the previous call; the
        passes stop as soon as it reports that counting is finished.

        Returns dictionary where the key is arity and the value is list of
        item sets (as produced by ``self.root.get_large_sets``).
        """
        total = len(self.__transactions)
        counter = self.__count_items_in_transactions(self.__transactions)
        L1 = self.__getL1(counter, self.__get_minsup_count())

        self.root = Root(self.__minsup, total, L1, self.__partial)
        self.root.update_states(0)

        # range() exists on both Python 2 and 3 (xrange() is Python 2
        # only). random.sample is kept so a seeded run yields the same
        # permutation.
        if self.__randomize:
            transaction_order = random.sample(range(total), total)
        else:
            transaction_order = range(total)

        # A zero interval would make the modulo below raise
        # ZeroDivisionError.
        interval = max(1, self.__M)

        # With no transactions the pass below never reaches a checkpoint,
        # so the loop could never terminate; skip it entirely in that case.
        finished = total == 0
        while not finished:
            trans_count = 0
            # Checkpoints are driven by the number of transactions
            # processed in this pass, not by the transaction id, so the
            # intervals stay regular when the order is randomized and the
            # last transaction of the pass always triggers a checkpoint.
            for position, i in enumerate(transaction_order, 1):
                self.root.increment(self.__transactions.get(i))
                trans_count += 1
                if position % interval == 0 or position == total:
                    finished = self.root.update_states(trans_count)
                    trans_count = 0
                    if finished:
                        break

        return self.root.get_large_sets({})

    def get_counter(self):
        """
        Returns a ``DicCounter`` wrapping the current counting tree.

        ``self.root`` is only created by ``get_large_sets``, so that method
        has to be called first.
        """
        return DicCounter(self.root)

    def get_large_sets_and_counter(self):
        """
        Returns a tuple of the large sets and the counter built from the
        same run.
        """
        return self.get_large_sets(), self.get_counter()

    @staticmethod
    def __count_items_in_transactions(transactions):
        """
        Counts items and returns dictionary with them as keys and their
        count as value. Each key is a one-element tuple ``(item,)``.
        """
        counter = defaultdict(int)
        for transaction in transactions:
            for item in transaction:
                counter[(item,)] += 1
        return counter

    def __get_minsup_count(self):
        """
        Calculates and returns minimum support given in number of
        transactions (truncated towards zero).
        """
        return int(len(self.__transactions) * self.__minsup)

    @staticmethod
    def __getL1(counter, minsup_count):
        """
        Calculates and returns first large set: the entries of ``counter``
        whose count is at least ``minsup_count``.
        """
        # items() works on both Python 2 and 3 (iteritems() is Python 2
        # only).
        L1 = {}
        for k, v in counter.items():
            if v >= minsup_count:
                L1[k] = v
        return L1