def find_frequent_itemsets(transactions, minimum_support, include_support=False):
    """Mine itemsets from `transactions` using FP-growth.

    Every transaction is tallied into the module-level bookkeeping
    (`noOfTransactions`, `itemsAvail`, `itemSupport`, `itemsAvailIndex`),
    an FP-tree is built from the cleaned transactions, and the tree is then
    mined with `find_with_suffix`.

    Arguments:
    transactions -- any iterable of iterables of items. Each item must be
        hashable, because it is used as a key of `itemSupport`.
    minimum_support -- threshold passed unchanged to `find_with_suffix`
        (presumably an absolute occurrence count; confirm against that
        function).
    include_support -- kept for backward compatibility; this function does
        not read it.

    Returns the tuple `(supportedItemSets, master, itemSupport)`: the list
    of everything `find_with_suffix` yielded, the FP-tree that was built,
    and the global item -> occurrence-count mapping.

    NOTE: none of the global state is reset here, so the counters and the
    item list keep accumulating across calls.
    """
    global itemsAvail
    global noOfTransactions
    global itemSupport

    processed_transactions = []

    # Set mirror of itemsAvail so the "have we seen this item?" test is O(1)
    # instead of a linear scan of the list for every item of every
    # transaction.
    known_items = set(itemsAvail)

    # Load the passed-in transactions and count the support that individual
    # items have.
    # NOTE(review): `itemSupport[item] += 1` on a first-seen item only works
    # if itemSupport supplies a default (e.g. defaultdict(int)) -- confirm at
    # its definition.
    for transaction in transactions:
        noOfTransactions += 1
        processed = []
        for item in transaction:
            if item not in known_items:
                known_items.add(item)
                itemsAvail.append(item)
            itemSupport[item] += 1
            processed.append(item)
        processed_transactions.append(processed)

    # Keep itemsAvail ordered by ascending support; this ordering is what the
    # OR-ed frequent-itemset search relies on.
    itemsAvail.sort(key=lambda v: itemSupport[v])

    # Refresh the item -> position lookup to match the new ordering.
    for position, item in enumerate(itemsAvail):
        itemsAvailIndex[item] = position

    # itemSupport is not pruned of infrequent items here, so the mapping
    # returned below covers every item seen so far.

    # Build the FP-tree. Each transaction is passed through
    # clean_transaction (defined elsewhere) before insertion -- presumably it
    # filters and orders the items; confirm against that function.
    master = FPTree()
    for transaction in imap(clean_transaction, processed_transactions):
        master.add(transaction)

    # Materialise the mining results; confidence values could be checked on
    # top of this list by the caller.
    supportedItemSets = list(find_with_suffix(master, [], minimum_support))

    return supportedItemSets, master, itemSupport
def _detach_from_parent(node):
    """Ask `node`'s parent to remove it; do nothing if it has no parent."""
    if node.parent is not None:
        node.parent.remove(node)


def conditional_tree_from_paths(paths, minimum_support):
    """Assemble a conditional FP-tree out of the given prefix paths.

    The item of the last node of the first path is taken as the condition
    item. Nodes are copied path by path into a fresh FPTree, counts are
    rebuilt from the condition-item nodes upwards, and finally every item
    whose total count is below `minimum_support` -- as well as the
    condition item itself -- is detached from the tree.

    Raises AssertionError when `paths` yields nothing, since no condition
    item can be determined in that case.
    """
    tree = FPTree()
    condition_item = None
    seen_items = set()

    # Step 1: replay each path from the root, creating nodes that are not in
    # the new tree yet. Only the condition-item nodes keep their original
    # count; every other node starts at zero and is filled in by step 2.
    for path in paths:
        if condition_item is None:
            condition_item = path[-1].item

        cursor = tree.root
        for source in path:
            child = cursor.search(source.item)
            if not child:
                # First time this item appears under the current parent.
                seen_items.add(source.item)
                if source.item == condition_item:
                    initial_count = source.count
                else:
                    initial_count = 0
                child = FPNode(tree, source.item, initial_count)
                cursor.add(child)
                tree._update_route(child)
            cursor = child

    assert condition_item is not None

    # Step 2: push the count of each path's last node up to all of the
    # nodes that precede it on that path.
    for path in tree.prefix_paths(condition_item):
        leaf_count = path[-1].count
        for ancestor in reversed(path[:-1]):
            ancestor._count += leaf_count

    # Step 3: drop every item whose summed count no longer reaches the
    # minimum support.
    for item in seen_items:
        support = 0
        for occurrence in tree.nodes(item):
            support += occurrence.count
        if support < minimum_support:
            for occurrence in tree.nodes(item):
                _detach_from_parent(occurrence)

    # Step 4: remove the nodes of the condition item itself. Some of them
    # may already be orphans, which the helper tolerates.
    for occurrence in tree.nodes(condition_item):
        _detach_from_parent(occurrence)

    return tree