def write_frequent_itemsets(input_path, output_path, support=-10, min_set_size=1, max_set_size=3): # parse transactions from file transactions = parser.parse_csv_to_mat(input_path) # mine frequent itemsets frequent_itemsets = fpgrowth(transactions, supp=support, min=min_set_size, max=max_set_size) # write result to file with open(output_path, 'w+') as fd: pickle.dump(frequent_itemsets, fd)
def item_stats(): """ Plot stats on frequent itemset occurences """ transactions = parser.parse_csv_to_mat('/Users/ahkj/Dropbox/SAAS/data/csv/sample-big/customers.txt') frequent_itemsets = fpgrowth(transactions, supp=0.0005, max=3 ) frequencies_1=[] frequencies_2 =[] frequencies_3 = [] for frequent_itemset in frequent_itemsets: if len(frequent_itemset[0])==1: frequencies_1.append(frequent_itemset[1][0]) elif len(frequent_itemset[0])==2: frequencies_2.append(frequent_itemset[1][0]) elif len(frequent_itemset[0])==3: frequencies_3.append(frequent_itemset[1][0]) frequencies_counts_1 = [0 for x in range(max(frequencies_1)+1)] frequencies_counts_2 = [0 for x in range(max(frequencies_2)+1)] frequencies_counts_3 = [0 for x in range(max(frequencies_3)+1)] for frequencie in frequencies_1: frequencies_counts_1[frequencie]+=1 for frequencie in frequencies_2: frequencies_counts_2[frequencie]+=1 for frequencie in frequencies_3: frequencies_counts_3[frequencie]+=1 cleaned_ys_1 = frequencies_counts_1[0:30] xs_1 =[x for x in range(len(cleaned_ys_1))] plt.scatter(xs_1, cleaned_ys_1) plot_item_stats(xs_1, cleaned_ys_1, '../tmp/plots/item_stats/signletons.png') cleaned_ys_2 = frequencies_counts_2[0:30] xs_2 =[x for x in range(len(cleaned_ys_2))] plot_item_stats(xs_2, cleaned_ys_2, '../tmp/plots/item_stats/pairs.png') cleaned_ys_3 = frequencies_counts_3[0:30] xs_3 =[x for x in range(len(cleaned_ys_3))] plot_item_stats(xs_3, cleaned_ys_3, '../tmp/plots/item_stats/triples.png') # item_stats()