def generateFrequentItemSets(min_support, number_of_threads, start_k, end_k, old_L_k_1):
    """Iteratively build frequent item-sets of sizes start_k..end_k with worker processes.

    Parameters:
        min_support: minimum support threshold forwarded to each worker.
        number_of_threads: number of parallel worker processes per level.
        start_k: first item-set size to generate.
        end_k: last item-set size to generate (inclusive).
        old_L_k_1: frequent item-sets of size start_k - 1 (AprioriHashTable-like).

    Returns:
        (L_k_1, L): the last generated level table and the accumulated
        dictionary of all frequent item-sets (filled via insertHashIntoDictionary).
    """
    L = {}
    k = start_k
    L_k_1 = old_L_k_1
    # Register the managed proxy type once; re-registering every level
    # (as the original did) is redundant.
    BaseManager.register("AprioriHash", AprioriHashTable)
    while not L_k_1.isEmpty() and k <= end_k:
        print('extracting item-sets with ' + str(k) + ' items ....')
        # Divide L_k_1 into n parts, one per worker process.
        L_k = AprioriHashTable()
        sub_parts = L_k_1.separateToSubParts(number_of_threads)
        processes = []
        C_ks = []
        manager = BaseManager()
        manager.start()
        for sub_L_k_1 in sub_parts:
            # BUG FIX: allocate one managed result table per worker.
            # The original appended a single table and never advanced
            # its index, so every worker shared C_ks[0].
            C_k = manager.AprioriHash()
            C_ks.append(C_k)
            process_i = Process(target=runForFrequentItemsetsWithKItems,
                                args=(sub_L_k_1, k, min_support, C_k))
            processes.append(process_i)
        # BUG FIX: start all workers before joining any of them. The
        # original start()+join() inside one loop ran them sequentially,
        # defeating the multiprocessing entirely.
        for process_i in processes:
            process_i.start()
        for process_i in processes:
            process_i.join()
        # Merge the per-worker candidate tables into this level's result.
        for new_C_k in C_ks:
            L_k.merge(new_C_k)
        L_k_1.clear()
        L_k_1 = L_k
        insertHashIntoDictionary(L_k_1, L)
        k += 1
    print('stop at k = ' + str(k))
    return L_k_1, L
def generate_frequent_itemsets(self, min_sup, nthreads, end, output_file, write_support = False):
    """Generate all frequent item-sets and write them to output_file.

    Parameters:
        min_sup: minimum support threshold.
        nthreads: number of worker processes used per level.
        end: maximum item-set size to generate, or -1 for no limit.
        output_file: path of the result file (truncated, then appended to).
        write_support: when True, support values are written with each item-set.
    """
    # Step 1: generate frequent item-sets with 1 item and write to file.
    nTransactions = self.data_set.size()
    # The first line of the output file records the transaction count.
    with open(output_file, 'w') as text_file:
        text_file.write(str(nTransactions))
        text_file.write('\n')
    self.generate_L1(min_sup)
    freq_itemsets_dict = self.L1.generate_itemset_dictionary()
    freq_itemsets_dict.ntransactions = nTransactions
    freq_itemsets_dict.save_2_file(output_file, 'a', write_support)
    freq_itemsets_dict.clear()
    # Step 2: generate frequent item-sets with more than 1 item, level by
    # level, appending each level to the file until no candidates remain
    # (or the requested size limit `end` is reached).
    k = 2
    L_k1 = self.L1
    # Register the managed proxy type once instead of once per level.
    BaseManager.register("AprioriHash", HashTable)
    while not L_k1.is_empty() and (end == -1 or k <= end):
        print('extracting item-sets with ' + str(k) + ' items ....')
        # Split the previous level into chunks, one per worker process.
        L_k = HashTable()
        chunks = L_k1.split(nthreads)
        processes = []
        C_ks = []
        manager = BaseManager()
        manager.start()
        for L_k_1_chunk in chunks:
            # BUG FIX: allocate one managed result table per chunk. The
            # original appended a single table but still advanced `index`,
            # so C_ks[index] raised IndexError as soon as there was a
            # second chunk (i.e. whenever nthreads > 1).
            C_k = manager.AprioriHash()
            C_ks.append(C_k)
            process_i = Process(target = Apriori.generate_Lk,
                                args=(min_sup, L_k_1_chunk, C_k, k))
            processes.append(process_i)
        # BUG FIX: start every worker before joining so they actually run
        # in parallel; start()+join() in one loop executes sequentially.
        for process_i in processes:
            process_i.start()
        for process_i in processes:
            process_i.join()
        # Merge the results returned from the worker processes.
        for new_C_k in C_ks:
            L_k.append(new_C_k)
        L_k1.clear()
        L_k1 = L_k
        # Append frequent item-sets with k items to the output file.
        freq_itemsets_dict = L_k1.generate_itemset_dictionary()
        print('Writing frequent itemset to file ' + str(freq_itemsets_dict.size()))
        freq_itemsets_dict.ntransactions = nTransactions
        freq_itemsets_dict.save_2_file(output_file, 'a', write_support)
        freq_itemsets_dict.clear()
        k += 1
    print('stop at k = ' + str(k))