Code example #1
from multiprocessing import Process
from multiprocessing.managers import BaseManager

def generateFrequentItemSets(min_support, number_of_threads, start_k, end_k,
                             old_L_k_1):
    L = {}
    k = start_k
    L_k_1 = old_L_k_1

    # register the shared hash-table type once, before any manager starts
    BaseManager.register("AprioriHash", AprioriHashTable)

    while not L_k_1.isEmpty() and k <= end_k:

        print('extracting item-sets with ' + str(k) + ' items ....')

        # divide L_k_1 into one chunk per worker process
        L_k = AprioriHashTable()
        sub_parts = L_k_1.separateToSubParts(number_of_threads)
        processes = []

        # create one managed (process-shared) candidate table per chunk,
        # so every worker writes into its own C_k
        manager = BaseManager()
        manager.start()
        C_ks = [manager.AprioriHash() for _ in sub_parts]

        for index, sub_L_k_1 in enumerate(sub_parts):
            process_i = Process(target=runForFrequentItemsetsWithKItems,
                                args=(sub_L_k_1, k, min_support, C_ks[index]))
            processes.append(process_i)

        # start every worker first, then join them all, so the processes
        # actually run in parallel instead of one after another
        for process_i in processes:
            process_i.start()
        for process_i in processes:
            process_i.join()

        # merge the per-process candidate tables into L_k
        for new_C_k in C_ks:
            L_k.merge(new_C_k)
        L_k_1.clear()
        L_k_1 = L_k

        insertHashIntoDictionary(L_k_1, L)
        k += 1

    print('stop at k = ' + str(k))
    return L_k_1, L
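The core multiprocessing pattern above is easy to get wrong, so here is a minimal, self-contained sketch of it in isolation. The worker function count_items, the toy chunks, and the final merge loop are illustrative stand-ins (not part of the original code); only the per-chunk shared tables and the start-all-then-join-all pattern mirror the example above.

from multiprocessing import Process, Manager

def count_items(chunk, out):
    # illustrative stand-in for runForFrequentItemsetsWithKItems:
    # each worker fills its own shared dict with local counts
    for item in chunk:
        out[item] = out.get(item, 0) + 1

if __name__ == '__main__':
    manager = Manager()
    chunks = [['a', 'b', 'b'], ['b', 'c']]
    outs = [manager.dict() for _ in chunks]   # one result table per chunk

    processes = [Process(target=count_items, args=(chunk, out))
                 for chunk, out in zip(chunks, outs)]
    for p in processes:   # start every worker first ...
        p.start()
    for p in processes:   # ... then wait for all of them
        p.join()

    # merge the per-worker tables, mirroring the L_k.merge(...) step
    merged = {}
    for out in outs:
        for item, count in out.items():
            merged[item] = merged.get(item, 0) + count
    print(merged)   # {'a': 1, 'b': 3, 'c': 1}

Starting all processes before joining any of them is what makes the workers overlap; calling start() immediately followed by join() inside the same loop, as the original code did, runs the chunks sequentially and gains nothing over a single process.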
Code example #2
# method of the Apriori class; requires at module level:
#   from multiprocessing import Process
#   from multiprocessing.managers import BaseManager
def generate_frequent_itemsets(self, min_sup, nthreads, end, output_file,
                               write_support=False):

    '''
    Step 1: Generate frequent item-sets with 1 item and write them to file
    '''
    nTransactions = self.data_set.size()
    with open(output_file, 'w') as text_file:
        text_file.write(str(nTransactions))
        text_file.write('\n')

    self.generate_L1(min_sup)
    freq_itemsets_dict = self.L1.generate_itemset_dictionary()
    freq_itemsets_dict.ntransactions = nTransactions
    freq_itemsets_dict.save_2_file(output_file, 'a', write_support)
    freq_itemsets_dict.clear()

    '''
    Step 2: Generate frequent item-sets with more than 1 item and append them
    to the same file
    '''
    k = 2
    L_k1 = self.L1

    # register the shared hash-table type once, before any manager starts
    BaseManager.register("AprioriHash", HashTable)

    while not L_k1.is_empty() and (end == -1 or k <= end):

        print('extracting item-sets with ' + str(k) + ' items ....')

        '''
        Divide the data into chunks and create one process per chunk to
        generate the candidate frequent item-sets
        '''
        L_k = HashTable()
        chunks = L_k1.split(nthreads)
        processes = []

        # create one managed (process-shared) candidate table per chunk,
        # so C_ks[index] exists for every chunk
        manager = BaseManager()
        manager.start()
        C_ks = [manager.AprioriHash() for _ in chunks]

        for index, L_k_1_chunk in enumerate(chunks):
            process_i = Process(target=Apriori.generate_Lk,
                                args=(min_sup, L_k_1_chunk, C_ks[index], k))
            processes.append(process_i)

        # start every worker first, then join them all, so the processes
        # actually run in parallel instead of one after another
        for process_i in processes:
            process_i.start()
        for process_i in processes:
            process_i.join()

        '''
        Merge the results returned by the worker processes
        '''
        for new_C_k in C_ks:
            L_k.append(new_C_k)
        L_k1.clear()
        L_k1 = L_k

        '''
        Append the frequent item-sets with k items to the file
        '''
        freq_itemsets_dict = L_k1.generate_itemset_dictionary()

        print('writing ' + str(freq_itemsets_dict.size()) +
              ' frequent item-sets to file')
        freq_itemsets_dict.ntransactions = nTransactions
        freq_itemsets_dict.save_2_file(output_file, 'a', write_support)
        freq_itemsets_dict.clear()

        k += 1

    print('stop at k = ' + str(k))
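For orientation, a hedged usage sketch of the method above. The Apriori constructor and its argument are assumptions for illustration; the excerpt only shows that the class holds a data_set and an L1 table.

# hypothetical usage; the Apriori constructor shown here is an assumption
apriori = Apriori('transactions.dat')
apriori.generate_frequent_itemsets(min_sup=0.05,    # minimum support threshold
                                   nthreads=4,      # number of worker processes
                                   end=-1,          # -1: no cap on itemset size
                                   output_file='frequent_itemsets.txt',
                                   write_support=True)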