def generateMinPDist(transaction_list, trans4lcm, threshold, set_method, lcm_path,
                     max_comb, permute_num, outlog, alternative):
    """Estimate the null distribution of the minimum p-value by permutation.

    Runs the LAMP multiple-testing initialization once on the original
    (non-permuted) dataset, then repeatedly shuffles the phenotype values
    and records the minimum p-value obtained from each permuted dataset.

    Arguments:
        transaction_list -- list of transaction objects (itemset + value).
        trans4lcm -- filename handed to LCM for frequent-itemset mining.
        threshold -- significance level used by the LAMP initialization.
        set_method -- name of the statistical test procedure.
        lcm_path -- path to the LCM executable.
        max_comb -- maximum combination size to consider.
        permute_num -- number of permuted datasets to generate.
        outlog -- writable log file object.
        alternative -- alternative-hypothesis indicator passed to LAMP.

    Returns:
        (min_p_list, fre_pattern, func_f) where min_p_list[i] is the tuple
        (min_p, low_sup, total_tests, freq_time, per_time, calTime) for the
        i-th permutation.
    """
    starttime = time.time()
    # One-off initialization of the apriori structures and the test
    # function via LAMP, using the original (non-permuted) dataset.
    fre_pattern, lam_star, max_lambda, correction_term_time, func_f \
        = lamp.runMultTest(transaction_list, trans4lcm, threshold, set_method,
                           lcm_path, max_comb, outlog, alternative)

    min_p_list = []  # one result tuple per permutation
    org_values_list = getValuesList(transaction_list)  # raw values before shuffling

    # Estimate the distribution of the minimum p-value from permuted datasets.
    for i in xrange(0, permute_num):
        per_start = time.time()
        # Generate a permuted dataset by shuffling the phenotype values.
        permute_transaction_list, org2shuffled_list = permuteData(
            transaction_list, org_values_list)
        func_f.calTime = 0  # reset the per-permutation p-value call counter
        min_p, low_sup, freq_time = calculateMinimumPValue(
            permute_transaction_list, trans4lcm, fre_pattern, func_f,
            max_comb, org2shuffled_list)
        per_time = time.time() - per_start
        if i == 0:
            # The first iteration also pays the one-off initialization cost,
            # so charge it the elapsed time since the overall start instead.
            per_time = time.time() - starttime
        min_p_list.append((min_p, low_sup, fre_pattern.getTotal(low_sup),
                           freq_time, per_time, func_f.calTime))
        outlog.write("[permute %s] minP %s, minSupport %s, totalTest %s, freqTime %s, totalTime %s, #ofPvalue %s\n"
                     % (i, min_p_list[i][0], min_p_list[i][1], min_p_list[i][2],
                        min_p_list[i][3], min_p_list[i][4], min_p_list[i][5]))
    return min_p_list, fre_pattern, func_f
def generateMinPDist(transaction_list, trans4lcm, threshold, set_method, lcm_path,
                     max_comb, permute_num, outlog, alternative):
    """Build the permutation distribution of minimum p-values.

    NOTE(review): this definition appears to duplicate an identical
    generateMinPDist earlier in the file; the later definition shadows the
    earlier one at import time — confirm and remove one copy.

    Initializes LAMP once on the original data, then for each of
    permute_num shuffles records the minimum p-value and timing details.
    Returns (min_p_list, fre_pattern, func_f); each min_p_list entry is
    (min_p, low_sup, total_tests, freq_time, elapsed, calTime).
    """
    t0 = time.time()
    # One-off LAMP initialization on the original dataset.
    fre_pattern, lam_star, max_lambda, correction_term_time, func_f = lamp.runMultTest(
        transaction_list, trans4lcm, threshold, set_method,
        lcm_path, max_comb, outlog, alternative)

    raw_values = getValuesList(transaction_list)  # values prior to shuffling
    min_p_list = []
    for idx in xrange(0, permute_num):
        iter_start = time.time()
        shuffled, org2shuffled_list = permuteData(transaction_list, raw_values)
        func_f.calTime = 0  # reset the p-value computation counter
        min_p, low_sup, freq_time = calculateMinimumPValue(
            shuffled, trans4lcm, fre_pattern, func_f, max_comb, org2shuffled_list)
        # The first pass is charged the initialization cost as well.
        elapsed = (time.time() - t0) if idx == 0 else (time.time() - iter_start)
        record = (min_p, low_sup, fre_pattern.getTotal(low_sup),
                  freq_time, elapsed, func_f.calTime)
        min_p_list.append(record)
        outlog.write("[permute %s] minP %s, minSupport %s, totalTest %s, freqTime %s, totalTime %s, #ofPvalue %s\n"
                     % (idx, record[0], record[1], record[2],
                        record[3], record[4], record[5]))
    return min_p_list, fre_pattern, func_f
max_comb = lamp.convertMaxComb( max_comb, len(columnid2name) ) except ValueError, e: return except KeyError, e: return # run multiple test transaction4lcm53 = transaction_file + ".4lcm53" # run try: outlog = open( log_file, 'w' ) starttime = time.time() sys.stderr.write( "Compute the optimal correction factor ..." ) fre_pattern, lam_star, max_lambda, correction_term_time, func_f \ = lamp.runMultTest(transaction_list, transaction4lcm53, threshold, \ set_method, lcm_path, max_comb, outlog, alternative) k = fre_pattern.getTotal( lam_star ) sys.stderr.write( " %s\n" % k ) sys.stderr.write( "Compute P-values of testable combinations ...\n" ) enrich_lst, finish_test_time \ = lamp.fwerControll(transaction_list, fre_pattern, lam_star, max_lambda, \ threshold, func_f, columnid2name, outlog) outlog.close() except IOError, e: outlog.close() sys.stderr.write( "Output results ...\n" ) # output result result = makeResult(transaction_file, flag_file, threshold, set_method, max_comb, \ columnid2name, lam_star, k, enrich_lst, transaction_list, func_f, alternative )
max_comb = lamp.convertMaxComb(max_comb, len(columnid2name)) except ValueError, e: return except KeyError, e: return # run multiple test transaction4lcm53 = transaction_file + ".4lcm53" # run try: outlog = open(log_file, 'w') starttime = time.time() sys.stderr.write("Compute the optimal correction factor ...") fre_pattern, lam_star, max_lambda, correction_term_time, func_f \ = lamp.runMultTest(transaction_list, transaction4lcm53, threshold, \ set_method, lcm_path, max_comb, outlog, alternative) k = fre_pattern.getTotal(lam_star) sys.stderr.write(" %s\n" % k) sys.stderr.write("Compute P-values of testable combinations ...\n") enrich_lst, finish_test_time \ = lamp.fwerControll(transaction_list, fre_pattern, lam_star, max_lambda, \ threshold, func_f, columnid2name, outlog) outlog.close() except IOError, e: outlog.close() sys.stderr.write("Output results ...\n") # output result result = makeResult(transaction_file, flag_file, threshold, set_method, max_comb, \ columnid2name, lam_star, k, enrich_lst, transaction_list, func_f, alternative )