def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of a single item combination.

    The input files are parsed with ``readFile.readFiles``.  When
    ``alternative`` is negative, the transactions are first passed through
    ``lamp.reverseValue`` with the method name ``"u_test"``.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: forwarded unchanged to ``FunctionOfX``; a negative
            value additionally triggers the value reversal.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.
    """
    # ``readFile`` is bound as a module-level name by this import.
    global readFile, lamp
    import readFile

    transactions, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)
    if alternative < 0:
        from lamp import reverseValue
        transactions = reverseValue(transactions, "u_test")
    func = FunctionOfX(transactions, alternative)

    # Column ids are shifted by one before being matched against the
    # transactions (presumably item ids are 1-based -- TODO confirm).
    name2id = readFile.colname2id(columnid2name)
    itemset = set(name2id[name] + 1 for name in itemset_str_lst)

    # Indices of the transactions that hold the complete combination.
    wanted = len(itemset)
    hit_ids = [
        idx for idx, transaction in enumerate(transactions)
        if len(itemset & transaction.itemset) == wanted
    ]

    p_value, stat_score = func.calPValue(transactions, hit_ids)
    hit_count = len(hit_ids)
    report = "p-value: %g (N: %s, x: %s, z-score: %f)\n" % (
        p_value, len(transactions), hit_count, stat_score)
    sys.stdout.write(report)
    return (p_value, hit_count)
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Evaluate one item combination and print its p-value summary.

    ``maxLambda`` is computed on the transactions as read from the input
    files; only afterwards, and only when ``alternative`` is negative, are
    the transactions passed through ``lamp.reverseValue`` with the method
    name ``"fisher"``.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: its sign selects the value reversal; its absolute
            value is forwarded to ``FunctionOfX``.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.
    """
    # ``readFile`` is bound as a module-level name by this import.
    global readFile, lamp
    import readFile

    transactions, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)
    max_lambda = maxLambda(transactions)
    if alternative < 0:
        from lamp import reverseValue
        transactions = reverseValue(transactions, "fisher")
    func = FunctionOfX(transactions, max_lambda, abs(alternative))

    # Translate the column names into the ids used inside transactions
    # (column id + 1; presumably item ids are 1-based -- TODO confirm).
    name2id = readFile.colname2id(columnid2name)
    itemset = set()
    for column_name in itemset_str_lst:
        itemset.add(name2id[column_name] + 1)

    # Collect the positions of the transactions holding the whole itemset.
    itemset_size = len(itemset)
    matching_ids = []
    for position, transaction in enumerate(transactions):
        if len(itemset & transaction.itemset) != itemset_size:
            continue
        matching_ids.append(position)

    p_value, stat_value = func.calPValue(transactions, matching_ids)
    total = len(transactions)
    n1 = func.getN1()
    matched = len(matching_ids)
    sys.stdout.write(
        "p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
        % (p_value, total, n1, matched, stat_value))
    return (p_value, matched)
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of a single item combination.

    ``maxLambda`` is evaluated on the transactions as read; when
    ``alternative`` is negative the transactions are then passed through
    ``lamp.reverseValue`` with the method name ``"fisher"``.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: its sign selects the value reversal; its absolute
            value is forwarded to ``FunctionOfX``.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # Keep the module-level binding of ``readFile`` that this import
    # creates; code outside this function may rely on it.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)
    max_lambda = maxLambda(transaction_list)
    if alternative < 0:
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "fisher")
    func = FunctionOfX(transaction_list, max_lambda, abs(alternative))

    # Column ids are shifted by one before matching (presumably item ids
    # inside transactions are 1-based -- TODO confirm).
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop
    # and runs unchanged on Python 2 and 3.
    flag_transactions_id = [
        index for index, transaction in enumerate(transaction_list)
        if itemset.issubset(transaction.itemset)
    ]

    p_value, stat_value = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    n1 = func.getN1()
    x = len(flag_transactions_id)
    sys.stdout.write("p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
                     % (p_value, n, n1, x, stat_value))
    return (p_value, x)
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of a single item combination.

    When ``alternative`` is negative the transactions are passed through
    ``lamp.reverseValue`` with the method name ``"u_test"`` before the
    statistic is evaluated.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: forwarded unchanged to ``FunctionOfX``; a negative
            value additionally triggers the value reversal.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # Keep the module-level binding of ``readFile`` that this import
    # creates; code outside this function may rely on it.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)
    if alternative < 0:
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "u_test")
    func = FunctionOfX(transaction_list, alternative)

    # Column ids are shifted by one before matching (presumably item ids
    # inside transactions are 1-based -- TODO confirm).
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop
    # and runs unchanged on Python 2 and 3.
    flag_transactions_id = [
        index for index, transaction in enumerate(transaction_list)
        if itemset.issubset(transaction.itemset)
    ]

    p_value, stat_score = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    x = len(flag_transactions_id)
    sys.stdout.write("p-value: %g (N: %s, x: %s, z-score: %f)\n"
                     % (p_value, n, x, stat_score))
    return (p_value, x)
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Run the whole procedure: null distribution, adjusted level, results.

    Steps, in order: read the two input files, build the minimum p-value
    distribution with ``generateMinPDist``, derive the adjusted
    significance level with ``adjustedThreshold``, enumerate the
    combinations with ``enumerateSigComb``, then report through
    ``outputResult``, a timing line on stdout and ``outputMinP``.

    Args:
        transaction_file: item-set file; also the prefix of the
            intermediate ``<transaction_file>.4lcm53`` file name.
        flag_file: value file read together with ``transaction_file``.
        threshold: significance level before adjustment.
        k: forwarded to ``generateMinPDist`` and ``adjustedThreshold``.
        set_method: name of the statistical procedure.
        lcm_path: forwarded to ``generateMinPDist``.
        max_comb: combination-size limit, normalised through
            ``lamp.convertMaxComb``.
        log_file: path of the log file, opened for writing (truncated).
        alternative: a negative value selects the 'less' alternative, which
            is handled by reversing the observed values and then following
            the same procedure as 'greater'.

    Returns:
        ``(enrich_lst, adjusted_threshold, columnid2name)``, or ``None``
        when reading the input raises ``ValueError`` or ``KeyError``.

    Raises:
        IOError: if ``log_file`` cannot be opened.  The previous handler
            called ``close()`` on the not-yet-bound log handle, which hid
            the real error behind an ``UnboundLocalError``.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except (ValueError, KeyError):
        return

    trans4lcm = transaction_file + ".4lcm53"  # intermediate file name

    # The context manager closes the log on every exit path, including
    # exceptions raised by any of the steps below.
    with open(log_file, 'w') as outlog:
        start_time = time.time()

        # generate null distribution
        sys.stderr.write("Calculate the minimum p-value distribution using the permutation test ...\n")
        outlog.write("Calculate the minimum p-value distribution using the permutation test ...\n")
        min_p_list, fre_pattern, func_f = generateMinPDist(
            transaction_list, trans4lcm, threshold, set_method,
            lcm_path, max_comb, k, outlog, alternative)

        # adjusted significance level
        outlog.write("Adjust significance level ...\n")
        adjusted_threshold, sorted_min_p_list = adjustedThreshold(
            min_p_list, threshold, k)
        outlog.write("Adjusted significance level: %s\n" % adjusted_threshold)
        correction_term_time = time.time()

        # enumerate combination whose P-value up to adjusted threshold
        outlog.write("Calculate the p-values in the given data set ...\n")
        enrich_lst, _time_enumerate_freq, time_enumerate_total = enumerateSigComb(
            transaction_list, trans4lcm, fre_pattern, func_f,
            max_comb, adjusted_threshold, outlog)
        finish_test_time = time.time()

        # output the significant combinations
        outputResult(
            transaction_file, flag_file, threshold, k, set_method, max_comb,
            columnid2name, enrich_lst, adjusted_threshold, transaction_list,
            func_f, sorted_min_p_list, alternative)

        # output time cost
        sys.stdout.write(
            "Time (sec.): Computing correction factor %.3f, Enumerating significant combinations %.3f, Total %.3f\n"
            % (correction_term_time - start_time, time_enumerate_total,
               finish_test_time - start_time))

        # output the minimum P-values
        outputMinP(min_p_list)

        return enrich_lst, adjusted_threshold, columnid2name
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Read the two input files and normalise ``max_comb``.

    As written here the function only performs the input stage: it reads
    the files, optionally reverses the observed values and converts
    ``max_comb``.  ``threshold``, ``k``, ``lcm_path`` and ``log_file`` are
    accepted but not used by this code.

    Args:
        transaction_file: item-set file passed to ``readFile.readFiles``.
        flag_file: value file passed to ``readFile.readFiles``.
        set_method: method name forwarded to ``lamp.reverseValue``.
        max_comb: combination-size limit passed to ``lamp.convertMaxComb``.
        alternative: a negative value selects the 'less' alternative.

    Returns:
        ``None`` on every path; a ``ValueError`` raised while reading is
        swallowed by an early return.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except ValueError:
        # The exception object was never used; omitting the binding makes
        # the clause valid on both Python 2 and Python 3.
        return
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Load the transaction data and convert ``max_comb``.

    Only the input stage is present in this definition: the files are read
    through ``readFile.readFiles``, the values are reversed when
    ``alternative`` is negative, and ``max_comb`` is converted using the
    number of columns.  The remaining parameters (``threshold``, ``k``,
    ``lcm_path``, ``log_file``) are not referenced here.

    Returns:
        ``None``.  A ``ValueError`` raised during this stage ends the
        function silently.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except ValueError:
        # ``except ValueError, e`` is a SyntaxError on Python 3 and the
        # bound name was unused, so the binding is dropped.
        return