def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the P-value of one named item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: when negative, the transactions are passed through
            ``lamp.reverseValue`` first; its absolute value is given to
            ``FunctionOfX``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # The module is bound at module level on purpose (``global``), so the
    # import here makes ``readFile`` visible to the rest of the file too.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)

    # max_lambda is derived from the transactions as loaded, i.e. before any
    # reversal below.
    max_lambda = maxLambda(transaction_list)
    if alternative < 0:
        # Only ``reverseValue`` is actually bound by the import below; the
        # ``global lamp`` declaration is kept as in the original.
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "fisher")
    func = FunctionOfX(transaction_list, max_lambda, abs(alternative))

    # Item ids are the looked-up column id plus one.
    name2id = readFile.colname2id(columnid2name)
    itemset = set()
    for col_name in itemset_str_lst:
        itemset.add(name2id[col_name] + 1)

    # Indices of the transactions that contain the whole combination.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_value = func.calPValue(transaction_list, flag_transactions_id)
    total = len(transaction_list)
    positives = func.getN1()
    support = len(flag_transactions_id)
    sys.stdout.write("p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
                     % (p_value, total, positives, support, stat_value))
    return (p_value, support)
def run(xls_file, value_file, itemset_str_lst, delimiter):
    """Compute and print the P-value and z-score of one item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # readFiles yields three values here; the third is not needed.
    transaction_list, columnid2name, _lcm2transaction_id = readFile.readFiles(
        xls_file, value_file, delimiter)
    func = FunctionOfX(transaction_list)

    # Item ids are the looked-up column id plus one.
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop and
    # runs unchanged on Python 2 and 3.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_score = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    sys.stdout.write("p-value: %g (N: %s, x: %s, z-score: %f)\n"
                     % (p_value, n, len(flag_transactions_id), stat_score))
    return (p_value, len(flag_transactions_id))
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the P-value of one named item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: when negative, the transactions are passed through
            ``lamp.reverseValue`` first; its absolute value is given to
            ``FunctionOfX``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # Bound at module level on purpose so the rest of the file sees it.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)

    # Computed on the transactions as loaded, before any reversal.
    max_lambda = maxLambda(transaction_list)
    if alternative < 0:
        # Only ``reverseValue`` is bound by this import; the ``global lamp``
        # declaration is kept from the original.
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "fisher")
    func = FunctionOfX(transaction_list, max_lambda, abs(alternative))

    # Item ids are the looked-up column id plus one.
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_value = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    n1 = func.getN1()
    sys.stdout.write("p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
                     % (p_value, n, n1, len(flag_transactions_id), stat_value))
    return (p_value, len(flag_transactions_id))
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the P-value and z-score of one item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: forwarded unchanged to ``FunctionOfX``; when negative,
            the transactions are first passed through ``lamp.reverseValue``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # Bound at module level on purpose so the rest of the file sees it.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)
    if alternative < 0:
        # Only ``reverseValue`` is bound by this import; the ``global lamp``
        # declaration is kept as in the original.
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "u_test")
    func = FunctionOfX(transaction_list, alternative)

    # Item ids are the looked-up column id plus one.
    name2id = readFile.colname2id(columnid2name)
    itemset = set()
    for col_name in itemset_str_lst:
        itemset.add(name2id[col_name] + 1)

    # Indices of the transactions that contain the whole combination.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_score = func.calPValue(transaction_list, flag_transactions_id)
    total = len(transaction_list)
    support = len(flag_transactions_id)
    sys.stdout.write("p-value: %g (N: %s, x: %s, z-score: %f)\n"
                     % (p_value, total, support, stat_score))
    return (p_value, support)
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the P-value and z-score of one item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.
        alternative: forwarded unchanged to ``FunctionOfX``; when negative,
            the transactions are first passed through ``lamp.reverseValue``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # Bound at module level on purpose so the rest of the file sees it.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)
    if alternative < 0:
        # Only ``reverseValue`` is bound by this import; the ``global lamp``
        # declaration is kept from the original.
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "u_test")
    func = FunctionOfX(transaction_list, alternative)

    # Item ids are the looked-up column id plus one.
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_score = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    sys.stdout.write("p-value: %g (N: %s, x: %s, z-score: %f)\n"
                     % (p_value, n, len(flag_transactions_id), stat_score))
    return (p_value, len(flag_transactions_id))
def run(xls_file, value_file, itemset_str_lst, delimiter):
    """Compute and print the P-value and z-score of one item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # readFiles yields three values here; the third is not needed.
    transaction_list, columnid2name, _lcm2transaction_id = readFile.readFiles(
        xls_file, value_file, delimiter)
    func = FunctionOfX(transaction_list)

    # Item ids are the looked-up column id plus one.
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop and
    # runs unchanged on Python 2 and 3.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_score = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    sys.stdout.write("p-value: %g (N: %s, x: %s, z-score: %f)\n"
                     % (p_value, n, len(flag_transactions_id), stat_score))
    return (p_value, len(flag_transactions_id))
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Run the permutation-based multiple test and report the results.

    Steps, in order: read the two input files, build the minimum-P-value
    distribution (``generateMinPDist``), derive the adjusted significance
    level (``adjustedThreshold``), enumerate the significant combinations
    (``enumerateSigComb``), then print results, timings and the minimum
    P-values.

    Args:
        transaction_file: item file; also the stem of the ``.4lcm53`` file
            name handed to the helpers.
        flag_file: value file read together with ``transaction_file``.
        threshold: significance level forwarded to the helpers.
        k: forwarded to ``generateMinPDist`` and ``adjustedThreshold``.
        set_method: name of the statistical procedure, forwarded as is.
        lcm_path: forwarded to ``generateMinPDist``.
        max_comb: combination-size limit; normalised with
            ``lamp.convertMaxComb``.
        log_file: path of the log file to (over)write.
        alternative: when negative, the observed values are reversed with
            ``lamp.reverseValue`` before testing.

    Returns:
        ``(enrich_lst, adjusted_threshold, columnid2name)`` on success, or
        ``None`` when the input cannot be read (ValueError/KeyError) or the
        log file cannot be opened.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except (ValueError, KeyError):
        return

    trans4lcm = transaction_file + ".4lcm53"

    # Open the log file. The original handler called outlog.close() here,
    # but outlog is unbound when open() itself fails, so that raised a
    # NameError; report the problem and stop instead.
    try:
        outlog = open(log_file, 'w')
    except IOError as e:
        sys.stderr.write("Error: cannot open log file %s: %s\n" % (log_file, e))
        return

    # The with-block guarantees the log is closed even if a step raises.
    with outlog:
        start_time = time.time()

        # generate null distribution
        sys.stderr.write("Calculate the minimum p-value distribution using the permutation test ...\n")
        outlog.write("Calculate the minimum p-value distribution using the permutation test ...\n")
        min_p_list, fre_pattern, func_f = generateMinPDist(
            transaction_list, trans4lcm, threshold, set_method,
            lcm_path, max_comb, k, outlog, alternative)

        # adjusted significance level
        outlog.write("Adjust significance level ...\n")
        adjusted_threshold, sorted_min_p_list = adjustedThreshold(
            min_p_list, threshold, k)
        outlog.write("Adjusted significance level: %s\n" % adjusted_threshold)
        correction_term_time = time.time()

        # enumerate combination whose P-value up to adjusted threshold
        outlog.write("Calculate the p-values in the given data set ...\n")
        enrich_lst, _time_enumerate_freq, time_enumerate_total = enumerateSigComb(
            transaction_list, trans4lcm, fre_pattern, func_f,
            max_comb, adjusted_threshold, outlog)
        finish_test_time = time.time()

        # output the significant combinations
        outputResult(transaction_file, flag_file, threshold, k, set_method,
                     max_comb, columnid2name, enrich_lst, adjusted_threshold,
                     transaction_list, func_f, sorted_min_p_list, alternative)

        # output time cost
        sys.stdout.write("Time (sec.): Computing correction factor %.3f, Enumerating significant combinations %.3f, Total %.3f\n"
                         % (correction_term_time - start_time,
                            time_enumerate_total,
                            finish_test_time - start_time))

        # output the minimum P-values
        outputMinP(min_p_list)

    return enrich_lst, adjusted_threshold, columnid2name
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Read the two input files and normalise ``max_comb``.

    Args:
        transaction_file: item file passed to ``readFile.readFiles``.
        flag_file: value file passed to ``readFile.readFiles``.
        threshold: not used by the code visible here.
        set_method: not used by the code visible here.
        lcm_path: not used by the code visible here.
        max_comb: combination-size limit, converted with
            ``lamp.convertMaxComb`` against the number of columns.
        log_file: not used by the code visible here.
        alternative: not used by the code visible here.

    Returns:
        ``None`` on every visible path (early on ``ValueError``).

    NOTE(review): the visible body stops after the loading step and never
    uses what it loads; this looks like the opening of a longer routine.
    Confirm against the complete file.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ',')
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    # "except ValueError, e" is a syntax error on Python 3 and the bound
    # name was never used; this spelling works on Python 2 and 3.
    except ValueError:
        return
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Run the multiple-testing pipeline and report significant combinations.

    Steps, in order: read the two input files, compute the correction
    factor (``runMultTest``), compute the P-values of the testable
    combinations (``fwerControl``), then print the results and timings.

    Args:
        transaction_file: item file; also the stem of the ``.4lcm53`` file
            name handed to ``runMultTest``.
        flag_file: value file read together with ``transaction_file``.
        threshold: significance level forwarded to the helpers.
        set_method: name of the statistical procedure, forwarded as is.
        lcm_path: forwarded to ``runMultTest``.
        max_comb: combination-size limit; normalised with ``convertMaxComb``.
        log_file: path of the log file to (over)write.
        alternative: when negative, the observed values are reversed with
            ``reverseValue`` before testing.

    Returns:
        ``(enrich_lst, k, lam_star, columnid2name)`` on success, or ``None``
        when the input cannot be read (ValueError/KeyError) or an IOError
        occurs while the log file is open.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ',')
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = reverseValue(transaction_list, set_method)
        max_comb = convertMaxComb(max_comb, len(columnid2name))
    except (ValueError, KeyError):
        return

    # run multiple test
    transaction4lcm53 = transaction_file + ".4lcm53"

    # The original IOError handler called outlog.close() even when open()
    # had failed (outlog unbound) and then fell through to code that uses
    # results which were never computed. The with-block closes the log on
    # every path, and an IOError now ends the run with a message.
    try:
        with open(log_file, 'w') as outlog:
            starttime = time.time()
            sys.stderr.write("Compute the optimal correction factor ...")
            fre_pattern, lam_star, max_lambda, correction_term_time, func_f = \
                runMultTest(transaction_list, transaction4lcm53, threshold,
                            set_method, lcm_path, max_comb, outlog, alternative)
            k = fre_pattern.getTotal(lam_star)
            sys.stderr.write(" %s\n" % k)
            sys.stderr.write("Compute P-values of testable combinations ...\n")
            enrich_lst, finish_test_time = fwerControl(
                transaction_list, fre_pattern, lam_star, max_lambda,
                threshold, func_f, columnid2name, outlog)
    except IOError as e:
        sys.stderr.write("Error: I/O failure while running the test: %s\n" % e)
        return

    sys.stderr.write("Output results ...\n")
    # If the positives and negatives are reversed, the number of positives is calculated.
    if (alternative < 0) and (set_method in BINARY_METHODS):
        for entry in enrich_lst:
            entry[3] = entry[2] - entry[3]

    # output result
    outputResult(transaction_file, flag_file, threshold, set_method, max_comb,
                 columnid2name, lam_star, k, enrich_lst, transaction_list,
                 func_f, alternative)

    # output time cost
    sys.stdout.write("Time (sec.): Computing correction factor %.3f, Enumerating significant combinations %.3f, Total %.3f\n"
                     % (correction_term_time - starttime,
                        finish_test_time - correction_term_time,
                        finish_test_time - starttime))
    return enrich_lst, k, lam_star, columnid2name
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Read the two input files and normalise ``max_comb``.

    Args:
        transaction_file: item file passed to ``readFile.readFiles``.
        flag_file: value file passed to ``readFile.readFiles``.
        threshold: not used by the code visible here.
        set_method: not used by the code visible here.
        lcm_path: not used by the code visible here.
        max_comb: combination-size limit, converted with
            ``lamp.convertMaxComb`` against the number of columns.
        log_file: not used by the code visible here.
        alternative: not used by the code visible here.

    Returns:
        ``None`` on every visible path (early on ``ValueError``).

    NOTE(review): the visible body stops after the loading step and never
    uses what it loads; this looks like the opening of a longer routine.
    Confirm against the complete file.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ',')
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    # "except ValueError, e" is a syntax error on Python 3 and the bound
    # name was never used; this spelling works on Python 2 and 3.
    except ValueError:
        return
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, log_file, delm):
    """Read the two input files and normalise ``max_comb``.

    ``max_comb`` becomes ``-1`` when it is the string ``"all"`` or is at
    least the number of columns; otherwise it is left unchanged.

    Args:
        transaction_file: item file passed to ``readFile.readFiles``.
        flag_file: value file passed to ``readFile.readFiles``.
        threshold: not used by the code visible here.
        set_method: not used by the code visible here.
        lcm_path: not used by the code visible here.
        max_comb: combination-size limit, or the string ``"all"``.
        log_file: not used by the code visible here.
        delm: delimiter forwarded to ``readFile.readFiles``.

    Returns:
        ``None`` on every visible path (early on ``ValueError``).

    NOTE(review): the visible body stops after the loading step and never
    uses what it loads; this looks like the opening of a longer routine.
    Confirm against the complete file.
    """
    # read 2 files and get transaction list
    transaction_list = set()
    try:
        transaction_list, columnid2name, lcm2transaction_id = readFile.readFiles(
            transaction_file, flag_file, delm)
        # "all" is tested first so the string is never compared to an int.
        if max_comb == "all" or max_comb >= len(columnid2name):
            max_comb = -1
    # "except ValueError, e" is a syntax error on Python 3 and the bound
    # name was never used; this spelling works on Python 2 and 3.
    except ValueError:
        return
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Read the two input files, optionally reverse them, normalise ``max_comb``.

    Args:
        transaction_file: item file passed to ``readFile.readFiles``.
        flag_file: value file passed to ``readFile.readFiles``.
        threshold: not used by the code visible here.
        set_method: forwarded to ``reverseValue`` when ``alternative < 0``.
        lcm_path: not used by the code visible here.
        max_comb: combination-size limit, converted with ``convertMaxComb``
            against the number of columns.
        log_file: not used by the code visible here.
        alternative: when negative, the transactions are passed through
            ``reverseValue``.

    Returns:
        ``None`` on every visible path (early on ``ValueError``).

    NOTE(review): the visible body stops after the loading step and never
    uses what it loads; this looks like the opening of a longer routine.
    Confirm against the complete file.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ',')
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = reverseValue(transaction_list, set_method)
        max_comb = convertMaxComb(max_comb, len(columnid2name))
    # "except ValueError, e" is a syntax error on Python 3 and the bound
    # name was never used; this spelling works on Python 2 and 3.
    except ValueError:
        return
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, log_file, delm):
    """Read the two input files and normalise ``max_comb``.

    ``max_comb`` becomes ``-1`` when it is the string ``"all"`` or is at
    least the number of columns; otherwise it is left unchanged.

    Args:
        transaction_file: item file passed to ``readFile.readFiles``.
        flag_file: value file passed to ``readFile.readFiles``.
        threshold: not used by the code visible here.
        set_method: not used by the code visible here.
        lcm_path: not used by the code visible here.
        max_comb: combination-size limit, or the string ``"all"``.
        log_file: not used by the code visible here.
        delm: delimiter forwarded to ``readFile.readFiles``.

    Returns:
        ``None`` on every visible path (early on ``ValueError``).

    NOTE(review): the visible body stops after the loading step and never
    uses what it loads; this looks like the opening of a longer routine.
    Confirm against the complete file.
    """
    # read 2 files and get transaction list
    transaction_list = set()
    try:
        transaction_list, columnid2name, lcm2transaction_id = readFile.readFiles(
            transaction_file, flag_file, delm)
        # "all" is tested first so the string is never compared to an int.
        if max_comb == "all" or max_comb >= len(columnid2name):
            max_comb = -1
    # "except ValueError, e" is a syntax error on Python 3 and the bound
    # name was never used; this spelling works on Python 2 and 3.
    except ValueError:
        return
def run(transaction_file, flag_file, threshold, set_method, lcm_path,
        max_comb, min_target, log_file, delm, time_file):
    """Read the input files (with a logrank option) and normalise ``max_comb``.

    When ``set_method`` is ``"logrank"`` the files are read with
    ``readFileSA.readFiles``, which additionally receives ``time_file``;
    otherwise ``readFile.readFiles`` is used. ``max_comb`` becomes ``-1``
    when it is the string ``"all"`` or is at least the number of columns.

    Args:
        transaction_file: item file passed to the reader.
        flag_file: value file passed to the reader.
        threshold: not used by the code visible here.
        set_method: selects the reader (``"logrank"`` or anything else).
        lcm_path: not used by the code visible here.
        max_comb: combination-size limit, or the string ``"all"``.
        min_target: not used by the code visible here.
        log_file: not used by the code visible here.
        delm: delimiter forwarded to the reader.
        time_file: extra file passed to ``readFileSA.readFiles``.

    Returns:
        ``None`` on every visible path (early on ``ValueError``).

    NOTE(review): the visible body stops after the loading step and never
    uses what it loads; this looks like the opening of a longer routine.
    Confirm against the complete file.
    """
    # RRedit: ADDED LOGRANK OPTION AND TIME_FILE
    # read 2 files and get transaction list
    transaction_list = set()
    try:
        # The original "if not X ... elif X" pair is a plain if/else.
        if set_method == "logrank":
            transaction_list, columnid2name, lcm2transaction_id = readFileSA.readFiles(
                transaction_file, flag_file, delm, time_file)
        else:
            transaction_list, columnid2name, lcm2transaction_id = readFile.readFiles(
                transaction_file, flag_file, delm)
        # "all" is tested first so the string is never compared to an int.
        if max_comb == "all" or max_comb >= len(columnid2name):
            max_comb = -1
    # "except ValueError, e" is a syntax error on Python 3 and the bound
    # name was never used; this spelling works on Python 2 and 3.
    except ValueError:
        return
def run(xls_file, value_file, itemset_str_lst, delimiter):
    """Compute and print the P-value of one named item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # readFiles yields three values here; the third is not needed.
    transaction_list, columnid2name, _lcm2transaction_id = readFile.readFiles(
        xls_file, value_file, delimiter)
    max_lambda = maxLambda(transaction_list)
    func = FunctionOfX(transaction_list, max_lambda)

    # Item ids are the looked-up column id plus one.
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop and
    # runs unchanged on Python 2 and 3.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_value = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    n1 = func.getN1()
    sys.stdout.write("p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
                     % (p_value, n, n1, len(flag_transactions_id), stat_value))
    return (p_value, len(flag_transactions_id))
def run(xls_file, value_file, itemset_str_lst, delimiter):
    """Compute and print the P-value of one named item combination.

    Args:
        xls_file: first input file handed to ``readFile.readFiles``.
        value_file: second input file handed to ``readFile.readFiles``.
        itemset_str_lst: column names that make up the combination.
        delimiter: delimiter forwarded to ``readFile.readFiles``.

    Returns:
        Tuple ``(p_value, x)`` where ``x`` is the number of transactions
        containing every item of the combination.

    Raises:
        KeyError: if a name in ``itemset_str_lst`` is not a known column.
    """
    # readFiles yields three values here; the third is not needed.
    transaction_list, columnid2name, _lcm2transaction_id = readFile.readFiles(
        xls_file, value_file, delimiter)
    max_lambda = maxLambda(transaction_list)
    func = FunctionOfX(transaction_list, max_lambda)

    # Item ids are the looked-up column id plus one.
    colname2id_dict = readFile.colname2id(columnid2name)
    itemset = set(colname2id_dict[name] + 1 for name in itemset_str_lst)

    # enumerate() replaces the Python-2-only xrange(len(...)) index loop and
    # runs unchanged on Python 2 and 3.
    flag_transactions_id = [
        idx for idx, transaction in enumerate(transaction_list)
        if itemset <= transaction.itemset
    ]

    p_value, stat_value = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    n1 = func.getN1()
    sys.stdout.write("p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
                     % (p_value, n, n1, len(flag_transactions_id), stat_value))
    return (p_value, len(flag_transactions_id))