예제 #1
0
파일: fastwy.py 프로젝트: a-terada/lamp
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path, max_comb, log_file, alternative):
	"""Run the permutation-based multiple-testing procedure and report results.

	The function reads the two input files, builds the minimum P-value
	distribution with ``generateMinPDist``, derives the adjusted significance
	level with ``adjustedThreshold``, enumerates the significant combinations
	with ``enumerateSigComb`` and finally prints the results, the time cost
	and the minimum P-values.  Progress messages are written to ``log_file``.

	Args:
		transaction_file: path handed to ``readFile.readFiles``; also used as
			the prefix of the auxiliary ``.4lcm53`` file name.
		flag_file: path handed to ``readFile.readFiles``.
		threshold: significance level before adjustment.
		k: forwarded to ``generateMinPDist`` and ``adjustedThreshold``
			(presumably the number of permutations -- TODO confirm).
		set_method: forwarded to the helpers (statistical test selector,
			presumably -- TODO confirm).
		lcm_path: forwarded to ``generateMinPDist``.
		max_comb: maximum combination size; converted with
			``lamp.convertMaxComb`` using the number of columns.
		log_file: path of the log file, opened for writing (truncated).
		alternative: when negative ('less'), the observed values are
			reversed and the 'greater' procedure is applied.

	Returns:
		``(enrich_lst, adjusted_threshold, columnid2name)`` on success, or
		``None`` when reading/converting the input raises ``ValueError`` or
		``KeyError``.

	Raises:
		IOError: if ``log_file`` cannot be opened for writing.
	"""
	# read 2 files and get transaction list
	sys.stderr.write( "Read input files ...\n" )
	try:
		transaction_list, columnid2name = readFile.readFiles(transaction_file, flag_file, ",")
		# If the alternative hypothesis is 'less',
		# the positive and negative of observe values are reversed,
		# and conduct the identical procedure to 'greater'.
		if alternative < 0:
			transaction_list = lamp.reverseValue( transaction_list, set_method )
		max_comb = lamp.convertMaxComb( max_comb, len(columnid2name) )
	except (ValueError, KeyError):
		# Invalid input: give up without a result, as before.
		return

	# auxiliary file name handed to the helpers below
	trans4lcm = transaction_file + ".4lcm53"

	# run multiple test
	# The previous handler called outlog.close() when open() itself failed,
	# i.e. on a name that was never bound, masking the real IOError with an
	# UnboundLocalError.  The context manager lets the IOError surface and
	# guarantees the log is closed even if a later step raises.
	with open( log_file, 'w' ) as outlog:
		start_time = time.time()
		# generate null distribution
		sys.stderr.write( "Calculate the minimum p-value distribution using the permutation test ...\n" )
		outlog.write("Calculate the minimum p-value distribution using the permutation test ...\n")
		min_p_list, fre_pattern, func_f = generateMinPDist(
			transaction_list, trans4lcm, threshold, set_method,
			lcm_path, max_comb, k, outlog, alternative)
		# adjusted significance level
		outlog.write("Adjust significance level ...\n")
		adjusted_threshold, sorted_min_p_list = adjustedThreshold( min_p_list, threshold, k )
		outlog.write("Adjusted significance level: %s\n" % adjusted_threshold)
		correction_term_time = time.time()
		# enumerate combination whose P-value up to adjusted threshold
		outlog.write("Calculate the p-values in the given data set ...\n")
		enrich_lst, time_enumerate_freq, time_enumerate_total = enumerateSigComb(
			transaction_list, trans4lcm, fre_pattern, func_f,
			max_comb, adjusted_threshold, outlog )

		finish_test_time = time.time()

		# output the significant combinations
		outputResult( transaction_file, flag_file, threshold, k, set_method, max_comb, columnid2name,
					  enrich_lst, adjusted_threshold, transaction_list, func_f, sorted_min_p_list, alternative )

		# output time cost
		sys.stdout.write("Time (sec.): Computing correction factor %.3f, Enumerating significant combinations %.3f, Total %.3f\n"
						 % (correction_term_time-start_time, time_enumerate_total, finish_test_time - start_time))

		# output the minimum P-values
		outputMinP( min_p_list )

	return enrich_lst, adjusted_threshold, columnid2name
예제 #2
0
def run(transaction_file, flag_file, threshold, set_method, lcm_path, max_comb, log_file, alternative):
	"""Load the transaction and flag files and convert ``max_comb``.

	NOTE(review): only the input-loading part of this function is visible
	in this excerpt; ``threshold``, ``set_method``, ``lcm_path``,
	``log_file`` and ``alternative`` are not used in the visible part.

	The visible part returns ``None`` when reading or converting the input
	raises ``ValueError``.
	"""
	# read 2 files and get transaction list
	sys.stderr.write( "Read input files ...\n" )
	transaction_list = set()
	try:
		transaction_list, columnid2name = readFile.readFiles(transaction_file, flag_file, ',')
		max_comb = lamp.convertMaxComb( max_comb, len(columnid2name) )
	except ValueError:
		# Was `except ValueError, e:` -- Python-2-only syntax with an unused
		# binding; this form is valid on both Python 2 and Python 3.
		return
예제 #3
0
def run(transaction_file, flag_file, threshold, set_method, lcm_path, max_comb,
        log_file, alternative):
    """Load the transaction and flag files and convert ``max_comb``.

    NOTE(review): only the input-loading part of this function is visible
    in this excerpt; ``threshold``, ``set_method``, ``lcm_path``,
    ``log_file`` and ``alternative`` are not used in the visible part.

    The visible part returns ``None`` when reading or converting the input
    raises ``ValueError``.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ',')
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except ValueError:
        # Was `except ValueError, e:` -- Python-2-only syntax with an unused
        # binding; this form is valid on both Python 2 and Python 3.
        return
예제 #4
0
파일: fastwy.py 프로젝트: a-terada/lamp
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path, max_comb, log_file, alternative):
	"""Load the transaction and flag files and convert ``max_comb``.

	When ``alternative`` is negative ('less'), the observed values are
	reversed with ``lamp.reverseValue`` so the 'greater' procedure can be
	reused.

	NOTE(review): only the input-loading part of this function is visible
	in this excerpt; ``threshold``, ``k``, ``lcm_path`` and ``log_file``
	are not used in the visible part.

	The visible part returns ``None`` when reading or converting the input
	raises ``ValueError``.
	"""
	# read 2 files and get transaction list
	sys.stderr.write( "Read input files ...\n" )
	transaction_list = set()
	try:
		transaction_list, columnid2name = readFile.readFiles(transaction_file, flag_file, ",")
		# If the alternative hypothesis is 'less',
		# the positive and negative of observe values are reversed,
		# and conduct the identical procedure to 'greater'.
		if alternative < 0:
			transaction_list = lamp.reverseValue( transaction_list, set_method )
		max_comb = lamp.convertMaxComb( max_comb, len(columnid2name) )
	except ValueError:
		# Was `except ValueError, e:` -- Python-2-only syntax with an unused
		# binding; this form is valid on both Python 2 and Python 3.
		return
예제 #5
0
파일: fastwy.py 프로젝트: msakai/lamp
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path,
        max_comb, log_file, alternative):
    """Load the transaction and flag files and convert ``max_comb``.

    When ``alternative`` is negative ('less'), the observed values are
    reversed with ``lamp.reverseValue`` so the 'greater' procedure can be
    reused.

    NOTE(review): only the input-loading part of this function is visible
    in this excerpt; ``threshold``, ``k``, ``lcm_path`` and ``log_file``
    are not used in the visible part.

    The visible part returns ``None`` when reading or converting the input
    raises ``ValueError``.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except ValueError:
        # Was `except ValueError, e:` -- Python-2-only syntax with an unused
        # binding; this form is valid on both Python 2 and Python 3.
        return