Example #1
0
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of a single item combination ("u_test").

    Args:
        xls_file: first input path forwarded to ``readFile.readFiles``.
        value_file: second input path forwarded to ``readFile.readFiles``.
        itemset_str_lst: iterable of column names forming the combination.
        delimiter: separator forwarded to ``readFile.readFiles``.
        alternative: when negative, the transactions are passed through
            ``lamp.reverseValue`` before testing; the raw value is also
            handed to ``FunctionOfX``.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.
    """
    # The import is deliberately bound at module scope via ``global``.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)

    if alternative < 0:
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "u_test")

    func = FunctionOfX(transaction_list, alternative)
    name_to_id = readFile.colname2id(columnid2name)

    # Column ids are shifted by one before being matched against
    # each transaction's ``itemset``.
    itemset = {name_to_id[name] + 1 for name in itemset_str_lst}
    wanted = len(itemset)

    # Positions of the transactions that contain the whole combination.
    flag_transactions_id = [
        position
        for position, transaction in enumerate(transaction_list)
        if len(itemset & transaction.itemset) == wanted
    ]

    p_value, stat_score = func.calPValue(transaction_list,
                                         flag_transactions_id)
    hit_count = len(flag_transactions_id)
    total = len(transaction_list)

    summary = "p-value: %g (N: %s, x: %s, z-score: %f)\n" % (
        p_value, total, hit_count, stat_score)
    sys.stdout.write(summary)
    return (p_value, hit_count)
Example #2
0
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of a single item combination ("fisher").

    Args:
        xls_file: first input path forwarded to ``readFile.readFiles``.
        value_file: second input path forwarded to ``readFile.readFiles``.
        itemset_str_lst: iterable of column names forming the combination.
        delimiter: separator forwarded to ``readFile.readFiles``.
        alternative: when negative, the transactions are passed through
            ``lamp.reverseValue`` before testing; its absolute value is
            handed to ``FunctionOfX``.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.
    """
    # The import is deliberately bound at module scope via ``global``.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(
        xls_file, value_file, delimiter)

    # maxLambda is evaluated on the transactions as read, i.e. before any
    # reversal below.
    max_lambda = maxLambda(transaction_list)

    if alternative < 0:
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "fisher")

    func = FunctionOfX(transaction_list, max_lambda, abs(alternative))
    name_to_id = readFile.colname2id(columnid2name)

    # Column ids are shifted by one before being matched against
    # each transaction's ``itemset``.
    itemset = {name_to_id[name] + 1 for name in itemset_str_lst}
    wanted = len(itemset)

    # Positions of the transactions that contain the whole combination.
    flag_transactions_id = [
        position
        for position, transaction in enumerate(transaction_list)
        if len(itemset & transaction.itemset) == wanted
    ]

    p_value, stat_value = func.calPValue(transaction_list,
                                         flag_transactions_id)
    total = len(transaction_list)
    n1 = func.getN1()
    hit_count = len(flag_transactions_id)

    summary = "p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n" % (
        p_value, total, n1, hit_count, stat_value)
    sys.stdout.write(summary)
    return (p_value, hit_count)
Example #3
0
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of one item combination ("fisher").

    Args:
        xls_file: first input path forwarded to ``readFile.readFiles``.
        value_file: second input path forwarded to ``readFile.readFiles``.
        itemset_str_lst: iterable of column names forming the combination.
        delimiter: separator forwarded to ``readFile.readFiles``.
        alternative: when negative, the transactions are passed through
            ``lamp.reverseValue`` before testing; its absolute value is
            handed to ``FunctionOfX``.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.
    """
    # The import is deliberately bound at module scope via ``global``.
    global readFile
    import readFile
    transaction_list, columnid2name = readFile.readFiles(xls_file, value_file, delimiter)
    # maxLambda is evaluated before any reversal of the transactions.
    max_lambda = maxLambda(transaction_list)
    if alternative < 0:
        global lamp
        from lamp import reverseValue
        transaction_list = reverseValue(transaction_list, "fisher")
    func = FunctionOfX(transaction_list, max_lambda, abs(alternative))
    colname2id_dict = readFile.colname2id(columnid2name)

    # Column ids are shifted by one before being matched against
    # each transaction's ``itemset``.
    itemset = set()
    for name in itemset_str_lst:
        itemset.add(colname2id_dict[name] + 1)

    # ``enumerate`` replaces the Python-2-only ``xrange(len(...))`` so the
    # function runs unchanged on both Python 2 and Python 3.
    flag_transactions_id = []
    for i, t in enumerate(transaction_list):
        if len(itemset & t.itemset) == len(itemset):
            flag_transactions_id.append(i)
    p_value, stat_value = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)
    n1 = func.getN1()
    sys.stdout.write("p-value: %s (N: %s, n1: %s, x: %s, a: %s)\n"
                     % (p_value, n, n1, len(flag_transactions_id), stat_value))
    return (p_value, len(flag_transactions_id))
Example #4
0
def run(xls_file, value_file, itemset_str_lst, delimiter, alternative):
    """Compute and print the p-value of one item combination ("u_test").

    Args:
        xls_file: first input path forwarded to ``readFile.readFiles``.
        value_file: second input path forwarded to ``readFile.readFiles``.
        itemset_str_lst: iterable of column names forming the combination.
        delimiter: separator forwarded to ``readFile.readFiles``.
        alternative: when negative, the transactions are passed through
            ``lamp.reverseValue`` before testing; the raw value is also
            handed to ``FunctionOfX``.

    Returns:
        ``(p_value, x)`` where ``x`` is the number of transactions whose
        ``itemset`` contains every requested item.
    """
    # The import is deliberately bound at module scope via ``global``.
    global readFile
    import readFile

    transaction_list, columnid2name = readFile.readFiles(xls_file, value_file, delimiter)

    if alternative < 0:
        global lamp
        from lamp import reverseValue

        transaction_list = reverseValue(transaction_list, "u_test")

    func = FunctionOfX(transaction_list, alternative)
    colname2id_dict = readFile.colname2id(columnid2name)

    # Column ids are shifted by one before being matched against
    # each transaction's ``itemset``.
    itemset = set()
    for name in itemset_str_lst:
        itemset.add(colname2id_dict[name] + 1)

    # ``enumerate`` replaces the Python-2-only ``xrange(len(...))`` so the
    # function runs unchanged on both Python 2 and Python 3.
    flag_transactions_id = []
    for i, t in enumerate(transaction_list):
        if len(itemset & t.itemset) == len(itemset):
            flag_transactions_id.append(i)
    p_value, stat_score = func.calPValue(transaction_list, flag_transactions_id)
    n = len(transaction_list)

    sys.stdout.write("p-value: %g (N: %s, x: %s, z-score: %f)\n" % (p_value, n, len(flag_transactions_id), stat_score))
    return (p_value, len(flag_transactions_id))
Example #5
0
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path, max_comb, log_file, alternative):
    """Run the permutation-based multiple test and report significant combinations.

    The steps, in order: read the two input files, build the minimum
    p-value distribution (``generateMinPDist``), derive the adjusted
    significance level (``adjustedThreshold``), enumerate the significant
    combinations (``enumerateSigComb``), then print the results, the timing
    summary and the minimum p-values.

    Args:
        transaction_file: item/transaction input path (read with "," as
            the delimiter); also used as the prefix of the ``.4lcm53`` file.
        flag_file: value input path, read together with ``transaction_file``.
        threshold: significance level before adjustment.
        k: forwarded to ``generateMinPDist`` and ``adjustedThreshold``
            (presumably the number of permutations -- confirm).
        set_method: name of the statistical procedure, forwarded to the
            helpers and to ``lamp.reverseValue``.
        lcm_path: forwarded to ``generateMinPDist``.
        max_comb: combination-size limit, normalised through
            ``lamp.convertMaxComb``.
        log_file: path of the log file; opened for writing (truncated).
        alternative: when negative, the observed values are reversed so
            the 'less' hypothesis can reuse the 'greater' procedure.

    Returns:
        ``(enrich_lst, adjusted_threshold, columnid2name)`` on success, or
        ``None`` when reading the inputs raises ``ValueError``/``KeyError``.

    Raises:
        IOError: if ``log_file`` cannot be opened for writing.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    try:
        transaction_list, columnid2name = readFile.readFiles(transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except (ValueError, KeyError):
        # NOTE(review): the error is swallowed here; presumably the callee
        # has already reported it -- confirm.
        return

    # Derived file name handed to generateMinPDist / enumerateSigComb.
    trans4lcm = transaction_file + ".4lcm53"

    # run multiple test
    # The previous code called outlog.close() inside ``except IOError``,
    # where ``outlog`` was never bound; a failed open therefore surfaced as
    # an UnboundLocalError. The ``with`` block lets the real IOError
    # propagate and guarantees the log is closed even if a later step fails.
    with open(log_file, 'w') as outlog:
        start_time = time.time()
        # generate null distribution
        sys.stderr.write("Calculate the minimum p-value distribution using the permutation test ...\n")
        outlog.write("Calculate the minimum p-value distribution using the permutation test ...\n")
        min_p_list, fre_pattern, func_f = generateMinPDist(
            transaction_list, trans4lcm, threshold, set_method,
            lcm_path, max_comb, k, outlog, alternative)
        # adjusted significance level
        outlog.write("Adjust significance level ...\n")
        adjusted_threshold, sorted_min_p_list = adjustedThreshold(min_p_list, threshold, k)
        outlog.write("Adjusted significance level: %s\n" % adjusted_threshold)
        correction_term_time = time.time()
        # enumerate combination whose P-value up to adjusted threshold
        outlog.write("Calculate the p-values in the given data set ...\n")
        enrich_lst, time_enumerate_freq, time_enumerate_total = enumerateSigComb(
            transaction_list, trans4lcm, fre_pattern, func_f,
            max_comb, adjusted_threshold, outlog)

        finish_test_time = time.time()

        # output the significant combinations
        outputResult(transaction_file, flag_file, threshold, k, set_method, max_comb, columnid2name,
                     enrich_lst, adjusted_threshold, transaction_list, func_f, sorted_min_p_list, alternative)

        # output time cost
        sys.stdout.write("Time (sec.): Computing correction factor %.3f, Enumerating significant combinations %.3f, Total %.3f\n"
                         % (correction_term_time - start_time, time_enumerate_total, finish_test_time - start_time))

        # output the minimum P-values
        outputMinP(min_p_list)

        return enrich_lst, adjusted_threshold, columnid2name
Example #6
0
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path, max_comb, log_file, alternative):
    """Load the inputs for the multiple-testing procedure.

    Only the input-loading step is present in this snippet: the two files
    are read with "," as the delimiter, the values are reversed when
    ``alternative`` is negative, and ``max_comb`` is normalised through
    ``lamp.convertMaxComb``. The remaining parameters (``threshold``, ``k``,
    ``lcm_path``, ``log_file``) are not used in the visible body.

    Returns:
        ``None`` -- both when reading raises ``ValueError`` and when the
        visible body completes.
    """
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except ValueError:
        # ``except ValueError, e`` is a SyntaxError on Python 3; this form is
        # valid on both Python 2 and 3 (the bound name was unused).
        return
Example #7
0
def run(transaction_file, flag_file, threshold, k, set_method, lcm_path,
        max_comb, log_file, alternative):
    # read 2 files and get transaction list
    sys.stderr.write("Read input files ...\n")
    transaction_list = set()
    try:
        transaction_list, columnid2name = readFile.readFiles(
            transaction_file, flag_file, ",")
        # If the alternative hypothesis is 'less',
        # the positive and negative of observe values are reversed,
        # and conduct the identical procedure to 'greater'.
        if alternative < 0:
            transaction_list = lamp.reverseValue(transaction_list, set_method)
        max_comb = lamp.convertMaxComb(max_comb, len(columnid2name))
    except ValueError, e:
        return