コード例 #1
0
def give_genelist_for_trait(trait, cutoff, exp, gxe_boolean, TFset):
	"""
	Return the genes in a trait's eQTL regions if they include a regulator.

	The trait is run through the eQTL pipeline (marker_logp_list ->
	add_chromosome_and_position -> retrieve_logp_above_cutoff ->
	check_region -> iterate_marker_tuple ->
	set_region_from_adjacent_markers -> combine_marker_region_data ->
	find_genes_in_regions). The distinct genes of the resulting regions are
	then intersected with TFset.

	Parameters:
		trait       -- trait identifier, handed to marker_logp_list and
		               combine_marker_region_data.
		cutoff      -- threshold handed to retrieve_logp_above_cutoff.
		exp         -- handed to marker_logp_list (presumably the experiment
		               name -- confirm against that helper).
		gxe_boolean -- handed to marker_logp_list (presumably selects the
		               GxE data -- confirm against that helper).
		TFset       -- set of regulator genes; must support `&` with a set.

	Returns:
		The list produced by read_distinct_genes when at least one of its
		genes is in TFset, otherwise an empty list. An empty list is also
		returned when no eQTL region is found (this path used to fall
		through and return None).

	Idea from the original author: perhaps write the gene lists to a file
	for any threshold, for later use.
	"""
	#Get genelist from eQTL
	marker_scores = marker_logp_list(trait, gxe_boolean, exp)
	positioned_markers = add_chromosome_and_position(marker_scores)
	markers_above_cutoff = retrieve_logp_above_cutoff(positioned_markers, cutoff)
	checked_regions = check_region(markers_above_cutoff)
	marker_tuples = iterate_marker_tuple(checked_regions)
	regions = set_region_from_adjacent_markers(marker_tuples, positioned_markers)
	trait_regions = combine_marker_region_data(trait, regions)
	gene_dict = find_genes_in_regions(trait_regions)

	#Release the intermediate results before the gene list is built.
	#The original author notes that django hoards memory and that these
	#objects can only be reclaimed (e.g. by gc.collect()) once unreferenced.
	del (marker_scores, positioned_markers, markers_above_cutoff,
		checked_regions, marker_tuples, regions, trait_regions)

	#Every entry of gene_dict counts as one found qtl
	if len(gene_dict) == 0:
		#No eQTL at all: same "nothing found" result as the no-regulator case
		return []

	gene_list = read_distinct_genes(gene_dict)

	if TFset & set(gene_list):
		#Parenthesised so the line is valid as a Python 2 print statement
		#and as a Python 3 print call; the output is identical.
		print("winner winner chicken dinner!")
		return gene_list

	return []
コード例 #2
0
def give_genelist_for_trait(trait, cutoff, exp, gxe_boolean):
	"""
	Report the number of genes found in each eQTL region of a trait.

	The trait is run through the eQTL pipeline (marker_logp_list ->
	add_chromosome_and_position -> retrieve_logp_above_cutoff ->
	check_region -> iterate_marker_tuple ->
	set_region_from_adjacent_markers -> combine_marker_region_data ->
	find_genes_in_regions), which yields a dict of regions.

	Parameters:
		trait       -- trait identifier, handed to marker_logp_list and
		               combine_marker_region_data.
		cutoff      -- threshold handed to retrieve_logp_above_cutoff.
		exp         -- handed to marker_logp_list.
		gxe_boolean -- handed to marker_logp_list.

	Returns:
		A list with one [region_label, gene_count] entry per key of the
		region dict, or an empty list when the dict is empty. The label is
		"<key[1]>_<key[2]> - <key[1]>_<key[3]>" (presumably chromosome with
		start and end position -- confirm against find_genes_in_regions)
		and gene_count is the length of the value stored under that key.
	"""
	#Get genelist from eQTL
	scored = marker_logp_list(trait, gxe_boolean, exp)
	positioned = add_chromosome_and_position(scored)
	above_cutoff = retrieve_logp_above_cutoff(positioned, cutoff)
	checked = check_region(above_cutoff)
	tuples = iterate_marker_tuple(checked)
	bounded = set_region_from_adjacent_markers(tuples, positioned)
	combined = combine_marker_region_data(trait, bounded)
	gene_dict = find_genes_in_regions(combined)

	#Every entry of gene_dict counts as one found qtl
	region_total = len(gene_dict)

	#Drop the intermediates as soon as they are no longer needed.
	#The original author notes that django hoards memory and that
	#gc.collect() can only reclaim objects that are unreferenced.
	del scored, positioned, above_cutoff, checked, tuples, bounded, combined

	if region_total == 0:
		return []

	return [
		["%s_%s - %s_%s"%(region[1], region[2], region[1], region[3]), len(genes)]
		for region, genes in gene_dict.iteritems()
	]
コード例 #3
0
def give_genelist_for_trait(trait, cutoff, exp, gxe_boolean, fisher_alpha_name, mult_alpha_name, postA_name, postB_name, data_dict):
	"""
	Run the GO enrichment for the genes found in a trait's eQTL regions.

	First the trait is run through the eQTL pipeline (marker_logp_list ->
	add_chromosome_and_position -> retrieve_logp_above_cutoff ->
	check_region -> iterate_marker_tuple ->
	set_region_from_adjacent_markers -> combine_marker_region_data ->
	find_genes_in_regions). If that yields any region, the genes are
	annotated, GO terms are counted inside and outside the regions, a
	Fisher exact test and a multiple testing correction are applied, and
	the result goes through post processing steps A and B.

	Parameters:
		trait             -- trait identifier, handed to marker_logp_list
		                     and combine_marker_region_data.
		cutoff            -- threshold handed to retrieve_logp_above_cutoff.
		exp               -- handed to marker_logp_list.
		gxe_boolean       -- handed to marker_logp_list.
		fisher_alpha_name -- handed to extract_significant_result_fish
		                     (per the original note a p value limit that
		                     defaults to 0.05 in the front end).
		mult_alpha_name   -- handed to extract_significant_result_mult.
		postA_name        -- handed to post_process_A.
		postB_name        -- handed to post_process_B.
		data_dict         -- handed to segregate_gene_list and
		                     annotate_from_csv.

	Returns:
		A 7-tuple (godict_full, gene_dict, qtls, c_go_in_qtl_dict,
		c_go_out_qtl_dict, tot_in_qtl, tot_out_qtl):
		the result of post_process_B, the region dict, the number of
		regions, the GO term counts inside and outside the regions, and
		the two totals reported by annotate_from_csv.
		When no region is found the tuple is ({}, {}, qtls, {}, {}, 0, 0).
	"""
	#Get genelist from eQTL
	scored = marker_logp_list(trait, gxe_boolean, exp)
	positioned = add_chromosome_and_position(scored)
	above_cutoff = retrieve_logp_above_cutoff(positioned, cutoff)
	checked = check_region(above_cutoff)
	tuples = iterate_marker_tuple(checked)
	bounded = set_region_from_adjacent_markers(tuples, positioned)
	combined = combine_marker_region_data(trait, bounded)
	gene_dict = find_genes_in_regions(combined)

	#Every entry of gene_dict counts as one found qtl
	qtls = len(gene_dict)

	#Drop the intermediates before the enrichment allocates its own data.
	#The original author notes that django hoards memory and that
	#gc.collect() can only reclaim objects that are unreferenced.
	del scored, positioned, above_cutoff, checked, tuples, bounded, combined

	if qtls == 0:
		return {}, {}, qtls, {}, {}, 0, 0

	distinct_genes = read_distinct_genes(gene_dict)

	absent, present = segregate_gene_list(data_dict, distinct_genes)
	inside_qtl, outside_qtl, tot_in_qtl, tot_out_qtl = annotate_from_csv(data_dict, present)

	unique_go = unique_GO_list(inside_qtl)

	#GO term counts for the genes inside and outside the qtl
	c_go_in_qtl_dict = count_all_goterms(flatten_array(inside_qtl))
	c_go_out_qtl_dict = count_all_goterms(flatten_array(outside_qtl))

	#Contingency tables for the Fisher exact test
	tables, gene_total = populate_contingency_table(c_go_in_qtl_dict, c_go_out_qtl_dict, unique_go, tot_in_qtl, tot_out_qtl)

	#Fisher exact test on every table, then keep only the results that
	#pass fisher_alpha_name
	fisher_results = fish_for_python(tables)
	fisher_significant = extract_significant_result_fish(fisher_results, fisher_alpha_name)

	#Multiple testing correction
	corrected = correct_pvalues_for_multiple_testing(fisher_significant)

	#NOTE(review): the outcome of this call is not used anywhere below;
	#post_process_A is given the corrected list instead. Confirm whether
	#the mult_alpha_name selection was meant to feed the post processing.
	mult_significant = extract_significant_result_mult(corrected, mult_alpha_name)

	#Post processing
	qtl_go = make_qtl_go_dict(inside_qtl, gene_dict)
	approved_a = post_process_A(corrected, qtl_go, postA_name)
	#Per the original note godict_full looks like:
	#dict[i, go, fu_p_value, adj(fu_p_val, go_frac_scA, go_frac_scB]) = [gene list]
	godict_full = post_process_B(approved_a, inside_qtl, outside_qtl, postB_name)

	#Release everything that is not part of the result
	del distinct_genes, absent, present, inside_qtl, outside_qtl, unique_go
	del tables, gene_total, fisher_results, fisher_significant
	del mult_significant, corrected, qtl_go, approved_a

	return godict_full, gene_dict, qtls, c_go_in_qtl_dict, c_go_out_qtl_dict, tot_in_qtl, tot_out_qtl