def _bicInputPath(organism, quantity, globalOrLocalTest, symmetry, chr):
    """
    Builds the path of one per-chromosome calculation file, e.g.
    Calculations_<organism>\\Lgamma<Global|Local>\\LgammaSymmetric<chr>.txt

    Only the trailing literal is %-formatted, so a '%' inside organism is
    passed through untouched. The separators are literal backslashes.
    """
    return "Calculations_" + organism + "\\%s%s\\%s%s%d.txt" % (
        quantity, globalOrLocalTest, quantity, symmetry, chr)


def RunBICTest(chr, beginSeqName, fileName, globalOrLocalTest, organism):
    """
    Runs all BIC test calculations.
    chr: Current chromosome number.
    beginSeqName: Beginning of the sequence name of the queried chromosome. (e.g. "Gene")
    fileName: Output file name.
    globalOrLocalTest: String identifying whether to use global or local Mt values.
                       Must be exactly "Global" or "Local"; any other value only
                       prints an error message.
    organism: Organism's name.

    Reads the asymmetric and symmetric PalphaGammaHat and Lgamma files for the
    chromosome, computes one BIC per model and hands both to
    printAndParseFiles.compareAndPrintBICs together with fileName.
    Returns nothing.
    """
    print("BIC")
    if globalOrLocalTest in ("Global", "Local"):
        # Both modes run the same pipeline; the mode only selects the input
        # directories and file names.
        PalphaGammaHatAsym = printAndParseFiles.parsePalphaGamma(
            _bicInputPath(organism, "PalphaGammaHat", globalOrLocalTest, "Asymmetric", chr),
            beginSeqName)
        PalphaGammaHatSym = printAndParseFiles.parsePalphaGamma(
            _bicInputPath(organism, "PalphaGammaHat", globalOrLocalTest, "Symmetric", chr),
            beginSeqName)
        LgammaAsym = printAndParseFiles.parseValueFile(
            _bicInputPath(organism, "Lgamma", globalOrLocalTest, "Asymmetric", chr))
        LgammaSym = printAndParseFiles.parseValueFile(
            _bicInputPath(organism, "Lgamma", globalOrLocalTest, "Symmetric", chr))
        BICAsym = BIC.findBIC(PalphaGammaHatAsym, LgammaAsym, "A")
        BICSym = BIC.findBIC(PalphaGammaHatSym, LgammaSym, "S")
        printAndParseFiles.compareAndPrintBICs(BICAsym, BICSym, fileName)
    else:
        print("Error with Global or Local choice. Please type 'Global' or 'Local'")

    print("DONE WITH CHR %d" % chr)
Exemple #2
0
def output(I, forward, reverse):
    """
    Fits the best model for interval I, prints it, and plots it against the
    coverage histogram of each strand.

    I: Interval object. It is passed to BIC.get_best_model, and its start
       and stop attributes bound the coverage that is plotted.
    forward: Iterable of tab-separated "chrom, start, stop, coverage" lines
             for the forward strand (drawn above the x axis).
    reverse: Same format for the reverse strand (drawn mirrored below the
             x axis, in red).

    Uses the module-level names penality, diff_threshold, BIC, np and plt.
    Returns nothing. If no coverage line falls inside the interval, the
    model is still printed but nothing is plotted.
    """
    model = BIC.get_best_model(I, penality, diff_threshold)
    print(model.k)
    for rv in model.rvs:
        print(rv)

    # D[0] / D[1]: (midpoint, coverage) pairs of the forward / reverse strand.
    D = ([], [])
    for i, FH in enumerate((forward, reverse)):
        for line in FH:
            chrom, start, stop, cov = line.strip("\n").split("\t")
            pos = (float(stop) + float(start)) / 2.
            if pos > I.stop:
                # Reading of this strand stops at the first midpoint past
                # the interval end.
                break
            elif I.start <= pos <= I.stop:
                D[i].append((pos, float(cov)))

    positions = [x for d in D for x, y in d]
    if not positions:
        # min()/max() of an empty list would raise; there is nothing to draw.
        return
    minX, maxX = min(positions), max(positions)

    # Positions are shifted to start at minX and scaled down by 100 before
    # being compared with model.pdf.
    xs = np.linspace(0, (maxX - minX) / 100., 1000)
    bins = 500
    plt.figure(figsize=(15, 10))
    for points, strand, bar_style in ((D[0], 1, {}), (D[1], -1, {"color": "red"})):
        if points:
            # density=True replaces the removed normed=1 argument; with
            # equal-width bins both give the same normalisation.
            counts, edges = np.histogram(
                [(x - minX) / 100. for x, y in points],
                weights=[y for x, y in points],
                bins=bins, density=True)
            centers = (edges[:-1] + edges[1:]) / 2.
            plt.bar(centers, strand * counts,
                    width=(centers[-1] - centers[0]) / bins,
                    alpha=0.5, **bar_style)
        # A list (not a lazy map object) so the curve also plots on Python 3.
        plt.plot(xs, [strand * model.pdf(x, strand) for x in xs], linewidth=2.)

    plt.show()
Exemple #3
0
def output(I, G):
    """
    Appends the parameters of every accepted component of I's best model to G.

    I: Interval object passed to BIC.get_best_model.
    G: Mapping whose "mu", "si", "l" and "pi" entries support append();
       each receives the attribute of the same name from every component
       that passes check_bidir_component. Modified in place.

    Uses the module-level names penality, diff_threshold, si_thresh,
    l_thresh, w_thresh and pi_thresh. Returns nothing.
    """
    best = BIC.get_best_model(I, penality, diff_threshold)

    # Select first, record afterwards, so G is only touched once every
    # component has been checked.
    accepted = []
    for component in best.rvs:
        if check_bidir_component(component, si_thresh=si_thresh, l_thresh=l_thresh, w_thresh=w_thresh, pi_thresh=pi_thresh):
            accepted.append(component)

    for component in accepted:
        for key in ("mu", "si", "l", "pi"):
            G[key].append(getattr(component, key))
Exemple #4
0
def ouput(I, FHW):
    """
    Writes the accepted components of I's best model, each followed by its
    binned ChIP signal, to FHW.

    I: Interval object passed to BIC.get_best_model and bin_ChIP_signal;
       its chrom, start and stop attributes form the "#chrom:start-stop"
       header line.
    FHW: Writable object; only its write() method is used.

    Nothing is written when no component passes check_bidir_component.
    Otherwise the output is the header line, then for each component its
    string form on one line, followed by one "data_type,peak,x-y,x-y,..."
    line per entry returned by bin_ChIP_signal.

    Uses the module-level names penality, diff_threshold, si_thresh,
    l_thresh, w_thresh and pi_thresh. Returns nothing.
    """
    fitted = BIC.get_best_model(I, penality, diff_threshold)

    hits = []
    for component in fitted.rvs:
        if check_bidir_component(component, si_thresh=si_thresh, l_thresh=l_thresh, w_thresh=w_thresh, pi_thresh=pi_thresh):
            hits.append(component)
    if not hits:
        return

    FHW.write("#" + I.chrom + ":" + str(I.start) + "-" + str(I.stop) + "\n")
    for component in hits:
        # The signal is binned before the component line is written.
        signal = bin_ChIP_signal(component, I)
        FHW.write(component.__str__() + "\n")
        for X, data_type, peak in signal:
            prefix = data_type + "," + str(peak) + ","
            spans = ",".join([str(lo) + "-" + str(hi) for lo, hi in X])
            FHW.write(prefix + spans + "\n")
Exemple #5
0
def output(I, FHW, penality, diff_threshold):
    """
    Writes I's best model to FHW: a header line followed by one line per
    model component.

    I: Interval object passed to BIC.get_best_model; its chrom, start, stop
       and annotation_N attributes form the
       "#chrom:start-stop,annotation_N" header line.
    FHW: Writable object; only its write() method is used.
    penality: Forwarded unchanged to BIC.get_best_model.
    diff_threshold: Forwarded unchanged to BIC.get_best_model.

    Returns nothing.
    """
    best = BIC.get_best_model(I, penality, diff_threshold)

    locus = I.chrom + ":" + str(I.start) + "-" + str(I.stop)
    FHW.write("#" + locus + "," + str(I.annotation_N) + "\n")

    for component in best.rvs:
        FHW.write(component.__str__() + "\n")
Exemple #6
0
def run(root):
    """
    Runs whichever analyses are switched on by the hard-coded flags below.

    root: Path prefix used only by the `parameters` analysis. It is joined
          by plain string concatenation, so it must already end with a path
          separator.

    As committed, only `correlation_BO` is enabled: it loads the DMSO2_3
    bidirectional-hit intervals and passes the first returned value to
    correlations.parameters_dist. The other branches are kept for manual
    use. Returns nothing.
    """
    # Analysis toggles; edit these to choose what run() does.
    display_fits = False
    parameters = False
    correlation = False
    correlation_BO = True

    if correlation_BO:
        DIR = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/EMG_out_files/"
        # Only DMSO2_3 is loaded below; the remaining inputs are referenced
        # by the disabled analyses kept as comments at the end of this branch.
        DMSO1hr101911 = "DMSO1hr101911_model_fits/EMG-4_bidirectional_hits_intervals.bed"
        DMSO1027 = "DMSO1027_1212_model_fits/EMG-3_bidirectional_hits_intervals.bed"
        Ma6_NoIndex = "Ma6_NoIndex_L008_R1_001/EMG-6_bidirectional_hits_intervals.bed"
        DMSO2_3 = "Allen2014_DMSO2_3-2_bidirectional_hits_intervals.bed"
        Nutlin2_3 = "Nutlin2_3_model_fits/EMG-2_bidirectional_hits_intervals.bed"

        RefSeq = "/Users/joazofeifa/Lab/genome_files/RefSeqHG19.txt"
        # Earlier ChIP_p53 input, which was immediately overwritten:
        # "/Users/joazofeifa/Lab/ACM_IEEE_Paper_analysis/files/bedFiles/Atleast7of7.bedbothstrands.bed_norefgene.bed"
        ChIP_p53 = "/Users/joazofeifa/Lab/nutlin_bidirectional_hits_intervals_091715.bed.count.bed.h.bed.namescoreDMSObi.resSig.txt.bed.txt"
        DMSO_forward = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.pos.BedGraph"
        DMSO_reverse = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.neg.BedGraph"
        Nutlin_forward = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/Nutlin2_3.sorted.pos.BedGraph"
        Nutlin_reverse = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/Nutlin2_3.sorted.neg.BedGraph"

        # DMSO1hr101911_L,DMSO1hr101911_G = load.load_model_fits_bed_file(DIR+DMSO1hr101911)
        # DMSO1027_L,DMSO1027_G = load.load_model_fits_bed_file(DIR+DMSO1027)
        # Ma6_NoIndex_L,Ma6_NoIndex_G = load.load_model_fits_bed_file(DIR+Ma6_NoIndex)
        DMSO2_3_L, DMSO2_3_G = load.load_model_fits_bed_file(DIR + DMSO2_3)
        correlations.parameters_dist(DMSO2_3_L)
        # Nutlin2_3_L,Nutlin2_3_G = load.load_model_fits_bed_file(DIR+Nutlin2_3)
        # density_plots.insert_bedgraph(DMSO2_3_L,(DMSO_forward,DMSO_reverse ))
        # density_plots.insert_bedgraph(Nutlin2_3_L,(Nutlin_forward,Nutlin_reverse ))

        # Disabled follow-up analyses:
        # overlaps = correlations.match_UP(Ma6_NoIndex_L, DMSO2_3_L)
        # density_plots.plot_density(overlaps)
        # correlations.p53_binding(Nutlin2_3_L, DMSO2_3_L, overlaps)
        # correlations.label_p53(overlaps, attr="lam", LOG=True )
        # correlations.promoter_differences_test((DMSO2_3_L,Nutlin2_3_L))
        # correlations.p53_differences_test((Nutlin2_3_L,))
        # correlations.si_lam(overlaps)
        # correlations.run(overlaps, attr="si", LOG=False )
        # correlations.run_all(overlaps)

    if correlation:
        DIR = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/EMG_out_files/"
        DMSO1hr101911 = "DMSO1hr101911_model_fits/model_fits.txt"
        DMSO1027 = "DMSO1027_1212_model_fits/model_fits.txt"
        Ma6_NoIndex = "Ma6_NoIndex_L008_R1_001/model_fits.txt"
        DMSO2_3 = "DMSO2_3_model_fits/model_fits.txt"
        Nutlin2_3 = "Nutlin2_3_model_fits/model_fits.txt"
        DMSO1hr101911_L, DMSO1hr101911_G = load.load_model_fits_bed_file(DIR + DMSO1hr101911)
        DMSO1027_L, DMSO1027_G = load.load_model_fits_bed_file(DIR + DMSO1027)
        # Previously unpacked into (Ma6_NoIndex_L, Ma6_NoIndex_L), which
        # discarded the first returned value; now matches the sibling lines.
        Ma6_NoIndex_L, Ma6_NoIndex_G = load.load_model_fits_bed_file(DIR + Ma6_NoIndex)
        DMSO2_3_L, DMSO2_3_G = load.load_model_fits_bed_file(DIR + DMSO2_3)
        Nutlin2_3_L, Nutlin2_3_G = load.load_model_fits_bed_file(DIR + Nutlin2_3)

        # NOTE(review): each *_L value is passed twice and no *_G value is
        # used; confirm against the signature of correlations.run.
        correlations.run(DMSO2_3_L, DMSO2_3_L, Ma6_NoIndex_L, Ma6_NoIndex_L)

    if display_fits:
        out_dir = "/Users/joeyazo/Desktop/Lab/gro_seq_files/HCT116/EMG_out_files/"
        model_file = out_dir + "model_fits_out_all_4"
        data_file = out_dir + "test_file_2.tsv"

        intervals = load.EMG_out(model_file)
        load.insert_data(data_file, intervals)
        dmf.display(intervals, bins=300)

    if parameters:
        EMG_out_FILE = root + "gro_seq_files/HCT116/EMG_out_files/EMG_model_fits_all_0"
        # Sub-toggles of this branch. The first one used to rebind the
        # outer `parameters` flag; it now has its own name.
        lap_analysis = False
        BIC_analysis = True
        # only supports loading one at a time
        fits = load.EMG_out(EMG_out_FILE)
        if lap_analysis:
            lap.run(fits, spec=None,
                    weight_thresh=0.1, retry_tresh=0,
                    converged=True)
        if BIC_analysis:
            BIC.run(fits)