def run( options ):
    """Score a treatment bedGraph against a control bedGraph and write the result.

    Reads the bedGraph files named by ``options.tfile`` and ``options.cfile``,
    combines them according to ``options.method`` and writes the resulting
    track to ``options.ofile``.

    Handled values of ``options.method``: 'ppois', 'qpois', 'substract',
    'divide' and 'logLR'.

    Raises:
        Exception: if ``options.method`` is none of the values above.
    """
    info("Read and build treatment bedGraph...")
    tbio = cBedGraphIO.bedGraphIO(options.tfile)
    tbtrack = tbio.build_bdgtrack()

    info("Read and build control bedGraph...")
    cbio = cBedGraphIO.bedGraphIO(options.cfile)
    cbtrack = cbio.build_bdgtrack()

    method = options.method

    info("Calculate scores comparing treatment and control by %s..." % method)
    # build score track
    if method == 'ppois':
        sbtrack = tbtrack.make_scoreTrack_for_macs(cbtrack)
        sbtrack = scoreTracktoBedGraph(sbtrack,'-100logp')
    elif method == 'qpois':
        sbtrack = tbtrack.make_scoreTrack_for_macs(cbtrack)
        # q-values are assigned in place from the p->q table before conversion
        pqtable = sbtrack.make_pq_table()
        sbtrack.assign_qvalue(pqtable)
        sbtrack = scoreTracktoBedGraph(sbtrack,'-100logq')
    elif method == 'substract':
        # NOTE(review): the spelling 'substract' is the value compared at
        # runtime; it is kept verbatim so existing callers keep working.
        sbtrack = tbtrack.overlie(cbtrack,func=lambda x,y:x-y)
    elif method == 'divide':
        # float() forces true division even when both values are integers
        sbtrack = tbtrack.overlie(cbtrack,func=lambda x,y:float(x)/y)
    elif method == 'logLR':             # log likelihood
        sbtrack = tbtrack.overlie(cbtrack,func=logLR)
    else:
        raise Exception("Can't reach here!")

    info("Write to output bedGraph...")
    # The context manager guarantees the handle is flushed and closed even
    # if writing fails part-way.
    with io.open(options.ofile,"wb") as ofhd:
        sbtrack.write_bedGraph(ofhd,name="%s_Scores" % (method.upper()),description="Scores calculated by %s" % (method.upper()))
def run( options ):
    """Compute one score bedGraph per requested method from treatment/control.

    ``options`` is first passed through ``opt_validate``. The treatment
    (``options.tfile``) and control (``options.cfile``) bedGraphs are combined
    into one score track, then for every distinct entry of ``options.method``
    the scoring method is switched and the track is written to a file inside
    ``options.outdir``.

    Output name: ``options.ofile[i]`` (``i`` is the position of the method in
    ``options.method``) when ``options.ofile`` is truthy, otherwise
    ``<oprefix>_<method>.bdg``.

    Raises:
        Exception: if a method name is not one of the handled names.
    """
    options = opt_validate( options )
    scaling_factor = options.sfactor
    pseudo_depth = 1.0/scaling_factor   # not an actual depth, but its reciprocal, a trick to override SPMR while necessary.

    info("Read and build treatment bedGraph...")
    tbio = cBedGraphIO.bedGraphIO(options.tfile)
    tbtrack = tbio.build_bdgtrack()

    info("Read and build control bedGraph...")
    cbio = cBedGraphIO.bedGraphIO(options.cfile)
    cbtrack = cbio.build_bdgtrack()

    info("Build scoreTrackII...")
    sbtrack = tbtrack.make_scoreTrackII_for_macs( cbtrack, depth1 = pseudo_depth, depth2 = pseudo_depth )
    if abs(scaling_factor-1) > 1e-6:
        # Only for the case while your input is SPMR from MACS2 callpeak; Let's override SPMR.
        info("Values in your input bedGraph files will be multiplied by %f ..." % scaling_factor)
        sbtrack.change_normalization_method( ord('M') ) # a hack to override SPMR
    sbtrack.set_pseudocount( options.pseudocount )

    # method name -> single-character code handed to change_score_method()
    score_codes = {
        'ppois': 'p',
        'qpois': 'q',
        'subtract': 'd',
        'logFE': 'f',
        'FE': 'F',
        'logLR': 'l',       # log likelihood
        'slogLR': 's',
    }

    already_processed = set()
    for (i, method) in enumerate(options.method):
        # a method listed twice is computed only once (first occurrence wins)
        if method in already_processed:
            continue
        already_processed.add( method )

        info("Calculate scores comparing treatment and control by '%s'..." % method)
        if options.ofile:
            ofile = os.path.join( options.outdir, options.ofile[ i ] )
        else:
            ofile = os.path.join( options.outdir, options.oprefix + "_" + method + ".bdg" )

        # build score track
        if method not in score_codes:
            raise Exception("Can't reach here!")
        sbtrack.change_score_method( ord(score_codes[method]) )

        info("Write bedGraph of scores...")
        # close each output as soon as it is written instead of leaking one
        # handle per method
        with open(ofile,"wb") as ofhd:
            sbtrack.write_bedGraph(ofhd,name="%s_Scores" % (method.upper()),description="Scores calculated by %s" % (method.upper()), column = 3)
        info("Finished '%s'! Please check '%s'!" % (method, ofile))
def run( options ):
    """Call regions from two treatment/control bedGraph pairs and write BED files.

    Builds a ``cScoreTrack.TwoConditionScores`` from the four bedGraphs
    (``options.t1bdg``, ``options.c1bdg``, ``options.t2bdg``,
    ``options.c2bdg``), calls peaks with ``options.minlen``,
    ``options.maxgap`` and ``options.cutoff`` and writes the three returned
    peak sets to ``<oprefix>_c<cutoff>_cond1.bed``, ``..._cond2.bed`` and
    ``..._common.bed`` in the current working directory.
    """
    if options.maxgap >= options.minlen:
        # NOTE(review): error() is only called here; execution continues
        # afterwards unless error() itself aborts -- confirm intended.
        error("MAXGAP should be smaller than MINLEN! Your input is MAXGAP = %d and MINLEN = %d" % (options.maxgap, options.minlen))

    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()

    depth1 = options.depth1
    depth2 = options.depth2

    # Turn the two depths into scaling factors: the larger condition gets a
    # factor below 1, the smaller one gets exactly 1.0.
    if depth1 > depth2:         # scale down condition 1 to size of condition 2
        depth1 = depth2 / depth1
        depth2 = 1.0
    elif depth1 < depth2:       # scale down condition 2 to size of condition 1
        depth2 = depth1/ depth2
        depth1 = 1.0
    else:                       # no need to scale down any
        depth1 = 1.0
        depth2 = 1.0

    twoconditionscore = cScoreTrack.TwoConditionScores( t1btrack,
                                                        c1btrack,
                                                        t2btrack,
                                                        c2btrack,
                                                        depth1,
                                                        depth2 )
    twoconditionscore.build()
    twoconditionscore.finalize()
    (cat1,cat2,cat3) = twoconditionscore.call_peaks(min_length=options.minlen, max_gap=options.maxgap, cutoff=options.cutoff)

    info("Write peaks...")
    # Each output is written inside its own context manager so the handle is
    # closed before the next file is opened.
    with open ("%s_c%.1f_cond1.bed" % (options.oprefix,options.cutoff),"w") as nf:
        cat1.write_to_bed(nf, name_prefix=options.oprefix+"_cond1_", name="condition 1", description="unique regions in condition 1", score_column="score")

    with open ("%s_c%.1f_cond2.bed" % (options.oprefix,options.cutoff),"w") as nf:
        cat2.write_to_bed(nf, name_prefix=options.oprefix+"_cond2_", name="condition 2", description="unique regions in condition 2", score_column="score")

    with open ("%s_c%.1f_common.bed" % (options.oprefix,options.cutoff),"w") as nf:
        cat3.write_to_bed(nf, name_prefix=options.oprefix+"_common_",name="common", description="common regions in both conditions", score_column="score")
    info("Done")
def run( options ):
    """Score a treatment bedGraph against a control bedGraph with one method.

    Builds a score track from ``options.tfile`` and ``options.cfile`` using
    ``options.tdepth`` / ``options.cdepth``, selects the score method named
    by ``options.method`` and writes the track to ``options.ofile``.

    Handled values of ``options.method``: 'ppois', 'qpois', 'subtract',
    'logFE', 'FE' and 'logLR'.

    Raises:
        Exception: if ``options.method`` is none of the values above.
    """
    info("Read and build treatment bedGraph...")
    tbio = cBedGraphIO.bedGraphIO(options.tfile)
    tbtrack = tbio.build_bdgtrack()
    treat_depth = options.tdepth

    info("Read and build control bedGraph...")
    cbio = cBedGraphIO.bedGraphIO(options.cfile)
    cbtrack = cbio.build_bdgtrack()
    ctrl_depth = options.cdepth

    info("Build scoreTrackII...")
    sbtrack = tbtrack.make_scoreTrackII_for_macs( cbtrack, depth1 = treat_depth, depth2 = ctrl_depth )
    # normalize by depth
    if abs(treat_depth-1) > 1e-6 or abs(ctrl_depth-1) > 1e-6:
        # if depth of treat and control is 1.0 ( files are generated
        # by MACS2 --SPMR ), no need for the following step.
        info("Normalize by sequencing depth of million reads...")
        sbtrack.change_normalization_method( ord('M') )
    sbtrack.set_pseudocount( options.pseudocount )

    method = options.method

    info("Calculate scores comparing treatment and control by %s..." % method)
    # method name -> single-character code handed to change_score_method()
    score_codes = {
        'ppois': 'p',
        'qpois': 'q',
        'subtract': 'd',
        'logFE': 'f',
        'FE': 'F',
        'logLR': 'l',       # log likelihood
    }
    # build score track
    if method not in score_codes:
        raise Exception("Can't reach here!")
    sbtrack.change_score_method( ord(score_codes[method]) )

    info("Write bedGraph of scores...")
    # context manager so the output handle is always closed
    with io.open(options.ofile,"wb") as ofhd:
        sbtrack.write_bedGraph(ofhd,name="%s_Scores" % (method.upper()),description="Scores calculated by %s" % (method.upper()), column = 3)
    info("Finished! Please check %s!" % (options.ofile))
def run(options):
    """Call peaks from a single bedGraph and write them as narrowPeak.

    Reads ``options.ifile``, calls peaks with ``options.cutoff``,
    ``options.minlen``, ``options.maxgap`` and ``options.call_summits``, and
    writes the result inside ``options.outdir``.

    When ``options.ofile`` is truthy it is used as the output file name and
    ``options.oprefix`` is overwritten with it (a side effect on the caller's
    ``options`` object). Otherwise the name is derived from ``oprefix`` and
    the calling parameters.
    """
    info("Read and build bedGraph...")
    bio = cBedGraphIO.bedGraphIO(options.ifile)
    btrack = bio.build_bdgtrack(baseline_value=0)

    info("Call peaks from bedGraph...")
    peaks = btrack.call_peaks(cutoff=float(options.cutoff),
                              min_length=int(options.minlen),
                              max_gap=int(options.maxgap),
                              call_summits=options.call_summits)

    info("Write peaks...")
    if options.ofile:
        # the explicit file name doubles as the peak name prefix below
        options.oprefix = options.ofile
        peak_path = os.path.join(options.outdir, options.ofile)
    else:
        peak_path = os.path.join(
            options.outdir,
            "%s_c%.1f_l%d_g%d_peaks.narrowPeak" %
            (options.oprefix, options.cutoff, options.minlen, options.maxgap))

    # context manager so the output handle is always closed
    with open(peak_path, "w") as nf:
        peaks.write_to_narrowPeak(nf,
                                  name=options.oprefix,
                                  name_prefix=options.oprefix + "_narrowPeak",
                                  score_column="score",
                                  trackline=options.trackline)
    info("Done")
def run(options):
    """Combine the first two replicate bedGraphs into one score bedGraph.

    ``options`` is first passed through ``opt_validate``. Every file in
    ``options.ifile`` is read into a track, but only the first two tracks are
    combined (``overlie`` with ``func=options.method``). The result is
    written to ``options.ofile`` inside ``options.outdir``.
    """
    options = opt_validate(options)

    info("Read and build bedGraph for each replicate...")
    reps = []
    for i, ifile in enumerate(options.ifile, 1):
        info("Read file #%d" % i)
        reps.append(cBedGraphIO.bedGraphIO(ifile).build_bdgtrack())

    # first two reps
    # NOTE(review): replicates beyond the second are loaded but unused here.
    info("combining #1 and #2 with method '%s'" % options.method)
    cmbtrack = reps[0].overlie(reps[1], func=options.method)
    ofile = os.path.join(options.outdir, options.ofile)
    info("Write bedGraph of combined scores...")
    # context manager so the output handle is always closed
    with open(ofile, "wb") as ofhd:
        cmbtrack.write_bedGraph(
            ofhd,
            name="%s_combined_scores" % (options.method.upper()),
            description="Scores calculated by %s" % (options.method.upper()),
        )
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
def run( options ):
    """Call peaks from a bedGraph, or run a cutoff analysis instead.

    Reads ``options.ifile``. When ``options.cutoff_analysis`` is truthy the
    text returned by ``btrack.cutoff_analysis`` is written to a report file;
    otherwise peaks are called and written as narrowPeak. Output goes into
    ``options.outdir``; ``options.ofile`` overrides the generated file name.

    In peak-calling mode a truthy ``options.ofile`` also overwrites
    ``options.oprefix`` (a side effect on the caller's ``options`` object).
    """
    info("Read and build bedGraph...")
    bio = cBedGraphIO.bedGraphIO(options.ifile)
    btrack = bio.build_bdgtrack(baseline_value=0)

    if options.cutoff_analysis:
        info("Analyze cutoff vs number of peaks/total length of peaks/average length of peak")
        # NOTE(review): the meaning of the literal 50 is not visible here;
        # confirm against cutoff_analysis().
        cutoff_analysis_result = btrack.cutoff_analysis( int(options.maxgap), int(options.minlen), 50 )
        info("Write report...")
        if options.ofile:
            report_path = os.path.join( options.outdir, options.ofile )
        else:
            report_path = os.path.join( options.outdir, "%s_l%d_g%d_cutoff_analysis.txt" % (options.oprefix,options.minlen,options.maxgap))
        # context manager so the report handle is always closed
        with open( report_path, "w" ) as fhd:
            fhd.write( cutoff_analysis_result )
        info("Done")
    else:
        info("Call peaks from bedGraph...")
        peaks = btrack.call_peaks(cutoff=float(options.cutoff),min_length=int(options.minlen),max_gap=int(options.maxgap),call_summits=options.call_summits)

        info("Write peaks...")
        if options.ofile:
            # the explicit file name doubles as the peak name prefix below
            options.oprefix = options.ofile
            peak_path = os.path.join( options.outdir, options.ofile )
        else:
            peak_path = os.path.join( options.outdir, "%s_c%.1f_l%d_g%d_peaks.narrowPeak" % (options.oprefix,options.cutoff,options.minlen,options.maxgap))
        # context manager so the output handle is always closed
        with open( peak_path, "w" ) as nf:
            peaks.write_to_narrowPeak(nf, name=options.oprefix, name_prefix=options.oprefix+"_narrowPeak", score_column="score", trackline=options.trackline)
        info("Done")
def run( options ):
    """Call three categories of peaks from two treatment/control pairs.

    Builds a ``cScoreTrack.TwoConditionScores`` from the four bedGraphs and
    ``options.depth1`` / ``options.depth2``, computes p- and q-values, calls
    peaks with ``options.minlen`` and ``options.cutoff`` and writes the three
    returned peak sets to ``<oprefix>_c<cutoff>_cat{1,2,3}_peaks.encodePeak``
    in the current working directory.
    """
    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    depth1 = options.depth1

    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()

    depth2 = options.depth2

    twoconditionscore = cScoreTrack.TwoConditionScores( t1btrack,
                                                        c1btrack,
                                                        t2btrack,
                                                        c2btrack,
                                                        depth1,
                                                        depth2 )
    twoconditionscore.build()
    twoconditionscore.finalize()
    twoconditionscore.compute_all_pvalues()
    twoconditionscore.compute_track_qvalues()
    (cat1,cat2,cat3) = twoconditionscore.call_peaks(min_length=options.minlen, cutoff=options.cutoff)

    info("Write peaks...")
    # One file per category; each handle is closed before the next is opened.
    outputs = (
        ("%s_c%.1f_cat1_peaks.encodePeak", cat1),
        ("%s_c%.1f_cat2_peaks.encodePeak", cat2),
        ("%s_c%.1f_cat3_peaks.encodePeak", cat3),
    )
    for (template, category) in outputs:
        with open (template % (options.oprefix,options.cutoff),"w") as nf:
            category.write_to_narrowPeak(nf, name_prefix=options.oprefix+"_encodePeak", score_column="score")
    info("Done")
def run( options ):
    """Call peaks from a single bedGraph and write them as an encodePeak file.

    Reads ``options.ifile``, calls peaks with ``options.cutoff``,
    ``options.minlen``, ``options.maxgap`` and ``options.call_summits`` and
    writes ``<oprefix>_c<cutoff>_l<minlen>_g<maxgap>_peaks.encodePeak`` in the
    current working directory.
    """
    info("Read and build bedGraph...")
    bio = cBedGraphIO.bedGraphIO(options.ifile)
    btrack = bio.build_bdgtrack(baseline_value=0)

    info("Call peaks from bedGraph...")
    peaks = btrack.call_peaks(cutoff=float(options.cutoff),min_length=int(options.minlen),max_gap=int(options.maxgap),call_summits=options.call_summits)

    info("Write peaks...")
    peak_path = "%s_c%.1f_l%d_g%d_peaks.encodePeak" % (options.oprefix,options.cutoff,options.minlen,options.maxgap)
    # context manager so the output handle is always closed
    with open (peak_path,"w") as nf:
        peaks.write_to_narrowPeak(nf, name_prefix=options.oprefix+"_encodePeak", score_column="score", trackline=options.trackline)
    info("Done")
def run( options ):
    """Call broad peaks from a single bedGraph and write a gappedPeak file.

    Reads ``options.ifile`` and calls ``btrack.call_broadpeaks`` with the two
    cutoffs (``options.cutoffpeak``, ``options.cutofflink``),
    ``options.minlen`` and the two max-gap values (``options.lvl1maxgap``,
    ``options.lvl2maxgap``). Element ``[1]`` of the returned value is written
    to a ``..._broad.bed12`` file in the current working directory.
    """
    info("Read and build bedGraph...")
    bio = cBedGraphIO.bedGraphIO(options.ifile)
    btrack = bio.build_bdgtrack(baseline_value=0)

    info("Call peaks from bedGraph...")
    bpeaks = btrack.call_broadpeaks (lvl1_cutoff=options.cutoffpeak, lvl2_cutoff=options.cutofflink, min_length=options.minlen, lvl1_max_gap=options.lvl1maxgap, lvl2_max_gap=options.lvl2maxgap)

    info("Write peaks...")
    broad_path = "%s_c%.1f_C%.2f_l%d_g%d_G%d_broad.bed12" % (options.oprefix,options.cutoffpeak,options.cutofflink,options.minlen,options.lvl1maxgap,options.lvl2maxgap)
    # context manager so the output handle is always closed
    with open (broad_path,"w") as bf:
        # NOTE(review): only element [1] of the result is written; what
        # element [0] holds is not visible here.
        bpeaks[1].write_to_gappedPeak(bf, name_prefix=options.oprefix+"_broadRegion")
    info("Done")
def run( options ):
    """Modify the scores of one bedGraph and write the modified track.

    ``options`` is first passed through ``opt_validate``. The track read from
    ``options.ifile`` is modified according to ``options.method``
    (case-insensitive):

    * ``multiply`` -- every value is multiplied by ``options.extraparam``
    * ``add``      -- ``options.extraparam`` is added to every value
    * ``p2q``      -- ``btrack.p2q()`` is called

    Any other method leaves the track unchanged. The result is written to
    ``options.ofile`` inside ``options.outdir``.
    """
    options = opt_validate( options )
    info("Read and build bedGraph...")
    bio = cBedGraphIO.bedGraphIO(options.ifile)
    btrack = bio.build_bdgtrack(baseline_value=0)

    info("Modify bedGraph...")
    method = options.method.lower()     # normalise once instead of per branch
    if method == "multiply":
        btrack.apply_func( lambda x: x * options.extraparam)
    elif method == "add":
        btrack.apply_func( lambda x: x + options.extraparam)
    elif method == "p2q":
        btrack.p2q()

    ofile = os.path.join( options.outdir, options.ofile )
    info("Write bedGraph of modified scores...")
    # context manager so the output handle is always closed
    with open(ofile,"wb") as ofhd:
        btrack.write_bedGraph(ofhd,name="%s_modified_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
def run(options):
    """Modify the scores of one bedGraph and write the modified track.

    ``options`` is first passed through ``opt_validate``. The track read from
    ``options.ifile`` is modified according to ``options.method``
    (case-insensitive):

    * ``multiply`` -- every value is multiplied by ``options.extraparam``
    * ``add``      -- ``options.extraparam`` is added to every value
    * ``p2q``      -- ``btrack.p2q()`` is called
    * ``analen``   -- ``btrack.analen()`` is called

    Any other method leaves the track unchanged. The result is written to
    ``options.ofile`` inside ``options.outdir``.
    """
    options = opt_validate(options)
    info("Read and build bedGraph...")
    bio = cBedGraphIO.bedGraphIO(options.ifile)
    btrack = bio.build_bdgtrack(baseline_value=0)

    info("Modify bedGraph...")
    method = options.method.lower()     # normalise once instead of per branch
    if method == "multiply":
        btrack.apply_func(lambda x: x * options.extraparam)
    elif method == "add":
        btrack.apply_func(lambda x: x + options.extraparam)
    elif method == "p2q":
        btrack.p2q()
    elif method == "analen":
        btrack.analen()

    ofile = os.path.join(options.outdir, options.ofile)
    info("Write bedGraph of modified scores...")
    # context manager so the output handle is always closed
    with open(ofile, "wb") as ofhd:
        btrack.write_bedGraph(ofhd,
                              name="%s_modified_scores" % (options.method.upper()),
                              description="Scores calculated by %s" % (options.method.upper()))
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
def run(options):
    """Combine the first two replicate bedGraphs into one score bedGraph.

    ``options`` is first passed through ``opt_validate``. Every file in
    ``options.ifile`` is read into a track, but only the first two tracks are
    combined (``overlie`` with ``func=options.method``). The result is
    written to ``options.ofile`` inside ``options.outdir``.
    """
    options = opt_validate(options)

    info("Read and build bedGraph for each replicate...")
    reps = []
    for i, ifile in enumerate(options.ifile, 1):
        info("Read file #%d" % i)
        reps.append(cBedGraphIO.bedGraphIO(ifile).build_bdgtrack())

    # first two reps
    # NOTE(review): replicates beyond the second are loaded but unused here.
    info("combining #1 and #2 with method '%s'" % options.method)
    cmbtrack = reps[0].overlie(reps[1], func=options.method)
    ofile = os.path.join(options.outdir, options.ofile)
    info("Write bedGraph of combined scores...")
    # context manager so the output handle is always closed
    with open(ofile, "wb") as ofhd:
        cmbtrack.write_bedGraph(
            ofhd,
            name="%s_combined_scores" % (options.method.upper()),
            description="Scores calculated by %s" % (options.method.upper()))
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
def run( args ):
    """The Differential function/pipeline for MACS.

    ``args`` is validated by ``diff_opt_validate``. The four bedGraphs
    (treatment/control for two conditions) are loaded into a
    ``DiffScoreTrackI``. Peaks are either called on it
    (``options.call_peaks``) or taken from the peak files ``options.peaks1``
    / ``options.peaks2``. Differential peaks are then called and the results
    are written into ``options.outdir``:

    * always: ``options.peakxls`` and ``options.peakbed``
    * if ``diffscore.has_peakio()``: ``options.peak1xls``, ``options.peak2xls``
    * if ``options.store_bdg``: ``options.bdglogLR``, ``options.bdgpvalue``,
      ``options.bdglogFC``

    Raises:
        NotImplementedError: when ``options.call_peaks`` is set and
            ``options.track_score_method`` is neither 'p' nor 'q'.
    """
    # Parse options...
    options = diff_opt_validate( args )

    # check if tag files exist (open() raises if one is missing or unreadable)
    for bdg_path in (options.t1bdg, options.c1bdg, options.t2bdg, options.c2bdg):
        with open(bdg_path):
            pass

    # NOTE(review): p1io / p2io are only bound when the matching peak file is
    # given; the "Using existing peaks" branch below needs both of them.
    if not options.peaks1 == '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if not options.peaks2 == '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)

    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    # a single depth value applies to both conditions
    depth1 = options.depth[0]
    if len(options.depth) >= 2:
        depth2 = options.depth[1]
    else:
        depth2 = depth1

    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()

    #3 Call Peaks
    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            diffscore.call_peaks(cutoff = options.peaks_log_pvalue,
                                 min_length = options.pminlen)
        elif options.track_score_method == 'q':
            diffscore.call_peaks(cutoff = options.peaks_log_qvalue,
                                 min_length = options.pminlen)
        else:
            raise NotImplementedError
    else:
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()

    info("Calling differentially occupied peaks")
    # 'p' and 'q' are mutually exclusive; any other value calls nothing
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    elif options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)

    info("Write output xls and BED files...")
    # Context managers close every output even if a writer raises.
    with open( os.path.join( options.outdir, options.peakxls), "w" ) as ofhd_xls:
        ofhd_xls.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
        ofhd_xls.write( options.argtxt+"\n" )
        with open( os.path.join( options.outdir, options.peakbed), "w" ) as ofhd_bed:
            # pass write method so we can print too, and include name
            diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                                  name = options.name, name_prefix="%s_peak_",
                                  description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                                  trackline=options.trackline)

    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        with open( os.path.join( options.outdir, options.peak1xls), "w" ) as ofhd_xls1:
            ofhd_xls1.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
            ofhd_xls1.write(options.argtxt+"\n")
            with open( os.path.join( options.outdir, options.peak2xls), "w" ) as ofhd_xls2:
                ofhd_xls2.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
                ofhd_xls2.write(options.argtxt+"\n")
                diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                                name = options.name, name_prefix="%s_peak_")

    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        with open( os.path.join( options.outdir, options.bdglogLR), "w" ) as ofhd_logLR:
            with open( os.path.join( options.outdir, options.bdgpvalue), "w" ) as ofhd_pvalue:
                with open( os.path.join( options.outdir, options.bdglogFC), "w" ) as ofhd_logFC:
                    diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                              logFC=ofhd_logFC, name = options.name,
                                              description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                              trackline=options.trackline)
def run(options):
    """Call regions from two treatment/control bedGraph pairs and write BED files.

    Builds a ``cScoreTrack.TwoConditionScores`` from the four bedGraphs, calls
    peaks with ``options.minlen``, ``options.maxgap`` and ``options.cutoff``
    and writes the three returned peak sets into ``options.outdir``.

    When ``options.ofile`` is truthy its first three entries are used both as
    the output file names and as the peak name prefixes. Otherwise the names
    are derived from ``options.oprefix`` and ``options.cutoff``.
    """
    if options.maxgap >= options.minlen:
        # NOTE(review): error() is only called here; execution continues
        # afterwards unless error() itself aborts -- confirm intended.
        error(
            "MAXGAP should be smaller than MINLEN! Your input is MAXGAP = %d and MINLEN = %d"
            % (options.maxgap, options.minlen))

    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()

    depth1 = options.depth1
    depth2 = options.depth2

    # Turn the two depths into scaling factors: the larger condition gets a
    # factor below 1, the smaller one gets exactly 1.0.
    if depth1 > depth2:  # scale down condition 1 to size of condition 2
        depth1 = depth2 / depth1
        depth2 = 1.0
    elif depth1 < depth2:  # scale down condition 2 to size of condition 1
        depth2 = depth1 / depth2
        depth1 = 1.0
    else:  # no need to scale down any
        depth1 = 1.0
        depth2 = 1.0

    twoconditionscore = cScoreTrack.TwoConditionScores(t1btrack, c1btrack,
                                                       t2btrack, c2btrack,
                                                       depth1, depth2)
    twoconditionscore.build()
    twoconditionscore.finalize()
    (cat1, cat2, cat3) = twoconditionscore.call_peaks(min_length=options.minlen,
                                                      max_gap=options.maxgap,
                                                      cutoff=options.cutoff)

    info("Write peaks...")
    if options.ofile:
        # A real list (not map()) because the paths are indexed below; a
        # map object is not subscriptable on Python 3.
        ofiles = [os.path.join(options.outdir, x) for x in options.ofile]
        name_prefix = options.ofile
    else:
        ofiles = [
            os.path.join(
                options.outdir,
                "%s_c%.1f_cond1.bed" % (options.oprefix, options.cutoff)),
            os.path.join(
                options.outdir,
                "%s_c%.1f_cond2.bed" % (options.oprefix, options.cutoff)),
            os.path.join(
                options.outdir,
                "%s_c%.1f_common.bed" % (options.oprefix, options.cutoff))
        ]
        name_prefix = [
            options.oprefix + "_cond1_",
            options.oprefix + "_cond2_",
            options.oprefix + "_common_",
        ]

    # Each output is written inside its own context manager so the handle is
    # closed before the next file is opened.
    with open(ofiles[0], 'w') as nf:
        cat1.write_to_bed(nf,
                          name_prefix=name_prefix[0],
                          name="condition 1",
                          description="unique regions in condition 1",
                          score_column="score")

    with open(ofiles[1], 'w') as nf:
        cat2.write_to_bed(nf,
                          name_prefix=name_prefix[1],
                          name="condition 2",
                          description="unique regions in condition 2",
                          score_column="score")

    with open(ofiles[2], 'w') as nf:
        cat3.write_to_bed(nf,
                          name_prefix=name_prefix[2],
                          name="common",
                          description="common regions in both conditions",
                          score_column="score")
    info("Done")
def run( options ):
    """Compute gfold values for the union of two peak sets and write reports.

    Reads two peak files (``options.peak1``, ``options.peak2``) and two
    pileup bedGraphs (``options.t1bdg``, ``options.t2bdg``), extracts the
    average signal of both pileups over the union of the peak regions,
    converts the truncated values to gfold via ``convert_gfold`` and writes:

    * ``<oprefix>_diff.bed`` -- regions whose gfold is non-zero
    * ``<oprefix>_diff.txt`` -- every region with both values (divided by n1
      and n2 respectively) and its gfold

    Side effect: sets ``options.do_MCMC`` to True on the caller's object.
    """
    options.do_MCMC = True

    # load precompiled matrix
    gfolds_c = PCGF(options.cutoff)

    info("Read peak files...")
    info("Peak of condition 1 treatment...")
    t1_peakio = genericBedIO(options.peak1)
    t1_peak = t1_peakio.build_bedtrack()

    info("Peak of condition 2 treatment...")
    t2_peakio = genericBedIO(options.peak2)
    t2_peak = t2_peakio.build_bedtrack()

    # get union peak regions
    union_peak = t1_peak.overlie(t2_peak)

    info("Read and build bedGraph...")
    info("Pileup of condition 1 treatment...")
    t1_bio = bedGraphIO(options.t1bdg)
    t1_btrack = t1_bio.build_bdgtrack(baseline_value=0)

    info("Pileup of condition 2 treatment...")
    t2_bio = bedGraphIO(options.t2bdg)
    t2_btrack = t2_bio.build_bdgtrack(baseline_value=0)

    # calculate sum of all signals in million
    t1_sum = t1_btrack.summary()[0]
    t2_sum = t2_btrack.summary()[0]
    n1 = t1_sum/1000000.0       # signal per million
    n2 = t2_sum/1000000.0
    offset = -log(n1,2)+log(n2,2)
    info("t1 sum: %.1f, t2 sum: %.1f, Offset is %.2f" % (t1_sum,t2_sum,offset))

    # combine two tracks
    info("Combine tracks...")
    comb_track = t1_btrack.make_scoreTrack_for_macs2diff(t2_btrack)

    info("Extract average values in union regions...")
    data_in_union = comb_track.extract_average(union_peak) # ([id,...],[count1,...],[count2,...])

    # Truncate both value columns in place; the same container is handed to
    # convert_gfold() below.
    for i in range(len(data_in_union[0])):
        data_in_union[1][i] = int(data_in_union[1][i]) # actual values are Pileup Per Peak Per Million reads (PPPPM)
        data_in_union[2][i] = int(data_in_union[2][i])

    info( "Calculate gfold ..." )
    gfolds = convert_gfold(data_in_union, gfolds_c, offset=offset, cutoff=options.cutoff,mcmc=options.do_MCMC)

    # sort by gfold (key= works on Python 2 and 3; cmp= is Python 2 only)
    gfolds.sort(key=lambda x: x[1])

    # write differential regions with non-zero gfold
    info( "Write differential regions to %s ..." % (options.oprefix+"_diff.bed") )
    with open(options.oprefix+"_diff.bed","w") as ofhd:
        for (rid, gf) in gfolds:
            if gf != 0:
                # region ids are "<chrom>.<start>.<end>"
                (chrom,start,end) = rid.split('.')
                ofhd.write( "%s\t%s\t%s\t%s\t%.5f\n" % (chrom,start,end,'.',gf) )

    info( "Write gfold values for each region to %s ..." % (options.oprefix+"_diff.txt") )
    gf_dict = dict(gfolds)
    with open(options.oprefix+"_diff.txt","w") as ofhd:
        for (rid, tmp1, tmp2) in zip(data_in_union[0], data_in_union[1], data_in_union[2]):
            gftmp = gf_dict[rid]
            ofhd.write("%s\t%.5f\t%.5f\t%.5f\n" % (rid,tmp1/n1,tmp2/n2,gftmp))