def run( o_options ):
    """Refine peaks and write them to options.oprefix + "_refinepeak.bed".

    Pipeline:
      1. validate the raw options with opt_validate();
      2. read the first four whitespace-separated columns of every line of
         options.bedfile into a PeakIO object and sort it;
      3. load the tag files described by the options;
      4. call compute_region_tags_from_peaks() with find_summit, using
         options.windowsize and options.cutoff, and write every returned
         record as one tab-separated line (five fields, the last one
         formatted with two decimals).

    o_options: raw option object, handed to opt_validate().

    Returns None. Errors from opening or parsing the files propagate to the
    caller.
    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info

    outputname = options.oprefix + "_refinepeak.bed"

    # The output is opened before any heavy work so that an unwritable
    # destination fails early; the with-block guarantees it is flushed and
    # closed even if a later step raises.
    with open( outputname, "w" ) as outputfile:
        peaks = PeakIO()
        with open( options.bedfile ) as peakio:
            for l in peakio:
                fs = l.rstrip().split()
                if not fs:
                    # blank line: nothing to add
                    continue
                peaks.add( fs[0], int(fs[1]), int(fs[2]), name=fs[3] )
        peaks.sort()

        #1 Read tag files
        info("read tag files...")
        fwtrack = load_tag_files_options( options )

        retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
        # Records are joined with newlines; no trailing newline is written.
        outputfile.write( "\n".join( "%s\t%d\t%d\t%s\t%.2f" % x for x in retval ) )

    info("Done!")
    info("Check output file: %s" % outputname)
def run( o_options ):
    """Refine peaks and write them to options.oprefix + "_refinepeak.bed".

    The options are validated with opt_validate(), the peak regions are read
    from options.bedfile (first four whitespace-separated columns of each
    line) into a sorted PeakIO, the tag files are loaded, and the records
    returned by compute_region_tags_from_peaks() (called with find_summit,
    options.windowsize and options.cutoff) are written as tab-separated
    lines.

    o_options: raw option object, handed to opt_validate().

    Returns None. Errors from opening or parsing the files propagate to the
    caller.
    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info

    outputname = options.oprefix + "_refinepeak.bed"

    # Context managers make sure both handles are closed (and the output
    # flushed) on every exit path; the output is still opened first so a
    # bad destination is reported before the tags are loaded.
    with open( outputname, "w" ) as outputfile:
        peaks = PeakIO()
        with open( options.bedfile ) as peakio:
            for line in peakio:
                fields = line.rstrip().split()
                if not fields:
                    # skip blank lines instead of failing on fields[0]
                    continue
                peaks.add( fields[0], int(fields[1]), int(fields[2]), name=fields[3] )
        peaks.sort()

        #1 Read tag files
        info("read tag files...")
        fwtrack = load_tag_files_options( options )

        retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
        # One record per line, newline-separated, no trailing newline.
        outputfile.write( "\n".join( "%s\t%d\t%d\t%s\t%.2f" % record for record in retval ) )

    info("Done!")
    info("Check output file: %s" % outputname)
def setUp( self ):
    """Generate random plus/minus strand tags and write them to a BED file.

    For every binding site, cutting positions are drawn from a normal
    distribution centred half a binding width to the left (plus strand) or
    to the right (minus strand) of the site. Draws whose tag would fall
    outside self.range are rejected and redrawn. The sorted positions are
    kept in self.plus_tags / self.minus_tags and written to
    self.test_tags_file as six-column lines on "chr1".
    """
    self.range = [ 0, 2000 ]
    self.binding_sites = [ 300, 500, 700 ]
    self.binding_strength = [ 60, 45, 55 ] # approximate binding affinity
    self.binding_width = [ 150, 150, 150 ] # binding width, left and right sides are cutting sites
    self.cutting_variation = 50            # variation at the correct cutting sites
    self.tag_size = 50
    self.test_tags_file = "random_test.bed"
    self.genome_size = 10000

    self.plus_tags = [ ]
    self.minus_tags = [ ]

    lower = self.range[ 0 ]
    upper = self.range[ 1 ]

    for site, strength, width in zip( self.binding_sites, self.binding_strength, self.binding_width ):
        # Plus strand: x is the left end of the tag. The loop accepts
        # strength + 1 positions because the counter starts at 0 and the
        # bound is inclusive.
        j = 0
        while j <= strength:
            x = int( normalvariate( site - width/2, self.cutting_variation ) )
            if x > lower and x + self.tag_size < upper:
                self.plus_tags.append( x )
                j += 1
        # Minus strand: x is the right end of the tag.
        j = 0
        while j <= strength:
            x = int( normalvariate( site + width/2, self.cutting_variation ) )
            if x - self.tag_size > lower and x < upper:
                self.minus_tags.append( x )
                j += 1

    self.plus_tags = sorted(self.plus_tags)
    self.minus_tags = sorted(self.minus_tags)

    self.result_peak = PeakIO()

    # write reads in bed files; the with-block closes (and flushes) the
    # file so it is complete on disk before any test reads it
    with open( self.test_tags_file, "w" ) as fhd:
        for x in self.plus_tags:
            fhd.write( "chr1\t%d\t%d\t.\t0\t+\n" % ( x, x + self.tag_size ) )
        for x in self.minus_tags:
            fhd.write( "chr1\t%d\t%d\t.\t0\t-\n" % ( x - self.tag_size, x ) )
def run( args ):
    """The Differential function/pipeline for MACS.

    Steps:
      0. validate the arguments with diff_opt_validate() and check that the
         four bedGraph inputs can be opened;
      1. optionally read existing peaks for each condition from
         options.peaks1 / options.peaks2;
      2. build bedGraph tracks for treatment/control of both conditions;
      3. build a DiffScoreTrackI, then either call peaks on it or store the
         existing peaks, and call differentially occupied peaks;
      4. write the xls/BED outputs, the per-condition annotated xls files
         (when diffscore.has_peakio() is true) and, if options.store_bdg is
         set, the logLR / pvalue / logFC bedGraph files, all in
         options.outdir.

    args: raw arguments, handed to diff_opt_validate().

    Raises:
      ValueError: options.call_peaks is false but a peak file for either
        condition was not given.
      NotImplementedError: options.track_score_method is neither 'p' nor
        'q' while calling peaks.
    """
    # Parse options...
    options = diff_opt_validate( args )

    # check if tag files exist: opening fails early on a missing or
    # unreadable bedGraph
    for bdgfile in ( options.t1bdg, options.c1bdg, options.t2bdg, options.c2bdg ):
        with open( bdgfile ):
            pass

    # Both stay None unless the corresponding peak file was given, so the
    # names are always bound further down.
    p1io = None
    p2io = None
    if not options.peaks1 == '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)
    if not options.peaks2 == '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)

    # Using existing peaks needs both peak sets; report that before the
    # bedGraph files are loaded instead of failing on an unbound name later.
    if not options.call_peaks and ( p1io is None or p2io is None ):
        raise ValueError( "peak files for both conditions are required when peaks are not called" )

    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    # A single depth value is used for both conditions.
    depth1 = options.depth[0]
    if len(options.depth) >= 2:
        depth2 = options.depth[1]
    else:
        depth2 = depth1

    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()

    #3 Call Peaks
    diffscore = DiffScoreTrackI( t1btrack, c1btrack, t2btrack, c2btrack, depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            diffscore.call_peaks(cutoff = options.peaks_log_pvalue, min_length = options.pminlen)
        elif options.track_score_method == 'q':
            diffscore.call_peaks(cutoff = options.peaks_log_qvalue, min_length = options.pminlen)
        else:
            raise NotImplementedError
    else:
        # NOTE(review): the rebuild/annotate calls are grouped with the
        # "existing peaks" branch -- confirm against the intended layout.
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()

    info("Calling differentially occupied peaks")
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue, min_length = options.dminlen, score_method = options.score_method)
    elif options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue, min_length = options.dminlen, score_method = options.score_method)

    # Header line shared by every xls output.
    xls_header = "# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION)

    info("Write output xls and BED files...")
    with open( os.path.join( options.outdir, options.peakxls), "w" ) as ofhd_xls:
        ofhd_xls.write( xls_header )
        ofhd_xls.write( options.argtxt+"\n" )
        with open( os.path.join( options.outdir, options.peakbed), "w" ) as ofhd_bed:
            # pass write method so we can print too, and include name
            diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                                  name = options.name, name_prefix="%s_peak_",
                                  description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                                  trackline=options.trackline)

    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        with open( os.path.join( options.outdir, options.peak1xls), "w" ) as ofhd_xls1:
            ofhd_xls1.write( xls_header )
            ofhd_xls1.write(options.argtxt+"\n")
            with open( os.path.join( options.outdir, options.peak2xls), "w" ) as ofhd_xls2:
                ofhd_xls2.write( xls_header )
                ofhd_xls2.write(options.argtxt+"\n")
                diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                                name = options.name, name_prefix="%s_peak_")

    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        with open( os.path.join( options.outdir, options.bdglogLR), "w" ) as ofhd_logLR:
            with open( os.path.join( options.outdir, options.bdgpvalue), "w" ) as ofhd_pvalue:
                with open( os.path.join( options.outdir, options.bdglogFC), "w" ) as ofhd_logFC:
                    diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                              logFC=ofhd_logFC, name = options.name,
                                              description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                              trackline=options.trackline)