Example #1
0
def run( o_options ):
    """The Main function/pipeline for duplication filter.
    
    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info
    warn = options.warn
    debug = options.debug
    error = options.error


    outputfile = open(options.oprefix+"_refinepeak.bed", "w")

    
    #if options.outputfile != "stdout":
    #    assert not os.path.exists(options.outputfile), "%s already exists, please check!" % options.outputfile
    #    outfhd = open(options.outputfile,"w")
    #else:
    #    outfhd = sys.stdout

    peakio = file(options.bedfile)
    peaks = PeakIO()
    for l in peakio:
        fs = l.rstrip().split()
        peaks.add( fs[0], int(fs[1]), int(fs[2]), name=fs[3] )

    peaks.sort()
    
    #for l in peakio:
    #l = peakio.readline()
    #fs = l.rstrip().split()
    #print fs
    
    #1 Read tag files
    info("read tag files...")
    fwtrack = load_tag_files_options (options)
    
    #info("tag size = %d" % options.tsize)
    #fwtrack.fw = options.tsize

    retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
    outputfile.write( "\n".join( map(lambda x: "%s\t%d\t%d\t%s\t%.2f" % x , retval) ) )
    info("Done!")
    info("Check output file: %s" % options.oprefix+"_refinepeak.bed")
Example #2
0
def run( o_options ):
    """The Main function/pipeline for duplication filter.
    
    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info
    warn = options.warn
    debug = options.debug
    error = options.error


    outputfile = open(options.oprefix+"_refinepeak.bed", "w")

    peakio = file(options.bedfile)
    peaks = PeakIO()
    for l in peakio:
        fs = l.rstrip().split()
        peaks.add( fs[0], int(fs[1]), int(fs[2]), name=fs[3] )

    peaks.sort()
    
    #1 Read tag files
    info("read tag files...")
    fwtrack = load_tag_files_options (options)
    
    retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
    outputfile.write( "\n".join( map(lambda x: "%s\t%d\t%d\t%s\t%.2f" % x , retval) ) )
    info("Done!")
    info("Check output file: %s" % options.oprefix+"_refinepeak.bed")
Example #3
0
    def setUp( self ):
        self.range             = [   0, 2000 ]
        self.binding_sites     = [ 300, 500, 700 ]
        self.binding_strength  = [ 60,  45,  55 ] # approximate binding affility 
        self.binding_width     = [ 150, 150, 150 ]# binding width, left and right sides are cutting sites
        self.cutting_variation = 50              # variation at the correct cutting sites
        self.tag_size          = 50
        self.test_tags_file    = "random_test.bed"
        self.genome_size       = 10000
        
        self.plus_tags         = [ ]
        self.minus_tags        = [ ]
        
        for i in range( len(self.binding_sites) ):
            j = 0
            while j <= self.binding_strength[ i ]:
                x = int( normalvariate( self.binding_sites[ i ] - self.binding_width[ i ]/2,
                                        self.cutting_variation ) )
                if x > self.range[ 0 ] and x + self.tag_size < self.range[ 1 ]:
                    self.plus_tags.append( x )
                    j += 1
            
            j = 0
            while j <= self.binding_strength[ i ]:
                x = int( normalvariate( self.binding_sites[ i ] + self.binding_width[ i ]/2,
                                        self.cutting_variation ) )
                if x - self.tag_size > self.range[ 0 ] and x < self.range[ 1 ]:
                    self.minus_tags.append( x )
                    j += 1

        self.plus_tags = sorted(self.plus_tags)
        self.minus_tags = sorted(self.minus_tags)

        #print self.plus_tags
        #print self.minus_tags

        self.result_peak = PeakIO()

        # write reads in bed files
        fhd = open( self.test_tags_file, "w" )
        for x in self.plus_tags:
            fhd.write( "chr1\t%d\t%d\t.\t0\t+\n" % ( x, x + self.tag_size ) )
        for x in self.minus_tags:
            fhd.write( "chr1\t%d\t%d\t.\t0\t-\n" % ( x - self.tag_size, x ) )
Example #4
0
def run( args ):
    """The Differential function/pipeline for MACS.
    
    """
    # Parse options...
    options = diff_opt_validate( args )
    #0 output arguments
#    info("\n"+options.argtxt)
 
    ofile_prefix = options.name
    
    # check if tag files exist
    with open(options.t1bdg) as f: pass
    with open(options.c1bdg) as f: pass
    with open(options.t2bdg) as f: pass
    with open(options.c2bdg) as f: pass
    
    if not options.peaks1 == '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if not options.peaks2 == '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)
    
    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    if len(options.depth) >=2:
        depth1 = options.depth[0]
        depth2 = options.depth[1]
    else:
        depth1 = options.depth[0]
        depth2 = depth1
    
    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()
    
    #3 Call Peaks

    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            diffscore.call_peaks(cutoff = options.peaks_log_pvalue,
                                 min_length = options.pminlen)
        elif options.track_score_method == 'q':
            diffscore.call_peaks(cutoff = options.peaks_log_qvalue,
                                 min_length = options.pminlen)
        else:
            raise NotImplementedError
    else:
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()
    
    info("Calling differentially occupied peaks")
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    if options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
#    diffscore.print_some_peaks()
#    diffscore.print_diff_peaks()
    
    info("Write output xls and BED files...")
    ofhd_xls = open( os.path.join( options.outdir, options.peakxls), "w" )
    ofhd_xls.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
    ofhd_xls.write( options.argtxt+"\n" )
    ofhd_bed = open( os.path.join( options.outdir, options.peakbed), "w" )

    # pass write method so we can print too, and include name
    diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                    name = options.name, name_prefix="%s_peak_",
                    description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                    trackline=options.trackline)
    ofhd_xls.close()
    ofhd_bed.close()
    
    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        ofhd_xls1 = open( os.path.join( options.outdir, options.peak1xls), "w" )
        ofhd_xls1.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
        ofhd_xls1.write(options.argtxt+"\n")
        ofhd_xls2 = open( os.path.join( options.outdir, options.peak2xls), "w" )
        ofhd_xls2.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
        ofhd_xls2.write(options.argtxt+"\n")
        diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                        name = options.name, name_prefix="%s_peak_")
        ofhd_xls1.close()
        ofhd_xls2.close()
    
    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        ofhd_logLR = open( os.path.join( options.outdir, options.bdglogLR), "w" )
        ofhd_pvalue = open( os.path.join( options.outdir, options.bdgpvalue), "w" )
        ofhd_logFC = open( os.path.join( options.outdir, options.bdglogFC), "w" )
        diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                  logFC=ofhd_logFC, name = options.name,
                                  description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                  trackline=options.trackline)
        ofhd_logLR.close()
        ofhd_pvalue.close()
        ofhd_logFC.close()
Example #5
0
def run( args ):
    """The Differential function/pipeline for MACS.
    
    """
    # Parse options...
    options = diff_opt_validate( args )
    #0 output arguments
#    info("\n"+options.argtxt)
 
    ofile_prefix = options.name
    
    # check if tag files exist
    with open(options.t1bdg) as f: pass
    with open(options.c1bdg) as f: pass
    with open(options.t2bdg) as f: pass
    with open(options.c2bdg) as f: pass
    
    if not options.peaks1 == '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if not options.peaks2 == '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)
    
    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1bio = cBedGraphIO.bedGraphIO(options.t1bdg)
    t1btrack = t1bio.build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1bio = cBedGraphIO.bedGraphIO(options.c1bdg)
    c1btrack = c1bio.build_bdgtrack()

    if len(options.depth) >=2:
        depth1 = options.depth[0]
        depth2 = options.depth[1]
    else:
        depth1 = options.depth[0]
        depth2 = depth1
    
    info("Read and build treatment 2 bedGraph...")
    t2bio = cBedGraphIO.bedGraphIO(options.t2bdg)
    t2btrack = t2bio.build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2bio = cBedGraphIO.bedGraphIO(options.c2bdg)
    c2btrack = c2bio.build_bdgtrack()
    
    #3 Call Peaks

    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            diffscore.call_peaks(cutoff = options.peaks_log_pvalue,
                                 min_length = options.pminlen)
        elif options.track_score_method == 'q':
            diffscore.call_peaks(cutoff = options.peaks_log_qvalue,
                                 min_length = options.pminlen)
        else:
            raise NotImplementedError
    else:
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()
    
    info("Calling differentially occupied peaks")
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    if options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
#    diffscore.print_some_peaks()
#    diffscore.print_diff_peaks()
    
    info("Write output xls and BED files...")
    ofhd_xls = open( os.path.join( options.outdir, options.peakxls), "w" )
    ofhd_xls.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
    ofhd_xls.write( options.argtxt+"\n" )
    ofhd_bed = open( os.path.join( options.outdir, options.peakbed), "w" )

    # pass write method so we can print too, and include name
    diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                    name = options.name, name_prefix="%s_peak_",
                    description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                    trackline=options.trackline)
    ofhd_xls.close()
    ofhd_bed.close()
    
    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        ofhd_xls1 = open( os.path.join( options.outdir, options.peak1xls), "w" )
        ofhd_xls1.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
        ofhd_xls1.write(options.argtxt+"\n")
        ofhd_xls2 = open( os.path.join( options.outdir, options.peak2xls), "w" )
        ofhd_xls2.write("# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION))
        ofhd_xls2.write(options.argtxt+"\n")
        diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                        name = options.name, name_prefix="%s_peak_")
        ofhd_xls1.close()
        ofhd_xls2.close()
    
    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        ofhd_logLR = open( os.path.join( options.outdir, options.bdglogLR), "w" )
        ofhd_pvalue = open( os.path.join( options.outdir, options.bdgpvalue), "w" )
        ofhd_logFC = open( os.path.join( options.outdir, options.bdglogFC), "w" )
        diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                  logFC=ofhd_logFC, name = options.name,
                                  description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                  trackline=options.trackline)
        ofhd_logLR.close()
        ofhd_pvalue.close()
        ofhd_logFC.close()