Example #1
0
def run( options ):
    """Score a treatment bedGraph against a control bedGraph.

    Builds one track from ``options.tfile`` and one from ``options.cfile``,
    combines them with the scheme named by ``options.method`` and writes
    the resulting track to ``options.ofile``.

    Arguments:
    options -- object exposing the attributes ``tfile``, ``cfile``,
               ``method`` and ``ofile``.

    Supported ``options.method`` values are 'ppois', 'qpois', 'substract',
    'divide' and 'logLR'; any other value raises Exception.
    """
    info("Read and build treatment bedGraph...")
    tbtrack = cBedGraphIO.bedGraphIO(options.tfile).build_bdgtrack()

    info("Read and build control bedGraph...")
    cbtrack = cBedGraphIO.bedGraphIO(options.cfile).build_bdgtrack()

    method = options.method

    info("Calculate scores comparing treatment and control by %s..." % method)
    # build score track
    if method == 'ppois':
        sbtrack = tbtrack.make_scoreTrack_for_macs(cbtrack)
        sbtrack = scoreTracktoBedGraph(sbtrack,'-100logp')
    elif method == 'qpois':
        sbtrack = tbtrack.make_scoreTrack_for_macs(cbtrack)
        # q-values are assigned in place from the p/q table before conversion
        pqtable = sbtrack.make_pq_table()
        sbtrack.assign_qvalue(pqtable)
        sbtrack = scoreTracktoBedGraph(sbtrack,'-100logq')
    elif method == 'substract':
        # NOTE: the spelling 'substract' is the value this function compares
        # against, so it is kept as-is.
        sbtrack = tbtrack.overlie(cbtrack,func=lambda x,y:x-y)
    elif method == 'divide':
        # float() forces true division; a zero control value is not guarded
        # against here.
        sbtrack = tbtrack.overlie(cbtrack,func=lambda x,y:float(x)/y)
    elif method == 'logLR':             # log likelihood
        sbtrack = tbtrack.overlie(cbtrack,func=logLR)
    else:
        raise Exception("Can't reach here!")

    info("Write to output bedGraph...")
    # The context manager closes (and flushes) the output even if writing
    # fails part-way; previously the handle was never closed explicitly.
    with io.open(options.ofile,"wb") as ofhd:
        sbtrack.write_bedGraph(ofhd,
                               name="%s_Scores" % (method.upper()),
                               description="Scores calculated by %s" % (method.upper()))
Example #2
0
def run( options ):
    """Compute one score bedGraph per requested method from treatment/control.

    The options are validated with opt_validate(), a single scoreTrackII is
    built from ``options.tfile`` and ``options.cfile``, and for every
    distinct entry of ``options.method`` the score method is switched and
    the track is written to its own file under ``options.outdir``.

    Output file name for the i-th method:
      * ``options.ofile[i]`` when ``options.ofile`` is set,
      * otherwise ``<options.oprefix>_<method>.bdg``.

    A method listed more than once is processed only the first time.
    An unrecognised method raises Exception.
    """
    options = opt_validate( options )
    scaling_factor = options.sfactor
    pseudo_depth = 1.0/scaling_factor   # not an actual depth, but its reciprocal, a trick to override SPMR while necessary.

    info("Read and build treatment bedGraph...")
    tbtrack = cBedGraphIO.bedGraphIO(options.tfile).build_bdgtrack()

    info("Read and build control bedGraph...")
    cbtrack = cBedGraphIO.bedGraphIO(options.cfile).build_bdgtrack()

    info("Build scoreTrackII...")
    sbtrack = tbtrack.make_scoreTrackII_for_macs( cbtrack, depth1 = pseudo_depth, depth2 = pseudo_depth )
    if abs(scaling_factor-1) > 1e-6:
        # Only for the case while your input is SPMR from MACS2 callpeak; Let's override SPMR.
        info("Values in your input bedGraph files will be multiplied by %f ..." % scaling_factor)
        sbtrack.change_normalization_method( ord('M') ) # a hack to override SPMR
    sbtrack.set_pseudocount( options.pseudocount )

    # method name -> single-character code handed to change_score_method()
    score_codes = { 'ppois'  : 'p',
                    'qpois'  : 'q',
                    'subtract': 'd',
                    'logFE'  : 'f',
                    'FE'     : 'F',
                    'logLR'  : 'l',    # log likelihood
                    'slogLR' : 's' }

    seen_methods = set()
    for (i, method) in enumerate(options.method):
        if method in seen_methods:
            continue
        seen_methods.add( method )

        info("Calculate scores comparing treatment and control by '%s'..." % method)
        # i indexes options.method, so explicit output names line up with
        # the position of each method even when duplicates are skipped.
        if options.ofile:
            ofile = os.path.join( options.outdir, options.ofile[ i ] )
        else:
            ofile = os.path.join( options.outdir, options.oprefix + "_" + method + ".bdg" )

        # build score track
        if method not in score_codes:
            raise Exception("Can't reach here!")
        sbtrack.change_score_method( ord(score_codes[method]) )

        info("Write bedGraph of scores...")
        # Close each output as soon as it is written instead of leaking one
        # open handle per method.
        with open(ofile,"wb") as ofhd:
            sbtrack.write_bedGraph(ofhd,
                                   name="%s_Scores" % (method.upper()),
                                   description="Scores calculated by %s" % (method.upper()),
                                   column = 3)
        info("Finished '%s'! Please check '%s'!" % (method, ofile))
Example #3
0
def run( options ):
    """Call condition-specific and common regions from two treatment/control pairs.

    Reads the four bedGraph files ``options.t1bdg``, ``options.c1bdg``,
    ``options.t2bdg`` and ``options.c2bdg``, scores them with
    cScoreTrack.TwoConditionScores and writes three BED files named
    ``<oprefix>_c<cutoff>_cond1.bed``, ``..._cond2.bed`` and
    ``..._common.bed``.

    The two sequencing depths are rescaled so that the smaller one becomes
    1.0 and the larger one becomes the ratio smaller/larger.

    If ``options.maxgap >= options.minlen`` the problem is reported through
    error(); this function itself does not stop afterwards.
    """
    if options.maxgap >= options.minlen:
        error("MAXGAP should be smaller than MINLEN! Your input is MAXGAP = %d and MINLEN = %d" % (options.maxgap, options.minlen))

    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    depth1 = options.depth1
    depth2 = options.depth2

    # float() guards against integer division truncating the ratio to 0
    # when both depths happen to be ints; float inputs are unaffected.
    if depth1 > depth2:         # scale down condition 1 to size of condition 2
        depth1 = float(depth2) / depth1
        depth2 = 1.0
    elif depth1 < depth2:       # scale down condition 2 to size of condition 1
        depth2 = float(depth1) / depth2
        depth1 = 1.0
    else:                       # no need to scale down any
        depth1 = 1.0
        depth2 = 1.0

    twoconditionscore = cScoreTrack.TwoConditionScores( t1btrack,
                                                        c1btrack,
                                                        t2btrack,
                                                        c2btrack,
                                                        depth1,
                                                        depth2 )
    twoconditionscore.build()
    twoconditionscore.finalize()
    (cat1,cat2,cat3) = twoconditionscore.call_peaks(min_length=options.minlen, max_gap=options.maxgap, cutoff=options.cutoff)

    info("Write peaks...")
    # (peaks, file name template, name_prefix suffix, track name, description)
    outputs = (
        (cat1, "%s_c%.1f_cond1.bed",  "_cond1_",  "condition 1", "unique regions in condition 1"),
        (cat2, "%s_c%.1f_cond2.bed",  "_cond2_",  "condition 2", "unique regions in condition 2"),
        (cat3, "%s_c%.1f_common.bed", "_common_", "common",      "common regions in both conditions"),
    )
    for (peaks, fname_template, prefix_suffix, track_name, track_desc) in outputs:
        # Each file is closed before the next one is opened; previously the
        # handles were rebound without ever being closed.
        with open(fname_template % (options.oprefix,options.cutoff),"w") as nf:
            peaks.write_to_bed(nf,
                               name_prefix=options.oprefix+prefix_suffix,
                               name=track_name,
                               description=track_desc,
                               score_column="score")
    info("Done")
Example #4
0
def run( options ):
    """Score a treatment bedGraph against a control using explicit depths.

    Builds a scoreTrackII from ``options.tfile`` and ``options.cfile`` with
    ``options.tdepth`` / ``options.cdepth``, selects the score method named
    by ``options.method`` and writes the track to ``options.ofile``.

    Supported methods: 'ppois', 'qpois', 'subtract', 'logFE', 'FE' and
    'logLR'. Any other value raises Exception.
    """
    info("Read and build treatment bedGraph...")
    tbtrack = cBedGraphIO.bedGraphIO(options.tfile).build_bdgtrack()
    treat_depth = options.tdepth

    info("Read and build control bedGraph...")
    cbtrack = cBedGraphIO.bedGraphIO(options.cfile).build_bdgtrack()
    ctrl_depth = options.cdepth

    info("Build scoreTrackII...")
    sbtrack = tbtrack.make_scoreTrackII_for_macs( cbtrack, depth1 = treat_depth, depth2 = ctrl_depth )
    # normalize by depth
    if abs(treat_depth-1) > 1e-6 or abs(ctrl_depth-1) > 1e-6:
        # if depth of treat and control is 1.0 ( files are generated
        # by MACS2 --SPMR ), no need for the following step.
        info("Normalize by sequencing depth of million reads...")
        sbtrack.change_normalization_method( ord('M') )
    sbtrack.set_pseudocount( options.pseudocount )

    method = options.method

    info("Calculate scores comparing treatment and control by %s..." % method)
    # method name -> single-character code handed to change_score_method()
    score_codes = { 'ppois'   : 'p',
                    'qpois'   : 'q',
                    'subtract': 'd',
                    'logFE'   : 'f',
                    'FE'      : 'F',
                    'logLR'   : 'l' }   # log likelihood
    if method not in score_codes:
        raise Exception("Can't reach here!")
    sbtrack.change_score_method( ord(score_codes[method]) )

    info("Write bedGraph of scores...")
    # The context manager closes the output even if writing fails;
    # previously the handle was never closed explicitly.
    with io.open(options.ofile,"wb") as ofhd:
        sbtrack.write_bedGraph(ofhd,
                               name="%s_Scores" % (method.upper()),
                               description="Scores calculated by %s" % (method.upper()),
                               column = 3)
    info("Finished! Please check %s!" % (options.ofile))
Example #5
0
def run(options):
    """Call peaks from a single bedGraph and write them as narrowPeak.

    The track is built from ``options.ifile`` with a baseline value of 0.
    Peaks are called with ``options.cutoff``, ``options.minlen``,
    ``options.maxgap`` and ``options.call_summits``.

    Output goes under ``options.outdir``: to ``options.ofile`` when it is
    set (in which case ``options.oprefix`` is overwritten with that name
    and used for the peak names), otherwise to
    ``<oprefix>_c<cutoff>_l<minlen>_g<maxgap>_peaks.narrowPeak``.
    """
    info("Read and build bedGraph...")
    btrack = cBedGraphIO.bedGraphIO(options.ifile).build_bdgtrack(
        baseline_value=0)

    info("Call peaks from bedGraph...")
    peaks = btrack.call_peaks(cutoff=float(options.cutoff),
                              min_length=int(options.minlen),
                              max_gap=int(options.maxgap),
                              call_summits=options.call_summits)

    info("Write peaks...")
    if options.ofile:
        # NOTE: mutates the caller's options object; the explicit file name
        # doubles as the peak name prefix below.
        options.oprefix = options.ofile
        peak_path = os.path.join(options.outdir, options.ofile)
    else:
        peak_path = os.path.join(
            options.outdir, "%s_c%.1f_l%d_g%d_peaks.narrowPeak" %
            (options.oprefix, options.cutoff, options.minlen,
             options.maxgap))

    # Close the output deterministically, even when writing raises.
    with open(peak_path, "w") as nf:
        peaks.write_to_narrowPeak(nf,
                                  name=options.oprefix,
                                  name_prefix=options.oprefix + "_narrowPeak",
                                  score_column="score",
                                  trackline=options.trackline)
    info("Done")
Example #6
0
def run(options):
    """Combine replicate bedGraph tracks and write the combined track.

    After opt_validate(), every file in ``options.ifile`` is read into a
    track. The first two tracks are combined with ``options.method`` and
    the result is written to ``options.ofile`` under ``options.outdir``.

    NOTE: only replicates #1 and #2 take part in the combination; any
    further files are read but not used. At least two input files are
    required, otherwise indexing the replicate list raises IndexError.
    """
    options = opt_validate(options)

    info("Read and build bedGraph for each replicate...")
    reps = []
    for (i, ifile) in enumerate(options.ifile, 1):
        info("Read file #%d" % i)
        reps.append(cBedGraphIO.bedGraphIO(ifile).build_bdgtrack())

    # first two reps
    info("combining #1 and #2 with method '%s'" % options.method)
    cmbtrack = reps[0].overlie(reps[1], func=options.method)

    ofile = os.path.join(options.outdir, options.ofile)
    info("Write bedGraph of combined scores...")
    # Close the output deterministically, even when writing raises.
    with open(ofile, "wb") as ofhd:
        cmbtrack.write_bedGraph(
            ofhd,
            name="%s_combined_scores" % (options.method.upper()),
            description="Scores calculated by %s" % (options.method.upper()),
        )
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
Example #7
0
def run( options ):
    """Call peaks from a bedGraph, or run a cutoff analysis instead.

    The track is built from ``options.ifile`` with a baseline value of 0.

    * If ``options.cutoff_analysis`` is true, the text returned by the
      track's cutoff_analysis() is written to ``options.ofile`` or, when
      that is unset, to ``<oprefix>_l<minlen>_g<maxgap>_cutoff_analysis.txt``.
    * Otherwise peaks are called and written in narrowPeak format to
      ``options.ofile`` or
      ``<oprefix>_c<cutoff>_l<minlen>_g<maxgap>_peaks.narrowPeak``.

    All output files are created under ``options.outdir``.
    """
    info("Read and build bedGraph...")
    btrack = cBedGraphIO.bedGraphIO(options.ifile).build_bdgtrack(baseline_value=0)

    if options.cutoff_analysis:
        info("Analyze cutoff vs number of peaks/total length of peaks/average length of peak")
        # NOTE(review): the third argument (50) is passed through as-is;
        # presumably the number of cutoff steps - confirm against cutoff_analysis().
        cutoff_analysis_result = btrack.cutoff_analysis( int(options.maxgap), int(options.minlen), 50 )
        info("Write report...")
        if options.ofile:
            report_path = os.path.join( options.outdir, options.ofile )
        else:
            report_path = os.path.join( options.outdir, "%s_l%d_g%d_cutoff_analysis.txt" % (options.oprefix,options.minlen,options.maxgap))
        # Close the report deterministically, even when writing raises.
        with open( report_path, "w" ) as fhd:
            fhd.write( cutoff_analysis_result )
        info("Done")
        return

    info("Call peaks from bedGraph...")
    peaks = btrack.call_peaks(cutoff=float(options.cutoff),min_length=int(options.minlen),max_gap=int(options.maxgap),call_summits=options.call_summits)

    info("Write peaks...")
    if options.ofile:
        # NOTE: mutates the caller's options object; the explicit file name
        # doubles as the peak name prefix below.
        options.oprefix = options.ofile
        peak_path = os.path.join( options.outdir, options.ofile )
    else:
        peak_path = os.path.join( options.outdir, "%s_c%.1f_l%d_g%d_peaks.narrowPeak" % (options.oprefix,options.cutoff,options.minlen,options.maxgap))
    with open( peak_path, "w" ) as nf:
        peaks.write_to_narrowPeak(nf, name=options.oprefix, name_prefix=options.oprefix+"_narrowPeak", score_column="score", trackline=options.trackline)
    info("Done")
Example #8
0
def run( options ):
    """Call three categories of peaks from two treatment/control pairs.

    Reads ``options.t1bdg``, ``options.c1bdg``, ``options.t2bdg`` and
    ``options.c2bdg``, scores them with cScoreTrack.TwoConditionScores
    (using ``options.depth1`` / ``options.depth2``), computes p- and
    q-values and writes the three peak sets returned by call_peaks() to
    ``<oprefix>_c<cutoff>_cat{1,2,3}_peaks.encodePeak`` in the current
    working directory.
    """
    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    depth1 = options.depth1

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    depth2 = options.depth2

    twoconditionscore = cScoreTrack.TwoConditionScores( t1btrack,
                                                        c1btrack,
                                                        t2btrack,
                                                        c2btrack,
                                                        depth1,
                                                        depth2 )
    twoconditionscore.build()
    twoconditionscore.finalize()
    twoconditionscore.compute_all_pvalues()
    twoconditionscore.compute_track_qvalues()
    (cat1,cat2,cat3) = twoconditionscore.call_peaks(min_length=options.minlen, cutoff=options.cutoff)

    info("Write peaks...")
    outputs = (
        (cat1, "%s_c%.1f_cat1_peaks.encodePeak"),
        (cat2, "%s_c%.1f_cat2_peaks.encodePeak"),
        (cat3, "%s_c%.1f_cat3_peaks.encodePeak"),
    )
    for (peaks, fname_template) in outputs:
        # Each file is closed before the next one is opened; previously the
        # handles were rebound without ever being closed.
        with open(fname_template % (options.oprefix,options.cutoff),"w") as nf:
            # all three categories share the same name prefix
            peaks.write_to_narrowPeak(nf, name_prefix=options.oprefix+"_encodePeak", score_column="score")
    info("Done")
Example #9
0
def run( options ):
    """Call peaks from a bedGraph and write them to an .encodePeak file.

    The track is built from ``options.ifile`` with a baseline value of 0
    and peaks are called with ``options.cutoff``, ``options.minlen``,
    ``options.maxgap`` and ``options.call_summits``. The result is written
    to ``<oprefix>_c<cutoff>_l<minlen>_g<maxgap>_peaks.encodePeak`` in the
    current working directory.
    """
    info("Read and build bedGraph...")
    btrack = cBedGraphIO.bedGraphIO(options.ifile).build_bdgtrack(baseline_value=0)

    info("Call peaks from bedGraph...")
    peaks = btrack.call_peaks(cutoff=float(options.cutoff),
                              min_length=int(options.minlen),
                              max_gap=int(options.maxgap),
                              call_summits=options.call_summits)

    info("Write peaks...")
    peak_path = "%s_c%.1f_l%d_g%d_peaks.encodePeak" % (options.oprefix,options.cutoff,options.minlen,options.maxgap)
    # Close the output deterministically, even when writing raises.
    with open(peak_path,"w") as nf:
        peaks.write_to_narrowPeak(nf, name_prefix=options.oprefix+"_encodePeak", score_column="score", trackline=options.trackline)
    info("Done")
Example #10
0
def run( options ):
    """Call broad peaks from a bedGraph and write them as gappedPeak/BED12.

    The track is built from ``options.ifile`` with a baseline value of 0.
    call_broadpeaks() is run with the two cutoffs (``options.cutoffpeak``,
    ``options.cutofflink``), ``options.minlen`` and the two maximum gaps
    (``options.lvl1maxgap``, ``options.lvl2maxgap``). The output file
    ``<oprefix>_c<cutoffpeak>_C<cutofflink>_l<minlen>_g<lvl1maxgap>_G<lvl2maxgap>_broad.bed12``
    is created in the current working directory.
    """
    info("Read and build bedGraph...")
    btrack = cBedGraphIO.bedGraphIO(options.ifile).build_bdgtrack(baseline_value=0)

    info("Call peaks from bedGraph...")
    bpeaks = btrack.call_broadpeaks (lvl1_cutoff=options.cutoffpeak, lvl2_cutoff=options.cutofflink, min_length=options.minlen, lvl1_max_gap=options.lvl1maxgap, lvl2_max_gap=options.lvl2maxgap)

    info("Write peaks...")
    broad_path = "%s_c%.1f_C%.2f_l%d_g%d_G%d_broad.bed12" % (options.oprefix,options.cutoffpeak,options.cutofflink,options.minlen,options.lvl1maxgap,options.lvl2maxgap)
    # Close the output deterministically, even when writing raises.
    with open(broad_path,"w") as bf:
        # NOTE(review): only element 1 of the call_broadpeaks() result is
        # written; presumably that is the broad-region set - confirm.
        bpeaks[1].write_to_gappedPeak(bf, name_prefix=options.oprefix+"_broadRegion")
    info("Done")
Example #11
0
def run( options ):
    """Modify the scores of a bedGraph in place and write the result.

    After opt_validate(), the track built from ``options.ifile`` (baseline
    value 0) is modified according to ``options.method`` (case-insensitive):

      multiply -- every value is multiplied by ``options.extraparam``
      add      -- ``options.extraparam`` is added to every value
      p2q      -- the track's p2q() conversion is applied

    Any other method leaves the track untouched. The track is then written
    to ``options.ofile`` under ``options.outdir``.
    """
    options = opt_validate( options )
    info("Read and build bedGraph...")
    btrack = cBedGraphIO.bedGraphIO(options.ifile).build_bdgtrack(baseline_value=0)

    info("Modify bedGraph...")
    method = options.method.lower()
    if method == "multiply":
        btrack.apply_func( lambda x: x * options.extraparam)
    elif method == "add":
        btrack.apply_func( lambda x: x + options.extraparam)
    elif method == "p2q":
        btrack.p2q()

    ofile = os.path.join( options.outdir, options.ofile )
    info("Write bedGraph of modified scores...")
    # Close the output deterministically, even when writing raises.
    with open(ofile,"wb") as ofhd:
        btrack.write_bedGraph(ofhd,
                              name="%s_modified_scores" % (options.method.upper()),
                              description="Scores calculated by %s" % (options.method.upper()))
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
Example #12
0
def run(options):
    """Modify the scores of a bedGraph in place and write the result.

    After opt_validate(), the track built from ``options.ifile`` (baseline
    value 0) is modified according to ``options.method`` (case-insensitive):

      multiply -- every value is multiplied by ``options.extraparam``
      add      -- ``options.extraparam`` is added to every value
      p2q      -- the track's p2q() conversion is applied
      analen   -- the track's analen() method is applied

    Any other method leaves the track untouched. The track is then written
    to ``options.ofile`` under ``options.outdir``.
    """
    options = opt_validate(options)
    info("Read and build bedGraph...")
    btrack = cBedGraphIO.bedGraphIO(options.ifile).build_bdgtrack(
        baseline_value=0)

    info("Modify bedGraph...")
    method = options.method.lower()
    if method == "multiply":
        btrack.apply_func(lambda x: x * options.extraparam)
    elif method == "add":
        btrack.apply_func(lambda x: x + options.extraparam)
    elif method == "p2q":
        btrack.p2q()
    elif method == "analen":
        btrack.analen()

    ofile = os.path.join(options.outdir, options.ofile)
    info("Write bedGraph of modified scores...")
    # Close the output deterministically, even when writing raises.
    with open(ofile, "wb") as ofhd:
        btrack.write_bedGraph(ofhd,
                              name="%s_modified_scores" %
                              (options.method.upper()),
                              description="Scores calculated by %s" %
                              (options.method.upper()))
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
Example #13
0
def run(options):
    """Combine replicate bedGraph tracks and write the combined track.

    After opt_validate(), every file in ``options.ifile`` is read into a
    track. The first two tracks are combined with ``options.method`` and
    the result is written to ``options.ofile`` under ``options.outdir``.

    NOTE: only replicates #1 and #2 take part in the combination; any
    further files are read but not used. At least two input files are
    required, otherwise indexing the replicate list raises IndexError.
    """
    options = opt_validate(options)

    info("Read and build bedGraph for each replicate...")
    reps = []
    for (i, ifile) in enumerate(options.ifile, 1):
        info("Read file #%d" % i)
        reps.append(cBedGraphIO.bedGraphIO(ifile).build_bdgtrack())

    # first two reps
    info("combining #1 and #2 with method '%s'" % options.method)
    cmbtrack = reps[0].overlie(reps[1], func=options.method)

    ofile = os.path.join(options.outdir, options.ofile)
    info("Write bedGraph of combined scores...")
    # Close the output deterministically, even when writing raises.
    with open(ofile, "wb") as ofhd:
        cmbtrack.write_bedGraph(
            ofhd,
            name="%s_combined_scores" % (options.method.upper()),
            description="Scores calculated by %s" % (options.method.upper()))
    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
Example #14
0
def run( args ):
    """The Differential function/pipeline for MACS.

    Steps, in order:
      1. validate ``args`` with diff_opt_validate();
      2. check that the four input bedGraph files can be opened;
      3. optionally load existing peaks for each condition;
      4. build the four bedGraph tracks and a DiffScoreTrackI;
      5. either call peaks or reuse the loaded ones;
      6. call differentially occupied peaks;
      7. write the xls/BED output and, when available/requested, the
         per-condition annotated xls files and the bedGraph score tracks.

    All output files are created under ``options.outdir``.

    Raises:
      IOError/OSError     -- an input bedGraph or peak file cannot be opened.
      ValueError          -- existing peaks are to be used (call_peaks is
                             off) but peaks1 or peaks2 was not supplied.
      NotImplementedError -- peaks are to be called with a track score
                             method other than 'p' or 'q'.
    """
    # Parse options...
    options = diff_opt_validate( args )

    # check if tag files exist
    for bdg_path in (options.t1bdg, options.c1bdg, options.t2bdg, options.c2bdg):
        with open(bdg_path):
            pass

    # Bind both readers up front so that a missing peak file is reported
    # explicitly below rather than surfacing later as a NameError.
    p1io = None
    p2io = None

    if not options.peaks1 == '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if not options.peaks2 == '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)

    if not options.call_peaks and (p1io is None or p2io is None):
        # Fail before the expensive track building instead of afterwards.
        raise ValueError("Existing peaks for both conditions are required when peaks are not called")

    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    # a single depth value applies to both conditions
    depth1 = options.depth[0]
    if len(options.depth) >= 2:
        depth2 = options.depth[1]
    else:
        depth2 = depth1

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    #3 Call Peaks
    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            diffscore.call_peaks(cutoff = options.peaks_log_pvalue,
                                 min_length = options.pminlen)
        elif options.track_score_method == 'q':
            diffscore.call_peaks(cutoff = options.peaks_log_qvalue,
                                 min_length = options.pminlen)
        else:
            raise NotImplementedError
    else:
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()

    info("Calling differentially occupied peaks")
    # Any score method other than 'p' or 'q' skips this step silently.
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    elif options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)

    # first line of every xls file written below
    xls_header = "# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION)

    info("Write output xls and BED files...")
    # The with-blocks close every output even when a writer raises.
    with open( os.path.join( options.outdir, options.peakxls), "w" ) as ofhd_xls:
        ofhd_xls.write(xls_header)
        ofhd_xls.write( options.argtxt+"\n" )
        with open( os.path.join( options.outdir, options.peakbed), "w" ) as ofhd_bed:
            # pass write method so we can print too, and include name
            diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                            name = options.name, name_prefix="%s_peak_",
                            description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                            trackline=options.trackline)

    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        with open( os.path.join( options.outdir, options.peak1xls), "w" ) as ofhd_xls1:
            ofhd_xls1.write(xls_header)
            ofhd_xls1.write(options.argtxt+"\n")
            with open( os.path.join( options.outdir, options.peak2xls), "w" ) as ofhd_xls2:
                ofhd_xls2.write(xls_header)
                ofhd_xls2.write(options.argtxt+"\n")
                diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                                name = options.name, name_prefix="%s_peak_")

    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        with open( os.path.join( options.outdir, options.bdglogLR), "w" ) as ofhd_logLR:
            with open( os.path.join( options.outdir, options.bdgpvalue), "w" ) as ofhd_pvalue:
                with open( os.path.join( options.outdir, options.bdglogFC), "w" ) as ofhd_logFC:
                    diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                              logFC=ofhd_logFC, name = options.name,
                                              description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                              trackline=options.trackline)
Example #15
0
def run(options):
    """Call condition-specific and common regions from two treatment/control pairs.

    Reads the four bedGraph files ``options.t1bdg``, ``options.c1bdg``,
    ``options.t2bdg`` and ``options.c2bdg``, scores them with
    cScoreTrack.TwoConditionScores and writes three BED files (unique to
    condition 1, unique to condition 2, common) under ``options.outdir``.

    Output names:
      * when ``options.ofile`` is set, its first three entries are used as
        file names and also as the peak name prefixes;
      * otherwise ``<oprefix>_c<cutoff>_cond1.bed``, ``..._cond2.bed`` and
        ``..._common.bed`` are used.

    The two sequencing depths are rescaled so that the smaller one becomes
    1.0 and the larger one becomes the ratio smaller/larger.

    If ``options.maxgap >= options.minlen`` the problem is reported through
    error(); this function itself does not stop afterwards.
    """
    if options.maxgap >= options.minlen:
        error(
            "MAXGAP should be smaller than MINLEN! Your input is MAXGAP = %d and MINLEN = %d"
            % (options.maxgap, options.minlen))

    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    depth1 = options.depth1
    depth2 = options.depth2

    # float() guards against integer division truncating the ratio to 0
    # when both depths happen to be ints; float inputs are unaffected.
    if depth1 > depth2:  # scale down condition 1 to size of condition 2
        depth1 = float(depth2) / depth1
        depth2 = 1.0
    elif depth1 < depth2:  # scale down condition 2 to size of condition 1
        depth2 = float(depth1) / depth2
        depth1 = 1.0
    else:  # no need to scale down any
        depth1 = 1.0
        depth2 = 1.0

    twoconditionscore = cScoreTrack.TwoConditionScores(t1btrack, c1btrack,
                                                       t2btrack, c2btrack,
                                                       depth1, depth2)
    twoconditionscore.build()
    twoconditionscore.finalize()
    (cat1, cat2,
     cat3) = twoconditionscore.call_peaks(min_length=options.minlen,
                                          max_gap=options.maxgap,
                                          cutoff=options.cutoff)

    info("Write peaks...")

    if options.ofile:
        # A list (not map()) so the result can be indexed on Python 3 too.
        ofiles = [os.path.join(options.outdir, x) for x in options.ofile]
        name_prefix = options.ofile
    else:
        ofiles = [
            os.path.join(
                options.outdir,
                "%s_c%.1f_cond1.bed" % (options.oprefix, options.cutoff)),
            os.path.join(
                options.outdir,
                "%s_c%.1f_cond2.bed" % (options.oprefix, options.cutoff)),
            os.path.join(
                options.outdir,
                "%s_c%.1f_common.bed" % (options.oprefix, options.cutoff))
        ]
        name_prefix = [
            options.oprefix + "_cond1_",
            options.oprefix + "_cond2_",
            options.oprefix + "_common_",
        ]

    # (peaks, track name, track description), in output order
    outputs = (
        (cat1, "condition 1", "unique regions in condition 1"),
        (cat2, "condition 2", "unique regions in condition 2"),
        (cat3, "common", "common regions in both conditions"),
    )
    for (idx, (peaks, track_name, track_desc)) in enumerate(outputs):
        # Each file is closed before the next one is opened; previously the
        # handles were rebound without ever being closed.
        with open(ofiles[idx], 'w') as nf:
            peaks.write_to_bed(nf,
                               name_prefix=name_prefix[idx],
                               name=track_name,
                               description=track_desc,
                               score_column="score")
    info("Done")
Example #16
0
def run( args ):
    """The Differential function/pipeline for MACS.

    Steps, in order:
      1. validate ``args`` with diff_opt_validate();
      2. check that the four input bedGraph files can be opened;
      3. optionally load existing peaks for each condition;
      4. build the four bedGraph tracks and a DiffScoreTrackI;
      5. either call peaks or reuse the loaded ones;
      6. call differentially occupied peaks;
      7. write the xls/BED output and, when available/requested, the
         per-condition annotated xls files and the bedGraph score tracks.

    All output files are created under ``options.outdir``.

    Raises:
      IOError/OSError     -- an input bedGraph or peak file cannot be opened.
      ValueError          -- existing peaks are to be used (call_peaks is
                             off) but peaks1 or peaks2 was not supplied.
      NotImplementedError -- peaks are to be called with a track score
                             method other than 'p' or 'q'.
    """
    # Parse options...
    options = diff_opt_validate( args )

    # check if tag files exist
    for bdg_path in (options.t1bdg, options.c1bdg, options.t2bdg, options.c2bdg):
        with open(bdg_path):
            pass

    # Bind both readers up front so that a missing peak file is reported
    # explicitly below rather than surfacing later as a NameError.
    p1io = None
    p2io = None

    if not options.peaks1 == '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if not options.peaks2 == '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)

    if not options.call_peaks and (p1io is None or p2io is None):
        # Fail before the expensive track building instead of afterwards.
        raise ValueError("Existing peaks for both conditions are required when peaks are not called")

    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    # a single depth value applies to both conditions
    depth1 = options.depth[0]
    if len(options.depth) >= 2:
        depth2 = options.depth[1]
    else:
        depth2 = depth1

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    #3 Call Peaks
    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            diffscore.call_peaks(cutoff = options.peaks_log_pvalue,
                                 min_length = options.pminlen)
        elif options.track_score_method == 'q':
            diffscore.call_peaks(cutoff = options.peaks_log_qvalue,
                                 min_length = options.pminlen)
        else:
            raise NotImplementedError
    else:
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()

    info("Calling differentially occupied peaks")
    # Any score method other than 'p' or 'q' skips this step silently.
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    elif options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)

    # first line of every xls file written below
    xls_header = "# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION)

    info("Write output xls and BED files...")
    # The with-blocks close every output even when a writer raises.
    with open( os.path.join( options.outdir, options.peakxls), "w" ) as ofhd_xls:
        ofhd_xls.write(xls_header)
        ofhd_xls.write( options.argtxt+"\n" )
        with open( os.path.join( options.outdir, options.peakbed), "w" ) as ofhd_bed:
            # pass write method so we can print too, and include name
            diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                            name = options.name, name_prefix="%s_peak_",
                            description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                            trackline=options.trackline)

    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        with open( os.path.join( options.outdir, options.peak1xls), "w" ) as ofhd_xls1:
            ofhd_xls1.write(xls_header)
            ofhd_xls1.write(options.argtxt+"\n")
            with open( os.path.join( options.outdir, options.peak2xls), "w" ) as ofhd_xls2:
                ofhd_xls2.write(xls_header)
                ofhd_xls2.write(options.argtxt+"\n")
                diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                                name = options.name, name_prefix="%s_peak_")

    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        with open( os.path.join( options.outdir, options.bdglogLR), "w" ) as ofhd_logLR:
            with open( os.path.join( options.outdir, options.bdgpvalue), "w" ) as ofhd_pvalue:
                with open( os.path.join( options.outdir, options.bdglogFC), "w" ) as ofhd_logFC:
                    diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                              logFC=ofhd_logFC, name = options.name,
                                              description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                              trackline=options.trackline)
Example #17
0
def run( options ):
    """Compute gfold values for the union of two peak sets and write them out.

    Inputs read from ``options``: ``peak1`` / ``peak2`` (peak files),
    ``t1bdg`` / ``t2bdg`` (pileup bedGraphs), ``cutoff`` and ``oprefix``.
    ``options.do_MCMC`` is forced to True.

    Two files are written in the current working directory:
      <oprefix>_diff.bed -- one line per region with a non-zero gfold:
                            chrom, start, end, '.', gfold
      <oprefix>_diff.txt -- one line per union region: region id, both
                            averaged counts divided by the per-million
                            signal of their track, and the gfold value
    """
    options.do_MCMC = True

    # load precompiled matrix
    gfolds_c = PCGF(options.cutoff)

    info("Read peak files...")
    info("Peak of condition 1 treatment...")
    t1_peak = genericBedIO(options.peak1).build_bedtrack()

    info("Peak of condition 2 treatment...")
    t2_peak = genericBedIO(options.peak2).build_bedtrack()

    # get union peak regions
    union_peak = t1_peak.overlie(t2_peak)

    info("Read and build bedGraph...")
    info("Pileup of condition 1 treatment...")
    t1_btrack = bedGraphIO(options.t1bdg).build_bdgtrack(baseline_value=0)

    info("Pileup of condition 2 treatment...")
    t2_btrack = bedGraphIO(options.t2bdg).build_bdgtrack(baseline_value=0)

    # calculate sum of all signals in million
    t1_sum = t1_btrack.summary()[0]
    t2_sum = t2_btrack.summary()[0]
    n1 = t1_sum/1000000.0               # signal per million
    n2 = t2_sum/1000000.0
    offset = -log(n1,2)+log(n2,2)
    info("t1 sum: %.1f, t2 sum: %.1f, Offset is %.2f" % (t1_sum,t2_sum,offset))

    # combine two tracks
    info("Combine tracks...")
    comb_track = t1_btrack.make_scoreTrack_for_macs2diff(t2_btrack)

    info("Extract average values in union regions...")
    data_in_union = comb_track.extract_average(union_peak) # ([id,...],[count1,...],[count2,...])
    region_ids = data_in_union[0]
    counts1 = data_in_union[1]
    counts2 = data_in_union[2]

    # Truncate the averages to ints in place, so that convert_gfold() below
    # sees the converted values through data_in_union.
    for i in range(len(region_ids)):
        counts1[i] = int(counts1[i]) # actual values are Pileup Per Peak Per Million reads (PPPPM)
        counts2[i] = int(counts2[i])

    info( "Calculate gfold ..." )
    gfolds = convert_gfold(data_in_union, gfolds_c, offset=offset, cutoff=options.cutoff,mcmc=options.do_MCMC)

    # sort by gfold (ascending; key= works on both Python 2 and 3)
    gfolds.sort(key=lambda pair: pair[1])

    # write differential regions with a non-zero gfold
    bed_path = options.oprefix+"_diff.bed"
    info( "Write differential regions to %s ..." % (bed_path) )
    with open(bed_path,"w") as ofhd:
        for (rid, gf) in gfolds:
            if gf != 0:
                # region ids have the form "<chrom>.<start>.<end>"
                (chrom,start,end) = rid.split('.')
                ofhd.write( "%s\t%s\t%s\t%s\t%.5f\n" % (chrom,start,end,'.',gf) )

    txt_path = options.oprefix+"_diff.txt"
    info( "Write gfold values for each region to %s ..." % (txt_path) )
    gf_dict = dict(gfolds)
    with open(txt_path,"w") as ofhd:
        for (rid, count1, count2) in zip(region_ids, counts1, counts2):
            ofhd.write("%s\t%.5f\t%.5f\t%.5f\n" % (rid,count1/n1,count2/n2,gf_dict[rid]))