Beispiel #1
0
def addWigToPeak(fw, rw, peaks, isControl=False):
    '''
    Load a forward/reverse pair of wig files and feed their values into peaks.

    Parameters:
        fw: the wig file of forward strand.
        rw: the wig file of reverse strand.
        peaks: the peaks to add the data to, as a mapping from chromosome
            to a sequence of peak objects. Each peak must provide
            start, end, prepareData(), addData() and addCtrl().
            NOTE(review): the single forward-moving index per strand only
            works if the peaks and the wig rows of a chromosome are sorted
            by position -- confirm against the callers.
        isControl: when True the values are stored through addCtrl()
            instead of addData(), and the reverse-strand extension past
            the peak end (see _addWig) is skipped.
    output:
        A tuple (fwig, rwig, chroms): the two loaded wig data sets and the
        list of chromosomes that were present in the peaks and in both wig
        files. The peaks are populated with data in place.
    '''
    import wig
    fwig = wig.loadWig(fw, False, '+')
    rwig = wig.loadWig(rw, False, '-')
    chroms = []

    def _addWig(cp, cwig, i1, strand, nextPeak):
        '''
        Add the rows of cwig that fall into peak cp, starting at row i1.

        Column 0 of cwig is used as the position (shifted by -1 before it
        is compared with the peak coordinates) and column 1 as the value.
        Returns the index of the first row that was not consumed, so the
        next peak can continue from there.
        '''
        # Skip the rows located before the peak.
        while i1 < cwig.shape[0] and cwig[i1,0]-1 < cp.start:
            i1 += 1

        # Rows inside [cp.start, cp.end) belong to the peak itself.
        while i1 < cwig.shape[0] and cp.start <= cwig[i1,0] - 1 < cp.end:
            if isControl:
                cp.addCtrl(cwig[i1,0]-1, cwig[i1,0], cwig[i1,1], strand)
            else:
                cp.addData(cwig[i1,0]-1, cwig[i1,0], cwig[i1,1], strand)
            i1 += 1

        if strand == 1 and not isControl:
            # For non-control data on strand 1 the peak additionally
            # receives the rows past its end, up to roughly one more peak
            # width, but never beyond the start of the following peak.
            # A separate index is used so these rows stay available to
            # the following peak.
            i2 = i1
            cend = cp.end - cp.start + cp.end - 1
            if nextPeak is not None and nextPeak.start < cend:
                cend = nextPeak.start
            while i2 < cwig.shape[0] and cwig[i2,0] - 1 < cend:
                cp.addData(cwig[i2,0]-1, cwig[i2,0], cwig[i2,1], strand)
                i2 += 1

        return i1

    for chrom in peaks:
        # Only chromosomes covered by both strands can be processed.
        if not (chrom in fwig and chrom in rwig):
            continue
        chroms.append(chrom)
        chromPeaks = peaks[chrom]
        chromFwig = fwig[chrom]
        chromRwig = rwig[chrom]
        fi = 0 #index to track chromFwig
        ri = 0 #index to track chromRwig
        lastIndex = len(chromPeaks) - 1
        for pi, p in enumerate(chromPeaks):
            p.prepareData(isControl)
            nextPeak = chromPeaks[pi+1] if pi < lastIndex else None
            # strand code 0 is used for the forward wig, 1 for the reverse wig
            fi = _addWig(p, chromFwig, fi, 0, nextPeak)
            ri = _addWig(p, chromRwig, ri, 1, nextPeak)
    return fwig, rwig, chroms
Beispiel #2
0
def main():
    '''
    Command line entry point: load the peaks and both strand wig files,
    then hand everything to writePeaks().
    '''
    cli = argparse.ArgumentParser(description="Pair the peaks on different strand.")

    # Positional arguments, registered in the order they are expected.
    positionals = (
        ("peak", "The bed file contains all peaks."),
        ('forwardWig', "The forward wig file."),
        ('reverseWig', "The reverse wig file."),
        ('output', "The output prefix. Will output two files, one containing all the singletons and the other with the pairs. The pair file will be in GFF format and singletons in BED format. Output files will be prefix_singletons.bed and prefix_pairs.gff"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    # Size of the window searched for a mate on either side.
    cli.add_argument('-d', '--downstream', type=int, default=100,
                     help="Within how far downstream to look for a mate.")
    cli.add_argument('-u', '--upstream', type=int, default=0,
                     help="Within how far upstream to look for a mate.")

    opts = cli.parse_args()

    forward_peaks, reverse_peaks = loadPeaks(opts.peak)
    forward_signal = WIG.loadWig(opts.forwardWig, smooth=False)
    reverse_signal = WIG.loadWig(opts.reverseWig, smooth=False, strand='-')

    # NOTE(review): the third argument is a hard-coded file name rather than
    # something derived from the command line -- confirm against the
    # writePeaks() signature whether this is intended.
    writePeaks(
        forward_peaks,
        reverse_peaks,
        'test_peaks_smoothed_lowlim100.bed',
        forward_signal,
        reverse_signal,
        opts.upstream,
        opts.downstream,
        opts.output,
    )
Beispiel #3
0
def main(args):
    '''
    Smooth every input wig file with a pool of worker processes.

    For each file name in args.inputs the wig data is loaded (as '-' strand
    when the name contains 'Reverse', '+' otherwise), the first two columns
    of every chromosome are queued for the add_chrom_data workers, and one
    result per chromosome is collected and written to a new wig file. The
    output name is the input name with its last dot-separated token
    replaced by '<method>_<p1>_<p2>_smoothed.wig'.

    Parameters:
        args: parsed options. Reads inputs, process (number of workers) and
            method, plus bw/nbw, or r/mean when method is 'nb'.
    output:
        None. One smoothed wig file is written per input file.
    '''
    for filename in args.inputs:
        strand = '-' if 'Reverse' in filename else '+'
        wigdata = wig.loadWig( filename, False, strand )
        print(wigdata)

        chroms = Queue()
        outQ = Queue()
        count = 0
        for chrom in wigdata:
            # Same text as the former statement form `print "add ", chrom`.
            print("add  %s" % (chrom,))
            count += 1
            chroms.put( (chrom, wigdata[chrom][:,0:2]) )

        NUM_PROCESSES = args.process

        # Worker ids start at 1.
        for processID in range( 1, NUM_PROCESSES + 1 ):
            Process( target=add_chrom_data, args=( chroms, outQ, processID,  args, strand) ).start()

        try:
            if args.method != 'nb':
                kargs = (args.bw, args.nbw)
            else:
                kargs = (args.r, args.mean)
            tokens = filename.split('.')
            tokens[-1] = args.method +'_'+str(kargs[0]) + '_' + str(kargs[1]) +  "_smoothed.wig"
            # The context manager closes the file even if collecting a
            # result fails part-way through.
            with open( '.'.join(tokens), 'w' ) as out:
                out.write('track type=wiggle_0 name=%s_%d_%f\n'%( args.method, kargs[0], kargs[1], ))
                # Exactly one result is expected per queued chromosome;
                # results are written in the order they arrive.
                for _ in range( count ):
                    out.write( ''.join(outQ.get()) )
        finally:
            # Always release the workers, one sentinel each; otherwise they
            # would keep waiting on the task queue after an error.
            for _ in range( NUM_PROCESSES ):
                chroms.put('STOP')
Beispiel #4
0
    for chrom, oriPeaks, subPeaks, chromWig in iter(tasks.get, "STOP"):
        print 'Process ', processID, ' is processing ', chrom
        outQ.put( (chrom, filter_a_chrom( chrom, oriPeaks, subPeaks, chromWig, args ) ) )
        

if __name__=='__main__':
    parser = argparse.ArgumentParser(description='Filter splitted peaks reported by PeakSplitter.')
    parser.add_argument('wigfile', help='Wig file for the peaks.')
    parser.add_argument('subpeakfile', help='Peak file in bed format.')
    parser.add_argument('oripeakfile', help='The original bed file contains the peaks splitted.' )
    parser.add_argument('strand', choices=['+', '-', '.'], help='The strand of the peaks we are currently processing. Choices are +, -, . .')
    parser.add_argument( '--threshfrac', type=float, default = 0.5, help='minimum pileup of a subpeak as a fraction of the highest point of the original peak. Default is 0.5.' )
    parser.add_argument('--cutoff', type=float, default = 5, help='minimum pileup of a subpeak regardless the relative fraction. Default is 5.')

    args = parser.parse_args()
    wigData = wig.loadWig( args.wigfile, smooth = False )
    subPeaks = getPeaks( args.subpeakfile )
    oriPeaks = getPeaks( args.oripeakfile )
    chroms = set( subPeaks.keys() ).intersection(set( oriPeaks.keys() ))
    tasks = Queue()
    count = 0
    for c in chroms:
        count += 1
        tasks.put( (c, oriPeaks[c], subPeaks[c], wigData[c]) )

    freeze_support()
    NUM_PROCESSES = 4
    processID = 1
    processes = []
    outQ = Queue()
    for i in range( NUM_PROCESSES ):
Beispiel #5
0
    chromWig = w[peak.chrom]
    ws = bisect.bisect_left(chromWig[:,0],peak.start+1) #The start of the region
    we = bisect.bisect_right(chromWig[:,0], peak.end) #The right side of the regions
    axis_x = []
    x = [] #The data for processing
    for i in range(ws, we):
        axis_x.append(chromWig[i,0])
        x += [chromWig[i,0]] * int(chromWig[i,1])
    axis_x = np.array(axis_x)
    x = np.array(x)
    return axis_x, x, chromWig[ws:we,1]

if __name__ == '__main__':
    color_iter = itertools.cycle(['k','r','g','b','c','m','y'])
    FILTER_VAL = [99999,100000,1000000]
    fwig = wig.loadWig('/home/caofan/Downloads/MJF11_hg19/1_Bam/test_apex/MAX_sc-197_SNU16_XO111_Forward.wig', smooth=False)
    rwig = wig.loadWig('/home/caofan/Downloads/MJF11_hg19/1_Bam/test_apex/MAX_sc-197_SNU16_XO111_Reverse.wig', smooth=False)
    peaks = Peak.objects.filter(run=9).order_by('-size')
    
    for i in range(10):
        print peaks[i]
        for filter_val in FILTER_VAL:
            
            if peaks[i].strand == '+':
                axis_x, x, orig_y = getPeakWig(fwig, peaks[i])
            else:
                axis_x, x, orig_y = getPeakWig(rwig, peaks[i])
            model = DPGMM(1)
            skmodel = mixture.DPGMM(n_components=8,alpha=32,n_iter=10000)
            min_x = axis_x[0]
            axis_x = axis_x - min_x
Beispiel #6
0
import wig
import argparse

def getArgs():
    '''
    Parse the command line of the wig-to-bedgraph converter.

    output:
        The argparse namespace with the attributes filename (input wig
        file), strand (one of '+', '-', '.') and output (output file).
    '''
    # The text has to be passed as ``description``. The first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaced the program name in the usage line and left the help
    # without a description.
    parser = argparse.ArgumentParser(description="Convert wig file to bedgraph file.")
    parser.add_argument('filename', help="Input wig file")
    parser.add_argument('strand', choices=['+','-','.'], help="The strand of the input. Can be [+, - , . ]")
    parser.add_argument('output', help="The output file.")

    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: read the wig file named on the command line for
    # the requested strand and write it back out through writeAsBedGraph().
    cli_args = getArgs()
    wig.writeAsBedGraph(
        wig.loadWig(cli_args.filename, False, cli_args.strand),
        cli_args.output,
    )