Exemple #1
0
def filter_a_chrom(chrom, oriPeaks, subPeaks, chromWig, args):
    '''
    Filter the sub-peaks of one chromosome by their signal height.

    oriPeaks and subPeaks are lists of peaks from the same chromosome.
    Both peak lists are sorted by coordinates on the chromosome.
    A peak is indexed as peak[1] = start and peak[2] = end; slots 4 and 5
    of every kept sub-peak are overwritten (see below).

    For each original peak, the wig rows whose position (column 0 of
    chromWig) falls inside [op[1], op[2]] are expanded with wig.expandWig
    and the maximum of the expanded signal is taken.  Every sub-peak whose
    start lies inside the same interval is then measured over its own
    window of the expanded signal and kept when its maximum is strictly
    greater than both args.threshfrac * <original peak maximum> and
    args.cutoff.

    chrom is accepted for interface compatibility; the body does not use it.

    Returns a list of the kept sub-peak objects.  They are the same objects
    as in subPeaks and are modified in place: index 4 is set to the
    sub-peak maximum and index 5 to args.strand.
    '''
    sub_peak_starts = [k[1] for k in subPeaks]
    result = []
    for op in oriPeaks:
        startIdx = bisect.bisect_left( chromWig[:,0], op[1] )
        endIdx = bisect.bisect_right( chromWig[:,0], op[2] )
        subStartIdx = bisect.bisect_left( sub_peak_starts, op[1] )
        subEndIdx = bisect.bisect_right( sub_peak_starts, op[2] )
        # Nothing to do without wig rows or without sub-peaks in this peak.
        if endIdx <= startIdx or subEndIdx <= subStartIdx:
            continue
        currWig = wig.expandWig( chromWig[startIdx:endIdx,:], 0, 1, smooth=False )
        # Sub-peak coordinates are made relative to the first covered wig row.
        startPos = chromWig[ startIdx, 0 ]
        maxV = np.max( currWig ) #The maximum value
        for subI in range( subStartIdx, subEndIdx ):
            sub = subPeaks[ subI ]
            # int() keeps the slice bounds valid if positions are stored as
            # floats -- chromWig's dtype is not visible here (TODO confirm).
            relStart = int( max( 0, sub[1] - startPos ) )
            relEnd = int( min( currWig.shape[0] - 1, sub[2] - startPos ) )
            # An empty window would make np.max raise, and a negative relEnd
            # would wrap around and measure the wrong region, so skip both.
            if relEnd <= relStart:
                continue
            subMaxV = np.max( currWig[ relStart: relEnd ] )
            if subMaxV > args.threshfrac * maxV and subMaxV > args.cutoff:
                sub[4] = subMaxV
                sub[5] = args.strand
                result.append( sub )
    return result
Exemple #2
0
def add_chrom_data(taskQ, outQ, processID, args, strand  ):
    if args.method == 'g':
        kargs = (args.bw, args.nbw)
    else:
        kargs = (args.r, args.mean)
    for chrom, chromWig in iter( taskQ.get, 'STOP' ):
        print "add chrom data Process ", processID," is processing ", chrom
        lines = []
        lines.append("variableStep chrom=%s\n"%(chrom,))
        startp = chromWig[0,0]
        expanded = wig.expandWig( chromWig, offset, 1, strand = strand, method=args.method, kargs=kargs)
        for i in range( expanded.shape[0] ):
            if expanded[i] > 0.8:
                lines.append( "%d\t%f\n"%(int(i + startp - offset), expanded[i], ) )
        #for i in range( chromWig.shape[0] ):
        #    lines.append('%d\t%f\n'%( chromWig[ i, 0], chromWig[i,1]))
        expanded.resize(100000, refcheck=False)
        expanded.resize(0, refcheck=False)
        chromWig.resize(100000, refcheck=False)
        chromWig.resize(0, refcheck=False)

        outQ.put(lines)
        gc.collect()
Exemple #3
0
def pair( fpeaks, rpeaks, fwig, rwig, ulimit, dlimit, prefix):
    '''
    Assuming that the peaks on one strand is mutually exclusive.
    They do not overlap with each other.
    In this case, the ordering of the starts of the peaks and the
    ends of the peaks are the same. And that when the starts are 
    sorted, the ends are also sorted.
    '''
    print "fwig: ", fwig
    print "rwig: ", rwig
    offset = 5
    expandCol = 1
    out1 = open(prefix + "_singletons_dev.bed",'w')
    out2 = open(prefix + "_pairs_dev.gff", "w")
    print fpeaks.keys()
    for chrom in fpeaks:
        if chrom not in rpeaks:
            continue
        print chrom
        fp = fpeaks[chrom]
        rp = rpeaks[chrom]
        pairF = []  #Store the pairing information, if unpaired, it will be negative.
        pairR = []
        fw = fwig[chrom]
        expandedFw = WIG.expandWig( fw, offset, expandCol, smooth=False )
        rw = rwig[chrom]
        expandedRw = WIG.expandWig( rw, offset, expandCol, smooth=False )
        rstarts = []
        rends = []
        fprefer = []
        rprefer = []
        unpairedF = []
        for f in fp:
            fprefer.append( SortedCollection( key=itemgetter(1) ) )
            pairF.append( ( -1, 0, 0) )  #( index of the mate, score, distance )
        for r in rp:
            rprefer.append( SortedCollection( key=itemgetter(1) ) )
            pairR.append( (-1, 0, 0) )
            rstarts.append( r[1] )
            rends.append( r[2] )
        for i in range( len( fp ) ):
            currfp = fp[ i ]
            start = currfp[1]
            end = currfp[2]
            es = start - ulimit
            ee = end + dlimit
            currFw = expandedFw[ max( 0, start - fw[0,0] ) + offset : max( 0, end - fw[0,0] ) + offset + 1]
            si = bisect.bisect_left( rends, es )
            ei = bisect.bisect_right( rstarts, ee )
            ftagCounts,_,_,_ = gs.getTagCount( fwig, chrom, start, end )

            #print ei - si
            maxScore = 0
            bestDist = 0
            bestIdx = 0
            bestRpos = 0
            bestHeight = 0
            for idx in range( si, ei ):
                currrp = rp[ idx ]
                rstart = currrp[1]
                rend = currrp[2]
                currRw = expandedRw[ max( 0, rstart - rw[0, 0] ) + offset : max( 0, rend - rw[0,0] ) + offset + 1 ]
                rtagCoungs,_,_,_ = gs.getTagCount( rwig, chrom, currrp[1], currrp[2] )

                tempScore, tempDist, tempRpos, tempHeight = getScore( currFw, currRw, start, rstart )

                fprefer[ i ].insert( (idx, tempScore, tempDist, tempRpos, tempHeight) )
                rprefer[ idx ].insert( (i, tempScore, tempDist, tempRpos, tempHeight) )
                if tempScore > maxScore:
                    maxScore = tempScore
                    bestDist = tempDist
                    bestIdx = idx
                    bestRpos = tempRpos
                    bestHeight = tempHeight
            if maxScore > pairR[ bestIdx ][1]:
                pairF[i] = ( bestIdx, maxScore, bestDist , bestRpos, bestHeight)
                if pairR[ bestIdx ][0] >= 0:
                    pairF[ pairR[ bestIdx ][ 0 ] ] = (-1, 0, 0)
                    unpairedF.append( pairR[ bestIdx ][0] )
                pairR[bestIdx] = ( i, maxScore, bestDist, bestRpos, bestHeight )
            else:
                unpairedF.append( i )
            try:
                fprefer[ i ].remove( ( bestIdx, maxScore, bestDist, bestRpos, bestHeight ) )
            except ValueError:
                #print "Value error: ", bestIdx, ' ',maxScore, ' ',bestDist,' ', si,' ', ei
                pass
        singletons = []
        pairs = []
        while len(unpairedF) > 0:
            for u in unpairedF:
                if len( fprefer[u] ) > 0:
                    ridx = fprefer[u][-1][0]
                    if pairR[ ridx ][1] < fprefer[u][-1][1]:
                        if pairR[ ridx ][0] > 0:
                            pairF[ pairR[ ridx ][0] ] = (-1, 0, 0)
                            unpairedF.append( pairR[ ridx ][0] )
                        pairR[ ridx ] = ( u, fprefer[u][-1][1], fprefer[u][-1][2], fprefer[u][-1][3], fprefer[u][-1][4] )
                        pairF[ u ] = ( ridx, fprefer[u][-1][1], fprefer[u][-1][2], fprefer[u][-1][3], fprefer[u][-1][4] )
                    fprefer[u].remove( (ridx, fprefer[u][-1][1], fprefer[u][-1][2], fprefer[u][-1][3], fprefer[u][-1][4] ) )
                else:
                    unpairedF.remove( u )

        for i,f in enumerate(pairF):
            fp[i][1] -= 1
            if f[0] == -1:
                singletons.append( fp[i] )
            else:
                rp[f[0]][1] -= 1
                #pairs.append( fp[i] )
                #pairs.append( rp[f[0]] )
                pairStart = (2*f[3] - f[2])/2
                pairEnd = pairStart + 1
                pairs.append( [fp[i][0],'.','.',pairStart-10, pairEnd+10,f[4],'.','.','cw_distance='+str(f[2]) ] )

        for i,f in enumerate(pairR):
            rp[i][1] -= 1
            if f[0] == -1:
                singletons.append( rp[i] )

        singletons.sort(key=lambda k:( k[0], k[1], k[2] ))
        pairs.sort(key = lambda k:( k[0], k[1], k[2]))
        print "singletons: ", len(singletons)
        print "pairs: ", len(pairs)

        for s in singletons:
            out1.write('\t'.join([str(i) for i in s]))
            out1.write('\n')
        for p in pairs:
            out2.write('\t'.join([str(i) for i in p]))
            out2.write('\n')
    out1.close()
    out2.close()