def get_nlinks_dict(c1, bams, name2index, args, ll=None):
    """Count qualifying alignments on ``c1``, grouped by ``aln.rnext``.

    An alignment is counted when it is not flagged as a duplicate and both
    its own ``mapq`` and ``BamTags.mate_mapq(aln)`` are at least
    ``args.mapq``.

    Args:
        c1: Reference name handed to ``fetch(reference=...)``.
        bams: Iterable of alignment sources exposing ``fetch(reference=...)``
            (presumably pysam ``AlignmentFile`` objects -- confirm with caller).
        name2index: Unused; kept so existing callers keep working.
        args: Object providing the ``mapq`` threshold attribute.
        ll: Optional mapping of reference name to length.  It is only used
            to print the ``name:0-(length-1)`` progress line.

    Returns:
        dict mapping each observed ``aln.rnext`` value to the number of
        qualifying alignments that carried it.
    """
    # ``None`` replaces the former shared mutable ``{}`` default.
    lengths = {} if ll is None else ll

    count = {}
    for sam in bams:
        # Progress line, emitted once per alignment source.  Previously this
        # raised KeyError whenever the length of ``c1`` was not supplied
        # (including the default call); now the bare name is printed instead.
        if c1 in lengths:
            print("{}:{}-{}".format(c1, 0, int(lengths[c1]) - 1))
        else:
            print("{}".format(c1))
        for aln in sam.fetch(reference=c1):
            if (not aln.is_duplicate
                    and aln.mapq >= args.mapq
                    and BamTags.mate_mapq(aln) >= args.mapq):
                count[aln.rnext] = count.get(aln.rnext, 0) + 1
    return count
def chicago_pairs(sca,mapper,bamlist,minq=20,mask={}):
    """Yield read pairs whose two ends both map onto scaffold ``sca``.

    For every segment listed in ``mapper.scaffold_contigs[sca]`` the
    reference name is obtained by dropping the last ``_``-separated field of
    the segment id, and that reference is fetched from every entry of
    ``bamlist``.  A read is kept when it is read 1, is not a duplicate, has
    ``mapq`` and ``BamTags.mate_mapq`` of at least ``minq``, and neither its
    own position nor its mate's position is rejected by ``mask_test``.

    Both ends are translated with ``mapper.mapCoord``; a tuple is yielded
    only when both resulting scaffolds are equal to each other and to
    ``sca``.  The tuple layout is::

        (sca, a1, b1, end1, end2, seg, contig, ncontig,
         scaffold, a1, a2, a3, mate_scaffold, b1, b2, b3, query_name)

    where ``(scaffold, a1, a2, a3, end1)`` and
    ``(mate_scaffold, b1, b2, b3, end2)`` are the two ``mapCoord`` results.
    """
    for seg in mapper.scaffold_contigs[sca]:
        # Everything before the final "_" (empty string when there is none).
        ref = seg.rpartition("_")[0]
        for bam in bamlist:
            for read in bam.fetch(until_eof=True, reference=ref):
                if not read.is_read1 or read.is_duplicate:
                    continue
                if read.mapq < minq or BamTags.mate_mapq(read) < minq:
                    continue
                if mask_test(ref, read.pos, mask):
                    continue
                # NOTE(review): the mate's reference name is resolved here
                # before the ``rnext < 0`` guard a few lines below -- confirm
                # that mateless reads are always removed by the filters above.
                if mask_test(bam.getrname(read.rnext), read.pnext, mask):
                    continue

                contig = bam.getrname(read.tid)
                ncontig = -1 if read.rnext < 0 else bam.getrname(read.rnext)

                scaffold, a1, a2, a3, end1 = mapper.mapCoord(
                    contig, read.pos, read.pos + 1)
                mate_scaffold, b1, b2, b3, end2 = mapper.mapCoord(
                    ncontig, read.pnext, read.pnext + 1)

                if debug:
                    print(("#x", contig, ncontig, read.pos, read.pnext,
                           scaffold, mate_scaffold, sca, a1, b1, ref,
                           mapper.ocontig_contigs.get(ref, [])))

                if scaffold == mate_scaffold and sca == scaffold:
                    yield (sca, a1, b1, end1, end2, seg, contig, ncontig,
                           scaffold, a1, a2, a3,
                           mate_scaffold, b1, b2, b3,
                           read.query_name)
                        #
                        if c1[-1]=="3":
                            k1 = contig_seq[c1[:-2]][-args.kmer:]
                        else:
                            k1 = rc(contig_seq[c1[:-2]][:args.kmer])
                        if c2[-1]=="5":
                            k2 = contig_seq[c2[:-2]][:args.kmer]
                        else:
                            k2 = rc(contig_seq[c2[:-2]][-args.kmer:])
                        print("\t".join(map(str,["gS:",sc,int(xx),c1,c2,c1[-1],c2[-1],k1,k2,k1 in seq, k2 in seq,"{}:{}".format(seq,qal),st,aln.is_reverse])))
                        #if overlaps_gap(aln2scaffoldx(aln)):
                        #pass
                        # this read overlapps a gap?
                        #OPEN:Scaffold278839     isotig431029.5  TTTTTCTCATCAGTCTCCTCATCACAAGTTTTCTATAAGTCCCCA   isotig1560433.5 TTAAATCATAGCAGTACTCATGAAGAGGGTAGTAAAGCACTGGAA   57      14      :       AG

            if (not args.direct) and (not args.twopass) and (BamTags.mate_mapq(aln)>=10.0 and nscaffold in my_scaffolds) : # we might want this read because it's sister maps to one of our scaffolds
                sc,xx,yy,st,cdz = mapper.mapCoord( ncontig,aln.pnext,aln.pnext+1 )
                mate_scaffold_strand = st if not aln.mate_is_reverse else -1*st
                if mate_scaffold_strand == -1:
                    xx -= insert_size
                    yy -= insert_size
                else:
                    xx += insert_size
                    yy += insert_size
                gaps = mapper.hits_gaps(sc,xx-400,yy+400)
                if args.debug: print("#",gaps)
                if gaps:
                    seq = aln.seq
                    qal = aln.qual
                    if mate_scaffold_strand==1:
                        seq = rc(seq)
    buff = 0  #args.trim
    for contig in contigs:
        if args.debug: print("#{}".format(contig))
        if (not contig in llen) or llen[contig] < args.minlen: continue
        spans = []
        edges = []
        nb = []
        lll = llen[contig]
        #        print "#",contig
        for sam in bams:
            for aln in sam.fetch(region=contig):
                if (aln.tid == aln.rnext) and (aln.pos < aln.pnext) and (
                    (not args.ignoreDuplicates) or
                    (not aln.is_duplicate)) and (aln.mapq >= args.mapq) and (
                        BamTags.mate_mapq(aln) >= args.mapq):
                    #                   spans.append( tuple( [aln.pos, aln.pos+aln.tlen,aln.mapq,oname[sam]] )  )
                    #                   if aln.tlen > 2*buff and aln.tlen < range_cutoff:
                    if aln.tlen > 2 * buff:  # and aln.tlen < range_cutoff:
                        ll = ces.model.lnF(aln.tlen)
                        #                       ll=get_score( aln.tlen )
                        edges.append(tuple([aln.pos + buff, ll]))
                        edges.append(
                            tuple([min(lll, aln.pos + aln.tlen - buff), -ll]))

                        nb.append(tuple([aln.pos + buff, 1]))
                        nb.append(
                            tuple([min(lll, aln.pos + aln.tlen - buff), -1]))

        edges.sort()
        nb.sort()
        alniter = alngenerator()
    else:
        alniter = sam.fetch(until_eof=True)

    for aln in alniter:
#        print aln.pos, dir(aln)
        nr+=1
        if nr%200000 == 0:
            sys.stderr.write("%d\n"%nr)

        i,j = aln.tid, aln.rnext
        if args.debug: print([i,j,int(aln.mapq),aln.tlen,aln.is_duplicate])

        if int(aln.mapq) < args.min_qual: continue
        if BamTags.mate_mapq(aln) < args.min_qual: continue
        if aln.is_duplicate : continue
        if args.junctions and BamTags.junction(aln) != "T": continue

        if i==j:
            if (slen[i]<args.minlength) : continue
            if aln.tlen < 0: continue
            hist[aln.tlen] = hist.get(aln.tlen,0)+1
            n+=1
        else:
            if (slen[i]<args.minlength) or (slen[j]<args.minlength) : continue
            if not aln.is_read1: continue
            x = min( aln.pos , slen[i] - aln.pos ) + min( aln.pnext,slen[j]-aln.pnext)
            hist2[x] = hist2.get(x,0)+1 
            n2+=1
# Example #6
# 0
def bam2chicagolinks(bamlist,
                     my_contigs,
                     smap,
                     mask_ranges,
                     mapq,
                     minl,
                     internal=False,
                     tidlist=False):
    """Yield grouped read-pair links for every segment in ``my_contigs``.

    For each segment the matching region is fetched from every entry of
    ``bamlist``.  Reads are dropped when their ``mapq`` or
    ``BamTags.mate_mapq`` is below ``mapq``, when they are duplicates, when
    they start before the segment start ``x``, or when ``mask_test`` rejects
    either end.  Surviving pairs are grouped by the mate's target ``c2``.

    Args:
        bamlist: Alignment sources exposing ``fetch(region=...)`` and
            ``getrname``; without ``smap`` the first one must also provide
            ``references`` and ``lengths``.
        my_contigs: Mapping whose keys are the segments to process.
        smap: Optional segment mapper.  When truthy it supplies
            ``get_segment_info``, ``map_coord`` and ``seg2len``; when falsy
            segments are used directly as fetch regions.
        mask_ranges: Passed through to ``mask_test``.
        mapq: Minimum mapping quality for a read and for its mate.
        minl: With ``smap``, both segment lengths must exceed this value.
        internal: When false, pairs whose mate maps to ``seg`` are skipped.
        tidlist: When true, the list of read names is appended to each tuple.

    Yields:
        ``(seg, c2, len_seg, len_c2, n_links, links)`` with ``links`` a list
        of ``(1 + pos - x, mate_coord)`` pairs, plus a trailing list of
        ``query_name`` values when ``tidlist`` is true.  Without ``smap``
        both lengths are the length of ``seg`` taken from ``bamlist[0]``.
    """
    for seg in my_contigs.keys():
        if smap:
            scaffold, x, y = smap.get_segment_info(seg)
            region = "{}:{}-{}".format(scaffold, x, y)
        else:
            ti = bamlist[0].references.index(seg)
            seg_len = bamlist[0].lengths[ti]
            region = seg
            x = -1
        print("#", seg, region)

        links = {}
        tids = {}
        for bam in bamlist:
            for a in bam.fetch(region=region):
                if a.mapq < mapq:
                    continue
                if a.is_duplicate:
                    continue
                if BamTags.mate_mapq(a) < mapq:
                    continue
                xx, yy = a.pos, a.pnext
                if xx < x:
                    continue
                c1 = bam.getrname(a.tid)
                c2 = bam.getrname(a.rnext)

                if mask_test(c1, xx, mask_ranges):
                    continue
                if mask_test(c2, yy, mask_ranges):
                    continue

                if smap:
                    c2, yyy = smap.map_coord(c2, yy)
                else:
                    yyy = yy

                if c2 == seg and not internal:
                    continue
                if not smap and c1 != c2:
                    continue
                # Append in place; the former ``get(c2, []) + [...]`` copied
                # the whole list for every read (quadratic in link count).
                links.setdefault(c2, []).append((1 + xx - x, yyy))
                tids.setdefault(c2, []).append(a.query_name)

        for c2, pairs in links.items():
            if smap:
                # Targets without a known length are skipped silently.
                if c2 not in smap.seg2len:
                    continue
                seg_len = smap.seg2len.get(seg)
                c2_len = smap.seg2len.get(c2)
                if not (seg_len > minl and c2_len > minl):
                    continue
            else:
                c2_len = seg_len

            record = (seg, c2, seg_len, c2_len, len(pairs), pairs)
            if tidlist:
                record += (tids[c2],)
            yield record
                                        "{}:{}".format(seq, qal),
                                        st,
                                        aln.is_reverse,
                                    ],
                                )
                            )
                        )
                        # if overlaps_gap(aln2scaffoldx(aln)):
                        # pass
                        # this read overlapps a gap?
                        # OPEN:Scaffold278839     isotig431029.5  TTTTTCTCATCAGTCTCCTCATCACAAGTTTTCTATAAGTCCCCA   isotig1560433.5 TTAAATCATAGCAGTACTCATGAAGAGGGTAGTAAAGCACTGGAA   57      14      :       AG

            if (
                (not args.direct)
                and (not args.twopass)
                and (BamTags.mate_mapq(aln) >= 10.0 and nscaffold in my_scaffolds)
            ):  # we might want this read because it's sister maps to one of our scaffolds
                sc, xx, yy, st, cdz = mapper.mapCoord(ncontig, aln.pnext, aln.pnext + 1)
                mate_scaffold_strand = st if not aln.mate_is_reverse else -1 * st
                if mate_scaffold_strand == -1:
                    xx -= insert_size
                    yy -= insert_size
                else:
                    xx += insert_size
                    yy += insert_size
                gaps = mapper.hits_gaps(sc, xx - 400, yy + 400)
                if args.debug:
                    print("#", gaps)
                if gaps:
                    seq = aln.seq
                    qal = aln.qual
def get_nlinks_spanning(c1,xx,ll,bams,name2index,args,gap):
    """Count same-reference read pairs that straddle position ``xx`` on ``c1``.

    A pair is counted when the read and its mate share a reference id, the
    read is not a duplicate, both the read's ``mapq`` and
    ``BamTags.mate_mapq`` reach ``args.mapq``, the read starts left of
    ``xx - gap/2`` and the mate starts right of ``xx + gap/2`` (the halving
    is done by ``old_div``, defined elsewhere).

    ``name2index`` is not used.  ``ll`` is only consulted for ``c1`` so that
    an unknown contig still raises ``KeyError``.
    """
    _ = ll[c1]  # lookup kept for its KeyError on unknown contigs

    spanning = 0
    for bam in bams:
        for read in bam.fetch(region=c1):
            if read.rnext != read.tid:
                continue
            if read.is_duplicate:
                continue
            if not (read.mapq >= args.mapq):
                continue
            if not (BamTags.mate_mapq(read) >= args.mapq):
                continue
            if read.pos < xx - old_div(gap, 2) and read.pnext > xx + old_div(gap, 2):
                spanning += 1

    return spanning
def get_nlinks(c1,c2,ll,bams,name2index,args):
    """Count qualifying read pairs linking contigs ``c1`` and ``c2``.

    The contig with the smaller length in ``ll`` is the one fetched from
    each entry of ``bams``; on a tie ``c1`` is fetched.  A read is counted
    when its ``rnext`` equals ``name2index`` of the other contig, it is not
    a duplicate, and both its ``mapq`` and ``BamTags.mate_mapq`` are at
    least ``args.mapq``.

    Returns:
        The number of qualifying reads summed over all of ``bams``.
    """
    len1, len2 = ll[c1], ll[c2]
    if len2 < len1:
        # Scan the shorter contig and look for mates on the longer one.
        c1, c2 = c2, c1

    mate_index = name2index[c2]
    return sum(
        1
        for bam in bams
        for read in bam.fetch(region=c1)
        if read.rnext == mate_index
        and not read.is_duplicate
        and read.mapq >= args.mapq
        and BamTags.mate_mapq(read) >= args.mapq
    )
    llen={}
    for i in range(len(snam)):
        llen[snam[i]]=slen[i]

    buff=0 #args.trim
    for contig in contigs:
        if args.debug: print("#{}".format(contig))
        if (not contig in llen) or  llen[contig]<args.minlen: continue
        spans=[]
        edges=[]
        nb=[]
        lll = llen[contig]
#        print "#",contig
        for sam in bams:
            for aln in sam.fetch(region=contig):
                if (aln.tid==aln.rnext) and (aln.pos<aln.pnext) and ((not args.ignoreDuplicates) or (not aln.is_duplicate)) and (aln.mapq >= args.mapq) and (BamTags.mate_mapq(aln) >= args.mapq):
#                   spans.append( tuple( [aln.pos, aln.pos+aln.tlen,aln.mapq,oname[sam]] )  )
#                   if aln.tlen > 2*buff and aln.tlen < range_cutoff:
                   if aln.tlen > 2*buff: # and aln.tlen < range_cutoff:
                       ll=ces.model.lnF(aln.tlen)
#                       ll=get_score( aln.tlen )                   
                       edges.append( tuple([aln.pos + buff         , ll]) )
                       edges.append( tuple([min(lll,aln.pos+aln.tlen-buff)  ,-ll]) )

                       nb.append( tuple([aln.pos + buff         , 1]) )
                       nb.append( tuple([min(lll,aln.pos+aln.tlen-buff)  ,-1]) )

        edges.sort()
        nb.sort()
        rs=0
        n=0
def bam2chicagolinks(bamlist,my_contigs,smap,mask_ranges,mapq,minl,internal=False,tidlist=False):
    """Yield grouped read-pair links for every segment in ``my_contigs``.

    For each segment the matching region is fetched from every entry of
    ``bamlist``.  Reads are dropped when their ``mapq`` or
    ``BamTags.mate_mapq`` is below ``mapq``, when they are duplicates, when
    they start before the segment start ``x``, or when ``mask_test`` rejects
    either end.  Surviving pairs are grouped by the mate's target ``c2``.

    Args:
        bamlist: Alignment sources exposing ``fetch(region=...)`` and
            ``getrname``; without ``smap`` the first one must also provide
            ``references`` and ``lengths``.
        my_contigs: Mapping whose keys are the segments to process.
        smap: Optional segment mapper.  When truthy it supplies
            ``get_segment_info``, ``map_coord`` and ``seg2len``; when falsy
            segments are used directly as fetch regions.
        mask_ranges: Passed through to ``mask_test``.
        mapq: Minimum mapping quality for a read and for its mate.
        minl: With ``smap``, both segment lengths must exceed this value.
        internal: When false, pairs whose mate maps to ``seg`` are skipped.
        tidlist: When true, the list of read names is appended to each tuple.

    Yields:
        ``(seg, c2, len_seg, len_c2, n_links, links)`` with ``links`` a list
        of ``(1 + pos - x, mate_coord)`` pairs, plus a trailing list of
        ``query_name`` values when ``tidlist`` is true.  Without ``smap``
        both lengths are the length of ``seg`` taken from ``bamlist[0]``.
    """
    for seg in my_contigs.keys():
        if smap:
            scaffold, x, y = smap.get_segment_info(seg)
            region = "{}:{}-{}".format(scaffold, x, y)
        else:
            ti = bamlist[0].references.index(seg)
            seg_len = bamlist[0].lengths[ti]
            region = seg
            x = -1
        print("#", seg, region)

        links = {}
        tids = {}
        for bam in bamlist:
            for a in bam.fetch(region=region):
                if a.mapq < mapq:
                    continue
                if a.is_duplicate:
                    continue
                if BamTags.mate_mapq(a) < mapq:
                    continue
                xx, yy = a.pos, a.pnext
                if xx < x:
                    continue
                c1 = bam.getrname(a.tid)
                c2 = bam.getrname(a.rnext)

                if mask_test(c1, xx, mask_ranges):
                    continue
                if mask_test(c2, yy, mask_ranges):
                    continue

                if smap:
                    c2, yyy = smap.map_coord(c2, yy)
                else:
                    yyy = yy

                if c2 == seg and not internal:
                    continue
                if not smap and c1 != c2:
                    continue
                # Append in place; the former ``get(c2, []) + [...]`` copied
                # the whole list for every read (quadratic in link count).
                links.setdefault(c2, []).append((1 + xx - x, yyy))
                tids.setdefault(c2, []).append(a.query_name)

        for c2, pairs in links.items():
            if smap:
                # Targets without a known length are skipped silently.
                if c2 not in smap.seg2len:
                    continue
                seg_len = smap.seg2len.get(seg)
                c2_len = smap.seg2len.get(c2)
                if not (seg_len > minl and c2_len > minl):
                    continue
            else:
                c2_len = seg_len

            record = (seg, c2, seg_len, c2_len, len(pairs), pairs)
            if tidlist:
                record += (tids[c2],)
            yield record