def get_nlinks_dict(c1, bams, name2index, args, ll=None):
    """Count qualifying alignments on contig ``c1`` by mate reference id.

    Every alignment fetched for ``c1`` from each handle in ``bams`` is
    counted when it is not flagged as a duplicate and both its own mapping
    quality and ``BamTags.mate_mapq(aln)`` are at least ``args.mapq``.

    Args:
        c1: Reference name passed to ``fetch(reference=...)``.
        bams: Iterable of alignment-file handles exposing ``fetch``.
        name2index: Unused; kept so existing callers keep working.
        args: Object providing the ``mapq`` threshold.
        ll: Optional mapping from reference name to length.  It is used only
            for the ``name:0-<length-1>`` progress line printed once per
            handle; when ``c1`` is absent (or ``ll`` is omitted) that line is
            skipped instead of raising ``KeyError``.

    Returns:
        dict mapping ``aln.rnext`` to the number of qualifying alignments.
    """
    # A None sentinel replaces the former shared mutable default ``{}``.
    lengths = {} if ll is None else ll
    count = {}
    for sam in bams:
        if c1 in lengths:
            print("{}:{}-{}".format(c1, 0, int(lengths[c1]) - 1))
        for aln in sam.fetch(reference=c1):
            if aln.is_duplicate:
                continue
            if aln.mapq < args.mapq:
                continue
            if BamTags.mate_mapq(aln) < args.mapq:
                continue
            count[aln.rnext] = count.get(aln.rnext, 0) + 1
    return count
def chicago_pairs(sca, mapper, bamlist, minq=20, mask=None):
    """Yield read pairs whose two ends both map onto scaffold ``sca``.

    For every segment listed in ``mapper.scaffold_contigs[sca]`` the
    corresponding reference is fetched from each handle in ``bamlist``.
    Only read-1 alignments are considered, so each pair is reported at most
    once per handle.  Duplicates, alignments whose own or mate mapping
    quality (``BamTags.mate_mapq``) is below ``minq``, and pairs with either
    end rejected by ``mask_test`` are skipped.

    Args:
        sca: Scaffold identifier; key into ``mapper.scaffold_contigs``.
        mapper: Object providing ``scaffold_contigs``, ``mapCoord`` and
            ``ocontig_contigs``.
        bamlist: Iterable of alignment-file handles exposing ``fetch`` and
            ``getrname``.
        minq: Minimum mapping quality required of both ends.
        mask: Mask structure handed to ``mask_test``; defaults to an empty
            dict.

    Yields:
        A 17-tuple ``(sca, z1a, z2p, c1, c2, seg, contig, ncontig, scaffold,
        z1a, z2a, z3a, nscaffold, z2p, x2p, z3p, query_name)`` where
        ``scaffold, z1a, z2a, z3a, c1`` is the ``mapCoord`` result for the
        read and ``nscaffold, z2p, x2p, z3p, c2`` the result for its mate.

    Note:
        Reads the free name ``debug`` (expected at module level) to decide
        whether to print a trace tuple per pair.
    """
    # A None sentinel replaces the former shared mutable default ``{}``.
    if mask is None:
        mask = {}

    for seg in mapper.scaffold_contigs[sca]:
        # The reference name is the segment name minus its last
        # "_"-separated field.
        ref = "_".join(seg.split("_")[:-1])
        for b in bamlist:
            for aln in b.fetch(until_eof=True, reference=ref):
                if not aln.is_read1:
                    continue
                if aln.is_duplicate:
                    continue
                if aln.mapq < minq:
                    continue
                if BamTags.mate_mapq(aln) < minq:
                    continue
                if mask_test(ref, aln.pos, mask):
                    continue

                # Resolve the mate's reference name once, and only when the
                # mate has a reference id; previously the name was looked up
                # unguarded for the mask test and then again with a guard.
                mate_has_ref = aln.rnext >= 0
                ncontig = b.getrname(aln.rnext) if mate_has_ref else -1
                if mate_has_ref and mask_test(ncontig, aln.pnext, mask):
                    continue

                contig = b.getrname(aln.tid)
                scaffold, z1a, z2a, z3a, c1 = mapper.mapCoord(
                    contig, aln.pos, aln.pos + 1)
                nscaffold, z2p, x2p, z3p, c2 = mapper.mapCoord(
                    ncontig, aln.pnext, aln.pnext + 1)
                if debug:
                    print(("#x", contig, ncontig, aln.pos, aln.pnext,
                           scaffold, nscaffold, sca, z1a, z2p, ref,
                           mapper.ocontig_contigs.get(ref, [])))
                # Keep the pair only when both ends land on the requested
                # scaffold.
                if scaffold == nscaffold and sca == scaffold:
                    yield (sca, z1a, z2p, c1, c2, seg, contig, ncontig,
                           scaffold, z1a, z2a, z3a, nscaffold, z2p, x2p, z3p,
                           aln.query_name)
# if c1[-1]=="3": k1 = contig_seq[c1[:-2]][-args.kmer:] else: k1 = rc(contig_seq[c1[:-2]][:args.kmer]) if c2[-1]=="5": k2 = contig_seq[c2[:-2]][:args.kmer] else: k2 = rc(contig_seq[c2[:-2]][-args.kmer:]) print("\t".join(map(str,["gS:",sc,int(xx),c1,c2,c1[-1],c2[-1],k1,k2,k1 in seq, k2 in seq,"{}:{}".format(seq,qal),st,aln.is_reverse]))) #if overlaps_gap(aln2scaffoldx(aln)): #pass # this read overlapps a gap? #OPEN:Scaffold278839 isotig431029.5 TTTTTCTCATCAGTCTCCTCATCACAAGTTTTCTATAAGTCCCCA isotig1560433.5 TTAAATCATAGCAGTACTCATGAAGAGGGTAGTAAAGCACTGGAA 57 14 : AG if (not args.direct) and (not args.twopass) and (BamTags.mate_mapq(aln)>=10.0 and nscaffold in my_scaffolds) : # we might want this read because it's sister maps to one of our scaffolds sc,xx,yy,st,cdz = mapper.mapCoord( ncontig,aln.pnext,aln.pnext+1 ) mate_scaffold_strand = st if not aln.mate_is_reverse else -1*st if mate_scaffold_strand == -1: xx -= insert_size yy -= insert_size else: xx += insert_size yy += insert_size gaps = mapper.hits_gaps(sc,xx-400,yy+400) if args.debug: print("#",gaps) if gaps: seq = aln.seq qal = aln.qual if mate_scaffold_strand==1: seq = rc(seq)
buff = 0 #args.trim for contig in contigs: if args.debug: print("#{}".format(contig)) if (not contig in llen) or llen[contig] < args.minlen: continue spans = [] edges = [] nb = [] lll = llen[contig] # print "#",contig for sam in bams: for aln in sam.fetch(region=contig): if (aln.tid == aln.rnext) and (aln.pos < aln.pnext) and ( (not args.ignoreDuplicates) or (not aln.is_duplicate)) and (aln.mapq >= args.mapq) and ( BamTags.mate_mapq(aln) >= args.mapq): # spans.append( tuple( [aln.pos, aln.pos+aln.tlen,aln.mapq,oname[sam]] ) ) # if aln.tlen > 2*buff and aln.tlen < range_cutoff: if aln.tlen > 2 * buff: # and aln.tlen < range_cutoff: ll = ces.model.lnF(aln.tlen) # ll=get_score( aln.tlen ) edges.append(tuple([aln.pos + buff, ll])) edges.append( tuple([min(lll, aln.pos + aln.tlen - buff), -ll])) nb.append(tuple([aln.pos + buff, 1])) nb.append( tuple([min(lll, aln.pos + aln.tlen - buff), -1])) edges.sort() nb.sort()
alniter = alngenerator() else: alniter = sam.fetch(until_eof=True) for aln in alniter: # print aln.pos, dir(aln) nr+=1 if nr%200000 == 0: sys.stderr.write("%d\n"%nr) i,j = aln.tid, aln.rnext if args.debug: print([i,j,int(aln.mapq),aln.tlen,aln.is_duplicate]) if int(aln.mapq) < args.min_qual: continue if BamTags.mate_mapq(aln) < args.min_qual: continue if aln.is_duplicate : continue if args.junctions and BamTags.junction(aln) != "T": continue if i==j: if (slen[i]<args.minlength) : continue if aln.tlen < 0: continue hist[aln.tlen] = hist.get(aln.tlen,0)+1 n+=1 else: if (slen[i]<args.minlength) or (slen[j]<args.minlength) : continue if not aln.is_read1: continue x = min( aln.pos , slen[i] - aln.pos ) + min( aln.pnext,slen[j]-aln.pnext) hist2[x] = hist2.get(x,0)+1 n2+=1
def bam2chicagolinks(bamlist, my_contigs, smap, mask_ranges, mapq, minl,
                     internal=False, tidlist=False):
    """Yield per-segment link records collected from paired alignments.

    For each key ``seg`` of ``my_contigs`` the matching region is fetched
    from every handle in ``bamlist``.  Alignments are dropped when their own
    or mate mapping quality (``BamTags.mate_mapq``) is below ``mapq``, when
    flagged as duplicates, when they start before the segment start, or when
    either end is rejected by ``mask_test``.  Surviving pairs are grouped by
    the mate's segment (``smap.map_coord``) or, without ``smap``, by the
    mate's reference name.

    A ``"# <seg> <region>"`` line is printed to stdout for every segment.

    Args:
        bamlist: Sequence of alignment-file handles exposing ``fetch``,
            ``getrname`` and, for the first handle, ``references`` and
            ``lengths``.
        my_contigs: Mapping whose keys are the segments to process.
        smap: Optional segment mapper providing ``get_segment_info``,
            ``map_coord`` and ``seg2len``; falsy means "no segment map".
        mask_ranges: Mask structure handed to ``mask_test``.
        mapq: Minimum mapping quality for both ends.
        minl: With ``smap``, both segment lengths must exceed this value.
        internal: Also keep pairs whose mate falls in ``seg`` itself.
        tidlist: Append the list of query names to each record.

    Yields:
        ``(seg, c2, len_seg, len_c2, n_links, links)`` with ``links`` a list
        of ``(1 + pos - x, mate_coordinate)`` tuples, plus a trailing list of
        query names when ``tidlist`` is true.  Without ``smap`` both length
        fields are the header length of ``seg`` in ``bamlist[0]``.
    """
    for seg in my_contigs.keys():
        if smap:
            scaffold, x, y = smap.get_segment_info(seg)
            region = "{}:{}-{}".format(scaffold, x, y)
        else:
            ti = bamlist[0].references.index(seg)
            l1 = bamlist[0].lengths[ti]
            region = seg
            # NOTE(review): with x == -1 the stored offset below becomes
            # pos + 2 -- confirm this is intended.
            x = -1

        links = {}
        tids = {}
        print("#", seg, region)

        for bam in bamlist:
            for a in bam.fetch(region=region):
                if a.mapq < mapq:
                    continue
                if a.is_duplicate:
                    continue
                if BamTags.mate_mapq(a) < mapq:
                    continue
                xx, yy = a.pos, a.pnext
                if xx < x:
                    continue
                c1 = bam.getrname(a.tid)
                c2 = bam.getrname(a.rnext)
                if mask_test(c1, xx, mask_ranges):
                    continue
                if mask_test(c2, yy, mask_ranges):
                    continue
                if smap:
                    c2, yyy = smap.map_coord(c2, yy)
                else:
                    yyy = yy
                if c2 == seg and not internal:
                    continue
                if not smap and c1 != c2:
                    continue
                # Append in place: the former ``get(...) + [...]`` copied the
                # whole list for every read.
                links.setdefault(c2, []).append((1 + xx - x, yyy))
                tids.setdefault(c2, []).append(a.query_name)

        for c2, pairs in links.items():
            if smap:
                if c2 not in smap.seg2len:
                    continue
                len_seg = smap.seg2len.get(seg)
                len_c2 = smap.seg2len.get(c2)
                if not (len_seg > minl and len_c2 > minl):
                    continue
            else:
                len_seg = len_c2 = l1
            record = (seg, c2, len_seg, len_c2, len(pairs), pairs)
            if tidlist:
                record += (tids[c2],)
            yield record
"{}:{}".format(seq, qal), st, aln.is_reverse, ], ) ) ) # if overlaps_gap(aln2scaffoldx(aln)): # pass # this read overlapps a gap? # OPEN:Scaffold278839 isotig431029.5 TTTTTCTCATCAGTCTCCTCATCACAAGTTTTCTATAAGTCCCCA isotig1560433.5 TTAAATCATAGCAGTACTCATGAAGAGGGTAGTAAAGCACTGGAA 57 14 : AG if ( (not args.direct) and (not args.twopass) and (BamTags.mate_mapq(aln) >= 10.0 and nscaffold in my_scaffolds) ): # we might want this read because it's sister maps to one of our scaffolds sc, xx, yy, st, cdz = mapper.mapCoord(ncontig, aln.pnext, aln.pnext + 1) mate_scaffold_strand = st if not aln.mate_is_reverse else -1 * st if mate_scaffold_strand == -1: xx -= insert_size yy -= insert_size else: xx += insert_size yy += insert_size gaps = mapper.hits_gaps(sc, xx - 400, yy + 400) if args.debug: print("#", gaps) if gaps: seq = aln.seq qal = aln.qual
def get_nlinks_spanning(c1, xx, ll, bams, name2index, args, gap):
    """Count intra-contig read pairs that straddle a window around ``xx``.

    An alignment fetched from ``c1`` is counted when its mate is on the same
    reference (``rnext == tid``), it is not flagged as a duplicate, both its
    own mapping quality and ``BamTags.mate_mapq(aln)`` are at least
    ``args.mapq``, it starts before ``xx - gap/2`` and its mate starts after
    ``xx + gap/2``.

    Args:
        c1: Contig name; must be a key of ``ll`` and is used as the fetch
            region.
        xx: Centre coordinate of the window.
        ll: Mapping containing ``c1``; only consulted to validate the name.
        bams: Iterable of alignment-file handles exposing ``fetch``.
        name2index: Unused; kept so existing callers keep working.
        args: Object providing the ``mapq`` threshold.
        gap: Window width; halved with ``old_div``.

    Returns:
        Number of qualifying alignments over all handles.

    Raises:
        KeyError: if ``c1`` is not present in ``ll``.
    """
    _ = ll[c1]  # kept only so an unknown contig still raises KeyError

    # The window limits are loop-invariant; compute them once instead of
    # twice per alignment.
    half_gap = old_div(gap, 2)
    left_limit = xx - half_gap
    right_limit = xx + half_gap

    return sum(
        1
        for sam in bams
        for aln in sam.fetch(region=c1)
        if aln.rnext == aln.tid
        and not aln.is_duplicate
        and aln.mapq >= args.mapq
        and BamTags.mate_mapq(aln) >= args.mapq
        and aln.pos < left_limit
        and aln.pnext > right_limit
    )
def get_nlinks(c1, c2, ll, bams, name2index, args):
    """Count qualifying read pairs linking contigs ``c1`` and ``c2``.

    Alignments are fetched from whichever contig has the smaller ``ll``
    value and counted when their mate's reference id equals the other
    contig's index, they are not flagged as duplicates, and both their own
    mapping quality and ``BamTags.mate_mapq(aln)`` are at least
    ``args.mapq``.

    Args:
        c1: First contig name (key of ``ll`` and ``name2index``).
        c2: Second contig name (key of ``ll`` and ``name2index``).
        ll: Mapping from contig name to a comparable size value
            (presumably the contig length -- confirm with callers).
        bams: Iterable of alignment-file handles exposing ``fetch``.
        name2index: Mapping from contig name to the reference id compared
            against ``aln.rnext``.
        args: Object providing the ``mapq`` threshold.

    Returns:
        Number of qualifying alignments over all handles.
    """
    len1, len2 = ll[c1], ll[c2]
    if len2 < len1:
        # Scan the contig with the smaller value; match mates on the other.
        c1, c2 = c2, c1
    mate_index = name2index[c2]

    return sum(
        1
        for sam in bams
        for aln in sam.fetch(region=c1)
        if aln.rnext == mate_index
        and not aln.is_duplicate
        and aln.mapq >= args.mapq
        and BamTags.mate_mapq(aln) >= args.mapq
    )
llen={} for i in range(len(snam)): llen[snam[i]]=slen[i] buff=0 #args.trim for contig in contigs: if args.debug: print("#{}".format(contig)) if (not contig in llen) or llen[contig]<args.minlen: continue spans=[] edges=[] nb=[] lll = llen[contig] # print "#",contig for sam in bams: for aln in sam.fetch(region=contig): if (aln.tid==aln.rnext) and (aln.pos<aln.pnext) and ((not args.ignoreDuplicates) or (not aln.is_duplicate)) and (aln.mapq >= args.mapq) and (BamTags.mate_mapq(aln) >= args.mapq): # spans.append( tuple( [aln.pos, aln.pos+aln.tlen,aln.mapq,oname[sam]] ) ) # if aln.tlen > 2*buff and aln.tlen < range_cutoff: if aln.tlen > 2*buff: # and aln.tlen < range_cutoff: ll=ces.model.lnF(aln.tlen) # ll=get_score( aln.tlen ) edges.append( tuple([aln.pos + buff , ll]) ) edges.append( tuple([min(lll,aln.pos+aln.tlen-buff) ,-ll]) ) nb.append( tuple([aln.pos + buff , 1]) ) nb.append( tuple([min(lll,aln.pos+aln.tlen-buff) ,-1]) ) edges.sort() nb.sort() rs=0 n=0
def bam2chicagolinks(bamlist, my_contigs, smap, mask_ranges, mapq, minl,
                     internal=False, tidlist=False):
    """Yield per-segment link records collected from paired alignments.

    For each key ``seg`` of ``my_contigs`` the matching region is fetched
    from every handle in ``bamlist``.  Alignments are dropped when their own
    or mate mapping quality (``BamTags.mate_mapq``) is below ``mapq``, when
    flagged as duplicates, when they start before the segment start, or when
    either end is rejected by ``mask_test``.  Surviving pairs are grouped by
    the mate's segment (``smap.map_coord``) or, without ``smap``, by the
    mate's reference name.

    A ``"# <seg> <region>"`` line is printed to stdout for every segment.

    Args:
        bamlist: Sequence of alignment-file handles exposing ``fetch``,
            ``getrname`` and, for the first handle, ``references`` and
            ``lengths``.
        my_contigs: Mapping whose keys are the segments to process.
        smap: Optional segment mapper providing ``get_segment_info``,
            ``map_coord`` and ``seg2len``; falsy means "no segment map".
        mask_ranges: Mask structure handed to ``mask_test``.
        mapq: Minimum mapping quality for both ends.
        minl: With ``smap``, both segment lengths must exceed this value.
        internal: Also keep pairs whose mate falls in ``seg`` itself.
        tidlist: Append the list of query names to each record.

    Yields:
        ``(seg, c2, len_seg, len_c2, n_links, links)`` with ``links`` a list
        of ``(1 + pos - x, mate_coordinate)`` tuples, plus a trailing list of
        query names when ``tidlist`` is true.  Without ``smap`` both length
        fields are the header length of ``seg`` in ``bamlist[0]``.
    """
    for seg in my_contigs.keys():
        if smap:
            scaffold, x, y = smap.get_segment_info(seg)
            region = "{}:{}-{}".format(scaffold, x, y)
        else:
            ti = bamlist[0].references.index(seg)
            l1 = bamlist[0].lengths[ti]
            region = seg
            # NOTE(review): with x == -1 the stored offset below becomes
            # pos + 2 -- confirm this is intended.
            x = -1

        links = {}
        tids = {}
        print("#", seg, region)

        for bam in bamlist:
            for a in bam.fetch(region=region):
                if a.mapq < mapq:
                    continue
                if a.is_duplicate:
                    continue
                if BamTags.mate_mapq(a) < mapq:
                    continue
                xx, yy = a.pos, a.pnext
                if xx < x:
                    continue
                c1 = bam.getrname(a.tid)
                c2 = bam.getrname(a.rnext)
                if mask_test(c1, xx, mask_ranges):
                    continue
                if mask_test(c2, yy, mask_ranges):
                    continue
                if smap:
                    c2, yyy = smap.map_coord(c2, yy)
                else:
                    yyy = yy
                if c2 == seg and not internal:
                    continue
                if not smap and c1 != c2:
                    continue
                # Append in place: the former ``get(...) + [...]`` copied the
                # whole list for every read.
                links.setdefault(c2, []).append((1 + xx - x, yyy))
                tids.setdefault(c2, []).append(a.query_name)

        for c2, pairs in links.items():
            if smap:
                if c2 not in smap.seg2len:
                    continue
                len_seg = smap.seg2len.get(seg)
                len_c2 = smap.seg2len.get(c2)
                if not (len_seg > minl and len_c2 > minl):
                    continue
            else:
                len_seg = len_c2 = l1
            record = (seg, c2, len_seg, len_c2, len(pairs), pairs)
            if tidlist:
                record += (tids[c2],)
            yield record