# Builds a per-base table of accepted alignments for one read and derives the
# covered / uncovered ranges used to find the "clr" ranges.
#  - accept_alignment_ranges has pblen slots, all None to start with.
#    Alignments are sorted ascending by (send - sstart) and each one writes its
#    qname into slots sstart-1 .. send-1, so where two alignments overlap the
#    longer one (applied later) wins.
#  - Runs of filled slots become CoverageRange(s, e, 1.0, CovStat["COVERED"]);
#    runs of empty slots become CoverageRange(s, e, 0.7, CovStat["UNCOVERED"]).
#  - Uncovered ranges with begin == 0 or end == pblen-1 are discarded, then both
#    lists are concatenated and sorted by begin.
#  - The while loop recomputes get_contiguous_ranges(joined_ranges) and exits
#    once every resulting range has pctid > CLR_ID_CUTOFF.
# NOTE(review): this excerpt starts with the tail of an unseen branch (the
# leading `continue`) and is cut off after the final `if`; the handling of
# ranges at or below the cutoff is not visible here.
# NOTE(review): Python 2 only -- `lambda (s,e):` tuple parameters were removed
# in Python 3, and `covered_ranges + uncovered_ranges` needs map() to return
# lists.
continue ##create ranges of accepted alignments accept_alignment_ranges = [None] * pblen #alignments[pbname].sort(key=lambda a: (a.send-a.sstart) * pow(a.pctid/100.0,2)) alignments[pbname].sort(key=lambda a: (a.send-a.sstart)) for alignment in alignments[pbname]: for p in range(alignment.sstart-1,alignment.send): accept_alignment_ranges[p] = alignment.qname ## ##find clr ranges ## #find ranges covered_ranges = map(lambda (s,e): CoverageRange(s,e,1.0,CovStat["COVERED"]), getMarkedRanges(map(lambda c: 1 if not c == None else 0 , accept_alignment_ranges))) uncovered_ranges = map(lambda (s,e): CoverageRange(s,e,0.7,CovStat["UNCOVERED"]), getMarkedRanges(map(lambda c: 1 if c == None else 0 , accept_alignment_ranges))) #remove uncorrected ends uncovered_ranges = filter(lambda x: not (x.begin == 0 or x.end == pblen-1),uncovered_ranges) joined_ranges = sorted(covered_ranges + uncovered_ranges, key=lambda x: x.begin) #find the clr ranges while True: clr_ranges = get_contiguous_ranges(joined_ranges) if( all(map(lambda y: y.pctid > CLR_ID_CUTOFF,clr_ranges))): break for cr in clr_ranges: #skip clr ranges that are ok if cr.pctid > CLR_ID_CUTOFF:
# For every group of alignments sharing the same sname:
#  - builds the coverage vector and marks positions whose coverage is 0,
#  - turns those marks into ranges with getMarkedRanges,
#  - computes GC values over the read (reads[name]) with getGCSlidingWindow,
#  - sums e - s over the zero-coverage ranges that are longer than MIN_COV_GAP
#    and contain at least one window value above GC_THRESHOLD,
#  - writes "<name>\t<gc_gap_bases>" to ofh.
# NOTE(review): groupby is presumably itertools.groupby, which only merges
# consecutive items -- confirm the iterator yields alignments ordered by sname.
# NOTE(review): this excerpt is cut off after the trailing
# `if counter % 10000 == 0:` header; its body is not visible here.
# NOTE(review): Python 2 only -- `lambda (s,e):` tuple parameters were removed
# in Python 3.
alignmentIt = getNucmerAlignmentIterator(afh) sys.stderr.write("Loaded Alignments\n"); counter = 0 for name,group in groupby(alignmentIt, lambda x: x.sname): #build coverage vector cov = getCoverageFromNucAlignments(group) #mark the regions with 0 (no) coverage as 1 and change #everything else to 0 cov_inv = map(lambda c: 1 if c == 0 else 0, cov) #ranges with zero coverage zero_cov_ranges = getMarkedRanges(cov_inv) seq = reads[name] #calculate GC % for windows of GC_WINDOW_SIZE gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE) #filter gaps that are at > MIN_COV_GAP #and have at least one base > GC_THRESHOLD #take the sum of the lengths of all of the regions gc_gap_bases = sum(map(lambda (s,e): e-s if e-s > MIN_COV_GAP and any(map(lambda x: True if x > GC_THRESHOLD else False, gc_sliding_window[s:e])) else 0, zero_cov_ranges)) ofh.write("%s\t%d\n" % (name,gc_gap_bases)) if counter % 10000 == 0:
# Zero-coverage report, one pass over the alignments grouped by sname.
#
# For each group this:
#   * builds the coverage vector and extracts the ranges whose coverage is 0,
#   * keeps only the ranges longer than COV_GAP_MIN,
#   * writes "<pbname>\t<beg,end beg,end ...>" to freg,
#   * accumulates the read length (slen of the group's first alignment) into
#     total_bases and the kept gap lengths into total_uncovered_bases,
#   * rescales the kept ranges to integer percentages of the read length
#     (pct_ranges).
#
# The intermediate sequences are built with list comprehensions rather than
# map/filter plus tuple-parameter lambdas: the behaviour is the same on
# Python 2, and the code no longer depends on Python-2-only syntax or on
# map/filter returning lists (zcov_ranges_filt is iterated three times).
#
# NOTE(review): groupby is presumably itertools.groupby, which only merges
# consecutive items -- confirm the iterator yields alignments ordered by sname.
# NOTE(review): ftbases is opened here but closed outside this excerpt; make
# sure the code that follows closes it.
ftbases = open(sys.argv[3] + ".uncov.total.bases", "w")

pcov = []  # pct cov
total_bases = 0
total_uncovered_bases = 0
for pbname, alignments in groupby(getNucmerAlignmentIterator(fh),
                                  lambda x: x.sname):
    # groupby hands back a one-shot iterator; materialise it because the
    # group is needed both for the coverage vector and for a[0].slen.
    a = list(alignments)
    cov = getCoverageFromNucAlignments(a)

    # mark the 0 coverage regions
    zcov = [1 if depth == 0 else 0 for depth in cov]

    # ranges with 0 coverage
    zcov_ranges = getMarkedRanges(zcov)

    # only look at the gaps larger than the min gap size
    zcov_ranges_filt = [gap for gap in zcov_ranges
                        if gap[1] - gap[0] > COV_GAP_MIN]

    # write out the regions that pass filter to region file
    region_field = " ".join("%d,%d" % gap for gap in zcov_ranges_filt)
    freg.write("\t".join([pbname, region_field]) + "\n")

    total_bases += a[0].slen
    for rbeg, rend in zcov_ranges_filt:
        total_uncovered_bases += rend - rbeg

    # Express each gap as integer percentages of the last base index.
    # float() keeps the division true division on Python 2 as well.
    last_index = a[0].slen - 1
    pct_ranges = [
        (int(float(gap_beg) / last_index * 100),
         int(float(gap_end) / last_index * 100))
        for gap_beg, gap_end in zcov_ranges_filt
    ]
# Zero-coverage report, one pass over the alignments grouped by sname.
# For each group this builds the coverage vector, extracts the ranges whose
# coverage is 0, keeps the ones longer than COV_GAP_MIN, writes
# "<pbname>\t<beg,end beg,end ...>" to freg, adds the slen of the group's first
# alignment to total_bases and the kept gap lengths to total_uncovered_bases.
# NOTE(review): groupby is presumably itertools.groupby, which only merges
# consecutive items -- confirm the iterator yields alignments ordered by sname.
# NOTE(review): this excerpt is cut off in the middle of the pct_ranges
# expression (after `lambda (rb, re):`); the rest is not visible here.
# NOTE(review): Python 2 only -- tuple-parameter lambdas were removed in
# Python 3, and zcov_ranges_filt is iterated more than once, which needs
# filter() to return a list.
pcov = [] #pct cov total_bases = 0 total_uncovered_bases = 0 for pbname, alignments in groupby(getNucmerAlignmentIterator(fh), lambda x: x.sname): a = list(alignments) cov = getCoverageFromNucAlignments(a) #mark the 0 coverage regions zcov = map(lambda c: 1 if c == 0 else 0, cov) #ranges with 0 coverage zcov_ranges = getMarkedRanges(zcov) #only look at the gaps larger than the min gap size zcov_ranges_filt = filter(lambda (x, y): y - x > COV_GAP_MIN, zcov_ranges) #write out the regions that pass filter to region file freg.write("\t".join( [pbname, " ".join(map(lambda t: "%d,%d" % t, zcov_ranges_filt))]) + "\n") total_bases += a[0].slen for rbeg, rend in zcov_ranges_filt: total_uncovered_bases += rend - rbeg pct_ranges = map( lambda (rb, re):
# GC-rich coverage-gap tally, one pass over the alignments grouped by sname.
#
# For each group this:
#   * builds the coverage vector and extracts the ranges whose coverage is 0,
#   * computes GC values over the read (reads[name]) with getGCSlidingWindow,
#   * sums the lengths of the zero-coverage ranges that are longer than
#     MIN_COV_GAP and contain at least one window value above GC_THRESHOLD
#     (gc_gap_bases).
#
# The filtering is written as a generator expression instead of nested
# map/lambda calls: the total is the same, the GC test stops at the first
# window above the threshold, and the code no longer relies on the
# Python-2-only tuple-parameter lambda syntax.
#
# NOTE(review): groupby is presumably itertools.groupby, which only merges
# consecutive items -- confirm the iterator yields alignments ordered by sname.
alignmentIt = getNucmerAlignmentIterator(afh)
sys.stderr.write("Loaded Alignments\n")

counter = 0
for name, group in groupby(alignmentIt, lambda x: x.sname):
    # build coverage vector
    cov = getCoverageFromNucAlignments(group)

    # mark the regions with 0 (no) coverage as 1 and change
    # everything else to 0
    cov_inv = [1 if depth == 0 else 0 for depth in cov]

    # ranges with zero coverage
    zero_cov_ranges = getMarkedRanges(cov_inv)

    seq = reads[name]

    # calculate GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)

    # keep gaps that are > MIN_COV_GAP and have at least one
    # base > GC_THRESHOLD, then take the sum of their lengths
    gc_gap_bases = sum(
        gap_end - gap_beg
        for gap_beg, gap_end in zero_cov_ranges
        if gap_end - gap_beg > MIN_COV_GAP
        and any(gc > GC_THRESHOLD
                for gc in gc_sliding_window[gap_beg:gap_end])
    )