sys.exit(1) fh = open(sys.argv[1]) COV_GAP_MIN = int(sys.argv[2]) fhist = open(sys.argv[3] + ".uncov.hist", "w") freg = open(sys.argv[3] + ".uncov.regions", "w") ftbases = open(sys.argv[3] + ".uncov.total.bases", "w") pcov = [] # pct cov total_bases = 0 total_uncovered_bases = 0 for pbname, alignments in groupby(getNucmerAlignmentIterator(fh), lambda x: x.sname): a = list(alignments) cov = getCoverageFromNucAlignments(a) # mark the 0 coverage regions zcov = map(lambda c: 1 if c == 0 else 0, cov) # ranges with 0 coverage zcov_ranges = getMarkedRanges(zcov) # only look at the gaps larger than the min gap size zcov_ranges_filt = filter(lambda (x, y): y - x > COV_GAP_MIN, zcov_ranges) # write out the regions that pass filter to region file freg.write("\t".join([pbname, " ".join(map(lambda t: "%d,%d" % t, zcov_ranges_filt))]) + "\n")
# Usage: gc_count.py reads.fa alignments.sc outprefix
#
# Loads every read from the FASTA file into memory, then walks the nucmer
# alignments grouped by their `sname` attribute and, for each group, finds the
# ranges that have zero alignment coverage.
if len(sys.argv) != 4:
    # sys.exit() with a string writes it to stderr and exits with status 1,
    # which keeps the usage text on the same stream as the status messages.
    sys.exit("gc_count.py reads.fa alignments.sc outprefix")

# All three files are opened up front so a bad path fails before any work.
rfh = open(sys.argv[1])
afh = open(sys.argv[2])
ofh = open(sys.argv[3] + ".uncov.gc.bases", "w")
# NOTE(review): afh and ofh are not closed anywhere in this part of the
# script -- confirm they are closed once the output has been written.

# Map read name -> sequence. The FASTA handle is only needed for this step,
# so it is closed as soon as the reads are loaded (or if loading fails).
reads = {}
with rfh:
    for entry in fastaIterator(rfh):
        reads[str(entry.name)] = str(entry.seq)

sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)
sys.stderr.write("Loaded Alignments\n")

counter = 0  # not used in this part of the script
# groupby only merges consecutive records, so this presumably relies on the
# alignment file being ordered by sname -- TODO confirm.
for name, group in groupby(alignmentIt, lambda x: x.sname):
    # build coverage vector
    cov = getCoverageFromNucAlignments(group)

    # mark the positions with 0 (no) coverage as 1 and everything else as 0;
    # a list comprehension yields a real list on both Python 2 and Python 3
    cov_inv = [1 if c == 0 else 0 for c in cov]

    # ranges with zero coverage
    zero_cov_ranges = getMarkedRanges(cov_inv)
# Usage: alignment_verify.py alignments.sc
#
# For every group of alignments sharing an `sname` whose members do not all
# have the same `qname`, print each alignment record; records whose query
# start and end both lie away from the query ends are tagged "\tMIDDLE".
import sys
# NOTE(review): `io` here has to be the project's own module (the standard
# library `io` has no getNucmerAlignmentIterator) -- confirm the import path.
from io import getNucmerAlignmentIterator, nucRecordToString
from itertools import groupby
from operator import attrgetter

# An alignment endpoint must be more than this far from both ends of the
# query to be considered "in the middle".
END_CUTOFF = 200

if not len(sys.argv) == 2:
    sys.exit("alignment_verify.py alignments.sc")

# `with` guarantees the alignment file is closed even if parsing fails.
with open(sys.argv[1]) as fh:
    # groupby only merges consecutive records, so this presumably relies on
    # the file being ordered by sname -- TODO confirm.
    for _pbname, alignments in groupby(getNucmerAlignmentIterator(fh),
                                       attrgetter("sname")):
        al = list(alignments)  # groupby never yields an empty group

        # if all alignments are to the same unitig there is nothing to report
        first_qname = al[0].qname
        if all(aln.qname == first_qname for aln in al):
            continue

        for aln in al:
            upper = aln.qlen - END_CUTOFF
            record = nucRecordToString(aln)
            if END_CUTOFF < aln.qstart < upper and END_CUTOFF < aln.qend < upper:
                record += "\tMIDDLE"
            # a single parenthesised argument prints identically on Python 2
            # (print statement) and Python 3 (print function)
            print(record)