Beispiel #1
0
# Map every read name to its sequence; both are coerced to plain strings.
# If a name occurs more than once, the last sequence seen wins.
reads = {str(entry.name): str(entry.seq) for entry in fastaIterator(rfh)}
sys.stderr.write("Loaded reads\n")

# NOTE(review): if getNucmerAlignmentIterator is lazy, nothing has actually
# been read from afh when the message below is printed -- confirm.
alignmentIt = getNucmerAlignmentIterator(afh)

sys.stderr.write("Loaded Alignments\n")

# Starts at zero; it is not updated in the visible part of the script.
counter = 0
# Process the alignments one group at a time, keyed on the `sname` attribute.
# groupby() only merges adjacent items, so this presumes the iterator yields
# each name's alignments consecutively -- TODO confirm.
for name, group in groupby(alignmentIt, lambda x: x.sname):

    # build the coverage vector for this group
    cov = getCoverageFromNucAlignments(group)

    # mark the positions with 0 (no) coverage as 1 and everything else as 0;
    # a comprehension yields a real list on both Python 2 and Python 3,
    # whereas map() is a one-shot iterator on Python 3
    cov_inv = [1 if depth == 0 else 0 for depth in cov]

    # ranges with zero coverage
    zero_cov_ranges = getMarkedRanges(cov_inv)

    # sequence stored under this group's key; raises KeyError if the name
    # was not among the loaded reads
    seq = reads[name]

    # calculate GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)
    
    #filter gaps that are at > MIN_COV_GAP
    #and have at least one base > GC_THRESHOLD
Beispiel #2
0
# Minimum gap size, read from the second command-line argument.
COV_GAP_MIN = int(sys.argv[2])

# The three output files share the prefix given as the third argument.
out_prefix = sys.argv[3]
fhist = open(out_prefix + ".uncov.hist", "w")
freg = open(out_prefix + ".uncov.regions", "w")
ftbases = open(out_prefix + ".uncov.total.bases", "w")

pcov = []  # pct cov

# Base counters, both starting at zero.
total_bases = total_uncovered_bases = 0

# Walk the alignments grouped on their `sname` attribute. groupby() only
# merges adjacent items, so this presumes the iterator yields each name's
# alignments consecutively -- TODO confirm.
for pbname, alignments in groupby(getNucmerAlignmentIterator(fh),
                                  lambda x: x.sname):
    # Materialize the group; a groupby sub-iterator can only be read once.
    a = list(alignments)
    cov = getCoverageFromNucAlignments(a)

    # Mark the 0 coverage positions with 1 and everything else with 0.
    # A comprehension gives a real list on both Python 2 and Python 3
    # (map() is lazy on Python 3).
    zcov = [1 if depth == 0 else 0 for depth in cov]

    # Ranges with 0 coverage.
    zcov_ranges = getMarkedRanges(zcov)

    # Only keep the gaps larger than the min gap size. The former
    # `lambda (x, y): ...` is a SyntaxError on Python 3 (PEP 3113); the
    # comprehension unpacks each two-item range the same way and produces a
    # list that can be traversed more than once.
    zcov_ranges_filt = [(gap_beg, gap_end)
                        for gap_beg, gap_end in zcov_ranges
                        if gap_end - gap_beg > COV_GAP_MIN]

    # Write the regions that pass the filter to the region file as
    # "<name>\t<beg>,<end> <beg>,<end> ...".
    freg.write("\t".join(
        [pbname, " ".join("%d,%d" % t for t in zcov_ranges_filt)]) + "\n")
Beispiel #3
0
# Input file named by the first command-line argument; it is handed to
# getNucmerAlignmentIterator further down.
fh = open(sys.argv[1])

# Minimum gap size, read from the second command-line argument.
COV_GAP_MIN = int(sys.argv[2])

# The three output files share the prefix given as the third argument.
out_prefix = sys.argv[3]
fhist = open(out_prefix + ".uncov.hist", "w")
freg = open(out_prefix + ".uncov.regions", "w")
ftbases = open(out_prefix + ".uncov.total.bases", "w")

pcov = []  # pct cov

# Running base counters, both starting at zero.
total_bases = total_uncovered_bases = 0

# Walk the alignments grouped on their `sname` attribute. groupby() only
# merges adjacent items, so this presumes the iterator yields each name's
# alignments consecutively -- TODO confirm.
for pbname, alignments in groupby(getNucmerAlignmentIterator(fh), lambda x: x.sname):
    # Materialize the group: it is passed to the coverage helper and then
    # indexed (a[0]) below, and a groupby sub-iterator can only be read once.
    a = list(alignments)
    cov = getCoverageFromNucAlignments(a)

    # Mark the 0 coverage positions with 1 and everything else with 0.
    # A comprehension gives a real list on both Python 2 and Python 3
    # (map() is lazy on Python 3).
    zcov = [1 if depth == 0 else 0 for depth in cov]

    # Ranges with 0 coverage.
    zcov_ranges = getMarkedRanges(zcov)

    # Only keep the gaps larger than the min gap size. The former
    # `lambda (x, y): ...` is a SyntaxError on Python 3 (PEP 3113), and a
    # lazy filter() would be exhausted by the write below, leaving nothing
    # for the loop over zcov_ranges_filt that follows. A list avoids both.
    zcov_ranges_filt = [(gap_beg, gap_end)
                        for gap_beg, gap_end in zcov_ranges
                        if gap_end - gap_beg > COV_GAP_MIN]

    # Write the regions that pass the filter to the region file as
    # "<name>\t<beg>,<end> <beg>,<end> ...".
    freg.write("\t".join([pbname, " ".join("%d,%d" % t for t in zcov_ranges_filt)]) + "\n")

    # Add the `slen` of the group's first alignment to the running total
    # (presumably the length of the sequence named by pbname -- confirm).
    total_bases += a[0].slen
    for rbeg, rend in zcov_ranges_filt:
Beispiel #4
0
# Map every read name to its sequence; both are coerced to plain strings.
# If a name occurs more than once, the last sequence seen wins.
reads = {str(entry.name): str(entry.seq) for entry in fastaIterator(rfh)}
sys.stderr.write("Loaded reads\n")

# NOTE(review): if getNucmerAlignmentIterator is lazy, nothing has actually
# been read from afh when the message below is printed -- confirm.
alignmentIt = getNucmerAlignmentIterator(afh)

sys.stderr.write("Loaded Alignments\n")

# Starts at zero; it is not updated in the visible part of the script.
counter = 0
# Process the alignments one group at a time, keyed on the `sname` attribute.
# groupby() only merges adjacent items, so this presumes the iterator yields
# each name's alignments consecutively -- TODO confirm.
for name, group in groupby(alignmentIt, lambda x: x.sname):

    # build the coverage vector for this group
    cov = getCoverageFromNucAlignments(group)

    # mark the positions with 0 (no) coverage as 1 and everything else as 0;
    # a comprehension yields a real list on both Python 2 and Python 3,
    # whereas map() is a one-shot iterator on Python 3
    cov_inv = [1 if depth == 0 else 0 for depth in cov]

    # ranges with zero coverage
    zero_cov_ranges = getMarkedRanges(cov_inv)

    # sequence stored under this group's key; raises KeyError if the name
    # was not among the loaded reads
    seq = reads[name]

    # calculate GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)

    #filter gaps that are at > MIN_COV_GAP
    #and have at least one base > GC_THRESHOLD