예제 #1
0
        continue
    ## create ranges of accepted alignments
    # One slot per position (pblen slots). A slot holds the qname of the
    # alignment accepted at that position, or None where nothing aligned.
    accept_alignment_ranges = [None] * pblen
    # Sort ascending by aligned span so that longer alignments are written
    # last and overwrite shorter ones wherever they overlap.
    # (A previously tried key weighted the span by identity:
    #  (send - sstart) * (pctid / 100.0) ** 2 -- currently disabled.)
    alignments[pbname].sort(key=lambda a: a.send - a.sstart)
    for alignment in alignments[pbname]:
        # NOTE(review): the "sstart - 1 .. send" window suggests 1-based,
        # inclusive coordinates -- confirm against the alignment parser.
        for p in range(alignment.sstart - 1, alignment.send):
            accept_alignment_ranges[p] = alignment.qname

    ##
    ## find clr ranges
    ##

    # find ranges
    # Build 0/1 masks and let getMarkedRanges turn each mask into (s, e)
    # pairs.  List comprehensions are used instead of map/filter with
    # tuple-parameter lambdas: the result is the same list on Python 2, and
    # the code no longer depends on Python-2-only syntax or on map/filter
    # returning lists (required by the "+" concatenation below).
    covered_mask = [0 if owner is None else 1
                    for owner in accept_alignment_ranges]
    # NOTE(review): the third CoverageRange argument (1.0 / 0.7) appears to be
    # the identity assigned to the range -- confirm against CoverageRange.
    covered_ranges = [CoverageRange(s, e, 1.0, CovStat["COVERED"])
                      for s, e in getMarkedRanges(covered_mask)]
    uncovered_mask = [1 if owner is None else 0
                      for owner in accept_alignment_ranges]
    uncovered_ranges = [CoverageRange(s, e, 0.7, CovStat["UNCOVERED"])
                        for s, e in getMarkedRanges(uncovered_mask)]
    # remove uncorrected ends: drop uncovered stretches that start at
    # position 0 or end at position pblen - 1.
    # NOTE(review): this assumes getMarkedRanges reports inclusive end
    # indices; with half-open ends a trailing gap would have end == pblen and
    # would survive this filter -- confirm.
    uncovered_ranges = [r for r in uncovered_ranges
                        if not (r.begin == 0 or r.end == pblen - 1)]

    joined_ranges = sorted(covered_ranges + uncovered_ranges,
                           key=lambda x: x.begin)

    #find the clr ranges
    while True:
        clr_ranges = get_contiguous_ranges(joined_ranges)
        if( all(map(lambda y: y.pctid > CLR_ID_CUTOFF,clr_ranges))):
            break
        for cr in clr_ranges:
            #skip clr ranges that are ok
            if cr.pctid > CLR_ID_CUTOFF:
예제 #2
0
        continue
    ## create ranges of accepted alignments
    # One slot per position (pblen slots). A slot holds the qname of the
    # alignment accepted at that position, or None where nothing aligned.
    accept_alignment_ranges = [None] * pblen
    # Sort ascending by aligned span so that longer alignments are written
    # last and overwrite shorter ones wherever they overlap.
    # (A previously tried key weighted the span by identity:
    #  (send - sstart) * (pctid / 100.0) ** 2 -- currently disabled.)
    alignments[pbname].sort(key=lambda a: a.send - a.sstart)
    for alignment in alignments[pbname]:
        # NOTE(review): the "sstart - 1 .. send" window suggests 1-based,
        # inclusive coordinates -- confirm against the alignment parser.
        for p in range(alignment.sstart - 1, alignment.send):
            accept_alignment_ranges[p] = alignment.qname

    ##
    ## find clr ranges
    ##

    # find ranges
    # Build 0/1 masks and let getMarkedRanges turn each mask into (s, e)
    # pairs.  List comprehensions are used instead of map/filter with
    # tuple-parameter lambdas: the result is the same list on Python 2, and
    # the code no longer depends on Python-2-only syntax or on map/filter
    # returning lists (required by the "+" concatenation below).
    covered_mask = [0 if owner is None else 1
                    for owner in accept_alignment_ranges]
    # NOTE(review): the third CoverageRange argument (1.0 / 0.7) appears to be
    # the identity assigned to the range -- confirm against CoverageRange.
    covered_ranges = [CoverageRange(s, e, 1.0, CovStat["COVERED"])
                      for s, e in getMarkedRanges(covered_mask)]
    uncovered_mask = [1 if owner is None else 0
                      for owner in accept_alignment_ranges]
    uncovered_ranges = [CoverageRange(s, e, 0.7, CovStat["UNCOVERED"])
                        for s, e in getMarkedRanges(uncovered_mask)]
    # remove uncorrected ends: drop uncovered stretches that start at
    # position 0 or end at position pblen - 1.
    # NOTE(review): this assumes getMarkedRanges reports inclusive end
    # indices; with half-open ends a trailing gap would have end == pblen and
    # would survive this filter -- confirm.
    uncovered_ranges = [r for r in uncovered_ranges
                        if not (r.begin == 0 or r.end == pblen - 1)]

    joined_ranges = sorted(covered_ranges + uncovered_ranges,
                           key=lambda x: x.begin)

    #find the clr ranges
    while True:
        clr_ranges = get_contiguous_ranges(joined_ranges)
        if( all(map(lambda y: y.pctid > CLR_ID_CUTOFF,clr_ranges))):
            break
        for cr in clr_ranges:
            #skip clr ranges that are ok
            if cr.pctid > CLR_ID_CUTOFF:
예제 #3
0
파일: gc_count.py 프로젝트: km1500/ectools
# Iterator over the alignment records read from afh.
alignmentIt = getNucmerAlignmentIterator(afh)

sys.stderr.write("Loaded Alignments\n")

counter = 0
# For every group of alignments sharing the same sname, write one line
# "<name>\t<bases>" where <bases> is the total length of the zero-coverage
# gaps that are long enough and contain a high-GC window.
# NOTE(review): if groupby is itertools.groupby it only merges *adjacent*
# records, so the input must already be grouped by sname -- confirm.
for name, group in groupby(alignmentIt, lambda x: x.sname):

    # build coverage vector
    cov = getCoverageFromNucAlignments(group)

    # mark the regions with 0 (no) coverage as 1 and change
    # everything else to 0
    cov_inv = [1 if c == 0 else 0 for c in cov]

    # ranges with zero coverage, as (s, e) pairs
    zero_cov_ranges = getMarkedRanges(cov_inv)

    seq = reads[name]

    # calculate GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)

    # keep gaps that are longer than MIN_COV_GAP and have at least one
    # window value above GC_THRESHOLD, then sum their lengths (e - s).
    # A generator expression replaces the former tuple-parameter lambda,
    # which is Python-2-only syntax; the computed sum is unchanged.
    gc_gap_bases = sum(
        e - s
        for s, e in zero_cov_ranges
        if e - s > MIN_COV_GAP
        and any(gc > GC_THRESHOLD for gc in gc_sliding_window[s:e]))

    ofh.write("%s\t%d\n" % (name, gc_gap_bases))
    if counter % 10000 == 0:
예제 #4
0
# Output file for the uncovered/total base counts; its name is derived from
# the third command-line argument.
# NOTE(review): this handle is not closed within the visible code -- make sure
# it is closed (or wrapped in a "with" block) where it is finally used.
ftbases = open(sys.argv[3] + ".uncov.total.bases", "w")

pcov = []  # pct cov

total_bases = 0
total_uncovered_bases = 0

# Process the alignments one sname-group at a time.
# NOTE(review): if groupby is itertools.groupby it only merges *adjacent*
# records, so the input must already be grouped by sname -- confirm.
for pbname, alignments in groupby(getNucmerAlignmentIterator(fh), lambda x: x.sname):
    # Materialise the group: it is indexed (a[0]) and passed on below.
    a = list(alignments)
    cov = getCoverageFromNucAlignments(a)

    # mark the 0 coverage regions
    zcov = [1 if c == 0 else 0 for c in cov]

    # ranges with 0 coverage
    zcov_ranges = getMarkedRanges(zcov)

    # only look at the gaps larger than the min gap size.
    # This must be a real list: it is iterated three times below, and a lazy
    # filter object would be empty after the first pass.
    zcov_ranges_filt = [(rbeg, rend) for rbeg, rend in zcov_ranges
                        if rend - rbeg > COV_GAP_MIN]

    # write out the regions that pass filter to region file:
    # "<pbname>\t<beg>,<end> <beg>,<end> ..."
    region_text = " ".join("%d,%d" % (rbeg, rend) for rbeg, rend in zcov_ranges_filt)
    freg.write("\t".join([pbname, region_text]) + "\n")

    # slen is read from the first alignment of the group.
    read_len = a[0].slen
    total_bases += read_len
    for rbeg, rend in zcov_ranges_filt:
        total_uncovered_bases += rend - rbeg

    # Express each gap as integer percentages of the last index (slen - 1).
    # NOTE(review): slen == 1 together with a surviving gap would divide by
    # zero here, exactly as in the previous version -- confirm it cannot occur.
    last_index = read_len - 1
    pct_ranges = [
        (int(float(rbeg) / last_index * 100), int(float(rend) / last_index * 100))
        for rbeg, rend in zcov_ranges_filt
    ]
예제 #5
0
pcov = []  # pct cov

total_bases = 0
total_uncovered_bases = 0

# Process the alignments one sname-group at a time.
# NOTE(review): if groupby is itertools.groupby it only merges *adjacent*
# records, so the input must already be grouped by sname -- confirm.
for pbname, alignments in groupby(getNucmerAlignmentIterator(fh),
                                  lambda x: x.sname):
    # Materialise the group: it is indexed (a[0]) and passed on below.
    a = list(alignments)
    cov = getCoverageFromNucAlignments(a)

    # mark the 0 coverage regions
    zcov = [1 if c == 0 else 0 for c in cov]

    # ranges with 0 coverage
    zcov_ranges = getMarkedRanges(zcov)

    # only look at the gaps larger than the min gap size.
    # This must be a real list: it is iterated more than once below, and a
    # lazy filter object would be empty after the first pass.
    zcov_ranges_filt = [(rbeg, rend) for rbeg, rend in zcov_ranges
                        if rend - rbeg > COV_GAP_MIN]

    # write out the regions that pass filter to region file:
    # "<pbname>\t<beg>,<end> <beg>,<end> ..."
    region_text = " ".join(
        "%d,%d" % (rbeg, rend) for rbeg, rend in zcov_ranges_filt)
    freg.write("\t".join([pbname, region_text]) + "\n")

    # slen is read from the first alignment of the group.
    total_bases += a[0].slen
    for rbeg, rend in zcov_ranges_filt:
        total_uncovered_bases += rend - rbeg

    pct_ranges = map(
        lambda (rb, re):
예제 #6
0
# Iterator over the alignment records read from afh.
alignmentIt = getNucmerAlignmentIterator(afh)

sys.stderr.write("Loaded Alignments\n")

counter = 0
# For every group of alignments sharing the same sname, compute the total
# length of the zero-coverage gaps that are long enough and contain a
# high-GC window.
# NOTE(review): if groupby is itertools.groupby it only merges *adjacent*
# records, so the input must already be grouped by sname -- confirm.
for name, group in groupby(alignmentIt, lambda x: x.sname):

    # build coverage vector
    cov = getCoverageFromNucAlignments(group)

    # mark the regions with 0 (no) coverage as 1 and change
    # everything else to 0
    cov_inv = [1 if c == 0 else 0 for c in cov]

    # ranges with zero coverage, as (s, e) pairs
    zero_cov_ranges = getMarkedRanges(cov_inv)

    seq = reads[name]

    # calculate GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)

    # keep gaps that are longer than MIN_COV_GAP and have at least one
    # window value above GC_THRESHOLD, then sum their lengths (e - s).
    # A generator expression replaces the former tuple-parameter lambda,
    # which is Python-2-only syntax; the computed sum is unchanged.
    gc_gap_bases = sum(
        e - s
        for s, e in zero_cov_ranges
        if e - s > MIN_COV_GAP
        and any(gc > GC_THRESHOLD for gc in gc_sliding_window[s:e]))