Ejemplo n.º 1
0
    mm = 0
    for c1, c2 in zip(s1, s2):
        if c1 != c2:
            mm += 1
            if mm > dist:
                return True
    return False


if __name__ == "__main__":
    # Command line: SEQLEN DIST UNIQUED[,UNIQUED...] OUTFILE
    # The unpacking raises ValueError unless exactly four arguments follow
    # the script name.
    seqlen, dist, uniqueds, outfile = sys.argv[1:]

    # Convert the numeric arguments once, before any input is loaded, so a
    # malformed value fails immediately and the conversion is not repeated
    # for every pair compared in the loop below.
    target_len = int(seqlen)
    max_dist = int(dist)

    all_quality = defaultdict(dict)

    # Every comma-separated entry is handed to rtd_run.load_uniqued together
    # with the shared all_quality mapping.
    for u in uniqueds.split(","):
        rtd_run.load_uniqued(all_quality, u, count_by_ind=True)

    # Group the keys of all_quality by their length.
    seq_by_len = defaultdict(list)
    for k in all_quality:
        seq_by_len[len(k)].append(k)

    seqs = seq_by_len[target_len]
    seqs.sort()

    # offby maps each sequence s to the other sequences of the selected
    # length that have not yet been used as a key and for which
    # reject_pair(s, si, max_dist) is falsy.
    offby = {}
    total = len(seqs)
    for i, s in enumerate(seqs):
        # Test membership on the dict itself: offby.keys() builds a new list
        # on every check under Python 2, which made each test linear.
        offby[s] = [
            si for si in seqs
            if si != s and si not in offby and not reject_pair(s, si, max_dist)
        ]
        # Progress indicator; "\r" plus the trailing comma keeps it on one line.
        print >>sys.stderr, "\r%s / %s" % (i, total),

    # Write the repr of the mapping; the with-block guarantees the file is
    # flushed and closed even if the write fails.
    with open(outfile, "w") as fh:
        fh.write(repr(offby))
Ejemplo n.º 2
0
    bysize_dir = os.path.join(outdir,'by_size/uni_len')
    bysize_done = os.path.join(outdir,'by_size.done')
    denovo_ref = os.path.join(outdir,'denovo.fa')

    if os.path.exists(denovo_ref):
        print >> sys.stderr, 'REMOVE REF: %s' % denovo_ref
        os.unlink(denovo_ref)

    if os.path.exists(bysize_done):
        ofbysize = get_uniqued_by_size(bysize_dir)
    else:
        all_quality = defaultdict(dict)
        
        for uniqued in uniqueds:
            load_uniqued(all_quality,uniqued,count_by_ind=True)
            
        print >> sys.stderr, 'LOAD COMPLETE. WRITE BY-SIZE.'
        ofbysize = write_uniqued_by_size(all_quality,bysize_dir)
        del all_quality
        ret = os.system('touch %s' % bysize_done) 
    
    sizes = sorted(ofbysize.keys(),reverse=True)

    for i in sizes:
        print >> sys.stderr, '\nSTART %s' % i
        uni = ofbysize[i]

        ufq = uniqued_to_fastq(uni)
        nreads = get_read_count(ufq)