def ichunker(seqs, rledict=None, min_clust_size=1, max_clust_size=sys.maxsize):
    """Return an iterator of (seqlist, rlelist) tuples.

    Clusters with fewer than ``min_clust_size`` sequences are dropped.
    Clusters larger than ``max_clust_size`` are shuffled and broken into
    chunks no larger than ``max_clust_size``; a trailing chunk smaller than
    half the target size is combined with the next-to-last chunk.

    ``rlelist`` is the per-sequence run-length info looked up from
    ``rledict`` by sequence id, or None when ``rledict`` is not provided.

    NOTE: was ``sys.maxint``, which does not exist on Python 3;
    ``sys.maxsize`` is available on both Python 2.6+ and 3.x.
    """
    for cluster in seqs:
        cluster = list(cluster)
        if len(cluster) < min_clust_size:
            # too small to keep
            continue
        elif len(cluster) > max_clust_size:
            # randomize membership before splitting so chunks are unbiased
            shuffle(cluster)
            # combine trailing chunk with next to last if less than
            # half the target chunk size
            combine_last = max_clust_size * 0.5
            for chunk in chunker(cluster, max_clust_size, combine_last):
                rlelist = [from_ascii(rledict[s.id]) for s in chunk] if rledict else None
                yield (chunk, rlelist)
        else:
            rlelist = [from_ascii(rledict[s.id]) for s in cluster] if rledict else None
            yield (cluster, rlelist)
def decode(aligns):
    """Homodecode the target and query sequences of *aligns* in place.

    Run-length info for each sequence is looked up in the module-level
    ``decoding`` mapping by sequence name.  Returns the mutated record.
    """
    t_rle = from_ascii(decoding[aligns["t_name"]])
    q_rle = from_ascii(decoding[aligns["q_name"]])
    t_seq, q_seq = homodecodealignment(
        aligns["t_seq"], t_rle, aligns["q_seq"], q_rle)
    aligns["t_seq"] = t_seq
    aligns["q_seq"] = q_seq
    return aligns
def action(args):
    """Homodecode input sequences in parallel and write FASTA to args.outfile."""
    # Ignore SIGPIPE/SIGINT so downstream tools like `head` work cleanly
    utils.exit_on_sigpipe()
    utils.exit_on_sigint()
    # sequence name -> ascii-encoded run lengths, from the RLE csv
    rledict = {name: rle for name, rle in csv.reader(args.rle)}
    pairs = ((record, from_ascii(rledict[record.id])) for record in args.seqs)
    pool = Pool(processes=args.threads)
    for record, decoded in pool.imap(seq_and_homodecode, pairs, chunksize=140):
        args.outfile.write('>{}\n{}\n'.format(record.description, decoded))
def decode(aligns):
    """Replace t_seq/q_seq in *aligns* with their homodecoded forms.

    Uses the module-level ``decoding`` mapping (name -> ascii RLE) for
    both sequences.  Returns the mutated record.
    """
    decoded = homodecodealignment(
        aligns['t_seq'],
        from_ascii(decoding[aligns['t_name']]),
        aligns['q_seq'],
        from_ascii(decoding[aligns['q_name']]),
    )
    aligns['t_seq'], aligns['q_seq'] = decoded
    return aligns
def rlemap(seq):
    """Return a SeqLite copy of *seq* with its sequence homodecoded.

    Run-length info comes from the module-level ``args.rlefile`` mapping,
    keyed by sequence id.
    """
    rle = from_ascii(args.rlefile[seq.id])
    return SeqLite(seq.id, seq.description, homodecode(seq.seq, rle))
def test01(self):
    """to_ascii/from_ascii round-trip returns the original values.

    NOTE: was ``v = range(79)`` — on Python 3 ``range`` returns a lazy
    range object, which never compares equal to the list produced by
    ``from_ascii``; materializing with ``list(...)`` is identical on
    Python 2 and correct on Python 3.
    """
    v = list(range(79))
    self.assertEqual(v, sequtils.from_ascii(sequtils.to_ascii(v)))