def basecall(model, reads, chunksize=4000, overlap=500, batchsize=32, reverse=False):
    """
    Basecall a set of reads.

    Each read is split into sub-reads, chunked, scored by the model in
    batches, stitched back into per-sub-read results, and finally regrouped
    and concatenated per parent read. The whole pipeline is lazy: work
    happens as the returned iterator is consumed. When ``reverse`` is True
    the sub-reads are processed back-to-front and scores are stitched in
    reverse.
    """
    order = -1 if reverse else 1
    sub_reads = (
        piece
        for read in reads
        for piece in split_read(read, chunksize * batchsize)[::order]
    )
    chunks = (
        ((read, start, end),
         chunk(torch.from_numpy(read.signal[start:end]), chunksize, overlap))
        for (read, start, end) in sub_reads
    )
    scored = (
        (key, compute_scores(model, batch, reverse=reverse))
        for key, batch in batchify(chunks, batchsize=batchsize)
    )
    stitched = (
        (read, stitch(scores, chunksize, overlap, end - start, model.stride, reverse=reverse))
        for ((read, start, end), scores) in unbatchify(scored)
    )
    # Move results off the device on a single background thread.
    moved = thread_map(transfer, stitched, n_thread=1)
    return (
        (read, concat([piece for _, piece in parts]))
        for read, parts in groupby(moved, lambda item: item[0])
    )
def basecall(model, reads, chunksize=4000, overlap=100, batchsize=32, reverse=False):
    """
    Basecalls a set of reads.

    Reads are chunked, batched and scored by the model, stitched back into
    per-read results, and the move table is expanded by the model stride.
    Every stage is wrapped in ``thread_iter`` so the stages run
    concurrently as the returned iterator is consumed.
    """
    chunks = thread_iter(
        ((read, 0, len(read.signal)),
         chunk(torch.from_numpy(read.signal), chunksize, overlap))
        for read in reads
    )
    batches = thread_iter(batchify(chunks, batchsize=batchsize))
    scored = thread_iter(
        (key, compute_scores(model, batch, reverse=reverse))
        for key, batch in batches
    )
    # `read_scores` is the per-read slice yielded by unbatchify — named
    # distinctly to avoid shadowing the batched score stream above.
    results = thread_iter(
        (read, stitch_results(read_scores, end - start, chunksize, overlap, model.stride, reverse))
        for ((read, start, end), read_scores) in unbatchify(scored)
    )
    return thread_iter(
        (read, apply_stride_to_moves(model, attrs))
        for read, attrs in results
    )
def basecall(model, reads, beamsize=5, chunksize=0, overlap=0, batchsize=1, qscores=False, reverse=None):
    """
    Basecalls a set of reads.

    Signals are chunked, scored by the model in batches, stitched into one
    score matrix per read, and decoded to sequences on a pool of worker
    processes. (``reverse`` is accepted for interface compatibility but is
    unused in this backend.)
    """
    chunks = (
        (read, chunk(torch.tensor(read.signal), chunksize, overlap))
        for read in reads
    )
    per_read_scores = unbatchify(
        (key, compute_scores(model, batch))
        for key, batch in batchify(chunks, batchsize)
    )
    stitched = (
        (read, {'scores': stitch(scores, chunksize, overlap, len(read.signal), model.stride)})
        for read, scores in per_read_scores
    )
    decoder = partial(
        decode, decode=model.decode, beamsize=beamsize,
        qscores=qscores, stride=model.stride,
    )
    # Decoding is CPU heavy, so fan it out across worker processes.
    return process_map(decoder, stitched, n_proc=4)
def basecall(model, reads, aligner=None, beamsize=40, chunksize=4000, overlap=500, batchsize=32, qscores=False, reverse=False):
    """
    Basecalls a set of reads.

    Long reads are split into sub-reads, chunked, scored and quantised to
    int8, stitched, moved off the device, decoded on a thread pool, and the
    decoded sequences are re-joined under their parent read. If ``aligner``
    is given, the finished calls are mapped before being returned.
    """
    _decode = partial(decode_int8, seqdist=model.seqdist, beamsize=beamsize)
    order = -1 if reverse else 1
    sub_reads = (
        piece for read in reads for piece in split_read(read)[::order]
    )
    chunks = (
        ((read, start, end),
         chunk(torch.from_numpy(read.signal[start:end]), chunksize, overlap))
        for (read, start, end) in sub_reads
    )
    scored = (
        (key, quantise_int8(compute_scores(model, batch, reverse=reverse)))
        for key, batch in thread_iter(batchify(chunks, batchsize=batchsize))
    )
    stitched = (
        (read, stitch(scores, chunksize, overlap, end - start, model.stride, reverse=reverse))
        for ((read, start, end), scores) in unbatchify(scored)
    )
    moved = thread_map(transfer, stitched, n_thread=1)
    decoded = thread_map(_decode, moved, n_thread=8)
    # Re-join sub-read sequences under the parent read; reads that were
    # never split (no `.parent` attribute) group under themselves.
    joined = (
        (read, ''.join(seq for _, seq in parts))
        for read, parts in groupby(
            decoded, lambda item: item[0].parent if hasattr(item[0], 'parent') else item[0]
        )
    )
    basecalls = (
        (read, {
            'sequence': seq,
            'qstring': '?' * len(seq) if qscores else '*',
            'mean_qscore': 0.0,
        })
        for read, seq in joined
    )
    return align_map(aligner, basecalls) if aligner else basecalls
def basecall(model, reads, aligner=None, beamsize=40, chunksize=4000, overlap=500, batchsize=32, qscores=False):
    """
    Basecalls a set of reads.

    Read signals are split into fixed-length sub-signals, chunked with
    start padding, scored and quantised to int8, stitched (trimming half
    the overlap from each side, in model-stride units), decoded on a
    thread pool, and re-joined per read. If ``aligner`` is given, the
    finished calls are mapped before being returned.
    """
    split_read_length = 400000
    _stitch = partial(
        stitch,
        start=overlap // 2 // model.stride,
        end=(chunksize - overlap // 2) // model.stride,
    )
    _decode = partial(decode_int8, seqdist=model.seqdist, beamsize=beamsize)
    # Keys are (read, sub-signal index) so pieces can be regrouped later.
    sub_signals = (
        ((read, index), signal)
        for read in reads
        for index, signal in enumerate(
            torch.split(torch.from_numpy(read.signal), split_read_length)
        )
    )
    chunks = (
        (key, chunk(signal, chunksize, overlap, pad_start=True))
        for key, signal in sub_signals
    )
    scored = (
        (key, quantise_int8(compute_scores(model, batch)))
        for key, batch in thread_iter(batchify(chunks, batchsize=batchsize))
    )
    stitched = ((key, _stitch(x)) for key, x in unbatchify(scored))
    moved = thread_map(transfer, stitched, n_thread=1)
    decoded = thread_map(_decode, moved, n_thread=8)
    # Group sub-signal sequences by their parent read and concatenate.
    joined = (
        (read, ''.join(seq for _, seq in parts))
        for read, parts in groupby(decoded, lambda item: item[0][0])
    )
    basecalls = (
        (read, {
            'sequence': seq,
            'qstring': '?' * len(seq) if qscores else '*',
            'mean_qscore': 0.0,
        })
        for read, seq in joined
    )
    return align_map(aligner, basecalls) if aligner else basecalls