Exemple #1
0
def remap(read_ref, ev, min_prob, kmer_len, prior, slip):
    """Map an event table onto a reference sequence with the transducer model.

    Features are built from ``ev``, run through ``calc_post`` and cleaned up
    with ``sloika.decode.prepare_post``; the resulting posterior is then
    aligned against the k-mer states of ``read_ref``.

    :param read_ref: reference sequence, split into k-mers of ``kmer_len``
    :param ev: event table; features are derived from it and it is returned
        with extra fields appended
    :param min_prob: forwarded to ``sloika.decode.prepare_post``
    :param kmer_len: length of the k-mers the reference is split into
    :param prior: indexable pair; ``prior[0]`` / ``prior[1]`` parameterise the
        geometric priors over the initial / final position (``None``
        disables the respective prior)
    :param slip: forwarded to ``sloika.transducer.map_to_sequence``

    :returns: tuple ``(score, ev, path, seq)`` where ``ev`` carries the added
        fields 'seq_pos', 'kmer' and 'good_emission'
    """
    features = np.expand_dims(sloika.features.from_events(ev, tag=''), axis=1)
    raw_post = calc_post(features)
    post = sloika.decode.prepare_post(
        raw_post, min_prob=min_prob, drop_bad=False)

    kmers = np.array(bio.seq_to_kmers(read_ref, kmer_len))
    # State indices are shifted up by one relative to kmer_to_state
    # (presumably state 0 is reserved by the transducer -- TODO confirm).
    seq = [kmer_to_state[kmer] + 1 for kmer in kmers]
    n_states = len(seq)

    prior_initial = None
    if prior[0] is not None:
        prior_initial = sloika.util.geometric_prior(n_states, prior[0])

    prior_final = None
    if prior[1] is not None:
        prior_final = sloika.util.geometric_prior(
            n_states, prior[1], rev=True)

    score, path = sloika.transducer.map_to_sequence(
        post, seq,
        slip=slip,
        prior_initial=prior_initial,
        prior_final=prior_final,
        log=False)

    # Annotate every event with its mapped position and k-mer;
    # 'good_emission' is uniformly True.
    new_names = ['seq_pos', 'kmer', 'good_emission']
    new_columns = [path, kmers[path], np.repeat(True, len(ev))]
    annotated = nprf.append_fields(ev, new_names, new_columns)

    return (score, annotated, path, seq)
Exemple #2
0
 def test_de_bruijn_allkmers(self):
     """The padded de Bruijn sequence should yield alpha**dblen k-mers."""
     alphabet_size, order = 4, 2
     symbols = bio.de_bruijn(alphabet_size, order, pad=True)
     sequence = ''.join(str(symbol) for symbol in symbols)
     n_kmers = len(bio.seq_to_kmers(sequence, order))
     self.assertEqual(n_kmers, alphabet_size ** order)
Exemple #3
0
 def test_de_bruijn_noduplicates(self):
     """No k-mer of the padded de Bruijn sequence should occur twice."""
     alphabet_size, order = 4, 2
     symbols = bio.de_bruijn(alphabet_size, order, pad=True)
     sequence = ''.join(map(str, symbols))
     found = bio.seq_to_kmers(sequence, order)
     n_total = len(found)
     n_unique = len(set(found))
     self.assertTrue(n_total == n_unique)
Exemple #4
0
def raw_remap(ref, signal, min_prob, kmer_len, prior, slip):
    """Map raw signal to reference sequence using transducer model.

    The signal is normalised (median-centred, scaled by ``mad(signal)``),
    passed through ``batch.calc_post`` and ``sloika.decode.prepare_post``,
    and the resulting posterior is aligned against the k-mer states of
    ``ref``.  A mapping table with one row per posterior block is built from
    the alignment path.

    :param ref: reference sequence, split into k-mers of ``kmer_len``
    :param signal: array of raw signal samples, indexed along its first axis
        (presumably 1D -- TODO confirm)
    :param min_prob: forwarded to ``sloika.decode.prepare_post``
    :param kmer_len: length of the k-mers the reference is split into
    :param prior: indexable pair; ``prior[0]`` / ``prior[1]`` parameterise the
        geometric priors over the initial / final position (``None``
        disables the respective prior)
    :param slip: forwarded to ``sloika.transducer.map_to_sequence``

    :returns: tuple ``(score, mapping_table, path, seq)``
    """
    from sloika import config  # local import to avoid CUDA init in main thread

    inMat = (signal - np.median(signal)) / mad(signal)
    inMat = inMat[:, None, None].astype(config.sloika_dtype)
    post = sloika.decode.prepare_post(batch.calc_post(inMat),
                                      min_prob=min_prob,
                                      drop_bad=False)

    kmers = np.array(bio.seq_to_kmers(ref, kmer_len))
    # State indices are shifted up by one relative to batch.kmer_to_state
    # (presumably state 0 is reserved by the transducer -- TODO confirm).
    seq = [batch.kmer_to_state[k] + 1 for k in kmers]
    prior0 = None if prior[0] is None else sloika.util.geometric_prior(
        len(seq), prior[0])
    prior1 = None if prior[1] is None else sloika.util.geometric_prior(
        len(seq), prior[1], rev=True)

    score, path = sloika.transducer.map_to_sequence(post,
                                                    seq,
                                                    slip=slip,
                                                    prior_initial=prior0,
                                                    prior_final=prior1,
                                                    log=False)

    mapping_dtype = [
        ('start', '<i8'),
        ('length', '<i8'),
        ('seq_pos', '<i8'),
        ('move', '<i8'),
        ('kmer', 'S{}'.format(kmer_len)),
        ('good_emission', '?'),
    ]
    mapping_table = np.zeros(post.shape[0], dtype=mapping_dtype)
    # Number of signal samples covered by each posterior block (rounded up).
    stride = int(np.ceil(signal.shape[0] / float(post.shape[0])))
    # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is unchanged.
    # NOTE(review): this assignment requires the arange to contain exactly
    # post.shape[0] entries -- confirm that holds for all signal lengths.
    mapping_table['start'] = np.arange(
        0, signal.shape[0], stride, dtype=int) - stride // 2
    mapping_table['length'] = stride
    mapping_table['seq_pos'] = path
    # Step between consecutive path positions; the first row is set to 1.
    mapping_table['move'] = np.ediff1d(path, to_begin=1)
    mapping_table['kmer'] = kmers[path]
    # We set 'good_emission' for compatibility only
    mapping_table['good_emission'] = True

    # Only the mapping table returned by the trim helper is kept; its first
    # return value is discarded.
    _, mapping_table = trim_signal_and_mapping(signal, mapping_table, 0,
                                               len(signal))

    return (score, mapping_table, path, seq)
Exemple #5
0
 def test_seq_to_kmers_returns_correct(self):
     """seq_to_kmers on the base sequence with k=10 should equal kmers1."""
     observed = bio.seq_to_kmers(self.base_seq, 10)
     self.assertEqual(observed, self.kmers1)