def test_invalid_spacing():
    """
    Verify that constructing a :class:`KmerAlphabet` with a malformed
    spacing model raises a :class:`ValueError`.
    """
    alphabet = seq.ProteinSequence.alphabet
    invalid_spacings = [
        # Not enough informative positions for given k
        [0, 1, 3, 4],
        # Duplicate positions
        [0, 1, 1, 3, 4],
        # Negative values
        [-1, 1, 2, 3, 4],
    ]

    # Cases are checked in order; the first one that does not raise
    # makes the test fail
    for spacing in invalid_spacings:
        with pytest.raises(ValueError):
            align.KmerAlphabet(alphabet, 5, spacing=spacing)
def test_from_sequences(k, random_sequences, spacing):
    """
    Test the :meth:`from_sequences()` constructor, by checking for each
    sequence position, if the position is in the C-array of the
    corresponding k-mer.

    Parameters
    ----------
    k : int
        The k-mer length passed to the table and the reference alphabet.
    random_sequences : list
        The sequences the table is built from.
        All of them are assumed to share the alphabet of the first one.
    spacing : None or array-like
        The spacing model passed to the table and the reference alphabet.
        ``None`` selects continuous k-mers.
    """
    table = align.KmerTable.from_sequences(k, random_sequences, spacing=spacing)
    kmer_alph = align.KmerAlphabet(random_sequences[0].alphabet, k, spacing)
    assert kmer_alph == table.kmer_alphabet

    for i, sequence in enumerate(random_sequences):
        for j in range(kmer_alph.kmer_array_length(len(sequence))):
            if spacing is None:
                kmer = kmer_alph.fuse(sequence.code[j : j + k])
            else:
                # Shift the spacing model to the current start position
                kmer = kmer_alph.fuse(sequence.code[kmer_alph.spacing + j])
            # NOTE: ``np.array([i, j]) in positions`` is evaluated as
            # ``(positions == [i, j]).any()``, i.e. it already succeeds
            # if only ``i`` or only ``j`` matches any entry.
            # Hence, require that a complete (i, j) row matches.
            # Assumes the table yields an (n, 2) position array per k-mer
            positions = np.asarray(table[kmer])
            assert np.any(np.all(positions == [i, j], axis=1)), (
                f"Position ({i}, {j}) is missing for k-mer {kmer}"
            )
def test_get_kmers():
    """
    Test whether :meth:`get_kmers()` returns the expected *k-mers*,
    by building a table from a random selection of *k-mers*, each one
    having exactly one position, and comparing the returned *k-mers*
    to that selection.
    """
    np.random.seed(0)

    base_alphabet = seq.NucleotideSequence.unambiguous_alphabet()
    alphabet = align.KmerAlphabet(base_alphabet, 8)

    # Randomly decide for each k-mer code whether it is put into the table
    selection_mask = np.random.choice([False, True], size=len(alphabet))
    expected_kmers = np.flatnonzero(selection_mask)

    # [[0, 0]] = Dummy position
    # The actual position is not relevant for the tested function
    positions_per_kmer = {}
    for kmer in expected_kmers:
        positions_per_kmer[kmer] = np.zeros((1, 2), dtype=np.uint32)
    table = align.KmerTable.from_positions(alphabet, positions_per_kmer)

    returned_kmers = table.get_kmers()
    assert returned_kmers.tolist() == expected_kmers.tolist()
def kmer_alphabet():
    """
    Create a :class:`KmerAlphabet` for *k-mers* with ``k=3`` based on
    the protein sequence alphabet.
    """
    base_alphabet = seq.ProteinSequence.alphabet
    return align.KmerAlphabet(base_alphabet, 3)
def spaced_kmer_alphabet():
    """
    Create a :class:`KmerAlphabet` based on the protein sequence
    alphabet, using the module-level ``K`` as *k* and an explicitly
    given spacing model of ``[0, 1, 2]``.
    """
    base_alphabet = seq.ProteinSequence.alphabet
    spacing_model = [0, 1, 2]
    return align.KmerAlphabet(base_alphabet, K, spacing=spacing_model)