def kmer_records(draw, kmer_size, num_colors, kmer_strings=dna_sequences):
    """Draw a random ``KmerRecord`` with ``num_colors`` colors.

    NOTE(review): ``draw`` looks like the callable a Hypothesis composite
    strategy receives, and ``s`` like ``hypothesis.strategies`` -- confirm
    against the decorator and imports, which are not visible here.

    Args:
        draw: Callable that pulls a concrete example out of a strategy.
        kmer_size: Exact length of the generated kmer string.
        num_colors: Number of coverage values and edge sets to generate.
        kmer_strings: Strategy factory accepting ``min_size``/``max_size``.

    Returns:
        ``KmerRecord(kmer, coverage, edges)`` where ``coverage`` is a tuple
        of ``num_colors`` integers in ``[1, MAX_UINT]`` and ``edges`` is a
        list of ``num_colors`` ``EdgeSet`` objects.
    """
    # Keep the draw order (kmer, coverage, edges) stable.
    kmer = draw(kmer_strings(min_size=kmer_size, max_size=kmer_size))

    coverage_strategy = s.lists(
        s.integers(min_value=1, max_value=MAX_UINT),
        min_size=num_colors,
        max_size=num_colors,
    )
    coverage = tuple(draw(coverage_strategy))

    # One row of eight 0/1 flags per color.
    flag_row = s.lists(
        s.integers(min_value=0, max_value=1), min_size=8, max_size=8
    )
    flag_rows = s.lists(flag_row, min_size=num_colors, max_size=num_colors)
    raw_edges = np.array(draw(flag_rows), dtype=np.uint8)

    edge_sets = []
    for row in raw_edges:
        # First four flags as drawn, followed by the last four flags in
        # reversed order (row[7], row[6], row[5], row[4]).
        head = row[:4]
        reversed_tail = row[::-1][:4]
        edge_sets.append(EdgeSet(np.concatenate((head, reversed_tail))))

    return KmerRecord(kmer, coverage, edge_sets)
def test_empty_kmer(self):
    """An all-zero edge set renders as eight dots in either orientation."""
    empty_edges = EdgeSet(np.zeros(8))
    expected = '........'

    # Same order as iterating over [True, False].
    assert empty_edges.to_str(as_revcomp=True) == expected
    assert empty_edges.to_str(as_revcomp=False) == expected
def test_raises_on_non_lexlo_kmer(self):
    """Both neighbour lookups raise AssertionError for the kmer 'TTT'."""
    edge_set = EdgeSet(np.zeros(8))
    non_lexlo_kmer = 'TTT'

    with pytest.raises(AssertionError):
        edge_set.get_outgoing_kmers(non_lexlo_kmer)

    with pytest.raises(AssertionError):
        edge_set.get_incoming_kmers(non_lexlo_kmer)
def test_outgoing_strings_does_not_return_lexicographically_lowest_kmer(self):
    """With only edge 'G' set, the outgoing kmer string of 'ACG' is 'CGG'."""
    edge_set = EdgeSet(np.zeros(8))
    edge_set.add_edge('G')

    outgoing_strings = list(edge_set.get_outgoing_kmer_strings('ACG'))

    assert ['CGG'] == outgoing_strings
def test_outgoing_returns_lexicographically_lowest_kmers(self):
    """With only edge 'G' set, the outgoing kmer of 'ACG' is reported as 'CCG'."""
    edge_set = EdgeSet(np.zeros(8))
    edge_set.add_edge('G')

    outgoing_kmers = edge_set.get_outgoing_kmers('ACG')

    assert ['CCG'] == outgoing_kmers
def test_incoming_strings_does_not_return_lexicographically_lowest_kmers(self):
    """With only edge 't' set, the incoming kmer string of 'TAA' is 'TTA'."""
    edge_set = EdgeSet(np.zeros(8))
    edge_set.add_edge('t')

    incoming_strings = list(edge_set.get_incoming_kmer_strings('TAA'))

    assert ['TTA'] == incoming_strings
def test_with_all_true(self):
    """Every edge letter is reported as set when all eight flags are one."""
    full_edges = EdgeSet(np.ones(8))
    edge_letters = 'acgtACGT'

    # all() stops at the first falsy result, just like the assert-per-letter loop.
    assert all(full_edges.is_edge(letter) for letter in edge_letters)
def test_with_none_true(self):
    """No edge letter is reported as set when all eight flags are zero."""
    empty_edges = EdgeSet(np.zeros(8))
    edge_letters = 'acgtACGT'

    # any() stops at the first truthy result, just like the assert-per-letter loop.
    assert not any(empty_edges.is_edge(letter) for letter in edge_letters)
def test_no_incoming_all_outgoing(self):
    """First four flags clear, last four set: no incoming, four outgoing kmers."""
    cleared_half = np.zeros(4)
    set_half = np.ones(4)
    edge_set = EdgeSet(np.concatenate([cleared_half, set_half]))

    incoming = edge_set.get_incoming_kmers('AAA')
    assert 0 == len(incoming)

    outgoing = edge_set.get_outgoing_kmers('AAA')
    # Compared as a set, so the order of the returned kmers is not checked.
    assert {'AAA', 'AAC', 'AAG', 'AAT'} == set(outgoing)
def test_all_incoming_no_outgoing(self):
    """First four flags set, last four clear: four incoming, no outgoing kmers."""
    set_half = np.ones(4)
    cleared_half = np.zeros(4)
    edge_set = EdgeSet(np.concatenate([set_half, cleared_half]))

    incoming = edge_set.get_incoming_kmers('AAA')
    # Compared as a list, so the returned order is part of the expectation.
    assert incoming == ['AAA', 'CAA', 'GAA', 'TAA']

    outgoing = edge_set.get_outgoing_kmers('AAA')
    assert 0 == len(outgoing)
def test_no_incoming_or_outgoing(self):
    """An all-zero edge set yields no neighbouring kmers in either direction."""
    empty_edges = EdgeSet(np.zeros(8))

    incoming = empty_edges.get_incoming_kmers('AAA')
    assert 0 == len(incoming)

    outgoing = empty_edges.get_outgoing_kmers('AAA')
    assert 0 == len(outgoing)
def test_with_all_incoming_and_no_outgiong(self):
    """``incoming`` exposes the first four flags and ``outgoing`` the last four."""
    flags = np.concatenate([np.ones(4), np.zeros(4)])
    edge_set = EdgeSet(flags)

    incoming_flags = list(edge_set.incoming)
    assert incoming_flags == [1, 1, 1, 1]

    outgoing_flags = list(edge_set.outgoing)
    assert outgoing_flags == [0, 0, 0, 0]
def test_works(self):
    """Indexing an all-ones edge set gives a truthy value at each of the 8 positions."""
    full_edges = EdgeSet(np.ones(8))

    # all() stops at the first falsy entry, just like the assert-per-index loop.
    assert all(full_edges[position] for position in range(8))
def test_removes_each_edge(self):
    """Each edge letter is present at first and absent after ``remove_edge``."""
    edge_set = EdgeSet(np.ones(8))

    for edge_letter in 'acgtACGT':
        present_before = edge_set.is_edge(edge_letter)
        assert present_before

        edge_set.remove_edge(edge_letter)

        present_after = edge_set.is_edge(edge_letter)
        assert not present_after
def test_adds_each_edge(self):
    """Each edge letter is absent at first and present after ``add_edge``."""
    edge_set = EdgeSet(np.zeros(8))

    for edge_letter in 'acgtACGT':
        present_before = edge_set.is_edge(edge_letter)
        assert not present_before

        edge_set.add_edge(edge_letter)

        present_after = edge_set.is_edge(edge_letter)
        assert present_after
def test_incoming_returns_lexicographically_lowest_kmers(self):
    """With only edge 't' set, the incoming kmer of 'TAA' is reported as 'TAA'."""
    edge_set = EdgeSet(np.zeros(8))
    edge_set.add_edge('t')

    incoming_kmers = edge_set.get_incoming_kmers('TAA')

    assert ['TAA'] == incoming_kmers
def as_edge_set(edge_set_string):
    """Build an ``EdgeSet`` from ``edge_set_string``.

    The string is first converted with ``edge_set_string_to_array`` and the
    result is passed to the ``EdgeSet`` constructor.
    """
    edge_array = edge_set_string_to_array(edge_set_string)
    return EdgeSet(edge_array)
def test_with_a_and_c(self):
    """Edges 'A' and 'c' render as '.c..A...' and, as revcomp, '...T..g.'."""
    edge_set = EdgeSet(np.zeros(8))
    for edge_letter in ('A', 'c'):
        edge_set.add_edge(edge_letter)

    forward_repr = edge_set.to_str()
    assert '.c..A...' == forward_repr

    revcomp_repr = edge_set.to_str(as_revcomp=True)
    assert '...T..g.' == revcomp_repr