def test_lossless_reads():
    """Exercise ``rand_read`` argument checks, read counts and read contents.

    Covers: rejected argument combinations, the number of reads produced for
    ``num`` / default arguments, that a read equals the slice of the source
    sequence at its reported position, that read lengths vary, and sampling
    from an alphabet whose letters are longer than one character.
    """
    A = Alphabet('ACGT')
    S = rand_seq(A, 100)

    with pytest.raises(AssertionError):
        # len_mean must be < len(S)
        next(rand_read(S, len_mean=200, num=1))

    with pytest.raises(AssertionError):
        # at most one of num or expected_coverage given
        next(rand_read(S, len_mean=50, num=1, expected_coverage=1))

    assert sum(1 for _ in rand_read(S, len_mean=50, num=10)) == 10, \
        'The number of sampled reads should be controllable'
    assert sum(1 for _ in rand_read(S, len_mean=50)) == 1, \
        'If neither num or expected coverage is given only one sample is read'

    # there should be no noise added
    read, pos = next(rand_read(S, len_mean=40, num=1))
    assert S[pos:pos + len(read)] == read

    S = A.parse('ACT' * 100)
    reads = list(rand_read(S, len_mean=100, len_sd=0.01, num=100))
    # Compare the *number* of distinct lengths against 1; comparing the set
    # itself with an int is a TypeError on Python 3 and meaningless on Python 2.
    # NOTE(review): with len_sd=0.01 this relies on rand_read's float-to-int
    # conversion producing more than one length -- confirm against rand_read.
    assert len(set(len(read) for read, _ in reads)) > 1, \
        'Read lengths should be randomly chosen'
    len_mean = sum(len(read) for read, _ in reads) / 100.0
    assert 50 < len_mean < 150, \
        'Normal distribution of read lengths works'

    # index edge cases
    A = Alphabet(['00', '01'])
    S = A.parse('01' * 10)
    # Replace np.random.normal (presumably what rand_read draws lengths from)
    # with a stub returning [1]. The context manager restores the real function
    # even if the assertion fails, so the patch cannot leak into other tests.
    with mock.patch.object(np.random, 'normal', mock.Mock(return_value=[1])):
        assert next(rand_read(S, len_mean=1, num=1))[0] == A.parse('01'), \
            'sequences in alphabets with > 1 long letters can be sampled too'
def test_expected_coverage():
    """Check the read count produced when ``expected_coverage`` is given.

    With a mean read length of half the sequence length, the number of reads
    is expected to be twice the requested coverage.
    """
    alphabet = Alphabet('ACGT')
    seq = rand_seq(alphabet, 100)
    coverage = 10
    half_length = len(seq)/2
    sampled = list(
        rand_read(seq, len_mean=half_length, expected_coverage=coverage)
    )
    assert len(sampled) == 2 * coverage