def test_mixture_failure_modes():
    """Check that ``mhapi.seq`` rejects inconsistent mixture arguments.

    Three profiles are simulated from the russ4 frequencies and then passed to
    ``mhapi.seq`` with three different argument combinations. Each combination
    must raise ``ValueError`` with the corresponding message:

    * two seeds for three profiles,
    * four proportions for three profiles,
    * proportions so uneven that the error reports 0 reads for a contributor.
    """
    frequencies = microhapulator.load_marker_frequencies(data_file("freq/russ4-freq.tsv"))
    definitions = microhapulator.load_marker_definitions(data_file("def/russ4-offsets.tsv"))
    references = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz")
    )
    contributors = [mhapi.sim(frequencies) for _ in range(3)]

    # The result of mhapi.seq is iterated in every case, as in the original
    # test; presumably the validation only runs once iteration starts.
    seed_mismatch = r"number of profiles must match number of seeds"
    with pytest.raises(ValueError, match=seed_mismatch):
        for _ in mhapi.seq(contributors, definitions, references, seeds=[42, 1776]):
            pass

    proportion_mismatch = r"mismatch between contributor number and proportions"
    with pytest.raises(ValueError, match=proportion_mismatch):
        for _ in mhapi.seq(
            contributors, definitions, references, proportions=[0.5, 0.3, 0.1, 0.1]
        ):
            pass

    zero_reads = r"specified proportions result in 0 reads for 1 or more individuals"
    with pytest.raises(ValueError, match=zero_reads):
        for _ in mhapi.seq(
            contributors,
            definitions,
            references,
            totalreads=500,
            proportions=[1, 100, 10000],
        ):
            pass
def test_profile_roundtrip(tmp_path):
    """Check that a simulated profile survives a dump/load cycle unchanged.

    A profile is simulated from the asw5 frequencies with a randomly drawn
    seed, written to ``profile.json`` under ``tmp_path``, and read back through
    ``SimulatedProfile(fromfile=...)``. The reloaded profile must compare equal
    to the original, both directly and via ``str()``.

    Args:
        tmp_path: Directory object that supports ``/`` for joining a file name
            (presumably pytest's built-in ``tmp_path`` fixture).
    """
    seed = numpy.random.randint(1, 2**32 - 1)
    # The seed differs on every run; report it so a failure can be reproduced.
    print("Seed:", seed)
    freqs = pd.read_csv(data_file("freq/asw5-freq.tsv"), sep="\t")
    profile = mhapi.sim(freqs, seed=seed)

    outfile = tmp_path / "profile.json"
    profile.dump(outfile)
    reloaded = SimulatedProfile(fromfile=outfile)

    assert profile == reloaded
    assert str(profile) == str(reloaded)
def test_uneven_mixture(capsys):
    """Check the per-contributor read counts reported for a 50/30/20 mixture.

    Three profiles simulated from the russ4 frequencies are sequenced with
    ``totalreads=500`` and ``proportions=[0.5, 0.3, 0.2]``. After the sequencer
    has been exhausted, the captured standard error must mention
    ``numreads=250``, ``numreads=150`` and ``numreads=100``.

    Args:
        capsys: Object whose ``readouterr()`` result exposes captured standard
            error as ``.err`` (presumably pytest's ``capsys`` fixture).
    """
    frequencies = microhapulator.load_marker_frequencies(data_file("freq/russ4-freq.tsv"))
    definitions = microhapulator.load_marker_definitions(data_file("def/russ4-offsets.tsv"))
    references = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz")
    )
    contributors = [mhapi.sim(frequencies) for _ in range(3)]

    # Exhaust the sequencer; only its stderr output is inspected.
    for _ in mhapi.seq(
        contributors,
        definitions,
        references,
        totalreads=500,
        proportions=[0.5, 0.3, 0.2],
    ):
        pass

    captured = capsys.readouterr()
    for expected in ("numreads=250", "numreads=150", "numreads=100"):
        assert expected in captured.err
def test_even_mixture():
    """Check that an even mixture yields roughly the requested number of reads.

    A random seed is drawn, printed, and installed as the global numpy seed.
    Between 2 and 5 profiles are simulated from the acb-dozen frequencies and
    sequenced with ``totalreads=1000`` and no explicit proportions. Twice the
    counter from the last item yielded must be within 50 of 1000.
    """
    seed = numpy.random.randint(1, 2**32 - 1)
    print("Seed:", seed)
    numpy.random.seed(seed)

    freqs = microhapulator.load_marker_frequencies(data_file("freq/acb-dozen-freq.tsv"))
    markers = microhapulator.load_marker_definitions(data_file("def/acb-dozen-offsets.tsv"))
    seqs = microhapulator.load_marker_reference_sequences(data_file("refr/acb-dozen-refr.fasta"))

    profiles = [mhapi.sim(freqs) for _ in range(numpy.random.randint(2, 6))]
    sequencer = mhapi.seq(profiles, markers, seqs, totalreads=1000)

    # Pre-bind the counter: if the sequencer yields nothing, the assertion
    # below fails with a clear message instead of a NameError on an unbound
    # loop variable.
    n = 0
    for n, _read1, _read2 in sequencer:
        pass

    # Each item carries two reads, so the counter is doubled; presumably the
    # counter is the running read-pair number -- confirm against mhapi.seq.
    numfragments = n * 2
    assert numfragments == pytest.approx(1000, abs=50)
def test_no_seed():
    """Check that simulating without a seed still covers both asw2 markers.

    The profile returned by ``mhapi.sim`` for the asw2 frequencies must hold
    exactly two entries under ``data["markers"]``, which sort to
    ``mh07CP-004`` and ``mh14CP-003``.
    """
    frequency_table = pd.read_csv(data_file("freq/asw2-freq.tsv"), sep="\t")
    profile = mhapi.sim(frequency_table)
    simulated_markers = profile.data["markers"]

    assert len(simulated_markers) == 2
    assert sorted(simulated_markers) == ["mh07CP-004", "mh14CP-003"]
def test_meaning_of_life():
    """Check that simulating with seed 42 reproduces the stored profile.

    The profile simulated from the ceu50 frequencies with ``seed=42`` must
    compare equal to the one loaded from ``prof/meaning-of-life.json.gz``.
    """
    frequency_table = pd.read_csv(data_file("freq/ceu50-freq.tsv"), sep="\t")
    reference_path = data_file("prof/meaning-of-life.json.gz")

    simulated = mhapi.sim(frequency_table, seed=42)
    stored = SimulatedProfile(fromfile=reference_path)

    assert simulated == stored