Esempio n. 1
0
def test_mixture_failure_modes():
    """Check that ``mhapi.seq`` rejects inconsistent mixture settings.

    Three profiles are simulated from the russ4 frequency table, then
    ``mhapi.seq`` is invoked with three different sets of keyword arguments.
    Each invocation must raise ``ValueError`` with the paired message.
    """
    frequencies = microhapulator.load_marker_frequencies(
        data_file("freq/russ4-freq.tsv"))
    definitions = microhapulator.load_marker_definitions(
        data_file("def/russ4-offsets.tsv"))
    references = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz"))
    simulated = [mhapi.sim(frequencies) for _ in range(3)]

    # (keyword arguments for mhapi.seq, regex the error message must match)
    failure_cases = (
        # Two seeds supplied for three profiles.
        (
            {"seeds": [42, 1776]},
            r"number of profiles must match number of seeds",
        ),
        # Four proportions supplied for three profiles.
        (
            {"proportions": [0.5, 0.3, 0.1, 0.1]},
            r"mismatch between contributor number and proportions",
        ),
        # Heavily skewed proportions combined with a small read budget.
        (
            {"totalreads": 500, "proportions": [1, 100, 10000]},
            r"specified proportions result in 0 reads for 1 or more individuals",
        ),
    )
    for seq_kwargs, expected_message in failure_cases:
        with pytest.raises(ValueError, match=expected_message):
            # Iterate so the error surfaces even if it is raised lazily.
            for _ in mhapi.seq(simulated, definitions, references,
                               **seq_kwargs):
                pass
Esempio n. 2
0
def test_uneven_mixture(capsys):
    """Check the per-contributor read counts reported for a 50/30/20 mixture.

    Three simulated profiles are sequenced with ``totalreads=500`` and
    proportions ``[0.5, 0.3, 0.2]``; the captured standard error stream must
    mention ``numreads=250``, ``numreads=150`` and ``numreads=100``.
    """
    frequencies = microhapulator.load_marker_frequencies(
        data_file("freq/russ4-freq.tsv"))
    definitions = microhapulator.load_marker_definitions(
        data_file("def/russ4-offsets.tsv"))
    references = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz"))
    simulated = [mhapi.sim(frequencies) for _ in range(3)]
    reads = mhapi.seq(
        simulated,
        definitions,
        references,
        totalreads=500,
        proportions=[0.5, 0.3, 0.2],
    )
    # Exhaust the sequencer; only the messages it emits matter here.
    for _ in reads:
        pass
    captured = capsys.readouterr()
    for expected in ("numreads=250", "numreads=150", "numreads=100"):
        assert expected in captured.err
Esempio n. 3
0
def test_even_mixture():
    """Check that an even mixture yields roughly the requested read total.

    A random seed is drawn, printed, and used to seed numpy's global random
    state (presumably so a failing run can be reproduced from the captured
    output). Between 2 and 5 profiles are simulated and sequenced with
    ``totalreads=1000`` and no explicit proportions. Twice the first element
    of the last item yielded must be within 50 of 1000.
    """
    seed = numpy.random.randint(1, 2**32 - 1)
    print("Seed:", seed)
    numpy.random.seed(seed)
    freqs = microhapulator.load_marker_frequencies(
        data_file("freq/acb-dozen-freq.tsv"))
    markers = microhapulator.load_marker_definitions(
        data_file("def/acb-dozen-offsets.tsv"))
    seqs = microhapulator.load_marker_reference_sequences(
        data_file("refr/acb-dozen-refr.fasta"))
    # The contributor count is drawn once, before any profile is simulated.
    profiles = [mhapi.sim(freqs) for _ in range(numpy.random.randint(2, 6))]
    sequencer = mhapi.seq(profiles, markers, seqs, totalreads=1000)
    # Pre-bind n so an empty sequencer fails the assertion below instead of
    # raising NameError on an unbound loop variable.
    n = 0
    # NOTE(review): n looks like a running counter of yielded items, each
    # carrying two reads -- confirm against mhapi.seq.
    for n, _read1, _read2 in sequencer:
        pass
    numfragments = n * 2
    assert numfragments == pytest.approx(1000, abs=50)
def test_load_marker_frequencies_missing_or_bad_columns(tsv):
    """Check that a table lacking the Haplotype column is rejected.

    ``tsv`` is a path relative to the test data directory, resolved with
    ``data_file``. Loading it must raise ``ValueError`` naming ``Haplotype``
    as the missing column.
    """
    # NOTE(review): `tsv` is presumably supplied by a parametrize decorator
    # that is not visible here -- confirm.
    expected_error = (
        r"column\(s\) missing from marker frequency file: Haplotype")
    with pytest.raises(ValueError, match=expected_error):
        bad_table_path = data_file(tsv)
        microhapulator.load_marker_frequencies(bad_table_path)
def test_load_marker_frequencies_extra_column_ok():
    """Check that an additional column in the frequency table is kept.

    The loaded table must expose the columns Marker, Haplotype, Frequency and
    Foo in that order, and its second Haplotype entry must be ``"C,A,G,G"``.
    """
    table_path = data_file("freq/korea-5loc-freq-extracol.tsv")
    table = microhapulator.load_marker_frequencies(table_path)
    expected_columns = ["Marker", "Haplotype", "Frequency", "Foo"]
    assert list(table.columns) == expected_columns
    second_haplotype = table.Haplotype.iloc[1]
    assert second_haplotype == "C,A,G,G"
def test_load_marker_frequencies(tsv, nrows, value):
    """Check columns, row count and one sample value of a frequency table.

    ``tsv`` is a path relative to the test data directory, ``nrows`` is the
    expected number of rows, and ``value`` is the expected Frequency at
    positional index 23 (compared approximately).
    """
    # NOTE(review): the arguments are presumably supplied by a parametrize
    # decorator that is not visible here -- confirm.
    table = microhapulator.load_marker_frequencies(data_file(tsv))
    expected_columns = ["Marker", "Haplotype", "Frequency"]
    assert list(table.columns) == expected_columns
    row_count = table.shape[0]
    assert row_count == nrows
    sampled_frequency = table.Frequency.iloc[23]
    assert sampled_frequency == pytest.approx(value)