Example #1
0
def test_read_genotype_likelihoods():
    """Read a VCF with genotype likelihoods enabled and check genotypes and likelihoods."""
    reader = VcfReader("tests/data/genotype-likelihoods.vcf", genotype_likelihoods=True)
    tables = list(reader)
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chrA"
    assert table.samples == ["sample1", "sample2"]
    assert len(table.variants) == 4

    # Genotypes are stored per sample, in the order of table.samples.
    assert len(table.genotypes) == 2
    expected_indices = ([2, 1, 1, 1], [1, 0, 0, 1])
    for sample_index, indices in enumerate(expected_indices):
        observed = list(table.genotypes[sample_index])
        assert observed == canonic_index_list_to_biallelic_gt_list(indices)

    gl0 = GenotypeLikelihoods([-2.1206, -0.8195, -0.07525])
    gl1 = GenotypeLikelihoods([-10.3849, 0, -5.99143])
    gl2 = GenotypeLikelihoods([-2.1, -0.8, -0.8])
    gl3 = GenotypeLikelihoods([0, -10.0, -0.6])

    expected_by_sample = [
        ("sample1", [gl0, gl2, None, gl0]),
        ("sample2", [gl1, gl3, None, gl1]),
    ]

    # First make sure every sample has one entry per variant ...
    for sample, _ in expected_by_sample:
        assert len(table.genotype_likelihoods_of(sample)) == 4

    # ... then compare the entries one by one (None marks a missing likelihood).
    for sample, expected in expected_by_sample:
        for actual_gl, expected_gl in zip(table.genotype_likelihoods_of(sample), expected):
            assert_genotype_likelihoods(actual_gl, expected_gl)
Example #2
0
def test_read_tetraploid_unphased():
    """Read an unphased polyploid VCF and check its variants and the sample's genotypes.

    The observed and expected genotypes are printed before the final
    assertion so that a failure can be diagnosed from the captured output.
    """
    tables = list(
        VcfReader("tests/data/polyploid.chr22.unphased.vcf", phases=False))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr22"
    assert table.samples == ["HG00514_NA19240"]
    assert len(table.variants) == 8

    # Only the first four of the eight variants are checked allele by allele.
    expected_alleles = [("A", "C"), ("G", "A"), ("G", "T"), ("G", "C")]
    for index, (reference, alternative) in enumerate(expected_alleles):
        assert table.variants[index].reference_allele == reference
        assert table.variants[index].alternative_allele == alternative

    # Build the expected genotypes once, with the same arguments the assertion
    # uses (second argument 4, presumably the ploidy), so that what is printed
    # under "Exp:" is exactly what is compared below.
    expected_genotypes = canonic_index_list_to_biallelic_gt_list(
        [3, 2, 0, 3, 3, 1, 1, 1], 4)

    print("Got:")
    for genotype in table.genotypes[0]:
        print(genotype)
    print("Exp:")
    # Previously this loop bound a misspelled variable and printed the stale
    # value left over from the "Got:" loop instead of the expected genotypes.
    for genotype in expected_genotypes:
        print(genotype)
    assert table.genotypes[0] == expected_genotypes
Example #3
0
def test_read_multisample_vcf():
    """Read a two-chromosome, two-sample VCF and check variants and genotypes of chrA."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    sample_names = ["sample1", "sample2"]

    tables = list(VcfReader("tests/data/multisample.vcf"))
    assert len(tables) == 2
    table, table_b = tables
    assert table_b.chromosome == "chrB"
    assert table_b.samples == sample_names

    assert table.chromosome == "chrA"
    assert len(table.variants) == 3
    assert table.samples == sample_names

    # (reference, alternative) allele of each chrA variant, in file order.
    expected_alleles = [("A", "T"), ("C", "G"), ("G", "T")]
    for index, (reference, alternative) in enumerate(expected_alleles):
        variant = table.variants[index]
        assert variant.reference_allele == reference
        assert variant.alternative_allele == alternative

    expected_indices = ([1, 1, 1], [1, 1, 0])
    assert len(table.genotypes) == 2
    # Access by position ...
    for sample_index, indices in enumerate(expected_indices):
        assert list(table.genotypes[sample_index]) == to_genotypes(indices)
    # ... and by sample name must agree.
    for sample, indices in zip(sample_names, expected_indices):
        assert list(table.genotypes_of(sample)) == to_genotypes(indices)
Example #4
0
def test_phase_doubletrio_pure_genetic():
    """Phase two chained trios (A, B -> C and C, D -> E) without any reads."""
    num_variants = 4
    pedigree = Pedigree(NumericSampleIds())
    genotype_indices = (
        ("individualA", [1, 2, 1, 0]),
        ("individualB", [1, 0, 1, 1]),
        ("individualC", [2, 1, 1, 0]),
        ("individualD", [1, 2, 2, 1]),
        ("individualE", [1, 1, 1, 0]),
    )
    for name, indices in genotype_indices:
        pedigree.add_individual(name, canonic_index_list_to_biallelic_gt_list(indices))
    # individualC is the child of the first trio and a parent in the second.
    pedigree.add_relationship("individualA", "individualB", "individualC")
    pedigree.add_relationship("individualC", "individualD", "individualE")

    # No reads at all: phasing has to come from the pedigree genotypes alone.
    superreads_list, transmission_vector, cost = phase_pedigree(
        "", [2, 2, 2], pedigree, positions=[10, 20, 30, 40])

    assert cost == 0
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [
        ("0100", "1110"),
        ("0011", "1000"),
        ("1110", "1000"),
        ("1111", "0110"),
        ("1000", "0110"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, num_variants)

    trio_transmission_vectors = get_trio_transmission_vectors(
        transmission_vector, num_variants)
    first_trio = superreads_list[:3]
    second_trio = superreads_list[2:]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], num_variants)
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], num_variants)
Example #5
0
def test_phase_trio5():
    """Phase a trio whose genotype indices are all 1 while sample B's reads disagree."""
    reads = """
      B 101
      B 101
      B 101
      A 111
      A 111
      A 111
      C 111
      C 111
      C 111
    """
    num_variants = 3
    members = ("individual0", "individual1", "individual2")

    pedigree = Pedigree(NumericSampleIds())
    for name in members:
        pedigree.add_individual(
            name, canonic_index_list_to_biallelic_gt_list([1, 1, 1]))
    pedigree.add_relationship(*members)

    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, [2, 2, 2], pedigree)

    assert cost == 3
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [("111", "000") for _ in members]
    assert_haplotypes(superreads_list, all_expected_haplotypes, num_variants)
    assert_trio_allele_order(superreads_list, transmission_vector, num_variants)
Example #6
0
def test_phase_quartet3():
    """Phase a quartet (two parents, two children) and check cost and haplotypes."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    num_variants = 6
    reads = """
      A 1111
      A 0000
      B 1010
      C 111000
      C 010101
      D 000000
      D 010
      B 0101
      C  1100
      D  10010
      A   0000
      A   1111
      B   1010
      B   0101
    """
    genotype_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
        ("individual3", [0, 1, 0, 0, 1, 0]),
    )
    pedigree = Pedigree(NumericSampleIds())
    for name, indices in genotype_indices:
        pedigree.add_individual(name, to_genotypes(indices))
    # Both children share the same pair of parents.
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, [3, 3, 3, 4, 3, 3], pedigree)
    print(cost)
    print(transmission_vector)
    assert cost == 8
    # TODO: expect transmission in both trio relations. Update once transmission vectors
    #       are returned per trio relationship
    # assert transmission_vector in ([0,0,0,1,1,1], [1,1,1,0,0,0], [2,2,2,3,3,3], [3,3,3,2,2,2])
    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
        ("000000", "010010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, num_variants)

    trio_transmission_vectors = get_trio_transmission_vectors(
        transmission_vector, num_variants)
    # First trio: entries 0, 1 and 2; second trio: entries 0, 1 and 3.
    first_trio = superreads_list[:3]
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], num_variants)
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], num_variants)
Example #7
0
def test_read_phased_vcf():
    """Check that the HP-based and the PS-based phased VCF yield the same tables.

    Both input files must produce identical chromosomes, samples, genotypes
    and phase information.
    """
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    sample_names = ["sample1", "sample2"]
    filenames = ("tests/data/phased-via-HP.vcf", "tests/data/phased-via-PS.vcf")

    for filename in filenames:
        print("Testing", filename)
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, table_b = tables

        # Basic shape of both tables: (table, chromosome, number of variants).
        for table, chromosome, variant_count in (
            (table_a, "chrA", 4),
            (table_b, "chrB", 2),
        ):
            assert table.chromosome == chromosome
            assert len(table.variants) == variant_count
            assert table.samples == sample_names

        # Genotypes, accessed first by position and then by sample name.
        expected_indices = (
            (table_a, ([1, 2, 1, 1], [1, 1, 1, 1])),
            (table_b, ([0, 1], [1, 2])),
        )
        for table, per_sample in expected_indices:
            assert len(table.genotypes) == 2
            for sample_index, indices in enumerate(per_sample):
                assert list(table.genotypes[sample_index]) == to_genotypes(indices)
            for sample, indices in zip(sample_names, per_sample):
                assert list(table.genotypes_of(sample)) == to_genotypes(indices)

        print(table_a.phases)
        assert len(table_a.phases) == 2
        expected_phase_sample1 = [
            None,
            None,
            VariantCallPhase(block_id=300, phase=(1, 0), quality=23),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=42),
        ]
        expected_phase_sample2 = [
            VariantCallPhase(block_id=100, phase=(0, 1), quality=10),
            VariantCallPhase(block_id=100, phase=(1, 0), quality=20),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=30),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=None),
        ]
        expected_phases_a = (expected_phase_sample1, expected_phase_sample2)
        for sample_index, expected in enumerate(expected_phases_a):
            assert list(table_a.phases[sample_index]) == expected
        for sample, expected in zip(sample_names, expected_phases_a):
            assert list(table_a.phases_of(sample)) == expected

        # Every phase entry of the second table is expected to be None.
        assert len(table_b.phases) == 2
        for sample_index in range(2):
            assert list(table_b.phases[sample_index]) == [None, None]
        for sample in sample_names:
            assert list(table_b.phases_of(sample)) == [None, None]
Example #8
0
def test_phase_quartet2():
    """Phase a quartet whose reads are consistent with the genotypes (expected cost 0)."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    num_variants = 6
    reads = """
      A 111111
      A 000000
      B 010101
      B 101010
      C 000000
      C 010101
      D 000000
      D 010101
    """
    genotype_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [0, 1, 0, 1, 0, 1]),
        ("individual3", [0, 1, 0, 1, 0, 1]),
    )
    pedigree = Pedigree(NumericSampleIds())
    for name, indices in genotype_indices:
        pedigree.add_individual(name, to_genotypes(indices))
    # individual2 and individual3 are both children of individual0 and individual1.
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, [3, 3, 3, 3, 3, 3], pedigree)

    assert cost == 0
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("000000", "010101"),
        ("000000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, num_variants)

    trio_transmission_vectors = get_trio_transmission_vectors(
        transmission_vector, num_variants)
    first_trio = superreads_list[:3]
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], num_variants)
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], num_variants)
Example #9
0
def test_weighted_genotyping4():
    """Run the single-individual genotyping check on weighted reads over six variants."""
    reads = """
    00  00
    0000
    000
    111
    111101
    111111
    111110
      000
      1111
    """

    # One weight character per allele character in `reads`.
    weights = """
    11  11
    1111
    111
    111
    111111
    111111
    111111
      111
      1111
    """
    genotype_indices = [1] * 6
    check_genotyping_single_individual(
        reads,
        weights,
        None,
        canonic_index_list_to_biallelic_gt_list(genotype_indices),
        10,
    )
Example #10
0
def test_small_example():
    """Run the single-individual genotyping check on two complementary reads."""
    reads = """
    11111111
    00000000
    """
    genotype_indices = [1] * 8
    check_genotyping_single_individual(
        reads,
        None,
        None,
        canonic_index_list_to_biallelic_gt_list(genotype_indices),
        1000,
    )
Example #11
0
def bipartition(reads):
    """Phase a single individual from `reads` and return the phasing and read partition.

    A one-person pedigree is built in which every variant position gets
    genotype index 1 and no genotype likelihoods; the pedigree DP table is
    then run with `distrust_genotypes=False`. The optimal cost and some
    derived statistics are reported through `eprint`.

    Returns:
        A tuple `(phasing, partition)`: the super reads returned by the DP
        table and its optimal partitioning of the reads.
    """
    num_positions = len(reads.get_positions())

    # Single individual, heterozygous (=1) everywhere, without likelihoods.
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual(
        'individual0',
        canonic_index_list_to_biallelic_gt_list([1] * num_positions),
        [None] * num_positions,
    )

    # The recombination costs are irrelevant when there is only one individual.
    uniform_recombcost = [1] * num_positions

    # Core phasing algorithm: fill the DP table and read the results back.
    dp_table = PedigreeDPTable(
        reads, uniform_recombcost, pedigree, distrust_genotypes=False)
    phasing, _transmission_vector = dp_table.get_super_reads()

    mec_score = dp_table.get_optimal_cost()
    eprint("MEC Score:", mec_score)
    # NOTE(review): `readset_length` is not defined inside this function;
    # presumably it is a module-level name — confirm it exists and is non-zero.
    normalized_score = float(mec_score) / float(readset_length)
    eprint("MEC Score / readset length:", normalized_score)

    # In case the bi-partition of reads is of interest:
    partition = dp_table.get_optimal_partitioning()
    partition_fraction = sum(partition) / float(len(partition))
    eprint("partition fraction:", partition_fraction)

    return phasing, partition
Example #12
0
def test_geno1():
    """Run the single-individual genotyping check on two reads that agree on the last five variants."""
    reads = """
    1111111111
    0000011111
    """

    # Index 1 for the first five variants, index 2 for the last five.
    genotype_indices = [1] * 5 + [2] * 5
    check_genotyping_single_individual(
        reads,
        None,
        None,
        canonic_index_list_to_biallelic_gt_list(genotype_indices),
        10,
    )
Example #13
0
def test_geno8():
    """Run the single-individual genotyping check on three short reads over two variants."""
    reads = """
    11
    11
    10
    """
    genotype_indices = [2, 1]
    check_genotyping_single_individual(
        reads,
        None,
        None,
        canonic_index_list_to_biallelic_gt_list(genotype_indices),
        10,
    )
Example #14
0
def test_genotyping_empty_readset():
    """Constructing a GenotypeDPTable from an empty read set must not raise."""
    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    pedigree.add_individual(
        "individual0",
        canonic_index_list_to_biallelic_gt_list([1, 1]),
        [None, None],
    )
    empty_readset = ReadSet()
    # Only the construction itself is under test; the table is discarded.
    _ = GenotypeDPTable(numeric_sample_ids, empty_readset, [1, 1], pedigree)
Example #15
0
def test_phase_trio3():
    """Phase a trio where one change of transmission between variants 3 and 4 is expected."""
    num_variants = 6
    reads = """
      A 1111
      B 1010
      C 111000
      C 010101
      B 0101
      A  0000
      B  1010
      C  1010
      C  1100
      A   0000
      A   1111
      B   1010
      B    010
    """
    genotype_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
    )
    pedigree = Pedigree(NumericSampleIds())
    for name, indices in genotype_indices:
        pedigree.add_individual(
            name, canonic_index_list_to_biallelic_gt_list(indices))
    pedigree.add_relationship("individual0", "individual1", "individual2")

    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, [3, 3, 3, 4, 3, 3], pedigree)
    assert cost == 4

    # The transmission value must switch exactly once, after the third variant.
    accepted_vectors = tuple(
        [first] * 3 + [second] * 3
        for first, second in ((0, 1), (1, 0), (2, 3), (3, 2))
    )
    assert transmission_vector in accepted_vectors

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, num_variants)
    assert_trio_allele_order(superreads_list, transmission_vector, num_variants)
Example #16
0
def test_phase_trio_genotype_likelihoods():
    """Phase a trio whose individuals carry genotype likelihoods next to their genotypes."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    num_variants = 3
    reads = """
      A 111
      A 010
      A 110
      B 001
      B 110
      B 101
      C 001
      C 010
      C 010
    """
    genotype_likelihoods_mother = [
        PhredGenotypeLikelihoods(values)
        for values in ([0, 0, 0], [0, 0, 1], [5, 0, 5])
    ]
    # The other two individuals share one list of all-zero likelihoods.
    genotype_likelihoods0 = [PhredGenotypeLikelihoods([0, 0, 0])] * 3

    pedigree = Pedigree(NumericSampleIds())
    members = (
        ("individual0", genotype_likelihoods_mother),
        ("individual1", genotype_likelihoods0),
        ("individual2", genotype_likelihoods0),
    )
    for name, likelihoods in members:
        pedigree.add_individual(name, to_genotypes([0, 0, 0]), likelihoods)
    pedigree.add_relationship("individual0", "individual1", "individual2")

    superreads_list, transmission_vector, cost = phase_pedigree(
        reads, [10, 10, 10], pedigree, True)

    assert cost == 3
    assert len(set(transmission_vector)) == 1
    all_expected_haplotypes = [("111", "010"), ("001", "110"), ("001", "010")]
    assert_haplotypes(superreads_list, all_expected_haplotypes, num_variants)
    assert_trio_allele_order(superreads_list, transmission_vector, num_variants)
def test_genotyping_trio13():
    """Genotype a trio with reads for A and B only and a very high recombination cost."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    num_variants = 6
    reads = """
      A 1111
      A 0000
      B 1111
      B 0000
    """

    # Genotype index 1 is expected at every variant of every individual.
    expected_genotypes = [to_genotypes([1] * num_variants) for _ in range(3)]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    # (name, likelihood values repeated for every variant)
    members = (
        ("individual0", [0, 1, 0]),
        ("individual1", [0, 1, 0]),
        ("individual2", [0.25, 0.5, 0.25]),
    )
    for name, likelihood_values in members:
        pedigree.add_individual(
            name,
            to_genotypes([0] * num_variants),
            [PhredGenotypeLikelihoods(likelihood_values)] * num_variants,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [1000000] * num_variants
    genotype_pedigree(
        numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes, scaling=1000,
    )
def test_genotyping_trio1():
    """Genotype a trio over two variants, starting from uniform genotype likelihoods."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    reads = """
      A 00
      A 00
      B 11
      B 11
      C 11
      C 00
    """

    # Expected genotype indices for individual0, individual1 and individual2.
    expected_genotypes = [
        to_genotypes(indices) for indices in ([0, 0], [2, 2], [1, 1])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    members = ("individual0", "individual1", "individual2")
    for name in members:
        uniform = PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
        pedigree.add_individual(name, to_genotypes([1, 1]), [uniform] * 2)
    pedigree.add_relationship(*members)

    genotype_pedigree(numeric_sample_ids, reads, [10, 10], pedigree, expected_genotypes)
def test_genotyping_quartet4():
    """Genotype a quartet (two parents, two children) from uniform genotype likelihoods."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    num_variants = 6
    reads = """
      A 1111
      A 0000
      B 1010
      C 111000
      C 010101
      D 000000
      D 010
      B 0101
      C  1100
      D  10010
      A   0000
      A   1111
      B   1010
      B   0101
    """
    # Expected genotype indices, one row per individual (individual0..individual3).
    expected_indices = (
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 2, 1, 1, 0, 1],
        [0, 1, 0, 0, 1, 0],
    )
    expected_genotypes = [to_genotypes(indices) for indices in expected_indices]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for name in ("individual0", "individual1", "individual2", "individual3"):
        uniform = PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
        pedigree.add_individual(
            name, to_genotypes([0] * num_variants), [uniform] * num_variants)
    # Both children descend from the same two parents.
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 4, 3, 3]
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
Example #20
0
def test_geno_exact1():
    """Run the single-individual genotyping check against exact expected likelihoods."""
    reads = """
          11
           01
        """

    # Likelihood triples for the three variants; the outer two are identical.
    outer = [0.06666666666666667, 0.3333333333333333, 0.6]
    middle = [0.20930232558139536, 0.5813953488372093, 0.20930232558139536]
    expected_likelihoods = [
        PhredGenotypeLikelihoods(list(values)) for values in (outer, middle, outer)
    ]
    check_genotyping_single_individual(
        reads,
        None,
        expected_likelihoods,
        canonic_index_list_to_biallelic_gt_list([2, 1, 2]),
        10,
    )
def test_weighted_genotyping():
    """Genotype a trio from weighted reads and compare genotypes and likelihoods."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    reads = """
      B 00
      B 11
      A 11
      A 00
      C 11
      C 11
    """
    weights = """
      99
      99
      99
      99
      99
      99
    """
    expected_genotypes = [
        to_genotypes(indices) for indices in ([1, 1], [1, 1], [2, 2])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    members = ("individual0", "individual1", "individual2")
    for name in members:
        pedigree.add_individual(
            name,
            to_genotypes([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([0.25, 0.5, 0.25])] * 4,
        )
    pedigree.add_relationship(*members)

    # recombination is extremely unlikely
    recombcost = [1000] * 4

    # Expected likelihood triples per individual index, one triple per variant.
    expected_likelihoods = {
        0: [[0, 1, 0] for _ in range(2)],
        1: [[0, 1, 0] for _ in range(2)],
        2: [[0, 1.0 / 3.0, 2 / 3.0] for _ in range(2)],
    }
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        weights,
        expected_likelihoods,
        scaling=500,
    )
Example #22
0
def test_phase_empty_readset(algorithm):
    """Requesting super reads for an empty read set must not raise.

    `algorithm` selects the core: "hapchat" uses HapChatCore, anything else
    uses PedigreeDPTable.
    """
    empty_readset = ReadSet()
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual(
        "individual0",
        canonic_index_list_to_biallelic_gt_list([1, 1]),
        [None, None],
    )

    dp_table = (
        HapChatCore(empty_readset)
        if algorithm == "hapchat"
        else PedigreeDPTable(empty_readset, [1, 1], pedigree)
    )

    # Only the absence of an exception is checked; the result is discarded.
    _ = dp_table.get_super_reads()
Example #23
0
def test_geno_exact2():
    """Run the weighted single-individual genotyping check against exact likelihoods."""
    reads = """
        11
        11
        """
    weights = """
        11
        11
        """

    # Both variants are expected to get the same likelihood triple.
    expected_likelihoods = [
        PhredGenotypeLikelihoods(
            [0.00914139256727894, 0.25040580948312685, 0.7404527979495942])
        for _ in range(2)
    ]
    check_genotyping_single_individual(
        reads,
        weights,
        expected_likelihoods,
        canonic_index_list_to_biallelic_gt_list([2, 2]),
        10,
    )
Example #24
0
def test_geno_10():
    """Run the single-individual genotyping check with per-variant genotype priors."""
    reads = """
    001100
    000000
    000000
    110011
    110011
    111111
         """
    genotype_indices = [1, 1, 0, 0, 1, 1]

    # Two prior shapes: one favouring the middle entry, one favouring the first.
    favour_middle = (0.1, 0.8, 0.1)
    favour_first = (0.7, 0.2, 0.1)
    prior_values = (
        favour_middle,
        favour_middle,
        favour_first,
        favour_first,
        favour_middle,
        favour_middle,
    )
    genotype_priors = [PhredGenotypeLikelihoods(list(values)) for values in prior_values]

    check_genotyping_single_individual(
        reads,
        None,
        None,
        canonic_index_list_to_biallelic_gt_list(genotype_indices),
        50,
        genotype_priors,
    )
Example #25
0
def test_geno6():
    """Run the single-individual genotyping check on fifteen reads over thirteen variants."""
    reads = """
        0100000000000
        0100010000000
        1110000000010
        0100000000000
        0101000001000
        0100010   000
        0 10000000100
        1111111011100
        0100111010011
        1111111000111
        1111110011111
        11110000  000
        1110011011111
        1111001011111
        0111111110  1
        """
    genotype_indices = [1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
    check_genotyping_single_individual(
        reads,
        None,
        None,
        canonic_index_list_to_biallelic_gt_list(genotype_indices),
        60,
    )
def test_genotyping_trio10():
    """Genotype a trio in which only samples A and B have reads."""
    to_genotypes = canonic_index_list_to_biallelic_gt_list
    num_variants = 4
    reads = """
      B 0000
      B 0000
      B 0000
      B 0000
      B 0000
      B 0000
      A 1111
      A 1111
      A 1111
      A 1111
      A 1111
      A 1111
    """

    # no reads for child, but genotype must be 1/0 for each pos. (due to inheritance)
    expected_genotypes = [
        to_genotypes([index] * num_variants) for index in (2, 0, 1)
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    members = ("individual0", "individual1", "individual2")
    for name in members:
        uniform = PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
        pedigree.add_individual(
            name, to_genotypes([0] * num_variants), [uniform] * num_variants)
    pedigree.add_relationship(*members)

    recombcost = [10] * num_variants
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)