Beispiel #1
0
def check_genotyping_single_individual(
    reads,
    weights=None,
    expected=None,
    genotypes=None,
    scaling=None,
    genotype_priors=None,
):
    # 0) set up read set
    readset = string_to_readset(s=reads, w=weights, scale_quality=scaling)
    positions = readset.get_positions()

    # 1) Genotype using forward backward algorithm
    recombcost = [1] * len(positions)
    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    genotype_likelihoods = [
        PhredGenotypeLikelihoods([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
    ] * len(positions)

    if genotype_priors is not None:
        genotype_likelihoods = genotype_priors

    pedigree.add_individual(
        "individual0",
        [canonic_index_to_biallelic_gt(1) for i in range(len(positions))],
        genotype_likelihoods,
    )
    dp_forward_backward = GenotypeDPTable(numeric_sample_ids, readset,
                                          recombcost, pedigree)

    # check the results
    compare_to_expected(dp_forward_backward, positions, expected, genotypes)
Beispiel #2
0
def test_read_phased():
    tables = list(VcfReader("tests/data/phasedinput.vcf", phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "ref"
    assert table.samples == ["sample"]
    assert len(table.variants) == 2
    assert table.variants[0].reference_allele == "A"
    assert table.variants[0].alternative_allele == "C"
    assert table.variants[1].reference_allele == "G"
    assert table.variants[1].alternative_allele == "T"
    assert table.genotypes[0][0] == table.genotypes[0][1] == canonic_index_to_biallelic_gt(1)
def verify(rs, all_heterozygous=False):
    positions = rs.get_positions()
    # recombination costs 1, should not occur
    recombcost = [1] * len(positions)
    pedigree = Pedigree(NumericSampleIds())
    genotype_likelihoods = [
        None if all_heterozygous else PhredGenotypeLikelihoods([0, 0, 0])
    ] * len(positions)
    # all genotypes heterozygous
    pedigree.add_individual(
        "individual0",
        [canonic_index_to_biallelic_gt(1) for _ in range(len(positions))],
        genotype_likelihoods,
    )
    dp_table = PedigreeDPTable(rs, recombcost, pedigree, distrust_genotypes=not all_heterozygous)
    verify_mec_score_and_partitioning(dp_table, rs)
Beispiel #4
0
def check_phasing_single_individual(reads, algorithm="whatshap", weights=None):
    # 0) set up read set
    readset = string_to_readset(reads, weights)
    positions = readset.get_positions()

    # for hapchat
    if algorithm == "hapchat":
        dp_table = HapChatCore(readset)
        superreads = dp_table.get_super_reads()
        cost = dp_table.get_optimal_cost()
        partition = dp_table.get_optimal_partitioning()
        compare_phasing_brute_force(superreads[0][0], cost, partition, readset,
                                    True, weights, algorithm)
        return

    # 1) Phase using PedMEC code for single individual
    for all_heterozygous in [False, True]:
        recombcost = [1] * len(
            positions)  # recombination costs 1, should not occur
        pedigree = Pedigree(NumericSampleIds())
        genotype_likelihoods = [
            None if all_heterozygous else PhredGenotypeLikelihoods([0, 0, 0])
        ] * len(positions)
        pedigree.add_individual(
            "individual0",
            [canonic_index_to_biallelic_gt(1) for i in range(len(positions))],
            genotype_likelihoods,
        )  # all genotypes heterozygous
        dp_table = PedigreeDPTable(readset,
                                   recombcost,
                                   pedigree,
                                   distrust_genotypes=not all_heterozygous)
        superreads, transmission_vector = dp_table.get_super_reads()
        cost = dp_table.get_optimal_cost()
        # TODO: transmission vectors not returned properly, see issue 73
        assert len(set(transmission_vector)) == 1
        partition = dp_table.get_optimal_partitioning()
        compare_phasing_brute_force(superreads[0], cost, partition, readset,
                                    all_heterozygous, weights)

    # 2) Phase using PedMEC code for trios with two "empty" individuals (i.e. having no reads)
    for all_heterozygous in [False, True]:
        recombcost = [1] * len(
            positions)  # recombination costs 1, should not occur
        pedigree = Pedigree(NumericSampleIds())
        genotype_likelihoods = [
            None if all_heterozygous else PhredGenotypeLikelihoods([0, 0, 0])
        ] * len(positions)
        pedigree.add_individual(
            "individual0",
            [canonic_index_to_biallelic_gt(1) for _ in range(len(positions))],
            genotype_likelihoods,
        )  # all genotypes heterozygous
        pedigree.add_individual(
            "individual1",
            [canonic_index_to_biallelic_gt(1) for _ in range(len(positions))],
            genotype_likelihoods,
        )  # all genotypes heterozygous
        pedigree.add_individual(
            "individual2",
            [canonic_index_to_biallelic_gt(1) for _ in range(len(positions))],
            genotype_likelihoods,
        )  # all genotypes heterozygous
        pedigree.add_relationship("individual0", "individual1", "individual2")
        dp_table = PedigreeDPTable(readset,
                                   recombcost,
                                   pedigree,
                                   distrust_genotypes=not all_heterozygous)
        cost = dp_table.get_optimal_cost()
        superreads, transmission_vector = dp_table.get_super_reads()
        assert len(set(transmission_vector)) == 1
        partition = dp_table.get_optimal_partitioning()
        compare_phasing_brute_force(superreads[0], cost, partition, readset,
                                    all_heterozygous, weights)