def genotype_pedigree(
    numeric_sample_ids,
    reads,
    recombcost,
    pedigree,
    expected_genotypes,
    weights=None,
    expected=None,
    scaling=10,
    positions=None,
):
    """Run the genotyping DP on ``reads`` and assert the results match expectations.

    A read set is built from ``reads`` (with optional ``weights`` and the
    ``scaling`` quality factor), a ``GenotypeDPTable`` is constructed from it,
    and then for every position index and every individual index the genotype
    likelihoods are fetched and checked.

    Args:
        numeric_sample_ids: Passed through to ``GenotypeDPTable``.
        reads: Read description handed to ``string_to_readset_pedigree``.
        recombcost: Passed through to ``GenotypeDPTable``.
        pedigree: Passed through to ``GenotypeDPTable``; its length determines
            how many individuals ("individual0", "individual1", ...) are checked.
        expected_genotypes: Indexed as ``[individual][position]``; the genotype
            with the highest likelihood must equal this entry.
        weights: Optional weights handed to ``string_to_readset_pedigree``.
        expected: Optional, indexed as ``[individual][position]``; when given,
            each entry is wrapped in ``PhredGenotypeLikelihoods`` and must
            compare equal to the computed likelihoods.
        scaling: Forwarded as ``scaling_quality`` when building the read set.
        positions: Passed through to ``GenotypeDPTable``.

    Raises:
        AssertionError: If a likelihood is NaN, or any comparison fails.
    """
    readset = string_to_readset_pedigree(s=reads, w=weights, scaling_quality=scaling)
    table = GenotypeDPTable(numeric_sample_ids, readset, recombcost, pedigree, positions)

    print("expected genotypes: ", expected_genotypes)

    # Iterate by column index of the read set, not by genomic coordinate.
    column_count = len(readset.get_positions())
    for column in range(column_count):
        for sample_index in range(len(pedigree)):
            sample_name = "individual" + str(sample_index)
            likelihoods = table.get_genotype_likelihoods(sample_name, column)

            # Optional exact check of the full likelihood vector.
            if expected is not None:
                wanted_likelihoods = expected[sample_index][column]
                print(
                    "likelihoods: ",
                    likelihoods,
                    " expected likelihoods: ",
                    wanted_likelihoods,
                )
                assert likelihoods == PhredGenotypeLikelihoods(wanted_likelihoods)

            # Arg-max over genotypes; strict '>' keeps the first one on ties.
            best_value = -1
            best_genotype = Genotype([])
            for candidate in likelihoods.genotypes():
                value = likelihoods[candidate]
                assert not math.isnan(value)
                if value > best_value:
                    best_value, best_genotype = value, candidate

            wanted_genotype = expected_genotypes[sample_index][column]
            print(
                f"pos.: {column} individual {sample_index}: ",
                likelihoods,
                " expected genotype: ",
                wanted_genotype,
            )
            assert best_genotype == wanted_genotype
        print("\n")
Beispiel #2
0
def phase_pedigree(reads,
                   recombcost,
                   pedigree,
                   distrust_genotypes=False,
                   positions=None):
    """Build a ``PedigreeDPTable`` from ``reads`` and report its solution.

    Prints every super read, the optimal cost, the transmission vector and
    the optimal partitioning obtained from the table.

    Args:
        reads: Read description handed to ``string_to_readset_pedigree``.
        recombcost: Passed through to ``PedigreeDPTable``.
        pedigree: Passed through to ``PedigreeDPTable``.
        distrust_genotypes: Passed through to ``PedigreeDPTable``.
        positions: Passed through to ``PedigreeDPTable``.

    Returns:
        A tuple ``(superreads_list, transmission_vector, cost)`` as obtained
        from ``get_super_reads()`` and ``get_optimal_cost()``.
    """
    readset = string_to_readset_pedigree(reads)
    table = PedigreeDPTable(
        readset, recombcost, pedigree, distrust_genotypes, positions
    )

    solution = table.get_super_reads()
    superreads_list, transmission_vector = solution
    optimal_cost = table.get_optimal_cost()

    # One group of super reads per entry; print each read on its own line.
    for group in superreads_list:
        for super_read in group:
            print(super_read)

    print("Cost:", table.get_optimal_cost())
    print("Transmission vector:", transmission_vector)
    print("Partition:", table.get_optimal_partitioning())

    return superreads_list, transmission_vector, optimal_cost
Beispiel #3
0
def genotype_pedigree(numeric_sample_ids,
                      reads,
                      recombcost,
                      pedigree,
                      expected_genotypes,
                      weights=None,
                      expected=None,
                      scaling=10,
                      positions=None):
    """Run the genotyping DP on ``reads`` and assert the results match expectations.

    In this variant the likelihoods object is addressed by integer index, and
    the index with the highest likelihood is compared to the expected genotype.

    Args:
        numeric_sample_ids: Passed through to ``GenotypeDPTable``.
        reads: Read description handed to ``string_to_readset_pedigree``.
        recombcost: Passed through to ``GenotypeDPTable``.
        pedigree: Passed through to ``GenotypeDPTable``; its length determines
            how many individuals ("individual0", "individual1", ...) are checked.
        expected_genotypes: Indexed as ``[individual][position]``; the index of
            the largest likelihood must equal this entry.
        weights: Optional weights handed to ``string_to_readset_pedigree``.
        expected: Optional, indexed as ``[individual][position]``; when given,
            each entry must compare equal to the computed likelihoods.
        scaling: Forwarded as ``scaling_quality`` when building the read set.
        positions: Passed through to ``GenotypeDPTable``.

    Raises:
        AssertionError: If a likelihood is NaN, or any comparison fails.
    """
    readset = string_to_readset_pedigree(
        s=reads, w=weights, scaling_quality=scaling
    )
    table = GenotypeDPTable(
        numeric_sample_ids, readset, recombcost, pedigree, positions
    )

    print("expected genotypes: ", expected_genotypes)

    # Iterate by column index of the read set, not by genomic coordinate.
    column_count = len(readset.get_positions())
    for column in range(column_count):
        for sample_index in range(len(pedigree)):
            sample_name = "individual" + str(sample_index)
            likelihoods = table.get_genotype_likelihoods(sample_name, column)

            # Optional exact check of the full likelihood vector.
            if expected is not None:
                wanted_likelihoods = expected[sample_index][column]
                print(
                    "likelihoods: ",
                    likelihoods,
                    " expected likelihoods: ",
                    wanted_likelihoods,
                )
                assert likelihoods == wanted_likelihoods

            # Arg-max over genotype indices; strict '>' keeps the first on ties.
            best_value = -1
            best_index = -1
            for genotype_index in range(len(likelihoods)):
                value = likelihoods[genotype_index]
                assert not math.isnan(value)
                if value > best_value:
                    best_value, best_index = value, genotype_index

            wanted_genotype = expected_genotypes[sample_index][column]
            print(
                f"pos.: {column} individual {sample_index}: ",
                likelihoods,
                " expected genotype: ",
                wanted_genotype,
            )
            assert best_index == wanted_genotype
        print("\n")