def test_phase_trio5():
    """Phase a trio with recombination cost 2 per position and check the result.

    Expects a total cost of 3, a transmission vector whose entries are all
    equal, and identical haplotype pairs for all three individuals.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ B 101 B 101 B 101 A 111 A 111 A 111 C 111 C 111 C 111 """

    pedigree = Pedigree(NumericSampleIds())
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(sample, [1, 1, 1])
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [2] * 3
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 3
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [("111", "000")] * 3
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
def create_pedigree(
    default_gq,
    distrust_genotypes,
    family,
    gl_regularizer,
    numeric_sample_ids,
    phasable_variant_table,
    trios,
):
    """Build a Pedigree holding every sample of ``family`` and every trio of ``trios``.

    Each sample is added with its genotypes from ``phasable_variant_table``.
    When ``distrust_genotypes`` is true, a per-variant list of phred genotype
    likelihoods is passed along as well; otherwise ``None`` is passed.

    For a variant without likelihoods (``gl is None``) a fallback is built:
    all three entries are ``default_gq`` except the called genotype's index,
    which is set to 0. Existing likelihoods are converted with
    ``gl.as_phred(regularizer=gl_regularizer)``.

    Returns the populated pedigree.
    """
    pedigree = Pedigree(numeric_sample_ids)

    for sample in family:
        likelihoods = None
        if distrust_genotypes:
            # Genotypes are distrusted, so hand likelihoods on to the pedigree.
            likelihoods = []
            paired = zip(
                phasable_variant_table.genotypes_of(sample),
                phasable_variant_table.genotype_likelihoods_of(sample),
            )
            for gt, gl in paired:
                assert gt.is_diploid_and_biallelic()
                if gl is not None:
                    likelihoods.append(gl.as_phred(regularizer=gl_regularizer))
                    continue
                # No likelihoods available: every genotype gets default_gq ...
                fallback = [default_gq] * 3
                # ... except the called genotype, which gets 0.
                fallback[gt.get_index()] = 0
                likelihoods.append(PhredGenotypeLikelihoods(fallback))

        pedigree.add_individual(
            sample,
            phasable_variant_table.genotypes_of(sample),
            likelihoods,
        )

    for trio in trios:
        pedigree.add_relationship(
            father_id=trio.father,
            mother_id=trio.mother,
            child_id=trio.child,
        )

    return pedigree
def test_phase_trio1():
    """Phase a trio with recombination cost 10 per position and check the result.

    Expects a total cost of 2, a transmission vector whose entries are all
    equal, and the listed haplotype pair for each individual.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111 A 010 A 110 B 001 B 110 B 101 C 001 C 010 C 010 """

    pedigree = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ("individual0", [1, 2, 1]),
        ("individual1", [1, 1, 1]),
        ("individual2", [0, 1, 1]),
    )
    for sample, genotypes in genotypes_by_sample:
        pedigree.add_individual(sample, genotypes)
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 3
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 2
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("111", "010"),
        ("001", "110"),
        ("001", "010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
def test_phase_trio_genotype_likelihoods():
    """Phase a trio whose individuals carry explicit phred genotype likelihoods.

    All genotypes are given as 0; 'individual0' gets position-specific
    likelihoods while the other two get all-zero likelihoods. ``True`` is
    passed as the fourth positional argument of ``phase_pedigree``.
    Expects cost 3, a constant transmission vector and the listed haplotypes.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111 A 010 A 110 B 001 B 110 B 101 C 001 C 010 C 010 """

    pedigree = Pedigree(NumericSampleIds())
    gl_individual0 = [
        PhredGenotypeLikelihoods(0, 0, 0),
        PhredGenotypeLikelihoods(0, 0, 1),
        PhredGenotypeLikelihoods(5, 0, 5),
    ]
    gl_all_zero = [PhredGenotypeLikelihoods(0, 0, 0)] * 3

    likelihoods_by_sample = (
        ("individual0", gl_individual0),
        ("individual1", gl_all_zero),
        ("individual2", gl_all_zero),
    )
    for sample, likelihoods in likelihoods_by_sample:
        pedigree.add_individual(sample, [0, 0, 0], likelihoods)
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 3
    result = phase_pedigree(reads, recombcost, pedigree, True)
    superreads_list, transmission_vector, cost = result

    assert cost == 3
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("111", "010"),
        ("001", "110"),
        ("001", "010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
def test_genotyping_trio10():
    """Genotype a trio in which the reads carry only the prefixes A and B.

    Every individual starts with genotype 0 and uniform likelihoods at four
    positions; ``genotype_pedigree`` is given the expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ B 0000 B 0000 B 0000 B 0000 B 0000 B 0000 A 1111 A 1111 A 1111 A 1111 A 1111 A 1111 """

    # There are no reads for the child, yet inheritance forces its genotype
    # to be 1/0 at each position.
    expected_genotypes = [[2] * 4, [0] * 4, [1] * 4]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            [0, 0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 4,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 4
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_phase_trio4():
    """Phase a trio with recombination cost 1 per position and check the result.

    Expects a total cost of 2, one of four alternating transmission vectors,
    and the listed haplotype pair for each individual.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ B 101 B 101 B 101 A 111 A 111 A 111 C 111 C 111 C 111 """

    pedigree = Pedigree(NumericSampleIds())
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(sample, [1, 1, 1])
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [1] * 3
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 2
    accepted_vectors = ([0, 2, 0], [2, 0, 2], [1, 3, 1], [3, 1, 3])
    assert transmission_vector in accepted_vectors

    all_expected_haplotypes = [
        ("111", "000"),
        ("101", "010"),
        ("111", "000"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
def test_phase_doubletrio_pure_genetic():
    """Phase two chained trios (A, B -> C and C, D -> E) without any reads.

    Expects cost 0, a constant transmission vector, the listed haplotypes and
    a consistent allele order within each of the two trios.
    """
    reads = ""

    pedigree = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individualA", [1, 2, 1, 0]),
        ("individualB", [1, 0, 1, 1]),
        ("individualC", [2, 1, 1, 0]),
        ("individualD", [1, 2, 2, 1]),
        ("individualE", [1, 1, 1, 0]),
    )
    for sample, indices in canonic_indices:
        pedigree.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    pedigree.add_relationship("individualA", "individualB", "individualC")
    pedigree.add_relationship("individualC", "individualD", "individualE")

    recombcost = [2, 2, 2]
    result = phase_pedigree(reads, recombcost, pedigree, positions=[10, 20, 30, 40])
    superreads_list, transmission_vector, cost = result

    assert cost == 0
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("0100", "1110"),
        ("0011", "1000"),
        ("1110", "1000"),
        ("1111", "0110"),
        ("1000", "0110"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 4)

    trio_transmission_vectors = get_trio_transmission_vectors(transmission_vector, 4)
    # individualC is shared: last member of the first trio, first of the second.
    first_trio = superreads_list[:3]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], 4)
    second_trio = superreads_list[2:]
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], 4)
def test_phase_trio3():
    """Phase a trio over six positions with a higher recombination cost at index 3.

    Expects a total cost of 4, a transmission vector that switches once
    between positions 2 and 3, and the listed haplotype pairs.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 1111 B 1010 C 111000 C 010101 B 0101 A 0000 B 1010 C 1010 C 1100 A 0000 A 1111 B 1010 B 010 """

    pedigree = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
    )
    for sample, genotypes in genotypes_by_sample:
        pedigree.add_individual(sample, genotypes)
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [3, 3, 3, 4, 3, 3]
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 4
    accepted_vectors = (
        [0, 0, 0, 1, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [2, 2, 2, 3, 3, 3],
        [3, 3, 3, 2, 2, 2],
    )
    assert transmission_vector in accepted_vectors

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)
def test_genotyping_trio14():
    """Genotype a trio over six positions with a recombination cost of 1000000.

    Every individual starts with genotype 0 and uniform likelihoods;
    ``genotype_pedigree`` is called with ``scaling=1000`` and the expected
    genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111111 A 111111 B 111111 B 000000 C 000000 """

    expected_genotypes = [[2] * 6, [1] * 6, [1] * 6]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            [0, 0, 0, 0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 6,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [1000000] * 6
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        scaling=1000,
    )
def test_phase_trio5():
    """Phase a trio with recombination cost 2 per position and check the result.

    Expects a total cost of 3, a transmission vector whose entries are all
    equal, identical haplotype pairs for all three individuals and a
    consistent allele order within the trio.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ B 101 B 101 B 101 A 111 A 111 A 111 C 111 C 111 C 111 """

    pedigree = Pedigree(NumericSampleIds())
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(sample, canonic_index_list_to_biallelic_gt_list([1, 1, 1]))
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [2] * 3
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 3
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [("111", "000")] * 3
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
    assert_trio_allele_order(superreads_list, transmission_vector, 3)
def test_phase_doubletrio_pure_genetic():
    """Phase two chained trios (A, B -> C and C, D -> E) without any reads.

    Expects cost 0, a constant transmission vector, the listed haplotypes and
    a consistent allele order within each of the two trios.
    """
    reads = ""

    pedigree = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ("individualA", [1, 2, 1, 0]),
        ("individualB", [1, 0, 1, 1]),
        ("individualC", [2, 1, 1, 0]),
        ("individualD", [1, 2, 2, 1]),
        ("individualE", [1, 1, 1, 0]),
    )
    for sample, genotypes in genotypes_by_sample:
        pedigree.add_individual(sample, genotypes)
    pedigree.add_relationship("individualA", "individualB", "individualC")
    pedigree.add_relationship("individualC", "individualD", "individualE")

    recombcost = [2, 2, 2]
    result = phase_pedigree(reads, recombcost, pedigree, positions=[10, 20, 30, 40])
    superreads_list, transmission_vector, cost = result

    assert cost == 0
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("0100", "1110"),
        ("0011", "1000"),
        ("1110", "1000"),
        ("1111", "0110"),
        ("1000", "0110"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 4)

    trio_transmission_vectors = get_trio_transmission_vectors(transmission_vector, 4)
    # individualC is shared: last member of the first trio, first of the second.
    first_trio = superreads_list[:3]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], 4)
    second_trio = superreads_list[2:]
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], 4)
def test_genotyping_trio1():
    """Genotype a trio over two positions with recombination cost 10.

    Every individual starts with genotype 1 and uniform likelihoods;
    ``genotype_pedigree`` is given the expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 00 A 00 B 11 B 11 C 11 C 00 """

    expected_genotypes = [[0] * 2, [2] * 2, [1] * 2]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            [1, 1],
            [PhredGenotypeLikelihoods(third, third, third)] * 2,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 2
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_genotyping_trio1():
    """Genotype a trio over two positions with recombination cost 10.

    Every individual starts with canonic genotype index 1 and uniform
    likelihoods; ``genotype_pedigree`` is given the expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 00 A 00 B 11 B 11 C 11 C 00 """

    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([0, 0], [2, 2], [1, 1])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([1, 1]),
            [PhredGenotypeLikelihoods([third, third, third])] * 2,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 2
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_genotyping_trio13():
    """Genotype a trio where the first two individuals have peaked likelihoods.

    'individual0' and 'individual1' get likelihoods [0, 1, 0], 'individual2'
    gets [0.25, 0.5, 0.25]. ``genotype_pedigree`` is called with
    ``scaling=1000`` and expects canonic index 1 everywhere.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 1111 A 0000 B 1111 B 0000 """

    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list([1, 1, 1, 1, 1, 1]) for _ in range(3)
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    likelihood_values = (
        ("individual0", [0, 1, 0]),
        ("individual1", [0, 1, 0]),
        ("individual2", [0.25, 0.5, 0.25]),
    )
    for sample, values in likelihood_values:
        pedigree.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0, 0, 0]),
            [PhredGenotypeLikelihoods(values)] * 6,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [1000000] * 6
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        scaling=1000,
    )
def test_genotyping_trio5():
    """Genotype a trio over three positions with recombination cost 2.

    Every individual starts with genotype 0 and uniform likelihoods;
    ``genotype_pedigree`` is given the expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ B 101 B 101 B 101 A 111 A 111 A 111 C 111 C 111 C 101 C 101 """

    expected_genotypes = [
        [2, 2, 2],
        [2, 0, 2],
        [2, 1, 2],
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            [0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 3,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [2] * 3
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_phase_quartet2():
    """Phase a quartet (two children sharing both parents) over six positions.

    Expects cost 0, a constant transmission vector and the listed haplotype
    pair for each of the four individuals.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111111 A 000000 B 010101 B 101010 C 000000 C 010101 D 000000 D 010101 """

    pedigree = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [0, 1, 0, 1, 0, 1]),
        ("individual3", [0, 1, 0, 1, 0, 1]),
    )
    for sample, genotypes in genotypes_by_sample:
        pedigree.add_individual(sample, genotypes)
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3] * 6
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 0
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("000000", "010101"),
        ("000000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)
def test_genotyping_quartet3():
    """Genotype a quartet (two children sharing both parents) over six positions.

    Every individual starts with genotype 0 and uniform likelihoods;
    ``genotype_pedigree`` is given the expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111111 A 000000 B 010101 B 101010 C 000000 C 010101 D 000000 D 010101 """

    expected_genotypes = [
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [0, 1, 0, 1, 0, 1],
        [0, 1, 0, 1, 0, 1],
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2", "individual3"):
        pedigree.add_individual(
            sample,
            [0, 0, 0, 0, 0, 0],
            [PhredGenotypeLikelihoods(third, third, third)] * 6,
        )
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3] * 6
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_weighted_genotyping():
    """Genotype a trio with explicit read weights and expected likelihoods.

    ``genotype_pedigree`` receives the weights string and a per-individual
    table of expected likelihoods positionally, plus ``scaling=500``.
    """
    # NOTE(review): literals reproduced verbatim; confirm the one-entry-per-line layout is intact.
    reads = """ B 00 B 11 A 11 A 00 C 11 C 11 """
    weights = """ 99 99 99 99 99 99 """

    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([1, 1], [1, 1], [2, 2])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([0.25, 0.5, 0.25])] * 4,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    # Make recombination extremely unlikely.
    recombcost = [1000] * 4

    one_third = 1.0 / 3.0
    two_thirds = 2 / 3.0
    expected = {
        0: [[0, 1, 0], [0, 1, 0]],
        1: [[0, 1, 0], [0, 1, 0]],
        2: [[0, one_third, two_thirds], [0, one_third, two_thirds]],
    }
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        weights,
        expected,
        scaling=500,
    )
def test_phase_quartet3():
    """Phase a quartet over six positions with a higher recombination cost at index 3.

    Prints the cost and transmission vector, then expects cost 8, the listed
    haplotypes and a consistent allele order in both parent/child trios.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 1111 A 0000 B 1010 C 111000 C 010101 D 000000 D 010 B 0101 C 1100 D 10010 A 0000 A 1111 B 1010 B 0101 """

    pedigree = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
        ("individual3", [0, 1, 0, 0, 1, 0]),
    )
    for sample, indices in canonic_indices:
        pedigree.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 4, 3, 3]
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result
    print(cost)
    print(transmission_vector)

    assert cost == 8
    # TODO: expect transmission in both trio relations. Update once transmission
    # vectors are returned per trio relationship; the disabled check was:
    # assert transmission_vector in ([0,0,0,1,1,1], [1,1,1,0,0,0], [2,2,2,3,3,3], [3,3,3,2,2,2])

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
        ("000000", "010010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)

    trio_transmission_vectors = get_trio_transmission_vectors(transmission_vector, 6)
    first_trio = superreads_list[:3]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], 6)
    # Second trio: same two parents with the fourth individual as child.
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], 6)
def test_genotyping_empty_trio():
    """Construct a GenotypeDPTable for a trio with no reads and no variants.

    The test passes as long as construction does not raise.
    """
    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(sample, [], [])
    pedigree.add_relationship("individual0", "individual1", "individual2")

    # Empty read set and empty recombination costs; the table itself is not inspected.
    GenotypeDPTable(numeric_sample_ids, ReadSet(), [], pedigree)
def test_phase_empty_trio():
    """Build a PedigreeDPTable for a trio with no reads and fetch its super reads.

    The result of ``get_super_reads`` must unpack into a triple of super
    reads plus a transmission vector; nothing else is checked.
    """
    empty_reads = ReadSet()
    no_costs = []

    pedigree = Pedigree(NumericSampleIds())
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(sample, [])
    pedigree.add_relationship("individual0", "individual1", "individual2")

    table = PedigreeDPTable(empty_reads, no_costs, pedigree)
    superreads, _transmission_vector = table.get_super_reads()
    # Unpacking enforces that exactly three super-read entries come back.
    _first, _second, _third = superreads
def test_genotyping_empty_trio():
    """Construct a GenotypeDPTable for a trio with no reads and no variants.

    The test passes as long as construction does not raise.
    """
    empty_reads = ReadSet()
    no_costs = []

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(sample, [], [])
    pedigree.add_relationship("individual0", "individual1", "individual2")

    # The resulting table is not inspected further.
    _table = GenotypeDPTable(numeric_sample_ids, empty_reads, no_costs, pedigree)
def test_genotyping_quartet4():
    """Genotype a quartet over six positions with a higher recombination cost at index 3.

    Every individual starts with canonic genotype index 0 and uniform
    likelihoods; ``genotype_pedigree`` is given the expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 1111 A 0000 B 1010 C 111000 C 010101 D 000000 D 010 B 0101 C 1100 D 10010 A 0000 A 1111 B 1010 B 0101 """

    expected_indices = (
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 2, 1, 1, 0, 1],
        [0, 1, 0, 0, 1, 0],
    )
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices) for indices in expected_indices
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2", "individual3"):
        pedigree.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([third, third, third])] * 6,
        )
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 4, 3, 3]
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)
def test_phase_quartet2():
    """Phase a quartet (two children sharing both parents) over six positions.

    Expects cost 0, a constant transmission vector, the listed haplotypes and
    a consistent allele order in both parent/child trios.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111111 A 000000 B 010101 B 101010 C 000000 C 010101 D 000000 D 010101 """

    pedigree = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [0, 1, 0, 1, 0, 1]),
        ("individual3", [0, 1, 0, 1, 0, 1]),
    )
    for sample, indices in canonic_indices:
        pedigree.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3] * 6
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 0
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("000000", "010101"),
        ("000000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)

    trio_transmission_vectors = get_trio_transmission_vectors(transmission_vector, 6)
    first_trio = superreads_list[:3]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], 6)
    # Second trio: same two parents with the fourth individual as child.
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], 6)
def test_phase_trio_pure_genetic():
    """Phase a single trio without any reads, using genotypes only.

    Expects cost 0, a constant transmission vector and the listed haplotype
    pair for each individual.
    """
    reads = ""

    pedigree = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ("individual0", [2, 1, 1, 0]),
        ("individual1", [1, 2, 2, 1]),
        ("individual2", [1, 1, 1, 0]),
    )
    for sample, genotypes in genotypes_by_sample:
        pedigree.add_individual(sample, genotypes)
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [2] * 3
    result = phase_pedigree(reads, recombcost, pedigree, positions=[10, 20, 30, 40])
    superreads_list, transmission_vector, cost = result

    assert cost == 0
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("1110", "1000"),
        ("1111", "0110"),
        ("1000", "0110"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 4)
def test_phase_trio3():
    """Phase a trio over six positions with a higher recombination cost at index 3.

    Expects a total cost of 4, a transmission vector that switches once
    between positions 2 and 3, the listed haplotypes and a consistent allele
    order within the trio.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 1111 B 1010 C 111000 C 010101 B 0101 A 0000 B 1010 C 1010 C 1100 A 0000 A 1111 B 1010 B 010 """

    pedigree = Pedigree(NumericSampleIds())
    canonic_indices = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
    )
    for sample, indices in canonic_indices:
        pedigree.add_individual(sample, canonic_index_list_to_biallelic_gt_list(indices))
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [3, 3, 3, 4, 3, 3]
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result

    assert cost == 4
    accepted_vectors = (
        [0, 0, 0, 1, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [2, 2, 2, 3, 3, 3],
        [3, 3, 3, 2, 2, 2],
    )
    assert transmission_vector in accepted_vectors

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)
    assert_trio_allele_order(superreads_list, transmission_vector, 6)
def test_phase_trio_genotype_likelihoods():
    """Phase a trio whose individuals carry explicit phred genotype likelihoods.

    All genotypes are given as canonic index 0; 'individual0' gets
    position-specific likelihoods while the other two get all-zero
    likelihoods. ``True`` is passed as the fourth positional argument of
    ``phase_pedigree``. Expects cost 3, a constant transmission vector, the
    listed haplotypes and a consistent allele order within the trio.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 111 A 010 A 110 B 001 B 110 B 101 C 001 C 010 C 010 """

    pedigree = Pedigree(NumericSampleIds())
    gl_individual0 = [
        PhredGenotypeLikelihoods(values)
        for values in ([0, 0, 0], [0, 0, 1], [5, 0, 5])
    ]
    gl_all_zero = [PhredGenotypeLikelihoods([0, 0, 0])] * 3

    likelihoods_by_sample = (
        ("individual0", gl_individual0),
        ("individual1", gl_all_zero),
        ("individual2", gl_all_zero),
    )
    for sample, likelihoods in likelihoods_by_sample:
        pedigree.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0]),
            likelihoods,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 3
    result = phase_pedigree(reads, recombcost, pedigree, True)
    superreads_list, transmission_vector, cost = result

    assert cost == 3
    # All entries of the transmission vector must be the same value.
    distinct_transmissions = set(transmission_vector)
    assert len(distinct_transmissions) == 1

    all_expected_haplotypes = [
        ("111", "010"),
        ("001", "110"),
        ("001", "010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 3)
    assert_trio_allele_order(superreads_list, transmission_vector, 3)
def test_phase_quartet3():
    """Phase a quartet over six positions with a higher recombination cost at index 3.

    Prints the cost and transmission vector, then expects cost 8, the listed
    haplotypes and a consistent allele order in both parent/child trios.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ A 1111 A 0000 B 1010 C 111000 C 010101 D 000000 D 010 B 0101 C 1100 D 10010 A 0000 A 1111 B 1010 B 0101 """

    pedigree = Pedigree(NumericSampleIds())
    genotypes_by_sample = (
        ("individual0", [1, 1, 1, 1, 1, 1]),
        ("individual1", [1, 1, 1, 1, 1, 1]),
        ("individual2", [1, 2, 1, 1, 0, 1]),
        ("individual3", [0, 1, 0, 0, 1, 0]),
    )
    for sample, genotypes in genotypes_by_sample:
        pedigree.add_individual(sample, genotypes)
    for child in ("individual2", "individual3"):
        pedigree.add_relationship("individual0", "individual1", child)

    recombcost = [3, 3, 3, 4, 3, 3]
    result = phase_pedigree(reads, recombcost, pedigree)
    superreads_list, transmission_vector, cost = result
    print(cost)
    print(transmission_vector)

    assert cost == 8
    # TODO: expect transmission in both trio relations. Update once transmission
    # vectors are returned per trio relationship; the disabled check was:
    # assert transmission_vector in ([0,0,0,1,1,1], [1,1,1,0,0,0], [2,2,2,3,3,3], [3,3,3,2,2,2])

    all_expected_haplotypes = [
        ("111111", "000000"),
        ("010101", "101010"),
        ("111000", "010101"),
        ("000000", "010010"),
    ]
    assert_haplotypes(superreads_list, all_expected_haplotypes, 6)

    trio_transmission_vectors = get_trio_transmission_vectors(transmission_vector, 6)
    first_trio = superreads_list[:3]
    assert_trio_allele_order(first_trio, trio_transmission_vectors[0], 6)
    # Second trio: same two parents with the fourth individual as child.
    second_trio = [superreads_list[0], superreads_list[1], superreads_list[3]]
    assert_trio_allele_order(second_trio, trio_transmission_vectors[1], 6)
def test_weighted_genotyping():
    """Genotype a trio with explicit read weights and expected likelihoods.

    ``genotype_pedigree`` receives the weights string and a per-individual
    table of expected likelihoods positionally, plus ``scaling=500``.
    """
    # NOTE(review): literals reproduced verbatim; confirm the one-entry-per-line layout is intact.
    reads = """ B 00 B 11 A 11 A 00 C 11 C 11 """
    weights = """ 99 99 99 99 99 99 """

    expected_genotypes = [
        [1, 1],
        [1, 1],
        [2, 2],
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            [0, 0, 0, 0],
            [PhredGenotypeLikelihoods(0.25, 0.5, 0.25)] * 4,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    # Make recombination extremely unlikely.
    recombcost = [1000] * 4

    one_third = 1.0 / 3.0
    two_thirds = 2 / 3.0
    expected = {
        0: [[0, 1, 0], [0, 1, 0]],
        1: [[0, 1, 0], [0, 1, 0]],
        2: [[0, one_third, two_thirds], [0, one_third, two_thirds]],
    }
    genotype_pedigree(
        numeric_sample_ids,
        reads,
        recombcost,
        pedigree,
        expected_genotypes,
        weights,
        expected,
        scaling=500,
    )
def test_genotyping_trio10():
    """Genotype a trio in which the reads carry only the prefixes A and B.

    Every individual starts with canonic genotype index 0 and uniform
    likelihoods at four positions; ``genotype_pedigree`` is given the
    expected genotypes to check.
    """
    # NOTE(review): literal reproduced verbatim; confirm the one-read-per-line layout is intact.
    reads = """ B 0000 B 0000 B 0000 B 0000 B 0000 B 0000 A 1111 A 1111 A 1111 A 1111 A 1111 A 1111 """

    # There are no reads for the child, yet inheritance forces its genotype
    # to be 1/0 at each position.
    expected_genotypes = [
        canonic_index_list_to_biallelic_gt_list(indices)
        for indices in ([2, 2, 2, 2], [0, 0, 0, 0], [1, 1, 1, 1])
    ]

    numeric_sample_ids = NumericSampleIds()
    pedigree = Pedigree(numeric_sample_ids)
    third = 1.0 / 3.0
    for sample in ("individual0", "individual1", "individual2"):
        pedigree.add_individual(
            sample,
            canonic_index_list_to_biallelic_gt_list([0, 0, 0, 0]),
            [PhredGenotypeLikelihoods([third, third, third])] * 4,
        )
    pedigree.add_relationship("individual0", "individual1", "individual2")

    recombcost = [10] * 4
    genotype_pedigree(numeric_sample_ids, reads, recombcost, pedigree, expected_genotypes)