def test_003_check_merge_multi(self):
    # Overlapping SNPs and indels on two haplotypes must be merged into one
    # multi-allelic variant whose GT keeps the haplotype order ('1|2').
    ref_seq = 'ATGGTATGCGATTGACC'
    chrom = 'chrom1'

    def hom_alt(pos, ref, alt, qual):
        # Build a homozygous-alt call; each call gets its own genotype dict.
        return Variant(chrom, pos, ref, alt=alt, qual=qual,
                       genotype_data={'GT': '1/1'})

    # Haplotype 1: a deletion, a SNP and two single-base insertions (qual 5).
    hap1_calls = (
        (0, 'ATG', 'G'),
        (4, 'T', 'G'),
        (7, 'G', 'GG'),
        (9, ref_seq[9], ref_seq[9] + 'T'),
    )
    hap1 = [hom_alt(pos, ref, alt, 5) for pos, ref, alt in hap1_calls]

    # Haplotype 2: an insertion followed by a long deletion (qual 10).
    hap2_calls = (
        (1, 'T', 'TT'),
        (2, ref_seq[2:10], ref_seq[2]),
    )
    hap2 = [hom_alt(pos, ref, alt, 10) for pos, ref, alt in hap2_calls]

    # POS  0  1  2  3  4  5  6  7  8  9  10
    # REF  A  T  G  G  T  A  T  G  C  G  A
    # H1   -  -  G  G  g  A  T  Gg C  Gt A
    # H2   A  Tt G  -  -  -  -  -  -  -  A

    # The merged record spans reference positions 0-9 with one alt per
    # haplotype.
    merged_ref = "ATGGTATGCG"
    hap1_alt = "GGGATGGCGT"
    hap2_alt = "ATTG"
    wanted = Variant(chrom, 0, merged_ref, alt=[hap1_alt, hap2_alt],
                     qual=7.5, genotype_data={'GT': '1|2'})

    comb_interval, trees = self.intervaltree_prep(hap1, hap2, ref_seq)
    # preserve phase otherwise alts could be switched around
    merged = _merge_variants(
        comb_interval, trees, ref_seq, discard_phase=False)

    # Compare attribute by attribute so a failure names the mismatching field.
    for attr in ('chrom', 'pos', 'ref', 'qual', 'alt', 'gt', 'phased'):
        self.assertEqual(
            getattr(wanted, attr),
            getattr(merged, attr),
            'Merging failed for {}:{} {}.'.format(
                wanted.chrom, wanted.pos+1, attr))
def test_002_check_merge_snps(self):
    # Two different SNPs at the same position, one per haplotype, must be
    # merged into a single multi-allelic variant with one alt per haplotype.
    ref_seq = 'ATGGTATGCGATTGACC'
    chrom = 'chrom1'

    # Genotypes are passed as `genotype_data`, the keyword the other merge
    # tests in this file use when constructing a Variant.
    h1 = [
        Variant(chrom, 0, 'A', alt='C', qual=5,
                genotype_data={'GT': '1/1'}),
    ]
    h2 = [
        Variant(chrom, 0, 'A', alt='T', qual=10,
                genotype_data={'GT': '1/1'}),
    ]

    # Phase is requested below (discard_phase=False), so the expected
    # genotype is written phased ('1|2'), matching the sibling merge tests.
    expt = Variant(chrom, 0, 'A', alt=['C', 'T'], qual=7.5,
                   genotype_data={'GT': '1|2'})

    comb_interval, trees = self.intervaltree_prep(h1, h2, ref_seq)
    # preserve phase otherwise alts could be switched around
    got = _merge_variants(comb_interval, trees, ref_seq, discard_phase=False)

    # 'phased' is compared as well so that a lost phase is reported instead
    # of passing silently.
    for key in ('chrom', 'pos', 'ref', 'qual', 'alt', 'gt', 'phased'):
        expected = getattr(expt, key)
        result = getattr(got, key)
        self.assertEqual(
            expected, result,
            'Merging failed for {}:{} {}.'.format(
                expt.chrom, expt.pos + 1, key))
def test_004_check_merge_multi_bug(self):
    # Regression check: when two indels on one haplotype cancel each other
    # out (e.g. insertion of a T followed by a deletion of a T), the merged
    # variant must not carry an alt that is the same as the ref.
    ref_seq = 'TTTTTTTTTT'
    chrom = 'chrom1'

    def hom_alt(pos, ref, alt, qual):
        # Build a homozygous-alt call; each call gets its own genotype dict.
        return Variant(chrom, pos, ref, alt=alt, qual=qual,
                       genotype_data={'GT': '1/1'})

    # Haplotype 1 carries a single four-base deletion.
    hap1 = [hom_alt(0, 'TTTTT', 'T', 5)]
    # Haplotype 2 carries an insertion and a deletion of one T each.
    hap2 = [
        hom_alt(1, 'T', 'TT', 10),
        hom_alt(3, 'TT', 'T', 10),
    ]

    # Only the haplotype-1 deletion is expected to remain, with the second
    # allele reported as reference ('1|0').
    wanted = Variant(chrom, 0, 'TTTTT', alt='T', qual=5,
                     genotype_data={'GT': '1|0'})

    comb_interval, trees = self.intervaltree_prep(hap1, hap2, ref_seq)
    # preserve phase otherwise alts could be switched around
    merged = _merge_variants(
        comb_interval, trees, ref_seq, discard_phase=False)

    # Compare attribute by attribute so a failure names the mismatching field.
    for attr in ('chrom', 'pos', 'ref', 'qual', 'alt', 'gt', 'phased'):
        self.assertEqual(
            getattr(wanted, attr),
            getattr(merged, attr),
            'Merging failed for {}:{} {}.'.format(
                wanted.chrom, wanted.pos + 1, attr))