class MergeCoOccurVar(unittest.TestCase):
    """Test cases for mergedRecord() applied to two variants located on chr1.

    Every test sets the position and the alleles of the two input records,
    describes the record expected from their merge and compares the string
    forms (strVariant) of the observed and the expected records.

    NOTE(review): another class with this same name appears further down in
    this source. If both definitions live in one module, the later one rebinds
    the name and these tests are presumably no longer collected - confirm
    which definition is meant to be kept.
    """

    def setUp(self):
        """Build the fake VCF handle, the reference sequence and the template records."""
        # Header declarations handed to the fake VCF handle
        info_declarations = {
            "AF": HeaderInfoAttr("AF", "Alternative alleles frequencies", "Float", "A")
        }
        format_declarations = {
            "AD": HeaderFormatAttr("AD", "Alternative alleles depths", "Integer", "A"),
            "DP": HeaderFormatAttr("DP", "total depth", "Integer", "1")
        }
        self.vcfio = FakeVCFIO(info_declarations, format_declarations)

        # Reference sequence with a 1-based position ruler
        self.ref_seq = "ACGCAAATCTCGGCATGCCGATT"
        #               | | | | | |  |  |  |  |
        #               1 3 5 7 9 11 14 17 20 23

        # Records are created without pos/ref/alt: each test fills them in.
        # Positional arguments: chrom, pos, id, ref, alt, qual, filter, info,
        # format, samples.
        self.variant_1 = VCFRecord(
            "chr1", None, "artificial_1", None, None,
            10,
            ["lowQual", "lowDP"],
            {"AF": [0.05]},
            ["DP", "AD"],
            {
                "splA": {"AD": [10], "DP": 100},
                "splB": {"AD": [40], "DP": 4900},
            }
        )
        self.variant_2 = VCFRecord(
            "chr1", None, None, None, None,
            30,
            ["PASS"],
            {"AF": [0.06]},
            ["DP", "AD"],
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )
        self.expected_merge = VCFRecord(
            "chr1", None, None, None, None,
            20,
            ["lowQual", "lowDP"],
            {
                "AF": [0.06],
                "MCO_QUAL": [10, 30],
                "MCO_VAR": ["chr1:5=A/T", "chr1:20=G/C"]
            },
            ["DP", "AD"],
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )

    def _assertMerge(self, first, second, merged, mco_var):
        """Fill the records, merge the two variants and compare with the expectation.

        first, second and merged are (pos, ref, alt) tuples applied to
        variant_1, variant_2 and expected_merge respectively; alt is a single
        allele and is stored as a one-element list. mco_var is the list
        stored under MCO_VAR in the expected INFO.
        """
        targets = (self.variant_1, self.variant_2, self.expected_merge)
        for record, (pos, ref, alt) in zip(targets, (first, second, merged)):
            record.pos = pos
            record.ref = ref
            record.alt = [alt]
        self.expected_merge.info = {
            "AF": [0.06],
            "MCO_QUAL": [10, 30],
            "MCO_VAR": mco_var
        }
        # Eval
        observed_merge = mergedRecord(
            self.vcfio,
            self.variant_1, self.variant_1.getName(),
            self.variant_2, self.variant_2.getName(),
            self.ref_seq
        )
        self.assertEqual(
            strVariant(observed_merge),
            strVariant(self.expected_merge)
        )

    def testMergedRecord_1_substit(self):
        # A/T at position 5 and G/C at position 20
        self._assertMerge(
            (5, "A", "T"),
            (20, "G", "C"),
            (5, "AAATCTCGGCATGCCG", "TAATCTCGGCATGCCC"),
            ["chr1:5=A/T", "chr1:20=G/C"]
        )

    def testMergedRecord_2_largeSubstit(self):
        # AAAT/TGCA at position 5 and TC/GG at position 10
        self._assertMerge(
            (5, "AAAT", "TGCA"),
            (10, "TC", "GG"),
            (5, "AAATCTC", "TGCACGG"),
            ["chr1:5=AAAT/TGCA", "chr1:10=TC/GG"]
        )

    def testMergedRecord_3_largeCloseSubstit(self):
        # AAAT/TGCA at position 5 directly followed by CT/GG at position 9
        self._assertMerge(
            (5, "AAAT", "TGCA"),
            (9, "CT", "GG"),
            (5, "AAATCT", "TGCAGG"),
            ["chr1:5=AAAT/TGCA", "chr1:9=CT/GG"]
        )

    def testMergedRecord_4_delIns(self):
        # AAAT/- at position 5 and -/GGCATCT at position 10
        self._assertMerge(
            (5, "AAAT", "-"),
            (10, "-", "GGCATCT"),
            (5, "AAATC", "CGGCATCT"),
            ["chr1:5=AAAT/-", "chr1:10=-/GGCATCT"]
        )

    def testMergedRecord_5_coDelIns(self):
        # AAAT/- at position 5 and -/AGG at position 9
        self._assertMerge(
            (5, "AAAT", "-"),
            (9, "-", "AGG"),
            (5, "AAAT", "AGG"),
            ["chr1:5=AAAT/-", "chr1:9=-/AGG"]
        )

    def testMergedRecord_6_insDel(self):
        # -/GTGTG at position 5 and ATC/- at position 7
        self._assertMerge(
            (5, "-", "GTGTG"),
            (7, "ATC", "-"),
            (5, "AAATC", "GTGTGAA"),
            ["chr1:5=-/GTGTG", "chr1:7=ATC/-"]
        )

    def testMergedRecord_7_closeInsDel(self):
        # -/GTGTG at position 5 and AA/- at position 6
        self._assertMerge(
            (5, "-", "GTGTG"),
            (6, "AA", "-"),
            (5, "AAA", "GTGTGA"),
            ["chr1:5=-/GTGTG", "chr1:6=AA/-"]
        )

    def testMergedRecord_8_coInsDel(self):
        # -/GTGTG and AA/- both at position 5
        self._assertMerge(
            (5, "-", "GTGTG"),
            (5, "AA", "-"),
            (5, "AA", "GTGTG"),
            ["chr1:5=-/GTGTG", "chr1:5=AA/-"]
        )
class MergeCoOccurVar(unittest.TestCase):
    """Test cases for mergedRecord() fed with an indexed FASTA reference.

    setUp() writes a one-sequence FASTA file and its index in the temporary
    directory; every test opens it through IdxFastaIO, merges two variants
    located on chr1 and compares the string forms (strVariant) of the
    observed and the expected records. tearDown() removes both files.

    NOTE(review): an earlier class with this same name exists in this source.
    If both definitions live in one module, this later one is the definition
    bound to the name - confirm that this is intended.
    """

    def setUp(self):
        """Build the fake VCF handle, the temporary reference files and the template records."""
        # VCF
        info_declarations = {
            "AF": HeaderInfoAttr("AF", "Alternative alleles frequencies", "Float", "A")
        }
        format_declarations = {
            "AD": HeaderFormatAttr("AD", "Alternative alleles depths", "Integer", "A"),
            "DP": HeaderFormatAttr("DP", "total depth", "Integer", "1")
        }
        self.vcfio = FakeVCFIO(info_declarations, format_declarations)

        # Ref seq: uniquely named FASTA (.fa) and index (.fa.fai) in the temp folder
        fasta_base = os.path.join(tempfile.gettempdir(), str(uuid.uuid1()))
        self.tmp_fasta_path = fasta_base + ".fa"
        self.tmp_faidx_path = self.tmp_fasta_path + ".fai"
        self.ref_seq = "ACGCAAATCTCGGCATGCCGATT"
        #               | | | | | |  |  |  |  |
        #               1 3 5 7 9 11 14 17 20 23
        with open(self.tmp_fasta_path, "w") as FH_seq:
            FH_seq.write(">chr1\n{}".format(self.ref_seq))
        # Index fields presumably follow the .fai layout (name, length,
        # offset, bases per line, bytes per line); 6 is the length of the
        # ">chr1\n" header line written above.
        with open(self.tmp_faidx_path, "w") as FH_faidx:
            FH_faidx.write("chr1\t{}\t6\t60\t61".format(len(self.ref_seq)))

        # Variants: created without pos/ref/alt, each test fills them in.
        # Positional arguments: chrom, pos, id, ref, alt, qual, filter, info,
        # format, samples.
        self.variant_1 = VCFRecord(
            "chr1", None, "artificial_1", None, None,
            10,
            ["lowQual", "lowDP"],
            {"AF": [0.05]},
            ["DP", "AD"],
            {
                "splA": {"AD": [10], "DP": 100},
                "splB": {"AD": [40], "DP": 4900},
            }
        )
        self.variant_2 = VCFRecord(
            "chr1", None, None, None, None,
            30,
            ["PASS"],
            {"AF": [0.06]},
            ["DP", "AD"],
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )
        self.expected_merge = VCFRecord(
            "chr1", None, None, None, None,
            20,
            ["lowQual", "lowDP"],
            {
                "AF": [0.06],
                "MCO_QUAL": [10, 30],
                "MCO_VAR": ["chr1:5=A/T", "chr1:20=G/C"]
            },
            ["DP", "AD"],
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )

    def tearDown(self):
        """Delete the temporary FASTA and index files when they exist."""
        for tmp_path in (self.tmp_fasta_path, self.tmp_faidx_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def _assertMerge(self, first, second, merged, mco_var):
        """Fill the records, merge the two variants and compare with the expectation.

        first, second and merged are (pos, ref, alt) tuples applied to
        variant_1, variant_2 and expected_merge respectively; alt is a single
        allele and is stored as a one-element list. mco_var is the list
        stored under MCO_VAR in the expected INFO.
        """
        targets = (self.variant_1, self.variant_2, self.expected_merge)
        for record, (pos, ref, alt) in zip(targets, (first, second, merged)):
            record.pos = pos
            record.ref = ref
            record.alt = [alt]
        self.expected_merge.info = {
            "AF": [0.06],
            "MCO_QUAL": [10, 30],
            "MCO_VAR": mco_var
        }
        # Eval: the reference is read through the indexed FASTA handle
        with IdxFastaIO(self.tmp_fasta_path) as FH_ref:
            observed_merge = mergedRecord(
                self.vcfio,
                self.variant_1, self.variant_1.getName(),
                self.variant_2, self.variant_2.getName(),
                FH_ref
            )
        self.assertEqual(
            strVariant(observed_merge),
            strVariant(self.expected_merge)
        )

    def testMergedRecord_1_substit(self):
        # A/T at position 5 and G/C at position 20
        self._assertMerge(
            (5, "A", "T"),
            (20, "G", "C"),
            (5, "AAATCTCGGCATGCCG", "TAATCTCGGCATGCCC"),
            ["chr1:5=A/T", "chr1:20=G/C"]
        )

    def testMergedRecord_2_largeSubstit(self):
        # AAAT/TGCA at position 5 and TC/GG at position 10
        self._assertMerge(
            (5, "AAAT", "TGCA"),
            (10, "TC", "GG"),
            (5, "AAATCTC", "TGCACGG"),
            ["chr1:5=AAAT/TGCA", "chr1:10=TC/GG"]
        )

    def testMergedRecord_3_largeCloseSubstit(self):
        # AAAT/TGCA at position 5 directly followed by CT/GG at position 9
        self._assertMerge(
            (5, "AAAT", "TGCA"),
            (9, "CT", "GG"),
            (5, "AAATCT", "TGCAGG"),
            ["chr1:5=AAAT/TGCA", "chr1:9=CT/GG"]
        )

    def testMergedRecord_4_delIns(self):
        # AAAT/- at position 5 and -/GGCATCT at position 10
        self._assertMerge(
            (5, "AAAT", "-"),
            (10, "-", "GGCATCT"),
            (5, "AAATC", "CGGCATCT"),
            ["chr1:5=AAAT/-", "chr1:10=-/GGCATCT"]
        )

    def testMergedRecord_5_coDelIns(self):
        # AAAT/- at position 5 and -/AGG at position 9
        self._assertMerge(
            (5, "AAAT", "-"),
            (9, "-", "AGG"),
            (5, "AAAT", "AGG"),
            ["chr1:5=AAAT/-", "chr1:9=-/AGG"]
        )

    def testMergedRecord_6_insDel(self):
        # -/GTGTG at position 5 and ATC/- at position 7
        self._assertMerge(
            (5, "-", "GTGTG"),
            (7, "ATC", "-"),
            (5, "AAATC", "GTGTGAA"),
            ["chr1:5=-/GTGTG", "chr1:7=ATC/-"]
        )

    def testMergedRecord_7_closeInsDel(self):
        # -/GTGTG at position 5 and AA/- at position 6
        self._assertMerge(
            (5, "-", "GTGTG"),
            (6, "AA", "-"),
            (5, "AAA", "GTGTGA"),
            ["chr1:5=-/GTGTG", "chr1:6=AA/-"]
        )

    def testMergedRecord_8_coInsDel(self):
        # -/GTGTG and AA/- both at position 5
        self._assertMerge(
            (5, "-", "GTGTG"),
            (5, "AA", "-"),
            (5, "AA", "GTGTG"),
            ["chr1:5=-/GTGTG", "chr1:5=AA/-"]
        )