コード例 #1
0
class MergeCoOccurVar(unittest.TestCase):
    """
    Tests for mergedRecord(): merging two variants of chr1 into a single record.

    Each test fills in the position and alleles of two template variants, calls
    mergedRecord() with the reference sequence given as a plain string, and
    compares the string form (strVariant) of the result with a hand-written
    expected record.
    """

    def setUp(self):
        """Create the VCF header stub, the reference sequence and the record templates."""
        self.vcfio = FakeVCFIO(
            {
                "AF": HeaderInfoAttr(
                    "AF", "Alternative alleles frequencies", "Float", "A"
                )
            },
            {
                "AD": HeaderFormatAttr(
                    "AD", "Alternative alleles depths", "Integer", "A"
                ),
                "DP": HeaderFormatAttr("DP", "total depth", "Integer", "1")
            }
        )
        self.ref_seq = "ACGCAAATCTCGGCATGCCGATT"
        #               | | | | | |  |  |  |  |
        #               1 3 5 7 9 11 14 17 20 23
        # pos, ref and alt are left to None in the three templates: every test
        # provides them through _checkMerge().
        self.variant_1 = VCFRecord(
            "chr1",  # chrom
            None,  # pos
            "artificial_1",  # id
            None,  # ref
            None,  # alt
            10,  # qual
            ["lowQual", "lowDP"],  # filter
            {"AF": [0.05]},  # info
            ["DP", "AD"],  # format
            {
                "splA": {"AD": [10], "DP": 100},
                "splB": {"AD": [40], "DP": 4900},
            }
        )
        self.variant_2 = VCFRecord(
            "chr1",  # chrom
            None,  # pos
            None,  # id
            None,  # ref
            None,  # alt
            30,  # qual
            ["PASS"],  # filter
            {"AF": [0.06]},  # info
            ["DP", "AD"],  # format
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )
        self.expected_merge = VCFRecord(
            "chr1",  # chrom
            None,  # pos
            None,  # id
            None,  # ref
            None,  # alt
            20,  # qual
            ["lowQual", "lowDP"],  # filter
            {
                "AF": [0.06],
                "MCO_QUAL": [10, 30],
                "MCO_VAR": ["chr1:5=A/T", "chr1:20=G/C"]
            },  # info
            ["DP", "AD"],  # format
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )

    def _checkMerge(self, first, second, merged, mco_var):
        """
        Configure the three records, merge the two variants and check the result.

        :param first: (pos, ref, alt) assigned to self.variant_1.
        :type first: tuple
        :param second: (pos, ref, alt) assigned to self.variant_2.
        :type second: tuple
        :param merged: (pos, ref, alt) assigned to self.expected_merge.
        :type merged: tuple
        :param mco_var: Value expected in the MCO_VAR info field of the merge.
        :type mco_var: list
        """
        configurations = (
            (self.variant_1, first),
            (self.variant_2, second),
            (self.expected_merge, merged),
        )
        for record, (pos, ref, alt) in configurations:
            record.pos = pos
            record.ref = ref
            record.alt = alt
        # Only MCO_VAR differs between tests; AF and MCO_QUAL are identical for
        # every scenario of this class.
        self.expected_merge.info = {
            "AF": [0.06],
            "MCO_QUAL": [10, 30],
            "MCO_VAR": mco_var
        }
        # The names are computed after pos/ref/alt have been assigned.
        observed_merge = mergedRecord(
            self.vcfio,
            self.variant_1, self.variant_1.getName(),
            self.variant_2, self.variant_2.getName(),
            self.ref_seq
        )
        self.assertEqual(
            strVariant(observed_merge),
            strVariant(self.expected_merge)
        )

    def testMergedRecord_1_substit(self):
        """Two single-base substitutions, at positions 5 and 20."""
        self._checkMerge(
            first=(5, "A", ["T"]),
            second=(20, "G", ["C"]),
            merged=(5, "AAATCTCGGCATGCCG", ["TAATCTCGGCATGCCC"]),
            mco_var=["chr1:5=A/T", "chr1:20=G/C"]
        )

    def testMergedRecord_2_largeSubstit(self):
        """Substitutions on 5-8 and 10-11: one reference base lies between them."""
        self._checkMerge(
            first=(5, "AAAT", ["TGCA"]),
            second=(10, "TC", ["GG"]),
            merged=(5, "AAATCTC", ["TGCACGG"]),
            mco_var=["chr1:5=AAAT/TGCA", "chr1:10=TC/GG"]
        )

    def testMergedRecord_3_largeCloseSubstit(self):
        """Substitutions on 5-8 and 9-10: the second starts right after the first."""
        self._checkMerge(
            first=(5, "AAAT", ["TGCA"]),
            second=(9, "CT", ["GG"]),
            merged=(5, "AAATCT", ["TGCAGG"]),
            mco_var=["chr1:5=AAAT/TGCA", "chr1:9=CT/GG"]
        )

    def testMergedRecord_4_delIns(self):
        """Deletion of 5-8 (alt "-") followed by an insertion (ref "-") at 10."""
        self._checkMerge(
            first=(5, "AAAT", ["-"]),
            second=(10, "-", ["GGCATCT"]),
            merged=(5, "AAATC", ["CGGCATCT"]),
            mco_var=["chr1:5=AAAT/-", "chr1:10=-/GGCATCT"]
        )

    def testMergedRecord_5_coDelIns(self):
        """Deletion of 5-8 (alt "-") followed by an insertion (ref "-") at 9."""
        self._checkMerge(
            first=(5, "AAAT", ["-"]),
            second=(9, "-", ["AGG"]),
            merged=(5, "AAAT", ["AGG"]),
            mco_var=["chr1:5=AAAT/-", "chr1:9=-/AGG"]
        )

    def testMergedRecord_6_insDel(self):
        """Insertion (ref "-") at 5 followed by a deletion (alt "-") of 7-9."""
        self._checkMerge(
            first=(5, "-", ["GTGTG"]),
            second=(7, "ATC", ["-"]),
            merged=(5, "AAATC", ["GTGTGAA"]),
            mco_var=["chr1:5=-/GTGTG", "chr1:7=ATC/-"]
        )

    def testMergedRecord_7_closeInsDel(self):
        """Insertion (ref "-") at 5 followed by a deletion (alt "-") of 6-7."""
        self._checkMerge(
            first=(5, "-", ["GTGTG"]),
            second=(6, "AA", ["-"]),
            merged=(5, "AAA", ["GTGTGA"]),
            mco_var=["chr1:5=-/GTGTG", "chr1:6=AA/-"]
        )

    def testMergedRecord_8_coInsDel(self):
        """Insertion (ref "-") and deletion (alt "-") both starting at 5."""
        self._checkMerge(
            first=(5, "-", ["GTGTG"]),
            second=(5, "AA", ["-"]),
            merged=(5, "AA", ["GTGTG"]),
            mco_var=["chr1:5=-/GTGTG", "chr1:5=AA/-"]
        )
コード例 #2
0
class MergeCoOccurVar(unittest.TestCase):
    """
    Tests for mergedRecord(): merging two variants of chr1 into a single record.

    Each test fills in the position and alleles of two template variants, calls
    mergedRecord() with an IdxFastaIO handle opened on a temporary FASTA file,
    and compares the string form (strVariant) of the result with a hand-written
    expected record.
    """

    def setUp(self):
        """Create the VCF header stub, the temporary reference files and the record templates."""
        # VCF
        self.vcfio = FakeVCFIO(
            {
                "AF": HeaderInfoAttr(
                    "AF", "Alternative alleles frequencies", "Float", "A"
                )
            },
            {
                "AD": HeaderFormatAttr(
                    "AD", "Alternative alleles depths", "Integer", "A"
                ),
                "DP": HeaderFormatAttr("DP", "total depth", "Integer", "1")
            }
        )
        # Ref seq
        tmp_folder = tempfile.gettempdir()
        unique_id = str(uuid.uuid1())
        self.tmp_fasta_path = os.path.join(tmp_folder, unique_id + ".fa")
        self.tmp_faidx_path = os.path.join(tmp_folder, unique_id + ".fa.fai")
        # Registered before any file is written: cleanups run even when the
        # rest of setUp raises, whereas tearDown is skipped in that case.
        self.addCleanup(self._removeTmpFiles)
        self.ref_seq = "ACGCAAATCTCGGCATGCCGATT"
        #               | | | | | |  |  |  |  |
        #               1 3 5 7 9 11 14 17 20 23
        with open(self.tmp_fasta_path, "w") as FH_seq:
            FH_seq.write(">chr1\n{}".format(self.ref_seq))
        with open(self.tmp_faidx_path, "w") as FH_faidx:
            # Columns: name, length, offset, line bases, line width. The offset
            # of 6 is the length of the ">chr1\n" header written above.
            FH_faidx.write("chr1\t{}\t6\t60\t61".format(len(self.ref_seq)))
        # Variants
        # pos, ref and alt are left to None in the three templates: every test
        # provides them through _checkMerge().
        self.variant_1 = VCFRecord(
            "chr1",  # chrom
            None,  # pos
            "artificial_1",  # id
            None,  # ref
            None,  # alt
            10,  # qual
            ["lowQual", "lowDP"],  # filter
            {"AF": [0.05]},  # info
            ["DP", "AD"],  # format
            {
                "splA": {"AD": [10], "DP": 100},
                "splB": {"AD": [40], "DP": 4900},
            }
        )
        self.variant_2 = VCFRecord(
            "chr1",  # chrom
            None,  # pos
            None,  # id
            None,  # ref
            None,  # alt
            30,  # qual
            ["PASS"],  # filter
            {"AF": [0.06]},  # info
            ["DP", "AD"],  # format
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )
        self.expected_merge = VCFRecord(
            "chr1",  # chrom
            None,  # pos
            None,  # id
            None,  # ref
            None,  # alt
            20,  # qual
            ["lowQual", "lowDP"],  # filter
            {
                "AF": [0.06],
                "MCO_QUAL": [10, 30],
                "MCO_VAR": ["chr1:5=A/T", "chr1:20=G/C"]
            },  # info
            ["DP", "AD"],  # format
            {
                "splA": {"AD": [5], "DP": 50},
                "splB": {"AD": [31], "DP": 550},
            }
        )

    def _removeTmpFiles(self):
        """Delete the temporary FASTA and index files if they exist (safe to call twice)."""
        for curr_file in [self.tmp_fasta_path, self.tmp_faidx_path]:
            if os.path.exists(curr_file):
                os.remove(curr_file)

    def tearDown(self):
        """Clean temporary files."""
        # Kept for subclasses or direct callers; the cleanup registered in
        # setUp performs the same idempotent removal.
        self._removeTmpFiles()

    def _checkMerge(self, first, second, merged, mco_var):
        """
        Configure the three records, merge the two variants and check the result.

        :param first: (pos, ref, alt) assigned to self.variant_1.
        :type first: tuple
        :param second: (pos, ref, alt) assigned to self.variant_2.
        :type second: tuple
        :param merged: (pos, ref, alt) assigned to self.expected_merge.
        :type merged: tuple
        :param mco_var: Value expected in the MCO_VAR info field of the merge.
        :type mco_var: list
        """
        configurations = (
            (self.variant_1, first),
            (self.variant_2, second),
            (self.expected_merge, merged),
        )
        for record, (pos, ref, alt) in configurations:
            record.pos = pos
            record.ref = ref
            record.alt = alt
        # Only MCO_VAR differs between tests; AF and MCO_QUAL are identical for
        # every scenario of this class.
        self.expected_merge.info = {
            "AF": [0.06],
            "MCO_QUAL": [10, 30],
            "MCO_VAR": mco_var
        }
        # The names are computed after pos/ref/alt have been assigned.
        with IdxFastaIO(self.tmp_fasta_path) as FH_ref:
            observed_merge = mergedRecord(
                self.vcfio,
                self.variant_1, self.variant_1.getName(),
                self.variant_2, self.variant_2.getName(),
                FH_ref
            )
        self.assertEqual(
            strVariant(observed_merge),
            strVariant(self.expected_merge)
        )

    def testMergedRecord_1_substit(self):
        """Two single-base substitutions, at positions 5 and 20."""
        self._checkMerge(
            first=(5, "A", ["T"]),
            second=(20, "G", ["C"]),
            merged=(5, "AAATCTCGGCATGCCG", ["TAATCTCGGCATGCCC"]),
            mco_var=["chr1:5=A/T", "chr1:20=G/C"]
        )

    def testMergedRecord_2_largeSubstit(self):
        """Substitutions on 5-8 and 10-11: one reference base lies between them."""
        self._checkMerge(
            first=(5, "AAAT", ["TGCA"]),
            second=(10, "TC", ["GG"]),
            merged=(5, "AAATCTC", ["TGCACGG"]),
            mco_var=["chr1:5=AAAT/TGCA", "chr1:10=TC/GG"]
        )

    def testMergedRecord_3_largeCloseSubstit(self):
        """Substitutions on 5-8 and 9-10: the second starts right after the first."""
        self._checkMerge(
            first=(5, "AAAT", ["TGCA"]),
            second=(9, "CT", ["GG"]),
            merged=(5, "AAATCT", ["TGCAGG"]),
            mco_var=["chr1:5=AAAT/TGCA", "chr1:9=CT/GG"]
        )

    def testMergedRecord_4_delIns(self):
        """Deletion of 5-8 (alt "-") followed by an insertion (ref "-") at 10."""
        self._checkMerge(
            first=(5, "AAAT", ["-"]),
            second=(10, "-", ["GGCATCT"]),
            merged=(5, "AAATC", ["CGGCATCT"]),
            mco_var=["chr1:5=AAAT/-", "chr1:10=-/GGCATCT"]
        )

    def testMergedRecord_5_coDelIns(self):
        """Deletion of 5-8 (alt "-") followed by an insertion (ref "-") at 9."""
        self._checkMerge(
            first=(5, "AAAT", ["-"]),
            second=(9, "-", ["AGG"]),
            merged=(5, "AAAT", ["AGG"]),
            mco_var=["chr1:5=AAAT/-", "chr1:9=-/AGG"]
        )

    def testMergedRecord_6_insDel(self):
        """Insertion (ref "-") at 5 followed by a deletion (alt "-") of 7-9."""
        self._checkMerge(
            first=(5, "-", ["GTGTG"]),
            second=(7, "ATC", ["-"]),
            merged=(5, "AAATC", ["GTGTGAA"]),
            mco_var=["chr1:5=-/GTGTG", "chr1:7=ATC/-"]
        )

    def testMergedRecord_7_closeInsDel(self):
        """Insertion (ref "-") at 5 followed by a deletion (alt "-") of 6-7."""
        self._checkMerge(
            first=(5, "-", ["GTGTG"]),
            second=(6, "AA", ["-"]),
            merged=(5, "AAA", ["GTGTGA"]),
            mco_var=["chr1:5=-/GTGTG", "chr1:6=AA/-"]
        )

    def testMergedRecord_8_coInsDel(self):
        """Insertion (ref "-") and deletion (alt "-") both starting at 5."""
        self._checkMerge(
            first=(5, "-", ["GTGTG"]),
            second=(5, "AA", ["-"]),
            merged=(5, "AA", ["GTGTG"]),
            mco_var=["chr1:5=-/GTGTG", "chr1:5=AA/-"]
        )