Example #1
0
    def test_should_parse_empty_file(self):
        """A stream containing only header lines produces zero records."""
        header_only = [
            '##fileformat=VCFv4.2\n',
            '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n',
        ]

        parsed = list(VCFReader(iter(header_only)).read_records())

        self.assertEqual(0, len(parsed))
Example #2
0
    def test_should_parse_single_record(self):
        """Header lines followed by one data line produce exactly one record."""
        file_format = '##fileformat=VCFv4.2\n'
        column_names = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n'
        data_line = 'chr0\t0\t.\tP\tQ\t0\tPASS\t\n'
        source = iter([file_format, column_names, data_line])

        parsed = list(VCFReader(source).read_records())

        self.assertEqual(1, len(parsed))
Example #3
0
    def test_should_parse_header_when_parsing_records(self):
        """Reading records populates ``header`` without an explicit header read."""
        vcf_lines = [
            '##fileformat=VCFv4.2\n',
            '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n',
            'chr0\t0\t.\tP\tQ\t0\tPASS\t\n',
        ]
        vcf_reader = VCFReader(iter(vcf_lines))

        # Nothing has been consumed yet, so no header is available.
        self.assertIsNone(vcf_reader.header)

        # Exhaust the record generator; only its side effect on the reader matters.
        list(vcf_reader.read_records())

        self.assertIsNotNone(vcf_reader.header)
Example #4
0
    def test_read_VCF_line(self):
        """Read ``vcf_example.vcf`` and verify the header sizes, every field of
        the first record, and the variant of the last record."""
        # (key, expected value) pairs, listed in the order the keys are
        # expected to come back from the parsed INFO mapping.
        expected_info = [
            ("PP", [3000]),
            ("DP", [250]),
            ("DPR", [140]),
            ("DPF", [110]),
            ("VC", [100]),
            ("VCR", [49]),
            ("VCF", [51]),
            ("ABPV", [0.2]),
            ("SBPV", [0.3]),
            ("MQ", [70]),
            ("BR", [31]),
            ("QD", [None]),
        ]
        # (sample name, field name, expected value) triples for the first record.
        expected_sample_fields = [
            ('sample1', "GT", GenotypeCall("0/1")),
            ('sample2', "GT", GenotypeCall("1/1")),
            ('sample1', 'PL', [3000, 0, 3000]),
            ('sample2', 'PL', [114, 0, 0]),
            ('sample1', 'GQ', [1000]),
            ('sample2', 'GQ', [None]),
        ]

        vcf_path = os.path.join(self.data_dir, "vcf_example.vcf")
        with open(vcf_path, "r") as handle:
            reader = VCFReader(handle)
            reader.read_header()
            self.assertEqual(len(reader.header.file_metadata), 7)
            self.assertEqual(len(reader.header.samples), 2)

            all_records = list(reader.read_records())
            self.assertEqual(len(all_records), 2)

            # The first record is checked exhaustively.
            first = all_records[0]
            # Position is given in the zero-based representation.
            self.variant_is_equal(first, ("20", 9, set(), "CT", "C"))
            self.assertEqual(first.filters, set())
            self.assertEqual(first.passes_filter, True)

            self.assertEqual(len(first.info), len(expected_info))
            for info_key, info_value in expected_info:
                self.assertEqual(first.info[info_key], info_value)

            self.assertEqual(first.samples, ['sample1', 'sample2'])
            for sample_name, field_name, field_value in expected_sample_fields:
                self.assertEqual(
                    first.sample_info.get_field(sample_name, field_name),
                    field_value,
                )

            # The INFO mapping must keep its keys in the expected order.
            self.assertEqual(
                list(first.info.keys()),
                [info_key for info_key, _ in expected_info],
            )

            # The final record must still be read correctly.
            self.variant_is_equal(all_records[-1], ("20", 10, set(), "T", "G"))