def test_should_parse_empty_file(self):
    # Input holds only the fileformat meta line and the column header
    # line, so reading records is expected to produce nothing.
    header_only_lines = iter([
        '##fileformat=VCFv4.2\n',
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n',
    ])

    parsed_records = list(VCFReader(header_only_lines).read_records())

    self.assertEqual(0, len(parsed_records))
def test_should_parse_single_record(self):
    # Same two header lines as the empty-file case, followed by exactly
    # one data line; exactly one record is expected back.
    single_record_lines = iter([
        '##fileformat=VCFv4.2\n',
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n',
        'chr0\t0\t.\tP\tQ\t0\tPASS\t\n',
    ])

    parsed_records = list(VCFReader(single_record_lines).read_records())

    self.assertEqual(1, len(parsed_records))
def test_should_parse_header_when_parsing_records(self):
    # read_header() is deliberately never called here: the header must be
    # unset on a fresh reader and populated once the records are consumed.
    vcf_lines = iter([
        '##fileformat=VCFv4.2\n',
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n',
        'chr0\t0\t.\tP\tQ\t0\tPASS\t\n',
    ])
    reader = VCFReader(vcf_lines)
    self.assertIsNone(reader.header)

    # Drain the record iterable; the records themselves are not inspected.
    for _record in reader.read_records():
        pass

    self.assertIsNotNone(reader.header)
def test_read_VCF_line(self):
    # Reads the vcf_example.vcf fixture from self.data_dir, checks the
    # header counts, inspects the first record in detail, and compares the
    # last record against its expected variant tuple.
    vcf_path = os.path.join(self.data_dir, "vcf_example.vcf")
    with open(vcf_path, "r") as vcf_file:
        vcf_handler = VCFReader(vcf_file)
        vcf_handler.read_header()
        header = vcf_handler.header
        self.assertEqual(len(header.file_metadata), 7)
        self.assertEqual(len(header.samples), 2)

        records = list(vcf_handler.read_records())
        self.assertEqual(len(records), 2)
        first, last = records[0], records[-1]

        # test first record fully
        # zero-based representation
        self.variant_is_equal(first, ("20", 9, set(), "CT", "C"))
        self.assertEqual(first.filters, set())
        self.assertEqual(first.passes_filter, True)

        # INFO entries in the order they are expected to appear.
        expected_info = [
            ("PP", [3000]),
            ("DP", [250]),
            ("DPR", [140]),
            ("DPF", [110]),
            ("VC", [100]),
            ("VCR", [49]),
            ("VCF", [51]),
            ("ABPV", [0.2]),
            ("SBPV", [0.3]),
            ("MQ", [70]),
            ("BR", [31]),
            ("QD", [None]),
        ]
        self.assertEqual(len(first.info), 12)
        for info_key, info_value in expected_info:
            self.assertEqual(first.info[info_key], info_value)

        self.assertEqual(first.samples, ['sample1', 'sample2'])
        self.assertEqual(
            first.sample_info.get_field('sample1', "GT"),
            GenotypeCall("0/1"),
        )
        self.assertEqual(
            first.sample_info.get_field('sample2', "GT"),
            GenotypeCall("1/1"),
        )
        self.assertEqual(
            first.sample_info.get_field('sample1', 'PL'),
            [3000, 0, 3000],
        )
        self.assertEqual(
            first.sample_info.get_field('sample2', 'PL'),
            [114, 0, 0],
        )
        self.assertEqual(
            first.sample_info.get_field('sample1', 'GQ'),
            [1000],
        )
        self.assertEqual(
            first.sample_info.get_field('sample2', 'GQ'),
            [None],
        )

        # check that ordering in the dictionaries is preserved
        expected_keys = [info_key for info_key, _ in expected_info]
        self.assertEqual(list(first.info.keys()), expected_keys)

        # ensure last record is still being read correctly
        self.variant_is_equal(last, ("20", 10, set(), "T", "G"))