def test_vcf_records_raisesTypeErrorWhenClosed(self):
    # The reader is built but open() is never called; pulling the first
    # record from vcf_records() is then expected to raise TypeError.
    # NOTE(review): this test name is defined more than once in this file;
    # if both definitions live in the same class only the later one runs.
    column_header = self.entab(
        "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
    )
    record_line = self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n")
    lines = ["##metaheader1\n", column_header, record_line]
    vcf_reader = VcfReader(MockFileReader("my_dir/my_file.txt", lines))

    records = vcf_reader.vcf_records()

    with self.assertRaises(TypeError):
        next(records)
def test_vcf_records_raisesTypeErrorWhenClosed(self):
    # Without a preceding reader.open(), advancing the vcf_records()
    # iterator must fail with TypeError.
    # NOTE(review): this test name is defined more than once in this file;
    # if both definitions live in the same class only the later one runs.
    header_row = self.entab(
        "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
    )
    data_row = self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n")
    unopened_reader = VcfReader(
        MockFileReader(
            "my_dir/my_file.txt",
            ["##metaheader1\n", header_row, data_row],
        )
    )

    pending_records = unopened_reader.vcf_records()

    self.assertRaises(TypeError, next, pending_records)
def test_get_format_tag_list(self):
    # format_metaheaders is expected to be keyed by the ID of every
    # ##FORMAT line found in the header.
    file_contents = [
        '##FORMAT=<ID=GT,Number=1>\n',
        '##FORMAT=<ID=GQ,Number=1,Description="bar">\n',
        '#columnHeader\n',
        'record1\n',
        'record2',
    ]
    mock_file_reader = MockFileReader("my_dir/my_file.txt", file_contents)
    vcf_reader = VcfReader(mock_file_reader)

    actual_format_tags = vcf_reader.format_metaheaders

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(["GQ", "GT"], sorted(actual_format_tags.keys()))
def test_init_sampleNamesInitialized(self):
    # The columns that follow FORMAT in the column header are expected to
    # be exposed, in order, as sample_names.
    file_contents = [
        "##metaheader1\n",
        "##metaheader2\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleA|SampleB\n"
        ),
        "record1\n",
        "record2",
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    actual_vcf_reader = VcfReader(mock_reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(["SampleA", "SampleB"], actual_vcf_reader.sample_names)
def test_sort_delegatesToFileReader(self):
    # The mock file reader below sorts in reverse filename order; sorting
    # the wrapping VcfReaders is expected to yield that same reversed
    # order.
    _FILE_CONTENTS = [
        "##FORMAT=<ID=DP,Number=1,Type=Integer,Description='Read Depth'>\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
    ]

    class ReversedSortMockFileReader(MockFileReader):
        def __init__(self, filename):
            MockFileReader.__init__(self, filename, _FILE_CONTENTS)
            self.filename = filename

        def __lt__(self, other):
            # Deliberately inverted so a "greater" filename sorts first.
            return self.filename > other.filename

    reader1 = VcfReader(ReversedSortMockFileReader("1.txt"))
    reader2 = VcfReader(ReversedSortMockFileReader("2.txt"))
    reader3 = VcfReader(ReversedSortMockFileReader("3.txt"))

    actual_readers = sorted([reader1, reader2, reader3])

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual([reader3, reader2, reader1], actual_readers)
def test_format_tag_ids_emptyWhenNoFormatTags(self):
    # The header carries an ##INFO line but no ##FORMAT line, so
    # format_metaheaders is expected to be empty.
    file_contents = [
        "##metaheader1\n",
        "##INFO=<ID=AF,Number=A,Type=Float,Description='Allele Frequency'>\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    reader = VcfReader(mock_reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(0, len(reader.format_metaheaders))
def test_get_info_field_list(self):
    # info_metaheaders is expected to be keyed by the ID of every ##INFO
    # line; the interleaved ##FORMAT line must not contribute a key.
    file_contents = [
        '##INFO=<ID=AF,Number=1>\n',
        '##FORMAT=<ID=GQ,Number=1,Description="bar">\n',
        '##INFO=<ID=AA,Number=1>\n',
        '#columnHeader\n',
        'record1\n',
        'record2',
    ]
    mock_file_reader = MockFileReader("my_dir/my_file.txt", file_contents)
    vcf_reader = VcfReader(mock_file_reader)

    # Locals renamed from "*_format_set": they hold INFO ids, not FORMAT.
    actual_info_metaheaders = vcf_reader.info_metaheaders
    expected_info_ids = ["AA", "AF"]

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expected_info_ids, sorted(actual_info_metaheaders.keys()))
def test_format_tag_ids_immutable(self):
    # Deleting a key from the mapping returned by format_metaheaders must
    # not change what the reader reports on the next access.
    file_contents = [
        "##metaheader1\n",
        "##FORMAT=<ID=DP,Number=1,Type=Integer,Description='Read Depth'>\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
    reader = VcfReader(mock_reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(["DP"], sorted(reader.format_metaheaders.keys()))

    # Mutate the returned mapping, then re-read it from the reader.
    del reader.format_metaheaders["DP"]

    self.assertEqual(["DP"], sorted(reader.format_metaheaders.keys()))
def test_format_tag_ids_ignoresRelatedFieldNames(self):
    # Only a field named exactly "ID" supplies the key: "UUID=" and "NOID="
    # values are not expected as keys, and when "ID=" appears twice on one
    # line the first value (DP3) is the one expected.
    file_contents = [
        "##metaheader1\n",
        "##FORMAT=<UUID=DPX1,ID=DP1>\n",
        "##FORMAT=<ID=DP2,UUID=DPX2>\n",
        "##FORMAT=<ID=DP3,ID=DPX3>\n",
        "##FORMAT=<NOID=DPX>\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    reader = VcfReader(mock_reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        {"DP1", "DP2", "DP3"},
        set(reader.format_metaheaders.keys()),
    )
def test_init(self):
    # Construction is expected to capture the input path, derive the bare
    # file name, and store the header lines without trailing newlines. A
    # column header with no sample columns yields no sample names.
    file_contents = [
        "##metaheader1\n",
        "##metaheader2\n",
        "#columnHeader\n",
        "record1\n",
        "record2",
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    actual_vcf_reader = VcfReader(mock_reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual("my_dir/my_file.txt", actual_vcf_reader.input_filepath)
    self.assertEqual("my_file.txt", actual_vcf_reader.file_name)
    self.assertEqual("#columnHeader", actual_vcf_reader.column_header)
    self.assertEqual(
        ["##metaheader1", "##metaheader2"],
        actual_vcf_reader.metaheaders,
    )
    self.assertEqual([], actual_vcf_reader.sample_names)
def test_format_tag_ids_idsAreUnique(self):
    # Two ##FORMAT lines share ID=AF; a single "AF" key is expected, and
    # its value is expected to be the later of the two lines.
    file_contents = [
        "##metaheader1\n",
        "##FORMAT=<ID=AF,Description='Allele Frequency 1'>\n",
        "##FORMAT=<ID=AF,Description='Allele Frequency 2'>\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    reader = VcfReader(mock_reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(["AF"], sorted(reader.format_metaheaders.keys()))
    self.assertEqual(
        "##FORMAT=<ID=AF,Description='Allele Frequency 2'>",
        reader.format_metaheaders["AF"],
    )
def test_format_metaheaders(self):
    # format_metaheaders is expected to map each FORMAT ID to its complete
    # metaheader line with the trailing newline removed.
    file_contents = [
        "##metaheader1\n",
        "##FORMAT=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">\n",
        "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    reader = VcfReader(mock_reader)

    expected_metaheaders = {
        "AF": '##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">',
        "DP": '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">',
    }
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expected_metaheaders, reader.format_metaheaders)
def test_filter_metaheaders(self):
    # filter_metaheaders is expected to map each FILTER ID to its complete
    # metaheader line (trailing newline removed); the ##FORMAT and ##INFO
    # lines in the same header must not appear in it.
    file_contents = [
        "##metaheader1\n",
        "##FORMAT=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">\n",
        "##INFO=<ID=SNP,Number=1,Type=Integer,Description=\"snp\">\n",
        "##FILTER=<ID=.,Number=1,Type=Integer,Description=\"foo\">\n",
        "##FILTER=<ID=PASS,Number=1,Type=Integer,Description=\"bar\">\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

    reader = VcfReader(mock_reader)

    expected_metaheaders = {
        ".": "##FILTER=<ID=.,Number=1,Type=Integer,Description=\"foo\">",
        # The PASS value was garbled into an unparseable literal; it is
        # restored here from the matching ##FILTER input line above.
        "PASS": "##FILTER=<ID=PASS,Number=1,Type=Integer,Description=\"bar\">",
    }
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expected_metaheaders, reader.filter_metaheaders)
def test_vcf_records(self):
    # With the reader opened, vcf_records() is expected to yield one record
    # per data line, and open()/close() on the VcfReader are expected to
    # reach the underlying file reader.
    # NOTE(review): this test name is defined more than once in this file;
    # if both definitions live in the same class only the later one runs.
    file_contents = [
        "##metaheader1\n",
        "##metaheader2\n",
        self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        ),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
    reader = VcfReader(mock_reader)

    reader.open()
    try:
        # Materialize while the reader is still open.
        actual_vcf_records = list(reader.vcf_records())
    finally:
        # Close even if iteration fails, so the reader is not left open.
        reader.close()

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(2, len(actual_vcf_records))
    self.assertEqual('chr1', actual_vcf_records[0].chrom)
    self.assertEqual('chr2', actual_vcf_records[1].chrom)
    self.assertTrue(mock_reader.open_was_called)
    self.assertTrue(mock_reader.close_was_called)
def test_vcf_records(self):
    # With the reader opened, vcf_records() is expected to yield one record
    # per data line, and open()/close() on the VcfReader are expected to
    # reach the underlying file reader.
    # NOTE(review): this test name is defined more than once in this file;
    # if both definitions live in the same class only the later one runs.
    file_contents = [
        "##metaheader1\n",
        "##metaheader2\n",
        self.entab("#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"),
        self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
        self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
    ]
    mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
    reader = VcfReader(mock_reader)

    reader.open()
    try:
        # Materialize while the reader is still open.
        actual_vcf_records = list(reader.vcf_records())
    finally:
        # Close even if iteration fails, so the reader is not left open.
        reader.close()

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(2, len(actual_vcf_records))
    self.assertEqual("chr1", actual_vcf_records[0].chrom)
    self.assertEqual("chr2", actual_vcf_records[1].chrom)
    self.assertTrue(mock_reader.open_was_called)
    self.assertTrue(mock_reader.close_was_called)