예제 #1
0
    def test_vcf_records_raisesTypeErrorWhenClosed(self):
        """Pulling the first record from an unopened reader raises TypeError."""
        column_header = self.entab("#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n")
        record_line = self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n")
        lines = ["##metaheader1\n", column_header, record_line]
        reader = VcfReader(MockFileReader("my_dir/my_file.txt", lines))

        # Note: reader.open() is never called before the iterator is advanced.
        records = reader.vcf_records()
        with self.assertRaises(TypeError):
            next(records)
예제 #2
0
    def test_vcf_records_raisesTypeErrorWhenClosed(self):
        """Verify vcf_records() cannot be consumed when open() was not called.

        The reader is built from one metaheader, one column header and one
        record; advancing the record iterator is expected to raise TypeError.
        """
        header_line = self.entab(
            "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
        )
        data_line = self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n")
        reader = VcfReader(
            MockFileReader(
                "my_dir/my_file.txt",
                ["##metaheader1\n", header_line, data_line],
            )
        )

        unopened_iter = reader.vcf_records()
        self.assertRaises(TypeError, next, unopened_iter)
예제 #3
0
    def test_get_format_tag_list(self):
        """format_metaheaders is keyed by the ID of each ##FORMAT metaheader."""
        file_contents = [
            '##FORMAT=<ID=GT,Number=1>\n',
            '##FORMAT=<ID=GQ,Number=1,Description="bar">\n',
            '#columnHeader\n',
            'record1\n',
            'record2',
        ]
        mock_file_reader = MockFileReader("my_dir/my_file.txt", file_contents)

        vcf_reader = VcfReader(mock_file_reader)
        format_metaheaders = vcf_reader.format_metaheaders

        # Keys are sorted so the check does not depend on mapping order.
        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(["GQ", "GT"], sorted(format_metaheaders.keys()))
예제 #4
0
    def test_init_sampleNamesInitialized(self):
        """sample_names holds the column-header fields that follow FORMAT."""
        file_contents = [
            "##metaheader1\n",
            "##metaheader2\n",
            # NOTE(review): entab presumably turns the '|' separators into
            # tabs -- confirm against the helper's definition.
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleA|SampleB\n"
            ),
            "record1\n",
            "record2",
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

        actual_vcf_reader = VcfReader(mock_reader)

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(["SampleA", "SampleB"],
                         actual_vcf_reader.sample_names)
예제 #5
0
    def test_sort_delegatesToFileReader(self):
        """Sorting VcfReaders follows the ordering of the wrapped file readers.

        The mock file reader below defines a reversed ``__lt__`` on filename,
        so the readers are expected to come back in descending filename order.
        """
        _FILE_CONTENTS = [
            "##FORMAT=<ID=DP,Number=1,Type=Integer,Description='Read Depth'>\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
        ]

        class ReversedSortMockFileReader(MockFileReader):
            """MockFileReader whose less-than comparison is inverted by filename."""

            def __init__(self, filename):
                MockFileReader.__init__(self, filename, _FILE_CONTENTS)
                self.filename = filename

            def __lt__(self, other):
                # Intentionally reversed: a "greater" filename sorts first.
                return self.filename > other.filename

        reader1 = VcfReader(ReversedSortMockFileReader("1.txt"))
        reader2 = VcfReader(ReversedSortMockFileReader("2.txt"))
        reader3 = VcfReader(ReversedSortMockFileReader("3.txt"))

        actual_readers = sorted([reader1, reader2, reader3])

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual([reader3, reader2, reader1], actual_readers)
예제 #6
0
    def test_format_tag_ids_emptyWhenNoFormatTags(self):
        """format_metaheaders is empty when the input has no ##FORMAT lines.

        The input contains a generic metaheader and an ##INFO metaheader only.
        """
        file_contents = [
            "##metaheader1\n",
            "##INFO=<ID=AF,Number=A,Type=Float,Description='Allele Frequency'>\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(0, len(reader.format_metaheaders))
예제 #7
0
    def test_get_info_field_list(self):
        """info_metaheaders is keyed by the ID of each ##INFO metaheader.

        A ##FORMAT line sits between the two ##INFO lines in the input; its ID
        is not expected among the info keys.
        """
        file_contents = [
            '##INFO=<ID=AF,Number=1>\n',
            '##FORMAT=<ID=GQ,Number=1,Description="bar">\n',
            '##INFO=<ID=AA,Number=1>\n',
            '#columnHeader\n',
            'record1\n',
            'record2',
        ]
        mock_file_reader = MockFileReader("my_dir/my_file.txt", file_contents)

        vcf_reader = VcfReader(mock_file_reader)
        info_metaheaders = vcf_reader.info_metaheaders
        expected_info_ids = ["AA", "AF"]

        # Keys are sorted so the check does not depend on mapping order.
        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(expected_info_ids, sorted(info_metaheaders.keys()))
예제 #8
0
    def test_format_tag_ids_immutable(self):
        """Mutating the mapping returned by format_metaheaders has no lasting effect.

        A key is deleted from the returned mapping; reading the property again
        must still report that key.
        """
        file_contents = [
            "##metaheader1\n",
            "##FORMAT=<ID=DP,Number=1,Type=Integer,Description='Read Depth'>\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(["DP"], sorted(reader.format_metaheaders.keys()))
        # Delete from whatever the property handed back ...
        del reader.format_metaheaders["DP"]
        # ... and confirm a fresh read is unaffected.
        self.assertEqual(["DP"], sorted(reader.format_metaheaders.keys()))
예제 #9
0
    def test_format_tag_ids_ignoresRelatedFieldNames(self):
        """Fields whose names merely contain "ID" do not supply FORMAT keys.

        The ##FORMAT lines mix ``ID=`` with look-alike fields (``UUID=``,
        ``NOID=``) and one line repeats ``ID=``; the expected keys are DP1,
        DP2 and DP3 only.
        """
        file_contents = [
            "##metaheader1\n",
            "##FORMAT=<UUID=DPX1,ID=DP1>\n",
            "##FORMAT=<ID=DP2,UUID=DPX2>\n",
            "##FORMAT=<ID=DP3,ID=DPX3>\n",
            "##FORMAT=<NOID=DPX>\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual({"DP1", "DP2", "DP3"},
                         set(reader.format_metaheaders.keys()))
예제 #10
0
    def test_init(self):
        """Constructing a VcfReader populates its path, header and sample attributes.

        With a column header that names no samples, sample_names is expected
        to be an empty list.
        """
        file_contents = [
            "##metaheader1\n",
            "##metaheader2\n",
            "#columnHeader\n",
            "record1\n",
            "record2",
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)

        actual_vcf_reader = VcfReader(mock_reader)

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual("my_dir/my_file.txt",
                         actual_vcf_reader.input_filepath)
        self.assertEqual("my_file.txt", actual_vcf_reader.file_name)
        # Header and metaheaders are expected without their trailing newline.
        self.assertEqual("#columnHeader", actual_vcf_reader.column_header)
        self.assertEqual(["##metaheader1", "##metaheader2"],
                         actual_vcf_reader.metaheaders)
        self.assertEqual([], actual_vcf_reader.sample_names)
예제 #11
0
    def test_format_tag_ids_idsAreUnique(self):
        """Two ##FORMAT lines sharing an ID yield one key; the later line is kept."""
        file_contents = [
            "##metaheader1\n",
            "##FORMAT=<ID=AF,Description='Allele Frequency 1'>\n",
            "##FORMAT=<ID=AF,Description='Allele Frequency 2'>\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(["AF"], sorted(reader.format_metaheaders.keys()))
        # The stored value is the second (last) definition, newline stripped.
        self.assertEqual("##FORMAT=<ID=AF,Description='Allele Frequency 2'>",
                         reader.format_metaheaders["AF"])
예제 #12
0
    def test_format_metaheaders(self):
        """format_metaheaders maps each FORMAT ID to its full metaheader line.

        The expected values are the original ##FORMAT lines without the
        trailing newline.
        """
        file_contents = [
            "##metaheader1\n",
            "##FORMAT=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">\n",
            "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        expected_metaheaders = {
            "AF":
            '##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">',
            "DP":
            '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">',
        }

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(expected_metaheaders, reader.format_metaheaders)
예제 #13
0
    def test_filter_metaheaders(self):
        """filter_metaheaders maps each FILTER ID to its full metaheader line.

        The input also carries ##FORMAT and ##INFO lines; only the two
        ##FILTER lines (IDs "." and "PASS") are expected in the result, each
        without its trailing newline.
        """
        file_contents = [
            "##metaheader1\n",
            "##FORMAT=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">\n",
            "##INFO=<ID=SNP,Number=1,Type=Integer,Description=\"snp\">\n",
            "##FILTER=<ID=.,Number=1,Type=Integer,Description=\"foo\">\n",
            "##FILTER=<ID=PASS,Number=1,Type=Integer,Description=\"bar\">\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        expected_metaheaders = {
            ".": "##FILTER=<ID=.,Number=1,Type=Integer,Description=\"foo\">",
            # Restored from the matching input line above; the previous
            # literal was corrupted and did not parse.
            "PASS":
            "##FILTER=<ID=PASS,Number=1,Type=Integer,Description=\"bar\">",
        }

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(expected_metaheaders, reader.filter_metaheaders)
예제 #14
0
    def test_vcf_records(self):
        """vcf_records() yields one record per data line between open() and close().

        Also checks that opening and closing the VcfReader is forwarded to the
        underlying file reader (via the mock's ``*_was_called`` flags).
        """
        file_contents = [
            "##metaheader1\n",
            "##metaheader2\n",
            self.entab(
                "#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"
            ),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        reader.open()
        try:
            actual_vcf_records = list(reader.vcf_records())
        finally:
            # Close even if iteration fails so the reader is never left open.
            reader.close()

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(2, len(actual_vcf_records))
        self.assertEqual('chr1', actual_vcf_records[0].chrom)
        self.assertEqual('chr2', actual_vcf_records[1].chrom)
        self.assertTrue(mock_reader.open_was_called)
        self.assertTrue(mock_reader.close_was_called)
예제 #15
0
    def test_vcf_records(self):
        """Iterate an opened VcfReader and check the records and open/close calls.

        Two data lines are supplied, so two records are expected, with chrom
        values "chr1" and "chr2" in input order. The mock file reader must
        report that both open and close were called.
        """
        file_contents = [
            "##metaheader1\n",
            "##metaheader2\n",
            self.entab("#CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FORMAT|SampleNormal|SampleTumor\n"),
            self.entab("chr1|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR\n"),
            self.entab("chr2|1|.|A|C|.|.|INFO|FORMAT|NORMAL|TUMOR"),
        ]
        mock_reader = MockFileReader("my_dir/my_file.txt", file_contents)
        reader = VcfReader(mock_reader)

        reader.open()
        try:
            actual_vcf_records = list(reader.vcf_records())
        finally:
            # Guarantee the reader is closed even when iteration raises.
            reader.close()

        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
        self.assertEqual(2, len(actual_vcf_records))
        self.assertEqual("chr1", actual_vcf_records[0].chrom)
        self.assertEqual("chr2", actual_vcf_records[1].chrom)
        self.assertTrue(mock_reader.open_was_called)
        self.assertTrue(mock_reader.close_was_called)