Beispiel #1
0
class TestVariant(TestCase):
    def setUp(self):
        header_lines = [
                '##fileformat=VCFv4.2',
                '##fileDate=20151202',
                '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
                '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
                '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
                '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
                '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
                '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
                '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878' ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = '1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:SU	0/0:9'
        self.variant = Variant(self.variant_line.split('\t'), self.vcf)

    def test_set_info(self):
        self.variant.set_info('SVTYPE', 'INV')
        self.assertEqual(self.variant.info['SVTYPE'], 'INV')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.info['IMAFLAG'], False)
        with self.assertRaises(SystemExit) as cm:
            self.variant.set_info('SUPER', True)

    def test_get_info(self):
        self.assertEqual(self.variant.get_info('IMAFLAG'), True)
        self.assertEqual(self.variant.get_info('SVTYPE'), 'BND')
        with self.assertRaises(KeyError) as cm:
            self.variant.get_info('CALI')

    def test_get_info_string(self):
        self.assertEqual(self.variant.get_info_string(), 'SVTYPE=BND;STRANDS=-+:9;IMAFLAG')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.get_info_string(), 'SVTYPE=BND;STRANDS=-+:9')

    def test_get_format_string(self):
        self.assertEqual(self.variant.get_format_string(), 'GT:SU') 

    def test_genotype(self):
        self.assertEqual(self.variant.genotype('NA12878').get_gt_string(), '0/0:9')

    def test_var_string(self):
        self.assertEqual(self.variant.get_var_string(), self.variant_line)
Beispiel #2
0
class TestVariant(TestCase):
    def setUp(self):
        header_lines = [
            '##fileformat=VCFv4.2', '##fileDate=20151202',
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878	NA0001'
        ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = '1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:SU	0/0:9	1/1:15'
        self.variant = Variant(self.variant_line.split('\t'), self.vcf)

    def test_parse_genotypes(self):
        genotype_field_strings = ['0/1:20', '0/0:15']
        parsed_dict = self.variant._parse_genotypes(genotype_field_strings)

        na12878_gt = Genotype(self.variant,
                              genotype_field_strings[0].split(':'))
        na0001_gt = Genotype(self.variant,
                             genotype_field_strings[1].split(':'))
        expected_genotype_dict = {'NA12878': na12878_gt, 'NA0001': na0001_gt}

        self.assertEqual(parsed_dict, expected_genotype_dict)

    def test_set_info(self):
        self.variant.set_info('SVTYPE', 'INV')
        self.assertEqual(self.variant.info['SVTYPE'], 'INV')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.info['IMAFLAG'], False)
        with self.assertRaises(SystemExit) as cm:
            self.variant.set_info('SUPER', True)

    def test_get_info(self):
        self.assertEqual(self.variant.get_info('IMAFLAG'), True)
        self.assertEqual(self.variant.get_info('SVTYPE'), 'BND')
        with self.assertRaises(KeyError) as cm:
            self.variant.get_info('CALI')

    def test_get_info_string(self):
        self.assertEqual(self.variant.get_info_string(),
                         'SVTYPE=BND;STRANDS=-+:9;IMAFLAG')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.get_info_string(),
                         'SVTYPE=BND;STRANDS=-+:9')

    def test_get_format_string(self):
        self.assertEqual(self.variant.get_format_string(), 'GT:SU')

    def test_get_gt_string(self):
        self.assertEqual(self.variant.get_gt_string(), '0/0:9	1/1:15')

    def test_genotype(self):
        self.assertEqual(
            self.variant.genotype('NA12878').get_gt_string(), '0/0:9')

    def test_genotypes(self):
        self.assertEqual([x.get_gt_string() for x in self.variant.genotypes()],
                         ['0/0:9', '1/1:15'])

    def test_var_string(self):
        self.assertEqual(self.variant.get_var_string(), self.variant_line)
        self.variant.genotype('NA12878').set_format('GT', './.')
        self.assertEqual(
            self.variant.get_var_string(use_cached_gt_string=True),
            self.variant_line)
        self.assertNotEqual(self.variant.get_var_string(), self.variant_line)
Beispiel #3
0
class TestVariant(TestCase):
    def setUp(self):
        header_lines = [
            "##fileformat=VCFv4.2",
            "##fileDate=20151202",
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878	NA0001",
        ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = (
            "1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:SU	0/0:9	1/1:15"
        )
        self.variant = Variant(self.variant_line.split("\t"), self.vcf)

    def test_parse_genotypes(self):
        genotype_field_strings = ["0/1:20", "0/0:15"]
        parsed_dict = self.variant._parse_genotypes(genotype_field_strings)

        na12878_gt = Genotype(self.variant, genotype_field_strings[0].split(":"))
        na0001_gt = Genotype(self.variant, genotype_field_strings[1].split(":"))
        expected_genotype_dict = {"NA12878": na12878_gt, "NA0001": na0001_gt}

        self.assertEqual(parsed_dict, expected_genotype_dict)

    def test_set_info(self):
        self.variant.set_info("SVTYPE", "INV")
        self.assertEqual(self.variant.info["SVTYPE"], "INV")
        self.variant.set_info("IMAFLAG", False)
        self.assertEqual(self.variant.info["IMAFLAG"], False)
        with self.assertRaises(SystemExit) as cm:
            self.variant.set_info("SUPER", True)

    def test_get_info(self):
        self.assertEqual(self.variant.get_info("IMAFLAG"), True)
        self.assertEqual(self.variant.get_info("SVTYPE"), "BND")
        with self.assertRaises(KeyError) as cm:
            self.variant.get_info("CALI")

    def test_get_info_string(self):
        self.assertEqual(self.variant.get_info_string(), "SVTYPE=BND;STRANDS=-+:9;IMAFLAG")
        self.variant.set_info("IMAFLAG", False)
        self.assertEqual(self.variant.get_info_string(), "SVTYPE=BND;STRANDS=-+:9")

    def test_get_format_string(self):
        self.assertEqual(self.variant.get_format_string(), "GT:SU")

    def test_get_format_string_caching(self):
        header_lines = [
            "##fileformat=VCFv4.2",
            "##fileDate=20151202",
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=AS,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878",
        ]
        vcf = Vcf()
        vcf.add_header(header_lines)
        variant_line = "1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:AS:SU	0/0:1:9"
        variant = Variant(variant_line.split("\t"), vcf)
        self.assertEqual(variant.get_format_string(), "GT:AS:SU")

        gts = variant.genotypes()
        self.assertEqual(variant.get_format_string(), "GT:SU:AS")

        self.assertEqual(variant.get_format_string(True), "GT:AS:SU")

    def test_get_gt_string(self):
        self.assertEqual(self.variant.get_gt_string(), "0/0:9	1/1:15")

    def test_genotype(self):
        self.assertEqual(self.variant.genotype("NA12878").get_gt_string(), "0/0:9")

    def test_set_genotype(self):
        new_genotype = Genotype(self.variant, ["0/1", "9"])
        self.variant.set_genotype("NA12878", new_genotype)
        self.assertEqual(self.variant.genotype("NA12878").get_gt_string(), "0/1:9")

    def test_genotypes(self):
        self.assertEqual([x.get_gt_string() for x in self.variant.genotypes()], ["0/0:9", "1/1:15"])

    def test_var_string(self):
        self.assertEqual(self.variant.get_var_string(), self.variant_line)
        self.variant.genotype("NA12878").set_format("GT", "./.")
        self.assertEqual(self.variant.get_var_string(use_cached_gt_string=True), self.variant_line)
        self.assertNotEqual(self.variant.get_var_string(), self.variant_line)

    def test_var_string_format_caching(self):
        header_lines = [
            "##fileformat=VCFv4.2",
            "##fileDate=20151202",
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=AS,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878",
        ]
        vcf = Vcf()
        vcf.add_header(header_lines)
        variant_line = "1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:AS:SU	0/0:1:9"
        uncached_line = "1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:SU:AS	0/0:9:1"
        variant = Variant(variant_line.split("\t"), vcf)
        gt = variant.genotypes()  # force parsing
        self.assertEqual(variant.get_var_string(), uncached_line)
        self.assertEqual(variant.get_var_string(use_cached_gt_string=True), variant_line)

    def test_add_genotype(self):
        header_lines = [
            "##fileformat=VCFv4.2",
            "##fileDate=20151202",
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878",
        ]
        vcf = Vcf()
        vcf.add_header(header_lines)
        variant_line = "1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	SU	9"
        variant = Variant(variant_line.split("\t"), vcf)
        self.assertEqual(variant.get_gt_string(), "./.:9")
Beispiel #4
0
class TestVariant(TestCase):
    def setUp(self):
        header_lines = [
                '##fileformat=VCFv4.2',
                '##fileDate=20151202',
                '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
                '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
                '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
                '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
                '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
                '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
                '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878	NA0001' ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = '1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	GT:SU	0/0:9	1/1:15'
        self.variant = Variant(self.variant_line.split('\t'), self.vcf)

    def test_parse_genotypes(self):
        genotype_field_strings = ['0/1:20', '0/0:15']
        parsed_dict = self.variant._parse_genotypes(genotype_field_strings)

        na12878_gt = Genotype(self.variant, genotype_field_strings[0].split(':'))
        na0001_gt = Genotype(self.variant, genotype_field_strings[1].split(':'))
        expected_genotype_dict = { 'NA12878': na12878_gt, 'NA0001': na0001_gt }

        self.assertEqual(parsed_dict, expected_genotype_dict)

    def test_set_info(self):
        self.variant.set_info('SVTYPE', 'INV')
        self.assertEqual(self.variant.info['SVTYPE'], 'INV')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.info['IMAFLAG'], False)
        with self.assertRaises(SystemExit) as cm:
            self.variant.set_info('SUPER', True)

    def test_get_info(self):
        self.assertEqual(self.variant.get_info('IMAFLAG'), True)
        self.assertEqual(self.variant.get_info('SVTYPE'), 'BND')
        with self.assertRaises(KeyError) as cm:
            self.variant.get_info('CALI')

    def test_get_info_string(self):
        self.assertEqual(self.variant.get_info_string(), 'SVTYPE=BND;STRANDS=-+:9;IMAFLAG')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.get_info_string(), 'SVTYPE=BND;STRANDS=-+:9')

    def test_get_format_string(self):
        self.assertEqual(self.variant.get_format_string(), 'GT:SU') 

    def test_get_gt_string(self):
        self.assertEqual(self.variant.get_gt_string(), '0/0:9	1/1:15')

    def test_genotype(self):
        self.assertEqual(self.variant.genotype('NA12878').get_gt_string(), '0/0:9')

    def test_genotypes(self):
        self.assertEqual([ x.get_gt_string() for x in self.variant.genotypes() ], ['0/0:9', '1/1:15'])

    def test_var_string(self):
        self.assertEqual(self.variant.get_var_string(), self.variant_line)
        self.variant.genotype('NA12878').set_format('GT', './.')
        self.assertEqual(self.variant.get_var_string(use_cached_gt_string=True), self.variant_line)
        self.assertNotEqual(self.variant.get_var_string(), self.variant_line)

    def test_add_genotype(self):
        header_lines = [
                '##fileformat=VCFv4.2',
                '##fileDate=20151202',
                '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
                '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
                '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
                '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
                '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
                '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878' ]
        vcf = Vcf()
        vcf.add_header(header_lines)
        variant_line = '1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG	SU	9'
        variant = Variant(variant_line.split('\t'), vcf)
        self.assertEqual(variant.get_gt_string(), './.:9')
Beispiel #5
0
class TestVariant8Col(TestCase):
    def setUp(self):
        header_lines = [
            '##fileformat=VCFv4.2', '##fileDate=20151202',
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO'
        ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = '1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG'
        self.variant = Variant(self.variant_line.split('\t'), self.vcf)

    def test_set_info(self):
        self.variant.set_info('SVTYPE', 'INV')
        self.assertEqual(self.variant.info['SVTYPE'], 'INV')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.info['IMAFLAG'], False)
        with self.assertRaises(SystemExit) as cm:
            self.variant.set_info('SUPER', True)

    def test_get_info(self):
        self.assertEqual(self.variant.get_info('IMAFLAG'), True)
        self.assertEqual(self.variant.get_info('SVTYPE'), 'BND')
        with self.assertRaises(KeyError) as cm:
            self.variant.get_info('CALI')

    def test_get_info_string(self):
        self.assertEqual(self.variant.get_info_string(),
                         'SVTYPE=BND;STRANDS=-+:9;IMAFLAG')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.get_info_string(),
                         'SVTYPE=BND;STRANDS=-+:9')

    def test_get_format_string(self):
        self.assertEqual(self.variant.get_format_string(), None)

    def test_get_format_string_caching(self):
        header_lines = [
            '##fileformat=VCFv4.2', '##fileDate=20151202',
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=AS,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            '#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO'
        ]
        vcf = Vcf()
        vcf.add_header(header_lines)
        variant_line = '1	820915	5838_1	N	]GL000232.1:20940]N	0.00	.	SVTYPE=BND;STRANDS=-+:9;IMAFLAG'
        variant = Variant(variant_line.split('\t'), vcf)
        self.assertEqual(variant.get_format_string(), None)

        gts = variant.genotypes()
        self.assertEqual(variant.get_format_string(), None)

        self.assertEqual(variant.get_format_string(True), None)

    def test_get_gt_string(self):
        self.assertEqual(self.variant.get_gt_string(), None)

    def test_genotypes(self):
        self.assertEqual(self.variant.genotypes(), [])

    def test_var_string(self):
        self.assertEqual(self.variant.get_var_string(), self.variant_line)