Esempio n. 1
0
    def setUp(self):
        """ build the SNV fixture shared by the tests

        The population tags are registered with Info before the variant is
        constructed, and the format keys/values are kept on the test case.
        """

        self.pops = [
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        ]
        Info.set_populations(self.pops)

        self.keys = "GT:DP:AD"
        self.values = "0/1:50:10,10"

        # positional fields: chrom, pos, id, ref, alt, qual, filter
        site = ("1", "15000000", ".", "A", "G", "1000", "PASS")
        self.var = SNV(*site,
                       info="HGNC_ID=1001;CQ=missense_variant;random_tag",
                       format=self.keys,
                       sample=self.values)
Esempio n. 2
0
    def test_passes_known_genes(self):
        ''' check that passes_filters() respects the known genes set on SNV
        '''

        # register gene 1001 before the variant is constructed
        SNV.known_genes = {"1001": {'irrelevant'}}
        variant = SNV('1', '100', '.', 'A', 'G', '1000', 'PASS',
                      info="HGNC_ID=1001;CQ=missense_variant;random_tag",
                      format='GT:DP',
                      sample='0/1:50')

        # the variant's gene ID is among the known genes, so it passes
        self.assertTrue(variant.passes_filters())

        # swap in a gene set that lacks the variant's gene ID, so it fails
        SNV.known_genes = {"1002": {'irrelevant'}}
        self.assertFalse(variant.passes_filters())

        # with no known genes defined at all, the variant passes again
        SNV.known_genes = None
        self.assertTrue(variant.passes_filters())
    def test_add_single_variant(self):
        """ test that add_single_variant() works correctly

        The sub-functions are all tested elsewhere, so this test merely checks
        that a valid variant is added to the variants list, and an invalid
        variant is passed over without being added.
        """

        gender = "M"

        # set up an autosomal variant. SNV takes the FILTER column as its sixth
        # argument, so QUAL (line[5]) is skipped instead of being passed as
        # the filter value.
        line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
        chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
        variant = SNV(chrom, pos, snp_id, ref, alt, filt)

        # check that the variant is added to the variant list
        variants = []
        self.vcf_loader.add_single_variant(variants, variant, gender, line)
        self.assertEqual(variants, [variant])

        # set up an X-chrom male het
        line = ["X", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
        chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
        variant = SNV(chrom, pos, snp_id, ref, alt, filt)

        # check that the X-chrom male het is not added to the variant list
        variants = []
        self.vcf_loader.add_single_variant(variants, variant, gender, line)
        self.assertEqual(variants, [])
    def create_snv(self, gender, genotype, chrom, pos, cq=None):
        """ build a SNV with info, format, gender and genotype set

        Args:
            gender: value handed to the variant's set_gender()
            genotype: GT value for the sample, e.g. "0/1"
            chrom: chromosome of the variant
            pos: position of the variant
            cq: consequence for the CQ info field, "missense_variant" if None

        Returns:
            the constructed SNV object
        """

        consequence = "missense_variant" if cq is None else cq

        # arguments are chrom, pos, id, ref, alt and filter
        snv = SNV(chrom, pos, ".", "A", "G", "PASS")

        info_field = "HGNC=TEST;CQ={};random_tag".format(consequence)
        sample_field = genotype + ":50"

        snv.add_info(info_field)
        snv.add_format("GT:DP", sample_field)
        snv.set_gender(gender)
        snv.set_genotype()

        return snv
Esempio n. 5
0
 def setUp(self):
     """ create the SNV fixture and install the known genes on SNV
     """

     # arguments are chrom, pos, id, ref, alt and filter
     self.var = SNV("1", "15000000", "CM00001", "A", "G", "PASS")
     self.var.debug_chrom = "1"
     self.var.debug_pos = "15000000"

     self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

     # the default filtering criteria for ATRX, as loaded into python
     atrx = {
         "inheritance": {"Hemizygous": {"Loss of function"}},
         "start": "10000000",
         "chrom": "1",
         "confirmed_status": {"Confirmed DD Gene"},
         "end": "20000000",
     }
     SNV.known_genes = {"ATRX": atrx}

     self.var.add_info(self.default_info)
 def create_snv(self, gender, genotype):
     """ build an X chromosome SNV for the given gender and genotype

     Args:
         gender: value handed to the variant's set_gender()
         genotype: GT value for the sample, e.g. "0/1"

     Returns:
         SNV with its vcf_line, info, format, gender and genotype set
     """

     chrom, pos, snp_id = "X", "15000000", "."
     ref, alt = "A", "G"
     qual, filt = "50", "PASS"

     # the constructor takes the filter value, but not the quality
     snv = SNV(chrom, pos, snp_id, ref, alt, filt)

     info = "HGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005"
     format_keys = "GT:DP"
     sample_values = genotype + ":50"

     # keep the fields of the full VCF line on the variant
     snv.vcf_line = [chrom, pos, snp_id, ref, alt, qual, filt, info,
         format_keys, sample_values]

     snv.add_info(info)
     snv.add_format(format_keys, sample_values)
     snv.set_gender(gender)
     snv.set_genotype()

     return snv
Esempio n. 7
0
 def test_open_individual(self):
     ''' check the variants that open_individual() returns for a person
     '''

     # without a person there is nothing to load
     self.assertEqual(open_individual(None), [])

     # write a VCF holding two variants, at positions 1 and 2
     vcf_lines = make_vcf_header()
     vcf_lines.append(make_vcf_line(pos=1, extra='HGNC=TEST;MAX_AF=0.0001'))
     vcf_lines.append(make_vcf_line(pos=2, extra='HGNC=ATRX;MAX_AF=0.0001'))

     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, vcf_lines)

     person = Person('fam_id', 'sample', 'dad', 'mom', 'F', '2', vcf_path)

     # the expected variants only differ by position and info
     shared = dict(chrom="1", id=".", ref="G", alts="T", qual='1000',
         filter="PASS", format="DP:GT", sample="50:0/1", gender="female",
         mnv_code=None)
     var1 = SNV(position=1,
         info="CQ=missense_variant;HGNC=TEST;MAX_AF=0.0001", **shared)
     var2 = SNV(position=2,
         info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001", **shared)

     # without extra arguments only the second variant is returned
     self.assertEqual(open_individual(person), [var2])

     # both sites are returned once their keys are given as child variants
     child_keys = {('1', 1), ('1', 2)}
     loaded = open_individual(person, child_variants=child_keys)
     self.assertEqual(loaded, [var1, var2])
Esempio n. 8
0
 def test_open_individual_with_mnvs(self):
     ''' check open_individual() when MNV codes are supplied
     '''

     vcf_lines = make_vcf_header()
     vcf_lines.append(make_vcf_line(pos=1, cq='splice_region_variant',
         extra='HGNC=ATRX;MAX_AF=0.0001'))
     vcf_lines.append(make_vcf_line(pos=2, cq='missense_variant',
         extra='HGNC=ATRX;MAX_AF=0.0001'))

     vcf_path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(vcf_path, vcf_lines)

     person = Person('fam_id', 'sample', 'dad', 'mom', 'F', '2', vcf_path)

     # fields shared by both of the expected variants
     common = {'chrom': "1", 'id': ".", 'ref': "G", 'alts': "T",
         'qual': '1000', 'filter': "PASS", 'format': "DP:GT",
         'sample': "50:0/1", 'gender': "female"}

     var1 = SNV(position=1,
         info="CQ=splice_region_variant;HGNC=ATRX;MAX_AF=0.0001",
         mnv_code='modified_protein_altering_mnv', **common)
     var2 = SNV(position=2,
         info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
         mnv_code=None, **common)

     # without MNV codes, only the variant at position 2 is returned
     self.assertEqual(open_individual(person), [var2])

     # with MNV codes for both sites, only the variant at position 1 is
     # returned instead
     mnv_codes = {
         ('1', 1): 'modified_protein_altering_mnv',
         ('1', 2): 'modified_synonymous_mnv',
     }
     self.assertEqual(open_individual(person, mnvs=mnv_codes), [var1])
Esempio n. 9
0
    def test_construct_variant(self):
        """ check construct_variant() for a SNV line and for a CNV line
        """

        gender = "M"

        # a line with a plain alt allele is compared against a SNV
        snv_line = [
            "1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"
        ]
        expected = SNV(*snv_line, gender=gender)
        built = construct_variant(snv_line, gender)

        self.assertEqual(built.get_key(), expected.get_key())
        self.assertEqual(built.format, {'GT': '0/1'})

        # a line with a <DEL> alt allele is compared against a CNV
        cnv_line = [
            "1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200", "GT",
            "0/1"
        ]
        expected = CNV(*cnv_line, gender=gender)
        built = construct_variant(cnv_line, gender)

        self.assertEqual(built.get_key(), expected.get_key())
        self.assertEqual(built.format, {'GT': '0/1'})
Esempio n. 10
0
    def test_load_trio(self):
        ''' check that load_trio() returns the expected TrioGenotypes
        '''
        def write_member_vcf(member):
            # every member gets the same two line VCF, where one line would
            # pass the default filtering
            lines = make_vcf_header()
            lines.append(make_vcf_line(pos=1, extra='HGNC=TEST;MAX_AF=0.0001'))
            lines.append(make_vcf_line(pos=2, extra='HGNC=ATRX;MAX_AF=0.0001'))

            vcf_path = os.path.join(self.temp_dir, "{}.vcf.gz".format(member))
            write_gzipped_vcf(vcf_path, lines)
            return vcf_path

        child_path = write_member_vcf('child')
        mother_path = write_member_vcf('mother')
        father_path = write_member_vcf('father')

        family = Family('fam_id')
        family.add_child('sample', 'mother_id', 'father_id', 'female', '2',
                         child_path)
        family.add_mother('mother_id', '0', '0', 'female', '1', mother_path)
        family.add_father('father_id', '0', '0', 'male', '1', father_path)
        family.set_child()

        sum_x_lr2_proband = 0

        # keyword arguments for the expected SNV of the child and the mother
        args = dict(chrom="1", position=2, id=".", ref="G", alts="T",
                    filter="PASS",
                    info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
                    format="DP:GT:AD", sample="50:0/1:10,10",
                    gender="female", mnv_code=None, qual='1000')

        # the father only differs from the others by gender
        dad_args = copy.deepcopy(args)
        dad_args.update(gender='male')

        loaded = load_trio(family, sum_x_lr2_proband)
        expected = TrioGenotypes(chrom="1",
                                 pos=2,
                                 child=SNV(**args),
                                 mother=SNV(**args),
                                 father=SNV(**dad_args))

        self.assertEqual(loaded, [expected])
Esempio n. 11
0
    def test_filter_de_novos(self):
        """ check that filter_de_novos() works correctly

        Covers a family without parents, a family with parents where the
        variant appears de novo, and a family where the mother also carries
        the variant.
        """

        # make a family without parents
        family = Family("fam_id")
        child_gender = "female"
        family.add_child("child_id", "child_vcf_path", "2", child_gender)
        self.vcf_loader.family = family

        # set up an autosomal variant. SNV takes the FILTER column as its sixth
        # argument, so QUAL (line[5]) is skipped instead of being passed as
        # the filter value.
        line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
        chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
        child_var = SNV(chrom, pos, snp_id, ref, alt, filt)
        child_var.add_info(line[7])
        child_var.add_format(line[8], line[9])
        child_var.set_gender(child_gender)
        child_var.set_genotype()

        # combine the variant into a list of TrioGenotypes
        child_vars = [child_var]
        mother_vars = []
        father_vars = []
        trio_variants = self.vcf_loader.combine_trio_variants(
            child_vars, mother_vars, father_vars)

        # check that vars without parents get passed through automatically
        self.assertEqual(self.vcf_loader.filter_de_novos(trio_variants, 0.9),
                         trio_variants)

        # now add parents to the family
        family.add_mother("mother_id", "mother_vcf_path", "1", "female")
        family.add_father("father_id", "father_vcf_path", "1", "male")

        # re-generate the variants list now that parents have been included
        trio_variants = self.vcf_loader.combine_trio_variants(
            child_vars, mother_vars, father_vars)

        # check that vars with parents, and that appear to be de novo are
        # filtered out
        self.assertEqual(self.vcf_loader.filter_de_novos(trio_variants, 0.9),
                         [])

        # check that vars with parents, but which are not de novo, are retained
        mother_vars = child_vars
        trio_variants = self.vcf_loader.combine_trio_variants(
            child_vars, mother_vars, father_vars)

        self.assertEqual(self.vcf_loader.filter_de_novos(trio_variants, 0.9),
                         trio_variants)
Esempio n. 12
0
    def test_get_parental_var_snv(self):
        ''' check that get_parental_var() finds or creates a parental SNV
        '''

        sex = 'F'
        child_var = create_snv(sex, '0/1')
        mom = Person('fam_id', 'mom', '0', '0', 'F', '1', '/PATH')
        parental_vars = []

        # the mother has no variants to match against, so a default variant
        # for the missing parental genotype is expected back
        found = get_parental_var(child_var, parental_vars, mom)
        placeholder = SNV(chrom="1",
                          position=150,
                          id=".",
                          ref="A",
                          alts="G",
                          qual='1000',
                          filter="PASS",
                          info=str(child_var.info),
                          format="GT",
                          sample="0/0",
                          gender="female",
                          mnv_code=None)
        self.assertEqual(found, placeholder)

        # when the mother does have a variant at the site, that one is returned
        mother_var = create_snv(sex, '0/0')
        found = get_parental_var(child_var, [mother_var], mom)
        self.assertEqual(found, mother_var)
Esempio n. 13
0
    def construct_variant(self, line, gender):
        """ build the Variant object that suits a single VCF line

        Args:
            line: list of elements of a single sample VCF line:
                [chrom, position, snp_id, ref_allele, alt_allele, quality,
                filter_value, info, format_keys, format_values]
            gender: gender of the individual to whom the variant line belongs
                (eg "1" or "M" for male, "2", or "F" for female).

        Returns:
            a CNV object if the alt allele is <DUP> or <DEL>, otherwise a SNV
            object
        """

        alt_allele = line[4]

        # anything other than a <DUP> or <DEL> alt allele is handled as a SNV,
        # which only gets its info added at this point
        if alt_allele not in ("<DUP>", "<DEL>"):
            snv = SNV(line[0], line[1], line[2], line[3], alt_allele, line[6])
            snv.add_info(line[7])
            return snv

        cnv = CNV(line[0], line[1], line[2], line[3], alt_allele, line[6])
        cnv.add_info(line[7])

        # CNVs require the format values for filtering
        cnv.set_gender(gender)
        cnv.add_format(line[8], line[9])

        if self.known_genes is not None:
            cnv.fix_gene_IDs()

        return cnv
Esempio n. 14
0
    def create_snv(self,
                   chrom,
                   geno="0/1",
                   info=None,
                   pos='150',
                   snp_id='.',
                   ref='A',
                   alt='G',
                   qual='1000',
                   filt='PASS',
                   **kwargs):
        """ build a male SNV, where every VCF field can be overridden

        Args:
            chrom: chromosome for the variant
            geno: GT value for the sample
            info: INFO string, a default ATRX missense string is used if None
            pos, snp_id, ref, alt, qual, filt: remaining fields for the SNV
            **kwargs: extra keyword arguments passed straight on to SNV

        Returns:
            the constructed SNV object
        """

        info_field = info
        if info_field is None:
            info_field = "HGNC=ATRX;CQ=missense_variant;random_tag;AF_AFR=0.0001"

        format_keys = "GT:DP:TEAM29_FILTER:PP_DNM"
        sample_values = "{0}:50:PASS:0.99".format(geno)

        snv = SNV(chrom, pos, snp_id, ref, alt, qual, filt,
                  info=info_field,
                  format=format_keys,
                  sample=sample_values,
                  gender='male',
                  **kwargs)
        return snv
Esempio n. 15
0
    def test_construct_variant(self):
        """ test that construct_variant() works correctly

        The reference variants take the FILTER column (line[6]) as their sixth
        argument, so QUAL (line[5]) is skipped instead of being passed as the
        filter value.
        """

        # check that construct variant works for SNVs
        line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
        gender = "M"
        chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
        test_var = SNV(chrom, pos, snp_id, ref, alt, filt)

        variant = self.vcf_loader.construct_variant(line, gender)

        self.assertEqual(variant.get_key(), test_var.get_key())
        # initially constructing a SNV shouldn't affect the format variable
        self.assertEqual(variant.format, None)

        # check that construct variant works for CNVs
        line = [
            "1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200", "GT",
            "0/1"
        ]
        gender = "M"
        chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
        test_var = CNV(chrom, pos, snp_id, ref, alt, filt)
        test_var.add_info(line[7])

        variant = self.vcf_loader.construct_variant(line, gender)

        self.assertEqual(variant.get_key(), test_var.get_key())
        self.assertNotEqual(variant.format, None)
 def test_passes_known_genes(self):
     ''' check passes_filters() under different sets of known genes
     '''

     # each gene set is paired with the assertion for the filter result:
     # the variant's gene is known, then not known, then no genes are defined
     scenarios = [
         ({"1001": {'irrelevant'}}, self.assertTrue),
         ({"1002": {'irrelevant'}}, self.assertFalse),
         (None, self.assertTrue),
     ]

     # the first gene set is in place before the variant is constructed
     SNV.known_genes = scenarios[0][0]
     info = "HGNC_ID=1001;CQ=missense_variant;random_tag"
     var = SNV('1', '100', '.', 'A', 'G', '1000', 'PASS', info=info,
         format='GT:DP', sample='0/1:50')

     for known_genes, check in scenarios:
         SNV.known_genes = known_genes
         check(var.passes_filters())
Esempio n. 17
0
 def test_construct_variant(self):
     """ test that construct_variant() works correctly

     The reference variants take the FILTER column (line[6]) as their sixth
     argument, so QUAL (line[5]) is skipped instead of being passed as the
     filter value.
     """

     # check that construct variant works for SNVs
     line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
     gender = "M"
     chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
     test_var = SNV(chrom, pos, snp_id, ref, alt, filt)

     variant = construct_variant(line, gender, self.known_genes)

     self.assertEqual(variant.get_key(), test_var.get_key())
     # initially constructing a SNV shouldn't affect the format variable
     self.assertEqual(variant.format, None)

     # check that construct variant works for CNVs
     line = ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200", "GT", "0/1"]
     gender = "M"
     chrom, pos, snp_id, ref, alt, _qual, filt = line[:7]
     test_var = CNV(chrom, pos, snp_id, ref, alt, filt)
     test_var.add_info(line[7])

     variant = construct_variant(line, gender, self.known_genes)

     self.assertEqual(variant.get_key(), test_var.get_key())
     self.assertNotEqual(variant.format, None)
Esempio n. 18
0
    def create_snv(self, chrom, geno="0/1"):
        """ build a male SNV on the given chromosome

        Args:
            chrom: chromosome for the variant
            geno: GT value for the sample

        Returns:
            SNV with its info, format, gender and genotype set
        """

        # arguments are chrom, pos, id, ref, alt and filter
        snv = SNV(chrom, "15000000", ".", "A", "G", "PASS")

        info_field = "HGNC=ATRX;CQ=missense_variant;random_tag;AF_AFR=0.0001"
        sample_field = "{0}:50:PASS:0.99".format(geno)

        snv.add_info(info_field)
        snv.add_format("GT:DP:TEAM29_FILTER:PP_DNM", sample_field)
        snv.set_gender("male")
        snv.set_genotype()

        return snv
Esempio n. 19
0
 def test_construct_variant(self):
     """ check construct_variant() against directly constructed variants
     """

     gender = "M"

     # one VCF line per variant class: a plain alt allele for the SNV, and a
     # <DEL> alt allele for the CNV
     cases = [
         (SNV, ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]),
         (CNV, ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
             "GT", "0/1"]),
     ]

     for variant_class, line in cases:
         test_var = variant_class(*line, gender=gender)
         variant = construct_variant(line, gender)

         # the variant has to sit at the same site, with the format parsed
         self.assertEqual(variant.get_key(), test_var.get_key())
         self.assertEqual(variant.format, {'GT': '0/1'})
Esempio n. 20
0
    def setUp(self):
        """ create a SNV with its info added, plus fields used by the tests
        """

        # arguments are chrom, pos, id, ref, alt and filter
        self.var = SNV("1", "15000000", ".", "A", "G", "PASS")

        self.pops = [
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        ]

        # the format fields are only stored here, not added to the variant
        self.format_keys = "GT:DP"
        self.sample_values = "0/1:50"

        self.var.add_info("HGNC=ATRX;CQ=missense_variant;random_tag")
    def create_snv(self, gender, genotype):
        """ build a chr1 SNV carrying the DENOVO-SNP and PP_DNM info fields

        Args:
            gender: value handed to the variant's set_gender()
            genotype: GT value for the sample, e.g. "0/1"

        Returns:
            SNV with its info, format, gender and genotype set
        """

        # arguments are chrom, pos, id, ref, alt and filter
        snv = SNV("1", "15000000", ".", "A", "G", "PASS")

        info_field = "HGNC=TEST;CQ=missense_variant;DENOVO-SNP;PP_DNM=0.99"
        sample_field = genotype + ":50:PASS:0.99"

        snv.add_info(info_field)
        snv.add_format("GT:DP:TEAM29_FILTER:PP_DNM", sample_field)
        snv.set_gender(gender)
        snv.set_genotype()

        return snv
Esempio n. 22
0
    def test_filter_de_novos(self):
        """ check that filter_de_novos() works correctly

        Covers a family without parents, a family with parents where the
        variant appears de novo, and a family where the mother also carries
        the variant.
        """

        # make a family without parents
        family = Family("fam_id")
        child_gender = "female"
        family.add_child('child_id', 'mother_id', 'father_id', child_gender,
                         '2', 'child_path')

        # set up an autosomal variant
        gender = "M"
        args = [
            "1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1", gender
        ]
        child_var = SNV(*args)

        # combine the variant into a list of TrioGenotypes
        child_vars = [child_var]
        mother_vars = []
        father_vars = []
        trio_variants = combine_trio_variants(family, child_vars, mother_vars,
                                              father_vars)

        # check that vars without parents get passed through automatically
        self.assertEqual(filter_de_novos(trio_variants, 0.9), trio_variants)

        # now add parents to the family (the same Family object is modified
        # in place, so it does not need to be reassigned)
        family.add_mother("mother_id", '0', '0', 'female', '1',
                          "mother_vcf_path")
        family.add_father("father_id", '0', '0', 'male', '1',
                          "father_vcf_path")

        # re-generate the variants list now that parents have been included
        trio_variants = combine_trio_variants(family, child_vars, mother_vars,
                                              father_vars)

        # check that vars with parents, and that appear to be de novo are
        # filtered out
        self.assertEqual(filter_de_novos(trio_variants, 0.9), [])

        # check that vars with parents, but which are not de novo, are retained
        mother_vars = child_vars
        trio_variants = combine_trio_variants(family, child_vars, mother_vars,
                                              father_vars)

        self.assertEqual(filter_de_novos(trio_variants, 0.9), trio_variants)
 def create_snv(self, sex, genotype, cq="missense_variant", hgnc="TEST", chrom="1"):
     """ build a SNV carrying the DENOVO-SNP and PP_DNM info fields

     Args:
         sex: value handed to the variant's set_gender()
         genotype: GT value for the sample, e.g. "0/1"
         cq: consequence for the CQ info field
         hgnc: gene symbol for the HGNC info field
         chrom: chromosome for the variant

     Returns:
         SNV with its info, format, gender and genotype set
     """

     # arguments are chrom, pos, id, ref, alt and filter
     snv = SNV(chrom, "15000000", ".", "A", "G", "PASS")

     info_field = "HGNC={0};CQ={1};DENOVO-SNP;PP_DNM=0.99".format(hgnc, cq)
     sample_field = genotype + ":50:PASS:0.99"

     snv.add_info(info_field)
     snv.add_format("GT:DP:TEAM29_FILTER:PP_DNM", sample_field)
     snv.set_gender(sex)
     snv.set_genotype()

     return snv
 def setUp(self):
     """ register the populations with Info, then build the default SNV
     """

     populations = [
         "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
         "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
     ]
     self.pops = populations
     Info.set_populations(self.pops)

     self.keys, self.values = "GT:DP:AD", "0/1:50:10,10"

     # positional arguments: chrom, pos, id, ref, alt, qual, filter
     self.var = SNV("1", "15000000", ".", "A", "G", "1000", "PASS",
         info="HGNC_ID=1001;CQ=missense_variant;random_tag",
         format=self.keys, sample=self.values)
Esempio n. 25
0
    def get_parental_var(self, var, parental_vars, gender, matcher):
        """ find the parental variant for a child's variant, or make a default

        Args:
            var: childs var, as Variant object
            parental_vars: list of parental variants
            gender: gender of the parent
            matcher: cnv matcher for parent

        Returns:
            the first parental variant with a matching key, otherwise a new
            variant of the same type as the child's, with a default genotype
        """

        child_is_cnv = isinstance(var, CNV)
        key = var.get_key()

        # a parental CNV might not share the child's start site, so swap to
        # the key of the overlapping variant where the matcher has a match
        if child_is_cnv and matcher.has_match(var):
            key = matcher.get_overlap_key(key)

        matched = next(
            (x for x in parental_vars if key == x.get_key()), None)
        if matched is not None:
            return matched

        # nothing in the parents variants matches the childs variant, so
        # create a default variant of the same type for the parent
        variant_class = CNV if child_is_cnv else SNV
        default = variant_class(var.chrom, var.position, var.variant_id,
                                var.ref_allele, var.alt_allele, var.filter)

        default.set_gender(gender)
        default.set_default_genotype()

        return default
 def setUp(self):
     """ create a SNV with its info added, plus fields used by the tests
     """

     chrom, pos, snp_id = "1", "15000000", "."
     ref, alt, filt = "A", "G", "PASS"
     self.var = SNV(chrom, pos, snp_id, ref, alt, filt)

     self.pops = [
         "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
         "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
     ]

     # the format fields are only stored here, not added to the variant
     self.format_keys, self.sample_values = "GT:DP", "0/1:50"

     self.var.add_info("HGNC=ATRX;CQ=missense_variant;random_tag")
    def test_analyse_trio(self):
        ''' test that analyse_trio() works correctly

        Writes a one-variant VCF for each trio member, where only the child
        carries the alt allele and the de novo tags, then compares the result
        of analyse_trio() with the expected entry.
        '''

        # construct the VCFs for the trio members
        paths = {}
        for member in ['child', 'mom', 'dad']:
            vcf = make_vcf_header()

            geno, pp_dnm = '0/0', ''
            if member == 'child':
                geno, pp_dnm = '0/1', ';DENOVO-SNP;PP_DNM=1'

            vcf.append(
                make_vcf_line(genotype=geno, extra='HGNC=ARID1B' + pp_dnm))

            # write the VCF data to a file. The context manager closes (and
            # so flushes) the handle, while delete=False keeps the file on
            # disk for when the VCF is loaded from its path.
            with tempfile.NamedTemporaryFile(dir=self.temp_dir,
                                             delete=False,
                                             suffix='.vcf') as handle:
                for x in vcf:
                    handle.write(x.encode('utf8'))
                paths[member] = handle.name

        # create a Family object, so we can load the data from the trio's VCFs
        fam_id = 'fam01'
        child = Person(fam_id, 'child', 'dad', 'mom', 'female', '2',
                       paths['child'])
        mom = Person(fam_id, 'mom', '0', '0', 'female', '1', paths['mom'])
        dad = Person(fam_id, 'dad', '0', '0', 'male', '1', paths['dad'])
        family = Family(fam_id, [child], mom, dad)

        observed = self.finder.analyse_trio(family)

        # fields shared by the expected variants of all three trio members
        shared = {
            'chrom': "1",
            'position': 1,
            'id': ".",
            'ref': "G",
            'alts': "T",
            'qual': '1000',
            'filter': "PASS",
            'format': "DP:GT",
            'mnv_code': None,
        }
        parent_info = "CQ=missense_variant;HGNC=ARID1B"

        expected_trio = TrioGenotypes(
            chrom="1",
            pos=1,
            child=SNV(
                info="CQ=missense_variant;DENOVO-SNP;HGNC=ARID1B;PP_DNM=1",
                sample="50:0/1",
                gender="female",
                **shared),
            mother=SNV(info=parent_info,
                       sample="50:0/0",
                       gender="female",
                       **shared),
            father=SNV(info=parent_info,
                       sample="50:0/0",
                       gender="male",
                       **shared))

        self.assertEqual(observed, [
            (expected_trio, ['single_variant'], ['Monoallelic', 'Mosaic'],
             ['ARID1B'])
        ])
Esempio n. 28
0
class TestVariantInfoPy(unittest.TestCase):
    """ unit tests for the info-related methods available on a SNV object
    """
    def setUp(self):
        """ define a default VcfInfo object

        Also installs the known genes on the SNV class, and registers a
        cleanup which restores the previous value once the test finishes.
        """

        chrom = "1"
        pos = "15000000"
        snp_id = "CM00001"
        ref = "A"
        alt = "G"
        filt = "PASS"

        # set up a SNV object, since SNV inherits VcfInfo
        self.var = SNV(chrom, pos, snp_id, ref, alt, filt)
        self.var.debug_chrom = "1"
        self.var.debug_pos = "15000000"

        self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

        # here are the default filtering criteria, as loaded into python
        known_genes = {
            "ATRX": {
                "inheritance": {"Hemizygous": {"Loss of function"}},
                "start": "10000000",
                "chrom": "1",
                "confirmed_status": {"Confirmed DD Gene"},
                "end": "20000000",
            }
        }

        # known_genes is set on the class rather than the instance, so put the
        # previous value back afterwards instead of leaking it to other tests
        previous = getattr(SNV, "known_genes", None)
        self.addCleanup(setattr, SNV, "known_genes", previous)
        SNV.known_genes = known_genes

        self.var.add_info(self.default_info)

    def test_set_gene_from_info(self):
        """ test that set_gene_from_info() works correctly
        """

        # check for when a HGNC key exists
        self.var.info["HGNC"] = "A"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.gene, "A")

        # check for when a HGNC key doesn't exist
        del self.var.info["HGNC"]
        self.var.set_gene_from_info()
        self.assertIsNone(self.var.gene)

    def test_is_lof(self):
        """ test that is_lof() works correctly
        """

        # check that a known LOF consequence returns True
        self.var.consequence = "stop_gained"
        self.assertTrue(self.var.is_lof())

        # check that a known non-LOF consequence returns False
        self.var.consequence = "missense_variant"
        self.assertFalse(self.var.is_lof())

        # check that null values return False
        self.var.consequence = None
        self.assertFalse(self.var.is_lof())

    def test_get_allele_frequency(self):
        """ tests that number conversion works as expected
        """

        # single number returns that number
        self.assertEqual(self.var.get_allele_frequency("1"), 1)

        # two numbers return one number
        self.assertEqual(self.var.get_allele_frequency("1,1"), 1)

        # two numbers return the highest number
        self.assertEqual(self.var.get_allele_frequency("1,2"), 2)

        # number and string return the number
        self.assertEqual(self.var.get_allele_frequency("a,1"), 1)

        # single string value returns None
        self.assertIsNone(self.var.get_allele_frequency("a"))

        # multiple string values return None
        self.assertIsNone(self.var.get_allele_frequency("a,b"))

    def test_is_number(self):
        """ tests that we can check if a value represents a number
        """

        self.assertEqual(self.var.is_number(None), False)
        self.assertEqual(self.var.is_number("5"), True)
        self.assertEqual(self.var.is_number("a"), False)

    def test_find_max_allele_frequency(self):
        """ test if the MAF finder operates correctly
        """

        # check for var without recorded MAF
        self.assertIsNone(self.var.find_max_allele_frequency())

        # check for single population
        self.var.info["MAX_AF"] = "0.005"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.005)

        # check for two populations
        self.var.info["AFR_AF"] = "0.01"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.01)

        # check for all populations. The maximum is checked after each
        # population is raised to 0.05, not only once they are all set.
        pops = {
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        }
        for pop in pops:
            self.var.info[pop] = "0.05"
            self.assertEqual(self.var.find_max_allele_frequency(), 0.05)
Esempio n. 29
0
    def create_snv(self, gender, genotype):
        """ build an X chromosome SNV for the given gender and genotype

        Args:
            gender: value handed to the variant's set_gender()
            genotype: GT value for the sample, e.g. "0/1"

        Returns:
            SNV with its vcf_line, info, format, gender and genotype set
        """

        # the first seven VCF columns, of which the SNV constructor takes
        # everything but the quality
        site = ["X", "15000000", ".", "A", "G", "50", "PASS"]
        snv = SNV(site[0], site[1], site[2], site[3], site[4], site[6])

        info = "HGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005"
        format_keys = "GT:DP"
        sample_values = genotype + ":50"

        # keep the fields of the full VCF line on the variant
        snv.vcf_line = site + [info, format_keys, sample_values]

        snv.add_info(info)
        snv.add_format(format_keys, sample_values)
        snv.set_gender(gender)
        snv.set_genotype()

        return snv
Esempio n. 30
0
class TestVariantSnvPy(unittest.TestCase):
    """ unit testing of the SNV class

    Each test starts from the variant built in setUp(), and tearDown() resets
    the class-level state (SNV.known_genes and the Info populations) that the
    tests modify.
    """

    def setUp(self):
        """ define a default SNV object
        """

        # population tags registered with Info through set_populations()
        self.pops = [
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF", "ESP_AF",
            "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"
        ]
        Info.set_populations(self.pops)

        self.keys = "GT:DP:AD"
        self.values = "0/1:50:10,10"

        self.var = SNV("1", "15000000", ".", "A", "G", "1000", "PASS",
                       info="HGNC_ID=1001;CQ=missense_variant;random_tag",
                       format=self.keys,
                       sample=self.values)

    def tearDown(self):
        """ reset the class-level state that the tests change
        """

        SNV.known_genes = None
        Info.set_populations([])

    def _check_genotype_calls(self, expected, getter):
        """ set each GT value in turn, and check what a getter then reports

        Args:
            expected: list of (GT string, expected result) tuples
            getter: zero-argument callable, evaluated after set_genotype()
        """

        for genotype, result in expected:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(getter(), result)

    def test_get_key(self):
        """ tests that get_key() operates correctly
        """

        # make sure the chrom and position are correct
        self.var.chrom = "1"
        self.var.position = "15000000"
        self.assertEqual(self.var.get_key(), ("1", "15000000"))

        # and make sure the chrom and position are correct if we change them
        self.var.chrom = "22"
        self.var.position = "123456789"
        self.assertEqual(self.var.get_key(), ("22", "123456789"))

    def test_convert_genotype(self):
        """ test that genotypes convert from two char to single char
        """

        expected = [
            ("0/0", 0), ("0/1", 1), ("1/0", 1), ("1/1", 2), ("1/2", 1),
            ("2/1", 1), ("0/2", 1), ("2/0", 1), ("2/2", 2),
        ]

        # run through all the legit genotype codes
        for genotype, result in expected:
            self.assertEqual(self.var.convert_genotype(genotype), result)

        # raise error when converting single character genotype
        with self.assertRaises(ValueError):
            self.var.convert_genotype("0")

        # raise error when converting unknown genotype
        with self.assertRaises(AssertionError):
            self.var.convert_genotype("a/a")

        # also include other genotype format possibilities. None of these are
        # used, but since they aren't explicitly forbidden, make sure they
        # work. Here we check two character strings.
        self.assertEqual(self.var.convert_genotype("12|34"), 1)
        self.assertEqual(self.var.convert_genotype("99|99"), 2)

    def test_set_genotype_autosomal(self):
        """ test that set_genotype() operates correctly
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", 0), ("0/1", 1), ("1/1", 2)], self.var.get_genotype)

        # remove the format attribute, so we can raise an error
        self.var.format = None
        with self.assertRaises(ValueError):
            self.var.set_genotype()

    def test_set_genotype_allosomal_male(self):
        """ test that set_genotype() operates correctly for the male X chrom
        """

        self.var.add_format(self.keys, self.values)
        self.var.chrom = "X"
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", 0), ("1/1", 2)], self.var.get_genotype)

        # check that we raise an error for X chrom hets
        for genotype in ["0/1", "1/0"]:
            self.var.format["GT"] = genotype
            with self.assertRaises(ValueError):
                self.var.set_genotype()

    def test_set_genotype_allosomal_female(self):
        """ test that set_genotype() operates correctly for the female X chrom
        """

        self.var.add_format(self.keys, self.values)
        self.var.chrom = "X"
        self.var._set_gender("female")

        self._check_genotype_calls(
            [("0/0", 0), ("0/1", 1), ("1/1", 2)], self.var.get_genotype)

    def test_set_allosomal_male(self):
        """ test that convert_allosomal_genotype_code_to_alleles() handles
        hets in males correctly
        """

        self.var._set_gender("male")
        self.var.chrom = 'X'
        self.var.genotype = '1'

        # treat as hom if VAF > 0.8
        self.var.format["AD"] = '1,19'
        self.var.format["GT"] = '1/1'
        self.var.convert_allosomal_genotype_code_to_alleles()
        self.assertEqual(self.var.alleles, {self.var.alt_alleles})

        # treat as hom if denovo
        self.var.format["AD"] = '10,19'
        self.var.format["PP_DNM"] = 0.0099
        self.var.convert_allosomal_genotype_code_to_alleles()
        self.assertEqual(self.var.alleles, {self.var.alt_alleles})

    def test_is_het_autosomal(self):
        """ tests that is_het() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", False), ("0/1", True), ("1/1", False)], self.var.is_het)

    def test_is_hom_alt_autosomal(self):
        """ tests that is_hom_alt() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", False), ("0/1", False), ("1/1", True)],
            self.var.is_hom_alt)

    def test_is_hom_ref_autosomal(self):
        """ tests that is_hom_ref() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", True), ("0/1", False), ("1/1", False)],
            self.var.is_hom_ref)

    def test_is_not_ref_autosomal(self):
        """ tests that is_not_ref() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", False), ("0/1", True), ("1/1", True)],
            self.var.is_not_ref)

    def test_is_not_alt_autosomal(self):
        """ tests that is_not_alt() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        self._check_genotype_calls(
            [("0/0", True), ("0/1", True), ("1/1", False)],
            self.var.is_not_alt)

    def test_passes_default_filters(self):
        """ test that the default variant passes the filters
        """

        self.assertTrue(self.var.passes_filters())

    def test_passes_known_genes(self):
        """ test that genes pass or fail when they affect known genes or not
        """

        SNV.known_genes = {"1001": {'irrelevant'}}
        var = SNV('1', '100', '.', 'A', 'G', '1000', 'PASS',
                  info="HGNC_ID=1001;CQ=missense_variant;random_tag",
                  format='GT:DP',
                  sample='0/1:50')

        # a variant that affects a known gene passes
        self.assertTrue(var.passes_filters())

        # a variant that doesn't affect any known genes fails
        SNV.known_genes = {"1002": {'irrelevant'}}
        self.assertFalse(var.passes_filters())

        # if we haven't provided any known genes, the variant passes
        SNV.known_genes = None
        self.assertTrue(var.passes_filters())

    def test_passes_alternate_filter_string(self):
        """ test that the alternate permitted FILTER string also passes
        """

        # check that the alternate FILTER value passes
        self.var.filter = "."
        self.assertTrue(self.var.passes_filters())

        self.var.filter = "FAIL"
        self.assertFalse(self.var.passes_filters())

        # check that low VQSLOD on its own will pass the variant
        self.var.filter = "LOW_VQSLOD"
        self.assertTrue(self.var.passes_filters())

        # check that low VQSLOD in a de novo will still pass
        self.var.filter = "LOW_VQSLOD"
        self.var.info["DENOVO-SNP"] = True
        self.assertTrue(self.var.passes_filters())

    def test_passes_filters_low_maf(self):
        """ tests that low MAF values pass the filters
        """

        for pop in self.pops:
            # check that low MAF values pass the filters
            self.var.info[pop] = "0.001"
            self.assertTrue(self.var.passes_filters())

            # and check that MAF on the threshold still pass
            self.var.info[pop] = "0.005"
            self.assertTrue(self.var.passes_filters())

    def test_out_of_range_maf(self):
        """ check that MAF outside 0-1 still pass or fail correctly
        """

        self.var.info["AFR_AF"] = "-1"
        self.assertTrue(self.var.passes_filters())

        self.var.info["AFR_AF"] = "100"
        self.assertFalse(self.var.passes_filters())

    def test_fails_filters_high_maf(self):
        """ test that variants with high MAF fail the filtering
        """

        # check that a MAF of 0.0101 in any one population fails the variant
        for pop in self.pops:
            self.var.info[pop] = "0.0101"
            self.assertFalse(self.var.passes_filters())

    def test_passes_consequence_filter(self):
        """ check all the consequence values that should pass
        """

        vep_passing = [
            "transcript_ablation", "splice_donor_variant",
            "splice_acceptor_variant", "frameshift_variant",
            "initiator_codon_variant", "inframe_insertion",
            "inframe_deletion", "missense_variant",
            "transcript_amplification", "stop_gained", "stop_lost",
        ]

        # NOTE(review): this sets consequence on the variant itself, whereas
        # test_fails_consequence_filter() sets it on self.var.info - confirm
        # which attribute passes_filters() reads, otherwise this loop may only
        # be re-checking the default missense_variant consequence.
        for cq in vep_passing:
            self.var.consequence = [[cq]]
            self.assertTrue(self.var.passes_filters())

    def test_fails_consequence_filter(self):
        """ check all the consequence values that should fail
        """

        vep_failing = [
            "splice_region_variant", "incomplete_terminal_codon_variant",
            "synonymous_variant", "stop_retained_variant",
            "mature_miRNA_variant", "5_prime_UTR_variant",
            "3_prime_UTR_variant", "non_coding_exon_variant",
            "nc_transcript_variant", "intron_variant",
            "NMD_transcript_variant", "upstream_gene_variant",
            "downstream_gene_variant", "TFBS_ablation", "TFBS_amplification",
            "TF_binding_site_variant", "regulatory_region_variant",
            "regulatory_region_ablation", "regulatory_region_amplification",
            "feature_elongation", "feature_truncation", "intergenic_variant",
            "coding_sequence_variant",
        ]

        # check all the failing consequences
        for cq in vep_failing:
            self.var.info.consequence = [[cq]]
            self.assertFalse(self.var.passes_filters())

    def test_passes_filters_with_debug(self):
        """ check that passes_filters_with_debug() generates a failure message
        """

        # make a variant that will fail the filtering, and set the site for
        # debugging
        self.var.info["AFR_AF"] = "0.05"
        self.var.debug_pos = self.var.get_position()

        # capture the output from the print function. The previous stream is
        # put back in a finally clause, so that an exception cannot leave
        # stdout redirected for the tests that run afterwards, and so that a
        # test runner's own stdout capture is not replaced by sys.__stdout__.
        out = StringIO()
        previous_stdout = sys.stdout
        sys.stdout = out
        try:
            passed = self.var.passes_filters_with_debug()
        finally:
            sys.stdout = previous_stdout

        # check that the variant fails, and that the message about why the
        # variant failed filtering is correct
        self.assertFalse(passed)
        self.assertEqual(out.getvalue().strip(), "failed MAF: 0.05")
class TestVariantSnvPy(unittest.TestCase):
    """ unit testing of the SNV class

    Each test starts from the variant built in setUp(), which is constructed
    from the positional VCF fields and then given its INFO with add_info().
    """

    def setUp(self):
        """ define a default SNV object
        """

        # set up a SNV object, since SNV inherits VcfInfo
        self.var = SNV("1", "15000000", ".", "A", "G", "PASS")

        # INFO keys that the MAF tests set allele frequencies under
        self.pops = [
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF", "ESP_AF",
            "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        ]

        self.format_keys = "GT:DP"
        self.sample_values = "0/1:50"

        self.var.add_info("HGNC=ATRX;CQ=missense_variant;random_tag")

    def _check_genotype_calls(self, expected, getter):
        """ set each GT value in turn, and check what a getter then reports

        Args:
            expected: list of (GT string, expected result) tuples
            getter: zero-argument callable, evaluated after set_genotype()
        """

        for genotype, result in expected:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(getter(), result)

    def test_get_key(self):
        """ tests that get_key() operates correctly
        """

        # make sure the chrom and position are correct
        self.var.chrom = "1"
        self.var.position = "15000000"
        self.assertEqual(self.var.get_key(), ("1", "15000000"))

        # and make sure the chrom and position are correct if we change them
        self.var.chrom = "22"
        self.var.position = "123456789"
        self.assertEqual(self.var.get_key(), ("22", "123456789"))

    def test_convert_genotype(self):
        """ test that genotypes convert from two char to single char
        """

        expected = [
            ("0/0", 0), ("0/1", 1), ("1/0", 1), ("1/1", 2), ("1/2", 1),
            ("2/1", 1), ("0/2", 1), ("2/0", 1), ("2/2", 2),
        ]

        # run through all the legit genotype codes
        for genotype, result in expected:
            self.assertEqual(self.var.convert_genotype(genotype), result)

        # raise error when converting single character genotype
        with self.assertRaises(ValueError):
            self.var.convert_genotype("0")

        # raise error when converting unknown genotype
        with self.assertRaises(AssertionError):
            self.var.convert_genotype("a/a")

        # also include other genotype format possibilities. None of these are
        # used, but since they aren't explicitly forbidden, make sure they
        # work. Here we check two character strings.
        self.assertEqual(self.var.convert_genotype("12|34"), 1)
        self.assertEqual(self.var.convert_genotype("99|99"), 2)

    def test_set_default_genotype(self):
        """ test that set_default_genotype() operates correctly on the autosomes
        """

        self.var.set_gender("male")
        self.var.set_default_genotype()
        self.assertEqual(self.var.get_genotype(), 0)

    def test_set_genotype_autosomal(self):
        """ test that set_genotype() operates correctly
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", 0), ("0/1", 1), ("1/1", 2)], self.var.get_genotype)

        # remove the format attribute, so we can raise an error
        del self.var.format
        with self.assertRaises(ValueError):
            self.var.set_genotype()

    def test_set_genotype_allosomal_male(self):
        """ test that set_genotype() operates correctly for the male X chrom
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.chrom = "X"
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", 0), ("1/1", 2)], self.var.get_genotype)

        # check that we raise an error for X chrom hets
        for genotype in ["0/1", "1/0"]:
            self.var.format["GT"] = genotype
            with self.assertRaises(ValueError):
                self.var.set_genotype()

    def test_set_genotype_allosomal_female(self):
        """ test that set_genotype() operates correctly for the female X chrom
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.chrom = "X"
        self.var.set_gender("female")

        self._check_genotype_calls(
            [("0/0", 0), ("0/1", 1), ("1/1", 2)], self.var.get_genotype)

    def test_is_het_autosomal(self):
        """ tests that is_het() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", False), ("0/1", True), ("1/1", False)], self.var.is_het)

    def test_is_hom_alt_autosomal(self):
        """ tests that is_hom_alt() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", False), ("0/1", False), ("1/1", True)],
            self.var.is_hom_alt)

    def test_is_hom_ref_autosomal(self):
        """ tests that is_hom_ref() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", True), ("0/1", False), ("1/1", False)],
            self.var.is_hom_ref)

    def test_is_not_ref_autosomal(self):
        """ tests that is_not_ref() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", False), ("0/1", True), ("1/1", True)],
            self.var.is_not_ref)

    def test_is_not_alt_autosomal(self):
        """ tests that is_not_alt() operates correctly for autosomal
        chromosomes
        """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        self._check_genotype_calls(
            [("0/0", True), ("0/1", True), ("1/1", False)],
            self.var.is_not_alt)

    def test_passes_default_filters(self):
        """ test that the default variant passes the filters
        """

        self.assertTrue(self.var.passes_filters())

    def test_passes_alternate_filter_string(self):
        """ test that the alternate permitted FILTER string also passes
        """

        # check that the alternate FILTER value passes
        self.var.filter = "."
        self.assertTrue(self.var.passes_filters())

        self.var.filter = "FAIL"
        self.assertFalse(self.var.passes_filters())

        # check that low VQSLOD on its own will pass the variant
        self.var.filter = "LOW_VQSLOD"
        self.assertTrue(self.var.passes_filters())

        # check that low VQSLOD in a de novo will still pass
        self.var.filter = "LOW_VQSLOD"
        self.var.info["DENOVO-SNP"] = True
        self.assertTrue(self.var.passes_filters())

    def test_passes_filters_low_maf(self):
        """ tests that low MAF values pass the filters
        """

        for pop in self.pops:
            # check that low MAF values pass the filters
            self.var.info[pop] = "0.005"
            self.assertTrue(self.var.passes_filters())

            # and check that MAF on the threshold still pass
            self.var.info[pop] = "0.01"
            self.assertTrue(self.var.passes_filters())

    def test_out_of_range_maf(self):
        """ check that MAF outside 0-1 still pass or fail correctly
        """

        self.var.info["AFR_AF"] = "-1"
        self.assertTrue(self.var.passes_filters())

        self.var.info["AFR_AF"] = "100"
        self.assertFalse(self.var.passes_filters())

    def test_fails_filters_high_maf(self):
        """ test that variants with high MAF fail the filtering
        """

        # check that a MAF of 0.0101 in any one population fails the variant
        for pop in self.pops:
            self.var.info[pop] = "0.0101"
            self.assertFalse(self.var.passes_filters())

    def test_passes_consequence_filter(self):
        """ check all the consequence values that should pass
        """

        vep_passing = [
            "transcript_ablation", "splice_donor_variant",
            "splice_acceptor_variant", "frameshift_variant",
            "initiator_codon_variant", "inframe_insertion",
            "inframe_deletion", "missense_variant",
            "transcript_amplification", "stop_gained", "stop_lost",
            "coding_sequence_variant",
        ]

        # check all the passing consequences
        for cq in vep_passing:
            self.var.consequence = [cq]
            self.assertTrue(self.var.passes_filters())

    def test_fails_consequence_filter(self):
        """ check all the consequence values that should fail
        """

        vep_failing = [
            "splice_region_variant", "incomplete_terminal_codon_variant",
            "synonymous_variant", "stop_retained_variant",
            "mature_miRNA_variant", "5_prime_UTR_variant",
            "3_prime_UTR_variant", "non_coding_exon_variant",
            "nc_transcript_variant", "intron_variant",
            "NMD_transcript_variant", "upstream_gene_variant",
            "downstream_gene_variant", "TFBS_ablation", "TFBS_amplification",
            "TF_binding_site_variant", "regulatory_region_variant",
            "regulatory_region_ablation", "regulatory_region_amplification",
            "feature_elongation", "feature_truncation", "intergenic_variant",
        ]

        # check all the failing consequences
        for cq in vep_failing:
            self.var.consequence = [cq]
            self.assertFalse(self.var.passes_filters())

    def test_passes_filters_with_debug(self):
        """ check that passes_filters_with_debug() generates a failure message
        """

        # make a variant that will fail the filtering, and set the site for
        # debugging
        self.var.info["AFR_AF"] = "0.05"
        self.var.debug_pos = self.var.get_position()

        # capture the output from the print function. The previous stream is
        # put back in a finally clause, since otherwise stdout would stay
        # redirected into the StringIO for every test that runs afterwards.
        out = StringIO()
        previous_stdout = sys.stdout
        sys.stdout = out
        try:
            passed = self.var.passes_filters_with_debug()
        finally:
            sys.stdout = previous_stdout

        # check that the variant fails, and that the message about why the
        # variant failed filtering is correct
        self.assertFalse(passed)
        self.assertEqual(out.getvalue().strip(), "failed MAF: 0.05")
Esempio n. 32
0
class TestVariantInfoPy(unittest.TestCase):
    """ unit tests for the gene symbol, consequence and allele frequency
    handling of variants, exercised through a SNV object (SNV inherits VcfInfo)
    """
    
    def setUp(self):
        """ build the default variant shared by every test in this class
        """

        # SNV inherits VcfInfo, so a SNV instance is used to exercise the
        # VcfInfo methods
        self.var = SNV("1", "15000000", "CM00001", "A", "G", "PASS")
        self.var.debug_chrom = "1"
        self.var.debug_pos = "15000000"

        self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

        # the default filtering criteria, in the form they take once loaded
        # into python
        atrx = {
            "inheritance": {"Hemizygous": {"Loss of function"}},
            "start": "10000000",
            "chrom": "1",
            "confirmed_status": {"Confirmed DD Gene"},
            "end": "20000000",
        }
        SNV.known_genes = {"ATRX": atrx}

        self.var.add_info(self.default_info)
    
    def test_set_gene_from_info(self):
        """ test that set_gene_from_info() works correctly
        """

        var = self.var

        # a HGNC key holding one symbol gives a single-entry list
        var.info["HGNC"] = "A"
        var.set_gene_from_info()
        self.assertEqual(var.genes, ["A"])

        # without a HGNC key there are no genes at all
        del var.info["HGNC"]
        var.set_gene_from_info()
        self.assertIsNone(var.genes)

        # several symbols separated by "|" are split into separate entries
        var.info["HGNC"] = "A|B|C"
        var.set_gene_from_info()
        self.assertEqual(var.genes, ["A", "B", "C"])

        # missing symbols (blank, or ".") are reported as None
        var.info["HGNC"] = "|.|C"
        var.set_gene_from_info()
        self.assertEqual(var.genes, [None, None, "C"])

        # a missing symbol is filled in when another symbol field has an
        # alternate for that position
        var.info["HGNC"] = ".|.|C"
        var.info["SYMBOL"] = "Z|.|C"
        var.set_gene_from_info()
        self.assertEqual(var.genes, ["Z", None, "C"])

        # Check that the alternate symbol fields are used in the correct
        # precedence order. Note that doing this properly would require
        # checking all of the possible order combinations.
        var.info["HGNC"] = ".|.|C"
        var.info["SYMBOL"] = "Z|.|C"
        var.info["ENSG"] = "A|.|C"
        var.set_gene_from_info()
        self.assertEqual(var.genes, ["Z", None, "C"])
    
    def test_is_lof(self):
        """ test that is_lof() works correctly
        """

        var = self.var

        # a known loss-of-function consequence returns True
        var.consequence = ["stop_gained"]
        self.assertTrue(var.is_lof())

        # a known non-LOF consequence returns False
        var.consequence = ["missense_variant"]
        self.assertFalse(var.is_lof())

        # a null consequence returns False
        var.consequence = None
        self.assertFalse(var.is_lof())

        # when the variant overlaps multiple genes (so has multiple gene
        # symbols and consequences), the answer depends on the gene asked for
        var.consequence = ["stop_gained", "missense_variant"]
        var.genes = ["ATRX", "TTN"]
        self.assertTrue(var.is_lof())
        self.assertTrue(var.is_lof("ATRX"))
        self.assertFalse(var.is_lof("TTN"))
    
    def test_correct_multiple_alt(self):
        """ test that correct_multiple_alt works correctly
        """

        var = self.var

        # define the number of alleles and consequences for multiple alleles
        var.info["AC"] = "1,1"
        cq = ["missense_variant,splice_acceptor_variant"]

        # both alts fall in one gene
        var.info["HGNC"] = "ATRX,ATRX"
        var.set_gene_from_info()
        expected = (['splice_acceptor_variant'], ['ATRX'], None)
        self.assertEqual(var.correct_multiple_alt(cq), expected)

        # the alts fall in multiple genes
        cq = ["missense_variant|regulatory_region_variant,stop_gained|splice_acceptor_variant"]
        var.info["HGNC"] = "ATRX|TTN,ATRX|TTN"
        var.set_gene_from_info()
        expected = (['stop_gained', 'splice_acceptor_variant'],
            ['ATRX', 'TTN'], None)
        self.assertEqual(var.correct_multiple_alt(cq), expected)

        # a cq that has already been split by "|" (ie by gene) gives the same
        # result
        cq = [
            "missense_variant",
            "regulatory_region_variant,stop_gained",
            "splice_acceptor_variant",
        ]
        var.set_gene_from_info()
        self.assertEqual(var.correct_multiple_alt(cq), expected)

        # if the proband has a zero count for an allele, then the consequences
        # and HGNC symbols for that allele are disregarded
        var.info["AC"] = "1,0"
        var.set_gene_from_info()
        expected = (['missense_variant', 'regulatory_region_variant'],
            ['ATRX', 'TTN'], None)
        self.assertEqual(var.correct_multiple_alt(cq), expected)

        # revert the allele counts, but drop the HGNC symbol, and make sure
        # that an empty list of gene symbols is returned
        var.info["AC"] = "1,1"
        del var.info["HGNC"]
        var.set_gene_from_info()
        expected = (['stop_gained', 'splice_acceptor_variant'], [], None)
        self.assertEqual(var.correct_multiple_alt(cq), expected)
    
    def test_get_most_severe_consequence(self):
        """ test that get_most_severe_consequence works correctly
        """

        most_severe = self.var.get_most_severe_consequence

        # the most simple case: a flat list of two consequences
        self.assertEqual(
            most_severe(["missense_variant", "splice_acceptor_variant"]),
            "splice_acceptor_variant")

        # a single-entry list gives back its only entry
        self.assertEqual(most_severe(["missense_variant"]), "missense_variant")

        # a list of lists (one per allele) gives back a list
        cq_per_allele = [
            ["synonymous_variant", "splice_donor_variant"],
            ["missense_variant", "regulatory_region_variant"],
        ]
        self.assertEqual(most_severe(cq_per_allele),
            ["missense_variant", "splice_donor_variant"])
    
    def test_get_per_gene_consequence(self):
        """ test that get_per_gene_consequence works correctly
        """

        var = self.var

        # a variant in a single gene
        var.genes = ["ATRX"]
        var.consequence = ["missense_variant"]

        self.assertEqual(var.get_per_gene_consequence(None), ["missense_variant"])
        self.assertEqual(var.get_per_gene_consequence("ATRX"), ["missense_variant"])
        self.assertEqual(var.get_per_gene_consequence("TEST"), [])

        # for a variant with consequences in multiple genes, only the
        # consequences for the requested gene are pulled out
        var.genes = ["ATRX", "TTN"]
        var.consequence = ["missense_variant", "synonymous_variant"]
        self.assertEqual(var.get_per_gene_consequence("ATRX"), ["missense_variant"])
        self.assertEqual(var.get_per_gene_consequence("TTN"), ["synonymous_variant"])

        # a symbol that appears twice collects both of its consequences
        three_consequences = ["splice_acceptor_variant", "missense_variant",
            "synonymous_variant"]
        var.genes = ["TEMP", "ATRX", "TEMP"]
        var.consequence = three_consequences
        self.assertEqual(var.get_per_gene_consequence("TEMP"),
            ["splice_acceptor_variant", "synonymous_variant"])

        # None gene symbols do not interfere with finding a named gene
        var.genes = [None, "ATRX", None]
        var.consequence = ["splice_acceptor_variant", "missense_variant",
            "synonymous_variant"]
        self.assertEqual(var.get_per_gene_consequence("ATRX"),
            ["missense_variant"])

        # check that the earlier VCFs with single consequences but multiple
        # symbols from HGNC_ALL give the same consequence for all genes.
        info = "HGNC_ALL=ATRX&TTN;CQ=missense_variant;random_tag"
        del var.info["HGNC"]
        var.genes = None
        var.add_info(info)

        for symbol in ("ATRX", "TTN"):
            self.assertEqual(var.get_per_gene_consequence(symbol),
                ["missense_variant"])
        
    def test_get_allele_frequency(self):
        """ tests that number conversion works as expected
        """

        frequency = self.var.get_allele_frequency

        # a single number comes back as that number
        self.assertEqual(frequency("1"), 1)

        # two identical numbers collapse to one number
        self.assertEqual(frequency("1,1"), 1)

        # two different numbers give the highest of the pair
        self.assertEqual(frequency("1,2"), 2)

        # a string alongside a number gives the number
        self.assertEqual(frequency("a,1"), 1)

        # a lone string value gives None
        self.assertEqual(frequency("a"), None)

        # several string values give None
        self.assertEqual(frequency("a,b"), None)
    
    def test_is_number(self):
        """ tests that we can check if a value represents a number
        """

        is_number = self.var.is_number

        # None and non-numeric text are not numbers, numeric text is
        self.assertEqual(is_number(None), False)
        self.assertEqual(is_number("5"), True)
        self.assertEqual(is_number("a"), False)
    
    def test_find_max_allele_frequency(self):
        """ test if the MAF finder operates correctly
        """
        
        # check for var without recorded MAF
        self.assertIsNone(self.var.find_max_allele_frequency())
        
        # check for single population
        self.var.info["MAX_AF"] = "0.005"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.005)
        
        # check for two populations
        self.var.info["AFR_AF"] = "0.01"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.01)
        
        # check for all populations
        pops = set(["AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF", \
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"])
        for pop in pops:
            self.var.info[pop] = "0.05"
            self.assertEqual(self.var.find_max_allele_frequency(), 0.05)