예제 #1
0
    def setUp(self):
        """ build the CNV fixture that each test starts from

        The fixture is a <DUP> call at 1:15000000 (END=16000000), created
        with gender "F" and a "deNovo" value in its sample field.
        """

        # fixed VCF columns, in order: CHROM, POS, ID, REF, ALT, QUAL, FILTER
        columns = ("1", "15000000", ".", "A", "<DUP>", "1000", "PASS")

        info_field = (
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
            "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
            "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
        )

        self.var = CNV(*columns,
                       info=info_field,
                       format="inheritance:DP",
                       sample="deNovo:50",
                       gender="F")
예제 #2
0
    def create_cnv(self, gender, inh, chrom, pos, cq=None):
        """ create a <DEL> CNV for testing

        Args:
            gender: gender code handed to set_gender()
            inh: value stored under the INHERITANCE format key
            chrom: chromosome of the CNV
            pos: start position of the CNV
            cq: value for the CQ INFO key; "transcript_ablation" when None

        Returns:
            a CNV object with info, format, gender and genotype all set
        """

        consequence = "transcript_ablation" if cq is None else cq

        # the remaining VCF columns are fixed: ID, REF, ALT, FILTER
        cnv = CNV(chrom, pos, ".", "A", "<DEL>", "PASS")

        info_field = \
            "CQ={};HGNC=TEST;HGNC_ALL=TEST;END=16000000;SVLEN=5000".format(
                consequence)
        sample_field = inh + ":50"

        cnv.add_info(info_field)
        cnv.add_format("INHERITANCE:DP", sample_field)
        cnv.set_gender(gender)
        cnv.set_genotype()

        return cnv
예제 #3
0
    def create_cnv(self,
                   chrom,
                   info=None,
                   pos='15000000',
                   snp_id='.',
                   ref='A',
                   alt='<DUP>',
                   qual='1000',
                   filt='PASS',
                   **kwargs):
        """ create a CNV for testing, with overridable VCF fields

        Args:
            chrom: chromosome the CNV lies on
            info: VCF INFO string. When None, a default INFO string with
                END=16000000 and SVLEN=1000000 is used.
            pos: start position of the CNV
            snp_id: value for the VCF ID column
            ref: reference allele
            alt: alternate allele
            qual: value for the VCF QUAL column
            filt: value for the VCF FILTER column
            **kwargs: extra keyword arguments handed straight to CNV()

        Returns:
            a CNV object created with gender 'male' and a 'deNovo' value
            under the 'inheritance' format key
        """

        if info is None:
            # Adjacent literals inside parentheses are joined by the parser.
            # The previous form continued one double-quoted literal with
            # backslashes while "closing" each line with a single quote, which
            # left stray apostrophes and indentation inside the INFO value
            # and so garbled the WSCORE and MADL2R keys.
            info = ("HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
                    "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
                    "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000")

        keys = "inheritance:DP"
        values = "deNovo:50"

        return CNV(chrom,
                   pos,
                   snp_id,
                   ref,
                   alt,
                   qual,
                   filt,
                   info=info,
                   format=keys,
                   sample=values,
                   gender='male',
                   **kwargs)
예제 #4
0
    def test_construct_variant(self):
        """ check that construct_variant() builds both SNVs and CNVs
        """

        sex = "M"

        # a plain base as the alt allele should give a variant keyed like a SNV
        snv_line = [
            "1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"
        ]
        # NOTE(review): the [:6] slice ends at the QUAL column, so "1000" is
        # what the constructor receives last - confirm get_key() ignores it
        expected = SNV(*snv_line[:6])
        observed = self.vcf_loader.construct_variant(snv_line, sex)

        self.assertEqual(observed.get_key(), expected.get_key())
        # initially constructing a SNV shouldn't affect the format variable
        self.assertEqual(observed.format, None)

        # a <DEL> alt allele should give a variant keyed like a CNV, and this
        # time the format attribute must have been populated
        cnv_line = [
            "1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200", "GT",
            "0/1"
        ]
        expected = CNV(*cnv_line[:6])
        expected.add_info(cnv_line[7])
        observed = self.vcf_loader.construct_variant(cnv_line, sex)

        self.assertEqual(observed.get_key(), expected.get_key())
        self.assertNotEqual(observed.format, None)
예제 #5
0
    def test_construct_variant(self):
        """ check that construct_variant() builds both SNVs and CNVs
        """

        sex = "M"
        parsed_format = {'GT': '0/1'}

        # a plain base as the alt allele should match a directly built SNV
        snv_line = [
            "1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"
        ]
        expected = SNV(*snv_line, gender=sex)
        observed = construct_variant(snv_line, sex)

        self.assertEqual(observed.get_key(), expected.get_key())
        self.assertEqual(observed.format, parsed_format)

        # a <DEL> alt allele should match a directly built CNV
        cnv_line = [
            "1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200", "GT",
            "0/1"
        ]
        expected = CNV(*cnv_line, gender=sex)
        observed = construct_variant(cnv_line, sex)

        self.assertEqual(observed.get_key(), expected.get_key())
        self.assertEqual(observed.format, parsed_format)
예제 #6
0
 def test_include_variant(self):
     """ check include_variant() for child and parental SNVs and CNVs
     """

     def vcf_line(pos, alt, filt, info):
         # every line in this test is on chrom 1 with ref "T" and a 0/1 call
         return ["1", pos, ".", "T", alt, "1000", filt, info, "GT", "0/1"]

     loader = self.vcf_loader
     snv_info = "CQ=missense_variant;HGNC=ATRX"
     mnvs = {}
     sex = "M"

     # child variants: one that passes the filters is kept, and one that
     # fails the filters is dropped
     child_variants = False
     line = vcf_line("100", "A", "PASS", snv_info)
     self.assertTrue(loader.include_variant(line, child_variants, sex, mnvs))

     line = vcf_line("100", "A", "FAIL", snv_info)
     self.assertFalse(loader.include_variant(line, child_variants, sex, mnvs))

     # parental variants: a failing variant is still kept when the child has
     # a variant at the same site ...
     child_variants = True
     loader.child_keys = {("1", 100), ("X", 200)}
     line = vcf_line("100", "A", "FAIL", snv_info)
     self.assertTrue(loader.include_variant(line, child_variants, sex, mnvs))

     # ... and is dropped when the child has nothing at that site
     line = vcf_line("200", "A", "FAIL", snv_info)
     self.assertFalse(loader.include_variant(line, child_variants, sex, mnvs))

     # parental CNVs
     line = vcf_line("100", "<DEL>", "PASS", "END=200")
     # NOTE(review): test_var is built here but never handed to the loader
     # in this test - confirm whether a CNV matcher was meant to be set up
     test_var = CNV(*line[:6])
     test_var.add_info(line[7])

     # a parental CNV at a site the child also has should be included
     self.assertTrue(loader.include_variant(line, child_variants, sex, mnvs))

     # a parental CNV without any overlap to the child's CNVs should be
     # excluded
     line = vcf_line("300", "<DEL>", "PASS", "END=400")
     self.assertFalse(loader.include_variant(line, child_variants, sex, mnvs))
예제 #7
0
    def test_get_parental_var_cnv(self):
        ''' check that get_parental_var() builds a default CNV for a parent
        '''

        sex = 'F'
        var = create_cnv(sex, 'deNovo')
        mom = Person('fam_id', 'mom', '0', '0', 'F', '1', '/PATH')

        def default_parental_cnv():
            # the CNV we expect back in both scenarios: a <REF> allele with
            # an 'uncertain' inheritance value, copying the child's info
            return CNV(chrom="1",
                       position=150,
                       id=".",
                       ref="A",
                       alts="<REF>",
                       qual='1000',
                       filter="PASS",
                       info=str(var.info),
                       format='INHERITANCE',
                       sample='uncertain',
                       gender="female",
                       mnv_code=None)

        # scenario 1: the parent has no variants at all
        no_parental_vars = []
        self.assertEqual(get_parental_var(var, no_parental_vars, mom),
                         default_parental_cnv())

        # scenario 2: even if a CNV exists in the parent at a matching site,
        # we still create a new CNV object for the parent
        mother_var = create_cnv(sex, 'uncertain')
        self.assertEqual(get_parental_var(var, [mother_var], mom),
                         default_parental_cnv())
예제 #8
0
 def test_include_variant(self):
     """ check include_variant() for child and parental SNVs and CNVs
     """

     def vcf_line(pos, alt, filt, info):
         # every line in this test is on chrom 1 with ref "T" and a 0/1 call
         return ["1", pos, ".", "T", alt, "1000", filt, info, "GT", "0/1"]

     loader = self.vcf_loader
     snv_info = "CQ=missense_variant;HGNC=ATRX"
     sex = "M"

     # child variants: one that passes the filters is kept, and one that
     # fails the filters is dropped
     child_variants = False
     line = vcf_line("100", "A", "PASS", snv_info)
     self.assertTrue(loader.include_variant(line, child_variants, sex))

     line = vcf_line("100", "A", "FAIL", snv_info)
     self.assertFalse(loader.include_variant(line, child_variants, sex))

     # parental variants: a failing variant is still kept when the child has
     # a variant at the same site ...
     child_variants = True
     loader.child_keys = {("1", 100), ("X", 200)}
     line = vcf_line("100", "A", "FAIL", snv_info)
     self.assertTrue(loader.include_variant(line, child_variants, sex))

     # ... and is dropped when the child has nothing at that site
     line = vcf_line("200", "A", "FAIL", snv_info)
     self.assertFalse(loader.include_variant(line, child_variants, sex))

     # parental CNVs are matched by overlap, so register a child CNV that
     # the parental CNV has to overlap
     line = vcf_line("100", "<DEL>", "PASS", "END=200")
     test_var = CNV(*line[:6])
     test_var.add_info(line[7])
     loader.cnv_matcher = MatchCNVs([test_var])
     self.assertTrue(loader.include_variant(line, child_variants, sex))

     # a parental CNV without any overlap to the child's CNVs should be
     # excluded
     line = vcf_line("300", "<DEL>", "PASS", "END=400")
     self.assertFalse(loader.include_variant(line, child_variants, sex))
예제 #9
0
    def create_cnv(self, chrom):
        """ create a <DUP> CNV at position 15000000 on the given chromosome

        Args:
            chrom: chromosome of the CNV

        Returns:
            a CNV object with info, format, gender ("F") and genotype set
        """

        # fixed VCF columns after CHROM: POS, ID, REF, ALT, FILTER
        cnv = CNV(chrom, "15000000", ".", "A", "<DUP>", "PASS")

        cnv.add_info(
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;"
            "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;"
            "MADL2R=0.02;END=16000000;SVLEN=1000000")
        cnv.add_format("inheritance:DP", "deNovo:50")
        cnv.set_gender("F")
        cnv.set_genotype()

        return cnv
예제 #10
0
 def setUp(self):
     """ build the default CNV fixture that each test starts from
     """

     # VCF columns in order: CHROM, POS, ID, REF, ALT, FILTER
     cnv = CNV("1", "15000000", ".", "A", "<DUP>", "PASS")
     self.var = cnv

     cnv.add_info(
         "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;"
         "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;"
         "MADL2R=0.02;END=16000000;SVLEN=1000000")
     cnv.add_format("inheritance:DP", "deNovo:50")
     cnv.set_gender("F")
예제 #11
0
    def create_cnv(self, gender, inh, chrom, pos):
        """ create a <DUP> CNV for testing

        Args:
            gender: gender code handed to set_gender()
            inh: value stored under the INHERITANCE format key
            chrom: chromosome of the CNV
            pos: start position of the CNV

        Returns:
            a CNV object with info, format, gender and genotype all set
        """

        # the remaining VCF columns are fixed: ID, REF, ALT, FILTER
        cnv = CNV(chrom, pos, ".", "A", "<DUP>", "PASS")

        sample_field = inh + ":50"

        cnv.add_info("HGNC=TEST;HGNC_ALL=TEST;END=16000000;SVLEN=5000")
        cnv.add_format("INHERITANCE:DP", sample_field)
        cnv.set_gender(gender)
        cnv.set_genotype()

        return cnv
예제 #12
0
 def test_get_parental_var_cnv_maternally_inherited(self):
     ''' test that we can construct a maternally inherited CNV
     '''

     sex = 'F'
     mom = Person('fam_id', 'mom', '0', '0', 'F', '1', '/PATH')

     # the child's CNV is flagged as maternal, and the mother is given no
     # variants of her own, so a new CNV object has to be created for her
     var = create_cnv(sex, 'maternal')
     observed = get_parental_var(var, [], mom)

     # the created CNV keeps the child's <DUP> allele and info
     expected = CNV(chrom="1",
                    position=150,
                    id=".",
                    ref="A",
                    alts="<DUP>",
                    qual='1000',
                    filter="PASS",
                    info=str(var.info),
                    format='INHERITANCE',
                    sample='uncertain',
                    gender="female",
                    mnv_code=None)

     self.assertEqual(observed, expected)
 def create_cnv(self, gender, inh, cifer, chrom, pos):
     """ create a <DUP> CNV carrying CIFER and INHERITANCE format values

     Args:
         gender: gender code handed to set_gender()
         inh: value stored under the INHERITANCE format key
         cifer: value stored under the CIFER format key
         chrom: chromosome of the CNV
         pos: start position of the CNV

     Returns:
         a CNV object with info, format, gender and genotype all set
     """

     # the remaining VCF columns are fixed: ID, REF, ALT, FILTER
     cnv = CNV(chrom, pos, ".", "A", "<DUP>", "PASS")

     # sample values line up with the CIFER:INHERITANCE:DP format keys
     sample_field = ":".join([cifer, inh, "50"])

     cnv.add_info("HGNC=TEST;HGNC_ALL=TEST;END=16000000;SVLEN=5000;CNS=3")
     cnv.add_format("CIFER:INHERITANCE:DP", sample_field)
     cnv.set_gender(gender)
     cnv.set_genotype()

     return cnv
예제 #14
0
 def setUp(self):
     """ build the CNV fixture that each test starts from

     The fixture is a <DUP> call at 1:15000000 (END=16000000), created with
     gender "F" and a "deNovo" value in its sample field.
     """

     # VCF columns in order: CHROM, POS, ID, REF, ALT, FILTER
     columns = ("1", "15000000", ".", "A", "<DUP>", "PASS")

     info_field = (
         "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
         "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
         "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
     )

     self.var = CNV(*columns,
                    info=info_field,
                    format="inheritance:DP",
                    sample="deNovo:50",
                    gender="F")
예제 #15
0
 def test_include_variant(self):
     """ check include_variant() for child and parental SNVs and CNVs
     """

     mnvs = {}
     sum_x_l2r = {}
     sex = "M"
     snv_info = "CQ=missense_variant;HGNC=ATRX"

     def vcf_line(pos, alt, filt, info):
         # every line in this test is on chrom 1 with ref "T" and a 0/1 call
         return ["1", pos, ".", "T", alt, "1000", filt, info, "GT", "0/1"]

     def included(line, keys):
         # only the line and the child keys vary between the checks below
         return include_variant(line, keys, sex, mnvs, sum_x_l2r)

     # child variants (no child keys yet): a variant that passes the filters
     # is kept, and one that fails the filters is dropped
     child_keys = None
     self.assertTrue(included(vcf_line("100", "A", "PASS", snv_info), child_keys))
     self.assertFalse(included(vcf_line("100", "A", "FAIL", snv_info), child_keys))

     # parental variants: a failing variant is still kept when the child has
     # a variant at the same site, and dropped when the child has none there
     child_keys = {("1", 100), ("X", 200)}
     self.assertTrue(included(vcf_line("100", "A", "FAIL", snv_info), child_keys))
     self.assertFalse(included(vcf_line("200", "A", "FAIL", snv_info), child_keys))

     # parental CNVs
     line = vcf_line("100", "<DEL>", "PASS", "END=200")
     # NOTE(review): test_var is built here but not used by any assertion in
     # this test - confirm whether it is still needed
     test_var = CNV(*line)

     # a parental CNV at a site the child also has should be included
     self.assertTrue(included(line, child_keys))

     # a parental CNV without any overlap to the child's CNVs should be
     # excluded
     line = vcf_line("300", "<DEL>", "PASS", "END=400")
     self.assertFalse(included(line, child_keys))
 def setUp(self):
     """ build the default ExomeCNV fixture, wrapping a <DUP> CNV
     """

     # VCF columns in order: CHROM, POS, ID, REF, ALT, FILTER
     self.var = ExomeCNV(CNV("1", "15000000", ".", "A", "<DUP>", "PASS"))

     # the info, format and gender are applied to the CNV that the wrapper
     # exposes through its cnv attribute
     self.var.cnv.add_info(
         "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CONVEX;"
         "RC50INTERNALFREQ=0.005;COMMONFORWARDS=0.000;MEANLR2=0.5;"
         "MADL2R=0.02")
     self.var.cnv.add_format("inheritance:DP", "deNovo:50")
     self.var.cnv.set_gender("F")
예제 #17
0
    def get_parental_var(self, var, parental_vars, gender, matcher):
        """ find the parental variant for a child's variant, or make a default

        Args:
            var: child's variant, as a Variant object
            parental_vars: list of parental variants
            gender: gender of the parent
            matcher: CNV matcher for the parent

        Returns:
            the parental variant whose key matches the child's variant. If
            none matches, a new CNV or SNV (same type as the child's variant)
            with the parent's gender and a default genotype.
        """

        key = var.get_key()
        is_cnv = isinstance(var, CNV)

        # a parental CNV need not share the child's start site, so when the
        # matcher reports a match we search on the overlapping CNV's key
        if is_cnv and matcher.has_match(var):
            key = matcher.get_overlap_key(key)

        for candidate in parental_vars:
            if key == candidate.get_key():
                return candidate

        # nothing in the parent's variants matched, so build a stand-in of
        # the same variant type from the child's fields
        variant_class = CNV if is_cnv else SNV
        stand_in = variant_class(var.chrom, var.position, var.variant_id,
                                 var.ref_allele, var.alt_allele, var.filter)

        stand_in.set_gender(gender)
        stand_in.set_default_genotype()

        return stand_in
예제 #18
0
 def get_parental_var(self, var, parental_vars, gender, matcher):
     """ find the parental variant for a child's variant, or make a default

     Args:
         var: child's variant, as a Variant object
         parental_vars: list of parental variants
         gender: gender of the parent
         matcher: CNV matcher for the parent

     Returns:
         the parental variant whose key matches the child's variant. If
         none matches, a new CNV or SNV (same type as the child's variant)
         with the parent's gender and a default genotype.
     """

     lookup_key = var.get_key()
     child_is_cnv = isinstance(var, CNV)

     # a parental CNV need not share the child's start site, so when the
     # matcher reports a match we search on the overlapping CNV's key
     if child_is_cnv and matcher.has_match(var):
         lookup_key = matcher.get_overlap_key(lookup_key)

     for candidate in parental_vars:
         if lookup_key == candidate.get_key():
             return candidate

     # nothing in the parent's variants matched, so build a stand-in of the
     # same variant type from the child's fields
     fields = (var.chrom, var.position, var.variant_id, var.ref_allele,
               var.alt_allele, var.filter)
     if child_is_cnv:
         stand_in = CNV(*fields)
     else:
         stand_in = SNV(*fields)

     stand_in.set_gender(gender)
     stand_in.set_default_genotype()

     return stand_in
예제 #19
0
    def construct_variant(self, line, gender):
        """ build the variant object that suits the type of a VCF line

        Args:
            line: list of elements of a single sample VCF line:
                [chrom, position, snp_id, ref_allele, alt_allele, quality,
                filter_value, info, format_keys, format_values]
            gender: gender of the individual to whom the variant line belongs
                (eg "1" or "M" for male, "2", or "F" for female).

        Returns:
            a CNV object when the alt allele is <DUP> or <DEL>, otherwise a
            SNV object
        """

        alt_allele = line[4]

        # CNVs are recognised purely by their symbolic alt allele
        is_cnv = alt_allele in ("<DUP>", "<DEL>")
        variant_class = CNV if is_cnv else SNV

        # the quality column (line[5]) is not passed to either constructor
        var = variant_class(line[0], line[1], line[2], line[3], alt_allele,
                            line[6])
        var.add_info(line[7])

        if is_cnv:
            # CNVs require the format values for filtering
            var.set_gender(gender)
            var.add_format(line[8], line[9])
            if self.known_genes is not None:
                var.fix_gene_IDs()

        return var
예제 #20
0
class TestVariantCnvPy(unittest.TestCase):
    """ unit tests for the CNV variant class
    """

    def setUp(self):
        """ build the default CNV that every test starts from
        """

        # VCF columns in order: CHROM, POS, ID, REF, ALT, FILTER
        self.var = CNV("1", "15000000", ".", "A", "<DUP>", "PASS")

        self.var.add_info(
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;"
            "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;"
            "MADL2R=0.02;END=16000000;SVLEN=1000000")
        self.var.add_format("inheritance:DP", "deNovo:50")
        self.var.set_gender("F")

    def test_set_genotype(self):
        """ check the genotype that set_genotype() assigns for each allele
        """

        cnv = self.var

        # the two recognised CNV alleles map onto their genotype codes
        for allele, genotype in (("<DUP>", "DUP"), ("<DEL>", "DEL")):
            cnv.alt_allele = allele
            cnv.set_genotype()
            self.assertEqual(cnv.genotype, genotype)

        # any other allele is rejected
        cnv.alt_allele = "G"
        with self.assertRaises(ValueError):
            cnv.set_genotype()

        # female Y chrom CNVs are rejected too
        # NOTE(review): alt_allele is still "G" at this point, which already
        # raised above - confirm this really exercises the Y chrom check
        cnv.chrom = "Y"
        cnv.set_gender("F")
        with self.assertRaises(ValueError):
            cnv.set_genotype()

    def test_set_genotype_pseudoautosomal(self):
        """ check set_genotype() for a CNV inside a pseudoautosomal region
        """

        par_start = 60002
        par_end = 2699520

        # place the CNV on chrX, 1000 bp inside each edge of the region
        cnv = self.var
        cnv.chrom = "X"
        cnv.position = par_start + 1000
        cnv.info["END"] = par_end - 1000
        cnv.set_gender("F")

        cnv.alt_allele = "<DUP>"
        cnv.set_genotype()
        self.assertEqual(cnv.genotype, "DUP")
        self.assertEqual(cnv.get_inheritance_type(), "autosomal")

    def test_get_range(self):
        """ check the (start, end) pair returned by get_range()
        """

        cnv = self.var

        # with an END value in the info, the range runs from position to END
        cnv.position = 1000
        cnv.info["END"] = "2000"
        self.assertEqual(cnv.get_range(), (1000, 2000))

        # without any info, the end is expected 10000 bp after the start
        cnv.info = {}
        self.assertEqual(cnv.get_range(), (1000, 11000))

    def test_fix_gene_IDs(self):
        """ check how fix_gene_IDs() treats known and unknown gene names
        """

        cnv = self.var
        cnv.known_genes = {
            "TEST": {"start": 1000, "end": 2000, "chrom": "5"},
        }

        # a CNV at 1000-1500 overlaps the known gene, so a single known gene
        # name is left as it is
        cnv.genes = ["TEST"]
        cnv.position = 1000
        cnv.info["END"] = "1500"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, ["TEST"])

        # names missing from the known genes dict are left untouched
        cnv.genes = ["TEST", "TEST2"]
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, ["TEST", "TEST2"])

        # moving the CNV to 900-950 ends the overlap, so the known gene name
        # is swapped for "." while the unknown name survives
        cnv.position = 900
        cnv.info["END"] = "950"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, [".", "TEST2"])

        # without any known genes, the names are not altered at all
        cnv.genes = ["TEST", "TEST2"]
        cnv.known_genes = None
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, ["TEST", "TEST2"])

    def test_set_gene_from_info_cnv(self):
        """ check which info field set_gene_from_info() takes the genes from
        """

        cnv = self.var

        # make sure the known genes are None, otherwise sometimes the values
        # from test_variant_info.py unit tests can bleed through. I'm not sure
        # why!
        cnv.known_genes = None

        # HGNC wins when both HGNC and HGNC_ALL are present
        cnv.info["HGNC"] = "A"
        cnv.info["HGNC_ALL"] = "B"
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, ["A"])

        # HGNC_ALL is used once HGNC is absent
        del cnv.info["HGNC"]
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, ["B"])

        # with neither HGNC nor HGNC_ALL defined, CNVs overlapping genes can
        # still be picked up through NUMBERGENES > 0
        del cnv.info["HGNC_ALL"]

        # NUMBERGENES = 0 leaves the genes unset
        cnv.info["NUMBERGENES"] = 0
        cnv.set_gene_from_info()
        self.assertIsNone(cnv.genes)

        # NUMBERGENES > 0 gives a placeholder gene
        cnv.info["NUMBERGENES"] = 1
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, ["."])

        # with no HGNC, HGNC_ALL, or NUMBERGENES we get the variant's site
        del cnv.info["NUMBERGENES"]
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, "1:15000000")

    def test_get_genes(self):
        """ check the list returned by get_genes() for several gene values
        """

        # pairs of (value assigned to genes, list expected from get_genes())
        cases = (
            (None, []),
            (["TEST"], ["TEST"]),
            (["TEST1", "TEST2"], ["TEST1", "TEST2"]),
            (["."], ["."]),
        )

        for assigned, expected in cases:
            self.var.genes = assigned
            self.assertEqual(self.var.get_genes(), expected)

    def test_fails_y_chrom_female(self):
        """ check that a female Y chrom CNV does not pass the filters
        """

        cnv = self.var
        cnv.chrom = "Y"
        cnv.set_gender("F")

        self.assertFalse(cnv.passes_filters())
예제 #21
0
 def construct_variant(self, line, gender):
     """ build the variant object that suits the type of a VCF line

     Args:
         line: list of elements of a single sample VCF line:
             [chrom, position, snp_id, ref_allele, alt_allele, quality,
             filter_value, info, format_keys, format_values]
         gender: gender of the individual to whom the variant line belongs
             (eg "1" or "M" for male, "2", or "F" for female).

     Returns:
         a CNV object when the alt allele is <DUP> or <DEL>, otherwise a
         SNV object
     """

     alt_allele = line[4]

     # anything without a symbolic <DUP>/<DEL> alt allele is handled as a
     # SNV, which only needs its info field added
     if alt_allele not in ("<DUP>", "<DEL>"):
         snv = SNV(line[0], line[1], line[2], line[3], alt_allele, line[6])
         snv.add_info(line[7])
         return snv

     # the quality column (line[5]) is not passed to the constructor
     cnv = CNV(line[0], line[1], line[2], line[3], alt_allele, line[6])
     cnv.add_info(line[7])
     # CNVs require the format values for filtering
     cnv.set_gender(gender)
     cnv.add_format(line[8], line[9])
     if self.known_genes is not None:
         cnv.fix_gene_IDs()

     return cnv
예제 #22
0
class TestVariantCnvPy(unittest.TestCase):
    """ unit tests for the CNV variant class
    """

    def setUp(self):
        """ build the default CNV that every test starts from
        """

        # fixed VCF columns, in order: CHROM, POS, ID, REF, ALT, QUAL, FILTER
        columns = ("1", "15000000", ".", "A", "<DUP>", "1000", "PASS")

        info_field = (
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
            "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
            "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
        )

        self.var = CNV(*columns,
                       info=info_field,
                       format="inheritance:DP",
                       sample="deNovo:50",
                       gender="F")

    def test_set_genotype(self):
        """ check the genotype that set_genotype() assigns for each allele
        """

        cnv = self.var

        # the two recognised CNV alleles map onto their genotype codes
        for allele, genotype in (("<DUP>", "DUP"), ("<DEL>", "DEL")):
            cnv.alt_alleles = [allele]
            cnv.set_genotype()
            self.assertEqual(cnv.genotype, genotype)

        # any other allele is rejected
        cnv.alt_alleles = ["G"]
        with self.assertRaises(ValueError):
            cnv.set_genotype()

        # female Y chrom CNVs are rejected too
        # NOTE(review): alt_alleles is still ["G"] at this point, which
        # already raised above - confirm this really exercises the Y chrom
        # check
        cnv.chrom = "Y"
        cnv._set_gender("F")
        with self.assertRaises(ValueError):
            cnv.set_genotype()

    def test_set_genotype_pseudoautosomal(self):
        """ check set_genotype() for a CNV inside a pseudoautosomal region
        """

        par_start = 60002
        par_end = 2699520

        # place the CNV on chrX, 1000 bp inside each edge of the region
        cnv = self.var
        cnv.chrom = "X"
        cnv.position = par_start + 1000
        cnv.info["END"] = par_end - 1000
        cnv._set_gender("F")

        cnv.alt_alleles = ["<DUP>"]
        cnv.set_genotype()
        self.assertEqual(cnv.genotype, "DUP")
        self.assertEqual(cnv.get_inheritance_type(), "autosomal")

    def test_get_range(self):
        """ check the (start, end) pair returned by get_range()
        """

        cnv = self.var

        # with an END value in the info, the range runs from position to END
        cnv.position = 1000
        cnv.info["END"] = "2000"
        self.assertEqual(cnv.get_range(), (1000, 2000))

        # without any info, the end is expected 10000 bp after the start
        cnv.info = {}
        self.assertEqual(cnv.get_range(), (1000, 11000))

    def test_fix_gene_IDs(self):
        """ check how fix_gene_IDs() treats known and unknown gene names
        """

        cnv = self.var
        cnv.known_genes = {
            "TEST": {"start": 1000, "end": 2000, "chrom": "5"},
        }

        # a CNV at 1000-1500 overlaps the known gene, so a single known gene
        # name is left as it is
        cnv.info.symbols = [Symbols(info={'HGNC_ID': 'TEST'}, idx=0)]
        cnv.position = 1000
        cnv.info["END"] = "1500"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [['TEST']])

        # names missing from the known genes dict are left untouched
        cnv.info.symbols = [Symbols(info={'HGNC_ID': 'TEST|TEST2'}, idx=0)]
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [['TEST', 'TEST2']])

        # moving the CNV to 900-950 ends the overlap, so the known gene name
        # is replaced by None while the unknown name survives
        cnv.position = 900
        cnv.info["END"] = "950"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [[None, 'TEST2']])

        # without any known genes, the names are not altered at all
        cnv.info.symbols = [Symbols(info={'HGNC_ID': 'TEST|TEST2'}, idx=0)]
        cnv.known_genes = None
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [['TEST', 'TEST2']])

    def test_set_gene_from_info_cnv(self):
        """ check which info fields parse_gene_symbols() reads for a CNV
        """

        cnv = self.var

        # make sure the known genes are None, otherwise sometimes the values
        # from test_variant_info.py unit tests can bleed through. I'm not sure
        # why!
        cnv.known_genes = None

        # the HGNC value ends up in the parsed symbols
        cnv.info["HGNC"] = "A"
        parsed = cnv.info.parse_gene_symbols(cnv.alt_alleles, [])
        self.assertEqual(parsed, [Symbols(info={'HGNC': 'A'}, idx=0)])

        # HGNC_ALL on its own contributes nothing to the parsed symbols
        cnv.info["HGNC_ALL"] = "B"
        del cnv.info["HGNC"]
        parsed = cnv.info.parse_gene_symbols(cnv.alt_alleles, [])
        self.assertEqual(parsed, [Symbols(info={}, idx=0)])

    def test_get_genes(self):
        """ check the nested list returned by info.get_genes()
        """

        # pairs of (info given to Symbols, genes expected for that allele)
        cases = (
            ({}, []),
            ({'HGNC': 'TEST'}, ["TEST"]),
            ({'HGNC': 'TEST1|TEST2'}, ["TEST1", "TEST2"]),
            ({'HGNC': '.'}, [None]),
        )

        for symbol_info, expected in cases:
            self.var.info.symbols = [Symbols(info=symbol_info, idx=0)]
            self.assertEqual(self.var.info.get_genes(), [expected])

    def test_fails_y_chrom_female(self):
        """ check that a female Y chrom CNV does not pass the filters
        """

        cnv = self.var
        cnv.chrom = "Y"
        cnv._set_gender("F")

        self.assertFalse(cnv.passes_filters())
예제 #23
0
class TestVariantCnvPy(unittest.TestCase):
    """ unit testing of the CNV class
    """
    
    def setUp(self):
        """ build the default CNV object that every test starts from
        """

        # INFO field for the default variant
        info = ("HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
            "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
            "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000")

        # positional arguments are, in order: chrom, pos, snp_id, ref, alt
        # and filter
        self.var = CNV("1", "15000000", ".", "A", "<DUP>", "PASS",
            info=info,
            format="inheritance:DP",
            sample="deNovo:50",
            gender="F")
    
    def test_set_genotype(self):
        """ test that set_genotype() operates correctly

        Checks that <DUP> and <DEL> alleles give the "DUP" and "DEL"
        genotypes, that any other allele raises a ValueError, and that a CNV
        on the Y chromosome of a female raises a ValueError.
        """

        # check that DUPs are set correctly
        self.var.alt_alleles = ["<DUP>"]
        self.var.set_genotype()
        self.assertEqual(self.var.genotype, "DUP")

        # check that DELs are set correctly
        self.var.alt_alleles = ["<DEL>"]
        self.var.set_genotype()
        self.assertEqual(self.var.genotype, "DEL")

        # check that other genotypes raise an error
        self.var.alt_alleles = ["G"]
        with self.assertRaises(ValueError):
            self.var.set_genotype()

        # and check that we raise an error for female Y chrom CNVs. Restore a
        # valid allele first: with the "G" allele from the previous check still
        # in place, the ValueError would be raised by the allele alone and this
        # check could never fail.
        self.var.alt_alleles = ["<DUP>"]
        self.var.chrom = "Y"
        self.var._set_gender("F")
        with self.assertRaises(ValueError):
            self.var.set_genotype()
    
    def test_set_genotype_pseudoautosomal(self):
        """ test set_genotype() for a CNV inside a pseudoautosomal region
        """

        # coordinates of the pseudoautosomal region used for the test
        par_start, par_end = 60002, 2699520

        # place the CNV on chrX, fully inside the region, in a female
        cnv = self.var
        cnv.chrom = "X"
        cnv.position = par_start + 1000
        cnv.info["END"] = par_end - 1000
        cnv._set_gender("F")

        cnv.alt_alleles = ["<DUP>"]
        cnv.set_genotype()

        # the genotype is set as usual, and the variant is treated as autosomal
        self.assertEqual(cnv.genotype, "DUP")
        self.assertEqual(cnv.get_inheritance_type(), "autosomal")
        
    def test_get_range(self):
        """ test that get_range() gives the start and end of the CNV
        """

        start = 1000

        # with an END entry in the info, the range runs from position to END
        self.var.position = start
        self.var.info["END"] = "2000"
        span = self.var.get_range()
        self.assertEqual(span, (start, 2000))

        # with an empty info, the expected end is 10000 bp past the start
        self.var.info = {}
        span = self.var.get_range()
        self.assertEqual(span, (start, start + 10000))
    
    def test_fix_gene_IDs(self):
        """ test that fix_gene_IDs() corrects gene names against known genes
        """

        cnv = self.var

        def fixed_genes():
            # run the fix, then report the gene names that are left
            cnv.fix_gene_IDs()
            return cnv.genes

        cnv.known_genes = {"TEST": dict(start=1000, end=2000, chrom="5")}

        # a CNV overlapping the single known gene keeps its gene name
        cnv.genes = [["TEST"]]
        cnv.position = 1000
        cnv.info["END"] = "1500"
        self.assertEqual(fixed_genes(), [["TEST"]])

        # names that are absent from the known genes dict are left alone
        cnv.genes = [["TEST", "TEST2"]]
        self.assertEqual(fixed_genes(), [["TEST", "TEST2"]])

        # a name from the known genes dict is replaced by "." once the CNV no
        # longer overlaps that gene
        cnv.position = 900
        cnv.info["END"] = "950"
        self.assertEqual(fixed_genes(), [[".", "TEST2"]])

        # without any known genes, the gene names are unaltered
        cnv.genes = [["TEST", "TEST2"]]
        cnv.known_genes = None
        self.assertEqual(fixed_genes(), [["TEST", "TEST2"]])
    
    def test_set_gene_from_info_cnv(self):
        """ test that get_gene_from_info() picks gene names from the info
        """

        cnv = self.var

        def lookup():
            # query the genes for the current state of the info
            return cnv.get_gene_from_info(cnv.info, cnv.alt_alleles, [])

        # reset the known genes up front, since values from the
        # test_variant_info.py unit tests have been seen to leak into this
        # test (cause unknown)
        cnv.known_genes = None

        # HGNC wins when both HGNC and HGNC_ALL are present
        cnv.info["HGNC"] = "A"
        cnv.info["HGNC_ALL"] = "B"
        self.assertEqual(lookup(), [["A"]])

        # HGNC_ALL is used once HGNC is gone
        del cnv.info["HGNC"]
        self.assertEqual(lookup(), [["B"]])

        # without HGNC and HGNC_ALL, the result depends on NUMBERGENES
        del cnv.info["HGNC_ALL"]

        # NUMBERGENES = 0 gives no genes at all
        cnv.info["NUMBERGENES"] = 0
        self.assertIsNone(lookup())

        # NUMBERGENES > 0 gives a placeholder gene name
        cnv.info["NUMBERGENES"] = 1
        self.assertEqual(lookup(), [["."]])

        # with none of HGNC, HGNC_ALL or NUMBERGENES, the expected name is
        # built from the chromosome and position
        del cnv.info["NUMBERGENES"]
        self.assertEqual(lookup(), [["1:15000000"]])
    
    def test_get_genes(self):
        """ test that get_genes() returns the genes assigned to the CNV
        """

        # each case pairs the value assigned to the genes attribute with the
        # list that get_genes() is expected to return
        cases = [
            (None, []),
            (["TEST"], ["TEST"]),
            (["TEST1", "TEST2"], ["TEST1", "TEST2"]),
            (["."], ["."]),
        ]

        for assigned, expected in cases:
            self.var.genes = assigned
            self.assertEqual(self.var.get_genes(), expected)
    
    def test_fails_y_chrom_female(self):
        """ test that a Y chromosome CNV in a female fails passes_filters()
        """

        cnv = self.var

        # place the CNV on the Y chromosome of a female sample
        cnv.chrom = "Y"
        cnv._set_gender("F")

        outcome = cnv.passes_filters()
        self.assertFalse(outcome)