def setUp(self):
    """ build the default duplication CNV shared by the tests

    The variant is constructed in a single call, with its INFO, FORMAT,
    sample and gender values passed as keywords, and stored as self.var.
    """
    # the leading VCF columns: CHROM, POS, ID, REF, ALT, QUAL, FILTER
    vcf_fields = ("1", "15000000", ".", "A", "<DUP>", "1000", "PASS")

    info_field = (
        "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
        "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
        "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
    )

    self.var = CNV(*vcf_fields, info=info_field, format="inheritance:DP",
        sample="deNovo:50", gender="F")
def create_cnv(self, gender, inh, chrom, pos, cq=None):
    """ construct a deletion CNV for use in the tests

    Args:
        gender: gender handed to the CNV's set_gender()
        inh: value for the INHERITANCE format field
        chrom: chromosome of the CNV
        pos: start position of the CNV
        cq: value for the CQ INFO field ("transcript_ablation" when None)

    Returns:
        CNV object with info, format, gender and genotype all set
    """
    consequence = "transcript_ablation" if cq is None else cq

    cnv = CNV(chrom, pos, ".", "A", "<DEL>", "PASS")

    info_field = "CQ={};HGNC=TEST;HGNC_ALL=TEST;END=16000000;SVLEN=5000".format(
        consequence)
    sample_field = inh + ":50"

    cnv.add_info(info_field)
    cnv.add_format("INHERITANCE:DP", sample_field)
    cnv.set_gender(gender)
    cnv.set_genotype()

    return cnv
def create_cnv(self, chrom, info=None, pos='15000000', snp_id='.', ref='A',
        alt='<DUP>', qual='1000', filt='PASS', **kwargs):
    """ construct a CNV for use in the tests

    Args:
        chrom: chromosome of the CNV
        info: INFO field string. When None, a default CNSOLIDATE-style INFO
            string ending at 16000000 is used.
        pos: start position of the CNV
        snp_id: variant ID column
        ref: reference allele
        alt: alternate allele
        qual: QUAL column
        filt: FILTER column
        **kwargs: any additional keyword arguments, forwarded to CNV()

    Returns:
        CNV object with "inheritance:DP" format keys, "deNovo:50" sample
        values and a gender of 'male'.
    """
    if info is None:
        # every fragment uses the same quote character, so the adjacent
        # literals concatenate into one clean INFO string without stray
        # quote characters or continuation whitespace inside the keys
        info = "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;" \
            "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;" \
            "MADL2R=0.02;END=16000000;SVLEN=1000000"

    keys = "inheritance:DP"
    values = "deNovo:50"

    return CNV(chrom, pos, snp_id, ref, alt, qual, filt, info=info,
        format=keys, sample=values, gender='male', **kwargs)
def test_construct_variant(self):
    """ test that construct_variant() works correctly

    Checks that the loader returns a variant with the expected key for both
    a SNV line and a CNV line, and that only the CNV has its format set.
    """
    gender = "M"

    # NOTE(review): the [:6] slices below hand the QUAL column ("1000") to
    # the constructors as their sixth argument - confirm that is intended.

    # a SNV line
    snv_line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
    expected = SNV(*snv_line[:6])
    observed = self.vcf_loader.construct_variant(snv_line, gender)
    self.assertEqual(observed.get_key(), expected.get_key())

    # initially constructing a SNV shouldn't populate the format
    self.assertEqual(observed.format, None)

    # a CNV line, identified by its <DEL> alt allele
    cnv_line = ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
        "GT", "0/1"]
    expected = CNV(*cnv_line[:6])
    expected.add_info(cnv_line[7])
    observed = self.vcf_loader.construct_variant(cnv_line, gender)
    self.assertEqual(observed.get_key(), expected.get_key())
    self.assertNotEqual(observed.format, None)
def test_construct_variant(self):
    """ test that construct_variant() works correctly

    For a SNV line and then a CNV line, the constructed variant must have
    the same key as a directly-built object of the expected class, and its
    format must hold the parsed genotype.
    """
    gender = "M"

    # (expected class, single-sample VCF line), checked in this order
    cases = [
        (SNV, ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]),
        (CNV, ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
            "GT", "0/1"]),
    ]

    for variant_class, line in cases:
        expected = variant_class(*line, gender=gender)
        observed = construct_variant(line, gender)
        self.assertEqual(observed.get_key(), expected.get_key())
        self.assertEqual(observed.format, {'GT': '0/1'})
def test_include_variant(self):
    """ check that include_variant() works correctly

    Covers child SNVs with PASS and FAIL filters, parental SNVs with and
    without a matching child site, and parental CNVs.
    """
    mnvs = {}
    gender = "M"

    def snv_line(pos, filt):
        # single-sample VCF line for a missense SNV in ATRX
        return ["1", pos, ".", "T", "A", "1000", filt,
            "CQ=missense_variant;HGNC=ATRX", "GT", "0/1"]

    def cnv_line(pos, info):
        # single-sample VCF line for a deletion with a PASS filter
        return ["1", pos, ".", "T", "<DEL>", "1000", "PASS", info, "GT", "0/1"]

    def included(line, child_variants):
        return self.vcf_loader.include_variant(line, child_variants, gender,
            mnvs)

    # child variants: PASS is expected to be kept, FAIL to be dropped
    self.assertTrue(included(snv_line("100", "PASS"), False))
    self.assertFalse(included(snv_line("100", "FAIL"), False))

    # parental variants are expected to be kept only at child sites,
    # whatever their filter value
    self.vcf_loader.child_keys = set([("1", 100), ("X", 200)])
    self.assertTrue(included(snv_line("100", "FAIL"), True))
    self.assertFalse(included(snv_line("200", "FAIL"), True))

    # and check parental CNVs
    line = cnv_line("100", "END=200")
    test_var = CNV(*line[:6])
    test_var.add_info(line[7])

    # NOTE(review): test_var is built but never handed to the loader in this
    # test - confirm whether a child CNV matcher should be installed here.
    self.assertTrue(included(line, True))

    # a parental CNV without overlap to any child CNV should be excluded
    self.assertFalse(included(cnv_line("300", "END=400"), True))
def test_get_parental_var_cnv(self):
    ''' check that get_parental_var() works correctly for CNVs

    In both scenarios the parent is expected to receive a freshly built
    <REF> CNV with an 'uncertain' inheritance.
    '''
    sex = 'F'
    var = create_cnv(sex, 'deNovo')
    mom = Person('fam_id', 'mom', '0', '0', 'F', '1', '/PATH')

    def expected_parent():
        # the default parental CNV, rebuilt for every comparison
        return CNV(chrom="1", position=150, id=".", ref="A", alts="<REF>",
            qual='1000', filter="PASS", info=str(var.info),
            format='INHERITANCE', sample='uncertain', gender="female",
            mnv_code=None)

    # no parental variants at all
    self.assertEqual(get_parental_var(var, [], mom), expected_parent())

    # check that even if a CNV exists in the parent at a matching site, we
    # still create a new CNV object for the parent
    mother_var = create_cnv(sex, 'uncertain')
    self.assertEqual(get_parental_var(var, [mother_var], mom),
        expected_parent())
def test_include_variant(self):
    """ check that include_variant() works correctly

    Covers child SNVs with PASS and FAIL filters, parental SNVs with and
    without a matching child site, and parental CNVs with and without an
    overlapping child CNV.
    """
    gender = "M"

    def snv_line(pos, filt):
        # single-sample VCF line for a missense SNV in ATRX
        return ["1", pos, ".", "T", "A", "1000", filt,
            "CQ=missense_variant;HGNC=ATRX", "GT", "0/1"]

    def cnv_line(pos, info):
        # single-sample VCF line for a deletion with a PASS filter
        return ["1", pos, ".", "T", "<DEL>", "1000", "PASS", info, "GT", "0/1"]

    def included(line, child_variants):
        return self.vcf_loader.include_variant(line, child_variants, gender)

    # child variants: PASS is expected to be kept, FAIL to be dropped
    self.assertTrue(included(snv_line("100", "PASS"), False))
    self.assertFalse(included(snv_line("100", "FAIL"), False))

    # parental variants are expected to be kept only at child sites,
    # whatever their filter value
    self.vcf_loader.child_keys = set([("1", 100), ("X", 200)])
    self.assertTrue(included(snv_line("100", "FAIL"), True))
    self.assertFalse(included(snv_line("200", "FAIL"), True))

    # and check parental CNVs
    line = cnv_line("100", "END=200")
    test_var = CNV(*line[:6])
    test_var.add_info(line[7])

    # parental CNVs are screened for overlap, so install a matcher holding
    # the child CNV that the parental CNV must match
    self.vcf_loader.cnv_matcher = MatchCNVs([test_var])
    self.assertTrue(included(line, True))

    # a parental CNV without overlap to any child CNV should be excluded
    self.assertFalse(included(cnv_line("300", "END=400"), True))
def create_cnv(self, chrom):
    """ construct a default female duplication CNV on the given chromosome

    Args:
        chrom: chromosome of the CNV

    Returns:
        CNV object with info, format, gender and genotype all set
    """
    cnv = CNV(chrom, "15000000", ".", "A", "<DUP>", "PASS")

    info_field = (
        "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;"
        "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;"
        "MADL2R=0.02;END=16000000;SVLEN=1000000"
    )

    cnv.add_info(info_field)
    cnv.add_format("inheritance:DP", "deNovo:50")
    cnv.set_gender("F")
    cnv.set_genotype()

    return cnv
def setUp(self):
    """ build the default duplication CNV shared by the tests

    The CNV is created from its core VCF columns, then given its INFO
    field, FORMAT/sample values and gender, and is stored as self.var.
    """
    self.var = CNV("1", "15000000", ".", "A", "<DUP>", "PASS")

    info_field = (
        "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;"
        "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;"
        "MADL2R=0.02;END=16000000;SVLEN=1000000"
    )

    self.var.add_info(info_field)
    self.var.add_format("inheritance:DP", "deNovo:50")
    self.var.set_gender("F")
def create_cnv(self, gender, inh, chrom, pos):
    """ construct a duplication CNV for use in the tests

    Args:
        gender: gender handed to the CNV's set_gender()
        inh: value for the INHERITANCE format field
        chrom: chromosome of the CNV
        pos: start position of the CNV

    Returns:
        CNV object with info, format, gender and genotype all set
    """
    cnv = CNV(chrom, pos, ".", "A", "<DUP>", "PASS")

    sample_field = inh + ":50"

    cnv.add_info("HGNC=TEST;HGNC_ALL=TEST;END=16000000;SVLEN=5000")
    cnv.add_format("INHERITANCE:DP", sample_field)
    cnv.set_gender(gender)
    cnv.set_genotype()

    return cnv
def test_get_parental_var_cnv_maternally_inherited(self):
    ''' test that we can construct a maternally inherited CNV

    With no parental variants available, the mother is expected to receive
    a newly built <DUP> CNV carrying the child's info.
    '''
    sex = 'F'
    mom = Person('fam_id', 'mom', '0', '0', 'F', '1', '/PATH')
    var = create_cnv(sex, 'maternal')

    observed = get_parental_var(var, [], mom)
    expected = CNV(chrom="1", position=150, id=".", ref="A", alts="<DUP>",
        qual='1000', filter="PASS", info=str(var.info),
        format='INHERITANCE', sample='uncertain', gender="female",
        mnv_code=None)

    self.assertEqual(observed, expected)
def create_cnv(self, gender, inh, cifer, chrom, pos):
    """ construct a duplication CNV with a CIFER call for use in the tests

    Args:
        gender: gender handed to the CNV's set_gender()
        inh: value for the INHERITANCE format field
        cifer: value for the CIFER format field
        chrom: chromosome of the CNV
        pos: start position of the CNV

    Returns:
        CNV object with info, format, gender and genotype all set
    """
    cnv = CNV(chrom, pos, ".", "A", "<DUP>", "PASS")

    # sample values follow the order of the CIFER:INHERITANCE:DP keys
    sample_field = cifer + ":" + inh + ":50"

    cnv.add_info("HGNC=TEST;HGNC_ALL=TEST;END=16000000;SVLEN=5000;CNS=3")
    cnv.add_format("CIFER:INHERITANCE:DP", sample_field)
    cnv.set_gender(gender)
    cnv.set_genotype()

    return cnv
def setUp(self):
    """ build the default duplication CNV shared by the tests

    The variant is constructed in a single call, with its INFO, FORMAT,
    sample and gender values passed as keywords, and stored as self.var.
    """
    # the positional VCF columns: CHROM, POS, ID, REF, ALT, FILTER
    vcf_fields = ("1", "15000000", ".", "A", "<DUP>", "PASS")

    info_field = (
        "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
        "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
        "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
    )

    self.var = CNV(*vcf_fields, info=info_field, format="inheritance:DP",
        sample="deNovo:50", gender="F")
def test_include_variant(self):
    """ check that include_variant() works correctly

    Child variants are checked with child_keys of None; parental variants
    are checked against a set of child (chrom, position) keys.
    """
    mnvs = {}
    gender = "M"
    sum_x_l2r = {}

    def snv_line(pos, filt):
        # single-sample VCF line for a missense SNV in ATRX
        return ["1", pos, ".", "T", "A", "1000", filt,
            "CQ=missense_variant;HGNC=ATRX", "GT", "0/1"]

    def cnv_line(pos, info):
        # single-sample VCF line for a deletion with a PASS filter
        return ["1", pos, ".", "T", "<DEL>", "1000", "PASS", info, "GT", "0/1"]

    def included(line, child_keys):
        return include_variant(line, child_keys, gender, mnvs, sum_x_l2r)

    # child variants: PASS is expected to be kept, FAIL to be dropped
    self.assertTrue(included(snv_line("100", "PASS"), None))
    self.assertFalse(included(snv_line("100", "FAIL"), None))

    # parental variants are expected to be kept only at child sites,
    # whatever their filter value
    child_keys = set([("1", 100), ("X", 200)])
    self.assertTrue(included(snv_line("100", "FAIL"), child_keys))
    self.assertFalse(included(snv_line("200", "FAIL"), child_keys))

    # and check parental CNVs
    line = cnv_line("100", "END=200")

    # NOTE(review): test_var is built but never used afterwards - confirm
    # whether it was meant to seed a child CNV for the overlap check.
    test_var = CNV(*line)
    self.assertTrue(included(line, child_keys))

    # a parental CNV without overlap to any child CNV should be excluded
    self.assertFalse(included(cnv_line("300", "END=400"), child_keys))
def setUp(self):
    """ build the default ExomeCNV shared by the tests

    A duplication CNV is wrapped in an ExomeCNV, stored as self.var, and
    the wrapped CNV is then given CONVEX-style INFO, FORMAT/sample values
    and a gender.
    """
    base_cnv = CNV("1", "15000000", ".", "A", "<DUP>", "PASS")
    self.var = ExomeCNV(base_cnv)

    info_field = (
        "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CONVEX;"
        "RC50INTERNALFREQ=0.005;COMMONFORWARDS=0.000;MEANLR2=0.5;"
        "MADL2R=0.02"
    )

    # populate via the wrapper's cnv attribute rather than base_cnv
    self.var.cnv.add_info(info_field)
    self.var.cnv.add_format("inheritance:DP", "deNovo:50")
    self.var.cnv.set_gender("F")
def get_parental_var(self, var, parental_vars, gender, matcher):
    """ find the parental variant matching a child's variant

    If none of the parent's variants share the lookup key, a default
    variant of the same class as the child's is created instead, with the
    parent's gender and a default genotype.

    Args:
        var: child's variant, as a CNV or SNV object
        parental_vars: list of the parent's variants
        gender: gender of the parent
        matcher: CNV matcher for the parent, used to translate the child's
            key into the key of an overlapping parental CNV

    Returns:
        the matching parental variant, or a newly created default variant
    """
    is_cnv = isinstance(var, CNV)

    # CNVs might not share a start site between child and parent, so when
    # the matcher reports a match we search with the overlapping key instead
    key = var.get_key()
    if is_cnv and matcher.has_match(var):
        key = matcher.get_overlap_key(key)

    for candidate in parental_vars:
        if key == candidate.get_key():
            return candidate

    # nothing in the parent's variants matched, so build a default variant
    variant_class = CNV if is_cnv else SNV
    default = variant_class(var.chrom, var.position, var.variant_id,
        var.ref_allele, var.alt_allele, var.filter)
    default.set_gender(gender)
    default.set_default_genotype()

    return default
def construct_variant(self, line, gender):
    """ build the Variant object appropriate for a single VCF line

    Args:
        line: list of elements of a single sample VCF line:
            [chrom, position, snp_id, ref_allele, alt_allele, quality,
            filter_value, info, format_keys, format_values]
        gender: gender of the individual to whom the variant line belongs
            (eg "1" or "M" for male, "2", or "F" for female).

    Returns:
        a CNV object when the alt allele is <DUP> or <DEL>, otherwise a SNV
        object
    """
    # constructor arguments common to both classes (quality is skipped)
    core = (line[0], line[1], line[2], line[3], line[4], line[6])
    info = line[7]

    # everything without a symbolic <DUP>/<DEL> alt allele is a SNV, which
    # only needs its info field
    if line[4] not in ("<DUP>", "<DEL>"):
        snv = SNV(*core)
        snv.add_info(info)
        return snv

    cnv = CNV(*core)
    cnv.add_info(info)

    # CNVs require the gender and format values for filtering
    cnv.set_gender(gender)
    cnv.add_format(line[8], line[9])

    if self.known_genes is not None:
        cnv.fix_gene_IDs()

    return cnv
class TestVariantCnvPy(unittest.TestCase):
    """ unit tests for the CNV class """

    def setUp(self):
        """ build the default female duplication CNV used by every test """
        self.var = CNV("1", "15000000", ".", "A", "<DUP>", "PASS")

        info_field = (
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;CNSOLIDATE;"
            "WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;MEANLR2=0.5;"
            "MADL2R=0.02;END=16000000;SVLEN=1000000"
        )

        self.var.add_info(info_field)
        self.var.add_format("inheritance:DP", "deNovo:50")
        self.var.set_gender("F")

    def test_set_genotype(self):
        """ test that set_genotype() operates correctly """
        cnv = self.var

        # DUP and DEL alt alleles map onto their genotype codes
        for alt, genotype in [("<DUP>", "DUP"), ("<DEL>", "DEL")]:
            cnv.alt_allele = alt
            cnv.set_genotype()
            self.assertEqual(cnv.genotype, genotype)

        # any other alt allele raises an error
        cnv.alt_allele = "G"
        with self.assertRaises(ValueError):
            cnv.set_genotype()

        # and check that we raise an error for female Y chrom CNVs
        # NOTE(review): the alt allele is still "G" here, so this may be
        # passing because of the alt allele rather than the chromosome.
        cnv.chrom = "Y"
        cnv.set_gender("F")
        with self.assertRaises(ValueError):
            cnv.set_genotype()

    def test_set_genotype_pseudoautosomal(self):
        """ test that set_genotype() works correctly in pseudoautosomal regions
        """
        region_start = 60002
        region_end = 2699520
        cnv = self.var

        # place the CNV on chrX, within the pseudoautosomal coordinates
        cnv.chrom = "X"
        cnv.position = region_start + 1000
        cnv.info["END"] = region_end - 1000
        cnv.set_gender("F")
        cnv.alt_allele = "<DUP>"
        cnv.set_genotype()

        self.assertEqual(cnv.genotype, "DUP")
        self.assertEqual(cnv.get_inheritance_type(), "autosomal")

    def test_get_range(self):
        """ test that get_range() operates correctly """
        cnv = self.var

        # with an END value, the range runs from the position to END
        cnv.position = 1000
        cnv.info["END"] = "2000"
        self.assertEqual(cnv.get_range(), (1000, 2000))

        # without any info, the expected end is 10000 past the position
        cnv.info = {}
        self.assertEqual(cnv.get_range(), (1000, 11000))

    def test_fix_gene_IDs(self):
        """ test that fix_gene_IDs() works correctly """
        cnv = self.var
        cnv.known_genes = {
            "TEST": {"start": 1000, "end": 2000, "chrom": "5"},
        }

        # a CNV overlapping its single known gene is left alone
        cnv.genes = ["TEST"]
        cnv.position = 1000
        cnv.info["END"] = "1500"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, ["TEST"])

        # names missing from the known genes dict are left alone
        cnv.genes = ["TEST", "TEST2"]
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, ["TEST", "TEST2"])

        # a known gene that the CNV does not overlap is replaced by "."
        cnv.position = 900
        cnv.info["END"] = "950"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, [".", "TEST2"])

        # without any known genes, the gene names are unaltered
        cnv.genes = ["TEST", "TEST2"]
        cnv.known_genes = None
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, ["TEST", "TEST2"])

    def test_set_gene_from_info_cnv(self):
        """ test that set_gene_from_info() works correctly """
        cnv = self.var

        # make sure the known genes are None, otherwise sometimes the values
        # from test_variant_info.py unit tests can bleed through. I'm not
        # sure why!
        cnv.known_genes = None

        # HGNC takes precedence when both fields are present
        cnv.info["HGNC"] = "A"
        cnv.info["HGNC_ALL"] = "B"
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, ["A"])

        # HGNC_ALL is used in the absence of HGNC
        del cnv.info["HGNC"]
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, ["B"])

        # when HGNC and HGNC_ALL are both undefined, CNVs overlapping genes
        # can still be included through NUMBERGENES > 0
        del cnv.info["HGNC_ALL"]

        # NUMBERGENES = 0 leaves no genes
        cnv.info["NUMBERGENES"] = 0
        cnv.set_gene_from_info()
        self.assertIsNone(cnv.genes)

        # NUMBERGENES > 0 gives a placeholder gene
        cnv.info["NUMBERGENES"] = 1
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, ["."])

        # finally check for no HGNC, HGNC_ALL, or NUMBERGENES
        del cnv.info["NUMBERGENES"]
        cnv.set_gene_from_info()
        self.assertEqual(cnv.genes, "1:15000000")

    def test_get_genes(self):
        """ test that get_genes() works correctly """
        # (value assigned to genes, expected get_genes() result)
        cases = [
            (None, []),
            (["TEST"], ["TEST"]),
            (["TEST1", "TEST2"], ["TEST1", "TEST2"]),
            (["."], ["."]),
        ]

        for genes, expected in cases:
            self.var.genes = genes
            self.assertEqual(self.var.get_genes(), expected)

    def test_fails_y_chrom_female(self):
        """ test that passes_filters() works correctly for female Y chrom CNVs
        """
        cnv = self.var
        cnv.chrom = "Y"
        cnv.set_gender("F")

        self.assertFalse(cnv.passes_filters())
class TestVariantCnvPy(unittest.TestCase):
    """ unit tests for the CNV class """

    def setUp(self):
        """ build the default female duplication CNV used by every test """
        # the leading VCF columns: CHROM, POS, ID, REF, ALT, QUAL, FILTER
        vcf_fields = ("1", "15000000", ".", "A", "<DUP>", "1000", "PASS")

        info_field = (
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
            "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
            "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
        )

        self.var = CNV(*vcf_fields, info=info_field, format="inheritance:DP",
            sample="deNovo:50", gender="F")

    def test_set_genotype(self):
        """ test that set_genotype() operates correctly """
        cnv = self.var

        # DUP and DEL alt alleles map onto their genotype codes
        for alts, genotype in [(["<DUP>"], "DUP"), (["<DEL>"], "DEL")]:
            cnv.alt_alleles = alts
            cnv.set_genotype()
            self.assertEqual(cnv.genotype, genotype)

        # any other alt allele raises an error
        cnv.alt_alleles = ["G"]
        with self.assertRaises(ValueError):
            cnv.set_genotype()

        # and check that we raise an error for female Y chrom CNVs
        # NOTE(review): the alt allele is still "G" here, so this may be
        # passing because of the alt allele rather than the chromosome.
        cnv.chrom = "Y"
        cnv._set_gender("F")
        with self.assertRaises(ValueError):
            cnv.set_genotype()

    def test_set_genotype_pseudoautosomal(self):
        """ test that set_genotype() works correctly in pseudoautosomal regions
        """
        region_start = 60002
        region_end = 2699520
        cnv = self.var

        # place the CNV on chrX, within the pseudoautosomal coordinates
        cnv.chrom = "X"
        cnv.position = region_start + 1000
        cnv.info["END"] = region_end - 1000
        cnv._set_gender("F")
        cnv.alt_alleles = ["<DUP>"]
        cnv.set_genotype()

        self.assertEqual(cnv.genotype, "DUP")
        self.assertEqual(cnv.get_inheritance_type(), "autosomal")

    def test_get_range(self):
        """ test that get_range() operates correctly """
        cnv = self.var

        # with an END value, the range runs from the position to END
        cnv.position = 1000
        cnv.info["END"] = "2000"
        self.assertEqual(cnv.get_range(), (1000, 2000))

        # without any info, the expected end is 10000 past the position
        cnv.info = {}
        self.assertEqual(cnv.get_range(), (1000, 11000))

    def test_fix_gene_IDs(self):
        """ test that fix_gene_IDs() works correctly """
        cnv = self.var
        cnv.known_genes = {
            "TEST": {"start": 1000, "end": 2000, "chrom": "5"},
        }

        # a CNV overlapping its single known gene is left alone
        cnv.info.symbols = [Symbols(info={'HGNC_ID': 'TEST'}, idx=0)]
        cnv.position = 1000
        cnv.info["END"] = "1500"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [['TEST']])

        # names missing from the known genes dict are left alone
        cnv.info.symbols = [Symbols(info={'HGNC_ID': 'TEST|TEST2'}, idx=0)]
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [['TEST', 'TEST2']])

        # a known gene that the CNV does not overlap is replaced by None
        cnv.position = 900
        cnv.info["END"] = "950"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [[None, 'TEST2']])

        # without any known genes, the gene names are unaltered
        cnv.info.symbols = [Symbols(info={'HGNC_ID': 'TEST|TEST2'}, idx=0)]
        cnv.known_genes = None
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.info.get_genes(), [['TEST', 'TEST2']])

    def test_set_gene_from_info_cnv(self):
        """ test that parse_gene_symbols() works correctly for CNVs """
        cnv = self.var

        # make sure the known genes are None, otherwise sometimes the values
        # from test_variant_info.py unit tests can bleed through. I'm not
        # sure why!
        cnv.known_genes = None

        # the HGNC field is picked up as a gene symbol
        cnv.info["HGNC"] = "A"
        symbols = cnv.info.parse_gene_symbols(cnv.alt_alleles, [])
        self.assertEqual(symbols, [Symbols(info={'HGNC': 'A'}, idx=0)])

        # HGNC_ALL doesn't contribute anything once HGNC is gone
        cnv.info["HGNC_ALL"] = "B"
        del cnv.info["HGNC"]
        symbols = cnv.info.parse_gene_symbols(cnv.alt_alleles, [])
        self.assertEqual(symbols, [Symbols(info={}, idx=0)])

    def test_get_genes(self):
        """ test that get_genes() works correctly """
        # (info handed to Symbols, expected get_genes() result)
        cases = [
            ({}, [[]]),
            ({'HGNC': 'TEST'}, [["TEST"]]),
            ({'HGNC': 'TEST1|TEST2'}, [["TEST1", "TEST2"]]),
            ({'HGNC': '.'}, [[None]]),
        ]

        for symbol_info, expected in cases:
            self.var.info.symbols = [Symbols(info=symbol_info, idx=0)]
            self.assertEqual(self.var.info.get_genes(), expected)

    def test_fails_y_chrom_female(self):
        """ test that passes_filters() works correctly for female Y chrom CNVs
        """
        cnv = self.var
        cnv.chrom = "Y"
        cnv._set_gender("F")

        self.assertFalse(cnv.passes_filters())
class TestVariantCnvPy(unittest.TestCase):
    """ unit tests for the CNV class """

    def setUp(self):
        """ build the default female duplication CNV used by every test """
        # the positional VCF columns: CHROM, POS, ID, REF, ALT, FILTER
        vcf_fields = ("1", "15000000", ".", "A", "<DUP>", "PASS")

        info_field = (
            "HGNC=TEST;HGNC_ALL=TEST,OR5A1;CQ=missense_variant;"
            "CNSOLIDATE;WSCORE=0.5;CALLP=0.000;COMMONFORWARDS=0.000;"
            "MEANLR2=0.5;MADL2R=0.02;END=16000000;SVLEN=1000000"
        )

        self.var = CNV(*vcf_fields, info=info_field, format="inheritance:DP",
            sample="deNovo:50", gender="F")

    def test_set_genotype(self):
        """ test that set_genotype() operates correctly """
        cnv = self.var

        # DUP and DEL alt alleles map onto their genotype codes
        for alts, genotype in [(["<DUP>"], "DUP"), (["<DEL>"], "DEL")]:
            cnv.alt_alleles = alts
            cnv.set_genotype()
            self.assertEqual(cnv.genotype, genotype)

        # any other alt allele raises an error
        cnv.alt_alleles = ["G"]
        with self.assertRaises(ValueError):
            cnv.set_genotype()

        # and check that we raise an error for female Y chrom CNVs
        # NOTE(review): the alt allele is still "G" here, so this may be
        # passing because of the alt allele rather than the chromosome.
        cnv.chrom = "Y"
        cnv._set_gender("F")
        with self.assertRaises(ValueError):
            cnv.set_genotype()

    def test_set_genotype_pseudoautosomal(self):
        """ test that set_genotype() works correctly in pseudoautosomal regions
        """
        region_start = 60002
        region_end = 2699520
        cnv = self.var

        # place the CNV on chrX, within the pseudoautosomal coordinates
        cnv.chrom = "X"
        cnv.position = region_start + 1000
        cnv.info["END"] = region_end - 1000
        cnv._set_gender("F")
        cnv.alt_alleles = ["<DUP>"]
        cnv.set_genotype()

        self.assertEqual(cnv.genotype, "DUP")
        self.assertEqual(cnv.get_inheritance_type(), "autosomal")

    def test_get_range(self):
        """ test that get_range() operates correctly """
        cnv = self.var

        # with an END value, the range runs from the position to END
        cnv.position = 1000
        cnv.info["END"] = "2000"
        self.assertEqual(cnv.get_range(), (1000, 2000))

        # without any info, the expected end is 10000 past the position
        cnv.info = {}
        self.assertEqual(cnv.get_range(), (1000, 11000))

    def test_fix_gene_IDs(self):
        """ test that fix_gene_IDs() works correctly """
        cnv = self.var
        cnv.known_genes = {
            "TEST": {"start": 1000, "end": 2000, "chrom": "5"},
        }

        # a CNV overlapping its single known gene is left alone
        cnv.genes = [["TEST"]]
        cnv.position = 1000
        cnv.info["END"] = "1500"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, [["TEST"]])

        # names missing from the known genes dict are left alone
        cnv.genes = [["TEST", "TEST2"]]
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, [["TEST", "TEST2"]])

        # a known gene that the CNV does not overlap is replaced by "."
        cnv.position = 900
        cnv.info["END"] = "950"
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, [[".", "TEST2"]])

        # without any known genes, the gene names are unaltered
        cnv.genes = [["TEST", "TEST2"]]
        cnv.known_genes = None
        cnv.fix_gene_IDs()
        self.assertEqual(cnv.genes, [["TEST", "TEST2"]])

    def test_set_gene_from_info_cnv(self):
        """ test that get_gene_from_info() works correctly for CNVs """
        cnv = self.var

        def genes_from_info():
            # re-read the current info and alt alleles on every call
            return cnv.get_gene_from_info(cnv.info, cnv.alt_alleles, [])

        # make sure the known genes are None, otherwise sometimes the values
        # from test_variant_info.py unit tests can bleed through. I'm not
        # sure why!
        cnv.known_genes = None

        # HGNC takes precedence when both fields are present
        cnv.info["HGNC"] = "A"
        cnv.info["HGNC_ALL"] = "B"
        self.assertEqual(genes_from_info(), [["A"]])

        # HGNC_ALL is used in the absence of HGNC
        del cnv.info["HGNC"]
        self.assertEqual(genes_from_info(), [["B"]])

        # when HGNC and HGNC_ALL are both undefined, CNVs overlapping genes
        # can still be included through NUMBERGENES > 0
        del cnv.info["HGNC_ALL"]

        # NUMBERGENES = 0 gives no genes
        cnv.info["NUMBERGENES"] = 0
        self.assertIsNone(genes_from_info())

        # NUMBERGENES > 0 gives a placeholder gene
        cnv.info["NUMBERGENES"] = 1
        self.assertEqual(genes_from_info(), [["."]])

        # finally check for no HGNC, HGNC_ALL, or NUMBERGENES
        del cnv.info["NUMBERGENES"]
        self.assertEqual(genes_from_info(), [["1:15000000"]])

    def test_get_genes(self):
        """ test that get_genes() works correctly """
        # (value assigned to genes, expected get_genes() result)
        cases = [
            (None, []),
            (["TEST"], ["TEST"]),
            (["TEST1", "TEST2"], ["TEST1", "TEST2"]),
            (["."], ["."]),
        ]

        for genes, expected in cases:
            self.var.genes = genes
            self.assertEqual(self.var.get_genes(), expected)

    def test_fails_y_chrom_female(self):
        """ test that passes_filters() works correctly for female Y chrom CNVs
        """
        cnv = self.var
        cnv.chrom = "Y"
        cnv._set_gender("F")

        self.assertFalse(cnv.passes_filters())