def setUp(self):
    """ build the default SNV object shared by the tests

    Registers the population tags with Info, then stores the format keys,
    the sample values and a default SNV on the test case.
    """

    self.pops = ["AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
        "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"]
    Info.set_populations(self.pops)

    self.keys = "GT:DP:AD"
    self.values = "0/1:50:10,10"

    # positional fields, in order: chrom, pos, snp_id, ref, alt, qual, filt
    fields = ("1", "15000000", ".", "A", "G", "1000", "PASS")
    self.var = SNV(*fields,
        info="HGNC_ID=1001;CQ=missense_variant;random_tag",
        format=self.keys, sample=self.values)
def test_passes_known_genes(self):
    ''' check passes_filters() against matching, non-matching and unset
    known gene sets
    '''

    # the known genes are set before the variant is constructed
    SNV.known_genes = {"1001": {'irrelevant'}}
    variant = SNV('1', '100', '.', 'A', 'G', '1000', 'PASS',
        info="HGNC_ID=1001;CQ=missense_variant;random_tag",
        format='GT:DP', sample='0/1:50')

    # a variant that affects a known gene passes
    self.assertTrue(variant.passes_filters())

    # a variant outside the known genes fails, but when no known genes are
    # provided at all, the variant passes again
    later_cases = (
        ({"1002": {'irrelevant'}}, self.assertFalse),
        (None, self.assertTrue),
    )
    for known, check in later_cases:
        SNV.known_genes = known
        check(variant.passes_filters())
def test_add_single_variant(self):
    """ check which variants add_single_variant() appends to the list

    The sub-functions are tested elsewhere; this only checks that a valid
    variant ends up in the variants list and an invalid one does not.
    """

    sex = "M"

    # an autosomal het should be kept, a male X-chrom het should be dropped
    cases = (("1", True), ("X", False))
    for chrom, kept in cases:
        line = [chrom, "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
        # NOTE(review): line[:6] ends at the QUAL column, so "1000" lands in
        # the sixth SNV argument - confirm this is intended
        variant = SNV(*line[:6])

        collected = []
        self.vcf_loader.add_single_variant(collected, variant, sex, line)
        self.assertEqual(collected, [variant] if kept else [])
def create_snv(self, gender, genotype, chrom, pos, cq=None):
    """ build a SNV for the tests

    Args:
        gender: gender passed to set_gender()
        genotype: value for the GT format field, eg "0/1"
        chrom: chromosome of the variant
        pos: position of the variant
        cq: consequence placed in the CQ info field, "missense_variant" if
            not given

    Returns:
        SNV with info, format, gender and genotype all set
    """

    consequence = "missense_variant" if cq is None else cq

    # SNV is used here because SNV inherits VcfInfo
    variant = SNV(chrom, pos, ".", "A", "G", "PASS")
    variant.add_info("HGNC=TEST;CQ={};random_tag".format(consequence))
    variant.add_format("GT:DP", genotype + ":50")
    variant.set_gender(gender)
    variant.set_genotype()

    return variant
def setUp(self):
    """ create the default variant and known genes table for the tests
    """

    # SNV is used here because SNV inherits VcfInfo
    self.var = SNV("1", "15000000", "CM00001", "A", "G", "PASS")
    self.var.debug_chrom = "1"
    self.var.debug_pos = "15000000"

    self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

    # the default filtering criteria, in the form they are loaded into python
    atrx = {
        "inheritance": {"Hemizygous": {"Loss of function"}},
        "start": "10000000",
        "chrom": "1",
        "confirmed_status": {"Confirmed DD Gene"},
        "end": "20000000",
    }
    SNV.known_genes = {"ATRX": atrx}

    self.var.add_info(self.default_info)
def create_snv(self, gender, genotype):
    """ build an X chromosome SNV that also carries its raw VCF line

    Args:
        gender: gender passed to set_gender()
        genotype: value for the GT format field, eg "0/1"

    Returns:
        SNV with vcf_line, info, format, gender and genotype all set
    """

    # the full single-sample VCF line, in column order
    fields = ["X", "15000000", ".", "A", "G", "50", "PASS",
        "HGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005",
        "GT:DP", genotype + ":50"]

    # SNV is used here because SNV inherits VcfInfo. The quality column
    # (fields[5]) is not passed to the constructor.
    variant = SNV(fields[0], fields[1], fields[2], fields[3], fields[4],
        fields[6])
    variant.vcf_line = fields

    variant.add_info(fields[7])
    variant.add_format(fields[8], fields[9])
    variant.set_gender(gender)
    variant.set_genotype()

    return variant
def test_open_individual(self):
    ''' check open_individual() with and without forced-pass variant keys
    '''

    # a missing individual gives an empty list
    self.assertEqual(open_individual(None), [])

    # write a VCF with one line per gene, at positions 1 and 2
    extras = ['HGNC=TEST;MAX_AF=0.0001', 'HGNC=ATRX;MAX_AF=0.0001']
    vcf = make_vcf_header()
    for pos, extra in enumerate(extras, start=1):
        vcf.append(make_vcf_line(pos=pos, extra=extra))

    path = os.path.join(self.temp_dir, "temp.vcf")
    write_temp_vcf(path, vcf)

    person = Person('fam_id', 'sample', 'dad', 'mom', 'F', '2', path)

    # the two expected variants differ only in position and info
    shared = dict(chrom="1", id=".", ref="G", alts="T", qual='1000',
        filter="PASS", format="DP:GT", sample="50:0/1", gender="female",
        mnv_code=None)
    var1 = SNV(position=1,
        info="CQ=missense_variant;HGNC=TEST;MAX_AF=0.0001", **shared)
    var2 = SNV(position=2,
        info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001", **shared)

    # with no extra arguments, only the second variant is returned
    self.assertEqual(open_individual(person), [var2])

    # define a set of variants to automatically pass, and check that these
    # variants are all returned
    child_keys = set([('1', 1), ('1', 2)])
    self.assertEqual(open_individual(person, child_variants=child_keys),
        [var1, var2])
def test_open_individual_with_mnvs(self):
    ''' check that open_individual() takes MNV codes into account
    '''

    lines = (
        (1, 'splice_region_variant'),
        (2, 'missense_variant'),
    )
    vcf = make_vcf_header()
    for pos, consequence in lines:
        vcf.append(make_vcf_line(pos=pos, cq=consequence,
            extra='HGNC=ATRX;MAX_AF=0.0001'))

    path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(path, vcf)

    person = Person('fam_id', 'sample', 'dad', 'mom', 'F', '2', path)

    # fields shared by both of the expected variants
    shared = {'chrom': "1", 'id': ".", 'ref': "G", 'alts': "T",
        'filter': "PASS", 'format': "DP:GT", 'sample': "50:0/1",
        'gender': "female", 'qual': '1000'}
    var1 = SNV(position=1,
        info="CQ=splice_region_variant;HGNC=ATRX;MAX_AF=0.0001",
        mnv_code='modified_protein_altering_mnv', **shared)
    var2 = SNV(position=2,
        info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
        mnv_code=None, **shared)

    # by default only one variant passes
    self.assertEqual(open_individual(person), [var2])

    # if we include MNVs, then the passing variants swap
    mnvs = {
        ('1', 1): 'modified_protein_altering_mnv',
        ('1', 2): 'modified_synonymous_mnv',
    }
    self.assertEqual(open_individual(person, mnvs=mnvs), [var1])
def test_construct_variant(self):
    """ check construct_variant() against directly constructed variants
    """

    gender = "M"
    snv_line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
    cnv_line = ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
        "GT", "0/1"]

    # the SNV line is compared to a SNV, the <DEL> line to a CNV
    for variant_class, line in ((SNV, snv_line), (CNV, cnv_line)):
        expected = variant_class(*line, gender=gender)
        observed = construct_variant(line, gender)

        self.assertEqual(observed.get_key(), expected.get_key())
        self.assertEqual(observed.format, {'GT': '0/1'})
def test_load_trio(self):
    ''' check that load_trio() returns the expected TrioGenotypes
    '''

    def make_vcf(person):
        # write a gzipped two-line VCF for the person, and return its path
        contents = make_vcf_header()
        for pos, gene in ((1, 'HGNC=TEST;MAX_AF=0.0001'),
                (2, 'HGNC=ATRX;MAX_AF=0.0001')):
            contents.append(make_vcf_line(pos=pos, extra=gene))

        vcf_path = os.path.join(self.temp_dir, "{}.vcf.gz".format(person))
        write_gzipped_vcf(vcf_path, contents)
        return vcf_path

    child_path = make_vcf('child')
    mother_path = make_vcf('mother')
    father_path = make_vcf('father')

    family = Family('fam_id')
    family.add_child('sample', 'mother_id', 'father_id', 'female', '2',
        child_path)
    family.add_mother('mother_id', '0', '0', 'female', '1', mother_path)
    family.add_father('father_id', '0', '0', 'male', '1', father_path)
    family.set_child()

    sum_x_lr2_proband = 0

    # the parameters and values for the expected SNV of the child and mother
    female = {'chrom': "1", 'position': 2, 'id': ".", 'ref': "G",
        'alts': "T", 'filter': "PASS",
        'info': "CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
        'format': "DP:GT:AD", 'sample': "50:0/1:10,10", 'gender': "female",
        'mnv_code': None, 'qual': '1000'}

    # the father differs only by gender
    male = dict(female, gender='male')

    expected = TrioGenotypes(chrom="1", pos=2, child=SNV(**female),
        mother=SNV(**female), father=SNV(**male))
    self.assertEqual(load_trio(family, sum_x_lr2_proband), [expected])
def test_filter_de_novos(self):
    """ check that filter_de_novos() works correctly

    Runs the same child variant through three situations: a family without
    parents, a family with parents where neither parent has the variant,
    and a family where the mother shares the variant.
    """

    # make a family without parents
    family = Family("fam_id")
    child_gender = "female"
    family.add_child("child_id", "child_vcf_path", "2", child_gender)
    self.vcf_loader.family = family

    # set up an autosomal variant
    line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]

    child_var = SNV(*line[:6])
    child_var.add_info(line[7])
    child_var.add_format(line[8], line[9])
    child_var.set_gender(child_gender)
    child_var.set_genotype()

    # combine the variant into a list of TrioGenotypes
    child_vars = [child_var]
    mother_vars = []
    father_vars = []
    trio_variants = self.vcf_loader.combine_trio_variants(
        child_vars, mother_vars, father_vars)

    # check that vars without parents get passed through automatically
    self.assertEqual(self.vcf_loader.filter_de_novos(trio_variants, 0.9),
        trio_variants)

    # now add parents to the family
    family.add_mother("mother_id", "mother_vcf_path", "1", "female")
    family.add_father("father_id", "father_vcf_path", "1", "male")

    # re-generate the variants list now that parents have been included
    trio_variants = self.vcf_loader.combine_trio_variants(
        child_vars, mother_vars, father_vars)

    # check that vars with parents, and that appear to be de novo are
    # filtered out
    self.assertEqual(self.vcf_loader.filter_de_novos(trio_variants, 0.9), [])

    # check that vars with parents, but which are not de novo, are retained
    mother_vars = child_vars
    trio_variants = self.vcf_loader.combine_trio_variants(
        child_vars, mother_vars, father_vars)
    self.assertEqual(self.vcf_loader.filter_de_novos(trio_variants, 0.9),
        trio_variants)
def test_get_parental_var_snv(self):
    ''' check get_parental_var() for SNVs, with and without a parental match
    '''

    sex = 'F'
    child_var = create_snv(sex, '0/1')
    mom = Person('fam_id', 'mom', '0', '0', 'F', '1', '/PATH')

    # with no parental variants to match, the returned variant should be a
    # default one, carrying a 0/0 genotype for the missing parental genotype
    observed = get_parental_var(child_var, [], mom)
    expected = SNV(chrom="1", position=150, id=".", ref="A", alts="G",
        qual='1000', filter="PASS", info=str(child_var.info), format="GT",
        sample="0/0", gender="female", mnv_code=None)
    self.assertEqual(observed, expected)

    # when the mother does have a matching variant, that variant is returned
    mother_var = create_snv(sex, '0/0')
    self.assertEqual(get_parental_var(child_var, [mother_var], mom),
        mother_var)
def construct_variant(self, line, gender):
    """ build the Variant object that suits a single VCF line

    Args:
        line: list of elements of a single sample VCF line:
            [chrom, position, snp_id, ref_allele, alt_allele, quality,
            filter_value, info, format_keys, format_values]
        gender: gender of the individual to whom the variant line belongs
            (eg "1" or "M" for male, "2", or "F" for female). Only applied
            to CNVs here.

    Returns:
        a CNV if the alt allele is <DUP> or <DEL>, otherwise a SNV
    """

    # CNVs are recognised purely from their alt allele value
    is_cnv = line[4] in ("<DUP>", "<DEL>")

    if not is_cnv:
        # SNVs only get their info here; gender and format are not set
        snv = SNV(line[0], line[1], line[2], line[3], line[4], line[6])
        snv.add_info(line[7])
        return snv

    cnv = CNV(line[0], line[1], line[2], line[3], line[4], line[6])
    cnv.add_info(line[7])

    # CNVs require the format values for filtering
    cnv.set_gender(gender)
    cnv.add_format(line[8], line[9])

    if self.known_genes is not None:
        cnv.fix_gene_IDs()

    return cnv
def create_snv(self, chrom, geno="0/1", info=None, pos='150', snp_id='.',
        ref='A', alt='G', qual='1000', filt='PASS', **kwargs):
    """ build a male SNV for the tests

    Args:
        chrom: chromosome of the variant
        geno: value for the GT format field
        info: INFO string, a default ATRX missense string is used if None
        pos, snp_id, ref, alt, qual, filt: remaining positional SNV fields
        kwargs: extra keyword arguments handed straight to SNV

    Returns:
        the constructed SNV
    """

    info_field = info
    if info_field is None:
        info_field = "HGNC=ATRX;CQ=missense_variant;random_tag;AF_AFR=0.0001"

    format_keys = "GT:DP:TEAM29_FILTER:PP_DNM"
    sample_values = "{0}:50:PASS:0.99".format(geno)

    return SNV(chrom, pos, snp_id, ref, alt, qual, filt, info=info_field,
        format=format_keys, sample=sample_values, gender='male', **kwargs)
def test_construct_variant(self):
    """ check the variants built by the loader's construct_variant()
    """

    sex = "M"

    # a line with an ordinary alt allele is compared against a SNV
    snv_line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
    expected = SNV(*snv_line[:6])
    observed = self.vcf_loader.construct_variant(snv_line, sex)
    self.assertEqual(observed.get_key(), expected.get_key())

    # initally constructing a SNV shouldn't affect the format variable
    self.assertEqual(observed.format, None)

    # a line with a <DEL> alt allele is compared against a CNV
    cnv_line = ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
        "GT", "0/1"]
    expected = CNV(*cnv_line[:6])
    expected.add_info(cnv_line[7])
    observed = self.vcf_loader.construct_variant(cnv_line, sex)
    self.assertEqual(observed.get_key(), expected.get_key())

    # unlike the SNV, the CNV should have its format set
    self.assertNotEqual(observed.format, None)
def test_construct_variant(self):
    """ check the variants built by construct_variant() with known genes
    """

    sex = "M"

    def build(line):
        # construct the variant with the known genes of the test case
        return construct_variant(line, sex, self.known_genes)

    # a line with an ordinary alt allele is compared against a SNV
    snv_line = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
    snv_expected = SNV(*snv_line[:6])
    snv_observed = build(snv_line)
    self.assertEqual(snv_observed.get_key(), snv_expected.get_key())

    # initally constructing a SNV shouldn't affect the format variable
    self.assertEqual(snv_observed.format, None)

    # a line with a <DEL> alt allele is compared against a CNV
    cnv_line = ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
        "GT", "0/1"]
    cnv_expected = CNV(*cnv_line[:6])
    cnv_expected.add_info(cnv_line[7])
    cnv_observed = build(cnv_line)
    self.assertEqual(cnv_observed.get_key(), cnv_expected.get_key())

    # unlike the SNV, the CNV should have its format set
    self.assertNotEqual(cnv_observed.format, None)
def create_snv(self, chrom, geno="0/1"):
    """ build a male SNV at a fixed position on the given chromosome

    Args:
        chrom: chromosome of the variant
        geno: value for the GT format field

    Returns:
        SNV with info, format, gender and genotype all set
    """

    # SNV is used here because SNV inherits VcfInfo
    variant = SNV(chrom, "15000000", ".", "A", "G", "PASS")

    variant.add_info("HGNC=ATRX;CQ=missense_variant;random_tag;AF_AFR=0.0001")
    variant.add_format("GT:DP:TEAM29_FILTER:PP_DNM",
        "{0}:50:PASS:0.99".format(geno))
    variant.set_gender("male")
    variant.set_genotype()

    return variant
def test_construct_variant(self):
    """ check construct_variant() for a SNV line and a CNV line
    """

    gender = "M"

    def check(variant_class, line):
        # the constructed variant must share its key with one built
        # directly from the line, and must have parsed the format
        reference = variant_class(*line, gender=gender)
        built = construct_variant(line, gender)
        self.assertEqual(built.get_key(), reference.get_key())
        self.assertEqual(built.format, {'GT': '0/1'})

    # check that construct variant works for SNVs
    check(SNV, ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"])

    # check that construct variant works for CNVs
    check(CNV, ["1", "100", ".", "T", "<DEL>", "1000", "PASS", "END=200",
        "GT", "0/1"])
def setUp(self):
    """ create the default variant, population tags and format strings
    """

    self.pops = ["AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
        "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"]

    # stored for the tests to pass to add_format(); not applied here
    self.format_keys = "GT:DP"
    self.sample_values = "0/1:50"

    # SNV is used here because SNV inherits VcfInfo
    self.var = SNV("1", "15000000", ".", "A", "G", "PASS")
    self.var.add_info("HGNC=ATRX;CQ=missense_variant;random_tag")
def create_snv(self, gender, genotype):
    """ build a chr1 SNV whose info carries the DENOVO-SNP and PP_DNM tags

    Args:
        gender: gender passed to set_gender()
        genotype: value for the GT format field, eg "0/1"

    Returns:
        SNV with info, format, gender and genotype all set
    """

    # SNV is used here because SNV inherits VcfInfo
    variant = SNV("1", "15000000", ".", "A", "G", "PASS")

    variant.add_info("HGNC=TEST;CQ=missense_variant;DENOVO-SNP;PP_DNM=0.99")
    variant.add_format("GT:DP:TEAM29_FILTER:PP_DNM",
        genotype + ":50:PASS:0.99")
    variant.set_gender(gender)
    variant.set_genotype()

    return variant
def test_filter_de_novos(self):
    """ check that filter_de_novos() works correctly

    Runs the same child variant through three situations: a family without
    parents, a family with parents where neither parent has the variant,
    and a family where the mother shares the variant.
    """

    # make a family without parents
    family = Family("fam_id")
    child_gender = "female"
    family.add_child('child_id', 'mother_id', 'father_id', child_gender,
        '2', 'child_path')

    # set up an autosomal variant. The final element of args is the gender.
    gender = "M"
    args = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1",
        gender]
    child_var = SNV(*args)

    # combine the variant into a list of TrioGenotypes
    child_vars = [child_var]
    mother_vars = []
    father_vars = []
    trio_variants = combine_trio_variants(family, child_vars, mother_vars,
        father_vars)

    # check that vars without parents get passed through automatically
    self.assertEqual(filter_de_novos(trio_variants, 0.9), trio_variants)

    # now add parents to the family
    family.add_mother("mother_id", '0', '0', 'female', '1', "mother_vcf_path")
    family.add_father("father_id", '0', '0', 'male', '1', "father_vcf_path")

    # re-generate the variants list now that parents have been included
    trio_variants = combine_trio_variants(family, child_vars, mother_vars,
        father_vars)

    # check that vars with parents, and that appear to be de novo are
    # filtered out
    self.assertEqual(filter_de_novos(trio_variants, 0.9), [])

    # check that vars with parents, but which are not de novo, are retained
    mother_vars = child_vars
    trio_variants = combine_trio_variants(family, child_vars, mother_vars,
        father_vars)
    self.assertEqual(filter_de_novos(trio_variants, 0.9), trio_variants)
def create_snv(self, sex, genotype, cq="missense_variant", hgnc="TEST",
        chrom="1"):
    """ build a SNV whose info carries the DENOVO-SNP and PP_DNM tags

    Args:
        sex: gender passed to set_gender()
        genotype: value for the GT format field, eg "0/1"
        cq: consequence placed in the CQ info field
        hgnc: gene symbol placed in the HGNC info field
        chrom: chromosome of the variant

    Returns:
        SNV with info, format, gender and genotype all set
    """

    # SNV is used here because SNV inherits VcfInfo
    variant = SNV(chrom, "15000000", ".", "A", "G", "PASS")

    info_field = "HGNC={0};CQ={1};DENOVO-SNP;PP_DNM=0.99".format(hgnc, cq)
    variant.add_info(info_field)
    variant.add_format("GT:DP:TEAM29_FILTER:PP_DNM",
        genotype + ":50:PASS:0.99")
    variant.set_gender(sex)
    variant.set_genotype()

    return variant
def setUp(self):
    """ prepare the population tags and the default SNV for each test
    """

    populations = ["AFR", "AMR", "ASN", "DDD", "EAS", "ESP", "EUR", "MAX",
        "SAS", "UK10K_cohort"]
    # every population tag is the population name with an "_AF" suffix
    self.pops = [name + "_AF" for name in populations]
    Info.set_populations(self.pops)

    self.keys = "GT:DP:AD"
    self.values = "0/1:50:10,10"

    variant_kwargs = {
        "info": "HGNC_ID=1001;CQ=missense_variant;random_tag",
        "format": self.keys,
        "sample": self.values,
    }
    # positional fields, in order: chrom, pos, snp_id, ref, alt, qual, filt
    self.var = SNV("1", "15000000", ".", "A", "G", "1000", "PASS",
        **variant_kwargs)
def get_parental_var(self, var, parental_vars, gender, matcher):
    """ find the parental variant matching a childs variant, falling back
    to a default variant with the default genotype.

    Args:
        var: childs var, as Variant object
        parental_vars: list of parental variants
        gender: gender of the parent
        matcher: cnv matcher for parent

    Returns:
        the parental Variant with the same key as the childs variant, or a
        newly created Variant copying the childs site details
    """

    key = var.get_key()
    is_cnv = isinstance(var, CNV)

    # a parental CNV might not share the start site of the childs CNV, so
    # swap to the key of the overlapping CNV when the matcher has one
    if is_cnv and matcher.has_match(var):
        key = matcher.get_overlap_key(key)

    matching = (x for x in parental_vars if key == x.get_key())
    found = next(matching, None)
    if found is not None:
        return found

    # the childs variant is absent from the parental variants, so build a
    # default variant of the same class for the parent
    variant_class = CNV if is_cnv else SNV
    default = variant_class(var.chrom, var.position, var.variant_id,
        var.ref_allele, var.alt_allele, var.filter)
    default.set_gender(gender)
    default.set_default_genotype()

    return default
def test_analyse_trio(self):
    ''' test that analyse_trio() works correctly

    Writes one single-line VCF per trio member, where only the child has a
    het genotype and the DENOVO-SNP/PP_DNM tags, then checks the result of
    analysing the family.
    '''

    # construct the VCFs for the trio members
    paths = {}
    for member in ['child', 'mom', 'dad']:
        vcf = make_vcf_header()

        geno, pp_dnm = '0/0', ''
        if member == 'child':
            geno, pp_dnm = '0/1', ';DENOVO-SNP;PP_DNM=1'

        vcf.append(make_vcf_line(genotype=geno, extra='HGNC=ARID1B' + pp_dnm))

        # write the VCF data to a file. The with block closes the handle, so
        # the data is on disk before the path is used, while delete=False
        # keeps the file in place after closing.
        with tempfile.NamedTemporaryFile(dir=self.temp_dir, delete=False,
                suffix='.vcf') as handle:
            for x in vcf:
                handle.write(x.encode('utf8'))
            paths[member] = handle.name

    # create a Family object, so we can load the data from the trio's VCFs
    fam_id = 'fam01'
    child = Person(fam_id, 'child', 'dad', 'mom', 'female', '2',
        paths['child'])
    mom = Person(fam_id, 'mom', '0', '0', 'female', '1', paths['mom'])
    dad = Person(fam_id, 'dad', '0', '0', 'male', '1', paths['dad'])
    family = Family(fam_id, [child], mom, dad)

    # the expected variants for each member of the trio
    child_var = SNV(chrom="1", position=1, id=".", ref="G", alts="T",
        qual='1000', filter="PASS",
        info="CQ=missense_variant;DENOVO-SNP;HGNC=ARID1B;PP_DNM=1",
        format="DP:GT", sample="50:0/1", gender="female", mnv_code=None)
    mother_var = SNV(chrom="1", position=1, id=".", ref="G", alts="T",
        qual='1000', filter="PASS", info="CQ=missense_variant;HGNC=ARID1B",
        format="DP:GT", sample="50:0/0", gender="female", mnv_code=None)
    father_var = SNV(chrom="1", position=1, id=".", ref="G", alts="T",
        qual='1000', filter="PASS", info="CQ=missense_variant;HGNC=ARID1B",
        format="DP:GT", sample="50:0/0", gender="male", mnv_code=None)

    trio = TrioGenotypes(chrom="1", pos=1, child=child_var,
        mother=mother_var, father=father_var)

    self.assertEqual(self.finder.analyse_trio(family),
        [(trio, ['single_variant'], ['Monoallelic', 'Mosaic'], ['ARID1B'])])
class TestVariantInfoPy(unittest.TestCase):
    """ unit tests for the info handling methods, run through a SNV object
    """

    def setUp(self):
        """ create the default variant and known genes table for the tests
        """

        # SNV is used here because SNV inherits VcfInfo
        self.var = SNV("1", "15000000", "CM00001", "A", "G", "PASS")
        self.var.debug_chrom = "1"
        self.var.debug_pos = "15000000"

        self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

        # the default filtering criteria, in the form they are loaded into
        # python
        atrx = {
            "inheritance": {"Hemizygous": {"Loss of function"}},
            "start": "10000000",
            "chrom": "1",
            "confirmed_status": {"Confirmed DD Gene"},
            "end": "20000000",
        }
        SNV.known_genes = {"ATRX": atrx}

        self.var.add_info(self.default_info)

    def test_set_gene_from_info(self):
        """ check set_gene_from_info() with and without a HGNC key
        """

        # with a HGNC key present, the gene is taken from it
        self.var.info["HGNC"] = "A"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.gene, "A")

        # without a HGNC key, the gene is None
        del self.var.info["HGNC"]
        self.var.set_gene_from_info()
        self.assertIsNone(self.var.gene)

    def test_is_lof(self):
        """ check is_lof() for LOF, non-LOF and null consequences
        """

        # a known LOF consequence returns True
        self.var.consequence = "stop_gained"
        self.assertTrue(self.var.is_lof())

        # a known non-LOF consequence and a null value both return False
        for consequence in ("missense_variant", None):
            self.var.consequence = consequence
            self.assertFalse(self.var.is_lof())

    def test_get_allele_frequency(self):
        """ check the conversion of frequency strings to numbers
        """

        cases = [
            ("1", 1),       # single number returns that number
            ("1,1", 1),     # two numbers return one number
            ("1,2", 2),     # two numbers return the highest number
            ("a,1", 1),     # number and string return the number
            ("a", None),    # single string value returns None
            ("a,b", None),  # multiple string values return None
        ]

        for text, expected in cases:
            self.assertEqual(self.var.get_allele_frequency(text), expected)

    def test_is_number(self):
        """ check that is_number() identifies values representing numbers
        """

        for value, expected in ((None, False), ("5", True), ("a", False)):
            self.assertEqual(self.var.is_number(value), expected)

    def test_find_max_allele_frequency(self):
        """ check the max allele frequency found as populations are added
        """

        # a variant without any recorded MAF
        self.assertIsNone(self.var.find_max_allele_frequency())

        # a single population
        self.var.info["MAX_AF"] = "0.005"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.005)

        # two populations
        self.var.info["AFR_AF"] = "0.01"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.01)

        # all of the populations
        populations = set(["AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"])
        for population in populations:
            self.var.info[population] = "0.05"

        self.assertEqual(self.var.find_max_allele_frequency(), 0.05)
def create_snv(self, gender, genotype):
    """ build an X chromosome SNV that also carries its raw VCF line

    Args:
        gender: gender passed to set_gender()
        genotype: value for the GT format field, eg "0/1"

    Returns:
        SNV with vcf_line, info, format, gender and genotype all set
    """

    site = ("X", "15000000", ".", "A", "G")
    quality, filter_value = "50", "PASS"

    # SNV is used here because SNV inherits VcfInfo. The quality is only
    # stored in the vcf_line, it is not passed to the constructor.
    variant = SNV(*(site + (filter_value,)))

    info_field = "HGNC=TEST;CQ=missense_variant;random_tag;EUR_AF=0.0005"
    keys = "GT:DP"
    values = genotype + ":50"

    variant.vcf_line = list(site) + [quality, filter_value, info_field, keys,
        values]

    variant.add_info(info_field)
    variant.add_format(keys, values)
    variant.set_gender(gender)
    variant.set_genotype()

    return variant
class TestVariantSnvPy(unittest.TestCase):
    """ unit testing of the SNV class
    """

    def setUp(self):
        """ define a default SNV object
        """

        self.pops = ["AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"]

        Info.set_populations(self.pops)

        chrom = "1"
        pos = "15000000"
        snp_id = "."
        ref = "A"
        alt = "G"
        qual = "1000"
        filt = "PASS"

        info = "HGNC_ID=1001;CQ=missense_variant;random_tag"
        self.keys = "GT:DP:AD"
        self.values = "0/1:50:10,10"

        self.var = SNV(chrom, pos, snp_id, ref, alt, qual, filt, info=info,
            format=self.keys, sample=self.values)

    def tearDown(self):
        """ reset the class-level state that the tests alter
        """

        SNV.known_genes = None
        Info.set_populations([])

    def test_get_key(self):
        """ tests that get_key() operates correctly
        """

        # make sure the chrom and position are correct
        self.var.chrom = "1"
        self.var.position = "15000000"
        self.assertEqual(self.var.get_key(), ("1", "15000000"))

        # and make sure the chrom and position are correct if we change them
        self.var.chrom = "22"
        self.var.position = "123456789"
        self.assertEqual(self.var.get_key(), ("22", "123456789"))

    def test_convert_genotype(self):
        """ test that genotypes convert from two char to single char
        """

        genotypes = [("0/0", 0), ("0/1", 1), ("1/0", 1), ("1/1", 2),
            ("1/2", 1), ("2/1", 1), ("0/2", 1), ("2/0", 1), ("2/2", 2)]

        # run through all the legit genotype codes
        for genotype, result in genotypes:
            self.assertEqual(self.var.convert_genotype(genotype), result)

        # Raise error when converting single character genotype
        with self.assertRaises(ValueError):
            self.var.convert_genotype("0")

        # raise error when converting unknown genotype
        with self.assertRaises(AssertionError):
            self.var.convert_genotype("a/a")

        # also include other genotype format possibilities. None of these
        # are used, but since they aren't explicitly forbidden, make sure
        # they work

        # check two character strings
        self.assertEqual(self.var.convert_genotype("12|34"), 1)
        self.assertEqual(self.var.convert_genotype("99|99"), 2)

    def test_set_genotype_autosomal(self):
        """ test that set_genotype() operates correctly
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        genotypes = [("0/0", 0), ("0/1", 1), ("1/1", 2)]

        for genotype, result in genotypes:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.get_genotype(), result)

        # remove the format attribute, so we can raise an error
        self.var.format = None
        with self.assertRaises(ValueError):
            self.var.set_genotype()

    def test_set_genotype_allosomal_male(self):
        """ test that set_genotype() operates correctly for the male X chrom
        """

        self.var.add_format(self.keys, self.values)
        self.var.chrom = "X"
        self.var._set_gender("male")

        genotypes = [("0/0", 0), ("1/1", 2)]

        for genotype, result in genotypes:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.get_genotype(), result)

        # check that we raise an error for X chrom hets
        for genotype in ["0/1", "1/0"]:
            self.var.format["GT"] = genotype
            with self.assertRaises(ValueError):
                self.var.set_genotype()

    def test_set_genotype_allosomal_female(self):
        """ test that set_genotype() operates correctly for the female X chrom
        """

        self.var.add_format(self.keys, self.values)
        self.var.chrom = "X"
        self.var._set_gender("female")

        genotypes = [("0/0", 0), ("0/1", 1), ("1/1", 2)]

        for genotype, result in genotypes:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.get_genotype(), result)

    def test_set_allosomal_male(self):
        """ test that convert_allosomal_genotype_code_to_alleles handles hets
        in male correctly
        """

        self.var._set_gender("male")
        self.var.chrom = 'X'
        self.var.genotype = '1'

        # treat as hom if VAF > 0.8
        self.var.format["AD"] = '1,19'
        self.var.format["GT"] = '1/1'
        self.var.convert_allosomal_genotype_code_to_alleles()
        self.assertEqual(self.var.alleles, set([self.var.alt_alleles]))

        # treat as hom if denovo
        self.var.format["AD"] = '10,19'
        self.var.format["PP_DNM"] = 0.0099
        self.var.convert_allosomal_genotype_code_to_alleles()
        self.assertEqual(self.var.alleles, set([self.var.alt_alleles]))

    def test_is_het_autosomal(self):
        """ tests that is_het() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        het = [("0/0", False), ("0/1", True), ("1/1", False)]

        for genotype, result in het:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.is_het(), result)

    def test_is_hom_alt_autosomal(self):
        """ tests that is_hom_alt() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        hom_alt = [("0/0", False), ("0/1", False), ("1/1", True)]

        for genotype, result in hom_alt:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.is_hom_alt(), result)

    def test_is_hom_ref_autosomal(self):
        """ tests that is_hom_ref() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        hom_ref = [("0/0", True), ("0/1", False), ("1/1", False)]

        for genotype, result in hom_ref:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.is_hom_ref(), result)

    def test_is_not_ref_autosomal(self):
        """ tests that is_not_ref() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        not_ref = [("0/0", False), ("0/1", True), ("1/1", True)]

        for genotype, result in not_ref:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.is_not_ref(), result)

    def test_is_not_alt_autosomal(self):
        """ tests that is_not_alt() operates correctly for autosomal chromosomes
        """

        self.var.add_format(self.keys, self.values)
        self.var._set_gender("male")

        not_alt = [("0/0", True), ("0/1", True), ("1/1", False)]

        for genotype, result in not_alt:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(self.var.is_not_alt(), result)

    def test_passes_default_filters(self):
        """ test that different variants pass or fail the VcfInfo filters
        """

        # check that a default variant passes the filters
        self.assertTrue(self.var.passes_filters())

    def test_passes_known_genes(self):
        ''' test that genes pass or fail when they affect known genes or not
        '''

        SNV.known_genes = {"1001": {'irrelevant'}}
        info = "HGNC_ID=1001;CQ=missense_variant;random_tag"
        var = SNV('1', '100', '.', 'A', 'G', '1000', 'PASS', info=info,
            format='GT:DP', sample='0/1:50')

        # a variant that affects a known gene passes
        self.assertTrue(var.passes_filters())

        # a variant that doesn't affect any known genes fails
        SNV.known_genes = {"1002": {'irrelevant'}}
        self.assertFalse(var.passes_filters())

        # if we haven't provided any known genes, the variant passes
        SNV.known_genes = None
        self.assertTrue(var.passes_filters())

    def test_passes_alternate_filter_string(self):
        """ test that the alternate permitted FILTER string also passes
        """

        # check that the alternate FILTER value passes
        self.var.filter = "."
        self.assertTrue(self.var.passes_filters())

        self.var.filter = "FAIL"
        self.assertFalse(self.var.passes_filters())

        # check that low VQSLOD on its own will pass the variant
        self.var.filter = "LOW_VQSLOD"
        self.assertTrue(self.var.passes_filters())

        # check that low VQSLOD in a de novo will still pass
        self.var.filter = "LOW_VQSLOD"
        self.var.info["DENOVO-SNP"] = True
        self.assertTrue(self.var.passes_filters())

    def test_passes_filters_low_maf(self):
        """ tests that low MAF values pass the filters
        """

        # check that low MAF values pass the filters
        for pop in self.pops:
            self.var.info[pop] = "0.001"
            self.assertTrue(self.var.passes_filters())

            # and check that MAF on the threshold still pass
            self.var.info[pop] = "0.005"
            self.assertTrue(self.var.passes_filters())

    def test_out_of_range_maf(self):
        """ check that MAF outside 0-1 still pass or fail correctly
        """

        self.var.info["AFR_AF"] = "-1"
        self.assertTrue(self.var.passes_filters())

        self.var.info["AFR_AF"] = "100"
        self.assertFalse(self.var.passes_filters())

    def test_fails_filters_high_maf(self):
        """ test that variants with high MAF fail the filtering
        """

        # check that a high MAF fails the filters, whichever population it
        # is recorded for
        for pop in self.pops:
            var = self.var
            var.info[pop] = "0.0101"
            self.assertFalse(var.passes_filters())

    def test_passes_consequence_filter(self):
        """ check all the consequence values that should pass
        """

        vep_passing = ["transcript_ablation", "splice_donor_variant",
            "splice_acceptor_variant", "frameshift_variant",
            "initiator_codon_variant", "inframe_insertion", "inframe_deletion",
            "missense_variant", "transcript_amplification", "stop_gained",
            "stop_lost"]

        # check all the passing consequences
        # NOTE(review): this sets var.consequence, whereas
        # test_fails_consequence_filter sets var.info.consequence - confirm
        # which attribute passes_filters() reads, otherwise this loop may
        # only ever exercise the default consequence
        for cq in vep_passing:
            self.var.consequence = [[cq]]
            self.assertTrue(self.var.passes_filters())

    def test_fails_consequence_filter(self):
        """ check all the consequence values that should fail
        """

        vep_failing = ["splice_region_variant",
            "incomplete_terminal_codon_variant", "synonymous_variant",
            "stop_retained_variant", "mature_miRNA_variant",
            "5_prime_UTR_variant", "3_prime_UTR_variant",
            "non_coding_exon_variant", "nc_transcript_variant",
            "intron_variant", "NMD_transcript_variant",
            "upstream_gene_variant", "downstream_gene_variant",
            "TFBS_ablation", "TFBS_amplification", "TF_binding_site_variant",
            "regulatory_region_variant", "regulatory_region_ablation",
            "regulatory_region_amplification", "feature_elongation",
            "feature_truncation", "intergenic_variant",
            "coding_sequence_variant"]

        # check all the failing consequences
        for cq in vep_failing:
            self.var.info.consequence = [[cq]]
            self.assertFalse(self.var.passes_filters())

    def test_passes_filters_with_debug(self):
        """ check that passes_filters_with_debug() generates a failure message
        """

        # make a variant that will fail the filtering, and set the site for
        # debugging
        self.var.info["AFR_AF"] = "0.05"
        self.var.debug_pos = self.var.get_position()

        # get ready to capture the output from a print function. Keep hold of
        # the stream currently in use, so that exactly that stream is put
        # back afterwards (it might not be sys.__stdout__ if the test runner
        # has installed its own).
        out = StringIO()
        previous_stdout = sys.stdout
        sys.stdout = out

        try:
            # check that the variant fails (and secondarily prints the
            # failure mode)
            self.assertFalse(self.var.passes_filters_with_debug())
        finally:
            # reset the standard out even if the assertion fails, so that we
            # can observe other print statements
            sys.stdout = previous_stdout

        # check that the message about why the variant failed filtering is
        # correct
        output = out.getvalue().strip()
        self.assertEqual(output, "failed MAF: 0.05")
class TestVariantSnvPy(unittest.TestCase):
    """ Tests for genotype handling and variant filtering on SNV objects. """

    def setUp(self):
        """ define a default SNV object (SNV inherits VcfInfo) """

        chrom = "1"
        pos = "15000000"
        snp_id = "."
        ref = "A"
        alt = "G"
        filt = "PASS"

        # set up a SNV object, since SNV inherits VcfInfo
        self.var = SNV(chrom, pos, snp_id, ref, alt, filt)

        info = "HGNC=ATRX;CQ=missense_variant;random_tag"
        self.pops = [
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        ]

        self.format_keys = "GT:DP"
        self.sample_values = "0/1:50"

        self.var.add_info(info)

    def _check_genotype_calls(self, cases, query):
        """ set each genotype on the variant and compare a query result

        Args:
            cases: iterable of (genotype string, expected value) pairs.
            query: zero-argument callable (a bound method of self.var) that
                is evaluated after each call to set_genotype().
        """

        for genotype, expected in cases:
            self.var.format["GT"] = genotype
            self.var.set_genotype()
            self.assertEqual(query(), expected)

    def test_get_key(self):
        """ tests that get_key() operates correctly """

        # make sure the chrom and position are correct
        self.var.chrom = "1"
        self.var.position = "15000000"
        self.assertEqual(self.var.get_key(), ("1", "15000000"))

        # and make sure the chrom and position are correct if we change them
        self.var.chrom = "22"
        self.var.position = "123456789"
        self.assertEqual(self.var.get_key(), ("22", "123456789"))

    def test_convert_genotype(self):
        """ test that genotypes convert from two char to single char """

        genotypes = [
            ("0/0", 0), ("0/1", 1), ("1/0", 1), ("1/1", 2), ("1/2", 1),
            ("2/1", 1), ("0/2", 1), ("2/0", 1), ("2/2", 2),
        ]

        # run through all the legit genotype codes
        for genotype, expected in genotypes:
            self.assertEqual(self.var.convert_genotype(genotype), expected)

        # raise error when converting single character genotype
        with self.assertRaises(ValueError):
            self.var.convert_genotype("0")

        # raise error when converting unknown genotype
        with self.assertRaises(AssertionError):
            self.var.convert_genotype("a/a")

        # also include other genotype format possibilities. None of these are
        # used, but since they aren't explicitly forbidden, make sure they work

        # check two character strings
        self.assertEqual(self.var.convert_genotype("12|34"), 1)
        self.assertEqual(self.var.convert_genotype("99|99"), 2)

    def test_set_default_genotype(self):
        """ test that set_default_genotype() operates correctly on the autosomes """

        self.var.set_gender("male")
        self.var.set_default_genotype()
        self.assertEqual(self.var.get_genotype(), 0)

    def test_set_genotype_autosomal(self):
        """ test that set_genotype() operates correctly """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        genotypes = [("0/0", 0), ("0/1", 1), ("1/1", 2)]
        self._check_genotype_calls(genotypes, self.var.get_genotype)

        # remove the format attribute, so we can raise an error
        del self.var.format
        with self.assertRaises(ValueError):
            self.var.set_genotype()

    def test_set_genotype_allosomal_male(self):
        """ test that set_genotype() operates correctly for the male X chrom """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.chrom = "X"
        self.var.set_gender("male")

        genotypes = [("0/0", 0), ("1/1", 2)]
        self._check_genotype_calls(genotypes, self.var.get_genotype)

        # check that we raise an error for X chrom hets
        for genotype in ["0/1", "1/0"]:
            self.var.format["GT"] = genotype
            with self.assertRaises(ValueError):
                self.var.set_genotype()

    def test_set_genotype_allosomal_female(self):
        """ test that set_genotype() operates correctly for the female X chrom """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.chrom = "X"
        self.var.set_gender("female")

        genotypes = [("0/0", 0), ("0/1", 1), ("1/1", 2)]
        self._check_genotype_calls(genotypes, self.var.get_genotype)

    def test_is_het_autosomal(self):
        """ tests that is_het() operates correctly for autosomal chromosomes """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        het = [("0/0", False), ("0/1", True), ("1/1", False)]
        self._check_genotype_calls(het, self.var.is_het)

    def test_is_hom_alt_autosomal(self):
        """ tests that is_hom_alt() operates correctly for autosomal chromosomes """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        hom_alt = [("0/0", False), ("0/1", False), ("1/1", True)]
        self._check_genotype_calls(hom_alt, self.var.is_hom_alt)

    def test_is_hom_ref_autosomal(self):
        """ tests that is_hom_ref() operates correctly for autosomal chromosomes """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        hom_ref = [("0/0", True), ("0/1", False), ("1/1", False)]
        self._check_genotype_calls(hom_ref, self.var.is_hom_ref)

    def test_is_not_ref_autosomal(self):
        """ tests that is_not_ref() operates correctly for autosomal chromosomes """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        not_ref = [("0/0", False), ("0/1", True), ("1/1", True)]
        self._check_genotype_calls(not_ref, self.var.is_not_ref)

    def test_is_not_alt_autosomal(self):
        """ tests that is_not_alt() operates correctly for autosomal chromosomes """

        self.var.add_format(self.format_keys, self.sample_values)
        self.var.set_gender("male")

        not_alt = [("0/0", True), ("0/1", True), ("1/1", False)]
        self._check_genotype_calls(not_alt, self.var.is_not_alt)

    def test_passes_default_filters(self):
        """ test that the default variant passes the VcfInfo filters """

        # check that a default variant passes the filters
        self.assertTrue(self.var.passes_filters())

    def test_passes_alternate_filter_string(self):
        """ test that the alternate permitted FILTER string also passes """

        # check that the alternate FILTER value passes
        self.var.filter = "."
        self.assertTrue(self.var.passes_filters())

        self.var.filter = "FAIL"
        self.assertFalse(self.var.passes_filters())

        # check that low VQSLOD on its own will pass the variant
        self.var.filter = "LOW_VQSLOD"
        self.assertTrue(self.var.passes_filters())

        # check that low VQSLOD in a de novo will still pass
        self.var.filter = "LOW_VQSLOD"
        self.var.info["DENOVO-SNP"] = True
        self.assertTrue(self.var.passes_filters())

    def test_passes_filters_low_maf(self):
        """ tests that low MAF values pass the filters """

        # check that low MAF values pass the filters
        for pop in self.pops:
            self.var.info[pop] = "0.005"
            self.assertTrue(self.var.passes_filters())

            # and check that MAF on the threshold still pass
            self.var.info[pop] = "0.01"
            self.assertTrue(self.var.passes_filters())

    def test_out_of_range_maf(self):
        """ check that MAF outside 0-1 still pass or fail correctly """

        self.var.info["AFR_AF"] = "-1"
        self.assertTrue(self.var.passes_filters())

        self.var.info["AFR_AF"] = "100"
        self.assertFalse(self.var.passes_filters())

    def test_fails_filters_high_maf(self):
        """ test that variants with high MAF fail the filtering """

        # check that a MAF just above the threshold fails for every population
        for pop in self.pops:
            self.var.info[pop] = "0.0101"
            self.assertFalse(self.var.passes_filters())

            # drop the value again, so that each population is checked on its
            # own, rather than passing because an earlier population is high
            del self.var.info[pop]

    def test_passes_consequence_filter(self):
        """ check all the consequence values that should pass """

        vep_passing = [
            "transcript_ablation", "splice_donor_variant",
            "splice_acceptor_variant", "frameshift_variant",
            "initiator_codon_variant", "inframe_insertion",
            "inframe_deletion", "missense_variant",
            "transcript_amplification", "stop_gained", "stop_lost",
            "coding_sequence_variant",
        ]

        # check all the passing consequences
        for cq in vep_passing:
            self.var.consequence = [cq]
            self.assertTrue(self.var.passes_filters())

    def test_fails_consequence_filter(self):
        """ check all the consequence values that should fail """

        vep_failing = [
            "splice_region_variant", "incomplete_terminal_codon_variant",
            "synonymous_variant", "stop_retained_variant",
            "mature_miRNA_variant", "5_prime_UTR_variant",
            "3_prime_UTR_variant", "non_coding_exon_variant",
            "nc_transcript_variant", "intron_variant",
            "NMD_transcript_variant", "upstream_gene_variant",
            "downstream_gene_variant", "TFBS_ablation",
            "TFBS_amplification", "TF_binding_site_variant",
            "regulatory_region_variant", "regulatory_region_ablation",
            "regulatory_region_amplification", "feature_elongation",
            "feature_truncation", "intergenic_variant",
        ]

        # check all the failing consequences
        for cq in vep_failing:
            self.var.consequence = [cq]
            self.assertFalse(self.var.passes_filters())

    def test_passes_filters_with_debug(self):
        """ check that passes_filters_with_debug() generates a failure message """

        # make a variant that will fail the filtering, and set the site for
        # debugging
        self.var.info["AFR_AF"] = "0.05"
        self.var.debug_pos = self.var.get_position()

        # get ready to capture the output from a print function
        captured = StringIO()
        previous_stdout = sys.stdout
        sys.stdout = captured

        try:
            # check that the variant fails (and secondarily prints the failure
            # mode)
            self.assertFalse(self.var.passes_filters_with_debug())
        finally:
            # always restore the standard out, even if the assertion fails, so
            # that output from later tests is not swallowed by the buffer
            sys.stdout = previous_stdout

        # check that the message about why the variant failed filtering is
        # correct
        self.assertEqual(captured.getvalue().strip(), "failed MAF: 0.05")
class TestVariantInfoPy(unittest.TestCase):
    """ Tests for the INFO-derived methods (genes, consequences, MAF) of SNV. """

    def setUp(self):
        """ define a default SNV object (SNV inherits VcfInfo) """

        chrom = "1"
        pos = "15000000"
        snp_id = "CM00001"
        ref = "A"
        alt = "G"
        filt = "PASS"

        # set up a SNV object, since SNV inherits VcfInfo
        self.var = SNV(chrom, pos, snp_id, ref, alt, filt)
        self.var.debug_chrom = "1"
        self.var.debug_pos = "15000000"

        self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

        # here are the default filtering criteria, as loaded into python
        known_genes = {
            "ATRX": {
                "inheritance": {"Hemizygous": {"Loss of function"}},
                "start": "10000000",
                "chrom": "1",
                "confirmed_status": {"Confirmed DD Gene"},
                "end": "20000000",
            },
        }

        # known_genes is set on the SNV class rather than the instance, so put
        # the earlier value back after each test, to avoid leaking these genes
        # into tests in other classes
        self.addCleanup(setattr, SNV, "known_genes",
            getattr(SNV, "known_genes", None))
        SNV.known_genes = known_genes

        self.var.add_info(self.default_info)

    def test_set_gene_from_info(self):
        """ test that set_gene_from_info() works correctly """

        # check for when a HGNC key exists
        self.var.info["HGNC"] = "A"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.genes, ["A"])

        # check for when a HGNC key doesn't exist
        del self.var.info["HGNC"]
        self.var.set_gene_from_info()
        self.assertIsNone(self.var.genes)

        # check for multiple gene symbols
        self.var.info["HGNC"] = "A|B|C"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.genes, ["A", "B", "C"])

        # check for multiple gene symbols, when some are missing
        self.var.info["HGNC"] = "|.|C"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.genes, [None, None, "C"])

        # check for multiple gene symbols, when some missing symbols have
        # alternates in other symbol fields.
        self.var.info["HGNC"] = ".|.|C"
        self.var.info["SYMBOL"] = "Z|.|C"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.genes, ["Z", None, "C"])

        # Check that including alternate symbols has the correct precedence
        # order. Note that doing this properly would require checking all of
        # the possible order combinations.
        self.var.info["HGNC"] = ".|.|C"
        self.var.info["SYMBOL"] = "Z|.|C"
        self.var.info["ENSG"] = "A|.|C"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.genes, ["Z", None, "C"])

    def test_is_lof(self):
        """ test that is_lof() works correctly """

        # check that known LOF consequence return True
        self.var.consequence = ["stop_gained"]
        self.assertTrue(self.var.is_lof())

        # check that known non-LOF consequence returns False
        self.var.consequence = ["missense_variant"]
        self.assertFalse(self.var.is_lof())

        # check that null values return False
        self.var.consequence = None
        self.assertFalse(self.var.is_lof())

        # check when the variant overlaps multiple genes (so has multiple
        # gene symbols and consequences).
        self.var.consequence = ["stop_gained", "missense_variant"]
        self.var.genes = ["ATRX", "TTN"]
        self.assertTrue(self.var.is_lof())
        self.assertTrue(self.var.is_lof("ATRX"))
        self.assertFalse(self.var.is_lof("TTN"))

    def test_correct_multiple_alt(self):
        """ test that correct_multiple_alt works correctly """

        # define the number of alleles and consequences for multiple alleles
        self.var.info["AC"] = "1,1"
        cq = ["missense_variant,splice_acceptor_variant"]

        # check with alts that fall in one gene
        self.var.info["HGNC"] = "ATRX,ATRX"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.correct_multiple_alt(cq),
            (['splice_acceptor_variant'], ['ATRX'], None))

        # check with alts that fall in multiple genes
        cq = ["missense_variant|regulatory_region_variant,stop_gained|splice_acceptor_variant"]
        self.var.info["HGNC"] = "ATRX|TTN,ATRX|TTN"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.correct_multiple_alt(cq),
            (['stop_gained', 'splice_acceptor_variant'], ['ATRX', 'TTN'], None))

        # check a cq that has already been split by "|" (ie by gene)
        cq = ["missense_variant", "regulatory_region_variant,stop_gained",
            "splice_acceptor_variant"]
        self.var.set_gene_from_info()
        self.assertEqual(self.var.correct_multiple_alt(cq),
            (['stop_gained', 'splice_acceptor_variant'], ['ATRX', 'TTN'], None))

        # check that if the proband has a zero count for an allele, then we
        # disregard the consequences and HGNC symbols for that allele
        self.var.info["AC"] = "1,0"
        self.var.set_gene_from_info()
        self.assertEqual(self.var.correct_multiple_alt(cq),
            (['missense_variant', 'regulatory_region_variant'],
            ['ATRX', 'TTN'], None))

        # revert the allele counts, but drop the HGNC symbol, and make sure
        # the list of gene symbols returned is empty
        self.var.info["AC"] = "1,1"
        del self.var.info["HGNC"]
        self.var.set_gene_from_info()
        self.assertEqual(self.var.correct_multiple_alt(cq),
            (['stop_gained', 'splice_acceptor_variant'], [], None))

    def test_get_most_severe_consequence(self):
        """ test that get_most_severe_consequence works correctly """

        # check for the most simple list
        cq = ["missense_variant", "splice_acceptor_variant"]
        self.assertEqual(self.var.get_most_severe_consequence(cq),
            "splice_acceptor_variant")

        # check for a single-entry list
        cq = ["missense_variant"]
        self.assertEqual(self.var.get_most_severe_consequence(cq),
            "missense_variant")

        # check for lists of lists per allele
        cq_per_allele = [["synonymous_variant", "splice_donor_variant"],
            ["missense_variant", "regulatory_region_variant"]]
        self.assertEqual(self.var.get_most_severe_consequence(cq_per_allele),
            ["missense_variant", "splice_donor_variant"])

    def test_get_per_gene_consequence(self):
        """ test that get_per_gene_consequence works correctly """

        self.var.genes = ["ATRX"]
        self.var.consequence = ["missense_variant"]

        self.assertEqual(self.var.get_per_gene_consequence(None),
            ["missense_variant"])
        self.assertEqual(self.var.get_per_gene_consequence("ATRX"),
            ["missense_variant"])
        self.assertEqual(self.var.get_per_gene_consequence("TEST"), [])

        # check a variant with consequences in multiple genes, that we only
        # pull out the consequences for a single gene
        self.var.genes = ["ATRX", "TTN"]
        self.var.consequence = ["missense_variant", "synonymous_variant"]
        self.assertEqual(self.var.get_per_gene_consequence("ATRX"),
            ["missense_variant"])
        self.assertEqual(self.var.get_per_gene_consequence("TTN"),
            ["synonymous_variant"])

        # check a symbol where two symbols match
        self.var.genes = ["TEMP", "ATRX", "TEMP"]
        self.var.consequence = ["splice_acceptor_variant", "missense_variant",
            "synonymous_variant"]
        self.assertEqual(self.var.get_per_gene_consequence("TEMP"),
            ["splice_acceptor_variant", "synonymous_variant"])

        # check a symbol with some None gene symbols
        self.var.genes = [None, "ATRX", None]
        self.var.consequence = ["splice_acceptor_variant", "missense_variant",
            "synonymous_variant"]
        self.assertEqual(self.var.get_per_gene_consequence("ATRX"),
            ["missense_variant"])

        # check that the earlier VCFs with single consequences but multiple
        # symbols from HGNC_ALL give the same consequence for all genes.
        info = "HGNC_ALL=ATRX&TTN;CQ=missense_variant;random_tag"
        del self.var.info["HGNC"]
        self.var.genes = None
        self.var.add_info(info)

        self.assertEqual(self.var.get_per_gene_consequence("ATRX"),
            ["missense_variant"])
        self.assertEqual(self.var.get_per_gene_consequence("TTN"),
            ["missense_variant"])

    def test_get_allele_frequency(self):
        """ tests that number conversion works as expected """

        # single number returns that number
        self.assertEqual(self.var.get_allele_frequency("1"), 1)

        # two numbers return one number
        self.assertEqual(self.var.get_allele_frequency("1,1"), 1)

        # two numbers return the highest number
        self.assertEqual(self.var.get_allele_frequency("1,2"), 2)

        # number and string return the number
        self.assertEqual(self.var.get_allele_frequency("a,1"), 1)

        # single string value returns None
        self.assertIsNone(self.var.get_allele_frequency("a"))

        # multiple string values return None
        self.assertIsNone(self.var.get_allele_frequency("a,b"))

    def test_is_number(self):
        """ tests that we can check if a value represents a number """

        self.assertEqual(self.var.is_number(None), False)
        self.assertEqual(self.var.is_number("5"), True)
        self.assertEqual(self.var.is_number("a"), False)

    def test_find_max_allele_frequency(self):
        """ test if the MAF finder operates correctly """

        # check for var without recorded MAF
        self.assertIsNone(self.var.find_max_allele_frequency())

        # check for single population
        self.var.info["MAX_AF"] = "0.005"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.005)

        # check for two populations
        self.var.info["AFR_AF"] = "0.01"
        self.assertEqual(self.var.find_max_allele_frequency(), 0.01)

        # check for all populations
        pops = {"AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"}
        for pop in pops:
            self.var.info[pop] = "0.05"
            self.assertEqual(self.var.find_max_allele_frequency(), 0.05)