class TestVariantInfoPy(unittest.TestCase):
    """Exercise the VcfInfo methods through an SNV instance.

    This version of the suite expects ``genes`` and ``consequence`` to be
    lists, with one entry per overlapping gene.

    NOTE(review): a second class that is also named ``TestVariantInfoPy``
    is defined further down in this file. If both really live in the same
    module, the later definition replaces this one - confirm which suite
    is meant to run.
    """

    def setUp(self):
        """Build the default variant that every test starts from."""
        # VcfInfo is exercised through SNV, since SNV inherits VcfInfo
        self.var = SNV("1", "15000000", "CM00001", "A", "G", "PASS")
        self.var.debug_chrom = "1"
        self.var.debug_pos = "15000000"

        self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

        # the default filtering criteria, in the form they take once loaded
        atrx = {
            "inheritance": {"Hemizygous": {"Loss of function"}},
            "start": "10000000",
            "chrom": "1",
            "confirmed_status": {"Confirmed DD Gene"},
            "end": "20000000",
        }
        SNV.known_genes = {"ATRX": atrx}

        self.var.add_info(self.default_info)

    def test_set_gene_from_info(self):
        """Check that set_gene_from_info() fills in the gene list."""
        variant = self.var

        # a single HGNC symbol
        variant.info["HGNC"] = "A"
        variant.set_gene_from_info()
        self.assertEqual(variant.genes, ["A"])

        # no HGNC key at all
        del variant.info["HGNC"]
        variant.set_gene_from_info()
        self.assertIsNone(variant.genes)

        # several symbols separated by "|"
        variant.info["HGNC"] = "A|B|C"
        variant.set_gene_from_info()
        self.assertEqual(variant.genes, ["A", "B", "C"])

        # several symbols, some of which are missing
        variant.info["HGNC"] = "|.|C"
        variant.set_gene_from_info()
        self.assertEqual(variant.genes, [None, None, "C"])

        # missing HGNC symbols that have an alternate in another field
        variant.info["HGNC"] = ".|.|C"
        variant.info["SYMBOL"] = "Z|.|C"
        variant.set_gene_from_info()
        self.assertEqual(variant.genes, ["Z", None, "C"])

        # Alternate symbols must be applied in the correct precedence
        # order. Testing this thoroughly would need every possible order
        # combination; only one is checked here.
        variant.info["HGNC"] = ".|.|C"
        variant.info["SYMBOL"] = "Z|.|C"
        variant.info["ENSG"] = "A|.|C"
        variant.set_gene_from_info()
        self.assertEqual(variant.genes, ["Z", None, "C"])

    def test_is_lof(self):
        """Check that is_lof() recognises loss-of-function consequences."""
        variant = self.var

        # a known LoF consequence returns True
        variant.consequence = ["stop_gained"]
        self.assertTrue(variant.is_lof())

        # known non-LoF consequences and null values return False
        for not_lof in (["missense_variant"], None):
            variant.consequence = not_lof
            self.assertFalse(variant.is_lof())

        # a variant overlapping several genes has one symbol and one
        # consequence per gene
        variant.consequence = ["stop_gained", "missense_variant"]
        variant.genes = ["ATRX", "TTN"]
        self.assertTrue(variant.is_lof())
        self.assertTrue(variant.is_lof("ATRX"))
        self.assertFalse(variant.is_lof("TTN"))

    def test_correct_multiple_alt(self):
        """Check that correct_multiple_alt() handles multi-allelic sites."""
        variant = self.var

        # allele counts for a site with two alternate alleles
        variant.info["AC"] = "1,1"

        # both alts fall in one gene
        consequences = ["missense_variant,splice_acceptor_variant"]
        variant.info["HGNC"] = "ATRX,ATRX"
        variant.set_gene_from_info()
        self.assertEqual(
            variant.correct_multiple_alt(consequences),
            (["splice_acceptor_variant"], ["ATRX"], None),
        )

        # both alts fall in several genes
        consequences = [
            "missense_variant|regulatory_region_variant,"
            "stop_gained|splice_acceptor_variant"
        ]
        variant.info["HGNC"] = "ATRX|TTN,ATRX|TTN"
        variant.set_gene_from_info()
        self.assertEqual(
            variant.correct_multiple_alt(consequences),
            (["stop_gained", "splice_acceptor_variant"], ["ATRX", "TTN"], None),
        )

        # consequences that were already split on "|" (ie by gene)
        consequences = [
            "missense_variant",
            "regulatory_region_variant,stop_gained",
            "splice_acceptor_variant",
        ]
        variant.set_gene_from_info()
        self.assertEqual(
            variant.correct_multiple_alt(consequences),
            (["stop_gained", "splice_acceptor_variant"], ["ATRX", "TTN"], None),
        )

        # when the proband has a zero count for an allele, the
        # consequences and HGNC symbols for that allele are disregarded
        variant.info["AC"] = "1,0"
        variant.set_gene_from_info()
        self.assertEqual(
            variant.correct_multiple_alt(consequences),
            (
                ["missense_variant", "regulatory_region_variant"],
                ["ATRX", "TTN"],
                None,
            ),
        )

        # restore the allele counts but drop the HGNC entry; the gene
        # list in the result is then expected to be empty
        variant.info["AC"] = "1,1"
        del variant.info["HGNC"]
        variant.set_gene_from_info()
        self.assertEqual(
            variant.correct_multiple_alt(consequences),
            (["stop_gained", "splice_acceptor_variant"], [], None),
        )

    def test_get_most_severe_consequence(self):
        """Check that get_most_severe_consequence() picks the worst one."""
        pick = self.var.get_most_severe_consequence

        # the simplest list
        self.assertEqual(
            pick(["missense_variant", "splice_acceptor_variant"]),
            "splice_acceptor_variant",
        )

        # a single-entry list
        self.assertEqual(pick(["missense_variant"]), "missense_variant")

        # one list of consequences per allele
        per_allele = [
            ["synonymous_variant", "splice_donor_variant"],
            ["missense_variant", "regulatory_region_variant"],
        ]
        self.assertEqual(
            pick(per_allele), ["missense_variant", "splice_donor_variant"]
        )

    def test_get_per_gene_consequence(self):
        """Check that get_per_gene_consequence() selects by gene symbol."""
        variant = self.var

        variant.genes = ["ATRX"]
        variant.consequence = ["missense_variant"]
        single_gene_cases = (
            (None, ["missense_variant"]),
            ("ATRX", ["missense_variant"]),
            ("TEST", []),
        )
        for symbol, expected in single_gene_cases:
            self.assertEqual(
                variant.get_per_gene_consequence(symbol), expected
            )

        # with consequences in several genes, only the consequences for
        # the requested gene are pulled out
        variant.genes = ["ATRX", "TTN"]
        variant.consequence = ["missense_variant", "synonymous_variant"]
        self.assertEqual(
            variant.get_per_gene_consequence("ATRX"), ["missense_variant"]
        )
        self.assertEqual(
            variant.get_per_gene_consequence("TTN"), ["synonymous_variant"]
        )

        # a symbol that matches two entries
        variant.genes = ["TEMP", "ATRX", "TEMP"]
        variant.consequence = [
            "splice_acceptor_variant",
            "missense_variant",
            "synonymous_variant",
        ]
        self.assertEqual(
            variant.get_per_gene_consequence("TEMP"),
            ["splice_acceptor_variant", "synonymous_variant"],
        )

        # a gene list in which some symbols are None
        variant.genes = [None, "ATRX", None]
        variant.consequence = [
            "splice_acceptor_variant",
            "missense_variant",
            "synonymous_variant",
        ]
        self.assertEqual(
            variant.get_per_gene_consequence("ATRX"), ["missense_variant"]
        )

        # earlier VCFs carry a single consequence but several symbols in
        # HGNC_ALL; every gene should then get that same consequence
        legacy_info = "HGNC_ALL=ATRX&TTN;CQ=missense_variant;random_tag"
        del variant.info["HGNC"]
        variant.genes = None
        variant.add_info(legacy_info)
        for symbol in ("ATRX", "TTN"):
            self.assertEqual(
                variant.get_per_gene_consequence(symbol),
                ["missense_variant"],
            )

    def test_get_allele_frequency(self):
        """Check that number conversion works as expected."""
        cases = (
            ("1", 1),       # single number returns that number
            ("1,1", 1),     # two numbers return one number
            ("1,2", 2),     # two numbers return the highest number
            ("a,1", 1),     # number and string return the number
            ("a", None),    # single string value returns None
            ("a,b", None),  # multiple string values return None
        )
        for raw, expected in cases:
            self.assertEqual(self.var.get_allele_frequency(raw), expected)

    def test_is_number(self):
        """Check that we can tell whether a value represents a number."""
        for value, expected in ((None, False), ("5", True), ("a", False)):
            self.assertEqual(self.var.is_number(value), expected)

    def test_find_max_allele_frequency(self):
        """Check that the MAF finder operates correctly."""
        variant = self.var

        # variant without any recorded MAF
        self.assertIsNone(variant.find_max_allele_frequency())

        # a single population
        variant.info["MAX_AF"] = "0.005"
        self.assertEqual(variant.find_max_allele_frequency(), 0.005)

        # two populations
        variant.info["AFR_AF"] = "0.01"
        self.assertEqual(variant.find_max_allele_frequency(), 0.01)

        # all populations
        populations = {
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        }
        for population in populations:
            variant.info[population] = "0.05"
        self.assertEqual(variant.find_max_allele_frequency(), 0.05)
class TestVariantInfoPy(unittest.TestCase):
    """Exercise the VcfInfo methods through an SNV instance.

    This version of the suite expects a single ``gene`` symbol and a single
    ``consequence`` string per variant.

    NOTE(review): another class that is also named ``TestVariantInfoPy`` is
    defined earlier in this file. If both really live in the same module,
    this definition replaces the earlier one - confirm which suite is meant
    to run.
    """

    def setUp(self):
        """Build the default variant that every test starts from."""
        # VcfInfo is exercised through SNV, since SNV inherits VcfInfo
        self.var = SNV("1", "15000000", "CM00001", "A", "G", "PASS")
        self.var.debug_chrom = "1"
        self.var.debug_pos = "15000000"

        self.default_info = "HGNC=ATRX;CQ=missense_variant;random_tag"

        # the default filtering criteria, in the form they take once loaded
        atrx = {
            "inheritance": {"Hemizygous": {"Loss of function"}},
            "start": "10000000",
            "chrom": "1",
            "confirmed_status": {"Confirmed DD Gene"},
            "end": "20000000",
        }
        SNV.known_genes = {"ATRX": atrx}

        self.var.add_info(self.default_info)

    def test_set_gene_from_info(self):
        """Check that set_gene_from_info() fills in the gene symbol."""
        variant = self.var

        # the HGNC key exists
        variant.info["HGNC"] = "A"
        variant.set_gene_from_info()
        self.assertEqual(variant.gene, "A")

        # the HGNC key does not exist
        del variant.info["HGNC"]
        variant.set_gene_from_info()
        self.assertIsNone(variant.gene)

    def test_is_lof(self):
        """Check that is_lof() recognises loss-of-function consequences."""
        variant = self.var

        # a known LoF consequence returns True
        variant.consequence = "stop_gained"
        self.assertTrue(variant.is_lof())

        # a known non-LoF consequence and a null value return False
        for not_lof in ("missense_variant", None):
            variant.consequence = not_lof
            self.assertFalse(variant.is_lof())

    def test_get_allele_frequency(self):
        """Check that number conversion works as expected."""
        cases = (
            ("1", 1),       # single number returns that number
            ("1,1", 1),     # two numbers return one number
            ("1,2", 2),     # two numbers return the highest number
            ("a,1", 1),     # number and string return the number
            ("a", None),    # single string value returns None
            ("a,b", None),  # multiple string values return None
        )
        for raw, expected in cases:
            self.assertEqual(self.var.get_allele_frequency(raw), expected)

    def test_is_number(self):
        """Check that we can tell whether a value represents a number."""
        for value, expected in ((None, False), ("5", True), ("a", False)):
            self.assertEqual(self.var.is_number(value), expected)

    def test_find_max_allele_frequency(self):
        """Check that the MAF finder operates correctly."""
        variant = self.var

        # variant without any recorded MAF
        self.assertIsNone(variant.find_max_allele_frequency())

        # a single population
        variant.info["MAX_AF"] = "0.005"
        self.assertEqual(variant.find_max_allele_frequency(), 0.005)

        # two populations
        variant.info["AFR_AF"] = "0.01"
        self.assertEqual(variant.find_max_allele_frequency(), 0.01)

        # all populations
        populations = {
            "AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
            "ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF",
        }
        for population in populations:
            variant.info[population] = "0.05"
        self.assertEqual(variant.find_max_allele_frequency(), 0.05)