def test_load_trio(self):
    """Check that load_trio() returns the expected TrioGenotypes for a trio.

    Each family member gets an identical two-line gzipped VCF; the test
    expects only the ATRX variant at position 2 to be returned.
    """

    def write_member_vcf(member):
        # Build a VCF where one line would pass the default filtering.
        lines = make_vcf_header()
        for position, info in ((1, 'HGNC=TEST;MAX_AF=0.0001'),
                               (2, 'HGNC=ATRX;MAX_AF=0.0001')):
            lines.append(make_vcf_line(pos=position, extra=info))

        vcf_path = os.path.join(self.temp_dir, "{}.vcf.gz".format(member))
        write_gzipped_vcf(vcf_path, lines)
        return vcf_path

    # Write the VCFs in the same order as before: child, mother, father.
    vcf_paths = {}
    for member in ('child', 'mother', 'father'):
        vcf_paths[member] = write_member_vcf(member)

    family = Family('fam_id')
    family.add_child('sample', 'mother_id', 'father_id', 'female', '2',
                     vcf_paths['child'])
    family.add_mother('mother_id', '0', '0', 'female', '1',
                      vcf_paths['mother'])
    family.add_father('father_id', '0', '0', 'male', '1',
                      vcf_paths['father'])
    family.set_child()

    sum_x_lr2_proband = 0

    # Load first, so the expected SNVs are constructed after load_trio() has
    # run (same evaluation order as comparing inline).
    observed = load_trio(family, sum_x_lr2_proband)

    # Keyword arguments shared by the expected SNV of every trio member.
    args = dict(
        chrom="1",
        position=2,
        id=".",
        ref="G",
        alts="T",
        filter="PASS",
        info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
        format="DP:GT",
        sample="50:0/1",
        gender="female",
        mnv_code=None,
        qual='1000',
    )
    # The father differs only in gender.
    dad_args = dict(copy.deepcopy(args), gender='male')

    expected = [
        TrioGenotypes(chrom="1", pos=2,
                      child=SNV(**args),
                      mother=SNV(**args),
                      father=SNV(**dad_args)),
    ]
    self.assertEqual(observed, expected)
def test_load_variants(self):
    """Test that load_variants() works correctly.

    Mainly checks that the class-level variables on SNV, CNV and Info are
    set from the arguments passed to load_variants().
    """
    vcf = make_minimal_vcf()
    path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(path, vcf)

    sum_x_lr2 = {}
    fam = Family('fam',
                 children=[Person('fam', 'child', '0', '0', 'f', '2', path)])

    # The return value is not inspected; only the class attributes that the
    # call is expected to set are checked.
    load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(), sum_x_lr2)

    self.assertEqual(SNV.known_genes, self.known_genes)
    self.assertEqual(CNV.known_genes, self.known_genes)
    self.assertEqual(Info.populations, ['AFR_AF'])
    self.assertEqual(Info.last_base, set())

    # and check that the class attributes are replaced when load_variants()
    # is called again with different arguments
    load_variants(fam, 0.9, [], None, set([('1', 100)]), sum_x_lr2)

    # assertIsNone() takes (expr, msg=None); passing an expected value as the
    # second argument would only be used as the failure message.
    self.assertIsNone(SNV.known_genes)
    self.assertIsNone(CNV.known_genes)
    self.assertEqual(Info.populations, [])
    self.assertEqual(Info.last_base, set([('1', 100)]))
def test_analyse_trio(self):
    """Test that analyse_trio() works correctly.

    Writes one single-variant VCF per trio member (child 0/1 flagged with
    DENOVO-SNP and PP_DNM=1, both parents 0/0), builds a Family from the
    files, and compares the result of self.finder.analyse_trio() against
    the expected (variant, check types, inheritance modes, genes) tuple.
    """
    # construct the VCFs for the trio members
    paths = {}
    for member in ['child', 'mom', 'dad']:
        vcf = make_vcf_header()

        geno, pp_dnm = '0/0', ''
        if member == 'child':
            geno, pp_dnm = '0/1', ';DENOVO-SNP;PP_DNM=1'

        vcf.append(
            make_vcf_line(genotype=geno, extra='HGNC=ARID1B' + pp_dnm))

        # Write the VCF data to a file. The context manager closes (and so
        # flushes) the handle instead of leaking it; delete=False keeps the
        # file on disk so it can be read back by path afterwards.
        with tempfile.NamedTemporaryFile(dir=self.temp_dir, delete=False,
                                         suffix='.vcf') as handle:
            for x in vcf:
                handle.write(x.encode('utf8'))
            paths[member] = handle.name

    # create a Family object, so we can load the data from the trio's VCFs
    fam_id = 'fam01'
    child = Person(fam_id, 'child', 'dad', 'mom', 'female', '2',
                   paths['child'])
    mom = Person(fam_id, 'mom', '0', '0', 'female', '1', paths['mom'])
    dad = Person(fam_id, 'dad', '0', '0', 'male', '1', paths['dad'])
    family = Family(fam_id, [child], mom, dad)

    self.assertEqual(self.finder.analyse_trio(family), [(
        TrioGenotypes(
            chrom="1", pos=1,
            child=SNV(
                chrom="1", position=1, id=".", ref="G", alts="T",
                qual='1000', filter="PASS",
                info="CQ=missense_variant;DENOVO-SNP;HGNC=ARID1B;PP_DNM=1",
                format="DP:GT", sample="50:0/1", gender="female",
                mnv_code=None),
            mother=SNV(
                chrom="1", position=1, id=".", ref="G", alts="T",
                qual='1000', filter="PASS",
                info="CQ=missense_variant;HGNC=ARID1B",
                format="DP:GT", sample="50:0/0", gender="female",
                mnv_code=None),
            father=SNV(
                chrom="1", position=1, id=".", ref="G", alts="T",
                qual='1000', filter="PASS",
                info="CQ=missense_variant;HGNC=ARID1B",
                format="DP:GT", sample="50:0/0", gender="male",
                mnv_code=None)),
        ['single_variant'],
        ['Monoallelic', 'Mosaic'],
        ['ARID1B'])])
def test_find_variants(self):
    """Exercise find_variants() with and without a known-genes dictionary."""
    # The trio is defined so that parental affected status is available. The
    # child is also added and set, since the child ID is needed for logging.
    family = Family("famID")
    family.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
    family.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
    family.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
    family.set_child()

    # Variants covering the different scenarios checked below.
    two_gene_snv = create_variant(
        "F", "missense_variant|missense_variant", "TEST1|TEST2")
    mixed_consequence_snv = create_variant(
        "F", "missense_variant|synonymous_variant", "OTHER1|OTHER2")
    intergenic_snv = create_variant("F", "missense_variant", "")
    chrx_snv = create_variant("F", "missense_variant", "TESTX", chrom="X")

    # Build the known genes, with a separate inheritance entry per gene.
    known = {}
    for symbol, mode in (("TEST1", "Monoallelic"),
                         ("OTHER1", "Monoallelic"),
                         ("OTHER2", "Monoallelic"),
                         ("TESTX", "X-linked dominant")):
        known[symbol] = {"inh": [mode]}
    self.finder.known_genes = known

    # Simplest case: a variant in a known gene is reported.
    found = self.finder.find_variants([two_gene_snv], "TEST1", family)
    self.assertEqual(
        found,
        [(two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST1"])])

    # A gene that is absent from the known genes does not pass.
    found = self.finder.find_variants([two_gene_snv], "TEST2", family)
    self.assertEqual(found, [])

    # The gene is known, but the consequence for that gene is not
    # functional, so the variant does not pass.
    found = self.finder.find_variants(
        [mixed_consequence_snv], "OTHER2", family)
    self.assertEqual(found, [])

    # Intergenic variants (which lack HGNC symbols) do not pass.
    found = self.finder.find_variants([intergenic_snv], None, family)
    self.assertEqual(found, [])

    # A chrX variant passes through the allosomal instance.
    found = self.finder.find_variants([chrx_snv], "TESTX", family)
    self.assertEqual(
        found,
        [(chrx_snv, ["single_variant"], ["X-linked dominant"], ["TESTX"])])

    # Without known genes, variants in otherwise-unknown genes pass.
    self.finder.known_genes = None
    found = sorted(
        self.finder.find_variants([two_gene_snv], "TEST2", family))
    self.assertEqual(
        found,
        [(two_gene_snv, ["single_variant"], ["Monoallelic"], ["TEST2"]),
         (two_gene_snv, ["single_variant"], ["Mosaic"], ["TEST2"])])

    # Variants without gene symbols are still excluded.
    found = self.finder.find_variants([intergenic_snv], None, family)
    self.assertEqual(found, [])