Beispiel #1
0
    def test_load_trio(self):
        ''' check that load_trio() returns the expected TrioGenotypes for a trio

        Each family member gets an identical gzipped VCF holding two variant
        lines; the expected result contains only the ATRX variant at
        position 2.
        '''

        def write_member_vcf(member):
            # build a header plus two variant lines, then write them out as a
            # gzipped VCF named after the family member
            lines = make_vcf_header()
            for pos, extra in ((1, 'HGNC=TEST;MAX_AF=0.0001'),
                    (2, 'HGNC=ATRX;MAX_AF=0.0001')):
                lines.append(make_vcf_line(pos=pos, extra=extra))

            vcf_path = os.path.join(self.temp_dir, "{}.vcf.gz".format(member))
            write_gzipped_vcf(vcf_path, lines)
            return vcf_path

        # the files are written in child, mother, father order
        child_path = write_member_vcf('child')
        mother_path = write_member_vcf('mother')
        father_path = write_member_vcf('father')

        # NOTE(review): the child's parent IDs are passed mother-first here,
        # whereas the other tests pass the father's ID first - confirm this
        # matches the add_child() parameter order.
        family = Family('fam_id')
        family.add_child('sample', 'mother_id', 'father_id', 'female', '2',
            child_path)
        family.add_mother('mother_id', '0', '0', 'female', '1', mother_path)
        family.add_father('father_id', '0', '0', 'male', '1', father_path)
        family.set_child()

        sum_x_lr2_proband = 0

        # load the trio before constructing the expected variants, so the
        # expected SNV objects are built after load_trio() has run
        observed = load_trio(family, sum_x_lr2_proband)

        # keyword arguments shared by the expected SNV of every trio member
        snv_kwargs = dict(chrom="1", position=2, id=".", ref="G", alts="T",
            filter="PASS", info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
            format="DP:GT", sample="50:0/1", gender="female",
            mnv_code=None, qual='1000')
        # the father only differs from the others by gender
        father_kwargs = dict(snv_kwargs, gender='male')

        expected = [TrioGenotypes(chrom="1", pos=2, child=SNV(**snv_kwargs),
            mother=SNV(**snv_kwargs), father=SNV(**father_kwargs))]

        self.assertEqual(observed, expected)
Beispiel #2
0
 def test_load_variants(self):
     ''' test that load_variants() works correctly. Mainly checks that the
     class-level attributes on SNV, CNV and Info match the arguments that
     were passed to the most recent load_variants() call.
     '''
     
     # write a minimal VCF to disk for a family containing a single child
     vcf = make_minimal_vcf()
     path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(path, vcf)
     sum_x_lr2 = {}
     
     fam = Family('fam', children=[Person('fam', 'child', '0', '0', 'f', '2', path)])
     
     # only the class-level state is inspected, so the returned variants
     # are not kept
     load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(), sum_x_lr2)
     
     self.assertEqual(SNV.known_genes, self.known_genes)
     self.assertEqual(CNV.known_genes, self.known_genes)
     self.assertEqual(Info.populations, ['AFR_AF'])
     self.assertEqual(Info.last_base, set())
     
     # and check that the class attributes are replaced when load_variants()
     # is called again with different arguments
     load_variants(fam, 0.9, [], None, set([('1', 100)]), sum_x_lr2)
     
     # assertIsNone() takes the value under test plus an optional failure
     # message, so no comparison value is passed here
     self.assertIsNone(SNV.known_genes)
     self.assertIsNone(CNV.known_genes)
     self.assertEqual(Info.populations, [])
     self.assertEqual(Info.last_base, set([('1', 100)]))
    def test_analyse_trio(self):
        ''' test that analyse_trio() works correctly

        Builds one VCF per trio member, each holding a single ARID1B variant.
        The child is heterozygous (0/1) and flagged as de novo, while both
        parents are homozygous reference (0/0). The variant is expected to
        be reported as a single variant under the Monoallelic and Mosaic
        modes for ARID1B.
        '''

        # construct the VCFs for the trio members
        paths = {}
        for member in ['child', 'mom', 'dad']:
            vcf = make_vcf_header()

            geno, pp_dnm = '0/0', ''
            if member == 'child':
                geno, pp_dnm = '0/1', ';DENOVO-SNP;PP_DNM=1'

            vcf.append(
                make_vcf_line(genotype=geno, extra='HGNC=ARID1B' + pp_dnm))

            # write the VCF data to a file. The context manager closes the
            # handle once the data is written (the original left every handle
            # open), and delete=False keeps the file on disk for loading.
            with tempfile.NamedTemporaryFile(dir=self.temp_dir,
                                             delete=False,
                                             suffix='.vcf') as handle:
                for line in vcf:
                    handle.write(line.encode('utf8'))
                paths[member] = handle.name

        # create a Family object, so we can load the data from the trio's VCFs
        fam_id = 'fam01'
        child = Person(fam_id, 'child', 'dad', 'mom', 'female', '2',
                       paths['child'])
        mom = Person(fam_id, 'mom', '0', '0', 'female', '1', paths['mom'])
        dad = Person(fam_id, 'dad', '0', '0', 'male', '1', paths['dad'])
        family = Family(fam_id, [child], mom, dad)

        # run the analysis before constructing the expected variants, so the
        # expected SNV objects are built after analyse_trio() has run
        observed = self.finder.analyse_trio(family)

        def expected_snv(info, sample, gender):
            # the trio members only differ by INFO, genotype and gender
            return SNV(chrom="1",
                       position=1,
                       id=".",
                       ref="G",
                       alts="T",
                       qual='1000',
                       filter="PASS",
                       info=info,
                       format="DP:GT",
                       sample=sample,
                       gender=gender,
                       mnv_code=None)

        parent_info = "CQ=missense_variant;HGNC=ARID1B"
        expected_trio = TrioGenotypes(
            chrom="1",
            pos=1,
            child=expected_snv(
                "CQ=missense_variant;DENOVO-SNP;HGNC=ARID1B;PP_DNM=1",
                "50:0/1", "female"),
            mother=expected_snv(parent_info, "50:0/0", "female"),
            father=expected_snv(parent_info, "50:0/0", "male"))

        self.assertEqual(observed, [(expected_trio, ['single_variant'],
                                     ['Monoallelic', 'Mosaic'], ['ARID1B'])])
    def test_find_variants(self):
        """ check find_variants() with and without a set of known genes
        """

        # A full trio is required so the parental affected statuses are
        # available; the child is added and set so that its ID can be used
        # for logging.
        family = Family("famID")
        family.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
        family.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
        family.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
        family.set_child()

        # variants covering the scenarios exercised below
        two_missense = create_variant(
            "F", "missense_variant|missense_variant", "TEST1|TEST2")
        missense_and_synonymous = create_variant(
            "F", "missense_variant|synonymous_variant", "OTHER1|OTHER2")
        no_symbol = create_variant("F", "missense_variant", "")
        chrx_variant = create_variant(
            "F", "missense_variant", "TESTX", chrom="X")

        # three autosomal monoallelic genes plus one X-linked dominant gene,
        # each with its own inheritance list
        known = {
            gene: {"inh": ["Monoallelic"]}
            for gene in ("TEST1", "OTHER1", "OTHER2")
        }
        known["TESTX"] = {"inh": ["X-linked dominant"]}
        self.finder.known_genes = known

        # simplest case: the variant sits in a known gene
        found = self.finder.find_variants([two_missense], "TEST1", family)
        self.assertEqual(
            found,
            [(two_missense, ["single_variant"], ["Monoallelic"], ["TEST1"])])

        # cases that must yield nothing while known genes are defined:
        #  - a gene symbol that is absent from the known genes
        #  - a known gene where the consequence for that gene is not
        #    functional
        #  - an intergenic variant, which lacks a HGNC symbol
        rejected = (
            (two_missense, "TEST2"),
            (missense_and_synonymous, "OTHER2"),
            (no_symbol, None),
        )
        for variant, symbol in rejected:
            found = self.finder.find_variants([variant], symbol, family)
            self.assertEqual(found, [])

        # a chrX variant passes through the allosomal instance
        found = self.finder.find_variants([chrx_variant], "TESTX", family)
        self.assertEqual(
            found,
            [(chrx_variant, ["single_variant"], ["X-linked dominant"],
              ["TESTX"])])

        # without any known genes, variants in unknown genes pass
        self.finder.known_genes = None
        found = sorted(
            self.finder.find_variants([two_missense], "TEST2", family))
        self.assertEqual(
            found,
            [(two_missense, ["single_variant"], ["Monoallelic"], ["TEST2"]),
             (two_missense, ["single_variant"], ["Mosaic"], ["TEST2"])])

        # variants lacking a gene symbol are still excluded
        found = self.finder.find_variants([no_symbol], None, family)
        self.assertEqual(found, [])