Esempio n. 1
0
    def setUp(self):
        """ build the empty Family fixture used by each test
        """

        self.family = Family("fam_ID")
Esempio n. 2
0
 def test_load_trio(self):
     ''' check that load_trio() returns the expected variants for a trio
     '''
     
     def make_vcf(person):
         # write a gzipped VCF for one family member, where one of the two
         # variant lines would pass the default filtering
         lines = make_vcf_header()
         for pos, extra in [(1, 'HGNC=TEST;MAX_AF=0.0001'),
                 (2, 'HGNC=ATRX;MAX_AF=0.0001')]:
             lines.append(make_vcf_line(pos=pos, extra=extra))
         
         vcf_path = os.path.join(self.temp_dir, "{}.vcf.gz".format(person))
         self.write_gzipped_vcf(vcf_path, lines)
         return vcf_path
     
     # every family member gets their own copy of the same VCF
     paths = [make_vcf(member) for member in ('child', 'mother', 'father')]
     child_path, mother_path, father_path = paths
     
     trio = Family('fam_id')
     trio.add_child('sample', 'mother_id', 'father_id', 'female', '2', child_path)
     trio.add_mother('mother_id', '0', '0', 'female', '1', mother_path)
     trio.add_father('father_id', '0', '0', 'male', '1', father_path)
     trio.set_child()
     
     # load the variants before constructing the expected objects
     observed = self.vcf_loader.load_trio(trio)
     
     # keyword arguments for the SNV expected in the child and mother; the
     # father only differs by gender
     female_args = {
         'chrom': "1",
         'position': 2,
         'id': ".",
         'ref': "G",
         'alts': "T",
         'filter': "PASS",
         'info': "CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
         'format': "DP:GT",
         'sample': "50:0/1",
         'gender': "female",
         'mnv_code': None,
     }
     male_args = dict(female_args, gender='male')
     
     expected = [TrioGenotypes(chrom="1", pos=2, child=SNV(**female_args),
         mother=SNV(**female_args), father=SNV(**male_args))]
     self.assertEqual(observed, expected)
Esempio n. 3
0
    def create_family(self, child_gender, mom_aff, dad_aff):
        """ build a test trio from the child's gender and the affected status
        of each parent, then call set_child() on it before returning it
        """

        trio = Family("test")
        trio.add_child("child", "child_vcf", "2", child_gender)
        trio.add_mother("mother", "mother_vcf", mom_aff, "2")
        trio.add_father("father", "father_vcf", dad_aff, "1")
        trio.set_child()

        return trio
    def create_family(self, child_gender, mom_aff, dad_aff):
        """ build a test trio from the child's gender and the affected status
        of each parent, then call set_child() on it before returning it
        """

        trio = Family('test')
        trio.add_child(
            'child', 'mother', 'father', child_gender, '2', 'child_vcf')
        trio.add_mother('mother', '0', '0', 'female', mom_aff, 'mother_vcf')
        trio.add_father('father', '0', '0', 'male', dad_aff, 'father_vcf')
        trio.set_child()

        return trio
Esempio n. 5
0
    def test_load_trio(self):
        ''' check that load_trio() returns the expected variants for a trio
        '''
        def make_vcf(person):
            # write a gzipped VCF for one family member, where one of the two
            # variant lines would pass the default filtering
            lines = make_vcf_header()
            for pos, extra in [(1, 'HGNC=TEST;MAX_AF=0.0001'),
                               (2, 'HGNC=ATRX;MAX_AF=0.0001')]:
                lines.append(make_vcf_line(pos=pos, extra=extra))

            vcf_path = os.path.join(self.temp_dir, "{}.vcf.gz".format(person))
            write_gzipped_vcf(vcf_path, lines)
            return vcf_path

        # every family member gets their own copy of the same VCF
        paths = [make_vcf(member) for member in ('child', 'mother', 'father')]
        child_path, mother_path, father_path = paths

        trio = Family('fam_id')
        trio.add_child(
            'sample', 'mother_id', 'father_id', 'female', '2', child_path)
        trio.add_mother('mother_id', '0', '0', 'female', '1', mother_path)
        trio.add_father('father_id', '0', '0', 'male', '1', father_path)
        trio.set_child()

        sum_x_lr2_proband = 0

        # load the variants before constructing the expected objects
        observed = load_trio(trio, sum_x_lr2_proband)

        # keyword arguments for the SNV expected in the child and mother; the
        # father only differs by gender
        female_args = {
            'chrom': "1",
            'position': 2,
            'id': ".",
            'ref': "G",
            'alts': "T",
            'filter': "PASS",
            'info': "CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001",
            'format': "DP:GT:AD",
            'sample': "50:0/1:10,10",
            'gender': "female",
            'mnv_code': None,
            'qual': '1000',
        }
        male_args = dict(female_args, gender='male')

        expected = [
            TrioGenotypes(chrom="1",
                          pos=2,
                          child=SNV(**female_args),
                          mother=SNV(**female_args),
                          father=SNV(**male_args))
        ]
        self.assertEqual(observed, expected)
Esempio n. 6
0
 def test_get_vcf_provenance(self):
     """ test that get_vcf_provenance() works correctly
     
     Checks the checksum, basename and date for an uncompressed VCF, the
     checksum for a gzipped VCF, the date taken from the file path when
     the VCF lacks a fileDate, and the values for a missing family member.
     """
     
     path = os.path.join(self.temp_dir, "temp.vcf")
     gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
     date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")
     
     family = Family('famid')
     family.add_child('child_id', 'mother', 'father', 'f', '2', path)
     family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
     family.add_father('dad_id', '0', '0', 'male', '1', date_path)
     family.set_child()
     
     vcf = make_minimal_vcf()
     vcf_string = "".join(vcf)
     if IS_PYTHON3:
         # hashlib needs bytes rather than str under python 3
         vcf_string = vcf_string.encode("utf-8")
     ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()
     
     write_temp_vcf(path, vcf)
     
     # check that the file defs return correctly
     (checksum, basename, date) = get_vcf_provenance(family.child)
     
     self.assertEqual(checksum, ungzipped_hash)
     self.assertEqual(basename, "temp.vcf")
     self.assertEqual(date, "2014-01-01")
     
     # now write a gzip file, and check that we get the correct hash. The
     # expected hash is of the compressed bytes as stored on disk.
     write_gzipped_vcf(gz_path, vcf)
     with open(gz_path, "rb") as handle:
         gzipped_hash = hashlib.sha1(handle.read()).hexdigest()
     
     (checksum, basename, date) = get_vcf_provenance(family.mother)
     self.assertEqual(checksum, gzipped_hash)
     
     # check that when a fileDate isn't available in the VCF, we can pick
     # the date from the path
     vcf.pop(1)
     write_temp_vcf(date_path, vcf)
     (checksum, basename, date) = get_vcf_provenance(family.father)
     self.assertEqual(date, "2014-02-20")
     
     # and check we get null values if the family member is not present
     family.father = None
     provenance = get_vcf_provenance(family.father)
     self.assertEqual(provenance, ('NA', 'NA', 'NA'))
 def create_family(self, child_gender, mom_aff, dad_aff):
     """ build a test trio from the child's gender and the affected status
     of each parent, then call set_child() on it before returning it
     """
     
     trio = Family("test")
     trio.add_child("child", "child_vcf", "2", child_gender)
     trio.add_mother("mother", "mother_vcf", mom_aff, "2")
     trio.add_father("father", "father_vcf", dad_aff, "1")
     trio.set_child()
     
     return trio
 def create_family(self, child_gender, mom_aff, dad_aff):
     """ build a test trio from the child's gender and the affected status
     of each parent, then call set_child() on it before returning it
     """
     
     trio = Family('test')
     trio.add_child(
         'child', 'mother', 'father', child_gender, '2', 'child_vcf')
     trio.add_mother('mother', '0', '0', 'female', mom_aff, 'mother_vcf')
     trio.add_father('father', '0', '0', 'male', dad_aff, 'father_vcf')
     trio.set_child()
     
     return trio
Esempio n. 9
0
    def test_debug_option(self):
        """ check that load_variants() only swaps in the debug filter function
        for SNVs when the extra debug arguments are provided
        """

        vcf_path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(vcf_path, make_minimal_vcf())

        known = {}
        pops = None
        sum_x_lr2 = {}

        proband = Person('fam', 'child', '0', '0', 'f', '2', vcf_path)
        fam = Family('fam', children=[proband])

        # without the debug arguments, the SNV class keeps its standard
        # filter function
        load_variants(fam, 1.0, pops, known, set(), sum_x_lr2)
        self.assertNotEqual(SNV.passes_filters, SNV.passes_filters_with_debug)

        # with the debug arguments, the filter function is replaced by the
        # debug version
        load_variants(
            fam, 1.0, pops, known, set(), sum_x_lr2, "1", "10000")
        self.assertEqual(SNV.passes_filters, SNV.passes_filters_with_debug)
 def test_is_compound_pair_proband_only(self):
     """ check that is_compound_pair() accepts a pair of variants when the
     family only contains the proband
     """
     
     proband_only = Family("test")
     proband_only.add_child("child", 'dad_id', 'mom_id', 'F', '2',  "child_vcf")
     proband_only.set_child()
     
     # two stop_gained variants at nearby positions on chromosome 1
     first, second = [
         self.create_variant(chrom="1", position=pos, sex="F", cq="stop_gained")
         for pos in ("150", "160")]
     
     inheritance = Autosomal([first, second], proband_only, self.known_gene, "TEST")
     
     # a proband-only pair passes, regardless of the parental genotypes
     self.assertTrue(inheritance.is_compound_pair(first, second))
Esempio n. 11
0
    def test_load_variants(self):
        ''' test that load_variants() works correctly. Mainly checks that the
        class-level variables on SNV, CNV and Info are set from the arguments.
        '''

        vcf = make_minimal_vcf()
        path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(path, vcf)
        sum_x_lr2 = {}
        parents = True

        fam = Family(
            'fam', children=[Person('fam', 'child', '0', '0', 'f', '2', path)])
        load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(),
                      sum_x_lr2, parents)

        self.assertEqual(SNV.known_genes, self.known_genes)
        self.assertEqual(CNV.known_genes, self.known_genes)
        self.assertEqual(Info.populations, ['AFR_AF'])
        self.assertEqual(Info.last_base, set())

        # and check that the class-level variables are replaced when we load
        # again with different arguments
        load_variants(fam, 0.9, [], None, set([('1', 100)]), sum_x_lr2,
                      parents)
        # assertIsNone() takes the failure message as its second argument, so
        # only the value under test is passed
        self.assertIsNone(SNV.known_genes)
        self.assertIsNone(CNV.known_genes)
        self.assertEqual(Info.populations, [])
        self.assertEqual(Info.last_base, set([('1', 100)]))
Esempio n. 12
0
 def setUp(self):
     """ build the empty Family fixture used by each test
     """
     
     self.family = Family("fam_ID")
 def test_find_variants(self):
     """ check which variants find_variants() reports for a gene, both with
     and without a set of known genes
     """
     
     # define the trio, so that we can know whether the parents are affected.
     # The child also needs to be included and set, so that we can get the
     # child ID for logging purposes.
     trio = Family("famID")
     trio.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
     trio.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
     trio.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
     trio.set_child()
     
     # variants covering the scenarios checked below
     two_genes = create_variant("F", "missense_variant|missense_variant", "TEST1|TEST2")
     mixed_cq = create_variant("F", "missense_variant|synonymous_variant", "OTHER1|OTHER2")
     intergenic = create_variant("F", "missense_variant", "")
     on_chrx = create_variant("F", "missense_variant", "TESTX", chrom="X")
     
     # each gene gets its own inheritance dictionary
     known = dict((gene, {"inh": ["Monoallelic"]})
         for gene in ("TEST1", "OTHER1", "OTHER2"))
     known["TESTX"] = {"inh": ["X-linked dominant"]}
     self.finder.known_genes = known
     
     find = self.finder.find_variants
     
     # the simplest case, a variant in a known gene
     expected = [(two_genes, ["single_variant"], ["Monoallelic"], ["TEST1"])]
     self.assertEqual(find([two_genes], "TEST1", trio), expected)
     
     # a gene that is not among the known genes does not pass
     self.assertEqual(find([two_genes], "TEST2", trio), [])
     
     # a variant where the gene is known, but the consequence for that gene
     # is not functional, does not pass
     self.assertEqual(find([mixed_cq], "OTHER2", trio), [])
     
     # intergenic variants (which lack HGNC symbols) do not pass
     self.assertEqual(find([intergenic], None, trio), [])
     
     # a variant on chrX passes through the allosomal instance
     expected = [(on_chrx, ["single_variant"], ["X-linked dominant"], ["TESTX"])]
     self.assertEqual(find([on_chrx], "TESTX", trio), expected)
     
     # remove the known genes, so that the variants in unknown genes pass
     self.finder.known_genes = None
     expected = [
         (two_genes, ["single_variant"], ["Monoallelic"], ["TEST2"]),
         (two_genes, ["single_variant"], ["Mosaic"], ["TEST2"])]
     self.assertEqual(sorted(find([two_genes], "TEST2", trio)), expected)
     
     # but variants without gene symbols are still excluded
     self.assertEqual(find([intergenic], None, trio), [])
Esempio n. 14
0
    def setUp(self):
        """ build a PostInheritanceFilter from a trio and one SNV plus one CNV
        """

        trio = Family("FamID")
        trio.add_child("child_id", "/child/path", "2", "M")
        trio.add_mother("mom_id", "/mother/path", "1", "F")
        trio.add_father("dad_id", "/father/path", "2", "M")
        trio.set_child()

        snv = self.create_var("1", True)
        cnv = self.create_var("1", False)

        # each entry pairs a variant with its check type, inheritance mode
        # and gene symbol
        candidates = [
            (snv, ["single_variant"], ["Monoallelic"], ["ATRX"]),
            (cnv, ["single_variant"], ["Monoallelic"], ["ATRX"]),
        ]

        self.post_filter = PostInheritanceFilter(candidates, trio)
Esempio n. 15
0
    def test_filter_de_novos(self):
        """ check that filter_de_novos() passes variants through for a family
        without parents, drops apparent de novos once parents are present,
        and retains variants also found in a parent
        """

        loader = self.vcf_loader
        threshold = 0.9

        # start with a family that only has a child
        family = Family("fam_id")
        child_gender = "female"
        family.add_child("child_id", "child_vcf_path", "2", child_gender)
        loader.family = family

        # build an autosomal variant from the fields of a VCF line
        fields = ["1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1"]
        child_var = SNV(*fields[:6])
        child_var.add_info(fields[7])
        child_var.add_format(fields[8], fields[9])
        child_var.set_gender(child_gender)
        child_var.set_genotype()

        # combine the variant into a list of TrioGenotypes
        child_vars = [child_var]
        mother_vars = []
        father_vars = []
        trio_variants = loader.combine_trio_variants(
            child_vars, mother_vars, father_vars)

        # without parents, variants are passed through automatically
        self.assertEqual(
            loader.filter_de_novos(trio_variants, threshold), trio_variants)

        # now add parents to the family
        family.add_mother("mother_id", "mother_vcf_path", "1", "female")
        family.add_father("father_id", "father_vcf_path", "1", "male")

        # re-generate the variants list now that parents have been included
        trio_variants = loader.combine_trio_variants(
            child_vars, mother_vars, father_vars)

        # with parents, variants that appear to be de novo are filtered out
        self.assertEqual(loader.filter_de_novos(trio_variants, threshold), [])

        # with parents, variants which are not de novo are retained
        mother_vars = child_vars
        trio_variants = loader.combine_trio_variants(
            child_vars, mother_vars, father_vars)

        self.assertEqual(
            loader.filter_de_novos(trio_variants, threshold), trio_variants)
Esempio n. 16
0
    def setUp(self):
        """ build a PostInheritanceFilter for a trio, and a list holding one
        SNV and one CNV candidate
        """

        Info.populations = ['AFR_AF']

        trio = Family('test')
        trio.add_child('child', 'mother', 'father', 'male', '2', 'child_vcf')
        trio.add_mother('mother', '0', '0', 'female', '1', 'mother_vcf')
        trio.add_father('father', '0', '0', 'male', '2', 'father_vcf')
        trio.set_child()

        # the list is assigned first, then filled in place
        self.variants = []
        snv = self.create_var("1", True)
        cnv = self.create_var("1", False)

        # each entry pairs a variant with its check type, inheritance mode
        # and gene symbol
        for var in (snv, cnv):
            self.variants.append(
                (var, ["single_variant"], ["Monoallelic"], ["ATRX"]))

        self.post_filter = PostInheritanceFilter(trio)
Esempio n. 17
0
    def test_filter_de_novos(self):
        """ check that filter_de_novos() works correctly

        Variants pass through for a family without parents, apparent de novos
        are dropped once parents are present, and variants also found in a
        parent are retained.
        """

        # make a family without parents
        family = Family("fam_id")
        child_gender = "female"
        family.add_child('child_id', 'mother_id', 'father_id', child_gender,
                         '2', 'child_path')

        # set up an autosomal variant
        gender = "M"
        args = [
            "1", "100", ".", "T", "G", "1000", "PASS", ".", "GT", "0/1", gender
        ]
        child_var = SNV(*args)

        # combine the variant into a list of TrioGenotypes
        child_vars = [child_var]
        mother_vars = []
        father_vars = []
        trio_variants = combine_trio_variants(family, child_vars, mother_vars,
                                              father_vars)

        # check that vars without parents get passed through automatically
        self.assertEqual(filter_de_novos(trio_variants, 0.9), trio_variants)

        # now add parents to the family (the family object is modified in
        # place, so nothing needs to be reassigned)
        family.add_mother("mother_id", '0', '0', 'female', '1',
                          "mother_vcf_path")
        family.add_father("father_id", '0', '0', 'male', '1',
                          "father_vcf_path")

        # re-generate the variants list now that parents have been included
        trio_variants = combine_trio_variants(family, child_vars, mother_vars,
                                              father_vars)

        # check that vars with parents, and that appear to be de novo are
        # filtered out
        self.assertEqual(filter_de_novos(trio_variants, 0.9), [])

        # check that vars with parents, but which are not de novo, are retained
        mother_vars = child_vars
        trio_variants = combine_trio_variants(family, child_vars, mother_vars,
                                              father_vars)

        self.assertEqual(filter_de_novos(trio_variants, 0.9), trio_variants)
 def setUp(self):
     """ build a PostInheritanceFilter for a trio, and a list holding one
     SNV and one CNV candidate
     """
     
     Info.populations = ['AFR_AF']
     
     trio = Family('test')
     trio.add_child('child', 'mother', 'father', 'male', '2', 'child_vcf')
     trio.add_mother('mother', '0', '0', 'female', '1', 'mother_vcf')
     trio.add_father('father', '0', '0', 'male', '2', 'father_vcf')
     trio.set_child()
     
     # the list is assigned first, then filled in place
     self.variants = []
     snv = self.create_var("1", True)
     cnv = self.create_var("1", False)
     
     # each entry pairs a variant with its check type, inheritance mode and
     # gene symbol
     for var in (snv, cnv):
         self.variants.append((var, ["single_variant"], ["Monoallelic"], ["ATRX"]))
     
     self.post_filter = PostInheritanceFilter(trio)
Esempio n. 19
0
def get_families(args):
    """ get the list of Family objects to analyse

    When args.ped is set, the families are loaded from that ped file.
    Otherwise a single family is built from the child, and any parents,
    defined by the other attributes of args.
    """

    if args.ped is not None:
        return load_families(args.ped)

    trio = Family('blank_family_ID')
    trio.add_child('child', args.mother, args.father, args.gender, '2',
                   args.child)

    # parents are optional, only include the ones we have been given
    if args.mother is not None:
        trio.add_mother('mother', '0', '0', '2', args.mom_aff, args.mother)
    if args.father is not None:
        trio.add_father('father', '0', '0', '1', args.dad_aff, args.father)

    return [trio]
def get_families(args):
    """ get the list of Family objects to analyse
    
    When args.ped is set, the families are loaded from that ped file.
    Otherwise a single family is built from the child, and any parents,
    defined by the other attributes of args.
    """
    
    if args.ped is not None:
        return load_families(args.ped)
    
    trio = Family('blank_family_ID')
    trio.add_child('child', args.mother, args.father, args.gender, '2', args.child)
    
    # parents are optional, only include the ones we have been given
    if args.mother is not None:
        trio.add_mother('mother', '0', '0', '2', args.mom_aff, args.mother)
    if args.father is not None:
        trio.add_father('father', '0', '0', '1', args.dad_aff, args.father)
    
    return [trio]
Esempio n. 21
0
 def test_filter_de_novos(self):
     """ check that filter_de_novos() passes variants through for a family
     without parents, drops apparent de novos once parents are present, and
     retains variants also found in a parent
     """
     
     loader = self.vcf_loader
     threshold = 0.9
     
     # start with a family that only has a child
     family = Family("fam_id")
     child_gender = "female"
     family.add_child('child_id', 'mother_id', 'father_id', child_gender, '2', 'child_path')
     loader.family = family
     
     # set up an autosomal variant
     gender = "M"
     fields = ["1", "100", ".", "T", "G", "PASS", ".", "GT", "0/1", gender]
     child_var = SNV(*fields)
     
     # combine the variant into a list of TrioGenotypes
     child_vars = [child_var]
     mother_vars = []
     father_vars = []
     trio_variants = loader.combine_trio_variants(
         family, child_vars, mother_vars, father_vars)
     
     # without parents, variants are passed through automatically
     self.assertEqual(
         loader.filter_de_novos(trio_variants, threshold), trio_variants)
     
     # now add parents to the family
     family.add_mother("mother_id", '0', '0', 'female', '1', "mother_vcf_path")
     family.add_father("father_id", '0', '0', 'male', '1', "father_vcf_path")
     loader.family = family
     
     # re-generate the variants list now that parents have been included
     trio_variants = loader.combine_trio_variants(
         family, child_vars, mother_vars, father_vars)
     
     # with parents, variants that appear to be de novo are filtered out
     self.assertEqual(loader.filter_de_novos(trio_variants, threshold), [])
     
     # with parents, variants which are not de novo are retained
     mother_vars = child_vars
     trio_variants = loader.combine_trio_variants(
         family, child_vars, mother_vars, father_vars)
     
     self.assertEqual(
         loader.filter_de_novos(trio_variants, threshold), trio_variants)
Esempio n. 22
0
    def test_is_compound_pair_proband_only(self):
        """ check that is_compound_pair() accepts a pair of variants when the
        family only contains the proband
        """

        proband_only = Family("test")
        proband_only.add_child(
            "child", 'dad_id', 'mom_id', 'F', '2', "child_vcf")
        proband_only.set_child()

        # two stop_gained variants at nearby positions on chromosome 1
        first, second = [
            self.create_variant(
                chrom="1", position=pos, sex="F", cq="stop_gained")
            for pos in ("150", "160")
        ]

        inheritance = Autosomal(
            [first, second], proband_only, self.known_gene, "TEST")

        # a proband-only pair passes, regardless of the parental genotypes
        self.assertTrue(inheritance.is_compound_pair(first, second))
Esempio n. 23
0
    def test_load_families(self):
        """ check that load_families() returns Family objects matching the
        contents of the ped file, for one family and then for two
        """

        # build the family we expect from the ped file as it currently stands,
        # with the same sample IDs etc
        first = Family("fam_ID")
        first.add_child(
            "proband", 'dad', 'mom', 'F', '2', "/path/to/proband_vcf.gz")
        first.add_mother("mom", '0', '0', 'F', '1', "/path/to/mom_vcf.gz")
        first.add_father("dad", '0', '0', 'M', '1', "/path/to/dad_vcf.gz")

        # the ped file should load as that single family
        self.assertEqual(load_families(self.path), [first])

        # append a second family, which has multiple sibs
        extra_lines = [
            "fam_ID2  proband2 dad2  mom2  F  2  /path/to/proband2_vcf.gz\n",
            "fam_ID2  dad2     0     0     M  1  /path/to/dad2_vcf.gz\n",
            "fam_ID2  mom2     0     0     F  1  /path/to/mom2_vcf.gz\n",
            "fam_ID2  sib      dad2  mom2  F  2  /path/to/sib_vcf.gz\n",
        ]
        for ped_line in extra_lines:
            self.tempfile.write(ped_line)
        self.tempfile.flush()

        # build the second family we expect from the extended ped file
        second = Family("fam_ID2")
        second.add_child(
            "proband2", 'dad2', 'mom2', 'F', '2', "/path/to/proband2_vcf.gz")
        second.add_child(
            "sib", 'dad2', 'mom2', 'F', '2', "/path/to/sib_vcf.gz")
        second.add_mother("mom2", '0', '0', 'F', '1', "/path/to/mom2_vcf.gz")
        second.add_father("dad2", '0', '0', 'M', '1', "/path/to/dad2_vcf.gz")

        # both sides are sorted before comparing the two families
        observed = sorted(load_families(self.path))
        self.assertEqual(observed, sorted([first, second]))
Esempio n. 24
0
    def test_get_vcf_provenance(self):
        """ test that get_vcf_provenance() works correctly

        Checks the checksum, basename and date for an uncompressed VCF, the
        checksum for a gzipped VCF, the date taken from the file path when
        the VCF lacks a fileDate, and the values for a missing family member.
        """

        path = os.path.join(self.temp_dir, "temp.vcf")
        gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
        date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")

        family = Family('famid')
        family.add_child('child_id', 'mother', 'father', 'f', '2', path)
        family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
        family.add_father('dad_id', '0', '0', 'male', '1', date_path)
        family.set_child()

        vcf = make_minimal_vcf()
        vcf_string = "".join(vcf)
        if IS_PYTHON3:
            # hashlib needs bytes rather than str under python 3
            vcf_string = vcf_string.encode("utf-8")
        ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()

        write_temp_vcf(path, vcf)

        # check that the file defs return correctly
        (checksum, basename, date) = get_vcf_provenance(family.child)

        self.assertEqual(checksum, ungzipped_hash)
        self.assertEqual(basename, "temp.vcf")
        self.assertEqual(date, "2014-01-01")

        # now write a gzip file, and check that we get the correct hash. The
        # expected hash is of the compressed bytes as stored on disk.
        write_gzipped_vcf(gz_path, vcf)
        with open(gz_path, "rb") as handle:
            gzipped_hash = hashlib.sha1(handle.read()).hexdigest()

        (checksum, basename, date) = get_vcf_provenance(family.mother)
        self.assertEqual(checksum, gzipped_hash)

        # check that when a fileDate isn't available in the VCF, we can pick
        # the date from the path
        vcf.pop(1)
        write_temp_vcf(date_path, vcf)
        (checksum, basename, date) = get_vcf_provenance(family.father)
        self.assertEqual(date, "2014-02-20")

        # and check we get null values if the family member is not present
        family.father = None
        provenance = get_vcf_provenance(family.father)
        self.assertEqual(provenance, ('NA', 'NA', 'NA'))
Esempio n. 25
0
class TestFamily(unittest.TestCase):
    """ unit tests for adding members to a Family, iterating over it, and
    selecting which child is currently being examined
    """
    def setUp(self):
        """ define a default, empty Family object
        """

        self.family = Family("fam_ID")

    def test__iter__(self):
        ''' test that __iter__() works correctly
        '''

        family_id, person_id = 'fam_ID', 'child'
        # add_child() and Person() both take the father ID before the
        # mother ID
        dad_id, mom_id = 'dad', 'mom'
        path, status, sex = 'child.vcf', '2', 'M'
        child = Person(family_id, person_id, dad_id, mom_id, sex, status, path)

        self.family.add_child(person_id, dad_id, mom_id, sex, status, path)
        self.family.set_child()

        # check the Family iterates by getting a list of the Family object.
        # No parents were added, so the two entries after the child are None.
        members = list(self.family)
        self.assertEqual(members, [child, None, None])

    def test_add_father(self):
        """ test that add_father() works correctly
        """

        person_id = "parent_ID"
        path = "/home/parent.vcf"
        status = "1"
        sex = "1"
        dad_id = "0"
        mom_id = "0"

        # check that adding a male father doesn't raise an error
        self.family.add_father(person_id, dad_id, mom_id, sex, status, path)

        # check that adding a father for a second time is fine, but adding
        # a different father raises an error
        self.family.add_father(person_id, dad_id, mom_id, sex, status, path)
        with self.assertRaises(ValueError):
            self.family.add_father("different_ID", dad_id, mom_id, sex, status,
                                   path)

        # check that adding a female father raises an error. setUp() is
        # re-run to start from a family without a father.
        self.setUp()
        sex = "2"
        with self.assertRaises(ValueError):
            self.family.add_father(person_id, dad_id, mom_id, sex, status,
                                   path)

    def test_add_mother(self):
        """ test that add_mother() works correctly
        """

        person_id = "parent_ID"
        path = "/home/parent.vcf"
        status = "1"
        sex = "2"
        dad_id = "0"
        mom_id = "0"

        # check that adding a female mother doesn't raise an error
        self.family.add_mother(person_id, dad_id, mom_id, sex, status, path)

        # check that adding a mother for a second time is fine, but adding
        # a different mother raises an error
        self.family.add_mother(person_id, dad_id, mom_id, sex, status, path)
        with self.assertRaises(ValueError):
            self.family.add_mother("different_ID", dad_id, mom_id, sex, status,
                                   path)

        # check that adding a male mother raises an error. setUp() is
        # re-run to start from a family without a mother.
        self.setUp()
        sex = "1"
        with self.assertRaises(ValueError):
            self.family.add_mother(person_id, dad_id, mom_id, sex, status,
                                   path)

    def test_add_child(self):
        """ check that add_child() works correctly
        """

        # check that we can add one child
        self.family.add_child("child1", 'dad', 'mom', 'male', '2',
                              "/home/child1.vcf")
        self.assertEqual(len(self.family.children), 1)

        # check that adding multiple children works correctly
        self.family.add_child("child2", 'dad', 'mom', 'female', '2',
                              "/home/child2.vcf")
        self.family.add_child("child3", 'dad', 'mom', 'male', '2',
                              "/home/child3.vcf")
        self.assertEqual(len(self.family.children), 3)

    def test_set_child(self):
        """ test that set_child() works correctly
        """

        # add one child
        self.family.add_child("child1", 'dad', 'mom', 'male', '2',
                              "/home/child1.vcf")

        # check that the child can be set correctly
        self.family.set_child()
        self.assertEqual(self.family.child, self.family.children[0])

        # add more children
        self.family.add_child("child2", 'dad', 'mom', 'male', '2',
                              "/home/child2.vcf")
        self.family.add_child("child3", 'dad', 'mom', 'female', '2',
                              "/home/child3.vcf")

        # check that the child can be set correctly with multiple children
        self.family.set_child()
        self.assertIn(self.family.child, self.family.children)

    def test_set_child_examined(self):
        """ test that set_child_examined() works correctly
        """

        # add one child
        self.family.add_child("child1", 'dad', 'mom', 'male', '2',
                              "/home/child1.vcf")

        # check that the child can be set correctly, and can be set as having
        # been examined
        self.family.set_child()
        self.family.set_child_examined()
        self.assertTrue(self.family.children[0].is_analysed())
        self.assertIsNone(self.family.child)

        # add another child, and check that when we set the child, we now pick
        # up this child since the other one has previously been examined
        self.family.add_child("child2", 'dad', 'mom', 'female', '2',
                              "/home/child2.vcf")
        self.family.set_child()
        self.assertEqual(self.family.child, self.family.children[1])

        # make sure that set_child_examined() doesn't default to None if we
        # have children left to analyse
        self.family.add_child("child3", 'dad', 'mom', 'female', '2',
                              "/home/child3.vcf")
        self.family.set_child()
        self.family.set_child_examined()
        self.assertIsNotNone(self.family.child)

        # and set child = None once we have analysed all the children
        self.family.set_child()
        self.family.set_child_examined()
        self.assertIsNone(self.family.child)
Esempio n. 26
0
 def test_load_families(self):
     """ check that load_families() returns Family objects matching the
     contents of the ped file, for one family and then for two
     """
     
     # build the family we expect from the ped file as it currently stands,
     # with the same sample IDs etc
     first = Family("fam_ID")
     first.add_child("proband", 'dad', 'mom', 'F', '2', "/path/to/proband_vcf.gz")
     first.add_mother("mom", '0', '0', 'F', '1', "/path/to/mom_vcf.gz")
     first.add_father("dad", '0', '0', 'M', '1',  "/path/to/dad_vcf.gz")
     
     # the ped file should load as that single family
     self.assertEqual(load_families(self.path), [first])
     
     # append a second family, which has multiple sibs
     extra_lines = [
         "fam_ID2  proband2 dad2  mom2  F  2  /path/to/proband2_vcf.gz\n",
         "fam_ID2  dad2     0     0     M  1  /path/to/dad2_vcf.gz\n",
         "fam_ID2  mom2     0     0     F  1  /path/to/mom2_vcf.gz\n",
         "fam_ID2  sib      dad2  mom2  F  2  /path/to/sib_vcf.gz\n",
     ]
     for ped_line in extra_lines:
         self.tempfile.write(ped_line)
     self.tempfile.flush()
     
     # build the second family we expect from the extended ped file
     second = Family("fam_ID2")
     second.add_child("proband2", 'dad2', 'mom2', 'F', '2', "/path/to/proband2_vcf.gz")
     second.add_child("sib", 'dad2', 'mom2', 'F', '2', "/path/to/sib_vcf.gz")
     second.add_mother("mom2", '0', '0', 'F', '1', "/path/to/mom2_vcf.gz")
     second.add_father("dad2", '0', '0', 'M', '1', "/path/to/dad2_vcf.gz")
     
     # both sides are sorted before comparing the two families
     observed = sorted(load_families(self.path))
     self.assertEqual(observed, sorted([first, second]))
Esempio n. 27
0
class TestFamily(unittest.TestCase):
    """ unit tests for adding members to a Family, and for stepping through
    its children with set_child() and set_child_examined()
    """
    
    def setUp(self):
        """ give every test a fresh Family without any members
        """
        
        self.family = Family("fam_ID")
    
    def test_add_father(self):
        """ check that add_father() accepts a male and rejects conflicts
        """
        
        # ID, VCF path, affected status, gender (male)
        father = ("parent_ID", "/home/parent.vcf", "1", "1")
        
        # a male father can be added without raising an error
        self.family.add_father(*father)
        
        # repeating the same father is fine, but a father with a different ID
        # must be refused
        self.family.add_father(*father)
        self.assertRaises(ValueError, self.family.add_father,
            "different_ID", "/home/parent.vcf", "1", "1")
        
        # start again from an empty family: a female father must be refused
        self.setUp()
        self.assertRaises(ValueError, self.family.add_father,
            "parent_ID", "/home/parent.vcf", "1", "2")
    
    def test_add_mother(self):
        """ check that add_mother() accepts a female and rejects conflicts
        """
        
        # ID, VCF path, affected status, gender (female)
        mother = ("parent_ID", "/home/parent.vcf", "1", "2")
        
        # a female mother can be added without raising an error
        self.family.add_mother(*mother)
        
        # repeating the same mother is fine, but a mother with a different ID
        # must be refused
        self.family.add_mother(*mother)
        self.assertRaises(ValueError, self.family.add_mother,
            "different_ID", "/home/parent.vcf", "1", "2")
        
        # start again from an empty family: a male mother must be refused
        self.setUp()
        self.assertRaises(ValueError, self.family.add_mother,
            "parent_ID", "/home/parent.vcf", "1", "1")
    
    def test_add_child(self):
        """ check that add_child() appends each child to the family
        """
        
        # a single child
        self.family.add_child("child1", "/home/child1.vcf", "2", "1")
        self.assertEqual(len(self.family.children), 1)
        
        # further children accumulate alongside the first
        siblings = (
            ("child2", "/home/child2.vcf", "2", "2"),
            ("child3", "/home/child3.vcf", "2", "1"),
        )
        for sibling in siblings:
            self.family.add_child(*sibling)
        self.assertEqual(len(self.family.children), 3)
    
    def test_set_child(self):
        """ check that set_child() selects one of the family's children
        """
        
        fam = self.family
        
        # with a single child, that child is the one selected
        fam.add_child("child1", "/home/child1.vcf", "2", "1")
        fam.set_child()
        self.assertEqual(fam.child, fam.children[0])
        
        # with several children, the selection is still one of them
        fam.add_child("child2", "/home/child2.vcf", "2", "1")
        fam.add_child("child3", "/home/child3.vcf", "2", "2")
        fam.set_child()
        self.assertIn(fam.child, fam.children)
    
    def test_set_child_examined(self):
        """ check that set_child_examined() marks children as analysed
        """
        
        fam = self.family
        
        # select the only child and flag it as examined: it should count as
        # analysed, and no child remains selected
        fam.add_child("child1", "/home/child1.vcf", "2", "1")
        fam.set_child()
        fam.set_child_examined()
        self.assertTrue(fam.children[0].is_analysed())
        self.assertIsNone(fam.child)
        
        # a newly added child is picked up next, since the first one has
        # already been examined
        fam.add_child("child2", "/home/child2.vcf", "2", "2")
        fam.set_child()
        self.assertEqual(fam.child, fam.children[1])
        
        # while unexamined children remain, set_child_examined() must not
        # leave the current child as None
        fam.add_child("child3", "/home/child3.vcf", "2", "2")
        fam.set_child()
        fam.set_child_examined()
        self.assertIsNotNone(fam.child)
        
        # once every child has been examined, the current child is None
        fam.set_child()
        fam.set_child_examined()
        self.assertIsNone(fam.child)
    def test_analyse_trio(self):
        ''' test that analyse_trio() works correctly

        Writes a one-variant VCF for each of child, mom and dad, wraps the
        three files in a Family, and checks that analyse_trio() returns a
        single (TrioGenotypes, check types, inheritance modes, gene symbols)
        tuple for the variant.
        '''

        # construct the VCFs for the trio members
        paths = {}
        for member in ['child', 'mom', 'dad']:
            vcf = make_vcf_header()

            # the child's line gets a '0/1' genotype plus de novo annotations,
            # the parents' lines get a '0/0' genotype and no extra annotations
            geno, pp_dnm = '0/0', ''
            if member == 'child':
                geno, pp_dnm = '0/1', ';DENOVO-SNP;PP_DNM=1'

            vcf.append(
                make_vcf_line(genotype=geno, extra='HGNC=ARID1B' + pp_dnm))

            # write the VCF data to a file. The with-block closes the handle
            # (flushing the data) before the file is read back, so no file
            # descriptor leaks; delete=False keeps the file on disk afterwards.
            with tempfile.NamedTemporaryFile(dir=self.temp_dir,
                                             delete=False,
                                             suffix='.vcf') as handle:
                handle.writelines(x.encode('utf8') for x in vcf)

            paths[member] = handle.name

        # create a Family object, so we can load the data from the trio's VCFs
        fam_id = 'fam01'
        child = Person(fam_id, 'child', 'dad', 'mom', 'female', '2',
                       paths['child'])
        mom = Person(fam_id, 'mom', '0', '0', 'female', '1', paths['mom'])
        dad = Person(fam_id, 'dad', '0', '0', 'male', '1', paths['dad'])
        family = Family(fam_id, [child], mom, dad)

        # fields that are identical in the expected variant of all three
        # trio members
        shared = {
            'chrom': "1",
            'position': 1,
            'id': ".",
            'ref': "G",
            'alts': "T",
            'qual': '1000',
            'filter': "PASS",
            'format': "DP:GT",
            'mnv_code': None,
        }
        parent_info = "CQ=missense_variant;HGNC=ARID1B"

        expected_child = SNV(
            info="CQ=missense_variant;DENOVO-SNP;HGNC=ARID1B;PP_DNM=1",
            sample="50:0/1",
            gender="female",
            **shared)
        expected_mother = SNV(info=parent_info,
                              sample="50:0/0",
                              gender="female",
                              **shared)
        expected_father = SNV(info=parent_info,
                              sample="50:0/0",
                              gender="male",
                              **shared)

        expected_trio = TrioGenotypes(chrom="1",
                                      pos=1,
                                      child=expected_child,
                                      mother=expected_mother,
                                      father=expected_father)

        self.assertEqual(
            self.finder.analyse_trio(family),
            [(expected_trio, ['single_variant'], ['Monoallelic', 'Mosaic'],
              ['ARID1B'])])
    def test_find_variants(self):
        """ check which variants find_variants() reports for a given gene
        """

        # find_variants() is handed the trio so the parents' affected statuses
        # are known. The child is added and set too, so that its ID is
        # available for logging.
        trio = Family("famID")
        trio.add_child("child_id", 'dad_id', 'mom_id', 'f', '2', "/vcf/path")
        trio.add_father("dad_id", '0', '0', 'm', '1', "/vcf/path")
        trio.add_mother("mom_id", '0', '0', 'f', '1', "/vcf/path")
        trio.set_child()

        # one variant for each scenario exercised below
        two_genes = create_variant("F", "missense_variant|missense_variant",
                                   "TEST1|TEST2")
        mixed_cq = create_variant("F", "missense_variant|synonymous_variant",
                                  "OTHER1|OTHER2")
        no_symbol = create_variant("F", "missense_variant", "")
        on_chrx = create_variant("F", "missense_variant", "TESTX", chrom="X")

        # known genes, each with a single inheritance mode
        inheritance = (
            ("TEST1", "Monoallelic"),
            ("OTHER1", "Monoallelic"),
            ("OTHER2", "Monoallelic"),
            ("TESTX", "X-linked dominant"),
        )
        self.finder.known_genes = {
            gene: {"inh": [mode]} for gene, mode in inheritance
        }

        # simplest case: a variant in a known gene is reported
        found = self.finder.find_variants([two_genes], "TEST1", trio)
        self.assertEqual(
            found,
            [(two_genes, ["single_variant"], ["Monoallelic"], ["TEST1"])])

        # a gene that is not among the known genes gives nothing
        found = self.finder.find_variants([two_genes], "TEST2", trio)
        self.assertEqual(found, [])

        # a known gene gives nothing when the consequence for that gene is
        # not functional
        found = self.finder.find_variants([mixed_cq], "OTHER2", trio)
        self.assertEqual(found, [])

        # intergenic variants (no HGNC symbol) give nothing
        found = self.finder.find_variants([no_symbol], None, trio)
        self.assertEqual(found, [])

        # a chrX variant is handled through the allosomal instance
        found = self.finder.find_variants([on_chrx], "TESTX", trio)
        self.assertEqual(
            found,
            [(on_chrx, ["single_variant"], ["X-linked dominant"], ["TESTX"])])

        # without a known-gene list, variants in unknown genes are reported
        self.finder.known_genes = None
        found = sorted(self.finder.find_variants([two_genes], "TEST2", trio))
        self.assertEqual(
            found,
            [(two_genes, ["single_variant"], ["Monoallelic"], ["TEST2"]),
             (two_genes, ["single_variant"], ["Mosaic"], ["TEST2"])])

        # ... while variants without a gene symbol are still excluded
        found = self.finder.find_variants([no_symbol], None, trio)
        self.assertEqual(found, [])