예제 #1
0
    def test_load_variants(self):
        """ test that load_variants() works correctly.

        Mainly checks that the attributes read from the SNV, CNV and Info
        classes match the arguments passed to load_variants().
        """

        vcf = make_minimal_vcf()
        path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(path, vcf)
        sum_x_lr2 = {}
        parents = True

        fam = Family(
            'fam', children=[Person('fam', 'child', '0', '0', 'f', '2', path)])

        # the return value is not inspected, only the class attributes that
        # are checked below
        load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(),
                      sum_x_lr2, parents)

        self.assertEqual(SNV.known_genes, self.known_genes)
        self.assertEqual(CNV.known_genes, self.known_genes)
        self.assertEqual(Info.populations, ['AFR_AF'])
        self.assertEqual(Info.last_base, set())

        # and check that the class attributes follow the arguments when
        # load_variants() is called again with different values
        load_variants(fam, 0.9, [], None, set([('1', 100)]),
                      sum_x_lr2, parents)

        # assertIsNone() takes (obj, msg=None), so a second positional
        # argument would only be used as the failure message
        self.assertIsNone(SNV.known_genes)
        self.assertIsNone(CNV.known_genes)
        self.assertEqual(Info.populations, [])
        self.assertEqual(Info.last_base, set([('1', 100)]))
예제 #2
0
 def test_open_vcf(self):
     """ test obtaining a file handle for the VCF

     Covers a plain-text VCF, a gzipped VCF, a path that does not exist, and
     a file with an unknown extension.
     """

     vcf = make_minimal_vcf()
     path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(path, vcf)

     # check that plain VCF files can be loaded
     handle = open_vcf(path)
     try:
         self.assertEqual(type(handle), io.TextIOWrapper)
     finally:
         # close the handle even if the assertion fails, so a failing test
         # does not leak an open file
         handle.close()

     # check that gzipped vcf files are handled correctly
     path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(path, vcf)

     # the expected handle type differs between python 2 and python 3
     expected_type = io.TextIOWrapper if IS_PYTHON3 else gzip.GzipFile
     handle = open_vcf(path)
     try:
         self.assertEqual(type(handle), expected_type)
     finally:
         handle.close()

     # make sure files that don't exist raise an error
     path = os.path.join(self.temp_dir, "zzz.txt")
     with self.assertRaises(OSError):
         open_vcf(path)

     # check that files with unknown extensions raise errors
     path = os.path.join(self.temp_dir, "temp.zzz")
     write_temp_vcf(path, vcf)
     with self.assertRaises(OSError):
         open_vcf(path)
예제 #3
0
 def test_exclude_header(self):
     """ test that exclude_header() works correctly

     After exclude_header() has consumed the header, the next line read from
     the handle should be vcf[4], for both plain-text and gzipped files.
     """

     vcf = make_minimal_vcf()

     # make sure we drop the header, and only the header from the file
     # check this by reading the file, and making sure the first line
     # is the line we expect from the VCF
     path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(path, vcf)

     # the context manager closes the file even if the assertion fails
     with open(path, "r") as handler:
         exclude_header(handler)
         self.assertEqual(handler.readline(), vcf[4])

     # also check for gzipped VCF files.
     path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(path, vcf)

     # on python 3 gzip.open() reads bytes unless text mode is requested
     mode = 'rt' if IS_PYTHON3 else 'r'

     with gzip.open(path, mode) as handler:
         exclude_header(handler)
         self.assertEqual(handler.readline(), vcf[4])
예제 #4
0
    def test_open_vcf(self):
        """ test obtaining a file handle for the VCF

        Covers a plain-text VCF, a gzipped VCF, a path that does not exist,
        and a file with an unknown extension.
        """

        vcf = make_minimal_vcf()
        path = os.path.join(self.temp_dir, "temp.vcf")
        write_temp_vcf(path, vcf)

        # check that plain VCF files can be loaded
        handle = open_vcf(path)
        try:
            self.assertEqual(type(handle), io.TextIOWrapper)
        finally:
            # close the handle even if the assertion fails, so a failing test
            # does not leak an open file
            handle.close()

        # check that gzipped vcf files are handled correctly
        path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(path, vcf)

        # the expected handle type differs between python 2 and python 3
        expected_type = io.TextIOWrapper if IS_PYTHON3 else gzip.GzipFile
        handle = open_vcf(path)
        try:
            self.assertEqual(type(handle), expected_type)
        finally:
            handle.close()

        # make sure files that don't exist raise an error
        path = os.path.join(self.temp_dir, "zzz.txt")
        with self.assertRaises(OSError):
            open_vcf(path)

        # check that files with unknown extensions raise errors
        path = os.path.join(self.temp_dir, "temp.zzz")
        write_temp_vcf(path, vcf)
        with self.assertRaises(OSError):
            open_vcf(path)
예제 #5
0
    def test_debug_option(self):
        """ test whether we can set up the class with the debug option

        Compares SNV.passes_filters with SNV.passes_filters_with_debug after
        calling load_variants() without, and then with, the two extra debug
        arguments.
        """

        # write a gzipped VCF for the single child in the family
        vcf_path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(vcf_path, make_minimal_vcf())

        child = Person('fam', 'child', '0', '0', 'f', '2', vcf_path)
        family = Family('fam', children=[child])

        # the same objects are handed to both load_variants() calls
        known_genes = {}
        populations = None
        sum_x_lr2 = {}

        # if the debug info isn't available, then the SNV object doesn't use
        # the debug filter function
        load_variants(family, 1.0, populations, known_genes, set(), sum_x_lr2)
        self.assertNotEqual(SNV.passes_filters, SNV.passes_filters_with_debug)

        # if the debug info is passed in, check that the debug filter function
        # got set correctly
        # NOTE(review): test_load_variants in this file passes a `parents`
        # flag as the seventh positional argument; confirm that "1" and
        # "10000" land in the intended debug parameters
        load_variants(family, 1.0, populations, known_genes, set(), sum_x_lr2,
                      "1", "10000")
        self.assertEqual(SNV.passes_filters, SNV.passes_filters_with_debug)
예제 #6
0
    def test_exclude_header(self):
        """ test that exclude_header() works correctly

        After exclude_header() has consumed the header, the next line read
        from the handle should be vcf[4], for both plain-text and gzipped
        files.
        """

        vcf = make_minimal_vcf()

        # make sure we drop the header, and only the header from the file
        # check this by reading the file, and making sure the first line
        # is the line we expect from the VCF
        path = os.path.join(self.temp_dir, "temp.vcf")
        write_temp_vcf(path, vcf)

        # the context manager closes the file even if the assertion fails
        with open(path, "r") as handler:
            exclude_header(handler)
            self.assertEqual(handler.readline(), vcf[4])

        # also check for gzipped VCF files.
        path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(path, vcf)

        # on python 3 gzip.open() reads bytes unless text mode is requested
        mode = 'rt' if IS_PYTHON3 else 'r'

        with gzip.open(path, mode) as handler:
            exclude_header(handler)
            self.assertEqual(handler.readline(), vcf[4])
예제 #7
0
 def test_load_variants(self):
     """ test that load_variants() works correctly.

     Mainly checks that the attributes read from the SNV, CNV and Info
     classes match the arguments passed to load_variants().
     """

     vcf = make_minimal_vcf()
     path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(path, vcf)
     sum_x_lr2 = {}
     parents = True

     fam = Family(
         'fam', children=[Person('fam', 'child', '0', '0', 'f', '2', path)])

     # the return value is not inspected, only the class attributes that
     # are checked below
     load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(),
                   sum_x_lr2, parents)

     self.assertEqual(SNV.known_genes, self.known_genes)
     self.assertEqual(CNV.known_genes, self.known_genes)
     self.assertEqual(Info.populations, ['AFR_AF'])
     self.assertEqual(Info.last_base, set())

     # and check that the class attributes follow the arguments when
     # load_variants() is called again with different values
     load_variants(fam, 0.9, [], None, set([('1', 100)]),
                   sum_x_lr2, parents)

     # assertIsNone() takes (obj, msg=None), so a second positional
     # argument would only be used as the failure message
     self.assertIsNone(SNV.known_genes)
     self.assertIsNone(CNV.known_genes)
     self.assertEqual(Info.populations, [])
     self.assertEqual(Info.last_base, set([('1', 100)]))
예제 #8
0
 def test_get_vcf_header(self):
     """ test that get_vcf_header() works correctly

     Writes the minimal VCF fixture to a plain-text file and compares the
     header read back from that file with the first four fixture lines.
     """

     fixture_lines = make_minimal_vcf()
     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, fixture_lines)

     # check that the header is returned correctly
     expected_header = fixture_lines[:4]
     self.assertEqual(get_vcf_header(vcf_path), expected_header)
예제 #9
0
    def test_get_vcf_header(self):
        """ test that get_vcf_header() works correctly

        Writes the minimal VCF fixture to a plain-text file and compares the
        header read back from that file with the first four fixture lines.
        """

        fixture_lines = make_minimal_vcf()
        vcf_path = os.path.join(self.temp_dir, "temp.vcf")
        write_temp_vcf(vcf_path, fixture_lines)

        # check that the header is returned correctly
        expected_header = fixture_lines[:4]
        self.assertEqual(get_vcf_header(vcf_path), expected_header)
예제 #10
0
 def test_get_vcf_provenance(self):
     """ test that get_vcf_provenance() works correctly

     Checks the (checksum, basename, date) triple for a plain-text VCF, the
     checksum for a gzipped VCF, the date taken from the file path, and the
     'NA' placeholders returned when the family member is None.
     """

     path = os.path.join(self.temp_dir, "temp.vcf")
     gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
     date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")

     family = Family('famid')
     family.add_child('child_id', 'mother', 'father', 'f', '2', path)
     family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
     family.add_father('dad_id', '0', '0', 'male', '1', date_path)
     family.set_child()

     vcf = make_minimal_vcf()
     vcf_string = "".join(vcf)
     if IS_PYTHON3:
         # hashlib needs bytes rather than text on python 3
         vcf_string = vcf_string.encode("utf-8")
     ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()

     write_temp_vcf(path, vcf)

     # check that the file defs return correctly
     (checksum, basename, date) = get_vcf_provenance(family.child)

     self.assertEqual(checksum, ungzipped_hash)
     self.assertEqual(basename, "temp.vcf")
     self.assertEqual(date, "2014-01-01")

     # now write a gzip file, and check that we get the correct hash. The
     # expected hash is computed from the bytes of the file on disk.
     write_gzipped_vcf(gz_path, vcf)
     with open(gz_path, "rb") as handle:
         gzipped_hash = hashlib.sha1(handle.read()).hexdigest()

     (checksum, _, _) = get_vcf_provenance(family.mother)
     self.assertEqual(checksum, gzipped_hash)

     # check that when a fileDate isn't available in the VCF, we can pick
     # the date from the path (presumably vcf[1] is the fileDate header
     # line of the fixture - confirm against make_minimal_vcf())
     vcf.pop(1)
     write_temp_vcf(date_path, vcf)
     (_, _, date) = get_vcf_provenance(family.father)
     self.assertEqual(date, "2014-02-20")

     # and check we get null values if the family member is not present
     family.father = None
     provenance = get_vcf_provenance(family.father)
     self.assertEqual(provenance, ('NA', 'NA', 'NA'))
예제 #11
0
    def test_get_vcf_provenance(self):
        """ test that get_vcf_provenance() works correctly

        Checks the (checksum, basename, date) triple for a plain-text VCF,
        the checksum for a gzipped VCF, the date taken from the file path,
        and the 'NA' placeholders returned when the family member is None.
        """

        path = os.path.join(self.temp_dir, "temp.vcf")
        gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
        date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")

        family = Family('famid')
        family.add_child('child_id', 'mother', 'father', 'f', '2', path)
        family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
        family.add_father('dad_id', '0', '0', 'male', '1', date_path)
        family.set_child()

        vcf = make_minimal_vcf()
        vcf_string = "".join(vcf)
        if IS_PYTHON3:
            # hashlib needs bytes rather than text on python 3
            vcf_string = vcf_string.encode("utf-8")
        ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()

        write_temp_vcf(path, vcf)

        # check that the file defs return correctly
        (checksum, basename, date) = get_vcf_provenance(family.child)

        self.assertEqual(checksum, ungzipped_hash)
        self.assertEqual(basename, "temp.vcf")
        self.assertEqual(date, "2014-01-01")

        # now write a gzip file, and check that we get the correct hash. The
        # expected hash is computed from the bytes of the file on disk.
        write_gzipped_vcf(gz_path, vcf)
        with open(gz_path, "rb") as handle:
            gzipped_hash = hashlib.sha1(handle.read()).hexdigest()

        (checksum, _, _) = get_vcf_provenance(family.mother)
        self.assertEqual(checksum, gzipped_hash)

        # check that when a fileDate isn't available in the VCF, we can pick
        # the date from the path (presumably vcf[1] is the fileDate header
        # line of the fixture - confirm against make_minimal_vcf())
        vcf.pop(1)
        write_temp_vcf(date_path, vcf)
        (_, _, date) = get_vcf_provenance(family.father)
        self.assertEqual(date, "2014-02-20")

        # and check we get null values if the family member is not present
        family.father = None
        provenance = get_vcf_provenance(family.father)
        self.assertEqual(provenance, ('NA', 'NA', 'NA'))
예제 #12
0
 def test_debug_option(self):
     """ test whether we can set up the class with the debug option

     Compares SNV.passes_filters with SNV.passes_filters_with_debug after
     calling load_variants() without, and then with, the two extra debug
     arguments.
     """

     # write a gzipped VCF for the single child in the family
     vcf_path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(vcf_path, make_minimal_vcf())

     child = Person('fam', 'child', '0', '0', 'f', '2', vcf_path)
     family = Family('fam', children=[child])

     # the same objects are handed to both load_variants() calls
     known_genes = {}
     populations = None
     sum_x_lr2 = {}

     # if the debug info isn't available, then the SNV object doesn't use
     # the debug filter function
     load_variants(family, 1.0, populations, known_genes, set(), sum_x_lr2)
     self.assertNotEqual(SNV.passes_filters, SNV.passes_filters_with_debug)

     # if the debug info is passed in, check that the debug filter function
     # got set correctly
     # NOTE(review): test_load_variants in this file passes a `parents`
     # flag as the seventh positional argument; confirm that "1" and
     # "10000" land in the intended debug parameters
     load_variants(family, 1.0, populations, known_genes, set(), sum_x_lr2,
                   "1", "10000")
     self.assertEqual(SNV.passes_filters, SNV.passes_filters_with_debug)