def test_load_variants(self):
    """ test that load_variants() works correctly.

    Mainly checks that the class-level values on SNV, CNV and Info are set
    from the arguments given to load_variants().
    """
    # NOTE(review): another test_load_variants definition appears later in
    # this file; if both sit in the same class, the later one replaces this
    # one - confirm and drop the duplicate.

    vcf = make_minimal_vcf()
    path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(path, vcf)

    sum_x_lr2 = {}
    parents = True

    fam = Family('fam',
        children=[Person('fam', 'child', '0', '0', 'f', '2', path)])

    # the returned variants are not inspected here, only the class state
    load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(),
        sum_x_lr2, parents)

    self.assertEqual(SNV.known_genes, self.known_genes)
    self.assertEqual(CNV.known_genes, self.known_genes)
    self.assertEqual(Info.populations, ['AFR_AF'])
    self.assertEqual(Info.last_base, set())

    # and check that the class values are replaced when load_variants() is
    # called again with different arguments
    load_variants(fam, 0.9, [], None, {('1', 100)}, sum_x_lr2, parents)

    # assertIsNone() takes (expr, msg), so only the checked value is passed
    self.assertIsNone(SNV.known_genes)
    self.assertIsNone(CNV.known_genes)
    self.assertEqual(Info.populations, [])
    self.assertEqual(Info.last_base, {('1', 100)})
def test_open_vcf(self):
    """ test obtaining a file handle for the VCF

    Covers plain and gzipped VCF files, a path that does not exist, and a
    file with an unrecognised extension.
    """

    vcf = make_minimal_vcf()
    path = os.path.join(self.temp_dir, "temp.vcf")
    write_temp_vcf(path, vcf)

    # check that plain VCF files can be loaded
    handle = open_vcf(path)
    try:
        self.assertEqual(type(handle), io.TextIOWrapper)
    finally:
        # close the handle even if the assertion fails
        handle.close()

    # check that gzipped vcf files are handled correctly
    path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(path, vcf)

    handle = open_vcf(path)
    try:
        # the expected handle type differs between python versions
        if IS_PYTHON3:
            self.assertEqual(type(handle), io.TextIOWrapper)
        else:
            self.assertEqual(type(handle), gzip.GzipFile)
    finally:
        handle.close()

    # make sure files that don't exist raise an error
    path = os.path.join(self.temp_dir, "zzz.txt")
    with self.assertRaises(OSError):
        open_vcf(path)

    # check that files with unknown extensions raise errors
    path = os.path.join(self.temp_dir, "temp.zzz")
    write_temp_vcf(path, vcf)
    with self.assertRaises(OSError):
        open_vcf(path)
def test_exclude_header(self):
    """ test that exclude_header() works correctly

    After exclude_header() has run on a handle, the next line read from the
    handle should be the fifth line of the minimal VCF (index 4).
    """

    vcf = make_minimal_vcf()

    # make sure we drop the header, and only the header from the file
    # check this by reading the file, and making sure the first line
    # is the line we expect from the VCF
    path = os.path.join(self.temp_dir, "temp.vcf")
    write_temp_vcf(path, vcf)

    # use a context manager so the handle is closed even if the check fails
    with open(path, "r") as handler:
        exclude_header(handler)
        self.assertEqual(handler.readline(), vcf[4])

    # also check for gzipped VCF files.
    path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(path, vcf)

    # python 3 needs text mode to read str lines from a gzip file
    mode = 'rt' if IS_PYTHON3 else 'r'

    with gzip.open(path, mode) as handler:
        exclude_header(handler)
        self.assertEqual(handler.readline(), vcf[4])
def test_debug_option(self):
    """ check that the debug arguments switch the SNV filter function """
    # NOTE(review): another test_debug_option definition appears later in
    # this file; if both sit in the same class, the later one replaces this
    # one - confirm and drop the duplicate.

    known_genes = {}
    populations = None
    lr2_sums = {}

    vcf_lines = make_minimal_vcf()
    vcf_path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(vcf_path, vcf_lines)

    child = Person('fam', 'child', '0', '0', 'f', '2', vcf_path)
    family = Family('fam', children=[child])

    # without the debug arguments, SNV.passes_filters should not be the
    # debug variant of the filter function
    load_variants(family, 1.0, populations, known_genes, set(), lr2_sums)
    self.assertNotEqual(SNV.passes_filters, SNV.passes_filters_with_debug)

    # with the debug arguments supplied, SNV.passes_filters should be the
    # debug variant of the filter function
    load_variants(family, 1.0, populations, known_genes, set(), lr2_sums,
        "1", "10000")
    self.assertEqual(SNV.passes_filters, SNV.passes_filters_with_debug)
def test_load_variants(self):
    """ test that load_variants() works correctly.

    Mainly checks that the class-level values on SNV, CNV and Info are set
    from the arguments given to load_variants().
    """

    vcf = make_minimal_vcf()
    path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(path, vcf)

    sum_x_lr2 = {}
    parents = True

    fam = Family('fam',
        children=[Person('fam', 'child', '0', '0', 'f', '2', path)])

    # the returned variants are not inspected here, only the class state
    load_variants(fam, 0.9, ['AFR_AF'], self.known_genes, set(),
        sum_x_lr2, parents)

    self.assertEqual(SNV.known_genes, self.known_genes)
    self.assertEqual(CNV.known_genes, self.known_genes)
    self.assertEqual(Info.populations, ['AFR_AF'])
    self.assertEqual(Info.last_base, set())

    # and check that the class values are replaced when load_variants() is
    # called again with different arguments
    load_variants(fam, 0.9, [], None, {('1', 100)}, sum_x_lr2, parents)

    # assertIsNone() takes (expr, msg), so only the checked value is passed
    self.assertIsNone(SNV.known_genes)
    self.assertIsNone(CNV.known_genes)
    self.assertEqual(Info.populations, [])
    self.assertEqual(Info.last_base, {('1', 100)})
def test_get_vcf_header(self):
    """ check that get_vcf_header() returns the header lines of a VCF """

    lines = make_minimal_vcf()
    vcf_path = os.path.join(self.temp_dir, "temp.vcf")
    write_temp_vcf(vcf_path, lines)

    # the header is expected to match the first four lines that were written
    self.assertEqual(get_vcf_header(vcf_path), lines[:4])
def test_get_vcf_provenance(self):
    """ test that get_vcf_provenance() works correctly

    Checks the (checksum, basename, date) values for a plain VCF, the
    checksum for a gzipped VCF, the date picked from the file path when the
    VCF lacks one, and the 'NA' values for a missing family member.
    """

    path = os.path.join(self.temp_dir, "temp.vcf")
    gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
    date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")

    family = Family('famid')
    family.add_child('child_id', 'mother', 'father', 'f', '2', path)
    family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
    family.add_father('dad_id', '0', '0', 'male', '1', date_path)
    family.set_child()

    vcf = make_minimal_vcf()
    vcf_string = "".join(vcf)
    # hashlib needs bytes under python 3
    if IS_PYTHON3:
        vcf_string = vcf_string.encode("utf-8")
    ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()

    write_temp_vcf(path, vcf)

    # check that the file defs return correctly
    (checksum, basename, date) = get_vcf_provenance(family.child)

    self.assertEqual(checksum, ungzipped_hash)
    self.assertEqual(basename, "temp.vcf")
    self.assertEqual(date, "2014-01-01")

    # now write a gzip file, and check that we get the correct hash. The
    # expected hash is taken over the raw (compressed) bytes on disk.
    write_gzipped_vcf(gz_path, vcf)
    with open(gz_path, "rb") as handle:
        gzipped_hash = hashlib.sha1(handle.read()).hexdigest()

    (checksum, basename, date) = get_vcf_provenance(family.mother)
    self.assertEqual(checksum, gzipped_hash)

    # check that when a fileDate isn't available in the VCF, we can pick
    # the date from the path. This drops the second line of the VCF, which
    # is presumably the fileDate header line.
    vcf.pop(1)
    write_temp_vcf(date_path, vcf)
    (checksum, basename, date) = get_vcf_provenance(family.father)
    self.assertEqual(date, "2014-02-20")

    # and check we get null values if the family member is not present
    family.father = None
    provenance = get_vcf_provenance(family.father)
    self.assertEqual(provenance, ('NA', 'NA', 'NA'))
def test_debug_option(self):
    """ check that the debug arguments switch the SNV filter function """

    known_genes = {}
    populations = None
    lr2_sums = {}

    vcf_lines = make_minimal_vcf()
    vcf_path = os.path.join(self.temp_dir, "temp.vcf.gz")
    write_gzipped_vcf(vcf_path, vcf_lines)

    child = Person('fam', 'child', '0', '0', 'f', '2', vcf_path)
    family = Family('fam', children=[child])

    # without the debug arguments, SNV.passes_filters should not be the
    # debug variant of the filter function
    load_variants(family, 1.0, populations, known_genes, set(), lr2_sums)
    self.assertNotEqual(SNV.passes_filters, SNV.passes_filters_with_debug)

    # with the debug arguments supplied, SNV.passes_filters should be the
    # debug variant of the filter function
    load_variants(family, 1.0, populations, known_genes, set(), lr2_sums,
        "1", "10000")
    self.assertEqual(SNV.passes_filters, SNV.passes_filters_with_debug)