Beispiel #1
0
    def test_open_vcf(self):
        """ test obtaining a file handle for the VCF

        Exercises open_vcf() with a plain VCF, a gzipped VCF, a path this test
        never creates, and a file with an unrecognised extension. The last two
        cases must raise OSError.
        """

        vcf = make_minimal_vcf()
        path = os.path.join(self.temp_dir, "temp.vcf")
        write_temp_vcf(path, vcf)

        # check that plain VCF files can be loaded. The handle is closed in a
        # finally clause so a failing assertion does not leave the file open.
        handle = open_vcf(path)
        try:
            self.assertEqual(type(handle), io.TextIOWrapper)
        finally:
            handle.close()

        # check that gzipped vcf files are handled correctly
        path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(path, vcf)

        # the expected handle type depends on the interpreter version
        expected_type = io.TextIOWrapper if IS_PYTHON3 else gzip.GzipFile
        handle = open_vcf(path)
        try:
            self.assertEqual(type(handle), expected_type)
        finally:
            handle.close()

        # make sure files that don't exist raise an error
        path = os.path.join(self.temp_dir, "zzz.txt")
        with self.assertRaises(OSError):
            open_vcf(path)

        # check that files with unknown extensions raise errors, even though
        # the file itself exists
        path = os.path.join(self.temp_dir, "temp.zzz")
        write_temp_vcf(path, vcf)
        with self.assertRaises(OSError):
            open_vcf(path)
 def test_open_vcf(self):
     """ test obtaining a file handle for the VCF

     Exercises open_vcf() with a plain VCF, a gzipped VCF, a path this test
     never creates, and a file with an unrecognised extension. The last two
     cases must raise OSError.
     """

     vcf = make_minimal_vcf()
     path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(path, vcf)

     # check that plain VCF files can be loaded. The handle is closed in a
     # finally clause so a failing assertion does not leave the file open.
     handle = open_vcf(path)
     try:
         self.assertEqual(type(handle), io.TextIOWrapper)
     finally:
         handle.close()

     # check that gzipped vcf files are handled correctly
     path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(path, vcf)

     # the expected handle type depends on the interpreter version
     expected_type = io.TextIOWrapper if IS_PYTHON3 else gzip.GzipFile
     handle = open_vcf(path)
     try:
         self.assertEqual(type(handle), expected_type)
     finally:
         handle.close()

     # make sure files that don't exist raise an error
     path = os.path.join(self.temp_dir, "zzz.txt")
     with self.assertRaises(OSError):
         open_vcf(path)

     # check that files with unknown extensions raise errors, even though
     # the file itself exists
     path = os.path.join(self.temp_dir, "temp.zzz")
     write_temp_vcf(path, vcf)
     with self.assertRaises(OSError):
         open_vcf(path)
 def test_exclude_header(self):
     """ test that exclude_header() works correctly

     After exclude_header() has consumed a handle, the next line read from
     it must equal vcf[4]. This is checked for a plain and a gzipped file.
     """

     vcf = make_minimal_vcf()

     # make sure we drop the header, and only the header from the file
     # check this by reading the file, and making sure the first line
     # is the line we expect from the VCF
     path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(path, vcf)

     # a context manager closes the file even when the assertion fails
     with open(path, "r") as handler:
         exclude_header(handler)
         self.assertEqual(handler.readline(), vcf[4])

     # also check for gzipped VCF files.
     path = os.path.join(self.temp_dir, "temp.vcf.gz")
     write_gzipped_vcf(path, vcf)

     # python 3 needs text mode so readline() yields str, comparable to vcf[4]
     mode = 'rt' if IS_PYTHON3 else 'r'

     with gzip.open(path, mode) as handler:
         exclude_header(handler)
         self.assertEqual(handler.readline(), vcf[4])
Beispiel #4
0
 def test_open_individual(self):
     ''' check which variants open_individual() loads for a person
     '''

     # with no person supplied, an empty list comes back
     self.assertEqual(open_individual(None), [])

     # build a VCF holding two variants that differ in position and HGNC symbol
     lines = make_vcf_header()
     extras = ((1, 'HGNC=TEST;MAX_AF=0.0001'), (2, 'HGNC=ATRX;MAX_AF=0.0001'))
     for position, extra in extras:
         lines.append(make_vcf_line(pos=position, extra=extra))

     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, lines)

     individual = Person('fam_id', 'sample', 'dad', 'mom', 'F', '2', vcf_path)

     # fields shared by both expected variants
     shared = dict(chrom="1", id=".", ref="G", alts="T", qual='1000',
         filter="PASS", format="DP:GT", sample="50:0/1", gender="female",
         mnv_code=None)
     snv_test = SNV(position=1,
         info="CQ=missense_variant;HGNC=TEST;MAX_AF=0.0001", **shared)
     snv_atrx = SNV(position=2,
         info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001", **shared)

     # by default only the ATRX variant is expected back
     self.assertEqual(open_individual(individual), [snv_atrx])

     # variants whose keys are listed in child_variants are expected back
     # too, so both variants should now be returned
     forced_keys = {('1', 1), ('1', 2)}
     loaded = open_individual(individual, child_variants=forced_keys)
     self.assertEqual(loaded, [snv_test, snv_atrx])
Beispiel #5
0
    def test_exclude_header(self):
        """ test that exclude_header() works correctly

        After exclude_header() has consumed a handle, the next line read from
        it must equal vcf[4]. This is checked for a plain and a gzipped file.
        """

        vcf = make_minimal_vcf()

        # make sure we drop the header, and only the header from the file
        # check this by reading the file, and making sure the first line
        # is the line we expect from the VCF
        path = os.path.join(self.temp_dir, "temp.vcf")
        write_temp_vcf(path, vcf)

        # a context manager closes the file even when the assertion fails
        with open(path, "r") as handler:
            exclude_header(handler)
            self.assertEqual(handler.readline(), vcf[4])

        # also check for gzipped VCF files.
        path = os.path.join(self.temp_dir, "temp.vcf.gz")
        write_gzipped_vcf(path, vcf)

        # python 3 needs text mode so readline() yields str, comparable to vcf[4]
        mode = 'rt' if IS_PYTHON3 else 'r'

        with gzip.open(path, mode) as handler:
            exclude_header(handler)
            self.assertEqual(handler.readline(), vcf[4])
 def test_open_individual(self):
     ''' check which variants open_individual() loads for a person
     '''

     # with no person supplied, an empty list comes back
     self.assertEqual(open_individual(None), [])

     # build a VCF holding two variants that differ in position and HGNC symbol
     lines = make_vcf_header()
     extras = ((1, 'HGNC=TEST;MAX_AF=0.0001'), (2, 'HGNC=ATRX;MAX_AF=0.0001'))
     for position, extra in extras:
         lines.append(make_vcf_line(pos=position, extra=extra))

     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, lines)

     individual = Person('fam_id', 'sample', 'dad', 'mom', 'F', '2', vcf_path)

     # fields shared by both expected variants
     shared = dict(chrom="1", id=".", ref="G", alts="T", qual='1000',
         filter="PASS", format="DP:GT:AD", sample="50:0/1:10,10",
         gender="female", mnv_code=None)
     snv_test = SNV(position=1,
         info="CQ=missense_variant;HGNC=TEST;MAX_AF=0.0001", **shared)
     snv_atrx = SNV(position=2,
         info="CQ=missense_variant;HGNC=ATRX;MAX_AF=0.0001", **shared)

     # by default only the ATRX variant is expected back
     self.assertEqual(open_individual(individual), [snv_atrx])

     # variants whose keys are listed in child_variants are expected back
     # too, so both variants should now be returned
     forced_keys = {('1', 1), ('1', 2)}
     loaded = open_individual(individual, child_variants=forced_keys)
     self.assertEqual(loaded, [snv_test, snv_atrx])
 def test_get_vcf_header(self):
     """ check that get_vcf_header() returns the header lines of a VCF file
     """

     lines = make_minimal_vcf()
     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, lines)

     # the function is expected to return the first four lines written above
     observed = get_vcf_header(vcf_path)
     expected = lines[:4]
     self.assertEqual(observed, expected)
Beispiel #8
0
    def test_get_vcf_header(self):
        """ check that get_vcf_header() returns the header lines of a VCF file
        """

        lines = make_minimal_vcf()
        vcf_path = os.path.join(self.temp_dir, "temp.vcf")
        write_temp_vcf(vcf_path, lines)

        # the function is expected to return the first four lines written above
        observed = get_vcf_header(vcf_path)
        expected = lines[:4]
        self.assertEqual(observed, expected)
 def test_get_vcf_provenance(self):
     """ test that get_vcf_provenance() works correctly

     get_vcf_provenance() is expected to return a (checksum, basename, date)
     tuple for a family member. The test covers a plain VCF, a gzipped VCF,
     a VCF whose date has to come from the file path, and a missing family
     member (which must give ('NA', 'NA', 'NA')).
     """

     path = os.path.join(self.temp_dir, "temp.vcf")
     gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
     date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")

     family = Family('famid')
     family.add_child('child_id', 'mother', 'father', 'f', '2', path)
     family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
     family.add_father('dad_id', '0', '0', 'male', '1', date_path)
     family.set_child()

     vcf = make_minimal_vcf()
     vcf_string = "".join(vcf)
     if IS_PYTHON3:
         # hashlib needs bytes on python 3
         vcf_string = vcf_string.encode("utf-8")
     ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()

     write_temp_vcf(path, vcf)

     # check that the file defs return correctly
     (checksum, basename, date) = get_vcf_provenance(family.child)

     self.assertEqual(checksum, ungzipped_hash)
     self.assertEqual(basename, "temp.vcf")
     # presumably the fileDate recorded in the minimal VCF header
     self.assertEqual(date, "2014-01-01")

     # now write a gzip file, and check that we get the correct hash. The
     # expected hash is taken over the compressed bytes as stored on disk.
     write_gzipped_vcf(gz_path, vcf)
     with open(gz_path, "rb") as handle:
         gzipped_hash = hashlib.sha1(handle.read()).hexdigest()

     checksum, _, _ = get_vcf_provenance(family.mother)
     self.assertEqual(checksum, gzipped_hash)

     # check that when a fileDate isn't available in the VCF, we can pick
     # the date from the path (assumes vcf[1] is the fileDate header line)
     vcf.pop(1)
     write_temp_vcf(date_path, vcf)
     _, _, date = get_vcf_provenance(family.father)
     self.assertEqual(date, "2014-02-20")

     # and check we get null values if the family member is not present
     family.father = None
     provenance = get_vcf_provenance(family.father)
     self.assertEqual(provenance, ('NA', 'NA', 'NA'))
Beispiel #10
0
    def test_get_vcf_provenance(self):
        """ test that get_vcf_provenance() works correctly

        get_vcf_provenance() is expected to return a (checksum, basename, date)
        tuple for a family member. The test covers a plain VCF, a gzipped VCF,
        a VCF whose date has to come from the file path, and a missing family
        member (which must give ('NA', 'NA', 'NA')).
        """

        path = os.path.join(self.temp_dir, "temp.vcf")
        gz_path = os.path.join(self.temp_dir, "temp.vcf.gz")
        date_path = os.path.join(self.temp_dir, "temp.process.2014-02-20.vcf")

        family = Family('famid')
        family.add_child('child_id', 'mother', 'father', 'f', '2', path)
        family.add_mother('mom_id', '0', '0', 'female', '1', gz_path)
        family.add_father('dad_id', '0', '0', 'male', '1', date_path)
        family.set_child()

        vcf = make_minimal_vcf()
        vcf_string = "".join(vcf)
        if IS_PYTHON3:
            # hashlib needs bytes on python 3
            vcf_string = vcf_string.encode("utf-8")
        ungzipped_hash = hashlib.sha1(vcf_string).hexdigest()

        write_temp_vcf(path, vcf)

        # check that the file defs return correctly
        (checksum, basename, date) = get_vcf_provenance(family.child)

        self.assertEqual(checksum, ungzipped_hash)
        self.assertEqual(basename, "temp.vcf")
        # presumably the fileDate recorded in the minimal VCF header
        self.assertEqual(date, "2014-01-01")

        # now write a gzip file, and check that we get the correct hash. The
        # expected hash is taken over the compressed bytes as stored on disk.
        write_gzipped_vcf(gz_path, vcf)
        with open(gz_path, "rb") as handle:
            gzipped_hash = hashlib.sha1(handle.read()).hexdigest()

        checksum, _, _ = get_vcf_provenance(family.mother)
        self.assertEqual(checksum, gzipped_hash)

        # check that when a fileDate isn't available in the VCF, we can pick
        # the date from the path (assumes vcf[1] is the fileDate header line)
        vcf.pop(1)
        write_temp_vcf(date_path, vcf)
        _, _, date = get_vcf_provenance(family.father)
        self.assertEqual(date, "2014-02-20")

        # and check we get null values if the family member is not present
        family.father = None
        provenance = get_vcf_provenance(family.father)
        self.assertEqual(provenance, ('NA', 'NA', 'NA'))
Beispiel #11
0
 def test_open_individual_male_het_chrx(self):
     """ check that open_individual() skips a het chrX call in a male
     """

     # this is an integration-level check only: a single heterozygous
     # variant on chrX is written for a male sample, and the loaded
     # variant list is expected to come back empty

     lines = make_vcf_header()
     het_line = make_vcf_line(chrom='X', pos=1, genotype='0/1',
         extra='HGNC=TEST;MAX_AF=0.0001')
     lines.append(het_line)

     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, lines)

     male = Person('fam_id', 'sample', 'dad', 'mom', 'M', '2', vcf_path)
     loaded = open_individual(male)

     self.assertEqual(loaded, [])
 def test_open_individual_male_het_chrx(self):
     """ check that open_individual() skips a het chrX call in a male
     """

     # this is an integration-level check only: a single heterozygous
     # variant on chrX is written for a male sample, and the loaded
     # variant list is expected to come back empty

     lines = make_vcf_header()
     het_line = make_vcf_line(chrom='X', pos=1, genotype='0/1',
         extra='HGNC=TEST;MAX_AF=0.0001')
     lines.append(het_line)

     vcf_path = os.path.join(self.temp_dir, "temp.vcf")
     write_temp_vcf(vcf_path, lines)

     male = Person('fam_id', 'sample', 'dad', 'mom', 'M', '2', vcf_path)
     loaded = open_individual(male)

     self.assertEqual(loaded, [])