Exemple #1
0
 def test_write_vcf_from_h5(self):
     # With hdf5 file
     tomato_h5 = VariationsH5(join(TEST_DATA_DIR,
                                   'tomato.apeki_gbs.calmd.h5'), "r")
     exp_fhand = open(join(TEST_DATA_DIR, "tomato.apeki_100_exp.vcf"), "rb")
     with NamedTemporaryFile(mode='wb') as tmp_fhand:
         write_vcf(tomato_h5, tmp_fhand)
         # only checking snps
         exp_lines = list(exp_fhand)
         exp_lines = [line for line in exp_lines
                          if not line.startswith(b'#')]
         with open(tmp_fhand.name, 'rb') as refhand:
             i = 0
             for line in refhand:
                 if line.startswith(b'#'):
                     continue
                 try:
                     assert line == exp_lines[i]
                 except AssertionError:
                     print(line.decode())
                     print(exp_lines[i].decode())
                 i += 1
                 if i > 42:
                     break
     exp_fhand.close()
Exemple #2
0
 def test_generated_vcf_feed_outputs_equal_vcfs(self):
     h5_vars = VariationsH5(
         join(TEST_DATA_DIR, 'tomato.apeki_gbs.calmd.1stchunk.h5'), "r")
     with NamedTemporaryFile(mode='wb') as vcf_vars_from_h5:
         write_vcf(h5_vars, vcf_vars_from_h5)
         vcf_vars_from_h5.flush()
         vcf_fhand = open(vcf_vars_from_h5.name, 'rb')
         vcf = VCFParser(vcf_fhand)
         vcf_vars_parsed = VariationsArrays()
         vcf_vars_parsed.put_vars(vcf)
         with NamedTemporaryFile(mode='wb') as vcf_vars_from_vcf:
             vcf_vars_parsed.write_vcf(vcf_vars_from_vcf)
             vcf_vars_from_vcf.flush()
             vcf_from_h5_fhand = open(vcf_vars_from_h5.name, 'rb')
             vcf_from_vcf_fhand = open(vcf_vars_from_vcf.name, 'rb')
             for line_parsed_from_h5, line_parsed_from_vcf in zip(
                     vcf_from_h5_fhand, vcf_from_vcf_fhand):
                 assert line_parsed_from_h5 == line_parsed_from_vcf, "when importing from a generated VCF and exporting to a new VCF both files must be the same"
Exemple #3
0
 def write_vcf(self, vcf_fhand):
     write_vcf(self, vcf_fhand)
 def write_vcf(self, vcf_fhand):
     write_vcf(self, vcf_fhand)
Exemple #5
0
    def test_write_vcf(self):
        # With all fields available
        tmp_fhand = NamedTemporaryFile()
        tmp_fhand.close()
        vcf_fhand = open(join(TEST_DATA_DIR, 'format_def_exp.vcf'), 'rb')
        vcf = VCFParser(vcf_fhand, max_field_lens={'alt': 2},
                        pre_read_max_size=10000)

        max_field_lens = {'CALLS': {b'GT': 1, b'HQ': 2, b'DP': 1, b'GQ': 1},
                          'FILTER': 1,
                          'INFO': {b'AA': 1, b'AF': 2, b'DP': 1,
                                   b'DB': 1, b'NS': 1, b'H2': 1}, 'alt': 2}
        max_field_str_lens = {'INFO': {b'AA': 1}, 'alt': 5, 'chrom': 2, 'ref': 4,
                              'id': 10, 'FILTER': 0}

        variations = VariationsArrays(ignore_undefined_fields=True)
        variations.put_vars(vcf, max_field_lens=max_field_lens,
                            max_field_str_lens=max_field_str_lens)
        vcf_fhand.close()
        with NamedTemporaryFile(mode='wb') as out_fhand:
            write_vcf(variations, out_fhand, vcf_format='VCFv4.0')
            vcf_fpath = join(TEST_DATA_DIR, 'format_def_exp.vcf')
            with open(vcf_fpath, 'r') as exp_fhand:
                exp_lines = list(exp_fhand)
                out_fhand.seek(0)
                with open(out_fhand.name) as refhand:
                    for line in refhand:
                        try:
                            assert line in exp_lines
                        except AssertionError:
                            print('aa', line)

        # With missing info in variations
        tmp_fhand = NamedTemporaryFile()
        out_fpath = tmp_fhand.name
        tmp_fhand.close()
        vcf_fhand = open(join(TEST_DATA_DIR, 'format_def_without_info.vcf'),
                         'rb')
        vcf = VCFParser(vcf_fhand)

        max_field_lens = {'INFO': {}, 'CALLS': {b'GQ': 1, b'GT': 1, b'HQ': 2,
                                                b'DP': 1},
                          'FILTER': 1, 'alt': 2}
        max_field_str_lens = {'ref': 4, 'INFO': {}, 'id': 10, 'FILTER': 0,
                              'alt': 5, 'chrom': 2}

        h5_without_info = VariationsH5(fpath=out_fpath, mode='w',
                                       ignore_undefined_fields=True)
        h5_without_info.put_vars(vcf, max_field_lens=max_field_lens,
                                 max_field_str_lens=max_field_str_lens)
        vcf_fhand.close()
        with NamedTemporaryFile(mode='wb') as out_fhand:
            write_vcf(h5_without_info, out_fhand, vcf_format='VCFv4.0')
            vcf_fpath = join(TEST_DATA_DIR, 'format_def_without_info_exp.vcf')
            with open(vcf_fpath, 'r') as exp_fhand:
                exp_lines = list(exp_fhand)
                out_fhand.seek(0)
                with open(out_fhand.name) as refhand:
                    for line in refhand:
                        try:
                            assert line in exp_lines
                        except AssertionError:
                            print(line)