Esempio n. 1
0
    def test_roundtrip(self,
                       expected_infos,
                       expected_fmt,
                       expected_fmt1,
                       expected_fmt2,
                       reader_excluded_info=None,
                       reader_excluded_format=None,
                       writer_excluded_info=None,
                       writer_excluded_format=None):
        """Reads the roundtrip test VCF, rewrites it, and checks the text.

        The expected file content is `self.header` followed by each template
        in `self.record_format_strings` filled in with the matching entries
        of the `expected_*` arguments.

        Args:
          expected_infos: Per-record values substituted for `{info}`.
          expected_fmt: Single value substituted for `{fmt}` in every record.
          expected_fmt1: Per-record values substituted for `{efmts1}`.
          expected_fmt2: Per-record values substituted for `{efmts2}`.
          reader_excluded_info: Passed to VcfReader as `excluded_info_fields`.
          reader_excluded_format: Passed to VcfReader as
            `excluded_format_fields`.
          writer_excluded_info: Passed to VcfWriter as `excluded_info_fields`.
          writer_excluded_format: Passed to VcfWriter as
            `excluded_format_fields`.
        """
        # Render the text each record line is expected to have after the
        # roundtrip.
        rendered_lines = []
        per_record_values = zip(self.record_format_strings, expected_infos,
                                expected_fmt1, expected_fmt2)
        for template, info, fmts1, fmts2 in per_record_values:
            rendered_lines.append(
                template.format(
                    info=info, fmt=expected_fmt, efmts1=fmts1, efmts2=fmts2))
        expected = self.header + ''.join(rendered_lines)

        input_path = test_utils.genomics_core_testdata('test_py_roundtrip.vcf')
        with vcf.VcfReader(
                input_path,
                excluded_info_fields=reader_excluded_info,
                excluded_format_fields=reader_excluded_format) as reader:
            # Load every record up front, then write them all back out using
            # the header taken from the reader.
            records = list(reader.iterate())
            output_path = test_utils.test_tmpfile('test_roundtrip_tmpfile.vcf')
            with vcf.VcfWriter(
                    output_path,
                    header=reader.header,
                    excluded_info_fields=writer_excluded_info,
                    excluded_format_fields=writer_excluded_format) as writer:
                for parsed_record in records:
                    writer.write(parsed_record)

        with open(output_path) as written_file:
            actual = written_file.read()
        self.assertEqual(actual, expected)
Esempio n. 2
0
def main(argv):
    """Runs recall_variant over every variant of the input VCF.

    The values in FLAGS.genotype_priors are converted to floats, normalized
    by their sum, and turned into log10 values. Each variant read from
    FLAGS.input_vcf is passed to recall_variant together with those log
    priors and then written to FLAGS.output_vcf, which reuses the input
    header.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv

    # Materialize the priors as a list. In Python 3, map() returns a one-shot
    # iterator: sum() would exhaust it and the comprehension below would then
    # produce an empty list of log priors.
    priors = [float(prior) for prior in FLAGS.genotype_priors]
    sump = sum(priors)
    log_priors = [math.log10(x / sump) for x in priors]

    with vcf.VcfReader(FLAGS.input_vcf) as reader:
        with vcf.VcfWriter(FLAGS.output_vcf, header=reader.header) as writer:
            for variant in reader:
                # NOTE(review): presumably recall_variant updates the variant
                # in place, since its return value is not used -- confirm.
                recall_variant(log_priors, variant)
                # TODO(thomaswc): Also update the variant's quality.
                writer.write(variant)
Esempio n. 3
0
  def test_headerless_vcf(self):
    """Checks that variants survive a trip through a header-free VCF.

    Every record of the test sites VCF is written with `exclude_header=True`,
    then read back using the original reader's header and compared with the
    records that were written.
    """
    source_path = test_utils.genomics_core_testdata('test_sites.vcf')
    headerless_path = test_utils.test_tmpfile('output.vcf')
    written_variants = []
    with vcf.VcfReader(source_path) as reader:
      headerless_writer = vcf.VcfWriter(
          headerless_path, header=reader.header, exclude_header=True)
      with headerless_writer as writer:
        for variant in reader:
          written_variants.append(variant)
          writer.write(variant)

      # The output file carries no header of its own, so the header of the
      # source reader is supplied explicitly when reading it back.
      with vcf.VcfReader(headerless_path, header=reader.header) as actual_reader:
        reread_variants = list(actual_reader)
        self.assertEqual(written_variants, reread_variants)
Esempio n. 4
0
def main(argv):
    """Copies the variants with quality above 3.01 into a new VCF.

    Prints a usage message and exits with status -1 unless exactly two
    arguments follow the program name. Otherwise prints the sample names
    found in the input header and writes every variant whose `quality` is
    greater than 3.01 to the output file, reusing the input header.

    Args:
      argv: Sequence of [program_name, input_vcf, output_vcf].
    """
    if len(argv) != 3:
        print('Usage: {} <input_vcf> <output_vcf>'.format(argv[0]))
        sys.exit(-1)
    _, in_vcf, out_vcf = argv

    # Please try to keep the following part in sync with the documentation in
    # g3doc/overview.md.
    with vcf.VcfReader(in_vcf, use_index=False) as reader:
        sample_names = ' '.join(reader.header.sample_names)
        print('Sample names in VCF: ', sample_names)
        with vcf.VcfWriter(out_vcf, header=reader.header) as writer:
            passing_variants = (v for v in reader if v.quality > 3.01)
            for variant in passing_variants:
                writer.write(variant)
Esempio n. 5
0
 def write_variant_to_tempfile(self, variant):
   """Writes `variant` to a temporary VCF file and returns the file's path.

   The header is built here: it has a single contig named '20', one sample
   name per call of the variant (taken from `call_set_name`), and the DP and
   AD format fields.

   Args:
     variant: The variant to write; its `calls` supply the sample names.

   Returns:
     The path of the VCF file that was written.
   """
   output_path = test_utils.test_tmpfile('test.vcf')

   contigs = [reference_pb2.ContigInfo(name='20')]
   sample_names = [call.call_set_name for call in variant.calls]
   depth_format = variants_pb2.VcfFormatInfo(
       id='DP', number='1', type='Integer', description='Read depth')
   allele_depth_format = variants_pb2.VcfFormatInfo(
       id='AD',
       number='R',
       type='Integer',
       description='Read depth for each allele')
   header = variants_pb2.VcfHeader(
       contigs=contigs,
       sample_names=sample_names,
       formats=[depth_format, allele_depth_format])

   with vcf.VcfWriter(output_path, header=header) as writer:
     writer.write(variant)
   return output_path
Esempio n. 6
0
def main(argv):
    """Writes a copy of a VCF whose variants carry an added AD info field.

    Prints a usage message and exits with status -1 unless exactly two
    arguments follow the program name. Also exits with status -1 if the
    input header already declares an info field with id 'AD'. Otherwise the
    reserved AD info field is appended to the header, and every variant gets
    its AD info set from get_variant_ad before being written out.

    Args:
      argv: Sequence of [program_name, input_vcf, output_vcf].
    """
    if len(argv) != 3:
        print('Usage: %s <input_vcf> <output_vcf>' % argv[0])
        sys.exit(-1)
    _, in_vcf, out_vcf = argv

    with vcf.VcfReader(in_vcf) as reader:
        already_has_ad = any(info.id == 'AD' for info in reader.header.infos)
        if already_has_ad:
            print('%s already contains AD field.' % in_vcf)
            sys.exit(-1)

        # The output header is the reader's own header object with the AD
        # info definition appended to it.
        out_header = reader.header
        ad_info = vcf_constants.reserved_info_field('AD')
        out_header.infos.extend([ad_info])

        with vcf.VcfWriter(out_vcf, header=out_header) as writer:
            for variant in reader:
                allele_depths = get_variant_ad(variant)
                variant_utils.set_info(variant, 'AD', allele_depths, writer)
                writer.write(variant)