def test_roundtrip(self,
                   expected_infos,
                   expected_fmt,
                   expected_fmt1,
                   expected_fmt2,
                   reader_excluded_info=None,
                   reader_excluded_format=None,
                   writer_excluded_info=None,
                   writer_excluded_format=None):
    """Reads the roundtrip fixture, writes it back out, and compares the text.

    The expected file content is `self.header` followed by every template in
    `self.record_format_strings`, each filled in with the matching entries of
    `expected_infos`, `expected_fmt1` and `expected_fmt2` plus the shared
    `expected_fmt`.

    Args:
      expected_infos: Per-record values substituted for `{info}`.
      expected_fmt: Value substituted for `{fmt}` in every record.
      expected_fmt1: Per-record values substituted for `{efmts1}`.
      expected_fmt2: Per-record values substituted for `{efmts2}`.
      reader_excluded_info: Forwarded to VcfReader as `excluded_info_fields`.
      reader_excluded_format: Forwarded to VcfReader as
        `excluded_format_fields`.
      writer_excluded_info: Forwarded to VcfWriter as `excluded_info_fields`.
      writer_excluded_format: Forwarded to VcfWriter as
        `excluded_format_fields`.
    """
    # Build the text we expect to find in the written file.
    per_record_args = zip(self.record_format_strings, expected_infos,
                          expected_fmt1, expected_fmt2)
    rendered = []
    for template, info, extra1, extra2 in per_record_args:
        rendered.append(
            template.format(
                info=info, fmt=expected_fmt, efmts1=extra1, efmts2=extra2))
    expected = self.header + ''.join(rendered)

    with vcf.VcfReader(
            test_utils.genomics_core_testdata('test_py_roundtrip.vcf'),
            excluded_info_fields=reader_excluded_info,
            excluded_format_fields=reader_excluded_format) as reader:
        records = list(reader.iterate())
        output_path = test_utils.test_tmpfile('test_roundtrip_tmpfile.vcf')
        with vcf.VcfWriter(
                output_path,
                header=reader.header,
                excluded_info_fields=writer_excluded_info,
                excluded_format_fields=writer_excluded_format) as writer:
            for record in records:
                writer.write(record)

    # The writer has been closed by now, so the file can be read back whole.
    with open(output_path) as written:
        actual = written.read()
    self.assertEqual(actual, expected)
def main(argv):
    """Runs recall_variant over every variant of a VCF and writes the results.

    The values in FLAGS.genotype_priors are converted to floats, divided by
    their sum, and log10-transformed. Each variant read from FLAGS.input_vcf
    is passed to `recall_variant` together with those log priors and then
    written to FLAGS.output_vcf, which reuses the input file's header.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv  # Unused.

    # The priors must be a real list: on Python 3 `map` returns a one-shot
    # iterator, which `sum` would exhaust and leave nothing for the log10
    # pass, silently producing an empty list of log priors.
    priors = [float(prior) for prior in FLAGS.genotype_priors]
    total = sum(priors)
    log_priors = [math.log10(prior / total) for prior in priors]

    with vcf.VcfReader(FLAGS.input_vcf) as reader:
        with vcf.VcfWriter(FLAGS.output_vcf, header=reader.header) as writer:
            for variant in reader:
                # The return value is discarded and the same object is
                # written, so recall_variant presumably updates the variant
                # in place -- confirm against its definition.
                recall_variant(log_priors, variant)
                # TODO(thomaswc): Also update the variant's quality.
                writer.write(variant)
def test_headerless_vcf(self):
    """Writes a headerless vcf and reads it back out."""
    source_path = test_utils.genomics_core_testdata('test_sites.vcf')
    headerless_path = test_utils.test_tmpfile('output.vcf')
    written_variants = []
    with vcf.VcfReader(source_path) as reader:
        # Copy every variant across, remembering each one for the comparison.
        with vcf.VcfWriter(
                headerless_path,
                header=reader.header,
                exclude_header=True) as writer:
            for variant in reader:
                written_variants.append(variant)
                writer.write(variant)

        # The output was written with exclude_header=True, so the source
        # file's header is handed to the second reader explicitly.
        with vcf.VcfReader(
                headerless_path, header=reader.header) as actual_reader:
            reread_variants = list(actual_reader)
            self.assertEqual(written_variants, reread_variants)
def main(argv):
    """Copies the variants with quality above 3.01 from one VCF to another.

    Prints the sample names found in the input header, then writes every
    variant whose `quality` is greater than 3.01 to the output file, which
    reuses the input header.

    Args:
      argv: Program name followed by the input and output VCF paths. Any
        other length prints a usage message and exits with status -1.
    """
    if len(argv) != 3:
        print('Usage: {} <input_vcf> <output_vcf>'.format(argv[0]))
        sys.exit(-1)
    _, in_vcf, out_vcf = argv

    # Please try to keep the following part in sync with the documentation in
    # g3doc/overview.md.
    with vcf.VcfReader(in_vcf, use_index=False) as reader:
        sample_names = ' '.join(reader.header.sample_names)
        print('Sample names in VCF: ', sample_names)
        with vcf.VcfWriter(out_vcf, header=reader.header) as writer:
            # Lazily filter so reading and writing stay interleaved.
            passing = (v for v in reader if v.quality > 3.01)
            for variant in passing:
                writer.write(variant)
def write_variant_to_tempfile(self, variant):
    """Writes `variant` to a temporary VCF file and returns the file's path.

    The header declares a single contig named '20', one sample per call of
    the variant (named by `call_set_name`), and the DP and AD format fields.

    Args:
      variant: The variant to write; its `calls` supply the sample names.

    Returns:
      The path of the VCF file that was written.
    """
    output_path = test_utils.test_tmpfile('test.vcf')

    contig = reference_pb2.ContigInfo(name='20')
    sample_names = [call.call_set_name for call in variant.calls]
    depth_format = variants_pb2.VcfFormatInfo(
        id='DP', number='1', type='Integer', description='Read depth')
    allele_depth_format = variants_pb2.VcfFormatInfo(
        id='AD',
        number='R',
        type='Integer',
        description='Read depth for each allele')
    header = variants_pb2.VcfHeader(
        contigs=[contig],
        sample_names=sample_names,
        formats=[depth_format, allele_depth_format])

    vcf_writer = vcf.VcfWriter(output_path, header=header)
    with vcf_writer:
        vcf_writer.write(variant)
    return output_path
def main(argv):
    """Copies a VCF, adding an 'AD' info field to the header and each variant.

    Exits with status -1 if the input header already lists an info field
    whose id is 'AD'. Otherwise the reserved 'AD' info definition is added to
    the header, and every variant gets its 'AD' info set to the result of
    `get_variant_ad` before being written out.

    Args:
      argv: Program name followed by the input and output VCF paths. Any
        other length prints a usage message and exits with status -1.
    """
    if len(argv) != 3:
        print('Usage: %s <input_vcf> <output_vcf>' % argv[0])
        sys.exit(-1)
    _, in_vcf, out_vcf = argv

    with vcf.VcfReader(in_vcf) as reader:
        existing_info_ids = [info.id for info in reader.header.infos]
        if 'AD' in existing_info_ids:
            print('%s already contains AD field.' % in_vcf)
            sys.exit(-1)

        # No copy is made here: out_header is whatever object reader.header
        # returns, so the extend below modifies that object.
        out_header = reader.header
        out_header.infos.extend([vcf_constants.reserved_info_field('AD')])

        with vcf.VcfWriter(out_vcf, header=out_header) as writer:
            for variant in reader:
                ad_value = get_variant_ad(variant)
                variant_utils.set_info(variant, 'AD', ad_value, writer)
                writer.write(variant)