def main(argv=()):
  """Converts the CallVariantsOutput records in FLAGS.infile into a VCF.

  The input shards are first passed to
  postprocess_variants_lib.process_single_sites_tfrecords, which writes its
  result to a temporary file; that file is then written out as a VCF at
  FLAGS.outfile.

  Args:
    argv: Command-line arguments. Positional arguments are not supported, so
      anything with more than one element is reported as a CommandLineError.

  Raises:
    ValueError: If the first input path yields no CallVariantsOutput record,
      so no sample name can be determined. (Whether this propagates depends
      on what errors.clean_commandline_error_exit intercepts.)
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    with genomics_io.make_ref_reader(FLAGS.ref) as reader:
      contigs = reader.contigs
    paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    with tempfile.NamedTemporaryFile() as temp:
      postprocess_variants_lib.process_single_sites_tfrecords(
          contigs, paths, temp.name)

      # Read one CallVariantsOutput record and extract the sample name from it.
      # Note that this assumes that all CallVariantsOutput protos in the infile
      # contain a single VariantCall within their constituent Variant proto, and
      # that the call_set_name is identical in each of the records.
      #
      # A default is passed to next() so that an input without any records
      # produces an explicit error instead of a bare StopIteration.
      record = next(
          io_utils.read_tfrecords(
              paths[0], proto=deepvariant_pb2.CallVariantsOutput,
              max_records=1),
          None)
      if record is None:
        raise ValueError(
            'No CallVariantsOutput records found in "{}"; cannot determine '
            'the sample name.'.format(paths[0]))
      sample_name = _extract_single_sample_name(record)

      # temp.name is only valid while the temporary file is open, so the VCF
      # must be written inside this context.
      write_call_variants_output_to_vcf(
          contigs=contigs,
          input_sorted_tfrecord_path=temp.name,
          output_vcf_path=FLAGS.outfile,
          qual_filter=FLAGS.qual_filter,
          multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
          sample_name=sample_name)
def main(argv=()):
  """Validates the make_examples options and then runs make_examples.

  Every validation failure is reported through errors.log_and_raise as an
  errors.CommandLineError.

  Args:
    argv: Command-line arguments. Positional arguments are not supported, so
      anything with more than one element is rejected.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      positional_args_message = (
          'Command line parsing failure: make_examples does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(positional_args_message, errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()
    hts_verbose.set(hts_verbose.htsLogLevel[FLAGS.hts_logging_level])

    # Give htslib authentication access to GCS.
    htslib_gcp_oauth.init()

    # Set up options; may do I/O.
    options = default_options(add_flags=True, flags=FLAGS)

    # Arguments required in every mode, checked in this order.
    always_required = (
        (options.reference_filename, 'ref argument is required.'),
        (options.reads_filename, 'reads argument is required.'),
        (options.examples_filename, 'examples argument is required.'),
    )
    for value, missing_message in always_required:
      if not value:
        errors.log_and_raise(missing_message, errors.CommandLineError)

    if options.n_cores != 1:
      n_cores_message = (
          'Currently only supports n_cores == 1 but got {}.'.format(
              options.n_cores))
      errors.log_and_raise(n_cores_message, errors.CommandLineError)

    if in_training_mode(options):
      # Arguments required only in training mode, checked in this order.
      training_required = (
          (options.truth_variants_filename,
           'truth_variants is required when in training mode.'),
          (options.confident_regions_filename,
           'confident_regions is required when in training mode.'),
      )
      for value, missing_message in training_required:
        if not value:
          errors.log_and_raise(missing_message, errors.CommandLineError)
      if options.gvcf_filename:
        errors.log_and_raise('gvcf is not allowed in training mode.',
                             errors.CommandLineError)
    elif options.variant_caller_options.sample_name == _UNKNOWN_SAMPLE:
      # Calling mode: the sample name must have been supplied.
      errors.log_and_raise('sample_name must be specified in calling mode.',
                           errors.CommandLineError)

    # Run!
    make_examples_runner(options)
def main(argv=()):
  """Runs call_variants with the settings given by the command-line flags.

  Args:
    argv: Command-line arguments. Positional arguments are not supported, so
      anything with more than one element is reported as a CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      positional_args_message = (
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(positional_args_message, errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    # Give htslib authentication access to GCS.
    htslib_gcp_oauth.init()

    # The model is looked up by name before any of the other flags are read.
    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size,
    )
def test_clean_commandline_error_exit_clean_exit(self, exc_type, exit_value):
  """Checks that raising exc_type in the context calls sys.exit(exit_value)."""
  # sys.exit is patched so the call can be observed instead of ending the
  # test process.
  exit_patcher = mock.patch.object(sys, 'exit')
  with exit_patcher as mock_exit, errors.clean_commandline_error_exit(
      exit_value=exit_value):
    raise exc_type()
  mock_exit.assert_called_once_with(exit_value)
def test_clean_commandline_error_exit_raise_non_allowed(
    self, exc_type, msg):
  """Checks that exc_type raised in the context propagates, matching msg."""
  expect_raised = self.assertRaisesRegexp(exc_type, msg)
  with expect_raised, errors.clean_commandline_error_exit():
    raise exc_type(msg)
def main(argv=()):
  """Writes a VCF, and optionally a gVCF, from CallVariantsOutput records.

  The input shards named by FLAGS.infile are passed to
  postprocess_variants_lib.process_single_sites_tfrecords, transformed into
  variants, run through haplotypes.maybe_resolve_conflicting_variants and
  written to FLAGS.outfile. When FLAGS.nonvariant_site_tfrecord_path is set,
  the VCF just written is read back, merged with the non-variant site records
  and written to FLAGS.gvcf_outfile.

  Args:
    argv: Command-line arguments. Positional arguments are not supported, so
      anything with more than one element is reported as a CommandLineError.

  Raises:
    ValueError: If the first input path yields no CallVariantsOutput record,
      so no sample name can be determined. (Whether this propagates depends
      on what errors.clean_commandline_error_exit intercepts.)
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be either both set or both unset.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (
        not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    with genomics_io.make_ref_reader(FLAGS.ref) as reader:
      contigs = reader.contigs
    paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    with tempfile.NamedTemporaryFile() as temp:
      postprocess_variants_lib.process_single_sites_tfrecords(
          contigs, paths, temp.name)

      # Read one CallVariantsOutput record and extract the sample name from it.
      # Note that this assumes that all CallVariantsOutput protos in the infile
      # contain a single VariantCall within their constituent Variant proto, and
      # that the call_set_name is identical in each of the records.
      #
      # A default is passed to next() so that an input without any records
      # produces an explicit error instead of a bare StopIteration.
      record = next(
          io_utils.read_tfrecords(
              paths[0], proto=deepvariant_pb2.CallVariantsOutput,
              max_records=1),
          None)
      if record is None:
        raise ValueError(
            'No CallVariantsOutput records found in "{}"; cannot determine '
            'the sample name.'.format(paths[0]))
      sample_name = _extract_single_sample_name(record)

      # temp.name is only valid while the temporary file is open, so the VCF
      # must be written inside this context.
      independent_variants = _transform_call_variants_output_to_variants(
          input_sorted_tfrecord_path=temp.name,
          qual_filter=FLAGS.qual_filter,
          multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
          sample_name=sample_name)
      variant_generator = haplotypes.maybe_resolve_conflicting_variants(
          independent_variants)
      write_variants_to_vcf(
          contigs=contigs,
          variant_generator=variant_generator,
          output_vcf_path=FLAGS.outfile,
          sample_name=sample_name)

    # Also write out the gVCF file if it was provided.
    if FLAGS.nonvariant_site_tfrecord_path:
      nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
          FLAGS.nonvariant_site_tfrecord_path,
          key=_get_contig_based_variant_sort_keyfn(contigs),
          proto=variants_pb2.Variant)
      with genomics_io.make_vcf_reader(
          FLAGS.outfile, use_index=False,
          include_likelihoods=True) as variant_reader:
        lessthanfn = _get_contig_based_lessthan(variant_reader.contigs)
        # gvcf_variants is a lazy generator over variant_reader, so it has to
        # be consumed (by the write below) while the reader is still open.
        gvcf_variants = (
            _transform_to_gvcf_record(variant)
            for variant in variant_reader.iterate())
        merged_variants = merge_variants_and_nonvariants(
            gvcf_variants, nonvariant_generator, lessthanfn)
        write_variants_to_vcf(
            contigs=contigs,
            variant_generator=merged_variants,
            output_vcf_path=FLAGS.gvcf_outfile,
            sample_name=sample_name,
            filters=FILTERS)