def main(argv=()):
    """Run postprocess_variants: turn CallVariantsOutput records into a VCF.

    The (possibly sharded) TFRecords named by FLAGS.infile are handed to
    postprocess_variants_lib.process_single_sites_tfrecords together with a
    temporary file path; that temporary file is then used as the sorted input
    from which FLAGS.outfile is written.

    Args:
        argv: Command-line arguments. If more than one element is present,
            the extras are reported through errors.log_and_raise with
            errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        has_positional_args = len(argv) > 1
        if has_positional_args:
            message = (
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".').format(str(argv))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.

        proto_utils.uses_fast_cpp_protos_or_die()
        logging_level.set_from_flag()

        # Only the contig list is needed from the reference, so the reader is
        # closed again right away.
        with genomics_io.make_ref_reader(FLAGS.ref) as ref_reader:
            contigs = ref_reader.contigs
        shard_paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)

        with tempfile.NamedTemporaryFile() as sorted_file:
            sorted_path = sorted_file.name
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, shard_paths, sorted_path)

            # The sample name is taken from a single CallVariantsOutput record
            # of the first shard. This relies on every CallVariantsOutput proto
            # in the infile holding exactly one VariantCall in its Variant
            # proto, and on call_set_name being the same across all records.
            first_records = io_utils.read_tfrecords(
                shard_paths[0],
                proto=deepvariant_pb2.CallVariantsOutput,
                max_records=1)
            first_record = next(first_records)
            sample_name = _extract_single_sample_name(first_record)

            write_call_variants_output_to_vcf(
                contigs=contigs,
                input_sorted_tfrecord_path=sorted_path,
                output_vcf_path=FLAGS.outfile,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
Ejemplo n.º 2
0
def main(argv=()):
    """Validate the make_examples options and start make_examples_runner.

    Options are built by default_options from FLAGS. Each validation failure
    is reported through errors.log_and_raise with errors.CommandLineError.

    Args:
        argv: Command-line arguments. If more than one element is present,
            the extras are reported as a command line parsing failure.
    """
    with errors.clean_commandline_error_exit():
        has_positional_args = len(argv) > 1
        if has_positional_args:
            message = (
                'Command line parsing failure: make_examples does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".').format(str(argv))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()
        hts_level = hts_verbose.htsLogLevel[FLAGS.hts_logging_level]
        hts_verbose.set(hts_level)

        # htslib needs authenticated access to GCS.
        htslib_gcp_oauth.init()

        # Building the options may perform I/O.
        options = default_options(add_flags=True, flags=FLAGS)

        # Arguments that must be present regardless of mode. Each field is
        # looked up only when its turn comes, in the listed order.
        always_required = (
            ('reference_filename', 'ref argument is required.'),
            ('reads_filename', 'reads argument is required.'),
            ('examples_filename', 'examples argument is required.'),
        )
        for field_name, problem in always_required:
            if not getattr(options, field_name):
                errors.log_and_raise(problem, errors.CommandLineError)

        if options.n_cores != 1:
            problem = 'Currently only supports n_cores == 1 but got {}.'.format(
                options.n_cores)
            errors.log_and_raise(problem, errors.CommandLineError)

        if not in_training_mode(options):
            # Calling mode: a real sample name has to be supplied.
            sample = options.variant_caller_options.sample_name
            if sample == _UNKNOWN_SAMPLE:
                errors.log_and_raise(
                    'sample_name must be specified in calling mode.',
                    errors.CommandLineError)
        else:
            # Training mode: truth data is mandatory and gVCF output is
            # rejected.
            training_required = (
                ('truth_variants_filename',
                 'truth_variants is required when in training mode.'),
                ('confident_regions_filename',
                 'confident_regions is required when in training mode.'),
            )
            for field_name, problem in training_required:
                if not getattr(options, field_name):
                    errors.log_and_raise(problem, errors.CommandLineError)
            if options.gvcf_filename:
                errors.log_and_raise('gvcf is not allowed in training mode.',
                                     errors.CommandLineError)

        # All checks passed; do the actual work.
        make_examples_runner(options)
Ejemplo n.º 3
0
def main(argv=()):
    """Run call_variants using the settings given by command-line flags.

    The model is looked up with modeling.get_model(FLAGS.model_name) and is
    passed, together with the remaining flag values, to call_variants.

    Args:
        argv: Command-line arguments. If more than one element is present,
            the extras are reported through errors.log_and_raise with
            errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        has_positional_args = len(argv) > 1
        if has_positional_args:
            message = (
                'Command line parsing failure: call_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".').format(str(argv))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.

        proto_utils.uses_fast_cpp_protos_or_die()
        logging_level.set_from_flag()

        # htslib needs authenticated access to GCS.
        htslib_gcp_oauth.init()

        selected_model = modeling.get_model(FLAGS.model_name)
        call_variants(
            examples_filename=FLAGS.examples,
            checkpoint_path=FLAGS.checkpoint,
            model=selected_model,
            execution_hardware=FLAGS.execution_hardware,
            output_file=FLAGS.outfile,
            max_batches=FLAGS.max_batches,
            batch_size=FLAGS.batch_size,
        )
Ejemplo n.º 4
0
 def test_clean_commandline_error_exit_clean_exit(self, exc_type, exit_value):
     """Raising exc_type inside the context must call sys.exit(exit_value)."""
     # sys.exit is patched so the call can be observed without terminating
     # the test process.
     with mock.patch.object(sys, 'exit') as fake_exit, \
             errors.clean_commandline_error_exit(exit_value=exit_value):
         raise exc_type()
     fake_exit.assert_called_once_with(exit_value)
Ejemplo n.º 5
0
 def test_clean_commandline_error_exit_raise_non_allowed(self, exc_type, msg):
     """An exc_type raised inside the context must propagate with its msg."""
     with self.assertRaisesRegexp(exc_type, msg), \
             errors.clean_commandline_error_exit():
         raise exc_type(msg)
Ejemplo n.º 6
0
def main(argv=()):
    """Run postprocess_variants, writing a VCF and optionally a gVCF.

    The (possibly sharded) TFRecords named by FLAGS.infile are handed to
    postprocess_variants_lib.process_single_sites_tfrecords together with a
    temporary file path; the variants derived from that temporary file are
    written to FLAGS.outfile. When FLAGS.nonvariant_site_tfrecord_path is
    set, the VCF just written is read back, merged with the non-variant
    records and written to FLAGS.gvcf_outfile.

    Args:
        argv: Command-line arguments. If more than one element is present,
            the extras are reported through errors.log_and_raise with
            errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        has_positional_args = len(argv) > 1
        if has_positional_args:
            message = (
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".').format(str(argv))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.

        # The two gVCF flags only make sense together: reject the case where
        # exactly one of them is set.
        has_nonvariant_input = bool(FLAGS.nonvariant_site_tfrecord_path)
        has_gvcf_output = bool(FLAGS.gvcf_outfile)
        if has_nonvariant_input != has_gvcf_output:
            errors.log_and_raise(
                'gVCF creation requires both nonvariant_site_tfrecord_path and '
                'gvcf_outfile flags to be set.', errors.CommandLineError)

        proto_utils.uses_fast_cpp_protos_or_die()
        logging_level.set_from_flag()

        # Only the contig list is needed from the reference, so the reader is
        # closed again right away.
        with genomics_io.make_ref_reader(FLAGS.ref) as ref_reader:
            contigs = ref_reader.contigs
        shard_paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)

        with tempfile.NamedTemporaryFile() as sorted_file:
            sorted_path = sorted_file.name
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, shard_paths, sorted_path)

            # The sample name is taken from a single CallVariantsOutput record
            # of the first shard. This relies on every CallVariantsOutput proto
            # in the infile holding exactly one VariantCall in its Variant
            # proto, and on call_set_name being the same across all records.
            first_records = io_utils.read_tfrecords(
                shard_paths[0],
                proto=deepvariant_pb2.CallVariantsOutput,
                max_records=1)
            first_record = next(first_records)
            sample_name = _extract_single_sample_name(first_record)

            independent_variants = _transform_call_variants_output_to_variants(
                input_sorted_tfrecord_path=sorted_path,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
            resolved_variants = haplotypes.maybe_resolve_conflicting_variants(
                independent_variants)
            write_variants_to_vcf(
                contigs=contigs,
                variant_generator=resolved_variants,
                output_vcf_path=FLAGS.outfile,
                sample_name=sample_name)

        # The gVCF is produced only when non-variant site records were given.
        if FLAGS.nonvariant_site_tfrecord_path:
            sort_key = _get_contig_based_variant_sort_keyfn(contigs)
            nonvariant_records = io_utils.read_shard_sorted_tfrecords(
                FLAGS.nonvariant_site_tfrecord_path,
                key=sort_key,
                proto=variants_pb2.Variant)
            # The VCF written above is read back as the variant-side input of
            # the merge.
            vcf_reader_ctx = genomics_io.make_vcf_reader(
                FLAGS.outfile, use_index=False, include_likelihoods=True)
            with vcf_reader_ctx as vcf_reader:
                lessthanfn = _get_contig_based_lessthan(vcf_reader.contigs)
                gvcf_records = (_transform_to_gvcf_record(called_variant)
                                for called_variant in vcf_reader.iterate())
                merged_records = merge_variants_and_nonvariants(
                    gvcf_records, nonvariant_records, lessthanfn)
                write_variants_to_vcf(
                    contigs=contigs,
                    variant_generator=merged_records,
                    output_vcf_path=FLAGS.gvcf_outfile,
                    sample_name=sample_name,
                    filters=FILTERS)