Exemple #1
0
def main(argv=()):
    """Validate the command line and invoke call_variants from flag values.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is reported through
            errors.log_and_raise as an errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            failure_message = (
                'Command line parsing failure: call_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(failure_message, errors.CommandLineError)
        del argv  # Unused.
        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        # The master address is only resolved when --use_tpu is set; otherwise
        # an empty string is passed through.
        master = (tf_utils.resolve_master(FLAGS.master, FLAGS.tpu_name,
                                          FLAGS.tpu_zone, FLAGS.gcp_project)
                  if FLAGS.use_tpu else '')

        selected_model = modeling.get_model(FLAGS.model_name)
        call_variants(
            examples_filename=FLAGS.examples,
            checkpoint_path=FLAGS.checkpoint,
            model=selected_model,
            execution_hardware=FLAGS.execution_hardware,
            output_file=FLAGS.outfile,
            max_batches=FLAGS.max_batches,
            batch_size=FLAGS.batch_size,
            master=master,
            use_tpu=FLAGS.use_tpu,
        )
Exemple #2
0
def main(_):
  """Run parse_and_run(), retrying on retryable TensorFlow errors.

  Before running, optionally seeds TensorFlow's random number generator from
  FLAGS.random_seed and exports FLAGS.kmp_blocktime as the KMP_BLOCKTIME
  environment variable. parse_and_run() is attempted up to
  FLAGS.num_retries + 1 times.

  Raises:
    tf.errors.UnavailableError: if the final attempt fails with this error.
    tf.errors.InternalError: if the final attempt fails with this error.
  """
  proto_utils.uses_fast_cpp_protos_or_die()

  logging_level.set_from_flag()

  if FLAGS.random_seed:
    logging.info('Setting tf.random_seed to %d', FLAGS.random_seed)
    tf.compat.v1.set_random_seed(FLAGS.random_seed)
  else:
    logging.info('Not setting tf.random_seed, will be assigned a random value')

  if FLAGS.kmp_blocktime:
    os.environ['KMP_BLOCKTIME'] = FLAGS.kmp_blocktime
    logging.info('Set KMP_BLOCKTIME to %s', os.environ['KMP_BLOCKTIME'])

  total_attempts = FLAGS.num_retries + 1
  for attempt in range(1, total_attempts + 1):
    is_last_attempt = attempt == total_attempts
    try:
      parse_and_run()
      return
    except tf.errors.UnavailableError as e:
      # An UnavailableError indicates a gRPC error, typically this is
      # retryable.
      if is_last_attempt:
        # Do not swallow the failure once there is no attempt left; otherwise
        # the program would exit as if it had succeeded.
        logging.error('Caught UnavailableError %s; retries exhausted.', e)
        raise
      logging.error('Caught UnavailableError %s; will retry.', e)
    except tf.errors.InternalError as e:
      # Retry on an InternalError.
      if is_last_attempt:
        logging.error('Caught InternalError %s; retries exhausted.', e)
        raise
      logging.error('Caught InternalError %s; will retry.', e)
def main(_):
    """Configure the process from flags and start eval_loop.

    Logs an error (without aborting) when --dataset_config_pbtxt is unset,
    optionally exports KMP_BLOCKTIME, and resolves the master address only
    when --use_tpu is set.
    """
    proto_utils.uses_fast_cpp_protos_or_die()

    config_pbtxt = FLAGS.dataset_config_pbtxt
    if not config_pbtxt:
        logging.error('Need to specify --dataset_config_pbtxt')
    logging_level.set_from_flag()

    blocktime = FLAGS.kmp_blocktime
    if blocktime:
        os.environ['KMP_BLOCKTIME'] = blocktime
        logging.info('Set KMP_BLOCKTIME to %s', os.environ['KMP_BLOCKTIME'])

    if FLAGS.use_tpu:
        master = tf_utils.resolve_master(FLAGS.master, FLAGS.tpu_name,
                                         FLAGS.tpu_zone, FLAGS.gcp_project)
    else:
        master = ''

    eval_loop(
        master=master,
        dataset_config_pbtxt=FLAGS.dataset_config_pbtxt,
        checkpoint_dir=FLAGS.checkpoint_dir,
        model_name=FLAGS.model_name,
        batch_size=FLAGS.batch_size,
        max_examples=FLAGS.max_examples,
        eval_name=FLAGS.eval_name,
        max_evaluations=FLAGS.max_evaluations,
        use_tpu=FLAGS.use_tpu,
    )
def main(argv=()):
    """Turn call_variants output records into a VCF at FLAGS.outfile.

    Reads the contigs from the reference named by FLAGS.ref, runs
    process_single_sites_tfrecords over the (possibly sharded) FLAGS.infile
    into a temporary file, and writes the VCF from that temporary file.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is reported through
            errors.log_and_raise as an errors.CommandLineError.

    Raises:
        ValueError: if the first input path contains no records, so no sample
            name can be extracted.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            errors.log_and_raise(
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)), errors.CommandLineError)
        del argv  # Unused.
        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        with genomics_io.make_ref_reader(FLAGS.ref) as reader:
            contigs = reader.contigs
        paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
        with tempfile.NamedTemporaryFile() as temp:
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, paths, temp.name)
            # Read one CallVariantsOutput record and extract the sample name from it.
            # Note that this assumes that all CallVariantsOutput protos in the infile
            # contain a single VariantCall within their constituent Variant proto, and
            # that the call_set_name is identical in each of the records.
            record = next(
                io_utils.read_tfrecords(
                    paths[0],
                    proto=deepvariant_pb2.CallVariantsOutput,
                    max_records=1), None)
            if record is None:
                # A bare StopIteration from next() would give no hint about
                # which input was empty.
                raise ValueError(
                    'Cannot find any records in {}'.format(paths[0]))
            sample_name = _extract_single_sample_name(record)
            write_call_variants_output_to_vcf(
                contigs=contigs,
                input_sorted_tfrecord_path=temp.name,
                output_vcf_path=FLAGS.outfile,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
Exemple #5
0
def main(argv=()):
    """Validate the flag-derived options and run make_examples_runner.

    Every validation failure is reported through errors.log_and_raise as an
    errors.CommandLineError.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is rejected.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            errors.log_and_raise(
                'Command line parsing failure: make_examples does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)), errors.CommandLineError)
        del argv  # Unused.

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()
        hts_verbose.set(hts_verbose.htsLogLevel[FLAGS.hts_logging_level])

        # Set up options; may do I/O.
        options = default_options(add_flags=True, flags_obj=FLAGS)

        # Check arguments that apply to any mode.
        if not options.reference_filename:
            errors.log_and_raise('ref argument is required.',
                                 errors.CommandLineError)
        if not options.reads_filename:
            errors.log_and_raise('reads argument is required.',
                                 errors.CommandLineError)
        if not options.examples_filename:
            errors.log_and_raise('examples argument is required.',
                                 errors.CommandLineError)
        if options.n_cores != 1:
            errors.log_and_raise(
                'Currently only supports n_cores == 1 but got {}.'.format(
                    options.n_cores), errors.CommandLineError)

        # Check for argument issues specific to train mode.
        if in_training_mode(options):
            if not options.truth_variants_filename:
                errors.log_and_raise(
                    'truth_variants is required when in training mode.',
                    errors.CommandLineError)
            if not options.confident_regions_filename:
                errors.log_and_raise(
                    'confident_regions is required when in training mode.',
                    errors.CommandLineError)
            if options.gvcf_filename:
                errors.log_and_raise('gvcf is not allowed in training mode.',
                                     errors.CommandLineError)
        else:
            # Check for argument issues specific to calling mode.
            if options.variant_caller_options.sample_name == _UNKNOWN_SAMPLE:
                errors.log_and_raise(
                    'sample_name must be specified in calling mode.',
                    errors.CommandLineError)
            # Values below 1 (including 0) are rejected, so the requirement
            # is "positive", not merely "non-negative".
            if options.variant_caller_options.gq_resolution < 1:
                errors.log_and_raise(
                    'gq_resolution must be a positive integer.',
                    errors.CommandLineError)

        # Run!
        make_examples_runner(options)
Exemple #6
0
def main(argv=()):
  """Validate the flag-derived options and run make_examples_runner.

  Every validation failure is reported through errors.log_and_raise as an
  errors.CommandLineError.

  Args:
    argv: Command-line arguments. More than one element means positional
      arguments were supplied, which is rejected.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: make_examples does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()
    hts_verbose.set(hts_verbose.htsLogLevel[FLAGS.hts_logging_level])

    # Set up options; may do I/O.
    options = default_options(add_flags=True, flags_obj=FLAGS)

    # Check arguments that apply to any mode.
    if not options.reference_filename:
      errors.log_and_raise('ref argument is required.', errors.CommandLineError)
    if not options.reads_filename:
      errors.log_and_raise('reads argument is required.',
                           errors.CommandLineError)
    if not options.examples_filename:
      errors.log_and_raise('examples argument is required.',
                           errors.CommandLineError)
    if options.n_cores != 1:
      errors.log_and_raise(
          'Currently only supports n_cores == 1 but got {}.'.format(
              options.n_cores), errors.CommandLineError)

    # Check for argument issues specific to train mode.
    if in_training_mode(options):
      if not options.truth_variants_filename:
        errors.log_and_raise(
            'truth_variants is required when in training mode.',
            errors.CommandLineError)
      if not options.confident_regions_filename:
        errors.log_and_raise(
            'confident_regions is required when in training mode.',
            errors.CommandLineError)
      if options.gvcf_filename:
        errors.log_and_raise('gvcf is not allowed in training mode.',
                             errors.CommandLineError)
    else:
      # Check for argument issues specific to calling mode.
      if options.variant_caller_options.sample_name == _UNKNOWN_SAMPLE:
        errors.log_and_raise('sample_name must be specified in calling mode.',
                             errors.CommandLineError)
      # Values below 1 (including 0) are rejected, so the requirement is
      # "positive", not merely "non-negative".
      if options.variant_caller_options.gq_resolution < 1:
        errors.log_and_raise('gq_resolution must be a positive integer.',
                             errors.CommandLineError)

    # Run!
    make_examples_runner(options)
Exemple #7
0
def main(_):
  """Run parse_and_run(), retrying on retryable TensorFlow errors.

  parse_and_run() is attempted up to FLAGS.num_retries + 1 times.

  Raises:
    tf.errors.UnavailableError: if the final attempt fails with this error.
    tf.errors.InternalError: if the final attempt fails with this error.
  """
  proto_utils.uses_fast_cpp_protos_or_die()

  logging_level.set_from_flag()
  total_attempts = FLAGS.num_retries + 1
  for attempt in range(1, total_attempts + 1):
    is_last_attempt = attempt == total_attempts
    try:
      parse_and_run()
      return
    except tf.errors.UnavailableError as e:
      # An UnavailableError indicates a gRPC error, typically this is
      # retryable.
      if is_last_attempt:
        # Do not swallow the failure once there is no attempt left; otherwise
        # the program would exit as if it had succeeded.
        logging.error('Caught UnavailableError %s; retries exhausted.', e)
        raise
      logging.error('Caught UnavailableError %s; will retry.', e)
    except tf.errors.InternalError as e:
      # Retry on an InternalError.
      if is_last_attempt:
        logging.error('Caught InternalError %s; retries exhausted.', e)
        raise
      logging.error('Caught InternalError %s; will retry.', e)
def main(_):
    """Run parse_and_run(), retrying on retryable TensorFlow errors.

    parse_and_run() is attempted up to FLAGS.num_retries + 1 times.

    Raises:
        tf.errors.UnavailableError: if the final attempt fails with this
            error.
        tf.errors.InternalError: if the final attempt fails with this error.
    """
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()
    total_attempts = FLAGS.num_retries + 1
    for attempt in range(1, total_attempts + 1):
        is_last_attempt = attempt == total_attempts
        try:
            parse_and_run()
            return
        except tf.errors.UnavailableError as e:
            # An UnavailableError indicates a gRPC error, typically this is
            # retryable.
            if is_last_attempt:
                # Do not swallow the failure once there is no attempt left;
                # otherwise the program would exit as if it had succeeded.
                logging.error(
                    'Caught UnavailableError %s; retries exhausted.', e)
                raise
            logging.error('Caught UnavailableError %s; will retry.', e)
        except tf.errors.InternalError as e:
            # Retry on an InternalError.
            if is_last_attempt:
                logging.error(
                    'Caught InternalError %s; retries exhausted.', e)
                raise
            logging.error('Caught InternalError %s; will retry.', e)
Exemple #9
0
def main(_):
    """Start eval_loop with settings taken from the command-line flags.

    An unset --dataset_config_pbtxt is logged as an error but does not stop
    execution here.
    """
    proto_utils.uses_fast_cpp_protos_or_die()

    config_pbtxt = FLAGS.dataset_config_pbtxt
    if not config_pbtxt:
        logging.error('Need to specify --dataset_config_pbtxt')
    logging_level.set_from_flag()

    eval_loop(master=FLAGS.master,
              dataset_config_pbtxt=FLAGS.dataset_config_pbtxt,
              checkpoint_dir=FLAGS.checkpoint_dir,
              model_name=FLAGS.model_name,
              batch_size=FLAGS.batch_size,
              moving_average_decay=FLAGS.moving_average_decay,
              max_examples=FLAGS.max_examples,
              eval_dir=FLAGS.eval_dir,
              max_evaluations=FLAGS.max_evaluations)
Exemple #10
0
def main(_):
  """Start eval_loop with settings taken from the command-line flags.

  An unset --dataset_config_pbtxt is logged as an error but does not stop
  execution here.
  """
  proto_utils.uses_fast_cpp_protos_or_die()

  config_pbtxt = FLAGS.dataset_config_pbtxt
  if not config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
  logging_level.set_from_flag()

  eval_loop(master=FLAGS.master,
            dataset_config_pbtxt=FLAGS.dataset_config_pbtxt,
            checkpoint_dir=FLAGS.checkpoint_dir,
            model_name=FLAGS.model_name,
            batch_size=FLAGS.batch_size,
            moving_average_decay=FLAGS.moving_average_decay,
            max_examples=FLAGS.max_examples,
            eval_dir=FLAGS.eval_dir,
            max_evaluations=FLAGS.max_evaluations)
Exemple #11
0
def main(argv=()):
    """Build options from flags, validate them, and run make_examples.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is reported through
            errors.log_and_raise as an errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            failure_message = (
                'Command line parsing failure: make_examples does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(failure_message, errors.CommandLineError)
        del argv  # Unused.

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()
        hts_level = hts_verbose.htsLogLevel[FLAGS.hts_logging_level]
        hts_verbose.set(hts_level)

        # Building the options may perform I/O; validation happens right after.
        runner_options = default_options(add_flags=True, flags_obj=FLAGS)
        check_options_are_valid(runner_options)

        make_examples_core.make_examples_runner(runner_options)
Exemple #12
0
def main(argv=()):
  """Validate the command line and invoke call_variants from flag values.

  Args:
    argv: Command-line arguments. More than one element means positional
      arguments were supplied, which is reported through errors.log_and_raise
      as an errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      failure_message = (
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(failure_message, errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    selected_model = modeling.get_model(FLAGS.model_name)
    call_variants(examples_filename=FLAGS.examples,
                  checkpoint_path=FLAGS.checkpoint,
                  model=selected_model,
                  execution_hardware=FLAGS.execution_hardware,
                  output_file=FLAGS.outfile,
                  max_batches=FLAGS.max_batches,
                  batch_size=FLAGS.batch_size)
Exemple #13
0
def main(argv=()):
    """Validate the command line and invoke call_variants from flag values.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is reported through
            errors.log_and_raise as an errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            failure_message = (
                'Command line parsing failure: call_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(failure_message, errors.CommandLineError)
        del argv  # Unused.
        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        # Give htslib authentication access to GCS.
        htslib_gcp_oauth.init()

        selected_model = modeling.get_model(FLAGS.model_name)
        call_variants(
            examples_filename=FLAGS.examples,
            checkpoint_path=FLAGS.checkpoint,
            model=selected_model,
            execution_hardware=FLAGS.execution_hardware,
            output_file=FLAGS.outfile,
            max_batches=FLAGS.max_batches,
            batch_size=FLAGS.batch_size,
        )
def main(argv=()):
    """Write a VCF, and optionally a gVCF, from call_variants output.

    The VCF is written to FLAGS.outfile. When
    FLAGS.nonvariant_site_tfrecord_path is set, the VCF just written is read
    back, merged with the non-variant records, and written to
    FLAGS.gvcf_outfile.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is reported through
            errors.log_and_raise as an errors.CommandLineError.

    Raises:
        ValueError: if the first input path contains no records, so no sample
            name can be extracted.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            errors.log_and_raise(
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)), errors.CommandLineError)
        del argv  # Unused.

        # The two gVCF flags must be either both set or both unset.
        if (not FLAGS.nonvariant_site_tfrecord_path) != (
                not FLAGS.gvcf_outfile):
            errors.log_and_raise(
                'gVCF creation requires both nonvariant_site_tfrecord_path and '
                'gvcf_outfile flags to be set.', errors.CommandLineError)

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        with genomics_io.make_ref_reader(FLAGS.ref) as reader:
            contigs = reader.contigs
        paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
        with tempfile.NamedTemporaryFile() as temp:
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, paths, temp.name)
            # Read one CallVariantsOutput record and extract the sample name from it.
            # Note that this assumes that all CallVariantsOutput protos in the infile
            # contain a single VariantCall within their constituent Variant proto, and
            # that the call_set_name is identical in each of the records.
            record = next(
                io_utils.read_tfrecords(
                    paths[0],
                    proto=deepvariant_pb2.CallVariantsOutput,
                    max_records=1), None)
            if record is None:
                # A bare StopIteration from next() would give no hint about
                # which input was empty.
                raise ValueError(
                    'Cannot find any records in {}'.format(paths[0]))
            sample_name = _extract_single_sample_name(record)
            independent_variants = _transform_call_variants_output_to_variants(
                input_sorted_tfrecord_path=temp.name,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
            variant_generator = haplotypes.maybe_resolve_conflicting_variants(
                independent_variants)
            write_variants_to_vcf(contigs=contigs,
                                  variant_generator=variant_generator,
                                  output_vcf_path=FLAGS.outfile,
                                  sample_name=sample_name)

        # Also write out the gVCF file if it was provided.
        if FLAGS.nonvariant_site_tfrecord_path:
            nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
                FLAGS.nonvariant_site_tfrecord_path,
                key=_get_contig_based_variant_sort_keyfn(contigs),
                proto=variants_pb2.Variant)
            with genomics_io.make_vcf_reader(
                    FLAGS.outfile, use_index=False,
                    include_likelihoods=True) as variant_reader:
                lessthanfn = _get_contig_based_lessthan(variant_reader.contigs)
                gvcf_variants = (_transform_to_gvcf_record(variant)
                                 for variant in variant_reader.iterate())
                merged_variants = merge_variants_and_nonvariants(
                    gvcf_variants, nonvariant_generator, lessthanfn)
                write_variants_to_vcf(contigs=contigs,
                                      variant_generator=merged_variants,
                                      output_vcf_path=FLAGS.gvcf_outfile,
                                      sample_name=sample_name,
                                      filters=FILTERS)
Exemple #15
0
def main(argv=()):
    """Write a VCF, and optionally a gVCF, from call_variants output.

    The VCF goes to FLAGS.outfile. When FLAGS.nonvariant_site_tfrecord_path is
    set, that VCF is read back, merged with the non-variant records, and the
    result is written to FLAGS.gvcf_outfile.

    Args:
        argv: Command-line arguments. More than one element means positional
            arguments were supplied, which is reported through
            errors.log_and_raise as an errors.CommandLineError.

    Raises:
        ValueError: if no record can be read from the input paths.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            failure_message = (
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(failure_message, errors.CommandLineError)
        del argv  # Unused.

        # The two gVCF flags must be either both set or both unset.
        nonvariant_input_given = bool(FLAGS.nonvariant_site_tfrecord_path)
        gvcf_output_given = bool(FLAGS.gvcf_outfile)
        if nonvariant_input_given != gvcf_output_given:
            errors.log_and_raise(
                'gVCF creation requires both nonvariant_site_tfrecord_path and '
                'gvcf_outfile flags to be set.', errors.CommandLineError)

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        fasta_reader = fasta.IndexedFastaReader(
            FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
        contigs = fasta_reader.header.contigs
        paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)

        # One CallVariantsOutput record is enough to learn the sample name.
        # This assumes every CallVariantsOutput proto in the infile holds a
        # single VariantCall inside its Variant proto, and that all records
        # share the same call_set_name.
        first_record = tf_utils.get_one_example_from_examples_path(
            ','.join(paths), proto=deepvariant_pb2.CallVariantsOutput)
        if first_record is None:
            raise ValueError(
                'Cannot find any records in {}'.format(','.join(paths)))

        sample_name = _extract_single_sample_name(first_record)
        header = dv_vcf_constants.deepvariant_header(
            contigs=contigs, sample_names=[sample_name])

        with tempfile.NamedTemporaryFile() as sorted_cvo_file:
            sorted_cvo_path = sorted_cvo_file.name
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, paths, sorted_cvo_path)
            independent_variants = _transform_call_variants_output_to_variants(
                input_sorted_tfrecord_path=sorted_cvo_path,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
            resolved_variants = haplotypes.maybe_resolve_conflicting_variants(
                independent_variants)
            write_variants_to_vcf(
                variant_generator=resolved_variants,
                output_vcf_path=FLAGS.outfile,
                header=header)

        # Nothing more to do unless a gVCF was requested.
        if not FLAGS.nonvariant_site_tfrecord_path:
            return

        nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
            FLAGS.nonvariant_site_tfrecord_path,
            key=_get_contig_based_variant_sort_keyfn(contigs),
            proto=variants_pb2.Variant)
        with vcf.VcfReader(FLAGS.outfile) as variant_reader:
            lessthanfn = _get_contig_based_lessthan(contigs)
            gvcf_variants = (_transform_to_gvcf_record(called_variant)
                             for called_variant in variant_reader.iterate())
            merged_variants = merge_variants_and_nonvariants(
                gvcf_variants, nonvariant_generator, lessthanfn, fasta_reader)
            write_variants_to_vcf(
                variant_generator=merged_variants,
                output_vcf_path=FLAGS.gvcf_outfile,
                header=header)
def main(argv=()):
  """Write a VCF, and optionally a gVCF, from call_variants output.

  The VCF is written to FLAGS.outfile. When
  FLAGS.nonvariant_site_tfrecord_path is set, the VCF just written is read
  back, merged with the non-variant records, and written to
  FLAGS.gvcf_outfile.

  Args:
    argv: Command-line arguments. More than one element means positional
      arguments were supplied, which is reported through errors.log_and_raise
      as an errors.CommandLineError.

  Raises:
    ValueError: if the first input path contains no records, so no sample
      name can be extracted.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be either both set or both unset.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    fasta_reader = fasta.RefFastaReader(FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    record = next(
        io_utils.read_tfrecords(
            paths[0], proto=deepvariant_pb2.CallVariantsOutput, max_records=1),
        None)
    if record is None:
      # A bare StopIteration from next() would give no hint about which input
      # was empty.
      raise ValueError('Cannot find any records in {}'.format(paths[0]))
    sample_name = _extract_single_sample_name(record)
    header = dv_vcf_constants.deepvariant_header(
        contigs=contigs, sample_names=[sample_name])
    with tempfile.NamedTemporaryFile() as temp:
      postprocess_variants_lib.process_single_sites_tfrecords(
          contigs, paths, temp.name)
      independent_variants = _transform_call_variants_output_to_variants(
          input_sorted_tfrecord_path=temp.name,
          qual_filter=FLAGS.qual_filter,
          multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
          sample_name=sample_name)
      variant_generator = haplotypes.maybe_resolve_conflicting_variants(
          independent_variants)
      write_variants_to_vcf(
          variant_generator=variant_generator,
          output_vcf_path=FLAGS.outfile,
          header=header)

    # Also write out the gVCF file if it was provided.
    if FLAGS.nonvariant_site_tfrecord_path:
      nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
          FLAGS.nonvariant_site_tfrecord_path,
          key=_get_contig_based_variant_sort_keyfn(contigs),
          proto=variants_pb2.Variant)
      with vcf.VcfReader(FLAGS.outfile, use_index=False) as variant_reader:
        lessthanfn = _get_contig_based_lessthan(contigs)
        gvcf_variants = (
            _transform_to_gvcf_record(variant)
            for variant in variant_reader.iterate())
        merged_variants = merge_variants_and_nonvariants(
            gvcf_variants, nonvariant_generator, lessthanfn, fasta_reader)
        write_variants_to_vcf(
            variant_generator=merged_variants,
            output_vcf_path=FLAGS.gvcf_outfile,
            header=header)
Exemple #17
0
def main(argv=()):
  """Write a VCF, and optionally a gVCF and stats report, from CVO records.

  When the input holds no CallVariantsOutput record, an empty VCF is written
  using FLAGS.sample_name if set, else dv_constants.DEFAULT_SAMPLE_NAME.
  Otherwise the records are sorted into a temporary file and transformed into
  variants. With FLAGS.nonvariant_site_tfrecord_path set, the VCF and gVCF are
  written together; outputs ending in '.gz' are indexed afterwards.

  Args:
    argv: Command-line arguments. More than one element means positional
      arguments were supplied, which is reported through errors.log_and_raise
      as an errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be either both set or both unset.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    fasta_reader = fasta.IndexedFastaReader(
        FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = sharded_file_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    record = tf_utils.get_one_example_from_examples_path(
        ','.join(paths), proto=deepvariant_pb2.CallVariantsOutput)

    # The temporary file backs the lazily-consumed variant generator, so it
    # must stay open until all writing is done. The try/finally guarantees it
    # is closed (and therefore removed) even when a later step raises.
    temp = None
    try:
      if record is None:
        logging.info('call_variants_output is empty. Writing out empty VCF.')
        sample_name = dv_constants.DEFAULT_SAMPLE_NAME
        if FLAGS.sample_name:
          logging.info(
              '--sample_name is set in postprocess_variant. Using %s as the '
              'sample name.', FLAGS.sample_name)
          sample_name = FLAGS.sample_name
        variant_generator = iter([])
      else:
        sample_name = _extract_single_sample_name(record)
        temp = tempfile.NamedTemporaryFile()
        start_time = time.time()
        postprocess_variants_lib.process_single_sites_tfrecords(
            contigs, paths, temp.name)
        logging.info('CVO sorting took %s minutes',
                     (time.time() - start_time) / 60)

        logging.info('Transforming call_variants_output to variants.')
        independent_variants = _transform_call_variants_output_to_variants(
            input_sorted_tfrecord_path=temp.name,
            qual_filter=FLAGS.qual_filter,
            multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
            sample_name=sample_name,
            group_variants=FLAGS.group_variants,
            use_multiallelic_model=FLAGS.use_multiallelic_model)
        variant_generator = haplotypes.maybe_resolve_conflicting_variants(
            independent_variants)

      header = dv_vcf_constants.deepvariant_header(
          contigs=contigs, sample_names=[sample_name])
      use_csi = _decide_to_use_csi(contigs)

      start_time = time.time()
      if not FLAGS.nonvariant_site_tfrecord_path:
        logging.info('Writing variants to VCF.')
        write_variants_to_vcf(
            variant_iterable=variant_generator,
            output_vcf_path=FLAGS.outfile,
            header=header)
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        logging.info('VCF creation took %s minutes',
                     (time.time() - start_time) / 60)
      else:
        logging.info('Merging and writing variants to VCF and gVCF.')
        lessthanfn = _get_contig_based_lessthan(contigs)
        with vcf.VcfWriter(
            FLAGS.outfile, header=header, round_qualities=True) as vcf_writer, \
            vcf.VcfWriter(
                FLAGS.gvcf_outfile, header=header, round_qualities=True) \
            as gvcf_writer:
          nonvariant_generator = tfrecord.read_shard_sorted_tfrecords(
              FLAGS.nonvariant_site_tfrecord_path,
              key=_get_contig_based_variant_sort_keyfn(contigs),
              proto=variants_pb2.Variant)
          merge_and_write_variants_and_nonvariants(variant_generator,
                                                   nonvariant_generator,
                                                   lessthanfn, fasta_reader,
                                                   vcf_writer, gvcf_writer)
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        if FLAGS.gvcf_outfile.endswith('.gz'):
          build_index(FLAGS.gvcf_outfile, use_csi)
        logging.info('Finished writing VCF and gVCF in %s minutes.',
                     (time.time() - start_time) / 60)
      if FLAGS.vcf_stats_report:
        outfile_base = _get_base_path(FLAGS.outfile)
        with vcf.VcfReader(FLAGS.outfile) as reader:
          vcf_stats.create_vcf_report(
              variants=reader.iterate(),
              output_basename=outfile_base,
              sample_name=sample_name,
              vcf_reader=reader)
    finally:
      if temp is not None:
        temp.close()
def main(_):
    """Build the evaluation graph and run slim's evaluation loop.

    Creates the model named by FLAGS.model_name on batches drawn from the
    dataset described by FLAGS.dataset_config_pbtxt, defines streaming metrics
    for several variant subsets, and evaluates checkpoints found in
    FLAGS.checkpoint_dir, writing summaries to FLAGS.eval_dir.

    An unset --dataset_config_pbtxt is logged as an error but does not stop
    execution here.
    """
    proto_utils.uses_fast_cpp_protos_or_die()

    if not FLAGS.dataset_config_pbtxt:
        logging.error('Need to specify --dataset_config_pbtxt')
    logging_level.set_from_flag()

    g = tf.Graph()
    with g.as_default():
        tf_global_step = slim.get_or_create_global_step()

        model = modeling.get_model(FLAGS.model_name)
        dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
        print('Running evaluations on {} with model {}\n'.format(
            dataset, model))

        batch = data_providers.make_training_batches(
            dataset.get_slim_dataset(), model, FLAGS.batch_size)
        images, labels, encoded_truth_variants = batch
        endpoints = model.create(images,
                                 dataset.num_classes,
                                 is_training=False)
        predictions = tf.argmax(endpoints['Predictions'], 1)

        # For eval, explicitly add moving_mean and moving_variance variables to
        # the MOVING_AVERAGE_VARIABLES collection.
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, tf_global_step)

        for var in tf.get_collection('moving_vars'):
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
        for var in slim.get_model_variables():
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)

        variables_to_restore = variable_averages.variables_to_restore()
        variables_to_restore[tf_global_step.op.name] = tf_global_step

        # Define the metrics:
        metrics = {
            'Accuracy': tf.contrib.metrics.streaming_accuracy,
            'Mean_absolute_error':
            tf.contrib.metrics.streaming_mean_absolute_error,
            'FPs': tf.contrib.metrics.streaming_false_positives,
            'FNs': tf.contrib.metrics.streaming_false_negatives,
        }

        def _make_selector(func):
            """Apply select_variants_weights with func to the truth variants."""
            return select_variants_weights(func, encoded_truth_variants)

        selectors = {
            'All': None,
            'SNPs': _make_selector(variantutils.is_snp),
            'Indels': _make_selector(variantutils.is_indel),
            'Insertions': _make_selector(variantutils.has_insertion),
            'Deletions': _make_selector(variantutils.has_deletion),
            'BiAllelic': _make_selector(variantutils.is_biallelic),
            'MultiAllelic': _make_selector(variantutils.is_multiallelic),
            # These haven't proven particularly useful, but are commented out here
            # in case someone wants to do some more explorations.
            # 'HomRef': tf.equal(labels, 0),
            # 'Het': tf.equal(labels, 1),
            # 'HomAlt': tf.equal(labels, 2),
            # 'NonRef': tf.greater(labels, 0),
        }
        metrics = calling_metrics(metrics, selectors, predictions, labels)
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            metrics)

        # items() exists on both Python 2 and 3, unlike iteritems().
        for name, value in names_to_values.items():
            slim.summaries.add_scalar_summary(value, name, print_summary=True)

        slim.evaluation.evaluation_loop(
            FLAGS.master,
            FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_dir,
            num_evals=FLAGS.batches_per_eval_step,
            # Materialize the update ops as a list; on Python 3 values() is
            # only a view object.
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore,
            max_number_of_evaluations=FLAGS.max_evaluations,
            eval_interval_secs=FLAGS.eval_interval_secs)