Exemple #1
0
def main(argv=()):
    """Validates the command line and runs call_variants from flag values.

    Args:
        argv: Command-line argument sequence. When it holds more than one
            element the extras are reported through errors.log_and_raise
            with errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            message = (
                'Command line parsing failure: call_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.
        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        # A master address is only resolved when running on TPU; otherwise
        # an empty string is passed through.
        master = (tf_utils.resolve_master(FLAGS.master, FLAGS.tpu_name,
                                          FLAGS.tpu_zone, FLAGS.gcp_project)
                  if FLAGS.use_tpu else '')

        model = modeling.get_model(FLAGS.model_name)
        inference_kwargs = dict(
            examples_filename=FLAGS.examples,
            checkpoint_path=FLAGS.checkpoint,
            model=model,
            execution_hardware=FLAGS.execution_hardware,
            output_file=FLAGS.outfile,
            max_batches=FLAGS.max_batches,
            batch_size=FLAGS.batch_size,
            master=master,
            use_tpu=FLAGS.use_tpu,
        )
        call_variants(**inference_kwargs)
def main(argv):
    """Builds a VCF stats report for the single-sample VCF named by the flags.

    Args:
        argv: Command-line argument sequence; anything beyond the first
            element is reported as an unexpected positional argument.

    Raises:
        ValueError: If the VCF header does not list exactly one sample.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            message = (
                'Command line parsing failure: vcf_stats_report does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv[1:])))
            errors.log_and_raise(message, errors.CommandLineError)

    with vcf.VcfReader(FLAGS.input_vcf) as reader:
        header = reader.header
        names = header.sample_names
        if len(names) != 1:
            raise ValueError(
                'There must be exactly one sample in VCF: {}'.format(
                    FLAGS.input_vcf))
        sample_name = names[0]

        # Missing GT causes an error later while reading, so fail early with
        # a clearer message.
        format_ids = [fmt.id for fmt in header.formats]
        if not any(format_id == 'GT' for format_id in format_ids):
            errors.log_and_raise('ERROR: No GT sub-column in VCF.')

        # A num_records of -1 means "read everything"; any other value caps
        # the number of records taken from the reader.
        variants = (reader.iterate() if FLAGS.num_records == -1 else
                    itertools.islice(reader.iterate(), FLAGS.num_records))

        vcf_stats.create_vcf_report(variants,
                                    output_basename=FLAGS.outfile_base,
                                    sample_name=sample_name,
                                    vcf_reader=reader)
def main(argv):
  """Sets the stderr log threshold, validates the command line, and calls run().

  Args:
    argv: Command-line argument sequence; anything beyond the first element
      is reported as an unexpected positional argument.
  """
  logging.set_stderrthreshold('info')
  with errors.clean_commandline_error_exit():
    has_positional_args = len(argv) > 1
    if has_positional_args:
      message = (
          'Command line parsing failure: show_examples.py does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv[1:])))
      errors.log_and_raise(message, errors.CommandLineError)
    run()
Exemple #4
0
def main(argv=()):
    """Validates the command line, builds options from flags, and runs make_examples.

    Args:
        argv: Command-line argument sequence. When it holds more than one
            element the extras are reported through errors.log_and_raise
            with errors.CommandLineError.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            message = (
                'Command line parsing failure: make_examples does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()
        hts_log_level = hts_verbose.htsLogLevel[FLAGS.hts_logging_level]
        hts_verbose.set(hts_log_level)

        # Building the options may do I/O; they are validated before running.
        options = default_options(add_flags=True, flags_obj=FLAGS)
        check_options_are_valid(options)

        make_examples_core.make_examples_runner(options)
Exemple #5
0
def check_options_are_valid(options):
  """Validates options for a trio run on top of the shared option checks.

  Args:
    options: The options object to validate; its sample_options entry at
      MAIN_SAMPLE_INDEX is treated as the child sample.
  """

  # Shared checks first (common to DeepVariant and DeepTrio).
  make_examples_options.check_options_are_valid(
      options, main_sample_index=MAIN_SAMPLE_INDEX)

  child = options.sample_options[MAIN_SAMPLE_INDEX]

  # Trio-specific: the child's sample name must differ from both parents'.
  child_name = child.variant_caller_options.sample_name
  parent_names = (FLAGS.sample_name_parent1, FLAGS.sample_name_parent2)
  if any(child_name == parent_name for parent_name in parent_names):
    errors.log_and_raise(
        'The sample_name of the child is the same as one of '
        'the parents.', errors.CommandLineError)

  uses_row_pileup = options.pic_options.alt_aligned_pileup == 'rows'
  if uses_row_pileup:
    errors.log_and_raise('--alt_aligned_pileup="rows" cannot be used with '
                         'DeepTrio because the pileup images would become '
                         'too tall for InceptionV3.')
Exemple #6
0
def main(argv=()):
  """Validates the command line and runs call_variants from flag values.

  Args:
    argv: Command-line argument sequence. When it holds more than one element
      the extras are reported through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      message = (
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(message, errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    model = modeling.get_model(FLAGS.model_name)
    inference_kwargs = dict(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size)
    call_variants(**inference_kwargs)
Exemple #7
0
 def test_log_and_raise(self, msg, cls):
     """Checks that errors.log_and_raise logs msg once and raises cls.

     Args:
       msg: Message passed to log_and_raise; also used as the regex that the
         raised exception's text must match.
       cls: Exception class expected to be raised.
     """
     with mock.patch.object(logging, 'error') as mock_logging:
         # assertRaisesRegex is the supported name; the assertRaisesRegexp
         # alias was deprecated in Python 3.2 and removed in 3.12.
         with self.assertRaisesRegex(cls, msg):
             errors.log_and_raise(msg, cls)
         mock_logging.assert_called_once_with(msg)
def check_options_are_valid(options: deepvariant_pb2.MakeExamplesOptions,
                            main_sample_index: int):
    """Validates that the chosen make_examples options are mutually consistent.

    Checks run in a fixed order and each failure is reported through
    errors.log_and_raise, so the first violated rule is the one reported.

    Args:
        options: The MakeExamplesOptions to validate.
        main_sample_index: Index into options.sample_options of the main
            sample.
    """

    def reject(message):
        # Command-line problems all share the same error class.
        errors.log_and_raise(message, errors.CommandLineError)

    vcf_importer = deepvariant_pb2.MakeExamplesOptions.VCF_CANDIDATE_IMPORTER

    # Requirements that hold in every mode.
    if not options.reference_filename:
        reject('ref argument is required.')

    if not options.examples_filename:
        reject('examples argument is required.')
    if options.n_cores != 1:
        reject('Currently only supports n_cores == 1 but got {}.'.format(
            options.n_cores))

    main_sample = options.sample_options[main_sample_index]
    if not main_sample.reads_filenames:
        reject('reads argument is required.')

    if make_examples_core.in_training_mode(options):
        if not options.truth_variants_filename:
            reject('truth_variants is required when in training mode.')

        # Confident regions are mandatory unless candidates come from a VCF,
        # in which case their absence is only logged.
        lacks_confident_regions = not options.confident_regions_filename
        if lacks_confident_regions and options.variant_caller != vcf_importer:
            reject('confident_regions is required when in training mode.')
        elif lacks_confident_regions:
            logging.info(
                'Note: --confident_regions is optional with '
                'vcf_candidate_importer. '
                'You did not specify --confident_regions, which means '
                'examples will be generated for the whole region.')

        if options.gvcf_filename:
            reject('gvcf is not allowed in training mode.')
        if (options.variant_caller == vcf_importer
                and main_sample.proposed_variants_filename):
            reject('--proposed_variants* should not be used with '
                   'vcf_candidate_importer in training mode. '
                   'Use --truth_variants to pass in the candidates '
                   'with correct labels for training.')
    else:
        # Calling mode: every sample that has reads also needs a real name.
        for sample in options.sample_options:
            caller_options = sample.variant_caller_options
            if (sample.reads_filenames
                    and caller_options.sample_name == _UNKNOWN_SAMPLE):
                reject(
                    'sample_name must be specified for all samples in calling mode.'
                )
        if main_sample.variant_caller_options.gq_resolution < 1:
            reject('gq_resolution must be a positive integer.')

        if options.truth_variants_filename:
            reject('Do not specify --truth_variants in calling mode.')

        # NOTE(review): this tests `is None`; if proposed_variants_filename is
        # a protobuf string field it would default to '' rather than None,
        # and this check could never fire - confirm against the proto.
        if options.variant_caller == vcf_importer and any(
                sample.proposed_variants_filename is None
                for sample in options.sample_options):
            reject('--proposed_variants* is required with vcf_candidate_importer in '
                   'calling mode.')

    multiplier = FLAGS.vsc_min_fraction_multiplier
    if multiplier > 1.0 or multiplier <= 0:
        reject('--vsc_min_fraction_multiplier must be within (0-1] interval.')

    for sample in options.sample_options:
        height = sample.pileup_height
        if height < 10 or height > 100:
            # Reported with log_and_raise's default exception class.
            errors.log_and_raise(
                'Pileup image heights must be between 10 and 100.')
def shared_flags_to_options(
        add_flags, flags_obj, samples_in_order, sample_role_to_train,
        main_sample_index) -> deepvariant_pb2.MakeExamplesOptions:
    """Creates options from flags that are shared, along with given samples.

    Read requirements and allele-counter options are always built from
    flags_obj. The remaining flag values are copied into the options only
    when add_flags is truthy.

    Args:
        add_flags: When truthy, flag values from flags_obj are copied into
            the returned options; otherwise only the baseline options are
            returned.
        flags_obj: Object exposing the flag values as attributes.
        samples_in_order: Stored as the options' sample_options.
        sample_role_to_train: Stored as the options' sample_role_to_train.
        main_sample_index: Stored as the options' main_sample_index; also
            selects the sample whose fraction_reference_sites_to_emit is set
            in training mode.

    Returns:
        The populated deepvariant_pb2.MakeExamplesOptions.

    Raises:
        KeyError: If flags_obj.multi_allelic_mode is non-empty and not one of
            'include_het_alt_images' / 'exclude_het_alt_images'.
        Invalid flag combinations are reported through errors.log_and_raise
        with errors.CommandLineError (presumably raising that error).
    """
    # Read filtering requirements come from flags_obj regardless of add_flags.
    read_reqs = reads_pb2.ReadRequirements(
        keep_duplicates=flags_obj.keep_duplicates,
        keep_supplementary_alignments=flags_obj.keep_supplementary_alignments,
        keep_secondary_alignments=flags_obj.keep_secondary_alignments,
        min_base_quality=flags_obj.min_base_quality,
        min_mapping_quality=flags_obj.min_mapping_quality,
        min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)

    logging.vlog(3, 'ReadRequirements are: %s', read_reqs)

    pic_options = pileup_image.default_options(read_requirements=read_reqs)

    allele_counter_options = deepvariant_pb2.AlleleCounterOptions(
        partition_size=flags_obj.partition_size,
        read_requirements=read_reqs,
        track_ref_reads=flags_obj.track_ref_reads,
        normalize_reads=flags_obj.normalize_reads,
        keep_legacy_behavior=flags_obj.keep_legacy_allele_counter_behavior)

    # Baseline options; flag-derived fields are filled in below only when
    # add_flags is set.
    options = deepvariant_pb2.MakeExamplesOptions(
        exclude_contigs=exclude_contigs.EXCLUDED_HUMAN_CONTIGS,
        # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed=609314161,
        # # Not specified by default: calling_regions = 3;
        read_requirements=read_reqs,
        allele_counter_options=allele_counter_options,
        pic_options=pic_options,
        n_cores=1,
        task_id=0,
        num_shards=0,
        min_shared_contigs_basepairs=0.9,
        sample_options=samples_in_order,
        main_sample_index=main_sample_index,
        sample_role_to_train=sample_role_to_train)

    if add_flags:
        # Enum-valued flags are upper-cased before being parsed into the
        # corresponding proto enum.
        options.mode = make_examples_core.parse_proto_enum_flag(
            deepvariant_pb2.MakeExamplesOptions.Mode, flags_obj.mode.upper())

        options.labeler_algorithm = make_examples_core.parse_proto_enum_flag(
            deepvariant_pb2.MakeExamplesOptions.LabelerAlgorithm,
            flags_obj.labeler_algorithm.upper())

        options.variant_caller = make_examples_core.parse_proto_enum_flag(
            deepvariant_pb2.MakeExamplesOptions.VariantCaller,
            flags_obj.variant_caller.upper())

        # File-name fields are only set when the matching flag is non-empty.
        if flags_obj.ref:
            options.reference_filename = flags_obj.ref
        if flags_obj.confident_regions:
            options.confident_regions_filename = flags_obj.confident_regions
        if flags_obj.truth_variants:
            options.truth_variants_filename = flags_obj.truth_variants
        if flags_obj.sequencing_type:
            options.pic_options.sequencing_type = make_examples_core.parse_proto_enum_flag(
                deepvariant_pb2.PileupImageOptions.SequencingType,
                flags_obj.sequencing_type)

        if flags_obj.channels:
            # Comma-separated list of optional channels; each non-empty name
            # must be one of dv_constants.OPT_CHANNELS.
            channel_set = flags_obj.channels.split(',')
            for channel in channel_set:
                if channel and channel not in dv_constants.OPT_CHANNELS:
                    err_msg = 'Channel "{}" is not one of the available opt channels: {}'.format(
                        channel, ', '.join(dv_constants.OPT_CHANNELS))
                    errors.log_and_raise(err_msg, errors.CommandLineError)
            # NOTE(review): empty entries (e.g. from a trailing comma) skip
            # the validation above but are still stored and counted in
            # num_channels - confirm this is intended.
            options.pic_options.channels[:] = channel_set
            options.pic_options.num_channels += len(channel_set)

        if flags_obj.multi_allelic_mode:
            # Any other non-empty value raises KeyError from this lookup.
            multi_allelic_enum = {
                'include_het_alt_images':
                deepvariant_pb2.PileupImageOptions.ADD_HET_ALT_IMAGES,
                'exclude_het_alt_images':
                deepvariant_pb2.PileupImageOptions.NO_HET_ALT_IMAGES,
            }[flags_obj.multi_allelic_mode]
            options.pic_options.multi_allelic_mode = multi_allelic_enum

        if flags_obj.pileup_image_width:
            options.pic_options.width = flags_obj.pileup_image_width

        options.pic_options.alt_aligned_pileup = flags_obj.alt_aligned_pileup
        options.pic_options.types_to_alt_align = flags_obj.types_to_alt_align

        if flags_obj.add_supporting_other_alt_color:
            options.pic_options.other_allele_supporting_read_alpha = 0.3

        if flags_obj.select_variant_types:
            # split() with no argument splits the flag value on whitespace.
            options.select_variant_types[:] = flags_obj.select_variant_types.split(
            )
            for svt in options.select_variant_types:
                if svt not in make_examples_core.VARIANT_TYPE_SELECTORS:
                    errors.log_and_raise(
                        'Select variant type {} not recognized. Allowed values are {}'
                        .format(
                            svt, ', '.join(
                                make_examples_core.VARIANT_TYPE_SELECTORS)),
                        errors.CommandLineError)

        # Unset file flags are passed to resolve_filespecs as empty strings.
        num_shards, examples, candidates, gvcf, runtime_by_region = (
            sharded_file_utils.resolve_filespecs(
                flags_obj.task, flags_obj.examples or '', flags_obj.candidates
                or '', flags_obj.gvcf or '', flags_obj.runtime_by_region
                or ''))
        options.examples_filename = examples
        options.candidates_filename = candidates
        options.gvcf_filename = gvcf
        options.include_med_dp = flags_obj.include_med_dp
        options.task_id = flags_obj.task
        options.num_shards = num_shards
        options.runtime_by_region = runtime_by_region

        options.parse_sam_aux_fields = make_examples_core.resolve_sam_aux_fields(
            flags_obj=flags_obj)
        if flags_obj.aux_fields_to_keep:
            options.aux_fields_to_keep[:] = flags_obj.aux_fields_to_keep.split(
                ',')
        else:
            # NOTE(review): the branch above treats aux_fields_to_keep as a
            # repeated field (slice assignment); protobuf normally rejects
            # direct assignment to a repeated field - confirm this branch
            # works when the flag is empty.
            options.aux_fields_to_keep = None
        options.use_original_quality_scores = flags_obj.use_original_quality_scores

        if flags_obj.add_hp_channel:
            # Enabling the HP channel increases the channel count by one.
            options.pic_options.num_channels += 1
            options.pic_options.add_hp_channel = True

        # NOTE(review): only negative values are rejected here, so 0 is
        # accepted even though the message says "positive" - confirm wording.
        if flags_obj.hp_tag_for_assembly_polishing < 0:
            errors.log_and_raise(
                '--hp_tag_for_assembly_polishing has to be set to a positive int.',
                errors.CommandLineError)
        if (flags_obj.hp_tag_for_assembly_polishing > 0
                and not flags_obj.sort_by_haplotypes):
            errors.log_and_raise(
                '--hp_tag_for_assembly_polishing requires --sort_by_haplotypes to be '
                'set ', errors.CommandLineError)

        options.pic_options.sort_by_haplotypes = flags_obj.sort_by_haplotypes
        options.pic_options.hp_tag_for_assembly_polishing = flags_obj.hp_tag_for_assembly_polishing

        if flags_obj.write_run_info:
            # The run-info path is the resolved examples path plus a suffix.
            options.run_info_filename = examples + _RUN_INFO_FILE_EXTENSION

        options.calling_regions.extend(
            make_examples_core.parse_regions_flag(flags_obj.regions))
        options.exclude_calling_regions.extend(
            make_examples_core.parse_regions_flag(flags_obj.exclude_regions))

        options.realigner_enabled = flags_obj.realign_reads
        options.realigner_options.CopyFrom(
            realigner.realigner_config(flags_obj))

        # In training mode the main sample's reference-site emission fraction
        # is overridden unless the flag holds the NO_RANDOM_REF sentinel.
        if (options.mode == deepvariant_pb2.MakeExamplesOptions.TRAINING
                and flags_obj.training_random_emit_ref_sites != NO_RANDOM_REF):
            options.sample_options[
                main_sample_index].variant_caller_options.fraction_reference_sites_to_emit = (
                    flags_obj.training_random_emit_ref_sites)

        if (flags_obj.use_allele_frequency and not flags_obj.population_vcfs):
            errors.log_and_raise(
                'If use_allele_frequency is set then population_vcfs '
                'must be provided.', errors.CommandLineError)
        if flags_obj.use_allele_frequency:
            # The allele-frequency channel increases the channel count by one.
            options.use_allele_frequency = flags_obj.use_allele_frequency
            options.pic_options.num_channels += 1
            options.pic_options.use_allele_frequency = True
        if flags_obj.population_vcfs:
            # The flag value is split on commas or single spaces.
            options.population_vcf_filenames.extend(
                re.split(',| ', flags_obj.population_vcfs))
        options.max_reads_per_partition = flags_obj.max_reads_per_partition
        options.use_ref_for_cram = flags_obj.use_ref_for_cram
        options.hts_block_size = flags_obj.hts_block_size
        options.logging_every_n_candidates = flags_obj.logging_every_n_candidates
        options.customized_classes_labeler_classes_list = flags_obj.customized_classes_labeler_classes_list
        options.customized_classes_labeler_info_field_name = flags_obj.customized_classes_labeler_info_field_name

    return options
Exemple #10
0
def main(argv=()):
    """Converts call_variants output into a VCF and, optionally, a gVCF.

    Args:
        argv: Command-line argument sequence. When it holds more than one
            element the extras are reported through errors.log_and_raise
            with errors.CommandLineError.

    Raises:
        ValueError: If no record can be read from the input paths.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            message = (
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)))
            errors.log_and_raise(message, errors.CommandLineError)
        del argv  # Unused.

        # The two gVCF flags must be given together or not at all.
        has_nonvariant_input = bool(FLAGS.nonvariant_site_tfrecord_path)
        has_gvcf_output = bool(FLAGS.gvcf_outfile)
        if has_nonvariant_input != has_gvcf_output:
            errors.log_and_raise(
                'gVCF creation requires both nonvariant_site_tfrecord_path and '
                'gvcf_outfile flags to be set.', errors.CommandLineError)

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        fasta_reader = fasta.IndexedFastaReader(FLAGS.ref,
                                                cache_size=_FASTA_CACHE_SIZE)
        contigs = fasta_reader.header.contigs
        paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
        joined_paths = ','.join(paths)

        # One CallVariantsOutput record is read to learn the sample name.
        # This assumes every record in the input holds a single VariantCall
        # and that call_set_name is the same in all of them.
        record = tf_utils.get_one_example_from_examples_path(
            joined_paths, proto=deepvariant_pb2.CallVariantsOutput)
        if record is None:
            raise ValueError(
                'Cannot find any records in {}'.format(joined_paths))

        sample_name = _extract_single_sample_name(record)
        header = dv_vcf_constants.deepvariant_header(
            contigs=contigs, sample_names=[sample_name])

        # The sorted records live in a temporary file that must stay open
        # until the VCF has been written.
        with tempfile.NamedTemporaryFile() as sorted_records_file:
            sorted_path = sorted_records_file.name
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, paths, sorted_path)
            independent_variants = _transform_call_variants_output_to_variants(
                input_sorted_tfrecord_path=sorted_path,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
            resolved_variants = haplotypes.maybe_resolve_conflicting_variants(
                independent_variants)
            write_variants_to_vcf(variant_generator=resolved_variants,
                                  output_vcf_path=FLAGS.outfile,
                                  header=header)

        # Nothing more to do unless a gVCF was requested.
        if not FLAGS.nonvariant_site_tfrecord_path:
            return

        nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
            FLAGS.nonvariant_site_tfrecord_path,
            key=_get_contig_based_variant_sort_keyfn(contigs),
            proto=variants_pb2.Variant)
        with vcf.VcfReader(FLAGS.outfile) as variant_reader:
            lessthanfn = _get_contig_based_lessthan(contigs)
            gvcf_variants = (_transform_to_gvcf_record(called_variant)
                             for called_variant in variant_reader.iterate())
            merged_variants = merge_variants_and_nonvariants(
                gvcf_variants, nonvariant_generator, lessthanfn,
                fasta_reader)
            write_variants_to_vcf(variant_generator=merged_variants,
                                  output_vcf_path=FLAGS.gvcf_outfile,
                                  header=header)
Exemple #11
0
def main(argv=()):
  """Converts call_variants output into a VCF and, optionally, a gVCF.

  When the input holds no records an empty VCF is written, using
  FLAGS.sample_name if set and dv_constants.DEFAULT_SAMPLE_NAME otherwise.

  Args:
    argv: Command-line argument sequence. When it holds more than one element
      the extras are reported through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be given together or not at all.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    fasta_reader = fasta.IndexedFastaReader(
        FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = sharded_file_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    record = tf_utils.get_one_example_from_examples_path(
        ','.join(paths), proto=deepvariant_pb2.CallVariantsOutput)

    # Temporary file holding the sorted records. It is created only when
    # there is input, and must stay open until all output has been written
    # (the variant generator is presumably read from it lazily). The
    # try/finally guarantees it is closed even if a later step raises.
    temp = None
    try:
      if record is None:
        logging.info('call_variants_output is empty. Writing out empty VCF.')
        sample_name = dv_constants.DEFAULT_SAMPLE_NAME
        if FLAGS.sample_name:
          logging.info(
              '--sample_name is set in postprocess_variant. Using %s as the '
              'sample name.', FLAGS.sample_name)
          sample_name = FLAGS.sample_name
        variant_generator = iter([])
      else:
        sample_name = _extract_single_sample_name(record)
        temp = tempfile.NamedTemporaryFile()
        start_time = time.time()
        postprocess_variants_lib.process_single_sites_tfrecords(
            contigs, paths, temp.name)
        logging.info('CVO sorting took %s minutes',
                     (time.time() - start_time) / 60)

        logging.info('Transforming call_variants_output to variants.')
        independent_variants = _transform_call_variants_output_to_variants(
            input_sorted_tfrecord_path=temp.name,
            qual_filter=FLAGS.qual_filter,
            multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
            sample_name=sample_name,
            group_variants=FLAGS.group_variants,
            use_multiallelic_model=FLAGS.use_multiallelic_model)
        variant_generator = haplotypes.maybe_resolve_conflicting_variants(
            independent_variants)

      header = dv_vcf_constants.deepvariant_header(
          contigs=contigs, sample_names=[sample_name])
      use_csi = _decide_to_use_csi(contigs)

      start_time = time.time()
      if not FLAGS.nonvariant_site_tfrecord_path:
        logging.info('Writing variants to VCF.')
        write_variants_to_vcf(
            variant_iterable=variant_generator,
            output_vcf_path=FLAGS.outfile,
            header=header)
        # Only gzipped outputs get an index.
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        logging.info('VCF creation took %s minutes',
                     (time.time() - start_time) / 60)
      else:
        logging.info('Merging and writing variants to VCF and gVCF.')
        lessthanfn = _get_contig_based_lessthan(contigs)
        with vcf.VcfWriter(
            FLAGS.outfile, header=header, round_qualities=True) as vcf_writer, \
            vcf.VcfWriter(
                FLAGS.gvcf_outfile, header=header, round_qualities=True) \
            as gvcf_writer:
          nonvariant_generator = tfrecord.read_shard_sorted_tfrecords(
              FLAGS.nonvariant_site_tfrecord_path,
              key=_get_contig_based_variant_sort_keyfn(contigs),
              proto=variants_pb2.Variant)
          merge_and_write_variants_and_nonvariants(
              variant_generator, nonvariant_generator, lessthanfn,
              fasta_reader, vcf_writer, gvcf_writer)
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        if FLAGS.gvcf_outfile.endswith('.gz'):
          build_index(FLAGS.gvcf_outfile, use_csi)
        logging.info('Finished writing VCF and gVCF in %s minutes.',
                     (time.time() - start_time) / 60)
      if FLAGS.vcf_stats_report:
        outfile_base = _get_base_path(FLAGS.outfile)
        with vcf.VcfReader(FLAGS.outfile) as reader:
          vcf_stats.create_vcf_report(
              variants=reader.iterate(),
              output_basename=outfile_base,
              sample_name=sample_name,
              vcf_reader=reader)
    finally:
      if temp is not None:
        temp.close()
def main(argv=()):
  """Validates the command line and options, then runs make_examples.

  Options are built from flags, checked for mode-independent requirements,
  then for training-mode or calling-mode requirements, and finally passed to
  make_examples_runner. Each failed check is reported through
  errors.log_and_raise with errors.CommandLineError.

  Args:
    argv: Command-line argument sequence. When it holds more than one element
      the extras are reported as unexpected positional arguments.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: make_examples does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()
    hts_verbose.set(hts_verbose.htsLogLevel[FLAGS.hts_logging_level])

    # Set up options; may do I/O.
    options = default_options(add_flags=True, flags_obj=FLAGS)

    # Check arguments that apply to any mode.
    if not options.reference_filename:
      errors.log_and_raise('ref argument is required.', errors.CommandLineError)
    if not options.reads_filename:
      errors.log_and_raise('reads argument is required.',
                           errors.CommandLineError)
    if not options.examples_filename:
      errors.log_and_raise('examples argument is required.',
                           errors.CommandLineError)
    if options.n_cores != 1:
      errors.log_and_raise(
          'Currently only supports n_cores == 1 but got {}.'.format(
              options.n_cores), errors.CommandLineError)

    # Check for argument issues specific to train mode.
    if in_training_mode(options):
      if not options.truth_variants_filename:
        errors.log_and_raise(
            'truth_variants is required when in training mode.',
            errors.CommandLineError)
      if not options.confident_regions_filename:
        errors.log_and_raise(
            'confident_regions is required when in training mode.',
            errors.CommandLineError)
      if options.gvcf_filename:
        errors.log_and_raise('gvcf is not allowed in training mode.',
                             errors.CommandLineError)
    else:
      # Check for argument issues specific to calling mode.
      if options.variant_caller_options.sample_name == _UNKNOWN_SAMPLE:
        errors.log_and_raise('sample_name must be specified in calling mode.',
                             errors.CommandLineError)
      # The check rejects every value below 1 (including 0), so the message
      # says "positive" rather than "non-negative".
      if options.variant_caller_options.gq_resolution < 1:
        errors.log_and_raise('gq_resolution must be a positive integer.',
                             errors.CommandLineError)

    # Run!
    make_examples_runner(options)
def main(argv=()):
  """Converts call_variants output into a VCF and, optionally, a gVCF.

  Args:
    argv: Command-line argument sequence. When it holds more than one element
      the extras are reported through errors.log_and_raise with
      errors.CommandLineError.

  Raises:
    ValueError: If none of the input paths yields a record.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be given together or not at all.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    fasta_reader = fasta.RefFastaReader(FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    #
    # Each shard is probed in turn so that an empty first shard does not
    # abort the run with a bare StopIteration; if no shard has a record a
    # descriptive ValueError is raised instead.
    record = None
    for path in paths:
      record = next(
          io_utils.read_tfrecords(
              path, proto=deepvariant_pb2.CallVariantsOutput, max_records=1),
          None)
      if record is not None:
        break
    if record is None:
      raise ValueError('Cannot find any records in {}'.format(','.join(paths)))

    sample_name = _extract_single_sample_name(record)
    header = dv_vcf_constants.deepvariant_header(
        contigs=contigs, sample_names=[sample_name])
    # The sorted records live in a temporary file that stays open until the
    # VCF has been written.
    with tempfile.NamedTemporaryFile() as temp:
      postprocess_variants_lib.process_single_sites_tfrecords(
          contigs, paths, temp.name)
      independent_variants = _transform_call_variants_output_to_variants(
          input_sorted_tfrecord_path=temp.name,
          qual_filter=FLAGS.qual_filter,
          multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
          sample_name=sample_name)
      variant_generator = haplotypes.maybe_resolve_conflicting_variants(
          independent_variants)
      write_variants_to_vcf(
          variant_generator=variant_generator,
          output_vcf_path=FLAGS.outfile,
          header=header)

    # Also write out the gVCF file if it was provided.
    if FLAGS.nonvariant_site_tfrecord_path:
      nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
          FLAGS.nonvariant_site_tfrecord_path,
          key=_get_contig_based_variant_sort_keyfn(contigs),
          proto=variants_pb2.Variant)
      # The VCF just written is read back (without an index) and merged with
      # the non-variant records.
      with vcf.VcfReader(FLAGS.outfile, use_index=False) as variant_reader:
        lessthanfn = _get_contig_based_lessthan(contigs)
        gvcf_variants = (
            _transform_to_gvcf_record(variant)
            for variant in variant_reader.iterate())
        merged_variants = merge_variants_and_nonvariants(
            gvcf_variants, nonvariant_generator, lessthanfn, fasta_reader)
        write_variants_to_vcf(
            variant_generator=merged_variants,
            output_vcf_path=FLAGS.gvcf_outfile,
            header=header)
Exemple #14
0
def trio_samples_from_flags(add_flags=True, flags_obj=None):
  """Assembles the three SampleOptions protos of a trio from flag values.

  Args:
    add_flags: If true, also copy the per-sample flag values (reads, proposed
      variants, downsampling fractions, pileup heights) onto the options and
      honor --sample_name_to_train.
    flags_obj: Object exposing the flag values as attributes. It is
      dereferenced unconditionally, so the None default is not usable as-is.

  Returns:
    A tuple (samples_in_order, sample_role_to_train): the options listed as
    [parent1, child, parent2], and the role string of the sample to train on
    ('child' unless --sample_name_to_train selects parent1).
  """
  # Resolve the sample names first: child, then parent1, then parent2.
  name_child = make_examples_core.assign_sample_name(
      sample_name_flag=flags_obj.sample_name,
      reads_filenames=flags_obj.reads)
  name_p1 = make_examples_core.assign_sample_name(
      sample_name_flag=flags_obj.sample_name_parent1,
      reads_filenames=flags_obj.reads_parent1)
  name_p2 = make_examples_core.assign_sample_name(
      sample_name_flag=flags_obj.sample_name_parent2,
      reads_filenames=flags_obj.reads_parent2)

  def _sample_options(role, name, order, pileup_height):
    """Builds one SampleOptions with variant caller options for `name`."""
    return deepvariant_pb2.SampleOptions(
        role=role,
        name=name,
        variant_caller_options=make_examples_core.make_vc_options(
            sample_name=name, flags_obj=flags_obj),
        order=order,
        pileup_height=pileup_height)

  parent1 = _sample_options('parent1', name_p1, [0, 1, 2],
                            dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT)
  child = _sample_options('child', name_child, [0, 1, 2],
                          dt_constants.PILEUP_DEFAULT_HEIGHT_CHILD)
  # The two parents trade places when calling on parent2.
  parent2 = _sample_options('parent2', name_p2, [2, 1, 0],
                            dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT)

  # This ordering is the default order of samples; it is also the list that
  # the indices in each sample's custom .order refer to.
  samples_in_order = [parent1, child, parent2]

  # For backward compatibility, the child is trained on unless
  # --sample_name_to_train says otherwise.
  sample_role_to_train = 'child'

  if not add_flags:
    return samples_in_order, sample_role_to_train

  # Comma-separated reads paths, one flag per sample.
  for options, reads in ((child, flags_obj.reads),
                         (parent1, flags_obj.reads_parent1),
                         (parent2, flags_obj.reads_parent2)):
    if reads:
      options.reads_filenames.extend(reads.split(','))

  for options, proposed in ((child, flags_obj.proposed_variants_child),
                            (parent1, flags_obj.proposed_variants_parent1),
                            (parent2, flags_obj.proposed_variants_parent2)):
    if proposed:
      options.proposed_variants_filename = proposed

  if flags_obj.downsample_fraction_child != NO_DOWNSAMPLING:
    child.downsample_fraction = flags_obj.downsample_fraction_child
  if flags_obj.downsample_fraction_parents != NO_DOWNSAMPLING:
    # Both parents share a single downsampling flag.
    for options in (parent1, parent2):
      options.downsample_fraction = flags_obj.downsample_fraction_parents

  if flags_obj.pileup_image_height_child:
    child.pileup_height = flags_obj.pileup_image_height_child
  if flags_obj.pileup_image_height_parent:
    parent_height = flags_obj.pileup_image_height_parent
    parent1.pileup_height = parent_height
    parent2.pileup_height = parent_height

  name_to_train = flags_obj.sample_name_to_train
  if name_to_train:
    # Compared against the raw flag values; the child is checked first.
    if name_to_train == flags_obj.sample_name:
      sample_role_to_train = child.role
    elif name_to_train == flags_obj.sample_name_parent1:
      sample_role_to_train = parent1.role
    else:
      errors.log_and_raise(
          '--sample_name_to_train must match either --sample_name or '
          '--sample_name_parent1, or it can be unset to default to '
          '--sample_name.', errors.CommandLineError)

  return samples_in_order, sample_role_to_train
Exemple #15
0
 def test_log_and_raise(self, msg, cls):
   """Checks that errors.log_and_raise logs `msg` once and raises `cls`.

   Args:
     msg: Message passed to log_and_raise; also used as the regex the raised
       exception's text must match.
     cls: Exception class that log_and_raise is expected to raise.
   """
   with mock.patch.object(logging, 'error') as mock_logging:
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest.TestCase in Python 3.12.
     with self.assertRaisesRegex(cls, msg):
       errors.log_and_raise(msg, cls)
     # Verified while logging.error is still patched.
     mock_logging.assert_called_once_with(msg)