def main(argv=()):
  """Entry point: validates the command line and runs call_variants.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    has_positional_args = len(argv) > 1
    if has_positional_args:
      message = (
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(message, errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    # The master address is only resolved when TPU execution is requested;
    # otherwise an empty string is handed to call_variants.
    master = ''
    if FLAGS.use_tpu:
      master = tf_utils.resolve_master(FLAGS.master, FLAGS.tpu_name,
                                       FLAGS.tpu_zone, FLAGS.gcp_project)

    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size,
        master=master,
        use_tpu=FLAGS.use_tpu,
    )
def main(argv):
  """Entry point: writes a stats report for a single-sample VCF.

  Reads FLAGS.input_vcf, checks that its header lists exactly one sample and
  a GT format entry, then passes the (optionally truncated) variant stream to
  vcf_stats.create_vcf_report.

  Args:
    argv: Command-line arguments. Anything beyond the first element is
      rejected as an unexpected positional argument.

  Raises:
    ValueError: If the VCF header does not list exactly one sample.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      unexpected = str(argv[1:])
      errors.log_and_raise(
          'Command line parsing failure: vcf_stats_report does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(unexpected), errors.CommandLineError)

    with vcf.VcfReader(FLAGS.input_vcf) as reader:
      header = reader.header
      sample_names = header.sample_names
      if len(sample_names) != 1:
        raise ValueError('There must be exactly one sample in VCF: {}'.format(
            FLAGS.input_vcf))
      sample_name = sample_names[0]

      # Missing GT causes an error later while reading, so fail here with a
      # clearer message instead.
      if 'GT' not in (fmt.id for fmt in header.formats):
        errors.log_and_raise('ERROR: No GT sub-column in VCF.')

      # A num_records of -1 means "use every record"; any other value caps
      # the number of variants taken from the reader.
      variants = reader.iterate()
      if FLAGS.num_records != -1:
        variants = itertools.islice(variants, FLAGS.num_records)

      vcf_stats.create_vcf_report(
          variants,
          output_basename=FLAGS.outfile_base,
          sample_name=sample_name,
          vcf_reader=reader)
def main(argv):
  """Entry point: rejects positional arguments, then calls run().

  Args:
    argv: Command-line arguments. Anything beyond the first element is
      reported as an unexpected positional argument.
  """
  logging.set_stderrthreshold('info')
  with errors.clean_commandline_error_exit():
    too_many_args = len(argv) > 1
    if too_many_args:
      message = (
          'Command line parsing failure: show_examples.py does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv[1:])))
      errors.log_and_raise(message, errors.CommandLineError)
    run()
def main(argv=()):
  """Entry point: builds options from flags, validates them, runs make_examples.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      message = (
          'Command line parsing failure: make_examples does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(message, errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    hts_level = hts_verbose.htsLogLevel[FLAGS.hts_logging_level]
    hts_verbose.set(hts_level)

    # Building the options may do I/O.
    options = default_options(add_flags=True, flags_obj=FLAGS)
    check_options_are_valid(options)

    # All checks passed: generate the examples.
    make_examples_core.make_examples_runner(options)
def check_options_are_valid(options):
  """Checks that the chosen options make sense together for a trio run.

  Runs the shared make_examples_options checks first, then two trio-specific
  ones: the child's sample name must differ from both parent sample-name
  flags, and alt_aligned_pileup must not be 'rows'.

  Args:
    options: Options object exposing sample_options and pic_options.
  """
  # General checks shared between DeepVariant and DeepTrio.
  make_examples_options.check_options_are_valid(
      options, main_sample_index=MAIN_SAMPLE_INDEX)

  # Trio-specific sanity check on the sample names.
  child_name = (
      options.sample_options[MAIN_SAMPLE_INDEX].variant_caller_options
      .sample_name)
  parent_names = (FLAGS.sample_name_parent1, FLAGS.sample_name_parent2)
  if child_name in parent_names:
    errors.log_and_raise(
        'The sample_name of the child is the same as one of '
        'the parents.', errors.CommandLineError)

  uses_row_pileup = options.pic_options.alt_aligned_pileup == 'rows'
  if uses_row_pileup:
    errors.log_and_raise('--alt_aligned_pileup="rows" cannot be used with '
                         'DeepTrio because the pileup images would become '
                         'too tall for InceptionV3.')
def main(argv=()):
  """Entry point: validates the command line and runs call_variants.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    has_positional_args = len(argv) > 1
    if has_positional_args:
      message = (
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(message, errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    # Every remaining input comes straight from the flags.
    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size)
def test_log_and_raise(self, msg, cls):
  """Checks that log_and_raise logs `msg` once and raises `cls`.

  Args:
    msg: Message passed to errors.log_and_raise. It is also used as the
      regular expression the raised exception's text must match.
    cls: Exception class expected to be raised.
  """
  with mock.patch.object(logging, 'error') as mock_logging:
    # assertRaisesRegex replaces the deprecated alias assertRaisesRegexp,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(cls, msg):
      errors.log_and_raise(msg, cls)
    mock_logging.assert_called_once_with(msg)
def check_options_are_valid(options: deepvariant_pb2.MakeExamplesOptions,
                            main_sample_index: int):
  """Validates that the chosen make_examples options are mutually consistent.

  Each problem is reported through errors.log_and_raise. Checks fall into
  three groups: requirements common to every mode, requirements that depend
  on whether make_examples_core.in_training_mode(options) is true, and range
  checks on FLAGS.vsc_min_fraction_multiplier and on each sample's
  pileup_height.

  Args:
    options: The MakeExamplesOptions proto to validate.
    main_sample_index: Index into options.sample_options of the main sample.
  """
  cli_error = errors.CommandLineError
  importer = deepvariant_pb2.MakeExamplesOptions.VCF_CANDIDATE_IMPORTER

  # Requirements that hold in every mode.
  if not options.reference_filename:
    errors.log_and_raise('ref argument is required.', cli_error)
  if not options.examples_filename:
    errors.log_and_raise('examples argument is required.', cli_error)
  if options.n_cores != 1:
    errors.log_and_raise(
        'Currently only supports n_cores == 1 but got {}.'.format(
            options.n_cores), cli_error)

  main_sample = options.sample_options[main_sample_index]
  if not main_sample.reads_filenames:
    errors.log_and_raise('reads argument is required.', cli_error)

  if make_examples_core.in_training_mode(options):
    # Requirements specific to training mode.
    if not options.truth_variants_filename:
      errors.log_and_raise('truth_variants is required when in training mode.',
                           cli_error)
    if not options.confident_regions_filename:
      if options.variant_caller == importer:
        logging.info('Note: --confident_regions is optional with '
                     'vcf_candidate_importer. '
                     'You did not specify --confident_regions, which means '
                     'examples will be generated for the whole region.')
      else:
        errors.log_and_raise(
            'confident_regions is required when in training mode.', cli_error)
    if options.gvcf_filename:
      errors.log_and_raise('gvcf is not allowed in training mode.', cli_error)
    if (options.variant_caller == importer and
        main_sample.proposed_variants_filename):
      errors.log_and_raise(
          '--proposed_variants* should not be used with '
          'vcf_candidate_importer in training mode. '
          'Use --truth_variants to pass in the candidates '
          'with correct labels for training.', cli_error)
  else:
    # Requirements specific to calling mode.
    for sample in options.sample_options:
      # Any sample that has reads must also have a real sample name.
      if (sample.reads_filenames and
          sample.variant_caller_options.sample_name == _UNKNOWN_SAMPLE):
        errors.log_and_raise(
            'sample_name must be specified for all samples in calling mode.',
            cli_error)
    if main_sample.variant_caller_options.gq_resolution < 1:
      errors.log_and_raise('gq_resolution must be a positive integer.',
                           cli_error)
    if options.truth_variants_filename:
      errors.log_and_raise('Do not specify --truth_variants in calling mode.',
                           cli_error)

    if options.variant_caller == importer:
      # NOTE(review): proposed_variants_filename looks like a proto string
      # field, which presumably reads back as '' rather than None when unset,
      # so this check may never fire - confirm and decide which samples must
      # provide the file before tightening it.
      missing_proposed = any(
          sample.proposed_variants_filename is None
          for sample in options.sample_options)
      if missing_proposed:
        errors.log_and_raise(
            '--proposed_variants* is required with vcf_candidate_importer in '
            'calling mode.', cli_error)

  multiplier = FLAGS.vsc_min_fraction_multiplier
  if multiplier > 1.0 or multiplier <= 0:
    errors.log_and_raise(
        '--vsc_min_fraction_multiplier must be within (0-1] interval.',
        cli_error)

  for sample in options.sample_options:
    height = sample.pileup_height
    if height < 10 or height > 100:
      errors.log_and_raise('Pileup image heights must be between 10 and 100.')
def shared_flags_to_options(
    add_flags, flags_obj, samples_in_order, sample_role_to_train,
    main_sample_index) -> deepvariant_pb2.MakeExamplesOptions:
  """Creates options from flags that are shared, along with given samples.

  A MakeExamplesOptions proto is first built from fixed defaults plus the read
  requirement flags. When `add_flags` is true, the remaining flag values are
  copied onto it and validated along the way.

  Args:
    add_flags: If true, copy the remaining flag values from `flags_obj` onto
      the options. If false, only the defaults built at the top are returned.
    flags_obj: Object exposing the parsed flags as attributes.
    samples_in_order: Sample options stored as `sample_options`.
    sample_role_to_train: Stored as `sample_role_to_train` on the options.
    main_sample_index: Index of the main sample within `samples_in_order`.

  Returns:
    The populated deepvariant_pb2.MakeExamplesOptions.

  Raises:
    Invalid flag combinations are reported through errors.log_and_raise with
    errors.CommandLineError.
  """
  read_reqs = reads_pb2.ReadRequirements(
      keep_duplicates=flags_obj.keep_duplicates,
      keep_supplementary_alignments=flags_obj.keep_supplementary_alignments,
      keep_secondary_alignments=flags_obj.keep_secondary_alignments,
      min_base_quality=flags_obj.min_base_quality,
      min_mapping_quality=flags_obj.min_mapping_quality,
      min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)

  logging.vlog(3, 'ReadRequirements are: %s', read_reqs)

  pic_options = pileup_image.default_options(read_requirements=read_reqs)

  allele_counter_options = deepvariant_pb2.AlleleCounterOptions(
      partition_size=flags_obj.partition_size,
      read_requirements=read_reqs,
      track_ref_reads=flags_obj.track_ref_reads,
      normalize_reads=flags_obj.normalize_reads,
      keep_legacy_behavior=flags_obj.keep_legacy_allele_counter_behavior)

  options = deepvariant_pb2.MakeExamplesOptions(
      exclude_contigs=exclude_contigs.EXCLUDED_HUMAN_CONTIGS,
      # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
      random_seed=609314161,
      # Not specified by default: calling_regions = 3;
      read_requirements=read_reqs,
      allele_counter_options=allele_counter_options,
      pic_options=pic_options,
      n_cores=1,
      task_id=0,
      num_shards=0,
      min_shared_contigs_basepairs=0.9,
      sample_options=samples_in_order,
      main_sample_index=main_sample_index,
      sample_role_to_train=sample_role_to_train)

  if add_flags:
    options.mode = make_examples_core.parse_proto_enum_flag(
        deepvariant_pb2.MakeExamplesOptions.Mode, flags_obj.mode.upper())

    options.labeler_algorithm = make_examples_core.parse_proto_enum_flag(
        deepvariant_pb2.MakeExamplesOptions.LabelerAlgorithm,
        flags_obj.labeler_algorithm.upper())

    options.variant_caller = make_examples_core.parse_proto_enum_flag(
        deepvariant_pb2.MakeExamplesOptions.VariantCaller,
        flags_obj.variant_caller.upper())

    if flags_obj.ref:
      options.reference_filename = flags_obj.ref
    if flags_obj.confident_regions:
      options.confident_regions_filename = flags_obj.confident_regions
    if flags_obj.truth_variants:
      options.truth_variants_filename = flags_obj.truth_variants
    if flags_obj.sequencing_type:
      options.pic_options.sequencing_type = (
          make_examples_core.parse_proto_enum_flag(
              deepvariant_pb2.PileupImageOptions.SequencingType,
              flags_obj.sequencing_type))

    if flags_obj.channels:
      channel_set = flags_obj.channels.split(',')
      for channel in channel_set:
        if channel and channel not in dv_constants.OPT_CHANNELS:
          err_msg = (
              'Channel "{}" is not one of the available opt channels: {}'
              .format(channel, ', '.join(dv_constants.OPT_CHANNELS)))
          errors.log_and_raise(err_msg, errors.CommandLineError)
      # NOTE(review): empty entries (e.g. from a trailing comma) pass the
      # check above but are still stored and counted here - confirm that is
      # intended.
      options.pic_options.channels[:] = channel_set
      options.pic_options.num_channels += len(channel_set)

    if flags_obj.multi_allelic_mode:
      multi_allelic_enum = {
          'include_het_alt_images':
              deepvariant_pb2.PileupImageOptions.ADD_HET_ALT_IMAGES,
          'exclude_het_alt_images':
              deepvariant_pb2.PileupImageOptions.NO_HET_ALT_IMAGES,
      }[flags_obj.multi_allelic_mode]
      options.pic_options.multi_allelic_mode = multi_allelic_enum

    if flags_obj.pileup_image_width:
      options.pic_options.width = flags_obj.pileup_image_width

    options.pic_options.alt_aligned_pileup = flags_obj.alt_aligned_pileup
    options.pic_options.types_to_alt_align = flags_obj.types_to_alt_align

    if flags_obj.add_supporting_other_alt_color:
      options.pic_options.other_allele_supporting_read_alpha = 0.3

    if flags_obj.select_variant_types:
      options.select_variant_types[:] = flags_obj.select_variant_types.split()
      for svt in options.select_variant_types:
        if svt not in make_examples_core.VARIANT_TYPE_SELECTORS:
          errors.log_and_raise(
              'Select variant type {} not recognized. Allowed values are {}'
              .format(svt,
                      ', '.join(make_examples_core.VARIANT_TYPE_SELECTORS)),
              errors.CommandLineError)

    num_shards, examples, candidates, gvcf, runtime_by_region = (
        sharded_file_utils.resolve_filespecs(
            flags_obj.task, flags_obj.examples or '',
            flags_obj.candidates or '', flags_obj.gvcf or '',
            flags_obj.runtime_by_region or ''))
    options.examples_filename = examples
    options.candidates_filename = candidates
    options.gvcf_filename = gvcf
    options.include_med_dp = flags_obj.include_med_dp
    options.task_id = flags_obj.task
    options.num_shards = num_shards
    options.runtime_by_region = runtime_by_region

    options.parse_sam_aux_fields = make_examples_core.resolve_sam_aux_fields(
        flags_obj=flags_obj)
    if flags_obj.aux_fields_to_keep:
      options.aux_fields_to_keep[:] = flags_obj.aux_fields_to_keep.split(',')
    else:
      # aux_fields_to_keep is a repeated field (it is slice-assigned above),
      # and protobuf rejects direct assignment to repeated fields, so the
      # field is emptied in place instead of being set to None.
      del options.aux_fields_to_keep[:]
    options.use_original_quality_scores = flags_obj.use_original_quality_scores

    if flags_obj.add_hp_channel:
      options.pic_options.num_channels += 1
      options.pic_options.add_hp_channel = True

    # Zero is accepted here; only negative values are rejected.
    if flags_obj.hp_tag_for_assembly_polishing < 0:
      errors.log_and_raise(
          '--hp_tag_for_assembly_polishing has to be set to a non-negative '
          'int.', errors.CommandLineError)
    if (flags_obj.hp_tag_for_assembly_polishing > 0 and
        not flags_obj.sort_by_haplotypes):
      errors.log_and_raise(
          '--hp_tag_for_assembly_polishing requires --sort_by_haplotypes to be '
          'set ', errors.CommandLineError)

    options.pic_options.sort_by_haplotypes = flags_obj.sort_by_haplotypes
    options.pic_options.hp_tag_for_assembly_polishing = (
        flags_obj.hp_tag_for_assembly_polishing)

    if flags_obj.write_run_info:
      options.run_info_filename = examples + _RUN_INFO_FILE_EXTENSION

    options.calling_regions.extend(
        make_examples_core.parse_regions_flag(flags_obj.regions))
    options.exclude_calling_regions.extend(
        make_examples_core.parse_regions_flag(flags_obj.exclude_regions))

    options.realigner_enabled = flags_obj.realign_reads
    options.realigner_options.CopyFrom(realigner.realigner_config(flags_obj))

    if (options.mode == deepvariant_pb2.MakeExamplesOptions.TRAINING and
        flags_obj.training_random_emit_ref_sites != NO_RANDOM_REF):
      main_sample_options = options.sample_options[main_sample_index]
      main_sample_options.variant_caller_options.fraction_reference_sites_to_emit = (
          flags_obj.training_random_emit_ref_sites)

    if flags_obj.use_allele_frequency and not flags_obj.population_vcfs:
      errors.log_and_raise(
          'If use_allele_frequency is set then population_vcfs '
          'must be provided.', errors.CommandLineError)
    if flags_obj.use_allele_frequency:
      options.use_allele_frequency = flags_obj.use_allele_frequency
      options.pic_options.num_channels += 1
      options.pic_options.use_allele_frequency = True
    if flags_obj.population_vcfs:
      # The flag accepts either commas or spaces as separators.
      options.population_vcf_filenames.extend(
          re.split(',| ', flags_obj.population_vcfs))
    options.max_reads_per_partition = flags_obj.max_reads_per_partition
    options.use_ref_for_cram = flags_obj.use_ref_for_cram
    options.hts_block_size = flags_obj.hts_block_size
    options.logging_every_n_candidates = flags_obj.logging_every_n_candidates
    options.customized_classes_labeler_classes_list = (
        flags_obj.customized_classes_labeler_classes_list)
    options.customized_classes_labeler_info_field_name = (
        flags_obj.customized_classes_labeler_info_field_name)

  return options
def main(argv=()):
  """Entry point: turns call_variants output into a VCF and optional gVCF.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.

  Raises:
    ValueError: If no record can be read from the input paths.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      message = (
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)))
      errors.log_and_raise(message, errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be given together or not at all.
    has_nonvariant_input = bool(FLAGS.nonvariant_site_tfrecord_path)
    has_gvcf_output = bool(FLAGS.gvcf_outfile)
    if has_nonvariant_input != has_gvcf_output:
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    fasta_reader = fasta.IndexedFastaReader(
        FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    joined_paths = ','.join(paths)

    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    record = tf_utils.get_one_example_from_examples_path(
        joined_paths, proto=deepvariant_pb2.CallVariantsOutput)
    if record is None:
      raise ValueError('Cannot find any records in {}'.format(joined_paths))

    sample_name = _extract_single_sample_name(record)
    header = dv_vcf_constants.deepvariant_header(
        contigs=contigs, sample_names=[sample_name])

    with tempfile.NamedTemporaryFile() as temp:
      sorted_path = temp.name
      postprocess_variants_lib.process_single_sites_tfrecords(
          contigs, paths, sorted_path)
      independent_variants = _transform_call_variants_output_to_variants(
          input_sorted_tfrecord_path=sorted_path,
          qual_filter=FLAGS.qual_filter,
          multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
          sample_name=sample_name)
      variant_generator = haplotypes.maybe_resolve_conflicting_variants(
          independent_variants)
      write_variants_to_vcf(
          variant_generator=variant_generator,
          output_vcf_path=FLAGS.outfile,
          header=header)

    # Also write out the gVCF file if it was provided.
    if FLAGS.nonvariant_site_tfrecord_path:
      nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
          FLAGS.nonvariant_site_tfrecord_path,
          key=_get_contig_based_variant_sort_keyfn(contigs),
          proto=variants_pb2.Variant)
      # The VCF that was just written is read back and merged with the
      # non-variant records.
      with vcf.VcfReader(FLAGS.outfile) as variant_reader:
        lessthanfn = _get_contig_based_lessthan(contigs)
        gvcf_variants = map(_transform_to_gvcf_record,
                            variant_reader.iterate())
        merged_variants = merge_variants_and_nonvariants(
            gvcf_variants, nonvariant_generator, lessthanfn, fasta_reader)
        write_variants_to_vcf(
            variant_generator=merged_variants,
            output_vcf_path=FLAGS.gvcf_outfile,
            header=header)
def main(argv=()):
  """Entry point: turns call_variants output into a VCF and optional gVCF.

  When the input holds no records, an empty VCF is written using
  FLAGS.sample_name if set, otherwise dv_constants.DEFAULT_SAMPLE_NAME.
  Otherwise the records are sorted into a temporary file, converted to
  variants and written out. If FLAGS.nonvariant_site_tfrecord_path is set,
  the VCF and gVCF are written together. Outputs ending in '.gz' are indexed,
  and a stats report is produced when FLAGS.vcf_stats_report is set.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be given together or not at all.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    fasta_reader = fasta.IndexedFastaReader(
        FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = sharded_file_utils.maybe_generate_sharded_filenames(FLAGS.infile)

    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    record = tf_utils.get_one_example_from_examples_path(
        ','.join(paths), proto=deepvariant_pb2.CallVariantsOutput)

    # The temporary file must stay open while variant_generator is consumed
    # by the writers below. try/finally guarantees it is closed even when one
    # of the later steps raises.
    temp = None
    try:
      if record is None:
        logging.info('call_variants_output is empty. Writing out empty VCF.')
        sample_name = dv_constants.DEFAULT_SAMPLE_NAME
        if FLAGS.sample_name:
          logging.info(
              '--sample_name is set in postprocess_variant. Using %s as the '
              'sample name.', FLAGS.sample_name)
          sample_name = FLAGS.sample_name
        variant_generator = iter([])
      else:
        sample_name = _extract_single_sample_name(record)
        temp = tempfile.NamedTemporaryFile()
        start_time = time.time()
        postprocess_variants_lib.process_single_sites_tfrecords(
            contigs, paths, temp.name)
        logging.info('CVO sorting took %s minutes',
                     (time.time() - start_time) / 60)

        logging.info('Transforming call_variants_output to variants.')
        independent_variants = _transform_call_variants_output_to_variants(
            input_sorted_tfrecord_path=temp.name,
            qual_filter=FLAGS.qual_filter,
            multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
            sample_name=sample_name,
            group_variants=FLAGS.group_variants,
            use_multiallelic_model=FLAGS.use_multiallelic_model)
        variant_generator = haplotypes.maybe_resolve_conflicting_variants(
            independent_variants)

      header = dv_vcf_constants.deepvariant_header(
          contigs=contigs, sample_names=[sample_name])
      use_csi = _decide_to_use_csi(contigs)

      start_time = time.time()
      if not FLAGS.nonvariant_site_tfrecord_path:
        logging.info('Writing variants to VCF.')
        write_variants_to_vcf(
            variant_iterable=variant_generator,
            output_vcf_path=FLAGS.outfile,
            header=header)
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        logging.info('VCF creation took %s minutes',
                     (time.time() - start_time) / 60)
      else:
        logging.info('Merging and writing variants to VCF and gVCF.')
        lessthanfn = _get_contig_based_lessthan(contigs)
        with vcf.VcfWriter(
            FLAGS.outfile, header=header, round_qualities=True) as vcf_writer, \
            vcf.VcfWriter(
                FLAGS.gvcf_outfile, header=header, round_qualities=True) \
            as gvcf_writer:
          nonvariant_generator = tfrecord.read_shard_sorted_tfrecords(
              FLAGS.nonvariant_site_tfrecord_path,
              key=_get_contig_based_variant_sort_keyfn(contigs),
              proto=variants_pb2.Variant)
          merge_and_write_variants_and_nonvariants(variant_generator,
                                                   nonvariant_generator,
                                                   lessthanfn, fasta_reader,
                                                   vcf_writer, gvcf_writer)
        # Indexes are built only after both writers have been closed.
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        if FLAGS.gvcf_outfile.endswith('.gz'):
          build_index(FLAGS.gvcf_outfile, use_csi)
        logging.info('Finished writing VCF and gVCF in %s minutes.',
                     (time.time() - start_time) / 60)

      if FLAGS.vcf_stats_report:
        outfile_base = _get_base_path(FLAGS.outfile)
        with vcf.VcfReader(FLAGS.outfile) as reader:
          vcf_stats.create_vcf_report(
              variants=reader.iterate(),
              output_basename=outfile_base,
              sample_name=sample_name,
              vcf_reader=reader)
    finally:
      if temp is not None:
        temp.close()
def main(argv=()):
  """Entry point: builds options from flags, validates them, runs make_examples.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: make_examples does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()
    hts_verbose.set(hts_verbose.htsLogLevel[FLAGS.hts_logging_level])

    # Set up options; may do I/O.
    options = default_options(add_flags=True, flags_obj=FLAGS)

    # Check arguments that apply to any mode.
    if not options.reference_filename:
      errors.log_and_raise('ref argument is required.', errors.CommandLineError)
    if not options.reads_filename:
      errors.log_and_raise('reads argument is required.',
                           errors.CommandLineError)
    if not options.examples_filename:
      errors.log_and_raise('examples argument is required.',
                           errors.CommandLineError)
    if options.n_cores != 1:
      errors.log_and_raise(
          'Currently only supports n_cores == 1 but got {}.'.format(
              options.n_cores), errors.CommandLineError)

    # Check for argument issues specific to train mode.
    if in_training_mode(options):
      if not options.truth_variants_filename:
        errors.log_and_raise(
            'truth_variants is required when in training mode.',
            errors.CommandLineError)
      if not options.confident_regions_filename:
        errors.log_and_raise(
            'confident_regions is required when in training mode.',
            errors.CommandLineError)
      if options.gvcf_filename:
        errors.log_and_raise('gvcf is not allowed in training mode.',
                             errors.CommandLineError)
    else:
      # Check for argument issues specific to calling mode.
      if options.variant_caller_options.sample_name == _UNKNOWN_SAMPLE:
        errors.log_and_raise('sample_name must be specified in calling mode.',
                             errors.CommandLineError)
      # Values below 1, including 0, are rejected, so the message says
      # "positive" rather than "non-negative".
      if options.variant_caller_options.gq_resolution < 1:
        errors.log_and_raise('gq_resolution must be a positive integer.',
                             errors.CommandLineError)

    # Run!
    make_examples_runner(options)
def main(argv=()):
  """Entry point: turns call_variants output into a VCF and optional gVCF.

  Args:
    argv: Command-line arguments. More than one element is reported as stray
      positional arguments through errors.log_and_raise with
      errors.CommandLineError.

  Raises:
    ValueError: If the first input path contains no records.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two gVCF flags must be given together or not at all.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    fasta_reader = fasta.RefFastaReader(FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)

    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    first_records = io_utils.read_tfrecords(
        paths[0], proto=deepvariant_pb2.CallVariantsOutput, max_records=1)
    # An empty input used to surface as a bare StopIteration from next();
    # report it with an explicit, descriptive error instead.
    record = next(first_records, None)
    if record is None:
      raise ValueError('Cannot find any records in {}'.format(paths[0]))

    sample_name = _extract_single_sample_name(record)
    header = dv_vcf_constants.deepvariant_header(
        contigs=contigs, sample_names=[sample_name])

    with tempfile.NamedTemporaryFile() as temp:
      postprocess_variants_lib.process_single_sites_tfrecords(
          contigs, paths, temp.name)
      independent_variants = _transform_call_variants_output_to_variants(
          input_sorted_tfrecord_path=temp.name,
          qual_filter=FLAGS.qual_filter,
          multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
          sample_name=sample_name)
      variant_generator = haplotypes.maybe_resolve_conflicting_variants(
          independent_variants)
      write_variants_to_vcf(
          variant_generator=variant_generator,
          output_vcf_path=FLAGS.outfile,
          header=header)

    # Also write out the gVCF file if it was provided.
    if FLAGS.nonvariant_site_tfrecord_path:
      nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
          FLAGS.nonvariant_site_tfrecord_path,
          key=_get_contig_based_variant_sort_keyfn(contigs),
          proto=variants_pb2.Variant)
      # The VCF that was just written is read back (without an index) and
      # merged with the non-variant records.
      with vcf.VcfReader(FLAGS.outfile, use_index=False) as variant_reader:
        lessthanfn = _get_contig_based_lessthan(contigs)
        gvcf_variants = (
            _transform_to_gvcf_record(variant)
            for variant in variant_reader.iterate())
        merged_variants = merge_variants_and_nonvariants(
            gvcf_variants, nonvariant_generator, lessthanfn, fasta_reader)
        write_variants_to_vcf(
            variant_generator=merged_variants,
            output_vcf_path=FLAGS.gvcf_outfile,
            header=header)
def trio_samples_from_flags(add_flags=True, flags_obj=None):
  """Collects sample-related options into a list of samples.

  Builds one SampleOptions proto each for parent1, the child and parent2.
  When `add_flags` is true, the per-sample flag values (reads, proposed
  variants, downsampling fractions, pileup heights) are copied onto them and
  the role to train on is resolved.

  Args:
    add_flags: If true, copy the optional per-sample flag values onto the
      sample options and honour --sample_name_to_train.
    flags_obj: Object exposing the parsed flags as attributes. It is read
      unconditionally, so despite the default it must not be None.

  Returns:
    A tuple (samples_in_order, sample_role_to_train). samples_in_order is
    [parent1, child, parent2] and sample_role_to_train is a role string.
  """
  # Resolve the sample names first: child, then parent1, then parent2.
  names = {
      'child':
          make_examples_core.assign_sample_name(
              sample_name_flag=flags_obj.sample_name,
              reads_filenames=flags_obj.reads),
      'parent1':
          make_examples_core.assign_sample_name(
              sample_name_flag=flags_obj.sample_name_parent1,
              reads_filenames=flags_obj.reads_parent1),
      'parent2':
          make_examples_core.assign_sample_name(
              sample_name_flag=flags_obj.sample_name_parent2,
              reads_filenames=flags_obj.reads_parent2),
  }

  def _build_sample(role, order, height):
    # Creates the SampleOptions proto for a single role.
    return deepvariant_pb2.SampleOptions(
        role=role,
        name=names[role],
        variant_caller_options=make_examples_core.make_vc_options(
            sample_name=names[role], flags_obj=flags_obj),
        order=order,
        pileup_height=height)

  parent1_options = _build_sample(
      'parent1', [0, 1, 2], dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT)
  child_options = _build_sample(
      'child', [0, 1, 2], dt_constants.PILEUP_DEFAULT_HEIGHT_CHILD)
  # The two parents are swapped when calling on parent2.
  parent2_options = _build_sample(
      'parent2', [2, 1, 0], dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT)

  # Train on the child unless --sample_name_to_train says otherwise; this
  # default is kept for backward compatibility.
  sample_role_to_train = 'child'

  if add_flags:
    reads_by_sample = (
        (child_options, flags_obj.reads),
        (parent1_options, flags_obj.reads_parent1),
        (parent2_options, flags_obj.reads_parent2),
    )
    for sample, reads in reads_by_sample:
      if reads:
        sample.reads_filenames.extend(reads.split(','))

    proposed_by_sample = (
        (child_options, flags_obj.proposed_variants_child),
        (parent1_options, flags_obj.proposed_variants_parent1),
        (parent2_options, flags_obj.proposed_variants_parent2),
    )
    for sample, proposed in proposed_by_sample:
      if proposed:
        sample.proposed_variants_filename = proposed

    both_parents = (parent1_options, parent2_options)

    if flags_obj.downsample_fraction_child != NO_DOWNSAMPLING:
      child_options.downsample_fraction = flags_obj.downsample_fraction_child
    if flags_obj.downsample_fraction_parents != NO_DOWNSAMPLING:
      for parent in both_parents:
        parent.downsample_fraction = flags_obj.downsample_fraction_parents

    if flags_obj.pileup_image_height_child:
      child_options.pileup_height = flags_obj.pileup_image_height_child
    if flags_obj.pileup_image_height_parent:
      for parent in both_parents:
        parent.pileup_height = flags_obj.pileup_image_height_parent

    train_name = flags_obj.sample_name_to_train
    if train_name:
      if train_name == flags_obj.sample_name:
        sample_role_to_train = child_options.role
      elif train_name == flags_obj.sample_name_parent1:
        sample_role_to_train = parent1_options.role
      else:
        errors.log_and_raise(
            '--sample_name_to_train must match either --sample_name or '
            '--sample_name_parent1, or it can be unset to default to '
            '--sample_name.', errors.CommandLineError)

  # Ordering here determines the default order of samples, and when a sample
  # above has a custom .order, then this is the list those indices refer to.
  samples_in_order = [parent1_options, child_options, parent2_options]
  return samples_in_order, sample_role_to_train
def test_log_and_raise(self, msg, cls):
  """Checks that log_and_raise logs `msg` once and raises `cls`.

  Args:
    msg: Message passed to errors.log_and_raise. It is also used as the
      regular expression the raised exception's text must match.
    cls: Exception class expected to be raised.
  """
  with mock.patch.object(logging, 'error') as mock_logging:
    # assertRaisesRegex replaces the deprecated alias assertRaisesRegexp,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(cls, msg):
      errors.log_and_raise(msg, cls)
    mock_logging.assert_called_once_with(msg)