def default_options(add_flags=True, flags=None):
    """Builds a DeepVariantOptions proto filled in with default settings.

    Args:
      add_flags: bool, True by default. When True, the values of selected flags
        are copied into the returned options. When False, those option fields
        are left unset.
      flags: object or None. Source of flag values. A falsy value means the
        module-level FLAGS is used instead.

    Returns:
      A deepvariant_pb2.DeepVariantOptions protobuf.

    Raises:
      ValueError: If an invalid flag value is observed, e.g. a mode other than
        'training' or 'calling'.
    """
    flags = flags or FLAGS

    reqs = core_pb2.ReadRequirements(
        min_base_quality=10,
        min_mapping_quality=10,
        min_base_quality_mode=core_pb2.ReadRequirements.ENFORCED_BY_CLIENT)
    pileup_opts = pileup_image.default_options(read_requirements=reqs)
    counter_opts = deepvariant_pb2.AlleleCounterOptions(
        partition_size=flags.partition_size, read_requirements=reqs)

    # An explicit sample name wins; otherwise derive it from the reads file,
    # and fall back to the unknown-sample placeholder when neither is given.
    sample = flags.sample_name
    if not sample:
        if flags.reads:
            sample = extract_sample_name_from_reads(flags.reads)
        else:
            sample = _UNKNOWN_SAMPLE

    # fraction_reference_sites_to_emit is deliberately not set by default.
    caller_opts = deepvariant_pb2.VariantCallerOptions(
        min_count_snps=flags.vsc_min_count_snps,
        min_count_indels=flags.vsc_min_count_indels,
        min_fraction_snps=flags.vsc_min_fraction_snps,
        min_fraction_indels=flags.vsc_min_fraction_indels,
        # Fixed seed, generated with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed=1400605801,
        sample_name=sample,
        p_error=0.001,
        max_gq=50,
        gq_resolution=1,
        ploidy=2)

    skipped_contigs = [
        # Both canonical names of the human mitochondrial contig.
        'chrM', 'MT',
        # Contigs from hs37d5; see
        # (ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/README_human_reference_20110707)  # pylint:disable=line-too-long
        'GL000207.1', 'GL000226.1', 'GL000229.1', 'GL000231.1',
        'GL000210.1', 'GL000239.1', 'GL000235.1', 'GL000201.1',
        'GL000247.1', 'GL000245.1', 'GL000197.1', 'GL000203.1',
        'GL000246.1', 'GL000249.1', 'GL000196.1', 'GL000248.1',
        'GL000244.1', 'GL000238.1', 'GL000202.1', 'GL000234.1',
        'GL000232.1', 'GL000206.1', 'GL000240.1', 'GL000236.1',
        'GL000241.1', 'GL000243.1', 'GL000242.1', 'GL000230.1',
        'GL000237.1', 'GL000233.1', 'GL000204.1', 'GL000198.1',
        'GL000208.1', 'GL000191.1', 'GL000227.1', 'GL000228.1',
        'GL000214.1', 'GL000221.1', 'GL000209.1', 'GL000218.1',
        'GL000220.1', 'GL000213.1', 'GL000211.1', 'GL000199.1',
        'GL000217.1', 'GL000216.1', 'GL000215.1', 'GL000205.1',
        'GL000219.1', 'GL000224.1', 'GL000223.1', 'GL000195.1',
        'GL000212.1', 'GL000222.1', 'GL000200.1', 'GL000193.1',
        'GL000194.1', 'GL000225.1', 'GL000192.1', 'NC_007605',
        'hs37d5',
    ]

    # calling_regions is deliberately not set by default.
    opts = deepvariant_pb2.DeepVariantOptions(
        exclude_contigs=skipped_contigs,
        # Fixed seed, generated with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed=609314161,
        read_requirements=reqs,
        allele_counter_options=counter_opts,
        variant_caller_options=caller_opts,
        pic_options=pileup_opts,
        n_cores=1,
        task_id=0,
        num_shards=0,
        min_shared_contigs_basepairs=0.9,
    )

    if not add_flags:
        return opts

    mode_name = flags.mode
    if mode_name == 'training':
        opts.mode = deepvariant_pb2.DeepVariantOptions.TRAINING
    elif mode_name == 'calling':
        opts.mode = deepvariant_pb2.DeepVariantOptions.CALLING
    else:
        raise ValueError('Unexpected mode', mode_name)

    # Filename-style flags are copied across only when they are set.
    filename_fields = (
        ('ref', 'reference_filename'),
        ('reads', 'reads_filename'),
        ('confident_regions', 'confident_regions_filename'),
        ('truth_variants', 'truth_variants_filename'),
    )
    for flag_name, field_name in filename_fields:
        flag_value = getattr(flags, flag_name)
        if flag_value:
            setattr(opts, field_name, flag_value)

    if flags.downsample_fraction != NO_DOWNSAMPLING:
        opts.downsample_fraction = flags.downsample_fraction

    if flags.multi_allelic_mode:
        image_opts_cls = deepvariant_pb2.PileupImageOptions
        mode_lookup = {
            'include_het_alt_images': image_opts_cls.ADD_HET_ALT_IMAGES,
            'exclude_het_alt_images': image_opts_cls.NO_HET_ALT_IMAGES,
        }
        opts.pic_options.multi_allelic_mode = (
            mode_lookup[flags.multi_allelic_mode])

    if flags.pileup_image_height:
        opts.pic_options.height = flags.pileup_image_height
    if flags.pileup_image_width:
        opts.pic_options.width = flags.pileup_image_width

    shard_count, examples_path, candidates_path, gvcf_path = (
        io_utils.resolve_filespecs(
            flags.task,
            flags.examples or '',
            flags.candidates or '',
            flags.gvcf or ''))
    opts.examples_filename = examples_path
    opts.candidates_filename = candidates_path
    opts.gvcf_filename = gvcf_path

    # A string-valued regions flag is split on whitespace; any other value is
    # passed to extend() unchanged.
    regions = flags.regions
    region_items = regions.split() if isinstance(regions, str) else regions
    opts.calling_regions.extend(region_items)

    opts.task_id = flags.task
    opts.num_shards = shard_count if shard_count is not None else 0

    if flags.realign_reads:
        opts.realigner_enabled = True
        opts.realigner_options.CopyFrom(realigner.realigner_config(flags))

    opts.max_reads_per_partition = flags.max_reads_per_partition

    # Random reference-site emission is only configured in training mode.
    if opts.mode == deepvariant_pb2.DeepVariantOptions.TRAINING:
        emit_fraction = flags.training_random_emit_ref_sites
        if emit_fraction != NO_RANDOM_REF:
            opts.variant_caller_options.fraction_reference_sites_to_emit = (
                emit_fraction)

    return opts
def test_resolve_filespecs_raises_with_bad_inputs(self, task_id, outputs):
    """Checks that resolve_filespecs raises ValueError for the given inputs."""
    # Callable form of assertRaises: the assertion invokes the function itself.
    self.assertRaises(ValueError, io.resolve_filespecs, task_id, *outputs)
def default_options(add_flags=True, flags_obj=None):
    """Builds a DeepVariantOptions proto filled in with default settings.

    Args:
      add_flags: bool, True by default. When True, the values of selected flags
        are copied into the returned options. When False, those option fields
        are left unset.
      flags_obj: object or None. Source of flag values. A falsy value means
        the module-level FLAGS is used instead.

    Returns:
      A deepvariant_pb2.DeepVariantOptions protobuf.

    Raises:
      ValueError: If an invalid flag value is observed, e.g. a mode other than
        'training' or 'calling'.
    """
    flags_obj = flags_obj if flags_obj else FLAGS

    reqs = core_pb2.ReadRequirements(
        min_base_quality=10,
        min_mapping_quality=10,
        min_base_quality_mode=core_pb2.ReadRequirements.ENFORCED_BY_CLIENT)
    image_opts = pileup_image.default_options(read_requirements=reqs)
    counter_opts = deepvariant_pb2.AlleleCounterOptions(
        partition_size=flags_obj.partition_size, read_requirements=reqs)

    # An explicit sample name wins; otherwise it is extracted via a SAM reader
    # opened on the reads file; with neither, the unknown-sample placeholder
    # is used.
    sample = flags_obj.sample_name
    if not sample:
        reads_path = flags_obj.reads
        if reads_path:
            with genomics_io.make_sam_reader(reads_path) as reader:
                sample = extract_sample_name_from_sam_reader(reader)
        else:
            sample = _UNKNOWN_SAMPLE

    # fraction_reference_sites_to_emit is deliberately not set by default.
    caller_opts = deepvariant_pb2.VariantCallerOptions(
        min_count_snps=flags_obj.vsc_min_count_snps,
        min_count_indels=flags_obj.vsc_min_count_indels,
        min_fraction_snps=flags_obj.vsc_min_fraction_snps,
        min_fraction_indels=flags_obj.vsc_min_fraction_indels,
        # Fixed seed, generated with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed=1400605801,
        sample_name=sample,
        p_error=0.001,
        max_gq=50,
        gq_resolution=flags_obj.gvcf_gq_binsize,
        ploidy=2)

    # calling_regions is deliberately not set by default.
    result = deepvariant_pb2.DeepVariantOptions(
        exclude_contigs=exclude_contigs.EXCLUDED_HUMAN_CONTIGS,
        # Fixed seed, generated with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed=609314161,
        read_requirements=reqs,
        allele_counter_options=counter_opts,
        variant_caller_options=caller_opts,
        pic_options=image_opts,
        n_cores=1,
        task_id=0,
        num_shards=0,
        min_shared_contigs_basepairs=0.9,
    )

    if not add_flags:
        return result

    requested_mode = flags_obj.mode
    if requested_mode == 'training':
        result.mode = deepvariant_pb2.DeepVariantOptions.TRAINING
    elif requested_mode == 'calling':
        result.mode = deepvariant_pb2.DeepVariantOptions.CALLING
    else:
        raise ValueError('Unexpected mode', requested_mode)

    # Filename-style flags are copied across only when they are set.
    ref_path = flags_obj.ref
    if ref_path:
        result.reference_filename = ref_path
    reads_file = flags_obj.reads
    if reads_file:
        result.reads_filename = reads_file
    confident_path = flags_obj.confident_regions
    if confident_path:
        result.confident_regions_filename = confident_path
    truth_path = flags_obj.truth_variants
    if truth_path:
        result.truth_variants_filename = truth_path

    downsample = flags_obj.downsample_fraction
    if downsample != NO_DOWNSAMPLING:
        result.downsample_fraction = downsample

    allelic_mode = flags_obj.multi_allelic_mode
    if allelic_mode:
        mode_lookup = {
            'include_het_alt_images':
                deepvariant_pb2.PileupImageOptions.ADD_HET_ALT_IMAGES,
            'exclude_het_alt_images':
                deepvariant_pb2.PileupImageOptions.NO_HET_ALT_IMAGES,
        }
        result.pic_options.multi_allelic_mode = mode_lookup[allelic_mode]

    image_height = flags_obj.pileup_image_height
    if image_height:
        result.pic_options.height = image_height
    image_width = flags_obj.pileup_image_width
    if image_width:
        result.pic_options.width = image_width

    resolved = io_utils.resolve_filespecs(
        flags_obj.task,
        flags_obj.examples or '',
        flags_obj.candidates or '',
        flags_obj.gvcf or '')
    shard_count, examples_path, candidates_path, gvcf_path = resolved
    result.examples_filename = examples_path
    result.candidates_filename = candidates_path
    result.gvcf_filename = gvcf_path

    included_regions = parse_regions_flag(flags_obj.regions)
    result.calling_regions.extend(included_regions)
    excluded_regions = parse_regions_flag(flags_obj.exclude_regions)
    result.exclude_calling_regions.extend(excluded_regions)

    result.task_id = flags_obj.task
    if shard_count is None:
        result.num_shards = 0
    else:
        result.num_shards = shard_count

    # The realigner configuration is only built when realignment is enabled.
    result.realigner_enabled = flags_obj.realign_reads
    if result.realigner_enabled:
        realigner_cfg = realigner.realigner_config(flags_obj)
        result.realigner_options.CopyFrom(realigner_cfg)

    result.max_reads_per_partition = flags_obj.max_reads_per_partition

    # Random reference-site emission is only configured in training mode.
    if result.mode == deepvariant_pb2.DeepVariantOptions.TRAINING:
        emit_fraction = flags_obj.training_random_emit_ref_sites
        if emit_fraction != NO_RANDOM_REF:
            result.variant_caller_options.fraction_reference_sites_to_emit = (
                emit_fraction)

    return result
def test_resolve_filespecs(self, task_id, outputs, expected):
    """Checks that resolve_filespecs returns the expected value."""
    actual = io.resolve_filespecs(task_id, *outputs)
    self.assertEqual(actual, expected)