def test_runs_reproducible(self):
    """Checks that generate_trained_model_runner reproduces the golden proto.

    Runs the trainer on the chr20 test inputs with a fixed random seed, parses
    the text-format WindowSelectorModel found at the output path, and compares
    it against hardcoded coefficients.
    """
    # Only the path is needed here: it is handed to the runner and the file is
    # read back afterwards. Closing the handle immediately avoids leaking an
    # open file object for the rest of the test; delete=False keeps the file
    # on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(
            mode='w', dir=FLAGS.test_tmpdir, delete=False) as output_model_proto:
        fname = output_model_proto.name

    generate_trained_model.generate_trained_model_runner(
        truth_variants=testdata.TRUTH_VARIANTS_VCF,
        reads=testdata.CHR20_BAM,
        ref=testdata.CHR20_FASTA,
        output_model_proto=fname,
        output_model_pckl=None,
        exclude_contig=None,
        from_contig='chr20',
        random_seed=42,
        indel_weight=1)

    with tf.gfile.GFile(fname) as f:
        window_selector_model = text_format.Parse(
            f.read(), realigner_pb2.WindowSelectorModel())

    # Hardcoded value obtained from a "golden" run.
    expected = realigner_pb2.WindowSelectorModel(
        model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR,
        allele_count_linear_model=(
            realigner_pb2.WindowSelectorModel.AlleleCountLinearModel(
                bias=0.0259438883513,
                coeff_soft_clip=0.00196795910597,
                coeff_substitution=-0.545202672482,
                coeff_insertion=0.267441004515,
                coeff_deletion=0.211069211364,
                coeff_reference=0.191676750779,
                decision_boundary=3.0)))
    self.assertEqual(window_selector_model, expected)
def setUp(self):
    """Stores WindowSelectorOptions backed by a VARIANT_READS model.

    The variant-reads thresholds are fixed at 1 (min) and 10 (max) supporting
    reads; the resulting options are kept on `self.config`.
    """
    model_cls = realigner_pb2.WindowSelectorModel
    read_thresholds = model_cls.VariantReadsThresholdModel(
        min_num_supporting_reads=1,
        max_num_supporting_reads=10)
    selector_model = model_cls(
        model_type=model_cls.VARIANT_READS,
        variant_reads_model=read_thresholds)

    self.config = realigner_pb2.WindowSelectorOptions(
        min_mapq=20,
        min_base_quality=20,
        min_windows_distance=4,
        region_expansion_in_bp=20,
        window_selector_model=selector_model)
def model_to_proto(model):
    """Returns an allele count-based linear WindowSelectorModel.

    Args:
      model: object exposing `intercept_` and `coef_` (presumably a fitted
        sklearn linear model -- confirm against the caller). Only
        `intercept_[0]` and entries 1 through 5 of `coef_[0]` are read.

    Returns:
      A realigner_pb2.WindowSelectorModel of type ALLELE_COUNT_LINEAR whose
      decision boundary is _DEFAULT_THRESHOLD.
    """
    # AFAIK sklearn does not provide a way to extract the coefficients based on
    # the columns of its input, so the positions below rely on the known column
    # order of the training data.
    bias = model.intercept_[0]
    weights = model.coef_[0]

    linear_model = realigner_pb2.WindowSelectorModel.AlleleCountLinearModel(
        bias=bias,
        coeff_soft_clip=weights[5],
        coeff_substitution=weights[2],
        coeff_insertion=weights[3],
        coeff_deletion=weights[4],
        coeff_reference=weights[1],
        decision_boundary=_DEFAULT_THRESHOLD)

    return realigner_pb2.WindowSelectorModel(
        model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR,
        allele_count_linear_model=linear_model)
def setUp(self):
    """Stores WindowSelectorOptions backed by an ALLELE_COUNT_LINEAR model.

    The linear model uses hand-picked coefficients with a decision boundary of
    0; the resulting options are kept on `self.config`.
    """
    model_cls = realigner_pb2.WindowSelectorModel
    linear_model = model_cls.AlleleCountLinearModel(
        bias=0,
        coeff_soft_clip=0,
        coeff_substitution=-0.5,
        coeff_insertion=1,
        coeff_deletion=1,
        coeff_reference=-0.5,
        decision_boundary=0)
    selector_model = model_cls(
        model_type=model_cls.ALLELE_COUNT_LINEAR,
        allele_count_linear_model=linear_model)

    self.config = realigner_pb2.WindowSelectorOptions(
        min_mapq=20,
        min_base_quality=20,
        min_windows_distance=4,
        region_expansion_in_bp=20,
        window_selector_model=selector_model)
def window_selector_config(flags_obj):
    """Builds a WindowSelectorOptions proto from flags and default settings.

    Args:
      flags_obj: configuration FLAGS.

    Returns:
      A realigner_pb2.WindowSelectorOptions protobuf.

    Raises:
      ValueError: If ws_use_window_selector_model is False and
        ws_window_selector_model is set. Or if ws_use_window_selector_model is
        True and either ws_{min,max}_num_supporting_reads is set. Or if the
        selected model is of type VARIANT_READS and its
        max_num_supporting_reads is below its min_num_supporting_reads.
    """
    model_cls = realigner_pb2.WindowSelectorModel

    if flags_obj.ws_use_window_selector_model:
        # A model-driven selector is incompatible with explicit read thresholds.
        if flags_obj.ws_min_num_supporting_reads != _UNSET_WS_INT_FLAG:
            raise ValueError('Cannot use both ws_min_num_supporting_reads and '
                             'ws_use_window_selector_model flags.')
        if flags_obj.ws_max_num_supporting_reads != _UNSET_WS_INT_FLAG:
            raise ValueError('Cannot use both ws_max_num_supporting_reads and '
                             'ws_use_window_selector_model flags.')

        if flags_obj.ws_window_selector_model is None:
            window_selector_model = _ALLELE_COUNT_LINEAR_MODEL_DEFAULT
        else:
            # The flag holds a path to a text-format WindowSelectorModel.
            with tf.gfile.GFile(flags_obj.ws_window_selector_model) as f:
                window_selector_model = text_format.Parse(f.read(), model_cls())
    else:
        if flags_obj.ws_window_selector_model is not None:
            raise ValueError('Cannot specify a ws_window_selector_model '
                             'if ws_use_window_selector_model is False.')

        # Fall back to the module defaults for any threshold left unset.
        if flags_obj.ws_min_num_supporting_reads == _UNSET_WS_INT_FLAG:
            min_reads = _DEFAULT_MIN_SUPPORTING_READS
        else:
            min_reads = flags_obj.ws_min_num_supporting_reads

        if flags_obj.ws_max_num_supporting_reads == _UNSET_WS_INT_FLAG:
            max_reads = _DEFAULT_MAX_SUPPORTING_READS
        else:
            max_reads = flags_obj.ws_max_num_supporting_reads

        thresholds = model_cls.VariantReadsThresholdModel(
            min_num_supporting_reads=min_reads,
            max_num_supporting_reads=max_reads)
        window_selector_model = model_cls(
            model_type=model_cls.VARIANT_READS,
            variant_reads_model=thresholds)

    # Applies to every VARIANT_READS model, whether built from flags above or
    # parsed from a file.
    if window_selector_model.model_type == model_cls.VARIANT_READS:
        model = window_selector_model.variant_reads_model
        if model.max_num_supporting_reads < model.min_num_supporting_reads:
            raise ValueError('ws_min_supporting_reads should be smaller than '
                             'ws_max_supporting_reads.')

    return realigner_pb2.WindowSelectorOptions(
        min_mapq=flags_obj.ws_min_mapq,
        min_base_quality=flags_obj.ws_min_base_quality,
        min_windows_distance=flags_obj.ws_min_windows_distance,
        max_window_size=flags_obj.ws_max_window_size,
        region_expansion_in_bp=flags_obj.ws_region_expansion_in_bp,
        window_selector_model=window_selector_model)
'alignment.') flags.DEFINE_integer('kmer_size', 32, 'K-mer size for fast pass alinger reads index.') # Margin added to the reference sequence for the aligner module. _REF_ALIGN_MARGIN = 20 _DEFAULT_MIN_SUPPORTING_READS = 2 _DEFAULT_MAX_SUPPORTING_READS = 300 _ALLELE_COUNT_LINEAR_MODEL_DEFAULT = realigner_pb2.WindowSelectorModel( model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR, allele_count_linear_model=realigner_pb2.WindowSelectorModel. AlleleCountLinearModel( bias=-0.683379, coeff_soft_clip=2.997000, coeff_substitution=-0.086644, coeff_insertion=2.493585, coeff_deletion=1.795914, coeff_reference=-0.059787, decision_boundary=3)) # --------------------------------------------------------------------------- # Set configuration settings. # --------------------------------------------------------------------------- def window_selector_config(flags_obj): """Creates a WindowSelectorOptions proto based on input and default settings. Args: