def add_call_to_variant(variant, predictions, qual_filter=0, sample_name=None):
    """Populates a Variant's single call from genotype probabilities.

    The most likely genotype is selected from predictions and written to the
    variant's only call, along with the genotype quality (via set_gq) and one
    likelihood value per genotype state (via set_gl). The variant's quality
    and filter fields are updated too. The variant is modified in place.

    Args:
      variant: third_party.nucleus.protos.Variant protobuf to be filled in
        with info derived from predictions.
      predictions: N element array-like. The real-space probabilities of each
        genotype state for this variant.
      qual_filter: float. Forwarded to compute_filter_fields; if predictions
        implies that this isn't a reference call and the QUAL of the
        prediction isn't larger than qual_filter, the variant will be marked
        as FILTERed.
      sample_name: str. The name of the sample to assign to the call's
        call_set_name field.

    Returns:
      The variant that was passed in, after being updated.

    Raises:
      ValueError: If variant doesn't have exactly one variant.call record.
    """
    variant_call = variant_utils.only_call(variant)

    # The allele count is the alternate alleles plus the reference allele.
    best_index, best_genotype = most_likely_genotype(
        predictions, n_alleles=1 + len(variant.alternate_bases))

    genotype_quality, variant_qual = compute_quals(predictions, best_index)
    variant.quality = variant_qual

    # NOTE(review): sample_name defaults to None and is assigned as-is;
    # confirm the call_set_name field accepts None or that callers always
    # supply a name.
    variant_call.call_set_name = sample_name
    variantcall_utils.set_gt(variant_call, best_genotype)
    variantcall_utils.set_gq(variant_call, genotype_quality)

    # Each real-space probability is converted with
    # perror_to_bounded_log10_perror before being stored on the call.
    log10_likelihoods = list(
        map(genomics_math.perror_to_bounded_log10_perror, predictions))
    variantcall_utils.set_gl(variant_call, log10_likelihoods)

    # Slice assignment replaces the contents of the existing filter field.
    variant.filter[:] = compute_filter_fields(variant, qual_filter)
    return variant
def candidates_with_assigned_genotypes(self):
    """Returns deep copies of the candidates carrying their matched genotypes.

    Every Variant in self.candidates is deep-copied, so the originals are left
    untouched. The copies are then paired positionally with
    self.candidate_genotypes, and each paired genotype is written to the
    copy's first VariantCall.

    Returns:
      list[Variant protobuf]: Copies of self.candidates, in the same order.
      Any genotype previously stored on the first call of a paired copy is
      overwritten. If a paired copy has no VariantCall, one is added to hold
      the genotype.
    """
    genotyped_copies = list(map(copy.deepcopy, self.candidates))

    # zip() pairs by position and stops at the shorter input, so copies
    # without a corresponding genotype are returned unchanged.
    for variant_copy, genotype in zip(genotyped_copies,
                                      self.candidate_genotypes):
        if variant_copy.calls:
            target_call = variant_copy.calls[0]
        else:
            target_call = variant_copy.calls.add()
        variantcall_utils.set_gt(target_call, genotype)

    return genotyped_copies
def examples_to_variants(examples_path, max_records=None):
    """Yields one Variant proto per variant range found in examples_path.

    The tf.Examples at examples_path (which may be a sharded spec) are read,
    and the example_variant of each one is extracted. All variants are sorted
    by variant_utils.variant_range_tuple, which holds them all in memory at
    once. Variants sharing the same range tuple (for example, multiple
    versions representing different alt_alleles) are grouped, and only the
    first variant of each group is yielded as its representative.

    Args:
      examples_path: str. Path, or sharded spec, to labeled tf.Examples
        produced by DeepVariant in training mode.
      max_records: int or None. Maximum number of records to read, or None to
        read all of the records.

    Yields:
      nucleus.protos.Variant protos, ordered by
      variant_utils.variant_range_tuple. When
      FLAGS.allow_unlabeled_examples is set, a representative without
      genotypes is yielded with its genotype set to (-1, -1).

    Raises:
      ValueError: If a representative Variant has no genotypes and
        FLAGS.allow_unlabeled_examples is not set.
    """
    records = tfrecord.read_tfrecords(examples_path, max_records=max_records)
    range_key = variant_utils.variant_range_tuple
    ordered_variants = sorted(
        map(tf_utils.example_variant, records), key=range_key)

    # groupby only merges adjacent items, which is why the variants are
    # sorted by the same key first.
    for _, same_range in itertools.groupby(ordered_variants, range_key):
        representative = next(same_range)
        labeled = variantcall_utils.has_genotypes(
            variant_utils.only_call(representative))

        if not labeled:
            if not FLAGS.allow_unlabeled_examples:
                raise ValueError((
                    'Variant {} does not have any genotypes. This tool only works '
                    'with variants that have been labeled.').format(
                        variant_utils.variant_key(representative)))
            # NOTE(review): only_call() above has already been applied to this
            # variant; if it rejects variants with no calls, the calls.add()
            # fallback below can never run -- confirm against variant_utils.
            if representative.calls:
                placeholder_call = representative.calls[0]
            else:
                placeholder_call = representative.calls.add()
            variantcall_utils.set_gt(placeholder_call, (-1, -1))

        yield representative
def add_call_to_variant(variant, predictions, qual_filter=0, sample_name=None):
    """Fills in a Variant record from per-genotype prediction probabilities.

    Working on the variant's only call, this function sets the genotype, the
    genotype quality (via set_gq) and one likelihood value per genotype state
    (via set_gl). It also sets variant.quality and variant.filter. As a last
    step the variant is handed to uncall_homref_gt_if_lowqual together with
    FLAGS.cnn_homref_call_min_gq. The variant is modified in place.

    Args:
      variant: third_party.nucleus.protos.Variant protobuf to be filled in
        with info derived from predictions.
      predictions: N element array-like. The real-space probabilities of each
        genotype state for this variant.
      qual_filter: float. Forwarded to compute_filter_fields; if predictions
        implies that this isn't a reference call and the QUAL of the
        prediction isn't larger than qual_filter, the variant will be marked
        as FILTERed.
      sample_name: str. The name of the sample to assign to the call's
        call_set_name field.

    Returns:
      The variant that was passed in, after being updated.

    Raises:
      ValueError: If variant doesn't have exactly one variant.call record.
    """
    the_call = variant_utils.only_call(variant)

    # One reference allele plus however many alternates the variant lists.
    allele_count = 1 + len(variant.alternate_bases)
    top_index, top_genotype = most_likely_genotype(
        predictions, n_alleles=allele_count)

    call_gq, site_qual = compute_quals(predictions, top_index)
    variant.quality = site_qual

    # NOTE(review): sample_name defaults to None and is assigned as-is;
    # confirm the call_set_name field accepts None or that callers always
    # supply a name.
    the_call.call_set_name = sample_name
    variantcall_utils.set_gt(the_call, top_genotype)
    variantcall_utils.set_gq(the_call, call_gq)

    # Convert every real-space probability with
    # perror_to_bounded_log10_perror before storing it on the call.
    likelihoods = []
    for probability in predictions:
        likelihoods.append(
            genomics_math.perror_to_bounded_log10_perror(probability))
    variantcall_utils.set_gl(the_call, likelihoods)

    # Slice assignment replaces the contents of the existing filter field.
    variant.filter[:] = compute_filter_fields(variant, qual_filter)

    # Presumably turns a low-GQ hom-ref genotype into a no-call, judging by
    # the helper's name -- verify against its definition. Its return value is
    # ignored here.
    uncall_homref_gt_if_lowqual(variant, FLAGS.cnn_homref_call_min_gq)
    return variant