Beispiel #1
0
def compute_quals(predictions, prediction_index):
    """Derives the GQ and QUAL scores of a call from genotype probabilities.

    `predictions` is a probability distribution over the N genotype states
    (for example the three diploid states {HOM_REF, HET, HOM_VAR}).

    * GQ (Genotype Quality) is the PHRED-scaled confidence in the genotype
      that was actually called, i.e. the probability stored at
      `prediction_index`.
    * QUAL is the PHRED-scaled confidence that the site is variant rather
      than reference: P(NON_REF) / P(ALL), which in the diploid case is
      (P(HET) + P(HOM_VAR)) / P(ALL).

    Both scores come from genomics_math.ptrue_to_bounded_phred, so whatever
    upper bound that helper applies by default also caps GQ and QUAL.

    Args:
      predictions: N element array-like. The real-space probabilities of
        each genotype state for this variant. Index 0 is treated as the
        reference state; everything after it counts towards QUAL.
      prediction_index: int. Index of the genotype that was called.

    Returns:
      A (gq, qual) pair for output in a Variant record. gq is an int; qual
      is rounded to _QUAL_PRECISION decimal places.
    """
    # GQ looks only at the called genotype's probability. The VCF spec asks
    # for an integer, hence round-to-nearest before the int conversion.
    called_phred = genomics_math.ptrue_to_bounded_phred(
        predictions[prediction_index])
    genotype_quality = int(np.around(called_phred))

    # QUAL pools the probability of every non-reference genotype. Floating
    # point error can nudge that total slightly above 1.0, so clamp it.
    # A possible alternative (not used here) would be to work from the
    # reference probability instead:
    #   genomics_math.perror_to_phred(max(predictions[0], min_ref_confidence))
    # with min_ref_confidence around 1e-15 (giving a qual of 150).
    non_ref_total = sum(predictions[1:])
    variant_prob = min(non_ref_total, 1.0)
    variant_phred = genomics_math.ptrue_to_bounded_phred(variant_prob)
    return genotype_quality, round(variant_phred, _QUAL_PRECISION)
def compute_quals(predictions, prediction_index):
  """Turns genotype probabilities into the GQ and QUAL values of a call.

  The input is a probability distribution over the N genotype states (e.g.
  {HOM_REF, HET, HOM_VAR} for a diploid site).

  GQ (Genotype Quality) is the PHRED-scaled confidence in the called
  genotype itself. QUAL is the PHRED-scaled confidence that the site is
  variant at all, P(NON_REF) / P(ALL); for diploid genotypes that is
  (P(HET) + P(HOM_VAR)) / P(ALL). Both are computed through
  genomics_math.ptrue_to_bounded_phred and therefore inherit that helper's
  default upper bound.

  Args:
    predictions: N element array-like. The real-space probabilities of each
      genotype state for this variant; element 0 is excluded from QUAL.
    prediction_index: int. The actual called genotype from the distribution.

  Returns:
    Tuple (gq, qual): gq as an int, qual rounded to _QUAL_PRECISION places.
  """
  to_phred = genomics_math.ptrue_to_bounded_phred

  # Genotype quality: confidence in the single called state, rounded to the
  # nearest integer because the VCF spec expects an integer GQ.
  p_called = predictions[prediction_index]
  gq = int(np.around(to_phred(p_called)))

  # Site quality: confidence in any non-reference state. The summed
  # probability is clamped at 1.0 to absorb small numerical overshoot.
  # Alternative noted for future consideration (not implemented):
  #   genomics_math.perror_to_phred(max(predictions[0], min_ref_confidence))
  # where min_ref_confidence is something like 1e-15 (a qual of 150).
  p_variant = min(sum(predictions[1:]), 1.0)
  qual = round(to_phred(p_variant), _QUAL_PRECISION)

  return gq, qual
Beispiel #3
0
 def test_compute_quals_numerical_stability(self, probs, call, expected_gq):
   """Checks compute_quals on inputs whose QUAL should equal the maximum.

   Args:
     probs: genotype probabilities passed to compute_quals.
     call: index of the called genotype passed to compute_quals.
     expected_gq: the GQ value compute_quals is expected to return.
   """
   # The expected QUAL is the PHRED score of probability 1.0, rounded the
   # same way compute_quals rounds its result.
   max_qual = round(
       genomics_math.ptrue_to_bounded_phred(1.0),
       postprocess_variants._QUAL_PRECISION)
   gq, qual = postprocess_variants.compute_quals(probs, call)
   # assertEqual, not the deprecated assertEquals alias: the alias was
   # removed from unittest.TestCase in Python 3.12.
   self.assertEqual(expected_gq, gq)
   self.assertEqual(max_qual, qual)
 def test_compute_quals_numerical_stability(self, probs, call, expected_gq):
   """Verifies GQ and a maximal QUAL from compute_quals for given inputs.

   Args:
     probs: genotype probabilities handed to compute_quals.
     call: called-genotype index handed to compute_quals.
     expected_gq: GQ value that compute_quals should produce.
   """
   # Build the reference QUAL from probability 1.0 using the module's own
   # rounding precision so the comparison below can be exact.
   max_qual = round(
       genomics_math.ptrue_to_bounded_phred(1.0),
       postprocess_variants._QUAL_PRECISION)
   gq, qual = postprocess_variants.compute_quals(probs, call)
   # Use assertEqual: the assertEquals spelling is a deprecated alias that
   # no longer exists on unittest.TestCase as of Python 3.12.
   self.assertEqual(expected_gq, gq)
   self.assertEqual(max_qual, qual)
 def test_phred_scale(self, prob, bound, expected):
   """Compares ptrue_to_bounded_phred(prob[, bound]) with `expected`.

   Args:
     prob: probability passed to ptrue_to_bounded_phred.
     bound: optional bound; forwarded as the second positional argument
       only when truthy, otherwise the helper's default is used.
     expected: value the result must match to 6 decimal places.
   """
   # A falsy bound means "call with the default", so pass no extra argument.
   optional_args = (bound,) if bound else ()
   actual = genomics_math.ptrue_to_bounded_phred(prob, *optional_args)
   self.assertAlmostEqual(actual, expected, places=6)
 def test_phred_scale(self, prob, bound, expected):
   """Checks the PHRED value of `prob`, with or without an explicit bound.

   Args:
     prob: probability to convert with ptrue_to_bounded_phred.
     bound: explicit bound for the conversion; when falsy the conversion
       is called with `prob` alone.
     expected: expected PHRED value, compared to 6 decimal places.
   """
   # Assemble the positional arguments first so there is a single call site.
   call_args = [prob]
   if bound:
     call_args.append(bound)
   actual = genomics_math.ptrue_to_bounded_phred(*call_args)
   self.assertAlmostEqual(actual, expected, places=6)