Esempio n. 1
0
def _get_variant_stats(variant, vaf_available=False, vcf_reader=None):
  """Returns a VariantStats object corresponding to the input variant.

  Args:
    variant: Variant to summarize. It is handed to variant_utils.only_call, so
      it is presumably required to carry exactly one call (confirm against
      variant_utils).
    vaf_available: If True, the VAF is computed with _get_vaf; otherwise the
      `vaf` field of the result is None.
    vcf_reader: Forwarded to _get_vaf. Only used when vaf_available is True.

  Returns:
    A VariantStats built from the variant and its call. `position` is
    `variant.start + 1`, and `genotype` is the string form of the sorted
    genotype list.
  """
  vtype = _get_variant_type(variant)
  is_transition, is_transversion = _tstv(variant, vtype)
  vaf = _get_vaf(variant, vcf_reader) if vaf_available else None

  is_variant = variant_utils.is_variant_call(variant)
  # Look the call up once and reuse it for every per-call field instead of
  # repeating the same lookup for depth, GQ and genotype.
  call = variant_utils.only_call(variant)

  return VariantStats(
      reference_name=variant.reference_name,
      position=(variant.start + 1),
      reference_bases=variant.reference_bases,
      alternate_bases=list(variant.alternate_bases),
      variant_type=vtype,
      is_transition=is_transition,
      is_transversion=is_transversion,
      is_variant=is_variant,
      depth=variantcall_utils.get_format(call, 'DP'),
      genotype_quality=variantcall_utils.get_gq(call),
      genotype=str(sorted(variantcall_utils.get_gt(call))),
      vaf=vaf,
      qual=variant.quality)
Esempio n. 2
0
 def assertGVCF(self, gvcf, ref, gq, start, end, min_dp, chrom='chr1',
                gls=None, sample_name=None):
   """Asserts that `gvcf` matches the expected gVCF record fields.

   Args:
     gvcf: Variant record under test; it is passed to
       variant_utils.only_call.
     ref: Expected reference_bases.
     gq: Expected result of variantcall_utils.get_gq for the record's call.
     start: Expected start.
     end: Expected end. When falsy, `start + 1` is expected instead.
     min_dp: Expected result of variantcall_utils.get_min_dp for the call.
     chrom: Expected reference_name. The comparison is skipped when falsy.
     gls: Optional expected genotype likelihoods of the first call, compared
       with npt.assert_allclose. Skipped when None.
     sample_name: Optional expected call_set_name of the first call. Skipped
       when falsy.
   """
   if chrom:
     self.assertEqual(gvcf.reference_name, chrom)
   only = variant_utils.only_call(gvcf)

   # Record-level fields.
   self.assertNotEmpty(gvcf.reference_name)
   self.assertEqual(gvcf.reference_bases, ref)
   self.assertEqual(gvcf.alternate_bases, ['<*>'])
   self.assertEqual(gvcf.start, start)
   self.assertEqual(gvcf.end, end or start + 1)

   # Call-level fields.
   self.assertEqual(variantcall_utils.get_gq(only), gq)
   self.assertNotEmpty(only.genotype_likelihood)
   self.assertIn('MIN_DP', only.info)
   self.assertLen(only.info['MIN_DP'].values, 1)
   self.assertEqual(variantcall_utils.get_min_dp(only), min_dp)

   # Optional checks, enabled only when the caller supplies expectations.
   if gls is not None:
     observed_gls = list(gvcf.calls[0].genotype_likelihood)
     npt.assert_allclose(observed_gls, gls)
   if sample_name:
     self.assertEqual(gvcf.calls[0].call_set_name, sample_name)
Esempio n. 3
0
def uncall_homref_gt_if_lowqual(variant, min_homref_gq):
  """Resets a low-GQ CNN RefCall genotype to "./." in place.

  A variant whose filter is exactly the "RefCall" filter is a site for which
  an example was created but which the CNN did not call as variant. When such
  a variant also has a GQ below `min_homref_gq`, the genotype of its call is
  overwritten with [-1, -1], i.e. "./.". Any other variant is left untouched.
  See http://internal for more info.

  Args:
    variant: third_party.nucleus.protos.Variant proto. Modified in place.
    min_homref_gq: float. GQ threshold below which the genotype is un-called.
  """
  call = variant_utils.only_call(variant)

  is_ref_call = variant.filter == [dv_vcf_constants.DEEP_VARIANT_REF_FILTER]
  if not is_ref_call:
    return

  # GQ is only consulted for RefCall sites.
  if variantcall_utils.get_gq(call) < min_homref_gq:
    call.genotype[:] = [-1, -1]
Esempio n. 4
0
  def verify_variants(self, variants, region, options, is_gvcf):
    """Checks simple properties of every Variant proto in `variants`.

    Each variant must lie on `region.reference_name`, have non-empty
    reference bases, at least one alternate base, a start within
    [region.start, region.end], and exactly one call whose call_set_name is
    `options.variant_caller_options.sample_name`.

    Args:
      variants: Iterable of Variant protos to check.
      region: Region the variants are expected to fall in.
      options: Options providing the expected sample name.
      is_gvcf: If true, additionally enforce the gVCF expectations on each
        call: a [0, 0] genotype, three genotype likelihoods and a
        non-negative GQ.
    """
    for record in variants:
      self.assertEqual(record.reference_name, region.reference_name)
      self.assertNotEqual(record.reference_bases, '')
      self.assertGreater(len(record.alternate_bases), 0)
      self.assertGreaterEqual(record.start, region.start)
      self.assertLessEqual(record.start, region.end)
      self.assertEqual(len(record.calls), 1)

      only = variant_utils.only_call(record)
      observed_sample = only.call_set_name
      expected_sample = options.variant_caller_options.sample_name
      self.assertEqual(observed_sample, expected_sample)

      if not is_gvcf:
        continue

      # GVCF records should have 0/0 genotypes as they are reference sites,
      # have genotype likelihoods and a GQ value.
      self.assertEqual(only.genotype, [0, 0])
      self.assertEqual(len(only.genotype_likelihood), 3)
      self.assertGreaterEqual(variantcall_utils.get_gq(only), 0)
Esempio n. 5
0
  def verify_variants(self, variants, region, options, is_gvcf):
    """Checks simple properties of every Variant proto in `variants`.

    Each variant must have non-empty reference bases, at least one alternate
    base, and exactly one call whose call_set_name is the sample name of
    `options.sample_options[0].variant_caller_options`. When `region` is
    truthy, the variant must also lie on `region.reference_name` with a start
    within [region.start, region.end].

    Args:
      variants: Iterable of Variant protos to check.
      region: Region the variants are expected to fall in. The positional
        checks are skipped when falsy.
      options: Options providing the expected sample name.
      is_gvcf: If true, additionally enforce the gVCF expectations on each
        call: a 0/0 or ./. genotype, three genotype likelihoods and a
        non-negative GQ.
    """
    for record in variants:
      if region:
        self.assertEqual(record.reference_name, region.reference_name)
        self.assertGreaterEqual(record.start, region.start)
        self.assertLessEqual(record.start, region.end)
      self.assertNotEqual(record.reference_bases, '')
      self.assertNotEmpty(record.alternate_bases)
      self.assertLen(record.calls, 1)

      only = variant_utils.only_call(record)
      observed_sample = only.call_set_name
      caller_options = options.sample_options[0].variant_caller_options
      self.assertEqual(observed_sample, caller_options.sample_name)

      if not is_gvcf:
        continue

      # GVCF records should have 0/0 or ./. (un-called) genotypes as they are
      # reference sites, have genotype likelihoods and a GQ value.
      self.assertIn(list(only.genotype), [[0, 0], [-1, -1]])
      self.assertLen(only.genotype_likelihood, 3)
      self.assertGreaterEqual(variantcall_utils.get_gq(only), 0)