def testExampleSetTruthVariant(self):
    """Stores a truth variant in an example with and without simplify."""
    tf_example = tf_utils.make_example(
        self.variant, self.alts, self.encoded_image, self.default_shape,
        self.default_format)

    # Truth variant with many fields populated, plus variant-level and
    # call-level info entries.
    detailed_call = variants_pb2.VariantCall(
        call_set_id='call_set_id',
        call_set_name='call_set_name',
        genotype=[0, 1],
        phaseset='phaseset',
        genotype_likelihood=[0.1, 0.2, 0.3])
    detailed_truth = variants_pb2.Variant(
        variant_set_id='variant_set_id',
        id='id',
        names=['name1'],
        created=1234,
        reference_name='1',
        start=10,
        end=11,
        reference_bases='C',
        alternate_bases=['A'],
        filter=['PASS'],
        quality=1234.5,
        calls=[detailed_call])
    test_utils.set_list_values(detailed_truth.info['key'], [1])
    test_utils.set_list_values(detailed_truth.calls[0].info['key'], [2])

    # The reduced form expected back after encoding with simplify=True.
    reduced_call = variants_pb2.VariantCall(
        call_set_name='call_set_name', genotype=[0, 1])
    reduced_truth = variants_pb2.Variant(
        reference_name='1',
        start=10,
        end=11,
        reference_bases='C',
        alternate_bases=['A'],
        filter=['PASS'],
        quality=1234.5,
        calls=[reduced_call])
    test_utils.set_list_values(reduced_truth.calls[0].info['key'], [2])

    # Nothing has been stored under the truth-variant feature yet.
    self.assertIsNotAFeature('truth_variant/encoded', tf_example)

    # With simplify=False the variant must come back unchanged.
    tf_utils.example_set_truth_variant(
        tf_example, detailed_truth, simplify=False)
    self.assertEqual(
        detailed_truth, tf_utils.example_truth_variant(tf_example))

    # Re-encoding with simplify=True must yield the reduced form.
    tf_utils.example_set_truth_variant(
        tf_example, detailed_truth, simplify=True)
    self.assertEqual(
        reduced_truth, tf_utils.example_truth_variant(tf_example))
def _create_record_from_template(template, start, end):
    """Copies template into a fresh Variant and overrides its start and end."""
    record = variants_pb2.Variant()
    record.CopyFrom(template)
    record.start, record.end = start, end
    return record
Esempio n. 3
0
 def test_add_call_to_variant(self, probs, expected):
     """Checks the variant returned by add_call_to_variant against `expected`.

     Args:
       probs: predictions forwarded to add_call_to_variant.
       expected: object exposing the expected Variant fields (position,
         alleles, quality, filter and a single call).
     """
     raw_variant = variants_pb2.Variant(
         reference_name=expected.reference_name,
         reference_bases=expected.reference_bases,
         alternate_bases=expected.alternate_bases,
         start=expected.start,
         end=expected.end,
         calls=[
             variants_pb2.VariantCall(call_set_name=_DEFAULT_SAMPLE_NAME)
         ])
     variant = postprocess_variants.add_call_to_variant(
         variant=raw_variant,
         predictions=probs,
         sample_name=_DEFAULT_SAMPLE_NAME)

     # Position and alleles must be carried over untouched.
     self.assertEqual(variant.reference_bases, expected.reference_bases)
     self.assertEqual(variant.alternate_bases, expected.alternate_bases)
     self.assertEqual(variant.reference_name, expected.reference_name)
     self.assertEqual(variant.start, expected.start)
     self.assertEqual(variant.end, expected.end)

     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
     # which no longer exists on Python 3.12+.
     self.assertAlmostEqual(variant.quality, expected.quality, places=6)
     self.assertEqual(variant.filter, expected.filter)

     self.assertEqual(len(variant.calls), 1)
     self.assertEqual(len(expected.calls), 1)
     self.assertEqual(variant.calls[0].genotype, expected.calls[0].genotype)
     self.assertEqual(variant.calls[0].info['GQ'],
                      expected.calls[0].info['GQ'])
     # zip stops at the shorter sequence, so only the overlapping likelihoods
     # are compared.
     for gl, expected_gl in zip(variant.calls[0].genotype_likelihood,
                                expected.calls[0].genotype_likelihood):
         self.assertAlmostEqual(gl, expected_gl, places=6)
Esempio n. 4
0
 def test_compute_filter_fields(self):
     """Sweeps qual/min_qual pairs and checks the computed filter field."""
     # Looped here because the full grid generates too many cases for a
     # parameterized test.
     for qual, min_qual in itertools.product(range(100), range(100)):
         if qual >= min_qual:
             qual_based_filter = postprocess_variants.DEEP_VARIANT_PASS
         else:
             qual_based_filter = postprocess_variants.DEEP_VARIANT_QUAL_FILTER

         # With no call present, the quality threshold decides the filter.
         variant = variants_pb2.Variant(quality=qual)
         self.assertEqual(
             postprocess_variants.compute_filter_fields(variant, min_qual),
             [qual_based_filter])

         # A hom-ref genotype is expected to give the ref filter whatever the
         # quality is.
         del variant.filter[:]
         variant.calls.add(genotype=[0, 0])
         self.assertEqual(
             postprocess_variants.compute_filter_fields(variant, min_qual),
             [postprocess_variants.DEEP_VARIANT_REF_FILTER])

         # With a variant genotype the quality threshold matters again.
         del variant.filter[:]
         del variant.calls[:]
         variant.calls.add(genotype=[0, 1])
         self.assertEqual(
             postprocess_variants.compute_filter_fields(variant, min_qual),
             [qual_based_filter])
Esempio n. 5
0
    def test_read_support_is_respected(self, read_name, read_number,
                                       alt_allele, read_base, supports_alt):
        """Checks the supports-alt value, the 5th of the 7 encoded channels."""
        candidate = variants_pb2.Variant(
            reference_name='chr1',
            start=10,
            end=11,
            reference_bases='A',
            alternate_bases=[alt_allele])
        dv_call = deepvariant_pb2.DeepVariantCall(
            variant=candidate,
            allele_support={
                'C': _supporting_reads('read1/1', 'read3/2'),
                'G': _supporting_reads('read2/1', 'read2/2'),
            })

        # A single-base read placed exactly on the variant position.
        variant_start = dv_call.variant.start
        read = test_utils.make_read(
            read_base,
            start=variant_start,
            cigar='1M',
            quals=[50],
            name=read_name)
        read.read_number = read_number

        encoded = _make_encoder().encode_read(
            dv_call, 'TAT', read, variant_start - 1, alt_allele)

        # Only the base channel and the supports-alt channel vary per case.
        base_channel = {'C': 30, 'G': 180}[read_base]
        supports_alt_channel = [152, 254][supports_alt]
        self.assertEqual(
            list(encoded[0, 1]),
            [base_channel, 254, 211, 70, supports_alt_channel, 254, 1])
 def test_overlaps_variant_with_ranges(self):
   """variant_overlaps should query overlaps with the variant's contig/start."""
   range_set = ranges.RangeSet([ranges.make_range('chr1', 0, 5)])
   query = variants_pb2.Variant(reference_name='chr2', start=10, end=11)
   with mock.patch.object(range_set, 'overlaps') as patched_overlaps:
     # Whatever overlaps returns is expected to be passed straight through.
     patched_overlaps.return_value = True
     result = range_set.variant_overlaps(query)
     self.assertEqual(result, True)
     patched_overlaps.assert_called_once_with('chr2', 10)
def _create_variant_with_alleles(ref=None, alts=None, start=0):
  """Builds a Variant with the given alleles and one default-sample call."""
  default_call = variants_pb2.VariantCall(call_set_name=_DEFAULT_SAMPLE_NAME)
  return variants_pb2.Variant(
      start=start,
      reference_bases=ref,
      alternate_bases=alts,
      calls=[default_call])
Esempio n. 8
0
def _make_dv_call(ref_bases='A', alt_bases='C'):
    """Builds a DeepVariantCall for a chr1:10-11 variant.

    The allele_support map always holds a single 'C' entry built from reads
    'read1/1' and 'read2/1', independent of alt_bases.
    """
    variant = variants_pb2.Variant(
        reference_name='chr1',
        start=10,
        end=11,
        reference_bases=ref_bases,
        alternate_bases=[alt_bases])
    support = {'C': _supporting_reads('read1/1', 'read2/1')}
    return deepvariant_pb2.DeepVariantCall(
        variant=variant, allele_support=support)
Esempio n. 9
0
 def test_exception_extract_single_variant_name(self, names):
     """Expects a ValueError when extracting a sample name from `names`."""
     record = deepvariant_pb2.CallVariantsOutput(
         variant=variants_pb2.Variant(calls=[
             variants_pb2.VariantCall(call_set_name=sample_name)
             for sample_name in names
         ]))
     # NOTE(review): assertRaisesRegexp is a deprecated alias that is gone in
     # Python 3.12; switch to assertRaisesRegex once the supported Python
     # versions are confirmed.
     with self.assertRaisesRegexp(ValueError, 'Error extracting name:'):
         postprocess_variants._extract_single_sample_name(record)
Esempio n. 10
0
 def test_alt_combinations_no_het_alt(self, ref, alts, expected):
     """Checks alt allele combinations under NO_HET_ALT_IMAGES mode."""
     no_het_alt = deepvariant_pb2.PileupImageOptions.NO_HET_ALT_IMAGES
     options = pileup_image.default_options()
     options.multi_allelic_mode = no_het_alt
     creator = pileup_image.PileupImageCreator(
         options, self.mock_ref_reader, self.mock_sam_reader)

     variant = variants_pb2.Variant(
         reference_bases=ref, alternate_bases=alts)
     combinations = list(creator._alt_allele_combinations(variant))
     self.assertEqual(expected, combinations)
 def setUp(self):
     """Prepares the variant and image fixtures stored on the test case."""
     self.encoded_image = 'encoded_image_data'
     self.default_shape = [5, 5, 7]
     self.default_format = 'raw'
     # The variant's alternate_bases are taken from self.alts.
     self.alts = ['A']
     self.variant = variants_pb2.Variant(
         reference_name='1',
         start=10,
         end=11,
         reference_bases='C',
         alternate_bases=self.alts)
 def test_transform_to_gvcf_no_allele_addition(self, alts, gls, vaf):
   """Expects _transform_to_gvcf_record to return a record equal to its input."""
   variant = _create_variant(
       ref_name='chr1',
       start=10,
       ref_base='A',
       alt_bases=alts,
       qual=40,
       filter_field='PASS',
       genotype=[0, 1],
       gq=None,
       likelihoods=gls)
   vaf_entries = [struct_pb2.Value(number_value=fraction) for fraction in vaf]
   variant.calls[0].info['VAF'].values.extend(vaf_entries)

   # Snapshot the input before the transform runs, so the comparison is
   # against the original content.
   snapshot = variants_pb2.Variant()
   snapshot.CopyFrom(variant)

   transformed = postprocess_variants._transform_to_gvcf_record(variant)
   self.assertEqual(transformed, snapshot)
Esempio n. 13
0
def _simplify_variant(variant):
    """Builds a new Variant that keeps only the basic fields of variant.

    Each call is reduced to its call_set_name, genotype and info map; the
    variant keeps its position, alleles, filter and quality.
    """
    basic_calls = [
        variants_pb2.VariantCall(
            call_set_name=call.call_set_name,
            genotype=call.genotype,
            # Wrapping in dict() is necessary to actually set info.
            info=dict(call.info))
        for call in variant.calls
    ]
    return variants_pb2.Variant(
        reference_name=variant.reference_name,
        start=variant.start,
        end=variant.end,
        reference_bases=variant.reference_bases,
        alternate_bases=variant.alternate_bases,
        filter=variant.filter,
        quality=variant.quality,
        calls=basic_calls)
Esempio n. 14
0
  def _make_synthetic_hom_ref(self, variant):
    """Builds a hom-ref counterpart of variant.

    Args:
      variant: The candidate Variant proto to mirror.

    Returns:
      A new Variant with the same reference_name, start, end and alleles as
      variant, holding a single call whose genotype is [0, 0].
    """
    hom_ref_call = variants_pb2.VariantCall(genotype=[0, 0])
    return variants_pb2.Variant(
        reference_name=variant.reference_name,
        start=variant.start,
        end=variant.end,
        reference_bases=variant.reference_bases,
        alternate_bases=variant.alternate_bases,
        calls=[hom_ref_call])
Esempio n. 15
0
    def test_ignores_reads_with_low_quality_bases(self):
        """encode_read should return None only below the base-quality threshold."""
        candidate = variants_pb2.Variant(
            reference_name='chr1',
            start=2,
            end=3,
            reference_bases='A',
            alternate_bases=['C'])
        dv_call = deepvariant_pb2.DeepVariantCall(variant=candidate)
        encoder = _make_encoder()

        # The threshold the encoder uses.
        threshold = pileup_image.DEFAULT_MIN_BASE_QUALITY

        for middle_qual in range(0, threshold + 5):
            # Only the middle quality is swept; the flanking values stay fixed
            # just below and just above the threshold.
            read = test_utils.make_read(
                'AAA',
                start=1,
                cigar='3M',
                quals=[threshold - 1, middle_qual, threshold + 1])
            encoded = encoder.encode_read(dv_call, 'AACAG', read, 1, 'C')
            if middle_qual >= threshold:
                self.assertIsNotNone(encoded)
            else:
                self.assertIsNone(encoded)
def prune_alleles(variant, alt_alleles_to_remove):
    """Returns variant without the alt alleles in alt_alleles_to_remove.

    Args:
      variant: variants_pb2.Variant.
      alt_alleles_to_remove: iterable of str. Alt alleles to remove from
        variant.

    Returns:
      The input variant itself, unmodified, when alt_alleles_to_remove is empty
      or None. Otherwise a new variants_pb2.Variant copy whose alternate_bases
      hold only the retained alt alleles and whose allele-indexed call fields
      were reindexed to match.
    """
    if alt_alleles_to_remove:
        pruned = variants_pb2.Variant()
        pruned.CopyFrom(variant)

        # The remapper is built from the original allele list and also cleans
        # up any VariantCall.info fields indexed by alt allele.
        remapper = AlleleRemapper(variant.alternate_bases,
                                  alt_alleles_to_remove)
        remapper.reindex_allele_indexed_fields(
            pruned, _ALT_ALLELE_INDEXED_FORMAT_FIELDS)
        pruned.alternate_bases[:] = remapper.retained_alt_alleles()
        return pruned

    # Nothing to remove: hand back the caller's variant as is.
    return variant
Esempio n. 17
0
 def test_alt_combinations(self, ref, alts, expected):
     """Checks the alt allele combinations produced for ref/alts."""
     candidate = variants_pb2.Variant(
         reference_bases=ref, alternate_bases=alts)
     combinations = list(self.pic._alt_allele_combinations(candidate))
     self.assertEqual(expected, combinations)
Esempio n. 18
0
def make_variant(chrom='chr1',
                 start=10,
                 alleles=None,
                 end=None,
                 filters=None,
                 qual=None,
                 gt=None,
                 gq=None,
                 sample_name=None,
                 gls=None):
    """Creates a new Variant proto from args.

    Args:
      chrom: str. The reference_name for this variant. Defaults to 'chr1'.
      start: int. The starting position of this variant. Defaults to 10.
      alleles: list of str with at least one element. alleles[0] is the
        reference bases and alleles[1:] will be set to alternate_bases of
        variant. If None, defaults to ['A', 'C'].
      end: int or None. If not None, the variant's end will be set to this
        value. If None, will be set to the start + len(reference_bases).
      filters: str, list of str, or None. Sets the filters field of the variant
        to this value if not None. If filters is a string `value`, this is
        equivalent to an argument [`value`]. If None, no value will be assigned
        to the filters field.
      qual: int or None. The quality score for this variant. If None, no
        quality score will be written in the Variant.
      gt: A list of ints, or None. If non-empty, creates a VariantCall in
        Variant with genotype field set to this value. If None (the default)
        or empty, no VariantCall is created and gq, sample_name and gls are
        ignored.
      gq: int or None. If not None and a VariantCall is created, this GQ value
        is added to the call's info. A GQ of 0 is recorded.
      sample_name: str or None. If non-empty and a VariantCall is created, sets
        the call_set_name of our VariantCall to this value.
      gls: array-like of float, or None. If not None and a VariantCall is
        created, these values are appended to the genotype_likelihood of our
        VariantCall.

    Returns:
      A variants_pb2.Variant proto.
    """
    if alleles is None:
        alleles = ['A', 'C']

    # Only derive end when the caller did not supply one; testing truthiness
    # here would silently override an explicit end of 0.
    if end is None:
        end = start + len(alleles[0])

    variant = variants_pb2.Variant(
        reference_name=chrom,
        start=start,
        end=end,
        reference_bases=alleles[0],
        alternate_bases=alleles[1:],
        quality=qual,
    )

    if filters is not None:
        if not isinstance(filters, (list, tuple)):
            filters = [filters]
        variant.filter[:] = filters

    if gt:
        call = variant.calls.add(genotype=gt)

        if sample_name:
            call.call_set_name = sample_name

        # Compare against None: a truthiness test would drop a GQ of 0.
        if gq is not None:
            set_list_values(call.info['GQ'], [gq])

        # Compare against None so array-like inputs, whose truth value may be
        # ambiguous, are accepted; extending with an empty sequence is a no-op.
        if gls is not None:
            call.genotype_likelihood.extend(gls)

    return variant
Esempio n. 19
0
 def test_allele_indices_with_num_alts(self, alt_bases, num_alts, expected):
     """Checks diploid allele indices for a given number of alt alleles."""
     candidate = variants_pb2.Variant(alternate_bases=alt_bases)
     self.assertEqual(
         variantutils.allele_indices_with_num_alts(
             candidate, num_alts, ploidy=2),
         expected)
Esempio n. 20
0
 def test_invalid_allele_indices_with_num_alts(self, alt_bases, num_alts,
                                               ploidy):
     """Expects NotImplementedError or ValueError for the given arguments."""
     candidate = variants_pb2.Variant(alternate_bases=alt_bases)
     rejected_with = (NotImplementedError, ValueError)
     with self.assertRaises(rejected_with):
         variantutils.allele_indices_with_num_alts(
             candidate, num_alts, ploidy)
Esempio n. 21
0
    def make_gvcfs(self, allele_count_summaries):
        """Primary interface function for computing gVCF confidence at a site.

        Walks the provided AlleleCountSummary protos and yields Variant protos
        holding gVCF reference blocks. Consecutive sites that share the same
        quantized GQ are merged into one block. Each yielded Variant has
        reference_name, reference_bases and start taken from the first site of
        its block, end set to one past the position of the last site, the gVCF
        alt allele as its only alternate base, and a single VariantCall with
        call_set_name of options.sample_name, genotype [0, 0], the genotype
        likelihoods of the first site, and a GQ set from the minimum raw GQ in
        the block.

        The reference base of every site must be a canonical DNA base or an
        extended IUPAC code; sites with an extended IUPAC code are skipped.

        Args:
          allele_count_summaries: iterable of AlleleCountSummary protos in
            coordinate-sorted order. Each proto supplies the read counts, the
            reference position and the reference base of one site.

        Yields:
          Variant protos containing gVCF records, in input order.

        Raises:
          ValueError: a reference base is neither a canonical DNA base nor an
            extended IUPAC code. Raised lazily, while iterating.
        """
        def with_gq_and_likelihoods(summary_counts):
            """Pairs summary_counts with its GQ values and likelihoods.

            Args:
              summary_counts: A single AlleleCountSummary.

            Returns:
              A tuple (summary_counts, quantized GQ, raw GQ, genotype
              likelihoods). Raw GQ and likelihoods come from
              self.reference_confidence; all three computed values are None
              when the reference base is an extended IUPAC code.

            Raises:
              ValueError: The reference base is not a valid DNA or IUPAC base.
            """
            ref_base = summary_counts.ref_base
            if ref_base in CANONICAL_DNA_BASES:
                raw_gq, likelihoods = self.reference_confidence(
                    summary_counts.ref_supporting_read_count,
                    summary_counts.total_read_count)
                quantized_gq = _quantize_gq(raw_gq, self.options.gq_resolution)
                return summary_counts, quantized_gq, raw_gq, likelihoods

            if ref_base not in EXTENDED_IUPAC_CODES:
                raise ValueError(
                    'Invalid reference base={} found during gvcf '
                    'calculation'.format(ref_base))

            # Ambiguous reference base: skip computing gq and likelihoods.
            return summary_counts, None, None, None

        # Group contiguous single-bp sites by quantized GQ (tuple index 1).
        # Because groupby only merges adjacent items, a site with a different
        # GQ in between always starts a new block.
        annotated_sites = (
            with_gq_and_likelihoods(sc) for sc in allele_count_summaries)
        for quantized_gq, group in itertools.groupby(
                annotated_sites, key=operator.itemgetter(1)):
            if quantized_gq is None:
                # A None key marks a non-DNA reference base; emit nothing.
                continue

            block = list(group)
            first_counts, _, _, first_likelihoods = block[0]
            last_counts = block[-1][0]
            lowest_raw_gq = min(entry[2] for entry in block)

            call = variants_pb2.VariantCall(
                call_set_name=self.options.sample_name,
                genotype=[0, 0],
                genotype_likelihood=first_likelihoods)
            variantutils.set_variantcall_gq(call, lowest_raw_gq)
            yield variants_pb2.Variant(
                reference_name=first_counts.reference_name,
                reference_bases=first_counts.ref_base,
                alternate_bases=[variantutils.GVCF_ALT_ALLELE],
                start=first_counts.position,
                end=last_counts.position + 1,
                calls=[call])
Esempio n. 22
0
 def test_overlaps_variant_empty_range(self):
   """An empty RangeSet should answer with empty_set_return_value."""
   no_ranges = ranges.RangeSet()
   query = variants_pb2.Variant(reference_name='chr2', start=10, end=11)
   answer = no_ranges.variant_overlaps(query, empty_set_return_value='foo')
   self.assertEqual(answer, 'foo')
Esempio n. 23
0
def _resolve_overlapping_variants(overlapping_variants):
    """Yields variants with compatible haplotypes, if possible.

  Args:
    overlapping_variants: list(Variant). A non-empty list of Variant protos in
      coordinate-sorted order that overlap on the reference genome and are
      predicted to contain alternate allele genotypes.

  Yields:
    Variant protos in coordinate-sorted order that try to resolve incompatible
    haplotypes.
  """
    # Short circuit the simplest case: A single variant in a region is compatible
    # with itself by definition.
    if len(overlapping_variants) == 1:
        yield overlapping_variants[0]
        return

    # If the actual genotype calls are compatible, we can safely return those
    # since they would be the most likely configuration also when restricting to
    # only valid configurations of genotype calls.
    calculator = _VariantCompatibilityCalculator(overlapping_variants)
    nonref_counts = [_nonref_genotype_count(v) for v in overlapping_variants]
    if calculator.all_variants_compatible(nonref_counts):
        logging.info('Overlapping variants are naturally compatible: %s',
                     overlapping_variants)
        for variant in overlapping_variants:
            yield variant
        return

    # The actual genotype calls produce an inconsistent haplotype. If the number
    # of affected variants is "too large", avoid processing since this is an
    # exponential process.
    if len(overlapping_variants) > _MAX_OVERLAPPING_VARIANTS_TO_RESOLVE:
        logging.warning(
            'Overlapping variants are not naturally compatible, and there are too '
            'many to exhaustively search (%s). Returning variants without '
            'modification, beginning with %s.', len(overlapping_variants),
            overlapping_variants[0])
        for variant in overlapping_variants:
            yield variant
        return

    # Otherwise, the actual genotype calls are incompatible. Since the genotype
    # likelihoods are generally well-calibrated, we examine all configurations of
    # genotypes that create compatible haplotypes and retain the single
    # configuration with the highest joint likelihood across all variants as the
    # proposed genotype assignment. Separately, we rescale the likelihood of each
    # individual variant using only the valid genotype configurations. If the
    # results are concordant (i.e., the genotype predicted by the marginal
    # likelihood for each variant is the same as the genotype predicted when
    # maximizing the joint likelihood across all variants), we return variants
    # with those calls and the rescaled likelihoods. Otherwise, we log a warning
    # and emit the original (incompatible) variants.
    #
    # For example, a biallelic deletion with probabilities of homref, het, homalt
    # = 0.01, 0.9, 0.09 and inside it a biallelic SNP with probs 0.02, 0.48, 0.5.
    # Naively this would be called as a heterozygous indel and a homozygous SNP,
    # which is impossible as there are three total alternate genotypes. The
    # algorithm does the following:
    #
    #   Indel    SNP    Joint prob
    #   0/0      0/0    0.01 * 0.02 = 0.0002
    #   0/0      0/1    0.01 * 0.48 = 0.0048
    #   0/0      1/1    0.01 * 0.50 = 0.0050
    #   0/1      0/0    0.90 * 0.02 = 0.0180
    #   0/1      0/1    0.90 * 0.48 = 0.4320*
    #   0/1      1/1    <invalid>   = 0
    #   1/1      0/0    0.09 * 0.02 = 0.0018
    #   1/1      0/1    <invalid>   = 0
    #   1/1      1/1    <invalid>   = 0
    #
    #   So using the highest joint likelihood, we predict het indel and het SNP.
    #
    #   The marginal probability of each genotype for the indel is:
    #   0/0:  0.0002 + 0.0048 + 0.0050 = 0.01
    #   0/1:  0.0180 + 0.4320          = 0.45
    #   1/1:  0.0018                   = 0.0018
    #
    #   which after normalizing to sum to 1 is roughly 0.022, 0.974, 0.004.
    #   The marginal probability for the SNP, after performing similar
    #   calculations, is 0.043, 0.946, 0.011. So the marginals also predict a het
    #   indel and a het SNP. Since the two calculations agree, we use this
    #   genotype call and modified likelihoods.
    #
    # First, we find all non-reference count configurations that are compatible.
    # This represents each variant solely based on its number of non-reference
    # genotypes, and assumes that variants are compatible if the total number of
    # non-reference genotypes at a single position is at most two. By using
    # non-reference counts, we avoid testing multiple allele configurations that
    # will return the same result (e.g. a variant with two possible alternate
    # alleles has three allele configurations that are homozygous alternate
    # [1/1, 1/2, 2/2] and either all or none of them will be valid depending on
    # the variants it interacts with).
    valid_nonref_count_configurations = [
        conf for conf in itertools.product([0, 1, 2],
                                           repeat=len(overlapping_variants))
        if calculator.all_variants_compatible(conf)
    ]

    # Next, we find the single compatible variant assignment with the individually
    # highest likelihood and track the total likelihood distributed to all variant
    # genotypes.
    likelihood_aggregators = [
        _LikelihoodAggregator(len(v.alternate_bases))
        for v in overlapping_variants
    ]
    most_likely_allele_indices_config = None
    most_likely_likelihood = None
    for nonref_count_config in valid_nonref_count_configurations:
        for allele_indices_config in _get_all_allele_indices_configurations(
                overlapping_variants, nonref_count_config):
            config_likelihood = _allele_indices_configuration_likelihood(
                overlapping_variants, allele_indices_config)
            if (most_likely_likelihood is None
                    or config_likelihood > most_likely_likelihood):
                most_likely_likelihood = config_likelihood
                most_likely_allele_indices_config = allele_indices_config
            for aggregator, allele_indices in zip(likelihood_aggregators,
                                                  allele_indices_config):
                aggregator.add(allele_indices, config_likelihood)

    marginal_allele_indices_config = tuple(agg.most_likely_allele_indices()
                                           for agg in likelihood_aggregators)
    if marginal_allele_indices_config == most_likely_allele_indices_config:
        logging.info(
            'Overlapping variants are not naturally compatible, but the genotype '
            'configuration with the most likely joint likelihood is the same as '
            'that from the scaled marginal likelihoods: %s',
            overlapping_variants[0])
        # Collapse the probabilities of all configurations to a single GL for each
        # allele, independently for each variant.
        scaled_gls = [
            agg.scaled_likelihoods() for agg in likelihood_aggregators
        ]

        for variant, allele_indices, gls in zip(
                overlapping_variants, most_likely_allele_indices_config,
                scaled_gls):
            newvariant = variants_pb2.Variant()
            newvariant.CopyFrom(variant)
            newvariant.calls[0].genotype[:] = allele_indices
            newvariant.calls[0].genotype_likelihood[:] = gls
            yield newvariant
    else:
        logging.warning(
            'Overlapping variants are not naturally compatible, and the genotype '
            'configuration with the most likely joint likelihood is different from '
            'that using the scaled marginal likelihoods: %s',
            overlapping_variants[0])
        # redacted
        for variant in overlapping_variants:
            yield variant