def test_variant_position_and_range(self):
    """Checks variant_position and variant_range for two variants at start=10.

    One variant has a single-base first allele ('A'), the other a four-base
    first allele ('AGCT'). Both are expected to report the same position,
    while only the four-base variant is expected to report the wider range.
    """
    single_base = test_utils.make_variant(
        chrom='1', alleles=['A', 'C'], start=10)
    four_base = test_utils.make_variant(
        chrom='1', alleles=['AGCT', 'C'], start=10)
    expected_position = ranges.make_range('1', 10, 11)
    expected_wide_range = ranges.make_range('1', 10, 14)

    # Position checks come first, then range checks, so that the first
    # failing assertion (if any) is reported in a stable order.
    for variant in (single_base, four_base):
        self.assertEqual(expected_position,
                         variantutils.variant_position(variant))

    range_cases = (
        (expected_position, single_base),
        (expected_wide_range, four_base),
    )
    for expected, variant in range_cases:
        self.assertEqual(expected, variantutils.variant_range(variant))
Ejemplo n.º 2
0
def _transform_call_variants_output_to_variants(input_sorted_tfrecord_path,
                                                qual_filter,
                                                multi_allelic_qual_filter,
                                                sample_name):
    """Generates one Variant per run of CallVariantsOutput records.

    CallVariantsOutput protos are read from the input TFRecord file, and
    consecutive records whose variants share the same range are collected
    into one group. Each group is merged with `merge_predictions` (which
    receives `multi_allelic_qual_filter`) and then turned into a called
    variant by `add_call_to_variant` (which receives `qual_filter` and
    `sample_name`). Exactly one variant is yielded per group.

    Args:
      input_sorted_tfrecord_path: str. TFRecord format file containing sorted
        CallVariantsOutput protos.
      qual_filter: double. The qual value below which to filter variants.
      multi_allelic_qual_filter: double. The qual value below which to filter
        multi-allelic variants.
      sample_name: str. Sample name to write to VCF file.

    Yields:
      The variant built for each group, in the order the groups appear in the
      input file (which is expected to be sorted).
    """

    def _range_of(call_variants_output):
        # Grouping key: the range covered by the record's variant.
        return variantutils.variant_range(call_variants_output.variant)

    records = io_utils.read_tfrecords(
        input_sorted_tfrecord_path, proto=deepvariant_pb2.CallVariantsOutput)
    for _, same_range_records in itertools.groupby(records, _range_of):
        # groupby hands out a one-shot iterator; materialize it before
        # passing the group on.
        grouped = list(same_range_records)
        canonical_variant, predictions = merge_predictions(
            grouped, multi_allelic_qual_filter)
        yield add_call_to_variant(
            canonical_variant,
            predictions,
            qual_filter=qual_filter,
            sample_name=sample_name)
def write_call_variants_output_to_vcf(contigs, input_sorted_tfrecord_path,
                                      output_vcf_path, qual_filter,
                                      multi_allelic_qual_filter, sample_name):
    """Converts CallVariantsOutput records to variants and writes them out.

    CallVariantsOutput protos are read from the input TFRecord file, and
    consecutive records whose variants share the same range are collected
    into one group. Each group is merged with `merge_predictions` (which
    receives `multi_allelic_qual_filter`), turned into a called variant by
    `add_call_to_variant` (which receives `qual_filter` and `sample_name`),
    and handed to the variant writer created for `output_vcf_path`.

    Args:
      contigs: list(ContigInfo). A list of the reference genome contigs for
        writers that need contig information.
      input_sorted_tfrecord_path: str. TFRecord format file containing sorted
        CallVariantsOutput protos.
      output_vcf_path: str. Output file in VCF format.
      qual_filter: double. The qual value below which to filter variants.
      multi_allelic_qual_filter: double. The qual value below which to filter
        multi-allelic variants.
      sample_name: str. Sample name to write to VCF file.
    """
    logging.info('Writing calls to VCF file: %s', output_vcf_path)
    sync_writer, writer_fn = genomics_io.make_variant_writer(
        output_vcf_path, contigs, samples=[sample_name], filters=FILTERS)

    def _range_of(call_variants_output):
        # Grouping key: the range covered by the record's variant.
        return variantutils.variant_range(call_variants_output.variant)

    with sync_writer, io_utils.AsyncWriter(writer_fn) as writer:
        # The reader is created only after both writers have been entered,
        # matching the original ordering of side effects.
        records = io_utils.read_tfrecords(
            input_sorted_tfrecord_path,
            proto=deepvariant_pb2.CallVariantsOutput)
        for _, same_range_records in itertools.groupby(records, _range_of):
            # groupby hands out a one-shot iterator; materialize it before
            # passing the group on.
            grouped = list(same_range_records)
            canonical_variant, predictions = merge_predictions(
                grouped, multi_allelic_qual_filter)
            called_variant = add_call_to_variant(
                canonical_variant,
                predictions,
                qual_filter=qual_filter,
                sample_name=sample_name)
            writer.write(called_variant)
 def _fake_query(region):
     """Returns the entries of `variants` whose range overlaps `region`.

     `variants` is not a parameter; it is resolved from a scope outside this
     function.
     """
     overlapping = []
     for candidate in variants:
         candidate_range = variantutils.variant_range(candidate)
         if ranges.ranges_overlap(candidate_range, region):
             overlapping.append(candidate)
     return overlapping