Ejemplo n.º 1
0
 def format_contig_matches():
     """Describes each reference contig and whether it is a shared contig.

     Takes no arguments: `ref_contigs` and `shared_contigs` are resolved from
     the surrounding scope.

     Returns:
       A string with one entry per contig in `ref_contigs`, joined by ', '.
       Each entry gives the contig's name, its `n_bases`, and 'matched' when
       the name is present in `ranges.contigs_dict(shared_contigs)` or
       'IS MISSING' otherwise.
     """
     shared_by_name = ranges.contigs_dict(shared_contigs)

     def describe(contig):
         # Membership is decided purely by contig name.
         if contig.name in shared_by_name:
             state = 'matched'
         else:
             state = 'IS MISSING'
         return '"{}" is {} bp and {}'.format(
             contig.name, contig.n_bases, state)

     return ', '.join(describe(contig) for contig in ref_contigs)
Ejemplo n.º 2
0
    def common2(contigs1, contigs2):
        """Returns the contigs of contigs1 that also appear in contigs2.

        A contig from contigs1 is kept when the mapping built by
        `ranges.contigs_dict(contigs2)` has an entry under the same name and
        that entry has the same `n_bases`. The result preserves the order of
        contigs1.
        """
        by_name = ranges.contigs_dict(contigs2)

        shared = []
        for candidate in contigs1:
            counterpart = by_name.get(candidate.name)
            # Same name is not enough: the lengths must agree as well.
            if counterpart and candidate.n_bases == counterpart.n_bases:
                shared.append(candidate)
        return shared
Ejemplo n.º 3
0
def processing_regions_from_options(options):
    """Computes the regions to process from our options.

    Collects the contigs of the reference and reads inputs named in `options`
    (plus those of the truth variants file when `in_training_mode(options)` is
    true), reduces them to their common contigs, validates those against the
    reference contigs, and hands them to `regions_to_process` together with
    the calling regions and sharding settings from `options`.

    NOTE(review): this function does not compute or return confident regions;
    it returns a single value.

    Args:
      options: deepvariant.DeepVariantOptions proto containing information
        about our input data sources.

    Returns:
      The result of `regions_to_process` for the common contigs (presumably a
      list of Range protos for the regions we should process; confirm against
      `regions_to_process`).
    """
    # Readers are created in the same order as before: reference, reads, and
    # then (training mode only) truth variants.
    ref_contigs = genomics_io.make_ref_reader(
        options.reference_filename).contigs
    sam_contigs = genomics_io.make_sam_reader(options.reads_filename).contigs
    vcf_contigs = (
        genomics_io.make_vcf_reader(options.truth_variants_filename).contigs
        if in_training_mode(options) else None)

    # Keep only the contigs shared by every input we actually have, then check
    # that they are sufficiently consistent with the reference.
    contig_sources = only_true(ref_contigs, sam_contigs, vcf_contigs)
    contigs = common_contigs(
        contig_sources, exclude_contig_names=options.exclude_contigs)
    validate_reference_contig_coverage(
        ref_contigs, contigs, options.min_shared_contigs_basepairs)
    logging.info('Common contigs are %s', [contig.name for contig in contigs])

    partition_size = options.allele_counter_options.partition_size
    # Calling regions are resolved against the reference contigs, not just the
    # common ones.
    calling_regions = ranges.RangeSet.from_regions(
        options.calling_regions, ranges.contigs_dict(ref_contigs))
    return regions_to_process(
        contigs,
        partition_size=partition_size,
        calling_regions=calling_regions,
        task_id=options.task_id,
        num_shards=options.num_shards)
Ejemplo n.º 4
0
def build_calling_regions(contigs, regions_to_include, regions_to_exclude):
    """Builds a RangeSet containing the regions we should call variants in.

    Starts from the ranges spanning all of `contigs`, intersects them with
    `regions_to_include` when that argument is truthy, and then removes
    `regions_to_exclude` when that argument is truthy.

    Args:
      contigs: Sequence of ContigInfo protos. Used to determine the initial
        ranges to process (i.e., all bases of these contigs).
      regions_to_include: RangeSet or iterable that can be converted to a
        RangeSet.
      regions_to_exclude: RangeSet or iterable that can be converted to a
        RangeSet.

    Returns:
      A RangeSet.
    """
    # Begin with every base of every contig.
    selected = ranges.RangeSet.from_contigs(contigs)
    contigs_by_name = ranges.contigs_dict(contigs)

    def to_range_set(region_specs):
        # Both region arguments are converted against the same contig mapping.
        return ranges.RangeSet.from_regions(region_specs, contigs_by_name)

    # Narrow to the requested regions, if any were given.
    if regions_to_include:
        selected = selected.intersection(to_range_set(regions_to_include))

    # Drop the excluded regions, if any were given. exclude_regions mutates
    # the RangeSet it is called on, so its return value is not used.
    if regions_to_exclude:
        selected.exclude_regions(to_range_set(regions_to_exclude))

    return selected