コード例 #1
0
ファイル: realigner_test.py プロジェクト: palc/deepvariant
  def test_realigner_end2end(self):
    """Realigns chr20:10,000,000-10,009,999 piece by piece and sanity-checks it.

    The region is split with `partition(1000)`; for every piece the reads are
    fetched from the test BAM and passed through `realign_reads`. Each piece
    must hand back exactly the reads it was given, every returned window must
    list the reference sequence among its haplotypes, and at least one window
    must be produced over the whole region.
    """
    reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
    aligner = realigner.Realigner(realigner.realigner_config(FLAGS), reference)
    total_windows = 0

    whole_region = ranges.RangeSet.from_regions(['chr20:10,000,000-10,009,999'])
    for piece in whole_region.partition(1000):
      with sam.SamReader(
          testdata.CHR20_BAM,
          read_requirements=reads_pb2.ReadRequirements()) as bam:
        reads_before = list(bam.query(piece))
      windows, reads_after = aligner.realign_reads(reads_before, piece)

      # Comparing names (not just counts) catches a read being swapped for
      # another one as well as reads being dropped or duplicated.
      names_before = [read.fragment_name for read in reads_before]
      names_after = [read.fragment_name for read in reads_after]
      self.assertCountEqual(names_before, names_after)

      # The reference sequence over a window's span must be one of the
      # haplotypes reported for that window.
      for window in windows:
        self.assertIn(reference.query(window.span), set(window.haplotypes))
      total_windows += len(windows)

    self.assertGreater(total_windows, 0)
コード例 #2
0
    def _initialize(self):
        """Create the readers and helper objects this instance works with.

        Sets `ref_reader`, `sam_reader`, `in_memory_sam_reader`, `pic`,
        `variant_caller` and `random` on `self`; `realigner` is set only when
        `options.realigner_enabled` is truthy and `labeler` only when
        `in_training_mode(options)` is truthy. Finally marks the instance as
        initialized.

        Raises:
          ValueError: if the instance has already been initialized.
        """
        if self.initialized:
            raise ValueError('Cannot initialize this object twice')

        opts = self.options

        # Data sources.
        self.ref_reader = fasta.IndexedFastaReader(opts.reference_filename)
        self.sam_reader = self._make_sam_reader()
        self.in_memory_sam_reader = sam.InMemorySamReader([])

        # Optional realigner, sharing the reference reader created above.
        if opts.realigner_enabled:
            self.realigner = realigner.Realigner(opts.realigner_options,
                                                 self.ref_reader)

        # The pileup image creator reads from the in-memory SAM reader, not
        # from self.sam_reader.
        self.pic = pileup_image.PileupImageCreator(
            ref_reader=self.ref_reader,
            sam_reader=self.in_memory_sam_reader,
            options=opts.pic_options)

        if in_training_mode(opts):
            self.labeler = self._make_labeler_from_options()

        self.variant_caller = variant_caller.VariantCaller(
            opts.variant_caller_options)
        self.random = np.random.RandomState(opts.random_seed)
        self.initialized = True
コード例 #3
0
    def test_call_from_allele_counter(self):
        """Feeds real reads into an AlleleCounter and calls candidates from it.

        Uses a 1000-wide range starting at chr20:10000000 of the test data.
        The test passes when reads are found, at least one candidate comes
        back from `calls_from_allele_counter`, and every candidate is a
        `deepvariant_pb2.DeepVariantCall`.
        """
        reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
        bam = sam.SamReader(testdata.CHR20_BAM)
        width = 1000
        window = ranges.make_range('chr20', 10000000, 10000000 + width)

        counter = _allelecounter.AlleleCounter(
            reference.c_reader, window,
            deepvariant_pb2.AlleleCounterOptions(partition_size=width))

        caller_options = deepvariant_pb2.VariantCallerOptions(
            min_count_snps=2,
            min_count_indels=2,
            min_fraction_snps=0.12,
            min_fraction_indels=0.12,
            sample_name='sample_name',
            p_error=0.001,
            max_gq=50,
            gq_resolution=1,
            ploidy=2)
        caller = variant_calling.VariantCaller(caller_options)

        # Load every read overlapping the window into the counter.
        overlapping_reads = list(bam.query(window))
        self.assertNotEmpty(overlapping_reads)
        for read in overlapping_reads:
            counter.add(read)

        # Call candidates across the whole window; there must be some.
        candidates = caller.calls_from_allele_counter(counter)
        self.assertNotEmpty(candidates)

        for candidate in candidates:
            self.assertIsInstance(candidate, deepvariant_pb2.DeepVariantCall)
コード例 #4
0
def generate_trained_model_runner(truth_variants, reads, ref,
                                  output_model_proto, output_model_pckl,
                                  exclude_contig, from_contig, random_seed,
                                  indel_weight):
    """Runner for generate_trained_model.

    Opens the truth VCF, reference FASTA and reads BAM, builds the training
    data with `generate_data`, fits a model with `train_model`, then writes
    the model out as a text-format proto and, optionally, as a pickle.

    The readers are opened as context managers so they are closed as soon as
    training finishes, and also when data generation or training raises.

    Args:
      truth_variants: path to the VCF.
      reads: path to the reads BAM.
      ref: path to the reference FASTA.
      output_model_proto: path to write the AlleleCountLinearModel proto.
      output_model_pckl: path to write the LogisticRegression pickle. When
        falsy, no pickle is written.
      exclude_contig: string identifier of a contig to exclude from training.
      from_contig: string identifier of the contig from which we sample
        baseline.
      random_seed: int used as random seed for reproducibility.
      indel_weight: float of the weight of indels relative to the rest in
        the training.
    """
    with vcf.VcfReader(truth_variants) as vcf_reader, \
            fasta.IndexedFastaReader(ref) as ref_reader, \
            sam.SamReader(reads) as sam_reader:
        # Seed Python's global RNG before any data is generated.
        random.seed(random_seed)

        dataframe = generate_data(vcf_reader, ref_reader, sam_reader,
                                  from_contig, exclude_contig)
        # Train while the readers are still open, in case the generated data
        # still refers back to them.
        model = train_model(dataframe, indel_weight=indel_weight)

    if output_model_pckl:
        joblib.dump(model, output_model_pckl)

    model_proto = model_to_proto(model)
    with tf.gfile.GFile(output_model_proto, 'w') as f:
        f.write(text_format.MessageToString(model_proto))
コード例 #5
0
 def setUp(self):
     """Build a realigner from a config created with the window-selector flag on."""
     reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
     self.ref_reader = reference
     # Set before realigner_config(FLAGS) is called so the resulting config
     # can pick the value up.
     FLAGS.ws_use_window_selector_model = True
     config = realigner.realigner_config(FLAGS)
     self.config = config
     self.reads_realigner = realigner.Realigner(config, reference)
コード例 #6
0
 def test_get_ref_haplotype_and_offset(self, dv_variant, cohort_variants,
                                       expected_ref_haplotype,
                                       expected_ref_offset):
     """Checks both values returned by get_ref_haplotype_and_offset.

     The reference is read from the GRCh38 test FASTA; the variant inputs and
     the expected haplotype/offset pair are supplied by the caller.
     """
     reference = fasta.IndexedFastaReader(testdata.GRCH38_FASTA)
     result = allele_frequency.get_ref_haplotype_and_offset(
         dv_variant, cohort_variants, reference)
     actual_haplotype, actual_offset = result
     self.assertEqual(actual_haplotype, expected_ref_haplotype)
     self.assertEqual(actual_offset, expected_ref_offset)
コード例 #7
0
ファイル: fasta_test.py プロジェクト: zorrodong/deepvariant
  def setUpClass(cls):
    """Create an indexed FASTA reader and an in-memory copy of its contents.

    The in-memory reader is filled with one (name, 0, bases) tuple per contig
    in the indexed reader's header, where `bases` is the result of querying
    the range from 0 to the contig's `n_bases`.
    """
    indexed = fasta.IndexedFastaReader(
        test_utils.genomics_core_testdata('test.fasta'))
    cls.fasta_reader = indexed

    chunks = []
    for contig in indexed.header.contigs:
      whole_contig = ranges.make_range(contig.name, 0, contig.n_bases)
      chunks.append((contig.name, 0, indexed.query(whole_contig)))
    cls.in_mem = fasta.InMemoryFastaReader(chunks)
コード例 #8
0
ファイル: fasta_test.py プロジェクト: zuozhaorui/deepvariant
 def test_iterate(self, fasta_filename):
     """Iterating the indexed reader must yield what the unindexed reader yields."""
     indexed = fasta.IndexedFastaReader(
         test_utils.genomics_core_testdata(fasta_filename))
     unindexed = fasta.UnindexedFastaReader(
         test_utils.genomics_core_testdata(fasta_filename))
     records_from_indexed = list(indexed.iterate())
     records_from_unindexed = list(unindexed.iterate())
     self.assertEqual(records_from_indexed, records_from_unindexed)
コード例 #9
0
    def test_straightforward_region(self):
        """A graph built with single_k_dbg_options(30) yields only the reference.

        Builds the de Bruijn graph over chr20:10,000,000-10,000,100 from the
        test reads and expects its candidate haplotypes to be exactly the
        reference sequence of that region.
        """
        reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
        alignments = sam.SamReader(testdata.CHR20_BAM)
        target = ranges.parse_literal('chr20:10,000,000-10,000,100')
        reference_bases = reference.query(target)

        graph = debruijn_graph.build(reference_bases,
                                     list(alignments.query(target)),
                                     self.single_k_dbg_options(30))
        self.assertIsNotNone(graph)
        self.assertEqual([reference_bases], graph.candidate_haplotypes())
コード例 #10
0
def model_evaluation_runner(truth_variants, reads, ref, input_model_pckl,
                            eval_region, output_report_csv):
    """Outputs precision-recall for a sklearn model using AlleleCount features.

    Loads the pickled model, collects the true indels of `eval_region` from
    the truth VCF, computes precision and effective recall for every value in
    `_THRESHOLDS`, and writes one CSV row per threshold.

    The SAM and FASTA readers are opened as context managers so they are
    closed once the metrics have been computed, and also when an error is
    raised part-way through.

    Args:
      truth_variants: path to the VCF.
      reads: path to the reads BAM.
      ref: path to the reference FASTA.
      input_model_pckl: path to read the LogisticRegression pickle from.
      eval_region: str, region to evaluate on in the 'chr:start-end',
        'chr:position' or 'chr' format.
      output_report_csv: path to the output report csv.

    Raises:
      ValueError: if eval_region cannot be parsed.
    """
    read_reqs = reads_pb2.ReadRequirements(
        min_base_quality=10,
        min_mapping_quality=10,
        min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)
    allele_counter_options = deepvariant_pb2.AlleleCounterOptions(
        partition_size=1, read_requirements=read_reqs)

    with sam.SamReader(reads) as sam_reader, \
            fasta.IndexedFastaReader(ref) as ref_reader:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code. Only point this at model files from a trusted source.
        model = joblib.load(input_model_pckl)

        with vcf.VcfReader(truth_variants) as vcf_reader:
            # The reference contigs are passed as the contig map used when
            # parsing eval_region.
            region = ranges.parse_literal(
                eval_region,
                contig_map=ranges.contigs_dict(ref_reader.header.contigs))
            true_indels = [
                var for var in vcf_reader.query(region)
                if variant_utils.is_indel(var)
            ]

        # Both metrics are computed while the readers are still open; the
        # results are only indexed by threshold afterwards.
        precisions = compute_precision(model, true_indels, sam_reader,
                                       ref_reader, allele_counter_options,
                                       _THRESHOLDS, region)
        recalls = compute_effective_recall(model, true_indels, sam_reader,
                                           ref_reader, allele_counter_options,
                                           _THRESHOLDS)

    with tf.gfile.GFile(output_report_csv, 'w') as csvfile:
        fieldnames = ['threshold', 'precision', 'recall']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for threshold in _THRESHOLDS:
            writer.writerow({
                'threshold': threshold,
                'precision': precisions[threshold],
                'recall': recalls[threshold]
            })
コード例 #11
0
 def test_wrap(self):
   """An AlleleCounter fed real reads reports one count per partition_size unit.

   Uses a 100-wide range starting at chr20:10000000 and expects `counts()` to
   have exactly 100 entries after all overlapping reads have been added.
   """
   reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
   bam = sam.SamReader(testdata.CHR20_BAM)
   width = 100
   window = ranges.make_range('chr20', 10000000, 10000000 + width)
   counter_options = deepvariant_pb2.AlleleCounterOptions(partition_size=width)
   counter = _allelecounter.AlleleCounter(reference.c_reader, window,
                                          counter_options)

   overlapping_reads = list(bam.query(window))
   self.assertGreater(len(overlapping_reads), 0)
   for read in overlapping_reads:
     counter.add(read)

   self.assertEqual(len(counter.counts()), width)
コード例 #12
0
 def test_complex_region(self):
     """Builds a de Bruijn graph over chr20:10,095,379-10,095,500.

     According to the original note, this region holds a heterozygous 9 bp
     deletion of a tandem TGA repeat. The test expects a graph with
     kmer_size 44 and two candidate haplotypes, one of which is the reference
     sequence.
     """
     reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
     alignments = sam.SamReader(testdata.CHR20_BAM)
     target = ranges.parse_literal('chr20:10,095,379-10,095,500')
     reference_bases = reference.query(target)

     graph = debruijn_graph.build(reference_bases,
                                  list(alignments.query(target)),
                                  self.dbg_options())
     self.assertIsNotNone(graph)
     self.assertEqual(44, graph.kmer_size)
     self.assertEqual(2, len(graph.candidate_haplotypes()))
     self.assertIn(reference_bases, graph.candidate_haplotypes())
コード例 #13
0
def processing_regions_from_options(options):
    """Computes the calling regions from our options.

    Reads the contig lists from the headers of the reference FASTA, the reads
    file and (in training mode only) the truth VCF, reduces them to a
    consistent set of contigs, builds the calling regions from the include
    and exclude specifications, and hands both to `regions_to_process`.

    Each reader is opened only long enough to copy the contigs out of its
    header and is closed again immediately.

    Args:
      options: deepvariant.DeepVariantOptions proto containing information
        about our input data sources.

    Raises:
      ValueError: if the regions to call is empty.

    Returns:
      The regions we should process, as returned by `regions_to_process` for
      this task (`options.task_id` of `options.num_shards`).
    """
    with fasta.IndexedFastaReader(options.reference_filename) as ref_reader:
        ref_contigs = ref_reader.header.contigs
    with sam.SamReader(options.reads_filename) as sam_reader:
        sam_contigs = sam_reader.header.contigs

    # The truth VCF's contigs take part in the consistency check only when
    # running in training mode.
    vcf_contigs = None
    if in_training_mode(options):
        with vcf.VcfReader(options.truth_variants_filename) as vcf_reader:
            vcf_contigs = vcf_reader.header.contigs

    contigs = _ensure_consistent_contigs(ref_contigs, sam_contigs, vcf_contigs,
                                         options.exclude_contigs,
                                         options.min_shared_contigs_basepairs)
    logging.info('Common contigs are %s', [c.name for c in contigs])
    calling_regions = build_calling_regions(ref_contigs,
                                            options.calling_regions,
                                            options.exclude_calling_regions)
    if not calling_regions:
        raise ValueError(
            'The regions to call is empty. Check your --regions and '
            '--exclude_regions flags to make sure they are not '
            'resulting in set of empty region to process. This also '
            'happens if you use "chr20" for a BAM where contig names '
            'don\'t have "chr"s (or vice versa).')
    regions = regions_to_process(
        contigs=contigs,
        partition_size=options.allele_counter_options.partition_size,
        calling_regions=calling_regions,
        task_id=options.task_id,
        num_shards=options.num_shards)

    return regions
コード例 #14
0
 def test_find_matching_allele_frequency(self, variant, expected_return,
                                         label):
     """Compares find_matching_allele_frequency's result with expected_return.

     The key sets must match exactly and each value must be almost equal;
     `label` is used as the failure message of every assertion.
     """
     reference = fasta.IndexedFastaReader(testdata.GRCH38_FASTA)
     population_vcf = vcf.VcfReader(testdata.VCF_WITH_ALLELE_FREQUENCIES)
     allele_frequencies = allele_frequency.find_matching_allele_frequency(
         variant, population_vcf, reference)

     # Same alleles on both sides.
     actual_keys = set(allele_frequencies.keys())
     expected_keys = set(expected_return.keys())
     self.assertSetEqual(actual_keys, expected_keys, msg=label)

     # Same frequency per allele, up to assertAlmostEqual's tolerance.
     for key in allele_frequencies.keys():
         actual_value = allele_frequencies[key]
         expected_value = expected_return[key]
         self.assertAlmostEqual(actual_value, expected_value, msg=label)
コード例 #15
0
def main(argv):
  """Converts the examples in FLAGS.examples to variants in FLAGS.output_vcf.

  The VCF header is built from the contigs of the reference FASTA named by
  FLAGS.ref. A negative FLAGS.max_records is passed on as None. When
  FLAGS.sample_name is empty, the sample name is obtained from
  `peek_sample_name`; in both cases it is written into the first call of
  every variant before the variant is written.

  Args:
    argv: command line arguments; unused.
  """
  del argv

  # Only the header's contig list is needed, so the reader is closed as soon
  # as it has been read instead of being left open.
  with fasta.IndexedFastaReader(FLAGS.ref) as ref_reader:
    contigs = ref_reader.header.contigs
  max_records = FLAGS.max_records if FLAGS.max_records >= 0 else None
  variants_iter = examples_to_variants(FLAGS.examples, max_records=max_records)

  if not FLAGS.sample_name:
    # peek_sample_name hands back a replacement iterator, which must be used
    # from here on in place of the one passed in.
    sample_name, variants_iter = peek_sample_name(variants_iter)
  else:
    sample_name = FLAGS.sample_name
  header = dv_vcf_constants.deepvariant_header(
      contigs=contigs, sample_names=[sample_name])
  with vcf.VcfWriter(FLAGS.output_vcf, header=header) as writer:
    for variant in variants_iter:
      variant.calls[0].call_set_name = sample_name
      logging.log_every_n(logging.INFO, 'Converted %s', FLAGS.log_every,
                          variant_utils.variant_key(variant))
      writer.write(variant)
コード例 #16
0
 def test_add_allele_frequencies_to_candidates(self, dv_calls,
                                               expected_return, testcase):
     """Checks the allele_frequency of the first call that comes back.

     With testcase 'valid' the population VCF and GRCh38 reference readers are
     opened; with 'no VCF' both readers are None. Any other testcase value is
     rejected.

     Raises:
       ValueError: if `testcase` is neither 'valid' nor 'no VCF'.
     """
     if testcase not in ('valid', 'no VCF'):
         raise ValueError('Invalid testcase for parameterized test.')

     pop_vcf_reader = None
     ref_reader = None
     if testcase == 'valid':
         pop_vcf_reader = vcf.VcfReader(
             testdata.VCF_WITH_ALLELE_FREQUENCIES)
         ref_reader = fasta.IndexedFastaReader(testdata.GRCH38_FASTA)

     annotated_calls = list(
         allele_frequency.add_allele_frequencies_to_candidates(
             dv_calls, pop_vcf_reader, ref_reader))
     observed = annotated_calls[0].allele_frequency

     # Same alleles on both sides.
     self.assertSetEqual(set(observed.keys()),
                         set(expected_return.keys()))
     # Same frequency per allele, up to assertAlmostEqual's tolerance.
     for key in observed.keys():
         self.assertAlmostEqual(observed[key], expected_return[key])
コード例 #17
0
    def setUp(self):
        """Prepare training-mode options, a RegionProcessor and supporting mocks.

        The current flag values are saved in `self._saved_flags` before
        `FLAGS.reads` is cleared. The processor's 'initialize' is replaced
        through `add_mock`, and every sample's in-memory SAM reader becomes a
        `mock.Mock()`.
        """
        super(RegionProcessorTest, self).setUp()
        self._saved_flags = flagsaver.save_flag_values()
        self.region = ranges.parse_literal('chr20:10,000,000-10,000,100')

        FLAGS.reads = ''
        opts = make_examples.default_options(add_flags=False)
        self.options = opts
        opts.reference_filename = testdata.CHR20_FASTA

        # Give the first sample a reads file (unless it already has one) and
        # a fixed name.
        primary = opts.sample_options[0]
        if not primary.reads_filenames:
            primary.reads_filenames.append(testdata.CHR20_BAM)
        primary.variant_caller_options.sample_name = 'sample_id'
        primary.name = 'sample_id'

        opts.truth_variants_filename = testdata.TRUTH_VARIANTS_VCF
        opts.mode = deepvariant_pb2.MakeExamplesOptions.TRAINING

        self.processor = make_examples_core.RegionProcessor(opts)
        self.ref_reader = fasta.IndexedFastaReader(opts.reference_filename)
        self.mock_init = self.add_mock('initialize')
        for sample in self.processor.samples:
            sample.in_memory_sam_reader = mock.Mock()

        self.default_shape = [5, 5, 7]
        self.default_format = 'raw'
コード例 #18
0
def main(argv=()):
    """Turns CallVariantsOutput records into a VCF and, optionally, a gVCF.

    Reads the records named by FLAGS.infile, writes the resulting variants to
    FLAGS.outfile and, when FLAGS.nonvariant_site_tfrecord_path is set, merges
    them with the non-variant records into FLAGS.gvcf_outfile.

    Args:
      argv: command line arguments; more than one entry is reported as a
        command line error.

    Raises:
      ValueError: if no record can be read from FLAGS.infile.
    """
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            errors.log_and_raise(
                'Command line parsing failure: postprocess_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)), errors.CommandLineError)
        del argv  # Unused.

        # The two `not` values differ exactly when one of the gVCF flags is
        # set and the other is not.
        if (not FLAGS.nonvariant_site_tfrecord_path) != (
                not FLAGS.gvcf_outfile):
            errors.log_and_raise(
                'gVCF creation requires both nonvariant_site_tfrecord_path and '
                'gvcf_outfile flags to be set.', errors.CommandLineError)

        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        # NOTE(review): this reader is never closed explicitly in this
        # function; it is still needed for the gVCF merge at the end.
        fasta_reader = fasta.IndexedFastaReader(FLAGS.ref,
                                                cache_size=_FASTA_CACHE_SIZE)
        contigs = fasta_reader.header.contigs
        paths = io_utils.maybe_generate_sharded_filenames(FLAGS.infile)
        # Read one CallVariantsOutput record and extract the sample name from it.
        # Note that this assumes that all CallVariantsOutput protos in the infile
        # contain a single VariantCall within their constituent Variant proto, and
        # that the call_set_name is identical in each of the records.
        record = tf_utils.get_one_example_from_examples_path(
            ','.join(paths), proto=deepvariant_pb2.CallVariantsOutput)
        if record is None:
            raise ValueError('Cannot find any records in {}'.format(
                ','.join(paths)))

        sample_name = _extract_single_sample_name(record)
        header = dv_vcf_constants.deepvariant_header(
            contigs=contigs, sample_names=[sample_name])
        # temp.name is first passed to process_single_sites_tfrecords and then
        # read back as input_sorted_tfrecord_path. The VCF is written inside
        # this block, presumably because the variants are produced lazily from
        # the temp file - TODO confirm.
        with tempfile.NamedTemporaryFile() as temp:
            postprocess_variants_lib.process_single_sites_tfrecords(
                contigs, paths, temp.name)
            independent_variants = _transform_call_variants_output_to_variants(
                input_sorted_tfrecord_path=temp.name,
                qual_filter=FLAGS.qual_filter,
                multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
                sample_name=sample_name)
            variant_generator = haplotypes.maybe_resolve_conflicting_variants(
                independent_variants)
            write_variants_to_vcf(variant_generator=variant_generator,
                                  output_vcf_path=FLAGS.outfile,
                                  header=header)

        # Also write out the gVCF file if it was provided.
        if FLAGS.nonvariant_site_tfrecord_path:
            nonvariant_generator = io_utils.read_shard_sorted_tfrecords(
                FLAGS.nonvariant_site_tfrecord_path,
                key=_get_contig_based_variant_sort_keyfn(contigs),
                proto=variants_pb2.Variant)
            # The VCF written above is read back and merged with the
            # non-variant records to form the gVCF.
            with vcf.VcfReader(FLAGS.outfile) as variant_reader:
                lessthanfn = _get_contig_based_lessthan(contigs)
                gvcf_variants = (_transform_to_gvcf_record(variant)
                                 for variant in variant_reader.iterate())
                merged_variants = merge_variants_and_nonvariants(
                    gvcf_variants, nonvariant_generator, lessthanfn,
                    fasta_reader)
                write_variants_to_vcf(variant_generator=merged_variants,
                                      output_vcf_path=FLAGS.gvcf_outfile,
                                      header=header)
コード例 #19
0
def main(argv=()):
  """Turns CallVariantsOutput records into a VCF and, optionally, a gVCF.

  Reads the records named by FLAGS.infile. When there are none, an empty VCF
  is written under the default sample name (or FLAGS.sample_name if set).
  Otherwise the records are sorted into a temporary file and transformed into
  variants. The variants are written to FLAGS.outfile alone, or, when
  FLAGS.nonvariant_site_tfrecord_path is set, merged with the non-variant
  records and written to both FLAGS.outfile and FLAGS.gvcf_outfile. Outputs
  ending in '.gz' are indexed, and a VCF stats report is produced when
  FLAGS.vcf_stats_report is set.

  The temporary file is closed in a `finally` clause, so it is released even
  when a later step raises.

  Args:
    argv: command line arguments; more than one entry is reported as a
      command line error.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: postprocess_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.

    # The two `not` values differ exactly when one of the gVCF flags is set
    # and the other is not.
    if (not FLAGS.nonvariant_site_tfrecord_path) != (not FLAGS.gvcf_outfile):
      errors.log_and_raise(
          'gVCF creation requires both nonvariant_site_tfrecord_path and '
          'gvcf_outfile flags to be set.', errors.CommandLineError)

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    fasta_reader = fasta.IndexedFastaReader(
        FLAGS.ref, cache_size=_FASTA_CACHE_SIZE)
    contigs = fasta_reader.header.contigs
    paths = sharded_file_utils.maybe_generate_sharded_filenames(FLAGS.infile)
    # Read one CallVariantsOutput record and extract the sample name from it.
    # Note that this assumes that all CallVariantsOutput protos in the infile
    # contain a single VariantCall within their constituent Variant proto, and
    # that the call_set_name is identical in each of the records.
    record = tf_utils.get_one_example_from_examples_path(
        ','.join(paths), proto=deepvariant_pb2.CallVariantsOutput)

    # Stays None on the empty-input path; set once the temp file exists so the
    # `finally` clause knows whether there is anything to close.
    temp = None
    try:
      if record is None:
        logging.info('call_variants_output is empty. Writing out empty VCF.')
        sample_name = dv_constants.DEFAULT_SAMPLE_NAME
        if FLAGS.sample_name:
          logging.info(
              '--sample_name is set in postprocess_variant. Using %s as the '
              'sample name.', FLAGS.sample_name)
          sample_name = FLAGS.sample_name
        variant_generator = iter([])
      else:
        sample_name = _extract_single_sample_name(record)
        temp = tempfile.NamedTemporaryFile()
        start_time = time.time()
        postprocess_variants_lib.process_single_sites_tfrecords(
            contigs, paths, temp.name)
        logging.info('CVO sorting took %s minutes',
                     (time.time() - start_time) / 60)

        logging.info('Transforming call_variants_output to variants.')
        independent_variants = _transform_call_variants_output_to_variants(
            input_sorted_tfrecord_path=temp.name,
            qual_filter=FLAGS.qual_filter,
            multi_allelic_qual_filter=FLAGS.multi_allelic_qual_filter,
            sample_name=sample_name,
            group_variants=FLAGS.group_variants,
            use_multiallelic_model=FLAGS.use_multiallelic_model)
        variant_generator = haplotypes.maybe_resolve_conflicting_variants(
            independent_variants)

      header = dv_vcf_constants.deepvariant_header(
          contigs=contigs, sample_names=[sample_name])
      use_csi = _decide_to_use_csi(contigs)

      start_time = time.time()
      if not FLAGS.nonvariant_site_tfrecord_path:
        logging.info('Writing variants to VCF.')
        write_variants_to_vcf(
            variant_iterable=variant_generator,
            output_vcf_path=FLAGS.outfile,
            header=header)
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        logging.info('VCF creation took %s minutes',
                     (time.time() - start_time) / 60)
      else:
        logging.info('Merging and writing variants to VCF and gVCF.')
        lessthanfn = _get_contig_based_lessthan(contigs)
        with vcf.VcfWriter(
            FLAGS.outfile, header=header, round_qualities=True) as vcf_writer, \
            vcf.VcfWriter(
                FLAGS.gvcf_outfile, header=header, round_qualities=True) \
            as gvcf_writer:
          nonvariant_generator = tfrecord.read_shard_sorted_tfrecords(
              FLAGS.nonvariant_site_tfrecord_path,
              key=_get_contig_based_variant_sort_keyfn(contigs),
              proto=variants_pb2.Variant)
          merge_and_write_variants_and_nonvariants(variant_generator,
                                                   nonvariant_generator,
                                                   lessthanfn, fasta_reader,
                                                   vcf_writer, gvcf_writer)
        if FLAGS.outfile.endswith('.gz'):
          build_index(FLAGS.outfile, use_csi)
        if FLAGS.gvcf_outfile.endswith('.gz'):
          build_index(FLAGS.gvcf_outfile, use_csi)
        logging.info('Finished writing VCF and gVCF in %s minutes.',
                     (time.time() - start_time) / 60)
      if FLAGS.vcf_stats_report:
        outfile_base = _get_base_path(FLAGS.outfile)
        with vcf.VcfReader(FLAGS.outfile) as reader:
          vcf_stats.create_vcf_report(
              variants=reader.iterate(),
              output_basename=outfile_base,
              sample_name=sample_name,
              vcf_reader=reader)
    finally:
      # Checked via `temp` itself rather than the truthiness of `record`, and
      # run on the error path too, so the temp file is always released.
      if temp is not None:
        temp.close()
コード例 #20
0
 def test_make_ref_reader_with_true_case(self, fasta_filename):
     """With keep_true_case=True, chrM 22-27 is expected to read 'TaaCC'."""
     path = test_utils.genomics_core_testdata(fasta_filename)
     with fasta.IndexedFastaReader(path, keep_true_case=True) as ref:
         target = ranges.make_range('chrM', 22, 27)
         bases = ref.query(target)
         self.assertEqual(bases, 'TaaCC')
コード例 #21
0
ファイル: fasta_test.py プロジェクト: zorrodong/deepvariant
 def test_make_ref_reader_default(self, fasta_filename):
   """A reader built with default arguments returns 'ATCAC' for chrM 1-6."""
   path = test_utils.genomics_core_testdata(fasta_filename)
   with fasta.IndexedFastaReader(path) as ref:
     target = ranges.make_range('chrM', 1, 6)
     bases = ref.query(target)
     self.assertEqual(bases, 'ATCAC')
コード例 #22
0
ファイル: realigner_test.py プロジェクト: palc/deepvariant
 def setUp(self):
   """Create the reference reader, realigner config and realigner for each test."""
   reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
   self.ref_reader = reference
   config = realigner.realigner_config(FLAGS)
   self.config = config
   self.reads_realigner = realigner.Realigner(config, reference)
コード例 #23
0
ファイル: fasta_test.py プロジェクト: zorrodong/deepvariant
 def test_make_ref_reader_cache_specified(self, fasta_filename):
   """A reader built with cache_size=10 returns 'ATCA' for chrM 1-5."""
   path = test_utils.genomics_core_testdata(fasta_filename)
   with fasta.IndexedFastaReader(path, cache_size=10) as ref:
     target = ranges.make_range('chrM', 1, 5)
     bases = ref.query(target)
     self.assertEqual(bases, 'ATCA')
コード例 #24
0
ファイル: fasta_test.py プロジェクト: zorrodong/deepvariant
 def test_c_reader(self):
   """The reader's c_reader attribute must be a reference.IndexedFastaReader."""
   path = test_utils.genomics_core_testdata('test.fasta')
   with fasta.IndexedFastaReader(path) as wrapped:
     native = wrapped.c_reader
     self.assertIsInstance(native, reference.IndexedFastaReader)