Esempio n. 1
0
 def test_parse_literal_one_bp(self):
     """Checks parsing of literals that give a contig and a single position.

     Each case is (literal, expected start, expected end); the expected
     range is built with ranges.make_range on contig '1'.
     """
     cases = (
         ('1:10', 9, 10),
         ('1:100', 99, 100),
         ('1:1,000', 999, 1000),
     )
     for literal, want_start, want_end in cases:
         parsed = ranges.parse_literal(literal)
         self.assertEqual(parsed, ranges.make_range('1', want_start, want_end))
Esempio n. 2
0
 def test_query_raises_with_bad_range(self):
   """Checks that query() raises ValueError for the two bad ranges below.

   The reader is opened on self.bam with self.indexed_options. The first
   query uses contig 'XXX'; the second uses a range whose start is greater
   than its end.
   """
   # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
   # which was removed from unittest in Python 3.12.
   with sam_reader.SamReader.from_file(self.bam,
                                       self.indexed_options) as reader:
     with self.assertRaisesRegex(ValueError, 'Unknown reference_name'):
       reader.query(ranges.parse_literal('XXX:1-10'))
     with self.assertRaisesRegex(ValueError, 'unknown reference interval'):
       reader.query(ranges.parse_literal('chr20:10-5'))
Esempio n. 3
0
 def test_sam_query(self):
   """Checks how many reads query() yields for two chr20 ranges of test.bam."""
   bam_path = test_utils.genomics_core_testdata('test.bam')
   reader = sam.SamReader(bam_path)
   # (region, expected number of reads returned by the query).
   cases = [
       (ranges.parse_literal('chr20:10,000,000-10,000,100'), 106),
       (ranges.parse_literal('chr20:10,000,000-10,000,000'), 45),
   ]
   with reader:
     for region, want_count in cases:
       with reader.query(region) as reads:
         got_count = test_utils.iterable_len(reads)
         self.assertEqual(got_count, want_count)
Esempio n. 4
0
 def test_parse_literal_with_contig_map_and_bad_input_raises_exception(
     self, bad_literal):
   """Checks that parse_literal raises ValueError for `bad_literal`.

   The literal is parsed against a contig map holding only 'chr1'
   (n_bases=10).

   Args:
     bad_literal: The literal expected to be rejected.
   """
   with self.assertRaises(ValueError):
     chr1_only = {
         'chr1': reference_pb2.ContigInfo(name='chr1', n_bases=10),
     }
     ranges.parse_literal(bad_literal, contig_map=chr1_only)
Esempio n. 5
0
 def test_bam_query(self):
   """Checks the type and length of query() results for two chr20 ranges."""
   reader = sam_reader.SamReader.from_file(self.bam, self.indexed_options)
   # (region, expected number of reads returned by the query).
   cases = [
       (ranges.parse_literal('chr20:10,000,000-10,000,100'), 106),
       (ranges.parse_literal('chr20:10,000,000-10,000,000'), 45),
   ]
   with reader:
     for region, want_count in cases:
       with reader.query(region) as reads:
         self.assertIsInstance(reads, clif_postproc.WrappedCppIterable)
         got_count = test_utils.iterable_len(reads)
         self.assertEqual(got_count, want_count)
Esempio n. 6
0
 def test_query_raises_with_bad_range(self):
     """Checks that self.samples_reader.query() raises ValueError for bad ranges.

     One query uses contig 'XXX' and expects 'Unknown reference_name'; the
     other three use chr1 ranges and expect 'Malformed region'.
     """
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, 'Unknown reference_name'):
         self.samples_reader.query(ranges.parse_literal('XXX:1-10'))
     with self.assertRaisesRegex(ValueError, 'Malformed region'):
         self.samples_reader.query(ranges.parse_literal('chr1:0-5'))
     with self.assertRaisesRegex(ValueError, 'Malformed region'):
         self.samples_reader.query(ranges.parse_literal('chr1:6-5'))
     with self.assertRaisesRegex(ValueError, 'Malformed region'):
         self.samples_reader.query(ranges.parse_literal('chr1:10-5'))
Esempio n. 7
0
 def test_parse_literal_with_contig_map(self, contig_name, expected):
     """Checks parse_literal output when a contig map is supplied.

     Args:
       contig_name: The literal passed to parse_literal.
       expected: The value parse_literal is expected to return.
     """
     contig_map = dict(
         chr1=reference_pb2.ContigInfo(name='chr1', n_bases=10),
         chr2=reference_pb2.ContigInfo(name='chr2', n_bases=5),
     )
     parsed = ranges.parse_literal(contig_name, contig_map=contig_map)
     self.assertEqual(parsed, expected)
Esempio n. 8
0
    def test_vcf_query(self):
        """Checks that the input and output VCFs agree on a chr3 range query.

        An index is built for self.output_file before either reader is
        created.
        """
        tabix.build_index(self.output_file)
        self.input_reader = vcf.VcfReader(self.input_file)
        self.output_reader = vcf.VcfReader(self.output_file)

        region = ranges.parse_literal('chr3:100,000-500,000')
        from_input = list(self.input_reader.query(region))
        from_output = list(self.output_reader.query(region))
        self.assertEqual(from_input, from_output)
Esempio n. 9
0
 def test_query_on_unindexed_reader_raises(self):
     """Checks that query() on test_samples.vcf raises ValueError.

     The expected error message contains 'Cannot query without an index'.
     """
     window = ranges.parse_literal('chr1:10,000,000-10,000,100')
     unindexed_file = test_utils.genomics_core_testdata('test_samples.vcf')
     with vcf_reader.VcfReader.from_file(unindexed_file,
                                         self.options) as reader:
         # assertRaisesRegex replaces the deprecated assertRaisesRegexp
         # alias, which was removed from unittest in Python 3.12.
         with self.assertRaisesRegex(ValueError,
                                     'Cannot query without an index'):
             reader.query(window)
Esempio n. 10
0
 def test_ops_on_closed_reader_raise(self):
     """Checks that iterate() and query() raise ValueError after the reader's
     `with` block has exited."""
     with self.samples_reader:
         pass
     # At this point the reader is closed.
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, 'Cannot Iterate a closed'):
         self.samples_reader.iterate()
     with self.assertRaisesRegex(ValueError, 'Cannot Query a closed'):
         self.samples_reader.query(
             ranges.parse_literal('chr1:10,000,000-10,000,100'))
Esempio n. 11
0
  def test_fail_multiple_concurrent_iterations(self):
    """Checks that next() on a second query iterable raises ValueError.

    The first query's iterable is consumed completely, then a second query
    over the same range is issued and next() is called on its result.
    """
    range1 = ranges.parse_literal('chr3:100,000-500,000')
    reads = self.samples_reader.query(range1)
    # Drain the first iterable; the individual records are not inspected.
    for _ in reads:
      pass

    r2 = self.samples_reader.query(range1)
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(ValueError, 'No underlying iterable. This '):
      next(r2)
Esempio n. 12
0
 def test_query_without_index_raises(self, unindexed_file_name):
     """Checks that query() raises ValueError for the given test data file.

     Args:
       unindexed_file_name: Name of the test data file to open; the expected
         error message contains 'Cannot query without an index'.
     """
     reads_path = test_utils.genomics_core_testdata(unindexed_file_name)
     region = ranges.parse_literal('chr20:10,000,000-10,000,100')
     opened = sam_reader.SamReader.from_file(
         reads_path=reads_path, ref_path='', options=self.options)
     with opened as reader:
         expected_error = self.assertRaisesRegex(
             ValueError, 'Cannot query without an index')
         with expected_error:
             reader.query(region)
Esempio n. 13
0
 def test_ops_on_closed_reader_raise(self):
     """Checks that iterate() and query() raise ValueError after the reader's
     `with` block has exited."""
     reader = sam_reader.SamReader.from_file(self.bam, self.indexed_options)
     with reader:
         pass
     # At this point the reader is closed.
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, 'Cannot Iterate a closed'):
         reader.iterate()
     with self.assertRaisesRegex(ValueError, 'Cannot Query a closed'):
         reader.query(ranges.parse_literal('chr20:10,000,000-10,000,100'))
Esempio n. 14
0
 def test_context_manager(self):
   """Test that two back-to-back queries work inside one reader context."""
   reader = sam_reader.SamReader.from_file(self.bam, self.indexed_options)
   region = ranges.parse_literal('chr20:10,000,000-10,000,100')
   with reader:
     # Run the same query twice; each result is used as its own context
     # manager and exited before the next query is issued.
     for _ in range(2):
       with reader.query(region) as query_iterable:
         self.assertIsNotNone(query_iterable)
         self.assertIsInstance(query_iterable,
                               clif_postproc.WrappedCppIterable)
Esempio n. 15
0
 def test_downsampling(self, method, maybe_range, fraction, expected_n_reads):
   """Checks the number of reads produced by a downsampling reader.

   Args:
     method: 'iterate' or 'query'; any other value fails the test.
     maybe_range: Range literal parsed for the 'query' method; unused for
       'iterate'.
     fraction: Value passed as downsample_fraction to the reader.
     expected_n_reads: Expected length of the resulting iterable.
   """
   bam_path = test_utils.genomics_core_testdata('test.bam')
   reader = sam.SamReader(
       bam_path, downsample_fraction=fraction, random_seed=12345)
   with reader:
     if method not in ('iterate', 'query'):
       self.fail('Unexpected method ' + str(method))
     if method == 'iterate':
       reads_iter = reader.iterate()
     else:
       reads_iter = reader.query(ranges.parse_literal(maybe_range))
     n_reads = test_utils.iterable_len(reads_iter)
     self.assertEqual(n_reads, expected_n_reads)
Esempio n. 16
0
def main(argv):
    """Prints every read overlapping the position given on the command line.

    Args:
      argv: Command-line arguments: program name, input SAM path, and a
        range literal. Any other argument count prints a usage line and
        exits with status -1.
    """
    if len(argv) != 3:
        usage = 'Usage: {} <input_sam> <chromosome>:<position>'.format(argv[0])
        print(usage)
        sys.exit(-1)
    in_sam, literal = argv[1], argv[2]
    region = ranges.parse_literal(literal)
    position = region.start

    with sam.SamReader(in_sam) as sam_reader:
        # Collect (alignment start, aligned sequence) for each read, then
        # order the pairs by tuple comparison.
        pos_seq_pairs = [
            (read.alignment.position.position, read.aligned_sequence)
            for read in sam_reader.query(region)
        ]
        pos_seq_pairs.sort()
        if not pos_seq_pairs:
            print('No overlapping reads found for', literal)
            sys.exit(0)

        # The smallest start position is passed to print_read for every read.
        left_position = pos_seq_pairs[0][0]
        for start, seq in pos_seq_pairs:
            print_read(left_position, start, position, seq)
Esempio n. 17
0
def ascii_pileup(sam_filename, query):
  """Builds an ASCII pileup for the reads returned by querying `query`.

  Args:
    sam_filename: The filename of the BAM/SAM file.
    query: String version of range.

  Returns:
    A list with one read_str() result per read, ordered by the sorted
    (alignment position, aligned sequence) pairs. If the query returns no
    reads, a notice is printed and an empty list is returned.
  """
  region = ranges.parse_literal(query)
  position = region.start

  with sam.SamReader(sam_filename) as sam_reader:
    pos_seq_pairs = [
        (read.alignment.position.position, read.aligned_sequence)
        for read in sam_reader.query(region)
    ]
    pos_seq_pairs.sort()
    if not pos_seq_pairs:
      print('No overlapping reads found for', query)
      return []

    # The smallest start position is passed to read_str for every read.
    left_position = pos_seq_pairs[0][0]
    lines = []
    for start, seq in pos_seq_pairs:
      lines.append(read_str(left_position, start, position, seq))
    return lines
Esempio n. 18
0
 def test_vcf_query(self):
     """Checks that querying chr3:100,000-500,000 yields four records."""
     region = ranges.parse_literal('chr3:100,000-500,000')
     records = self.samples_reader.query(region)
     n_records = test_utils.iterable_len(records)
     self.assertEqual(n_records, 4)
Esempio n. 19
0
 def test_parse_literal_bad(self, bad_literal):
     """Checks that parse_literal raises a ValueError quoting `bad_literal`.

     Args:
       bad_literal: The literal expected to be rejected; it must appear
         verbatim in the error message.
     """
     import re  # Local import: the file's import block is not visible here.

     # Escape the literal so regex metacharacters in it are matched as plain
     # text rather than interpreted as a pattern. assertRaisesRegex replaces
     # the deprecated assertRaisesRegexp alias (removed in Python 3.12).
     with self.assertRaisesRegex(ValueError, re.escape(bad_literal)):
         ranges.parse_literal(bad_literal)
Esempio n. 20
0
 def test_parse_literal_numerics(self, literal, start_val, end_val):
     """Checks that `literal` parses to a chr1 range with the given bounds.

     Args:
       literal: The range literal to parse.
       start_val: Start passed to ranges.make_range for the expected range.
       end_val: End passed to ranges.make_range for the expected range.
     """
     parsed = ranges.parse_literal(literal)
     wanted = ranges.make_range('chr1', start_val, end_val)
     self.assertEqual(parsed, wanted)
Esempio n. 21
0
 def test_query(self, filename, has_embedded_ref):
     """Checks query() result counts for two ranges on the made reader.

     Args:
       filename: Passed through to self._make_reader.
       has_embedded_ref: Passed through to self._make_reader.
     """
     # (range literal, expected number of items returned by the query).
     cases = (('chr1:1-100', 3), ('chr2:1-121', 0))
     with self._make_reader(filename, has_embedded_ref) as reader:
         for literal, want_count in cases:
             region = ranges.parse_literal(literal)
             with reader.query(region) as results:
                 got_count = test_utils.iterable_len(results)
                 self.assertEqual(got_count, want_count)
Esempio n. 22
0
 def test_query(self, query, expected_variant_indices):
   """Checks that querying `query` returns the expected variants in order.

   Args:
     query: Range literal, parsed against the contigs of self.header.
     expected_variant_indices: Indices into self.variants giving the
       expected query result.
   """
   contigs = ranges.contigs_dict(self.header.contigs)
   region = ranges.parse_literal(query, contigs)
   actual = list(self.reader.query(region))
   wanted = [self.variants[i] for i in expected_variant_indices]
   self.assertEqual(actual, wanted)
Esempio n. 23
0
 def test_query_on_unindexed_reader_raises(self):
     """Checks that query() raises ValueError on a reader opened with
     self.options; the message must contain 'Cannot query without an index'.
     """
     with sam_reader.SamReader.from_file(self.bam, self.options) as reader:
         # assertRaisesRegex replaces the deprecated assertRaisesRegexp
         # alias, which was removed from unittest in Python 3.12.
         with self.assertRaisesRegex(ValueError,
                                     'Cannot query without an index'):
             reader.query(
                 ranges.parse_literal('chr20:10,000,000-10,000,100'))
Esempio n. 24
0
 def test_query_on_unindexed_reader_raises(self):
   """Checks that query() raises ValueError on a reader opened with
   self.unindexed_options; the message must contain
   'Cannot query without an index'.
   """
   with vcf_reader.VcfReader.from_file(self.samples_vcf,
                                       self.unindexed_options) as reader:
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, 'Cannot query without an index'):
       reader.query(ranges.parse_literal('chr1:10,000,000-10,000,100'))
Esempio n. 25
0
 def test_parse_literal_chromosomes(self, chrom):
     """Checks that '<chrom>:1-20' parses to make_range(chrom, 0, 20).

     Args:
       chrom: The contig name placed in front of ':1-20'.
     """
     literal = chrom + ':1-20'
     parsed = ranges.parse_literal(literal)
     wanted = ranges.make_range(chrom, 0, 20)
     self.assertEqual(parsed, wanted)