def test_parse_literal_one_bp(self):
    """Check that single-position literals parse to the expected ranges.

    Each literal names contig '1' and a single position; the expected range
    runs from position - 1 to position. The last case writes the position
    with a comma as thousands separator.
    """
    cases = (
        ('1:10', 9, 10),
        ('1:100', 99, 100),
        ('1:1,000', 999, 1000),
    )
    for literal, start, end in cases:
        self.assertEqual(ranges.parse_literal(literal),
                         ranges.make_range('1', start, end))
def test_query_raises_with_bad_range(self):
    """Check that query() raises ValueError for two unusable intervals.

    The first literal names contig 'XXX'; the second ('chr20:10-5') has a
    start greater than its end. Each is expected to produce its own error
    message.
    """
    with sam_reader.SamReader.from_file(self.bam,
                                        self.indexed_options) as reader:
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        with self.assertRaisesRegex(ValueError, 'Unknown reference_name'):
            reader.query(ranges.parse_literal('XXX:1-10'))
        with self.assertRaisesRegex(ValueError,
                                    'unknown reference interval'):
            reader.query(ranges.parse_literal('chr20:10-5'))
def test_sam_query(self):
    """Check how many records query() yields for two chr20 intervals."""
    bam_path = test_utils.genomics_core_testdata('test.bam')
    reader = sam.SamReader(bam_path)
    # Pairs of (parsed interval, expected number of records).
    cases = [
        (ranges.parse_literal('chr20:10,000,000-10,000,100'), 106),
        (ranges.parse_literal('chr20:10,000,000-10,000,000'), 45),
    ]
    with reader:
        for region, want in cases:
            with reader.query(region) as records:
                got = test_utils.iterable_len(records)
                self.assertEqual(got, want)
def test_parse_literal_with_contig_map_and_bad_input_raises_exception(
        self, bad_literal):
    """Check that parse_literal raises ValueError for a bad literal.

    Args:
      bad_literal: Literal expected to be rejected when parsed against a
        contig map that contains only 'chr1' (n_bases=10).
    """
    # Build the fixture outside the assertRaises block so that only the
    # parse_literal call itself can satisfy the expected ValueError.
    contig_map = {
        'chr1': reference_pb2.ContigInfo(name='chr1', n_bases=10),
    }
    with self.assertRaises(ValueError):
        ranges.parse_literal(bad_literal, contig_map=contig_map)
def test_bam_query(self):
    """Check the type and length of query() results for two intervals.

    The reader is opened from self.bam with self.indexed_options.
    """
    reader = sam_reader.SamReader.from_file(self.bam, self.indexed_options)
    # Pairs of (parsed interval, expected number of records).
    cases = [
        (ranges.parse_literal('chr20:10,000,000-10,000,100'), 106),
        (ranges.parse_literal('chr20:10,000,000-10,000,000'), 45),
    ]
    with reader:
        for region, want in cases:
            with reader.query(region) as records:
                self.assertIsInstance(records,
                                      clif_postproc.WrappedCppIterable)
                got = test_utils.iterable_len(records)
                self.assertEqual(got, want)
def test_query_raises_with_bad_range(self):
    """Check that query() on self.samples_reader rejects bad intervals.

    A literal naming contig 'XXX' is expected to fail with
    'Unknown reference_name'; the three chr1 literals ('0-5', '6-5',
    '10-5') are each expected to fail with 'Malformed region'.
    """
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
    with self.assertRaisesRegex(ValueError, 'Unknown reference_name'):
        self.samples_reader.query(ranges.parse_literal('XXX:1-10'))
    with self.assertRaisesRegex(ValueError, 'Malformed region'):
        self.samples_reader.query(ranges.parse_literal('chr1:0-5'))
    with self.assertRaisesRegex(ValueError, 'Malformed region'):
        self.samples_reader.query(ranges.parse_literal('chr1:6-5'))
    with self.assertRaisesRegex(ValueError, 'Malformed region'):
        self.samples_reader.query(ranges.parse_literal('chr1:10-5'))
def test_parse_literal_with_contig_map(self, contig_name, expected):
    """Check parse_literal's result when a contig map is supplied.

    Args:
      contig_name: Literal passed to parse_literal.
      expected: Value parse_literal is expected to return.
    """
    # Contig map with 'chr1' (10 bases) and 'chr2' (5 bases).
    contigs = {
        name: reference_pb2.ContigInfo(name=name, n_bases=n_bases)
        for name, n_bases in (('chr1', 10), ('chr2', 5))
    }
    actual = ranges.parse_literal(contig_name, contig_map=contigs)
    self.assertEqual(actual, expected)
def test_vcf_query(self):
    """Check that input and output files give the same query results.

    An index is built for self.output_file first; both files are then
    opened and queried over the same chr3 interval.
    """
    tabix.build_index(self.output_file)
    self.input_reader = vcf.VcfReader(self.input_file)
    self.output_reader = vcf.VcfReader(self.output_file)
    region = ranges.parse_literal('chr3:100,000-500,000')
    from_input = list(self.input_reader.query(region))
    from_output = list(self.output_reader.query(region))
    self.assertEqual(from_input, from_output)
def test_query_on_unindexed_reader_raises(self):
    """Check that query() on a reader for test_samples.vcf raises.

    The expected failure is a ValueError mentioning
    'Cannot query without an index'.
    """
    window = ranges.parse_literal('chr1:10,000,000-10,000,100')
    unindexed_file = test_utils.genomics_core_testdata('test_samples.vcf')
    with vcf_reader.VcfReader.from_file(unindexed_file,
                                        self.options) as reader:
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        with self.assertRaisesRegex(ValueError,
                                    'Cannot query without an index'):
            reader.query(window)
def test_ops_on_closed_reader_raise(self):
    """Check that iterate() and query() raise after the reader is closed."""
    with self.samples_reader:
        pass
    # At this point the reader is closed.
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
    with self.assertRaisesRegex(ValueError, 'Cannot Iterate a closed'):
        self.samples_reader.iterate()
    with self.assertRaisesRegex(ValueError, 'Cannot Query a closed'):
        self.samples_reader.query(
            ranges.parse_literal('chr1:10,000,000-10,000,100'))
def test_fail_multiple_concurrent_iterations(self):
    """Check that next() on a second query iterable raises ValueError.

    The first query's iterable is consumed completely, the same interval
    is queried again, and next() on the second iterable is expected to
    fail with a message starting 'No underlying iterable. This '.
    """
    range1 = ranges.parse_literal('chr3:100,000-500,000')
    reads = self.samples_reader.query(range1)
    # Drain the first iterable; the yielded values are not needed.
    for _ in reads:
        pass

    r2 = self.samples_reader.query(range1)
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
    with self.assertRaisesRegex(ValueError,
                                'No underlying iterable. This '):
        next(r2)
def test_query_without_index_raises(self, unindexed_file_name):
    """Check that query() raises ValueError for the given test file.

    Args:
      unindexed_file_name: Name of the test-data file to open; resolved
        through test_utils.genomics_core_testdata.
    """
    reads_path = test_utils.genomics_core_testdata(unindexed_file_name)
    region = ranges.parse_literal('chr20:10,000,000-10,000,100')
    opened = sam_reader.SamReader.from_file(
        reads_path=reads_path, ref_path='', options=self.options)
    with opened as reader:
        with self.assertRaisesRegex(ValueError,
                                    'Cannot query without an index'):
            reader.query(region)
def test_ops_on_closed_reader_raise(self):
    """Check that iterate() and query() raise after the reader is closed."""
    reader = sam_reader.SamReader.from_file(self.bam, self.indexed_options)
    with reader:
        pass
    # At this point the reader is closed.
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
    with self.assertRaisesRegex(ValueError, 'Cannot Iterate a closed'):
        reader.iterate()
    with self.assertRaisesRegex(ValueError, 'Cannot Query a closed'):
        reader.query(ranges.parse_literal('chr20:10,000,000-10,000,100'))
def test_context_manager(self):
    """Test that we can use context manager to do two queries in sequence."""
    reader = sam_reader.SamReader.from_file(self.bam, self.indexed_options)
    region = ranges.parse_literal('chr20:10,000,000-10,000,100')
    with reader:
        # Run the same query twice, one after the other, with identical
        # checks on each result.
        for _ in range(2):
            with reader.query(region) as query_iterable:
                self.assertIsNotNone(query_iterable)
                self.assertIsInstance(query_iterable,
                                      clif_postproc.WrappedCppIterable)
def test_downsampling(self, method, maybe_range, fraction, expected_n_reads):
    """Check the number of records returned from a downsampling reader.

    Args:
      method: Either 'iterate' or 'query'; any other value fails the test.
      maybe_range: Range literal, parsed only when method is 'query'.
      fraction: Value passed as downsample_fraction to the reader.
      expected_n_reads: Expected length of the resulting iterable.
    """
    bam_path = test_utils.genomics_core_testdata('test.bam')
    reader = sam.SamReader(
        bam_path, downsample_fraction=fraction, random_seed=12345)
    with reader:
        if method == 'query':
            reads_iter = reader.query(ranges.parse_literal(maybe_range))
        elif method == 'iterate':
            reads_iter = reader.iterate()
        else:
            self.fail('Unexpected method ' + str(method))
        n_reads = test_utils.iterable_len(reads_iter)
        self.assertEqual(n_reads, expected_n_reads)
def main(argv):
    """Print the reads that overlap a position given on the command line.

    Exits with status -1 after printing a usage line when argv does not
    have exactly three entries, and with status 0 when the query returns
    no reads.

    Args:
      argv: Program name, input SAM path, and a range literal, in that
        order.
    """
    if len(argv) != 3:
        print('Usage: {} <input_sam> <chromosome>:<position>'.format(argv[0]))
        sys.exit(-1)

    in_sam, literal = argv[1], argv[2]
    region = ranges.parse_literal(literal)
    position = region.start

    with sam.SamReader(in_sam) as sam_reader:
        # Sort (alignment position, aligned sequence) pairs so the first
        # entry has the smallest position.
        pos_seq_pairs = sorted(
            (read.alignment.position.position, read.aligned_sequence)
            for read in sam_reader.query(region))
        if not pos_seq_pairs:
            print('No overlapping reads found for', literal)
            sys.exit(0)

        leftmost = pos_seq_pairs[0][0]
        for start, seq in pos_seq_pairs:
            print_read(leftmost, start, position, seq)
def ascii_pileup(sam_filename, query):
    """Returns an ASCII pileup image for the query as a list of strings.

    Args:
      sam_filename: The filename of the BAM/SAM file.
      query: String version of range.

    Returns:
      One read_str() result per read returned by the query, ordered by
      alignment position; an empty list (after printing a notice) when the
      query returns no reads.
    """
    region = ranges.parse_literal(query)
    position = region.start
    with sam.SamReader(sam_filename) as sam_reader:
        # Sort (alignment position, aligned sequence) pairs so the first
        # entry has the smallest position.
        pos_seq_pairs = sorted(
            (read.alignment.position.position, read.aligned_sequence)
            for read in sam_reader.query(region))
        if not pos_seq_pairs:
            print('No overlapping reads found for', query)
            return []

        leftmost = pos_seq_pairs[0][0]
        rows = []
        for start, seq in pos_seq_pairs:
            rows.append(read_str(leftmost, start, position, seq))
        return rows
def test_vcf_query(self):
    """Check that querying a chr3 interval yields four records."""
    region = ranges.parse_literal('chr3:100,000-500,000')
    n_records = test_utils.iterable_len(self.samples_reader.query(region))
    self.assertEqual(n_records, 4)
def test_parse_literal_bad(self, bad_literal):
    """Check that parse_literal rejects a literal and names it in the error.

    Args:
      bad_literal: Literal that parse_literal is expected to reject with a
        ValueError whose message contains the literal.
    """
    import re  # Imported locally so this test carries its own dependency.

    # Escape the literal so it is matched verbatim in the error message.
    # Used raw as a pattern, characters such as '.', '*' or '(' would be
    # interpreted as regex syntax. assertRaisesRegex also replaces the
    # deprecated assertRaisesRegexp alias.
    with self.assertRaisesRegex(ValueError, re.escape(bad_literal)):
        ranges.parse_literal(bad_literal)
def test_parse_literal_numerics(self, literal, start_val, end_val):
    """Check that a literal parses to the expected range on 'chr1'.

    Args:
      literal: Range literal passed to parse_literal.
      start_val: Expected start, as passed to make_range.
      end_val: Expected end, as passed to make_range.
    """
    parsed = ranges.parse_literal(literal)
    wanted = ranges.make_range('chr1', start_val, end_val)
    self.assertEqual(parsed, wanted)
def test_query(self, filename, has_embedded_ref):
    """Check how many records query() yields for two fixed intervals.

    Args:
      filename: Forwarded to self._make_reader.
      has_embedded_ref: Forwarded to self._make_reader.
    """
    # Pairs of (range literal, expected number of records).
    expectations = (('chr1:1-100', 3), ('chr2:1-121', 0))
    with self._make_reader(filename, has_embedded_ref) as reader:
        for literal, want in expectations:
            region = ranges.parse_literal(literal)
            with reader.query(region) as records:
                self.assertEqual(test_utils.iterable_len(records), want)
def test_query(self, query, expected_variant_indices):
    """Check that a query returns the expected entries of self.variants.

    Args:
      query: Range literal, parsed against the contigs in self.header.
      expected_variant_indices: Indices into self.variants giving the
        expected results, in order.
    """
    contigs = ranges.contigs_dict(self.header.contigs)
    region = ranges.parse_literal(query, contigs)
    actual = list(self.reader.query(region))
    wanted = [self.variants[index] for index in expected_variant_indices]
    self.assertEqual(actual, wanted)
def test_query_on_unindexed_reader_raises(self):
    """Check that query() raises on a reader opened with self.options.

    The expected failure is a ValueError mentioning
    'Cannot query without an index'.
    """
    with sam_reader.SamReader.from_file(self.bam, self.options) as reader:
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        with self.assertRaisesRegex(ValueError,
                                    'Cannot query without an index'):
            reader.query(
                ranges.parse_literal('chr20:10,000,000-10,000,100'))
def test_query_on_unindexed_reader_raises(self):
    """Check that query() raises on a reader opened with unindexed options.

    The expected failure is a ValueError mentioning
    'Cannot query without an index'.
    """
    with vcf_reader.VcfReader.from_file(self.samples_vcf,
                                        self.unindexed_options) as reader:
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        with self.assertRaisesRegex(ValueError,
                                    'Cannot query without an index'):
            reader.query(
                ranges.parse_literal('chr1:10,000,000-10,000,100'))
def test_parse_literal_chromosomes(self, chrom):
    """Check that '<chrom>:1-20' parses to the range (chrom, 0, 20).

    Args:
      chrom: Contig name used both in the literal and in the expected
        range.
    """
    literal = chrom + ':1-20'
    parsed = ranges.parse_literal(literal)
    wanted = ranges.make_range(chrom, 0, 20)
    self.assertEqual(parsed, wanted)