def test_invalid_access():
    """Plain integer indexing must fail for 'ndarray' and 'sparse' storage."""
    for storage in ('ndarray', 'sparse'):
        garray = create_genomic_array(
            GenomicIndexer.create_from_genomesize({'chr10': 300}),
            stranded=False, typecode='int8', storage=storage)

        # reading is only possible via a genomic interval
        with pytest.raises(Exception):
            garray[1]

        # writing is only possible via a genomic interval and condition
        with pytest.raises(Exception):
            garray[1] = 1
def test_gindexer_short_interval():
    """Check the number of bins obtained from sample_equalsize.bed.

    Each case pairs the keyword arguments handed to
    ``GenomicIndexer.create_from_file`` with the expected indexer length.
    """
    bedfile = os.path.join(
        pkg_resources.resource_filename('janggu', 'resources/'),
        'sample_equalsize.bed')

    cases = (
        ({'binsize': 200, 'stepsize': 200}, 4),
        ({'binsize': 180, 'stepsize': 20}, 8),
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': False}, 0),
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': True}, 4),
    )

    for options, expected in cases:
        indexer = GenomicIndexer.create_from_file(bedfile, **options)
        assert len(indexer) == expected
def test_gindexer_merged_variable_length_ranges():
    """Compare binning of sample.bed with and without zero padding."""
    bedfile = os.path.join(
        pkg_resources.resource_filename('janggu', 'resources/'),
        'sample.bed')

    def _fields(region):
        # Flatten an interval into a tuple that is easy to compare.
        return (region.chrom, region.start, region.end, region.strand)

    # zero_padding=False: the test expects 6 bins of 3000 bp
    unpadded = GenomicIndexer.create_from_file(
        bedfile, binsize=3000, stepsize=3000, zero_padding=False)
    np.testing.assert_equal(len(unpadded), 6)
    np.testing.assert_equal(_fields(unpadded[0]),
                            ('chr1', 15000, 18000, '+'))
    np.testing.assert_equal(_fields(unpadded[-1]),
                            ('chr2', 21000, 24000, '-'))

    # zero_padding=True: 8 bins are expected, the last one being shorter
    padded = GenomicIndexer.create_from_file(
        bedfile, binsize=3000, stepsize=3000, zero_padding=True)
    np.testing.assert_equal(len(padded), 8)
    np.testing.assert_equal(_fields(padded[0]),
                            ('chr1', 15000, 18000, '+'))
    np.testing.assert_equal(_fields(padded[-1]),
                            ('chr2', 24000, 25000, '-'))
def test_gindexer_errors():
    """Invalid binsize, stepsize or flank values must raise ValueError.

    The two trailing calls on scores.bed are expected to complete without
    raising; they are not wrapped in ``pytest.raises``.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    sample_bed = os.path.join(data_path, 'sample.bed')
    scores_bed = os.path.join(data_path, 'scores.bed')

    with pytest.raises(ValueError):
        # binsize of zero
        GenomicIndexer.create_from_file(sample_bed, binsize=0, stepsize=50)

    with pytest.raises(ValueError):
        # stepsize of zero
        GenomicIndexer.create_from_file(sample_bed, binsize=10, stepsize=0)

    with pytest.raises(ValueError):
        # due to flank < 0
        GenomicIndexer.create_from_file(sample_bed, binsize=200,
                                        stepsize=50, flank=-1)

    # NOTE(review): the original comment here read "due to unequal
    # intervals"; presumably scores.bed holds intervals of differing
    # lengths - confirm against the resource file.
    GenomicIndexer.create_from_file(scores_bed, binsize=None,
                                    stepsize=None, flank=0)
    GenomicIndexer.create_from_file(scores_bed, binsize=200,
                                    stepsize=200, flank=0)
def test_tmp_normalization(tmpdir):
    """Check 'tpm' normalised values for 'ndarray' and 'hdf5' storage.

    chr1 is loaded with 10 per position and chr2 with 1 per position; the
    array is created with resolution=50 and collapser='sum'.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    def loading(garray):
        garray[Interval('chr1', 0, 150), 0] = np.full((150, 1), 10)
        garray[Interval('chr2', 0, 300), 0] = np.full((300, 1), 1)
        return garray

    genomesize = {'chr1': 150, 'chr2': 300}
    # (chromosome, per-position value written by the loader)
    loaded = (('chr1', 10), ('chr2', 1))

    for store in ['ndarray', 'hdf5']:
        ga = create_genomic_array(
            GenomicIndexer.create_from_genomesize(genomesize),
            stranded=False,
            typecode='float32',
            storage=store,
            cache="cache_file",
            resolution=50,
            loader=loading,
            collapser='sum',
            normalizer=['tpm'])

        for chrom, value in loaded:
            expected = np.asarray([[[value * 1000 / 50 * 1e6 / (720.)]]])
            np.testing.assert_allclose(ga[Interval(chrom, 100, 101)],
                                       expected)
def test_bwga_instance_unstranded_taged(tmpdir):
    """Read/write round trip on an unstranded array created with datatags."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    window = Interval('chr10', 100, 120, strand='.')
    garray = create_genomic_array(
        GenomicIndexer.create_from_genomesize({'chr10': 300}),
        stranded=False,
        typecode='int8',
        storage='ndarray',
        datatags='test_bwga_instance_unstranded')

    # reading is only possible via a genomic interval
    with pytest.raises(Exception):
        garray[1]

    # writing is only possible via a genomic interval and condition
    with pytest.raises(Exception):
        garray[1] = 1

    # the window starts out all-zero
    np.testing.assert_equal(garray[window].shape, (20, 1, 1))
    np.testing.assert_equal(garray[window], np.zeros((20, 1, 1)))

    # fill the window with ones for condition 0 and read it back
    garray[window, 0] = np.ones((20, 1))
    np.testing.assert_equal(garray[window], np.ones((20, 1, 1)))
    np.testing.assert_equal(garray[window].sum(), 20)

    # nothing outside the window was touched
    chromosome = Interval('chr10', 0, 300, strand='.')
    np.testing.assert_equal(garray[chromosome].sum(), 20)
def test_output_bigwig_loss_resolution_unequal_stepsize(tmpdir):
    """Export a constant dummy loss to bigwig using overlapping bins.

    The indexer uses binsize=200 with stepsize=50.  One bigwig file per
    condition must be written, and the mean signal of condition 'c1' over
    chr1:600-1850 must equal the constant score 0.2.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    conditions = ['c1', 'c2', 'c3', 'c4']
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=list(conditions))

    bwm = get_janggu(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path, binsize=200, stepsize=50)

    dummy_eval = Scorer('loss', lambda t, p: [0.2] * len(t),
                        exporter=ExportBigwig(gindexer=gi))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bigwig')

    for cond in conditions:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))
    try:
        co = bw.values('chr1', 600, 2000-150)
    finally:
        # release the file handle even if reading the values fails
        bw.close()

    numpy.testing.assert_allclose(numpy.mean(co), 0.2, rtol=1e-5)
def test_janggu_variant_prediction(tmpdir):
    """Run variant effect prediction for sequence orders 1, 2 and 3.

    For every order a small dense model is created and
    ``predict_variant_effect`` is run on sample.vcf.  The test checks that
    'scores.hdf5' and 'snps.bed.gz' are written to the output folder, and
    that the 'diffscore' coverage is non-zero at chr2:59-60 and zero at
    chr2:54-55.
    """
    output_dir = tmpdir.strpath
    os.environ['JANGGU_OUTPUT'] = output_dir

    # These paths do not depend on the order, so build them once.
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    refgenome = os.path.join(data_path, 'sample_genome.fa')
    vcffile = os.path.join(data_path, 'sample.vcf')
    scoresfile = os.path.join(output_dir, 'scores.hdf5')
    variantsfile = os.path.join(output_dir, 'snps.bed.gz')

    for order in [1, 2, 3]:
        dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                           storage='ndarray',
                                           binsize=50,
                                           store_whole_genome=True,
                                           order=order)

        def _cnn_model(inputs, inp, oup, params):
            # The input length shrinks with the order: 50 - order + 1
            # positions, each with 4**order channels.
            inputs = Input(
                (50 - params['order'] + 1, 1, pow(4, params['order'])))
            layer = Flatten()(inputs)
            layer = Dense(params['hiddenunits'])(layer)
            output = Dense(4, activation='sigmoid')(layer)
            return inputs, output

        model = Janggu.create(_cnn_model,
                              modelparams={'hiddenunits': 2, 'order': order},
                              name='dna_ctcf_HepG2-cnn')

        model.predict_variant_effect(
            dna, vcffile,
            conditions=['m' + str(i) for i in range(4)],
            output_folder=output_dir)

        assert os.path.exists(scoresfile)
        assert os.path.exists(variantsfile)

        # The context manager closes the HDF5 file even when an assertion
        # below fails.
        with h5py.File(scoresfile, 'r') as f:
            gindexer = GenomicIndexer.create_from_file(variantsfile,
                                                       None, None)

            cov = Cover.create_from_array('snps', f['diffscore'],
                                          gindexer,
                                          store_whole_genome=True)

            print(cov['chr2', 55, 65].shape)
            print(cov['chr2', 55, 65])

            assert np.abs(cov['chr2', 59, 60]).sum() > 0.0
            assert np.abs(cov['chr2', 54, 55]).sum() == 0.0
def test_resolution_negative():
    """Creating a genomic array with resolution=-1 must raise."""
    options = dict(stranded=True, typecode='int8', storage='ndarray',
                   cache=False, resolution=-1)

    with pytest.raises(Exception):
        create_genomic_array(
            GenomicIndexer.create_from_genomesize({'chr10': 300}),
            **options)
def test_invalid_storage():
    """Creating a genomic array with an unknown storage name must raise."""
    # 'storgae' is deliberately not a valid storage option
    options = dict(stranded=True, typecode='int8', storage='storgae',
                   resolution=1, cache=False)

    with pytest.raises(Exception):
        create_genomic_array(
            GenomicIndexer.create_from_genomesize({'chr10': 300}),
            **options)
def test_create_from_array(tmpdir):
    """Check that ``trim_bed`` yields region ends divisible by 5.

    The first region of the original bed file has a start that is a
    multiple of 5 and an end that is not; after ``trim_bed(inbed, outbed,
    5)`` both coordinates of the first region must be multiples of 5.
    """
    inbed = resource_filename('janggu', 'resources/bed_test.bed')
    outbed = os.path.join(tmpdir.strpath, 'out.bed')

    trim_bed(inbed, outbed, 5)

    # original file
    gindexer = GenomicIndexer.create_from_file(inbed, None, None)
    reg = gindexer[0]
    assert (reg.start % 5) == 0
    assert (reg.end % 5) > 0

    # trimmed file (the indexer is built once; the original built it twice)
    gindexer = GenomicIndexer.create_from_file(outbed, None, None)
    reg = gindexer[0]
    assert (reg.start % 5) == 0
    assert (reg.end % 5) == 0
def test_dna_dims_order_1_from_reference(tmpdir):
    """Check shape and one-hot content of an order-1 Bioseq dataset.

    The dataset is created from sample_genome.fa with
    ``store_whole_genome=True`` and then restricted to 200 bp bins taken
    from sample.gtf.  The first ten bases of the first chr1 bin and the
    last ten bases of bin 50 are compared with known sequence.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    order = 1
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    gtf_file = os.path.join(data_path, 'sample.gtf')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    def _one_hot(sequence):
        # One row per base with columns ordered A, C, G, T, so that
        # A -> [1, 0, 0, 0] and T -> [0, 0, 0, 1].
        rows = ['ACGT'.index(base) for base in sequence]
        return np.eye(4, dtype='int8')[rows]

    gindexer = GenomicIndexer.create_from_file(gtf_file, 200, 200)

    data = Bioseq.create_from_refgenome('train', refgenome=refgenome,
                                        storage='ndarray',
                                        order=order,
                                        store_whole_genome=True)
    data.gindexer = gindexer

    assert len(data.garray.handle) == 2
    assert 'chr1' in data.garray.handle
    assert 'chr2' in data.garray.handle

    # for order 1
    assert len(data) == 100
    assert data.shape == (100, 200, 1, 4)

    # The correctness of the sequence extraction was also validated using:
    #   bedtools getfasta -fi sample_genome.fa -bed sample.bed
    #   >chr1:15000-25000
    #   ATTGTGGTGA...
    # This sequence is read from the forward strand.
    np.testing.assert_equal(data[0][0, :10, 0, :], _one_hot('ATTGTGGTGA'))

    #   bedtools getfasta -fi sample_genome.fa -bed sample.bed
    #   >chr2:15000-25000
    #   ggggaagcaa...
    # This sequence is read from the reverse strand, so the bin ends with
    # ...ttgcttcccc
    np.testing.assert_equal(data[50][0, -10:, 0, :], _one_hot('TTGCTTCCCC'))
def test_hdf5_no_cache():
    """Requesting 'hdf5' storage with cache=None must raise."""
    options = dict(stranded=True, typecode='int8', storage='hdf5',
                   cache=None)

    # hdf5 storage is expected to be rejected when no cache is given
    with pytest.raises(Exception):
        create_genomic_array(
            GenomicIndexer.create_from_genomesize({'chr10': 300}),
            **options)
def test_gindexer_short_interval_with_dataframe():
    """Check bin counts when the regions are passed as a pandas DataFrame.

    sample_equalsize.bed is read into a DataFrame with the columns
    'chrom', 'start' and 'end', which is then handed to
    ``GenomicIndexer.create_from_file`` in place of a file name.
    """
    bedfile = os.path.join(
        pkg_resources.resource_filename('janggu', 'resources/'),
        'sample_equalsize.bed')
    df = pd.read_csv(bedfile, sep='\t', header=None,
                     names=['chrom', 'start', 'end'])

    cases = (
        ({'binsize': 200, 'stepsize': 200}, 4),
        ({'binsize': 180, 'stepsize': 20}, 8),
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': False}, 0),
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': True}, 4),
    )

    for options, expected in cases:
        indexer = GenomicIndexer.create_from_file(df, **options)
        assert len(indexer) == expected
def test_bwga_instance_unstranded(tmpdir):
    """Read/write round trip on an unstranded, uncached 'ndarray' array."""
    window = Interval('chr10', 100, 120, strand='.')
    garray = create_genomic_array(
        GenomicIndexer.create_from_genomesize({'chr10': 300}),
        stranded=False,
        typecode='int8',
        storage='ndarray',
        cache=False)

    # the window starts out all-zero
    np.testing.assert_equal(garray[window].shape, (20, 1, 1))
    np.testing.assert_equal(garray[window], np.zeros((20, 1, 1)))

    # fill the window with ones for condition 0 and read it back
    garray[window, 0] = np.ones((20, 1))
    np.testing.assert_equal(garray[window], np.ones((20, 1, 1)))
    np.testing.assert_equal(garray[window].sum(), 20)

    # nothing outside the window was touched
    chromosome = Interval('chr10', 0, 300, strand='.')
    np.testing.assert_equal(garray[chromosome].sum(), 20)
def test_gindexer_merged():
    """Check ``filter_by_region`` with chromosome include/exclude filters."""
    bedfile = os.path.join(
        pkg_resources.resource_filename('janggu', 'resources/'),
        'sample.bed')

    gi = GenomicIndexer.create_from_file(bedfile, binsize=200, stepsize=200)
    np.testing.assert_equal(len(gi), 100)

    # Build all filtered views first, then check their sizes.
    only_chr1 = gi.filter_by_region(include='chr1')
    only_chr10 = gi.filter_by_region(include='chr10')
    without_chr2 = gi.filter_by_region(exclude='chr2')
    without_chr10 = gi.filter_by_region(exclude='chr10')

    np.testing.assert_equal(len(only_chr1), 50)
    np.testing.assert_equal(len(only_chr10), 0)
    np.testing.assert_equal(len(without_chr2), 50)
    np.testing.assert_equal(len(without_chr10), 100)
def test_output_bed_loss_resolution_equal_stepsize(tmpdir):
    """Export a constant dummy loss to bed files, one per condition.

    The indexer stepsize (200) equals the export resolution (200).  The
    bed file of condition 'c1' must contain 7 regions, each scored 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    conds = ('c1', 'c2', 'c3', 'c4')
    inputs = Array("x", numpy.random.random((7, 1, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 1, 1, 4)),
                    conditions=list(conds))

    bwm = get_janggu_conv(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200, stepsize=200)

    dummy_eval = Scorer('loss', lambda t, p: [0.1] * len(t),
                        exporter=export_bed)

    export_options = {'gindexer': gi, 'resolution': 200}
    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval],
                 exporter_kwargs=export_options)

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.nptest.y.{}.bed')

    for cond in conds:
        assert os.path.exists(file_.format(cond))

    scores = [region.score
              for region in HTSeq.BED_Reader(file_.format('c1'))]
    for score in scores:
        numpy.testing.assert_equal(score, 0.1)

    assert len(scores) == 7, 'There should be 7 regions in the bed file.'
def test_bwga_instance_stranded(tmpdir):
    """Read/write round trip on a stranded 'ndarray' genomic array."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    window = Interval('chr10', 100, 120, strand='+')
    garray = create_genomic_array(
        GenomicIndexer.create_from_genomesize({'chr10': 300}),
        stranded=True,
        typecode='int8',
        storage='ndarray')

    # a stranded array has two entries on the second axis
    np.testing.assert_equal(garray[window].shape, (20, 2, 1))
    np.testing.assert_equal(garray[window], np.zeros((20, 2, 1)))

    # set only the first entry of the strand axis to one
    expected = np.zeros((20, 2, 1))
    expected[:, :1, :] = 1
    garray[window, 0] = expected[:, :, 0]

    np.testing.assert_equal(garray[window], expected)
    np.testing.assert_equal(garray[window].sum(), 20)

    # nothing outside the window was touched
    chromosome = Interval('chr10', 0, 300)
    np.testing.assert_equal(garray[chromosome].sum(), 20)
def test_output_bed_loss_resolution_unequal_stepsize(tmpdir):
    """Export a constant dummy loss to bed with resolution < stepsize.

    The indexer uses stepsize=200 while ``ExportBed`` is given
    resolution=50.  The bed file of condition 'c1' must contain 28
    regions, each scored 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    conds = ('c1', 'c2', 'c3', 'c4')
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=list(conds))

    bwm = get_janggu(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200, stepsize=200)

    exporter = ExportBed(gindexer=gi, resolution=50)
    dummy_eval = Scorer('loss', lambda t, p: [0.1] * len(t),
                        exporter=exporter)

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bed')

    for cond in conds:
        assert os.path.exists(file_.format(cond))

    scores = [region.score for region in BedTool(file_.format('c1'))]
    for score in scores:
        numpy.testing.assert_equal(float(score), 0.1)

    assert len(scores) == 28, 'There should be 28 regions in the bed file.'
def test_perctrim(tmpdir):
    """Smoke test: 'binsizenorm' + 'perctrim' normalisation must not raise.

    The array is built for both 'ndarray' and 'hdf5' storage from normally
    distributed values; no values are asserted.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    def loading(garray):
        chr1_values = np.random.normal(loc=10, size=150)
        garray[Interval('chr1', 0, 150), 0] = chr1_values.reshape(-1, 1)
        chr2_values = np.random.normal(loc=100, size=300)
        garray[Interval('chr2', 0, 300), 0] = chr2_values.reshape(-1, 1)
        return garray

    genomesize = {'chr1': 150, 'chr2': 300}

    for store in ['ndarray', 'hdf5']:
        create_genomic_array(
            GenomicIndexer.create_from_genomesize(genomesize),
            stranded=False,
            typecode='float32',
            storage=store,
            cache="cache_file",
            loader=loading,
            normalizer=['binsizenorm', 'perctrim'])
def test_zscore_normalization(tmpdir):
    """Check 'zscore' normalisation for 'ndarray' and 'hdf5' storage.

    chr1 is loaded with +1 and chr2 with -1; afterwards the weighted mean
    must be 0, the weighted standard deviation 1, and single positions
    must match the precomputed z-scores.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    def loading(garray):
        garray[Interval('chr1', 0, 150), 0] = np.full((150, 1), 1)
        garray[Interval('chr2', 0, 300), 0] = np.full((300, 1), -1)
        return garray

    genomesize = {'chr1': 150, 'chr2': 300}
    tolerance = dict(rtol=1e-5, atol=1e-5)

    for store in ['ndarray', 'hdf5']:
        ga = create_genomic_array(
            GenomicIndexer.create_from_genomesize(genomesize),
            stranded=False,
            typecode='float32',
            storage=store,
            cache=True,
            loader=loading,
            normalizer=['zscore'])

        np.testing.assert_allclose(ga.weighted_mean(),
                                   np.asarray([0.0]), **tolerance)
        np.testing.assert_allclose(ga.weighted_sd(),
                                   np.asarray([1.]), **tolerance)
        np.testing.assert_allclose(ga[Interval('chr1', 100, 101)],
                                   np.asarray([[[1.412641340027806]]]),
                                   **tolerance)
        np.testing.assert_allclose(ga[Interval('chr2', 100, 101)],
                                   np.asarray([[[-0.706320670013903]]]),
                                   **tolerance)
def test_output_bed_predict_denseout(tmpdir):
    """Export constant dummy predictions of a dense-output model to bed.

    The bed file of condition 'c1' must contain 7 regions, each scored
    0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    conds = ('c1', 'c2', 'c3', 'c4')
    inputs = Array("x", numpy.random.random((7, 10)))
    outputs = Array('y', numpy.random.random((7, 4)),
                    conditions=list(conds))

    bwm = get_janggu(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200, stepsize=200)

    exporter = ExportBed(gindexer=gi, resolution=200)
    dummy_eval = Scorer('pred', lambda p: [0.1] * len(p),
                        exporter=exporter,
                        conditions=list(conds))

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.nptest.y.{}.bed')

    for cond in conds:
        assert os.path.exists(file_.format(cond))

    scores = [region.score
              for region in HTSeq.BED_Reader(file_.format('c1'))]
    for score in scores:
        numpy.testing.assert_equal(score, 0.1)

    assert len(scores) == 7, 'There should be 7 regions in the bed file.'
def test_output_bigwig_predict_denseout(tmpdir):
    """Export constant dummy predictions of a dense-output model to bigwig.

    One bigwig file per condition must be written, and the mean signal of
    condition 'c1' over chr1:600-2000 must equal the constant score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    conditions = ['c1', 'c2', 'c3', 'c4']
    inputs = Array("x", numpy.random.random((7, 10)))
    outputs = Array('y', numpy.random.random((7, 4)),
                    conditions=list(conditions))

    bwm = get_janggu(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200, stepsize=200)

    dummy_eval = Scorer('pred', lambda p: [0.1] * len(p),
                        exporter=export_bigwig,
                        conditions=list(conditions))

    bwm.predict(inputs, callbacks=[dummy_eval],
                exporter_kwargs={'gindexer': gi})

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.nptest.y.{}.bigwig')

    for cond in conditions:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))
    try:
        co = bw.values('chr1', 600, 2000)
    finally:
        # release the file handle even if reading the values fails
        bw.close()

    numpy.testing.assert_allclose(numpy.mean(co), 0.1, rtol=1e-5)
def test_check_resolution_collapse_compatibility(tmpdir):
    """Check which resolution/collapser combinations are accepted.

    Without a collapser, resolution=50 and resolution=None must both
    raise.  The three remaining configurations must construct without
    raising.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    def loading(garray):
        garray[Interval('chr1', 0, 150), 0] = np.full((150, 1), 10)
        garray[Interval('chr2', 0, 300), 0] = np.full((300, 1), 1)
        return garray

    genomesize = {'chr1': 150, 'chr2': 300}

    # collapser=None must be rejected for resolution=50 and resolution=None
    for resolution in (50, None):
        with pytest.raises(Exception):
            create_genomic_array(
                GenomicIndexer.create_from_genomesize(genomesize),
                stranded=False,
                typecode='float32',
                storage="ndarray",
                cache=None,
                resolution=resolution,
                loader=loading,
                collapser=None,
                normalizer=['tpm'])

    # resolution=1 with fixed-size bins, no collapser passed
    fixed_bins = GenomicIndexer.create_from_file(
        [Interval('chr1', 0, 150),
         Interval('chr2', 0, 150),
         Interval('chr2', 150, 300)],
        binsize=150,
        stepsize=None)
    create_genomic_array(fixed_bins,
                         stranded=False,
                         typecode='float32',
                         storage="ndarray",
                         cache=None,
                         resolution=1,
                         loader=loading)

    # resolution=None on a collapsed indexer with a 'sum' collapser
    collapsed = GenomicIndexer.create_from_file(
        [Interval('chr1', 0, 150), Interval('chr2', 0, 300)],
        binsize=None,
        stepsize=None,
        collapse=True)
    create_genomic_array(collapsed,
                         stranded=False,
                         typecode='float32',
                         storage="ndarray",
                         cache='test',
                         resolution=None,
                         loader=loading,
                         store_whole_genome=None,
                         collapser='sum')

    # same indexer settings, uncached and with 'tpm' normalisation
    collapsed = GenomicIndexer.create_from_file(
        [Interval('chr1', 0, 150), Interval('chr2', 0, 300)],
        binsize=None,
        stepsize=None,
        collapse=True)
    create_genomic_array(collapsed,
                         stranded=False,
                         typecode='float32',
                         storage="ndarray",
                         cache=None,
                         resolution=None,
                         loader=loading,
                         collapser='sum',
                         normalizer=['tpm'])
os.makedirs(vcfoutput, exist_ok=True)

# Run the variant effect prediction on the Bioseq object with the VCF file.
scoresfile, variantsfile = model.predict_variant_effect(
    DNA,
    VCFFILE,
    conditions=['feature'],
    output_folder=vcfoutput)

# Both paths are rebound explicitly to the files in the output folder.
scoresfile = os.path.join(vcfoutput, 'scores.hdf5')
variantsfile = os.path.join(vcfoutput, 'snps.bed.gz')

# Load the variant effect predictions (difference between the reference
# and the alternative variant) into a Cover object for visualization.
f = h5py.File(scoresfile, 'r')
gindexer = GenomicIndexer.create_from_file(variantsfile, None, None)

# The Cover object is built with store_whole_genome=True first and then
# with store_whole_genome=False; only the second one remains in snpcov.
for whole_genome in (True, False):
    snpcov = Cover.create_from_array('snps', f['diffscore'],
                                     gindexer,
                                     store_whole_genome=whole_genome,
                                     padding_value=np.nan)

# Coordinates of the fourth region of the DNA dataset.
gi = DNA.gindexer[3]
chrom, start = gi.chrom, gi.start
def test_filter_by_region():
    """Check ``filter_by_region`` with start/end bounds on 2 bp bins.

    bed_test.bed is binned with binsize=2 and stepsize=2, which yields 9
    bins on chr1.  The expected results show that a bin is kept when it
    overlaps the requested window, even partially.
    """
    roi_file = pkg_resources.resource_filename('janggu',
                                               'resources/bed_test.bed')

    f1 = GenomicIndexer.create_from_file(regions=roi_file,
                                         binsize=2, stepsize=2)
    np.testing.assert_equal(len(f1), 9)

    def _render(indexer):
        # One line per region, using the region's string representation.
        return "".join(str(region) + "\n" for region in indexer)

    all_bins = ("chr1:[0,2)/+\n"
                "chr1:[2,4)/+\n"
                "chr1:[4,6)/+\n"
                "chr1:[6,8)/+\n"
                "chr1:[8,10)/+\n"
                "chr1:[10,12)/+\n"
                "chr1:[12,14)/+\n"
                "chr1:[14,16)/+\n"
                "chr1:[16,18)/+\n")
    rendered_all = _render(f1)
    np.testing.assert_equal(rendered_all, all_bins)

    # a window spanning everything returns every bin
    everything = f1.filter_by_region(include='chr1', start=0, end=18)
    np.testing.assert_equal(rendered_all, _render(everything))

    # (start, end, expected bins)
    cases = (
        (5, 10, ("chr1:[4,6)/+\n"
                 "chr1:[6,8)/+\n"
                 "chr1:[8,10)/+\n")),
        (5, 11, ("chr1:[4,6)/+\n"
                 "chr1:[6,8)/+\n"
                 "chr1:[8,10)/+\n"
                 "chr1:[10,12)/+\n")),
        (6, 10, ("chr1:[6,8)/+\n"
                 "chr1:[8,10)/+\n")),
        (6, 11, ("chr1:[6,8)/+\n"
                 "chr1:[8,10)/+\n"
                 "chr1:[10,12)/+\n")),
    )
    for start, end, expected in cases:
        subset = f1.filter_by_region(include='chr1', start=start, end=end)
        np.testing.assert_equal(_render(subset), expected)

    # a window beyond the last bin returns nothing
    nothing = f1.filter_by_region(include='chr1', start=20, end=30)
    np.testing.assert_equal(len(nothing), 0)