def test_iterate_exclude_filters(self):
    """Check read counts when iterating with different ``excluded_filters``.

    Three filters (unmapped, quality with a cutoff of 35, strict
    non-unique) are queued on the reads loaded from ``self.sam1_file`` and
    then executed together. Afterwards ``reads.reads`` is iterated once per
    combination of filter names passed as ``excluded_filters`` and the
    number of yielded reads is compared to the expected count.
    """
    reads = Reads(self.sam1_file)
    # try/finally so the reads object is closed even when an assertion
    # fails; otherwise a failing test would leave the handle open.
    try:
        reads.filter_unmapped(queue=True)
        reads.filter_quality(35, queue=True)
        reads.filter_non_unique(strict=True, queue=True)
        reads.run_queued_filters()

        # (filter names handed to excluded_filters, expected read count)
        expected_counts = [
            (['unmapped', 'mapq', 'uniqueness'], 271),
            (['unmapped', 'uniqueness'], 246),
            (['unmapped', 'mapq'], 153),
            (['mapq', 'uniqueness'], 271),
            (['unmapped'], 153),
            (['mapq'], 153),
            (['uniqueness'], 246),
        ]
        for excluded, expected in expected_counts:
            remaining = list(reads.reads(excluded_filters=excluded))
            # The excluded names are the assertion message so a failure
            # identifies which combination broke.
            assert len(remaining) == expected, excluded
    finally:
        reads.close()
class TestAccessOptimisedReadPairs(TestFragmentMappedReadPairs):
    """Run the inherited read-pair tests against ``AccessOptimisedReadPairs``.

    Only the fixtures are overridden: ``pairs_class`` is set to
    ``AccessOptimisedReadPairs`` so every test inherited from
    ``TestFragmentMappedReadPairs`` exercises that class instead.
    """

    def setup_method(self, method):
        """Build the reads, genome and pairs fixtures for one test.

        This is a plain instance method, so the fixtures live on the test
        instance rather than on the class object.

        :param method: the test method about to run (supplied by pytest,
                       unused here)
        """
        self.dir = os.path.dirname(os.path.realpath(__file__))
        sam1_file = self.dir + "/test_seq/lambda_reads1.sam"
        sam2_file = self.dir + "/test_seq/lambda_reads2.sam"
        self.reads1 = Reads(sam1_file)
        self.reads2 = Reads(sam2_file)
        self.reads1.filter_unmapped()
        self.reads2.filter_unmapped()
        self.genome = Genome.from_folder(self.dir + "/test_seq/lambda_genome/")

        self.pairs_class = AccessOptimisedReadPairs
        self.pairs = self.pairs_class()
        regions = self.genome.get_regions(1000)
        self.pairs.load(self.reads1, self.reads2, regions=regions)
        # The regions object is only needed while loading.
        regions.close()

    def teardown_method(self, method):
        """Close every fixture opened in ``setup_method``.

        :param method: the test method that just ran (supplied by pytest,
                       unused here)
        """
        self.reads1.close()
        self.reads2.close()
        self.genome.close()
        self.pairs.close()
class TestBWAFragmentMappedReads:
    """Read-pair tests based on the ``test_bwa1.sam`` / ``test_bwa2.sam`` files."""

    def setup_method(self, method):
        """Load both SAM files, drop unmapped reads and build the pairs.

        Regions come from ``get_regions('MboI')`` on the genome found in
        ``test_seq/dmel_genome/``.

        :param method: the test method about to run (unused)
        """
        self.dir = os.path.dirname(os.path.realpath(__file__))

        self.reads1 = Reads()
        self.reads2 = Reads()
        self.reads1.load(self.dir + "/test_seq/test_bwa1.sam")
        self.reads2.load(self.dir + "/test_seq/test_bwa2.sam")
        for loaded_reads in (self.reads1, self.reads2):
            loaded_reads.filter_unmapped()

        self.genome = Genome.from_folder(self.dir + "/test_seq/dmel_genome/")
        self.pairs = FragmentMappedReadPairs()

        fragment_regions = self.genome.get_regions('MboI')
        self.pairs.load(self.reads1, self.reads2, regions=fragment_regions)
        fragment_regions.close()

    def teardown_method(self, method):
        """Close reads, genome and pairs in that order.

        :param method: the test method that just ran (unused)
        """
        for resource in (self.reads1, self.reads2, self.genome, self.pairs):
            resource.close()

    def test_loaded_bwamem_pairs(self):
        """The loaded data must report 896 single reads and 515 pairs."""
        pairs = self.pairs
        assert pairs._single_count == 896
        assert pairs._pair_count == 515

    def test_pcr_duplicate_filter(self):
        """Filtering with a threshold of 3 reduces the pair count from 515 to 512."""
        # NOTE(review): the value returned by add_mask_description is not
        # handed to PCRDuplicateFilter, unlike the other filter tests in
        # this file -- confirm whether mask=... should be passed.
        self.pairs.add_mask_description(
            'pcr_duplicate', 'Mask suspected PCR duplicates.')
        duplicate_filter = PCRDuplicateFilter(pairs=self.pairs, threshold=3)

        assert len(self.pairs) == 515
        self.pairs.filter(duplicate_filter)
        assert len(self.pairs) == 512
class TestFragmentMappedReadPairs:
    """Tests for ``FragmentMappedReadPairs`` built from the lambda test reads.

    Subclasses can re-run the same tests against another implementation by
    overriding ``setup_method`` and assigning a different ``pairs_class``.
    """

    def setup_method(self, method):
        """Build the reads, genome and pairs fixtures for one test.

        This is a plain instance method, so the fixtures live on the test
        instance rather than on the class object.

        :param method: the test method about to run (supplied by pytest,
                       unused here)
        """
        self.dir = os.path.dirname(os.path.realpath(__file__))
        sam1_file = self.dir + "/test_seq/lambda_reads1.sam"
        sam2_file = self.dir + "/test_seq/lambda_reads2.sam"
        self.reads1 = Reads(sam1_file)
        self.reads2 = Reads(sam2_file)
        self.reads1.filter_unmapped()
        self.reads2.filter_unmapped()
        self.genome = Genome.from_folder(self.dir + "/test_seq/lambda_genome/")

        self.pairs_class = FragmentMappedReadPairs
        self.pairs = self.pairs_class()
        regions = self.genome.get_regions(1000)
        self.pairs.load(self.reads1, self.reads2, regions=regions)
        # The regions object is only needed while loading.
        regions.close()

    def teardown_method(self, method):
        """Close every fixture opened in ``setup_method``.

        :param method: the test method that just ran (supplied by pytest,
                       unused here)
        """
        self.reads1.close()
        self.reads2.close()
        self.genome.close()
        self.pairs.close()

    def test_select(self):
        """Index the pairs (first, second, last) and verify both reads of each."""
        chromosome = 'gi|9626243|ref|NC_001416.1|'
        # (index, left read, right read); each read is given as
        # (position, strand, fragment start, fragment end).
        expected = [
            (0, (40884, 1, 40001, 41000), (41039, -1, 41001, 42000)),
            (1, (19617, 1, 19001, 20000), (19736, -1, 19001, 20000)),
            (-1, (25765, -1, 25001, 26000), (25622, 1, 25001, 26000)),
        ]
        for index, left, right in expected:
            pair = self.pairs[index]
            assert isinstance(pair, FragmentReadPair)
            for read, (position, strand, start, end) in ((pair.left, left),
                                                         (pair.right, right)):
                assert read.position == position
                assert read.strand == strand
                assert isinstance(read.fragment, GenomicRegion)
                assert read.fragment.start == start
                assert read.fragment.end == end
                assert read.fragment.chromosome == chromosome

    def test_iter(self):
        """Iterate over all pairs and sanity-check types and value ranges."""
        for pair in self.pairs:
            assert isinstance(pair, FragmentReadPair)
            for read in (pair.left, pair.right):
                assert isinstance(read, FragmentRead)
                assert isinstance(read.fragment, GenomicRegion)
                # A position is either positive or the special value -1.
                assert read.position > 0 or read.position == -1
                assert read.strand in (-1, 1)
                assert 0 < read.fragment.start <= read.fragment.end
                if read.position > 0:
                    # A positive position must lie inside its fragment.
                    assert (read.fragment.start
                            <= read.position
                            <= read.fragment.end)

    def test_len(self):
        """The fixture contains 44 pairs."""
        assert len(self.pairs) == 44

    def test_single(self):
        """The fixture contains 6 entries in ``_single``."""
        assert len(self.pairs._single) == 6

    def test_auto_mindist(self):
        """``_auto_dist`` returns 67 and 45 for the two fixed ratio series."""
        ad = self.pairs_class._auto_dist

        # Seed the generator so the values drawn for ``b`` are reproducible.
        np.random.seed(101)
        # NOTE(review): x holds 10 values while i, o and b hold 14 --
        # confirm this mismatch is intended.
        x = np.linspace(1, 100, 10)
        i = [4.0, 3.0, 1.0, 1.2, 0.4, 0.8, 0.5,
             0.4, 0.6, 0.5, 0.6, 0.5, 0.5, 0.5]
        o = [0.2, 0.4, 0.3, 0.6, 0.5, 0.4, 0.5,
             0.5, 0.6, 0.5, 0.5, 0.4, 0.5, 0.5]
        b = np.random.normal(500, 200, 14).astype(int)

        assert ad(x, i, b, 0.05) == 67
        assert ad(x, o, b, 0.05) == 45

    def test_filter_inward(self):
        """``InwardPairsFilter(minimum_distance=100)`` leaves 18 of 44 pairs."""
        mask = self.pairs.add_mask_description(
            'inwards',
            'Mask read pairs that inward facing and closer than 100bp')
        in_filter = InwardPairsFilter(minimum_distance=100, mask=mask)

        assert len(self.pairs) == 44
        self.pairs.filter(in_filter)
        assert len(self.pairs) == 18

    def test_filter_outward(self):
        """``OutwardPairsFilter(minimum_distance=100)`` leaves 28 of 44 pairs."""
        mask = self.pairs.add_mask_description(
            'outwards',
            'Mask read pairs that outward facing and closer than 100bp')
        out_filter = OutwardPairsFilter(minimum_distance=100, mask=mask)

        assert len(self.pairs) == 44
        self.pairs.filter(out_filter)
        assert len(self.pairs) == 28

    def test_filter_redist(self):
        """``ReDistanceFilter(maximum_distance=300)`` leaves 13 of 44 pairs."""
        mask = self.pairs.add_mask_description(
            're-dist',
            'Mask read pairs where one half maps more than 100bp away from both RE sites'
        )
        re_filter = ReDistanceFilter(maximum_distance=300, mask=mask)

        assert len(self.pairs) == 44
        self.pairs.filter(re_filter)
        assert len(self.pairs) == 13

    def test_filter_self_ligated(self):
        """``SelfLigationFilter`` leaves 7 of 44 pairs."""
        mask = self.pairs.add_mask_description(
            'self_ligated',
            'Mask read pairs that represent self-ligated fragments')
        self_ligation_filter = SelfLigationFilter(mask=mask)

        assert len(self.pairs) == 44
        self.pairs.filter(self_ligation_filter)
        assert len(self.pairs) == 7

    def test_get_ligation_structure_biases(self):
        """Check ``get_ligation_structure_biases`` on the yeast chrI sample."""
        reads1 = Reads(self.dir + "/../data/test_genomic/yeast.sample.chrI.1.sam")
        reads2 = Reads(self.dir + "/../data/test_genomic/yeast.sample.chrI.2.sam")
        chrI = Chromosome.from_fasta(self.dir + "/../data/test_genomic/chrI.fa")
        genome = Genome(chromosomes=[chrI])
        pairs = self.pairs_class()
        # try/finally so the locally created pairs object is closed even
        # when loading or an assertion fails.
        try:
            regions = genome.get_regions('HindIII')
            pairs.load(reads1, reads2, regions)
            # The inputs are no longer needed once the pairs are loaded.
            reads1.close()
            reads2.close()
            genome.close()
            regions.close()

            x, i, o, b = pairs.get_ligation_structure_biases(
                sampling=200, skip_self_ligations=False)

            assert len(x) == len(i) == len(o) == len(b) == 3
            assert x.tolist() == [494, 4487, 19399]
            assert i.tolist() == [
                2.616915422885572, 0.8059701492537313, 0.6417910447761194
            ]
            assert o.tolist() == [
                0.2537313432835821, 0.24378109452736318, 0.46766169154228854
            ]
            assert b.tolist() == [778, 412, 424]
        finally:
            pairs.close()

    def test_re_dist(self):
        """``re_distance`` is 199 at position 200 and 10 at position 990."""
        read1 = FragmentRead(
            GenomicRegion(chromosome='chr1', start=1, end=1000),
            position=200, strand=-1)
        assert read1.re_distance() == 199

        read2 = FragmentRead(
            GenomicRegion(chromosome='chr1', start=1, end=1000),
            position=990, strand=-1)
        assert read2.re_distance() == 10

    def test_iterate_exclude_filters(self):
        """Check pair counts when iterating with different ``excluded_filters``.

        The inward, outward and RE-distance filters are applied one after
        the other, after which no pairs remain. ``pairs.pairs`` is then
        iterated with filters excluded by name, by filter object, by mask
        object and by the integer 3.
        """
        mask = self.pairs.add_mask_description(
            'inwards',
            'Mask read pairs that inward facing and closer than 100bp')
        in_filter = InwardPairsFilter(minimum_distance=100, mask=mask)
        self.pairs.filter(in_filter)

        mask = self.pairs.add_mask_description(
            'outwards',
            'Mask read pairs that outward facing and closer than 100bp')
        out_filter = OutwardPairsFilter(minimum_distance=100, mask=mask)
        self.pairs.filter(out_filter)

        mask = self.pairs.add_mask_description(
            're-dist',
            'Mask read pairs where one half maps more than 100bp away from both RE sites'
        )
        re_filter = ReDistanceFilter(maximum_distance=300, mask=mask)
        self.pairs.filter(re_filter)

        assert len(self.pairs) == 0

        def count(excluded):
            """Return how many pairs are yielded with *excluded* filters ignored."""
            return len(list(self.pairs.pairs(excluded_filters=excluded)))

        # Filters referenced by name.
        assert count(['inwards', 'outwards', 're-dist']) == 44
        assert count(['inwards', 'outwards']) == 13
        assert count(['inwards', 're-dist']) == 28
        assert count(['outwards', 're-dist']) == 18
        assert count(['inwards']) == 11
        assert count(['outwards']) == 2
        assert count(['re-dist']) == 2

        # Filters referenced by filter object, mask object or integer.
        # ``mask`` is the 're-dist' mask at this point.
        assert count([in_filter, re_filter]) == 28
        assert count([in_filter, mask]) == 28
        assert count([in_filter, 3]) == 28

    @pytest.mark.parametrize("gz", [False, True])
    @pytest.mark.parametrize("include_filtered", [False, True])
    def test_export_homer(self, gz, include_filtered, tmpdir_factory):
        """Export the pairs with ``to_homer`` and count the written lines.

        :param gz: write to (and read back from) a ``.tsv.gz`` file name
                   instead of ``.tsv``
        :param include_filtered: forwarded to ``to_homer``; 44 lines are
                                 expected when True, 7 when False
        :param tmpdir_factory: pytest fixture providing the output folder
        """
        fn = str(
            tmpdir_factory.mktemp("output").join(".tsv.gz" if gz else ".tsv"))

        mask = self.pairs.add_mask_description(
            'self_ligated',
            'Mask read pairs that represent self-ligated fragments')
        self_ligation_filter = SelfLigationFilter(mask=mask)
        self.pairs.filter(self_ligation_filter)
        self.pairs.to_homer(fn, include_filtered=include_filtered)

        if gz:
            import gzip
            import sys
            # Text mode is spelled "rt" on Python 3 and "r" otherwise.
            read_handle = gzip.open(
                fn, mode="rt" if sys.version_info.major == 3 else "r")
        else:
            read_handle = open(fn, mode="r")
        with read_handle:
            lines = read_handle.readlines()

        # The parentheses matter: a conditional expression binds more
        # loosely than ``==``, so without them the else-branch would assert
        # the truthy constant 7 and the check could never fail.
        assert len(lines) == (44 if include_filtered else 7)