def test_get_ligation_structure_biases(self):
    """Check the values returned by ``get_ligation_structure_biases``.

    Loads the two sorted chrI SAM files into a fresh ``self.pairs_class()``
    object with HindIII regions, then compares the four returned arrays
    against previously recorded reference values.

    Float-valued results are compared with a relative tolerance instead of
    exact equality, so harmless last-bit differences between platforms or
    numeric library versions do not break the test.
    """
    # Local import keeps this block self-contained.
    import math

    matrix_dir = os.path.join(self.dir, "test_matrix")
    sam_file1 = os.path.join(matrix_dir, "yeast.sample.chrI.1_sorted.sam")
    sam_file2 = os.path.join(matrix_dir, "yeast.sample.chrI.2_sorted.sam")
    chrI = Chromosome.from_fasta(os.path.join(matrix_dir, "chrI.fa"))
    genome = Genome(chromosomes=[chrI])

    pairs = self.pairs_class()
    try:
        regions = genome.get_regions('HindIII')
        pairs.add_regions(regions.regions)
        pair_generator = SamBamReadPairGenerator(sam_file1, sam_file2)
        pairs.add_read_pairs(pair_generator)
        genome.close()
        regions.close()

        x, i, o, b = pairs.get_ligation_structure_biases(
            sampling=200, skip_self_ligations=False)
        assert len(x) == len(i) == len(o) == len(b) == 3

        expected_i = [
            2.8756218905472637,
            0.8059701492537313,
            0.6368159203980099,
        ]
        expected_o = [
            0.2537313432835821,
            0.24875621890547264,
            0.46766169154228854,
        ]

        assert x.tolist() == [470, 4489, 19259]
        # Lengths were asserted above, so zip() cannot silently truncate.
        assert all(math.isclose(actual, expected, rel_tol=1e-9)
                   for actual, expected in zip(i.tolist(), expected_i))
        assert all(math.isclose(actual, expected, rel_tol=1e-9)
                   for actual, expected in zip(o.tolist(), expected_o))
        assert b.tolist() == [830, 413, 423]
    finally:
        # Release the pairs object even when an assertion fails.
        pairs.close()
def test_from_pairs(self):
    """Convert read pairs to a Hi-C object and verify nothing is lost.

    The Hi-C object must have as many regions as the pairs object, no
    (source, sink) edge may appear twice, and the edge weights must add
    up to the number of loaded pairs.
    """
    matrix_dir = os.path.join(self.dir, "test_matrix")
    first_mates = os.path.join(matrix_dir, "yeast.sample.chrI.1_sorted.sam")
    second_mates = os.path.join(matrix_dir, "yeast.sample.chrI.2_sorted.sam")
    chromosome = Chromosome.from_fasta(os.path.join(matrix_dir, "chrI.fa"))
    genome = Genome(chromosomes=[chromosome])

    pairs = ReadPairs()
    fragments = genome.get_regions('HindIII')
    pairs.add_regions(fragments)
    pairs.add_read_pairs(SamBamReadPairGenerator(first_mates, second_mates))
    genome.close()
    fragments.close()

    n_pairs = len(pairs)
    hic = pairs.to_hic(_hic_class=self.hic_class)
    assert len(hic.regions) == len(pairs.regions)
    pairs.close()

    total_weight = 0
    seen_edges = set()
    for edge in hic.edges():
        endpoints = (edge.source, edge.sink)
        # Each pair of endpoints may be reported only once.
        assert endpoints not in seen_edges
        seen_edges.add(endpoints)
        total_weight += edge.weight

    assert total_weight == n_pairs
    hic.close()
def setup_method(self, method):
    """Create a two-chromosome genome fixture and store it on ``self.genome``."""
    chromosomes = [
        Chromosome(name='chr1', length=10000,
                   sequence='agcgctgctgaagcttcgatcgtaagcttc'),
        Chromosome(name='chr2', length=5000,
                   sequence='gcgctgctgaagcttcgatcgtaagcttc'),
    ]
    self.genome = Genome(chromosomes=chromosomes)
def test_from_string(self):
    """Load a genome via ``Genome.from_string`` and check chromosome lengths.

    The four chromosomes in ``test_regions/chromosomes.fa`` are expected
    to have lengths 5, 3, 4 and 2, in that order.
    """
    # Avoid naming the local "dir", which would shadow the builtin.
    test_dir = os.path.dirname(os.path.realpath(__file__))
    fasta_path = os.path.join(test_dir, 'test_regions', 'chromosomes.fa')

    genome = Genome.from_string(fasta_path)
    try:
        for index, expected_length in enumerate((5, 3, 4, 2)):
            assert len(genome[index]) == expected_length
    finally:
        # Close the genome even when an assertion fails.
        genome.close()
def test_knight_matrix_balancing_per_chromosome(self):
    """Run ``kr_balancing`` with ``whole_matrix=False`` and check the result.

    The matrix must be symmetric before and after balancing, and every
    unmasked summed value must be either 0 or within 1e-5 of 1.
    """
    chromosome = Chromosome.from_fasta(self.dir + "/test_matrix/chrI.fa")
    genome = Genome(chromosomes=[chromosome])
    hic = self.hic_class()
    bins = genome.get_regions(10000)
    genome.close()
    hic.add_regions(bins)
    bins.close()
    hic.load_from_hic(self.hic_cerevisiae)

    raw_matrix = hic[:, :]
    assert is_symmetric(raw_matrix)

    kr_balancing(hic, whole_matrix=False)

    balanced_matrix = hic[:, :]
    assert is_symmetric(balanced_matrix)

    # Built-in sum() folds over the matrix's first axis; presumably this
    # yields one marginal per bin - TODO confirm the matrix type.
    for marginal in sum(balanced_matrix):
        if not np.ma.is_masked(marginal):
            assert abs(1.0 - marginal) < 1e-5 or marginal == 0

    hic.close()
def setup_method(self, method):
    """Build the lambda-phage read-pair fixture used by the tests.

    Sets ``self.dir``, ``self.pairs`` (1000 bp regions, loaded with the
    two sorted SAM files), ``self.genome`` and ``self.pairs_class``.
    """
    self.dir = os.path.dirname(os.path.realpath(__file__))
    pairs_dir = os.path.join(self.dir, "test_pairs")

    first_mates = os.path.join(pairs_dir, "lambda_reads1_sort.sam")
    second_mates = os.path.join(pairs_dir, "lambda_reads2_sort.sam")
    read_pair_source = SamBamReadPairGenerator(first_mates, second_mates)

    self.pairs = ReadPairs()
    self.genome = Genome.from_folder(os.path.join(pairs_dir, "lambda_genome"))

    bins = self.genome.get_regions(1000)
    self.pairs.add_regions(bins.regions)
    bins.close()

    self.pairs.add_read_pairs(read_pair_source)
    self.pairs_class = ReadPairs
def test_ice_matrix_balancing(self):
    """Run ``ice_balancing`` and check that the marginals are uniform.

    The matrix must be symmetric before and after balancing, and every
    unmasked summed value must be either 0 or strictly within 5 of the
    first summed value.
    """
    chromosome = Chromosome.from_fasta(self.dir + "/test_matrix/chrI.fa")
    genome = Genome(chromosomes=[chromosome])
    hic = self.hic_class()
    bins = genome.get_regions(10000)
    genome.close()
    hic.add_regions(bins)
    bins.close()
    hic.load_from_hic(self.hic_cerevisiae)

    raw_matrix = hic[:, :]
    assert is_symmetric(raw_matrix)

    ice_balancing(hic)

    balanced_matrix = hic[:, :]
    assert is_symmetric(balanced_matrix)

    marginals = sum(balanced_matrix)
    for marginal in marginals:
        if not np.ma.is_masked(marginal):
            # The first marginal serves as the reference value.
            reference = marginals[0]
            assert (reference - 5 < marginal < reference + 5) or marginal == 0

    hic.close()
class TestGenome:
    """Tests for ``Genome``: iteration, region generation and FASTA loading."""

    def setup_method(self, method):
        """Create a two-chromosome genome fixture on ``self.genome``."""
        chr1 = Chromosome(name='chr1', length=10000,
                          sequence='agcgctgctgaagcttcgatcgtaagcttc')
        chr2 = Chromosome(name='chr2', length=5000,
                          sequence='gcgctgctgaagcttcgatcgtaagcttc')
        self.genome = Genome(chromosomes=[chr1, chr2])

    def teardown_method(self, method):
        """Close the genome fixture after each test."""
        self.genome.close()

    def test_iter(self):
        """Iterating the genome yields chr1 then chr2 with their attributes."""
        for i, chromosome in enumerate(self.genome):
            if i == 0:
                assert chromosome.name == 'chr1'
                assert chromosome.length == 10000
                assert chromosome.sequence == 'agcgctgctgaagcttcgatcgtaagcttc'
            if i == 1:
                assert chromosome.name == 'chr2'
                assert chromosome.length == 5000
                assert chromosome.sequence == 'gcgctgctgaagcttcgatcgtaagcttc'

    def test_node_list(self):
        """Check the first and last region for HindIII and 4000 bp regions."""
        regions = self.genome.get_regions('HindIII')
        assert len(regions) == 6
        # Index every region so that item access is exercised throughout.
        for i in range(len(regions)):
            region = regions[i]
            if i == 0:
                assert region.chromosome == 'chr1'
                assert region.start == 1
                assert region.end == 12
            if i == 5:
                assert region.chromosome == 'chr2'
                assert region.start == 25
                assert region.end == 5000
        regions.close()

        nl = self.genome.get_regions(4000)
        assert len(nl) == 5
        for i in range(len(nl)):
            node = nl[i]
            if i == 0:
                assert node.chromosome == 'chr1'
                assert node.start == 1
                assert node.end == 4000
            # With 5 bins the last valid index is 4. Checking index 5 here
            # would never run, leaving the last bin unverified.
            if i == 4:
                assert node.chromosome == 'chr2'
                assert node.start == 4001
                assert node.end == 5000
        nl.close()

    def test_from_string(self):
        """Load a genome via ``Genome.from_string`` and check chromosome lengths."""
        # Avoid naming the local "dir", which would shadow the builtin.
        test_dir = os.path.dirname(os.path.realpath(__file__))
        genome = Genome.from_string(
            os.path.join(test_dir, 'test_regions', 'chromosomes.fa'))
        chr1 = genome[0]
        assert len(chr1) == 5
        chr2 = genome[1]
        assert len(chr2) == 3
        chr3 = genome[2]
        assert len(chr3) == 4
        chr4 = genome[3]
        assert len(chr4) == 2
        genome.close()