class TestHic(RegionMatrixContainerTestFactory):
    """Hic-backed test case derived from ``RegionMatrixContainerTestFactory``."""

    def setup_method(self, method):
        """Open ``test_matrix/test_fanc.hic`` read-only as ``self.matrix``."""
        fixture_path = os.path.join(test_dir, 'test_matrix', 'test_fanc.hic')
        self.matrix = Hic(fixture_path, mode='r')

    def teardown_method(self, method):
        """Close the matrix that ``setup_method`` opened."""
        self.matrix.close()
def test_masked_matrix(self):
    """Check which cells of ``Hic.matrix()`` are masked.

    Twelve regions are added (5 on chr1, 3 on chr2, 4 on chr3) and an edge is
    created for every region pair that does not involve region 1 or region 5.
    The test then asserts that a matrix cell is masked exactly when its row
    or its column is one of those two edge-less regions.
    """
    masked = {1, 5}

    # (chromosome, range stop, range step) -> 5 + 3 + 4 = 12 regions.
    # Every region spans 1000 bp, so the chr3 regions (step 500) overlap.
    layout = (("chr1", 5000, 1000), ("chr2", 3000, 1000), ("chr3", 2000, 500))

    hic = Hic()
    try:
        nodes = [
            GenomicRegion(chromosome=chromosome, start=start,
                          end=start + 1000 - 1)
            for chromosome, stop, step in layout
            for start in range(1, stop, step)
        ]
        hic.add_regions(nodes)

        # Upper-triangle edges; pairs touching a masked region get no edge.
        # The actual weight values are irrelevant to the mask checks below.
        edges = []
        weight = 1
        for i in range(len(nodes)):
            for j in range(i, len(nodes)):
                if i not in masked and j not in masked:
                    edges.append(Edge(source=i, sink=j, weight=weight))
                weight += 1
        hic.add_edges(edges)

        m = hic.matrix()
    finally:
        # Release the Hic object even if building it or reading it failed.
        hic.close()

    # A cell must be masked iff its row or its column is an edge-less region;
    # both orientations (i, j) and (j, i) are checked.
    for j in range(len(nodes)):
        for i in range(m.shape[0]):
            if i in masked or j in masked:
                assert np.ma.is_masked(m[i, j])
                assert np.ma.is_masked(m[j, i])
            else:
                assert not np.ma.is_masked(m[i, j])
                assert not np.ma.is_masked(m[j, i])
def test_merge(self):
    """Check ``Hic.merge`` on two and three inputs.

    A fresh object of ``self.hic_class`` is filled with 12 regions (5 on
    chr1, 3 on chr2, 4 on chr3) and one edge per upper-triangle pair, with
    weights counting up from 1.  It is merged with ``self.hic`` once
    (2 inputs) and twice (3 inputs).  Every entry of ``self.hic`` must then
    be zero or equal to the merged entry divided by 2 (respectively 3).
    """
    # (chromosome, range stop, range step) -> 5 + 3 + 4 = 12 regions.
    layout = (("chr1", 5000, 1000), ("chr2", 3000, 1000), ("chr3", 2000, 500))

    merged_hic_2x = None
    merged_hic_3x = None
    try:
        hic = self.hic_class()
        try:
            nodes = [
                GenomicRegion(chromosome=chromosome, start=start,
                              end=start + 1000 - 1)
                for chromosome, stop, step in layout
                for start in range(1, stop, step)
            ]
            hic.add_regions(nodes, preserve_attributes=False)

            # One edge per upper-triangle pair, weights 1, 2, 3, ...
            edges = []
            weight = 1
            for i in range(len(nodes)):
                for j in range(i, len(nodes)):
                    edges.append(Edge(source=i, sink=j, weight=weight))
                    weight += 1
            hic.add_edges(edges)

            merged_hic_2x = Hic.merge([self.hic, hic])
            merged_hic_3x = Hic.merge([self.hic, hic, hic])
        finally:
            # The temporary input is closed before the matrices are read,
            # and also when building or merging fails.
            hic.close()

        m = self.hic[:, :]
        m_merged_2x = merged_hic_2x[:, :]
        m_merged_3x = merged_hic_3x[:, :]

        for i in range(m.shape[0]):
            for j in range(m.shape[1]):
                assert m[i, j] == 0 or m[i, j] == m_merged_2x[i, j] / 2
                assert m[i, j] == 0 or m[i, j] == m_merged_3x[i, j] / 3
    finally:
        # Close whatever merge results exist, even after a failed assertion.
        for merged in (merged_hic_2x, merged_hic_3x):
            if merged is not None:
                merged.close()
def setup_method(self, method):
    """Prepare the objects used by the tests of this class.

    Sets ``self.dir`` (directory of this file), ``self.hic`` (a synthetic
    12-region Hic object), ``self.hic_cerevisiae`` (loaded read-only from
    the ``test_matrix`` directory) and ``self.hic_class``.
    """
    self.dir = os.path.dirname(os.path.realpath(__file__))

    synthetic = Hic()

    # (chromosome, range stop, range step) -> 5 + 3 + 4 = 12 regions,
    # each 1000 bp long (so the chr3 regions, stepped by 500, overlap).
    layout = (("chr1", 5000, 1000), ("chr2", 3000, 1000), ("chr3", 2000, 500))
    nodes = [
        GenomicRegion(chromosome=chromosome, start=start, end=start + 1000 - 1)
        for chromosome, stop, step in layout
        for start in range(1, stop, step)
    ]
    synthetic.add_regions(nodes)

    # One edge per upper-triangle pair; weights count up from 1 in
    # row-major order.
    count = len(nodes)
    pairs = ((source, sink)
             for source in range(count)
             for sink in range(source, count))
    edges = [
        Edge(source=source, sink=sink, weight=weight)
        for weight, (source, sink) in enumerate(pairs, start=1)
    ]
    synthetic.add_edges(edges)

    self.hic = synthetic
    self.hic_cerevisiae = load(
        self.dir + "/test_matrix/cerevisiae.chrI.HindIII_upgrade.hic",
        mode='r')
    self.hic_class = Hic
def sample_fa_hic(file_name=None, zero_indices=frozenset(), tmpdir=None):
    """Build a synthetic Hic object with 120 regions and upper-triangle edges.

    Regions: 50 on chr1 and 30 on chr2 (step 1000), 40 on chr3 (step 500).
    Every region spans 1000 bp, so the chr3 regions overlap.

    :param file_name: forwarded to the ``Hic`` constructor
    :param zero_indices: iterable of region indices; no edge is created for
                         a pair in which either index is listed here
    :param tmpdir: forwarded to the ``Hic`` constructor
    :return: the populated ``Hic`` object (opened with ``mode='w'``); the
             caller is responsible for closing it
    """
    # The default is an immutable frozenset rather than a shared mutable
    # set(); normalising also gives O(1) membership tests for any iterable.
    zero_indices = frozenset(zero_indices)

    hic = Hic(file_name=file_name, tmpdir=tmpdir, mode='w')

    # (chromosome, range stop, range step) -> 50 + 30 + 40 = 120 regions.
    layout = (("chr1", 50000, 1000), ("chr2", 30000, 1000),
              ("chr3", 20000, 500))
    nodes = [
        GenomicRegion(chromosome=chromosome, start=start, end=start + 1000 - 1)
        for chromosome, stop, step in layout
        for start in range(1, stop, step)
    ]
    hic.add_regions(nodes)

    # Edges for the upper triangle with a running weight counter.
    # NOTE(review): the counter is advanced for skipped pairs as well, so a
    # pair's weight does not depend on zero_indices -- confirm this matches
    # the intended fixture layout.
    edges = []
    weight = 1
    for i in range(len(nodes)):
        for j in range(i, len(nodes)):
            if i not in zero_indices and j not in zero_indices:
                edges.append(Edge(source=i, sink=j, weight=weight))
            weight += 1

    hic.add_edges(edges)

    return hic
def sample_hic_matrix1(file_name=None, tmpdir=None):
    """Build a 10-region Hic object on chr1 with a fixed set of edges.

    ``file_name`` and ``tmpdir`` are forwarded to the ``Hic`` constructor.
    The edges encode this upper-triangular weight matrix (0 = no edge):
    """
    #       0 1 2 3 4 5 6 7 8 9
    #     #####################
    #   0 # 0 1 0 2 0 3 0 4 0 5
    #   1 #   6 0 7 0 8 0 9 0 1
    #   2 #     2 3 4 0 5 0 0 6
    #   3 #       7 0 8 9 0 1 0
    #   4 #         0 2 3 0 0 4
    #   5 #           5 6 7 8 9
    #   6 #             1 0 0 0
    #   7 #               2 3 0
    #   8 #                 0 4
    #   9 #                   5

    # Ten consecutive 1 kb regions: 1-1000, 1001-2000, ..., 9001-10000.
    nodes = [
        GenomicRegion('chr1', first, first + 999)
        for first in range(1, 10000, 1000)
    ]

    # (source, sink, weight), kept in the order the edges are inserted.
    edge_table = (
        (0, 1, 1), (3, 5, 8),
        (0, 3, 2), (3, 6, 9),
        (0, 5, 3), (3, 8, 1),
        (0, 7, 4), (4, 5, 2),
        (0, 9, 5), (4, 6, 3),
        (1, 1, 6), (4, 9, 4),
        (1, 3, 7), (5, 5, 5),
        (1, 5, 8), (5, 6, 6),
        (1, 7, 9), (5, 7, 7),
        (1, 9, 1), (5, 8, 8),
        (2, 2, 2), (5, 9, 9),
        (2, 3, 3), (6, 6, 1),
        (2, 4, 4), (7, 7, 2),
        (2, 6, 5), (7, 8, 3),
        (2, 9, 6), (8, 9, 4),
        (3, 3, 7), (9, 9, 5),
    )
    edges = [
        Edge(source=source, sink=sink, weight=weight)
        for source, sink, weight in edge_table
    ]

    hic = Hic(file_name=file_name, tmpdir=tmpdir)
    hic.add_regions(nodes)
    hic.add_edges(edges)

    return hic
def sample_hic_matrix2(file_name=None, tmpdir=None):
    """Build a 10-region Hic object on chr1 where regions 4 and 7 have no edges.

    ``file_name`` and ``tmpdir`` are forwarded to the ``Hic`` constructor.
    The edges encode this upper-triangular weight matrix (0 = no edge,
    ``-`` = cell in the edge-less row/column 4 or 7):
    """
    #       0 1 2 3 4 5 6 7 8 9
    #     #####################
    #   0 # 0 1 0 2 - 3 0 - 0 5
    #   1 #   6 0 7 - 8 0 - 0 1
    #   2 #     2 3 - 0 5 - 0 6
    #   3 #       7 - 8 9 - 1 0
    #   4 #         - - - - - -
    #   5 #           5 6 - 8 9
    #   6 #             1 - 0 0
    #   7 #               - - -
    #   8 #                 0 4
    #   9 #                   5

    # Ten consecutive 1 kb regions: 1-1000, 1001-2000, ..., 9001-10000.
    nodes = [
        GenomicRegion('chr1', first, first + 999)
        for first in range(1, 10000, 1000)
    ]

    # (source, sink, weight), kept in the order the edges are inserted.
    edge_table = (
        (0, 1, 1), (3, 5, 8),
        (0, 3, 2), (3, 6, 9),
        (0, 5, 3), (3, 8, 1),
        (0, 9, 5),
        (1, 1, 6),
        (1, 3, 7), (5, 5, 5),
        (1, 5, 8), (5, 6, 6),
        (1, 9, 1), (5, 8, 8),
        (2, 2, 2), (5, 9, 9),
        (2, 3, 3), (6, 6, 1),
        (2, 6, 5),
        (2, 9, 6), (8, 9, 4),
        (3, 3, 7), (9, 9, 5),
    )
    edges = [
        Edge(source=source, sink=sink, weight=weight)
        for source, sink, weight in edge_table
    ]

    hic = Hic(file_name=file_name, tmpdir=tmpdir)
    hic.add_regions(nodes)
    hic.add_edges(edges)

    return hic
def setup_method(self, method):
    """Create a 12-region Hic object and a dense matrix with the same weights.

    ``self.hic`` receives one edge per upper-triangle region pair, weighted
    1, 2, 3, ... in row-major order.  ``self.m`` is a ``RegionMatrix`` built
    from a symmetric 12x12 array holding those same weights, with the regions
    as both row and column labels.
    """
    hic = Hic()

    # (chromosome, range stop, range step) -> 5 + 3 + 4 = 12 regions,
    # each 1000 bp long (so the chr3 regions, stepped by 500, overlap).
    layout = (("chr1", 5000, 1000), ("chr2", 3000, 1000), ("chr3", 2000, 500))
    nodes = [
        GenomicRegion(chromosome=chromosome, start=start, end=start + 1000 - 1)
        for chromosome, stop, step in layout
        for start in range(1, stop, step)
    ]
    # Rows and columns are labelled with the same region objects.
    row_regions = list(nodes)
    col_regions = list(nodes)
    hic.add_regions(nodes)

    count = len(nodes)
    dense = np.zeros((12, 12))
    edges = []
    pairs = ((source, sink)
             for source in range(count)
             for sink in range(source, count))
    for weight, (source, sink) in enumerate(pairs, start=1):
        edges.append(Edge(source=source, sink=sink, weight=weight))
        # Mirror the weight so the dense reference matrix is symmetric.
        dense[source, sink] = weight
        dense[sink, source] = weight
    hic.add_edges(edges)

    self.hic = hic
    self.m = RegionMatrix(dense, row_regions=row_regions,
                          col_regions=col_regions)
def sample_hic_big(file_name=None, tmpdir=None):
    """Build a 120-region Hic object whose edge weights depend on position.

    Regions: 50 on chr1 and 30 on chr2 (step 1000), 40 on chr3 (step 500),
    each spanning 1000 bp.  Every upper-triangle pair ``(i, j)`` gets an edge
    weighted ``i + len(nodes) - (j - i)``.

    ``file_name`` and ``tmpdir`` are forwarded to the ``Hic`` constructor,
    which is opened with ``mode='w'``.  Returns the populated object.
    """
    hic = Hic(file_name=file_name, tmpdir=tmpdir, mode='w')

    # (chromosome, range stop, range step) -> 50 + 30 + 40 = 120 regions.
    layout = (("chr1", 50000, 1000), ("chr2", 30000, 1000),
              ("chr3", 20000, 500))
    nodes = [
        GenomicRegion(chromosome=chromosome, start=start, end=start + 1000 - 1)
        for chromosome, stop, step in layout
        for start in range(1, stop, step)
    ]
    hic.add_regions(nodes)

    total = len(nodes)
    edges = [
        Edge(source=source, sink=sink,
             weight=source + total - (sink - source))
        for source in range(total)
        for sink in range(source, total)
    ]
    hic.add_edges(edges)

    return hic
def setup_method(self, method):
    """Open ``test_matrix/test_fanc.hic`` read-only and store it as ``self.matrix``."""
    self.matrix = Hic(
        os.path.join(test_dir, 'test_matrix', 'test_fanc.hic'),
        mode='r',
    )