예제 #1
0
class TestHic(RegionMatrixContainerTestFactory):
    """Test class that exposes a read-only ``Hic`` fixture as ``self.matrix``.

    Test cases are inherited from ``RegionMatrixContainerTestFactory``
    (defined elsewhere); this class only supplies the object under test.
    """

    def setup_method(self, method):
        """Open ``test_matrix/test_fanc.hic`` below ``test_dir`` in read mode."""
        fixture_path = os.path.join(test_dir, 'test_matrix', 'test_fanc.hic')
        self.matrix = Hic(fixture_path, mode='r')

    def teardown_method(self, method):
        """Close the Hic object that ``setup_method`` opened."""
        self.matrix.close()
예제 #2
0
    def test_masked_matrix(self):
        """Check which entries of ``Hic.matrix()`` come back masked.

        A Hic object with 12 regions is filled with edges for every region
        pair except those involving region 1 or region 5. The test then
        asserts that an entry of the returned matrix is masked exactly when
        its row or column index is 1 or 5.
        """
        # Regions that never receive an edge and are expected to be masked.
        masked = {1, 5}

        # 12 regions in total: 5 on chr1, 3 on chr2 and 4 on chr3.
        # Each region spans 1000 bp; chr3 starts are only 500 bp apart,
        # so neighbouring chr3 regions overlap.
        nodes = []
        for chromosome, stop, step in (("chr1", 5000, 1000),
                                       ("chr2", 3000, 1000),
                                       ("chr3", 2000, 500)):
            for start in range(1, stop, step):
                nodes.append(GenomicRegion(chromosome=chromosome,
                                           start=start, end=start + 1000 - 1))

        hic = Hic()
        try:
            hic.add_regions(nodes)

            # Upper-triangle edges with increasing weight. The weight counter
            # also advances for skipped pairs, so the weights of the
            # remaining edges do not depend on which regions are left out.
            edges = []
            weight = 1
            for i in range(len(nodes)):
                for j in range(i, len(nodes)):
                    if i not in masked and j not in masked:
                        edges.append(Edge(source=i, sink=j, weight=weight))
                    weight += 1
            hic.add_edges(edges)

            m = hic.matrix()
        finally:
            # Release the Hic object even if building it or reading it fails.
            hic.close()

        # An entry must be masked if, and only if, it lies in a masked row
        # or a masked column.
        size = m.shape[0]
        for i in range(size):
            for j in range(size):
                expected = i in masked or j in masked
                actual = bool(np.ma.is_masked(m[i, j]))
                assert actual == expected, \
                    "entry ({}, {}): masked={}, expected masked={}".format(
                        i, j, actual, expected)
예제 #3
0
    def test_merge(self):
        """Check that merging Hic objects adds up their edge weights.

        A second Hic object with 12 regions and one edge per region pair
        (upper triangle, weights 1, 2, 3, ...) is merged with ``self.hic``
        once and twice. For every non-zero entry of ``self.hic`` the test
        asserts that the 2x-merged value is twice and the 3x-merged value
        three times the original.
        """
        hic = self.hic_class()
        merged_hic_2x = None
        merged_hic_3x = None
        try:
            try:
                # 12 regions in total: 5 on chr1, 3 on chr2 and 4 on chr3.
                nodes = []
                for chromosome, stop, step in (("chr1", 5000, 1000),
                                               ("chr2", 3000, 1000),
                                               ("chr3", 2000, 500)):
                    for start in range(1, stop, step):
                        nodes.append(
                            GenomicRegion(chromosome=chromosome,
                                          start=start, end=start + 1000 - 1))
                hic.add_regions(nodes, preserve_attributes=False)

                # One edge per (i, j >= i) pair with increasing weight.
                edges = []
                weight = 1
                for i in range(len(nodes)):
                    for j in range(i, len(nodes)):
                        edges.append(Edge(source=i, sink=j, weight=weight))
                        weight += 1
                hic.add_edges(edges)

                # NOTE(review): merging goes through ``Hic.merge`` although
                # the object is created via ``self.hic_class`` - confirm
                # this is intended for subclasses.
                merged_hic_2x = Hic.merge([self.hic, hic])
                merged_hic_3x = Hic.merge([self.hic, hic, hic])
            finally:
                # The temporary object is only needed as merge input.
                hic.close()

            m = self.hic[:, :]
            m_merged_2x = merged_hic_2x[:, :]
            m_merged_3x = merged_hic_3x[:, :]

            # Entries that are zero in the original are not compared.
            for i in range(m.shape[0]):
                for j in range(m.shape[1]):
                    assert m[i, j] == 0 or m[i, j] == m_merged_2x[i, j] / 2
                    assert m[i, j] == 0 or m[i, j] == m_merged_3x[i, j] / 3
        finally:
            # Close the merged objects even when a merge or assertion fails.
            for merged_hic in (merged_hic_2x, merged_hic_3x):
                if merged_hic is not None:
                    merged_hic.close()
예제 #4
0
    def setup_method(self, method):
        """Build the fixtures used by the tests of this class.

        Sets ``self.dir`` (directory of this file), ``self.hic`` (a Hic
        object with 12 regions and one edge per region pair),
        ``self.hic_cerevisiae`` (loaded read-only from the test_matrix
        folder) and ``self.hic_class``.
        """
        self.dir = os.path.dirname(os.path.realpath(__file__))

        sample = Hic()

        # (chromosome, exclusive bound for region starts, distance of starts)
        # -> 5 + 3 + 4 = 12 regions of 1000 bp each; the chr3 regions start
        # every 500 bp and therefore overlap their neighbours.
        layout = (("chr1", 5000, 1000),
                  ("chr2", 3000, 1000),
                  ("chr3", 2000, 500))
        regions = [
            GenomicRegion(chromosome=name, start=begin, end=begin + 1000 - 1)
            for name, stop, step in layout
            for begin in range(1, stop, step)
        ]
        sample.add_regions(regions)

        # Every (source, sink >= source) pair, numbered 1, 2, 3, ... in
        # row-major order; the running number is the edge weight.
        total = len(regions)
        pairs = [(a, b) for a in range(0, total) for b in range(a, total)]
        contacts = [
            Edge(source=a, sink=b, weight=number)
            for number, (a, b) in enumerate(pairs, start=1)
        ]
        sample.add_edges(contacts)

        self.hic = sample
        self.hic_cerevisiae = load(
            self.dir + "/test_matrix/cerevisiae.chrI.HindIII_upgrade.hic",
            mode='r')
        self.hic_class = Hic
예제 #5
0
def sample_fa_hic(file_name=None, zero_indices=None, tmpdir=None):
    """Create a Hic object with 120 regions and increasing edge weights.

    Regions: 50 on chr1, 30 on chr2 and 40 on chr3, each spanning 1000 bp
    (chr3 regions start every 500 bp and therefore overlap). One edge is
    added for every region pair ``(i, j >= i)`` unless ``i`` or ``j`` is in
    ``zero_indices``.

    :param file_name: passed through to ``Hic`` as ``file_name``
    :param zero_indices: container of region indices that must not receive
                         any edge; ``None`` (default) excludes nothing
    :param tmpdir: passed through to ``Hic`` as ``tmpdir``
    :return: the populated ``Hic`` object, opened with ``mode='w'``; it is
             not closed here
    """
    # ``None`` replaces the former mutable ``set()`` default so that no
    # object is shared between calls.
    if zero_indices is None:
        zero_indices = frozenset()

    hic = Hic(file_name=file_name, tmpdir=tmpdir, mode='w')

    # add some nodes (120 to be exact)
    nodes = []
    for i in range(1, 50000, 1000):
        nodes.append(GenomicRegion(chromosome="chr1", start=i, end=i + 1000 - 1))
    for i in range(1, 30000, 1000):
        nodes.append(GenomicRegion(chromosome="chr2", start=i, end=i + 1000 - 1))
    for i in range(1, 20000, 500):
        nodes.append(GenomicRegion(chromosome="chr3", start=i, end=i + 1000 - 1))
    hic.add_regions(nodes)

    # Add edges with increasing weight. The counter advances for excluded
    # pairs as well, so the weight of an edge does not depend on
    # ``zero_indices``.
    edges = []
    weight = 1
    for i in range(0, len(nodes)):
        for j in range(i, len(nodes)):
            if i not in zero_indices and j not in zero_indices:
                edges.append(Edge(source=i, sink=j, weight=weight))
            weight += 1

    hic.add_edges(edges)

    return hic
예제 #6
0
def sample_hic_matrix1(file_name=None, tmpdir=None):
    """Create a 10-region Hic object holding a fixed upper-triangle matrix.

    The ten regions are consecutive 1000 bp bins on chr1. The edge weights
    correspond to this matrix (zeros are simply not stored as edges):

    #     0 1 2 3 4 5 6 7 8 9
    #   #####################
    # 0 # 0 1 0 2 0 3 0 4 0 5
    # 1 #   6 0 7 0 8 0 9 0 1
    # 2 #     2 3 4 0 5 0 0 6
    # 3 #       7 0 8 9 0 1 0
    # 4 #         0 2 3 0 0 4
    # 5 #           5 6 7 8 9
    # 6 #             1 0 0 0
    # 7 #               2 3 0
    # 8 #                 0 4
    # 9 #                   5

    :param file_name: passed through to ``Hic`` as ``file_name``
    :param tmpdir: passed through to ``Hic`` as ``tmpdir``
    :return: the populated ``Hic`` object (not closed here)
    """
    regions = [
        GenomicRegion('chr1', first, first + 999)
        for first in range(1, 10000, 1000)
    ]

    # (source, sink, weight) triples in the order they are handed to Hic.
    contact_table = (
        (0, 1, 1), (3, 5, 8),
        (0, 3, 2), (3, 6, 9),
        (0, 5, 3), (3, 8, 1),
        (0, 7, 4), (4, 5, 2),
        (0, 9, 5), (4, 6, 3),
        (1, 1, 6), (4, 9, 4),
        (1, 3, 7), (5, 5, 5),
        (1, 5, 8), (5, 6, 6),
        (1, 7, 9), (5, 7, 7),
        (1, 9, 1), (5, 8, 8),
        (2, 2, 2), (5, 9, 9),
        (2, 3, 3), (6, 6, 1),
        (2, 4, 4), (7, 7, 2),
        (2, 6, 5), (7, 8, 3),
        (2, 9, 6), (8, 9, 4),
        (3, 3, 7), (9, 9, 5),
    )
    contacts = [
        Edge(source=src, sink=dst, weight=value)
        for src, dst, value in contact_table
    ]

    result = Hic(file_name=file_name, tmpdir=tmpdir)
    result.add_regions(regions)
    result.add_edges(contacts)
    return result
예제 #7
0
def sample_hic_matrix2(file_name=None, tmpdir=None):
    """Create a 10-region Hic object in which regions 4 and 7 have no edges.

    The ten regions are consecutive 1000 bp bins on chr1. The edge weights
    correspond to this matrix ("-" marks entries in row/column 4 or 7, for
    which no edge is added; zeros are not stored as edges either):

    #     0 1 2 3 4 5 6 7 8 9
    #   #####################
    # 0 # 0 1 0 2 - 3 0 - 0 5
    # 1 #   6 0 7 - 8 0 - 0 1
    # 2 #     2 3 - 0 5 - 0 6
    # 3 #       7 - 8 9 - 1 0
    # 4 #         - - - - - -
    # 5 #           5 6 - 8 9
    # 6 #             1 - 0 0
    # 7 #               - - -
    # 8 #                 0 4
    # 9 #                   5

    :param file_name: passed through to ``Hic`` as ``file_name``
    :param tmpdir: passed through to ``Hic`` as ``tmpdir``
    :return: the populated ``Hic`` object (not closed here)
    """
    regions = [
        GenomicRegion('chr1', first, first + 999)
        for first in range(1, 10000, 1000)
    ]

    # (source, sink, weight) triples in the order they are handed to Hic.
    contact_table = (
        (0, 1, 1), (3, 5, 8),
        (0, 3, 2), (3, 6, 9),
        (0, 5, 3), (3, 8, 1),
        (0, 9, 5),
        (1, 1, 6),
        (1, 3, 7), (5, 5, 5),
        (1, 5, 8), (5, 6, 6),
        (1, 9, 1), (5, 8, 8),
        (2, 2, 2), (5, 9, 9),
        (2, 3, 3), (6, 6, 1),
        (2, 6, 5),
        (2, 9, 6), (8, 9, 4),
        (3, 3, 7), (9, 9, 5),
    )
    contacts = [
        Edge(source=src, sink=dst, weight=value)
        for src, dst, value in contact_table
    ]

    result = Hic(file_name=file_name, tmpdir=tmpdir)
    result.add_regions(regions)
    result.add_edges(contacts)
    return result
예제 #8
0
    def setup_method(self, method):
        """Build a Hic object and the matching expected ``RegionMatrix``.

        ``self.hic`` receives 12 regions and one edge per region pair with
        weights 1, 2, 3, ...; ``self.m`` is a symmetric 12x12
        ``RegionMatrix`` filled with the same weights and annotated with
        the same regions on both axes.
        """
        sample = Hic()

        expected = np.zeros((12, 12))

        # (chromosome, exclusive bound for region starts, distance of starts)
        # -> 5 + 3 + 4 = 12 regions of 1000 bp each; the chr3 regions start
        # every 500 bp and therefore overlap their neighbours.
        layout = (("chr1", 5000, 1000),
                  ("chr2", 3000, 1000),
                  ("chr3", 2000, 500))
        regions = [
            GenomicRegion(chromosome=name, start=begin, end=begin + 1000 - 1)
            for name, stop, step in layout
            for begin in range(1, stop, step)
        ]
        # Rows and columns get their own lists holding the same regions.
        row_regions = list(regions)
        col_regions = list(regions)
        sample.add_regions(regions)

        # Walk the upper triangle in row-major order; the running number is
        # the edge weight and is mirrored into the expected matrix.
        total = len(regions)
        pairs = [(a, b) for a in range(0, total) for b in range(a, total)]
        contacts = []
        for number, (a, b) in enumerate(pairs, start=1):
            contacts.append(Edge(source=a, sink=b, weight=number))
            expected[a, b] = number
            expected[b, a] = number

        sample.add_edges(contacts)

        self.hic = sample
        self.m = RegionMatrix(expected,
                              row_regions=row_regions,
                              col_regions=col_regions)
예제 #9
0
def sample_hic_big(file_name=None, tmpdir=None):
    """Create a 120-region Hic object with distance-dependent edge weights.

    Regions: 50 on chr1, 30 on chr2 and 40 on chr3, each spanning 1000 bp
    (chr3 regions start every 500 bp and therefore overlap). Every region
    pair ``(i, j >= i)`` gets an edge of weight ``i + n - (j - i)`` where
    ``n`` is the number of regions, so within a row the weight drops by one
    per step away from the diagonal.

    :param file_name: passed through to ``Hic`` as ``file_name``
    :param tmpdir: passed through to ``Hic`` as ``tmpdir``
    :return: the populated ``Hic`` object, opened with ``mode='w'``; it is
             not closed here
    """
    result = Hic(file_name=file_name, tmpdir=tmpdir, mode='w')

    # (chromosome, exclusive bound for region starts, distance of starts)
    layout = (("chr1", 50000, 1000),
              ("chr2", 30000, 1000),
              ("chr3", 20000, 500))
    regions = [
        GenomicRegion(chromosome=name, start=begin, end=begin + 1000 - 1)
        for name, stop, step in layout
        for begin in range(1, stop, step)
    ]
    result.add_regions(regions)

    total = len(regions)
    contacts = [
        Edge(source=a, sink=b, weight=a + total - (b - a))
        for a in range(0, total)
        for b in range(a, total)
    ]
    result.add_edges(contacts)

    return result
예제 #10
0
 def setup_method(self, method):
     """Open ``test_matrix/test_fanc.hic`` below ``test_dir`` in read mode."""
     path_parts = (test_dir, 'test_matrix', 'test_fanc.hic')
     self.matrix = Hic(os.path.join(*path_parts), mode='r')