예제 #1
0
    def test_to_table_maximum_cache_memory_in_bytes_limits(self):
        """Probe the lower bound of ``maximum_cache_memory_in_bytes``.

        A 15-byte budget must raise an error whose first argument mentions
        that one row of every block has to fit in memory; a 16-byte budget
        must run to completion.
        """
        def make_matrix():
            # Same arguments for both halves of the test: values 0.0 .. 9.0.
            return BlockMatrix._create(5, 2, [float(v) for v in range(10)], 2)

        expected_message = 'BlockMatrixCachedPartFile must be able to hold at least one row of every block in memory'

        too_small = make_matrix()
        caught = None
        try:
            too_small.to_table_row_major(2, maximum_cache_memory_in_bytes=15)._force_count()
        except Exception as err:
            caught = err

        # Completing without any exception is a failure in its own right.
        assert caught is not None
        assert expected_message in caught.args[0]

        # NOTE(review): 16 presumably corresponds to one row of two float64
        # values -- confirm against the BlockMatrix implementation.
        just_enough = make_matrix()
        just_enough.to_table_row_major(2, maximum_cache_memory_in_bytes=16)._force_count()
예제 #2
0
    def test_to_table(self):
        """Compare ``to_table_row_major`` output with a hand-built table.

        The comparison is repeated for every pairing of 1, 2, 3 partitions
        with block sizes 1, 2, 5.
        """
        n_rows, n_cols = 5, 2
        values = [float(v) for v in range(n_rows * n_cols)]

        row_type = hl.tstruct(row_idx=hl.tint64, entries=hl.tarray(hl.tfloat64))
        # Row i carries the i-th consecutive run of n_cols values.
        expected_rows = [
            {'row_idx': i, 'entries': values[i * n_cols:(i + 1) * n_cols]}
            for i in range(n_rows)
        ]

        combos = [(parts, size) for parts in (1, 2, 3) for size in (1, 2, 5)]
        for parts, size in combos:
            reference = hl.Table.parallelize(expected_rows, row_type, 'row_idx',
                                             parts)
            # Hand each matrix its own copy of the data list.
            matrix = BlockMatrix._create(n_rows, n_cols, list(values), size)
            result = matrix.to_table_row_major(parts)
            self.assertTrue(reference._same(result))
예제 #3
0
    def test_to_from_numpy(self):
        """Round-trip a random 10 x 11 matrix through BlockMatrix, NumPy and raw files.

        Five conversion routes are compared against the reference array,
        then the same five routes are repeated for the transpose.
        """
        shape = (10, 11)
        n_rows, n_cols = shape
        flat = np.random.rand(n_rows * n_cols)

        bm = BlockMatrix._create(n_rows, n_cols, flat.tolist(),
                                 row_major=True, block_size=4)
        a = flat.reshape(shape)

        with tempfile.NamedTemporaryFile() as bm_f, \
                tempfile.NamedTemporaryFile() as a_f:
            bm_path, a_path = bm_f.name, a_f.name
            bm.tofile(bm_path)
            a.tofile(a_path)

            # Evaluated in list order; every entry should reproduce `a`.
            candidates = [
                bm.to_numpy(),
                BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
                np.fromfile(bm_path).reshape(shape),
                BlockMatrix.fromfile(a_path, n_rows, n_cols,
                                     block_size=3).to_numpy(),
                BlockMatrix.fromfile(bm_path, n_rows, n_cols).to_numpy(),
            ]
            for got in candidates:
                self._assert_eq(got, a)

        bmt, at = bm.T, a.T
        t_shape = (n_cols, n_rows)

        with tempfile.NamedTemporaryFile() as bmt_f, \
                tempfile.NamedTemporaryFile() as at_f:
            bmt_path, at_path = bmt_f.name, at_f.name
            bmt.tofile(bmt_path)
            at.tofile(at_path)

            t_candidates = [
                bmt.to_numpy(),
                BlockMatrix.from_numpy(at).to_numpy(),
                np.fromfile(bmt_path).reshape(t_shape),
                BlockMatrix.fromfile(at_path, n_cols, n_rows).to_numpy(),
                BlockMatrix.fromfile(bmt_path, n_cols, n_rows).to_numpy(),
            ]
            for got in t_candidates:
                self._assert_eq(got, at)
예제 #4
0
    def test_to_table(self):
        """Check ``to_table_row_major`` against a parallelized reference table.

        Every combination of 1, 2, 3 partitions and block sizes 1, 2, 5 is
        checked, partitions varying in the outer loop.
        """
        schema = hl.tstruct(row_idx=hl.tint64, entries=hl.tarray(hl.tfloat64))
        entry_pairs = [[0.0, 1.0], [2.0, 3.0], [4.0, 5.0], [6.0, 7.0], [8.0, 9.0]]
        rows = [{'row_idx': idx, 'entries': pair}
                for idx, pair in enumerate(entry_pairs)]

        def check(parts, size):
            # Build the reference table first, then the matrix, then compare.
            reference = hl.Table.parallelize(rows, schema, 'row_idx', parts)
            matrix = BlockMatrix._create(5, 2, [float(v) for v in range(10)], size)
            converted = matrix.to_table_row_major(parts)
            self.assertTrue(reference._same(converted))

        for n_partitions in (1, 2, 3):
            for block_size in (1, 2, 5):
                check(n_partitions, block_size)
예제 #5
0
    def test_to_from_numpy(self):
        """Round-trip a random 10 x 11 matrix through BlockMatrix, NumPy and raw files.

        Five conversion routes are compared against the reference array,
        the same five are repeated for the transpose, and finally
        ``to_numpy(_force_blocking=True)`` is compared as well.

        The files written by ``BlockMatrix.tofile`` are read back through the
        current backend's filesystem; each handle is opened in a ``with``
        block so it is closed as soon as its bytes have been read.
        """
        n_rows = 10
        n_cols = 11
        data = np.random.rand(n_rows * n_cols)

        bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
        a = data.reshape((n_rows, n_cols))

        fs = hl.current_backend().fs

        with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f:
            bm.tofile(bm_f)
            a.tofile(a_f)

            a1 = bm.to_numpy()
            a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy()
            # Close the handle deterministically instead of leaving it to
            # garbage collection.
            with fs.open(bm_f, mode='rb') as bm_stream:
                a3 = np.frombuffer(bm_stream.read()).reshape((n_rows, n_cols))
            a4 = BlockMatrix.fromfile(a_f, n_rows, n_cols, block_size=3).to_numpy()
            a5 = BlockMatrix.fromfile(bm_f, n_rows, n_cols).to_numpy()

            self._assert_eq(a1, a)
            self._assert_eq(a2, a)
            self._assert_eq(a3, a)
            self._assert_eq(a4, a)
            self._assert_eq(a5, a)

        bmt = bm.T
        at = a.T

        with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f:
            bmt.tofile(bmt_f)
            at.tofile(at_f)

            at1 = bmt.to_numpy()
            at2 = BlockMatrix.from_numpy(at).to_numpy()
            with fs.open(bmt_f, mode='rb') as bmt_stream:
                at3 = np.frombuffer(bmt_stream.read()).reshape((n_cols, n_rows))
            at4 = BlockMatrix.fromfile(at_f, n_cols, n_rows).to_numpy()
            at5 = BlockMatrix.fromfile(bmt_f, n_cols, n_rows).to_numpy()

            self._assert_eq(at1, at)
            self._assert_eq(at2, at)
            self._assert_eq(at3, at)
            self._assert_eq(at4, at)
            self._assert_eq(at5, at)

        self._assert_eq(bm.to_numpy(_force_blocking=True), a)
예제 #6
0
    def test_to_matrix_table(self):
        """Check ``to_matrix_table_row_major`` against a reference and a round trip.

        First a 2 x 5 matrix holding 0.0 .. 9.0 is compared with a range
        matrix table whose ``element`` entry is ``row_idx * cols + col_idx``.
        Then a random 50 x 100 matrix is converted, rebuilt from its entry
        expression, converted again, and the two tables are compared.
        """
        n_partitions = 2
        rows, cols = 2, 5

        small = BlockMatrix._create(rows, cols, [float(v) for v in range(10)])
        actual = small.to_matrix_table_row_major(n_partitions)

        # Each step derives from the table produced by the previous step.
        base = hl.utils.range_matrix_table(rows, cols)
        with_element = base.annotate_entries(
            element=hl.float64(base.row_idx * cols + base.col_idx))
        col_keyed = with_element.key_cols_by(col_idx=hl.int64(with_element.col_idx))
        expected = col_keyed.key_rows_by(row_idx=hl.int64(col_keyed.row_idx))
        assert expected._same(actual)

        random_bm = BlockMatrix.random(50, 100, block_size=25, seed=0)
        mt = random_bm.to_matrix_table_row_major(n_partitions)
        rebuilt = BlockMatrix.from_entry_expr(mt.element)
        assert mt._same(rebuilt.to_matrix_table_row_major())
예제 #7
0
    def test_to_matrix_table(self):
        """Validate BlockMatrix -> MatrixTable conversion in two scenarios.

        Scenario one compares a 2 x 5 matrix of consecutive floats with a
        range matrix table annotated with ``row_idx * cols + col_idx`` and
        re-keyed by int64 indices. Scenario two checks that a random matrix
        survives a MatrixTable -> BlockMatrix -> MatrixTable round trip.
        """
        n_partitions = 2
        rows, cols = 2, 5

        values = [float(k) for k in range(rows * cols)]
        converted = BlockMatrix._create(rows, cols, values).to_matrix_table_row_major(n_partitions)

        reference = hl.utils.range_matrix_table(rows, cols)
        flat_index = reference.row_idx * cols + reference.col_idx
        reference = reference.annotate_entries(element=hl.float64(flat_index))
        # Re-key using fields of the table as it stands at each step.
        col_key = hl.int64(reference.col_idx)
        reference = reference.key_cols_by(col_idx=col_key)
        row_key = hl.int64(reference.row_idx)
        reference = reference.key_rows_by(row_idx=row_key)
        assert reference._same(converted)

        mt = BlockMatrix.random(50, 100, block_size=25, seed=0).to_matrix_table_row_major(n_partitions)
        mt_round_trip = BlockMatrix.from_entry_expr(mt.element).to_matrix_table_row_major()
        assert mt._same(mt_round_trip)
예제 #8
0
    def test_to_from_numpy(self):
        """Verify BlockMatrix / NumPy / raw-file conversions agree.

        Covers a random 10 x 11 matrix, its transpose, and finally
        ``to_numpy(_force_blocking=True)`` on the original matrix.
        """
        n_rows, n_cols = 10, 11
        data = np.random.rand(n_rows * n_cols)

        bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
        a = data.reshape((n_rows, n_cols))

        def assert_all_equal(results, reference):
            # Compare in the order the results were produced.
            for result in results:
                self._assert_eq(result, reference)

        with tempfile.NamedTemporaryFile() as bm_f, tempfile.NamedTemporaryFile() as a_f:
            bm.tofile(bm_f.name)
            a.tofile(a_f.name)

            from_bm = bm.to_numpy()
            from_array = BlockMatrix.from_numpy(a, block_size=5).to_numpy()
            from_bm_file_raw = np.fromfile(bm_f.name).reshape((n_rows, n_cols))
            from_array_file = BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy()
            from_bm_file = BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy()

            assert_all_equal(
                (from_bm, from_array, from_bm_file_raw, from_array_file, from_bm_file), a)

        bmt = bm.T
        at = a.T

        with tempfile.NamedTemporaryFile() as bmt_f, tempfile.NamedTemporaryFile() as at_f:
            bmt.tofile(bmt_f.name)
            at.tofile(at_f.name)

            t_from_bm = bmt.to_numpy()
            t_from_array = BlockMatrix.from_numpy(at).to_numpy()
            t_from_bm_file_raw = np.fromfile(bmt_f.name).reshape((n_cols, n_rows))
            t_from_array_file = BlockMatrix.fromfile(at_f.name, n_cols, n_rows).to_numpy()
            t_from_bm_file = BlockMatrix.fromfile(bmt_f.name, n_cols, n_rows).to_numpy()

            assert_all_equal(
                (t_from_bm, t_from_array, t_from_bm_file_raw, t_from_array_file, t_from_bm_file), at)

        blocking_result = bm.to_numpy(_force_blocking=True)
        self._assert_eq(blocking_result, a)