def test_to_table_maximum_cache_memory_in_bytes_limits(self):
    """A row of two float64s needs 16 bytes: a 15-byte cache must be rejected,
    while a 16-byte cache is just large enough to succeed."""
    # Undersized cache: conversion must fail with the cache-capacity error.
    undersized = BlockMatrix._create(5, 2, [float(i) for i in range(10)], 2)
    raised = False
    try:
        undersized.to_table_row_major(2, maximum_cache_memory_in_bytes=15)._force_count()
    except Exception as exc:
        raised = True
        assert 'BlockMatrixCachedPartFile must be able to hold at least one row of every block in memory' in exc.args[0]
    assert raised

    # Minimum sufficient cache: the same conversion completes without error.
    adequate = BlockMatrix._create(5, 2, [float(i) for i in range(10)], 2)
    adequate.to_table_row_major(2, maximum_cache_memory_in_bytes=16)._force_count()
def test_to_table(self):
    """A 5x2 row-major BlockMatrix converts to a table that matches a
    hand-built reference, across several partition counts and block sizes."""
    schema = hl.tstruct(row_idx=hl.tint64, entries=hl.tarray(hl.tfloat64))
    # Row r holds the row-major values [2r, 2r+1] as floats.
    rows = [
        {'row_idx': r, 'entries': [float(2 * r), float(2 * r + 1)]}
        for r in range(5)
    ]
    for n_partitions in (1, 2, 3):
        for block_size in (1, 2, 5):
            expected = hl.Table.parallelize(rows, schema, 'row_idx', n_partitions)
            bm = BlockMatrix._create(5, 2, [float(i) for i in range(10)], block_size)
            actual = bm.to_table_row_major(n_partitions)
            self.assertTrue(expected._same(actual))
def test_to_from_numpy(self):
    """Round-trip a random matrix and its transpose between BlockMatrix,
    numpy arrays, and flat binary files, checking every conversion path."""
    n_rows, n_cols = 10, 11
    data = np.random.rand(n_rows * n_cols)
    bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), row_major=True, block_size=4)
    a = data.reshape((n_rows, n_cols))

    with tempfile.NamedTemporaryFile() as bm_f, tempfile.NamedTemporaryFile() as a_f:
        bm.tofile(bm_f.name)
        a.tofile(a_f.name)

        # Every conversion route must reproduce the original array.
        for converted in (
            bm.to_numpy(),
            BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
            np.fromfile(bm_f.name).reshape((n_rows, n_cols)),
            BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy(),
            BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy(),
        ):
            self._assert_eq(converted, a)

    # Repeat the same checks for the transpose.
    bmt = bm.T
    at = a.T

    with tempfile.NamedTemporaryFile() as bmt_f, tempfile.NamedTemporaryFile() as at_f:
        bmt.tofile(bmt_f.name)
        at.tofile(at_f.name)

        for converted in (
            bmt.to_numpy(),
            BlockMatrix.from_numpy(at).to_numpy(),
            np.fromfile(bmt_f.name).reshape((n_cols, n_rows)),
            BlockMatrix.fromfile(at_f.name, n_cols, n_rows).to_numpy(),
            BlockMatrix.fromfile(bmt_f.name, n_cols, n_rows).to_numpy(),
        ):
            self._assert_eq(converted, at)
def test_to_table(self):
    """Converting a 5x2 BlockMatrix to a row-major table yields a table equal
    to a hand-built reference for every partition count and block size."""
    schema = hl.tstruct(row_idx=hl.tint64, entries=hl.tarray(hl.tfloat64))
    # Row r contains the row-major pair [2r, 2r+1] as floats.
    rows = [
        {'row_idx': r, 'entries': [float(2 * r), float(2 * r + 1)]}
        for r in range(5)
    ]
    for n_partitions in (1, 2, 3):
        for block_size in (1, 2, 5):
            expected = hl.Table.parallelize(rows, schema, 'row_idx', n_partitions)
            bm = BlockMatrix._create(5, 2, [float(i) for i in range(10)], block_size)
            actual = bm.to_table_row_major(n_partitions)
            self.assertTrue(expected._same(actual))
def test_to_from_numpy(self):
    """Round-trip a random matrix and its transpose between BlockMatrix,
    numpy arrays, and flat files on the backend filesystem."""
    n_rows, n_cols = 10, 11
    data = np.random.rand(n_rows * n_cols)
    bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
    a = data.reshape((n_rows, n_cols))

    with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f:
        bm.tofile(bm_f)
        a.tofile(a_f)

        # Read the BlockMatrix's flat file back through the backend FS.
        raw = hl.current_backend().fs.open(bm_f, mode='rb').read()

        # Every conversion route must reproduce the original array.
        for converted in (
            bm.to_numpy(),
            BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
            np.frombuffer(raw).reshape((n_rows, n_cols)),
            BlockMatrix.fromfile(a_f, n_rows, n_cols, block_size=3).to_numpy(),
            BlockMatrix.fromfile(bm_f, n_rows, n_cols).to_numpy(),
        ):
            self._assert_eq(converted, a)

    # Repeat the same checks for the transpose.
    bmt = bm.T
    at = a.T

    with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f:
        bmt.tofile(bmt_f)
        at.tofile(at_f)

        raw_t = hl.current_backend().fs.open(bmt_f, mode='rb').read()

        for converted in (
            bmt.to_numpy(),
            BlockMatrix.from_numpy(at).to_numpy(),
            np.frombuffer(raw_t).reshape((n_cols, n_rows)),
            BlockMatrix.fromfile(at_f, n_cols, n_rows).to_numpy(),
            BlockMatrix.fromfile(bmt_f, n_cols, n_rows).to_numpy(),
        ):
            self._assert_eq(converted, at)

    # The blocking collection path must agree with the default path.
    self._assert_eq(bm.to_numpy(_force_blocking=True), a)
def test_to_matrix_table(self):
    """A dense BlockMatrix converts to the expected row-major MatrixTable and
    survives a from_entry_expr round trip."""
    n_partitions = 2
    n_rows, n_cols = 2, 5
    matrix = BlockMatrix._create(n_rows, n_cols, [float(i) for i in range(10)])
    actual = matrix.to_matrix_table_row_major(n_partitions)

    # Reference table: each entry's value is its row-major index, keys are int64.
    expected = hl.utils.range_matrix_table(n_rows, n_cols)
    expected = expected.annotate_entries(element=hl.float64(expected.row_idx * n_cols + expected.col_idx))
    expected = expected.key_cols_by(col_idx=hl.int64(expected.col_idx))
    expected = expected.key_rows_by(row_idx=hl.int64(expected.row_idx))
    assert expected._same(actual)

    # Round trip through an entry expression preserves the matrix table.
    random_bm = BlockMatrix.random(50, 100, block_size=25, seed=0)
    mt = random_bm.to_matrix_table_row_major(n_partitions)
    round_trip = BlockMatrix.from_entry_expr(mt.element).to_matrix_table_row_major()
    assert mt._same(round_trip)
def test_to_matrix_table(self):
    """Row-major MatrixTable conversion matches a hand-built reference, and a
    BlockMatrix rebuilt from the entry expression reproduces the same table."""
    n_partitions = 2
    n_rows, n_cols = 2, 5
    matrix = BlockMatrix._create(n_rows, n_cols, [float(i) for i in range(10)])
    actual = matrix.to_matrix_table_row_major(n_partitions)

    # Reference: entry value equals its row-major index; keys cast to int64.
    expected = hl.utils.range_matrix_table(n_rows, n_cols)
    expected = expected.annotate_entries(element=hl.float64(expected.row_idx * n_cols + expected.col_idx))
    expected = expected.key_cols_by(col_idx=hl.int64(expected.col_idx))
    expected = expected.key_rows_by(row_idx=hl.int64(expected.row_idx))
    assert expected._same(actual)

    # from_entry_expr round trip on a larger random matrix.
    random_bm = BlockMatrix.random(50, 100, block_size=25, seed=0)
    mt = random_bm.to_matrix_table_row_major(n_partitions)
    round_trip = BlockMatrix.from_entry_expr(mt.element).to_matrix_table_row_major()
    assert mt._same(round_trip)
def test_to_from_numpy(self):
    """Round-trip a random matrix and its transpose between BlockMatrix,
    numpy arrays, and flat binary files, then check the blocking path."""
    n_rows, n_cols = 10, 11
    data = np.random.rand(n_rows * n_cols)
    bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
    a = data.reshape((n_rows, n_cols))

    with tempfile.NamedTemporaryFile() as bm_f, tempfile.NamedTemporaryFile() as a_f:
        bm.tofile(bm_f.name)
        a.tofile(a_f.name)

        # Every conversion route must reproduce the original array.
        for converted in (
            bm.to_numpy(),
            BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
            np.fromfile(bm_f.name).reshape((n_rows, n_cols)),
            BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy(),
            BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy(),
        ):
            self._assert_eq(converted, a)

    # Repeat the same checks for the transpose.
    bmt = bm.T
    at = a.T

    with tempfile.NamedTemporaryFile() as bmt_f, tempfile.NamedTemporaryFile() as at_f:
        bmt.tofile(bmt_f.name)
        at.tofile(at_f.name)

        for converted in (
            bmt.to_numpy(),
            BlockMatrix.from_numpy(at).to_numpy(),
            np.fromfile(bmt_f.name).reshape((n_cols, n_rows)),
            BlockMatrix.fromfile(at_f.name, n_cols, n_rows).to_numpy(),
            BlockMatrix.fromfile(bmt_f.name, n_cols, n_rows).to_numpy(),
        ):
            self._assert_eq(converted, at)

    # The blocking collection path must agree with the default path.
    self._assert_eq(bm.to_numpy(_force_blocking=True), a)