def test_export_rectangles(self):
    """Export rectangles of a dense 8x10 matrix as text and as binary.

    Every combination of three rectangle lists and three block sizes is
    exported twice, once with the default format and once with
    ``binary=True``, and each export directory is checked against the
    source ndarray with ``self._assert_rectangles_eq``.
    """
    dense = np.arange(0, 80, dtype=float).reshape(8, 10)

    rectangle_sets = (
        [[0, 1, 0, 1], [4, 5, 7, 8]],
        [[4, 5, 0, 10], [0, 8, 4, 5]],
        [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
         [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
         [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]],
    )

    # Flatten the (rectangles, block size) grid; the iteration order is
    # the same as nesting the block-size loop inside the rectangles loop.
    cases = [(rectangles, size)
             for rectangles in rectangle_sets
             for size in (3, 4, 10)]

    for rectangles, size in cases:
        text_dir = new_local_temp_dir()
        text_uri = local_path_uri(text_dir)
        matrix = BlockMatrix.from_numpy(dense, block_size=size)

        matrix.export_rectangles(text_uri, rectangles)
        self._assert_rectangles_eq(dense, text_dir, rectangles)

        binary_dir = new_local_temp_dir()
        binary_uri = local_path_uri(binary_dir)
        matrix.export_rectangles(binary_uri, rectangles, binary=True)
        self._assert_rectangles_eq(dense, binary_dir, rectangles, binary=True)
def test_export_rectangles(self):
    """Round-trip rectangles through a written block matrix.

    For each rectangle list and block size, the matrix is sparsified to
    the rectangles, written row-major, and exported both as text and as
    binary. Every exported file is read back and compared with the
    matching slice of the source ndarray.

    Finally, exporting a rectangle whose block was dropped by
    ``sparsify_rectangles`` is expected to raise ``FatalError``.
    """
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    for rects in [rects1, rects2, rects3]:
        for block_size in [3, 4, 10]:
            bm_uri = new_temp_file()

            rect_path = new_local_temp_dir()
            rect_uri = local_path_uri(rect_path)

            (BlockMatrix.from_numpy(nd, block_size=block_size)
             .sparsify_rectangles(rects)
             .write(bm_uri, force_row_major=True))

            BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)

            for i, r in enumerate(rects):
                # Files are named ``rect-<index>_<bounds joined by '-'>``.
                rect_file = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # ndmin=2 keeps single-row / single-column rectangles 2-D.
                actual = np.loadtxt(rect_file, ndmin=2)
                self._assert_eq(expected, actual)

            rect_path_bytes = new_local_temp_dir()
            rect_uri_bytes = local_path_uri(rect_path_bytes)

            BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)

            for i, r in enumerate(rects):
                rect_file = rect_path_bytes + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # The binary file is a flat run of doubles; restore the shape.
                actual = np.reshape(np.fromfile(rect_file), (r[1] - r[0], r[3] - r[2]))
                self._assert_eq(expected, actual)

    bm_uri = new_temp_file()
    rect_uri = new_temp_file()

    (BlockMatrix.from_numpy(nd, block_size=5)
     .sparsify_rectangles([[0, 1, 0, 1]])
     .write(bm_uri, force_row_major=True))

    with self.assertRaises(FatalError) as raised:
        BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])

    # The message check has to follow the ``with`` block: statements placed
    # after the raising call inside it never execute. The raised exception
    # lives on ``raised.exception``; the context manager's ``msg`` attribute
    # is only the optional custom failure message.
    # NOTE(review): substring match is used because the FatalError text may
    # carry extra context around this message -- confirm the exact format.
    self.assertIn(
        'block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]',
        str(raised.exception))
def test_export_rectangles(self):
    """Export rectangles from a written, sparsified block matrix.

    Each rectangle list is combined with each block size. The matrix is
    sparsified to the rectangles, written row-major, then exported as text
    and as binary; every exported file must equal the corresponding slice
    of the source ndarray.

    The last step checks that exporting a rectangle whose block was not
    kept by ``sparsify_rectangles`` raises ``FatalError``.
    """
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    def exported_file(directory, index, rect):
        # Exported files are named ``rect-<index>_<bounds joined by '-'>``.
        return directory + '/rect-' + str(index) + '_' + '-'.join(map(str, rect))

    for rects in [rects1, rects2, rects3]:
        for block_size in [3, 4, 10]:
            bm_uri = new_temp_file()

            rect_path = new_local_temp_dir()
            rect_uri = local_path_uri(rect_path)

            (BlockMatrix.from_numpy(nd, block_size=block_size)
             .sparsify_rectangles(rects)
             .write(bm_uri, force_row_major=True))

            BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)

            for i, r in enumerate(rects):
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # ndmin=2 keeps one-row / one-column rectangles 2-D.
                actual = np.loadtxt(exported_file(rect_path, i, r), ndmin=2)
                self._assert_eq(expected, actual)

            rect_path_bytes = new_local_temp_dir()
            rect_uri_bytes = local_path_uri(rect_path_bytes)

            BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)

            for i, r in enumerate(rects):
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # Binary output is a flat sequence of doubles.
                flat = np.fromfile(exported_file(rect_path_bytes, i, r))
                actual = np.reshape(flat, (r[1] - r[0], r[3] - r[2]))
                self._assert_eq(expected, actual)

    bm_uri = new_temp_file()
    rect_uri = new_temp_file()

    (BlockMatrix.from_numpy(nd, block_size=5)
     .sparsify_rectangles([[0, 1, 0, 1]])
     .write(bm_uri, force_row_major=True))

    with self.assertRaises(FatalError) as raised:
        BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])

    # Checked after the ``with`` block so the assertion actually runs, and
    # against ``raised.exception`` because the context manager's own ``msg``
    # attribute is not the exception text.
    # NOTE(review): substring match in case the FatalError text wraps this
    # message in additional context -- confirm the exact format.
    self.assertIn(
        'block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]',
        str(raised.exception))
def to_numpy(self):
    """Collect the block matrix into a `NumPy ndarray
    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__.

    Examples
    --------

    >>> from hail.linalg import BlockMatrix
    >>> bm = BlockMatrix.random(10, 20)
    >>> a = bm.to_numpy()

    Notes
    -----
    The number of entries must be less than :math:`2^{31}`.

    The resulting ndarray will have the same shape as the block matrix.

    Returns
    -------
    :class:`numpy.ndarray`
    """
    # Write the matrix to a local temporary file, then read it back as a
    # flat array and restore the matrix shape.
    local_file = new_local_temp_file()
    file_uri = local_path_uri(local_file)
    self.tofile(file_uri)

    flat = np.fromfile(local_file)
    return flat.reshape((self.n_rows, self.n_cols))
def _jarray_from_ndarray(nd):
    """Hand an ndarray to the JVM through a local temporary file.

    The array is converted with ``_ndarray_as_float64``, written to a
    local temporary file, and loaded on the Java side by
    ``RichArray.importFromDoubles``, which receives the file URI and the
    number of entries.

    Raises
    ------
    ValueError
        If the array holds 2^31 or more entries.

    Returns
    -------
    The result of ``RichArray.importFromDoubles`` (presumably a Java
    array of doubles -- confirm against the Scala side).
    """
    max_entries = 1 << 31
    if nd.size >= max_entries:
        raise ValueError(f'size of ndarray must be less than 2^31, found {nd.size}')

    as_doubles = _ndarray_as_float64(nd)

    local_file = new_local_temp_file()
    file_uri = local_path_uri(local_file)
    as_doubles.tofile(local_file)

    rich_array = Env.hail().utils.richUtils.RichArray
    return rich_array.importFromDoubles(Env.hc()._jhc, file_uri, as_doubles.size)
def test_export_blocks(self):
    """Export an all-ones 8x10 matrix as binary blocks and read it back."""
    ones = np.ones(shape=(8, 10))
    # block_size=20 exceeds both dimensions of the 8x10 input.
    matrix = BlockMatrix.from_numpy(ones, block_size=20)

    export_dir = new_local_temp_dir()
    export_uri = local_path_uri(export_dir)
    matrix.export_blocks(export_uri, binary=True)

    round_tripped = BlockMatrix.rectangles_to_numpy(export_dir, binary=True)
    self._assert_eq(ones, round_tripped)
def copy_log(self, path: str) -> None:
    """Try to copy the session log file to `path`.

    When `path` is a directory, the log keeps its own file name inside
    that directory. Any exception raised while checking the destination
    or copying is reported on standard error rather than propagated.

    Parameters
    ----------
    path : str
        Destination file or directory.
    """
    log = Env.hc()._log
    try:
        if self.is_dir(path):
            # Keep the log's base name when copying into a directory.
            path = os.path.join(path, os.path.basename(log))
        info(f"copying log to {repr(path)}...")
        source_uri = local_path_uri(Env.hc()._log)
        self.copy(source_uri, path)
    except Exception as e:
        # Best effort only: a failed log copy must not abort the caller.
        sys.stderr.write(f'Could not copy log: encountered error:\n {e}')
def test_rectangles_to_numpy(self):
    """Check ``rectangles_to_numpy`` on text and binary exports.

    Two overlapping rectangles of a 3x3 matrix are exported; the
    reassembled array must hold the exported entries and zeros elsewhere.
    """
    source = np.array([[1.0, 2.0, 3.0],
                       [4.0, 5.0, 6.0],
                       [7.0, 8.0, 9.0]])
    rectangles = [[0, 3, 0, 1], [1, 2, 0, 2]]

    text_dir = new_local_temp_dir()
    text_uri = local_path_uri(text_dir)
    BlockMatrix.from_numpy(source).export_rectangles(text_uri, rectangles)

    binary_dir = new_local_temp_dir()
    binary_uri = local_path_uri(binary_dir)
    BlockMatrix.from_numpy(source).export_rectangles(binary_uri, rectangles, binary=True)

    expected = np.array([[1.0, 0.0],
                         [4.0, 5.0],
                         [7.0, 0.0]])

    from_text = BlockMatrix.rectangles_to_numpy(text_dir)
    self._assert_eq(expected, from_text)

    from_binary = BlockMatrix.rectangles_to_numpy(binary_dir, binary=True)
    self._assert_eq(expected, from_binary)
def test_rectangles_to_numpy(self):
    """Reassemble exported rectangles into an ndarray, text and binary.

    Exports the same two rectangles of a 3x3 matrix in both formats and
    compares ``rectangles_to_numpy`` on each directory with the expected
    3x2 result.
    """
    source = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    rectangles = [[0, 3, 0, 1], [1, 2, 0, 2]]

    def export_to_fresh_dir(**export_options):
        # Each export uses its own temp directory and a freshly built matrix.
        out_dir = new_local_temp_dir()
        out_uri = local_path_uri(out_dir)
        BlockMatrix.from_numpy(source).export_rectangles(
            out_uri, rectangles, **export_options)
        return out_dir

    text_dir = export_to_fresh_dir()
    binary_dir = export_to_fresh_dir(binary=True)

    expected = np.array([[1.0, 0.0], [4.0, 5.0], [7.0, 0.0]])
    self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(text_dir))
    self._assert_eq(
        expected, BlockMatrix.rectangles_to_numpy(binary_dir, binary=True))
def test_export_rectangles_filtered(self):
    """Export rectangles from a block matrix obtained by slicing.

    The central 2x2 window of a 4x4 matrix is taken with ``bm[1:3, 1:3]``;
    the exported rectangles are indexed relative to that window.
    """
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    full = np.array([[1.0, 2.0, 3.0, 4.0],
                     [5.0, 6.0, 7.0, 8.0],
                     [9.0, 10.0, 11.0, 12.0],
                     [13.0, 14.0, 15.0, 16.0]])
    window = BlockMatrix.from_numpy(full)[1:3, 1:3]

    rows_of_window = [[0, 1, 0, 2], [1, 2, 0, 2]]
    window.export_rectangles(out_uri, rows_of_window)

    expected = np.array([[6.0, 7.0],
                         [10.0, 11.0]])
    self._assert_rectangles_eq(expected, out_dir, rows_of_window)
def from_numpy(cls, ndarray, block_size=None):
    """Distribute a `NumPy ndarray
    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__
    as a block matrix.

    Examples
    --------

    >>> import numpy as np
    >>> a = np.random.rand(10, 20)
    >>> bm = BlockMatrix.from_numpy(a)

    Notes
    -----
    The ndarray must have two dimensions, each of non-zero size.

    The number of entries must be less than :math:`2^{31}`.

    Parameters
    ----------
    ndarray: :class:`numpy.ndarray`
        ndarray with two dimensions, each of non-zero size.
    block_size: :obj:`int`, optional
        Block size. Default given by :meth:`default_block_size`.

    Returns
    -------
    :class:`.BlockMatrix`
    """
    # Any falsy block size (None or 0) selects the default.
    block_size = block_size or BlockMatrix.default_block_size()

    shape = ndarray.shape
    if ndarray.ndim != 2:
        raise FatalError(
            "from_numpy: ndarray must have two axes, found shape {}".format(shape))

    n_rows, n_cols = shape
    if 0 in (n_rows, n_cols):
        raise FatalError(
            "from_numpy: ndarray dimensions must be non-zero, found shape {}".format(shape))

    # The binary file is written as float64; convert other dtypes first.
    doubles = ndarray if ndarray.dtype == np.float64 else ndarray.astype(np.float64)

    path = new_local_temp_dir() + '/binary'
    uri = local_path_uri(path)
    doubles.tofile(path)

    return cls.fromfile(uri, n_rows, n_cols, block_size)
def test_export_rectangles_sparse(self):
    """Export rectangles from a sparsified block matrix.

    The 4x4 matrix uses 2x2 blocks. The export list adds one rectangle
    (``[2, 4, 2, 4]``) that is not in the sparsify list; the expected
    array has zeros in that region.
    """
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    dense = np.array([[1.0, 2.0, 3.0, 4.0],
                      [5.0, 6.0, 7.0, 8.0],
                      [9.0, 10.0, 11.0, 12.0],
                      [13.0, 14.0, 15.0, 16.0]])
    matrix = BlockMatrix.from_numpy(dense, block_size=2)

    kept = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
    exported = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]]

    sparse = matrix.sparsify_rectangles(kept)
    sparse.export_rectangles(out_uri, exported)

    expected = np.array([[1.0, 2.0, 3.0, 4.0],
                         [5.0, 6.0, 7.0, 8.0],
                         [9.0, 10.0, 0.0, 0.0],
                         [13.0, 14.0, 0.0, 0.0]])
    self._assert_rectangles_eq(expected, out_dir, exported)
def from_numpy(cls, ndarray, block_size=None):
    """Distribute a `NumPy ndarray
    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__
    as a block matrix.

    Examples
    --------

    >>> import numpy as np
    >>> a = np.random.rand(10, 20)
    >>> bm = BlockMatrix.from_numpy(a)

    Notes
    -----
    The ndarray must have two dimensions, each of non-zero size.

    The number of entries must be less than :math:`2^{31}`.

    Parameters
    ----------
    ndarray: :class:`numpy.ndarray`
        ndarray with two dimensions, each of non-zero size.
    block_size: :obj:`int`, optional
        Block size. Default given by :meth:`default_block_size`.

    Returns
    -------
    :class:`.BlockMatrix`
    """
    # Any falsy block size (None or 0) selects the default.
    block_size = block_size or BlockMatrix.default_block_size()

    if 0 in ndarray.shape:
        raise ValueError(
            f'from_numpy: ndarray dimensions must be non-zero, found shape {ndarray.shape}'
        )

    # Normalise through the module helpers before writing the file.
    matrix = _ndarray_as_float64(_ndarray_as_2d(ndarray))
    n_rows, n_cols = matrix.shape

    path = new_local_temp_file()
    uri = local_path_uri(path)
    matrix.tofile(path)

    return cls.fromfile(uri, n_rows, n_cols, block_size)
def copy_log(path: str) -> None:
    """Attempt to copy the session log to a hadoop-API-compatible location.

    Examples
    --------
    Specify a manual path:

    >>> hl.copy_log('gs://my-bucket/analysis-10-jan19.log')  # DOCTEST: +SKIP
    INFO: copying log to 'gs://my-bucket/analysis-10-jan19.log'...

    Copy to a directory:

    >>> hl.copy_log('gs://my-bucket/')  # DOCTEST: +SKIP
    INFO: copying log to 'gs://my-bucket/hail-20180924-2018-devel-46e5fad57524.log'...

    Notes
    -----
    Since Hail cannot currently log directly to distributed file systems,
    this function is provided as a utility for offloading logs from ephemeral
    nodes.

    If `path` is a directory, then the log file will be copied using its
    base name to the directory (e.g. ``/home/hail.log`` would be copied as
    ``gs://my-bucket/hail.log`` if `path` is ``gs://my-bucket``).

    Failures are reported on standard error and are not raised.

    Parameters
    ----------
    path: :obj:`str`
    """
    log = Env.hc()._log
    try:
        if hadoop_is_dir(path):
            # Copying into a directory: reuse the log's own file name.
            path = os.path.join(path, os.path.basename(log))
        info(f"copying log to {repr(path)}...")
        source_uri = local_path_uri(Env.hc()._log)
        hadoop_copy(source_uri, path)
    except Exception as e:
        # Best effort only: report the problem instead of propagating it.
        sys.stderr.write(f'Could not copy log: encountered error:\n {e}')
def _ndarray_from_jarray(ja):
    """Read a JVM-side array into NumPy through a local temporary file.

    ``RichArray.exportToDoubles`` is given the URI of a fresh local temp
    file together with `ja`; the file is then loaded with
    ``np.fromfile`` and returned as a flat ndarray.
    """
    local_file = new_local_temp_file()
    file_uri = local_path_uri(local_file)

    export_to_doubles = Env.hail().utils.richUtils.RichArray.exportToDoubles
    export_to_doubles(Env.hc()._jhc, file_uri, ja)

    return np.fromfile(local_file)