def test_mask(self):
    """
    Checks mask_obs and mask_cov against masking done by hand.

    The reference is built by multiplying the data (and the rows and
    columns of the gathered covariance) with the 0/1 mask and keeping the
    non-zero entries; this relies on the random entries being non-zero.
    """
    # every node draws a mask, only the one broadcast from root is kept
    msk_arr = np.random.choice([0, 1], size=(1, 128))
    msk_arr = comm.bcast(msk_arr, root=0)
    if not mpirank:
        dat_arr = np.random.rand(2, 128)
    else:
        dat_arr = np.random.rand(1, 128)
    # local block of the covariance, row count taken from mpi_arrange
    row_begin, row_end = mpi_arrange(128)
    cov_arr = np.random.rand(row_end - row_begin, 128)
    # mask by methods
    dat_msk = mask_obs(dat_arr, msk_arr)
    cov_msk = mask_cov(cov_arr, msk_arr)
    # mask manually
    test_dat = dat_arr * msk_arr
    test_dat = test_dat[test_dat != 0]
    dat_msk = dat_msk[dat_msk != 0]
    self.assertListEqual(list(test_dat), list(dat_msk))
    # zero the masked columns, then (through the transpose) the masked rows
    cov_mat = np.vstack(comm.allgather(cov_arr))
    cov_mat = cov_mat * msk_arr
    cov_mat = np.transpose(cov_mat)
    cov_mat = cov_mat * msk_arr
    cov_mat = np.transpose(cov_mat)
    cov_mat = cov_mat[cov_mat != 0]
    test_cov = np.vstack(comm.allgather(cov_msk))
    test_cov = test_cov[test_cov != 0]
    # compare the manual reference with the mask_cov result
    # (the previous assertion compared test_cov with itself)
    self.assertListEqual(list(cov_mat), list(test_cov))
def test_slogdet_odd(self):
    """
    Compares mpi_slogdet on the local blocks with numpy.linalg.slogdet
    evaluated on the gathered matrix.
    """
    size = 32
    upper = mpi_arrange(size)[1]
    lower = mpi_arrange(size)[0]
    local_block = np.random.rand(upper - lower, size)
    sign, logdet = mpi_slogdet(local_block)
    # reference result from the matrix stacked over all nodes
    gathered = np.vstack(comm.allgather(local_block))
    expected = np.linalg.slogdet(gathered)
    self.assertEqual(sign, expected[0])
    self.assertAlmostEqual(logdet, expected[1])
def test_lu_solve_odd(self):
    """
    Compares mpi_lu_solve on the local blocks with numpy.linalg.solve
    evaluated on the gathered matrix, element by element.
    """
    size = 32
    upper = mpi_arrange(size)[1]
    lower = mpi_arrange(size)[0]
    local_block = np.random.rand(upper - lower, size)
    gathered = np.vstack(comm.allgather(local_block))
    # every node draws a right-hand side, the root one is broadcast in place
    rhs = np.random.rand(1, size)
    comm.Bcast(rhs, root=0)
    solution = mpi_lu_solve(local_block, rhs)
    # numpy solves for a column vector, transpose back to a row
    reference = np.linalg.solve(gathered, rhs.T)
    reference = reference.T
    ncols = solution.shape[1]
    for col in range(ncols):
        self.assertAlmostEqual(solution[0, col], reference[0, col])
def mpi_slogdet_timing(data_size):
    """
    Times one mpi_slogdet call on random data and prints the elapsed time.

    Each node fills a random block with `data_size` columns and a row count
    given by the difference of the two bounds returned by mpi_arrange.
    The report is printed only on the node where `mpirank` is zero.

    Parameters
    ----------
    data_size : int
        number of rows and columns of the global matrix
    """
    row_begin, row_end = mpi_arrange(data_size)
    random_data = np.random.rand(row_end - row_begin, data_size)
    tmr = Timer()
    tmr.tick('mpi_slogdet')
    # only the elapsed time is of interest, the result is discarded
    mpi_slogdet(random_data)
    tmr.tock('mpi_slogdet')
    if not mpirank:
        print('@ tools_profiles::mpi_slogdet_timing with ' + str(mpisize) +
              ' nodes')
        print('global matrix size (' + str(data_size) + ',' +
              str(data_size) + ')')
        print('elapse time ' + str(tmr.record['mpi_slogdet']) + '\n')
def oas_estimator_timing(data_size):
    """
    Times one oas_mcov call on random data and prints the elapsed time.

    Each node fills a random block with `data_size` columns and a row count
    given by the difference of the two bounds returned by mpi_arrange.
    The report is printed only on the node where `mpirank` is zero.

    Parameters
    ----------
    data_size : int
        number of rows and columns of the global matrix
    """
    row_begin, row_end = mpi_arrange(data_size)
    random_data = np.random.rand(row_end - row_begin, data_size)
    tmr = Timer()
    tmr.tick('oas_estimator')
    # only the elapsed time is of interest, the result is discarded
    oas_mcov(random_data)
    tmr.tock('oas_estimator')
    if not mpirank:
        print('@ tools_profiles::oas_estimator_timing with ' + str(mpisize) +
              ' nodes')
        print('global matrix size (' + str(data_size) + ',' +
              str(data_size) + ')')
        print('elapse time ' + str(tmr.record['oas_estimator']) + '\n')
def mpi_trans_timing(ensemble_size, data_size):
    """
    Times one mpi_trans call on random data and prints the elapsed time.

    Each node fills a random block with `data_size` columns and a row count
    given by the difference of the two bounds returned by mpi_arrange for
    `ensemble_size`. The report is printed only on the node where
    `mpirank` is zero.

    Parameters
    ----------
    ensemble_size : int
        number of rows of the global matrix
    data_size : int
        number of columns of the global matrix
    """
    row_begin, row_end = mpi_arrange(ensemble_size)
    random_data = np.random.rand(row_end - row_begin, data_size)
    tmr = Timer()
    tmr.tick('mpi_trans')
    # only the elapsed time is of interest, the result is discarded
    mpi_trans(random_data)
    tmr.tock('mpi_trans')
    if not mpirank:
        print('@ tools_profiles::mpi_trans_timing with ' + str(mpisize) +
              ' nodes')
        print('global matrix size (' + str(ensemble_size) + ',' +
              str(data_size) + ')')
        print('elapse time ' + str(tmr.record['mpi_trans']) + '\n')
def read_dist(self, file, key):
    """
    Reads the local rows of a data-set stored in a HDF5 file.

    The row range read on each node is the one returned by mpi_arrange
    for the number of rows of the stored data-set; all columns are read.

    Parameters
    ----------
    file : str
        filename, joined to the working directory of this handler
    key : str
        in form 'group name/dataset name'

    Returns
    -------
    numpy.ndarray
        rows `offset_begin:offset_end` of the data-set addressed by `key`
    """
    log.debug('@ io_handler::read_dist')
    assert isinstance(file, str)
    assert isinstance(key, str)
    # combine wk_path with filename
    # NOTE(review): presumably `file_path` is a property whose setter
    # updates `_file_path`, which is what gets opened below - confirm
    self.file_path = os.path.join(self._wk_dir, file)
    # read-only access, the file has to exist already
    with h5py.File(self._file_path, mode='r') as handle:
        nrows = fh_rows = handle[key].shape[0]
        row_start, row_stop = mpi_arrange(nrows)
        local_rows = handle[key][row_start:row_stop, :]
    # wait until every node has passed its read
    comm.Barrier()
    return local_rows
def test_trans(self):
    """
    Compares mpi_trans with the numpy transpose of the gathered array,
    restricted to the row range mpi_arrange returns for this node.
    """
    # the root node holds two rows, every other node one
    local_rows = 1 if mpirank else 2
    arr = np.random.rand(local_rows, 128)
    transposed = mpi_trans(arr)
    gathered = np.vstack(comm.allgather(arr))
    reference = np.transpose(gathered)
    begin, end = mpi_arrange(reference.shape[0])
    expected = reference[begin:end]
    for row, expected_row in enumerate(expected):
        self.assertListEqual(list(expected_row), list(transposed[row]))
def test_mpi_local(self):
    """
    Compares mpi_local with a manual slice of the broadcast root array.

    The root node creates the array, every other node passes None to
    mpi_local. The reference is the row range mpi_arrange returns for
    this node, cut from the array broadcast from root.
    """
    if not mpirank:
        arr_a = np.random.rand(32, 128)
    else:
        arr_a = None
    test_a = mpi_local(arr_a)
    arr_a = comm.bcast(arr_a, root=0)
    local_a_begin, local_a_end = mpi_arrange(arr_a.shape[0])
    part_a = arr_a[local_a_begin:local_a_end, :]
    part_a = part_a.reshape(1, -1)
    test_a = test_a.reshape(1, -1)
    # both are flattened into a single row, so the sizes must agree
    self.assertEqual(part_a.shape, test_a.shape)
    # iterate over the columns: len() of a (1, k) array is 1 and would
    # compare the first element only
    for i in range(part_a.shape[1]):
        self.assertAlmostEqual(part_a[0][i], test_a[0][i])
def test_mult(self):
    """
    Compares mpi_mult with numpy.dot evaluated on the gathered operands.

    The second operand is the mpi_trans of the first one. The reference is
    the row range mpi_arrange returns for this node, cut from the full
    product.
    """
    if not mpirank:
        arr_a = np.random.rand(2, 128)
    else:
        arr_a = np.random.rand(1, 128)
    arr_b = mpi_trans(arr_a)
    test_c = mpi_mult(arr_a, arr_b)
    # make comparison
    full_a = np.vstack(comm.allgather(arr_a))
    full_b = np.vstack(comm.allgather(arr_b))
    full_c = np.dot(full_a, full_b)
    local_begin, local_end = mpi_arrange(full_c.shape[0])
    part_c = (full_c[local_begin:local_end]).reshape(1, -1)
    test_c = test_c.reshape(1, -1)
    # both are flattened into a single row, so the sizes must agree
    self.assertEqual(part_c.shape, test_c.shape)
    # iterate over the columns: len() of a (1, k) array is 1 and would
    # compare the first element only
    for i in range(part_c.shape[1]):
        self.assertAlmostEqual(part_c[0][i], test_c[0][i])
def mask_cov(cov, mask):
    """
    Applies mask to the observable covariance

    Columns whose mask entry is zero are removed first. Rows are then
    removed according to the mask entries inside the row range that
    mpi_arrange returns for this node. The input is left untouched.

    Parameters
    ----------
    cov : distributed numpy.ndarray
        covariance matrix of observables in global shape
        (data size, data size)
        each node contains part of the global rows
    mask : numpy.ndarray
        copied mask map in shape (1, data size)

    Returns
    -------
    numpy.ndarray
        Masked covariance matrix of shape
        (masked data size, masked data size)
    """
    log.debug('@ masker::mask_cov')
    assert isinstance(cov, np.ndarray)
    assert isinstance(mask, np.ndarray)
    assert (mask.shape[0] == 1)
    assert (cov.shape[1] == mask.shape[1])
    # builtin bool: the np.bool alias was removed from NumPy
    keep = mask[0].astype(bool)
    # masking cols, np.delete returns a new array so `cov` is not modified
    new_cov = np.delete(cov, np.flatnonzero(~keep), axis=1)
    # masking rows, only the part of the mask matching the local rows
    row_min, row_max = mpi_arrange(keep.size)
    local_drop = np.flatnonzero(~keep[row_min:row_max])
    new_cov = np.delete(new_cov, local_drop, axis=0)
    return new_cov
def mask_cov(cov, mask):
    """
    Applies mask to the observable covariance.

    Parameters
    ----------
    cov : (distributed) numpy.ndarray
        Covariance matrix of observables in global shape
        (data size, data size)
        each node contains part of the global rows
        (if `imagine.rc['distributed_arrays']=True`).
    mask : numpy.ndarray
        Copied mask map in shape (1, data size).

    Returns
    -------
    masked_cov : numpy.ndarray
        Masked covariance matrix of shape
        (masked data size, masked data size).
    """
    log.debug('@ masker::mask_cov')
    assert (mask.shape[0] == 1)
    assert (cov.shape[1] == mask.shape[1])

    # Creates a 1D boolean mask over the columns
    column_mask = mask[0].astype(bool)
    # Each node accesses only some rows, so only the matching part of the
    # mask is needed along the row direction
    row_min, row_max = mpi_arrange(column_mask.size)
    row_mask = column_mask[row_min:row_max]
    # Constructs the 2D boolean mask for the local rows only, instead of
    # building the full (data size, data size) mask and slicing it
    local_mask = np.outer(row_mask, column_mask)
    nrows, ncolumns = row_mask.sum(), column_mask.sum()
    # Applies the mask and reshapes
    masked_cov = cov[local_mask].reshape((nrows, ncolumns))

    return masked_cov