# Assumed module-level imports for these methods (not shown in this excerpt):
#   import h5py; import numpy; import elem; from mpi4py import MPI
def _read_elemental_dense(self, distribution='MC_MR'):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    constructor = elemental_dense.get_constructor(distribution)

    # Only the root process touches the filesystem.
    if rank == 0:
        f = h5py.File(self.fpath, 'r')
        dataset_obj = f[self.dataset]
    shape = dataset_obj.shape if rank == 0 else None
    shape = comm.bcast(shape, root=0)

    height = shape[0]
    width = shape[1]
    num_entries = height * width

    # Maximum memory capacity per process is assumed/hardcoded to 10 blocks.
    # XXX Should this number be passed in as a parameter?
    max_blocks_per_process = 10
    # Clamp to at least one entry per block so the search below terminates.
    max_block_entries = max(1, int((1.0 * num_entries) /
                                   (max_blocks_per_process * size)))

    # XXX We could set up a different block-generating scheme, e.g. more
    # square-ish blocks.
    block_height = int(numpy.sqrt(max_block_entries))
    while max_block_entries % block_height != 0:
        block_height = block_height + 1
    block_width = max_block_entries // block_height
    num_height_blocks = int(numpy.ceil(height / (1.0 * block_height)))
    num_width_blocks = int(numpy.ceil(width / (1.0 * block_width)))
    num_blocks = num_height_blocks * num_width_blocks

    A = constructor(height, width)
    for block in range(num_blocks):
        # The global coordinates of the block corners.
        i_start = (block // num_width_blocks) * block_height
        j_start = (block % num_width_blocks) * block_width
        i_end = min(height, i_start + block_height)
        j_end = min(width, j_start + block_width)

        # The block size.
        local_height = i_end - i_start
        local_width = j_end - j_start

        # The [CIRC, CIRC] matrix is populated by the reader process
        # (i.e. the root)...
        A_block = elem.DistMatrix_d_CIRC_CIRC(local_height, local_width)
        if rank == 0:
            A_block.Matrix[:] = dataset_obj[i_start:i_end, j_start:j_end]

        # ... then a view into the full matrix A is constructed...
        A_block_view = constructor()
        elem.View(A_block_view, A, i_start, j_start, local_height, local_width)

        # ... and finally this view is updated by redistributing the
        # [CIRC, CIRC] block.
        elem.Copy(A_block, A_block_view)

    if rank == 0:
        f.close()
    return A
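# A minimal, self-contained sketch (illustrative sizes only) checking that
# the blocking arithmetic above tiles the full matrix exactly once: every
# entry falls in precisely one (i_start:i_end, j_start:j_end) block.
import numpy

height, width = 8, 10
block_height, block_width = 3, 4
num_height_blocks = int(numpy.ceil(height / (1.0 * block_height)))
num_width_blocks = int(numpy.ceil(width / (1.0 * block_width)))

covered = numpy.zeros((height, width), dtype=int)
for block in range(num_height_blocks * num_width_blocks):
    i_start = (block // num_width_blocks) * block_height
    j_start = (block % num_width_blocks) * block_width
    i_end = min(height, i_start + block_height)
    j_end = min(width, j_start + block_width)
    covered[i_start:i_end, j_start:j_end] += 1

assert (covered == 1).all()  # each entry is read exactly once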
def _write_elemental_dense(self, A):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # XXX Currently gathers the whole matrix at the root.
    height, width = A.Height, A.Width
    A_CIRC_CIRC = elem.DistMatrix_d_CIRC_CIRC(height, width)
    elem.Copy(A, A_CIRC_CIRC)
    if rank == 0:
        A_numpy_dense = A_CIRC_CIRC.Matrix[:]
        self._write_numpy_dense(A_numpy_dense)
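# A hedged usage sketch for the writer above. It assumes the store's public
# write() dispatches to _write_elemental_dense when handed an Elemental
# distributed matrix; the file and dataset names are illustrative only.
import numpy
import elem
import skylark.io
from mpi4py import MPI

comm = MPI.COMM_WORLD
store = skylark.io.hdf5('output.hdf5', dataset='MyDataset')

# Populate a [CIRC, CIRC] matrix at the root and redistribute to [VC, *],
# mirroring the pattern used elsewhere in this section.
A_cc = elem.DistMatrix_d_CIRC_CIRC(8, 10)
if comm.Get_rank() == 0:
    A_cc.Matrix[:] = numpy.arange(80, dtype=numpy.float64).reshape(8, 10)
A = elem.DistMatrix_d_VC_STAR()
elem.Copy(A_cc, A)

store.write(A)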
# If features are missing, augment the data with zero columns.
if X.shape[1] < model.RFTs[0].getindim():
    fulldim = model.RFTs[0].getindim()
    n = X.shape[0]
    partialdim = X.shape[1]
    X = numpy.concatenate((X, numpy.zeros((n, fulldim - partialdim))), axis=1)

shape_X = X.shape if rank == 0 else None
shape_X = comm.bcast(shape_X, root=0)

if rank == 0:
    print "Distributing the matrix..."

# Get X, Y into [VC, *] distributed matrices, i.e. row-distributed.
X_cc = elem.DistMatrix_d_CIRC_CIRC(shape_X[0], shape_X[1])
Y_cc = elem.DistMatrix_d_CIRC_CIRC(shape_X[0], 1)
if rank == 0:
    X_cc.Matrix[:] = X
    data[1].resize((shape_X[0], 1))
    numpy.copyto(Y_cc.Matrix, data[1])
X = elem.DistMatrix_d_VC_STAR()
elem.Copy(X_cc, X)
Y = elem.DistMatrix_d_VC_STAR()
elem.Copy(Y_cc, Y)

# Predict with the model.
predictions, labels = model.predict(X.Matrix)

# XXX A distributed accuracy computation is still needed; see the sketch below.
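# A hedged sketch of that distributed accuracy computation: each process
# scores its local rows, then the counts are combined with an MPI allreduce.
# It assumes `predictions` is a 1-D numpy array of locally predicted labels
# aligned with the local rows of Y.
local_correct = float((predictions == Y.Matrix.squeeze()).sum())
local_total = float(len(predictions))
correct = comm.allreduce(local_correct, op=MPI.SUM)
total = comm.allreduce(local_total, op=MPI.SUM)
if rank == 0:
    print "accuracy = %.2f%%" % (100.0 * correct / total)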
import numpy
import elem
import skylark.io
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Create an HDF5 file and encapsulate it and its metadata in a store.
filename = 'mydataset.hdf5'
store = skylark.io.hdf5(filename, dataset='MyDataset')

# Create an 8 x 10 data matrix and populate the store with it.
if comm.Get_rank() == 0:
    m = 8
    n = 10
    matrix = numpy.array(range(1, 81)).reshape(m, n)
    store.write(matrix)

# Let all processes wait until the file is created.
comm.barrier()

# All processes read the matrix back from the file.
A = store.read('elemental-dense', distribution='VC_STAR')

# Check the Frobenius norm of the difference between the written and the
# read-back matrices: gather at the root...
A_CIRC_CIRC = elem.DistMatrix_d_CIRC_CIRC()
elem.Copy(A, A_CIRC_CIRC)

# ... then compute the norm at the root and print it.
if comm.rank == 0:
    diff_fro_norm = numpy.linalg.norm(A_CIRC_CIRC.Matrix[:] - matrix, ord='fro')
    print '||generated_matrix - read_matrix||_F = %f' % diff_fro_norm
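# The 'distribution' argument selects the target Elemental distribution; the
# same file can equally be read back as, e.g., a default [MC, MR] matrix
# (a small sketch reusing the store above):
B = store.read('elemental-dense', distribution='MC_MR')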