def distribute(mat):
    """Scatter a matrix from the root to the individual nodes.

    The rows (first axis) are split at the boundaries returned by
    get_segments(), which is meant to make the pieces as even as possible.
    The matrix should be C-contiguous; if it is not, the root logs a warning
    and makes a contiguous copy before sending.

    Input:
        mat: the full matrix. Only the root's value is read; the other
            ranks never touch their argument when SIZE > 1.
    Output:
        the piece of the matrix that belongs to the calling rank. When
        SIZE == 1 the input is returned untouched.
    """
    # Nothing to scatter when running on a single process.
    if SIZE == 1:
        return mat

    # Only the root knows the layout; everyone else learns it via bcast.
    if is_root():
        layout = (mat.shape[1:], mat.dtype, get_segments(mat.shape[0]))
    else:
        layout = (None, None, None)
    # The order of these collective calls must be the same on every rank.
    tail_shape = COMM.bcast(layout[0])
    elem_type = COMM.bcast(layout[1])
    bounds = COMM.bcast(layout[2])

    if not is_root():
        # Allocate a buffer of exactly this rank's size and fill it.
        num_rows = bounds[RANK + 1] - bounds[RANK]
        piece = np.empty((num_rows,) + tail_shape, dtype=elem_type)
        safe_recv_matrix(piece)
        return piece

    if not mat.flags['C_CONTIGUOUS']:
        logging.warning('Warning: mat is not contiguous.')
        mat = np.ascontiguousarray(mat)
    for dest in range(1, SIZE):
        safe_send_matrix(mat[bounds[dest]:bounds[dest + 1]], dest=dest)
    # The root keeps the first piece as an independent copy.
    return mat[:bounds[1]].copy()
def distribute(mat):
    """Scatter a matrix from the root to the individual nodes.

    The rows (first axis) are split at the boundaries returned by
    get_segments(), which is meant to make the pieces as even as possible.
    The matrix should be C-contiguous; if it is not, the root logs a warning
    and makes a contiguous copy before sending.

    Input:
        mat: the full matrix. Only the root's value is read; the other
            ranks never touch their argument when SIZE > 1.
    Output:
        the piece of the matrix that belongs to the calling rank. When
        SIZE == 1 the input is returned untouched.
    """
    # Nothing to scatter when running on a single process.
    if SIZE == 1:
        return mat

    # Only the root knows the layout; everyone else learns it via bcast.
    if is_root():
        layout = (mat.shape[1:], mat.dtype, get_segments(mat.shape[0]))
    else:
        layout = (None, None, None)
    # The order of these collective calls must be the same on every rank.
    tail_shape = COMM.bcast(layout[0])
    elem_type = COMM.bcast(layout[1])
    bounds = COMM.bcast(layout[2])

    if not is_root():
        # Allocate a buffer of exactly this rank's size and fill it.
        num_rows = bounds[RANK + 1] - bounds[RANK]
        piece = np.empty((num_rows,) + tail_shape, dtype=elem_type)
        safe_recv_matrix(piece)
        return piece

    if not mat.flags['C_CONTIGUOUS']:
        logging.warning('Warning: mat is not contiguous.')
        mat = np.ascontiguousarray(mat)
    for dest in range(1, SIZE):
        safe_send_matrix(mat[bounds[dest]:bounds[dest + 1]], dest=dest)
    # The root keeps the first piece as an independent copy.
    return mat[:bounds[1]].copy()
def load_matrix_multi(filename, N=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The MPI size might be different from the one used when dumping: the rows
    of all parts are concatenated and re-split over the current ranks at the
    boundaries returned by get_segments().

    The stored files are in the format filename-xxxxx-of-xxxxx.npy.

    Input:
        filename: the common prefix of the part files.
        N: (optional) if given, specifies the number of parts the matrix is
            separated into. Otherwise, the number is inferred by listing the
            matching files with glob.
    Output:
        the rows of the matrix owned by the calling rank, with the dtype of
        the stored parts.
    Raises:
        IOError: if there are no parts to load. It is raised on every rank
            so that no rank is left waiting in a collective call.
    """
    if N is None:
        N = len(glob.glob('%s-?????-of-?????.npy' % (filename)))
    if N < 1:
        raise IOError('No matrix parts found for %r' % (filename,))
    logging.debug("Loading the matrix from %d parts", N)
    part_names = ['%s-%05d-of-%05d.npy' % (filename, i, N) for i in range(N)]

    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file; mmap_mode avoids reading the
        # actual contents just to learn the shapes.
        sizes = np.array([np.load(part, mmap_mode='r').shape[0]
                          for part in part_names])
        first = np.load(part_names[0], mmap_mode='r')
        shape = first.shape[1:]
        dtype = first.dtype
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)

    # now, each node opens the files that overlap with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK + 1]
    my_size = my_end - my_start
    # Allocate once, keeping the stored dtype (np.empty defaults to float64).
    mat = np.empty((my_size,) + shape, dtype=dtype)
    f_start = 0
    for part, size in zip(part_names, sizes):
        # [f_start, f_end) is the global row range covered by this part.
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            file_mat = np.load(part, mmap_mode='r')
            mat[max(f_start - my_start, 0):min(f_end - my_start, my_size)] = \
                file_mat[max(my_start - f_start, 0):min(my_end - f_start, size)]
        f_start = f_end
    return mat
def load_matrix_multi(filename, N=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The MPI size might be different from the one used when dumping: the rows
    of all parts are concatenated and re-split over the current ranks at the
    boundaries returned by get_segments().

    The stored files are in the format filename-xxxxx-of-xxxxx.npy.

    Input:
        filename: the common prefix of the part files.
        N: (optional) if given, specifies the number of parts the matrix is
            separated into. Otherwise, the number is inferred by listing the
            matching files with glob.
    Output:
        the rows of the matrix owned by the calling rank, with the dtype of
        the stored parts.
    Raises:
        IOError: if there are no parts to load. It is raised on every rank
            so that no rank is left waiting in a collective call.
    """
    if N is None:
        N = len(glob.glob('%s-?????-of-?????.npy' % (filename)))
    if N < 1:
        raise IOError('No matrix parts found for %r' % (filename,))
    logging.debug("Loading the matrix from %d parts", N)
    part_names = ['%s-%05d-of-%05d.npy' % (filename, i, N) for i in range(N)]

    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file; mmap_mode avoids reading the
        # actual contents just to learn the shapes.
        sizes = np.array([np.load(part, mmap_mode='r').shape[0]
                          for part in part_names])
        first = np.load(part_names[0], mmap_mode='r')
        shape = first.shape[1:]
        dtype = first.dtype
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)

    # now, each node opens the files that overlap with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK + 1]
    my_size = my_end - my_start
    # Allocate once, keeping the stored dtype (np.empty defaults to float64).
    mat = np.empty((my_size,) + shape, dtype=dtype)
    f_start = 0
    for part, size in zip(part_names, sizes):
        # [f_start, f_end) is the global row range covered by this part.
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            file_mat = np.load(part, mmap_mode='r')
            mat[max(f_start - my_start, 0):min(f_end - my_start, my_size)] = \
                file_mat[max(my_start - f_start, 0):min(my_end - f_start, size)]
        f_start = f_end
    return mat
def distribute_list(source):
    """Scatter a list from the root to the individual nodes.

    The list is cut at the boundaries returned by get_segments() and each
    rank receives one consecutive slice.

    Input:
        source: the full list. Only the root's value is read when SIZE > 1.
    Output:
        the slice owned by the calling rank. When SIZE == 1 the input is
        returned as is; when the root's list is empty every rank gets [].
    """
    # Nothing to scatter when running on a single process.
    if SIZE == 1:
        return source

    # Only the root knows the real length; the others contribute a dummy 0.
    local_len = 0
    if is_root():
        local_len = len(source)
        if not local_len:
            logging.warning("Warning: List has length 0")
    total = COMM.bcast(local_len)
    if not total:
        return []

    bounds = get_segments(total)
    if not is_root():
        return COMM.recv()
    for dest in range(1, SIZE):
        COMM.send(source[bounds[dest]:bounds[dest + 1]], dest=dest)
    # The root keeps the first slice for itself.
    return source[:bounds[1]]
def distribute_list(source):
    """Scatter a list from the root to the individual nodes.

    The list is cut at the boundaries returned by get_segments() and each
    rank receives one consecutive slice.

    Input:
        source: the full list. Only the root's value is read when SIZE > 1.
    Output:
        the slice owned by the calling rank. When SIZE == 1 the input is
        returned as is; when the root's list is empty every rank gets [].
    """
    # Nothing to scatter when running on a single process.
    if SIZE == 1:
        return source

    # Only the root knows the real length; the others contribute a dummy 0.
    local_len = 0
    if is_root():
        local_len = len(source)
        if not local_len:
            logging.warning("Warning: List has length 0")
    total = COMM.bcast(local_len)
    if not total:
        return []

    bounds = get_segments(total)
    if not is_root():
        return COMM.recv()
    for dest in range(1, SIZE):
        COMM.send(source[bounds[dest]:bounds[dest + 1]], dest=dest)
    # The root keeps the first slice for itself.
    return source[:bounds[1]]
def elect():
    '''elect() randomly chooses one of the nodes as the president.

    Every rank draws its own candidate in [0, SIZE); the value is then passed
    through COMM.bcast so that all ranks return the same rank.
    NOTE(review): presumably the broadcast value is the root's draw -- confirm
    against the communicator's default root.

    Input:
        None
    Output:
        the rank of the president
    '''
    candidate = np.random.randint(SIZE)
    return COMM.bcast(candidate)
def agree(decision):
    """agree() makes a decision consistent across all nodes.

    Input:
        decision: the local decision; the root's value is the one that wins.
    Output:
        the decision of the root, as propagated to everyone by COMM.bcast.
    """
    consensus = COMM.bcast(decision)
    return consensus
def load_matrix_multi(filename, N=None, name=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The MPI size might be different from the one used when dumping: the rows
    of all parts are concatenated and re-split over the current ranks at the
    boundaries returned by get_segments().

    Input:
        filename: either a string prefix, in which case the parts are the
            files matching filename-xxxxx-of-xxxxx.npy (sorted by name), or
            an iterable of explicit file names ending in 'npy' or 'mat'.
        N: unused; the number of parts is always the number of files. Kept
            so that existing callers keep working.
        name: if the input is a hdf5 mat file, specify the dataset name here.
    Output:
        the rows of the matrix owned by the calling rank, with the dtype of
        the first part.
    Raises:
        ValueError: if there is no file to load, a file has an unsupported
            extension, or a mat file is given without name. The checks run
            on every rank before any collective call, so that a bad input
            does not leave some ranks waiting for the root.
    """
    if isinstance(filename, str):
        # we use our default format
        files = sorted(glob.glob('%s-?????-of-?????.npy' % (filename)))
    else:
        files = list(filename)
    if not files:
        raise ValueError('No matrix parts found for %r' % (filename,))
    for f in files:
        if f.endswith('mat'):
            if name is None:
                raise ValueError(
                    'name must be given to load the mat file %r' % (f,))
        elif not f.endswith('npy'):
            raise ValueError('Unsupported matrix file %r' % (f,))
    N = len(files)
    logging.debug("Loading the matrix from %d parts", N)

    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file
        sizes = []
        shape = None
        dtype = None
        for f in files:
            if f.endswith('npy'):
                # mmap_mode avoids reading the contents to learn the shape.
                part = np.load(f, mmap_mode='r')
                part_shape, part_dtype = part.shape, part.dtype
            else:
                with h5py.File(f, 'r') as fid:
                    part_shape, part_dtype = fid[name].shape, fid[name].dtype
            sizes.append(part_shape[0])
            if shape is None:
                shape = part_shape[1:]
                dtype = part_dtype
        sizes = np.array(sizes)
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)

    # now, each node opens the files that overlap with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK + 1]
    my_size = my_end - my_start
    # Allocate once, keeping the stored dtype (np.empty defaults to float64).
    mat = np.empty((my_size,) + shape, dtype=dtype)
    f_start = 0
    for f, size in zip(files, sizes):
        # [f_start, f_end) is the global row range covered by this part.
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            dst = slice(max(f_start - my_start, 0),
                        min(f_end - my_start, my_size))
            src = slice(max(my_start - f_start, 0),
                        min(my_end - f_start, size))
            if f.endswith('npy'):
                mat[dst] = np.load(f, mmap_mode='r')[src]
            else:
                with h5py.File(f, 'r') as fid:
                    mat[dst] = fid[name][src]
        f_start = f_end
    return mat
def load_matrix_multi(filename, N=None, name=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The MPI size might be different from the one used when dumping: the rows
    of all parts are concatenated and re-split over the current ranks at the
    boundaries returned by get_segments().

    Input:
        filename: either a string prefix, in which case the parts are the
            files matching filename-xxxxx-of-xxxxx.npy (sorted by name), or
            an iterable of explicit file names ending in 'npy' or 'mat'.
        N: unused; the number of parts is always the number of files. Kept
            so that existing callers keep working.
        name: if the input is a hdf5 mat file, specify the dataset name here.
    Output:
        the rows of the matrix owned by the calling rank, with the dtype of
        the first part.
    Raises:
        ValueError: if there is no file to load, a file has an unsupported
            extension, or a mat file is given without name. The checks run
            on every rank before any collective call, so that a bad input
            does not leave some ranks waiting for the root.
    """
    if isinstance(filename, str):
        # we use our default format
        files = sorted(glob.glob('%s-?????-of-?????.npy' % (filename)))
    else:
        files = list(filename)
    if not files:
        raise ValueError('No matrix parts found for %r' % (filename,))
    for f in files:
        if f.endswith('mat'):
            if name is None:
                raise ValueError(
                    'name must be given to load the mat file %r' % (f,))
        elif not f.endswith('npy'):
            raise ValueError('Unsupported matrix file %r' % (f,))
    N = len(files)
    logging.debug("Loading the matrix from %d parts", N)

    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file
        sizes = []
        shape = None
        dtype = None
        for f in files:
            if f.endswith('npy'):
                # mmap_mode avoids reading the contents to learn the shape.
                part = np.load(f, mmap_mode='r')
                part_shape, part_dtype = part.shape, part.dtype
            else:
                with h5py.File(f, 'r') as fid:
                    part_shape, part_dtype = fid[name].shape, fid[name].dtype
            sizes.append(part_shape[0])
            if shape is None:
                shape = part_shape[1:]
                dtype = part_dtype
        sizes = np.array(sizes)
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)

    # now, each node opens the files that overlap with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK + 1]
    my_size = my_end - my_start
    # Allocate once, keeping the stored dtype (np.empty defaults to float64).
    mat = np.empty((my_size,) + shape, dtype=dtype)
    f_start = 0
    for f, size in zip(files, sizes):
        # [f_start, f_end) is the global row range covered by this part.
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            dst = slice(max(f_start - my_start, 0),
                        min(f_end - my_start, my_size))
            src = slice(max(my_start - f_start, 0),
                        min(my_end - f_start, size))
            if f.endswith('npy'):
                mat[dst] = np.load(f, mmap_mode='r')[src]
            else:
                with h5py.File(f, 'r') as fid:
                    mat[dst] = fid[name][src]
        f_start = f_end
    return mat