def func(shape, broadcast, expected):
    assert_equal(strshape(shape, broadcast=broadcast), expected)
# NB: this routine relies on module-level imports from the surrounding file:
# os, sys, pyfits (astropy.io.fits), MPI (mpi4py), and the helpers split,
# product, strshape, create_fitsheader_for and DTYPE_MAP.
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The data FITS header. It may be None, in which case a minimal FITS
        header is inferred from the data.
    extension : bool
        If True, the data are written as an extension of an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if the
        data are not meant to be combined into a global array. In that case,
        make sure the MPI processes do not call this routine with the same
        file name.

    """
    # check that the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError(
            "The arrays have an incompatible number of dimensions: "
            "'{0}'.".format(', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError(
            "The arrays have incompatible shapes: '{0}'.".format(
                strshape(shapes)))

    # get the FITS header
    if header is None:
        header = create_fitsheader_for(data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header['extname'] = extname

    # remove the file first, to avoid an annoying pyfits informative message
    if not extension and comm.rank == 0:
        try:
            os.remove(filename)
        except OSError:
            pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, overwrite=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get the global and local parameters
    nglobal = sum(s[0] for s in shapes)
    s = split(nglobal, comm.size, comm.rank)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError(
            "On rank {}, the local array shape '{}' is invalid. The first "
            "dimension does not match the expected local number '{}' given "
            "the global number '{}'.{}".format(
                comm.rank, data.shape, nlocal, nglobal,
                '' if comm.rank > 0 else
                ' Shapes are: {}.'.format(shapes)))

    # write the FITS header and broadcast the data offset in the file
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays). Note that
    # Group.Incl returns a new group instead of modifying it in place.
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    group = comm.Get_group().Incl(list(range(rank_nowork)))
    newcomm = comm.Create(group)

    # collectively write the data
    if comm.rank < rank_nowork:
        # FITS data are big-endian. mpi4py 1.2.2 cannot view the data as
        # big endian (KeyError '>d'), so swap the bytes while keeping a
        # native dtype.
        if (sys.byteorder == 'little' and data.dtype.byteorder == '=') \
                or data.dtype.byteorder == '<':
            data = data.byteswap().view(data.dtype.newbyteorder('='))
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal * chunk], [nlocal * chunk],
                                      [s.start * chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm, filename, amode=MPI.MODE_APPEND |
                          MPI.MODE_WRONLY | MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
        newcomm.Free()

    # pad the FITS file with zeros up to a multiple of the 2880-byte block
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        padding = -datasize % BLOCK_SIZE
        with open(filename, 'ab') as f:
            if f.tell() - data_loc != datasize:
                raise RuntimeError('Unexpected file size.')
            f.write(padding * b'\0')

    comm.Barrier()