Example #1
def func(shape, broadcast, expected):
    assert_equal(strshape(shape, broadcast=broadcast), expected)
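Example #1 defines an assertion helper of the kind used in yield-style (nose) test generators. Below is a minimal sketch of how such a helper might be driven; the test name, the shape cases and the expected strings are hypothetical placeholders rather than strshape's documented output, and it assumes the helper above plus strshape and assert_equal are available in the test module.

def test_strshape():
    # hypothetical cases: the expected strings depend on strshape's actual
    # formatting rules
    for shape, broadcast, expected in [((3,), None, '3'),
                                       ((3, 4), None, '(3,4)')]:
        yield func, shape, broadcast, expected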
Example #2
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The FITS header for the data. If None, a minimal FITS header is
        inferred from the data.
    extension : boolean
        If True, the data will be written as an extension to an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if the
        data are not meant to be combined into a global array; in that case,
        make sure that the MPI processes do not call this routine with the
        same file name.

    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError("The arrays have an incompatible number of dimensions"
                         ": '{0}'.".format(', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError("The arrays have incompatible shapes: '{0}'.".format(
            strshape(shapes)))

    # get header
    if header is None:
        header = create_fitsheader_for(data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header['extname'] = extname

    # we remove the file first to avoid an annoying pyfits informative message
    if not extension:
        if comm.rank == 0:
            try:
                os.remove(filename)
            except OSError:
                pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, overwrite=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get global/local parameters
    nglobal = sum(s[0] for s in shapes)
    s = split(nglobal, comm.size, comm.rank)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError(
            "On rank {}, the local array shape '{}' is invalid. The first "
            "dimension does not match the expected local number '{}' given "
            "the global number '{}'.{}".format(
                comm.rank, data.shape, nlocal, nglobal,
                '' if comm.rank > 0 else ' Shapes are: {}.'.format(shapes)))

    # write FITS header
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays)
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    group = comm.Get_group()
    group = group.Incl(list(range(rank_nowork)))
    newcomm = comm.Create(group)

    # collectively write data
    if comm.rank < rank_nowork:
        # FITS data are stored big-endian: byteswap arrays whose bytes are
        # little-endian, then force a native dtype label so that the
        # DTYPE_MAP lookup does not see a big-endian dtype
        # (mpi4py 1.2.2 raises KeyError: '>d').
        if (sys.byteorder == 'little' and data.dtype.byteorder == '=') or \
           data.dtype.byteorder == '<':
            data = data.byteswap()
        data = data.newbyteorder('=')
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal * chunk], [nlocal * chunk],
                                      [s.start * chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm,
                          filename,
                          amode=MPI.MODE_APPEND | MPI.MODE_WRONLY
                          | MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
    if newcomm != MPI.COMM_NULL:
        newcomm.Free()

    # pad FITS file with zeros
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        padding = -datasize % BLOCK_SIZE
        with open(filename, 'ab') as f:
            if f.tell() - data_loc != datasize:
                raise RuntimeError('Unexpected file size.')
            f.write(padding * b'\0')

    comm.Barrier()
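A minimal usage sketch for write_fits above, to be launched with mpirun. The file names, array shape and dtype are illustrative; it assumes write_fits is importable from this module, that the module's split helper distributes the leading dimension evenly across the ranks, and that DTYPE_MAP covers float64.

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

# each rank holds a local slab; the leading dimensions are concatenated
# into a single global array in the output file
local = np.full((10, 32), comm.rank, dtype=float)
write_fits('combined.fits', local, None, False, None, comm)

# with MPI.COMM_SELF, every rank writes its own file, so the names must differ
write_fits('rank{}.fits'.format(comm.rank), local, None, False, None,
           MPI.COMM_SELF)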
Example #3
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The FITS header for the data. If None, a minimal FITS header is
        inferred from the data.
    extension : boolean
        If True, the data will be written as an extension to an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if the
        data are not meant to be combined into a global array; in that case,
        make sure that the MPI processes do not call this routine with the
        same file name.
    """

    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename+str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError("The arrays have an incompatible number of dimensions:"
                         " '{0}'.".format(', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError("The arrays have incompatible shapes: '{0}'.".format(
                         strshape(shapes)))

    # get header
    if header is None:
        header = create_fitsheader(fromdata=data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header.update('extname', extname)

    # we remove the file first to avoid an annoying pyfits informative message
    if not extension:
        if comm.rank == 0:
            try:
                os.remove(filename)
            except OSError:
                pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, clobber=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get global/local parameters
    nglobal = sum(s[0] for s in shapes)
    s = distribute_slice(nglobal)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError("On rank {}, the local array shape '{}' is invalid. Th"
            "e first dimension does not match the expected local number '{}' gi"
            "ven the global number '{}'.{}".format(comm.rank, data.shape,
            nlocal, nglobal, '' if comm.rank > 0 else ' Shapes are: {}.'.format(
            shapes)))

    # write FITS header
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays)
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    group = comm.Get_group()
    group = group.Incl(range(rank_nowork))
    newcomm = comm.Create(group)

    # collectively write data
    if comm.rank < rank_nowork:
        # FITS data are stored big-endian: byteswap arrays whose bytes are
        # little-endian, then force a native dtype label so that the
        # DTYPE_MAP lookup does not see a big-endian dtype
        # (mpi4py 1.2.2 raises KeyError: '>d').
        if (sys.byteorder == 'little' and data.dtype.byteorder == '=') or \
           data.dtype.byteorder == '<':
            data = data.byteswap()
        data = data.newbyteorder('=')
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal*chunk], [nlocal*chunk],
                                      [s.start*chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm, filename, amode=MPI.MODE_APPEND |
                          MPI.MODE_WRONLY | MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
    if newcomm != MPI.COMM_NULL:
        newcomm.Free()

    # pad FITS file with zeros
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        padding = -datasize % BLOCK_SIZE
        with open(filename, 'ab') as f:
            if f.tell() - data_loc != datasize:
                raise RuntimeError('Unexpected file size.')
            f.write(padding * b'\0')

    comm.Barrier()
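For reference, a small standalone check of the FITS block padding computed at the end of both versions; the sizes are illustrative.

BLOCK_SIZE = 2880                 # FITS block size in bytes
datasize = 40 * 32 * 8            # e.g. nglobal * chunk * itemsize = 10240 bytes
padding = -datasize % BLOCK_SIZE  # 1280 zero bytes complete the fourth block
assert (datasize + padding) % BLOCK_SIZE == 0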