Example no. 1
def createnc(comm,casename,vname,periods,units,calendar,fields,nx,ny,ntime,nz=None):
  # create a parallel NetCDF file "<casename>_<vname>_<periods>.nc" with time,
  # horizontal (and optionally vertical) dimensions and one variable per field
  from mpi4py import MPI
  from netCDF4 import Dataset

  filename            = "%s_%s_%s.nc"%(casename,vname,periods)
  rootgrp             = Dataset(filename     , "w" ,
                                parallel=True, comm=comm,
                                info=MPI.Info())
  rootgrp.createDimension("west_east"        , nx )
  rootgrp.createDimension("south_north"      , ny )
  #rootgrp.createDimension("time"     , None)
  rootgrp.createDimension("time"     , ntime)
  T                   = rootgrp.createVariable("time"    , "f4"  , ("time" , ))
  T.units             = units
  T.calendar          = calendar
  if nz:
    rootgrp.createDimension("bottom_top"      , nz )
    nc_dim=("time","bottom_top","south_north","west_east",)
  else:
    nc_dim=("time","south_north","west_east",)
  for field in fields:
    rootgrp.createVariable(field,"f4",nc_dim)
  return rootgrp 
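A minimal usage sketch for the helper above, assuming it is launched under MPI (e.g. mpirun -np 4 python write_fields.py); the case name, variable name, time units and grid sizes are placeholder values, not taken from the original:

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

nx, ny, ntime = 100, 80, 24   # placeholder grid and time-axis sizes

rootgrp = createnc(comm, "testcase", "T2", "2020-01",
                   "hours since 2020-01-01 00:00:00", "standard",
                   ["T2"], nx, ny, ntime)

# each rank writes its own contiguous block of time steps
# (independent mode is fine here because "time" is a fixed-size dimension)
n1 = rank * (ntime // size)
n2 = ntime if rank == size - 1 else (rank + 1) * (ntime // size)
rootgrp.variables["time"][n1:n2] = np.arange(n1, n2, dtype="f4")
rootgrp.variables["T2"][n1:n2, :, :] = np.zeros((n2 - n1, ny, nx), dtype="f4")
rootgrp.close()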
Example no. 2
# to run: mpirun -np 4 python mpi_example.py
from mpi4py import MPI
import numpy as np
from netCDF4 import Dataset
rank = MPI.COMM_WORLD.rank  # The process ID (integer 0-3 for 4-process run)
nc = Dataset('parallel_test.nc',
             'w',
             parallel=True,
             comm=MPI.COMM_WORLD,
             info=MPI.Info(),
             format='NETCDF4_CLASSIC')
# below should work also - MPI_COMM_WORLD and MPI_INFO_NULL will be used.
#nc = Dataset('parallel_test.nc', 'w', parallel=True)
d = nc.createDimension('dim', 4)
v = nc.createVariable('var', np.int32, 'dim')  # use a fixed-width type; np.int is removed in recent NumPy
v[rank] = rank
# switch to collective mode, rewrite the data.
v.set_collective(True)
v[rank] = rank
nc.close()
# reopen the file read-only, check the data
nc = Dataset('parallel_test.nc',
             parallel=True,
             comm=MPI.COMM_WORLD,
             info=MPI.Info())
assert rank == nc['var'][rank]
nc.close()
# reopen the file in append mode, modify the data on the last rank.
nc = Dataset('parallel_test.nc',
             'a',
             parallel=True,
             comm=MPI.COMM_WORLD,
             info=MPI.Info())
v = nc['var']  # re-fetch the variable handle from the reopened dataset
if rank == 3:
    v[rank] = 2 * rank
nc.close()
Example no. 3
# to run: mpirun -np 4 python mpi_example.py
import sys
from mpi4py import MPI
import numpy as np
from netCDF4 import Dataset

rank = MPI.COMM_WORLD.rank  # The process ID (integer 0-3 for 4-process run)
testfile = "simple_xy_par.nc"

nc = Dataset(testfile, parallel=True, comm=MPI.COMM_WORLD, info=MPI.Info())

if rank == 0:
    # obviously rank 0 can see the whole data set
    # this is interesting but unnecessary and not scalable, since
    # the data might not fit into the RAM of a single node
    for k, v in nc.variables.items():
        print("Variablename={0}".format(k))
        print("Value\n")
        print(v)
        print("Value as numpy.array \n")
        print(np.array(v))

    # The aim of the exercise is actually to read only the part relevant
    # to the computation this rank is responsible for (see the sketch below)

nc.close()
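A minimal sketch of that per-rank read, assuming the file holds a variable named data whose first dimension is split into contiguous chunks, one per rank (the variable name and layout are assumptions, not taken from the original file):

from mpi4py import MPI
from netCDF4 import Dataset

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

nc = Dataset("simple_xy_par.nc", parallel=True, comm=comm, info=MPI.Info())
var = nc.variables["data"]   # hypothetical variable name

# split the first dimension into contiguous chunks, one per rank
n = len(nc.dimensions[var.dimensions[0]])
n1 = rank * (n // size)
n2 = n if rank == size - 1 else (rank + 1) * (n // size)

local = var[n1:n2]           # each rank reads only its own slice
print("rank {} read rows {}:{} with shape {}".format(rank, n1, n2, local.shape))
nc.close()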
Example no. 4
# based on https://github.com/Unidata/netcdf4-python/blob/master/examples/mpi_example.py
# to run: mpirun -np 4 python mpi_example.py
from mpi4py import MPI
import numpy as np
from netCDF4 import Dataset

rank = MPI.COMM_WORLD.rank  # The process ID (integer 0-3 for 4-process run)
if rank == 0:
    print('Creating file with format NETCDF4_CLASSIC')
nc = Dataset('parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD,
             info=MPI.Info(), format='NETCDF4_CLASSIC')
d = nc.createDimension('dim', 4)
v = nc.createVariable('var', np.int32, 'dim')
v[rank] = rank
# switch to collective mode, rewrite the data.
v.set_collective(True)
v[rank] = rank
nc.close()
# reopen the file read-only, check the data
nc = Dataset('parallel_test.nc', parallel=True, comm=MPI.COMM_WORLD,
             info=MPI.Info())
assert rank == nc['var'][rank]
nc.close()

# reopen the file in append mode, modify the data on the last rank.
nc = Dataset('parallel_test.nc', 'a', parallel=True, comm=MPI.COMM_WORLD,
             info=MPI.Info())
v = nc['var']  # re-fetch the variable handle from the reopened dataset
if rank == 3:
    v[rank] = 2 * rank
nc.close()
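As a follow-up check (an assumed continuation, not part of the original snippet), the file can be reopened read-only to verify that only the last rank's element was doubled:

nc = Dataset('parallel_test.nc', parallel=True, comm=MPI.COMM_WORLD,
             info=MPI.Info())
expected = 2 * rank if rank == 3 else rank
assert nc['var'][rank] == expected
nc.close()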
Example no. 5
# to run: mpirun -np 4 python mpi_example.py
import sys
from mpi4py import MPI
import numpy as np
from netCDF4 import Dataset
if len(sys.argv) == 2:
    format = sys.argv[1]
else:
    format = 'NETCDF4_CLASSIC'
rank = MPI.COMM_WORLD.rank  # The process ID (integer 0-3 for 4-process run)
if rank == 0:
    print('Creating file with format {}'.format(format))
nc = Dataset('parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD,
        info=MPI.Info(),format=format)
# below should work also - MPI_COMM_WORLD and MPI_INFO_NULL will be used.
#nc = Dataset('parallel_test.nc', 'w', parallel=True)
d = nc.createDimension('dim',4)
v = nc.createVariable('var', np.int32, 'dim')
v[rank] = rank
# switch to collective mode, rewrite the data.
v.set_collective(True)
v[rank] = rank
nc.close()
# reopen the file read-only, check the data
nc = Dataset('parallel_test.nc', parallel=True, comm=MPI.COMM_WORLD,
        info=MPI.Info())
assert rank==nc['var'][rank]
nc.close()
# reopen the file in append mode, modify the data on the last rank.
nc = Dataset('parallel_test.nc', 'a', parallel=True, comm=MPI.COMM_WORLD,
        info=MPI.Info())
v = nc['var']  # re-fetch the variable handle from the reopened dataset
if rank == 3:
    v[rank] = 2 * rank
nc.close()
Example no. 6
def ncfilter(
    infile      = 'default.nc',
    outfile      = 'filtered.nc',
    ndx_file    = "groups.ndx",
    g_sel       = "indenter",
    selvarname  = 'id',    # select subset based on NetCDF variable 'id'
    seldimname  = "atom",  # dimension to resize
    pardimname  = "frame", # dimension for chunk-wise parallelization
    format      = 'NETCDF4',
    collective  = True, # NetCDF MPIIO mode: collective or independent
    loglvl      = standard_loglevel,
    logfmt      = standard_logformat,
    logout      = None
  ):
  """Filters NetCDF trajectory by atom indices from GROMACS sytle .ndx file.

  Args:
      infile (str, optional):     NetCDF trajectory, defaults to 'default.nc'.
      outfile (str, optional):    NetCDF trajectory, defaults to 'filtered.nc'.
      ndx_file (str, optional):   GROMACS style .ndx file,
                                  defaults to 'groups.ndx'.
      g_sel (str, optional):      Name of group to select, as found in .ndx
                                  file, defaults to 'indenter'.
      selvarname (str, optional): Name of NetCDF variable to compare against
                                  indices in .ndx file, defaults to 'id'.
      seldimname (str, optional): Name of NetCDF dimension to reduce. Defaults
                                  to 'atom'.
      pardimname (str, optional): Name of NetCDF dimension to chop into chunks
                                  for parallel processing via MPI. Defaults to
                                  'frame'.
      format (str, optional):     NetCDF format, defaults to 'NETCDF4'.
      collective (bool, optional): Parallel I/O mode, defaults to 'True'.
                                  If 'False', independent mode is used and
                                  unlimited dimensions are written as finite
                                  dimensions. Please refer to
          http://unidata.github.io/netcdf4-python/netCDF4/index.html#section13
      loglvl (int, optional):     Log level, defaults to 'logging.ERROR'.
      logfmt (str, optional):     Override default log format.
      logout (str, optional):     Name of log file. Defaults to 'None',
                                  in which case the log streams to the terminal.

  Returns:
      Nothing.
  """


  # MPI communicator
  comm = MPI.COMM_WORLD

  size = comm.Get_size()
  rank = comm.Get_rank()

  # set up logging to terminal (and to file)
  # use same format for file and stream
  logger        = logging.getLogger("rank[{:03d}]:{}".format(rank,__name__))
  logger.setLevel(loglvl)

  logformatter  = logging.Formatter(logfmt)

  ch = logging.StreamHandler()
  ch.setFormatter(logformatter)
  ch.setLevel(loglvl)
  logger.addHandler(ch)

  if isinstance(logout,str):
    logger.setLevel(logging.DEBUG)
    fh = MPIFileHandler(logout)
    fh.setFormatter(logformatter)
    fh.setLevel(logging.DEBUG) # always verbose to log file
    logger.addHandler(fh)

  logger.debug('Hello from rank {}/{}.'.format(rank, size))

  # read groups from .ndx file:
  ndx = NDX()

  if rank == 0:
    logger.info(
      "Looking for '{:s}' in current working directory '{:s}'...".format(
        ndx_file, os.getcwd()) )

  ndx.read(ndx_file)

  if rank == 0:
    logger.info("Read {:d} groups from '{:s}':".format(len(ndx),ndx_file))
    for g in ndx.keys():
      logger.info("{:48s} - {: 24d} atoms".format(g,len(ndx[g])))

  # https://unidata.github.io/netcdf4-python/netCDF4/index.html#section15 You
  # can create netCDF Datasets whose content is held in memory instead of in a
  # disk file. There are two ways to do this. If you don't need access to the
  # memory buffer containing the Dataset from within python, the best way is to
  # use the diskless=True keyword argument when creating the Dataset. If you
  # want to save the Dataset to disk when you close it, also set persist=True.
  with Dataset(infile, 'r', parallel=True, comm=comm, info=MPI.Info(),
      format=format) as ncin, \
    Dataset(outfile, 'w', parallel=True, comm=comm, info=MPI.Info(),
      format=format, diskless=True, persist=True) as ncout:

    # get atom ids from netcdf, assume unsorted
    selvar = ncin.variables[selvarname]

    # look for parallelization dimension in selection variable:
    selvardnames = np.array(selvar.dimensions,dtype=str)
    selvarndims = len(selvardnames) # number of dimensions in selvar

    # select everything in selection variable to begin with
    selvarsel = [slice(None)]*selvarndims

    # check for parallelization along pardimname
    if rank == 0:
      logger.debug(
        "Parallelization dimension '{}' in selection variable {}{} ?".format(
          pardimname,selvarname,selvardnames))

    selvarpardimpos = np.argwhere(selvardnames == pardimname).flatten()
    if rank == 0:
      logger.debug("np.argwhere returned {}".format(selvarpardimpos))

    # in the standard case, selection is based on variable 'id' with dimensions
    # ('frame','atom'), where processing is parallelized chunk-wise over
    # 'frame', while only a subset is processed over 'atom'.
    if len(selvarpardimpos) != 1:
      if rank == 0:
        logger.error(
          "Selection variable {}{} has no parallelization dimension {}!".format(
            selvarname, selvar.shape, pardimname) )
        raise ValueError()
    else:
      selvarpardimpos = selvarpardimpos[0]
      if rank == 0:
        logger.info(' '.join(("Selection variable {}{} has parallelization",
          "dimension {} at position {}")).format(
            selvarname, selvar.shape, pardimname, selvarpardimpos))

    # create a reference selection based on the first index in pardim
    # to ensure the total number of particles does not change along pardim
    selvarsel[selvarpardimpos] = 0

    # TODO: split ndx-specific functionality from this function
    # with the ndx object here, selvar is expected to be only 1d after reduction
    # by parallelization dimension:
    refsel    = np.isin(selvar[tuple(selvarsel)], ndx[g_sel]) # use the prepared reference selection
    refnumsel = np.count_nonzero(refsel)
    # number of selected entries along selection dimension

    if rank == 0:
      logger.info(' '.join(("Selection variable {}{} has {} entries",
        "along selection dimension {} in selection '{}'")).format(
          selvarname, selvarsel, refnumsel, seldimname, g_sel ) )

    # discretize trajectory along pardim and process chunk-wise
    pardimlen = len(ncin.dimensions[pardimname])
    if rank == 0:
      logger.info('Parallelization dimension: {}[{}]'.format(
        pardimname, pardimlen))

    if pardimlen < size: # more ranks than pardimlen needed!
        logger.error(' '.join(('Number of ranks {} exceeds size of',
           'parallelization dimension: {}[{}]. Reduce!')).format(
             size, pardimname, pardimlen))
        raise ValueError()

    n1 = rank*(pardimlen//size)
    n2 = (rank+1)*(pardimlen//size)
    logger.info('Parallelization dimension: {}[{}]'.format(
      pardimname, pardimlen))

    # TODO: this intended treatment of pardimlen < size does not work
    # treatment for special case where pardimlen < size
    # if n1 == 0 and n2 == 0:
    #   if rank == 0:
    #     logger.warn(' '.join(('Number of ranks {} exceeds size of',
    #       'parallelization dimension: {}[{}]')).format(
    #         size, pardimname, pardimlen))
    #   # treatment for special case where rank >= size
    #   if rank < pardimlen:
    #     n1 = rank  
    #     n2 = rank+1
    #   else:
    #     logger.warn(' '.join(('Apparently, rank {}/{} exceeds parallelization',
    #       'dimension {}[{}] and will thus be idle.')).format(
    #         rank, size, pardimname, pardimlen))
    #     n1 = 0
    #     n2 = 0
    # elif rank == size-1: # treatment for last rank if pardimlen >= size
    #   n2 = pardimlen
    
    if rank == size-1: # treatment for last rank if pardimlen >= size
      n2 = pardimlen


    logger.info(
      'Rank {}/{} treats parallelization dimension slice {}[{}:{}]'.format(
        rank, size, pardimname, n1, n2))

    # create global selection along all dimensions in selection variable
    # selection_shape = [ len(dim) for dim in selvar.get_dims() ]
    selection = np.zeros(selvar.shape, dtype=bool)

    # TODO: split ndx-specific functionality from this function
    for i in range(n1, n2):
      # select only selvar values that are within the specified index group
      selvarsel[selvarpardimpos] = i
      selvari = selvar[tuple(selvarsel)] # tuple: NumPy no longer accepts list-of-slices indexing
      selection[tuple(selvarsel)]  = np.isin(selvari, ndx[g_sel])
      numsel = np.count_nonzero(selection[tuple(selvarsel)])
      logger.debug(' '.join(("Selection variable {}{} has {} entries along",
        "selection dimension {} in selection '{}'")).format(
          selvarname, selvarsel, numsel, seldimname, g_sel ))
      if numsel != refnumsel:
        logger.error(' '.join(("Selection variable {}{} has {} entries along",
          "selection dimension {} in selection '{}', differing from {}",
          "in reference selection!")).format(
          selvarname, selvarsel, numsel, seldimname, g_sel, refnumsel ))
        raise ValueError()
      elif np.any( np.not_equal( selection[tuple(selvarsel)], refsel ) ):
        logger.warning(' '.join(("Selection variable {}{} and reference selection",
          "both have {} entries along selection dimension {}",
          "in selection '{}', but in differing order.")).format(
            selvarname, selvarsel, numsel, seldimname, g_sel ))

    # copy attributes, src:
    # https://stackoverflow.com/questions/15141563/
    # python-netcdf-making-a-copy-of-all-variables-and-attributes-but-one
    if rank == 0:
      logger.info("Copying global attributes...")

    for aname in ncin.ncattrs():
      if rank == 0:
        logger.info("Copy attibute {}".format(aname))
      ncout.setncattr(aname, ncin.getncattr(aname))

    # copy dimensions, src: https://gist.github.com/guziy/8543562
    if rank == 0:
      logger.info("Copying dimensions...")
    for dname, dim in ncin.dimensions.items():
      if dname == seldimname:
        lendim = refnumsel # reduce size along filter dimension
        if rank == 0:
          logger.info("Shrink dimension {}[{}] to [{}]".format(
            dname, len(dim), lendim))
      else:
        if collective:
          lendim = len(dim) if not dim.isunlimited() else None
        else:
          lendim = len(dim) # ignore unlimited dimension in this case

        if rank == 0:
          logger.info("Copy dimension {}[{}] to {}[{}]".format(
            dname, len(dim), dname, lendim))

      ncout.createDimension(dname, lendim)

    # copy variables
    if rank == 0:
      logger.info("Copying variables...")

    for vname, inVar in ncin.variables.items():
      logger.debug(
        "Going to copy innput variable {}{}: {}. Rank {}/{} still alive".format(
          vname, inVar.shape, inVar.datatype, rank, size))
      outVar = ncout.createVariable(vname, inVar.datatype, inVar.dimensions)
      if rank == 0:
        logger.info(
          "Copy input variable {}{}: {} to output variable {}{}: {}".format(
            vname, inVar.shape, inVar.datatype,
            vname, outVar.shape, outVar.datatype))
      if collective:
        outVar.set_collective(True) # output in collective mode
      else:
        outVar.set_collective(False) # output in independent mode

      # Copy variable attributes
      outVar.setncatts({k: inVar.getncattr(k) for k in inVar.ncattrs()})

      # look at dimensions in variable
      dnames = np.array(inVar.dimensions,dtype=str)
      ndims = len(dnames) # number of dimensions in current variable

      # select everything in variable to begin with
      in_selection = [slice(None)]*ndims
      out_selection = [slice(None)]*ndims

      # check for selection dimension:
      if rank == 0:
        logger.debug(' '.join(("Is the selection dimension '{}'",
          "in dimensions {} of variable {}{}?")).format(
          seldimname, dnames, vname, inVar.shape))
      seldimpos = np.argwhere(dnames == seldimname).flatten()
      if rank == 0:
        logger.debug("np.argwhere(...) returned '{}'".format(seldimpos))
      if len(seldimpos) != 1:
        seldimpos = None
        if rank == 0:
          logger.info(
            "Input variable {}{} has no selection dimension {}.".format(
              vname, inVar.shape, seldimname))
      else:
        seldimpos = seldimpos[0]
        if rank == 0:
          logger.info(' '.join(("Input variable {}{} has selection",
            "dimension {} at position {}")).format(
            vname, inVar.shape, seldimname, seldimpos))

      # check for parallelization dimension:
      if rank == 0:
        logger.debug(' '.join(("Is the parallelization dimension '{}'",
          "in dimensions {} of variable {}{}?")).format(
          pardimname, dnames, vname, inVar.shape))

      pardimpos = np.argwhere(dnames == pardimname).flatten()
      if rank == 0:
        logger.debug("np.argwhere(...) returned {}".format(pardimpos))

      # no parallelization dimension in current inVar:
      if len(pardimpos) != 1:
        if rank == 0:
          logger.info(
            "Input variable {}{} has no parallelization dimension {}.".format(
              vname, inVar.shape, pardimname))

        # in the current implementation, the selection dimension is expected
        # only to occur in variables that have parallelization dimension as well
        if (seldimpos is not None) and (rank == 0):
          logger.error(' '.join((
            "Input variable {}{} has selection dimension {},"
            "but no parallelization dimension {}!")).format(
            vname, inVar.shape, seldimname, pardimname))

        if rank == 0:
          logger.info("Copy input variable {}{} as is at once.".format(
            vname, inVar.shape))
        outVar[:] = inVar[:]

      # parallelization dimension found in current inVar:
      else:
        pardimpos = pardimpos[0]
        if rank == 0:
          logger.info(' '.join((
            "Input variable {}{} has parallelization dimension {}",
            "at position {}")).format(
              vname, inVar.shape, pardimname, pardimpos))
        
        if collective:
           # issue: outVar.shape has zero entry for unlimited dim
          if rank == 0:
            logger.info("outVar {}{} has {} unlimited dims".format( 
              vname, outVar.shape, np.equal( outVar.shape, 0) ))
          tmpVar_shape = np.where( np.equal( outVar.shape, 0 ), inVar.shape, outVar.shape)
          tmpVar = np.zeros( tmpVar_shape, dtype=outVar.dtype )
        else:
          tmpVar = np.zeros( outVar.shape, dtype=outVar.dtype )

        if rank == 0:
          logger.debug("np.array tmpVar{}: {} created".format(
            tmpVar.shape, tmpVar.dtype))

        # https://unidata.github.io/netcdf4-python/netCDF4/index.html#section6
        # Boolean array and integer sequence indexing behaves differently for
        # netCDF variables than for numpy arrays. Only 1-d boolean arrays and
        # integer sequences are allowed, and these indices work independently
        # along each dimension (similar to the way vector subscripts work in
        # fortran).

        # Here, we iterate subsequently over temporal axis as selection may
        # vary for every frame and multidimensional selection arrays are no
        # option.
        for i in range(n1, n2):
          if seldimpos is not None:
            selvarsel[selvarpardimpos] = i
            in_selection[seldimpos] = selection[tuple(selvarsel)]

          in_selection[pardimpos]  = i
          out_selection[pardimpos] = i

          logger.debug("Assigning input variable {}{} to tmpVar{}".format(
              vname, in_selection, out_selection))

          tmpVar[tuple(out_selection)] = inVar[tuple(in_selection)]

          logger.debug("Assigned input variable {}{} to tmpVar{}".format(
              vname, in_selection, out_selection))

        # TODO:
        # this commented conditional clause intends to alleviate the issue when
        # the number of ranks exceeds the number of parallelizable entries in par dim.
        # if n2 > n1: # only if there is a finite range of elements to assign:
        out_selection[pardimpos] = slice(n1,n2)
        logger.info(' '.join(("Assigning filtered input variable {}{}",
          "to output var {}{}")).format(
            vname, out_selection, vname, out_selection))

        # apparently, accumulating the desired subset index by index in an numpy
        # array tmpVar and then assigning the accumulated chunk to the NetCDF
        # variable outVar works better than assigning subset to NetCDF variable
        # index by index directly
        outVar[tuple(out_selection)] = tmpVar[tuple(out_selection)]
        logger.info(' '.join(("Assigned filtered input variable {}{}",
          "to output var {}{}")).format(
            vname, out_selection, vname, out_selection))
        # else:
        #  logger.info("Skipped input variable {} on rank {}/{}.".format(
        #    vname, rank, size))

    # nc.close() not necessary due to 'with' statement
  logger.debug('Goodbye from rank {}/{}.'.format(rank, size))
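A minimal driver sketch for ncfilter, assuming it lives in a module (here hypothetically named ncfilter_module) together with the NDX, MPIFileHandler, standard_loglevel and standard_logformat names it references; the file and group names are placeholders:

# run with e.g.: mpirun -np 4 python filter_traj.py
from ncfilter_module import ncfilter   # hypothetical module name

ncfilter(
    infile     = 'default.nc',    # NetCDF trajectory to filter
    outfile    = 'filtered.nc',   # reduced output trajectory
    ndx_file   = 'groups.ndx',    # GROMACS-style index file
    g_sel      = 'indenter',      # group to keep
    selvarname = 'id',            # match group indices against this variable
    seldimname = 'atom',          # dimension that shrinks to the group size
    pardimname = 'frame',         # dimension split across MPI ranks
    collective = True,            # collective parallel I/O
    logout     = 'ncfilter.log',  # shared log file written via MPIFileHandler
)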
Example no. 7
outputinterval = 86400. / 4.  # interval between frames in seconds
tmin = 0. * 86400.  # time to start saving data (0 days, in seconds)
tmax = 50. * 86400.  # time to stop (50 days, in seconds)
nsteps = int(tmax / outputinterval)  # number of time steps to animate
# set number of timesteps to integrate for each call to model.advance
model.timesteps = int(outputinterval / model.dt)
savedata = 'sqg_N%s_6hrly_ens.nc' % N  # save data to netcdf file

if savedata is not None:
    from mpi4py import MPI
    from netCDF4 import Dataset
    nc = Dataset(savedata,
                 mode='w',
                 format='NETCDF4_CLASSIC',
                 parallel=True,
                 comm=MPI.COMM_WORLD,
                 info=MPI.Info())
    nc.r = model.r
    nc.f = model.f
    nc.U = model.U
    nc.L = model.L
    nc.H = model.H
    nc.g = g
    nc.theta0 = theta0
    nc.nsq = model.nsq
    nc.tdiab = model.tdiab
    nc.dt = model.dt
    nc.diff_efold = model.diff_efold
    nc.diff_order = model.diff_order
    nc.symmetric = int(model.symmetric)
    nc.dealias = int(model.dealias)
    x = nc.createDimension('x', N)
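The snippet stops after creating the first dimension. For orientation, a self-contained sketch of the pattern it is heading toward: an unlimited time dimension, collective mode for variables written along it (required in parallel), and one appended frame per output step. Grid size, variable names and the row decomposition are placeholders, not taken from the original script:

from mpi4py import MPI
import numpy as np
from netCDF4 import Dataset

comm = MPI.COMM_WORLD
rank, size = comm.rank, comm.size
N = 64                                   # placeholder grid size (assume size divides N)

nc = Dataset('sqg_sketch.nc', 'w', format='NETCDF4_CLASSIC',
             parallel=True, comm=comm, info=MPI.Info())
nc.createDimension('x', N)
nc.createDimension('y', N)
nc.createDimension('t', None)            # unlimited time dimension

tvar = nc.createVariable('t', np.float32, ('t',))
pv = nc.createVariable('pv', np.float32, ('t', 'y', 'x'))

# writes along an unlimited dimension must be collective in parallel mode
tvar.set_collective(True)
pv.set_collective(True)

# each rank writes its own band of rows for every output frame
j1, j2 = rank * (N // size), (rank + 1) * (N // size)
for nframe in range(3):                  # placeholder: three output frames
    tvar[nframe] = float(nframe)
    pv[nframe, j1:j2, :] = np.zeros((j2 - j1, N), dtype=np.float32)
nc.close()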