Example No. 1
    def read(self, name, ranges=None, asBoutArray=True):
        """Read a variable from the file."""
        if self.handle is None:
            return None

        try:
            var = self.handle.variables[name]
            n = name
        except KeyError:
            # Not found. Try to find using case-insensitive search
            var = None
            for n in list(self.handle.variables.keys()):
                if n.lower() == name.lower():
                    print("WARNING: Reading '" + n + "' instead of '" + name +
                          "'")
                    var = self.handle.variables[n]
            if var is None:
                return None

        if asBoutArray:
            attributes = self.attributes(n)

        ndims = len(var.dimensions)
        if ndims == 0:
            data = var.getValue()
            if asBoutArray:
                data = BoutArray(data, attributes=attributes)
            return data  # [0]
        else:
            if ranges:
                if len(ranges) == 2 * ndims:
                    # Reform list of pairs of ints into slices
                    ranges = [
                        slice(a, b) for a, b in zip(ranges[::2], ranges[1::2])
                    ]
                elif len(ranges) != ndims:
                    raise ValueError(
                        "Incorrect number of elements in ranges argument "
                        "(got {}, expected {} or {})".format(
                            len(ranges), ndims, 2 * ndims))

                if library == "Scientific":
                    # Passing ranges to var[] doesn't seem to work
                    data = var[:]
                    data = data[ranges[:ndims]]
                else:
                    data = var[ranges[:ndims]]
                if asBoutArray:
                    data = BoutArray(data, attributes=attributes)
                return data
            else:
                data = var[:]
                if asBoutArray:
                    data = BoutArray(data, attributes=attributes)
                return data
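A brief usage sketch of the ranges argument (the open file wrapper df and the variable name "n" are hypothetical): the same sub-array can be requested either with one slice per dimension, or with a flat list of 2*ndims integers that read() reforms into (start, stop) pairs.

# Hypothetical usage of the read() method above, for a 3D variable "n"
sub_a = df.read("n", ranges=[slice(0, 10), slice(2, 5), slice(0, None)])
# Equivalent flat form: 2*ndims ints, reformed into slices internally
sub_b = df.read("n", ranges=[0, 10, 2, 5, 0, None])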
Example No. 2
    def read(self, name, ranges=None, asBoutArray=True):
        if self.handle is None:
            return None

        try:
            var = self.handle[name]
            n = name
        except KeyError:
            # Not found. Try to find using case-insensitive search
            var = None
            for n in self.handle:
                if n.lower() == name.lower():
                    print("WARNING: Reading '" + n + "' instead of '" + name +
                          "'")
                    var = self.handle[n]
            if var is None:
                return None

        attributes = self.attributes(n) if asBoutArray else {}

        time_dependent = attributes.get("bout_type", "none").endswith("_t")

        ndims = len(var.shape)
        if ndims == 1 and var.shape[0] == 1 and not time_dependent:
            data = var
            if asBoutArray:
                data = BoutArray(data, attributes=attributes)
            return data[0]
        else:
            if ranges:
                if len(ranges) == 2 * ndims:
                    # Reform list of pairs of ints into slices
                    ranges = [
                        slice(a, b) for a, b in zip(ranges[::2], ranges[1::2])
                    ]
                elif len(ranges) != ndims:
                    raise ValueError(
                        "Incorrect number of elements in ranges argument "
                        "(got {}, expected {} or {})".format(
                            len(ranges), ndims, 2 * ndims))
                # Probably a bug in h5py, work around by passing tuple
                data = var[tuple(ranges[:ndims])]
                if asBoutArray:
                    data = BoutArray(data, attributes=attributes)
                return data
            else:
                data = var[...]
                if asBoutArray:
                    data = BoutArray(data, attributes=attributes)
                return data
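The scalar-unwrapping branch above relies on the convention that a "bout_type" attribute ending in "_t" marks a time-evolving variable. A minimal sketch of that check (the attribute values shown are illustrative):

def _is_time_dependent(attributes):
    # e.g. "Field3D_t" or "scalar_t" evolve in time; "Field3D" or "scalar" do not
    return attributes.get("bout_type", "none").endswith("_t")

assert _is_time_dependent({"bout_type": "Field3D_t"})
assert not _is_time_dependent({"bout_type": "scalar"})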
Example No. 3
def _read_scalar(f, varname, dimensions, var_attributes, tind):
    """
    Read a scalar variable from a single file

    Parameters
    ----------
    f : DataFile
        File to read from. This function does *not* close f.
    varname : str
        Name of variable to read
    dimensions : tuple
        Dimensions of the variable
    var_attributes : dict
        Attributes of the variable
    tind : slice
        Slice to apply to the t-dimension, if there is one
    """
    if "t" in dimensions:
        if not dimensions[0] == "t":
            # 't' should be the first dimension in the list if present
            raise ValueError(
                "{} has a 't' dimension, but it is not the first dimension "
                "in dimensions={}".format(varname, dimensions))
        data = f.read(varname, ranges=[tind] + (len(dimensions) - 1) * [None])
    else:
        # No time or space dimensions, so no slicing
        data = f.read(varname)
    return BoutArray(data, attributes=var_attributes)
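For illustration, the ranges list built above for a time-dependent scalar (the slice value is hypothetical):

tind = slice(1, 5)
dimensions = ("t",)
ranges = [tind] + (len(dimensions) - 1) * [None]  # -> [slice(1, 5)]
# A variable with no "t" dimension is read whole, with no ranges argument.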
Example No. 4
def squashoutput(datadir=".",
                 outputname="BOUT.dmp.nc",
                 format="NETCDF4",
                 tind=None,
                 xind=None,
                 yind=None,
                 zind=None,
                 singleprecision=False,
                 compress=False,
                 least_significant_digit=None,
                 quiet=False,
                 complevel=None,
                 append=False,
                 delete=False):
    """
    Collect all data from BOUT.dmp.* files and create a single output file.

    Parameters
    ----------
    datadir : str
        Directory where dump files are and where output file will be created.
        default "."
    outputname : str
        Name of the output file. File suffix specifies whether to use NetCDF or
        HDF5 (see boututils.datafile.DataFile for suffixes).
        default "BOUT.dmp.nc"
    format : str
        format argument passed to DataFile
        default "NETCDF4"
    tind : slice, int, or [int, int, int]
        tind argument passed to collect
        default None
    xind : slice, int, or [int, int, int]
        xind argument passed to collect
        default None
    yind : slice, int, or [int, int, int]
        yind argument passed to collect
        default None
    zind : slice, int, or [int, int, int]
        zind argument passed to collect
        default None
    singleprecision : bool
        If true convert data to single-precision floats
        default False
    compress : bool
        If true enable compression in the output file
    least_significant_digit : int or None
        How many digits should be retained? Enables lossy
        compression. Default is lossless compression. Needs
        compression to be enabled.
    complevel : int or None
        Compression level, 1 should be fastest, and 9 should yield
        highest compression.
    quiet : bool
        Be less verbose. default False
    append : bool
        Append to existing squashed file
    delete : bool
        Delete the original files after squashing.
    """

    fullpath = os.path.join(datadir, outputname)

    if append:
        datadirnew = tempfile.mkdtemp(dir=datadir)
        for f in glob.glob(datadir + "/BOUT.dmp.*.??"):
            if not quiet:
                print("moving", f)
            shutil.move(f, datadirnew)
        oldfile = datadirnew + "/" + outputname
        datadir = datadirnew

    if os.path.isfile(fullpath) and not append:
        raise ValueError(
            fullpath +
            " already exists. Collect may try to read from this file, which is presumably not desired behaviour."
        )

    # useful object from BOUT pylib to access output data
    outputs = BoutOutputs(datadir,
                          info=False,
                          xguards=True,
                          yguards=True,
                          tind=tind,
                          xind=xind,
                          yind=yind,
                          zind=zind)
    outputvars = outputs.keys()
    # Read a value to cache the files
    outputs[outputvars[0]]

    if append:
        # move only after the file list is cached
        shutil.move(fullpath, oldfile)

    t_array_index = outputvars.index("t_array")
    outputvars.append(outputvars.pop(t_array_index))

    kwargs = {}
    if compress:
        kwargs['zlib'] = True
        if least_significant_digit is not None:
            kwargs['least_significant_digit'] = least_significant_digit
        if complevel is not None:
            kwargs['complevel'] = complevel
    if append:
        old = DataFile(oldfile)
        # Check if dump on restart was enabled
        # If so, we want to drop the duplicated entry
        cropnew = 0
        if old['t_array'][-1] == outputs['t_array'][0]:
            cropnew = 1
        # Make sure we don't end up with duplicated data:
        for ot in old['t_array']:
            if ot in outputs['t_array'][cropnew:]:
                raise RuntimeError(
                    "For some reason t_array has some duplicated entries in the new and old file."
                )
    # Create single file for output and write data
    with DataFile(fullpath, create=True, write=True, format=format,
                  **kwargs) as f:
        for varname in outputvars:
            if not quiet:
                print(varname)

            var = outputs[varname]
            if append:
                dims = outputs.dimensions[varname]
                if 't' in dims:
                    var = var[cropnew:, ...]
                    varold = old[varname]
                    var = BoutArray(numpy.append(varold, var, axis=0),
                                    var.attributes)

            if singleprecision:
                if not isinstance(var, int):
                    var = BoutArray(numpy.float32(var), var.attributes)

            f.write(varname, var)
            # Write changes, free memory
            f.sync()
            var = None
            gc.collect()

    if delete:
        if append:
            os.remove(oldfile)
        for f in glob.glob(datadir + "/BOUT.dmp.*.??"):
            if not quiet:
                print("Deleting", f)
            os.remove(f)
        if append:
            os.rmdir(datadir)
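A usage sketch (the directory and compression settings are hypothetical): squash all BOUT.dmp.* files under "data/" into a single single-precision, compressed NetCDF file.

squashoutput(datadir="data", outputname="BOUT.dmp.nc",
             singleprecision=True, compress=True, complevel=4)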
Example No. 5
def redistribute(npes,
                 path="data",
                 nxpe=None,
                 output=".",
                 informat=None,
                 outformat=None,
                 mxg=2,
                 myg=2):
    """Resize restart files across NPES processors.

    Does not check if new processor arrangement is compatible with the
    branch cuts. In this respect :py:func:`restart.split` is
    safer. However, BOUT++ checks the topology during initialisation
    anyway so this is not too serious.

    Parameters
    ----------
    npes : int
        Number of processors for the new restart files
    path : str, optional
        Path to original restart files (default: "data")
    nxpe : int, optional
        Number of processors to use in the x-direction (determines
        split: npes = nxpe * nype). Default is None which uses the
        same algorithm as BoutMesh (but without topology information)
        to determine a suitable value for nxpe.
    output : str, optional
        Location to save new restart files (default: current directory)
    informat : str, optional
        Specify file format of old restart files (must be a suffix
        understood by DataFile, e.g. 'nc'). Default uses the format of
        the first 'BOUT.restart.*' file listed by glob.glob.
    outformat : str, optional
        Specify file format of new restart files (must be a suffix
        understood by DataFile, e.g. 'nc'). Default is to use the same
        as informat.

    Returns
    -------
    True on success

    TODO
    ----
    - Replace printing errors with raising `ValueError`

    """

    if npes <= 0:
        print("ERROR: Negative or zero number of processors")
        return False

    if path == output:
        print("ERROR: Can't overwrite restart files")
        return False

    if informat is None:
        file_list = glob.glob(os.path.join(path, "BOUT.restart.*"))
    else:
        file_list = glob.glob(os.path.join(path, "BOUT.restart.*." + informat))

    nfiles = len(file_list)

    # Read old processor layout
    f = DataFile(file_list[0])

    # Get list of variables
    var_list = f.list()
    if len(var_list) == 0:
        print("ERROR: No data found")
        return False

    old_processor_layout = get_processor_layout(f, has_t_dimension=False)
    print("Grid sizes: ", old_processor_layout.nx, old_processor_layout.ny,
          old_processor_layout.mz)

    if nfiles != old_processor_layout.npes:
        print("WARNING: Number of restart files inconsistent with NPES")
        print("Setting nfiles = " + str(old_processor_layout.npes))
        nfiles = old_processor_layout.npes

    if nfiles == 0:
        print("ERROR: No restart files found")
        return False

    informat = file_list[0].split(".")[-1]
    if outformat is None:
        outformat = informat

    try:
        new_processor_layout = create_processor_layout(old_processor_layout,
                                                       npes,
                                                       nxpe=nxpe)
    except ValueError as e:
        print("Could not find valid processor split. " + str(e))
        return False

    nx = old_processor_layout.nx
    ny = old_processor_layout.ny
    mz = old_processor_layout.mz
    mxg = old_processor_layout.mxg
    myg = old_processor_layout.myg
    old_npes = old_processor_layout.npes
    old_nxpe = old_processor_layout.nxpe
    old_nype = old_processor_layout.nype
    old_mxsub = old_processor_layout.mxsub
    old_mysub = old_processor_layout.mysub

    nxpe = new_processor_layout.nxpe
    nype = new_processor_layout.nype
    mxsub = new_processor_layout.mxsub
    mysub = new_processor_layout.mysub

    outfile_list = []
    for i in range(npes):
        outpath = os.path.join(output,
                               "BOUT.restart." + str(i) + "." + outformat)
        outfile_list.append(DataFile(outpath, write=True, create=True))
    infile_list = []
    for i in range(old_npes):
        inpath = os.path.join(path, "BOUT.restart." + str(i) + "." + informat)
        infile_list.append(DataFile(inpath))

    for v in var_list:
        ndims = f.ndims(v)

        # collect data
        if ndims == 0:
            # scalar
            data = f.read(v)
        elif ndims == 2:
            data = np.zeros((nx + 2 * mxg, ny + 2 * myg))
            for i in range(old_npes):
                ix = i % old_nxpe
                iy = int(i / old_nxpe)
                ixstart = mxg
                if ix == 0:
                    ixstart = 0
                ixend = -mxg
                if ix == old_nxpe - 1:
                    ixend = 0
                iystart = myg
                if iy == 0:
                    iystart = 0
                iyend = -myg
                if iy == old_nype - 1:
                    iyend = 0
                data[ix * old_mxsub + ixstart:(ix + 1) * old_mxsub + 2 * mxg +
                     ixend,
                     iy * old_mysub + iystart:(iy + 1) * old_mysub + 2 * myg +
                     iyend] = infile_list[i].read(v)[ixstart:old_mxsub +
                                                     2 * mxg + ixend,
                                                     iystart:old_mysub +
                                                     2 * myg + iyend]
            data = BoutArray(data, attributes=infile_list[0].attributes(v))
        elif ndims == 3:
            data = np.zeros((nx + 2 * mxg, ny + 2 * myg, mz))
            for i in range(old_npes):
                ix = i % old_nxpe
                iy = int(i / old_nxpe)
                ixstart = mxg
                if ix == 0:
                    ixstart = 0
                ixend = -mxg
                if ix == old_nxpe - 1:
                    ixend = 0
                iystart = myg
                if iy == 0:
                    iystart = 0
                iyend = -myg
                if iy == old_nype - 1:
                    iyend = 0
                data[ix * old_mxsub + ixstart:(ix + 1) * old_mxsub + 2 * mxg +
                     ixend,
                     iy * old_mysub + iystart:(iy + 1) * old_mysub + 2 * myg +
                     iyend, :] = infile_list[i].read(v)[ixstart:old_mxsub +
                                                        2 * mxg + ixend,
                                                        iystart:old_mysub +
                                                        2 * myg + iyend, :]
            data = BoutArray(data, attributes=infile_list[0].attributes(v))
        else:
            print(
                "ERROR: variable found with unexpected number of dimensions,",
                ndims, v)
            return False

        # write data
        for i in range(npes):
            ix = i % nxpe
            iy = int(i / nxpe)
            outfile = outfile_list[i]
            if v == "NPES":
                outfile.write(v, npes)
            elif v == "NXPE":
                outfile.write(v, nxpe)
            elif v == "NYPE":
                outfile.write(v, nype)
            elif ndims == 0:
                # scalar
                outfile.write(v, data)
            elif ndims == 2:
                # Field2D
                outfile.write(
                    v, data[ix * mxsub:(ix + 1) * mxsub + 2 * mxg,
                            iy * mysub:(iy + 1) * mysub + 2 * myg])
            elif ndims == 3:
                # Field3D
                outfile.write(
                    v, data[ix * mxsub:(ix + 1) * mxsub + 2 * mxg,
                            iy * mysub:(iy + 1) * mysub + 2 * myg, :])
            else:
                print(
                    "ERROR: variable found with unexpected number of dimensions,",
                    f.ndims(v))

    f.close()
    for infile in infile_list:
        infile.close()
    for outfile in outfile_list:
        outfile.close()

    return True
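A usage sketch (paths and processor counts are hypothetical): spread the restart files found in "data/" over 16 processors, forcing a 2-processor split in x, and write the new BOUT.restart.*.nc files into "data_16procs/".

redistribute(16, path="data", nxpe=2, output="data_16procs")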
Example No. 6
def resizeZ(newNz, path="data", output="./", informat="nc", outformat=None):
    """Increase the number of Z points in restart files

    NOTE:
        * Can't overwrite
        * Will not yield a result close to the original if there are
          asymmetries in the z-direction

    Parameters
    ----------
    newNz : int
        nz for the new file
    path : str, optional
        Path to original restart files (default: "data")
    output : str, optional
        Path to write new restart files (default: current directory)
    informat : str, optional
        File extension of original files (default: "nc")
    outformat : str, optional
        File extension of new files (default: use the same as `informat`)

    Returns
    -------
    True on success, else False

    TODO
    ----
    - Replace printing errors with raising `ValueError`
    - Make informat work like `redistribute`

    """

    if outformat is None:
        outformat = informat

    if path == output:
        print("ERROR: Can't overwrite restart files when expanding")
        return False

    def is_pow2(x):
        """Returns true if x is a power of 2"""
        return (x > 0) and ((x & (x - 1)) == 0)

    if not is_pow2(newNz):
        print("ERROR: New Z size must be a power of 2")
        return False

    file_list = glob.glob(os.path.join(path, "BOUT.restart.*." + informat))
    file_list.sort()
    nfiles = len(file_list)

    if nfiles == 0:
        print("ERROR: No data found")
        return False

    print("Number of files found: " + str(nfiles))

    for f in file_list:
        new_f = os.path.join(output, f.split('/')[-1])
        print("Changing {} => {}".format(f, new_f))

        # Open the restart file in read mode and create the new file
        with DataFile(f) as old,\
                DataFile(new_f, write=True, create=True) as new:
            # Loop over the variables in the old file
            for var in old.list():
                # Read the data
                data = old.read(var)
                attributes = old.attributes(var)

                # Find 3D variables
                if old.ndims(var) == 3:
                    print("    Resizing " + var)

                    nx, ny, nz = data.shape

                    newdata = np.zeros((nx, ny, newNz))
                    for x in range(nx):
                        for y in range(ny):
                            f_old = np.fft.fft(data[x, y, :])

                            # New spectrum (newNz is a power of 2); must be
                            # complex to hold the Fourier coefficients
                            f_new = np.zeros(newNz, dtype=complex)

                            # Copy coefficients across (ignoring Nyquist)
                            f_new[0] = f_old[0]  # DC
                            for m in range(1, int(nz / 2)):
                                # + ve frequencies
                                f_new[m] = f_old[m]
                                # - ve frequencies
                                f_new[newNz - m] = f_old[nz - m]

                            # Invert fft
                            newdata[x, y, :] = np.fft.ifft(f_new).real
                            newdata[x, y, :] = newdata[x, y, 0]

                    # Multiply by the ratio newNz/nz.
                    # This is not needed in the IDL routine, as IDL puts the
                    # 1/N scaling factor in the forward transform, whereas
                    # np.fft puts it in the inverse transform.
                    # Note that ifft(fft(a)) = a for the same number of
                    # points in both IDL and np.fft
                    newdata *= (newNz / nz)
                else:
                    print("    Copying " + var)
                    newdata = data.copy()

                newdata = BoutArray(newdata, attributes=attributes)

                new.write(var, newdata)

    return True
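The z-resizing above is spectral zero-padding: the Fourier coefficients are copied into a larger spectrum and the result is rescaled because np.fft puts the 1/N factor in the inverse transform. A standalone sketch of that step on a single 1D z-profile (assuming nz and newNz are powers of two with newNz > nz):

import numpy as np

def _expand_z(profile, newNz):
    nz = len(profile)
    f_old = np.fft.fft(profile)
    f_new = np.zeros(newNz, dtype=complex)
    f_new[0] = f_old[0]                 # DC component
    for m in range(1, nz // 2):         # copy +ve and -ve frequencies (Nyquist dropped)
        f_new[m] = f_old[m]
        f_new[newNz - m] = f_old[nz - m]
    # np.fft applies 1/N in the inverse transform, so rescale by newNz/nz
    return np.fft.ifft(f_new).real * (newNz / nz)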
Example No. 7
def resize(newNx,
           newNy,
           newNz,
           mxg=2,
           myg=2,
           path="data",
           output="./",
           informat="nc",
           outformat=None,
           method='linear',
           maxProc=None,
           mute=False):
    """Increase/decrease the number of points in restart files.

    NOTE: Can't overwrite
    WARNING: Currently only implemented with uniform BOUT++ grid

    Parameters
    ----------
    newNx, newNy, newNz : int
        nx, ny, nz for the new file (including ghost points)
    mxg, myg : int, optional
        Number of ghost points in x, y (default: 2)
    path : str, optional
        Input path to data files
    output : str, optional
        Path to write new files
    informat : str, optional
        File extension of input
    outformat : {None, str}, optional
        File extension of output (default: use the same as `informat`)
    method : {'linear', 'nearest'}, optional
        What interpolation method to be used
    maxProc : {None, int}, optional
        Limits maximum processors to use when interpolating if set
    mute : bool, optional
        Whether or not output should be printed from this function

    Returns
    -------
    return : bool
        True on success, else False

    TODO
    ----
    - Add 2D field interpolation
    - Replace printing errors with raising `ValueError`
    - Make informat work like `redistribute`

    """

    if method is None:
        # Make sure the method is set
        method = 'linear'

    if outformat is None:
        outformat = informat

    if path == output:
        print("ERROR: Can't overwrite restart files when expanding")
        return False

    def is_pow2(x):
        """Returns true if x is a power of 2"""
        return (x > 0) and ((x & (x - 1)) == 0)

    if not is_pow2(newNz):
        print("ERROR: New Z size {} must be a power of 2".format(newNz))
        return False

    file_list = glob.glob(os.path.join(path, "BOUT.restart.*." + informat))
    file_list.sort()
    nfiles = len(file_list)

    if nfiles == 0:
        print("ERROR: No data found in {}".format(path))
        return False

    if not (mute):
        print("Number of files found: " + str(nfiles))

    for f in file_list:
        new_f = os.path.join(output, f.split('/')[-1])
        if not (mute):
            print("Changing {} => {}".format(f, new_f))

        # Open the restart file in read mode and create the new file
        with DataFile(f) as old, DataFile(new_f, write=True,
                                          create=True) as new:

            # Find the dimension
            for var in old.list():
                # Read the data
                data = old.read(var)
                # Find 3D variables
                if old.ndims(var) == 3:
                    break

            nx, ny, nz = data.shape
            # Make coordinates
            # NOTE: The max and min of the coordinates are irrelevant when
            #       interpolating (as long as old and new coordinates are
            #       consistent), so we just choose all variables to be
            #       between 0 and 1.
            # Calculate the old coordinates
            xCoordOld = np.linspace(0, 1, nx)
            yCoordOld = np.linspace(0, 1, ny)
            zCoordOld = np.linspace(0, 1, nz)

            # Calculate the new coordinates
            xCoordNew = np.linspace(xCoordOld[0], xCoordOld[-1], newNx)
            yCoordNew = np.linspace(yCoordOld[0], yCoordOld[-1], newNy)
            zCoordNew = np.linspace(zCoordOld[0], zCoordOld[-1], newNz)

            # Make a pool of workers
            pool = multiprocessing.Pool(maxProc)
            # List of jobs and results
            jobs = []
            # Pack input to resize3DField together
            coordsAndSizesTuple = (xCoordOld, yCoordOld, zCoordOld, xCoordNew,
                                   yCoordNew, zCoordNew, newNx, newNy, newNz)

            # Loop over the variables in the old file
            for var in old.list():
                # Read the data
                data = old.read(var)
                attributes = old.attributes(var)

                # Find 3D variables
                if old.ndims(var) == 3:

                    # Asynchronous call (locks first at .get())
                    jobs.append(
                        pool.apply_async(resize3DField,
                                         args=(
                                             var,
                                             data,
                                             coordsAndSizesTuple,
                                             method,
                                             mute,
                                         )))

                else:
                    if not (mute):
                        print("    Copying " + var)
                    newData = data.copy()
                    if not (mute):
                        print("Writing " + var)
                    new.write(var, newData)

            for job in jobs:
                var, newData = job.get()
                newData = BoutArray(newData, attributes=attributes)
                if not (mute):
                    print("Writing " + var)
                new.write(var, newData)

            # Close the pool of workers
            pool.close()
            # Wait for all processes to finish
            pool.join()

    return True
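A usage sketch (grid sizes and paths are hypothetical): interpolate the restart files in "data/" onto a 132 x 68 x 64 grid (ghost points included), using at most 4 worker processes.

resize(132, 68, 64, path="data", output="data_resized", method="linear", maxProc=4)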
Example No. 8
def collect(varname, xind=None, yind=None, zind=None, tind=None, path=".",
            yguards=False, xguards=True, info=True, prefix="BOUT.dmp",
            strict=False, tind_auto=False, datafile_cache=None):
    """Collect a variable from a set of BOUT++ outputs.

    Parameters
    ----------
    varname : str
        Name of the variable
    xind, yind, zind, tind : int, slice or list of int, optional
        Range of X, Y, Z or time indices to collect. Either a single
        index to collect, a list containing [start, end] (inclusive
        end), or a slice object (usual python indexing). Default is to
        fetch all indices
    path : str, optional
        Path to data files (default: ".")
    prefix : str, optional
        File prefix (default: "BOUT.dmp")
    yguards : bool, optional
        Collect Y boundary guard cells? (default: False)
    xguards : bool, optional
        Collect X boundary guard cells? (default: True)
        (Set to True to be consistent with the definition of nx)
    info : bool, optional
        Print information about collect? (default: True)
    strict : bool, optional
        Fail if the exact variable name is not found? (default: False)
    tind_auto : bool, optional
        Read all files, to get the shortest length of time_indices.
        Useful if writing got interrupted (default: False)
    datafile_cache : datafile_cache_tuple, optional
        Optional cache of open DataFile instances: namedtuple as returned
        by create_cache. Used by BoutOutputs to pass in a cache so that we
        do not have to re-open the dump files to read another variable
        (default: None)

    Examples
    --------

    >>> collect(name)
    BoutArray([[[[...]]]])

    """

    if datafile_cache is None:
        # Search for BOUT++ dump files
        file_list, parallel, _ = findFiles(path, prefix)
    else:
        parallel = datafile_cache.parallel
        file_list = datafile_cache.file_list

    def getDataFile(i):
        """Get the DataFile from the cache, if present, otherwise open the
        DataFile

        """
        if datafile_cache is not None:
            return datafile_cache.datafile_list[i]
        else:
            return DataFile(file_list[i])

    if parallel:
        if info:
            print("Single (parallel) data file")

        f = getDataFile(0)

        dimensions = f.dimensions(varname)

        try:
            mxg = f["MXG"]
        except KeyError:
            mxg = 0
            print("MXG not found, setting to {}".format(mxg))
        try:
            myg = f["MYG"]
        except KeyError:
            myg = 0
            print("MYG not found, setting to {}".format(myg))

        if xguards:
            nx = f["nx"]
        else:
            nx = f["nx"] - 2*mxg
        if yguards:
            ny = f["ny"] + 2*myg
        else:
            ny = f["ny"]
        nz = f["MZ"]
        t_array = f.read("t_array")
        if t_array is None:
            nt = 1
            t_array = np.zeros(1)
        else:
            try:
                nt = len(t_array)
            except TypeError:
                # t_array is not an array here, which probably means it was a
                # one-element array and has been read as a scalar.
                nt = 1

        xind = _convert_to_nice_slice(xind, nx, "xind")
        yind = _convert_to_nice_slice(yind, ny, "yind")
        zind = _convert_to_nice_slice(zind, nz, "zind")
        tind = _convert_to_nice_slice(tind, nt, "tind")

        if not xguards:
            xind = slice(xind.start+mxg, xind.stop+mxg, xind.step)
        if not yguards:
            yind = slice(yind.start+myg, yind.stop+myg, yind.step)

        if len(dimensions) == 0:
            ranges = []
        elif dimensions == ('t',):
            ranges = [tind]
        elif dimensions == ('x', 'y'):
            # Field2D
            ranges = [xind, yind]
        elif dimensions == ('x', 'z'):
            # FieldPerp
            ranges = [xind, zind]
        elif dimensions == ('t', 'x', 'y'):
            # evolving Field2D
            ranges = [tind, xind, yind]
        elif dimensions == ('t', 'x', 'z'):
            # evolving FieldPerp
            ranges = [tind, xind, zind]
        elif dimensions == ('x', 'y', 'z'):
            # Field3D
            ranges = [xind, yind, zind]
        elif dimensions == ('t', 'x', 'y', 'z'):
            # evolving Field3D
            ranges = [tind, xind, yind, zind]
        else:
            raise ValueError("Variable has incorrect dimensions ({})"
                             .format(dimensions))

        data = f.read(varname, ranges)
        var_attributes = f.attributes(varname)
        return BoutArray(data, attributes=var_attributes)

    nfiles = len(file_list)

    # Read data from the first file
    f = getDataFile(0)

    dimensions = f.dimensions(varname)

    if varname not in f.keys():
        if strict:
            raise ValueError("Variable '{}' not found".format(varname))
        else:
            varname = findVar(varname, f.list())

    var_attributes = f.attributes(varname)
    ndims = len(dimensions)

    # ndims is 0 for reals, and 1 for f.ex. t_array
    if ndims == 0:
        # Just read from file
        data = f.read(varname)
        if datafile_cache is None:
            # close the DataFile if we are not keeping it in a cache
            f.close()
        return BoutArray(data, attributes=var_attributes)

    if ndims > 4:
        raise ValueError("ERROR: Too many dimensions")

    def load_and_check(varname):
        var = f.read(varname)
        if var is None:
            raise ValueError("Missing " + varname + " variable")
        return var

    mxsub = load_and_check("MXSUB")
    mysub = load_and_check("MYSUB")
    mz = load_and_check("MZ")
    mxg = load_and_check("MXG")
    myg = load_and_check("MYG")
    t_array = f.read("t_array")
    if t_array is None:
        nt = 1
        t_array = np.zeros(1)
    else:
        try:
            nt = len(t_array)
        except TypeError:
            # t_array is not an array here, which probably means it was a
            # one-element array and has been read as a scalar.
            nt = 1
        if tind_auto:
            for i in range(nfiles):
                t_array_ = getDataFile(i).read("t_array")
                nt = min(len(t_array_), nt)

    if info:
        print("mxsub = %d mysub = %d mz = %d\n" % (mxsub, mysub, mz))

    # Get the version of BOUT++ (should be > 0.6 for NetCDF anyway)
    try:
        version = f["BOUT_VERSION"]
    except KeyError:
        print("BOUT++ version : Pre-0.2")
        version = 0
    if version < 3.5:
        # Remove extra point
        nz = mz-1
    else:
        nz = mz

    # Fallback to sensible (?) defaults
    try:
        nxpe = f["NXPE"]
    except KeyError:
        nxpe = 1
        print("NXPE not found, setting to {}".format(nxpe))
    try:
        nype = f["NYPE"]
    except KeyError:
        nype = nfiles
        print("NYPE not found, setting to {}".format(nype))

    npe = nxpe * nype
    if info:
        print("nxpe = %d, nype = %d, npe = %d\n" % (nxpe, nype, npe))
        if npe < nfiles:
            print("WARNING: More files than expected (" + str(npe) + ")")
        elif npe > nfiles:
            print("WARNING: Some files missing. Expected " + str(npe))

    if xguards:
        nx = nxpe * mxsub + 2*mxg
    else:
        nx = nxpe * mxsub

    if yguards:
        ny = mysub * nype + 2*myg
    else:
        ny = mysub * nype

    xind = _convert_to_nice_slice(xind, nx, "xind")
    yind = _convert_to_nice_slice(yind, ny, "yind")
    zind = _convert_to_nice_slice(zind, nz, "zind")
    tind = _convert_to_nice_slice(tind, nt, "tind")

    xsize = xind.stop - xind.start
    ysize = yind.stop - yind.start
    zsize = int(np.ceil(float(zind.stop - zind.start)/zind.step))
    tsize = int(np.ceil(float(tind.stop - tind.start)/tind.step))

    if ndims == 1:
        if tind is None:
            data = f.read(varname)
        else:
            data = f.read(varname, ranges=[tind])
        if datafile_cache is None:
            # close the DataFile if we are not keeping it in a cache
            f.close()
        return BoutArray(data, attributes=var_attributes)

    if datafile_cache is None:
        # close the DataFile if we are not keeping it in a cache
        f.close()

    # Map between dimension names and output size
    sizes = {'x': xsize, 'y': ysize, 'z': zsize, 't': tsize}

    # Create a list with size of each dimension
    ddims = [sizes[d] for d in dimensions]

    # Create the data array
    data = np.zeros(ddims)

    if dimensions == ('t', 'x', 'z') or dimensions == ('x', 'z'):
        yindex_global = None
        # The pe_yind that this FieldPerp is going to be read from
        fieldperp_yproc = None

    for i in range(npe):
        # Get X and Y processor indices
        pe_yind = int(i/nxpe)
        pe_xind = i % nxpe

        inrange = True

        if yguards:
            # Get local ranges
            ystart = yind.start - pe_yind*mysub
            ystop = yind.stop - pe_yind*mysub

            # Check lower y boundary
            if pe_yind == 0:
                # Keeping inner boundary
                if ystop <= 0:
                    inrange = False
                if ystart < 0:
                    ystart = 0
            else:
                if ystop < myg-1:
                    inrange = False
                if ystart < myg:
                    ystart = myg

            # Upper y boundary
            if pe_yind == (nype - 1):
                # Keeping outer boundary
                if ystart >= (mysub + 2*myg):
                    inrange = False
                if ystop > (mysub + 2*myg):
                    ystop = (mysub + 2*myg)
            else:
                if ystart >= (mysub + myg):
                    inrange = False
                if ystop > (mysub + myg):
                    ystop = (mysub + myg)

            # Calculate global indices
            ygstart = ystart + pe_yind * mysub
            ygstop = ystop + pe_yind * mysub

        else:
            # Get local ranges
            ystart = yind.start - pe_yind*mysub + myg
            ystop = yind.stop - pe_yind*mysub + myg

            if (ystart >= (mysub + myg)) or (ystop <= myg):
                inrange = False  # Y out of range

            if ystart < myg:
                ystart = myg
            if ystop > mysub + myg:
                ystop = myg + mysub

            # Calculate global indices
            ygstart = ystart + pe_yind * mysub - myg
            ygstop = ystop + pe_yind * mysub - myg

        if xguards:
            # Get local ranges
            xstart = xind.start - pe_xind*mxsub
            xstop = xind.stop - pe_xind*mxsub

            # Check lower x boundary
            if pe_xind == 0:
                # Keeping inner boundary
                if xstop <= 0:
                    inrange = False
                if xstart < 0:
                    xstart = 0
            else:
                if xstop <= mxg:
                    inrange = False
                if xstart < mxg:
                    xstart = mxg

            # Upper x boundary
            if pe_xind == (nxpe - 1):
                # Keeping outer boundary
                if xstart >= (mxsub + 2*mxg):
                    inrange = False
                if xstop > (mxsub + 2*mxg):
                    xstop = (mxsub + 2*mxg)
            else:
                if xstart >= (mxsub + mxg):
                    inrange = False
                if xstop > (mxsub + mxg):
                    xstop = (mxsub+mxg)

            # Calculate global indices
            xgstart = xstart + pe_xind * mxsub
            xgstop = xstop + pe_xind * mxsub

        else:
            # Get local ranges
            xstart = xind.start - pe_xind*mxsub + mxg
            xstop = xind.stop - pe_xind*mxsub + mxg

            if (xstart >= (mxsub + mxg)) or (xstop <= mxg):
                inrange = False  # X out of range

            if xstart < mxg:
                xstart = mxg
            if xstop > mxsub + mxg:
                xstop = mxg + mxsub

            # Calculate global indices
            xgstart = xstart + pe_xind * mxsub - mxg
            xgstop = xstop + pe_xind * mxsub - mxg

        # Number of local values
        nx_loc = xstop - xstart
        ny_loc = ystop - ystart

        if not inrange:
            continue  # Don't need this file

        if info:
            sys.stdout.write("\rReading from " + file_list[i] + ": [" +
                             str(xstart) + "-" + str(xstop-1) + "][" +
                             str(ystart) + "-" + str(ystop-1) + "] -> [" +
                             str(xgstart) + "-" + str(xgstop-1) + "][" +
                             str(ygstart) + "-" + str(ygstop-1) + "]")

        f = getDataFile(i)

        if dimensions == ('t', 'x', 'y', 'z'):
            d = f.read(varname, ranges=[tind,
                                        slice(xstart, xstop),
                                        slice(ystart, ystop),
                                        zind])
            data[:, (xgstart-xind.start):(xgstart-xind.start+nx_loc),
                 (ygstart-yind.start):(ygstart-yind.start+ny_loc), :] = d
        elif dimensions == ('x', 'y', 'z'):
            d = f.read(varname, ranges=[slice(xstart, xstop),
                                        slice(ystart, ystop),
                                        zind])
            data[(xgstart-xind.start):(xgstart-xind.start+nx_loc),
                 (ygstart-yind.start):(ygstart-yind.start+ny_loc), :] = d
        elif dimensions == ('t', 'x', 'y'):
            d = f.read(varname, ranges=[tind,
                                        slice(xstart, xstop),
                                        slice(ystart, ystop)])
            data[:, (xgstart-xind.start):(xgstart-xind.start+nx_loc),
                 (ygstart-yind.start):(ygstart-yind.start+ny_loc)] = d
        elif dimensions == ('t', 'x', 'z'):
            # FieldPerp should only be defined on processors which contain its yindex_global
            f_attributes = f.attributes(varname)
            temp_yindex = f_attributes["yindex_global"]

            if temp_yindex >= 0:
                if yindex_global is None:
                    yindex_global = temp_yindex

                    # we have found a file containing the FieldPerp, get the attributes from here
                    var_attributes = f_attributes
                assert temp_yindex == yindex_global

            if temp_yindex >= 0:
                # Check we only read from one pe_yind
                assert fieldperp_yproc is None or fieldperp_yproc == pe_yind

                fieldperp_yproc = pe_yind

                d = f.read(varname, ranges=[tind,
                                            slice(xstart, xstop),
                                            zind])
                data[:, (xgstart-xind.start):(xgstart-xind.start+nx_loc), :] = d
        elif dimensions == ('x', 'y'):
            d = f.read(varname, ranges=[slice(xstart, xstop),
                                        slice(ystart, ystop)])
            data[(xgstart-xind.start):(xgstart-xind.start+nx_loc),
                 (ygstart-yind.start):(ygstart-yind.start+ny_loc)] = d
        elif dimensions == ('x', 'z'):
            # FieldPerp should only be defined on processors which contain its yindex_global
            f_attributes = f.attributes(varname)
            temp_yindex = f_attributes["yindex_global"]

            if temp_yindex >= 0:
                if yindex_global is None:
                    yindex_global = temp_yindex

                    # we have found a file containing the FieldPerp, get the attributes from here
                    var_attributes = f_attributes
                assert temp_yindex == yindex_global

            if temp_yindex >= 0:
                # Check we only read from one pe_yind
                assert fieldperp_yproc is None or fieldperp_yproc == pe_yind

                fieldperp_yproc = pe_yind

                d = f.read(varname, ranges=[slice(xstart, xstop), zind])
                data[(xgstart-xind.start):(xgstart-xind.start+nx_loc), :] = d
        else:
            raise ValueError('Incorrect dimensions '+str(dimensions)+' in collect')

        if datafile_cache is None:
            # close the DataFile if we are not keeping it in a cache
            f.close()

    # if a step was requested in x or y, need to apply it here
    if xind.step is not None or yind.step is not None:
        if dimensions == ('t', 'x', 'y', 'z'):
            data = data[:, ::xind.step, ::yind.step]
        elif dimensions == ('x', 'y', 'z'):
            data = data[::xind.step, ::yind.step, :]
        elif dimensions == ('t', 'x', 'y'):
            data = data[:, ::xind.step, ::yind.step]
        elif dimensions == ('t', 'x', 'z'):
            data = data[:, ::xind.step, :]
        elif dimensions == ('x', 'y'):
            data = data[::xind.step, ::yind.step]
        elif dimensions == ('x', 'z'):
            data = data[::xind.step, :]
        else:
            raise ValueError('Incorrect dimensions '+str(dimensions)+' applying steps in collect')

    # Force the precision of arrays of dimension>1
    if ndims > 1:
        try:
            data = data.astype(t_array.dtype, copy=False)
        except TypeError:
            data = data.astype(t_array.dtype)

    # Finished looping over all files
    if info:
        sys.stdout.write("\n")
    return BoutArray(data, attributes=var_attributes)
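Usage sketches for the index arguments described in the docstring (variable name, path and indices are hypothetical); each of tind, xind, yind and zind accepts a single index, an inclusive [start, end] pair, or a slice:

n_all = collect("n", path="data")                  # everything
n_one = collect("n", path="data", tind=10)         # single time index
n_sub = collect("n", path="data", tind=[10, 20],   # inclusive index range
                xguards=False, info=False)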
Example No. 9
def _collect_from_single_file(
    f,
    varname,
    xind,
    yind,
    zind,
    tind,
    path,
    yguards,
    xguards,
    info,
    prefix,
    strict,
    datafile_cache,
):
    """
    Collect data from a single file

    Single file may be created by parallel writing saving all BOUT++ output to a single
    file, or by squashoutput() 'squashing' data from one file per processor into a
    single file.

    Parameters
    ----------
    f : DataFile
        Single file to read data from
    For description of remaining arguments, see docstring of collect().
    """
    if info:
        print("Single (parallel) data file")

    if varname not in f.keys():
        if strict:
            raise ValueError("Variable '{}' not found".format(varname))
        else:
            varname = findVar(varname, f.list())

    dimensions = f.dimensions(varname)

    try:
        mxg = f["MXG"]
    except KeyError:
        mxg = 0
        print("MXG not found, setting to {}".format(mxg))
    try:
        myg = f["MYG"]
    except KeyError:
        myg = 0
        print("MYG not found, setting to {}".format(myg))

    if xguards:
        nx = f["nx"]
    else:
        nx = f["nx"] - 2 * mxg
    if yguards:
        ny = f["ny"] + 2 * myg
        if yguards == "include_upper" and f["jyseps2_1"] != f["jyseps1_2"]:
            # Simulation has a second (upper) target, with a second set of y-boundary
            # points
            ny = ny + 2 * myg
    else:
        ny = f["ny"]
    nz = f["MZ"]
    t_array = f.read("t_array")
    if t_array is None:
        nt = 1
        t_array = np.zeros(1)
    else:
        try:
            nt = len(t_array)
        except TypeError:
            # t_array is not an array here, which probably means it was a
            # one-element array and has been read as a scalar.
            nt = 1

    xind = _convert_to_nice_slice(xind, nx, "xind")
    yind = _convert_to_nice_slice(yind, ny, "yind")
    zind = _convert_to_nice_slice(zind, nz, "zind")
    tind = _convert_to_nice_slice(tind, nt, "tind")

    if not xguards:
        xind = slice(xind.start + mxg, xind.stop + mxg, xind.step)
    if not yguards:
        yind = slice(yind.start + myg, yind.stop + myg, yind.step)

    dim_ranges = {"t": tind, "x": xind, "y": yind, "z": zind}
    ranges = [dim_ranges.get(dim, None) for dim in dimensions]

    data = f.read(varname, ranges)
    var_attributes = f.attributes(varname)
    return BoutArray(data, attributes=var_attributes)
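Illustration of the dimension-to-slice lookup above (the slice values are hypothetical); any dimension name not listed in dim_ranges falls back to None, i.e. is read in full:

dim_ranges = {"t": slice(0, 10), "x": slice(2, 6), "y": slice(0, 4), "z": slice(None)}
dimensions = ("t", "x", "z")   # e.g. an evolving FieldPerp
ranges = [dim_ranges.get(dim, None) for dim in dimensions]
# -> [slice(0, 10), slice(2, 6), slice(None, None, None)]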
Example No. 10
def collect(
    varname,
    xind=None,
    yind=None,
    zind=None,
    tind=None,
    path=".",
    yguards=False,
    xguards=True,
    info=True,
    prefix="BOUT.dmp",
    strict=False,
    tind_auto=False,
    datafile_cache=None,
):
    """Collect a variable from a set of BOUT++ outputs.

    Parameters
    ----------
    varname : str
        Name of the variable
    xind, yind, zind, tind : int, slice or list of int, optional
        Range of X, Y, Z or time indices to collect. Either a single
        index to collect, a list containing [start, end] (inclusive
        end), or a slice object (usual python indexing). Default is to
        fetch all indices
    path : str, optional
        Path to data files (default: ".")
    prefix : str, optional
        File prefix (default: "BOUT.dmp")
    yguards : bool or "include_upper", optional
        Collect Y boundary guard cells? (default: False)
        If yguards=="include_upper" the y-boundary cells from the upper (second) target
        are also included.
    xguards : bool, optional
        Collect X boundary guard cells? (default: True)
        (Set to True to be consistent with the definition of nx)
    info : bool, optional
        Print information about collect? (default: True)
    strict : bool, optional
        Fail if the exact variable name is not found? (default: False)
    tind_auto : bool, optional
        Read all files, to get the shortest length of time_indices.
        Useful if writing got interrupted (default: False)
    datafile_cache : datafile_cache_tuple, optional
        Optional cache of open DataFile instances: namedtuple as returned
        by create_cache. Used by BoutOutputs to pass in a cache so that we
        do not have to re-open the dump files to read another variable
        (default: None)

    Examples
    --------

    >>> collect(name)
    BoutArray([[[[...]]]])

    """

    if datafile_cache is None:
        # Search for BOUT++ dump files
        file_list, parallel, _ = findFiles(path, prefix)
    else:
        parallel = datafile_cache.parallel
        file_list = datafile_cache.file_list

    def getDataFile(i):
        """Get the DataFile from the cache, if present, otherwise open the
        DataFile

        """
        if datafile_cache is not None:
            return datafile_cache.datafile_list[i]
        else:
            return DataFile(file_list[i])

    if parallel:
        return _collect_from_single_file(
            getDataFile(0),
            varname,
            xind,
            yind,
            zind,
            tind,
            path,
            yguards,
            xguards,
            info,
            prefix,
            strict,
            datafile_cache,
        )

    nfiles = len(file_list)

    # Read data from the first file
    f = getDataFile(0)
    grid_info, tind, xind, yind, zind = _get_grid_info(
        f,
        xguards=xguards,
        yguards=yguards,
        tind=tind,
        xind=xind,
        yind=yind,
        zind=zind,
        nfiles=len(file_list),
    )

    if varname not in grid_info["varNames"]:
        if strict:
            raise ValueError("Variable '{}' not found".format(varname))
        else:
            varname = findVar(varname, f.list())

    dimensions = f.dimensions(varname)

    var_attributes = f.attributes(varname)
    ndims = len(dimensions)

    # ndims is 0 for reals, and 1 for f.ex. t_array
    if ndims == 0:
        # Just read from file
        data = f.read(varname)
        if datafile_cache is None:
            # close the DataFile if we are not keeping it in a cache
            f.close()
        return BoutArray(data, attributes=var_attributes)

    if ndims > 4:
        raise ValueError("ERROR: Too many dimensions")

    if tind_auto:
        nt = grid_info["nt"]
        for i in range(1, nfiles):
            f = getDataFile(i)
            t_array_ = f.read("t_array")
            nt = min(len(t_array_), nt)
            if datafile_cache is None:
                # close the DataFile if we are not keeping it in a cache
                f.close()
        grid_info["nt"] = nt

    if info:
        print("mxsub = {} mysub = {} mz = {}\n".format(grid_info["mxsub"],
                                                       grid_info["mysub"],
                                                       grid_info["nz"]))

        print("nxpe = {}, nype = {}, npes = {}\n".format(
            grid_info["nxpe"], grid_info["nype"], grid_info["npes"]))
        if grid_info["npes"] < nfiles:
            print("WARNING: More files than expected ({})".format(
                grid_info["npes"]))
        elif grid_info["npes"] > nfiles:
            print("WARNING: Some files missing. Expected {}".format(
                grid_info["npes"]))

    if not any(dim in dimensions for dim in ("x", "y", "z")):
        # Not a Field (i.e. no spatial dependence) so only read from the 0'th file
        result = _read_scalar(f, varname, dimensions, var_attributes, tind)
        if datafile_cache is None:
            # close the DataFile if we are not keeping it in a cache
            f.close()
        return result

    if datafile_cache is None:
        # close the DataFile if we are not keeping it in a cache
        f.close()

    # Create a list with size of each dimension
    ddims = [grid_info["sizes"][d] for d in dimensions]

    # Create the data array
    data = np.zeros(ddims)

    if dimensions == ("t", "x", "z") or dimensions == ("x", "z"):
        is_fieldperp = True
        yindex_global = None
        # The pe_yind that this FieldPerp is going to be read from
        fieldperp_yproc = None
    else:
        is_fieldperp = False

    for i in range(grid_info["npes"]):
        f = getDataFile(i)
        temp_yindex, temp_f_attributes = _collect_from_one_proc(
            i,
            f,
            varname,
            result=data,
            is_fieldperp=is_fieldperp,
            dimensions=dimensions,
            grid_info=grid_info,
            tind=tind,
            xind=xind,
            yind=yind,
            zind=zind,
            xguards=xguards,
            yguards=(yguards is not False),
            info=info,
        )
        if is_fieldperp:
            (
                yindex_global,
                fieldperp_yproc,
                var_attributes,
            ) = _check_fieldperp_attributes(
                varname,
                yindex_global,
                temp_yindex,
                i // grid_info["nxpe"],
                fieldperp_yproc,
                var_attributes,
                temp_f_attributes,
            )
        if datafile_cache is None:
            # close the DataFile if we are not keeping it in a cache
            f.close()

    # if a step was requested in x or y, need to apply it here
    data = _apply_step(data, dimensions, xind.step, yind.step)

    # Finished looping over all files
    if info:
        sys.stdout.write("\n")
    return BoutArray(data, attributes=var_attributes)
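The _convert_to_nice_slice helper itself is not shown in these examples; a minimal sketch of the behaviour the docstring implies (single index, inclusive [start, end] list, or slice, normalised against the dimension length) could look like this:

def _convert_to_nice_slice_sketch(ind, length, name="index"):
    # Hypothetical re-implementation for illustration only
    if ind is None:
        return slice(0, length, 1)
    if isinstance(ind, int):
        if ind < 0:
            ind += length                    # allow negative indices
        return slice(ind, ind + 1, 1)
    if isinstance(ind, (list, tuple)):       # [start, end] with inclusive end
        step = ind[2] if len(ind) > 2 else 1
        return slice(ind[0], ind[1] + 1, step)
    if isinstance(ind, slice):
        return slice(*ind.indices(length))
    raise ValueError("{} must be None, an int, a list or a slice".format(name))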
Example No. 11
def redistribute(npes,
                 path="data",
                 nxpe=None,
                 output=".",
                 informat=None,
                 outformat=None,
                 mxg=2,
                 myg=2):
    """Resize restart files across NPES processors.

    Does not check if new processor arrangement is compatible with the
    branch cuts. In this respect :py:func:`restart.split` is
    safer. However, BOUT++ checks the topology during initialisation
    anyway so this is not too serious.

    Parameters
    ----------
    npes : int
        Number of processors for the new restart files
    path : str, optional
        Path to original restart files (default: "data")
    nxpe : int, optional
        Number of processors to use in the x-direction (determines
        split: npes = nxpe * nype). Default is None which uses the
        same algorithm as BoutMesh (but without topology information)
        to determine a suitable value for nxpe.
    output : str, optional
        Location to save new restart files (default: current directory)
    informat : str, optional
        Specify file format of old restart files (must be a suffix
        understood by DataFile, e.g. 'nc'). Default uses the format of
        the first 'BOUT.restart.*' file listed by glob.glob.
    outformat : str, optional
        Specify file format of new restart files (must be a suffix
        understood by DataFile, e.g. 'nc'). Default is to use the same
        as informat.

    Returns
    -------
    True on success

    TODO
    ----
    - Replace printing errors with raising `ValueError`

    """

    if npes <= 0:
        print("ERROR: Negative or zero number of processors")
        return False

    if path == output:
        print("ERROR: Can't overwrite restart files")
        return False

    if informat is None:
        file_list = glob.glob(os.path.join(path, "BOUT.restart.*"))
    else:
        file_list = glob.glob(os.path.join(path, "BOUT.restart.*." + informat))

    nfiles = len(file_list)

    # Read old processor layout
    f = DataFile(file_list[0])

    # Get list of variables
    var_list = f.list()
    if len(var_list) == 0:
        print("ERROR: No data found")
        return False

    old_processor_layout = get_processor_layout(f, has_t_dimension=False)
    print("Grid sizes: ", old_processor_layout.nx, old_processor_layout.ny,
          old_processor_layout.mz)

    if nfiles != old_processor_layout.npes:
        print("WARNING: Number of restart files inconsistent with NPES")
        print("Setting nfiles = " + str(old_processor_layout.npes))
        nfiles = old_processor_layout.npes

    informat = file_list[0].split(".")[-1]
    if outformat is None:
        outformat = informat

    try:
        new_processor_layout = create_processor_layout(old_processor_layout,
                                                       npes,
                                                       nxpe=nxpe)
    except ValueError as e:
        print("Could not find valid processor split. " + str(e))
        return False

    nx = old_processor_layout.nx
    ny = old_processor_layout.ny
    mz = old_processor_layout.mz
    mxg = old_processor_layout.mxg
    myg = old_processor_layout.myg
    old_npes = old_processor_layout.npes
    old_nxpe = old_processor_layout.nxpe
    old_nype = old_processor_layout.nype
    old_mxsub = old_processor_layout.mxsub
    old_mysub = old_processor_layout.mysub

    nxpe = new_processor_layout.nxpe
    nype = new_processor_layout.nype
    mxsub = new_processor_layout.mxsub
    mysub = new_processor_layout.mysub
    mzsub = new_processor_layout.mz

    outfile_list = []
    for i in range(npes):
        outpath = os.path.join(output,
                               "BOUT.restart." + str(i) + "." + outformat)
        outfile_list.append(DataFile(outpath, write=True, create=True))

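    # cache the open DataFile objects so that collect() below does not
    # re-open every restart file for each variable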
    DataFileCache = create_cache(path, "BOUT.restart")

    for v in var_list:
        dimensions = f.dimensions(v)
        ndims = len(dimensions)

        # collect data
        data = collect(v,
                       xguards=True,
                       yguards=True,
                       info=False,
                       datafile_cache=DataFileCache)

        # write data
        for i in range(npes):
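            # x- and y-index of processor i in the new layout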
            ix = i % nxpe
            iy = i // nxpe
            outfile = outfile_list[i]
            if v == "NPES":
                outfile.write(v, npes)
            elif v == "NXPE":
                outfile.write(v, nxpe)
            elif v == "NYPE":
                outfile.write(v, nype)
            elif v == "MXSUB":
                outfile.write(v, mxsub)
            elif v == "MYSUB":
                outfile.write(v, mysub)
            elif v == "MZSUB":
                outfile.write(v, mzsub)
            elif dimensions == ():
                # scalar
                outfile.write(v, data)
            elif dimensions == ('x', 'y'):
                # Field2D
                outfile.write(
                    v, data[ix * mxsub:(ix + 1) * mxsub + 2 * mxg,
                            iy * mysub:(iy + 1) * mysub + 2 * myg])
            elif dimensions == ('x', 'z'):
                # FieldPerp
                yindex_global = data.attributes['yindex_global']
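                # only processors whose local y-range (including guard cells)
                # contains yindex_global hold the FieldPerp data; the rest
                # get a null array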
                if yindex_global + myg >= iy * mysub and yindex_global + myg < (
                        iy + 1) * mysub + 2 * myg:
                    outfile.write(
                        v, data[ix * mxsub:(ix + 1) * mxsub + 2 * mxg, :])
                else:
                    nullarray = BoutArray(np.zeros(
                        [mxsub + 2 * mxg, mysub + 2 * myg]),
                                          attributes={
                                              "bout_type": "FieldPerp",
                                              "yindex_global": -myg - 1
                                          })
                    outfile.write(v, nullarray)
            elif dimensions == ('x', 'y', 'z'):
                # Field3D
                outfile.write(
                    v, data[ix * mxsub:(ix + 1) * mxsub + 2 * mxg,
                            iy * mysub:(iy + 1) * mysub + 2 * myg, :])
            else:
                print("ERROR: variable found with unexpected dimensions,",
                      dimensions, v)

    f.close()
    for outfile in outfile_list:
        outfile.close()

    return True
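# Hypothetical usage of redistribute(): rewrite the restart files found in
# "data" for 8 processors, forcing a 2-wide split in x, and write the new
# files to "data_8procs" (directory names are illustrative and the output
# directory is assumed to exist already).
from boutdata.restart import redistribute

if not redistribute(8, path="data", nxpe=2, output="data_8procs"):
    print("redistribute failed; see error messages above")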
Ejemplo n.º 12
0
def squashoutput(
    datadir=".",
    outputname="BOUT.dmp.nc",
    format="NETCDF4",
    tind=None,
    xind=None,
    yind=None,
    zind=None,
    xguards=True,
    yguards="include_upper",
    singleprecision=False,
    compress=False,
    least_significant_digit=None,
    quiet=False,
    complevel=None,
    append=False,
    delete=False,
    tind_auto=False,
    parallel=False,
    time_split_size=None,
    time_split_first_label=0,
):
    """
    Collect all data from BOUT.dmp.* files and create a single output file.

    Parameters
    ----------
    datadir : str
        Directory where dump files are and where output file will be created.
        default "."
    outputname : str
        Name of the output file. File suffix specifies whether to use NetCDF or
        HDF5 (see boututils.datafile.DataFile for suffixes).
        default "BOUT.dmp.nc"
    format : str
        format argument passed to DataFile
        default "NETCDF4"
    tind : slice, int, or [int, int, int]
        tind argument passed to collect
        default None
    xind : slice, int, or [int, int, int]
        xind argument passed to collect
        default None
    yind : slice, int, or [int, int, int]
        yind argument passed to collect
        default None
    zind : slice, int, or [int, int, int]
        zind argument passed to collect
        default None
    xguards : bool
        xguards argument passed to collect
        default True
    yguards : bool or "include_upper"
        yguards argument passed to collect (note different default to collect's)
        default "include_upper"
    singleprecision : bool
        If true convert data to single-precision floats
        default False
    compress : bool
        If true enable compression in the output file
    least_significant_digit : int or None
        How many digits should be retained? Enables lossy
        compression. Default is lossless compression. Needs
        compression to be enabled.
    complevel : int or None
        Compression level, 1 should be fastest, and 9 should yield
        highest compression.
    quiet : bool
        Be less verbose. default False
    append : bool
        Append to existing squashed file
    delete : bool
        Delete the original files after squashing.
    tind_auto : bool, optional
        Read all files, to get the shortest length of time_indices. All data truncated
        to the shortest length.  Useful if writing got interrupted (default: False)
    parallel : bool or int, default False
        If set to True or 0, use the multiprocessing library to read data in parallel
        with the maximum number of available processors. If set to an int, use that many
        processes.
    time_split_size : int, optional
        By default no splitting is done. If an integer value is passed, the output is
        split into files with length in the t-dimension equal to that value. The outputs
        are labelled by inserting a counter (starting by default at 0, but see
        time_split_first_label) into the file name before the .nc suffix.
    time_split_first_label : int, default 0
        Value at which to start the counter labelling output files when time_split_size
        is used.
    """
    # use local imports to allow fast import for tab-completion
    from boutdata.data import BoutOutputs
    from boututils.datafile import DataFile
    from boututils.boutarray import BoutArray
    import numpy
    import os
    import gc
    import tempfile
    import shutil
    import glob

    try:
        # If we are using the netCDF4 module (the usual case) set caching to zero, since
        # each variable is read and written exactly once so caching does not help, only
        # uses memory - for large data sets, the memory usage may become excessive.
        from netCDF4 import get_chunk_cache, set_chunk_cache
    except ImportError:
        netcdf4_chunk_cache = None
    else:
        netcdf4_chunk_cache = get_chunk_cache()
        set_chunk_cache(0)

    fullpath = os.path.join(datadir, outputname)

    if append:
        if time_split_size is not None:
            raise ValueError(
                "'time_split_size' is not compatible with append=True")
        datadirnew = tempfile.mkdtemp(dir=datadir)
        for f in glob.glob(os.path.join(datadir, "BOUT.dmp.*.??")):
            if not quiet:
                print("moving", f, flush=True)
            shutil.move(f, datadirnew)
        oldfile = os.path.join(datadirnew, outputname)
        datadir = datadirnew

    # useful object from BOUT pylib to access output data
    outputs = BoutOutputs(
        datadir,
        info=False,
        xguards=xguards,
        yguards=yguards,
        tind=tind,
        xind=xind,
        yind=yind,
        zind=zind,
        tind_auto=tind_auto,
        parallel=parallel,
    )

    # Create file(s) for output and write data
    filenames, t_slices = _get_filenames_t_slices(time_split_size,
                                                  time_split_first_label,
                                                  fullpath, outputs.tind)

    if not append:
        for f in filenames:
            if os.path.isfile(f):
                raise ValueError(
                    "{} already exists, squashoutput() will not overwrite. Also, "
                    "for some filenames collect may try to read from this file, which "
                    "is presumably not desired behaviour.".format(fullpath))

    outputvars = outputs.keys()

    # Read a value to cache the files
    outputs[outputvars[0]]

    if append:
        # move only after the file list is cached
        shutil.move(fullpath, oldfile)

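    # move t_array to the end of the list so it is written last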
    t_array_index = outputvars.index("t_array")
    outputvars.append(outputvars.pop(t_array_index))

    kwargs = {}
    if compress:
        kwargs["zlib"] = True
        if least_significant_digit is not None:
            kwargs["least_significant_digit"] = least_significant_digit
        if complevel is not None:
            kwargs["complevel"] = complevel
    if append:
        old = DataFile(oldfile)
        # Check if dump on restart was enabled
        # If so, we want to drop the duplicated entry
        cropnew = 0
        if old["t_array"][-1] == outputs["t_array"][0]:
            cropnew = 1
        # Make sure we don't end up with duplicated data:
        for ot in old["t_array"]:
            if ot in outputs["t_array"][cropnew:]:
                raise RuntimeError(
                    "For some reason t_array has some duplicated entries in the new "
                    "and old file.")
    kwargs["format"] = format

    files = [
        DataFile(name, create=True, write=True, **kwargs) for name in filenames
    ]

    for varname in outputvars:
        if not quiet:
            print(varname, flush=True)

        var = outputs[varname]
        dims = outputs.dimensions[varname]
        if append:
            if "t" in dims:
                var = var[cropnew:, ...]
                varold = old[varname]
                var = BoutArray(numpy.append(varold, var, axis=0),
                                var.attributes)

        if singleprecision:
            if not isinstance(var, int):
                var = BoutArray(numpy.float32(var), var.attributes)

        if "t" in dims:
            for f, t_slice in zip(files, t_slices):
                f.write(varname, var[t_slice])
        else:
            for f in files:
                f.write(varname, var)

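        # drop the reference and force garbage collection so memory used by
        # large variables is freed before the next one is read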
        var = None
        gc.collect()

    # Copy file attributes
    for attrname in outputs.list_file_attributes():
        attrval = outputs.get_file_attribute(attrname)
        for f in files:
            f.write_file_attribute(attrname, attrval)

    for f in files:
        f.close()

    del outputs
    gc.collect()

    if delete:
        if append:
            os.remove(oldfile)
        for f in glob.glob(os.path.join(datadir, "BOUT.dmp.*.??")):
            if not quiet:
                print("Deleting", f, flush=True)
            os.remove(f)
        if append:
            os.rmdir(datadir)

    if netcdf4_chunk_cache is not None:
        # Reset the default chunk_cache size that was changed for squashoutput
        # Note that get_chunk_cache() returns a tuple, so we have to unpack it when
        # passing to set_chunk_cache.
        set_chunk_cache(*netcdf4_chunk_cache)
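# Hypothetical usage of squashoutput(): combine the per-processor
# BOUT.dmp.* files in "data" into a single, compressed, single-precision
# file (the directory name and option values are illustrative).
from boutdata.squashoutput import squashoutput

squashoutput(
    datadir="data",
    outputname="BOUT.dmp.nc",
    singleprecision=True,
    compress=True,
    complevel=4,
)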