Example #1 (score: 0)
def read_multifile(root: str = data_dir):
    """Open the GFS 0.25-degree multi-file dataset under ``root`` and print
    diagnostics for the 3-hourly-averaged downward shortwave flux variable.

    Parameters
    ----------
    root : str
        Directory containing the ``gfs.0p25*.nc`` files (defaults to the
        module-level ``data_dir``).
    """
    pattern = f"{root}/gfs.0p25*.nc"
    print(pattern)

    dataset = MFDataset(pattern)
    print(dataset.data_model)
    print(dataset.dimensions)
    print(dataset.ncattrs())

    ghi: Variable = dataset.variables["DSWRF_P8_L1_GLL0_avg3h"]
    print(ghi.dimensions)
    # Attribute fetched but discarded in the original as well; kept for parity.
    ghi.getncattr("initial_time0_hours")
    print(ghi[0][0][:])
def preprocess(filelist, begin_slice, end_slice, variable, output_filename):
    """Copy a time-slice of one variable from a multi-file dataset into a new
    NETCDF4_CLASSIC file, preserving coordinate variables and attributes.

    Parameters
    ----------
    filelist : str or list
        File list or glob pattern understood by ``MFDataset``.
    begin_slice, end_slice : int
        Index range along the time axis to extract.
    variable : str
        Name of the (time, lat, lon) variable to copy.
    output_filename : str
        Path of the NetCDF file to create.

    Returns
    -------
    str
        ``output_filename``, for convenient chaining.
    """
    # read in the existing dset
    ds = MFDataset(filelist)
    try:
        # build a new output dataset
        with Dataset(output_filename, 'w', format='NETCDF4_CLASSIC') as sub_ds:
            # Mirror the source grid; 'time' is unlimited so records can be
            # appended later.
            sub_ds.createDimension('lat', ds['lat'][:].shape[0])
            sub_ds.createDimension('lon', ds['lon'][:].shape[0])
            sub_ds.createDimension('time', None)

            # Create coordinate variables for the output grid.
            times = sub_ds.createVariable('time', np.float64, ('time', ))
            latitudes = sub_ds.createVariable('lat', np.float32, ('lat', ))
            longitudes = sub_ds.createVariable('lon', np.float32, ('lon', ))

            # Create the actual 3-d variable.
            data = sub_ds.createVariable(variable, np.float32,
                                         ('time', 'lat', 'lon'))

            # Copy the data, down-casting the payload to float32.
            times[:] = ds['time'][begin_slice:end_slice]
            latitudes[:] = ds['lat'][:]
            longitudes[:] = ds['lon'][:]
            data[:] = ds[variable][begin_slice:end_slice, ...].astype(np.float32)

            # Set up the global CF-convention-style meta attrs.
            sub_ds.setncatts({k: getattr(ds, k) for k in ds.ncattrs()})

            # Local variable meta attrs; underscore-prefixed attrs are skipped
            # (presumably reserved ones like _FillValue that cannot be set
            # after variable creation — TODO confirm).
            sub_ds[variable].setncatts({
                k: getattr(ds[variable], k)
                for k in ds[variable].ncattrs() if not k.startswith('_')
            })

            # Coordinate attrs (time, then lon/lat, as in the original order).
            for coord in ('time', 'lon', 'lat'):
                sub_ds[coord].setncatts(
                    {k: getattr(ds[coord], k)
                     for k in ds[coord].ncattrs()})
    finally:
        # The original leaked this handle; close the source dataset explicitly.
        ds.close()

    return output_filename
class EcoFOCI_mfnetCDF(object):
    """Thin convenience wrapper around ``MFDataset`` for reading multiple
    NetCDF files aggregated along a shared dimension."""

    def __init__(self, file_name=None, aggdim=None):
        """Initialize opening of multiple netcdf files along
        same dimension (aggdim) in same path.

        Parameters
        ----------
        file_name : str
            full path to file on disk (with wildcards)
        aggdim : str
            dimension name to aggregate along.  Slowest varying
            dimension or unlimited dimension will be chosen if
            no option is passed.

        """
        self.nchandle = MFDataset(file_name, 'a', aggdim=aggdim)
        self.file_name = file_name

    def get_global_atts(self):
        """Return all global attributes as a {name: value} dict."""
        return {name: self.nchandle.getncattr(name)
                for name in self.nchandle.ncattrs()}

    def get_vars(self):
        """Cache and return the mapping of variable name -> Variable."""
        self.variables = self.nchandle.variables
        return self.nchandle.variables

    def ncreadfile_dic(self):
        """Read every variable's full data array into a dict keyed by name.

        Note: the original guarded each variable with
        ``if v in self.nchandle.variables.keys()`` while iterating those same
        keys, so the fallback (``None``) branch was unreachable; the guard is
        dropped here with no change in behavior.
        """
        return {v: self.nchandle.variables[v][:]
                for v in self.nchandle.variables}

    def close(self):
        """Close the underlying multi-file dataset handle."""
        self.nchandle.close()
Example #4 (score: 0)
class NC4MFDataset(NC4Dataset):
    """Multi-file counterpart of ``NC4Dataset`` backed by ``MFDataset``."""

    def __init__(self, file_list, time_axis='time', **kwargs):
        # NOTE(review): deliberately does not call super().__init__ —
        # presumably the parent opens a single file; confirm this bypass
        # is intentional.
        self.time_axis = time_axis
        self.ds = MFDataset(file_list, **kwargs)

    def ncattrs_dict(self, varname=None):
        """Return the attributes of ``varname`` — or the global attributes
        when ``varname`` is falsy — as a plain dict."""
        target = self.ds.variables[varname] if varname else self.ds
        return {attr: getattr(target, attr) for attr in target.ncattrs()}

    def getncattr(self, attr):
        """Return a single global attribute by name."""
        return getattr(self.ds, attr)
Example #5 (score: 0)
def ncread(file, vars=None, dims=False, noisy=False, atts=False, datetimes=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the vars list.

    Optionally specify a dict with keys whose names match the dimension names
    in the netCDF file and whose values are strings specifying alternative
    ranges or lists of indices. For example, to extract the first hundred time
    steps, supply dims as:

        dims = {'time':'0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        dims = {'node':'[0, 3999, 9999]'}

    Any dimension not given in dims will be extracted in full.

    Specify atts=True to extract the variable attributes. Set datetimes=True
    to convert the FVCOM Modified Julian Day values to python datetime objects.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM netCDF output file. If a list,
        a series of files to be loaded. Data will be concatenated into a single
        dict.
    vars : list, optional
        List of variable names to be extracted. If omitted, all variables are
        returned.
    dims : dict, optional
        Dict whose keys are dimensions and whose values are a string of either
        a range (e.g. {'time':'0:100'}) or a list of individual indices (e.g.
        {'time':'[0, 1, 80, 100]'}). Slicing is supported (::5 for every fifth
        value).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).
    datetimes : bool, optional
        Set to True to convert FVCOM Modified Julian Days to Python datetime
        objects (creates a new `datetime' key in the output dict. Only
        applies if `vars' includes either the `Times' or `time' variables.
        Note: if FVCOM has been run with single precision output, then the
        conversion of the `time' values to a datetime object suffers rounding
        errors. It's best to either run FVCOM in double precision or specify
        only the `Times' data in the `vars' list.

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the netCDF file. Keys are those given in
        vars and the data are stored as ndarrays. If `datetimes' is True,
        then this also includes a `datetime' key in which is the FVCOM
        Modified Julian Day time series converted to Python datetime objects.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each
        variable in vars. The key `dims' contains the array dimensions (each
        variable contains the names of its dimensions) as well as the shape of
        the dimensions defined in the netCDF file. The key `global' contains
        the global attributes.

    See Also
    --------
    read_probes : read in FVCOM ASCII probes output files.

    """

    # Set to True when we've converted from Modified Julian Day so we don't
    # end up doing the conversion twice, once for `Times' and again for
    # `time' if both variables have been requested in `vars'.
    done_datetimes = False
    # Check whether we'll be able to fulfill the datetime request.
    if datetimes and vars and not list(set(vars) & set(('Times', 'time'))):
        raise ValueError("Conversion from Modified Julian Day to python "
                         "datetimes has been requested but no time variable "
                         "(`Times' or `time') has been requested in vars.")

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
        # still propagate. The re-raised IOError above is still caught here.
        except Exception:
            # Try aggregating along a 'time' dimension (for POLCOMS,
            # for example).
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))

    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes
    read_dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        # Make the dimensions ranges so we can use them to extract all the
        # values.
        read_dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the netCDF file with those provided. If we've
    # been given a dict of dimensions which differs from those in the netCDF
    # file, then use those.
    if dims:
        commonKeys = set(read_dims).intersection(list(dims.keys()))
        for k in commonKeys:
            read_dims[k] = dims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not vars:
        # A list, not an iterator: `key in vars` below must be a repeatable
        # membership test. The original wrapped this in iter(), which only
        # worked because the membership checks happened in iteration order.
        vars = list(rootgrp.variables.keys())

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = read_dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in vars:
            vDims = rootgrp.variables[key].dimensions

            toExtract = [read_dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function to
            # extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # SECURITY NOTE: eval() executes the contents of the `dims`
            # strings — only pass trusted index specifications. Kept because
            # the string-based slicing syntax ('0:100', '::5') cannot be
            # expressed with plain indexing without a custom parser.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes.
            if atts:
                attributes[key] = {}
                # Missing attributes raise AttributeError on netCDF variables;
                # anything else should propagate (was a bare except).
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except AttributeError:
                    pass

                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except AttributeError:
                    pass

            if datetimes and key in ('Times', 'time') and not done_datetimes:
                # Convert the time data to datetime objects. How we do this
                # depends on which we hit first - `Times' or `time'. For the
                # former, we need to parse the strings, for the latter we can
                # leverage num2date from the netCDF4 module and use the time
                # units attribute.
                if key == 'Times':
                    try:
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y-%m-%dT%H:%M:%S.%f') for i in FVCOM[key]]
                    except ValueError:
                        # Try a different format before bailing out.
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y/%m/%d %H:%M:%S.%f') for i in FVCOM[key]]

                    done_datetimes = True
                elif key == 'time':
                    FVCOM['datetime'] = num2date(FVCOM[key],
                                                 rootgrp.variables[key].units)
                    done_datetimes = True

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')

        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM
Example #6 (score: 0)
def readFVCOM(file, varList=None, clipDims=False, noisy=False, atts=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the varList list.

    Optionally specify a dict with keys whose names match the dimension names
    in the NetCDF file and whose values are strings specifying alternative
    ranges or lists of indices. For example, to extract the first hundred time
    steps, supply clipDims as:

        clipDims = {'time':'0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        clipDims = {'node':'[0, 3999, 9999]'}

    Any dimension not given in clipDims will be extracted in full.

    Specify atts=True to extract the variable attributes.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM NetCDF output file. If a list,
        a series of files to be loaded. Data will be concatenated into a single
        dict.
    varList : list, optional
        List of variable names to be extracted. If omitted, all variables are
        returned.
    clipDims : dict, optional
        Dict whose keys are dimensions and whose values are a string of either
        a range (e.g. {'time':'0:100'}) or a list of individual indices (e.g.
        {'time':'[0, 1, 80, 100]'}). Slicing is supported (::5 for every fifth
        value) but it is not possible to extract data from the end of the array
        with a negative index (e.g. 0:-4).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the NetCDF file. Keys are those given in
        varList and the data are stored as ndarrays.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each
        variable in varList. The key 'dims' contains the array dimensions (each
        variable contains the names of its dimensions) as well as the shape of
        the dimensions defined in the NetCDF file. The key 'global' contains
        the global attributes.

    See Also
    --------
    readProbes : read in FVCOM ASCII probes output files.

    """

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
        # still propagate. The re-raised IOError above is still caught here.
        except Exception:
            # Try aggregating along a 'time' dimension (for POLCOMS, for example)
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))

    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes
    dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        # Make the dimensions ranges so we can use them to extract all the
        # values.
        dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the NetCDF file with those provided. If we've
    # been given a dict of dimensions which differs from those in the NetCDF
    # file, then use those.
    if clipDims:
        commonKeys = set(dims).intersection(list(clipDims.keys()))
        for k in commonKeys:
            dims[k] = clipDims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not varList:
        # A list, not an iterator: `key in varList` below must be a repeatable
        # membership test. The original wrapped this in iter(), which only
        # worked because the membership checks happened in iteration order.
        varList = list(rootgrp.variables.keys())

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in varList:
            vDims = rootgrp.variables[key].dimensions

            toExtract = [dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function to
            # extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # SECURITY NOTE: eval() executes the contents of the `clipDims`
            # strings — only pass trusted index specifications. Kept because
            # the string-based slicing syntax ('0:100', '::5') cannot be
            # expressed with plain indexing without a custom parser.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes.
            if atts:
                attributes[key] = {}
                # Missing attributes raise AttributeError on netCDF variables;
                # anything else should propagate (was a bare except).
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except AttributeError:
                    pass

                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except AttributeError:
                    pass

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')

        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM