def open(filename, value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True, **kwargs): # {{{
    '''
    open (filename, [value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True])

    Wraps every variable indexed in a GRIB file as a GribVar, collects them
    into a Dataset and returns the result of finalize_open on that Dataset.

    filename       - passed to GribFile to open/index the file
    value_override - not used by this reader
    dimtypes, namemap, varlist, cfmeta
                   - forwarded unchanged to finalize_open
    **kwargs       - any additional keyword arguments are accepted and ignored

    Variables that share a name with another variable in the file are renamed
    to '<name>_<suffix>', where the suffix is element [1] of the level_types
    entry for the variable's level_type.
    '''
    from collections import Counter
    from pygeode.dataset import Dataset
    from pygeode.formats import finalize_open

    grib_file = GribFile(filename)
    nvars = lib.get_nvars(grib_file.index)
    grib_vars = [GribVar(grib_file, i) for i in range(nvars)]

    # Tally the names once, up front.  The tally reflects the names as they
    # were before any renaming, and avoids re-scanning the whole name list for
    # every variable.
    name_counts = Counter(v.name for v in grib_vars)

    # Append the level type to vars with the same name
    for v in grib_vars:
        if name_counts[v.name] > 1:
            v.name = v.name + '_' + level_types[v.level_type][1]

    return finalize_open(Dataset(grib_vars), dimtypes, namemap, varlist, cfmeta)
# }}}
def open(filename, value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True): # {{{
    '''
    open (filename, [value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True])

    Returns a Dataset or dictionary of Datasets of PyGeode variables contained
    in the specified files. The axes of the variables are created from the
    dimensions of the NetCDF file. NetCDF variables in the file that do not
    correspond to dimensions are imported as PyGeode variables.

    filename - NetCDF file to open
    value_override - an optional dictionary with replacement values for one or
                     more variables. The only known use for this dictionary is
                     to avoid loading in values from a severely scattered
                     variable (such as a 'time' axis or other slowest-varying
                     dimension).
    dimtypes - a dictionary mapping dimension names to axis classes. The keys
               should be axis names as defined in the NetCDF file; values
               should be one of:
                 1) an axis instance,
                 2) an axis class, or
                 3) a tuple of an axis class and a dictionary with keyword
                    arguments to pass to that axis' constructor
               If no dictionary is included, an attempt is made to
               automatically identify the axis types.
    namemap - an optional dictionary to map NetCDF variable names (keys) to
              PyGeode variable names (values); also works for axes/dimensions
    varlist - a list containing the variables that should be loaded into the
              data set (if the list is empty, all NetCDF variables will be
              loaded)
    cfmeta - forwarded unchanged to finalize_open

    Returns:
      - if the file contains groups: a dictionary mapping str(group name) to
        the finalized Dataset built from that group;
      - otherwise: the single finalized Dataset.

    Note:
      - The identifiers used in varlist and dimtypes are the original names
        used in the NetCDF file, not the names given in namemap.
      - The optional arguments are not currently supported for netcdf4 files
        containing groups.
      - Errors raised while opening or reading the file (e.g. IOError) are
        not handled here; they propagate to the caller unchanged.
    '''
    import netCDF4 as nc
    from pygeode.formats import finalize_open
    from pygeode.axis import Axis

    # Read the file
    f = nc.Dataset(filename, "r")

    if f.groups:
        # One Dataset per group.  None of the optional arguments are applied
        # on this path (finalize_open is called with its own defaults).
        dataset = {str(key): make_dataset(group) for key, group in f.groups.items()}
        dataset = {key: dims2axes(ds) for key, ds in dataset.items()}
        return {key: finalize_open(ds) for key, ds in dataset.items()}

    dataset = make_dataset(f)

    # Add the object stuff from dimtypes to value_override, so we don't trigger
    # a load operation on those dims.
    # (We could use any values here, since they'll be overridden again later,
    #  but we might as well use something relevant).
    value_override = dict(value_override)  # don't use the default (static) empty dict
    for k, v in dimtypes.items():
        if isinstance(v, Axis):
            value_override[k] = v.values

    #### Filters to apply to the data ####

    # Override values from the source?
    if value_override:
        dataset = override_values(dataset, value_override)

    # Set up the proper axes (get coordinate values / metadata from a 1D
    # variable with the same name as the dimension)
    dataset = dims2axes(dataset)

    return finalize_open(dataset, dimtypes, namemap, varlist, cfmeta)
# }}}
def open(filename, value_override={}, dimtypes={}, namemap={}, varlist=[], cfmeta=True):
    '''
    Open an HDF4 file and return its scientific datasets (SDs) as a PyGeode
    dataset, post-processed by finalize_open.

    filename       - passed to HDF4_File
    value_override - not used by this reader
    dimtypes, namemap, varlist, cfmeta
                   - forwarded unchanged to finalize_open

    1D SDs whose dimension id is unique among the 1D SDs, or which are flagged
    with iscoord == 1, are loaded eagerly and turned into NamedAxis objects.
    Any other dimension gets a DummyAxis.  The SDs used as axes are not
    returned as variables.  The file's global attributes are stored on the
    dataset's 'atts'.

    Raises AssertionError if SDfileinfo returns a nonzero status.
    '''
    from numpy import empty
    from ctypes import c_long, byref
    from pygeode.axis import DummyAxis, NamedAxis
    from pygeode.dataset import asdataset
    from pygeode.formats import finalize_open

    f = HDF4_File(filename)

    # Query the number of datasets and global attributes in the file
    num_datasets = c_long()
    num_global_attrs = c_long()
    ret = lib.SDfileinfo(f.sd_id, byref(num_datasets), byref(num_global_attrs))
    assert ret == 0
    num_datasets = num_datasets.value
    num_global_attrs = num_global_attrs.value
    global_atts = get_attributes(f.sd_id, num_global_attrs)

    # Get the HDF vars
    SD_arr = [HDF4_SD(f, i) for i in range(num_datasets)]

    # If there are 2 vars of the name XXXX and XXXX:EOSGRID, then
    # ignore the first one and use the latter one.
    # (Based on some GMAO files from the IPY dataset)
    SD_arr = [
        sd for sd in SD_arr
        if sd.name.endswith(':EOSGRID')
        or not any(sd2.name == sd.name + ':EOSGRID' for sd2 in SD_arr)
    ]

    # Find any 'axes'
    # (look for unique 1D vars which contain a particular dimension id)
    sd_1d = [sd for sd in SD_arr if sd.rank == 1]

    # Determine which dimensions map to a unique 1D array
    dimids = [sd.dimids[0] for sd in sd_1d]
    dimsds = [
        s for s in sd_1d
        if dimids.count(s.dimids[0]) == 1 or s.iscoord == 1
    ]

    # Load axis values
    for s in dimsds:
        s.values = empty(s.shape, numpy_type[s.type])
        load_values(s.sds_id, [0], s.shape, s.values)

    # Create axis objects (with the SD's attributes attached)
    axes = [
        NamedAxis(s.values, s.name, atts=get_attributes(s.sds_id, s.natts))
        for s in dimsds
    ]

    # Reference axes by dimension ids.
    # Each axis is keyed on the dimension id of the SD it was built from.
    # ('dimids' is aligned with sd_1d, not with the filtered dimsds, so it must
    #  not be indexed by position in 'axes'.)
    axis_lookup = {s.dimids[0]: a for s, a in zip(dimsds, axes)}

    # Add dummy axes for dimensions without coordinate info.
    for s in SD_arr:
        for d in s.dimids:
            if d not in axis_lookup:
                dimname, dimsize, _dimtype, _dim_natts = get_dim_info(d)
                axis_lookup[d] = DummyAxis(dimsize, dimname)

    # Create var objects, then drop the ones that were used as axes
    hdf_vars = [HDF4_Var(s, [axis_lookup[d] for d in s.dimids]) for s in SD_arr]
    hdf_vars = [v for v in hdf_vars if v.sd not in dimsds]

    # Return a dataset
    d = asdataset(hdf_vars)
    d.atts = global_atts
    return finalize_open(d, dimtypes, namemap, varlist, cfmeta)
def open(filename, value_override={}, dimtypes={}, namemap={}, varlist=[], cfmeta=True): # {{{
    '''
    open (filename, [value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True])

    Returns a Dataset of PyGeode variables contained in the specified files.
    The axes of the variables are created from the dimensions of the NetCDF
    file. NetCDF variables in the file that do not correspond to dimensions
    are imported as PyGeode variables.

    filename - NetCDF file to open.  Names starting with 'http://' or
               'https://' are not checked for existence on the local
               filesystem; they are handed to NCFile as-is.
    value_override - an optional dictionary with replacement values for one or
                     more variables. The only known use for this dictionary is
                     to avoid loading in values from a severely scattered
                     variable (such as a 'time' axis or other slowest-varying
                     dimension).
    dimtypes - a dictionary mapping dimension names to axis classes. The keys
               should be axis names as defined in the NetCDF file; values
               should be one of:
                 1) an axis instance,
                 2) an axis class, or
                 3) a tuple of an axis class and a dictionary with keyword
                    arguments to pass to that axis' constructor
               If no dictionary is included, an attempt is made to
               automatically identify the axis types.
    namemap - an optional dictionary to map NetCDF variable names (keys) to
              PyGeode variable names (values); also works for axes/dimensions
    varlist - a list containing the variables that should be loaded into the
              data set (if the list is empty, all NetCDF variables will be
              loaded)
    cfmeta - forwarded unchanged to finalize_open

    Raises AssertionError if a non-URL filename does not exist, or if
    nc_inq_nvars returns a nonzero status.

    Note: The identifiers used in varlist and dimtypes are the original names
          used in the NetCDF file, not the names given in namemap.
    '''
    from os.path import exists
    from ctypes import c_int, byref
    from pygeode.dataset import asdataset
    from pygeode.formats import finalize_open
    from pygeode.axis import Axis

    # A URL can't be tested with os.path.exists, so only check local paths.
    if not filename.startswith(('http://', 'https://')):
        assert exists(filename), 'File open failed. "%s" does not exist.' % filename

    # Read variable dimensions and metadata from the file
    f = NCFile(filename)
    f.open()
    try:
        fileid = f.fileid

        # Get number of variables
        nvars = c_int()
        ret = lib.nc_inq_nvars(fileid, byref(nvars))
        assert ret == 0, lib.nc_strerror(ret)
        nvars = nvars.value

        # Construct all the variables, put in a list
        vars = [NCVar(f, i) for i in range(nvars)]

        # Construct a dataset from these Vars
        dataset = asdataset(vars)
        dataset.atts = get_attributes(fileid, -1)
    finally:
        # Always release the file handle, even if the metadata read failed
        f.close()

    # Add the object stuff from dimtypes to value_override, so we don't trigger
    # a load operation on those dims.
    # (We could use any values here, since they'll be overridden again later,
    #  but we might as well use something relevant).
    value_override = dict(value_override)  # don't use the default (static) empty dict
    for k, v in dimtypes.items():
        if isinstance(v, Axis):
            value_override[k] = v.values

    #### Filters to apply to the data ####

    # Override values from the source?
    if value_override:
        dataset = override_values(dataset, value_override)

    # Set up the proper axes (get coordinate values / metadata from a 1D
    # variable with the same name as the dimension)
    dataset = dims2axes(dataset)

    return finalize_open(dataset, dimtypes, namemap, varlist, cfmeta)
# }}}
def open(filename, value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True): # {{{
    '''
    open (filename, [value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True])

    Returns a Dataset of PyGeode variables contained in the specified files.
    The axes of the variables are created from the dimensions of the NetCDF
    file. NetCDF variables in the file that do not correspond to dimensions
    are imported as PyGeode variables.

    filename - NetCDF file to open.  Names starting with 'http://' or
               'https://' are not checked for existence on the local
               filesystem; they are handed to NCFile as-is.
    value_override - an optional dictionary with replacement values for one or
                     more variables. The only known use for this dictionary is
                     to avoid loading in values from a severely scattered
                     variable (such as a 'time' axis or other slowest-varying
                     dimension).
    dimtypes - a dictionary mapping dimension names to axis classes. The keys
               should be axis names as defined in the NetCDF file; values
               should be one of:
                 1) an axis instance,
                 2) an axis class, or
                 3) a tuple of an axis class and a dictionary with keyword
                    arguments to pass to that axis' constructor
               If no dictionary is included, an attempt is made to
               automatically identify the axis types.
    namemap - an optional dictionary to map NetCDF variable names (keys) to
              PyGeode variable names (values); also works for axes/dimensions
    varlist - a list containing the variables that should be loaded into the
              data set (if the list is empty, all NetCDF variables will be
              loaded)
    cfmeta - forwarded unchanged to finalize_open

    Raises AssertionError if a non-URL filename does not exist, or if
    nc_inq_nvars returns a nonzero status.

    Note: The identifiers used in varlist and dimtypes are the original names
          used in the NetCDF file, not the names given in namemap.
    '''
    from os.path import exists
    from ctypes import c_int, byref
    from pygeode.dataset import asdataset
    from pygeode.formats import finalize_open
    from pygeode.axis import Axis

    # Only local paths can be verified with os.path.exists; URLs are left for
    # the library to resolve.
    is_url = filename.startswith(('http://', 'https://'))
    if not is_url:
        assert exists(filename), 'File open failed. "%s" does not exist.' % filename

    # Read variable dimensions and metadata from the file
    f = NCFile(filename)
    f.open()
    try:
        fileid = f.fileid

        # Get number of variables
        nvars = c_int()
        ret = lib.nc_inq_nvars(fileid, byref(nvars))
        assert ret == 0, lib.nc_strerror(ret)
        nvars = nvars.value

        # Construct all the variables, put in a list
        vars = [NCVar(f, i) for i in range(nvars)]

        # Construct a dataset from these Vars
        dataset = asdataset(vars)
        dataset.atts = get_attributes(fileid, -1)
    finally:
        # The handle is closed whether or not the metadata read succeeded
        f.close()

    # Add the object stuff from dimtypes to value_override, so we don't trigger
    # a load operation on those dims.
    # (We could use any values here, since they'll be overridden again later,
    #  but we might as well use something relevant).
    value_override = dict(value_override)  # don't use the default (static) empty dict
    for k, v in dimtypes.items():
        if isinstance(v, Axis):
            value_override[k] = v.values

    #### Filters to apply to the data ####

    # Override values from the source?
    if value_override:
        dataset = override_values(dataset, value_override)

    # Set up the proper axes (get coordinate values / metadata from a 1D
    # variable with the same name as the dimension)
    dataset = dims2axes(dataset)

    return finalize_open(dataset, dimtypes, namemap, varlist, cfmeta)
# }}}
def open (filename, value_override = {}, dimtypes = {}, namemap = {}, varlist = [], cfmeta = True):
    '''
    Open an HDF4 file and return its scientific datasets (SDs) as a PyGeode
    dataset, post-processed by finalize_open.

    filename       - passed to HDF4_File
    value_override - not used by this reader
    dimtypes, namemap, varlist, cfmeta
                   - forwarded unchanged to finalize_open

    1D SDs whose dimension id is unique among the 1D SDs, or which are flagged
    with iscoord == 1, are loaded eagerly and turned into NamedAxis objects.
    Any other dimension gets a DummyAxis.  The SDs used as axes are not
    returned as variables.  The file's global attributes are stored on the
    dataset's 'atts'.

    Raises AssertionError if SDfileinfo returns a nonzero status.
    '''
    from numpy import empty
    from ctypes import c_long, byref
    from pygeode.axis import DummyAxis, NamedAxis
    from pygeode.dataset import asdataset
    from pygeode.formats import finalize_open

    f = HDF4_File(filename)

    # Query the number of datasets and global attributes in the file
    num_datasets = c_long()
    num_global_attrs = c_long()
    ret = lib.SDfileinfo(f.sd_id, byref(num_datasets), byref(num_global_attrs))
    assert ret == 0
    num_datasets = num_datasets.value
    num_global_attrs = num_global_attrs.value
    global_atts = get_attributes(f.sd_id, num_global_attrs)

    # Get the HDF vars
    SD_arr = [HDF4_SD(f, i) for i in range(num_datasets)]

    # If there are 2 vars of the name XXXX and XXXX:EOSGRID, then
    # ignore the first one and use the latter one.
    # (Based on some GMAO files from the IPY dataset)
    SD_arr = [
        sd for sd in SD_arr
        if sd.name.endswith(':EOSGRID')
        or not any(sd2.name == sd.name + ':EOSGRID' for sd2 in SD_arr)
    ]

    # Find any 'axes'
    # (look for unique 1D vars which contain a particular dimension id)
    sd_1d = [sd for sd in SD_arr if sd.rank == 1]

    # Determine which dimensions map to a unique 1D array
    dimids = [sd.dimids[0] for sd in sd_1d]
    dimsds = [
        s for s in sd_1d
        if dimids.count(s.dimids[0]) == 1 or s.iscoord == 1
    ]

    # Load axis values, build one NamedAxis per coordinate SD, and index the
    # axes by dimension id.
    # The key comes from the SD the axis was built from: 'dimids' lines up
    # with sd_1d rather than the filtered dimsds, so indexing it by position
    # in the axis list would pair axes with the wrong dimensions whenever the
    # filter dropped an entry.
    axis_lookup = {}
    for s in dimsds:
        s.values = empty(s.shape, numpy_type[s.type])
        load_values(s.sds_id, [0], s.shape, s.values)
    for s in dimsds:
        atts = get_attributes(s.sds_id, s.natts)
        axis_lookup[s.dimids[0]] = NamedAxis(s.values, s.name, atts=atts)

    # Add dummy axes for dimensions without coordinate info.
    for s in SD_arr:
        for d in s.dimids:
            if d not in axis_lookup:
                dimname, dimsize, _dimtype, _dim_natts = get_dim_info(d)
                axis_lookup[d] = DummyAxis(dimsize, dimname)

    # Create var objects, then drop the ones that were used as axes
    hdf_vars = []
    for s in SD_arr:
        var_axes = [axis_lookup[d] for d in s.dimids]
        hdf_vars.append(HDF4_Var(s, var_axes))
    hdf_vars = [v for v in hdf_vars if v.sd not in dimsds]

    # Return a dataset
    d = asdataset(hdf_vars)
    d.atts = global_atts
    return finalize_open(d, dimtypes, namemap, varlist, cfmeta)