def test_issue005():
    """Regression test for issue 005 (time axes that start at year 0).

    Saves a ModelTime365 axis starting at year 0 to netCDF and checks that:

    * reloading gives back a ModelTime365 axis that still has a 'year' field
      (i.e. it was not turned into a climatology),
    * the ``dimtypes`` override of ``nc.open`` still takes effect,
    * an axis built without the 'year' field still loads without one.
    """
    import os
    import shutil
    import tempfile

    import numpy as np

    from pygeode import timeutils
    from pygeode.axis import TAxis
    from pygeode.formats import netcdf as nc
    from pygeode.timeaxis import ModelTime365
    from pygeode.var import Var

    # Work inside a private scratch directory so the test does not leave
    # "issue005_test.nc" behind in (or depend on) the current directory.
    scratch_dir = tempfile.mkdtemp()
    filename = os.path.join(scratch_dir, "issue005_test.nc")
    try:
        # Make a time axis starting at year 0
        startdate = dict(year=0, month=1, day=1)
        taxis = ModelTime365(values=10200, startdate=startdate, units='days')

        # Make some dummy variable
        np.random.seed(len(taxis))
        values = np.random.randn(len(taxis))
        var = Var(axes=[taxis], values=values, name='x')

        # Save it
        nc.save(filename, var)

        # Load it
        f = nc.open(filename)

        # Make sure we have a regular time axis
        # (no climatologies!)
        assert f.time.__class__ == ModelTime365
        assert hasattr(f.time, 'year')

        # Okay, now reload it, but override the axis coming in
        f = nc.open(filename, dimtypes=dict(time=TAxis(taxis.values)))

        # Make sure dimtypes is still working properly
        assert f.x.axes[0].__class__ == TAxis

        # For good measure, test that climatologies are still produced
        taxis = timeutils.modify(taxis, exclude='year', uniquify=True)
        values = np.random.randn(len(taxis))
        var = Var(axes=[taxis], values=values, name='c')
        nc.save(filename, var)
        f = nc.open(filename)
        assert not hasattr(f.time, 'year')
    finally:
        # ignore_errors: a still-open file handle must not mask the real
        # test outcome on platforms that refuse to delete open files.
        shutil.rmtree(scratch_dir, ignore_errors=True)
def test_issue005():
    """Regression test for issue 005 (time axes that start at year 0).

    Saves a ModelTime365 axis starting at year 0 to netCDF and checks that:

    * reloading gives back a ModelTime365 axis that still has a 'year' field
      (i.e. it was not turned into a climatology),
    * the ``dimtypes`` override of ``nc.open`` still takes effect,
    * an axis built without the 'year' field still loads without one.
    """
    import os
    import shutil
    import tempfile

    import numpy as np

    from pygeode import timeutils
    from pygeode.axis import TAxis
    from pygeode.formats import netcdf as nc
    from pygeode.timeaxis import ModelTime365
    from pygeode.var import Var

    # Work inside a private scratch directory so the test does not leave
    # "issue005_test.nc" behind in (or depend on) the current directory.
    scratch_dir = tempfile.mkdtemp()
    filename = os.path.join(scratch_dir, "issue005_test.nc")
    try:
        # Make a time axis starting at year 0
        startdate = dict(year=0, month=1, day=1)
        taxis = ModelTime365(values=10200, startdate=startdate, units='days')

        # Make some dummy variable
        np.random.seed(len(taxis))
        values = np.random.randn(len(taxis))
        var = Var(axes=[taxis], values=values, name='x')

        # Save it
        nc.save(filename, var)

        # Load it
        f = nc.open(filename)

        # Make sure we have a regular time axis
        # (no climatologies!)
        assert f.time.__class__ == ModelTime365
        assert hasattr(f.time, 'year')

        # Okay, now reload it, but override the axis coming in
        f = nc.open(filename, dimtypes=dict(time=TAxis(taxis.values)))

        # Make sure dimtypes is still working properly
        assert f.x.axes[0].__class__ == TAxis

        # For good measure, test that climatologies are still produced
        taxis = timeutils.modify(taxis, exclude='year', uniquify=True)
        values = np.random.randn(len(taxis))
        var = Var(axes=[taxis], values=values, name='c')
        nc.save(filename, var)
        f = nc.open(filename)
        assert not hasattr(f.time, 'year')
    finally:
        # ignore_errors: a still-open file handle must not mask the real
        # test outcome on platforms that refuse to delete open files.
        shutil.rmtree(scratch_dir, ignore_errors=True)
def __init__(self, taxis, coef=None, A=None, B=None):
    """Initialise the variable from a target time axis and coefficients.

    The coefficients come either as one ``coef`` variable, or as the pair
    ``A`` and ``B`` which is combined through ``merge_coefs(B, A)``.  Giving
    both forms, or neither, trips an assertion.  The resulting variable has
    the axes of ``coef`` with the time axis replaced by ``taxis`` and the
    'coef' axis removed, and is always declared as float64.
    """
    from pygeode.timeaxis import Time
    from pygeode.timeutils import modify
    from pygeode.tools import merge_coefs
    from pygeode.var import Var

    # Exactly one way of supplying the coefficients is allowed.
    if coef is not None:
        assert A is None and B is None
    else:
        assert A is not None and B is not None
        coef = merge_coefs(B, A)

    # A 'year' field holding a single value is treated as an artifact of
    # climatological data read from a file that can't/doesn't say it is a
    # climatology, so it is dropped (with a warning).
    coef_time = coef.getaxis(Time)
    if hasattr(coef_time, 'year'):
        import numpy as np
        from warnings import warn
        distinct_years = np.unique(coef_time.year.values)
        if len(distinct_years) == 1:
            warn("ignoring degenerate 'year' field", stacklevel=2)
            coef_time = modify(coef_time, exclude='year')
            coef = coef.replace_axes({Time: coef_time})

    self.coef = coef
    self.secs = Var([taxis], values=taxis.reltime(units='seconds'))

    # Positions of the time and coefficient axes inside coef.
    time_index = coef.whichaxis(Time)
    self.ti = time_index
    coef_index = coef.whichaxis('coef')
    self.ci = coef_index
    self.caxis = coef.axes[coef_index]

    out_axes = list(coef.axes)
    assert out_axes[time_index].map_to(taxis) is not None, (
        "the given time axis is not compatible with the coefficients.\n" +
        "time axis: %s\n coefficient time axis: %s" % (
            repr(str(taxis)), repr(str(out_axes[time_index]))))

    # Output axes: swap in the requested time axis, drop the 'coef' axis.
    out_axes[time_index] = taxis
    out_axes = out_axes[:coef_index] + out_axes[coef_index + 1:]

    # dtype is float64 (not coef.dtype) because secs is float64
    Var.__init__(self, out_axes, dtype='float64')
def __init__(self, taxis, coef=None, A=None, B=None):
    """Initialise the variable from a target time axis and coefficients.

    The coefficients come either as one ``coef`` variable, or as the pair
    ``A`` and ``B`` which is combined through ``merge_coefs(B, A)``.  Giving
    both forms, or neither, trips an assertion.  The resulting variable has
    the axes of ``coef`` with the time axis replaced by ``taxis`` and the
    'coef' axis removed, and is always declared as float64.
    """
    from pygeode.timeaxis import Time
    from pygeode.timeutils import modify
    from pygeode.tools import merge_coefs
    from pygeode.var import Var

    # Exactly one way of supplying the coefficients is allowed.
    if coef is not None:
        assert A is None and B is None
    else:
        assert A is not None and B is not None
        coef = merge_coefs(B, A)

    # A 'year' field holding a single value is treated as an artifact of
    # climatological data read from a file that can't/doesn't say it is a
    # climatology, so it is dropped (with a warning).
    coef_time = coef.getaxis(Time)
    if hasattr(coef_time, 'year'):
        import numpy as np
        from warnings import warn
        distinct_years = np.unique(coef_time.year.values)
        if len(distinct_years) == 1:
            warn("ignoring degenerate 'year' field", stacklevel=2)
            coef_time = modify(coef_time, exclude='year')
            coef = coef.replace_axes({Time: coef_time})

    self.coef = coef
    self.secs = Var([taxis], values=taxis.reltime(units='seconds'))

    # Positions of the time and coefficient axes inside coef.
    time_index = coef.whichaxis(Time)
    self.ti = time_index
    coef_index = coef.whichaxis('coef')
    self.ci = coef_index
    self.caxis = coef.axes[coef_index]

    out_axes = list(coef.axes)
    assert out_axes[time_index].map_to(taxis) is not None, (
        "the given time axis is not compatible with the coefficients.\n" +
        "time axis: %s\n coefficient time axis: %s" % (
            repr(str(taxis)), repr(str(out_axes[time_index]))))

    # Output axes: swap in the requested time axis, drop the 'coef' axis.
    out_axes[time_index] = taxis
    out_axes = out_axes[:coef_index] + out_axes[coef_index + 1:]

    # dtype is float64 (not coef.dtype) because secs is float64
    Var.__init__(self, out_axes, dtype='float64')
def get_outtime(intime):
    """Return ``timeutils.modify(intime, resolution='month', uniquify=True)``.

    Asserts that the resulting axis has a 'month' field, i.e. that a monthly
    mean can be formed from ``intime`` at all.
    """
    from pygeode import timeutils

    monthly_axis = timeutils.modify(intime, resolution='month', uniquify=True)
    # can we even do a monthly mean?
    assert hasattr(monthly_axis, 'month')
    return monthly_axis
def get_outtime(intime):
    """Return ``intime`` passed through ``timeutils.modify`` with the
    'year', 'month' and 'day' fields excluded and ``uniquify=True``."""
    from pygeode import timeutils

    excluded_fields = ['year', 'month', 'day']
    return timeutils.modify(intime, exclude=excluded_fields, uniquify=True)
def get_outtime(intime):
    """Return ``intime`` passed through ``timeutils.modify`` with the
    'year', 'month' and 'day' fields excluded and ``uniquify=True``."""
    from pygeode import timeutils

    excluded_fields = ['year', 'month', 'day']
    return timeutils.modify(intime, exclude=excluded_fields, uniquify=True)
def get_outtime(intime):
    """Return ``timeutils.modify(intime, resolution='year', uniquify=True)``.

    Asserts that the resulting axis has a 'year' field, i.e. that a yearly
    mean can be formed from ``intime`` at all.
    """
    from pygeode import timeutils

    yearly_axis = timeutils.modify(intime, resolution='year', uniquify=True)
    # can we even do a yearly mean?
    assert hasattr(yearly_axis, 'year')
    return yearly_axis
def decode_cf(dataset, ignore=[]):
    """Decode CF-style metadata of a dataset into specialised axis classes.

    Every axis of ``dataset`` (except those named in ``ignore``) is inspected
    through its 'axis', 'long_name', 'standard_name', 'units', 'calendar' and
    'ancillary_variables' attributes.  When a more specific axis class is
    identified, the axis is replaced by an instance of that class, and any
    ancillary / coordinate variables that belong to the axis are attached to
    it as auxiliary arrays and removed from the variable list.  Finally the
    variables are rebuilt on the new axes via ``var_newaxes``, with the
    fill-value / scale / offset attributes pulled out of their metadata.

    Parameters:
      dataset - anything accepted by ``asdataset``.
      ignore  - container of axis names to leave untouched (only read here,
                never modified).

    Returns a new ``Dataset`` carrying the global attributes of the input.
    """
    from pygeode.dataset import asdataset, Dataset
    from pygeode.axis import Axis, NamedAxis, Lat, Lon, Pres, Hybrid, XAxis, YAxis, ZAxis, TAxis, Station, DummyAxis, NonCoordinateAxis
    from pygeode.timeaxis import Time, ModelTime365, ModelTime360, StandardTime, Yearless
    from pygeode import timeutils
    from warnings import warn
    import re

    dataset = asdataset(dataset)
    varlist = list(dataset)
    axisdict = dataset.axisdict.copy()
    global_atts = dataset.atts
    del dataset

    # Decode string variables
    for i, var in enumerate(varlist):
        if var.name.endswith("_name") and var.dtype.name in ("string8", "bytes8") and var.axes[-1].name.endswith("_strlen"):
            varlist[i] = decode_string_var(var)

    # data for auxiliary arrays (one dict per axis name)
    auxdict = {name: {} for name in axisdict}

    # Iterate over a snapshot, since entries of axisdict are replaced below.
    for name, a in list(axisdict.items()):

        # Skip over this axis?
        if name in ignore:
            continue

        atts = a.atts.copy()
        # NOTE(review): described as "carry along and pass to new Axis
        # instance", but it is never passed on anywhere in this function.
        plotatts = a.plotatts.copy()

        # Find any auxiliary arrays
        aux = auxdict[name]
        if 'ancillary_variables' in atts:
            _anc = atts.pop('ancillary_variables')
            remove_from_dataset = []  # vars to remove from the dataset
            for auxname in _anc.split(' '):
                assert any(v.name == auxname for v in varlist), "ancillary variable '%s' not found"%auxname
                newname = auxname
                # Remove the axis name prefix, if it was used
                if newname.startswith(name+'_'):
                    newname = newname[len(name)+1:]
                aux[newname] = [v for v in varlist if v.name == auxname].pop().get()
                # Don't need this as a var anymore
                remove_from_dataset.append(auxname)

            # Remove some stuff
            varlist = [v for v in varlist if v.name not in remove_from_dataset]

        # Determine the best Axis subclass to use (default: keep what we have)
        cls = type(a)

        # Generic 'axis' identifiers first
        if 'axis' in atts:
            _axis = atts.pop('axis')
            if _axis == 'X': cls = XAxis
            if _axis == 'Y': cls = YAxis
            if _axis == 'Z': cls = ZAxis
            if _axis == 'T': cls = TAxis

        # Check specific standard names, and also units?
        #TODO: don't *pop* the standard_name, units, etc. until the end of this routine - in case we didn't end up mapping them to an axis
        _ln = atts.get('long_name', a.name).lower()
        _st = atts.get('standard_name', _ln).lower()
        _units = atts.pop('units', '')
        if _st == 'latitude' or _units == 'degrees_north': cls = Lat
        if _st == 'longitude' or _units == 'degrees_east': cls = Lon
        if _st == 'air_pressure' or _units in ('hPa', 'mbar'):
            cls = Pres
            # Don't need this in the metadata anymore (it will be put back in encode_cf)
            atts.pop('positive', None)

        if _st == 'atmosphere_hybrid_sigma_pressure_coordinate':
            #TODO: check formula_terms??
            #TODO: for ccc2nc files, look for long_name == "Model Level", use_AB = <formula>,
            #      A & B embedded as metadata or as data arrays not attached to ancillary_variables
            if 'A' in aux and 'B' in aux:
                cls = Hybrid
            else:
                warn ("Cannot create a proper Hybrid vertical axis, since 'A' and 'B' coefficients aren't found.")

        if _st == 'station':
            cls = Station

        if (_st == 'time' or cls == TAxis or _units.startswith('days since') or _units.startswith('hours since') or _units.startswith('minutes since') or _units.startswith('seconds since')) and ' since ' in _units:
            _calendar = atts.pop('calendar', 'standard')
            if _calendar in ('standard', 'gregorian', 'proleptic_gregorian'):
                cls = StandardTime
            elif _calendar in ('365_day', 'noleap', '365day'):
                cls = ModelTime365
            elif _calendar in ('360_day', '360day'):
                cls = ModelTime360
            # Was "_calendar in ('none')": that is a substring test against
            # the string 'none' (so '', 'n', 'on', ... matched), not a tuple
            # membership test.
            elif _calendar == 'none':
                cls = Yearless
            else:
                warn ("unknown calendar '%s'"%_calendar)
                continue

            # Extract the time resolution (day, hour, etc), and the reference date
            # (raw strings: '\s' and '\d' are not valid str escape sequences)
            res, date = re.match(r"([a-z]+)\s+since\s+(.*)", _units).groups()
            # Pluralize the increment (i.e. day->days)?
            if not res.endswith('s'):
                res += 's'

            # Extract the rest of the date, consuming one field at a time;
            # fields that are absent keep the defaults below.
            date = date.rstrip()
            year, month, day, hour, minute, second = 0, 1, 1, 0, 0, 0
            if len(date) > 0: year, date = re.match(r"(\d+)-?(.*)", date).groups()
            if len(date) > 0: month, date = re.match(r"(\d+)-?(.*)", date).groups()
            if len(date) > 0: day, date = re.match(r"(\d+)\s*(.*)", date).groups()
            # Allow a 'T' between the date and the time of day
            if date.startswith('T'): date = date[1:]
            if len(date) > 0: hour, date = re.match(r"(\d+):?(.*)", date).groups()
            if len(date) > 0: minute, date = re.match(r"(\d+):?(.*)", date).groups()
            if len(date) > 0 and date[0] != ' ': second, date = re.match(r"(\d+)(.*)", date).groups()

            # convert from strings to integers
            #TODO: milliseconds? time zone?
            year, month, day, hour, minute, second = list(map(int, [year, month, day, hour, minute, float(second)]))

            # Create the time axis
            startdate = {'year':year, 'month':month, 'day':day, 'hour':hour, 'minute':minute, 'second':second}
            axisdict[name] = cls(a.values, startdate=startdate, units=res, name=name, atts=atts)

            # Special case: start year=0 implies a climatology
            #NOTE: 'climatology' attribute not used, since we don't currently keep
            #      track of the interval that was used for the climatology.
            if year == 0:
                # Don't climatologize(?) the axis if there's more than a year
                if not all(axisdict[name].year == 0):
                    warn ("cfmeta: data starts at year 0 (which usually indicates a climatology), but there's more than one year's worth of data! Keeping it on a regular calendar.", stacklevel=3)
                    continue
                axisdict[name] = timeutils.modify(axisdict[name], exclude='year')

            continue  # we've constructed the time axis, so move onto the next axis

        # Check for a match from the custom axes (from add-ons).
        if _st in custom_axes:
            cls = custom_axes[_st]

        # Find any other information that should be put inside this axis.
        # Look for anything that's identified as a coordinate or ancillary
        # variable, and that has this axis as its only dimension.
        dependencies = set()
        for var in varlist:
            if var.hasaxis(a.name):
                dependencies.update(var.atts.get('coordinates', '').split())
                dependencies.update(var.atts.get('ancillary_variables', '').split())

        # Look up these dependencies.  Only consider 1D information, since we
        # don't yet have a way to associate multidimensional arrays as auxarrays
        # in an axis.
        dependencies = [v for v in varlist if v.name in dependencies and v.naxes == 1 and v.hasaxis(a.name)]

        # If we found any such information, then this is no longer a simple
        # "dummy" axis.
        if issubclass(cls, DummyAxis) and len(dependencies) > 0:
            cls = NonCoordinateAxis

        # Attach the information from these dependent variables as auxiliary arrays.
        aux.update((dep.name, dep.get()) for dep in dependencies)

        # Anything that got attached to this axis should be removed from the
        # list of variables, since it's just extra info specific to the axis.
        varlist = [v for v in varlist if v.name not in aux]

        # put the units back (if we didn't use them)?
        if cls in [Axis, NamedAxis, XAxis, YAxis, ZAxis, TAxis] and _units != '':
            atts['units'] = _units

        # create new axis instance if need be.
        if cls != type(a):
            axisdict[name] = cls(values=a.values, name=name, atts=atts, **aux)

    # Apply these new axes to the variables
    # Check for fill values, etc.
    # Extract to a list first, then back to a dataset
    # (ensures the dataset axis list is up to date)
    for i, oldvar in enumerate(list(varlist)):
        name = oldvar.name
        atts = oldvar.atts.copy()
        # NOTE(review): this copies atts, not plotatts -- looks like a
        # copy/paste slip, left unchanged pending confirmation.
        plotatts = oldvar.atts.copy()

        # All three spellings are popped from the metadata; the first one
        # that is present is used.  Compare against None rather than relying
        # on truthiness, so that a fill value of 0 is not thrown away.
        fillvalue = [atts.pop(f, None) for f in ('FillValue', '_FillValue', 'missing_value')]
        fillvalue = [_f for _f in fillvalue if _f is not None]
        fillvalue = fillvalue[0] if len(fillvalue) > 0 else None
        scale = atts.pop('scale_factor', None)
        offset = atts.pop('add_offset', None)

        varlist[i] = var_newaxes(oldvar, [axisdict[a.name] for a in oldvar.axes],
                                 name=name, fillvalue=fillvalue, scale=scale,
                                 offset=offset, atts=atts, plotatts=plotatts)

    dataset = Dataset(varlist, atts=global_atts)
    return dataset
def modify(self, resolution=None, exclude=[], include=[], uniquify=False):
    """Deprecated wrapper: forwards to ``timeutils.modify``.

    Emits a warning and returns
    ``timeutils.modify(self, resolution, exclude, include, uniquify)``.
    The list defaults are only forwarded, never modified here.
    """
    from pygeode import timeutils
    from warnings import warn

    # stacklevel=2 makes the warning point at the code that called this
    # deprecated method rather than at this wrapper itself.
    warn ("Deprecated. Use timeutils module.", stacklevel=2)
    return timeutils.modify(self, resolution, exclude, include, uniquify)
def decode_cf(dataset, ignore=[]):
    """Decode CF-style metadata of a dataset into specialised axis classes.

    Every axis of ``dataset`` (except those named in ``ignore``) is inspected
    through its 'axis', 'long_name', 'standard_name', 'units', 'calendar' and
    'ancillary_variables' attributes.  Generic axes for which a more specific
    class is identified are replaced by an instance of that class, with any
    ancillary variables attached as auxiliary arrays.  Finally the variables
    are rebuilt on the new axes via ``var_newaxes``, with the fill-value /
    scale / offset attributes pulled out of their metadata.

    Parameters:
      dataset - anything accepted by ``asdataset``.
      ignore  - container of axis names to leave untouched (only read here,
                never modified).

    Returns a new ``Dataset`` carrying the global attributes of the input.
    """
    from pygeode.dataset import asdataset, Dataset
    from pygeode.axis import Axis, NamedAxis, Lat, Lon, Pres, Hybrid, XAxis, YAxis, ZAxis, TAxis
    from pygeode.timeaxis import Time, ModelTime365, ModelTime360, StandardTime, Yearless
    from pygeode import timeutils
    from warnings import warn
    import re

    dataset = asdataset(dataset)
    varlist = list(dataset)
    axisdict = dataset.axisdict.copy()
    global_atts = dataset.atts
    del dataset

    # data for auxiliary arrays (one dict per axis name).
    # Plain iteration instead of dict.iterkeys(), which only exists on
    # Python 2.
    auxdict = {}
    for name in axisdict:
        auxdict[name] = {}

    # Iterate over a snapshot, since entries of axisdict are replaced below
    # (on Python 3, items() is a live view rather than a list).
    for name, a in list(axisdict.items()):

        # Skip over this axis?
        if name in ignore:
            continue

        atts = a.atts.copy()
        # NOTE(review): described as "carry along and pass to new Axis
        # instance", but it is never passed on anywhere in this function.
        plotatts = a.plotatts.copy()

        # Find any auxiliary arrays
        aux = auxdict[name]
        if 'ancillary_variables' in atts:
            _anc = atts.pop('ancillary_variables')
            remove_from_dataset = []  # vars to remove from the dataset
            for auxname in _anc.split(' '):
                assert any(v.name == auxname for v in varlist), "ancillary variable '%s' not found"%auxname
                newname = auxname
                # Remove the axis name prefix, if it was used
                if newname.startswith(name+'_'):
                    newname = newname[len(name)+1:]
                aux[newname] = [v for v in varlist if v.name == auxname].pop().get()
                # Don't need this as a var anymore
                remove_from_dataset.append(auxname)

            # Remove some stuff
            varlist = [v for v in varlist if v.name not in remove_from_dataset]

        # Determine the best Axis subclass to use (default: keep what we have)
        cls = type(a)

        # Generic 'axis' identifiers first
        if 'axis' in atts:
            _axis = atts.pop('axis')
            if _axis == 'X': cls = XAxis
            if _axis == 'Y': cls = YAxis
            if _axis == 'Z': cls = ZAxis
            if _axis == 'T': cls = TAxis

        # Check specific standard names, and also units?
        #TODO: don't *pop* the standard_name, units, etc. until the end of this routine - in case we didn't end up mapping them to an axis
        _ln = atts.get('long_name', a.name).lower()
        _st = atts.get('standard_name', _ln).lower()
        _units = atts.pop('units', '')
        if _st == 'latitude' or _units == 'degrees_north': cls = Lat
        if _st == 'longitude' or _units == 'degrees_east': cls = Lon
        if _st == 'air_pressure' or _units in ('hPa', 'mbar'):
            cls = Pres
            # Don't need this in the metadata anymore (it will be put back in encode_cf)
            atts.pop('positive', None)

        if _st == 'atmosphere_hybrid_sigma_pressure_coordinate':
            #TODO: check formula_terms??
            #TODO: for ccc2nc files, look for long_name == "Model Level", use_AB = <formula>,
            #      A & B embedded as metadata or as data arrays not attached to ancillary_variables
            if 'A' in aux and 'B' in aux:
                cls = Hybrid
            else:
                warn ("Cannot create a proper Hybrid vertical axis, since 'A' and 'B' coefficients aren't found.")

        if (_st == 'time' or cls == TAxis or _units.startswith('days since') or _units.startswith('hours since') or _units.startswith('minutes since') or _units.startswith('seconds since')) and ' since ' in _units:
            _calendar = atts.pop('calendar', 'standard')
            if _calendar in ('standard', 'gregorian', 'proleptic_gregorian'):
                cls = StandardTime
            elif _calendar in ('365_day', 'noleap', '365day'):
                cls = ModelTime365
            elif _calendar in ('360_day', '360day'):
                cls = ModelTime360
            # Was "_calendar in ('none')": that is a substring test against
            # the string 'none' (so '', 'n', 'on', ... matched), not a tuple
            # membership test.
            elif _calendar == 'none':
                cls = Yearless
            else:
                warn ("unknown calendar '%s'"%_calendar)
                continue

            # Extract the time resolution (day, hour, etc), and the reference date
            # (raw strings: '\s' and '\d' are not valid str escape sequences)
            res, date = re.match(r"([a-z]+)\s+since\s+(.*)", _units).groups()
            # Pluralize the increment (i.e. day->days)?
            if not res.endswith('s'):
                res += 's'

            # Extract the rest of the date, consuming one field at a time;
            # fields that are absent keep the defaults below.
            date = date.rstrip()
            year, month, day, hour, minute, second = 0, 1, 1, 0, 0, 0
            if len(date) > 0: year, date = re.match(r"(\d+)-?(.*)", date).groups()
            if len(date) > 0: month, date = re.match(r"(\d+)-?(.*)", date).groups()
            if len(date) > 0: day, date = re.match(r"(\d+)\s*(.*)", date).groups()
            # Allow a 'T' between the date and the time of day; without this
            # the hour pattern below fails to match and raises.
            if date.startswith('T'): date = date[1:]
            if len(date) > 0: hour, date = re.match(r"(\d+):?(.*)", date).groups()
            if len(date) > 0: minute, date = re.match(r"(\d+):?(.*)", date).groups()
            if len(date) > 0 and date[0] != ' ': second, date = re.match(r"(\d+)(.*)", date).groups()

            # convert from strings to integers
            #TODO: milliseconds? time zone?
            year, month, day, hour, minute, second = [int(x) for x in (year, month, day, hour, minute, float(second))]

            # Create the time axis
            startdate = {'year':year, 'month':month, 'day':day, 'hour':hour, 'minute':minute, 'second':second}
            axisdict[name] = cls(a.values, startdate=startdate, units=res, name=name, atts=atts)

            # Special case: start year=0 implies a climatology
            #NOTE: 'climatology' attribute not used, since we don't currently keep
            #      track of the interval that was used for the climatology.
            if year == 0:
                # Don't climatologize(?) the axis if there's more than a year
                if not all(axisdict[name].year == 0):
                    warn ("cfmeta: data starts at year 0 (which usually indicates a climatology), but there's more than one year's worth of data! Keeping it on a regular calendar.", stacklevel=3)
                    continue
                axisdict[name] = timeutils.modify(axisdict[name], exclude='year')

            continue  # we've constructed the time axis, so move onto the next axis

        # put the units back (if we didn't use them)?
        if cls in [Axis, NamedAxis, XAxis, YAxis, ZAxis, TAxis] and _units != '':
            atts['units'] = _units

        # create new axis instance if need be (only if a is a generic axis, to prevent replacement of custom axes)
        # TODO: don't do this check.  This filter *should* be called before any
        # custom axis overrides, so we *should* be able to assume we only have
        # generic Axis objects at this point (at least, from the netcdf_new module)
        if (type(a) in (Axis, NamedAxis, XAxis, YAxis, ZAxis, TAxis)) and (cls != type(a)):
            axisdict[name] = cls(values=a.values, name=name, atts=atts, **aux)

    # Apply these new axes to the variables
    # Check for fill values, etc.
    # Extract to a list first, then back to a dataset
    # (ensures the dataset axis list is up to date)
    for i, oldvar in enumerate(list(varlist)):
        name = oldvar.name
        atts = oldvar.atts.copy()
        # NOTE(review): this copies atts, not plotatts -- looks like a
        # copy/paste slip, left unchanged pending confirmation.
        plotatts = oldvar.atts.copy()

        # All three spellings are popped from the metadata; the first one
        # that is present is used.  A list comprehension replaces
        # filter(None, ...), whose result has no len() on Python 3, and the
        # comparison against None keeps a fill value of 0 from being dropped.
        fillvalue = [atts.pop(f, None) for f in ('FillValue', '_FillValue', 'missing_value')]
        fillvalue = [_f for _f in fillvalue if _f is not None]
        fillvalue = fillvalue[0] if len(fillvalue) > 0 else None
        scale = atts.pop('scale_factor', None)
        offset = atts.pop('add_offset', None)

        varlist[i] = var_newaxes(oldvar, [axisdict[a.name] for a in oldvar.axes],
                                 name=name, fillvalue=fillvalue, scale=scale,
                                 offset=offset, atts=atts, plotatts=plotatts)

    dataset = Dataset(varlist, atts=global_atts)
    return dataset