class DemoClass(ParamStorage):
    '''
    A demonstration class using SpecialVariable

    The behaviour we desire is that a SpecialVariable acts like a
    ParamStorage (i.e. we can get or set by attribute or item), e.g.

        x.state = 3
        x['state'] = 3

    give the same result, and

        print x['state']
        print x.state

    give the same result.

    This is easy enough to achieve for all cases other than getting from
    x.state. It turns out that __getattr__ does not override the default
    method for state *if* state is set in the class instance. To get
    around that, we have to use a fake name (fakes here) and instead of
    storing state, we store _state. This makes dealing with all of the
    conditions a little more complicated and a little slower, but it
    allows a much more consistent interface.

    At any time, a SpecialVariable can simply be over-written by
    assigning to its fake name.

    e.g. instance the class

        x = DemoClass()

    set a non-special value 'cheese'

        x.foo = 'bar'

    we can use this as x.foo or x['foo']

        print x.foo,x['foo']

    which should give

        bar bar

    Now use the SpecialVariable. There are many ways to load this up,
    but an easy one is via a dictionary.

        data = {'state':np.ones(2)*5. ,'foo':np.ones(10)}
        name = {'state':'of the nation','foo':'bar'}
        this = {'data':data,'name':name}
        x.state = this
        print x.state,x['state']

    which gives

        [ 5. 5.] [ 5. 5.]

    so we get the same from either approach.

    Note that what is returned from the SpecialVariable is only what is
    in this['data']['state'], and that is fully the intention of the
    SpecialVariable class. It can be loaded with rich information from a
    range of sources, but if you want a quick interpretation of the data
    (i.e. x.state) you only get what is in x.state, or more fully,
    x._state.data.state

    The other data that we passed to the SpecialVariable is as it was
    when read in, but relative to x._state, i.e. we have:

        x._state.name.foo

    which is

        bar.

    If you want to directly access the SpecialVariable, you can use:

        x.get(x.fakes['state'])

    which is the same as

        x._state

    or

        x[x.fakes['state']]

    It is not advisable to directly use the underscore access as the
    fakes lookup dictionary can be changed. It is best to always use
    x.fakes['state'].

    Indeed, if you want to override the 'special' nature of a term such
    as 'state', you can simply remove their entry from the table:

        old_dict = x.fakes.copy()
        del x.fakes['state']

    Now, if you type:

        print x.state

    You get a KeyError for state, so it would have been better to:

        x.fakes = old_dict.copy()
        del x.fakes['state']
        x['state'] = x[old_dict['state']]
        print x.state

    which should give

        [ 5. 5.],

    but the type of x.state will have changed from SpecialVariable to
    np.ndarray.

    If you want to convert the SpecialVariable back to a dictionary you
    can do:

        print x[x.fakes['state']].to_dict()

    or a little less verbosely:

        print x._state.to_dict()
    '''

    def __init__(self, info=None, thisname=None, readers=None,
                 datadir=None,
                 env=None,
                 header=None,
                 logger=None, log_terms=None, simple=False):
        '''
        Class initialisation.

        Set up self.state and self.other as SpecialVariables
        and initialise them to None.

        Options (all forwarded to each SpecialVariable unless noted):

            info      : list passed through to the SpecialVariable
                        (default: a fresh empty list).
            thisname  : base name; each SpecialVariable is named
                        "<thisname>.<key>" when thisname is set.
            readers   : list of reader methods (default: fresh empty list).
            datadir   : list of directories (default: ["."]).
            env       : passed through unchanged.
            header    : passed through unchanged.
            logger    : passed through unchanged.
            log_terms : dictionary of log options (default: fresh empty
                        dictionary).
            simple    : accepted for interface compatibility.
                        NOTE(review): it is not forwarded; the
                        SpecialVariables are always created with
                        simple=False, as in the original code.
        '''
        # None sentinels replace the mutable defaults so that separate
        # instances never share (and mutate) the same list/dict object.
        info = [] if info is None else info
        readers = [] if readers is None else readers
        datadir = ["."] if datadir is None else datadir
        log_terms = {} if log_terms is None else log_terms

        # 'fakes' must exist before any attribute/item access is made,
        # because __getitem__ and __setitem__ consult it; use the raw
        # setter so no interpretation is attempted.
        self.set('fakes', {'state': '_state', 'other': '_other'})
        for i in self.fakes:
            # thatname stays None (falsy thisname) unless a base name is given
            thatname = thisname and "%s.%s" % (thisname, i)
            # first assignment: stored under the fake name by __set_if_unset
            self[i] = SpecialVariable(logger=logger, info=info,
                                      thisname=thatname,
                                      readers=readers, datadir=datadir,
                                      env=env,
                                      header=header,
                                      log_terms=log_terms,
                                      simple=False)
            # second assignment: routed into the SpecialVariable itself
            self[i] = None

    def get(self, this):
        '''
        An alternative interface to get the value of a class member
        that by-passes any more complex mechanisms.

        This returns the 'true' value of a class member,
        as opposed to an interpreted value.
        '''
        return ParamStorage.__getattr__(self, this)

    def set(self, this, that):
        '''
        An alternative interface to set the value of a class member
        that by-passes any more complex mechanisms.

        This sets the 'true' value of a class member,
        as opposed to an interpreted value.
        '''
        return ParamStorage.__setattr__(self, this, that)

    def var(self, this):
        '''
        Return the data associated with SpecialVariable this,
        rather than an interpretation of it
        '''
        return self[self['fakes'][this]]

    def __set_if_unset(self, name, value):
        '''
        A utility to check if the requested attribute
        is not currently set, and to set it if so.

        Names listed in self.fakes are stored under their fake name.
        Returns True if the value was stored here, False if the
        attribute already existed (nothing is changed in that case).
        '''
        target = self.fakes[name] if name in self.fakes else name
        if target not in self.__dict__:
            ParamStorage.__setattr__(self, target, value)
            return True
        return False

    def __getattr__(self, name):
        '''
        get attribute, e.g. return self.state
        '''
        return self.__getitem__(name)

    def __setattr__(self, name, value):
        '''
        set attribute, e.g. self.state = 3
        '''
        if not self.__set_if_unset(name, value):
            self.__setitem__(name, value, nocheck=True)

    def __getitem__(self, name):
        '''
        get item for class, e.g. x = self['state']

        'Data', 'Name', 'Control' and 'Location' are looked up
        (lower-cased) in self._state. Names in self.fakes are read
        through the SpecialVariable stored under the fake name.
        Anything else comes straight from self.__dict__.
        '''
        if name in ('Data', 'Name', 'Control', 'Location'):
            return self._state[name.lower()]
        if name in self.fakes:
            this = self.get(self.fakes[name])
            return SpecialVariable.__getitem__(this, name)
        # raw lookup retained from the original; its result is not used
        self.get(name)
        return self.__dict__.__getitem__(name)

    def __setitem__(self, name, value, nocheck=False):
        '''
        set item for class e.g. self['state'] = 3

        Unless nocheck is set, an attribute that does not exist yet is
        simply created (see __set_if_unset) and nothing else is done.
        Otherwise the value is routed in the same way as in __getitem__.
        '''
        if not nocheck and self.__set_if_unset(name, value):
            return
        if name in ('Data', 'Name', 'Control', 'Location'):
            self._state[name.lower()] = value
        elif name in self.fakes:
            this = self.get(self.fakes[name])
            SpecialVariable.__setattr__(this, name, value)
        else:
            # raw lookup retained from the original; its result is not used
            self.get(name)
            ParamStorage.__setattr__(self, name, value)
class SpecialVariable(ParamStorage):
    '''
    A class that can deal with the datatypes needed for eoldas

    It allows a variable to be set to various data types
    and interprets these into the data structure.

    The data structure imposed here is:

        self.data.this
        self.name.this

    to store some item called 'this'. Other information can be stored as
    well but part of the idea here is to have some imposed constraints
    on the data structure so that we can sensibly load up different
    datasets.

    The idea is that data will be stored in self.data and associated
    metadata in self.name. If items are given the same name in both
    sub-structures, we can easily keep track of them. There is no actual
    requirement that this is adhered to, but it is certainly permitted
    and encouraged for the intended use of this class.

    Probably the most important thing then about this class is that if a
    SpecialVariable is assigned different data types, then it can do
    sensible things with them in the context of the EOLDAS (and wider
    applications).

    When an assignment takes place (either of the form

        self.state = foo

    or

        self['state'] = foo

    then what is actually stored depends on the type and nature of foo.
    The main features as follows:

    If foo is a string:
        A guess is made that it is a filename, and an attempt is made to
        read the file. All directories in the list self.datadir are
        searched for the filename, and any readable files found are
        considered candidates. Each of these is read in turn. A set of
        potential data formats, specified by the registered readers, is
        considered, and if a successful interpretation takes place the
        data is returned and stored in the desired variable.
        So, for example, if we have self.state = foo as above and foo is
        a valid, readable file in the list of directories specified,
        and it is interpretable with one of the formats defined, then
        the main dataset is loaded into:

            self.data.state

        (alternatively known as self.data['state']).

    If foo is a ParamStorage, it should have the same structure as that
    here (i.e. self.data and self.name) and these structures are then
    loaded.

    If foo is a dictionary (type dict)
        It is first converted to a ParamStorage and then loaded as
        above.

    If foo is any other datatype, it is left pretty much as is, except
    that an attempt to convert to a np.array is made.

    Depending on the format, there might be data other than the main
    dataset (e.g. locational information) and these are loaded by the
    loaders into relevant parts of self.data and self.name.

    For classes that use this class for EOLDAS, we will typically use:

        self.data.state    : state variable data
        self.data.sd       : uncertainty information as sd
                             (or a similar fuller representation)
        self.data.control  : control information (e.g. view angles)
        self.data.location : location information

    with associated descriptor data in the relevant parts of self.name.

    The idea for simple use of the data structure then is for all of
    these datasets represented as 2D datasets, where the number of rows
    in each of the self.data.state etc field will be the same, but the
    number of columns will tend to vary (e.g different numbers of state
    variables).

    The reason for considering such a 2D 'flat'(ish) representation is
    that it is easy to tabulate and understand. In fact the data will be
    of quite high dimension. E.g. if the data vary by time, x and y,
    then we would have 3 columns for self.data.location, with
    descriptors for the columns in self.name.location, and corresponding
    state data in self.data.state (with the number of state variables
    determining the number of columns in that table).

    As mentioned, at the point of this class, there is no strict
    requirement for any such structure to the data loaded or used, but
    that is the plan for EOLDAS use, so worth documenting at this point.
    '''

    def __init__(self, info=None, name=None, thisname=None, readers=None,
                 log_terms=None,
                 datadir=None, env=None,
                 header=None, writers=None,
                 simple=False, logger=None):
        '''
        Class SpecialVariable initialisation.

        Sets up the class as a ParamStorage and calls self.init()

        `name` takes precedence over `thisname`; if neither is given a
        name of the form "<ClassName>.<time>" is generated.

        See init() for a fuller description of the options.
        '''
        ParamStorage.__init__(self, logger=logger)
        name = name or thisname
        if name is None:
            name = self._timestamped_name()
        # The name is handed to init() directly: the interpreting
        # __setattr__ is already active here (self.simple is not set
        # yet), so assigning self.thisname at this point and reading it
        # back is not reliable. init() stores it once literal mode is on.
        #
        # info, log_terms and simple are now forwarded; the original
        # replaced them with [], {} and False, discarding caller options.
        self.init(info=info, name=name, readers=readers,
                  log_terms=log_terms,
                  datadir=datadir, env=env,
                  header=header, writers=writers,
                  simple=simple, logger=logger)

    def _timestamped_name(self):
        '''
        Return a fallback instance name: "<ClassName>.<time.time()>".
        '''
        import time
        return "%s.%s" % (type(self).__name__, str(time.time()))

    def init(self, info=None, name=None, readers=None, log_terms=None,
             datadir=None, env=None,
             header=None, writers=None,
             simple=False, logger=None):
        '''
        Initialise information in a SpecialVariable instance.

        Options:
            info      : Information that can be passed through to reader
                        methods (a list).
            name      : a name to use to identify this instance in any
                        logging. If None, a time-stamped name is
                        generated. If set to True, then logging is to
                        stdout.
            readers   : A list of reader methods that are pre-pended to
                        those already contained in the class.
            log_terms : A dictionary of log options. By default
                        {'logfile':None,'logdir':'log','debug':True}
                        Entries given here override these defaults.
                        If a name is set, and logfile specified, then
                        logs are logged to that file.
            datadir   : A list of directories to search for data files
                        to interpret if the SpecialVariable is set to a
                        string. Defaults to ['.'].
            env       : An environment variable that can be used to
                        extend the datadir variable.
            header    : A header string to use to identify pickle files.
                        By default, this is set to
                        "EOLDAS -- plewis -- UCL -- V0.1"
            writers   : Writer methods, passed to init_read_write
                        together with header and readers.
            simple    : A flag to switch off the 'complicated'
                        interpretation methods, i.e. just set and return
                        variables literally, do not try to interpret
                        them.
            logger    : passed to sortlog when setting up self.logger.
        '''
        # Literal mode while the instance is wired up, so the attribute
        # assignments below are stored as-is rather than interpreted.
        self.set('simple', True)
        if name is None:
            name = self._timestamped_name()
        self.thisname = name
        # this is where we will put any data
        self.data = ParamStorage()
        self.name = ParamStorage()
        # fresh containers instead of shared mutable defaults
        self.info = [] if info is None else info
        self.datadir = datadir or ['.']
        self.env = env
        init_read_write(self, header,
                        [] if readers is None else readers,
                        {} if writers is None else writers)
        # sort logging: defaults first, then override with caller's terms
        terms = {'logfile': None, 'logdir': 'log', 'debug': True}
        terms.update(log_terms or {})
        self.log_terms = terms
        self.logger = sortlog(self, terms['logfile'], logger,
                              name=self.thisname,
                              logdir=terms['logdir'],
                              debug=terms['debug'])
        # only now switch to the requested interpretation mode
        self.simple = simple

    def set(self, this, value):
        """
        A method to set the literal value of this, rather than attempt
        an interpretation (e.g. used when self.simple is True)
        """
        return ParamStorage.__setattr__(self, this, value)

    def get(self, this):
        """
        A method to get the literal value of this, rather than attempt
        an interpretation (e.g. used when self.simple is True)
        """
        return ParamStorage.__getattr__(self, this)

    def __setitem__(self, this, value):
        '''
        Variable setting method for style self['this'].

        Interpreted the same as via __setattr__.
        '''
        # always set the item
        self.__setattr__(this, value)

    def __setattr__(self, this, value):
        '''
        Variable setting method for style self.this

        If self.simple is set the value is stored literally. Otherwise
        what is stored depends on the exact type of value:

        1. ParamStorage or SpecialVariable.
           value.data and value.name are merged into self.data and
           self.name (update with combine=True). There will normally be
           a field data.this, where this is the variable name passed
           here.

        2. A dictionary: converted with ParamStorage().from_dict() and
           then handled as in 1.

        3. *string*: stored in self.data.this, then an attempt is made
           to read it as a file (see process_data_string). If no reader
           succeeds it is maintained as a string.

        4. A numpy array (np.ndarray): stored in self.data.this as is.

        5. Anything else (including tuples and lists):
           stored in self.data.this as np.array(value).

        Any exception raised while setting is logged (if a logger is
        attached) and otherwise suppressed; the variable is then left
        as it was.
        '''
        if self.simple:
            self.set(this, value)
            return
        # exact type match is deliberate: subclasses take the
        # 'anything else' route, as in the original code
        t = type(value)
        try:
            if t in (ParamStorage, SpecialVariable):
                # update the whole structure
                self.data.update(value.data, combine=True)
                self.name.update(value.name, combine=True)
            elif t is dict:
                self.__setattr__(this, ParamStorage().from_dict(value))
            elif t is str:
                # set the term
                ParamStorage.__setattr__(self['data'], this, value)
                # interpret as a file read if possible
                self.process_data_string(this, info=self.info)
            elif t is np.ndarray:
                ParamStorage.__setattr__(self['data'], this, value)
            else:
                ParamStorage.__setattr__(self['data'], this,
                                         np.array(value))
        except Exception:
            # best effort by design, but no longer traps
            # KeyboardInterrupt / SystemExit as the bare except did
            if self.logger:
                self.logger.info("Failed to set SpecialVariable %s from %s %s"
                                 % (this, t.__name__, value))
            return
        if self.logger:
            self.logger.info("Set variable %s from type %s"
                             % (this, t.__name__))

    def __getattr__(self, name):
        '''
        Variable getting method for style self.this

        If the field 'data' exists in self.__dict__ and name is in
        self.data.__dict__, then self.data.<name> is returned.

        Otherwise, if name is in self.__dict__, that value is returned.

        Otherwise return None.
        '''
        # first look in data
        if 'data' in self.__dict__ and name in self.data.__dict__:
            return self.data.__dict__.__getitem__(name)
        if name in self.__dict__:
            return self.__dict__.__getitem__(name)
        return None

    def __getitem__(self, name):
        '''
        Variable getting method for style self['this'].

        Interpreted the same as via __getattr__.
        '''
        return SpecialVariable.__getattr__(self, name)

    def process_data_string(self, name, info=[], fmt=None):
        '''
        Attempt to load data from a string, assuming the string is a
        filename.

        The value currently held in self.data[name] is taken as the
        filename. The list self.datadir is searched for readable files
        of that name (also self.env), and a list of potential files
        considered for reading.

        Each readable file is passed to reader(), and the first one that
        is read without error ends the search.

        Note that the format can be specified. If not, then all formats
        are attempted until a successful read is made.

        Options:
            info : passed through to reader(); it is not modified here.
            fmt  : passed through to reader().
        '''
        from eoldas_Lib import get_filename
        orig = self.data[name]
        if self.logger:
            self.logger.debug('%s is a string ... see if its a readable file ...'
                              % name)
        # find a list of potential files
        goodfiles, is_error = get_filename(orig, datadir=self.datadir,
                                           env=self.env, multiple=True)
        if is_error[0]:
            # give up whether or not a logger is attached
            if self.logger:
                self.logger.debug(str(is_error[1]))
            return
        if self.logger:
            self.logger.debug("*** looking at potential files %s"
                              % str(goodfiles))
        # loop over all files that it might be
        for goodfile in goodfiles:
            _, is_error = reader(self, goodfile, name, fmt=fmt, info=info)
            if not is_error[0]:
                # first successful read wins, logger or not
                if self.logger:
                    self.logger.info("Read file %s " % goodfile)
                return
        if self.logger:
            self.logger.debug(self.error_msg)
        return

    def write(self, filename, fmt):
        '''
        Write this instance to filename in format fmt via writer().
        '''
        return writer(self, filename, None, fmt=fmt)

    def read(self, filename, fmt):
        '''
        Read filename in format fmt into this instance via reader().
        '''
        return reader(self, filename, None, fmt=fmt, info=[])
class State(ParamStorage): ''' A data class to represent and manipulate state vector data ''' def __init__(self,options,limits=None,bounds=None,\ datatype='x',names=None,logdir=None,writers={},\ control=None,location=None,env=None,debug=None,\ grid=True,logger=None,\ datadir=None,logfile=None,name=None,info=[],readers=[]): ''' Initialise State class Inputs: options : A ParamStorage data type. Most terms can be over-ridden via Options, but it should normally contain: options.thisname : same as name below options.names : same as names below options.datadir : same as datadir options.location : same as location options.limits : same as limits options.control : same as control options.env : an environment variable containing directory names to search for input files. options.bounds : same as bounds Options: limits : List of limits to apply when reading data. Limits atre also used to quantize locational data. Should be of the same length as `location` with each sub-list of the form [min,max,step]. The default is [0,None,1]. bounds : List of bounds to be applied to the data, or the same length as datatypes, with each sub-list of the form [min,max] datatype : The state variable data types. This will normally be `x` or `y`but may for instance be `x1` or `y2` names : List of state vector names e.g. [`lai`, `chlorophyl`] control : List of strings describing control variables e.g. [`mask`,`vza`,`sza`,`vaa`,saa`] location : List of strings describing location, e.g. [`time`,`row`,`col`] datadir : List of ditrectories to search for data in. logfile : File name to be used for logging logdir : Directory to put logfile if logfile is not an absolute pathname. name : Name to be used in logging. The default is None so there is no logging. If set to True, logs to stdout. Otherwise logs to logfile if set. header : Over ride the default header string used for pickle files. 
info : A list that is passed through to file readers etc readers : A list of file readers that is pre-prended to existing ones. writers : A list of file readers that is pre-prended to existing ones. grid : Flag to interpret the data on a grid This is done by interpolation, but a mask is set to 2 for such values. ''' self.reinit(options,limits=limits,bounds=bounds,datatype=datatype,\ control=control,names=names,location=location,env=env,\ logdir=logdir,writers=writers,logger=logger,\ datadir=datadir,logfile=logfile,name=name,info=info,\ readers=readers,debug=debug,grid=grid) def reinit(self,options,names=None,datatype=None,limits=None,\ bounds=None,control=None,location=None,env=None,header=None,\ logdir=None,writers={},grid=False,logger=None,\ datadir=None,logfile=None,name=None,info=[],readers=[],debug=None): ''' Method to re-initialise the class instance The setup is on the whole controlled by the datatype which contains e.g. 'x'. This is used to set up the members self.x and self.y as SpecialVariables (see SpecialVariable in eoldas_SpecialVariable.py). There are some special attributes for datatypes starting with 'y'. These are assumed to be observational data, which means that when they are read, the data names associated with them are not limited to those in self.names but rather set to whatever is read in in the data. This is because the data names for observational data may be terms such as waveband names etc that need special interpretation. Also, the default output format for observational data is different to that of other data. The elements self.state is a SpecialVariables which means that they can be assigned various data types (see SpecialVariables) and loaded accordingly (e.g. if a filename is specified, this is read in to the data structure. The SpecialVariables contain 'hidden' datasets, which here are mainly the 'control' and 'location' information. A SpecialVariable has two internal structures: `data` and `name`. 
The former is used to store data values (e.g. the state values) and the latter to store associated metadata. For example, `control` is passed here e.g. as [`mask`,`vza`] and this gives the metadata that are stored in `name`. The actual values of the control data are stored in the `data` section. For location, we might be passed [`time`,`row`,`col`], so this is set in names.location, and the data.location contains the values of the location at each of these elements. For the actual state dataset, this is stored according to its name, so for `x` the values are stored in data.x and the associated data names in name.x. State datasets must represent at least the mean and standard deviation of a state for them to be of value in EOLDAS. TThe mean is accessed as e.g. self.state for the state dataset. The sd is accessed can be accessed as self._state.sd if it has been set. This reference can also be used to directly set data associated with a SpecialVariable, e.g. self.Data.control = np.zeros([2,3]) to represent 2 samples with 3 control variables. You can access name information similarly with print self.Name.control but this will generate a KeyError if the term has not been set. You can check it exists with: key = 'control' if key in self.Name: this = (self.Data[key],self.Name[key]) To get e.g. a dictionary representation of a SpecialVariable you can use eg: self.Name.to_dict() to get the name dictionary, or thisdict = self._state.to_dict() to get the full representation, which then contains 'data' and 'name' as well as some other information stored in the SpecialVariable. You can similarly load them using e.g. 
self.Data.update( ParamStorage().from_dict(thisdict['data']) combine=True) ''' # set up a fakes dictionary from the data types self.set('datatype', datatype) self.set('fakes', {'state': '_state'}) # first check that options is sensible self.__check_type(options, ParamStorage, fatal=True) self.options = options from eoldas_Lib import set_default_limits,\ check_limits_valid,quantize_location, sortopt nSpecial = 1 if name == None: import time thistime = str(time.time()) name = type(self).__name__ name = "%s.%s" % (name, thistime) self.thisname = name self.options.thisname = str(name).replace(' ', '_') log_terms = {\ 'logfile':logfile or sortopt(self.options,'logfile',None),\ 'logdir':logdir or sortopt(self.options,'logdir',None),\ 'debug' : debug or sortopt(self.options,'debug',True)} self.datadir = datadir or sortopt(self.options, 'datadir', ["."]) self.header = header or "EOLDAS pickle V1.0 - plewis" env = env or sortopt(self.options, 'env', None) names = names or sortopt(self.options, 'names', None) location = location or sortopt(self.options, 'location', ['time']) control = control or sortopt(self.options, 'control', []) limits = limits or sortopt(self.options,'limits',\ set_default_limits(np.array(location))) limits = limits or self.options.limits limits = np.array(check_limits_valid(limits)) bounds = bounds or sortopt(self.options,'bounds',\ [[None,None]] * xlen(names)) self.options.bounds = bounds self.headers = {'PARAMETERS-V2':"PARAMETERS-V2", \ 'PARAMETERS':"PARAMETERS", \ 'BRDF-UCL':'BRDF-UCL',\ 'BRDF': 'BRDF'} self.headers_2 = {'BRDF-UCL': 'location'} # The ones pre-loaded are # self.read_functions = [self.read_pickle,self.read_numpy_fromfile] self._state = SpecialVariable(info=info,name=self.thisname,\ readers=readers,datadir=self.datadir,\ env=env,writers=writers,\ header=self.header,\ logger=logger,log_terms=log_terms,\ simple=False) # self._state is where data are read into # but self.Data and self.Name are where we access them from self.grid = grid # 
this is so we can access this object from # inside a SpecialVariable self.state = np.array([0.]) # a default data fmt output if datatype[0] == 'y': self.Name.fmt = 'BRDF' self.Name.state = np.array(['dummy']) else: self.Name.fmt = 'PARAMETERS' n_params = xlen(names) if not n_params: error_msg = \ "The field 'names' must be defined in options or"+ \ "passed directly to this method if you have the data type x" raise Exception(error_msg) self.Name.state = np.array(names) self.Name.location = np.array(location) self.Name.control = np.array(control) self.Name.header = self.header self.Name.bounds = np.array(bounds) self.Name.qlocation = np.array(limits) self.Name.datadir = datadir # # sort this object's name # sort logging self.logger = sortlog(self, log_terms['logfile'], logger, name=self.thisname, logdir=log_terms['logdir'], debug=log_terms['debug']) self.logger.info('Initialising %s' % type(self).__name__) get = lambda self, this: ParamStorage.__getattr__(self, this) get.__name__ = 'get' get.__doc__ = ''' An alternative interface to get the value of a class member that by-passes any more complex mechanisms. This returns the 'true' value of a class member, as opposed to an interpreted value. ''' set = lambda self, this, that: ParamStorage.__setattr__(self, this, that) set.__name__ = 'set' set.__doc__ = ''' An alternative interface to set the value of a class member that by-passes any more complex mechanisms. This sets the 'true' value of a class member, as opposed to an interpreted value. ''' def __set_if_unset(self, name, value): ''' A utility to check if the requested attribute is not currently set, and to set it if so. ''' if name in self.fakes.keys(): fname = self.fakes[name] if not fname in self.__dict__: ParamStorage.__setattr__(self, fname, value) return True else: if not name in self.__dict__: ParamStorage.__setattr__(self, name, value) return True return False def __getattr__(self, name): ''' get attribute, e.g. 
return self.state ''' return self.__getitem__(name) def __setattr__(self, name, value): ''' set attribute, e.g. self.state = 3 ''' if not self.__set_if_unset(name, value): self.__setitem__(name, value, nocheck=True) def __getitem__(self, name): ''' get item for class, e.g. x = self['state'] ''' if name in ['Data', 'Name']: return self._state[name.lower()] elif name in ['Control', 'Location']: return self._state[name.lower()] elif name == 'state': return SpecialVariable.__getattr__(self._state, name) #return super( State, self ).__getattr__(name ) else: return self.__dict__.__getitem__(name) def __setitem__(self, name, value, nocheck=False): ''' set item for class e.g. self['state'] = 3 ''' if nocheck or not self.__set_if_unset(name, value): if name in ['Data', 'Name']: self._state[name.lower()] = value elif name in ['Control', 'Location']: self._state[name.lower()] = value elif name == 'state': # NB 'self' during the __setitem__ call # will be self._state as far as we are # concerned here try: self._state.name.datatype = self.datatype except: pass SpecialVariable.__setattr__(self._state, name, value) #super( State, self ).__setattr__(name,value) # apply any overrides from options self.apply_defaults(self._state, self.options) if self.grid: if 'location' in self.Name.dict().keys(): #pdb.set_trace() self.regrid() else: ParamStorage.__setattr__(self, name, value) def apply_defaults(self, this, options): datatypes = np.array(options.datatypes).flatten() for i in datatypes: try: this.data.sd = np.array([float(v) for v in options.sd]) n_samples = this.data.state.shape[0] if (np.array(options.sd)).size == 1: this.data.sd = this.data.state * 0. 
+ np.array( options.sd)[0] elif this.data.sd.size != this.data.state.size: this.data.sd = np.tile(np.array(options.sd),n_samples).\ reshape(this.data.state.shape) except: pass try: default = options[i].default options[i].sddefault = np.array(options.sd).flatten() for jj in xrange(this.data.state.shape[1]): ww = np.where(np.array([np.isnan(i) for \ i in this.data.state[:,jj].astype(float)])) this.data.state[ww, jj] = default[jj] except: pass def ungrid(self, state, sd): ''' Utility to take a gridded dataset that has been gridded using self.regrid() and ungrid it. The ungridding is applied to self.x.state and self.x.sd Locational information is formed in self.Data.ungridder self.Names.ungridder returns: (locations,qlocations,state,sd) where: state : state array sd : sd array location: location qlocation:quantised location data ''' try: qlocations = self.ungridder.qlocation locations = self.ungridder.location nloc = self.ungridder.nloc except: raise Exception("You are trying to ungrid a dataset that wasn't gridded using State.regrid()" +\ " so the ungridder information is not available. Either load the data using State.grid " +\ " or set it up some other way or avoid calling this method with this type of data") h0 = [qlocations[..., 0].flatten()] for i in xrange(1, nloc): h1 = qlocations[..., i].flatten() h0.append(h1) qlocations = np.array(h0).T h0 = [locations[..., 0].flatten()] for i in xrange(1, nloc): h1 = locations[..., i].flatten() h0.append(h1) locations = np.array(h0).T h0 = [state[..., 0].flatten()] for i in xrange(1, state.shape[-1]): h1 = state[..., i].flatten() h0.append(h1) state = np.array(h0).T h0 = [sd[..., 0].flatten()] for i in xrange(1, sd.shape[-1]): h1 = sd[..., i].flatten() h0.append(h1) sd = np.array(h0).T return locations, qlocations, state, sd def regrid(self): ''' Utility to regrid non-gridded state (& associated) data If no data are specified (we can see this because self.Data.qlocation doesnt exist) then a default grid is generated. 
Outputs: self.Name.gridder self.Data.gridder containing: grid : state vector grid (offset in name) sdgrid : state vector sd grid (offset in name) ngrid : n samples from ip data for grids wheregrid : where grid points are data points ''' try: qlocation = self.Data.qlocation has_data = True self.logger.info("Looking at loading a grid") except: nd = len(self.Name.qlocation) qlocation = np.zeros((2, nd)) for i in xrange(nd): qlocation[:, i] = self.Name.qlocation[0][0:2] # no data defined, so set up a default grid has_data = False self.logger.info("No input data given: using default grid") if type(self.Data.state) == str: # somehow the magic reader hasnt worked raise Exception("Datafile %s hasn't been interpreted as state data"\ %self.Data.state) if np.array(self.Data.state).size == 1 and np.array( self.Data.state) == np.array(None): has_data = False datatype = self.options.datatype if datatype == 'y': self.logger.info("Not loading grid as datatype is y") return try: if not self.options[datatype].apply_grid: self.logger.info("Not loading grid as apply_grid not set") return except: return try: default = self.options[datatype].default except: default = np.zeros_like(self.Name.state).astype(float) try: sddefault = self.options[datatype].sddefault except: sddefault = list(np.array(default) * 0.) 
self.ungridder = ParamStorage() nd = qlocation.shape[1] limits = self.Name.qlocation nparams = len(default) x = [] minx = [] stepx = [] # number of location dimensions == nd for i in xrange(nd): xmin = qlocation[:, i].min() xmax = qlocation[:, i].max() xstep = 1 lim = limits[i] if lim[0] != None: xmin = lim[0] if lim[1] != None: xmax = lim[1] if lim[2] != None: xstep = lim[2] # LEWIS: 20 June 2012 ERROR if xstep used # x.append((xmax-xmin + 1)/xstep) x.append((xmax - xmin) / xstep + 1) minx.append(xmin) stepx.append(xstep) self.Name.qlocation_min = minx self.Name.qlocation_step = stepx # x contains the number of desired samples # in each dimension x.append(nparams) minx.append(0) stepx.append(0) # now loop over all observations and place in grid ntot = np.array(x).prod() grid = np.zeros(ntot).reshape(tuple(x)) sdgrid = np.zeros(ntot).reshape(tuple(x)) ngrid = np.zeros(ntot / x[-1], dtype=int).reshape(tuple(x[:nd])) # now fill the grid all = ':,' for i in xrange(1, nd): all = '%s:,' % all for i in xrange(nparams): if i >= len(default): self.logger.error("Incorrect length for default") if i >= len(sddefault): self.logger.error("Incorrect length for sd") exec('grid[%s%d] = default[%d]' % (all, i, i)) exec('sdgrid[%s%d] = sddefault[%d]' % (all, i, i)) if has_data: for i in xrange(qlocation.shape[0]): loc = tuple(qlocation[i, :]) #-minx[:nd]) ngrid[loc] += 1 thisdata = self.Data.state[i] if ngrid[loc] == 1: grid[loc][:] = thisdata else: grid[loc][:] += thisdata # LEWIS 20 June 2012 if self.Name.datatype == 'x': # take mean for loc in np.where(ngrid > 1)[0]: grid[loc][:] = grid[loc][:] / float(ngrid[loc]) wheregrid = np.where(ngrid > 0) self.Name.gridder = ParamStorage() self.Name.gridder.nd = len(self.Name.qlocation) self.Data.gridder = ParamStorage() self.Data.gridder.grid = grid self.Data.gridder.ngrid = ngrid self.Data.gridder.sdgrid = sdgrid self.Data.gridder.wheregrid = wheregrid self.Name.gridder.grid = minx self.Name.gridder.sdgrid = minx self.Name.gridder.stepx 
= stepx self.Name.gridder.wheregrid = wheregrid[0].size self.Data.gridder.ngrid = x # now form the information needed for ungridding state = grid nloc = len(state.shape) - 1 ss = np.array(state.shape) ss[-1] = nloc ss = tuple(ss) qlocation_min = minx qlocation_step = stepx locations = np.zeros(ss) qlocations = np.zeros(ss, dtype=int) for i in xrange(nloc): aa = np.zeros(nloc, dtype=object) for jj in xrange(ss[i]): if i == 0: qlocations[jj, ..., i] = jj locations[jj, ..., i] = jj * stepx[i] + qlocation_min[i] elif i == 1: qlocations[:, jj, ..., i] = jj locations[:, jj, ..., i] = jj * stepx[i] + qlocation_min[i] elif i == 2: qlocations[:, :, jj, ..., i] = jj locations[:, :, jj, ..., i] = jj * stepx[i] + qlocation_min[i] elif i == 3: qlocations[:, :, :, jj, ..., i] = jj locations[:, :, :, jj, ..., i] = jj * stepx[i] + qlocation_min[i] elif i == 4: qlocations[:, :, :, :, jj, ..., i] = jj locations[:, :, :, :, jj, ..., i] = jj * stepx[i] + qlocation_min[i] else: raise Exception('How many dimensions in your dataset ??? > 4 ??? thats ridiculous'+\ " ... 
I can't write that ") self.ungridder.qlocation = qlocations self.ungridder.location = locations self.ungridder.nloc = nloc def tester(self): ''' Run a test using all methods to check competence ''' print '== Data type:', self.datatype print '===============' print " name: state" print " fmt: %s:" % self.Name.fmt state = self.state print " n_samples: %d" % xlen(state) print "data:" if xlen(state): print state else: print 'not set' # look for items that appear in both name and data name = self.Name data = self.Data for i in name.dict(): if i != 'state' and i in data.dict(): print "Sub Dataset: %s" % i print "----------------" print name[i] print "----------------" print data[i] def get_dimension_span(self): ''' Return the number of unique samples in each dimension ''' retval = {} locations = self.Data.location for i in xrange(self.Name.location): n = xlen(np.unique(data[i])) if n == 1: n = 0 retval[i] = n return retval def __check_type(self, this, thistype, fatal=False, where=None): if type(thistype) != type: thistype = type(thistype) if type(this) != thistype: self.error = True self.error_msg = "Unexpected type %s to variable " % \ type(this).__name__ if where != None: self.error_msg = self.error_msg + " " + str(where) self.error_msg = self.error_msg + ": should be type %s" \ % thistype if self.logger: self.logger.error("Error %s: " % str(self.error), self.error_msg) if fatal: raise Exception(self.error_msg) return not self.error self.error = False return not self.error def __name_guess_suffix(self): ''' If no name is assigned, we get the default name from self.getname() and add this string on the back of it. Here, it is defined as a time string for now. ''' import time # need to make one up return str(time.time()) def __logger(self, x): if self.log: print x def getname(self): ''' Return the name associated with this class ("eoldas") or self.options.thisname ''' # must be better way of getting this ... 
leave for later if not self.__pcheck(self.options, "thisname"): return type(self).__name__ else: return self.options.thisname __pcheck = lambda self, this, name: this.dict().has_key(name) def startlog(self, log_terms, name=None): ''' Start the logger. This is called on initialisation and you shouldn't normally need to access it. ''' import logging from eoldas_Lib import set_up_logfile try: self.logger.shutdown() except: self.logger = ParamStorage() logfile = log_terms['logfile'] or self.options.logfile logdir = log_terms['logdir'] or self.options.logdir name = name or self.options.thisname self.logger = set_up_logfile(logfile,\ name=name,logdir=logdir) def write(self, filename, dataset, fmt='pickle'): ''' A state data write method ''' writer(self, filename, dataset, fmt=fmt)