def __init__(self, info=[], name=None, thisname=None, readers=[], log_terms={},
             datadir=["."], env=None,
             header=None, writers={}, simple=False, logger=None):
    '''
    Class SpecialVariable initialisation.

    Sets up the class as a ParamStorage and calls self.init().
    See init() for a fuller description of the options.
    '''
    ParamStorage.__init__(self, logger=logger)
    name = name or thisname
    if name == None:
        import time
        thistime = str(time.time())
        name = type(self).__name__
        name = "%s.%s" % (name, thistime)
    self.thisname = name
    # pass the caller's arguments through to init() rather than
    # re-initialising them to empty defaults
    self.init(info=info, name=self.thisname, readers=readers,
              log_terms=log_terms, datadir=datadir, env=env,
              header=header, writers=writers,
              simple=simple, logger=logger)
def __setitem__(self, name, value, nocheck=False):
    '''
    Set item for class, e.g. self['state'] = 3
    '''
    if nocheck or not self.__set_if_unset(name, value):
        if name in ['Data', 'Name', 'Control', 'Location']:
            self._state[name.lower()] = value
        elif name == 'state':
            # NB 'self' during the __setitem__ call
            # will be self._state as far as we are
            # concerned here
            try:
                self._state.name.datatype = self.datatype
            except:
                pass
            SpecialVariable.__setattr__(self._state, name, value)
            # apply any overrides from options
            self.apply_defaults(self._state, self.options)
            if self.grid:
                if 'location' in self.Name.dict().keys():
                    self.regrid()
        else:
            ParamStorage.__setattr__(self, name, value)
def sortlog(self, logfile, logger, name="eoldas", logdir=None, debug=True):
    '''
    A safe interface to logging, for passing log information
    between lots of classes.
    '''
    import logging
    import time
    from eoldas_Lib import set_up_logfile, dummyprint
    if type(self).__name__ == 'SpecialVariable' and logger != None:
        return logger
    try:
        if 'logger' in self.dict():
            return self.logger
    except:
        pass
    try:
        if 'logger' in self.keys():
            return self.logger
    except:
        pass
    try:
        if type(self).__name__ != 'SpecialVariable':
            this = self.logger
            return self.logger
    except:
        pass
    if logdir == None:
        logdir = '.'
    if name == None:
        name = type(self).__name__ + '.' + str(time.time())
    if logger:
        if type(self).__name__ == 'SpecialVariable':
            return logger
        logger.info('Setting up logger for %s' % name)
        logger = logging.getLogger(name)
        logger.info('Set up')
        return logger
    if logfile == None or name == None:
        # fall back to a print-based stub that still answers
        # info/debug/error calls
        logger = ParamStorage()
        logger.info = lambda x: dummyprint("Info: %s" % x)
        logger.debug = lambda x: dummyprint("Debug: %s" % x)
        logger.error = lambda x: dummyprint("Error: %s" % x)
        return logger
    logger = set_up_logfile(logfile, name=name, logdir=logdir, debug=debug)
    return logger
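# The fallback chain above (reuse a logger we were handed, else build a named
# logging.getLogger, else a print-based stub) is a pattern worth seeing in
# isolation. A minimal self-contained sketch of the same idea using only the
# standard library -- the get_safe_logger name is ours, not part of eoldas:

def get_safe_logger(name, logfile=None, existing=None):
    import logging, sys
    # reuse a logger we were handed, if any
    if existing is not None:
        return existing
    logger = logging.getLogger(name)
    if logger.handlers:
        # already configured elsewhere: reuse as-is
        return logger
    if logfile:
        handler = logging.FileHandler(logfile)
    else:
        # last resort: log to stderr rather than failing silently
        handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    return logger

# e.g. get_safe_logger('eoldas.demo').info('works with no logfile configured')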
def __set_if_unset(self, name, value):
    '''
    A utility to check if the requested attribute is not currently set,
    and to set it if so.
    '''
    if name in self.fakes:
        fname = self.fakes[name]
        if not fname in self.__dict__:
            ParamStorage.__setattr__(self, fname, value)
            return True
    else:
        if not name in self.__dict__:
            ParamStorage.__setattr__(self, name, value)
            return True
    return False
def __setitem__(self, name, value, nocheck=False):
    '''
    Set item for class, e.g. self['state'] = 3
    '''
    if nocheck or not self.__set_if_unset(name, value):
        if name in ['Data', 'Name', 'Control', 'Location']:
            self._state[name.lower()] = value
        elif name in self.fakes:
            this = self.get(self.fakes[name])
            SpecialVariable.__setattr__(this, name, value)
        else:
            this = self.get(name)
            ParamStorage.__setattr__(self, name, value)
def __init__(self, argv, name='eoldas', logger=None):
    from eoldas.eoldas_Lib import sortopt, sortlog
    argv = argv or sys.argv
    here = os.getcwd()
    self.thisname = name
    Parser.__init__(self, argv, name=self.thisname, logger=logger,
                    general=None, outdir=".", getopdir=False, parse=True)
    os.chdir(here)
    if not hasattr(self, 'configs'):
        self.logger.error('No configuration file specified')
        help(eoldas)
        return
    self.thisname = name
    solver = eoldas_Solver(self, logger=self.logger,
                           name=self.thisname + '.solver')
    self.general = sortopt(self.root[0], 'general', ParamStorage())
    self.general.write_results = sortopt(self.general, 'write_results', True)
    self.general.calc_posterior_unc = sortopt(self.general,
                                              'calc_posterior_unc', False)
    self.general.passer = sortopt(self.general, 'passer', False)
    self.solver = solver
    self.logger.info('testing full cost functions')
    for i in xrange(len(solver.confs.infos)):
        self.logger.info('%d/%d ...' % (i + 1, len(solver.confs.infos)))
        # try an initial cost and derivative call for each configuration
        solver.prep(i)
        J = solver.cost(None)
        J_prime = solver.cost_df(None)
    self.logger.info('done')
    # give the user some info on where the log file is,
    # in case they've forgotten
    print 'logging to', self.general.logfile
def __unload(self, options):
    from eoldas_ConfFile import array_type_convert
    this = ParamStorage()
    this.general = ParamStorage()
    for (k, v) in options.iteritems():
        ps = this
        that = k.split('.')
        if len(that) == 1:
            ps = this.general
        else:
            for i in xrange(len(that) - 1):
                if not hasattr(ps, that[i]):
                    ps[that[i]] = ParamStorage()
                ps = ps[that[i]]
        # set the value v, which needs to be interpreted
        ps[that[-1]] = array_type_convert(self.top, v)
    return this
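# The dotted-key convention used by __unload (a key such as
# 'operator.obs.y_state' becomes nested storage this.operator.obs.y_state,
# while bare keys land under 'general') can be mirrored with plain dicts.
# An illustrative standalone sketch, without ParamStorage or type conversion:

def _unload_sketch(options):
    # bare keys go under 'general'; dotted keys build nested dicts
    out = {'general': {}}
    for k, v in options.items():
        parts = k.split('.')
        if len(parts) == 1:
            out['general'][parts[0]] = v
            continue
        node = out
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = v
    return out

# _unload_sketch({'logdir': 'logs', 'operator.obs.y_state': 'obs.dat'})
# -> {'general': {'logdir': 'logs'},
#     'operator': {'obs': {'y_state': 'obs.dat'}}}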
def __init__(self, nbands, nbands_max, npt, bandwidth, obscovar,
             location, whichfile, doys, qa, theta_v, theta_i,
             phi_v, phi_i, isobs, params_x,
             obs=0.0, bandpass_library=False):
    # Define a configuration container. Makes everything look like Java.
    # Containers can then be useful for quickly listing all variables :)
    self.config = ParamStorage()
    self.config.spectral = ParamStorage()
    self.config.rt_model = ParamStorage()
    self.observations = ParamStorage()
    self.config.rt_model.nparams = rt_getnparams()
    self.config.npt = npt   # Number of points
    self.config.nv = 1      # Always set to 1. No questions asked.
    self._setup_spectral_config(nbands, nbands_max, bandwidth)
    self._setup_rt_model()
    self._setup_bandpass_funcs(bandwidth, bandpass_library)
    self._setup_geometry(theta_v, phi_v, theta_i, phi_i)
    self.params_x = np.zeros((self.config.npt,
                              self.config.rt_model.nparams))
    self.observations.brf = np.zeros([self.config.npt,
                                      self.config.spectral.nbands_max])
    self.observations.obs = np.zeros([self.config.npt,
                                      self.config.spectral.nbands_max])
    # obs will be set to zero if not loaded
    self.observations.obs[:, :] = obs
    # setting self.brf_ad to 1 means that we calculate the model
    # derivative by default
    self.brf_ad = np.ones([self.config.npt,
                           self.config.spectral.nbands_max])
    self._set_minmax()
    # NB: the original called self._set_x(x) with x undefined;
    # params_x is the state passed in above
    self._set_x(params_x)
def startlog(self, log_terms, name=None):
    '''
    Start the logger.

    This is called on initialisation and you shouldn't
    normally need to access it.
    '''
    import logging
    from eoldas_Lib import set_up_logfile
    try:
        self.logger.shutdown()
    except:
        self.logger = ParamStorage()
    logfile = log_terms['logfile'] or self.options.logfile
    logdir = log_terms['logdir'] or self.options.logdir
    name = name or self.options.thisname
    self.logger = set_up_logfile(logfile, name=name, logdir=logdir)
def setup_rt_model(self):
    """
    Set up the RT model (and adjoint, if available) by calling
    any preparation methods.
    """
    if not 'linear' in self.dict():
        self.linear = ParamStorage()
        if 'y' in self.dict():
            self.linear.H = np.zeros(self.y.state.shape)
        else:
            self.linear.H = np.zeros(self.x.state.shape)
    self.nv = 1
    self.npt = len(self.y.state)
    self.linear.J_prime = np.zeros(self.x.state.shape)
    if not self.rt_model.use_median:
        self.bandIndex = self.y_meta.spectral.all_bands
    else:
        self.bandIndex = self.y_meta.spectral.median_bands
    self.linear.brf_ad = np.ones((self.npt, len(self.bandIndex)))
    self.rt_library.rt_modelpre(np.array(self.bandIndex) + 1)
    self.rt_library.rt_modeldpre(self.npt)
    self.x_orig = self.x.state.copy()
    if self.rt_model.use_median:
        bands_to_use = self.y_meta.spectral.median_bands_to_use
        bandpass_library = self.y_meta.spectral.median_bandpass_library
        index = self.y_meta.spectral.median_bandpass_index
    else:
        bands_to_use = self.y_meta.spectral.bands_to_use
        bandpass_library = self.y_meta.spectral.bandpass_library
        index = self.y_meta.spectral.bandpass_index
    self.y_meta.spectral.bands_to_use = \
        np.zeros((len(bands_to_use), len(self.bandIndex)))
    for (i, bandname) in enumerate(self.y_meta.spectral.bandnames):
        fullb = bandpass_library[bandname]
        this = fullb[index[bandname]]
        # normalise the bandpass weights to sum to 1 and scatter
        # them into the columns of the full band index
        this = this / this.sum()
        ww = np.where(np.in1d(self.bandIndex, index[bandname]))[0]
        self.y_meta.spectral.bands_to_use[i, ww] = this
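# The final loop above builds a weight matrix mapping each observation band
# onto the full set of modelled wavelengths: bandpass values are normalised
# to sum to one, then scattered into the columns matching their wavelength
# indices. A small self-contained numpy sketch of that scatter (all values
# here are illustrative):

def _bandpass_scatter_sketch():
    import numpy as np
    band_index = np.array([10, 11, 12, 13, 14])  # wavelengths the model runs at
    bandpass_index = np.array([11, 12, 13])      # wavelengths in one sensor band
    bandpass = np.array([1.0, 2.0, 1.0])         # raw bandpass response
    weights = bandpass / bandpass.sum()          # normalise to sum to 1
    row = np.zeros(len(band_index))
    ww = np.where(np.in1d(band_index, bandpass_index))[0]
    row[ww] = weights
    return row   # array([ 0.  ,  0.25,  0.5 ,  0.25,  0.  ])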
def read_numpy_fromfile(self, goodfile, dataset, info=None):
    '''
    Utility to try to read a file goodfile.

    This simple version of the utility just tries np.genfromtxt,
    skipping a single header line. It is not generally recommended
    to use this first, as it will skip headers etc. that may contain
    information. However, it is a good illustration of the required
    interface format for file readers.

    Inputs:
        goodfile : a filename (ideally one that exists)
        dataset  : dataset name (e.g. x_state)
        info     : a list of other information

    Outputs:
        retval, (error, error_msg)
        where retval is a ParamStorage containing the dataset in
        retval.data[dataset] and other information (e.g. locations
        etc.) in retval.data and retval.name
    '''
    this = np.genfromtxt(goodfile, skip_header=1)
    l = len(np.atleast_1d(this).shape)
    if this.size > 0:
        retval = ParamStorage()
        retval.name = ParamStorage()
        retval.data = ParamStorage()
        retval.data[dataset] = this
        retval.name.filename = goodfile
        retval.name.fmt = 'np.genfromtxt'
        return retval, (False, "")
    error = True
    error_msg = "Failed numpy read of %s" % goodfile
    return 0, (error, error_msg)
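# Since this reader doubles as the reference for the reader interface
# (inputs: goodfile, dataset, info; outputs: retval, (error, error_msg)),
# a minimal caller would look like the following. This is a hypothetical
# usage sketch, written as it would appear inside a method of the same
# class; the filename is illustrative:
#
#     retval, (error, error_msg) = self.read_numpy_fromfile('x_state.dat',
#                                                           'x_state')
#     if error:
#         self.logger.error('read failed: %s' % error_msg)
#     else:
#         print retval.data['x_state'].shape, retval.name.filename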
def demonstration(conf='default.conf'):
    '''
    Need to develop a new demo
    '''
    from eoldas_Lib import eoldas_setup
    from os import getcwd
    from eoldas_ParamStorage import ParamStorage
    options = ParamStorage()
    options.here = getcwd()
    options.logdir = 'logs'
    options.logfile = "logfile.log"
    options.datadir = ['.', '~/.eoldas']
    #self = eoldas_setup('default.conf', options)
    #return self
    return True
def safesplit(text, character):
    '''
    Split a string on character, taking account of (), [], {} and quotes.
    '''
    lent = len(text)
    intoken = ParamStorage()
    qtoken = ParamStorage()
    for i in ["'", '"']:
        qtoken[i] = False
    for i in ["()", "[]", "{}"]:
        intoken[i] = 0
    start = 0
    lst = []
    i = 0
    while i < lent:
        # are any of intoken, qtoken open?
        isopen = False
        for (j, k) in intoken.iteritems():
            if text[i] == j[0]:
                intoken[j] += 1
            elif text[i] == j[1]:
                intoken[j] -= 1
            isopen = isopen or (intoken[j] != 0)
        for (j, k) in qtoken.iteritems():
            if text[i] == j[0]:
                qtoken[j] = not qtoken[j]
            isopen = isopen or qtoken[j]
        if text[i] == character and not isopen:
            lst.append(text[start:i])
            start = i + 1
        elif text[i] == '\\':
            # skip over escaped characters
            i += 2
            continue
        i += 1
    lst.append(text[start:])
    return lst
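# A few illustrative calls showing what the bracket and quote tracking buys
# over a plain str.split (expected outputs shown as comments):
#
#     safesplit("a,b,c", ',')           # -> ['a', 'b', 'c']
#     safesplit("f(x,y),[1,2],z", ',')  # -> ['f(x,y)', '[1,2]', 'z']
#     safesplit("'a,b',c", ',')         # -> ["'a,b'", 'c']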
class eoldas_setup(object):
    """
    This is redundant
    """
    def __init__(self, datafile, options):
        self.options = options
        # sort logging
        self.options.general.logdir = \
            self.__getopt(self.options.general, 'logdir', "logs")
        self.options.general.logfile = \
            self.__getopt(self.options.general, 'logfile', "logfile.log")
        # 1. set up logging for this particular run
        self.logger = set_up_logfile(self.options.general.logfile,
                                     name="eoldas_setup",
                                     logdir=self.options.general.logdir)
        try:
            self.setup()
        except:
            self.logger.error("Unable to access critical elements of the " +
                              "options. See help(eoldas_setup.setup) for details")
            sys.exit(-1)
        # read conf file(s)
        self.configfile = datafile
        #config = ConfFile(self.configfile, dirs=dirs, log_name=self.logger)
        if len(config.infos) == 0:
            self.fail = True
            return
        # not sure what to do if multiple config files???
        # just take the first one at the moment
        self.config = config.infos[0]
        # update with cmd line options
        self.config.update(self.options, combine=True)
        self.logger.info("Model sd scaling by %f over that defined in the "
                         "config file" % self.config.general.model_sd)
        self.ok = self.process_config_file()

    def setup(self):
        '''
        Access and set up critical elements of the problem.

        This includes the existence of:
            options.parameter.names (a list)
        '''
        options = self.options
        options.parameter.n_params = len(options.parameter.names)
        self.options = options

    def __getopt(self, options, key, value):
        if not hasattr(options, key):
            options[key] = value
        return options[key]

    def __pcheck(self, thisdict, name):
        '''
        Check that name exists in thisdict
        '''
        try:
            return name in thisdict.dict()
        except:
            return False

    def __min(self, a, b):
        '''
        Min utility for 2 numbers, ignoring None
        '''
        if a == None:
            out = b
        elif b == None:
            out = a
        else:
            out = np.min([a, b])
        if out == None:
            return 0
        return out

    def process_config_file(self):
        # NB: this def line is reconstructed -- the body below appears
        # detached in the source, and __init__ above calls
        # self.process_config_file()
        # the next critical thing is some observations
        obs = load_brdf_file(brf, self.config, bandpass_names={})
        if obs == False:
            return False
        self.config.operator.obs.update(obs, combine=True)
        # sets up an initial version of x_init,
        # which is in the observation 'space' (i.e. one per obs)
        for n_par in xrange(self.config.params.n_params):
            if np.all(self.obs.x_init[:, n_par] == 0):
                self.obs.x_init[:, n_par] = self.default_vals[n_par]
        # try brfinit_files, which can overwrite x_init
        try:
            if self.options.preload != []:
                brfinit_files = self.options.preload
                self.brfinit_files['override'] = brfinit_files
        except:
            if self.options.preload != []:
                self.brfinit_files = ParamStorage()
                self.brfinit_files['override'] = self.options.preload
        # this is a hack to get the same structure
        self.brfinit_files = self.brfinit_files.dict()
        thisdoys = None
        if self.brfinit_files is not None:
            # this is not consistent with having multiple files
            # and is a bit of a mess
            for key in self.brfinit_files.keys():
                if type(self.brfinit_files[key]) == type([]):
                    initfile = self.brfinit_files[key][0]
                else:
                    initfile = self.brfinit_files[key]
                (thisdoys, thisparams) = self.read_parameters(initfile,
                                                              confdir=confdir)
                # if the read fails, thisdoys is None
            if thisdoys == None:
                self.brfinit_files = None
        # For convenience, we can invert the observation covariance matrices
        self.obs.obsinvcovar = []
        self.obs.real_obsinvcovar = []
        for sample_no in xrange(self.obs.npt):
            temp_mtx = np.matrix(self.obs.obscovar[sample_no]).I
            if self.config.params.scale_cost:
                self.logger.info("Scaling obs by %f" %
                                 float(self.obs.npt * self.obs.nbands[0]))
                self.obs.obsinvcovar.append(
                    temp_mtx / float(self.obs.npt * self.obs.nbands[sample_no]))
            else:
                self.obs.obsinvcovar.append(temp_mtx)
            self.obs.real_obsinvcovar.append(temp_mtx)
        # if there is anything non zero in x_init, set params_x to that
        if self.obs.x_init.sum() > 0:
            self.params_x = self.obs.x_init.copy()
        else:
            self.params_x = np.zeros((self.obs.npt,
                                      self.config.params.n_params))
        # determine which params to fix, based primarily on solve_for flags
        fix_params = define_fixparams(self.parameters,
                                      solve_for=self.solve_for,
                                      prior_sd=self.prior_sd,
                                      model_unc_cfg=self.model_unc_cfg)
        self.config.params.n_model_params = \
            np.sum(fix_params == 3) + np.sum(fix_params == 4)
        # set up the grid based on the span of unique doys
        self.unique_doys, self.quantised_doys, self.obs_shift = \
            quantise_time(self.obs.doys, self.time_quant, grid=grid)
        self.grid_n_obs = self.unique_doys.shape[0]
        self.fix_params = np.tile(fix_params, self.grid_n_obs).reshape(
            (self.grid_n_obs, self.config.params.n_params))
        self.logger.info("%d days, %d quantised days" %
                         (len(self.unique_doys), len(self.quantised_doys)))
        self.grid_n_params = fix_params.shape[0]
        # set up a grid model representation from self.params_x,
        # which we will use when loading.
        # self.params_x is a full representation in obs space,
        # so we expand it to the model grid space
        self.store_params = self.get_x(self.params_x, self.fix_params * 0.)
        # but this may contain zeros if a parameter has not been defined,
        # so it should be set to the default value
        # (or maybe interpolation is better)
        udoys = np.unique(self.obs.doys)
        try:
            where_udoys = np.in1d(self.unique_doys, udoys)
        except:
            where_udoys = np.zeros_like(self.unique_doys).astype(np.bool)
            for i in udoys:
                w = np.where(self.unique_doys == i)
                where_udoys[w] = True
        for i in xrange(self.grid_n_params):
            self.store_params[:, i] = np.interp(self.unique_doys,
                                                self.unique_doys[where_udoys],
                                                self.store_params[where_udoys, i])
        # override this with data from brfinit_files
        if self.brfinit_files is not None:
            # zeroth: pull out elements of thisdoys that appear in
            # self.unique_doys, then interpolate thisparams onto the grid
            store_params = self.store_params * 0.
            new_thisdoys = np.zeros(self.store_params.shape[0]).astype(np.int)
            # loop over thisdoys and load where appropriate
            for (i, j) in enumerate(thisdoys):
                ww = np.where(j == self.unique_doys)
                store_params[ww, :] = thisparams[i, :]
                new_thisdoys[ww] = j
            thisdoys = new_thisdoys
            udoys = np.unique(thisdoys)
            try:
                where_udoys = np.in1d(thisdoys, udoys)
            except:
                where_udoys = np.zeros_like(thisdoys).astype(np.bool)
                for i in udoys:
                    w = np.where(thisdoys == i)
                    where_udoys[w] = True
            for i in xrange(self.grid_n_params):
                self.store_params[:, i] = np.interp(self.unique_doys,
                                                    self.unique_doys[where_udoys],
                                                    store_params[where_udoys, i])
        # deal with model uncertainty
        self.model_unc = np.ones((self.fix_params.shape[1]))
        for (i, k) in enumerate(self.parameters):
            if self.model_unc_cfg[k] > 0:
                self.model_unc[i] = self.model_unc[i] * self.model_unc_cfg[k]
        self.prior_m = np.array([self.prior_mean[k] for k in self.parameters])
        self.prior_std = np.array([self.prior_sd[k] for k in self.parameters])
        return  # (prior_mean, prior_sd, model_unc, abs_tol, scale_cost)

    def get_x(self, x_obs, x_model, summer=False):
        """
        Return an x_model representation which has parameter values
        for the complete model grid.

        The array x_obs has a representation of the parameter values
        only for observation points, whereas x_model is typically
        defined over the whole assimilation period/region.

        When loading parameters in this way (from observation space
        to model space), only the parameter associated with the first
        observation at a particular point is taken (summer=False).
        When loading derivatives (e.g. when using the adjoint) we
        need to sum over all observation grid points (summer=True).

        Parameters
        -----------
        x_obs : array-like
            The state vector representation that corresponds to
            the observations
        x_model : array-like
            The state vector representation that corresponds to
            the assimilation interval.
        """
        if summer == False:
            for i in np.unique(self.obs_shift).astype(np.int):
                w = np.where(self.obs_shift == i)[0][0]
                x_model[i, :] = x_obs[w, :]
        else:
            x_model[:, :] = 0.
            for i in np.unique(self.obs_shift).astype(np.int):
                w = np.where(self.obs_shift == i)[0]
                for j in w:
                    x_model[i, :] = x_model[i, :] + x_obs[j, :]
        return x_model

    def write_parameters(self, filename, params, ofmt='ASCII'):
        """
        Write the parameters out to filename
        """
        if ofmt == 'ASCII':
            self.logger.info("Saving parameters to %s" % filename)
            fp = open(filename, 'w')
            fp.write("# PARAMETERS %s\n" %
                     "".join(["%s " % i for i in self.parameters]))
            for i in xrange(self.grid_n_obs):
                fp.write("%f %s\n" % (self.unique_doys[i],
                                      "".join(["%s " % j for j in params[i, :]])))
            fp.close()
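# The obs-to-grid mapping in get_x is easiest to see with a toy obs_shift.
# This standalone sketch (free-standing function and invented data, not the
# class method itself) reproduces both modes:

def _get_x_sketch():
    import numpy as np
    obs_shift = np.array([0, 0, 2])      # two obs on grid row 0, one on row 2
    x_obs = np.array([[1.], [5.], [7.]])
    # summer=False: the first observation at each grid point wins
    x_model = np.zeros((3, 1))
    for i in np.unique(obs_shift).astype(int):
        w = np.where(obs_shift == i)[0][0]
        x_model[i, :] = x_obs[w, :]
    # x_model is now [[1.], [0.], [7.]]
    # summer=True: all observations at each grid point are summed
    x_sum = np.zeros((3, 1))
    for i in np.unique(obs_shift).astype(int):
        for j in np.where(obs_shift == i)[0]:
            x_sum[i, :] += x_obs[j, :]
    # x_sum is now [[6.], [0.], [7.]]
    return x_model, x_sum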
def read_single_conf_file(self, conf_files, options=None):
    """
    Purpose:
        Parse the information from conf_files into a ConfigParser
        class instance and return this.

    Parameters:
        conf_files : list of one or more config files

    Options:
        options=None : pass an options structure through

    Uses:
        self.datadir=['.','~/.eoldas'] : list of directories to look
                           for config files
        self.env=None    : name of an environment variable where
                           config files can be searched for if not
                           found in datadir (or absolute path name
                           not given)
        self.fatal=False : flag to state whether the call should fail
                           if a requested config file is not found

    Returns:
        tuple : (config, info, config_error)
        where:
            config : ConfigParser class instance, or False if an
                     error occurs
            info   : ParamStorage of the scanned configuration, or
                     False if an error occurs
            config_error : string giving information on any error
    """
    import ConfigParser
    from eoldas_Lib import get_filename
    # Instantiate a parser
    config = ConfigParser.ConfigParser()
    # Read the config files; report an error for any that don't exist
    if type(conf_files) == str:
        conf_files = [conf_files]
    all_conf_files = []
    for fname in conf_files:
        fname, fname_err = get_filename(fname, datadir=self.datadir,
                                        env=self.env)
        if fname_err[0] != 0:
            if self.fatal:
                return False, False, \
                    "Cannot find configuration file %s\n%s" \
                    % (fname, fname_err[1])
        else:
            all_conf_files.append(fname)
            thisdir = os.path.dirname(fname)
            if not thisdir in self.datadir:
                self.datadir.append(thisdir)
    if len(all_conf_files) == 0:
        return False, False, \
            "%s: No valid conf files found in list %s in dirs %s" \
            % (os.getcwd(), conf_files, self.datadir)
    config.config_files = config.read(all_conf_files)
    if len(config.config_files) == 0:
        return False, False, \
            "%s: No valid conf files found in list %s in dirs %s" \
            % (os.getcwd(), conf_files, self.datadir)
    # from here on, we attempt to pull specific information from
    # the conf files
    info = ParamStorage(name='info',
                        doc='Configuration information for %s'
                        % str(config.config_files))
    # scan everything into config.info, sorting the sections so that
    # 'general' and 'parameter' sections are processed first
    sections = config.sections()
    firstsections = []
    secondsections = []
    for this in sections:
        if this[:7] == 'general' or this[:9] == 'parameter':
            firstsections.append(this)
        else:
            secondsections.append(this)
    firstsections.sort()
    sections = firstsections
    [sections.append(i) for i in secondsections]
    for this in sections:
        self.logger.debug('...Section %s' % this)
        self.scan_info(config, this, info, this, info)
        self.rescan_info(config, this, info, this, info, 0)
    self.config = config
    self.info = info
    if options != None and type(options) == ParamStorage:
        self.info.update(options, combine=True)
    # sort any helper text, looping over self.info into self.loaders
    self.__sort_help(self.info, "")
    try:
        self.logger.info("Config: %s read correctly"
                         % str(all_conf_files))
    except:
        pass
    return self.config, self.info, "Config: %s read correctly" \
        % str(all_conf_files)
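# A sketch of how a caller might consume the three-element return. This is
# hypothetical usage (written as from inside the same class; the filename
# is illustrative):
#
#     config, info, msg = self.read_single_conf_file(['eoldas.conf'])
#     if config is False:
#         self.logger.error(msg)
#     else:
#         self.logger.info(msg)   # info now holds the scanned sections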
def __init__(self, args, name=None, general=None, log=False, logger=None,
             outdir=".", getopdir=False, parse=True):
    """
    Initialise parser class.

    This sets up the class defaults.

    Options:
        general=general: this over-rides any defaults with values
            set in parser. general can be of the form:
                1. class ParamStorage (i.e. the same form as
                   self.general)
                2. a command line list (where the first item in
                   the list is ignored)
                3. a string containing a set of command line general
            See self.parse() for more details on forms 2 and 3, as
            these simply make a call to that method.
        log=True: if log is set to True, then logging starts when
            this class is instanced. Note that the logfile and logdir
            might change if subsequent calls to Parser.parse() are
            made.
    """
    if type(args) == str:
        args = args.split()
    self.dolog = log
    self.log = log
    self.name = args[0]
    self.args = args[1:]
    self.fullargs = args
    self.store_fullargs = args
    if name == None:
        import time
        thistime = str(time.time())
        name = type(self).__name__
        name = "%s.%s" % (name, thistime)
    self.thisname = name
    # find the following flags:
    # --conf | -c : conf
    # --datadir   : datadir
    datadir = [".", "~/.eoldas", sys.path[0] + '/../bin',
               sys.path[0] + '/../confs',
               sys.path[0] + '/../system_confs',
               sys.path[0] + '/../eoldaslib']
    conf = "default.conf"
    logfile = None
    logdir = "."
    self.top = ParamStorage()
    self.top.general = ParamStorage()
    self.top.general.__helper__ = ParamStorage()
    self.top.general.__default__ = ParamStorage()
    self.top.general.__extras__ = ParamStorage()
    self.top.general.conf = []
    for i in xrange(len(self.args)):
        theseargs = self.args[i].split('=')
        if theseargs[0] == "--conf":
            conf = theseargs[1]
            self.top.general.conf.append(conf)
        elif theseargs[0][0:2] == "-c":
            # allow both '-cfile.conf' and '-c file.conf'
            if len(theseargs[0]) > 2:
                conf = theseargs[0][2:]
            else:
                conf = self.args[i + 1]
            self.top.general.conf.append(conf)
        elif theseargs[0] == "--datadir":
            datadir1 = theseargs[1].replace('[', '').replace(']', '').split()
            datadir1.extend(datadir)
            datadir = datadir1
        elif theseargs[0] == "--logfile":
            logfile = theseargs[1]
        elif theseargs[0] == "--logdir":
            logdir = theseargs[1]
        elif theseargs[0] == "--outdir":
            outdir = theseargs[1]
    if self.top.general.conf == []:
        self.top.general.conf = conf
    if logfile == None:
        logfile = conf.replace('conf', 'log')
    self.top.general.here = os.getcwd()
    self.top.general.datadir = datadir
    self.top.general.logfile = logfile
    self.top.general.logdir = logdir
    self.top.general.outdir = outdir
    # add here to datadir, in addition to '.',
    # to take account of the change of directory
    self.top.general.datadir = self.__add_here_to_datadir(
        self.top.general.here, self.top.general.datadir)
    self.top.general.datadir = self.__add_here_to_datadir(
        self.top.general.outdir, self.top.general.datadir)
    # cd to where the output is to be
    self.__cd(self.top.general.outdir)
    # set up the default command line options
    self.default_loader()
    # update with anything passed here
    if general and type(general) == ParamStorage:
        self.top.update(self.__unload(general), combine=True)
    # read the conf files to get any cmd line options
    self.logger = sortlog(self, self.top.general.logfile, logger,
                          name=self.thisname,
                          logdir=self.top.general.logdir)
    self.config = ConfFile(self.top.general.conf,
                           name=self.thisname + '.config',
                           loaders=self.loaders,
                           datadir=self.top.general.datadir,
                           logger=self.logger,
                           logdir=self.top.general.logdir,
                           logfile=self.top.general.logfile)
    if len(self.config.configs) == 0:
        this = "Warning: Nothing doing ... " + \
            "you haven't set any configuration", \
            self.config.storelog
        try:
            self.logger.error(this)
        except:
            print "Called with args:"
            print "eoldas", self.args
        raise Exception(this)
    # now loaders contains all of the defaults set here,
    # plus those from the config (config over-rides defaults here)
    self.loaders = self.config.loaders
    # now convert loaders into parser information
    self.parseLoader(self.loaders)
    self.parse(self.fullargs)
    if general and type(general) == ParamStorage:
        self.top.update(self.__unload(general), combine=True)
    if general and type(general) == str:
        self.parse(general.split())
    if general and type(general) == list:
        self.parse(general)
    # now update the info in self.config
    for i in self.config.infos:
        i.update(self.top, combine=True)
        # so now all terms in self.config.infos contain information
        # from the config file, updated by the cmd line
        i.logger = self.logger
        i.log()
    # move the information up a level
    self.infos = self.config.infos
    self.configs = self.config.configs
    self.config_log = self.config.storelog
    #if getopdir:
    #    self.sortnames()
    self.config.loglist(self.top)
    self.__cd(self.top.general.here)
def __setattr__(self, this, value):
    '''
    Variable setting method for style self.this

    Varies what it does depending on the type of value.
    The method interprets and sets the SpecialVariable value:

    1.  ParamStorage or SpecialVariable. The data are directly
        loaded. This is one of the most flexible formats for input.
        It expects fields 'data' and/or 'name', which are loaded
        into self. There will normally be a field data.this, where
        this is the variable name passed here.
    2.  A dictionary, in the same format as the ParamStorage.
    3.  A tuple or list, loaded into self.data.this as a numpy
        array (historically interpreted as (data, name)).
    4.  A string, interpreted as a filename (various formats). An
        attempt to read the string as a file (of a set of formats)
        is made. If none pass, it is maintained as a string.
    5.  A numpy array (np.ndarray), loaded into self.data.this.
    6.  Anything else, loaded into self.data.this as a numpy array.
    '''
    if self.simple:
        self.set(this, value)
        return
    t = type(value)
    try:
        if t == ParamStorage or t == SpecialVariable:
            # update the whole structure
            self.data.update(value.data, combine=True)
            self.name.update(value.name, combine=True)
        elif t == dict:
            n_value = ParamStorage().from_dict(value)
            self.__setattr__(this, n_value)
        elif t == tuple or t == list:
            ParamStorage.__setattr__(self['data'], this, np.array(value))
        elif t == str:
            # set the term
            ParamStorage.__setattr__(self['data'], this, value)
            # interpret as a file read if possible
            self.process_data_string(this, info=self.info)
        elif t == np.ndarray:
            ParamStorage.__setattr__(self['data'], this, value)
        else:
            ParamStorage.__setattr__(self['data'], this,
                                     np.array(value))
    except:
        if self.logger:
            self.logger.info("Failed to set SpecialVariable %s from %s %s"
                             % (this, t.__name__, value))
        return
    if self.logger:
        self.logger.info("Set variable %s from type %s" % (this, t.__name__))
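# The effect of the type dispatch above is easiest to see from the caller's
# side. Assuming sv is an initialised SpecialVariable (a hypothetical usage
# sketch; the filename is illustrative):
#
#     sv.state = np.ones((10, 3))      # ndarray -> stored in sv.data.state
#     sv.state = [1.0, 2.0, 3.0]       # list/tuple -> stored as np.array
#     sv.state = {'data': ...}         # dict -> converted to ParamStorage
#     sv.state = 'input/test.brf'      # str -> attempted file read; kept as
#                                      # a string if no reader succeeds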
def prep(self, thisconf):
    '''
    A method to prepare the solver
    '''
    self.root = self.confs.root[thisconf]
    root = self.confs.root[thisconf]
    self.sortMask()
    self.op = sortopt(root.general, 'optimisation', ParamStorage())
    self.op.plot = sortopt(self.op, 'plot', 0)
    self.op.name = sortopt(self.op, 'name', 'solver')
    self.op.maxfunevals = sortopt(self.op, 'maxfunevals', 2e4)
    self.op.maxiter = sortopt(self.op, 'maxiter', 1e4)
    self.op.gtol = sortopt(self.op, 'gtol', 1e-3)
    self.op.iprint = sortopt(self.op, 'iprint', 1)
    self.op.solverfn = sortopt(self.op, 'solverfn', 'scipy_lbfgsb')
    self.op.randomise = sortopt(self.op, 'randomise', False)
    self.op.no_df = sortopt(self.op, 'no_df', False)
    self.result = sortopt(root.options, 'result', ParamStorage())
    self.result.filename = sortopt(root.options.result, 'filename',
                                   'results.pkl')
    self.result.fmt = sortopt(root.options.result, 'format', 'pickle')
    try:
        self.transform = self.root.options.x.transform
        # NB: the original assigned x.transform here too, which reads
        # as a copy-paste slip; invtransform matches usage elsewhere
        self.invtransform = self.root.options.x.invtransform
    except:
        self.transform = None
        self.invtransform = None
    # descend into the operators and identify any observation ops:
    # we then want to be able to write out files (or plot data)
    # that are H(x). We only do this for operators that have both
    # 'x' and 'y' terms, as we store the filename under 'y.result'.
    self.Hx_ops = []
    for i, op in enumerate(root.operators):
        op.loader(root)
        if 'y_meta' in op.dict() and op.options.y.datatype == 'y':
            # There is a potential observation
            op.y_state.options.y.result = \
                sortopt(op.y_state.options.y, 'result', ParamStorage())
            op.y_state.options.y.result.filename = \
                sortopt(op.y_state.options.y.result, 'filename',
                        op.y_state.thisname)
            op.y_state.options.y.result.format = \
                sortopt(op.y_state.options.y.result, 'format', 'PARAMETERS')
            state = op.y_state._state
            this = {
                'filename': op.y_state.options.y.result.filename,
                'format': op.y_state.options.y.result.format,
                'state': state,
                'y': op.y_state,
                'op': op,
                'transform': self.transform,
                'invtransform': self.invtransform,
                'Hx': op.linear.H}
            op.Hx_info = this
            self.Hx_ops.append(this)
        else:
            this = {
                'transform': self.transform,
                'invtransform': self.invtransform,
            }
            op.Hx_info = this
def read_numpy(self, filename, name, info=[]):
    '''
    Try to read the file as a NpzFile
    '''
    from eoldas_Lib import set_default_limits, check_limits_valid, \
        quantize_location, dequantize_location
    # none of these are critical to functioning
    try:
        info = self._state.info
    except:
        info = []
    try:
        names = self.name.state
    except:
        try:
            names = self.Name.state
        except:
            names = None
    try:
        control = self.Name.control
    except:
        try:
            control = self.name.control
        except:
            control = None
    try:
        location = self.name.location
    except:
        try:
            location = self.Name.location
        except:
            location = ['time', 'row', 'col']
    try:
        limits = self.name.qlocation
    except:
        try:
            limits = self.Name.qlocation
        except:
            limits = set_default_limits(location)
    # location specifies the dimensions and names of the
    # problem, e.g., & typically [time,row,col]
    limits = np.array(check_limits_valid(limits))
    try:
        f = np.load(filename)
        if not type(f).__name__ == 'NpzFile':
            f.close()
            self.error_msg = "%s is not a NpzFile" % filename
            self.error = True
            if 'logger' in self or 'logger' in self.dict():
                self.logger.info(self.error_msg)
            return 0, (self.error, self.error_msg)
    except:
        self.error_msg = "a problem opening %s as a NpzFile" % filename
        self.error = True
        if 'logger' in self or 'logger' in self.dict():
            self.logger.info(self.error_msg)
        return 0, (self.error, self.error_msg)
    # ok so far, so let's have a look inside
    ncontents = np.array(f.files)
    contents = np.array(f.files)
    # translation table for default names
    def_names = 'b1 b2 b3 b4 b5 b6 b7'.split()
    if names == None:
        # assume MODIS
        names = def_names
    def_alt_names = \
        '645.5 856.5 465.6 553.6 1241.6 1629.1 2114.1'.split()
    # look for any of names in contents
    datasets = []
    alt_datasets = []
    alt_names = names
    for i in xrange(len(np.atleast_1d(contents))):
        if contents[i] in names:
            datasets.append(i)
    if not len(np.atleast_1d(datasets)):
        if 'logger' in self or 'logger' in self.dict():
            self.logger.error(
                "None of requested datasets %s found in %s ..."
                % (str(names), filename) +
                " trying default MODIS names: only %s"
                % (str(contents)))
        names = def_names
        alt_names = def_alt_names
        for i in xrange(len(np.atleast_1d(contents))):
            if contents[i] in names:
                datasets.append(i)
        if not len(np.atleast_1d(datasets)):
            self.error_msg = "None of requested datasets %s found in %s" \
                % (str(names), filename) + ' ' + \
                "... trying default MODIS names: only %s" \
                % (str(contents))
            self.error = True
            if 'logger' in self or 'logger' in self.dict():
                self.logger.error(self.error_msg)
            return 0, (self.error, self.error_msg)
    trans_names = {}
    for (i, j) in enumerate(alt_names):
        trans_names[names[i]] = j
    alt_name = []
    this_name = []
    for i in datasets:
        this_name.append(contents[i])
        alt_name.append(trans_names[contents[i]])
    # Translate some old stylies...
    trans = {'raa': 'vaa', 'doys': 'time'}
    for i in trans:
        if i in contents:
            ncontents[np.where(contents == i)[0]] = trans[i]
    # as a minimum, there needs to be some definition of one of
    # the terms in location
    try:
        # This could be more general, but this will do for now,
        # as it's useful for spatial datasets
        QA_OK = np.array(
            [8, 72, 136, 200, 1032, 1288, 2056, 2120, 2184, 2248])
        doy = f['doys'] - 2004000
        qa = f['qa']
        vza = f['vza']
        sza = f['sza']
        raa = f['raa']
        y = []
        for i in this_name:
            y.append(f[i])
        if 'logger' in self or 'logger' in self.dict():
            self.logger.info(
                "successfully interpreted NpzFile dataset from %s"
                % filename)
            self.logger.info("sub-setting ...")
        controls = []
        locations = []
        grid = []
        qlocations = []
        thisshape = vza.shape
        starter = {'time': np.min(doy), 'row': 0, 'col': 0}
        delta = {'time': 1, 'row': 1, 'col': 1}
        if len(np.atleast_1d(limits)) < 3:
            from eoldas_Lib import set_default_limits
            old_loc = location
            location = np.array(['time', 'row', 'col'])
            lim2 = set_default_limits(location)
            for i in xrange(len(np.atleast_1d(limits))):
                ww = np.where(old_loc[i] == location)[0]
                lim2[ww] = list(limits[i])
            limits = lim2
        for i in xrange(len(np.atleast_1d(limits))):
            if limits[i][0] == None:
                limits[i][0] = starter[location[i]]
            if limits[i][1] == None:
                limits[i][1] = (thisshape[i] - 1) + starter[location[i]]
            if limits[i][2] == None:
                limits[i][2] = delta[location[i]]
        limits = np.array(limits)
        start_doy = limits[0][0]
        end_doy = limits[0][1]
        step_doy = limits[0][2]
        start_row = limits[1][0]
        end_row = limits[1][1]
        step_row = limits[1][2]
        start_col = limits[2][0]
        end_col = limits[2][1]
        step_col = limits[2][2]
        gooddays = np.logical_and.reduce(np.concatenate(
            ([doy >= start_doy], [doy <= end_doy])))
        qa = qa[gooddays, start_row:end_row + 1, start_col:end_col + 1]
        vza = vza[gooddays, start_row:end_row + 1,
                  start_col:end_col + 1] * 0.01
        sza = sza[gooddays, start_row:end_row + 1,
                  start_col:end_col + 1] * 0.01
        raa = raa[gooddays, start_row:end_row + 1,
                  start_col:end_col + 1] * 0.01
        yy = []
        for i in xrange(len(np.atleast_1d(this_name))):
            this = y[i]
            yy.append(this[gooddays, start_row:end_row + 1,
                           start_col:end_col + 1] * 0.0001)
        doy = doy[gooddays]
        # now do QA
        mask = np.zeros_like(qa).astype(bool)
        # loop over qa
        for j in xrange(len(np.atleast_1d(QA_OK))):
            ww = np.where(qa == QA_OK[j])
            mask[ww] = True
        # better look over data to check valid
        for j in xrange(len(np.atleast_1d(yy))):
            ww = np.where(yy[j] < 0)
            mask[ww] = False
        ww = np.where(mask)
        if 'logger' in self or 'logger' in self.dict():
            self.logger.debug('parsing dataset: %d samples look ok'
                              % np.array(ww).shape[1])
        vza = vza[ww]
        sza = sza[ww]
        raa = raa[ww]
        doy = doy[ww[0]]
        row = ww[1] + start_row
        col = ww[2] + start_col
        locations = np.array([doy, row, col])
        nnn = len(np.atleast_1d(locations[0]))
        orig = np.repeat(np.array([start_doy, start_row, start_col]),
                         locations.shape[1]).reshape(locations.shape).T
        div = np.repeat(np.array([step_doy, step_row, step_col]),
                        locations.shape[1]).reshape(locations.shape).T
        qlocations = ((locations.T - orig) / div.astype(float)).astype(int).T
        controls = np.array([np.ones_like(doy).astype(bool),
                             vza, raa, sza, 0 * doy])
        y = []
        for i in xrange(len(np.atleast_1d(this_name))):
            this = yy[i]
            y.append(this[ww])
        grid = np.array(y)
        fmt = 'BRDF-UCL'
        control = ['mask', 'vza', 'vaa', 'sza', 'saa']
        bands = alt_name
        if not np.array(grid).size:
            if 'logger' in self or 'logger' in self.dict():
                self.logger.error(
                    "Warning: returning a zero-sized dataset ... " +
                    "I wouldn't try to do anything with it")
        # in case we don't have data for all bands
        mask = np.logical_or.reduce([[this_name[i] == x for x in names]
                                     for i in xrange(len(np.atleast_1d(this_name)))])
        sd = np.array('0.004 0.015 0.003 0.004 0.013 0.01 0.006'
                      .split())[mask]
        sd = np.array([float(i) for i in sd.flatten()]).reshape(sd.shape)
        nsamps = grid.shape[1]
        sd = sd.repeat(nsamps).reshape(grid.shape).T
        datasets = ParamStorage()
        datasets.data = ParamStorage()
        datasets.name = ParamStorage()
        datasets.name.fmt = fmt
        grid = grid.T
        datasets.data[name] = np.zeros([grid.shape[0],
                                        len(np.atleast_1d(names))]).astype(object)
        datasets.data[name][:, :] = None
        for i in xrange(len(np.atleast_1d(this_name))):
            ww = np.where(names == this_name[i])[0][0]
            datasets.data[name][:, ww] = grid[:, i]
        datasets.data.location = np.array(locations).T
        datasets.data.control = np.array(controls).T
        datasets.data.qlocation = np.array(qlocations).T
        datasets.name[name] = np.array(names)
        datasets.name.location = np.array(['time', 'row', 'col'])
        datasets.name.control = np.array(control)
        datasets.name.qlocation = limits
        datasets.name.bands = np.array(bands)
        datasets.data.sd = np.zeros([grid.shape[0],
                                     len(np.atleast_1d(names))]).astype(object)
        datasets.data.sd[:, :] = None
        for i in xrange(len(np.atleast_1d(this_name))):
            ww = np.where(names == this_name[i])[0][0]
            datasets.data.sd[:, ww] = sd[:, i]
        datasets.name.sd = np.array(names)
        if 'logger' in self or 'logger' in self.dict():
            self.logger.debug('finished parsing dataset')
    except:
        self.error_msg = \
            "a problem processing information from %s as a NpzFile" \
            % filename
        self.error = True
        if 'logger' in self or 'logger' in self.dict():
            self.logger.info(self.error_msg)
        return 0, (self.error, self.error_msg)
    f.close()
    if 'logger' in self or 'logger' in self.dict():
        self.logger.info('... done')
    self.error = False
    self.error_msg = ""
    return datasets, (self.error, self.error_msg)
def demonstration():
    from eoldas_State import State
    from eoldas_ParamStorage import ParamStorage
    import numpy as np
    # a basic set up for State, setting names & bounds etc.
    options = ParamStorage()
    options.logfile = 'test/data_type/logs/log.dat'
    options.names = \
        'gamma xlai xhc rpl xkab scen xkw xkm xleafn xs1 xs2 xs3 xs4 lad'.split()
    options.bounds = [[0.01, None],
                      [0.01, 0.99],
                      [0.01, 10.0],
                      [0.001, 0.10],
                      [0.1, 0.99],
                      [0.0, 1.0],
                      [0.01, 0.99],
                      [0.3, 0.9],
                      [0.9, 2.5],
                      [0.0, 4.],
                      [0.0, 5.],
                      [None, None],
                      [None, None],
                      [None, None]]
    options.default = -1.0 * np.ones(len(options.names))
    options.location = 'time'.split()
    options.control = 'mask vza vaa sza saa'.split()
    options.datadir = ['.', 'test/data_type']
    name = "eoldas_data_type test 0"
    options.limits = [[170, 365, 1]]
    self = State(options, datatype='y', name=name,
                 datadir=options.datadir, env=None,
                 logfile=options.logfile)
    self.tester()
    # Now we set some state data
    this = ParamStorage()
    # how many state vector elements should there be?
    n_states = len(self.Name.state)
    self.state = np.ones([100, n_states])
    self.Data.sd = np.ones([1, n_states])
    self.Name.sd = self.Name.state
    print '******************'
    print (self.Data.sd, self.Name.sd)
    this.data = ParamStorage()
    this.name = ParamStorage()
    this.data.state = self.state * 2
    controls = self.Name.control
    n_controls = len(controls)
    this.data.control = np.ones([100, n_controls])
    this.data.location = np.ones([100, n_controls])
    # we can load x_state from a ParamStorage
    self.state = this
    # now we should see the control data etc.
    self.tester()
    # change a dataset name to see if that works:
    # should load everything as numpy arrays
    self.Name.control = np.array(['vza', 'sza'])
    # change a dataset to see if that works .. deliberately load a bad one
    self.Data.control = 0
    # now try a direct state data load,
    # which will load into self.Data.state
    self.state = np.zeros([100, 100]) + 5.
    self['state'] = np.zeros([100, 100]) + 6.
    # now try accessing it:
    print self.state
    # change the control info
    self.Name.control = np.array(['vza'])
    print self.Name.control
    # reset it
    self.state = self.state * 2
    print self.state
    # now try reading a file into state
    self.state = 'test/data_type/input/test.brf'
    print '========'
    print 'data from a BRDF file'
    self.tester()
    print '========'
    # written as a pickle
    self.write('test/data_type/output/test.pickle', None, fmt='pickle')
    self.logger.info("...DONE...")
    name = "eoldas_data_type test 1"
    del self
    self1 = State(options, datatype='y', name=name,
                  datadir=options.datadir, env=None,
                  logfile=options.logfile, grid=True)
    # read from pickle
    self1.state = 'test/data_type/output/test.pickle'
    print '========'
    print 'data from a pickle file'
    self1.tester()
    print '========'
    # try to load an npz file
    del self1
    options.location = 'time row col'.split()
    options.limits = [[170, 365, 1], [0, 500, 1], [200, 200, 1]]
    self2 = State(options, datatype='y', name=name,
                  datadir=options.datadir, env=None,
                  logfile=options.logfile)
    self2.state = 'test/data_type/input/interpolated_data.npz'
    print '========'
    print 'data from a npz file'
    self2.tester()
    print '========'
    # write as BRDF-UCL
    self2.write('test/data_type/output/test.brf', None, fmt='BRDF-UCL')
    del self2
    self3 = State(options, datatype='y', name=name,
                  datadir=options.datadir, env=None,
                  logfile=options.logfile)
    # then test the reader
    self3.state = 0.
    self3.state = 'test/data_type/output/test.brf'
    print '========'
    print 'data from a BRDF-UCL file'
    print '========'
    self3.tester()
    print '========'
    # then write as a PARAMETERS file
    self3.write('test/data_type/output/test.param', None, fmt='PARAMETERS')
    del self3
    options.location = 'time row col'.split()
    options.limits = [[170, 365, 1], [0, 500, 1], [200, 200, 1]]
    options.control = np.array(['mask', 'vza', 'vaa', 'sza', 'saa'])
    self4 = State(options, datatype='y', name=name,
                  datadir=options.datadir, env=None,
                  logfile=options.logfile)
    # then test the reader
    self4.state = 0.
    self4.state = 'test/data_type/output/test.param'
    print '========'
    print 'data from a PARAMETERS file'
    print '========'
    self4.tester()
    print '========'
def _load_rt_library(self, rt_library):
    """
    A method that loads up the compiled RT library code and sets
    some configuration options.

    This method tries to import all the methods and make them
    available through `self.rt_library.<method_name>`. It is also a
    safe importer: if some functions are not available in the
    library, it will provide safe stand-ins for them.

    Additionally, while importing, a number of configuration options
    in the class are also updated or set to default values.

    Parameters
    -----------
    rt_library : string
        This is the name of the library object (.so file) that
        will be loaded
    """
    from eoldas_Lib import sortopt
    import_string = "from %s import " % (rt_library)
    self.logger.debug("Using %s..." % rt_library)
    self.rt_library = sortopt(self, 'rt_library', ParamStorage())
    # 1. Import main functionality
    try:
        self.logger.debug("Loading rt_model")
        exec(import_string + "rt_model")
        self.rt_library.rt_model = rt_model
    except ImportError:
        self.logger.info(
            "Could not import basic RT functionality: rt_model")
        self.logger.info(
            "Check library paths, and whether %s.so is available" %
            rt_library)
        raise Exception('error importing library %s' % rt_library)
    # 1a. Import conditioning methods that can be ignored
    try:
        exec(import_string + 'rt_modelpre')
        self.rt_library.rt_modelpre = rt_modelpre
    except:
        self.rt_library.rt_modelpre = self._nowt
    try:
        # NB: the original re-imported rt_modelpre here and assigned it
        # to rt_modelpost, which reads as a copy-paste slip
        exec(import_string + 'rt_modelpost')
        self.rt_library.rt_modelpost = rt_modelpost
    except:
        self.rt_library.rt_modelpost = self._nowt
    # 2. Try to import the derivative (adjoint) code
    self.rt_model.ignore_derivative = sortopt(self.rt_model,
                                              'ignore_derivative', False)
    if self.rt_model.ignore_derivative == False:
        try:
            exec(import_string +
                 "rt_modeld, rt_modeldpre, rt_modeldpost, rt_modelpred")
            self.rt_model.have_adjoint = True
            self.J_prime = self.J_prime_full
            self.rt_library.rt_modeld = rt_modeld
            self.rt_library.rt_modeldpre = rt_modeldpre
            self.rt_library.rt_modeldpost = rt_modeldpost
            self.rt_library.rt_modelpred = rt_modelpred
        except ImportError:
            self.logger.info(
                "No adjoint. Using finite differences approximation.")
            self.rt_model.have_adjoint = False
    else:
        self.logger.info(
            "Ignoring adjoint. Using finite differences approximation.")
        self.rt_model.have_adjoint = False
    self._configure_adjoint()
    try:
        exec(import_string + "rt_model_deriv")
        self.rt_library.rt_model_deriv = rt_model_deriv
    except ImportError:
        self.rt_library.rt_model_deriv = None
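# The try-import-with-fallback pattern above can be written more compactly
# with importlib. A minimal standalone sketch (the module and function names
# are illustrative, and this is not the eoldas implementation):

def _load_optional_sketch(module_name, func_names):
    import importlib
    def nowt(*args, **kwargs):
        # harmless stand-in for optional library hooks
        return None
    try:
        mod = importlib.import_module(module_name)
    except ImportError:
        mod = None
    # substitute the stand-in for anything missing
    return dict((f, getattr(mod, f, nowt) if mod else nowt)
                for f in func_names)

# lib = _load_optional_sketch('my_rt_library',
#                             ['rt_model', 'rt_modelpre', 'rt_modelpost'])
# lib['rt_modelpre']()   # safe to call even if the hook was not compiled in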
def __init__(self, confs, logger=None, logfile=None, thisname=None, name=None, datadir=None, logdir=None): ''' Initialise the solver. This does the following: 1. Read configuration file(s) 2. Load operators 3. Test the call to the cost function There can be multiple groups of configuration files, so self.confs, which holds the core information set up here, can contain multiple configurations. The number of configurations is len(confs.infos) and the ith configuration is conf = self.confs.infos[i]. Various loggers are available throughout the classes used, but the top level logger is self.confs.logger, so you can log with e.g. self.confs.logger.info('this is some info') The root operator is stored in self.confs.root[i] for the ith configuration, so the basic call to the cost function is: J,J_prime = self.confs.root[i].cost(None) ''' from eoldas_ConfFile import ConfFile from eoldas_Lib import sortopt name = name or thisname if name == None: import time thistime = str(time.time()) name = type(self).__name__ name = "%s.%s" % (name, thistime) self.thisname = name self.confs = confs self.top = sortopt(self, 'top', ParamStorage()) self.top.general = sortopt(self.top, 'general', ParamStorage()) thisname = sortopt(self.top.general, 'name', thisname or self.thisname) logfile = sortopt(self.top.general, 'logfile', logfile or 'log.dat') logdir = sortopt(self.top.general, 'logdir', logdir or 'logs') datadir = sortopt(self.top.general, 'datadir', datadir or ['.']) self.logger = sortlog(self,logfile,logger or self.confs.logger,\ name=self.thisname,logdir=logdir,debug=True) n_configs = len(self.confs.infos) self.confs.root = [] self.have_unc = False try: logdir = logdir except: logdir = self.top.general.logdir # first set up parameter conf = confs.infos[0] general = conf.general op = conf.parameter if not 'parameter' in conf.dict(): raise Exception('No parameter field found in %s item %d'%\ (conf.__doc__,0)) general.is_spectral = sortopt(general, 'is_spectral', True) if not general.is_spectral: sort_non_spectral_model(op, conf.operator, logger=confs.logger) general.init_test = sortopt(general, 'init_test', False) confs.logger.info('loading parameter state') conf.parameter.name = 'Operator' parameter = eval(op.name)(op,general,\ parameter=None,\ logger=confs.logger,\ name=name+".parameter",\ datatype=list(conf.parameter.datatypes),\ logdir=logdir,\ logfile=logfile,\ datadir=datadir) try: parameter.transform = parameter.options.x.transform parameter.invtransform = parameter.options.x.invtransform except: parameter.transform = parameter.options.x.names parameter.invtransform = parameter.options.x.names # we now have access to parameter.x.state, parameter.x.sd etc # and possibly parameter.y.state etc. operators = [] for (opname, op) in conf.operator.dict().iteritems(): if opname != 'helper': #pdb.set_trace() exec('from eoldas_%s import %s' % (op.name, op.name)) # make sure the data limits and x bounds are the same # ...
inherit from parameter op.limits = parameter.options.limits if not 'datatypes' in op.dict(): op.datatypes = 'x' thisop = eval(op.name)(op,general,parameter=parameter,\ logger=confs.logger,\ name=name+".%s-%s"%(thisname,opname),\ datatype=list(op.datatypes),\ logdir=logdir,\ logfile=logfile,\ datadir=datadir) # load from parameter thisop.loader(parameter) operators.append(thisop) try: thisop.transform = parameter.options.x.transform thisop.invtransform = parameter.options.x.invtransform except: thisop.transform = parameter.options.x.names thisop.invtransform = parameter.options.x.names thisop.ploaderMask = np.in1d(parameter.x_meta.state, thisop.x_meta.state) try: thisop.invtransform = np.array( thisop.invtransform[thisop.ploaderMask]) thisop.transform = np.array( thisop.transform[thisop.ploaderMask]) except: ww = thisop.ploaderMask thisop.invtransform = np.array(thisop.transform)[ww] thisop.transform = np.array(thisop.transform)[ww] # sort the loaders parameter.operators = operators self.confs.root.append(parameter) # Now we have set up the operators # try out the cost function if general.init_test: self.logger.info('testing cost function calls') J, J_prime = self.confs.root[0].cost() self.logger.info('done') self.confs.root = np.array(self.confs.root)
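# An illustrative (not definitive) sketch of driving the solver initialised
# above, assuming a Solver class wrapping this __init__ and a parsed set of
# configurations in `confs`; only the cost() call pattern is taken from the
# docstring and the init_test block above:
#
#   solver = Solver(confs, logfile='log.dat', logdir='logs')
#   for root in solver.confs.root:
#       J, J_prime = root.cost()   # cost and its derivative for one config
#       solver.logger.info('J = %s' % str(J))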
def demonstration(): # set state to a filename # and it will be loaded with the data x = DemoClass() data = {'state':np.ones(2)*5. ,'foo':np.ones(10)} name = {'state':'of the nation','foo':'bar'} this = {'data':data,'name':name} x.state = this print 1,x.state,x['state'] x.oats = 'beans and barley-o' # nothing set so far print 2,x['state'] # should return the same print 3,x.state x.state = 'test/data_type/input/test.brf' print 4,x.state print 5,x.Name.fmt # set state to a dict and # it will load from that data = {'state':np.zeros(10)} name = {'state':'foo'} x.state = {'data':data,'name':name} print 6,x.state # set from a ParamStorage # and it will be loaded this = ParamStorage() this.data = ParamStorage() this.name = ParamStorage() this.data.state = np.ones(10) this.name.state = 'bar' this.data.sd = np.ones(10)*2. this.name.sd = 'sd info' # assign the data x.state = this # access the data print 7,x.state # access another member # Data, Name == implicitly .state print 8,x.Data.sd print 9,x.Name.sd # set directly x.Name.sd = 'bar' print 10,x.Name.sd # set from a tuple (data,name) # or a list [data,name] data = 'foo' name = 'bar' x.state = (data,name) print 11,x.state x.state = [name,data] print 12,x.state # set from a numpy array x.state = np.array(np.arange(10)) print 13,x.state # set from another state y = DemoClass() y.state = x.state x.state = x.state * 2 print 'x state',x.state print 'y state',y.state # set from a float x.state = 100. print 14,x.state # another interesting feature # we have 2 special terms in demonstration # state and other # if we set up some structure for data # for other this = ParamStorage() this.data = ParamStorage() this.name = ParamStorage() this.data.other = np.ones(10) this.name.other = 'bar' # and then assign it to state x.state = this print 15,'state',x.state # we see state is unchanged # but other is also not set. print 16,'other',x.other # we load into other using: x.other = this print 'other',x.other # but if you look at the information contained print 17,x._other.to_dict() print 18,x._state.to_dict() # or better written as: print 19,x.var('state').to_dict() # you will see that state contains the other data that was loaded # a simple way to write out the data is to a pickle # x.write_pickle('xstate','x_state.pkl') # but try to avoid using the underscores print 20,"x state in pickle:",x.state SpecialVariable.write(x._state,'x_state.pkl',fmt='pickle') # which we can reload: z = DemoClass() z.state = 'x_state.pkl' print 21,"z state read from pickle",z.state # which is the same as a forced read ... zz = DemoClass() zz.state = 'x_state.pkl' print 22,zz.state # read a brf file zz.Name.qlocation = [[170,365,1],[0,500,1],[200,200,1]] zz.state = 'test/data_type/input/interpolated_data.npz' print zz.state SpecialVariable.write(zz._state,'test/data_type/output/interpolated_data.pkl',fmt='pickle') zz.state = 'test/data_type/input/test.brf' print zz.state
def init(self,info=[],name=None,readers=[],log_terms={},\ datadir=None,env=None,\ header=None,writers={},\ simple=False,logger=None): ''' Initialise information in a SpecialVariable instance. Options: info : Information that can be passed through to reader methods (a list). name : a name to use to identify this instance in any logging. By default this is None. If name is set to True, then logging is to stdout. readers : A list of reader methods that are pre-pended to those already contained in the class. log_terms : A dictionary of log options. By default {'logfile':None,'logdir':'log','debug':True} If name is set, and logfile specified, then logs are logged to that file. If name is set to True, then logging is to stdout. datadir : A list of directories to search for data files to interpret if the SpecialVariable is set to a string. env : An environment variable that can be used to extend the datadir variable. header : A header string to use to identify pickle files. By default, this is set to "EOLDAS -- plewis -- UCL -- V0.1" simple : A flag to switch off the 'complicated' interpretation methods, i.e. just set and return variables literally, do not try to interpret them. ''' self.set('simple',True) if name == None: import time thistime = str(time.time()) name = type(self).__name__ name = "%s.%s" % (name,thistime) self.thisname = name # this is where we will put any data self.data = ParamStorage() self.name = ParamStorage() self.info = info self.datadir = datadir or ['.'] self.env = env init_read_write(self,header,readers,writers) # sort logging and log if name != None self.log_terms = {'logfile':None,'logdir':'log','debug':True} # override logging info for (key,value) in log_terms.iteritems(): self.log_terms[key] = value self.logger= sortlog(self,self.log_terms['logfile'],logger,name=self.thisname,\ logdir=self.log_terms['logdir'],\ debug=self.log_terms['debug']) self.simple = simple
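# A hedged usage sketch for init() above; the file and directory names are
# illustrative assumptions, not part of the API:
#
#   s = SpecialVariable(datadir=['.', 'test/data_type/input'],
#                       log_terms={'logfile': 'sv.log', 'logdir': 'logs',
#                                  'debug': True},
#                       simple=False)
#   s.state = 'test/data_type/input/test.brf'  # interpreted via the readers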
class Parser(): def __init__(self,args,name=None,general=None,log=False,logger=None,outdir=".",getopdir=False,parse=True): """ Initialise parser class. This sets up the class defaults. Options: general=general: this over-rides any defaults with values set in general general can be of the form: 1. class ParamStorage (i.e. the same form as self.general) 2. a command line list (where the first item in the list is ignored) 3. a string containing a set of command line general See self.parse() for more details on general 2 and 3 as these simply make a call to that method. log=True If log is set to True, then logging starts when this class is instanced. Note that the logfile and logdir might change if subsequent calls to Parser.parse() are made """ if type(args) == str: args = args.split() self.dolog = log self.log = log self.name = args[0] self.args = args[1:] self.fullargs = args self.store_fullargs = args if name == None: import time thistime = str(time.time()) name = type(self).__name__ name = "%s.%s" % (name,thistime) self.thisname = name # find the following flags: # --conf | -c : conf # --datadir : datadir datadir = [".","~/.eoldas",sys.path[0]+'/../bin',sys.path[0]+'/../confs',\ sys.path[0]+'/../system_confs',sys.path[0]+'/../eoldaslib'] conf = "default.conf" logfile = None logdir = "." self.top = ParamStorage () self.top.general = ParamStorage () self.top.general.__helper__ = ParamStorage () self.top.general.__default__ = ParamStorage () self.top.general.__extras__ = ParamStorage () self.top.general.conf = [] for i in xrange(len(self.args)): theseargs = self.args[i].split('=') if theseargs[0] == "--conf": conf = theseargs[1] self.top.general.conf.append(conf) elif theseargs[0][0:2] == "-c": if len(theseargs[0]) > 2: conf = theseargs[0][2:] else: conf = self.args[i+1] self.top.general.conf.append(conf) elif theseargs[0] == "--datadir": datadir1 = theseargs[1].replace('[','').\ replace(']','').split() [datadir1.append(datadir[i]) for i in \ xrange(len(datadir))] datadir = datadir1 elif theseargs[0] == "--logfile": logfile= theseargs[1] elif theseargs[0] == "--logdir": logdir = theseargs[1] elif theseargs[0] == "--outdir": outdir = theseargs[1] if self.top.general.conf == []: self.top.general.conf = conf if logfile == None: logfile = conf.replace('conf','log') self.top.general.here = os.getcwd() self.top.general.datadir = datadir self.top.general.logfile = logfile self.top.general.logdir = logdir self.top.general.outdir = outdir # add here to datadir # in addition to '.' to take account of the change of directory self.top.general.datadir = self.__add_here_to_datadir(\ self.top.general.here,self.top.general.datadir) self.top.general.datadir = self.__add_here_to_datadir(\ self.top.general.outdir,self.top.general.datadir) # cd to where the output is to be self.__cd(self.top.general.outdir) # set up the default command line options self.default_loader() # update with anything passed here if general and type(general) == ParamStorage: self.top.update(\ self.__unload(general),combine=True) # read the conf files to get any cmd line options self.logger = sortlog(self,self.top.general.logfile,logger,name=self.thisname,\ logdir=self.top.general.logdir) self.config = ConfFile(self.top.general.conf,name=self.thisname+'.config',\ loaders=self.loaders,datadir=self.top.\ general.datadir,logger=self.logger,logdir=self.top.general.logdir,\ logfile=self.top.general.logfile) if len(self.config.configs) == 0: this = "Warning: Nothing doing ...
you haven't set any configuration",\ self.config.storelog try: self.logger.info(this) except: print "Called with args:" print "eoldas",self.args pass raise Exception(this) # now loaders contains all of the defaults set here # plus those from the config (config over-rides defaults here) self.loaders = self.config.loaders # now convert loaders into parser information self.parseLoader(self.loaders) self.parse(self.fullargs) if general and type(general) == ParamStorage: self.top.update(self.__unload(general),combine=True) if general and type(general) == str: self.parse(general.split()) if general and type(general) == list: self.parse(general) # now update the info in self.config for i in self.config.infos: i.update(self.top,combine=True) # so now all terms in self.config.infos # contain information from the config file, updated by # the cmd line i.logger = self.logger i.log() # move the information up a level self.infos = self.config.infos self.configs = self.config.configs self.config_log = self.config.storelog #if getopdir: # self.sortnames() self.config.loglist(self.top) #del(self.config.infos) #del(self.config.configs) #del(self.config) self.__cd(self.top.general.here) def __add_here_to_datadir(self,here,datadir): from os import sep,curdir,pardir if type(datadir) == str: datadir = [datadir] iadd = 0 for i in xrange(len(datadir)): j = i + iadd if datadir[j] == curdir or datadir[j] == pardir: tmp = datadir[:j] rest = datadir[j+1:] tmp.append("%s%s%s" % (here,sep,datadir[j])) tmp.append(datadir[j]) iadd += 1 for k in xrange(len(rest)): tmp.append(rest[k]) datadir = tmp return datadir def default_loader(self): """ Load up parser information for first pass """ self.loaders = [] self.top.general.here = os.getcwd() self.loaders.append(["datadir",['.',self.top.general.here],\ "Specify where the data and/or conf files are"]) self.loaders.append(["passer",False,\ "Pass over optimisation (i.e. report and plot the initial values)"]) self.loaders.append(["outdir",None,\ "Explicitly specify the results and processing output directory"]) self.loaders.append(["verbose",False,"Switch ON verbose mode","v"]) self.loaders.append(["debug",False,optparse.SUPPRESS_HELP,"d"]) self.loaders.append(["conf","default.conf",\ "Specify configuration file. Set multiple files by using the flag multiple times.","c"]) self.loaders.append(["logdir","logs",\ "Subdirectory to put log file in"]) self.loaders.append(["logfile","logfile.logs","Log file name"]) def parseLoader(self,loaders): """ Utility to load a set of terms from the list loaders into the ParamStorage general If there are 3 terms in each loaders element, they refer to: 1. name 2. default value 3.
helper text If there is a fourth, it is associated with extras (short parser option) """ general = ParamStorage () general.__default__ = ParamStorage () general.__extras__ = ParamStorage () general.__helper__ = ParamStorage () for this in loaders: if len(this) > 1: general[this[0]] = this[1] general.__default__[this[0]] = this[1] else: general[this[0]] = None general.__default__[this[0]] = None if len(this) > 2: general.__helper__[this[0]] = this[2] else: general.__helper__[this[0]] = optparse.SUPPRESS_HELP if len(this) > 3: general.__extras__[this[0]] = "%s" % this[3] else: general.__extras__[this[0]] = None # make sure arrays aren't numpy.ndarray if type(general.__default__[this[0]]) == np.ndarray: general.__default__[this[0]] = \ list(general.__default__[this[0]]) self.top.update(self.__unload(general),combine=True) def __list_to_string__(self,thisstr): """ Utility to convert a list to some usable string """ return(str(thisstr).replace('[','_').strip("']").\ replace('.dat','').replace("_'","_").replace(",","").\ replace(" ","").replace("''","_").replace("___","_").\ replace("__","_")) def __cd(self,outdir): if not os.path.exists(outdir): try: os.makedirs(outdir) except OSError: print "Fatal: Prevented from creating",outdir sys.exit(-1) try: os.chdir(outdir) except: print "Fatal: unable to cd to",outdir raise Exception("Fatal: unable to cd to %s"%outdir) def sortnames(self): """ Utility code to sort out some useful filenames & directories """ if self.top.general.outdir == None: basename = self.top.general.basename confnames = self.__list_to_string__(self.top.general.conf) self.top.general.outdir = basename + "_conf_" + confnames self.__cd(self.top.general.outdir) def parse(self,args,log=False): ''' Given a list such as sys.argv (of that form) or an equivalent string parse the general and store in self.parser ''' self.dolog = log or self.dolog if type(args) == type(""): args = args.split() args = args[1:] self.top.general.cmdline = str(args) usage = "usage: %prog [general] arg1 arg2" parser = OptionParser(usage,version="%prog 0.1") # we go through items in self.top.general and set up # parser general for each for this in sorted(self.top.general.__helper__.__dict__.keys()): # sort out the action # based on type of the default default=self.top.general.__default__[this] action="store" thistype=type(default) if thistype == type(None): thistype = type("") argss = '--%s'%this dest = "%s"%this helper = self.top.general.__helper__[this] if type(default) == type([]): # list, so append action="store" elif type(default) == type(True): action="store_true" typer = "string" if thistype != type([]) and thistype != type(True): typer='%s' % str(thistype).split("'")[1] # has it got extras?
if self.top.general.__extras__[this] != None: parser.add_option('-%s'%self.top.general.__extras__[\ this].lower(),argss,type="string",action=action,\ help=helper,default=str(default)) else: parser.add_option(argss,action=action,help=helper,\ type="string",default=str(default)) elif ( thistype != type(True)): if self.top.general.__extras__[this] != None: parser.add_option('-%s'%self.top.general.__extras__[\ this].lower(),argss,type="string",action=action,\ help=helper,default=str(default)) else: parser.add_option(argss,action=action,help=helper,\ default=str(default)) if thistype == type(True): if self.top.general.__extras__[this] != None: parser.add_option('-%s'%self.top.general.__extras__[\ this].lower(),argss,dest=dest,action=action,\ help=helper,default=default) else: parser.add_option(argss,action=action,help=helper,\ dest=dest,default=default) that = this.split('.') argss = '--' for i in xrange(len(that)-1): argss = argss + "%s." % that[i] argss = argss + 'no_%s' % that[-1] helper='The opposite of --%s'%this action='store_false' typer='%s' % str(thistype).split("'")[1] # has it got extras? if self.top.general.__extras__[this] != None: parser.add_option('-%s'%self.top.general.__extras__[\ this].capitalize(),argss,dest=dest,action=action,\ help=helper) else: parser.add_option(argss,action=action,dest=dest,\ help=helper) # we have set all option types as str, so we need to interpret # them in __unload (general, args) = parser.parse_args(args) #for data_file in args: # general.data_file.append(data_file) #general.data_file = list(np.array(general.data_file).flatten()) #general.brf= list(np.array(general.brf).flatten()) # load these into self.general self.top.update(self.__unload(general.__dict__),combine=True) #self.sortnames() if self.dolog: self.log = set_up_logfile(self.top.general.logfile,\ logdir=self.top.general.logdir) self.log_report() def __unload(self,options): from eoldas_ConfFile import array_type_convert this = ParamStorage() this.general = ParamStorage() for (k,v) in options.iteritems(): ps = this that = k.split('.') if len(that) == 1: ps = this.general else: for i in xrange(len(that)-1): if not hasattr(ps,that[i]): ps[that[i]] = ParamStorage() ps = ps[that[i]] # set the value v, which needs # to be interpreted ps[that[-1]] = array_type_convert(self.top,v) return this
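# An illustrative command line for the Parser above, using only flags that
# __init__ scans for (--conf/-c, --datadir, --logfile, --logdir, --outdir);
# the file names are made up:
#
#   p = Parser('eoldas --conf=my.conf --logdir=logs --outdir=results')
#   print(p.top.general.conf)     # ['my.conf']
#   print(p.top.general.outdir)   # 'results'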
class DemoClass(ParamStorage): ''' A demonstration class using SpecialVariable The behaviour we desire is that a SpecialVariable acts like a ParamStorage (i.e. we can get or set by attribute or item, e.g. x.state = 3 and x['state'] = 3 give the same result, and print x['state'] and print x.state give the same result). This is easy enough to achieve for all cases other than getting from x.state. It turns out that __getattr__ does not override the default method for state *if* state is set in the class instance. To get around that, we have to use a fake name (fakes here) and instead of storing state, we store _state. This makes dealing with all of the conditions a little more complicated and a little slower, but it allows a much more consistent interface. At any time, a SpecialVariable can simply be over-written by assigning to its fake name. e.g. instance the class x = DemoClass() set a non-special value x.foo = 'bar' we can use this as x.foo or x['foo'] print x.foo,x['foo'] which should give bar bar now use the SpecialVariable. There are many ways to load this up, but an easy one is via a dictionary. data = {'state':np.ones(2)*5. ,'foo':np.ones(10)} name = {'state':'of the nation','foo':'bar'} this = {'data':data,'name':name} x.state = this print x.state,x['state'] which gives [ 5. 5.] [ 5. 5.], so we get the same from either approach. Note that what is returned from the SpecialVariable is only what is in this['data']['state'], and that is fully the intention of the SpecialVariable class. It can be loaded with rich information from a range of sources, but if you want a quick interpretation of the data (i.e. x.state) you only get what is in x.state, or more fully, x._state.data.state The other data that we passed to the SpecialVariable is as it was when read in, but relative to x._state, i.e. we have: x._state.name.foo which is bar. If you want to directly access the SpecialVariable, you can use: x.get(x.fakes['state']) which is the same as x._state or x[x.fakes['state']] It is not advisable to directly use the underscore access as the fakes lookup dictionary can be changed. It is best to always use x.fakes['state']. Indeed, if you want to override the 'special' nature of a term such as 'state', you can simply remove its entry from the table: old_dict = x.fakes.copy() del x.fakes['state'] Now, if you type: print x.state You get a KeyError for state, so it would have been better to: x.fakes = old_dict.copy() del x.fakes['state'] x['state'] = x[old_dict['state']] print x.state which should give [ 5. 5.], but the type of x.state will have changed from SpecialVariable to np.ndarray. If you want to convert the SpecialVariable back to a dictionary you can do: print x[x.fakes['state']].to_dict() or a little less verbosely: print x._state.to_dict() ''' def __init__(self,info=[],thisname=None,readers=[],\ datadir=["."],\ env=None,\ header=None,\ logger=None, log_terms={},simple=False): ''' Class initialisation. Set up self.state and self.other as SpecialVariables and initialise them to None.
''' self.set('fakes',{'state':'_state','other':'_other'}) nSpecial = len(self.get('fakes')) for i in self.fakes: thatname = thisname and "%s.%s"%(thisname,i) self[i] = SpecialVariable(logger=logger,info=info,thisname=thatname,\ readers=readers,datadir=datadir,\ env=env,\ header=header,\ log_terms=log_terms,\ simple=simple) self[i] = None get = lambda self,this :ParamStorage.__getattr__(self,this) get.__name__ = 'get' get.__doc__ = ''' An alternative interface to get the value of a class member that by-passes any more complex mechanisms. This returns the 'true' value of a class member, as opposed to an interpreted value. ''' set = lambda self,this,that :ParamStorage.__setattr__(self,this,that) set.__name__ = 'set' set.__doc__ = ''' An alternative interface to set the value of a class member that by-passes any more complex mechanisms. This sets the 'true' value of a class member, as opposed to an interpreted value. ''' var = lambda self,this : self[self['fakes'][this]] var.__name__='var' var.__doc__ = ''' Return the data associated with SpecialVariable this, rather than an interpretation of it ''' def __set_if_unset(self,name,value): ''' A utility to check if the requested attribute is not currently set, and to set it if so. ''' if name in self.fakes: fname = self.fakes[name] if not fname in self.__dict__: ParamStorage.__setattr__(self,fname,value) return True else: if not name in self.__dict__: ParamStorage.__setattr__(self,name,value) return True return False def __getattr__(self,name): ''' get attribute, e.g. return self.state ''' return self.__getitem__(name) def __setattr__(self,name,value): ''' set attribute, e.g. self.state = 3 ''' if not self.__set_if_unset(name,value): self.__setitem__(name,value,nocheck=True) def __getitem__(self,name): ''' get item for class, e.g. x = self['state'] ''' if name in ['Data','Name']: return self._state[name.lower()] elif name in ['Control','Location']: return self._state[name.lower()] elif name in self.fakes: this = self.get(self.fakes[name]) return SpecialVariable.__getitem__(this,name) else: this = self.get(name) return self.__dict__.__getitem__ ( name ) def __setitem__(self,name,value,nocheck=False): ''' set item for class e.g. self['state'] = 3 ''' if nocheck or not self.__set_if_unset(name,value): if name in ['Data','Name']: self._state[name.lower()] = value elif name in ['Control','Location']: self._state[name.lower()] = value elif name in self.fakes: this = self.get(self.fakes[name]) SpecialVariable.__setattr__(this,name,value) else: this = self.get(name) ParamStorage.__setattr__(self,name,value)
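# A minimal, self-contained sketch of the 'fakes' indirection described in
# the DemoClass docstring: the real payload lives under a fake name
# ('_state'), so attribute access for 'state' can be intercepted even though
# the instance holds data for it. Plain Python, illustrative names only.
class _FakesDemo(object):
    def __init__(self):
        object.__setattr__(self, 'fakes', {'state': '_state'})
        object.__setattr__(self, '_state', {'data': None})
    def __setattr__(self, name, value):
        # divert 'state' to the fake slot; everything else is normal
        if name in self.fakes:
            self.__dict__[self.fakes[name]]['data'] = value
        else:
            object.__setattr__(self, name, value)
    def __getattr__(self, name):
        # only called when normal lookup fails, i.e. for fake names
        fakes = self.__dict__['fakes']
        if name in fakes:
            return self.__dict__[fakes[name]]['data']
        raise AttributeError(name)
# usage: x = _FakesDemo(); x.state = 3; x.state -> 3 (stored in x._state)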
class DModel_Operator(Operator): def preload_prepare(self): ''' Here, we use preload_prepare to make sure the x & any y data are gridded for this operator. This greatly simplifies the application of the differential operator. This method is called before any data are loaded, so ensures they are loaded as a grid. ''' from eoldas_Lib import sortopt for i in np.array(self.options.datatypes).flatten(): # mimic setting the apply_grid flag in options if self.dict().has_key('%s_state' % i): self['%s_state' % i].options[i].apply_grid = True self.novar = sortopt(self, 'novar', False) self.gamma_col = sortopt(self, 'gamma_col', None) self.beenHere = False def postload_prepare(self): ''' This is called on initialisation, after data have been read in Here, we load parameters specifically associated with the model H(x). In the case of this differential operator, these are: model_order : order of the differential operator (integer) wraparound : edge conditions. Can be: periodic none reflexive lag : The (time/space) lag at which the finite difference is calculated in the differential operator here. If this is 1, then we take the difference between each sample point and its neighbour. This is what we normally use. The main purpose of this mechanism is to allow differences at multiple lags to be calculated (fuller autocorrelation function constraints as in kriging) Multiple lags can be specified (which you could use to perform kriging), in which case lag weight should also be specified. lag_weight : The weight associated with each lag. This will generally be decreasing with increasing lag for a 'usual' autocorrelation function. There is no point specifying this if only a single lag is specified as the function is normalised. If the conditions are specified as periodic, the period of the function can also be specified, e.g. for time varying data, you could specify 365 for the periodic period. These are specified in the configuration file as operator.modelt.rt_model.model_order operator.modelt.rt_model.wraparound operator.modelt.rt_model.lag operator.modelt.rt_model.lag_weight The default values (set here) are 1, 'none', 1 and 1 respectively. To specify the period for `periodic` specify e.g.: [operator.modelt.rt_model] wraparound=periodic,365 The default period is set to 0, which implies that it is periodic on whatever the data extent is. Or for multiple lags: [operator.modelt.rt_model] lag=1,2,3,4,5 lag_weight=1,0.7,0.5,0.35,0.2 NB this lag mechanism has not yet been fully tested and should be used with caution. It is intended more as a placeholder for future developments. Finally, we can also decide to work with inverse gamma (i.e. an uncertainty-based measure). This is achieved by setting the flag operator.modelt.rt_model.inverse_gamma=True This flag should be set if you intend to estimate gamma in the Data Assimilation. Again, this is experimental and should be used with caution.
''' from eoldas_Lib import sortopt self.rt_model = sortopt(self.options, 'rt_model', ParamStorage()) self.rt_model.lag = sortopt(self.rt_model, 'lag', 1) self.rt_model.inverse_gamma= \ sortopt(self.rt_model,'inverse_gamma',False) self.rt_model.model_order = \ sortopt(self.rt_model,'model_order',1) self.rt_model.wraparound = \ sortopt(self.rt_model,'wraparound','none') self.rt_model.wraparound_mod = 0 if np.array(self.rt_model.wraparound).size == 2 and \ np.array(self.rt_model.wraparound)[0] == 'periodic': self.rt_model.wraparound_mod = \ np.array(self.rt_model.wraparound)[1] self.rt_model.wraparound = \ np.array(self.rt_model.wraparound)[0] self.rt_model.lag = \ sortopt(self.rt_model,'lag',[1]) self.rt_model.lag = np.array(self.rt_model.lag).flatten() self.rt_model.lag_weight = \ sortopt(self.rt_model,'lag_weight',[1.]*\ self.rt_model.lag.size) self.rt_model.lag_weight = np.array(\ self.rt_model.lag_weight).flatten().astype(float) if self.rt_model.lag_weight.sum() == 0: self.rt_model.lag_weight[:] = np.ones( self.rt_model.lag_weight.size) self.rt_model.lag_weight = self.rt_model.lag_weight\ / self.rt_model.lag_weight.sum() def setH(self): ''' This method sets up the matrices required for the model. This operator is written so that it can apply smoothing in different dimensions. This is controlled by the model state vector. The names of the states are stored in self.x_meta.state and the associated location information in self.x_meta.location. So, we look through these looking for matches, e.g. 'row' in location and 'gamma_row' in names would mean that we want to apply the model over the row dimension. There should be only one gamma term in the state vectors for this operator. If you give more than one, only the last one will be used. NOT YET IMPLEMENTED: The model can be applied to multiple dimensions by specifying e.g. gamma_time_row. If you want separate gammas for e.g. time and row, then you should use separate operators. If gamma_roccol is specified, then the model applies to Euclidean distance in row/col space. Formally, the problem can be stated most simply as a matrix D so that gamma D x is the rate of change of x with respect to the target location variable (time, row, col etc). The job of this method then is to form and store D. The main complication to this is we have to split up x into those terms that we will apply D to (x2 here) and separately pull out the gamma terms. The resultant matrix D then needs to be re-formed so as to apply to the whole vector x, rather than just x2. We do this with masks. On input, x is a 1D vector. ''' x, Cx1, xshape, y, Cy1, yshape = self.getxy() # the names of the variables in x names = np.array(self.x_meta.state) # the names of the location information (e.g. time, row, col) location = self.x_meta.location self.logger.info('Setting up model matrices...') if self.x_meta.is_grid: try: self.x.location = self.x_state.ungridder.location self.x.qlocation = self.x_state.ungridder.qlocation except: raise Exception("You are trying to ungrid a dataset that wasn't gridded using State.regrid()" +\ " so the ungridder information is not available. Either load the data using State.grid " +\ " or set it up some other way or avoid calling this method with this type of data") # first, reshape x from its 1-D form to # have the same shape as self.x.state. We store # this shape as xshape.
xshape = self.x.state.shape # we can't change the tuple directly, so need a # vector representation that we can manipulate # This is xshaper xshaper = np.array(xshape) # the data are assumed loaded into x # At this point, x2 is just a copy of the full input vector x # mask then is a mask of the same size as self.x_meta.state # by default, this mask is True. We will modify it to # take out bits we don't want later. x2 = x.reshape(xshape) mask = np.ones_like(x2).astype(bool) # We now need to recognise any gamma terms that might be in # the state vector. Candidates are 'gamma_%s'%(location) # e.g. gamma_time. # The number of dimensions of x can vary, depending on how many # location terms are used, so it's a little tricky to # pull the information out. # We loop over the locations, indexed as i self.linear.datamask = np.ones(xshape[-1]).astype(bool) for i in xrange(len(location)): # and form the name of the candidate term in the variable 'this' this = 'gamma_%s' % location[i] ww = np.where(this == names)[0] # Then we see if it appears in the names of the state variables if len(ww): # form a mask so we don't apply the operator to gamma # terms. Note that *all* gamma terms are masked # even though we only actually use the last one we # come across. # we use [...,ww[0]] because the identifier for the # state is always in the final dimension. mask[..., ww[0]] = False # We store ww[0] as it will allow us to access gamma # in subsequent calls in this same way. This is # self.linear.gamma_col self.linear.gamma_col = ww[0] # and is used as ... gammas = x2[..., self.linear.gamma_col] self.linear.datamask[self.linear.gamma_col] = False # We want to store an index into which of the # location vector terms we are dealing with here. # This is self.linear.gamma_loc = i # Once we apply the mask to get rid of the gamma columns # we need to keep track of the new shape for x2 # This will be x2shape xshaper[-1] -= 1 self.linear.x2shape = tuple(xshaper) self.linear.x2mask = mask.flatten() # so, apply the mask to take out the gamma columns x2 = x[self.linear.x2mask].reshape(self.linear.x2shape) # We next need access to the location information # for the selected dimension self.linear.gamma_loc. # If the data are gridded, we need to form the relevant information # Ungridded data we can access location directly as it is explicitly # stored. We store the location vector as 'locations' try: locshape = gammas.shape except: # If no gamma term is given, it is implicit that it is # the first dimension of location, but we have no data to mask self.linear.gamma_col = None self.linear.gamma_loc = 0 locshape = (0) gammas = x2[..., 0] * 0. + 1.0 #if self.x_meta.is_grid: # the locational variable of interest is self.linear.gamma_loc # the grid is dimensioned e.g. [t,r,c,p] # so we need e.g. locations which is of dimension # e.g.
[t,r,c] # locations = self.x.location # access the ungridded location data lim = self.x_meta.qlocation[self.linear.gamma_loc] nloc = lim[1] - lim[0] + 1 locations = self.x.location[..., self.linear.gamma_loc] locshape = tuple(np.array(self.x.location.shape)[:-1]) for (i, lag) in enumerate(self.rt_model.lag): wt = self.rt_model.lag_weight[i] slocations = wt*(np.roll(locations,lag,\ axis=self.linear.gamma_loc) - locations).astype(float) slocations2 = (locations - np.roll(locations,-lag,\ axis=self.linear.gamma_loc)).astype(float) # If there is no variation, it is a waste of time to calculate # the derivative if i == 0 and np.abs(slocations).sum() + np.abs( slocations2).sum() == 0: # there is no variation here self.novar = True return 0 self.novar = False ww = np.where(slocations > 0) mod = int(self.rt_model.wraparound_mod ) / lim[-1] or slocations.shape[self.linear.gamma_loc] if self.rt_model.wraparound == 'reflexive': slocations[ww] = 0. #slocations[ww] = -np.fmod(mod - slocations[ww],mod) elif self.rt_model.wraparound == 'periodic': if self.rt_model.wraparound_mod == 0: slocations[ww] = slocations2[ww] else: slocations[ww] = -np.fmod(mod - slocations[ww], mod) else: # none slocations[ww] = 0. ww = np.where(slocations != 0) slocations[ww] = 1. / slocations[ww] if i == 0: # Form the D matrix. This is of the size required to # process the x2 data, and this is the most convenient # form to use it in m = np.zeros(slocations.shape * 2) ww = np.where(slocations != 0) ww2 = np.array(ww).copy() ww2[self.linear.gamma_loc] = ww2[self.linear.gamma_loc] - lag ww2 = tuple(ww2) m[ww * 2] = m[ww * 2] - slocations[ww] if False and self.rt_model.wraparound == 'reflexive': ww2 = np.abs(ww - lag) # this is looped as there might be multiple elements with the # same index for the reflexive case if m.ndim > 2: raise Exception( "Not yet implemented: Can't use reflexive mode for multi-dimensions yet" ) for (c, j) in enumerate(ww2): m[j, ww[c]] = m[j, ww[c]] + slocations[ww[c]] else: ww = tuple(ww) ww2 = tuple(ww2) m[ww2 + ww] = m[ww2 + ww] + slocations[ww] # fix for edge conditions dd = m.copy() dd = dd.reshape(tuple([np.array(self.linear.x2shape[:-1]).prod()]) * 2) ddw = np.where(dd.diagonal() == 0)[0] for d in (ddw): ds = -dd[d, :].sum() dd[d, :] += dd[d, :] dd[d, d] = ds m = dd.reshape(m.shape) self.logger.info('Caching model matrices...') # if np.array(xshape).prod() == Cy1.size: self.linear.C1 = Cy1.reshape(xshape)[mask]\ .reshape( self.linear.x2shape ) elif xshape[1] == Cy1.size: self.linear.C1 = np.tile(Cy1, xshape[0])[mask.flatten()].reshape( self.linear.x2shape) else: raise Exception("Can't deal with full covar matrix in DModel yet") nn = slocations.flatten().size m = m.reshape(nn, nn) self.linear.D1 = np.matrix(m).T for i in xrange(1, self.rt_model.model_order): m = np.matrix(self.linear.D1).T * m self.linear.D1 = m self.logger.info('... Done') return True def J(self): ''' A slightly modified J as it's efficient to precalculate things for this model J = 0.5 * x.T D1.T gamma^2 D1 x ''' x, Cx1, xshape, y, Cy1, yshape = self.getxy() self.Hsetup() if self.novar: return 0 xshape = self.x.state.shape try: if self.linear.gamma_col != None: gamma = x.reshape(self.x.state.shape)\ [...,self.linear.gamma_col].flatten() else: # no gamma variable, so use 1.0 gamma = x.reshape(self.x.state.shape)\ [...,0].flatten()*0.+1. except: self.logger.error( 'gamma_col not set ... recovering and assuming no variation here' ) self.linear.gamma_col = None gamma = x.reshape(self.x.state.shape)[..., 0].flatten() * 0. + 1.
self.novar = True self.Hsetup() return 0 x2 = x[self.linear.x2mask].reshape(self.linear.x2shape) J = 0. i = 0 if self.rt_model.inverse_gamma: tgamma = 1. / gamma else: tgamma = gamma for count in xrange(self.x.state.shape[-1]): if count != self.linear.gamma_col: C1 = np.diag(self.linear.C1[...,i].\ reshape(self.linear.D1.shape[0])) x2a = x2[..., i].reshape(self.linear.D1.shape[0]) xg = np.matrix(x2a * tgamma).T dxg = self.linear.D1.T * xg J += np.array(0.5 * dxg.T * C1 * dxg)[0][0] i += 1 #print x[0],J return np.array(J).flatten()[0] def J_prime_prime(self): ''' Calculation of J'' We already have the differential operator self.linear.D1 and self.gamma after we call self.J_prime() Here, J'' = D1.T gamma^2 D1 J' is of shape (nobs,nstates) which is the same as the shape of x D1 is of shape (nobs,nobs) which needs to be expanded to (nobs,nstates,nobs,nstates) ''' x, Cx1, xshape, y, Cy1, yshape = self.getxy() J, J_prime = self.J_prime() xshape = self.x.state.shape if not 'linear' in self.dict(): self.linear = ParamStorage() if not 'J_prime_prime' in self.linear.dict(): self.linear.J_prime_prime = \ np.zeros(xshape*2) else: self.linear.J_prime_prime[:] = 0 # we need an indexing system in case of multiple # nobs columns x2a = np.diag(np.ones(self.linear.x2shape[:-1]).flatten()) try: gamma = self.linear.gamma.flatten() except: if self.linear.gamma_col != None: gamma = x.reshape(self.x.state.shape)\ [...,self.linear.gamma_col].flatten() else: # no gamma variable, so use 1.0 gamma = x.reshape(self.x.state.shape)\ [...,0].flatten()*0.+1. if self.rt_model.inverse_gamma: tgamma = 1. / gamma dg = 2. / (gamma * gamma * gamma) else: tgamma = gamma dg = 1.0 nshape = tuple([np.array(self.linear.x2shape[:-1]).prod()]) D1 = np.matrix(self.linear.D1.reshape(nshape * 2)) i = 0 # so, e.g. we have xshape as (50, 100, 2) # because one of those columns refers to the gamma value # self.linear.gamma_col will typically be 0 for count in xrange(xshape[-1]): if count != self.linear.gamma_col: # we only want to process the non gamma col C1 = np.diag(self.linear.C1[...,i].\ reshape(self.linear.D1.shape[0])) xg = np.matrix(x2a * tgamma * tgamma) dxg = D1 * xg deriv = np.array(dxg.T * C1 * D1) # so we have gamma^2 D^2 which is the Hessian # we just have to put it in the right place now # the technical issue is indexing an array of eg # (50, 100, 2, 50, 100, 2) # but it might have more or fewer dimensions nd = len(np.array(xshape)[:-1]) nshape = tuple(np.array(xshape)[:-1]) if nd == 1: self.linear.J_prime_prime[:, count, :, count] = deriv.reshape(nshape * 2) elif nd == 2: self.linear.J_prime_prime[:, :, count, :, :, count] = deriv.reshape(nshape * 2) elif nd == 3: self.linear.J_prime_prime[:, :, :, count, :, :, :, count] = deriv.reshape(nshape * 2) else: self.logger.error( "Can't calculate Hessian for %d dimensions ...
I can only do up to 3" % nd) #ww = np.where(deriv) #ww2 = tuple([ww[0]]) + tuple([ww[0]*0+count]) \ # + tuple([ww[1]] )+ tuple([ww[0]*0+count]) #x1 = deriv.shape[0] #x2 = self.linear.J_prime_prime.shape[-1] #xx = self.linear.J_prime_prime.copy() #xx = xx.reshape(x1,x2,x1,x2) #xx[ww2] = deriv[ww] #self.linear.J_prime_prime = xx.reshape(self.linear.J_prime_prime.shape) i += 1 if self.linear.gamma_col != None: c = self.linear.gamma_col nd = len(np.array(xshape)[:-1]) nshape = tuple(np.array(xshape)[:-1]) deriv = np.diag(dg * 2 * J / (tgamma * tgamma)).reshape(nshape * 2) if nd == 1: self.linear.J_prime_prime[:, c, :, c] = deriv elif nd == 2: self.linear.J_prime_prime[:, :, c, :, :, c] = deriv elif nd == 3: self.linear.J_prime_prime[:, :, :, c, :, :, :, c] = deriv else: self.logger.error( "Can't calculate Hessian for %d dimensions ... I can only do up to 3" % nd) #dd = np.arange(nshape[0]) #x1 = dd.shape[0] #x2 = self.linear.J_prime_prime.shape[-1] #xx = self.linear.J_prime_prime.copy() #xx = xx.reshape(x1,x2,x1,x2) #xx[dd,dd*0+self.linear.gamma_col,\ # dd,dd*0+self.linear.gamma_col] = dg*2*J/(tgamma*tgamma) #self.linear.J_prime_prime = xx.reshape(self.linear.J_prime_prime.shape) n = np.array(xshape).prod() return J, J_prime, self.linear.J_prime_prime.reshape(n, n) def J_prime(self): ''' A slightly modified J as it's efficient to precalculate things for this model J' = D.T gamma^2 D x ''' J = self.J() if self.novar: return 0, self.nowt x, Cx1, xshape, y, Cy1, yshape = self.getxy() x2 = x[self.linear.x2mask].reshape(self.linear.x2shape) if self.linear.gamma_col != None: gamma = x.reshape(self.x.state.shape)\ [...,self.linear.gamma_col].flatten() else: # no gamma variable, so use 1.0 gamma = x.reshape(self.x.state.shape)\ [...,0].flatten()*0.+1. #gamma = self.linear.gamma.flatten() if self.rt_model.inverse_gamma: tgamma = 1. / gamma dg = -1. / (gamma * gamma) else: tgamma = gamma dg = 1.0 g2 = tgamma * tgamma xshape = self.x.state.shape J_prime = np.zeros((x.shape[0] / xshape[-1], xshape[-1])) D2x_sum = 0. # loop over the non gamma variables i = 0 # store gamma in case needed elsewhere self.linear.gamma = gamma for count in xrange(self.x.state.shape[-1]): if count != self.linear.gamma_col: C1 = np.diag(self.linear.C1[...,i].\ reshape(self.linear.D1.shape[0])) x2a = x2[..., i].reshape(self.linear.D1.shape[0]) xg = np.matrix(x2a * tgamma).T dxg = self.linear.D1 * xg deriv = np.array(dxg.T * C1 * self.linear.D1)[0] J_prime[..., count] = deriv * tgamma #if self.linear.gamma_col != None: # J_prime_gamma = deriv * x2a # D2x_sum = D2x_sum + J_prime_gamma i += 1 if self.linear.gamma_col != None: J_prime[..., self.linear.gamma_col] = dg * 2 * J / tgamma return J, J_prime def Hsetup(self): ''' setup for the differential operator H(x) ''' if not self.beenHere and not 'H_prime' in self.linear.dict(): self.logger.info('Setting up storage for efficient model operator') if 'y' in self.dict(): self.linear.H = np.zeros(self.y.state.shape) self.linear.H_prime = np.zeros(self.y.state.shape * 2) else: self.linear.H = np.zeros(self.x.state.shape) self.linear.H_prime = np.zeros(self.x.state.shape * 2) self.setH() if self.novar: self.nowt = 0. * self.x.state #del self.linear.H_prime, self.linear.H self.beenHere = True
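# A self-contained numpy sketch (illustrative, not the class's own code) of
# the smoothness constraint implemented above: build a first-difference
# matrix D for a 1-D state x with wraparound='none' (the first `lag` rows
# left at zero), then J = 0.5 * gamma^2 * (D x)^T (D x) and
# J' = gamma^2 * D^T D x, matching the forms in J() and J_prime().
def _demo_dmodel():
    import numpy as np
    n, lag, gamma = 10, 1, 2.0
    D = np.zeros((n, n))
    for i in range(lag, n):
        D[i, i], D[i, i - lag] = 1.0, -1.0   # x[i] - x[i-lag]
    x = np.sin(np.linspace(0.0, np.pi, n))   # a smooth test state
    dx = np.dot(D, x)
    J = 0.5 * gamma * gamma * np.dot(dx, dx)
    J_prime = gamma * gamma * np.dot(D.T, dx)
    return J, J_prime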
class SpecialVariable(ParamStorage):
    '''
    A class that can deal with the datatypes needed for eoldas.

    It allows a variable to be set to various data types and interprets
    these into the data structure. The data structure imposed here is:

        self.data.this
        self.name.this

    to store some item called 'this'. Other information can be stored as
    well, but part of the idea here is to have some imposed constraints on
    the data structure so that we can sensibly load up different datasets.

    The idea is that data will be stored in self.data and associated
    metadata in self.name. If items are given the same name in both
    sub-structures, we can easily keep track of them. There is no actual
    requirement that this is adhered to, but it is certainly permitted and
    encouraged for the intended use of this class.

    Probably the most important thing about this class is that if a
    SpecialVariable is assigned different data types, it can do sensible
    things with them in the context of the EOLDAS (and wider
    applications). When an assignment takes place (either of the form
    self.state = foo or self['state'] = foo) then what is actually stored
    depends on the type and nature of foo. The main features are as
    follows:

    If foo is a string:
        A guess is made that it is a filename, and an attempt is made to
        read the file. All directories in the list self.dirnames are
        searched for the filename, and any readable files found are
        considered candidates. Each of these is read in turn. A set of
        potential data formats, specified by the readers in readers
        (self.reader_functions), is considered, and if a successful
        interpretation takes place the data are returned and stored in
        the desired variable. So, for example, if we have
        self.state = foo as above and foo is a valid, readable file in
        the list of directories specified, and it is interpretable with
        one of the formats defined, then the main dataset is loaded into
        self.data.state (alternatively known as self.data['state']).
    If foo is a ParamStorage:
        It should have the same structure as that here (i.e. self.data
        and self.name) and these structures are then loaded.
    If foo is a dictionary (type dict):
        It is first converted to a ParamStorage and then loaded as above.
    If foo is any other datatype:
        It is left pretty much as is, except that an attempt to convert
        it to a np.array is made.

    Depending on the format, there might be data other than the main
    dataset (e.g. locational information) and these are loaded by the
    loaders into relevant parts of self.data and self.name.

    For classes that use this class for EOLDAS, we will typically use:

        self.data.state    : state variable data
        self.data.sd       : uncertainty information as sd (or a similar
                             fuller representation)
        self.data.control  : control information (e.g. view angles)
        self.data.location : location information

    with associated descriptor data in the relevant parts of self.name.

    The idea for simple use of the data structure then is for all of
    these datasets to be represented as 2D datasets, where the number of
    rows in each of the self.data.state etc. fields will be the same, but
    the number of columns will tend to vary (e.g. different numbers of
    state variables). The reason for considering such a 2D 'flat'(ish)
    representation is that it is easy to tabulate and understand. In fact
    the data can be of quite high dimension. E.g.
    if the data vary by time, x and y, then we would have 3 columns for
    self.data.location, with descriptors for the columns in
    self.name.location, and corresponding state data in self.data.state
    (with the number of state variables determining the number of columns
    in that table).

    As mentioned, at the point of this class, there is no strict
    requirement for any such structure to the data loaded or used, but
    that is the plan for EOLDAS use, so it is worth documenting at this
    point.
    '''
    def __init__(self,info=[],name=None,thisname=None,readers=[],log_terms={},\
                 datadir=["."],env=None,\
                 header=None,writers={},simple=False,logger=None):
        '''
        Class SpecialVariable initialisation.

        Sets up the class as a ParamStorage and calls self.init().

        See init() for a fuller description of the options.
        '''
        ParamStorage.__init__(self,logger=logger)
        name = name or thisname
        if name == None:
            import time
            thistime = str(time.time())
            name = type(self).__name__
            name = "%s.%s" % (name,thistime)
        self.thisname = name
        # pass the options through to init(), rather than
        # re-supplying the defaults
        self.init(info=info,name=self.thisname,readers=readers,\
                  log_terms=log_terms,\
                  datadir=datadir,env=env,\
                  header=header,writers=writers,\
                  simple=simple,logger=logger)

    def init(self,info=[],name=None,readers=[],log_terms={},\
             datadir=None,env=None,\
             header=None,writers={},\
             simple=False,logger=None):
        '''
        Initialise information in a SpecialVariable instance.

        Options:
            info      : Information that can be passed through to reader
                        methods (a list).
            name      : a name to use to identify this instance in any
                        logging. By default this is None. If name is set
                        to True, then logging is to stdout.
            readers   : A list of reader methods that are pre-pended to
                        those already contained in the class.
            log_terms : A dictionary of log options. By default
                        {'logfile':None,'logdir':'log','debug':True}.
                        If name is set, and logfile specified, then logs
                        are logged to that file. If name is set to True,
                        then logging is to stdout.
            datadir   : A list of directories to search for data files
                        to interpret if the SpecialVariable is set to a
                        string.
            env       : An environment variable that can be used to
                        extend the datadir variable.
            header    : A header string to use to identify pickle files.
                        By default, this is set to
                        "EOLDAS -- plewis -- UCL -- V0.1"
            simple    : A flag to switch off the 'complicated'
                        interpretation methods, i.e. just set and return
                        variables literally, do not try to interpret
                        them.
        '''
        self.set('simple',True)
        if name == None:
            import time
            thistime = str(time.time())
            name = type(self).__name__
            name = "%s.%s" % (name,thistime)
        self.thisname = name
        # this is where we will put any data
        self.data = ParamStorage()
        self.name = ParamStorage()
        self.info = info
        self.datadir = datadir or ['.']
        self.env = env
        init_read_write(self,header,readers,writers)
        # sort logging, and log if name != None
        self.log_terms = {'logfile':None,'logdir':'log','debug':True}
        # override logging info
        for (key,value) in log_terms.iteritems():
            self.log_terms[key] = value
        self.logger = sortlog(self,self.log_terms['logfile'],logger,name=self.thisname,\
                              logdir=self.log_terms['logdir'],\
                              debug=self.log_terms['debug'])
        self.simple = simple

    set = lambda self,this,value : ParamStorage.__setattr__(self,this,value)
    set.__name__ = 'set'
    set.__doc__ = """
        A method to set the literal value of this, rather than attempt
        an interpretation (e.g. used when self.simple is True)
        """
    get = lambda self,this : ParamStorage.__getattr__(self,this)
    get.__name__ = 'get'
    get.__doc__ = """
        A method to get the literal value of this, rather than attempt
        an interpretation (e.g.
        used when self.simple is True)
        """

    def __setitem__(self,this,value):
        '''
        Variable setting method for style self['this'].

        Interpreted the same as via __setattr__.
        '''
        # always set the item
        self.__setattr__(this,value)

    def __setattr__(self,this,value):
        '''
        Variable setting method for style self.this

        Varies what it does depending on the type of value.

        The method interprets and sets the SpecialVariable value:

        1. ParamStorage or SpecialVariable. The data are directly
           loaded. This is one of the most flexible formats for input.
           It expects fields 'data' and/or 'name', which are loaded into
           self. There will normally be a field data.this, where this is
           the variable name passed here.
        2. A dictionary, same format as the ParamStorage.
        3. A tuple, interpreted as (data,name) and loaded accordingly.
        4. A *string*, treated as a filename (various formats). An
           attempt to read the string as a file (of a set of formats) is
           made. If none pass then it is maintained as a string.
        5. A numpy array (np.array) that is loaded into self.data.this.
        6. Anything else. Loaded into self.data.this as a numpy array.
        '''
        if self.simple:
            self.set(this,value)
            return
        t = type(value)
        try:
            if t == ParamStorage or t == SpecialVariable:
                # update the whole structure
                #self.__set_if_unset('data',ParamStorage())
                #self.__set_if_unset('name',ParamStorage())
                self.data.update(value.data,combine=True)
                self.name.update(value.name,combine=True)
            elif t == dict:
                n_value = ParamStorage().from_dict(value)
                self.__setattr__(this,n_value)
            elif t == tuple or t == list:
                # assumed to be (data,name) or [data,name]
                #self.__set_if_unset('data',ParamStorage())
                #self.__set_if_unset('name',ParamStorage())
                #ParamStorage.__setattr__(self['data'],this,value[0])
                #ParamStorage.__setattr__(self['name'],this,value[1])
                ParamStorage.__setattr__(self['data'],this,np.array(value))
            elif t == str:
                # set the term
                #self.__set_if_unset('data',ParamStorage())
                #self.__set_if_unset('name',ParamStorage())
                ParamStorage.__setattr__(self['data'],this,value)
                # interpret as a file read if possible
                self.process_data_string(this,info=self.info)
            elif t == np.ndarray:
                #self.__set_if_unset('data',ParamStorage())
                #self.__set_if_unset('name',ParamStorage())
                ParamStorage.__setattr__(self['data'],this,value)
            else:
                ParamStorage.__setattr__(self['data'],this,\
                                         np.array(value))
        except:
            if self.logger:
                self.logger.info("Failed to set SpecialVariable %s from %s %s"\
                                 %(this,t.__name__,value))
            return
        if self.logger:
            self.logger.info("Set variable %s from type %s"%(this,t.__name__))

    def __getattr__(self,name):
        '''
        Variable getting method for style self.this

        If the field 'data' exists in self.__dict__ and name is in that
        dictionary, then the field self.data.name is returned. Otherwise,
        if name is in self.__dict__, that entry is returned. Otherwise
        return None.
        '''
        if 'data' in self.__dict__ and name in self.data.__dict__:
            return self.data.__dict__.__getitem__ ( name )
        elif name in self.__dict__:
            return self.__dict__.__getitem__ ( name )
        else:
            return None

    def __getitem__(self,name):
        '''
        Variable getting method for style self['this'].

        Interpreted the same as via __getattr__.
        '''
        # first look in data
        if 'data' in self.__dict__ and name in self.data.__dict__:
            return self.data.__dict__.__getitem__ ( name )
        elif name in self.__dict__:
            return self.__dict__.__getitem__ ( name )
        else:
            return None

    def process_data_string(self,name,info=[],fmt=None):
        '''
        Attempt to load data from a string, assuming the string is a
        filename.
        The list self.datadir is searched for readable files with the
        string 'name' (also using self.env), and a list of potential
        files is considered for reading. Each readable file is passed to
        self.read, and if it is interpretable, it is loaded according to
        the read method.

        Note that the format can be specified. If not, then all formats
        are attempted until a successful read is made.
        '''
        from eoldas_Lib import get_filename
        orig = self.data[name]
        if self.logger:
            self.logger.debug("%s is a string ... see if it's a readable file ..." \
                              % name)
        # find a list of potential files
        goodfiles, is_error = get_filename(orig,datadir=self.datadir,\
                                           env=self.env,multiple=True)
        if is_error[0]:
            # return on error whether or not we have a logger
            if self.logger:
                self.logger.debug(str(is_error[1]))
            return
        if self.logger:
            self.logger.debug("*** looking at potential files %s"%str(goodfiles))
        # loop over all files that it might be
        for goodfile in goodfiles:
            stuff,is_error = reader(self,goodfile,name,fmt=fmt,info=info)
            if not is_error[0]:
                # return on the first successful read, whether or not
                # we have a logger
                if self.logger:
                    self.logger.info("Read file %s "%goodfile)
                return
        if self.logger:
            self.logger.debug(self.error_msg)
        return

    write = lambda self,filename,fmt : writer(self,filename,None,fmt=fmt)
    read = lambda self,filename,fmt : reader(self,filename,None,fmt=fmt,info=[])
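# --------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source). A minimal
# demonstration of how SpecialVariable interprets assignments; it assumes
# numpy is imported as np at module level, as elsewhere in this file, and
# the filename 'obs.dat' is hypothetical.
#
#     sv = SpecialVariable(name='example')
#     # an array (or anything array-like) is stored in sv.data.state:
#     sv.state = np.arange(10.)
#     # a dict with 'data'/'name' fields is merged into sv.data / sv.name:
#     sv.state = {'data':{'state':np.zeros(3)},'name':{'state':'demo'}}
#     # a string is treated as a candidate filename: sv.datadir is searched
#     # and each reader is tried; if nothing can interpret it, the literal
#     # string is kept:
#     sv.state = 'obs.dat'
#     # retrieval looks in sv.data first, so this returns sv.data.state:
#     print sv.state
# --------------------------------------------------------------------------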
def __init__(self,args,name=None,general=None,log=False,logger=None,\
             outdir=".",getopdir=False,parse=True):
    """
    Initialise parser class.

    This sets up the class defaults.

    Options:
        general : this over-rides any defaults with values set in
                  general, which can be of the form:
                    1. a ParamStorage (i.e. the same form as
                       self.general)
                    2. a command line list (where the first item in the
                       list is ignored)
                    3. a string containing a set of command line options
                  See self.parse() for more details on forms 2 and 3, as
                  these simply make a call to that method.
        log     : If log is set to True, then logging starts when this
                  class is instanced. Note that the logfile and logdir
                  might change if subsequent calls to Parser.parse() are
                  made.
    """
    if type(args) == str:
        args = args.split()
    self.dolog = log
    self.log = log
    self.name = args[0]
    self.args = args[1:]
    self.fullargs = args
    self.store_fullargs = args
    if name == None:
        import time
        thistime = str(time.time())
        name = type(self).__name__
        name = "%s.%s" % (name,thistime)
    self.thisname = name
    # find the following flags:
    #   --conf | -c : conf
    #   --datadir   : datadir
    datadir = [".","~/.eoldas",sys.path[0]+'/../bin',sys.path[0]+'/../confs',\
               sys.path[0]+'/../system_confs',sys.path[0]+'/../eoldaslib']
    conf = "default.conf"
    logfile = None
    logdir = "."
    self.top = ParamStorage ()
    self.top.general = ParamStorage ()
    self.top.general.__helper__ = ParamStorage ()
    self.top.general.__default__ = ParamStorage ()
    self.top.general.__extras__ = ParamStorage ()
    self.top.general.conf = []
    for i in xrange(len(self.args)):
        theseargs = self.args[i].split('=')
        if theseargs[0] == "--conf":
            conf = theseargs[1]
            self.top.general.conf.append(conf)
        elif theseargs[0][0:2] == "-c":
            # allow both '-cfile.conf' and '-c file.conf'
            if len(theseargs[0]) > 2:
                conf = theseargs[0][2:]
            else:
                conf = self.args[i+1]
            self.top.general.conf.append(conf)
        elif theseargs[0] == "--datadir":
            datadir1 = theseargs[1].replace('[','').\
                       replace(']','').split()
            # append the default directories after those given
            datadir1.extend(datadir)
            datadir = datadir1
        elif theseargs[0] == "--logfile":
            logfile = theseargs[1]
        elif theseargs[0] == "--logdir":
            logdir = theseargs[1]
        elif theseargs[0] == "--outdir":
            outdir = theseargs[1]
    if self.top.general.conf == []:
        self.top.general.conf = conf
    if logfile == None:
        logfile = conf.replace('conf','log')
    self.top.general.here = os.getcwd()
    self.top.general.datadir = datadir
    self.top.general.logfile = logfile
    self.top.general.logdir = logdir
    self.top.general.outdir = outdir
    # add here to datadir
    # in addition to '.' to take account of the change of directory
    self.top.general.datadir = self.__add_here_to_datadir(\
        self.top.general.here,self.top.general.datadir)
    self.top.general.datadir = self.__add_here_to_datadir(\
        self.top.general.outdir,self.top.general.datadir)
    # cd to where the output is to be
    self.__cd(self.top.general.outdir)
    # set up the default command line options
    self.default_loader()
    # update with anything passed here
    if general and type(general) == ParamStorage:
        self.top.update(\
            self.__unload(general),combine=True)
    # read the conf files to get any cmd line options
    self.logger = sortlog(self,self.top.general.logfile,logger,name=self.thisname,\
                          logdir=self.top.general.logdir)
    self.config = ConfFile(self.top.general.conf,name=self.thisname+'.config',\
                           loaders=self.loaders,datadir=self.top.\
                           general.datadir,logger=self.logger,logdir=self.top.general.logdir,\
                           logfile=self.top.general.logfile)
    if len(self.config.configs) == 0:
        this = "Warning: Nothing doing ... you haven't set any configuration",\
               self.config.storelog
        try:
            self.logger(this)
        except:
            print "Called with args:"
            print "eoldas",self.args
            pass
        raise Exception(this)
    # now loaders contains all of the defaults set here
    # plus those from the config (config over-rides defaults here)
    self.loaders = self.config.loaders
    # now convert loaders into parser information
    self.parseLoader(self.loaders)
    self.parse(self.fullargs)
    if general and type(general) == ParamStorage:
        self.top.update(self.__unload(general),combine=True)
    if general and type(general) == str:
        self.parse(general.split())
    if general and type(general) == list:
        self.parse(general)
    # now update the info in self.config
    for i in self.config.infos:
        i.update(self.top,combine=True)
        # so now all terms in self.config.infos
        # contain information from the config file, updated by
        # the cmd line
        i.logger = self.logger
        i.log()
    # move the information up a level
    self.infos = self.config.infos
    self.configs = self.config.configs
    self.config_log = self.config.storelog
    #if getopdir:
    #    self.sortnames()
    self.config.loglist(self.top)
    #del(self.config.infos)
    #del(self.config.configs)
    #del(self.config)
    self.__cd(self.top.general.here)
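# --------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source). The file
# names here are hypothetical; the point is the order of precedence:
# built-in defaults, then the conf file(s), then the command line.
#
#     p = Parser('eoldas --conf=default.conf --logdir=mylogs --outdir=output')
#     # the string is split into argv-style tokens; --conf, --datadir,
#     # --logfile, --logdir and --outdir are scanned first, the conf files
#     # are then read, and the remaining options are parsed on top, so that
#     # everything ends up merged in the ParamStorage p.top
# --------------------------------------------------------------------------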
class DModel_Operator ( Operator ):

    def preload_prepare(self):
        '''
        Here, we use preload_prepare to make sure the x & any y data are
        gridded for this operator. This greatly simplifies the
        application of the differential operator.

        This method is called before any data are loaded, so it ensures
        they are loaded as a grid.
        '''
        from eoldas_Lib import sortopt
        for i in np.array(self.options.datatypes).flatten():
            # mimic setting the apply_grid flag in options
            if self.dict().has_key('%s_state'%i):
                self['%s_state'%i].options[i].apply_grid = True
        self.novar = sortopt(self,'novar',False)
        self.gamma_col = sortopt(self,'gamma_col',None)
        self.beenHere = False

    def postload_prepare(self):
        '''
        This is called on initialisation, after data have been read in.

        Here, we load parameters specifically associated with the model
        H(x). In the case of this differential operator, these are:

            model_order : order of the differential operator (integer)
            wraparound  : edge conditions. Can be:
                              periodic
                              none
                              reflexive
            lag         : The (time/space) lag at which the finite
                          difference is calculated in the differential
                          operator here. If this is 1, then we take the
                          difference between each sample point and its
                          neighbour. This is what we normally use. The
                          main purpose of this mechanism is to allow
                          differences at multiple lags to be calculated
                          (fuller autocorrelation function constraints,
                          as in kriging). Multiple lags can be specified
                          (which you could use to perform kriging), in
                          which case lag_weight should also be specified.
            lag_weight  : The weight associated with each lag. This will
                          generally be decreasing with increasing lag for
                          a 'usual' autocorrelation function. There is no
                          point specifying this if only a single lag is
                          specified, as the function is normalised.

        If the conditions are specified as periodic, the period of the
        function can also be specified, e.g. for time varying data, you
        could specify 365 for the periodic period.

        These are specified in the configuration file as:

            operator.modelt.rt_model.model_order
            operator.modelt.rt_model.wraparound
            operator.modelt.rt_model.lag
            operator.modelt.rt_model.lag_weight

        The default values (set here) are 1, 'none', 1 and 1
        respectively.

        To specify the period for `periodic`, specify e.g.:

            [operator.modelt.rt_model]
            wraparound=periodic,365

        The default period is set to 0, which implies that it is periodic
        on whatever the data extent is.

        Or for multiple lags:

            [operator.modelt.rt_model]
            lag=1,2,3,4,5
            lag_weight=1,0.7,0.5,0.35,0.2

        NB this lag mechanism has not yet been fully tested and should be
        used with caution. It is intended more as a placeholder for
        future developments.

        Finally, we can also decide to work with inverse gamma (i.e. an
        uncertainty-based measure). This is achieved by setting the flag:

            operator.modelt.rt_model.inverse_gamma=True

        This flag should be set if you intend to estimate gamma in the
        Data Assimilation. Again, this is experimental and should be used
        with caution.
        '''
        from eoldas_Lib import sortopt
        self.rt_model = sortopt(self.options,'rt_model',ParamStorage())
        self.rt_model.lag = sortopt(self.rt_model,'lag',1)
        self.rt_model.inverse_gamma = \
            sortopt(self.rt_model,'inverse_gamma',False)
        self.rt_model.model_order = \
            sortopt(self.rt_model,'model_order',1)
        self.rt_model.wraparound = \
            sortopt(self.rt_model,'wraparound','none')
        self.rt_model.wraparound_mod = 0
        if np.array(self.rt_model.wraparound).size == 2 and \
                np.array(self.rt_model.wraparound)[0] == 'periodic':
            self.rt_model.wraparound_mod = \
                np.array(self.rt_model.wraparound)[1]
            self.rt_model.wraparound = \
                np.array(self.rt_model.wraparound)[0]
        self.rt_model.lag = \
            sortopt(self.rt_model,'lag',[1])
        self.rt_model.lag = np.array(self.rt_model.lag).flatten()
        self.rt_model.lag_weight = \
            sortopt(self.rt_model,'lag_weight',[1.]*\
                    self.rt_model.lag.size)
        self.rt_model.lag_weight = np.array(\
            self.rt_model.lag_weight).flatten().astype(float)
        if self.rt_model.lag_weight.sum() == 0:
            self.rt_model.lag_weight[:] = np.ones(self.rt_model.lag_weight.size)
        self.rt_model.lag_weight = self.rt_model.lag_weight\
            / self.rt_model.lag_weight.sum()

    def setH(self):
        '''
        This method sets up the matrices required for the model.

        This operator is written so that it can apply smoothing in
        different dimensions. This is controlled by the model state
        vector. The names of the states are stored in self.x_meta.state
        and the associated location information in self.x_meta.location.
        So, we look through these looking for matches, e.g. 'row' in
        location and 'gamma_row' in names would mean that we want to
        apply the model over the row dimension.

        There should be only one gamma term in the state vectors for
        this operator. If you give more than one, only the last one will
        be used.

        NOT YET IMPLEMENTED:
        The model can be applied to multiple dimensions by specifying
        e.g. gamma_time_row. If you want separate gammas for e.g. time
        and row, then you should use separate operators. If gamma_rowcol
        is specified, then the model applies to Euclidean distance in
        row/col space.

        Formally, the problem can be stated most simply as a matrix D,
        so that gamma D x is the rate of change of x with respect to the
        target location variable (time, row, col etc.). The job of this
        method then is to form and store D.

        The main complication to this is that we have to split up x into
        those terms that we will apply D to (x2 here) and separately pull
        out the gamma terms. The resultant matrix D then needs to be
        re-formed so as to apply to the whole vector x, rather than just
        x2. We do this with masks.

        On input, x is a 1D vector.
        '''
        x,Cx1,xshape,y,Cy1,yshape = self.getxy()
        # the names of the variables in x
        names = np.array(self.x_meta.state)
        # the names of the location information (e.g. time, row, col)
        location = self.x_meta.location
        self.logger.info('Setting up model matrices...')
        if self.x_meta.is_grid:
            try:
                self.x.location = self.x_state.ungridder.location
                self.x.qlocation = self.x_state.ungridder.qlocation
            except:
                raise Exception("You are trying to ungrid a dataset that wasn't gridded using State.regrid()" +\
                    " so the ungridder information is not available. Either load the data using State.grid" +\
                    " or set it up some other way or avoid calling this method with this type of data")
        # first, reshape x from its 1-D form to
        # have the same shape as self.x.state. We store
        # this shape as xshape.
        xshape = self.x.state.shape
        # we can't change the tuple directly, so we need a
        # vector representation that we can manipulate.
        # This is xshaper
        xshaper = np.array(xshape)
        # the data are assumed loaded into x.
        # At this point, x2 is just a copy of the full input vector x.
        # mask then is a mask of the same size as self.x_meta.state.
        # By default, this mask is True. We will modify it to
        # take out bits we don't want later.
        x2 = x.reshape(xshape)
        mask = np.ones_like(x2).astype(bool)
        # We now need to recognise any gamma terms that might be in
        # the state vector. Candidates are 'gamma_%s'%(location),
        # e.g. gamma_time.
        # The number of dimensions of x can vary, depending on how many
        # location terms are used, so it's a little tricky to
        # pull the information out.
        # We loop over the locations, indexed as i
        self.linear.datamask = np.ones(xshape[-1]).astype(bool)
        for i in xrange(len(location)):
            # and form the name of the candidate term in the variable 'this'
            this = 'gamma_%s'%location[i]
            ww = np.where(this == names)[0]
            # Then we see if it appears in the names of the state variables
            if len(ww):
                # form a mask so we don't apply the operator to gamma
                # terms. Note that *all* gamma terms are masked
                # even though we only actually use the last one we
                # come across.
                # We use [...,ww[0]] because the identifier for the
                # state is always in the final dimension.
                mask[...,ww[0]] = False
                # We store ww[0] as it will allow us to access gamma
                # in subsequent calls in this same way. This is
                # self.linear.gamma_col
                self.linear.gamma_col = ww[0]
                # and is used as ...
                gammas = x2[...,self.linear.gamma_col]
                self.linear.datamask[self.linear.gamma_col] = False
                # We want to store an index into which of the
                # location vector terms we are dealing with here.
                # This is self.linear.gamma_loc
                self.linear.gamma_loc = i
                # Once we apply the mask to get rid of the gamma columns
                # we need to keep track of the new shape for x2.
                # This will be x2shape
                xshaper[-1] -= 1
        self.linear.x2shape = tuple(xshaper)
        self.linear.x2mask = mask.flatten()
        # so, apply the mask to take out the gamma columns
        x2 = x[self.linear.x2mask].reshape(self.linear.x2shape)
        # We next need access to the location information
        # for the selected dimension self.linear.gamma_loc.
        # If the data are gridded, we need to form the relevant
        # information. For ungridded data we can access location directly
        # as it is explicitly stored. We store the location vector as
        # 'locations'
        try:
            locshape = gammas.shape
        except:
            # If no gamma term is given, it is implicit that it is
            # the first dimension of location, but we have no data to mask
            self.linear.gamma_col = None
            self.linear.gamma_loc = 0
            locshape = (0)
            gammas = x2[...,0]*0.+1.0
        #if self.x_meta.is_grid:
        # the locational variable of interest is self.linear.gamma_loc
        # the grid is dimensioned e.g. [t,r,c,p]
        # so we need e.g. locations, which is of dimension
        # e.g. [t,r,c]
        #     locations = self.x.location
        # access the ungridded location data
        lim = self.x_meta.qlocation[self.linear.gamma_loc]
        nloc = lim[1] - lim[0] + 1
        locations = self.x.location[...,self.linear.gamma_loc]
        locshape = tuple(np.array(self.x.location.shape)[:-1])
        for (i,lag) in enumerate(self.rt_model.lag):
            wt = self.rt_model.lag_weight[i]
            slocations = wt*(np.roll(locations,lag,\
                axis=self.linear.gamma_loc) - locations).astype(float)
            slocations2 = (locations - np.roll(locations,-lag,\
                axis=self.linear.gamma_loc)).astype(float)
            # If there is no variation, it is a waste of time to
            # calculate the derivative
            if i == 0 and np.abs(slocations).sum() + np.abs(slocations2).sum() == 0:
                # there is no variation here
                self.novar = True
                return 0
            self.novar = False
            ww = np.where(slocations > 0)
            mod = int(self.rt_model.wraparound_mod)/lim[-1] or slocations.shape[self.linear.gamma_loc]
            if self.rt_model.wraparound == 'reflexive':
                slocations[ww] = 0.
                #slocations[ww] = -np.fmod(mod - slocations[ww],mod)
            elif self.rt_model.wraparound == 'periodic':
                if self.rt_model.wraparound_mod == 0:
                    slocations[ww] = slocations2[ww]
                else:
                    slocations[ww] = -np.fmod( mod - slocations[ww],mod)
            else: # none
                slocations[ww] = 0.
            ww = np.where(slocations != 0)
            slocations[ww] = 1./slocations[ww]
            if i == 0:
                # Form the D matrix. This is of the size required to
                # process the x2 data, and this is the most convenient
                # form to use it in
                m = np.zeros(slocations.shape * 2)
            ww = np.where(slocations != 0)
            ww2 = np.array(ww).copy()
            ww2[self.linear.gamma_loc] = ww2[self.linear.gamma_loc] - lag
            ww2 = tuple(ww2)
            m[ww*2] = m[ww*2] - slocations[ww]
            if False and self.rt_model.wraparound == 'reflexive':
                ww2 = np.abs(ww-lag)
                # this is looped as there might be multiple elements with
                # the same index for the reflexive case
                if m.ndim > 2:
                    raise Exception("Not yet implemented: Can't use reflexive mode for multi-dimensions yet")
                for (c,j) in enumerate(ww2):
                    m[j,ww[c]] = m[j,ww[c]] + slocations[ww[c]]
            else:
                ww = tuple(ww)
                ww2 = tuple(ww2)
                m[ww2+ww] = m[ww2+ww] + slocations[ww]
        # fix for edge conditions
        dd = m.copy()
        dd = dd.reshape(tuple([np.array(self.linear.x2shape[:-1]).prod()])*2)
        ddw = np.where(dd.diagonal() == 0)[0]
        for d in (ddw):
            ds = -dd[d,:].sum()
            dd[d,:] += dd[d,:]
            dd[d,d] = ds
        m = dd.reshape(m.shape)
        self.logger.info('Caching model matrices...')
        #
        if np.array(xshape).prod() == Cy1.size:
            self.linear.C1 = Cy1.reshape(xshape)[mask]\
                .reshape( self.linear.x2shape )
        elif xshape[1] == Cy1.size:
            self.linear.C1 = np.tile(Cy1,xshape[0])[mask.flatten()].reshape( self.linear.x2shape )
        else:
            raise Exception("Can't deal with full covar matrix in DModel yet")
        nn = slocations.flatten().size
        m = m.reshape(nn,nn)
        self.linear.D1 = np.matrix(m).T
        for i in xrange(1,self.rt_model.model_order):
            m = np.matrix(self.linear.D1).T * m
            self.linear.D1 = m
        self.logger.info('... Done')
        return True

    def J(self):
        '''
        A slightly modified J, as it's efficient to precalculate things
        for this model.

        J = 0.5 * x.T D1.T gamma^2 D1 x
        '''
        x,Cx1,xshape,y,Cy1,yshape = self.getxy()
        self.Hsetup()
        if self.novar:
            return 0
        xshape = self.x.state.shape
        try:
            if self.linear.gamma_col != None:
                gamma = x.reshape(self.x.state.shape)\
                    [...,self.linear.gamma_col].flatten()
            else:
                # no gamma variable, so use 1.0
                gamma = x.reshape(self.x.state.shape)\
                    [...,0].flatten()*0.+1.
        except:
            self.logger.error('gamma_col not set ... recovering and assuming no variation here')
            self.linear.gamma_col = None
            gamma = x.reshape(self.x.state.shape)[...,0].flatten()*0.+1.
            self.novar = True
            self.Hsetup()
            return 0
        x2 = x[self.linear.x2mask].reshape(self.linear.x2shape)
        J = 0.
        i = 0
        if self.rt_model.inverse_gamma:
            tgamma = 1./gamma
        else:
            tgamma = gamma
        for count in xrange(self.x.state.shape[-1]):
            if count != self.linear.gamma_col:
                C1 = np.diag(self.linear.C1[...,i].\
                             reshape(self.linear.D1.shape[0]))
                x2a = x2[...,i].reshape(self.linear.D1.shape[0])
                xg = np.matrix(x2a*tgamma).T
                dxg = self.linear.D1.T * xg
                J += np.array(0.5 * dxg.T * C1 * dxg)[0][0]
                i += 1
        #print x[0],J
        return np.array(J).flatten()[0]

    def J_prime_prime(self):
        '''
        Calculation of J''.

        We already have the differential operator self.linear.D1 and
        self.gamma after we call self.J_prime().

        Here, J'' = D1.T gamma^2 D1

        J' is of shape (nobs,nstates), which is the same as the shape of
        x. D1 is of shape (nobs,nobs), which needs to be expanded to
        (nobs,nstates,nobs,nstates).
        '''
        x,Cx1,xshape,y,Cy1,yshape = self.getxy()
        J,J_prime = self.J_prime()
        xshape = self.x.state.shape
        if not 'linear' in self.dict():
            self.linear = ParamStorage()
        if not 'J_prime_prime' in self.linear.dict():
            self.linear.J_prime_prime = \
                np.zeros(xshape*2)
        else:
            self.linear.J_prime_prime[:] = 0
        # we need an indexing system in case of multiple
        # nobs columns
        x2a = np.diag(np.ones(self.linear.x2shape[:-1]).flatten())
        try:
            gamma = self.linear.gamma.flatten()
        except:
            if self.linear.gamma_col != None:
                gamma = x.reshape(self.x.state.shape)\
                    [...,self.linear.gamma_col].flatten()
            else:
                # no gamma variable, so use 1.0
                gamma = x.reshape(self.x.state.shape)\
                    [...,0].flatten()*0.+1.
            # NB: do not re-read self.linear.gamma here: we are in this
            # branch because it was not available
            #gamma = self.linear.gamma.flatten()
        if self.rt_model.inverse_gamma:
            tgamma = 1./gamma
            dg = 2./(gamma*gamma*gamma)
        else:
            tgamma = gamma
            dg = 1.0
        nshape = tuple([np.array(self.linear.x2shape[:-1]).prod()])
        D1 = np.matrix(self.linear.D1.reshape(nshape*2))
        i = 0
        # so, e.g. we have xshape as (50, 100, 2)
        # because one of those columns refers to the gamma value
        # self.linear.gamma_col will typically be 0
        for count in xrange(xshape[-1]):
            if count != self.linear.gamma_col:
                # we only want to process the non gamma cols
                C1 = np.diag(self.linear.C1[...,i].\
                             reshape(self.linear.D1.shape[0]))
                xg = np.matrix(x2a*tgamma*tgamma)
                dxg = D1 * xg
                deriv = np.array(dxg.T * C1 * D1)
                # so we have gamma^2 D^2 which is the Hessian;
                # we just have to put it in the right place now.
                # The technical issue is indexing an array of e.g.
                # (50, 100, 2, 50, 100, 2)
                # but it might have more or fewer dimensions
                nd = len(np.array(xshape)[:-1])
                nshape = tuple(np.array(xshape)[:-1])
                if nd == 1:
                    self.linear.J_prime_prime[:,count,:,count] = deriv.reshape(nshape*2)
                elif nd == 2:
                    self.linear.J_prime_prime[:,:,count,:,:,count] = deriv.reshape(nshape*2)
                elif nd == 3:
                    self.linear.J_prime_prime[:,:,:,count,:,:,:,count] = deriv.reshape(nshape*2)
                else:
                    self.logger.error("Can't calculate Hessian for %d dimensions ... I can only do up to 3"%nd)
                #ww = np.where(deriv)
                #ww2 = tuple([ww[0]]) + tuple([ww[0]*0+count]) \
                #    + tuple([ww[1]]) + tuple([ww[0]*0+count])
                #x1 = deriv.shape[0]
                #x2 = self.linear.J_prime_prime.shape[-1]
                #xx = self.linear.J_prime_prime.copy()
                #xx = xx.reshape(x1,x2,x1,x2)
                #xx[ww2] = deriv[ww]
                #self.linear.J_prime_prime = xx.reshape(self.linear.J_prime_prime.shape)
                i += 1
        if self.linear.gamma_col != None:
            c = self.linear.gamma_col
            nd = len(np.array(xshape)[:-1])
            nshape = tuple(np.array(xshape)[:-1])
            deriv = np.diag(dg*2*J/(tgamma*tgamma)).reshape(nshape*2)
            if nd == 1:
                self.linear.J_prime_prime[:,c,:,c] = deriv
            elif nd == 2:
                self.linear.J_prime_prime[:,:,c,:,:,c] = deriv
            elif nd == 3:
                self.linear.J_prime_prime[:,:,:,c,:,:,:,c] = deriv
            else:
                self.logger.error("Can't calculate Hessian for %d dimensions ... I can only do up to 3"%nd)
            #dd = np.arange(nshape[0])
            #x1 = dd.shape[0]
            #x2 = self.linear.J_prime_prime.shape[-1]
            #xx = self.linear.J_prime_prime.copy()
            #xx = xx.reshape(x1,x2,x1,x2)
            #xx[dd,dd*0+self.linear.gamma_col,\
            #   dd,dd*0+self.linear.gamma_col] = dg*2*J/(tgamma*tgamma)
            #self.linear.J_prime_prime = xx.reshape(self.linear.J_prime_prime.shape)
        n = np.array(xshape).prod()
        return J,J_prime,self.linear.J_prime_prime.reshape(n,n)

    def J_prime(self):
        '''
        A slightly modified J', as it's efficient to precalculate things
        for this model.

        J' = D.T gamma^2 D x
        '''
        J = self.J()
        if self.novar:
            return 0,self.nowt
        x,Cx1,xshape,y,Cy1,yshape = self.getxy()
        x2 = x[self.linear.x2mask].reshape(self.linear.x2shape)
        if self.linear.gamma_col != None:
            gamma = x.reshape(self.x.state.shape)\
                [...,self.linear.gamma_col].flatten()
        else:
            # no gamma variable, so use 1.0
            gamma = x.reshape(self.x.state.shape)\
                [...,0].flatten()*0.+1.
        #gamma = self.linear.gamma.flatten()
        if self.rt_model.inverse_gamma:
            tgamma = 1./gamma
            dg = -1./(gamma*gamma)
        else:
            tgamma = gamma
            dg = 1.0
        g2 = tgamma * tgamma
        xshape = self.x.state.shape
        J_prime = np.zeros((x.shape[0]/xshape[-1],xshape[-1]))
        D2x_sum = 0.
        # loop over the non gamma variables
        i = 0
        # store gamma in case it is needed elsewhere
        self.linear.gamma = gamma
        for count in xrange(self.x.state.shape[-1]):
            if count != self.linear.gamma_col:
                C1 = np.diag(self.linear.C1[...,i].\
                             reshape(self.linear.D1.shape[0]))
                x2a = x2[...,i].reshape(self.linear.D1.shape[0])
                xg = np.matrix(x2a*tgamma).T
                dxg = self.linear.D1 * xg
                deriv = np.array(dxg.T * C1 * self.linear.D1)[0]
                J_prime[...,count] = deriv * tgamma
                #if self.linear.gamma_col != None:
                #    J_prime_gamma = deriv * x2a
                #    D2x_sum = D2x_sum + J_prime_gamma
                i += 1
        if self.linear.gamma_col != None:
            J_prime[...,self.linear.gamma_col] = dg*2*J/tgamma
        return J,J_prime

    def Hsetup(self):
        '''
        Setup for the differential operator H(x).
        '''
        if not self.beenHere and not 'H_prime' in self.linear.dict():
            self.logger.info('Setting up storage for efficient model operator')
            if 'y' in self.dict():
                self.linear.H = np.zeros(self.y.state.shape)
                self.linear.H_prime = np.zeros(self.y.state.shape*2)
            else:
                self.linear.H = np.zeros(self.x.state.shape)
                self.linear.H_prime = np.zeros(self.x.state.shape*2)
            self.setH()
            if self.novar:
                self.nowt = 0. * self.x.state
            #del self.linear.H_prime, self.linear.H
            self.beenHere = True
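# --------------------------------------------------------------------------
# A self-contained numerical sketch (not part of the original source) of the
# cost function this operator implements, for a 1-D state with 'none' edge
# conditions: J = 0.5 (gamma D x).T C (gamma D x), with analytic gradient
# J' = gamma^2 D.T C D x. All names below (D, C, gamma) are local to the
# sketch; it simply checks the gradient against finite differences.

def _dmodel_sketch(n=8, gamma=2.0, seed=0):
    rng = np.random.RandomState(seed)
    x = rng.rand(n)
    # first-order, lag-1 difference operator with no wraparound
    D = np.zeros((n, n))
    for i in xrange(1, n):
        D[i, i] = 1.
        D[i, i - 1] = -1.
    C = np.eye(n)  # unit inverse covariance
    dxg = gamma * np.dot(D, x)
    J = 0.5 * np.dot(dxg, np.dot(C, dxg))
    J_prime = gamma * gamma * np.dot(np.dot(D.T, C), np.dot(D, x))
    # finite-difference check of the gradient
    eps = 1.e-7
    for i in xrange(n):
        xp = x.copy()
        xp[i] += eps
        dxp = gamma * np.dot(D, xp)
        Jp = 0.5 * np.dot(dxp, np.dot(C, dxp))
        assert abs((Jp - J) / eps - J_prime[i]) < 1.e-4
    return J, J_prime
# --------------------------------------------------------------------------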
def __min(self,a,b):
    '''
    Min utility for 2 numbers, ignoring None.
    '''
    if a == None:
        out = b
    elif b == None:
        out = a
    else:
        out = np.min([a,b])
    if out == None:
        return 0
    else:
        return out

# the next critical thing is some observations
obs = load_brdf_file (brf,self.config,bandpass_names={})
if obs == False:
    return False
self.config.operator.obs.update(obs,combine=True)
# sets up an initial version of x_init
# which is in the observation 'space' (i.e. one per obs)
for n_par in xrange ( self.config.params.n_params ):
    #self.default_vals[n_par] = prior_mean[n_par]
    if np.all( self.obs.x_init[ :, n_par] == 0 ):
        # no values set, so use the default
        self.obs.x_init [ :, n_par ] = self.default_vals [ n_par ]
# try brfinit_files
# which can overwrite x_init
try:
    if self.options.preload != []:
        brfinit_files = self.options.preload
        self.brfinit_files['override'] = brfinit_files
except:
    if self.options.preload != []:
        self.brfinit_files = ParamStorage ()
        self.brfinit_files['override'] = self.options.preload
# this is a hack to get the same structure
self.brfinit_files = self.brfinit_files.dict()
thisdoys = None
if self.brfinit_files is not None:
    # this is not consistent with having multiple files
    # and is a bit of a mess
    for key in self.brfinit_files.keys():
        if type(self.brfinit_files[key]) == type([]):
            initfile = self.brfinit_files[key][0]
        else:
            initfile = self.brfinit_files[key]
        #(acovar, abandwidth, abands, anpt, anbands_max, alocation, \
        # awhichfile, anbands, adoys, aqa, atheta_v, atheta_i, aphi_v, \
        # aphi_i, aisobs, aobs, aobscovar, aparams_x) = \
        #     load_brdf_file(initfile)
        (thisdoys,thisparams) = self.read_parameters(initfile,confdir=confdir)
        # if the read fails, thisdoys is None
        #self.obs.x_init[:,:] = aparams_x[:,:]
if thisdoys == None:
    self.brfinit_files = None
# For convenience, we can invert the observation covariance matrices
self.obs.obsinvcovar = []
self.obs.real_obsinvcovar = []
for sample_no in xrange( self.obs.npt ):
    temp_mtx = np.matrix( self.obs.obscovar[ sample_no ] ).I
    if self.config.params.scale_cost:
        self.logger.info ("Scaling obs by %f" % \
            float(self.obs.npt*self.obs.nbands[0] ) )
        self.obs.obsinvcovar.append ( \
            temp_mtx/float((self.obs.npt*self.obs.nbands[sample_no] )))
    else:
        self.obs.obsinvcovar.append( temp_mtx )
    self.obs.real_obsinvcovar.append (temp_mtx)
# if there is anything non zero in x_init, set params_x to that
if self.obs.x_init.sum() > 0:
    self.params_x = self.obs.x_init.copy()
else:
    self.params_x = np.zeros ((self.obs.npt, \
        self.config.params.n_params))
# determine which params to fix, based primarily on solve_for flags
fix_params = define_fixparams(self.parameters, \
    solve_for=self.solve_for,prior_sd=self.prior_sd,model_unc_cfg=self.model_unc_cfg)
self.config.params.n_model_params = np.sum(fix_params==3) + np.sum(fix_params==4)
# set up the grid based on the span of unique doys
self.unique_doys, self.quantised_doys, self.obs_shift = quantise_time ( self.obs.doys, \
    self.time_quant ,grid=grid)
self.grid_n_obs = self.unique_doys.shape[0]
self.fix_params = np.tile(fix_params, self.grid_n_obs).reshape((self.grid_n_obs,self.config.params.n_params))
self.logger.info ("%d days, %d quantised days" % ( len(self.unique_doys), \
    len(self.quantised_doys) ) )
self.grid_n_params = fix_params.shape[0]
# set up a grid model representation from self.params_x;
# we will use this when loading.
# self.params_x is a full representation in obs space,
# so we expand it to the model grid space
self.store_params = self.get_x(self.params_x,self.fix_params*0.)
# but this may contain zeros if a parameter has not been defined,
# so these should be set to the default value,
# or maybe interpolation is better
udoys = np.unique(self.obs.doys)
try:
    where_udoys = np.in1d(self.unique_doys,udoys)
except:
    # fallback for numpy versions without in1d
    where_udoys = np.zeros_like(self.unique_doys).astype(np.bool)
    for i in udoys:
        w = np.where(self.unique_doys == i)
        where_udoys[w] = True
for i in xrange(self.grid_n_params):
    self.store_params[:,i] = np.interp(self.unique_doys,self.unique_doys[where_udoys],self.store_params[where_udoys,i])
# override this with data from brfinit_files
if self.brfinit_files is not None:
    # zeroth ...
    # pull out elements of thisdoys that appear in self.unique_doys
    # first interpolate thisparams onto the grid
    store_params = self.store_params*0.
    new_thisdoys = np.zeros( self.store_params.shape[0]).astype(np.int)
    # loop over thisdoys and load where appropriate
    for (i,j) in enumerate(thisdoys):
        ww = np.where(j == self.unique_doys)
        store_params[ww,:] = thisparams[i,:]
        new_thisdoys[ww] = j
    thisdoys = new_thisdoys
    udoys = np.unique(thisdoys)
    try:
        where_udoys = np.in1d(thisdoys,udoys)
    except:
        # fallback for numpy versions without in1d
        where_udoys = np.zeros_like(thisdoys).astype(np.bool)
        for i in udoys:
            w = np.where(thisdoys == i)
            where_udoys[w] = True
    for i in xrange(self.grid_n_params):
        self.store_params[:,i] = np.interp(self.unique_doys,self.unique_doys[where_udoys],store_params[where_udoys,i])
# deal with model uncertainty
self.model_unc = np.ones((self.fix_params.shape[1]))
for ( i, k ) in enumerate ( self.parameters ):
    if self.model_unc_cfg [ k ] > 0:
        self.model_unc[i] = self.model_unc[i] * self.model_unc_cfg [ k ]
self.prior_m = np.array([self.prior_mean[k] for k in self.parameters ])
self.prior_std = np.array([self.prior_sd[k] for k in self.parameters ])
return #( prior_mean, prior_sd, model_unc, abs_tol, scale_cost)
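# --------------------------------------------------------------------------
# Sketch (not part of the original source; values are hypothetical) of the
# np.interp gap-filling used above: parameters are known only on days with
# observations, and are spread across the full quantised day grid.

def _interp_sketch():
    unique_doys = np.arange(1., 11.)                   # the full model grid
    where_udoys = np.in1d(unique_doys, [2., 5., 9.])   # days with obs
    col = np.zeros(10)
    col[where_udoys] = [0.2, 0.5, 0.9]                 # known parameter values
    # linear interpolation (constant extrapolation at the grid ends)
    return np.interp(unique_doys, unique_doys[where_udoys], col[where_udoys])
# --------------------------------------------------------------------------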
def scan_info(self, config, this, info, fullthis, fullinfo):
    """
    Take a ConfigParser instance config and scan info into config.info.

    This is called recursively if needed.

    Parameters:
        config   : the configuration object
        this     : the current item to be parsed
        info     : where this item is to go
        fullthis : the full name of this
        fullinfo : the full (top level) version of info.
    """
    from eoldas_ConfFile import assoc_to_flat
    # find the keys in the top level
    # loop over
    thiss = np.array(this.split('.'))
    # just in case '..' is used as a separator
    ww = np.where(thiss != '')
    thiss = thiss[ww]
    nextone = ''
    for i in xrange(1, len(thiss) - 1):
        nextone = nextone + thiss[i] + '.'
    if len(thiss) > 1:
        nextone = nextone + thiss[-1]
    # first, check if it's already there
    if not hasattr(info, thiss[0]):
        info[thiss[0]] = ParamStorage()
        info[thiss[0]].helper = []
    # load up the info
    if len(thiss) == 1:
        for option in config.options(fullthis):
            fulloption = option
            # option may have a '.' separated term as well
            options = np.array(option.split('.'))
            # tidy up any double dot stuff
            ww = np.where(options != '')
            options = options[ww]
            # need to iterate to make sure it is loaded
            # at the right level of the hierarchy
            this_info = info[this]
            # so now this_info is at the base
            for i in xrange(len(options) - 1):
                if not hasattr(this_info, options[i]):
                    this_info[options[i]] = ParamStorage()
                    this_info[options[i]].helper = []
                this_info = this_info[options[i]]
            option = options[-1]
            this_info[option] = array_type_convert(fullinfo,\
                config.get(fullthis,fulloption))
            if option[:6] == 'assoc_':
                noption = option[6:]
                this_info[noption] = assoc_to_flat(\
                    fullinfo.parameter.names,this_info[option],\
                    this_info[noption])
                is_assoc = True
            else:
                is_assoc = False
            if not hasattr(this_info, 'helper'):
                this_info.helper = []
            ndot = len(fullthis.split('.'))
            pres = ''
            for i in xrange(1, ndot):
                pres += ' '
            if type(this_info.helper) == str:
                this_info.helper += "\n%s%s.%-8s = %-8s" % \
                    (pres,fullthis,fulloption,str(this_info[option]))
            elif type(this_info.helper) == list:
                this_info.helper.append("%s%s.%-8s = %-8s" % \
                    (pres,fullthis,fulloption,\
                     str(this_info[option])))
            if is_assoc:
                if type(this_info.helper) == str:
                    this_info.helper += "\n%s%s.%-8s = %-8s" % \
                        (pres,fullthis,fulloption.replace\
                         ('assoc_',''),str(this_info[noption]))
                elif type(this_info.helper) == list:
                    this_info.helper.append("%s%s.%-8s = %-8s" % \
                        (pres,fullthis,fulloption.replace\
                         ('assoc_',''),str(this_info[noption])))
    else:
        self.scan_info(config, nextone, info[thiss[0]], fullthis, fullinfo)
        if thiss[-1][:6] == 'assoc_' and thiss[0] in fullinfo.dict():
            # only do this operation when at the top level
            noption = thiss[-1][6:]
            option = thiss[-1]
            this_info = info
            fulloption = thiss[0]
            this_info = this_info[thiss[0]]
            for i in xrange(1, len(thiss) - 1):
                this_info = this_info[thiss[i]]
                fulloption = '%s.%s' % (fulloption, thiss[i])
            fulloption = '%s.%s' % (fulloption, noption)
            #this_info[noption] = assoc_to_flat(fullinfo.parameter.names\
            #    ,this_info[option],\
            #    this_info[noption])
            if not 'names' in this_info.dict():
                this_info.names = fullinfo.parameter.names
            if not option in this_info.dict():
                this_info[option] = [0] * len(this_info.names)
            if not noption in this_info.dict():
                this_info[noption] = [0] * len(this_info.names)
            this_info[noption] = assoc_to_flat(this_info.names\
                ,this_info[option],\
                this_info[noption])
            ndot = len(fullthis.split('.'))
            pres = ''
            for i in xrange(1, ndot):
                pres += ' '
            if type(this_info.helper) == str:
                this_info.helper += "\n%s%-8s = %-8s" % (pres,\
                    fulloption,str(this_info[noption]))
            elif type(this_info.helper) == list:
                this_info.helper.append("%s%-8s = %-8s" % (pres,\
                    fulloption,str(this_info[noption])))
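# --------------------------------------------------------------------------
# Sketch of the effect of scan_info (illustrative; the section and option
# below are hypothetical). Given a config file containing:
#
#     [operator.modelt.rt_model]
#     model_order = 2
#
# scanning leaves the value at the matching level of the ParamStorage
# hierarchy, so that afterwards:
#
#     info.operator.modelt.rt_model.model_order == 2
#
# with each intermediate level created as a ParamStorage on demand, and a
# 'helper' list collecting lines such as
# "operator.modelt.rt_model.model_order = 2" for logging. Options prefixed
# 'assoc_' are additionally expanded with assoc_to_flat into a per-parameter
# list aligned with parameter.names.
# --------------------------------------------------------------------------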