def read_input_file(self, filename, name, info=[]):
    '''
    Read state data from ASCII file filename.

    Returns:
        thisdata, is_error

    where:
        is_error = (error, error_msg)

    Load data from a file (e.g. BRDF data or parameter data).
    The file format is flat ASCII with a header, and needs to be
    one of the formats appearing in self.headers.
    '''
    from eoldas_Lib import set_default_limits,\
        check_limits_valid, quantize_location, sortopt
    try:
        f = open(filename, 'r')
    except:
        return 0, (True, 'Failed to open load file %s with call to %s' % \
                   (filename, str('read_input_file')))
    try:
        if f.errors is not None:
            error_msg = str(f.errors)
            return 0, (True, error_msg)
    except:
        pass
    # try to read a PARAMETERS file
    find_col = lambda name: np.where(np.array(params) == name)
    # read the first line
    header = f.readline().replace('#', '').split()
    MAGIC = header[0]
    found = False
    nl = 0
    for (k, v) in self.headers.iteritems():
        if MAGIC == v:
            found = True
            nl = 1
            basic = header[1:]
            if k in self.headers_2:
                header2 = f.readline().replace('#', '').split()
                if header2[0] != self.headers_2[k]:
                    found = False
                else:
                    nl = 2
                    extras = header2[1:]
        if found:
            fmt = k
            break
    if nl == 0:
        f.close()
        return 0, (True, 'File %s not recognised by %s'\
                   % (filename, str('read_input_file')))
    if 'logger' in self or 'logger' in self.dict():
        self.logger.info("Interpreted format of %s as %s" % (filename, k))
    f.close()
    f = open(filename, 'r')
    # skip the header line(s)
    [f.readline() for i in xrange(nl)]
    # the limits info is used to only read observations
    # within these limits
    # The size depends on location and should have 3 numbers
    # for each location entry
    try:
        location = self.Name.location
    except:
        try:
            location = self.name.location
        except:
            if fmt == 'BRDF':
                location = ['time']
            else:
                location = 'time row col'.split()
    location = np.array(
        [i.replace('[', '').replace(']', '') for i in location])
    try:
        limits = self.name.qlocation
    except:
        limits = set_default_limits(location)
    try:
        names = np.array(self._state.name.state)
    except:
        try:
            names = np.array(self.name.state)
        except:
            names = ['default']
    limits = np.array(check_limits_valid(limits))
    sd_params = []
    names = np.atleast_1d(names)
    try:
        for i in xrange(len(names)):
            sd_params.append("sd-%s" % names[i])
    except:
        pass
    sd_params = np.array(sd_params)
    if (fmt == 'BRDF' or fmt == 'BRDF-UCL'):
        # unpack the header
        nbands = int(basic[1])
        bands = basic[2:nbands + 2]
        try:
            if self.name.datatype == 'y':
                names = bands
        except:
            names = bands
        sd_params = []
        for i in xrange(len(np.atleast_1d(names))):
            sd_params.append("sd-%s" % names[i])
        sd_params = np.array(sd_params)
        sd = np.zeros(sd_params.shape[0])
        for i in xrange(len(np.atleast_1d(names))):
            this = np.where(np.array(bands) == names[i])[0]
            if this.size:
                sd[i] = float(basic[2 + nbands + this[0]])
        #sd = np.array([float(i) for i in basic[2+nbands:]])
        if fmt == 'BRDF-UCL':
            params = extras
            #location = extras
        else:
            params = ['time']
        nlocation = len(np.atleast_1d(params))
        params.extend("mask vza vaa sza saa".split())
        params.extend(bands)
        if fmt == 'BRDF-UCL':
            params.extend(sd_params)
        params = np.array(params)
        #names = bands
    else:
        params = basic
        sd = np.zeros_like(names).astype(float)
    # check to see if any location information is given:
    # loop over self._state.name.location and see which
    # columns appear in params
    loccols = []
    for i in xrange(len(np.atleast_1d(location))):
        ccc = find_col(location[i])
        if len(np.atleast_1d(ccc)):
            loccols.append(ccc[0])
        else:
            loccols.append(0)
    # now do the same for control
    controlcols = []
    try:
        control = self.name.control
    except:
        try:
            control = self.Name.control
        except:
            # np.array so the reshape below is valid
            control = np.array('mask vza vaa sza saa'.split())
    try:
        if len(np.atleast_1d(control)) == 0:
            control = np.array("mask".split())
    except:
        if control.size == 0:
            control = np.array("mask".split())
    control = control.reshape(control.size)
    # strip out superfluous brackets
    control = np.array([i.replace('[', '').replace(']', '')
                        for i in control])
    for i in xrange(control.size):
        ccc = find_col(control[i])
        if len(np.atleast_1d(ccc)):
            controlcols.append(ccc[0])
    # if the datatype is y, then we get the names from the file,
    # which we suppose by default to be anything
    # other than location & control,
    # but first we see if we can find anything defined in names
    # now for the bands
    wnames = [find_col(i) for i in names]
    # and sd
    wsdnames = [find_col(i) for i in sd_params]
    have_names = False
    # check to see if any names data found
    nnames = np.array([np.array(i).size for i in wnames]).sum()
    if nnames == 0 and (self.datatype is None or \
                        self.datatype[0] == 'y'):
        # we found no names, so check datatype is None or y & guess the
        # names from the params fields that aren't used as control
        # or location
        names = []
        sd_params = []
        p_orig = params
        wnames = []
        wsdnames = []
        for i in xrange(len(np.atleast_1d(p_orig))):
            taken = False
            params = control
            taken = taken or \
                bool(np.array(find_col(p_orig[i])).flatten().shape[0])
            params = location
            taken = taken or \
                bool(np.array(find_col(p_orig[i])).flatten().shape[0])
            params = names
            taken = taken or \
                bool(np.array(find_col(p_orig[i])).flatten().shape[0])
            params = sd_params
            taken = taken or \
                bool(np.array(find_col(p_orig[i])).flatten().shape[0])
            if not taken:
                names.append(p_orig[i])
                sd_params.append("sd-%s" % p_orig[i])
                params = p_orig
                wnames.append(find_col(names[-1]))
                wsdnames.append(find_col(sd_params[-1]))
        params = p_orig
    data = f.readlines()
    f.close()
    # check to see if there is a mask column
    is_mask = 'mask' in params
    want_mask = True or 'mask' in control
    # so we need a grid to store the data
    # [p,t,r,c ...] or similar
    # the total datasize will be len(data) * (len(names) + len(location))
    # set to nan ... but we'll return a mask later
    grid = []
    locations = []
    qlocations = []
    controls = []
    sd2 = []
    maxi = [(limits[i, 1] - limits[i, 0] * 1.) / limits[i, 2]
            for i in xrange(len(np.atleast_1d(limits)))]
    for i in xrange(len(np.atleast_1d(data))):
        ok = True
        liner = data[i].split()
        get_col = lambda index, liner: float(
            len(np.atleast_1d(index)) and liner[index])
        ldata = []
        for c in xrange(len(np.atleast_1d(location))):
            ldata.append(get_col(loccols[c], liner))
        qldata = quantize_location(ldata, limits)
        if (np.array(qldata) < 0).all() or \
                (maxi - np.array(qldata) < 0).all():
            ok = False
        cdata = []
        for c in xrange(len(np.atleast_1d(controlcols))):
            if want_mask and not is_mask and \
                    (control[c] == 'mask' or control[c] == '[mask]'):
                cdata.append(1)
            else:
                cdata.append(get_col(controlcols[c], liner))
        # check the mask value
        try:
            if not (want_mask and not is_mask):
                c = np.where(control == 'mask')[0]
                if c.size == 0:
                    c = np.where(control == '[mask]')[0]
                if c.size == 0:
                    ok = True
                elif int(cdata[c]) != 1:
                    ok = False
        except:
            ok = True
            cdata.append(1)
        if ok:
            this = np.zeros(len(np.atleast_1d(names)))
            this[:] = None   # this will set unread fields to nan
            for (j, k) in enumerate(wnames):
                if np.array(k).size > 0:
                    this[j] = float(liner[k[0]])
            that = np.zeros(len(np.atleast_1d(names)))
            that[:] = None   # this will set unread fields to nan
            for (j, k) in enumerate(wsdnames):
                if np.array(k[0]).shape[0] > 0:
                    that[j] = float(liner[k[0]])
            locations.extend(ldata)
            controls.append(cdata)
            qlocations.append(qldata)
            grid.append(this)
            sd2.append(that)
    # check to see if the sd data are any good
    sd2a = np.array(sd2)
    if sd2a.flatten().sum() > 0:
        sd = sd2a
    n_samples = len(np.atleast_1d(data))
    data = {}
    name = {}
    data['state'] = np.array(grid)
    nsamples = data['state'].shape[0]
    if not 'datatype' in self.name.dict() or self.name.datatype is None \
            or self.name.datatype[0] == 'y':
        # it's a y or it's a bit broken
        name['state'] = np.array(names)
        # note, the state list can be updated
        # by what it finds, but only for y states
    name['fmt'] = fmt
    name['location'] = np.array(location)
    nlocations = name['location'].shape[0]
    data['location'] = np.array(locations).reshape(nsamples, nlocations)
    name['location'] = np.array(location)
    name['qlocation'] = np.array(limits)
    #orig = np.repeat(np.array(name['qlocation'][:,0]),nsamples)\
    #          .reshape(nlocations,nsamples).T
    data['qlocation'] = np.array(qlocations).reshape(nsamples,
                                                     nlocations)  #+ orig
    name['qlocation'] = np.array(limits)
    name['control'] = np.array(control)
    ncontrol = np.max((1, name['control'].shape[0]))
    if name['control'].shape[0]:
        data['control'] = np.array(controls).reshape(nsamples, ncontrol)
    else:
        data['control'] = np.array(controls)
    # only return sd if it's > 0
    if sd.size != data['state'].size:
        try:
            sd = np.tile(np.array(sd), nsamples).reshape(data['state'].shape)
        except:
            self.logger.info("can't tile sd data: %s" % str(sd))
            sd = np.array([0.])
    if sd.flatten().sum() > 0:
        name['sd'] = np.array(names)
        data['sd'] = sd
    datasets = {'data': data, 'name': name}
    return datasets, (False, 'Data read from %s with %s fmt %s' % \
                      (filename, str('read_input_file'), fmt))
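
# A minimal usage sketch for read_input_file (hedged: 'state' stands for an
# instance of this class set up via reinit, and 'brdf_obs.dat' is a
# hypothetical ASCII file with one of the self.headers magic lines, e.g.
# '#BRDF', as described above):
#
#     datasets, (error, error_msg) = state.read_input_file('brdf_obs.dat', 'y')
#     if error:
#         state.logger.error(error_msg)
#     else:
#         obs = datasets['data']['state']        # (nsamples, nbands) values
#         bands = datasets['name']['state']      # band names from the header
#         locs = datasets['data']['location']    # (nsamples, nlocations)
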
def read_numpy(self, filename, name, info=[]):
    '''
    Try to read the file as a NpzFile
    '''
    from eoldas_Lib import set_default_limits, check_limits_valid,\
        quantize_location, dequantize_location
    # none of these critical to functioning
    try:
        info = self._state.info
    except:
        info = []
    try:
        names = self.name.state
    except:
        try:
            names = self.Name.state
        except:
            names = None
    try:
        control = self.Name.control
    except:
        try:
            control = self.name.control
        except:
            control = None
    try:
        location = self.name.location
    except:
        try:
            location = self.Name.location
        except:
            location = ['time', 'row', 'col']
    try:
        limits = self.name.qlocation
    except:
        try:
            limits = self.Name.qlocation
        except:
            limits = set_default_limits(location)
    # refl_check=False,names=None,\
    # control=['mask','vza','vaa','sza','saa'],\
    # location=['time','row','col'],limits=None
    # location specifies the dimensions and names of the
    # problem, e.g., & typically [time,row,col]
    limits = np.array(check_limits_valid(limits))
    try:
        f = np.load(filename)
        if not type(f).__name__ == 'NpzFile':
            f.close()
            self.error_msg = "%s is not a NpzFile" % filename
            self.error = True
            if 'logger' in self or 'logger' in self.dict():
                self.logger.info(self.error_msg)
            return 0, (self.error, self.error_msg)
    except:
        self.error_msg = "a problem opening %s as a NpzFile" % filename
        self.error = True
        if 'logger' in self or 'logger' in self.dict():
            self.logger.info(self.error_msg)
        return 0, (self.error, self.error_msg)
    # ok so far, then let's have a look inside
    ncontents = np.array(f.files)
    contents = np.array(f.files)
    # translation table for default names
    def_names = 'b1 b2 b3 b4 b5 b6 b7'.split()
    if names is None:
        # assume MODIS
        names = def_names
    def_alt_names = \
        '645.5 856.5 465.6 553.6 1241.6 1629.1 2114.1'.split()
    # look for any of names in contents
    datasets = []
    alt_datasets = []
    alt_names = names
    for i in xrange(len(np.atleast_1d(contents))):
        if contents[i] in names:
            datasets.append(i)
    if not len(np.atleast_1d(datasets)):
        if 'logger' in self or 'logger' in self.dict():
            self.logger.error(\
                "None of requested datasets %s found in %s ..." \
                % (str(names), filename) + \
                " trying default MODIS names: only %s"\
                % (str(contents)))
        names = def_names
        alt_names = def_alt_names
        for i in xrange(len(np.atleast_1d(contents))):
            if contents[i] in names:
                datasets.append(i)
        if not len(np.atleast_1d(datasets)):
            self.error_msg = "None of requested datasets %s found in %s"\
                % (str(names), filename) + ' ' + \
                "... trying default MODIS names: only %s"\
                % (str(contents))
            self.error = True
            if 'logger' in self or 'logger' in self.dict():
                self.logger.error(self.error_msg)
            return 0, (self.error, self.error_msg)
    trans_names = {}
    for (i, j) in enumerate(alt_names):
        trans_names[names[i]] = j
    #trans_names = {names[i]:j for (i,j) in enumerate(alt_names)}
    alt_name = []
    this_name = []
    for i in datasets:
        this_name.append(contents[i])
        alt_name.append(trans_names[contents[i]])
    # Translate some old styles...
    trans = {'raa': 'vaa', 'doys': 'time'}
    for i in trans:
        if i in contents:
            ncontents[np.where(contents == i)[0]] = trans[i]
    # as a minimum, there needs to be some definition of one of
    # the terms in location
    # check how many dimensions this has
    # now find a dataset
    try:
        # This could be more general, but this will do for now
        # as it's useful for spatial datasets
        QA_OK = np.array(\
            [8, 72, 136, 200, 1032, 1288, 2056, 2120, 2184, 2248])
        doy = f['doys'] - 2004000
        qa = f['qa']
        vza = f['vza']
        sza = f['sza']
        raa = f['raa']
        y = []
        for i in this_name:
            y.append(f[i])
        #mask = np.logical_or.reduce([qa==x for x in QA_OK])
        if 'logger' in self or 'logger' in self.dict():
            self.logger.info(\
                "successfully interpreted NpzFile dataset from %s"\
                % filename)
            self.logger.info("sub-setting ...")
        controls = []
        locations = []
        grid = []
        qlocations = []
        thisshape = vza.shape
        starter = {'time': np.min(doy), 'row': 0, 'col': 0}
        delta = {'time': 1, 'row': 1, 'col': 1}
        if len(np.atleast_1d(limits)) < 3:
            from eoldas_Lib import set_default_limits
            old_loc = location
            location = np.array(['time', 'row', 'col'])
            lim2 = set_default_limits(location)
            for i in xrange(len(np.atleast_1d(limits))):
                ww = np.where(old_loc[i] == location)[0]
                lim2[ww] = list(limits[i])
            limits = lim2
        for i in xrange(len(np.atleast_1d(limits))):
            if limits[i][0] is None:
                limits[i][0] = starter[location[i]]
            if limits[i][1] is None:
                limits[i][1] = (thisshape[i] - 1) + starter[location[i]]
            if limits[i][2] is None:
                limits[i][2] = delta[location[i]]
        limits = np.array(limits)
        start_doy = limits[0][0]
        end_doy = limits[0][1]
        step_doy = limits[0][2]
        start_row = limits[1][0]
        end_row = limits[1][1]
        step_row = limits[1][2]
        start_col = limits[2][0]
        end_col = limits[2][1]
        step_col = limits[2][2]
        gooddays = np.logical_and.reduce(np.concatenate(\
            ([doy >= start_doy], [doy <= end_doy])))
        qa = qa[gooddays, start_row:end_row + 1, start_col:end_col + 1]
        vza = vza[gooddays, start_row:end_row + 1,
                  start_col:end_col + 1] * 0.01
        sza = sza[gooddays, start_row:end_row + 1,
                  start_col:end_col + 1] * 0.01
        raa = raa[gooddays, start_row:end_row + 1,
                  start_col:end_col + 1] * 0.01
        yy = []
        for i in xrange(len(np.atleast_1d(this_name))):
            this = y[i]
            yy.append(this[gooddays, start_row:end_row + 1,\
                           start_col:end_col + 1] * 0.0001)
        doy = doy[gooddays]
        # now do QA
        mask = np.zeros_like(qa).astype(bool)
        # loop over qa
        for j in xrange(len(np.atleast_1d(QA_OK))):
            ww = np.where(qa == QA_OK[j])
            mask[ww] = True
        # also loop over the data to check validity
        for j in xrange(len(np.atleast_1d(yy))):
            ww = np.where(yy[j] < 0)
            mask[ww] = False
        ww = np.where(mask)
        if 'logger' in self or 'logger' in self.dict():
            self.logger.debug('parsing dataset: %d samples look ok'\
                              % np.array(ww).shape[1])
        vza = vza[ww]
        sza = sza[ww]
        raa = raa[ww]
        doy = doy[ww[0]]
        row = ww[1] + start_row
        col = ww[2] + start_col
        locations = np.array([doy, row, col])
        nnn = len(np.atleast_1d(locations[0]))
        orig = np.repeat(np.array([start_doy, start_row, start_col]),
                         locations.shape[1]).reshape(locations.shape).T
        div = np.repeat(np.array([step_doy, step_row, step_col]),
                        locations.shape[1]).reshape(locations.shape).T
        qlocations = ((locations.T - orig) / div.astype(float))\
            .astype(int).T
        controls = np.array([np.ones_like(doy).astype(bool),\
                             vza, raa, sza, 0 * doy])
        y = []
        for i in xrange(len(np.atleast_1d(this_name))):
            this = yy[i]
            y.append(this[ww])
        grid = np.array(y)
        fmt = 'BRDF-UCL'
        control = ['mask', 'vza', 'vaa', 'sza', 'saa']
        bands = alt_name
        if not np.array(grid).size:
            if 'logger' in self or 'logger' in self.dict():
                self.logger.error(\
                    "Warning: returning a zero-sized dataset ... " +\
                    "I wouldn't try to do anything with it")
        # in case we don't have data for all bands
        mask = np.logical_or.reduce([[this_name[i] == x for x in names] \
                for i in xrange(len(np.atleast_1d(this_name)))])
        sd = np.array('0.004 0.015 0.003 0.004 0.013 0.01 0.006'\
                      .split())[mask]
        sd = np.array([float(i) for i in sd.flatten()])\
            .reshape(sd.shape)
        nsamps = grid.shape[1]
        sd = sd.repeat(nsamps).reshape(grid.shape).T
        datasets = ParamStorage()
        datasets.data = ParamStorage()
        datasets.name = ParamStorage()
        datasets.name.fmt = fmt
        grid = grid.T
        datasets.data[name] = np.zeros([grid.shape[0],
                                        len(np.atleast_1d(names))])\
            .astype(object)
        datasets.data[name][:, :] = None
        for i in xrange(len(np.atleast_1d(this_name))):
            ww = np.where(names == this_name[i])[0][0]
            datasets.data[name][:, ww] = grid[:, i]
        datasets.data.location = np.array(locations).T
        datasets.data.control = np.array(controls).T
        datasets.data.qlocation = np.array(qlocations).T
        datasets.name[name] = np.array(names)
        datasets.name.location = np.array(['time', 'row', 'col'])
        datasets.name.control = np.array(control)
        datasets.name.qlocation = limits
        datasets.name.bands = np.array(bands)
        datasets.data.sd = np.zeros([grid.shape[0],
                                     len(np.atleast_1d(names))])\
            .astype(object)
        #for i in xrange(grid.shape[0]):
        #    datasets.data.sd[i,:] = self.options.sd
        datasets.data.sd[:, :] = None
        for i in xrange(len(np.atleast_1d(this_name))):
            ww = np.where(names == this_name[i])[0][0]
            datasets.data.sd[:, ww] = sd[:, i]
        datasets.name.sd = np.array(names)
        if 'logger' in self or 'logger' in self.dict():
            self.logger.debug('finished parsing dataset')
    except:
        self.error_msg =\
            "a problem processing information from %s as a NpzFile"\
            % filename
        self.error = True
        if 'logger' in self or 'logger' in self.dict():
            self.logger.info(self.error_msg)
        return 0, (self.error, self.error_msg)
    f.close()
    if 'logger' in self or 'logger' in self.dict():
        self.logger.info('... done')
    self.error = False
    self.error_msg = ""
    return datasets, (self.error, self.error_msg)
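
# A hedged sketch of the NpzFile layout read_numpy expects: arrays 'doys',
# 'qa', 'vza', 'sza', 'raa' plus per-band reflectances under the default
# MODIS names b1..b7, each gridded as (ndays, nrows, ncols). The filename
# and values below are purely illustrative:
#
#     import numpy as np
#     shape = (2, 5, 5)
#     refl = dict((b, np.ones(shape) * 2000) for b in
#                 'b1 b2 b3 b4 b5 b6 b7'.split())    # scaled by 1e-4 on read
#     np.savez('modis_block.npz',
#              doys=np.array([2004001, 2004002]),    # read as doys - 2004000
#              qa=np.zeros(shape, dtype=int) + 8,    # 8 appears in QA_OK
#              vza=np.zeros(shape), sza=np.zeros(shape),
#              raa=np.zeros(shape),                  # angles scaled by 0.01
#              **refl)
#     datasets, (error, error_msg) = state.read_numpy('modis_block.npz', 'y')
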
def reinit(self, options, names=None, datatype=None, limits=None,\
           bounds=None, control=None, location=None, env=None,\
           header=None, logdir=None, writers={}, grid=False,\
           logger=None, datadir=None, logfile=None, name=None,\
           info=[], readers=[], debug=None):
    '''
    Method to re-initialise the class instance.

    The setup is on the whole controlled by the datatype, which
    contains e.g. 'x'. This is used to set up the members self.x
    and self.y as SpecialVariables (see SpecialVariable in
    eoldas_SpecialVariable.py).

    There are some special attributes for datatypes starting with
    'y'. These are assumed to be observational data, which means
    that when they are read, the data names associated with them are
    not limited to those in self.names but rather set to whatever is
    read in from the data. This is because the data names for
    observational data may be terms such as waveband names etc. that
    need special interpretation. Also, the default output format for
    observational data is different to that of other data.

    The element self.state is a SpecialVariable, which means that it
    can be assigned various data types (see SpecialVariable) and
    loaded accordingly (e.g. if a filename is specified, this is
    read in to the data structure). The SpecialVariables contain
    'hidden' datasets, which here are mainly the 'control' and
    'location' information.

    A SpecialVariable has two internal structures: `data` and
    `name`. The former is used to store data values (e.g. the state
    values) and the latter to store associated metadata. For
    example, `control` is passed here e.g. as [`mask`,`vza`] and
    this gives the metadata that are stored in `name`. The actual
    values of the control data are stored in the `data` section. For
    location, we might be passed [`time`,`row`,`col`], so this is
    set in name.location, and data.location contains the values of
    the location at each of these elements. For the actual state
    dataset, this is stored according to its name, so for `x` the
    values are stored in data.x and the associated data names in
    name.x.

    State datasets must represent at least the mean and standard
    deviation of a state for them to be of value in EOLDAS. The mean
    is accessed as e.g. self.state for the state dataset. The sd can
    be accessed as self._state.sd if it has been set. This reference
    can also be used to directly set data associated with a
    SpecialVariable, e.g.

        self.Data.control = np.zeros([2,3])

    to represent 2 samples with 3 control variables. You can access
    name information similarly with

        print self.Name.control

    but this will generate a KeyError if the term has not been set.
    You can check it exists with:

        key = 'control'
        if key in self.Name:
            this = (self.Data[key], self.Name[key])

    To get e.g. a dictionary representation of a SpecialVariable you
    can use e.g.:

        self.Name.to_dict()

    to get the name dictionary, or

        thisdict = self._state.to_dict()

    to get the full representation, which then contains 'data' and
    'name' as well as some other information stored in the
    SpecialVariable. You can similarly load them using e.g.

        self.Data.update(
            ParamStorage().from_dict(thisdict['data']), combine=True)
    '''
    # set up a fakes dictionary from the data types
    self.set('datatype', datatype)
    self.set('fakes', {'state': '_state'})
    # first check that options is sensible
    self.__check_type(options, ParamStorage, fatal=True)
    self.options = options
    from eoldas_Lib import set_default_limits,\
        check_limits_valid, quantize_location, sortopt
    nSpecial = 1
    if name is None:
        import time
        thistime = str(time.time())
        name = type(self).__name__
        name = "%s.%s" % (name, thistime)
    self.thisname = name
    self.options.thisname = str(name).replace(' ', '_')
    log_terms = {\
        'logfile': logfile or sortopt(self.options, 'logfile', None),\
        'logdir': logdir or sortopt(self.options, 'logdir', None),\
        'debug': debug or sortopt(self.options, 'debug', True)}
    self.datadir = datadir or sortopt(self.options, 'datadir', ["."])
    self.header = header or "EOLDAS pickle V1.0 - plewis"
    env = env or sortopt(self.options, 'env', None)
    names = names or sortopt(self.options, 'names', None)
    location = location or sortopt(self.options, 'location', ['time'])
    control = control or sortopt(self.options, 'control', [])
    limits = limits or sortopt(self.options, 'limits',\
                               set_default_limits(np.array(location)))
    limits = limits or self.options.limits
    limits = np.array(check_limits_valid(limits))
    bounds = bounds or sortopt(self.options, 'bounds',\
                               [[None, None]] * xlen(names))
    self.options.bounds = bounds
    self.headers = {'PARAMETERS-V2': "PARAMETERS-V2",\
                    'PARAMETERS': "PARAMETERS",\
                    'BRDF-UCL': 'BRDF-UCL',\
                    'BRDF': 'BRDF'}
    self.headers_2 = {'BRDF-UCL': 'location'}
    # The ones pre-loaded are
    # self.read_functions = [self.read_pickle, self.read_numpy_fromfile]
    self._state = SpecialVariable(info=info, name=self.thisname,\
                                  readers=readers, datadir=self.datadir,\
                                  env=env, writers=writers,\
                                  header=self.header,\
                                  logger=logger, log_terms=log_terms,\
                                  simple=False)
    # self._state is where data are read into
    # but self.Data and self.Name are where we access them from
    self.grid = grid
    # this is so we can access this object from
    # inside a SpecialVariable
    self.state = np.array([0.])
    # a default data fmt output
    if datatype[0] == 'y':
        self.Name.fmt = 'BRDF'
        self.Name.state = np.array(['dummy'])
    else:
        self.Name.fmt = 'PARAMETERS'
        n_params = xlen(names)
        if not n_params:
            error_msg = \
                "The field 'names' must be defined in options or " + \
                "passed directly to this method if you have the " + \
                "data type x"
            raise Exception(error_msg)
        self.Name.state = np.array(names)
    self.Name.location = np.array(location)
    self.Name.control = np.array(control)
    self.Name.header = self.header
    self.Name.bounds = np.array(bounds)
    self.Name.qlocation = np.array(limits)
    self.Name.datadir = datadir
    #
    # sort this object's name
    # sort logging
    self.logger = sortlog(self, log_terms['logfile'], logger,
                          name=self.thisname,
                          logdir=log_terms['logdir'],
                          debug=log_terms['debug'])
    self.logger.info('Initialising %s' % type(self).__name__)
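
# A hedged sketch of calling reinit directly (it is normally invoked from
# the class initialiser; the option fields follow the sortopt lookups
# above, but the values here are purely illustrative):
#
#     options = ParamStorage()
#     options.names = ['lai', 'xkab']
#     options.location = ['time']
#     options.control = ['mask']
#     options.limits = [[170, 365, 1]]
#     state.reinit(options, datatype='x', name='x_state')
#     state.Data.control = np.zeros([2, 1])  # 2 samples, 1 control variable
#     print state.Name.state                 # -> ['lai' 'xkab']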