def runAdjoint_sp(self):
    # Create an empty adjoint structure:
    region = Region(self.rcf)
    categories = [c for c in self.rcf.get('emissions.categories')
                  if self.rcf.get('emissions.%s.optimize' % c) == 1]
    start = datetime(*self.rcf.get('time.start'))
    end = datetime(*self.rcf.get('time.end'))
    dt = time_interval(self.rcf.get('emissions.*.interval'))
    adj = CreateStruct(categories, region, start, end, dt)

    # Loop over the footprint files:
    db = self.obs.observations
    files = unique(db.footprint.dropna())
    for fpfile in tqdm(files, total=len(files), desc='Adjoint run', leave=False, disable=self.batch):
        fp = Footprint(fpfile)
        msg = f"Adjoint run {fpfile}"

        # Loop over the obs in the file:
        for obs in tqdm(db.loc[db.footprint == fpfile, :].itertuples(), desc=msg, leave=False, disable=self.batch):
            adj = fp.applyAdjoint(obs.time, obs.dy, adj, categories)
        fp.close()

    # Write the adjoint field:
    WriteStruct(adj, self.emfile)
def runForward_sp(self):
    # Read the emissions:
    emis = ReadStruct(self.emfile)

    # Create containers for the results:
    dy = {'tot': [], 'id': [], 'model': []}
    for cat in self.categories.list:
        dy[cat] = []

    # Loop over the footprint files:
    files = unique(self.obs.observations.footprint.dropna())
    for fpfile in tqdm(files, total=len(files), desc='Forward run', disable=self.batch, leave=False):
        fp = Footprint(fpfile)

        # Loop over the obs in the file:
        msg = "Forward run (%s)" % fpfile
        nobs = sum(self.obs.observations.footprint == fpfile)
        for obs in tqdm(self.obs.observations.loc[self.obs.observations.footprint == fpfile, :].itertuples(),
                        desc=msg, leave=False, total=nobs, disable=self.batch):
            dym, tot = fp.applyEmis(obs.time, emis)
            if dym is not None:
                for cat in self.categories.list:
                    dy[cat].append(dym.get(cat))
                dy['tot'].append(tot)
                dy['id'].append(obs.Index)
                dy['model'].append(dym)
        fp.close()

    # Store the simulated values in the observation database:
    self.obs.observations.loc[dy['id'], 'id'] = dy['id']
    self.obs.observations.loc[dy['id'], 'totals'] = dy['tot']
    self.obs.observations.loc[dy['id'], 'model'] = dy['model']
    self.obs.observations.loc[:, 'foreground'] = 0.
    for cat in self.categories.list:
        self.obs.observations.loc[dy['id'], cat] = dy[cat]
        self.obs.observations.loc[dy['id'], 'foreground'] += array(dy[cat])

    # Write db:
    self.obs.save_tar(self.obsfile)
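# Minimal illustrative sketch (toy example, not the class API): runForward_sp and
# runAdjoint_sp form a forward/adjoint pair, i.e. the forward run applies the
# observation operator H to an emission field and the adjoint run applies its
# transpose to the departures dy. Any such pair should satisfy the dot-product test
# below; the explicit matrix H is only a stand-in, the real operator is applied
# footprint by footprint and never formed explicitly.
def _example_dot_product_test():
    import numpy as np
    rng = np.random.default_rng(0)
    H = rng.random((50, 200))   # hypothetical nobs x nstate operator
    x = rng.random(200)         # emissions
    dy = rng.random(50)         # model-data departures
    assert np.isclose((H @ x) @ dy, x @ (H.T @ dy))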
def clusterize(field, nmax, mask=None):
    clusters = [Cluster(field, mask=mask, crop=False)]
    # Clusters that cannot be divided further are offloaded to "clusters_final",
    # to speed up the calculations:
    clusters_final = []
    nclmax = min(nmax, (clusters[0].mask > 0).sum())
    with tqdm(total=nclmax, desc="spatial aggregation") as pbar:
        ncl = len(clusters + clusters_final)
        while ncl < nclmax:
            # Split the cluster with the highest rank:
            ranks = [c.rank for c in clusters]
            ind = ranks.index(max(ranks))
            new_clusters = clusters[ind].split()
            clusters.pop(ind)
            for cl in new_clusters:
                if cl.mask.any():
                    if cl.size == 1:
                        clusters_final.append(cl)
                    else:
                        if mask is not None:
                            clusters.extend(cl.splitByMask())
                        else:
                            clusters.append(cl)
            inc = len(clusters + clusters_final) - ncl
            pbar.update(inc)
            ncl += inc
    return clusters + clusters_final
def calc_spatial_coarsening(self, lsm=None):
    clusters = clusterize(self.ancilliary_data['sensi_map'],
                          self.rcf.get('optimize.ngridpoints'), mask=lsm)
    mapping = {
        'clusters_map': zeros((self.region.nlat, self.region.nlon)) + nan,
        'cluster_specs': []
    }
    lons, lats = meshgrid(self.region.lons, self.region.lats)
    ilons, ilats = meshgrid(range(self.region.nlon), range(self.region.nlat))
    lats, lons = lats.reshape(-1), lons.reshape(-1)
    ilats, ilons = ilats.reshape(-1), ilons.reshape(-1)
    area = self.region.area.reshape(-1)
    if lsm is None:
        # No land-sea mask provided: treat every grid cell as land (fraction 1)
        lsm = zeros((self.region.nlat, self.region.nlon)) + 1.
    lsm = lsm.reshape(-1)

    for icl, cl in enumerate(tqdm(clusters)):
        indices = cl.ind.reshape(-1)
        mapping['clusters_map'].reshape(-1)[indices] = icl
        # Replace the grid indices by the cluster number, and store the cluster properties:
        cl.ind = icl
        cl.lats = lats[indices]
        cl.lons = lons[indices]
        cl.ilats = ilats[indices]
        cl.ilons = ilons[indices]
        cl.area = area[indices]
        cl.mean_lat = average(cl.lats, weights=cl.area)
        cl.mean_lon = average(cl.lons, weights=cl.area)
        cl.area_tot = cl.area.sum()
        cl.land_fraction = average(lsm[indices], weights=cl.area)
        mapping['cluster_specs'].append(cl)
    return mapping
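# Illustrative sketch (assumptions flagged): "mapping" is the dict returned by
# calc_spatial_coarsening above and "flux" is a hypothetical (nlat, nlon) array on
# the same grid. Since clusters_map holds the cluster index of every grid cell, in
# the same order as cluster_specs, a gridded field can be aggregated onto the
# coarsened state vector as follows.
def _example_coarsen_field(flux, mapping):
    import numpy as np
    cmap = np.asarray(mapping['clusters_map']).reshape(-1)
    flat = np.asarray(flux).reshape(-1)
    ncl = len(mapping['cluster_specs'])
    return np.array([flat[cmap == icl].sum() for icl in range(ncl)])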
def _genFootprintNames(self, fnames=None, leave_pbar=False):
    """
    Deduce the names of the footprint files based on their site name, sampling height and observation time.
    Optionally, a user-specified list of file names (for example following a different pattern) can be passed instead.

    :param fnames: A list of footprint file names.
    :return: A list of footprint file names, or the optional "fnames" argument (if it isn't set to None)
    """
    if fnames is None:
        # Create the theoretical footprint file names:
        for isite, site in tqdm(self.sites.iterrows(), leave=leave_pbar,
                                desc='Generate footprint file names (step 1/3)', total=self.sites.shape[0]):
            self.observations.loc[self.observations.site == isite, 'code'] = site.code

        fnames = array(['%s.%im.%s.h5' % (c.lower(), z, t.strftime('%Y-%m'))
                        for (c, z, t) in tqdm(zip(self.observations.code,
                                                  self.observations.height,
                                                  self.observations.time),
                                              leave=leave_pbar,
                                              desc='Generate footprint file names (step 2/3)',
                                              total=self.observations.shape[0])])
        fnames = [os.path.join(self.footprints_path, f)
                  for f in tqdm(fnames, leave=leave_pbar, desc='Generate footprint file names (step 3/3)')]
    return fnames
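# Example of the naming convention implemented above (the site code, sampling height
# and observation time are illustrative values only):
#
#   >>> from datetime import datetime
#   >>> code, height, tobs = 'HTM', 150, datetime(2018, 6, 12, 11, 30)
#   >>> '%s.%im.%s.h5' % (code.lower(), height, tobs.strftime('%Y-%m'))
#   'htm.150m.2018-06.h5'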
def xc_to_x(G_state, Temp_L, Hor_L, x_c, ipos, dummy, path=None):
    n_state = len(G_state)
    nt = shape(Temp_L)[0]
    nhor = shape(Hor_L)[0]
    x = zeros(n_state)
    for i in tqdm(range(nt), desc='xc_to_x', leave=True):
        for j in range(nt):
            x[ipos + i * nhor: ipos + (i + 1) * nhor] += \
                G_state[ipos + i * nhor: ipos + (i + 1) * nhor] * \
                dot(Temp_L[i, j] * Hor_L, x_c[ipos + j * nhor: ipos + (j + 1) * nhor])
    return x
def g_to_gc(G_state, Temp_Lt, Hor_Lt, g, ipos, dummy, path=None):
    n_state = len(G_state)
    nt = shape(Temp_Lt)[0]
    nhor = shape(Hor_Lt)[0]
    g_c = zeros([n_state])
    for i in tqdm(range(nt), desc='preconditioning gradient', leave=False):
        for j in range(nt):
            g_c[ipos + i * nhor: ipos + (i + 1) * nhor] += dot(
                Temp_Lt[i, j] * Hor_Lt,
                G_state[ipos + j * nhor: ipos + (j + 1) * nhor] * g[ipos + j * nhor: ipos + (j + 1) * nhor])
    return g_c
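# Illustrative consistency sketch (toy sizes, assuming ipos=0 and n_state = nt*nhor):
# the blockwise loops above apply Kronecker-structured operators without ever forming
# them. xc_to_x applies diag(G_state) @ kron(Temp_L, Hor_L) to x_c, and g_to_gc,
# called with the transposed factors, applies the transpose of that operator, so the
# pair satisfies <x, g> == <x_c, g_c>.
def _example_preconditioner_structure():
    import numpy as np
    rng = np.random.default_rng(0)
    nt, nhor = 3, 4
    G = rng.random(nt * nhor)
    T = rng.random((nt, nt))
    H = rng.random((nhor, nhor))
    A = np.diag(G) @ np.kron(T, H)   # dense equivalent of the operator in xc_to_x
    x_c = rng.random(nt * nhor)
    g = rng.random(nt * nhor)
    x = A @ x_c                      # what xc_to_x(G, T, H, x_c, 0, None) computes
    g_c = A.T @ g                    # what g_to_gc(G, T.T, H.T, g, 0, None) computes
    assert np.isclose(x @ g, x_c @ g_c)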
def calcSensitivityMap(self, recompute=False):
    if not hasattr(self, 'sensi_map') or recompute:
        footprint_files = unique(self.observations.footprint)

        # Compute the sensitivity field of each footprint file in parallel:
        with Pool() as p:
            fields = list(tqdm(p.imap(concat_footprints, footprint_files),
                               total=len(footprint_files),
                               desc="Computing network sensitivity map"))

        # Sum over the files into a single DataArray:
        field = DataArray(array([f.data for f in fields]).sum(0),
                          coords=[fields[0].lats, fields[0].lons],
                          dims=['lats', 'lons'])
        self.sensi_map = field
        self.add_sensiMapIO()
    return self.sensi_map
def _checkFootprints(self, cache=None):
    footprint_files = unique(self.observations.footprint)

    # Loop over the footprint files (not over the obs, for efficiency):
    for fpf in tqdm(footprint_files, desc='Checking footprints'):

        # 1st, check if the footprint file exists, and migrate it to the cache if needed:
        fpf = self._checkCacheFile(fpf, cache)

        # Then, check whether the file contains all the individual obs footprints it is supposed to have:
        if fpf is not None:
            with h5py.File(fpf, mode='r') as fp:

                # Times of the obs that are supposed to be in this file:
                times = [x.to_pydatetime() for x in self.observations.loc[self.observations.footprint == fpf, 'time']]

                # Check whether a footprint group exists for each time:
                fp_exists = array([x.strftime('%Y%m%d%H%M%S') in fp for x in times])

                # Some footprints may exist but be empty; flag them as missing:
                fp_exists[fp_exists] = [len(fp[x.strftime('%Y%m%d%H%M%S')]) > 0 for x in array(times)[fp_exists]]

                # Store the result:
                self.observations.loc[self.observations.footprint == fpf, 'footprint_exists'] = fp_exists.astype(bool)
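# Illustrative sketch of the footprint file layout assumed by _checkFootprints above:
# each HDF5 footprint file contains one group per observation, named after the
# observation time as YYYYMMDDHHMMSS. The file path below is a placeholder only.
def _example_list_footprints(fpfile='footprints/htm.150m.2018-06.h5'):
    import h5py
    with h5py.File(fpfile, mode='r') as fp:
        for key in list(fp.keys())[:5]:
            print(key, len(fp[key]))   # observation time and number of datasets in the group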
def importFromPath(self, path, pattern='data/nc/co2_*.nc',
                   date_range=(datetime(1000, 1, 1), datetime(3000, 1, 1)),
                   lat_range=(-inf, inf), lon_range=(-inf, inf), exclude_mobile=True):
    """
    Import all the observations from obspack files (in netCDF format) found under "path".
    The space/time domain can be limited using the date_range, lat_range and lon_range arguments.
    By default, the observations from mobile platforms are skipped (the method works with them,
    but is not well adapted, as it would create one "site" entry for each observation).
    """
    files = sorted(glob.glob(os.path.join(path, pattern)))
    for file in tqdm(files, desc=f'Import obs files from {os.path.join(path, pattern)}'):

        # Import data
        with Dataset(file) as ds:
            if ds.dataset_parameter == 'co2':
                scale = 1.e6   # convert mole fractions (mol/mol) to ppm

            # Defaults: import everything, with no per-obs lat/lon selection:
            continue_import = True
            selection_ll = None

            if exclude_mobile:
                platform = ds.dataset_platform
                continue_import = platform in ['fixed']
                if continue_import:
                    if platform in ['fixed']:
                        # Fixed site: a single lat/lon is enough to check the domain:
                        continue_import = lat_range[0] <= ds.site_latitude <= lat_range[1] and \
                                          lon_range[0] <= ds.site_longitude <= lon_range[1]
                    else:
                        # Mobile platform: select the individual obs inside the domain:
                        lons = ds['longitude'][:]
                        lats = ds['latitude'][:]
                        selection_ll = (lon_range[0] <= lons) * (lons <= lon_range[1])
                        selection_ll *= (lat_range[0] <= lats) * (lats <= lat_range[1])
                        continue_import = any(selection_ll)
                else:
                    logger.debug(f'File {os.path.basename(file)} skipped because of platform {platform}')

            if continue_import:
                # Select the obs inside the requested time range:
                time = array([datetime(*x) for x in ds['time_components'][:]])
                selection = (date_range[0] <= time) * (time <= date_range[1])
                continue_import = any(selection)
            else:
                logger.debug(f'No data imported from file {os.path.basename(file)}, because of lat/lon range')

            if continue_import:
                if selection_ll is not None:
                    selection *= selection_ll
                nobs = len(time[selection])
                if 'value_unc' in ds.variables.keys():
                    err = ds.variables['value_unc'][selection] * scale
                else:
                    err = zeros(sum(selection))
                try:
                    observations = {
                        'time': time[selection],
                        'lat': ds['latitude'][selection],
                        'lon': ds['longitude'][selection],
                        'alt': ds['altitude'][selection],
                        'height': ds['altitude'][selection] - ds.site_elevation,
                        'obs': ds['value'][selection] * scale,
                        'err': err,
                        'file': array([file] * nobs),
                        'code': array([ds.site_code] * nobs),
                        'name': array([ds.site_name] * nobs)
                    }
                except Exception:
                    logger.error("Import failed for file %s" % file)
                    raise
            else:
                logger.debug(f'No data imported from file {os.path.basename(file)}, because of time range')

            if continue_import:
                logger.info(f"{nobs} observations imported from {os.path.basename(file)}")

                # Fill in the dataframes:
                observations = DataFrame.from_dict(observations)
                sites = observations.loc[:, ['lat', 'lon', 'alt', 'height', 'file', 'code', 'name']].drop_duplicates()

                for dummy, row in sites.iterrows():

                    # Check if a corresponding site already exists in the database:
                    if self.sites.loc[(self.sites.reindex(columns=sites.columns) == row).all(axis=1)].shape[0] == 0:
                        self.sites = self.sites.append(row, ignore_index=True)

                    # Retrieve the index of the site (there should be exactly one!):
                    site = self.sites.loc[(self.sites.loc[:, sites.columns] == row).all(axis=1)]
                    assert site.shape[0] == 1, logger.error(
                        f"Error importing data from {os.path.basename(file)}, "
                        f"{site.shape[0]} entries found for site, instead of exactly 1 expected")
                    isite = site.index[0]

                    # Create the rows to be added to the self.observations dataframe:
                    obs = observations.loc[
                        (observations.loc[:, sites.columns] == row).all(axis=1),
                        ['time', 'lat', 'lon', 'alt', 'height', 'obs', 'err', 'code']]
                    obs.loc[:, 'site'] = isite

                    # Append to self.observations:
                    self.observations = self.observations.append(obs, sort=False)
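# Hedged usage sketch: "db" stands for an instance of the observation-database class
# that defines importFromPath; the path and the domain bounds are illustrative values only.
#
#   from datetime import datetime
#   db.importFromPath(
#       '/data/obspack',                       # hypothetical path
#       pattern='data/nc/co2_*.nc',
#       date_range=(datetime(2018, 1, 1), datetime(2019, 1, 1)),
#       lat_range=(30., 73.),
#       lon_range=(-15., 35.),
#       exclude_mobile=True,
#   )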