Example #1
    def runAdjoint_sp(self):
        # Create an empty adjoint structure:
        region = Region(self.rcf)
        categories = [
            c for c in self.rcf.get('emissions.categories')
            if self.rcf.get('emissions.%s.optimize' % c) == 1
        ]
        start = datetime(*self.rcf.get('time.start'))
        end = datetime(*self.rcf.get('time.end'))
        dt = time_interval(self.rcf.get('emissions.*.interval'))
        adj = CreateStruct(categories, region, start, end, dt)

        # Loop over the footprint files:
        db = self.obs.observations
        files = unique(db.footprint.dropna())
        for fpfile in tqdm(files,
                           total=len(files),
                           desc='Adjoint run',
                           leave=False,
                           disable=self.batch):
            fp = Footprint(fpfile)
            msg = f"Adjoint run {fpfile}"

            # Loop over the obs in the file
            for obs in tqdm(db.loc[db.footprint == fpfile, :].itertuples(),
                            desc=msg,
                            leave=False,
                            disable=self.batch):
                adj = fp.applyAdjoint(obs.time, obs.dy, adj, categories)
            fp.close()

        # Write the adjoint field
        WriteStruct(adj, self.emfile)
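
Both loops above share a common tqdm pattern: a persistent outer bar over the footprint files and a transient inner bar over the observations of each file, with leave=False so finished inner bars are erased and disable= to silence all bars in batch runs. A minimal, self-contained sketch of that nesting (the loop bodies and counts are invented for illustration):

from tqdm import tqdm
import time

batch = False  # set to True to suppress the bars entirely, e.g. in batch jobs

for ifile in tqdm(range(3), desc='Adjoint run', leave=False, disable=batch):
    # One transient bar per "file"; it is cleared once its loop finishes
    for iobs in tqdm(range(50), desc=f'Adjoint run file {ifile}', leave=False, disable=batch):
        time.sleep(0.01)  # stand-in for the per-observation work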
Example #2
    def runForward_sp(self):
        # Read the emissions:
        emis = ReadStruct(self.emfile)

        # Create containers
        dy = {}
        dy['tot'] = []
        dy['id'] = []
        dy['model'] = []
        for cat in self.categories.list:
            dy[cat] = []

        # Loop over the footprint files
        nsites = len(unique(self.obs.observations.footprint.dropna()))
        msg = 'Forward run'
        for fpfile in tqdm(unique(self.obs.observations.footprint.dropna()),
                           total=nsites,
                           desc=msg,
                           disable=self.batch,
                           leave=False):
            fp = Footprint(fpfile)

            # Loop over the obs in the file
            msg = "Forward run (%s)" % fpfile
            nobs = sum(self.obs.observations.footprint == fpfile)
            for obs in tqdm(self.obs.observations.loc[
                    self.obs.observations.footprint == fpfile, :].itertuples(),
                            desc=msg,
                            leave=False,
                            total=nobs,
                            disable=self.batch):
                dym, tot = fp.applyEmis(obs.time, emis)
                if dym is not None:
                    for cat in self.categories.list:
                        dy[cat].append(dym.get(cat))
                    dy['tot'].append(tot)
                    dy['id'].append(obs.Index)
                    dy['model'].append(dym)
            fp.close()

        try:
            self.obs.observations.loc[dy['id'], 'id'] = dy['id']
        except:
            import pdb
            pdb.set_trace()
        self.obs.observations.loc[dy['id'], 'totals'] = dy['tot']
        self.obs.observations.loc[dy['id'], 'model'] = dy['model']
        self.obs.observations.loc[:, 'foreground'] = 0.
        for cat in self.categories.list:
            self.obs.observations.loc[dy['id'], cat] = dy[cat]
            self.obs.observations.loc[dy['id'], 'foreground'] += array(dy[cat])

        # Write db:
        self.obs.save_tar(self.obsfile)
Example #3
def clusterize(field, nmax, mask=None):
    clusters = [Cluster(field, mask=mask, crop=False)]
    # Offload the clusters that cannot be further divided, to speed up the calculations
    clusters_final = []
    nclmax = min(nmax, (clusters[0].mask > 0).sum())
    with tqdm(total=nclmax, desc="spatial aggregation") as pbar:
        ncl = len(clusters + clusters_final)
        while ncl < nclmax:  # and len(Cluster) > 0 :
            ranks = [c.rank for c in clusters]
            ind = ranks.index(max(ranks))
            new_clusters = clusters[ind].split()
            clusters.pop(ind)
            for cl in new_clusters:
                if cl.mask.any():
                    if cl.size == 1:
                        clusters_final.append(cl)
                    else:
                        if mask is not None:
                            clusters.extend(cl.splitByMask())
                        else:
                            clusters.append(cl)
            inc = len(clusters + clusters_final) - ncl
            pbar.update(inc)
            ncl += inc
    return clusters + clusters_final
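
Because each pass of the while loop can add several clusters at once, the bar here is driven manually: tqdm(total=nclmax) is opened as a context manager and pbar.update(inc) is called with the number of clusters gained in that pass. A stripped-down sketch of the same pattern, with the splitting logic replaced by a made-up growth step:

import random
from tqdm import tqdm

target = 100
items = [0]  # start from a single undivided "cluster"

with tqdm(total=target, desc='spatial aggregation') as pbar:
    while len(items) < target:
        # A pass may add a variable number of items, so advance the bar by the increment
        gained = min(random.randint(1, 5), target - len(items))
        items.extend([0] * gained)
        pbar.update(gained)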
Example #4
    def calc_spatial_coarsening(self, lsm=None):
        clusters = clusterize(self.ancilliary_data['sensi_map'],
                              self.rcf.get('optimize.ngridpoints'),
                              mask=lsm)
        mapping = {
            'clusters_map': zeros((self.region.nlat, self.region.nlon)) + nan,
            'cluster_specs': []
        }
        lons, lats = meshgrid(self.region.lons, self.region.lats)
        ilons, ilats = meshgrid(range(self.region.nlon), range(self.region.nlat))
        lats, lons = lats.reshape(-1), lons.reshape(-1)
        ilats, ilons = ilats.reshape(-1), ilons.reshape(-1)
        area = self.region.area.reshape(-1)
        lsm = lsm.reshape(-1)
        for icl, cl in enumerate(tqdm(clusters)):
            indices = cl.ind.reshape(-1)
            mapping['clusters_map'].reshape(-1)[indices] = icl
            cl.ind = icl
            cl.lats = lats[indices]
            cl.lons = lons[indices]
            cl.ilats = ilats[indices]
            cl.ilons = ilons[indices]
            cl.area = area[indices]
            cl.mean_lat = average(cl.lats, weights=cl.area)
            cl.mean_lon = average(cl.lons, weights=cl.area)
            cl.area_tot = cl.area.sum()
            cl.land_fraction = average(lsm[indices], weights=cl.area)
            mapping['cluster_specs'].append(cl)
        return mapping
Example #5
    def _genFootprintNames(self, fnames=None, leave_pbar=False):
        """
        Deduce the names of the footprint files from the site name, sampling height and observation time of each observation.
        Optionally, a user-specified list of file names (for example following a different pattern) can be provided instead.
        :param fnames: An optional list of footprint file names.
        :return: A list of footprint file names (or the "fnames" argument itself, if it is not None).
        """
        if fnames is None :
            # Create the footprint theoretical filenames :
            for isite, site in tqdm(self.sites.iterrows(), leave=leave_pbar, desc='Generate footprint file names (step 1/3)', total=self.sites.shape[0]):
                self.observations.loc[self.observations.site == isite, 'code'] = site.code
#            codes = [self.sites.loc[s].code for s in tqdm(self.observations.site, leave=False, desc='Generate footprint file names (step 1/3)')]
            fnames = array(
                ['%s.%im.%s.h5'%(c.lower(), z, t.strftime('%Y-%m')) for (c, z, t) in tqdm(zip(
                    self.observations.code, self.observations.height, self.observations.time
                ), leave=leave_pbar, desc='Generate footprint file names (step 2/3)', total=self.observations.shape[0])]
            )
        fnames = [os.path.join(self.footprints_path, f) for f in tqdm(fnames, leave=leave_pbar, desc='Generate footprint file names (step 3/3)')]
        return fnames
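
Note the explicit total= on the second bar: zip(...) has no length, so tqdm cannot infer the number of iterations by itself and would only show a running count without it. The same applies to DataFrame.iterrows(), hence the total=self.sites.shape[0] above. A minimal illustration with made-up inputs:

from tqdm import tqdm

codes = ['abc', 'def', 'ghi']
heights = [150, 100, 85]

# Passing total= restores the percentage and ETA that a bare zip() cannot provide
names = ['%s.%im.h5' % (c.lower(), z)
         for c, z in tqdm(zip(codes, heights), total=len(codes),
                          desc='Generate footprint file names')]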
Example #6
def xc_to_x(G_state, Temp_L, Hor_L, x_c, ipos, dummy, path=None):
    n_state = len(G_state)
    nt = shape(Temp_L)[0]
    nhor = shape(Hor_L)[0]
    x = zeros(n_state)
    for i in tqdm(range(nt), desc='xc_to_x', leave=True):
        sl_i = slice(ipos + i * nhor, ipos + (i + 1) * nhor)
        for j in range(nt):
            sl_j = slice(ipos + j * nhor, ipos + (j + 1) * nhor)
            x[sl_i] += G_state[sl_i] * dot(Temp_L[i, j] * Hor_L, x_c[sl_j])
    return x
Example #7
def g_to_gc(G_state, Temp_Lt, Hor_Lt, g, ipos, dummy, path=None):
    n_state = len(G_state)
    nt = shape(Temp_Lt)[0]
    nhor = shape(Hor_Lt)[0]
    g_c = zeros([n_state])
    for i in tqdm(range(nt), desc='preconditioning gradient', leave=False):
        sl_i = slice(ipos + i * nhor, ipos + (i + 1) * nhor)
        for j in range(nt):
            sl_j = slice(ipos + j * nhor, ipos + (j + 1) * nhor)
            g_c[sl_i] += dot(Temp_Lt[i, j] * Hor_Lt, G_state[sl_j] * g[sl_j])
    return g_c
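
The two functions above differ in their leave flag: leave=True keeps the finished 'xc_to_x' bar on screen, while leave=False clears the 'preconditioning gradient' bar once its loop ends, which is convenient when a function is called many times in a row. A trivial demonstration of the difference:

import time
from tqdm import tqdm

for i in tqdm(range(20), desc='xc_to_x', leave=True):        # bar remains after completion
    time.sleep(0.01)

for i in tqdm(range(20), desc='preconditioning gradient', leave=False):  # bar is erased
    time.sleep(0.01)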
Example #8
    def calcSensitivityMap(self, recompute=False):
        if not hasattr(self, 'sensi_map') or recompute :
            footprint_files = unique(self.observations.footprint)
            with Pool() as p :
                fields = list(tqdm(p.imap(concat_footprints, footprint_files), total=len(footprint_files), desc="Computing network sensitivity map"))
            field = DataArray(array([f.data for f in fields]).sum(0), coords=[fields[0].lats, fields[0].lons], dims=['lats', 'lons'])
            self.sensi_map = field
            self.add_sensiMapIO()
        return self.sensi_map
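
Here tqdm wraps the iterator returned by Pool.imap rather than the input list, so the bar advances as results come back from the worker processes; because imap is lazy and has no length, total= must be given explicitly, and list(...) forces the iteration to completion. A self-contained sketch of the pattern, with a placeholder worker instead of concat_footprints:

from multiprocessing import Pool
from tqdm import tqdm

def _work(x):  # placeholder for the real per-file computation
    return x * x

if __name__ == '__main__':
    tasks = list(range(200))
    with Pool() as p:
        # The bar ticks each time a worker delivers a result
        results = list(tqdm(p.imap(_work, tasks), total=len(tasks),
                            desc='Computing network sensitivity map'))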
Example #9
    def _checkFootprints(self, cache=None):
        footprint_files = unique(self.observations.footprint)

        # Loop over the footprint files (not on the obs, for efficiency)
        for fpf in tqdm(footprint_files, desc='Checking footprints'):

            # 1st, check if the footprint exists, and migrate it to cache if needed:
            fpf = self._checkCacheFile(fpf, cache)

            # Then, look if the file has all the individual obs footprints it's supposed to have
            if fpf is not None :
                fp = h5py.File(fpf, mode='r')

                # Times of the obs that are supposed to be in this file
                times = [x.to_pydatetime() for x in self.observations.loc[self.observations.footprint == fpf, 'time']]

                # Check if a footprint exists, for each time
                fp_exists = array([x.strftime('%Y%m%d%H%M%S') in fp for x in times])

                # Some footprints may exist but be empty; flag those as missing as well
                fp_exists[fp_exists] = [len(fp[x.strftime('%Y%m%d%H%M%S')].keys()) > 0 for x in array(times)[fp_exists]]

                # Store the result in the observations table
                self.observations.loc[self.observations.footprint == fpf, 'footprint_exists'] = fp_exists.astype(bool)
Example #10
    def importFromPath(self, path, pattern='data/nc/co2_*.nc', date_range=(datetime(1000,1,1),datetime(3000,1,1)), lat_range=[-inf,inf], lon_range=(-inf,inf), exclude_mobile=True):
        """
        Import all the observations from obspack files (in netcdf format).
        The space/time domain can be limited using the date_range, lat_range and lon_range arguments.
        By default, observations from mobile platforms are skipped (the method can handle them, but it is not well suited to them, as it would create one "site" entry per observation).
        """

        files = sorted(glob.glob(os.path.join(path, pattern)))
        for file in tqdm(files, desc=f'Import obs files from {os.path.join(path, pattern)}'):

            # Import data
            with Dataset(file) as ds:
                if ds.dataset_parameter == 'co2' : scale = 1.e6
                if exclude_mobile :
                    platform = ds.dataset_platform
                    continue_import = platform in ['fixed']
                if continue_import :
                    if platform in ['fixed'] :
                        continue_import = lat_range[0] <= ds.site_latitude <= lat_range[1] and lon_range[0] <= ds.site_longitude <= lon_range[1]
                        selection_ll = None
                    else :
                        lons = ds['longitude'][:]
                        lats = ds['latitude'][:]
                        selection_ll = (lon_range[0] <= lons) * (lons <= lon_range[1])
                        selection_ll *= (lat_range[0] <= lats) * (lats  <= lat_range[1])
                        continue_import = any(selection_ll)
                else :
                    logger.debug(f'File {os.path.basename(file)} skipped because of platform {platform}')
                if continue_import :
                    time = array([datetime(*x) for x in ds['time_components'][:]])
                    try :
                        selection = (date_range[0] <= time) * (time <= date_range[1])
                    except :
                        import pdb; pdb.set_trace()
                    continue_import = any(selection)
                else :
                    logger.debug(f'No data imported from file {os.path.basename(file)}, because of lat/lon range')


                if continue_import :
                    if selection_ll is not None : selection *= selection_ll
                    nobs = len(time[selection])
                    if 'value_unc' in ds.variables.keys():
                        err = ds.variables['value_unc'][selection]*scale
                    else :
                        err = zeros(sum(selection))
                    try :
                        observations = {
                            'time':time[selection],
                            'lat':ds['latitude'][selection],
                            'lon':ds['longitude'][selection],
                            'alt':ds['altitude'][selection],
                            'height':ds['altitude'][selection]-ds.site_elevation,
                            'obs':ds['value'][selection]*scale,
                            'err':err,
                            'file':array([file]*nobs),
                            'code':array([ds.site_code]*nobs),
                            'name':array([ds.site_name]*nobs)
                        }
                    except :
                        logger.error("Import failed for file %s"%file)
                        import pdb; pdb.set_trace()
                else :
                    logger.debug(f'No data imported from file {os.path.basename(file)}, because of time range')


            if continue_import :
                logger.info(f"{nobs} observations imported from {os.path.basename(file)}")

                # Fill in dataframes :
                observations = DataFrame.from_dict(observations)
                sites = observations.loc[:, ['lat', 'lon', 'alt', 'height', 'file', 'code', 'name']].drop_duplicates()

                for dummy, row in sites.iterrows():
                    # Check if a corresponding site already exists in the database :
                    if self.sites.loc[(self.sites.reindex(columns=sites.columns) == row).all(axis=1)].shape[0] == 0 :
                        self.sites = self.sites.append(row, ignore_index=True)

                    # Retrieve the index of the site (there should be only one!):
                    site = self.sites.loc[(self.sites.loc[:, sites.columns] == row).all(axis=1)]
                    assert site.shape[0] == 1, logger.error(f"Error importing data from {os.path.basename(file)}, {site.shape[0]} entries found for site, instead of exactly 1 expected")
                    isite = site.index[0]

                    # Create the rows to be added to the self.observations dataframe:
                    obs = observations.loc[
                            (observations.loc[:, sites.columns] == row).all(axis=1), 
                            ['time', 'lat', 'lon', 'alt', 'height', 'obs', 'err', 'code']
                    ]
                    obs.loc[:, 'site'] = isite

                    # Append to self.observations:
                    self.observations = self.observations.append(obs, sort=False)