예제 #1
0
    def add_simulations(self, filename, silent=False):
        """ Adds model simulated values to the mole fraction objects.

            :param filename: path of the NetCDF sample file written by the sampling step
            :param silent: when True, suppress the warning about unmatched sample IDs
            :raises IOError: when *filename* does not exist
        """
        if not os.path.exists(filename):
            msg = "Sample output filename for observations could not be found : %s" % filename 
            logging.error(msg)
            logging.error("Did the sampling step succeed?")
            logging.error("...exiting")
            raise IOError(msg)  # call form is valid on both Python 2 and 3

        ncf = io.ct_read(filename, method='read')
        ids = ncf.get_variable('obs_num')
        simulated = ncf.get_variable('flask')
        ncf.close()
        logging.info("Successfully read data from model sample file (%s)" % filename)

        obs_ids = self.getvalues('id').tolist()
        ids = [int(i) for i in ids]  # a real list so len() below also works on Python 3

        # Map each observation id to its FIRST position in the list (mirrors list.index),
        # giving O(1) lookups instead of a linear scan per sample.
        first_index = {}
        for i, obs_id in enumerate(obs_ids):
            if obs_id not in first_index:
                first_index[obs_id] = i

        missing_samples = []

        for idx, val in zip(ids, simulated): 
            if idx in first_index:
                self.datalist[first_index[idx]].simulated = val  # in mol/mol
            else:     
                missing_samples.append(idx)

        if not silent and missing_samples != []:
            logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...')

        logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))
    def get_covariance(self, date, dacycle):
        """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. 
            Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
            The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]

            :param date: datetime used to select the monthly ocean covariance file
            :param dacycle: cycle-control object whose 'dasystem' holds the covariance settings
            :rtype: list of [covariance_matrix, parameter_numbers, filename] triplets
            :raises IOError: when one of the configured covariance files is missing
        """    
        # Get the needed matrices from the specified covariance files

        file_ocn_cov = dacycle.dasystem['ocn.covariance'] 

        bio_cov_dir = dacycle.dasystem['bio.cov.dir']
        bio_cov_prefix = dacycle.dasystem['bio.cov.prefix']
        cov_files = [os.path.join(bio_cov_dir, f) for f in os.listdir(bio_cov_dir) if bio_cov_prefix in f]

        logging.debug("Found %d covariances to use for biosphere" % len(cov_files))

        # The configured ocean covariance filename carries a template month; replace it with the cycle date
        file_ocn_cov = file_ocn_cov.replace('2000.01', date.strftime('%Y.%m'))

        cov_files.append(file_ocn_cov)

        covariancematrixlist = []
        for fname in cov_files:
            if not os.path.exists(fname):
                msg = "Cannot find the specified file %s" % fname 
                logging.error(msg)
                raise IOError(msg)  # call form is valid on both Python 2 and 3
            logging.debug("Using covariance file: %s" % fname)

            f = io.ct_read(fname, 'read')

            if 'pco2' in fname or 'cov_ocean' in fname: 
                # Ocean files store a correlation matrix and use a fixed parameter-number range
                cov = f.get_variable('CORMAT')
                parnr = range(9805, 9835)
            elif 'tropic' in fname:
                cov = f.get_variable('covariance')
                parnr = f.get_variable('parameternumber')
            else: 
                cov = f.get_variable('covariance')
                parnr = f.get_variable('parameternumber')
                # this scaling factor makes the total variance close to the value of a single ecoregion
                cov_sf = 90.0 / np.sqrt(cov.diagonal().sum())
                cov = cov * cov_sf

            f.close()
            covariancematrixlist.append([cov, parnr, fname])

        logging.debug("Succesfully closed files after retrieving prior covariance matrices")

        # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it

        return covariancematrixlist
예제 #3
0
    def add_observations(self):
        """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file
      
            The ObsPack mole fraction files are provided as time series per site with all dates in sequence. 
            We will loop over all site files in the ObsPackage, and subset each to our needs
            
        """

        # Step 1: Read list of available site files in package

        infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,))
        with open(infile, 'r') as f:  # context manager guarantees the summary file is closed
            lines = f.readlines()

        ncfilelist = []
        for line in lines:
            if line.startswith('#'):
                continue  # header line

            # Only the fixed-width first part of each line holds the dataset columns
            ncfile, lab, start_date, stop_date, data_comparison = line[:105].split()

            ncfilelist.append(ncfile)

        logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist))

        # Step 2: Loop over site files, subset each to the current cycle window

        for ncfile in ncfilelist:

            infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc')
            ncf = io.ct_read(infile, 'read')
            idates = ncf.get_variable('time_components')
            dates = array([dtm.datetime(*d) for d in idates])

            # Indices of observations that fall inside the current cycle window
            subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0]

            dates = dates.take(subselect, axis=0)
            
            obspacknum = ncf.get_variable('obspack_num').take(subselect)  # or should we propagate obs_num which is not unique across datasets??
            obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
            # character arrays become stripped lower-case strings (works on both Python 2 and 3)
            obspackid = [s.tostring().lower().strip() for s in obspackid]
            datasetname = ncfile  # use full name of dataset to propagate for clarity
            lats = ncf.get_variable('latitude').take(subselect, axis=0)
            lons = ncf.get_variable('longitude').take(subselect, axis=0)
            alts = ncf.get_variable('altitude').take(subselect, axis=0)
            obs = ncf.get_variable('value').take(subselect, axis=0)
            species = ncf.get_attribute('dataset_parameter')
            flags = ncf.get_variable('obs_flag').take(subselect, axis=0)
            ncf.close()

            for n in range(len(dates)): 
                self.datalist.append(MoleFractionSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile))

            logging.debug("Added %d observations from file (%s) to the Data list" % (len(dates), ncfile)) 

        logging.info("Observations list now holds %d values" % len(self.datalist))
예제 #4
0
    def add_observations(self):
        """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file
      
            The CarbonTracker mole fraction files are provided as one long list of obs for all possible dates. So we can 
            either:
            
            (1) read all, and the subselect the data we will use in the rest of this cycle
            (2) Use nco to make a subset of the data
            
            For now, we will stick with option (1) 
        
        """
        ncf = io.ct_read(self.obs_filename, 'read')
        idates = ncf.get_variable('date_components')
        dates = array([dtm.datetime(*d) for d in idates])

        # Indices of observations that fall inside the current cycle window
        subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0]

        dates = dates.take(subselect, axis=0)
        
        ids = ncf.get_variable('id').take(subselect, axis=0)
        # character arrays become stripped lower-case strings (works on both Python 2 and 3)
        evn = ncf.get_variable('eventnumber').take(subselect, axis=0)
        evn = [s.tostring().lower().strip() for s in evn]
        sites = ncf.get_variable('site').take(subselect, axis=0)
        sites = [s.tostring().lower().strip() for s in sites]
        lats = ncf.get_variable('lat').take(subselect, axis=0)
        lons = ncf.get_variable('lon').take(subselect, axis=0)
        alts = ncf.get_variable('alt').take(subselect, axis=0)
        obs = ncf.get_variable('obs').take(subselect, axis=0) * 1.e-6
        logging.info("Converting observed values from ppm to mol/mol!!!!")
        species = ncf.get_variable('species').take(subselect, axis=0)
        species = [s.tostring().lower().strip() for s in species]
        strategy = ncf.get_variable('sampling_strategy').take(subselect, axis=0)
        flags = ncf.get_variable('NOAA_QC_flags').take(subselect, axis=0)
        flags = [s.tostring().lower().strip() for s in flags]
        # an unflagged record is '...': accepted (1), anything else is rejected (0)
        flags = [int(f == '...') for f in flags]
        ncf.close()

        logging.debug("Successfully read data from obs file (%s)" % self.obs_filename)

        for n in range(len(dates)): 
            self.datalist.append(MoleFractionSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0, self.obs_filename))
        logging.debug("Added %d observations to the Data list" % len(dates))
예제 #5
0
    def read_from_legacy_file(self, filename, qual='opt'):
        """ 
        :param filename: the full filename for the input NetCDF file
        :param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file
        :rtype: None
        :raises ValueError: when *qual* is neither 'opt' nor 'prior'

        Read the StateVector information from a NetCDF file and put in a StateVector object
        In principle the input file will have only one four datasets inside 
        called:
            * `meanstate_prior`, dimensions [nlag, nparamaters]
            * `ensemblestate_prior`, dimensions [nlag,nmembers, nparameters]
            * `meanstate_opt`, dimensions [nlag, nparamaters]
            * `ensemblestate_opt`, dimensions [nlag,nmembers, nparameters]

        This NetCDF information can be written to file using 
        :meth:`~da.baseclasses.statevector.StateVector.write_to_file`

        """
        # Fail fast on an invalid qualifier; previously this surfaced later as a
        # confusing NameError on 'meanstate'.
        if qual not in ('opt', 'prior'):
            raise ValueError("qual must be 'opt' or 'prior', got %r" % (qual,))

        f = io.ct_read(filename, 'read')

        for n in range(self.nlag):
            if qual == 'opt':
                meanstate = f.get_variable('xac_%02d' % (n + 1))
                EnsembleMembers = f.get_variable('adX_%02d' % (n + 1))
            else:  # qual == 'prior'
                meanstate = f.get_variable('xpc_%02d' % (n + 1))
                EnsembleMembers = f.get_variable('pdX_%02d' % (n + 1))

            if self.ensemble_members[n]:
                self.ensemble_members[n] = []
                logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1))

            for m in range(self.nmembers):
                newmember = EnsembleMember(m)
                newmember.param_values = EnsembleMembers[m, :].flatten() + meanstate  # add the mean to the deviations to hold the full parameter values
                self.ensemble_members[n].append(newmember)

        f.close()

        logging.info('Successfully read the State Vector from file (%s) ' % filename)
예제 #6
0
    def get_covariance(self, date, dacycle):
        """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. 
            Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
            The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]

            :param date: datetime used to select the monthly ocean covariance file
            :param dacycle: cycle-control object whose 'dasystem' holds the covariance settings
            :rtype: np.ndarray, the full (nparams x nparams) prior covariance matrix
            :raises IOError: when one of the configured covariance files is missing
        """    
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            plt = None  # plotting is optional; visualization below is skipped

        # Get the needed matrices from the specified covariance files

        file_ocn_cov = dacycle.dasystem['ocn.covariance'] 
        file_bio_cov = dacycle.dasystem['bio.covariance'] 

        # The ocean covariance filename carries a template month; replace it with the cycle date

        file_ocn_cov = file_ocn_cov.replace('2000.01', date.strftime('%Y.%m'))

        for fil in [file_ocn_cov, file_bio_cov]:
            if not os.path.exists(fil):
                msg = "Cannot find the specified file %s" % fil
                logging.error(msg)
                raise IOError(msg)  # call form is valid on both Python 2 and 3
            logging.info("Using covariance file: %s" % fil)

        f_ocn = io.ct_read(file_ocn_cov, 'read')
        f_bio = io.ct_read(file_bio_cov, 'read')

        cov_ocn = f_ocn.get_variable('CORMAT')
        if 'covariance' in f_bio.variables:  # membership test replaces Python-2-only has_key()
            cov_bio = f_bio.get_variable('covariance')  # newly created CTDAS covariance files
        else:
            cov_bio = f_bio.get_variable('qprior')  # old CarbonTracker covariance files

        f_ocn.close()
        f_bio.close()

        logging.debug("Succesfully closed files after retrieving prior covariance matrices")

        # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it

        fullcov = np.zeros((self.nparams, self.nparams), float)

        nocn = cov_ocn.shape[0]
        nbio = cov_bio.shape[0]

        # Block-diagonal layout: biosphere first, then ocean, then one tiny-variance
        # slot for the remaining (fixed) parameter.
        fullcov[0:nbio, 0:nbio] = cov_bio
        fullcov[nbio:nbio + nocn, nbio:nbio + nocn] = cov_ocn
        fullcov[nocn + nbio, nocn + nbio] = 1.e-10

        if plt is not None:
            try:
                plt.imshow(fullcov)
                plt.colorbar()
                plt.savefig('fullcovariancematrix.png')
                plt.close('all')
                logging.debug("Covariance matrix visualized for inspection")
            except Exception:
                pass  # visualization is best-effort only

        return fullcov
예제 #7
0
def save_time_avg_data(dacycle, infile, avg='monthly'):
    """ Function saves time mean surface flux data to NetCDF files
        
        *** Inputs ***
        rundat : a RunInfo object

        *** Outputs ***
        daily NetCDF file containing 1-hourly global surface fluxes at 1x1 degree

        *** Example ***
        ./expand_savestate project=enkf_release sd=20000101 ed=20010101 """

    # Determine the time resolution of the input data from the filename;
    # last matching keyword wins, as in the original cascade of if-statements.
    intime = None
    for resolution in ('weekly', 'monthly', 'yearly'):
        if resolution in infile:
            intime = resolution
    if intime is None:
        # previously this surfaced later as a NameError on 'intime'
        raise ValueError('Cannot determine time resolution (weekly/monthly/yearly) from input file name %s' % infile)

    dirname, filename = os.path.split(infile)
    outdir = create_dirs(os.path.join(dacycle['dir.analysis'], dirname.replace(intime, avg)))

    dectime0 = date2num(datetime(2000, 1, 1))

    # Check the input file BEFORE creating the output file, so a missing input
    # no longer leaves an empty, unclosed output NetCDF file behind.
    if not os.path.exists(infile):
        logging.error("Needed input file (%s) not found. Please create this first:" % infile)
        logging.error("returning...")
        return None 

    # Create NetCDF output file
    saveas = os.path.join(outdir, filename)
    ncf = io.CT_CDF(saveas, 'create')
    dimdate = ncf.add_date_dim()

    # Open input file ('infd', not 'file', to avoid shadowing the builtin)
    infd = io.ct_read(infile, 'read')
    datasets = list(infd.variables.keys())  # list() so 'date' can be prepended below on Python 3
    date = infd.get_variable('date')
    globatts = infd.ncattrs()

    # Copy global attributes that are not yet present on the output file
    for att in globatts:
        attval = infd.getncattr(att)
        if att not in ncf.ncattrs():
            ncf.setncattr(att, attval)

    time = [datetime(2000, 1, 1) + timedelta(days=d) for d in date]

    # loop over datasets in infile, skip idate and date as we will make new time axis for the averaged data

    for sds in ['date'] + datasets:

        # get original data

        data = infd.get_variable(sds)
        varatts = infd.variables[sds].ncattrs()
        vardims = infd.variables[sds].dimensions

        # Depending on dims of input dataset, create dims for output dataset. Note that we add the new dimdate now.

        for d in vardims:
            if 'date' in d:
                continue
            if d not in ncf.dimensions.keys():
                ncf.createDimension(d, size=len(infd.dimensions[d]))

        savedict = ncf.standard_var(sds)
        savedict['name'] = sds
        savedict['dims'] = vardims
        savedict['units'] = infd.variables[sds].units
        savedict['long_name'] = infd.variables[sds].long_name
        savedict['comment'] = infd.variables[sds].comment
        savedict['standard_name'] = infd.variables[sds].standard_name
        savedict['count'] = 0

        if 'date' not in vardims:
            # no time axis: copy the dataset through unchanged
            savedict['values'] = data
            ncf.add_data(savedict)
        else:
            if avg == 'monthly':
                time_avg, data_avg = timetools.monthly_avg(time, data)
            elif avg == 'seasonal':
                time_avg, data_avg = timetools.season_avg(time, data)
            elif avg == 'yearly':
                time_avg, data_avg = timetools.yearly_avg(time, data)
            elif avg == 'longterm':
                time_avg, data_avg = timetools.longterm_avg(time, data)
                time_avg = [time_avg]
                data_avg = [data_avg]
            else:
                raise ValueError('Averaging (%s) does not exist' % avg)  # call form is valid on both Python 2 and 3

            # write one record per averaging period
            for count, (dd, data_slice) in enumerate(zip(time_avg, data_avg)):
                if sds == 'date':
                    savedict['values'] = date2num(dd) - dectime0
                else:
                    savedict['values'] = data_slice
                savedict['count'] = count
                ncf.add_data(savedict, silent=True)

                sys.stdout.write('.')

            sys.stdout.write('\n')
            sys.stdout.flush()

    # end NetCDF file access
    infd.close()
    ncf.close()

    logging.info("------------------- Finished time averaging---------------------------------")

    return saveas
예제 #8
0
def save_weekly_avg_agg_data(dacycle, region_aggregate='olson'):
    """
        Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the 
        function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected
        onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`.
        
           :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
           :param region_aggregate: aggregation target, one of 'olson', 'olson_extended', 'transcom',
                                    'transcom_extended' or 'country'
           :rtype: string, path of the NetCDF file written (None when required input is missing)

        This function only read the prior fluxes from the flux_1x1.nc files created before, because we want to convolve 
        these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete
        statevector in units of mol/box/s which we then turn into TC fluxes and covariances.
    """

    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_%s_weekly' % region_aggregate))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start'] 
    enddate = dacycle['time.end'] 
    # date coordinate for this cycle: mid-cycle, in days since 2000-01-01
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    logging.debug("Aggregating 1x1 fluxes to %s totals" % region_aggregate)

    # Write/Create NetCDF output file
    saveas = os.path.join(dirname, '%s_fluxes.%s.nc' % (region_aggregate, startdate.strftime('%Y-%m-%d')))
    ncf = io.CT_CDF(saveas, 'write')
    dimdate = ncf.add_date_dim()
    dimidateformat = ncf.add_date_dim_format()
    dimgrid = ncf.add_latlon_dim()  # for mask

    #
    # Select regions to aggregate to
    #
    if region_aggregate == "olson":
        regionmask = tc.olson240mask
        dimname = 'olson'
        dimregs = ncf.add_dim(dimname, regionmask.max())

        regionnames = []
        for i in range(11):
            for j in range(19):
                regionnames.append("%s_%s" % (tc.transnams[i], tc.olsonnams[j],))
        regionnames.extend(tc.oifnams)
        xform = False

        for i, name in enumerate(regionnames):
            lab = 'Aggregate_Region_%03d' % (i + 1,)
            setattr(ncf, lab, name)

    elif region_aggregate == "olson_extended":
        regionmask = tc.olson_ext_mask
        dimname = 'olson_ext'
        dimregs = ncf.add_dim(dimname, regionmask.max())
        xform = False

        for i, name in enumerate(tc.olsonextnams):
            # NOTE(review): 'Aggreate' typo kept for backward compatibility of attribute names
            lab = 'Aggreate_Region_%03d' % (i + 1)
            setattr(ncf, lab, name)

    elif region_aggregate == "transcom":
        regionmask = tc.transcommask
        dimname = 'tc'
        dimregs = ncf.add_region_dim(type='tc')
        xform = False

    elif region_aggregate == "transcom_extended":
        regionmask = tc.transcommask
        dimname = 'tc_ext'
        dimregs = ncf.add_region_dim(type='tc_ext')
        xform = True  # results are transformed to extended TC regions below

    elif region_aggregate == "country":

        xform = False
        countrydict = ct.get_countrydict()
        selected = ['Russia', 'Canada', 'China', 'United States', 'EU27', 'Brazil', 'Australia', 'India'] #,'G8','UNFCCC_annex1','UNFCCC_annex2']
        regionmask = np.zeros((180, 360,), 'float')

        for i, name in enumerate(selected):
            lab = 'Country_%03d' % (i + 1,)
            setattr(ncf, lab, name)

            if name == 'EU27':
                namelist = ct.EU27
            elif name == 'EU25':
                namelist = ct.EU25
            elif name == 'G8':
                namelist = ct.G8
            elif name == 'UNFCCC_annex1':
                namelist = ct.annex1
            elif name == 'UNFCCC_annex2':
                namelist = ct.annex2
            else:
                namelist = [name]

            for countryname in namelist:
                try:
                    country = countrydict[countryname]
                    regionmask.put(country.gridnr, i + 1)
                except Exception:  # unknown country or bad grid numbers: skip it
                    continue

        dimname = 'country'
        dimregs = ncf.add_dim(dimname, regionmask.max())

    else:
        # previously an unknown option surfaced later as a NameError on 'regionmask'
        raise ValueError('Unknown region_aggregate option: %s' % region_aggregate)

    skip = ncf.has_date(ncfdate)
    if skip:
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:
        #
        # set title and tell GMT that we are using "pixel registration"
        #
        setattr(ncf, 'Title', 'CTDAS Aggregated fluxes')
        setattr(ncf, 'node_offset', 1)

        savedict = ncf.standard_var('unknown')
        savedict['name'] = 'regionmask'
        savedict['comment'] = 'numerical mask used to aggregate 1x1 flux fields, each integer 0,...,N is one region aggregated'
        savedict['values'] = regionmask.tolist()
        savedict['units'] = '-'
        savedict['dims'] = dimgrid
        savedict['count'] = 0
        ncf.add_data(savedict)

        # Get input data from 1x1 degree flux files

        area = globarea()

        infile = os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly', 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
        if not os.path.exists(infile):
            logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
            ncf.close()  # do not leave the output file handle open on early return
            return None

        ncf_in = io.ct_read(infile, 'read')

        # Get the date variable, and find index corresponding to the dacycle date

        try:
            dates = ncf_in.variables['date'][:]
        except KeyError:
            logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
            logging.error("Please make sure you create gridded fluxes before making TC fluxes ")
            raise KeyError

        try:
            index = dates.tolist().index(ncfdate)
        except ValueError:
            logging.error("The requested cycle date is not yet available in file %s " % infile)
            logging.error("Please make sure you create state based fluxes before making TC fluxes ")
            raise ValueError

        # First add the date for this cycle to the file, this grows the unlimited dimension

        savedict = ncf.standard_var(varname='date')
        savedict['values'] = ncfdate
        savedict['dims'] = dimdate
        savedict['count'] = index
        ncf.add_data(savedict)

        # Now convert other variables that were inside the statevector file

        vardict = ncf_in.variables
        for vname, vprop in vardict.items():  # items() works on both Python 2 and 3 (iteritems does not)
            if vname == 'latitude': continue
            elif vname == 'longitude': continue
            elif vname == 'date': continue
            elif vname == 'idate': continue
            elif 'std' in vname: continue
            elif 'ensemble' in vname:

                # Project each ensemble member to regions, then form the regional covariance
                data = ncf_in.get_variable(vname)[index]

                dimensemble = ncf.add_dim('members', data.shape[0])

                regiondata = []
                for member in data:
                    aggdata = state_to_grid(member * area, regionmask, reverse=True, mapname=region_aggregate)
                    regiondata.append(aggdata)

                regiondata = np.array(regiondata)
                try:
                    regioncov = regiondata.transpose().dot(regiondata) / (data.shape[0] - 1) 
                except Exception:
                    regioncov = np.dot(regiondata.transpose(), regiondata) / (data.shape[0] - 1) # Huygens fix: fall back for numpy without ndarray.dot

                if xform:
                    regiondata = ExtendedTCRegions(regiondata, cov=False)
                    regioncov = ExtendedTCRegions(regioncov, cov=True)

                savedict = ncf.standard_var(varname=vname)
                savedict['name'] = vname.replace('ensemble', 'covariance') 
                savedict['units'] = '[mol/region/s]^2'
                savedict['dims'] = dimdate + dimregs + dimregs
                savedict['count'] = index
                savedict['values'] = regioncov
                ncf.add_data(savedict)

                # the ensemble deviations themselves are written by the shared tail below
                savedict = ncf.standard_var(varname=vname)
                savedict['name'] = vname 
                savedict['units'] = 'mol/region/s'
                savedict['dims'] = dimdate + dimensemble + dimregs

            elif 'flux' in vname:

                data = ncf_in.get_variable(vname)[index]

                regiondata = state_to_grid(data * area, regionmask, reverse=True, mapname=region_aggregate)

                if xform:
                    regiondata = ExtendedTCRegions(regiondata)

                savedict = ncf.standard_var(varname=vname)
                savedict['dims'] = dimdate + dimregs
                savedict['units'] = 'mol/region/s'

            else:

                data = ncf_in.get_variable(vname)[:]
                regiondata = state_to_grid(data, regionmask, reverse=True, mapname=region_aggregate)
                if xform:
                    regiondata = ExtendedTCRegions(regiondata)

                savedict = ncf.standard_var(varname=vname)
                savedict['dims'] = dimdate + dimregs

            # shared tail: write the per-region data for this variable
            savedict['count'] = index
            savedict['values'] = regiondata
            ncf.add_data(savedict)

        ncf_in.close()
    ncf.close()

    logging.info("%s aggregated weekly average fluxes now written" % dimname)

    return saveas
예제 #9
0
def save_weekly_avg_ext_tc_data(dacycle):
    """ Function SaveTCDataExt saves surface flux data to NetCDF files for extended TransCom regions
        
        *** Inputs ***
        rundat : a RunInfo object

        *** Outputs ***
        NetCDF file containing n-hourly global surface fluxes per TransCom region

        *** Example ***
        ./expand_savestate project=enkf_release sd=20000101 ed=20010101 """

    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly'))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start'] 
    enddate = dacycle['time.end'] 
    # date coordinate for this cycle: mid-cycle, in days since 2000-01-01
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    # Write/Create NetCDF output file
    saveas = os.path.join(dirname, 'tc_extfluxes.nc')
    ncf = io.CT_CDF(saveas, 'write')
    dimdate = ncf.add_date_dim()
    dimidateformat = ncf.add_date_dim_format()
    dimregs = ncf.add_region_dim(type='tc_ext')

    # set title and tell GMT that we are using "pixel registration"
    setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes')
    setattr(ncf, 'node_offset', 1)

    skip = ncf.has_date(ncfdate)
    if skip:
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:
        infile = os.path.join(dacycle['dir.analysis'], 'data_tc_weekly', 'tcfluxes.nc')
        if not os.path.exists(infile):
            logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
            ncf.close()  # do not leave the output file handle open on early return
            return None

        ncf_in = io.ct_read(infile, 'read')

        # Get the date variable, and find index corresponding to the dacycle date

        try:
            dates = ncf_in.variables['date'][:]
        except KeyError:
            logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
            logging.error("Please make sure you create gridded fluxes before making extended TC fluxes")
            raise KeyError

        try:
            index = dates.tolist().index(ncfdate)
        except ValueError:
            logging.error("The requested cycle date is not yet available in file %s " % infile)
            logging.error("Please make sure you create state based fluxes before making extended TC fluxes ")
            raise ValueError

        # First add the date for this cycle to the file, this grows the unlimited dimension

        savedict = ncf.standard_var(varname='date')
        savedict['values'] = ncfdate
        savedict['dims'] = dimdate
        savedict['count'] = index
        ncf.add_data(savedict)

        # Now convert other variables that were inside the tcfluxes.nc file

        vardict = ncf_in.variables
        for vname, vprop in vardict.items():  # items() works on both Python 2 and 3 (iteritems does not)

            # skip coordinate variables BEFORE reading their data slice
            if vname in ('latitude', 'longitude', 'date', 'idate'):
                continue

            data = ncf_in.get_variable(vname)[index]

            if 'cov' in vname:

                tcdata = ExtendedTCRegions(data, cov=True)

                savedict = ncf.standard_var(varname=vname)
                savedict['units'] = '[mol/region/s]**2'
                savedict['dims'] = dimdate + dimregs + dimregs
                
            else:

                tcdata = ExtendedTCRegions(data, cov=False)

                savedict = ncf.standard_var(varname=vname)
                savedict['dims'] = dimdate + dimregs
                savedict['units'] = 'mol/region/s'

            savedict['count'] = index
            savedict['values'] = tcdata
            ncf.add_data(savedict)

        ncf_in.close()
    ncf.close()

    logging.info("TransCom weekly average extended fluxes now written")

    return saveas
예제 #10
0
def save_weekly_avg_tc_data(dacycle, statevector):
    """
        Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the 
        function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected
        onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`.
        
           :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
           :param statevector: a :class:`~da.baseclasses.statevector.StateVector`
           :rtype: None

        This function only read the prior fluxes from the flux_1x1.nc files created before, because we want to convolve 
        these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete
        statevector in units of mol/box/s which we then turn into TC fluxes and covariances.
    """

#
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly'))
#
# Some help variables
#
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start'] 
    enddate = dacycle['time.end'] 
    # Decimal-day time coordinate for the middle of this cycle, relative to 2000-01-01
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    # Write/Create NetCDF output file
    #
    saveas = os.path.join(dirname, 'tcfluxes.nc')
    ncf = io.CT_CDF(saveas, 'write')
    dimdate = ncf.add_date_dim()
    dimidateformat = ncf.add_date_dim_format()
    dimregs = ncf.add_region_dim(type='tc')
#
# set title and tell GMT that we are using "pixel registration"
#
    setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes')
    setattr(ncf, 'node_offset', 1)
    #

    # Skip this cycle entirely when its date was already appended in a previous run
    skip = ncf.has_date(ncfdate)
    if skip:
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:

        # Get input data: the statevector fluxes written by save_weekly_avg_state_data

        infile = os.path.join(dacycle['dir.analysis'], 'data_state_weekly', 'statefluxes.nc')
        if not os.path.exists(infile):
            logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
            return None

        ncf_in = io.ct_read(infile, 'read')

        # Transform data one by one

        # Get the date variable, and find index corresponding to the dacycle date

        try:
            dates = ncf_in.variables['date'][:]
        except KeyError:
            logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
            logging.error("Please make sure you create gridded fluxes before making TC fluxes ")
            raise KeyError

        try:
            index = dates.tolist().index(ncfdate)
        except ValueError:
            logging.error("The requested cycle date is not yet available in file %s " % infile)
            logging.error("Please make sure you create state based fluxes before making TC fluxes")
            raise ValueError

        # First add the date for this cycle to the file, this grows the unlimited dimension

        savedict = ncf.standard_var(varname='date')
        savedict['values'] = ncfdate
        savedict['dims'] = dimdate
        savedict['count'] = index
        ncf.add_data(savedict)

        # Now convert other variables that were inside the flux_1x1 file

        vardict = ncf_in.variables
        for vname in vardict.keys():

            data = ncf_in.get_variable(vname)[index]
    
            # Coordinate/time variables and per-parameter std-dev fields need no TC projection
            if vname in ['latitude','longitude', 'date', 'idate'] or 'std' in vname:
                continue
            elif 'ensemble' in vname:
                # Project each ensemble member onto TransCom regions, then build
                # the regional covariance matrix from the member deviations
                tcdata = []
                for member in data:
                    tcdata.append(statevector.vector2tc(vectordata=member))

                tcdata = np.array(tcdata)
                try:
                    cov = tcdata.transpose().dot(tcdata) / (statevector.nmembers - 1) 
                except AttributeError:
                    # older numpy releases have no ndarray.dot method (Huygens fix)
                    cov = np.dot(tcdata.transpose(), tcdata) / (statevector.nmembers - 1)

                tcdata = cov

                savedict = ncf.standard_var(varname=vname.replace('ensemble', 'cov'))
                savedict['units'] = '[mol/region/s]**2'
                savedict['dims'] = dimdate + dimregs + dimregs
                
            else:

                tcdata = statevector.vector2tc(vectordata=data) # vector to TC

                savedict = ncf.standard_var(varname=vname)
                savedict['dims'] = dimdate + dimregs
                savedict['units'] = 'mol/region/s'

            savedict['count'] = index
            savedict['values'] = tcdata
            ncf.add_data(savedict)

        ncf_in.close()
    ncf.close()

    logging.info("TransCom weekly average fluxes now written")

    return saveas
예제 #11
0
def save_weekly_avg_state_data(dacycle, statevector):
    """
        Function creates a NetCDF file with output for all parameters. It uses the flux data written by the 
        :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and
        variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`.
        
           :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
           :param statevector: a :class:`~da.baseclasses.statevector.StateVector`
           :rtype: None
    """
 
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly'))
#
# Some help variables
#
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start'] 
    enddate = dacycle['time.end'] 
    nlag = statevector.nlag

    # 1x1 degree grid-cell surface areas, used to turn fluxes per m2 into totals per box
    area = globarea()

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

#
# Create or open NetCDF output file
#
    saveas = os.path.join(dirname, 'statefluxes.nc')
    ncf = io.CT_CDF(saveas, 'write')

#
# Create dimensions and lat/lon grid
#
    dimregs = ncf.add_dim('nparameters', statevector.nparams)
    dimmembers = ncf.add_dim('nmembers', statevector.nmembers)
    dimdate = ncf.add_date_dim()
#
# set title and tell GMT that we are using "pixel registration"
#
    setattr(ncf, 'Title', 'CarbonTracker fluxes')
    setattr(ncf, 'node_offset', 1)
#
# skip dataset if already in file
#
    # Decimal-day time coordinate for the middle of this cycle, relative to 2000-01-01
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
    skip = ncf.has_date(ncfdate)
    if skip:
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:
        # Index along the unlimited (date) dimension where this cycle's data is appended
        next_index = ncf.inq_unlimlen()[0]

#
# if not, process this cycle. Start by getting flux input data from CTDAS
#
        filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))

        # local renamed from 'file' to avoid shadowing the builtin
        ncf_flux = io.ct_read(filename, 'read')
        bio = np.array(ncf_flux.get_variable(dacycle.dasystem['background.co2.bio.flux']))
        ocean = np.array(ncf_flux.get_variable(dacycle.dasystem['background.co2.ocean.flux']))
        fire = np.array(ncf_flux.get_variable(dacycle.dasystem['background.co2.fires.flux']))
        fossil = np.array(ncf_flux.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
        # Optional extra '...sam' flux components; when present they are folded into bio and fire
        if dacycle.dasystem['background.co2.biosam.flux'] in ncf_flux.variables.keys():
            sam = True
            biosam = np.array(ncf_flux.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
            firesam = np.array(ncf_flux.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
        else:
            sam = False
        ncf_flux.close()

        if sam:
            bio = bio + biosam
            fire = fire + firesam

        # Aggregate the gridded fluxes to one total per statevector element (mol region-1 s-1)
        vectorbio = statevector.grid2vector(griddata=bio * area, method='sum')
        vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum')
        vectorfire = statevector.grid2vector(griddata=fire * area, method='sum')
        vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum')


# Start adding datasets from here on, both prior and posterior datasets for bio and ocn

        for prior in [True, False]:
#
# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step
# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date..
#
            if prior:
                qual_short = 'prior'
                for n in range(nlag, 0, -1):
                    priordate = startdate + n*dt - timedelta(dt.days * n)
                    savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
                    filename = os.path.join(savedir,'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
                    if os.path.exists(filename):
                        statevector.read_from_file(filename, qual=qual_short)
                        # Replace the mean statevector by all ones (assumed priors)
                        statemean = np.ones((statevector.nparams,))
                        choicelag = n
                        logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag))
                        break
                # NOTE(review): if no savestate file exists for any lag, statemean/choicelag
                # stay unset and the code below raises NameError — confirm this cannot happen
            else:
                qual_short = 'opt'
                savedir = dacycle['dir.output']
                filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
                statevector.read_from_file(filename)
                choicelag = 1
                statemean = statevector.ensemble_members[choicelag - 1][0].param_values
                logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag))
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
            data = statemean * vectorbio # units of mole region-1 s-1

            savedict = ncf.standard_var(varname='bio_flux_%s' % qual_short)
            savedict['values'] = data
            savedict['dims'] = dimdate + dimregs
            savedict['count'] = next_index
            ncf.add_data(savedict)

#
# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to
# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the 
# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want.
#
# The implementation is done by multiplying the ensemble with the vectorbio only, and not with the statemean values
# which are assumed 1.0 in the prior always.
#

            members = statevector.ensemble_members[choicelag - 1] 
            # Deviations of each member flux from member 0 (the ensemble mean)
            deviations = np.array([mem.param_values * vectorbio for mem in members])
            deviations = deviations - deviations[0, :]

            savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)

            savedict['values'] = deviations.tolist()
            savedict['dims'] = dimdate + dimmembers + dimregs
            savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
            savedict['units'] = "mol region-1 s-1"
            savedict['count'] = next_index
            ncf.add_data(savedict)

            savedict = ncf.standard_var('unknown')
            savedict['name'] = 'bio_flux_%s_std' % qual_short
            savedict['long_name'] = 'Biosphere flux standard deviation, %s' % qual_short
            savedict['values'] = deviations.std(axis=0)
            savedict['dims'] = dimdate + dimregs
            savedict['comment'] = "This is the standard deviation on each parameter"
            savedict['units'] = "mol region-1 s-1"
            savedict['count'] = next_index
            ncf.add_data(savedict)

            data = statemean * vectorocn # units of mole region-1 s-1

            savedict = ncf.standard_var(varname='ocn_flux_%s' % qual_short)
            savedict['values'] = data
            savedict['dims'] = dimdate + dimregs
            savedict['count'] = next_index
            ncf.add_data(savedict)


#
# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to
# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the 
# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want.
#
# The implementation is done by multiplying the ensemble with the vectorocn only, and not with the statemean values
# which are assumed 1.0 in the prior always.
#

            deviations = np.array([mem.param_values * vectorocn for mem in members])
            deviations = deviations - deviations[0, :]

            savedict = ncf.standard_var(varname='ocn_flux_%s_ensemble' % qual_short)
            savedict['values'] = deviations.tolist()
            savedict['dims'] = dimdate + dimmembers + dimregs
            savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
            savedict['units'] = "mol region-1 s-1"
            savedict['count'] = next_index
            ncf.add_data(savedict)

            savedict = ncf.standard_var('unknown')
            savedict['name'] = 'ocn_flux_%s_std' % qual_short
            savedict['long_name'] = 'Ocean flux standard deviation, %s' % qual_short
            savedict['values'] = deviations.std(axis=0)
            savedict['dims'] = dimdate + dimregs
            savedict['comment'] = "This is the standard deviation on each parameter"
            savedict['units'] = "mol region-1 s-1"
            savedict['count'] = next_index
            ncf.add_data(savedict)

        # Fire and fossil fluxes are imposed (not optimized), so written only once

        data = vectorfire

        savedict = ncf.standard_var(varname='fire_flux_imp')
        savedict['values'] = data
        savedict['dims'] = dimdate + dimregs
        savedict['count'] = next_index
        ncf.add_data(savedict)

        data = vectorfossil

        savedict = ncf.standard_var(varname='fossil_flux_imp')
        savedict['values'] = data
        savedict['dims'] = dimdate + dimregs
        savedict['count'] = next_index
        ncf.add_data(savedict)

        savedict = ncf.standard_var(varname='date')
        savedict['values'] = ncfdate
        savedict['dims'] = dimdate
        savedict['count'] = next_index
        ncf.add_data(savedict)

        sys.stdout.write('.')
        sys.stdout.flush()
#
#   Done, close the new NetCDF file
#
    ncf.close()
#
#   Return the full name of the NetCDF file so it can be processed by the next routine
#
    logging.info("Vector weekly average fluxes now written")

    return saveas
def write_mole_fractions(dacycle):
    """ 
    
    Write Sample information to NetCDF files. These files are organized by site and 
    have an unlimited time axis to which data is appended each cycle.

    The needed information is obtained from the sample_auxiliary.nc files and the original input data files from ObsPack. 

    The steps are:

    (1) Create a directory to hold timeseries output files
    (2) Read the sample_auxiliary.nc file for this cycle and get a list of original files they were obtained from
    (3) For each file, copy the original data file from ObsPack (if not yet present)
    (4) Open the copied file, find the index of each observation, fill in the simulated data
    
    """

    dirname = create_dirs(
        os.path.join(dacycle['dir.analysis'], 'data_molefractions'))
    #
    # Some help variables
    #
    # NOTE(review): dectime0 and dt are computed here but never used below in this function
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']

    logging.debug("DA Cycle start date is %s" %
                  startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" %
                  enddate.strftime('%Y-%m-%d %H:%M'))

    dacycle['time.sample.stamp'] = "%s_%s" % (
        startdate.strftime("%Y%m%d%H"),
        enddate.strftime("%Y%m%d%H"),
    )

    # Step (1): Get the posterior sample output data file for this cycle

    infile = os.path.join(
        dacycle['dir.output'],
        'sample_auxiliary_%s.nc' % dacycle['time.sample.stamp'])

    ncf_in = io.ct_read(infile, 'read')

    obs_num = ncf_in.get_variable('obs_num')
    obs_val = ncf_in.get_variable('observed')
    simulated = ncf_in.get_variable('modelsamples')
    infilename = ncf_in.get_variable('inputfilename')
    infiles1 = netCDF4.chartostring(infilename).tolist()
    # In case of reanalysis on different platform, obspack-input-directory might have a different name.
    # This is checked here, and the filenames are corrected
    dir_from_rc = dacycle.dasystem['obspack.input.dir']
    dir_from_output = infiles1[0]
    # Compare the path prefixes up to the 'obspacks' directory component
    d1 = dir_from_rc[:dir_from_rc.find('obspacks')]
    d2 = dir_from_output[:dir_from_output.find('obspacks')]
    if d1 == d2:
        infiles = infiles1
    else:
        # Rewrite the stored paths so they point into the local obspack input directory
        infiles = []
        for ff in infiles1:
            infiles.append(ff.replace(d2, d1))

    #infiles   = [join(s.compressed(),'') for s in infilename]

    ncf_in.close()

    # Step (2): Get the prior sample output data file for this cycle

    infile = os.path.join(dacycle['dir.output'],
                          'optimizer.%s.nc' % startdate.strftime('%Y%m%d'))

    if os.path.exists(infile):
        optimized_present = True
    else:
        optimized_present = False

    if optimized_present:

        ncf_fc_in = io.ct_read(infile, 'read')

        fc_obs_num = ncf_fc_in.get_variable('obspack_num')
        fc_obs_val = ncf_fc_in.get_variable('observed')
        fc_simulated = ncf_fc_in.get_variable('modelsamplesmean_prior')
        fc_simulated_ens = ncf_fc_in.get_variable(
            'modelsamplesdeviations_prior')
        fc_flag = ncf_fc_in.get_variable('flag')
        # The 'bulk' optimizer stores full matrices, so the per-observation
        # variances are taken from the diagonal; 'serial' (and the default)
        # already store vectors
        if not dacycle.dasystem.has_key('opt.algorithm'):
            fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance')
            fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance')
        elif dacycle.dasystem['opt.algorithm'] == 'serial':
            fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance')
            fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance')
        elif dacycle.dasystem['opt.algorithm'] == 'bulk':
            fc_r = ncf_fc_in.get_variable(
                'modeldatamismatchvariance').diagonal()
            fc_hphtr = ncf_fc_in.get_variable(
                'totalmolefractionvariance').diagonal()
        filesitecode = ncf_fc_in.get_variable('sitecode')

        fc_sitecodes = netCDF4.chartostring(filesitecode).tolist()
        #fc_sitecodes = [join(s.compressed(),'') for s in filesitecode]

        ncf_fc_in.close()

        # Expand the list of input files with those available from the forecast list

        infiles_rootdir = os.path.split(infiles[0])[0]
        infiles.extend(
            os.path.join(infiles_rootdir, f + '.nc') for f in fc_sitecodes)

    #Step (2): For each observation timeseries we now have data for, open it and fill with data

    for orig_file in set(infiles):

        if not os.path.exists(orig_file):
            logging.error(
                "The original input file (%s) could not be found, continuing to next file..."
                % orig_file)
            continue

        copy_file = os.path.join(dirname, os.path.split(orig_file)[-1])
        if not os.path.exists(copy_file):
            # First time we see this site file: copy it and create the CTDAS variables
            shutil.copy(orig_file, copy_file)
            logging.debug(
                "Copied a new original file (%s) to the analysis directory" %
                orig_file)

            ncf_out = io.CT_CDF(copy_file, 'write')

            # Modify the attributes of the file to reflect added data from CTDAS properly

            # NOTE(review): bare except; any failure (not just a missing key) falls back to 'unknown'
            try:
                host = os.environ['HOSTNAME']
            except:
                host = 'unknown'

            ncf_out.Caution = '==================================================================================='
            # Append to the History attribute, or create it when the file has none yet
            try:
                ncf_out.History += '\nOriginal observation file modified by user %s on %s\n' % (
                    os.environ['USER'],
                    datetime.today().strftime('%F'),
                )
            except:
                ncf_out.History = '\nOriginal observation file modified by user %s on %s\n' % (
                    os.environ['USER'],
                    datetime.today().strftime('%F'),
                )
            ncf_out.CTDAS_info = 'Simulated values added from a CTDAS run by %s on %s\n' % (os.environ['USER'], datetime.today().strftime('%F'),)\
                               + '\nCTDAS was run on platform %s' % (host,)\
                               + '\nCTDAS job directory was %s' % (dacycle['dir.da_run'],)\
                               + '\nCTDAS Da System was %s' % (dacycle['da.system'],)\
                               + '\nCTDAS Da ObsOperator was %s' % (dacycle['da.obsoperator'],)
            ncf_out.CTDAS_startdate = dacycle['time.start'].strftime('%F')
            ncf_out.CTDAS_enddate = dacycle['time.finish'].strftime("%F")
            ncf_out.original_file = orig_file

            # get nobs dimension

            if ncf_out.dimensions.has_key('id'):
                dimidob = ncf_out.dimensions['id']
                dimid = ('id', )
            elif ncf_out.dimensions.has_key('obs'):
                dimidob = ncf_out.dimensions['obs']
                dimid = ('obs', )

            # NOTE(review): isunlimited is referenced without calling it, so this condition
            # is always truthy and the else-branch is dead; nobs is also unused below —
            # confirm intent before changing
            if dimidob.isunlimited:
                nobs = ncf_out.inq_unlimlen()
            else:
                nobs = len(dimid)

            # add nmembers dimension

            dimmembersob = ncf_out.createDimension('nmembers',
                                                   size=simulated.shape[1])
            dimmembers = ('nmembers', )
            # NOTE(review): length of a 1-tuple (always 1); nmembers is unused below
            nmembers = len(dimmembers)

            # Create empty arrays for posterior samples, as well as for forecast sample statistics

            savedict = io.std_savedict.copy()
            savedict['name'] = "flag_forecast"
            savedict['long_name'] = "flag_for_obs_model in forecast"
            savedict['units'] = "None"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modeldatamismatch"
            savedict['long_name'] = "modeldatamismatch"
            savedict['units'] = "[mol mol-1]^2"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'Variance of mole fractions resulting from model-data mismatch'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "totalmolefractionvariance_forecast"
            savedict['long_name'] = "totalmolefractionvariance of forecast"
            savedict['units'] = "[mol mol-1]^2"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'Variance of mole fractions resulting from prior state and model-data mismatch'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesmean"
            savedict['long_name'] = "mean modelsamples"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'simulated mole fractions based on optimized state vector'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesmean_forecast"
            savedict['long_name'] = "mean modelsamples from forecast"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'simulated mole fractions based on prior state vector'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesstandarddeviation"
            savedict[
                'long_name'] = "standard deviaton of modelsamples over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'std dev of simulated mole fractions based on optimized state vector'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesstandarddeviation_forecast"
            savedict[
                'long_name'] = "standard deviaton of modelsamples from forecast over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict[
                'comment'] = 'std dev of simulated mole fractions based on prior state vector'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesensemble"
            savedict['long_name'] = "modelsamples over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid + dimmembers
            savedict[
                'comment'] = 'ensemble of simulated mole fractions based on optimized state vector'
            ncf_out.add_variable(savedict)

            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesensemble_forecast"
            savedict[
                'long_name'] = "modelsamples from forecast over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid + dimmembers
            savedict[
                'comment'] = 'ensemble of simulated mole fractions based on prior state vector'
            ncf_out.add_variable(savedict)

        else:
            logging.debug(
                "Modifying existing file (%s) in the analysis directory" %
                copy_file)

            ncf_out = io.CT_CDF(copy_file, 'write')

        # Get existing file obs_nums to determine match to local obs_nums

        if ncf_out.variables.has_key('id'):
            file_obs_nums = ncf_out.get_variable('id')
        elif ncf_out.variables.has_key('obspack_num'):
            file_obs_nums = ncf_out.get_variable('obspack_num')

        # Get all obs_nums related to this file, determine their indices in the local arrays

        selected_obs_nums = [
            num for infile, num in zip(infiles, obs_num) if infile == orig_file
        ]

        # Optimized data 1st: For each index, get the data and add to the file in the proper file index location
        # Column 0 of 'simulated' holds the mean sample; columns 1: hold the ensemble members

        for num in selected_obs_nums:

            model_index = obs_num.tolist().index(num)
            file_index = file_obs_nums.tolist().index(num)

            #var = ncf_out.variables['modeldatamismatch']   # Take from optimizer.yyyymmdd.nc file instead
            #var[file_index] = mdm[model_index]

            var = ncf_out.variables['modelsamplesmean']
            var[file_index] = simulated[model_index, 0]

            var = ncf_out.variables['modelsamplesstandarddeviation']
            var[file_index] = simulated[model_index, 1:].std()

            var = ncf_out.variables['modelsamplesensemble']
            var[file_index] = simulated[model_index, :]

        # Now forecast data too: For each index, get the data and add to the file in the proper file index location

        if optimized_present:

            selected_fc_obs_nums = [
                num for sitecode, num in zip(fc_sitecodes, fc_obs_num)
                if sitecode in orig_file
            ]

            for num in selected_fc_obs_nums:

                model_index = fc_obs_num.tolist().index(num)
                file_index = file_obs_nums.tolist().index(num)

                var = ncf_out.variables['modeldatamismatch']
                var[file_index] = np.sqrt(fc_r[model_index])

                var = ncf_out.variables['modelsamplesmean_forecast']
                var[file_index] = fc_simulated[model_index]

                var = ncf_out.variables[
                    'modelsamplesstandarddeviation_forecast']
                var[file_index] = fc_simulated_ens[model_index, 1:].std()

                var = ncf_out.variables['modelsamplesensemble_forecast']
                var[file_index] = fc_simulated_ens[model_index, :]

                var = ncf_out.variables['totalmolefractionvariance_forecast']
                var[file_index] = fc_hphtr[model_index]

                var = ncf_out.variables['flag_forecast']
                var[file_index] = fc_flag[model_index]

        # close the file

        status = ncf_out.close()

    return None
예제 #13
0
    def setup(self, dacycle):
        """
        Setup the object by specifying the dimensions.
        There are two major requirements for each statevector that you want to build:

            (1) is that the statevector can map itself onto a regular grid
            (2) is that the statevector can map itself (mean+covariance) onto TransCom regions

        An example is given below.

           :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
           :rtype: None
        """

        self.nlag = int(dacycle['time.nlag'])
        self.nmembers = int(dacycle['da.optimizer.nmembers'])
        self.nparams = int(dacycle.dasystem['nparameters'])
        self.nobs = 0

        self.obs_to_assimilate = ()  # empty container to hold observations to assimilate later on

        # These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist
        # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread.
        # One independent empty member list per lag step; the comprehension replaces
        # the fragile pattern of overwriting the elements of a range() list.
        self.ensemble_members = [[] for n in range(self.nlag)]


        # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember
        #  that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid.

        mapfile = os.path.join(dacycle.dasystem['regionsfile'])
        ncf = io.ct_read(mapfile, 'read')
        self.gridmap = ncf.get_variable('regions')
        self.tcmap = ncf.get_variable('transcom_regions')
        ncf.close()

        logging.debug("A TransCom  map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile'])
        logging.debug("A parameter map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile'])

        # Create a dictionary for state <-> gridded map conversions

        nparams = self.gridmap.max()
        self.griddict = {}
        for r in range(1, int(nparams) + 1):
            # flat indices of all grid boxes belonging to parameter region r
            sel = (self.gridmap.flat == r).nonzero()
            if len(sel[0]) > 0: 
                self.griddict[r] = sel

        logging.debug("A dictionary to map grids to states and vice versa was created")

        # Create a matrix for state <-> TransCom conversions

        # 23 columns: presumably the number of TransCom regions in the mask file — confirm against regionsfile
        self.tcmatrix = np.zeros((self.nparams, 23), 'float') 

        for r in range(1, self.nparams + 1):
            sel = (self.gridmap.flat == r).nonzero()
            if len(sel[0]) < 1: 
                continue
            else:
                # All TransCom region numbers covered by parameter r; each parameter must fall in exactly one region
                n_tc = set(self.tcmap.flatten().take(sel[0]))
                if len(n_tc) > 1: 
                    logging.error("Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc))
                    raise ValueError
                self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0

        logging.debug("A matrix to map states to TransCom regions and vice versa was created")

        # Create a mask for species/unknowns

        self.make_species_mask()
예제 #14
0
    def get_covariance(self, date, dacycle):
        """Return the list of prior covariance matrices for this cycle.

        Reads one covariance matrix per biosphere covariance file found in
        ``dacycle.dasystem['bio.cov.dir']`` (filtered on
        ``dacycle.dasystem['bio.cov.prefix']``), scales each, and finally
        appends four 1x1 scalar covariances (value 4 = 2*2) for the
        boundary conditions.

        :param date: cycle date (historically used to select a dated ocean
            covariance file; currently unused)
        :param dacycle: a CycleControl-like object exposing ``dasystem``
        :rtype: list of numpy arrays
        :raises IOError: if a listed covariance file does not exist
        """
        try:
            # optional, only needed when debugging plots are added
            import matplotlib.pyplot as plt
        except Exception:
            pass

        # Collect the biosphere covariance files from the configured directory
        cov_dir = dacycle.dasystem['bio.cov.dir']
        cov_files = [os.path.join(cov_dir, f) for f in os.listdir(cov_dir)
                     if dacycle.dasystem['bio.cov.prefix'] in f]

        logging.debug("Found %d covariances to use for biosphere" % len(cov_files))

        covariancematrixlist = []
        for covfile in cov_files:
            if not os.path.exists(covfile):
                msg = "Cannot find the specified file %s" % covfile
                logging.error(msg)
                raise IOError(msg)
            logging.debug("Using covariance file: %s" % covfile)

            f = io.ct_read(covfile, 'read')
            if 'pco2' in covfile:
                # Ocean covariance: the correlation matrix is used as-is.
                # (Bug fix: the original appended the uninitialized/stale
                # name 'cov1' instead of the ocean matrix read here.)
                cov1 = f.get_variable('CORMAT')
            else:
                cov = f.get_variable('covariance')
                # This scaling factor makes the total variance close to the value
                # of a single ecoregion; 360 is used to boost the P matrix uncertainty.
                cov_sf = 360. / np.sqrt(cov.diagonal().sum())
                # Here you assume that your P matrix has units of (mol m-2 s-1)^2.
                cov1 = cov * cov_sf * (1.e-6) ** 2
            f.close()
            covariancematrixlist.append(cov1)

        # Boundary conditions covariance: four identical scalar entries

        cov = np.array([[2 * 2]])
        covariancematrixlist.append(cov)
        covariancematrixlist.append(cov)
        covariancematrixlist.append(cov)
        covariancematrixlist.append(cov)

        logging.debug(
            "Succesfully closed files after retrieving prior covariance matrices"
        )

        # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it

        return covariancematrixlist
예제 #15
0
    def run(self):
        """
        This Randomizer will take the original observation data in the Obs object, and simply copy each mean value. Next, the mean
        value will be perturbed by a random normal number drawn from a specified uncertainty of +/- 2 ppm
        """
        import da.tools.io4 as io
        import numpy as np

        # TM5-style flask output file (to be updated later?) that will hold the simulated values
        out_f = io.CT_CDF(self.simulated_file, method='create')
        logging.debug(
            'Creating new simulated observation file in ObservationOperator (%s)'
            % self.simulated_file)

        # Unlimited observation-number dimension plus its index variable
        out_f.createDimension('obs_num', size=None)
        obsdim = ('obs_num', )

        savedict = io.std_savedict.copy()
        savedict['name'] = "obs_num"
        savedict['dtype'] = "int"
        savedict['long_name'] = "Unique_Dataset_observation_index_number"
        savedict['units'] = ""
        savedict['dims'] = obsdim
        savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
        out_f.add_data(savedict, nsets=0)

        # Ensemble-member dimension plus the simulated mole-fraction variable
        out_f.createDimension('nmembers', size=self.forecast_nmembers)
        memberdim = ('nmembers', )

        savedict = io.std_savedict.copy()
        savedict['name'] = "flask"
        savedict['dtype'] = "float"
        savedict['long_name'] = "mole_fraction_of_trace_gas_in_air"
        savedict['units'] = "mol tracer (mol air)^-1"
        savedict['dims'] = obsdim + memberdim
        savedict['comment'] = "Simulated model value created by RandomizerObservationOperator"
        out_f.add_data(savedict, nsets=0)

        # File with the x,y,z,t of model samples that need to be sampled
        in_f = io.ct_read(self.dacycle['ObsOperator.inputfile'], method='read')

        # Observed values, their IDs and the model-data mismatch
        ids = in_f.get_variable('obs_num')
        obs = in_f.get_variable('observed')
        mdm = in_f.get_variable('modeldatamismatch')

        # Perturb each observed mean with white noise scaled by its mismatch and write out
        for i, (obs_id, obs_val, obs_mdm) in enumerate(zip(ids, obs, mdm)):
            out_f.variables['obs_num'][i] = obs_id
            out_f.variables['flask'][i, :] = obs_val + np.random.randn(self.forecast_nmembers) * obs_mdm

        out_f.close()
        in_f.close()

        # Report success and exit

        logging.info(
            'ObservationOperator finished successfully, output file written (%s)'
            % self.simulated_file)
예제 #16
0
def save_weekly_avg_1x1_data(dacycle, statevector):
    """
        Function creates a NetCDF file with output on 1x1 degree grid. It uses the flux data written by the 
        :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and
        variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`.
        
           :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
           :param statevector: a :class:`~da.baseclasses.statevector.StateVector`
           :rtype: None
    """
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly'))
#
# Some help variables
#
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start'] 
    enddate = dacycle['time.end'] 
    nlag = statevector.nlag

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

#
# Create or open NetCDF output file
#
    saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
    ncf = io.CT_CDF(saveas, 'write')

#
# Create dimensions and lat/lon grid
#
    dimgrid = ncf.add_latlon_dim()
    dimensemble = ncf.add_dim('members', statevector.nmembers)
    dimdate = ncf.add_date_dim()
#
# set title and tell GMT that we are using "pixel registration"
#
    setattr(ncf, 'Title', 'CarbonTracker fluxes')
    setattr(ncf, 'node_offset', 1)
#
# skip dataset if already in file
#
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
    skip = ncf.has_date(ncfdate)
    if skip:
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:
#
# if not, process this cycle. Start by getting flux input data from CTDAS
#
        filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))

        # renamed from 'file' to avoid shadowing the builtin
        fluxfile = io.ct_read(filename, 'read')
        bio = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.bio.flux']))
        ocean = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.ocean.flux']))
        fire = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.fires.flux']))
        fossil = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
        #mapped_parameters   = np.array(fluxfile.get_variable(dacycle.dasystem['final.param.mean.1x1']))
        # Optional extra components, folded into bio and fire below when present
        if dacycle.dasystem['background.co2.biosam.flux'] in fluxfile.variables.keys():
            sam = True
            biosam = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
            firesam = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
        else:
            sam = False
        fluxfile.close()

        if sam:
            bio = bio + biosam
            fire = fire + firesam

        # current length of the unlimited (date) dimension = write index
        # (renamed from 'next' to avoid shadowing the builtin)
        nextidx = ncf.inq_unlimlen()[0]


# Start adding datasets from here on, both prior and posterior datasets for bio and ocn

        for prior in [True, False]:
#
# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step
# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date..
#
            if prior:
                qual_short = 'prior'
                for n in range(nlag, 0, -1):
                    priordate = startdate + n*dt - timedelta(dt.days * n)
                    savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
                    filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
                    # NOTE(review): if no savestate file exists for any lag, gridmean /
                    # gridensemble stay unset and the code below fails -- same as original.
                    if os.path.exists(filename):
                        statevector.read_from_file(filename, qual=qual_short)
                        gridmean, gridensemble = statevector.state_to_grid(lag=n)

# Replace the mean statevector by all ones (assumed priors)

                        gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,))

                        logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n))
                        break
            else:
                qual_short = 'opt'
                savedir = dacycle['dir.output']
                filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
                statevector.read_from_file(filename, qual=qual_short)
                gridmean, gridensemble = statevector.state_to_grid(lag=1)

                logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1))
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
            # was a Python-2 print statement; use the logger instead
            logging.debug("shapes: gridensemble %s, bio %s, gridmean %s" % (gridensemble.shape, bio.shape, gridmean.shape))
            biomapped = bio * gridmean 
            oceanmapped = ocean * gridmean 
            biovarmapped = bio * gridensemble
            oceanvarmapped = ocean * gridensemble

#
#
#  For each dataset, get the standard definitions from the module mysettings, add values, dimensions, and unlimited count, then write
#
            savedict = ncf.standard_var(varname='bio_flux_' + qual_short)
            savedict['values'] = biomapped.tolist()
            savedict['dims'] = dimdate + dimgrid
            savedict['count'] = nextidx
            ncf.add_data(savedict)
#
            savedict = ncf.standard_var(varname='ocn_flux_' + qual_short)
            savedict['values'] = oceanmapped.tolist()
            savedict['dims'] = dimdate + dimgrid
            savedict['count'] = nextidx
            ncf.add_data(savedict)

            # was a Python-2 print statement; use the logger instead
            logging.debug("biovarmapped shape: %s" % (biovarmapped.shape,))
            savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)
            savedict['values'] = biovarmapped.tolist()
            savedict['dims'] = dimdate + dimensemble + dimgrid
            savedict['count'] = nextidx
            ncf.add_data(savedict)
#
            savedict = ncf.standard_var(varname='ocn_flux_%s_ensemble' % qual_short)
            savedict['values'] = oceanvarmapped.tolist()
            savedict['dims'] = dimdate + dimensemble + dimgrid
            savedict['count'] = nextidx
            ncf.add_data(savedict)

        # End prior/posterior block

        savedict = ncf.standard_var(varname='fire_flux_imp')
        savedict['values'] = fire.tolist()
        savedict['dims'] = dimdate + dimgrid
        savedict['count'] = nextidx
        ncf.add_data(savedict)
#
        savedict = ncf.standard_var(varname='fossil_flux_imp')
        savedict['values'] = fossil.tolist()
        savedict['dims'] = dimdate + dimgrid
        savedict['count'] = nextidx
        ncf.add_data(savedict)

        area = globarea()
        savedict = ncf.standard_var(varname='cell_area')
        savedict['values'] = area.tolist()
        savedict['dims'] = dimgrid
        ncf.add_data(savedict)
#
        savedict = ncf.standard_var(varname='date')
        savedict['values'] = date2num(startdate) - dectime0 + dt.days / 2.0
        savedict['dims'] = dimdate
        savedict['count'] = nextidx
        ncf.add_data(savedict)

        sys.stdout.write('.')
        sys.stdout.flush()
#
#   Done, close the new NetCDF file
#
    ncf.close()
#
#   Return the full name of the NetCDF file so it can be processed by the next routine
#
    logging.info("Gridded weekly average fluxes now written")

    return saveas
예제 #17
0
    def add_observations(self):
        """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file

            The ObsPack mole fraction files are provided as time series per site with all dates in sequence.
            We will loop over all site files in the ObsPackage, and subset each to our needs

            :raises IOError: when a sample ID is encountered that was already read from another file
        """
        # Step 1: Read list of available site files in package

        infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,))
        f = open(infile, 'r')
        lines = f.readlines()
        f.close()

        ncfilelist = []
        for line in lines:
            if line.startswith('#'):
                continue  # header

            # The first 105 characters hold the fixed-width columns we need
            ncfile, lab, start_date, stop_date, data_comparison = line[:105].split()
            ncfilelist.append(ncfile)

        logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist))

        for ncfile in ncfilelist:
            logging.info('ncfile %s' % ncfile)
            infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc')
            ncf = io.ct_read(infile, 'read')
            idates = ncf.get_variable('time_components')
            dates = array([dtm.datetime(*d) for d in idates])

            # Subset to samples that fall inside our assimilation window
            subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0]

            if len(subselect) == 0:
                ncf.close()
                continue

            logging.debug("Trying to add %d observations from file (%s) to the Data list" % (len(subselect), ncfile))

            dates = dates.take(subselect, axis=0)

            #ccgg_evn = ncf.get_variable('obspack_num').take(subselect)  # or should we propagate obs_num which is not unique across datasets??
            ccgg_evn = ncf.get_variable('ccgg_evn').take(subselect, axis=0)

            obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
            # Bug fix: the original used map(strip, ...) with a bare 'strip' name that
            # 'import string' does not provide; use the str method directly instead.
            obspackid = [s.tostring().lower().strip() for s in obspackid]

            datasetname = ncfile  # use full name of dataset to propagate for clarity
            lats = ncf.get_variable('latitude').take(subselect, axis=0)
            lons = ncf.get_variable('longitude').take(subselect, axis=0)
            alts = ncf.get_variable('altitude').take(subselect, axis=0)
            intake_height = ncf.get_variable('intake_height').take(subselect, axis=0)
            obs = ncf.get_variable('value').take(subselect, axis=0)
            species = ncf.get_attribute('dataset_parameter')
            utc2loc = ncf.get_attribute('site_utc2lst')
            flags = ncf.get_variable('obs_flag').take(subselect, axis=0)
            ncf.close()

            for n in range(len(dates)):
                # Recomputed every iteration on purpose: datalist grows inside this loop,
                # so duplicates within the same file must also be caught.
                used_ids = self.getvalues('id', list)
                if ccgg_evn[n] in used_ids:
                    ii = used_ids.index(ccgg_evn[n])
                    logging.error("Error when reading from file: %s" % ncfile)
                    logging.error("This sample ID (%d) is not unique" % ccgg_evn[n])
                    logging.error("Previously used from file: %s" % self.datalist[ii].fromfile)
                    logging.error("...exiting")
                    raise IOError
                else:
                    self.datalist.append(MoleFractionSample(ccgg_evn[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], intake_height[n], lats[n], lons[n], obspackid[n], species, utc2loc, 1, 0.0, infile))

        logging.info("Observations list now holds %d values" % len(self.datalist))