def write_to_file(self, filename, qual):
    """
    Write the StateVector information to a NetCDF file for later use.

    :param filename: the full filename for the output NetCDF file
    :param qual: label appended to the dataset names. The value ``'prior'``
                 creates a new file; any other value opens the existing
                 file for writing.
    :rtype: None

    Two datasets are written for the given `qual`:

        * `meanstate_<qual>`, dimensions [nlag, nparameters]
        * `ensemblestate_<qual>`, dimensions [nlag, nmembers, nparameters]

    The first ensemble member of each lag is stored as the mean state, and
    the ensemble dataset holds the deviations of every member from it.

    This NetCDF information can be read back into a StateVector object using
    :meth:`~da.baseclasses.statevector.StateVector.read_from_file`
    """
    if qual == 'prior':
        f = io.CT_CDF(filename, method='create')
        logging.debug('Creating new StateVector output file (%s)' % filename)
    else:
        f = io.CT_CDF(filename, method='write')
        logging.debug('Opening existing StateVector output file (%s)' % filename)

    # Close the file on every path, also when one of the writes fails.
    try:
        dimparams = f.add_params_dim(self.nparams)
        dimmembers = f.add_members_dim(self.nmembers)
        dimlag = f.add_lag_dim(self.nlag, unlimited=True)

        for n in range(self.nlag):
            members = self.ensemble_members[n]
            mean_state = members[0].param_values

            savedict = f.standard_var(varname='meanstate_%s' % qual)
            savedict['dims'] = dimlag + dimparams
            savedict['values'] = mean_state
            savedict['count'] = n
            savedict['comment'] = 'this represents the mean of the ensemble'
            f.add_data(savedict)

            devs = np.asarray([m.param_values.flatten() for m in members])
            data = devs - np.asarray(mean_state)

            savedict = f.standard_var(varname='ensemblestate_%s' % qual)
            savedict['dims'] = dimlag + dimmembers + dimparams
            savedict['values'] = data
            savedict['count'] = n
            savedict['comment'] = 'this represents deviations from the mean of the ensemble'
            f.add_data(savedict)
    finally:
        f.close()

    logging.info('Successfully wrote the State Vector to file (%s) ' % filename)
def write_members_to_file(self, lag, outdir, endswith='.nc'):
    """
    Write ensemble member information to NetCDF files for later use.

    :param lag: which lag step of the filter to write; used directly as an
                index into ``self.ensemble_members``
                (NOTE(review): the historical documentation stated a range
                of [1,...,nlag], but no offset is applied here -- confirm
                against callers)
    :param outdir: directory where to write the files
    :param endswith: optional label to add to the filename, default is simply .nc
    :rtype: None

    One file is written per ensemble member, named *parameters.DDD<endswith>*
    where *DDD* is the zero-padded member number. Each file holds two
    datasets: `parametervalues` (the member's parameter vector) and
    `parametermap` (the same values projected onto the lat/lon grid by
    ``self.vector2grid``).

    .. note:: if more, or other information is needed to complete the sampling of the
              ObservationOperator you can simply inherit from the StateVector baseclass
              and overwrite this write_members_to_file function.
    """
    # The io module is imported at module level on purpose: importing it
    # inside this method caused a crash in netCDF4 on MacOSX.
    members = self.ensemble_members[lag]

    for mem in members:
        filename = os.path.join(outdir, 'parameters.%03d%s' % (mem.membernumber, endswith))
        ncf = io.CT_CDF(filename, method='create')
        # Make sure the member file is closed even if a write fails.
        try:
            dimparams = ncf.add_params_dim(self.nparams)
            dimgrid = ncf.add_latlon_dim()

            data = mem.param_values

            savedict = io.std_savedict.copy()
            savedict['name'] = "parametervalues"
            savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber
            savedict['units'] = "unitless"
            savedict['dims'] = dimparams
            savedict['values'] = data
            savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber
            ncf.add_data(savedict)

            griddata = self.vector2grid(vectordata=data)

            savedict = io.std_savedict.copy()
            savedict['name'] = "parametermap"
            savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber
            savedict['units'] = "unitless"
            savedict['dims'] = dimgrid
            savedict['values'] = griddata.tolist()
            savedict['comment'] = 'These are gridded parameter values to use for member %d' % mem.membernumber
            ncf.add_data(savedict)
        finally:
            ncf.close()

        logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename))
def timehistograms_new(fig, infile, option='final'):
    """
    Draw two side-by-side histograms of the simulated-minus-observed
    residuals, one for NH summer (Jun-Sep) and one for NH winter (Nov-Apr).

    :param fig: the matplotlib figure to draw into
    :param infile: NetCDF file with the observations and simulated values of one site
    :param option: ``'final'`` uses the `modelsamplesmean` variable,
                   ``'forecast'`` uses `modelsamplesmean_forecast`
    :returns: the figure that was passed in

    If fewer than two simulated values are available the figure is returned
    untouched. A season with fewer than 10 residuals gets no histogram.
    """
    fontsize = 17

    #
    # Get data
    #
    f = io.CT_CDF(infile, 'read')
    try:
        species = f.dataset_parameter
        if species == 'co2':
            molefac = 1e6
            units = r'$\mu$mol mol$^{-1}$'
            species = "CO$_2$"
        elif species == 'co2c13':
            molefac = 1.0
            units = 'permil'
            species = r"$\delta^{13}$C"

        date = f.get_variable('time')
        obs = f.get_variable('value') * molefac
        mdm = f.get_variable('modeldatamismatch') * molefac
        hphtr = f.get_variable('totalmolefractionvariance_forecast') * molefac * molefac
        if option == 'final':
            simulated = f.get_variable('modelsamplesmean') * molefac
        if option == 'forecast':
            simulated = f.get_variable('modelsamplesmean_forecast') * molefac
        flags = f.get_variable('flag_forecast')

        longsitestring = f.site_name + ', ' + f.site_country
        location = (nice_lat(f.site_latitude, 'python') + ', ' +
                    nice_lon(f.site_longitude, 'python') + ', ' +
                    nice_alt(f.site_elevation))

        SDSInfo = dict((k, f.getncattr(k)) for k in f.ncattrs())
    finally:
        # Close the input file even when a variable or attribute is missing.
        f.close()

    # The time variable is converted as seconds since 1970-01-01.
    pydates = np.array(
        [dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date])

    # Keep only the samples for which a simulated value exists (not masked).
    sampled = np.logical_not(np.ma.getmaskarray(simulated))

    if len(sampled.nonzero()[0]) < 2:
        logging.warning("Too few simulated values found, continuing...")
        return fig

    simulated = simulated.compress(sampled)
    obs = obs.compress(sampled)
    pydates = pydates.compress(sampled)
    mdm = mdm.compress(sampled)
    hphtr = hphtr.compress(sampled)
    flags = flags.compress(sampled)

    summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9]]  # JJAS
    winter = [i for i, d in enumerate(pydates)
              if d.month in [11, 12, 1, 2, 3, 4]]  # NDJFMA

    # Create two side-by-side axes
    ax1 = fig.add_axes([0.05, 0.18, 0.4, 0.7])
    ax2 = fig.add_axes([0.55, 0.18, 0.4, 0.7])

    # Model-data mismatch of the last season that had any data; stays None
    # when neither season has samples so the label below can be skipped
    # instead of failing on an unbound name.
    sel_mdm = None

    # Loop simultaneously over ax1/ax2 and summer/winter values
    for ax, sel in zip([ax1, ax2], [summer, winter]):
        # Test for emptiness on the index list itself: the previous
        # np.array(sel).any() test also rejected a selection holding only
        # index 0.
        if not sel:
            continue

        # Subselect data for winter/summer
        sel_obs = obs.take(sel)
        sel_fc = simulated.take(sel)
        sel_hqhr = hphtr.take(sel)
        sel_mdm = mdm.take(sel)
        sel_flags = flags.take(sel)

        # Calculate residual and chi values; in 'final' mode samples
        # flagged with value 2 are left out.
        if option == 'final':
            keep = (sel_flags != 2)
            res = sel_fc.compress(keep) - sel_obs.compress(keep)
            chi = res / np.sqrt(sel_mdm.compress(keep))
        elif option == 'forecast':
            res = sel_fc - sel_obs
            chi = res / np.sqrt(sel_hqhr)

        # Scaling factor for the x-axis range: 5 standard deviations are shown
        sc = res.std()

        # If there is too little data for a reasonable PDF, skip to the next season
        if res.shape[0] < 10:
            continue

        # Histogram of the residuals with a minimum of 10 bins and a maximum
        # of N/10 bins, normalized to an area of 1.0. Floor division keeps
        # the bin count an integer.
        # NOTE(review): the `normed` keyword was replaced by `density` in
        # newer matplotlib releases -- confirm against the installed version.
        n, bins, patches = ax.hist(res, max(res.shape[0] // 10, 10), normed=1)

        # Change the colors on the bars
        plt.setp(patches, 'facecolor', 'tan', 'edgecolor', 'tan', label='None')

        # Two normal distributions for the line plots over the x-axis interval
        bins = np.arange(-5 * sc, 5 * sc, 0.1)
        n = normpdf(bins, res.mean(), res.std())
        ax.plot(bins, n, 'b-', linewidth=2)  # PDF of the residuals in blue
        n = normpdf(bins, 0.0, sel_mdm.mean())
        ax.plot(bins, n, 'g-', linewidth=2)  # PDF of the model-data-mismatch in green

        #
        # Add a legend, not as a legend object but simply as text labels
        #
        if option == 'final':
            strX = ''
        elif option == 'forecast':
            strX = 'Inn. '

        # chi.mean() != chi.mean() is true only when the mean is NaN.
        # NOTE(review): the second test uses the mismatch of all samples,
        # not of this season; the 900 threshold is kept as found -- confirm
        # its meaning.
        if chi.mean() != chi.mean() or mdm.mean() < 900:
            labs = [
                r'%.2f $\pm$ %.2f' % (res.mean(), res.std()),
                'N = %d' % sel_obs.shape[0],
                r'%s$\chi^2$= %.2f' % (strX, (chi ** 2).mean()),
            ]
        else:
            labs = [
                r'%.2f $\pm$ %.2f' % (res.mean(), res.std()),
                'N = %d' % sel_obs.shape[0],
            ]

        # Print the labels onto the axes in relative coordinates
        for i, lab in enumerate(labs):
            ax.text(0.75, 0.9 - 0.07 * i, lab,
                    transform=ax.transAxes,
                    fontsize=fontsize,
                    horizontalalignment='center',
                    color='blue')

        # Set limits on the x-axis and style the spines
        ax.set_xlim(-5 * sc, 5 * sc)

        ax.spines['left'].set_position(('data', 0))
        ax.spines['right'].set_color('none')
        ax.spines['right'].axis.set_ticks([])
        ax.spines['bottom'].set_position(('data', 0))
        ax.spines['top'].set_color('none')
        ax.spines['left'].set_linewidth(1.5)
        ax.spines['bottom'].set_linewidth(1.5)
        ax.spines['bottom'].set_position(('outward', 10))

        matplotlib.rcParams.update({'font.size': 18})
        ax.xaxis.set_ticks_position('bottom')

        ax.set_xlabel('[%s]' % units, size=16)

    #
    # All custom titles and auxiliary info are placed onto the figure directly (fig.text) in relative coordinates
    #
    fig.text(0.5, 0.02,
             'Simulated - Observed %s [%s]\nData from %s to %s' % (
                 species, units,
                 pydates[0].strftime('%d-%b-%Y'),
                 pydates[-1].strftime('%d-%b-%Y')),
             horizontalalignment='center', fontsize=fontsize)
    if sel_mdm is not None:
        fig.text(0.5, 0.35,
                 'model-data\nmismatch:\n%.2f %s' % (sel_mdm.mean(), units),
                 horizontalalignment='center', fontsize=fontsize, color='green')
    fig.text(0.12, 0.75, 'NH Summer\n(Jun-Sep)',
             horizontalalignment='center', fontsize=fontsize)
    fig.text(0.62, 0.75, 'NH Winter\n(Nov-Apr)',
             horizontalalignment='center', fontsize=fontsize)

    #
    # Title
    #
    plt.suptitle('%s [%s]\n%s, %s, %s ' % (
        longsitestring,
        location,
        SDSInfo['dataset_project'],
        SDSInfo['lab_1_name'],
        SDSInfo['lab_1_country'],
    ), fontsize=fontsize + 4)

    #
    # Lab logo
    #
    try:
        urllib2.urlopen(SDSInfo['lab_logo']).read()
    except Exception:
        logging.warning("No logo found for this program, continuing...")
        return fig

    # NOTE(review): decoding and drawing of the downloaded logo is disabled
    # in this file. The overlay axes that used to be created here was sized
    # with the image height and width, which are no longer defined, so a
    # successful download ended in a NameError. The axes is therefore not
    # created until the image handling is restored.
    return fig
def save_time_avg_data(dacycle, infile, avg='monthly'):
    """
    Save time-averaged data from a NetCDF file to a new NetCDF file.

    :param dacycle: mapping that provides the `dir.analysis` output location
    :param infile: NetCDF input file; its path must contain one of
                   `weekly`, `monthly` or `yearly`, which is replaced by
                   `avg` to build the output directory
    :param avg: averaging to apply, one of `monthly`, `seasonal`, `yearly`
                or `longterm`
    :returns: the name of the file that was written, or None when the
              input file does not exist
    :raises ValueError: if the period of `infile` cannot be determined or
                        `avg` is not a known averaging

    Variables without a `date` dimension are copied as they are; all others
    are averaged with the matching routine from `timetools` and written one
    record per averaging period.
    """
    # A later match overrides an earlier one, as the original chain of
    # independent tests did.
    intime = None
    for period in ('weekly', 'monthly', 'yearly'):
        if period in infile:
            intime = period
    if intime is None:
        raise ValueError('Cannot determine the averaging period of input file (%s)' % infile)

    dirname, filename = os.path.split(infile)
    outdir = create_dirs(os.path.join(dacycle['dir.analysis'], dirname.replace(intime, avg)))

    dectime0 = date2num(datetime(2000, 1, 1))

    # Check the input before creating the output file, so that a missing
    # input does not leave an empty, unclosed output file behind.
    if not os.path.exists(infile):
        logging.error("Needed input file (%s) not found. Please create this first:" % infile)
        logging.error("returning...")
        return None

    # Create NetCDF output file
    saveas = os.path.join(outdir, filename)
    ncf = io.CT_CDF(saveas, 'create')
    try:
        ncf.add_date_dim()

        ncf_in = io.ct_read(infile, 'read')
        try:
            datasets = list(ncf_in.variables.keys())
            date = ncf_in.get_variable('date')

            # Copy the global attributes that the output does not have yet
            for att in ncf_in.ncattrs():
                attval = ncf_in.getncattr(att)
                if att not in ncf.ncattrs():
                    ncf.setncattr(att, attval)

            # The date variable is converted as days since 2000-01-01
            times = [datetime(2000, 1, 1) + timedelta(days=d) for d in date]

            # Loop over the datasets in infile, handling the date variable first
            for sds in ['date'] + datasets:
                data = ncf_in.get_variable(sds)
                vardims = ncf_in.variables[sds].dimensions

                # Create every non-date dimension that the output lacks
                for d in vardims:
                    if 'date' in d:
                        continue
                    if d not in ncf.dimensions.keys():
                        ncf.createDimension(d, size=len(ncf_in.dimensions[d]))

                savedict = ncf.standard_var(sds)
                savedict['name'] = sds
                savedict['dims'] = vardims
                savedict['units'] = ncf_in.variables[sds].units
                savedict['long_name'] = ncf_in.variables[sds].long_name
                savedict['comment'] = ncf_in.variables[sds].comment
                savedict['standard_name'] = ncf_in.variables[sds].standard_name
                savedict['count'] = 0

                if 'date' not in vardims:
                    savedict['values'] = data
                    ncf.add_data(savedict)
                    continue

                if avg == 'monthly':
                    time_avg, data_avg = timetools.monthly_avg(times, data)
                elif avg == 'seasonal':
                    time_avg, data_avg = timetools.season_avg(times, data)
                elif avg == 'yearly':
                    time_avg, data_avg = timetools.yearly_avg(times, data)
                elif avg == 'longterm':
                    time_avg, data_avg = timetools.longterm_avg(times, data)
                    # A single long-term mean is wrapped to look like one record
                    time_avg = [time_avg]
                    data_avg = [data_avg]
                else:
                    raise ValueError('Averaging (%s) does not exist' % avg)

                for count, (dd, avg_values) in enumerate(zip(time_avg, data_avg)):
                    if sds == 'date':
                        savedict['values'] = date2num(dd) - dectime0
                    else:
                        savedict['values'] = avg_values
                    savedict['count'] = count
                    ncf.add_data(savedict, silent=True)

                    # One progress dot per record written
                    sys.stdout.write('.')

                sys.stdout.write('\n')
                sys.stdout.flush()
        finally:
            ncf_in.close()
    finally:
        ncf.close()

    logging.info("------------------- Finished time averaging---------------------------------")

    return saveas
def save_weekly_avg_agg_data(dacycle, region_aggregate='olson'):
    """
    Create a NetCDF file with the 1x1 degree fluxes of one cycle aggregated
    to a set of regions.

    :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
    :param region_aggregate: the region set to aggregate to, one of `olson`,
                             `olson_extended`, `transcom`,
                             `transcom_extended` or `country`
    :returns: the name of the output file, or None when the needed
              flux_1x1 input file does not exist
    :raises ValueError: if `region_aggregate` is not a known region set, or
                        the cycle date is not present in the input file
    :raises KeyError: if the input file has no `date` variable

    The fluxes are read from the flux_1x1 file of this cycle, multiplied by
    the grid box area and summed per region. For ensemble variables the
    covariance between regions is written as well.
    """
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_%s_weekly' % region_aggregate))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    logging.debug("Aggregating 1x1 fluxes to %s totals" % region_aggregate)

    # Write/Create NetCDF output file
    saveas = os.path.join(dirname, '%s_fluxes.%s.nc' % (region_aggregate, startdate.strftime('%Y-%m-%d')))
    ncf = io.CT_CDF(saveas, 'write')
    # Everything below runs inside try/finally so the output file is closed
    # on the early return and on errors as well.
    try:
        dimdate = ncf.add_date_dim()
        ncf.add_date_dim_format()
        dimgrid = ncf.add_latlon_dim()  # for mask

        #
        # Select regions to aggregate to
        #
        if region_aggregate == "olson":
            regionmask = tc.olson240mask
            dimname = 'olson'
            dimregs = ncf.add_dim(dimname, regionmask.max())

            regionnames = []
            for i in range(11):
                for j in range(19):
                    regionnames.append("%s_%s" % (tc.transnams[i], tc.olsonnams[j],))
            regionnames.extend(tc.oifnams)
            xform = False

            for i, name in enumerate(regionnames):
                lab = 'Aggregate_Region_%03d' % (i + 1,)
                setattr(ncf, lab, name)

        elif region_aggregate == "olson_extended":
            regionmask = tc.olson_ext_mask
            dimname = 'olson_ext'
            dimregs = ncf.add_dim(dimname, regionmask.max())
            xform = False

            for i, name in enumerate(tc.olsonextnams):
                # NOTE(review): attribute name is spelled 'Aggreate' here but
                # 'Aggregate' in the olson branch; kept as is because readers
                # of existing files may rely on it -- confirm before fixing.
                lab = 'Aggreate_Region_%03d'%(i+1)
                setattr(ncf, lab, name)

        elif region_aggregate == "transcom":
            regionmask = tc.transcommask
            dimname = 'tc'
            dimregs = ncf.add_region_dim(type='tc')
            xform = False

        elif region_aggregate == "transcom_extended":
            regionmask = tc.transcommask
            dimname = 'tc_ext'
            dimregs = ncf.add_region_dim(type='tc_ext')
            xform = True

        elif region_aggregate == "country":
            xform = False
            countrydict = ct.get_countrydict()
            selected = ['Russia', 'Canada', 'China', 'United States', 'EU27', 'Brazil', 'Australia', 'India']
            regionmask = np.zeros((180, 360,), 'float')

            for i, name in enumerate(selected):
                lab = 'Country_%03d' % (i + 1,)
                setattr(ncf, lab, name)

                if name == 'EU27':
                    namelist = ct.EU27
                elif name == 'EU25':
                    namelist = ct.EU25
                elif name == 'G8':
                    namelist = ct.G8
                elif name == 'UNFCCC_annex1':
                    namelist = ct.annex1
                elif name == 'UNFCCC_annex2':
                    namelist = ct.annex2
                else:
                    namelist = [name]

                for countryname in namelist:
                    # Countries missing from the dictionary are skipped
                    try:
                        country = countrydict[countryname]
                    except KeyError:
                        continue
                    regionmask.put(country.gridnr, i + 1)

            dimname = 'country'
            dimregs = ncf.add_dim(dimname, regionmask.max())

        else:
            raise ValueError('Region aggregate (%s) does not exist' % region_aggregate)

        skip = ncf.has_date(ncfdate)
        if skip:
            logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
        else:
            #
            # set title and tell GMT that we are using "pixel registration"
            #
            setattr(ncf, 'Title', 'CTDAS Aggregated fluxes')
            setattr(ncf, 'node_offset', 1)

            savedict = ncf.standard_var('unknown')
            savedict['name'] = 'regionmask'
            savedict['comment'] = 'numerical mask used to aggregate 1x1 flux fields, each integer 0,...,N is one region aggregated'
            savedict['values'] = regionmask.tolist()
            savedict['units'] = '-'
            savedict['dims'] = dimgrid
            savedict['count'] = 0
            ncf.add_data(savedict)

            # Get input data from 1x1 degree flux files
            area = globarea()

            infile = os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly', 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
            if not os.path.exists(infile):
                logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
                return None

            ncf_in = io.ct_read(infile, 'read')
            try:
                # Get the date variable, and find index corresponding to the dacycle date
                try:
                    dates = ncf_in.variables['date'][:]
                except KeyError:
                    logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
                    logging.error("Please make sure you create gridded fluxes before making TC fluxes ")
                    raise

                try:
                    index = dates.tolist().index(ncfdate)
                except ValueError:
                    logging.error("The requested cycle date is not yet available in file %s " % infile)
                    logging.error("Please make sure you create state based fluxes before making TC fluxes ")
                    raise

                # First add the date for this cycle to the file, this grows the unlimited dimension
                savedict = ncf.standard_var(varname='date')
                savedict['values'] = ncfdate
                savedict['dims'] = dimdate
                savedict['count'] = index
                ncf.add_data(savedict)

                # Now convert the other variables of the input file
                for vname in ncf_in.variables:
                    if vname in ('latitude', 'longitude', 'date', 'idate') or 'std' in vname:
                        continue

                    if 'ensemble' in vname:
                        data = ncf_in.get_variable(vname)[index]

                        dimensemble = ncf.add_dim('members', data.shape[0])

                        regiondata = np.array([
                            state_to_grid(member * area, regionmask, reverse=True, mapname=region_aggregate)
                            for member in data
                        ])
                        # np.dot also works on numpy versions whose arrays
                        # lack a dot() method
                        regioncov = np.dot(regiondata.transpose(), regiondata) / (data.shape[0] - 1)

                        if xform:
                            regiondata = ExtendedTCRegions(regiondata, cov=False)
                            regioncov = ExtendedTCRegions(regioncov, cov=True)

                        savedict = ncf.standard_var(varname=vname)
                        savedict['name'] = vname.replace('ensemble', 'covariance')
                        savedict['units'] = '[mol/region/s]^2'
                        savedict['dims'] = dimdate + dimregs + dimregs
                        savedict['count'] = index
                        savedict['values'] = regioncov
                        ncf.add_data(savedict)

                        savedict = ncf.standard_var(varname=vname)
                        savedict['name'] = vname
                        savedict['units'] = 'mol/region/s'
                        savedict['dims'] = dimdate + dimensemble + dimregs

                    elif 'flux' in vname:
                        data = ncf_in.get_variable(vname)[index]

                        regiondata = state_to_grid(data * area, regionmask, reverse=True, mapname=region_aggregate)

                        if xform:
                            regiondata = ExtendedTCRegions(regiondata)

                        savedict = ncf.standard_var(varname=vname)
                        savedict['dims'] = dimdate + dimregs
                        savedict['units'] = 'mol/region/s'

                    else:
                        data = ncf_in.get_variable(vname)[:]
                        regiondata = state_to_grid(data, regionmask, reverse=True, mapname=region_aggregate)
                        if xform:
                            regiondata = ExtendedTCRegions(regiondata)

                        savedict = ncf.standard_var(varname=vname)
                        savedict['dims'] = dimdate + dimregs

                    savedict['count'] = index
                    savedict['values'] = regiondata
                    ncf.add_data(savedict)
            finally:
                ncf_in.close()
    finally:
        ncf.close()

    logging.info("%s aggregated weekly average fluxes now written" % dimname)

    return saveas
def save_weekly_avg_ext_tc_data(dacycle):
    """
    Save the TransCom fluxes of one cycle on the extended TransCom regions.

    :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
    :returns: the name of the output file (`tc_extfluxes.nc`), or None when
              the needed `tcfluxes.nc` input file does not exist
    :raises KeyError: if the input file has no `date` variable
    :raises ValueError: if the cycle date is not present in the input file

    Every variable of `tcfluxes.nc` except the coordinate and date variables
    is passed through `ExtendedTCRegions` for the record of this cycle;
    variables with `cov` in their name are treated as covariances.
    """
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly'))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    # Write/Create NetCDF output file
    saveas = os.path.join(dirname, 'tc_extfluxes.nc')
    ncf = io.CT_CDF(saveas, 'write')
    # try/finally closes the output file on the early return and on errors too
    try:
        dimdate = ncf.add_date_dim()
        ncf.add_date_dim_format()
        dimregs = ncf.add_region_dim(type='tc_ext')

        #
        # set title and tell GMT that we are using "pixel registration"
        #
        setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes')
        setattr(ncf, 'node_offset', 1)

        skip = ncf.has_date(ncfdate)
        if skip:
            logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
        else:
            infile = os.path.join(dacycle['dir.analysis'], 'data_tc_weekly', 'tcfluxes.nc')
            if not os.path.exists(infile):
                logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
                return None

            ncf_in = io.ct_read(infile, 'read')
            try:
                # Get the date variable, and find index corresponding to the dacycle date
                try:
                    dates = ncf_in.variables['date'][:]
                except KeyError:
                    logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
                    logging.error("Please make sure you create gridded fluxes before making extended TC fluxes")
                    raise

                try:
                    index = dates.tolist().index(ncfdate)
                except ValueError:
                    logging.error("The requested cycle date is not yet available in file %s " % infile)
                    logging.error("Please make sure you create state based fluxes before making extended TC fluxes ")
                    raise

                # First add the date for this cycle to the file, this grows the unlimited dimension
                savedict = ncf.standard_var(varname='date')
                savedict['values'] = ncfdate
                savedict['dims'] = dimdate
                savedict['count'] = index
                ncf.add_data(savedict)

                # Now convert other variables that were inside the tcfluxes.nc file
                for vname in ncf_in.variables:
                    if vname in ('latitude', 'longitude', 'date', 'idate'):
                        continue

                    # Only read the variables that are actually converted
                    data = ncf_in.get_variable(vname)[index]

                    if 'cov' in vname:
                        tcdata = ExtendedTCRegions(data, cov=True)

                        savedict = ncf.standard_var(varname=vname)
                        savedict['units'] = '[mol/region/s]**2'
                        savedict['dims'] = dimdate + dimregs + dimregs
                    else:
                        tcdata = ExtendedTCRegions(data, cov=False)

                        savedict = ncf.standard_var(varname=vname)
                        savedict['dims'] = dimdate + dimregs
                        savedict['units'] = 'mol/region/s'

                    savedict['count'] = index
                    savedict['values'] = tcdata
                    ncf.add_data(savedict)
            finally:
                ncf_in.close()
    finally:
        ncf.close()

    logging.info("TransCom weekly average extended fluxes now written")

    return saveas
def save_weekly_avg_tc_data(dacycle, statevector):
    """
    Create a NetCDF file with the state vector fluxes of one cycle projected
    onto TransCom regions.

    :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
    :param statevector: a :class:`~da.baseclasses.statevector.StateVector`
    :returns: the name of the output file (`tcfluxes.nc`), or None when the
              needed `statefluxes.nc` input file does not exist
    :raises KeyError: if the input file has no `date` variable
    :raises ValueError: if the cycle date is not present in the input file

    Every variable of `statefluxes.nc`, except the coordinate, date and
    `std` variables, is projected with the `vector2tc` method of the state
    vector. For ensemble variables the projected members are turned into a
    covariance between regions, which is written under the variable name
    with `ensemble` replaced by `cov`.
    """
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly'))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    # Write/Create NetCDF output file
    saveas = os.path.join(dirname, 'tcfluxes.nc')
    ncf = io.CT_CDF(saveas, 'write')
    # try/finally closes the output file on the early return and on errors too
    try:
        dimdate = ncf.add_date_dim()
        ncf.add_date_dim_format()
        dimregs = ncf.add_region_dim(type='tc')

        #
        # set title and tell GMT that we are using "pixel registration"
        #
        setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes')
        setattr(ncf, 'node_offset', 1)

        skip = ncf.has_date(ncfdate)
        if skip:
            logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
        else:
            # Get input data
            infile = os.path.join(dacycle['dir.analysis'], 'data_state_weekly', 'statefluxes.nc')
            if not os.path.exists(infile):
                logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
                return None

            ncf_in = io.ct_read(infile, 'read')
            try:
                # Get the date variable, and find index corresponding to the dacycle date
                try:
                    dates = ncf_in.variables['date'][:]
                except KeyError:
                    logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
                    logging.error("Please make sure you create gridded fluxes before making TC fluxes ")
                    raise

                try:
                    index = dates.tolist().index(ncfdate)
                except ValueError:
                    logging.error("The requested cycle date is not yet available in file %s " % infile)
                    logging.error("Please make sure you create state based fluxes before making TC fluxes")
                    raise

                # First add the date for this cycle to the file, this grows the unlimited dimension
                savedict = ncf.standard_var(varname='date')
                savedict['values'] = ncfdate
                savedict['dims'] = dimdate
                savedict['count'] = index
                ncf.add_data(savedict)

                # Now convert the other variables of the input file
                for vname in ncf_in.variables:
                    if vname in ['latitude', 'longitude', 'date', 'idate'] or 'std' in vname:
                        continue

                    # Only read the variables that are actually converted
                    data = ncf_in.get_variable(vname)[index]

                    if 'ensemble' in vname:
                        members_tc = np.array([statevector.vector2tc(vectordata=member) for member in data])
                        # np.dot also works on numpy versions whose arrays
                        # lack a dot() method
                        tcdata = np.dot(members_tc.transpose(), members_tc) / (statevector.nmembers - 1)

                        savedict = ncf.standard_var(varname=vname.replace('ensemble', 'cov'))
                        savedict['units'] = '[mol/region/s]**2'
                        savedict['dims'] = dimdate + dimregs + dimregs
                    else:
                        tcdata = statevector.vector2tc(vectordata=data)  # vector to TC

                        savedict = ncf.standard_var(varname=vname)
                        savedict['dims'] = dimdate + dimregs
                        savedict['units'] = 'mol/region/s'

                    savedict['count'] = index
                    savedict['values'] = tcdata
                    ncf.add_data(savedict)
            finally:
                ncf_in.close()
    finally:
        ncf.close()

    logging.info("TransCom weekly average fluxes now written")

    return saveas
def write_sample_coords(self, obsinputfile):
    """
    Write the information needed by the observation operator to a file.

    :param obsinputfile: the full filename of the NetCDF file to create
    :rtype: None

    The file gets one record per entry of ``self.datalist`` with the
    variables `obs_num`, `date_components`, `latitude`, `longitude`,
    `altitude`, `sampling_strategy` and `obs_id`. When there are no
    observations the file is created with its dimensions only.
    """
    f = io.CT_CDF(obsinputfile, method='create')
    logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile)

    def add_variable(**fields):
        # Start from the standard variable description and override only
        # the given fields, then write the variable to the file.
        savedict = io.std_savedict.copy()
        savedict.update(fields)
        f.add_data(savedict)

    # Close the file on every path, also when one of the writes fails.
    try:
        dimid = f.add_dim('obs', len(self.datalist))
        dim200char = f.add_dim('string_of200chars', 200)
        dimcalcomp = f.add_dim('calendar_components', 6)

        if len(self.datalist) == 0:
            # Nothing to write: stop here instead of writing to a file that
            # was already closed, as happened before.
            logging.debug("No observations found for this time period, no data written to obs file")
            return

        add_variable(
            name="obs_num",
            dtype="int",
            long_name="Unique_Dataset_observation_index_number",
            units="",
            dims=dimid,
            values=self.getvalues('id').tolist(),
            comment="Unique index number within this dataset ranging from 0 to UNLIMITED.",
        )

        add_variable(
            dtype="int",
            name="date_components",
            units="integer components of UTC date/time",
            dims=dimid + dimcalcomp,
            values=[[d.year, d.month, d.day, d.hour, d.minute, d.second]
                    for d in self.getvalues('xdate')],
            missing_value=-9,
            comment="Calendar date components as integers. Times and dates are UTC.",
            order="year, month, day, hour, minute, second",
        )

        add_variable(
            name="latitude",
            units="degrees_north",
            dims=dimid,
            values=self.getvalues('lat').tolist(),
            missing_value=-999.9,
        )

        add_variable(
            name="longitude",
            units="degrees_east",
            dims=dimid,
            values=self.getvalues('lon').tolist(),
            missing_value=-999.9,
        )

        add_variable(
            name="altitude",
            units="meters_above_sea_level",
            dims=dimid,
            values=self.getvalues('height').tolist(),
            missing_value=-999.9,
        )

        add_variable(
            dtype="int",
            name="sampling_strategy",
            units="NA",
            dims=dimid,
            values=self.getvalues('samplingstrategy').tolist(),
            missing_value=-9,
        )

        add_variable(
            dtype="char",
            name="obs_id",
            units="NOAA database identifier",
            dims=dimid + dim200char,
            values=self.getvalues('evn'),
            missing_value='!',
        )
    finally:
        f.close()

    logging.debug("Successfully wrote data to obs file")
    logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile)
def write_sample_coords(self, obsinputfile):
    """
    Write the information needed by the observation operator to a NetCDF file.

    When there are no samples, nothing is written (no file is created) and
    only a debug message is logged. Otherwise the file is created, one global
    attribute is added per entry of `self.site_move`, and one variable is
    written per sample property: obs_num, date_components, latitude,
    longitude, altitude, sampling_strategy, obs_id, observed and
    modeldatamismatch.

    :param obsinputfile: the full filename of the NetCDF file to create
    :rtype: None
    """
    if len(self.datalist) == 0:
        logging.debug("No observations found for this time period, nothing written to obs file")
        return

    f = io.CT_CDF(obsinputfile, method='create')
    logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile)

    try:
        dimid = f.add_dim('obs', len(self.datalist))
        dim200char = f.add_dim('string_of200chars', 200)
        # This dimension is added to the file although no variable below uses it.
        f.add_dim('string_of10chars', 10)
        dimcalcomp = f.add_dim('calendar_components', 6)

        # items() behaves the same here on Python 2 and also exists on Python 3.
        for key, value in self.site_move.items():
            # `value` must be a 2-tuple to fill both format fields.
            msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value
            f.add_attribute(key, msg)

        data = self.getvalues('id')

        savedict = io.std_savedict.copy()
        savedict['name'] = "obs_num"
        savedict['dtype'] = "int"
        savedict['long_name'] = "Unique_Dataset_observation_index_number"
        savedict['units'] = ""
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
        f.add_data(savedict)

        # One row of six integers (year ... second) per sample.
        data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')]

        savedict = io.std_savedict.copy()
        savedict['dtype'] = "int"
        savedict['name'] = "date_components"
        savedict['units'] = "integer components of UTC date/time"
        savedict['dims'] = dimid + dimcalcomp
        savedict['values'] = data
        savedict['missing_value'] = -9
        savedict['comment'] = "Calendar date components as integers. Times and dates are UTC."
        savedict['order'] = "year, month, day, hour, minute, second"
        f.add_data(savedict)

        data = self.getvalues('lat')

        savedict = io.std_savedict.copy()
        savedict['name'] = "latitude"
        savedict['units'] = "degrees_north"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -999.9
        f.add_data(savedict)

        data = self.getvalues('lon')

        savedict = io.std_savedict.copy()
        savedict['name'] = "longitude"
        savedict['units'] = "degrees_east"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -999.9
        f.add_data(savedict)

        data = self.getvalues('height')

        savedict = io.std_savedict.copy()
        savedict['name'] = "altitude"
        savedict['units'] = "meters_above_sea_level"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -999.9
        f.add_data(savedict)

        data = self.getvalues('samplingstrategy')

        savedict = io.std_savedict.copy()
        savedict['dtype'] = "int"
        savedict['name'] = "sampling_strategy"
        savedict['units'] = "NA"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -9
        f.add_data(savedict)

        data = self.getvalues('evn')

        savedict = io.std_savedict.copy()
        savedict['dtype'] = "char"
        savedict['name'] = "obs_id"
        savedict['units'] = "ObsPack datapoint identifier"
        savedict['dims'] = dimid + dim200char
        savedict['values'] = data
        savedict['missing_value'] = '!'
        f.add_data(savedict)

        data = self.getvalues('obs')

        savedict = io.std_savedict.copy()
        savedict['name'] = "observed"
        savedict['long_name'] = "observedvalues"
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Observations used in optimization'
        f.add_data(savedict)

        data = self.getvalues('mdm')

        savedict = io.std_savedict.copy()
        savedict['name'] = "modeldatamismatch"
        savedict['long_name'] = "modeldatamismatch"
        savedict['units'] = "[mol mol-1]"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch'
        f.add_data(savedict)
    finally:
        # Close the file on every path, including errors while adding data.
        f.close()

    logging.debug("Successfully wrote data to obs file")
    logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile)
def _init_molefraction_file(ncf_out, dacycle, orig_file, nmembers):
    """Add CTDAS attributes, the nmembers dimension and empty result variables to a freshly copied file."""
    # Fall back to 'unknown' rather than failing when the variables are not set.
    user = os.environ.get('USER', 'unknown')
    host = os.environ.get('HOSTNAME', 'unknown')
    today = datetime.today().strftime('%F')

    # Modify the attributes of the file to reflect added data from CTDAS properly
    ncf_out.Caution = '==================================================================================='
    history_note = '\nOriginal observation file modified by user %s on %s\n' % (user, today)
    try:
        ncf_out.History += history_note
    except (AttributeError, TypeError):
        # No usable History attribute yet: start a new one.
        ncf_out.History = history_note
    ncf_out.CTDAS_info = ('Simulated values added from a CTDAS run by %s on %s\n' % (user, today)
                          + '\nCTDAS was run on platform %s' % (host,)
                          + '\nCTDAS job directory was %s' % (dacycle['dir.da_run'],)
                          + '\nCTDAS Da System was %s' % (dacycle['da.system'],)
                          + '\nCTDAS Da ObsOperator was %s' % (dacycle['da.obsoperator'],))
    ncf_out.CTDAS_startdate = dacycle['time.start'].strftime('%F')
    ncf_out.CTDAS_enddate = dacycle['time.finish'].strftime("%F")
    ncf_out.original_file = orig_file

    # The observation dimension is called either 'id' or 'obs'.
    if 'id' in ncf_out.dimensions:
        dimid = ('id',)
    elif 'obs' in ncf_out.dimensions:
        dimid = ('obs',)
    else:
        raise ValueError("Neither an 'id' nor an 'obs' dimension was found in %s" % orig_file)

    ncf_out.createDimension('nmembers', size=nmembers)
    dimmembers = ('nmembers',)

    # Empty variables for posterior samples and forecast sample statistics:
    # (name, long_name, units, dims, comment)
    # NOTE(review): "modeldatamismatch" is declared as a variance here, but
    # write_mole_fractions stores np.sqrt(...) of the variance in it -- confirm
    # which metadata is intended.
    new_variables = [
        ("flag_forecast", "flag_for_obs_model in forecast", "None", dimid,
         'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'),
        ("modeldatamismatch", "modeldatamismatch", "[mol mol-1]^2", dimid,
         'Variance of mole fractions resulting from model-data mismatch'),
        ("totalmolefractionvariance_forecast", "totalmolefractionvariance of forecast", "[mol mol-1]^2", dimid,
         'Variance of mole fractions resulting from prior state and model-data mismatch'),
        ("modelsamplesmean", "mean modelsamples", "mol mol-1", dimid,
         'simulated mole fractions based on optimized state vector'),
        ("modelsamplesmean_forecast", "mean modelsamples from forecast", "mol mol-1", dimid,
         'simulated mole fractions based on prior state vector'),
        ("modelsamplesstandarddeviation", "standard deviaton of modelsamples over all ensemble members",
         "mol mol-1", dimid,
         'std dev of simulated mole fractions based on optimized state vector'),
        ("modelsamplesstandarddeviation_forecast",
         "standard deviaton of modelsamples from forecast over all ensemble members", "mol mol-1", dimid,
         'std dev of simulated mole fractions based on prior state vector'),
        ("modelsamplesensemble", "modelsamples over all ensemble members", "mol mol-1", dimid + dimmembers,
         'ensemble of simulated mole fractions based on optimized state vector'),
        ("modelsamplesensemble_forecast", "modelsamples from forecast over all ensemble members",
         "mol mol-1", dimid + dimmembers,
         'ensemble of simulated mole fractions based on prior state vector'),
    ]
    for name, long_name, units, dims, comment in new_variables:
        savedict = io.std_savedict.copy()
        savedict['name'] = name
        savedict['long_name'] = long_name
        savedict['units'] = units
        savedict['dims'] = dims
        savedict['comment'] = comment
        ncf_out.add_variable(savedict)


def write_mole_fractions(dacycle):
    """
    Write Sample information to NetCDF files. These files are organized by site and
    have an unlimited time axis to which data is appended each cycle.

    The needed information is obtained from the sample_auxiliary.nc files and the
    original input data files from ObsPack.

    The steps are:

    (1) Create a directory to hold timeseries output files
    (2) Read the sample_auxiliary.nc file for this cycle and get a list of
        original files they were obtained from
    (3) Read the optimizer.yyyymmdd.nc file of this cycle, if present, for the
        forecast (prior) sample statistics
    (4) For each file, copy the original data file from ObsPack (if not yet present)
    (5) Open the copied file, find the index of each observation, fill in the
        simulated data

    As a side effect, dacycle['time.sample.stamp'] is set to "<start>_<end>".

    :param dacycle: mapping with the cycle settings; the object must also
        provide a `dasystem` mapping
    :rtype: None
    :raises ValueError: if a copied file lacks the expected observation
        dimension or observation-number variable
    """
    # Step (1): directory for the per-site timeseries files
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_molefractions'))

    startdate = dacycle['time.start']
    enddate = dacycle['time.end']

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    dacycle['time.sample.stamp'] = "%s_%s" % (startdate.strftime("%Y%m%d%H"), enddate.strftime("%Y%m%d%H"),)

    # Step (2): Get the posterior sample output data file for this cycle
    infile = os.path.join(dacycle['dir.output'], 'sample_auxiliary_%s.nc' % dacycle['time.sample.stamp'])

    ncf_in = io.ct_read(infile, 'read')
    try:
        obs_num = ncf_in.get_variable('obs_num')
        simulated = ncf_in.get_variable('modelsamples')
        infilename = ncf_in.get_variable('inputfilename')
    finally:
        ncf_in.close()
    infiles1 = netCDF4.chartostring(infilename).tolist()

    # In case of reanalysis on different platform, obspack-input-directory might have a different name.
    # This is checked here, and the filenames are corrected
    dir_from_rc = dacycle.dasystem['obspack.input.dir']
    dir_from_output = infiles1[0]

    d1 = dir_from_rc[:dir_from_rc.find('obspacks')]
    d2 = dir_from_output[:dir_from_output.find('obspacks')]
    if d1 == d2:
        infiles = infiles1
    else:
        infiles = [ff.replace(d2, d1) for ff in infiles1]

    # Step (3): Get the prior sample output data file for this cycle
    infile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % startdate.strftime('%Y%m%d'))
    optimized_present = os.path.exists(infile)

    if optimized_present:
        ncf_fc_in = io.ct_read(infile, 'read')
        try:
            fc_obs_num = ncf_fc_in.get_variable('obspack_num')
            fc_simulated = ncf_fc_in.get_variable('modelsamplesmean_prior')
            fc_simulated_ens = ncf_fc_in.get_variable('modelsamplesdeviations_prior')
            fc_flag = ncf_fc_in.get_variable('flag')
            if 'opt.algorithm' not in dacycle.dasystem or dacycle.dasystem['opt.algorithm'] == 'serial':
                fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance')
                fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance')
            elif dacycle.dasystem['opt.algorithm'] == 'bulk':
                # The bulk algorithm stores matrices; only the diagonals are used here.
                fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance').diagonal()
                fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance').diagonal()
            filesitecode = ncf_fc_in.get_variable('sitecode')
        finally:
            ncf_fc_in.close()

        fc_sitecodes = netCDF4.chartostring(filesitecode).tolist()

        # Expand the list of input files with those available from the forecast list
        infiles_rootdir = os.path.split(infiles[0])[0]
        infiles.extend(os.path.join(infiles_rootdir, f + '.nc') for f in fc_sitecodes)

        fc_obs_num_list = fc_obs_num.tolist()

    # Convert once instead of once per observation inside the loops below.
    obs_num_list = obs_num.tolist()

    # Steps (4) and (5): For each observation timeseries we now have data for, open it and fill with data
    for orig_file in set(infiles):

        if not os.path.exists(orig_file):
            logging.error("The original input file (%s) could not be found, continuing to next file..." % orig_file)
            continue

        copy_file = os.path.join(dirname, os.path.split(orig_file)[-1])
        is_new_copy = not os.path.exists(copy_file)
        if is_new_copy:
            shutil.copy(orig_file, copy_file)
            logging.debug("Copied a new original file (%s) to the analysis directory" % orig_file)
        else:
            logging.debug("Modifying existing file (%s) in the analysis directory" % copy_file)

        ncf_out = io.CT_CDF(copy_file, 'write')
        try:
            if is_new_copy:
                _init_molefraction_file(ncf_out, dacycle, orig_file, simulated.shape[1])

            # Get existing file obs_nums to determine match to local obs_nums
            if 'id' in ncf_out.variables:
                file_obs_nums = ncf_out.get_variable('id')
            elif 'obspack_num' in ncf_out.variables:
                file_obs_nums = ncf_out.get_variable('obspack_num')
            else:
                raise ValueError("Neither an 'id' nor an 'obspack_num' variable was found in %s" % copy_file)
            file_obs_list = file_obs_nums.tolist()

            # Get all obs_nums related to this file, determine their indices in the local arrays.
            # zip() stops at the shorter sequence, so file names appended from
            # the forecast list beyond len(obs_num) are not paired.
            selected_obs_nums = [num for fname, num in zip(infiles, obs_num) if fname == orig_file]

            # Optimized data 1st: For each index, get the data and add to the file in the proper file index location
            for num in selected_obs_nums:
                model_index = obs_num_list.index(num)
                file_index = file_obs_list.index(num)

                var = ncf_out.variables['modelsamplesmean']
                var[file_index] = simulated[model_index, 0]

                # Member 0 is written as the mean above and left out of the spread.
                var = ncf_out.variables['modelsamplesstandarddeviation']
                var[file_index] = simulated[model_index, 1:].std()

                var = ncf_out.variables['modelsamplesensemble']
                var[file_index] = simulated[model_index, :]

            # Now forecast data too: For each index, get the data and add to the file in the proper file index location
            if optimized_present:
                # Forecast samples are matched to this file by substring: the
                # site code has to occur in the file path.
                selected_fc_obs_nums = [num for sitecode, num in zip(fc_sitecodes, fc_obs_num)
                                        if sitecode in orig_file]

                for num in selected_fc_obs_nums:
                    model_index = fc_obs_num_list.index(num)
                    file_index = file_obs_list.index(num)

                    var = ncf_out.variables['modeldatamismatch']
                    var[file_index] = np.sqrt(fc_r[model_index])

                    var = ncf_out.variables['modelsamplesmean_forecast']
                    var[file_index] = fc_simulated[model_index]

                    var = ncf_out.variables['modelsamplesstandarddeviation_forecast']
                    var[file_index] = fc_simulated_ens[model_index, 1:].std()

                    var = ncf_out.variables['modelsamplesensemble_forecast']
                    var[file_index] = fc_simulated_ens[model_index, :]

                    var = ncf_out.variables['totalmolefractionvariance_forecast']
                    var[file_index] = fc_hphtr[model_index]

                    var = ncf_out.variables['flag_forecast']
                    var[file_index] = fc_flag[model_index]
        finally:
            # close the file, also when filling it failed half-way
            ncf_out.close()

    return None
def _site_summary_row(f, year):
    """Return the tuple of per-site statistics that fills one row of the html table."""
    date = f.get_variable('time')
    obs = f.get_variable('value') * 1e6
    mdm = f.get_variable('modeldatamismatch') * 1e6
    simulated_fc = f.get_variable('modelsamplesmean_forecast') * 1e6
    simulated = f.get_variable('modelsamplesmean') * 1e6
    simulated_std = f.get_variable('modelsamplesstandarddeviation_forecast') * 1e6
    hphtr = f.get_variable('totalmolefractionvariance_forecast') * 1e6 * 1e6
    flag = f.get_variable('flag_forecast')

    # Times are converted as seconds since 1970-01-01.
    pydates = [dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date]

    if year is None:
        select = list(range(len(pydates)))
    else:
        select = [i for i, d in enumerate(pydates) if d.year == year]

    # A sample counts as "sampled" when its simulated value is not masked.
    sampled = (np.ma.getmaskarray(simulated) == False)
    sampled_fc = (np.ma.getmaskarray(simulated_fc) == False)
    keep = sampled[select]
    keep_fc = sampled_fc[select]

    flag_fc = np.array(flag)[select].compress(keep_fc)
    flag = np.array(flag)[select].compress(keep)
    pydates = np.array(pydates)[select].compress(keep)
    simulated_fc = simulated_fc[select].compress(keep_fc)
    simulated = simulated[select].compress(keep)
    simulated_std = simulated_std[select].compress(keep)
    obs_fi = obs[select].compress(keep)
    obs_fc = obs[select].compress(keep_fc)
    mdm = mdm[select].compress(keep)
    hphtr_fc = hphtr[select].compress(keep_fc)

    # Drop rejected samples (flag == 2) from the final and "forecast2" series.
    accepted = (flag != 2)
    accepted_fc = (flag_fc != 2)
    pydates = pydates.compress(accepted)
    obs_fc2 = obs_fc.compress(accepted_fc)
    simulated_fc2 = simulated_fc.compress(accepted_fc)
    simulated = simulated.compress(accepted)
    simulated_std = simulated_std.compress(accepted)
    obs_fi = obs_fi.compress(accepted)
    mdm = mdm.compress(accepted)

    summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9]]
    winter = [i for i, d in enumerate(pydates) if d.month in [11, 12, 1, 2, 3, 4]]

    residual = simulated - obs_fi
    residual_fc = simulated_fc - obs_fc
    residual_fc2 = simulated_fc2 - obs_fc2

    diff_fc2 = residual_fc2.mean()
    diff_fc = residual_fc.mean()
    diff = residual.mean()
    diffsummer = residual.take(summer).mean()
    diffwinter = residual.take(winter).mean()
    diff_fcstd = residual_fc.std()
    diff_fcstd2 = residual_fc2.std()
    diffstd = residual.std()
    diffsummerstd = residual.take(summer).std()
    diffwinterstd = residual.take(winter).std()

    chi_sq = (residual_fc ** 2 / hphtr_fc).mean()
    if mdm.mean() > 900:
        # Very large mean model-data mismatch: grey cell and placeholder value.
        chi_clr = '#EEEEEE'
        chi_sq = -99
    elif chi_sq > 1.2:
        chi_clr = '#ff0000'
    elif chi_sq < 0.5:
        chi_clr = '#ff7f00'
    else:
        chi_clr = '#00cc00'

    location = nice_lat(f.site_latitude, 'html') + ', ' + nice_lon(f.site_longitude, 'html') + ', ' + nice_alt(f.site_elevation)

    return (f.dataset_name[4:], f.site_code.upper(), f.dataset_project, f.lab_1_abbr, f.site_country, location,
            simulated.shape[0], mdm.mean(), np.sqrt((simulated_std ** 2).mean()),
            diff_fc, diff_fcstd, diff_fc2, diff_fcstd2, diff, diffstd,
            diffsummer, diffsummerstd, diffwinter, diffwinterstd,
            chi_clr, chi_sq, f.site_code.upper())


def summarize_obs(analysisdir, printfmt='html', year=2010):
    """
    Write a per-site summary table of the mole fraction files of an analysis.

    Every NetCDF file in `<analysisdir>/data_molefractions` contributes one
    table row; the table is written to `<analysisdir>/summary`, which is
    created when missing.

    :param analysisdir: analysis directory holding `data_molefractions`
    :param printfmt: 'html' writes site_table.html. 'tex' prints the table
        header to stdout and writes the closing lines to site_table.tex.
        For 'scr' and any other value no file is written.
    :param year: calendar year whose samples are summarized (default 2010);
        None uses all samples
    :rtype: None

    NOTE(review): table rows are only assembled for the html format; row
    layouts exist for 'tex' and 'scr' but nothing fills them.
    """
    sumdir = os.path.join(analysisdir, 'summary')
    if not os.path.exists(sumdir):
        logging.info("Creating new directory " + sumdir)
        os.makedirs(sumdir)

    mrdir = os.path.join(analysisdir, 'data_molefractions')
    if not os.path.exists(mrdir):
        logging.error("Input directory does not exist (%s), exiting... " % mrdir)
        return None

    infiles = [os.path.join(mrdir, name) for name in os.listdir(mrdir) if name.endswith('.nc')]

    if printfmt == 'tex':
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('\\begin{tabular*}{\\textheight}{l l l l r r r r}')
        print('Code &  Name & Lat, Lon, Elev & Lab &  N (flagged) & $\\sqrt{R}$  &Inn \\XS &Bias\\\\')
        print('\\hline\\\\ \n\\multicolumn{8}{ c }{Semi-Continuous Surface Samples}\\\\[3pt] ')
        fmt = '%8s & ' + ' %55s  & ' + '%20s &' + '%6s &' + ' %4d (%d)  & ' + ' %5.2f  & ' + ' %5.2f & ' + '%+5.2f  \\\\'
    elif printfmt == 'html':
        # Whitespace between cells is insignificant in HTML.
        tablehead = (
            "<TR>\n <TH> Site code </TH> "
            " <TH> Sampling Type </TH> "
            " <TH> Lab. </TH> "
            " <TH> Country </TH> "
            " <TH> Lat, Lon, Elev. (m ASL) </TH> "
            " <TH> No. Obs. Avail. </TH> "
            " <TH> √R (μmol mol<sup>-1</sup>) </TH> "
            " <TH> √HPH (μmol mol<sup>-1</sup>) </TH> "
            " <TH> Forecast H(x)-y (μmol mol<sup>-1</sup>) all samples </TH> "
            " <TH> Forecast H(x)-y (μmol mol<sup>-1</sup>) </TH> "
            " <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> "
            " <TH> H(x)-y (JJAS) (μmol mol<sup>-1</sup>) </TH> "
            " <TH> H(x)-y (NDJFMA) (μmol mol<sup>-1</sup>) </TH> "
            " <TH> Inn. Χ<sup>2</sup></TH> "
            " <TH> Site code </TH>\n "
            " </TR>\n"
        )
        fmt = (
            "<TR> \n "
            " <TD><a href='javascript:LoadCO2Tseries(\"%s\")'>%s </a></TD>"
            " <TD>%s</TD>"
            " <TD>%s</TD>"
            " <TD>%40s</TD>"
            " <TD>%s</TD>"
            " <TD>%d</TD>"
            " <TD>%+5.2f</TD>"
            " <TD>%+5.2f</TD>"
            " <TD>%+5.2f±%5.2f</TD>"
            " <TD>%+5.2f±%5.2f</TD>"
            " <TD>%+5.2f±%5.2f</TD>"
            " <TD>%+5.2f±%5.2f</TD>"
            " <TD>%+5.2f±%5.2f</TD>"
            " <TD bgcolor=%s>%+5.2f</TD>"
            " <TD>%s</TD>\n "
            " </TR>\n"
        )
    elif printfmt == 'scr':
        fmt = '%8s ' + ' %55s  %s %s' + ' %4d ' + ' %4d ' + ' %5.2f ' + ' %5.2f'

    table = []

    for infile in infiles:
        logging.debug(infile)
        f = io.CT_CDF(infile, 'read')
        try:
            row = _site_summary_row(f, year)
        finally:
            f.close()
        if printfmt == 'html':
            table.append(row)

    if printfmt == 'tex':
        saveas = os.path.join(sumdir, 'site_table.tex')
        with open(saveas, 'w') as out:
            out.write('\\cline{2-8}\\\\')
            out.write('\\hline \\\\')
            out.write('\\end{tabular*}')
    elif printfmt == 'html':
        saveas = os.path.join(sumdir, 'site_table.html')
        with open(saveas, 'w') as out:
            out.write("<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n")
            out.write("<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n")
            out.write(tablehead)
            for i, row in enumerate(table):
                out.write(fmt % row)
                # Repeat the header every 15 rows to keep long tables readable.
                if (i + 1) % 15 == 0:
                    out.write(tablehead)
            out.write("\n</table>")
    else:
        logging.warning("No summary file is written for print format '%s'" % printfmt)
        return None

    logging.info("File written with summary: %s" % saveas)
def summarize_stats(dacycle):
    """
    Summarize the statistics of the observations for this cycle
    This includes X2 statistics, RMSD, and others for both forecast and
    final fluxes

    The forecast data are read from `optimizer.<yyyymmdd>.nc` in
    dacycle['dir.output']. A table with one row per site is written to
    `<dir.analysis>/summary/x2_table_<yyyymmdd>.html`. For cycles starting on
    or after 2008-12-29, one `<site>_x2.html` file per site is written as
    well, collecting that site's rows from all x2 tables in the summary
    directory.

    As a side effect, dacycle['time.sample.stamp'] is set to the start date
    formatted as yyyymmdd.

    :param dacycle: mapping with the cycle settings
    :rtype: None
    :raises IOError: if the optimizer file of this cycle does not exist
    """
    sumdir = os.path.join(dacycle['dir.analysis'], 'summary')
    if not os.path.exists(sumdir):
        logging.info("Creating new directory " + sumdir)
        os.makedirs(sumdir)

    # get forecast data from optimizer.ddddd.nc
    startdate = dacycle['time.start']
    dacycle['time.sample.stamp'] = "%s" % (startdate.strftime("%Y%m%d"),)
    infile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % dacycle['time.sample.stamp'])

    if not os.path.exists(infile):
        logging.error("File not found: %s" % infile)
        raise IOError("File not found: %s" % infile)

    f = io.CT_CDF(infile, 'read')
    try:
        sites = f.get_variable('sitecode')
        y0 = f.get_variable('observed') * 1e6
        hx = f.get_variable('modelsamplesmean_prior') * 1e6
        dF = f.get_variable('modelsamplesdeviations_prior') * 1e6
        HPHTR = f.get_variable('totalmolefractionvariance').diagonal() * 1e6 * 1e6
        R = f.get_variable('modeldatamismatchvariance').diagonal() * 1e6 * 1e6
        flags = f.get_variable('flag')
    finally:
        f.close()

    HPHT = dF.dot(np.transpose(dF)).diagonal() / (dF.shape[1] - 1.0)
    rejected = (flags == 2.0)

    # Each site code is stored as an array of characters.
    sitecodes = [''.join(s.compressed()).strip() for s in sites]

    # calculate X2 per observation for this time step
    x2 = [(y0[k] - hx[k]) ** 2 / HPHTR[k] for k in range(len(sitecodes))]
    x2 = np.ma.masked_where(HPHTR == 0.0, x2)

    # calculate X2 per site
    saveas = os.path.join(sumdir, 'x2_table_%s.html' % dacycle['time.sample.stamp'])
    logging.info("Writing HTML tables for this cycle (%s)" % saveas)

    # Whitespace between cells is insignificant in HTML, but the line breaks
    # matter: the per-site tables below are built by grepping whole lines.
    tablehead = (
        "<TR>\n <TH> Site code </TH> "
        " <TH> N<sub>obs</sub> </TH> "
        " <TH> N<sub>rejected</sub> </TH> "
        " <TH> √R (μmol mol<sup>-1</sup>) </TH> "
        " <TH> √HPH<sup>T</sup> (μmol mol<sup>-1</sup>) </TH> "
        " <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \n "
        " <TH> X2 </TH> \n "
        " </TR>\n"
    )
    fmt = (
        "<TR> \n "
        " <TD>%s</TD>"
        " <TD>%d</TD>"
        " <TD>%d</TD>"
        " <TD>%+5.2f</TD>"
        " <TD>%+5.2f</TD>"
        " <TD>%+5.2f±%5.2f</TD>"
        " <TD>%5.2f</TD>\n "
        " </TR>\n"
    )

    set_sites = np.sort(list(set(sitecodes)))

    with open(saveas, 'w') as out:
        out.write("<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n")
        out.write("<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n")
        out.write(tablehead)

        for row_index, site in enumerate(set_sites):
            # The inner index has its own name: a shared name would be
            # overwritten by the comprehension on Python 2 and break the
            # header repetition below.
            sel = [k for k, s in enumerate(sitecodes) if s == site]
            ss = (site, len(sel), rejected.take(sel).sum(), np.sqrt(R.take(sel)[0]),
                  np.sqrt(HPHT.take(sel).mean()), (hx - y0).take(sel).mean(),
                  (hx - y0).take(sel).std(), x2.take(sel).mean(),)
            out.write(fmt % ss)
            # Repeat the header every 15 rows to keep long tables readable.
            if (row_index + 1) % 15 == 0:
                out.write(tablehead)

        out.write("\n</table>")

    # Now summarize for each site across time steps
    # Cycles that start before this hard-coded date skip the per-site tables.
    if not dacycle['time.start'] >= dt.datetime(2008, 12, 29):
        return

    logging.info("Writing HTML tables for each site")

    site_tablehead = (
        "<TR>\n <TH> From File </TH> "
        " <TH> Site </TH> "
        " <TH> N<sub>obs</sub> </TH> "
        " <TH> N<sub>rejected</sub> </TH> "
        " <TH> √R (μmol mol<sup>-1</sup>) </TH> "
        " <TH> √HPH<sup>T</sup> (μmol mol<sup>-1</sup>) </TH> "
        " <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \n "
        " <TH> X2 </TH> \n "
        " </TR>\n"
    )

    # Sorted so that the rows come out in a stable order on every run.
    x2_files = sorted(fil for fil in os.listdir(sumdir) if fil.startswith('x2'))

    for site in set_sites:
        saveas = os.path.join(sumdir, '%s_x2.html' % site)
        logging.debug(saveas)
        with open(saveas, 'w') as out:
            out.write("<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n")
            out.write("<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n")
            out.write(site_tablehead)

            for htmlfile in x2_files:
                lines = grep(site, os.path.join(sumdir, htmlfile))
                for line in lines:
                    out.write('<TR>\n')
                    out.write('<TD>' + htmlfile + '</TD>')
                    out.write(line + '\n')
                    out.write('</TR>\n')

            out.write("\n</table>")
def run(self):
    """
    Create the simulated-observation file by randomizing the observed values.

    Every observation read from the file named by
    dacycle['ObsOperator.inputfile'] is copied once per ensemble member, and
    each copy is perturbed with a standard normal random number multiplied by
    that observation's `modeldatamismatch` value. The result is stored in
    `self.simulated_file` as the variables `obs_num` and `flask`.
    """
    import da.tools.io4 as io
    import numpy as np

    # Create a flask output file in TM5-style (to be updated later?) to hold simulated values for later reading
    outfile = io.CT_CDF(self.simulated_file, method='create')
    logging.debug('Creating new simulated observation file in ObservationOperator (%s)' % self.simulated_file)

    # size=None is passed for the observation dimension.
    outfile.createDimension('obs_num', size=None)
    obs_dims = ('obs_num',)

    id_spec = io.std_savedict.copy()
    id_spec['name'] = "obs_num"
    id_spec['dtype'] = "int"
    id_spec['long_name'] = "Unique_Dataset_observation_index_number"
    id_spec['units'] = ""
    id_spec['dims'] = obs_dims
    id_spec['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
    outfile.add_data(id_spec, nsets=0)

    outfile.createDimension('nmembers', size=self.forecast_nmembers)
    member_dims = ('nmembers',)

    flask_spec = io.std_savedict.copy()
    flask_spec['name'] = "flask"
    flask_spec['dtype'] = "float"
    flask_spec['long_name'] = "mole_fraction_of_trace_gas_in_air"
    flask_spec['units'] = "mol tracer (mol air)^-1"
    flask_spec['dims'] = obs_dims + member_dims
    flask_spec['comment'] = "Simulated model value created by RandomizerObservationOperator"
    outfile.add_data(flask_spec, nsets=0)

    # Open file with x,y,z,t of model samples that need to be sampled
    samples = io.ct_read(self.dacycle['ObsOperator.inputfile'], method='read')

    # Get observed values, their mismatch and ID
    ids = samples.get_variable('obs_num')
    obs = samples.get_variable('observed')
    mdm = samples.get_variable('modeldatamismatch')

    # Loop over observations, add random white noise, and write to file
    for row, (obs_id, obs_value, obs_mdm) in enumerate(zip(ids, obs, mdm)):
        outfile.variables['obs_num'][row] = obs_id
        noise = np.random.randn(self.forecast_nmembers) * obs_mdm
        outfile.variables['flask'][row, :] = obs_value + noise

    outfile.close()
    samples.close()

    # Report success and exit
    logging.info('ObservationOperator finished successfully, output file written (%s)' % self.simulated_file)
def residuals_new(fig, infile, option, year=2010):
    """
    Draw the model-minus-observation residuals of one site file onto `fig`.

    The left axes show the residual time series with the model-data mismatch
    as a shaded band and rejected samples (flag 2) highlighted. The right axes
    show a histogram of the residuals with a normal distribution on top and a
    text label with mean, standard deviation, sample count and (conditionally)
    the mean chi-squared.

    :param fig: figure to draw on; it is returned
    :param infile: NetCDF file with observed and simulated mole fractions
    :param option: 'final' uses `modelsamplesmean`, 'forecast' uses
        `modelsamplesmean_forecast`
    :param year: calendar year whose samples are plotted (default 2010);
        None uses all samples
    :returns: `fig`; it is returned without any axes added when fewer than
        two simulated values are available
    :raises ValueError: for an unknown `option` or an unsupported
        `dataset_parameter` in the file

    NOTE(review): `normed=` and `set_smart_bounds()` belong to older Matplotlib
    releases -- confirm against the installed version.
    """
    if option not in ('final', 'forecast'):
        raise ValueError("Unknown option '%s', expected 'final' or 'forecast'" % (option,))

    #
    # Get data
    #
    f = io.CT_CDF(infile, 'read')
    try:
        species = f.dataset_parameter
        if species == 'co2':
            molefac = 1e6
            units = '$\\mu$mol mol$^{-1}$'
            species = "CO$_2$"
        elif species == 'co2c13':
            molefac = 1.0
            units = 'permil'
            species = "$\\delta^{13}$C"
        else:
            raise ValueError("Unsupported dataset_parameter '%s' in %s" % (species, infile))

        date = f.get_variable('time')
        obs = f.get_variable('value') * molefac
        mdm = f.get_variable('modeldatamismatch') * molefac
        if option == 'final':
            simulated = f.get_variable('modelsamplesmean') * molefac
        else:
            simulated = f.get_variable('modelsamplesmean_forecast') * molefac
        hphtr = f.get_variable('totalmolefractionvariance_forecast') * molefac * molefac
        flags = f.get_variable('flag_forecast')
        longsitestring = f.site_name + ', ' + f.site_country
        location = nice_lat(f.site_latitude, 'python') + ', ' + nice_lon(f.site_longitude, 'python') + ', ' + nice_alt(f.site_elevation)

        SDSInfo = dict((k, f.getncattr(k)) for k in f.ncattrs())
    finally:
        f.close()

    # Times are converted as seconds since 1970-01-01.
    pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date])

    if year is None:
        select = list(range(len(pydates)))
    else:
        select = [i for i, d in enumerate(pydates) if d.year == year]

    # A sample counts as "sampled" when its simulated value is not masked.
    sampled = (np.ma.getmaskarray(simulated) == False)

    if len(sampled.nonzero()[0]) < 2:
        logging.warning("Too few simulated values found, continuing...")
        return fig

    keep = sampled[select]
    simulated = simulated[select].compress(keep)
    obs = obs[select].compress(keep)
    pydates = pydates[select].compress(keep)
    mdm = mdm[select].compress(keep)
    hphtr = hphtr[select].compress(keep)
    flags = flags[select].compress(keep)

    # The selected period may hold (almost) nothing even when the file as a
    # whole does; the plot needs a first and a last date.
    if len(pydates) < 2:
        logging.warning("Too few simulated values found, continuing...")
        return fig

    rejected = (flags == 2.0)

    residual = simulated - obs
    obslabel = 'Residual'

    sd = pydates[0]
    ed = pydates[-1]

    ax1 = fig.add_axes([0.1, 0.12, 0.7, 0.75])
    ax2 = fig.add_axes([0.85, 0.12, 0.12, 0.75])

    for ax in (ax1, ax2):
        ax.spines['right'].set_color('none')
        ax.spines['top'].set_color('none')
        ax.spines['left'].set_linewidth(1.5)
        ax.spines['bottom'].set_linewidth(1.5)
        ax.spines['left'].set_position(('outward', 10))
        ax.spines['bottom'].set_position(('outward', 10))

    markersize = 8
    fontsize = 16

    ax1.plot(pydates, residual, marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None',
             markeredgecolor='k', label=obslabel, markersize=markersize)
    #
    # Add the model-data mismatch
    #
    ax1.fill_between(pydates, mdm, -1.0 * mdm, label='model-data mismatch', color='tan', alpha=0.25, zorder=5)

    #
    # Add the rejected values if available
    #
    if rejected.any():
        ax1.plot(pydates.compress(rejected), residual.compress(rejected), marker='s', markeredgewidth=1,
                 markeredgecolor='r', markerfacecolor='red', linestyle='None',
                 label='Model Rejected (N=%d)' % len(pydates.compress(rejected)), markersize=markersize)

    #
    # Axes 2
    #
    if option == 'final':
        # The statistics of the final residuals leave out rejected samples.
        accepted = (flags != 2)
        residual = simulated.compress(accepted) - obs.compress(accepted)
        pydates = pydates.compress(accepted)
        mdm = mdm.compress(accepted)
        chisquared = (residual ** 2) / mdm
    else:
        chisquared = (residual ** 2) / hphtr

    # Floor division keeps the bin count an integer on Python 2 and 3 alike.
    n, bins, patches = ax2.hist(residual, max(residual.shape[0] // 15, 15), normed=1, orientation='horizontal')
    plt.setp(patches, 'facecolor', 'tan', 'edgecolor', 'tan', label='None', alpha=0.25)

    # Create normal distributions for the line plots over the interval of the x-axis
    sc = residual.std()
    bins = np.arange(-4 * sc, 4 * sc, 0.1)
    n = normpdf(bins, residual.mean(), residual.std())
    ax2.plot(n, bins, linestyle='-', color='lightblue', linewidth=1)  # plot the PDF of the histogram in blue

    if option == 'final':
        strX = ''
    else:
        strX = 'Inn. '

    # NOTE(review): the first test is a NaN check (x != x), so a NaN chi-squared
    # also selects the label WITH chi-squared -- confirm this is intended.
    if chisquared.mean() != chisquared.mean() or mdm.mean() < 900:
        label = '%+.2f $\\pm$ %.2f\nN=%d\n%s $\\chi^2$ = %5.2f' % (
            residual.mean(), residual.std(), residual.shape[0], strX, chisquared.mean(),)
    else:
        label = '%+.2f $\\pm$ %.2f\nN=%d' % (residual.mean(), residual.std(), residual.shape[0],)

    # print the above label onto the figure. Note that I use relative coordinates for its position by specifying the transform=ax.transAxes
    ax2.text(0.6, 0.01, label, transform=ax2.transAxes, fontsize=1.1 * fontsize,
             horizontalalignment='center', color='k')

    ax2.set_ylim(-4 * sc, 4 * sc)

    ax2.spines['left'].set_position(('axes', 0.0))
    ax2.spines['right'].set_color('none')
    ax2.spines['bottom'].axis.set_ticks([])
    ax2.spines['bottom'].set_position(('axes', 0.5))

    ax2.spines['top'].set_color('none')
    ax2.spines['left'].set_smart_bounds(True)
    ax2.spines['bottom'].set_smart_bounds(True)
    ax2.spines['left'].set_linewidth(1.5)
    ax2.spines['bottom'].set_linewidth(1.5)
    ax2.spines['bottom'].set_position(('outward', 10))

    ax2.yaxis.set_ticks_position('left')
    ax2.xaxis.set_ticklabels([])

    ax2.grid(True, ls='-', color='0.75', axis='y')

    #
    # Location and format of xticks
    #
    ax1.xaxis.set_major_locator(pltdt.MonthLocator([2, 4, 6, 8, 10, 12]))
    ax1.xaxis.set_major_formatter(pltdt.DateFormatter('%Y-%b'))

    #
    # Legend
    #
    leg = ax1.legend(prop=FontProperties(size=(0.75 * fontsize)), borderpad=0.1, loc='upper left')
    leg.set_zorder(20)
    leg.get_frame().set_color('1.0')
    for lab in leg.get_texts():
        lab.set_fontsize(16)

    #
    # include grid
    #
    ax1.grid(True, ls='-', color='0.75', axis='y')
    ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1)))

    # Shade every second year, starting with the first one, in light grey.
    ymin, ymax = ax1.get_ylim()
    for yr in range(sd.year, ed.year + 1, 2):
        x1 = dt.datetime(yr, 1, 1)
        x2 = dt.datetime(yr + 1, 1, 1)
        ax1.fill([x1, x2, x2, x1], [ymin, ymin, ymax, ymax], color='0.9', zorder=1)

    ax1.set_ylim(-4 * sc, 4 * sc)

    #
    # Set Tick Font Size
    #
    matplotlib.rcParams.update({'font.size': 18})
    ax1.xaxis.set_ticks_position('bottom')

    ax1.set_ylabel(r"%s [%s]" % (species, units), fontsize=fontsize + 5)  # label y-axis

    #
    # Title
    #
    plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location, SDSInfo['dataset_project'],
                                            SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],),
                 fontsize=fontsize + 5)

    # The lab logo is not drawn: the code that decoded and showed the image is
    # disabled, so the logo is not downloaded either and no axes are reserved
    # for it.
    return fig
def save_weekly_avg_state_data(dacycle, statevector):
    """
    Write the fluxes of one cycle, aggregated onto the state vector elements, to NetCDF.

    The 1x1 fluxes of this cycle are read from ``flux1x1_<start>_<end>.nc`` in
    ``dacycle['dir.output']``, multiplied by the array returned by ``globarea()``
    (presumably the grid cell areas -- confirm) and summed per state vector element
    with ``statevector.grid2vector(..., method='sum')``. For both the prior and the
    optimized state the biosphere and ocean fluxes are then written as

    * ``<bio|ocn>_flux_<qual>``: parameter mean times the aggregated flux,
    * ``<bio|ocn>_flux_<qual>_ensemble``: member fluxes minus the flux of member 0,
    * ``<bio|ocn>_flux_<qual>_std``: standard deviation of those deviations over the members.

    Fire and fossil fluxes are written once, unscaled. Nothing is written when the
    date of this cycle is already present in the output file.

    :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
    :param statevector: a :class:`~da.baseclasses.statevector.StateVector`
    :returns: full path of the output file ``statefluxes.nc``
    :rtype: str
    :raises IOError: if no savestate file holding the prior state can be found
    """

    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly'))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']
    nlag = statevector.nlag

    area = globarea()

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    # Create or open NetCDF output file
    saveas = os.path.join(dirname, 'statefluxes.nc')
    ncf = io.CT_CDF(saveas, 'write')

    # Create dimensions
    dimregs = ncf.add_dim('nparameters', statevector.nparams)
    dimmembers = ncf.add_dim('nmembers', statevector.nmembers)
    dimdate = ncf.add_date_dim()

    # Set title and tell GMT that we are using "pixel registration"
    setattr(ncf, 'Title', 'CarbonTracker fluxes')
    setattr(ncf, 'node_offset', 1)

    # The date stored for a cycle is its midpoint, in days since 2000-01-01
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    # Skip dataset if already in file
    if ncf.has_date(ncfdate):
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:
        # If not, process this cycle. Start by getting flux input data from CTDAS
        filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))

        fluxfile = io.ct_read(filename, 'read')
        bio = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.bio.flux']))
        ocean = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.ocean.flux']))
        fire = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.fires.flux']))
        fossil = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
        #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
        # Optional extra ("sam") biosphere and fire fluxes are added to the regular ones when present
        if dacycle.dasystem['background.co2.biosam.flux'] in fluxfile.variables.keys():
            bio = bio + np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
            fire = fire + np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
        fluxfile.close()

        # Index along the unlimited (date) dimension at which this cycle is stored
        next_index = ncf.inq_unlimlen()[0]

        vectorbio = statevector.grid2vector(griddata=bio * area, method='sum')
        vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum')
        vectorfire = statevector.grid2vector(griddata=fire * area, method='sum')
        vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum')

        # Start adding datasets from here on, both prior and posterior datasets for bio and ocn
        for prior in [True, False]:
            if prior:
                # Fill the statevector with the prior values for this time step, taken from
                # the first savestate file that exists, searching from lag nlag down to lag 1.
                qual_short = 'prior'
                for n in range(nlag, 0, -1):
                    priordate = startdate + n * dt - timedelta(dt.days * n)
                    savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
                    filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
                    if os.path.exists(filename):
                        statevector.read_from_file(filename, qual=qual_short)
                        # Replace the mean statevector by all ones (assumed priors)
                        statemean = np.ones((statevector.nparams,))
                        choicelag = n
                        logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag))
                        break
                else:
                    # Without a prior state there is nothing sensible to write; fail with a
                    # clear message instead of a NameError on the variables set above.
                    raise IOError('No savestate file with prior state found for cycle starting %s' % startdate.strftime('%Y-%m-%d'))
            else:
                qual_short = 'opt'
                savedir = dacycle['dir.output']
                filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
                statevector.read_from_file(filename, qual=qual_short)
                choicelag = 1
                statemean = statevector.ensemble_members[choicelag - 1][0].param_values
                logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag))

            members = statevector.ensemble_members[choicelag - 1]

            for short, label, vectorflux in [('bio', 'Biosphere', vectorbio), ('ocn', 'Ocean', vectorocn)]:
                # Mean flux: the prior mean is all ones, so only the posterior is really scaled
                savedict = ncf.standard_var(varname='%s_flux_%s' % (short, qual_short))
                savedict['values'] = statemean * vectorflux  # units of mole region-1 s-1
                savedict['dims'] = dimdate + dimregs
                savedict['count'] = next_index
                ncf.add_data(savedict)

                # Here comes a special provision for the posterior flux covariances: these are
                # calculated relative to the prior flux covariance to ensure they are indeed
                # smaller due to the data assimilation. If they would be calculated relative to
                # the mean posterior flux, the uncertainties would shift just because the mean
                # flux had increased or decreased, which is not what we want.
                #
                # The implementation is done by multiplying the ensemble with the aggregated flux
                # only, and not with the statemean values which are assumed 1.0 in the prior always.
                deviations = np.array([mem.param_values * vectorflux for mem in members])
                deviations = deviations - deviations[0, :]

                savedict = ncf.standard_var(varname='%s_flux_%s_ensemble' % (short, qual_short))
                savedict['values'] = deviations.tolist()
                savedict['dims'] = dimdate + dimmembers + dimregs
                savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
                savedict['units'] = "mol region-1 s-1"
                savedict['count'] = next_index
                ncf.add_data(savedict)

                savedict = ncf.standard_var('unknown')
                savedict['name'] = '%s_flux_%s_std' % (short, qual_short)
                savedict['long_name'] = '%s flux standard deviation, %s' % (label, qual_short)
                savedict['values'] = deviations.std(axis=0)
                savedict['dims'] = dimdate + dimregs
                savedict['comment'] = "This is the standard deviation on each parameter"
                savedict['units'] = "mol region-1 s-1"
                savedict['count'] = next_index
                ncf.add_data(savedict)

        # Fire and fossil fluxes are imposed (not optimized) and therefore written only once
        for varname, vectorflux in [('fire_flux_imp', vectorfire), ('fossil_flux_imp', vectorfossil)]:
            savedict = ncf.standard_var(varname=varname)
            savedict['values'] = vectorflux
            savedict['dims'] = dimdate + dimregs
            savedict['count'] = next_index
            ncf.add_data(savedict)

        savedict = ncf.standard_var(varname='date')
        savedict['values'] = ncfdate
        savedict['dims'] = dimdate
        savedict['count'] = next_index
        ncf.add_data(savedict)

        # Progress marker on stdout
        sys.stdout.write('.')
        sys.stdout.flush()

    # Done, close the new NetCDF file
    ncf.close()

    # Return the full name of the NetCDF file so it can be processed by the next routine
    logging.info("Vector weekly average fluxes now written")

    return saveas
import os
import sys
# Make the directory two levels up importable before the da package is imported below
sys.path.append('../../')
# Everything in the current working directory path that precedes the first 'da/'
rootdir = os.getcwd().split('da/')[0]
analysisdir = os.path.join(rootdir, 'da/analysis')
from string import join, split
from numpy import array, identity, zeros, arange, dot
import da.tools.io4 as io

# Get masks of different region definitions

matrix_file = os.path.join(analysisdir, 'copied_regions.nc')
cdf_temp = io.CT_CDF(matrix_file, 'read')
transcommask = cdf_temp.get_variable('transcom_regions')
# When the largest region number is below 23, use the 'transcom_regions_original'
# variable instead, provided the file contains it
if transcommask.max() < 23:
    if 'transcom_regions_original' in cdf_temp.variables:
        transcommask = cdf_temp.get_variable('transcom_regions_original')
olson240mask = cdf_temp.get_variable('regions')
olsonmask = cdf_temp.get_variable('land_ecosystems')
#oifmask = cdf_temp.get_variable('ocean_regions')
dummy = cdf_temp.close()

# The 'regions' variable of the extended regions file
matrix_file = os.path.join(analysisdir, 'copied_regions_extended.nc')
cdf_temp = io.CT_CDF(matrix_file, 'read')
olson_ext_mask = cdf_temp.get_variable('regions')
dummy = cdf_temp.close()

# Names and short names of TransCom regions
def save_weekly_avg_1x1_data(dacycle, statevector):
    """
    Write the gridded fluxes of one cycle, scaled by the mapped parameters, to NetCDF.

    The 1x1 fluxes of this cycle are read from ``flux1x1_<start>_<end>.nc`` in
    ``dacycle['dir.output']``. For both the prior and the optimized state the
    biosphere and ocean fluxes are multiplied with the gridded parameter mean and
    with the gridded ensemble obtained from ``statevector.state_to_grid``; for the
    prior the mean is replaced by all ones. Fire and fossil fluxes are written
    unscaled, together with the array returned by ``globarea()`` as ``cell_area``.
    Nothing is written when the date of this cycle is already present in the file.

    :param dacycle: a :class:`~da.tools.initexit.CycleControl` object
    :param statevector: a :class:`~da.baseclasses.statevector.StateVector`
    :returns: full path of the output file ``flux_1x1.<YYYY-MM-DD>.nc``
    :rtype: str
    :raises IOError: if no savestate file holding the prior state can be found
    """

    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly'))

    # Some help variables
    dectime0 = date2num(datetime(2000, 1, 1))
    dt = dacycle['cyclelength']
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']
    nlag = statevector.nlag

    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end   date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))

    # Create or open NetCDF output file
    saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
    ncf = io.CT_CDF(saveas, 'write')

    # Create dimensions and lat/lon grid
    dimgrid = ncf.add_latlon_dim()
    dimensemble = ncf.add_dim('members', statevector.nmembers)
    dimdate = ncf.add_date_dim()

    # Set title and tell GMT that we are using "pixel registration"
    setattr(ncf, 'Title', 'CarbonTracker fluxes')
    setattr(ncf, 'node_offset', 1)

    # The date stored for a cycle is its midpoint, in days since 2000-01-01
    ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0

    # Skip dataset if already in file
    if ncf.has_date(ncfdate):
        logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
    else:
        # If not, process this cycle. Start by getting flux input data from CTDAS
        filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))

        fluxfile = io.ct_read(filename, 'read')
        bio = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.bio.flux']))
        ocean = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.ocean.flux']))
        fire = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.fires.flux']))
        fossil = np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
        #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
        # Optional extra ("sam") biosphere and fire fluxes are added to the regular ones when present
        if dacycle.dasystem['background.co2.biosam.flux'] in fluxfile.variables.keys():
            bio = bio + np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
            fire = fire + np.array(fluxfile.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
        fluxfile.close()

        # Index along the unlimited (date) dimension at which this cycle is stored
        next_index = ncf.inq_unlimlen()[0]

        # Start adding datasets from here on, both prior and posterior datasets for bio and ocn
        for prior in [True, False]:
            if prior:
                # Fill the statevector with the prior values for this time step, taken from
                # the first savestate file that exists, searching from lag nlag down to lag 1.
                qual_short = 'prior'
                for n in range(nlag, 0, -1):
                    priordate = startdate + n * dt - timedelta(dt.days * n)
                    savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
                    filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
                    if os.path.exists(filename):
                        statevector.read_from_file(filename, qual=qual_short)
                        gridmean, gridensemble = statevector.state_to_grid(lag=n)
                        # Replace the mean statevector by all ones (assumed priors)
                        gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,))
                        logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n))
                        break
                else:
                    # Without a prior state there is nothing sensible to write; fail with a
                    # clear message instead of a NameError on the variables set above.
                    raise IOError('No savestate file with prior state found for cycle starting %s' % startdate.strftime('%Y-%m-%d'))
            else:
                qual_short = 'opt'
                savedir = dacycle['dir.output']
                filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
                statevector.read_from_file(filename, qual=qual_short)
                gridmean, gridensemble = statevector.state_to_grid(lag=1)
                logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1))

            # Shape diagnostics go to the debug log rather than to stdout
            logging.debug('Shapes of gridensemble, bio, gridmean: %s %s %s' % (gridensemble.shape, bio.shape, gridmean.shape))

            # If prior, gridmean is all ones so the mean fluxes are written unscaled
            biomapped = bio * gridmean
            oceanmapped = ocean * gridmean
            biovarmapped = bio * gridensemble
            oceanvarmapped = ocean * gridensemble
            logging.debug('Shape of biovarmapped: %s' % (biovarmapped.shape,))

            # For each dataset, get the standard definitions, add values, dimensions,
            # and unlimited count, then write
            datasets = [
                ('bio_flux_' + qual_short, biomapped, dimdate + dimgrid),
                ('ocn_flux_' + qual_short, oceanmapped, dimdate + dimgrid),
                ('bio_flux_%s_ensemble' % qual_short, biovarmapped, dimdate + dimensemble + dimgrid),
                ('ocn_flux_%s_ensemble' % qual_short, oceanvarmapped, dimdate + dimensemble + dimgrid),
            ]
            for varname, values, dims in datasets:
                savedict = ncf.standard_var(varname=varname)
                savedict['values'] = values.tolist()
                savedict['dims'] = dims
                savedict['count'] = next_index
                ncf.add_data(savedict)

        # End prior/posterior block

        # Fire and fossil fluxes are imposed (not optimized) and therefore written only once
        for varname, values in [('fire_flux_imp', fire), ('fossil_flux_imp', fossil)]:
            savedict = ncf.standard_var(varname=varname)
            savedict['values'] = values.tolist()
            savedict['dims'] = dimdate + dimgrid
            savedict['count'] = next_index
            ncf.add_data(savedict)

        # cell_area has no date dimension and hence no count
        area = globarea()
        savedict = ncf.standard_var(varname='cell_area')
        savedict['values'] = area.tolist()
        savedict['dims'] = dimgrid
        ncf.add_data(savedict)

        savedict = ncf.standard_var(varname='date')
        savedict['values'] = ncfdate
        savedict['dims'] = dimdate
        savedict['count'] = next_index
        ncf.add_data(savedict)

        # Progress marker on stdout
        sys.stdout.write('.')
        sys.stdout.flush()

    # Done, close the new NetCDF file
    ncf.close()

    # Return the full name of the NetCDF file so it can be processed by the next routine
    logging.info("Gridded weekly average fluxes now written")

    return saveas
def write_sample_auxiliary(self, auxoutputfile):
    """
    Write selected information contained in the Observations object to a file.

    A new NetCDF file is created holding, per observation: the observation id,
    the calendar date components, the observed value, the model-data mismatch,
    the simulated values for all ensemble members and the name of the input file.
    The entries of ``self.site_move`` are stored as file attributes.

    When ``self.datalist`` is empty only the dimensions are created; the file is
    then closed and the method returns without writing any variables.

    :param auxoutputfile: full path of the NetCDF file to create
    :rtype: None
    """

    f = io.CT_CDF(auxoutputfile, method='create')
    logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile)

    dimid = f.add_dim('obs', len(self.datalist))
    dim200char = f.add_dim('string_of200chars', 200)
    f.add_dim('string_of10chars', 10)
    dimcalcomp = f.add_dim('calendar_components', 6)

    if len(self.datalist) == 0:
        # Nothing to write: stop here instead of writing to the file just closed
        f.close()
        logging.debug('No observations to write to auxiliary sample output file (%s)' % auxoutputfile)
        return

    def add_variable(**fields):
        # Start from the standard variable definition and override the given fields
        savedict = io.std_savedict.copy()
        savedict.update(fields)
        f.add_data(savedict)

    # Each value is a (latitude, longitude) pair filling the two format fields
    for key, value in self.site_move.items():
        msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value
        f.add_attribute(key, msg)

    add_variable(
        name="obs_num",
        dtype="int",
        long_name="Unique_Dataset_observation_index_number",
        units="",
        dims=dimid,
        values=self.getvalues('id').tolist(),
        comment="Unique index number within this dataset ranging from 0 to UNLIMITED.",
    )

    add_variable(
        dtype="int",
        name="date_components",
        units="integer components of UTC date/time",
        dims=dimid + dimcalcomp,
        values=[[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')],
        missing_value=-9,
        comment="Calendar date components as integers. Times and dates are UTC.",
        order="year, month, day, hour, minute, second",
    )

    add_variable(
        name="observed",
        long_name="observedvalues",
        units="mol mol-1",
        dims=dimid,
        values=self.getvalues('obs').tolist(),
        comment='Observations used in optimization',
    )

    add_variable(
        name="modeldatamismatch",
        long_name="modeldatamismatch",
        units="[mol mol-1]",
        dims=dimid,
        values=self.getvalues('mdm').tolist(),
        comment='Standard deviation of mole fractions resulting from model-data mismatch',
    )

    # The size of the members dimension is the second axis of the simulated values
    simulated = self.getvalues('simulated')
    dimmembers = f.add_dim('members', simulated.shape[1])
    add_variable(
        name="modelsamples",
        long_name="modelsamples for all ensemble members",
        units="mol mol-1",
        dims=dimid + dimmembers,
        values=simulated.tolist(),
        comment='simulated mole fractions based on optimized state vector',
    )

    add_variable(
        name="inputfilename",
        long_name="name of file where original obs data was taken from",
        dtype="char",
        dims=dimid + dim200char,
        values=self.getvalues('fromfile'),
        missing_value='!',
    )

    f.close()

    logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile)
def write_diagnostics(self, filename, type):
    """
    Open a NetCDF file and write diagnostic output from optimization process.

    Written for both types, with the type appended to the variable name:

        - state vector mean and deviations
        - model sample mean and deviations

    Written for type 'prior' only:

        - site codes, observed values and ObsPack observation numbers
        - model-data mismatch variance

    Written for type 'optimized' only:

        - total mole fraction variance
        - observation flags
        - Kalman gain matrix

    The type designation refers to the writing of prior or posterior data and is
    used in naming the variables. A 'prior' call creates the file, an 'optimized'
    call opens the existing file for writing.

    :param filename: full path of the NetCDF diagnostics file
    :param type: either 'prior' or 'optimized'
    :rtype: None
    :raises ValueError: if type is neither 'prior' nor 'optimized'
    """

    # Reject unsupported types up front; otherwise no file would be opened and the
    # first use of the file handle would fail with an obscure UnboundLocalError
    if type not in ('prior', 'optimized'):
        raise ValueError("type must be 'prior' or 'optimized', got %r" % (type,))

    # Open or create file
    if type == 'prior':
        f = io.CT_CDF(filename, method='create')
        logging.debug('Creating new diagnostics file for optimizer (%s)' % filename)
    else:
        f = io.CT_CDF(filename, method='write')
        logging.debug('Opening existing diagnostics file for optimizer (%s)' % filename)

    def add_variable(**fields):
        # Start from the standard variable definition and override the given fields
        savedict = io.std_savedict.copy()
        savedict.update(fields)
        f.add_data(savedict)

    # Add dimensions
    f.add_params_dim(self.nparams)
    dimmembers = f.add_members_dim(self.nmembers)
    f.add_lag_dim(self.nlag, unlimited=False)
    dimobs = f.add_obs_dim(self.nobs)
    dimstate = f.add_dim('nstate', self.nparams * self.nlag)
    dim200char = f.add_dim('string_of200chars', 200)

    # Add data, first the ones that are written both before and after the optimization
    add_variable(
        name="statevectormean_%s" % type,
        long_name="full_statevector_mean_%s" % type,
        units="unitless",
        dims=dimstate,
        values=self.x.tolist(),
        comment='Full %s state vector mean ' % type,
    )

    add_variable(
        name="statevectordeviations_%s" % type,
        long_name="full_statevector_deviations_%s" % type,
        units="unitless",
        dims=dimstate + dimmembers,
        values=self.X_prime.tolist(),
        comment='Full state vector %s deviations as resulting from the optimizer' % type,
    )

    add_variable(
        name="modelsamplesmean_%s" % type,
        long_name="modelsamplesforecastmean_%s" % type,
        units="mol mol-1",
        dims=dimobs,
        values=self.Hx.tolist(),
        comment='%s mean mole fractions based on %s state vector' % (type, type),
    )

    add_variable(
        name="modelsamplesdeviations_%s" % type,
        long_name="modelsamplesforecastdeviations_%s" % type,
        units="mol mol-1",
        dims=dimobs + dimmembers,
        values=self.HX_prime.tolist(),
        comment='%s mole fraction deviations based on %s state vector' % (type, type),
    )

    # The variance variables have one obs dimension for the 'Serial' algorithm
    # and two obs dimensions for any other algorithm
    if self.algorithm == 'Serial':
        dimvariance = dimobs
    else:
        dimvariance = dimobs + dimobs

    if type == 'prior':
        # Continue with prior only data
        add_variable(
            name="sitecode",
            long_name="site code propagated from observation file",
            dtype="char",
            dims=dimobs + dim200char,
            values=self.sitecode,
            missing_value='!',
        )

        add_variable(
            name="observed",
            long_name="observedvalues",
            units="mol mol-1",
            dims=dimobs,
            values=self.obs.tolist(),
            comment='Observations used in optimization',
        )

        add_variable(
            name="obspack_num",
            dtype="int64",
            long_name="Unique_ObsPack_observation_number",
            units="",
            dims=dimobs,
            values=self.obs_ids.tolist(),
            comment='Unique observation number across the entire ObsPack distribution',
        )

        add_variable(
            name="modeldatamismatchvariance",
            long_name="modeldatamismatch variance",
            units="[mol mol-1]^2",
            dims=dimvariance,
            values=self.R.tolist(),
            comment='Variance of mole fractions resulting from model-data mismatch',
        )
    else:
        # Continue with posterior only data
        add_variable(
            name="totalmolefractionvariance",
            long_name="totalmolefractionvariance",
            units="[mol mol-1]^2",
            dims=dimvariance,
            values=self.HPHR.tolist(),
            comment='Variance of mole fractions resulting from prior state and model-data mismatch',
        )

        add_variable(
            name="flag",
            long_name="flag_for_obs_model",
            units="None",
            dims=dimobs,
            values=self.flags.tolist(),
            comment='Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled',
        )

        add_variable(
            name="kalmangainmatrix",
            long_name="kalmangainmatrix",
            units="unitless molefraction-1",
            dims=dimstate + dimobs,
            values=self.KG.tolist(),
            comment='Kalman gain matrix of all obs and state vector elements',
        )

    f.close()
    logging.debug('Diagnostics file closed')