Beispiel #1
0
def get_model_pressure(lx, mod_type=5, ps=None, psfile=None, cater_ps='PS',
                       tracer_ps=None, tau_ps=None):
    """Build the model pressure field for ``lx`` levels from surface pressure.

    The surface pressure is taken from ``ps`` unless ``psfile`` is given, in
    which case it is read from that file and replaces ``ps``.

    Args:
        lx: number of model levels; levels 1..lx are requested.
        mod_type: model type.  4 selects the GEOS-4 rule for choosing the
            reduced vertical grid (lx == 30); any other value uses the
            GEOS-5 rule (lx < 50).
        ps: array of (nlon, nlat) surface pressure.  Ignored when ``psfile``
            is given.
        psfile: optional file holding the surface pressure.  A name that
            contains '.nc' is read as netCDF (variable 'p', index 0 of the
            third axis); anything else is read through ``get_model_value``.
        cater_ps, tracer_ps, tau_ps: category, tracer and tau0 selectors
            handed to ``get_model_value`` for non-netCDF files.

    Returns:
        The result of ``pm.get_pres_mod_2d(ps, levels, mod_type,
        use_reduced)``.

    NOTE(review): when both ``ps`` and ``psfile`` are None, None is passed on
    to ``pm.get_pres_mod_2d`` -- confirm whether that is ever intended.
    """
    if psfile is not None:
        if '.nc' in psfile:
            # netCDF file: the grid axes are read alongside 'p' but only the
            # pressure itself is used here.
            varnames = ['longitude', 'latitude', 'p']
            grd_lon, grd_lat, ps = ofb.ncf_read(psfile, varnames)
            ps = squeeze(ps[:, :, 0])
        else:
            # any other file goes through the generic model-output reader
            catergory_out, tracer_out, unit_out, tau0_out, out_array = \
                get_model_value(psfile, catergory=cater_ps,
                                tracer=tracer_ps, tau0=tau_ps)
            print('unit for ps %s' % (unit_out.strip(),))

            ps = squeeze(out_array)
            print('shape of ps, min(ps), max(ps) %s %s %s'
                  % (shape(ps), min(ps.flat), max(ps.flat)))

    # 1-based level indices 1..lx
    levels = arange(lx) + 1

    # The reduced-grid flag stays an int (0/1) because it is passed straight
    # to pm.get_pres_mod_2d.
    if mod_type == 4:  # GEOS-4
        use_reduced = 1 if lx == 30 else 0
    else:  # GEOS-5
        use_reduced = 1 if lx < 50 else 0

    pres = pm.get_pres_mod_2d(ps, levels, mod_type, use_reduced)
    print('%s %s' % (shape(pres), shape(ps)))

    return pres
Beispiel #2
0
import field_read as flr
import orbit_read as orr
import ak_read as akr
from pylab import *
from numpy import *
import time_module as tm
import pres_mod_py as pm
import gp_axis as gax
import obs_operator as obo
import netCDF_gen as nf
import numpy.random as rnd
import oco_units as ounit
import oco_feedback as ofb
import oco_units as ocunit
# Print the first eight entries of column 0 of the stored state vectors,
# scaled by ocunit.kg_s_to_GtC_Y (presumably kg/s -> GtC/yr; confirm in
# oco_units).
varnames = ['x', 'x0']
resflnm = './std_res/oco_assim_res.2003D008.nc'
x, x0 = ofb.ncf_read(resflnm, varnames)

print_x = ocunit.kg_s_to_GtC_Y * x[0:8, 0]
print(' after assimilation ' + array2string(print_x, precision=3))

# x0 is taken to be the pre-assimilation (prior) state, so it gets its own
# label; the original printed it under ' after assimilation' as well, which
# made the two output lines indistinguishable.
print_x = ocunit.kg_s_to_GtC_Y * x0[0:8, 0]
print(' before assimilation ' + array2string(print_x, precision=3))
def data_collect(start_step, nst, geos_datapath, obs_datapath, viewmode_list, \
                 err_scale=1.0, do_debug=False, do_dump=True, dumpflnm=None):
    """Collect model and observed XCO2 for an assimilation window.

    For every day of the window the model columns of the ensemble members
    that are active on that day are gathered into ``y`` (one column per
    member), the matching observations and their errors are read from the
    daily observation file, and everything is concatenated over the days.
    Optionally the result is dumped to a netCDF file under ``geos_datapath``.

    Parameters
    ----------
    start_step : index into the period table read from 'ens_pos.dat'; it
        selects the year and start day of the window.
    nst : number of periods in the window (the window spans nst*step - 1
        days, see the note on ``doys`` below).
    geos_datapath : directory holding 'ens_pos.dat'; also passed on to the
        state vector and the observation operator, and used for the dump.
    obs_datapath : directory holding the daily 'oco_<viewmode>.<date>.nc'
        observation files.
    viewmode_list : sequence indexed by (day of year - 1) giving the view
        mode string used in the observation file name.
    err_scale : factor applied to the random observation errors.
    do_debug : NOTE(review) -- overwritten with False inside the function,
        so the argument currently has no effect.
    do_dump : when true, write the collected data to a netCDF file.
    dumpflnm : optional name stem for the dump file.

    Returns
    -------
    (x, all_y, all_yobs, all_yobs_err, all_rnd_err, data_count, doys,
     all_lat, all_lon, all_time, all_lwi, all_od)

    'clim_co2.dat' is read from the current working directory.

    NOTE(review): ``stv_c``, ``gcdf`` and ``NetCDFFile`` are not among the
    imports visible at the top of this section -- confirm where they come
    from.
    """

    # --- read the ensemble/period table -----------------------------------
    # line 1 is skipped, line 2 carries step (2nd field) and nstep (4th
    # field), line 3 is skipped, the remaining lines are one row per period.
    fen = open(geos_datapath + '/' + 'ens_pos.dat', 'r')
    line = fen.readline()
    line = fen.readline()
    terms = line.split()
    step = int(terms[1])
    nstep = int(terms[3])
    line = fen.readline()
    lines = fen.readlines()
    fen.close()
    em_st = list()
    em_end = list()
    yyyy_st = list()
    yyyy_end = list()
    doy_st = list()
    doy_end = list()
    co2flnm = list()
    sel_x_idx = range(1, 4)
    for line in lines:
        line = line.strip()
        # the table ends at the first blank line
        if (len(line) < 1):
            break

        terms = line.split()
        print terms

        # columns: first/last ensemble member, start/end year,
        # start/end day of year, file name
        em_st.append(int(terms[0]))
        em_end.append(int(terms[1]))
        yyyy_st.append(int(terms[2]))
        yyyy_end.append(int(terms[3]))
        doy_st.append(int(terms[4]))
        doy_end.append(int(terms[5]))
        co2flnm.append(terms[6])

    yyyy = yyyy_st[start_step]
    doy0 = doy_st[start_step]
    doy1 = doy0 + nst * step - 1  # 096 0406
    # set up the start days
    # one 'YYYYDddd' stamp per period start inside the window
    st_days = list()
    for iday in range(doy0, doy1 + 1, step):
        sday = r'%4.4dD%3.3d' % (yyyy, iday)
        st_days.append(sday)
    print st_days
    # NOTE(review): this discards the caller's do_debug argument.
    do_debug = False
    iplot = 0

    state_v = stv_c.state_vector(st_days,
                                 do_debug=False,
                                 datapath=geos_datapath)
    nx, ne = state_v.nx, state_v.ne
    print 'nx, ne', nx, ne
    # number of state-vector entries per period start
    nreg = (nx) / size(st_days)
    x = array(state_v.stv)
    data_count = list()
    istep = 0
    # NOTE(review): arange excludes doy1, so the last day of the window is
    # not processed -- confirm this is intended.
    doys = arange(doy0, doy1)

    iday = 0
    nusd_obs = 0
    # --- climatological a-priori profile ----------------------------------
    # column 0 is stored as apr_pres, column 1 as aprior
    fclim = open('clim_co2.dat', 'r')
    lines = fclim.readlines()
    fclim.close()
    aprior = list()
    apr_pres = list()
    for iline in lines:
        terms = iline.split()
        aprior.append(float(terms[1]))
        apr_pres.append(float(terms[0]))
    aprior = array(aprior)
    apr_pres = array(apr_pres)
    # the observation operator is handed log10 of the pressure column
    apr_pres = log10(apr_pres)

    # starting to collect the observations and y
    istep = 0
    # figure(1)
    # show()
    # figure(1)
    sel_doys = [-1, -8, -15]
    day_cnt = 0

    for doy in doys:
        yyyy, mm, dd = tm.doy_to_time_array(doy, yyyy)

        print 'year mm dd', yyyy, mm, dd
        y = list()
        iend = 0
        full_doy = r'%4.4dD%3.3d' % (yyyy, doy)
        # check whether it is necessary to include surface flux (stv) during certain days
        # iend counts the period starts that are not later than the current
        # day (string comparison of the fixed-width 'YYYYDddd' stamps).
        for st_dd in st_days[:]:
            if (full_doy < st_dd):
                print full_doy, st_dd
                break
            else:
                iend = iend + 1

        # state entries (and ensemble members, +1) active on this day
        real_nx = iend * nreg
        real_ne = real_nx + 1
        # generate x

        # the ensemble members need to be included
        # select every table row whose first member is <= real_ne, stopping
        # at the row that contains real_ne.
        # NOTE(review): iend is not bounds-checked against len(em_st).
        sel_eid = list()
        iend = 0
        print em_st
        while True:
            print em_st[iend], real_ne
            if (real_ne < em_st[iend]):
                break
            else:
                sel_eid.append(iend)
                if (real_ne <= em_end[iend]):
                    break
                else:
                    iend = iend + 1

        # generate model obs
        sdate = r'%4.4dD%3.3d' % (yyyy, doy)
        # this first ncflnm is overwritten a few lines below
        ncflnm = obs_datapath + "oco" + "." + sdate + ".nc"
        if (doy == doys[0]):
            # suffix of the dump file: first day of the window and nst
            sdate0 = sdate
            dumpext = r"%2.2d" % nst
            dumpext = "." + sdate0 + "_N" + dumpext
        viewmode = viewmode_list[doy - 1]
        ncflnm = obs_datapath + "/oco_" + viewmode + "." + sdate + ".nc"
        print ncflnm
        std_od, std_cflag = ofb.ncf_read(ncflnm, ['od', 'cloud'])
        # iy counts the members appended to y for this day
        iy = 0
        print 'sel_eid', sel_eid

        for eid in sel_eid:
            est = em_st[eid]
            eend = em_end[eid]
            print 'sttt days', st_days[eid]
            # if (day_cnt==0):
            # the new code needs the right starting time for ctm file

            # convert the period start stamp 'YYYYDddd' to 'YYYYMMDD'
            ctm_date = st_days[eid]
            ctm_yyyy, ctm_doy = int(ctm_date[0:4]), int(ctm_date[5:8])

            ctm_yyyy, ctm_mm, ctm_dd = tm.doy_to_time_array(ctm_doy, ctm_yyyy)
            ctm_date0 = r'%4.4d%2.2d%2.2d' % (ctm_yyyy, ctm_mm, ctm_dd)

            # r'%4.4d%2.2d%2.2d' % (yyyy, mm, dd)

            print '=' * 20 + 'read in data and calculate xco2' + '=' * 20
            obs=obo.obs_operator(yyyy, mm, dd, est, eend, \
                                 aprior=aprior, apr_pres=apr_pres, \
                                 viewmode=viewmode,\
                                 datapath=geos_datapath,
                                 ctm_date0=ctm_date0)
            # do not go beyond the members that are active on this day
            ytop = min([eend, real_ne])
            for em in range(est, ytop + 1):
                if (em == est):
                    if (istep > 300):
                        print 'em', em, est
                    # first member of the row: full profile extraction
                    obs.get_obs_prof(em, std_od=std_od, std_cf=std_cflag)
                else:
                    # later members reuse the observation selection
                    obs.get_obs_prof(em, idx=used_idx, do_update=False)

                if (iy == 0):
                    # first member of the day: besides its model column, this
                    # branch fixes the observation selection (used_idx) and
                    # the per-observation metadata used for the whole day.
                    # em_id is different from em. em_id is the 'real id' in the whole ensemble set
                    em_id = obs.em_id[em - 1]
                    print 'em_id', em_id, em - 1
                    xgp0 = obs.obs_xgp[em_id]
                    xgp = obs.obs_xgp[em_id]
                    print size(xgp), size(std_od), size(std_cflag)

                    obs_err = obs.obs_err
                    obs_err = array(obs_err)
                    obs_err = obs_err
                    # print 'shape obs+err', shape(obs_err)
                    rnd_obs_err = array(obs_err)

                    # one zero-mean normal draw per observation, with the
                    # observation error as standard deviation
                    for iobs in range(size(obs_err)):
                        err_val = obs_err[iobs]
                        rnd_err = rnd.normal(scale=err_val)
                        rnd_obs_err[iobs] = rnd_err

                    # err_scale=1.0

                    if (do_debug):
                        subplot(2, 1, 1)
                        plot(obs_err)
                        plot(rnd_obs_err)
                        subplot(2, 1, 2)
                        hist(rnd_obs_err)
                        hist(obs_err)
                        show()

                    rnd_obs_err = err_scale * rnd_obs_err
                    cflag = obs.obs_cflag
                    otime = obs.obs_time
                    olat = obs.obs_lat
                    olon = obs.obs_lon
                    od = obs.obs_od
                    lwi = obs.obs_lwi
                    lwi = lwi.astype(int)
                    #                used_idx=where(logical_and(cflag==0, od<=0.3, lwi<>1))
                    osza = obs.obs_sza
                    sel1 = logical_and(cflag == 0, od <= 0.3)
                    # NOTE(review): sel2 is computed but not used in the
                    # selection below.
                    sel2 = logical_and(lwi <> 1, lwi <> 2)
                    # used_idx=where(sel1)
                    # used_idx=where(logical_and(sel1, sel2))
                    # used_idx=where(cflag==0)
                    # keep observations with cflag == 0, od <= 0.3 and
                    # obs_err < 4.0
                    used_idx = where(logical_and(obs_err < 4.0, sel1))

                    used_idx = squeeze(used_idx)
                    print 'used_idx', len(used_idx)
                    xgp = xgp[used_idx]
                    xgp0 = xgp0[used_idx]

                    # errors are scaled by 1e-6 here and back by 1e6 at the
                    # end of the function (presumably ppm <-> mole fraction;
                    # TODO confirm)
                    obs_err = 1.0e-6 * obs_err[used_idx]
                    rnd_obs_err = 1.0e-6 * rnd_obs_err[used_idx]
                    imax = argmax(rnd_obs_err)
                    print 'max(obs err), max(rnd_obs_err)', 1.0e6 * obs_err[
                        imax], 1.0e6 * rnd_obs_err[imax]
                    olat = olat[used_idx]
                    olon = olon[used_idx]
                    lwi = lwi[used_idx]
                    otime = otime[used_idx]
                    od = od[used_idx]

                    # r=array(varData)
                    # r=r*r
                else:  # just others for ensemble
                    em_id = obs.em_id[em - 1]
                    # print 'em_id', em_id, em-1
                    xgp = obs.obs_xgp[em_id]
                    # the first member of a later row was extracted without
                    # idx, so it still has to be cut down to the selection
                    if (em == est):
                        print em, est
                        print len(used_idx)
                        xgp = xgp[used_idx]

                    if (istep > 300):
                        print 'type xgp', type(xgp)
                        print 'len-xgp', len(xgp)

                if (em == 2):
                    print 'xgp', xgp[0:6]
                y.append(array(xgp))
                # NOTE(review): 'ii' is never assigned in this function; the
                # two debug branches below would raise NameError if do_debug
                # were ever true (it is forced to False above).
                if (do_debug):
                    print ii, ne, shape(xgp)
                if (do_debug and ii == 4):
                    # figure(1)
                    # plot(xgp[0:300],'r')
                    # plot(xgp0[0:300], 'b')
                    z = xgp - xgp0
                    print 'max dev', max(z)

                iy = iy + 1
        # members that are not active yet get a copy of the first member's
        # column so that y always has ne columns
        for same_y in range(real_ne, ne):
            y.append(array(xgp0))  # filled with the same value

            # show()
        # after the transpose the first axis of y runs over observations
        # and the second over ensemble members
        y = array(y)
        y = transpose(y)

        if (istep > 300):
            print type(y)
            print len(y)
            ix = y[0]
            print type(ix)

        # read in the data
        # observed xco2 and its error from the same daily file

        obs_f = NetCDFFile(ncflnm)
        yobs = obs_f.variables['xco2']
        yobs_err = obs_f.variables['err']
        yobs = array(yobs)
        # yobs_err=array(yobs_err)
        yobs_err = array(yobs_err)
        # print 'shape yobs', shape(yobs)
        yobs = squeeze(yobs)
        yobs_err = squeeze(yobs_err)

        obs_f.close()

        # apply the selection fixed by the first ensemble member of the day
        yobs = yobs[used_idx]
        yobs_err = yobs_err[used_idx]

        print 'shape y & yobs', shape(y), shape(yobs)

        # accumulate over days: start the arrays on the first day, append
        # along the observation axis afterwards
        if (istep == 0):
            all_y = array(y)
            all_yobs = array(yobs)
            all_yobs_err = array(yobs_err)
            all_rnd_err = array(rnd_obs_err)
            all_lat = array(olat)
            all_lon = array(olon)
            all_lwi = array(lwi)
            all_time = array(otime)
            all_od = array(od)

        else:
            all_y = concatenate((all_y, y))
            all_yobs = concatenate((all_yobs, yobs))
            all_yobs_err = concatenate((all_yobs_err, yobs_err))
            all_rnd_err = concatenate((all_rnd_err, rnd_obs_err))
            all_lat = concatenate((all_lat, olat))
            all_lon = concatenate((all_lon, olon))
            all_lwi = concatenate((all_lwi, lwi))
            all_time = concatenate((all_time, otime))
            all_od = concatenate((all_od, od))

        print 'istep & shape ', istep, shape(all_y), shape(all_yobs), shape(
            all_yobs_err), shape(all_rnd_err)

        istep = istep + 1
        data_count.append(size(yobs))
        # day_cnt cycles 0..temporal_resolution-1; it is not read elsewhere
        # in this function
        day_cnt = day_cnt + 1
        if (day_cnt == gcdf.temporal_resolution):
            day_cnt = 0

    data_count = array(data_count)
    # scale all XCO2 quantities by 1e6 before returning / dumping
    factor = 1.0e6
    all_y = factor * all_y
    all_yobs = factor * all_yobs
    all_rnd_err = factor * all_rnd_err
    all_yobs_err = factor * all_yobs_err

    if (do_dump):

        # dump file: <geos_datapath>/obs<ext>.nc, or
        # <geos_datapath>/<dumpflnm>_<ext>.nc when a name stem is given
        if (dumpflnm == None):
            ncdump = geos_datapath + '/' + 'obs' + dumpext + ".nc"
        else:
            ncdump = geos_datapath + '/' + dumpflnm + "_" + dumpext + ".nc"
        xnx = arange(nx)
        xne = arange(ne)
        xny = arange(size(all_y[:, 0]))
        dimTypes = ['i', 'i', 'i', 'i']
        dimVars = [xnx, xne, xny, doys]
        dimNames = ['nx', 'ne', 'ny', 'doys']
        x_info = ofb.geos_varinfo('x', 'f', ['nx', 'ne'], x)
        y_info = ofb.geos_varinfo('y', 'f', ['ny', 'ne'], all_y)
        yobs_info = ofb.geos_varinfo('obs', 'f', ['ny'], all_yobs)
        yobs_err_info = ofb.geos_varinfo('err', 'f', ['ny'], all_yobs_err)
        rnd_err_info = ofb.geos_varinfo('rnd_err', 'f', ['ny'], all_rnd_err)
        count_info = ofb.geos_varinfo('daily_count', 'i', ['doys'], data_count)
        lat_info = ofb.geos_varinfo('lat', 'f', ['ny'], all_lat)
        lon_info = ofb.geos_varinfo('lon', 'f', ['ny'], all_lon)
        time_info = ofb.geos_varinfo('time', 'f', ['ny'], all_time)
        od_info = ofb.geos_varinfo('od', 'f', ['ny'], all_od)
        lwi_info = ofb.geos_varinfo('lwi', 'i', ['ny'], all_lwi)


        ofb.ncf_write_by_varinfo(ncdump, dimNames, dimTypes, dimVars, \
                                 [x_info, y_info, yobs_info, yobs_err_info, rnd_err_info, lat_info, lon_info, time_info, od_info, lwi_info, count_info])


    return x, all_y, all_yobs, all_yobs_err, all_rnd_err, data_count, doys, \
           all_lat, all_lon, all_time, all_lwi, all_od
def gen_transcom_coef(flnm, flux_name,
                      yyyy,
                      inv_step,
                      inv_path=gcdf.inv_path,
                      modelname='GEOS4',
                      category='CO2-SRCE', ntracer=1,
                      do_debug=True,
                      test_month=0):
    """Write one flux file per month holding gridded inversion increments.

    The state increment ``whole_x - whole_x0`` is read from the inversion
    result of step ``inv_step``.  For each month of ``yyyy`` the ``nx``
    values belonging to that month are painted onto the region map (cells of
    region ``i`` receive value ``i - 1`` of the month's slice, region 0 stays
    zero) and written with ``wfbp.write_flux_record`` to
    ``<flux_name>.<YYYYMM>``.

    Args:
        flnm: only echoed to stdout.
        flux_name: path prefix of the monthly output files.
        yyyy: year to write.  The month slices are located at offset
            ``(yyyy - 2004) * 12 * nx``, i.e. the state vector is assumed to
            start in January 2004 with ``nx`` entries per month.
        inv_step: inversion step whose result file is read.
        inv_path: directory holding the ``std_oco_assim_res.<step>.nc``
            files (with or without a trailing separator).
        modelname, category, ntracer: passed through to
            ``wfbp.write_flux_record``.
        do_debug: accepted for interface compatibility; not used, the
            writer is always called with ``do_debug=False``.
        test_month: when > 0, months 1..test_month take their coefficients
            from the result file of step ``test_month - 1`` instead.

    Returns:
        None.
    """
    # function-scope import keeps this block self-contained
    import os.path

    print(flnm)
    # NOTE(review): ext1 is not used below; the call is kept in case it has
    # side effects -- confirm and drop.
    ext1 = bpy.get_name_ext_2d().strip()
    model_res = bpy.get_res_ext().strip()

    # model grid
    gc_lon = cmgrd.get_model_lon(model_res=model_res)
    gc_lat = cmgrd.get_model_lat(model_res=model_res)

    reg_m = squeeze(rco2_f.read_region_map())

    # int() so that a floating-point region map still yields valid
    # range()/slice bounds
    nreg = int(max(reg_m.flat)) + 1
    coef_m = zeros(shape(reg_m), float)

    varnames = ['whole_x', 'whole_x0', 'dx', 'sum_xtm']

    # Both result paths are built with os.path.join; the original
    # concatenated inv_path directly for this file but inserted "/" for the
    # test-month file, so only one of the two could match a given inv_path.
    sstep = r'%2.2d' % (inv_step)
    resflnm = os.path.join(inv_path, "std_oco_assim_res" + "." + sstep + ".nc")
    print(resflnm)
    x, x0, dx, xtm = ofb.ncf_read(resflnm, varnames)
    coef_a = x - x0

    coef_t = None
    if test_month > 0:
        stest_month = r'%2.2d' % (test_month - 1)
        resflnm = os.path.join(
            inv_path, "std_oco_assim_res" + "." + stest_month + ".nc")
        print(resflnm)
        x_t, x0_t, dx_t, xtm_t = ofb.ncf_read(resflnm, varnames)
        coef_t = x_t - x0_t

    nx = nreg - 1
    nst = (yyyy - 2004) * 12 * nx  # start of January's slice

    unit = 'unitless'

    for imm in range(1, 13):
        # months up to test_month come from the test-month result
        coef_src = coef_t if imm <= test_month else coef_a
        coef_cut = coef_src[nst:nst + nx]

        coef_m[:, :] = 0
        for ireg in range(1, nreg):
            sel_cells = where(reg_m == ireg)
            coef_m[sel_cells] = coef_cut[ireg - 1]

        # month boundaries; get_tau's result is divided by 3600
        # (presumably seconds -> hours; TODO confirm)
        tau0 = tm.get_tau(yyyy, imm, 1) / 3600.0
        if imm < 12:
            tau1 = tm.get_tau(yyyy, imm + 1, 1)
        else:
            tau1 = tm.get_tau(yyyy + 1, 1, 1)
        tau1 = tau1 / 3600.0

        sdate = r'%4.4d%2.2d' % (yyyy, imm)
        full_flnm = flux_name + "." + sdate

        funit = wfbp.open_flux_bpch2_file(full_flnm, title='flux')
        wfbp.write_flux_record(funit, coef_m,
                               gc_lon, gc_lat,
                               tau0, tau1,
                               ntracer,
                               modelname,
                               category,
                               unit,
                               do_debug=False)
        wfbp.close_flux_bpch2_file(funit)

        nst = nst + nx
Beispiel #5
0
    def __init__(self, cur_yyyy, doy_st, doy_end, rerun_st=1, rerun_end=2):
        """Rewrite the daily observation files with a new model-mean XCO2.

        For every day of year in [doy_st, doy_end] the original observation
        file under ``gcdf.obs_path`` is read, its 'xco2' variable is replaced
        by the result of ``self.read_new_mean_y`` for that date, and the
        record is written to a file of the same name in the current
        directory.  All other variables are copied unchanged.

        Args:
            cur_yyyy: year the days of year refer to.
            doy_st, doy_end: first and last day of year (inclusive).
            rerun_st, rerun_end: stored as ``rerun_est`` / ``rerun_eend``.

        The observation fields of the day processed last remain available
        as ``olat``, ``olon``, ``otime``, ``obs_ak``, ``obs_apr``,
        ``obs_xgp0`` and ``obs_pres``.
        """
        self.rerun_est = rerun_st
        self.rerun_eend = rerun_end
        self.rerun_datapath = gcdf.data_path
        self.cur_yyyy = cur_yyyy

        # View mode per day of year: a 32-entry pattern (two 16-entry halves)
        # repeated 12 times.  A pure 'nadir' or 'glint' setting fills both
        # halves with that mode, anything else alternates nadir / glint.
        if gcdf.view_mode == 'nadir':
            mode_pair = ('nadir', 'nadir')
        elif gcdf.view_mode == 'glint':
            mode_pair = ('glint', 'glint')
        else:
            mode_pair = ('nadir', 'glint')
        viewmode_list = ([mode_pair[0]] * 16 + [mode_pair[1]] * 16) * 12

        for idoy in range(doy_st, doy_end + 1):
            yyyy, mm, dd = tm.doy_to_time_array(idoy, self.cur_yyyy)
            sdate = r'%4.4dD%3.3d' % (yyyy, idoy)
            viewmode = viewmode_list[idoy - 1]

            # same base name for the input (obs_path) and output (cwd) file
            base_name = gcdf.view_type + "_" + viewmode + "." + sdate + ".nc"
            ncflnm = "./" + base_name
            org_ncflnm = gcdf.obs_path + "/" + base_name
            print(ncflnm)

            varnames = ['time', 'obs_lvl',
                        'lon', 'lat', 'cloud', 'nclear', 'xco2', 'xco2_ap',
                        'err', 'rnd_err',
                        'od', 'lwi', 'sza',
                        'obs_pres', 'obs_ak', 'obs_apr']
            (time, obs_lvl,
             lon, lat, cloud, nclear, xco2, xco2_ap,
             err, rnd_err,
             od, lwi, sza,
             obs_pres, obs_ak, obs_apr) = ofb.ncf_read(org_ncflnm, varnames)

            # expose the day's observation geometry before asking for the
            # new mean, which is computed by a method of this object
            self.olat = array(lat)
            self.olon = array(lon)
            self.otime = array(time)
            self.obs_ak = array(obs_ak)
            self.obs_apr = array(obs_apr)
            self.obs_xgp0 = array(xco2_ap)
            self.obs_pres = array(obs_pres)

            hm0 = squeeze(self.read_new_mean_y(yyyy, mm, dd))
            # values above 1 are scaled by 1e-6 (presumably ppm -> mole
            # fraction; TODO confirm)
            if max(hm0) > 1.0:
                hm0 = 1.0e-6 * hm0

            # variables along 'time' only; 'xco2' carries the new mean
            per_sounding = [('lon', lon), ('lat', lat), ('cloud', cloud),
                            ('nclear', nclear), ('xco2', hm0),
                            ('xco2_ap', xco2_ap), ('err', err),
                            ('rnd_err', rnd_err), ('od', od), ('lwi', lwi),
                            ('sza', sza)]
            # variables along ('time', 'obs_lvl')
            per_level = [('obs_pres', obs_pres), ('obs_ak', obs_ak),
                         ('obs_apr', obs_apr)]

            out_vars = [ofb.geos_varinfo(vname, 'f', ['time'], vdata)
                        for vname, vdata in per_sounding]
            out_vars = out_vars + [
                ofb.geos_varinfo(vname, 'f', ['time', 'obs_lvl'], vdata)
                for vname, vdata in per_level]

            ofb.ncf_write_by_varinfo(ncflnm,
                                     ['time', 'obs_lvl'],
                                     ['f', 'i'],
                                     [time, obs_lvl],
                                     out_vars)
def get_daily_avg(yyyy, doy_list, lvl_st, lvl_end, doy_end=328, **keywords):
    """Collect daily regional averages of observed and modelled profiles.

    For each day in ``doy_list`` a set of state vectors is assembled
    according to ``keywords`` and handed to ``get_daily_reg_avg``; the daily
    results are stacked along a leading day axis.

    Args:
        yyyy: year.
        doy_list: days of year to process, in increasing order; must contain
            at least one day.
        lvl_st, lvl_end: first and last vertical level (inclusive) averaged.
        doy_end: last day of the assimilation period.
        **keywords: selects the model profiles, always in this order:
            prior, posterior   -- == 1: full prior (all ones) / posterior x
            prior_bg, new_bg   -- == 1: only the first ``nreg`` entries
            prior_chm, new_chm -- == 1: only the chemistry entries
            prior_sel, new_sel -- k > 0: only entry k (1-based)

    Returns:
        (daily_cnt, daily_obs, daily_prof, key_words, pres) where
        ``key_words`` names the profiles along the last axis of
        ``daily_prof`` and ``pres`` is the squeezed pressure returned for
        the last day.

    Each keyword is evaluated on its own.  The original carried one flag
    variable through all checks without resetting it before 'new_bg' and
    'prior_chm', so e.g. ``prior_bg=1`` alone also produced 'new_bg' and
    'prior_chm'.
    """
    # end of the assimilation period
    yyyy, mm, dd = tmdl.doy_to_time_array(doy_end, yyyy)
    # NOTE(review): the file date is pinned to 28 November; the month/day
    # derived from doy_end are not used.  Kept so the same file is read.
    sdate = r'%4.4d%2.2d%2.2d' % (yyyy, 11, 28)
    xx, bend = ofb.ncf_read('co_flux.' + sdate + ".nc", ['x', 'bm'])
    xx0 = ones(size(xx), float)

    nreg = 11
    inc_nx = 12

    def tracer_block(first, last):
        """Indices of one block of tagged tracers in the reduced jacobian.

        0 == not used; first..last-1 == the regional entries (listed
        twice); last == chemistry (five tracers).
        """
        regional = list(range(first, last))
        return [0] + regional + regional + [last] * 5

    daily_obs = list()
    daily_prof = list()
    daily_cnt = list()

    # positions of the chemistry entries in x, one per block
    chm_idx = list()
    old_mm = 0

    for doy in doy_list:
        yyyy, mm, dd = tmdl.doy_to_time_array(doy, yyyy)

        # extend hidx / nx by one block for every month entered since the
        # previous day; January contributes two blocks
        if mm > old_mm:
            for imm in range(old_mm + 1, mm + 1):
                if imm == 1:
                    hidx = tracer_block(1, nreg + 1)
                    nx = 12
                    chm_idx.append(nx - 1)

                hidx = hidx + tracer_block(nx + 1, nx + inc_nx)
                nx = nx + inc_nx
                chm_idx.append(nx - 1)

            old_mm = mm

        print('%s %s %s %s %s' % (doy, yyyy, mm, dd, nx))

        # state vectors requested for this day
        xx_list = list()
        key_words = list()

        # prior vs posterior
        for key, source in (('prior', xx0), ('posterior', xx)):
            if keywords.get(key, 0) == 1:
                xx_list.append(source)
                key_words.append(key)

        # backgrounds: only the first nreg entries are kept
        for key, source in (('prior_bg', xx0), ('new_bg', xx)):
            if keywords.get(key, 0) == 1:
                xx_add = zeros(nx, float)
                xx_add[0:nreg] = source[0:nreg]
                xx_list.append(xx_add)
                key_words.append(key)

        # chemistry production: only the chemistry entries are kept
        for key, source in (('prior_chm', xx0), ('new_chm', xx)):
            if keywords.get(key, 0) == 1:
                xx_add = zeros(nx, float)
                xx_add[chm_idx] = source[chm_idx]
                xx_list.append(xx_add)
                key_words.append(key)

        # a single selected entry; the keyword value is its 1-based index
        for key, source in (('prior_sel', xx0), ('new_sel', xx)):
            sel = keywords.get(key, 0)
            if sel > 0:
                xx_add = zeros(nx, float)
                xx_add[sel - 1] = source[sel - 1]
                xx_list.append(xx_add)
                key_words.append(key)

        cnt_avg, obs_avg, prof_avg, pres = get_daily_reg_avg(
            yyyy, mm, dd, xx_list, hidx, nx, nreg, lvl_st, lvl_end)

        daily_obs.append(obs_avg)
        daily_prof.append(prof_avg)
        daily_cnt.append(cnt_avg)

    daily_obs = array(daily_obs)
    daily_prof = array(daily_prof)
    daily_cnt = array(daily_cnt)

    return daily_cnt, daily_obs, daily_prof, key_words, squeeze(pres)
def get_daily_reg_avg(yyyy, mm, dd, xx_list, hidx, nx, nreg, lvl_st, lvl_end):
    """Average one day's observed and modelled profiles over each region.

    Reads 'co_obs.YYYYMMDD.nc' (observations) and 'co_k.YYYYMMDD.nc'
    (jacobian 'h') from the current directory.

    Args:
        yyyy, mm, dd: year, month, day.
        xx_list: sequence of state vectors; one model profile is computed
            per entry (only the first ``nx`` values of each are used).
        hidx: index list used by ``flb.reform_h`` to reduce the tagged
            regions of the jacobian to combined regions.
        nx: number of state-vector entries.
        nreg: number of regions.
        lvl_st, lvl_end: first and last vertical level (inclusive) kept.

    Returns:
        cnt_avg: (nreg,) number of observations in each region.
        obs_avg: (nreg, nlvl) mean of observation minus 'ap_r' per region.
        prof_avg: (nreg, nlvl, len(xx_list)) mean model profile per region
            and state vector.
        pres: pressure levels lvl_st..lvl_end of the first observation.

    Regions without observations keep zeros.

    NOTE(review): region ids from ``rgd.get_region_id`` are used as
    ``id - 1``; an id of 0 would wrap around to the last region -- confirm
    that ids start at 1.
    """
    sdate = r'%4.4d%2.2d%2.2d' % (yyyy, mm, dd)

    matplotlib.rcParams['legend.fancybox'] = True

    # read obs
    olon, olat, opres, olvls, obs, oap_r, oak, oerr = ofb.ncf_read(
        'co_obs.' + sdate + ".nc",
        ['lon', 'lat', 'pres', 'lvls', 'obs', 'ap_r', 'ak', 'err'])

    nlvl = lvl_end - lvl_st + 1
    lvl_sel = slice(lvl_st, lvl_end + 1)

    obs_avg = zeros([nreg, nlvl], float)
    prof_avg = zeros([nreg, nlvl, len(xx_list)], float)
    # plain int instead of numpy's abstract `integer` type, which is
    # deprecated as a dtype specifier
    cnt_avg = zeros(nreg, int)

    # read h and reduce it to hm
    prof_h = squeeze(ofb.ncf_read('co_k.' + sdate + ".nc", ['h']))
    hm = flb.reform_h(prof_h, olvls, array(hidx), nx)
    reg_id_list = rgd.get_region_id(olat, olon)

    # accumulate sums per region
    for iobs in range(size(olon)):
        ml = olvls[iobs]  # number of valid levels of this observation
        ihm = hm[iobs, 0:ml, :]
        prof_obs = obs[iobs, :] - oap_r[iobs, :]
        ireg = reg_id_list[iobs] - 1

        cnt_avg[ireg] += 1
        obs_avg[ireg, :] += prof_obs[lvl_sel]
        for imd, xval in enumerate(xx_list):
            prof = dot(ihm, xval[0:nx])
            prof_avg[ireg, :, imd] += prof[lvl_sel]

    # turn sums into means where there is at least one observation
    for ireg in range(nreg):
        nobs = cnt_avg[ireg]
        if nobs > 0:
            obs_avg[ireg, :] /= nobs
            prof_avg[ireg, :, :] /= nobs

    return cnt_avg, obs_avg, prof_avg, opres[0, lvl_st:lvl_end + 1]
def data_collect(yyyy, doy, \
                nst,\
                step,\
                em_st,\
                em_end,\
                geos_datapath, \
                obs_datapath,\
                viewmode_list, \
                not_first_period=False,\
                err_scale=1.0, \
                do_debug=False,\
                do_dump=True, \
                dumpflnm=None):
    """ doys  in date to be read in read in
    
    """
    
    doy0=doy
    doy1=doy0 +nst*step # 096 0406
    # set up the start days
    doys=arange(doy0, doy1)
    
    iday=0
    nusd_obs=0
    fclim=open('clim_co2.dat', 'r')
    lines=fclim.readlines()
    fclim.close()
    aprior=list()
    apr_pres=list()
    
    for iline in lines:
        terms=iline.split()
        aprior.append(float(terms[1]))
        apr_pres.append(float(terms[0]))

    aprior=array(aprior)
    apr_pres=array(apr_pres)
    apr_pres=log10(apr_pres)
    
    # starting to collect the observations and y
    #
    # figure(1)
    # show()
    # figure(1)
    sel_doys=[-1, -8, -15]
    int_step=0
    all_days=size(em_st)
    all_data_list=arange(all_data)
    int_step=0
    out_step=step
    data_count=list()
    print doy0, doy1, doys
    # tt=raw_input()
    
    for doy in doys:
        yyyy, mm,dd=tm.doy_to_time_array(doy, yyyy)
        print 'year mm dd', yyyy, mm, dd
        y=list()
        iend=0
        full_doy=r'%4.4dD%3.3d' % (yyyy, doy)
        sdate=r'%4.4dD%3.3d' % (yyyy, doy)
        # check whether it is necessary to include surface flux (stv) during cerntain days 
        iy=0
        if (int_step==0):
            dumpext=r"%2.2d" % nst
            dumpext="."+sdate+"_N"+dumpext
        
        viewmode=viewmode_list[doy-1]
        ncflnm_obs=obs_datapath+"/oco_"+viewmode+"."+sdate+".nc"
        print ncflnm_obs
        std_od, std_cflag=ofb.ncf_read(ncflnm_obs, ['od', 'cloud'])
        
        for eid in all_data_list:
            est=em_st[eid]
            eend=em_end[eid]
            sdate=r'%4.4dD%3.3d' % (yyyy, doy)
            ncflnm=obs_datapath+"oco"+"."+sdate+".nc"
            # print 'sel_eid', sel_eid
            obs=obo.obs_operator(yyyy, mm, dd, est, eend, \
                                     aprior=aprior, apr_pres=apr_pres, \
                                     viewmode=viewmode,\
                                     datapath=geos_datapath)
            ytop=eend
                
            for em in range(est,ytop+1):
                if (em==est):
                    obs.get_obs_prof(em, std_od=std_od, std_cf=std_cflag)
                else:
                        # print 'iy', iy, em, est
                    obs.get_obs_prof(em, idx=used_idx, do_update=False)
                            
                    
                if (iy==0):
                    # em_id is different from em. em_id is the 'real id' in the whole ensemble set 
                    em_id=obs.em_id[em-1]
                    print 'em_id', em_id, em-1
                    xgp0=obs.obs_xgp[em_id]
                    xgp=obs.obs_xgp[em_id]
                    print size(xgp), size(std_od), size(std_cflag)
                    obs_err=obs.obs_err
                    obs_err=array(obs_err)
                    obs_err=obs_err
                    # print 'shape obs+err', shape(obs_err)
                    rnd_obs_err=array(obs_err)
                    rnd_err=rnd.normal(scale=obs_err)
                    rnd_obs_err[iobs]=rnd_err
                            
                        # err_scale=1.0

                    if (do_debug):
                        subplot(2,1,1)
                        plot(obs_err)
                        plot(rnd_obs_err)
                        subplot(2,1,2)
                        hist(rnd_obs_err)
                        hist(obs_err)
                        show()
                
                
                    rnd_obs_err=err_scale*rnd_obs_err
                    cflag=obs.obs_cflag
                    otime=obs.obs_time
                    olat=obs.obs_lat
                    olon=obs.obs_lon
                    od=obs.obs_od
                    lwi=obs.obs_lwi
                    lwi=lwi.astype(int)
                    #                used_idx=where(logical_and(cflag==0, od<=0.3, lwi<>1))
                    osza=obs.obs_sza
                    sel1=logical_and(cflag==0, od<=0.3)
                    sel2=logical_and(lwi<>1, lwi<>2)
                    # used_idx=where(sel1)
                    # used_idx=where(logical_and(sel1, sel2))
                    # used_idx=where(cflag==0)
                    used_idx=where(logical_and(obs_err<4.0, sel1))
                        
                    used_idx=squeeze(used_idx)
                    print 'used_idx', len(used_idx)
                    xgp=xgp[used_idx]
                    xgp0=xgp0[used_idx]
                        
                        
                    obs_err=1.0e-6*obs_err[used_idx]
                    rnd_obs_err=1.0e-6*rnd_obs_err[used_idx]
                    imax=argmax(rnd_obs_err)
                    print 'max(obs err), max(rnd_obs_err)', 1.0e6*obs_err[imax], 1.0e6*rnd_obs_err[imax]
                    olat=olat[used_idx]
                    olon=olon[used_idx]
                    lwi=lwi[used_idx]
                    otime=otime[used_idx]
                    od=od[used_idx]
                        
                    # r=array(varData)
                    # r=r*r
                else: # just others for ensemble 
                    em_id=obs.em_id[em-1]
                    # print 'em_id', em_id, em-1
                    xgp=obs.obs_xgp[em_id]
                    if (em==est):
                        print em,  est
                        print len(used_idx)
                    xgp=xgp[used_idx]
                    
                    if (istep>300):
                        print 'type xgp',type(xgp)
                        print 'len-xgp', len(xgp)
                
                
                    if (em==2):
                        print 'xgp', xgp[0:6]


                y.append(array(xgp))

                if (do_debug):
                    print ii, ne, shape(xgp)
                    if (do_debug and ii==4):
                        # figure(1)
                        # plot(xgp[0:300],'r')
                        # plot(xgp0[0:300], 'b')
                        z=xgp-xgp0
                        print 'max dev', max(z)
                    
                
                         
                iy=iy+1
                    # show()
        y=array(y)
        y=transpose(y)
        print 'y-shape', shape(y), shape(xgp)
        
        if (istep>300):
            print type(y)
            print len(y)
            ix=y[0]
            print type(ix)
            
                # read in the data
        print ncflnm_obs
        obs_f=NetCDFFile(ncflnm_obs)
        yobs=obs_f.variables['xco2']
        yobs_err=obs_f.variables['err']
        yobs=array(yobs)
        # yobs_err=array(yobs_err)
        yobs_err=array(yobs_err)
        # print 'shape yobs', shape(yobs)
        yobs=squeeze(yobs)
        yobs_err=squeeze(yobs_err)
        
        obs_f.close()
        
        yobs=yobs[used_idx]
        yobs_err=yobs_err[used_idx]
    
        
        if (int_step==0):
            
            all_y=array(y)
            all_yobs=array(yobs)
            all_yobs_err=array(yobs_err)
            all_rnd_err=array(rnd_obs_err)
            all_lat=array(olat)
            all_lon=array(olon)
            all_lwi=array(lwi)
            all_time=array(otime)
            all_od=array(od)

            state_v=stv_c.state_vector(sdate, do_debug=False, datapath=geos_datapath)
            nx, ne=state_v.nx, state_v.ne
            
            print 'nx, ne', nx,ne
            nreg=(nx)/size(st_days)
            x=array(state_v.stv)
            
                                
        else:
            all_y=concatenate((all_y, y))
            all_yobs=concatenate((all_yobs, yobs))
            all_yobs_err=concatenate((all_yobs_err, yobs_err))
            all_rnd_err=concatenate((all_rnd_err, rnd_obs_err))
            all_lat=concatenate((all_lat, olat))
            all_lon=concatenate((all_lon, olon))
            all_lwi=concatenate((all_lwi, lwi))
            all_time=concatenate((all_time, otime))
            all_od=concatenate((all_od, od))
            
        print 'shape y & yobs', shape(y), shape(yobs)
        
        print 'istep & shape ',istep,  shape(all_y), shape(all_yobs), shape(all_yobs_err), shape(all_rnd_err)
        
        data_count.append(size(yobs))
            
        int_step=int_step+1
        print '*'*40+'int_step'+'*'*80
        print int_step
        
        if ((int_step==out_step) and do_dump):
            if (dumpflnm==None):
                ncdump=geos_datapath+'/'+'obs'+dumpext+"_"+sdate0+".nc"
            else:
                ncdump=geos_datapath+'/'+dumpflnm+"_"+dumpext+".nc"
            tmp_data_count=array(data_count)
            
            factor=1.0e6
            all_y=factor*all_y
            all_yobs=factor*all_yobs
            all_rnd_err=factor*all_rnd_err
            all_yobs_err=factor*all_yobs_err

            
            xnx=arange(nx)
            xne=arange(ne)
            xny=arange(size(all_y[:,0]))
            xney=arange(size(all_y[0,:]))
            dimTypes=['i', 'i','i','i', 'i']
            dimVars=[xnx, xne, xny, xney, doys]
            dimNames=['nx', 'ne', 'ny', 'ney', 'doys']
            x_info=ofb.geos_varinfo('x', 'f', ['nx', 'ne'], x)
            y_info=ofb.geos_varinfo('y', 'f', ['ny', 'ney'], all_y)
            yobs_info=ofb.geos_varinfo('obs', 'f', ['ny'], all_yobs)
            yobs_err_info=ofb.geos_varinfo('err', 'f', ['ny'], all_yobs_err)
            rnd_err_info=ofb.geos_varinfo('rnd_err', 'f', ['ny'], all_rnd_err)
            count_info=ofb.geos_varinfo('daily_count', 'i', ['doys'], tmp_data_count)
            lat_info=ofb.geos_varinfo('lat', 'f', ['ny'], all_lat)
            lon_info=ofb.geos_varinfo('lon', 'f', ['ny'], all_lon)
            time_info=ofb.geos_varinfo('time', 'f', ['ny'], all_time)
            od_info=ofb.geos_varinfo('od', 'f', ['ny'], all_od)
            lwi_info=ofb.geos_varinfo('lwi', 'i', ['ny'], all_lwi)
            ofb.ncf_write_by_varinfo(ncdump, dimNames, dimTypes, dimVars, \
                                     [x_info, y_info, yobs_info, yobs_err_info, \
                                      rnd_err_info, lat_info, lon_info, time_info, od_info, lwi_info, count_info])
            int_step=0
    

    data_count=array(data_count)
    print 'shape, all_y', shape(all_y)
    return x, all_y, all_yobs, all_yobs_err, all_rnd_err, data_count, doys, \
           all_lat, all_lon, all_time, all_lwi, all_od
    # NOTE(review): these statements sit at function-body indentation directly
    # after an unconditional `return`, so they never execute as written. They
    # look like a separate script fragment (building perturbation coefficients
    # and calling gen_transcom_coef) whose own header is missing -- confirm
    # where they belong before relying on them.
    inv_step = 17
    inv_path = './oco_inv_tight_noshape/'
    # two-digit, zero-padded step tag; the second assignment repeats the first
    sstep = r'%2.2d' % (inv_step)
    sstep = r'%2.2d' % inv_step

    resflnm = inv_path + "std_oco_assim_res" + "." + sstep + ".nc"
    print resflnm

    # hard-coded switch: with True only the first branch below can run
    do_pertb = True

    if (do_pertb):

        # inv_step*144 coefficients, all zero except entries 1..18 (0.2);
        # entries 82..99 are set to 0.0 again explicitly
        coef_a = zeros(inv_step * 144, float)
        coef_a[1:19] = 0.2
        coef_a[82:100] = 0.0
    else:
        # coefficients taken as the difference whole_x - whole_x0 read from
        # the result file
        varnames = ['whole_x', 'whole_x0', 'dx', 'sum_xtm']
        x, x0, dx, xtm = ofb.ncf_read(resflnm, varnames)
        coef_a = x - x0

    flux_output_name = 'CO2_EMISSION_PERTURB'

    # NOTE(review): the call passes inv_path='./oco_inv_tight_shape/' while the
    # local inv_path above is './oco_inv_tight_noshape/' -- confirm which
    # directory is intended.
    gen_transcom_coef(flux_output_name,\
                      yyyy,\
                      coef_a,\
                      inv_step,\
                      inv_path='./oco_inv_tight_shape/',\
                      modelname='GEOS5', \
                      category='CO2-SRCE', ntracer=1, \
                      do_debug=False)