# Imports assumed by the snippets below. `mu` (met utilities) and `utils`
# are project-local modules providing the thermodynamic and file helpers
# used here; their import lines are omitted in the source.
import datetime as dt
import os

import numpy as np
import xarray as xr


def get_from_inv(ERA_data, varname):
    # Convert geopotential (m**2 s**-2) to geometric height (m).
    z = ERA_data.ERA_z.values / 9.81
    theta = mu.theta_from_p_T(
        p=np.broadcast_to(ERA_data.level.values, z.shape),
        T=ERA_data.ERA_t.values)
    # Heffter boundary-layer diagnosis: per-time inversion bottom/top indices.
    heff = mu.heffter_pblht_2d(z, theta)
    ret = np.full(ERA_data.time.shape, np.nan)
    for i, (b, t) in enumerate(
            zip(heff['i_bot'].astype(int), heff['i_top'].astype(int))):
        # Mean of the variable across the inversion layer (half-open slice).
        ret[i] = np.nanmean(
            ERA_data[varname][i, slice(min(b, t), max(b, t))])
    return ret
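
A minimal usage sketch for the function above (hedged: assumes `traj_ds` is an
xarray Dataset with dimensions (time, level) holding `ERA_z`, `ERA_t`, and the
requested variable; the file name is hypothetical):

    traj_ds = xr.open_dataset('trajectory_profiles.nc')  # hypothetical file
    # Per-time mean of specific humidity across the Heffter inversion layer.
    q_inv = get_from_inv(traj_ds, 'ERA_q')
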
    def get_from_inv(self, varname):
        if not hasattr(self, 'traj_data'):
            print("warning: ERA traj data not added; adding now")
            self.add_traj_data()
        ret = dict()
        for trajname, tf in zip(self.trajectories, self.trajectory_files):
            ERA_data = self.traj_data[trajname]
            z = ERA_data.ERA_z.values / 9.81
            theta = mu.theta_from_p_T(p=np.broadcast_to(
                ERA_data.level.values, z.shape),
                                      T=ERA_data.ERA_t.values)
            try:
                heff = mu.heffter_pblht_2d(z, theta, handle_nans=True)
            except ValueError as e:
                print(self.name)
                print(trajname)
                print(ERA_data.lon)
                raise e
            vals = np.full(ERA_data.time.shape, np.nan)
            if varname == 'z_i':
                # Inversion-base height comes straight from the Heffter result.
                vals = heff['z_bot']
                newarray = ERA_data.ERA_z.mean(dim='level').copy(deep=True)
                newarray.values = vals
                newarray.name = 'z_i'
            else:
                # Guard against NaN indices before casting: int(NaN) is
                # undefined, so the isnan test must precede the cast.
                for i, (b, t) in enumerate(zip(heff['i_bot'], heff['i_top'])):
                    if np.isnan(b) or np.isnan(t):
                        vals[i] = np.nan
                    else:
                        b, t = int(b), int(t)
                        # Mean across the inversion layer (half-open slice).
                        vals[i] = np.nanmean(
                            ERA_data[varname][i, slice(min(b, t), max(b, t))])
                newarray = ERA_data[varname].mean(dim='level').copy(deep=True)
                newarray.values = vals
            ret[trajname] = newarray
        return ret
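
This method variant returns a dict of DataArrays keyed by trajectory name. A
hedged usage sketch (`case` stands in for an instance of the containing class,
which the snippet does not show):

    case.add_traj_data()                  # attach ERA trajectory data first
    q_inv = case.get_from_inv('ERA_q')    # {trajectory name: DataArray}
    z_i = case.get_from_inv('z_i')        # inversion-base height per trajectory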
    def add_ERA_data(self):
        """Retrieve ERA5 data in a box around a trajectory.

        Assumes ERA5 data are on a 0.3 x 0.3 degree grid. Stores the result
        as an xarray Dataset in self.ERA_data.
        """
        start = utils.as_datetime(self.flight_data.time.values[0]).replace(
            minute=0, second=0)
        end = utils.as_datetime(self.flight_data.time.values[-1]).replace(
            minute=0, second=0) + dt.timedelta(hours=1)
        # One sample every 15 minutes from start to end, inclusive.
        dates = np.array(
            [start + dt.timedelta(minutes=i * 15)
             for i in range(1 + int((end - start).total_seconds() / (60 * 15)))])
        # Nearest flight-data sample to each 15-minute time.
        index = [np.argmin(abs(utils.as_datetime(self.flight_data.time.values) - i))
                 for i in dates]
        lats = self.flight_data.GGLAT.values[index]
        lons = self.flight_data.GGLON.values[index]
        times = [np.datetime64(i.replace(tzinfo=None)) for i in dates]
        box_degrees = 2
        # Go up/down/left/right this many pixels (0.3 degrees per pixel).
        space_index = int(np.round(box_degrees / 0.3 / 2))
        unique_days = {utils.as_datetime(i).date() for i in times}
        files = [os.path.join(utils.ERA_source,
                              "ERA5.pres.NEP.{:%Y-%m-%d}.nc".format(i))
                 for i in unique_days]
        sfc_files = [os.path.join(utils.ERA_source,
                                  "ERA5.sfc.NEP.{:%Y-%m-%d}.nc".format(i))
                     for i in unique_days]
        flux_files = [os.path.join(utils.ERA_source, "4dvar_sfc_proc",
                                   "ERA5.4Dvarflux.NEP.{:%Y-%m-%d}.nc".format(i))
                      for i in unique_days]
        self.files['ERA_files'] = files + sfc_files
        with xr.open_mfdataset(sorted(files)) as data:
            # Output dataset: one profile per 15-minute sample time.
            ds = xr.Dataset(
                coords={'time': ('time', times, data.coords['time'].attrs),
                        'level': ('level', data.coords['level'].values)})

            # Derive specific humidity q from T, RH, and pressure
            # (levels are in hPa, hence the factor of 100 to Pa).
            T = data['t'].values
            RH = data['r'].values
            p = np.broadcast_to(
                data.coords['level'].values[None, :, None, None], T.shape) * 100
            q = utils.qv_from_p_T_RH(p, T, RH)
            data['q'] = (('time', 'level', 'latitude', 'longitude'), q)
            data['q'] = data['q'].assign_attrs(
                {'units': "kg kg**-1",
                 'long_name': "specific_humidity",
                 'dependencies': 'ERA_t, ERA_p, ERA_r'})

            # Add horizontal gradients of t, q, z, u, and v, assuming constant
            # grid spacing. Index-space gradients (per grid cell) are converted
            # to physical gradients (per meter) with degrees-per-meter factors
            # from the Earth's meridional/equatorial circumference.
            dlatdy = 360 / 4.000786e7  # degrees latitude per meter north

            def get_dlondx(lat):
                # Degrees longitude per meter east, at a given latitude.
                return 360 / (np.cos(np.deg2rad(lat)) * 4.0075017e7)

            lat_spaces = np.diff(data.coords['latitude'].values)
            lon_spaces = np.diff(data.coords['longitude'].values)
            assert np.allclose(lat_spaces, -0.3, atol=0.01)
            assert np.allclose(lon_spaces, 0.3, atol=0.05)
            dlondi = np.mean(lon_spaces)
            dlatdj = np.mean(lat_spaces)
            dlondx = get_dlondx(data.coords['latitude'].values)
            for var in ['t', 'q', 'z', 'u', 'v']:
                _, _, dvardj, dvardi = np.gradient(data[var].values)
                dvardx = dvardi / dlondi * dlondx[None, None, :, None]
                dvardy = dvardj / dlatdj * dlatdy
                data['d{}dx'.format(var)] = (
                    ('time', 'level', 'latitude', 'longitude'), dvardx)
                data['d{}dy'.format(var)] = (
                    ('time', 'level', 'latitude', 'longitude'), dvardy)
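            # Worked example of the conversion above (illustrative numbers,
            # not taken from the data): at lat = 40 deg,
            # dlondx = 360 / (cos(40 deg) * 4.0075017e7) ~ 1.17e-5 deg/m,
            # so a change of 0.3 K per 0.3-deg cell eastward gives
            # dvardx = (0.3 K / 0.3 deg) * 1.17e-5 deg/m ~ 1.17e-5 K/m.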

            grad_attrs = {'q': {'units': "kg kg**-1 m**-1",
                                'long_name': "{}_gradient_of_specific_humidity",
                                'dependencies': "ERA_t, ERA_p, ERA_r"},
                          't': {'units': "K m**-1",
                                'long_name': "{}_gradient_of_temperature",
                                'dependencies': "ERA_t"},
                          'z': {'units': "m**2 s**-2 m**-1",
                                'long_name': "{}_gradient_of_geopotential",
                                'dependencies': "ERA_z"},
                          'u': {'units': "m s**-1 m**-1",
                                'long_name': "{}_gradient_of_zonal_wind",
                                'dependencies': "ERA_u"},
                          'v': {'units': "m s**-1 m**-1",
                                'long_name': "{}_gradient_of_meridional_wind",
                                'dependencies': "ERA_v"}}

            for key, val in grad_attrs.items():
                for (n, drn) in [('x', 'eastward'), ('y', 'northward')]:
                    attrs = val.copy()
                    var = 'd{}d{}'.format(key, n)
                    attrs['long_name'] = attrs['long_name'].format(drn)
                    data[var] = data[var].assign_attrs(attrs)

            for var in data.data_vars.keys():
                vals = []
                for (lat, lon, time) in zip(lats, lons % 360, times):
                    if lat > np.max(data.coords['latitude']) or \
                            lat < np.min(data.coords['latitude']) or \
                            lon > np.max(data.coords['longitude']) or \
                            lon < np.min(data.coords['longitude']):
                        print('out of range of data')
                        print(lat, lon, time)
                        vals.append(np.full_like(data.coords['level'],
                                                 np.nan, dtype='float'))
                        continue
                    x = data[var].sel(
                        longitude=slice(lon - box_degrees / 2,
                                        lon + box_degrees / 2),
                        latitude=slice(lat + box_degrees / 2,
                                       lat - box_degrees / 2))
                    z = x.sel(method='nearest', time=time,
                              tolerance=np.timedelta64(1, 'h'))
                    # Gaussian-weighted average over the lat/lon box,
                    # applied level by level.
                    gauss = utils.gauss2D(shape=z.shape[1:], sigma=z.shape[0])
                    filtered = z.values * gauss
                    vals.append(np.sum(filtered, axis=(1, 2)))
                ds['ERA_' + var] = (('time', 'level'), np.array(vals))
                ds['ERA_' + var] = ds['ERA_' + var].assign_attrs(
                    data[var].attrs)


            # Lower tropospheric stability (Klein and Hartmann 1993):
            # LTS = theta(700 hPa) - theta(1000 hPa); theta at 1000 hPa
            # equals T at 1000 hPa for a 1000 hPa reference pressure.
            t_1000 = ds.ERA_t.sel(level=1000).values
            theta_700 = mu.theta_from_p_T(p=700, T=ds.ERA_t.sel(level=700).values)
            LTS = theta_700 - t_1000
            ds['ERA_LTS'] = ('time', np.array(LTS))
            ds['ERA_LTS'] = ds['ERA_LTS'].assign_attrs(
                {"long_name": "Lower tropospheric stability",
                 "units": "K",
                 "_FillValue": "NaN"})
            # Estimated inversion strength (Wood and Bretherton 2006):
            # EIS = LTS - Gamma_m(850 hPa) * (z700 - LCL), with the dewpoint
            # from a simple linear approximation, t_dew ~ T - (100 - RH)/5.
            t_dew = t_1000 - (100 - ds.ERA_r.sel(level=1000).values) / 5
            lcl = mu.get_LCL(t=t_1000, t_dew=t_dew,
                             z=ds.ERA_z.sel(level=1000).values / 9.81)
            z_700 = ds.ERA_z.sel(level=700).values / 9.81
            gamma_850 = mu.get_moist_adiabatic_lapse_rate(
                ds.ERA_t.sel(level=850).values, 850)
            eis = LTS - gamma_850 * (z_700 - lcl)
            ds['ERA_EIS'] = ('time', np.array(eis))
            ds['ERA_EIS'] = ds['ERA_EIS'].assign_attrs(
                {"long_name": "Estimated inversion strength",
                 "units": "K",
                 "_FillValue": "NaN"})

            # Surface (single-level) variables: same box selection and
            # Gaussian weighting, but each sample reduces to a scalar.
            with xr.open_mfdataset(sorted(sfc_files)) as sfc_data:
                for var in sfc_data.data_vars.keys():
                    vals = []
                    for (lat, lon, time) in zip(lats, lons % 360, times):
                        if lat > np.max(sfc_data.coords['latitude']) or \
                                lat < np.min(sfc_data.coords['latitude']) or \
                                lon > np.max(sfc_data.coords['longitude']) or \
                                lon < np.min(sfc_data.coords['longitude']):
                            print('out of range of data')
                            print(lat, lon, time)
                            vals.append(float('nan'))
                            continue
                        x = sfc_data[var].sel(
                            longitude=slice(lon - box_degrees / 2,
                                            lon + box_degrees / 2),
                            latitude=slice(lat + box_degrees / 2,
                                           lat - box_degrees / 2))
                        z = x.sel(method='nearest', time=time,
                                  tolerance=np.timedelta64(1, 'h'))
                        gauss = utils.gauss2D(shape=z.shape, sigma=z.shape[0])
                        filtered = z.values * gauss
                        vals.append(np.sum(filtered))
                    ds['ERA_' + var] = ('time', np.array(vals))
                    ds['ERA_' + var] = ds['ERA_' + var].assign_attrs(
                        sfc_data[var].attrs)

            # Surface fluxes from the 4D-Var product; only sensible (sshf)
            # and latent (slhf) heat fluxes are kept.
            with xr.open_mfdataset(sorted(flux_files)) as flux_data:
                for var in flux_data.data_vars.keys():
                    if var not in ['sshf', 'slhf']:
                        continue
                    vals = []
                    for (lat, lon, time) in zip(lats, lons % 360, times):
                        if lat > np.max(flux_data.coords['latitude']) or \
                                lat < np.min(flux_data.coords['latitude']) or \
                                lon > np.max(flux_data.coords['longitude']) or \
                                lon < np.min(flux_data.coords['longitude']):
                            print('out of range of data')
                            print(lat, lon, time)
                            vals.append(float('nan'))
                            continue
                        x = flux_data[var].sel(
                            longitude=slice(lon - box_degrees / 2,
                                            lon + box_degrees / 2),
                            latitude=slice(lat + box_degrees / 2,
                                           lat - box_degrees / 2))
                        z = x.sel(method='nearest', time=time,
                                  tolerance=np.timedelta64(1, 'h'))
                        gauss = utils.gauss2D(shape=z.shape, sigma=z.shape[0])
                        filtered = z.values * gauss
                        vals.append(np.sum(filtered))
                    ds['ERA_' + var] = ('time', np.array(vals))
                    ds['ERA_' + var] = ds['ERA_' + var].assign_attrs(
                        flux_data[var].attrs)

        self.ERA_data = ds
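
A hedged usage sketch for add_ERA_data (assumes `case` is an instance of the
containing class with flight_data already loaded; variable names follow the
dataset built above):

    case.add_ERA_data()
    lts = case.ERA_data['ERA_LTS']   # lower-tropospheric stability, K
    eis = case.ERA_data['ERA_EIS']   # estimated inversion strength, K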