Example #1
def create_df_missing(cfg_set_tds,cfg_set_input,cfg_var,check_sources):
    """Check the availability of input datasets during the complete testdata period.
    """
    print("Check the availability of input datasets during the complete testdata period")
    if check_sources!=["RADAR","SEVIRI","COSMO_CONV","THX"]:
        raise NotImplementedError("Other check_sources not yet implemented.")
    
    ## Build the complete list of datetime objects according to the settings in cfg_set_tds:
    dt_start = datetime.datetime.combine(cfg_set_tds["tds_period_start"],datetime.time(0,0))
    dt_end   = datetime.datetime.combine(cfg_set_tds["tds_period_end"]+datetime.timedelta(days=1),datetime.time(0,0))
    
    dt_complete_list = pd.date_range(dt_start,dt_end,freq="5min")
    RADAR_vars = cfg_var.loc[cfg_var["SOURCE"]=="RADAR","VARIABLE"].values[:-1]
    columns_list = np.concatenate([RADAR_vars,check_sources[1:]])
    
    bool_array = np.full((len(dt_complete_list), len(columns_list)), False, dtype=bool)
    df_missing = pd.DataFrame(bool_array,index=dt_complete_list,columns=columns_list)
    
    cfg_set_input["n_past_frames"]=0
    t_start = datetime.datetime.now()
    t_exp   = "(calculating)"
    for counter, sampling_time in enumerate(dt_complete_list):
        perc_checked_total = float(counter)/len(dt_complete_list)
        perc_checked = np.round((sampling_time.hour*60+sampling_time.minute)/1440.,2)        
        if counter%100==0 and counter > 10:
            t_exp = (datetime.datetime.now() + \
                     (datetime.datetime.now() - t_start)*int((1-perc_checked_total)/perc_checked_total)).strftime("%d.%m.%Y %H:%M")
        print("  Check input data availability of date: %s - %3d%% | Expected finishing time: %s" % \
              (sampling_time.strftime("%d.%m.%Y"),perc_checked*100,t_exp), end='\r')
        sys.stdout.flush()

        for RADAR_var in RADAR_vars:
            if path_creator(sampling_time, RADAR_var, "RADAR", cfg_set_input)[0][0] is None:
                df_missing.loc[sampling_time,RADAR_var] = True
        if path_creator(sampling_time, "IR_108", "SEVIRI", cfg_set_input)[0][0] is None:
            df_missing.loc[sampling_time,"SEVIRI"] = True
        if not os.path.exists(path_creator(sampling_time, "POT_VORTIC_70000", "COSMO_CONV", cfg_set_input)[0]):
            df_missing.loc[sampling_time,"COSMO_CONV"] = True
        if path_creator(sampling_time, "THX_abs", "THX", cfg_set_input)[0][0] is None:
            df_missing.loc[sampling_time,"THX"] = True
    
    
    df_missing.to_pickle(os.path.join(cfg_set_tds["root_path_tds"],u"MissingInputData.pkl"))
    print("Save dataframe to %s" % (os.path.join(cfg_set_tds["root_path_tds"],u"MissingInputData.pkl")))
    return df_missing
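
The returned df_missing is a boolean DataFrame indexed by 5-minute timesteps, with one column per RADAR variable and per additional source. A toy sketch (illustrative column names and values, not real data) of how such a frame can be summarised:

import pandas as pd

## Toy "missing data" frame in the same style as df_missing above:
idx = pd.date_range("2019-07-01 00:00", periods=6, freq="5min")
df_missing = pd.DataFrame({"RZC": [False, True, False, False, False, True],
                           "SEVIRI": [False, False, True, False, False, False]},
                          index=idx)
print(df_missing.mean())             ## fraction of missing 5-min timesteps per column
print(df_missing.any(axis=1).sum())  ## timesteps with at least one missing source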
Example #2
def get_vararr_TRT_t0(t0, cfg_set):
    """Provide vararr style array filled with centre locations of TRT cells"""

    ## Read filepath of respective TRT file:
    filepaths, timestamps = pth.path_creator(t0, "TRT", "TRT", cfg_set)
    cell_info_df = df_empty(cfg_set["TRT_cols"], cfg_set["TRT_dtype"])
    filename = "%stmp/%s%s" % (cfg_set["root_path"],
                               cfg_set["t0"].strftime("%Y%m%d%H%M"),
                               "_TRT_df.pkl")

    ## Exception if no TRT-file is available:
    if filepaths[0] is None:
        print("   *** Warning: No TRT file found for %s ***" % t0)
        cell_info_df.to_pickle(filename)
        vararr = np.zeros((cfg_set["n_integ"], ) + cfg_set["xy_ext"])
        return vararr

    ## Read in TRT files, get location (CHi,CHj) and TRT variables:
    traj_IDs, TRTcells, cell_mask = swisstrt.readRdt(filepaths[0])
    vararr = np.zeros((cfg_set["n_integ"], ) + cfg_set["xy_ext"],
                      dtype=np.int32)
    traj_ID_list = []
    for traj_ID in traj_IDs:
        dict_cellinfo = {
            key: value
            for key, value in TRTcells[traj_ID].__dict__.items()
            if not key.startswith('__') and not callable(key)
        }
        cell_info_df = pd.concat([cell_info_df,
                                  pd.DataFrame.from_records([dict_cellinfo], index=[9])],
                                 ignore_index=True, sort=True)
        vararr[:, int(TRTcells[traj_ID].iCH),
               int(TRTcells[traj_ID].jCH)] = np.int32(traj_ID[8:])
        traj_ID_list.append(traj_ID)

    ## Change index to TRT_ID, set dtype for columns and save to disk:
    cell_info_df.index = traj_ID_list
    cell_info_df = cell_info_df.astype(cfg_set["type_dict_TRT"],
                                       errors='raise')
    cell_info_df.to_pickle(filename)
    return vararr
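
The returned vararr is zero everywhere except at the TRT cell-centre pixels, where the numeric tail of the traj_ID is written as int32. A minimal sketch (toy array, hypothetical ID and grid size) of how those centre locations could be recovered afterwards:

import numpy as np

## Toy vararr in the style returned above: zeros except at cell-centre pixels.
vararr = np.zeros((3, 640, 710), dtype=np.int32)
vararr[:, 120, 350] = 42                 ## hypothetical np.int32(traj_ID[8:]) value
t_idx, i_CH, j_CH = np.nonzero(vararr)   ## indices of all non-zero (cell) pixels
print(np.unique(vararr[t_idx, i_CH, j_CH]))   ## -> [42]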
Example #3
## Get config info
cfg_set_tds = cfg.get_config_info_tds()
cfg_set_input, cfg_var, cfg_var_combi = cfg.get_config_info_op()

## Initialise fields (CCS4 meshgrid and VIL, EchoTop and MaxEcho observations):
ccs4_CH = np.meshgrid(
    np.arange(255000, 965000, 1000) + 500,
    np.arange(-160000, 480000, 1000) + 500)
ET45 = rccs.get_vararr_t(user_time_point, "EZC45", cfg_set_input)
CZC = rccs.get_vararr_t(user_time_point, "CZC", cfg_set_input)
LZC = rccs.get_vararr_t(user_time_point, "LZC", cfg_set_input)
RZC = rccs.get_vararr_t(user_time_point, "RZC", cfg_set_input)

## Get TRT file:
filename = pth.path_creator(user_time_point, "TRT", "TRT", cfg_set_input)[0]
if len(filename) == 0: raise IOError("No TRT file found")
elif len(filename) > 1: raise IOError("More than one TRT file found")
file = open(filename[0], "r")

## Go through TRT file:
for line in file:
    line2 = line.strip()
    if len(line2) > 0:
        if line2.startswith("@") or line2.startswith("#"): pass
        else:
            data = line2.split(";")
            print_str = "  Working on TRT traj_ID: %s" % data[0]
            print('\r', print_str, end='')
            shape_coord = [float(coord) for coord in data[27:][:-1]]
            lon_coord = shape_coord[::2]
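
The CCS4 meshgrid near the top of this example spans 710 easting and 640 northing values at 1 km spacing, with the +500 offset presumably shifting the coordinates to the cell centres. A quick standalone check of those dimensions (NumPy only):

import numpy as np

xs = np.arange(255000, 965000, 1000) + 500   ## 710 cell-centre easting values
ys = np.arange(-160000, 480000, 1000) + 500  ## 640 cell-centre northing values
ccs4_x, ccs4_y = np.meshgrid(xs, ys)
print(ccs4_x.shape)     ## (640, 710)
print(xs[1] - xs[0])    ## 1000 (1 km grid spacing)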
Example #4
def calc_disparr(t_current, cfg_set, resid=False):
    """Get 2-dim displacement array for flow between timesteps t_current and t_current - n_past_frames*timestep.
   
    Parameters
    ----------
    
    t_current : datetime object
        Current time for which to calculate displacement array.
    
    cfg_set : dict
        Basic variables defined in input_NOSTRADAMUS_ANN.py
        
    resid : bool
        Whether to create the displacement array for residual movement correction.
        Default: False.

    Note: if cfg_set["UV_inter"] is True, the calculated UV and sparsened UV
    vectors are returned as well.
    
    See function check_create_disparray(t0, timestep, n_integ, root_path, t0_str, oflow_source, oflow_source_path)
    """
    
    if resid:
        ## Read in current displaced oflow_source file:
        filename = pth.path_creator_vararr("disp",cfg_set["oflow_source"],cfg_set)
        #filename = "%stmp/%s_%s_disp%s.%s" % (cfg_set["root_path"], cfg_set["t0"].strftime("%Y%m%d%H%M"),
        #                                       cfg_set["oflow_source"], cfg_set["file_ext_verif"], cfg_set["save_type"])
        t_diff = cfg_set["t0"] - t_current
        t_diff_ind = int((t_diff.seconds/60)/cfg_set["timestep"])
        #oflow_source_data = np.load(filename)[t_diff_ind:t_diff_ind+cfg_set["n_past_frames"]+1,:,:]
        #oflow_source_data = np.load(filename)[t_diff_ind+cfg_set["n_past_frames_resid"]::-1,:,:][:cfg_set["n_past_frames"]+1]
        oflow_source_data = iotmp.load_file(filename,cfg_set["oflow_source"])[t_diff_ind+cfg_set["n_past_frames_resid"]::-1,:,:][:cfg_set["n_past_frames"]+1]
        if oflow_source_data.shape[0]==1:
            UV = R = np.zeros((2,oflow_source_data.shape[1],oflow_source_data.shape[2]))
            if not cfg_set["UV_inter"]: return UV, R
            else: return UV, R, np.zeros(4)*np.nan, np.zeros(4)*np.nan
        if np.all(np.array_equal(oflow_source_data[0,:,:],oflow_source_data[1,:,:])):
            raise ValueError("Input data equal")
    else:
        ## Read in current oflow_source file:
        filenames, timestamps = pth.path_creator(t_current, cfg_set["oflow_source"], cfg_set["source_dict"][cfg_set["oflow_source"]], cfg_set)
        ret = metranet.read_file(filenames[0], physic_value=True)
        oflow_source_data = np.atleast_3d(ret.data)
        for filename in filenames[1:]:
            ret_d_t = metranet.read_file(filename, physic_value=True)
            oflow_source_data_d_t = np.atleast_3d(ret_d_t.data)
            oflow_source_data = np.append(oflow_source_data,oflow_source_data_d_t, axis=2)
        
        oflow_source_data = np.moveaxis(oflow_source_data,2,0)
        #oflow_source_data_masked = np.ma.masked_invalid(oflow_source_data)
        #oflow_source_data_masked = np.ma.masked_where(oflow_source_data_masked==0,oflow_source_data_masked)    
        

    ## Check whether there are non-nan entries:
    if np.any(np.isnan(oflow_source_data).all(axis=(1,2))):
        print("   *** Warning: Input oflow source field is all NAN!\n                Returning NAN fields.***")
        nan_arr = oflow_source_data[0,:,:]*np.nan
        D  = np.array([nan_arr,nan_arr])
        UV = np.array([nan_arr,nan_arr])
        UV_vec  = []; UV_vec_sp = []
        if not cfg_set["UV_inter"]:
            return D, UV
        else: return D, UV, UV_vec, UV_vec_sp
        
    ## Convert linear rain rates to logarithmic dBR units
    if not cfg_set["oflow_source"]=="RZC":
        raise NotImplementedError("So far displacement array retrieval only implemented for RZC")
    else:
        ## Get threshold method:
        if not resid:
            R_thresh_meth = cfg_set["R_thresh_meth"]
            R_threshold = cfg_set["R_threshold"]
        else:
            R_thresh_meth = cfg_set["R_thresh_meth_resid"]
            R_threshold = cfg_set["R_threshold_resid"]
        
        ## Get threshold value:
        if R_thresh_meth == "fix":
            R_thresh = R_threshold
        elif R_thresh_meth == "perc":
            R_thresh = np.min([np.nanpercentile(oflow_source_data[0,:,:],R_threshold),
                              np.nanpercentile(oflow_source_data[1,:,:],R_threshold)])
        else: raise ValueError("R_thresh_meth must either be set to 'fix' or 'perc'")
                
        ## Convert to dBR
        dBR, dBRmin = st.utils.mmhr2dBR(oflow_source_data, R_thresh)
        dBR[~np.isfinite(dBR)] = dBRmin
        #R_thresh = cfg_set["R_threshold"]
        
        ## In case threshold is not exceeded, lower R_threshold by 20%
        while (dBR==dBRmin).all():
            if cfg_set["verbose"]: print("   *** Warning: Threshold not exceeded, "+
                                         "lower R_threshold by 20% to "+str(R_thresh*0.8)+" ***")
            R_thresh = R_thresh*0.8
            dBR, dBRmin = st.utils.mmhr2dBR(oflow_source_data, R_thresh)
            dBR[~np.isfinite(dBR)] = dBRmin
        
        ## For the correction of residuals the original mm/h values are used
        ## (work on a copy so the source field is not modified in place):
        if resid:
            oflow_source_data_min = oflow_source_data.copy()
            oflow_source_data_min[oflow_source_data_min<=R_thresh] = R_thresh
            oflow_source_data_min[~np.isfinite(oflow_source_data_min)] = R_thresh
  
    ## Calculate UV field
    oflow_method = st.optflow.get_method(cfg_set["oflow_method_name"])
    if not resid:
        UV, UV_vec, UV_vec_sp = oflow_method(dBR,return_single_vec=True,return_declust_vec=True)
    else:
        UV, UV_vec, UV_vec_sp = oflow_method(oflow_source_data_min,min_distance_ST=2,winsize_LK5=(120,20),quality_level_ST=0.05,
                                             max_speed=20,nr_IQR_outlier=5,k=30,
                                             decl_grid=cfg_set["decl_grid_resid"],function=cfg_set["inter_fun_resid"],
                                             epsilon=cfg_set["epsilon_resid"],#factor_median=.2,
                                             return_single_vec=True,return_declust_vec=True,
                                             zero_interpol=cfg_set["zero_interpol"])
        #UV, UV_vec, UV_vec_sp = oflow_method(oflow_source_data_min,min_distance_ST=2,block_size_ST=15,winsize_LK5=(120,20),quality_level_ST=0.05,
        #                                                  max_speed=20,nr_IQR_outlier=5,decl_grid=20,function="inverse",k=20,factor_median=0.05,
        #                                                  return_single_vec=True,return_declust_vec=True,zero_interpol=True)
        #UV, UV_vec, UV_vec_sp = oflow_method(oflow_source_data_min,min_distance_ST=2,block_size_ST=15,winsize_LK5=(120,20),quality_level_ST=0.05,
        #                                                  max_speed=20,nr_IQR_outlier=5,decl_grid=20,function="nearest",k=20,factor_median=.2,
        #                                                  return_single_vec=True,return_declust_vec=True)
       
    ## In case no motion vectors were detected, lower R_threshold by 30%
    if np.any(~np.isfinite(UV)):
        dBR_orig = dBR
    n_rep = 0
    while np.any(~np.isfinite(UV)):
        if cfg_set["verbose"]:
            print("   *** Warning: No motion vectors detected, lower R_threshold by 30% to "+str(R_thresh*0.7)+" ***")
        R_thresh = R_thresh*0.7
        dBR, dBRmin = st.utils.mmhr2dBR(oflow_source_data, R_thresh)
        dBR[~np.isfinite(dBR)] = dBRmin
        
        if resid:
            oflow_source_data_min = oflow_source_data.copy()
            oflow_source_data_min[oflow_source_data_min<=R_thresh] = R_thresh
            oflow_source_data_min[~np.isfinite(oflow_source_data_min)] = R_thresh
        if not resid:
            UV, UV_vec, UV_vec_sp = oflow_method(dBR,return_single_vec=True,return_declust_vec=True)
        else:
            UV, UV_vec, UV_vec_sp = oflow_method(oflow_source_data_min,min_distance_ST=2,winsize_LK5=(120,20),quality_level_ST=0.05,
                                                 max_speed=20,nr_IQR_outlier=5,k=30,
                                                 decl_grid=cfg_set["decl_grid_resid"],function=cfg_set["inter_fun_resid"],
                                                 epsilon=cfg_set["epsilon_resid"],#factor_median=.2,
                                                 return_single_vec=True,return_declust_vec=True,
                                                 zero_interpol=cfg_set["zero_interpol"])
            #UV, UV_vec, UV_vec_sp = oflow_method(oflow_source_data_min,min_distance_ST=2,block_size_ST=15,winsize_LK5=(120,20),quality_level_ST=0.05,
            #                                                  max_speed=20,nr_IQR_outlier=5,decl_grid=20,function="inverse",k=20,epsilon=10,#factor_median=0.05,
            #                                                  return_single_vec=True,return_declust_vec=True,zero_interpol=True)
            #UV, UV_vec, UV_vec_sp = oflow_method(oflow_source_data_min,min_distance_ST=2,block_size_ST=15,winsize_LK5=(120,20),quality_level_ST=0.05,
            #                                                  max_speed=20,nr_IQR_outlier=5,decl_grid=20,function="nearest",k=20,factor_median=.2,
            #                                                  return_single_vec=True,return_declust_vec=True)
        n_rep += 1
        if n_rep > 2:
            UV = np.zeros((2,dBR.shape[1],dBR.shape[2]))
            if cfg_set["verbose"]: print("   *** Warning: Return zero UV-array! ***")
            break
    
    ## Invert direction of intermediate motion vectors
    #if cfg_set["UV_inter"]:
    #    UV_vec[2:3,:,:] = -UV_vec[2:3,:,:]
    #    UV_vec_sp[2:3,:,:] = -UV_vec_sp[2:3,:,:]
    
    """
    ## Advect disp_test to get the advected test_array and the displacement array
    adv_method = st.advection.get_method(cfg_set["adv_method"])
    dBR_adv, D = adv_method(dBR[-1,:,:], UV, 1, return_displacement=True) 
    
    ## convert the forecasted dBR to mmhr
    if cfg_set["oflow_source"]=="RZC":
        if cfg_set["R_thresh_meth"] == "fix":
            R_tresh = cfg_set["R_threshold"]
        elif cfg_set["R_thresh_meth"] == "perc":
            R_tresh = np.min([np.nanpercentile(oflow_source_data[0,:,:],cfg_set["R_threshold"]),
                              np.nanpercentile(oflow_source_data[1,:,:],cfg_set["R_threshold"])])
        else: raise NotImplementedError("R_thresh_meth must either be set to 'fix' or 'perc'")
        oflow_source_data_forecast = st.utils.dBR2mmhr(dBR_adv, R_tresh)
    
    ## Print results:
    if False:
        calc_disparr_ctrl_plot(D,timestamps,oflow_source_data,oflow_source_data_forecast,cfg_set)
    """
    #plt.imshow(D[0,:,:])
    #plt.show()
    #fig, axes = plt.subplots(nrows=1, ncols=2)
    #fig1=axes[0].imshow(UV[0,:,:])
    #fig.colorbar(fig1,ax=axes[0])#,orientation='horizontal')
    #fig2=axes[1].imshow(UV[1,:,:])
    #fig.colorbar(fig2,ax=axes[1])#,orientation='horizontal')
    #fig.tight_layout()
    #plt.show()
    #sys.exit()
    

    if np.all(UV==0): #np.any(~np.isfinite(UV)):
        if cfg_set["instant_resid_corr"] and not resid:
            print("   *** Warning: No residual movement correction performed ***")
        D = UV.copy()
    else:
        adv_method = st.advection.get_method(cfg_set["adv_method"])
        dBR_disp, D = adv_method(dBR[-2,:,:],UV,1,return_displacement=True,return_XYW=False)
        
        if cfg_set["instant_resid_corr"] and not resid:    
            if cfg_set["verbose"]:
                print("   Make instantaneous residual movement correction")
            ## Advect second last observation to t0:
            dBR_disp[~np.isfinite(dBR_disp)] = dBRmin
            
            ## Convert dBR values of t0 and second last time step to mm/h:
            RZC_resid_fields = np.stack([st.utils.dBR2mmhr(dBR_disp[0,:,:], R_thresh),
                                         st.utils.dBR2mmhr(dBR[-1,:,:], R_thresh)])
            #plt.imshow(RZC_resid_fields[0,:,:]); plt.title("RZC_resid_fields[0,:,:]"); plt.show()
            #plt.imshow(RZC_resid_fields[1,:,:]); plt.title("RZC_resid_fields[1,:,:]"); plt.show()

            ## Get residual displacement field
            UV_resid = oflow_method(RZC_resid_fields,min_distance_ST=2,
                                    winsize_LK5=(120,20),quality_level_ST=0.05,
                                    max_speed=20,nr_IQR_outlier=5,k=30,
                                    decl_grid=cfg_set["decl_grid_resid"],function=cfg_set["inter_fun_resid"],
                                    epsilon=cfg_set["epsilon_resid"],#factor_median=.2,
                                    zero_interpol=cfg_set["zero_interpol"])
            
            ## Add UV_resid to original UV array
            n_rep = 0
            while np.any(~np.isfinite(UV_resid)):
                print("       No UV_resid field found")
                R_thresh *= 0.7
                RZC_resid_fields = np.stack([st.utils.dBR2mmhr(dBR_disp[0,:,:], R_thresh),
                                             st.utils.dBR2mmhr(dBR[-1,:,:], R_thresh)])
                
                UV_resid = oflow_method(RZC_resid_fields,min_distance_ST=2,
                                        winsize_LK5=(120,20),quality_level_ST=0.05,
                                        max_speed=20,nr_IQR_outlier=5,k=30,
                                        decl_grid=cfg_set["decl_grid_resid"],function=cfg_set["inter_fun_resid"],
                                        epsilon=cfg_set["epsilon_resid"],#factor_median=.2,
                                        zero_interpol=cfg_set["zero_interpol"])
                n_rep += 1
                if n_rep > 2:
                    UV_resid = np.zeros((2,dBR.shape[1],dBR.shape[2]))
                    #if cfg_set["verbose"]: 
                    print("   *** Warning: Return zero UV_resid array! ***")
                    break
            UV += UV_resid
            
            ## Displace with UV_resid field to get D_resid and add to D array:
            dBR_disp_disp, D = adv_method(RZC_resid_fields[0,:,:],UV,1,
                                          return_displacement=True,return_XYW=False)  
            #D += D_resid
    
    if not cfg_set["UV_inter"]:
        return D, UV
    else: return D, UV, UV_vec, UV_vec_sp
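
A recurring pattern in calc_disparr is to lower R_thresh by a fixed factor until the field actually exceeds the threshold (or until finite motion vectors are found). A minimal, self-contained sketch of that retry idea with plain NumPy (the actual dBR conversion via st.utils is not reproduced here):

import numpy as np

def lower_threshold_until_signal(field, thresh, factor=0.8, max_iter=10):
    """Reduce thresh by `factor` until at least one finite pixel exceeds it,
    mirroring the retry loops around st.utils.mmhr2dBR above."""
    for _ in range(max_iter):
        if np.nanmax(field) > thresh:
            return thresh
        thresh *= factor
    return thresh

print(lower_threshold_until_signal(np.array([0.02, 0.05, np.nan]), thresh=0.1))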
Example #5
def get_TRT_cell_info(dt_sampling_list,
                      cfg_set_tds,
                      cfg_set_input=None,
                      len_ini_df=None):
    """Get information on TRT cells within time period.
    
    Parameters:
    -----------
    
    len_ini_df : uint
        Length of the initial dataframe (used to set up the dataframe; if the number
        of TRT cells exceeds this initial length, additional lines are appended; if
        there are fewer, the excess lines are deleted).
    """
    print("Estimate number of samples within training period")

    ## Get input data config file
    if cfg_set_input is None:
        cfg_set_input, cfg_var = cfg.get_config_info_op()

    ## Create empty DataFrame
    if len_ini_df is None: len_ini_df = len(dt_sampling_list) * 3
    ## Old:
    #df_cols = ["traj_ID","date","RANKr","area","lat","lon","iCH","jCH"]
    #samples_df = pd.DataFrame(np.zeros((len_ini_df,len(df_cols)))*np.nan,
    #                          columns=df_cols)
    ## New:
    #samples_df = Nip.df_empty(["traj_ID"]+cfg_set_input["TRT_cols"],[np.object]+cfg_set_input["TRT_dtype"])
    samples_ls = []

    #ind_df = 0; first_append = True; doy_temp = -1

    ## Loop over time steps to gather information on TRT cells at specific time step:
    t_start = datetime.datetime.now()
    t_exp = "(calculating)"
    for counter, sampling_time in enumerate(dt_sampling_list):
        perc_checked = np.round(
            (sampling_time.hour * 60 + sampling_time.minute) / 1440., 2)
        if counter % 100 == 0 and counter > 10:
            t_exp = (datetime.datetime.now() + \
                     (datetime.datetime.now() - t_start)*int((1-perc_checked)/perc_checked)).strftime("%d.%m.%Y %H:%M")

        print("  Check input data availability of date: %s - %3d%% (expected finishing time: %s) " % \
            (sampling_time.strftime("%d.%m.%Y"),100*perc_checked,t_exp), end='\r')

        ## Update time in config dict:
        cfg_set_input["t0"] = sampling_time
        t0 = cfg_set_input["t0"]
        cfg_set_input["t0_doy"] = t0.timetuple().tm_yday
        cfg_set_input["t0_str"] = t0.strftime("%Y%m%d%H%M")
        #if cfg_set_input["t0_doy"]%10==0 and cfg_set_input["t0_doy"]!=doy_temp:
        #    print("   For doy: %s" % cfg_set_input["t0_doy"])
        #    doy_temp = cfg_set_input["t0_doy"]

        ## Get file path to respective TRT file of time point sampling_time:
        filepaths, timestamps = path.path_creator(sampling_time, "TRT", "TRT",
                                                  cfg_set_input)

        ## If the file is not available, look for files just before and after this time point
        ## (e.g. if no file is available at 16:35, look at 16:25/16:30/16:40/16:45); otherwise skip this time point.
        if filepaths[0] is None:
            for dt_daily_shift_fac in [-1, 1, -2, 2]:
                sampling_time_temp = sampling_time + dt_daily_shift_fac * datetime.timedelta(
                    minutes=cfg_set_tds["dt_daily_shift"])
                filepaths_temp, timestamps = path.path_creator(
                    sampling_time_temp, "TRT", "TRT", cfg_set_input)
                if filepaths_temp[0] is not None:
                    filepaths = filepaths_temp
                    print("       Instead using dataset: %s" % filepaths[0])
                    break
        if filepaths[0] is None:
            print("       No files found, skip this timepoint")
            continue

        ## Read in TRT-info:
        traj_IDs, TRTcells, cell_mask = swisstrt.readRdt(filepaths[0])
        for traj_ID in traj_IDs:
            ## New:
            dict_cellinfo = {
                key: value
                for key, value in TRTcells[traj_ID].__dict__.items()
                if not key.startswith('__') and not callable(key)
            }
            #cell_info_df  = pd.DataFrame.from_records([dict_cellinfo], index=[9])
            #samples_df_append = pd.DataFrame([[traj_ID]],columns=["traj_ID"],index=[9]).join(pd.DataFrame.from_records([dict_cellinfo],index=[9]))
            #samples_df = samples_df.append(samples_df_append, ignore_index=True, sort=True)
            samples_ls.append(
                pd.DataFrame([[traj_ID]], columns=["traj_ID"], index=[9]).join(
                    pd.DataFrame.from_records([dict_cellinfo], index=[9])))
            ## Old:
            """
            cell = TRTcells[traj_ID]
            cell_date = datetime.datetime.strptime(cell.date,"%Y%m%d%H%M")
            if ind_df <= len_ini_df-1:
                samples_df.iloc[ind_df,:] = [traj_ID,cell_date,cell.RANKr,cell.area,
                                             cell.lat,cell.lon,int(cell.iCH),int(cell.jCH)]
            else:            
                if first_append: print("   *** Start appending to dataframe at t = %s ***" % sampling_time)
                first_append = False
                samples_df = samples_df.append(pd.DataFrame([[traj_ID,cell_date,cell.RANKr,cell.area,
                                                              cell.lat,cell.lon,int(cell.iCH),int(cell.jCH)]],
                                               columns=["traj_ID","date","RANKr","area","lat","lon","iCH","jCH"]))
            ind_df += 1
            """

    samples_df = pd.concat(samples_ls)

    ## Only keep non-nan lines (where there are TRT cells):
    #print("   Lenght of dataframe before dropping of nans: %s" % samples_df.shape[0])
    #print("   Index of dataframe after filling: %s" % ind_df)
    samples_df = samples_df.dropna()
    print("   Lenght of dataframe after dropping of nans: %s" %
          samples_df.shape[0])
    print("   Number of different TRT cells: %s\n" %
          len(np.unique(samples_df["traj_ID"])))
    print(samples_df.info(), "\n")
    print(samples_df, "\n")
    samples_df.to_pickle(
        os.path.join(cfg_set_tds["root_path_tds"],
                     u"Training_Dataset_Sampling.pkl"))
    print("   Dataframe saved in: %s" % os.path.join(
        cfg_set_tds["root_path_tds"], u"Training_Dataset_Sampling.pkl"))
    return (samples_df)
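
A hypothetical call of get_TRT_cell_info, assuming the config dictionaries come from the helpers shown in Example #3 and the 5-minute sampling list is built with pandas:

import datetime
import pandas as pd

## Hypothetical one-day sampling list at 5-minute resolution:
dt_sampling_list = pd.date_range(datetime.datetime(2019, 7, 1, 0, 0),
                                 datetime.datetime(2019, 7, 2, 0, 0),
                                 freq="5min")
## samples_df = get_TRT_cell_info(dt_sampling_list, cfg_set_tds, cfg_set_input)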
Example #6
def plot_oflow_derivation(cfg_set, t0, dt):
    ## Import datasets:
    #RZC = np.load("/data/COALITION2/PicturesSatellite/results_JMZ/2_input_NOSTRADAMUS_ANN/tmp/201507071830_RZC_orig.npy")
    #UVdisparr = np.load("/data/COALITION2/PicturesSatellite/results_JMZ/2_input_NOSTRADAMUS_ANN/tmp/201507071830_RZC_disparr_UV.npz")
    #Vx = UVdisparr["Vx"][0:,:,:]; Vy = UVdisparr["Vy"][0:,:,:]
    #UV_t0 = np.moveaxis(np.dstack((Vx[0,:,:],Vy[0,:,:])),2,0)
    #Dx = UVdisparr["Dx"]; Dy = UVdisparr["Dy"]

    ## Get index of respective RZC fields at t0
    ind = (cfg_set["t_end_alt"] - t0).seconds / 60 / cfg_set["timestep"]
    ind2 = ind + (dt / cfg_set["timestep"])

    ## Read in current oflow_source file:
    t_current = t0
    cfg_set["timestep"] = dt
    filenames, timestamps = pth.path_creator(t_current,
                                             cfg_set["oflow_source"], "RADAR",
                                             cfg_set)
    ret = metranet.read_file(filenames[0], physic_value=True)
    oflow_source_data = np.atleast_3d(ret.data)

    for filename in filenames[1:]:
        ret_d_t = metranet.read_file(filename, physic_value=True)
        oflow_source_data_d_t = np.atleast_3d(ret_d_t.data)
        oflow_source_data = np.append(oflow_source_data,
                                      oflow_source_data_d_t,
                                      axis=2)

    oflow_source_data = np.moveaxis(oflow_source_data, 2, 0)
    #oflow_source_data_masked = np.ma.masked_invalid(oflow_source_data)
    #oflow_source_data_masked = np.ma.masked_where(oflow_source_data_masked==0,oflow_source_data_masked)

    ## Convert linear rain rates to logarithmic dBR units
    if cfg_set["oflow_source"] == "RZC":
        if cfg_set["R_thresh_meth"] == "fix":
            R_thresh = cfg_set["R_threshold"]
        elif cfg_set["R_thresh_meth"] == "perc":
            R_thresh = np.min([
                np.nanpercentile(oflow_source_data[0, :, :],
                                 cfg_set["R_threshold"]),
                np.nanpercentile(oflow_source_data[1, :, :],
                                 cfg_set["R_threshold"])
            ])
        else:
            raise ValueError(
                "R_thresh_meth must either be set to 'fix' or 'perc'")

        dBR, dBRmin = st.utils.mmhr2dBR(oflow_source_data, R_thresh)
        dBR[~np.isfinite(dBR)] = dBRmin
        #R_thresh = cfg_set["R_threshold"]

        ## In case threshold is not exceeded, lower R_threshold by 20%
        while (dBR == dBRmin).all():
            print(
                "   *** Warning: Threshold not exceeded, lower R_threshold by 20% to "
                + str(R_thresh * 0.8) + " ***")
            R_thresh = R_thresh * 0.8
            dBR, dBRmin = st.utils.mmhr2dBR(oflow_source_data, R_thresh)
            dBR[~np.isfinite(dBR)] = dBRmin
    else:
        raise ValueError(
            "So far displacement array retrieval only implemented for RZC")

    ## Calculate UV field
    oflow_method = st.optflow.get_method(cfg_set["oflow_method_name"])
    UV = oflow_method(dBR, return_single_vec=True)[1]
    UV_decl = oflow_method(dBR, return_declust_vec=True)[0]
    UV_final = oflow_method(dBR)
    #plt.imshow(oflow_source_data[0,:,:])
    UV_final = np.stack([-UV_final[0, :, :], UV_final[1, :, :]])
    print(UV.shape)
    print(UV_decl.shape)

    #fig, axs = plt.subplots(1,2, figsize=(10,6.5))
    fig = plt.figure(figsize=(14, 9.5))

    from matplotlib import gridspec
    gs = gridspec.GridSpec(1,
                           2,
                           width_ratios=[1, 1],
                           wspace=0.0,
                           hspace=0.0,
                           top=0.95,
                           bottom=0.05,
                           left=0.05,
                           right=0.95)

    if t0 == datetime.datetime(2015, 7, 7, 15, 0):
        xlimit = (270, 340)
        ylimit = (360, 380)
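
The function above assembles single and declustered motion vectors for plotting; a minimal, hypothetical sketch of how such a 2-D UV field could be drawn with matplotlib's quiver (the field values and subsampling step are purely illustrative):

import numpy as np
import matplotlib.pyplot as plt

## Hypothetical UV field of shape (2, ny, nx); above it comes from oflow_method(dBR):
UV = np.random.randn(2, 640, 710) * 2.0
step = 25                                   ## subsample for readability
Y, X = np.mgrid[0:UV.shape[1]:step, 0:UV.shape[2]:step]
plt.quiver(X, Y, UV[0, ::step, ::step], UV[1, ::step, ::step], scale=200)
plt.gca().invert_yaxis()                    ## image-style row ordering
plt.title("Subsampled motion vectors (illustrative)")
plt.show()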
Example #7
def get_vararr_t(t_current, var, cfg_set):
    """Get CCS4 variable array at timestep t_current.

    Parameters
    ----------

    t_current : datetime object
        Current time for which to calculate displacement array.

    var : string
        Name of variable to be returned

    cfg_set : dict
        Basic variables defined in input_NOSTRADAMUS_ANN.py
    """

    source = cfg_set["source_dict"][var]
    ## Implement different reader for different variable:
    if source == "RADAR":
        filenames, timestamps = pth.path_creator(t_current, var, source,
                                                 cfg_set)
        index_timestep = np.where(
            [timestamp == t_current for timestamp in timestamps])[0][0]
        vararr = metranet.read_file(filenames[index_timestep],
                                    physic_value=True)
        #print(t_current,np.nanmax(vararr.data))
        vararr = np.moveaxis(np.atleast_3d(vararr.data), 2, 0)
        return vararr
    elif source == "THX":
        filenames, timestamps = pth.path_creator(t_current, var, source,
                                                 cfg_set)
        vararr = read_lightning_data(var, filenames, cfg_set, t_current)
        vararr = np.moveaxis(np.atleast_3d(vararr), 2, 0)  #np.moveaxis(,2,1)
        return vararr
    elif source == "COSMO_WIND":
        filename, timestamps = pth.path_creator(t_current, var, source,
                                                cfg_set)
        vararr = read_wind_nc(filename)
        plt.imshow(vararr[0, :, :, :])
        plt.show()
        sys.exit()

        return vararr
    elif source == "COSMO_CONV":
        if t_current.minute == 0:
            filename, timestamps = pth.path_creator(t_current, var, source,
                                                    cfg_set)
            vararr = read_convection_nc(filename, var, cfg_set)
        else:
            filename_h_old, timestamp_h_old = pth.path_creator(
                t_current, var, source, cfg_set)
            vararr_old = read_convection_nc(filename_h_old, var, cfg_set)
            weight_old = 1 - t_current.minute / 60.

            t_current_plus1h = t_current + datetime.timedelta(hours=1)
            filename_h_new, timestamp_h_new = pth.path_creator(
                t_current_plus1h, var, source, cfg_set)
            vararr_new = read_convection_nc(filename_h_new, var, cfg_set)
            weight_new = 1 - weight_old

            vararr = weight_old * vararr_old + weight_new * vararr_new

        ## Smooth fields if requested (DEPRECATED):
        ## COSMO fields are smoothed before reading the statistics
        return vararr
    elif source == "SEVIRI":
        filenames, timestamps = pth.path_creator(t_current, var, source,
                                                 cfg_set)
        if all(filename is None for filename in filenames):
            vararr = np.zeros(
                (1, cfg_set["xy_ext"][0], cfg_set["xy_ext"][1])) * np.nan
        else:
            vararr = read_sat_nc(filenames[0], cfg_set, var)
            vararr = np.moveaxis(np.atleast_3d(vararr), 2, 0)
        return vararr
    else:
        raise NotImplementedError("So far path_creator implemented \
            RADAR, SEVIRI, COSMO_Conv, and THX variables only")
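
In the COSMO_CONV branch above, fields at non-hourly timesteps are interpolated linearly in time between the two surrounding hourly files. A standalone sketch of that weighting with toy arrays:

import numpy as np

def interp_hourly(field_h0, field_h1, minute):
    """Linear time interpolation between two hourly fields, as in the COSMO_CONV branch above."""
    weight_old = 1.0 - minute / 60.0
    return weight_old * field_h0 + (1.0 - weight_old) * field_h1

f0 = np.full((2, 2), 10.0)
f1 = np.full((2, 2), 20.0)
print(interp_hourly(f0, f1, minute=45))   ## -> 17.5 everywhere (closer to the next hour)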