def apply_smoother(varalias,vardict,output_dates=None,method=None,date_incr=None,**kwargs):
    """
    Applies a smoother to the data.

    **kwargs includes method specific input for chosen method
    Methods are:
            blockMean
            GP
            GAM
            Lanczos
            ...
    Caution: for some smoothers much more of time series has
    to be included.

    Parameters:
        varalias: variable alias used to look up the standard name
                  in variable_info
        vardict: dict with at least the standard-name key, 'datetime',
                 'time_unit', and any further per-observation lists
        output_dates: optional explicit list of output datetimes;
                      derived from date_incr or taken from
                      vardict['datetime'] when None
        method: smoother name forwarded to smoothing()
        date_incr: output spacing in hours (int) used to build
                   output_dates when none are given

    Returns:
        deep copy of vardict with the smoothed series under the
        standard name and all other per-observation lists subset to
        the aligned output dates
    """
    print('Apply smoother')
    print('Smooth data using method:',method)
    stdvarname = variable_info[varalias]['standard_name']
    newdict = deepcopy(vardict)
    # determine the output grid
    if (isinstance(date_incr,int) and output_dates is None):
        # increments are in #hours
        # create output grid --> list of time stamps depending on choice
        sd = vardict['datetime'][0]
        ed = vardict['datetime'][-1]
        # steps is the total number of whole hours in the span; the
        # date_incr spacing is applied via the range() stride below
        #steps = int((ed-sd).total_seconds()/(date_incr*60*60))+1
        steps = int((ed-sd).total_seconds()/(60*60))+1
        tmpd = sd
        output_dates = [tmpd + timedelta(hours=i) \
                        for i in range(0,steps,date_incr) \
                        if (tmpd + timedelta(hours=i) <= ed)]
        del tmpd
    elif output_dates is None: # original datetimes are used
        output_dates = vardict['datetime']
    output_grid = netCDF4.date2num(output_dates,\
                                   units=vardict['time_unit'])
    # align datetime and output_grid/output_dates:
    # first find input samples matching the output dates, then keep
    # only the output dates that actually found a matching sample
    idx = collocate_times(vardict['datetime'],output_dates)
    idx_output_dates = collocate_times(output_dates,
                                       list(np.array(vardict['datetime'])[idx]))
    output_dates = list(np.array(output_dates)[idx_output_dates])
    output_grid = list(np.array(output_grid)[idx_output_dates])
    # do smoothing
    smoothed_ts = smoothing(varalias,newdict,output_grid,\
                            output_dates,method=method,\
                            date_incr=date_incr,\
                            **kwargs)
    newdict[stdvarname] = list(smoothed_ts)
    # subset every other per-observation list to the aligned samples;
    # 'time_unit' and 'meta' are scalars/metadata and stay untouched
    for key in newdict:
        if (key != stdvarname and key != 'time_unit' and key != 'meta'):
            newdict[key] = list(np.array(newdict[key])[idx])
    return newdict
def validate_collocated_values(dtime, obs, mods, **kwargs):
    """
    Validate collocated model values against observations.

    Parameters:
        dtime: list of datetimes for the collocated series
        obs: observation values
        mods: model values
        **kwargs:
            col_obj: optional collocation object; when given, its
                     .vars override dtime/obs/mods
            target_t, sdate, edate, twin: optional time filters
                     forwarded to collocate_times

    Returns:
        validation_dict: dict of validation metrics from
                         wavy.validationmod.validate
    """
    col_obj = kwargs.get('col_obj')
    if col_obj is not None:
        # bugfix: the original referenced the name 'col_obj' without
        # ever assigning it from kwargs (NameError when triggered)
        mods = col_obj.vars['model_values']
        obs = col_obj.vars['obs_values']
        dtime = col_obj.vars['datetime']
    # get idx for date and twin
    target_t = kwargs.get('target_t')
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    twin = kwargs.get('twin')
    idx = collocate_times(dtime,
                          target_t=target_t,
                          sdate=sdate,
                          edate=edate,
                          twin=twin)
    mods = np.array(mods)[idx]
    obs = np.array(obs)[idx]
    results_dict = {'model_values': mods, 'obs_values': obs}
    # validate (local import to avoid a circular dependency at
    # module load time)
    from wavy.validationmod import validate, disp_validation
    validation_dict = validate(results_dict)
    disp_validation(validation_dict)
    return validation_dict
def collocate_field(mc_obj=None, obs_obj=None, col_obj=None, distlim=None,
                    datein=None, model_lats=None, model_lons=None,
                    model_vals=None):
    """
    Collocate a 2D model field with observation footprints.

    Parameters:
        mc_obj: optional model_class object; when given it supplies
                datein, model_lats, model_lons, model_vals
        obs_obj: observation object with .vars, .stdvarname, .twin
        col_obj: optional collocation object with precomputed
                 collocation indices
        distlim: distance limit in km (defaults to 6)
        datein, model_lats, model_lons, model_vals: explicit model
                 field description, used when mc_obj is None

    Returns:
        results_dict: dict with collocated obs/model values, lon/lat,
                      distances, times, and collocation indices

    NOTE(review): if col_obj is given but its collocation_idx is
    empty, neither branch assigns results_dict and the final return
    raises UnboundLocalError — confirm whether callers can hit this.
    """
    if mc_obj is not None:
        datein = netCDF4.num2date(mc_obj.vars['time'],
                                  mc_obj.vars['time_unit'])
        model_lats = mc_obj.vars['latitude']
        model_lons = mc_obj.vars['longitude']
        model_vals = mc_obj.vars[mc_obj.stdvarname]
    dtime = netCDF4.num2date(obs_obj.vars['time'],
                             obs_obj.vars['time_unit'])
    # normalize container types for the time vectors
    if isinstance(dtime, np.ndarray):
        dtime = list(dtime)
    if isinstance(datein, np.ndarray):
        datein = list(datein)
    if isinstance(datein, datetime):
        datein = [datein]
    # 1. time constraint: restrict obs to the model valid date +- twin
    cidx = collocate_times(dtime, target_t=datein, twin=obs_obj.twin)
    obs_time_dt = np.array(dtime)[cidx]
    # rebuild as plain datetimes (num2date may yield cftime objects)
    obs_time_dt = [
        datetime(t.year, t.month, t.day, t.hour, t.minute, t.second)
        for t in obs_time_dt
    ]
    datein = [
        datetime(t.year, t.month, t.day, t.hour, t.minute, t.second)
        for t in datein
    ]
    obs_time = np.array(obs_obj.vars['time'])[cidx]
    obs_time_unit = obs_obj.vars['time_unit']
    # Compare wave heights of satellite with model with
    # constraint on distance and time frame
    obs_lats = np.array(obs_obj.vars['latitude'])[cidx]
    obs_lons = np.array(obs_obj.vars['longitude'])[cidx]
    obs_vals = np.array(obs_obj.vars[obs_obj.stdvarname])[cidx]
    # fix: compare to None with 'is', not '=='
    if distlim is None:
        distlim = 6
    if (col_obj is None):
        print("No collocation idx available")
        print(len(obs_time_dt), "footprints to be collocated")
        print ("Perform collocation with distance limit\n",\
               "distlim:",distlim)
        index_array_2d, distance_array, _ =\
            collocation_fct(obs_lons, obs_lats, model_lons, model_lats)
        # caution: index_array_2d is tuple
        # impose distlim (given in km, distance_array in m) and
        # drop footprints landing on NaN model values
        dist_idx = np.where( (distance_array<distlim*1000)&\
                             (~np.isnan(\
                                 model_vals[index_array_2d[0],\
                                            index_array_2d[1]])) )[0]
        idx_x = index_array_2d[0][dist_idx]
        idx_y = index_array_2d[1][dist_idx]
        results_dict = {
            'valid_date':datein,
            'time':list(obs_time[dist_idx]),
            'time_unit':obs_time_unit,
            'datetime':list(np.array(obs_time_dt)[dist_idx]),
            'distance':list(distance_array[dist_idx]),
            'model_values':list(model_vals[idx_x,\
                                           idx_y]),
            'model_lons':list(model_lons[idx_x,\
                                         idx_y]),
            'model_lats':list(model_lats[idx_x,\
                                         idx_y]),
            'obs_values':list(obs_vals[dist_idx]),
            'obs_lons':list(obs_lons[dist_idx]),
            'obs_lats':list(obs_lats[dist_idx]),
            'collocation_idx_x':list(idx_x),
            'collocation_idx_y':list(idx_y),
            }
    elif (col_obj is not None and \
    len(col_obj.vars['collocation_idx'][0]) > 0):
        # reuse precomputed indices; only model values are refreshed
        print("Collocation idx given through collocation_class object")
        results_dict = col_obj.vars
        results_dict['model_values'] = list(\
                 model_vals[\
                     col_obj.vars['collocation_idx_x'],
                     col_obj.vars['collocation_idx_y']
                 ])
    return results_dict
def collocate_satellite_ts(obs_obj=None,model=None,distlim=None,\
    leadtime=None,date_incr=None):
    """
    Collocate a satellite observation time series with model output.

    For each valid forecast date, the satellite footprints within the
    observation time window are collocated against the model field of
    that date via collocate_field, and the per-date results are
    aggregated and finally flattened into single lists.

    Parameters:
        obs_obj: satellite observation object with .sdate, .edate,
                 .twin, .varalias, .stdvarname, and .vars
        model: model name used by get_model
        distlim: distance limit in km, forwarded to collocate_field
        leadtime: forecast lead time filter
        date_incr: spacing of forecast dates in hours

    Returns:
        results_dict: dict of flattened collocated series (times,
                      distances, model/obs values, lon/lat,
                      collocation indices); 'valid_date' keeps one
                      entry per successfully collocated forecast date
    """
    fc_date = make_fc_dates(obs_obj.sdate, obs_obj.edate, date_incr)
    fc_date = find_valid_fc_dates_for_model_and_leadtime(\
                                    fc_date,model,leadtime)
    results_dict = {
            'valid_date': [],
            'time': [],
            'time_unit': obs_obj.vars['time_unit'],
            'datetime': [],
            'distance': [],
            'model_values': [],
            'model_lons': [],
            'model_lats': [],
            'obs_values': [],
            'obs_lons': [],
            'obs_lats': [],
            'collocation_idx_x': [],
            'collocation_idx_y': [],
            }
    for i in tqdm(range(len(fc_date))):
        # NoStdStreams suppresses verbose output of the inner calls
        with NoStdStreams():
            try:
                # filter needed obs within time period
                idx = collocate_times(obs_obj.vars['datetime'],
                                      target_t=[fc_date[i]],
                                      twin=obs_obj.twin)
                # make tmp obs_obj with filtered data
                obs_obj_tmp = deepcopy(obs_obj)
                obs_obj_tmp.vars['time'] = list(\
                        np.array(obs_obj.vars['time'])[idx] )
                obs_obj_tmp.vars['latitude'] = list(\
                        np.array(obs_obj.vars['latitude'])[idx] )
                obs_obj_tmp.vars['longitude'] = list(\
                        np.array(obs_obj.vars['longitude'])[idx] )
                obs_obj_tmp.vars[obs_obj.stdvarname] = \
                        list(np.array(\
                            obs_obj.vars[obs_obj.stdvarname])[idx] )
                vardict, _, _, _, _ = get_model(model=model,
                                                fc_date=fc_date[i],
                                                varalias=obs_obj.varalias,
                                                leadtime=leadtime,
                                                transform_lons=180)
                results_dict_tmp = collocate_field(\
                                datein=fc_date[i],\
                                model_lats=vardict['latitude'],\
                                model_lons=vardict['longitude'],\
                                model_vals=vardict[obs_obj.stdvarname],\
                                obs_obj=obs_obj_tmp,\
                                distlim=distlim )
                # append to dict
                results_dict['valid_date'].append(fc_date[i])
                results_dict['time'].append(results_dict_tmp['time'])
                results_dict['datetime'].append(results_dict_tmp['datetime'])
                results_dict['distance'].append(results_dict_tmp['distance'])
                results_dict['model_values'].append(
                    results_dict_tmp['model_values'])
                results_dict['model_lons'].append(
                    results_dict_tmp['model_lons'])
                results_dict['model_lats'].append(
                    results_dict_tmp['model_lats'])
                results_dict['obs_values'].append(
                    results_dict_tmp['obs_values'])
                results_dict['obs_lats'].append(results_dict_tmp['obs_lats'])
                results_dict['obs_lons'].append(results_dict_tmp['obs_lons'])
                results_dict['collocation_idx_x'].append(\
                    results_dict_tmp['collocation_idx_x'])
                results_dict['collocation_idx_y'].append(\
                    results_dict_tmp['collocation_idx_y'])
                # drop the per-date dict so a failed next iteration
                # cannot accidentally re-append stale results
                if 'results_dict_tmp' in locals():
                    del results_dict_tmp
            except (ValueError, FileNotFoundError, OSError) as e:
                # ValueError, pass if no collocation
                # FileNotFoundError, pass if file not accessible
                # OSError, pass if file not accessible from thredds
                print(e)
    # flatten all aggregated entries (each is a list of per-date lists)
    results_dict['time'] = flatten(results_dict['time'])
    results_dict['datetime'] = flatten(results_dict['datetime'])
    results_dict['distance'] = flatten(results_dict['distance'])
    results_dict['model_values'] = flatten(results_dict['model_values'])
    results_dict['model_lons'] = flatten(results_dict['model_lons'])
    results_dict['model_lats'] = flatten(results_dict['model_lats'])
    results_dict['obs_values'] = flatten(results_dict['obs_values'])
    results_dict['obs_lats'] = flatten(results_dict['obs_lats'])
    results_dict['obs_lons'] = flatten(results_dict['obs_lons'])
    results_dict['collocation_idx_x'] = flatten(\
                                results_dict['collocation_idx_x'])
    results_dict['collocation_idx_y'] = flatten(\
                                results_dict['collocation_idx_y'])
    return results_dict
def collocate_station_ts(obs_obj=None,model=None,distlim=None,\
    leadtime=None,date_incr=None):
    """
    Collocate a fixed-station observation time series with model output.

    The spatial collocation indices are computed once from the first
    valid model file (stations do not move); subsequent model files
    only contribute values at those fixed indices.

    Parameters:
        obs_obj: station observation object with .sdate, .edate,
                 .twin, .varalias, .stdvarname, and .vars
                 NOTE(review): obs_obj.vars is mutated in place here
        model: model name
        distlim: distance limit forwarded to collocation_class
        leadtime: forecast lead time filter
        date_incr: spacing of forecast dates in hours

    Returns:
        results_dict: col_obj.vars with model/obs series aligned on
                      coherent datetimes; 'valid_date' is set to None
                      as it is meaningless for a time series

    NOTE(review): if no model file at all is valid, col_obj is never
    created and the final 'results_dict = col_obj.vars' raises
    NameError — confirm intended behavior.
    """
    fc_date = make_fc_dates(obs_obj.sdate, obs_obj.edate, date_incr)
    # get coinciding date between fc_date and dates in obs_obj
    idx1 = collocate_times(unfiltered_t=obs_obj.vars['datetime'],
                           target_t=fc_date, twin=obs_obj.twin)
    # find valid/coinciding fc_dates
    if len(idx1) > len(fc_date):
        print('Muliple assignments within given time window')
        print('--> only closest to time stamp is chosen')
        idx_closest = get_closest_date(\
                    list(np.array(obs_obj.vars['datetime'])[idx1]),\
                    fc_date)
        idx1 = list(np.array(idx1)[idx_closest])
    # adjust obs_obj according to valid dates
    for key in obs_obj.vars.keys():
        if (key != 'time_unit' and key != 'meta'):
            obs_obj.vars[key] = list(np.array(obs_obj.vars[key])[idx1])
    # adjust again assumed fc_dates by filtered obs dates
    fc_date = obs_obj.vars['datetime']
    # find valid dates for given leadtime and model
    fc_date = find_valid_fc_dates_for_model_and_leadtime(\
                                    fc_date,model,leadtime)
    # check if file exists and if it includes desired time
    # if not check next possible file
    check = False
    for d in range(len(fc_date)):
        check = check_if_file_is_valid(fc_date[d], model, leadtime)
        if check == True:
            break
    if check == True:
        # first valid file: establishes the collocation indices that
        # are reused for all following dates
        mc_obj = model_class(model=model,
                             fc_date=fc_date[d],
                             leadtime=leadtime,
                             varalias=obs_obj.varalias,
                             transform_lons=180)
        col_obj = collocation_class(mc_obj_in=mc_obj,
                                    obs_obj_in=obs_obj,
                                    distlim=distlim)
        model_vals = [col_obj.vars['model_values'][0]]
        tmpdate = hour_rounder(col_obj.vars['datetime'][0])
        model_datetime = [ datetime(tmpdate.year, tmpdate.month,
                                    tmpdate.day, tmpdate.hour) ]
        model_time = [ netCDF4.date2num(model_datetime[0],
                                        units=col_obj.vars['time_unit']) ]
    if check == False:
        print('No valid model file available!')
    else:
        print('Collocating and appending values ...')
        # continue from the date after the first valid one; 'd' is
        # deliberately reused from the search loop above
        for i in tqdm(range(d + 1, len(fc_date))):
            with NoStdStreams():
                try:
                    check = check_if_file_is_valid(fc_date[i], model,
                                                   leadtime)
                    if check == False:
                        raise FileNotFoundError
                    mc_obj = model_class(model=model,
                                         fc_date=fc_date[i],
                                         leadtime=leadtime,
                                         varalias=obs_obj.varalias,
                                         transform_lons=180)
                    # sample the model field at the fixed indices
                    model_vals.append(
                        mc_obj.vars[\
                            mc_obj.stdvarname][ \
                                col_obj.vars['collocation_idx_x'],\
                                col_obj.vars['collocation_idx_y']\
                            ][0] )
                    model_time.append(mc_obj.vars['time'][0])
                    model_datetime.append( datetime(\
                                mc_obj.vars['datetime'][0].year,
                                mc_obj.vars['datetime'][0].month,
                                mc_obj.vars['datetime'][0].day,
                                mc_obj.vars['datetime'][0].hour ) )
                except FileNotFoundError as e:
                    print(e)
        # potentially there are different number of values
        # for obs and model
        # double check and use only coherent datetimes
        idx2 = collocate_times(model_datetime,
                               target_t=obs_obj.vars['datetime'],
                               twin=obs_obj.twin)
        col_obj.vars['model_values'] = list(np.array(\
                                            model_vals)[idx2])
        col_obj.vars['time'] = list(np.array(model_time)\
                                    [idx2])
        col_obj.vars['datetime'] = list(np.array(\
                                        model_datetime)[idx2])
        idx3 = collocate_times( \
                        unfiltered_t = obs_obj.vars['datetime'],
                        target_t = col_obj.vars['datetime'],
                        twin = obs_obj.twin)
        col_obj.vars['obs_values'] = list(
                np.array(obs_obj.vars[obs_obj.stdvarname])[idx3])
        # valid_date is meaningless for ts application and set to None
        col_obj.vars['valid_date'] = None
        # inflate length of constant sized variables so every key has
        # one entry per datetime (station position is fixed)
        col_obj.vars['distance'] = col_obj.vars['distance']*\
                                len(col_obj.vars['datetime'])
        col_obj.vars['obs_lats'] = col_obj.vars['obs_lats']*\
                                len(col_obj.vars['datetime'])
        col_obj.vars['obs_lons'] = col_obj.vars['obs_lons']*\
                                len(col_obj.vars['datetime'])
        col_obj.vars['collocation_idx_x'] = col_obj.vars['collocation_idx_x']*\
                                len(col_obj.vars['datetime'])
        col_obj.vars['collocation_idx_y'] = col_obj.vars['collocation_idx_y']*\
                                len(col_obj.vars['datetime'])
        col_obj.vars['model_lats'] = col_obj.vars['model_lats']*\
                                len(col_obj.vars['datetime'])
        col_obj.vars['model_lons'] = col_obj.vars['model_lons']*\
                                len(col_obj.vars['datetime'])
    results_dict = col_obj.vars
    return results_dict
def collocate_poi_ts(indict,model=None,distlim=None,\
                     leadtime=None,date_incr=None,varalias=None,twin=None):
    """
    Collocate a moving point-of-interest (poi) time series with model
    output, one point per forecast date.

    indict: mandatory - lons, lats, time, values
            optional - leadtime, distlim, date_incr

    Parameters:
        indict: dict with 'longitude', 'latitude', 'time' and the
                observed values
                NOTE(review): the values are read from key 'obs'
                below — confirm against callers
        model: model name used for file lookup
        distlim: distance limit in km
        leadtime: forecast lead time filter
        date_incr: spacing of forecast dates in hours
        varalias: variable alias for variable_info lookup
        twin: time window in minutes for collocate_times

    Returns:
        results_dict: dict of collocated series; only dates whose
                      nearest grid point is within distlim are kept

    NOTE(review): if indict['time'] holds neither str nor datetime,
    only a message is printed and poi_dtimes stays undefined, so the
    next line raises NameError — confirm intended behavior.
    """
    # get stdvarname
    stdvarname = variable_info[varalias]['standard_name']
    # datetime or str
    if isinstance(indict['time'][0], str):
        poi_dtimes = [parse_date(t) for t in indict['time']]
    elif isinstance(indict['time'][0], datetime):
        poi_dtimes = indict['time']
    else:
        print('no valid time/datetime format for poi')
        print('use either str or datetime')
    fc_date = make_fc_dates(poi_dtimes[0], poi_dtimes[-1], date_incr)
    # get coinciding date between fc_date and dates in obs_obj
    idx1 = collocate_times(unfiltered_t=poi_dtimes,
                           target_t=fc_date, twin=twin)
    # find valid/coinciding fc_dates
    if len(idx1) > len(fc_date):
        print('Muliple assignments within given time window')
        print('--> only closest to time stamp is chosen')
        idx_closest = get_closest_date(\
                    list(np.array(poi_dtimes)[idx1]),\
                    fc_date)
        idx1 = list(np.array(idx1)[idx_closest])
    # adjust obs_obj according to valid dates
    for key in indict.keys():
        if (key != 'time_unit' and key != 'meta' and key != 'nID'):
            indict[key] = list(np.array(indict[key])[idx1])
    poi_dtimes = list(np.array(poi_dtimes)[idx1])
    del idx1
    # find valid dates for given leadtime and model
    fc_date = find_valid_fc_dates_for_model_and_leadtime(\
                                    fc_date,model,leadtime)
    # adjust fc_date according to obs date
    idx2 = collocate_times(unfiltered_t=fc_date,
                           target_t=poi_dtimes, twin=twin)
    fc_date = list(np.array(fc_date)[idx2])
    del idx2
    # compute time based on time unit from variable definition
    time_unit = variable_info['time']['units']
    time = netCDF4.date2num(poi_dtimes, time_unit)
    # check if file exists and if it includes desired time and append
    model_vals = []
    model_lons = []
    model_lats = []
    obs_vals = []
    obs_lons = []
    obs_lats = []
    collocation_idx_x = []
    collocation_idx_y = []
    distance = []
    time_lst = []
    dtimes = []
    # switch: grid/variable names are resolved only once (first
    # valid file) and the lon/lat mesh is cached across iterations
    switch = 0
    for d in tqdm(range(len(fc_date))):
        with NoStdStreams():
            check = False
            check = check_if_file_is_valid(fc_date[d], model, leadtime)
            if check == True:
                # retrieve model
                fname = make_model_filename_wrapper(model, fc_date[d],
                                                    leadtime)
                # get hold of variable names (done only once)
                if switch == 0:
                    meta = ncdumpMeta(fname)
                    lonsname = get_filevarname('lons', variable_info,
                                               model_dict[model], meta)
                    latsname = get_filevarname('lats', variable_info,
                                               model_dict[model], meta)
                    timename = get_filevarname('time', variable_info,
                                               model_dict[model], meta)
                    filevarname = get_filevarname(varalias, variable_info,
                                                  model_dict[model], meta)
                    mlons = xr.open_dataset(fname)[lonsname].values
                    # secure lons from -180 to 180
                    mlons = ((mlons - 180) % 360) - 180
                    mlats = xr.open_dataset(fname)[latsname].values
                    # ensure matching dimension
                    if len(mlons.shape) == 1:
                        Mlons, Mlats = np.meshgrid(mlons, mlats)
                    else:
                        Mlons, Mlats = mlons, mlats
                    switch = 1
                plon = [indict['longitude'][d]]
                plat = [indict['latitude'][d]]
                index_array_2d, distance_array, _ = \
                        collocation_fct(plon,plat,Mlons,Mlats)
                # NOTE(review): each iteration re-opens the dataset
                # (here and for the values below) — potential
                # performance cost; confirm before optimizing
                dst = xr.open_dataset(fname)[timename].values
                tidx = list(dst).index(np.datetime64(fc_date[d]))
                # impose distlim (km; distance_array is in m)
                if distance_array[0] < distlim * 1000:
                    idx_x = index_array_2d[0][0]
                    idx_y = index_array_2d[1][0]
                    model_lons.append(Mlons[idx_x, idx_y])
                    model_lats.append(Mlats[idx_x, idx_y])
                    vals = xr.open_dataset(fname)[filevarname]\
                            [tidx,idx_x,idx_y].values
                    model_vals.append(vals.item())
                    obs_vals.append(indict['obs'][d])
                    obs_lons.append(indict['longitude'][d])
                    obs_lats.append(indict['latitude'][d])
                    collocation_idx_x.append(idx_x)
                    collocation_idx_y.append(idx_y)
                    distance.append(distance_array[0])
                    time_lst.append(time[d])
                    dtimes.append(poi_dtimes[d])
    results_dict = {
        'valid_date': dtimes,
        'time': time_lst,
        'time_unit': time_unit,
        'datetime': dtimes,
        'distance': distance,
        'model_values': model_vals,
        'model_lons': model_lons,
        'model_lats': model_lats,
        'obs_values': obs_vals,
        'obs_lons': obs_lons,
        'obs_lats': obs_lats,
        'collocation_idx_x': collocation_idx_x,
        'collocation_idx_y': collocation_idx_y
        }
    return results_dict
def get_d22_dict(**kwargs):
    """
    Build a variable dictionary from d22 in-situ station files.

    Reads the raw time series via get_d22_ts, restricts it to the
    requested period using the station's configured time window
    (default 1 min tolerance), optionally thins the native 10-min
    sampling down to 20-min stamps, and attaches the fixed station
    coordinates.

    Keyword arguments:
        sdate, edate: period of interest
        basedate: reference date for the time axis
        nID: station identifier (key into insitu_dict)
        sensor: sensor name (key into station coords)
        varalias: variable alias for variable_info lookup
        pathlst, strsublst, dict_for_sub: file lookup configuration
        unique: when True, keep only the 00/20/40-minute stamps

    Returns:
        dict with the standard-name variable plus 'time', 'datetime',
        'time_unit', 'longitude', 'latitude'
    """
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    nID = kwargs.get('nID')
    sensor = kwargs.get('sensor')
    varalias = kwargs.get('varalias')
    stdvarname = variable_info[varalias]['standard_name']
    var, time, timedt = get_d22_ts(
        sdate, edate, kwargs.get('basedate'), nID, sensor, varalias,
        kwargs.get('pathlst'), kwargs.get('strsublst'),
        kwargs.get('dict_for_sub'))
    # station-specific time window if configured,
    # otherwise default to allow for a 1 min variation
    twin = insitu_dict[nID].get('twin', 1)
    idxtmp = collocate_times(unfiltered_t=timedt,
                             sdate=sdate, edate=edate, twin=twin)
    # convert to list for consistency with other classes
    # and make sure that only steps with existing obs are included
    nvals = len(var)
    valid = [i for i in idxtmp if i < nvals]
    time = [time[i] for i in valid]
    timedt = [timedt[i] for i in valid]
    var = [np.real(var[i]) for i in valid]
    # rm double entries due to 10min spacing
    if kwargs.get('unique') is True:
        # delete 10,30,50 min times, keep 00,20,40
        # 1. create artificial 20-min time vector for collocation
        target = []
        stamp = deepcopy(sdate)
        while stamp < edate:
            target.append(stamp)
            stamp += timedelta(minutes=20)
        # 2. collocate times against the artificial vector
        idxtmp = collocate_times(unfiltered_t=timedt,
                                 target_t=target, twin=twin)
        time = list(np.array(time)[idxtmp])
        timedt = list(np.array(timedt)[idxtmp])
        var = list(np.array(var)[idxtmp])
    # fixed station position, repeated per sample
    coords = insitu_dict[nID]['coords'][sensor]
    lons = [coords['lon']] * len(var)
    lats = [coords['lat']] * len(var)
    return {
        stdvarname: var,
        'time': time,
        'datetime': timedt,
        'time_unit': variable_info['time']['units'],
        'longitude': lons,
        'latitude': lats,
        }