def __init__(self, config_file='../config.toml'):
    """Load the toml configuration and derive the time and height grids.

    Args:
        config_file (str, optional): path to the toml configuration file,
            defaults to ``'../config.toml'``

    Side effects:
        sets ``self.config`` (with parsed ``begin_dt``/``end_dt`` datetimes
        added to the ``time`` section), ``self.dt_list`` and
        ``self.height_list``
    """
    # fix: do not shadow the `config_file` argument with the file handle
    # (the original `with open(config_file) as config_file:` left the name
    # bound to a closed file object after the block)
    with open(config_file) as f:
        self.config = toml.loads(f.read())

    # parse the configured begin/end strings (format e.g. '2019-01-01_00')
    self.config['time']['begin_dt'] = datetime.datetime.strptime(
        self.config['time']['begin'], '%Y-%m-%d_%H')
    self.config['time']['end_dt'] = datetime.datetime.strptime(
        self.config['time']['end'], '%Y-%m-%d_%H')
    print('config', self.config)
    # list of timesteps covering the configured interval with the configured step
    self.dt_list = trace_source.time_list(self.config['time']['begin_dt'],
                                          self.config['time']['end_dt'],
                                          self.config['time']['step'])
    print('dt_list', self.dt_list)
    # receptor heights every 500 m up to (and including) the configured top
    self.height_list = list(range(500, self.config['height']['top'] + 1, 500))
def assemble(self, dt_range=None):
    """
    assemble the statistics for a range of trajectories and
    save the statistics to dicts

    Reads HYSPLIT ensemble trajectory files (``*.tdump``) from
    ``config['traj_dir']``, evaluates each trajectory and fills the
    per-(time, height) statistics arrays ``stat2d_dict``, ``statls_dict``,
    ``statgn_dict`` and the raw-parameter array ``raw_dict``.

    Args:
        dt_range (list(datetime), optional): timerange for that the statistics
            is assembled, default taken from config
    """
    if dt_range is not None:
        self.dt_list = trace_source.time_list(dt_range[0], dt_range[1],
                                              self.config['time']['step'])

    # only for the testcase
    traj_dir = self.config['traj_dir']
    files = os.listdir(traj_dir)
    # filter only for the trajectory files with tdump extension
    files = [f for f in files if f[-6:] == '.tdump']
    # the defaultdict is used here to sort the files by datetime within a dictionary
    filtered_files = defaultdict(list)
    for f in files:
        # regex the yyyymmdd-hh timestamp in the filename
        dt = datetime.datetime.strptime(
            re.search('([0-9]{8})-([0-9]){2}', f).group(0), '%Y%m%d-%H')
        # release height in m, taken from the filename part before `_0..._.tdump`
        height = float(
            re.search('([0-9]{3,6})(?=_0[0-9-]{1,4}.tdump)', f).group(0))
        #print(f, dt, height)
        # keep only files inside the requested time window
        if dt >= self.dt_list[0] and dt <= self.dt_list[-1]:
            filtered_files[dt].append((f, height))

    # here an empty dict is generated with a zero containing array
    # shapes: (time, height) for 2d stats, (time, height, category) for the rest
    self.stat2d_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list))))
    # NOTE(review): third axis hard-coded to 7 land-surface categories, see TODO below
    self.statls_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list), 7)))
    # one sample per trajectory hour (tr_duration may be negative for backward runs)
    self.raw_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list),
         abs(self.config['time']['tr_duration']) + 1)))

    # TODO make more than 7 geo names possible
    ng = trace_source.land_sfc.named_geography(self.config['geonames'])
    self.geo_names = ng.geo_names
    no_geo_names = len(list(self.geo_names.keys()))
    self.statgn_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list), no_geo_names)))

    ls = trace_source.land_sfc.land_sfc()
    self.ls_categories = ls.categories

    for it, dt in enumerate(self.dt_list[:]):
        print(dt)
        # sort by height
        f_list = sorted(filtered_files[dt], key=lambda x: x[1])
        print('file_list ', f_list)
        #assert len(f_list) > 1
        for ih, f in enumerate(f_list):
            print(it, ih, f[1], dt)
            traj = trajectory(self.config)
            traj.load_file(traj_dir + f[0], silent=True)
            savepath = '{}/{}'.format(self.config['plot_dir'],
                                      dt.strftime('%Y%m%d'))

            # plot-map gating: configurable time interval and height list,
            # with defaults matching the previous hard-coded behaviour
            if "timeinterval" in self.config['plotmap']:
                timeinterval = self.config['plotmap']['timeinterval']
            else:
                timeinterval = 12
            if "heights" in self.config['plotmap']:
                heightlist = self.config['plotmap']['heights']
            else:
                heightlist = [1500.0, 3000.0, 4500.0]
            #if f[1] == 3000.0 and dt.hour % 12 == 0:
            if f[1] in heightlist and dt.hour % timeinterval == 0:
                print("plotting ", f[1], dt.hour)
                plot_trajectories_ens(traj, savepath, ls=ls, config=self.config)
            #continue

            traj.evaluate(silent=True)
            traj.add_land_sfc(ls, silent=True)
            traj.add_ensemble_land_sfc(ls)
            traj.add_ensemble_geo_names(ng)
            #traj.add_area_land_sfc('md', ls, silent=True)
            #traj.add_area_land_sfc(2000, ls, silent=True)

            #print("at step", it, dt, ih, f)
            #print('keys ', traj.statistics.keys())
            # now the empty dict is filled with the keys (and values) of the statistics dict from traj
            for k in list(traj.statistics.keys()):
                self.stat2d_dict[k][it, ih] = traj.statistics[k]
            # subset of trajectory data to collect
            param_collect = [
                'latitude', 'longitude', 'height', "PRESSURE", "AIR_TEMP",
                "RAINFALL", "RELHUMID", "TERR_MSL", 'age'
            ]
            if 'land_sfc_category' in list(traj.data.keys()):
                param_collect.append('land_sfc_category')
            for k in param_collect:
                #self.raw_dict[k][it, ih, :traj.data[1][k].shape[0]] = traj.data[1][k]
                # NOTE(review): assumes traj.data[1][k] has exactly
                # abs(tr_duration)+1 samples — shorter/longer trajectories
                # would raise here; TODO confirm against trajectory.load_file
                self.raw_dict[k][it, ih, :] = traj.data[1][k]
                #self.raw_dict[k][it, ih, traj.data[1][k].shape[0]:] = -999.
            # land-surface statistics per threshold key k
            for k in list(traj.stat_ls.keys()):
                self.stat2d_dict[k + '_no_below'][
                    it, ih] = traj.stat_ls[k].no_below
                print('stat ls ', k, traj.stat_ls[k])
                self.statls_dict[k][it, ih] = list(
                    traj.stat_ls[k].counter.values())
            # named-geography statistics per threshold key k
            for k in list(traj.stat_gn.keys()):
                self.stat2d_dict[k + '_no_below'][
                    it, ih] = traj.stat_gn[k].no_below
                print('stat gn ', k, traj.stat_gn[k])
                self.statgn_dict[k][it, ih] = list(
                    traj.stat_gn[k].counter.values())

    # trying to free memory
    del ls
    del ng
def assemble(self, dt_range=None):
    """
    assemble the statistics for a range of trajectories and
    save the statistics to dicts

    FLEXPART variant: instead of single hysplit ``.tdump`` files it reads
    per-endtime folders (named ``%Y%m%d_%H``) of ``partposit_*`` particle
    dumps from ``config['partposit_dir']`` and accumulates statistics via
    ``flex_statistics``.

    Args:
        dt_range (list(datetime), optional): timerange for that the statistics
            is assembled, default taken from config
    """
    if dt_range is not None:
        self.dt_list = trace_source.time_list(dt_range[0], dt_range[1],
                                              self.config['time']['step'])

    # only for the testcase
    traj_dir = self.config['partposit_dir']
    days_avail = os.listdir(traj_dir)
    # filter only for the trajectory files with tdump extension
    # (here: folder names of the fixed form yyyymmdd_HH, i.e. 11 characters)
    days_avail = [f for f in days_avail if len(f) == 11]
    print(days_avail)
    # keep only folders whose timestamp falls inside the requested time list
    folders = [
        f for f in days_avail
        if datetime.datetime.strptime(f, "%Y%m%d_%H") in self.dt_list
    ]

    assert len(folders) > 0, 'no folders with flexpart partposit data'

    # the defaultdict is used here to sort the files by datetime within a dictionary
    # filtered_files = defaultdict(list)
    # for f in files:
    #     # regex the yyyymmdd-hh timestamp in the filename
    #     dt = datetime.datetime.strptime(re.search('([0-9]{8})-([0-9]){2}', f).group(0), '%Y%m%d-%H')
    #     height = float(re.search('([0-9]{3,6})(?=_0[0-9-]{1,4}.tdump)', f).group(0))
    #     #print(f, dt, height)
    #     if dt >= self.dt_list[0] and dt <= self.dt_list[-1]:
    #         filtered_files[dt].append((f,height))

    # here an empty dict is generated with a zero containing array
    # shapes: (time, height) for 2d stats, (time, height, category) otherwise
    self.stat2d_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list))))
    # NOTE(review): third axis hard-coded to 7 land-surface categories
    self.statls_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list), 7)))
    self.raw_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list),
         abs(self.config['time']['tr_duration']) + 1)))

    # TODO make more than 7 geo names possible
    ng = trace_source.land_sfc.named_geography(self.config['geonames'])
    self.geo_names = ng.geo_names
    no_geo_names = len(list(self.geo_names.keys()))
    self.statgn_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list), no_geo_names)))

    # fixed latitude bands used for the threshold statistics
    self.lat_names = {
        0: '<-60', 1: '-60..-30', 2: '-30..0', 3: '0..30', 4: '30..60',
        5: '>60'
    }
    self.statlat_dict = defaultdict(lambda: np.zeros(
        (len(self.dt_list), len(self.height_list),
         len(list(self.lat_names.keys())))))

    ls = trace_source.land_sfc.land_sfc()
    self.ls_categories = ls.categories

    for it, dt in enumerate(self.dt_list[:]):
        print('trajectories eding at ', dt)
        files_for_time = os.listdir(traj_dir + dt.strftime("%Y%m%d_%H"))
        files_for_time = sorted(
            [f for f in files_for_time if "partposit_" in f])
        folder = traj_dir + dt.strftime("%Y%m%d_%H") + "/"
        print('files_for_time ', files_for_time)
        print('heights ', len(self.height_list), self.height_list)

        # one statistics accumulator per release height
        flex_stat = [
            flex_statistics(self.config, ls=ls, ng=ng)
            for h in self.height_list
        ]
        traj_meta = read_flexpart_traj_meta(folder + "trajectories.txt")

        # NOTE(review): assumes self.no_part / self.time_res were initialized
        # elsewhere (not in the visible __init__) — TODO confirm
        self.no_part.append(traj_meta['releases_meta'][1]['no_particles'])
        # NOTE(review): the 10*24 implies a fixed 10-day run length — confirm
        self.time_res.append(10 * 24 / len(files_for_time))

        # different structure than hysplit
        # 1. loop through the ending times of the current day
        # 2. load partposit for a specified time
        # 3. loop through heights
        for f in files_for_time:
            print('files_for_time ', f)
            part_pos = read_partpositions(folder + f, 1, ctable=True)
            part_pos = np.array(part_pos)

            for ih, h in enumerate(self.height_list):
                #print("at ", ih, h)
                # column 0 holds the 1-based release number; select this release
                this_population = np.where(part_pos[:, 0] == ih + 1)[0]
                #release_sel = np.array([list(p) for p in part_pos if p[0]==ih+1])
                release_sel = part_pos[this_population, :]
                #assert np.all(release_sel == other_release)
                meta = traj_meta['releases_meta'][ih + 1]
                #print(meta)
                # sanity check: release height from metadata must match our grid
                assert np.mean(meta['heights']
                               ) == h, f"{meta['heights']} {h} do not fit"
                flex_stat[ih].add_partposits_gn(release_sel)

                flex_stat[ih].add_partposits_ls(release_sel)
                flex_stat[ih].add_partposits_thres(release_sel)

        # now assemble the statistics for all heights
        for ih, h in enumerate(self.height_list):
            # named-geography statistics
            flex_stat[ih].calc_gn_stat()
            for k in list(flex_stat[ih].stat_gn.keys()):
                self.stat2d_dict[k + '_no_below'][
                    it, ih] = flex_stat[ih].stat_gn[k].no_below
                print('stat gn ', h, k, flex_stat[ih].stat_gn[k])
                self.statgn_dict[k][it, ih] = list(
                    flex_stat[ih].stat_gn[k].counter.values())

            # land-surface statistics
            flex_stat[ih].calc_ls_stat()
            for k in list(flex_stat[ih].stat_ls.keys()):
                self.stat2d_dict[k + '_no_below'][
                    it, ih] = flex_stat[ih].stat_ls[k].no_below
                print('stat ls ', h, k, flex_stat[ih].stat_ls[k])
                self.statls_dict[k][it, ih] = list(
                    flex_stat[ih].stat_ls[k].counter.values())

            # latitude-band threshold statistics
            flex_stat[ih].calc_thres_stat()
            for k in list(flex_stat[ih].stat_lat.keys()):
                self.stat2d_dict[k + '_no_below'][
                    it, ih] = flex_stat[ih].stat_lat[k].no_below
                print('stat_lat ', h, k, flex_stat[ih].stat_lat[k])
                self.statlat_dict[k][it, ih] = list(
                    flex_stat[ih].stat_lat[k].counter.values())

    # legacy hysplit per-file loop kept for reference
    # #assert len(f_list) > 1
    # for ih, f in enumerate(f_list):
    #     print(it, ih, f[1], dt)
    #     traj = trajectory(self.config)
    #     traj.load_file(traj_dir+f[0], silent=True)
    #     savepath = '{}/{}'.format(self.config['plot_dir'], dt.strftime('%Y%m%d'))

    #     if "timeinterval" in self.config['plotmap']:
    #         timeinterval = self.config['plotmap']['timeinterval']
    #     else:
    #         timeinterval = 12
    #     if "heights" in self.config['plotmap']:
    #         heightlist = self.config['plotmap']['heights']
    #     else:
    #         heightlist = [1500.0, 3000.0, 4500.0]
    #     #if f[1] == 3000.0 and dt.hour % 12 == 0:
    #     if f[1] in heightlist and dt.hour % timeinterval == 0:
    #         print("plotting ", f[1], dt.hour)
    #         plot_trajectories_ens(traj, savepath, ls=ls, config=self.config)
    #     #continue

    #     traj.evaluate(silent=True)
    #     traj.add_land_sfc(ls, silent=True)
    #     traj.add_ensemble_land_sfc(ls)
    #     traj.add_ensemble_geo_names(ng)
    #     #traj.add_area_land_sfc('md', ls, silent=True)
    #     #traj.add_area_land_sfc(2000, ls, silent=True)

    #     #print("at step", it, dt, ih, f)
    #     #print('keys ', traj.statistics.keys())
    #     # now the empty dict is filled with the keys (and values) of the statistics dict from traj
    #     for k in list(traj.statistics.keys()):
    #         self.stat2d_dict[k][it, ih] = traj.statistics[k]
    #     # subset of trajectory data to collect
    #     param_collect = ['latitude', 'longitude', 'height', "PRESSURE", "AIR_TEMP",
    #                      "RAINFALL", "RELHUMID", "TERR_MSL", 'age']
    #     if 'land_sfc_category' in list(traj.data.keys()):
    #         param_collect.append('land_sfc_category')
    #     for k in param_collect:
    #         #self.raw_dict[k][it, ih, :traj.data[1][k].shape[0]] = traj.data[1][k]
    #         self.raw_dict[k][it, ih, :] = traj.data[1][k]
    #         #self.raw_dict[k][it, ih, traj.data[1][k].shape[0]:] = -999.
    #     for k in list(traj.stat_ls.keys()):
    #         self.stat2d_dict[k+'_no_below'][it, ih] = traj.stat_ls[k].no_below
    #         print('stat ls ', k, traj.stat_ls[k])
    #         self.statls_dict[k][it, ih] = list(traj.stat_ls[k].counter.values())
    #     for k in list(traj.stat_gn.keys()):
    #         self.stat2d_dict[k+'_no_below'][it, ih] = traj.stat_gn[k].no_below
    #         print('stat gn ', k, traj.stat_gn[k])
    #         self.statgn_dict[k][it, ih] = list(traj.stat_gn[k].counter.values())

    # trying to free memory
    del ls
    del ng
dt_end = datetime.datetime.strptime(args.daterange.split('-')[1], '%Y%m%d') + datetime.timedelta(hours=23) station = args.station with open('config_{}.toml'.format(args.station)) as config_file: config = toml.loads(config_file.read()) trajdir = config['traj_dir'] files = os.listdir(trajdir) files = [f for f in files if f[-6:] == '.tdump'] filtered_files = defaultdict(list) #dt_begin = datetime.datetime(2015, 11, 25) #dt_end = datetime.datetime(2015, 12, 1) dt_list = trace_source.time_list(dt_begin, dt_end, 3) for f in files[:]: dt = datetime.datetime.strptime( re.search('([0-9]{8})-([0-9]){2}', f).group(0), '%Y%m%d-%H') height = float( re.search('([0-9]{3,6})(?=_0[0-9-]{1,4}.tdump)', f).group(0)) size = os.stat(trajdir + '/' + f).st_size filtered_files[dt].append((f, height, size)) #print(f, height, size, dt) days_missing = set(dt_list) - set(filtered_files.keys()) print('days missing completely') [print(dt) for dt in sorted(list(days_missing))] for dt in sorted(list(filtered_files.keys())):