Example #1
0
    def __init__(self, config_file='../config.toml'):
        """
        read the toml configuration and derive the time and height lists

        Args:
            config_file (str, optional): path of the toml configuration file,
                default '../config.toml'

        """
        with open(config_file) as fh:
            self.config = toml.loads(fh.read())

        # the 'time' table is updated in place, so the parsed datetimes
        # are also available via self.config['time']
        time_cfg = self.config['time']
        for src_key, dst_key in (('begin', 'begin_dt'), ('end', 'end_dt')):
            # timestamps in the config are given as YYYY-mm-dd_HH
            time_cfg[dst_key] = datetime.datetime.strptime(
                time_cfg[src_key], '%Y-%m-%d_%H')
        print('config', self.config)

        self.dt_list = trace_source.time_list(
            time_cfg['begin_dt'], time_cfg['end_dt'], time_cfg['step'])
        print('dt_list', self.dt_list)

        # 500, 1000, ... up to and including config['height']['top']
        top = self.config['height']['top']
        self.height_list = list(range(500, top + 1, 500))
Example #2
0
    def assemble(self, dt_range=None):
        """
        assemble the statistics for a range of trajectories and
        save the statistics to dicts

        The ``.tdump`` files in ``config['traj_dir']`` are grouped by the
        timestamp in their filename. For every timestep in ``self.dt_list``
        the files are processed in order of ascending height and the results
        are stored in ``self.stat2d_dict``, ``self.statls_dict``,
        ``self.statgn_dict`` and ``self.raw_dict``. ``self.geo_names`` and
        ``self.ls_categories`` are set as well. Files whose name does not
        contain the expected timestamp and height are skipped.

        Args:
            dt_range (list(datetime), optional): timerange for that the statistics is assembled,
                default taken from config

        """
        if dt_range is not None:
            self.dt_list = trace_source.time_list(dt_range[0], dt_range[1],
                                                  self.config['time']['step'])

        traj_dir = self.config['traj_dir']
        # filter only for the trajectory files with tdump extension
        files = [f for f in os.listdir(traj_dir) if f.endswith('.tdump')]

        # yyyymmdd-hh timestamp in the filename
        dt_pattern = re.compile(r'[0-9]{8}-[0-9]{2}')
        # 3 to 6 digits directly in front of the '_0<...>.tdump' suffix
        height_pattern = re.compile(r'[0-9]{3,6}(?=_0[0-9-]{1,4}\.tdump)')

        # the defaultdict is used here to sort the files by datetime within a dictionary
        filtered_files = defaultdict(list)
        for f in files:
            dt_match = dt_pattern.search(f)
            height_match = height_pattern.search(f)
            if dt_match is None or height_match is None:
                # a .tdump file that does not follow the naming scheme
                print('skipping file with unexpected name ', f)
                continue
            dt = datetime.datetime.strptime(dt_match.group(0), '%Y%m%d-%H')
            height = float(height_match.group(0))
            if self.dt_list[0] <= dt <= self.dt_list[-1]:
                filtered_files[dt].append((f, height))

        # here an empty dict is generated with a zero containing array
        # (the arrays are only allocated when a key is accessed the first time)
        self.stat2d_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list))))
        # NOTE(review): the last dimension is hard-coded to 7, presumably the
        # number of land surface categories -- confirm
        self.statls_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list), 7)))

        self.raw_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list),
             abs(self.config['time']['tr_duration']) + 1)))

        ng = trace_source.land_sfc.named_geography(self.config['geonames'])
        self.geo_names = ng.geo_names
        no_geo_names = len(self.geo_names)
        self.statgn_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list), no_geo_names)))

        ls = trace_source.land_sfc.land_sfc()
        self.ls_categories = ls.categories

        # plot settings do not change within the loops; a missing 'plotmap'
        # section falls back to the same defaults as a missing key
        plot_cfg = self.config.get('plotmap', {})
        timeinterval = plot_cfg.get('timeinterval', 12)
        heightlist = plot_cfg.get('heights', [1500.0, 3000.0, 4500.0])

        # subset of trajectory data to collect
        base_params = [
            'latitude', 'longitude', 'height', "PRESSURE", "AIR_TEMP",
            "RAINFALL", "RELHUMID", "TERR_MSL", 'age'
        ]

        for it, dt in enumerate(self.dt_list):
            print(dt)
            # sort by height
            f_list = sorted(filtered_files[dt], key=lambda x: x[1])
            print('file_list ', f_list)
            # NOTE(review): ih is the position of the file within this
            # timestep, not an index looked up in self.height_list; if a
            # height is missing for a timestep the remaining files shift to
            # lower indices -- confirm that this is intended
            for ih, (fname, height) in enumerate(f_list):
                print(it, ih, height, dt)
                traj = trajectory(self.config)
                traj.load_file(os.path.join(traj_dir, fname), silent=True)

                if height in heightlist and dt.hour % timeinterval == 0:
                    print("plotting ", height, dt.hour)
                    savepath = '{}/{}'.format(self.config['plot_dir'],
                                              dt.strftime('%Y%m%d'))
                    plot_trajectories_ens(traj,
                                          savepath,
                                          ls=ls,
                                          config=self.config)

                traj.evaluate(silent=True)
                traj.add_land_sfc(ls, silent=True)
                traj.add_ensemble_land_sfc(ls)
                traj.add_ensemble_geo_names(ng)

                # now the empty dict is filled with the keys (and values) of the statistics dict from traj
                for k, value in traj.statistics.items():
                    self.stat2d_dict[k][it, ih] = value

                param_collect = list(base_params)
                if 'land_sfc_category' in traj.data.keys():
                    param_collect.append('land_sfc_category')
                for k in param_collect:
                    self.raw_dict[k][it, ih, :] = traj.data[1][k]

                for k, stat in traj.stat_ls.items():
                    self.stat2d_dict[k + '_no_below'][it, ih] = stat.no_below
                    print('stat ls ', k, stat)
                    self.statls_dict[k][it, ih] = list(stat.counter.values())

                for k, stat in traj.stat_gn.items():
                    self.stat2d_dict[k + '_no_below'][it, ih] = stat.no_below
                    print('stat gn ', k, stat)
                    self.statgn_dict[k][it, ih] = list(stat.counter.values())

        # trying to free memory
        del ls
        del ng
Example #3
0
    def assemble(self, dt_range=None):
        """
        assemble the statistics for a range of trajectories and
        save the statistics to dicts

        For every timestep in ``self.dt_list`` the ``partposit_*`` files of
        the folder ``<config['partposit_dir']>/<YYYYmmdd_HH>`` are read, the
        particle positions are split by release (one release per entry of
        ``self.height_list``) and the geo name, land surface and latitude
        statistics are written to ``self.statgn_dict``, ``self.statls_dict``,
        ``self.statlat_dict`` and ``self.stat2d_dict``. One entry per timestep
        is appended to ``self.no_part`` and ``self.time_res``; both lists have
        to exist before the call.

        Args:
            dt_range (list(datetime), optional): timerange for that the statistics is assembled,
                default taken from config

        Raises:
            AssertionError: if no folder matches a timestep of ``self.dt_list``
                or if the mean release height from the metadata differs from
                the entry in ``self.height_list``

        """
        if dt_range is not None:
            self.dt_list = trace_source.time_list(dt_range[0], dt_range[1],
                                                  self.config['time']['step'])

        traj_dir = self.config['partposit_dir']
        # the folders are named by timestamp (YYYYmmdd_HH), i.e. 11 characters
        days_avail = [f for f in os.listdir(traj_dir) if len(f) == 11]
        print(days_avail)

        folders = []
        for f in days_avail:
            try:
                folder_dt = datetime.datetime.strptime(f, "%Y%m%d_%H")
            except ValueError:
                # 11 characters long, but not a timestamp
                continue
            if folder_dt in self.dt_list:
                folders.append(f)

        assert len(folders) > 0, 'no folders with flexpart partposit data'

        # here an empty dict is generated with a zero containing array
        # (the arrays are only allocated when a key is accessed the first time)
        self.stat2d_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list))))

        # NOTE(review): the last dimension is hard-coded to 7, presumably the
        # number of land surface categories -- confirm
        self.statls_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list), 7)))

        # not filled by this method, kept as an attribute
        self.raw_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list),
             abs(self.config['time']['tr_duration']) + 1)))

        ng = trace_source.land_sfc.named_geography(self.config['geonames'])
        self.geo_names = ng.geo_names
        no_geo_names = len(self.geo_names)
        self.statgn_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list), no_geo_names)))

        self.lat_names = {
            0: '<-60',
            1: '-60..-30',
            2: '-30..0',
            3: '0..30',
            4: '30..60',
            5: '>60'
        }
        self.statlat_dict = defaultdict(lambda: np.zeros(
            (len(self.dt_list), len(self.height_list), len(self.lat_names))))

        ls = trace_source.land_sfc.land_sfc()
        self.ls_categories = ls.categories

        for it, dt in enumerate(self.dt_list):
            print('trajectories eding at ', dt)
            folder = os.path.join(traj_dir, dt.strftime("%Y%m%d_%H"))
            files_for_time = sorted(
                f for f in os.listdir(folder) if "partposit_" in f)
            print('files_for_time ', files_for_time)

            print('heights ', len(self.height_list), self.height_list)

            # one statistics object per height
            flex_stat = [
                flex_statistics(self.config, ls=ls, ng=ng)
                for _ in self.height_list
            ]
            traj_meta = read_flexpart_traj_meta(
                os.path.join(folder, "trajectories.txt"))

            self.no_part.append(traj_meta['releases_meta'][1]['no_particles'])
            # NOTE(review): 10 * 24 is presumably the trajectory duration of
            # 10 days in hours -- confirm; raises ZeroDivisionError when the
            # folder contains no partposit file
            self.time_res.append(10 * 24 / len(files_for_time))

            # different structure than hysplit
            # 1. loop through the ending times of the current day
            # 2. load partposit for a specified time
            # 3. loop through heights

            for f in files_for_time:
                print('files_for_time ', f)
                part_pos = np.array(
                    read_partpositions(os.path.join(folder, f), 1,
                                       ctable=True))

                for ih, h in enumerate(self.height_list):
                    # the first column is compared with ih + 1, i.e. the
                    # releases are counted from 1 in the order of height_list
                    release_sel = part_pos[part_pos[:, 0] == ih + 1, :]
                    meta = traj_meta['releases_meta'][ih + 1]
                    assert np.mean(meta['heights']
                                   ) == h, f"{meta['heights']} {h} do not fit"
                    flex_stat[ih].add_partposits_gn(release_sel)
                    flex_stat[ih].add_partposits_ls(release_sel)
                    flex_stat[ih].add_partposits_thres(release_sel)

            # now assemble the statistics for all heights
            for ih, (h, stat_h) in enumerate(zip(self.height_list, flex_stat)):
                stat_h.calc_gn_stat()
                for k, stat in stat_h.stat_gn.items():
                    self.stat2d_dict[k + '_no_below'][it, ih] = stat.no_below
                    print('stat gn ', h, k, stat)
                    self.statgn_dict[k][it, ih] = list(stat.counter.values())

                stat_h.calc_ls_stat()
                for k, stat in stat_h.stat_ls.items():
                    self.stat2d_dict[k + '_no_below'][it, ih] = stat.no_below
                    print('stat ls ', h, k, stat)
                    self.statls_dict[k][it, ih] = list(stat.counter.values())

                stat_h.calc_thres_stat()
                for k, stat in stat_h.stat_lat.items():
                    self.stat2d_dict[k + '_no_below'][it, ih] = stat.no_below
                    print('stat_lat ', h, k, stat)
                    self.statlat_dict[k][it, ih] = list(stat.counter.values())

        # trying to free memory
        del ls
        del ng
Example #4
0
# the end date is taken from the second part of the 'YYYYmmdd-YYYYmmdd'
# daterange argument and extended to the last hour of that day
dt_end = datetime.datetime.strptime(args.daterange.split('-')[1],
                                    '%Y%m%d') + datetime.timedelta(hours=23)
station = args.station

with open('config_{}.toml'.format(args.station)) as config_file:
    config = toml.loads(config_file.read())

trajdir = config['traj_dir']

# filter only for the trajectory files with tdump extension
files = [f for f in os.listdir(trajdir) if f.endswith('.tdump')]
filtered_files = defaultdict(list)

#dt_begin = datetime.datetime(2015, 11, 25)
#dt_end = datetime.datetime(2015, 12, 1)
dt_list = trace_source.time_list(dt_begin, dt_end, 3)

# group the files by the timestamp in the filename and keep the height
# and the file size for each of them
for f in files:
    # regex the yyyymmdd-hh timestamp in the filename
    dt = datetime.datetime.strptime(
        re.search(r'[0-9]{8}-[0-9]{2}', f).group(0), '%Y%m%d-%H')
    # 3 to 6 digits directly in front of the '_0<...>.tdump' suffix
    height = float(
        re.search(r'[0-9]{3,6}(?=_0[0-9-]{1,4}\.tdump)', f).group(0))
    size = os.stat(os.path.join(trajdir, f)).st_size
    filtered_files[dt].append((f, height, size))

# timesteps of dt_list for which not a single file was found
days_missing = set(dt_list) - set(filtered_files)
print('days missing completely')
for missing_dt in sorted(days_missing):
    print(missing_dt)

for dt in sorted(list(filtered_files.keys())):