def get_mean_number_of_hles_days(self, start_year: int, end_year: int, season_to_months: dict, hles_vname: str):
    """
    Count the HLES days for each season and year, returns a dictionary {season: {year: field of counts}}

    For every (season, year) the data are resampled to daily means and, for each position along
    the remaining axes, the days whose daily mean is >= 0.1 are counted.

    The result is pickled into <self.base_folder>/cache and reloaded from there on later calls
    made with the same years, season names and variable name. Note that the cache file name is
    built from the season names only, not from the month lists.

    :param start_year: first year of the period (inclusive)
    :param end_year: last year of the period (inclusive), seasons ending after this year are skipped
    :param season_to_months: {season name: list of months} (order of months in the list of months
        is important, i.e. for DJF the order should be [12, 1, 2])
    :param hles_vname: name of the variable, passed to self.read_data_for_period
    :return: {season: {year: array with the number of days}}
    """
    cache_dir = Path(self.base_folder) / "cache"
    cache_dir.mkdir(exist_ok=True)

    seasons_str = "-".join(season_to_months)
    cache_file = cache_dir / f"get_mean_number_of_hles_days_{start_year}-{end_year}_m{seasons_str}_{hles_vname}.bin"

    if cache_file.exists():
        # NOTE: pickle executes arbitrary code on load, the cache folder must only contain trusted files
        with cache_file.open("rb") as cache_in:
            return pickle.load(cache_in)

    result = defaultdict(dict)
    for season, months in season_to_months.items():
        for y in range(start_year, end_year + 1):
            # the season starts on the 1st of its first month and lasts len(months) months
            d1 = Pendulum(y, months[0], 1)
            d2 = d1.add(months=len(months)).subtract(seconds=1)

            # skip the seasons that spill over the end of the requested period
            if d2.year > end_year:
                continue

            current_period = Period(d1, d2)
            logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
            data = self.read_data_for_period(current_period, hles_vname)

            # calculate number of hles days
            data_daily = data.resample(t="1D", keep_attrs=True).mean(dim="t")
            result[season][y] = (data_daily.values >= 0.1).sum(axis=0)

    # close the file explicitly so the cache is flushed to disk before returning
    with cache_file.open("wb") as cache_out:
        pickle.dump(result, cache_out)

    return result
def get_seasonal_maxima(self, start_year: int, end_year: int, season_to_months: dict, varname_internal: str):
    """
    returns a dictionary {season:{year: field of maxima}}

    :param start_year: first year of the period (inclusive)
    :param end_year: last year of the period (inclusive), seasons ending after this year are skipped
    :param season_to_months: (order of months in the list of months is important, i.e. for DJF the order should be [12, 1, 2])
    :param varname_internal: name of the variable, passed to self.read_data_for_period; for
        LAKE_ICE_FRACTION the values > 1 are masked before taking the maximum
    """
    result = defaultdict(dict)

    for season, months in season_to_months.items():
        for y in range(start_year, end_year + 1):
            # the season starts on the 1st of its first month and lasts len(months) months
            d1 = Pendulum(y, months[0], 1)
            d2 = d1.add(months=len(months)).subtract(seconds=1)

            # skip the seasons that spill over the end of the requested period
            if d2.year > end_year:
                continue

            current_period = Period(d1, d2)
            # the message used to be built and thrown away (the logger call was missing)
            logger.info("calculating maxima for [{}, {}]".format(current_period.start, current_period.end))
            data = self.read_data_for_period(current_period, varname_internal)

            if varname_internal == LAKE_ICE_FRACTION:
                # ignore the values > 1 (presumably missing-value markers - TODO confirm)
                values = data.values
                result[season][y] = np.ma.masked_where(values > 1, values).max(axis=0)
            else:
                result[season][y] = data.max(dim="t").values

    return result
def iter_date(start_date: "pendulum.datetime", end_date: "pendulum.datetime", chunk_size=59):
    """
    Split the closed interval [start_date, end_date] into consecutive chunks.

    Yields (chunk_start, chunk_end) pairs. Every chunk ends chunk_size days after its start,
    except the last one which is clipped to end_date; the next chunk starts one day after the
    end of the previous one.

    This is a generator: the arguments are validated on the first iteration, not at call time.

    :param start_date: first date of the interval, must support .add(days=...) and ordering
    :param end_date: last date of the interval (inclusive)
    :param chunk_size: distance in days between the start and the end of a chunk, must be >= 0
    :raises ValueError: if end_date is before start_date or if chunk_size is negative
    """
    if end_date < start_date:
        # format the message here: ValueError does not interpolate %-style arguments
        raise ValueError(f"start_date:{start_date} should not be later than end_date:{end_date}")

    if chunk_size < 0:
        # a negative step would never advance start_date and loop forever
        raise ValueError(f"chunk_size:{chunk_size} should not be negative")

    while start_date <= end_date:
        new_end_date = min(start_date.add(days=chunk_size), end_date)
        yield start_date, new_end_date
        start_date = new_end_date.add(days=1)
def iter_date(
    start_date: "pendulum.datetime", end_date: "pendulum.datetime", chunk_size=59
):
    """
    Iterate over the closed interval [start_date, end_date] in consecutive chunks.

    Each yielded item is a (chunk_start, chunk_end) tuple where chunk_end is chunk_size days
    after chunk_start, clipped to end_date for the last chunk. The following chunk starts one
    day after chunk_end.

    Being a generator, the function validates its arguments on the first iteration only.

    :param start_date: first date of the interval, must support .add(days=...) and ordering
    :param end_date: last date of the interval (inclusive)
    :param chunk_size: distance in days between the start and the end of a chunk, must be >= 0
    :raises ValueError: if end_date is before start_date or if chunk_size is negative
    """
    if end_date < start_date:
        # build the message explicitly, exceptions do not apply %-formatting to their arguments
        raise ValueError(
            f"start_date:{start_date} should not be later than end_date:{end_date}"
        )

    if chunk_size < 0:
        # with a negative step start_date never moves forward, i.e. an endless loop
        raise ValueError(f"chunk_size:{chunk_size} should not be negative")

    while start_date <= end_date:
        new_end_date = min(start_date.add(days=chunk_size), end_date)
        yield start_date, new_end_date
        start_date = new_end_date.add(days=1)
def get_seasonal_means(self, start_year: int, end_year: int, season_to_months: dict, varname_internal: str):
    """
    returns a dictionary {season:{year: mean_field}}

    The result is pickled into <self.base_folder>/cache and reloaded from there on later calls
    made with the same years, season names and variable name. Note that the cache file name is
    built from the season names only, not from the month lists.

    :param start_year: first year of the period (inclusive)
    :param end_year: last year of the period (inclusive), seasons ending after this year are skipped
    :param season_to_months: (order of months in the list of months is important, i.e. for DJF the order should be [12, 1, 2])
    :param varname_internal: name of the variable, passed to self.read_data_for_period
    """
    cache_dir = Path(self.base_folder) / "cache"
    cache_dir.mkdir(exist_ok=True)

    seasons_str = "-".join(season_to_months)
    cache_file = cache_dir / f"get_seasonal_means_{start_year}-{end_year}_m{seasons_str}_{varname_internal}.bin"

    if cache_file.exists():
        # NOTE: pickle executes arbitrary code on load, the cache folder must only contain trusted files
        with cache_file.open("rb") as cache_in:
            return pickle.load(cache_in)

    result = defaultdict(dict)
    for season, months in season_to_months.items():
        for y in range(start_year, end_year + 1):
            # the season starts on the 1st of its first month and lasts len(months) months
            d1 = Pendulum(y, months[0], 1)
            d2 = d1.add(months=len(months)).subtract(seconds=1)

            # skip the seasons that spill over the end of the requested period
            if d2.year > end_year:
                continue

            current_period = Period(d1, d2)
            logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
            data = self.read_data_for_period(current_period, varname_internal)

            result[season][y] = data.mean(dim="t").values

    # close the file explicitly so the cache is flushed to disk before returning
    with cache_file.open("wb") as cache_out:
        pickle.dump(result, cache_out)

    return result
def get_mean_number_of_cao_days(self, start_year: int, end_year: int, season_to_months: dict,
                                temperature_vname: str, min_cao_width_cells=5):
    """
    calculate mean number of CAO days for each season and year {season: {year: field}}
    Calculation following Wheeler et al 2011

    A day is counted when its daily mean is <= (climatological 31-day centered rolling mean
    - 1.5 * climatological std) and the climatological std is > 2; the climatologies are the
    averages, over the processed years, of the per-year std and rolling mean.

    The result is always recomputed and written to <self.base_folder>/cache (see the note in
    the body about the disabled cache read).

    :param self:
    :param start_year: first year of the period (inclusive)
    :param end_year: last year of the period (inclusive), seasons ending after this year are skipped
    :param season_to_months: {season name: list of months}, the order of months is important,
        i.e. for DJF the order should be [12, 1, 2]
    :param temperature_vname: name of the temperature variable, passed to self.read_data_for_period
    :param min_cao_width_cells: currently not used by the calculation
    :return: {season: {year: array with the number of CAO days}}
    """
    season_to_year_to_std = defaultdict(dict)
    season_to_year_to_data = defaultdict(dict)
    season_to_year_to_rolling_mean = defaultdict(dict)
    season_to_year_to_n_cao_days = defaultdict(dict)

    cache_dir = Path(self.base_folder) / "cache"
    cache_dir.mkdir(exist_ok=True)

    seasons_str = "-".join(season_to_months)
    cache_file = cache_dir / f"get_mean_number_of_cao_days_{start_year}-{end_year}_m{seasons_str}_{temperature_vname}.bin"

    # NOTE(review): reading from the cache was switched off in the original code
    # (`if cache_file.exists() and False`), so an existing cache file is ignored and overwritten.
    # Kept as is - confirm before re-enabling.

    for season, months in season_to_months.items():
        for y in range(start_year, end_year + 1):
            # the season starts on the 1st of its first month and lasts len(months) months
            d1 = Pendulum(y, months[0], 1)
            d2 = d1.add(months=len(months)).subtract(seconds=1)

            # skip the seasons that spill over the end of the requested period
            if d2.year > end_year:
                continue

            current_period = Period(d1, d2)
            logger.info("calculating mean for [{}, {}]".format(current_period.start, current_period.end))
            data = self.read_data_for_period(current_period, temperature_vname)

            # calculate daily means
            data_daily = data.resample(t="1D", keep_attrs=True).mean(dim="t").dropna(dim="t")
            assert isinstance(data_daily, xarray.DataArray)

            # save the data for reuse below
            season_to_year_to_data[season][y] = data_daily.values
            season_to_year_to_std[season][y] = data_daily.std(dim="t").values
            season_to_year_to_rolling_mean[season][y] = data_daily.rolling(center=True, t=31).mean(dim="t").values

    # Calculate climatological std and rolling mean
    season_to_std_clim = {
        s: np.mean(list(y_to_std.values()), axis=0) for s, y_to_std in season_to_year_to_std.items()
    }
    season_to_rolling_clim = {
        s: np.mean(list(y_to_rolling.values()), axis=0) for s, y_to_rolling in season_to_year_to_rolling_mean.items()
    }

    # calculate number of CAO days
    for season, std_clim in season_to_std_clim.items():
        t31_rolling = season_to_rolling_clim[season]
        threshold = t31_rolling - 1.5 * std_clim
        variable_enough = std_clim > 2

        # iterate over the years that were actually processed above: the years whose season
        # ends after end_year are skipped there and used to cause a KeyError here
        for y, data_daily_values in season_to_year_to_data[season].items():
            cao_suspect = (np.asarray(data_daily_values) <= threshold) & variable_enough
            season_to_year_to_n_cao_days[season][y] = cao_suspect.sum(axis=0)

    # close the file explicitly so the cache is flushed to disk before returning
    with cache_file.open("wb") as cache_out:
        pickle.dump(season_to_year_to_n_cao_days, cache_out)

    return season_to_year_to_n_cao_days
def _get_period_for_ym(year, month):
    """
    Build the Period spanning the given month of the given year

    :param year: year of the month of interest
    :param month: month number, passed to Pendulum as is
    :return: Period from the 1st day of the month up to one microsecond before
        the same moment one month later
    """
    month_start = Pendulum(year, month, 1)

    # step to the start of the following month and back off by the smallest time unit
    next_month_start = month_start.add(months=1)
    month_end = next_month_start.subtract(microseconds=1)

    return Period(month_start, month_end)