def identify_gaps_in_files(data_req):
    """
    Check the files in the data request to see if there appear to be any
    files missing from this dataset.
    """
    one_day = 1.
    data_files = data_req.datafile_set.order_by('name')
    gap_found = False
    msg = ''
    for index, data_file in enumerate(data_files):
        if index == 0:
            continue
        start_time = cf_units.num2date(data_file.start_time,
                                       data_file.time_units,
                                       data_file.calendar)
        previous_end_time = cf_units.num2date(data_files[index - 1].end_time,
                                              data_files[index - 1].time_units,
                                              data_files[index - 1].calendar)
        difference = start_time - previous_end_time
        if difference.days > one_day:
            gap_found = True
            msg = f'{difference.days} day gap prior to {data_file.name}'
        else:
            # End of gap so report if there was a gap
            if gap_found:
                print(msg)
                gap_found = False
    if gap_found:
        # Gap extends to the end of the files so report the last message
        print(msg)
def _autoparse_filename_(self, fname):
    """
    Determine if autoparsing the filename needs to be done

    Parameters:
        fname (str): The original name of the file

    Returns:
        str: The new name for the file
    """
    if '{' in fname:

        possible_tvars = []
        possible_inputs = list(self.inputs)
        if self._filedesc.autoparse_time_variable:
            possible_tvars.append(self._filedesc.autoparse_time_variable)
            possible_inputs += self._hidden_inputs
        else:
            for var in self._filedesc.variables:
                vdesc = self._filedesc.variables[var]
                if var in ('time', 'time1', 'time2', 'time3'):
                    possible_tvars.append(var)
                elif vdesc.cfunits().is_time_reference() and len(vdesc.dimensions) == 1:
                    possible_tvars.append(var)
                elif 'standard_name' in vdesc.attributes and vdesc.attributes['standard_name'] == 'time':
                    possible_tvars.append(var)
                elif 'axis' in vdesc.attributes and vdesc.attributes['axis'] == 'T':
                    possible_tvars.append(var)

        if len(possible_tvars) == 0:
            msg = 'Could not identify a time variable to autoparse filename {!r}'.format(fname)
            warn(msg, DateTimeAutoParseWarning)
            return fname

        possible_tnodes = {vnode.label: vnode for vnode in possible_inputs
                           if vnode.label in possible_tvars}
        if len(possible_tnodes) == 0:
            raise ValueError('Time variable input missing for file {!r}'.format(fname))
        tnode = (possible_tnodes['time'] if 'time' in possible_tnodes
                 else list(possible_tnodes.values())[0])

        t1 = tnode[0:1]
        t2 = tnode[-1:]

        while '{' in fname:
            beg = fname.find('{')
            end = fname.find('}', beg)
            if end == -1:
                raise ValueError(
                    'Filename {!r} has unbalanced special characters'.format(fname))
            prefix = fname[:beg]
            fmtstr1, fmtstr2 = fname[beg + 1:end].split('-')
            suffix = fname[end + 1:]

            datestr1 = num2date(t1.data[0], str(t1.units), t1.units.calendar).strftime(
                fmtstr1).replace(' ', '0')
            datestr2 = num2date(t2.data[0], str(t2.units), t2.units.calendar).strftime(
                fmtstr2).replace(' ', '0')

            fname = '{}{}-{}{}'.format(prefix, datestr1, datestr2, suffix)

    return fname
def test_num2date_wrong_calendar(self):
    with self.assertRaisesRegex(
        ValueError, "illegal calendar or reference date"
    ):
        num2date(
            1,
            "days since 1970-01-01",
            calendar="360_day",
            only_use_cftime_datetimes=False,
            only_use_python_datetimes=True,
        )
def __check_date_alignment_in_file__(date):
    """
    Evaluates the dates between time steps to verify that no gaps are found
    in the time dimension.

    Parameters:
        date (dict): Contains file information, such as time period(t_per),
            time step(t_step), first/last step(t0 and tn), slice counts(cnt).
    """
    t_step = date['t_step']
    t_per = date['t_per']
    t = date['time']
    prev_time = t[0]

    # If the time period is monthly, then the time difference between slices
    # is not uniform and requires looking into the month length array to get
    # the correct number of days between slices.
    if t_per == 'mon':
        date1 = (parser.parse(str(cf_units.num2date(
            t[0], date['units'], calendar=date['calendar']))).timetuple())
        if date1[1] == 12:
            next_val = 1
        else:
            next_val = date1[1] + 1
        for i in range(1, len(t)):
            new_date = (parser.parse(str(cf_units.num2date(
                t[i], date['units'], calendar=date['calendar']))).timetuple())
            if next_val == new_date[1]:
                date1 = (parser.parse(str(cf_units.num2date(
                    t[i], date['units'], calendar=date['calendar']))).timetuple())
                if date1[1] == 12:
                    next_val = 1
                else:
                    next_val = date1[1] + 1
            else:
                print("Disconnect? Expected:", next_val, "Got:", new_date[1],
                      "around time step:", i)
                return 1
    # All other time periods should have the same number of days between
    # slices.
    else:
        for i in range(1, len(t)):
            if prev_time + t_step == t[i]:
                # Time step looks okay
                prev_time = t[i]
            else:
                print("Disconnect? Expected:", str(prev_time + t_step),
                      "Got:", t[i], "around time step:", i)
                return 1
    return 0
def __check_date_alignment__(keys, date_info):
    """
    Evaluates the dates between files to verify that no gaps are found in the
    time dimension.

    Parameters:
        keys (list): A list of time slice references that are in correct time
            order.
        date_info (dict): Contains file information, such as time period(t_per),
            time step(t_step), first/last step(t0 and tn), slice counts(cnt).
    """
    prev_last = date_info[keys[0]]['tn']
    t_step = date_info[keys[0]]['t_step']
    if date_info[keys[0]]['t_per'] == 'mon':
        date = (parser.parse(str(cf_units.num2date(
            date_info[keys[0]]['tn'], date_info[keys[0]]['units'],
            calendar=date_info[keys[0]]['calendar']))).timetuple())
        if date[1] == 12:
            next_val = 1
        else:
            next_val = date[1] + 1
    else:
        next_val = prev_last + t_step

    for i in range(1, len(keys)):
        if date_info[keys[i]]['t_per'] == 'mon':
            new_date = (parser.parse(str(cf_units.num2date(
                date_info[keys[i]]['t0'], date_info[keys[i]]['units'],
                calendar=date_info[keys[i]]['calendar']))).timetuple())
            if next_val == new_date[1]:
                date = (parser.parse(str(cf_units.num2date(
                    date_info[keys[i]]['tn'], date_info[keys[i]]['units'],
                    calendar=date_info[keys[i]]['calendar']))).timetuple())
                if date[1] == 12:
                    next_val = 1
                else:
                    next_val = date[1] + 1
            else:
                print("Disconnect? Expected:", next_val, "Got:", new_date[1])
                return 1
        else:
            if next_val == date_info[keys[i]]['t0']:
                # print("Looks okay", date_info[keys[i]]['t0'], '-',
                #       date_info[keys[i]]['tn'])
                prev_last = date_info[keys[i]]['tn']
                next_val = prev_last + t_step
            else:
                print("Disconnect? Expected:", next_val, "Got:",
                      date_info[keys[i]]['t0'])
                return 1
    return 0
def create_data_object(self, filenames, variable):
    data_dict = {}  # initialise data dictionary
    inData = netCDF4.Dataset(filenames[0])  # open netCDF file
    data_dict['longitude'] = np.array(inData.variables['lon'])  # extract longitudes
    data_dict['latitude'] = np.array(inData.variables['lat'])  # extract latitudes
    origTimes = np.array(inData.variables['time_counter'])  # extract times

    # Convert time to days since
    niceDateTime = cf_units.num2date(origTimes, 'seconds since 1999-01-01 00:00:00', 'gregorian')
    data_dict['time'] = cf_units.date2num(niceDateTime, 'days since 1600-01-01 00:00:00', 'gregorian')

    data_dict[variable] = np.array(inData.variables[variable])  # extract requested variable
    inData.close()  # close netCDF file

    coords = self._create_coord_list(filenames, data_dict)
    return UngriddedData(
        data_dict[variable],
        Metadata(name=variable,
                 long_name=variable,
                 shape=(len(data_dict),),
                 missing_value=-999.0,
                 units="1"),
        coords)
def start_time(self):
    std_units = Settings.get_solo().standard_time_units

    start_times = self.datafile_set.values_list('start_time', 'time_units', 'calendar')
    if not start_times:
        return None

    std_times = [
        (standardise_time_unit(time, unit, std_units, cal), cal)
        for time, unit, cal in start_times
    ]

    none_values_removed = [(std_time, cal) for std_time, cal in std_times
                           if std_time is not None]
    if not none_values_removed:
        return None

    earliest_time, calendar = min(none_values_removed, key=lambda x: x[0])

    earliest_obj = cf_units.num2date(earliest_time, std_units, calendar)

    return earliest_obj.strftime('%Y-%m-%d')
def end_string(self):
    if self.end_time is not None and self.time_units and self.calendar:
        return safe_strftime(
            cf_units.num2date(self.end_time, self.time_units, self.calendar),
            '%Y-%m-%d')
    else:
        return None
def construct_time_string(time_point, time_units, calendar, frequency):
    """
    Calculate the time string to the appropriate resolution for use in CMIP6
    filenames according to http://goo.gl/v1drZl

    :param float time_point: the start time
    :param str time_units: the time's units
    :param str calendar: the time's calendar
    :param str frequency: the variable's frequency string
    :returns: the time point
    :rtype: str
    :raises NotImplementedError: if the frequency isn't known
    """
    formats = {
        'ann': '%Y',
        'mon': '%Y%m',
        'day': '%Y%m%d',
        '6hr': '%Y%m%d%H%M',
        '3hr': '%Y%m%d%H%M',
        '1hr': '%Y%m%d%H%M',
    }
    try:
        time_fmt = formats[frequency]
    except KeyError:
        msg = 'No time format known for frequency string {}'.format(frequency)
        raise NotImplementedError(msg)

    datetime_obj = cf_units.num2date(time_point, time_units, calendar)

    return datetime_obj.strftime(time_fmt)
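# A minimal usage sketch of construct_time_string (hypothetical values; assumes
# cf_units is importable). Time point 0.0 in a 360_day calendar starting at
# 1850-01-01 should render as '185001' at monthly and '18500101' at daily
# frequency.
print(construct_time_string(0.0, 'days since 1850-01-01', '360_day', 'mon'))  # 185001
print(construct_time_string(0.0, 'days since 1850-01-01', '360_day', 'day'))  # 18500101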
def standardise_time_unit(time_float, time_unit, standard_unit, calendar):
    """
    Standardise a floating point time in one time unit by returning the
    corresponding time in the `standard_unit`. The original value is returned
    if it is already in the `standard_unit`. None is returned if the
    `time_float` is None.

    :param float time_float: The time to change
    :param str time_unit: The original time's units
    :param str standard_unit: The new unit
    :param str calendar: The cftime calendar
    :returns: A floating point representation of the old time in
        `standard_unit`
    """
    if (time_float is None or time_unit is None or
            standard_unit is None or calendar is None):
        return None

    if time_unit == standard_unit:
        return time_float

    date_time = cf_units.num2date(time_float, time_unit, calendar)
    corrected_time = cf_units.date2num(date_time, standard_unit, calendar)

    return corrected_time
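# A minimal usage sketch of standardise_time_unit (hypothetical values). In the
# 360_day calendar, 30.0 days after 2000-01-01 is 50 * 360 + 30 days after
# 1950-01-01, so the converted value should be 18030.0.
print(standardise_time_unit(30.0, 'days since 2000-01-01',
                            'days since 1950-01-01', '360_day'))  # 18030.0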
def calc_last_day_in_month(year, month, calendar):
    """
    Calculate the last day of the specified month using the calendar given.

    :param int year: The year
    :param int month: The month
    :param str calendar: The calendar to use, which must be supported by
        cf_units
    :returns: The last day of the specified month
    :rtype: int
    """
    ref_units = 'days since 1969-07-21'

    if month == 12:
        start_next_month_obj = netcdftime.datetime(year + 1, 1, 1)
    else:
        start_next_month_obj = netcdftime.datetime(year, month + 1, 1)

    start_next_month = cf_units.date2num(start_next_month_obj, ref_units, calendar)

    end_this_month = cf_units.num2date(start_next_month - 1, ref_units, calendar)

    return end_this_month.day
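# A minimal usage sketch of calc_last_day_in_month (hypothetical values;
# assumes cf_units and netcdftime are importable). The result depends on the
# calendar: February 2000 has 29 days in the Gregorian calendar but 30 days in
# the 360_day calendar.
print(calc_last_day_in_month(2000, 2, 'gregorian'))  # 29
print(calc_last_day_in_month(2000, 2, '360_day'))    # 30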
def end_time(self):
    std_units = Settings.get_solo().standard_time_units

    end_times = self.obs_files.values_list('end_time', 'time_units', 'calendar')
    if not end_times:
        return None

    std_times = [
        (standardise_time_unit(time, unit, std_units, cal), cal)
        for time, unit, cal in end_times
    ]

    none_values_removed = [(std_time, cal) for std_time, cal in std_times
                           if std_time is not None]
    if not none_values_removed:
        return None

    latest_time, calendar = max(none_values_removed, key=lambda x: x[0])

    latest_obj = cf_units.num2date(latest_time, std_units, calendar)

    return latest_obj
def test_fix_metadata(self):
    """Test fix for bad calendar."""
    cube = self.fix.fix_metadata([self.cube])[0]
    time = cube.coord('time')
    dates = num2date(time.points, time.units.name, time.units.calendar)
    self.assertEqual(time.units.calendar, 'gregorian')
    self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), ' 30001161200')
    self.assertEqual(dates[1].strftime('%Y%m%d%H%M'), '185001161200')
def test_get_cube_time_axis_in_calendar(test_generic_tas_cube, out_calendar):
    tcn = test_generic_tas_cube.cube.coord_dims("time")[0]
    ttime = test_generic_tas_cube.cube.dim_coords[tcn]
    expected = cf_units.num2date(ttime.points, ttime.units.name, out_calendar)

    result = get_scm_cube_time_axis_in_calendar(test_generic_tas_cube, out_calendar)

    np.testing.assert_array_equal(result, expected)
def num2date(time_value, unit, calendar):
    if 'common_year' in unit:
        my_unit = unit.replace('common_year', 'day')
        my_time_value = time_value * 365
    else:
        my_unit = unit
        my_time_value = time_value
    return cf_units.num2date(my_time_value, my_unit, calendar)
def num2date(time_value, unit, calendar):
    # fix for fractional time bounds
    if math.floor(time_value) != time_value:
        time_value = int(round(time_value))
    if 'common_year' in unit:
        my_unit = unit.replace('common_year', 'day')
        my_time_value = int(round(time_value)) * 365
    else:
        my_unit = unit
        my_time_value = time_value
    return cf_units.num2date(my_time_value, my_unit, calendar)
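# A minimal usage sketch of the common_year wrapper above (hypothetical
# values). A value of 2 in units of 'common_year since 2000-01-01' is rescaled
# to 2 * 365 = 730 days before being passed to cf_units.num2date, which should
# correspond to 2001-12-31 in the standard calendar.
print(num2date(2, 'common_year since 2000-01-01', 'standard'))  # 2001-12-31 00:00:00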
def test_assert_all_time_axes_same(test_generic_tas_cube):
    tcn = test_generic_tas_cube.cube.coord_dims("time")[0]
    ttime = test_generic_tas_cube.cube.dim_coords[tcn]
    ttime_axis = cf_units.num2date(ttime.points, ttime.units.name, "gregorian")

    assert_all_time_axes_same([ttime_axis, ttime_axis])

    otime_axis = ttime_axis - datetime.timedelta(10)

    error_msg = re.escape("all the time axes should be the same")
    with pytest.raises(AssertionError, match=error_msg):
        assert_all_time_axes_same([otime_axis, ttime_axis])
def get_branch_year(data_cube, control_time_units):
    """Get the year of the branching in the control run."""
    if not control_time_units:
        control_time_units = gio.fix_time_descriptor(
            data_cube.attributes['parent_time_units'])
    else:
        control_time_units = control_time_units.replace("_", " ")
    branch_time = data_cube.attributes['branch_time_in_parent']
    branch_datetime = cf_units.num2date(branch_time, control_time_units,
                                        cf_units.CALENDAR_STANDARD)
    branch_year = branch_datetime.year
    print(f"Branch year: {branch_year}")
    return branch_year
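# A minimal sketch calling get_branch_year with a stand-in cube (assumes only
# cf_units; the attribute values below are hypothetical). Underscores in the
# supplied units are replaced with spaces, and 60265 days after 1850-01-01 in
# the standard calendar falls in 2015.
class _StubCube:
    attributes = {'branch_time_in_parent': 60265.0}

print(get_branch_year(_StubCube(), 'days_since_1850-01-01'))  # 2015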
def _create_coord_list(self, filenames, data=None):
    if data is None:
        data = {}  # initialise data dictionary
        inData = netCDF4.Dataset(filenames[0])  # open netCDF file
        data['longitude'] = np.array(inData.variables['longitude'])  # extract longitudes
        data['latitude'] = np.array(inData.variables['latitude'])  # extract latitudes
        origTimes = np.array(inData.variables['time'])  # extract times

        # convert to days since 1600-01-01 (cis col doesn't work otherwise - not sure why...):
        niceDateTime = cf_units.num2date(origTimes, 'days since 1990-01-01 00:00:00', 'gregorian')
        data['time'] = cf_units.date2num(niceDateTime, 'days since 1600-01-01 00:00:00', 'gregorian')

        inData.close()  # close netCDF file

    coords = CoordList()  # initialise coordinate list

    # Append latitudes and longitudes to coordinate list:
    coords.append(
        Coord(
            data['longitude'],
            Metadata(name="longitude",
                     long_name='longitude',
                     standard_name='longitude',
                     shape=(len(data),),
                     missing_value=-999.0,
                     units="degrees_east",
                     range=(-180, 180)),
            "x"))
    coords.append(
        Coord(
            data['latitude'],
            Metadata(name="latitude",
                     long_name='latitude',
                     standard_name='latitude',
                     shape=(len(data),),
                     missing_value=-999.0,
                     units="degrees_north",
                     range=(-90, 90)),
            "y"))
    coords.append(
        Coord(
            data['time'],
            Metadata(name="time",
                     long_name='time',
                     standard_name='time',
                     shape=(len(data),),
                     missing_value=-999.0,
                     units="days since 1600-01-01 00:00:00"),
            "t"))

    return coords
def _create_coord_list(self, filenames, data=None):
    if data is None:
        data = {}  # initialise data dictionary
        inData = netCDF4.Dataset(filenames[0])  # open netCDF file
        data['longitude'] = np.array(inData.variables['lon'])  # extract longitudes
        data['latitude'] = np.array(inData.variables['lat'])  # extract latitudes
        origTimes = np.array(inData.variables['time_counter'])  # extract times

        # Convert time to days since, storing it under 'time' so the time
        # Coord constructed below can find it
        niceDateTime = cf_units.num2date(origTimes, 'seconds since 1999-01-01 00:00:00', 'gregorian')
        data['time'] = cf_units.date2num(niceDateTime, 'days since 1600-01-01 00:00:00', 'gregorian')

        inData.close()  # close netCDF file

    coords = CoordList()  # initialise coordinate list

    # Append latitudes and longitudes to coordinate list:
    coords.append(
        Coord(
            data['longitude'],
            Metadata(name="longitude",
                     long_name='longitude',
                     standard_name='longitude',
                     shape=(len(data),),
                     missing_value=-999.0,
                     units="degrees_east",
                     range=(-180, 180)),
            "x"))
    coords.append(
        Coord(
            data['latitude'],
            Metadata(name="latitude",
                     long_name='latitude',
                     standard_name='latitude',
                     shape=(len(data),),
                     missing_value=-999.0,
                     units="degrees_north",
                     range=(-90, 90)),
            "y"))
    coords.append(
        Coord(
            data['time'],
            Metadata(name="time",
                     long_name='time',
                     standard_name='time',
                     shape=(len(data),),
                     missing_value=-999.0,
                     units="days since 1600-01-01 00:00:00"),
            "t"))

    return coords
def get_scm_cube_time_axis_in_calendar(scm_cube, calendar):
    """Gets a cube's time axis in a given calendar

    Parameters
    ----------
    scm_cube : :obj:`SCMCube`
        An ``SCMCube`` instance.

    calendar : str
        The calendar to return the time axis in e.g. '365_day', 'gregorian'.

    Returns
    -------
    np.ndarray
        Array of datetimes, containing the cube's calendar.
    """
    time_coord_number = scm_cube.cube.coord_dims("time")[0]
    time = scm_cube.cube.dim_coords[time_coord_number]
    return cf_units.num2date(time.points, time.units.name, calendar)
def _fix_time_monthly(cube):
    """Fix time by setting it to 15th of month."""
    # Read dataset time unit and calendar from file
    dataset_time_unit = str(cube.coord('time').units)
    dataset_time_calender = cube.coord('time').units.calendar
    # Convert datetime
    time_as_datetime = cf_units.num2date(
        cube.coord('time').core_points(), dataset_time_unit,
        dataset_time_calender)
    newtime = []
    for timepoint in time_as_datetime:
        midpoint = datetime(timepoint.year, timepoint.month, 15)
        newtime.append(midpoint)

    newtime = cf_units.date2num(newtime, dataset_time_unit,
                                dataset_time_calender)

    # Put them on the file
    cube.coord('time').points = newtime
    cube.coord('time').bounds = None
    return cube
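# A minimal sketch exercising _fix_time_monthly on a synthetic cube (assumes
# iris, numpy and cf_units are installed and that the snippet's module imports
# datetime; the time values below are hypothetical).
import numpy as np
import cf_units
from iris.cube import Cube
from iris.coords import DimCoord

time_unit = cf_units.Unit('days since 2000-01-01', calendar='gregorian')
time_coord = DimCoord([16.5, 45.0], standard_name='time', units=time_unit)
cube = Cube(np.zeros(2), dim_coords_and_dims=[(time_coord, 0)])

fixed = _fix_time_monthly(cube)
# Both points should now fall on the 15th of their respective months.
print(cf_units.num2date(fixed.coord('time').points, str(time_unit), 'gregorian'))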
def time_processing_period(cube, grib):
    """
    For template 4.8 (time mean, time max, etc).

    The time range is taken from the 'time' coordinate bounds.
    If the cell-method coordinate is not 'time' itself, the type of statistic
    will not be derived and the save process will be aborted.
    """
    # We could probably split this function up a bit

    # Can safely assume bounded pt.
    pt_coord = cube.coord("time")
    end = cf_units.num2date(pt_coord.bounds[0, 1], pt_coord.units.name,
                            pt_coord.units.calendar)
    gribapi.grib_set_long(grib, "yearOfEndOfOverallTimeInterval", end.year)
    gribapi.grib_set_long(grib, "monthOfEndOfOverallTimeInterval", end.month)
    gribapi.grib_set_long(grib, "dayOfEndOfOverallTimeInterval", end.day)
    gribapi.grib_set_long(grib, "hourOfEndOfOverallTimeInterval", end.hour)
    gribapi.grib_set_long(grib, "minuteOfEndOfOverallTimeInterval", end.minute)
    gribapi.grib_set_long(grib, "secondOfEndOfOverallTimeInterval", end.second)

    gribapi.grib_set_long(grib, "numberOfTimeRange", 1)
    gribapi.grib_set_long(grib, "numberOfMissingInStatisticalProcess", 0)

    type_of_statistical_processing(cube, grib, pt_coord)

    # Type of time increment, e.g. incrementing fp, incrementing ref
    # time, etc. (code table 4.11)
    gribapi.grib_set_long(grib, "typeOfTimeIncrement", 255)
    # Time unit for period over which statistical processing is done (hours)
    gribapi.grib_set_long(grib, "indicatorOfUnitForTimeRange", 1)
    # Period over which statistical processing is done
    gribapi.grib_set_long(grib, "lengthOfTimeRange",
                          float(pt_coord.bounds[0, 1] - pt_coord.bounds[0, 0]))
    # Time unit between successive source fields (not setting this at present)
    gribapi.grib_set_long(grib, "indicatorOfUnitForTimeIncrement", 255)
    # Time increment between successive source fields (just set to 0 for now)
    gribapi.grib_set_long(grib, "timeIncrement", 0)
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 17 11:34:03 2018

@author: jonasg
"""
from matplotlib.pyplot import close
import numpy as np
import iris
import iris.coord_categorisation
from pyaerocom.io.testfiles import get
from pyaerocom import GriddedData
from pyaerocom.plot.mapping import plot_map

if __name__ == "__main__":
    close("all")
    files = get()
    data = GriddedData(files['models']['aatsr_su_v4.3'], var_name="od550aer")

    cube = data.grid

    iris.coord_categorisation.add_month(cube, "time", name='months')
    iris.coord_categorisation.add_year(cube, "time", name='years')

    cube_monthly = cube.aggregated_by(['months', 'years'], iris.analysis.MEAN)

    # The original call was left incomplete; collapsing over time is assumed here
    cube_mean = cube_monthly.collapsed("time", iris.analysis.MEAN)

    ts = data.time
    import cf_units
    times = cf_units.num2date(ts.points, ts.units.name, ts.units.calendar)
def estimate_coefficients_for_ngr(self, current_forecast, historic_forecast,
                                  truth):
    """
    Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
    Statistics, estimate the required coefficients from historical
    forecasts.

    The main contents of this method are:

    1. Metadata checks to ensure that the current forecast, historic
       forecast and truth exist in a form that can be processed.
    2. Loop through times within the concatenated current forecast cube:

       1. Extract the desired forecast period from the historic forecasts
          to match the current forecasts. Apply unit conversion to ensure
          that historic forecasts have the desired units for calibration.
       2. Extract the relevant truth to co-incide with the time within the
          historic forecasts. Apply unit conversion to ensure that the
          truth has the desired units for calibration.
       3. Calculate mean and variance.
       4. Calculate initial guess at coefficient values by performing a
          linear regression, if requested, otherwise default values are
          used.
       5. Perform minimisation.

    Args:
        current_forecast (Iris Cube or CubeList):
            The cube containing the current forecast.
        historic_forecast (Iris Cube or CubeList):
            The cube or cubelist containing the historical forecasts used
            for calibration.
        truth (Iris Cube or CubeList):
            The cube or cubelist containing the truth used for calibration.

    Returns:
        (tuple): tuple containing:
            **optimised_coeffs** (Dictionary):
                Dictionary containing a list of the optimised coefficients
                for each date.
            **coeff_names** (List):
                The name of each coefficient.
    """
    def convert_to_cubelist(cubes, cube_type="forecast"):
        """
        Convert cube to cubelist, if necessary.

        Args:
            cubes (Iris Cube or Iris CubeList):
                Cube to be converted to CubeList.
            cube_type (String):
                String to describe the cube, which is being converted to a
                CubeList.

        Raises:
            TypeError: The input cube is not an Iris cube.
        """
        if not isinstance(cubes, iris.cube.CubeList):
            cubes = iris.cube.CubeList([cubes])
        for cube in cubes:
            if not isinstance(cube, iris.cube.Cube):
                msg = ("The input data within the {} "
                       "is not an Iris Cube.".format(cube_type))
                raise TypeError(msg)
        return cubes

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(self.predictor_of_mean_flag)

    # Setting default values for optimised_coeffs and coeff_names.
    optimised_coeffs = {}
    coeff_names = ["gamma", "delta", "a", "beta"]
    # Set default values for whether there are NaN values within the
    # initial guess.
    nan_in_initial_guess = False

    for var in [current_forecast, historic_forecast, truth]:
        if (isinstance(var, iris.cube.Cube) or
                isinstance(var, iris.cube.CubeList)):
            current_forecast_cubes = current_forecast
            historic_forecast_cubes = historic_forecast
            truth_cubes = truth
        else:
            msg = ("{} is not a Cube or CubeList. "
                   "Returning default values for optimised_coeffs {} "
                   "and coeff_names {}.").format(var, optimised_coeffs,
                                                 coeff_names)
            warnings.warn(msg)
            return optimised_coeffs, coeff_names

    current_forecast_cubes = (convert_to_cubelist(
        current_forecast_cubes, cube_type="current forecast"))
    historic_forecast_cubes = (convert_to_cubelist(
        historic_forecast_cubes, cube_type="historic forecast"))
    truth_cubes = convert_to_cubelist(truth_cubes, cube_type="truth")

    if (len(current_forecast_cubes) == 0 or
            len(historic_forecast_cubes) == 0 or
            len(truth_cubes) == 0):
        msg = ("Insufficient input data present to estimate "
               "coefficients using NGR. "
               "\nNumber of current_forecast_cubes: {}"
               "\nNumber of historic_forecast_cubes: {}"
               "\nNumber of truth_cubes: {}".format(
                   len(current_forecast_cubes),
                   len(historic_forecast_cubes), len(truth_cubes)))
        warnings.warn(msg)
        return optimised_coeffs, coeff_names

    current_forecast_cubes = concatenate_cubes(current_forecast_cubes)
    historic_forecast_cubes = concatenate_cubes(historic_forecast_cubes)
    truth_cubes = concatenate_cubes(truth_cubes)

    for current_forecast_cube in current_forecast_cubes.slices_over("time"):
        date = unit.num2date(
            current_forecast_cube.coord("time").points,
            current_forecast_cube.coord("time").units.name,
            current_forecast_cube.coord("time").units.calendar)[0]
        # Extract desired forecast_period from historic_forecast_cubes.
        forecast_period_constr = iris.Constraint(
            forecast_period=current_forecast_cube.coord(
                "forecast_period").points)
        historic_forecast_cube = historic_forecast_cubes.extract(
            forecast_period_constr)

        # Extract truth matching the time of the historic forecast.
        reference_time = iris_time_to_datetime(
            historic_forecast_cube.coord("time").copy())
        truth_constr = iris.Constraint(
            forecast_reference_time=reference_time)
        truth_cube = truth_cubes.extract(truth_constr)

        if truth_cube is None:
            msg = ("Unable to calibrate for the time points {} "
                   "as no truth data is available. "
                   "Moving on to try to calibrate "
                   "next time point.".format(
                       historic_forecast_cube.coord("time").points))
            warnings.warn(msg)
            continue

        # Make sure inputs have the same units.
        historic_forecast_cube.convert_units(self.desired_units)
        truth_cube.convert_units(self.desired_units)

        if self.predictor_of_mean_flag.lower() in ["mean"]:
            no_of_realizations = None
            forecast_predictor = historic_forecast_cube.collapsed(
                "realization", iris.analysis.MEAN)
        elif self.predictor_of_mean_flag.lower() in ["realizations"]:
            no_of_realizations = len(
                historic_forecast_cube.coord("realization").points)
            forecast_predictor = historic_forecast_cube

        forecast_var = historic_forecast_cube.collapsed(
            "realization", iris.analysis.VARIANCE)

        # Computing initial guess for EMOS coefficients
        # If no initial guess from a previous iteration, or if there
        # are NaNs in the initial guess, calculate an initial guess.
        if "initial_guess" not in locals() or nan_in_initial_guess:
            initial_guess = self.compute_initial_guess(
                truth_cube, forecast_predictor,
                self.predictor_of_mean_flag,
                self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
                no_of_realizations=no_of_realizations)

        if np.any(np.isnan(initial_guess)):
            nan_in_initial_guess = True

        if not nan_in_initial_guess:
            # Need to access the x attribute returned by the
            # minimisation function.
            optimised_coeffs[date] = (
                self.minimiser.crps_minimiser_wrapper(
                    initial_guess, forecast_predictor, truth_cube,
                    forecast_var, self.predictor_of_mean_flag,
                    self.distribution.lower()))
            initial_guess = optimised_coeffs[date]
        else:
            optimised_coeffs[date] = initial_guess

    return optimised_coeffs, coeff_names
def plotmaps(data, verbose=False, filter="WORLD", var="od550aer", plotdir="./"):
    """plot aerocom standard maps

    Will plot every supplied time step"""
    if not isinstance(data, GriddedData):
        raise TypeError("Need pyaerocom.GriddedData as input")
    # define colour bar;
    # will be moved somewhere else and made variable specific at some point
    colorbar_levels = [0., 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,
                       0.09, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    colorbar_ticks = [0., 0.02, 0.04, 0.06, 0.08, 0.1, 0.3, 0.5, 0.7, 0.9]
    aod_data = {
        'red': ((0., 0, 0), (0.07, 0, 0), (0.1, 0.678, 0.678,), (0.26, 1, 1),
                (0.85, 1, 1), (1, 0.545, 0.545)),
        'green': ((0., 0, 0), (0.07, 1, 1), (0.24, 1, 1), (0.91, 0, 0),
                  (1, 0, 0)),
        'blue': ((0., 1, 1), (0.07, 1, 1), (0.1, 0.133, 0.133), (0.25, 0, 0),
                 (1, 0, 0))}

    colormap = LinearSegmentedColormap('aod_jet', aod_data)
    plt.register_cmap(cmap=colormap)
    TIME_VAR_NAME = 'time'
    PlotDailyFlag = False
    PlotMonthlyFlag = True

    for model in data:
        # assume that we have a single cube for the moment
        # iterate over the time dimension
        cube = data[model].data
        # model = 'AATSR_ORAC_v4.01'
        cube.coord('latitude').guess_bounds()
        cube.coord('longitude').guess_bounds()

        # plot daily data
        if PlotDailyFlag:
            for time_sub_cube in cube.slices_over(TIME_VAR_NAME):
                pre_grid_areas = iris.analysis.cartography.area_weights(
                    time_sub_cube)
                # Perform the area-weighted mean for each of the datasets
                # using the computed grid-box areas.
                weighted_mean = time_sub_cube.collapsed(
                    ['latitude', 'longitude'],
                    iris.analysis.MEAN,
                    weights=pre_grid_areas)

                time = (unit.num2date(
                    time_sub_cube.coord(TIME_VAR_NAME).points,
                    time_sub_cube.coord(TIME_VAR_NAME).units.name,
                    time_sub_cube.coord(TIME_VAR_NAME).units.calendar))

                date = str(time[0]).split(' ')[0].replace('-', '')
                Year = date[0:4]
                TsStr = 'm' + date
                PlotType = 'MAP'
                title = " ".join([var, date, 'mean: {:6.3f}'.format(
                    float(weighted_mean.data))])
                # ABS550_AER_an2012_mALLYEAR_WORLD_ZONALOBS_AERONETSky.ps.png
                # OD550_AER_an2017_d20171125_WORLD_MAP.ps.png
                plotfilename = os.path.join(
                    plotdir,
                    '_'.join([var, "an" + Year, TsStr, filter, PlotType]) + ".png")
                if verbose:
                    print(plotfilename)
                plot = iplt.pcolormesh(time_sub_cube[:, :], cmap=colormap,
                                       vmin=0., vmax=max(colorbar_levels))
                plot.axes.set_aspect(1.8)
                LatsToPlot = time_sub_cube.coord(axis='X').points
                LonsToPlot = time_sub_cube.coord(axis='Y').points
                axis = plt.axis([LatsToPlot.min(), LatsToPlot.max(),
                                 LonsToPlot.min(), LonsToPlot.max()])
                ax = plot.axes
                ax.annotate('source: AEROCOM', xy=(0.88, 0.04),
                            xycoords='figure fraction',
                            horizontalalignment='right', fontsize=10,
                            bbox=dict(boxstyle='square', facecolor='none',
                                      edgecolor='black'))
                ax.annotate(model, xy=(-174., -83.), xycoords='data',
                            horizontalalignment='left', fontsize=13,
                            color='black',
                            bbox=dict(boxstyle='square', facecolor='white',
                                      edgecolor='none', alpha=0.7))
                plt.colorbar(spacing='uniform', ticks=colorbar_ticks,
                             boundaries=colorbar_levels, extend='max')
                ax.coastlines()
                ax.set_xticks([-180., -120., -60., 0., 60, 120, 180],
                              crs=ccrs.PlateCarree())
                ax.set_yticks([-90., -60, -30, 0., 30, 60, 90],
                              crs=ccrs.PlateCarree())
                lon_formatter = LongitudeFormatter(number_format='.1f',
                                                   degree_symbol='')
                lat_formatter = LatitudeFormatter(number_format='.1f',
                                                  degree_symbol='')
                ax.xaxis.set_major_formatter(lon_formatter)
                ax.yaxis.set_major_formatter(lat_formatter)
                plt.xlabel('longitude')
                plt.ylabel('latitude')
                plt.title(title)
                plt.savefig(plotfilename, dpi=300)
                plt.close()

        elif PlotMonthlyFlag:
            # calculate monthly data
            iris.coord_categorisation.add_month(cube, TIME_VAR_NAME,
                                                name='month_number')
            iris.coord_categorisation.add_year(cube, TIME_VAR_NAME,
                                               name='month_year')
            cube_monthly = cube.aggregated_by(['month_number', 'month_year'],
                                              iris.analysis.MEAN)
            for time_sub_cube in cube_monthly.slices_over(TIME_VAR_NAME):
                pre_grid_areas = iris.analysis.cartography.area_weights(
                    time_sub_cube)
                # Perform the area-weighted mean for each of the datasets
                # using the computed grid-box areas.
                weighted_mean = time_sub_cube.collapsed(
                    ['latitude', 'longitude'],
                    iris.analysis.MEAN,
                    weights=pre_grid_areas)

                time = (unit.num2date(
                    time_sub_cube.coord(TIME_VAR_NAME).points,
                    time_sub_cube.coord(TIME_VAR_NAME).units.name,
                    time_sub_cube.coord(TIME_VAR_NAME).units.calendar))

                date = str(time[0]).split(' ')[0].replace('-', ' ')[0:7]
                Year = date[0:4]
                TsStr = 'm' + date[-2:]
                PlotType = 'MAP'
                title = " ".join([var, date, 'mean: {:6.3f}'.format(
                    float(weighted_mean.data))])
                # ABS550_AER_an2012_mALLYEAR_WORLD_ZONALOBS_AERONETSky.ps.png
                # OD550_AER_an2017_d20171125_WORLD_MAP.ps.png
                plotfilename = os.path.join(
                    plotdir,
                    '_'.join([var, "an" + Year, TsStr, filter, PlotType]) + ".png")
                if verbose:
                    print(plotfilename)
                LatsToPlot = time_sub_cube.coord(axis='X').points
                LonsToPlot = time_sub_cube.coord(axis='Y').points
                xticks = [-180., -120., -60., 0., 60, 120, 180]
                yticks = [-90., -60, -30, 0., 30, 60, 90]
                plot_ts_map(time_sub_cube, title, plotfilename, LatsToPlot,
                            LonsToPlot, colorbar_ticks, colorbar_levels,
                            colormap, model, xticks, yticks)
def retrieval_years(request):
    if request.method == 'POST':
        data_req_ids = []
        # loop through the items in the POST from the form and identify any
        # DataRequest object ids that have been requested
        for key in request.POST:
            components = re.match(r'^request_data_req_(\d+)$', key)
            if components:
                data_req_ids.append(int(components.group(1)))

        # get a string representation of each id
        data_req_strs = [
            str(DataRequest.objects.filter(id=req).first())
            for req in data_req_ids
        ]

        start_year_objects = [
            (DataRequest.objects.get(id=req).datafile_set.aggregate(
                Min('start_time'))['start_time__min'],
             DataRequest.objects.get(id=req).datafile_set.first().time_units,
             DataRequest.objects.get(id=req).datafile_set.first().calendar)
            for req in data_req_ids
        ]
        start_year_objects_no_nones = [
            (start_time, time_units, calendar)
            for start_time, time_units, calendar in start_year_objects
            if start_time is not None
        ]
        if start_year_objects_no_nones:
            earliest_year_float, time_units, calendar = min(
                start_year_objects_no_nones, key=lambda x: x[0])
            earliest_year = cf_units.num2date(earliest_year_float, time_units,
                                              calendar).strftime('%Y')
        else:
            earliest_year = None

        end_year_objects = [
            (DataRequest.objects.get(id=req).datafile_set.aggregate(
                Max('end_time'))['end_time__max'],
             DataRequest.objects.get(id=req).datafile_set.first().time_units,
             DataRequest.objects.get(id=req).datafile_set.first().calendar)
            for req in data_req_ids
        ]
        end_year_objects_no_nones = [
            (end_time, time_units, calendar)
            for end_time, time_units, calendar in end_year_objects
            if end_time is not None
        ]
        if end_year_objects_no_nones:
            latest_year_float, time_units, calendar = max(
                end_year_objects_no_nones, key=lambda x: x[0])
            end_year = cf_units.num2date(latest_year_float, time_units,
                                         calendar).strftime('%Y')
        else:
            end_year = None

        # generate the confirmation page
        return render(request, 'pdata_app/retrieval_request_choose_years.html',
                      {'request': request,
                       'data_reqs': data_req_strs,
                       'page_title': 'Choose Retrieval Years',
                       'return_url': request.POST['variables_received_url'],
                       'data_request_ids': ','.join(map(str, data_req_ids)),
                       'earliest_year': earliest_year,
                       'end_year': end_year})
    else:
        return render(request, 'pdata_app/retrieval_request_error.html',
                      {'request': request,
                       'page_title': 'Retrieval Request'})
def _apply_params(self, forecast_predictors, forecast_vars,
                  optimised_coeffs, coeff_names, predictor_of_mean_flag):
    """
    Function to apply EMOS coefficients to all required dates.

    Args:
        forecast_predictors (Iris cube):
            Cube containing the forecast predictor e.g. ensemble mean
            or ensemble members.
        forecast_vars (Iris cube):
            Cube containing the forecast variance e.g. ensemble variance.
        optimised_coeffs (List):
            Coefficients for all dates.
        coeff_names (List):
            Coefficient names.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble members
            ("members") are supported as the predictors.

    Returns:
        (tuple): tuple containing:
            **calibrated_forecast_predictor_all_dates** (CubeList):
                List of cubes containing the calibrated forecast predictor.
            **calibrated_forecast_var_all_dates** (CubeList):
                List of cubes containing the calibrated forecast variance.
            **calibrated_forecast_coefficients_all_dates** (CubeList):
                List of cubes containing the coefficients used for
                calibration.
    """
    calibrated_forecast_predictor_all_dates = iris.cube.CubeList()
    calibrated_forecast_var_all_dates = iris.cube.CubeList()
    calibrated_forecast_coefficients_all_dates = iris.cube.CubeList()

    for forecast_predictor, forecast_var in zip(
            forecast_predictors.slices_over("time"),
            forecast_vars.slices_over("time")):

        date = unit.num2date(
            forecast_predictor.coord("time").points,
            forecast_predictor.coord("time").units.name,
            forecast_predictor.coord("time").units.calendar)[0]
        with iris.FUTURE.context(cell_datetime_objects=True):
            constr = iris.Constraint(time=date)
            forecast_predictor_at_date = forecast_predictor.extract(constr)
            forecast_var_at_date = forecast_var.extract(constr)

        # If the coefficients are not available for the date, use the
        # raw ensemble forecast as the calibrated ensemble forecast.
        if date not in optimised_coeffs.keys():
            msg = ("Ensemble calibration not available "
                   "for forecasts with start time of {}. "
                   "Coefficients not available".format(
                       date.strftime("%Y%m%d%H%M")))
            warnings.warn(msg)
            calibrated_forecast_predictor_at_date = (
                forecast_predictor_at_date.copy())
            calibrated_forecast_var_at_date = forecast_var_at_date.copy()
            optimised_coeffs[date] = np.full(len(coeff_names), np.nan)
            coeff_cubes = self._create_coefficient_cube(
                forecast_predictor_at_date, optimised_coeffs, coeff_names)
        else:
            optimised_coeffs_at_date = optimised_coeffs[date]

            # Assigning coefficients to coefficient names.
            if len(optimised_coeffs_at_date) == len(coeff_names):
                optimised_coeffs_at_date = dict(
                    zip(coeff_names, optimised_coeffs_at_date))
            elif len(optimised_coeffs_at_date) > len(coeff_names):
                excess_beta = (
                    optimised_coeffs_at_date[len(coeff_names):].tolist())
                optimised_coeffs_at_date = (dict(
                    zip(coeff_names, optimised_coeffs_at_date)))
                optimised_coeffs_at_date["beta"] = np.array(
                    [optimised_coeffs_at_date["beta"]] + excess_beta)
            else:
                msg = ("Number of coefficient names {} with names {} "
                       "is not equal to the number of "
                       "optimised_coeffs_at_date values {} "
                       "with values {} or the number of "
                       "coefficients is not greater than the "
                       "number of coefficient names. Can not continue "
                       "if the number of coefficient names out number "
                       "the number of coefficients".format(
                           len(coeff_names), coeff_names,
                           len(optimised_coeffs_at_date),
                           optimised_coeffs_at_date))
                raise ValueError(msg)

            if predictor_of_mean_flag.lower() in ["mean"]:
                # Calculate predicted mean = a + b*X, where X is the
                # raw ensemble mean. In this case, b = beta.
                beta = [optimised_coeffs_at_date["a"],
                        optimised_coeffs_at_date["beta"]]
                forecast_predictor_flat = (
                    forecast_predictor_at_date.data.flatten())
                new_col = np.ones(forecast_predictor_flat.shape)
                all_data = np.column_stack(
                    (new_col, forecast_predictor_flat))
                predicted_mean = np.dot(all_data, beta)
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date)
            elif predictor_of_mean_flag.lower() in ["members"]:
                # Calculate predicted mean = a + b*X, where X is the
                # raw ensemble members. In this case, b = beta^2.
                beta = np.concatenate(
                    [[optimised_coeffs_at_date["a"]],
                     optimised_coeffs_at_date["beta"]**2])
                forecast_predictor = (enforce_coordinate_ordering(
                    forecast_predictor, "realization"))
                forecast_predictor_flat = (
                    convert_cube_data_to_2d(forecast_predictor_at_date))
                forecast_var_flat = forecast_var_at_date.data.flatten()

                new_col = np.ones(forecast_var_flat.shape)
                all_data = (np.column_stack(
                    (new_col, forecast_predictor_flat)))
                predicted_mean = np.dot(all_data, beta)
                # Calculate mean of ensemble members, as only the
                # calibrated ensemble mean will be returned.
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date.collapsed(
                        "realization", iris.analysis.MEAN))

            xlen = len(forecast_predictor_at_date.coord(axis="x").points)
            ylen = len(forecast_predictor_at_date.coord(axis="y").points)
            predicted_mean = np.reshape(predicted_mean, (ylen, xlen))
            calibrated_forecast_predictor_at_date.data = predicted_mean

            # Calculating the predicted variance, based on the
            # raw variance S^2, where predicted variance = c + dS^2,
            # where c = (gamma)^2 and d = (delta)^2
            predicted_var = (optimised_coeffs_at_date["gamma"]**2 +
                             optimised_coeffs_at_date["delta"]**2 *
                             forecast_var_at_date.data)

            calibrated_forecast_var_at_date = forecast_var_at_date
            calibrated_forecast_var_at_date.data = predicted_var

            coeff_cubes = self._create_coefficient_cube(
                calibrated_forecast_predictor_at_date,
                optimised_coeffs[date], coeff_names)

        calibrated_forecast_predictor_all_dates.append(
            calibrated_forecast_predictor_at_date)
        calibrated_forecast_var_all_dates.append(
            calibrated_forecast_var_at_date)
        calibrated_forecast_coefficients_all_dates.extend(coeff_cubes)

    return (calibrated_forecast_predictor_all_dates,
            calibrated_forecast_var_all_dates,
            calibrated_forecast_coefficients_all_dates)
def main(inargs):
    """Run the program."""

    metadata_dict = {}

    # Read data
    control_cube, control_history = read_data(inargs.control_files,
                                              inargs.variable,
                                              inargs.grid_point,
                                              convert_to_annual=inargs.annual)
    metadata_dict[inargs.control_files[0]] = control_history
    coord_names = [coord.name() for coord in control_cube.dim_coords]
    time_var = coord_names[0]
    assert time_var in ['time', 'year']

    experiment_cube, experiment_history = read_data(inargs.experiment_files,
                                                    inargs.variable,
                                                    inargs.grid_point,
                                                    convert_to_annual=inargs.annual)
    metadata_dict[inargs.experiment_files[0]] = experiment_history

    if inargs.dedrifted_files:
        dedrifted_cube, dedrifted_history = read_data(inargs.dedrifted_files,
                                                      inargs.variable,
                                                      inargs.grid_point,
                                                      convert_to_annual=inargs.annual)
        metadata_dict[inargs.dedrifted_files[0]] = dedrifted_history

    if inargs.coefficient_file:
        cubic_data, a_cube = cubic_fit(inargs.coefficient_file,
                                       inargs.grid_point,
                                       control_cube.coord(time_var).points)
        # TODO: coeff metadata

    # Time axis adjustment
    if time_var == 'time':
        first_data_cube = iris.load_cube(inargs.experiment_files[0],
                                         gio.check_iris_var(inargs.variable))
        if inargs.grid_point:
            first_data_cube = select_point(first_data_cube, inargs.grid_point,
                                           timeseries=True)
        if inargs.annual:
            first_data_cube = timeseries.convert_to_annual(first_data_cube)
        time_diff, branch_time, new_time_unit = remove_drift.time_adjustment(
            first_data_cube, control_cube, 'annual',
            branch_time=inargs.branch_time)
        print(f'branch time: {branch_time - 182.5}')
        time_coord = experiment_cube.coord('time')
        time_coord.convert_units(new_time_unit)
        experiment_time_values = time_coord.points.astype(numpy.float32) - time_diff
    elif time_var == 'year':
        if inargs.branch_year is not None:
            branch_year = inargs.branch_year
        else:
            if not inargs.control_time_units:
                control_time_units = gio.fix_time_descriptor(
                    experiment_cube.attributes['parent_time_units'])
            else:
                control_time_units = inargs.control_time_units.replace("_", " ")
            branch_time = experiment_cube.attributes['branch_time_in_parent']
            branch_datetime = cf_units.num2date(branch_time, control_time_units,
                                                cf_units.CALENDAR_STANDARD)
            branch_year = branch_datetime.year
        print(f'branch year: {branch_year}')
        experiment_time_values = numpy.arange(branch_year,
                                              branch_year + experiment_cube.shape[0])

    # Plot
    fig = plt.figure(figsize=[14, 7])
    plt.plot(control_cube.coord(time_var).points, control_cube.data,
             label='control')
    plt.plot(experiment_time_values, experiment_cube.data, label='experiment')
    if inargs.dedrifted_files:
        plt.plot(experiment_time_values, dedrifted_cube.data, label='dedrifted')
    if inargs.coefficient_file:
        plt.plot(control_cube.coord(time_var).points, cubic_data,
                 label='cubic fit')

    if inargs.outlier_threshold:
        data, outlier_idx = timeseries.outlier_removal(control_cube.data,
                                                       inargs.outlier_threshold)
        plt.plot(control_cube.coord(time_var).points[outlier_idx],
                 control_cube.data[outlier_idx],
                 marker='o', linestyle='none', color='r', alpha=0.3)

    if inargs.ylim:
        ymin, ymax = inargs.ylim
        plt.ylim(ymin, ymax)

    plt.ylabel(f"{gio.check_iris_var(inargs.variable)} ({control_cube.units})")
    if time_var == 'time':
        plt.xlabel(str(new_time_unit))
    else:
        plt.xlabel('control run year')
    plt.legend()

    if inargs.grid_point:
        title = get_title(inargs.control_files[0], inargs.variable,
                          inargs.grid_point)
        plt.title(title)

    # Save output
    plt.savefig(inargs.outfile, bbox_inches='tight')
    log_text = cmdprov.new_log(infile_history=metadata_dict, git_repo=repo_dir)
    log_file = re.sub('.png', '.met', inargs.outfile)
    cmdprov.write_log(log_file, log_text)
def __get_time_info__(f, io):
    """
    Evaluates the time slices in the input files to verify that there are the
    correct number of slices in the file and that the slices are contiguous.
    It also pulls off other information, such as start/end times, time
    period/spacing, slice count, and the average of the slices (for ordering
    purposes).

    Parameters:
        f (climIO file): A pointer to an open netCDF file.
        io (climIO): An object that contains a set of io commands to use.

    Returns:
        date_info (dict): Contains file information, such as time period(t_per),
            time step(t_step), first/last step(t0 and tn), slice counts(cnt).
        average (int): The average of all time slices.
    """
    date_info = {}
    _tc, _dim, att = io.get_var_info(f, "time")
    stand_cal = cf_units.Unit("days since 1-1-1 0:0:0", calendar=att["calendar"])
    cal_unit = cf_units.Unit(att["units"], calendar=att["calendar"])
    if "bounds" in att.keys():
        # print('Using bounds')
        tb = f.variables[att["bounds"]]
        j = len(tb)
        d0 = tb[0, 0]
        d1 = tb[1, 0]
        d2 = tb[2, 0]
        dn = tb[j - 1, 1] - 1
        time = tb[:, 0]
    else:
        # print('Using time')
        tb = f.variables["time"]
        j = len(tb)
        d0 = tb[0]
        d1 = tb[1]
        d2 = tb[2]
        dn = tb[j - 1]
        time = tb[:]
    date_info["time"] = time

    # Get second and third time bounds to figure out the time period
    t1 = parser.parse(
        str(cf_units.num2date(d1, att["units"], calendar=att["calendar"]))).timetuple()
    t2 = parser.parse(
        str(cf_units.num2date(d2, att["units"], calendar=att["calendar"]))).timetuple()

    # Get time difference between the steps
    t_step = d2 - d1
    h = t2[3] - t1[3]
    if t1[3] != t2[3]:
        t_per = str(h) + "hour"
    elif t1[2] != t2[2]:
        t_per = "day"
    elif t1[1] != t2[1]:
        t_per = "mon"
    elif t1[0] != t2[0]:
        t_per = "year"
    else:
        t_per = "UNKNOWN"
    date_info["t_per"] = t_per
    date_info["t_step"] = t_step

    # Get first and last dates
    # t0 = (parser.parse(str(cf_units.num2date(d0, att['units'],
    #                                          calendar=att['calendar']))).timetuple())
    # tn = (parser.parse(str(cf_units.num2date(dn, att['units'],
    #                                          calendar=att['calendar']))).timetuple())
    date_info["t0"] = cal_unit.convert(d0, stand_cal)
    date_info["tn"] = cal_unit.convert(dn, stand_cal)
    date_info["cnt"] = j
    average = (date_info["t0"] + date_info["tn"]) / 2
    date_info["units"] = att["units"]
    date_info["calendar"] = att["calendar"]

    # Check to see if the number of slices matches how many should be in the
    # date range
    # if t_per == 'year':
    #     _ok = (tn[0] - t0[0] == j)
    # elif t_per == 'mon':
    #     _ok = (((tn[0] - t0[0]) * 12) + (tn[1] - t0[1] + 1) == j)
    # elif t_per == 'day':
    #     _ok = ((dn - d0 + 1) == j)
    # elif 'hour' in t_per:
    #     cnt_per_day = 24.0 / h
    #     _ok = (((dn - d0) * cnt_per_day + 1) == j)

    return average, date_info
def _num2date(num):
    return cf_units.num2date(num, origin, calendar)
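# A minimal sketch of the enclosing scope this closure-style helper appears to
# rely on: `origin` and `calendar` are free variables, so they must be defined
# wherever _num2date is used (the values below are hypothetical).
origin = 'days since 1850-01-01'
calendar = 'gregorian'
print(_num2date(0.0))  # 1850-01-01 00:00:00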