def specify(manager, options):
    """Return the list of operations and expected output given the input catalogue."""

    # Require many local variables here
    # pylint: disable=too-many-locals

    # parse input option list
    parser = argparse.ArgumentParser()
    parser.add_argument('--start', required=True)
    parser.add_argument('--end', required=True)
    args = parser.parse_args(options)

    # process a whole number of days
    startday = int(days_since_epoch(datetime_numeric.parse(args.start)))
    endday = int(days_since_epoch(datetime_numeric.parse(args.end)))

    # find items in catalogue grouped by day
    lstinput = manager.references_groupbyday(INPUTNAME, subsetindex=0)
    auxinput = manager.references_groupbyday(INPUTNAME, subsetindex=1)

    # the main output dataset
    dataset = manager.newdataset()

    # list of subsets (one for each command pattern)
    subsets = [dataset.newsubset([spec.outputpattern]) for spec in SUBSETSPECS]

    # iterate over subsets
    for subsetindex, subset in enumerate(subsets):

        # process each day
        for dayindex in range(startday, endday + 1):

            # convert to datetime object for formatting etc
            day = epoch_plus_days(dayindex)

            # loop over LST files for this day
            inputs = []
            for lstreference in lstinput[dayindex]:

                # find AUX file matching the LST file
                lsttime = manager.match(lstreference).time
                print 'day {0} time {1}'.format(dayindex, lsttime)
                auxreference = next(
                    reference for reference in auxinput[dayindex]
                    if manager.match(reference).time == lsttime)

                # append pair
                inputs.extend([lstreference, auxreference])

            # build output filename
            outputs = [subset.newfiletime(day)]

            # append this operation to the dataset
            dataset.newoperation(inputs, outputs, SUBSETSPECS[subsetindex])
def __init__(self, path, source, observable, startdate, enddate):
    """Build counter instance."""

    # parse strings to datetime objects
    startdate = ObsCounter.todatetime(startdate)
    enddate = ObsCounter.todatetime(enddate)

    # set member variables
    self.path = path
    self.source = source
    self.observable = observable

    # express as day numbers
    self.startday = int(days_since_epoch(startdate))
    self.endday = int(days_since_epoch(enddate))
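# Usage sketch (hypothetical argument values; the date-string format accepted
# by ObsCounter.todatetime is not shown here, so ISO-style dates are assumed):
#
#     counter = ObsCounter('/path/to/obs', 'insitu_land', 'Tmean', '2006-01-01', '2006-12-31')
#     print counter.startday, counter.endday   # integer day numbers since the 1850-01-01 epoch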
def operate(self, results, netcdf):
    """Operate on netcdf dataset object and populate results dictionary."""

    timevariable = netcdf.variables['time']
    timevalue = netCDF4.num2date(timevariable[0], units=timevariable.units)
    results[ConsistentModelOutputNetCDF.FIELDNAME_DAYNUMBER] = int(days_since_epoch(timevalue))
def run(outputfilename, inputs, processdate):
    """EUMOPPS run commands."""

    # Need day number for inclusion in output
    daynumber = numpy.int64(days_since_epoch(processdate))

    # Grid onto these axes
    axes = GlobalFieldAxes2DWithTime(daynumber).aslist()
    outputgrid = DiagnosticGridBins(axes)

    # Cache location lookup as we might refer to same one multiple times
    locationlookup_cache = {}

    # One set of results per available source [land|sea|ice|lakes|in-situ land|in-situ ocean]
    # per available observable [Tmean|Tmax|Tmin]
    for inputindex, descriptor in enumerate(inputs):

        # Cached loading of location lookup
        try:

            # Attempt to find in cache in case it's already been loaded for other sources/observables
            locationlookup = locationlookup_cache[descriptor.locationfilename]

        except KeyError:

            # Not found - load it for the first time this operation
            locationlookup = LocationLookupRawBinaryReader().read(descriptor.locationfilename)
            locationlookup_cache[descriptor.locationfilename] = locationlookup

        # Read correlation ranges for this item
        ranges = LocalCorrelationRangeRawBinaryReader().read(descriptor.correlationrangesfilename)

        # Observation files for each observable
        filespecs = {descriptor.observable: ObservableFileSpec(descriptor.observationfilename, ranges)}

        # Load as observation source
        filesource = ObservationSourceSingleDayRawBinary(locationlookup, filespecs, daynumber)

        # Show stats
        print_stats(filesource, descriptor.sourcename, descriptor.observable)

        # Connector for gridding this
        connector = DiagnosticGridObservationConnector(axes, filesource)

        # Grid each observable
        dailydata = connector.get_day(descriptor.observable, daynumber)
        outputgrid.create_fields_from_sparse_observations(descriptor.sourcename, dailydata)
        outputgrid.compute_weighted_mean(descriptor.sourcename, descriptor.observable)

    # Store result
    ensuredirectory(outputfilename)
    saver = NetCDFSaverEUSTACE(outputfilename)
    saver.write_cubes(outputgrid)
def __init__(self, filename):
    """Load from filename."""

    # Load file
    inputdata = netCDF4.Dataset(filename, 'r')

    # Retrieve fields
    inputfields = {
        name: inputdata.variables[name][:]
        for name in IceSurfaceTemperatureQualityControlNetCDF.FIELDNAMES
    }

    # Reduce to 2D those with time dimension
    inputfields = {
        name: field[0, :, :] if (field.ndim == 3 and '3d' not in name) else field
        for name, field in inputfields.iteritems()
    }

    # Get time
    timevariable = inputdata.variables['time']
    fieldtime = netCDF4.num2date(timevariable[0], units=timevariable.units)

    # Convert to days since EUSTACE epoch
    daynumber = int(days_since_epoch(fieldtime))

    # Construct
    super(IceSurfaceTemperatureQualityControlNetCDF, self).__init__(daynumber, **inputfields)
def build_bias_matrix(model, first_year, last_year):
    """Evaluate the bias model at the first day of each year for each station and store."""

    #model = load_fitted_breakmodel(modelfile)

    evaluation_dates = [datetime.datetime(year, 1, 1) for year in range(first_year, last_year + 1)]

    n_stations = max(model['expanded_station_indices']) + 1
    station_indices = range(n_stations)
    time_indices = [days_since_epoch(date) for date in evaluation_dates]

    bias_index = numpy.zeros((n_stations, len(time_indices))) + numpy.nan
    bias_grid = numpy.zeros((n_stations, len(time_indices))) + numpy.nan

    for ind, time_index in enumerate(time_indices):
        effect = insitu_land_covariate_effect(time_index, station_indices, model['breakpoints'])
        if effect is not None:
            bias_index[effect[:, 0], ind] = effect[:, 1]
            bias_grid[effect[:, 0], ind] = model['biases'][effect[:, 1]]

    model['time_indices'] = time_indices
    model['evaluation_dates'] = evaluation_dates  # dates represented by time_indices
    model['bias_index'] = bias_index              # indices to the biases mapping into the adjustment matrix
    model['bias_grid'] = bias_grid                # the bias matrix
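# Minimal sketch of the scatter-assignment pattern used above, with made-up
# numbers rather than real model output: effect[:, 0] holds station row
# indices and effect[:, 1] holds the values scattered into one date column.
import numpy

bias_index = numpy.zeros((4, 2)) + numpy.nan   # 4 stations, 2 evaluation dates
effect = numpy.array([[0, 7], [2, 9]])         # stations 0 and 2 affected
bias_index[effect[:, 0], 0] = effect[:, 1]     # date column 0 receives 7 and 9
# rows 1 and 3 remain NaN: no adjustment applies to those stations on that date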
def __init__(self, format, filename):
    """Load and parse specified filename according to specified format instance."""

    super(ObservationSource, self).__init__()

    # assume text-gzip
    textgzip = gzip.open(filename, 'rb')

    # temporary text file
    textfile = tempfile.NamedTemporaryFile(prefix='eustace.preprocess.insitu_ocean.', suffix='.txt')

    # unzip
    shutil.copyfileobj(textgzip, textfile)

    # parse fixed-width format (FIELDWIDTH gives the field widths)
    # comments=None is required because some stations have the hashtag character in their name
    textfile.seek(0)
    txtdata = numpy.genfromtxt(
        textfile,
        delimiter=HadNMAT2Format.FIELDWIDTH,
        usecols=format.usecols,
        names=HadNMAT2Format.FIELDS,
        dtype=HadNMAT2Format.DTYPES,
        comments=None)

    # build coordinates
    self.coords = numpy.vstack([
        txtdata[HadNMAT2Format.LAT] * HadNMAT2Format.LOCATION_SCALE,
        txtdata[HadNMAT2Format.LON] * HadNMAT2Format.LOCATION_SCALE
    ])

    # build date values
    year = txtdata[HadNMAT2Format.YEAR]
    month = txtdata[HadNMAT2Format.MONTH]
    day = txtdata[HadNMAT2Format.DAY]
    self.time = numpy.array([
        days_since_epoch(datetime(year[index], month[index], day[index]))
        for index in range(txtdata.shape[0])
    ], numpy.float32)

    # compute mean in kelvin
    self.tmean = (txtdata[HadNMAT2Format.AIRT].astype(numpy.float32) * HadNMAT2Format.TEMPERATURE_SCALE) + HadNMAT2Format.TEMPERATURE_OFFSET
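# Side note (minimal runnable sketch): numpy.genfromtxt interprets an integer
# or a sequence of integers passed as 'delimiter' as fixed field widths rather
# than a separator character, which is what the FIELDWIDTH delimiter above
# relies on. The widths here are illustrative only:
import numpy
from io import BytesIO

example = BytesIO(b'18500101\n18500102\n')
parsed = numpy.genfromtxt(example, delimiter=[4, 2, 2], dtype=int)
# parsed == [[1850, 1, 1], [1850, 1, 2]]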
def test_all_methods(self):

    # Test data
    testfile = tempfile.NamedTemporaryFile(prefix='eustace.outputformats.test.test_filebuilder.', suffix='.nc')

    # Build attributes for example
    attributes = DatasetAttributesGlobalField(
        dataset='Example',
        version='A',
        mainvariable='tas',
        source='B',
        institution='MO',
        comment='EUSTACE project example file format for global field',
        history='Created ' + time.strftime('%c'))

    # Day number for the given date
    daynumber = int(days_since_epoch(datetime(2015, 11, 5)))

    # object to build global field file at current time
    builder = FileBuilderGlobalField(testfile.name, daynumber, **attributes.__dict__)

    # fill field with -95 degree C temperatures, which is representative of the
    # lowest temperature likely to be found in EUSTACE
    shape = definitions.GLOBAL_FIELD_SHAPE
    testdata_values = numpy.full(shape, -95. + 273.15)
    testdata_mask = numpy.full(shape, False)
    testdata = numpy.ma.masked_array(data=testdata_values, mask=testdata_mask)
    builder.add_global_field(definitions.TAS, testdata)
    builder.save_and_close()

    # Check results
    result = netCDF4.Dataset(testfile.name, 'r')

    # check that the data haven't wrapped
    numpy.testing.assert_almost_equal(result.variables['tas'][:], testdata, decimal=4)

    # check that the time offsets match the longitudes
    numpy.testing.assert_almost_equal(result.variables['longitude'][:] / 360., result.variables['timeoffset'][:], decimal=6)
def extractdaynumber(filename):
    """This is a hack to use filename to get daynumber, assuming filename ends with YYYYMMDD.bin
       Ideally EUMOPPS would provide us with this but it doesn't at present."""

    # check file extension
    if filename[-4:] != '.bin':
        raise ValueError('Filename \"{0}\" expected to end with .bin but does not'.format(filename))

    # extract date string
    datestring = filename[-12:-4]

    # Convert to datetime object
    t = datetime.strptime(datestring, '%Y%m%d')

    # Convert to daynumber
    return numpy.int64(days_since_epoch(t))
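# Usage sketch with a hypothetical filename matching the expected pattern
# (the day number assumes the 1850-01-01 epoch exercised by the tests below):
#
#     >>> extractdaynumber('surfaceairmodel_land_20060201.bin')
#     57009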
def write_example_global_field(source, version, outputdirectory, institution):
    """Make an example output format for a global field."""

    # set fixed seed so pseudo-random noise is actually repeatable
    numpy.random.seed(1)

    # Build attributes for example
    attributes = DatasetAttributesGlobalField(
        dataset='Example',
        version=version,
        mainvariable='tas',
        source=source,
        institution=institution,
        comment='EUSTACE project example file format for global field',
        history='Created ' + time.strftime('%c'))

    # Day number for the given date
    daynumber = int(days_since_epoch(datetime(2015, 11, 5)))

    # Make a pathname
    pathname = attributes.build_pathname(outputdirectory, daynumber)

    # object to build global field file at current time
    builder = FileBuilderGlobalField(pathname, daynumber, **attributes.__dict__)

    # get some global field data
    field_data = get_global_field_data()

    # add examples to output
    builder.add_global_field(definitions.TAS, field_data)
    builder.add_global_field(definitions.TASMIN, field_data - 10.0)
    builder.add_global_field(definitions.TASMAX, field_data + 10.0)

    # also get some uncertainty data
    uncertainty_data = get_uncertainty_example_data()

    # add to output
    builder.add_uncertainty_parameter('uncertainty_example', 'An example of an uncertainty variable (K)', uncertainty_data)

    # store the result
    builder.save_and_close()
def __init__(self, pathname, outputstructure):
    """Create with specified output structure."""

    super(FileBuilder, self).__init__()

    # get EUSTACE daynumber
    daynumber = days_since_epoch(outputstructure.time_datetime())

    # make the file
    self.create(pathname, title='Test output', institution='', comment='Test output', history='', source='')

    # time variable
    time_variable = OutputVariable(
        name='time',
        dtype=numpy.float32,
        fill_value=None,
        standard_name='time',
        long_name='Time',
        units=definitions.TIME_UNITS_DAYS_SINCE_EPOCH,
        calendar='gregorian',
        axis='T')

    # set time in days since the EUSTACE epoch [and set UNLIMITED]
    self.add_dimension_and_variable(
        dimensionname='time',
        variable=time_variable,
        values=numpy.array([daynumber], numpy.float32),
        unlimited=True)

    # global latitude axis
    self.add_dimension_and_variable(
        dimensionname=definitions.DIMENSION_NAME_LATITUDE,
        variable=definitions.LATITUDE,
        values=outputstructure.latitudes)

    # global longitude axis
    self.add_dimension_and_variable(
        dimensionname=definitions.DIMENSION_NAME_LONGITUDE,
        variable=definitions.LONGITUDE,
        values=outputstructure.longitudes)
def process_inputs(self, input_descriptor, component_index, time_keys):
    """Pre-process observations at specified times for a specified component.

    Does not solve the system. To preprocess the observations and also solve
    the system run update_component.
    """

    for time_key in time_keys:

        # convert time_key string to days since epoch
        this_time = dateutil.parser.parse(time_key)
        time_index = int(epoch.days_since_epoch(this_time))

        # Build inputloaders from list of sources
        inputloaders = [
            AnalysisSystemInputLoaderRawBinary_OneDay(time_index=time_index, **source)
            for source in input_descriptor[time_key]
        ]

        # Build and store measurement systems for component
        self.update_component_time(inputloaders, component_index, time_index)
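# Hedged sketch of the input_descriptor layout implied above: keys are time
# strings parseable by dateutil.parser.parse, values are lists of keyword
# argument dicts passed straight to AnalysisSystemInputLoaderRawBinary_OneDay
# via **source. The exact keyword names are not shown in this code, so they
# are left elided here:
#
#     input_descriptor = {
#         '2006-02-01': [{...}, {...}],   # one dict per input source
#         '2006-02-02': [{...}],
#     }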
def run_day(outputobservationsfilename, outputlocationfilename, inputfilename, processdate, sourcename, observable):
    """EUMOPPS run commands."""

    # Get the info
    source = SOURCECLASS[sourcename](inputfilename)

    # Compute day number (since EUSTACE epoch)
    daynumber = numpy.int64(days_since_epoch(processdate))

    # Retrieve daily data (and daily locations)
    dailydata = source.observations(observable)
    dailylocations = source.observation_location_lookup()

    # Location lookup structure with new unique ID
    dailylookup = LocationLookupWithID(uuid.uuid1(), dailylocations)

    # Store daily data
    ObservationRawBinaryWriter().write_day(outputobservationsfilename, dailylookup.uuid, dailydata, daynumber)

    # Store corresponding location lookup
    LocationLookupRawBinaryWriter().write(outputlocationfilename, dailylookup)
def test_days_since_epoch_negative(self):
    self.assertAlmostEqual(-1.0, days_since_epoch(datetime(1849, 12, 31)))
def test_days_since_epoch_zero(self):
    result = days_since_epoch(datetime(1850, 1, 1))
    self.assertTrue(isinstance(result, float))
    self.assertAlmostEqual(0.0, result)
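# A hedged companion example (not in the original suite) making the epoch
# direction explicit: one day after 1850-01-01 maps to day number 1.0.
def test_days_since_epoch_positive(self):
    self.assertAlmostEqual(1.0, days_since_epoch(datetime(1850, 1, 2)))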
def main():

    print 'Advanced standard example using a few days of EUSTACE data'
    parser = argparse.ArgumentParser(description='Advanced standard example using a few days of EUSTACE data')
    parser.add_argument('outpath', help='directory where the output should be redirected')
    parser.add_argument('--json_descriptor', default=None, help='a json descriptor containing the covariates to include in the climatology model')
    parser.add_argument('--land_biases', action='store_true', help='include insitu land homogenization bias terms')
    parser.add_argument('--global_biases', action='store_true', help='include global satellite bias terms')
    parser.add_argument('--n_iterations', type=int, default=5, help='number of solving iterations')
    args = parser.parse_args()

    # Input data path
    basepath = os.path.join('/work/scratch/eustace/rawbinary3')

    # Days to process
    time_indices = range(int(days_since_epoch(datetime(2006, 2, 1))), int(days_since_epoch(datetime(2006, 2, 2))))

    # Sources to use
    sources = ['surfaceairmodel_land', 'surfaceairmodel_ocean', 'surfaceairmodel_ice', 'insitu_land', 'insitu_ocean']

    # SETUP

    # setup for the seasonal core: climatology covariates setup read from file
    seasonal_setup = {
        'n_triangulation_divisions': 5,
        'n_harmonics': 4,
        'n_spatial_components': 6,
        'amplitude': 2.,
        'space_length_scale': 5.,    # length scale in units of degrees
    }
    grandmean_amplitude = 15.0

    # setup for the large scale component
    spacetime_setup = {
        'n_triangulation_divisions': 2,
        'alpha': 2,
        'starttime': 0,
        'endtime': 10.,
        'n_nodes': 2,
        'overlap_factor': 2.5,
        'H': 1,
        'amplitude': 1.,
        'space_length_scale': 15.0,  # length scale in units of degrees
        'time_length_scale': 15.0,   # length scale in units of days
    }
    bias_amplitude = .9

    # setup for the local component
    local_setup = {
        'n_triangulation_divisions': 6,
        'amplitude': 2.,
        'space_length_scale': 2.,    # length scale in units of degrees
    }
    globalbias_amplitude = 15.0

    # CLIMATOLOGY COMPONENT: combining the seasonal core along with latitude harmonics, altitude and coastal effects
    if args.json_descriptor is not None:
        loader = LoadCovariateElement(args.json_descriptor)
        loader.check_keys()
        covariate_elements, covariate_hyperparameters = loader.load_covariates_and_hyperparameters()
        print('The following fields have been added as covariates of the climatology model')
        print(loader.data.keys())
    else:
        covariate_elements, covariate_hyperparameters = [], []

    climatology_element = CombinationElement(
        [SeasonalElement(n_triangulation_divisions=seasonal_setup['n_triangulation_divisions'],
                         n_harmonics=seasonal_setup['n_harmonics'],
                         include_local_mean=True),
         GrandMeanElement()] + covariate_elements)
    climatology_hyperparameters = CombinationHyperparameters(
        [SeasonalHyperparameters(n_spatial_components=seasonal_setup['n_spatial_components'],
                                 common_log_sigma=numpy.log(seasonal_setup['amplitude']),
                                 common_log_rho=numpy.log(numpy.radians(seasonal_setup['space_length_scale']))),
         CovariateHyperparameters(numpy.log(grandmean_amplitude))] + covariate_hyperparameters)
    climatology_component = SpaceTimeComponent(
        ComponentStorage_InMemory(climatology_element, climatology_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True, method='APPROXIMATED',
        compute_sample=True, sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # LARGE SCALE (kronecker product) COMPONENT: combining large scale trends with bias terms accounting for homogenization effects
    if args.land_biases:
        bias_element, bias_hyperparameters = [InsituLandBiasElement(BREAKPOINTS_FILE)], [CovariateHyperparameters(numpy.log(bias_amplitude))]
        print('Adding bias terms for insitu land homogenization')
    else:
        bias_element, bias_hyperparameters = [], []

    large_scale_element = CombinationElement(
        [SpaceTimeKroneckerElement(n_triangulation_divisions=spacetime_setup['n_triangulation_divisions'],
                                   alpha=spacetime_setup['alpha'],
                                   starttime=spacetime_setup['starttime'],
                                   endtime=spacetime_setup['endtime'],
                                   n_nodes=spacetime_setup['n_nodes'],
                                   overlap_factor=spacetime_setup['overlap_factor'],
                                   H=spacetime_setup['H'])] + bias_element)
    large_scale_hyperparameters = CombinationHyperparameters(
        [SpaceTimeSPDEHyperparameters(space_log_sigma=numpy.log(spacetime_setup['amplitude']),
                                      space_log_rho=numpy.log(numpy.radians(spacetime_setup['space_length_scale'])),
                                      time_log_rho=numpy.log(spacetime_setup['time_length_scale']))] + bias_hyperparameters)
    large_scale_component = SpaceTimeComponent(
        ComponentStorage_InMemory(large_scale_element, large_scale_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True, method='APPROXIMATED',
        compute_sample=True, sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # LOCAL COMPONENT: combining local scale variations with global satellite bias terms
    if args.global_biases:
        bias_elements = [BiasElement(groupname, 1) for groupname in GLOBAL_BIASES_GROUP_LIST]
        bias_hyperparameters = [CovariateHyperparameters(numpy.log(globalbias_amplitude)) for index in range(len(GLOBAL_BIASES_GROUP_LIST))]
        print('Adding global bias terms for all the surfaces')
    else:
        bias_elements, bias_hyperparameters = [], []

    local_scale_element = CombinationElement(
        [LocalElement(n_triangulation_divisions=local_setup['n_triangulation_divisions'])] + bias_elements)
    local_scale_hyperparameters = CombinationHyperparameters(
        [LocalHyperparameters(log_sigma=numpy.log(local_setup['amplitude']),
                              log_rho=numpy.log(numpy.radians(local_setup['space_length_scale'])))] + bias_hyperparameters)
    local_component = SpatialComponent(
        ComponentStorage_InMemory(local_scale_element, local_scale_hyperparameters),
        SpatialComponentSolutionStorage_InMemory(),
        compute_uncertainties=True, method='APPROXIMATED',
        compute_sample=True, sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # Analysis system using the specified components, for the Tmean observable
    print 'Analysing inputs'
    analysis_system = AnalysisSystem(
        [climatology_component, large_scale_component, local_component],
        ObservationSource.TMEAN)

    # Object to load raw binary inputs at time indices
    inputloaders = [AnalysisSystemInputLoaderRawBinary_Sources(basepath, source, time_indices) for source in sources]

    for iteration in range(args.n_iterations):

        message = 'Iteration {}'.format(iteration)
        print(message)

        # Update with data
        analysis_system.update(inputloaders, time_indices)

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Get date for output
        outputdate = inputloaders[0].datetime_at_time_index(time_index)
        print 'Evaluating output grid: ', outputdate

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index, outputdate,
            latitudes=numpy.linspace(-90. + definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                     90. - definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                     num=definitions.GLOBAL_FIELD_SHAPE[1]),
            longitudes=numpy.linspace(-180. + definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                      180. - definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                      num=definitions.GLOBAL_FIELD_SHAPE[2]))

        # Evaluate expected value and uncertainty at these locations
        # (the original looped over ['MAP', 'post_STD'] but evaluated both fields
        # on every pass, doing the expensive evaluation twice; evaluate each once)
        print 'Evaluating: MAP'
        result_expected_value = analysis_system.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)
        print 'Evaluating: post_STD'
        result_expected_uncertainties = analysis_system.evaluate_expected_value('post_STD', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)

        print 'Evaluating: climatology fraction'
        climatology_fraction = analysis_system.evaluate_climatology_fraction(outputstructure, [1, 1], 1000)

        print 'Evaluating: the sample'
        sample = analysis_system.evaluate_projected_sample(outputstructure)

        # Make output filename
        pathname = 'eustace_example_output_{0:04d}{1:02d}{2:02d}.nc'.format(outputdate.year, outputdate.month, outputdate.day)
        pathname = os.path.join(args.outpath, pathname)
        print 'Saving: ', pathname

        # Save results
        filebuilder = FileBuilderGlobalField(
            pathname, time_index,
            'Infilling Example', 'UNVERSIONED', definitions.TAS.name, '',
            'Example data only',
            'eustace.analysis.advanced_standard.examples.example_eustace_few_days', '')
        filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION, climatology_fraction.reshape(definitions.GLOBAL_FIELD_SHAPE))

        for index in range(definitions.GLOBAL_SAMPLE_SHAPE[3]):
            variable = copy.deepcopy(definitions.TASENSEMBLE)
            variable.name = variable.name + '_' + str(index)
            selected_sample = sample[:, index].ravel() + result_expected_value
            filebuilder.add_global_field(variable, selected_sample.reshape(definitions.GLOBAL_FIELD_SHAPE))

        filebuilder.save_and_close()

    print 'Complete'
def parse_date(s):
    """Helper to parse a date string to days since the epoch."""

    return days_since_epoch(datetime.strptime(s, ObservationSourceLakeReading.DATEFORMAT))
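# Usage sketch, assuming ObservationSourceLakeReading.DATEFORMAT is a
# strptime-style format string (its actual value is not shown here):
#
#     >>> parse_date('20060201')   # if DATEFORMAT were '%Y%m%d'
#     57009.0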
def main():

    print 'Advanced standard example using a few days of EUSTACE data'
    parser = argparse.ArgumentParser(description='Advanced standard example using a few days of EUSTACE data')
    parser.add_argument('outpath', help='directory where the output should be redirected')
    parser.add_argument('--json_descriptor', default=None, help='a json descriptor containing the covariates to include in the climatology model')
    parser.add_argument('--land_biases', action='store_true', help='include insitu land homogenization bias terms')
    parser.add_argument('--global_biases', action='store_true', help='include global satellite bias terms')
    parser.add_argument('--n_iterations', type=int, default=5, help='number of solving iterations')
    args = parser.parse_args()

    # Input data path
    basepath = os.path.join('/work/scratch/eustace/rawbinary3')

    # Days to process
    #time_indices = range(int(days_since_epoch(datetime(2006, 2, 1))), int(days_since_epoch(datetime(2006, 2, 2))))
    #time_indices = range(int(days_since_epoch(datetime(1906, 2, 1))), int(days_since_epoch(datetime(1906, 2, 2))))
    date_list = [datetime(2006, 1, 1) + relativedelta(days=k) for k in range(3)]
    #backwards_list = [date_list[i] for i in range(11, -1, -1)]
    #date_list = backwards_list
    time_indices = [int(days_since_epoch(date)) for date in date_list]

    # Sources to use
    sources = ['surfaceairmodel_land', 'surfaceairmodel_ocean', 'surfaceairmodel_ice', 'insitu_land', 'insitu_ocean']
    sources = ['insitu_land', 'insitu_ocean']
    #sources = ['surfaceairmodel_land']

    # CLIMATOLOGY COMPONENT: combining the seasonal core along with latitude harmonics, altitude and coastal effects
    if args.json_descriptor is not None:
        loader = LoadCovariateElement(args.json_descriptor)
        loader.check_keys()
        covariate_elements, covariate_hyperparameters = loader.load_covariates_and_hyperparameters()
        print('The following fields have been added as covariates of the climatology model')
        print(loader.data.keys())
    else:
        covariate_elements, covariate_hyperparameters = [], []

    #climatology_element = CombinationElement([SeasonalElement(n_triangulation_divisions=2, n_harmonics=2, include_local_mean=False), GrandMeanElement()] + covariate_elements)
    #climatology_hyperparameters = CombinationHyperparameters([SeasonalHyperparameters(n_spatial_components=2, common_log_sigma=0.0, common_log_rho=0.0), CovariateHyperparameters(numpy.log(15.0))] + covariate_hyperparameters)
    climatology_element = CombinationElement([GrandMeanElement()] + covariate_elements)
    climatology_hyperparameters = CombinationHyperparameters([CovariateHyperparameters(numpy.log(15.0))] + covariate_hyperparameters)
    #climatology_element = SeasonalElement(n_triangulation_divisions=2, n_harmonics=2, include_local_mean=False)
    #climatology_hyperparameters = SeasonalHyperparameters(n_spatial_components=2, common_log_sigma=0.0, common_log_rho=0.0)
    climatology_component = SpaceTimeComponent(
        ComponentStorage_InMemory(climatology_element, climatology_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True, method='APPROXIMATED')

    # LARGE SCALE (kronecker product) COMPONENT: combining large scale trends with bias terms accounting for homogenization effects
    if args.land_biases:
        bias_element, bias_hyperparameters = [InsituLandBiasElement(BREAKPOINTS_FILE)], [CovariateHyperparameters(numpy.log(.9))]
        print('Adding bias terms for insitu land homogenization')
    else:
        bias_element, bias_hyperparameters = [], []

    large_scale_element = CombinationElement(
        [SpaceTimeKroneckerElement(n_triangulation_divisions=2, alpha=2,
                                   starttime=-30, endtime=365 * 1 + 30,
                                   n_nodes=12 * 1 + 2, overlap_factor=2.5, H=1)] + bias_element)
    large_scale_hyperparameters = CombinationHyperparameters(
        [SpaceTimeSPDEHyperparameters(space_log_sigma=0.0,
                                      space_log_rho=numpy.log(numpy.radians(15.0)),
                                      time_log_rho=numpy.log(15.0))] + bias_hyperparameters)
    large_scale_component = SpaceTimeComponent(
        ComponentStorage_InMemory(large_scale_element, large_scale_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True, method='APPROXIMATED')

    # LOCAL COMPONENT: combining local scale variations with global satellite bias terms
    if args.global_biases:
        bias_elements = [BiasElement(groupname, 1) for groupname in GLOBAL_BIASES_GROUP_LIST]
        bias_hyperparameters = [CovariateHyperparameters(numpy.log(15.0)) for index in range(3)]
        print('Adding global bias terms for all the surfaces')
    else:
        bias_elements, bias_hyperparameters = [], []

    n_triangulation_divisions_local = 7
    local_log_sigma = numpy.log(5)
    local_log_rho = numpy.log(numpy.radians(5.0))
    local_element = NonStationaryLocal(n_triangulation_divisions=n_triangulation_divisions_local)
    n_local_nodes = local_element.spde.n_latent_variables()
    local_scale_element = CombinationElement([local_element] + bias_elements)
    local_hyperparameters = ExpandedLocalHyperparameters(
        log_sigma=numpy.repeat(local_log_sigma, n_local_nodes),
        log_rho=numpy.repeat(local_log_rho, n_local_nodes))
    local_scale_hyperparameters = CombinationHyperparameters([local_hyperparameters] + bias_hyperparameters)
    local_component = DelayedSpatialComponent(
        ComponentStorage_InMemory(local_scale_element, local_scale_hyperparameters),
        SpatialComponentSolutionStorage_InMemory(),
        compute_uncertainties=True, method='APPROXIMATED')

    print "hyperparameter storage:", local_component.storage.hyperparameters

    print 'Analysing inputs'

    # Analysis system using the specified components, for the Tmean observable
    ##analysis_system = AnalysisSystem(
    ##    [climatology_component, large_scale_component, local_component],
    ##    ObservationSource.TMEAN)
    analysis_system = OptimizationSystem(
        [climatology_component, local_component],
        ObservationSource.TMEAN)

    # Object to load raw binary inputs at time indices
    inputloaders = [AnalysisSystemInputLoaderRawBinary_Sources(basepath, source, time_indices) for source in sources]

    for iteration in range(args.n_iterations):

        message = 'Iteration {}'.format(iteration)
        print(message)

        # Update with data
        analysis_system.update(inputloaders, time_indices)

    ##################################################
    # Optimize local model hyperparameters

    # Loop over local regions, generate optimization systems, fit hyperparameters and save

    # split spde and bias models for local component into two components
    global_spde_sub_component_definition = ComponentStorage_InMemory(
        CombinationElement([local_element]),
        CombinationHyperparameters([local_hyperparameters]))
    global_spde_sub_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
    global_spde_sub_component = DelayedSpatialComponent(
        global_spde_sub_component_definition,
        global_spde_sub_component_storage_solution)

    bias_sub_component_definition = ComponentStorage_InMemory(
        CombinationElement(bias_elements),
        CombinationHyperparameters(bias_hyperparameters))
    bias_sub_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
    bias_sub_component = DelayedSpatialComponent(
        bias_sub_component_definition,
        bias_sub_component_storage_solution)

    element_optimisation_flags = [True, False, False, False]  # one spde, three biases

    for time_key in time_indices:
        split_states_time(local_component, global_spde_sub_component,
                          bias_sub_component, element_optimisation_flags, time_key)

    # Define subregions and extract their states
    neighbourhood_level = 1
    n_subregions = global_spde_sub_component.storage.element_read().combination[0].spde.n_triangles_at_level(neighbourhood_level)

    hyperparameter_file_template = "local_hyperparameters.%i.%i.%i.npy"

    fit_hyperparameters = True
    optimization_component_index = 2
    if fit_hyperparameters:
        for region_index in range(n_subregions):

            # Setup model for local subregion of neighbours with super triangle
            view_flags = [True, ]
            region_element = CombinationElement(
                [LocalSubRegion(n_triangulation_divisions_local, neighbourhood_level, region_index)])
            region_hyperparameters = ExtendedCombinationHyperparameters(
                [LocalHyperparameters(log_sigma=local_log_sigma, log_rho=local_log_rho)])
            region_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
            region_sub_component = DelayedSpatialComponent(
                ComponentStorage_InMemory(region_element, region_hyperparameters),
                region_component_storage_solution)

            for time_key in time_indices:
                print "region_index, time_key:", region_index, time_key
                extract_local_view_states_time(global_spde_sub_component, region_sub_component, view_flags, time_key)

            print "running optimization for region:", region_index
            region_optimization_system = OptimizationSystem(
                [climatology_component, bias_sub_component, region_sub_component],
                ObservationSource.TMEAN)

            for time_key in time_indices:
                region_optimization_system.update_component_time(inputloaders, optimization_component_index, time_key)

            # commented version that works for few days inputs
            #region_optimization_system.components[optimization_component_index].component_solution().optimize()
            #region_optimization_system.components[optimization_component_index].storage.hyperparameters.get_array()
            #hyperparameter_file = os.path.join(args.outpath, hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index))
            #region_sub_component.storage.hyperparameters.values_to_npy_savefile(hyperparameter_file)

            # replaced with version for full processing based on json dump of input files - need to generate the input_descriptor dict
            hyperparameter_file = os.path.join(
                args.outpath,
                hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index))
            region_optimization_system.process_inputs(input_descriptor, optimization_component_index, time_indices)
            region_optimization_system.optimize_component(optimization_component_index, hyperparameter_storage_file=hyperparameter_file)

            fitted_hyperparameters_converted = region_sub_component.storage.hyperparameters.get_array()
            fitted_hyperparameters_converted[0] = numpy.exp(fitted_hyperparameters_converted[0])
            fitted_hyperparameters_converted[1] = numpy.exp(fitted_hyperparameters_converted[1]) * 180.0 / numpy.pi
            print 'fitted_hyperparameters_converted:', fitted_hyperparameters_converted

    # Setup model for the super triangle without neighbours for hyperparameter merging
    region_spdes = []
    region_hyperparameter_values = []
    for region_index in range(n_subregions):

        # Redefine the region sub component as a supertriangle rather than a neighbourhood
        region_element = CombinationElement(
            [LocalSuperTriangle(n_triangulation_divisions_local, neighbourhood_level, region_index)])
        region_hyperparameters = ExtendedCombinationHyperparameters(
            [LocalHyperparameters(log_sigma=local_log_sigma, log_rho=local_log_rho)])
        region_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
        region_sub_component = DelayedSpatialComponent(
            ComponentStorage_InMemory(region_element, region_hyperparameters),
            region_component_storage_solution)

        # Read the optimized hyperparameters
        hyperparameter_file = os.path.join(
            args.outpath,
            hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index))
        region_sub_component.storage.hyperparameters.values_from_npy_savefile(hyperparameter_file)

        # Append the spde model and hyperparameters to their lists for merging
        region_spdes.append(region_element.combination[0].spde)
        region_hyperparameter_values.append(region_sub_component.storage.hyperparameters.get_array())

    # merge and save hyperparameters
    full_spde = local_element.spde
    new_hyperparameter_values, global_sigma_design, global_rho_design = full_spde.merge_local_parameterisations(
        region_spdes, region_hyperparameter_values, merge_method='exp_average')

    local_hyperparameters.set_array(new_hyperparameter_values)
    hyperparameter_file_merged = "merged_hyperparameters.%i.%i.npy" % (n_triangulation_divisions_local, neighbourhood_level)
    local_hyperparameters.values_to_npy_savefile(os.path.join(args.outpath, hyperparameter_file_merged))

    # Refit local model with the optimized hyperparameters
    analysis_system.update_component(inputloaders, 1, time_indices)

    ##################################################

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Get date for output
        outputdate = inputloaders[0].datetime_at_time_index(time_index)
        print 'Evaluating output grid: ', outputdate

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index, outputdate,
            latitudes=numpy.linspace(-89.875, 89.875, num=definitions.GLOBAL_FIELD_SHAPE[1]),
            longitudes=numpy.linspace(-179.875, 179.875, num=definitions.GLOBAL_FIELD_SHAPE[2]))

        # print 'Size of grid : ', outputstructure.number_of_observations()

        # Evaluate expected value at these locations
        result_expected_value = analysis_system.evaluate_expected_value('MAP', outputstructure, 'POINTWISE')
        result_expected_uncertainties = analysis_system.evaluate_expected_value('post_STD', outputstructure, 'POINTWISE')

        # Make output filename
        pathname = 'eustace_example_output_{0:04d}{1:02d}{2:02d}.nc'.format(outputdate.year, outputdate.month, outputdate.day)
        pathname = os.path.join(args.outpath, pathname)
        print 'Saving: ', pathname

        # Save results
        filebuilder = FileBuilderGlobalField(
            pathname, time_index,
            'Infilling Example', 'UNVERSIONED', definitions.TAS.name, '',
            'Example data only',
            'eustace.analysis.advanced_standard.examples.example_eustace_few_days', '')
        filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

    print 'Complete'
def operation_input_resolve(self, request_skip, catalogue, step):
    return OperationParameterList(
        numpy.arange(int(days_since_epoch(step.start)),
                     int(days_since_epoch(step.end) + 1),
                     1, numpy.int32))
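# Behaviour sketch: the +1 makes the end day inclusive. For example, with
# step.start = 2006-02-01 and step.end = 2006-02-03 (day numbers 57009 and
# 57011 under the 1850-01-01 epoch from the tests above), the parameter list
# covers:
#
#     numpy.arange(57009, 57011 + 1, 1, numpy.int32)   # -> [57009 57010 57011]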
def time_index(self):
    return epoch.days_since_epoch(self.datetime)