Example #1
def specify(manager, options):
    """Return the list of operations and expected output given the input catalogue."""

    # Require many local variables here
    # pylint: disable=too-many-locals

    # parse input option list
    parser = argparse.ArgumentParser()
    parser.add_argument('--start', required=True)
    parser.add_argument('--end', required=True)
    args = parser.parse_args(options)

    # process a whole number of days
    startday = int(days_since_epoch(datetime_numeric.parse(args.start)))
    endday = int(days_since_epoch(datetime_numeric.parse(args.end)))

    # find items in catalogue grouped by day
    lstinput = manager.references_groupbyday(INPUTNAME, subsetindex=0)
    auxinput = manager.references_groupbyday(INPUTNAME, subsetindex=1)

    # the main output dataset
    dataset = manager.newdataset()

    # list of subsets (one for each command pattern)
    subsets = [dataset.newsubset([spec.outputpattern]) for spec in SUBSETSPECS]

    # iterate over subsets
    for subsetindex, subset in enumerate(subsets):

        # process each day
        for dayindex in range(startday, endday + 1):

            # convert to datetime object for formatting etc
            day = epoch_plus_days(dayindex)

            # loop over LST files for this day
            inputs = []
            for lstreference in lstinput[dayindex]:

                # find AUX file matching the LST file
                lsttime = manager.match(lstreference).time
                print 'day {0} time {1}'.format(dayindex, lsttime)
                auxreference = next(
                    reference for reference in auxinput[dayindex]
                    if manager.match(reference).time == lsttime)

                # append pair
                inputs.extend([lstreference, auxreference])

            # build output filename
            outputs = [subset.newfiletime(day)]

            # Append this operation to make it
            dataset.newoperation(inputs, outputs, SUBSETSPECS[subsetindex])
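
The loop above maps day indices back to datetime objects with epoch_plus_days, the inverse of the days_since_epoch helper used throughout these examples. A minimal sketch of such an inverse, assuming the 1850-01-01 EUSTACE epoch implied by the unit tests in examples #14 and #15 below (not the project's actual implementation):

from datetime import datetime, timedelta

EPOCH = datetime(1850, 1, 1)  # assumed EUSTACE epoch (see examples #14 and #15)

def epoch_plus_days(daynumber):
    """Datetime corresponding to a (possibly fractional) day offset from the epoch."""
    return EPOCH + timedelta(days=float(daynumber))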
Example #2
    def __init__(self, path, source, observable, startdate, enddate):
        """Build counter instance."""

        # parse strings to datetime objects
        startdate = ObsCounter.todatetime(startdate)
        enddate = ObsCounter.todatetime(enddate)

        # set member variables
        self.path = path
        self.source = source
        self.observable = observable

        # express as day numbers
        self.startday = int(days_since_epoch(startdate))
        self.endday = int(days_since_epoch(enddate))
Example #3
    def operate(self, results, netcdf):
        """Operate on netcdf dataset object and populate results dictionary."""

        timevariable = netcdf.variables['time']
        timevalue = netCDF4.num2date(timevariable[0], units=timevariable.units)
        results[ConsistentModelOutputNetCDF.FIELDNAME_DAYNUMBER] = int(
            days_since_epoch(timevalue))
Example #4
def run(outputfilename, inputs, processdate):
    """EUMOPPS run commands."""

    # Need day number for inclusion in output
    daynumber = numpy.int64(days_since_epoch(processdate))

    # Grid onto this
    axes = GlobalFieldAxes2DWithTime(daynumber).aslist()
    outputgrid = DiagnosticGridBins(axes)

    # Cache location lookup as we might refer to same one multiple times
    locationlookup_cache = {}

    # One set of results per available source [land|sea|ice|lakes|in-situ land|in-situ ocean] per available observable [Tmean|Tmax|Tmin]
    for inputindex, descriptor in enumerate(inputs):

        # Cached loading of location lookup
        try:

            # Attempt to find in cache in case it's already been loaded for other sources/observables
            locationlookup = locationlookup_cache[descriptor.locationfilename]

        except KeyError:

            # Not found - load it for the first time this operation
            locationlookup = LocationLookupRawBinaryReader().read(
                descriptor.locationfilename)
            locationlookup_cache[descriptor.locationfilename] = locationlookup

        # Read correlation ranges for this item
        ranges = LocalCorrelationRangeRawBinaryReader().read(
            descriptor.correlationrangesfilename)

        # Observation files for each observable
        filespecs = {
            descriptor.observable:
            ObservableFileSpec(descriptor.observationfilename, ranges)
        }

        # Load as observation source
        filesource = ObservationSourceSingleDayRawBinary(
            locationlookup, filespecs, daynumber)

        # Show stats
        print_stats(filesource, descriptor.sourcename, descriptor.observable)

        # Connector for gridding this
        connector = DiagnosticGridObservationConnector(axes, filesource)

        # Grid each observable
        dailydata = connector.get_day(descriptor.observable, daynumber)
        outputgrid.create_fields_from_sparse_observations(
            descriptor.sourcename, dailydata)
        outputgrid.compute_weighted_mean(descriptor.sourcename,
                                         descriptor.observable)

    # Store result
    ensuredirectory(outputfilename)
    saver = NetCDFSaverEUSTACE(outputfilename)
    saver.write_cubes(outputgrid)
Example #5
    def __init__(self, filename):
        """Load from filename."""

        # Load file
        inputdata = netCDF4.Dataset(filename, 'r')

        # Retrieve fields
        inputfields = {
            name: inputdata.variables[name][:]
            for name in IceSurfaceTemperatureQualityControlNetCDF.FIELDNAMES
        }

        # Reduce to 2D those with time dimension
        inputfields = {
            name: field[0, :, :] if
            (field.ndim == 3 and not '3d' in name) else field
            for name, field in inputfields.iteritems()
        }

        # Get time
        timevariable = inputdata.variables['time']
        fieldtime = netCDF4.num2date(timevariable[0], units=timevariable.units)

        # Convert to days since EUSTACE epoch
        daynumber = int(days_since_epoch(fieldtime))

        # Construct
        super(IceSurfaceTemperatureQualityControlNetCDF,
              self).__init__(daynumber, **inputfields)
Example #6
def build_bias_matrix(model, first_year, last_year):
    """Evaluate the bias model at the first day of each year for each station and store"""

    #model = load_fitted_breakmodel(modelfile)

    evaluation_dates = [
        datetime.datetime(year, 1, 1)
        for year in range(first_year, last_year + 1)
    ]

    n_stations = max(model['expanded_station_indices']) + 1
    station_indices = range(n_stations)

    time_indices = [days_since_epoch(date) for date in evaluation_dates]

    bias_index = numpy.zeros((n_stations, len(time_indices))) + numpy.nan
    bias_grid = numpy.zeros((n_stations, len(time_indices))) + numpy.nan

    for ind, time_index in enumerate(time_indices):
        effect = insitu_land_covariate_effect(time_index, station_indices,
                                              model['breakpoints'])

        if effect is not None:
            bias_index[effect[:, 0], ind] = effect[:, 1]
            bias_grid[effect[:, 0], ind] = model['biases'][effect[:, 1]]

    model['time_indices'] = time_indices
    model['evaluation_dates'] = evaluation_dates  # dates represented by time_indices
    model['bias_index'] = bias_index  # indices to the biases mapping into the adjustment matrix
    model['bias_grid'] = bias_grid  # the bias matrix
Example #7
    def __init__(self, format, filename):
        """Load and parse specified filename according to specified format instance."""

        super(ObservationSource, self).__init__()                

        # assume text-gzip
        textgzip = gzip.open(filename, 'rb')

        # temporary text file
        textfile = tempfile.NamedTemporaryFile(prefix='eustace.preprocess.insitu_ocean.', suffix='.txt')

        # unzip
        shutil.copyfileobj(textgzip, textfile)

        # parse tab-delimited format
        # comments=None is required because some stations have the hashtag character in their name
        textfile.seek(0)
        txtdata = numpy.genfromtxt(textfile, delimiter=HadNMAT2Format.FIELDWIDTH, usecols=format.usecols, names=HadNMAT2Format.FIELDS, dtype=HadNMAT2Format.DTYPES, comments=None)

        # build coordinates
        self.coords = numpy.vstack([ 
                txtdata[HadNMAT2Format.LAT] * HadNMAT2Format.LOCATION_SCALE, 
                txtdata[HadNMAT2Format.LON] * HadNMAT2Format.LOCATION_SCALE ])

        # build date values
        year = txtdata[HadNMAT2Format.YEAR]
        month = txtdata[HadNMAT2Format.MONTH]
        day = txtdata[HadNMAT2Format.DAY]
        self.time = numpy.array([ days_since_epoch(datetime(year[index], month[index], day[index])) for index in range(txtdata.shape[0]) ], numpy.float32)

        # compute mean in kelvin
        self.tmean = (txtdata[HadNMAT2Format.AIRT].astype(numpy.float32) * HadNMAT2Format.TEMPERATURE_SCALE)  + HadNMAT2Format.TEMPERATURE_OFFSET
Example #8
    def test_all_methods(self):

        # Test data
        testfile = tempfile.NamedTemporaryFile(
            prefix='eustace.outputformats.test.test_filebuilder.',
            suffix='.nc')

        # Build attributes for example
        attributes = DatasetAttributesGlobalField(
            dataset='Example',
            version='A',
            mainvariable='tas',
            source='B',
            institution='MO',
            comment='EUSTACE project example file format for global field',
            history='Created ' + time.strftime('%c'))

        # Day number for the given date
        daynumber = int(days_since_epoch(datetime(2015, 11, 5)))

        # object to build global field file at current time
        builder = FileBuilderGlobalField(testfile.name, daynumber,
                                         **attributes.__dict__)

        # fill field with -95 degree C temperatures, which is representative of the lowest temperature likely
        # to be found in EUSTACE
        shape = definitions.GLOBAL_FIELD_SHAPE

        testdata_values = numpy.full(shape, -95. + 273.15)
        testdata_mask = numpy.full(shape, False)

        testdata = numpy.ma.masked_array(data=testdata_values,
                                         mask=testdata_mask)

        builder.add_global_field(definitions.TAS, testdata)

        builder.save_and_close()

        # Check results
        result = netCDF4.Dataset(testfile.name, 'r')

        #check that the data haven't wrapped
        numpy.testing.assert_almost_equal(result.variables['tas'][:],
                                          testdata,
                                          decimal=4)

        #check that the time offsets match the longitudes
        numpy.testing.assert_almost_equal(result.variables['longitude'][:] /
                                          360.,
                                          result.variables['timeoffset'][:],
                                          decimal=6)
Example #9
def extractdaynumber(filename):
    """This is a hack to use filename to get daynumber, assuming filename ends with YYmmmdd.bin
       Ideally EUMOPPS would provide us with this but it doesn't at present."""

    # check file extension
    if filename[-4:] != '.bin':
        raise ValueError('Filename \"{0}\" expected to end with .bin but does not'.format(filename))

    # extract date string
    datestring = filename[-12:-4]

    # Convert to datetime object
    t = datetime.strptime(datestring, '%Y%m%d')

    # Convert to daynumber
    return numpy.int64( days_since_epoch(t) )
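
For illustration, a filename following that YYYYMMDD.bin convention would be handled as below (the filename is hypothetical, not taken from the source; this assumes the function above and its imports of datetime, numpy and days_since_epoch):

# hypothetical input: the last eight characters before '.bin' are the date
daynumber = extractdaynumber('insitu_land_20061105.bin')  # numpy.int64 day index for 2006-11-05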
Example #10
def write_example_global_field(source, version, outputdirectory, institution):
    """Make an example output format for a global field."""

    # set fixed seed so pseudo-random noise is actually repeatable
    numpy.random.seed(1)

    # Build attributes for example
    attributes = DatasetAttributesGlobalField(
        dataset='Example',
        version=version,
        mainvariable='tas',
        source=source,
        institution=institution,
        comment='EUSTACE project example file format for global field',
        history='Created ' + time.strftime('%c'))

    # Day number for the given date
    daynumber = int(days_since_epoch(datetime(2015, 11, 5)))

    # Make a pathname
    pathname = attributes.build_pathname(outputdirectory, daynumber)

    # object to build global field file at current time
    builder = FileBuilderGlobalField(pathname, daynumber,
                                     **attributes.__dict__)

    # get some global field data
    field_data = get_global_field_data()

    # add examples to output
    builder.add_global_field(definitions.TAS, field_data)
    builder.add_global_field(definitions.TASMIN, field_data - float(10.0))
    builder.add_global_field(definitions.TASMAX, field_data + float(10.0))

    # also get some uncertainty data
    uncertainty_data = get_uncertainty_example_data()

    # add to output
    builder.add_uncertainty_parameter(
        'uncertainty_example', 'An example of an uncertainty variable (K)',
        uncertainty_data)

    # store the result
    builder.save_and_close()
Example #11
    def __init__(self, pathname, outputstructure):
        """Create with specified output structure."""

        super(FileBuilder, self).__init__()

        # get EUSTACE daynumber
        daynumber = days_since_epoch(outputstructure.time_datetime())

        # make the file
        self.create(pathname, title='Test output', institution='', comment='Test output', history='', source='')

        # time variable
        time_variable = OutputVariable(
            name='time',
            dtype=numpy.float32,
            fill_value=None,
            standard_name='time',
            long_name='Time',
            units=definitions.TIME_UNITS_DAYS_SINCE_EPOCH,
            calendar='gregorian',
            axis='T')

        # set time in days since the EUSTACE epoch [and set UNLIMITED]
        self.add_dimension_and_variable(
            dimensionname='time',
            variable=time_variable,
            values=numpy.array([daynumber], numpy.float32),
            unlimited=True)

        # global latitude axis
        self.add_dimension_and_variable(
            dimensionname=definitions.DIMENSION_NAME_LATITUDE,
            variable=definitions.LATITUDE,
            values=outputstructure.latitudes)

        # global longitude axis
        self.add_dimension_and_variable(
            dimensionname=definitions.DIMENSION_NAME_LONGITUDE,
            variable=definitions.LONGITUDE,
            values=outputstructure.longitudes)
Example #12
    def process_inputs(self, input_descriptor, component_index, time_keys):
        """Pre-process observations at specified times for a specified component
        
        Does not solve the system. To preprocess the observations and also
        solve the system run update_component.
        
        """

        for time_key in time_keys:

            # convert time_key string to days since epoch
            this_time = dateutil.parser.parse(time_key)
            time_index = int(epoch.days_since_epoch(this_time))

            # Build inputloaders from list of sources
            inputloaders = [
                AnalysisSystemInputLoaderRawBinary_OneDay(
                    time_index=time_index, **source)
                for source in input_descriptor[time_key]
            ]

            # Build and store measurement systems for component
            self.update_component_time(inputloaders, component_index,
                                       time_index)
Example #13
def run_day(outputobservationsfilename, outputlocationfilename, inputfilename,
            processdate, sourcename, observable):
    """EUMOPPS run commands."""

    # Get the info
    source = SOURCECLASS[sourcename](inputfilename)

    # Compute day number (since EUSTACE epoch)
    daynumber = numpy.int64(days_since_epoch(processdate))

    # Retrieve daily data (and daily locations)
    dailydata = source.observations(observable)
    dailylocations = source.observation_location_lookup()

    # Location lookup structure with new unique ID
    dailylookup = LocationLookupWithID(uuid.uuid1(), dailylocations)

    # Store daily data
    ObservationRawBinaryWriter().write_day(outputobservationsfilename,
                                           dailylookup.uuid, dailydata,
                                           daynumber)

    # Store corresponding location lookup
    LocationLookupRawBinaryWriter().write(outputlocationfilename, dailylookup)
Example #14
    def test_days_since_epoch_negative(self):
        self.assertAlmostEqual(-1.0, days_since_epoch(datetime(1849, 12, 31)))
Example #15
    def test_days_since_epoch_zero(self):
        result = days_since_epoch(datetime(1850, 1, 1))
        self.assertTrue(isinstance(result, float))
        self.assertAlmostEqual(0.0, result)
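
Taken together, these two tests pin down the convention used by every example here: the EUSTACE epoch is 1850-01-01 and days_since_epoch returns a float number of days, which callers truncate with int() or numpy.int64 when they need a day index. A minimal sketch consistent with those tests, assuming a fixed EPOCH constant (the project's actual implementation may differ):

from datetime import datetime

EPOCH = datetime(1850, 1, 1)  # assumed epoch, chosen so the tests above pass

def days_since_epoch(t):
    """Fractional days elapsed between EPOCH and datetime t (negative before the epoch)."""
    return (t - EPOCH).total_seconds() / 86400.0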
Example #16
def main():

    print 'Advanced standard example using a few days of EUSTACE data'
    parser = argparse.ArgumentParser(description='Advanced standard example using a few days of EUSTACE data')
    parser.add_argument('outpath', help='directory where the output should be redirected')
    parser.add_argument('--json_descriptor', default = None, help='a json descriptor containing the covariates to include in the climatology model')
    parser.add_argument('--land_biases', action='store_true', help='include insitu land homogenization bias terms')
    parser.add_argument('--global_biases', action='store_true', help='include global satellite bias terms')
    parser.add_argument('--n_iterations', type=int, default=5, help='number of solving iterations')
    args = parser.parse_args()

    # Input data path
    basepath = os.path.join('/work/scratch/eustace/rawbinary3')

    # Days to process
    time_indices = range(int(days_since_epoch(datetime(2006, 2, 1))), int(days_since_epoch(datetime(2006, 2, 2))))

    # Sources to use
    sources = [ 'surfaceairmodel_land', 'surfaceairmodel_ocean', 'surfaceairmodel_ice', 'insitu_land', 'insitu_ocean' ]    

    #SETUP
    # setup for the seasonal core: climatology covariates setup read from file
    seasonal_setup = {'n_triangulation_divisions': 5,
                      'n_harmonics': 4,
                      'n_spatial_components': 6,
                      'amplitude': 2.,
                      'space_length_scale': 5.,  # length scale in units of degrees
                      }
    grandmean_amplitude = 15.0

    # setup for the large scale component
    spacetime_setup = {'n_triangulation_divisions': 2,
                       'alpha': 2,
                       'starttime': 0,
                       'endtime': 10.,
                       'n_nodes': 2,
                       'overlap_factor': 2.5,
                       'H': 1,
                       'amplitude': 1.,
                       'space_length_scale': 15.0,  # length scale in units of degrees
                       'time_length_scale': 15.0    # length scale in units of days
                       }
    bias_amplitude = .9

    # setup for the local component
    local_setup = {'n_triangulation_divisions':6,
                   'amplitude':2.,
                   'space_length_scale':2. # length scale in units of degrees
                  }
    globalbias_amplitude = 15.0

    # CLIMATOLOGY COMPONENT: combining the seasonal core along with latitude harmonics, altitude and coastal effects    
    if args.json_descriptor is not None:
      loader = LoadCovariateElement(args.json_descriptor)
      loader.check_keys()
      covariate_elements, covariate_hyperparameters = loader.load_covariates_and_hyperparameters()
      print('The following fields have been added as covariates of the climatology model')
      print(loader.data.keys())
    else:
      covariate_elements, covariate_hyperparameters = [], []

    climatology_element = CombinationElement(
        [SeasonalElement(n_triangulation_divisions=seasonal_setup['n_triangulation_divisions'],
                         n_harmonics=seasonal_setup['n_harmonics'],
                         include_local_mean=True),
         GrandMeanElement()] + covariate_elements)
    climatology_hyperparameters = CombinationHyperparameters(
        [SeasonalHyperparameters(n_spatial_components=seasonal_setup['n_spatial_components'],
                                 common_log_sigma=numpy.log(seasonal_setup['amplitude']),
                                 common_log_rho=numpy.log(numpy.radians(seasonal_setup['space_length_scale']))),
         CovariateHyperparameters(numpy.log(grandmean_amplitude))] + covariate_hyperparameters)
    climatology_component = SpaceTimeComponent(ComponentStorage_InMemory(climatology_element, climatology_hyperparameters), SpaceTimeComponentSolutionStorage_InMemory(), 
                                                                         compute_uncertainties=True, method='APPROXIMATED',
                                                                         compute_sample=True, sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # LARGE SCALE (kronecker product) COMPONENT: combining large scale trends with bias terms accounting for homogenization effects
    if args.land_biases:
        bias_element, bias_hyperparameters = [InsituLandBiasElement(BREAKPOINTS_FILE)], [CovariateHyperparameters(numpy.log(bias_amplitude))]
        print('Adding bias terms for insitu land homogenization')
    else:
        bias_element, bias_hyperparameters = [], []

    large_scale_element = CombinationElement( [SpaceTimeKroneckerElement(n_triangulation_divisions=spacetime_setup['n_triangulation_divisions'], 
                                                                         alpha=spacetime_setup['alpha'], 
                                                                         starttime=spacetime_setup['starttime'], 
                                                                         endtime=spacetime_setup['endtime'], 
                                                                         n_nodes=spacetime_setup['n_nodes'], 
                                                                         overlap_factor=spacetime_setup['overlap_factor'], 
                                                                         H=spacetime_setup['H'])] + bias_element)
    large_scale_hyperparameters = CombinationHyperparameters( [SpaceTimeSPDEHyperparameters(space_log_sigma=numpy.log(spacetime_setup['amplitude']),
                                                                                            space_log_rho=numpy.log(numpy.radians(spacetime_setup['space_length_scale'])), 
                                                                                            time_log_rho=numpy.log(spacetime_setup['time_length_scale']))] + bias_hyperparameters) 
    large_scale_component =  SpaceTimeComponent(ComponentStorage_InMemory(large_scale_element, large_scale_hyperparameters), SpaceTimeComponentSolutionStorage_InMemory(), 
                                                                          compute_uncertainties=True, method='APPROXIMATED',
                                                                          compute_sample=True, sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])
                                 
    # LOCAL COMPONENT: combining local scale variations with global satellite bias terms    
    if args.global_biases:
        bias_elements = [BiasElement(groupname, 1) for groupname in GLOBAL_BIASES_GROUP_LIST]
        bias_hyperparameters = [CovariateHyperparameters(numpy.log(globalbias_amplitude)) for index in range(len(GLOBAL_BIASES_GROUP_LIST))]
        print('Adding global bias terms for all the surfaces')
    else:
        bias_elements, bias_hyperparameters = [], []

    local_scale_element = CombinationElement([LocalElement(n_triangulation_divisions=local_setup['n_triangulation_divisions'])] + bias_elements)
    local_scale_hyperparameters = CombinationHyperparameters([LocalHyperparameters(log_sigma=numpy.log(local_setup['amplitude']), 
                                                                                   log_rho=numpy.log(numpy.radians(local_setup['space_length_scale'])))] + bias_hyperparameters)
    local_component = SpatialComponent(ComponentStorage_InMemory(local_scale_element, local_scale_hyperparameters), SpatialComponentSolutionStorage_InMemory(), 
                                                                 compute_uncertainties=True, method='APPROXIMATED',
                                                                 compute_sample=True, sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # Analysis system using the specified components, for the Tmean observable
    print 'Analysing inputs'

    analysis_system = AnalysisSystem(
        [ climatology_component, large_scale_component, local_component ],
        ObservationSource.TMEAN)

    # Object to load raw binary inputs at time indices
    inputloaders = [ AnalysisSystemInputLoaderRawBinary_Sources(basepath, source, time_indices) for source in sources ]

    for iteration in range(args.n_iterations):

        message = 'Iteration {}'.format(iteration)
        print(message)

        # Update with data
        analysis_system.update(inputloaders, time_indices)

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Get date for output
        outputdate = inputloaders[0].datetime_at_time_index(time_index)
        print 'Evaluating output grid: ', outputdate

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index, outputdate,
            latitudes=numpy.linspace(-90.+definitions.GLOBAL_FIELD_RESOLUTION/2., 90.-definitions.GLOBAL_FIELD_RESOLUTION/2., num=definitions.GLOBAL_FIELD_SHAPE[1]),
            longitudes=numpy.linspace(-180.+definitions.GLOBAL_FIELD_RESOLUTION/2., 180.-definitions.GLOBAL_FIELD_RESOLUTION/2., num=definitions.GLOBAL_FIELD_SHAPE[2]))

        # Evaluate expected value and uncertainty at these locations
        print 'Evaluating: MAP'
        result_expected_value = analysis_system.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)
        print 'Evaluating: post_STD'
        result_expected_uncertainties = analysis_system.evaluate_expected_value('post_STD', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)

        print 'Evaluating: climatology fraction'
        climatology_fraction = analysis_system.evaluate_climatology_fraction(outputstructure, [1, 1], 1000)

        print 'Evaluating: the sample'
        sample = analysis_system.evaluate_projected_sample(outputstructure)

        # Make output filename
        pathname = 'eustace_example_output_{0:04d}{1:02d}{2:02d}.nc'.format(outputdate.year, outputdate.month, outputdate.day)
        pathname = os.path.join(args.outpath, pathname)
        print 'Saving: ', pathname

        # Save results
        filebuilder = FileBuilderGlobalField(
            pathname,
            time_index,
            'Infilling Example',
            'UNVERSIONED',
            definitions.TAS.name,
            '',
            'Example data only',
            'eustace.analysis.advanced_standard.examples.example_eustace_few_days',
            '')
        filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION, climatology_fraction.reshape(definitions.GLOBAL_FIELD_SHAPE))

        for index in range(definitions.GLOBAL_SAMPLE_SHAPE[3]):
            variable = copy.deepcopy(definitions.TASENSEMBLE)
            variable.name = variable.name + '_' + str(index)
            selected_sample = sample[:, index].ravel() + result_expected_value
            filebuilder.add_global_field(variable, selected_sample.reshape(definitions.GLOBAL_FIELD_SHAPE))

        filebuilder.save_and_close()

    print 'Complete'
Example #17
    def parse_date(s):
        """Helper to parse a date string to days since the epoch."""
        return days_since_epoch(
            datetime.strptime(s, ObservationSourceLakeReading.DATEFORMAT))
Example #18
def main():

    print 'Advanced standard example using a few days of EUSTACE data'
    parser = argparse.ArgumentParser(
        description='Advanced standard example using a few days of EUSTACE data'
    )
    parser.add_argument('outpath',
                        help='directory where the output should be redirected')
    parser.add_argument(
        '--json_descriptor',
        default=None,
        help=
        'a json descriptor containing the covariates to include in the climatology model'
    )
    parser.add_argument('--land_biases',
                        action='store_true',
                        help='include insitu land homogenization bias terms')
    parser.add_argument('--global_biases',
                        action='store_true',
                        help='include global satellite bias terms')
    parser.add_argument('--n_iterations',
                        type=int,
                        default=5,
                        help='number of solving iterations')
    args = parser.parse_args()

    # Input data path
    basepath = os.path.join('/work/scratch/eustace/rawbinary3')

    # Days to process
    #time_indices = range(int(days_since_epoch(datetime(2006, 2, 1))), int(days_since_epoch(datetime(2006, 2, 2))))
    #time_indices = range(int(days_since_epoch(datetime(1906, 2, 1))), int(days_since_epoch(datetime(1906, 2, 2))))

    date_list = [
        datetime(2006, 1, 1) + relativedelta(days=k) for k in range(3)
    ]

    #backwards_list = [date_list[i] for i in range(11, -1, -1)]
    #date_list = backwards_list

    time_indices = [int(days_since_epoch(date)) for date in date_list]

    # Sources to use
    sources = [
        'surfaceairmodel_land', 'surfaceairmodel_ocean', 'surfaceairmodel_ice',
        'insitu_land', 'insitu_ocean'
    ]
    sources = ['insitu_land', 'insitu_ocean']
    #sources = [ 'surfaceairmodel_land' ]
    # CLIMATOLOGY COMPONENT: combining the seasonal core along with latitude harmonics, altitude and coastal effects

    if args.json_descriptor is not None:
        loader = LoadCovariateElement(args.json_descriptor)
        loader.check_keys()
        covariate_elements, covariate_hyperparameters = loader.load_covariates_and_hyperparameters(
        )
        print(
            'The following fields have been added as covariates of the climatology model'
        )
        print(loader.data.keys())
    else:
        covariate_elements, covariate_hyperparameters = [], []

    #climatology_element = CombinationElement( [SeasonalElement(n_triangulation_divisions=2, n_harmonics=2, include_local_mean=False), GrandMeanElement()]+covariate_elements)
    #climatology_hyperparameters = CombinationHyperparameters( [SeasonalHyperparameters(n_spatial_components=2, common_log_sigma=0.0, common_log_rho=0.0), CovariateHyperparameters(numpy.log(15.0))] + covariate_hyperparameters )

    climatology_element = CombinationElement([
        GrandMeanElement(),
    ] + covariate_elements)
    climatology_hyperparameters = CombinationHyperparameters([
        CovariateHyperparameters(numpy.log(15.0)),
    ] + covariate_hyperparameters)

    #climatology_element =SeasonalElement(n_triangulation_divisions=2, n_harmonics=2, include_local_mean=False)
    #climatology_hyperparameters = SeasonalHyperparameters(n_spatial_components=2, common_log_sigma=0.0, common_log_rho=0.0)

    climatology_component = SpaceTimeComponent(
        ComponentStorage_InMemory(climatology_element,
                                  climatology_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')

    # LARGE SCALE (kronecker product) COMPONENT: combining large scale trends with bias terms accounting for homogenization effects

    if args.land_biases:
        bias_element, bias_hyperparameters = [
            InsituLandBiasElement(BREAKPOINTS_FILE)
        ], [CovariateHyperparameters(numpy.log(.9))]
        print('Adding bias terms for insitu land homogenization')
    else:
        bias_element, bias_hyperparameters = [], []

    large_scale_element = CombinationElement([
        SpaceTimeKroneckerElement(n_triangulation_divisions=2,
                                  alpha=2,
                                  starttime=-30,
                                  endtime=365 * 1 + 30,
                                  n_nodes=12 * 1 + 2,
                                  overlap_factor=2.5,
                                  H=1)
    ] + bias_element)
    large_scale_hyperparameters = CombinationHyperparameters([
        SpaceTimeSPDEHyperparameters(space_log_sigma=0.0,
                                     space_log_rho=numpy.log(
                                         numpy.radians(15.0)),
                                     time_log_rho=numpy.log(15.0))
    ] + bias_hyperparameters)
    large_scale_component = SpaceTimeComponent(
        ComponentStorage_InMemory(large_scale_element,
                                  large_scale_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')

    # LOCAL COMPONENT: combining local scale variations with global satellite bias terms

    if args.global_biases:
        bias_elements = [
            BiasElement(groupname, 1) for groupname in GLOBAL_BIASES_GROUP_LIST
        ]
        bias_hyperparameters = [
            CovariateHyperparameters(numpy.log(15.0)) for index in range(3)
        ]
        print('Adding global bias terms for all the surfaces')
    else:
        bias_elements, bias_hyperparameters = [], []

    n_triangulation_divisions_local = 7
    local_log_sigma = numpy.log(5)
    local_log_rho = numpy.log(numpy.radians(5.0))
    local_element = NonStationaryLocal(
        n_triangulation_divisions=n_triangulation_divisions_local)
    n_local_nodes = local_element.spde.n_latent_variables()
    local_scale_element = CombinationElement([local_element] + bias_elements)
    local_hyperparameters = ExpandedLocalHyperparameters(
        log_sigma=numpy.repeat(local_log_sigma, n_local_nodes),
        log_rho=numpy.repeat(local_log_rho, n_local_nodes))
    local_scale_hyperparameters = CombinationHyperparameters(
        [local_hyperparameters] + bias_hyperparameters)
    local_component = DelayedSpatialComponent(
        ComponentStorage_InMemory(local_scale_element,
                                  local_scale_hyperparameters),
        SpatialComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')
    print "hyperparameter storage:", local_component.storage.hyperparameters
    print 'Analysing inputs'

    # Analysis system using the specified components, for the Tmean observable
    ##analysis_system = AnalysisSystem(
    ##    [ climatology_component, large_scale_component, local_component ],
    ##    ObservationSource.TMEAN)

    analysis_system = OptimizationSystem(
        [climatology_component, local_component], ObservationSource.TMEAN)

    # Object to load raw binary inputs at time indices
    inputloaders = [
        AnalysisSystemInputLoaderRawBinary_Sources(basepath, source,
                                                   time_indices)
        for source in sources
    ]

    for iteration in range(args.n_iterations):

        message = 'Iteration {}'.format(iteration)
        print(message)

        # Update with data
        analysis_system.update(inputloaders, time_indices)

    ##################################################

    # Optimize local model hyperparameters

    # Loop over local regions, generate optimization systems, fit hyperparameters and save

    # split spde and bias models for local component into two components
    global_spde_sub_component_definition = ComponentStorage_InMemory(
        CombinationElement([local_element]),
        CombinationHyperparameters([local_hyperparameters]))
    global_spde_sub_component_storage_solution = SpatialComponentSolutionStorage_InMemory(
    )
    global_spde_sub_component = DelayedSpatialComponent(
        global_spde_sub_component_definition,
        global_spde_sub_component_storage_solution)

    bias_sub_component_definition = ComponentStorage_InMemory(
        CombinationElement(bias_elements),
        CombinationHyperparameters(bias_hyperparameters))
    bias_sub_component_storage_solution = SpatialComponentSolutionStorage_InMemory(
    )
    bias_sub_component = DelayedSpatialComponent(
        bias_sub_component_definition, bias_sub_component_storage_solution)

    element_optimisation_flags = [True, False, False,
                                  False]  # one spde, three biases

    for time_key in time_indices:
        split_states_time(local_component, global_spde_sub_component,
                          bias_sub_component, element_optimisation_flags,
                          time_key)

    # Define subregions and extract their states
    neighbourhood_level = 1

    n_subregions = global_spde_sub_component.storage.element_read(
    ).combination[0].spde.n_triangles_at_level(neighbourhood_level)
    hyperparameter_file_template = "local_hyperparameters.%i.%i.%i.npy"

    fit_hyperparameters = True
    optimization_component_index = 2
    if fit_hyperparameters:
        for region_index in range(n_subregions):
            # Setup model for local subregion of neighbours with super triangle
            view_flags = [
                True,
            ]
            region_element = CombinationElement([
                LocalSubRegion(n_triangulation_divisions_local,
                               neighbourhood_level, region_index)
            ])
            region_hyperparameters = ExtendedCombinationHyperparameters([
                LocalHyperparameters(log_sigma=local_log_sigma,
                                     log_rho=local_log_rho)
            ])
            region_component_storage_solution = SpatialComponentSolutionStorage_InMemory(
            )
            region_sub_component = DelayedSpatialComponent(
                ComponentStorage_InMemory(region_element,
                                          region_hyperparameters),
                region_component_storage_solution)

            for time_key in time_indices:
                print "region_index, time_key:", region_index, time_key
                extract_local_view_states_time(global_spde_sub_component,
                                               region_sub_component,
                                               view_flags, time_key)

            print "running optimization for region:", region_index

            region_optimization_system = OptimizationSystem([
                climatology_component, bias_sub_component, region_sub_component
            ], ObservationSource.TMEAN)

            for time_key in time_indices:
                region_optimization_system.update_component_time(
                    inputloaders, optimization_component_index, time_key)

            # commented version that works for few days inputs
            #region_optimization_system.components[optimization_component_index].component_solution().optimize()
            #region_optimization_system.components[optimization_component_index].storage.hyperparameters.get_array()
            #hyperparameter_file = os.path.join(args.outpath, hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index) )
            #region_sub_component.storage.hyperparameters.values_to_npy_savefile( hyperparameter_file )

            # replaced with version for full processing based on a json dump of input files - need to generate the input_descriptor dict
            hyperparameter_file = os.path.join(
                args.outpath, hyperparameter_file_template %
                (n_triangulation_divisions_local, neighbourhood_level,
                 region_index))
            region_optimization_system.process_inputs(
                input_descriptor, optimization_component_index, time_indices)
            region_optimization_system.optimize_component(
                optimization_component_index,
                hyperparameter_storage_file=hyperparameter_file)

            fitted_hyperparameters_converted = region_sub_component.storage.hyperparameters.get_array(
            )
            fitted_hyperparameters_converted[0] = numpy.exp(
                fitted_hyperparameters_converted[0])
            fitted_hyperparameters_converted[1] = numpy.exp(
                fitted_hyperparameters_converted[1]) * 180.0 / numpy.pi
            print 'fitted_hyperparameters_converted:', fitted_hyperparameters_converted

    # Setup model for the super triangle without neighbours for hyperparameter merging
    region_spdes = []
    region_hyperparameter_values = []
    for region_index in range(n_subregions):
        # Redefine the region sub component as a supertriangle rather than a neighbourhood
        region_element = CombinationElement([
            LocalSuperTriangle(n_triangulation_divisions_local,
                               neighbourhood_level, region_index)
        ])
        region_hyperparameters = ExtendedCombinationHyperparameters([
            LocalHyperparameters(log_sigma=local_log_sigma,
                                 log_rho=local_log_rho)
        ])
        region_component_storage_solution = SpatialComponentSolutionStorage_InMemory(
        )
        region_sub_component = DelayedSpatialComponent(
            ComponentStorage_InMemory(region_element, region_hyperparameters),
            region_component_storage_solution)

        # Read the optimized hyperparameters
        hyperparameter_file = os.path.join(
            args.outpath,
            hyperparameter_file_template % (n_triangulation_divisions_local,
                                            neighbourhood_level, region_index))
        region_sub_component.storage.hyperparameters.values_from_npy_savefile(
            hyperparameter_file)

        # Append the spde model and hyperparameters to their lists for merging
        region_spdes.append(region_element.combination[0].spde)
        region_hyperparameter_values.append(
            region_sub_component.storage.hyperparameters.get_array())

    # merge and save hyperparameters
    full_spde = local_element.spde
    new_hyperparameter_values, global_sigma_design, global_rho_design = full_spde.merge_local_parameterisations(
        region_spdes, region_hyperparameter_values, merge_method='exp_average')

    local_hyperparameters.set_array(new_hyperparameter_values)
    hyperparameter_file_merged = "merged_hyperparameters.%i.%i.npy" % (
        n_triangulation_divisions_local, neighbourhood_level)
    local_hyperparameters.values_to_npy_savefile(
        os.path.join(args.outpath, hyperparameter_file_merged))

    # Refit local model with the optimized hyperparameters
    analysis_system.update_component(inputloaders, 1, time_indices)

    ##################################################

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Get date for output
        outputdate = inputloaders[0].datetime_at_time_index(time_index)
        print 'Evaluating output grid: ', outputdate

        #Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index,
            outputdate,
            latitudes=numpy.linspace(-89.875,
                                     89.875,
                                     num=definitions.GLOBAL_FIELD_SHAPE[1]),
            longitudes=numpy.linspace(-179.875,
                                      179.875,
                                      num=definitions.GLOBAL_FIELD_SHAPE[2]))

        # print 'Size of grid : ', outputstructure.number_of_observations()

        # Evaluate expected value at these locations
        result_expected_value = analysis_system.evaluate_expected_value(
            'MAP', outputstructure, 'POINTWISE')
        result_expected_uncertainties = analysis_system.evaluate_expected_value(
            'post_STD', outputstructure, 'POINTWISE')

        # Make output filename
        pathname = 'eustace_example_output_{0:04d}{1:02d}{2:02d}.nc'.format(
            outputdate.year, outputdate.month, outputdate.day)
        pathname = os.path.join(args.outpath, pathname)
        print 'Saving: ', pathname

        # Save results
        filebuilder = FileBuilderGlobalField(
            pathname, time_index, 'Infilling Example', 'UNVERSIONED',
            definitions.TAS.name, '', 'Example data only',
            'eustace.analysis.advanced_standard.examples.example_eustace_few_days',
            '')
        filebuilder.add_global_field(
            definitions.TAS,
            result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(
            definitions.TASUNCERTAINTY,
            result_expected_uncertainties.reshape(
                definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

    print 'Complete'
Example #19
    def operation_input_resolve(self, request_skip, catalogue, step):

        return OperationParameterList(
            numpy.arange(int(days_since_epoch(step.start)),
                         int(days_since_epoch(step.end) + 1), 1, numpy.int32))
Example #20
    def time_index(self):
        return epoch.days_since_epoch(self.datetime)