Example #1
def test_get_variable():
    variable = utils.get_variable(local_path(TESTDATA['cmip5_tasmax_2007_nc']))
    assert 'tasmax' == variable

    variable = utils.get_variable(local_path(
        TESTDATA['cordex_tasmax_2007_nc']))
    assert 'tasmax' == variable
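
flyingpigeon.utils.get_variable itself is not shown on this page. For orientation only, here is a minimal sketch of what such a helper could look like with plain netCDF4, assuming the data variable is the one that is neither a dimension nor an obvious coordinate/bounds variable (the real implementation may differ):

def guess_variable(nc_path):
    # hypothetical stand-in for flyingpigeon.utils.get_variable:
    # return the first variable that does not look like a coordinate,
    # time, or bounds variable
    from netCDF4 import Dataset
    ds = Dataset(nc_path)
    try:
        aux = set(ds.dimensions.keys()) | {'lat', 'lon', 'time', 'time_bnds', 'lat_bnds', 'lon_bnds'}
        return [name for name in ds.variables if name not in aux][0]
    finally:
        ds.close()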
Example #2
def write_to_file(nc_indice, data):
    from netCDF4 import Dataset
    from shutil import copy
    from os.path import split, join
    from flyingpigeon.utils import get_variable
    from flyingpigeon.metadata import get_frequency

    #path, nc_indice = split(indice_file)

    var = get_variable(nc_indice)
    fq = get_frequency(nc_indice)
    agg = nc_indice.split('_')[-2]

    nc = nc_indice.replace(var, 'tree').replace(agg, fq)
    copy(nc_indice, nc)

    ds = Dataset(nc, mode='a')
    vals = ds.variables[var]

    ds.renameVariable(var, 'tree')

    vals[:, :, :] = data[:, :, :]
    vals.long_name = 'Favourability for tree species'
    vals.standard_name = 'tree'
    vals.units = '0-1'
    ds.close()
    return nc
Example #3
File: sdm.py Project: KatiRG/flyingpigeon
def get_reference(ncs_indices, period='all'):
  """
  calculates the netCDF files containing the mean climatology for statistical GAM training
  :param ncs_indices: list of climate indices defining the growing conditions of tree species
  :param refperiod: time period for statistic training 
  :return present: present conditions
  """
  from datetime import datetime as dt
  from flyingpigeon.ocgis_module import call
  from flyingpigeon.utils import get_variable
  from os.path import basename
  
  if not period == 'all':
    s, e = period.split('-')
    start = dt.strptime(s + '-01-01', '%Y-%m-%d')
    end = dt.strptime(e + '-12-31', '%Y-%m-%d')
    time_range = [start, end]
  else:
    time_range = None
    
  ref_indices = []
  for nc_indice in ncs_indices: 
    variable = get_variable(nc_indice)
    f = basename(nc_indice).strip('.nc')
    prefix = '%s_ref-%s' % ('_'.join(f.split('_')[0:-1]), period) 
    
    ref_indices.append(call(resource=nc_indice, variable=variable, prefix=prefix,
                            calc=[{'func': 'mean', 'name': variable}],
                            calc_grouping=['all'], time_range=time_range))
  
  return ref_indices
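
Stripped of the netCDF handling, the ocgis call above requests a temporal mean over the whole selected time range; calc=[{'func': 'mean', ...}] with calc_grouping=['all'] collapses the time axis. A hedged numpy illustration of that reduction:

import numpy as np

vals = np.random.rand(360, 10, 20)  # stand-in for one indice array with shape (time, lat, lon)
climatology = vals.mean(axis=0)     # mean over the full period, one value per grid cell
print(climatology.shape)            # (10, 20)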
Example #4
File: sdm.py Project: KatiRG/flyingpigeon
def write_to_file(nc_indice, data):
  from netCDF4 import Dataset
  from shutil import copy
  from os.path import split, join
  from flyingpigeon.utils import get_variable
  from flyingpigeon.metadata import get_frequency 

  #path, nc_indice = split(indice_file)

  var = get_variable(nc_indice)
  fq = get_frequency(nc_indice)
  agg = nc_indice.split('_')[-2]

  nc = nc_indice.replace(var, 'tree').replace(agg, fq)
  copy(nc_indice, nc)

  ds = Dataset(nc, mode='a')
  vals = ds.variables[var]

  ds.renameVariable(var, 'tree')

  vals[:, :, :] = data[:, :, :]
  vals.long_name = 'Favourability for tree species'
  vals.standard_name = 'tree'
  vals.units = '0-1'
  ds.close()
  return nc
Example #5
def get_level(resource, level):
    from flyingpigeon.ocgis_module import call
    from netCDF4 import Dataset
    from flyingpigeon.utils import get_variable
    from numpy import squeeze

    try:
        level_data = call(resource, level_range=[int(level), int(level)])
        if type(resource) == list:
            resource.sort()
        variable = get_variable(level_data)
        logger.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        var = ds.variables.pop(variable)
        dims = var.dimensions
        new_var = ds.createVariable('z%s' % level,
                                    var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        # i = where(var[:]==level)
        new_var[:, :, :] = squeeze(var[:, 0, :, :])
        ds.close()
        logger.info('level %s extracted' % level)

        data = call(level_data, variable='z%s' % level)

    except Exception as e:
        logger.error('failed to extract level %s ' % e)
        raise  # re-raise: 'data' is undefined if the extraction failed
    return data
Example #6
def get_pca(resource):
  """
  calculation of principal components

  :param resource: netCDF file containing pressure values for a defined region and selected timesteps
  :return pca: sklean objct
  """
  from netCDF4 import Dataset, num2date
  from flyingpigeon.utils import get_variable

  var = get_variable(resource)
  print('variable name: %s' % var)
  ds = Dataset(resource)
  vals = ds.variables[var]
  lat = ds.variables['lat']
  lon = ds.variables['lon']
  #time = ds.variables['time']
  
  # make array of seasons:
  # convert netCDF timesteps to datetime
  #timestamps = num2date(time[:], time.units, time.calendar)
  #season = [get_season(s) for s in timestamps]
  
  from sklearn.decomposition import PCA
  import numpy as np

  # reshape (time, lat, lon) into (time, lat*lon) before decomposition
  data = np.array(vals)
  adata = data.reshape(vals[:].shape[0], vals[:].shape[1] * vals[:].shape[2])
  pca = PCA(n_components=50).fit_transform(adata)
  return vals, pca  # , season
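
Note that PCA(n_components=50).fit_transform() returns the transformed data (the component scores) rather than a fitted estimator. The reshape-then-decompose pattern works standalone, as this self-contained sketch shows:

import numpy as np
from sklearn.decomposition import PCA

data = np.random.rand(100, 12, 18)                # (time, lat, lon) stand-in for pressure fields
flat = data.reshape(data.shape[0], -1)            # one row per timestep, one column per grid cell
scores = PCA(n_components=5).fit_transform(flat)  # principal component scores
print(scores.shape)                               # (100, 5)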
Example #7
def get_level(resource, level):
    from flyingpigeon.ocgis_module import call
    from netCDF4 import Dataset
    from flyingpigeon.utils import get_variable
    from numpy import squeeze
    from os import path

    try:
        if type(resource) == list:
            resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0])
            # resource.sort()

        level_data = call(resource, level_range=[int(level), int(level)])
        variable = get_variable(level_data)
        LOGGER.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        var = ds.variables.pop(variable)
        dims = var.dimensions
        new_var = ds.createVariable('z%s' % level, var.dtype, dimensions=(dims[0], dims[2], dims[3]))
        # i = where(var[:]==level)
        new_var[:, :, :] = squeeze(var[:, 0, :, :])

        # TODO: Here may be an error! in case of exception, dataset will not close!
        # Exception arise for example for 20CRV2 data...
        try:
            new_var.setncatts({k: var.getncattr(k) for k in var.ncattrs()})
        except:
            LOGGER.info('Could not set attributes for z%s' % level)
        ds.close()
        LOGGER.info('level %s extracted' % level)
        data = call(level_data, variable='z%s' % level)
    except Exception:
        LOGGER.exception('failed to extract level')
        raise  # re-raise: 'data' is undefined if the extraction failed

    return data
Example #8
def get_reference(ncs_indices, period='all'):
    """
    calculates the netCDF files containing the mean climatology for statistical GAM training

    :param ncs_indices: list of climate indices defining the growing conditions of tree species
    :param refperiod: time period for statistic training

    :return present: present conditions
    """
    from datetime import datetime as dt
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable
    from os.path import basename

    if not period == 'all':
        s, e = period.split('-')
        start = dt.strptime(s + '-01-01', '%Y-%m-%d')
        end = dt.strptime(e + '-12-31', '%Y-%m-%d')
        time_range = [start, end]
    else:
        time_range = None

    ref_indices = []
    for nc_indice in ncs_indices:
        variable = get_variable(nc_indice)
        f = basename(nc_indice).strip('.nc')
        prefix = '%s_ref-%s' % ('_'.join(f.split('_')[0:-1]), period)

        ref_indices.append(call(resource=nc_indice, variable=variable, prefix=prefix,
                                calc=[{'func': 'mean', 'name': variable}],
                                calc_grouping=['all'], time_range=time_range))

    return ref_indices
Example #9
def get_level(resource, level):
  from flyingpigeon.ocgis_module import call
  from netCDF4 import Dataset
  from flyingpigeon.utils import get_variable
  from numpy import squeeze

  try:
    level_data = call(resource, level_range=[int(level), int(level)])
    if type(resource) == list:
      resource.sort()
    variable = get_variable(level_data)
    logger.info('found %s in file' % variable)
    ds = Dataset(level_data, mode='a')
    var = ds.variables.pop(variable)
    dims = var.dimensions
    new_var = ds.createVariable('z%s' % level, var.dtype, dimensions=(dims[0], dims[2], dims[3]))
    # i = where(var[:]==level)
    new_var[:, :, :] = squeeze(var[:, 0, :, :])
    ds.close()
    logger.info('level %s extracted' % level)

    data = call(level_data, variable='z%s' % level)

  except Exception as e:
    logger.error('failed to extract level %s ' % e)
    raise  # re-raise: 'data' is undefined if the extraction failed
  return data
Example #10
def get_gam(ncs_reference, PAmask):
  
  from netCDF4 import Dataset
  from os.path import basename
  from numpy import squeeze, ravel, isnan, nan, array, reshape
  
  from flyingpigeon.utils import get_variable
  from rpy2.robjects.packages import importr
  import rpy2.robjects as ro

  import rpy2.robjects.numpy2ri
  rpy2.robjects.numpy2ri.activate()
  mgcv = importr("mgcv")
  base = importr("base")
  stats = importr("stats")
  
  data = {'PA': ro.FloatVector(ravel(PAmask))}
  domain = PAmask.shape
  
  form = 'PA ~ '
  ncs_reference.sort()
  
  for i, nc in enumerate(ncs_reference):
    var = get_variable(nc)
    agg = basename(nc).split('_')[-2]
    ds = Dataset(nc)
    vals = squeeze(ds.variables[var])
    vals[isnan(PAmask)] = nan
    indice = '%s_%s' % (var, agg)
    data[str(indice)] = ro.FloatVector(ravel(vals))
    if i == 0:
      form = form + 's(%s, k=3)' % indice
    else:
      form = form + ' + s(%s, k=3)' % indice
  
  dataf = ro.DataFrame(data)
  eq = ro.Formula(str(form))
  
  gam_model = mgcv.gam(base.eval(eq), data=dataf, family=stats.binomial(), scale=-1, na_action=stats.na_exclude)
  
  grdevices = importr('grDevices')
  
  output_info = "info.pdf"
  grdevices.pdf(file=output_info)
  # plotting code here

  for i in range(1, len(ncs_reference) + 1):
    # ylim = ro.IntVector([-6,6])
    mgcv.plot_gam(gam_model, shade='T', col='black', select=i,
                  ylab='Predicted Probability', rug=False,
                  cex_lab=1.4, cex_axis=1.4)  # ylim=ylim, trans=base.eval(trans)

  grdevices.dev_off()
  
  predict_gam = mgcv.predict_gam(gam_model, type="response", progress="text", na_action=stats.na_exclude)
  
  prediction = array(predict_gam).reshape(domain)
    
  return gam_model, prediction, output_info
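
The loop above assembles an mgcv formula string of the form 'PA ~ s(TG_yr, k=3) + s(TNn_yr, k=3) + ...', one smooth term per indice. The same string can be built in one expression (a small refactoring sketch; the indice names below are made up for illustration):

indices = ['TG_yr', 'TNn_yr', 'RR_yr']  # hypothetical '<variable>_<aggregation>' names
form = 'PA ~ ' + ' + '.join('s(%s, k=3)' % i for i in indices)
# 'PA ~ s(TG_yr, k=3) + s(TNn_yr, k=3) + s(RR_yr, k=3)'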
Example #11
  def execute(self):
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_time, get_variable, sort_by_filename
    
    from datetime import datetime as dt
    from netCDF4 import Dataset
    from numpy import savetxt, column_stack, squeeze
    from shapely.geometry import Point  # Point is used below but was never imported; assuming shapely, as is common with ocgis
    
    ncs = self.getInputValues(identifier='netcdf_file')
    logging.info("ncs: %s " % ncs) 
    coords = self.getInputValues(identifier='coords')
    logging.info("coords %s", coords)

 
    nc_exp = sort_by_filename(ncs) # dictionary {experiment:[files]}
    filenames = []
    
    (fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    tar = tarfile.open(tarout_file, "w")
    
    for key in nc_exp.keys():
      logging.info('start calculation for %s ' % key )
      ncs = nc_exp[key]
      nc = ncs[0]
      
      times = get_time(nc)
      var = get_variable(nc)
      
      concat_vals = [t.strftime('%Y-%d-%m_%H:%M:%S') for t in times]
      header = 'date_time'
      filename = '%s.csv' % key
      filenames.append(filename) 
      
      for ugid, p in enumerate(coords, start=1):
        self.status.set('processing point : {0}'.format(p), 20)
        p = p.split(',')
        self.status.set('split x and y coords : {0}'.format(p), 20)
        point = Point(float(p[0]), float(p[1]))
        
        #get the timeseries at gridpoint
        timeseries = call(resource=ncs, geom=point, select_nearest=True)
        
        ds = Dataset(timeseries)
        vals = squeeze(ds.variables[var])
        header = header + ',%s_%s' % (p[0], p[1])
        concat_vals = column_stack([concat_vals, vals])

      savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
      tar.add( filename )
      
    tar.close()
    self.tarout.setValue( tarout_file )
Example #12
def spaghetti(resources, variable=None, title=None, file_extension='png'):
    """
    creates a png file containing the appropriate spaghetti plot as a field mean of the values.

    :param resources: list of files containing the same variable
    :param variable: variable to be visualised. If None (default), variable will be detected
    :param title: string to be used as title

    :returns str: path to png file
    """
    from flyingpigeon.calculation import fieldmean

    try:
        fig = plt.figure(figsize=(20, 10), dpi=600, facecolor='w', edgecolor='k')
        LOGGER.debug('Start visualisation spaghetti plot')

        # === prepare environment
        if type(resources) != list:
            resources = [resources]
        if variable is None:
            variable = utils.get_variable(resources[0])
        if title is None:
            title = "Field mean of %s " % variable

        LOGGER.info('plot values preparation done')
    except:
        msg = "plot values preparation failed"
        LOGGER.exception(msg)
        raise Exception(msg)
    try:
        for c, nc in enumerate(resources):
            # get timestamps
            try:
                dt = utils.get_time(nc)  # [datetime.strptime(elem, '%Y-%m-%d') for elem in strDate[0]]
                ts = fieldmean(nc)
                plt.plot(dt, ts)
                # fig.line( dt,ts )
            except:
                msg = "spaghetti plot failed for %s " % nc
                LOGGER.exception(msg)

        plt.title(title, fontsize=20)
        plt.grid()

        output_png = fig2plot(fig=fig, file_extension=file_extension)

        plt.close()
        LOGGER.info('timeseries spaghetti plot done for %s with %s lines.' % (variable, c))
    except:
        msg = 'matplotlib spaghetti plot failed'
        LOGGER.exception(msg)
        raise Exception(msg)  # 'output_png' is undefined if plotting failed
    return output_png
Example #13
def get_prediction(gam_model, ncs_indices):  # mask=None
    """
    predict the probability based on the gam_model and the given climate index datasets

    :param gam_model: fitted gam (output from sdm.get_gam)
    :param ncs_indices: list of netCDF files containing climate indices of one dataset
    :param mask: 2D array of True/False to exclude areas (e.g ocean) for prediction
    """
    from netCDF4 import Dataset
    from os.path import basename
    from numpy import squeeze, ravel, array, reshape  #, zeros, broadcast_arrays, nan

    from flyingpigeon.utils import get_variable

    from rpy2.robjects.packages import importr
    import rpy2.robjects as ro

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()
    mgcv = importr("mgcv")
    stats = importr("stats")

    ncs_indices.sort()

    data = {}

    for i, nc in enumerate(ncs_indices):
        var = get_variable(nc)
        agg = basename(nc).split('_')[-2]
        ds = Dataset(nc)
        vals = squeeze(ds.variables[var])
        if i == 0:
            dims = vals.shape
        # if mask != None:
        #     mask = broadcast_arrays(vals, mask)[1]
        #     vals[mask==False] = nan
        indice = '%s_%s' % (var, agg)
        data[str(indice)] = ro.FloatVector(ravel(vals))

    dataf = ro.DataFrame(data)
    predict_gam = mgcv.predict_gam(
        gam_model,
        newdata=dataf,
        type="response",
        progress="text",
        newdata_guaranteed=True)  #, na_action=`na.pass`

    prediction = array(predict_gam).reshape(dims)

    return prediction
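
The ravel/reshape pair is what maps the flat R prediction vector back onto the grid: the fields are flattened in row-major order for the data frame, and the remembered dims restore the shape afterwards. A minimal self-contained illustration:

import numpy as np

vals = np.arange(12).reshape(3, 4)       # (lat, lon) field
dims = vals.shape
flat = np.ravel(vals)                    # row-major flattening, as fed to ro.FloatVector
restored = np.array(flat).reshape(dims)  # the inverse step applied to the GAM prediction
assert (restored == vals).all()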
Example #14
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        ncfiles = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        if 'variable' in request.inputs:
            var = request.inputs['variable'][0].data
        else:
            var = get_variable(ncfiles[0])
            #  var = ncfiles[0].split("_")[0]

        response.update_status('plotting variable {}'.format(var), 10)

        try:
            plotout_spagetti_file = vs.spaghetti(
                ncfiles,
                variable=var,
                title='Field mean of {}'.format(var),
            )
            LOGGER.info("spagetti plot done")
            response.update_status(
                'Spagetti plot for %s %s files done' % (len(ncfiles), var), 50)
            response.outputs['plotout_spagetti'].file = plotout_spagetti_file
        except Exception as e:
            raise Exception("spagetti plot failed : {}".format(e))

        try:
            plotout_uncertainty_file = vs.uncertainty(
                ncfiles,
                variable=var,
                title='Ensemble uncertainty for {}'.format(var),
            )

            response.update_status(
                'Uncertainty plot for {} {} files done'.format(
                    len(ncfiles), var), 90)
            response.outputs[
                'plotout_uncertainty'].file = plotout_uncertainty_file
            LOGGER.info("uncertainty plot done")
        except Exception as err:
            raise Exception("uncertainty plot failed {}".format(err.message))

        response.update_status('visualisation done', 100)
        return response
Example #15
File: sdm.py Project: KatiRG/flyingpigeon
def get_prediction(gam_model, ncs_indices):  # mask=None
  """
  predict the probability based on the gam_model and the given climate index datasets

  :param gam_model: fitted gam (output from sdm.get_gam)
  :param ncs_indices: list of netCDF files containing climate indices of one dataset
  :param mask: 2D array of True/False to exclude areas (e.g ocean) for prediction
  """
  from netCDF4 import Dataset
  from os.path import basename
  from numpy import squeeze, ravel, array, reshape#, zeros, broadcast_arrays, nan
  
  from flyingpigeon.utils import get_variable
  
  from rpy2.robjects.packages import importr
  import rpy2.robjects as ro

  import rpy2.robjects.numpy2ri
  rpy2.robjects.numpy2ri.activate()
  mgcv = importr("mgcv")
  stats = importr("stats")

  ncs_indices.sort()
  
  data = {}
  
  for i, nc in enumerate(ncs_indices):
    var = get_variable(nc)
    agg = basename(nc).split('_')[-2]
    ds = Dataset(nc)
    vals = squeeze(ds.variables[var])
    if i == 0:
      dims = vals.shape
    # if mask != None:
    #   mask = broadcast_arrays(vals, mask)[1]
    #   vals[mask==False] = nan
    indice = '%s_%s' % (var, agg)
    data[str(indice)] = ro.FloatVector(ravel(vals))
  
  dataf = ro.DataFrame(data)
  predict_gam = mgcv.predict_gam(gam_model, newdata=dataf, type="response",
                                 progress="text", newdata_guaranteed=True)  # , na_action=`na.pass`
  
  prediction = array(predict_gam).reshape(dims)
  
  return prediction
Example #16
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        ncfiles = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        if 'variableIn' in request.inputs:
            var = request.inputs['variableIn'][0].data
        else:
            from flyingpigeon.utils import get_variable
            var = get_variable(ncfiles[0])

        response.update_status('plotting variable %s' % var, 10)

        try:
            plotout_spagetti_file = vs.spaghetti(ncfiles,
                                                 variable=var,
                                                 title='Fieldmean of %s ' %
                                                 (var),
                                                 dir_out=None)
            LOGGER.info("spagetti plot done")
            response.update_status(
                'Spagetti plot for %s %s files done' % (len(ncfiles), var), 50)
        except:
            LOGGER.exception("spagetti plot failed")

        try:
            plotout_uncertainty_file = vs.uncertainty(
                ncfiles,
                variable=var,
                title='Ensemble uncertainty for %s ' % (var),
                dir_out=None)

            response.update_status(
                'Uncertainty plot for %s %s files done' % (len(ncfiles), var),
                90)
            LOGGER.info("uncertainty plot done")
        except:
            LOGGER.exception("uncertainty plot failed")

        response.outputs['plotout_spagetti'].file = plotout_spagetti_file
        response.outputs['plotout_uncertainty'].file = plotout_uncertainty_file
        response.update_status('visualisation done', 100)
        return response
Example #17
    def execute(self):

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')
        from flyingpigeon.utils import archiveextract

        ncfiles = archiveextract(self.getInputValues(identifier='resource'))
        var = self.variableIn.getValue()

        if var is None:
            from flyingpigeon.utils import get_variable
            var = get_variable(ncfiles[0])

        self.status.set('plotting variable %s' % var, 10)

        try:
            plotout_spagetti_file = vs.spaghetti(ncfiles,
                                                 variable=var,
                                                 title='Fieldmean of %s ' % (var),
                                                 dir_out=None)
            logger.info("spaghetti plot done")
            self.status.set('Spaghetti plot for %s %s files done' % (len(ncfiles), var), 50)
        except:
            logger.exception("spaghetti plot failed")

        try:
            plotout_uncertainty_file = vs.uncertainty(ncfiles,
                                                      variable=var,
                                                      title='Ensemble uncertainty for %s ' % (var),
                                                      dir_out=None)

            self.status.set('Uncertainty plot for %s %s files done' % (len(ncfiles), var), 90)
            logger.info("uncertainty plot done")
        except:
            logger.exception("uncertainty plot failed")

        self.plotout_spagetti.setValue(plotout_spagetti_file)
        self.plotout_uncertainty.setValue(plotout_uncertainty_file)
        self.status.set('visualisation done', 100)
Example #18
def write_to_file(nc_indice, data):
    """
    repaces the values in an indice file with given data

    :param nc_indice: base netCDF file (indice file)
    :param data: data to be filled into the netCDF file

    :returns str: path to netCDF file
    """
    try:
        from netCDF4 import Dataset
        from shutil import copy
        from os.path import split, join
        from flyingpigeon.utils import get_variable
        from flyingpigeon.metadata import get_frequency
        from numpy import nan

        # path, nc_indice = split(indice_file)

        var = get_variable(nc_indice)
        fq = get_frequency(nc_indice)
        agg = nc_indice.split('_')[-2]

        nc = nc_indice.replace(var, 'tree').replace(agg, fq)
        copy(nc_indice, nc)

        ds = Dataset(nc, mode='a')
        vals = ds.variables[var]

        ds.renameVariable(var, 'tree')

        vals[:, :, :] = data[:, :, :]
        vals.long_name = 'Favourability for tree species'
        vals.standard_name = 'tree'
        vals.units = '0-1'
        vals.missing_value = nan
        ds.close()
    except:
        msg = 'failed to fill data to netCDF file'
        logger.exception(msg)
        raise Exception(msg)  # 'nc' may be undefined if preparation failed
    return nc
Example #19
def calc_indice_percentile(resources=[], variable=None,
    prefix=None, indices='TG90p', refperiod=None,
    groupings='yr', polygons=None, percentile=90, mosaik=False,
    dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appopriate time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param indices: list of indices (default ='TG90p')
    :param prefix: filename prefix 
    :param refperiod: reference refperiod touple = (start,end)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs  # makedirs is used below but was missing from the import
    import uuid
    from numpy import ma 
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time
    
    if type(resources) != list: 
      resources = list([resources])
    if type(indices) != list: 
      indices = list([indices])
      
    if type(groupings) != list: 
      groupings = list([groupings])
      
    if type(refperiod) == list: 
      refperiod = refperiod[0]
      
    if refperiod != None:
      start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
      end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
      time_range = [start, end]
    else:
      time_range = None
    
    if dir_output != None:
      if not exists(dir_output): 
        makedirs(dir_output)
    
    ########################################################################################################################
    # Compute a custom percentile basis using ICCLIM. ######################################################################
    ########################################################################################################################

    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)
    
    for grouping in groupings:
      calc_group = calc_grouping(grouping)
      for key in nc_dic.keys():
        resource = nc_dic[key]
        if variable == None: 
          variable = get_variable(resource)
        if polygons == None:
          nc_reference = call(resource=resource, 
            prefix=str(uuid.uuid4()), 
            time_range=time_range,
            output_format='nc', 
            dir_output=dir_output)
        else:
          nc_reference = clipping(resource=resource, 
            prefix=str(uuid.uuid4()),
            time_range=time_range, 
            output_format='nc', 
            polygons=polygons,
            dir_output=dir_output, 
            mosaik=mosaik)
          
        arr = get_values(nc_files=nc_reference)
        dt_arr = get_time(nc_files=nc_reference)
        arr = ma.masked_array(arr)
        dt_arr = ma.masked_array(dt_arr)
        percentile = percentile
        window_width = 5
        
        for indice in indices:
          name = indice.replace('_', str(percentile))
          var = indice.split('_')[0]

          operation = None
          if 'T' in var: 
            if percentile >= 50: 
              operation = 'Icclim%s90p' % var
              func = 'icclim_%s90p' % var # icclim_TG90p
            else: 
              operation = 'Icclim%s10p' % var
              func = 'icclim_%s10p' % var
              
          ################################
          # load the appropriate operation
          ################################

          ops = [op for op in dir(lic) if operation in op]
          if len(ops) == 0:
              raise Exception("operator does not exist %s", operation)
          
          exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
          calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
          
          if polygons == None:
            nc_indices.append(call(resource=resource, 
                                prefix=key.replace(variable,name).replace('_day_', '_%s_' % grouping), 
                                calc=calc, 
                                calc_grouping=calc_group, 
                                output_format='nc',
                                dir_output=dir_output))
          else: 
            nc_indices.extend(clipping(resource=resource, 
                                prefix=key.replace(variable,name).replace('_day_', '_%s_' % grouping), 
                                calc=calc, 
                                calc_grouping=calc_group, 
                                output_format='nc',
                                dir_output=dir_output,
                                polygons=polygons, 
                                mosaik=mosaik,
                                ))
    return nc_indices
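
The exec statement above is Python 2 syntax and is only needed to resolve the ICCLIM class by name. Assuming ops[0] really is an attribute name of library_icclim (which the dir(lic) filter suggests), getattr does the same lookup without exec; a drop-in sketch for that one line:

# equivalent to the exec line, assuming get_percentile_dict is reachable
# on the class object named by ops[0]:
percentile_dict = getattr(lic, ops[0]).get_percentile_dict(arr, dt_arr, percentile, window_width)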
Example #20
def method_A(resource=[],
             start=None,
             end=None,
             timeslice=20,
             variable=None,
             title=None,
             cmap='seismic'):
    """returns the result

    :param resource: list of paths to netCDF files
    :param start: beginning of reference period (if None (default),
                  the first year of the consistent ensemble will be detected)
    :param end: end of comparison period (if None (default), the last year of the consistent ensemble will be detected)
    :param timeslice: period length for mean calculation of reference and comparison period
    :param variable: OBSOLETE
    :param title: str to be used as title for the signal map
    :param cmap: define the color scheme for signal map plotting

    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc, text.txt,  #  graphic.png,
    """
    from os.path import split
    from tempfile import mkstemp
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    # preparing the resource
    try:
        file_dic = sort_by_filename(resource, historical_concatination=True)
        LOGGER.info('file names sorted, experiments: %s' % len(file_dic.keys()))
    except:
        msg = 'failed to sort the input files'
        LOGGER.exception(msg)

    # check that all datasets contains the same variable

    try:
        var_name = set()
        for key in file_dic.keys():
            var_name = var_name.union([get_variable(file_dic[key])])
        LOGGER.debug(var_name)
    except:
        LOGGER.exception('failed to get the variable in common')

    if len(var_name) == 1:
        variable = [str(n) for n in var_name][0]
        LOGGER.info('variable %s detected in all members of the ensemble' %
                    variable)
    else:
        raise Exception(
            'none or more than one variables are found in the ensemble members'
        )

    # TODO: drop misfitting grids

    # timemerge for separate datasets
    try:
        mergefiles = []
        for key in file_dic.keys():
            # if variable is None:
            #     variable = get_variable(file_dic[key])
            #     LOGGER.info('variable detected %s ' % variable)
            try:
                if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                    _, nc_merge = mkstemp(dir='.', suffix='.nc')
                    mergefiles.append(
                        cdo.mergetime(input=file_dic[key], output=nc_merge))
                else:
                    mergefiles.extend(file_dic[key])
            except:
                LOGGER.exception('failed to merge files for %s ' % key)
        LOGGER.info('datasets merged %s ' % mergefiles)
    except:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    # dataset documentation
    try:
        text_src = open('infiles.txt', 'a')
        for key in file_dic.keys():
            text_src.write(key + '\n')
        text_src.close()
    except:
        msg = 'failed to write source textfile'
        LOGGER.exception(msg)
        _, text_src = mkstemp(dir='.', suffix='.txt')

    # configure reference and compare period
    # TODO: filter files by time

    try:
        if start is None or end is None:
            st_set = set()
            en_set = set()
            for f in mergefiles:
                times = get_time(f)
                st_set.update([times[0].year])
                en_set.update([times[-1].year])
        if start is None:
            start = max(st_set)
        if end is None:
            end = min(en_set)
        LOGGER.info('Start and End: %s - %s ' % (start, end))
        if start >= end:
            LOGGER.error(
                'ensemble is inconsistent!!! start year is later than end year'
            )
    except:
        msg = 'failed to detect start and end times of the ensemble'
        LOGGER.exception(msg)

    # set the periods:
    try:
        LOGGER.debug(type(start))
        # start = int(start)
        # end = int(end)
        if timeslice is None:
            timeslice = int((end - start) / 3)
            if timeslice == 0:
                timeslice = 1
        else:
            timeslice = int(timeslice)
        start1 = start
        start2 = start1 + timeslice - 1
        end1 = end - timeslice + 1
        end2 = end
        LOGGER.info('timeslice and periods set')
    except:
        msg = 'failed to set the periods'
        LOGGER.exception(msg)

    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start1, end2),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        LOGGER.info('timeseries selected from defined start to end year')
    except:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    try:
        # ensemble mean
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        LOGGER.info('ensemble mean calculation done')
    except:
        msg = 'ensemble mean failed'
        LOGGER.exception(msg)

    try:
        # ensemble std
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        LOGGER.info('ensemble std calculation done')
    except:
        msg = 'ensemble std failed'
        LOGGER.exception(msg)

    # get the signal as difference between the beginning (first years) and end (last years) period:
    try:
        selyearstart = cdo.selyear('%s/%s' % (start1, start2),
                                   input=nc_ensmean,
                                   output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end1, end2),
                                 input=nc_ensmean,
                                 output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        LOGGER.info('Signal calculation done')
    except:
        msg = 'calculation of signal failed'
        LOGGER.exception(msg)
        _, signal = mkstemp(dir='.', suffix='.nc')

    # get the intermodel standard deviation (mean over whole period)
    try:
        # std_selyear = cdo.selyear('%s/%s' % (end1,end2), input=nc_ensstd, output='std_selyear.nc')
        # std = cdo.timmean(input = std_selyear, output = 'std.nc')

        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        LOGGER.info('calculation of internal model std for time period done')
    except:
        msg = 'calculation of internal model std failed'
        LOGGER.exception(msg)
    try:
        absolut = cdo.abs(input=signal, output='absolut_signal.nc')
        high_agreement_mask = cdo.gt(
            input=[absolut, std2],
            output='large_change_with_high_model_agreement.nc')
        low_agreement_mask = cdo.lt(
            input=[absolut, std],
            output='small_signal_or_low_agreement_of_models.nc')
        LOGGER.info('high and low mask done')
    except:
        msg = 'calculation of robustness mask failed'
        LOGGER.exception(msg)
        _, high_agreement_mask = mkstemp(dir='.', suffix='.nc')
        _, low_agreement_mask = mkstemp(dir='.', suffix='.nc')

    return signal, low_agreement_mask, high_agreement_mask, text_src
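
The robustness logic at the end compares the absolute ensemble signal against one and two intermodel standard deviations. Reduced to arrays, the cdo gt/lt steps amount to this numpy sketch (values invented for illustration):

import numpy as np

signal = np.array([0.2, 2.8, -3.5])        # change between end and start period
std = np.array([0.5, 1.0, 1.2])            # intermodel standard deviation
high_agreement = np.abs(signal) > 2 * std  # large change with high model agreement
low_agreement = np.abs(signal) < std       # small signal or low model agreement
print(high_agreement, low_agreement)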
Example #21
  def execute(self):
    import time # performance test
    process_start_time = time.time() # measure process execution time ...
     
    from os import path
    from tempfile import mkstemp
    from flyingpigeon import analogs
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.datafetch import reanalyses
    from flyingpigeon.utils import get_variable, rename_variable
    
    self.status.set('execution started at : %s '  % dt.now(),5)

    start_time = time.time() # measure init ...
    
    resource = self.getInputValues(identifier='resource')
    bbox_obj = self.BBox.getValue()
    refSt = self.getInputValues(identifier='refSt')
    refEn = self.getInputValues(identifier='refEn')
    dateSt = self.getInputValues(identifier='dateSt')
    dateEn = self.getInputValues(identifier='dateEn')
    normalize = self.getInputValues(identifier='normalize')[0]
    distance = self.getInputValues(identifier='dist')[0]
    outformat = self.getInputValues(identifier='outformat')[0]
    timewin = int(self.getInputValues(identifier='timewin')[0])
    experiment = self.getInputValues(identifier='experiment')[0]
    dataset, var = experiment.split('_')
    # nanalog and seasonwin are used below when writing the config file but
    # were never read in; assuming process inputs of the same names exist:
    nanalog = int(self.getInputValues(identifier='nanalog')[0])
    seasonwin = int(self.getInputValues(identifier='seasonwin')[0])

    refSt = dt.strptime(refSt[0], '%Y-%m-%d')
    refEn = dt.strptime(refEn[0], '%Y-%m-%d')
    dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
    dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')
    
    
    if normalize == 'None':
      seacyc = False
    else:
      seacyc = True

    if outformat == 'ascii':
      outformat = '.txt'
    elif outformat == 'netCDF':
      outformat = '.nc'
    else:
      logger.error('output format not valid')

    if bbox_obj is not None:
      logger.info("bbox_obj={0}".format(bbox_obj.coords))
      bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
              bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
      logger.info("bbox={0}".format(bbox))
    else:
      bbox = None

     #start = min( refSt, dateSt )
    #end = max( refEn, dateEn )
    # region = self.getInputValues(identifier='region')[0]
    # bbox = [float(b) for b in region.split(',')]

    try:
      if dataset == 'NCEP':
        if 'z' in var:
          variable = 'hgt'
          level = var.strip('z')
          # conform_units_to=None
        else:
          variable = 'slp'
          level = None
          # conform_units_to='hPa'
      elif '20CRV2' in dataset:  # was: '20CRV2' in var, which can never match the dataset name
        if 'z' in var:  # was: 'z' in level, but level is not defined at this point
          variable = 'hgt'
          level = var.strip('z')
          # conform_units_to=None
        else:
          variable = 'prmsl'
          level = None
          # conform_units_to='hPa'
      else:
        logger.error('Reanalyses dataset not known')
      logger.info('environment set')
    except Exception as e:
      msg = 'failed to set environment %s ' % e
      logger.error(msg)
      raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in the arguments', 5)
    #################
    # get input data
    #################

    start_time = time.time()  # measure get_input_data ...
    self.status.set('fetching input data', 7)
    try:
      input = reanalyses(start=dateSt.year, end=dateEn.year, variable=var, dataset=dataset)
      nc_subset = call(resource=input, variable=var, geom=bbox)
    except Exception as e :
      msg = 'failed to fetch or subset input files %s' % e
      logger.error(msg)
      raise Exception(msg)
    logger.debug("get_input_subset_dataset took %s seconds.", time.time() - start_time)
    self.status.set('**** Input data fetched', 10)
    
    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
      self.status.set('Preparing simulation data', 15)
      simulation = call(resource=nc_subset, time_range=[dateSt, dateEn])
    except:
      msg = 'failed to prepare simulation period'
      logger.debug(msg)
      
    try:
      self.status.set('Preparing target data', 17)
      var_target = get_variable(resource)
      #var_simulation = get_variable(simulation)
      archive = call(resource=resource, variable=var_target,
          time_range=[refSt, refEn], geom=bbox, t_calendar='standard',  # conform_units_to=conform_units_to, spatial_wrapping='wrap',
          regrid_destination=simulation, regrid_options='bil')
    except Exception as e:
      msg = 'failed subset archive dataset %s ' % e
      logger.debug(msg)
      raise Exception(msg)
    
    try:     
      if var != var_target:
        rename_variable(archive, oldname=var_target, newname=var)
        logger.info('varname %s in netCDF renamed to %s' % (var_target, var))
    except Exception as e:
      msg = 'failed to rename variable in target files %s ' % e
      logger.debug(msg)
      raise Exception(msg)
    
    try:
      if seacyc == True:
        seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
      else:
        seasoncyc_base = seasoncyc_sim = None  # was a tuple unpacking of None, which raises a TypeError
    except Exception as e:
      msg = 'failed to prepare seasonal cycle reference files %s ' % e
      logger.debug(msg)
      raise Exception(msg)
      
    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generating the config file
    ############################
    
    self.status.set('writing config file', 15)
    start_time = time.time() # measure write config ...
    
    try:
      config_file = analogs.get_configfile(
        files=files,
        seasoncyc_base=seasoncyc_base,
        seasoncyc_sim=seasoncyc_sim,
        timewin=timewin,
        varname=var,
        seacyc=seacyc,
        cycsmooth=91,
        nanalog=nanalog,
        seasonwin=seasonwin,
        distfun=distance,
        outformat=outformat,
        calccor=True,
        silent=False,
        period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
        bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
      msg = 'failed to generate config file %s ' % e
      logger.debug(msg)
      raise Exception(msg)

    logger.debug("write_config took %s seconds.", time.time() - start_time)
      
    #######################
    # CASTf90 call 
    #######################
    import subprocess
    import shlex

    start_time = time.time() # measure call castf90
    
    self.status.set('Start CASTf90 call', 20)
    try:
      #self.status.set('execution of CASTf90', 50)
      cmd = 'analogue.out %s' % path.relpath(config_file)
      #system(cmd)
      args = shlex.split(cmd)
      output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
      logger.info('analogue.out info:\n %s ' % output)
      logger.debug('analogue.out errors:\n %s ' % error)
      self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e: 
      msg = 'CASTf90 failed %s ' % e
      logger.error(msg)  
      raise Exception(msg)

    logger.debug("castf90 took %s seconds.", time.time() - start_time)
    
    self.status.set('preparing output', 99)
    self.config.setValue(config_file)
    self.analogs.setValue(output_file)
    self.simulation_netcdf.setValue(simulation)
    self.target_netcdf.setValue(archive)
    
    self.status.set('execution ended', 100)

    logger.debug("total execution took %s seconds.", time.time() - process_start_time)
Example #22
def set_metadata_segetalflora(resource):
  """
  :param resources: imput files 
  """
  # gather the set_metadata
  
  dic_segetalflora = {
    'keywords' : 'Segetalflora', 
    'tier': '2',
    'in_var' : 'tas',
    'description':'Number of European segetalflora species', 
    'method':'regression equation',
    'institution':'Julius Kuehn-Institut (JKI) Federal Research Centre for Cultivated Plants', 
    'institution_url':'www.jki.bund.de',
    'institute_id' : "JKI",
    'contact_mail_3':'*****@*****.**',
    'version' : '1.0',
     }
  
  dic_climatetype = {
    '1' : 'cold northern species group', 
    '2' : 'warm northern species group',
    '3' : 'moderate warm-toned species group',
    '4' : 'moderate warm-toned to mediterranean species group',
    '5' : 'mediterranean species group',
    '6' : 'climate-indifferent species',
    '7' : 'climate-undefinable species',
    'all' : 'species of all climate types'
      }
  
  try:
    set_basic_md(resource)
  except Exception as e: 
    logger.error(e)
  
  try:
    set_dynamic_md(resource)
  except Exception as e: 
    logger.error(e)
  
  #set the segetalflora specific metadata
  try:
    ds = Dataset(resource, mode='a')
    ds.setncatts(dic_segetalflora)
    ds.close()
  except Exception as e: 
    logger.error(e)
    # set the variable attributes: 
  from flyingpigeon.utils import get_variable
  
  try:
    ds = Dataset(resource, mode='a')
    var = get_variable(resource)
    if 'all' in var: 
      climat_type = 'all'
    else: 
      climat_type = var[-1]

    culture_type = var.strip('sf').strip(climat_type)  
    
    sf = ds.variables[var]
    sf.setncattr('units', 1)
    sf.setncattr('standard_name', 'sf%s%s' % (culture_type, climat_type))
    sf.setncattr('long_name', 'Segetal flora %s land use for %s' % (culture_type, dic_climatetype['%s' % climat_type]))
    ds.close()
  except Exception as e: 
    logger.error('failed to set sf attributes %s ' % e)
  # sort the attributes: 
  try:
    ds = Dataset(resource, mode='a')
    att = ds.ncattrs()
    att.sort()
    for a in att: 
      entry = ds.getncattr(a)
      ds.setncattr(a, entry)
    history = '%s , Segetalflora Impact Model V1.0' % (ds.history)
    ds.setncattr('history', history)
    ds.close()
  except Exception as e: 
    logger.error('failed to sort attributes %s ' % e)
  
  return resource
Example #23
    def execute(self):
        logger.info('Start process')

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp

        ################################
        # reading in the input arguments
        ################################
        try:
            resource = self.getInputValues(identifier='resource')
            url_Rdat = self.getInputValues(identifier='Rdat')[0]
            url_dat = self.getInputValues(identifier='dat')[0]
            url_ref_file = self.getInputValues(
                identifier='netCDF')  # can be None
            season = self.getInputValues(identifier='season')[0]
            period = self.getInputValues(identifier='period')[0]
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
        except Exception as e:
            logger.debug('failed to read in the arguments %s ' % e)

        try:
            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')
            # kappa = int(self.getInputValues(identifier='kappa')[0])

            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            logger.info('read in the arguments')
            logger.info('url_ref_file: %s' % url_ref_file)
            logger.info('url_Rdat: %s' % url_Rdat)
            logger.info('url_dat: %s' % url_dat)
        except Exception as e:
            logger.debug('failed to convert arguments %s ' % e)

        ############################
        # fetching training data
        ############################

        from flyingpigeon.utils import download, get_time
        from os.path import abspath

        try:
            dat = abspath(download(url_dat))
            Rdat = abspath(download(url_Rdat))
            logger.info('training data fetched')
        except Exception as e:
            logger.error('failed to fetch training data %s' % e)

        ##########################################################
        # get the required bbox and time region from resource data
        ##########################################################
        # from flyingpigeon.weatherregimes import get_level

        from flyingpigeon.ocgis_module import call
        from flyingpigeon.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file[0])
            model_subset = call(
                resource=resource,
                variable=variable,
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file,
                regrid_options='bil')
            logger.info('Dataset subset with regridding done: %s ' %
                        model_subset)
        else:
            model_subset = call(
                resource=resource,
                variable=variable,
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
            )
            logger.info('Dataset time period extracted: %s ' % model_subset)

        #######################
        # computing anomalies
        #######################

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]  # was [0], which made the reference period start and end identical
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        # extracting season
        #####################

        model_season = wr.get_season(model_anomal, season=season)

        #######################
        # call the R scripts
        #######################

        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_projection.R'

            yr1 = start.year
            yr2 = end.year
            time = get_time(model_season, format='%Y%m%d')

            # ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
            ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                # '%s' % output_graphics,
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL'
            ]

            logger.info('R call built')
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            logger.error(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            # , shell=True
            logger.info('R outlog info:\n %s ' % output)
            logger.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                self.status.set('**** weatherregime in R succeeded', 90)
            else:
                logger.error('NO! output returned from R call')
        except Exception as e:
            msg = 'weatherregime in R failed: %s ' % e
            logger.error(msg)
            raise Exception(msg)

        #################
        # set the outputs
        #################

        # self.Routput_graphic.setValue( output_graphics )
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
        self.output_frequency.setValue(output_frec)
Example #24
def calc_indice_percentile(resources=[], variable=None,
    prefix=None, indices='TG90p', refperiod=None,
    groupings='yr', polygons=None, percentile=90, mosaic=False,
    dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param indices: list of indices (default ='TG90p')
    :param prefix: filename prefix 
    :param refperiod: reference period tuple = (start,end)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs  # makedirs is used below but was missing from the import
    import uuid
    from numpy import ma 
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time
    
    if type(resources) != list: 
      resources = list([resources])
    if type(indices) != list: 
      indices = list([indices])
      
    if type(groupings) != list: 
      groupings = list([groupings])
      
    if type(refperiod) == list: 
      refperiod = refperiod[0]
      
    if refperiod != None:
      start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
      end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
      time_range = [start, end]
    else:
      time_range = None
    
    if dir_output != None:
      if not exists(dir_output): 
        makedirs(dir_output)
    
    ########################################################################################################################
    # Compute a custom percentile basis using ICCLIM. ######################################################################
    ########################################################################################################################

    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)
    
    for grouping in groupings:
      calc_group = calc_grouping(grouping)
      for key in nc_dic.keys():
        resource = nc_dic[key]
        if variable == None: 
          variable = get_variable(resource)
        if polygons == None:
          nc_reference = call(resource=resource, 
            prefix=str(uuid.uuid4()), 
            time_range=time_range,
            output_format='nc', 
            dir_output=dir_output)
        else:
          nc_reference = clipping(resource=resource, 
            prefix=str(uuid.uuid4()),
            time_range=time_range, 
            output_format='nc', 
            polygons=polygons,
            dir_output=dir_output, 
            mosaic=mosaic)
          
        arr = get_values(resource=nc_reference)
        dt_arr = get_time(resource=nc_reference)
        arr = ma.masked_array(arr)
        dt_arr = ma.masked_array(dt_arr)
        percentile = percentile
        window_width = 5
        
        for indice in indices:
          name = indice.replace('_', str(percentile))
          var = indice.split('_')[0]

          operation = None
          if 'T' in var: 
            if percentile >= 50: 
              operation = 'Icclim%s90p' % var
              func = 'icclim_%s90p' % var # icclim_TG90p
            else: 
              operation = 'Icclim%s10p' % var
              func = 'icclim_%s10p' % var
              
          ################################
          # load the appropriate operation
          ################################

          ops = [op for op in dir(lic) if operation in op]
          if len(ops) == 0:
              raise Exception("operator does not exist %s", operation)
          
          exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
          calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
          
          if polygons is None:
            nc_indices.append(call(resource=resource, 
                                prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping), 
                                calc=calc, 
                                calc_grouping=calc_group, 
                                output_format='nc',
                                dir_output=dir_output))
          else: 
            nc_indices.extend(clipping(resource=resource, 
                                prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping), 
                                calc=calc, 
                                calc_grouping=calc_group, 
                                output_format='nc',
                                dir_output=dir_output,
                                polygons=polygons, 
                                mosaic=mosaic,
                                ))
    return nc_indices
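
A minimal usage sketch of the function above (assuming it is exposed as calc_indice_percentile, as in the stub further below; the file name and argument values are hypothetical, not part of the original module):

nc_files = ['tas_day_EUR-11_MOHC-HadGEM2-ES_historical_19710101-20001231.nc']  # hypothetical DRS file
nc_indices = calc_indice_percentile(resources=nc_files,
                                    indices=['TG_p'],  # '_' is replaced by the percentile, e.g. TG90p
                                    percentile=90,
                                    refperiod='19710101-20001231',
                                    groupings='yr',
                                    polygons=None,
                                    dir_output='.')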

#def calc_indice_unconventional(resource=[], variable=None, prefix=None,
  #indices=None, polygons=None,  groupings=None, 
  #dir_output=None, dimension_map = None):
    #"""
    #Calculates given indices for suitable files in the appropriate time grouping and polygon.

    #:param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    #:param variable: variable name to be selected in the in netcdf file (default=None)
    #:param indices: list of indices (default ='TGx')
    #:param polygons: list of polygons (default =None)
    #:param grouping: indices time aggregation (default='yr')
    #:param out_dir: output directory for result file (netcdf)
    #:param dimension_map: optional dimension map if different to standard (default=None)

    #:return: list of netcdf files with calculated indices. Files are saved into dir_output
    #"""
    
    #from os.path import join, dirname, exists
    #from os import remove
    #import uuid
    #from flyingpigeon import ocgis_module
    #from flyingpigeon.subset import get_ugid, get_geom

    #if type(resource) != list: 
      #resource = list([resource])
    #if type(indices) != list: 
      #indices = list([indices])
    #if type(polygons) != list and polygons != None:
      #polygons = list([polygons])
    #elif polygons == None:
      #polygons = [None]
    #else: 
      #logger.error('Polygons not found')
    #if type(groupings) != list:
      #groupings = list([groupings])
    
    #if dir_output != None:
      #if not exists(dir_output): 
        #makedirs(dir_output)
    
    #experiments = sort_by_filename(resource)
    #outputs = []

    #print('environment for calc_indice_unconventional set')
    #logger.info('environment for calc_indice_unconventional set')
    
    #for key in experiments:
      #if variable == None:
        #variable = get_variable(experiments[key][0])
      #try: 
        #ncs = experiments[key]
        #for indice in indices:
          #logger.info('indice: %s' % indice)
          #try: 
            #for grouping in groupings:
              #logger.info('grouping: %s' % grouping)
              #try:
                #calc_group = calc_grouping(grouping)
                #logger.info('calc_group: %s' % calc_group)
                #for polygon in polygons:  
                  #try:
                    #domain = key.split('_')[1].split('-')[0]
                    #if polygon == None:
                      #if prefix == None: 
                        #prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping )
                      #geom = None
                      #ugid = None
                    #else:
                      #if prefix == None: 
                        #prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping ).replace(domain,polygon)
                      #geom = get_geom(polygon=polygon)
                      #ugid = get_ugid(polygons=polygon, geom=geom)
                    #if indice == 'TGx':
                      #calc=[{'func': 'max', 'name': 'TGx'}]
                      #tmp = ocgis_module.call(resource=ncs,# conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              #dir_output=dir_output, geom=geom, select_ugid=ugid)
                    #elif indice == 'TGn':
                      #calc=[{'func': 'min', 'name': 'TGn'}]
                      #tmp = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping= calc_group, prefix=prefix,
                                               #dir_output=dir_output, geom=geom, select_ugid = ugid)
                    #elif indice == 'TGx5day':
                      #calc = [{'func': 'moving_window', 'name': 'TGx5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      #tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, prefix=str(uuid.uuid4()),
                                              #geom=geom, select_ugid = ugid)
                      #calc=[{'func': 'max', 'name': 'TGx5day'}]
                      #logger.info('moving window calculated : %s' % tmp2)
                      #tmp = ocgis_module.call(resource=tmp2,
                                              #variable=indice, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              #dir_output=dir_output)
                      #remove(tmp2)
                    #elif indice == 'TGn5day':
                      #calc = [{'func': 'moving_window', 'name': 'TGn5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      #tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, prefix=str(uuid.uuid4()),
                                              #geom=geom, select_ugid = ugid)
                      #calc=[{'func': 'min', 'name': 'TGn5day'}]
                      
                      #logger.info('moving window calculated : %s' % tmp2)
                      
                      #tmp = ocgis_module.call(resource=tmp2,
                                              #variable=indice, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              #dir_output=dir_output)
                      #remove(tmp2)
                    #else: 
                      #logger.error('Indice %s is not a known indice' % (indice))
                    #outputs.append(tmp)
                    #logger.info('indice file calculated %s ' % (tmp))
                  #except Exception as e:
                    #logger.debug('could not calc indice %s for key %s, polygon %s and calc_grouping %s : %s' %  (indice, key, polygon, grouping, e ))
              #except Exception as e:
                #logger.debug('could not calc indice %s for key %s and calc_grouping %s : %s' % ( indice, key, polygon, e ))
          #except Exception as e:
            #logger.debug('could not calc indice %s for key %s: %s'%  (indice, key, e ))
      #except Exception as e:
        #logger.debug('could not calc key %s: %s' % (key, e))
    #return outputs
Example #25
0
def get_anomalies(nc_file, frac=0.2, reference=None, method='ocgis', sseas='serial', variable=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
               (0 = close to the original data, 1 = flat line)
               default = 0.2
    :param reference: Period to calculate annual cycle

    :returns str: path to output netCDF file
    """
    from netCDF4 import Dataset

    if variable is None:
        variable = utils.get_variable(nc_file)
        # get_variable may return a list when the file holds more than one
        # variable; pick the 3D variable (works only if exactly one is present)
        if isinstance(variable, list):
            _ds = Dataset(nc_file)
            for j in variable:
                if len(_ds.variables[j].dimensions) == 3:
                    _var = j
            variable = _var
            _ds.close()
    LOGGER.debug('3D Variable selected: %s' % variable)

    try:
        if (method == 'cdo'):
            from cdo import Cdo
            from os import system

            ip2, nc_anual_cycle = mkstemp(dir='.', suffix='.nc')

            cdo = Cdo()
            #ip, nc_anual_cycle_tmp = mkstemp(dir='.', suffix='.nc')
            # TODO: if reference is none, use utils.get_time for nc_file to set the ref range
            #       But will need to fix 360_day issue (use get_time_nc from analogs)

            # com = 'seldate'
            # comcdo = 'cdo %s,%s-%s-%s,%s-%s-%s %s %s' % (com, reference[0].year, reference[0].month, reference[0].day,
            #                                              reference[1].year, reference[1].month, reference[1].day,
            #                                              nc_file, nc_anual_cycle_tmp)
            # LOGGER.debug('CDO: %s' % (comcdo))
            # system(comcdo)

            # use an ocgis call instead of cdo seldate: cdo reduces the
            # precision and the resulting anomalies come out as integers
            calc = '%s=%s' % (variable, variable)
            nc_anual_cycle_tmp = call(nc_file, time_range=reference, variable=variable, calc=calc)
            nc_anual_cycle = cdo.ydaymean(input=nc_anual_cycle_tmp, output=nc_anual_cycle)
        else:
            calc = [{'func': 'mean', 'name': variable}]
            calc_grouping = ['day', 'month']
            nc_anual_cycle = call(nc_file,
                                  calc=calc,
                                  calc_grouping=calc_grouping,
                                  variable=variable,
                                  time_range=reference)
        LOGGER.info('annual cycle calculated: %s' % (nc_anual_cycle))

    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        # lowess spline for smoothing (these imports are required below)
        import statsmodels.api as sm
        from numpy import tile, empty, linspace
        from cdo import Cdo
        cdo = Cdo()
        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts*3, num=ts*3, endpoint=True)

        if ('serial' not in sseas):
            # Multiprocessing =======================

            from multiprocessing import Pool
            pool = Pool()

            valex = [0.]
            valex = valex*vals.shape[1]*vals.shape[2]

            # TODO redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    valex[ind] = vals[:, lat, lon]
                    ind += 1

            LOGGER.debug('Start smoothing with multiprocessing')
            # TODO fraction option frac=... is not used here
            tmp_sm = pool.map(_smooth, valex)
            pool.close()
            pool.join()

            # TODO redo with reshape
            ind=0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    vals_sm[:, lat, lon] = tmp_sm[ind]
                    ind+=1
        else:
            # Serial ==================================
            vals_sm = empty(vals.shape)
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    try:
                        y = tile(vals[:, lat, lon], 3)
                        # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                        ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts*2, 1]
                        vals_sm[:, lat, lon] = ys
                    except:
                        msg = 'failed for lat %s lon %s' % (lat, lon)
                        LOGGER.exception(msg)
                        raise Exception(msg)
                LOGGER.debug('done for %s - %s ' % (lat, lon))

        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        LOGGER.info('smoothing of annual cycle done')
    except:
        msg = 'failed smoothing of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        try:
            nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
            LOGGER.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
        except:
            # bug cdo: https://code.mpimet.mpg.de/boards/1/topics/3909
            ip3, nc_in1 = mkstemp(dir='.', suffix='.nc')
            ip4, nc_in2 = mkstemp(dir='.', suffix='.nc')
            ip5, nc_out = mkstemp(dir='.', suffix='.nc')
            nc_in1 = cdo.selvar(variable, input=nc_file, output=nc_in1)
            nc_in2 = cdo.selvar(variable, input=nc_anual_cycle, output=nc_in2)
            nc_out = cdo.sub(input=[nc_in1, nc_in2], output=nc_out)
            nc_anomal = nc_out
    except:
        msg = 'failed subtraction of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    return nc_anomal
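
A usage sketch for get_anomalies (the input file name is hypothetical; the reference period is a pair of datetime objects, as consumed by the calls above):

from datetime import datetime as dt

nc_anomal = get_anomalies('tas_day_model_19700101-20001231.nc',  # hypothetical input file
                          frac=0.2,
                          reference=[dt(1971, 1, 1), dt(2000, 12, 31)],
                          method='ocgis',   # 'cdo' would use cdo.ydaymean instead
                          sseas='serial')   # any other value smooths grid cells in parallel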
Example #26
0
def calc_indice_percentile(resource=[], variable=None,
                           prefix=None, indices='TG90p', refperiod=None,
                           grouping='yr', polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable dataset in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param indices: string of indice (default ='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period  = [datetime,datetime]
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: reference_file, indice_file
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    # TODO: see ticket https://github.com/bird-house/flyingpigeon/issues/200
    raise NotImplementedError('Sorry! Function is under construction.')

    if type(resource) != list:
        resource = list([resource])

    # if type(indices) != list:
    #     indices = list([indices])
    #
    # if type(groupings) != list:
    #     groupings = list([groupings])
    #
    # if type(refperiod) == list:
    #     refperiod = refperiod[0]
    #
    # if refperiod is not None:
    #     start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
    #     end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
    #     time_range = [start, end]
    # else:
    #     time_range = None

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    calc_group = calc_grouping(grouping)

    if variable is None:
        variable = get_variable(resource)

    if polygons is None:
        nc_reference = call(resource=resource,
                            prefix=str(uuid.uuid4()),
                            time_range=refperiod,
                            output_format='nc')
    else:
        nc_reference = clipping(resource=resource,
                                prefix=str(uuid.uuid4()),
                                time_range=refperiod,
                                output_format='nc',
                                polygons=polygons,
                                mosaic=mosaic)

    # arr = get_values(resource=nc_reference)
    # dt_arr = get_time(resource=nc_reference)
    # arr = ma.masked_array(arr)
    # dt_arr = ma.masked_array(dt_arr)
    # percentile = percentile
    # window_width = 5

    #     for indice in indices:
    #         name = indice.replace('_', str(percentile))
    #         var = indice.split('_')[0]
    #
    #         operation = None
    #         if 'T' in var:
    #             if percentile >= 50:
    #                 operation = 'Icclim%s90p' % var
    #                 func = 'icclim_%s90p' % var  # icclim_TG90p
    #             else:
    #                 operation = 'Icclim%s10p' % var
    #                 func = 'icclim_%s10p' % var
    #
    #             ################################
    #             # load the appropriate operation
    #             ################################
    #
    #             ops = [op for op in dir(lic) if operation in op]
    #             if len(ops) == 0:
    #                 raise Exception("operator does not exist %s", operation)
    #
    #             exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
    #             calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
    #
    #             if polygons is None:
    #                 nc_indices.extend(call(resource=resource,
    #                                        prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
    #                                        calc=calc,
    #                                        calc_grouping=calc_group,
    #                                        output_format='nc'))
    #             else:
    #                 nc_indices.extend(clipping(resource=resource,
    #                                            prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
    #                                            calc=calc,
    #                                            calc_grouping=calc_group,
    #                                            output_format='nc',
    #                                            polygons=polygons,
    #                                            mosaic=mosaic,
    #                                            ))
    # if len(nc_indices) is 0:
    #     LOGGER.debug('No indices are calculated')
    #     return None
    return nc_indices
Example #27
0
def uncertainty(resouces, variable=None, title=None, dir_out=None): 
  """
  Returns an HTML file containing the appropriate uncertainty plot. 
  
  :param resouces: list of files containing the same variable 
  :param variable: variable to be visualised; if None (default) the variable will be detected
  :param title: string to be used as title
  :param dir_out: directory for the output file (default: current directory)
  """
  logger.debug('Start visualisation uncertainty plot')

  from bokeh.plotting import figure, output_file, save

  import cdo 
  cdo = cdo.Cdo()
  
  # === prepare environment
  if type(resouces) == str: 
    resouces = list([resouces])    
  if variable == None:
    variable = utils.get_variable(resouces[0])
  if title == None:
    title = "Field mean of %s " % variable
  if dir_out == None: 
    dir_out = '.'

  # === prepare bokeh
  try: 
    o1 , output_html = mkstemp(dir=dir_out, suffix='.html')
  
    fig = figure(x_axis_type = "datetime", tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    
    output_file(output_html, title=variable, autosave=True,)
  
    # === get the datetime
    dates = set()
    for nc in resouces:

        logger.debug('looping files : %s ' % (nc))
        # get timestamps
        rawDate = cdo.showdate(input=[nc]) # ds.variables['time'][:]
        strDate = rawDate[0].split('  ')
        logger.debug('len strDate : %s ' % (len(strDate)))
        dates =  dates.union(strDate) #dates.union( utils.get_time(nc))

    #self.show_status('dates : %s ' % len(dates), 62)
    ldates = list(dates)
    ldates.sort()
    ddates = dict( (ldates[i], i) for i in range(0,len(ldates)))

    # initialise matrix

    ma = np.full([len(ddates), len(resouces)], np.nan)
    #self.show_status('ddates : %s ' % ddates, 62)

    # fill matrix
    for y in range(0,len(resouces)) : 
        rawDate = cdo.showdate(input=[resouces[y]]) # ds.variables['time'][:]
        strDate = rawDate[0].split('  ')

        ds=Dataset(resouces[y])
        data = np.squeeze(ds.variables[variable][:])
        if len(data.shape) == 3: 
          meanData = np.mean(data,axis=1)
          ts = np.mean(meanData,axis=1)
        else: 
          ts = data
        logger.debug('ts array  : %s ' % (len(ts)) )
        
        for t in range(0, len(strDate)) : 
            x = ddates.get(strDate[t],0)
            ma[x,y] = ts[t]
        
    # get datetimes
    dt = [datetime.strptime(elem, '%Y-%m-%d') for elem in ldates]
    mdat = np.ma.masked_array(ma ,np.isnan(ma))

    #self.show_status('matrix masked %s ' % mdat , 80)
    #logger.debug('matrix %s ', mdat.shape) 

    ma_mean = np.mean(mdat, axis=1)
    logger.debug('mean %s ' % len(ma_mean))
    ma_min = np.min(mdat, axis=1)
    ma_max = np.max(mdat, axis=1)
    #ma_sub = np.subtract(ma_max, ma_min)
    #ma_per75 = np.percentile(mdat,75, axis=0)
    #ma_per25 = np.percentile(mdat,25, axis=0)
    logger.debug('ma values %s' % len(mdat.data))

    #line(dt, ma_min , color='grey' ,line_width=1)
    #line(dt, ma_max , color='grey' , line_width=2 )
    fig.line(dt, ma_mean , color='red', line_width=1)

    x = []
    y = []
    x = np.append(dt,dt[::-1])
    y = np.append(ma_min, ma_max[::-1])

    fig.patch(x, y, color='grey', alpha=0.8, line_color=None)
    
    fig.title = "Mean and Uncertainty of %s " % variable
    
    save(fig)
    
    logger.debug('timeseries uncertainty plot done for %s' % variable) 
  except Exception as e:
    logger.exception('bokeh uncertainty plot failed for %s' % variable)
    raise  
  return output_html  
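
A usage sketch for the uncertainty plot above (file names are hypothetical; all ensemble members must contain the same variable):

ensemble = ['tas_EUR-11_model1.nc', 'tas_EUR-11_model2.nc', 'tas_EUR-11_model3.nc']  # hypothetical files
html = uncertainty(ensemble, variable='tas', title='Field mean of tas', dir_out='.')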
Example #28
0
def spaghetti(resouces, variable=None, title=None, dir_out=None):
  """
  creates a png file containing the appropriate spaghetti plot as a field mean of the values. 
  
  :param resouces: list of files containing the same variable 
  :param variable: variable to be visualised. If None (default), variable will be detected
  :param title: string to be used as title
  :param dir_out: directory for output files
  
  :retruns str: path to png file
  """
  
  try:
    fig = plt.figure(figsize=(20,10), dpi=600, facecolor='w', edgecolor='k')
    logger.debug('Start visualisation spaghetti plot')
    
    # === prepare environment
    if type(resouces) != list: 
      resouces = [resouces]    
    if variable == None:
      variable = utils.get_variable(resouces[0])
    if title == None:
      title = "Field mean of %s " % variable
    if dir_out == None: 
      dir_out = os.curdir
    logger.info('plot values preparation done')  
  except Exception as e:
    msg = "plot values preparation failed: %s" % (e)
    logger.exception(msg)
    raise Exception(msg)

  try: 
    o1 , output_png = mkstemp(dir=dir_out, suffix='.png')
    
    for c , nc in enumerate(resouces):
      # get timestamps
      try: 
        d = utils.get_time(nc) # [datetime.strptime(elem, '%Y-%m-%d') for elem in strDate[0]]
        
        dt = [datetime.strptime(str(i), '%Y-%m-%d %H:%M:%S') for i in d ]
        ds=Dataset(nc)
        data = np.squeeze(ds.variables[variable][:])
        if len(data.shape) == 3: 
          meanData = np.mean(data,axis=1)
          ts = np.mean(meanData,axis=1)
        else: 
          ts = data[:]
        plt.plot( dt,ts )
        #fig.line( dt,ts )
      except Exception as e:
        msg = "lineplot failed for %s" % (nc)
        logger.exception(msg)
        raise Exception(msg)
      
    plt.title(title, fontsize=20)
    plt.grid()
    fig.savefig(output_png)
    plt.close()
    logger.info('timeseries spaghetti plot done for %s with %s lines.' % (variable, c + 1)) 
  except Exception as e:
    msg = 'matplotlib spaghetti plot failed: %s' % e
    logger.exception(msg)
    raise Exception(msg) 
  return output_png 
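
A usage sketch (hypothetical file names): each input file is reduced to a field-mean time series and drawn as one line:

png = spaghetti(['tas_model1.nc', 'tas_model2.nc'],  # hypothetical ensemble members
                variable='tas', dir_out='.')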
Example #29
0
def spaghetti(resouces, variable=None, title=None, dir_out=None):
  """
  retunes a png file containing the appropriate spaghetti plot. 
  
  :param resouces: list of files containing the same variable 
  :param variable: variable to be visualised, if None (default) variable will be detected
  :param title: sting to be used as title
  :param dir_out: directory for output files
  """

  fig = plt.figure(figsize=(20,10), dpi=600, facecolor='w', edgecolor='k')

  logger.debug('Start visualisation spaghetti plot')
  
  # === prepare environment
  if type(resouces) == str: 
    resouces = list([resouces])    
  if variable == None:
    variable = utils.get_variable(resouces[0])
  if title == None:
    title = "Field mean of %s " % variable
  if dir_out == None: 
    dir_out = os.curdir

  try: 
    o1 , output_png = mkstemp(dir=dir_out, suffix='.png')
    
    for c , nc in enumerate(resouces):
      # get timestamps
      try: 
        d =  utils.get_time(nc) # [datetime.strptime(elem, '%Y-%m-%d') for elem in strDate[0]]
        
        dt = [datetime.strptime(str(i), '%Y-%m-%d %H:%M:%S') for i in d ]
        ds=Dataset(nc)
        data = np.squeeze(ds.variables[variable][:])
        if len(data.shape) == 3: 
          meanData = np.mean(data,axis=1)
          ts = np.mean(meanData,axis=1)
        else: 
          ts = data
        plt.plot( dt,ts )
        #fig.line( dt,ts )
      except Exception as e:
        logger.debug('lineplot failed for %s: %s\n' % (nc, e))

      # plot into current figure
      # , legend= nc 
    
    #fig.legend()[0].orientation = "bottom_left"
    # fig.legend().orientation = "bottom_left"
    plt.title(title, fontsize=20)
    plt.grid()# .grid_line_alpha=0.3
    #lt.rcParams.update({'font.size': 22})
    #window_size = 30
    #window = np.ones(window_size)/float(window_size)
    fig.savefig(output_png)
    #bplt.hold('off')
    
    plt.close()
    
    logger.debug('timeseries spaghetti plot done for %s with %s lines.' % (variable, c + 1)) 
  except Exception as e:
    msg = 'matplotlib spaghetti plot failed for %s' % variable
    logger.debug(msg)
    #raise Exception(msg) 
  return output_png 
Example #30
0
def get_gam(ncs_indices, coordinate):
  
  from netCDF4 import Dataset
  from os.path import basename
  from shapely.geometry import Point
  from numpy import squeeze, ravel, isnan, nan, array, reshape
  
  from flyingpigeon.utils import get_variable, get_values, unrotate_pole
  from flyingpigeon.ocgis_module import call 

  try:
    from rpy2.robjects.packages import importr
    import rpy2.robjects as ro
    import rpy2.robjects.numpy2ri
    
    rpy2.robjects.numpy2ri.activate()    
    base = importr("base")
    stats = importr("stats")
    mgcv = importr("mgcv")
    logger.info('rpy2 modules imported')
  except Exception as e: 
    msg = 'failed to import rpy2 modules %s' % e
    logger.debug(msg)
    raise Exception(msg)

  for i, ncs in enumerate(ncs_indices):
    # ocgis needs unrotated coordinates to extract points
    # unrotate_pole writes lats and lons into the file. 
    # WARNING: will fail if the data is stored on a file system without write permissions 
    try: 
      lats, lons = unrotate_pole(ncs, write_to_file=True)
      point = Point(float(coordinate[0]), float(coordinate[1]))
      # get the values
      variable = get_variable(ncs)
      agg = basename(ncs).split('_')[-2]
      indice = '%s_%s' % (variable, agg)
      timeseries = call(resource=ncs, geom=point, select_nearest=True)
      ts = Dataset(timeseries)
      vals = squeeze(ts.variables[variable][:])
      from numpy import min, max, append
      # stretch the value range by +/- its span so that the GAM response
      # curve is defined beyond the observed interval
      dif = max(vals) - min(vals)
      a = append(vals - dif, vals)
      vals = append(a, vals + dif)
      
      if i == 0:
        from numpy import zeros, ones
        # presence/absence vector: absence for the shifted-down values,
        # presence for the observed values, absence for the shifted-up values
        a = append(zeros(len(vals)), ones(len(vals)))
        PA = append(a, zeros(len(vals)))
        data = {'PA': ro.FloatVector(PA)}
        data[str(indice)] = ro.FloatVector(vals)
        form = 'PA ~ '
        form = form + 's(%s, k=3)' % indice 
      else: 
        form = form + ' + s(%s, k=3)' % indice
        data[str(indice)] = ro.FloatVector(vals)

    except Exception as e: 
      msg = 'Failed to prepare data %s' % e
      logger.debug(msg)

  try: 
      
    logger.info(data)  
    dataf = ro.DataFrame(data)
    eq = ro.Formula(str(form))
    gam_model = mgcv.gam(base.eval(eq), data=dataf, family=stats.binomial(), scale=-1, na_action=stats.na_exclude) # 
    logger.info('GAM model trained')
  except Exception as e: 
    msg = 'Failed to generate GAM model %s' % e
    logger.debug(msg)
  
  # ### ###########################
  # # plot response curves
  # ### ###########################
  try: 
    from flyingpigeon.visualisation import concat_images
    from tempfile import mkstemp
    grdevices = importr('grDevices')
    graphicDev = importr('Cairo')
    infos = []    
    for i in range(1,len(ncs_indices)+1):
      
      ip, info = mkstemp(dir='.', suffix='.png')
      #grdevices.png(filename=info)
      #graphicDev.CairoPDF(info, width=7, height=7, pointsize=12)
      graphicDev.CairoPNG(info, width=640, height=480, pointsize=12)
      print('file opened!')
      
      infos.append(info)
            
      ylim = ro.IntVector([-6,6])
      trans = ro.r('function(x){exp(x)/(1+exp(x))}')
      mgcv.plot_gam(gam_model, trans=trans, shade='T',
                    col='black', select=i, ylab='Predicted Probability', rug=False,
                    cex_lab=1.4, cex_axis=1.4)
      print('gam plotted ;-)')
      grdevices.dev_off()
      #graphicDev.dev_off()
      #graphicDev.Cairo_onSave( dev_cur(), onSave=True )
      
    print(' %s plots generated ' % len(infos))
    infos_concat = concat_images(infos, orientation='h')
  except Exception as e: 
    msg = 'Failed to plot statistical graphic %s' % e
    logger.debug(msg)
    raise Exception(msg)
    
  return gam_model, infos_concat
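
A usage sketch for get_gam above (file names and coordinate are hypothetical; the coordinate tuple is passed straight to shapely's Point, so the (lon, lat) order is an assumption):

ncs = ['TG_yr_EUR-11_historical.nc', 'TNn_yr_EUR-11_historical.nc']  # hypothetical indice files
gam_model, png_curves = get_gam(ncs, coordinate=(2.35, 48.85))  # assumed (lon, lat)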
Example #31
0
    def _handler(self, request, response):

        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        ncs = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        var = get_variable(ncs[0])
        LOGGER.info('variable to be plotted: {}'.format(var))

        # mosaic = self.mosaic.getValue()
        if 'region' in request.inputs:
            regions = [inp.data for inp in request.inputs['region']]
            try:
                png_region = vs.plot_polygons(regions)
            except Exception as ex:
                msg = 'failed to plot the polygon to world map: {}'.format(
                    str(ex))
                LOGGER.exception(msg)
                raise Exception(msg)

            # clip the demanded polygons
            subsets = clipping(
                resource=ncs,
                variable=var,
                polygons=regions,
                mosaic=True,
                spatial_wrapping='wrap',
            )
        else:
            subsets = ncs
            png_region = vs.plot_extend(ncs[0])

        response.update_status('Arguments set for subset process', 0)

        try:
            tar_subsets = archive(subsets)
        except Exception as ex:
            msg = 'failed to archive subsets: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            png_uncertainty = vs.uncertainty(subsets, variable=var)
        except Exception as ex:
            msg = 'failed to generate the uncertainty plot: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            png_spaghetti = vs.spaghetti(
                subsets,
                variable=var,
            )

        except Exception as ex:
            msg = 'failed to generate the spaghetti plot: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            from flyingpigeon import robustness as ro
            signal, low_agreement_mask, high_agreement_mask, text_src = ro.signal_noise_ratio(
                resource=subsets,
                # start=None, end=None,
                # timeslice=None,
                # variable=var
            )
            # if title is None:
            title = 'signal robustness of %s ' % (
                var)  # , end1, end2, start1, start2
            png_robustness = vs.map_robustness(
                signal,
                high_agreement_mask,
                low_agreement_mask,
                # cmap=cmap,
                #    title=title
            )
            LOGGER.info('robustness graphic generated')
        except Exception as ex:
            msg = 'failed to generate the robustness plot: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        factsheet = vs.factsheetbrewer(png_region=png_region,
                                       png_uncertainty=png_uncertainty,
                                       png_spaghetti=png_spaghetti,
                                       png_robustness=png_robustness)

        response.outputs['output_nc'].file = tar_subsets
        response.outputs['output_factsheet'].file = factsheet
        response.update_status("done", 100)
        return response
Example #32
0
def calc_indice_simple(resource=[], variable=None, prefix=None, indices=None,
    polygons=None, mosaik=False, groupings='yr', dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default ='SU')
    :param polygons: list of polygons (default ='FRA')
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    from os.path import join, dirname, exists
    from os import makedirs
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    #DIR_SHP = config.shapefiles_dir()
    #env.DIR_SHPCABINET = DIR_SHP
    #env.OVERWRITE = True

    if type(resource) != list: 
      resource = list([resource])
    if type(indices) != list: 
      indices = list([indices])
    if type(polygons) != list and polygons != None:
      polygons = list([polygons])
    if type(groupings) != list:
      groupings = list([groupings])
    
    if dir_output != None:
      if not exists(dir_output): 
        makedirs(dir_output)
    
    #from flyingpigeon.subset import select_ugid
    #    tile_dim = 25
    output = None


    experiments = sort_by_filename(resource)
    outputs = []
    
    for key in experiments:
      if variable == None: 
        variable = get_variable(experiments[key][0])
        #variable = key.split('_')[0]
      try: 
        
        if variable == 'pr': 
          calc = 'pr=pr*86400'
          ncs = ocgis_module.call(resource=experiments[key],
                     variable=variable,
                     dimension_map=dimension_map, 
                     calc=calc,
                     memory_limit=memory_limit,
                     #calc_grouping=calc_group, 
                     prefix=str(uuid.uuid4()), 
                     dir_output=dir_output,
                     output_format='nc')

        else:
          
          ncs = experiments[key]         
        for indice in indices:
          logger.info('indice: %s' % indice)
          try: 
            calc = [{'func' : 'icclim_' + indice, 'name' : indice}]
            logger.info('calc: %s' % calc)
            for grouping in groupings:
              logger.info('grouping: %s' % grouping)
              try:
                calc_group = calc_grouping(grouping)
                logger.info('calc_group: %s' % calc_group)
                if polygons == None:
                  try:
                    if prefix == None:   
                      prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping )
                    tmp = ocgis_module.call(resource=ncs,
                     variable=variable,
                     dimension_map=dimension_map, 
                     calc=calc,
                     calc_grouping= calc_group, 
                     prefix=prefix, 
                     dir_output=dir_output,
                     output_format='nc')
                    outputs.extend( [tmp] )
                  except Exception as e:
                    msg = 'could not calc indice %s for domain in %s' %( indice, key)
                    logger.exception( msg )
                    raise Exception(msg)   
                else:
                  try:
                    if prefix == None:   
                      prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping )
                    tmp = clipping(resource=ncs,
                     variable=variable,
                     dimension_map=dimension_map, 
                     calc=calc,
                     calc_grouping= calc_group, 
                     prefix=prefix, 
                     polygons=polygons,
                     mosaik=mosaik,
                     dir_output=dir_output,
                     output_format='nc')
                    outputs.extend( [tmp] )
                  except Exception as e:
                    msg = 'could not calc indice %s for domain in %s' %( indice, key)
                    logger.exception( msg )
                    raise Exception(msg)
                logger.info('indice file calculated')      
              except Exception as e:
                msg = 'could not calc indice %s for key %s and grouping %s' %  (indice, key, grouping)
                logger.exception(msg)
                raise Exception(msg)  
          except Exception as e:
            msg = 'could not calc indice %s for key %s' % ( indice, key)
            logger.exception(msg)
            raise Exception(msg)        
      except Exception as e:
        msg = 'could not calc key %s' % key
        logger.exception(msg)
        raise Exception(msg)
    return outputs
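
A usage sketch for calc_indice_simple (the input file is hypothetical; 'SU', summer days, is one of the simple icclim indices and requires tasmax):

outputs = calc_indice_simple(resource=['tasmax_day_model_19710101-20001231.nc'],  # hypothetical file
                             indices=['SU'],
                             groupings='yr',
                             polygons=None,
                             dir_output='.')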
Example #33
0
def seacyc(archive, simulation, method='base'):
    """
    Subtracts the seasonal cycle.

    :param archive: netCDF file containing the reference period
    :param simulation: netCDF file containing the period to be analysed
    :param method: method to generate the seasonal cycle files
                   base = seasonal cycle generated from reference period
                   sim = seasonal cycle generated from period to be analysed
                   own = seasonal cycle generated for both time windows

    :return [str,str]: two netCDF filenames for analysis and reference period (located in working directory)
    """
    try:
        logger.debug('seacyc started with method: %s' % method)

        from shutil import copy
        from flyingpigeon.ocgis_module import call
        from flyingpigeon.utils import get_variable
        from cdo import Cdo
        cdo = Cdo()

        if method == 'base':
            seasoncyc_base = cdo.ydaymean(
                input=archive, output='seasoncyc_base.nc')
            variable = get_variable(archive)
            # seasoncyc_base = call(resource=archive,
            # variable=variable,
            # prefix='seasoncyc_base',
            #calc=[{'func': 'mean', 'name': variable}],
            # calc_grouping=['day','month'] )

            logger.debug('seasoncyc_base calculated : %s' % seasoncyc_base)
            seasoncyc_sim = 'seasoncyc_sim.nc'
            copy(seasoncyc_base, seasoncyc_sim)
        elif method == 'sim':
            # seasoncyc_sim  = call(resource=archive,
              # variable=variable,
              # prefix='seasoncyc_sim',
              #calc=[{'func': 'mean', 'name': variable}],
              # calc_grouping=['day','month'] )
            seasoncyc_sim = cdo.ydaymean(input=simulation, output='seasoncyc_sim.nc')
            seasoncyc_base = 'seasoncyc_base.nc'
            copy(seasoncyc_sim, seasoncyc_base)
        elif method == 'own':
            # seasoncyc_base = call(resource=archive,
              # variable=variable,
              # prefix='seasoncyc_base',
              #calc=[{'func': 'mean', 'name': variable}],
              # calc_grouping=['day','month'] )
            seasoncyc_base = cdo.ydaymean(
                input=archive, output='seasoncyc_base.nc')
            # seasoncyc_sim  = call(resource=archive,
            # variable=variable,
            # prefix='seasoncyc_sim',
            #calc=[{'func': 'mean', 'name': variable}],
            # calc_grouping=['day','month'] )
            seasoncyc_sim = cdo.ydaymean(
                input=simulation, output='seasoncyc_sim.nc')
        else:
            raise Exception('normalisation method not found')

    except Exception as e:
        msg = 'seacyc function failed : %s ' % e
        logger.debug(msg)
        raise Exception(msg)

    return seasoncyc_base, seasoncyc_sim
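
A usage sketch for seacyc (hypothetical file names; note that the seasonal-cycle files are written with fixed names into the working directory):

seasoncyc_base, seasoncyc_sim = seacyc('slp_reference_19700101-19991231.nc',   # hypothetical archive
                                       'slp_simulation_20010101-20051231.nc',  # hypothetical simulation
                                       method='base')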
Example #34
0
    def execute(self):
        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp
        
      
        ################################
        # reading in the input arguments
        ################################
        try: 
            logger.info('read in the arguments')
            resource = self.getInputValues(identifier='resource')
            season = self.getInputValues(identifier='season')[0]
            bbox = self.getInputValues(identifier='BBox')[0]
            #model_var = self.getInputValues(identifier='reanalyses')[0]
            period = self.getInputValues(identifier='period')[0]            
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
            # model, var = model_var.split('_')
            
            bbox = [float(b) for b in bbox.split(',')]

            start = dt.strptime(period.split('-')[0] , '%Y%m%d')
            end = dt.strptime(period.split('-')[1] , '%Y%m%d')

            kappa = int(self.getInputValues(identifier='kappa')[0])
            
            logger.info('bbox %s' % bbox)
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            
        except Exception as e: 
            logger.debug('failed to read in the arguments %s ' % e)
       
                
        ############################################################    
        ### get the required bbox and time region from resource data
        ############################################################
        
        # from flyingpigeon.weatherregimes import get_level
        
        from flyingpigeon.ocgis_module import call 
        from flyingpigeon.utils import get_variable
        time_range = [start, end]
      
        variable = get_variable(resource)
        model_subset = call(resource=resource, variable=variable, 
          geom=bbox, spatial_wrapping='wrap', time_range=time_range,  #conform_units_to=conform_units_to
          )
        logger.info('Dataset subset done: %s ' % model_subset)
        
        ##############################################
        ### computing anomalies 
        ##############################################
        
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        ### extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)

        #######################
        ### call the R scripts
        #######################
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
          rworkspace = curdir
          Rsrc = config.Rsrc_dir() 
          Rfile = 'weatherregimes_model.R'
          
          infile = model_season  #model_subset #model_ponderate 
          modelname = 'MODEL'
          yr1 = start.year
          yr2 = end.year
          ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
          ip, file_pca = mkstemp(dir=curdir, suffix='.dat')
          ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
                    
          args = ['Rscript', join(Rsrc,Rfile), '%s/' % curdir, 
                  '%s/' % Rsrc, '%s'% infile, '%s' % variable, 
                  '%s' % output_graphics, '%s' % file_pca,
                   '%s' % file_class, '%s' % season, 
                   '%s' % start.year, '%s' % end.year,
                   '%s' % 'MODEL', '%s' % kappa]
          logger.info('R call built')
        except Exception as e: 
          msg = 'failed to build the R command %s' % e
          logger.error(msg)  
          raise Exception(msg)
        try:
          output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
          logger.info('R outlog info:\n %s ' % output)
          logger.debug('R outlog errors:\n %s ' % error)
          if len(output) > 0:            
            self.status.set('**** weatherregimes in R succeeded', 90)
          else:
            logger.error('No output returned from R call')
        except Exception as e: 
          msg = 'weatherregime in R %s ' % e
          logger.error(msg)  
          raise Exception(msg)

        ############################################
        ### set the outputs
        ############################################

        self.Routput_graphic.setValue(output_graphics)
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
Example #36
0
def method_A(resource=[], start=None, end=None, timeslice=20, 
  variable=None, title=None, cmap='seismic'):
  """Calculates the ensemble robustness signal as the difference between a reference and a comparison period.
  
  :param resource: list of paths to netCDF files
  :param start: beginning of reference period (if None (default), the first year of the consistent ensemble will be detected)
  :param end: end of comparison period (if None (default), the last year of the consistent ensemble will be detected)
  :param timeslice: period length for mean calculation of reference and comparison period
  :param variable: variable name to be detected in the netCDF file. If not set (not recommended), the variable name will be detected
  :param title: string to be used as title for the signal map
  :param cmap: color scheme for the signal map plot 

  :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc, graphic.png, text.txt
  """
  from os.path import split
  from cdo import Cdo
  cdo = Cdo()
  cdo.forceOutput = True 
  
  try: 
    # preparing the resource
#    from flyingpigeon.ocgis_module import call
    file_dic = sort_by_filename(resource, historical_concatination=True)
    #print file_dic
    logger.info('file names sorted: %s experiments found' % len(file_dic.keys()))
  except Exception as e:
    msg = 'failed to sort the input files'
    logger.exception(msg)
    raise Exception(msg)
  

  try:
    mergefiles = []
    for key in file_dic.keys():
      
      if type(file_dic[key]) == list and len(file_dic[key]) > 1:
        input = []
        for i in file_dic[key]:
          print(i)
          input.extend([i.replace(' ', '\\\ ')])
        mergefiles.append(cdo.mergetime(input=input, output=key+'_mergetime.nc'))
      else:
        mergefiles.extend(file_dic[key])
#      files.append(cdo.selyear('%s/%s' % (start1,end2), input = tmpfile , output =  key+'.nc' )) #python version
    logger.info('datasets merged %s ' % mergefiles)
  except Exception as e:
    msg = 'seltime and mergetime failed %s' % e
    logger.exception(msg)
    raise Exception(e)    
  
  try: 
    text_src = 'infiles.txt'
    with open(text_src, 'a') as f:
      for key in file_dic.keys():
        f.write(key + '\n')
  except Exception as e:
    msg = 'failed to write source textfile'
    logger.exception(msg)
    raise Exception(msg)
    
  # configure reference and compare period
  try: 
    if start == None:
      st_set = set()
      en_set = set()
      for f in mergefiles:
        print(f)
        times = get_time(f)
        st_set.update([times[0].year])
        if end == None: 
          en_set.update([times[-1].year])
      start = max(st_set)
      if end == None:
        end = min(en_set)
    logger.info('Start and End: %s - %s ' % (start, end))
    if start >= end: 
      logger.error('ensemble is inconsistent!!! start year is later than end year')
  except Exception as e:
    msg = 'failed to detect start and end times of the ensemble'
    logger.exception(msg)
    raise Exception(msg)

  # set the periods: 
  try: 
    start = int(start)
    end = int(end)
    if timeslice == None: 
      timeslice = int((end - start) / 3)
      if timeslice == 0: 
        timeslice = 1
    else: 
      timeslice = int(timeslice)
    start1 = start
    start2 = start1 + timeslice - 1 
    end1 = end - timeslice + 1
    end2 = end
    logger.info('timeslice and periods set')
  except Exception as e:
    msg = 'failed to set the periods'
    logger.exception(msg)
    raise Exception(msg)

  try:
    files = []
    for i, mf in enumerate(mergefiles):
      files.append(cdo.selyear('{0}/{1}'.format(start1,end2), input=[mf.replace(' ','\ ')] , output='file_{0}_.nc'.format(i) )) #python version
    logger.info('timeseries selected from defined start to end year')
  except Exception as e:
    msg = 'selyear failed'
    logger.exception(msg)
    raise Exception(msg)    

  try: 
    # ensemble mean 
    nc_ensmean = cdo.ensmean(input=files , output='nc_ensmean.nc')
    logger.info('ensemble mean calculation done')
  except Exception as e:
    msg = 'ensemble mean failed'
    logger.exception(msg)
    raise Exception(msg)
  
  try: 
    # ensemble std 
    nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
    logger.info('ensemble std calculation done')
  except Exception as e:
    msg = 'ensemble std failed'
    logger.exception(msg)
    raise Exception(msg)
  
  # get the signal as the difference between the start period (first years) and the end period (last years):
  try:
    selyearstart = cdo.selyear('%s/%s' % (start1, start2), input=nc_ensmean, output='selyearstart.nc') 
    selyearend = cdo.selyear('%s/%s' % (end1, end2), input=nc_ensmean, output='selyearend.nc')
    meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
    meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
    signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
    logger.info('Signal calculation done')
  except Exception as e:
    msg = 'calculation of signal failed'
    logger.exception(msg)
    raise Exception(msg)
  
  # get the intermodel standard deviation (mean over whole period)
  try:
    #std_selyear = cdo.selyear('%s/%s' % (end1,end2), input=nc_ensstd, output='std_selyear.nc')
    #std = cdo.timmean(input = std_selyear, output = 'std.nc')
    
    std = cdo.timmean(input = nc_ensstd, output = 'std.nc')
    std2 = cdo.mulc('2', input = std, output = 'std2.nc')
    logger.info('calculation of internal model std for time period done')
  except Exception as e:
    msg = 'calculation of internal model std failed'
    logger.exception(msg) 
    raise Exception(msg)
  try:
    absolut = cdo.abs(input=signal, output='absolut_signal.nc')
    high_agreement_mask = cdo.gt(input=[absolut,std2],  output= 'large_change_with_high_model_agreement.nc')
    low_agreement_mask = cdo.lt(input=[absolut,std], output= 'small_signal_or_low_agreement_of_models.nc')
    logger.info('high and low mask done')
  except Exception as e:
    msg = 'calculation of robustness mask failed'
    logger.exception(msg)
    raise Exception(msg)
  
  try: 
    if variable is None: 
      variable = get_variable(signal)
    logger.info('variable to be plotted: %s' % variable)
    
    if title is None: 
      title = 'Change of %s (difference of mean %s-%s to %s-%s)' % (variable, end1, end2, start1, start2)  
    
    graphic = None
    graphic = map_ensembleRobustness(signal, high_agreement_mask, low_agreement_mask, 
              variable=variable, 
              cmap=cmap,
              title = title)
    
    logger.info('graphic generated')
  except Exception as e:
    msg = 'graphic generation failed: %s' % e
    logger.exception(msg)
    raise Exception(msg)

  return signal, low_agreement_mask, high_agreement_mask, graphic, text_src
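
The two cdo masks above encode a simple robustness rule: a change counts as robust where |signal| exceeds twice the inter-model standard deviation, and as weak or uncertain where it stays below one standard deviation. A minimal numpy sketch of the same logic, assuming signal and std are 2D arrays already read from the netCDF files (the array contents here are random placeholders):

import numpy as np

signal = np.random.randn(10, 10)       # change signal (end-period mean minus start-period mean)
std = np.abs(np.random.randn(10, 10))  # inter-model standard deviation

# cdo.gt/cdo.lt produce 0/1 masks; numpy comparisons do the same
high_agreement_mask = (np.abs(signal) > 2 * std).astype(int)  # large change with high model agreement
low_agreement_mask = (np.abs(signal) < std).astype(int)       # small signal or low agreement of models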
Example #37
0
def test_get_variable():
    variable = utils.get_variable(local_path(TESTDATA["cmip5_tasmax_2007_nc"]))
    assert "tasmax" == variable

    variable = utils.get_variable(local_path(TESTDATA["cordex_tasmax_2007_nc"]))
    assert "tasmax" == variable
Example #38
0
def get_gam(ncs_reference, PAmask, modelname=None):
    """
    GAM statistical training based on presence/absence mask and indices

    :param ncs_reference: list of netCDF files containing the indices
    :param PAmask: presence/absence mask as output from get_PAmask
    :param modelname: model name to be used for plotting

    :return gam_model, prediction, statinfos: R GAM statistics,
                                              occurrence prediction based on the ncs_reference files,
                                              graphical visualisation of the regression curves
    """

    try:
        from netCDF4 import Dataset
        from os.path import basename
        from numpy import squeeze, ravel, isnan, nan, array, reshape

        from flyingpigeon.utils import get_variable

        from rpy2.robjects.packages import importr
        import rpy2.robjects as ro
        import rpy2.robjects.numpy2ri

        rpy2.robjects.numpy2ri.activate()

        base = importr("base")
        stats = importr("stats")
        mgcv = importr("mgcv")
        logger.info('rpy2 modules imported')
    except:
        msg = 'failed to import rpy2 modules'
        logger.exception(msg)
        raise

    try:
        data = {'PA': ro.FloatVector(ravel(PAmask))}
        domain = PAmask.shape
        logger.info('mask data converted to R float vector')
    except:
        msg = 'failed to convert mask to R vector'
        logger.exception(msg)
        raise Exception(msg)

    try:
        form = 'PA ~ '
        ncs_reference.sort()
        for i, nc in enumerate(ncs_reference):
            var = get_variable(nc)
            agg = basename(nc).split('_')[-2]
            ds = Dataset(nc)
            vals = squeeze(ds.variables[var])
            # vals[vals > 1000] = 0
            vals[isnan(PAmask)] = nan
            indice = '%s_%s' % (var, agg)
            data[str(indice)] = ro.FloatVector(ravel(vals))
            if i == 0:
                form = form + 's(%s, k=3)' % indice
            else:
                form = form + ' + s(%s, k=3)' % indice
        logger.info('form string generated for gam model')
    except:
        msg = 'form string generation for gam failed'
        logger.exception(msg)
        # raise Exception

    try:
        dataf = ro.DataFrame(data)
        eq = ro.Formula(str(form))
        gam_model = mgcv.gam(base.eval(eq),
                             data=dataf,
                             family=stats.binomial(),
                             scale=-1,
                             na_action=stats.na_exclude)
        logger.info('GAM model trained')
    except:
        msg = 'failed to train the GAM model'
        logger.exception(msg)

    # ####################
    # plot response curves
    # ####################

    try:
        try:
            from tempfile import mkstemp
            grdevices = importr('grDevices')
            ip, statinfos = mkstemp(dir='.', suffix='.pdf')
            grdevices.pdf(file=statinfos)
            for i in range(1, len(ncs_reference) + 1):
                try:
                    trans = ro.r('function(x){exp(x)/(1+exp(x))}')
                    _ = mgcv.plot_gam(gam_model, trans=trans, shade='T',
                                      col='black', select=i, ylab='Predicted Probability',
                                      main=modelname,
                                      rug=False, cex_lab=1.4, cex_axis=4.2)
                    logger.info('plot GAM curves for %s.', i)
                except:
                    logger.exception('failed to plot GAM curves for %s.', i)
            _ = grdevices.dev_off()
        except:
            logger.exception('GAM plot failed in SDM process')

        try:
            predict_gam = mgcv.predict_gam(gam_model, type="response",
                                           progress="text", na_action=stats.na_pass)
            prediction = array(predict_gam).reshape(domain)
            logger.info('SDM prediction for reference period processed')
        except:
            logger.exception('failed to process SDM prediction')
            prediction = None
    except:
        logger.exception('failed to plot GAM curves')
        from tempfile import mkstemp
        _, statinfos = mkstemp(dir='.', suffix='.pdf')  # empty placeholder so the return value is defined
        prediction = None

    return gam_model, prediction, statinfos
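
A hypothetical call of the function above, assuming two yearly indice files and a presence/absence array with NaN outside the species domain (all file and model names invented; running this requires rpy2, the R package mgcv and real netCDF files):

import numpy as np

ncs_reference = ['TG_yr_ref.nc', 'RR_yr_ref.nc']        # hypothetical indice files
PAmask = np.random.choice([0.0, 1.0], size=(90, 120))   # presence/absence grid
PAmask[0:10, :] = np.nan                                # cells outside the domain

gam_model, prediction, statinfos = get_gam(ncs_reference, PAmask, modelname='CNRM-CM5')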
Example #39
0
def get_gam(ncs_reference, PAmask):

    from netCDF4 import Dataset
    from os.path import basename
    from numpy import squeeze, ravel, isnan, nan, array, reshape

    from flyingpigeon.utils import get_variable

    try:
        from rpy2.robjects.packages import importr
        import rpy2.robjects as ro
        import rpy2.robjects.numpy2ri

        rpy2.robjects.numpy2ri.activate()

        base = importr("base")
        stats = importr("stats")
        mgcv = importr("mgcv")
        logger.info('rpy2 modules imported')
    except Exception as e:
        msg = 'failed to import rpy2 modules %s' % e
        logger.debug(msg)
        raise Exception(msg)

    try:
        data = {'PA': ro.FloatVector(ravel(PAmask))}
        domain = PAmask.shape
        logger.info('mask data converted to R float vector')
    except Exception as e:
        msg = 'failed to convert mask to R vector'
        logger.exception(msg)
        raise Exception(msg)

    form = 'PA ~ '
    ncs_reference.sort()

    try:
        for i, nc in enumerate(ncs_reference):
            var = get_variable(nc)
            agg = basename(nc).split('_')[-2]
            ds = Dataset(nc)
            vals = squeeze(ds.variables[var])
            vals[vals > 1000] = 0
            vals[isnan(PAmask)] = nan
            indice = '%s_%s' % (var, agg)
            data[str(indice)] = ro.FloatVector(ravel(vals))
            if i == 0:
                form = form + 's(%s, k=3)' % indice
            else:
                form = form + ' + s(%s, k=3)' % indice
    except Exception as e:
        logger.exception('form string generation for gam failed')

    dataf = ro.DataFrame(data)
    eq = ro.Formula(str(form))

    gam_model = mgcv.gam(base.eval(eq),
                         data=dataf,
                         family=stats.binomial(),
                         scale=-1,
                         na_action=stats.na_exclude)

    grdevices = importr('grDevices')

    ### ###########################
    # plot response curves
    ### ###########################

    from flyingpigeon.visualisation import concat_images
    from tempfile import mkstemp
    infos = []

    for i in range(1, len(ncs_reference) + 1):
        #ip, info =  mkstemp(dir='.',suffix='.pdf')
        ip, info = mkstemp(dir='.', suffix='.png')
        infos.append(info)
        grdevices.png(filename=info)
        #grdevices.pdf(filename=info)
        #ylim = ro.IntVector([-6,6])
        trans = ro.r('function(x){exp(x)/(1+exp(x))}')
        mgcv.plot_gam(
            gam_model,
            trans=trans,
            shade='T',
            col='black',
            select=i,
            ylab='Predicted Probability',
            rug=False,
            cex_lab=1.4,
            cex_axis=1.4,
        )
        grdevices.dev_off()

    infos_concat = concat_images(infos, orientation='h')
    predict_gam = mgcv.predict_gam(gam_model,
                                   type="response",
                                   progress="text",
                                   na_action=stats.na_exclude)
    prediction = array(predict_gam).reshape(domain)

    return gam_model, prediction, infos_concat
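
The R one-liner passed as trans is the inverse logit, which maps the GAM output from the link scale back to probabilities in (0, 1). A numpy equivalent for reference:

import numpy as np

def inv_logit(x):
    # exp(x) / (1 + exp(x)), the transformation used for the response curves
    return np.exp(x) / (1.0 + np.exp(x))

assert abs(inv_logit(0.0) - 0.5) < 1e-12  # link value 0 corresponds to probability 0.5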
Example #40
0
def set_metadata_segetalflora(resource):
  """
  :param resource: input files
  """
  # gather the set_metadata

  dic_segetalflora = {
    'keywords' : 'Segetalflora',
    'tier': '2',
    'in_var' : 'tas',
    'description':'Number of European segetalflora species',
    'method':'regression equation',
    'institution':'Julius Kuehn-Institut (JKI) Federal Research Centre for Cultivated Plants',
    'institution_url':'www.jki.bund.de',
    'institute_id' : "JKI",
    'contact_mail_3':'*****@*****.**',
    'version' : '1.0',
     }

  dic_climatetype = {
    '1' : 'cold northern species group',
    '2' : 'warm northern species group',
    '3' : 'moderate warm-toned species group',
    '4' : 'moderate warm-toned to mediterranean species group',
    '5' : 'mediterranean species group',
    '6' : 'climate-indifferent species',
    '7' : 'climate-undefinable species',
    'all' : 'species of all climate types'
      }

  try:
    set_basic_md(resource)
  except Exception as e:
    LOGGER.error(e)

  try:
    set_dynamic_md(resource)
  except Exception as e:
    LOGGER.error(e)

  #set the segetalflora specific metadata
  try:
    ds = Dataset(resource, mode='a')
    ds.setncatts(dic_segetalflora)
    ds.close()
  except Exception as e:
    LOGGER.error(e)
  # set the variable attributes:
  from flyingpigeon.utils import get_variable

  try:
    ds = Dataset(resource, mode='a')
    var = get_variable(resource)
    if 'all' in var:
      climat_type = 'all'
    else:
      climat_type = var[-1]

    culture_type = var.strip('sf').strip(climat_type)

    sf = ds.variables[var]
    sf.setncattr('units', '1')  # dimensionless unit as a string (CF convention)
    sf.setncattr('standard_name', 'sf%s%s' % (culture_type, climat_type))
    sf.setncattr('long_name', 'Segetal flora %s land use for %s' % (culture_type, dic_climatetype['%s' % climat_type]))
    ds.close()
  except Exception as e:
    LOGGER.error('failed to set sf attributes %s ' % e)
  # sort the attributes:
  try:
    ds = Dataset(resource, mode='a')
    att = ds.ncattrs()
    att.sort()
    for a in att:
      entry = ds.getncattr(a)
      ds.setncattr(a,entry)
    history = '%s , Segetalflora Impact Model V1.0' % (ds.history)
    ds.setncattr('history',history)
    ds.close()
  except Exception as e:
    LOGGER.error('failed to sort attributes %s ' % e)

  return resource
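
A quick sanity check of the written metadata, assuming out.nc (hypothetical name) has been processed by the function above; the attribute names come from dic_segetalflora:

from netCDF4 import Dataset
from flyingpigeon.utils import get_variable

ds = Dataset('out.nc')                  # hypothetical output of set_metadata_segetalflora
print(ds.getncattr('institute_id'))     # expected: 'JKI'
var = get_variable('out.nc')
print(ds.variables[var].long_name)      # 'Segetal flora ... land use for ...'
ds.close()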
Example #41
0
    def execute(self):
        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp
        
        ################################
        # reading in the input arguments
        ################################
        try: 
            resource = self.getInputValues(identifier='resource')
            url_Rdat = self.getInputValues(identifier='Rdat')[0]
            url_dat = self.getInputValues(identifier='dat')[0]
            url_ref_file = self.getInputValues(identifier='netCDF') # can be None
            season = self.getInputValues(identifier='season')[0]
            period = self.getInputValues(identifier='period')[0]            
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
        except Exception as e: 
            logger.debug('failed to read in the arguments %s ' % e)
        
        try: 
            start = dt.strptime(period.split('-')[0] , '%Y%m%d')
            end = dt.strptime(period.split('-')[1] , '%Y%m%d')
            # kappa = int(self.getInputValues(identifier='kappa')[0])
            
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            logger.info('read in the arguments')
            logger.info('url_ref_file: %s' % url_ref_file)
            logger.info('url_Rdat: %s' % url_Rdat)
            logger.info('url_dat: %s' % url_dat)
        except Exception as e: 
            logger.debug('failed to convert arguments %s ' % e)
           
        ############################
        # fetching training data
        ############################
        
        from flyingpigeon.utils import download, get_time
        from os.path import abspath
        
        try:
          dat = abspath(download(url_dat))
          Rdat = abspath(download(url_Rdat))
          logger.info('training data fetched')
        except Exception as e:
          logger.error('failed to fetch training data %s' % e)
          
        ############################################################    
        ### get the required bbox and time region from resource data
        ############################################################        
        # from flyingpigeon.weatherregimes import get_level
        
        from flyingpigeon.ocgis_module import call 
        from flyingpigeon.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file[0])  
            model_subset = call(resource=resource, variable=variable, 
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file, regrid_options='bil')
            logger.info('Dataset subset with regridding done: %s ' % model_subset)
        else:
            model_subset = call(resource=resource, variable=variable, 
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                )
            logger.info('Dataset time period extracted: %s ' % model_subset)
            
        
        ##############################################
        ### computing anomalies 
        ##############################################
        
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        ### extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)

        #######################
        ### call the R scripts
        #######################
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
          rworkspace = curdir
          Rsrc = config.Rsrc_dir() 
          Rfile = 'weatherregimes_projection.R'
          
          yr1 = start.year
          yr2 = end.year
          time = get_time(model_season, format='%Y%m%d')

          #ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
          ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
          ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
          ip, output_frec = mkstemp(dir=curdir, suffix='.txt')
                    
          args = ['Rscript', join(Rsrc,Rfile), '%s/' % curdir, 
                  '%s/' % Rsrc, 
                  '%s' % model_season, 
                  '%s' % variable,
                  '%s' % str(time).strip("[]").replace("'","").replace(" ",""),
            #      '%s' % output_graphics,
                  '%s' % dat, 
                  '%s' % Rdat, 
                  '%s' % file_pca,
                  '%s' % file_class, 
                  '%s' % output_frec,      
                  '%s' % season, 
                  '%s' % start.year, 
                  '%s' % end.year,                  
                  '%s' % 'MODEL']

          logger.info('R call built')
        except Exception as e: 
          msg = 'failed to build the R command %s' % e
          logger.error(msg)  
          raise Exception(msg)
        try:
          output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
          logger.info('R outlog info:\n %s ' % output)
          logger.debug('R outlog errors:\n %s ' % error)
          if len(output) > 0:
            self.status.set('**** weatherregime in R succeeded', 90)
          else:
            logger.error('no output returned from R call')
        except Exception as e: 
          msg = 'weatherregime in R failed %s ' % e
          logger.error(msg)  
          raise Exception(msg)

        ############################################
        ### set the outputs
        ############################################

        #self.Routput_graphic.setValue(output_graphics)
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
        self.output_frequency.setValue(output_frec)
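
Note the annual-cycle parsing fixed above: both ends of the 'YYYYMMDD-YYYYMMDD' string have to be used, otherwise the reference period collapses to a zero-length interval. A minimal sketch with an invented period string:

from datetime import datetime as dt

anualcycle = '19700101-19991231'        # hypothetical input value
cycst, cycen = anualcycle.split('-')
reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
assert reference[0] < reference[1]      # a proper multi-year reference period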
Example #42
0
def calc_indice_unconventional(resource=[], variable=None, prefix=None,
  indices=None, polygons=None,  groupings=None, 
  dir_output=None, dimension_map = None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='TGx')
    :param polygons: list of polygons (default=None)
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import get_ugid, get_geom

    if not isinstance(resource, list):
      resource = [resource]
    if not isinstance(indices, list):
      indices = [indices]
    if polygons is None:
      polygons = [None]
    elif not isinstance(polygons, list):
      polygons = [polygons]
    if not isinstance(groupings, list):
      groupings = [groupings]
    
    if dir_output is not None:
      if not exists(dir_output):
        makedirs(dir_output)
    
    experiments = sort_by_filename(resource)
    outputs = []

    # print('environment for calc_indice_unconventional set')
    logger.info('environment for calc_indice_unconventional set')
    
    for key in experiments:
      if variable is None:
        variable = get_variable(experiments[key][0])
      try: 
        ncs = experiments[key]
        for indice in indices:
          logger.info('indice: %s' % indice)
          try: 
            for grouping in groupings:
              logger.info('grouping: %s' % grouping)
              try:
                calc_group = calc_grouping(grouping)
                logger.info('calc_group: %s' % calc_group)
                for polygon in polygons:  
                  try:
                    domain = key.split('_')[1].split('-')[0]
                    if polygon is None:
                      if prefix is None:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                      geom = None
                      ugid = None
                    else:
                      if prefix is None:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping).replace(domain, polygon)
                      geom = get_geom(polygon=polygon)
                      ugid = get_ugid(polygons=polygon, geom=geom)
                    if indice == 'TGx':
                      calc=[{'func': 'max', 'name': 'TGx'}]
                      tmp = ocgis_module.call(resource=ncs,# conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              dir_output=dir_output, geom=geom, select_ugid=ugid)
                    elif indice == 'TGn':
                      calc=[{'func': 'min', 'name': 'TGn'}]
                      tmp = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping= calc_group, prefix=prefix,
                                               dir_output=dir_output, geom=geom, select_ugid = ugid)
                    elif indice == 'TGx5day':
                      calc = [{'func': 'moving_window', 'name': 'TGx5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, prefix=str(uuid.uuid4()),
                                              geom=geom, select_ugid = ugid)
                      calc=[{'func': 'max', 'name': 'TGx5day'}]
                      logger.info('moving window calculated : %s' % tmp2)
                      tmp = ocgis_module.call(resource=tmp2,
                                              variable=indice, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              dir_output=dir_output)
                      remove(tmp2)
                    elif indice == 'TGn5day':
                      calc = [{'func': 'moving_window', 'name': 'TGn5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, prefix=str(uuid.uuid4()),
                                              geom=geom, select_ugid = ugid)
                      calc=[{'func': 'min', 'name': 'TGn5day'}]
                      
                      logger.info('moving window calculated : %s' % tmp2)
                      
                      tmp = ocgis_module.call(resource=tmp2,
                                              variable=indice, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              dir_output=dir_output)
                      remove(tmp2)
                    else:
                      logger.error('indice %s is not a known indice' % indice)
                    outputs.append(tmp)
                    logger.info('indice file calculated %s ' % tmp)
                  except Exception as e:
                    logger.exception('could not calc indice %s for key %s, polygon %s and calc_grouping %s : %s' % (indice, key, polygon, grouping, e))
              except Exception as e:
                logger.exception('could not calc indice %s for key %s and calc_grouping %s : %s' % (indice, key, grouping, e))
          except Exception as e:
            logger.exception('could not calc indice %s for key %s: %s'%  (indice, key, e ))
      except Exception as e:
        logger.exception('could not calc key %s: %s' % (key, e))
    return outputs
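
A hypothetical call of calc_indice_unconventional, assuming a daily-mean temperature file in DRS naming (the file name is invented):

ncs = ['tas_day_EUR-11_CNRM-CM5_historical_r1i1p1_19700101-19991231.nc']
outputs = calc_indice_unconventional(resource=ncs,
                                     indices=['TGx', 'TGn5day'],
                                     groupings=['yr'],
                                     polygons=None,
                                     dir_output='.')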
Example #43
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 5)

            resource = archiveextract(resource=rename_complexinputs(request.inputs['resource']))
            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data
            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data

            # bbox = [-80, 20, 50, 70]
            # TODO: add checking for wrong coordinates and apply defaults if necessary
            #level = 500

            level = request.inputs['level'][0].data
            if level == 500:
                dummylevel = 1000  # dummy second level as workaround for cdo sellevel
            else:
                dummylevel = 500
            LOGGER.debug('LEVEL selected: %s hPa' % (level))

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            bboxStr = bboxStr.split(',')
            #for i in bboxStr: bbox.append(int(i))
            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            # if bbox_obj is not None:
            #     LOGGER.info("bbox_obj={0}".format(bbox_obj.coords))
            #     bbox = [bbox_obj.coords[0][0],
            #             bbox_obj.coords[0][1],
            #             bbox_obj.coords[1][0],
            #             bbox_obj.coords[1][1]]
            #     LOGGER.info("bbox={0}".format(bbox))
            # else:
            #     bbox = None
            # region = self.getInputValues(identifier='region')[0]
            # bbox = [float(b) for b in region.split(',')]
            # bbox_obj = self.BBox.getValue()

            normalize = request.inputs['normalize'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            # model_var = request.inputs['reanalyses'][0].data
            # model, var = model_var.split('_')

            # experiment = self.getInputValues(identifier='experiment')[0]
            # dataset, var = experiment.split('_')
            # LOGGER.info('environment set')
            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 5)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            # refSt = dt.strptime(refSt[0], '%Y-%m-%d')
            # refEn = dt.strptime(refEn[0], '%Y-%m-%d')
            # dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
            # dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

            # not necessary once ocgis_module.py is fixed
            refSt = dt.combine(refSt, dt_time(12, 0))
            refEn = dt.combine(refEn, dt_time(12, 0))
            dateSt = dt.combine(dateSt, dt_time(12, 0))
            dateEn = dt.combine(dateEn, dt_time(12, 0))

            # refSt = refSt.replace(hour=12)
            # refEn = refEn.replace(hour=12)
            # dateSt = dateSt.replace(hour=12)
            # dateEn = dateEn.replace(hour=12)

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

#            if bbox_obj is not None:
#                LOGGER.info("bbox_obj={0}".format(bbox_obj.coords))
#                bbox = [bbox_obj.coords[0][0],
#                        bbox_obj.coords[0][1],
#                        bbox_obj.coords[1][0],
#                        bbox_obj.coords[1][1]]
#                LOGGER.info("bbox={0}".format(bbox))
#            else:
#                bbox = None

            LOGGER.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in and convert the arguments', 5)

        ########################
        # input data preparation
        ########################

        # TODO: Check if files containing more than one dataset

        response.update_status('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...
        try:
            # TODO: add selection of the level, maybe below in call(..., level_range=[..., ...])

            if isinstance(resource, list):
                resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0])
            else:
                resource = [resource]

            #===============================================================
            # TODO: REMOVE resources which are out of interest from the list
            # (i.e. files whose years lie entirely outside the requested calculation range)

            tmp_resource = []

            for re in resource:
                s, e = get_timerange(re)
                tmpSt = dt.strptime(s, '%Y%m%d')
                tmpEn = dt.strptime(e, '%Y%m%d')
                if (tmpSt <= end) and (tmpEn >= start):
                    tmp_resource.append(re)
                    LOGGER.debug('Selected file: %s ' % (re))
            resource = tmp_resource
            # ===============================================================

            #================================================================
            # Try to fix memory issue... (ocgis call for files like 20-30 gb... )
            # IF 4D - select pressure level before domain cut
            #
            # resource properties
            ds = Dataset(resource[0])
            variable = get_variable(resource[0])
            var = ds.variables[variable]
            dims = list(var.dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id') 
            except AttributeError:
                model_id = 'Unknown model'

            LOGGER.debug('MODEL: %s ' % (model_id)) 

            lev_units = 'hPa'

            if dimlen > 3:
                lev = ds.variables[dims[1]]
                # actually index [1] needs to be detected... assuming zg(time, plev, lat, lon)
                lev_units = lev.units

                if lev_units == 'Pa':
                    level = level * 100
                    dummylevel = dummylevel * 100
                    # TODO: OR check the NAME and units of vertical level and find 200 , 300, or 500 mbar in it
                    # Not just level = level * 100.

            # Get Levels

            from cdo import Cdo
            cdo = Cdo()

            lev_res = []
            if dimlen > 3:
                for res_fn in resource:
                    tmp_f = 'lev_' + path.basename(res_fn)
                    comcdo = '%s,%s' % (level, dummylevel)
                    cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                    lev_res.append(tmp_f)
            else:
                lev_res = resource

            # Get domain
            regr_res = []
            for res_fn in lev_res:
                tmp_f = 'dom_' + path.basename(res_fn)
                comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
                cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
                regr_res.append(tmp_f)

            #archive_tmp = call(resource=resource, time_range=[refSt, refEn], geom=bbox, spatial_wrapping='wrap')
            #simulation_tmp = call(resource=resource, time_range=[dateSt, dateEn], geom=bbox, spatial_wrapping='wrap')
            #============================  

            archive_tmp = call(resource=regr_res, time_range=[refSt, refEn], spatial_wrapping='wrap')
            simulation_tmp = call(resource=regr_res, time_range=[dateSt, dateEn], spatial_wrapping='wrap')

            #######################################################################################
            # TEMPORARY dirty workaround to get the level and its units - will be a function in utils.py
            
            if dimlen > 3:
                archive = get_level(archive_tmp, level=level)
                simulation = get_level(simulation_tmp, level=level)
                variable = 'z%s' % level
                # TODO: here should be modulated
            else:
                archive = archive_tmp
                simulation = simulation_tmp
                # 3D, move forward
            #######################################################################################

            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)
        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        LOGGER.debug("data preperation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################

        # TODO: add MODEL name as argument

        response.update_status('writing config file', 15)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                base_id=model_id,
                sim_id=model_id, 
                timewin=timewin,
                varname=variable,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)

        LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

        ##############
        # CASTf90 call
        ##############
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90
        response.update_status('Start CASTf90 call', 20)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            LOGGER.info('analogue.out info:\n %s ' % output)
            LOGGER.debug('analogue.out errors:\n %s ' % error)
            response.update_status('**** CASTf90 succeeded', 70)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)
        
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)
        response.update_status('preparing output', 70)

        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformated')
        response.update_status('reformatted analog file', 80)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
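
The handler above reduces the input files with two chained cdo calls (pressure level first, then the lon/lat domain) before any heavier ocgis work. The same two-step pattern in isolation, assuming a 4D input file and the python-cdo bindings (file names and bbox values are hypothetical):

from cdo import Cdo
cdo = Cdo()

level, dummylevel = 500, 1000  # the dummy second level works around cdo sellevel
lev_f = cdo.sellevel('%s,%s' % (level, dummylevel),
                     input='zg_input.nc', output='lev_zg_input.nc')
dom_f = cdo.sellonlatbox('-80,50,20,70',  # lonmin,lonmax,latmin,latmax
                         input=lev_f, output='dom_zg_input.nc')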
Example #44
0
File: sdm.py Project: KatiRG/flyingpigeon
def get_gam(ncs_reference, PAmask):
  
  from netCDF4 import Dataset
  from os.path import basename
  from numpy import squeeze, ravel, isnan, nan, array, reshape
  
  from flyingpigeon.utils import get_variable
  

  try:
    from rpy2.robjects.packages import importr
    import rpy2.robjects as ro
    import rpy2.robjects.numpy2ri
    
    rpy2.robjects.numpy2ri.activate()
    
    base = importr("base")
    stats = importr("stats")
    mgcv = importr("mgcv")
    logger.info('rpy2 modules imported')
  except Exception as e: 
    msg = 'failed to import rpy2 modules %s' % e
    logger.debug(msg)
    raise Exception(msg)
  
  try: 
    data = {'PA': ro.FloatVector(ravel(PAmask))}
    domain = PAmask.shape
    logger.info('mask data converted to R float vector')
  except Exception as e:
    msg = 'failed to convert mask to R vector'
    logger.exception(msg)
    raise Exception(msg)
  
  form = 'PA ~ '
  ncs_reference.sort()
  
  try:
    for i, nc in enumerate(ncs_reference):
      var = get_variable(nc)
      agg = basename(nc).split('_')[-2]
      ds = Dataset(nc)
      vals = squeeze(ds.variables[var])
      vals[vals > 1000] = 0
      vals[isnan(PAmask)] = nan
      indice = '%s_%s' % (var, agg)
      data[str(indice)] = ro.FloatVector(ravel(vals))
      if i == 0:
        form = form + 's(%s, k=3)' % indice
      else:
        form = form + ' + s(%s, k=3)' % indice
  except Exception as e:
    logger.exception('form string generation for gam failed')
  
  dataf = ro.DataFrame(data)
  eq = ro.Formula(str(form))
  
  gam_model = mgcv.gam(base.eval(eq), data=dataf, family=stats.binomial(), scale=-1, na_action=stats.na_exclude)
  
  grdevices = importr('grDevices')
  
  ### ###########################
  # plot response curves
  ### ###########################

  from flyingpigeon.visualisation import concat_images
  from tempfile import mkstemp
  infos = []

  for i in range(1, len(ncs_reference) + 1):
    ip, info = mkstemp(dir='.', suffix='.png')
    infos.append(info)
    grdevices.png(filename=info)
    trans = ro.r('function(x){exp(x)/(1+exp(x))}')
    mgcv.plot_gam(gam_model, trans=trans, shade='T', col='black', select=i,
                  ylab='Predicted Probability', rug=False, cex_lab=1.4, cex_axis=1.4)
    grdevices.dev_off()
    
  infos_concat = concat_images(infos, orientation='h')
  predict_gam = mgcv.predict_gam(gam_model, type="response", progress="text", na_action=stats.na_exclude)
  prediction = array(predict_gam).reshape(domain)
    
  return gam_model, prediction, infos_concat
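
Why reshape(domain) at the end recovers the map: R receives the mask and the indices as flat vectors (ravel), the prediction comes back in the same element order, and reshaping to PAmask.shape restores the grid. A small numpy round-trip check:

import numpy as np

PAmask = np.arange(12.0).reshape(3, 4)
flat = np.ravel(PAmask)                                    # what gets handed to R
assert np.array_equal(flat.reshape(PAmask.shape), PAmask)  # element order is preserved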
Example #45
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'
        process_start_time = time.time()  # measure process execution time ...

        response.update_status('execution started at : %s ' % dt.now(), 5)

        start_time = time.time()  # measure init ...

        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data
        regrset = request.inputs['regrset'][0].data

        # fix 31 December issue
        # refSt = dt.combine(refSt,dt_time(12,0))
        # refEn = dt.combine(refEn,dt_time(12,0))
        # dateSt = dt.combine(dateSt,dt_time(12,0))
        # dateEn = dt.combine(dateEn,dt_time(12,0))

        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data
        # bbox = [-80, 20, 50, 70]
        # TODO: add checking for wrong coordinates and apply defaults if necessary
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))

        direction = request.inputs['direction'][0].data
        normalize = request.inputs['normalize'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        try:
            if direction == 're2mo':
                anaSt = dt.combine(dateSt, dt_time(0, 0))
                anaEn = dt.combine(dateEn, dt_time(0, 0))
                refSt = dt.combine(refSt, dt_time(12, 0))
                refEn = dt.combine(refEn, dt_time(12, 0))
                r_time_range = [anaSt, anaEn]
                m_time_range = [refSt, refEn]
            elif direction == 'mo2re':
                anaSt = dt.combine(dateSt, dt_time(12, 0))
                anaEn = dt.combine(dateEn, dt_time(12, 0))
                refSt = dt.combine(refSt, dt_time(0, 0))
                refEn = dt.combine(refEn, dt_time(0, 0))
                r_time_range = [refSt, refEn]
                m_time_range = [anaSt, anaEn]
            else:
                LOGGER.exception('failed to find time periods for comparison direction')
        except:
            msg = 'failed to put simulation and reference time in order'
            LOGGER.exception(msg)
            raise Exception(msg)

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.exception('output format not valid')

        try:
            if model == 'NCEP':
                getlevel = True
                if 'z' in var:
                    level = var.strip('z')
                    variable = 'hgt'
                    # conform_units_to='hPa'
                else:
                    variable = 'slp'
                    level = None
                    # conform_units_to='hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    # conform_units_to=None
                else:
                    variable = 'prmsl'
                    level = None
                    # conform_units_to='hPa'
            else:
                LOGGER.exception('Reanalyses model not known')
            LOGGER.info('environment set')
        except:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        # LOGGER.exception("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in the arguments', 6)

        #################
        # get input data
        #################
        # TODO: do not forget to select years

        start_time = time.time()  # measure get_input_data ...
        response.update_status('fetching input data', 7)
        try:
            if direction == 're2mo':
                nc_reanalyses = reanalyses(start=anaSt.year,
                                           end=anaEn.year,
                                           variable=var,
                                           dataset=model,
                                           getlevel=getlevel)
            else:
                nc_reanalyses = reanalyses(start=refSt.year,
                                           end=refEn.year,
                                           variable=var,
                                           dataset=model,
                                           getlevel=getlevel)

            if isinstance(nc_reanalyses, list):
                nc_reanalyses = sorted(nc_reanalyses,
                                       key=lambda i: path.splitext(path.basename(i))[0])
            else:
                nc_reanalyses = [nc_reanalyses]

            # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 GB,
            # so it makes sense to process it step by step.
            # TODO: create a dictionary for such datasets (for models as well)
            # TODO: benchmark the method below for NCEP z500 over 60 years; maybe use the same approach
            # TODO: now everything is regridded to the reanalysis

            if ('20CRV2' in model) and ('z' in var):
                tmp_total = []
                origvar = get_variable(nc_reanalyses)

                for z in nc_reanalyses:
                    tmp_n = 'tmp_%s' % (uuid.uuid1())
                    b0 = call(resource=z,
                              variable=origvar,
                              level_range=[int(level), int(level)],
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='levdom_' + path.basename(z)[0:-3])
                    tmp_total.append(b0)

                tmp_total = sorted(
                    tmp_total,
                    key=lambda i: path.splitext(path.basename(i))[0])
                inter_subset_tmp = call(resource=tmp_total,
                                        variable=origvar,
                                        time_range=r_time_range)

                # Clean
                for i in tmp_total:
                    tbr = 'rm -f %s' % (i)
                    #system(tbr)

                # Create new variable
                ds = Dataset(inter_subset_tmp, mode='a')
                z_var = ds.variables.pop(origvar)
                dims = z_var.dimensions
                new_var = ds.createVariable('z%s' % level,
                                            z_var.dtype,
                                            dimensions=(dims[0], dims[2],
                                                        dims[3]))
                new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                nc_subset = call(inter_subset_tmp, variable='z%s' % level)
            else:
                nc_subset = call(
                    resource=nc_reanalyses,
                    variable=var,
                    geom=bbox,
                    spatial_wrapping='wrap',
                    time_range=r_time_range,
                    # conform_units_to=conform_units_to
                )

            # nc_subset = call(resource=nc_reanalyses, variable=var, geom=bbox, spatial_wrapping='wrap') # XXXXXX wrap
            # LOGGER.exception("get_input_subset_model took %s seconds.", time.time() - start_time)
            response.update_status('**** Input reanalyses data fetched', 10)
        except:
            msg = 'failed to fetch or subset input files'
            LOGGER.exception(msg)
            raise Exception(msg)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 12)

        # Filter resource:
        if type(resource) == list:
            resource = sorted(resource,
                              key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        tmp_resource = []

        m_start = m_time_range[0]
        m_end = m_time_range[1]

        for re in resource:
            s, e = get_timerange(re)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            if ((tmpSt <= m_end) and (tmpEn >= m_start)):
                tmp_resource.append(re)
                LOGGER.debug('Selected file: %s ' % (re))
        resource = tmp_resource

        start_time = time.time()  # measure data preparation ...
        # TODO: check the calendars for model vs reanalyses
        # TODO: check the units for model vs reanalyses
        try:
            m_total = []
            modvar = get_variable(resource)
            # resource properties
            ds = Dataset(resource[0])
            m_var = ds.variables[modvar]
            dims = list(m_var.dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id')
            except AttributeError:
                model_id = 'Unknown model'

            LOGGER.debug('MODEL: %s ' % (model_id))

            lev_units = 'hPa'

            if dimlen > 3:
                lev = ds.variables[dims[1]]
                # actually index [1] needs to be detected... assuming zg(time, plev, lat, lon)
                lev_units = lev.units

                if lev_units == 'Pa':
                    m_level = str(int(level) * 100)
                else:
                    m_level = level
            else:
                m_level = None

            if level is None:
                level_range = None
            else:
                level_range = [int(m_level), int(m_level)]

            for z in resource:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                # select level and regrid
                b0 = call(
                    resource=z,
                    variable=modvar,
                    level_range=level_range,
                    spatial_wrapping='wrap',
                    regrid_destination=nc_reanalyses[0],
                    regrid_options='bil',
                    prefix=tmp_n)
                # select domain
                b01 = call(resource=b0,
                           geom=bbox,
                           spatial_wrapping='wrap',
                           prefix='levregr_' + path.basename(z)[0:-3])
                tbr = 'rm -f %s' % (b0)
                #system(tbr)
                tbr = 'rm -f %s' % (tmp_n)
                #system(tbr)
                # get full resource
                m_total.append(b01)
            ds.close()
            model_subset = call(m_total, time_range=m_time_range)
            for i in m_total:
                tbr = 'rm -f %s' % (i)
                #system(tbr)

            if m_level is not None:
                # Create new variable in model set
                ds = Dataset(model_subset, mode='a')
                mod_var = ds.variables.pop(modvar)
                dims = mod_var.dimensions
                new_modvar = ds.createVariable('z%s' % level,
                                               mod_var.dtype,
                                               dimensions=(dims[0], dims[2],
                                                           dims[3]))
                new_modvar[:, :, :] = squeeze(mod_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                mod_subset = call(model_subset, variable='z%s' % level)
            else:
                mod_subset = model_subset

#            if direction == 're2mo':
#                try:
#                    response.update_status('Preparing simulation data', 15)
#                    reanalyses_subset = call(resource=nc_subset, time_range=[anaSt, anaEn])
#                except:
#                    msg = 'failed to prepare simulation period'
#                    LOGGER.exception(msg)
#                try:
#                    response.update_status('Preparing target data', 17)
#                    var_target = get_variable(resource)
#                    # var_simulation = get_variable(simulation)

#                    model_subset_tmp = call(resource=resource, variable=var_target,
#                                            time_range=[refSt, refEn],
#                                            t_calendar='standard',
#                                            spatial_wrapping='wrap',
#                                            regrid_destination=nc_reanalyses[0],
#                                            regrid_options='bil')

#                    # model_subset = call(resource=resource, variable=var_target,
#                    #                     time_range=[refSt, refEn],
#                    #                     geom=bbox,
#                    #                     t_calendar='standard',
#                    #                     # conform_units_to=conform_units_to,
#                    #                     spatial_wrapping='wrap',
#                    #                     regrid_destination=reanalyses_subset,
#                    #                     regrid_options='bil') # XXXXXXXXXXXX ADD WRAP rem calendar

#                    model_subset = call(resource=model_subset_tmp,variable=var_target, geom=bbox, spatial_wrapping='wrap', t_calendar='standard')

#                   # ISSUE: the regrided model has white border with null! Check it.
#                   # check t_calendar!
#                except:
#                    msg = 'failed subset archive model'
#                    LOGGER.exception(msg)
#                    raise Exception(msg)
#            else:
#                try:
#                    response.update_status('Preparing target data', 15)
#                    var_target = get_variable(resource)
#                    # var_simulation = get_variable(simulation)
#                    model_subset = call(resource=resource, variable=var_target,
#                                        time_range=[refSt, refEn],
#                                        geom=bbox,
#                                        t_calendar='standard',
#                                        # conform_units_to=conform_units_to,
#                                        # spatial_wrapping='wrap',
#                                        )
#                except:
#                    msg = 'failed subset archive model'
#                    LOGGER.exception(msg)
#                    raise Exception(msg)
#                try:
#                    response.update_status('Preparing simulation data', 17)
#                    reanalyses_subset = call(resource=nc_subset,
#                                             time_range=[anaSt, anaEn],
#                                             regrid_destination=model_subset,
#                                             regrid_options='bil')
#                except:
#                    msg = 'failed to prepare simulation period'
#                    LOGGER.exception(msg)
        except:
            msg = 'failed to subset simulation or reference data'
            LOGGER.exception(msg)
            raise Exception(msg)

# --------------------------------------------
        try:
            if direction == 'mo2re':
                simulation = mod_subset
                archive = nc_subset
                base_id = model
                sim_id = model_id
            elif direction == 're2mo':
                simulation = nc_subset
                archive = mod_subset
                base_id = model_id
                sim_id = model
            else:
                raise Exception('direction not valid: %s ' % direction)
        except:
            msg = 'failed to find comparison direction'
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if level is not None:
                out_var = 'z%s' % level
            else:
                var_archive = get_variable(archive)
                var_simulation = get_variable(simulation)
                if var_archive != var_simulation:
                    rename_variable(archive,
                                    oldname=var_archive,
                                    newname=var_simulation)
                    LOGGER.info('varname %s in netCDF renamed to %s' %
                                (var_archive, var_simulation))
                # set out_var in both cases; otherwise it is undefined
                # when archive and simulation already share a variable name
                out_var = var_simulation
        except:
            msg = 'failed to rename variable in target files'
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except:
            msg = 'failed to prepare seasonal cycle reference files'
            LOGGER.exception(msg)
            raise Exception(msg)

        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        # LOGGER.exception("data preperation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################

        response.update_status('writing config file', 18)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                base_id=base_id,
                sim_id=sim_id,
                timewin=timewin,
                varname=var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[
                    dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')
                ],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except:
            msg = 'failed to generate config file'
            LOGGER.exception(msg)
            raise Exception(msg)

        # LOGGER.exception("write_config took %s seconds.", time.time() - start_time)

        #######################
        # CASTf90 call
        #######################
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90

        response.update_status('Start CASTf90 call', 20)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('analogue.out info:\n %s ' % output)
            LOGGER.debug('analogue.out errors:\n %s ' % error)
            response.update_status('**** CASTf90 succeeded', 90)
        except:
            msg = 'CASTf90 failed'
            LOGGER.exception(msg)
            raise Exception(msg)

        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        response.update_status('preparing output', 91)

        # Stopper to keep twitcher results, for debug
        # dummy=dummy

        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 95)
        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 99)
        LOGGER.info('rendered pages: %s ', viewer_html)
        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
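The CASTf90 step above shells out to an external binary: the command line is tokenized with shlex and stdout/stderr are captured via subprocess. A minimal, self-contained sketch of that invocation pattern, assuming an analogue.out executable is available on the PATH:

import shlex
import subprocess


def run_castf90(config_file):
    # tokenize instead of using shell=True to avoid shell-injection issues
    args = shlex.split('analogue.out %s' % config_file)
    proc = subprocess.Popen(args,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    output, error = proc.communicate()  # blocks until the binary exits
    return output, error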
Example #46
0
def calc_indice_percentile(resources=[],
                           variable=None,
                           prefix=None,
                           indices='TG90p',
                           refperiod=None,
                           groupings='yr',
                           polygons=None,
                           percentile=90,
                           mosaic=False,
                           dir_output=None,
                           dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period as a 'YYYYMMDD-YYYYMMDD' string (default=None)
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    # get_variable, sort_by_filename and calc_grouping are assumed to live
    # in flyingpigeon.utils, as in the other examples
    from flyingpigeon.utils import (get_values, get_time, get_variable,
                                    sort_by_filename, calc_grouping)

    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])

    if type(groupings) != list:
        groupings = list([groupings])

    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod is not None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)
            if polygons is None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaic=mosaic)

            arr = get_values(resource=nc_reference)
            dt_arr = get_time(resource=nc_reference)
            arr = ma.masked_array(arr)
            dt_arr = ma.masked_array(dt_arr)
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var

                    ################################
                    # load the appropriate operation
                    ################################

                    ops = [op for op in dir(lic) if operation in op]
                    if len(ops) == 0:
                        raise Exception('operator does not exist %s' % operation)

                    # look up the ICCLIM helper class by name and compute
                    # the percentile basis for the reference period
                    percentile_dict = getattr(lic, ops[0]).get_percentile_dict(
                        arr, dt_arr, percentile, window_width)
                    calc = [{
                        'func': func,
                        'name': name,
                        'kwds': {
                            'percentile_dict': percentile_dict
                        }
                    }]

                    if polygons is None:
                        nc_indices.extend(
                            call(resource=resource,
                                 prefix=key.replace(variable, name).replace(
                                     '_day_', '_%s_' % grouping),
                                 calc=calc,
                                 calc_grouping=calc_group,
                                 output_format='nc',
                                 dir_output=dir_output))
                    else:
                        nc_indices.extend(
                            clipping(
                                resource=resource,
                                prefix=key.replace(variable, name).replace(
                                    '_day_', '_%s_' % grouping),
                                calc=calc,
                                calc_grouping=calc_group,
                                output_format='nc',
                                dir_output=dir_output,
                                polygons=polygons,
                                mosaic=mosaic,
                            ))
    if len(nc_indices) == 0:
        logger.debug('No indices are calculated')
        return None
    return nc_indices
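A hypothetical invocation of calc_indice_percentile, assuming a working flyingpigeon installation; the importing module path, the file name and the output directory are placeholders:

# hypothetical usage; module path and file names are assumptions
from flyingpigeon.indices import calc_indice_percentile

nc_files = ['tas_day_EUR-44_MPI-ESM-LR_historical_19700101-20051231.nc']
nc_indices = calc_indice_percentile(resources=nc_files,
                                    indices=['TG90p'],
                                    percentile=90,
                                    groupings='yr',
                                    refperiod='19710101-20001231',
                                    dir_output='./indices')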
Example #47
0
def uncertainty(resources, variable=None, ylim=None, title=None, dir_out=None):
  """
  creates a png file containing the appropriate uncertainty plot.

  :param resources: list of files containing the same variable
  :param variable: variable to be visualised. If None (default), variable will be detected
  :param ylim: y-axis limits for the plot (default=None)
  :param title: string to be used as title
  :param dir_out: output directory (default: current directory)

  :returns str: path/to/file.png
  """
  logger.debug('Start visualisation uncertainty plot')

  import pandas as pd
  import numpy as np
  import netCDF4
  from netCDF4 import Dataset
  from os.path import basename

  # === prepare environment
  if type(resources) == str:
    resources = list([resources])
  if variable is None:
    variable = utils.get_variable(resources[0])
  if title is None:
    title = "Field mean of %s " % variable
  if dir_out is None:
    dir_out = '.'
  
  try:
    fig = plt.figure(figsize=(20, 10), dpi=600, facecolor='w', edgecolor='k')
    o1, output_png = mkstemp(dir=dir_out, suffix='.png')
    df = pd.DataFrame()

    logger.info('variable %s found in resources.' % variable)
    for f in resources:
      try:
        ds = Dataset(f)
        data = np.squeeze(ds.variables[variable][:])
        if len(data.shape) == 3: 
          meanData = np.mean(data,axis=1)
          ts = np.mean(meanData,axis=1)
        else: 
          ts = data[:]

        times = ds.variables['time']
        jd = netCDF4.num2date(times[:],times.units)
        
        hs = pd.Series(ts, index=jd, name=basename(f))
        df[basename(f)] = hs
        
      except Exception as e: 
        logger.debug('failed to calculate timeseries for %s: %s' % (f, e))

    try: 
      rollmean = df.rolling(window=30,center=True).mean()
      logger.info('rolling mean calculated for all input data')
      rmean = rollmean.median(axis=1, skipna=False)
      q05 = rollmean.quantile([0.05], axis=1)
      q33 = rollmean.quantile([0.33], axis=1)
      q66 = rollmean.quantile([0.66], axis=1)
      q95 = rollmean.quantile([0.95], axis=1)
    
      logger.info('quantile calculated for all input data')
    except Exception as e: 
      logger.debug('failed to calculate quantiles %s ' % e)
    
    try:
      plt.fill_between(rollmean.index.values, np.squeeze(q05.values),
                       np.squeeze(q95.values), alpha=0.5, color='grey')
      plt.fill_between(rollmean.index.values, np.squeeze(q33.values),
                       np.squeeze(q66.values), alpha=0.5, color='grey')
      plt.plot(rollmean.index.values, np.squeeze(rmean.values), c='r', lw=3)
      
      plt.xlim(min(df.index.values), max(df.index.values))
      plt.ylim(ylim)
      plt.title(title, fontsize=20)
      plt.grid()
    
      fig.savefig(output_png)
      plt.close()    
      logger.debug('timeseries uncertainty plot done for %s' % variable)
    except Exception as e:
      logger.debug('failed to create the uncertainty plot %s ' % e)

  except Exception as e:
    logger.exception('uncertainty plot failed for %s' % variable)
    raise  
  return output_png 
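A hypothetical call to the plot function above, assuming a list of netCDF files that share the same variable (the file paths are placeholders):

# hypothetical usage; the file paths are assumptions
nc_files = ['tas_day_model1.nc', 'tas_day_model2.nc', 'tas_day_model3.nc']
png = uncertainty(nc_files, title='Ensemble spread of tas', dir_out='.')
print(png)  # path/to/file.png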
Example #48
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp

        response.update_status('execution started at : {}'.format(dt.now()), 5)

        ################################
        # reading in the input arguments
        ################################
        LOGGER.info('read in the arguments')
        # resources = self.getInputValues(identifier='resources')
        season = request.inputs['season'][0].data
        LOGGER.info('season %s', season)

        # bbox = [-80, 20, 50, 70]
        # TODO: Add checking for wrong coordinates and apply defaults if necessary
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
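        # assumed WPS input order: 'minlon,minlat,maxlon,maxlat';
        # reordered below to [minlon, maxlon, minlat, maxlat] for the ocgis calls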
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: {}'.format(bbox))
        LOGGER.debug('BBOX original: {}'.format(bboxStr))

        model_var = request.inputs['reanalyses'][0].data
        model, variable = model_var.split('_')

        period = request.inputs['period'][0].data
        LOGGER.info('period: {}'.format(period))
        anualcycle = request.inputs['anualcycle'][0].data
        kappa = request.inputs['kappa'][0].data
        LOGGER.info('kappa: {}'.format(kappa))

        method = request.inputs['method'][0].data
        LOGGER.info('Calc annual cycle with {}'.format(method))

        sseas = request.inputs['sseas'][0].data
        LOGGER.info('Annual cycle calc with {}'.format(sseas))

        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        LOGGER.debug('start: {0}, end: {1}'.format(start, end))

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 10)

        try:
            if model == 'NCEP':
                getlevel = False
                if 'z' in variable:
                    level = variable.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in variable:
                    level = variable.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            else:
                LOGGER.error('Reanalyses dataset not known')
            LOGGER.info('environment set for model: {}'.format(model))
        except Exception as ex:
            msg = 'failed to set environment: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        from flyingpigeon.datafetch import reanalyses as rl
        from flyingpigeon.utils import get_variable
        from os.path import basename, splitext
        from os import remove
        from netCDF4 import Dataset
        from numpy import squeeze

        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=variable,
                          getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except Exception as ex:
            msg = 'failed to get reanalyses data: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('fetching data done', 15)
        ############################################################
        # get the required bbox and time region from resource data
        ############################################################

        response.update_status('subsetting region of interest', 17)
        # from flyingpigeon.weatherregimes import get_level
        # from flyingpigeon.ocgis_module import call

        time_range = [start, end]

        ############################################################
        # Block of level and domain selection for geop huge dataset
        ############################################################

        LevMulti = False

        # ===========================================================================================
        if 'z' in variable:
            tmp_total = []
            origvar = get_variable(model_nc)

            if not LevMulti:
                for z in model_nc:
                    b0 = call(resource=z,
                              variable=origvar,
                              level_range=[int(level), int(level)],
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='levdom_' + basename(z)[0:-3])
                    tmp_total.append(b0)
            else:
                # multiproc - no improvements yet, needs testing on a high-performance machine...
                # -----------------------
                try:
                    import ctypes
                    import os
                    # TODO: This lib is for linux
                    mkl_rt = ctypes.CDLL('libmkl_rt.so')
                    nth = mkl_rt.mkl_get_max_threads()
                    LOGGER.debug('Current number of threads: {}'.format(nth))
                    mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
                    nth = mkl_rt.mkl_get_max_threads()
                    LOGGER.debug('NEW number of threads: {}'.format(nth))
                    # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?)
                    os.environ['MKL_NUM_THREADS'] = str(nth)
                    os.environ['OMP_NUM_THREADS'] = str(nth)
                except Exception as ex:
                    msg = 'Failed to set THREADS: {}'.format(ex)
                    LOGGER.debug(msg)
                # -----------------------

                from multiprocessing import Pool
                pool = Pool()
                # from multiprocessing.dummy import Pool as ThreadPool
                # pool = ThreadPool()
                tup_var = [origvar] * len(model_nc)
                tup_lev = [level] * len(model_nc)
                tup_bbox = [bbox] * len(model_nc)
                tup_args = zip(model_nc, tup_var, tup_lev, tup_bbox)

                tmp_total = pool.map(ocgis_call_wrap, tup_args)
                pool.close()
                pool.join()

            LOGGER.debug('Temporal subset files: {}'.format(tmp_total))

            tmp_total = sorted(tmp_total,
                               key=lambda i: splitext(basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total,
                                    variable=origvar,
                                    time_range=time_range)

            # clean up the temporary per-file level subsets
            for i in tmp_total:
                remove(i)

            # Create new variable
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z{}'.format(level),
                                        z_var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            model_subset = call(inter_subset_tmp, variable='z{}'.format(level))
        else:
            model_subset = call(
                resource=model_nc,
                variable=variable,
                geom=bbox,
                spatial_wrapping='wrap',
                time_range=time_range,
                # conform_units_to=conform_units_to
            )
        # =============================================================================================
        LOGGER.info('Dataset subset done: {}'.format(model_subset))

        response.update_status('dataset subsetted', 18)
        ##############################################
        # computing anomalies
        ##############################################
        response.update_status('computing anomalies ', 19)

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        LOGGER.info('reference time: {}'.format(reference))

        model_anomal = wr.get_anomalies(model_subset,
                                        reference=reference,
                                        method=method,
                                        sseas=sseas)  # , variable=variable)

        #####################
        # extracting season
        #####################
        response.update_status('normalizing data', 21)
        model_season = wr.get_season(model_anomal, season=season)

        response.update_status('anomalies computed and  normalized', 24)
        #######################
        # call the R scripts
        #######################
        response.update_status('Start weather regime clustering ', 25)
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_model.R'

            infile = model_season  # model_subset #model_ponderate
            modelname = model
            yr1 = start.year
            yr2 = end.year
            ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

            # TODO: Rewrite this using os.path.join or pathlib libraries
            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % model_var,
                '%s' % kappa
            ]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))
        except Exception as ex:
            msg = 'failed to build the R command: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n {}'.format(output))
            LOGGER.debug('R outlog errors:\n {}'.format(error))
            if len(output) > 0:
                response.update_status('**** weatherregime in R succeeded', 90)
            else:
                LOGGER.error('No output returned from R call')
        except Exception as ex:
            msg = 'failed to run the R weatherregime: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('Weather regime clustering done ', 93)
        ############################################
        # set the outputs
        ############################################
        response.update_status('Set the process outputs ', 95)

        response.outputs['Routput_graphic'].file = output_graphics
        response.outputs['output_pca'].file = file_pca
        response.outputs['output_classification'].file = file_class
        response.outputs['output_netcdf'].file = model_subset
        response.update_status('done', 100)
        return response
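The level-extraction block above rewrites a 4-D geopotential variable as a 3-D one by dropping the (already subset) level axis. A self-contained sketch of the same netCDF4 technique, assuming an input file whose variable has dimensions (time, level, lat, lon) with a single level left:

from netCDF4 import Dataset
from numpy import squeeze


def flatten_level(nc_path, varname, level):
    # assumes dims are (time, level, lat, lon) with exactly one level entry
    ds = Dataset(nc_path, mode='a')
    z_var = ds.variables[varname]
    dims = z_var.dimensions
    new_var = ds.createVariable('z{}'.format(level), z_var.dtype,
                                dimensions=(dims[0], dims[2], dims[3]))
    new_var[:, :, :] = squeeze(z_var[:, 0, :, :])  # drop the level axis
    ds.close()
    return 'z{}'.format(level)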
Example #49
0
    def execute(self):
        import time  # performance test
        process_start_time = time.time()  # measure process execution time ...

        from os import path
        from tempfile import mkstemp
        from flyingpigeon import analogs
        from datetime import datetime as dt

        from flyingpigeon.ocgis_module import call
        from flyingpigeon.datafetch import reanalyses
        from flyingpigeon.utils import get_variable, rename_variable

        self.status.set('execution started at : %s ' % dt.now(), 5)

        start_time = time.time()  # measure init ...

        resource = self.getInputValues(identifier='resource')
        bbox_obj = self.BBox.getValue()
        refSt = self.getInputValues(identifier='refSt')
        refEn = self.getInputValues(identifier='refEn')
        dateSt = self.getInputValues(identifier='dateSt')
        dateEn = self.getInputValues(identifier='dateEn')
        normalize = self.getInputValues(identifier='normalize')[0]
        distance = self.getInputValues(identifier='dist')[0]
        outformat = self.getInputValues(identifier='outformat')[0]
        timewin = int(self.getInputValues(identifier='timewin')[0])
        experiment = self.getInputValues(identifier='experiment')[0]
        dataset, var = experiment.split('_')

        refSt = dt.strptime(refSt[0], '%Y-%m-%d')
        refEn = dt.strptime(refEn[0], '%Y-%m-%d')
        dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
        dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            logger.error('output format not valid')

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [
                bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                bbox_obj.coords[1][0], bbox_obj.coords[1][1]
            ]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None

        #start = min( refSt, dateSt )
        #end = max( refEn, dateEn )
        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]

        try:
            if dataset == 'NCEP':
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    #conform_units_to=None
                else:
                    variable = 'slp'
                    level = None
                    #conform_units_to='hPa'
            elif '20CRV2' in dataset:
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    #conform_units_to=None
                else:
                    variable = 'prmsl'
                    level = None
                    #conform_units_to='hPa'
            else:
                logger.error('Reanalyses dataset not known')
            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("init took %s seconds.", time.time() - start_time)
        self.status.set('Read in the arguments', 5)
        #################
        # get input data
        #################

        start_time = time.time()  # measure get_input_data ...
        self.status.set('fetching input data', 7)
        try:
            input = reanalyses(start=dateSt.year,
                               end=dateEn.year,
                               variable=var,
                               dataset=dataset)
            nc_subset = call(resource=input, variable=var, geom=bbox)
        except Exception as e:
            msg = 'failed to fetch or subset input files %s' % e
            logger.error(msg)
            raise Exception(msg)
        logger.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        self.status.set('**** Input data fetched', 10)

        ########################
        # input data preparation
        ########################
        self.status.set('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...
        try:
            self.status.set('Preparing simulation data', 15)
            simulation = call(resource=nc_subset, time_range=[dateSt, dateEn])
        except:
            msg = 'failed to prepare simulation period'
            logger.debug(msg)

        try:
            self.status.set('Preparing target data', 17)
            var_target = get_variable(resource)
            #var_simulation = get_variable(simulation)
            archive = call(
                resource=resource,
                variable=var_target,
                time_range=[refSt, refEn],
                geom=bbox,
                t_calendar='standard',
                # conform_units_to=conform_units_to, spatial_wrapping='wrap',
                regrid_destination=simulation,
                regrid_options='bil')
        except Exception as e:
            msg = 'failed subset archive dataset %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        try:
            if var != var_target:
                rename_variable(archive, oldname=var_target, newname=var)
                logger.info('varname %s in netCDF renamed to %s' %
                            (var_target, var))
        except Exception as e:
            msg = 'failed to rename variable in target files %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare seasonal cycle reference files %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        logger.debug("data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generating the config file
        ############################

        self.status.set('writing config file', 15)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                timewin=timewin,
                varname=var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[
                    dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')
                ],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        logger.debug("write_config took %s seconds.", time.time() - start_time)

        #######################
        # CASTf90 call
        #######################
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90

        self.status.set('Start CASTf90 call', 20)
        try:
            #self.status.set('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            #system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            logger.info('analogue.out info:\n %s ' % output)
            logger.debug('analogue.out errors:\n %s ' % error)
            self.status.set('**** CASTf90 succeeded', 90)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("castf90 took %s seconds.", time.time() - start_time)

        self.status.set('preparing output', 99)
        self.config.setValue(config_file)
        self.analogs.setValue(output_file)
        self.simulation_netcdf.setValue(simulation)
        self.target_netcdf.setValue(archive)

        self.status.set('execution ended', 100)

        logger.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
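mkstemp, as used above for the output text file, returns an open OS-level file descriptor together with the path; only the path is used here, so closing the descriptor explicitly avoids leaking it. A small sketch of the safer pattern:

from os import close, path
from tempfile import mkstemp

fd, output = mkstemp(dir='.', suffix='.txt')
close(fd)  # CASTf90 writes to the path itself; the open descriptor is not needed
output_file = path.abspath(output)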
Example #50
0
def calc_indice_simple(resource=[], variable=None, prefix=None, indice='SU',
                       polygons=None, mosaic=False, grouping='yr', dir_output=None,
                       dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indice: indice name (default='SU')
    :param polygons: list of polygons (default=None)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from os import makedirs
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    # get_variable, sort_by_filename and calc_grouping are assumed to live
    # in flyingpigeon.utils, as in the other examples
    from flyingpigeon.utils import get_variable, sort_by_filename, calc_grouping
    import uuid

    if type(resource) != list:
        resource = list([resource])
    # if type(indices) != list:
    #     indices = list([indices])
    if type(polygons) != list and polygons is not None:
        polygons = list([polygons])
    # if type(groupings) != list:
    #     groupings = list([groupings])

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    datasets = sort_by_filename(resource).keys()

    if len(datasets) == 1:
        key = datasets[0]
    else:
        LOGGER.warning('more than one dataset in resource')

    # from flyingpigeon.subset import select_ugid
    #    tile_dim = 25
    output = None

    # experiments = sort_by_filename(resource)
    outputs = []

    # for key in experiments:

    if variable is None:
        variable = get_variable(resource)
        LOGGER.debug('Variable detected % s ' % variable)

    # variable = key.split('_')[0]
    try:
        # icclim can't handling 'kg m2 sec' needs to be 'mm/day'
        if variable == 'pr':
            calc = 'pr=pr*86400'
            ncs = ocgis_module.call(resource=resource,
                                    variable=variable,
                                    dimension_map=dimension_map,
                                    calc=calc,
                                    memory_limit=memory_limit,
                                    # calc_grouping= calc_group,
                                    prefix=str(uuid.uuid4()),
                                    dir_output=dir_output,
                                    output_format='nc')
        else:
            ncs = resource

        try:
            calc = [{'func': 'icclim_' + indice, 'name': indice}]
            LOGGER.info('calc: %s' % calc)
            try:
                calc_group = calc_grouping(grouping)
                LOGGER.info('calc_group: %s' % calc_group)
                if polygons is None:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        LOGGER.debug(' **** dir_output = %s ' % dir_output)
                        tmp = ocgis_module.call(resource=ncs,
                                                variable=variable,
                                                dimension_map=dimension_map,
                                                calc=calc,
                                                calc_grouping=calc_group,
                                                prefix=prefix,
                                                dir_output=dir_output,
                                                output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc indice %s for domain' % indice
                            LOGGER.exception(msg)
                    except:
                        msg = 'could not calc indice %s for domain' % indice
                        LOGGER.exception(msg)
                else:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        tmp = clipping(resource=ncs,
                                       variable=variable,
                                       dimension_map=dimension_map,
                                       calc=calc,
                                       calc_grouping=calc_group,
                                       prefix=prefix,
                                       polygons=polygons,
                                       mosaic=mosaic,
                                       dir_output=dir_output,
                                       output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc clipped indice %s ' % (indice)
                            LOGGER.exception(msg)
                    except:
                        msg = 'could not calc indice %s for domain' % (indice)
                        LOGGER.debug(msg)
                        # raise Exception(msg)
                    LOGGER.info('indice file calculated: %s' % tmp)
            except:
                msg = 'could not calc indice %s for key %s and grouping %s' % (indice, key, grouping)
                LOGGER.exception(msg)
                # raise Exception(msg)
        except:
            msg = 'could not calc indice %s ' % (indice)
            LOGGER.exception(msg)
            # raise Exception(msg)
    except:
        msg = 'could not calculate indices'
        LOGGER.exception(msg)
        # raise Exception(msg)
    LOGGER.info('indice outputs %s ' % outputs)

    if len(outputs) == 0:
        LOGGER.debug('No indices are calculated')
        return None
    return outputs
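A hypothetical invocation of calc_indice_simple, assuming a flyingpigeon environment with its shapefiles available; the file path and output directory are placeholders:

# hypothetical usage; the file path is an assumption
nc_files = ['tasmax_day_EUR-44_MPI-ESM-LR_historical_19700101-20051231.nc']
su_files = calc_indice_simple(resource=nc_files,
                              indice='SU',  # summer days, needs daily tasmax
                              grouping='yr',
                              polygons=['FRA'],
                              mosaic=True,
                              dir_output='./indices')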
Example #51
0
def get_anomalies(nc_file, frac=0.2, reference=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
               (0 = close to the original data, 1 = flat line)
               default = 0.2
    :param reference: Period to calculate annual cycle

    :returns str: path to output netCDF file
    """
    try:
        variable = utils.get_variable(nc_file)
        calc = [{'func': 'mean', 'name': variable}]
        calc_grouping = ['day', 'month']
        nc_anual_cycle = call(nc_file,
                              calc=calc,
                              calc_grouping=calc_grouping,
                              time_range=reference)
        logger.info('annual cycle calculated')
    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        # spline for smoothing
        import statsmodels.api as sm
        from numpy import tile, empty, linspace
        from netCDF4 import Dataset
        from cdo import Cdo
        cdo = Cdo()
        # variable = utils.get_variable(nc_file)
        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts * 3, num=ts * 3, endpoint=True)
        for lat in range(vals.shape[1]):
            for lon in range(vals.shape[2]):
                try:
                    y = tile(vals[:, lat, lon], 3)
                    # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                    ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]
                    vals_sm[:, lat, lon] = ys
                except:
                    msg = 'failed for lat %s lon %s' % (lat, lon)
                    logger.exception(msg)
                    raise Exception(msg)
            logger.debug('done for %s - %s ' % (lat, lon))
        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        logger.info('smoothing of annual cycle done')
    except:
        msg = 'failed smoothing of annual cycle'
        logger.exception(msg)
        raise Exception(msg)
    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
        logger.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
    except:
        msg = 'failed subtraction of annual cycle'
        logger.exception(msg)
        raise Exception(msg)
    return nc_anomal
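The smoothing step above can be tried in isolation: one grid cell's annual cycle is tiled three times and only the middle third of the LOWESS fit is kept, which keeps the smoothed cycle periodic across the year boundaries. A self-contained sketch with synthetic data:

import numpy as np
import statsmodels.api as sm

ts = 365  # one value per day of year
cycle = 10 * np.sin(2 * np.pi * np.arange(ts) / ts) + np.random.randn(ts)
y = np.tile(cycle, 3)  # repeat to avoid edge effects at the boundaries
x = np.linspace(1, ts * 3, num=ts * 3, endpoint=True)
smoothed = sm.nonparametric.lowess(y, x, frac=0.2)[ts:ts * 2, 1]  # middle copy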
Example #52
0
def method_A(resource=[],
             start=None,
             end=None,
             timeslice=20,
             variable=None,
             title=None,
             cmap='seismic'):
    """returns the result

    :param resource: list of paths to netCDF files
    :param start: beginning of reference period (if None (default),
                  the first year of the consistent ensemble will be detected)
    :param end: end of comparison period (if None (default), the last year of the consistent ensemble will be detected)
    :param timeslice: period length for mean calculation of reference and comparison period
    :param variable: variable name to be detected in the netCDF file. If not set (not recommended),
                     the variable name will be detected
    :param title: str to be used as title for the signal map
    :param cmap: define the color scheme for signal map plotting

    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc, graphic.png, text.txt
    """
    from os.path import split
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    try:
        # preparing the resource
        file_dic = sort_by_filename(resource, historical_concatination=True)
        logger.info('file names sorted: %s experiments found' % len(file_dic.keys()))
    except Exception as e:
        msg = 'failed to sort the input files'
        logger.exception(msg)
        raise Exception(msg)

    try:
        mergefiles = []
        for key in file_dic.keys():

            if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                input = []
                for i in file_dic[key]:
                    input.extend([i.replace(' ', '\\\ ')])
                # merge once per experiment, after the file list is complete
                mergefiles.append(
                    cdo.mergetime(input=input,
                                  output=key + '_mergetime.nc'))
            else:
                mergefiles.extend(file_dic[key])
        logger.info('datasets merged %s ' % mergefiles)
    except Exception as e:
        msg = 'seltime and mergetime failed %s' % e
        logger.exception(msg)
        raise Exception(e)

    try:
        text_src = open('infiles.txt', 'a')
        for key in file_dic.keys():
            text_src.write(key + '\n')
        text_src.close()
    except Exception as e:
        msg = 'failed to write source textfile'
        logger.exception(msg)
        raise Exception(msg)

    # configure reference and compare period
    try:
        if start is None:
            st_set = set()
            en_set = set()
            for f in mergefiles:
                times = get_time(f)
                st_set.update([times[0].year])
                if end is None:
                    en_set.update([times[-1].year])
            start = max(st_set)
        if end is None:
            end = min(en_set)
        logger.info('Start and End: %s - %s ' % (start, end))
        if start >= end:
            logger.error(
                'ensemble is inconsistent!!! start year is later than end year'
            )
    except Exception as e:
        msg = 'failed to detect start and end times of the ensemble'
        logger.exception(msg)
        raise Exception(msg)

    # set the periods:
    try:
        start = int(start)
        end = int(end)
        if timeslice is None:
            timeslice = int((end - start) / 3)
            if timeslice == 0:
                timeslice = 1
        else:
            timeslice = int(timeslice)
        start1 = start
        start2 = start1 + timeslice - 1
        end1 = end - timeslice + 1
        end2 = end
        logger.info('timeslice and periods set')
    except Exception as e:
        msg = 'failed to set the periods'
        logger.exception(msg)
        raise Exception(msg)

    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start1, end2),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        logger.info('timeseries selected from defined start to end year')
    except Exception as e:
        msg = 'selyear failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        # ensemble mean
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        logger.info('ensemble mean calculation done')
    except Exception as e:
        msg = 'ensemble mean failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        # ensemble std
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        logger.info('ensemble std calculation done')
    except Exception as e:
        msg = 'ensemble std calculation failed'
        logger.exception(msg)
        raise Exception(msg)

    # get the signal as the difference between the end period (last years) and the beginning (first years):
    try:
        selyearstart = cdo.selyear('%s/%s' % (start1, start2),
                                   input=nc_ensmean,
                                   output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end1, end2),
                                 input=nc_ensmean,
                                 output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        logger.info('Signal calculation done')
    except Exception as e:
        msg = 'calculation of signal failed'
        logger.exception(msg)
        raise Exception(msg)

    # get the intermodel standard deviation (mean over whole period)
    try:
        # std_selyear = cdo.selyear('%s/%s' % (end1,end2), input=nc_ensstd, output='std_selyear.nc')
        # std = cdo.timmean(input = std_selyear, output = 'std.nc')

        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        logger.info('calculation of internal model std for time period done')
    except Exception as e:
        msg = 'calculation of internal model std failed'
        logger.exception(msg)
        raise Exception(msg)
    try:
        absolut = cdo.abs(input=signal, output='absolut_signal.nc')
        high_agreement_mask = cdo.gt(
            input=[absolut, std2],
            output='large_change_with_high_model_agreement.nc')
        low_agreement_mask = cdo.lt(
            input=[absolut, std],
            output='small_signal_or_low_agreement_of_models.nc')
        logger.info('high and low mask done')
    except Exception as e:
        msg = 'calculation of robustness mask failed'
        logger.exception(msg)
        raise Exception(msg)

    try:
        if variable is None:
            variable = get_variable(signal)
        logger.info('variable to be plotted: %s' % variable)

        if title is None:
            title = 'Change of %s (difference of mean %s-%s to %s-%s)' % (
                variable, end1, end2, start1, start2)
        graphic = None
        graphic = map_ensembleRobustness(signal,
                                         high_agreement_mask,
                                         low_agreement_mask,
                                         variable=variable,
                                         cmap=cmap,
                                         title=title)

        logger.info('graphic generated')
    except Exception as e:
        msg = 'graphic generation failed: %s' % e
        logger.debug(msg)
        raise Exception(msg)

    return signal, low_agreement_mask, high_agreement_mask, graphic, text_src
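The two cdo masks above encode a simple robustness rule: a change counts as robust where the absolute signal exceeds twice the inter-model standard deviation, and as weak or disputed where it stays below one standard deviation. The same logic in numpy, on synthetic fields:

import numpy as np

signal = 3 * np.random.randn(10, 10)    # stand-in for meanyearend - meanyearst
std = np.abs(np.random.randn(10, 10))   # stand-in for the inter-model std
high_agreement_mask = np.abs(signal) > 2 * std  # large change, high agreement
low_agreement_mask = np.abs(signal) < std       # small signal or low agreement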
Example #53
0
def clipping(resource=[],
             variable=None,
             dimension_map=None,
             calc=None,
             output_format='nc',
             calc_grouping=None,
             time_range=None,
             time_region=None,
             historical_concatination=True,
             prefix=None,
             spatial_wrapping='wrap',
             polygons=None,
             mosaic=False,
             dir_output=None,
             memory_limit=None):
    """ returns list of clipped netCDF files

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimension_map: specify a dimension map if the input netCDF has unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concat files of RCPs with appropriate historical runs into one timeseries
    :param prefix: prefix for output file name
    :param polygons: list of polygons to be used. If more than 1 in the list, an appropriate mosaic will be clipped
    :param mosaic: Whether the polygons are aggregated into a single geometry (True) or individual files are created for each geometry (False).
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    :param time_range: [start, end] of time subset
    :param time_region: year, months or days to be extracted in the timeseries

    :returns list: path to clipped files
    """

    if type(resource) != list:
        resource = list([resource])
    if type(polygons) != list:
        polygons = list([polygons])
    if prefix is not None:
        if type(prefix) != list:
            prefix = list([prefix])

    geoms = set()
    ncs = sort_by_filename(resource,
                           historical_concatination=historical_concatination)
    geom_files = []
    if mosaic is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + polygon.replace(' ', '')
            if len(geoms) > 1:
                LOGGER.error(
                    'polygons belong to different shapefiles! mosaic option is not possible %s',
                    geoms)
            else:
                geom = geoms.pop()
            ugids = get_ugid(polygons=polygons, geom=geom)
        except:
            LOGGER.exception('geom identification failed')
        for i, key in enumerate(ncs.keys()):
            try:
                # if variable is None:
                variable = get_variable(ncs[key])
                LOGGER.info('variable %s detected in resource' % (variable))
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key],
                                 variable=variable,
                                 calc=calc,
                                 calc_grouping=calc_grouping,
                                 output_format=output_format,
                                 prefix=name,
                                 geom=geom,
                                 select_ugid=ugids,
                                 time_range=time_range,
                                 time_region=time_region,
                                 spatial_wrapping=spatial_wrapping,
                                 memory_limit=memory_limit,
                                 dir_output=dir_output,
                                 dimension_map=dimension_map)
                geom_files.append(geom_file)
                LOGGER.info('ocgis mosaik clipping done for %s ' % (key))
            except:
                msg = 'ocgis mosaik clipping failed for %s ' % (key)
                LOGGER.exception(msg)
    else:
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        # if variable is None:
                        variable = get_variable(ncs[key])
                        LOGGER.info('variable %s detected in resource' %
                                    (variable))
                        if prefix is None:
                            name = key + '_' + polygon.replace(' ', '')
                        else:
                            name = prefix[i]
                        geom_file = call(
                            resource=ncs[key],
                            variable=variable,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            output_format=output_format,
                            prefix=name,
                            geom=geom,
                            select_ugid=ugid,
                            dir_output=dir_output,
                            dimension_map=dimension_map,
                            spatial_wrapping=spatial_wrapping,
                            memory_limit=memory_limit,
                            time_range=time_range,
                            time_region=time_region,
                        )
                        geom_files.append(geom_file)
                        LOGGER.info('ocgis clipping done for %s ' % (key))
                    except Exception:
                        msg = 'ocgis clipping failed for %s ' % (key)
                        LOGGER.exception(msg)
            except Exception:
                LOGGER.exception('geom identification failed')
    return geom_files
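
A minimal usage sketch for the clipping function above. The input file name, the polygon identifiers and the module path are illustrative assumptions, not values taken from the project:

# hypothetical usage sketch; file name, polygon ids and module path are placeholders
from flyingpigeon.subset import clipping  # module path assumed

tas_files = ['tas_EUR-44_day_20010101-20051231.nc']  # hypothetical input netCDF
clipped = clipping(resource=tas_files,
                   polygons=['DEU', 'FRA'],  # two polygons of one shapefile ...
                   mosaic=True,              # ... merged into a single geometry
                   dir_output='.')
print(clipped)  # list with one clipped netCDF path per input dataset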
Example #54
    def execute(self):
        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        import time  # performance test
        process_start_time = time.time()  # measure process execution time ...

        from os import path
        from tempfile import mkstemp
        from datetime import datetime as dt

        from flyingpigeon import analogs
        from flyingpigeon.ocgis_module import call
        from flyingpigeon.datafetch import reanalyses
        from flyingpigeon.utils import get_variable

        self.status.set('execution started at : %s ' % dt.now(), 5)
        start_time = time.time()  # measure init ...

        #######################
        # read input parameters
        #######################
        try:
            self.status.set('read input parameter : %s ' % dt.now(), 5)
            resource = self.getInputValues(identifier='resource')
            refSt = self.getInputValues(identifier='refSt')
            refEn = self.getInputValues(identifier='refEn')
            dateSt = self.getInputValues(identifier='dateSt')
            dateEn = self.getInputValues(identifier='dateEn')
            normalize = self.getInputValues(identifier='normalize')[0]
            distance = self.getInputValues(identifier='dist')[0]
            outformat = self.getInputValues(identifier='outformat')[0]
            timewin = int(self.getInputValues(identifier='timewin')[0])
            bbox_obj = self.BBox.getValue()
            seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
            nanalog = int(self.getInputValues(identifier='nanalog')[0])

            # region = self.getInputValues(identifier='region')[0]
            # bbox = [float(b) for b in region.split(',')]
            # experiment = self.getInputValues(identifier='experiment')[0]
            # dataset , var = experiment.split('_')

            logger.info('input parameters set')
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            logger.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            refSt = dt.strptime(refSt[0], '%Y-%m-%d')
            refEn = dt.strptime(refEn[0], '%Y-%m-%d')
            dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
            dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                logger.error('output format not valid')

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            if bbox_obj is not None:
                logger.info("bbox_obj={0}".format(bbox_obj.coords))
                bbox = [bbox_obj.coords[0][0],
                        bbox_obj.coords[0][1],
                        bbox_obj.coords[1][0],
                        bbox_obj.coords[1][1]]
                logger.info("bbox={0}".format(bbox))
            else:
                bbox = None

            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("init took %s seconds.", time.time() - start_time)
        self.status.set('Read in and convert the arguments', 5)

        ########################
        # input data preparation
        ########################

        # TODO: Check if files containing more than one dataset

        self.status.set('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...
        try:
            variable = get_variable(resource)

            archive = call(resource=resource, time_range=[refSt, refEn], geom=bbox, spatial_wrapping='wrap')
            simulation = call(resource=resource, time_range=[dateSt, dateEn], geom=bbox, spatial_wrapping='wrap')
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            logger.debug(msg)
            raise Exception(msg)
        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        logger.debug("data preperation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################
        self.status.set('writing config file', 15)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                timewin=timewin,
                varname=variable,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        logger.debug("write_config took %s seconds.", time.time() - start_time)

        ##############
        # CASTf90 call
        ##############
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90
        self.status.set('Start CASTf90 call', 20)
        try:
            # self.status.set('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            logger.info('analogue.out info:\n %s ' % output)
            logger.debug('analogue.out errors:\n %s ' % error)
            self.status.set('**** CASTf90 succeeded', 90)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("castf90 took %s seconds.", time.time() - start_time)

        self.status.set('preparing output', 99)
        self.config.setValue(config_file)
        self.analogs.setValue(output_file)
        self.output_netcdf.setValue(simulation)

        self.status.set('execution ended', 100)

        logger.debug("total execution took %s seconds.", time.time() - process_start_time)
Example #55
def get_anomalies(nc_file, frac=0.2, reference=None):
  '''
  Anomalisation of data subsets for weather classification.
  Anomalies are computed by subtracting a smoothed annual cycle.

  :param nc_file: input netCDF file
  :param frac: number between 0-1 for the strength of the smoothing
               (0 = close to the original data, 1 = flat line)
               default=0.2
  :param reference: period over which to calculate the annual cycle
  :return string: path to output netCDF file
  '''
  try:
    variable = utils.get_variable(nc_file)
    calc = [{'func': 'mean', 'name': variable}]
    calc_grouping = ['day', 'year']
    nc_anual_cycle = call(nc_file, calc=calc, calc_grouping=calc_grouping, time_range=reference)
    logger.info('annual cycle calculated')
  except Exception as e:
    msg = 'failed to calculate annual cycle %s' % e
    logger.error(msg)
    raise Exception(msg)

  # LOWESS smoothing of the annual cycle
  import statsmodels.api as sm
  from numpy import tile, empty, linspace
  from netCDF4 import Dataset
  from tempfile import mkstemp  # needed for the temporary output file below
  from cdo import Cdo
  cdo = Cdo()
  
  try:
    # variable = utils.get_variable(nc_file)
    ds = Dataset(nc_anual_cycle, mode='a')
    vals = ds.variables[variable]
    vals_sm = empty(vals.shape)
    ts = vals.shape[0]
    x = linspace(1, ts * 3, num=ts * 3, endpoint=True)

    for lat in range(vals.shape[1]):
      for lon in range(vals.shape[2]):
        try:
          y = tile(vals[:, lat, lon], 3)  # periodic extension over three years
          # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
          ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts*2, 1]  # keep the middle third
          vals_sm[:, lat, lon] = ys
        except Exception as e:
          msg = 'failed for lat %s lon %s  %s ' % (lat, lon, e)
          logger.debug(msg)
          raise Exception(msg)
      print('done for lat %s' % lat)
    vals[:, :, :] = vals_sm[:, :, :]
    ds.close()
    logger.info('smoothing of annual cycle done')
  except Exception as e:
    msg = 'failed smoothing of annual cycle %s ' % e
    logger.error(msg)
    raise Exception(msg)
  try:
    ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
    nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
    logger.info('anomalisation done: %s ' % nc_anomal)
  except Exception as e:
    msg = 'failed subtraction of annual cycle %s ' % e
    logger.error(msg)
    raise Exception(msg)
  return nc_anomal
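
The heart of the smoothing step above is a periodic-extension trick: the annual cycle is tiled three times so the LOWESS fit has no edge effects at the year boundaries, and only the middle third is kept. A minimal sketch on synthetic data (the sine-plus-noise series is an illustrative stand-in for a real grid cell):

import numpy as np
import statsmodels.api as sm

ts = 365
day = np.arange(ts)
cycle = 10 * np.sin(2 * np.pi * day / ts) + np.random.normal(0, 1, ts)  # noisy annual cycle

y = np.tile(cycle, 3)                    # periodic extension over three 'years'
x = np.linspace(1, ts * 3, num=ts * 3)   # matching abscissa
smooth = sm.nonparametric.lowess(y, x, frac=0.2)[ts:ts * 2, 1]  # keep the middle third

assert smooth.shape == (ts,)             # one smoothed value per day of year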
Example #56
def clipping(resource=[], variable=None, dimension_map=None, calc=None, output_format='nc',
             calc_grouping=None, time_range=None, time_region=None, historical_concatination=True,
             prefix=None, spatial_wrapping='wrap', polygons=None, mosaik=False, dir_output=None,
             memory_limit=None):
  """ returns list of clipped netCDF files
  possible entries: 
  :param resource: list of input netCDF files
  :param variable: variable (string) to be used in netCDF
  :param dimesion_map: specify a dimension map input netCDF has unconventional dimension
  :param calc: ocgis calculation argument
  :param calc_grouping: ocgis calculation grouping 
  :param historical_concatination: concat files of RCPs with appropriate historical runs to one timeseries 
  :param prefix: perfix for output file name
  :param polygons: list of polygons to be used. if more than 1 in the list, a appropriate mosaik will be clipped
  :param output_format: output_format (default='nc')
  :param dir_output: specify a output location
  """
  
  from flyingpigeon.utils import get_variable, drs_filename
  from flyingpigeon.ocgis_module import call
  
  if not isinstance(resource, list):
    resource = [resource]
  if not isinstance(polygons, list):
    polygons = [polygons]
  if prefix is not None and not isinstance(prefix, list):
    prefix = [prefix]
  
  geoms = set()
  ncs = sort_by_filename(resource, historical_concatination=historical_concatination)
  geom_files = []
  if mosaik is True:
    try:
      nameadd = '_'
      for polygon in polygons: 
        geoms.add(get_geom(polygon))
        nameadd = nameadd + '-' + polygon  
      if len(geoms) > 1: 
        logger.error('polygons belong to different shapefiles! mosaic option is not possible %s', geoms)
      else: 
        geom = geoms.pop()
      ugids = get_ugid(polygons=polygons, geom=geom)
    except Exception as e:
      logger.debug('geom identification failed %s ' % e)
    for i, key in enumerate(ncs.keys()):
      try:
        if variable is None:
          variable = get_variable(ncs[key])
          logger.info('variable %s detected in resource' % (variable))
        if prefix is None:
          name = key + nameadd
        else:
          name = prefix[i]
        geom_file = call(resource=ncs[key], variable=variable, calc=calc, calc_grouping=calc_grouping, output_format=output_format,
                         prefix=name, geom=geom, select_ugid=ugids, time_range=time_range, time_region=time_region, 
                         spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                         dir_output=dir_output, dimension_map=dimension_map)
        geom_files.append(geom_file)
      except Exception as e:
        msg = 'ocgis calculations failed for %s ' % (key)
        logger.debug(msg)
  else: 
    for i, polygon in enumerate(polygons): 
      try:
        geom = get_geom(polygon)
        ugid = get_ugid(polygons=polygon, geom=geom)
        for key in ncs.keys():
          try:
            if variable is None:
              variable = get_variable(ncs[key])
              logger.info('variable %s detected in resource' % (variable))
            if prefix is None:
              name = key + '_' + polygon
            else:
              name = prefix[i]
            geom_file = call(resource=ncs[key], variable=variable, calc=calc, calc_grouping=calc_grouping,
                             output_format=output_format, prefix=name, geom=geom, select_ugid=ugid,
                             dir_output=dir_output, dimension_map=dimension_map, spatial_wrapping=spatial_wrapping,
                             memory_limit=memory_limit, time_range=time_range, time_region=time_region)
            geom_files.append(geom_file)
          except Exception as e:
            msg = 'ocgis calculations failed for %s ' % (key)
            logger.debug(msg)
            raise
      except Exception as e:
        logger.debug('geom identification failed %s ' % e)
        raise
  return geom_files
Example #57
def test_get_variable(self):
    variable = utils.get_variable(utils.local_path(TESTDATA['cmip5_tasmax_nc']))
    nose.tools.ok_("tasmax" == variable, variable)