Example #1
def get_level(resource, level):
  from flyingpigeon.ocgis_module import call
  from netCDF4 import Dataset
  from flyingpigeon.utils import get_variable
  from numpy import squeeze

  data = None
  try:
    if type(resource) == list:
      resource.sort()
    level_data = call(resource, level_range=[int(level), int(level)])
    variable = get_variable(level_data)
    logger.info('found %s in file' % variable)
    ds = Dataset(level_data, mode='a')
    var = ds.variables.pop(variable)
    dims = var.dimensions
    new_var = ds.createVariable('z%s'% level, var.dtype, dimensions=(dims[0],dims[2],dims[3]))
    # i = where(var[:]==level)
    new_var[:,:,:] = squeeze(var[:,0,:,:])
    ds.close()
    logger.info('level %s extracted' % level)

    data = call(level_data, variable='z%s' % level)
    
  except Exception as e:
    logger.error('failed to extract level %s ' % e)
  return data
Example #2
def get_level(resource, level):
    from flyingpigeon.ocgis_module import call
    from netCDF4 import Dataset
    from flyingpigeon.utils import get_variable
    from numpy import squeeze

    data = None
    try:
        if type(resource) == list:
            resource.sort()
        level_data = call(resource, level_range=[int(level), int(level)])
        variable = get_variable(level_data)
        LOGGER.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        var = ds.variables.pop(variable)
        dims = var.dimensions
        new_var = ds.createVariable('z%s' % level,
                                    var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        # i = where(var[:]==level)
        new_var[:, :, :] = squeeze(var[:, 0, :, :])

        # TODO: Here may be an error! in case of exception, dataset will not close!
        # Exception arise for example for 20CRV2 data...
        try:
            new_var.setncatts({k: var.getncattr(k) for k in var.ncattrs()})
        except Exception:
            LOGGER.info('Could not set attributes for z%s' % level)
        ds.close()
        LOGGER.info('level %s extracted' % level)
        data = call(level_data, variable='z%s' % level)
    except Exception:
        LOGGER.exception('failed to extract level')

    return data
Example #3
def get_level(resource, level):
    from flyingpigeon.ocgis_module import call
    from netCDF4 import Dataset
    from flyingpigeon.utils import get_variable
    from numpy import squeeze

    data = None
    try:
        if type(resource) == list:
            resource.sort()
        level_data = call(resource, level_range=[int(level), int(level)])
        variable = get_variable(level_data)
        logger.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        var = ds.variables.pop(variable)
        dims = var.dimensions
        new_var = ds.createVariable('z%s' % level,
                                    var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        # i = where(var[:]==level)
        new_var[:, :, :] = squeeze(var[:, 0, :, :])
        ds.close()
        logger.info('level %s extracted' % level)

        data = call(level_data, variable='z%s' % level)

    except Exception as e:
        logger.error('failed to extract level %s ' % e)
    return data
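For orientation, a minimal usage sketch (the file name and level are illustrative; get_level expects a 4-D field such as geopotential height with dimensions (time, level, lat, lon)):

# hypothetical input file holding an 'hgt' variable on pressure levels
nc_z500 = get_level('hgt_NCEP_daily.nc', level=500)
# nc_z500 now points to a netCDF file with a 3-D variable named 'z500'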
Example #4
def get_reference(ncs_indices, period='all'):
  """
  calculates the netCDF files containing the mean climatology for statistical GAM training
  :param ncs_indices: list of climate indices defining the growing conditions of tree species
  :param refperiod: time period for statistic training 
  :return present: present conditions
  """
  from datetime import datetime as dt
  from flyingpigeon.ocgis_module import call
  from flyingpigeon.utils import get_variable
  from os.path import basename
  
  if not period == 'all':
    s, e = period.split('-')
    start = dt.strptime(s + '-01-01', '%Y-%m-%d')
    end = dt.strptime(e + '-12-31', '%Y-%m-%d')
    time_range = [start, end]
  else:
    time_range = None
    
  ref_indices = []
  for nc_indice in ncs_indices: 
    variable = get_variable(nc_indice)
    f = basename(nc_indice)[:-3]  # drop the '.nc' suffix
    prefix = '%s_ref-%s' % ('_'.join(f.split('_')[0:-1]), period)
    
    ref_indices.append(call(resource=nc_indice, variable=variable, prefix=prefix,
                            calc=[{'func': 'mean', 'name': variable}],
                            calc_grouping=['all'], time_range=time_range))
  
  return ref_indices
Example #5
def get_indices(resources, indices):
  from flyingpigeon.utils import sort_by_filename, calc_grouping, drs_filename
  from flyingpigeon.ocgis_module import call
  from flyingpigeon.indices import indice_variable

  #names = [drs_filename(nc, skip_timestamp=False, skip_format=False, 
  #               variable=None, rename_file=True, add_file_path=True) for nc in resources]
  
  ncs = sort_by_filename(resources, historical_concatination=True)
  ncs_indices = []
  logger.info('resources sorted; found %s datasets' % len(ncs.keys()))
  for key in ncs.keys():
    for indice in indices:
      try:
        name, month = indice.split('_')
        variable = key.split('_')[0]
        print(name, month, variable)
        if variable == indice_variable(name):
          
          logger.info('calculating indice %s ' % indice)
          grouping = calc_grouping(month)
          calc = [{'func' : 'icclim_' + name, 'name' : name}]
          prefix = key.replace(variable, name).replace('_day_', '_%s_' % month)

          nc = call(resource=ncs[key], variable=variable, calc=calc,
                    calc_grouping=grouping, prefix=prefix, memory_limit=500)
          ncs_indices.append(nc)
          logger.info('Successfully calculated indice %s %s' % (key, indice))
      except Exception:
        logger.exception('failed to calculate indice %s %s' % (key, indice))
  return ncs_indices
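A hypothetical call, assuming daily files and indices given as '<icclim name>_<grouping>' strings as parsed above:

# 'TG_JJA' splits into the icclim indice 'TG' and the grouping 'JJA'
ncs = get_indices(resources=['tas_day_EUR-11_MPI_historical_r1i1p1.nc'],
                  indices=['TG_JJA'])
# one output netCDF per matching (dataset, indice) pair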
Example #6
def get_reference(ncs_indices, period='all'):
    """
    Calculates netCDF files containing the mean climatology for statistical GAM training.

    :param ncs_indices: list of climate indices defining the growing conditions of tree species
    :param period: time period for statistical training

    :return: present conditions
    """
    from datetime import datetime as dt
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable
    from os.path import basename

    if not period == 'all':
        s, e = period.split('-')
        start = dt.strptime(s + '-01-01', '%Y-%m-%d')
        end = dt.strptime(e + '-12-31', '%Y-%m-%d')
        time_range = [start, end]
    else:
        time_range = None

    ref_indices = []
    for nc_indice in ncs_indices:
        variable = get_variable(nc_indice)
        f = basename(nc_indice)[:-3]  # drop the '.nc' suffix
        prefix = '%s_ref-%s' % ('_'.join(f.split('_')[0:-1]), period)

        ref_indices.append(call(resource=nc_indice, variable=variable, prefix=prefix,
                                calc=[{'func': 'mean', 'name': variable}],
                                calc_grouping=['all'], time_range=time_range))

    return ref_indices
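A hypothetical invocation, averaging each indice file over a 1971-2000 reference period (period is parsed as 'YYYY-YYYY' above):

ref_ncs = get_reference(ncs_indices=['TG_JJA_EUR-11_historical.nc'],
                        period='1971-2000')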
Example #7
    def execute(self):
        from flyingpigeon.ocgis_module import call
        from flyingpigeon.utils import sort_by_filename, archive, get_values, get_time

        ncs = self.getInputValues(identifier='resource')
        logger.info("ncs: %s " % ncs)
        coords = self.getInputValues(identifier='coords')
        logger.info("coords %s", coords)
        filenames = []
        nc_exp = sort_by_filename(ncs, historical_concatination=True)

        from numpy import savetxt, column_stack
        from shapely.geometry import Point

        for key in nc_exp.keys():
            try:
                logger.info('start calculation for %s ' % key)
                ncs = nc_exp[key]
                times = get_time(ncs, format='%Y-%m-%d_%H:%M:%S')
                concat_vals = times  # ['%s-%02d-%02d_%02d:%02d:%02d' %
                # (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
                header = 'date_time'
                filename = '%s.csv' % key
                filenames.append(filename)

                for p in coords:
                    try:
                        self.status.set('processing point : {0}'.format(p), 20)
                        # define the point:
                        p = p.split(',')
                        point = Point(float(p[0]), float(p[1]))

                        # get the values
                        timeseries = call(resource=ncs,
                                          geom=point,
                                          select_nearest=True)
                        vals = get_values(timeseries)

                        # concatenation of values
                        header = header + ',%s-%s' % (p[0], p[1])
                        concat_vals = column_stack([concat_vals, vals])
                    except Exception as e:
                        logger.debug('failed for point %s %s' % (p, e))
                self.status.set(
                    '*** all points processed for {0} ****'.format(key), 50)
                savetxt(filename,
                        concat_vals,
                        fmt='%s',
                        delimiter=',',
                        header=header)
            except Exception as e:
                logger.debug('failed for %s %s' % (key, e))

        # set the outputs
        self.status.set('*** creating output tar archive ****', 90)
        tarout_file = archive(filenames)
        self.tarout.setValue(tarout_file)
Example #8
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        ncs = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        LOGGER.info('ncs: {}'.format(ncs))

        coords = []
        for coord in request.inputs['coords']:
            coords.append(coord.data)

        LOGGER.info('coords {}'.format(coords))
        filenames = []
        nc_exp = sort_by_filename(ncs, historical_concatination=True)

        for key in nc_exp.keys():
            try:
                LOGGER.info('start calculation for {}'.format(key))
                ncs = nc_exp[key]
                times = get_time(ncs)  # , format='%Y-%m-%d_%H:%M:%S')
                concat_vals = times  # ['%s-%02d-%02d_%02d:%02d:%02d' %
                # (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
                header = 'date_time'
                filename = '{}.csv'.format(key)
                filenames.append(filename)

                for p in coords:
                    try:
                        response.update_status('processing point: {}'.format(p), 20)
                        # define the point:
                        p = p.split(',')
                        point = Point(float(p[0]), float(p[1]))

                        # get the values
                        timeseries = call(resource=ncs, geom=point, select_nearest=True)
                        vals = get_values(timeseries)

                        # concatenation of values
                        header = header + ',{}-{}'.format(p[0], p[1])
                        concat_vals = column_stack([concat_vals, vals])
                    except Exception as e:
                        LOGGER.debug('failed for point {} {}'.format(p, e))
                response.update_status('*** all points processed for {0} ****'.format(key), 50)

                # TODO: check whether this savetxt call is valid without the string formatting argument fmt='%s'
                savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
            except Exception as ex:
                LOGGER.debug('failed for {}: {}'.format(key, str(ex)))

        # set the outputs
        response.update_status('*** creating output tar archive ****', 90)
        tarout_file = archive(filenames)
        response.outputs['tarout'].file = tarout_file
        return response
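For reference, each coords entry is a 'lon,lat' string, and every point adds one column next to date_time; a sketch of the resulting CSV layout (values illustrative):

# coords = ['2.35,48.85', '13.40,52.52']
# date_time,2.35-48.85,13.40-52.52
# 2001-01-01 12:00:00,273.1,271.8
# 2001-01-02 12:00:00,274.0,272.2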
Example #9
  def execute(self):
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import sort_by_filename, archive, get_values, get_time
        
    ncs = self.getInputValues(identifier='netcdf_file')
    logger.info("ncs: %s " % ncs) 
    coords = self.getInputValues(identifier='coords')
    logger.info("coords %s", coords)
    filenames = []    
    nc_exp = sort_by_filename(ncs, historical_concatination=True)
    
    #(fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    #tar = tarfile.open(tarout_file, "w")

    from numpy import savetxt, column_stack
    from shapely.geometry import Point
    
    for key in nc_exp.keys():
      try:
        logger.info('start calculation for %s ' % key)
        ncs = nc_exp[key]
        times = get_time(ncs)
        concat_vals = ['%s-%02d-%02d_%02d:%02d:%02d' %
                       (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
        header = 'date_time'
        filename = '%s.csv' % key
        filenames.append(filename) 
        
        for p in coords:
          try: 
            self.status.set('processing point : {0}'.format(p), 20)
            # define the point:  
            p = p.split(',')
            point = Point(float(p[0]), float(p[1]))       
            
            # get the values
            timeseries = call(resource=ncs, geom=point, select_nearest=True)
            vals = get_values(timeseries)
            
            # concatenation of values
            header = header + ',%s-%s' % (p[0], p[1])
            concat_vals = column_stack([concat_vals, vals])
          except Exception as e: 
            logger.debug('failed for point %s %s' % (p, e))
        self.status.set('*** all points processed for {0} ****'.format(key), 50)
        savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
      except Exception as e: 
        logger.debug('failed for %s %s' % (key, e))

    # set the outputs
    self.status.set('*** creating output tar archive ****', 90)
    tarout_file = archive(filenames)
    self.tarout.setValue(tarout_file)
Example #10
  def execute(self):
    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_time, get_variable, sort_by_filename
    
    from datetime import datetime as dt
    from netCDF4 import Dataset
    from numpy import savetxt, column_stack, squeeze
    from shapely.geometry import Point
    
    ncs = self.getInputValues(identifier='netcdf_file')
    logging.info("ncs: %s " % ncs) 
    coords = self.getInputValues(identifier='coords')
    logging.info("coords %s", coords)

 
    nc_exp = sort_by_filename(ncs) # dictionary {experiment:[files]}
    filenames = []
    
    (fp_tar, tarout_file) = tempfile.mkstemp(dir=".", suffix='.tar')
    tar = tarfile.open(tarout_file, "w")
    
    for key in nc_exp.keys():
      logging.info('start calculation for %s ' % key )
      ncs = nc_exp[key]
      nc = ncs[0]
      
      times = get_time(nc)
      var = get_variable(nc)
      
      concat_vals = [t.strftime('%Y-%m-%d_%H:%M:%S') for t in times]
      header = 'date_time'
      filename = '%s.csv' % key
      filenames.append(filename) 
      
      for ugid, p in enumerate(coords, start=1):
        self.status.set('processing point : {0}'.format(p), 20)
        p = p.split(',')
        self.status.set('split x and y coords : {0}'.format(p), 20)
        point = Point(float(p[0]), float(p[1]))
        
        # get the timeseries at the grid point
        timeseries = call(resource=ncs, geom=point, select_nearest=True)
        
        ds = Dataset(timeseries)
        vals = squeeze(ds.variables[var])
        header = header + ',%s_%s' % (p[0], p[1])
        concat_vals = column_stack([concat_vals, vals])

      savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
      tar.add(filename)
      
    tar.close()
    self.tarout.setValue(tarout_file)
Example #11
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        ncs = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        LOGGER.info("ncs: %s " % ncs)
        coords = [coord.data for coord in request.inputs['coords']]
        LOGGER.info("coords %s", coords)
        filenames = []
        nc_exp = sort_by_filename(ncs, historical_concatination=True)

        for key in nc_exp.keys():
            try:
                LOGGER.info('start calculation for %s ' % key)
                ncs = nc_exp[key]
                times = get_time(ncs, format='%Y-%m-%d_%H:%M:%S')
                concat_vals = times  # ['%s-%02d-%02d_%02d:%02d:%02d' %
                # (t.year, t.month, t.day, t.hour, t.minute, t.second) for t in times]
                header = 'date_time'
                filename = '%s.csv' % key
                filenames.append(filename)

                for p in coords:
                    try:
                        response.update_status('processing point : {0}'.format(p), 20)
                        # define the point:
                        p = p.split(',')
                        point = Point(float(p[0]), float(p[1]))

                        # get the values
                        timeseries = call(resource=ncs, geom=point, select_nearest=True)
                        vals = get_values(timeseries)

                        # concatenation of values
                        header = header + ',%s-%s' % (p[0], p[1])
                        concat_vals = column_stack([concat_vals, vals])
                    except Exception as e:
                        LOGGER.debug('failed for point %s %s' % (p, e))
                response.update_status('*** all points processed for {0} ****'.format(key), 50)
                savetxt(filename, concat_vals, fmt='%s', delimiter=',', header=header)
            except Exception as e:
                LOGGER.debug('failed for %s %s' % (key, e))

        # set the outputs
        response.update_status('*** creating output tar archive ****', 90)
        tarout_file = archive(filenames)
        response.outputs['tarout'].file = tarout_file
        return response
Example #12
def ocgis_call_wrap(tmargs):
    _z = tmargs[0]
    _origvar = tmargs[1]
    _level = tmargs[2]
    _bbox = tmargs[3]
    _plev = [int(_level), int(_level)]
    _pref = 'levdom_' + basename(_z)[0:-3]

    _tmpf = call(resource=_z,
                 variable=_origvar,
                 level_range=_plev,
                 geom=_bbox,
                 spatial_wrapping='wrap',
                 prefix=_pref)

    return _tmpf
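The positional-tuple signature suggests the wrapper is meant for a process pool, where multi-argument functions cannot be mapped directly; a hypothetical sketch (file names, variable, and level are illustrative):

from multiprocessing import Pool

tasks = [(nc, 'hgt', 500, [-80, 20, 50, 70])
         for nc in ('hgt_1950.nc', 'hgt_1951.nc')]
pool = Pool(processes=2)
subset_files = pool.map(ocgis_call_wrap, tasks)  # one 'levdom_*' file per input
pool.close()
pool.join()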
Example #13
def aggregatTime(resource=[], variable=None, frequency=None, prefix=None, grouping='mon', calculation='mean', historical_concatination=True):
  """
  Aggregates over the time axis.

  :param resource: input netCDF files
  :param variable: variable to be used from resource
  :param frequency: time frequency in resource
  :param grouping: time aggregation for output
  :param prefix: file name prefix
  :param calculation: calculation method (default = mean)
  :param historical_concatination: if RCPs and matching historical runs are present, they are concatenated
  :return: path to netCDF file
  """
  try:
    ncs = sort_by_filename(resource, historical_concatination=historical_concatination)
    group = calc_grouping(grouping=grouping)
  except Exception:
    logger.exception('failed to determine ncs or calc_grouping')
    raise
  
  if len(ncs.keys()) != 1:
    logger.error('None or more than one data experiment found in resource')
    raise Exception('None or more than one data experiment found in resource')

  for key in list(ncs.keys())[0:1]:
    try:
      if frequency is None:
        frequency = get_frequency(ncs[key][0])
      if variable is None:
        variable = get_variable(ncs[key][0])

      meta_attrs = {'field': {'frequency': grouping}}
      calc = [{'func': calculation, 'name': variable, 'meta_attrs': meta_attrs}]
      logger.info('calculation: %s ' % (calc))
      if prefix is None:
        prefix = key.replace(frequency, grouping)

      logger.info('prefix: %s ' % (prefix))
      output = call(resource=ncs[key], variable=None,
                    calc=calc, calc_grouping=group,
                    prefix=prefix)
      logger.info('time aggregation done for %s ' % (key))
    except Exception:
      logger.exception('time aggregation failed for %s' % key)
      raise

  return output
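A hypothetical call, aggregating a daily file to monthly means (the grouping is resolved by calc_grouping above):

nc_monthly = aggregatTime(resource=['tas_day_EUR-11_historical.nc'],
                          grouping='mon', calculation='mean')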
Example #14
def get_season(nc_file, season='DJF'):
    """
  extacting of selected months

  :param nc_file: input netCDF
  :param season: month to be extracted (default = 'DJF')

  :returns str: netCDF with time subset
  """
    try:
        time_region = _TIMEREGIONS_[season]

        nc_season = call(nc_file, time_region=time_region)
        logger.info('season selection done %s ' % nc_season)
    except Exception as e:
        msg = 'failed to select season %s' % e
        logger.error(msg)
        raise Exception(msg)
    return nc_season
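_TIMEREGIONS_ is a module-level season-to-months lookup; judging from the explicit branches in Example #18, it presumably resembles:

_TIMEREGIONS_ = {'JJA': {'month': [6, 7, 8]},
                 'SON': {'month': [9, 10, 11]},
                 'DJF': {'month': [12, 1, 2]},
                 'MAM': {'month': [3, 4, 5]},
                 'SONDJF': {'month': [9, 10, 11, 12, 1, 2]},
                 'MAMJJA': {'month': [3, 4, 5, 6, 7, 8]}}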
Example #15
def get_season(nc_file, season='DJF'): 
  """
  extraction of selected months

  :param nc_file: input netCDF
  :param season: months to be extracted (default = 'DJF')

  :returns str: netCDF with time subset
  """
  try: 
    time_region = _TIMEREGIONS_[season]
   
    nc_season = call(nc_file, time_region=time_region)
    logger.info('season selection done %s ' % nc_season)
  except Exception as e:
    msg = 'failed to select season %s' % e
    logger.error(msg)
    raise Exception(msg)
  return nc_season
Example #16
def get_season(nc_file, season='DJF'):
    """
    extraction of selected months

    :param nc_file: input netCDF
    :param season: months to be extracted (default = 'DJF')

    :returns str: netCDF with time subset
    """
    try:
        time_region = _TIMEREGIONS_[season]
        LOGGER.info('time_region: %s ' % time_region)
        nc_season = call(nc_file, time_region=time_region)
        LOGGER.info('season selection done %s ' % nc_season)
    except Exception:
        msg = 'failed to select season; passing the input file through'
        LOGGER.exception(msg)
        nc_season = nc_file
    return nc_season
Example #17
def get_segetalflora(resource=[], culture_type='fallow', climate_type=3):
    """
  calulation of segetalflora species numbers based on yearly mean temperature 
  
  :param resource: list of netCDF yearly mean temperature (tas) files. 
  :param culture_type: Type of culture. Possible values are:
                       'fallow', 'intensive', 'extensive' (default:'fallow')
  :param climate_type: Type of climate: number 1 to 7 or 'all' (default: 2)
  
  :return list: list of result segeltaflora files
  """

    from flyingpigeon.ocgis_module import call
    from os import path

    if not type(culture_type) == list:
        culture_type = list([culture_type])
    if not type(climate_type) == list:
        climate_type = list([climate_type])

    outputs = []

    for name in resource:
        for cult in culture_type:
            for climat in climate_type:
                try:
                    calc = get_equation(culture_type=cult, climate_type=climat)
                    if calc is not None:
                        var = 'sf%s%s' % (cult, climat)
                        prefix = path.basename(name).replace('tas', var)[:-3]  # drop '.nc'

                        outputs.append(
                            call(resource=name, calc=calc, prefix=prefix))

                        logger.info('segetalflora done for %s' % (prefix))
                    else:
                        logger.debug('NO EQUATION found for %s %s ' %
                                     (cult, climat))
                except Exception as e:
                    logger.debug('Segetal flora failed: %s' % (e))
    return outputs
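A hypothetical call over one yearly-mean temperature file (get_equation supplies the calc expression consumed by call):

sf_files = get_segetalflora(resource=['tas_yr_EUR-11_historical.nc'],
                            culture_type='intensive', climate_type=3)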
Example #18
def get_season(nc_file, season='DJF'): 
  """
  extraction of selected months

  :param nc_file: input netCDF
  :param season: months to be extracted (default = 'DJF')
  :returns str: netCDF with time subset
  """
  try:
    if season == 'JJA':
      time_region = {'month': [6, 7, 8]}
    elif season == 'SON':
      time_region = {'month': [9, 10, 11]}
    elif season == 'DJF':
      time_region = {'month': [12, 1, 2]}
    elif season == 'FAM':
      time_region = {'month': [2, 3, 4]}
    elif season == 'MAM':
      time_region = {'month': [3, 4, 5]}
    elif season == 'JJAS':
      time_region = {'month': [6, 7, 8, 9]}
    elif season == 'DJFM':
      time_region = {'month': [12, 1, 2, 3]}
    elif season == 'MAMJ':
      time_region = {'month': [3, 4, 5, 6]}
    elif season == 'SOND':
      time_region = {'month': [9, 10, 11, 12]}
    elif season == 'SONDJF':
      time_region = {'month': [9, 10, 11, 12, 1, 2]}
    elif season == 'MAMJJA':
      time_region = {'month': [3, 4, 5, 6, 7, 8]}
    elif season == 'all':
      time_region = None
    else:
      raise Exception('season %s not found' % season)
    nc_season = call(nc_file, time_region=time_region)
    logger.info('season extraction done %s ' % nc_season)
  except Exception as e:
    msg = 'failed to extract season %s ' % e
    logger.error(msg)
    raise Exception(msg)
  return nc_season
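The elif chain is equivalent to a dictionary lookup, which is effectively what the _TIMEREGIONS_ variants of this function use; a sketch:

seasons = {'JJA': [6, 7, 8], 'SON': [9, 10, 11], 'DJF': [12, 1, 2]}  # extend with the branches above
if season == 'all':
    time_region = None
else:
    time_region = {'month': seasons[season]}  # KeyError flags an unknown season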
Example #19
def get_anomalies(nc_file, frac=0.2, reference=None):
  '''
  Anomalisation of data subsets for weather classification.
  Anomalies are computed by subtracting a smoothed annual cycle.

  :param nc_file: input netCDF file
  :param frac: number between 0 and 1 giving the strength of the smoothing
               (0 = close to the original data, 1 = flat line)
               default = 0.2
  :param reference: period over which to calculate the annual cycle
  :return string: path to output netCDF file
  '''
  try:
    variable = utils.get_variable(nc_file)
    calc = [{'func': 'mean', 'name': variable}]
    calc_grouping = ['day', 'year']
    nc_anual_cycle = call(nc_file, calc=calc, calc_grouping=calc_grouping, time_range=reference)
    logger.info('annual cycle calculated')
  except Exception as e:
    msg = 'failed to calculate annual cycle %s' % e
    logger.error(msg)
    raise Exception(msg)

  # LOWESS smoothing of the annual cycle
  import statsmodels.api as sm
  from numpy import tile, empty, linspace
  from netCDF4 import Dataset
  from cdo import Cdo
  cdo = Cdo()
  
  try:
    # variable = utils.get_variable(nc_file)
    ds = Dataset(nc_anual_cycle, mode='a')
    vals = ds.variables[variable]
    vals_sm = empty(vals.shape)
    ts = vals.shape[0]
    x = linspace(1, ts * 3, num=ts * 3, endpoint=True)

    for lat in range(vals.shape[1]):
      for lon in range(vals.shape[2]):
        try:
          y = tile(vals[:, lat, lon], 3)
          # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
          ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]
          vals_sm[:, lat, lon] = ys
        except Exception as e:
          msg = 'failed for lat %s lon %s %s ' % (lat, lon, e)
          logger.debug(msg)
          raise Exception(msg)
      print('done for %s - %s ' % (lat, lon))
    vals[:, :, :] = vals_sm[:, :, :]
    ds.close()
    logger.info('smoothing of annual cycle done')
  except Exception as e:
    msg = 'smoothing of annual cycle failed %s ' % e
    logger.error(msg)
    raise Exception(msg)
  try:
    ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
    nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
    logger.info('anomalisation done: %s ' % nc_anomal)
  except Exception as e:
    msg = 'failed to subtract annual cycle %s ' % e
    logger.error(msg)
    raise Exception(msg)
  return nc_anomal
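The tiling above is the key trick: the annual cycle is repeated three times, smoothed, and only the middle third is kept, so the smoother never sees artificial end points. A self-contained sketch of the idea on synthetic data (assumes statsmodels is installed):

import numpy as np
import statsmodels.api as sm

ts = 365
cycle = 10 + 5 * np.sin(np.linspace(0, 2 * np.pi, ts)) + np.random.randn(ts)  # noisy annual cycle
y = np.tile(cycle, 3)                   # periodic padding on both sides
x = np.linspace(1, ts * 3, num=ts * 3)
smoothed = sm.nonparametric.lowess(y, x, frac=0.2)[ts:ts * 2, 1]  # keep the middle third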
Example #20
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'
        process_start_time = time.time()  # measure process execution time ...

        response.update_status('execution started at : %s ' % dt.now(), 5)

        start_time = time.time()  # measure init ...

        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data
        regrset = request.inputs['regrset'][0].data

        # fix 31 December issue
        # refSt = dt.combine(refSt,dt_time(12,0))
        # refEn = dt.combine(refEn,dt_time(12,0))
        # dateSt = dt.combine(dateSt,dt_time(12,0))
        # dateEn = dt.combine(dateEn,dt_time(12,0))

        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data
        # bbox = [-80, 20, 50, 70]
        # TODO: check for wrong coordinates and apply defaults if necessary
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
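        # reorder to the ocgis bbox convention [minx, maxx, miny, maxy]
        # (the input string is presumably 'minx,miny,maxx,maxy')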
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))

        direction = request.inputs['direction'][0].data
        normalize = request.inputs['normalize'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        try:
            if direction == 're2mo':
                anaSt = dt.combine(dateSt, dt_time(0, 0))
                anaEn = dt.combine(dateEn, dt_time(0, 0))
                refSt = dt.combine(refSt, dt_time(12, 0))
                refEn = dt.combine(refEn, dt_time(12, 0))
                r_time_range = [anaSt, anaEn]
                m_time_range = [refSt, refEn]
            elif direction == 'mo2re':
                anaSt = dt.combine(dateSt, dt_time(12, 0))
                anaEn = dt.combine(dateEn, dt_time(12, 0))
                refSt = dt.combine(refSt, dt_time(0, 0))
                refEn = dt.combine(refEn, dt_time(0, 0))
                r_time_range = [refSt, refEn]
                m_time_range = [anaSt, anaEn]
            else:
                raise Exception(
                    'failed to find time periods for comparison direction')
        except Exception:
            msg = 'failed to put simulation and reference time in order'
            LOGGER.exception(msg)
            raise Exception(msg)

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            raise Exception('output format not valid: %s' % outformat)

        try:
            if model == 'NCEP':
                getlevel = True
                if 'z' in var:
                    level = var.strip('z')
                    variable = 'hgt'
                    # conform_units_to='hPa'
                else:
                    variable = 'slp'
                    level = None
                    # conform_units_to='hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    # conform_units_to=None
                else:
                    variable = 'prmsl'
                    level = None
                    # conform_units_to='hPa'
            else:
                raise Exception('Reanalyses model not known')
            LOGGER.info('environment set')
        except Exception:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        # LOGGER.exception("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in the arguments', 6)

        #################
        # get input data
        #################
        # TODO: do not forget to select years

        start_time = time.time()  # measure get_input_data ...
        response.update_status('fetching input data', 7)
        try:
            if direction == 're2mo':
                nc_reanalyses = reanalyses(start=anaSt.year,
                                           end=anaEn.year,
                                           variable=var,
                                           dataset=model,
                                           getlevel=getlevel)
            else:
                nc_reanalyses = reanalyses(start=refSt.year,
                                           end=refEn.year,
                                           variable=var,
                                           dataset=model,
                                           getlevel=getlevel)

            if type(nc_reanalyses) == list:
                nc_reanalyses = sorted(
                    nc_reanalyses,
                    key=lambda i: path.splitext(path.basename(i))[0])
            else:
                nc_reanalyses = [nc_reanalyses]

            # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 Gb,
            # so it makes sense to process it step by step.
            # TODO: need to create a dictionary for such datasets (for models as well)
            # TODO: benchmark the method below for NCEP z500 for 60 years; maybe use the same (!)
            # TODO: now everything is regridded to the reanalysis

            if ('20CRV2' in model) and ('z' in var):
                tmp_total = []
                origvar = get_variable(nc_reanalyses)

                for z in nc_reanalyses:
                    tmp_n = 'tmp_%s' % (uuid.uuid1())
                    b0 = call(resource=z,
                              variable=origvar,
                              level_range=[int(level), int(level)],
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='levdom_' + path.basename(z)[0:-3])
                    tmp_total.append(b0)

                tmp_total = sorted(
                    tmp_total,
                    key=lambda i: path.splitext(path.basename(i))[0])
                inter_subset_tmp = call(resource=tmp_total,
                                        variable=origvar,
                                        time_range=r_time_range)

                # Clean
                for i in tmp_total:
                    tbr = 'rm -f %s' % (i)
                    #system(tbr)

                # Create new variable
                ds = Dataset(inter_subset_tmp, mode='a')
                z_var = ds.variables.pop(origvar)
                dims = z_var.dimensions
                new_var = ds.createVariable('z%s' % level,
                                            z_var.dtype,
                                            dimensions=(dims[0], dims[2],
                                                        dims[3]))
                new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                nc_subset = call(inter_subset_tmp, variable='z%s' % level)
            else:
                nc_subset = call(
                    resource=nc_reanalyses,
                    variable=var,
                    geom=bbox,
                    spatial_wrapping='wrap',
                    time_range=r_time_range,
                    # conform_units_to=conform_units_to
                )

            # nc_subset = call(resource=nc_reanalyses, variable=var, geom=bbox, spatial_wrapping='wrap') # XXXXXX wrap
            # LOGGER.exception("get_input_subset_model took %s seconds.", time.time() - start_time)
            response.update_status('**** Input reanalyses data fetched', 10)
        except Exception:
            msg = 'failed to fetch or subset input files'
            LOGGER.exception(msg)
            raise Exception(msg)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 12)

        # Filter resource:
        if type(resource) == list:
            resource = sorted(resource,
                              key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        tmp_resource = []

        m_start = m_time_range[0]
        m_end = m_time_range[1]

        for re in resource:
            s, e = get_timerange(re)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            if ((tmpSt <= m_end) and (tmpEn >= m_start)):
                tmp_resource.append(re)
                LOGGER.debug('Selected file: %s ' % (re))
        resource = tmp_resource

        start_time = time.time()  # measure data preparation ...
        # TODO: check the calendars of model vs. reanalyses
        # TODO: check the units of model vs. reanalyses
        try:
            m_total = []
            modvar = get_variable(resource)
            # resource properties
            ds = Dataset(resource[0])
            m_var = ds.variables[modvar]
            dims = list(m_var.dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id')
            except AttributeError:
                model_id = 'Unknown model'

            LOGGER.debug('MODEL: %s ' % (model_id))

            lev_units = 'hPa'

            if (dimlen > 3):
                lev = ds.variables[dims[1]]
                # actually index [1] need to be detected... assuming zg(time, plev, lat, lon)
                lev_units = lev.units

                if (lev_units == 'Pa'):
                    m_level = str(int(level) * 100)
                else:
                    m_level = level
            else:
                m_level = None

            if level is None:
                level_range = None
            else:
                level_range = [int(m_level), int(m_level)]

            for z in resource:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                # select level and regrid
                b0 = call(
                    resource=z,
                    variable=modvar,
                    level_range=level_range,
                    spatial_wrapping='wrap',  #cdover='system',
                    regrid_destination=nc_reanalyses[0],
                    regrid_options='bil',
                    prefix=tmp_n)
                # select domain
                b01 = call(resource=b0,
                           geom=bbox,
                           spatial_wrapping='wrap',
                           prefix='levregr_' + path.basename(z)[0:-3])
                tbr = 'rm -f %s' % (b0)
                #system(tbr)
                tbr = 'rm -f %s' % (tmp_n)
                #system(tbr)
                # get full resource
                m_total.append(b01)
            ds.close()
            model_subset = call(m_total, time_range=m_time_range)
            for i in m_total:
                tbr = 'rm -f %s' % (i)
                #system(tbr)

            if m_level is not None:
                # Create new variable in model set
                ds = Dataset(model_subset, mode='a')
                mod_var = ds.variables.pop(modvar)
                dims = mod_var.dimensions
                new_modvar = ds.createVariable('z%s' % level,
                                               mod_var.dtype,
                                               dimensions=(dims[0], dims[2],
                                                           dims[3]))
                new_modvar[:, :, :] = squeeze(mod_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                mod_subset = call(model_subset, variable='z%s' % level)
            else:
                mod_subset = model_subset

#            if direction == 're2mo':
#                try:
#                    response.update_status('Preparing simulation data', 15)
#                    reanalyses_subset = call(resource=nc_subset, time_range=[anaSt, anaEn])
#                except:
#                    msg = 'failed to prepare simulation period'
#                    LOGGER.exception(msg)
#                try:
#                    response.update_status('Preparing target data', 17)
#                    var_target = get_variable(resource)
#                    # var_simulation = get_variable(simulation)

#                    model_subset_tmp = call(resource=resource, variable=var_target,
#                                            time_range=[refSt, refEn],
#                                            t_calendar='standard',
#                                            spatial_wrapping='wrap',
#                                            regrid_destination=nc_reanalyses[0],
#                                            regrid_options='bil')

#                    # model_subset = call(resource=resource, variable=var_target,
#                    #                     time_range=[refSt, refEn],
#                    #                     geom=bbox,
#                    #                     t_calendar='standard',
#                    #                     # conform_units_to=conform_units_to,
#                    #                     spatial_wrapping='wrap',
#                    #                     regrid_destination=reanalyses_subset,
#                    #                     regrid_options='bil') # XXXXXXXXXXXX ADD WRAP rem calendar

#                    model_subset = call(resource=model_subset_tmp,variable=var_target, geom=bbox, spatial_wrapping='wrap', t_calendar='standard')

#                   # ISSUE: the regrided model has white border with null! Check it.
#                   # check t_calendar!
#                except:
#                    msg = 'failed subset archive model'
#                    LOGGER.exception(msg)
#                    raise Exception(msg)
#            else:
#                try:
#                    response.update_status('Preparing target data', 15)
#                    var_target = get_variable(resource)
#                    # var_simulation = get_variable(simulation)
#                    model_subset = call(resource=resource, variable=var_target,
#                                        time_range=[refSt, refEn],
#                                        geom=bbox,
#                                        t_calendar='standard',
#                                        # conform_units_to=conform_units_to,
#                                        # spatial_wrapping='wrap',
#                                        )
#                except:
#                    msg = 'failed subset archive model'
#                    LOGGER.exception(msg)
#                    raise Exception(msg)
#                try:
#                    response.update_status('Preparing simulation data', 17)
#                    reanalyses_subset = call(resource=nc_subset,
#                                             time_range=[anaSt, anaEn],
#                                             regrid_destination=model_subset,
#                                             regrid_options='bil')
#                except:
#                    msg = 'failed to prepare simulation period'
#                    LOGGER.exception(msg)
        except Exception:
            msg = 'failed to subset simulation or reference data'
            LOGGER.exception(msg)
            raise Exception(msg)

# --------------------------------------------
        try:
            if direction == 'mo2re':
                simulation = mod_subset
                archive = nc_subset
                base_id = model
                sim_id = model_id
            elif direction == 're2mo':
                simulation = nc_subset
                archive = mod_subset
                base_id = model_id
                sim_id = model
            else:
                raise Exception('direction not valid: %s ' % direction)
        except Exception:
            msg = 'failed to find comparison direction'
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if level is not None:
                out_var = 'z%s' % level
            else:
                var_archive = get_variable(archive)
                var_simulation = get_variable(simulation)
                if var_archive != var_simulation:
                    rename_variable(archive,
                                    oldname=var_archive,
                                    newname=var_simulation)
                    out_var = var_simulation
                    LOGGER.info('varname %s in netCDF renamed to %s' %
                                (var_archive, var_simulation))
        except Exception:
            msg = 'failed to rename variable in target files'
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except Exception:
            msg = 'failed to prepare seasonal cycle reference files'
            LOGGER.exception(msg)
            raise Exception(msg)

        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        # LOGGER.exception("data preperation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################

        response.update_status('writing config file', 18)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                base_id=base_id,
                sim_id=sim_id,
                timewin=timewin,
                varname=var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[
                    dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')
                ],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception:
            msg = 'failed to generate config file'
            LOGGER.exception(msg)
            raise Exception(msg)

        # LOGGER.exception("write_config took %s seconds.", time.time() - start_time)

        #######################
        # CASTf90 call
        #######################
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90

        response.update_status('Start CASTf90 call', 20)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('analogue.out info:\n %s ' % output)
            LOGGER.error('analogue.out errors:\n %s ' % error)
            response.update_status('**** CASTf90 succeeded', 90)
        except Exception:
            msg = 'CASTf90 failed'
            LOGGER.exception(msg)
            raise Exception(msg)

        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        response.update_status('preparing output', 91)

        # Stopper to keep twitcher results, for debug
        # dummy=dummy

        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 95)
        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 99)
        LOGGER.info('rendered pages: %s ', viewer_html)
        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
Example #21
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 7)

            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data
            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data
            timres = request.inputs['timeres'][0].data

            # bbox = [-80, 20, 50, 70]
            # TODO: check for wrong coordinates and apply defaults if necessary
            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            bboxStr = bboxStr.split(',')
            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            normalize = request.inputs['normalize'][0].data
            detrend = request.inputs['detrend'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            model_var = request.inputs['reanalyses'][0].data
            model, var = model_var.split('_')

            # experiment = self.getInputValues(identifier='experiment')[0]
            # dataset, var = experiment.split('_')
            # LOGGER.info('environment set')
            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 8)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            response.update_status('Preparing environment, converting arguments', 9)
            LOGGER.debug('date: %s %s %s %s ' %
                         (type(refSt), refEn, dateSt, dateEn))

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            #
            # refSt = dt.strftime(refSt, '%Y-%m-%d')
            # refEn = dt.strftime(refEn, '%Y-%m-%d')
            # dateSt = dt.strftime(dateSt, '%Y-%m-%d')
            # dateEn = dt.strftime(dateEn, '%Y-%m-%d')

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                raise Exception('output format not valid: %s' % outformat)

        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 10)

        try:
            if model == 'NCEP':
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            else:
                raise Exception('Reanalyses dataset not known')
            LOGGER.info('environment set for model: %s' % model)
        except Exception:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        # NOTE: If ref is say 1950 - 1990, and sim is just 1 week in 2017 - ALL the data will be downloaded, 1950 - 2017
        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=var,
                          timres=timres,
                          getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except Exception:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('subsetting region of interest', 17)
        # from flyingpigeon.weatherregimes import get_level
        LOGGER.debug("start and end time: %s - %s" % (start, end))
        time_range = [start, end]

        # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 Gb,
        # so it makes sense to process it step by step.
        # TODO: need to create a dictionary for such datasets (for models as well)
        # TODO: benchmark the method below for NCEP z500 for 60 years

        #        if ('20CRV2' in model) and ('z' in var):
        if ('z' in var):
            tmp_total = []
            origvar = get_variable(model_nc)

            for z in model_nc:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z,
                          variable=origvar,
                          level_range=[int(level), int(level)],
                          geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='levdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)

            tmp_total = sorted(
                tmp_total,
                key=lambda i: os.path.splitext(os.path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total,
                                    variable=origvar,
                                    time_range=time_range)

            # Clean
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                os.system(tbr)

            # Create new variable
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level,
                                        z_var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
        else:
            model_subset_tmp = call(
                resource=model_nc,
                variable=var,
                geom=bbox,
                spatial_wrapping='wrap',
                time_range=time_range,
                # conform_units_to=conform_units_to
            )

        # If dataset is 20CRV2 the 6 hourly file should be converted to daily.
        # Option to use previously 6h data from cache (if any) and not download daily files.

        if '20CRV2' in model and timres == '6h':
            from cdo import Cdo

            cdo = Cdo()
            model_subset = '%s.nc' % uuid.uuid1()
            tmp_f = '%s.nc' % uuid.uuid1()

            cdo.daymean(input=model_subset_tmp, output=tmp_f)
            cdo.settime('00:00:00', input=tmp_f, output=model_subset)
            LOGGER.debug('File converted from %s to daily' % timres)
        else:
            model_subset = model_subset_tmp

        LOGGER.info('Dataset subset done: %s ', model_subset)
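        # Note: the two cdo calls above could probably be collapsed into a single
        # operator chain (a sketch, assuming cdo.py operator piping):
        #   cdo.settime('00:00:00', input='-daymean %s' % model_subset_tmp,
        #               output=model_subset)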

        response.update_status('dataset subsetted', 19)

        # DETRENDING block for model_subset.
        # The original model subset is kept for further visualisation if needed.
        # There is currently an issue with SLP:
        # TODO 1: keep the trend as a separate file
        # TODO 2: think how to add options to plot anomalies AND original data...
        #         maybe run archive and simulation = call(...) over the NOT detrended data and keep it as well
        # TODO 3: check a faster smoother and removing the trend of each grid cell

        if detrend == 'None':
            orig_model_subset = model_subset
        else:
            orig_model_subset = remove_mean_trend(model_subset, varname=var)

        # ======================================

        ############################################################
        #  get the required bbox and time region from resource data
        ############################################################
        #
        #
        # try:
        #     if dataset == 'NCEP':
        #         if 'z' in var:
        #             variable = 'hgt'
        #             level = var.strip('z')
        #             # conform_units_to=None
        #         else:
        #             variable = 'slp'
        #             level = None
        #             # conform_units_to='hPa'
        #     elif '20CRV2' in var:
        #         if 'z' in level:
        #             variable = 'hgt'
        #             level = var.strip('z')
        #             # conform_units_to=None
        #         else:
        #             variable = 'prmsl'
        #             level = None
        #             # conform_units_to='hPa'
        #     else:
        #         LOGGER.exception('Reanalyses dataset not known')
        #     LOGGER.info('environment set')
        # except Exception as e:
        #     msg = 'failed to set environment %s ' % e
        #     LOGGER.exception(msg)
        #     # raise Exception(msg)
        #
        # LOGGER.debug("init took %s seconds.", time.time() - start_time)
        # response.update_status('Read in and convert the arguments done', 8)
        #
        # #################
        # # get input data
        # #################
        # start_time = time.time()  # measure get_input_data ...
        # response.update_status('fetching input data', 7)
        # try:
        #     input = reanalyses(start=start.year, end=end.year,
        #                        variable=var, dataset=dataset)
        #     LOGGER.info('input files %s' % input)
        #     nc_subset = call(resource=input, variable=var,
        #                      geom=bbox, spatial_wrapping='wrap')
        # except Exception as e:
        #     msg = 'failed to fetch or subset input files %s' % e
        #     LOGGER.exception(msg)
        #     # raise Exception(msg)

        LOGGER.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        response.update_status('**** Input data fetched', 20)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 22)
        start_time = time.time()  # measure data preparation ...

        try:
            # Construct descriptive filenames for the three files
            # listed in config file
            # TODO check strftime for years <1900 (!)

            refDatesString = dt.strftime(
                refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
            simDatesString = dt.strftime(
                dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
            archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                                % (bbox[0], bbox[2], bbox[1], bbox[3])
            simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])
            archive = call(resource=model_subset,
                           time_range=[refSt, refEn],
                           prefix=archiveNameString)
            simulation = call(resource=model_subset,
                              time_range=[dateSt, dateEn],
                              prefix=simNameString)
            LOGGER.info('archive and simulation files generated: %s, %s' %
                        (archive, simulation))
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                LOGGER.info('normalization function with method: %s ' %
                            normalize)
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to generate normalization files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        output_file = 'output.txt'
        files = [
            os.path.abspath(archive),
            os.path.abspath(simulation), output_file
        ]
        LOGGER.debug("Data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generate the config file
        ############################
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=model,
            sim_id=model,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[
                dt.strftime(refSt, '%Y-%m-%d'),
                dt.strftime(refEn, '%Y-%m-%d')
            ],
            bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
        response.update_status('generated config file', 25)
        #######################
        # CASTf90 call
        #######################
        start_time = time.time()  # measure call castf90

        # -----------------------
        try:
            import ctypes
            # TODO: this shared library is Linux-only
            mkl_rt = ctypes.CDLL('libmkl_rt.so')
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('Current number of threads: %s' % (nth))
            mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('NEW number of threads: %s' % (nth))
            # TODO: does the env export below work with default shell=False in
            # subprocess? (see the note after this block)
            os.environ['MKL_NUM_THREADS'] = str(nth)
            os.environ['OMP_NUM_THREADS'] = str(nth)
        except Exception as e:
            msg = 'Failed to set THREADS %s ' % e
            LOGGER.debug(msg)
        # -----------------------
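        # Note on the TODO above: values set via os.environ are inherited by child
        # processes started with subprocess even with the default shell=False,
        # since the child receives a copy of the parent's environment.
        # An explicit alternative would be (sketch):
        #   env = dict(os.environ, MKL_NUM_THREADS=str(nth), OMP_NUM_THREADS=str(nth))
        #   subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env)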

        response.update_status('Start CASTf90 call', 30)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = ['analogue.out', config_file]
            LOGGER.debug("castf90 command: %s", cmd)
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            LOGGER.info('analogue output:\n %s', output)
            response.update_status('**** CASTf90 succeeded', 70)
        except CalledProcessError as e:
            msg = 'CASTf90 failed:\n{0}'.format(e.output)
            LOGGER.exception(msg)
            raise Exception(msg)
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        # TODO: Add try - except for pdfs
        analogs_pdf = analogs.plot_analogs(configfile=config_file)
        response.update_status('preparing output', 75)
        # response.outputs['config'].storage = FileStorage()
        response.outputs['analog_pdf'].file = analogs_pdf
        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        if seacyc is True:
            response.outputs['base_netcdf'].file = seasoncyc_base
            response.outputs['sim_netcdf'].file = seasoncyc_sim
        else:
            # TODO: still unclear how to handle an unknown number of outputs
            dummy_base = 'dummy_base.nc'
            dummy_sim = 'dummy_sim.nc'
            with open(dummy_base, 'a'):
                os.utime(dummy_base, None)
            with open(dummy_sim, 'a'):
                os.utime(dummy_sim, None)
            response.outputs['base_netcdf'].file = dummy_base
            response.outputs['sim_netcdf'].file = dummy_sim

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 80)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
Example No. 22
0
def calc_indice_simple(resource=[], variable=None, prefix=None, indices=None,
    polygons=None, mosaik=False, groupings='yr', dir_output=None, dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='SU')
    :param polygons: list of polygons (default='FRA')
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    from os.path import join, dirname, exists
    from os import makedirs
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    #DIR_SHP = config.shapefiles_dir()
    #env.DIR_SHPCABINET = DIR_SHP
    #env.OVERWRITE = True

    if type(resource) != list:
      resource = list([resource])
    if type(indices) != list:
      indices = list([indices])
    if type(polygons) != list and polygons is not None:
      polygons = list([polygons])
    if type(groupings) != list:
      groupings = list([groupings])

    if dir_output is not None:
      if not exists(dir_output):
        makedirs(dir_output)
    
    #from flyingpigeon.subset import select_ugid
    #    tile_dim = 25
    output = None


    experiments = sort_by_filename(resource)
    outputs = []
    
    for key in experiments:
      if variable is None:
        variable = get_variable(experiments[key][0])
        # variable = key.split('_')[0]
      try:
        if variable == 'pr':
          calc = 'pr=pr*86400'
          ncs = ocgis_module.call(resource=experiments[key],
                     variable=variable,
                     dimension_map=dimension_map,
                     calc=calc,
                     memory_limit=memory_limit,
                     # calc_grouping=calc_group,
                     prefix=str(uuid.uuid4()),
                     dir_output=dir_output,
                     output_format='nc')
        else:
          ncs = experiments[key]
        for indice in indices:
          logger.info('indice: %s' % indice)
          try: 
            calc = [{'func' : 'icclim_' + indice, 'name' : indice}]
            logger.info('calc: %s' % calc)
            for grouping in groupings:
              logger.info('grouping: %s' % grouping)
              try:
                calc_group = calc_grouping(grouping)
                logger.info('calc_group: %s' % calc_group)
                if polygons is None:
                  try:
                    # use a local prefix so later keys do not reuse the first one
                    if prefix is None:
                      key_prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                    else:
                      key_prefix = prefix
                    tmp = ocgis_module.call(resource=ncs,
                     variable=variable,
                     dimension_map=dimension_map,
                     calc=calc,
                     calc_grouping=calc_group,
                     prefix=key_prefix,
                     dir_output=dir_output,
                     output_format='nc')
                    outputs.append(tmp)
                  except Exception as e:
                    msg = 'could not calc indice %s for domain in %s' % (indice, key)
                    logger.exception(msg)
                    raise Exception(msg)
                else:
                  try:
                    if prefix is None:
                      key_prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                    else:
                      key_prefix = prefix
                    tmp = clipping(resource=ncs,
                     variable=variable,
                     dimension_map=dimension_map,
                     calc=calc,
                     calc_grouping=calc_group,
                     prefix=key_prefix,
                     polygons=polygons,
                     mosaik=mosaik,
                     dir_output=dir_output,
                     output_format='nc')
                    outputs.append(tmp)
                  except Exception as e:
                    msg = 'could not calc indice %s for polygons in %s' % (indice, key)
                    logger.exception(msg)
                    raise Exception(msg)
                logger.info('indice file calculated')      
              except Exception as e:
                msg = 'could not calc indice %s for key %s and grouping %s' %  (indice, key, grouping)
                logger.exception(msg)
                raise Exception(msg)  
          except Exception as e:
            msg = 'could not calc indice %s for key %s' % ( indice, key)
            logger.exception(msg)
            raise Exception(msg)        
      except Exception as e:
        msg = 'could not calc key %s' % key
        logger.exception(msg)
        raise Exception(msg)
    return outputs
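
A minimal usage sketch for calc_indice_simple, assuming daily netCDF input in DRS naming; the module location, file path and output directory are assumptions, not taken from the source:

# hedged usage sketch; module location and paths are assumptions
from flyingpigeon.indices import calc_indice_simple

nc_files = ['tasmax_day_EUR-11_MPI-ESM-LR_historical_19710101-20001231.nc']  # hypothetical
indice_files = calc_indice_simple(resource=nc_files,
                                  indices=['SU'],      # summer days (icclim_SU)
                                  groupings=['yr'],    # yearly aggregation
                                  polygons=None,       # whole domain, no clipping
                                  dir_output='/tmp/indices')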
Example No. 23
0
def calc_indice_percentile(resources=[], variable=None,
    prefix=None, indices='TG90p', refperiod=None,
    groupings='yr', polygons=None, percentile=90, mosaik=False,
    dir_output=None, dimension_map=None):
    """
    Calculates given percentile-based indices for suitable files in the appropriate time grouping and polygon.

    :param resources: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period tuple = (start, end)
    :param groupings: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from numpy import ma 
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time
    
    if type(resources) != list:
      resources = list([resources])
    if type(indices) != list:
      indices = list([indices])

    if type(groupings) != list:
      groupings = list([groupings])

    if type(refperiod) == list:
      refperiod = refperiod[0]

    if refperiod is not None:
      start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
      end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
      time_range = [start, end]
    else:
      time_range = None

    if dir_output is not None:
      if not exists(dir_output):
        makedirs(dir_output)
    
    ##################################################
    # Compute a custom percentile basis using ICCLIM
    ##################################################

    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)
    
    for grouping in groupings:
      calc_group = calc_grouping(grouping)
      for key in nc_dic.keys():
        resource = nc_dic[key]
        if variable is None:
          variable = get_variable(resource)
        if polygons is None:
          nc_reference = call(resource=resource, 
            prefix=str(uuid.uuid4()), 
            time_range=time_range,
            output_format='nc', 
            dir_output=dir_output)
        else:
          nc_reference = clipping(resource=resource,
            prefix=str(uuid.uuid4()),
            time_range=time_range,
            output_format='nc',
            polygons=polygons,
            dir_output=dir_output,
            mosaik=mosaik)
          
        arr = get_values(nc_files=nc_reference)
        dt_arr = get_time(nc_files=nc_reference)
        arr = ma.masked_array(arr)
        dt_arr = ma.masked_array(dt_arr)
        window_width = 5
        
        for indice in indices:
          name = indice.replace('_', str(percentile))
          var = indice.split('_')[0]

          operation = None
          if 'T' in var:
            if percentile >= 50:
              operation = 'Icclim%s90p' % var
              func = 'icclim_%s90p' % var  # icclim_TG90p
            else:
              operation = 'Icclim%s10p' % var
              func = 'icclim_%s10p' % var
          if operation is None:
            raise Exception('no ICCLIM percentile operation known for indice %s' % indice)

          ################################
          # load the appropriate operation
          ################################

          ops = [op for op in dir(lic) if operation in op]
          if len(ops) == 0:
            raise Exception('operator does not exist: %s' % operation)

          # resolve the operation class by name instead of using exec
          percentile_dict = getattr(lic, ops[0]).get_percentile_dict(arr, dt_arr, percentile, window_width)
          calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
          
          if polygons is None:
            nc_indices.append(call(resource=resource, 
                                prefix=key.replace(variable,name).replace('_day_', '_%s_' % grouping), 
                                calc=calc, 
                                calc_grouping=calc_group, 
                                output_format='nc',
                                dir_output=dir_output))
          else:
            nc_indices.extend(clipping(resource=resource,
                                prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                calc=calc,
                                calc_grouping=calc_group,
                                output_format='nc',
                                dir_output=dir_output,
                                polygons=polygons,
                                mosaik=mosaik))
    return nc_indices
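
A minimal usage sketch for calc_indice_percentile; the reference period string follows the YYYYMMDD-YYYYMMDD format parsed above, and the indice is given in the underscore form the code expands with the percentile value, while the module location and file path are assumptions:

# hedged usage sketch; module location and paths are assumptions
from flyingpigeon.indices import calc_indice_percentile

nc_files = ['tg_day_EUR-11_MPI-ESM-LR_historical_19710101-20001231.nc']  # hypothetical
nc_indices = calc_indice_percentile(resources=nc_files,
                                    indices=['TG_p'],  # expanded to TG90p via percentile
                                    percentile=90,
                                    refperiod='19710101-20001231',
                                    groupings=['yr'])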
Example No. 24
0
    def execute(self):

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp

        self.status.set('execution started at : %s ' % dt.now(), 5)

        ################################
        # reading in the input arguments
        ################################
        try:
            logger.info('read in the arguments')
            # resources = self.getInputValues(identifier='resources')
            season = self.getInputValues(identifier='season')[0]
            bbox_obj = self.BBox.getValue()
            model_var = self.getInputValues(identifier='reanalyses')[0]
            period = self.getInputValues(identifier='period')[0]
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
            model, variable = model_var.split('_')

            kappa = int(self.getInputValues(identifier='kappa')[0])
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
        except Exception as e:
            logger.debug('failed to read in the arguments %s ' % e)

        try:
            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')

            if bbox_obj is not None:
                logger.info("bbox_obj={0}".format(bbox_obj.coords))
                bbox = [
                    bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                    bbox_obj.coords[1][0], bbox_obj.coords[1][1]
                ]
                logger.info("bbox={0}".format(bbox))
            else:
                bbox = None

        except Exception as e:
            logger.debug('failed to transform BBOXObject  %s ' % e)

        ###########################
        # set the environment
        ###########################

        self.status.set('fetching data from archive', 10)

        try:
            if model == 'NCEP':
                if 'z' in variable:
                    level = variable.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            elif '20CRV2' in model:
                if 'z' in variable:
                    level = variable.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            else:
                logger.error('Reanalyses dataset not known')
            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        from flyingpigeon.datafetch import reanalyses as rl
        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=variable)
            logger.info('reanalyses data fetched')
        except Exception as e:
            msg = 'failed to get reanalyses data  %s' % e
            logger.debug(msg)
            raise Exception(msg)

        self.status.set('fetching data done', 15)
        ############################################################
        # get the required bbox and time region from resource data
        ############################################################

        self.status.set('subsetting region of interest', 17)
        # from flyingpigeon.weatherregimes import get_level
        from flyingpigeon.ocgis_module import call

        time_range = [start, end]
        model_subset = call(
            resource=model_nc,
            variable=variable,
            geom=bbox,
            spatial_wrapping='wrap',
            time_range=time_range,
            # conform_units_to=conform_units_to
        )
        logger.info('Dataset subset done: %s ' % model_subset)

        self.status.set('dataset subsetted', 19)
        ##############################################
        # computing anomalies
        ##############################################
        self.status.set('computing anomalies ', 19)

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        logger.debug('reference time: %s' % reference)
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        # extracting season
        #####################
        self.status.set('normalizing data', 21)
        model_season = wr.get_season(model_anomal, season=season)

        self.status.set('anomalies computed and normalized', 24)
        #######################
        # call the R scripts
        #######################
        self.status.set('Start weather regime clustering ', 25)
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_model.R'

            infile = model_season  # model_subset #model_ponderate
            modelname = model
            yr1 = start.year
            yr2 = end.year
            ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % model_var,
                '%s' % kappa
            ]
            logger.info('R call built')
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            logger.debug(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            logger.info('R outlog info:\n %s ' % output)
            logger.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                self.status.set('**** weatherregime in R succeeded', 90)
            else:
                logger.error('no output returned from R call')
        except Exception as e:
            msg = 'weatherregime in R failed %s ' % e
            logger.error(msg)
            raise Exception(msg)

        self.status.set('Weather regime clustering done ', 80)
        ############################################
        # set the outputs
        ############################################
        self.status.set('Set the process outputs ', 95)

        self.Routput_graphic.setValue(output_graphics)
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
Example No. 25
0
def get_anomalies(nc_file, frac=0.2, reference=None, method='ocgis', sseas='serial', variable=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
               (0 = close to the original data, 1 = flat line)
               default = 0.2
    :param reference: Period to calculate annual cycle

    :returns str: path to output netCDF file
    """
    from netCDF4 import Dataset

    if variable is None:
        variable = utils.get_variable(nc_file)
        # get_variable may return a list if the file holds more than one variable
        if isinstance(variable, list):
            _ds = Dataset(nc_file)
            # works only if there is exactly one 3D variable
            for j in variable:
                if len(_ds.variables[j].dimensions) == 3:
                    _var = j
            variable = _var
            _ds.close()
    LOGGER.debug('3D Variable selected: %s' % variable)

    try:
        if (method == 'cdo'):
            from cdo import Cdo
            from os import system

            ip2, nc_anual_cycle = mkstemp(dir='.', suffix='.nc')

            cdo = Cdo()
            #ip, nc_anual_cycle_tmp = mkstemp(dir='.', suffix='.nc')
            # TODO: if reference is none, use utils.get_time for nc_file to set the ref range
            #       But will need to fix 360_day issue (use get_time_nc from analogs)

            # com = 'seldate'
            # comcdo = 'cdo %s,%s-%s-%s,%s-%s-%s %s %s' % (com, reference[0].year, reference[0].month, reference[0].day,
            #                                              reference[1].year, reference[1].month, reference[1].day,
            #                                              nc_file, nc_anual_cycle_tmp)
            # LOGGER.debug('CDO: %s' % (comcdo))
            # system(comcdo)

            # Use an ocgis call instead of cdo seldate: cdo keeps the packed
            # (integer) precision, so the anomalies would come out as integers
            calc = '%s=%s' % (variable, variable)
            nc_anual_cycle_tmp = call(nc_file, time_range=reference, variable=variable, calc=calc)
            nc_anual_cycle = cdo.ydaymean(input=nc_anual_cycle_tmp, output=nc_anual_cycle)
        else:
            calc = [{'func': 'mean', 'name': variable}]
            calc_grouping = ['day', 'month']
            nc_anual_cycle = call(nc_file,
                                  calc=calc,
                                  calc_grouping=calc_grouping,
                                  variable=variable,
                                  time_range=reference)
        LOGGER.info('annual cycle calculated: %s' % (nc_anual_cycle))

    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        # lowess smoothing of the annual cycle
        import statsmodels.api as sm
        from numpy import tile, empty, linspace
        from cdo import Cdo
        cdo = Cdo()
        # variable = utils.get_variable(nc_file)
        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts * 3, num=ts * 3, endpoint=True)

        if ('serial' not in sseas):
            # Multiprocessing =======================

            from multiprocessing import Pool
            pool = Pool()

            valex = [0.]
            valex = valex*vals.shape[1]*vals.shape[2]

            # TODO redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    valex[ind] = vals[:, lat, lon]
                    ind += 1

            LOGGER.debug('Start smoothing with multiprocessing')
            # TODO fraction option frac=... is not used here
            tmp_sm = pool.map(_smooth, valex)
            pool.close()
            pool.join()

            # TODO redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    vals_sm[:, lat, lon] = tmp_sm[ind]
                    ind += 1
        else:
            # Serial ==================================
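            # The time series is tiled three times and only the middle third is
            # kept after smoothing, so the lowess filter sees a periodic signal
            # and the Dec/Jan boundary is smoothed without edge effects.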
            vals_sm = empty(vals.shape)
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    try:
                        y = tile(vals[:, lat, lon], 3)
                        # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                        ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts*2, 1]
                        vals_sm[:, lat, lon] = ys
                    except:
                        msg = 'failed for lat %s lon %s' % (lat, lon)
                        LOGGER.exception(msg)
                        raise Exception(msg)
                LOGGER.debug('done for %s - %s ' % (lat, lon))

        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        LOGGER.info('smoothing of annual cycle done')
    except:
        msg = 'failed smoothing of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        try:
            nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
            LOGGER.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
        except:
            # bug cdo: https://code.mpimet.mpg.de/boards/1/topics/3909
            ip3, nc_in1 = mkstemp(dir='.', suffix='.nc')
            ip4, nc_in2 = mkstemp(dir='.', suffix='.nc')
            ip5, nc_out = mkstemp(dir='.', suffix='.nc')
            nc_in1 = cdo.selvar(variable, input=nc_file, output=nc_in1)
            nc_in2 = cdo.selvar(variable, input=nc_anual_cycle, output=nc_in2)
            nc_out = cdo.sub(input=[nc_in1, nc_in2], output=nc_out)
            nc_anomal = nc_out
    except:
        msg = 'failed subtraction of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    return nc_anomal
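
A minimal usage sketch for get_anomalies as it is called from the weather regime processes above; the input file name is hypothetical:

# hedged usage sketch; the input path is an assumption
from datetime import datetime as dt
from flyingpigeon.weatherregimes import get_anomalies

reference = [dt(1970, 1, 1), dt(1999, 12, 31)]
nc_anomal = get_anomalies('slp_NCEP_subset.nc',  # hypothetical subset file
                          frac=0.2,              # lowess smoothing strength
                          reference=reference,
                          method='ocgis',
                          sseas='serial')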
Example No. 26
0
    def execute(self):
        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp
        
        ################################
        # reading in the input arguments
        ################################
        try: 
            resource = self.getInputValues(identifier='resource')
            url_Rdat = self.getInputValues(identifier='Rdat')[0]
            url_dat = self.getInputValues(identifier='dat')[0]
            url_ref_file = self.getInputValues(identifier='netCDF') # can be None
            season = self.getInputValues(identifier='season')[0]
            period = self.getInputValues(identifier='period')[0]            
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
        except Exception as e: 
            logger.debug('failed to read in the arguments %s ' % e)
        
        try: 
            start = dt.strptime(period.split('-')[0] , '%Y%m%d')
            end = dt.strptime(period.split('-')[1] , '%Y%m%d')
            # kappa = int(self.getInputValues(identifier='kappa')[0])
            
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            logger.info('read in the arguments')
            logger.info('url_ref_file: %s' % url_ref_file)
            logger.info('url_Rdat: %s' % url_Rdat)
            logger.info('url_dat: %s' % url_dat)
        except Exception as e: 
            logger.debug('failed to convert arguments %s ' % e)
           
        ############################
        # fetching training data
        ############################
        
        from flyingpigeon.utils import download, get_time
        from os.path import abspath
        
        try:
          dat = abspath(download(url_dat))
          Rdat = abspath(download(url_Rdat))
          logger.info('training data fetched')
        except Exception as e:
          logger.error('failed to fetch training data %s' % e)
          
        ############################################################    
        ### get the required bbox and time region from resource data
        ############################################################        
        # from flyingpigeon.weatherregimes import get_level
        
        from flyingpigeon.ocgis_module import call 
        from flyingpigeon.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file[0])  
            model_subset = call(resource=resource, variable=variable, 
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file, regrid_options='bil')
            logger.info('Dataset subset with regridding done: %s ' % model_subset)
        else:
            model_subset = call(resource=resource, variable=variable, 
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                )
            logger.info('Dataset time period extracted: %s ' % model_subset)
            
        
        ##############################################
        ### computing anomalies 
        ##############################################
        
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [dt.strptime(cycst,'%Y%m%d'), dt.strptime(cycen,'%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        ### extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)

        #######################
        ### call the R scripts
        #######################
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
          rworkspace = curdir
          Rsrc = config.Rsrc_dir() 
          Rfile = 'weatherregimes_projection.R'
          
          yr1 = start.year
          yr2 = end.year
          time = get_time(model_season, format='%Y%m%d')

          # ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
          ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
          ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
          ip, output_frec = mkstemp(dir=curdir, suffix='.txt')
                    
          args = ['Rscript', join(Rsrc,Rfile), '%s/' % curdir, 
                  '%s/' % Rsrc, 
                  '%s' % model_season, 
                  '%s' % variable,
                  '%s' % str(time).strip("[]").replace("'","").replace(" ",""),
            #      '%s' % output_graphics,
                  '%s' % dat, 
                  '%s' % Rdat, 
                  '%s' % file_pca,
                  '%s' % file_class, 
                  '%s' % output_frec,      
                  '%s' % season, 
                  '%s' % start.year, 
                  '%s' % end.year,                  
                  '%s' % 'MODEL']

          logger.info('R call built')
        except Exception as e: 
          msg = 'failed to build the R command %s' % e
          logger.error(msg)  
          raise Exception(msg)
        try:
          output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()  # , shell=True
          logger.info('R outlog info:\n %s ' % output)
          logger.debug('R outlog errors:\n %s ' % error)
          if len(output) > 0:            
            self.status.set('**** weatherregime in R succeeded', 90)
          else:
            logger.error('no output returned from R call')
        except Exception as e: 
          msg = 'weatherregime in R failed %s ' % e
          logger.error(msg)  
          raise Exception(msg)

        ############################################
        ### set the outputs
        ############################################

        # self.Routput_graphic.setValue(output_graphics)
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
        self.output_frequency.setValue(output_frec)
Example No. 27
0
    def execute(self):
        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        import time  # performance test
        process_start_time = time.time()  # measure process execution time ...

        from os import path
        from tempfile import mkstemp
        from datetime import datetime as dt

        from flyingpigeon.ocgis_module import call
        from flyingpigeon import analogs
        from flyingpigeon.datafetch import reanalyses

        self.status.set('execution started at : %s ' % dt.now(), 5)
        start_time = time.time()  # measure init ...

        #######################
        # read input parameters
        #######################

        try:
            self.status.set('read input parameter : %s ' % dt.now(), 5)
            refSt = self.getInputValues(identifier='refSt')
            refEn = self.getInputValues(identifier='refEn')
            dateSt = self.getInputValues(identifier='dateSt')
            dateEn = self.getInputValues(identifier='dateEn')
            seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
            nanalog = int(self.getInputValues(identifier='nanalog')[0])
            bbox_obj = self.BBox.getValue()
            normalize = self.getInputValues(identifier='normalize')[0]
            distance = self.getInputValues(identifier='dist')[0]
            outformat = self.getInputValues(identifier='outformat')[0]
            timewin = int(self.getInputValues(identifier='timewin')[0])
            experiment = self.getInputValues(identifier='experiment')[0]
            logger.info('input parameters set')
            self.status.set('Read in and convert the arguments', 5)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            logger.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            self.status.set('Preparing environment, converting arguments', 7)
            refSt = dt.strptime(refSt[0], '%Y-%m-%d')
            refEn = dt.strptime(refEn[0], '%Y-%m-%d')
            dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
            dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                logger.error('output format not valid')

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            if bbox_obj is not None:
                logger.info("bbox_obj={0}".format(bbox_obj.coords))
                bbox = [bbox_obj.coords[0][0],
                        bbox_obj.coords[0][1],
                        bbox_obj.coords[1][0],
                        bbox_obj.coords[1][1]]
                logger.info("bbox={0}".format(bbox))
            else:
                bbox = None

            # region = self.getInputValues(identifier='region')[0]
            # bbox = [float(b) for b in region.split(',')]
            dataset, var = experiment.split('_')
            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        try:
            if dataset == 'NCEP':
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    # conform_units_to=None
                else:
                    variable = 'slp'
                    level = None
                    # conform_units_to='hPa'
            elif '20CRV2' in dataset:
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    # conform_units_to=None
                else:
                    variable = 'prmsl'
                    level = None
                    # conform_units_to='hPa'
            else:
                logger.error('Reanalyses dataset not known')
            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("init took %s seconds.", time.time() - start_time)
        self.status.set('Read in and convert the arguments done', 8)

        #################
        # get input data
        #################
        start_time = time.time()  # measure get_input_data ...
        self.status.set('fetching input data', 7)
        try:
            input = reanalyses(start=start.year, end=end.year,
                               variable=var, dataset=dataset)
            logger.info('input files %s' % input)
            nc_subset = call(resource=input, variable=var,
                             geom=bbox, spatial_wrapping='wrap')
        except Exception as e:
            msg = 'failed to fetch or subset input files %s' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        self.status.set('**** Input data fetched', 10)

        ########################
        # input data preparation
        ########################
        self.status.set('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...

        try:
            # Construct descriptive filenames for the three files
            # listed in config file
            refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
            simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
            archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                                % (bbox[0], bbox[2], bbox[1], bbox[3])
            simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])
            archive = call(resource=nc_subset,
                           time_range=[refSt, refEn],
                           prefix=archiveNameString)
            simulation = call(resource=nc_subset, time_range=[dateSt, dateEn],
                              prefix=simNameString)
            logger.info('archive and simulation files generated: %s, %s'
                        % (archive, simulation))
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                logger.info('normalization function with method: %s '
                            % normalize)
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive,
                    simulation,
                    method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to generate normalization files %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        ip, output_file = mkstemp(dir='.', suffix='.txt')
        files = [path.abspath(archive), path.abspath(simulation), output_file]
        logger.debug("Data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generate the config file
        ############################
        self.status.set('writing config file', 15)
        start_time = time.time()  # measure write config ...
        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                timewin=timewin,
                varname=var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[dt.strftime(refSt, '%Y-%m-%d'),
                        dt.strftime(refEn, '%Y-%m-%d')],
                bbox="%s,%s,%s,%s" % (bbox[0],
                                      bbox[2],
                                      bbox[1],
                                      bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            logger.debug(msg)
            raise Exception(msg)
        logger.debug("write_config took %s seconds.", time.time() - start_time)
        #######################
        # CASTf90 call
        #######################
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90

        self.status.set('Start CASTf90 call', 20)
        try:
            # self.status.set('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
                ).communicate()
            logger.info('analogue.out info:\n %s ' % output)
            logger.debug('analogue.out errors:\n %s ' % error)
            self.status.set('**** CASTf90 succeeded', 90)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            logger.error(msg)
            raise Exception(msg)
        logger.debug("castf90 took %s seconds.", time.time() - start_time)

        ########################
        # generate analog viewer
        ########################

        try:
            f = analogs.reformat_analogs(output_file)
            logger.info('analogs reformatted')
            self.status.set('Successfully reformatted analog file', 50)

            # put config file into output folder
            config_output_path, config_output_url = analogs.copy_configfile(
                config_file
                )
            output_av = analogs.get_viewer(
                f,
                path.basename(config_output_path))
            logger.info('Viewer generated')
            self.status.set('Successfully generated analogs viewer', 90)
            logger.info('output_av: %s ' % output_av)
        except Exception as e:
            msg = 'Failed to reformat analogs file or generate viewer %s ' % e
            logger.debug(msg)

        self.status.set('preparing output', 99)
        self.config.setValue(config_output_url)  # config_file
        self.analogs.setValue(output_file)
        self.output_netcdf.setValue(simulation)
        self.output_html.setValue(output_av)
        self.status.set('execution ended', 100)
        logger.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
Example No. 28
0
def reanalyses(start=1948,
               end=None,
               variable='slp',
               dataset='NCEP',
               timres='day',
               getlevel=True):
    """
    Fetches the reanalysis data (NCEP, 20CR or ERA_20C) to local file system
    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, current year will be the end)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :return list: list of path/files.nc
    """
    # used for the netCDF conversion below
    from netCDF4 import Dataset
    from os import path, system
    from flyingpigeon.ocgis_module import call
    from shutil import move

    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        LOGGER.info('start / end date set')
    except:
        msg = "get_OBS module failed to get start end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' %
                         (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (
                            variable, year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (
                            year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (
                                year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/pressure/hgt.%s.nc' % (
                                year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (
                                year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/pressure/hgt.%s.nc' % (
                                year)  # noqa
                else:
                    LOGGER.error('Dataset %s not known' % dataset)
                LOGGER.debug('url: %s' % url)
            except Exception:
                msg = "could not set url"
                LOGGER.exception(msg)
            try:
                df = utils.download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    ds = Dataset(df)
                    df_time = ds.variables['time']
                    # Check not only the calendar attribute but ideally also
                    # whether the file is already NETCDF4_CLASSIC...
                    if not hasattr(df_time, 'calendar'):
                        p, f = path.split(path.abspath(df))
                        LOGGER.debug("path = %s , file %s " % (p, f))
                        # May be an issue if several users are working at the same time
                        move(df, f)
                        conv = call(resource=f,
                                    output_format_options={
                                        'data_model': 'NETCDF4_CLASSIC'
                                    },
                                    dir_output=p,
                                    prefix=f.replace('.nc', ''))
                        obs_data.append(conv)
                        LOGGER.debug('file %s to NETCDF4_CLASSIC converted' %
                                     conv)
                        # clean up the raw download; the cache can grow to ~50 GB per user
                        # TODO: check how links work
                        system('rm -f %s' % f)
                    else:
                        obs_data.append(df)
                    ds.close()
                except Exception:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except Exception:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Reanalyses data fetched for %s files' % len(obs_data))
    except Exception:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if (level is None) or (getlevel is False):
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
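
The nested if/elif URL blocks above repeat the same THREDDS base path for every dataset and time resolution. A table-driven equivalent is sketched below; the helper name and dict layout are illustrative, but the URL templates are copied from the branches above:

# hypothetical refactoring: map (dataset, variable kind, time resolution) to a URL template
BASE = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets'
URL_TEMPLATES = {
    ('NCEP', 'slp', 'day'): BASE + '/ncep.reanalysis.dailyavgs/surface/slp.%s.nc',
    ('NCEP', 'z', 'day'): BASE + '/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc',
    ('20CRV2', 'prmsl', '6h'): BASE + '/20thC_ReanV2/monolevel/prmsl.%s.nc',
    ('20CRV2', 'prmsl', 'day'): BASE + '/20thC_ReanV2/Dailies/monolevel/prmsl.%s.nc',
    ('20CRV2', 'z', '6h'): BASE + '/20thC_ReanV2/pressure/hgt.%s.nc',
    ('20CRV2', 'z', 'day'): BASE + '/20thC_ReanV2/Dailies/pressure/hgt.%s.nc',
    ('20CRV2c', 'prmsl', '6h'): BASE + '/20thC_ReanV2c/monolevel/prmsl.%s.nc',
    ('20CRV2c', 'prmsl', 'day'): BASE + '/20thC_ReanV2c/Dailies/monolevel/prmsl.%s.nc',
    ('20CRV2c', 'z', '6h'): BASE + '/20thC_ReanV2c/pressure/hgt.%s.nc',
    ('20CRV2c', 'z', 'day'): BASE + '/20thC_ReanV2c/Dailies/pressure/hgt.%s.nc',
}

def build_url(dataset, variable, year, timres='day'):
    # NCEP has no 6-hourly branch in the code above, so its resolution is fixed to daily
    kind = 'z' if 'z' in variable else variable
    key = (dataset, kind, 'day' if dataset == 'NCEP' else timres)
    if key not in URL_TEMPLATES:
        raise ValueError('Dataset %s / variable %s not known' % (dataset, variable))
    return URL_TEMPLATES[key] % year
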
Ejemplo n.º 29
0
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP'):
    """
    Fetches the reanalysis data (NCEP, 20CR or ERA_20C) to local file system
    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, current year will be the end)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :return list: list of path/files.nc
    """
    # imports used for the netCDF conversion below
    from os import path
    from flyingpigeon.ocgis_module import call
    from shutil import move

    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            # the 20CR products (20CRV2, 20CRV2c) start in 1851
            if dataset.startswith('20CR'):
                start = 1851
        LOGGER.info('start / end date set')
    except Exception:
        msg = "get_OBS module failed to get start/end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        # e.g. 'z700' -> pressure level '700'
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' %
                         (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (
                            variable, year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (
                            year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (
                            year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (
                            year)  # noqa
                else:
                    LOGGER.error('Dataset %s not known' % dataset)
                LOGGER.debug('url: %s' % url)
            except Exception:
                msg = "could not set url"
                LOGGER.exception(msg)
            try:
                df = utils.download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    p, f = path.split(path.abspath(df))
                    LOGGER.debug("path = %s , file %s " % (p, f))
                    move(df, f)
                    conv = call(resource=f,
                                output_format_options={
                                    'data_model': 'NETCDF4_CLASSIC'
                                },
                                dir_output=p,
                                prefix=f.replace('.nc', ''))
                    obs_data.append(conv)
                    LOGGER.debug('file %s to NETCDF4_CLASSIC converted' % conv)
                except Exception:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except Exception:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Reanalyses data fetched for %s files' % len(obs_data))
    except Exception:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if level is None:
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
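
A minimal usage sketch, assuming a working flyingpigeon installation and network access to the NOAA/ESRL THREDDS server (the year range is illustrative):

# fetch daily NCEP sea-level pressure for 2000-2001; returns a list of local netCDF paths
files = reanalyses(start=2000, end=2001, variable='slp', dataset='NCEP')
# a geopotential-height request such as variable='z700' routes the result through get_level()
z700_files = reanalyses(start=2000, end=2001, variable='z700', dataset='NCEP')
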
Ejemplo n.º 30
0
def get_gam(ncs_indices, coordinate):
  
  from netCDF4 import Dataset
  from os.path import basename
  from shapely.geometry import Point
  from numpy import squeeze, ravel, isnan, nan, array, reshape, append, zeros, ones
  
  from flyingpigeon.utils import get_variable, get_values, unrotate_pole
  from flyingpigeon.ocgis_module import call 

  try:
    from rpy2.robjects.packages import importr
    import rpy2.robjects as ro
    import rpy2.robjects.numpy2ri
    
    rpy2.robjects.numpy2ri.activate()    
    base = importr("base")
    stats = importr("stats")
    mgcv = importr("mgcv")
    logger.info('rpy2 modules imported')
  except Exception as e: 
    msg = 'failed to import rpy2 modules %s' % e
    logger.debug(msg)
    raise Exception(msg)

  for i, ncs in enumerate(ncs_indices):
    # ocgis needs unrotated coordinates to extract points;
    # unrotate_pole writes lats/lons into the file.
    # WARNING: will fail if the data is stored on a file system without write permissions
    try:
      lats, lons = unrotate_pole(ncs, write_to_file=True)
      point = Point(float(coordinate[0]), float(coordinate[1]))
      # get the values
      variable = get_variable(ncs)
      agg = basename(ncs).split('_')[-2]
      indice = '%s_%s' % (variable, agg)
      timeseries = call(resource=ncs, geom=point, select_nearest=True)
      ts = Dataset(timeseries)
      vals = squeeze(ts.variables[variable][:])
      # extend the observed values by +/- their range to create pseudo-absences
      n = len(vals)
      dif = vals.max() - vals.min()
      vals = append(append(vals - dif, vals), vals + dif)

      if i == 0:
        # label the shifted copies as absence (0) and the observed range as presence (1);
        # the labels use the pre-extension length n so each value gets exactly one label
        PA = append(append(zeros(n), ones(n)), zeros(n))
        data = {'PA': ro.FloatVector(PA)}
        data[str(indice)] = ro.FloatVector(vals)
        form = 'PA ~ s(%s, k=3)' % indice
      else:
        form = form + ' + s(%s, k=3)' % indice
        data[str(indice)] = ro.FloatVector(vals)

    except Exception as e: 
      msg = 'Failed to prepare data %s' % e
      logger.debug(msg)

  try:
    logger.info(data)
    dataf = ro.DataFrame(data)
    eq = ro.Formula(str(form))
    gam_model = mgcv.gam(base.eval(eq), data=dataf, family=stats.binomial(),
                         scale=-1, na_action=stats.na_exclude)
    logger.info('GAM model trained')
  except Exception as e: 
    msg = 'Failed to generate GAM model %s' % e
    logger.debug(msg)
  
  # ### ###########################
  # # plot response curves
  # ### ###########################
  try: 
    from flyingpigeon.visualisation import concat_images
    from tempfile import mkstemp
    grdevices = importr('grDevices')
    graphicDev = importr('Cairo')
    infos = []
    for i in range(1, len(ncs_indices) + 1):
      ip, info = mkstemp(dir='.', suffix='.png')
      graphicDev.CairoPNG(info, width=640, height=480, pointsize=12)
      logger.debug('plot file %s opened' % info)
      infos.append(info)

      # back-transform from the logit scale to probabilities
      trans = ro.r('function(x){exp(x)/(1+exp(x))}')
      mgcv.plot_gam(gam_model, trans=trans, shade='T',
                    col='black', select=i, ylab='Predicted Probability',
                    rug=False, cex_lab=1.4, cex_axis=1.4)
      logger.debug('gam plotted for indice %s' % i)
      grdevices.dev_off()

    logger.info('%s plots generated' % len(infos))
    infos_concat = concat_images(infos, orientation='h')
  except Exception as e: 
    msg = 'Failed to plot statistical graphic %s' % e
    logger.debug(msg)
    raise Exception(msg)
    
  return gam_model, infos_concat
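
A standalone sketch of the presence/pseudo-absence construction used above (the values are illustrative):

from numpy import append, zeros, ones, array

vals = array([5.0, 6.0, 7.0])                             # observed index values at the point
dif = vals.max() - vals.min()                             # observed range
extended = append(append(vals - dif, vals), vals + dif)   # shifted copies flank the data
PA = append(append(zeros(3), ones(3)), zeros(3))          # 0 = absence, 1 = presence
# each extended value pairs with one label; the GAM then fits PA ~ s(indice, k=3)
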
Ejemplo n.º 31
0
def calc_indice_percentile(resource=[], variable=None,
                           prefix=None, indices='TG90p', refperiod=None,
                           grouping='yr', polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable dataset in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param indices: string of indice (default ='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period  = [datetime,datetime]
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: reference_file, indice_file
    """
    from os.path import join, dirname, exists
    from os import remove
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    # TODO: see ticket https://github.com/bird-house/flyingpigeon/issues/200
    raise NotImplementedError('Sorry! Function is under construction.')
    # NOTE: everything below is unreachable scaffolding until the TODO above is resolved

    if type(resource) != list:
        resource = list([resource])

    # if type(indices) != list:
    #     indices = list([indices])
    #
    # if type(groupings) != list:
    #     groupings = list([groupings])
    #
    # if type(refperiod) == list:
    #     refperiod = refperiod[0]
    #
    # if refperiod is not None:
    #     start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
    #     end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
    #     time_range = [start, end]
    # else:
    #     time_range = None

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic

    calc_group = calc_grouping(grouping)

    if variable is None:
        variable = get_variable(resource)

    if polygons is None:
        nc_reference = call(resource=resource,
                            prefix=str(uuid.uuid4()),
                            time_range=refperiod,
                            output_format='nc')
    else:
        nc_reference = clipping(resource=resource,
                                prefix=str(uuid.uuid4()),
                                time_range=refperiod,
                                output_format='nc',
                                polygons=polygons,
                                mosaic=mosaic)

    # arr = get_values(resource=nc_reference)
    # dt_arr = get_time(resource=nc_reference)
    # arr = ma.masked_array(arr)
    # dt_arr = ma.masked_array(dt_arr)
    # percentile = percentile
    # window_width = 5

    #     for indice in indices:
    #         name = indice.replace('_', str(percentile))
    #         var = indice.split('_')[0]
    #
    #         operation = None
    #         if 'T' in var:
    #             if percentile >= 50:
    #                 operation = 'Icclim%s90p' % var
    #                 func = 'icclim_%s90p' % var  # icclim_TG90p
    #             else:
    #                 operation = 'Icclim%s10p' % var
    #                 func = 'icclim_%s10p' % var
    #
    #             ################################
    #             # load the appropriate operation
    #             ################################
    #
    #             ops = [op for op in dir(lic) if operation in op]
    #             if len(ops) == 0:
    #                 raise Exception("operator does not exist %s", operation)
    #
    #             exec "percentile_dict = lic.%s.get_percentile_dict(arr, dt_arr, percentile, window_width)" % ops[0]
    #             calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
    #
    #             if polygons is None:
    #                 nc_indices.extend(call(resource=resource,
    #                                        prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
    #                                        calc=calc,
    #                                        calc_grouping=calc_group,
    #                                        output_format='nc'))
    #             else:
    #                 nc_indices.extend(clipping(resource=resource,
    #                                            prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
    #                                            calc=calc,
    #                                            calc_grouping=calc_group,
    #                                            output_format='nc',
    #                                            polygons=polygons,
    #                                            mosaic=mosaic,
    #                                            ))
    # if len(nc_indices) is 0:
    #     LOGGER.debug('No indices are calculated')
    #     return None
    # nc_indices is only assigned in the commented-out block above;
    # this return is unreachable while the NotImplementedError is raised
    return nc_indices
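
The commented-out scaffolding expects refperiod as a 'YYYYMMDD-YYYYMMDD' string; a minimal parsing sketch (the helper name is hypothetical):

from datetime import datetime as dt

def parse_refperiod(refperiod):
    # 'YYYYMMDD-YYYYMMDD' -> [datetime, datetime]; None passes through unchanged
    if refperiod is None:
        return None
    start, end = refperiod.split('-')
    return [dt.strptime(start, '%Y%m%d'), dt.strptime(end, '%Y%m%d')]
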
Ejemplo n.º 32
0
  def execute(self):
    import time # performance test
    process_start_time = time.time() # measure process execution time ...
     
    from os import path
    from tempfile import mkstemp
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon import analogs
    from flyingpigeon.datafetch import reanalyses
    
    self.status.set('execution started at: %s' % dt.now(), 5)

    start_time = time.time() # measure init ...
    

    #######################
    ### read input parameters
    #######################

    try:
      self.status.set('read input parameter: %s' % dt.now(), 5)
      refSt = self.getInputValues(identifier='refSt')
      refEn = self.getInputValues(identifier='refEn')
      dateSt = self.getInputValues(identifier='dateSt')
      dateEn = self.getInputValues(identifier='dateEn')
      seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
      nanalog = int(self.getInputValues(identifier='nanalog')[0])
      bbox_obj = self.BBox.getValue()
      normalize = self.getInputValues(identifier='normalize')[0]
      distance = self.getInputValues(identifier='dist')[0]
      outformat = self.getInputValues(identifier='outformat')[0]
      timewin = int(self.getInputValues(identifier='timewin')[0])
      experiment = self.getInputValues(identifier='experiment')[0]

      logger.info('input parameters set')
      self.status.set('Read in and convert the arguments', 5)
    except Exception as e:
      msg = 'failed to read input parameter %s ' % e
      logger.error(msg)
      raise Exception(msg)

    ######################################
    ### convert types and set environment
    ######################################
    try:
      self.status.set('Start preparing environment and converting arguments', 7)
      refSt = dt.strptime(refSt[0], '%Y-%m-%d')
      refEn = dt.strptime(refEn[0], '%Y-%m-%d')
      dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
      dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')
      
      if normalize == 'None':
        seacyc = False
      else:
        seacyc = True

      if outformat == 'ascii':
        outformat = '.txt'
      elif outformat == 'netCDF':
        outformat = '.nc'
      else:
        logger.error('output format not valid')

      start = min(refSt, dateSt)
      end = max(refEn, dateEn)

      if bbox_obj is not None:
        logger.info("bbox_obj={0}".format(bbox_obj.coords))
        bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
        logger.info("bbox={0}".format(bbox))
      else:
        bbox = None

      # region = self.getInputValues(identifier='region')[0]
      # bbox = [float(b) for b in region.split(',')]
      dataset, var = experiment.split('_')
      logger.info('environment set')
    except Exception as e: 
      msg = 'failed to set environment %s ' % e
      logger.error(msg)  
      raise Exception(msg)

    try:
      if dataset == 'NCEP':
        if 'z' in var:
          variable = 'hgt'
          level = var.strip('z')
          # conform_units_to = None
        else:
          variable = 'slp'
          level = None
          # conform_units_to = 'hPa'
      elif '20CRV2' in dataset:
        if 'z' in var:
          variable = 'hgt'
          level = var.strip('z')
          # conform_units_to = None
        else:
          variable = 'prmsl'
          level = None
          # conform_units_to = 'hPa'
      else:
        logger.error('Reanalyses dataset not known')
      logger.info('dataset and variable set')
    except Exception as e: 
      msg = 'failed to set environment %s ' % e
      logger.error(msg)  
      raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in and convert the arguments done', 8)

    #################
    # get input data
    #################

    start_time = time.time()  # measure get_input_data ...

    self.status.set('fetching input data', 7)
    try:
      input_files = reanalyses(start=start.year, end=end.year, variable=var, dataset=dataset)
      logger.info('input files %s' % input_files)
      nc_subset = call(resource=input_files, variable=var, geom=bbox, spatial_wrapping='wrap')
    except Exception as e:
      msg = 'failed to fetch or subset input files %s' % e
      logger.error(msg)
      raise Exception(msg)
    logger.debug("get_input_subset_dataset took %s seconds.", time.time() - start_time)
    
    self.status.set('**** Input data fetched', 10)
    
    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    
    try:
      # construct descriptive filenames for the three files listed in the config file
      refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
      simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
      archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])
      simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' % (bbox[0], bbox[2], bbox[1], bbox[3])

      archive = call(resource=nc_subset, time_range=[refSt, refEn], prefix=archiveNameString)
      simulation = call(resource=nc_subset, time_range=[dateSt, dateEn], prefix=simNameString)
      logger.info('archive and simulation files generated: %s, %s' % (archive, simulation))

    except Exception as e:
      msg = 'failed to prepare archive and simulation files %s ' % e
      logger.debug(msg)
      raise Exception(msg)

    try:
      if seacyc:
        logger.info('normalization function with method: %s ' % normalize)
        seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
      else:
        seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
      msg = 'failed to generate normalization files %s ' % e
      logger.debug(msg)
      raise Exception(msg)
      
    ip, output_file = mkstemp(dir='.', suffix='.txt')
    
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("Data preperation took %s seconds.", time.time() - start_time)

    ############################
    # generate the config file
    ############################
    
    self.status.set('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
      config_file = analogs.get_configfile(
        files=files,
        seasoncyc_base=seasoncyc_base,
        seasoncyc_sim=seasoncyc_sim,
        timewin=timewin,
        varname=var,
        seacyc=seacyc,
        cycsmooth=91,
        nanalog=nanalog,
        seasonwin=seasonwin,
        distfun=distance,
        outformat=outformat,
        calccor=True,
        silent=False,
        period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
        bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
      msg = 'failed to generate config file %s ' % e
      logger.debug(msg)
      raise Exception(msg)

    logger.debug("write_config took %s seconds.", time.time() - start_time)
      
    #######################
    # CASTf90 call 
    #######################
    import subprocess
    import shlex

    start_time = time.time() # measure call castf90
    
    self.status.set('Start CASTf90 call', 20)
    try:
      cmd = 'analogue.out %s' % path.relpath(config_file)
      args = shlex.split(cmd)
      output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
      logger.info('analogue.out info:\n %s ' % output)
      logger.debug('analogue.out errors:\n %s ' % error)
      self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e: 
      msg = 'CASTf90 failed %s ' % e
      logger.error(msg)  
      raise Exception(msg)

    logger.debug("castf90 took %s seconds.", time.time() - start_time)


    ########################
    # generate analog viewer
    ########################

    try:
      f = analogs.reformat_analogs(output_file)
      logger.info('analogs reformatted')
      self.status.set('Successfully reformatted analog file', 50)

      # put config file into output folder

      config_output_path, config_output_url = analogs.copy_configfile(config_file)
      
      output_av = analogs.get_viewer(f, path.basename(config_output_path))
      logger.info('Viewer generated')
      self.status.set('Successfully generated analogs viewer', 90)

      logger.info('output_av: %s ' % output_av)

    except Exception as e:
      msg = 'Failed to reformat analogs file or generate viewer %s ' % e
      logger.debug(msg)

    self.status.set('preparing output', 99)
    self.config.setValue(config_output_url)
    self.analogs.setValue(output_file)
    self.output_netcdf.setValue(simulation)
    self.output_html.setValue(output_av)

    self.status.set('execution ended', 100)
    logger.debug("total execution took %s seconds.", time.time() - process_start_time)
Ejemplo n.º 33
0
def calc_indice_percentile(resources=[], variable=None,
                           prefix=None, indices='TG90p', refperiod=None,
                           groupings='yr', polygons=None, percentile=90, mosaic=False,
                           dir_output=None, dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the in netcdf file (default=None)
    :param indices: list of indices (default ='TG90p')
    :param prefix: filename prefix 
    :param refperiod: reference period tuple = (start,end)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    if type(resources) != list:
      resources = list([resources])
    if type(indices) != list:
      indices = list([indices])

    if type(groupings) != list:
      groupings = list([groupings])

    if type(refperiod) == list:
      refperiod = refperiod[0]

    if refperiod is not None:
      start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
      end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
      time_range = [start, end]
    else:
      time_range = None

    if dir_output is not None:
      if not exists(dir_output):
        makedirs(dir_output)
    
    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################

    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)
    
    for grouping in groupings:
      calc_group = calc_grouping(grouping)
      for key in nc_dic.keys():
        resource = nc_dic[key]
        if variable is None:
          variable = get_variable(resource)
        if polygons is None:
          nc_reference = call(resource=resource,
                              prefix=str(uuid.uuid4()),
                              time_range=time_range,
                              output_format='nc',
                              dir_output=dir_output)
        else:
          nc_reference = clipping(resource=resource,
                                  prefix=str(uuid.uuid4()),
                                  time_range=time_range,
                                  output_format='nc',
                                  polygons=polygons,
                                  dir_output=dir_output,
                                  mosaic=mosaic)

        arr = get_values(resource=nc_reference)
        dt_arr = get_time(resource=nc_reference)
        arr = ma.masked_array(arr)
        dt_arr = ma.masked_array(dt_arr)
        window_width = 5
        
        for indice in indices:
          name = indice.replace('_', str(percentile))
          var = indice.split('_')[0]

          operation = None
          if 'T' in var:
            if percentile >= 50:
              operation = 'Icclim%s90p' % var
              func = 'icclim_%s90p' % var  # e.g. icclim_TG90p
            else:
              operation = 'Icclim%s10p' % var
              func = 'icclim_%s10p' % var
          if operation is None:
            raise Exception('unsupported indice %s (only T-based indices are implemented)' % indice)

          ################################
          # load the appropriate operation
          ################################

          ops = [op for op in dir(lic) if operation in op]
          if len(ops) == 0:
            raise Exception("operator does not exist %s" % operation)

          # resolve the operation class by name instead of exec'ing a string
          percentile_dict = getattr(lic, ops[0]).get_percentile_dict(arr, dt_arr, percentile, window_width)
          calc = [{'func': func, 'name': name, 'kwds': {'percentile_dict': percentile_dict}}]
          
          if polygons is None:
            nc_indices.append(call(resource=resource,
                                   prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                   calc=calc,
                                   calc_grouping=calc_group,
                                   output_format='nc',
                                   dir_output=dir_output))
          else:
            nc_indices.extend(clipping(resource=resource,
                                       prefix=key.replace(variable, name).replace('_day_', '_%s_' % grouping),
                                       calc=calc,
                                       calc_grouping=calc_group,
                                       output_format='nc',
                                       dir_output=dir_output,
                                       polygons=polygons,
                                       mosaic=mosaic,
                                       ))
    return nc_indices
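
The percentile-basis step in isolation, as a sketch (assuming ocgis's ICCLIM bridge is installed; arr and dt_arr stand in for the masked value and datetime arrays produced by get_values/get_time above):

from ocgis.contrib import library_icclim as lic

# 90th-percentile basis for daily mean temperature, 5-day window, as used for TG90p
percentile_dict = lic.IcclimTG90p.get_percentile_dict(arr, dt_arr, 90, 5)
calc = [{'func': 'icclim_TG90p', 'name': 'TG90p',
         'kwds': {'percentile_dict': percentile_dict}}]
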

#def calc_indice_unconventional(resource=[], variable=None, prefix=None,
  #indices=None, polygons=None,  groupings=None, 
  #dir_output=None, dimension_map = None):
    #"""
    #Calculates given indices for suitable files in the appropriate time grouping and polygon.

    #:param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    #:param variable: variable name to be selected in the in netcdf file (default=None)
    #:param indices: list of indices (default ='TGx')
    #:param polygons: list of polygons (default =None)
    #:param grouping: indices time aggregation (default='yr')
    #:param out_dir: output directory for result file (netcdf)
    #:param dimension_map: optional dimension map if different to standard (default=None)

    #:return: list of netcdf files with calculated indices. Files are saved into dir_output
    #"""
    
    #from os.path import join, dirname, exists
    #from os import remove
    #import uuid
    #from flyingpigeon import ocgis_module
    #from flyingpigeon.subset import get_ugid, get_geom

    #if type(resource) != list: 
      #resource = list([resource])
    #if type(indices) != list: 
      #indices = list([indices])
    #if type(polygons) != list and polygons != None:
      #polygons = list([polygons])
    #elif polygons == None:
      #polygons = [None]
    #else: 
      #logger.error('Polygons not found')
    #if type(groupings) != list:
      #groupings = list([groupings])
    
    #if dir_output != None:
      #if not exists(dir_output): 
        #makedirs(dir_output)
    
    #experiments = sort_by_filename(resource)
    #outputs = []

    #print('environment for calc_indice_unconventional set')
    #logger.info('environment for calc_indice_unconventional set')
    
    #for key in experiments:
      #if variable == None:
        #variable = get_variable(experiments[key][0])
      #try: 
        #ncs = experiments[key]
        #for indice in indices:
          #logger.info('indice: %s' % indice)
          #try: 
            #for grouping in groupings:
              #logger.info('grouping: %s' % grouping)
              #try:
                #calc_group = calc_grouping(grouping)
                #logger.info('calc_group: %s' % calc_group)
                #for polygon in polygons:  
                  #try:
                    #domain = key.split('_')[1].split('-')[0]
                    #if polygon == None:
                      #if prefix == None: 
                        #prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping )
                      #geom = None
                      #ugid = None
                    #else:
                      #if prefix == None: 
                        #prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping ).replace(domain,polygon)
                      #geom = get_geom(polygon=polygon)
                      #ugid = get_ugid(polygons=polygon, geom=geom)
                    #if indice == 'TGx':
                      #calc=[{'func': 'max', 'name': 'TGx'}]
                      #tmp = ocgis_module.call(resource=ncs,# conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              #dir_output=dir_output, geom=geom, select_ugid=ugid)
                    #elif indice == 'TGn':
                      #calc=[{'func': 'min', 'name': 'TGn'}]
                      #tmp = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping= calc_group, prefix=prefix,
                                               #dir_output=dir_output, geom=geom, select_ugid = ugid)
                    #elif indice == 'TGx5day':
                      #calc = [{'func': 'moving_window', 'name': 'TGx5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      #tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, prefix=str(uuid.uuid4()),
                                              #geom=geom, select_ugid = ugid)
                      #calc=[{'func': 'max', 'name': 'TGx5day'}]
                      #logger.info('moving window calculated : %s' % tmp2)
                      #tmp = ocgis_module.call(resource=tmp2,
                                              #variable=indice, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              #dir_output=dir_output)
                      #remove(tmp2)
                    #elif indice == 'TGn5day':
                      #calc = [{'func': 'moving_window', 'name': 'TGn5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      #tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              #variable=variable, dimension_map=dimension_map, 
                                              #calc=calc, prefix=str(uuid.uuid4()),
                                              #geom=geom, select_ugid = ugid)
                      #calc=[{'func': 'min', 'name': 'TGn5day'}]
                      
                      #logger.info('moving window calculated : %s' % tmp2)
                      
                      #tmp = ocgis_module.call(resource=tmp2,
                                              #variable=indice, dimension_map=dimension_map, 
                                              #calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              #dir_output=dir_output)
                      #remove(tmp2)
                    #else:
                      #logger.error('Indice %s is not a known indice' % (indice))
                    #outputs.append(tmp)
                    #logger.info('indice file calculated %s ' % (tmp))
                  #except Exception as e:
                    #logger.debug('could not calc indice %s for key %s, polygon %s and calc_grouping %s : %s' %  (indice, key, polygon, grouping, e ))
              #except Exception as e:
                #logger.debug('could not calc indice %s for key %s and calc_grouping %s : %s' % ( indice, key, polygon, e ))
          #except Exception as e:
            #logger.debug('could not calc indice %s for key %s: %s'%  (indice, key, e ))
      #except Exception as e:
        #logger.debug('could not calc key %s: %s' % (key, e))
    #return outputs
Ejemplo n.º 34
0
    def _handler(self, request, response):

        ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
        ocgis.env.OVERWRITE = True
        tic = dt.now()
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('Execution started at : {}'.format(tic), 1)

        ######################################
        # Read inputs
        ######################################
        try:
            candidate = archiveextract(
                resource=rename_complexinputs(request.inputs['candidate']))
            target = archiveextract(
                resource=rename_complexinputs(request.inputs['target']))
            location = request.inputs['location'][0].data
            indices = [el.data for el in request.inputs['indices']]
            dist = request.inputs['dist'][0].data
            dateStartCandidate = request.inputs['dateStartCandidate'][0].data
            dateEndCandidate = request.inputs['dateEndCandidate'][0].data
            dateStartTarget = request.inputs['dateStartTarget'][0].data
            dateEndTarget = request.inputs['dateEndTarget'][0].data

        except Exception as ex:
            msg = 'Failed to read input parameter {}'.format(ex)
            LOGGER.error(msg)
            raise Exception(msg)

        response.update_status('Input parameters ingested', 2)

        ######################################
        # Process inputs
        ######################################

        try:
            point = Point(*map(float, location.split(',')))
            dateStartCandidate = dt.strptime(dateStartCandidate, '%Y-%m-%d')
            dateEndCandidate = dt.strptime(dateEndCandidate, '%Y-%m-%d')
            dateStartTarget = dt.strptime(dateStartTarget, '%Y-%m-%d')
            dateEndTarget = dt.strptime(dateEndTarget, '%Y-%m-%d')

        except Exception as ex:
            msg = 'failed to process inputs {}'.format(ex)
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("init took {}".format(dt.now() - tic))
        response.update_status('Processed input parameters', 3)

        ######################################
        # Extract target time series
        ######################################
        # Using `call` would write a netCDF file to the tmp directory;
        # by default the target series is kept in memory instead.
        savetarget = False
        try:
            if savetarget:
                prefix = 'target_ts'
                target_ts = call(resource=target,
                                 geom=point,
                                 variable=indices,
                                 time_range=[dateStartTarget, dateEndTarget],
                                 select_nearest=True,
                                 prefix=prefix)

                # target_ts = [get_values(prefix+'.nc', ind) for ind in indices]

            else:
                trd = RequestDataset(
                    target,
                    variable=indices,
                    time_range=[dateStartTarget, dateEndTarget])

                op = OcgOperations(trd,
                                   geom=point,
                                   select_nearest=True,
                                   search_radius_mult=1.75)
                out = op.execute()
                target_ts = out.get_element()

        except Exception as ex:
            msg = 'Target extraction failed {}'.format(ex)
            LOGGER.debug(msg)
            raise Exception(msg)

        response.update_status('Extracted target series', 5)

        ######################################
        # Compute dissimilarity metric
        ######################################

        response.update_status('Computing spatial analog', 6)
        try:
            output = call(
                resource=candidate,
                calc=[{
                    'func': 'dissimilarity',
                    'name': 'spatial_analog',
                    'kwds': {
                        'dist': dist,
                        'target': target_ts,
                        'candidate': indices
                    }
                }],
                time_range=[dateStartCandidate, dateEndCandidate],
            )

        except Exception as ex:
            msg = 'Spatial analog failed: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        add_metadata(output,
                     dist=dist,
                     indices=",".join(indices),
                     target_location=location,
                     candidate_time_range="{},{}".format(
                         dateStartCandidate, dateEndCandidate),
                     target_time_range="{},{}".format(dateStartTarget,
                                                      dateEndTarget))

        response.update_status('Computed spatial analog', 95)

        response.outputs['output_netcdf'].file = output

        response.update_status('Execution completed', 100)
        LOGGER.debug("Total execution took {}".format(dt.now() - tic))
        return response
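
A condensed sketch of the in-memory point extraction used above (ocgis 2.x API; target, indices, the date bounds and the lon/lat pair stand in for the values parsed from the request):

from ocgis import RequestDataset, OcgOperations
from shapely.geometry import Point

rd = RequestDataset(target, variable=indices,
                    time_range=[dateStartTarget, dateEndTarget])
ops = OcgOperations(rd, geom=Point(lon, lat), select_nearest=True,
                    search_radius_mult=1.75)
target_ts = ops.execute().get_element()
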
Ejemplo n.º 35
0
def get_anomalies(nc_file, frac=0.2, reference=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
               (0 = close to the original data, 1 = flat line)
               default = 0.2
    :param reference: Period to calculate annual cycle

    :returns str: path to output netCDF file
    """
    try:
        variable = utils.get_variable(nc_file)
        calc = [{'func': 'mean', 'name': variable}]
        calc_grouping = ['day', 'month']
        nc_anual_cycle = call(nc_file,
                              calc=calc,
                              calc_grouping=calc_grouping,
                              time_range=reference)
        logger.info('annual cycle calculated')
    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        logger.error(msg)
        raise Exception(msg)

    try:
        # spline for smoothing
        import statsmodels.api as sm
        from numpy import tile, empty, linspace
        from netCDF4 import Dataset
        from cdo import Cdo
        cdo = Cdo()
        # variable = utils.get_variable(nc_file)
        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts * 3, num=ts * 3, endpoint=True)
        for lat in range(vals.shape[1]):
            for lon in range(vals.shape[2]):
                try:
                    y = tile(vals[:, lat, lon], 3)
                    # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                    ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]
                    vals_sm[:, lat, lon] = ys
                except Exception:
                    msg = 'failed for lat %s lon %s' % (lat, lon)
                    logger.exception(msg)
                    raise Exception(msg)
            logger.debug('done for lat %s' % lat)
        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        logger.info('smoothing of annual cycle done')
    except Exception:
        msg = 'failed smoothing of annual cycle'
        logger.exception(msg)
        raise Exception(msg)
    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
        logger.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
    except Exception:
        msg = 'failed subtraction of annual cycle'
        logger.exception(msg)
        raise Exception(msg)
    return nc_anomal
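
The smoothing step tiles each grid-point series three times so the LOWESS window wraps around the annual cycle; the same operation on a single series looks like this sketch (y stands in for one grid point's climatology):

import statsmodels.api as sm
from numpy import tile, linspace

ts = len(y)                                        # length of the annual cycle
y3 = tile(y, 3)                                    # wrap the cycle to avoid edge effects
x = linspace(1, ts * 3, num=ts * 3, endpoint=True)
smoothed = sm.nonparametric.lowess(y3, x, frac=0.2)[ts:ts * 2, 1]  # keep the middle copy
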
Ejemplo n.º 36
0
    def execute(self):
        logger.info('Start process')

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp

        ################################
        # reading in the input arguments
        ################################
        try:
            resource = self.getInputValues(identifier='resource')
            url_Rdat = self.getInputValues(identifier='Rdat')[0]
            url_dat = self.getInputValues(identifier='dat')[0]
            url_ref_file = self.getInputValues(
                identifier='netCDF')  # can be None
            season = self.getInputValues(identifier='season')[0]
            period = self.getInputValues(identifier='period')[0]
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
        except Exception as e:
            logger.debug('failed to read in the arguments %s ' % e)

        try:
            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')
            # kappa = int(self.getInputValues(identifier='kappa')[0])

            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            logger.info('read in the arguments')
            logger.info('url_ref_file: %s' % url_ref_file)
            logger.info('url_Rdat: %s' % url_Rdat)
            logger.info('url_dat: %s' % url_dat)
        except Exception as e:
            logger.debug('failed to convert arguments %s ' % e)

        ############################
        # fetching training data
        ############################

        from flyingpigeon.utils import download, get_time
        from os.path import abspath

        try:
            dat = abspath(download(url_dat))
            Rdat = abspath(download(url_Rdat))
            logger.info('training data fetched')
        except Exception as e:
            logger.error('failed to fetch training data %s' % e)

        ##########################################################
        # get the required bbox and time region from resource data
        ##########################################################
        # from flyingpigeon.weatherregimes import get_level

        from flyingpigeon.ocgis_module import call
        from flyingpigeon.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file[0])
            model_subset = call(
                resource=resource,
                variable=variable,
                time_range=time_range,
                # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file,
                regrid_options='bil')
            logger.info('Dataset subset with regridding done: %s ' % model_subset)
        else:
            model_subset = call(
                resource=resource,
                variable=variable,
                time_range=time_range,
                # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
            )
            logger.info('Dataset time period extracted: %s ' % model_subset)

        #######################
        # computing anomalies
        #######################

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        # extracting season
        #####################

        model_season = wr.get_season(model_anomal, season=season)

        #######################
        # call the R scripts
        #######################

        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_projection.R'

            yr1 = start.year
            yr2 = end.year
            time = get_time(model_season, format='%Y%m%d')

            # ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
            ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                # '%s' % output_graphics,
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL'
            ]

            logger.info('R call built')
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            logger.error(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            logger.info('R outlog info:\n %s ' % output)
            logger.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                self.status.set('**** weatherregime in R succeeded', 90)
            else:
                logger.error('no output returned from R call')
        except Exception as e:
            msg = 'weatherregime in R %s ' % e
            logger.error(msg)
            raise Exception(msg)

        #################
        # set the outputs
        #################

        # self.Routput_graphic.setValue( output_graphics )
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
        self.output_frequency.setValue(output_frec)
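
Each mkstemp call above leaves its low-level file descriptor (the ip variable) open; a tidier pattern, sketched with one of the files from above:

import os
from tempfile import mkstemp

fd, file_pca = mkstemp(dir='.', suffix='.txt')
os.close(fd)  # close the descriptor immediately; only the path is used downstream
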
Ejemplo n.º 37
0
def clipping(resource=[],
             variable=None,
             dimension_map=None,
             calc=None,
             output_format='nc',
             calc_grouping=None,
             time_range=None,
             time_region=None,
             historical_concatination=True,
             prefix=None,
             spatial_wrapping='wrap',
             polygons=None,
             mosaic=False,
             dir_output=None,
             memory_limit=None):
    """ returns list of clipped netCDF files

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimension_map: specify a dimension map if input netCDF has unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concat files of RCPs with appropriate historical runs into one timeseries
    :param prefix: prefix for output file name
    :param polygons: list of polygons to be used. If more than 1 in the list, an appropriate mosaic will be clipped
    :param mosaic: Whether the polygons are aggregated into a single geometry (True) or individual files are created for each geometry (False).
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    :param time_range: [start, end] of time subset
    :param time_region: year, months or days to be extracted in the timeseries

    :returns list: path to clipped files
    """

    if type(resource) != list:
        resource = list([resource])
    if type(polygons) != list:
        polygons = list([polygons])
    if prefix is not None:
        if type(prefix) != list:
            prefix = list([prefix])

    geoms = set()
    ncs = sort_by_filename(resource,
                           historical_concatination=historical_concatination)
    geom_files = []
    if mosaic is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + polygon.replace(' ', '')
            if len(geoms) > 1:
                LOGGER.error(
                    'polygons belong to different shapefiles! mosaic option is not possible %s',
                    geoms)
            else:
                geom = geoms.pop()
            ugids = get_ugid(polygons=polygons, geom=geom)
        except Exception:
            LOGGER.exception('geom identification failed')
        for i, key in enumerate(ncs.keys()):
            try:
                # if variable is None:
                variable = get_variable(ncs[key])
                LOGGER.info('variable %s detected in resource' % (variable))
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key],
                                 variable=variable,
                                 calc=calc,
                                 calc_grouping=calc_grouping,
                                 output_format=output_format,
                                 prefix=name,
                                 geom=geom,
                                 select_ugid=ugids,
                                 time_range=time_range,
                                 time_region=time_region,
                                 spatial_wrapping=spatial_wrapping,
                                 memory_limit=memory_limit,
                                 dir_output=dir_output,
                                 dimension_map=dimension_map)
                geom_files.append(geom_file)
                LOGGER.info('ocgis mosaic clipping done for %s ' % (key))
            except Exception:
                msg = 'ocgis mosaic clipping failed for %s ' % (key)
                LOGGER.exception(msg)
    else:
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        # if variable is None:
                        variable = get_variable(ncs[key])
                        LOGGER.info('variable %s detected in resource' %
                                    (variable))
                        if prefix is None:
                            name = key + '_' + polygon.replace(' ', '')
                        else:
                            name = prefix[i]
                        geom_file = call(
                            resource=ncs[key],
                            variable=variable,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            output_format=output_format,
                            prefix=name,
                            geom=geom,
                            select_ugid=ugid,
                            dir_output=dir_output,
                            dimension_map=dimension_map,
                            spatial_wrapping=spatial_wrapping,
                            memory_limit=memory_limit,
                            time_range=time_range,
                            time_region=time_region,
                        )
                        geom_files.append(geom_file)
                        LOGGER.info('ocgis clipping done for %s ' % (key))
                    except Exception:
                        msg = 'ocgis clipping failed for %s ' % (key)
                        LOGGER.exception(msg)
            except Exception:
                LOGGER.exception('geom identification failed')
    return geom_files
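A minimal usage sketch for the clipping helper documented above. The function and import names (`clipping` from flyingpigeon.subset), the file name, and the polygon labels are assumptions for illustration, not taken from this listing:

# hedged usage sketch; names and values are illustrative
from flyingpigeon.subset import clipping  # assumed import location

nc_files = ['tas_EUR-44_day_20060101-20101231.nc']  # placeholder input
clipped = clipping(resource=nc_files,
                   polygons=['DEU', 'FRA'],  # two labels -> one mosaic geometry
                   mosaic=True,
                   dir_output='.')
print(clipped)  # list of paths to the clipped NetCDF files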
Ejemplo n.º 38
0
def get_data(variable,
             resource=None,
             polygons=None,
             dir_output=None,
             start=1950,
             end=2014):
  
  from os import rename, path, makedirs
  from flyingpigeon import utils
  from flyingpigeon import subset as sb
  from flyingpigeon import ocgis_module as om
  
  try:
    # ocgis.env.OVERWRITE=True
    # ocgis.env.DIR_SHPCABINET = config.shapefiles_dir()
    # geoms = sb.get_geom()
    # sci = ShpCabinetIterator(geoms)

    if dir_output is not None and not path.exists(dir_output):
      makedirs(dir_output)

    if polygons is not None:
      geom = sb.get_geom(polygon=polygons[0:1])
      ugid = sb.get_ugid(polygons=polygons, geom=geom)
    else:
      ugid = None
      geom = None

    if resource is None:
      resource = get_url(variable)

    dimension_map = {'X': {'variable': 'Actual_longitude', 'dimension': 'x', 'pos': 2},
                     'Y': {'variable': 'Actual_latitude', 'dimension': 'y', 'pos': 1},
                     'T': {'variable': 'time', 'dimension': 'time', 'pos': 0}}

    time_region = {'year': range(start, end + 1)}

    if variable == 'tg':
        var = 'tas'
        unit = 'K'
    elif variable == 'tn':
        var = 'tasmin'
        unit = 'K'
    elif variable == 'tx':
        var = 'tasmax'
        unit = 'K'
    elif variable == 'rr':
        var = 'pr'
        unit = 'kg m-2 s-1'
    else:
        raise Exception('unknown variable: %s' % variable)
    prefix = path.split(resource)[1].replace(variable, var).replace('.nc', '')
    
    logger.info('processing variable %s' % (var))
  except Exception:
    logger.exception('could not set processing environment')
    raise

  if variable == 'rr':
    try:
      # convert E-OBS rr (mm/day) to kg m-2 s-1: 86400 seconds per day
      calc = 'rr=rr/86400'

      EOBS_file = om.call(resource=resource, variable=variable, memory_limit=450,
                          dimension_map=dimension_map, prefix=prefix, calc=calc,
                          geom=geom, select_ugid=ugid,
                          dir_output=dir_output, time_region=time_region)
    except Exception:
      logger.exception('ocgis failed for rr with url: %s' % (resource))
  else:
    try:
      unit = 'K'
      EOBS_file = om.call(resource=resource, variable=variable, memory_limit=450,
                          dimension_map=dimension_map, conform_units_to=unit, prefix=prefix,
                          geom=geom, select_ugid=ugid,
                          dir_output=dir_output, time_region=time_region)
    except Exception:
      logger.exception('ocgis failed for tg, tx or tn')

  try:
    if polygons is None:
      domain = att_dict['CORDEX_domain']
    else:
      # polygons[0]: assumes a single polygon label for the domain tag
      domain = att_dict['CORDEX_domain'].replace('EUR', polygons[0])
    
    EOBS_filename = '%s_%s_%s_%s_%s_%s_%s_%s_%s-%s.nc' % (var, 
                                        domain,
                                        att_dict['driving_model_id'],
                                        att_dict['experiment_id'],
                                        att_dict['driving_model_ensemble_member'],
                                        att_dict['model_id'],
                                        att_dict['rcm_version_id'],
                                        att_dict['frequency'],
                                        start,
                                        end)
  
    fpath, basename = path.split(EOBS_file)
    set_attributes(EOBS_file, variable)
    rename(EOBS_file, path.join(fpath, EOBS_filename))
      
  except Exception:
    logger.exception('attributes not set for: %s' % EOBS_file)
  return path.join(fpath, EOBS_filename)
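A usage sketch for get_data as defined above; the year range and output directory are illustrative. With resource=None the E-OBS URL is resolved internally via get_url('tg'), and the result is a CORDEX-style renamed file holding 'tas' in Kelvin:

# hedged usage sketch; values are illustrative
eobs_tas = get_data('tg',            # E-OBS daily mean temperature
                    resource=None,   # None -> resolved via get_url('tg')
                    polygons=None,   # no spatial subset
                    dir_output='./out',
                    start=1980,
                    end=1990)
print(eobs_tas)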
Ejemplo n.º 39
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 5)

            resource = archiveextract(resource=rename_complexinputs(request.inputs['resource']))
            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data
            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data

            # bbox = [-80, 20, 50, 70]
            # TODO: Add checking for wrong coordinates and apply defaults if necessary
            # level = 500

            level = request.inputs['level'][0].data
            if level == 500:
                dummylevel = 1000  # dummy workaround for cdo sellevel
            else:
                dummylevel = 500
            LOGGER.debug('LEVEL selected: %s hPa' % (level))

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            bboxStr = bboxStr.split(',')
            #for i in bboxStr: bbox.append(int(i))
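            # note: the second and third comma-separated components are swapped
            # below into the ordering the later ocgis/cdo calls expect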
            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            # if bbox_obj is not None:
            #     LOGGER.info("bbox_obj={0}".format(bbox_obj.coords))
            #     bbox = [bbox_obj.coords[0][0],
            #             bbox_obj.coords[0][1],
            #             bbox_obj.coords[1][0],
            #             bbox_obj.coords[1][1]]
            #     LOGGER.info("bbox={0}".format(bbox))
            # else:
            #     bbox = None
            # region = self.getInputValues(identifier='region')[0]
            # bbox = [float(b) for b in region.split(',')]
            # bbox_obj = self.BBox.getValue()

            normalize = request.inputs['normalize'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            # model_var = request.inputs['reanalyses'][0].data
            # model, var = model_var.split('_')

            # experiment = self.getInputValues(identifier='experiment')[0]
            # dataset, var = experiment.split('_')
            # LOGGER.info('environment set')
            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 5)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            # refSt = dt.strptime(refSt[0], '%Y-%m-%d')
            # refEn = dt.strptime(refEn[0], '%Y-%m-%d')
            # dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
            # dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

            # not necessary once ocgis_module.py is fixed
            refSt = dt.combine(refSt, dt_time(12, 0))
            refEn = dt.combine(refEn, dt_time(12, 0))
            dateSt = dt.combine(dateSt, dt_time(12, 0))
            dateEn = dt.combine(dateEn, dt_time(12, 0))

            # refSt = refSt.replace(hour=12)
            # refEn = refEn.replace(hour=12)
            # dateSt = dateSt.replace(hour=12)
            # dateEn = dateEn.replace(hour=12)

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

#            if bbox_obj is not None:
#                LOGGER.info("bbox_obj={0}".format(bbox_obj.coords))
#                bbox = [bbox_obj.coords[0][0],
#                        bbox_obj.coords[0][1],
#                        bbox_obj.coords[1][0],
#                        bbox_obj.coords[1][1]]
#                LOGGER.info("bbox={0}".format(bbox))
#            else:
#                bbox = None

            LOGGER.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in and convert the arguments', 5)

        ########################
        # input data preparation
        ########################

        # TODO: Check whether the files contain more than one dataset

        response.update_status('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...
        try:
            # TODO: Add selection of the level, maybe below in call(..., level_range=[...,...])

            if isinstance(resource, list):
                # resource.sort()
                resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0])
            else:
                resource = [resource]

            # ===============================================================
            # TODO: Remove resources outside the requested period from the list
            # (years earlier or later than requested for the calculation)

            tmp_resource = []

            for re in resource:
                s, e = get_timerange(re)
                tmpSt = dt.strptime(s, '%Y%m%d')
                tmpEn = dt.strptime(e, '%Y%m%d')
                if (tmpSt <= end) and (tmpEn >= start):
                    tmp_resource.append(re)
                    LOGGER.debug('Selected file: %s ' % (re))
            resource = tmp_resource
            # ===============================================================

            #================================================================
            # Try to fix memory issue... (ocgis call for files like 20-30 gb... )
            # IF 4D - select pressure level before domain cut
            #
            # resource properties
            ds = Dataset(resource[0])
            variable = get_variable(resource[0])
            var = ds.variables[variable]
            dims = list(var.dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id') 
            except AttributeError:
                model_id = 'Unknown model'

            LOGGER.debug('MODEL: %s ' % (model_id)) 

            lev_units = 'hPa'

            if dimlen > 3:
                lev = ds.variables[dims[1]]
                # actually index [1] needs to be detected... assuming zg(time, plev, lat, lon)
                lev_units = lev.units

                if lev_units == 'Pa':
                    level = level * 100
                    dummylevel = dummylevel * 100
                    # TODO: OR check the NAME and units of the vertical level and find 200, 300 or 500 mbar in it,
                    # not just level = level * 100.

            # Get Levels

            from cdo import Cdo
            cdo = Cdo()

            lev_res = []
            if dimlen > 3:
                for res_fn in resource:
                    tmp_f = 'lev_' + path.basename(res_fn)
                    comcdo = '%s,%s' % (level, dummylevel)
                    cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                    lev_res.append(tmp_f)
            else:
                lev_res = resource

            # Get domain
            regr_res = []
            for res_fn in lev_res:
                tmp_f = 'dom_' + path.basename(res_fn)
                comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
                cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
                regr_res.append(tmp_f)

            #archive_tmp = call(resource=resource, time_range=[refSt, refEn], geom=bbox, spatial_wrapping='wrap')
            #simulation_tmp = call(resource=resource, time_range=[dateSt, dateEn], geom=bbox, spatial_wrapping='wrap')
            #============================  

            archive_tmp = call(resource=regr_res, time_range=[refSt, refEn], spatial_wrapping='wrap')
            simulation_tmp = call(resource=regr_res, time_range=[dateSt, dateEn], spatial_wrapping='wrap')

            #######################################################################################
            # TEMPORARY dirty workaround to get the level and its units - will be a func in utils.py

            if dimlen > 3:
                archive = get_level(archive_tmp, level=level)
                simulation = get_level(simulation_tmp, level=level)
                variable = 'z%s' % level
                # TODO: this should be modularized
            else:
                archive = archive_tmp
                simulation = simulation_tmp
                # 3D, move forward
            #######################################################################################

            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)
        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        LOGGER.debug("data preparation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################

        # TODO: add MODEL name as argument

        response.update_status('writing config file', 15)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                base_id=model_id,
                sim_id=model_id, 
                timewin=timewin,
                varname=variable,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)

        LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

        ##############
        # CASTf90 call
        ##############
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90
        response.update_status('Start CASTf90 call', 20)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            LOGGER.info('analogue.out info:\n %s ' % output)
            LOGGER.debug('analogue.out errors:\n %s ' % error)
            response.update_status('**** CASTf90 succeeded', 70)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)
        
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)
        response.update_status('preparing output', 70)

        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 80)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
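The cdo-based pre-subsetting above (select the pressure level, then cut the domain, before any ocgis call) is the memory workaround for 20-30 GB 4D files. Pulled out of the handler it could look like the sketch below; the function name is an assumption, while the cdo calls mirror the inline code:

from os import path
from cdo import Cdo

def presubset(files, level, dummylevel, bbox):
    """Select the target pressure level (plus the dummy level used above
    for cdo sellevel) and then cut the lon/lat domain, file by file."""
    cdo = Cdo()
    out = []
    for fn in files:
        lev_f = 'lev_' + path.basename(fn)
        cdo.sellevel('%s,%s' % (level, dummylevel), input=fn, output=lev_f)
        dom_f = 'dom_' + path.basename(fn)
        cdo.sellonlatbox('%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3]),
                         input=lev_f, output=dom_f)
        out.append(dom_f)
    return out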
Ejemplo n.º 40
0
  def execute(self):
    import time # performance test
    process_start_time = time.time() # measure process execution time ...
     
    from os import path
    from tempfile import mkstemp
    from flyingpigeon import analogs
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.datafetch import reanalyses
    from flyingpigeon.utils import get_variable, rename_variable
    
    self.status.set('execution started at : %s '  % dt.now(),5)

    start_time = time.time() # measure init ...
    
    resource = self.getInputValues(identifier='resource')
    bbox_obj = self.BBox.getValue()
    refSt = self.getInputValues(identifier='refSt')
    refEn = self.getInputValues(identifier='refEn')
    dateSt = self.getInputValues(identifier='dateSt')
    dateEn = self.getInputValues(identifier='dateEn')
    normalize = self.getInputValues(identifier='normalize')[0]
    distance = self.getInputValues(identifier='dist')[0]
    outformat = self.getInputValues(identifier='outformat')[0]
    timewin = int(self.getInputValues(identifier='timewin')[0])
    seasonwin = int(self.getInputValues(identifier='seasonwin')[0])  # used in get_configfile below
    nanalog = int(self.getInputValues(identifier='nanalog')[0])  # used in get_configfile below
    experiment = self.getInputValues(identifier='experiment')[0]
    dataset, var = experiment.split('_')

    refSt = dt.strptime(refSt[0], '%Y-%m-%d')
    refEn = dt.strptime(refEn[0], '%Y-%m-%d')
    dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
    dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

    if normalize == 'None': 
      seacyc = False
    else: 
      seacyc = True
      
    
    if outformat == 'ascii': 
      outformat = '.txt'
    elif outformat == 'netCDF':
      outformat = '.nc'
    else:
      logger.error('output format not valid')
    
    if bbox_obj is not None:
      logger.info("bbox_obj={0}".format(bbox_obj.coords))
      bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],
              bbox_obj.coords[1][0], bbox_obj.coords[1][1]]
      logger.info("bbox={0}".format(bbox))
    else:
      bbox = None

    # start = min(refSt, dateSt)
    # end = max(refEn, dateEn)
    # region = self.getInputValues(identifier='region')[0]
    # bbox = [float(b) for b in region.split(',')]

    try:
      if dataset == 'NCEP':
        if 'z' in var:
          variable = 'hgt'
          level = var.strip('z')
          # conform_units_to=None
        else:
          variable = 'slp'
          level = None
          # conform_units_to='hPa'
      elif '20CRV2' in dataset:
        if 'z' in var:
          variable = 'hgt'
          level = var.strip('z')
          # conform_units_to=None
        else:
          variable = 'prmsl'
          level = None
          # conform_units_to='hPa'
      else:
        logger.error('Reanalyses dataset not known')
      logger.info('environment set')
    except Exception as e: 
      msg = 'failed to set environment %s ' % e
      logger.error(msg)  
      raise Exception(msg)

    logger.debug("init took %s seconds.", time.time() - start_time)
    self.status.set('Read in the arguments', 5)
    #################
    # get input data
    #################

    start_time = time.time()  # measure get_input_data ...
    self.status.set('fetching input data', 7)
    try:
      input = reanalyses(start=dateSt.year, end=dateEn.year, variable=var, dataset=dataset)
      nc_subset = call(resource=input, variable=var, geom=bbox)
    except Exception as e:
      msg = 'failed to fetch or subset input files %s' % e
      logger.error(msg)
      raise Exception(msg)
    logger.debug("get_input_subset_dataset took %s seconds.", time.time() - start_time)
    self.status.set('**** Input data fetched', 10)
    
    ########################
    # input data preparation
    ########################
    self.status.set('Start preparing input data', 12)
    start_time = time.time()  # measure data preparation ...
    try:
      self.status.set('Preparing simulation data', 15)
      simulation = call(resource=nc_subset, time_range=[dateSt, dateEn])
    except Exception:
      msg = 'failed to prepare simulation period'
      logger.debug(msg)
      
    try:
      self.status.set('Preparing target data', 17)
      var_target = get_variable(resource)
      #var_simulation = get_variable(simulation)
      archive = call(resource=resource, variable=var_target,
                     time_range=[refSt, refEn], geom=bbox, t_calendar='standard',
                     # conform_units_to=conform_units_to, spatial_wrapping='wrap',
                     regrid_destination=simulation, regrid_options='bil')
    except Exception as e:
      msg = 'failed subset archive dataset %s ' % e
      logger.debug(msg)
      raise Exception(msg)
    
    try:
      if var != var_target:
        rename_variable(archive, oldname=var_target, newname=var)
        logger.info('varname %s in netCDF renamed to %s' % (var_target, var))
    except Exception as e:
      msg = 'failed to rename variable in target files %s ' % e
      logger.debug(msg)
      raise Exception(msg)
    
    try:
      if seacyc is True:
        seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
      else:
        seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
      msg = 'failed to prepare seasonal cycle reference files %s ' % e
      logger.debug(msg)
      raise Exception(msg)
      
    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    logger.debug("data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generating the config file
    ############################
    
    self.status.set('writing config file', 15)
    start_time = time.time() # measure write config ...
    
    try:
      config_file = analogs.get_configfile(
        files=files,
        seasoncyc_base=seasoncyc_base,
        seasoncyc_sim=seasoncyc_sim,
        timewin=timewin,
        varname=var,
        seacyc=seacyc,
        cycsmooth=91,
        nanalog=nanalog,
        seasonwin=seasonwin,
        distfun=distance,
        outformat=outformat,
        calccor=True,
        silent=False,
        period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
        bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
      msg = 'failed to generate config file %s ' % e
      logger.debug(msg)
      raise Exception(msg)

    logger.debug("write_config took %s seconds.", time.time() - start_time)
      
    #######################
    # CASTf90 call 
    #######################
    import subprocess
    import shlex

    start_time = time.time() # measure call castf90
    
    self.status.set('Start CASTf90 call', 20)
    try:
      #self.status.set('execution of CASTf90', 50)
      cmd = 'analogue.out %s' % path.relpath(config_file)
      #system(cmd)
      args = shlex.split(cmd)
      output,error = subprocess.Popen(args, stdout = subprocess.PIPE, stderr= subprocess.PIPE).communicate()
      logger.info('analogue.out info:\n %s ' % output)
      logger.debug('analogue.out errors:\n %s ' % error)
      self.status.set('**** CASTf90 succeeded', 90)
    except Exception as e: 
      msg = 'CASTf90 failed %s ' % e
      logger.error(msg)  
      raise Exception(msg)

    logger.debug("castf90 took %s seconds.", time.time() - start_time)
    
    self.status.set('preparing output', 99)
    self.config.setValue(config_file)
    self.analogs.setValue(output_file)
    self.simulation_netcdf.setValue(simulation)
    self.target_netcdf.setValue(archive)
    
    self.status.set('execution ended', 100)

    logger.debug("total execution took %s seconds.", time.time() - process_start_time)
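The Popen/communicate pattern above never inspects analogue.out's exit status, so a failed run is only visible in the logs. A stricter variant (the same pattern a later example in this listing uses) raises on a non-zero return code:

import subprocess
from subprocess import CalledProcessError

def run_castf90(config_file):
    # check_output raises CalledProcessError on a non-zero exit status
    cmd = ['analogue.out', config_file]
    try:
        return subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except CalledProcessError as err:
        raise Exception('CASTf90 failed:\n%s' % err.output)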
Ejemplo n.º 41
0
    def execute(self):
        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        import time  # performance test
        process_start_time = time.time()  # measure process execution time ...

        from os import path
        from tempfile import mkstemp
        from datetime import datetime as dt

        from flyingpigeon import analogs
        from flyingpigeon.ocgis_module import call
        from flyingpigeon.datafetch import reanalyses
        from flyingpigeon.utils import get_variable

        self.status.set('execution started at : %s ' % dt.now(), 5)
        start_time = time.time()  # measure init ...

        #######################
        # read input parameters
        #######################
        try:
            self.status.set('read input parameter : %s ' % dt.now(), 5)
            resource = self.getInputValues(identifier='resource')
            refSt = self.getInputValues(identifier='refSt')
            refEn = self.getInputValues(identifier='refEn')
            dateSt = self.getInputValues(identifier='dateSt')
            dateEn = self.getInputValues(identifier='dateEn')
            normalize = self.getInputValues(identifier='normalize')[0]
            distance = self.getInputValues(identifier='dist')[0]
            outformat = self.getInputValues(identifier='outformat')[0]
            timewin = int(self.getInputValues(identifier='timewin')[0])
            bbox_obj = self.BBox.getValue()
            seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
            nanalog = int(self.getInputValues(identifier='nanalog')[0])

            # region = self.getInputValues(identifier='region')[0]
            # bbox = [float(b) for b in region.split(',')]
            # experiment = self.getInputValues(identifier='experiment')[0]
            # dataset , var = experiment.split('_')

            logger.info('input parameters set')
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            logger.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            refSt = dt.strptime(refSt[0], '%Y-%m-%d')
            refEn = dt.strptime(refEn[0], '%Y-%m-%d')
            dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
            dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                logger.error('output format not valid')

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            if bbox_obj is not None:
                logger.info("bbox_obj={0}".format(bbox_obj.coords))
                bbox = [bbox_obj.coords[0][0],
                        bbox_obj.coords[0][1],
                        bbox_obj.coords[1][0],
                        bbox_obj.coords[1][1]]
                logger.info("bbox={0}".format(bbox))
            else:
                bbox = None

            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("init took %s seconds.", time.time() - start_time)
        self.status.set('Read in and convert the arguments', 5)

        ########################
        # input data preparation
        ########################

        # TODO: Check whether the files contain more than one dataset

        self.status.set('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...
        try:
            variable = get_variable(resource)

            archive = call(resource=resource, time_range=[refSt, refEn], geom=bbox, spatial_wrapping='wrap')
            simulation = call(resource=resource, time_range=[dateSt, dateEn], geom=bbox, spatial_wrapping='wrap')
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            logger.debug(msg)
            raise Exception(msg)
        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        logger.debug("data preparation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################
        self.status.set('writing config file', 15)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                timewin=timewin,
                varname=variable,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        logger.debug("write_config took %s seconds.", time.time() - start_time)

        ##############
        # CASTf90 call
        ##############
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90
        self.status.set('Start CASTf90 call', 20)
        try:
            # self.status.set('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            logger.info('analogue.out info:\n %s ' % output)
            logger.debug('analogue.out errors:\n %s ' % error)
            self.status.set('**** CASTf90 succeeded', 90)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("castf90 took %s seconds.", time.time() - start_time)

        self.status.set('preparing output', 99)
        self.config.setValue(config_file)
        self.analogs.setValue(output_file)
        self.output_netcdf.setValue(simulation)

        self.status.set('execution ended', 100)

        logger.debug("total execution took %s seconds.", time.time() - process_start_time)
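A detail shared by all of these handlers: mkstemp returns an open OS-level file descriptor together with the path, and the code keeps it in `ip` without ever closing it. A leak-free variant:

from os import close, path
from tempfile import mkstemp

fd, output = mkstemp(dir='.', suffix='.txt')
close(fd)  # release the descriptor; the (empty) file itself remains
output_file = path.abspath(output)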
Ejemplo n.º 42
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp

        response.update_status('execution started at : {}'.format(dt.now()), 5)

        ################################
        # reading in the input arguments
        ################################
        LOGGER.info('read in the arguments')
        # resources = self.getInputValues(identifier='resources')
        season = request.inputs['season'][0].data
        LOGGER.info('season %s', season)

        # bbox = [-80, 20, 50, 70]
        # TODO: Add checking for wrong coordinates and apply defaults if necessary
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: {}'.format(bbox))
        LOGGER.debug('BBOX original: {}'.format(bboxStr))

        model_var = request.inputs['reanalyses'][0].data
        model, variable = model_var.split('_')

        period = request.inputs['period'][0].data
        LOGGER.info('period: {}'.format(period))
        anualcycle = request.inputs['anualcycle'][0].data
        kappa = request.inputs['kappa'][0].data
        LOGGER.info('kappa: {}'.format(kappa))

        method = request.inputs['method'][0].data
        LOGGER.info('Calc annual cycle with {}'.format(method))

        sseas = request.inputs['sseas'][0].data
        LOGGER.info('sseas flag: {}'.format(sseas))

        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        LOGGER.debug('start: {0}, end: {1}'.format(start, end))

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 10)

        try:
            if model == 'NCEP':
                getlevel = False
                if 'z' in variable:
                    level = variable.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in variable:
                    level = variable.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            else:
                LOGGER.error('Reanalyses dataset not known')
            LOGGER.info('environment set for model: {}'.format(model))
        except Exception as ex:
            msg = 'failed to set environment: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        from flyingpigeon.datafetch import reanalyses as rl
        from flyingpigeon.utils import get_variable
        from os.path import basename, splitext  # used below for prefixes and sorting
        from os import remove
        from netCDF4 import Dataset
        from numpy import squeeze

        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=variable,
                          getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except Exception as ex:
            msg = 'failed to get reanalyses data: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('fetching data done', 15)
        ############################################################
        # get the required bbox and time region from resource data
        ############################################################

        response.update_status('subsetting region of interest', 17)
        # from flyingpigeon.weatherregimes import get_level
        # from flyingpigeon.ocgis_module import call

        time_range = [start, end]

        ############################################################
        # Block of level and domain selection for geop huge dataset
        ############################################################

        LevMulti = False

        # ===========================================================================================
        if 'z' in variable:
            tmp_total = []
            origvar = get_variable(model_nc)

            if not LevMulti:
                for z in model_nc:
                    b0 = call(resource=z,
                              variable=origvar,
                              level_range=[int(level), int(level)],
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='levdom_' + basename(z)[0:-3])
                    tmp_total.append(b0)
            else:
                # multiproc - no improvements yet; needs testing on a high-performance machine...
                # -----------------------
                try:
                    import ctypes
                    import os
                    # TODO: This lib is for linux
                    mkl_rt = ctypes.CDLL('libmkl_rt.so')
                    nth = mkl_rt.mkl_get_max_threads()
                    LOGGER.debug('Current number of threads: {}'.format(nth))
                    mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
                    nth = mkl_rt.mkl_get_max_threads()
                    LOGGER.debug('NEW number of threads: {}'.format(nth))
                    # TODO: does this work with the default shell=False in subprocess... (?)
                    os.environ['MKL_NUM_THREADS'] = str(nth)
                    os.environ['OMP_NUM_THREADS'] = str(nth)
                except Exception as ex:
                    msg = 'Failed to set THREADS: {}'.format(ex)
                    LOGGER.debug(msg)
                # -----------------------

                from multiprocessing import Pool
                pool = Pool()
                # from multiprocessing.dummy import Pool as ThreadPool
                # pool = ThreadPool()
                tup_var = [origvar] * len(model_nc)
                tup_lev = [level] * len(model_nc)
                tup_bbox = [bbox] * len(model_nc)
                tup_args = zip(model_nc, tup_var, tup_lev, tup_bbox)

                tmp_total = pool.map(ocgis_call_wrap, tup_args)
                pool.close()
                pool.join()

            LOGGER.debug('Temporal subset files: {}'.format(tmp_total))

            tmp_total = sorted(tmp_total,
                               key=lambda i: splitext(basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total,
                                    variable=origvar,
                                    time_range=time_range)

            # Clean up the temporary per-file subsets
            for i in tmp_total:
                try:
                    remove(i)
                except OSError:
                    LOGGER.warning('could not remove temporary file %s', i)

            # Create new variable
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z{}'.format(level),
                                        z_var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            model_subset = call(inter_subset_tmp, variable='z{}'.format(level))
        else:
            model_subset = call(
                resource=model_nc,
                variable=variable,
                geom=bbox,
                spatial_wrapping='wrap',
                time_range=time_range,
                # conform_units_to=conform_units_to
            )
        # =============================================================================================
        LOGGER.info('Dataset subset done: {}'.format(model_subset))

        response.update_status('dataset subsetted', 18)
        ##############################################
        # computing anomalies
        ##############################################
        response.update_status('computing anomalies ', 19)

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        LOGGER.info('reference time: {}'.format(reference))

        model_anomal = wr.get_anomalies(model_subset,
                                        reference=reference,
                                        method=method,
                                        sseas=sseas)  # , variable=variable)

        #####################
        # extracting season
        #####################
        response.update_status('normalizing data', 21)
        model_season = wr.get_season(model_anomal, season=season)

        response.update_status('anomalies computed and  normalized', 24)
        #######################
        # call the R scripts
        #######################
        response.update_status('Start weather regime clustering ', 25)
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_model.R'

            infile = model_season  # alternatives: model_subset, model_ponderate
            modelname = model
            yr1 = start.year
            yr2 = end.year
            ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

            # TODO: Rewrite this using os.path.join or pathlib libraries
            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % model_var,
                '%s' % kappa
            ]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))
        except Exception as ex:
            msg = 'failed to build the R command: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n {}'.format(output))
            LOGGER.debug('R outlog errors:\n {}'.format(error))
            if len(output) > 0:
                response.update_status('**** weatherregime in R succeeded', 90)
            else:
                LOGGER.error('No output returned from R call')
        except Exception as ex:
            msg = 'failed to run the R weatherregime: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('Weather regime clustering done ', 93)
        ############################################
        # set the outputs
        ############################################
        response.update_status('Set the process outputs ', 95)

        response.outputs['Routput_graphic'].file = output_graphics
        response.outputs['output_pca'].file = file_pca
        response.outputs['output_classification'].file = file_class
        response.outputs['output_netcdf'].file = model_subset
        response.update_status('done', 100)
        return response
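The multiprocessing branch above maps ocgis_call_wrap over the file list, but that helper is not part of this listing. A plausible reconstruction from the sequential branch (an assumption, not the original implementation):

from os.path import basename
from flyingpigeon.ocgis_module import call

def ocgis_call_wrap(args):
    # pool.map passes one tuple per file: (filename, variable, level, bbox),
    # matching the zip() built in the handler above
    z, origvar, level, bbox = args
    return call(resource=z,
                variable=origvar,
                level_range=[int(level), int(level)],
                geom=bbox,
                spatial_wrapping='wrap',
                prefix='levdom_' + basename(z)[0:-3])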
Ejemplo n.º 43
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 5)

            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data
            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data
            bbox = [-80, 20, 50, 70]
            # if bbox_obj is not None:
            #     LOGGER.info("bbox_obj={0}".format(bbox_obj.coords))
            #     bbox = [bbox_obj.coords[0][0],
            #             bbox_obj.coords[0][1],
            #             bbox_obj.coords[1][0],
            #             bbox_obj.coords[1][1]]
            #     LOGGER.info("bbox={0}".format(bbox))
            # else:
            #     bbox = None
            # region = self.getInputValues(identifier='region')[0]
            # bbox = [float(b) for b in region.split(',')]
            # bbox_obj = self.BBox.getValue()

            normalize = request.inputs['normalize'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            model_var = request.inputs['reanalyses'][0].data
            model, var = model_var.split('_')

            # experiment = self.getInputValues(identifier='experiment')[0]
            # dataset, var = experiment.split('_')
            # LOGGER.info('environment set')
            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 5)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            response.update_status('Preparing environment, converting arguments', 7)
            LOGGER.debug('date: %s %s %s %s ' % (type(refSt), refEn, dateSt, dateEn))

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            #
            # refSt = dt.strftime(refSt, '%Y-%m-%d')
            # refEn = dt.strftime(refEn, '%Y-%m-%d')
            # dateSt = dt.strftime(dateSt, '%Y-%m-%d')
            # dateEn = dt.strftime(dateEn, '%Y-%m-%d')

            if normalize == 'None':
                seacyc = False
            else:
                seacyc = True

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 10)

        try:
            if model == 'NCEP':
                if 'z' in var:
                    level = var.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            elif '20CRV2' in model:
                if 'z' in var:
                    level = var.strip('z')
                    conform_units_to = None
                else:
                    level = None
                    conform_units_to = 'hPa'
            else:
                LOGGER.error('Reanalyses dataset not known')
            LOGGER.info('environment set for model: %s' % model)
        except Exception:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=var)
            LOGGER.info('reanalyses data fetched')
        except Exception:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('subsetting region of interest', 17)
        # from flyingpigeon.weatherregimes import get_level
        LOGGER.debug("start and end time: %s - %s" % (start, end))
        time_range = [start, end]

        model_subset = call(resource=model_nc, variable=var,
                            geom=bbox, spatial_wrapping='wrap', time_range=time_range,
                            # conform_units_to=conform_units_to
                            )
        LOGGER.info('Dataset subset done: %s ', model_subset)

        response.update_status('dataset subsetted', 19)

        ############################################################
        #  get the required bbox and time region from resource data
        ############################################################
        #
        #
        # try:
        #     if dataset == 'NCEP':
        #         if 'z' in var:
        #             variable = 'hgt'
        #             level = var.strip('z')
        #             # conform_units_to=None
        #         else:
        #             variable = 'slp'
        #             level = None
        #             # conform_units_to='hPa'
        #     elif '20CRV2' in var:
        #         if 'z' in level:
        #             variable = 'hgt'
        #             level = var.strip('z')
        #             # conform_units_to=None
        #         else:
        #             variable = 'prmsl'
        #             level = None
        #             # conform_units_to='hPa'
        #     else:
        #         LOGGER.error('Reanalyses dataset not known')
        #     LOGGER.info('environment set')
        # except Exception as e:
        #     msg = 'failed to set environment %s ' % e
        #     LOGGER.error(msg)
        #     raise Exception(msg)
        #
        # LOGGER.debug("init took %s seconds.", time.time() - start_time)
        # response.update_status('Read in and convert the arguments done', 8)
        #
        # #################
        # # get input data
        # #################
        # start_time = time.time()  # measure get_input_data ...
        # response.update_status('fetching input data', 7)
        # try:
        #     input = reanalyses(start=start.year, end=end.year,
        #                        variable=var, dataset=dataset)
        #     LOGGER.info('input files %s' % input)
        #     nc_subset = call(resource=input, variable=var,
        #                      geom=bbox, spatial_wrapping='wrap')
        # except Exception as e:
        #     msg = 'failed to fetch or subset input files %s' % e
        #     LOGGER.error(msg)
        #     raise Exception(msg)

        LOGGER.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        response.update_status('**** Input data fetched', 10)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...

        try:
            # Construct descriptive filenames for the three files
            # listed in config file
            refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
            simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
            archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                                % (bbox[0], bbox[2], bbox[1], bbox[3])
            simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])
            archive = call(resource=model_subset,
                           time_range=[refSt, refEn],
                           prefix=archiveNameString)
            simulation = call(resource=model_subset, time_range=[dateSt, dateEn],
                              prefix=simNameString)
            LOGGER.info('archive and simulation files generated: %s, %s'
                        % (archive, simulation))
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                LOGGER.info('normalization function with method: %s '
                            % normalize)
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive,
                    simulation,
                    method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to generate normalization files %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)

        ip, output_file = mkstemp(dir='.', suffix='.txt')
        files = [path.abspath(archive), path.abspath(simulation), output_file]
        LOGGER.debug("Data preparation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generate the config file
        ############################
        response.update_status('writing config file', 15)
        start_time = time.time()  # measure write config ...
        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                timewin=timewin,
                varname=var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[dt.strftime(refSt, '%Y-%m-%d'),
                        dt.strftime(refEn, '%Y-%m-%d')],
                bbox="%s,%s,%s,%s" % (bbox[0],
                                      bbox[2],
                                      bbox[1],
                                      bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)
        LOGGER.debug("write_config took %s seconds.", time.time() - start_time)
        #######################
        # CASTf90 call
        #######################
        start_time = time.time()  # measure call castf90

        response.update_status('Start CASTf90 call', 20)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = ['analogue.out', path.relpath(config_file)]
            LOGGER.debug("castf90 command: %s", cmd)
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            LOGGER.info('analogue output:\n %s', output)
            response.update_status('**** CASTf90 succeeded', 90)
        except CalledProcessError as e:
            msg = 'CASTf90 failed:\n{0}'.format(e.output)
            LOGGER.error(msg)
            raise Exception(msg)
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        ########################
        # generate analog viewer
        ########################
        response.update_status('preparing output', 50)
        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation

        try:
            formated_analogs_file = analogs.reformat_analogs(output_file)
            response.outputs['formated_analogs'].file = formated_analogs_file
            LOGGER.info('analogs reformated')
            response.update_status('Successfully reformatted analog file', 60)
        except Exception as e:
            msg = 'Failed to reformat analogs file: %s' % e
            LOGGER.error(msg)
            raise Exception(msg)

        try:
            output_av = analogs.get_viewer(
                formated_analogs_file,
                path.basename(config_file))
            response.outputs['output_html'].file = output_av.name
            response.update_status('Successfully generated analogs viewer', 90)
            LOGGER.info('output_av: %s ', output_av)
        except Exception as e:
            msg = 'Failed to generate viewer: %s' % e
            LOGGER.error(msg)
            raise Exception(msg)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
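Several of these processes share the same write-config-then-shell-out pattern: a CASTf90 config file is written, then the Fortran binary analogue.out is run on it. A minimal sketch of that step as a standalone helper, assuming analogue.out is on the PATH (the helper name run_castf90 is hypothetical, not part of flyingpigeon):

import subprocess
from subprocess import CalledProcessError

def run_castf90(config_file):
    # run the CASTf90 binary on a prepared config file;
    # stderr is merged into stdout so the full log is captured
    cmd = ['analogue.out', config_file]
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except CalledProcessError as e:
        # surface the captured log, mirroring the error handling above
        raise RuntimeError('CASTf90 failed:\n%s' % e.output)
    return output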
Ejemplo n.º 44
0
def calc_indice_percentile(resources=[],
                           variable=None,
                           prefix=None,
                           indices='TG90p',
                           refperiod=None,
                           groupings='yr',
                           polygons=None,
                           percentile=90,
                           mosaic=False,
                           dir_output=None,
                           dimension_map=None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resources: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default='TG90p')
    :param prefix: filename prefix
    :param refperiod: reference period as a 'YYYYMMDD-YYYYMMDD' string (default=None)
    :param groupings: indices time aggregation (default='yr')
    :param polygons: polygons for clipping (default=None)
    :param percentile: percentile threshold (default=90)
    :param mosaic: if True, clip to a mosaic of all polygons (default=False)
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from numpy import ma
    from datetime import datetime as dt

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import get_values, get_time

    if type(resources) != list:
        resources = list([resources])
    if type(indices) != list:
        indices = list([indices])

    if type(groupings) != list:
        groupings = list([groupings])

    if type(refperiod) == list:
        refperiod = refperiod[0]

    if refperiod is not None:
        start = dt.strptime(refperiod.split('-')[0], '%Y%m%d')
        end = dt.strptime(refperiod.split('-')[1], '%Y%m%d')
        time_range = [start, end]
    else:
        time_range = None

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    ################################################
    # Compute a custom percentile basis using ICCLIM
    ################################################
    from ocgis.contrib import library_icclim as lic
    nc_indices = []
    nc_dic = sort_by_filename(resources)

    for grouping in groupings:
        calc_group = calc_grouping(grouping)
        for key in nc_dic.keys():
            resource = nc_dic[key]
            if variable is None:
                variable = get_variable(resource)
            if polygons is None:
                nc_reference = call(resource=resource,
                                    prefix=str(uuid.uuid4()),
                                    time_range=time_range,
                                    output_format='nc',
                                    dir_output=dir_output)
            else:
                nc_reference = clipping(resource=resource,
                                        prefix=str(uuid.uuid4()),
                                        time_range=time_range,
                                        output_format='nc',
                                        polygons=polygons,
                                        dir_output=dir_output,
                                        mosaic=mosaic)

            arr = get_values(resource=nc_reference)
            dt_arr = get_time(resource=nc_reference)
            arr = ma.masked_array(arr)
            dt_arr = ma.masked_array(dt_arr)
            window_width = 5

            for indice in indices:
                name = indice.replace('_', str(percentile))
                var = indice.split('_')[0]

                operation = None
                if 'T' in var:
                    if percentile >= 50:
                        operation = 'Icclim%s90p' % var
                        func = 'icclim_%s90p' % var  # icclim_TG90p
                    else:
                        operation = 'Icclim%s10p' % var
                        func = 'icclim_%s10p' % var

                    ################################
                    # load the appropriate operation
                    ################################

                    ops = [op for op in dir(lic) if operation in op]
                    if len(ops) == 0:
                        raise Exception('operator does not exist %s' % operation)

                    # resolve the operation by name instead of using exec
                    percentile_dict = getattr(lic, ops[0]).get_percentile_dict(
                        arr, dt_arr, percentile, window_width)
                    calc = [{
                        'func': func,
                        'name': name,
                        'kwds': {
                            'percentile_dict': percentile_dict
                        }
                    }]

                    if polygons is None:
                        nc_indices.extend(
                            call(resource=resource,
                                 prefix=key.replace(variable, name).replace(
                                     '_day_', '_%s_' % grouping),
                                 calc=calc,
                                 calc_grouping=calc_group,
                                 output_format='nc',
                                 dir_output=dir_output))
                    else:
                        nc_indices.extend(
                            clipping(
                                resource=resource,
                                prefix=key.replace(variable, name).replace(
                                    '_day_', '_%s_' % grouping),
                                calc=calc,
                                calc_grouping=calc_group,
                                output_format='nc',
                                dir_output=dir_output,
                                polygons=polygons,
                                mosaic=mosaic,
                            ))
    if len(nc_indices) == 0:
        logger.debug('No indices are calculated')
        return None
    return nc_indices
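A minimal usage sketch for the function above. The file name is hypothetical, and refperiod follows the 'YYYYMMDD-YYYYMMDD' convention parsed in the function body:

# hypothetical daily mean-temperature file in DRS naming
files = ['tas_day_MODEL_historical_r1i1p1_19700101-19991231.nc']

nc_out = calc_indice_percentile(resources=files,
                                indices=['TG90p'],
                                percentile=90,
                                refperiod='19710101-20001231',
                                groupings=['yr'],
                                dir_output='/tmp/indices')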
Ejemplo n.º 45
0
    def execute(self):
        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp
        
      
        ################################
        # reading in the input arguments
        ################################
        try: 
            logger.info('read in the arguments')
            resource = self.getInputValues(identifier='resource')
            season = self.getInputValues(identifier='season')[0]
            bbox = self.getInputValues(identifier='BBox')[0]
            #model_var = self.getInputValues(identifier='reanalyses')[0]
            period = self.getInputValues(identifier='period')[0]            
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
            # model, var = model_var.split('_')
            
            bbox = [float(b) for b in bbox.split(',')]

            start = dt.strptime(period.split('-')[0] , '%Y%m%d')
            end = dt.strptime(period.split('-')[1] , '%Y%m%d')

            kappa = int(self.getInputValues(identifier='kappa')[0])
            
            logger.info('bbox %s' % bbox)
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            
        except Exception as e: 
            logger.debug('failed to read in the arguments %s ' % e)
       
                
        ############################################################    
        ### get the required bbox and time region from resource data
        ############################################################
        
        # from flyingpigeon.weatherregimes import get_level
        
        from flyingpigeon.ocgis_module import call 
        from flyingpigeon.utils import get_variable
        time_range = [start, end]
      
        variable = get_variable(resource)
        model_subset = call(resource=resource, variable=variable, 
          geom=bbox, spatial_wrapping='wrap', time_range=time_range,  #conform_units_to=conform_units_to
          )
        logger.info('Dataset subset done: %s ' % model_subset)
        
        ##############################################
        ### computing anomalies 
        ##############################################
        
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        ### extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)

        #######################
        ### call the R scripts
        #######################
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
          rworkspace = curdir
          Rsrc = config.Rsrc_dir() 
          Rfile = 'weatherregimes_model.R'
          
          infile = model_season  #model_subset #model_ponderate 
          modelname = 'MODEL'
          yr1 = start.year
          yr2 = end.year
          ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
          ip, file_pca = mkstemp(dir=curdir ,suffix='.dat')
          ip, file_class = mkstemp(dir=curdir ,suffix='.Rdat')
                    
          args = ['Rscript', join(Rsrc,Rfile), '%s/' % curdir, 
                  '%s/' % Rsrc, '%s'% infile, '%s' % variable, 
                  '%s' % output_graphics, '%s' % file_pca,
                   '%s' % file_class, '%s' % season, 
                   '%s' % start.year, '%s' % end.year,
                   '%s' % 'MODEL', '%s' % kappa]
          logger.info('Rcall built')
        except Exception as e: 
          msg = 'failed to build the R command %s' % e
          logger.error(msg)  
          raise Exception(msg)
        try:
          output,error = subprocess.Popen(args, stdout = subprocess.PIPE, stderr= subprocess.PIPE).communicate() #, shell=True
          logger.info('R outlog info:\n %s ' % output)
          logger.debug('R outlog errors:\n %s ' % error)
          if len(output) > 0:            
            self.status.set('**** weatherregime in R succeeded', 90)
          else:
            logger.error('no output returned from R call')
        except Exception as e: 
          msg = 'weatherregime in R failed: %s ' % e
          logger.error(msg)  
          raise Exception(msg)

        ############################################
        ### set the outputs
        ############################################

        self.Routput_graphic.setValue( output_graphics )
        self.output_pca.setValue( file_pca )
        self.output_classification.setValue( file_class )
        self.output_netcdf.setValue( model_season )
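This process and Ejemplo n.º 47 below build the same positional Rscript command. A sketch of that call pattern as a standalone helper (the name run_rscript is illustrative, not part of flyingpigeon):

import subprocess
from os.path import join

def run_rscript(rsrc_dir, rfile, args):
    # build the positional Rscript command and capture stdout/stderr,
    # mirroring the Popen(...).communicate() call above
    cmd = ['Rscript', join(rsrc_dir, rfile)] + [str(a) for a in args]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = p.communicate()
    return output, error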
Ejemplo n.º 46
0
def calc_indice_simple(resource=[], variable=None, prefix=None, indice='SU',
                       polygons=None, mosaic=False, grouping='yr', dir_output=None,
                       dimension_map=None, memory_limit=None):
    """
    Calculates given simple indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in data reference syntax (DRS) convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indice: indice name (default='SU')
    :param polygons: list of polygons (default=None)
    :param mosaic: if True, clip to a mosaic of all polygons (default=False)
    :param grouping: indices time aggregation (default='yr')
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into out_dir.
    """
    from os.path import join, dirname, exists
    from os import makedirs
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import clipping
    import uuid

    if type(resource) != list:
        resource = list([resource])
    # if type(indices) != list:
    #     indices = list([indices])
    if type(polygons) != list and polygons is not None:
        polygons = list([polygons])
    # if type(groupings) != list:
    #     groupings = list([groupings])

    if dir_output is not None:
        if not exists(dir_output):
            makedirs(dir_output)

    datasets = sort_by_filename(resource).keys()

    if len(datasets) > 1:
        LOGGER.warning('more than one dataset in resource; using the first')
    key = datasets[0]

    # from flyingpigeon.subset import select_ugid
    #    tile_dim = 25
    output = None

    # experiments = sort_by_filename(resource)
    outputs = []

    # for key in experiments:

    if variable is None:
        variable = get_variable(resource)
        LOGGER.debug('Variable detected %s ' % variable)

    # variable = key.split('_')[0]
    try:
        # icclim can't handle 'kg m2 sec'; it needs to be 'mm/day'
        if variable == 'pr':
            calc = 'pr=pr*86400'
            ncs = ocgis_module.call(resource=resource,
                                    variable=variable,
                                    dimension_map=dimension_map,
                                    calc=calc,
                                    memory_limit=memory_limit,
                                    # calc_grouping= calc_group,
                                    prefix=str(uuid.uuid4()),
                                    dir_output=dir_output,
                                    output_format='nc')
        else:
            ncs = resource

        try:
            calc = [{'func': 'icclim_' + indice, 'name': indice}]
            LOGGER.info('calc: %s' % calc)
            try:
                calc_group = calc_grouping(grouping)
                LOGGER.info('calc_group: %s' % calc_group)
                if polygons is None:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        LOGGER.debug(' **** dir_output = %s ' % dir_output)
                        tmp = ocgis_module.call(resource=ncs,
                                                variable=variable,
                                                dimension_map=dimension_map,
                                                calc=calc,
                                                calc_grouping=calc_group,
                                                prefix=prefix,
                                                dir_output=dir_output,
                                                output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc indice %s for domain ' % (indice)
                            LOGGER.exception(msg)
                    except:
                        msg = 'could not calc indice %s for domain' % (indice)
                        LOGGER.exception(msg)
                else:
                    try:
                        prefix = key.replace(variable, indice).replace('_day_', '_%s_' % grouping)
                        tmp = clipping(resource=ncs,
                                       variable=variable,
                                       dimension_map=dimension_map,
                                       calc=calc,
                                       calc_grouping=calc_group,
                                       prefix=prefix,
                                       polygons=polygons,
                                       mosaic=mosaic,
                                       dir_output=dir_output,
                                       output_format='nc')
                        if len(tmp) != 0:
                            outputs.extend(tmp)
                        else:
                            msg = 'could not calc clipped indice %s ' % (indice)
                            LOGGER.exception(msg)
                    except:
                        msg = 'could not calc indice %s for domain' % (indice)
                        LOGGER.debug(msg)
                        # raise Exception(msg)
                    LOGGER.info('indice file calculated: %s' % tmp)
            except:
                msg = 'could not calc indice %s for grouping %s' % (indice, grouping)
                LOGGER.exception(msg)
                # raise Exception(msg)
        except:
            msg = 'could not calc indice %s ' % (indice)
            LOGGER.exception(msg)
            # raise Exception(msg)
    except:
        msg = 'could not calculate indices'
        LOGGER.exception(msg)
        # raise Exception(msg)
    LOGGER.info('indice outputs %s ' % outputs)

    if len(outputs) == 0:
        LOGGER.debug('No indices are calculated')
        return None
    return outputs
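A minimal usage sketch for the function above (the file name is hypothetical; 'SU' counts summer days, aggregated per year):

# hypothetical daily maximum-temperature file in DRS naming
tasmax = ['tasmax_day_MODEL_historical_r1i1p1_19700101-19991231.nc']

su_files = calc_indice_simple(resource=tasmax,
                              indice='SU',
                              grouping='yr',
                              dir_output='.')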
Ejemplo n.º 47
0
    def execute(self):

        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp
        
        self.status.set('execution started at : %s '  % dt.now(),5)
      
        ################################
        # reading in the input arguments
        ################################
        try: 
            logger.info('read in the arguments')
            # resources = self.getInputValues(identifier='resources')
            season = self.getInputValues(identifier='season')[0]
            bbox_obj = self.BBox.getValue()
            model_var = self.getInputValues(identifier='reanalyses')[0]
            period = self.getInputValues(identifier='period')[0]            
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
            model, variable = model_var.split('_')

            kappa = int(self.getInputValues(identifier='kappa')[0])
            
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            
        except Exception as e: 
            logger.debug('failed to read in the arguments %s ' % e)
        

        try: 
            start = dt.strptime(period.split('-')[0] , '%Y%m%d')
            end = dt.strptime(period.split('-')[1] , '%Y%m%d')

            if bbox_obj is not None:
                logger.info("bbox_obj={0}".format(bbox_obj.coords))
                bbox = [bbox_obj.coords[0][0], bbox_obj.coords[0][1],bbox_obj.coords[1][0],bbox_obj.coords[1][1]]
                logger.info("bbox={0}".format(bbox))
            else:
                bbox=None
            
        except Exception as e: 
            logger.debug('failed to transform BBOXObject  %s ' % e)

                    

        ###########################
        ### set the environment
        ###########################
        
        self.status.set('fetching data from archive',10)

        try:            
          if model == 'NCEP': 
            if 'z' in variable:
              level=variable.strip('z')
              conform_units_to=None
            else:
              level=None
              conform_units_to='hPa'
          elif '20CRV2' in model: 
            if 'z' in variable:
              level=variable.strip('z')
              conform_units_to=None
            else:
              level=None
              conform_units_to='hPa'
          else:
            logger.error('Reanalyses dataset not known')          
          logger.info('environment set')
        except Exception as e: 
          msg = 'failed to set environment %s ' % e
          logger.error(msg)  
          raise Exception(msg)

        ##########################################
        ### fetch Data from original data archive
        ##########################################

        from flyingpigeon.datafetch import reanalyses as rl            
        try:
          model_nc = rl(start=start.year , 
                        end=end.year , 
                        dataset=model, variable=variable)

          logger.info('reanalyses data fetched')
        except Exception as e:
          msg = 'failed to get reanalyses data  %s' % e
          logger.debug(msg)
          raise Exception(msg)
        
        self.status.set('fetching data done',15)
        ############################################################    
        ### get the required bbox and time region from resource data
        ############################################################
        
        self.status.set('start subsetting',17)
        # from flyingpigeon.weatherregimes import get_level
        from flyingpigeon.ocgis_module import call 

        time_range = [start, end]
        model_subset = call(resource=model_nc, variable=variable, 
          geom=bbox, spatial_wrapping='wrap', time_range=time_range, # conform_units_to=conform_units_to
          )
        logger.info('Dataset subset done: %s ' % model_subset)
        
        self.status.set('dataset subsetted',19)
        ##############################################
        ### computing anomalies 
        ##############################################
        self.status.set('computing anomalies ',19)

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        ### extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)
               
        self.status.set('values normalized',20)
        #######################
        ### call the R scripts
        #######################
        self.status.set('Start weather regime clustering ',25)
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
          rworkspace = curdir
          Rsrc = config.Rsrc_dir() 
          Rfile = 'weatherregimes_model.R'
          
          infile = model_season  #model_subset #model_ponderate 
          modelname = model
          yr1 = start.year
          yr2 = end.year
          ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
          ip, file_pca = mkstemp(dir=curdir ,suffix='.txt')
          ip, file_class = mkstemp(dir=curdir ,suffix='.Rdat')
                    
          args = ['Rscript', join(Rsrc,Rfile), '%s/' % curdir, 
                  '%s/' % Rsrc, '%s'% infile, '%s' % variable, 
                  '%s' % output_graphics, '%s' % file_pca,
                   '%s' % file_class, '%s' % season, 
                   '%s' % start.year, '%s' % end.year,
                   '%s' % model_var, '%s' % kappa]
          logger.info('Rcall built')
        except Exception as e: 
          msg = 'failed to build the R command %s' % e
          logger.debug(msg)  
          raise Exception(msg)
        try:
          output,error = subprocess.Popen(args, stdout = subprocess.PIPE, stderr= subprocess.PIPE).communicate() #, shell=True
          logger.info('R outlog info:\n %s ' % output)
          logger.debug('R outlog errors:\n %s ' % error)
          if len(output) > 0:            
            self.status.set('**** weatherregime in R succeeded', 90)
          else:
            logger.error('no output returned from R call')
        except Exception as e: 
          msg = 'weatherregime in R failed: %s ' % e
          logger.error(msg)  
          raise Exception(msg)
        
        self.status.set('Weather regime clustering done ',80)
        ############################################
        ### set the outputs
        ############################################
        self.status.set('Set the process outputs ',95)
        
        self.Routput_graphic.setValue( output_graphics )
        self.output_pca.setValue( file_pca )
        self.output_classification.setValue( file_class )
        self.output_netcdf.setValue( model_season )
Ejemplo n.º 48
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        try:
            resources = archiveextract(
                resource=rename_complexinputs(request.inputs['resource']))

            if 'region' in request.inputs:
                region = request.inputs['region'][0].data
            else:
                region = None

            if 'mosaic' in request.inputs:
                mosaic = request.inputs['mosaic'][0].data
            else:
                mosaic = False

            percentile = request.inputs['percentile'][0].data

            LOGGER.debug("mosaic %s " % mosaic)
            LOGGER.debug('percentile: %s' % percentile)
            LOGGER.debug('region %s' % region)
            LOGGER.debug('Nr of input files %s ' % len(resources))

        except:
            LOGGER.exception('failed to read in the arguments')

        from flyingpigeon.utils import sort_by_filename
        from flyingpigeon.ocgis_module import call

        datasets = sort_by_filename(resources, historical_concatination=True)
        results = []

        kwds = {'percentile': percentile, 'window_width': 5}
        calc = [{'func': 'daily_perc', 'name': 'dp', 'kwds': kwds}]

        try:
            for key in datasets.keys():
                try:
                    if region is None:
                        result = call(
                            resource=datasets[key],
                            output_format='nc',
                            calc=calc,
                            # prefix=key,
                            # time_region={'year': [1995, 2000]}
                            # calc_grouping='year'
                        )
                        results.extend([result])
                        LOGGER.debug('percentile based indice done for %s' %
                                     result)
                    else:
                        result = clipping(
                            resource=datasets[key],
                            #  variable=None,
                            calc=calc,
                            #  calc_grouping=None,
                            #  time_range=None,
                            #  time_region=None,
                            polygons=region,
                            mosaic=mosaic)
                        results.extend(result)
                except:
                    LOGGER.exception(
                        "failed to calculate percentil based indice for %s " %
                        key)
        except:
            LOGGER.exception("failed to calculate percentile indices")

        tarf = archive(results)

        response.outputs['output_archive'].file = tarf

        i = next((i for i, x in enumerate(results) if x), None)
        if i is None:
            response.outputs['ncout'].file = 'dummy.nc'
        else:
            response.outputs['ncout'].file = results[i]

        response.update_status("done", 100)
        return response
Ejemplo n.º 49
0
def calc_indice_unconventional(resource=[], variable=None, prefix=None,
  indices=None, polygons=None,  groupings=None, 
  dir_output=None, dimension_map = None):
    """
    Calculates given indices for suitable files in the appropriate time grouping and polygon.

    :param resource: list of filenames in drs convention (netcdf)
    :param variable: variable name to be selected in the netcdf file (default=None)
    :param indices: list of indices (default=None)
    :param polygons: list of polygons (default=None)
    :param groupings: indices time aggregation (default=None)
    :param dir_output: output directory for result file (netcdf)
    :param dimension_map: optional dimension map if different to standard (default=None)

    :return: list of netcdf files with calculated indices. Files are saved into dir_output
    """
    
    from os.path import join, dirname, exists
    from os import remove, makedirs
    import uuid
    from flyingpigeon import ocgis_module
    from flyingpigeon.subset import get_ugid, get_geom

    if type(resource) != list: 
      resource = list([resource])
    if type(indices) != list: 
      indices = list([indices])
    if type(polygons) != list and polygons is not None:
      polygons = list([polygons])
    elif polygons is None:
      polygons = [None]
    if type(groupings) != list:
      groupings = list([groupings])
    
    if dir_output is not None:
      if not exists(dir_output): 
        makedirs(dir_output)
    
    experiments = sort_by_filename(resource)
    outputs = []

    # print('environment for calc_indice_unconventional set')
    logger.info('environment for calc_indice_unconventional set')
    
    for key in experiments:
      if variable is None:
        variable = get_variable(experiments[key][0])
      try: 
        ncs = experiments[key]
        for indice in indices:
          logger.info('indice: %s' % indice)
          try: 
            for grouping in groupings:
              logger.info('grouping: %s' % grouping)
              try:
                calc_group = calc_grouping(grouping)
                logger.info('calc_group: %s' % calc_group)
                for polygon in polygons:  
                  try:
                    domain = key.split('_')[1].split('-')[0]
                    if polygon is None:
                      if prefix is None: 
                        prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping )
                      geom = None
                      ugid = None
                    else:
                      if prefix is None: 
                        prefix = key.replace(variable, indice).replace('_day_','_%s_' % grouping ).replace(domain,polygon)
                      geom = get_geom(polygon=polygon)
                      ugid = get_ugid(polygons=polygon, geom=geom)
                    if indice == 'TGx':
                      calc=[{'func': 'max', 'name': 'TGx'}]
                      tmp = ocgis_module.call(resource=ncs,# conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              dir_output=dir_output, geom=geom, select_ugid=ugid)
                    elif indice == 'TGn':
                      calc=[{'func': 'min', 'name': 'TGn'}]
                      tmp = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping= calc_group, prefix=prefix,
                                               dir_output=dir_output, geom=geom, select_ugid = ugid)
                    elif indice == 'TGx5day':
                      calc = [{'func': 'moving_window', 'name': 'TGx5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, prefix=str(uuid.uuid4()),
                                              geom=geom, select_ugid = ugid)
                      calc=[{'func': 'max', 'name': 'TGx5day'}]
                      logger.info('moving window calculated : %s' % tmp2)
                      tmp = ocgis_module.call(resource=tmp2,
                                              variable=indice, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              dir_output=dir_output)
                      remove(tmp2)
                    elif indice == 'TGn5day':
                      calc = [{'func': 'moving_window', 'name': 'TGn5day', 'kwds': {'k': 5, 'operation': 'mean', 'mode': 'same' }}]
                      tmp2 = ocgis_module.call(resource=ncs, #conform_units_to='celcius',
                                              variable=variable, dimension_map=dimension_map, 
                                              calc=calc, prefix=str(uuid.uuid4()),
                                              geom=geom, select_ugid = ugid)
                      calc=[{'func': 'min', 'name': 'TGn5day'}]
                      
                      logger.info('moving window calculated : %s' % tmp2)
                      
                      tmp = ocgis_module.call(resource=tmp2,
                                              variable=indice, dimension_map=dimension_map, 
                                              calc=calc, calc_grouping=calc_group, prefix=prefix,
                                              dir_output=dir_output)
                      remove(tmp2)
                    else: 
                      logger.error('Indice %s is not a known indice' % (indice))
                    outputs.append(tmp)
                    logger.info('indice file calculated %s ' % (tmp))
                  except Exception as e:
                    logger.exception('could not calc indice %s for key %s, polygon %s and calc_grouping %s : %s' %  (indice, key, polygon, grouping, e ))
              except Exception as e:
                logger.exception('could not calc indice %s for key %s and calc_grouping %s : %s' % (indice, key, grouping, e))
          except Exception as e:
            logger.exception('could not calc indice %s for key %s: %s'%  (indice, key, e ))
      except Exception as e:
        logger.exception('could not calc key %s: %s' % (key, e))
    return outputs
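A minimal usage sketch for the function above (the file name is hypothetical; 'TGx5day' is computed as a 5-day moving mean followed by the annual maximum):

# hypothetical daily mean-temperature file in DRS naming
tg = ['tg_day_MODEL_historical_r1i1p1_19700101-19991231.nc']

out = calc_indice_unconventional(resource=tg,
                                 indices=['TGx5day'],
                                 groupings=['yr'],
                                 polygons=['FRA'],
                                 dir_output='.')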
Ejemplo n.º 50
0
def clipping(resource=[], variable=None, dimension_map=None, calc=None,  output_format='nc',
  calc_grouping= None, time_range=None, time_region=None,  historical_concatination=True, prefix=None, spatial_wrapping='wrap', polygons=None, mosaik=False, dir_output=None, memory_limit=None):
  """ returns list of clipped netCDF files
  possible entries: 
  :param resource: list of input netCDF files
  :param variable: variable (string) to be used in netCDF
  :param dimesion_map: specify a dimension map input netCDF has unconventional dimension
  :param calc: ocgis calculation argument
  :param calc_grouping: ocgis calculation grouping 
  :param historical_concatination: concat files of RCPs with appropriate historical runs to one timeseries 
  :param prefix: perfix for output file name
  :param polygons: list of polygons to be used. if more than 1 in the list, a appropriate mosaik will be clipped
  :param output_format: output_format (default='nc')
  :param dir_output: specify a output location
  """
  
  from flyingpigeon.utils import get_variable, drs_filename
  from flyingpigeon.ocgis_module import call
  
  if type(resource) != list: 
    resource = list([resource])
  if type(polygons) != list:
    polygons = list([polygons])
  if prefix is not None:
    if type(prefix) != list:
      prefix = list([prefix])
  
  geoms = set()
  ncs = sort_by_filename(resource, historical_concatination=historical_concatination) #  historical_concatination=True
  geom_files = []
  if mosaik is True:
    try:
      nameadd = '_'
      for polygon in polygons: 
        geoms.add(get_geom(polygon))
        nameadd = nameadd + '-' + polygon  
      if len(geoms) > 1: 
        logger.error('polygons belong to different shapefiles! mosaik option is not possible %s', geoms)
      else: 
        geom = geoms.pop()
      ugids = get_ugid(polygons=polygons, geom=geom)
    except Exception as e:
      logger.debug('geom identification failed %s ' % e)
    for i, key in enumerate (ncs.keys()):
      try:
        if variable is None:
          variable = get_variable(ncs[key])
          logger.info('variable %s detected in resource' % (variable))
        if prefix is None:
          name = key + nameadd
        else:
          name = prefix[i]
        geom_file = call(resource=ncs[key], variable=variable, calc=calc, calc_grouping=calc_grouping, output_format=output_format,
                         prefix=name, geom=geom, select_ugid=ugids, time_range=time_range, time_region=time_region, 
                         spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                         dir_output=dir_output, dimension_map=dimension_map)
        geom_files.append( geom_file )  
      except Exception as e:
        msg = 'ocgis calculations failed for %s ' % (key)
        logger.debug(msg)
  else: 
    for i, polygon in enumerate(polygons): 
      try:
        geom = get_geom(polygon)
        ugid = get_ugid(polygons=polygon, geom=geom)
        for key in  ncs.keys():
          try:
            if variable is None:
              variable = get_variable(ncs[key])
              logger.info('variable %s detected in resource' % (variable))  
            if prefix is None: 
              name = key + '_' + polygon
            else:
              name = prefix[i]
            geom_file = call(resource=ncs[key], variable=variable,  calc=calc, calc_grouping=calc_grouping,output_format=output_format,
              prefix=name, geom=geom, select_ugid=ugid, dir_output=dir_output, dimension_map=dimension_map, spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,time_range=time_range, time_region=time_region,
              )
            geom_files.append( geom_file )
          except Exception as e:
            msg = 'ocgis calculations failed for %s ' % (key)
            logger.debug(msg)
            raise
      except Exception as e:
          logger.debug('geom identification failed')
          raise
  return  geom_files
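A minimal usage sketch for the function above (the file name is hypothetical; passing two polygons with mosaik=True clips one combined region):

# hypothetical daily file in DRS naming
nc = ['tas_day_MODEL_historical_r1i1p1_19700101-19991231.nc']

clipped = clipping(resource=nc,
                   polygons=['DEU', 'FRA'],
                   mosaik=True,
                   dir_output='.')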
Ejemplo n.º 51
0
    def execute(self):
        import time  # performance test
        process_start_time = time.time()  # measure process execution time ...

        from os import path
        from tempfile import mkstemp
        from flyingpigeon import analogs
        from datetime import datetime as dt

        from flyingpigeon.ocgis_module import call
        from flyingpigeon.datafetch import reanalyses
        from flyingpigeon.utils import get_variable, rename_variable

        self.status.set('execution started at : %s ' % dt.now(), 5)

        start_time = time.time()  # measure init ...

        resource = self.getInputValues(identifier='resource')
        bbox_obj = self.BBox.getValue()
        refSt = self.getInputValues(identifier='refSt')
        refEn = self.getInputValues(identifier='refEn')
        dateSt = self.getInputValues(identifier='dateSt')
        dateEn = self.getInputValues(identifier='dateEn')
        normalize = self.getInputValues(identifier='normalize')[0]
        distance = self.getInputValues(identifier='dist')[0]
        outformat = self.getInputValues(identifier='outformat')[0]
        timewin = int(self.getInputValues(identifier='timewin')[0])
        # 'nanalog' and 'seasonwin' are needed by get_configfile below; this
        # assumes the process declares inputs with these identifiers
        nanalog = int(self.getInputValues(identifier='nanalog')[0])
        seasonwin = int(self.getInputValues(identifier='seasonwin')[0])
        experiment = self.getInputValues(identifier='experiment')[0]
        dataset, var = experiment.split('_')

        refSt = dt.strptime(refSt[0], '%Y-%m-%d')
        refEn = dt.strptime(refEn[0], '%Y-%m-%d')
        dateSt = dt.strptime(dateSt[0], '%Y-%m-%d')
        dateEn = dt.strptime(dateEn[0], '%Y-%m-%d')

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            logger.error('output format not valid')

        if bbox_obj is not None:
            logger.info("bbox_obj={0}".format(bbox_obj.coords))
            bbox = [
                bbox_obj.coords[0][0], bbox_obj.coords[0][1],
                bbox_obj.coords[1][0], bbox_obj.coords[1][1]
            ]
            logger.info("bbox={0}".format(bbox))
        else:
            bbox = None

        #start = min( refSt, dateSt )
        #end = max( refEn, dateEn )
        # region = self.getInputValues(identifier='region')[0]
        # bbox = [float(b) for b in region.split(',')]

        try:
            if dataset == 'NCEP':
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    #conform_units_to=None
                else:
                    variable = 'slp'
                    level = None
                    #conform_units_to='hPa'
            elif '20CRV2' in dataset:
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    #conform_units_to=None
                else:
                    variable = 'prmsl'
                    level = None
                    #conform_units_to='hPa'
            else:
                logger.error('Reanalyses dataset not known')
            logger.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("init took %s seconds.", time.time() - start_time)
        self.status.set('Read in the arguments', 5)
        #################
        # get input data
        #################

        start_time = time.time()  # measure get_input_data ...
        self.status.set('fetching input data', 7)
        try:
            input = reanalyses(start=dateSt.year,
                               end=dateEn.year,
                               variable=var,
                               dataset=dataset)
            nc_subset = call(resource=input, variable=var, geom=bbox)
        except Exception as e:
            msg = 'failed to fetch or subset input files %s' % e
            logger.error(msg)
            raise Exception(msg)
        logger.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        self.status.set('**** Input data fetched', 10)

        ########################
        # input data preparation
        ########################
        self.status.set('Start preparing input data', 12)
        start_time = time.time()  # measure data preparation ...
        try:
            self.status.set('Preparing simulation data', 15)
            simulation = call(resource=nc_subset, time_range=[dateSt, dateEn])
        except:
            msg = 'failed to prepare simulation period'
            logger.debug(msg)

        try:
            self.status.set('Preparing target data', 17)
            var_target = get_variable(resource)
            #var_simulation = get_variable(simulation)
            archive = call(
                resource=resource,
                variable=var_target,
                time_range=[refSt, refEn],
                geom=bbox,
                t_calendar='standard',
                # conform_units_to=conform_units_to, spatial_wrapping='wrap',
                regrid_destination=simulation,
                regrid_options='bil')
        except Exception as e:
            msg = 'failed subset archive dataset %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        try:
            if var != var_target:
                rename_variable(archive, oldname=var_target, newname=var)
                logger.info('varname %s in netCDF renamed to %s' %
                            (var_target, var))
        except Exception as e:
            msg = 'failed to rename variable in target files %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare seasonal cycle reference files %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        logger.debug("data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generating the config file
        ############################

        self.status.set('writing config file', 15)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                timewin=timewin,
                varname=var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                period=[
                    dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')
                ],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            logger.debug(msg)
            raise Exception(msg)

        logger.debug("write_config took %s seconds.", time.time() - start_time)

        #######################
        # CASTf90 call
        #######################
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90

        self.status.set('Start CASTf90 call', 20)
        try:
            #self.status.set('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            #system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            logger.info('analogue.out info:\n %s ' % output)
            logger.debug('analogue.out errors:\n %s ' % error)
            self.status.set('**** CASTf90 succeeded', 90)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            logger.error(msg)
            raise Exception(msg)

        logger.debug("castf90 took %s seconds.", time.time() - start_time)

        self.status.set('preparing output', 99)
        self.config.setValue(config_file)
        self.analogs.setValue(output_file)
        self.simulation_netcdf.setValue(simulation)
        self.target_netcdf.setValue(archive)

        self.status.set('execution ended', 100)

        logger.debug("total execution took %s seconds.",
                     time.time() - process_start_time)