# calc = '%s=%s*1' % ('slp', 'slp'),
    # level_range=level_range,
    geom=bbox,
    output_format='nc',
    prefix='ocgis_module_optimisation',
    add_auxiliary_files=False)

# ###################################
# check how much free memory is available
from eggshell import util_functions as ufs
free_memory = ufs.FreeMemory(unit='MB')

# ###########################
# check required memory space
# NOTE(review): 'total' is treated as kB here (see the variable naming) --
# confirm against the ocgis documentation.

data_kb = ops.get_base_request_size()['total']
data_mb = data_kb / 1024.

# ###########################
# check if half of the available memory can take the required data load
# The FreeMemory instance created above is used for the comparison; the
# previously referenced name `fm` is not bound anywhere in the visible script.

if data_mb < free_memory.user_free / 2.:
    print("enough memory. data can be processed directly")
    shnip = dt.now()

    geom = ops.execute()

    shnap = dt.now()

    # single-argument print(...) behaves the same on Python 2 and Python 3
    print('operation performed with execute in %s sec' % (
        shnap - shnip).total_seconds())
# Example #2
# 0
def call(resource=[], variable=None, dimension_map=None, calc=None,
  calc_grouping=None, conform_units_to=None, memory_limit=None, prefix=None,
  regrid_destination=None, regrid_options='bil', level_range=None,
  geom=None, output_format_options=False, search_radius_mult=2.,
  select_nearest=False, select_ugid=None, spatial_wrapping=None,
  time_region=None, time_range=None,
  dir_output=curdir, output_format='nc'):
  """
  Run an ocgis operation, chunked if the request does not fit the memory limit.

  The request is executed directly with ``ops.execute()`` when its size is
  within the memory limit, otherwise tile-wise with
  ``ocgis.util.large_array.compute``. Optionally the result is remapped
  with cdo afterwards.

  :param resource: input file or list of input files (a single value is wrapped into a list)
  :param variable: variable in the input file to be picked; if None, ``rd.variable`` is used
  :param dimension_map: dimension map in case of unconventional storage of data
  :param calc: ocgis calc syntax for the calculation operation
  :param calc_grouping: time aggregate grouping
  :param conform_units_to: passed through to ``RequestDataset``
  :param memory_limit: limit (compared against the request size in MB) of data
                       to be loaded into memory at once. If None (default),
                       half of the free memory reported by ``FreeMemory`` is
                       used. The limit is capped at 4096.
  :param prefix: string for the file base name (default: a generated uuid)
  :param regrid_destination: file containing the target grid (griddes.txt or netCDF file);
                             if given, the result is remapped with cdo
  :param regrid_options: method for regridding, appended to ``remap`` to select the cdo operator:
                          'bil' = bilinear interpolation
                          'bic' = bicubic interpolation
                          'dis' = distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = first-order conservative remapping
                          'laf' = largest area fraction remapping
  :param level_range: subset of given levels
  :param geom: name of shapefile stored in birdhouse shape cabinet
  :param output_format_options: False (no options), True (NETCDF4 with zlib
                                compression level 9) or an options dictionary
  :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
  :param select_nearest: nearest neighbour selection for point geometries
  :param select_ugid: ugid for appropriate polygons
  :param spatial_wrapping: currently ignored, see the note in the body
  :param time_region: select single month
  :param time_range: sequence of two datetime.datetime objects to mark start and end point
  :param dir_output: output directory (default: curdir)
  :param output_format: ocgis output format (default: 'nc')
  :return: output file path, or None if the input has a Lambert Conformal
           projection and a geom was requested
  """
  logger.info('Start ocgis module call function')
  from functools import reduce  # builtin on Python 2, functools on Python 3
  from operator import mul
  import uuid
  from ocgis import OcgOperations, RequestDataset, env
  from ocgis.util.large_array import compute

  # prepare the environment
  env.DIR_SHPCABINET = DIR_SHP
  env.OVERWRITE = True
  env.DIR_OUTPUT = dir_output

  # NOTE(review): the caller-supplied spatial_wrapping is always overwritten
  # here (kept for backward compatibility) -- confirm whether it should be
  # honoured instead.
  if geom is not None:
    spatial_reorder = True
    spatial_wrapping = 'wrap'
  else:
    spatial_reorder = False
    spatial_wrapping = None

  if prefix is None:
    prefix = str(uuid.uuid1())
  env.PREFIX = prefix

  if output_format_options is False:
    output_format_options = None
  elif output_format_options is True:
    output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                             'variable_kwargs': {'zlib': True, 'complevel': 9}}
  else:
    logger.info('output_format_options are set to %s ' % (output_format_options))

  if type(resource) != list:
    resource = [resource]

  # execute ocgis
  logger.info('Execute ocgis module call function')

  if has_Lambert_Conformal(resource) and geom is not None:
    logger.debug('input has Lambert_Conformal projection and can not subsetted with geom')
    return None

  try:
    rd = RequestDataset(resource, variable=variable, level_range=level_range,
      dimension_map=dimension_map, conform_units_to=conform_units_to,
      time_region=time_region, time_range=time_range)
    # One set of keyword arguments shared by the direct and the chunked
    # operation, so both are configured identically.
    ops_kwargs = dict(dataset=rd,
      output_format_options=output_format_options,
      spatial_wrapping=spatial_wrapping,
      spatial_reorder=spatial_reorder,
      calc=calc,
      calc_grouping=calc_grouping,
      geom=geom,
      output_format=output_format,
      prefix=prefix,
      search_radius_mult=search_radius_mult,
      select_nearest=select_nearest,
      select_ugid=select_ugid,
      add_auxiliary_files=False)
    ops = OcgOperations(**ops_kwargs)
    logger.info('OcgOperations set')
  except Exception:
    logger.debug('failed to setup OcgOperations')
    raise

  # check memory load
  from numpy import sqrt
  from flyingpigeon.utils import FreeMemory

  if memory_limit is None:
    f = FreeMemory()
    mem_kb = f.user_free  # presumably kB -- TODO confirm against FreeMemory
    mem_mb = mem_kb / 1024.
    mem_limit = mem_mb / 2.  # set limit to half of the free memory
  else:
    mem_limit = memory_limit

  if mem_limit >= 1024. * 4:
    mem_limit = 1024. * 4
    # 475.0 MB for openDAP

  # The request size is queried once and reused for the chunk calculation.
  size = ops.get_base_request_size()
  data_kb = size['total']
  data_mb = data_kb / 1024.

  if variable is None:
    variable = rd.variable
    logger.info('%s as variable dedected' % (variable))

  logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb, mem_limit))

  if data_mb <= mem_limit:  # input is smaller than the memory limit
    try:
      logger.info('ocgis module call as ops.execute()')
      geom_file = ops.execute()
    except Exception:
      logger.debug('failed to execute ocgis operation')
      raise
  else:
    ##########################
    # calculation of chunk size
    ##########################
    var_size = size['variables'][variable]
    nb_time_coordinates_rd = var_size['temporal']['shape'][0]
    # float() guards against integer floor division on Python 2
    element_in_kb = float(size['total']) / reduce(mul, var_size['value']['shape'])
    element_in_mb = element_in_kb / 1024.
    tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk size
    # a tile dimension of 0 is meaningless, use at least one grid cell
    tile_dimension = max(1, int(tile_dim))

    try:
      logger.info('ocgis module call compute with chunks')
      print('ocgis module call compute with chunks')
      if calc is None:
        # identity calculation, set because the chunked path is always run with a calc
        calc = '%s=%s*1' % (variable, variable)
        logger.info('calc set to = %s ' % calc)
        ops_kwargs['calc'] = calc
        ops = OcgOperations(**ops_kwargs)
      geom_file = compute(ops, tile_dimension=tile_dimension, verbose=True)
    except Exception:
      logger.debug('failed to compute ocgis with chunks')
      raise
  logger.info('Succeeded with ocgis module call function')

  ############################################
  # remapping according to regrid informations
  ############################################
  if regrid_destination is not None:
    try:
      from cdo import Cdo
      cdo = Cdo()

      output = '%s.nc' % uuid.uuid1()
      remap = 'remap%s' % regrid_options
      operators = [op for op in dir(cdo) if remap in op]
      # Look the operator up by name and call it directly instead of
      # exec-ing a command string assembled from file paths.
      output = getattr(cdo, str(operators[0]))(regrid_destination,
        input=geom_file, output=output)
    except Exception:
      logger.debug('failed to remap')
      raise
  else:
    output = geom_file
  return output
# Example #3
# 0
def call(resource=[], variable=None, dimension_map=None, calc=None,
  calc_grouping=None, conform_units_to=None, memory_limit=None, prefix=None,
  geom=None, output_format_options=False, search_radius_mult=2.,
  select_nearest=False, select_ugid=None, time_region=None, time_range=None,
  dir_output=None, output_format='nc'):
  """
  Run an ocgis operation, chunked if the request does not fit the memory limit.

  The request is executed directly with ``ops.execute()`` when its size is
  within the memory limit, otherwise tile-wise with
  ``ocgis.util.large_array.compute``.

  :param resource: input file or list of input files (a single value is wrapped into a list)
  :param variable: variable in the input file to be picked; if None, ``rd.variable`` is used
  :param dimension_map: dimension map in case of unconventional storage of data
  :param calc: ocgis calc syntax for the calculation operation
  :param calc_grouping: time aggregate grouping
  :param conform_units_to: passed through to ``RequestDataset``
  :param memory_limit: limit (compared against the request size in MB) of data
                       to be loaded into memory at once. If None (default),
                       half of the free memory reported by ``FreeMemory`` is
                       used. The limit is capped at 4096.
  :param prefix: assigned to ``env.PREFIX``
  :param geom: name of shapefile stored in birdhouse shape cabinet
  :param output_format_options: False (no options), True (NETCDF4 with zlib
                                compression level 9) or an options dictionary
  :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
  :param select_nearest: nearest neighbour selection for point geometries
  :param select_ugid: ugid for appropriate polygons
  :param time_region: passed through to ``RequestDataset``
  :param time_range: sequence of two datetime.datetime objects to mark start and end point
  :param dir_output: assigned to ``env.DIR_OUTPUT``
  :param output_format: ocgis output format (default: 'nc')
  :return: output file path
  """
  print('start ocgis module')
  logger.info('Start ocgis module call function')
  from functools import reduce  # builtin on Python 2, functools on Python 3
  from operator import mul
  from ocgis import OcgOperations, RequestDataset, env
  from ocgis.util.large_array import compute

  # prepare the environment
  env.DIR_SHPCABINET = DIR_SHP
  env.OVERWRITE = True
  env.DIR_OUTPUT = dir_output
  env.PREFIX = prefix

  if output_format_options is False:
    output_format_options = None
  elif output_format_options is True:
    output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                             'variable_kwargs': {'zlib': True, 'complevel': 9}}
  else:
    logger.info('output_format_options are set to %s ' % (output_format_options))

  if type(resource) != list:
    resource = [resource]

  # execute ocgis
  logger.info('Execute ocgis module call function')

  try:
    # time_range is forwarded as well; it used to be accepted and
    # documented but was never handed to the request dataset.
    rd = RequestDataset(resource, variable=variable,
      dimension_map=dimension_map, conform_units_to=conform_units_to,
      time_region=time_region, time_range=time_range)

    # One set of keyword arguments shared by the direct and the chunked
    # operation, so both are configured identically.
    ops_kwargs = dict(dataset=rd,
      output_format_options=output_format_options,
      calc=calc,
      calc_grouping=calc_grouping,
      geom=geom,
      output_format=output_format,
      search_radius_mult=search_radius_mult,
      select_nearest=select_nearest,
      select_ugid=select_ugid,
      add_auxiliary_files=False)
    ops = OcgOperations(**ops_kwargs)
    logger.info('OcgOperations set')
  except Exception:
    logger.debug('failed to setup OcgOperations')
    raise

  # check memory load
  from numpy import sqrt
  from flyingpigeon.utils import FreeMemory

  if memory_limit is None:
    f = FreeMemory()
    mem_kb = f.user_free  # presumably kB -- TODO confirm against FreeMemory
    mem_mb = mem_kb / 1024.
    mem_limit = mem_mb / 2.  # set limit to half of the free memory
  else:
    mem_limit = memory_limit

  if mem_limit >= 1024. * 4:
    mem_limit = 1024. * 4
    # 475.0 MB for openDAP

  # The request size is queried once and reused for the chunk calculation.
  size = ops.get_base_request_size()
  data_kb = size['total']
  data_mb = data_kb / 1024.

  if variable is None:
    variable = rd.variable
    logger.info('%s as variable dedected' % (variable))

  logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb, mem_limit))
  if data_mb <= mem_limit:  # input is smaller than the memory limit
    logger.info('ocgis module call as ops.execute()')
    try:
      geom_file = ops.execute()
    except Exception:
      logger.debug('failed to execute ocgis operation')
      raise
  else:
    # calculation of chunk size
    var_size = size['variables'][variable]
    nb_time_coordinates_rd = var_size['temporal']['shape'][0]
    # float() guards against integer floor division on Python 2
    element_in_kb = float(size['total']) / reduce(mul, var_size['value']['shape'])
    element_in_mb = element_in_kb / 1024.

    tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk size
    # a tile dimension of 0 is meaningless, use at least one grid cell
    tile_dimension = max(1, int(tile_dim))
    try:
      logger.info('tile_dim = %s; calc = %s ' % (tile_dim, calc))
      if calc is None:
        # identity calculation, set because the chunked path is always run with a calc
        calc = '%s=%s*1' % (variable, variable)
        logger.info('calc set to = %s ' % calc)
        ops_kwargs['calc'] = calc
        ops = OcgOperations(**ops_kwargs)
      geom_file = compute(ops, tile_dimension=tile_dimension, verbose=True)
    except Exception:
      logger.debug('failed to compute ocgis operation')
      raise

  logger.info('Succeeded with ocgis module call function')
  return geom_file
# Example #4
# 0
def call(resource=[], variable=None, dimension_map=None, calc=None,
  calc_grouping=None, conform_units_to=None, memory_limit=None, prefix=None,
  regrid_destination=None, regrid_options='bil', level_range=None,
  geom=None, output_format_options=False, search_radius_mult=2.,
  select_nearest=False, select_ugid=None, spatial_wrapping=None,
  t_calendar=None, time_region=None, time_range=None,
  dir_output=curdir, output_format='nc'):
  """
  Run an ocgis operation, chunked if the request does not fit the memory limit.

  The request is executed directly with ``ops.execute()`` when its size is
  within the memory limit, otherwise tile-wise with
  ``ocgis.util.large_array.compute``. Optionally the result is remapped
  with cdo afterwards.

  :param resource: input file or list of input files (a single value is wrapped into a list)
  :param variable: variable in the input file to be picked; if None, ``rd.variable`` is used
  :param dimension_map: dimension map in case of unconventional storage of data
  :param calc: ocgis calc syntax for the calculation operation
  :param calc_grouping: time aggregate grouping
  :param conform_units_to: passed through to ``RequestDataset``
  :param memory_limit: limit (compared against the request size in MB) of data
                       to be loaded into memory at once. If None (default),
                       half of the free memory reported by ``FreeMemory`` is
                       used. The limit is capped at 4096.
  :param prefix: string for the file base name (default: a generated uuid)
  :param regrid_destination: file containing the target grid (griddes.txt or netCDF file);
                             if given, the result is remapped with cdo
  :param regrid_options: method for regridding, appended to ``remap`` to select the cdo operator:
                          'bil' = bilinear interpolation
                          'bic' = bicubic interpolation
                          'dis' = distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = first-order conservative remapping
                          'laf' = largest area fraction remapping
  :param level_range: subset of given levels
  :param geom: name of shapefile stored in birdhouse shape cabinet
  :param output_format_options: False (no options), True (NETCDF4 with zlib
                                compression level 9) or an options dictionary
  :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
  :param select_nearest: nearest neighbour selection for point geometries
  :param select_ugid: ugid for appropriate polygons
  :param spatial_wrapping: currently ignored, see the note in the body
  :param t_calendar: passed through to ``RequestDataset``
  :param time_region: select single month
  :param time_range: sequence of two datetime.datetime objects to mark start and end point
  :param dir_output: output directory (default: curdir)
  :param output_format: ocgis output format (default: 'nc')
  :return: output file path, or None if the input has a Lambert Conformal
           projection and a geom was requested
  """
  logger.info('Start ocgis module call function')
  from functools import reduce  # builtin on Python 2, functools on Python 3
  from operator import mul
  import uuid
  from ocgis import OcgOperations, RequestDataset, env
  from ocgis.util.large_array import compute

  # prepare the environment
  env.DIR_SHPCABINET = DIR_SHP
  env.OVERWRITE = True
  env.DIR_OUTPUT = dir_output

  # NOTE(review): the caller-supplied spatial_wrapping is always overwritten
  # here (kept for backward compatibility) -- confirm whether it should be
  # honoured instead.
  if geom is not None:
    spatial_reorder = True
    spatial_wrapping = 'wrap'
  else:
    spatial_reorder = False
    spatial_wrapping = None

  if prefix is None:
    prefix = str(uuid.uuid1())
  env.PREFIX = prefix

  if output_format_options is False:
    output_format_options = None
  elif output_format_options is True:
    output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                             'variable_kwargs': {'zlib': True, 'complevel': 9}}
  else:
    logger.info('output_format_options are set to %s ' % (output_format_options))

  if type(resource) != list:
    resource = [resource]

  # execute ocgis
  logger.info('Execute ocgis module call function')

  if has_Lambert_Conformal(resource) and geom is not None:
    logger.debug('input has Lambert_Conformal projection and can not subsetted with geom')
    return None

  try:
    rd = RequestDataset(resource, variable=variable, level_range=level_range,
      dimension_map=dimension_map, conform_units_to=conform_units_to,
      time_region=time_region, t_calendar=t_calendar, time_range=time_range)
    # One set of keyword arguments shared by the direct and the chunked
    # operation, so both are configured identically.
    ops_kwargs = dict(dataset=rd,
      output_format_options=output_format_options,
      spatial_wrapping=spatial_wrapping,
      spatial_reorder=spatial_reorder,
      calc=calc,
      calc_grouping=calc_grouping,
      geom=geom,
      output_format=output_format,
      prefix=prefix,
      search_radius_mult=search_radius_mult,
      select_nearest=select_nearest,
      select_ugid=select_ugid,
      add_auxiliary_files=False)
    ops = OcgOperations(**ops_kwargs)
    logger.info('OcgOperations set')
  except Exception:
    logger.debug('failed to setup OcgOperations')
    raise

  # compare the data load with the available memory
  try:
    from numpy import sqrt
    from flyingpigeon.utils import FreeMemory

    if memory_limit is None:
      f = FreeMemory()
      mem_kb = f.user_free  # presumably kB -- TODO confirm against FreeMemory
      mem_mb = mem_kb / 1024.
      mem_limit = mem_mb / 2.  # set limit to half of the free memory
    else:
      mem_limit = memory_limit

    if mem_limit >= 1024. * 4:
      mem_limit = 1024. * 4
      # 475.0 MB for openDAP

    # The request size is queried once and reused for the chunk calculation.
    size = ops.get_base_request_size()
    data_kb = size['total']
    data_mb = data_kb / 1024.

    if variable is None:
      variable = rd.variable
      logger.info('%s as variable dedected' % (variable))

    logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb, mem_limit))
  except Exception as e:
    logger.debug('failed to compare dataload with free memory %s ' % e)
    raise

  if data_mb <= mem_limit:  # input is smaller than the memory limit
    try:
      logger.info('ocgis module call as ops.execute()')
      geom_file = ops.execute()
    except Exception:
      logger.debug('failed to execute ocgis operation')
      raise
  else:
    ##########################
    # calculation of chunk size
    ##########################
    try:
      var_size = size['variables'][variable]
      nb_time_coordinates_rd = var_size['temporal']['shape'][0]
      # float() guards against integer floor division on Python 2
      element_in_kb = float(size['total']) / reduce(mul, var_size['value']['shape'])
      element_in_mb = element_in_kb / 1024.
      tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk size
      # a tile dimension of 0 is meaningless, use at least one grid cell
      tile_dimension = max(1, int(tile_dim))

      logger.info('ocgis module call compute with chunks')
      print('ocgis module call compute with chunks')
      if calc is None:
        # identity calculation, set because the chunked path is always run with a calc
        calc = '%s=%s*1' % (variable, variable)
        logger.info('calc set to = %s ' % calc)
        ops_kwargs['calc'] = calc
        ops = OcgOperations(**ops_kwargs)
      geom_file = compute(ops, tile_dimension=tile_dimension, verbose=True)
    except Exception:
      logger.debug('failed to compute ocgis with chunks')
      raise
  logger.info('Succeeded with ocgis module call function')

  ############################################
  # remapping according to regrid informations
  ############################################
  if regrid_destination is not None:
    try:
      from cdo import Cdo
      cdo = Cdo()

      output = '%s.nc' % uuid.uuid1()
      remap = 'remap%s' % regrid_options
      operators = [op for op in dir(cdo) if remap in op]
      # Look the operator up by name and call it directly instead of
      # exec-ing a command string assembled from file paths.
      output = getattr(cdo, str(operators[0]))(regrid_destination,
        input=geom_file, output=output)
    except Exception:
      logger.debug('failed to remap')
      raise
  else:
    output = geom_file
  return output