Exemple #1
0
    def test_timing_use_optimizations(self):
        """Check that ``compute`` is faster on average with optimizations on.

        The same monthly-mean operation is run ten times for each setting of
        ``use_optimizations``; the mean wall-clock durations are then compared.
        """
        repetitions = list(range(10))
        elapsed = {True: [], False: []}

        for use_optimizations in [True, False]:
            for ii in repetitions:
                started = time.time()
                rd = self.test_data.get_rd('cancm4_tas')
                # The prefix combines the repetition index and the flag, so it
                # differs for every run.
                run_prefix = str(ii) + str(use_optimizations)
                ops = ocgis.OcgOperations(
                    dataset=rd,
                    calc=[{'func': 'mean', 'name': 'mean'}],
                    calc_grouping=['month'],
                    output_format='nc',
                    geom='state_boundaries',
                    select_ugid=[2, 9, 12, 23, 25],
                    add_auxiliary_files=False,
                    prefix=run_prefix,
                    agg_selection=True)
                compute(ops, 5, verbose=False, use_optimizations=use_optimizations)
                finished = time.time()
                elapsed[use_optimizations].append(finished - started)

        # Summarise the samples collected for each flag value.
        summary = {}
        for flag, samples in elapsed.items():
            as_array = np.array(samples)
            summary[flag] = {'mean': as_array.mean(), 'stdev': as_array.std()}
        self.assertTrue(summary[True]['mean'] < summary[False]['mean'])
Exemple #2
0
    def test_compute_with_geom(self):
        """Compare ``compute`` (tile size 5) with a direct ``execute`` of the same operations."""
        rd = self.test_data.get_rd('cancm4_tas')
        monthly_mean = [{'func': 'mean', 'name': 'mean'}]
        ops = ocgis.OcgOperations(dataset=rd, calc=monthly_mean, calc_grouping=['month'],
                                  output_format='nc', geom='state_boundaries',
                                  select_ugid=[2, 9, 12, 23, 25], add_auxiliary_files=False,
                                  agg_selection=True)
        from_compute = compute(ops, 5, verbose=False)

        # Run the same operations object through execute() under another prefix.
        ops.prefix = 'ocgis'
        from_execute = ops.execute()

        ignored = {'global': ['history'], 'mean': ['_FillValue']}
        self.assertNcEqual(from_compute, from_execute, check_fill_value=False,
                           check_types=False, ignore_attributes=ignored)
Exemple #3
0
    def test_compute_small(self):
        """Run ``compute`` on a previously written subset file and compare with ``execute``."""
        source_rd = self.test_data.get_rd('cancm4_tas')

        # Write a smaller netCDF first; it is the input for the calculation below.
        subset_ops = ocgis.OcgOperations(
            dataset=source_rd,
            geom='state_boundaries',
            select_ugid=[2, 9, 12, 23, 25],
            output_format='nc',
            prefix='sub',
            add_auxiliary_files=False,
            agg_selection=True)
        sub = subset_ops.execute()

        # Monthly mean of 'tas' from the subset, computed through compute().
        rd_sub = ocgis.RequestDataset(sub, 'tas')
        ops = ocgis.OcgOperations(
            dataset=rd_sub,
            calc=[{'func': 'mean', 'name': 'mean'}],
            calc_grouping=['month'],
            output_format='nc',
            add_auxiliary_files=False)
        ret_compute = compute(ops, 5, verbose=False)

        # Execute the same operations normally under a different prefix; both
        # results must agree.
        ops.prefix = 'ocgis_compare'
        ops.add_auxiliary_files = False
        ret_ocgis = ops.execute()

        ignored = {'global': ['history'], 'mean': ['_FillValue']}
        self.assertNcEqual(ret_compute, ret_ocgis, check_fill_value=False,
                           check_types=False, ignore_attributes=ignored)
Exemple #4
0
    def test_with_callback(self):
        """The callback must receive a value of 100 or more exactly once."""
        reported = []

        def callback(a, b):
            # Only the first callback argument is recorded.
            reported.append(a)

        rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'month': [3]}})
        ops = ocgis.OcgOperations(dataset=rd, calc=[{'func': 'mean', 'name': 'mean'}],
                                  calc_grouping=['month'], output_format='nc',
                                  geom='state_boundaries', select_ugid=[2, 9, 12, 23, 25],
                                  add_auxiliary_files=False, callback=callback,
                                  agg_selection=True)
        compute(ops, 3, verbose=False)

        at_or_above_hundred = np.array(reported) >= 100.0
        self.assertEqual(at_or_above_hundred.sum(), 1)
Exemple #5
0
 def test_compute(self):
     """Run a tiled ``daily_perc`` calculation and check the shape of the result."""
     source = self.test_data.get_rd('cancm4_tas')
     calc = [{
         'func': 'daily_perc',
         'name': 'dp',
         'kwds': {'percentile': 90, 'window_width': 5},
     }]
     ops = ocgis.OcgOperations(dataset=source,
                               geom='state_boundaries',
                               select_ugid=[23],
                               calc=calc,
                               output_format='nc',
                               time_region={'year': [2002, 2003]})
     output = compute(ops, 2, verbose=False)
     # Read the result back through a request dataset built from the return value.
     result_rd = ocgis.RequestDataset(uri=output)
     self.assertEqual(result_rd.get().shape, (1, 365, 1, 4, 3))
Exemple #6
0
    def test_compute_2d_grid(self):
        """Tiled monthly mean on the 2D-grid netCDF with a four-value ``geom``."""
        rd = RequestDataset(self.get_path_to_2d_grid_netcdf())

        ops = ocgis.OcgOperations(
            dataset=rd,
            calc=[{'func': 'mean', 'name': 'mean'}],
            calc_grouping=['month'],
            output_format='nc',
            add_auxiliary_files=False,
            geom=[33.7, -35.9, 109.1, 9.4])
        output = compute(ops, 3, verbose=False)

        # Load the written result and check the shape of the 'mean' variable.
        result_field = RequestDataset(output).get()
        self.assertEqual(result_field['mean'].shape, (4, 17, 28))
Exemple #7
0
    def test_compute_2d_grid(self):
        """Tiled monthly mean on the 2D-grid netCDF with a four-value ``geom``."""
        rd = RequestDataset(self.get_path_to_2d_grid_netcdf())

        ops = ocgis.OcgOperations(
            dataset=rd,
            calc=[{'func': 'mean', 'name': 'mean'}],
            calc_grouping=['month'],
            output_format='nc',
            add_auxiliary_files=False,
            geom=[33.7, -35.9, 109.1, 9.4])
        output = compute(ops, 3, verbose=False)

        # Load the written result and check the shape of the whole field.
        result_field = RequestDataset(output).get()
        self.assertEqual(result_field.shape, (1, 4, 1, 16, 27))
Exemple #8
0
    def test_timing_use_optimizations(self):
        """Check that ``compute`` is faster on average with optimizations on.

        The same monthly-mean operation is run ten times for each setting of
        ``use_optimizations``; the mean wall-clock durations are then compared.
        """
        n = range(10)
        t = {True: [], False: []}

        for use_optimizations in [True, False]:
            for ii in n:
                t1 = time.time()
                rd = self.test_data.get_rd('cancm4_tas')
                # The prefix combines the repetition index and the flag, so it
                # differs for every run.
                ops = ocgis.OcgOperations(dataset=rd, calc=[{'func': 'mean', 'name': 'mean'}],
                                          calc_grouping=['month'], output_format='nc',
                                          geom='state_boundaries',
                                          select_ugid=[2, 9, 12, 23, 25],
                                          add_auxiliary_files=False,
                                          prefix=str(ii) + str(use_optimizations))
                compute(ops, 5, verbose=False, use_optimizations=use_optimizations)
                t2 = time.time()
                t[use_optimizations].append(t2 - t1)

        # ``items()`` is available on Python 2 and 3; ``iteritems()`` is Python 2 only.
        tmean = {k: {'mean': np.array(v).mean(), 'stdev': np.array(v).std()} for k, v in t.items()}
        # assertLess reports both values when the comparison fails.
        self.assertLess(tmean[True]['mean'], tmean[False]['mean'])
Exemple #9
0
    def test_compute(self):
        """Check that tiled ``compute`` output equals the standard ``execute`` output.

        The calculations are first written with ``OcgOperations.execute``. Then, for
        each randomly drawn tile dimension, ``compute`` is run and both the
        calculated variables and the ``ocgis.Inspect`` metadata are compared.
        """
        verbose = False
        n_tile_dimensions = 1
        tile_range = [100, 100]
        rd = RequestDatasetCollection(self.test_data.get_rd('cancm4_tasmax_2011'))

        calc = [{'func': 'mean', 'name': 'my_mean'},
                {'func': 'freq_perc', 'name': 'perc_90', 'kwds': {'percentile': 90}},
                {'func': 'freq_perc', 'name': 'perc_95', 'kwds': {'percentile': 95}},
                {'func': 'freq_perc', 'name': 'perc_99', 'kwds': {'percentile': 99}}]
        calc_grouping = ['month']

        # perform computations the standard way
        if verbose:
            print('computing standard file...')
        ops = ocgis.OcgOperations(dataset=rd, output_format='nc', calc=calc,
                                  calc_grouping=calc_grouping, prefix='std')
        std_file = ops.execute()
        if verbose:
            print('standard file is: {0}'.format(std_file))
        std_ds = nc.Dataset(std_file, 'r')
        # try/finally guarantees the dataset handles are closed even when an
        # assertion below fails.
        try:
            std_meta = ocgis.Inspect(std_file).meta

            for _ in range(n_tile_dimensions):
                # randint excludes its upper bound, so add one to keep the range
                # inclusive like the deprecated random_integers.
                tile_dimension = np.random.randint(tile_range[0], tile_range[1] + 1)
                if verbose:
                    print('tile dimension: {0}'.format(tile_dimension))
                # perform computations using tiling
                tile_file = compute(rd, calc, calc_grouping, tile_dimension, verbose=verbose,
                                    prefix='tile')

                # ensure output paths are different
                self.assertNotEqual(tile_file, std_file)

                tile_ds = nc.Dataset(tile_file, 'r')
                try:
                    # compare calculated values variable by variable
                    for element in calc:
                        name = element['name']
                        tile_value = tile_ds.variables[name][:]
                        std_value = std_ds.variables[name][:]
                        self.assertTrue((tile_value == std_value).all())

                    # compare meta dictionaries; every entry of the tiled
                    # metadata must equal its counterpart in the standard one
                    tile_meta = ocgis.Inspect(tile_file).meta
                    for k, section in tile_meta.items():
                        for k2, v2 in section.items():
                            self.assertEqual(v2, std_meta[k][k2])
                finally:
                    tile_ds.close()
        finally:
            std_ds.close()
Exemple #10
0
    def test_calculate_compute(self):
        """Run the tiled ``sfwe`` calculation on the Maurer ``pr`` and ``tas`` requests.

        No assertion is made on the result; the test passes when ``compute``
        returns without raising.
        """
        calc = [{'func': 'sfwe', 'name': 'sfwe', 'kwds': {'tas': 'tas', 'pr': 'pr'}}]
        time_range = None
        rds = []
        for var in [self.maurer_pr, self.maurer_tas]:
            var.update({'time_range': time_range})
            rds.append(var)
        rdc = RequestDatasetCollection(rds)
        # No interactive debugger breakpoint after this call: the test has to be
        # able to run unattended.
        compute(rdc, calc, ['month', 'year'], 50, verbose=True, prefix='sfwe')
Exemple #11
0
    def test_compute_with_geom(self):
        """Compare ``compute`` (tile size 5) with a direct ``execute`` of the same operations."""
        rd = self.test_data.get_rd('cancm4_tas')
        ops = ocgis.OcgOperations(
            dataset=rd,
            calc=[{'func': 'mean', 'name': 'mean'}],
            calc_grouping=['month'],
            output_format='nc',
            geom='state_boundaries',
            select_ugid=[2, 9, 12, 23, 25],
            add_auxiliary_files=False)
        from_compute = compute(ops, 5, verbose=False)

        # Run the same operations object through execute() under another prefix.
        ops.prefix = 'ocgis'
        from_execute = ops.execute()

        ignored = {'global': ['history']}
        self.assertNcEqual(from_compute, from_execute, ignore_attributes=ignored)
Exemple #12
0
def compute_sfwe_maurer():
    """Run the ``sfwe``, ``sum`` and ``ratio_sfwe_p`` calculations on the Maurer files.

    Sets the ``ocgis.env`` data directory, output directory and overwrite flag,
    then performs three ``compute`` calls with a tile dimension of 175 and prints
    the return value of each.
    """
    def maurer_pr():
        # Request dictionary for the precipitation file.
        return {'uri': 'Maurer02new_OBS_pr_daily.1971-2000.nc', 'variable': 'pr'}

    def maurer_tas():
        # Request dictionary for the temperature file.
        return {'uri': 'Maurer02new_OBS_tas_daily.1971-2000.nc', 'variable': 'tas'}

    ocgis.env.DIR_DATA = '/usr/local/climate_data/'
    ocgis.env.DIR_OUTPUT = '/home/local/WX/ben.koziol/climate_data/QED-2013/sfwe/maurer02v2'
    ocgis.env.OVERWRITE = True

    # 1) sfwe from pr and tas, grouped by month and year
    sfwe_calc = [{'func': 'sfwe', 'name': 'sfwe', 'kwds': {'tas': 'tas', 'pr': 'pr'}}]
    sfwe_requests = []
    for request in (maurer_pr(), maurer_tas()):
        request['time_range'] = None
        sfwe_requests.append(request)
    sfwe = compute(RequestDatasetCollection(sfwe_requests), sfwe_calc, ['month', 'year'], 175,
                   verbose=True, prefix='sfwe')
    print(sfwe)

    # 2) summed precipitation, written under the variable name 'p'
    pr_calc = [{'func': 'sum', 'name': 'p'}]
    pr_collection = RequestDatasetCollection([maurer_pr()])
    pr = compute(pr_collection, pr_calc, ['month', 'year'], 175, verbose=True, prefix='pr')
    print(pr)

    # 3) ratio of the two results above, with no calculation grouping
    ratio_calc = [{'func': 'ratio_sfwe_p', 'name': 'sfwe_p', 'kwds': {'sfwe': 'sfwe', 'p': 'p'}}]
    ratio_collection = RequestDatasetCollection([RequestDataset(sfwe, 'sfwe'),
                                                 RequestDataset(pr, 'p')])
    sfwe_p = compute(ratio_collection, ratio_calc, None, 175, verbose=True, prefix='sfwe_p')
    print(sfwe_p)
Exemple #13
0
def compute_sfwe_other():
    """Run the ``sfwe``, ``sum`` and ``ratio_sfwe_p`` calculations for four tas/pr file pairs.

    Sets the ``ocgis.env`` data directory, output directory and overwrite flag.
    For every pair it performs three ``compute`` calls with a tile dimension of
    175 and prints the return value of each.
    """
    ocgis.env.DIR_DATA = '/data/ben.koziol/sfwe/data'
    ocgis.env.DIR_OUTPUT = '/home/local/WX/ben.koziol'
    ocgis.env.OVERWRITE = False

    # Each row: [file name, variable, calendar passed as t_calendar, label used in prefixes].
    tas_sources = [
        ['bcca_gfdl_cm2_1.gregorian.20c3m.run1.tas.1971-2000.nc', 'tas', None, 'bcca_gfdl'],
        ['bcca_cccma_cgcm3_1.gregorian.20c3m.run1.tas.1971-2000.nc', 'tas', None, 'bcca_cccma_cgcm3'],
        ['arrm_cgcm3_t63.20c3m.tas.NAm.1971-2000.nc', 'tas', '365_day', 'arrm_cgcm3'],
        ['arrm_gfdl_2.1.20c3m.tas.NAm.1971-2000.nc', 'tas', '365_day', 'arrm_gfdl'],
    ]
    pr_sources = [
        ['bcca_gfdl_cm2_1.gregorian.20c3m.run1.pr.1971-2000.nc', 'pr', None, 'bcca_gfdl'],
        ['bcca_cccma_cgcm3_1.gregorian.20c3m.run1.pr.1971-2000.nc', 'pr', None, 'bcca_cccma_cgcm3'],
        ['arrm_cgcm3_t63.20c3m.pr.NAm.1971-2000.nc', 'pr', '365_day', 'arrm_cgcm3'],
        ['arrm_gfdl_2.1.20c3m.pr.NAm.1971-2000.nc', 'pr', '365_day', 'arrm_gfdl'],
    ]

    for tas_row, pr_row in zip(tas_sources, pr_sources):
        tas_uri, tas_variable, tas_calendar, label = tas_row
        pr_uri, pr_variable, pr_calendar, _ = pr_row

        tas_rd = ocgis.RequestDataset(tas_uri, tas_variable, t_calendar=tas_calendar)
        pr_rd = ocgis.RequestDataset(pr_uri, pr_variable, t_calendar=pr_calendar)

        # 1) sfwe from tas and pr, grouped by month and year
        sfwe_calc = [{'func': 'sfwe', 'name': 'sfwe', 'kwds': {'tas': 'tas', 'pr': 'pr'}}]
        sfwe_collection = RequestDatasetCollection([tas_rd, pr_rd])
        sfwe_out = compute(sfwe_collection, sfwe_calc, ['month', 'year'], 175, verbose=True,
                           prefix='sfwe_' + label)
        print(sfwe_out)

        # 2) summed precipitation, written under the variable name 'p'
        pr_calc = [{'func': 'sum', 'name': 'p'}]
        pr_collection = RequestDatasetCollection([pr_rd])
        pr_out = compute(pr_collection, pr_calc, ['month', 'year'], 175, verbose=True,
                         prefix='pr_' + label)
        print(pr_out)

        # 3) ratio of the two results; both are read with the tas calendar
        ratio_calc = [{'func': 'ratio_sfwe_p', 'name': 'sfwe_p', 'kwds': {'sfwe': 'sfwe', 'p': 'p'}}]
        ratio_collection = RequestDatasetCollection([
            RequestDataset(sfwe_out, 'sfwe', t_calendar=tas_calendar),
            RequestDataset(pr_out, 'p', t_calendar=tas_calendar),
        ])
        sfwe_p = compute(ratio_collection, ratio_calc, None, 175, verbose=True,
                         prefix='sfwe_p_' + label)
        print(sfwe_p)
Exemple #14
0
    def test_compute_with_time_region(self):
        """Compare ``compute`` with ``execute`` for a request limited to month 3."""
        march_only = {'time_region': {'month': [3]}}
        rd = self.test_data.get_rd('cancm4_tas', kwds=march_only)
        ops = ocgis.OcgOperations(
            dataset=rd,
            calc=[{'func': 'mean', 'name': 'mean'}],
            calc_grouping=['month'],
            output_format='nc',
            geom='state_boundaries',
            select_ugid=[2, 9, 12, 23, 25],
            add_auxiliary_files=False,
            agg_selection=True)
        from_compute = compute(ops, 5, verbose=False)

        # Run the same operations object through execute() under another prefix.
        ops.prefix = 'ocgis'
        from_execute = ops.execute()

        ignored = {'global': ['history'], 'mean': ['_FillValue']}
        self.assertNcEqual(from_compute, from_execute, check_fill_value=False,
                           check_types=False, ignore_attributes=ignored)
Exemple #15
0
    def test_system_compute(self):
        """Run a tiled ``daily_perc`` calculation and check the first data variable's shape."""
        source = self.test_data.get_rd('cancm4_tas')
        calc = [{
            'func': 'daily_perc',
            'name': 'dp',
            'kwds': {'percentile': 90, 'window_width': 5},
        }]
        ops = ocgis.OcgOperations(dataset=source, geom='state_boundaries', select_ugid=[23],
                                  calc=calc, output_format='nc',
                                  time_region={'year': [2002, 2003]})
        output = compute(ops, 2, verbose=False)

        # Read the result back through a request dataset built from the return value.
        actual_field = ocgis.RequestDataset(uri=output).get()
        first_variable = actual_field.data_variables[0]
        self.assertEqual(first_variable.shape, (365, 3, 3))
Exemple #16
0
    def test_large_array_compute_local(self):
        """Test tiling works for a percentile-based index on a local dataset."""

        raise SkipTest('function is very slow with ICCLIM 4.2.5')

        # Everything below is unreachable while the unconditional skip above is
        # in place.
        rd = self.test_data.get_rd('cancm4_tas')
        ops = ocgis.OcgOperations(
            dataset=rd,
            calc=[{'func': 'icclim_TG10p', 'name': 'itg'}],
            calc_grouping=['month'],
            output_format='nc',
            geom='state_boundaries',
            select_ugid=[24])
        output = compute(ops, 5, verbose=False)

        with nc_scope(output) as ds:
            total = ds.variables['itg'][:].sum()
            self.assertAlmostEqual(total, 2121.0, 6)
Exemple #17
0
    def test_multivariate_computation(self):
        """Compare ``compute`` with ``execute`` for a two-dataset ``divide`` calculation."""
        rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'month': [3]}})
        # The second input is a deep copy of the first under the alias 'tas2'.
        rd2 = deepcopy(rd)
        rd2.alias = 'tas2'

        divide = {'func': 'divide', 'name': 'ln', 'kwds': {'arr1': 'tas', 'arr2': 'tas2'}}
        ops = ocgis.OcgOperations(
            dataset=[rd, rd2],
            calc=[divide],
            calc_grouping=['month'],
            output_format='nc',
            geom='state_boundaries',
            select_ugid=[2, 9, 12, 23, 25],
            add_auxiliary_files=False)
        from_compute = compute(ops, 5, verbose=False)

        # Run the same operations object through execute() under another prefix.
        ops.prefix = 'ocgis'
        from_execute = ops.execute()

        ignored = {'global': ['history']}
        self.assertNcEqual(from_compute, from_execute, ignore_attributes=ignored)
Exemple #18
0
    def test_compute_large(self):
        """Test calculations using compute are equivalent with standard calculations.

        The calculations are first written with ``OcgOperations.execute``. Then, for
        each randomly drawn tile dimension, ``compute`` is run on an equivalent
        operations object and the two output files are compared with
        ``assertNcEqual``.
        """
        verbose = False
        n_tile_dimensions = 1
        tile_range = [100, 100]

        rd = RequestDatasetCollection(self.test_data.get_rd('cancm4_tasmax_2011'))

        calc = [{'func': 'mean', 'name': 'my_mean'},
                {'func': 'freq_perc', 'name': 'perc_90', 'kwds': {'percentile': 90}},
                {'func': 'freq_perc', 'name': 'perc_95', 'kwds': {'percentile': 95}},
                {'func': 'freq_perc', 'name': 'perc_99', 'kwds': {'percentile': 99}}]
        calc_grouping = ['month']

        # construct the operational arguments to compute
        ops_compute = ocgis.OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping, output_format='nc',
                                          prefix='tile')

        # perform computations the standard way
        if verbose:
            print('computing standard file...')
        ops = ocgis.OcgOperations(dataset=rd, output_format='nc', calc=calc, calc_grouping=calc_grouping, prefix='std')
        std_file = ops.execute()
        if verbose:
            print('standard file is: {0}'.format(std_file))
        std_ds = nc.Dataset(std_file, 'r')
        # try/finally guarantees the handle is closed even when an assertion
        # below fails.
        try:
            for _ in range(n_tile_dimensions):
                # randint excludes its upper bound, so add one to keep the range
                # inclusive like the deprecated random_integers.
                tile_dimension = np.random.randint(tile_range[0], tile_range[1] + 1)
                if verbose:
                    print('tile dimension: {0}'.format(tile_dimension))
                # perform computations using tiling
                tile_file = compute(ops_compute, tile_dimension, verbose=verbose)

                # ensure output paths are different
                self.assertNotEqual(tile_file, std_file)

                self.assertNcEqual(std_file, tile_file, ignore_attributes={'global': ['history']})

                # Open the tiled output for reading and close it again; no
                # values are read from it here.
                tile_ds = nc.Dataset(tile_file, 'r')
                tile_ds.close()
        finally:
            std_ds.close()
Exemple #19
0
    def test_compute_with_geom(self):
        """Compare ``compute`` with ``execute`` on a generated field, with and without a calculation."""
        # Build a field on a 5.0-resolution global grid and write it to a
        # temporary netCDF file that serves as the input.
        grid = create_gridxy_global(resolution=5.0)
        path = self.get_temporary_file_path('foo.nc')
        create_exact_field(grid, 'exact').write(path)
        rd = RequestDataset(path)

        ignored = {'global': ['history'], 'mean': ['_FillValue']}
        # First pass uses a monthly mean, second pass uses no calculation.
        for ii, c in enumerate([[{'func': 'mean', 'name': 'mean'}], None]):
            ops = ocgis.OcgOperations(
                dataset=rd,
                calc=c,
                calc_grouping=['month'],
                output_format='nc',
                geom=self.path_state_boundaries,
                select_ugid=[2, 9, 12, 23, 25],
                add_auxiliary_files=False,
                agg_selection=True,
                prefix=str(ii) + '_foo')
            from_compute = compute(ops, 5, verbose=False)

            # Run the same operations object through execute() under another prefix.
            ops.prefix = str(ii)
            from_execute = ops.execute()

            self.assertNcEqual(from_compute, from_execute, check_fill_value=False,
                               check_types=False, ignore_attributes=ignored)
Exemple #20
0
    def test_multivariate_computation(self):
        """Compare ``compute`` with ``execute`` for a two-dataset ``divide`` calculation."""
        march_only = {'month': [3]}
        rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': march_only})
        # Second request for the same test dataset, with field and variable
        # renamed to 'tas2'.
        rd2 = self.test_data.get_rd('cancm4_tas',
                                    kwds={'time_region': {'month': [3]},
                                          'field_name': 'tas2',
                                          'rename_variable': 'tas2'})

        divide = {'func': 'divide', 'name': 'ln', 'kwds': {'arr1': 'tas', 'arr2': 'tas2'}}
        ops = ocgis.OcgOperations(dataset=[rd, rd2], calc=[divide], calc_grouping=['month'],
                                  output_format='nc', geom='state_boundaries',
                                  select_ugid=[2, 9, 12, 23, 25], add_auxiliary_files=False,
                                  agg_selection=True)
        from_compute = compute(ops, 5, verbose=False)

        # Run the same operations object through execute() under another prefix.
        ops.prefix = 'ocgis'
        from_execute = ops.execute()

        ignored = {'global': ['history'], 'ln': ['_FillValue']}
        self.assertNcEqual(from_compute, from_execute, check_fill_value=False,
                           check_types=False, ignore_attributes=ignored)
Exemple #21
0
    def test_with_callback(self):
        """The callback must receive a value of 100 or more exactly once."""
        reported = []

        def callback(a, b):
            # Only the first callback argument is recorded.
            reported.append(a)

        rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'month': [3]}})
        ops = ocgis.OcgOperations(
            dataset=rd,
            calc=[{'func': 'mean', 'name': 'mean'}],
            calc_grouping=['month'],
            output_format='nc',
            geom='state_boundaries',
            select_ugid=[2, 9, 12, 23, 25],
            add_auxiliary_files=False,
            callback=callback)
        compute(ops, 3, verbose=False)

        at_or_above_hundred = np.array(reported) >= 100.0
        self.assertEqual(at_or_above_hundred.sum(), 1)
Exemple #22
0
 def test_calculate_compute(self):
     """Run the tiled ``sfwe`` calculation on the Maurer ``pr`` and ``tas`` requests.

     No assertion is made on the result; the test passes when ``compute``
     returns without raising.
     """
     calc = [{
         'func': 'sfwe',
         'name': 'sfwe',
         'kwds': {
             'tas': 'tas',
             'pr': 'pr'
         }
     }]
     time_range = None
     rds = []
     for var in [self.maurer_pr, self.maurer_tas]:
         var.update({'time_range': time_range})
         rds.append(var)
     rdc = RequestDatasetCollection(rds)
     # No interactive debugger breakpoint after this call: the test has to be
     # able to run unattended.
     compute(rdc,
             calc, ['month', 'year'],
             50,
             verbose=True,
             prefix='sfwe')
                    output_format='nc',
                    prefix='ocgis_module_optimisation',
                    dir_output='/home/nils/data/',
                    add_auxiliary_files=False)

# Time a direct ``ops.execute()`` run and print its duration and return value.
# NOTE(review): ``ops`` and ``dt`` are defined above this excerpt; ``dt`` is
# presumably ``datetime.datetime`` -- confirm.
shnip = dt.now()
geom = ops.execute()
shnap = dt.now()
duration = (shnap - shnip).total_seconds()
print("operation performed with execute in {} sec.".format(duration))
print(geom)

# Tile dimension handed to ``compute`` below.
tile_dimension=5  # default

# Time the same operations run through ``compute`` and print the result likewise.
# ``geom``, ``shnip``, ``shnap`` and ``duration`` are rebound here.
shnip = dt.now()
geom = compute(ops, tile_dimension=tile_dimension, verbose=True)
shnap = dt.now()
duration = (shnap - shnip).total_seconds()

print("operation performed with compute in {} sec.".format(duration))
print(geom)


# ###################################
# check free memory available somehow
# from eggshell import util_functions as ufs
# free_memory = ufs.FreeMemory(unit='MB')
#
# # ###########################
# # check required memory space
#
Exemple #24
0
    def test_compute(self):
        """Check that tiled ``compute`` output equals the standard ``execute`` output.

        The calculations are first written with ``OcgOperations.execute``. Then, for
        each randomly drawn tile dimension, ``compute`` is run and both the
        calculated variables and the ``ocgis.Inspect`` metadata are compared.
        """
        verbose = False
        n_tile_dimensions = 1
        tile_range = [100, 100]
        rd = RequestDatasetCollection(
            self.test_data.get_rd('cancm4_tasmax_2011'))

        calc = [{
            'func': 'mean',
            'name': 'my_mean'
        }, {
            'func': 'freq_perc',
            'name': 'perc_90',
            'kwds': {
                'percentile': 90,
            }
        }, {
            'func': 'freq_perc',
            'name': 'perc_95',
            'kwds': {
                'percentile': 95,
            }
        }, {
            'func': 'freq_perc',
            'name': 'perc_99',
            'kwds': {
                'percentile': 99,
            }
        }]
        calc_grouping = ['month']

        # perform computations the standard way
        if verbose:
            print('computing standard file...')
        ops = ocgis.OcgOperations(dataset=rd,
                                  output_format='nc',
                                  calc=calc,
                                  calc_grouping=calc_grouping,
                                  prefix='std')
        std_file = ops.execute()
        if verbose:
            print('standard file is: {0}'.format(std_file))
        std_ds = nc.Dataset(std_file, 'r')
        # try/finally guarantees the dataset handles are closed even when an
        # assertion below fails.
        try:
            std_meta = ocgis.Inspect(std_file).meta

            for _ in range(n_tile_dimensions):
                # randint excludes its upper bound, so add one to keep the range
                # inclusive like the deprecated random_integers.
                tile_dimension = np.random.randint(tile_range[0],
                                                   tile_range[1] + 1)
                if verbose:
                    print('tile dimension: {0}'.format(tile_dimension))
                # perform computations using tiling
                tile_file = compute(rd,
                                    calc,
                                    calc_grouping,
                                    tile_dimension,
                                    verbose=verbose,
                                    prefix='tile')

                # ensure output paths are different
                self.assertNotEqual(tile_file, std_file)

                tile_ds = nc.Dataset(tile_file, 'r')
                try:
                    # compare calculated values variable by variable
                    for element in calc:
                        name = element['name']
                        tile_value = tile_ds.variables[name][:]
                        std_value = std_ds.variables[name][:]
                        self.assertTrue((tile_value == std_value).all())

                    # compare meta dictionaries; every entry of the tiled
                    # metadata must equal its counterpart in the standard one
                    tile_meta = ocgis.Inspect(tile_file).meta
                    for k, section in tile_meta.items():
                        for k2, v2 in section.items():
                            self.assertEqual(v2, std_meta[k][k2])
                finally:
                    tile_ds.close()
        finally:
            std_ds.close()
Exemple #25
0
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None,
         prefix=None, regrid_destination=None, regrid_options='bil',
         level_range=None, geom=None, output_format_options=False,
         search_radius_mult=2., select_nearest=False, select_ugid=None,
         spatial_wrapping=None, t_calendar=None, time_region=None,
         time_range=None, dir_output=curdir, output_format='nc'):
    '''
    Run an ocgis operation, using tiled execution when the request is larger
    than the memory limit, and optionally remap the result with cdo.

    :param resource: input file path or list of paths (a single value is
        wrapped into a list)
    :param variable: variable in the input file to be picked; if None it is
        taken from the request dataset
    :param dimension_map: dimension map in case of unconventional storage of
        data
    :param calc: ocgis calc syntax for the calculation; if None and the
        request has to be tiled, a pass-through ``<variable>=<variable>*1``
        calculation is used
    :param calc_grouping: time aggregate grouping
    :param conform_units_to: passed through to ``RequestDataset``
    :param memory_limit: limit for the amount of data loaded at once,
        compared against the request size in MB. If None (default) half of
        the free memory reported by ``FreeMemory`` is used. Values above
        4096 are capped to 4096.
    :param prefix: string for the file base name; a uuid is generated if None
    :param regrid_destination: file containing the target grid (griddes.txt
        or netCDF file); if given, the ocgis output is remapped with cdo
    :param regrid_options: method for regridding:
                          'bil' = Bilinear interpolation
                          'bic' = Bicubic interpolation
                          'dis' = Distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = First-order conservative remapping
                          'laf' = largest area fraction remapping
    :param level_range: subset of given levels
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF. False (default)
        disables them, True selects NETCDF4 with zlib compression level 9,
        any other value is passed through unchanged.
    :param search_radius_mult: search radius for point geometries. All
        included gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: NOTE: the passed value is currently ignored; it
        is forced to 'wrap' when ``geom`` is given and to None otherwise
    :param t_calendar: passed through to ``RequestDataset``
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark
        start and end point
    :param dir_output: output directory for ocgis (default: curdir)
    :param output_format: ocgis output format (default: 'nc')
    :return: output file path, or None when the input has a Lambert
        Conformal projection and ``geom`` is given
    '''
    logger.info('Start ocgis module call function')
    from functools import reduce  # only a builtin on Python 2
    import uuid

    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output

    # The wrapping/reordering settings are derived from geom alone; the
    # spatial_wrapping argument is overwritten here.
    if geom is not None:
        spatial_reorder = True
        spatial_wrapping = 'wrap'
    else:
        spatial_reorder = False
        spatial_wrapping = None

    if prefix is None:
        prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    # Equality (not identity) is kept on purpose so that 0/1 behave like
    # False/True exactly as before.
    if output_format_options == False:
        output_format_options = None
    elif output_format_options == True:
        output_format_options = {
            'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
            'variable_kwargs': {'zlib': True, 'complevel': 9},
        }
    else:
        logger.info('output_format_options are set to %s ' % (output_format_options))

    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    logger.info('Execute ocgis module call function')

    if has_Lambert_Conformal(resource) == True and geom is not None:
        logger.debug('input has Lambert_Conformal projection and can not subsetted with geom')
        return None

    try:
        rd = RequestDataset(resource, variable=variable,
                            level_range=level_range,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region, t_calendar=t_calendar,
                            time_range=time_range)
        # Everything except ``calc`` is shared between the initial operation
        # and the one rebuilt for tiled execution below.
        ops_kwargs = dict(
            dataset=rd,
            output_format_options=output_format_options,
            spatial_wrapping=spatial_wrapping,
            spatial_reorder=spatial_reorder,
            calc_grouping=calc_grouping,
            geom=geom,
            output_format=output_format,
            prefix=prefix,
            search_radius_mult=search_radius_mult,
            select_nearest=select_nearest,
            select_ugid=select_ugid,
            add_auxiliary_files=False,
        )
        ops = OcgOperations(calc=calc, **ops_kwargs)
        logger.info('OcgOperations set')
    except Exception:
        logger.debug('failed to setup OcgOperations')
        raise

    # compare the size of the request with the memory limit
    try:
        from numpy import sqrt
        from flyingpigeon.utils import FreeMemory

        if memory_limit is None:
            f = FreeMemory()
            mem_kb = f.user_free
            mem_mb = mem_kb / 1024.
            mem_limit = mem_mb / 2.  # set limit to half of the free memory
        else:
            mem_limit = memory_limit

        if mem_limit >= 1024. * 4:
            mem_limit = 1024. * 4
            # 475.0 MB for openDAP

        data_kb = ops.get_base_request_size()['total']
        data_mb = data_kb / 1024.

        if variable is None:
            variable = rd.variable
            # one-element tuple so a tuple-valued variable cannot be
            # mistaken for the format arguments
            logger.info('%s as variable dedected' % (variable,))

        logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb, mem_limit))
    except Exception as e:
        logger.debug('failed to compare dataload with free memory %s ' % e)
        raise

    if data_mb <= mem_limit:  # request fits into the memory limit
        try:
            logger.info('ocgis module call as ops.execute()')
            geom_file = ops.execute()
        except Exception:
            logger.debug('failed to execute ocgis operation')
            raise
    else:
        # calculation of the chunk size for tiled execution
        try:
            size = ops.get_base_request_size()
            var_size = size['variables'][variable]
            nb_time_coordinates_rd = var_size['temporal']['shape'][0]
            element_in_kb = size['total'] / reduce(lambda x, y: x * y,
                                                   var_size['value']['shape'])
            element_in_mb = element_in_kb / 1024.
            # maximum chunk size
            tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))

            logger.info('ocgis module call compute with chunks')
            print('ocgis module call compute with chunks')
            if calc is None:
                # supply a pass-through calculation when none was requested
                calc = '%s=%s*1' % (variable, variable)
                logger.info('calc set to = %s ' % calc)
                ops = OcgOperations(calc=calc, **ops_kwargs)
            geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
        except Exception:
            logger.debug('failed to compute ocgis with chunks')
            raise
    logger.info('Succeeded with ocgis module call function')

    if regrid_destination is None:
        return geom_file

    # remapping according to the regrid information
    try:
        from cdo import Cdo
        cdo = Cdo()

        output = '%s.nc' % uuid.uuid1()
        remap = 'remap%s' % regrid_options
        operators = [op for op in dir(cdo) if remap in op]
        # Look the operator up by name instead of exec-ing generated source,
        # so file names containing quotes cannot break (or inject into) the
        # call.
        remap_operator = getattr(cdo, str(operators[0]))
        output = remap_operator(regrid_destination, input=geom_file,
                                output=output)
    except Exception:
        logger.debug('failed to remap')
        raise
    return output
Exemple #26
0
def compute_sfwe_maurer():
    """Run the three-step sfwe workflow on the Maurer observational data.

    Configures the ocgis environment, then runs ``compute`` three times with
    a tile dimension of 175: the ``sfwe`` calculation on precipitation and
    temperature, the ``sum`` of precipitation, and finally ``ratio_sfwe_p``
    on the two previous results. The value returned by each ``compute`` call
    is printed.
    """
    def maurer_pr():
        # request description for the precipitation file
        return {
            'uri': 'Maurer02new_OBS_pr_daily.1971-2000.nc',
            'variable': 'pr',
        }

    def maurer_tas():
        # request description for the temperature file
        return {
            'uri': 'Maurer02new_OBS_tas_daily.1971-2000.nc',
            'variable': 'tas',
        }

    ocgis.env.DIR_DATA = '/usr/local/climate_data/'
    ocgis.env.DIR_OUTPUT = '/home/local/WX/ben.koziol/climate_data/QED-2013/sfwe/maurer02v2'
    ocgis.env.OVERWRITE = True

    # Step 1: sfwe from precipitation and temperature.
    time_range = None
    sfwe_sources = [
        dict(source, time_range=time_range)
        for source in (maurer_pr(), maurer_tas())
    ]
    sfwe_calc = [{
        'func': 'sfwe',
        'name': 'sfwe',
        'kwds': {'tas': 'tas', 'pr': 'pr'},
    }]
    sfwe = compute(RequestDatasetCollection(sfwe_sources),
                   sfwe_calc, ['month', 'year'],
                   175,
                   verbose=True,
                   prefix='sfwe')
    print(sfwe)

    # Step 2: summed precipitation.
    pr_calc = [{'func': 'sum', 'name': 'p'}]
    pr_collection = RequestDatasetCollection([maurer_pr()])
    pr = compute(pr_collection,
                 pr_calc, ['month', 'year'],
                 175,
                 verbose=True,
                 prefix='pr')
    print(pr)

    # Step 3: ratio of the two intermediate outputs.
    ratio_calc = [{
        'func': 'ratio_sfwe_p',
        'name': 'sfwe_p',
        'kwds': {'sfwe': 'sfwe', 'p': 'p'},
    }]
    ratio_collection = RequestDatasetCollection(
        [RequestDataset(sfwe, 'sfwe'), RequestDataset(pr, 'p')])
    sfwe_p = compute(ratio_collection,
                     ratio_calc,
                     None,
                     175,
                     verbose=True,
                     prefix='sfwe_p')
    print(sfwe_p)
Exemple #27
0
def compute_sfwe_other():
    """Run the three-step sfwe workflow for each downscaled model dataset.

    For every (temperature, precipitation) pair the function runs ``compute``
    three times with a tile dimension of 175 -- ``sfwe``, the ``sum`` of
    precipitation, and ``ratio_sfwe_p`` on the two previous results -- and
    prints the value returned by each call. Output prefixes are suffixed
    with the label of the temperature entry.
    """
    ocgis.env.DIR_DATA = '/data/ben.koziol/sfwe/data'
    ocgis.env.DIR_OUTPUT = '/home/local/WX/ben.koziol'
    ocgis.env.OVERWRITE = False

    # Each entry: (uri, variable, calendar, label)
    tas_sources = (
        ('bcca_gfdl_cm2_1.gregorian.20c3m.run1.tas.1971-2000.nc',
         'tas', None, 'bcca_gfdl'),
        ('bcca_cccma_cgcm3_1.gregorian.20c3m.run1.tas.1971-2000.nc',
         'tas', None, 'bcca_cccma_cgcm3'),
        ('arrm_cgcm3_t63.20c3m.tas.NAm.1971-2000.nc',
         'tas', '365_day', 'arrm_cgcm3'),
        ('arrm_gfdl_2.1.20c3m.tas.NAm.1971-2000.nc',
         'tas', '365_day', 'arrm_gfdl'),
    )
    pr_sources = (
        ('bcca_gfdl_cm2_1.gregorian.20c3m.run1.pr.1971-2000.nc',
         'pr', None, 'bcca_gfdl'),
        ('bcca_cccma_cgcm3_1.gregorian.20c3m.run1.pr.1971-2000.nc',
         'pr', None, 'bcca_cccma_cgcm3'),
        ('arrm_cgcm3_t63.20c3m.pr.NAm.1971-2000.nc',
         'pr', '365_day', 'arrm_cgcm3'),
        ('arrm_gfdl_2.1.20c3m.pr.NAm.1971-2000.nc',
         'pr', '365_day', 'arrm_gfdl'),
    )

    for tas_entry, pr_entry in zip(tas_sources, pr_sources):
        tas_uri, tas_variable, tas_calendar, label = tas_entry
        pr_uri, pr_variable, pr_calendar = pr_entry[:3]

        tas_rd = ocgis.RequestDataset(tas_uri, tas_variable,
                                      t_calendar=tas_calendar)
        pr_rd = ocgis.RequestDataset(pr_uri, pr_variable,
                                     t_calendar=pr_calendar)

        # Step 1: sfwe from temperature and precipitation.
        sfwe_calc = [{
            'func': 'sfwe',
            'name': 'sfwe',
            'kwds': {'tas': 'tas', 'pr': 'pr'},
        }]
        sfwe = compute(RequestDatasetCollection([tas_rd, pr_rd]),
                       sfwe_calc, ['month', 'year'],
                       175,
                       verbose=True,
                       prefix='sfwe_' + label)
        print(sfwe)

        # Step 2: summed precipitation.
        sum_calc = [{'func': 'sum', 'name': 'p'}]
        pr_sum = compute(RequestDatasetCollection([pr_rd]),
                         sum_calc, ['month', 'year'],
                         175,
                         verbose=True,
                         prefix='pr_' + label)
        print(pr_sum)

        # Step 3: ratio of the two intermediate outputs, read back with the
        # calendar of the temperature entry.
        ratio_calc = [{
            'func': 'ratio_sfwe_p',
            'name': 'sfwe_p',
            'kwds': {'sfwe': 'sfwe', 'p': 'p'},
        }]
        ratio_inputs = [
            RequestDataset(sfwe, 'sfwe', t_calendar=tas_calendar),
            RequestDataset(pr_sum, 'p', t_calendar=tas_calendar),
        ]
        sfwe_p = compute(RequestDatasetCollection(ratio_inputs),
                         ratio_calc,
                         None,
                         175,
                         verbose=True,
                         prefix='sfwe_p_' + label)
        print(sfwe_p)
Exemple #28
0
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None,
         prefix=None, regrid_destination=None, regrid_options='bil',
         level_range=None, geom=None, output_format_options=False,
         search_radius_mult=2., select_nearest=False, select_ugid=None,
         spatial_wrapping=None, time_region=None, time_range=None,
         dir_output=curdir, output_format='nc'):
    '''
    Run an ocgis operation, using tiled execution when the request is larger
    than the memory limit, and optionally remap the result with cdo.

    :param resource: input file path or list of paths (a single value is
        wrapped into a list)
    :param variable: variable in the input file to be picked; if None it is
        taken from the request dataset
    :param dimension_map: dimension map in case of unconventional storage of
        data
    :param calc: ocgis calc syntax for the calculation; if None and the
        request has to be tiled, a pass-through ``<variable>=<variable>*1``
        calculation is used
    :param calc_grouping: time aggregate grouping
    :param conform_units_to: passed through to ``RequestDataset``
    :param memory_limit: limit for the amount of data loaded at once,
        compared against the request size in MB. If None (default) half of
        the free memory reported by ``FreeMemory`` is used. Values above
        4096 are capped to 4096.
    :param prefix: string for the file base name; a uuid is generated if None
    :param regrid_destination: file containing the target grid (griddes.txt
        or netCDF file); if given, the ocgis output is remapped with cdo
    :param regrid_options: method for regridding:
                          'bil' = Bilinear interpolation
                          'bic' = Bicubic interpolation
                          'dis' = Distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = First-order conservative remapping
                          'laf' = largest area fraction remapping
    :param level_range: subset of given levels
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF. False (default)
        disables them, True selects NETCDF4 with zlib compression level 9,
        any other value is passed through unchanged.
    :param search_radius_mult: search radius for point geometries. All
        included gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: NOTE: the passed value is currently ignored; it
        is forced to 'wrap' when ``geom`` is given and to None otherwise
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark
        start and end point
    :param dir_output: output directory for ocgis (default: curdir)
    :param output_format: ocgis output format (default: 'nc')
    :return: output file path, or None when the input has a Lambert
        Conformal projection and ``geom`` is given
    '''
    logger.info('Start ocgis module call function')
    from functools import reduce  # only a builtin on Python 2
    import uuid

    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output

    # The wrapping/reordering settings are derived from geom alone; the
    # spatial_wrapping argument is overwritten here.
    if geom is not None:
        spatial_reorder = True
        spatial_wrapping = 'wrap'
    else:
        spatial_reorder = False
        spatial_wrapping = None

    if prefix is None:
        prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    # Equality (not identity) is kept on purpose so that 0/1 behave like
    # False/True exactly as before.
    if output_format_options == False:
        output_format_options = None
    elif output_format_options == True:
        output_format_options = {
            'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
            'variable_kwargs': {'zlib': True, 'complevel': 9},
        }
    else:
        logger.info('output_format_options are set to %s ' % (output_format_options))

    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    logger.info('Execute ocgis module call function')

    if has_Lambert_Conformal(resource) == True and geom is not None:
        logger.debug('input has Lambert_Conformal projection and can not subsetted with geom')
        return None

    try:
        rd = RequestDataset(resource, variable=variable,
                            level_range=level_range,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region, time_range=time_range)
        # Everything except ``calc`` is shared between the initial operation
        # and the one rebuilt for tiled execution below. Sharing the dict
        # also keeps ``spatial_reorder`` on the rebuilt operation, which was
        # previously dropped there.
        ops_kwargs = dict(
            dataset=rd,
            output_format_options=output_format_options,
            spatial_wrapping=spatial_wrapping,
            spatial_reorder=spatial_reorder,
            calc_grouping=calc_grouping,
            geom=geom,
            output_format=output_format,
            prefix=prefix,
            search_radius_mult=search_radius_mult,
            select_nearest=select_nearest,
            select_ugid=select_ugid,
            add_auxiliary_files=False,
        )
        ops = OcgOperations(calc=calc, **ops_kwargs)
        logger.info('OcgOperations set')
    except Exception:
        logger.debug('failed to setup OcgOperations')
        raise

    # check memory load
    from numpy import sqrt
    from flyingpigeon.utils import FreeMemory

    if memory_limit is None:
        f = FreeMemory()
        mem_kb = f.user_free
        mem_mb = mem_kb / 1024.
        mem_limit = mem_mb / 2.  # set limit to half of the free memory
    else:
        mem_limit = memory_limit

    if mem_limit >= 1024. * 4:
        mem_limit = 1024. * 4
        # 475.0 MB for openDAP

    data_kb = ops.get_base_request_size()['total']
    data_mb = data_kb / 1024.

    if variable is None:
        variable = rd.variable
        # one-element tuple so a tuple-valued variable cannot be mistaken
        # for the format arguments
        logger.info('%s as variable dedected' % (variable,))

    logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb, mem_limit))

    if data_mb <= mem_limit:  # request fits into the memory limit
        try:
            logger.info('ocgis module call as ops.execute()')
            geom_file = ops.execute()
        except Exception:
            logger.debug('failed to execute ocgis operation')
            raise
    else:
        # calculation of the chunk size for tiled execution
        size = ops.get_base_request_size()
        var_size = size['variables'][variable]
        nb_time_coordinates_rd = var_size['temporal']['shape'][0]
        element_in_kb = size['total'] / reduce(lambda x, y: x * y,
                                               var_size['value']['shape'])
        element_in_mb = element_in_kb / 1024.
        # maximum chunk size
        tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))

        try:
            logger.info('ocgis module call compute with chunks')
            print('ocgis module call compute with chunks')
            if calc is None:
                # supply a pass-through calculation when none was requested
                calc = '%s=%s*1' % (variable, variable)
                logger.info('calc set to = %s ' % calc)
                ops = OcgOperations(calc=calc, **ops_kwargs)
            geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
        except Exception:
            logger.debug('failed to compute ocgis with chunks')
            raise
    logger.info('Succeeded with ocgis module call function')

    if regrid_destination is None:
        return geom_file

    # remapping according to the regrid information
    try:
        from cdo import Cdo
        cdo = Cdo()

        output = '%s.nc' % uuid.uuid1()
        remap = 'remap%s' % regrid_options
        operators = [op for op in dir(cdo) if remap in op]
        # Look the operator up by name instead of exec-ing generated source,
        # so file names containing quotes cannot break (or inject into) the
        # call.
        remap_operator = getattr(cdo, str(operators[0]))
        output = remap_operator(regrid_destination, input=geom_file,
                                output=output)
    except Exception:
        logger.debug('failed to remap')
        raise
    return output
Exemple #29
0
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None,
         prefix=None, geom=None, output_format_options=False,
         search_radius_mult=2., select_nearest=False, select_ugid=None,
         time_region=None, time_range=None,
         dir_output=None, output_format='nc'):
    '''
    Run an ocgis operation, using tiled execution when the request is larger
    than the memory limit.

    :param resource: input file path or list of paths (a single value is
        wrapped into a list)
    :param variable: variable in the input file to be picked; if None it is
        taken from the request dataset
    :param dimension_map: dimension map in case of unconventional storage of
        data
    :param calc: ocgis calc syntax for the calculation; if None and the
        request has to be tiled, a pass-through ``<variable>=<variable>*1``
        calculation is used
    :param calc_grouping: time aggregate grouping
    :param conform_units_to: passed through to ``RequestDataset``
    :param memory_limit: limit for the amount of data loaded at once,
        compared against the request size in MB. If None (default) half of
        the free memory reported by ``FreeMemory`` is used. Values above
        4096 are capped to 4096.
    :param prefix: assigned to ``env.PREFIX``
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF. False (default)
        disables them, True selects NETCDF4 with zlib compression level 9,
        any other value is passed through unchanged.
    :param search_radius_mult: search radius for point geometries. All
        included gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param time_region: passed through to ``RequestDataset``
    :param time_range: sequence of two datetime.datetime objects to mark
        start and end point
    :param dir_output: assigned to ``env.DIR_OUTPUT``
    :param output_format: ocgis output format (default: 'nc')
    :return: output file path
    '''
    print('start ocgis module')
    logger.info('Start ocgis module call function')
    from functools import reduce  # only a builtin on Python 2

    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output
    env.PREFIX = prefix

    # Equality (not identity) is kept on purpose so that 0/1 behave like
    # False/True exactly as before.
    if output_format_options == False:
        output_format_options = None
    elif output_format_options == True:
        output_format_options = {
            'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
            'variable_kwargs': {'zlib': True, 'complevel': 9},
        }
    else:
        logger.info('output_format_options are set to %s ' % (output_format_options))

    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    logger.info('Execute ocgis module call function')

    try:
        # time_range used to be accepted and documented but never forwarded,
        # so the requested period was silently ignored.
        rd = RequestDataset(resource, variable=variable,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region, time_range=time_range)

        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            output_format=output_format,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        logger.info('OcgOperations set')
    except Exception:
        logger.debug('failed to setup OcgOperations')
        raise

    # check memory load
    from numpy import sqrt
    from flyingpigeon.utils import FreeMemory

    if memory_limit is None:
        f = FreeMemory()
        mem_kb = f.user_free
        mem_mb = mem_kb / 1024.
        mem_limit = mem_mb / 2.  # set limit to half of the free memory
    else:
        mem_limit = memory_limit

    if mem_limit >= 1024. * 4:
        mem_limit = 1024. * 4
        # 475.0 MB for openDAP

    data_kb = ops.get_base_request_size()['total']
    data_mb = data_kb / 1024.

    if variable is None:
        variable = rd.variable
        # one-element tuple so a tuple-valued variable cannot be mistaken
        # for the format arguments
        logger.info('%s as variable dedected' % (variable,))

    logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb, mem_limit))
    if data_mb <= mem_limit:  # request fits into the memory limit
        logger.info('ocgis module call as ops.execute()')
        try:
            geom_file = ops.execute()
        except Exception:
            logger.debug('failed to execute ocgis operation')
            raise
    else:
        # calculation of the chunk size for tiled execution
        size = ops.get_base_request_size()
        var_size = size['variables'][variable]
        nb_time_coordinates_rd = var_size['temporal']['shape'][0]
        element_in_kb = size['total'] / reduce(lambda x, y: x * y,
                                               var_size['value']['shape'])
        element_in_mb = element_in_kb / 1024.

        # maximum chunk size
        tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))
        try:
            logger.info('tile_dim = %s; calc = %s ' % (tile_dim, calc))
            if calc is None:
                # supply a pass-through calculation when none was requested
                calc = '%s=%s*1' % (variable, variable)
                logger.info('calc set to = %s ' % calc)
                # Same request as above with the pass-through calc; the
                # point-geometry options are forwarded too so the tiled run
                # selects the same cells as ops.execute() would.
                ops = OcgOperations(dataset=rd,
                                    output_format_options=output_format_options,
                                    calc=calc,
                                    output_format=output_format,  # 'nc' is necessary for chunked execution
                                    search_radius_mult=search_radius_mult,
                                    select_nearest=select_nearest,
                                    select_ugid=select_ugid,
                                    geom=geom,
                                    add_auxiliary_files=False)
            geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
        except Exception:
            logger.debug('failed to compute ocgis operation')
            raise

    logger.info('Succeeded with ocgis module call function')
    return geom_file
Exemple #30
0
def call(resource=[], variable=None, dimension_map=None, agg_selection=True,
         calc=None, calc_grouping=None, conform_units_to=None, crs=None,
         memory_limit=None, prefix=None,
         regrid_destination=None, regrid_options='bil', level_range=None,
         geom=None, output_format_options=None, search_radius_mult=2.,
         select_nearest=False, select_ugid=None, spatial_wrapping=None,
         t_calendar=None, time_region=None,
         time_range=None, dir_output=None, output_format='nc'):
    """
    Build an OCGIS ``RequestDataset`` / ``OcgOperations`` pair and execute it.

    Failures while setting up or executing the operation are logged and
    reported to the caller by returning ``None``; they are not re-raised.

    :param resource: input netCDF file, or list of files. A non-list value is
                     wrapped into a single-element list.
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param agg_selection: aggregate the selection in case of multiple polygon geoms
    :param calc: ocgis calc syntax for the calculation part
    :param calc_grouping: time aggregate grouping
    :param conform_units_to: passed through to ``RequestDataset``
    :param crs: coordinate reference system.
                NOTE(review): accepted but currently not forwarded to ocgis.
    :param memory_limit: limit for the amount of data loaded into memory at once.
                NOTE(review): accepted but currently not used; the memory
                comparison below is still a placeholder.
    :param prefix: string for the file base name. If None, a UUID1 string is
                   generated and also stored in ``env.PREFIX``.
    :param regrid_destination: file containing the target grid.
                NOTE(review): accepted but currently not forwarded to ocgis.
    :param regrid_options: regridding method name (default 'bil').
                NOTE(review): accepted but currently not forwarded to ocgis.
    :param level_range: subset of given levels
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets, options: None (default), 'wrap', 'unwrap'.
                             'wrap' additionally enables ``spatial_reorder``.
    :param t_calendar: passed through to ``RequestDataset``
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point.
                       Plain ``datetime.date`` values are converted to datetimes at midnight.
    :param dir_output: path to folder to store output files (default: current directory)
    :param output_format: format in which results will be returned.
    :return: the result of the ocgis execution (presumably the output file
             path for file-based formats), or None if setup or execution failed
    """
    LOGGER.info('Start ocgis module call function')
    # ``crs`` is deliberately NOT imported from ocgis here: a function-local
    # import of that name would rebind (and discard) the ``crs`` argument.
    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute
    from datetime import datetime as dt
    from datetime import date as dd
    import uuid

    # prepare the environment
    env.OVERWRITE = True

    if dir_output is None:
        dir_output = abspath(curdir)

    # Normalise time_range: plain dates are promoted to datetimes at midnight.
    # Only the first element is inspected; both elements are converted.
    if time_range is not None:
        try:
            LOGGER.debug('time_range type= %s , %s ', type(time_range[0]), type(time_range[1]))
            LOGGER.debug('time_range= %s , %s ', time_range[0], time_range[1])
            # datetime is a subclass of date, so exclude it explicitly.
            if isinstance(time_range[0], dd) and not isinstance(time_range[0], dt):
                midnight = dt.min.time()
                time_range = [dt.combine(time_range[0], midnight),
                              dt.combine(time_range[1], midnight)]
            LOGGER.debug('time_range changed to type= %s , %s ', type(time_range[0]), type(time_range[1]))
            LOGGER.debug('time_range changed to= %s , %s ', time_range[0], time_range[1])
        except Exception as ex:
            # Best effort: keep going with the time_range as given.
            LOGGER.exception('failed to convert data to datetime %s', ex)

    spatial_reorder = spatial_wrapping == 'wrap'
    LOGGER.debug('spatial_reorder: %s and spatial_wrapping: %s ', spatial_reorder, spatial_wrapping)

    if prefix is None:
        prefix = str(uuid.uuid1())
        env.PREFIX = prefix

    if output_format_options is not None:
        LOGGER.info('output_format_options are set to %s ', output_format_options)

    # Accept a single resource as well as a list of resources.
    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    LOGGER.info('Execute ocgis module call function')

    try:
        LOGGER.debug('call module dir_output = %s ', abspath(dir_output))
        rd = RequestDataset(resource,
                            variable=variable,
                            level_range=level_range,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region,
                            t_calendar=t_calendar,
                            time_range=time_range)

        # Imported inside the try block so that an import failure is handled
        # like any other setup failure (logged, return None).
        from ocgis.constants import DimensionMapKey
        # Set the bounds entry of the time dimension to None.
        # NOTE(review): presumably to make ocgis ignore time bounds - confirm.
        rd.dimension_map.set_bounds(DimensionMapKey.TIME, None)

        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            dir_output=dir_output,
                            spatial_wrapping=spatial_wrapping,
                            spatial_reorder=spatial_reorder,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            agg_selection=agg_selection,
                            output_format=output_format,
                            prefix=prefix,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        LOGGER.info('OcgOperations set')
    except Exception as ex:
        LOGGER.exception('failed to setup OcgOperations: %s', ex)
        return None

    # TODO: include comparison of data load to available memory.
    # Until then these placeholders always select the ops.execute() branch.
    dataload = 1
    available_memory = 2

    try:
        if dataload < available_memory:  # compare dataload to free memory
            LOGGER.info('ocgis module call as ops.execute()')
            geom_file = ops.execute()
        else:
            # TODO: estimate the right tile dimensions
            tile_dimension = 10  # default
            LOGGER.info('Not enough memory for data load, ocgis module call compute in chunks')
            geom_file = compute(ops, tile_dimension=tile_dimension, verbose=True)
    except Exception as ex:
        LOGGER.exception('failed to execute ocgis operation : %s', ex)
        return None
    return geom_file