Example #1
    def test_system_through_operations(self):
        ops = OcgOperations(dataset=self.field_for_test,
                            calc=[{
                                'func': 'mff',
                                'name': 'my_mff'
                            }])
        ret = ops.execute()

        actual_field = ret.get_element()
        actual_variable = actual_field['my_mff']
        self.assertEqual(actual_variable.attrs['long_name'],
                         MockFieldFunction.long_name)
        self.assertEqual(actual_variable.get_value().tolist(),
                         self.desired_value)
        self.assertNotIn('data', list(actual_field.keys()))

        # Test writing output to netCDF.
        ops = OcgOperations(dataset=self.field_for_test,
                            calc=[{
                                'func': 'mff',
                                'name': 'my_mff'
                            }],
                            output_format='nc')
        ret = ops.execute()
        actual_field = RequestDataset(ret).get()
        self.assertEqual(actual_field['my_mff'].get_value().tolist(),
                         self.desired_value)
Example #2
    def test_sql_where_through_operations(self):
        """Test using a SQL where statement to select some geometries."""

        states = ("Wisconsin", "Vermont")
        s = 'STATE_NAME in {0}'.format(states)
        rd = self.test_data.get_rd('cancm4_tas')
        ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True)
        ret = ops.execute()
        self.assertEqual(len(ret), 2)
        self.assertEqual(ret.keys(), [8, 10])
        for v in ret.properties.itervalues():
            self.assertIn(v['STATE_NAME'], states)

        # make sure the sql select has preference over uid
        ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True,
                            geom_select_uid=[500, 600, 700])
        ret = ops.execute()
        self.assertEqual(len(ret), 2)
        for v in ret.properties.itervalues():
            self.assertIn(v['STATE_NAME'], states)

        # test possible interaction with geom_uid
        path = self.get_shapefile_path_with_no_ugid()
        ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s)
        ret = ops.execute()
        self.assertEqual(ret.keys(), [1, 2])

        ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s, geom_uid='ID')
        ret = ops.execute()
        self.assertEqual(ret.keys(), [13, 15])
Example #3
def test_time_region(self):
    uri = 'C:/testclip/WSI_OCGIS_abdu.1979.nc'
    shp = 'C:/testclip/state.shp'
    rd = RequestDataset(uri=uri)
    calc = [{'func': 'sum', 'name': 'sum'}]
    ops_one = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [1]},
                            spatial_operation='clip', geom=shp, calc=calc, calc_raw=True, aggregate=True,
                            calc_grouping='day', prefix='calc', geom_select_sql_where='STATE_NAME="Alabama"')
    ret_one_month = ops_one.execute()
    ops_two = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [2]},
                            spatial_operation='clip', geom=shp, calc=calc, calc_raw=True, aggregate=True,
                            calc_grouping='day', prefix='calc', geom_select_sql_where='STATE_NAME="Alabama"')
    ret_two_month = ops_two.execute()
    ops_original = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [1, 2]},
                                 spatial_operation='clip', geom=shp, calc=calc, calc_raw=True, aggregate=True,
                                 calc_grouping='day', prefix='calc', geom_select_sql_where='STATE_NAME="Alabama"')
    ret_original = ops_original.execute()
    desired = ret_original[1]['forcalc'].variables['sum'].value  # 11.580645161290322
    ops_no_time_region = OcgOperations(dataset=rd, output_format='numpy',
                                       spatial_operation='clip', geom=shp, calc=calc, calc_raw=True, aggregate=True,
                                       calc_grouping='day', prefix='calc', geom_select_sql_where='STATE_NAME="Alabama"')
    ret_no_time_region = ops_no_time_region.execute()
    field = ret_no_time_region[1]['forcalc']
    indices = []
    for idx in range(field.temporal.shape[0]):
        the_time = field.temporal.value_datetime[idx]
        if the_time.month in [1, 2]:
            indices.append(idx)
    var_sub = field.variables['sum'][:, indices, :, :, :]
    actual = var_sub.value
    self.assertNumpyAll(actual, desired)
Example #4
    def test_system_through_operations(self):
        calc = [{'func': MockMultiParamFunction.key, 'name': 'my_mvp', 'kwds': self.parms_for_test}]
        ops = OcgOperations(dataset=self.fields_for_ops_test, calc=calc)
        ret = ops.execute()

        actual_variable = ret.get_element(variable_name='my_mvp')
        self.assertEqual(actual_variable.get_value().tolist(), self.desired_value)

        ops = OcgOperations(dataset=self.fields_for_ops_test, calc=calc, output_format='nc')
        ret = ops.execute()
        actual = RequestDataset(ret).get()['my_mvp']
        self.assertEqual(actual.get_value().tolist(), self.desired_value)
Example #5
    def test_disjoint_polygons(self):
        """Test mesh regridding with the source destination containing disjoint polygons."""

        ESMF.Manager(debug=True)
        self.set_debug(True)

        path_shp = os.path.join(self.path_bin, 'three_polygons', 'three_polygons.shp')
        path_out_nc = self.get_temporary_file_path('ugrid.nc')
        path_source_nc = self.get_temporary_file_path('source.nc')
        mesh_name = 'mesh'

        self.log.debug('creating source netcdf')
        row = np.linspace(-1, 1, 10)
        col = np.linspace(-1, 1, 10)
        self.create_source_netcdf_data(path_source_nc, row=row, col=col)
        ops = OcgOperations(dataset={'uri': path_source_nc}, output_format='shp', snippet=True, prefix='source_shp',
                            dir_output=self.path_current_tmp)
        ops.execute()

        self.log.debug('creating ugrid file: {}'.format(path_out_nc))
        gm = GeometryManager('SPECIAL', path=path_shp)
        geoms = [r['geom'] for r in gm.iter_records()]
        mp = MultiPolygon(geoms)
        # mp = box(-0.25, -0.25, 0.25, 0.25)

        records = [{'geom': mp, 'properties': {'UGID': 123}}]
        gm = GeometryManager('UGID', records=records, allow_multipart=True)
        fm = get_flexible_mesh(gm, mesh_name, False, False)
        fm.save_as_netcdf(path_out_nc, kwargs_dataset={'format': 'NETCDF3_CLASSIC'})

        self.log.debug('getting source field')
        srcgrid = ESMF.Grid(filename=path_source_nc, filetype=ESMF.FileFormat.GRIDSPEC,
                            coord_names=['longitude', 'latitude'], add_corner_stagger=True)
        srcfield = get_field_src(srcgrid, path_source_nc, 'pr')

        self.log.debug('getting destination grid')
        dstgrid = ESMF.Mesh(filename=path_out_nc, filetype=ESMF.FileFormat.UGRID, meshname=mesh_name)
        self.log.debug('getting destination field')
        dstfield = ESMF.Field(dstgrid, "dstfield", meshloc=ESMF.MeshLoc.ELEMENT, ndbounds=[srcfield.data.shape[0]])

        self.log.debug('creating regrid object')
        regrid = ESMF.Regrid(srcfield, dstfield, regrid_method=ESMF.RegridMethod.CONSERVE,
                             unmapped_action=ESMF.UnmappedAction.ERROR)
        # "zero_region" only weighted data will be touched.
        self.log.debug('executing regrid')
        dstfield = regrid(srcfield, dstfield, zero_region=ESMF.Region.SELECT)

        self.assertEqual(dstfield.data.shape, (366, 1))

        print(dstfield.data)

        self.log.debug('success')
Example #6
    def test(self):
        import logbook

        log = logbook.Logger(name='combos', level=logbook.INFO)

        for key, dataset in self.iter_dataset():

            # if key != 'qed_2013_TNn_annual_min': continue

            # these datasets have only one time element
            if key in ('qed_2013_TNn_annual_min',
                       'qed_2013_TasMin_seasonal_max_of_seasonal_means',
                       'qed_2013_climatology_Tas_annual_max_of_annual_means',
                       'qed_2013_maurer02v2_median_txxmmedm_january_1971-2000',
                       'qed_2013_maurer02v2_median_txxmmedm_february_1971-2000',
                       'qed_2013_maurer02v2_median_txxmmedm_march_1971-2000',
                       'snippet_maurer_dtr',
                       'snippet_seasonalbias'):
                slc = None
            else:
                slc = [None, [10, 20], None, None, None]

            # this dataset has different data types on the bounds of the
            # coordinate variables. they currently get cast by the software.
            if key == 'maurer_bcca_1991':
                check_types = False
            else:
                check_types = True

            log.debug('processing: {0} ({1})'.format(key, dataset.__class__.__name__))
            ops = OcgOperations(dataset=dataset, output_format='nc', prefix='nc1', slice=slc)
            try:
                log.debug('initial write...')
                ret1 = ops.execute()
            except ValueError:
                # realization dimensions may not be written to netCDF yet
                if key == 'cmip3_extraction':
                    continue
                else:
                    raise
            else:
                try:
                    ops2 = OcgOperations(dataset={'uri': ret1}, output_format='nc', prefix='nc2')
                    log.debug('second write...')
                    ret2 = ops2.execute()
                    log.debug('comparing...')
                    self.assertNcEqual(ret1, ret2, ignore_attributes={'global': ['history']}, check_types=check_types)
                finally:
                    for path in [ret1, ret2]:
                        folder = os.path.split(path)[0]
                        shutil.rmtree(folder)
        log.debug('success')
Example #7
    def test_execute_directory(self):
        """Test that the output directory is removed appropriately following an operations failure."""

        kwds = dict(add_auxiliary_files=[True, False])
        rd = self.test_data.get_rd('cancm4_tas')

        # this geometry is outside the domain and will result in an exception
        geom = [1000, 1000, 1100, 1100]

        for k in itr_products_keywords(kwds, as_namedtuple=True):
            ops = OcgOperations(dataset=rd, output_format='csv', add_auxiliary_files=k.add_auxiliary_files, geom=geom)
            try:
                ops.execute()
            except ExtentError:
                contents = os.listdir(self.current_dir_output)
                self.assertEqual(len(contents), 0)
Example #8
    def test1d(self):
        p1 = self.write_field_data('v1', ncol=1, nrow=1)
        p3 = self.write_field_data('v1', dir='b')

        ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
        reference = ocgis.RequestDataset(p1, time_range=ref_range).get()

        cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
        candidate = ocgis.RequestDataset(p3, time_range=cand_range)

        calc = [{
            'func': 'dissimilarity',
            'name': 'output_1d',
            'kwds': {
                'target': reference,
                'candidate': ('v1', )
            }
        }]

        ops = OcgOperations(dataset=candidate, calc=calc)
        ret = ops.execute()
        actual_field = ret.get_element()
        actual_variables = get_variable_names(actual_field.data_variables)
        self.assertEqual(actual_variables[0], 'dissimilarity')
        dist = actual_field['dissimilarity']
        self.assertEqual(dist.shape, (1, 1, 2, 2))
Example #9
    def test(self):
        path1 = self.write_field_data('data1')
        path2 = self.write_field_data('data2')
        path3 = self.write_field_data('basis_var')

        time_range = [datetime(2000, 3, 1), datetime(2000, 3, 31)]
        rds = [RequestDataset(p, time_range=time_range) for p in [path1, path2]]
        mrd = MultiRequestDataset(rds)

        basis = RequestDataset(path3, time_range=[datetime(2000, 8, 1), datetime(2000, 8, 31)])
        basis_field = basis.get()

        calc = [{'func': 'mfpf',
                 'name': 'output_mfpf',
                 'kwds': {'reference': ('data1', 'data2'),
                          'basis': basis_field}}]
        ops = OcgOperations(dataset=mrd, calc=calc)
        ret = ops.execute()
        actual_field = ret.get_element()
        actual_variables = get_variable_names(actual_field.data_variables)
        self.assertEqual(actual_variables, ('diff_data1_basis_var', 'diff_data2_basis_var'))

        sums = [v.get_value().sum() for v in actual_field.data_variables]
        for s in sums:
            self.assertAlmostEqual(s, 7.8071042497325145)
Example #10
def run_op(resource, calc, options):
    """Create an OCGIS operation, launch it and return the results."""
    from os.path import abspath, curdir
    from ocgis import OcgOperations, RequestDataset, env
    import uuid

    LOGGER.info('Start ocgis module call function')

    # Prepare the environment
    env.OVERWRITE = True
    dir_output = abspath(curdir)

    prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    rd = [
        RequestDataset(val, variable=key if key != 'resource' else None)
        for key, val in resource.items()
    ]

    ops = OcgOperations(dataset=rd,
                        calc=calc,
                        calc_grouping=options['calc_grouping'],
                        dir_output=dir_output,
                        prefix=prefix,
                        add_auxiliary_files=False,
                        output_format='nc')

    return ops.execute()
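
# A hedged usage sketch (not from the original project): keys in the
# 'resource' mapping are passed as the variable name for their file path,
# except the literal key 'resource', which lets OCGIS auto-detect the
# variable. Paths, names, and the calc below are placeholders.
if __name__ == '__main__':
    resource = {'tas': '/path/to/tas.nc', 'resource': '/path/to/other.nc'}
    calc = [{'func': 'mean', 'name': 'tas_mean'}]
    options = {'calc_grouping': ['month']}
    path_to_nc = run_op(resource, calc, options)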
Example #11
 def __iter__(self):
     its = [p().__iter__() for p in self.get_parameters()]
     for ii, values in enumerate(itertools.product(*its)):
         if self.target_combo is not None:
             if self.target_combo > ii:
                 continue
         kwds = {}
         for val in values:
             kwds.update(val)
         if not self.ops_only:
             kwds.update({'dir_output': tempfile.mkdtemp()})
         try:
             try:
                 ops = OcgOperations(**kwds)
                 try:
                     self.check_blocked(ops)
                 except BlockedCombination:
                     continue
                 if self.verbose:
                     print(ii)
                 if self.ops_only:
                     yld = (ii, ops)
                 else:
                     ret = ops.execute()
                     yld = (ii, ops, ret)
                 yield yld
             except Exception as e:
                 tb = traceback.format_exc()
                 try:
                     self.check_exception(ii, kwds, e, tb)
                 except:
                     raise
         finally:
             if not self.ops_only and self.remove_output:
                 shutil.rmtree(kwds['dir_output'])
Example #12
    def test_subset_with_shapefile_no_ugid(self):
        """Test a subset operation using a shapefile without a UGID attribute."""

        output_format = [constants.OUTPUT_FORMAT_NUMPY, constants.OUTPUT_FORMAT_CSV_SHAPEFILE]

        geom = self.get_shapefile_path_with_no_ugid()
        geom_select_uid = [8, 11]
        geom_uid = 'ID'
        rd = self.test_data.get_rd('cancm4_tas')

        for of in output_format:
            ops = OcgOperations(dataset=rd, geom=geom, geom_select_uid=geom_select_uid, geom_uid=geom_uid, snippet=True,
                                output_format=of)
            self.assertEqual(len(ops.geom), 2)
            ret = ops.execute()
            if of == constants.OUTPUT_FORMAT_NUMPY:
                for element in geom_select_uid:
                    self.assertIn(element, ret)
                self.assertEqual(ret.properties[8].dtype.names, ('STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR'))
            else:
                with open(ret) as f:
                    reader = DictReader(f)
                    row = reader.next()
                    self.assertIn(geom_uid, row.keys())
                    self.assertNotIn(env.DEFAULT_GEOM_UID, row.keys())

                shp_path = os.path.split(ret)[0]
                shp_path = os.path.join(shp_path, 'shp', '{0}_gid.shp'.format(ops.prefix))
                with fiona.open(shp_path) as source:
                    record = source.next()
                    self.assertIn(geom_uid, record['properties'])
                    self.assertNotIn(env.DEFAULT_GEOM_UID, record['properties'])
Example #13
 def test_esmf(self):
     rd1 = RequestDataset(**self.get_dataset())
     rd2 = deepcopy(rd1)
     ops = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc')
     ret = ops.execute()
     ignore_attributes = {'time_bnds': ['units', 'calendar'], 'global': ['history'], 'foo': ['grid_mapping']}
     ignore_variables = ['latitude_longitude']
     self.assertNcEqual(ret, rd1.uri, ignore_attributes=ignore_attributes, ignore_variables=ignore_variables)
Example #15
    def test_system_through_operations(self):
        mrd = self.get_multirequestdataset()
        ops = OcgOperations(dataset=mrd)
        ret = ops.execute()
        field = ret.get_element()
        actual = get_variable_names(field.data_variables)
        self.assertEqual(actual, self.f_variable_names)

        mrd = self.get_multirequestdataset()
        ops = OcgOperations(dataset=mrd, output_format='nc')
        ret = ops.execute()
        actual_field = RequestDataset(ret).get()
        actual = get_variable_names(actual_field.data_variables)
        self.assertEqual(actual, self.f_variable_names)

        actual_diff = actual_field.data_variables[1].get_value() - actual_field.data_variables[0].get_value()
        self.assertAlmostEqual(actual_diff.mean(), 1.0)
Example #17
    def test_esmf(self):
        rd1 = RequestDataset(**self.get_dataset())
        rd2 = deepcopy(rd1)
        ops = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc')
        ret = ops.execute()

        actual_value = RequestDataset(ret).get().data_variables[0].get_value()
        desired_value = rd1.get().data_variables[0].get_value()
        self.assertNumpyAllClose(actual_value, desired_value)
Example #19
 def test_shapefile_through_operations(self):
     path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
     rd = RequestDataset(path)
     field = rd.get()
     ops = OcgOperations(dataset=rd, output_format='shp')
     ret = ops.execute()
     rd2 = RequestDataset(ret)
     field2 = rd2.get()
     self.assertAsSetEqual(list(field.keys()) + [HeaderName.ID_GEOMETRY], list(field2.keys()))
     self.assertEqual((51,), field2.data_variables[0].shape)
Example #20
    def test_write(self):
        # test melted format
        for melted in [False, True]:
            kwargs_ops = dict(melted=melted)
            kwargs_conv = dict(outdir=tempfile.mkdtemp(dir=self.current_dir_output))

            conv = self.get(kwargs_ops=kwargs_ops, kwargs_conv=kwargs_conv)
            csv_path = conv.write()
            self.assertTrue(os.path.exists(csv_path))
            self.assertEqual(conv._ugid_gid_store,
                             {1: {18: [5988, 5989, 5990, 6116, 6117, 6118], 15: [5992, 6119, 6120]}})

            shp_path = os.path.split(csv_path)[0]
            shp_path = os.path.join(shp_path, 'shp')
            shp_path_gid = os.path.join(shp_path, 'foo_gid.shp')
            target = RequestDataset(shp_path_gid).get()
            self.assertEqual(target.shape[-1], 9)
            shp_path_ugid = os.path.join(shp_path, 'foo_ugid.shp')
            target = RequestDataset(shp_path_ugid).get()
            self.assertEqual(target.shape[-1], 2)

        # test aggregating the selection geometry
        rd1 = self.test_data.get_rd('cancm4_tasmax_2011')
        rd2 = self.test_data.get_rd('maurer_bccr_1950')

        keywords = dict(agg_selection=[True, False])
        for k in self.iter_product_keywords(keywords):
            ops = OcgOperations(dataset=[rd1, rd2], snippet=True, output_format='csv-shp', geom='state_boundaries',
                                agg_selection=k.agg_selection, select_ugid=[32, 47], prefix=str(k.agg_selection))
            ret = ops.execute()
            directory = os.path.split(ret)[0]

            path_ugid = os.path.join(directory, 'shp', '{0}_ugid.shp'.format(ops.prefix))
            with fiona.open(path_ugid) as source:
                records = list(source)
            if k.agg_selection:
                uids = [1]
            else:
                uids = [32, 47]
            self.assertEqual([r['properties'][env.DEFAULT_GEOM_UID] for r in records], uids)

            path_gid = os.path.join(directory, 'shp', '{0}_gid.shp'.format(ops.prefix))
            with fiona.open(path_gid) as source:
                uid = [r['properties'][env.DEFAULT_GEOM_UID] for r in source]
            if k.agg_selection:
                self.assertAsSetEqual(uid, [1])
            else:
                uid = np.array(uid)
                self.assertEqual(np.sum(uid == 32), 1915)
                self.assertEqual(np.sum(uid == 47), 923)

            meta = os.path.join(os.path.split(ret)[0], '{0}_source_metadata.txt'.format(ops.prefix))
            with open(meta, 'r') as f:
                lines = f.readlines()
            self.assertTrue(len(lines) > 50)
Example #22
    def test_system_many_request_datasets(self):
        """Test numerous request datasets."""

        rd_base = self.test_data.get_rd('cancm4_tas')
        geom = [-74.0, 40.0, -72.0, 42.0]
        rds = [deepcopy(rd_base) for ii in range(500)]
        for rd in rds:
            ops = OcgOperations(dataset=rd, geom=geom, snippet=True)
            ret = ops.execute()
            actual = ret.get_element(variable_name='tas').shape
            self.assertEqual(actual, (1, 2, 1))
Example #23
 def test_shapefile_through_operations(self):
     path = ShpCabinet().get_shp_path('state_boundaries')
     rd = RequestDataset(path)
     field = rd.get()
     self.assertIsNone(field.spatial.properties)
     ops = OcgOperations(dataset=rd, output_format='shp')
     ret = ops.execute()
     rd2 = RequestDataset(ret)
     field2 = rd2.get()
     self.assertAsSetEqual(field.variables.keys(), field2.variables.keys())
     self.assertEqual(field.shape, field2.shape)
Example #24
 def test_shapefile_through_operations_subset(self):
     path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
     rd = RequestDataset(path)
     field = rd.get()
     self.assertIsNone(field.spatial.properties)
     ops = OcgOperations(dataset=rd, output_format='shp', geom=path, select_ugid=[15])
     ret = ops.execute()
     rd2 = RequestDataset(ret)
     field2 = rd2.get()
     self.assertAsSetEqual(field.variables.keys(), field2.variables.keys())
     self.assertEqual(tuple([1] * 5), field2.shape)
Example #27
    def test_sql_where_through_operations(self):
        """Test using a SQL where statement to select some geometries."""

        states = ("Wisconsin", "Vermont")
        s = 'STATE_NAME in {0}'.format(states)
        rd = self.test_data.get_rd('cancm4_tas')
        ops = OcgOperations(dataset=rd,
                            geom_select_sql_where=s,
                            geom='state_boundaries',
                            snippet=True)
        ret = ops.execute()
        self.assertEqual(len(ret.children), 2)
        self.assertEqual(list(ret.children.keys()), [8, 10])
        for v in ret.properties.values():
            self.assertIn(v['STATE_NAME'], states)

        # Make sure the sql select has preference over UID.
        ops = OcgOperations(dataset=rd,
                            geom_select_sql_where=s,
                            geom='state_boundaries',
                            snippet=True,
                            geom_select_uid=[500, 600, 700])
        ret = ops.execute()
        self.assertEqual(len(ret.children), 2)
        for v in ret.properties.values():
            self.assertIn(v['STATE_NAME'], states)

        # test possible interaction with geom_uid
        path = self.get_shapefile_path_with_no_ugid()
        ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s)
        ret = ops.execute()
        self.assertEqual(list(ret.children.keys()), [7, 9])

        ops = OcgOperations(dataset=rd,
                            geom=path,
                            geom_select_sql_where=s,
                            geom_uid='ID')
        ret = ops.execute()
        self.assertEqual(list(ret.children.keys()), [13, 15])
Example #28
def doCalc(species):
    print('Working on %s' % species)
    # Directory holding climate data.
    DATA_DIR = 'G:/WSI data verification/dataverification'
    # Data returns will overwrite in this case. Use with caution!!
    env.OVERWRITE = True
    env.DIR_SHPCABINET = DATA_DIR
    env.DIR_OUTPUT = DATA_DIR
    # Always start with a snippet (if there are no calculations!).
    SNIPPET = False
    #yearstr = str(year)
    # Filename to variable name mapping.
    uri = 'G:/WSI data verification/dataverification/'+ species + '/WSI_OCGIS_'+species+'.1979_2013.nc'
    shp = 'G:/WSI data verification/dataverification/duckzone.shp'
    # RequestDatasetCollection #######################################################
    rdc = RequestDataset(uri, 'forcalc')
    # Return daily sum
    calc = [{'func': 'sum', 'name': 'sum'}]
    ### Write to Shapefile ###########################################################
    prefix = 'WSI_DZ_' + species  
    #print('returning shapefile for ' + species)
    ops = OcgOperations(dataset=rdc, output_format='shp', time_region={'month': [1, 2, 3, 4, 9, 10, 11, 12]},
                        spatial_operation='clip', geom=shp, calc=calc, calc_raw=True, aggregate=True,
                        calc_grouping=['day', 'month', 'year'], prefix=prefix)
    ops.execute()
Example #29
    def test_system_through_operations(self):
        """Test calculation through operations."""

        row = Variable(name='y', value=[1, 2, 3, 4], dimensions='y')
        col = Variable(name='x', value=[10, 11, 12], dimensions='x')
        grid = Grid(col, row)
        time = TemporalVariable(name='time', value=[1, 2], dimensions='time')
        data = Variable(name='data', dimensions=[time.dimensions[0]] + list(grid.dimensions))
        data.get_value()[0, :] = 1
        data.get_value()[1, :] = 2
        field = Field(grid=grid, time=time, is_data=data)

        calc = [{'func': 'sum', 'name': 'sum'}]
        ops = OcgOperations(dataset=field, calc=calc, calc_grouping='day', calc_raw=True, aggregate=True)
        ret = ops.execute()
        actual = ret.get_element(variable_name='sum').get_masked_value().flatten()
        self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
Example #30
    def test_full(self):
        """Compute the dissimilarity will all metrics."""
        from flyingpigeon import dissimilarity
        from matplotlib import pyplot as plt

        p1 = self.write_field_data('v1', ncol=1, nrow=1)
        p2 = self.write_field_data('v2', ncol=1, nrow=1)
        p3 = self.write_field_data('v1', ncol=11, nrow=10, dir='c')
        p4 = self.write_field_data('v2', ncol=11, nrow=10, dir='c')

        ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
        ref = [ocgis.RequestDataset(p, time_range=ref_range) for p in [p1, p2]]
        reference = ocgis.MultiRequestDataset(ref)
        reference = reference.get()

        cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
        can = [
            ocgis.RequestDataset(p, time_range=cand_range) for p in [p3, p4]
        ]
        candidate = ocgis.MultiRequestDataset(can)

        fig, axes = plt.subplots(2, 3)
        for i, dist in enumerate(dissimilarity.__all__):

            calc = [{
                'func': 'dissimilarity',
                'name': 'output_mfpf',
                'kwds': {
                    'target': reference,
                    'candidate': ('v1', 'v2'),
                    'dist': dist
                }
            }]

            ops = OcgOperations(dataset=candidate, calc=calc)
            ret = ops.execute()
            out_field = ret.get_element()
            var_name = get_variable_names(out_field.data_variables)[0]
            out = out_field[var_name].get_value()[0, 0]
            axes.flat[i].imshow(out)
            axes.flat[i].set_title(dist)

        path = os.path.join(test_output_path,
                            'test_spatial_analog_metrics.png')
        plt.savefig(path)
        plt.close()
Example #31
    def test_subset_with_shapefile_no_ugid(self):
        """Test a subset operation using a shapefile without a UGID attribute."""

        output_format = [
            constants.OutputFormatName.OCGIS,
            constants.OutputFormatName.CSV_SHAPEFILE
        ]

        geom = self.get_shapefile_path_with_no_ugid()
        geom_select_uid = [8, 11]
        geom_uid = 'ID'
        rd = self.test_data.get_rd('cancm4_tas')

        for of in output_format:
            ops = OcgOperations(dataset=rd,
                                geom=geom,
                                geom_select_uid=geom_select_uid,
                                geom_uid=geom_uid,
                                snippet=True,
                                output_format=of)
            self.assertEqual(len(ops.geom), 2)
            ret = ops.execute()
            if of == constants.OutputFormatName.OCGIS:
                for element in geom_select_uid:
                    self.assertIn(element, ret.children)
                self.assertAsSetEqual(
                    list(ret.properties[8].keys()),
                    ['STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR'])
            else:
                with open(ret) as f:
                    reader = DictReader(f)
                    row = next(reader)
                    self.assertIn(geom_uid, list(row.keys()))
                    self.assertNotIn(env.DEFAULT_GEOM_UID, list(row.keys()))

                shp_path = os.path.split(ret)[0]
                shp_path = os.path.join(shp_path, 'shp',
                                        '{0}_gid.shp'.format(ops.prefix))
                with fiona.open(shp_path) as source:
                    record = next(source)
                    self.assertIn(geom_uid, record['properties'])
                    self.assertNotIn(env.DEFAULT_GEOM_UID,
                                     record['properties'])
Example #32
 def __iter__(self):
     its = [p().__iter__() for p in self.get_parameters()]
     for ii,values in enumerate(itertools.product(*its)):
         if self.n_only:
             yield(ii)
             continue
         if self.target_combo is not None:
             if self.target_combo > ii:
                 continue
         yield(ii)
         kwds = {}
         for val in values:
             ## check for environmental parameters
             if val.keys()[0].isupper():
                 setattr(env,val.keys()[0],val.values()[0])
             else:
                 kwds.update(val)
         if not self.ops_only:
             kwds.update({'dir_output':tempfile.mkdtemp()})
         try:
             try:
                 ops = OcgOperations(**kwds)
                 try:
                     self.check_blocked(ops)
                 except BlockedCombination:
                     continue
                 if self.verbose: print(ii)
                 if self.ops_only:
                     pass
                 else:
                     ret = ops.execute()
             except Exception as e:
                 raise
                 tb = traceback.format_exc()
                 try:
                     self.check_exception(ii,kwds,e,tb)
                 except:
                     raise
         finally:
             if not self.ops_only and self.remove_output:
                 shutil.rmtree(kwds['dir_output'])
             env.reset()
Example #34
    def test_build(self):
        path = self.get_shapefile_path_with_no_ugid()
        keywords = dict(geom_uid=['ID', None])
        rd = self.test_data.get_rd('cancm4_tas')
        for k in self.iter_product_keywords(keywords):
            if k.geom_uid is None:
                geom_select_uid = None
            else:
                geom_select_uid = [8]
            ops = OcgOperations(dataset=rd, geom=path, geom_uid=k.geom_uid, geom_select_uid=geom_select_uid,
                                snippet=True)
            coll = ops.execute()
            conv = CsvShapefileConverter([coll], outdir=self.current_dir_output, prefix='foo', overwrite=True, ops=ops)
            ret = conv._build_(coll)

            if k.geom_uid is None:
                actual = env.DEFAULT_GEOM_UID
            else:
                actual = k.geom_uid
            actual = [constants.HEADERS.ID_DATASET.upper(), actual, constants.HEADERS.ID_GEOMETRY.upper()]
            self.assertEqual(actual, ret['fiona_object'].meta['schema']['properties'].keys())
Example #35
 def _handler(self, request, response):
     try:
         ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
         ocgis.env.OVERWRITE = True
         nc_files = archiveextract(resource=rename_complexinputs(
             request.inputs['resource']))
         rd = RequestDataset(nc_files)
         rd.dimension_map.set_bounds('time', None)
         if nc_files[0][-3:] == '.nc':
             out_prefix = nc_files[0][:-3] + '_merged'
         else:
             out_prefix = nc_files[0] + '_merged'
         ops = OcgOperations(dataset=rd, output_format='nc',
                             prefix=out_prefix)
         ret = ops.execute()
         response.outputs['output'].file = ret
         response.outputs['output'].output_format = \
             Format('application/x-netcdf')
         return response
     except:
         raise Exception(traceback.format_exc())
Example #36
def merge(resource, dir_output=None, historical_concatination=False):
  """
  Returns a list of paths to sorted and merged netCDF files.

  Files are sorted according to filename (DRS convention) and appropriate netCDF files are merged.

  :param resource: list of netCDF file paths; an equal domain and the DRS naming convention are required for merging
  :param historical_concatination: concatenation of historical files to RCP scenarios (default: False)
  :param dir_output: path to the output directory
  """
  
  from os.path import curdir, basename, join
  from os import rename
  #from tempfile import mkdtemp
  import utils
  from ocgis import RequestDataset , OcgOperations
  
  #if type(resource) == list: 
    #resource = {'merge':resource}
    
  res_dic = utils.sort_by_filename(resource, historical_concatination=historical_concatination)
  merged_files = []

  if dir_output is None:
    dir_output = curdir

  for key in res_dic:
    if len(res_dic[key]) > 1:
      ncs = res_dic[key]
      var = key.split('_')[0]
      rd = RequestDataset(uri=ncs, variable=var)
      ops = OcgOperations(dataset=rd, prefix=key, output_format='nc', dir_output=dir_output,
                          add_auxiliary_files=False)
      m_file = ops.execute()
      merged_files.append(utils.drs_filename(m_file, variable=var))
    else:
      bn = basename(res_dic[key][0])
      newname = str(join(dir_output, bn))
      rename(res_dic[key][0], newname)  # rename from the full source path, not just the basename
      merged_files.append(newname)
  return merged_files
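
# A minimal, hypothetical usage sketch for merge(); the paths below are
# placeholders and are assumed to follow the DRS naming convention expected
# by utils.sort_by_filename.
ncs = ['/data/tas_day_model_historical_r1i1p1_19700101-19801231.nc',
       '/data/tas_day_model_historical_r1i1p1_19810101-19901231.nc']
merged = merge(ncs, dir_output='/tmp')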
Example #37
def test_ocgis_average():
    v1 = TESTDATA['cmip3_tasmin_sresa2_da_nc'][6:]
    v2 = TESTDATA['cmip3_tasmax_sresa2_da_nc'][6:]

    rd1 = RequestDataset(v1)
    rd2 = RequestDataset(v2)

    ops = OcgOperations([rd1, rd2],
                        calc=[{
                            'func': 'average',
                            'name': 'tas',
                            'kwds': {
                                'v1': 'tasmin',
                                'v2': 'tasmax'
                            }
                        }])
    ret = ops.execute()
    t = ret.get_element()['tas'][0, :, :].get_value()

    t1 = rd1.get_field()['tasmin'][0, :, :].get_value()
    t2 = rd2.get_field()['tasmax'][0, :, :].get_value()
    aaae(np.mean([t1, t2], axis=0), t)
Example #38
    def test_calculate_operations(self):
        """Test calculation through operations."""

        row = VectorDimension(value=[1, 2, 3, 4])
        col = VectorDimension(value=[10, 11, 12])
        grid = SpatialGridDimension(row=row, col=col)
        spatial = SpatialDimension(grid=grid)
        time = TemporalDimension(value=[1, 2])
        field = Field(spatial=spatial, temporal=time)

        data = np.zeros((1, 2, 1, 4, 3), dtype=float)
        data[:, 0, :] = 1
        data[:, 1, :] = 2

        var = Variable(value=data, name='data')

        field.variables.add_variable(var)

        calc = [{'func': 'sum', 'name': 'sum'}]
        ops = OcgOperations(dataset=field, calc=calc, calc_grouping='day', calc_raw=True, aggregate=True)
        ret = ops.execute()
        actual = ret[1]['data'].variables['sum'].value.flatten()
        self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
Example #40
# Compute a custom percentile basis using ICCLIM.

# Path to a CF climate dataset. This example uses the same file for the indice and the percentile basis calculation.
in_file = '/path/to/cf_data.nc'

# Subset the input dataset to return the desired base period for the percentile basis.
variable = 'tas'
years = range(2001, 2003)  # A custom date range may be required for your data
time_region = {'year': years}
rd = RequestDataset(uri=in_file, variable=variable)
field = rd.create_field()
field = field.time.get_time_region(time_region).parent

# Calculate the percentile basis. The data values must be a three-dimensional array.
arr = field[variable].get_masked_value().squeeze()  # This is the field data to use for the calculation
dt_arr = field.temporal.value_datetime  # This is an array of datetime objects.
percentile = 90
window_width = 5
t_calendar, t_units = field.time.calendar, field.time.units  # ICCLIM requires calendar and units for the calculation
percentile_dict = IcclimTG90p.get_percentile_dict(arr, dt_arr, percentile, window_width, t_calendar, t_units)

########################################################################################################################
# Calculate indice using custom percentile basis.

# Depending on the size of the data, this computation may take some time...
calc = [{'func': 'icclim_TG90p', 'name': 'TG90p', 'kwds': {'percentile_dict': percentile_dict}}]
calc_grouping = 'month'
# Returns data as an in-memory spatial collection
ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping)
coll = ops.execute()
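
# A short follow-on sketch (assumed, not part of the original snippet): pull
# the computed indice out of the in-memory spatial collection returned above.
field = coll.get_element()  # the single output field in the collection
tg90p_values = field['TG90p'].get_masked_value()  # masked array of TG90p results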
Example #41
    def test_combinatorial_projection_with_geometries(self):

        # self.get_ret(kwds={'output_format':'shp','prefix':'as_polygon'})
        # self.get_ret(kwds={'output_format':'shp','prefix':'as_point','abstraction':'point'})

        features = [
            {'NAME': 'a', 'wkt': 'POLYGON((-105.020430 40.073118,-105.810753 39.327957,-105.660215 38.831183,-104.907527 38.763441,-104.004301 38.816129,-103.643011 39.802151,-103.643011 39.802151,-103.643011 39.802151,-103.643011 39.802151,-103.959140 40.118280,-103.959140 40.118280,-103.959140 40.118280,-103.959140 40.118280,-104.327957 40.201075,-104.327957 40.201075,-105.020430 40.073118))'},
            {'NAME': 'b', 'wkt': 'POLYGON((-102.212903 39.004301,-102.905376 38.906452,-103.311828 37.694624,-103.326882 37.295699,-103.898925 37.220430,-103.846237 36.746237,-102.619355 37.107527,-102.634409 37.724731,-101.874194 37.882796,-102.212903 39.004301))'},
            {'NAME': 'c', 'wkt': 'POLYGON((-105.336559 37.175269,-104.945161 37.303226,-104.726882 37.175269,-104.696774 36.844086,-105.043011 36.693548,-105.283871 36.640860,-105.336559 37.175269))'},
            {'NAME': 'd', 'wkt': 'POLYGON((-102.318280 39.741935,-103.650538 39.779570,-103.620430 39.448387,-103.349462 39.433333,-103.078495 39.606452,-102.325806 39.613978,-102.325806 39.613978,-102.333333 39.741935,-102.318280 39.741935))'},
        ]

        for filename in ['polygon', 'point']:
            if filename == 'point':
                geometry = 'Point'
                to_write = deepcopy(features)
                for feature in to_write:
                    geom = wkt.loads(feature['wkt'])
                    feature['wkt'] = geom.centroid.wkt
            else:
                to_write = features
                geometry = 'Polygon'

            path = os.path.join(self.current_dir_output, 'ab_{0}.shp'.format(filename))
            with FionaMaker(path, geometry=geometry) as fm:
                fm.write(to_write)

        no_bounds_nc = SimpleNcNoBounds()
        no_bounds_nc.write()
        no_bounds_uri = os.path.join(env.DIR_OUTPUT, no_bounds_nc.filename)

        no_level_nc = SimpleNcNoLevel()
        no_level_nc.write()
        no_level_uri = os.path.join(env.DIR_OUTPUT, no_level_nc.filename)

        ocgis.env.DIR_SHPCABINET = self.current_dir_output
        # ocgis.env.DEBUG = True
        # ocgis.env.VERBOSE = True

        aggregate = [
            False,
            True
        ]
        spatial_operation = [
            'intersects',
            'clip'
        ]
        epsg = [
            2163,
            4326,
            None
        ]
        output_format = [
            constants.OUTPUT_FORMAT_NETCDF,
            constants.OUTPUT_FORMAT_SHAPEFILE,
            constants.OUTPUT_FORMAT_CSV_SHAPEFILE
        ]
        abstraction = [
            'polygon',
            'point',
            None
        ]
        dataset = [
            self.get_dataset(),
            {'uri': no_bounds_uri, 'variable': 'foo'},
            {'uri': no_level_uri, 'variable': 'foo'}
        ]
        geom = [
            'ab_polygon',
            'ab_point'
        ]
        calc = [
            None,
            [{'func': 'mean', 'name': 'my_mean'}]
        ]
        calc_grouping = ['month']

        args = (aggregate, spatial_operation, epsg, output_format, abstraction, geom, calc, dataset)
        for ii, tup in enumerate(itertools.product(*args)):
            a, s, e, o, ab, g, c, d = tup

            if os.path.split(d['uri'])[1] == 'test_simple_spatial_no_bounds_01.nc':
                unbounded = True
            else:
                unbounded = False

            if o == constants.OUTPUT_FORMAT_NETCDF and e == 4326:
                output_crs = CFWGS84()
            else:
                output_crs = CoordinateReferenceSystem(epsg=e) if e is not None else None

            kwds = dict(aggregate=a, spatial_operation=s, output_format=o, output_crs=output_crs, geom=g,
                        abstraction=ab, dataset=d, prefix=str(ii), calc=c, calc_grouping=calc_grouping)

            try:
                ops = OcgOperations(**kwds)
                ret = ops.execute()
            except DefinitionValidationError:
                if o == constants.OUTPUT_FORMAT_NETCDF:
                    if e not in [4326, None]:
                        continue
                    if s == 'clip':
                        continue
                else:
                    raise
            except ExtentError:
                if unbounded or ab == 'point':
                    continue
                else:
                    raise
            except ValueError:
                if unbounded and ab == 'polygon':
                    continue

            if o == constants.OUTPUT_FORMAT_SHAPEFILE:
                ugid_path = os.path.join(self.current_dir_output, ops.prefix, ops.prefix + '_ugid.shp')
            else:
                ugid_path = os.path.join(self.current_dir_output, ops.prefix, constants.OUTPUT_FORMAT_SHAPEFILE,
                                         ops.prefix + '_ugid.shp')

            if o != constants.OUTPUT_FORMAT_NETCDF:
                with fiona.open(ugid_path, 'r') as f:
                    if e:
                        second = output_crs
                    else:
                        second = CoordinateReferenceSystem(epsg=4326)
                    self.assertEqual(CoordinateReferenceSystem(value=f.meta['crs']), second)

            if o == constants.OUTPUT_FORMAT_SHAPEFILE:
                with fiona.open(ret, 'r') as f:
                    if a and ab == 'point':
                        second = 'MultiPoint'
                    elif ab is None:
                        field = RequestDataset(uri=d['uri'], variable='foo').get()
                        second = field.spatial.geom.get_highest_order_abstraction().geom_type
                    else:
                        second = ab.title()

                    if second in ['Polygon', 'MultiPolygon']:
                        second = ['Polygon', 'MultiPolygon']
                    elif second in ['Point', 'MultiPoint']:
                        second = ['Point', 'MultiPoint']

                    self.assertTrue(f.meta['schema']['geometry'] in second)
Example #42
    def test_calc_sample_size(self):
        rd1 = self.get_dataset()
        rd1['alias'] = 'var1'
        rd2 = self.get_dataset()
        rd2['alias'] = 'var2'

        dataset = [
                   # RequestDatasetCollection([rd1]),
                   RequestDatasetCollection([rd1,rd2])
                   ]
        calc_sample_size = [
                            True,
                            # False
                            ]
        calc = [
                [{'func':'mean','name':'mean'},{'func':'max','name':'max'}],
                # [{'func':'ln','name':'ln'}],
                # None,
                # [{'func':'divide','name':'divide','kwds':{'arr1':'var1','arr2':'var2'}}]
                ]
        calc_grouping = [
                         # None,
                         ['month'],
                         # ['month','year']
                         ]
        output_format = ['numpy']

        for ii,tup in enumerate(itertools.product(dataset,calc_sample_size,calc,calc_grouping,output_format)):
            kwds = dict(zip(['dataset','calc_sample_size','calc','calc_grouping','output_format'],tup))
            kwds['prefix'] = str(ii)

            try:
                ops = OcgOperations(**kwds)
            except DefinitionValidationError:
                if kwds['calc'] is not None:
                    ## set functions require a temporal grouping otherwise the calculation
                    ## is meaningless
                    if kwds['calc'][0]['func'] == 'mean' and kwds['calc_grouping'] is None:
                        continue
                    ## multivariate functions may not implemented with sample size = True
                    elif kwds['calc_sample_size'] and kwds['calc'][0]['func'] == 'divide':
                        continue
                    ## multivariate functions require the correct number of variables
                    elif kwds['calc'][0]['func'] == 'divide' and len(kwds['dataset']) == 1:
                        continue
                    ## only one request dataset may be written to netCDF at this time
                    elif kwds['output_format'] == 'nc' and len(kwds['dataset']) == 2:
                        continue
                    else:
                        raise
                ## only one request dataset may be written to netCDF at this time
                elif kwds['output_format'] == 'nc' and len(ops.dataset) == 2:
                    continue
                else:
                    raise

            ret = ops.execute()

            if kwds['output_format'] == 'nc':
                if kwds['calc_sample_size'] and kwds['calc_grouping']:
                    if kwds['calc'] is not None and kwds['calc'][0]['func'] == 'mean':
                        with self.nc_scope(ret) as ds:
                            self.assertEqual(sum([v.startswith('n_') for v in ds.variables.keys()]),2)
                            self.assertEqual(ds.variables['n_max'][:].mean(),30.5)

            if kwds['output_format'] == 'csv':
                if kwds['calc'] is not None and kwds['calc'][0]['func'] == 'mean':
                    with open(ret,'r') as f:
                        reader = DictReader(f)
                        alias_set = set([row['CALC_ALIAS'] for row in reader])
                        if len(kwds['dataset']) == 1:
                            if kwds['calc_sample_size']:
                                self.assertEqual(alias_set,set(['max','n_max','n_mean','mean']))
                            else:
                                self.assertEqual(alias_set,set(['max','mean']))
                        else:
                            if kwds['calc_sample_size']:
                                self.assertEqual(alias_set,set(['max_var1','n_max_var1','n_mean_var1','mean_var1',
                                                                'max_var2','n_max_var2','n_mean_var2','mean_var2']))
                            else:
                                self.assertEqual(alias_set,set(['max_var1','mean_var1',
                                                                'max_var2','mean_var2']))
Example #43
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None, prefix=None,
         regrid_destination=None, regrid_options='bil', level_range=None,
         geom=None, output_format_options=False, search_radius_mult=2.,
         select_nearest=False, select_ugid=None, spatial_wrapping=None, time_region=None, time_range=None,
         dir_output=curdir, output_format='nc'):
  '''
  OCGIS operation call.

  :param resource:
  :param variable: variable in the input file to be picked
  :param dimension_map: dimension map in case of unconventional storage of data
  :param calc: ocgis calc syntax for the calculation operation
  :param calc_grouping: time aggregate grouping
  :param conform_units_to:
  :param memory_limit: limit the amount of data to be loaded into memory at once;
                       if None (default), free memory is detected by birdhouse
  :param level_range: subset of given levels
  :param prefix: string for the file base name
  :param regrid_destination: path to a netCDF file with the grid for the output file

  :param geom: name of a shapefile stored in the birdhouse shape cabinet
  :param output_format_options: output options for netCDF, e.g. compression level
  :param regrid_destination: file containing the target grid (griddes.txt or netCDF file)
  :param regrid_options: methods for regridding:
                          'bil' = bilinear interpolation
                          'bic' = bicubic interpolation
                          'dis' = distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = first-order conservative remapping
                          'laf' = largest area fraction remapping
  :param search_radius_mult: search radius for point geometries; all included grid boxes will be returned
  :param select_nearest: nearest-neighbour selection for point geometries
  :param select_ugid: UGIDs of the appropriate polygons
  :param spatial_wrapping: how to handle coordinates for subsets; options: None (default), 'wrap', 'unwrap'
  :param time_region: select a single month
  :param time_range: sequence of two datetime.datetime objects marking the start and end points
  :param dir_output: output directory (default: curdir)
  :param output_format:
  :return: output file path
  '''
  logger.info('Start ocgis module call function')
  from ocgis import OcgOperations, RequestDataset , env
  from ocgis.util.large_array import compute
  import uuid
  
  # prepare the environment 
  env.DIR_SHPCABINET = DIR_SHP
  env.OVERWRITE = True
  env.DIR_OUTPUT = dir_output
  
  if geom is not None:
    spatial_reorder = True
    spatial_wrapping = 'wrap'
  else:
    spatial_reorder = False
    spatial_wrapping = None

  if prefix is None:
    prefix = str(uuid.uuid1())
  env.PREFIX = prefix
  if output_format_options is False:
    output_format_options = None
  elif output_format_options is True:
    output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                             'variable_kwargs': {'zlib': True, 'complevel': 9}}
  else:
    logger.info('output_format_options are set to %s ' % (output_format_options))

  if not isinstance(resource, list):
    resource = list([resource])
  # execute ocgis 
  logger.info('Execute ocgis module call function')
  
  if has_Lambert_Conformal(resource) is True and geom is not None:
    logger.debug('input has Lambert_Conformal projection and cannot be subsetted with geom')
    output = None
  else:
    try:
      rd = RequestDataset(resource, variable=variable, level_range=level_range,
        dimension_map=dimension_map, conform_units_to=conform_units_to, 
        time_region=time_region, time_range=time_range)
      ops = OcgOperations(dataset=rd,
        output_format_options=output_format_options,
        spatial_wrapping=spatial_wrapping,
        spatial_reorder=spatial_reorder,
        # options=options,
        calc=calc,
        calc_grouping=calc_grouping,
        geom=geom,
        output_format=output_format,
        prefix=prefix,
        search_radius_mult=search_radius_mult,
        select_nearest=select_nearest,
        select_ugid=select_ugid, 
        add_auxiliary_files=False)
      logger.info('OcgOperations set')
      
    except Exception as e: 
      logger.debug('failed to setup OcgOperations')
      raise  
    # check memory load
    from numpy import sqrt 
    from flyingpigeon.utils import FreeMemory
    
    if memory_limit is None:
      f = FreeMemory()
      mem_kb = f.user_free 
      mem_mb = mem_kb / 1024.
      mem_limit = mem_mb / 2. # set limit to half of the free memory
    else:
      mem_limit = memory_limit

    if mem_limit >= 1024. * 4: 
      mem_limit = 1024. * 4
      # 475.0 MB for openDAP 
    
    data_kb = ops.get_base_request_size()['total']
    data_mb = data_kb / 1024.

    if variable is None:
      variable = rd.variable
      logger.info('%s detected as variable' % (variable))

    #data_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
    logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb  , mem_limit ))
    
    if data_mb <= mem_limit:  # input is smaller than half of the free memory
      try:
        logger.info('ocgis module call as ops.execute()')
        geom_file = ops.execute()
      except Exception as e: 
        logger.debug('failed to execute ocgis operation')
        raise  
    else:
      ##########################
      # calculation of chunk size
      ##########################

      size = ops.get_base_request_size()
      nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
      element_in_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
      element_in_mb = element_in_kb / 1024.
      tile_dim = sqrt(mem_limit/(element_in_mb*nb_time_coordinates_rd)) # maximum chunk size 
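      # a worked example with illustrative numbers only: with mem_limit = 2048 MB,
      # element_in_mb = 0.002 and 3650 time coordinates, tile_dim =
      # sqrt(2048 / (0.002 * 3650)) ~= 16.7, i.e. tiles of roughly 16 x 16 grid
      # cells keep one full-time-series chunk within the memory limit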
      
      try:
        logger.info('ocgis module call compute with chunks')
        print('ocgis module call compute with chunks')
        if calc is None:
          calc = '%s=%s*1' % (variable, variable)
          logger.info('calc set to = %s ' %  calc)
          ops = OcgOperations(dataset=rd,
            output_format_options=output_format_options,
            spatial_wrapping=spatial_wrapping,
            # options=options,
            calc=calc,
            calc_grouping=calc_grouping,
            geom=geom,
            output_format=output_format,
            prefix=prefix,
            search_radius_mult=search_radius_mult,
            select_nearest=select_nearest,
            select_ugid=select_ugid, 
            add_auxiliary_files=False)
        geom_file = compute(ops, tile_dimension=int(tile_dim) , verbose=True)
      except Exception as e: 
        logger.debug('failed to compute ocgis with chunks')
        raise
    logger.info('Succeeded with ocgis module call function')

    ############################################
    # remapping according to regrid informations
    ############################################
    if regrid_destination is not None:
      try:
        from tempfile import mkstemp
        from cdo import Cdo
        cdo = Cdo()

        output = '%s.nc' % uuid.uuid1()
        remap = 'remap%s' % regrid_options
        # pick the matching cdo remap operator and call it directly instead of
        # building a command string for exec(); this also avoids shadowing the
        # enclosing function name `call`
        remap_operators = [op for op in dir(cdo) if remap in op]
        output = getattr(cdo, remap_operators[0])(regrid_destination,
                                                  input=geom_file, output=output)
      except Exception as e: 
        logger.debug('failed to remap')
        raise 
    else:
      output = geom_file
  return output
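
# A minimal usage sketch for the call() helper above; the file path, variable
# name and time range are hypothetical placeholders, not part of the original
# example:
#
#   from datetime import datetime as dt
#   out_nc = call(resource=['/data/tas_EUR-44_day.nc'],
#                 variable='tas',
#                 calc=[{'func': 'mean', 'name': 'tas_mean'}],
#                 calc_grouping=['month'],
#                 time_range=[dt(2001, 1, 1), dt(2010, 12, 31)],
#                 output_format='nc')
#   # out_nc is the path of the written netCDF file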
Example #44
0
def robustness_stats(resources,
                     time_range=[None, None],
                     dir_output=None,
                     variable=None):
    """
    calculating the spatial mean and corresponding standard deviation for an ensemble of consistent datasets containing one variableself.
    If a time range is given the statistical values are calculated only in the disired timeperiod.

    :param resources: str or list of str containing the netCDF files paths
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: path to folder to store ouput files  (default= curdir)
    :param variable: variable name containing in netCDF file. If not set, variable name gets detected

    :return netCDF files: out_ensmean.nc, out_ensstd.nc
    """

    from ocgis import OcgOperations, RequestDataset, env
    env.OVERWRITE = True

    if variable is None:
        variable = get_variable(resources[0])

    out_means = []
    for resource in resources:

        rd = RequestDataset(resource, variable)
        prefix = basename(resource).replace('.nc', '')
        LOGGER.debug('processing mean of {}'.format(prefix))
        calc = [{
            'func': 'median',
            'name': variable
        }]  #  {'func': 'median', 'name': 'monthly_median'}
        ops = OcgOperations(dataset=rd,
                            calc=calc,
                            calc_grouping=['all'],
                            output_format='nc',
                            prefix='median_' + prefix,
                            time_range=time_range,
                            dir_output=dir_output)
        out_means.append(ops.execute())
    # nc_out = call(resource=resources, calc=[{'func': 'mean', 'name': 'ens_mean'}],
    #               calc_grouping='all', # time_region=time_region,
    #               dir_output=dir_output, output_format='nc')

    ####
    # read in numpy array

    for i, out_mean in enumerate(out_means):
        ds = Dataset(out_mean)
        if i == 0:
            # allocate the ensemble stack on the first file: (n_members, ny, nx)
            var = ds[variable][:]
            vals = np.empty([len(out_means), var.shape[-2], var.shape[-1]])
        vals[i, :, :] = np.squeeze(ds[variable][:])
        ds.close()

    ####
    # calc median, std
    val_median = np.nanmedian(vals, axis=0)
    val_std = np.nanstd(vals, axis=0)

    #####
    # prepare files by copying ...
    # note: time_range must contain two datetime objects here; the default
    # [None, None] would fail in strftime below
    ensmean_file = 'ensmean_{}_{}_{}.nc'.format(
        variable, dt.strftime(time_range[0], '%Y-%m-%d'),
        dt.strftime(time_range[1], '%Y-%m-%d'))
    out_ensmean = copyfile(out_means[0], join(dir_output, ensmean_file))

    ensstd_file = 'ensstd_{}_{}_{}.nc'.format(
        variable, dt.strftime(time_range[0], '%Y-%m-%d'),
        dt.strftime(time_range[1], '%Y-%m-%d'))
    out_ensstd = copyfile(out_means[0], join(dir_output, ensstd_file))

    ####
    # write values to files
    ds_median = Dataset(out_ensmean, mode='a')
    ds_median[variable][:] = val_median
    ds_median.close()

    ds_std = Dataset(out_ensstd, mode='a')
    ds_std[variable][:] = val_std
    ds_std.close()
    LOGGER.info('processing the overall ensemble statistical mean ')

    # prefix = 'ensmean_tg-mean_{}-{}'.format(dt.strftime(time_range[0], '%Y-%m-%d'),
    #                                         dt.strftime(time_range[1], '%Y-%m-%d'))
    # rd = RequestDataset(out_means, var)
    # calc = [{'func': 'mean', 'name': 'mean'}]  #  {'func': 'median', 'name': 'monthly_median'}
    # ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=['all'],
    #                     output_format=output_format, prefix='mean_'+prefix, time_range=time_range)
    # ensmean = ops.execute()

    return out_ensmean, out_ensstd
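
# A minimal usage sketch for robustness_stats(); the file paths are
# hypothetical placeholders, not part of the original example:
#
#   from datetime import datetime as dt
#   ensemble = ['/data/tas_model1.nc', '/data/tas_model2.nc', '/data/tas_model3.nc']
#   out_ensmean, out_ensstd = robustness_stats(
#       ensemble,
#       time_range=[dt(1971, 1, 1), dt(2000, 12, 31)],
#       dir_output='/tmp',
#       variable='tas')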
Example #45
0
def call(resource=[], variable=None, dimension_map=None, agg_selection=True,
         calc=None, calc_grouping=None, conform_units_to=None, crs=None,
         memory_limit=None, prefix=None,
         regrid_destination=None, regrid_options='bil', level_range=None,  # cdover='python',
         geom=None, output_format_options=None, search_radius_mult=2.,
         select_nearest=False, select_ugid=None, spatial_wrapping=None,
         t_calendar=None, time_region=None,
         time_range=None, dir_output=None, output_format='nc'):
    """
    Call OCGIS operation.

    :param resource: Input netCDF file.
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param agg_selection: for aggregation in case of multiple polygon geoms
    :param calc: ocgis calc syntax for calculation operation
    :param calc_grouping: time aggregate grouping
    :param cdover: OUTDATED use py-cdo ('python', by default) or cdo from the system ('system')
    :param conform_units_to:
    :param crs: coordinate reference system
    :param memory_limit: limit the amount of data to be loaded into the memory at once \
        if None (default) free memory is detected by birdhouse
    :param level_range: subset of given levels
    :param prefix: string for the file base name
    :param regrid_destination: file path with netCDF file with grid for output file
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF e.g. compression level
    :param regrid_options: methods for regridding:
                          'bil' = Bilinear interpolation
                          'bic' = Bicubic interpolation
                          'dis' = Distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = First-order conservative remapping
                          'laf' = largest area fraction remapping
    :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets, options: None (default), 'wrap', 'unwrap'
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: path to folder to store output files (default= curdir)
    :param output_format: format in which results will be returned.
    :return: output file path
    """
    LOGGER.info('Start ocgis module call function')
    from ocgis import OcgOperations, RequestDataset, env, DimensionMap, crs
    from ocgis.util.large_array import compute
    from datetime import datetime as dt
    from datetime import date as dd
    from datetime import time as dt_time
    import uuid

    # prepare the environment
    env.OVERWRITE = True

    if dir_output is None:
        dir_output = abspath(curdir)

    # check time_range format:

    if time_range is not None:
        try:
            LOGGER.debug('time_range type= %s , %s ' % (type(time_range[0]), type(time_range[1])))
            LOGGER.debug('time_range= %s , %s ' % (time_range[0], time_range[1]))
            # if type(time_range[0] is 'datetime.date'):
            if (isinstance(time_range[0], dd) and not isinstance(time_range[0], dt)):
                time_range = [dt.combine(time_range[0], dt.min.time()),
                              dt.combine(time_range[1], dt.min.time())]
                # time_range = [dt.combine(time_range[0], dt_time(12,0)),
                #               dt.combine(time_range[1], dt_time(12,0))]
            LOGGER.debug('time_range changed to type= %s , %s ' % (type(time_range[0]), type(time_range[1])))
            LOGGER.debug('time_range changed to= %s , %s ' % (time_range[0], time_range[1]))
        except Exception as ex:
            LOGGER.exception('failed to convert data to datetime {}'.format(ex))

    if spatial_wrapping == 'wrap':
        spatial_reorder = True
    else:
        spatial_reorder = False
    LOGGER.debug('spatial_reorder: %s and spatial_wrapping: %s ' % (spatial_reorder, spatial_wrapping))

    if prefix is None:
        prefix = str(uuid.uuid1())
        env.PREFIX = prefix
    #
    # if output_format_options is False:
    #     output_format_options = None
    # elif output_format_options is True:
    #     output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
    #                              'variable_kwargs': {'zlib': True, 'complevel': 9}}
    # else:
    if output_format_options is not None:
        LOGGER.info('output_format_options are set to %s ' % (output_format_options))

    if not isinstance(resource, list):
        resource = list([resource])
    # execute ocgis
    LOGGER.info('Execute ocgis module call function')

    try:
        LOGGER.debug('call module dir_output = %s ' % abspath(dir_output))
        rd = RequestDataset(resource,
                            variable=variable,
                            level_range=level_range,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region,
                            t_calendar=t_calendar,
                            time_range=time_range)

        from ocgis.constants import DimensionMapKey
        rd.dimension_map.set_bounds(DimensionMapKey.TIME, None)

        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            dir_output=dir_output,
                            spatial_wrapping=spatial_wrapping,
                            spatial_reorder=spatial_reorder,
                            # regrid_destination=rd_regrid,
                            # options=options,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            agg_selection=agg_selection,
                            output_format=output_format,
                            prefix=prefix,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        LOGGER.info('OcgOperations set')
    except Exception as ex:
        LOGGER.exception('failed to setup OcgOperations: {}'.format(ex))
        return None

    # TODO: include comparison of dataload to available memory
    dataload = 1
    available_memory = 2
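    # a sketch of what the TODO could look like, following the memory check used
    # in the other call() variants of this module (assumes FreeMemory is
    # importable from flyingpigeon.utils):
    #
    #   from flyingpigeon.utils import FreeMemory
    #   dataload = ops.get_base_request_size()['total'] / 1024.   # request size in MB
    #   available_memory = FreeMemory().user_free / 1024. / 2.    # half of the free memory in MB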

    try:
        if dataload < available_memory:  # compare dataload to free_memory
            LOGGER.info('ocgis module call as ops.execute()')
            geom_file = ops.execute()
        else:
            # LOGGER.info('ocgis module call as compute(ops)')
            # TODO: estimate right tile_dimensionS
            tile_dimension = 10  # default
            LOGGER.info('Not enough memory for data load, ocgis module call compute in chunks')
            geom_file = compute(ops, tile_dimension=tile_dimension, verbose=True)

    except Exception as ex:
        LOGGER.exception('failed to execute ocgis operation : {}'.format(ex))
        return None
    return geom_file
Example #46
0
    def _handler(self, request, response):

        ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
        ocgis.env.OVERWRITE = True
        tic = dt.now()
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('Execution started at : {}'.format(tic), 1)

        ######################################
        # Read inputs
        ######################################
        try:
            candidate = archiveextract(
                resource=rename_complexinputs(request.inputs['candidate']))
            target = archiveextract(
                resource=rename_complexinputs(request.inputs['target']))
            location = request.inputs['location'][0].data
            indices = [el.data for el in request.inputs['indices']]
            dist = request.inputs['dist'][0].data
            dateStartCandidate = request.inputs['dateStartCandidate'][0].data
            dateEndCandidate = request.inputs['dateEndCandidate'][0].data
            dateStartTarget = request.inputs['dateStartTarget'][0].data
            dateEndTarget = request.inputs['dateEndTarget'][0].data

        except Exception as ex:
            msg = 'Failed to read input parameter {}'.format(ex)
            LOGGER.error(msg)
            raise Exception(msg)

        response.update_status('Input parameters ingested', 2)

        ######################################
        # Process inputs
        ######################################

        try:
            point = Point(*map(float, location.split(',')))
            dateStartCandidate = dt.strptime(dateStartCandidate, '%Y-%m-%d')
            dateEndCandidate = dt.strptime(dateEndCandidate, '%Y-%m-%d')
            dateStartTarget = dt.strptime(dateStartTarget, '%Y-%m-%d')
            dateEndTarget = dt.strptime(dateEndTarget, '%Y-%m-%d')

        except Exception as ex:
            msg = 'failed to process inputs {}'.format(ex)
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("init took {}".format(dt.now() - tic))
        response.update_status('Processed input parameters', 3)

        ######################################
        # Extract target time series
        ######################################
        savetarget = False
        try:
            # Using `call` creates a netCDF file in the tmp directory.
            #
            # Here we keep this stuff in memory
            if savetarget:
                prefix = 'target_ts'
                target_ts = call(resource=target,
                                 geom=point,
                                 variable=indices,
                                 time_range=[dateStartTarget, dateEndTarget],
                                 select_nearest=True,
                                 prefix=prefix)

                # target_ts = [get_values(prefix+'.nc', ind) for ind in indices]

            else:
                trd = RequestDataset(
                    target,
                    variable=indices,
                    time_range=[dateStartTarget, dateEndTarget])

                op = OcgOperations(trd,
                                   geom=point,
                                   select_nearest=True,
                                   search_radius_mult=1.75)
                out = op.execute()
                target_ts = out.get_element()

        except Exception as ex:
            msg = 'Target extraction failed {}'.format(ex)
            LOGGER.debug(msg)
            raise Exception(msg)

        response.update_status('Extracted target series', 5)

        ######################################
        # Compute dissimilarity metric
        ######################################

        response.update_status('Computing spatial analog', 6)
        try:
            output = call(
                resource=candidate,
                calc=[{
                    'func': 'dissimilarity',
                    'name': 'spatial_analog',
                    'kwds': {
                        'dist': dist,
                        'target': target_ts,
                        'candidate': indices
                    }
                }],
                time_range=[dateStartCandidate, dateEndCandidate],
            )

        except Exception as ex:
            msg = 'Spatial analog failed: {}'.format(ex)
            LOGGER.exception(msg)
            raise Exception(msg)

        add_metadata(output,
                     dist=dist,
                     indices=",".join(indices),
                     target_location=location,
                     candidate_time_range="{},{}".format(
                         dateStartCandidate, dateEndCandidate),
                     target_time_range="{},{}".format(dateStartTarget,
                                                      dateEndTarget))

        response.update_status('Computed spatial analog', 95)

        response.outputs['output_netcdf'].file = output

        response.update_status('Execution completed', 100)
        LOGGER.debug("Total execution took {}".format(dt.now() - tic))
        return response
Example #47
0
def get_segetalflora(
    resource=[], dir_output=".", culture_type="fallow", climate_type=2, region=None, dimension_map=None
):
    """productive worker for segetalflora jobs
  :param resources: list of tas netCDF files. (Any time aggregation is possible)
  :param culture_type: Type of culture. Possible values are:
                       'fallow', 'intensive', 'extensive' (default:'fallow')
  :param climate_type: Type of climate: number 1 to 7 or 'all' (default: 2)
  :param region: Region for subset. If 'None' (default), the values will be calculated for Europe
  """
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import calc_grouping, sort_by_filename
    import os
    from os import remove
    from tempfile import mkstemp
    from ocgis import RequestDataset, OcgOperations

    from cdo import Cdo

    cdo = Cdo()

    if not os.path.exists(dir_output):
        os.makedirs(dir_output)

    os.chdir(dir_output)
    # outputs = []

    if region is None:
        region = "Europe"

    if not isinstance(culture_type, list):
        culture_type = list([culture_type])
    if not isinstance(climate_type, list):
        climate_type = list([climate_type])

    ncs = sort_by_filename(resource)
    print "%s experiments found" % (len(ncs))
    print "keys: %s " % (ncs.keys())

    # generate outfolder structure:

    dir_netCDF = "netCDF"
    dir_ascii = "ascii"
    dir_netCDF_tas = dir_netCDF + "/tas"
    dir_ascii_tas = dir_ascii + "/tas"

    if not os.path.exists(dir_netCDF):
        os.makedirs(dir_netCDF)
    if not os.path.exists(dir_ascii):
        os.makedirs(dir_ascii)
    if not os.path.exists(dir_netCDF_tas):
        os.makedirs(dir_netCDF_tas)
    if not os.path.exists(dir_ascii_tas):
        os.makedirs(dir_ascii_tas)

    tas_files = []

    for key in ncs.keys():
        try:
            print "process %s" % (key)
            calc = [{"func": "mean", "name": "tas"}]
            calc_group = calc_grouping("yr")
            prefix = key.replace(key.split("_")[7], "yr")
            if not os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                nc_tas = clipping(
                    resource=ncs[key],
                    variable="tas",
                    calc=calc,
                    dimension_map=dimension_map,
                    calc_grouping=calc_group,
                    prefix=prefix,
                    polygons="Europe",
                    dir_output=dir_netCDF_tas,
                )[0]
                print "clipping done for %s" % (key)
                if os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                    tas_files.append(prefix)
                else:
                    print "clipping failed for %s: No output file exists" % (key)
            else:
                print "netCDF file already exists %s" % (key)
                nc_tas = os.path.join(dir_netCDF_tas, prefix + ".nc")
        except Exception as e:
            print "clipping failed for %s: %s" % (key, e)
        try:
            asc_tas = os.path.join(dir_ascii_tas, prefix + ".asc")
            if not os.path.exists(asc_tas):
                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                tmp = tmp.replace(os.path.abspath(os.curdir), ".")

                # cdo.outputtab('name,date,lon,lat,value', input = nc_tas , output = tmp)
                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_tas, tmp)
                print cmd
                os.system(cmd)
                print ("tanslation to ascii done")
                remove_rows(tmp, asc_tas)
                remove(tmp)
                print ("rows with missing values removed")
            else:
                print ("tas ascii already exists")
            plot_ascii(asc_tas)
        except Exception as e:
            print "translation to ascii failed %s: %s" % (key, e)
            if os.path.exists(tmp):
                remove(tmp)

    tas_files = [os.path.join(dir_netCDF_tas, nc) for nc in os.listdir(dir_netCDF_tas)]
    outputs = []

    for name in tas_files:
        for cult in culture_type:
            for climat in climate_type:
                try:
                    calc = get_equation(culture_type=cult, climate_type=climat)
                    if calc is not None:
                        try:
                            var = "sf%s%s" % (cult, climat)
                            prefix = os.path.basename(name).replace("tas", var).replace(".nc", "")

                            infile = name  # os.path.join(dir_netCDF_tas,name+'.nc')
                            dir_sf = os.path.join(dir_netCDF, var)
                            if not os.path.exists(dir_sf):
                                os.makedirs(dir_sf)
                            if os.path.exists(os.path.join(dir_sf, prefix + ".nc")):
                                nc_sf = os.path.join(dir_sf, prefix + ".nc")
                                print "netCDF file already exists: %s %s " % (dir_sf, prefix)
                            else:
                                rd = RequestDataset(name, variable="tas", dimension_map=dimension_map)
                                op = OcgOperations(
                                    dataset=rd,
                                    calc=calc,
                                    prefix=prefix,
                                    output_format="nc",
                                    dir_output=dir_sf,
                                    add_auxiliary_files=False,
                                )
                                nc_sf = op.execute()
                                print "segetalflora done for %s" % (prefix)
                                outputs.append(prefix)

                            dir_ascii_sf = os.path.join(dir_ascii, var)
                            if not os.path.exists(dir_ascii_sf):
                                os.makedirs(dir_ascii_sf)
                            asc_sf = os.path.join(dir_ascii_sf, prefix + ".asc")
                            if not os.path.exists(asc_sf):
                                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                                # cdo.outputtab('name,date,lon,lat,value', input = nc_sf , output = tmp)
                                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_sf, tmp)
                                os.system(cmd)
                                print ("translation to ascii done")
                                remove_rows(tmp, asc_sf)
                                remove(tmp)
                                print ("rows with missing values removed")
                            else:
                                print "ascii file already exists"
                            plot_ascii(asc_sf)
                        except Exception as e:
                            print "failed for ascii file: %s %s " % (name, e)
                            if os.path.exists(tmp):
                                remove(tmp)
                    else:
                        print("NO EQUATION found for %s %s " % (cult, climat))
                except Exception as e:
                    print("Segetal flora failed: %s" % (e))
    return outputs
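
# A minimal usage sketch for get_segetalflora(); the file path and output
# directory are hypothetical placeholders, not part of the original example:
#
#   outputs = get_segetalflora(
#       resource=['/data/tas_EUR-44_yr.nc'],
#       dir_output='/tmp/segetalflora',
#       culture_type=['fallow', 'intensive'],
#       climate_type=[2, 3])
#   # outputs lists the prefixes of the produced segetalflora netCDF files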
Example #48
0
        os.path.join(p, "{0}_{1}_{2}_se.nc".format(variable, simulation,
                                                   month)))

filenames = []
for p in filepaths:
    f = os.path.split(p)[-1].split('.')[0]
    filenames.append(f)

# Return all time slices
SNIPPET = True
# Data returns won't overwrite in this case.
env.OVERWRITE = False

# where to find the shapefiles
#ocgis.env.DIR_GEOMCABINET = os.path.join(os.getcwd(), os.path.split(ocgis.test.__file__)[0], 'bin')
ocgis.env.DIR_GEOMCABINET = os.path.join(os.getcwd(), "shapefiles")

# pair each file with its derived field name and its netCDF variable name
rds = [
    RequestDataset(uri=uri, variable=var, field_name=field_name)
    for uri, field_name, var in zip(filepaths, filenames, var_nc)
]
ops = OcgOperations(dataset=rds,
                    spatial_operation='clip',
                    aggregate=True,
                    snippet=SNIPPET,
                    geom='prov_la_p_geo83_f',
                    geom_select_uid=[1])
ret = ops.execute()

#assert len(ret.geoms) == 51
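
# The returned object is a dictionary-like SpatialCollection keyed by the
# geometry "ugid" (only uid 1 was selected above). A sketch of pulling out the
# clipped, aggregated field via the get_element() accessor used elsewhere in
# these examples:
#
#   field = ret.get_element()
#   print(list(field.keys()))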
Example #49
0
def call(resource=[],
         variable=None,
         dimension_map=None,
         calc=None,
         calc_grouping=None,
         conform_units_to=None,
         memory_limit=None,
         prefix=None,
         regrid_destination=None,
         regrid_options='bil',
         level_range=None,
         geom=None,
         output_format_options=None,
         search_radius_mult=2.,
         select_nearest=False,
         select_ugid=None,
         spatial_wrapping=None,
         t_calendar=None,
         time_region=None,
         time_range=None,
         dir_output=None,
         output_format='nc'):
    '''
    ocgis operation call

    :param resource:
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param calc: ocgis calc syntax for calculation operation
    :param calc_grouping: time aggregate grouping
    :param conform_units_to:
    :param memory_limit: limit the amount of data to be loaded into the memory at once \
        if None (default) free memory is detected by birdhouse
    :param level_range: subset of given levels
    :param prefix: string for the file base name
    :param regrid_destination: file path with netCDF file with grid for output file
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF e.g. compression level
    :param regrid_options: methods for regridding:
                          'bil' = Bilinear interpolation
                          'bic' = Bicubic interpolation
                          'dis' = Distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = First-order conservative remapping
                          'laf' = largest area fraction remapping
    :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets, options: None (default), 'wrap', 'unwrap'
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output (default= curdir):
    :param output_format:
    :return: output file path
    '''
    LOGGER.info('Start ocgis module call function')
    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute
    from datetime import datetime as dt
    from datetime import date
    import uuid

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    # env.DIR_OUTPUT = dir_output
    # LOGGER.debug(' **** env.DIR_OUTPUT  = %s ' % env.DIR_OUTPUT)

    if dir_output is None:
        dir_output = abspath(curdir)

    # check time_range format:

    if time_range is not None:
        try:
            LOGGER.debug('time_range type= %s , %s ' %
                         (type(time_range[0]), type(time_range[1])))
            # convert datetime.date objects to datetime.datetime; the original
            # check `type(time_range[0] is 'datetime.date')` was always truthy
            if isinstance(time_range[0], date) and not isinstance(time_range[0], dt):
                time_range = [
                    dt.combine(time_range[0], dt.min.time()),
                    dt.combine(time_range[1], dt.min.time())
                ]
            LOGGER.debug('time_range changed to type= %s , %s ' %
                         (type(time_range[0]), type(time_range[1])))
        except Exception:
            LOGGER.exception('failed to convert data to datetime')

    #
    # if geom is not None:
    #     spatial_reorder = True
    #     spatial_wrapping = 'wrap'
    # else:
    #     spatial_reorder = False
    #     spatial_wrapping = None
    #

    if spatial_wrapping == 'wrap':
        spatial_reorder = True
    else:
        spatial_reorder = False
    LOGGER.debug('spatial_reorder: %s and spatial_wrapping: %s ' %
                 (spatial_reorder, spatial_wrapping))

    if prefix is None:
        prefix = str(uuid.uuid1())
        env.PREFIX = prefix
    #
    # if output_format_options is False:
    #     output_format_options = None
    # elif output_format_options is True:
    #     output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
    #                              'variable_kwargs': {'zlib': True, 'complevel': 9}}
    # else:
    if output_format_options is not None:
        LOGGER.info('output_format_options are set to %s ' %
                    (output_format_options))

    if not isinstance(resource, list):
        resource = list([resource])
    # execute ocgis
    LOGGER.info('Execute ocgis module call function')

    # if has_Lambert_Conformal(resource) is True and geom is not None:
    #     LOGGER.debug('input has Lambert_Conformal projection and can not prcessed with ocgis:\
    #      https://github.com/NCPP/ocgis/issues/424')
    #     return None
    # else:
    try:
        LOGGER.debug('call module curdir = %s ' % abspath(curdir))
        rd = RequestDataset(resource,
                            variable=variable,
                            level_range=level_range,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region,
                            t_calendar=t_calendar,
                            time_range=time_range)

        # from ocgis.constants import DimensionMapKey
        # rd.dimension_map.set_bounds(DimensionMapKey.TIME, None)

        ops = OcgOperations(
            dataset=rd,
            output_format_options=output_format_options,
            dir_output=dir_output,
            spatial_wrapping=spatial_wrapping,
            spatial_reorder=spatial_reorder,
            # regrid_destination=rd_regrid,
            # options=options,
            calc=calc,
            calc_grouping=calc_grouping,
            geom=geom,
            output_format=output_format,
            prefix=prefix,
            search_radius_mult=search_radius_mult,
            select_nearest=select_nearest,
            select_ugid=select_ugid,
            add_auxiliary_files=False)
        LOGGER.info('OcgOperations set')
    except Exception:
        LOGGER.exception('failed to setup OcgOperations')
        return None

    try:
        LOGGER.info('ocgis module call as ops.execute()')
        geom_file = ops.execute()
    except Exception:
        LOGGER.exception('failed to execute ocgis operation')
        return None
    #
    # try:
    #     from numpy import sqrt
    #     from flyingpigeon.utils import FreeMemory
    #
    #     if memory_limit is None:
    #         f = FreeMemory()
    #         mem_kb = f.user_free
    #         mem_mb = mem_kb / 1024.
    #         mem_limit = mem_mb / 2.  # set limit to half of the free memory
    #     else:
    #         mem_limit = memory_limit
    #
    #     if mem_limit >= 1024. * 4:
    #         mem_limit = 1024. * 4
    #         # 475.0 MB for openDAP
    #
    #     LOGGER.info('memory_limit = %s Mb' % (mem_limit))
    #
    #     data_kb = ops.get_base_request_size()['total']
    #     data_mb = data_kb / 1024.
    #
    #     # data_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
    #     LOGGER.info('data_mb  = %s Mb' % (data_mb))
    #
    #     if data_mb <= mem_limit:  # input is smaler than the half of free memory size
    #         try:
    #             LOGGER.info('ocgis module call as ops.execute()')
    #             geom_file = ops.execute()
    #         except Exception as e:
    #             LOGGER.debug('failed to execute ocgis operation')
    #             raise
    #             return None
    #
    #     else:
    #         ##########################
    #         # calcultion of chunk size
    #         ##########################
    #         try:
    #             size = ops.get_base_request_size()
    #             nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
    #             element_in_kb = size['total']/reduce(lambda x, y: x*y, size['variables'][variable]['value']['shape'])
    #             element_in_mb = element_in_kb / 1024.
    #             tile_dim = sqrt(mem_limit/(element_in_mb*nb_time_coordinates_rd))  # maximum chunk size
    #
    #             LOGGER.info('ocgis module call compute with chunks')
    #             if calc is None:
    #                 calc = '%s=%s*1' % (variable, variable)
    #                 LOGGER.info('calc set to = %s ' % calc)
    #             ops = OcgOperations(dataset=rd,
    #                                 output_format_options=output_format_options,
    #                                 dir_output=dir_output,
    #                                 spatial_wrapping=spatial_wrapping,
    #                                 spatial_reorder=spatial_reorder,
    #                                 # regrid_destination=rd_regrid,
    #                                 # options=options,
    #                                 calc=calc,
    #                                 calc_grouping=calc_grouping,
    #                                 geom=geom,
    #                                 output_format=output_format,
    #                                 prefix=prefix,
    #                                 search_radius_mult=search_radius_mult,
    #                                 select_nearest=select_nearest,
    #                                 select_ugid=select_ugid,
    #                                 add_auxiliary_files=False)
    #             geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
    #             print 'ocgis calculated'
    #         except Exception as e:
    #             LOGGER.debug('failed to compute ocgis with chunks')
    #             raise
    #             return None
    #     LOGGER.info('Succeeded with ocgis module call function')
    # except:
    #     LOGGER.exception('failed to compare dataload with free memory, calling as execute instead')

    ############################################
    # remapping according to regrid informations
    ############################################
    if regrid_destination is not None:
        try:
            from tempfile import mkstemp
            from cdo import Cdo
            cdo = Cdo()
            output = '%s.nc' % uuid.uuid1()
            remap = 'remap%s' % regrid_options
            # pick the matching cdo remap operator and call it directly instead
            # of building a command string for exec(); this also avoids
            # shadowing the enclosing function name `call`
            remap_operators = [op for op in dir(cdo) if remap in op]
            output = getattr(cdo, remap_operators[0])(regrid_destination,
                                                      input=geom_file,
                                                      output=output)
        except Exception:
            LOGGER.debug('failed to remap')
            raise
    else:
        output = geom_file

    # try:
    #     from flyingpigeon.utils import unrotate_pole
    #     lat, lon = unrotate_pole(output)
    # except:
    #     LOGGER.exception('failed to unrotate pole')
    return output
Example #50
0
def call(resource=[], variable=None, dimension_map=None, calc=None,
  calc_grouping=None, conform_units_to=None, memory_limit=None, prefix=None,
  geom=None, output_format_options=False, search_radius_mult=2., select_nearest=False, select_ugid=None, time_region=None, time_range=None,
  dir_output=None, output_format='nc'):
  '''
  ocgis operation call

  :param resource:
  :param variable: variable in the input file to be picked
  :param dimension_map: dimension map in case of unconventional storage of data
  :param calc: ocgis calc syntax for calculation operation
  :param calc_grouping: time aggregate grouping
  :param conform_units_to:
  :param memory_limit: limit the amount of data to be loaded into memory at once; if None (default) free memory is detected by birdhouse
  :param prefix:
  :param geom: name of shapefile stored in birdhouse shape cabinet
  :param output_format_options: output options for netCDF e.g. compression level
  :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
  :param select_nearest: nearest neighbour selection for point geometries
  :param select_ugid: ugid for appropriate polygons
  :param time_region:
  :param time_range: sequence of two datetime.datetime objects to mark start and end point
  :param dir_output:
  :param output_format:
  :return: output file path
  '''
  print('start ocgis module')
  logger.info('Start ocgis module call function')
  from ocgis import OcgOperations, RequestDataset , env
  from ocgis.util.large_array import compute
  
  # prepare the environment 
  env.DIR_SHPCABINET = DIR_SHP
  env.OVERWRITE = True
  env.DIR_OUTPUT = dir_output
  env.PREFIX = prefix

  if output_format_options is False:
    output_format_options = None
  elif output_format_options is True:
    output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                             'variable_kwargs': {'zlib': True, 'complevel': 9}}
  else:
    logger.info('output_format_options are set to %s ' % (output_format_options))
  
  if not isinstance(resource, list):
    resource = list([resource])

  # execute ocgis 
  logger.info('Execute ocgis module call function')
  
  try:
    rd = RequestDataset(resource, variable=variable,
      dimension_map=dimension_map, conform_units_to=conform_units_to,
      time_region=time_region)

    ops = OcgOperations(dataset=rd,
        output_format_options=output_format_options,
        # options=options,
        calc=calc,
        calc_grouping=calc_grouping,
        geom=geom,
        output_format=output_format,
        search_radius_mult=search_radius_mult,
        select_nearest=select_nearest,
        select_ugid=select_ugid,
        add_auxiliary_files=False)
    logger.info('OcgOperations set')
  except Exception as e: 
    logger.debug('failed to setup OcgOperations')
    raise  
  
  # check memory load
  from numpy import sqrt 
  from flyingpigeon.utils import FreeMemory
  
  if memory_limit is None:
    f = FreeMemory()
    mem_kb = f.user_free 
    mem_mb = mem_kb / 1024.
    mem_limit = mem_mb / 2. # set limit to half of the free memory
  else:
    mem_limit = memory_limit

  if mem_limit >= 1024. * 4: 
    mem_limit = 1024. * 4
    # 475.0 MB for openDAP 
  
  data_kb = ops.get_base_request_size()['total']
  data_mb = data_kb / 1024.

  if variable is None:
    variable = rd.variable
    logger.info('%s detected as variable' % (variable))

  #data_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
  logger.info('data_mb  = %s ; memory_limit = %s ' % (data_mb  , mem_limit ))
  if data_mb <= mem_limit:  # input is smaller than half of the free memory
    logger.info('ocgis module call as ops.execute()')
    try: 
      geom_file = ops.execute()
    except Exception as e: 
      logger.debug('failed to execute ocgis operation')
      raise  
  else:
    size = ops.get_base_request_size()
    nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
    element_in_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
    element_in_mb = element_in_kb / 1024.

    tile_dim = sqrt(mem_limit/(element_in_mb*nb_time_coordinates_rd))  # maximum chunk size
    # calculation of chunk size
    try:
      logger.info('tile_dim = %s; calc = %s ' % (tile_dim, calc))
      if calc is None:
        calc = '%s=%s*1' % (variable, variable)
        logger.info('calc set to = %s ' %  calc)
        ops = OcgOperations(dataset=rd,
          output_format_options=output_format_options,
          calc=calc, 
          output_format=output_format, # 'nc' is necessary for chunked execution  
          select_ugid=select_ugid, 
          geom=geom,
          add_auxiliary_files=False)
      geom_file = compute(ops, tile_dimension=int(tile_dim) , verbose=True)
    except Exception as e: 
      logger.debug('failed to compute ocgis operation')
      raise  
  
  logger.info('Succeeded with ocgis module call function')
  return geom_file
Example #51
0
env.DIR_SHPCABINET = '/Users/ryan.okuinghttons/netCDFfiles/shapefiles/ocgis_data/shp'

## RequestDatasetCollection ####################################################

rdc = RequestDatasetCollection([RequestDataset(os.path.join(DATA_DIR,NCS),'tas')])

## Return In-Memory ############################################################

## Data is returned as a dictionary with 51 keys (don't forget Puerto Rico...).
## A key in the returned dictionary corresponds to a geometry "ugid" with the
## value of type OcgCollection.
print('returning numpy...')
ops = OcgOperations(dataset=rdc,spatial_operation='clip',aggregate=True,
                    snippet=SNIPPET,geom='state_boundaries')
path = ops.execute()

## Write to Shapefile ##########################################################

print('returning shapefile...')
ops = OcgOperations(dataset=rdc,spatial_operation='clip',aggregate=True,
                    snippet=SNIPPET,geom='state_boundaries',output_format='shp')
path = ops.execute()

## Write All Data to Keyed Format ##############################################

## Without the snippet, we are writing all data to the linked CSV-Shapefile
## output format. The operation will take considerably longer.
print('returning csv+...')
ops = OcgOperations(dataset=rdc,spatial_operation='clip',aggregate=True,
                    snippet=False,geom='state_boundaries',output_format='csv+')
Example #52
0
# calc = '%s=%s*1' % (variable, variable)

rd = RequestDataset(ncs)

ops = OcgOperations(rd,
                    # time_range=time_range,
                    calc = '%s=%s*1' % ('tas', 'tas'),
                    # level_range=level_range,
                    geom=bbox,
                    output_format='nc',
                    prefix='ocgis_module_optimisation',
                    dir_output='/home/nils/data/',
                    add_auxiliary_files=False)

shnip = dt.now()
geom = ops.execute()
shnap = dt.now()
duration = (shnap - shnip).total_seconds()
print("operation performed with execute in {} sec.".format(duration))
print(geom)

tile_dimension=5  # default
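# note: a larger tile_dimension means fewer, bigger chunks (faster but more
# memory per chunk); other examples in this module estimate it from the
# per-element size as sqrt(mem_limit / (element_in_mb * n_time_steps))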

shnip = dt.now()
geom = compute(ops, tile_dimension=tile_dimension, verbose=True)
shnap = dt.now()
duration = (shnap - shnip).total_seconds()

print("operation performed with compute in {} sec.".format(duration))
print(geom)
Example #53
0
## Colorado in WGS84 latitude/longitude coordinates.
BBOX = [-109.1, 36.9, -102.0, 41.0]


## Construct RequestDataset Object #############################################

## This object will be reused so just build it once.
rd = RequestDataset(URI_TAS,VAR_TAS)

## Returning NumPy Data Objects ################################################

## The NumPy data type return is the default. Only the geometry and
## RequestDataset are required (except "snippet" of course...). See the
## documentation for the OcgCollection object to understand the return 
## structure.
ret = OcgOperations(dataset=rd,geom=BBOX,snippet=SNIPPET).execute()

## Returning Converted Files ###################################################

## At this time, the software will create named temporary directories inside
## env.DIR_OUTPUT. This is to avoid the confusing process of managing overwrites
## etc. The support for managing output files will be improved in future 
## releases. The returned value is the absolute path to the file or folder
## depending on the requested format.
output_formats = ['shp','csv','keyed']
for output_format in output_formats:
    prefix = output_format
    ops = OcgOperations(dataset=rd,geom=BBOX,snippet=SNIPPET,
                        output_format=output_format,prefix=prefix)
    ret = ops.execute()
Example #54
0
# Data returns will overwrite in this case. Use with caution!!
env.OVERWRITE = True


# RequestDatasetCollection #############################################################################################

rdc = RequestDatasetCollection([RequestDataset(
    os.path.join(DATA_DIR, uri), var) for uri, var in NCS.iteritems()])

# Return In-Memory #####################################################################################################

# Data is returned as a dictionary-like object (SpatialCollection) with 51 keys (don't forget Puerto Rico...). A key in 
# the returned dictionary corresponds to a geometry "ugid" with the value of type OcgCollection.
print('returning numpy...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries')
ret = ops.execute()

# Return a SpatialCollection, but only for a target state in a U.S. state boundaries shapefile. In this case, the UGID 
# attribute value of 23 is associated with Nebraska.

print('returning numpy for a state...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries',
                    geom_select_uid=[23])
ret = ops.execute()

# Write to Shapefile ###################################################################################################

print('returning shapefile...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries',
                    output_format='shp')
path = ops.execute()
Example #55
0
# Subset the input dataset to return the desired base period for the percentile basis.
variable = 'tas'
years = range(1971, 2001)
time_region = {'year': years}
rd = RequestDataset(uri=in_file, variable=variable)
field = rd.get()
field.get_time_region(time_region)

# Calculate the percentile basis. The data values must be a three-dimensional array.
arr = field.variables[variable].value.squeeze()
dt_arr = field.temporal.value_datetime
percentile = 90
window_width = 5
percentile_dict = IcclimTG90p.get_percentile_dict(arr, dt_arr, percentile,
                                                  window_width)

########################################################################################################################
# Calculate indice using custom percentile basis.

calc = [{
    'func': 'icclim_TG90p',
    'name': 'TG90p',
    'kwds': {
        'percentile_dict': percentile_dict
    }
}]
calc_grouping = 'month'
ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping)
coll = ops.execute()
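
# To persist the indice to disk instead of returning an in-memory collection,
# the same operation can be re-run with a netCDF return; a sketch, where the
# prefix is a hypothetical file base name:
#
#   ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping,
#                       output_format='nc', prefix='tg90p_custom_basis')
#   path = ops.execute()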
Example #56
0
rdc = RequestDatasetCollection([
    RequestDataset(os.path.join(DATA_DIR, uri), var)
    for uri, var in NCS.iteritems()
])

## Return In-Memory ############################################################

## Data is returned as a dictionary with 51 keys (don't forget Puerto Rico...).
## A key in the returned dictionary corresponds to a geometry "ugid" with the
## value of type OcgCollection.
ops = OcgOperations(dataset=rdc,
                    spatial_operation='clip',
                    aggregate=True,
                    snippet=SNIPPET,
                    geom='state_boundaries')
ret = ops.execute()

## Write to Shapefile ##########################################################

ops = OcgOperations(dataset=rdc,
                    spatial_operation='clip',
                    aggregate=True,
                    snippet=SNIPPET,
                    geom='state_boundaries',
                    output_format='shp')
path = ops.execute()

## Write All Data to Keyed Format ##############################################

## Without the snippet, we are writing all data to the linked CSV files. The
## operation will take considerably longer.
Example #57
0
def call(resource=[], variable=None, dimension_map=None, calc=None,
  calc_grouping=None, conform_units_to=None, memory_limit=None, prefix=None,
  regrid_destination=None, regrid_options='bil', level_range=None,
  geom=None, output_format_options=False, search_radius_mult=2.,
  select_nearest=False, select_ugid=None, spatial_wrapping=None, t_calendar=None, time_region=None, time_range=None,
  dir_output=curdir, output_format='nc'):
  '''
  ocgis operation call

  :param resource:
  :param variable: variable in the input file to be picked
  :param dimension_map: dimension map in case of unconventional storage of data 
  :param calc: ocgis calc syntax for calculation partion 
  :param calc_grouping: time aggregate grouping 
  :param conform_units_to: 
  :param memory_limit: limit the amount of data to be loaded into the memory at once if None (default) free memory is detected by birdhouse
  :param level_range: subset of given levels
  :param prefix: string for the file base name 
  :param regrid_destination: file path with netCDF file with grid for output file

  :param geom: name of shapefile stored in birdhouse shape cabinet
  :param output_format_options: output options for netCDF e.g. compression level
  :param regrid_options: methods for regridding: 
                          'bil' = Bilinear interpolation 
                          'bic' = Bicubic interpolation 
                          'dis' = Distance-weighted average remapping
                          'nn' = nearest neighbour
                          'con' = First-order conservative remapping
                          'laf' = largest area fraction reamapping
  :param search_radius_mult: search radius for point geometries. All included gridboxes will be returned
  :param select_nearest: nearest neighbour selection for point geometries
  :param select_ugid: ugid for appropriate polygons 
  :param spatial_wrapping: how to handle coordinates in case of subsets, options: None (default), 'wrap', 'unwrap'
  :param time_region: select single month 
  :param time_range: sequence of two datetime.datetime objects to mark start and end point 
  :param dir_output (default= curdir):
  :param output_format:
  :return: output file path
  '''
  logger.info('Start ocgis module call function')
  from ocgis import OcgOperations, RequestDataset, env
  from ocgis.util.large_array import compute
  import uuid
  
  # prepare the environment 
  env.DIR_SHPCABINET = DIR_SHP
  env.OVERWRITE = True
  env.DIR_OUTPUT = dir_output
  
  if geom is not None:
    spatial_reorder = True
    spatial_wrapping = 'wrap'
  else:
    spatial_reorder = False
    spatial_wrapping = None

  if prefix is None:
    prefix = str(uuid.uuid1())
  env.PREFIX = prefix
  if output_format_options is False:
    output_format_options = None
  elif output_format_options is True:
    output_format_options = {'data_model': 'NETCDF4',  # or NETCDF4_CLASSIC
                             'variable_kwargs': {'zlib': True, 'complevel': 9}}
  else:
    logger.info('output_format_options are set to %s' % output_format_options)

  if not isinstance(resource, list):
    resource = [resource]
  # execute ocgis
  logger.info('Execute ocgis module call function')

  if has_Lambert_Conformal(resource) and geom is not None:
    logger.debug('input has a Lambert_Conformal projection and cannot be subset with geom')
    output = None
  else:
    try:
      rd = RequestDataset(resource, variable=variable, level_range=level_range,
        dimension_map=dimension_map, conform_units_to=conform_units_to,
        time_region=time_region, t_calendar=t_calendar, time_range=time_range)
      ops = OcgOperations(dataset=rd,
        output_format_options=output_format_options,
        spatial_wrapping=spatial_wrapping,
        spatial_reorder=spatial_reorder,
        calc=calc,
        calc_grouping=calc_grouping,
        geom=geom,
        output_format=output_format,
        prefix=prefix,
        search_radius_mult=search_radius_mult,
        select_nearest=select_nearest,
        select_ugid=select_ugid,
        add_auxiliary_files=False)
      logger.info('OcgOperations set')
    except Exception:
      logger.debug('failed to setup OcgOperations')
      raise
    try:
      from numpy import sqrt
      from flyingpigeon.utils import FreeMemory

      if memory_limit is None:
        f = FreeMemory()
        mem_kb = f.user_free
        mem_mb = mem_kb / 1024.
        mem_limit = mem_mb / 2.  # set the limit to half of the free memory
      else:
        mem_limit = memory_limit

      if mem_limit >= 1024. * 4:
        mem_limit = 1024. * 4  # cap the limit at 4 GB

      data_kb = ops.get_base_request_size()['total']
      data_mb = data_kb / 1024.

      if variable is None:
        variable = rd.variable
        logger.info('%s detected as variable' % variable)

      logger.info('data_mb = %s ; memory_limit = %s' % (data_mb, mem_limit))
    except Exception as e:
      logger.debug('failed to compare data load with free memory: %s' % e)
      raise


    if data_mb <= mem_limit:  # input is smaller than half of the free memory
      try:
        logger.info('ocgis module call as ops.execute()')
        geom_file = ops.execute()
      except Exception:
        logger.debug('failed to execute ocgis operation')
        raise
    else:
      ###########################
      # calculation of chunk size
      ###########################

      try:
        size = ops.get_base_request_size()
        nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
        element_in_kb = size['total'] / reduce(lambda x, y: x * y, size['variables'][variable]['value']['shape'])
        element_in_mb = element_in_kb / 1024.
        tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk edge length
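        # The square-root sizing assumes a square spatial tile: one grid cell
        # costs element_in_mb * nb_time_coordinates_rd MB over the full time
        # axis, so a tile of edge length tile_dim (tile_dim ** 2 cells) stays
        # within mem_limit.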
        
        logger.info('ocgis module call compute with chunks')
        if calc is None:
          calc = '%s=%s*1' % (variable, variable)
          logger.info('calc set to = %s' % calc)
          ops = OcgOperations(dataset=rd,
                    output_format_options=output_format_options,
                    spatial_wrapping=spatial_wrapping,
                    spatial_reorder=spatial_reorder,
                    calc=calc,
                    calc_grouping=calc_grouping,
                    geom=geom,
                    output_format=output_format,
                    prefix=prefix,
                    search_radius_mult=search_radius_mult,
                    select_nearest=select_nearest,
                    select_ugid=select_ugid,
                    add_auxiliary_files=False)
        geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
      except Exception:
        logger.debug('failed to compute ocgis with chunks')
        raise
    logger.info('Succeeded with ocgis module call function')

    ###########################################
    # remapping according to regrid information
    ###########################################
    if regrid_destination is not None:
      try:
        from cdo import Cdo
        cdo = Cdo()

        output = '%s.nc' % uuid.uuid1()
        remap = 'remap%s' % regrid_options
        # resolve the matching remap operator on the cdo object directly
        # instead of building a command string and exec'ing it
        remap_operators = [op for op in dir(cdo) if remap in op]
        output = getattr(cdo, remap_operators[0])(regrid_destination, input=geom_file, output=output)
      except Exception:
        logger.debug('failed to remap')
        raise
    else:
      output = geom_file
  return output
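
# Hypothetical usage sketch (paths, variable, and calc names are illustrative
# assumptions, not part of the original function):
# output_path = call(resource=['/path/to/tas_input.nc'],
#                    variable='tas',
#                    calc=[{'func': 'mean', 'name': 'tas_mean'}],
#                    calc_grouping='month',
#                    output_format='nc')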