def create_variable(self, mapped_var, ncd_params=None, **kwargs):
        '''
        Propagate a MappedVariable object to the underlying NetCDF group.

        Missing dimensions are created on demand; the variable's attributes
        are copied onto the new NetCDF variable.  Extra kwargs are merged
        into ncd_params.  Returns the created NetCDF variable.
        '''

        if ncd_params is None:
            from awrams.utils.io.netcdf_wrapper import NCVariableParameters
            ncd_params = NCVariableParameters()
        ncd_params.update(**kwargs)

        dim_names = [dim.name for dim in mapped_var.coordinates.dimensions]

        # Make sure every dimension exists in the group before creating the variable
        for name in dim_names:
            if name not in self.ncd_group.dimensions:
                self.create_coordinates(mapped_var.coordinates[name])

        new_var = self.ncd_group.createVariable(mapped_var.variable.name,
                                                datatype=mapped_var.dtype,
                                                dimensions=dim_names,
                                                **ncd_params)

        # Copy variable attributes via the h5/nc-agnostic setter
        for attr_name, attr_val in mapped_var.variable._dump_attrs().items():
            set_h5nc_attr(new_var, attr_name, attr_val)

        self._update_dicts()
        return new_var
def flattened_cell_file(variable,
                        period,
                        extent,
                        dtype=np.float64,
                        filename=None,
                        chunksize=None,
                        leave_open=True):
    '''
    Create a NetCDF file holding *variable* over a flattened (cell, time)
    coordinate set derived from *extent* and *period*.

    Auxiliary 'lats'/'lons' variables record the latitude/longitude of each
    flattened cell.  When leave_open is True the open DatasetManager is
    returned; otherwise the file is closed and None is returned.
    '''

    import netCDF4 as ncd

    # Build the flattened cell axis and the time axis
    cell_dim = Dimension('cell', Units('cell_idx'), np.int32)
    cell_coords = Coordinates(cell_dim, list(range(extent.cell_count)))
    time_coords = period_to_tc(period)
    coord_set = CoordinateSet([cell_coords, time_coords])

    mapped = MappedVariable(variable, coord_set, dtype)

    out_fn = variable.name + '.nc' if filename is None else filename

    dsm = DatasetManager(ncd.Dataset(out_fn, 'w'))

    # Default: one cell per chunk, full period per chunk
    if chunksize is None:
        chunksize = (1, len(period))
    validated_chunks = coord_set.validate_chunksizes(chunksize)

    from awrams.utils.io.netcdf_wrapper import NCVariableParameters
    dsm.create_variable(mapped, NCVariableParameters(chunksizes=validated_chunks))

    # Record per-cell geolocation alongside the data
    lats, lons = extent._flatten_fields()
    lat_mv = MappedVariable(Variable('lats', deg_north), cell_coords, np.float64)
    lon_mv = MappedVariable(Variable('lons', deg_east), cell_coords, np.float64)
    dsm.create_variable(lat_mv)[:] = lats
    dsm.create_variable(lon_mv)[:] = lons

    if leave_open:
        return dsm
    dsm.close()
    def create_files(self,
                     schema,
                     leave_open=True,
                     clobber=False,
                     chunksize=None,
                     file_creator=None,
                     file_appender=None,
                     create_dirs=True,
                     ncparams=None,
                     **kwargs):
        '''
        Create (or extend) the on-disk NetCDF files for this mapped variable.

        schema : provides split_periods() and gen_file_name(); partitions the
            variable's time index into per-file periods.
        leave_open : when True, re-map all created files in append mode.
        clobber : when False, existing files are extended along the time
            dimension; when True they are removed and recreated.
        chunksize : chunk spec validated per file; defaults to DEFAULT_CHUNKSIZE.
        file_creator / file_appender : optional callables fn -> open dataset,
            overriding the netCDF4-based defaults.
        ncparams : dict merged into the NCVariableParameters.
        kwargs are propagated to NCVariableParameters.
        '''

        if ncparams is None:
            ncparams = {}

        if file_creator is None:

            def create_new_nc(fn):
                import netCDF4 as ncd
                try:
                    return ncd.Dataset(fn, 'w')
                except RuntimeError:
                    # A stale HDF5 handle can block creation; clean up and retry
                    from awrams.utils.io.general import h5py_cleanup_nc_mess
                    h5py_cleanup_nc_mess(fn)
                    return ncd.Dataset(fn, 'w')

            file_creator = create_new_nc

        if file_appender is None:

            def append_nc(fn):
                try:
                    return open_append(db_opener, fn, 'a')
                except Exception:
                    # Log which file failed, then propagate
                    logger.critical("EXCEPTION: %s", fn)
                    raise

            file_appender = append_nc

        if create_dirs:
            os.makedirs(self.path, exist_ok=True)

        period = self.mapped_var.coordinates.time.index
        split_periods = schema.split_periods(period)

        self.var_name = self.mapped_var.variable.name

        def gen_fn(p):
            # Single place for the per-period filename convention
            return os.path.join(
                self.path,
                schema.gen_file_name(self.mapped_var.variable, p) + '.nc')

        # Examine first existing file to see if we need to extend the coordinates
        if not clobber:
            p = split_periods[0]
            fn = gen_fn(p)

            if os.path.exists(fn):
                ds = file_appender(fn)
                dsm = DatasetManager(ds)
                existing_coords = dsm.get_coords()

                if 'time' in existing_coords:
                    # +++ Could generalise this to autoexpand in other
                    # dimensions; hardcoded to time as the 'normal' case.
                    existing_time = dsm.get_coord('time').index
                    extension_time = p

                    # Extend the segment file's own time axis...
                    new_seg_tc = period_to_tc(
                        existing_time.union(extension_time))

                    # ...and the global mapping's time axis
                    global_extension = self.mapped_var.coordinates.time.index
                    new_global_tc = period_to_tc(
                        existing_time.union(global_extension))

                    dsm.set_time_coords(new_seg_tc)
                    self.mapped_var.coordinates.update_coord(new_global_tc)

                    # Re-split with the (possibly) extended period
                    period = self.mapped_var.coordinates.time.index
                    split_periods = schema.split_periods(period)

                ds.close()

        for p in split_periods:
            fn = gen_fn(p)
            tc = period_to_tc(p)

            new_file = True

            if os.path.exists(fn):
                if not clobber:
                    ds = file_appender(fn)
                    dsm = DatasetManager(ds)

                    # Grow the time dimension if this period extends it
                    if len(p) > len(ds.variables['time']):
                        dsm.set_time_coords(tc, resize=True)

                    new_file = False
                else:
                    os.remove(fn)

            if new_file:
                ds = file_creator(fn)
                dsm = DatasetManager(ds)

                cur_cs = CoordinateSet(
                    (tc, self.mapped_var.coordinates.latitude,
                     self.mapped_var.coordinates.longitude))

                for coord in cur_cs:
                    dsm.create_coordinates(coord)

                from awrams.utils.io.netcdf_wrapper import NCVariableParameters

                if chunksize is None:
                    chunksize = DEFAULT_CHUNKSIZE  #pylint: disable=no-name-in-module

                chunksizes = cur_cs.validate_chunksizes(chunksize)

                # kwargs first, then explicit ncparams win on conflict
                ncd_params = NCVariableParameters(chunksizes=chunksizes,
                                                  **kwargs)
                ncd_params.update(**ncparams)

                target_var = dsm.create_variable(self.mapped_var, ncd_params)
                dsm.awra_var = target_var

                set_h5nc_attr(dsm.ncd_group, 'var_name',
                              self.mapped_var.variable.name)

                dsm.set_time_coords(cur_cs.time, resize=True)

            ds.close()

        if leave_open:
            self.mode = 'a'
            all_files = [
                schema.gen_file_name(self.mapped_var.variable, p) + '.nc'
                for p in schema.split_periods(period)
            ]
            self.map_files(self.path,
                           '*',
                           self.mapped_var.variable.name,
                           ff=lambda f: os.path.split(f)[1] in all_files)
# Example #4
    def create_files(self,
                     leave_open=True,
                     clobber=True,
                     chunksize=None,
                     file_creator=None,
                     file_appender=None,
                     create_dirs=True,
                     **kwargs):
        '''
        Create (or extend) the NetCDF files for every segment of this splitter.

        leave_open : when True, reopen all files in append mode afterwards.
        clobber : when False, existing files are extended along the time
            dimension; when True they are recreated.
        chunksize : chunk spec validated per segment; defaults to
            awrams.utils.settings.CHUNKSIZE.
        file_creator / file_appender : optional callables fn -> open dataset,
            overriding the netCDF4-based defaults.
        kwargs are propagated to NCVariableParameters.
        '''

        if file_creator is None:

            def create_new_nc(fn):
                import netCDF4 as ncd
                try:
                    return ncd.Dataset(fn, 'w')
                except RuntimeError:
                    # A stale HDF5 handle can block creation; clean up and retry
                    from awrams.utils.io.general import h5py_cleanup_nc_mess
                    h5py_cleanup_nc_mess(fn)
                    return ncd.Dataset(fn, 'w')

            file_creator = create_new_nc

        if file_appender is None:

            def append_nc(fn):
                try:
                    return open_append(db_opener, fn, 'a')
                except Exception:
                    # Log which file failed, then propagate
                    logger.critical("EXCEPTION: %s", fn)
                    raise

            file_appender = append_nc

        if create_dirs:
            os.makedirs(self.path, exist_ok=True)

        # Examine first existing file to see if we need to extend the coordinates
        if not clobber:
            seg = self.splitter.segments[0]
            fn = os.path.join(self.path, self.gen_file_name(seg))

            if os.path.exists(fn):
                ds = file_appender(fn)
                dsm = DatasetManager(ds)
                existing_coords = dsm.get_coords()

                if 'time' in existing_coords:
                    # +++ Could generalise this to autoexpand in other
                    # dimensions; hardcoded to time as the 'normal' case.
                    seg_time = seg.coordinates.time

                    existing_time = dsm.get_coord('time').index
                    extension_time = seg_time.index

                    # Extend the segment file's own time axis...
                    new_seg_tc = period_to_tc(
                        existing_time.union(extension_time))

                    # ...and the global mapping's time axis
                    global_extension = self.mapped_var.coordinates.time.index
                    new_global_tc = period_to_tc(
                        existing_time.union(global_extension))

                    dsm.set_time_coords(new_seg_tc)
                    self.mapped_var.coordinates.update_coord(new_global_tc)

                ds.close()
                self.splitter.set_coordinates(self.mapped_var.coordinates)

        for seg in self.splitter.segments:
            fn = os.path.join(self.path, self.gen_file_name(seg))
            self.file_map[seg] = fn

            new_file = True

            if os.path.exists(fn) and not clobber:
                ds = file_appender(fn)
                dsm = DatasetManager(ds)

                # Grow the time dimension if this segment extends it
                if 'time' in seg.coordinates:
                    if len(seg.coordinates.time) > len(ds.variables['time']):
                        dsm.set_time_coords(seg.coordinates.time, resize=True)

                new_file = False
            else:
                # if writing provenance, create / copy template
                ds = file_creator(fn)
                dsm = DatasetManager(ds)

            if new_file:
                for coord in seg.coordinates:
                    dsm.create_coordinates(coord)

                from awrams.utils.io.netcdf_wrapper import NCVariableParameters

                if chunksize is None:
                    from awrams.utils.settings import CHUNKSIZE as chunksize  #pylint: disable=no-name-in-module

                chunksizes = seg.coordinates.validate_chunksizes(chunksize)
                ncd_params = NCVariableParameters(chunksizes=chunksizes,
                                                  **kwargs)

                target_var = dsm.create_variable(self.mapped_var, ncd_params)

                # h5/nc-agnostic attribute setter (consistent with the
                # DatasetManager-based writers elsewhere in this module)
                set_h5nc_attr(dsm.ncd_group, 'var_name',
                              self.mapped_var.variable.name)

                dsm.set_time_coords(seg.coordinates.time, resize=True)

            ds.close()

        if leave_open:
            self.open_all('a')

        # Map each filename back to the time coordinates it covers
        self.time_file_map = {}
        for seg in self.splitter.segments:
            fn = self.file_map.get(seg)
            if fn is not None:
                self.time_file_map[fn] = seg.coordinates.time