def test_normalization_of_string_arrays_netcdf3(self):
        thestr = 'boodsfasfasdfm'

        with nc4.Dataset(self.fp, 'w', format="NETCDF3_CLASSIC") as ncd:

            dimsize = len(thestr)
            ncd.createDimension('n', dimsize)

            # Single string (netCDF3 has no variable-length strings, so it is stored as a char array along 'n')
            ncd.createVariable('single_S', 'S1', ('n',))

            for k, v in ncd.variables.items():
                if k.startswith('single_'):
                    v[:] = nc4.stringtoarr(thestr, dimsize)

            # Array of str
            ncd.createVariable('many_S', 'S1', ('n', 'n',))

            for k, v in ncd.variables.items():
                if k.startswith('many_'):
                    v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape(v.shape)

        with nc4.Dataset(self.fp) as ncd:
            assert normalize_array(ncd.variables['single_S']) == thestr
            assert np.all(normalize_array(ncd.variables['many_S']) == [thestr] * dimsize)
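For reference, a minimal standalone sketch (not part of the test above; it only assumes netCDF4 is installed) of the round trip this test relies on: stringtoarr pads a Python string into a fixed-width numpy 'S1' character array, and chartostring reverses the conversion.

import netCDF4 as nc4

# pad 'boodsfasfasdfm' into a 14-element array of single bytes
arr = nc4.stringtoarr('boodsfasfasdfm', 14)   # dtype 'S1', shape (14,)
print(arr)                                    # [b'b' b'o' b'o' ... b'm']
print(nc4.chartostring(arr))                  # boodsfasfasdfm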
Example #2
    def test_normalization_of_string_arrays_netcdf4(self):
        thestr = 'bosadfsdfkljskfusdiofu987987987om'

        with nc4.Dataset(self.fp, 'w', format="NETCDF4") as ncd:

            dimsize = len(thestr)
            ncd.createDimension('n', dimsize)

            # Single str (no dimension)
            ncd.createVariable('single_str', str)
            ncd.createVariable('single_unicode_', np.unicode_)
            ncd.createVariable('single_U', '<U1')
            ncd.createVariable('single_S', 'S1', ('n', ))

            for k, v in ncd.variables.items():
                if k.startswith('single_'):
                    if v.dimensions:
                        v[:] = nc4.stringtoarr(thestr, dimsize)
                    else:
                        v[0] = thestr

            # Array of str
            ncd.createVariable('many_str', str, ('n', ))
            ncd.createVariable('many_unicode_', np.unicode_, ('n', ))
            ncd.createVariable('many_U', '<U1', ('n', ))
            ncd.createVariable('many_S', 'S1', (
                'n',
                'n',
            ))

            for k, v in ncd.variables.items():
                if k.startswith('many_'):
                    if len(v.dimensions) > 1:
                        v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize),
                                          dimsize)
                    else:
                        v[:] = np.tile(thestr, dimsize)

        with nc4.Dataset(self.fp) as ncd:
            assert normalize_array(ncd.variables['single_str']) == thestr
            assert normalize_array(ncd.variables['single_unicode_']) == thestr
            assert normalize_array(ncd.variables['single_U']) == thestr
            assert normalize_array(ncd.variables['single_S']) == thestr

            assert np.all(
                normalize_array(ncd.variables['many_str']) == [thestr] *
                len(thestr))
            assert np.all(
                normalize_array(ncd.variables['many_unicode_']) == [thestr] *
                len(thestr))
            assert np.all(
                normalize_array(ncd.variables['many_U']) == [thestr] *
                len(thestr))
            assert np.all(
                normalize_array(ncd.variables['many_S']) == [thestr] *
                len(thestr))
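A side note on the two storage styles exercised above (a hedged sketch; the file name 'strings_demo.nc' is made up): with format NETCDF4, a variable created with dtype str holds variable-length strings and needs no character dimension, while 'S1' variables store fixed-width character arrays and require stringtoarr.

import netCDF4 as nc4

with nc4.Dataset('strings_demo.nc', 'w', format='NETCDF4') as ncd:
    ncd.createDimension('n', 3)
    ncd.createDimension('strlen', 5)
    v_str = ncd.createVariable('v_str', str, ('n',))            # variable-length strings
    v_chr = ncd.createVariable('v_chr', 'S1', ('n', 'strlen'))  # fixed-width chars
    v_str[0] = 'hello'                      # a Python str can be assigned directly
    v_chr[0] = nc4.stringtoarr('hello', 5)  # must be converted to a char array first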
Example #3
 def close(self):
     # write end of dataset time in the form '2012-06-20T00:59:31Z'
     self.dataset.variables['time_coverage_end'][:] = \
         stringtoarr(self.end_time.strftime(self.date_fmt), STRING_LENGTH_SHORT)
     self.dataset.variables['sweep_number'][0] = 0
     self.dataset.variables['sweep_mode'][0] = stringtoarr(
         "pointing", STRING_LENGTH_SHORT)
     self.dataset.variables['fixed_angle'][0] = 0.0
     self.dataset.variables['sweep_start_ray_index'][0] = 0
     self.dataset.variables['sweep_end_ray_index'][
         0] = self.dataset.variables['time'].shape[0] - 1
     self.dataset.sync()
Example #4
def updateXTIME():

  # Parse command line
  ap = argparse.ArgumentParser()
  ap.add_argument('filename', type=str,
                  help='netcdf file to modify')
  ap.add_argument('date', type=str,
                  help='Date in YYYYMMDDHH format')
  args = ap.parse_args()

  assert os.path.exists(args.filename), 'filename must exist!'
  ncfile = nc.Dataset(args.filename, 'a')

  # copy global attributes all at once via dictionary
  atts = deepcopy(ncfile.__dict__)

  d = dt.datetime.strptime(args.date, '%Y%m%d%H')
  confdate = d.strftime('%Y-%m-%d_%H:%M:%S')
  atts['config_start_time'] = confdate
  atts['config_stop_time'] = confdate
  ncfile.setncatts(atts)

  varname = 'xtime'
  old_xtime = ncfile[varname][0][:]
  xtime = nc.stringtoarr(confdate, len(old_xtime))
  ncfile[varname][0] = xtime

  ncfile.close()
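A quick standalone check of the date-reformatting step in the middle of this function (pure standard library; the input date below is invented):

import datetime as dt

d = dt.datetime.strptime('2021060100', '%Y%m%d%H')
print(d.strftime('%Y-%m-%d_%H:%M:%S'))   # 2021-06-01_00:00:00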
Example #5
def save_string_list2d(group, membername, array2d, dimensionname):

    # lengths of all elements
    element_length = [ ]
    for element in array2d:
        if element:
            element_length.extend([ len(subelement) for subelement in element if subelement ])

    # if nothing then don't bother saving
    if len(element_length) == 0:
        return

    # Compute max length over all strings in array        
    max_length = max(element_length) + 1

    # Name used to store length of strings
    lengthname = STRING_LENGTH_DIMENSION_FORMAT.format(membername)

    # Build new variable
    group.createDimension(membername, max([ len(element) if element else 0 for element in array2d]))
    group.createDimension(lengthname, max_length)
    group.createVariable(membername, 'S1', [dimensionname, membername, lengthname])

    # populate contents
    for index, element in enumerate(array2d):
        if element is not None:
            listcontents = numpy.zeros((group.variables[membername].shape[1], max_length), 'S1')
            for subindex, subelement in enumerate(element):
                if subelement is not None:
                    listcontents[subindex] = stringtoarr(subelement, max_length)
            group.variables[membername][index] = listcontents
Example #6
def create_obs_idx(ncOut, var, name):

    nc_out = ncOut.createVariable(name,
                                  'S1', (
                                      'n' + name,
                                      'strlen80',
                                  ),
                                  zlib=True)
    nc_idx_out = ncOut.createVariable(name + '_INDEX',
                                      'i4', ('OBS', ),
                                      zlib=True,
                                      fill_value=-1)

    print('processing ', name, len(var))
    for i, f in enumerate(var):
        # print(f)
        try:
            nc_out[i] = stringtoarr(f[0], 80, dtype='U')
            # s = np.array(f[0], 'S80')
            # nc_out[i] = stringtochar(f[0], encoding='utf-8')
            # nc_out[i] = f[0].encode('utf-8')
            # nc_out[i] = f[0]
        except UnicodeEncodeError:
            pass
        nc_idx_out[f[1].index] = i
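The commented-out lines above hint at netCDF4.stringtochar, which converts a whole numpy string array to a character array in one call. A minimal sketch (the array contents are invented):

import numpy as np
import netCDF4

names = np.array(['alpha', 'beta'], dtype='U80')       # fixed-width unicode strings
chars = netCDF4.stringtochar(names, encoding='utf-8')  # dtype 'S1', shape (2, 80)
print(chars.shape)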
Example #7
def create_nc_strings(ncfile, vname, strings, dims, desc):
    str_length = max_len(strings)

    chars = np.zeros((len(strings), str_length), dtype='S1')
    for i, string in enumerate(strings):
        chars[i] = netCDF4.stringtoarr(string, str_length, 'S')

    create_nc_dim(ncfile, dims[1], str_length)
    create_nc_var(ncfile, vname, np.array(chars), 'S1', dims, desc)
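max_len, create_nc_dim and create_nc_var are defined elsewhere in that project; a plausible minimal reconstruction, with names and signatures inferred from the call sites above (treat these as assumptions, not the project's actual helpers):

def max_len(strings):
    # length of the longest string in the sequence
    return max(len(s) for s in strings)

def create_nc_dim(ncfile, name, size):
    # create the dimension only if it does not already exist
    if name not in ncfile.dimensions:
        ncfile.createDimension(name, size)

def create_nc_var(ncfile, vname, data, dtype, dims, desc):
    # create the variable, attach a description attribute and write the data
    var = ncfile.createVariable(vname, dtype, dims)
    var.description = desc
    var[:] = data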
Example #8
 def add_generator(self, gen_id):
     if gen_id in self.gen_id_dict: return
     i = len(self.dim_gens)
     self.gen_id_dict[gen_id] = i
     self.var_gen_ids[i,:] = netCDF4.stringtoarr(gen_id, len(self.dim_str))
     for var in [self.var_dispatch_5min, self.var_dispatch_30min,
                 self.var_dispatch_daily, self.var_dispatch_daily_min, self.var_dispatch_daily_max]:
         npoints = var.shape[0]
         var[:, i] = numpy.zeros(npoints)
Example #10
def do_tslist():
    global nstations

    # Parse tslist

    station = ncfile.variables['station']
    name = ncfile.variables['name']
    prefix = ncfile.variables['prefix']
    lat = ncfile.variables['lat']
    lon = ncfile.variables['lon']

    strln = len(ncfile.dimensions['strln'])

    filetslist = open('tslist', 'r')

    # Header
    # #-----------------------------------------------#
    # # 24 characters for name | pfx |  LAT  |   LON  |
    # #-----------------------------------------------#

    next(filetslist)
    next(filetslist)
    next(filetslist)

    # Body
    # veenkampen                veenk 51.98101  5.61957
    stationi = -1
    for line in filetslist:
        stationi += 1
        fields = line.split()
        station[stationi] = stationi
        name[stationi] = cdf.stringtoarr(fields[0], strln)
        prefix[stationi] = cdf.stringtoarr(fields[1], strln)
        lat[stationi] = fields[2]
        lon[stationi] = fields[3]
    nstations = stationi + 1

    filetslist.close()
Example #12
 def addstn(self, obs):
     # add a station given an observation dictionary
     mystn = obs['stn']
     if mystn not in self.stntoid.keys():
         # create station
         station_id = len(self.stntoid)
         self.rootcdf.variables['station_name'][station_id] = stringtoarr(mystn, 20)
         self.rootcdf.variables['lat'][station_id] = obs['lat']
         self.rootcdf.variables['lon'][station_id] = obs['long']
         self.rootcdf.variables['alt'][station_id] = obs['elev']
         self.rootcdf.variables['station_info'][station_id] = obs['dsource']
         self.stntoid[mystn] = station_id
     else:
         # station is defined find it
         station_id = self.stntoid[mystn] # and back to int
     self.rootcdf.sync()
     return station_id
Example #13
def ConvertCharDims(var, datadict):

    if var.dtype == 'S1':
        datalen = len(datadict['values'])
        dimlen = list(var.shape)

        dimlen.remove(datalen)  # string length remaining

        slen = dimlen[0]

        values = [netCDF4.stringtoarr(d, slen) for d in datadict['values']]
        datadict['values'] = values

    return datadict
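A hedged usage sketch for ConvertCharDims: a plain numpy array stands in for the netCDF 'S1' variable, the datadict layout (a 'values' key holding a list of strings) is inferred from the function body, and the snippet above is assumed to have `import netCDF4` in scope.

import numpy as np

var = np.zeros((3, 6), dtype='S1')   # stands in for a netCDF 'S1' variable of shape (3, strlen)
datadict = {'values': ['alpha', 'beta', 'gamma']}
out = ConvertCharDims(var, datadict)
print(out['values'][0])              # char array for 'alpha', padded to length 6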
Example #14
 def add_Variables(self):
     '''
     Add datasets according to the configuration file.
     '''
     var_dict = self.conf['%s+%s' % (self.sat1, self.sen1)]
     # add CAL_LUT for each band
     dsetNameLst = var_dict.keys()
     #         for eachchan in var_dict["_chanlist"]:
     #             dsetNameLst.append("CAL_LUT_CH%s" % eachchan)
     for eachVar in dsetNameLst:
         if eachVar.startswith('_'): continue
         if eachVar == 'TBB_Corrct_LUT': continue
         if eachVar == 'Nonlinear_coefficient': continue
         #             if eachVar.startswith('CAL_LUT'):
         #                 var_info = var_dict["CAL_LUT"]
         #                 var_info['_dims'] = ['date', 'lut_row']
         #             else:
         #                 var_info = var_dict[eachVar]
         var_info = var_dict[eachVar]
         var = self.rootgrp.createVariable(eachVar, var_info['_fmt'],
                                           var_info['_dims'])
         for eachKey in var_info:
             if eachKey.startswith('_'): continue
             if eachKey == eachVar:
                 if var_info['_fmt'] == 'S1':
                     # string data
                     # the string must be converted to a char array with stringtoarr before being written to the NC file!!!
                     char_len = 1
                     for each in var_info['_dims']:
                         char_len = char_len * int(
                             var_dict['_%s' % each])  # total number of characters
                     char_ary = stringtoarr(''.join(var_info[eachKey]),
                                            char_len)
                     var[:] = char_ary
                 else:
                     # non-string data
                     var[:] = var_info[eachKey]
             else:
                 if is_number(var_info[eachKey]):
                     if '.' in var_info[eachKey]:
                         var.setncattr(eachKey,
                                       np.float32(var_info[eachKey]))
                     else:
                         var.setncattr(eachKey, np.short(var_info[eachKey]))
                 else:
                     var.setncattr(eachKey, var_info[eachKey])
Example #16
    def set_trajectory_id(self):
        """ Sets or updates the trajectory dimension and variable for the dataset 
        and the global id attribute

        Input:
            - glider: Name of the glider deployed.
            - deployment_date: String or DateTime of when glider was
                first deployed.
        """

        if 'trajectory' not in self._nc.variables:
            # Setup Trajectory Dimension
            self._nc.createDimension('traj_strlen', len(self._trajectory))

            # Setup Trajectory Variable
            trajectory_var = self._nc.createVariable(
                u'trajectory',
                'S1', ('traj_strlen', ),
                zlib=True,
                complevel=self._comp_level)

            attrs = {
                'cf_role': 'trajectory_id',
                'long_name': 'Trajectory/Deployment Name',  # NOQA
                'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
            }
            for key, value in sorted(attrs.items()):
                trajectory_var.setncattr(key, value)
        else:
            trajectory_var = self._nc.variables['trajectory']

        # Set the trajectory variable data
        trajectory_var[:] = stringtoarr(self._trajectory,
                                        len(self._trajectory))

        if not self._nc.getncattr('id').strip():
            self._nc.id = self._trajectory  # Global id variable
Example #17
    def set_trajectory_id(self, glider, deployment_date):
        """ Sets the trajectory dimension and variable for the dataset

        Input:
            - glider: Name of the glider deployed.
            - deployment_date: String or DateTime of when glider was
                first deployed.
        """

        if (type(deployment_date) is datetime):
            deployment_date = deployment_date.strftime("%Y-%m-%dT%H:%M:%SZ")

        traj_str = "%s-%s" % (glider, deployment_date)

        if 'trajectory' not in self.nc.variables:
            # Setup Trajectory Dimension
            self.nc.createDimension('traj_strlen', len(traj_str))

            # Setup Trajectory Variable
            trajectory_var = self.nc.createVariable('trajectory',
                                                    'S1', ('traj_strlen', ),
                                                    zlib=True,
                                                    complevel=self.COMP_LEVEL)

            attrs = {
                'cf_role': 'trajectory_id',
                'long_name': 'Trajectory/Deployment Name',  # NOQA
                'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
            }
            for key, value in sorted(attrs.items()):
                trajectory_var.setncattr(key, value)
        else:
            trajectory_var = self.nc.variables['trajectory']

        trajectory_var[:] = stringtoarr(traj_str, len(traj_str))
        self.nc.id = traj_str  # Global id variable
Example #19
    def set_source_file_var(self, source_file_string, attrs=None):
        """ Sets the trajectory dimension and variable for the dataset and the
        global id attribute

        Input:
            - glider: Name of the glider deployed.
            - deployment_date: String or DateTime of when glider was
                first deployed.
        """

        if 'source_file' not in self._nc.variables:
            # Setup source_file dimension
            self._nc.createDimension('source_file_strlen',
                                     len(source_file_string))

            # Setup source_file variable
            source_file_var = self._nc.createVariable(
                u'source_file',
                'S1', ('source_file_strlen', ),
                zlib=True,
                complevel=self._comp_level)

            if attrs:
                attrs['long_name'] = 'Source data file'
                attrs['comment'] = 'Name of the source data file and associated file metadata'
                for key, value in sorted(attrs.items()):
                    source_file_var.setncattr(key, value)
        else:
            source_file_var = self._nc.variables['source_file']

        # Set the source_file variable data
        source_file_var[:] = stringtoarr(source_file_string,
                                         len(source_file_string))

        if not self._nc.getncattr('source').strip():
            self._nc.source = 'Observational Slocum glider data from source dba file {:s}'.format(
                source_file_string)  # Global source variable
Example #20

    def set_trajectory_id(self, trajectory_string):
        """ Sets the trajectory dimension and variable for the dataset and the
        global id attribute

        Input:
            - glider: Name of the glider deployed.
            - deployment_date: String or DateTime of when glider was
                first deployed.
        """
            
        if 'trajectory' not in self._nc.variables:
            # Setup Trajectory Dimension
            self._nc.createDimension('traj_strlen', len(trajectory_string))

            # Setup Trajectory Variable
            trajectory_var = self._nc.createVariable(
                u'trajectory',
                'S1',
                ('traj_strlen',),
                zlib=True,
                complevel=self._comp_level
            )

            attrs = {
                'cf_role': 'trajectory_id',
                'long_name': 'Trajectory/Deployment Name',  # NOQA
                'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
            }
            for key, value in sorted(attrs.items()):
                trajectory_var.setncattr(key, value)
        else:
            trajectory_var = self._nc.variables['trajectory']

        # Set the trajectory variable data
        trajectory_var[:] = stringtoarr(trajectory_string, len(trajectory_string))
        
        if not self._nc.getncattr('id').strip():
            self._nc.id = trajectory_string  # Global id variable
Example #21

 def add_Variables(self):
     """
     Add datasets according to the configuration file.
     """
     var_lst = self.conf['%s+%s' % (self.sat1, self.sen1)]
     for eachVar in var_lst:
         if eachVar.startswith('_'): continue
         var_info = var_lst[eachVar]
         #             print eachVar
         var = self.rootgrp.createVariable(eachVar, var_info['_fmt'],
                                           var_info['_dims'])
         for eachKey in var_info:
             if eachKey.startswith('_'): continue
             if eachKey == eachVar:
                 if var_info['_fmt'] == 'S1':
                     # string data
                     # the string must be converted to a char array with stringtoarr before being written to the NC file!!!
                     char_len = 1
                     for each in var_info['_dims']:
                         char_len = char_len * int(
                             var_lst['_%s' % each])  # total number of characters
                     char_ary = stringtoarr(''.join(var_info[eachKey]),
                                            char_len)
                     var[:] = char_ary
                 else:
                     # non-string data
                     var[:] = var_info[eachKey]
             else:
                 if is_number(var_info[eachKey]):
                     if '.' in var_info[eachKey]:
                         var.setncattr(eachKey,
                                       np.float32(var_info[eachKey]))
                     else:
                         var.setncattr(eachKey, np.short(var_info[eachKey]))
                 else:
                     var.setncattr(eachKey, var_info[eachKey])
Example #23
def stringToArrList(strings):
	newList = []
	for s in strings:
		numchars = charCounter(s)
		newList.append(stringtoarr(s, numchars))
	return newList
Example #24

	nc_file.WML_featureType = 'timeSeries'
	nc_file.WML_cdm_data_type = 'Station'
	nc_file.WML_standard_name_vocabulary = 'CF-1.6'
	nc_file.title = nc_title
	nc_file.summary = nc_summary
	nc_file.id = 'testing_id'
	nc_file.naming_authory = 'testing_authority'
	nc_file.WML_date_created = nc_date_create
	nc_file.WML_creator_name = nc_creator_name
	nc_file.creator_email = nc_creator_email
	nc_file.project = nc_project
	nc_file.processing_level = nc_proc_level
	nc_file.WML_profile = 'single variable'
	
	# data
	dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(12)]
	nc_time[:] = date2num(dates,units=nc_time.units,calendar=nc_time.calendar)
	#nc_station_names[:] = [stringtoarr("aaaa",4),stringtoarr("bbbb",4)]
	dummy = [stringtoarr("aaaa",4),stringtoarr("bbbb",4)]
	nc_station_names[:] = dummy
	nc_lat_var[:] = [35.0, 70.0]
	nc_lon_var[:] = [-120.0, 120.0]
	#for i in range(len(nc_station_names)):
		#data[i,:] = np.random.uniform(len(nc_time))
	
except Exception:
	print("Try again.")
nc_file.close()


Example #25

def WriteNCCF(FileName, Dates, Latitudes, Longitudes, ClimPoints, DataObject,
              DimObject, AttrObject, GlobAttrObject):
    ''' Sort out the date/times to write out and time bounds.
    Sort out clim bounds.
    Sort out lat and long bounds.
    Convert variables using the obtained scale_factor and add_offset: stored_var=int((var-offset)/scale).
    Write to file: set up the given dimensions, loop through all potential variables and their
    attributes, and then apply the provided dictionary of global attributes. '''

    # Sort out date/times to write out
    print(Dates)
    TimPoints, TimBounds = MakeDaysSince(Dates['StYr'], Dates['StMon'],
                                         Dates['EdYr'], Dates['EdMon'])
    nTims = len(TimPoints)

    # Sort out clim bounds - paired strings
    ClimBounds = np.empty((12, 2), dtype='|S10')
    for mm in range(12):
        ClimBounds[mm, 0] = str(ClimPoints[0]) + '-' + str(mm + 1) + '-' + str(1)
        ClimBounds[mm, 1] = str(ClimPoints[1]) + '-' + str(mm + 1) + '-' + str(MonthDays[mm])

    # Sort out LatBounds and LonBounds
    LatBounds = np.empty((len(Latitudes), 2), dtype='float')
    LonBounds = np.empty((len(Longitudes), 2), dtype='float')

    LatBounds[:, 0] = Latitudes - ((Latitudes[1] - Latitudes[0]) / 2.)
    LatBounds[:, 1] = Latitudes + ((Latitudes[1] - Latitudes[0]) / 2.)

    LonBounds[:, 0] = Longitudes - ((Longitudes[1] - Longitudes[0]) / 2.)
    LonBounds[:, 1] = Longitudes + ((Longitudes[1] - Longitudes[0]) / 2.)

    #pdb.set_trace()

    #    # No need to convert float data using given scale_factor and add_offset to integers - done within writing program (packV = (V-offset)/scale
    #    # Not sure what this does to float precision though...
    #    # Change mdi into an integer -999 because these are stored as integers
    #    for vv in range(len(DataObject)):
    #        DataObject[vv][np.where(DataObject[vv] == OLDMDI)] = MDI

    # Create a new netCDF file - have tried zlib=True,least_significant_digit=3 (and 1) - no difference
    ncfw = Dataset(
        FileName, 'w', format='NETCDF4_CLASSIC'
    )  # need to try NETCDF4 and also play with compression but test this first

    # Write out the global attributes
    if ('description' in GlobAttrObject):
        ncfw.description = GlobAttrObject['description']
    # print(GlobAttrObject['description'])

    if ('File_created' in GlobAttrObject):
        ncfw.File_created = GlobAttrObject['File_created']

    if ('Title' in GlobAttrObject):
        ncfw.Title = GlobAttrObject['Title']

    if ('Institution' in GlobAttrObject):
        ncfw.Institution = GlobAttrObject['Institution']

    if ('History' in GlobAttrObject):
        ncfw.History = GlobAttrObject['History']

    if ('Licence' in GlobAttrObject):
        ncfw.Licence = GlobAttrObject['Licence']

    if ('Project' in GlobAttrObject):
        ncfw.Project = GlobAttrObject['Project']

    if ('Processing_level' in GlobAttrObject):
        ncfw.Processing_level = GlobAttrObject['Processing_level']

    if ('Acknowledgement' in GlobAttrObject):
        ncfw.Acknowledgement = GlobAttrObject['Acknowledgement']

    if ('Source' in GlobAttrObject):
        ncfw.Source = GlobAttrObject['Source']

    if ('Comment' in GlobAttrObject):
        ncfw.Comment = GlobAttrObject['Comment']

    if ('References' in GlobAttrObject):
        ncfw.References = GlobAttrObject['References']

    if ('Creator_name' in GlobAttrObject):
        ncfw.Creator_name = GlobAttrObject['Creator_name']

    if ('Creator_email' in GlobAttrObject):
        ncfw.Creator_email = GlobAttrObject['Creator_email']

    if ('Version' in GlobAttrObject):
        ncfw.Version = GlobAttrObject['Version']

    if ('doi' in GlobAttrObject):
        ncfw.doi = GlobAttrObject['doi']

    if ('Conventions' in GlobAttrObject):
        ncfw.Conventions = GlobAttrObject['Conventions']

    if ('netcdf_type' in GlobAttrObject):
        ncfw.netcdf_type = GlobAttrObject['netcdf_type']

    # Loop through and set up the dimension names and quantities
    for vv in range(len(DimObject[0])):
        ncfw.createDimension(DimObject[0][vv], DimObject[1][vv])

    # Go through each dimension and set up the variable and attributes for that dimension if needed
    # ignore the first two elements of the list but count all other dictionaries
    for vv in range(len(DimObject) - 2):
        print(DimObject[vv + 2]['var_name'])

        # NOt 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        MyVar = ncfw.createVariable(DimObject[vv + 2]['var_name'],
                                    DimObject[vv + 2]['var_type'],
                                    DimObject[vv + 2]['var_dims'])

        # Apply any other attributes
        if ('standard_name' in DimObject[vv + 2]):
            MyVar.standard_name = DimObject[vv + 2]['standard_name']

        if ('long_name' in DimObject[vv + 2]):
            MyVar.long_name = DimObject[vv + 2]['long_name']

        if ('units' in DimObject[vv + 2]):
            MyVar.units = DimObject[vv + 2]['units']

        if ('axis' in DimObject[vv + 2]):
            MyVar.axis = DimObject[vv + 2]['axis']

        if ('calendar' in DimObject[vv + 2]):
            MyVar.calendar = DimObject[vv + 2]['calendar']

        if ('start_year' in DimObject[vv + 2]):
            MyVar.start_year = DimObject[vv + 2]['start_year']

        if ('end_year' in DimObject[vv + 2]):
            MyVar.end_year = DimObject[vv + 2]['end_year']

        if ('start_month' in DimObject[vv + 2]):
            MyVar.start_month = DimObject[vv + 2]['start_month']

        if ('end_month' in DimObject[vv + 2]):
            MyVar.end_month = DimObject[vv + 2]['end_month']

        if ('bounds' in DimObject[vv + 2]):
            MyVar.bounds = DimObject[vv + 2]['bounds']

        if ('climatology' in DimObject[vv + 2]):
            MyVar.climatology = DimObject[vv + 2]['climatology']

        if ('point_spacing' in DimObject[vv + 2]):
            MyVar.point_spacing = DimObject[vv + 2]['point_spacing']

        # Provide the data to the variable
        if (DimObject[vv + 2]['var_name'] == 'time'):
            MyVar[:] = TimPoints

        if (DimObject[vv + 2]['var_name'] == 'bounds_time'):
            MyVar[:, :] = TimBounds

        if (DimObject[vv + 2]['var_name'] == 'month'):
            for mm in range(12):
                MyVar[mm, :] = stringtoarr(MonthName[mm], 10)

        if (DimObject[vv + 2]['var_name'] == 'climbounds'):
            for mm in range(12):
                MyVar[mm, 0, :] = stringtoarr(ClimBounds[mm, 0], 10)
                MyVar[mm, 1, :] = stringtoarr(ClimBounds[mm, 1], 10)

        if (DimObject[vv + 2]['var_name'] == 'latitude'):
            MyVar[:] = Latitudes

        if (DimObject[vv + 2]['var_name'] == 'bounds_lat'):
            MyVar[:, :] = LatBounds

        if (DimObject[vv + 2]['var_name'] == 'longitude'):
            MyVar[:] = Longitudes

        if (DimObject[vv + 2]['var_name'] == 'bounds_lon'):
            MyVar[:, :] = LonBounds

    # Go through each variable and set up the variable attributes
    for vv in range(len(AttrObject)):

        print(AttrObject[vv]['var_name'])

        # NOt 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        MyVar = ncfw.createVariable(AttrObject[vv]['var_name'],
                                    AttrObject[vv]['var_type'],
                                    AttrObject[vv]['var_dims'],
                                    zlib=True,
                                    fill_value=AttrObject[vv]['_FillValue'])

        # Apply any other attributes
        if ('standard_name' in AttrObject[vv]):
            MyVar.standard_name = AttrObject[vv]['standard_name']

        if ('long_name' in AttrObject[vv]):
            MyVar.long_name = AttrObject[vv]['long_name']

# Too many issues with CF compliance
#        if ('cell_methods' in AttrObject[vv]):
#            MyVar.cell_methods = AttrObject[vv]['cell_methods']

        if ('comment' in AttrObject[vv]):
            MyVar.comment = AttrObject[vv]['comment']

        if ('units' in AttrObject[vv]):
            MyVar.units = AttrObject[vv]['units']

        if ('axis' in AttrObject[vv]):
            MyVar.axis = AttrObject[vv]['axis']

#        if ('add_offset' in AttrObject[vv]):
#            MyVar.add_offset = AttrObject[vv]['add_offset']
#
#        if ('scale_factor' in AttrObject[vv]):
#            MyVar.scale_factor = AttrObject[vv]['scale_factor']

#        if ('valid_min' in AttrObject[vv]):
#            MyVar.valid_min = AttrObject[vv]['valid_min']#
#
#        if ('valid_max' in AttrObject[vv]):
#            MyVar.valid_max = AttrObject[vv]['valid_max']

#        if ('missing_value' in AttrObject[vv]):
#            MyVar.missing_value = AttrObject[vv]['missing_value']

#        if ('_FillValue' in AttrObject[vv]):
#	    MyVar._FillValue = AttrObject[vv]['_FillValue']

        if ('reference_period' in AttrObject[vv]):
            MyVar.reference_period = AttrObject[vv]['reference_period']

        if ('ancillary_variables' in AttrObject[vv]):
            MyVar.ancillary_variables = AttrObject[vv]['ancillary_variables']

        # Provide the data to the variable - depending on how many dimensions there are
        if (len(AttrObject[vv]['var_dims']) == 1):
            MyVar[:] = DataObject[vv]

        if (len(AttrObject[vv]['var_dims']) == 2):
            MyVar[:, :] = DataObject[vv]

        if (len(AttrObject[vv]['var_dims']) == 3):
            MyVar[:, :, :] = DataObject[vv]

    ncfw.close()

    return  # WriteNCCF
Example #26
        if args.verb > 1:
            print('copying unlimited dimension "%s" data' % name)
        for i in range(dim.size):
            dst.variables[name][i] = template.variables[name][i]

# create land use type dimension
dst.createDimension('landusetype4', size=len(lu_names))

# create variable for land use type names and write the names into it
lu_name_len = max(len(x) for x in lu_names)
dst.createDimension('landusenameidx', size=lu_name_len)
namevar = dst.createVariable('landusename', 'c',
                             ('landusetype4', 'landusenameidx'))
namevar.long_name = 'names of land use types'
for i, name in enumerate(lu_names):
    namevar[i, :] = nc.stringtoarr(name, lu_name_len)

# find auxiliary (coordinate-related variables) in the netcdf, such as boundaries and
# time averaging information variables: they are not going to be combined by land use,
# but are going to be copied to the output file
auxVars = set()
auxAttrs = {'bounds', 'edges',
            'time_avg_info'}  # attributes that may list auxiliary variables
for var in template.variables.values():
    for attr in auxAttrs:
        if attr not in var.ncattrs(): continue
        for v in var.getncattr(attr).split(','):
            if v not in template.dimensions: auxVars.add(v)

# find variables: if the list of variables is not provided on the command line, process
# all variables in the netcdf files, except dimension variables and averaging information
Example #27
def padded_string_to_arr(s, n=CHAR_ARRAY_LEN):
    """ Left-justify and pad a string with spaces up to total width
    n, and convert to a character array for writing to a NETCDF3 file """
    return nc.stringtoarr(s.ljust(n), n)
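Hypothetical usage of the helper above (CHAR_ARRAY_LEN and the nc alias come from elsewhere in that module, so the values here are assumptions):

import netCDF4 as nc
CHAR_ARRAY_LEN = 32  # assumed value

arr = padded_string_to_arr('station_7')
print(arr.shape)             # (32,)
print(nc.chartostring(arr))  # 'station_7' plus trailing spaces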
Example #28

def str_array_to_char_array_mapper(array, str_size):
    new_array = [stringtoarr(x, str_size) for x in array]
    return new_array
Example #29

        dew_point_temperatures.units = "degrees Celsius"

        rain_rate = rootgrp.createVariable("rainfall_rate", "f4", ("time",))
        rain_rate.coordinates = "lat lon"
        rain_rate.standard_name = "rainfall_rate"
        rain_rate.long_name = "Rainfall rate"
        rain_rate.units = "mm hr-1"

        total_rain = rootgrp.createVariable("cumulative_rainfall", "f4", ("time",))
        total_rain.coordinates = "lat lon"
        total_rain.standard_name = "cumulative_rainfall"
        total_rain.long_name = "Cumulative rainfall"
        total_rain.units = "mm"

        # set the values of the variables
        station_name[:] = netCDF4.stringtoarr("Penlee", 50)
        altitude[:] = [station_altitude]
        latitudes[:] = [station_lat]
        longitudes[:] = [station_lon]
        times[:] = avg_timestamp
        air_temperatures[:] = avg_temp
        air_pressures[:] = avg_pressure
        relative_humiditys[:] = avg_rh
        dew_point_temperatures[:] = avg_dewpoint
        rain_rate[:] = avg_rainfall_rate
        total_rain[:] = cumulative_rainfall

        rootgrp.close()


entries = (os.path.join(sourcefolder, fn) for fn in os.listdir(sourcefolder))
Example #30
    def _write_header(self):
        """Write header"""
        logger.debug('generating header')

        # set the 'z' dimension and the number of profiles (always 1)
        self.root_group.createDimension('z', np.sum(self.ssp.cur.data_valid))
        self.root_group.createDimension('profile', 1)

        # var: profile
        # RECOMMENDED - If using the attribute below: cf_role. Data type can be whatever is appropriate for the
        # unique feature type.
        profile_str = "%s %.7f %.7f" % (
            self.ssp.cur.meta.utc_time.strftime('%Y-%m-%dT%H:%M:%SZ'),
            self.ssp.cur.meta.longitude, self.ssp.cur.meta.latitude)
        default_profile_str_length = 64
        profile_str_length = max(default_profile_str_length, len(profile_str))
        self.root_group.createDimension('profile_id_length',
                                        profile_str_length)
        profile = self.root_group.createVariable('profile', 'S1', (
            'profile',
            'profile_id_length',
        ))
        profile[:] = netCDF4.stringtoarr(profile_str, profile_str_length)
        profile.long_name = 'Unique identifier for each feature instance'  # RECOMMENDED
        profile.cf_role = 'profile_id'  # RECOMMENDED

        # var: time
        # Depending on the precision used for the variable, the data type could be int or double instead of float.
        time = self.root_group.createVariable('time',
                                              'i4', ('profile', ),
                                              fill_value=0.0)
        time[:] = int(calendar.timegm(self.ssp.cur.meta.utc_time.timetuple()))
        time.long_name = 'cast time'  # RECOMMENDED - Provide a descriptive, long name for this variable.
        time.standard_name = 'time'  # REQUIRED - Do not change
        time.units = 'seconds since 1970-01-01 00:00:00'  # REQUIRED - Use approved CF convention with approved UDUNITS.
        # time.calendar = 'julian'  # REQUIRED    - IF the calendar is not default calendar, which is "gregorian".
        time.axis = 'T'  # REQUIRED    - Do not change.
        # time._FillValue = 0.0  # REQUIRED  if there could be missing values in the data. >> set at var creation
        # time.ancillary_variables = ''  # RECOMMENDED - List other variables providing information about this variable.
        # time.comment = ''  # RECOMMENDED - Add useful, additional information here.

        # var: lat
        # depending on the precision used for the variable, the data type could be int, float or double.
        lat = self.root_group.createVariable('lat',
                                             'f8', ('profile', ),
                                             fill_value=180.0)
        lat[:] = self.ssp.cur.meta.latitude
        lat.long_name = 'latitude'  # RECOMMENDED - Provide a descriptive, long name for this variable.
        lat.standard_name = 'latitude'  # REQUIRED - Do not change.
        lat.units = 'degrees_north'  # REQUIRED - CF recommends degrees_north, but at least must use UDUNITS.
        lat.axis = 'Y'  # REQUIRED - Do not change.
        lat.valid_min = -90.0  # RECOMMENDED - Replace with correct value.
        lat.valid_max = 180.0  # RECOMMENDED - Replace with correct value.
        # lat._FillValue = 180.0  # REQUIRED if there could be missing values in the data.
        # lat.ancillary_variables = ''  # RECOMMENDED - List other variables providing information about this variable.
        # lat.comment = ''  # RECOMMENDED - Add useful, additional information here.

        # var: lon
        # Depending on the precision used for the variable, the data type could be int, float or double.
        lon = self.root_group.createVariable('lon',
                                             'f8', ('profile', ),
                                             fill_value=360.0)
        lon[:] = self.ssp.cur.meta.longitude
        lon.long_name = 'longitude'  # RECOMMENDED
        lon.standard_name = 'longitude'  # REQUIRED - This is fixed, do not change.
        lon.units = 'degrees_east'  # REQUIRED - CF recommends degrees_east, but at least use UDUNITS.
        lon.axis = 'X'  # REQUIRED - Do not change.
        lon.valid_min = -180.0  # RECOMMENDED - Replace this with correct value.
        lon.valid_max = 360.0  # RECOMMENDED - Replace this with correct value.
        # lon:_FillValue = 360.0 # REQUIRED if there could be missing values in the data.
        # lon.ancillary_variables = ''  # RECOMMENDED - List other variables providing information about this variable.
        # lon.comment = ''  # RECOMMENDED - Add useful, additional information here.

        # var: crs
        # RECOMMENDED - A container variable storing information about the grid_mapping.
        # All the attributes within a grid_mapping variable are described in:
        # - http://cfconventions.org/Data/cf-conventions/cf-conventions-1.6/build/cf-conventions.html#grid-mappings-
        # and-projections.
        # For all the measurements based on WSG84, the default coordinate system used for GPS measurements,
        # the values shown here should be used.
        crs = self.root_group.createVariable('crs', 'f8', ('profile', ))
        crs[:] = 4326.0
        crs.grid_mapping_name = 'latitude_longitude'  # RECOMMENDED
        crs.epsg_code = 'EPSG:4326'  # RECOMMENDED - European Petroleum Survey Group code for the grid mapping name.
        crs.semi_major_axis = 6378137.0  # RECOMMENDED
        crs.inverse_flattening = 298.257223563  # RECOMMENDED

        # global attributes:
        self.root_group.ncei_template_version = 'NCEI_NetCDF_Profile_Orthogonal_Template_v2.0'  # REQUIRED(NCEI)
        self.root_group.featureType = 'profile'  # REQUIRED - CF attribute for identifying the featureType.(CF)
        # SUGGESTED - The data type, as derived from Unidata's Common Data Model Scientific Data types and understood
        # by THREDDS. (ACDD)
        self.root_group.cdm_data_type = 'profile'
        # HIGHLY RECOMMENDED - Provide a useful title for the data in the file.(ACDD)
        self.root_group.title = '%s_%s profile' % (self.ssp.cur.meta.sensor,
                                                   self.ssp.cur.meta.probe)
        # HIGHLY RECOMMENDED - Provide a useful summary or abstract for the data in the file.(ACDD)
        # self.root_group.summary = ''
        # HIGHLY RECOMMENDED - A comma separated list of keywords coming from the keywords_vocabulary.(ACDD)
        # self.root_group.keywords = ''
        # HIGHLY RECOMMENDED - A comma separated list of the conventions being followed. Always try to use latest
        # version.(CF / ACDD)
        self.root_group.Conventions = 'CF-1.6, ACDD-1.3'
        # RECOMMENDED - Creation date of this version of the data(netCDF).  Use ISO 8601:2004 for date and time. (ACDD)
        self.root_group.date_created = '%s' % dt.datetime.utcnow().strftime(
            '%Y-%m-%dT%H:%M:%SZ')
        self.root_group.survey = '%s' % self.ssp.cur.meta.survey
        # RECOMMENDED - The name of the project(s) principally responsible for originating this data.
        # Multiple projects can be separated by commas.(ACDD)
        self.root_group.project = '%s' % self._project
        # SUGGESTED - Name of the platform(s) that supported the sensor data used to create this data set or product.
        # Platforms can be of any type, including satellite, ship, station, aircraft or other.(ACDD)
        # Match platform format with velocipy
        platform = str(self.ssp.cur.meta.vessel).upper()
        platform = platform.replace('NRT-', 'NOAA NAVIGATION RESPONSE TEAM-')
        if len(platform) > 2 and platform[:2] in ['RA', 'TJ', 'FH', 'FA']:
            platform = platform.replace('(SHIP)', 'NOAA SHIP')
        self.root_group.platform = '%s' % platform
        # RECOMMENDED -The name of the institution principally responsible for originating this data..  An institution
        # attribute can be used for each variable if variables come from more than one institution. (CF/ACDD)
        self.root_group.institution = '%s' % self.ssp.cur.meta.institution
        # RECOMMENDED - an instrument variable storing information about a parameter of the instrument used in the
        # measurement, the dimensions don't have to be specified if the same instrument is used for all the measurements.
        instrument = self.root_group.createVariable('instrument', 'i4')
        if self._instrument is None:

            instrument.long_name = '%s' % self.ssp.cur.meta.sensor
            probe = str(self.ssp.cur.meta.probe)
            sn = str(self.ssp.cur.meta.sn)
            match = re.match(r'^(\w+?) ?\(SN:(\w+?)\)', sn)
            if match:
                probe = match.group(1)
                sn = match.group(2)
            instrument.make_model = '%s' % probe
            if self.ssp.cur.meta.sn:
                instrument.serial_number = '%s' % sn

        else:  # this part is used when a custom instrument is passed (for instance, for ISS format)

            tokens = self._instrument.split()
            if len(tokens) > 0:
                instrument.long_name = self._instrument.split()[0]
            if len(tokens) > 1:
                instrument.make_model = self._instrument.split()[1]
            if self.ssp.cur.meta.sn:
                instrument.serial_number = '%s' % self.ssp.cur.meta.sn

        # SUGGESTED - Published or web - based references that describe the data or methods used to produce it.
        # Recommend URIs(such as a URL or DOI)
        self.root_group.references = 'https://www.hydroffice.org/soundspeed/'
        # RECOMMENDED - Provide useful additional information here.(CF)
        # self.root_group.comment = b'Created using HydrOffice %s v.%s' % (ssp_name, ssp_version)
        # SUGGESTED - Version identifier of the data file or product as assigned by the data creator. (ACDD)
        self.root_group.product_version = 'Created using HydrOffice %s v.%s' % (
            ssp_name, ssp_version)
Example #32
def write_radar_file(ref, vel, filename=None):

    _time_units = 'seconds since 1970-01-01 00:00:00'
    _calendar = 'standard'

    if filename is None:
        print(
            "\n write_DART_ascii:  No output file name is given, writing to %s"
            % "obs_seq.nc")
        filename = "obs_seq.nc"
    else:
        dirname = os.path.dirname(filename)
        basename = "%s_%s.nc" % ("obs_seq", os.path.basename(filename))
        filename = os.path.join(dirname, basename)

    _stringlen = 8
    _datelen = 19

    # Extract grid and ref data

    dbz = ref.data
    lats = ref.lats
    lons = ref.lons
    hgts = ref.zg + ref.radar_hgt
    kind = ObType_LookUp(ref.field.upper())
    R_xy = np.sqrt(ref.xg[20]**2 + ref.yg[20]**2)
    elevations = beam_elv(R_xy, ref.zg[:, 20, 20])

    # if there is a zero dbz obs type, reform the data array
    try:
        nx1, ny1 = ref.zero_dbz.shape
        zero_data = np.ma.zeros((2, ny1, nx1), dtype=np.float32)
        zero_hgts = np.ma.zeros((2, ny1, nx1), dtype=np.float32)
        zero_data[0] = ref.zero_dbz
        zero_data[1] = ref.zero_dbz
        zero_hgts[0:2] = ref.zero_dbz_zg[0:2]
        cref = ref.cref
        zero_flag = True
        print(
            "\n write_DART_ascii:  0-DBZ separate type added to netcdf output\n"
        )
    except AttributeError:
        zero_flag = False
        print("\n write_DART_ascii:  No 0-DBZ separate type found\n")

    # Extract velocity data

    vr = vel.data
    platform_lat = vel.radar_lat
    platform_lon = vel.radar_lon
    platform_hgt = vel.radar_hgt

    # Use the volume mean time for the time of the volume

    dtime = ncdf.num2date(ref.time['data'].mean(), ref.time['units'])
    days = ncdf.date2num(dtime, units="days since 1601-01-01 00:00:00")
    seconds = int(86400. * (days - np.floor(days)))

    # construct the output filename and create the new netCDF4 file

    #filename = os.path.join(path, "%s_%s%s" % ("Inflation", DT.strftime("%Y-%m-%d_%H:%M:%S"), ".nc" ))

    print("\n -->  Writing %s as the radar file..." % (filename))

    rootgroup = ncdf.Dataset(filename, 'w', format='NETCDF4')

    # Create dimensions

    shape = dbz.shape

    rootgroup.createDimension('nz', shape[0])
    rootgroup.createDimension('ny', shape[1])
    rootgroup.createDimension('nx', shape[2])
    rootgroup.createDimension('stringlen', _stringlen)
    rootgroup.createDimension('datelen', _datelen)
    if zero_flag:
        rootgroup.createDimension('nz2', 2)

    # Write some attributes

    rootgroup.time_units = _time_units
    rootgroup.calendar = _calendar
    rootgroup.stringlen = "%d" % (_stringlen)
    rootgroup.datelen = "%d" % (_datelen)
    rootgroup.platform_lat = platform_lat
    rootgroup.platform_lon = platform_lon
    rootgroup.platform_hgt = platform_hgt

    # Create variables

    R_type = rootgroup.createVariable('REF',
                                      'f4', ('nz', 'ny', 'nx'),
                                      zlib=True,
                                      shuffle=True)
    V_type = rootgroup.createVariable('VEL',
                                      'f4', ('nz', 'ny', 'nx'),
                                      zlib=True,
                                      shuffle=True)

    if zero_flag:
        R0_type = rootgroup.createVariable('0REF',
                                           'f4', ('nz2', 'ny', 'nx'),
                                           zlib=True,
                                           shuffle=True)
        Z0_type = rootgroup.createVariable('0HGTS',
                                           'f4', ('nz2', 'ny', 'nx'),
                                           zlib=True,
                                           shuffle=True)
        CREF_type = rootgroup.createVariable('CREF',
                                             'f4', ('ny', 'nx'),
                                             zlib=True,
                                             shuffle=True)

    V_dates = rootgroup.createVariable('date',
                                       'S1', ('datelen'),
                                       zlib=True,
                                       shuffle=True)
    V_xc = rootgroup.createVariable('XC',
                                    'f4', ('nx'),
                                    zlib=True,
                                    shuffle=True)
    V_yc = rootgroup.createVariable('YC',
                                    'f4', ('ny'),
                                    zlib=True,
                                    shuffle=True)
    V_el = rootgroup.createVariable('EL',
                                    'f4', ('nz'),
                                    zlib=True,
                                    shuffle=True)

    V_lat = rootgroup.createVariable('LATS',
                                     'f4', ('ny'),
                                     zlib=True,
                                     shuffle=True)
    V_lon = rootgroup.createVariable('LONS',
                                     'f4', ('nx'),
                                     zlib=True,
                                     shuffle=True)
    V_hgt = rootgroup.createVariable('HGTS',
                                     'f4', ('nz', 'ny', 'nx'),
                                     zlib=True,
                                     shuffle=True)

    # Write variables

    rootgroup.variables['date'][:] = ncdf.stringtoarr(
        dtime.strftime("%Y-%m-%d_%H:%M:%S"), _datelen)

    rootgroup.variables['REF'][:, :, :] = dbz[:]
    rootgroup.variables['VEL'][:, :, :] = vr[:]

    rootgroup.variables['XC'][:] = ref.xg[:]
    rootgroup.variables['YC'][:] = ref.yg[:]
    rootgroup.variables['EL'][:] = elevations[:]
    rootgroup.variables['HGTS'][:] = ref.zg[:]
    rootgroup.variables['LATS'][:] = lats[:]
    rootgroup.variables['LONS'][:] = lons[:]

    if zero_flag:
        rootgroup.variables['0REF'][:] = zero_data
        rootgroup.variables['0HGTS'][:] = zero_hgts
        rootgroup.variables['CREF'][:] = cref

    rootgroup.sync()
    rootgroup.close()

    return filename
Example #33

      longitudes.long_name = 'Observatory longitude'
      longitudes.units = 'degrees_east'

      times = rootgrp.createVariable('time', 'i4', ('time',))
      times.standard_name = 'time'
      times.long_name = 'Time of measurement'
      times.units = 'seconds since 1970-01-01 00:00:00'

      air_temperatures = rootgrp.createVariable('air_temperature', 'f4', ('time',))
      air_temperatures.coordinates = 'lat lon'
      air_temperatures.standard_name = 'air_temperature'
      air_temperatures.long_name = 'Air temperature in degrees Celsius'
      air_temperatures.units = 'degrees Celsius'

      # set the values of the variables
      station_name[:] = netCDF4.stringtoarr('Penlee', 50) 
      altitude[:] = [station_altitude]
      latitudes[:] = [station_lat]
      longitudes[:] = [station_lon]
      times[:] = timestamp
      air_temperatures[:] = temp

      rootgrp.close()

entries = (os.path.join(sourcefolder, fn) for fn in os.listdir(sourcefolder))
entries = ((os.stat(path), path) for path in entries)

# leave only regular files, insert creation date
entries = ((stat[ST_CTIME], path)
           for stat, path in entries if S_ISREG(stat[ST_MODE]))
def test_tutorial():
# setup lines the rest of this test relies on (reconstructed from the standard
# netCDF4 tutorial; 73 latitudes and 144 longitudes match the numpy.arange
# calls further down).
    rootgrp = Dataset('tst_tutorial.nc', 'w')
    level = rootgrp.createDimension('level', None)
    time_d = rootgrp.createDimension('time', None)
    lat = rootgrp.createDimension('lat', 73)
    lon = rootgrp.createDimension('lon', 144)
    times = rootgrp.createVariable('time', 'f8', ('time',))
    levels = rootgrp.createVariable('level', 'i4', ('level',))
    latitudes = rootgrp.createVariable('lat', 'f4', ('lat',))
    longitudes = rootgrp.createVariable('lon', 'f4', ('lon',))
# 2 unlimited dimensions.
#temp = rootgrp.createVariable('temp','f4',('time','level','lat','lon',))
# this makes the compression 'lossy' (preserving a precision of 1/1000)
# try it and see how much smaller the file gets.
    temp = rootgrp.createVariable('temp','f4',('time','level','lat','lon',),least_significant_digit=3)
# attributes.
    import time
    rootgrp.description = 'bogus example script'
    rootgrp.history = 'Created ' + time.ctime(time.time())
    rootgrp.source = 'netCDF4 python module tutorial'
    latitudes.units = 'degrees north'
    longitudes.units = 'degrees east'
    levels.units = 'hPa'
    temp.units = 'K'
    times.units = 'hours since 0001-01-01 00:00:00.0'
    times.calendar = 'gregorian'
    for name in rootgrp.ncattrs():
        print('Global attr', name, '=', getattr(rootgrp,name))
    print(rootgrp)
    print(rootgrp.__dict__)
    print(rootgrp.variables)
    print(rootgrp.variables['temp'])
    import numpy
# no unlimited dimension, just assign to slice.
    lats = numpy.arange(-90,91,2.5)
    lons = numpy.arange(-180,180,2.5)
    latitudes[:] = lats
    longitudes[:] = lons
    print('latitudes =\n',latitudes[:])
    print('longitudes =\n',longitudes[:])
# append along two unlimited dimensions by assigning to slice.
    nlats = len(rootgrp.dimensions['lat'])
    nlons = len(rootgrp.dimensions['lon'])
    print('temp shape before adding data = ',temp.shape)
    from numpy.random.mtrand import uniform # random number generator.
    temp[0:5,0:10,:,:] = uniform(size=(5,10,nlats,nlons))
    print('temp shape after adding data = ',temp.shape)
# levels have grown, but no values yet assigned.
    print('levels shape after adding pressure data = ',levels.shape)
# assign values to levels dimension variable.
    levels[:] = [1000.,850.,700.,500.,300.,250.,200.,150.,100.,50.]
# fancy slicing
    tempdat = temp[::2, [1,3,6], lats>0, lons>0]
    print('shape of fancy temp slice = ',tempdat.shape)
    print(temp[0, 0, [0,1,2,3], [0,1,2,3]].shape)
# fill in times.
    from datetime import datetime, timedelta
    from netCDF4 import num2date, date2num, date2index
    dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(temp.shape[0])]
    times[:] = date2num(dates,units=times.units,calendar=times.calendar)
    print('time values (in units %s): ' % times.units+'\n',times[:])
    dates = num2date(times[:],units=times.units,calendar=times.calendar)
    print('dates corresponding to time values:\n',dates)
    rootgrp.close()
# create a series of netCDF files with a variable sharing
# the same unlimited dimension.
    for nfile in range(10):
        f = Dataset('mftest'+repr(nfile)+'.nc','w',format='NETCDF4_CLASSIC')
        f.createDimension('x',None)
        x = f.createVariable('x','i',('x',))
        x[0:10] = numpy.arange(nfile*10,10*(nfile+1))
        f.close()
# now read all those files in at once, in one Dataset.
    from netCDF4 import MFDataset
    f = MFDataset('mftest*nc')
    print(f.variables['x'][:])
# example showing how to save numpy complex arrays using compound types.
    f = Dataset('complex.nc','w')
    size = 3 # length of 1-d complex array
# create sample complex data.
    datac = numpy.exp(1j*(1.+numpy.linspace(0, numpy.pi, size)))
    print(datac.dtype)
# create complex128 compound data type.
    complex128 = numpy.dtype([('real',numpy.float64),('imag',numpy.float64)])
    complex128_t = f.createCompoundType(complex128,'complex128')
# create a variable with this data type, write some data to it.
    f.createDimension('x_dim',None)
    v = f.createVariable('cmplx_var',complex128_t,'x_dim')
    data = numpy.empty(size,complex128) # numpy structured array
    data['real'] = datac.real; data['imag'] = datac.imag
    v[:] = data
# close and reopen the file, check the contents.
    f.close()
    f = Dataset('complex.nc')
    print(f)
    print(f.variables['cmplx_var'])
    print(f.cmptypes)
    print(f.cmptypes['complex128'])
    v = f.variables['cmplx_var']
    print(v.shape)
    datain = v[:] # read in all the data into a numpy structured array
# create an empty numpy complex array
    datac2 = numpy.empty(datain.shape,numpy.complex128)
# .. fill it with contents of structured array.
    datac2.real = datain['real']
    datac2.imag = datain['imag']
    print(datac.dtype,datac)
    print(datac2.dtype,datac2)
# more complex compound type example.
    from netCDF4 import chartostring, stringtoarr
    f = Dataset('compound_example.nc','w') # create a new dataset.
# create an unlimited dimension called 'station'
    f.createDimension('station',None)
# define a compound data type (can contain arrays, or nested compound types).
    NUMCHARS = 80 # number of characters to use in fixed-length strings.
    winddtype = numpy.dtype([('speed','f4'),('direction','i4')])
    statdtype = numpy.dtype([('latitude', 'f4'), ('longitude', 'f4'),
                             ('surface_wind',winddtype),
                             ('temp_sounding','f4',10),('press_sounding','i4',10),
                             ('location_name','S1',NUMCHARS)])
# use these data type definitions to create compound data types,
# via the createCompoundType Dataset method.
# create a compound type for vector wind which will be nested inside
# the station data type. This must be done first!
    wind_data_t = f.createCompoundType(winddtype,'wind_data')
# now that wind_data_t is defined, create the station data type.
    station_data_t = f.createCompoundType(statdtype,'station_data')
# create nested compound data types to hold the units variable attribute.
    winddtype_units = numpy.dtype([('speed','S1',NUMCHARS),('direction','S1',NUMCHARS)])
    statdtype_units = numpy.dtype([('latitude', 'S1',NUMCHARS), ('longitude', 'S1',NUMCHARS),
                                   ('surface_wind',winddtype_units),
                                   ('temp_sounding','S1',NUMCHARS),
                                   ('location_name','S1',NUMCHARS),
                                   ('press_sounding','S1',NUMCHARS)])
# create the wind_data_units type first, since it will be nested inside
# the station_data_units data type.
    wind_data_units_t = f.createCompoundType(winddtype_units,'wind_data_units')
    station_data_units_t =\
        f.createCompoundType(statdtype_units,'station_data_units')
# create a variable of type 'station_data_t'
    statdat = f.createVariable('station_obs', station_data_t, ('station',))
# create a numpy structured array, assign data to it.
    data = numpy.empty(1,station_data_t)
    data['latitude'] = 40.
    data['longitude'] = -105.
    data['surface_wind']['speed'] = 12.5
    data['surface_wind']['direction'] = 270
    data['temp_sounding'] = (280.3,272.,270.,269.,266.,258.,254.1,250.,245.5,240.)
    data['press_sounding'] = range(800,300,-50)
# variable-length string datatypes are not supported inside compound types, so
# to store strings in a compound data type, each string must be
# stored as a fixed-size (in this case 80) array of characters.
    data['location_name'] = stringtoarr('Boulder, Colorado, USA',NUMCHARS)
# assign structured array to variable slice.
    statdat[0] = data
# or just assign a tuple of values to variable slice
# (will automatically be converted to a structured array).
    statdat[1] = (40.78,-73.99,(-12.5,90),
                  (290.2,282.5,279.,277.9,276.,266.,264.1,260.,255.5,243.),
                  range(900,400,-50),stringtoarr('New York, New York, USA',NUMCHARS))
    print(f.cmptypes)
    windunits = numpy.empty(1,winddtype_units)
    stationobs_units = numpy.empty(1,statdtype_units)
    windunits['speed'] = stringtoarr('m/s',NUMCHARS)
    windunits['direction'] = stringtoarr('degrees',NUMCHARS)
    stationobs_units['latitude'] = stringtoarr('degrees north',NUMCHARS)
    stationobs_units['longitude'] = stringtoarr('degrees west',NUMCHARS)
    stationobs_units['surface_wind'] = windunits
    stationobs_units['location_name'] = stringtoarr('None', NUMCHARS)
    stationobs_units['temp_sounding'] = stringtoarr('Kelvin',NUMCHARS)
    stationobs_units['press_sounding'] = stringtoarr('hPa',NUMCHARS)
    statdat.units = stationobs_units
# close and reopen the file.
    f.close()
    f = Dataset('compound_example.nc')
    print(f)
    statdat = f.variables['station_obs']
    print(statdat)
# print out data in variable.
    print('data in a variable of compound type:')
    print('----')
    for data in statdat[:]:
        for name in statdat.dtype.names:
            if data[name].dtype.kind == 'S': # a string
                # convert array of characters back to a string for display.
                units = chartostring(statdat.units[name])
                print(name,': value =',chartostring(data[name]),\
                          ': units=',units)
            elif data[name].dtype.kind == 'V': # a nested compound type
                units_list = [chartostring(s) for s in tuple(statdat.units[name])]
                print(name,data[name].dtype.names,': value=',data[name],': units=',\
                          units_list)
            else: # a numeric type.
                units = chartostring(statdat.units[name])
                print(name,': value=',data[name],': units=',units)
                print('----')
    f.close()
    f = Dataset('tst_vlen.nc','w')
    vlen_t = f.createVLType(numpy.int32, 'phony_vlen')
    x = f.createDimension('x',3)
    y = f.createDimension('y',4)
    vlvar = f.createVariable('phony_vlen_var', vlen_t, ('y','x'))
    import random
    data = numpy.empty(len(y)*len(x),object)
    for n in range(len(y)*len(x)):
        data[n] = numpy.arange(random.randint(1,10),dtype='int32')+1
    data = numpy.reshape(data,(len(y),len(x)))
    vlvar[:] = data
    print(vlvar)
    print('vlen variable =\n',vlvar[:])
    print(f)
    print(f.variables['phony_vlen_var'])
    print(f.vltypes['phony_vlen'])
    z = f.createDimension('z', 10)
    strvar = f.createVariable('strvar',str,'z')
    chars = '1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    data = numpy.empty(10,object)
    for n in range(10):
        stringlen = random.randint(2,12)
        data[n] = ''.join([random.choice(chars) for i in range(stringlen)])
    strvar[:] = data
    print('variable-length string variable:\n',strvar[:])
    print(f)
    print(f.variables['strvar'])
    f.close()
Exemple #35
def save(nfn,data,dataname,x=None,y=None,z=None,t=None,tunits=None,dtim=None,
			 xbnds=None,ybnds=None,zbnds=None,tbnds=None,dims=None,xname='Lon',
			 yname='Lat',zname='Depth',tname='Time',dhist=None,dunits=None,t_atts=None,append=False,app_in_t=False,silent=False):

	if dims is None:
		raise FerrError("Dimensions and order not specified!")
	ndims = data.ndim
	ct_dims = 0
	if x is not None:
		ct_dims = ct_dims + 1
		if not isinstance(x,np.ndarray):
			raise FerrError("x is not an ndarray; all coord var data must be in np.ndarrays")
	if y is not None:
		ct_dims = ct_dims + 1
		if not isinstance(y,np.ndarray):
			raise FerrError("y is not an ndarray; all coord var data must be in np.ndarrays")
	if z is not None:
		ct_dims = ct_dims + 1
		if not isinstance(z,np.ndarray):
			raise FerrError("z is not an ndarray; all coord var data must be in np.ndarrays")
	if (t is not None) or (dtim is not None):
		ct_dims = ct_dims + 1
		if t is not None:
			if not isinstance(t,np.ndarray):
				raise FerrError("t is not an ndarray; all coord var data must be in np.ndarrays")
		if dtim is not None:
			if not isinstance(dtim,np.ndarray):
				raise FerrError("dtim is not an ndarray; all coord var data must be in np.ndarrays")

	if not app_in_t:
		##if ndims < ct_dims:
		##raise FerrError("Not enough dim info to go by.  Please call with some dimensional data")
		if ndims != len(dims):
			raise FerrError("Dimensions and order not fully specified!")
	if ct_dims < len(dims):
		raise FerrError("Not enough dim info to go by.  Please call with more coord vars data")

	T = tname.lower()
	Z = zname.lower()
	Y = yname.lower()
	X = xname.lower()

	global outf
	outf = None

	if append is False:
		if os.path.isfile(nfn):
			ques_str = "output file \'%s\' exists.  Delete? (y/n): " % nfn
			ans = raw_input(ques_str)
			if 'y' in ans.lower():
				os.remove(nfn)

		h1 = "created by ferr.py on %s, using py netCDF4" % datetime.datetime.ctime(datetime.datetime.now())
		outf = nc4.Dataset(nfn, 'w', clobber=False, format='NETCDF3_CLASSIC')
		outf.history = h1

		if 't' in dims:
			outf.createDimension(T,None)
			time_var = outf.createVariable(tname.lower(),'d',(T,))
			time_var.long_name = 'Time'
			time_var.axis = 'T'
			if dtim is not None:
				time_var.units = "days since 0001-01-01 00:00:00"
				time_var.time_origin = "0001-01-01 00:00:00"
				time_var[:] = nc4.date2num(dtim,units="days since 0001-01-01 00:00:00")
			elif t is not None:
				if tunits is None:
					raise FerrError("No time units specified, and only t-values given!")
				time_var.units = tunits
				time_var[:] = t
			if tbnds is not None:
				time_var.bounds = tname.lower() + "_bnds"
			if t_atts is not None:
				for ii in t_atts.keys():
					setattr(time_var,ii,t_atts[ii])

		if 'x' in dims:
			outf.createDimension(X,x.size)
			lon_var = outf.createVariable(xname.lower(),'d',(X,))
			lon_var.long_name = 'Longitude'
			lon_var.axis = 'X'
			lon_var.units = 'degrees_east'
			if xbnds is not None:
				lon_var.point_spacing = 'uneven'
				lon_var.bounds = xname.lower() + "_bnds"
			else:
				lon_var.point_spacing = 'even'
			lon_var.modulo = np.array([360.])
			lon_var[:] = x[:]

		if 'y' in dims:
			outf.createDimension(Y,y.size)
			lat_var = outf.createVariable(yname.lower(),'d',(Y,))
			lat_var.long_name = 'Latitude'
			lat_var.axis = 'Y'
			lat_var.units = 'degrees_north'
			if ybnds is not None:
				lat_var.point_spacing = 'uneven'
				lat_var.bounds = yname.lower() + "_bnds"
			else:
				lat_var.point_spacing = 'even'
			lat_var[:] = y[:]

		if 'z' in dims:
			outf.createDimension(Z,z.size)
			depth_var = outf.createVariable(zname.lower(),'d',(Z,))
			depth_var.long_name = 'Depth'
			depth_var.axis = 'Z'
			depth_var.units = 'meters'
			depth_var.positive = 'down'
			if zbnds is not None:
				depth_var.point_spacing = 'uneven'
				depth_var.bounds = zname.lower() + "_bnds"
			else:
				depth_var.point_spacing = 'even'
			depth_var[:] = z[:]

		if (tbnds is not None) | (xbnds is not None) | (ybnds is not None) | (zbnds is not None):
			outf.createDimension('bnds',2)

		if tbnds is not None:
			tbndsname = tname.lower() + "_bnds"
			tbnds_var = outf.createVariable(tbndsname,'d',(T,'bnds'))
			if isinstance(tbnds[0,0],dt):
				tbnds_var[:] = nc4.date2num(tbnds,units="days since 0001-01-01 00:00:00")
			elif isinstance(tbnds[0][0],float):
				tbnds_var[:] = tbnds

		if xbnds is not None:
			xbndsname = xname.lower() + "_bnds"
			xbnds_var = outf.createVariable(xbndsname,'d',(X,'bnds'))
			xbnds_var[:] = xbnds

		if ybnds is not None:
			ybndsname = yname.lower() + "_bnds"
			ybnds_var = outf.createVariable(ybndsname,'d',(Y,'bnds'))
			ybnds_var[:] = ybnds

		if zbnds is not None:
			zbndsname = zname.lower() + "_bnds"
			zbnds_var = outf.createVariable(zbndsname,'d',(Z,'bnds'))
			zbnds_var[:] = zbnds

	elif append is True:
		tax_found = False
		zax_found = False
		yax_found = False
		xax_found = False
		Tsize = 0

		if os.path.isfile(nfn) is False:
			raise FerrError("file %s does not exist; can't append to it" % nfn)

		outf = use(nfn,silent=True,_append=True)
		indims = outf.d.keys()
		invars = outf.v.keys()
		cv1 = outf.cv
		cv1_keys = cv1.keys()
		fdims = outf.f.dimensions.keys()

		if (dataname in invars) & (app_in_t is False):
			raise FerrError("variable %s already exists; can't append it to file %s" % (dataname,nfn))

		if (tbnds is not None) | (xbnds is not None) | (ybnds is not None) | (zbnds is not None):
			if 'bnds' not in fdims:
				outf.f.createDimension('bnds',2)

		if ('t' in dims) and (app_in_t is False):
			if (t is not None):
				Tsize = t.size
				for i in cv1_keys:
					if cv1[i] == 'tax':
						tax1 = outf.d[i][:]
						cmp1 = t == tax1
						##another option: sp.special.array_equiv()
						if isinstance(cmp1,np.ndarray):
							cmp1 = cmp1.all()
						if cmp1:
							tax_found = True
							for fd in fdims:
								if i.lower() == fd.lower():
									T = fd
							break
			if (dtim is not None):
				Tsize = dtim.size
				for i in cv1_keys:
					if cv1[i] == 'tax':
						tax1 = outf.dt_vals(i)
						cmp1 = dtim == tax1
						if isinstance(cmp1,np.ndarray):
							cmp1 = cmp1.all()
						if cmp1:
							tax_found = True
							for fd in fdims:
								if i.lower() == fd.lower():
									T = fd
							break

			if tax_found is False:
				while T in indims:
					if T[-1].isdigit():
						T = T[0:-1] + str(int(T[-1]) + 1)
					else:
						T = T + '1'

				outf.f.createDimension(T,Tsize)
				time_var = outf.f.createVariable(T,'d',(T,))
				time_var.long_name = 'Time'
				time_var.axis = 'T'
				if dtim is not None:
					time_var.units = "days since 0001-01-01 00:00:00"
					time_var.time_origin = "0001-01-01 00:00:00"
					time_var[:] = nc4.date2num(dtim,units="days since 0001-01-01 00:00:00")
				elif t is not None:
					if tunits is None:
						raise FerrError("No time units specified, and only t-values given!")
					time_var.units = tunits
					time_var[:] = t
				if tbnds is not None:
					time_var.bounds = T + "_bnds"
				if t_atts is not None:
					for ii in t_atts.keys():
						setattr(time_var,ii,t_atts[ii])

				if tbnds is not None:
					tbndsname = T + "_bnds"
					tbnds_var = outf.f.createVariable(tbndsname,'d',(T,'bnds'))
					if isinstance(tbnds[0,0],dt):
						tbnds_var[:] = nc4.date2num(tbnds,units="days since 0001-01-01 00:00:00")
					elif isinstance(tbnds[0][0],float):
						tbnds_var[:] = tbnds

		if('z' in dims):
			if (z is not None):
				for i in cv1_keys:
					if cv1[i] == 'zax':
						zax1 = outf.d[i][:]
						cmp1 = z == zax1
						if isinstance(cmp1,np.ndarray):
							cmp1 = cmp1.all()
						if cmp1:
							zax_found = True
							for fd in fdims:
								if i.lower() == fd.lower():
									Z = fd
							break
				if zax_found is False:
					while Z in indims:
						if Z[-1].isdigit():
							Z = Z[0:-1] + str(int(Z[-1]) + 1)
						else:
							Z = Z + '1'

					outf.f.createDimension(Z,z.size)
					depth_var = outf.f.createVariable(Z,'d',(Z,))
					depth_var.long_name = 'Depth'
					depth_var.axis = 'Z'
					depth_var.units = 'meters'
					depth_var.positive = 'down'
					if zbnds is not None:
						depth_var.point_spacing = 'uneven'
						depth_var.bounds = Z + "_bnds"
					else:
						depth_var.point_spacing = 'even'
					depth_var[:] = z[:]

					if zbnds is not None:
						zbndsname = Z + "_bnds"
						zbnds_var = outf.f.createVariable(zbndsname,'d',(Z,'bnds'))
						zbnds_var[:] = zbnds

		if 'y' in dims:
			if (y is not None):
				for i in cv1_keys:
					if cv1[i] == 'yax':
						yax1 = outf.d[i][:]
						cmp1 = y == yax1
						if isinstance(cmp1,np.ndarray):
							cmp1 = cmp1.all()
						if cmp1:
							yax_found = True
							for fd in fdims:
								if i.lower() == fd.lower():
									Y = fd
							break
				if yax_found is False:
					while Y in indims:
						if Y[-1].isdigit():
							Y = Y[0:-1] + str(int(Y[-1]) + 1)
						else:
							Y = Y + '1'

					outf.f.createDimension(Y,y.size)
					lat_var = outf.f.createVariable(Y,'d',(Y,))
					lat_var.long_name = 'Latitude'
					lat_var.axis = 'Y'
					lat_var.units = 'degrees_north'
					if ybnds is not None:
						lat_var.point_spacing = 'uneven'
						lat_var.bounds = Y + "_bnds"
					else:
						lat_var.point_spacing = 'even'
					lat_var[:] = y[:]

					if ybnds is not None:
						ybndsname = Y + "_bnds"
						ybnds_var = outf.f.createVariable(ybndsname,'d',(Y,'bnds'))
						ybnds_var[:] = ybnds

		if 'x' in dims:
			if (x is not None):
				for i in cv1_keys:
					if cv1[i] == 'xax':
						xax1 = outf.d[i][:]
						cmp1 = x == xax1
						if isinstance(cmp1,np.ndarray):
							cmp1 = cmp1.all()
						if cmp1:
							xax_found = True
							for fd in fdims:
								if i.lower() == fd.lower():
									X = fd
							break
				if xax_found is False:
					while X in indims:
						if X[-1].isdigit():
							X = X[0:-1] + str(int(X[-1]) + 1)
						else:
							X = X + '1'

					outf.f.createDimension(X,x.size)
					lon_var = outf.f.createVariable(X,'d',(X,))
					lon_var.long_name = 'Longitude'
					lon_var.axis = 'X'
					lon_var.units = 'degrees_east'
					if xbnds is not None:
						lon_var.point_spacing = 'uneven'
						lon_var.bounds = X + "_bnds"
					else:
						lon_var.point_spacing = 'even'
					lon_var.modulo = np.array([360.])
					lon_var[:] = x[:]

					if xbnds is not None:
						xbndsname = X + "_bnds"
						xbnds_var = outf.f.createVariable(xbndsname,'d',(X,'bnds'))
						xbnds_var[:] = xbnds
	else:
		raise FerrError("kw 'append' has to be True or False")


	vdims = []
	for i in xrange(len(dims)):
		if dims.lower()[i] == 'x':
			vdims.append(X)

		if dims.lower()[i] == 'y':
			vdims.append(Y)

		if dims.lower()[i] == 'z':
			vdims.append(Z)

		if dims.lower()[i] == 't':
			vdims.append(T)

	vdims = tuple(vdims)
	if append is False:
		dat_var = outf.createVariable(dataname,'f',vdims,fill_value=-1.e+34)
		dat_var.missing_value = -1.e+34
		dat_var.long_name = dataname
		if dhist is not None:
			dat_var.history = dhist
		data = np.float32(data)
		data.set_fill_value(-1.e+34)
		data.data[data.mask] = data.fill_value
		if app_in_t is True:
			tsize = time_var.size
			if (tsize > 1) & (ndims == len(dims)):
				dims_a = nc4.stringtoarr(dims,len(dims))
				data_tsize = data.shape[np.where(dims_a == 't')[0][0]]
				dat_var[tsize:tsize+data_tsize,...] = data
			else:
				dat_var[0,...] = data
		else:
			dat_var[:] = data

	else:
		if app_in_t is True:
			if dataname.lower() not in outf.v.keys():
				raise FerrError("append mode, but var '%s' not found in file '%s'!" % (dataname,nfn))
			dat_var = outf.v[dataname.lower()]
			tim_var = outf.d[T]
			tsize = tim_var.size
			data = np.float32(data)
			data.set_fill_value(-1.e+34)
			data.data[data.mask] = data.fill_value
			if t is not None:
				t_insize = t.size
				tim_var[tsize:tsize + t_insize] = t
			elif dtim is not None:
				t_insize = dtim.size
				tim_var[tsize:tsize + t_insize] = nc4.date2num(dtim,units="days since 0001-01-01 00:00:00")
			else:
				raise FerrError("no time data, though append in time option was True")
			if (tsize > 1) & (ndims == len(dims)):
				dims_a = nc4.stringtoarr(dims,len(dims))
				data_tsize = data.shape[np.where(dims_a == 't')[0][0]]
				dat_var[tsize:tsize+data_tsize,...] = data
			else:
				dat_var[tsize,...] = data
		else:
			dat_var = outf.f.createVariable(dataname,'f',vdims,fill_value=-1.e+34)
			dat_var.missing_value = -1.e+34
			dat_var.long_name = dataname
			if dhist is not None:
				dat_var.history = dhist
			data = np.float32(data)
			data.set_fill_value(-1.e+34)
			data.data[data.mask] = data.fill_value
			dat_var[:] = data

	if append is False:
		outf.close()
	else:
		outf.f.close()

	if bool(silent) is False:
		print "\ndata written to %s\n" % nfn

	return None
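# Hypothetical usage sketch (added; not from the original module). Writes a
# masked 2-D field on a lat/lon grid; assumes save() and its FerrError/use()
# helpers are importable, and all values below are made up.
import numpy as np
lons = np.arange(0., 360., 2.)
lats = np.arange(-88., 90., 2.)
field = np.ma.masked_greater(np.random.rand(lats.size, lons.size), 0.9)
save('example.nc', field, 'rand_field', x=lons, y=lats, dims='yx',
     dhist='random demo data')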
Exemple #36
def write_exodus_file(filename, cells, vertices, shape="SHELL4"):
    """
    Write Exodus-II file compatible with CUBIT.

    cells is a 0-based array (ncells, ncorners).

    vertices is (nvertices, dim).

    All cells are placed in a single block.

    Requires netCDF4 module.
    """
    import numpy
    from netCDF4 import Dataset

    len_string = 33

    root = Dataset(filename, 'w', format='NETCDF3_CLASSIC')

    # Set global attributes
    root.api_version = 4.98
    root.version = 4.98
    root.floating_point_word_size = 8
    root.file_size = 0
    root.title = "cubit"

    # Setup dimensions

    # Generic information
    root.createDimension('len_string', len_string)
    root.createDimension('len_line', 81)
    root.createDimension('four', 4)
    root.createDimension('num_qa_rec', 1)
    root.createDimension('time_step', None)

    # Mesh specific information
    (ncells, ncorners) = cells.shape
    (nvertices, dim) = vertices.shape
    root.createDimension('num_dim', dim)
    root.createDimension('num_el_blk', 1)
    root.createDimension('num_nod_per_el1', ncorners)
    root.createDimension('num_att_in_blk1', 1)

    root.createDimension('num_nodes', nvertices)
    root.createDimension('num_elem', ncells)
    root.createDimension('num_el_in_blk1', ncells)

    # Setup variables
    connect1 = root.createVariable('connect1', numpy.int32, (
        'num_el_in_blk1',
        'num_nod_per_el1',
    ))

    coord = root.createVariable('coord', numpy.float64, (
        'num_dim',
        'num_nodes',
    ))

    time_whole = root.createVariable('time_whole', numpy.float64,
                                     ('time_step', ))

    coor_names = root.createVariable('coor_names', 'S1', (
        'num_dim',
        'len_string',
    ))

    qa_records = root.createVariable('qa_records', 'S1', (
        'num_qa_rec',
        'four',
        'len_string',
    ))

    eb_names = root.createVariable('eb_names', 'S1', (
        'num_el_blk',
        'len_string',
    ))

    elem_map = root.createVariable('elem_map', numpy.int32, ('num_elem', ))

    eb_status = root.createVariable('eb_status', numpy.int32, ('num_el_blk', ))

    eb_prop1 = root.createVariable('eb_prop1', numpy.int32, ('num_el_blk', ))

    attrib1 = root.createVariable('attrib1', numpy.float64, (
        'num_el_in_blk1',
        'num_att_in_blk1',
    ))

    # Set variable values
    connect1[:] = 1 + cells[:]
    connect1.elem_type = shape

    coord[:] = vertices.transpose()[:]

    from netCDF4 import stringtoarr
    if dim == 2:
        coor_names[0, :] = stringtoarr("x", len_string)
        coor_names[1, :] = stringtoarr("y", len_string)
    elif dim == 3:
        coor_names[0, :] = stringtoarr("x", len_string)
        coor_names[1, :] = stringtoarr("y", len_string)
        coor_names[2, :] = stringtoarr("z", len_string)

    qa_records[0, 0, :] = stringtoarr("CUBIT", len_string)
    qa_records[0, 1, :] = stringtoarr("11.0", len_string)
    qa_records[0, 2, :] = stringtoarr("01/01/2000", len_string)
    qa_records[0, 3, :] = stringtoarr("12:00:00", len_string)

    elem_map[:] = numpy.arange(1, ncells + 1, dtype=numpy.int32)[:]

    eb_status[:] = numpy.ones((1, ), dtype=numpy.int32)[:]

    eb_prop1[:] = numpy.ones((1, ), dtype=numpy.int32)[:]
    eb_prop1.name = "ID"

    attrib1[:] = numpy.ones((ncells, 1), dtype=numpy.int32)[:]

    root.close()
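# Minimal usage sketch (added; not from the original source): a single SHELL4
# quadrilateral in 3-D, with 0-based connectivity as the docstring requires.
import numpy
cells = numpy.array([[0, 1, 2, 3]], dtype=numpy.int32)
vertices = numpy.array([[0.0, 0.0, 0.0],
                        [1.0, 0.0, 0.0],
                        [1.0, 1.0, 0.0],
                        [0.0, 1.0, 0.0]], dtype=numpy.float64)
write_exodus_file('single_quad.exo', cells, vertices, shape='SHELL4')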
Exemple #37
    def append_data(self, tzg):
        """
            append the data found in 'tzg' to our netCDF file
        """

        out = self.dataset
        appendDimLens = {}
        for adim in self.appendDims:
            # start by loading record dimensions that should be appended
            appendDimLens[adim] = len(out.dimensions[adim])
            if adim in out.variables:
                # store the variables associated with this record dimension (processing as needed)
                dvar = get_substruct(tzg, self.bindings[adim])
                nc_ovar = out.variables[adim]
                if dvar is None:
                    raise KeyError("Can't find record variable %s for dim %s"
                                   % (self.bindings[adim], adim))
                if len(dvar) == 0:
                    continue
                if adim == 'time':  # special case for time; this is used with HSRL.
                    # There needs to be a way to identify this case in the template
                    # (the what, why and how), so other time axes and sources work too.
                    # dvar[0] is e.g. datetime.datetime(2012, 6, 20, 0, 59, 31, 250001, tzinfo)
                    if appendDimLens[adim] == 0:
                        print 'adding first record'
                        if 'dpl_py_binding' in out.variables[
                                'time_coverage_start'].ncattrs():
                            del out.variables[
                                'time_coverage_start'].dpl_py_binding
                        # compute start time to the nearest second
                        self.start_time = dvar[0].replace(microsecond=0)
                        # write start of dataset in the form '2012-06-20T00:59:31Z'
                        out.variables['time_coverage_start'][:] = \
                            stringtoarr(self.start_time.strftime(self.date_fmt), STRING_LENGTH_SHORT)

                    # save end_time to nearest second
                    self.end_time = dvar[-1].replace(microsecond=0)

        for f in self.bindings:
            field = get_substruct(tzg, self.bindings[f])
            if field is not None:
                ovar = out.variables[f]
                basesh = [0, 0, 0, 0, 0, 0]
                didx = 0

                if 'dpl_py_type' in ovar.ncattrs():
                    dpltype = ovar.dpl_py_type[:]
                    if dpltype == 'matplotlib_num2date' or dpltype == 'python_datetime':  #this is actually datetime, but older form is kept around to not break things JPG 20130211
                        print 'compute relative time for %s' % self.bindings[f]
                        if not hasattr(self, 'start_time'):
                            bt = out.variables['time_coverage_start']
                            btv = ''
                            for x in range(bt.shape[0]):
                                btv = btv + bt[
                                    x]  #chartostring(var[:].reshape([1]+list(var[:].shape)))[0]
                            while len(btv) > 0 and btv[-1] == 'N':
                                btv = btv[:-1]
                            if len(btv) > 0:
                                self.start_time = datetime.datetime.strptime(
                                    btv, self.date_fmt)
                            else:
                                self.start_time = field[0].replace(
                                    microsecond=0)
                                # write start of dataset in the form '2012-06-20T00:59:31Z'
                                out.variables['time_coverage_start'][:] = \
                                    stringtoarr(self.start_time.strftime(self.date_fmt), STRING_LENGTH_SHORT)

                        field = [(d - self.start_time).total_seconds()
                                 for d in field]
                        if appendDimLens["time"] == 0:
                            ovar.units = "seconds since " + self.start_time.strftime(
                                self.date_fmt)
                #fixme this is crap. should be a more interpreted way that isn't slow or dangerous
                for dimname in ovar.dimensions:
                    if dimname in self.appendDims:
                        basesh[didx] = appendDimLens[dimname]
                    didx += 1
                print 'Appending variable ', f
                if len(ovar.shape) == 0:
                    ovar[:] = field
                elif len(ovar.shape) == 1:
                    ovar[basesh[0]:] = field
                else:
                    topsh = [None for x in range(len(basesh))]
                    for x in range(len(field.shape)):
                        topsh[x] = basesh[x] + field.shape[x]
                    print 'appending var', f, field.shape, ovar.shape, basesh, topsh
                    ovar[tuple([
                        slice(basesh[x], topsh[x])
                        for x in range(len(ovar.shape))
                    ])] = field
        out.sync()
def write_test_wrf_file():
    """Writes out an idealized atmosphere for testing the interpolation.
    """

    # File to write out
    rootgrp = netCDF4.Dataset('wrfout_test_file', 'w')

    # Required dimensions
    dim_time = rootgrp.createDimension('Time', 0) # Unlimited
    dim_datestrlen = rootgrp.createDimension('DateStrLen', 19)
    dim_bottom_top = rootgrp.createDimension('bottom_top', 2)
    dim_bottom_top_stag = rootgrp.createDimension('bottom_top_stag', 3)
    dim_south_north = rootgrp.createDimension('south_north', 2)
    dim_south_north_stag = rootgrp.createDimension('south_north_stag', 3)
    dim_west_east = rootgrp.createDimension('west_east', 2)
    dim_west_east_stag = rootgrp.createDimension('west_east_stag', 3)

    # Times variable
    var_times = rootgrp.createVariable('Times', 'S1', ('Time', 'DateStrLen'))
    var_times[0] = netCDF4.stringtoarr('2014-01-01_01:00:00', 19)

    # Required 4D variables
    var_znu = rootgrp.createVariable('ZNU', 'f4', ('Time', 'bottom_top'))
    var_znu[0] = [0.99715, 0.99010] # 23 and 80 m AGL from calculate-eta-height.py
    var_znw = rootgrp.createVariable('ZNW', 'f4', ('Time', 'bottom_top_stag'))
    var_znw[0] = [1.0000, 0.99443, 0.98577]
    var_p = rootgrp.createVariable('P', 'f4', ('Time', 'bottom_top', 'south_north', 'west_east'))
    var_p[0] = [[[101049, 101049], [101049, 101049]],
                [[100368, 100368], [100368, 100368]]]
    var_pb = rootgrp.createVariable('PB', 'f4', ('Time', 'bottom_top', 'south_north', 'west_east'))
    var_pb[0] = [[[0, 0], [0, 0]],
                 [[0, 0], [0, 0]]]
    var_psfc = rootgrp.createVariable('PSFC', 'f4', ('Time', 'south_north', 'west_east'))
    var_psfc[0] = [[101325, 101325],
                   [101325, 101325]]
    var_t = rootgrp.createVariable('T', 'f4', ('Time', 'bottom_top', 'south_north', 'west_east'))
    var_t[0] = numpy.array([[[288.000, 288.000], [288.000, 288.000]],
                            [[287.630, 287.630], [287.630, 287.630]]]) - 300
    var_t2 = rootgrp.createVariable('T2', 'f4', ('Time', 'south_north', 'west_east'))
    var_t2[0] = [[288.000, 288.000], [288.000, 288.000]]
    var_th2 = rootgrp.createVariable('TH2', 'f4', ('Time', 'south_north', 'west_east'))
    var_th2[0] = [[288.000, 288.000], [288.000, 288.000]]
    var_u = rootgrp.createVariable('U', 'f4', ('Time', 'bottom_top', 'south_north','west_east_stag'))
    var_u[0] = [[[5, 5, 5], [5, 5, 5]],
                [[5.535, 5.535, 5.535], [5.535, 5.535, 5.535]]] # Log-law profile for 23 and 80 m (wind speed of u and v)
    var_v = rootgrp.createVariable('V', 'f4', ('Time', 'bottom_top', 'south_north_stag', 'west_east'))
    var_v[0] = [[[0, 0], [0, 0], [0, 0]],
                [[0, 0], [0, 0], [0, 0]]]
    var_p_top = rootgrp.createVariable('P_TOP', 'f4', ('Time', ))
    var_p_top[0] = 0

    # Map projection variables
    var_cosalpha = rootgrp.createVariable('COSALPHA', 'f4', ('Time', 'south_north', 'west_east'))
    var_cosalpha[0] = [[0.99, 0.99], [0.99, 0.99]]
    var_sinalpha = rootgrp.createVariable('SINALPHA', 'f4', ('Time', 'south_north', 'west_east'))
    var_sinalpha[0] = [[0.99, 0.99], [0.99, 0.99]]

    # Map coordinates
    var_xlong = rootgrp.createVariable('XLONG', 'f4', ('Time', 'south_north', 'west_east'))
    var_xlong[0] = [[0, 5000], [0, 5000]]
    var_xlat = rootgrp.createVariable('XLAT', 'f4', ('Time', 'south_north', 'west_east'))
    var_xlat[0] = [[0, 5000], [0, 5000]]

    # Close the file
    rootgrp.close()
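# Optional sanity check (added; not part of the original test writer): reopen
# the file and confirm the eta levels written above.
import numpy
import netCDF4
with netCDF4.Dataset('wrfout_test_file') as nc:
    assert numpy.allclose(nc.variables['ZNU'][0], [0.99715, 0.99010])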
Exemple #39
    def test_read_two_datasets(self):
        """Read a catalogue file for two data sets."""

        # Empty file with unique temporary name
        testfile = tempfile.NamedTemporaryFile(suffix='.nc')

        # Build NetCDF manually
        nc = Dataset(testfile.name, 'w')
        nc.setncattr('Conventions', 'CF-1.6')
        nc.setncattr('identifier', 'citizen-of-the-world')
        nc.setncattr('python_class', 'eumopps.catalogue.catalogue.Catalogue')
        nc.createDimension('datasets', 2)
        nc.createDimension('default_strlen', 23)
        group0 = nc.createGroup('datasets_00000000')
        group0.setncattr('name', 'Ffflip')
        group0.setncattr('path', '/find/it/here')
        group0.setncattr('python_class',
                         'eumopps.catalogue.dataset.CatalogueDataSet')
        group1 = nc.createGroup('datasets_00000001')
        group1.setncattr('name', 'Ffflop')
        group1.setncattr('path', '/or/here')
        group1.setncattr('python_class',
                         'eumopps.catalogue.dataset.CatalogueDataSet')
        group1.createDimension('subsets', 1)
        subset = group1.createGroup('subsets_00000000')
        subset.setncattr('python_class',
                         'eumopps.catalogue.dataset.CatalogueDataSubset')
        layout = subset.createGroup('layout')
        layout.setncattr('python_class',
                         'eumopps.catalogue.storage.DataStorageFiles')
        layout.createDimension('patterns', 2)
        layout.createVariable('patterns', 'S1', ['patterns', 'default_strlen'])
        layout.variables['patterns'][0] = stringtoarr('splendid', 23)
        layout.variables['patterns'][1] = stringtoarr('pretty', 23)
        matches = subset.createGroup('matches')
        matches.setncattr('python_class',
                          'eumopps.catalogue.dataset.CatalogueFileEntry')
        matches.createDimension('list_count', 0)
        matches.createDimension('tags', 2)
        matches_name = matches.createVariable('name', 'S1',
                                              ['list_count', 'default_strlen'])
        matches_time = matches.createVariable('time', 'i8', ['list_count'])
        matches_time.units = 'seconds since 1850-01-01 00:00:00 UTC'
        matches_size = matches.createVariable('size', 'i8', ['list_count'])
        matches_tags = matches.createVariable(
            'tags', 'S1', ['list_count', 'tags', 'default_strlen'])
        matches_name[0] = stringtoarr('bob', 23)
        # 2016-05-30 17:29:33 = 60780 days * 86400  + 17 hours * 3600 + 29 minutes * 60 + 33
        matches_time[0] = 5251454973
        matches_size[0] = 39877123421
        tags = numpy.zeros((2, 23), 'S1')
        tags[0] = stringtoarr('onetag', 23)
        tags[1] = stringtoarr('twotags', 23)
        matches_tags[0] = tags
        subset.createDimension('archive_unused', 2)
        subset.createVariable('archive_unused', 'S1',
                              ['archive_unused', 'default_strlen'])
        subset.variables['archive_unused'][0] = stringtoarr('nothing', 23)
        subset.variables['archive_unused'][1] = stringtoarr('notmuch', 23)
        group1.createDimension('non_matching', 3)
        group1.createVariable('non_matching', 'S1',
                              ['non_matching', 'default_strlen'])
        group1.variables['non_matching'][0] = stringtoarr('floop', 23)
        group1.variables['non_matching'][1] = stringtoarr('sloop', 23)
        group1.variables['non_matching'][2] = stringtoarr('kaput', 23)
        nc.close()

        # TEMP: show netcdf contents
        # print '\n' + subprocess.Popen(['ncdump', testfile.name], stdout=subprocess.PIPE).communicate()[0]

        # Read
        reader = CatalogueReaderNetCDF()
        result = reader.load(testfile.name)

        # Check results
        self.assertTrue(isinstance(result, Catalogue))
        self.assertEqual('citizen-of-the-world', result.identifier)
        self.assertTrue(isinstance(result.datasets, list))
        self.assertEqual(2, len(result.datasets))
        self.assertTrue(isinstance(result.datasets[0], CatalogueDataSet))
        self.assertEqual('Ffflip', result.datasets[0].name)
        self.assertEqual('/find/it/here', result.datasets[0].path)
        self.assertTrue(isinstance(result.datasets[0].name, basestring))
        self.assertTrue(isinstance(result.datasets[0].path, basestring))
        self.assertTrue(isinstance(result.datasets[1], CatalogueDataSet))
        self.assertEqual('Ffflop', result.datasets[1].name)
        self.assertEqual('/or/here', result.datasets[1].path)
        self.assertTrue(isinstance(result.datasets[1].name, basestring))
        self.assertTrue(isinstance(result.datasets[1].path, basestring))
        self.assertEqual(['floop', 'sloop', 'kaput'],
                         result.datasets[1].non_matching)
        self.assertTrue(
            isinstance(result.datasets[1].non_matching[0], basestring))
        self.assertTrue(
            isinstance(result.datasets[1].non_matching[1], basestring))
        self.assertTrue(
            isinstance(result.datasets[1].non_matching[2], basestring))
        self.assertEqual(1, len(result.datasets[1].subsets))
        self.assertTrue(
            isinstance(result.datasets[1].subsets[0].layout, DataStorageFiles))
        self.assertEqual(
            1, isinstance(result.datasets[1].subsets[0].matches, list))
        self.assertEqual(1, len(result.datasets[1].subsets[0].matches))
        self.assertEqual('bob', result.datasets[1].subsets[0].matches[0].name)
        self.assertTrue(
            isinstance(result.datasets[1].subsets[0].matches[0].name,
                       basestring))
        self.assertEqual(datetime(2016, 05, 30, 17, 29, 33),
                         result.datasets[1].subsets[0].matches[0].time)
        self.assertEqual(39877123421,
                         result.datasets[1].subsets[0].matches[0].size)
        self.assertEqual(2, len(result.datasets[1].subsets[0].matches[0].tags))
        self.assertEqual('onetag',
                         result.datasets[1].subsets[0].matches[0].tags[0])
        self.assertEqual('twotags',
                         result.datasets[1].subsets[0].matches[0].tags[1])
        self.assertEqual(['nothing', 'notmuch'],
                         result.datasets[1].subsets[0].archive_unused)
Exemple #40
    def save(self, fname):
        '''Save the file to the disk.

        Create netCDF file from the ncfile object.

        Parameters
        ----------
        fname : str
            File name.


        '''

        try:
            sh.rm(fname)
        except:
            pass

        ncfile4 = Dataset(fname,'w',clobber=False,format='NETCDF4_CLASSIC')

        # Create dimensions
        for dim in self.dims.itervalues():
            #print(dim)
            if dim["isunlimited"]:
                ncfile4.createDimension(dim['name'],None)
                if self.istop == -1: self.istop=dim['size']
            else:
                ncfile4.createDimension(dim['name'],dim['size'])

        # Loop over variables
        for vari in self.variab:
            #print vari
            perem  = self.variab[vari]

            var = ncfile4.createVariable(vari,
                                         perem['datatype'],
                                         perem['dimensions'], \
                                         fill_value=perem['FillValue'],\
                                         complevel=1)

            #attdict = perem['data'].__dict__
            #if '_FillValue' in attdict: del attdict['_FillValue']
            var.setncatts(perem['attributes'])

            # Zero-size string variables are loaded as masked constants by netCDF4
            # (e.g. rotated_pole). This workaround seems to solve the problem of not
            # being able to save this masked constant to netCDF4 variables:
            # Error "Cannot set fill value of string with array of dtype 'float64'".
            if perem['datatype'].char in 'SU':
                if type(perem['data'][:]) == np.ma.core.MaskedConstant :
                    perem['data'] = stringtoarr('',0)

            if perem['hasunlimdim']: # has an unlim dim, loop over unlim dim index.
                # range to copy
                if self.nchunk:
                    start = self.istart; stop = self.istop; step = self.nchunk
                    if step < 1: step = 1
                    for n in range(start, stop, step):
                        nmax = n+step
                        if nmax > self.istop: nmax=self.istop
                        idata = perem['data'][n:nmax]
                        var[n-self.istart:nmax-self.istart] = idata
                else:
                    idata = perem['data'][:]
                    var[0:len(idata)] = idata


            else: # no unlim dim or 1-d variable, just copy all data at once.
                if perem['data'].shape != ():
                    idata = perem['data'][:]
                    var[:] = idata
                else:
                    var[:] = perem['data']

            ncfile4.sync() # flush data to disk

        #gattrs = self.ifile.ncattrs()
        for gatt in self.gattrs:
            setattr(ncfile4, gatt, self.gattrs[gatt])


        ncfile4.close()
# test compound attributes.

FILE_NAME = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name
DIM_NAME = 'time'
VAR_NAME = 'wind'
VAR_NAME2 = 'forecast_wind'
GROUP_NAME = 'forecasts'
dtype = np.dtype([('speed', 'f4'), ('direction', 'f4')])
TYPE_NAME = 'wind_vector_type'
TYPE_NAMEC = 'wind_vectorunits_type'
dtypec = np.dtype([('speed', 'c', (8, )), ('direction', 'c', (8, ))])
missvals = np.empty(1, dtype)
missvals['direction'] = 1.e20
missvals['speed'] = -999.
windunits = np.zeros(1, dtypec)
windunits['speed'] = stringtoarr('m/s',\
        dtypec.fields['speed'][0].itemsize)
windunits['direction'] = stringtoarr('degrees',\
        dtypec.fields['direction'][0].itemsize)


class VariablesTestCase(unittest.TestCase):
    def setUp(self):
        self.file = FILE_NAME
        f = Dataset(self.file, 'w')
        d = f.createDimension(DIM_NAME, None)
        g = f.createGroup(GROUP_NAME)
        wind_vector_type = f.createCompoundType(dtype, TYPE_NAME)
        wind_vectorunits_type = f.createCompoundType(dtypec, TYPE_NAMEC)
        v = f.createVariable(VAR_NAME, wind_vector_type, DIM_NAME)
        vv = g.createVariable(VAR_NAME2, wind_vector_type, DIM_NAME)
        v.missing_values = missvals
def create_netcdf(
    a_netcdf_filename, a_lat_points, a_lon_points, a_nb_levels, a_celerity_arr, a_u_arr, a_v_arr, a_time, a_loc_names
):
    """
    dimensions:
      altitude = 401;
      profile  = 1 ;

    variables:
       float altitude(altitude) ;
         altitude:long_name = "height above mean sea level" ;
         altitude:units = "km" ;
         altitude:positive = "up" ; 

       double time(profile);
         time:long_name = "time" ;
         time:units = "days since 1970-01-01 00:00:00" ;
    
       string loc_name(profile) ;
        loc_name:units = "-" ;
        loc_name:long_name = "Location name" ;

       float lon(profile);
         lon:long_name = "longitude" ;
         lon:units = "degrees_east" ;

       float lat(profile);
         lat:long_name = "latitude" ;
         lat:units = "degrees_north" ;

       float celerity(profile, altitude) ;
         celerity:long_name = "celerity" ;
         celerity:units = "m s**-1" ;
         celerity:coordinates = "time lon lat altitude" ;

       float u(profile, altitude) ;
         u:long_name = "U velocity" ;
         u:units = "m s**-1" ;
         u:coordinates = "time lon lat altitude" ;

       float v(profile, altitude) ;
         v:long_name = "V velocity" ;
         v:units = "m s**-1" ;
         v:coordinates = "time lon lat altitude" ;
   
       attributes:
          :CF\:featureType = "profile";
 

    """
    print ("In create_netcdf %s" % (a_netcdf_filename))

    conf = Conf.get_instance()

    netcdf_format = conf.get("NETCDF", "produced_format", "NETCDF3_CLASSIC")

    # create file
    dataset = Dataset(a_netcdf_filename, "w", format=netcdf_format)

    # create dimension
    dataset.createDimension("altitude", a_nb_levels)
    dataset.createDimension("profile", len(a_lat_points))
    loc_name_len = dataset.createDimension("loc_name_len", 5)

    # create basic variables
    the_time = dataset.createVariable("time", "f8", ("profile"))
    lat = dataset.createVariable("latitude", "f4", ("profile"))
    lon = dataset.createVariable("longitude", "f4", ("profile"))
    altitudes = dataset.createVariable("altitude", "f4", ("altitude"))

    # create loc_name
    # In netcdf4 it would be
    # loc_names  = dataset.createVariable('loc_name', str,('profile'))
    if netcdf_format == "NETCDF3_CLASSIC":
        loc_names = dataset.createVariable("loc_name", "c", ("profile", "loc_name_len"))
    else:
        loc_names = dataset.createVariable("loc_name", str, ("profile"))

    # create param variables
    # u and v wind components
    u = dataset.createVariable("u", "f4", ("profile", "altitude"))
    v = dataset.createVariable("v", "f4", ("profile", "altitude"))
    # celerity
    c = dataset.createVariable("c", "f4", ("profile", "altitude"))

    # dataset.sync()

    # add attributes
    dataset.description = "CTBTO Infrasound wind profiles"
    dataset.history = "Created " + time.ctime(time.time()) + " by infra-profile-generator-v1.2.2"
    dataset.source = "infra-profile-generator-v1.2.2"
    dataset.version = "infrasound profile v1.0-20090801"
    # dataset.station     = 'IS42'
    lat.units = "degrees_north"
    lat.long_name = "Latitude"
    lon.units = "degrees_east"
    lon.long_name = "Longitude"
    altitudes.units = "m"
    altitudes.long_name = "Altitude"
    loc_names.units = "-"
    loc_names.long_name = "Location name"
    the_time.units = "hours since 1970-01-01 00:00:00.0"
    the_time.calendar = "gregorian"
    the_time.long_name = "Time"
    # param attributes
    u.units = "m s**-1"
    u.long_name = "U velocity"
    v.units = "m s**-1"
    v.long_name = "V velocity"
    c.units = "m s**-1"
    c.long_name = "Celerity"

    # create altitude
    alts = numpy.arange(0, 500 * a_nb_levels, 500)

    altitudes[:] = alts

    # add lat,lon
    lat[:] = a_lat_points
    lon[:] = a_lon_points
    # not used for the moment

    print ("a_loc_names %s\n" % (a_loc_names))

    if netcdf_format == "NETCDF3_CLASSIC":
        # NETCDF3 CLASSIC doesn't know about str
        cpt = 0
        for name in a_loc_names:
            loc_names[cpt] = stringtoarr(name, len(loc_name_len))
            cpt += 1
    else:
        # NETCDF4
        cpt = 0
        for name in a_loc_names:
            loc_names[cpt] = name
            cpt += 1

    # add time
    dt = date2num(a_time, "days since 1970-01-01 00:00:00", calendar="gregorian")

    # create the time array
    data_time = numpy.repeat(dt, len(a_lat_points))

    the_time[:] = data_time
    dataset.sync()

    c[:] = a_celerity_arr[:]
    u[:] = a_u_arr[:]
    v[:] = a_v_arr[:]

    dataset.sync()

    dataset.close()

    return 0
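# Hypothetical call sketch (added; not from the original source). Assumes a
# Conf singleton is already initialised so conf.get('NETCDF', ...) resolves;
# the two profiles and three levels below are made-up values, and each
# location name must fit the 5-character loc_name_len dimension.
import datetime
import numpy
nb_levels = 3
lat_pts = numpy.array([10.0, -5.0], dtype='f4')
lon_pts = numpy.array([30.0, 40.0], dtype='f4')
cel = numpy.zeros((2, nb_levels), dtype='f4')
u_arr = numpy.full((2, nb_levels), 5.0, dtype='f4')
v_arr = numpy.zeros((2, nb_levels), dtype='f4')
create_netcdf('profiles.nc', lat_pts, lon_pts, nb_levels, cel, u_arr, v_arr,
              datetime.datetime(2009, 8, 1), ['IS42a', 'IS42b'])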
Exemple #43
    def load_into_netcdf(self):
        """Load the grib files into the netCDF file that has been setup

        """

        if not hasattr(self, 'ncwriter'):
            self.setup_netcdf()

        self.logger.info('Making netCDF file %s' % self.ncfilename)
        field_dict = {}
        relevant_df = None
        for nc_field, grib_f in self.grib_vars:
            field, vertical_layer = grib_f.split(',')
            if vertical_layer != '':
                field_dict[(field, vertical_layer)] = nc_field
                if relevant_df is None:
                    relevant_df = self.index_df[
                        (self.index_df['field'] == field) &
                        (self.index_df['vertical_layer'] == vertical_layer)]
                else:
                    relevant_df = relevant_df.append(self.index_df[
                        (self.index_df['field'] == field) &
                        (self.index_df['vertical_layer'] == vertical_layer)],
                                                     ignore_index=True)
            else:
                field_dict[field] = nc_field
                if relevant_df is None:
                    relevant_df = self.index_df[
                        (self.index_df['field'] == field)]
                else:
                    relevant_df = relevant_df.append(self.index_df[
                        (self.index_df['field'] == field)], ignore_index=True)

        relevant_df.set_index('filename', inplace=True)
        times = []
        levels = []
        for filename in sorted(relevant_df.index.unique()):
            try:
                grbs = pygrib.open(os.path.join(self.grib_path, filename))
            except IOError:
                try:
                    grbs = pygrib.open(os.path.join(self.grib_path,
                                                    filename+'.grib2'))
                except IOError:
                    continue

            for filename, series in relevant_df.loc[filename].iterrows():
                try:
                    grb = grbs[series.grib_level]
                except IOError:
                    continue

                thetime = grb.validDate
                if thetime not in times:
                    timed = len(times)
                    times.append(thetime)
                    self.ncwriter.set_variable('Times', nc4.stringtoarr(
                        thetime.strftime('%Y-%m-%d_%H:%M:%S'), 19), timed)
                else:
                    timed = times.index(thetime)

                if self.vertical:
                    thelevel = grb.level
                    if thelevel not in levels:
                        leveld = len(levels)
                        levels.append(thelevel)
                    else:
                        leveld = levels.index(thelevel)

                    nc_field = field_dict[series.field].upper()
                else:
                    nc_field = field_dict[(series.field,
                                           series.vertical_layer)].upper()

                if not self.ncwriter.check_variable(nc_field):
                    self.ncwriter.add_variable(nc_field, description=grb.name,
                                               units=grb.units,
                                               vertical=self.vertical)

                if not self.vertical:
                    self.ncwriter.set_variable(
                        nc_field, grb.values[self.min_lat:self.max_lat,
                                             self.min_lon:self.max_lon],
                        timed)
                else:
                    ivals = grb.values[self.min_lat:self.max_lat,
                                       self.min_lon:self.max_lon]
                    try:
                        self.ncwriter.set_variable(nc_field, ivals,
                                                   [timed, leveld])

                    except RuntimeError:
                        self.logger.debug(
                            'Error with leveld = %s and var = %s' %
                            (leveld, grb.cfName))
                    else:
                        if self.level == 'wrfprs':
                            if not self.ncwriter.check_variable('P'):
                                self.ncwriter.add_variable('P', units='mb',
                                                           vertical=True)

                            self.ncwriter.set_variable(
                                'P', (np.ones(ivals.shape) * grb.level),
                                [timed, leveld])
        self.ncwriter.close()
        return os.path.join(self.netcdf_path, self.ncfilename)
# now that wind_data_t is defined, create the station data type.
station_data_t = f.createCompoundType(statdtype,'station_data')

statdat = f.createVariable('station_obs', station_data_t, ('station',))
# create a numpy structured array, assign data to it.
data = numpy.empty(2,station_data_t)
data['latitude'] = 40.
data['longitude'] = -105.
data['surface_wind']['speed'] = 12.5
data['surface_wind']['direction'] = 270
data['temp_sounding'] = (280.3,272.,270.,269.,266.,258.,254.1,250.,245.5,240.)
data['press_sounding'] = range(800,300,-50)

data['location_name'][0] = stringtoarr('Boulder, Colorado, USA',NUMCHARS)

print('data=',data)

# x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', 'f8'), ('y', 'i8')])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=[('x', 'f8'), ('y', 'S1', 2)])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=np.dtype({'names':['x','y'], 'formats':['f8',('S1',2)]}))
x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=np.dtype({'names':['x','y'], 'formats':['f8','S2']}))
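# Comparing the two layouts (an added check, not from the original): the
# ('S1', 2) subarray stores one character per element, while 'S2' stores the
# whole string in a single element.
import numpy as np
as_chars = np.array([(1.0, list('ba')), (3.0, list('ab'))],
                    dtype=np.dtype({'names': ['x', 'y'], 'formats': ['f8', ('S1', 2)]}))
as_str = np.array([(1.0, 'ba'), (3.0, 'ab')],
                  dtype=np.dtype({'names': ['x', 'y'], 'formats': ['f8', 'S2']}))
print(as_chars['y'][0])   # [b'b' b'a']  -- char-per-element, like stringtoarr output
print(as_str['y'][0])     # b'ba'        -- one fixed-width string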
Exemple #45
# ----------------------------------------------------------------------
# Get coordinates of points from ExodusII file.
exodus = netCDF4.Dataset(filenameExodus, 'a')
points = exodus.variables['coord'][:].transpose()
cellSizeDB = getCellSizeDB(points)
cellSizeFn = getCellSizeFn(points)

# Add cell size info to ExodusII file
if 'num_nod_var' not in exodus.dimensions:
    exodus.createDimension('num_nod_var', 2)

    name_nod_var = exodus.createVariable('name_nod_var', 'S1', (
        'num_nod_var',
        'len_string',
    ))
    name_nod_var[0, :] = netCDF4.stringtoarr("cell_size_db", 33)
    name_nod_var[1, :] = netCDF4.stringtoarr("cell_size_fn", 33)

    vals_nod_var = exodus.createVariable('vals_nod_var', numpy.float64, (
        'time_step',
        'num_nod_var',
        'num_nodes',
    ))

time_whole = exodus.variables['time_whole']
time_whole[0] = 0.0
vals_nod_var = exodus.variables['vals_nod_var']
vals_nod_var[0, 0, :] = cellSizeDB.transpose()
vals_nod_var[0, 1, :] = cellSizeFn.transpose()

exodus.close()
def create_netcdf(a_netcdf_filename, a_lat_points, a_lon_points, a_nb_levels, a_celerity_arr, a_u_arr, a_v_arr, a_time, a_loc_names):
    """
    dimensions:
      altitude = 401;
      profile  = 1 ;

    variables:
       float altitude(altitude) ;
         altitude:long_name = "height above mean sea level" ;
         altitude:units = "km" ;
         altitude:positive = "up" ; 

       double time(profile);
         time:long_name = "time" ;
         time:units = "days since 1970-01-01 00:00:00" ;
    
       string loc_name(profile) ;
        loc_name:units = "-" ;
        loc_name:long_name = "Location name" ;

       float lon(profile);
         lon:long_name = "longitude" ;
         lon:units = "degrees_east" ;

       float lat(profile);
         lat:long_name = "latitude" ;
         lat:units = "degrees_north" ;

       float celerity(profile, altitude) ;
         celerity:long_name = "celerity" ;
         celerity:units = "m s**-1" ;
         celerity:coordinates = "time lon lat altitude" ;

       float u(profile, altitude) ;
         u:long_name = "U velocity" ;
         u:units = "m s**-1" ;
         u:coordinates = "time lon lat altitude" ;

       float v(profile, altitude) ;
         u:long_name = "V velocity" ;
         v:units = "m s**-1" ;
         v:coordinates = "time lon lat altitude" ;
   
       attributes:
          :CF:featureType = "profile" ;
 

    """
    print("In create_netcdf %s" %(a_netcdf_filename))
    
    conf = Conf.get_instance()
    
    netcdf_format  = conf.get('NETCDF', 'produced_format', 'NETCDF3_CLASSIC')
    
    #create file
    dataset = Dataset(a_netcdf_filename, 'w', format=netcdf_format)
    
    #create dimension
    dataset.createDimension('altitude', a_nb_levels)
    dataset.createDimension('profile', len(a_lat_points))
    loc_name_len = dataset.createDimension('loc_name_len', 5)

    #create basic variables
    the_time  = dataset.createVariable('time',      'f8', ('profile'))
    lat       = dataset.createVariable('latitude',  'f4', ('profile'))
    lon       = dataset.createVariable('longitude', 'f4', ('profile'))
    altitudes = dataset.createVariable('altitude',  'f4', ('altitude'))

    # create loc_name
    # In netcdf4 it would be 
    #loc_names  = dataset.createVariable('loc_name', str,('profile'))
    if netcdf_format == 'NETCDF3_CLASSIC':
        loc_names  = dataset.createVariable('loc_name', 'c', ('profile','loc_name_len') )
    else:
        loc_names  = dataset.createVariable('loc_name', str, ('profile') )
 
 
    # create param variables
    # u and v wind components
    u         = dataset.createVariable('u',   'f4', ('profile', 'altitude'))
    v         = dataset.createVariable('v',   'f4', ('profile', 'altitude'))
    # celerity 
    c         = dataset.createVariable('c',    'f4', ('profile','altitude'))

    #dataset.sync()

    # add attributes
    dataset.description = 'CTBTO Infrasound wind profiles'
    dataset.history     = 'Created ' + time.ctime(time.time()) + ' by infra-profile-generator-v1.2.2'
    dataset.source      = 'infra-profile-generator-v1.2.2'
    dataset.version     = 'infrasound profile v1.0-20090801'
    #dataset.station     = 'IS42'
    lat.units           = 'degrees_north'
    lat.long_name       = 'Latitude'
    lon.units           = 'degrees_east'
    lon.long_name       = 'Longitude'
    altitudes.units     = 'm'
    altitudes.long_name = 'Altitude'
    loc_names.units     = '-'
    loc_names.long_name = 'Location name'
    the_time.units      = 'days since 1970-01-01 00:00:00'
    the_time.calendar   = 'gregorian'
    the_time.long_name  = 'Time'
    # param attributes
    u.units             = 'm s**-1'
    u.long_name         = 'U velocity'
    v.units             = 'm s**-1'
    v.long_name         = 'V velocity'
    c.units             = 'm s**-1'
    c.long_name         = 'Celerity'

    # create altitude
    alts = numpy.arange(0, 500 * a_nb_levels, 500)

    altitudes[:] = alts

    # add lat,lon
    lat[:]    = a_lat_points
    lon[:]    = a_lon_points
    #not used for the moment
    
    print("a_loc_names %s\n" % (a_loc_names))
    
    
    if netcdf_format == 'NETCDF3_CLASSIC':
        # NETCDF3_CLASSIC has no variable-length string type, so each name is
        # written as a fixed-size character array padded by stringtoarr
        for cpt, name in enumerate(a_loc_names):
            loc_names[cpt] = stringtoarr(name, len(loc_name_len))
    else:
        # NETCDF4 stores variable-length strings directly
        for cpt, name in enumerate(a_loc_names):
            loc_names[cpt] = name
    
    #add time
    dt  = date2num(a_time, "days since 1970-01-01 00:00:00", calendar = 'gregorian') 
    
    #create the time array
    data_time = numpy.repeat(dt, len(a_lat_points) )

    the_time[:] = data_time
    dataset.sync()

    c[:] = a_celerity_arr[:]
    u[:] = a_u_arr[:]
    v[:] = a_v_arr[:]

    dataset.sync()

    dataset.close()

    return 0

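# Hedged usage sketch for create_netcdf above. The file name, coordinates and
# field values are made up; in the real tool they come from the profile
# generator, Conf must already point at a valid configuration, and the
# location names must fit the 5-character loc_name_len dimension.
import numpy
from datetime import datetime

nb_levels = 4
lats = [40.0, 41.0]
lons = [-105.0, -104.0]
shape = (len(lats), nb_levels)
create_netcdf('profiles.nc', lats, lons, nb_levels,
              numpy.full(shape, 300.0),   # celerity (m s**-1)
              numpy.full(shape, 5.0),     # u wind component
              numpy.full(shape, -2.0),    # v wind component
              datetime(2009, 8, 1),
              ['IS42', 'IS43'])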

parser.add_option("-n", "--number", dest="number", type='int', help="test variant to set up, 1-5", metavar="NUMBER")
options, args = parser.parse_args()
if not options.filename:
   options.filename = 'landice_grid.nc'
   print 'No file specified.  Attempting to use landice_grid.nc'

if not options.afile:
   sys.exit("Error: A restart file from test A1 is required to set up this test.  Specify with -a")

# copy the restart file to be the new input file
shutil.copyfile(options.afile, options.filename)

# Open the file, get needed dimensions
gridfile = NetCDFFile(options.filename,'r+')
StrLen = len(gridfile.dimensions['StrLen'])
gridfile.variables['xtime'][0,:] = netCDF4.stringtoarr('0000-01-01_00:00:00'.ljust(StrLen), StrLen)
gridfile.variables['simulationStartTime'][:] = netCDF4.stringtoarr('0000-01-01_00:00:00'.ljust(StrLen), StrLen)
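# Side note (illustrative): ljust pads the timestamp with spaces up to StrLen
# before stringtoarr converts it, so xtime ends up as a fixed-width,
# space-padded 'S1' character array rather than a NUL-padded one, e.g.
#   netCDF4.stringtoarr('0000-01-01_00:00:00'.ljust(64), 64)
# yields array([b'0', b'0', ..., b' ', b' '], dtype='|S1').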

b_moulin = {}  # empty dictionary

b_moulin[1]=((0,59000,8000,90.0),)


b_moulin[2]= ((0,30000,3000,9.0),
                (1,8000,4000,9.0),
                (2,60000,7000,9.0),
                (3,35000,9000,9.0),
                (4,46000,10000,9.0),
                (5,26000,11000,9.0),
                (6,7000,12000,9.0),
                (7,5000,14000,9.0),
def hobo_to_netcdf(input_file,
                   output,
                   config_file=None,
                   json_file=None,
                   overwrite=False):
    """
    Converte arquivos hobo (convertidos para csv) para formato netcdf

    :param input_file:
    :param output:
    :param config_file:
    :param json_file:
    :param overwrite:
    :return:
    """

    logger.debug('Input file: {}'.format(input_file))
    logger.debug('Output: {}'.format(output))
    logger.debug('Config file: {}'.format(config_file))
    logger.debug('NetCDF file json: {}'.format(json_file))
    logger.debug('Overwrite flag: {}'.format(overwrite))

    # If output is a directory, generate an automatic file name
    if os.path.isdir(output):
        file_name = os.path.splitext(os.path.basename(input_file))[0] + '.nc'
        output_file = os.path.join(output, file_name)
    else:
        output_file = output

    # Check whether the file already exists; raise unless the overwrite flag is set
    if os.path.exists(output_file) and overwrite is False:
        raise FileExistsError('File already exist. Use -ow flag to overwrite')

    # Open the file and extract its metadata
    title, serial_number, header, details = hobo.get_info(input_file)

    #if not details:
    #    # Error - no information at all about this plot title
    #    print('ERROR: file has no details. The file must be exported with details to allow verification')
    #    exit(ERROR_CODE)

    # Read the .csv file with configuration and extra station information

    if os.path.exists(config_file) is False:
        raise FileNotFoundError('Config file not found {}'.format(config_file))

    cfgs = pd.read_csv(config_file)

    # Look for a plot title equal to the one in the input file
    row = cfgs.loc[cfgs['Plot Title'] == title]
    if row.empty:
        # Error - no information at all about this plot title
        raise AttributeError(
            'Plot title ({}) not found in config file'.format(title))

    # Extract the important station information from the config file
    station_id = row.iloc[0]['Codigo']
    station_sn = row.iloc[0]['Numero de serie']
    station_latitude = row.iloc[0]['Latitude [graus]']
    station_longitude = row.iloc[0]['Longitude [graus]']
    station_altitude = row.iloc[0]['Altitude [m]']
    station_datetime_col = row.iloc[0]['Coluna data/hora']
    station_gmt = int(row.iloc[0]['GMT'])
    station_uuid = row.iloc[0]['UUID']
    #station_time_resolution = row.iloc[0]['Intervalo medidas (ISO8601)']
    station_variable_col = row.iloc[0]['Coluna variavel']
    # Find the datetime column flexibly (look for a similar name)
    datetime_col = None
    for col_name in header:
        found = False
        if util.find_matches(col_name, station_datetime_col):
            datetime_col = col_name
            found = True
            break
    if not datetime_col:
        raise AttributeError('col ({}) not found'.format(station_datetime_col))

    # Find the timezone and check it is within the expected value
    gmt_hour_offset, gmt_minute_offset = util.get_gmt_offset(datetime_col)
    if station_gmt != gmt_hour_offset:
        print(
            'Warning: found timezone (GMT{}) different from config (GMT{}). Using GMT{}.'
            .format(gmt_hour_offset, station_gmt, gmt_hour_offset))

    # Find the data column name by similarity
    variable_col = None
    for col_name in header:
        found = False
        if util.find_matches(col_name, station_variable_col):
            variable_col = col_name
            found = True
            break
    if not variable_col:
        raise AttributeError('col ({}) not found'.format(station_variable_col))

    # Check whether the time resolution is within the expected one
    #details = hobo.process_details(details)

    # TODO: find the field in the dictionary without being case sensitive
    #serie = details['Details']['Series: ' + variable_col]
    #details = details['Details']
    #serie = None
    #for k, v in details.items():
    #    found = False
    #    if util.find_matches(k, ['Series:', station_variable_col]):
    #        serie = v
    #        found = True
    #        break
    #if not serie:
    #    print('ERROR: no series information was found in the details')
    #    exit(ERROR_CODE)

    #filter_param = serie['Filter Parameters']
    #filter_type = filter_param['Filter Type']
    #filter_interval = filter_param['Filter Interval']

    # TODO: this is not good, the formats used are not flexible or consistent. Automate this better in the future
    #if filter_type != 'Sum of event values':
    #    print('ERROR: series with unexpected filter: {}'.format(filter_type))
    #    exit(ERROR_CODE)

    #if filter_interval == '5 Minutes':
    #    if station_time_resolution != 'PT5M':
    #        print('WARNING: series with interval {}, expected {}'.format(filter_interval, station_time_resolution))
    #        station_time_resolution = 'PT5M'
    #elif filter_interval == '1 Day':
    #    if station_time_resolution != 'PT1D':
    #        print('WARNING: series with interval {}, expected {}'.format(filter_interval, station_time_resolution))
    #        station_time_resolution = 'PT1D'
    #else:
    #    print('ERROR: time resolution not implemented yet: {}'.format(filter_interval))
    #    exit(ERROR_CODE)

    #if station_time_resolution == 'PT5M':
    #    if filter_interval != '5 Minutes':
    #        print('ERROR: series with interval {}, expected {}'.format(filter_interval, station_time_resolution))
    #        exit(ERROR_CODE)
    #elif station_time_resolution == 'PT1D':
    #    if filter_interval != '1 Day':
    #        print('ERROR: series with interval {}, expected {}'.format(filter_interval, station_time_resolution))
    #        exit(ERROR_CODE)
    #else:
    #    print('ERROR: time resolution not implemented yet: {}'.format(station_time_resolution))
    #    exit(ERROR_CODE)

    # Extract the acquisition data
    table = hobo.get_data(input_file)

    # Split off the precipitation series
    precipitation = table[variable_col]
    precipitation.index = table[datetime_col]
    precipitation = precipitation.dropna()  # drop the NaNs

    # Process the date/time data
    #date_str = table[datetime_col]
    date_str = precipitation.index.to_series()
    date_time = pd.to_datetime(date_str, format='%m/%d/%y %I:%M:%S %p')
    #gmt_hour_offset = station_gmt
    #gmt_minute_offset = 0
    tzinfo = timezone(
        timedelta(hours=gmt_hour_offset, minutes=gmt_minute_offset))
    # build the index with timezone information included
    index = date_time.dt.tz_localize(tzinfo)
    # convert to UTC
    index_utc = index.dt.tz_convert('UTC')

    # Identify the first and last acquisition events
    first_day_str = cf.datetime2str(index_utc.iloc[0])
    last_day_str = cf.datetime2str(index_utc.iloc[-1])
    #logger.debug("Start and end of measurements in UTC: {} - {}".format(first_day_str, last_day_str))

    # Generate the output file name if it has not been defined yet
    if output_file is None:
        file_name = '{}_{}_{}.nc'.format(station_id, first_day_str,
                                         last_day_str)
    else:
        file_name = output_file

    nc_input_file = file_name

    # Add the output folder to the path if defined
    #if output_folder is None:
    #    nc_input_file = file_name
    #else:
    #    nc_input_file = os.path.join(output_folder, file_name)

    # Create the netCDF file
    nc_file = NetCDFJSON()
    nc_file.write(nc_input_file)

    # Read the json file with the netCDF structure configuration
    if os.path.exists(json_file) is False:
        raise FileNotFoundError(
            'NetCDF json file not found {}'.format(json_file))

    nc_file.load_json(json_file)
    nc_file.create_from_json()

    # get handles to the dimensions
    timeDim = nc_file.get_dimension('time')
    nameDim = nc_file.get_dimension('name_strlen')
    # get handles to the variables
    time = nc_file.get_variable('time')
    #time_bnds = nc_file.get_variable('time_bnds')
    lat = nc_file.get_variable('lat')
    lon = nc_file.get_variable('lon')
    alt = nc_file.get_variable('alt')
    station_name = nc_file.get_variable('station_name')

    np_time = index_utc.to_numpy()
    nc_time = date2num(np_time, units=time.units, calendar=time.calendar)
    # Precipitation is accumulated over time. The CF conventions require that in this
    # kind of case the bounds of the accumulation interval be reported. For a measurement
    # accumulated over the last 5 minutes, the bounds are the current time - 5 min and the current time

    #nc_superior_bound_time = nc_time
    #if station_time_resolution == 'PT5M':
    #    delta = timedelta(minutes=5)
    #elif station_time_resolution == 'PT1D':
    #    delta = timedelta(days=1)
    #else:
    #    print('Error. Time interval not implemented yet {}'.format(station_time_resolution))
    #    exit(ERROR_CODE)

    #delta = cf.period_iso8601_to_relativetime(station_time_resolution)
    #inferior_bound_time = np_time - delta

    #nc_inferior_bound_time = date2num(inferior_bound_time, units=time.units, calendar=time.calendar)
    # combine the lower bound with the upper bound
    #nc_time_bnds = np.stack((nc_inferior_bound_time, nc_superior_bound_time), axis=-1)

    # Set the variables
    lat[:] = np.array([station_latitude])
    lon[:] = np.array([station_longitude])
    alt[:] = np.array([station_altitude])
    time[:] = nc_time
    #time_bnds[:] = nc_time_bnds
    station_name[:] = stringtoarr(station_id, nameDim.size)
    # Insert the precipitation information
    nc_var = nc_file.get_variable('precipitation')

    FILL_VALUE = nc_var._FillValue
    #data_var = table[variable_col]
    data_var = precipitation
    data_var = data_var.replace(np.nan, FILL_VALUE)
    data_var = data_var.to_numpy()
    nc_var[:] = data_var
    data_len = len(data_var)

    # Processing for metadata
    # Work on the already converted data to make the code easier to reuse later

    # min/max lat and lon
    min_lat = np.amin(lat)
    max_lat = np.amax(lat)
    min_lon = np.amin(lon)
    max_lon = np.amax(lon)

    # time duration
    min_time = num2date(np.amin(time),
                        units=time.units,
                        calendar=time.calendar)
    max_time = num2date(np.amax(time),
                        units=time.units,
                        calendar=time.calendar)
    min_time_str = cf.datetime2str(min_time)
    max_time_str = cf.datetime2str(max_time)
    time_delta = max_time - min_time
    time_delta_str = cf.timedelta2str(time_delta)
    # time resolution
    #time_resolution_str = station_time_resolution

    # Update the metadata
    gbd_index = nc_file.get_group('gbd_index')
    gbd_index.geospatial_lat_min = min_lat
    gbd_index.geospatial_lat_max = max_lat
    gbd_index.geospatial_lon_min = min_lon
    gbd_index.geospatial_lon_max = max_lon
    gbd_index.time_coverage_start = min_time_str
    gbd_index.time_coverage_end = max_time_str
    gbd_index.time_coverage_duration = time_delta_str
    #nc_file.rootgrp.time_coverage_resolution = time_resolution_str
    uuid = '{}/{}_{}_{}.nc'.format(station_uuid, station_id, first_day_str,
                                   last_day_str)
    gbd_index.uuid = uuid
    gbd_index.date_created = cf.datetime2str(datetime.now(timezone.utc))
    gbd_index.history = '({}) Created with {}'.format(gbd_index.date_created,
                                                      TOOL_NAME)
    gbd_index.keywords = [nc_var.standard_name, nc_var.units, station_id]
    gbd_index.key_variables = 'precipitation'

    # Generate the gbd_index group data
    #nc_file.rootgrp.createGroup('gbd_index')
    #nc_file.rootgrp.gbd_index.uuid = uuid

    nc_file.close()

    # Print the result
    print('Input file: {}'.format(input_file))
    print('Output file: {}'.format(nc_input_file))
    print('Latitude Min/Max: {} / {}'.format(min_lat, max_lat))
    print('Longitude Min/Max: {} / {}'.format(min_lon, max_lon))
    print('Datetime (UTC) Min/Max: {} / {}'.format(min_time_str, max_time_str))
    print('Coverage duration: {}'.format(time_delta_str))
    print('Data length: {}'.format(data_len))

    #print('Resolution: {}'.format(time_resolution_str))

    # Check whether the file follows the CF standard using cfchecks
    if RUN_CFCHECKS:
        print('\nRunning cfchecks')
        sys.argv = ['', nc_input_file]
        sys.exit(main())
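# Hedged usage sketch for hobo_to_netcdf. All paths are hypothetical; the
# config CSV must contain the station metadata columns the function looks up
# ('Codigo', 'Numero de serie', 'Latitude [graus]', ...) and the JSON file
# must describe the netCDF layout that NetCDFJSON expects.
hobo_to_netcdf('hobo_export.csv',
               'out/',                        # directory: name derived from input
               config_file='stations.csv',
               json_file='precipitation.json',
               overwrite=True)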
# test compound attributes.

FILE_NAME = tempfile.mktemp(".nc")
DIM_NAME = 'time'
VAR_NAME = 'wind'
VAR_NAME2 = 'forecast_wind'
GROUP_NAME = 'forecasts'
dtype=np.dtype([('speed', 'f4'), ('direction', 'f4')])
TYPE_NAME = 'wind_vector_type'
TYPE_NAMEC = 'wind_vectorunits_type'
dtypec=np.dtype([('speed', 'c',(8,)), ('direction', 'c',(8,))])
missvals = np.empty(1,dtype)
missvals['direction']=1.e20
missvals['speed']=-999.
windunits = np.zeros(1,dtypec)
windunits['speed'] = stringtoarr('m/s',\
        dtypec.fields['speed'][0].itemsize)
windunits['direction'] = stringtoarr('degrees',\
        dtypec.fields['direction'][0].itemsize)

class VariablesTestCase(unittest.TestCase):

    def setUp(self):
        self.file = FILE_NAME
        f  = Dataset(self.file, 'w')
        d = f.createDimension(DIM_NAME,None)
        g = f.createGroup(GROUP_NAME)
        wind_vector_type = f.createCompoundType(dtype, TYPE_NAME)
        wind_vectorunits_type = f.createCompoundType(dtypec, TYPE_NAMEC)
        v = f.createVariable(VAR_NAME,wind_vector_type, DIM_NAME)
        vv = g.createVariable(VAR_NAME2,wind_vector_type,DIM_NAME)
        v.missing_values = missvals
def write_exodus_file(filename, cells, vertices, shape="SHELL4"):
    """
    Write Exodus-II file compatible with CUBIT.

    cells is a 0-based array (ncells, ncorners).

    vertices is (nvertices, dim).

    All cells are placed in a single block.

    Requires netCDF4 module.
    """
    import numpy
    from netCDF4 import Dataset

    len_string = 33

    root = Dataset(filename, 'w', format='NETCDF3_CLASSIC')

    # Set global attributes
    root.api_version = 4.98
    root.version = 4.98
    root.floating_point_word_size = 8
    root.file_size = 0
    root.title = "cubit"

    # Setup dimensions

    # Generic information
    root.createDimension('len_string', len_string)
    root.createDimension('len_line', 81)
    root.createDimension('four', 4)
    root.createDimension('num_qa_rec', 1)
    root.createDimension('time_step', None)

    # Mesh specific information
    (ncells, ncorners) = cells.shape
    (nvertices, dim) = vertices.shape
    root.createDimension('num_dim', dim)
    root.createDimension('num_el_blk', 1)
    root.createDimension('num_nod_per_el1', ncorners)
    root.createDimension('num_att_in_blk1', 1)

    root.createDimension('num_nodes', nvertices)
    root.createDimension('num_elem', ncells)
    root.createDimension('num_el_in_blk1', ncells)

    # Setup variables
    connect1 = root.createVariable('connect1', numpy.int32,
                                   ('num_el_in_blk1', 'num_nod_per_el1',))

    coord = root.createVariable('coord', numpy.float64,
                                ('num_dim', 'num_nodes',))
    
    time_whole = root.createVariable('time_whole', numpy.float64,
                                     ('time_step',))
    
    coor_names = root.createVariable('coor_names', 'S1',
                                     ('num_dim', 'len_string',))
    
    qa_records = root.createVariable('qa_records', 'S1',
                                     ('num_qa_rec', 'four', 'len_string',))
    
    eb_names = root.createVariable('eb_names', 'S1',
                                   ('num_el_blk', 'len_string',))

    elem_map = root.createVariable('elem_map', numpy.int32,
                                   ('num_elem',))

    eb_status = root.createVariable('eb_status', numpy.int32,
                                    ('num_el_blk',))

    eb_prop1 = root.createVariable('eb_prop1', numpy.int32,
                                   ('num_el_blk',))

    attrib1 = root.createVariable('attrib1', numpy.float64,
                                  ('num_el_in_blk1', 'num_att_in_blk1',))

    # Set variable values
    connect1[:] = 1+cells[:]
    connect1.elem_type = shape

    coord[:] = vertices.transpose()[:]

    from netCDF4 import stringtoarr
    if dim == 2:
        coor_names[0,:] = stringtoarr("x", len_string)
        coor_names[1,:] = stringtoarr("y", len_string)
    elif dim == 3:
        coor_names[0,:] = stringtoarr("x", len_string)
        coor_names[1,:] = stringtoarr("y", len_string)
        coor_names[2,:] = stringtoarr("z", len_string)


    qa_records[0,0,:] = stringtoarr("CUBIT", len_string)
    qa_records[0,1,:] = stringtoarr("11.0", len_string)
    qa_records[0,2,:] = stringtoarr("01/01/2000", len_string)
    qa_records[0,3,:] = stringtoarr("12:00:00", len_string)

    elem_map[:] = numpy.arange(1, ncells+1, dtype=numpy.int32)[:]

    eb_status[:] = numpy.ones( (1,), dtype=numpy.int32)[:]

    eb_prop1[:] = numpy.ones( (1,), dtype=numpy.int32)[:]
    eb_prop1.name = "ID"

    attrib1[:] = numpy.ones( (ncells, 1), dtype=numpy.int32)[:]

    root.close()
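# Hedged usage sketch for write_exodus_file: a single SHELL4 quad in the z=0
# plane. Cells are 0-based connectivity as the docstring requires; the output
# file name is arbitrary.
import numpy

vertices = numpy.array([[0.0, 0.0, 0.0],
                        [1.0, 0.0, 0.0],
                        [1.0, 1.0, 0.0],
                        [0.0, 1.0, 0.0]])
cells = numpy.array([[0, 1, 2, 3]], dtype=numpy.int32)
write_exodus_file('quad.exo', cells, vertices, shape='SHELL4')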
        longitudes.units = 'degrees_east'

        times = rootgrp.createVariable('time', 'i4', ('time', ))
        times.standard_name = 'time'
        times.long_name = 'Time of measurement'
        times.units = 'seconds since 1970-01-01 00:00:00'

        air_temperatures = rootgrp.createVariable('air_temperature', 'f4',
                                                  ('time', ))
        air_temperatures.coordinates = 'lat lon'
        air_temperatures.standard_name = 'air_temperature'
        air_temperatures.long_name = 'Air temperature in degrees Celsius'
        air_temperatures.units = 'degrees Celsius'

        # set the values of the variables
        station_name[:] = netCDF4.stringtoarr('Penlee', 50)
        altitude[:] = [station_altitude]
        latitudes[:] = [station_lat]
        longitudes[:] = [station_lon]
        times[:] = timestamp
        air_temperatures[:] = temp

        rootgrp.close()


entries = (os.path.join(sourcefolder, fn) for fn in os.listdir(sourcefolder))
entries = ((os.stat(path), path) for path in entries)

# leave only regular files, insert creation date
entries = ((stat[ST_CTIME], path) for stat, path in entries
           if S_ISREG(stat[ST_MODE]))
Exemple #53
                  metavar="NUMBER")
options, args = parser.parse_args()
if not options.filename:
    options.filename = 'landice_grid.nc'
    print 'No file specified.  Attempting to use landice_grid.nc'

if not options.afile:
    sys.exit(
        "Error: A restart file from test B5 is required to set up this test.  Specify with -b"
    )

# copy the restart file to be the new input file
shutil.copyfile(options.afile, options.filename)

# Open the file, get needed dimensions
gridfile = NetCDFFile(options.filename, 'r+')
StrLen = len(gridfile.dimensions['StrLen'])
gridfile.variables['xtime'][0, :] = netCDF4.stringtoarr(
    '0000-01-01_00:00:00'.ljust(StrLen), StrLen)
gridfile.variables['simulationStartTime'][:] = netCDF4.stringtoarr(
    '0000-01-01_00:00:00'.ljust(StrLen), StrLen)

# modify melt inputs
gridfile.variables['externalWaterInput'][
    0, :] = gridfile.variables['externalWaterInput'][
        0, :] * 1.0e-12  # Make value at moulin locations tiny but positive
# value for basalMeltInput doesn't matter, because it will be overwritten in the code.
gridfile.close()

print 'Successfully added initial conditions to: ', options.filename
def WriteNCCF(FileName,Dates,Latitudes,Longitudes,ClimPoints,DataObject,DimObject,AttrObject,GlobAttrObject):
    ''' Sort out the date/times to write out and time bounds.
    Sort out clim bounds.
    Sort out lat and lon bounds.
    Convert variables using the obtained scale_factor and add_offset: stored_var=int((var-offset)/scale).
    Write to file, set up given dimensions, looping through all potential variables and
    their attributes, and then the provided dictionary of global attributes. '''
    
    # Sort out date/times to write out
    print(Dates)
    TimPoints,TimBounds = MakeDaysSince(Dates['StYr'],Dates['StMon'],Dates['EdYr'],Dates['EdMon'])
    nTims = len(TimPoints)

    # Sort out clim bounds - paired strings
    ClimBounds = np.empty((12,2),dtype='|S10')
    for mm in range(12):
        ClimBounds[mm,0] = str(ClimPoints[0])+'-'+str(mm+1)+'-'+str(1)
        ClimBounds[mm,1] = str(ClimPoints[1])+'-'+str(mm+1)+'-'+str(MonthDays[mm])

    # Sort out LatBounds and LonBounds
    LatBounds = np.empty((len(Latitudes),2),dtype='float')
    LonBounds = np.empty((len(Longitudes),2),dtype='float')

    LatBounds[:,0] = Latitudes - ((Latitudes[1]-Latitudes[0])/2.)
    LatBounds[:,1] = Latitudes + ((Latitudes[1]-Latitudes[0])/2.)

    LonBounds[:,0] = Longitudes - ((Longitudes[1]-Longitudes[0])/2.)
    LonBounds[:,1] = Longitudes + ((Longitudes[1]-Longitudes[0])/2.)

    #pdb.set_trace()
    
    # No need to convert float data using given scale_factor and add_offset to integers - done within writing program (packV = (V-offset)/scale)
    # Not sure what this does to float precision though...
    # Change mdi into an integer -999 because these are stored as integers
    for vv in range(len(DataObject)):
        DataObject[vv][np.where(DataObject[vv] == OLDMDI)] = MDI

    # Create a new netCDF file - have tried zlib=True,least_significant_digit=3 (and 1) - no difference
    ncfw=Dataset(FileName,'w',format='NETCDF4_CLASSIC') # need to try NETCDF4 and also play with compression but test this first
    
    # Write out the global attributes
    if ('description' in GlobAttrObject):
        ncfw.description = GlobAttrObject['description']
        #print(GlobAttrObject['description'])

    if ('File_created' in GlobAttrObject):
        ncfw.File_created = GlobAttrObject['File_created']

    if ('Title' in GlobAttrObject):
        ncfw.Title = GlobAttrObject['Title']

    if ('Institution' in GlobAttrObject):
        ncfw.Institution = GlobAttrObject['Institution']

    if ('History' in GlobAttrObject):
        ncfw.History = GlobAttrObject['History']

    if ('Licence' in GlobAttrObject):
        ncfw.Licence = GlobAttrObject['Licence']

    if ('Project' in GlobAttrObject):
        ncfw.Project = GlobAttrObject['Project']

    if ('Processing_level' in GlobAttrObject):
        ncfw.Processing_level = GlobAttrObject['Processing_level']

    if ('Acknowledgement' in GlobAttrObject):
        ncfw.Acknowledgement = GlobAttrObject['Acknowledgement']

    if ('Source' in GlobAttrObject):
        ncfw.Source = GlobAttrObject['Source']

    if ('Comment' in GlobAttrObject):
        ncfw.Comment = GlobAttrObject['Comment']

    if ('References' in GlobAttrObject):
        ncfw.References = GlobAttrObject['References']

    if ('Creator_name' in GlobAttrObject):
        ncfw.Creator_name = GlobAttrObject['Creator_name']

    if ('Creator_email' in GlobAttrObject):
        ncfw.Creator_email = GlobAttrObject['Creator_email']

    if ('Version' in GlobAttrObject):
        ncfw.Version = GlobAttrObject['Version']

    if ('doi' in GlobAttrObject):
        ncfw.doi = GlobAttrObject['doi']

    if ('Conventions' in GlobAttrObject):
        ncfw.Conventions = GlobAttrObject['Conventions']

    if ('netcdf_type' in GlobAttrObject):
        ncfw.netcdf_type = GlobAttrObject['netcdf_type']

    # Loop through and set up the dimension names and quantities
    for vv in range(len(DimObject[0])):
        ncfw.createDimension(DimObject[0][vv],DimObject[1][vv])

    # Go through each dimension and set up the variable and attributes for that dimension if needed
    for vv in range(len(DimObject)-2): # ignore first two elements of the list but count all other dictionaries
        print(DimObject[vv+2]['var_name'])

        # Not 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        MyVar = ncfw.createVariable(DimObject[vv+2]['var_name'],DimObject[vv+2]['var_type'],DimObject[vv+2]['var_dims'])

        # Apply any other attributes
        if ('standard_name' in DimObject[vv+2]):
            MyVar.standard_name = DimObject[vv+2]['standard_name']

        if ('long_name' in DimObject[vv+2]):
            MyVar.long_name = DimObject[vv+2]['long_name']

        if ('units' in DimObject[vv+2]):
            MyVar.units = DimObject[vv+2]['units']

        if ('axis' in DimObject[vv+2]):
            MyVar.axis = DimObject[vv+2]['axis']

        if ('calendar' in DimObject[vv+2]):
            MyVar.calendar = DimObject[vv+2]['calendar']

        if ('start_year' in DimObject[vv+2]):
            MyVar.start_year = DimObject[vv+2]['start_year']

        if ('end_year' in DimObject[vv+2]):
            MyVar.end_year = DimObject[vv+2]['end_year']

        if ('start_month' in DimObject[vv+2]):
            MyVar.start_month = DimObject[vv+2]['start_month']

        if ('end_month' in DimObject[vv+2]):
            MyVar.end_month = DimObject[vv+2]['end_month']

        if ('bounds' in DimObject[vv+2]):
            MyVar.bounds = DimObject[vv+2]['bounds']

        if ('climatology' in DimObject[vv+2]):
            MyVar.climatology = DimObject[vv+2]['climatology']

        if ('point_spacing' in DimObject[vv+2]):
            MyVar.point_spacing = DimObject[vv+2]['point_spacing']

        # Provide the data to the variable
        if (DimObject[vv+2]['var_name'] == 'time'):
            MyVar[:] = TimPoints

        if (DimObject[vv+2]['var_name'] == 'bounds_time'):
            MyVar[:,:] = TimBounds

        if (DimObject[vv+2]['var_name'] == 'month'):
            for mm in range(12):
                MyVar[mm,:] = stringtoarr(MonthName[mm],10)

        if (DimObject[vv+2]['var_name'] == 'climbounds'):
            for mm in range(12):
                MyVar[mm,0,:] = stringtoarr(ClimBounds[mm,0],10)
                MyVar[mm,1,:] = stringtoarr(ClimBounds[mm,1],10)

        if (DimObject[vv+2]['var_name'] == 'latitude'):
            MyVar[:] = Latitudes

        if (DimObject[vv+2]['var_name'] == 'bounds_lat'):
            MyVar[:,:] = LatBounds

        if (DimObject[vv+2]['var_name'] == 'longitude'):
            MyVar[:] = Longitudes

        if (DimObject[vv+2]['var_name'] == 'bounds_lon'):
            MyVar[:,:] = LonBounds

    # Go through each variable and set up the variable attributes
    for vv in range(len(AttrObject)):

        print(AttrObject[vv]['var_name'])

        # Not 100% sure this works in a loop with overwriting
        # initiate variable with name, type, dimensions and fill value
        MyVar = ncfw.createVariable(AttrObject[vv]['var_name'],AttrObject[vv]['var_type'],AttrObject[vv]['var_dims'],fill_value = AttrObject[vv]['_FillValue'])

        # Apply any other attributes
        if ('standard_name' in AttrObject[vv]):
            MyVar.standard_name = AttrObject[vv]['standard_name']

        if ('long_name' in AttrObject[vv]):
            MyVar.long_name = AttrObject[vv]['long_name']

        if ('cell_methods' in AttrObject[vv]):
            MyVar.cell_methods = AttrObject[vv]['cell_methods']

        if ('comment' in AttrObject[vv]):
            MyVar.comment = AttrObject[vv]['comment']

        if ('units' in AttrObject[vv]):
            MyVar.units = AttrObject[vv]['units']

        if ('axis' in AttrObject[vv]):
            MyVar.axis = AttrObject[vv]['axis']

        if ('add_offset' in AttrObject[vv]):
            MyVar.add_offset = AttrObject[vv]['add_offset']

        if ('scale_factor' in AttrObject[vv]):
            MyVar.scale_factor = AttrObject[vv]['scale_factor']

        if ('valid_min' in AttrObject[vv]):
            MyVar.valid_min = AttrObject[vv]['valid_min']

        if ('valid_max' in AttrObject[vv]):
            MyVar.valid_max = AttrObject[vv]['valid_max']

        if ('missing_value' in AttrObject[vv]):
            MyVar.missing_value = AttrObject[vv]['missing_value']

#        if ('_FillValue' in AttrObject[vv]):
#            MyVar._FillValue = AttrObject[vv]['_FillValue']

        if ('reference_period' in AttrObject[vv]):
            MyVar.reference_period = AttrObject[vv]['reference_period']

        if ('ancillary_variables' in AttrObject[vv]):
            MyVar.ancillary_variables = AttrObject[vv]['ancillary_variables']

        # Provide the data to the variable - depending on how many dimensions there are
        if (len(AttrObject[vv]['var_dims']) == 1):
            MyVar[:] = DataObject[vv]

        if (len(AttrObject[vv]['var_dims']) == 2):
            MyVar[:,:] = DataObject[vv]

        if (len(AttrObject[vv]['var_dims']) == 3):
            MyVar[:,:,:] = DataObject[vv]

    ncfw.close()

    return # WriteNCCF
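# A hedged sketch of the structures WriteNCCF expects, inferred from the code
# above: DimObject[0] lists dimension names, DimObject[1] their sizes, and the
# remaining entries are per-dimension-variable attribute dictionaries;
# AttrObject is a list of attribute dictionaries for the data variables.
# All names and values here are illustrative only.
DimObject = [
    ['time', 'latitude', 'longitude'],   # dimension names
    [1416, 36, 72],                      # dimension sizes
    {'var_name': 'time', 'var_type': 'f4', 'var_dims': ('time',),
     'standard_name': 'time', 'units': 'days since 1973-1-1 00:00:00',
     'calendar': 'gregorian'},
    {'var_name': 'latitude', 'var_type': 'f4', 'var_dims': ('latitude',),
     'standard_name': 'latitude', 'units': 'degrees_north'},
]
AttrObject = [
    {'var_name': 'anomalies', 'var_type': 'i4',
     'var_dims': ('time', 'latitude', 'longitude'),
     '_FillValue': -999, 'standard_name': 'air_temperature_anomaly',
     'units': 'K', 'missing_value': -999},
]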
    def __init__(self, nc_filename, sequences, letter_features_size, map_letter2features,
                 window_size=DEFAULT_WINDOW_SIZE, map_label2class=None, word_vectors=None):
        """

        nc_filename (str): A file to write the dataset in netCDF format
        sequences (list): A list of Sequence objects containing the data
        map_letter2features (dict): a map from letters to feature vectors
        map_label2class (dict): a map from label to class
        word_vectors (dict): a map from word to vector
        """

        print 'preparing Currennt dataset'
        self.nc_filename = nc_filename
        self.sequences = sequences
        self.letter_features_size = letter_features_size
        self.input_pattern_size = letter_features_size * (2 * window_size + 1)
        if word_vectors:
            self.input_pattern_size += get_word_vectors_size(word_vectors)
        self.map_letter2features = map_letter2features
        self.window_size = window_size

        nc_file = Dataset(nc_filename, 'w')

        # collect label information
        # if given a map (say, from training set), use it
        if map_label2class:
            self.map_label2class = map_label2class
            max_label_length = 0
            for label in self.map_label2class:
                max_label_length = max(max_label_length, len(label))
        # otherwise create a new map
        else:
            labels = set()
            max_label_length = 0
            for sequence in sequences:
                for word in sequence.words:
                    for diac in word.diacs:
                        labels.add(diac)
                        max_label_length = max(max_label_length, len(diac))
            labels.add(Word.WORD_BOUNDARY)  # word boundary label (same as word boundary symbol)
            max_label_length = max(max_label_length, len(Word.WORD_BOUNDARY))
            # create map from label (diacritic) to class (integer)
            map_label2class = dict()
            for label in labels:
                map_label2class[label] = len(map_label2class)  # TODO: make sure classes are 0-indexed
            self.map_label2class = map_label2class
        print 'label2class map:', self.map_label2class

        # create dimensions
        dim_num_seqs = nc_file.createDimension('numSeqs', len(sequences))
        num_timesteps = 0
        for sequence in sequences:
            num_timesteps += sequence.num_letters(count_word_boundary=self.INCLUDE_WORD_BOUNDARY)
        dim_num_timesteps = nc_file.createDimension('numTimesteps', num_timesteps)
        dim_input_pattern_size = nc_file.createDimension('inputPattSize', self.input_pattern_size)
        dim_max_seq_tag_length = nc_file.createDimension('maxSeqTagLength', self.MAX_SEQ_TAG_LENGTH)
        # optional dimensions
        dim_num_labels = nc_file.createDimension('numLabels', len(map_label2class))
        dim_max_label_length = nc_file.createDimension('maxLabelLength', max_label_length)
        dim_max_target_string_length = nc_file.createDimension('maxTargStringLength', self.MAX_TARGET_STRING_LENGTH)

        # create variables
        var_seq_tags = nc_file.createVariable('seqTags', 'S1', ('numSeqs', 'maxSeqTagLength'))
        var_seq_tags.longname = 'sequence tags'
        var_seq_lengths = nc_file.createVariable('seqLengths', 'i4', ('numSeqs'))
        var_seq_lengths.longname = 'sequence lengths'
        var_inputs = nc_file.createVariable('inputs', 'f4', ('numTimesteps', 'inputPattSize'))
        var_inputs.longname = 'inputs'
        var_target_classes = nc_file.createVariable('targetClasses', 'i4', ('numTimesteps'))
        var_target_classes.longname = 'target classes'
        # optional variables
        var_num_target_classes = nc_file.createVariable('numTargetClasses', 'i4')
        var_num_target_classes.longname = 'number of target classes'
        var_labels = nc_file.createVariable('labels', 'S1', ('numLabels', 'maxLabelLength'))
        var_labels.longname = 'target labels'
        var_target_strings = nc_file.createVariable('targetStrings', 'S1', ('numSeqs', 'maxTargStringLength'))
        var_target_strings.longname = 'target strings'

        # write data to variables
        print 'writing sequence tags'
        seq_tags = []
        for sequence in sequences:
            seq_tags.append(stringtoarr(sequence.seq_id, self.MAX_SEQ_TAG_LENGTH))
        var_seq_tags[:] = seq_tags
        print 'writing sequence lengths'
        seq_lengths = []
        for sequence in sequences:
            seq_lengths.append(sequence.num_letters(count_word_boundary=self.INCLUDE_WORD_BOUNDARY))
        var_seq_lengths[:] = seq_lengths
        print 'writing inputs'
        # create empty array for the inputs
        inputs = np.empty((0, self.input_pattern_size))
        for sequence in sequences:
            sequence_features = self.generate_sequence_features(sequence)
            inputs = np.concatenate((inputs, sequence_features))
        var_inputs[:,:] = inputs
        print 'writing target classes'
        target_classes = []
        for sequence in sequences:
            if self.INCLUDE_WORD_BOUNDARY:
                target_classes.append(map_label2class[Word.WORD_BOUNDARY])
            for word in sequence.words:
                for diac in word.diacs:
                    assert(diac in map_label2class)
                    target_classes.append(map_label2class[diac])
                if self.INCLUDE_WORD_BOUNDARY:
                    target_classes.append(map_label2class[Word.WORD_BOUNDARY])
        var_target_classes[:] = target_classes
        # write data for optional variables
        var_num_target_classes[:] = len(map_label2class)
        labels_arr = np.empty((0, max_label_length))
        labels_ordered = [i[0] for i in sorted(self.map_label2class.items(), key=operator.itemgetter(1))]
        for label in labels_ordered:
            labels_arr = np.concatenate((labels_arr, [stringtoarr(label, max_label_length)]))
        var_labels[:,:] = labels_arr
        print 'writing target strings'
        target_strings = np.empty((0, self.MAX_TARGET_STRING_LENGTH))
        for sequence in sequences:
            sequence_letters = sequence.get_sequence_letters(include_word_boundary=self.INCLUDE_WORD_BOUNDARY)
            if len(sequence_letters) > self.MAX_TARGET_STRING_LENGTH:
                sys.stderr.write('Warning: length of sequence letters in sequence: ' + sequence.seq_id + \
                                 ' > MAX_TARGET_STRING_LENGTH\n')
            target_strings = np.concatenate((target_strings, \
                                            [stringtoarr(''.join(sequence_letters), self.MAX_TARGET_STRING_LENGTH)]))
        var_target_strings[:,:] = target_strings

        nc_file.close()
        print 'Currennt dataset written to:', nc_filename
Exemple #56
station_data_units_t =\
f.createCompoundType(statdtype_units,'station_data_units')
# create a variable of type 'station_data_t'
statdat = f.createVariable('station_obs', station_data_t, ('station',))
# create a numpy structured array, assign data to it.
data = numpy.empty(1,station_data_t)
data['latitude'] = 40.
data['longitude'] = -105.
data['surface_wind']['speed'] = 12.5
data['surface_wind']['direction'] = 270
data['temp_sounding'] = (280.3,272.,270.,269.,266.,258.,254.1,250.,245.5,240.)
data['press_sounding'] = range(800,300,-50)
# variable-length string datatypes are not supported inside compound types, so
# to store strings in a compound data type, each string must be 
# stored as fixed-size (in this case 80) array of characters.
data['location_name'] = stringtoarr('Boulder, Colorado, USA',NUMCHARS)
# assign structured array to variable slice.
statdat[0] = data
# or just assign a tuple of values to variable slice
# (will automatically be converted to a structured array).
statdat[1] = (40.78,-73.99,(-12.5,90),
             (290.2,282.5,279.,277.9,276.,266.,264.1,260.,255.5,243.),
             range(900,400,-50),stringtoarr('New York, New York, USA',NUMCHARS))
print f.cmptypes
windunits = numpy.empty(1,winddtype_units)
stationobs_units = numpy.empty(1,statdtype_units)
windunits['speed'] = stringtoarr('m/s',NUMCHARS)
windunits['direction'] = stringtoarr('degrees',NUMCHARS)
stationobs_units['latitude'] = stringtoarr('degrees north',NUMCHARS)
stationobs_units['longitude'] = stringtoarr('degrees west',NUMCHARS)
stationobs_units['surface_wind'] = windunits
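# Hedged follow-up: a compound-typed value can itself be attached as a variable
# attribute, which is how the units structure above is normally used, e.g.
#   statdat.units = stationobs_units
# (the same mechanism as the missing_values compound attribute set in the
# test case earlier in this collection).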
wind_data_t = f.createCompoundType(winddtype, "wind_data")
# now that wind_data_t is defined, create the station data type.
station_data_t = f.createCompoundType(statdtype, "station_data")


statdat = f.createVariable("station_obs", station_data_t, ("station",))
# create a numpy structured array, assign data to it.
data = numpy.empty(2, station_data_t)
data["latitude"] = 40.0
data["longitude"] = -105.0
data["surface_wind"]["speed"] = 12.5
data["surface_wind"]["direction"] = 270
data["temp_sounding"] = (280.3, 272.0, 270.0, 269.0, 266.0, 258.0, 254.1, 250.0, 245.5, 240.0)
data["press_sounding"] = range(800, 300, -50)

data["location_name"][0] = stringtoarr("Boulder, Colorado, USA", NUMCHARS)

print "data=", data


# x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', 'f8'), ('y', 'i8')])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=[('x', 'f8'), ('y', 'S1',2)])

# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=np.dtype({'names':['x','y'], 'formats':['f8',('S1',2)]}))

x = np.array([(1.0, "ba"), (3.0, "ab")], dtype=np.dtype({"names": ["x", "y"], "formats": ["f8", "S2"]}))


x_dtype = x.dtype

from copy import deepcopy