def test_normalization_of_string_arrays_netcdf3(self):
    thestr = 'boodsfasfasdfm'
    with nc4.Dataset(self.fp, 'w', format="NETCDF3_CLASSIC") as ncd:
        dimsize = len(thestr)
        ncd.createDimension('n', dimsize)

        # Single str (no dimension)
        ncd.createVariable('single_S', 'S1', ('n',))
        for k, v in ncd.variables.items():
            if k.startswith('single_'):
                v[:] = nc4.stringtoarr(thestr, dimsize)

        # Array of str
        ncd.createVariable('many_S', 'S1', ('n', 'n',))
        for k, v in ncd.variables.items():
            if k.startswith('many_'):
                v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize).reshape(v.shape)

    with nc4.Dataset(self.fp) as ncd:
        assert normalize_array(ncd.variables['single_S']) == thestr
        assert np.all(normalize_array(ncd.variables['many_S']) == [thestr] * dimsize)
def test_normalization_of_string_arrays_netcdf4(self):
    thestr = 'bosadfsdfkljskfusdiofu987987987om'
    with nc4.Dataset(self.fp, 'w', format="NETCDF4") as ncd:
        dimsize = len(thestr)
        ncd.createDimension('n', dimsize)

        # Single str (no dimension)
        ncd.createVariable('single_str', str)
        ncd.createVariable('single_unicode_', np.unicode_)
        ncd.createVariable('single_U', '<U1')
        ncd.createVariable('single_S', 'S1', ('n',))
        for k, v in ncd.variables.items():
            if k.startswith('single_'):
                if v.dimensions:
                    v[:] = nc4.stringtoarr(thestr, dimsize)
                else:
                    v[0] = thestr

        # Array of str
        ncd.createVariable('many_str', str, ('n',))
        ncd.createVariable('many_unicode_', np.unicode_, ('n',))
        ncd.createVariable('many_U', '<U1', ('n',))
        ncd.createVariable('many_S', 'S1', ('n', 'n',))
        for k, v in ncd.variables.items():
            if k.startswith('many_'):
                if len(v.dimensions) > 1:
                    v[:, :] = np.tile(nc4.stringtoarr(thestr, dimsize), dimsize)
                else:
                    v[:] = np.tile(thestr, dimsize)

    with nc4.Dataset(self.fp) as ncd:
        assert normalize_array(ncd.variables['single_str']) == thestr
        assert normalize_array(ncd.variables['single_unicode_']) == thestr
        assert normalize_array(ncd.variables['single_U']) == thestr
        assert normalize_array(ncd.variables['single_S']) == thestr
        assert np.all(normalize_array(ncd.variables['many_str']) == [thestr] * len(thestr))
        assert np.all(normalize_array(ncd.variables['many_unicode_']) == [thestr] * len(thestr))
        assert np.all(normalize_array(ncd.variables['many_U']) == [thestr] * len(thestr))
        assert np.all(normalize_array(ncd.variables['many_S']) == [thestr] * len(thestr))
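# A minimal, self-contained sketch of the conversion the two tests above exercise
# (illustrative values only, not part of the test suite): netCDF4.stringtoarr pads or
# truncates a Python string into a fixed-width array of single characters, and
# netCDF4.chartostring reverses the conversion.
import numpy as np
import netCDF4 as nc4

arr = nc4.stringtoarr('abc', 5)   # 1-D array of dtype 'S1', shape (5,); unused slots stay empty
print(arr.shape, arr.dtype)       # -> (5,) |S1
back = nc4.chartostring(nc4.stringtoarr('abc', 3))
print(back)                       # -> 'abc' round-tripped through a char array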
def close(self):
    # write end of dataset time in the form '2012-06-20T00:59:31Z'
    self.dataset.variables['time_coverage_end'][:] = \
        stringtoarr(self.end_time.strftime(self.date_fmt), STRING_LENGTH_SHORT)
    self.dataset.variables['sweep_number'][0] = 0
    self.dataset.variables['sweep_mode'][0] = stringtoarr("pointing", STRING_LENGTH_SHORT)
    self.dataset.variables['fixed_angle'][0] = 0.0
    self.dataset.variables['sweep_start_ray_index'][0] = 0
    self.dataset.variables['sweep_end_ray_index'][0] = self.dataset.variables['time'].shape[0] - 1
    self.dataset.sync()
def updateXTIME():
    # Parse command line
    ap = argparse.ArgumentParser()
    ap.add_argument('filename', type=str, help='netcdf file to modify')
    ap.add_argument('date', type=str, help='Date in YYYYMMDDHH format')
    args = ap.parse_args()
    assert os.path.exists(args.filename), 'filename must exist!'

    ncfile = nc.Dataset(args.filename, 'a')

    # copy global attributes all at once via dictionary
    atts = deepcopy(ncfile.__dict__)
    d = dt.datetime.strptime(args.date, '%Y%m%d%H')
    confdate = d.strftime('%Y-%m-%d_%H:%M:%S')
    atts['config_start_time'] = confdate
    atts['config_stop_time'] = confdate
    ncfile.setncatts(atts)

    varname = 'xtime'
    xtime_ = ncfile[varname][0][:]
    xtime = nc.stringtoarr(confdate, len(ncfile[varname][0][:]))
    ncfile[varname][0] = xtime
    ncfile.close()
def save_string_list2d(group, membername, array2d, dimensionname):
    # lengths of all elements
    element_length = []
    for element in array2d:
        if element:
            element_length.extend([len(subelement) for subelement in element if subelement])

    # if nothing then don't bother saving
    if len(element_length) == 0:
        return

    # Compute max length over all strings in array
    max_length = max(element_length) + 1

    # Name used to store length of strings
    lengthname = STRING_LENGTH_DIMENSION_FORMAT.format(membername)

    # Build new variable
    group.createDimension(membername,
                          max([len(element) if element else 0 for element in array2d]))
    group.createDimension(lengthname, max_length)
    group.createVariable(membername, 'S1', [dimensionname, membername, lengthname])

    # populate contents
    for index, element in enumerate(array2d):
        if element is not None:
            listcontents = numpy.zeros((group.variables[membername].shape[1], max_length), 'S1')
            for subindex, subelement in enumerate(element):
                if subelement is not None:
                    listcontents[subindex] = stringtoarr(subelement, max_length)
            group.variables[membername][index] = listcontents
def create_obs_idx(ncOut, var, name):
    nc_out = ncOut.createVariable(name, 'S1', ('n' + name, 'strlen80',), zlib=True)
    nc_idx_out = ncOut.createVariable(name + '_INDEX', 'i4', ('OBS',), zlib=True, fill_value=-1)

    print('processing ', name, len(var))

    i = 0
    for f in var:
        # print(f)
        try:
            nc_out[i] = stringtoarr(f[0], 80, dtype='U')
            # s = np.array(f[0], 'S80')
            # nc_out[i] = stringtochar(f[0], encoding='utf-8')
            # nc_out[i] = f[0].encode('utf-8')
            # nc_out[i] = f[0]
        except UnicodeEncodeError:
            pass
        nc_idx_out[f[1].index] = i
        i += 1
def create_nc_strings(ncfile, vname, strings, dims, desc):
    str_length = max_len(strings)
    chars = np.zeros((len(strings), str_length), dtype='S1')
    for i, string in enumerate(strings):
        chars[i] = netCDF4.stringtoarr(string, str_length, 'S')
    create_nc_dim(ncfile, dims[1], str_length)
    create_nc_var(ncfile, vname, np.array(chars), 'S1', dims, desc)
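# For comparison, a hedged alternative to the per-string loop above: netCDF4.stringtochar
# converts a whole numpy array of fixed-width strings to a character array in one call.
# The input strings here are made up for illustration.
import numpy as np
import netCDF4

strings = ['alpha', 'beta']                     # hypothetical inputs
width = max(len(s) for s in strings)
fixed = np.array(strings, dtype='S%d' % width)  # fixed-width byte strings
chars = netCDF4.stringtochar(fixed)             # same (nstrings, width) 'S1' layout the loop builds
print(chars.shape, chars.dtype)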
def add_generator(self, gen_id):
    if gen_id in self.gen_id_dict:
        return
    i = len(self.dim_gens)
    self.gen_id_dict[gen_id] = i
    self.var_gen_ids[i, :] = netCDF4.stringtoarr(gen_id, len(self.dim_str))
    for var in [self.var_dispatch_5min, self.var_dispatch_30min,
                self.var_dispatch_daily, self.var_dispatch_daily_min,
                self.var_dispatch_daily_max]:
        npoints = var.shape[0]
        var[:, i] = numpy.zeros((npoints, 1))
def do_tslist():
    global nstations

    # Parse tslist
    station = ncfile.variables['station']
    name = ncfile.variables['name']
    prefix = ncfile.variables['prefix']
    lat = ncfile.variables['lat']
    lon = ncfile.variables['lon']
    strln = len(ncfile.dimensions['strln'])

    filetslist = open('tslist', 'r')

    # Header
    # #-----------------------------------------------#
    # # 24 characters for name | pfx |  LAT  |  LON   |
    # #-----------------------------------------------#
    filetslist.next()
    filetslist.next()
    filetslist.next()

    # Body
    # veenkampen      veenk  51.98101   5.61957
    stationi = -1
    for line in filetslist:
        stationi += 1
        fields = line.split()
        station[stationi] = stationi
        name[stationi] = cdf.stringtoarr(fields[0], strln)
        prefix[stationi] = cdf.stringtoarr(fields[1], strln)
        lat[stationi] = fields[2]
        lon[stationi] = fields[3]
    nstations = stationi + 1

    filetslist.close()
def addstn(self, obs):
    # add a station given an observation dictionary
    mystn = obs['stn']
    if mystn not in self.stntoid.keys():
        # create station
        station_id = len(self.stntoid)
        self.rootcdf.variables['station_name'][station_id] = stringtoarr(mystn, 20)
        self.rootcdf.variables['lat'][station_id] = obs['lat']
        self.rootcdf.variables['lon'][station_id] = obs['long']
        self.rootcdf.variables['alt'][station_id] = obs['elev']
        self.rootcdf.variables['station_info'][station_id] = obs['dsource']
        self.stntoid[mystn] = station_id
    else:
        # station is already defined, find it
        station_id = self.stntoid[mystn]  # and back to int
    self.rootcdf.sync()
    return station_id
def ConvertCharDims(var, datadict):
    if var.dtype == 'S1':
        datalen = len(datadict['values'])
        dimlen = list(var.shape)
        dimlen.remove(datalen)
        # string length is whatever dimension remains
        slen = dimlen[0]
        # print [d for d in datadict['values']]
        values = [netCDF4.stringtoarr(d, slen) for d in datadict['values']]
        datadict['values'] = values
    return datadict
def add_Variables(self):
    '''
    Add datasets according to the configuration file contents.
    '''
    var_dict = self.conf['%s+%s' % (self.sat1, self.sen1)]
    # add CAL_LUT for each band
    dsetNameLst = var_dict.keys()
    # for eachchan in var_dict["_chanlist"]:
    #     dsetNameLst.append("CAL_LUT_CH%s" % eachchan)
    for eachVar in dsetNameLst:
        if eachVar.startswith('_'):
            continue
        if eachVar == 'TBB_Corrct_LUT':
            continue
        if eachVar == 'Nonlinear_coefficient':
            continue
        # if eachVar.startswith('CAL_LUT'):
        #     var_info = var_dict["CAL_LUT"]
        #     var_info['_dims'] = ['date', 'lut_row']
        # else:
        #     var_info = var_dict[eachVar]
        var_info = var_dict[eachVar]
        var = self.rootgrp.createVariable(eachVar, var_info['_fmt'], var_info['_dims'])
        for eachKey in var_info:
            if eachKey.startswith('_'):
                continue
            if eachKey == eachVar:
                if var_info['_fmt'] == 'S1':  # string
                    # Strings must be converted to char arrays with stringtoarr
                    # before being written to the NC file!
                    char_len = 1
                    for each in var_info['_dims']:
                        char_len = char_len * int(var_dict['_%s' % each])  # total number of characters
                    char_ary = stringtoarr(''.join(var_info[eachKey]), char_len)
                    var[:] = char_ary
                else:  # not a string
                    var[:] = var_info[eachKey]
            else:
                if is_number(var_info[eachKey]):
                    if '.' in var_info[eachKey]:
                        var.setncattr(eachKey, np.float32(var_info[eachKey]))
                    else:
                        var.setncattr(eachKey, np.short(var_info[eachKey]))
                else:
                    var.setncattr(eachKey, var_info[eachKey])
def set_trajectory_id(self):
    """
    Sets or updates the trajectory dimension and variable for the dataset
    and the global id attribute.
    """
    if 'trajectory' not in self._nc.variables:
        # Setup Trajectory Dimension
        self._nc.createDimension('traj_strlen', len(self._trajectory))

        # Setup Trajectory Variable
        trajectory_var = self._nc.createVariable(
            u'trajectory',
            'S1',
            ('traj_strlen',),
            zlib=True,
            complevel=self._comp_level)

        attrs = {
            'cf_role': 'trajectory_id',
            'long_name': 'Trajectory/Deployment Name',  # NOQA
            'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
        }
        for key, value in sorted(attrs.items()):
            trajectory_var.setncattr(key, value)
    else:
        trajectory_var = self._nc.variables['trajectory']

    # Set the trajectory variable data
    trajectory_var[:] = stringtoarr(self._trajectory, len(self._trajectory))

    if not self._nc.getncattr('id').strip():
        self._nc.id = self._trajectory  # Global id variable
def set_trajectory_id(self, glider, deployment_date):
    """
    Sets the trajectory dimension and variable for the dataset

    Input:
        - glider: Name of the glider deployed.
        - deployment_date: String or DateTime of when glider was first deployed.
    """
    if type(deployment_date) is datetime:
        deployment_date = deployment_date.strftime("%Y-%m-%dT%H:%M:%SZ")

    traj_str = "%s-%s" % (glider, deployment_date)

    if 'trajectory' not in self.nc.variables:
        # Setup Trajectory Dimension
        self.nc.createDimension('traj_strlen', len(traj_str))

        # Setup Trajectory Variable
        trajectory_var = self.nc.createVariable(
            'trajectory',
            'S1',
            ('traj_strlen',),
            zlib=True,
            complevel=self.COMP_LEVEL)

        attrs = {
            'cf_role': 'trajectory_id',
            'long_name': 'Trajectory/Deployment Name',  # NOQA
            'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
        }
        for key, value in sorted(attrs.items()):
            trajectory_var.setncattr(key, value)
    else:
        trajectory_var = self.nc.variables['trajectory']

    trajectory_var[:] = stringtoarr(traj_str, len(traj_str))
    self.nc.id = traj_str  # Global id variable
def set_trajectory_id(self, glider, deployment_date):
    """
    Sets the trajectory dimension and variable for the dataset

    Input:
        - glider: Name of the glider deployed.
        - deployment_date: String or DateTime of when glider was first deployed.
    """
    if type(deployment_date) is datetime:
        deployment_date = deployment_date.strftime("%Y-%m-%dT%H:%M:%SZ")

    traj_str = "%s-%s" % (glider, deployment_date)

    if 'trajectory' not in self.nc.variables:
        # Setup Trajectory Dimension
        self.nc.createDimension('traj_strlen', len(traj_str))

        # Setup Trajectory Variable
        trajectory_var = self.nc.createVariable(
            'trajectory',
            'S1',
            ('traj_strlen',),
            zlib=True,
            complevel=self.COMP_LEVEL
        )

        attrs = {
            'cf_role': 'trajectory_id',
            'long_name': 'Trajectory/Deployment Name',  # NOQA
            'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
        }
        for key, value in sorted(attrs.items()):
            trajectory_var.setncattr(key, value)
    else:
        trajectory_var = self.nc.variables['trajectory']

    trajectory_var[:] = stringtoarr(traj_str, len(traj_str))
def set_source_file_var(self, source_file_string, attrs=None):
    """
    Sets the source_file dimension and variable for the dataset and the
    global source attribute.

    Input:
        - source_file_string: Name of the source data file.
        - attrs: Optional dictionary of variable attributes.
    """
    if 'source_file' not in self._nc.variables:
        # Setup source_file Dimension
        self._nc.createDimension('source_file_strlen', len(source_file_string))

        # Setup source_file Variable
        source_file_var = self._nc.createVariable(
            u'source_file',
            'S1',
            ('source_file_strlen',),
            zlib=True,
            complevel=self._comp_level)

        if attrs:
            attrs['long_name'] = 'Source data file'
            attrs['comment'] = 'Name of the source data file and associated file metadata'
            for key, value in sorted(attrs.items()):
                source_file_var.setncattr(key, value)
    else:
        source_file_var = self._nc.variables['source_file']

    # Set the source_file variable data
    source_file_var[:] = stringtoarr(source_file_string, len(source_file_string))

    if not self._nc.getncattr('source').strip():
        self._nc.source = 'Observational Slocum glider data from source dba file {:s}'.format(source_file_string)  # Global source variable
def set_trajectory_id(self, trajectory_string):
    """
    Sets the trajectory dimension and variable for the dataset and the
    global id attribute.

    Input:
        - trajectory_string: Trajectory/deployment name.
    """
    if 'trajectory' not in self._nc.variables:
        # Setup Trajectory Dimension
        self._nc.createDimension('traj_strlen', len(trajectory_string))

        # Setup Trajectory Variable
        trajectory_var = self._nc.createVariable(
            u'trajectory',
            'S1',
            ('traj_strlen',),
            zlib=True,
            complevel=self._comp_level
        )

        attrs = {
            'cf_role': 'trajectory_id',
            'long_name': 'Trajectory/Deployment Name',  # NOQA
            'comment': 'A trajectory is a single deployment of a glider and may span multiple data files.'  # NOQA
        }
        for key, value in sorted(attrs.items()):
            trajectory_var.setncattr(key, value)
    else:
        trajectory_var = self._nc.variables['trajectory']

    # Set the trajectory variable data
    trajectory_var[:] = stringtoarr(trajectory_string, len(trajectory_string))

    if not self._nc.getncattr('id').strip():
        self._nc.id = trajectory_string  # Global id variable
def add_Variables(self):
    """
    Add datasets according to the configuration file contents.
    """
    var_lst = self.conf['%s+%s' % (self.sat1, self.sen1)]
    for eachVar in var_lst:
        if eachVar.startswith('_'):
            continue
        var_info = var_lst[eachVar]
        # print eachVar
        var = self.rootgrp.createVariable(eachVar, var_info['_fmt'], var_info['_dims'])
        for eachKey in var_info:
            if eachKey.startswith('_'):
                continue
            if eachKey == eachVar:
                if var_info['_fmt'] == 'S1':  # string
                    # Strings must be converted to char arrays with stringtoarr
                    # before being written to the NC file!
                    char_len = 1
                    for each in var_info['_dims']:
                        char_len = char_len * int(var_lst['_%s' % each])  # total number of characters
                    char_ary = stringtoarr(''.join(var_info[eachKey]), char_len)
                    var[:] = char_ary
                else:  # not a string
                    var[:] = var_info[eachKey]
            else:
                if is_number(var_info[eachKey]):
                    if '.' in var_info[eachKey]:
                        var.setncattr(eachKey, np.float32(var_info[eachKey]))
                    else:
                        var.setncattr(eachKey, np.short(var_info[eachKey]))
                else:
                    var.setncattr(eachKey, var_info[eachKey])
def stringToArrList(strings):
    # convert each string to a character array sized to that string
    # (parameter renamed from 'list' to avoid shadowing the builtin)
    newList = []
    for i in range(len(strings)):
        numchars = charCounter(strings[i])
        newList.append(stringtoarr(strings[i], numchars))
    return newList
    nc_file.WML_featureType = 'timeSeries'
    nc_file.WML_cdm_data_type = 'Station'
    nc_file.WML_standard_name_vocabulary = 'CF-1.6'
    nc_file.title = nc_title
    nc_file.summary = nc_summary
    nc_file.id = 'testing_id'
    nc_file.naming_authority = 'testing_authority'
    nc_file.WML_date_created = nc_date_create
    nc_file.WML_creator_name = nc_creator_name
    nc_file.creator_email = nc_creator_email
    nc_file.project = nc_project
    nc_file.processing_level = nc_proc_level
    nc_file.WML_profile = 'single variable'

    # data
    dates = [datetime(2001, 3, 1) + n * timedelta(hours=12) for n in range(12)]
    nc_time[:] = date2num(dates, units=nc_time.units, calendar=nc_time.calendar)
    # nc_station_names[:] = [stringtoarr("aaaa", 4), stringtoarr("bbbb", 4)]
    dummy = [stringtoarr("aaaa", 4), stringtoarr("bbbb", 4)]
    nc_station_names[:] = dummy
    nc_lat_var[:] = [35.0, 70.0]
    nc_lon_var[:] = [-120.0, 120.0]
    # for i in range(len(nc_station_names)):
    #     data[i, :] = np.random.uniform(len(nc_time))
except:
    print "Try again."

nc_file.close()
def WriteNCCF(FileName, Dates, Latitudes, Longitudes, ClimPoints, DataObject, DimObject, AttrObject, GlobAttrObject):
    '''
    Sort out the date/times to write out and time bounds.
    Sort out clim bounds.
    Sort out lat and long bounds.
    Convert variables using the obtained scale_factor and add_offset: stored_var=int((var-offset)/scale).
    Write to file: set up given dimensions, loop through all potential variables and their
    attributes, and then apply the provided dictionary of global attributes.
    '''
    # Sort out date/times to write out
    print(Dates)
    TimPoints, TimBounds = MakeDaysSince(Dates['StYr'], Dates['StMon'], Dates['EdYr'], Dates['EdMon'])
    nTims = len(TimPoints)

    # Sort out clim bounds - paired strings
    ClimBounds = np.empty((12, 2), dtype='|S10')
    for mm in range(12):
        ClimBounds[mm, 0] = str(ClimPoints[0]) + '-' + str(mm + 1) + '-' + str(1)
        ClimBounds[mm, 1] = str(ClimPoints[1]) + '-' + str(mm + 1) + '-' + str(MonthDays[mm])

    # Sort out LatBounds and LonBounds
    LatBounds = np.empty((len(Latitudes), 2), dtype='float')
    LonBounds = np.empty((len(Longitudes), 2), dtype='float')
    LatBounds[:, 0] = Latitudes - ((Latitudes[1] - Latitudes[0]) / 2.)
    LatBounds[:, 1] = Latitudes + ((Latitudes[1] - Latitudes[0]) / 2.)
    LonBounds[:, 0] = Longitudes - ((Longitudes[1] - Longitudes[0]) / 2.)
    LonBounds[:, 1] = Longitudes + ((Longitudes[1] - Longitudes[0]) / 2.)
    # pdb.set_trace()

    # No need to convert float data using given scale_factor and add_offset to integers
    # - done within writing program (packV = (V-offset)/scale).
    # Not sure what this does to float precision though...
    # Change mdi into an integer -999 because these are stored as integers
    # for vv in range(len(DataObject)):
    #     DataObject[vv][np.where(DataObject[vv] == OLDMDI)] = MDI

    # Create a new netCDF file - have tried zlib=True,least_significant_digit=3 (and 1) - no difference
    # need to try NETCDF4 and also play with compression but test this first
    ncfw = Dataset(FileName, 'w', format='NETCDF4_CLASSIC')

    # Write out the global attributes
    if ('description' in GlobAttrObject):
        ncfw.description = GlobAttrObject['description']
        # print(GlobAttrObject['description'])
    if ('File_created' in GlobAttrObject):
        ncfw.File_created = GlobAttrObject['File_created']
    if ('Title' in GlobAttrObject):
        ncfw.Title = GlobAttrObject['Title']
    if ('Institution' in GlobAttrObject):
        ncfw.Institution = GlobAttrObject['Institution']
    if ('History' in GlobAttrObject):
        ncfw.History = GlobAttrObject['History']
    if ('Licence' in GlobAttrObject):
        ncfw.Licence = GlobAttrObject['Licence']
    if ('Project' in GlobAttrObject):
        ncfw.Project = GlobAttrObject['Project']
    if ('Processing_level' in GlobAttrObject):
        ncfw.Processing_level = GlobAttrObject['Processing_level']
    if ('Acknowledgement' in GlobAttrObject):
        ncfw.Acknowledgement = GlobAttrObject['Acknowledgement']
    if ('Source' in GlobAttrObject):
        ncfw.Source = GlobAttrObject['Source']
    if ('Comment' in GlobAttrObject):
        ncfw.Comment = GlobAttrObject['Comment']
    if ('References' in GlobAttrObject):
        ncfw.References = GlobAttrObject['References']
    if ('Creator_name' in GlobAttrObject):
        ncfw.Creator_name = GlobAttrObject['Creator_name']
    if ('Creator_email' in GlobAttrObject):
        ncfw.Creator_email = GlobAttrObject['Creator_email']
    if ('Version' in GlobAttrObject):
        ncfw.Version = GlobAttrObject['Version']
    if ('doi' in GlobAttrObject):
        ncfw.doi = GlobAttrObject['doi']
    if ('Conventions' in GlobAttrObject):
        ncfw.Conventions = GlobAttrObject['Conventions']
    if ('netcdf_type' in GlobAttrObject):
        ncfw.netcdf_type = GlobAttrObject['netcdf_type']

    # Loop through and set up the dimension names and quantities
    for vv in range(len(DimObject[0])):
        ncfw.createDimension(DimObject[0][vv], DimObject[1][vv])

    # Go through each dimension and set up the variable and attributes for that dimension if needed
    # (ignore first two elements of the list but count all other dictionaries)
    for vv in range(len(DimObject) - 2):
        print(DimObject[vv + 2]['var_name'])

        # Not 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        MyVar = ncfw.createVariable(DimObject[vv + 2]['var_name'],
                                    DimObject[vv + 2]['var_type'],
                                    DimObject[vv + 2]['var_dims'])

        # Apply any other attributes
        if ('standard_name' in DimObject[vv + 2]):
            MyVar.standard_name = DimObject[vv + 2]['standard_name']
        if ('long_name' in DimObject[vv + 2]):
            MyVar.long_name = DimObject[vv + 2]['long_name']
        if ('units' in DimObject[vv + 2]):
            MyVar.units = DimObject[vv + 2]['units']
        if ('axis' in DimObject[vv + 2]):
            MyVar.axis = DimObject[vv + 2]['axis']
        if ('calendar' in DimObject[vv + 2]):
            MyVar.calendar = DimObject[vv + 2]['calendar']
        if ('start_year' in DimObject[vv + 2]):
            MyVar.start_year = DimObject[vv + 2]['start_year']
        if ('end_year' in DimObject[vv + 2]):
            MyVar.end_year = DimObject[vv + 2]['end_year']
        if ('start_month' in DimObject[vv + 2]):
            MyVar.start_month = DimObject[vv + 2]['start_month']
        if ('end_month' in DimObject[vv + 2]):
            MyVar.end_month = DimObject[vv + 2]['end_month']
        if ('bounds' in DimObject[vv + 2]):
            MyVar.bounds = DimObject[vv + 2]['bounds']
        if ('climatology' in DimObject[vv + 2]):
            MyVar.climatology = DimObject[vv + 2]['climatology']
        if ('point_spacing' in DimObject[vv + 2]):
            MyVar.point_spacing = DimObject[vv + 2]['point_spacing']

        # Provide the data to the variable
        if (DimObject[vv + 2]['var_name'] == 'time'):
            MyVar[:] = TimPoints
        if (DimObject[vv + 2]['var_name'] == 'bounds_time'):
            MyVar[:, :] = TimBounds
        if (DimObject[vv + 2]['var_name'] == 'month'):
            for mm in range(12):
                MyVar[mm, :] = stringtoarr(MonthName[mm], 10)
        if (DimObject[vv + 2]['var_name'] == 'climbounds'):
            for mm in range(12):
                MyVar[mm, 0, :] = stringtoarr(ClimBounds[mm, 0], 10)
                MyVar[mm, 1, :] = stringtoarr(ClimBounds[mm, 1], 10)
        if (DimObject[vv + 2]['var_name'] == 'latitude'):
            MyVar[:] = Latitudes
        if (DimObject[vv + 2]['var_name'] == 'bounds_lat'):
            MyVar[:, :] = LatBounds
        if (DimObject[vv + 2]['var_name'] == 'longitude'):
            MyVar[:] = Longitudes
        if (DimObject[vv + 2]['var_name'] == 'bounds_lon'):
            MyVar[:, :] = LonBounds

    # Go through each variable and set up the variable attributes
    for vv in range(len(AttrObject)):
        print(AttrObject[vv]['var_name'])

        # Not 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        MyVar = ncfw.createVariable(AttrObject[vv]['var_name'],
                                    AttrObject[vv]['var_type'],
                                    AttrObject[vv]['var_dims'],
                                    zlib=True,
                                    fill_value=AttrObject[vv]['_FillValue'])

        # Apply any other attributes
        if ('standard_name' in AttrObject[vv]):
            MyVar.standard_name = AttrObject[vv]['standard_name']
        if ('long_name' in AttrObject[vv]):
            MyVar.long_name = AttrObject[vv]['long_name']
        # Too many issues with CF compliance
        # if ('cell_methods' in AttrObject[vv]):
        #     MyVar.cell_methods = AttrObject[vv]['cell_methods']
        if ('comment' in AttrObject[vv]):
            MyVar.comment = AttrObject[vv]['comment']
        if ('units' in AttrObject[vv]):
            MyVar.units = AttrObject[vv]['units']
        if ('axis' in AttrObject[vv]):
            MyVar.axis = AttrObject[vv]['axis']
        # if ('add_offset' in AttrObject[vv]):
        #     MyVar.add_offset = AttrObject[vv]['add_offset']
        # if ('scale_factor' in AttrObject[vv]):
        #     MyVar.scale_factor = AttrObject[vv]['scale_factor']
        # if ('valid_min' in AttrObject[vv]):
        #     MyVar.valid_min = AttrObject[vv]['valid_min']
        # if ('valid_max' in AttrObject[vv]):
        #     MyVar.valid_max = AttrObject[vv]['valid_max']
        # if ('missing_value' in AttrObject[vv]):
        #     MyVar.missing_value = AttrObject[vv]['missing_value']
        # if ('_FillValue' in AttrObject[vv]):
        #     MyVar._FillValue = AttrObject[vv]['_FillValue']
        if ('reference_period' in AttrObject[vv]):
            MyVar.reference_period = AttrObject[vv]['reference_period']
        if ('ancillary_variables' in AttrObject[vv]):
            MyVar.ancillary_variables = AttrObject[vv]['ancillary_variables']

        # Provide the data to the variable - depending on how many dimensions there are
        if (len(AttrObject[vv]['var_dims']) == 1):
            MyVar[:] = DataObject[vv]
        if (len(AttrObject[vv]['var_dims']) == 2):
            MyVar[:, :] = DataObject[vv]
        if (len(AttrObject[vv]['var_dims']) == 3):
            MyVar[:, :, :] = DataObject[vv]

    ncfw.close()

    return  # WriteNCCF
if args.verb > 1:
    print 'copying unlimited dimension "%s" data' % name
for i in xrange(dim.size):
    dst.variables[name][i] = template.variables[name][i]

# create land use type dimension
dst.createDimension('landusetype4', size=len(lu_names))

# create variable for land use type names and write the names into it
lu_name_len = max(len(x) for x in lu_names)
dst.createDimension('landusenameidx', size=lu_name_len)
namevar = dst.createVariable('landusename', 'c', ('landusetype4', 'landusenameidx'))
namevar.long_name = 'names of land use types'
for i, name in enumerate(lu_names):
    namevar[i, :] = nc.stringtoarr(name, lu_name_len)

# find auxiliary (coordinate-related) variables in the netcdf, such as boundaries and
# time averaging information variables: they are not going to be combined by land use,
# but are going to be copied to the output file
auxVars = set()
auxAttrs = {'bounds', 'edges', 'time_avg_info'}  # attributes that may list auxiliary variables
for var in template.variables.itervalues():
    for attr in auxAttrs:
        if attr not in var.ncattrs():
            continue
        for v in var.getncattr(attr).split(','):
            if v not in template.dimensions:
                auxVars.add(v)

# find variables: if the list of variables is not provided on the command line, process
# all variables in the netcdf files, except dimension variables and averaging information
def padded_string_to_arr(s, n=CHAR_ARRAY_LEN):
    """
    Left-justify and pad a string with spaces up to total width n, and
    convert to a character array for writing to a NETCDF3 file.
    """
    return nc.stringtoarr(s.ljust(n), n)
def str_array_to_char_array_mapper(array, str_size):
    new_array = map(lambda x: stringtoarr(x, str_size), array)
    return new_array
    dew_point_temperatures.units = "degrees Celsius"

    rain_rate = rootgrp.createVariable("rainfall_rate", "f4", ("time",))
    rain_rate.coordinates = "lat lon"
    rain_rate.standard_name = "rainfall_rate"
    rain_rate.long_name = "Rainfall rate"
    rain_rate.units = "mm hr-1"

    total_rain = rootgrp.createVariable("cumulative_rainfall", "f4", ("time",))
    total_rain.coordinates = "lat lon"
    total_rain.standard_name = "cumulative_rainfall"
    total_rain.long_name = "Cumulative rainfall"
    total_rain.units = "mm"

    # set the values of the variables
    station_name[:] = netCDF4.stringtoarr("Penlee", 50)
    altitude[:] = [station_altitude]
    latitudes[:] = [station_lat]
    longitudes[:] = [station_lon]
    times[:] = avg_timestamp
    air_temperatures[:] = avg_temp
    air_pressures[:] = avg_pressure
    relative_humiditys[:] = avg_rh
    dew_point_temperatures[:] = avg_dewpoint
    rain_rate[:] = avg_rainfall_rate
    total_rain[:] = cumulative_rainfall

    rootgrp.close()

entries = (os.path.join(sourcefolder, fn) for fn in os.listdir(sourcefolder))
def _write_header(self):
    """Write header"""
    logger.debug('generating header')

    # set the 'z' dimension and the number of profiles (always 1)
    self.root_group.createDimension('z', np.sum(self.ssp.cur.data_valid))
    self.root_group.createDimension('profile', 1)

    # var: profile
    # RECOMMENDED - If using the attribute below: cf_role. Data type can be whatever is appropriate for the
    # unique feature type.
    profile_str = "%s %.7f %.7f" % (self.ssp.cur.meta.utc_time.strftime('%Y-%m-%dT%H:%M:%SZ'),
                                    self.ssp.cur.meta.longitude,
                                    self.ssp.cur.meta.latitude)
    default_profile_str_length = 64
    profile_str_length = max(default_profile_str_length, len(profile_str))
    self.root_group.createDimension('profile_id_length', profile_str_length)
    profile = self.root_group.createVariable('profile', 'S1', ('profile', 'profile_id_length',))
    profile[:] = netCDF4.stringtoarr(profile_str, profile_str_length)
    profile.long_name = 'Unique identifier for each feature instance'  # RECOMMENDED
    profile.cf_role = 'profile_id'  # RECOMMENDED

    # var: time
    # Depending on the precision used for the variable, the data type could be int or double instead of float.
    time = self.root_group.createVariable('time', 'i4', ('profile',), fill_value=0.0)
    time[:] = int(calendar.timegm(self.ssp.cur.meta.utc_time.timetuple()))
    time.long_name = 'cast time'  # RECOMMENDED - Provide a descriptive, long name for this variable.
    time.standard_name = 'time'  # REQUIRED - Do not change.
    time.units = 'seconds since 1970-01-01 00:00:00'  # REQUIRED - Use approved CF convention with approved UDUNITS.
    # time.calendar = 'julian'  # REQUIRED - IF the calendar is not default calendar, which is "gregorian".
    time.axis = 'T'  # REQUIRED - Do not change.
    # time._FillValue = 0.0  # REQUIRED if there could be missing values in the data. >> set at var creation
    # time.ancillary_variables = ''  # RECOMMENDED - List other variables providing information about this variable.
    # time.comment = ''  # RECOMMENDED - Add useful, additional information here.

    # var: lat
    # Depending on the precision used for the variable, the data type could be int, float or double.
    lat = self.root_group.createVariable('lat', 'f8', ('profile',), fill_value=180.0)
    lat[:] = self.ssp.cur.meta.latitude
    lat.long_name = 'latitude'  # RECOMMENDED - Provide a descriptive, long name for this variable.
    lat.standard_name = 'latitude'  # REQUIRED - Do not change.
    lat.units = 'degrees_north'  # REQUIRED - CF recommends degrees_north, but at least must use UDUNITS.
    lat.axis = 'Y'  # REQUIRED - Do not change.
    lat.valid_min = -90.0  # RECOMMENDED - Replace with correct value.
    lat.valid_max = 180.0  # RECOMMENDED - Replace with correct value.
    # lat._FillValue = 180.0  # REQUIRED if there could be missing values in the data.
    # lat.ancillary_variables = ''  # RECOMMENDED - List other variables providing information about this variable.
    # lat.comment = ''  # RECOMMENDED - Add useful, additional information here.

    # var: lon
    # Depending on the precision used for the variable, the data type could be int, float or double.
    lon = self.root_group.createVariable('lon', 'f8', ('profile',), fill_value=360.0)
    lon[:] = self.ssp.cur.meta.longitude
    lon.long_name = 'longitude'  # RECOMMENDED
    lon.standard_name = 'longitude'  # REQUIRED - This is fixed, do not change.
    lon.units = 'degrees_east'  # REQUIRED - CF recommends degrees_east, but at least use UDUNITS.
    lon.axis = 'X'  # REQUIRED - Do not change.
    lon.valid_min = -180.0  # RECOMMENDED - Replace this with correct value.
    lon.valid_max = 360.0  # RECOMMENDED - Replace this with correct value.
    # lon._FillValue = 360.0  # REQUIRED if there could be missing values in the data.
    # lon.ancillary_variables = ''  # RECOMMENDED - List other variables providing information about this variable.
    # lon.comment = ''  # RECOMMENDED - Add useful, additional information here.

    # var: crs
    # RECOMMENDED - A container variable storing information about the grid_mapping.
    # All the attributes within a grid_mapping variable are described in:
    # - http://cfconventions.org/Data/cf-conventions/cf-conventions-1.6/build/cf-conventions.html#grid-mappings-
    #   and-projections
    # For all the measurements based on WSG84, the default coordinate system used for GPS measurements,
    # the values shown here should be used.
    crs = self.root_group.createVariable('crs', 'f8', ('profile',))
    crs[:] = 4326.0
    crs.grid_mapping_name = 'latitude_longitude'  # RECOMMENDED
    crs.epsg_code = 'EPSG:4326'  # RECOMMENDED - European Petroleum Survey Group code for the grid mapping name.
    crs.semi_major_axis = 6378137.0  # RECOMMENDED
    crs.inverse_flattening = 298.257223563  # RECOMMENDED

    # global attributes:
    self.root_group.ncei_template_version = 'NCEI_NetCDF_Profile_Orthogonal_Template_v2.0'  # REQUIRED(NCEI)
    self.root_group.featureType = 'profile'  # REQUIRED - CF attribute for identifying the featureType.(CF)
    # SUGGESTED - The data type, as derived from Unidata's Common Data Model Scientific Data types and understood
    # by THREDDS. (ACDD)
    self.root_group.cdm_data_type = 'profile'
    # HIGHLY RECOMMENDED - Provide a useful title for the data in the file.(ACDD)
    self.root_group.title = '%s_%s profile' % (self.ssp.cur.meta.sensor, self.ssp.cur.meta.probe)
    # HIGHLY RECOMMENDED - Provide a useful summary or abstract for the data in the file.(ACDD)
    # self.root_group.summary = ''
    # HIGHLY RECOMMENDED - A comma separated list of keywords coming from the keywords_vocabulary.(ACDD)
    # self.root_group.keywords = ''
    # HIGHLY RECOMMENDED - A comma separated list of the conventions being followed. Always try to use latest
    # version.(CF / ACDD)
    self.root_group.Conventions = 'CF-1.6, ACDD-1.3'
    # RECOMMENDED - Creation date of this version of the data(netCDF). Use ISO 8601:2004 for date and time. (ACDD)
    self.root_group.date_created = '%s' % dt.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
    self.root_group.survey = '%s' % self.ssp.cur.meta.survey
    # RECOMMENDED - The name of the project(s) principally responsible for originating this data.
    # Multiple projects can be separated by commas.(ACDD)
    self.root_group.project = '%s' % self._project
    # SUGGESTED - Name of the platform(s) that supported the sensor data used to create this data set or product.
    # Platforms can be of any type, including satellite, ship, station, aircraft or other.(ACDD)
    # Match platform format with velocipy
    platform = str(self.ssp.cur.meta.vessel).upper()
    platform = platform.replace('NRT-', 'NOAA NAVIGATION RESPONSE TEAM-')
    if len(platform) > 2 and platform[:2] in ['RA', 'TJ', 'FH', 'FA']:
        platform = platform.replace('(SHIP)', 'NOAA SHIP')
    self.root_group.platform = '%s' % platform
    # RECOMMENDED - The name of the institution principally responsible for originating this data. An institution
    # attribute can be used for each variable if variables come from more than one institution. (CF/ACDD)
    self.root_group.institution = '%s' % self.ssp.cur.meta.institution

    # RECOMMENDED - an instrument variable storing information about a parameter of the instrument used in the
    # measurement; the dimensions don't have to be specified if the same instrument is used for all the measurements.
    instrument = self.root_group.createVariable('instrument', 'i4')
    if self._instrument is None:
        instrument.long_name = '%s' % self.ssp.cur.meta.sensor
        probe = str(self.ssp.cur.meta.probe)
        sn = str(self.ssp.cur.meta.sn)
        match = re.match('^(\w+?) ?\(SN:(\w+?)\)', sn)
        if match:
            probe = match.group(1)
            sn = match.group(2)
        instrument.make_model = '%s' % probe
        if self.ssp.cur.meta.sn:
            instrument.serial_number = '%s' % sn
    else:  # this part is used when a custom instrument is passed (for instance, for ISS format)
        tokens = self._instrument.split()
        if len(tokens) > 0:
            instrument.long_name = self._instrument.split()[0]
        if len(tokens) > 1:
            instrument.make_model = self._instrument.split()[1]
        if self.ssp.cur.meta.sn:
            instrument.serial_number = '%s' % self.ssp.cur.meta.sn

    # SUGGESTED - Published or web-based references that describe the data or methods used to produce it.
    # Recommend URIs (such as a URL or DOI)
    self.root_group.references = 'https://www.hydroffice.org/soundspeed/'
    # RECOMMENDED - Provide useful additional information here.(CF)
    # self.root_group.comment = b'Created using HydrOffice %s v.%s' % (ssp_name, ssp_version)
    # SUGGESTED - Version identifier of the data file or product as assigned by the data creator. (ACDD)
    self.root_group.product_version = 'Created using HydrOffice %s v.%s' % (ssp_name, ssp_version)
def write_radar_file(ref, vel, filename=None):
    _time_units = 'seconds since 1970-01-01 00:00:00'
    _calendar = 'standard'

    if filename is None:
        print("\n write_DART_ascii:  No output file name is given, writing to %s" % "obs_seq.txt")
        filename = "obs_seq.nc"
    else:
        dirname = os.path.dirname(filename)
        basename = "%s_%s.nc" % ("obs_seq", os.path.basename(filename))
        filename = os.path.join(dirname, basename)

    _stringlen = 8
    _datelen = 19

    # Extract grid and ref data
    dbz = ref.data
    lats = ref.lats
    lons = ref.lons
    hgts = ref.zg + ref.radar_hgt
    kind = ObType_LookUp(ref.field.upper())
    R_xy = np.sqrt(ref.xg[20]**2 + ref.yg[20]**2)
    elevations = beam_elv(R_xy, ref.zg[:, 20, 20])

    # if there is a zero dbz obs type, reform the data array
    try:
        nx1, ny1 = ref.zero_dbz.shape
        zero_data = np.ma.zeros((2, ny1, nx1), dtype=np.float32)
        zero_hgts = np.ma.zeros((2, ny1, nx1), dtype=np.float32)
        zero_data[0] = ref.zero_dbz
        zero_data[1] = ref.zero_dbz
        zero_hgts[0:2] = ref.zero_dbz_zg[0:2]
        cref = ref.cref
        zero_flag = True
        print("\n write_DART_ascii:  0-DBZ separate type added to netcdf output\n")
    except AttributeError:
        zero_flag = False
        print("\n write_DART_ascii:  No 0-DBZ separate type found\n")

    # Extract velocity data
    vr = vel.data
    platform_lat = vel.radar_lat
    platform_lon = vel.radar_lon
    platform_hgt = vel.radar_hgt

    # Use the volume mean time for the time of the volume
    dtime = ncdf.num2date(ref.time['data'].mean(), ref.time['units'])
    days = ncdf.date2num(dtime, units="days since 1601-01-01 00:00:00")
    seconds = int(86400. * (days - np.floor(days)))

    # create the output filename and create new netCDF4 file
    # filename = os.path.join(path, "%s_%s%s" % ("Inflation", DT.strftime("%Y-%m-%d_%H:%M:%S"), ".nc"))
    print("\n --> Writing %s as the radar file..." % (filename))
    rootgroup = ncdf.Dataset(filename, 'w', format='NETCDF4')

    # Create dimensions
    shape = dbz.shape
    rootgroup.createDimension('nz', shape[0])
    rootgroup.createDimension('ny', shape[1])
    rootgroup.createDimension('nx', shape[2])
    rootgroup.createDimension('stringlen', _stringlen)
    rootgroup.createDimension('datelen', _datelen)
    if zero_flag:
        rootgroup.createDimension('nz2', 2)

    # Write some attributes
    rootgroup.time_units = _time_units
    rootgroup.calendar = _calendar
    rootgroup.stringlen = "%d" % (_stringlen)
    rootgroup.datelen = "%d" % (_datelen)
    rootgroup.platform_lat = platform_lat
    rootgroup.platform_lon = platform_lon
    rootgroup.platform_hgt = platform_hgt

    # Create variables
    R_type = rootgroup.createVariable('REF', 'f4', ('nz', 'ny', 'nx'), zlib=True, shuffle=True)
    V_type = rootgroup.createVariable('VEL', 'f4', ('nz', 'ny', 'nx'), zlib=True, shuffle=True)
    if zero_flag:
        R0_type = rootgroup.createVariable('0REF', 'f4', ('nz2', 'ny', 'nx'), zlib=True, shuffle=True)
        Z0_type = rootgroup.createVariable('0HGTS', 'f4', ('nz2', 'ny', 'nx'), zlib=True, shuffle=True)
        CREF_type = rootgroup.createVariable('CREF', 'f4', ('ny', 'nx'), zlib=True, shuffle=True)
    V_dates = rootgroup.createVariable('date', 'S1', ('datelen'), zlib=True, shuffle=True)
    V_xc = rootgroup.createVariable('XC', 'f4', ('nx'), zlib=True, shuffle=True)
    V_yc = rootgroup.createVariable('YC', 'f4', ('ny'), zlib=True, shuffle=True)
    V_el = rootgroup.createVariable('EL', 'f4', ('nz'), zlib=True, shuffle=True)
    V_lat = rootgroup.createVariable('LATS', 'f4', ('ny'), zlib=True, shuffle=True)
    V_lon = rootgroup.createVariable('LONS', 'f4', ('nx'), zlib=True, shuffle=True)
    V_hgt = rootgroup.createVariable('HGTS', 'f4', ('nz', 'ny', 'nx'), zlib=True, shuffle=True)

    # Write variables
    rootgroup.variables['date'][:] = ncdf.stringtoarr(dtime.strftime("%Y-%m-%d_%H:%M:%S"), _datelen)
    rootgroup.variables['REF'][:, :, :] = dbz[:]
    rootgroup.variables['VEL'][:, :, :] = vr[:]
    rootgroup.variables['XC'][:] = ref.xg[:]
    rootgroup.variables['YC'][:] = ref.yg[:]
    rootgroup.variables['EL'][:] = elevations[:]
    rootgroup.variables['HGTS'][:] = ref.zg[:]
    rootgroup.variables['LATS'][:] = lats[:]
    rootgroup.variables['LONS'][:] = lons[:]

    if zero_flag:
        rootgroup.variables['0REF'][:] = zero_data
        rootgroup.variables['0HGTS'][:] = zero_hgts
        rootgroup.variables['CREF'][:] = cref

    rootgroup.sync()
    rootgroup.close()

    return filename
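# A minimal read-back sketch for the 'date' variable written above. The file name is
# hypothetical and this assumes the variable was fully written (no masked elements):
# netCDF4.chartostring collapses the trailing character dimension back into a string.
import netCDF4 as ncdf

with ncdf.Dataset('obs_seq_example.nc') as f:    # hypothetical output of write_radar_file
    date_chars = f.variables['date'][:]          # ('datelen',) array of 'S1' characters
    date_str = ncdf.chartostring(date_chars)     # e.g. '2013-05-20_21:00:00'
    print(date_str)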
    longitudes.long_name = 'Observatory longitude'
    longitudes.units = 'degrees_east'

    times = rootgrp.createVariable('time', 'i4', ('time',))
    times.standard_name = 'time'
    times.long_name = 'Time of measurement'
    times.units = 'seconds since 1970-01-01 00:00:00'

    air_temperatures = rootgrp.createVariable('air_temperature', 'f4', ('time',))
    air_temperatures.coordinates = 'lat lon'
    air_temperatures.standard_name = 'air_temperature'
    air_temperatures.long_name = 'Air temperature in degrees Celsius'
    air_temperatures.units = 'degrees Celsius'

    # set the values of the variables
    station_name[:] = netCDF4.stringtoarr('Penlee', 50)
    altitude[:] = [station_altitude]
    latitudes[:] = [station_lat]
    longitudes[:] = [station_lon]
    times[:] = timestamp
    air_temperatures[:] = temp

    rootgrp.close()

entries = (os.path.join(sourcefolder, fn) for fn in os.listdir(sourcefolder))
entries = ((os.stat(path), path) for path in entries)
# leave only regular files, insert creation date
entries = ((stat[ST_CTIME], path) for stat, path in entries if S_ISREG(stat[ST_MODE]))
def test_tutorial():
    # 2 unlimited dimensions.
    # temp = rootgrp.createVariable('temp', 'f4', ('time', 'level', 'lat', 'lon',))
    # this makes the compression 'lossy' (preserving a precision of 1/1000)
    # try it and see how much smaller the file gets.
    temp = rootgrp.createVariable('temp', 'f4', ('time', 'level', 'lat', 'lon',),
                                  least_significant_digit=3)

    # attributes.
    import time
    rootgrp.description = 'bogus example script'
    rootgrp.history = 'Created ' + time.ctime(time.time())
    rootgrp.source = 'netCDF4 python module tutorial'
    latitudes.units = 'degrees north'
    longitudes.units = 'degrees east'
    levels.units = 'hPa'
    temp.units = 'K'
    times.units = 'hours since 0001-01-01 00:00:00.0'
    times.calendar = 'gregorian'

    for name in rootgrp.ncattrs():
        print('Global attr', name, '=', getattr(rootgrp, name))
    print(rootgrp)
    print(rootgrp.__dict__)
    print(rootgrp.variables)
    print(rootgrp.variables['temp'])

    import numpy
    # no unlimited dimension, just assign to slice.
    lats = numpy.arange(-90, 91, 2.5)
    lons = numpy.arange(-180, 180, 2.5)
    latitudes[:] = lats
    longitudes[:] = lons
    print('latitudes =\n', latitudes[:])
    print('longitudes =\n', longitudes[:])

    # append along two unlimited dimensions by assigning to slice.
    nlats = len(rootgrp.dimensions['lat'])
    nlons = len(rootgrp.dimensions['lon'])
    print('temp shape before adding data = ', temp.shape)
    from numpy.random.mtrand import uniform  # random number generator.
    temp[0:5, 0:10, :, :] = uniform(size=(5, 10, nlats, nlons))
    print('temp shape after adding data = ', temp.shape)
    # levels have grown, but no values yet assigned.
    print('levels shape after adding pressure data = ', levels.shape)

    # assign values to levels dimension variable.
    levels[:] = [1000., 850., 700., 500., 300., 250., 200., 150., 100., 50.]

    # fancy slicing
    tempdat = temp[::2, [1, 3, 6], lats > 0, lons > 0]
    print('shape of fancy temp slice = ', tempdat.shape)
    print(temp[0, 0, [0, 1, 2, 3], [0, 1, 2, 3]].shape)

    # fill in times.
    from datetime import datetime, timedelta
    from netCDF4 import num2date, date2num, date2index
    dates = [datetime(2001, 3, 1) + n * timedelta(hours=12) for n in range(temp.shape[0])]
    times[:] = date2num(dates, units=times.units, calendar=times.calendar)
    print('time values (in units %s): ' % times.units + '\n', times[:])
    dates = num2date(times[:], units=times.units, calendar=times.calendar)
    print('dates corresponding to time values:\n', dates)

    rootgrp.close()

    # create a series of netCDF files with a variable sharing
    # the same unlimited dimension.
    for nfile in range(10):
        f = Dataset('mftest' + repr(nfile) + '.nc', 'w', format='NETCDF4_CLASSIC')
        f.createDimension('x', None)
        x = f.createVariable('x', 'i', ('x',))
        x[0:10] = numpy.arange(nfile * 10, 10 * (nfile + 1))
        f.close()
    # now read all those files in at once, in one Dataset.
    from netCDF4 import MFDataset
    f = MFDataset('mftest*nc')
    print(f.variables['x'][:])

    # example showing how to save numpy complex arrays using compound types.
    f = Dataset('complex.nc', 'w')
    size = 3  # length of 1-d complex array
    # create sample complex data.
    datac = numpy.exp(1j * (1. + numpy.linspace(0, numpy.pi, size)))
    print(datac.dtype)
    # create complex128 compound data type.
    complex128 = numpy.dtype([('real', numpy.float64), ('imag', numpy.float64)])
    complex128_t = f.createCompoundType(complex128, 'complex128')
    # create a variable with this data type, write some data to it.
    f.createDimension('x_dim', None)
    v = f.createVariable('cmplx_var', complex128_t, 'x_dim')
    data = numpy.empty(size, complex128)  # numpy structured array
    data['real'] = datac.real
    data['imag'] = datac.imag
    v[:] = data

    # close and reopen the file, check the contents.
    f.close()
    f = Dataset('complex.nc')
    print(f)
    print(f.variables['cmplx_var'])
    print(f.cmptypes)
    print(f.cmptypes['complex128'])
    v = f.variables['cmplx_var']
    print(v.shape)
    datain = v[:]  # read in all the data into a numpy structured array
    # create an empty numpy complex array
    datac2 = numpy.empty(datain.shape, numpy.complex128)
    # .. fill it with contents of structured array.
    datac2.real = datain['real']
    datac2.imag = datain['imag']
    print(datac.dtype, datac)
    print(datac2.dtype, datac2)

    # more complex compound type example.
    from netCDF4 import chartostring, stringtoarr
    f = Dataset('compound_example.nc', 'w')  # create a new dataset.
    # create an unlimited dimension called 'station'
    f.createDimension('station', None)
    # define a compound data type (can contain arrays, or nested compound types).
    NUMCHARS = 80  # number of characters to use in fixed-length strings.
    winddtype = numpy.dtype([('speed', 'f4'), ('direction', 'i4')])
    statdtype = numpy.dtype([('latitude', 'f4'), ('longitude', 'f4'),
                             ('surface_wind', winddtype),
                             ('temp_sounding', 'f4', 10), ('press_sounding', 'i4', 10),
                             ('location_name', 'S1', NUMCHARS)])
    # use these data type definitions to create compound data types
    # via the createCompoundType Dataset method.
    # create a compound type for vector wind which will be nested inside
    # the station data type. This must be done first!
    wind_data_t = f.createCompoundType(winddtype, 'wind_data')
    # now that wind_data_t is defined, create the station data type.
    station_data_t = f.createCompoundType(statdtype, 'station_data')
    # create nested compound data types to hold the units variable attribute.
    winddtype_units = numpy.dtype([('speed', 'S1', NUMCHARS), ('direction', 'S1', NUMCHARS)])
    statdtype_units = numpy.dtype([('latitude', 'S1', NUMCHARS), ('longitude', 'S1', NUMCHARS),
                                   ('surface_wind', winddtype_units),
                                   ('temp_sounding', 'S1', NUMCHARS),
                                   ('location_name', 'S1', NUMCHARS),
                                   ('press_sounding', 'S1', NUMCHARS)])
    # create the wind_data_units type first, since it will be nested inside
    # the station_data_units data type.
    wind_data_units_t = f.createCompoundType(winddtype_units, 'wind_data_units')
    station_data_units_t = \
        f.createCompoundType(statdtype_units, 'station_data_units')
    # create a variable of type 'station_data_t'
    statdat = f.createVariable('station_obs', station_data_t, ('station',))
    # create a numpy structured array, assign data to it.
    data = numpy.empty(1, station_data_t)
    data['latitude'] = 40.
    data['longitude'] = -105.
    data['surface_wind']['speed'] = 12.5
    data['surface_wind']['direction'] = 270
    data['temp_sounding'] = (280.3, 272., 270., 269., 266., 258., 254.1, 250., 245.5, 240.)
    data['press_sounding'] = range(800, 300, -50)
    # variable-length string datatypes are not supported inside compound types, so
    # to store strings in a compound data type, each string must be
    # stored as a fixed-size (in this case 80) array of characters.
    data['location_name'] = stringtoarr('Boulder, Colorado, USA', NUMCHARS)
    # assign structured array to variable slice.
    statdat[0] = data
    # or just assign a tuple of values to variable slice
    # (will automatically be converted to a structured array).
    statdat[1] = (40.78, -73.99, (-12.5, 90),
                  (290.2, 282.5, 279., 277.9, 276., 266., 264.1, 260., 255.5, 243.),
                  range(900, 400, -50), stringtoarr('New York, New York, USA', NUMCHARS))
    print(f.cmptypes)
    windunits = numpy.empty(1, winddtype_units)
    stationobs_units = numpy.empty(1, statdtype_units)
    windunits['speed'] = stringtoarr('m/s', NUMCHARS)
    windunits['direction'] = stringtoarr('degrees', NUMCHARS)
    stationobs_units['latitude'] = stringtoarr('degrees north', NUMCHARS)
    stationobs_units['longitude'] = stringtoarr('degrees west', NUMCHARS)
    stationobs_units['surface_wind'] = windunits
    stationobs_units['location_name'] = stringtoarr('None', NUMCHARS)
    stationobs_units['temp_sounding'] = stringtoarr('Kelvin', NUMCHARS)
    stationobs_units['press_sounding'] = stringtoarr('hPa', NUMCHARS)
    statdat.units = stationobs_units
    # close and reopen the file.
    f.close()
    f = Dataset('compound_example.nc')
    print(f)
    statdat = f.variables['station_obs']
    print(statdat)
    # print out data in variable.
    print('data in a variable of compound type:')
    print('----')
    for data in statdat[:]:
        for name in statdat.dtype.names:
            if data[name].dtype.kind == 'S':  # a string
                # convert array of characters back to a string for display.
                units = chartostring(statdat.units[name])
                print(name, ': value =', chartostring(data[name]),
                      ': units=', units)
            elif data[name].dtype.kind == 'V':  # a nested compound type
                units_list = [chartostring(s) for s in tuple(statdat.units[name])]
                print(name, data[name].dtype.names, ': value=', data[name],
                      ': units=', units_list)
            else:  # a numeric type.
                units = chartostring(statdat.units[name])
                print(name, ': value=', data[name], ': units=', units)
    print('----')
    f.close()

    f = Dataset('tst_vlen.nc', 'w')
    vlen_t = f.createVLType(numpy.int32, 'phony_vlen')
    x = f.createDimension('x', 3)
    y = f.createDimension('y', 4)
    vlvar = f.createVariable('phony_vlen_var', vlen_t, ('y', 'x'))
    import random
    data = numpy.empty(len(y) * len(x), object)
    for n in range(len(y) * len(x)):
        data[n] = numpy.arange(random.randint(1, 10), dtype='int32') + 1
    data = numpy.reshape(data, (len(y), len(x)))
    vlvar[:] = data
    print(vlvar)
    print('vlen variable =\n', vlvar[:])
    print(f)
    print(f.variables['phony_vlen_var'])
    print(f.vltypes['phony_vlen'])

    z = f.createDimension('z', 10)
    strvar = f.createVariable('strvar', str, 'z')
    chars = '1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    data = numpy.empty(10, object)
    for n in range(10):
        stringlen = random.randint(2, 12)
        data[n] = ''.join([random.choice(chars) for i in range(stringlen)])
    strvar[:] = data
    print('variable-length string variable:\n', strvar[:])
    print(f)
    print(f.variables['strvar'])
    f.close()
def save(nfn, data, dataname, x=None, y=None, z=None, t=None, tunits=None, dtim=None,
         xbnds=None, ybnds=None, zbnds=None, tbnds=None, dims=None, xname='Lon',
         yname='Lat', zname='Depth', tname='Time', dhist=None, dunits=None,
         t_atts=None, append=False, app_in_t=False, silent=False):
    if dims is None:
        raise FerrError("Dimensions and order not specified!")
    ndims = data.ndim
    ct_dims = 0
    if x is not None:
        ct_dims = ct_dims + 1
        if not isinstance(x, np.ndarray):
            raise FerrError("x is not an ndarray; all coord var data must be in np.ndarrays")
    if y is not None:
        ct_dims = ct_dims + 1
        if not isinstance(y, np.ndarray):
            raise FerrError("y is not an ndarray; all coord var data must be in np.ndarrays")
    if z is not None:
        ct_dims = ct_dims + 1
        if not isinstance(z, np.ndarray):
            raise FerrError("z is not an ndarray; all coord var data must be in np.ndarrays")
    if (t is not None) or (dtim is not None):
        ct_dims = ct_dims + 1
        if t is not None:
            if not isinstance(t, np.ndarray):
                raise FerrError("t is not an ndarray; all coord var data must be in np.ndarrays")
        if dtim is not None:
            if not isinstance(dtim, np.ndarray):
                raise FerrError("dtim is not an ndarray; all coord var data must be in np.ndarrays")
    if not app_in_t:
        if ndims != len(dims):
            raise FerrError("Dimensions and order not fully specified!")
        if ct_dims < len(dims):
            raise FerrError("Not enough dim info to go by. Please call with more coord vars data")
    T = tname.lower()
    Z = zname.lower()
    Y = yname.lower()
    X = xname.lower()
    global outf
    outf = None
    if append is False:
        if os.path.isfile(nfn):
            ques_str = "output file '%s' exists. Delete? (y/n): " % nfn
            ans = raw_input(ques_str)
            if 'y' in ans.lower():
                os.remove(nfn)
        h1 = "created by ferr.py on %s, using py netCDF4" % datetime.datetime.ctime(datetime.datetime.now())
        # clobber=False: this fails if the file still exists (i.e. the user answered 'n')
        outf = nc4.Dataset(nfn, 'w', clobber=False, format='NETCDF3_CLASSIC')
        outf.history = h1
        if 't' in dims:
            outf.createDimension(T, None)
            time_var = outf.createVariable(tname.lower(), 'd', (T,))
            time_var.long_name = 'Time'
            time_var.axis = 'T'
            if dtim is not None:
                time_var.units = "days since 0001-01-01 00:00:00"
                time_var.time_origin = "0001-01-01 00:00:00"
                time_var[:] = nc4.date2num(dtim, units="days since 0001-01-01 00:00:00")
            elif t is not None:
                if tunits is None:
                    raise FerrError("No time units specified, and only t-values given!")
                time_var.units = tunits
                time_var[:] = t
            if tbnds is not None:
                time_var.bounds = tname.lower() + "_bnds"
            if t_atts is not None:
                for ii in t_atts.keys():
                    setattr(time_var, ii, t_atts[ii])
        if 'x' in dims:
            outf.createDimension(X, x.size)
            lon_var = outf.createVariable(xname.lower(), 'd', (X,))
            lon_var.long_name = 'Longitude'
            lon_var.axis = 'X'
            lon_var.units = 'degrees_east'
            if xbnds is not None:
                lon_var.point_spacing = 'uneven'
                lon_var.bounds = xname.lower() + "_bnds"
            else:
                lon_var.point_spacing = 'even'
            lon_var.modulo = np.array([360.])
            lon_var[:] = x[:]
        if 'y' in dims:
            outf.createDimension(Y, y.size)
            lat_var = outf.createVariable(yname.lower(), 'd', (Y,))
            lat_var.long_name = 'Latitude'
            lat_var.axis = 'Y'
            lat_var.units = 'degrees_north'
            if ybnds is not None:
                lat_var.point_spacing = 'uneven'
                lat_var.bounds = yname.lower() + "_bnds"
            else:
                lat_var.point_spacing = 'even'
            lat_var[:] = y[:]
        if 'z' in dims:
            outf.createDimension(Z, z.size)
            depth_var = outf.createVariable(zname.lower(), 'd', (Z,))
            depth_var.long_name = 'Depth'
            depth_var.axis = 'Z'
            depth_var.units = 'meters'
            depth_var.positive = 'down'
            if zbnds is not None:
                depth_var.point_spacing = 'uneven'
                depth_var.bounds = zname.lower() + "_bnds"
            else:
                depth_var.point_spacing = 'even'
            depth_var[:] = z[:]
        if (tbnds is not None) or (xbnds is not None) or (ybnds is not None) or (zbnds is not None):
            outf.createDimension('bnds', 2)
        if tbnds is not None:
            tbndsname = tname.lower() + "_bnds"
            tbnds_var = outf.createVariable(tbndsname, 'd', (T, 'bnds'))
            if isinstance(tbnds[0, 0], dt):
                tbnds_var[:] = nc4.date2num(tbnds, units="days since 0001-01-01 00:00:00")
            elif isinstance(tbnds[0][0], float):
                tbnds_var[:] = tbnds
        if xbnds is not None:
            xbndsname = xname.lower() + "_bnds"
            xbnds_var = outf.createVariable(xbndsname, 'd', (X, 'bnds'))
            xbnds_var[:] = xbnds
        if ybnds is not None:
            ybndsname = yname.lower() + "_bnds"
            ybnds_var = outf.createVariable(ybndsname, 'd', (Y, 'bnds'))
            ybnds_var[:] = ybnds
        if zbnds is not None:
            zbndsname = zname.lower() + "_bnds"
            zbnds_var = outf.createVariable(zbndsname, 'd', (Z, 'bnds'))
            zbnds_var[:] = zbnds
    elif append is True:
        tax_found = False
        zax_found = False
        yax_found = False
        xax_found = False
        Tsize = 0
        if os.path.isfile(nfn) is False:
            raise FerrError("file %s does not exist; can't append to it" % nfn)
        outf = use(nfn, silent=True, _append=True)
        indims = outf.d.keys()
        invars = outf.v.keys()
        cv1 = outf.cv
        cv1_keys = cv1.keys()
        fdims = outf.f.dimensions.keys()
        if (dataname in invars) and (app_in_t is False):
            raise FerrError("variable %s already exists; can't append it to file %s" % (dataname, nfn))
        if (tbnds is not None) or (xbnds is not None) or (ybnds is not None) or (zbnds is not None):
            if 'bnds' not in fdims:
                outf.f.createDimension('bnds', 2)
        if ('t' in dims) and (app_in_t is False):
            if t is not None:
                Tsize = t.size
                for i in cv1_keys:
                    if cv1[i] == 'tax':  # was `is 'tax'`, which compares identity, not equality
                        tax1 = outf.d[i][:]
                        cmp1 = t == tax1  # another option: np.array_equiv()
                        if isinstance(cmp1, np.ndarray):
                            cmp1 = cmp1.all()
                        if cmp1:
                            tax_found = True
                            for fd in fdims:
                                if i.lower() == fd.lower():
                                    T = fd
                                    break
            if dtim is not None:
                Tsize = dtim.size
                for i in cv1_keys:
                    if cv1[i] == 'tax':
                        tax1 = outf.dt_vals(i)
                        cmp1 = dtim == tax1
                        if isinstance(cmp1, np.ndarray):
                            cmp1 = cmp1.all()
                        if cmp1:
                            tax_found = True
                            for fd in fdims:
                                if i.lower() == fd.lower():
                                    T = fd
                                    break
            if tax_found is False:
                # pick a dimension name that does not collide with an existing one
                while T in indims:
                    if T[-1].isdigit():
                        T = T[0:-1] + str(int(T[-1]) + 1)
                    else:
                        T = T + '1'
                outf.f.createDimension(T, Tsize)
                time_var = outf.f.createVariable(T, 'd', (T,))
                time_var.long_name = 'Time'
                time_var.axis = 'T'
                if dtim is not None:
                    time_var.units = "days since 0001-01-01 00:00:00"
                    time_var.time_origin = "0001-01-01 00:00:00"
                    time_var[:] = nc4.date2num(dtim, units="days since 0001-01-01 00:00:00")
                elif t is not None:
                    if tunits is None:
                        raise FerrError("No time units specified, and only t-values given!")
                    time_var.units = tunits
                    time_var[:] = t
                if tbnds is not None:
                    time_var.bounds = T + "_bnds"
                if t_atts is not None:
                    for ii in t_atts.keys():
                        setattr(time_var, ii, t_atts[ii])
                if tbnds is not None:
                    tbndsname = T + "_bnds"
                    tbnds_var = outf.f.createVariable(tbndsname, 'd', (T, 'bnds'))
                    if isinstance(tbnds[0, 0], dt):
                        tbnds_var[:] = nc4.date2num(tbnds, units="days since 0001-01-01 00:00:00")
                    elif isinstance(tbnds[0][0], float):
                        tbnds_var[:] = tbnds
        if 'z' in dims:
            if z is not None:
                for i in cv1_keys:
                    if cv1[i] == 'zax':
                        zax1 = outf.d[i][:]
                        cmp1 = z == zax1
                        if isinstance(cmp1, np.ndarray):
                            cmp1 = cmp1.all()
                        if cmp1:
                            zax_found = True
                            for fd in fdims:
                                if i.lower() == fd.lower():
                                    Z = fd
                                    break
                if zax_found is False:
                    while Z in indims:
                        if Z[-1].isdigit():
                            Z = Z[0:-1] + str(int(Z[-1]) + 1)
                        else:
                            Z = Z + '1'
                    outf.f.createDimension(Z, z.size)
                    depth_var = outf.f.createVariable(Z, 'd', (Z,))
                    depth_var.long_name = 'Depth'
                    depth_var.axis = 'Z'
                    depth_var.units = 'meters'
                    depth_var.positive = 'down'
                    if zbnds is not None:
                        depth_var.point_spacing = 'uneven'
                        depth_var.bounds = Z + "_bnds"
                    else:
                        depth_var.point_spacing = 'even'
                    depth_var[:] = z[:]
                    if zbnds is not None:
                        zbndsname = Z + "_bnds"
                        zbnds_var = outf.f.createVariable(zbndsname, 'd', (Z, 'bnds'))
                        zbnds_var[:] = zbnds
        if 'y' in dims:
            if y is not None:
                for i in cv1_keys:
                    if cv1[i] == 'yax':
                        yax1 = outf.d[i][:]
                        cmp1 = y == yax1
                        if isinstance(cmp1, np.ndarray):
                            cmp1 = cmp1.all()
                        if cmp1:
                            yax_found = True
                            for fd in fdims:
                                if i.lower() == fd.lower():
                                    Y = fd
                                    break
                if yax_found is False:
                    while Y in indims:
                        if Y[-1].isdigit():
                            Y = Y[0:-1] + str(int(Y[-1]) + 1)
                        else:
                            Y = Y + '1'
                    outf.f.createDimension(Y, y.size)
                    lat_var = outf.f.createVariable(Y, 'd', (Y,))
                    lat_var.long_name = 'Latitude'
                    lat_var.axis = 'Y'
                    lat_var.units = 'degrees_north'
                    if ybnds is not None:
                        lat_var.point_spacing = 'uneven'
                        lat_var.bounds = Y + "_bnds"
                    else:
                        lat_var.point_spacing = 'even'
                    lat_var[:] = y[:]
                    if ybnds is not None:
                        ybndsname = Y + "_bnds"
                        ybnds_var = outf.f.createVariable(ybndsname, 'd', (Y, 'bnds'))
                        ybnds_var[:] = ybnds
        if 'x' in dims:
            if x is not None:
                for i in cv1_keys:
                    if cv1[i] == 'xax':
                        xax1 = outf.d[i][:]
                        cmp1 = x == xax1
                        if isinstance(cmp1, np.ndarray):
                            cmp1 = cmp1.all()
                        if cmp1:
                            xax_found = True
                            for fd in fdims:
                                if i.lower() == fd.lower():
                                    X = fd
                                    break
                if xax_found is False:
                    while X in indims:
                        if X[-1].isdigit():
                            X = X[0:-1] + str(int(X[-1]) + 1)
                        else:
                            X = X + '1'
                    outf.f.createDimension(X, x.size)
                    lon_var = outf.f.createVariable(X, 'd', (X,))
                    lon_var.long_name = 'Longitude'
                    lon_var.axis = 'X'
                    lon_var.units = 'degrees_east'
                    if xbnds is not None:
                        lon_var.point_spacing = 'uneven'
                        lon_var.bounds = X + "_bnds"
                    else:
                        lon_var.point_spacing = 'even'
                    lon_var.modulo = np.array([360.])
                    lon_var[:] = x[:]
                    if xbnds is not None:
                        xbndsname = X + "_bnds"
                        xbnds_var = outf.f.createVariable(xbndsname, 'd', (X, 'bnds'))
                        xbnds_var[:] = xbnds
    else:
        raise FerrError("kw 'append' has to be True or False")
    vdims = []
    for i in xrange(len(dims)):
        # was `'x' is dims.lower()[i]` etc.; identity comparison of strings is a bug
        if 'x' == dims.lower()[i]:
            vdims.append(X)
        if 'y' == dims.lower()[i]:
            vdims.append(Y)
        if 'z' == dims.lower()[i]:
            vdims.append(Z)
        if 't' == dims.lower()[i]:
            vdims.append(T)
    vdims = tuple(vdims)
    if append is False:
        dat_var = outf.createVariable(dataname, 'f', vdims, fill_value=-1.e+34)
        dat_var.missing_value = -1.e+34
        dat_var.long_name = dataname
        if dhist is not None:
            dat_var.history = dhist
        data = np.float32(data)
        data.set_fill_value(-1.e+34)
        data.data[data.mask] = data.fill_value
        if app_in_t is True:
            tsize = time_var.size
            if (tsize > 1) and (ndims == len(dims)):
                dims_a = nc4.stringtoarr(dims, len(dims))
                # np.where returns an array; pull out a plain int before indexing the shape tuple
                data_tsize = data.shape[int(np.where(dims_a == 't')[0][0])]
                dat_var[tsize:tsize + data_tsize, ...] = data
            else:
                dat_var[0, ...] = data
        else:
            dat_var[:] = data
    else:
        if app_in_t is True:
            if dataname.lower() not in outf.v.keys():
                raise FerrError("append mode, but var '%s' not found in file '%s'!" % (dataname, nfn))
            dat_var = outf.v[dataname.lower()]
            tim_var = outf.d[T]
            tsize = tim_var.size
            data = np.float32(data)
            data.set_fill_value(-1.e+34)
            data.data[data.mask] = data.fill_value
            if t is not None:
                t_insize = t.size
                tim_var[tsize:tsize + t_insize] = t
            elif dtim is not None:
                t_insize = dtim.size
                tim_var[tsize:tsize + t_insize] = nc4.date2num(dtim, units="days since 0001-01-01 00:00:00")
            else:
                raise FerrError("no time data, though append in time option was True")
            if (tsize > 1) and (ndims == len(dims)):
                dims_a = nc4.stringtoarr(dims, len(dims))
                data_tsize = data.shape[int(np.where(dims_a == 't')[0][0])]
                dat_var[tsize:tsize + data_tsize, ...] = data
            else:
                dat_var[tsize, ...] = data
        else:
            dat_var = outf.f.createVariable(dataname, 'f', vdims, fill_value=-1.e+34)
            dat_var.missing_value = -1.e+34
            dat_var.long_name = dataname
            if dhist is not None:
                dat_var.history = dhist
            data = np.float32(data)
            data.set_fill_value(-1.e+34)
            data.data[data.mask] = data.fill_value
            dat_var[:] = data
    if append is False:
        outf.close()
    else:
        outf.f.close()
    if not silent:
        print "\ndata written to %s\n" % nfn
    return None
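A minimal call sketch for the writer above (hypothetical file name and toy data; assumes this module is importable as ferr and that the payload is a numpy masked array, as save() expects):

import numpy as np
import ferr

lons = np.arange(0., 360., 2.)
lats = np.arange(-89., 90., 2.)
field = np.ma.masked_invalid(np.random.rand(lats.size, lons.size))
# 'yx' declares the axis order of the payload: y (lat) varies slowest
ferr.save('toy.nc', field, 'sst', x=lons, y=lats, dims='yx')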
def write_exodus_file(filename, cells, vertices, shape="SHELL4"):
    """
    Write Exodus-II file compatible with CUBIT.

    cells is a 0-based array (ncells, ncorners).
    vertices is (nvertices, dim).

    All cells are placed in a single block.

    Requires netCDF4 module.
    """
    import numpy
    from netCDF4 import Dataset

    len_string = 33

    root = Dataset(filename, 'w', format='NETCDF3_CLASSIC')

    # Set global attributes
    root.api_version = 4.98
    root.version = 4.98
    root.floating_point_word_size = 8
    root.file_size = 0
    root.title = "cubit"

    # Setup dimensions

    # Generic information
    root.createDimension('len_string', len_string)
    root.createDimension('len_line', 81)
    root.createDimension('four', 4)
    root.createDimension('num_qa_rec', 1)
    root.createDimension('time_step', None)

    # Mesh specific information
    (ncells, ncorners) = cells.shape
    (nvertices, dim) = vertices.shape
    root.createDimension('num_dim', dim)
    root.createDimension('num_el_blk', 1)
    root.createDimension('num_nod_per_el1', ncorners)
    root.createDimension('num_att_in_blk1', 1)
    root.createDimension('num_nodes', nvertices)
    root.createDimension('num_elem', ncells)
    root.createDimension('num_el_in_blk1', ncells)

    # Setup variables
    connect1 = root.createVariable('connect1', numpy.int32,
                                   ('num_el_in_blk1', 'num_nod_per_el1',))
    coord = root.createVariable('coord', numpy.float64,
                                ('num_dim', 'num_nodes',))
    time_whole = root.createVariable('time_whole', numpy.float64, ('time_step',))
    coor_names = root.createVariable('coor_names', 'S1', ('num_dim', 'len_string',))
    qa_records = root.createVariable('qa_records', 'S1',
                                     ('num_qa_rec', 'four', 'len_string',))
    eb_names = root.createVariable('eb_names', 'S1', ('num_el_blk', 'len_string',))
    elem_map = root.createVariable('elem_map', numpy.int32, ('num_elem',))
    eb_status = root.createVariable('eb_status', numpy.int32, ('num_el_blk',))
    eb_prop1 = root.createVariable('eb_prop1', numpy.int32, ('num_el_blk',))
    attrib1 = root.createVariable('attrib1', numpy.float64,
                                  ('num_el_in_blk1', 'num_att_in_blk1',))

    # Set variable values
    connect1[:] = 1 + cells[:]
    connect1.elem_type = shape
    coord[:] = vertices.transpose()[:]

    from netCDF4 import stringtoarr
    if dim == 2:
        coor_names[0, :] = stringtoarr("x", len_string)
        coor_names[1, :] = stringtoarr("y", len_string)
    elif dim == 3:
        coor_names[0, :] = stringtoarr("x", len_string)
        coor_names[1, :] = stringtoarr("y", len_string)
        coor_names[2, :] = stringtoarr("z", len_string)

    qa_records[0, 0, :] = stringtoarr("CUBIT", len_string)
    qa_records[0, 1, :] = stringtoarr("11.0", len_string)
    qa_records[0, 2, :] = stringtoarr("01/01/2000", len_string)
    qa_records[0, 3, :] = stringtoarr("12:00:00", len_string)

    elem_map[:] = numpy.arange(1, ncells + 1, dtype=numpy.int32)[:]
    eb_status[:] = numpy.ones((1,), dtype=numpy.int32)[:]
    eb_prop1[:] = numpy.ones((1,), dtype=numpy.int32)[:]
    eb_prop1.name = "ID"
    attrib1[:] = numpy.ones((1, ncells), dtype=numpy.int32)[:]

    root.close()
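A small usage sketch (a hypothetical mesh of two quadrilaterals sharing an edge; assumes the function above is in scope):

import numpy
cells = numpy.array([[0, 1, 4, 3], [1, 2, 5, 4]], dtype=numpy.int32)  # 0-based connectivity
vertices = numpy.array([[0., 0.], [1., 0.], [2., 0.],
                        [0., 1.], [1., 1.], [2., 1.]])
write_exodus_file('two_quads.exo', cells, vertices, shape="SHELL4")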
def append_data(self, tzg):
    """ append the data found in 'tzg' to our netCDF file """
    out = self.dataset
    appendDimLens = {}
    for adim in self.appendDims:
        # start by loading record dimensions that should be appended
        appendDimLens[adim] = len(out.dimensions[adim])
        if adim in out.variables:
            # store the variables associated with this record dimension (processing as needed)
            dvar = get_substruct(tzg, self.bindings[adim])
            nc_ovar = out.variables[adim]
            if dvar is None:
                raise KeyError("Can't find record variable %s for dim %s"
                               % (self.bindings[adim], adim))
            if len(dvar) == 0:
                continue
            if adim == 'time':
                # special case for time; this is used with HSRL. There needs to be a way
                # to identify this case in the template (the what, why and how) so other
                # time axes and sources work too.
                # dvar[0] is e.g. datetime.datetime(2012, 6, 20, 0, 59, 31, 250001, tzinfo)
                if appendDimLens[adim] == 0:
                    print 'adding first record'
                    if 'dpl_py_binding' in out.variables['time_coverage_start'].ncattrs():
                        del out.variables['time_coverage_start'].dpl_py_binding
                    # compute start time to the nearest second
                    self.start_time = dvar[0].replace(microsecond=0)
                    # write start of dataset in the form '2012-06-20T00:59:31Z'
                    out.variables['time_coverage_start'][:] = \
                        stringtoarr(self.start_time.strftime(self.date_fmt),
                                    STRING_LENGTH_SHORT)
                # save end_time to nearest second
                self.end_time = dvar[-1].replace(microsecond=0)
    for f in self.bindings:
        field = get_substruct(tzg, self.bindings[f])
        if field is not None:
            ovar = out.variables[f]
            basesh = [0, 0, 0, 0, 0, 0]
            didx = 0
            if 'dpl_py_type' in ovar.ncattrs():
                dpltype = ovar.dpl_py_type[:]
                if dpltype == 'matplotlib_num2date' or dpltype == 'python_datetime':
                    # this is actually datetime, but the older form is kept around
                    # to not break things. JPG 20130211
                    print 'compute relative time for %s' % self.bindings[f]
                    if not hasattr(self, 'start_time'):
                        bt = out.variables['time_coverage_start']
                        btv = ''
                        for x in range(bt.shape[0]):
                            btv = btv + bt[x]  # chartostring(var[:].reshape([1]+list(var[:].shape)))[0]
                        # strip padding characters from the stored start time
                        while len(btv) > 0 and btv[-1] == 'N':
                            btv = btv[:-1]
                        if len(btv) > 0:
                            self.start_time = datetime.datetime.strptime(btv, self.date_fmt)
                        else:
                            self.start_time = field[0].replace(microsecond=0)
                            # write start of dataset in the form '2012-06-20T00:59:31Z'
                            out.variables['time_coverage_start'][:] = \
                                stringtoarr(self.start_time.strftime(self.date_fmt),
                                            STRING_LENGTH_SHORT)
                    field = [(d - self.start_time).total_seconds() for d in field]
                    if appendDimLens["time"] == 0:
                        ovar.units = "seconds since " + self.start_time.strftime(self.date_fmt)
            # FIXME this is crap. should be a more interpreted way that isn't slow or dangerous
            for dimname in ovar.dimensions:
                if dimname in self.appendDims:
                    basesh[didx] = appendDimLens[dimname]
                didx += 1
            print 'Appending variable ', f
            if len(ovar.shape) == 0:
                ovar[:] = field
            elif len(ovar.shape) == 1:
                ovar[basesh[0]:] = field
            else:
                topsh = [None for x in range(len(basesh))]
                for x in range(len(field.shape)):
                    topsh[x] = basesh[x] + field.shape[x]
                print 'appending var', f, field.shape, ovar.shape, basesh, topsh
                ovar[tuple([slice(basesh[x], topsh[x])
                            for x in range(len(ovar.shape))])] = field
    out.sync()
def write_test_wrf_file():
    """Writes out an idealized atmosphere for testing the interpolation."""
    # File to write out
    rootgrp = netCDF4.Dataset('wrfout_test_file', 'w')

    # Required dimensions
    dim_time = rootgrp.createDimension('Time', 0)  # Unlimited
    dim_datestrlen = rootgrp.createDimension('DateStrLen', 19)
    dim_bottom_top = rootgrp.createDimension('bottom_top', 2)
    dim_bottom_top_stag = rootgrp.createDimension('bottom_top_stag', 3)
    dim_south_north = rootgrp.createDimension('south_north', 2)
    dim_south_north_stag = rootgrp.createDimension('south_north_stag', 3)
    dim_west_east = rootgrp.createDimension('west_east', 2)
    dim_west_east_stag = rootgrp.createDimension('west_east_stag', 3)

    # Times variable (write into the variable; rebinding the name, as the original
    # did, would leave the variable empty)
    var_times = rootgrp.createVariable('Times', 'S1', ('Time', 'DateStrLen'))
    var_times[0] = netCDF4.stringtoarr('2014-01-01_01:00:00', 19)

    # Required 4D variables
    var_znu = rootgrp.createVariable('ZNU', 'f4', ('Time', 'bottom_top'))
    var_znu[0] = [0.99715, 0.99010]  # 23 and 80 m AGL from calculate-eta-height.py
    var_znw = rootgrp.createVariable('ZNW', 'f4', ('Time', 'bottom_top_stag'))
    var_znw[0] = [1.0000, 0.99443, 0.98577]
    var_p = rootgrp.createVariable('P', 'f4',
                                   ('Time', 'bottom_top', 'south_north', 'west_east'))
    # nested to match (bottom_top, south_north, west_east)
    var_p[0] = [[[101049, 101049], [101049, 101049]],
                [[100368, 100368], [100368, 100368]]]
    var_pb = rootgrp.createVariable('PB', 'f4',
                                    ('Time', 'bottom_top', 'south_north', 'west_east'))
    var_pb[0] = numpy.zeros((2, 2, 2))
    var_psfc = rootgrp.createVariable('PSFC', 'f4', ('Time', 'south_north', 'west_east'))
    var_psfc[0] = [[101325, 101325], [101325, 101325]]
    var_t = rootgrp.createVariable('T', 'f4',
                                   ('Time', 'bottom_top', 'south_north', 'west_east'))
    var_t[0] = numpy.array([[[288.000, 288.000], [288.000, 288.000]],
                            [[287.630, 287.630], [287.630, 287.630]]]) - 300
    var_t2 = rootgrp.createVariable('T2', 'f4', ('Time', 'south_north', 'west_east'))
    var_t2[0] = [[288.000, 288.000], [288.000, 288.000]]
    var_th2 = rootgrp.createVariable('TH2', 'f4', ('Time', 'south_north', 'west_east'))
    var_th2[0] = [[288.000, 288.000], [288.000, 288.000]]
    # Log-law profile for 23 and 80 m (wind speed of u and v)
    var_u = rootgrp.createVariable('U', 'f4',
                                   ('Time', 'bottom_top', 'south_north', 'west_east_stag'))
    var_u[0] = [[[5, 5, 5], [5, 5, 5]],
                [[5.535, 5.535, 5.535], [5.535, 5.535, 5.535]]]
    var_v = rootgrp.createVariable('V', 'f4',
                                   ('Time', 'bottom_top', 'south_north_stag', 'west_east'))
    var_v[0] = [[[0, 0], [0, 0], [0, 0]],
                [[0, 0], [0, 0], [0, 0]]]
    var_p_top = rootgrp.createVariable('P_TOP', 'f4', ('Time',))
    var_p_top[0] = 0

    # Map projection variables
    var_cosalpha = rootgrp.createVariable('COSALPHA', 'f4', ('Time', 'south_north', 'west_east'))
    var_cosalpha[0] = [[0.99, 0.99], [0.99, 0.99]]
    var_sinalpha = rootgrp.createVariable('SINALPHA', 'f4', ('Time', 'south_north', 'west_east'))
    var_sinalpha[0] = [[0.99, 0.99], [0.99, 0.99]]

    # Map coordinates
    var_xlong = rootgrp.createVariable('XLONG', 'f4', ('Time', 'south_north', 'west_east'))
    var_xlong[0] = [[0, 5000], [0, 5000]]
    var_xlat = rootgrp.createVariable('XLAT', 'f4', ('Time', 'south_north', 'west_east'))
    var_xlat[0] = [[0, 5000], [0, 5000]]

    # Close the file
    rootgrp.close()
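To check the round trip, the padded character array can be collapsed back into a string with netCDF4.chartostring, the inverse of stringtoarr (a sketch reading the file created above):

import netCDF4
with netCDF4.Dataset('wrfout_test_file') as nc:
    # joins the 'S1' characters along the last dimension and strips the padding
    print(netCDF4.chartostring(nc.variables['Times'][:]))  # ['2014-01-01_01:00:00']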
def test_read_two_datasets(self):
    """Read a catalogue file for two data sets."""
    # Empty file with unique temporary name
    testfile = tempfile.NamedTemporaryFile(suffix='.nc')

    # Build NetCDF manually
    nc = Dataset(testfile.name, 'w')
    nc.setncattr('Conventions', 'CF-1.6')
    nc.setncattr('identifier', 'citizen-of-the-world')
    nc.setncattr('python_class', 'eumopps.catalogue.catalogue.Catalogue')
    nc.createDimension('datasets', 2)
    nc.createDimension('default_strlen', 23)
    group0 = nc.createGroup('datasets_00000000')
    group0.setncattr('name', 'Ffflip')
    group0.setncattr('path', '/find/it/here')
    group0.setncattr('python_class', 'eumopps.catalogue.dataset.CatalogueDataSet')
    group1 = nc.createGroup('datasets_00000001')
    group1.setncattr('name', 'Ffflop')
    group1.setncattr('path', '/or/here')
    group1.setncattr('python_class', 'eumopps.catalogue.dataset.CatalogueDataSet')
    group1.createDimension('subsets', 1)
    subset = group1.createGroup('subsets_00000000')
    subset.setncattr('python_class', 'eumopps.catalogue.dataset.CatalogueDataSubset')
    layout = subset.createGroup('layout')
    layout.setncattr('python_class', 'eumopps.catalogue.storage.DataStorageFiles')
    layout.createDimension('patterns', 2)
    layout.createVariable('patterns', 'S1', ['patterns', 'default_strlen'])
    layout.variables['patterns'][0] = stringtoarr('splendid', 23)
    layout.variables['patterns'][1] = stringtoarr('pretty', 23)
    matches = subset.createGroup('matches')
    matches.setncattr('python_class', 'eumopps.catalogue.dataset.CatalogueFileEntry')
    matches.createDimension('list_count', 0)
    matches.createDimension('tags', 2)
    matches_name = matches.createVariable('name', 'S1', ['list_count', 'default_strlen'])
    matches_time = matches.createVariable('time', 'i8', ['list_count'])
    matches_time.units = 'seconds since 1850-01-01 00:00:00 UTC'
    matches_size = matches.createVariable('size', 'i8', ['list_count'])
    matches_tags = matches.createVariable('tags', 'S1',
                                          ['list_count', 'tags', 'default_strlen'])
    matches_name[0] = stringtoarr('bob', 23)
    # 2016-05-30 17:29:33 = 60780 days * 86400 + 17 hours * 3600 + 29 minutes * 60 + 33
    matches_time[0] = 5251454973
    matches_size[0] = 39877123421
    tags = numpy.zeros((2, 23), 'S1')
    tags[0] = stringtoarr('onetag', 23)
    tags[1] = stringtoarr('twotags', 23)
    matches_tags[0] = tags
    subset.createDimension('archive_unused', 2)
    subset.createVariable('archive_unused', 'S1', ['archive_unused', 'default_strlen'])
    subset.variables['archive_unused'][0] = stringtoarr('nothing', 23)
    subset.variables['archive_unused'][1] = stringtoarr('notmuch', 23)
    group1.createDimension('non_matching', 3)
    group1.createVariable('non_matching', 'S1', ['non_matching', 'default_strlen'])
    group1.variables['non_matching'][0] = stringtoarr('floop', 23)
    group1.variables['non_matching'][1] = stringtoarr('sloop', 23)
    group1.variables['non_matching'][2] = stringtoarr('kaput', 23)
    nc.close()

    # TEMP: show netcdf contents
    # print '\n' + subprocess.Popen(['ncdump', testfile.name], stdout=subprocess.PIPE).communicate()[0]

    # Read
    reader = CatalogueReaderNetCDF()
    result = reader.load(testfile.name)

    # Check results
    self.assertTrue(isinstance(result, Catalogue))
    self.assertEqual('citizen-of-the-world', result.identifier)
    self.assertTrue(isinstance(result.datasets, list))
    self.assertEqual(2, len(result.datasets))
    self.assertTrue(isinstance(result.datasets[0], CatalogueDataSet))
    self.assertEqual('Ffflip', result.datasets[0].name)
    self.assertEqual('/find/it/here', result.datasets[0].path)
    self.assertTrue(isinstance(result.datasets[0].name, basestring))
    self.assertTrue(isinstance(result.datasets[0].path, basestring))
    self.assertTrue(isinstance(result.datasets[1], CatalogueDataSet))
    self.assertEqual('Ffflop', result.datasets[1].name)
    self.assertEqual('/or/here', result.datasets[1].path)
    self.assertTrue(isinstance(result.datasets[1].name, basestring))
    self.assertTrue(isinstance(result.datasets[1].path, basestring))
    self.assertEqual(['floop', 'sloop', 'kaput'], result.datasets[1].non_matching)
    self.assertTrue(isinstance(result.datasets[1].non_matching[0], basestring))
    self.assertTrue(isinstance(result.datasets[1].non_matching[1], basestring))
    self.assertTrue(isinstance(result.datasets[1].non_matching[2], basestring))
    self.assertEqual(1, len(result.datasets[1].subsets))
    self.assertTrue(isinstance(result.datasets[1].subsets[0].layout, DataStorageFiles))
    self.assertTrue(isinstance(result.datasets[1].subsets[0].matches, list))
    self.assertEqual(1, len(result.datasets[1].subsets[0].matches))
    self.assertEqual('bob', result.datasets[1].subsets[0].matches[0].name)
    self.assertTrue(isinstance(result.datasets[1].subsets[0].matches[0].name, basestring))
    self.assertEqual(datetime(2016, 5, 30, 17, 29, 33),
                     result.datasets[1].subsets[0].matches[0].time)
    self.assertEqual(39877123421, result.datasets[1].subsets[0].matches[0].size)
    self.assertEqual(2, len(result.datasets[1].subsets[0].matches[0].tags))
    self.assertEqual('onetag', result.datasets[1].subsets[0].matches[0].tags[0])
    self.assertEqual('twotags', result.datasets[1].subsets[0].matches[0].tags[1])
    self.assertEqual(['nothing', 'notmuch'], result.datasets[1].subsets[0].archive_unused)
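The fixed-width convention used throughout this test relies on stringtoarr padding the string with NUL characters up to the declared length, and chartostring stripping them again on read; a standalone sketch:

from netCDF4 import stringtoarr, chartostring

arr = stringtoarr('bob', 23)  # 23 single characters, dtype 'S1', NUL-padded
print(arr.shape)              # (23,)
print(chartostring(arr))      # 'bob' again, with the padding stripped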
def save(self, fname):
    '''Save the file to the disk. Create netCDF file from the ncfile object.

    Parameters
    ----------
    fname : str
        File name.
    '''
    try:
        sh.rm(fname)
    except Exception:
        pass
    ncfile4 = Dataset(fname, 'w', clobber=False, format='NETCDF4_CLASSIC')
    # Create dimensions
    for dim in self.dims.itervalues():
        if dim["isunlimited"]:
            ncfile4.createDimension(dim['name'], None)
            if self.istop == -1:
                self.istop = dim['size']
        else:
            ncfile4.createDimension(dim['name'], dim['size'])
    # Loop over variables
    for vari in self.variab:
        perem = self.variab[vari]
        var = ncfile4.createVariable(vari, perem['datatype'], perem['dimensions'],
                                     fill_value=perem['FillValue'],
                                     complevel=1)
        var.setncatts(perem['attributes'])
        # Zero-size string variables are loaded as masked constants by netCDF4
        # (e.g. rotated_pole). This workaround solves the problem of not being able
        # to save such a masked constant to netCDF4 variables
        # (error: 'Cannot set fill value of string with array of dtype "float64"').
        if perem['datatype'].char in 'SU':
            if type(perem['data'][:]) == np.ma.core.MaskedConstant:
                perem['data'] = stringtoarr('', 0)
        if perem['hasunlimdim']:
            # has an unlim dim: loop over unlim dim index, copying one chunk at a time
            if self.nchunk:
                start = self.istart
                stop = self.istop
                step = self.nchunk
                if step < 1:
                    step = 1
                for n in range(start, stop, step):
                    nmax = n + step
                    if nmax > self.istop:
                        nmax = self.istop
                    idata = perem['data'][n:nmax]
                    var[n - self.istart:nmax - self.istart] = idata
            else:
                idata = perem['data'][:]
                # was var[0:len(unlimdim)], but unlimdim is undefined here
                var[0:len(idata)] = idata
        else:
            # no unlim dim or 1-d variable, just copy all data at once.
            if perem['data'].shape != ():
                idata = perem['data'][:]
                var[:] = idata
            else:
                var[:] = perem['data']
    ncfile4.sync()  # flush data to disk
    for gatt in self.gattrs:
        setattr(ncfile4, gatt, self.gattrs[gatt])
    ncfile4.close()
# test compound attributes.
FILE_NAME = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name
DIM_NAME = 'time'
VAR_NAME = 'wind'
VAR_NAME2 = 'forecast_wind'
GROUP_NAME = 'forecasts'
dtype = np.dtype([('speed', 'f4'), ('direction', 'f4')])
TYPE_NAME = 'wind_vector_type'
TYPE_NAMEC = 'wind_vectorunits_type'
dtypec = np.dtype([('speed', 'c', (8,)), ('direction', 'c', (8,))])
missvals = np.empty(1, dtype)
missvals['direction'] = 1.e20
missvals['speed'] = -999.
windunits = np.zeros(1, dtypec)
windunits['speed'] = stringtoarr('m/s', dtypec.fields['speed'][0].itemsize)
windunits['direction'] = stringtoarr('degrees', dtypec.fields['direction'][0].itemsize)

class VariablesTestCase(unittest.TestCase):
    def setUp(self):
        self.file = FILE_NAME
        f = Dataset(self.file, 'w')
        d = f.createDimension(DIM_NAME, None)
        g = f.createGroup(GROUP_NAME)
        wind_vector_type = f.createCompoundType(dtype, TYPE_NAME)
        wind_vectorunits_type = f.createCompoundType(dtypec, TYPE_NAMEC)
        v = f.createVariable(VAR_NAME, wind_vector_type, DIM_NAME)
        vv = g.createVariable(VAR_NAME2, wind_vector_type, DIM_NAME)
        v.missing_values = missvals
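The compound units can be attached the same way as missing_values and read back as a structured scalar; a sketch of the intended round trip (continuing the setUp above, before the file is closed; compound attributes need a reasonably recent netCDF4 build):

v.units = windunits  # one element per member of the compound type
f.close()
f = Dataset(FILE_NAME)
units = f.variables[VAR_NAME].units
# each field comes back as the padded character array written above
print(units['speed'], units['direction'])
f.close()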
def create_netcdf(a_netcdf_filename, a_lat_points, a_lon_points, a_nb_levels,
                  a_celerity_arr, a_u_arr, a_v_arr, a_time, a_loc_names):
    """
    dimensions:
        altitude = 401 ;
        profile = 1 ;
    variables:
        float altitude(altitude) ;
            altitude:long_name = "height above mean sea level" ;
            altitude:units = "km" ;
            altitude:positive = "up" ;
        double time(profile) ;
            time:long_name = "time" ;
            time:units = "days since 1970-01-01 00:00:00" ;
        string loc_name(profile) ;
            loc_name:units = "-" ;
            loc_name:long_name = "Location name" ;
        float lon(profile) ;
            lon:long_name = "longitude" ;
            lon:units = "degrees_east" ;
        float lat(profile) ;
            lat:long_name = "latitude" ;
            lat:units = "degrees_north" ;
        float celerity(profile, altitude) ;
            celerity:long_name = "celerity" ;
            celerity:units = "m s**-1" ;
            celerity:coordinates = "time lon lat altitude" ;
        float u(profile, altitude) ;
            u:long_name = "U velocity" ;
            u:units = "m s**-1" ;
            u:coordinates = "time lon lat altitude" ;
        float v(profile, altitude) ;
            v:long_name = "V velocity" ;
            v:units = "m s**-1" ;
            v:coordinates = "time lon lat altitude" ;
    attributes:
        :CF\:featureType = "profile" ;
    """
    print("In create_netcdf %s" % a_netcdf_filename)

    conf = Conf.get_instance()
    netcdf_format = conf.get("NETCDF", "produced_format", "NETCDF3_CLASSIC")

    # create file
    dataset = Dataset(a_netcdf_filename, "w", format=netcdf_format)

    # create dimensions
    dataset.createDimension("altitude", a_nb_levels)
    dataset.createDimension("profile", len(a_lat_points))
    loc_name_len = dataset.createDimension("loc_name_len", 5)

    # create basic variables
    the_time = dataset.createVariable("time", "f8", ("profile",))
    lat = dataset.createVariable("latitude", "f4", ("profile",))
    lon = dataset.createVariable("longitude", "f4", ("profile",))
    altitudes = dataset.createVariable("altitude", "f4", ("altitude",))

    # create loc_name; in netCDF4 it would simply be
    # loc_names = dataset.createVariable('loc_name', str, ('profile',))
    if netcdf_format == "NETCDF3_CLASSIC":
        loc_names = dataset.createVariable("loc_name", "c", ("profile", "loc_name_len"))
    else:
        loc_names = dataset.createVariable("loc_name", str, ("profile",))

    # create param variables
    # u and v wind components
    u = dataset.createVariable("u", "f4", ("profile", "altitude"))
    v = dataset.createVariable("v", "f4", ("profile", "altitude"))
    # celerity
    c = dataset.createVariable("c", "f4", ("profile", "altitude"))

    # add attributes
    dataset.description = "CTBTO Infrasound wind profiles"
    dataset.history = "Created " + time.ctime(time.time()) + " by infra-profile-generator-v1.2.2"
    dataset.source = "infra-profile-generator-v1.2.2"
    dataset.version = "infrasound profile v1.0-20090801"
    # dataset.station = 'IS42'
    lat.units = "degrees north"
    lat.long_name = "Latitude"
    lon.units = "degrees east"
    lon.long_name = "Longitude"
    altitudes.units = "m"
    altitudes.long_name = "Altitude"
    loc_names.units = "-"
    loc_names.long_name = "Location name"
    # match the units actually used by date2num below (was "hours since ...")
    the_time.units = "days since 1970-01-01 00:00:00"
    the_time.calendar = "gregorian"
    the_time.long_name = "Time"

    # param attributes
    u.units = "m s**-1"
    u.long_name = "U velocity"
    v.units = "m s**-1"
    v.long_name = "V velocity"
    c.units = "m s**-1"
    c.long_name = "Celerity"

    # create altitude levels (every 500 m)
    alts = numpy.arange(0, 500 * a_nb_levels, 500)
    altitudes[:] = alts

    # add lat, lon
    lat[:] = a_lat_points
    lon[:] = a_lon_points

    # not used for the moment
    print("a_loc_names %s\n" % a_loc_names)
    if netcdf_format == "NETCDF3_CLASSIC":
        # NETCDF3_CLASSIC doesn't know about str: write fixed-width char arrays
        for cpt, name in enumerate(a_loc_names):
            loc_names[cpt] = stringtoarr(name, len(loc_name_len))
    else:
        # NETCDF4
        for cpt, name in enumerate(a_loc_names):
            loc_names[cpt] = name

    # add time
    dt = date2num(a_time, "days since 1970-01-01 00:00:00", calendar="gregorian")
    # create the time array
    data_time = numpy.repeat(dt, len(a_lat_points))
    the_time[:] = data_time
    dataset.sync()

    c[:] = a_celerity_arr[:]
    u[:] = a_u_arr[:]
    v[:] = a_v_arr[:]
    dataset.sync()
    dataset.close()
    return 0
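A minimal call sketch with toy values (hypothetical; assumes the Conf singleton can resolve the NETCDF section):

import numpy
from datetime import datetime

nb_levels = 4
c = numpy.zeros((1, nb_levels), dtype='f4')
u = numpy.zeros((1, nb_levels), dtype='f4')
v = numpy.zeros((1, nb_levels), dtype='f4')
# one profile at 48N, 2E; location names must fit in the 5-char loc_name_len
create_netcdf('profiles.nc', [48.0], [2.0], nb_levels,
              c, u, v, datetime(2009, 8, 1), ['IS42'])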
def load_into_netcdf(self):
    """Load the grib files into the netCDF file that has been set up."""
    if not hasattr(self, 'ncwriter'):
        self.setup_netcdf()
    self.logger.info('Making netCDF file %s' % self.ncfilename)
    field_dict = {}
    relevant_df = None
    for nc_field, grib_f in self.grib_vars:
        field, vertical_layer = grib_f.split(',')
        if vertical_layer != '':  # was `is not ''`: identity comparison of strings
            field_dict[(field, vertical_layer)] = nc_field
            if relevant_df is None:
                relevant_df = self.index_df[
                    (self.index_df['field'] == field) &
                    (self.index_df['vertical_layer'] == vertical_layer)]
            else:
                relevant_df = relevant_df.append(self.index_df[
                    (self.index_df['field'] == field) &
                    (self.index_df['vertical_layer'] == vertical_layer)],
                    ignore_index=True)
        else:
            field_dict[field] = nc_field
            if relevant_df is None:
                relevant_df = self.index_df[(self.index_df['field'] == field)]
            else:
                relevant_df = relevant_df.append(
                    self.index_df[(self.index_df['field'] == field)],
                    ignore_index=True)
    relevant_df.set_index('filename', inplace=True)
    times = []
    levels = []
    for filename in sorted(relevant_df.index.unique()):
        try:
            grbs = pygrib.open(os.path.join(self.grib_path, filename))
        except IOError:
            try:
                grbs = pygrib.open(os.path.join(self.grib_path, filename + '.grib2'))
            except IOError:
                continue
        for filename, series in relevant_df.loc[filename].iterrows():
            try:
                grb = grbs[series.grib_level]
            except IOError:
                continue
            thetime = grb.validDate
            if thetime not in times:
                timed = len(times)
                times.append(thetime)
                self.ncwriter.set_variable(
                    'Times',
                    nc4.stringtoarr(thetime.strftime('%Y-%m-%d_%H:%M:%S'), 19),
                    timed)
            else:
                timed = times.index(thetime)
            if self.vertical:
                thelevel = grb.level
                if thelevel not in levels:
                    leveld = len(levels)
                    levels.append(thelevel)
                else:
                    leveld = levels.index(thelevel)
                nc_field = field_dict[series.field].upper()
            else:
                nc_field = field_dict[(series.field, series.vertical_layer)].upper()
            if not self.ncwriter.check_variable(nc_field):
                self.ncwriter.add_variable(nc_field, description=grb.name,
                                           units=grb.units, vertical=self.vertical)
            if not self.vertical:
                self.ncwriter.set_variable(
                    nc_field,
                    grb.values[self.min_lat:self.max_lat, self.min_lon:self.max_lon],
                    timed)
            else:
                ivals = grb.values[self.min_lat:self.max_lat, self.min_lon:self.max_lon]
                try:
                    self.ncwriter.set_variable(nc_field, ivals, [timed, leveld])
                except RuntimeError:
                    self.logger.debug('Error with leveld = %s and var = %s'
                                      % (leveld, grb.cfName))
                else:
                    if self.level == 'wrfprs':
                        if not self.ncwriter.check_variable('P'):
                            # was self.nc_writer, a typo for self.ncwriter
                            self.ncwriter.add_variable('P', units='mb', vertical=True)
                        self.ncwriter.set_variable(
                            'P', (np.ones(ivals.shape) * grb.level), [timed, leveld])
    self.ncwriter.close()
    return os.path.join(self.netcdf_path, self.ncfilename)
# now that wind_data_t is defined, create the station data type.
station_data_t = f.createCompoundType(statdtype, 'station_data')
statdat = f.createVariable('station_obs', station_data_t, ('station',))
# create a numpy structured array, assign data to it.
data = numpy.empty(2, station_data_t)
data['latitude'] = 40.
data['longitude'] = -105.
data['surface_wind']['speed'] = 12.5
data['surface_wind']['direction'] = 270
data['temp_sounding'] = (280.3, 272., 270., 269., 266., 258., 254.1, 250., 245.5, 240.)
data['press_sounding'] = range(800, 300, -50)
data['location_name'][0] = stringtoarr('Boulder, Colorado, USA', NUMCHARS)
print('data=', data)
# earlier attempts at the structured dtype:
# x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', 'f8'), ('y', 'i8')])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=[('x', 'f8'), ('y', 'S1', 2)])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=np.dtype({'names': ['x', 'y'], 'formats': ['f8', ('S1', 2)]}))
x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=np.dtype({'names': ['x', 'y'], 'formats': ['f8', 'S2']}))
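Writing the structured array into the variable and reading the fixed-width name back might look like this (a sketch; f and NUMCHARS come from the surrounding script):

from netCDF4 import chartostring

statdat[:] = data  # write both records
# collapse the 'S1' characters of the first record's name back into one string
print(chartostring(f.variables['station_obs'][0]['location_name']))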
# ----------------------------------------------------------------------
# Get coordinates of points from ExodusII file.
exodus = netCDF4.Dataset(filenameExodus, 'a')
points = exodus.variables['coord'][:].transpose()

cellSizeDB = getCellSizeDB(points)
cellSizeFn = getCellSizeFn(points)

# Add cell size info to ExodusII file
if 'num_nod_var' not in exodus.dimensions.keys():
    exodus.createDimension('num_nod_var', 2)
    name_nod_var = exodus.createVariable('name_nod_var', 'S1',
                                         ('num_nod_var', 'len_string',))
    name_nod_var[0, :] = netCDF4.stringtoarr("cell_size_db", 33)
    name_nod_var[1, :] = netCDF4.stringtoarr("cell_size_fn", 33)
    vals_nod_var = exodus.createVariable('vals_nod_var', numpy.float64,
                                         ('time_step', 'num_nod_var', 'num_nodes',))
time_whole = exodus.variables['time_whole']
time_whole[0] = 0.0
vals_nod_var = exodus.variables['vals_nod_var']
vals_nod_var[0, 0, :] = cellSizeDB.transpose()
vals_nod_var[0, 1, :] = cellSizeFn.transpose()
exodus.close()
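The stored names can be verified afterwards with chartostring (a sketch against the file just written):

import netCDF4
exodus = netCDF4.Dataset(filenameExodus)
print(netCDF4.chartostring(exodus.variables['name_nod_var'][:]))
# expected: ['cell_size_db' 'cell_size_fn']
exodus.close()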
parser.add_option("-n", "--number", dest="number", type='int', help="test variant to set up, 1-5", metavar="NUMBER") options, args = parser.parse_args() if not options.filename: options.filename = 'landice_grid.nc' print 'No file specified. Attempting to use landice_grid.nc' if not options.afile: sys.exit("Error: A restart file from test A1 is required to set up this test. Specify with -a") # copy the restart file to be the new input file shutil.copyfile(options.afile, options.filename) # Open the file, get needed dimensions gridfile = NetCDFFile(options.filename,'r+') StrLen = len(gridfile.dimensions['StrLen']) gridfile.variables['xtime'][0,:] = netCDF4.stringtoarr('0000-01-01_00:00:00'.ljust(StrLen), StrLen) gridfile.variables['simulationStartTime'][:] = netCDF4.stringtoarr('0000-01-01_00:00:00'.ljust(StrLen), StrLen) b_moulin = {} # empty dictionary b_moulin[1]=((0,59000,8000,90.0),) b_moulin[2]= ((0,30000,3000,9.0), (1,8000,4000,9.0), (2,60000,7000,9.0), (3,35000,9000,9.0), (4,46000,10000,9.0), (5,26000,11000,9.0), (6,7000,12000,9.0), (7,5000,14000,9.0),
def hobo_to_netcdf(input_file, output, config_file=None, json_file=None, overwrite=False):
    """
    Convert HOBO files (already exported to csv) to netCDF format.

    :param input_file:
    :param output:
    :param config_file:
    :param json_file:
    :param overwrite:
    :return:
    """
    logger.debug('Input file: {}'.format(input_file))
    logger.debug('Output: {}'.format(output))
    logger.debug('Config file: {}'.format(config_file))
    logger.debug('NetCDF file json: {}'.format(json_file))
    logger.debug('Overwrite flag: {}'.format(overwrite))

    # If output is a directory, generate an automatic file name
    if os.path.isdir(output):
        file_name = os.path.splitext(os.path.basename(input_file))[0] + '.nc'
        output_file = os.path.join(output, file_name)
    else:
        output_file = output

    # Check whether the file already exists; fail unless the overwrite flag is set
    if os.path.exists(output_file) and overwrite is False:
        raise FileExistsError('File already exists. Use -ow flag to overwrite')

    # Open the file and extract its information
    title, serial_number, header, details = hobo.get_info(input_file)

    # Read the .csv file with configuration and extra station information
    if os.path.exists(config_file) is False:
        raise FileNotFoundError('Config file not found {}'.format(config_file))
    cfgs = pd.read_csv(config_file)
    # Look for a plot title equal to the input file's
    row = cfgs.loc[cfgs['Plot Title'] == title]
    if row.empty:
        # Error - no information at all about this plot title
        raise AttributeError('Plot title ({}) not found in config file'.format(title))

    # Extract the important station information from the config file
    station_id = row.iloc[0]['Codigo']
    station_sn = row.iloc[0]['Numero de serie']
    station_latitude = row.iloc[0]['Latitude [graus]']
    station_longitude = row.iloc[0]['Longitude [graus]']
    station_altitude = row.iloc[0]['Altitude [m]']
    station_datetime_col = row.iloc[0]['Coluna data/hora']
    station_gmt = int(row.iloc[0]['GMT'])
    station_uuid = row.iloc[0]['UUID']
    station_variable_col = row.iloc[0]['Coluna variavel']

    # Find the datetime column flexibly (look for a similar name)
    datetime_col = None
    for col_name in header:
        if util.find_matches(col_name, station_datetime_col):
            datetime_col = col_name
            break
    if not datetime_col:
        raise AttributeError('col ({}) not found'.format(station_datetime_col))

    # Find the timezone and check it against the expected value
    gmt_hour_offset, gmt_minute_offset = util.get_gmt_offset(datetime_col)
    if station_gmt != gmt_hour_offset:
        print('Warning: found timezone (GMT{}) different from config (GMT{}). Using GMT{}.'
              .format(gmt_hour_offset, station_gmt, gmt_hour_offset))

    # Find the data column name by similarity
    variable_col = None
    for col_name in header:
        if util.find_matches(col_name, station_variable_col):
            variable_col = col_name
            break
    if not variable_col:
        raise AttributeError('col ({}) not found'.format(station_variable_col))

    # Disabled for now: validation of the series' filter type ('Sum of event values')
    # and filter interval against the expected ISO8601 time resolution (PT5M/PT1D)
    # from the exported details; the detail parsing was not flexible enough yet.

    # Extract the acquisition data
    table = hobo.get_data(input_file)

    # Separate precipitation
    precipitation = table[variable_col]
    precipitation.index = table[datetime_col]
    precipitation = precipitation.dropna()  # drop the NaNs

    # Process the date/time data
    date_str = precipitation.index.to_series()
    date_time = pd.to_datetime(date_str, format='%m/%d/%y %I:%M:%S %p')
    tzinfo = timezone(timedelta(hours=gmt_hour_offset, minutes=gmt_minute_offset))
    # build the index with the timezone information included
    index = date_time.dt.tz_localize(tzinfo)
    # convert to UTC
    index_utc = index.dt.tz_convert('UTC')

    # Identify the first and last acquisition events
    first_day_str = cf.datetime2str(index_utc.iloc[0])
    last_day_str = cf.datetime2str(index_utc.iloc[-1])

    # Generate the output file name if it has not been defined yet
    if output_file is None:
        file_name = '{}_{}_{}.nc'.format(station_id, first_day_str, last_day_str)
    else:
        file_name = output_file
    nc_input_file = file_name

    # Create the netCDF file
    nc_file = NetCDFJSON()
    nc_file.write(nc_input_file)

    # Read the json file with the netCDF structure configuration
    if os.path.exists(json_file) is False:
        raise FileNotFoundError('NetCDF json file not found {}'.format(json_file))
    nc_file.load_json(json_file)
    nc_file.create_from_json()

    # get handles for the dimensions
    timeDim = nc_file.get_dimension('time')
    nameDim = nc_file.get_dimension('name_strlen')
    # get handles for the variables
    time = nc_file.get_variable('time')
    lat = nc_file.get_variable('lat')
    lon = nc_file.get_variable('lon')
    alt = nc_file.get_variable('alt')
    station_name = nc_file.get_variable('station_name')

    np_time = index_utc.to_numpy()
    nc_time = date2num(np_time, units=time.units, calendar=time.calendar)

    # Precipitation is accumulated over time. CF requires, for this kind of quantity,
    # time bounds describing the accumulation interval: for a 5-minute accumulation the
    # bounds are (current time - 5 min, current time). Disabled for now: computing
    # time_bnds from the ISO8601 resolution, pending a reliable way to detect the interval.

    # Set the variables
    lat[:] = np.array([station_latitude])
    lon[:] = np.array([station_longitude])
    alt[:] = np.array([station_altitude])
    time[:] = nc_time
    station_name[:] = stringtoarr(station_id, nameDim.size)

    # Insert the precipitation information
    nc_var = nc_file.get_variable('precipitation')
    FILL_VALUE = nc_var._FillValue
    data_var = precipitation
    data_var = data_var.replace(np.nan, FILL_VALUE)
    data_var = data_var.to_numpy()
    nc_var[:] = data_var
    data_len = len(data_var)

    # Processing for metadata: work on the already-converted data to make the code reusable
    # min/max lat and lon
    min_lat = np.amin(lat)
    max_lat = np.amax(lat)
    min_lon = np.amin(lon)
    max_lon = np.amax(lon)
    # time coverage
    min_time = num2date(np.amin(time), units=time.units, calendar=time.calendar)
    max_time = num2date(np.amax(time), units=time.units, calendar=time.calendar)
    min_time_str = cf.datetime2str(min_time)
    max_time_str = cf.datetime2str(max_time)
    time_delta = max_time - min_time
    time_delta_str = cf.timedelta2str(time_delta)

    # Update the metadata
    gbd_index = nc_file.get_group('gbd_index')
    gbd_index.geospatial_lat_min = min_lat
    gbd_index.geospatial_lat_max = max_lat
    gbd_index.geospatial_lon_min = min_lon
    gbd_index.geospatial_lon_max = max_lon
    gbd_index.time_coverage_start = min_time_str
    gbd_index.time_coverage_end = max_time_str
    gbd_index.time_coverage_duration = time_delta_str
    uuid = '{}/{}_{}_{}.nc'.format(station_uuid, station_id, first_day_str, last_day_str)
    gbd_index.uuid = uuid
    gbd_index.date_created = cf.datetime2str(datetime.now(timezone.utc))
    gbd_index.history = '({}) Created with {}'.format(gbd_index.date_created, TOOL_NAME)
    gbd_index.keywords = [nc_var.standard_name, nc_var.units, station_id]
    gbd_index.key_variables = 'precipitation'
    nc_file.close()

    # Print the result
    print('Input file: {}'.format(input_file))
    print('Output file: {}'.format(nc_input_file))
    print('Latitude Min/Max: {} / {}'.format(min_lat, max_lat))
    print('Longitude Min/Max: {} / {}'.format(min_lon, max_lon))
    print('Datetime (UTC) Min/Max: {} / {}'.format(min_time_str, max_time_str))
    print('Coverage duration: {}'.format(time_delta_str))
    print('Data length: {}'.format(data_len))

    # Check the file against the CF conventions using cfchecks
    if RUN_CFCHECKS:
        print('\nRunning cfchecks')
        sys.argv = ['', nc_input_file]
        sys.exit(main())
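A call sketch with hypothetical paths (the config CSV must carry the column names read above, and the JSON file the netCDF structure expected by NetCDFJSON):

hobo_to_netcdf('hobo_export.csv', 'out/',
               config_file='stations.csv',
               json_file='netcdf_structure.json',
               overwrite=True)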
longitudes.units = 'degrees_east'
times = rootgrp.createVariable('time', 'i4', ('time',))
times.standard_name = 'time'
times.long_name = 'Time of measurement'
times.units = 'seconds since 1970-01-01 00:00:00'
air_temperatures = rootgrp.createVariable('air_temperature', 'f4', ('time',))
air_temperatures.coordinates = 'lat lon'
air_temperatures.standard_name = 'air_temperature'
air_temperatures.long_name = 'Air temperature in degrees Celsius'
air_temperatures.units = 'degrees Celsius'

# set the values of the variables
station_name[:] = netCDF4.stringtoarr('Penlee', 50)
altitude[:] = [station_altitude]
latitudes[:] = [station_lat]
longitudes[:] = [station_lon]
times[:] = timestamp
air_temperatures[:] = temp
rootgrp.close()

entries = (os.path.join(sourcefolder, fn) for fn in os.listdir(sourcefolder))
entries = ((os.stat(path), path) for path in entries)
# leave only regular files, insert creation date
entries = ((stat[ST_CTIME], path) for stat, path in entries if S_ISREG(stat[ST_MODE]))
metavar="NUMBER") options, args = parser.parse_args() if not options.filename: options.filename = 'landice_grid.nc' print 'No file specified. Attempting to use landice_grid.nc' if not options.afile: sys.exit( "Error: A restart file from test B5 is required to set up this test. Specify with -b" ) # copy the restart file to be the new input file shutil.copyfile(options.afile, options.filename) # Open the file, get needed dimensions gridfile = NetCDFFile(options.filename, 'r+') StrLen = len(gridfile.dimensions['StrLen']) gridfile.variables['xtime'][0, :] = netCDF4.stringtoarr( '0000-01-01_00:00:00'.ljust(StrLen), StrLen) gridfile.variables['simulationStartTime'][:] = netCDF4.stringtoarr( '0000-01-01_00:00:00'.ljust(StrLen), StrLen) # modify melt inputs gridfile.variables['externalWaterInput'][ 0, :] = gridfile.variables['externalWaterInput'][ 0, :] * 1.0e-12 # Make value at moulin locations tiny but positive # value for basalMeltInput doesn't matter, because it will be overwritten in the code. gridfile.close() print 'Successfully added initial conditions to: ', options.filename
def WriteNCCF(FileName,Dates,Latitudes,Longitudes,ClimPoints,DataObject,DimObject,AttrObject,GlobAttrObject): ''' Sort out the date/times to write out and time bounds ''' ''' Sort out clim bounds ''' ''' Sort out lat and long bounds ''' ''' Convert variables using the obtained scale_factor and add_offset: stored_var=int((var-offset)/scale) ''' ''' Write to file, set up given dimensions, looping through all potential variables and their attributes, and then the provided dictionary of global attributes ''' # Sort out date/times to write out print(Dates) TimPoints,TimBounds = MakeDaysSince(Dates['StYr'],Dates['StMon'],Dates['EdYr'],Dates['EdMon']) nTims = len(TimPoints) # Sort out clim bounds - paired strings ClimBounds = np.empty((12,2),dtype='|S10') for mm in range(12): ClimBounds[mm,0] = str(ClimPoints[0])+'-'+str(mm+1)+'-'+str(1) ClimBounds[mm,1] = str(ClimPoints[1])+'-'+str(mm+1)+'-'+str(MonthDays[mm]) # Sort out LatBounds and LonBounds LatBounds = np.empty((len(Latitudes),2),dtype='float') LonBounds = np.empty((len(Longitudes),2),dtype='float') LatBounds[:,0] = Latitudes - ((Latitudes[1]-Latitudes[0])/2.) LatBounds[:,1] = Latitudes + ((Latitudes[1]-Latitudes[0])/2.) LonBounds[:,0] = Longitudes - ((Longitudes[1]-Longitudes[0])/2.) LonBounds[:,1] = Longitudes + ((Longitudes[1]-Longitudes[0])/2.) #pdb.set_trace() # No need to convert float data using given scale_factor and add_offset to integers - done within writing program (packV = (V-offset)/scale # Not sure what this does to float precision though... # Change mdi into an integer -999 because these are stored as integers for vv in range(len(DataObject)): DataObject[vv][np.where(DataObject[vv] == OLDMDI)] = MDI # Create a new netCDF file - have tried zlib=True,least_significant_digit=3 (and 1) - no difference ncfw=Dataset(FileName,'w',format='NETCDF4_CLASSIC') # need to try NETCDF4 and also play with compression but test this first # Write out the global attributes if ('description' in GlobAttrObject): ncfw.description = GlobAttrObject['description'] #print(GlobAttrObject['description']) if ('File_created' in GlobAttrObject): ncfw.File_created = GlobAttrObject['File_created'] if ('Title' in GlobAttrObject): ncfw.Title = GlobAttrObject['Title'] if ('Institution' in GlobAttrObject): ncfw.Institution = GlobAttrObject['Institution'] if ('History' in GlobAttrObject): ncfw.History = GlobAttrObject['History'] if ('Licence' in GlobAttrObject): ncfw.Licence = GlobAttrObject['Licence'] if ('Project' in GlobAttrObject): ncfw.Project = GlobAttrObject['Project'] if ('Processing_level' in GlobAttrObject): ncfw.Processing_level = GlobAttrObject['Processing_level'] if ('Acknowledgement' in GlobAttrObject): ncfw.Acknowledgement = GlobAttrObject['Acknowledgement'] if ('Source' in GlobAttrObject): ncfw.Source = GlobAttrObject['Source'] if ('Comment' in GlobAttrObject): ncfw.Comment = GlobAttrObject['Comment'] if ('References' in GlobAttrObject): ncfw.References = GlobAttrObject['References'] if ('Creator_name' in GlobAttrObject): ncfw.Creator_name = GlobAttrObject['Creator_name'] if ('Creator_email' in GlobAttrObject): ncfw.Creator_email = GlobAttrObject['Creator_email'] if ('Version' in GlobAttrObject): ncfw.Version = GlobAttrObject['Version'] if ('doi' in GlobAttrObject): ncfw.doi = GlobAttrObject['doi'] if ('Conventions' in GlobAttrObject): ncfw.Conventions = GlobAttrObject['Conventions'] if ('netcdf_type' in GlobAttrObject): ncfw.netcdf_type = GlobAttrObject['netcdf_type'] # Loop through and set up the dimension names and quantities for vv in 
range(len(DimObject[0])):
        ncfw.createDimension(DimObject[0][vv], DimObject[1][vv])

    # Go through each dimension and set up the variable and attributes for that dimension if needed
    for vv in range(len(DimObject) - 2):  # ignore first two elements of the list but count all other dictionaries
        print(DimObject[vv + 2]['var_name'])  # Not 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        MyVar = ncfw.createVariable(DimObject[vv + 2]['var_name'],
                                    DimObject[vv + 2]['var_type'],
                                    DimObject[vv + 2]['var_dims'])
        # Apply any other attributes
        if 'standard_name' in DimObject[vv + 2]:
            MyVar.standard_name = DimObject[vv + 2]['standard_name']
        if 'long_name' in DimObject[vv + 2]:
            MyVar.long_name = DimObject[vv + 2]['long_name']
        if 'units' in DimObject[vv + 2]:
            MyVar.units = DimObject[vv + 2]['units']
        if 'axis' in DimObject[vv + 2]:
            MyVar.axis = DimObject[vv + 2]['axis']
        if 'calendar' in DimObject[vv + 2]:
            MyVar.calendar = DimObject[vv + 2]['calendar']
        if 'start_year' in DimObject[vv + 2]:
            MyVar.start_year = DimObject[vv + 2]['start_year']
        if 'end_year' in DimObject[vv + 2]:
            MyVar.end_year = DimObject[vv + 2]['end_year']
        if 'start_month' in DimObject[vv + 2]:
            MyVar.start_month = DimObject[vv + 2]['start_month']
        if 'end_month' in DimObject[vv + 2]:
            MyVar.end_month = DimObject[vv + 2]['end_month']
        if 'bounds' in DimObject[vv + 2]:
            MyVar.bounds = DimObject[vv + 2]['bounds']
        if 'climatology' in DimObject[vv + 2]:
            MyVar.climatology = DimObject[vv + 2]['climatology']
        if 'point_spacing' in DimObject[vv + 2]:
            MyVar.point_spacing = DimObject[vv + 2]['point_spacing']

        # Provide the data to the variable
        if DimObject[vv + 2]['var_name'] == 'time':
            MyVar[:] = TimPoints
        if DimObject[vv + 2]['var_name'] == 'bounds_time':
            MyVar[:, :] = TimBounds
        if DimObject[vv + 2]['var_name'] == 'month':
            for mm in range(12):
                MyVar[mm, :] = stringtoarr(MonthName[mm], 10)
        if DimObject[vv + 2]['var_name'] == 'climbounds':
            for mm in range(12):
                MyVar[mm, 0, :] = stringtoarr(ClimBounds[mm, 0], 10)
                MyVar[mm, 1, :] = stringtoarr(ClimBounds[mm, 1], 10)
        if DimObject[vv + 2]['var_name'] == 'latitude':
            MyVar[:] = Latitudes
        if DimObject[vv + 2]['var_name'] == 'bounds_lat':
            MyVar[:, :] = LatBounds
        if DimObject[vv + 2]['var_name'] == 'longitude':
            MyVar[:] = Longitudes
        if DimObject[vv + 2]['var_name'] == 'bounds_lon':
            MyVar[:, :] = LonBounds

    # Go through each data variable and set up the variable attributes
    for vv in range(len(AttrObject)):
        print(AttrObject[vv]['var_name'])  # Not 100% sure this works in a loop with overwriting
        # initiate variable with name, type and dimensions
        # (_FillValue must be supplied at creation time, so it is passed to
        # createVariable here rather than set as an attribute afterwards)
        MyVar = ncfw.createVariable(AttrObject[vv]['var_name'],
                                    AttrObject[vv]['var_type'],
                                    AttrObject[vv]['var_dims'],
                                    fill_value=AttrObject[vv]['_FillValue'])
        # Apply any other attributes
        if 'standard_name' in AttrObject[vv]:
            MyVar.standard_name = AttrObject[vv]['standard_name']
        if 'long_name' in AttrObject[vv]:
            MyVar.long_name = AttrObject[vv]['long_name']
        if 'cell_methods' in AttrObject[vv]:
            MyVar.cell_methods = AttrObject[vv]['cell_methods']
        if 'comment' in AttrObject[vv]:
            MyVar.comment = AttrObject[vv]['comment']
        if 'units' in AttrObject[vv]:
            MyVar.units = AttrObject[vv]['units']
        if 'axis' in AttrObject[vv]:
            MyVar.axis = AttrObject[vv]['axis']
        if 'add_offset' in AttrObject[vv]:
            MyVar.add_offset = AttrObject[vv]['add_offset']
        if 'scale_factor' in AttrObject[vv]:
            MyVar.scale_factor = AttrObject[vv]['scale_factor']
        if 'valid_min' in AttrObject[vv]:
            MyVar.valid_min = AttrObject[vv]['valid_min']
        if 'valid_max' in AttrObject[vv]:
            MyVar.valid_max = AttrObject[vv]['valid_max']
        if 'missing_value' in AttrObject[vv]:
            MyVar.missing_value = AttrObject[vv]['missing_value']
        if 'reference_period' in AttrObject[vv]:
            MyVar.reference_period = AttrObject[vv]['reference_period']
        if 'ancillary_variables' in AttrObject[vv]:
            MyVar.ancillary_variables = AttrObject[vv]['ancillary_variables']

        # Provide the data to the variable - depending on how many dimensions there are
        if len(AttrObject[vv]['var_dims']) == 1:
            MyVar[:] = DataObject[vv]
        if len(AttrObject[vv]['var_dims']) == 2:
            MyVar[:, :] = DataObject[vv]
        if len(AttrObject[vv]['var_dims']) == 3:
            MyVar[:, :, :] = DataObject[vv]

    ncfw.close()

    return  # WriteNCCF
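# Illustrative sketch (not from the original source): the loops above imply a
# particular layout for DimObject, AttrObject and DataObject. All names and
# values below are invented purely for illustration.
DimObject = [
    ['time', 'latitude', 'longitude'],  # DimObject[0]: dimension names
    [12, 36, 72],                       # DimObject[1]: dimension sizes
    # from index 2 onwards: one dict per coordinate variable
    {'var_name': 'time', 'var_type': 'i4', 'var_dims': ('time',),
     'long_name': 'time', 'units': 'months', 'axis': 'T'},
    {'var_name': 'latitude', 'var_type': 'f4', 'var_dims': ('latitude',),
     'standard_name': 'latitude', 'units': 'degrees_north', 'axis': 'Y'},
    {'var_name': 'longitude', 'var_type': 'f4', 'var_dims': ('longitude',),
     'standard_name': 'longitude', 'units': 'degrees_east', 'axis': 'X'},
]
# one dict per data variable; '_FillValue' is required because it is passed
# to createVariable at creation time
AttrObject = [
    {'var_name': 'anomaly', 'var_type': 'f4',
     'var_dims': ('time', 'latitude', 'longitude'),
     '_FillValue': -1e30, 'units': 'deg C'},
]
DataObject = [anomaly_array]  # hypothetical array; one entry per AttrObject dict, same order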
def __init__(self, nc_filename, sequences, letter_features_size, map_letter2features,
             window_size=DEFAULT_WINDOW_SIZE, map_label2class=None, word_vectors=None):
    """
    nc_filename (str): A file to write the dataset to, in netCDF format
    sequences (list): A list of Sequence objects containing the data
    letter_features_size (int): size of the feature vector of a single letter
    map_letter2features (dict): a map from letters to feature vectors
    window_size (int): half-width of the letter context window
    map_label2class (dict): a map from label to class
    word_vectors (dict): a map from word to vector
    """
    print('preparing Currennt dataset')
    self.nc_filename = nc_filename
    self.sequences = sequences
    self.letter_features_size = letter_features_size
    self.input_pattern_size = letter_features_size * (2 * window_size + 1)
    if word_vectors:
        self.input_pattern_size += get_word_vectors_size(word_vectors)
    self.map_letter2features = map_letter2features
    self.window_size = window_size
    nc_file = Dataset(nc_filename, 'w')

    # collect label information
    # if given a map (say, from training set), use it
    if map_label2class:
        self.map_label2class = map_label2class
        max_label_length = 0
        for label in self.map_label2class:
            max_label_length = max(max_label_length, len(label))
    # otherwise create a new map
    else:
        labels = set()
        max_label_length = 0
        for sequence in sequences:
            for word in sequence.words:
                for diac in word.diacs:
                    labels.add(diac)
                    max_label_length = max(max_label_length, len(diac))
        labels.add(Word.WORD_BOUNDARY)  # word boundary label (same as word boundary symbol)
        max_label_length = max(max_label_length, len(Word.WORD_BOUNDARY))
        # create map from label (diacritic) to class (integer)
        map_label2class = dict()
        for label in labels:
            map_label2class[label] = len(map_label2class)  # TODO: make sure classes are 0-indexed
        self.map_label2class = map_label2class
    print('label2class map:', self.map_label2class)

    # create dimensions
    dim_num_seqs = nc_file.createDimension('numSeqs', len(sequences))
    num_timesteps = 0
    for sequence in sequences:
        num_timesteps += sequence.num_letters(count_word_boundary=self.INCLUDE_WORD_BOUNDARY)
    dim_num_timesteps = nc_file.createDimension('numTimesteps', num_timesteps)
    dim_input_pattern_size = nc_file.createDimension('inputPattSize', self.input_pattern_size)
    dim_max_seq_tag_length = nc_file.createDimension('maxSeqTagLength', self.MAX_SEQ_TAG_LENGTH)
    # optional dimensions
    dim_num_labels = nc_file.createDimension('numLabels', len(map_label2class))
    dim_max_label_length = nc_file.createDimension('maxLabelLength', max_label_length)
    dim_max_target_string_length = nc_file.createDimension('maxTargStringLength', self.MAX_TARGET_STRING_LENGTH)

    # create variables
    var_seq_tags = nc_file.createVariable('seqTags', 'S1', ('numSeqs', 'maxSeqTagLength'))
    var_seq_tags.longname = 'sequence tags'
    var_seq_lengths = nc_file.createVariable('seqLengths', 'i4', ('numSeqs',))
    var_seq_lengths.longname = 'sequence lengths'
    var_inputs = nc_file.createVariable('inputs', 'f4', ('numTimesteps', 'inputPattSize'))
    var_inputs.longname = 'inputs'
    var_target_classes = nc_file.createVariable('targetClasses', 'i4', ('numTimesteps',))
    var_target_classes.longname = 'target classes'
    # optional variables
    var_num_target_classes = nc_file.createVariable('numTargetClasses', 'i4')
    var_num_target_classes.longname = 'number of target classes'
    var_labels = nc_file.createVariable('labels', 'S1', ('numLabels', 'maxLabelLength'))
    var_labels.longname = 'target labels'
    var_target_strings = nc_file.createVariable('targetStrings', 'S1', ('numSeqs', 'maxTargStringLength'))
    var_target_strings.longname = 'target strings'

    # write data to variables
    print('writing sequence tags')
    seq_tags = []
    for sequence in sequences:
        seq_tags.append(stringtoarr(sequence.seq_id, self.MAX_SEQ_TAG_LENGTH))
    var_seq_tags[:] = seq_tags

    print('writing sequence lengths')
    seq_lengths = []
    for sequence in sequences:
        seq_lengths.append(sequence.num_letters(count_word_boundary=self.INCLUDE_WORD_BOUNDARY))
    var_seq_lengths[:] = seq_lengths

    print('writing inputs')
    # create empty array for the inputs
    inputs = np.empty((0, self.input_pattern_size))
    for sequence in sequences:
        sequence_features = self.generate_sequence_features(sequence)
        inputs = np.concatenate((inputs, sequence_features))
    var_inputs[:, :] = inputs

    print('writing target classes')
    target_classes = []
    for sequence in sequences:
        if self.INCLUDE_WORD_BOUNDARY:
            target_classes.append(map_label2class[Word.WORD_BOUNDARY])
        for word in sequence.words:
            for diac in word.diacs:
                assert diac in map_label2class
                target_classes.append(map_label2class[diac])
            if self.INCLUDE_WORD_BOUNDARY:
                target_classes.append(map_label2class[Word.WORD_BOUNDARY])
    var_target_classes[:] = target_classes

    # write data for optional variables
    var_num_target_classes[:] = len(map_label2class)

    labels_arr = np.empty((0, max_label_length))
    labels_ordered = [i[0] for i in sorted(self.map_label2class.items(), key=operator.itemgetter(1))]
    for label in labels_ordered:
        labels_arr = np.concatenate((labels_arr, [stringtoarr(label, max_label_length)]))
    var_labels[:, :] = labels_arr

    print('writing target strings')
    target_strings = np.empty((0, self.MAX_TARGET_STRING_LENGTH))
    for sequence in sequences:
        sequence_letters = sequence.get_sequence_letters(include_word_boundary=self.INCLUDE_WORD_BOUNDARY)
        if len(sequence_letters) > self.MAX_TARGET_STRING_LENGTH:
            sys.stderr.write('Warning: length of sequence letters in sequence: ' + sequence.seq_id +
                             ' > MAX_TARGET_STRING_LENGTH\n')
        target_strings = np.concatenate((target_strings,
                                         [stringtoarr(''.join(sequence_letters), self.MAX_TARGET_STRING_LENGTH)]))
    var_target_strings[:, :] = target_strings

    nc_file.close()
    print('Currennt dataset written to:', nc_filename)
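# Sketch of the read side (an assumption, not part of the original class):
# the fixed-width 'S1' variables written above can be decoded back into
# Python strings with netCDF4.chartostring, the inverse of the stringtoarr
# calls used here. 'train.nc' is a hypothetical filename.
from netCDF4 import Dataset, chartostring

with Dataset('train.nc') as nc_read:
    seq_tags = chartostring(nc_read.variables['seqTags'][:])  # 1-D array of str
    labels = chartostring(nc_read.variables['labels'][:])
    print(seq_tags[0], list(labels))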
station_data_units_t = f.createCompoundType(statdtype_units, 'station_data_units')
# create a variable of type 'station_data_t'
statdat = f.createVariable('station_obs', station_data_t, ('station',))
# create a numpy structured array, assign data to it.
data = numpy.empty(1, station_data_t)
data['latitude'] = 40.
data['longitude'] = -105.
data['surface_wind']['speed'] = 12.5
data['surface_wind']['direction'] = 270
data['temp_sounding'] = (280.3, 272., 270., 269., 266., 258., 254.1, 250., 245.5, 240.)
data['press_sounding'] = range(800, 300, -50)
# variable-length string datatypes are not supported inside compound types, so
# to store strings in a compound data type, each string must be
# stored as a fixed-size (in this case 80) array of characters.
data['location_name'] = stringtoarr('Boulder, Colorado, USA', NUMCHARS)
# assign structured array to variable slice.
statdat[0] = data
# or just assign a tuple of values to variable slice
# (will automatically be converted to a structured array).
statdat[1] = (40.78, -73.99, (-12.5, 90),
              (290.2, 282.5, 279., 277.9, 276., 266., 264.1, 260., 255.5, 243.),
              range(900, 400, -50), stringtoarr('New York, New York, USA', NUMCHARS))
print(f.cmptypes)
windunits = numpy.empty(1, winddtype_units)
stationobs_units = numpy.empty(1, statdtype_units)
windunits['speed'] = stringtoarr('m/s', NUMCHARS)
windunits['direction'] = stringtoarr('degrees', NUMCHARS)
stationobs_units['latitude'] = stringtoarr('degrees north', NUMCHARS)
stationobs_units['longitude'] = stringtoarr('degrees west', NUMCHARS)
stationobs_units['surface_wind'] = windunits
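# Sketch (an addition, not in the original example): reading one element of
# the compound variable back gives a numpy structured scalar, and
# chartostring recovers the Python string from the fixed-size character field.
from netCDF4 import chartostring

obs = statdat[0]                           # structured scalar read from the variable
name = chartostring(obs['location_name'])  # -> 'Boulder, Colorado, USA'
print(name)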
wind_data_t = f.createCompoundType(winddtype, "wind_data")
# now that wind_data_t is defined, create the station data type.
station_data_t = f.createCompoundType(statdtype, "station_data")
statdat = f.createVariable("station_obs", station_data_t, ("station",))
# create a numpy structured array, assign data to it.
data = numpy.empty(2, station_data_t)
data["latitude"] = 40.0
data["longitude"] = -105.0
data["surface_wind"]["speed"] = 12.5
data["surface_wind"]["direction"] = 270
data["temp_sounding"] = (280.3, 272.0, 270.0, 269.0, 266.0, 258.0, 254.1, 250.0, 245.5, 240.0)
data["press_sounding"] = range(800, 300, -50)
data["location_name"][0] = stringtoarr("Boulder, Colorado, USA", NUMCHARS)
print("data=", data)

# x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', 'f8'), ('y', 'i8')])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=[('x', 'f8'), ('y', 'S1', 2)])
# x = np.array([(1.0, 'ba'), (3.0, 'ab')], dtype=np.dtype({'names': ['x', 'y'], 'formats': ['f8', ('S1', 2)]}))
x = np.array([(1.0, "ba"), (3.0, "ab")],
             dtype=np.dtype({"names": ["x", "y"], "formats": ["f8", "S2"]}))
x_dtype = x.dtype

from copy import deepcopy
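# Sketch (an addition): the commented-out dtype experiments above hinge on the
# difference between a ('S1', 2) sub-array field and a single 'S2' string
# field; stringtoarr produces the ('S1', n) character-array layout.
import numpy as np
from netCDF4 import stringtoarr

chars = np.array([(1.0, stringtoarr("ba", 2))], dtype=[("x", "f8"), ("y", "S1", 2)])
words = np.array([(1.0, "ba")], dtype=[("x", "f8"), ("y", "S2")])
print(chars["y"][0])  # array([b'b', b'a'], dtype='|S1') - two single characters
print(words["y"][0])  # b'ba' - one two-byte string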