forked from hdean83/notebook
/
cmg_ts2cf_no_stationdim.py
494 lines (373 loc) · 20.5 KB
/
cmg_ts2cf_no_stationdim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <markdowncell>
# #Convert existing time series NetCDF files in 4D (T,Z,Y,X) "grid" format to CF-1.6 format
# <codecell>
import cStringIO
import glob
import os
import re

import netCDF4
import numpy
import pandas as pd
# <codecell>
projs = '''
ARGO_MERCHANT,B. Butman,Argo Merchant Experiment,A moored array deployed after the ARGO MERCHANT ran aground onNantucket Shoals designed to help understand the fate of the spilled oil.
BUZZ_BAY,B. Butman,Currents and Sediment Transport in Buzzards Bay,Investigation of the near-bottom circulation in Buzzards Bay and consequent transport of fine-grained sediments that may be contaminated with PCBs from inner New Bedford Harbor.
CAMP,B. Butman,California Area Monitoring Program (CAMP),A four-year multi-disciplinary field and laboratory study to investigate the sediment transport regime in the vicinity of production drilling rigs in the Santa Barbara Basin
CAPE_COD_BAY,B. Butman,Currents and Sediment Transport in Cape Cod Bay,A pilot study to determine the effect of winter storms on sediment movement at two potential dredge spoil disposal areas.
CC_MISC,B. Butman,Transport studies - Nauset Inlet,Part of a collaborative study of sediment movement in Nauset Inlet.
DEEP_REEF,J. Lacey,Gulf of Mexico - Pinnacles,Pressure data from the Gulf of Mexico
DWDS_106,B. Butman,Sediment Transport at Deep Water Dump Site 106,Near-bottom current measurements to understand the fate and transport of sludge from the New York Metropolitan region discharged at the sea surface.
ECOHAB_II,R. Signell,Ecology of Harmful Algal Blooms (ECOHAB-II),A field program to continue investigating the transport and fate of toxic dinoflagellate blooms in the western Gulf of Maine.
ECOHAB_I,R. Signell,Ecology of Harmful Algal Blooms (ECOHAB-I),A field program to study the transport and fate of toxic dinoflagellate blooms in the western Gulf of Maine.
EUROSTRATAFORM,C. Sherwood,EuroSTRATAFORM,The EuroSTRATAFORM Po and Apennine Sediment Transport and Accumulation (PASTA) experiment was an international study of sediment-transport processes and formation of geological strata in the Adriatic Sea.
FARALLONES,M. Noble,Farallons,Program to measure the currents and circulation on the continental slope off San Francisco CA and thus infer the transport of dredged materialat the newly-established deep-water disposal site.
GB_SED,B. Butman,Georges Bank Current and Sediment Transport Studies,A series of studies to assess environmental hazards to petroleum development in the Georges Bank and New England Shelf region
GLOBEC_GB,R. Schlitz,GLOBEC Georges Bank Program,A moored array program to investigate the circulation and mixing of plankton on Georges Bank.
GLOBEC_GSC,R. Schlitz,GLOBEC Great South Channel Circulation Experiment,A moored array program to investigate the recirculation of water and plankton around Georges Bank
GULF_MAINE,B. Butman,Deep Circulation in the Gulf of Maine,A two-year field study to investigate the deep flow between the major basins in the Gulf of Maine and the effects on the distribution of suspended sediments.
HUDSON_SVALLEY,B. Butman,Circulation and Sediment Transport in the Hudson Shelf Valley,Field experiments have been carried out to understand the transport of sediments and associated contaminants in the Hudson Shelf Valley offshore of New York.
KARIN_RIDGE,M. Noble,Karin Ridge Experiment,Current measurements collected at 2 sites in Karin Ridge Seamount.
LYDONIA_C,B. Butman,Lydonia Canyon Dynamics Experiment,A major field experiment to determine the importance of submarine canyons in sediment transport along and across the continental margin.
MAB_SED,B. Butman,Sediment Transport Observations in the Middle Atlantic Bight,A series of studies to assess environmental hazards to petroleum development in the Middle Atlantic Bight.
MAMALA_BAY,D. Cacchione,Mamala bay Experiment,Current measurements collected at 350-450 meters in Mamala Bay near Waikiki Beach.
MBAY_CIRC,R. Signell, Massachusetts Bay Circulation Experiment,Current measurements collected at 6 sites in Massachusetts Bay throughout the year to map the tidal wind and density driven currents.
MBAY_IWAVE,B. Butman,Massachusetts Bay Internal Wave Experiment,A 1-month 4-element moored array experiment to measure the currents associated with large-amplitude internal waves generated by tidal flow across Stellwagen Bank.
MBAY_LTB,B. Butman,Long-term observations in Massachusetts Bay; Site B-Scituate,Measurements of currents and other oceanographic properties were made to assess the impact of sewage discharge from the proposed outfall site.
MBAY_LT,B. Butman,Long-term observations in Massachusetts Bay; Site A-Boston Harbor,Measurements of currents and other oceanographic properties were made to assess the impact of sewage discharge from the proposed outfall site.
MBAY_STELL,R. Signell,Monitoring on Stellwagen Bank,A year-long series of current measurements on the eastern flank of Stellwagen Bank to document the currents at the mouth of Massachusetts Bay driven by the Maine Coastal current.
MBAY_WEST,B. Butman,Currents and Sediment Transport in Western Massachusetts Bay,A pilot winter-time experiment to investigate circulation and sediment transport. Designed to provide information to aid in citing the new ocean outfall for the Boston sewer system.
MOBILE_BAY,B. Butman,Mobile Bay Study,Measure currents and transport out of Mobile Bay.
MONTEREY_BAY,M. Noble,Monterey Bay National Marine Sanctuary Program,Part of a large multi-disciplinary experiment to characterize the geologic environment and to generate a sediment budget.
MONTEREY_CAN,M. Noble,Monterey Canyon Experiment, A program to determine the mechanisms that govern the circulation within and the transport of sediment and water through Monterey Submarine Canyon.
MYRTLEBEACH,J. Warner,Myrtle Beach Experiment SC,Measurements collected as part of a larger study to understand the physical processes that control the transport of sediments in Long Bay South Carolina.
NE_SLOPE,B. Butman,Currents on the New England Continental Slope,A study designed to describe the currents and to investigate the transport of sediment from the shelf to the slope.
OCEANOG_C,B. Butman,Oceanographer Canyon Dynamics Experiment,A field experiment to determine the importance of submarine canyons in sediment transport along and across the continental margin.
ORANGE_COUNTY,M. Noble,Orange County Sanitation District Studies,Observations to monitor coastal ocean process that transport suspended material and associated comtaminants across the shelf
PONCHARTRAIN,R. Signell,Lake Ponchartrain Project,A series of moored array studies to investigate the circulation and particle transport in Lake Pontchartrain.
PV_SHELF04,M. Noble,Palos Verdes Shelf 2004,Additional observations to estimate the quantity and direction of sediment erosion and transport on the shelf near the White Point ocean outfalls.
PV_SHELF07,M. Noble,Palos Verdes Shelf 2007,Follow-up observations to evaluate how often coastal ocean processes move the DDT contaminated sediments near the White Point ocean outfalls.
PV_SHELF,M. Noble,Palos Verdes Shelf Study,Initial observations of currents and circulation near the White Point ocean outfalls determine how often coastal ocean processes move the DDT contaminated sediments in this region.
SAB_SED,B. Butman,Sediment Transport Observations in the Southern Atlantic Bight,A series of studies to assess environmental hazards to petroleum development in the South Atlantic Bight.
SOUTHERN_CAL,M. Noble,Southern California Project,A series of moorings were deployed to understand how coastal ocean processes that move sediments change with location on the shelf.
STRESS,B. Butman,Sediment Transport on Shelves and Slopes (STRESS),Experiment on the California continental margin to investigate storm-driven sediment transport.
WRIGHTSVILLE,R. Thieler,Wrightsville Beach Study, Measurements of bottom currents and waves to investigate the flow field and sediment transport in a rippled scour depression offshore of Wrightsville Beach NC.
DIAMONDSHOALS,J. Warner,Cape Hatteras- Diamond Shoals,This experiment was designed to investigate the ocean circulation and sediment transport dynamics at Diamond Shoals NC.
CHANDELEUR,C. Sherwood,Chandeleur Islands Oceanographic Measurements,A program to measure waves water levels and currents near the Chandeleur Islands Louisiana and adjacent berm construction site.
WFAL,N. Ganju,West Falmouth Harbor Fluxes,Oceanographic and water-quality observations made at six locations in West Falmouth Harbor and Buzzards Bay.
BW2011,N. Ganju, Blackwater 2011, Oceanographic and Water-Quality Measurements made at several sites in 2 watersheds in Blackwater National Wildlife Refuge.
MVCO_11,C. Sherwood, OASIS MVCO 2011, Near-seabed Oceanographic Observations made as part of the 2011 OASIS Project at the MVCO.
HURRIRENE_BB,B. Butman, Observations in Buzzards Bay during and after a Hurricane, Oceanographic data collected in Buzzards Bay MA during Hurricane Irene August 2011.'''
# <codecell>
# Parse the embedded CSV into a table indexed by project id.  The separator is
# a regular expression (comma plus optional blanks), which only the Python
# parser engine supports, so that engine is requested explicitly.
project = pd.read_csv(
    cStringIO.StringIO(projs.strip()),
    sep=r",\s*",
    engine='python',
    index_col='project_id',
    names=['project_id', 'project_pi', 'project_name', 'project_summary'])
# <codecell>
# label-based row lookup; returns the record for one project
type(project.loc['PV_SHELF'])
# <codecell>
len(project)
# <codecell>
# list every project id with its principal investigator
for index, row in project.iterrows():
    print('%s %s' % (index, row['project_pi']))
# <codecell>
"""function nname=lookup_cf(long_name)
% LOOKUP_CF Get CF equivalent name for EPIC variable long_name
% return the new name string or [] if there's no equivalent
%
if(strfind(lower(long_name),'temp'))
nname='sea_water_temperature';
elseif (strfind(lower(long_name),'cond'))
nname='sea_water_electrical_conductivity';
elseif (strfind(lower(long_name),'sal'))
nname='sea_water_salinity';
elseif (strfind(lower(long_name),'sigma'))
nname='sea_water_sigma_theta';
% also have to deal with the min, max std of vels for burst stats files
elseif (strfind(lower(long_name),'east'))
nname='eastward_sea_water_velocity';
elseif (strfind(lower(long_name),'north'))
nname='northward_sea_water_velocity';
elseif (strfind(lower(long_name),'vertical'))
nname='upward_sea_water_velocity';
elseif (strfind(lower(long_name),'pitch'))
nname='platform_pitch_angle';
elseif (strfind(lower(long_name),'roll'))
nname='platform_roll_angle';
elseif (strfind(lower(long_name),'head'))
nname='platform_orientation';
elseif (strfind(lower(long_name),'pres'))
if ~isempty(strfind(lower(long_name),'dev')) || ~isempty(strfind(lower(long_name),'std'))
nname=[];
else
nname='sea_water_pressure';
end
elseif (strfind(lower(long_name),'cond'))
nname='sea_water_electrical_conductivity';
elseif (strfind(lower(long_name),'speed'))
if (strfind(lower(long_name),'rotor'))
nname=[];
else
nname='sea_water_speed';
end
elseif (strfind(lower(long_name),'direction'))
nname='direction_of_sea_water_velocity';
else
nname=[];
end
disp([long_name ' : ' nname])
""";
# <codecell>
#os.chdir('/usgs/data2/emontgomery/stellwagen/Data/ARGO_MERCHANT')
#root_dir='/usgs/data2/emontgomery/stellwagen/Data/'
#root_dir='/usgs/data2/emontgomery/stellwagen/Data/MVCO_11'
# Input tree (one sub-directory per project id) and output directory.
root_dir = '/usgs/data2/emontgomery/stellwagen/Data/'
odir = '/usgs/data2/emontgomery/stellwagen/CF-1.6/'
# make the data root the working directory
os.chdir(root_dir)
# <codecell>
# .name of a row is its index label, i.e. the project id
project.loc['ARGO_MERCHANT'].name
# <codecell>
# <codecell>
type(project.loc['ARGO_MERCHANT'].project_name)
# <codecell>
# now find all the unique names, long_names & units
# Collect every distinct variable name, long_name and units string found in
# the NetCDF files below root_dir.
names = set()
long_names = set()
units = set()
for path, subdirs, files in os.walk(root_dir):
    for name in files:
        file = os.path.join(path, name)
        try:
            nc = netCDF4.Dataset(file)
        except (IOError, OSError, RuntimeError):
            # not a readable NetCDF file: skip it and keep scanning
            continue
        try:
            for var in nc.variables.keys():
                names.add(var)
                for attr, found in (('long_name', long_names), ('units', units)):
                    try:
                        found.add(getattr(nc.variables[var], attr))
                    except (AttributeError, TypeError, UnicodeError):
                        # attribute absent, or its value cannot be stored in a set
                        pass
        finally:
            # always release the file handle, even if a variable misbehaves
            nc.close()
names = list(names)
long_names = list(long_names)
units = list(units)
print(len(names))
print(len(long_names))
print(len(units))
# <codecell>
# <codecell>
# let's use Ellyn's approach of matching substrings in the long_names to deduce standard_names
# <codecell>
# standard_name to variable mapping
def _matching(pattern, candidates):
    """Return the entries of *candidates* whose lower-cased text matches *pattern*."""
    return [text for text in candidates if re.search(pattern, text.lower())]


# Maps a CF standard_name to the long_name values that should receive it.
# Lookups compare the stripped, lower-cased long_name against these lists, so
# every entry must itself be lower case.
d = {}
# <codecell>
# air temp, frtemp, laser temp factor, internal, temp diff
_matching(r'temp', long_names)
# <codecell>
d['sea_water_temperature'] = [
    'instrument transducer temp.', 'temperature (c)', 'fr temp',
    'adp transducer temp.', 'adcp transducer temp.', 'transducer temp.',
    'temp 1', 'temp 2', 'temperature', 'internal temperature', 'frtemp',
    'temp 2 q', 'temp', 'temp lp',
    # was spelled with an upper-case "C", which a lower-cased lookup can never match
    'sea surface temperature (degrees c)',
]
# <codecell>
# seconds, second
_matching(r'cond', long_names)
# <codecell>
_matching(r'sal', long_names)
# <codecell>
d['sea_water_salinity'] = [
    'salinity 2 q', 'salinity 1', 'ctd salinity, pss-78', 'salinity',
    'salinity (ppt)', 'salinity (psu)',
]
# <codecell>
_matching(r'sigma', long_names)
# <codecell>
# deal with burst data, std dev, resolution velocity, variance
_matching(r'east', long_names)
# <codecell>
d['eastward_sea_water_velocity'] = [
    'eastward velocity', 'east', 'mean eastward velocity', 'east lp',
]
# <codecell>
_matching(r'north', long_names)
# <codecell>
d['northward_sea_water_velocity'] = [
    'northward velocity', 'north', 'mean northward velocity', 'north lp',
]
# <codecell>
_matching(r'vertical', long_names)
# <codecell>
# wind, rotor speed
_matching(r'speed', long_names)
# <codecell>
# std dev
_matching(r'pitch', long_names)
# <codecell>
_matching(r'roll', long_names)
# <codecell>
# std dev
_matching(r'heading', long_names)
# <codecell>
# std, dev, wave height spectra, barometric, presscheck
pres = _matching(r'press', long_names)
pres
# <codecell>
_matching(r'std|dev', pres)
# <codecell>
# save only direction and current direction
_matching(r'direct', long_names)
# <codecell>
def write_dsg(file, ofile, my_series):
    """Convert one gridded time-series NetCDF file to a CF-1.6 DSG file.

    The input must provide ``time``, ``time2``, ``depth``, ``lat`` and ``lon``
    variables plus ``time`` and ``depth`` dimensions; every other variable is
    copied as a data variable.  The output has no station dimension:
    ``lon``/``lat`` are scalars, ``depth`` is a scalar when the input has a
    single depth level, and the station name is stored in the character
    variable ``site``.

    Parameters
    ----------
    file : str
        Path of the NetCDF file to read.  The part of its base name before
        the first ``.`` becomes the dataset ``id`` and the station name.
    ofile : str
        Path of the NetCDF file to create.
    my_series : record with string attributes
        Project record providing ``project_name``, ``project_pi`` and
        ``project_summary`` (callers pass one row of the project table).

    Raises
    ------
    ValueError
        If a data variable has more values than the ``time``/``depth``
        dimensions kept in the output can hold.

    Notes
    -----
    ``standard_name`` attributes are looked up in the module-level dictionary
    ``d``, which maps a CF standard name to the list of lower-case
    ``long_name`` values that should receive it.
    """
    # Use the file-name stem as identifier so the function does not depend on
    # module-level state.
    station_id = os.path.basename(file).split('.')[0]
    nchar = 20  # width of the fixed-length station name
    coord_vars = ('time', 'time2', 'depth', 'lat', 'lon')

    nc = netCDF4.Dataset(file)
    try:
        nt = len(nc.dimensions['time'])
        nz = len(nc.dimensions['depth'])
        is_profile = nz > 1
        # input dimensions that survive in the output; everything else
        # (and depth, when there is a single level) is dropped
        kept_dims = ('time', 'depth') if is_profile else ('time',)
        # data variables are whatever is left once the coordinates are removed
        data_vars = [name for name in nc.variables if name not in coord_vars]

        nco = netCDF4.Dataset(ofile, 'w')
        try:
            # create dimensions
            nco.createDimension('time', nt)
            if is_profile:
                nco.createDimension('depth', nz)
            nco.createDimension('nchar', nchar)

            # create coordinate variables
            time_v = nco.createVariable('time', 'f8', ('time',))
            lon_v = nco.createVariable('lon', 'f4')
            lat_v = nco.createVariable('lat', 'f4')
            if is_profile:
                depth_v = nco.createVariable('depth', 'f4', ('depth',))
            else:
                depth_v = nco.createVariable('depth', 'f4')
            station_v = nco.createVariable('site', 'S1', ('nchar',))

            # write global attributes: start from the input file's own
            # attributes, then add or override the discovery metadata
            g_attdict = nc.__dict__
            g_attdict['Conventions'] = 'CF-1.6'
            if is_profile:
                g_attdict['featureType'] = 'timeSeriesProfile'
            else:
                g_attdict['featureType'] = 'timeSeries'
            g_attdict['naming_authority'] = 'gov.usgs'
            g_attdict['id'] = station_id
            g_attdict['source'] = 'USGS'
            g_attdict['institution'] = 'Woods Hole Coastal and Marine Science Center'
            g_attdict['project'] = my_series.project_name.strip()
            g_attdict['title'] = '%s/%s/%s' % (g_attdict['source'], g_attdict['id'], g_attdict['project'])
            g_attdict['keywords'] = 'Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Salinity'
            g_attdict['keywords_vocabulary'] = 'GCMD Science Keywords'
            g_attdict['standard_name_vocabulary'] = 'CF-1.6'
            g_attdict['creator_email'] = 'emontgomery@usgs.gov'
            g_attdict['creator_name'] = my_series.project_pi.strip()
            g_attdict['publisher_name'] = 'Ellyn Montgomery'
            g_attdict['summary'] = my_series.project_summary.strip()
            nco.setncatts(g_attdict)

            # write station variable
            station_v.cf_role = 'timeseries_id'
            station_v.standard_name = 'station_id'
            data = numpy.empty((1,), 'S' + repr(nchar))
            data[0] = station_id
            station_v[:] = netCDF4.stringtochar(data)

            # write time variable: `time` is a day count offset by 2400001 and
            # `time2` is treated as milliseconds within the day
            # (presumably the EPIC time convention -- confirm against the data)
            time_v.units = 'seconds since 1858-11-17 00:00:00 +0:00'
            time_v.standard_name = 'time'
            time_v.calendar = 'gregorian'
            time_v[:] = ((nc.variables['time'][:] - 2400001.) * 3600. * 24.
                         + nc.variables['time2'][:] / 1000.)

            # write lon variable
            lon_v.units = 'degree_east'
            lon_v.standard_name = 'longitude'
            lon_v[:] = nc.variables['lon'][:]

            # write lat variable
            lat_v.units = 'degree_north'
            lat_v.standard_name = 'latitude'
            lat_v[:] = nc.variables['lat'][:]

            # write depth variable
            depth_v.units = 'm'
            depth_v.standard_name = 'depth'
            depth_v.positive = 'down'
            depth_v.axis = 'Z'
            depth_v[:] = nc.variables['depth'][:]

            # create the data variables
            for varname in data_vars:
                ncvar = nc.variables[varname]
                # load old variable attributes and modify if necessary
                attdict = ncvar.__dict__
                # the fill value can only be set while the variable is created
                fill_value = attdict.pop('_FillValue', None)
                # keep the variable's own time/depth dimensions, in its own order
                dims = tuple(dim for dim in ncvar.dimensions if dim in kept_dims)
                var = nco.createVariable(varname, ncvar.dtype, dims,
                                         fill_value=fill_value)
                attdict['coordinates'] = 'time lon lat depth'
                # assign standard_name if the long_name is in the dictionary
                long_name = attdict.get('long_name')
                if hasattr(long_name, 'strip'):
                    key = long_name.strip().lower()
                    matches = [std for (std, aliases) in d.items() if key in aliases]
                    if len(matches) == 1:
                        attdict['standard_name'] = matches[0]
                # write variable attributes before the data so that any
                # packing attributes are in place when values are stored
                var.setncatts(attdict)
                # write the data, dropping the dimensions that were not kept;
                # reshape raises ValueError if the value counts do not agree
                var[:] = numpy.reshape(ncvar[:], var.shape)
        finally:
            nco.close()
    finally:
        nc.close()
# <codecell>
# Convert every file that sits directly inside each project's directory.
for index, row in project.iterrows():  # loop through each project
    # join instead of concatenating so a missing trailing slash on root_dir is harmless
    idir = os.path.join(root_dir, index)
    for path, subdirs, files in os.walk(idir, topdown=True):
        # emptying the list in place stops os.walk from descending further
        subdirs[:] = []
        print(subdirs)
        for name in files:
            file = os.path.join(path, name)
            print(file)
            # NOTE(review): outputs share one directory, so equal file names
            # from different projects would overwrite each other -- confirm
            ofile = os.path.join(odir, name)
            write_dsg(file, ofile, project.loc[index])
# <codecell>
# Save the collected names, long_names and units, one entry per line.
# The with-blocks close each file even if the write fails.
with open('/usgs/data2/notebook/names.txt', 'w') as f:
    f.write("\n".join(names))
with open('/usgs/data2/notebook/long_names.txt', 'w') as f:
    f.write("\n".join(long_names))
with open('/usgs/data2/notebook/units.txt', 'w') as f:
    f.write("\n".join(units))
# <codecell>
#root_dir='/usgs/data2/emontgomery/stellwagen/Data/'
# Scratch cells: re-run the conversion for a single file.
root_dir = '/usgs/data2/emontgomery/stellwagen/Data'
odir = '/usgs/data2/emontgomery/stellwagen/CF-1.6/'
# idir, file and index still hold the values left by the project loop
os.chdir(idir)
# <codecell>
file = os.path.basename(file)
# <codecell>
# write_dsg needs the project record as third argument
# NOTE(review): assumes `index` still names the project that `file` belongs to
write_dsg(file, '/usgs/data2/notebook/data/test2.nc', project.loc[index])
# <codecell>
ofile = '/usgs/data2/notebook/%s' % file
# NOTE: rebinds the builtin name `id`; kept because the next cell reads it
id = file.split('.')[0]
print(id)
# <codecell>
# Scratch cells: build the global attributes for one file and inspect them.
# (plain-Python replacement for the IPython `cd` magic)
os.chdir('/usgs/data2/emontgomery/stellwagen/Data/ARGO_MERCHANT')
# <codecell>
my_series = project.loc['ARGO_MERCHANT']
nc = netCDF4.Dataset(file)
g_attdict = nc.__dict__
g_attdict['Conventions'] = 'CF-1.6'
g_attdict['naming_authority'] = 'gov.usgs'
g_attdict['id'] = id
g_attdict['source'] = 'USGS'
g_attdict['institution'] = 'Woods Hole Coastal and Marine Science Center'
g_attdict['project'] = my_series.project_name.strip()
g_attdict['title'] = '%s/%s/%s' % (g_attdict['source'], g_attdict['id'], g_attdict['project'])
g_attdict['keywords'] = 'Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Salinity'
g_attdict['keywords_vocabulary'] = 'GCMD Science Keywords'
g_attdict['standard_name_vocabulary'] = 'CF-1.6'
g_attdict['creator_email'] = 'emontgomery@usgs.gov'
g_attdict['creator_name'] = my_series.project_pi.strip()
g_attdict['publisher_name'] = 'Ellyn Montgomery'
g_attdict['summary'] = my_series.project_summary.strip()
# <codecell>
g_attdict
# <codecell>
# print the long_name of every variable that has one
for (k, v) in nc.variables.items():
    try:
        print(v.long_name)
    except (AttributeError, UnicodeError):
        # no long_name, or it cannot be printed on this terminal
        pass
# <codecell>
a = 'FOOBAR '
# <codecell>
# same normalisation that is applied to long_name before the dictionary lookup
a.strip().lower()
# <codecell>