def create_burst_average_netcdf(input_netcdf_file_path, output_dir):
    """Generate the burst-averaged NetCDF product from an input NetCDF file.

    The product is first written into a temporary directory and then moved
    into ``output_dir``. Global and variable attributes stored in the
    ``generate_nc_file_att`` config file (located next to this module) are
    applied to the output; edit that file if global or variable attributes
    need to change.

    Side effect: the module-level ``INSTRUMENT_SAMPLE_INTERVAL`` is set from
    the input file's ``instrument_sample_interval`` global attribute
    (defaulting to 1) before the burst averages are computed.

    :param input_netcdf_file_path: path of the NetCDF file to burst-average
    :param output_dir: directory the generated product is moved into
    :return: path of the generated product inside ``output_dir``
    :raises IndexError: if no burst variable was produced for the input file
    """
    global INSTRUMENT_SAMPLE_INTERVAL

    input_file_rel_path = get_input_file_rel_path(input_netcdf_file_path)
    input_netcdf_obj = Dataset(input_netcdf_file_path, 'r')
    try:
        # must be set before create_burst_average_var() is called
        # NOTE(review): presumably read by the burst averaging code - confirm
        INSTRUMENT_SAMPLE_INTERVAL = getattr(input_netcdf_obj, 'instrument_sample_interval', 1)

        burst_vars = create_burst_average_var(input_netcdf_obj)
        # list() keeps this working on Python 3, where dict views cannot be
        # indexed; the burst time axis is taken from the first burst variable
        time_burst_vals = list(burst_vars.values())[0]['time_mean']

        tmp_netcdf_dir = tempfile.mkdtemp()
        try:
            output_netcdf_file_path = os.path.join(
                tmp_netcdf_dir,
                generate_netcdf_burst_filename(input_netcdf_file_path, burst_vars))
            output_netcdf_obj = Dataset(output_netcdf_file_path, "w", format="NETCDF4")
            try:
                # read gatts from input, add them to output. Some gatts will
                # be overwritten further down
                _copy_burst_global_attributes(input_netcdf_obj, output_netcdf_obj,
                                              input_file_rel_path, burst_vars)

                # set up dimensions and variables
                output_netcdf_obj.createDimension("TIME", len(time_burst_vals))
                output_netcdf_obj.createVariable("TIME", input_netcdf_obj["TIME"].dtype, ("TIME",))

                # No FillValue for dimensions as for IMOS conventions
                dimensionless_var = list_dimensionless_var(input_netcdf_obj)
                for var in dimensionless_var:
                    output_netcdf_obj.createVariable(var, input_netcdf_obj[var].dtype)
                    output_netcdf_obj[var][:] = input_netcdf_obj[var][:]

                for var, burst_var in burst_vars.items():
                    _write_burst_variable(input_netcdf_obj, output_netcdf_obj, var, burst_var)

                # add gatts and variable attributes as stored in config files
                conf_file_generic = os.path.join(os.path.dirname(__file__), 'generate_nc_file_att')
                generate_netcdf_att(output_netcdf_obj, conf_file_generic,
                                    conf_file_point_of_truth=True)

                _finalise_burst_attributes(input_netcdf_obj, output_netcdf_obj,
                                           dimensionless_var, time_burst_vals)
            finally:
                # always release the output handle, even on failure
                output_netcdf_obj.close()

            shutil.move(output_netcdf_file_path, output_dir)
        finally:
            # best-effort cleanup so a failure never leaves the temporary
            # directory behind and never masks the original exception
            shutil.rmtree(tmp_netcdf_dir, ignore_errors=True)
    finally:
        input_netcdf_obj.close()

    return os.path.join(output_dir, os.path.basename(output_netcdf_file_path))


def _copy_burst_global_attributes(input_netcdf_obj, output_netcdf_obj,
                                  input_file_rel_path, burst_vars):
    """Copy the input global attributes to the output and set the burst specific ones."""
    gatt_to_dispose = ('author', 'file_version_quality_control', 'quality_control_set',
                       'compliance_checker_version', 'compliance_checker_last_updated',
                       'quality_control_log')
    for gatt in input_netcdf_obj.ncattrs():
        if gatt not in gatt_to_dispose:
            setattr(output_netcdf_obj, gatt, getattr(input_netcdf_obj, gatt))

    # a file without an 'instrument' attribute simply keeps the copied title
    instrument = getattr(output_netcdf_obj, 'instrument', '')
    if 'WQM' in instrument:
        output_netcdf_obj.title = 'Burst-averaged biogeochemical measurements at %s' % (
            input_netcdf_obj.site_code)
    elif 'CTD' in instrument:
        output_netcdf_obj.title = 'Burst-averaged moored CTD measurements at %s' % (
            input_netcdf_obj.site_code)

    # keep the relative path up to (and including) its last '.nc'; fall back
    # to the untouched relative path when it contains no '.nc' at all
    m = re.match(r'.*\.nc', input_file_rel_path)
    output_netcdf_obj.input_file = m.group() if m else input_file_rel_path
    output_netcdf_obj.date_created = DATE_UTC_NOW.strftime("%Y-%m-%dT%H:%M:%SZ")

    # vertical extent: use the burst-averaged DEPTH, unless every value is
    # NaN, in which case NOMINAL_DEPTH from the input file is used instead
    depth_burst_mean_val = burst_vars['DEPTH']['var_mean']
    if np.isnan(depth_burst_mean_val).all():
        nominal_depth = np.double(input_netcdf_obj['NOMINAL_DEPTH'][:])
        output_netcdf_obj.geospatial_vertical_min = nominal_depth
        output_netcdf_obj.geospatial_vertical_max = nominal_depth
    else:
        output_netcdf_obj.geospatial_vertical_min = np.nanmin(depth_burst_mean_val)
        output_netcdf_obj.geospatial_vertical_max = np.nanmax(depth_burst_mean_val)


def _write_burst_variable(input_netcdf_obj, output_netcdf_obj, var, burst_var):
    """Create the mean/min/max/sd/num_obs output variables of one burst variable."""
    input_var = input_netcdf_obj[var]
    var_dtype = input_var.dtype
    fillvalue = getattr(input_var, '_FillValue', None)

    output_var_mean = output_netcdf_obj.createVariable(
        var, var_dtype, ("TIME",), fill_value=fillvalue)
    output_var_min = output_netcdf_obj.createVariable(
        '%s_burst_min' % var, var_dtype, ("TIME",), fill_value=fillvalue)
    output_var_max = output_netcdf_obj.createVariable(
        '%s_burst_max' % var, var_dtype, ("TIME",), fill_value=fillvalue)
    output_var_sd = output_netcdf_obj.createVariable(
        '%s_burst_sd' % var, var_dtype, ("TIME",), fill_value=fillvalue)
    output_var_num_obs = output_netcdf_obj.createVariable(
        '%s_num_obs' % var, "i4", ("TIME",))

    # set up 'bonus' var att from original FV01 file into FV02
    var_att_disposable = ('name', 'long_name', '_FillValue', 'ancillary_variables',
                          'ChunkSize', 'coordinates')
    for var_att in input_var.ncattrs():
        if var_att in var_att_disposable:
            continue
        att_value = getattr(input_var, var_att)
        setattr(output_var_mean, var_att, att_value)
        # the 'comment' attribute is only carried over to the mean variable
        if var_att != 'comment':
            setattr(output_var_min, var_att, att_value)
            setattr(output_var_max, var_att, att_value)
            setattr(output_var_sd, var_att, att_value)

    # make sure the standard deviation variable doesn't have a standard_name attr
    if hasattr(output_var_sd, 'standard_name'):
        delattr(output_var_sd, 'standard_name')

    output_var_mean.coordinates = getattr(input_var, 'coordinates', '')
    output_var_mean.ancillary_variables = (
        '%s_num_obs %s_burst_sd %s_burst_min %s_burst_max' % (var, var, var, var))

    output_var_mean.cell_methods = 'TIME: mean'
    output_var_min.cell_methods = 'TIME: minimum'
    output_var_max.cell_methods = 'TIME: maximum'
    output_var_sd.cell_methods = 'TIME: standard_deviation'

    output_var_sd.long_name = \
        'Standard deviation of values in burst, after rejection of flagged data'
    output_var_num_obs.long_name = 'Number of observations included in the averaging process'
    output_var_min.long_name = 'Minimum data value in burst, after rejection of flagged data'
    output_var_max.long_name = 'Maximum data value in burst, after rejection of flagged data'
    # described by standard_name when available, by long_name otherwise
    output_var_mean.long_name = \
        'Mean of %s values in burst, after rejection of flagged data' % (
            getattr(input_var, 'standard_name', getattr(input_var, 'long_name', '')))

    output_var_num_obs.units = "1"
    # an input variable without units is tolerated: no units are written then
    var_units = getattr(input_var, 'units', None)
    if var_units:
        output_var_mean.units = var_units
        output_var_min.units = var_units
        output_var_max.units = var_units
        output_var_sd.units = var_units

    var_stdname = getattr(input_var, 'standard_name', '')
    if var_stdname != '':
        output_var_num_obs.standard_name = "%s number_of_observations" % var_stdname

    # set up var values; NaN/inf entries are masked before being written
    output_var_mean[:] = np.ma.masked_invalid(burst_var['var_mean'])
    output_var_min[:] = np.ma.masked_invalid(burst_var['var_min'])
    output_var_max[:] = np.ma.masked_invalid(burst_var['var_max'])
    output_var_sd[:] = np.ma.masked_invalid(burst_var['var_sd'])
    output_var_num_obs[:] = np.ma.masked_invalid(burst_var['var_num_obs'])


def _finalise_burst_attributes(input_netcdf_obj, output_netcdf_obj,
                               dimensionless_var, time_burst_vals):
    """Restore the original TIME/dimensionless var atts, write TIME and the final gatts."""
    # set up original varatts for the following dim, var; a new list is built
    # so that the caller's dimensionless_var list is left untouched
    for varname in list(dimensionless_var) + ['TIME']:
        input_var = input_netcdf_obj[varname]
        for varatt in input_var.ncattrs():
            output_netcdf_obj.variables[varname].setncattr(varatt, getattr(input_var, varatt))

    # lstrip('. ') drops the leading separator when there is no original text
    time_comment = '%s. Time stamp corresponds to the middle of the burst measurement.' % getattr(
        input_netcdf_obj['TIME'], 'comment', '')
    output_netcdf_obj.variables['TIME'].comment = time_comment.lstrip('. ')

    time_burst_val_dateobj = num2date(time_burst_vals,
                                      input_netcdf_obj['TIME'].units,
                                      input_netcdf_obj['TIME'].calendar)
    output_netcdf_obj.time_coverage_start = \
        time_burst_val_dateobj.min().strftime('%Y-%m-%dT%H:%M:%SZ')
    output_netcdf_obj.time_coverage_end = \
        time_burst_val_dateobj.max().strftime('%Y-%m-%dT%H:%M:%SZ')

    # the original comment gatt, when present, replaces the output one
    if hasattr(input_netcdf_obj, 'comment'):
        output_netcdf_obj.comment = input_netcdf_obj.comment

    # append the creation time to the original history
    output_netcdf_obj.history = ('%s. %s' % (
        getattr(input_netcdf_obj, 'history', ''),
        'Created %s' % time.ctime(time.time()))).lstrip('. ')

    # append the burst description to the abstract already set on the output
    output_netcdf_obj.abstract = ('%s. %s' % (
        getattr(output_netcdf_obj, 'abstract', ''),
        'Data from the bursts have been cleaned and averaged to create data products. This file is one such product.')).lstrip('. ')

    # add burst keywords
    keywords_burst = 'AVERAGED, BINNED'
    output_netcdf_obj.keywords = ('%s, %s' % (
        getattr(input_netcdf_obj, 'keywords', ''), keywords_burst)).lstrip(', ')

    # add values to variables
    output_netcdf_obj['TIME'][:] = np.ma.masked_invalid(time_burst_vals)

    github_comment = 'Product created with %s' % get_git_revision_script_url(
        os.path.realpath(__file__))
    output_netcdf_obj.lineage = ('%s. %s' % (
        getattr(output_netcdf_obj, 'lineage', ''), github_comment)).lstrip('. ')