Example #1
0
def create_burst_average_netcdf(input_netcdf_file_path, output_dir):
    """
    Generate the burst-averaged NetCDF product from an input NetCDF file.

    The product is first written to a temporary directory and then moved into
    output_dir. Global attributes of the input file are copied (minus a list
    of quality-control related ones), and for every burst variable a mean,
    min, max, standard deviation and number-of-observations variable is
    created on a new TIME dimension.

    See the variable conf_file_generic below if editing of global or variable
    attributes needs to be done.

    Side effect: sets the module-level INSTRUMENT_SAMPLE_INTERVAL from the
    'instrument_sample_interval' global attribute of the input file
    (defaults to 1 when the attribute is absent).

    :param input_netcdf_file_path: path of the input NetCDF file
    :param output_dir: directory the generated NetCDF file is moved into
    :return: path of the generated file, i.e. output_dir joined with the
             generated file name
    """
    global INSTRUMENT_SAMPLE_INTERVAL

    input_file_rel_path = get_input_file_rel_path(input_netcdf_file_path)
    input_netcdf_obj = Dataset(input_netcdf_file_path, 'r')
    output_netcdf_obj = None
    tmp_netcdf_dir = None

    try:
        # must be set before computing the burst variables
        # NOTE(review): presumably read by create_burst_average_var - confirm
        INSTRUMENT_SAMPLE_INTERVAL = getattr(input_netcdf_obj, 'instrument_sample_interval', 1)

        burst_vars = create_burst_average_var(input_netcdf_obj)
        # list() keeps this working on Python 3, where dict views cannot be
        # indexed. NOTE(review): assumes every burst variable carries the
        # same 'time_mean' values - confirm
        time_burst_vals = list(burst_vars.values())[0]['time_mean']
        tmp_netcdf_dir = tempfile.mkdtemp()

        output_netcdf_file_path = os.path.join(
            tmp_netcdf_dir,
            generate_netcdf_burst_filename(input_netcdf_file_path, burst_vars))
        output_netcdf_obj = Dataset(output_netcdf_file_path, "w", format="NETCDF4")

        # read gatts from input, add them to output. Some gatts will be overwritten
        gatt_to_dispose = ['author', 'file_version_quality_control', 'quality_control_set',
                           'compliance_checker_version', 'compliance_checker_last_updated',
                           'quality_control_log']
        for gatt in input_netcdf_obj.__dict__:
            if gatt not in gatt_to_dispose:
                setattr(output_netcdf_obj, gatt, getattr(input_netcdf_obj, gatt))

        # a missing 'instrument' gatt simply means no title is set here
        instrument = getattr(output_netcdf_obj, 'instrument', '')
        if 'WQM' in instrument:
            output_netcdf_obj.title = 'Burst-averaged biogeochemical measurements at %s' % (input_netcdf_obj.site_code)
        elif 'CTD' in instrument:
            output_netcdf_obj.title = 'Burst-averaged moored CTD measurements at %s' % (input_netcdf_obj.site_code)

        # keep the relative path up to (and including) the last '.nc'; fall
        # back to the whole relative path when it does not contain '.nc'
        m = re.match(r'.*\.nc', input_file_rel_path)
        output_netcdf_obj.input_file = m.group() if m else input_file_rel_path
        output_netcdf_obj.date_created = DATE_UTC_NOW.strftime("%Y-%m-%dT%H:%M:%SZ")

        # vertical extent: use NOMINAL_DEPTH when no valid burst DEPTH exists
        depth_burst_mean_val = burst_vars['DEPTH']['var_mean']
        if np.isnan(depth_burst_mean_val).all():
            nominal_depth = np.double(input_netcdf_obj['NOMINAL_DEPTH'][:])
            output_netcdf_obj.geospatial_vertical_min = nominal_depth
            output_netcdf_obj.geospatial_vertical_max = nominal_depth
        else:
            output_netcdf_obj.geospatial_vertical_min = np.nanmin(depth_burst_mean_val)
            output_netcdf_obj.geospatial_vertical_max = np.nanmax(depth_burst_mean_val)

        # set up dimensions and variables
        output_netcdf_obj.createDimension("TIME", len(time_burst_vals))
        output_netcdf_obj.createVariable("TIME", input_netcdf_obj["TIME"].dtype, ("TIME",))

        dimensionless_var = list_dimensionless_var(input_netcdf_obj)
        # No FillValue for dimensions as for IMOS conventions
        for var in dimensionless_var:
            output_netcdf_obj.createVariable(var, input_netcdf_obj[var].dtype)
            output_netcdf_obj[var][:] = input_netcdf_obj[var][:]

        # attributes of the input variable which are not copied as is
        var_att_disposable = ['name', 'long_name',
                              '_FillValue', 'ancillary_variables',
                              'ChunkSize', 'coordinates']

        for var in burst_vars.keys():
            input_var_object = input_netcdf_obj[var]
            var_dtype = input_var_object.dtype
            fillvalue = getattr(input_var_object, '_FillValue', None)

            output_var_mean = output_netcdf_obj.createVariable(var, var_dtype, ("TIME",), fill_value=fillvalue)
            output_var_min = output_netcdf_obj.createVariable('%s_burst_min' % var, var_dtype, ("TIME",), fill_value=fillvalue)
            output_var_max = output_netcdf_obj.createVariable('%s_burst_max' % var, var_dtype, ("TIME",), fill_value=fillvalue)
            output_var_sd = output_netcdf_obj.createVariable('%s_burst_sd' % var, var_dtype, ("TIME",), fill_value=fillvalue)
            output_var_num_obs = output_netcdf_obj.createVariable('%s_num_obs' % var, "i4", ("TIME",))

            # set up 'bonus' var att from original FV01 file into FV02
            for var_att in input_var_object.__dict__:
                if var_att in var_att_disposable:
                    continue
                att_value = getattr(input_var_object, var_att)
                setattr(output_var_mean, var_att, att_value)
                # the 'comment' attribute is only kept on the mean variable
                if var_att != 'comment':
                    setattr(output_var_min, var_att, att_value)
                    setattr(output_var_max, var_att, att_value)
                    setattr(output_var_sd, var_att, att_value)

            # make sure standard_deviation variable doesn't have a standard_name attr
            if hasattr(output_var_sd, 'standard_name'):
                delattr(output_var_sd, 'standard_name')

            setattr(output_var_mean, 'coordinates', getattr(input_var_object, 'coordinates', ''))
            setattr(output_var_mean, 'ancillary_variables',
                    ('%s_num_obs %s_burst_sd %s_burst_min %s_burst_max' % (var, var, var, var)))

            setattr(output_var_mean, 'cell_methods', 'TIME: mean')
            setattr(output_var_min, 'cell_methods', 'TIME: minimum')
            setattr(output_var_max, 'cell_methods', 'TIME: maximum')
            setattr(output_var_sd, 'cell_methods', 'TIME: standard_deviation')

            setattr(output_var_sd, 'long_name', 'Standard deviation of values in burst, after rejection of flagged data')
            setattr(output_var_num_obs, 'long_name', 'Number of observations included in the averaging process')
            setattr(output_var_min, 'long_name', 'Minimum data value in burst, after rejection of flagged data')
            setattr(output_var_max, 'long_name', 'Maximum data value in burst, after rejection of flagged data')
            # standard_name is preferred over long_name to describe the mean
            mean_desc = getattr(input_var_object, 'standard_name',
                                getattr(input_var_object, 'long_name', ''))
            setattr(output_var_mean, 'long_name',
                    'Mean of %s values in burst, after rejection of flagged data' % (mean_desc))

            output_var_num_obs.units = "1"
            # default of None so that a variable without units is skipped
            # instead of raising AttributeError
            var_units = getattr(input_var_object, 'units', None)
            if var_units:
                output_var_mean.units = var_units
                output_var_min.units = var_units
                output_var_max.units = var_units
                output_var_sd.units = var_units

            var_stdname = getattr(input_var_object, 'standard_name', '')
            if var_stdname != '':
                output_var_num_obs.standard_name = "%s number_of_observations" % var_stdname

            # set up var values; NaN/inf are masked so they are written as fill values
            output_var_mean[:] = np.ma.masked_invalid(burst_vars[var]['var_mean'])
            output_var_min[:] = np.ma.masked_invalid(burst_vars[var]['var_min'])
            output_var_max[:] = np.ma.masked_invalid(burst_vars[var]['var_max'])
            output_var_sd[:] = np.ma.masked_invalid(burst_vars[var]['var_sd'])
            output_var_num_obs[:] = np.ma.masked_invalid(burst_vars[var]['var_num_obs'])

        # add gatts and variable attributes as stored in config files
        conf_file_generic = os.path.join(os.path.dirname(__file__), 'generate_nc_file_att')
        generate_netcdf_att(output_netcdf_obj, conf_file_generic, conf_file_point_of_truth=True)

        # set up original varatts for the following dim, var
        # (new list: do not mutate the one returned by list_dimensionless_var)
        varnames = list(dimensionless_var) + ['TIME']
        for varname in varnames:
            input_var_object = input_netcdf_obj[varname]
            for varatt in input_var_object.__dict__:
                output_netcdf_obj.variables[varname].setncattr(varatt, getattr(input_var_object, varatt))
        time_comment = '%s. Time stamp corresponds to the middle of the burst measurement.' % getattr(input_netcdf_obj['TIME'], 'comment', '')
        # lstrip removes the leading '. ' left when the input TIME has no comment
        output_netcdf_obj.variables['TIME'].comment = time_comment.lstrip('. ')

        time_burst_val_dateobj = num2date(time_burst_vals, input_netcdf_obj['TIME'].units, input_netcdf_obj['TIME'].calendar)
        output_netcdf_obj.time_coverage_start = time_burst_val_dateobj.min().strftime('%Y-%m-%dT%H:%M:%SZ')
        output_netcdf_obj.time_coverage_end = time_burst_val_dateobj.max().strftime('%Y-%m-%dT%H:%M:%SZ')

        # append original gatt to burst average gatt
        gatt = 'comment'
        if hasattr(input_netcdf_obj, gatt):
            setattr(output_netcdf_obj, gatt, getattr(input_netcdf_obj, gatt))

        gatt = 'history'
        setattr(output_netcdf_obj, gatt,
                ('%s. %s' % (getattr(input_netcdf_obj, gatt, ''),
                             'Created %s' % time.ctime(time.time()))).lstrip('. '))

        gatt = 'abstract'
        setattr(output_netcdf_obj, gatt,
                ('%s. %s' % (getattr(output_netcdf_obj, gatt, ''),
                             'Data from the bursts have been cleaned and averaged to create data products. This file is one such product.')).lstrip('. '))

        # add burst keywords
        gatt = 'keywords'
        keywords_burst = 'AVERAGED, BINNED'
        setattr(output_netcdf_obj, gatt,
                ('%s, %s' % (getattr(input_netcdf_obj, gatt, ''), keywords_burst)).lstrip(', '))

        # add values to variables
        output_netcdf_obj['TIME'][:] = np.ma.masked_invalid(time_burst_vals)

        github_comment = 'Product created with %s' % get_git_revision_script_url(os.path.realpath(__file__))
        output_netcdf_obj.lineage = ('%s. %s' % (getattr(output_netcdf_obj, 'lineage', ''), github_comment)).lstrip('. ')

        # the output must be closed (flushed to disk) before it is moved;
        # reset the handle so the cleanup below does not close it twice
        output_netcdf_obj.close()
        output_netcdf_obj = None

        shutil.move(output_netcdf_file_path, output_dir)
    finally:
        # always release file handles and the temporary directory, even when
        # an error occurred half way through the generation of the product
        try:
            if output_netcdf_obj is not None:
                output_netcdf_obj.close()
        finally:
            input_netcdf_obj.close()
            if tmp_netcdf_dir is not None:
                shutil.rmtree(tmp_netcdf_dir, ignore_errors=True)

    return os.path.join(output_dir, os.path.basename(output_netcdf_file_path))