def main(argv):
    """Append a WMO header to the tailored AWIPS NC4 file named in the PSF.

    Reads the working directory from the command line, optionally gzips the
    tailored file (per the Compression_Flag PCF parameter), derives the WMO
    header from the "KNES" marker in the file name plus the job start time,
    and writes a new .wmo file consisting of the header followed by the
    original file contents.  The new file name is written back to the PSF.

    Exits with os.EX_USAGE on a missing argument and os.EX_SOFTWARE on any
    processing failure.
    """
    try:
        # NOTE(review): reads sys.argv[1] rather than the argv parameter;
        # another entry point in this project uses argv[0] -- confirm how
        # this main() is invoked before unifying.
        workDir = sys.argv[1]
    except Exception:
        log.exception("Invalid argument. Must be working Directory.")
        sys.exit(os.EX_USAGE)
    try:
        log.info("Executing WMO header formatting...")
        # Get tailored AWIPS NC4 file name
        nc4File = readPSF(workDir)
        # Get PCF parameters
        gzipFlag = readPCF(workDir, "Compression_Flag")
        jobStart = readPCF(workDir, "job_coverage_start")
        # Compress the file if requested
        if gzipFlag and re.match("On", gzipFlag, re.I):
            gzFile = nc4File + '.gz'
            log.debug("Compressing file, " + gzFile + " , using gzip")
            gzipFile(workDir + '/' + nc4File)
        else:
            gzFile = nc4File
        # Find the WMO header string from the file name: the 11 characters
        # around the "KNES" originator ID, underscores turned into spaces.
        # (str methods replace the deprecated string-module functions, which
        # were removed in Python 3; behavior is identical.)
        idx = gzFile.index("KNES")
        wmoHeader = " ".join(gzFile[idx - 7:idx + 4].split("_"))
        # Append the DDHHMM timestamp taken from job_coverage_start.
        day = jobStart[6:8]
        hour = jobStart[8:10]
        minute = jobStart[10:12]  # renamed from "min" to avoid shadowing the builtin
        wmoHeader += " " + day + hour + minute
        log.info("FOUND WMO header: " + wmoHeader)
        # Open and read in binary file, write wmo header to new file followed
        # by the contents of the original file, streamed in 64 KiB chunks.
        wmoFile = gzFile + '.wmo'
        log.info("Writing WMO header file: " + wmoFile)
        with open(workDir + '/' + gzFile, 'rb') as old, open(workDir + '/' + wmoFile, 'wb') as new:
            new.write(wmoHeader + "\r\r\n")
            for chunk in iter(lambda: old.read(65536), b""):
                new.write(chunk)
        # Write new file name to PSF
        writePSF(workDir, wmoFile)
        log.info("WMO header formatting successful. Exiting.")
    except Exception:
        log.exception("Writing WMO header failed. Exiting.")
        sys.exit(os.EX_SOFTWARE)
def main(argv):
    """Re-grid the tailored NC4 file into geoTIFF image products.

    argv[0] must be the working directory.  Reads the NC4 file named in the
    PSF, computes the source and target pyresample geometry definitions,
    determines the requested output layers/filenames from the PCF, re-grids
    each layer into a geoTIFF, and records the products in the PSF.

    Exits with os.EX_USAGE on a missing argument and os.EX_SOFTWARE on any
    processing failure (consistent with the other NDE entry points).
    """
    try:
        workDir = argv[0]
    except Exception:
        log.exception("Invalid argument. Must be working Directory.")
        sys.exit(os.EX_USAGE)
    try:
        ops_dir = readPCF(workDir, "ops_dir")
        nc4_filename = readPSF(workDir)
        # Clear the PSF (prevents NDE from trying to ingest the .nc file if
        # this job fails, or succeeds but does not produce any images).
        writePSF(workDir, "", overwrite='yes', close=False)
        # Read data and get pyresample data geometry definitions.
        vars, proj_geom_def, grid_info, ll_x, ur_y, data_geom_def = getNC4VariablesAndGeometryDefinitions(workDir, ops_dir, nc4_filename)
        # Get desired layers and corresponding output filenames
        layer_names, output_filenames = getOutputLayersAndFilenames(workDir, nc4_filename)
        # Make the images.  regridAllImages sys.exit()s on failure (SystemExit
        # is not an Exception, so it passes through the handler below).
        imagesMade = regridAllImages(workDir, ops_dir, vars, proj_geom_def, grid_info, ll_x, ur_y, data_geom_def, layer_names, output_filenames, overwrite_psf='no')
        # Close PSF and finish
        writePSF(workDir, "", overwrite='no', close=True)
        log.info("Finished successfully.")
    except Exception:
        # Top-level boundary handler, matching the other main() entry points:
        # log the traceback and return a software-error status to NDE.
        log.exception("geoTIFF generation failed. Exiting.")
        sys.exit(os.EX_SOFTWARE)
def getNC4VariablesAndGeometryDefinitions(workDir, ops_dir, nc4File):
    """Read the tailored netCDF-4 file and build pyresample geometry definitions.

    Reads the netCDF-4 file named in the PSF file and computes the geometry
    definitions for both the data and the desired grid.

    Returns a 6-tuple:
      0. vars: a hash containing the data read from the netCDF-4 file.
      1. proj_geom_def: the geometry definition for the target grid.
      2. grid_info: a dictionary - see util.py::parseProjString for details.
      3-4. ll_x, ur_y: the lower left X coordinate and upper right Y
           coordinate of the projection geometry definition (i.e. the upper
           left corner).
      5. data_geom_def: the geometry definition for the data.
    """
    # Read the intermediate netCDF4 file and pick out the latitude and
    # longitude variables by name prefix (case-insensitive).
    log.info("Reading NC4 file.")
    nc4_vars, meta = readNC4(nc4File)
    for var_name in nc4_vars.iterkeys():
        if re.match('lat', var_name, re.I):
            lat = nc4_vars[var_name]["data"]
        elif re.match('lon', var_name, re.I):
            lon = nc4_vars[var_name]["data"]
    checkInputValidity(workDir, meta)

    # The input swath geometry comes straight from the lat/lon arrays.
    log.info("Obtaining input data geometry definition.")
    data_geom_def = geometry.SwathDefinition(lons=lon, lats=lat)

    # The target grid geometry is derived from the proj4 string in the PCF.
    log.info("Obtaining target grid geometry definition.")
    proj4_string = readPCF(workDir, "grid")
    (proj_geom_def, grid_info, proj4_dict, p,
     ll_x, ll_y, ur_x, ur_y) = getProj4GeometryDefinition(proj4_string, ops_dir, data_geom_def)

    return nc4_vars, proj_geom_def, grid_info, ll_x, ur_y, data_geom_def
def getOutputLayersAndFilenames(workDir, nc4_intermediate_filename):
    """Determine the output layer(s) and their geoTIFF filename(s).

    Returns a tuple containing two elements:
      0. layer_names: an array of strings containing the layers that are
         requested in the PCF.
      1. output_filenames: a dictionary mapping (layer_name -> output_filename).

    Each output filename is built from the PCF output_filename_prefix, the
    orbit ID parsed from the IDPS .h5 input filename, and the platform name
    and start/end/creation times parsed from the intermediate .nc filename.

    Exits with os.EX_USAGE when the PCF layer/prefix lists disagree in
    length, and os.EX_SOFTWARE when an input filename cannot be parsed.
    """
    # Determine which layer(s) of the netCDF file to output as images, and
    # the corresponding filenames.
    log.info("Determining output layer(s) and filename(s).")
    layer_names = readPCF(workDir, "layer_name").split(',')
    output_filename_prefixes = readPCF(workDir, "output_filename_prefix").split(',')
    if len(layer_names) != len(output_filename_prefixes):
        # log.error, not log.exception: no exception is active here, and
        # log.exception would append a bogus "NoneType: None" traceback.
        log.error("Invalid arguments supplied to PCF. The number of layer_name(s) and output_filename_prefix(es) specified are not the same.")
        sys.exit(os.EX_USAGE)
    output_filenames = {}
    VERSION_NO = "v1r0"
    # The orbit ID comes from the IDPS .h5 granule filename ("_b<orbit>_").
    h5_input_filename = readPCF(workDir, "input1")
    match = re.search(r".*_d\d{8}_t\d{7}_e\d{7}_b(\d*)_c.*\.h5", h5_input_filename)
    if match is not None:
        orbit_id = match.group(1)
    else:
        log.error("Failed to determine output filename(s); couldn't parse IDPS .h5 filename. Exiting with failure.")
        sys.exit(os.EX_SOFTWARE)
    # Platform name and timestamps come from the intermediate .nc filename.
    match = re.search(r".*_(.*)_s(\d{15})_e(\d{15})_c(\d{15})\.nc", nc4_intermediate_filename)
    if match is not None:
        platform_name = match.group(1)
        start_time = match.group(2)
        end_time = match.group(3)
        creation_time = match.group(4)
    else:
        log.error("Failed to determine output filename(s); couldn't parse intermediate .nc filename. Exiting with failure.")
        sys.exit(os.EX_SOFTWARE)
    # Pair each layer with its prefix (replaces the index-based xrange loop;
    # the lists were verified above to have equal length).
    for layer_name, prefix in zip(layer_names, output_filename_prefixes):
        output_filenames[layer_name] = (prefix + "-b" + orbit_id + "_" + VERSION_NO + "_" + platform_name
                                        + "_s" + start_time + "_e" + end_time + "_c" + creation_time + ".tif")
    return layer_names, output_filenames
def main(argv):
    """Reformat the tailored GHRSST NC4 file to the GHRSST Data Specification.

    sys.argv[1] must be the working directory.  Copies GDS global attributes
    (start/stop times and lat/lon extents) from the original GHRSST granule
    into the tailored file, renames every per-array "missing_value" attribute
    to "_FillValue", and removes the NDE-specific global attributes.

    Exits with os.EX_USAGE on a missing argument and os.EX_SOFTWARE on any
    processing failure.
    """
    try:
        workDir = sys.argv[1]
    except Exception:
        log.exception("Invalid argument. Must be working directory.")
        sys.exit(os.EX_USAGE)
    try:
        log.info("Executing GDS formatting for GHRSST tailored products...")
        # Tailored NC4 GHRSST file, opened read/write so attributes can be edited.
        tailoredName = readPSF(workDir)
        tailored = h5py.File(tailoredName, 'r+')
        log.info("Formatting: " + tailoredName + " to GHRSST Data Specification")
        # Original GHRSST granule: read-only source for the GDS attributes.
        originalName = readPCF(workDir, "input1")
        original = h5py.File(originalName, 'r')

        # Collect the attributes to be written into the tailored file.
        log.info("Reading GDS attributes from GHRSST file: " + originalName)
        creationDate = tailored.attrs.get("date_created")
        creationDate = re.sub('[-:]', '', creationDate)  # drop date/time separators
        startTime = original.attrs.get("start_time")
        stopTime = original.attrs.get("stop_time")
        nlat = original.attrs.get("northernmost_latitude")
        slat = original.attrs.get("southernmost_latitude")
        elon = original.attrs.get("easternmost_longitude")
        wlon = original.attrs.get("westernmost_longitude")
        original.close()
        log.debug("Reading GHRSST attributes: ")
        log.debug("\tstart_time: " + startTime)
        log.debug("\tstop_time: " + stopTime)
        log.debug("\tnorthernmost_latitude: " + nlat)
        log.debug("\tsouthernmost_latitude: " + slat)
        log.debug("\teasternmost_longitude: " + elon)
        log.debug("\twesternmost_longitude: " + wlon)
        log.debug("\tdate_created: " + creationDate)

        # Write the GDS attributes to the tailored file.
        log.info("Writing GDS attributes")
        tailored.attrs.modify("date_created", creationDate)
        tailored.attrs.create("start_time", startTime)
        tailored.attrs.create("stop_time", stopTime)
        tailored.attrs.create("northernmost_latitude", nlat, dtype='float32')
        tailored.attrs.create("southernmost_latitude", slat, dtype='float32')
        tailored.attrs.create("easternmost_longitude", elon, dtype='float32')
        tailored.attrs.create("westernmost_longitude", wlon, dtype='float32')

        # Rename every per-array "missing_value" attribute to "_FillValue",
        # preserving the array's own dtype.
        log.info("Changing all missing_value array attribute to _FillValue")
        for dsetName, _ in tailored.iteritems():
            missingValue = tailored[dsetName].attrs.get("missing_value")
            if missingValue:
                arrayType = tailored[dsetName].dtype.name
                del tailored[dsetName].attrs["missing_value"]
                tailored[dsetName].attrs.create("_FillValue", missingValue, dtype=arrayType)

        # NDE-specific global attributes (N/A) - don't want to confuse anyone.
        log.info("Deleting any NDE specific global attributes")
        del tailored.attrs["nc4_compression_flag"]
        del tailored.attrs["compression_level"]
        del tailored.attrs["Metadata_Link"]
        tailored.close()
        log.info("GHRSST formatting successful. Exiting.")
    except Exception:
        log.exception(
            "Updating GHRSST tailored NC4 to GDS specification failed. Exiting."
        )
        sys.exit(os.EX_SOFTWARE)
def regridAllImages(workDir, ops_dir, vars, proj_geom_def, grid_info, ul_x, ul_y, data_geom_def, layer_names, output_filenames, overwrite_psf='yes', scale_data_types=None, scale_data_ranges=None):
    """
    Uses pyresample to regrid all of the images.  Each image that is created
    is written to the PSF.  If one or more images cannot be created, they
    will be skipped, but the other images will be created if possible.

    Parameters:
        workDir: NDE working directory (source of PCF parameters and the PSF).
        ops_dir: operations directory, passed through to editGeoTiffKeys.
        vars: dictionary of the variables read from the NC4 file.
        proj_geom_def: pyresample geometry definition of the target grid.
        grid_info: a dictionary - see util.py::parseProjString for details.
        ul_x, ul_y: upper-left corner coordinates of the target grid.
        data_geom_def: pyresample geometry definition of the input data.
        layer_names: The names of the layers (in the vars dictionary) to
            convert into geoTIFF format.
        output_filenames: A dictionary, mapping (layer_name -> output_filename).
        overwrite_psf: 'yes' to overwrite the PSF with the first product
            written, 'no' to append. If not specified, defaults to 'yes'.
        scale_data_types: A dictionary mapping (layer_name -> data_type).
        scale_data_ranges: A dictionary mapping
            (layer_name -> tuple(min_valid_value_in_layer, max_valid_value_in_layer)).

    Returns:
        True if any images were written to the PSF, False if not.
    """
    if not (overwrite_psf == 'yes' or overwrite_psf == 'no'):
        # Fix: the original only logged here (via log.exception, with no
        # active exception) and then carried on with the invalid value all
        # the way into writePSF.  Treat it as a usage error, consistent with
        # the other argument checks in this project.
        log.error("Invalid value specified for overwrite_psf: '" + str(overwrite_psf) + "'. Must be 'yes' or 'no'.")
        sys.exit(os.EX_USAGE)

    # Compute the re-projection information once from the input and output
    # geometry definitions; it is reused for every layer below.
    log.info("Calculating re-gridding based on lat/lon information.")
    resampleRadius = float(readPCF(workDir, "resampleRadius"))
    valid_input_index, valid_output_index, index_array, distance_array = kd_tree.get_neighbour_info(
        data_geom_def, proj_geom_def, resampleRadius, neighbours=1, reduce_data=False)

    gtCitationGeoKey = readPCF(workDir, "GTCitationGeoKey")
    geogCitationGeoKey = readPCF(workDir, "GeogCitationGeoKey")

    # Reproject the images using the information computed above.  If one
    # image fails, the others are still attempted; the worst failure status
    # is reported at the end.
    last_failure_status = os.EX_OK
    for layer in layer_names:
        if layer not in vars:
            log.warning("The layer '" + layer + "' was not found in the NC4 file. Skipping.")
            continue
        output_filename = output_filenames[layer]
        original_data = vars[layer]["data"]
        fill_value = processFillValues(vars, layer, original_data)
        # numpy.all(...) replaces the original element-count comparison
        # (numpy.sum(mask) == rows*cols); identical result for 2-D data.
        if numpy.all(original_data == fill_value):
            log.info("The input layer '" + layer + "' is all fill values. Skipping.")
            continue
        log.info("Regridding layer: '" + layer + "'")
        resampled_data = kd_tree.get_sample_from_neighbour_info(
            'nn', proj_geom_def.shape, original_data,
            valid_input_index, valid_output_index, index_array, fill_value=fill_value)
        if numpy.all(resampled_data == fill_value):
            log.warning("Output file: '" + output_filename + "' was not produced. The result of re-sampling was all fill values. The input data probably missed the grid.")
            continue
        # If requested, do rescaling of the data.
        if scale_data_types is not None:
            if scale_data_ranges is not None:
                resampled_data, fill_value = scaleData(resampled_data, fill_value, scale_data_types[layer], min_in=scale_data_ranges[layer][0], max_in=scale_data_ranges[layer][1])
            else:
                resampled_data, fill_value = scaleData(resampled_data, fill_value, scale_data_types[layer])
        log.info("Creating geoTIFF file: '" + output_filename + "'.")
        createGeoTiff(output_filename, resampled_data, grid_info['proj4_str'],
                      [grid_info['pixel_size_x'], grid_info['pixel_size_y']], [ul_x, ul_y])
        # Edit the GeoTIFF keys; only record the product in the PSF when the
        # edit succeeded.  After the first successful write, switch to append.
        editStatus = editGeoTiffKeys(output_filename, workDir, ops_dir, gtCitationGeoKey=gtCitationGeoKey, geogCitationGeoKey=geogCitationGeoKey)
        if editStatus != os.EX_OK:
            last_failure_status = editStatus
        else:
            writePSF(workDir, output_filename, overwrite=overwrite_psf, close=False)
            overwrite_psf = 'no'

    if last_failure_status != os.EX_OK:
        # log.error, not log.exception: no exception is active at this point.
        log.error("There was an error creating one or more of the geoTIFF output products. Exiting with failure.")
        sys.exit(last_failure_status)
    return (overwrite_psf == 'no')