def save_data_to_file(stem_name, grid_shape, output_path, data_array, data_type, file_permissions="a"):
    """Write a numpy array to a flat binary file named from its stem, type and shape.

    The file name is produced by ``fbf.filename(stem_name, data_type,
    shape=grid_shape)`` and the file is placed in ``output_path`` (after
    ``~`` expansion and conversion to an absolute path).

    By default the data is appended to the end of the file; pass a different
    ``file_permissions`` value (any mode string accepted by python's built-in
    ``open``) to change that, e.g. ``"w"`` to overwrite.

    stem_name        -- the name stem handed to ``fbf.filename``
    grid_shape       -- the shape handed to ``fbf.filename`` for the file name
    output_path      -- directory the file is written into
    data_array       -- array to write; it is cast with ``astype(data_type)`` first
    data_type        -- type used both for the cast and for building the file name
    file_permissions -- mode string passed to ``open`` (default ``"a"``)
    """

    file_name = fbf.filename(stem_name, data_type, shape=grid_shape)
    output_dir = os.path.abspath(os.path.expanduser(output_path))
    full_path = os.path.join(output_dir, file_name)

    # the with block guarantees the handle is released even if the cast or
    # the write raises part way through
    with open(full_path, file_permissions) as output_file:
        data_array.astype(data_type).tofile(output_file)
def space_day(*args):
    """grid one day of input files in space

    given an input directory that contains appropriate files,
    grid them in space and put the resulting gridded files
    for that day in the output directory.

    Any positional arguments are treated as the names of the variables the
    caller wants processed and are passed to
    ``general_guidebook.get_variable_names`` as ``user_requested_names``.
    The input/output directories, minimum scan angle and grid cell size are
    read from the module-level ``options`` object.

    If any temporary or final output file for this run already exists in the
    output directory, a warning is logged and the function returns without
    processing anything.

    Note: the output directory will also be used for intermediary working files.
    """

    # set up some of our input from the caller for easy access
    desired_variables = list(args)
    input_path = options.inputPath
    output_path = options.outputPath
    min_scan_angle = options.minScanAngle
    grid_degrees = float(options.gridDegrees)

    # determine the grid size in number of elements
    grid_lon_size = int(math.ceil(360.0 / grid_degrees))
    grid_lat_size = int(math.ceil(180.0 / grid_degrees))
    space_grid_shape = (grid_lon_size, grid_lat_size)  # TODO, is this the correct order?

    # look through our files and figure out what variables we expect from them
    files_to_process = []
    expected_vars = {}
    all_vars = set()
    date_time_temp = None
    for file_name in sorted(os.listdir(input_path)):
        found_vars = general_guidebook.get_variable_names(file_name,
                                                          user_requested_names=desired_variables)

        # files that offer no variables are simply dropped from consideration
        if len(found_vars) == 0:
            continue

        files_to_process.append(file_name)
        expected_vars[file_name] = found_vars
        all_vars.update(found_vars)

        # the first usable file provides the date/time used for every output name
        if date_time_temp is None:
            date_time_temp = general_guidebook.parse_datetime_from_filename(file_name)

    def stem_for(var_name, suffix):
        # every file written or read by this run shares the same naming inputs
        # TODO, pull satellite and algorithm too
        return io_manager.build_name_stem(var_name, date_time=date_time_temp,
                                          satellite=None, algorithm=None,
                                          suffix=suffix)

    # check to make sure our intermediate file names don't exist already
    for var_name in all_vars:
        for suffix in io_manager.ALL_EXPECTED_SUFFIXES:
            temp_name = fbf.filename(stem_for(var_name, suffix), TEMP_DATA_TYPE,
                                     shape=space_grid_shape)
            if os.path.exists(os.path.join(output_path, temp_name)):
                LOG.warn("Cannot process files because matching temporary or output files exist in the output directory.")
                return

    # loop to deal with data from each of the files
    for each_file in files_to_process:

        full_file_path = os.path.join(input_path, each_file)
        LOG.debug("Processing file: " + full_file_path)

        # load the aux data
        file_object, temp_aux_data = io_manager.load_aux_data(full_file_path, min_scan_angle)

        try:
            # calculate the indices for the space grid based on the aux data
            # (we can do this now since the lon/lat is the same for each variable in the file)
            day_lon_index, day_lat_index, night_lon_index, night_lat_index = \
                space_gridding.calculate_index_from_nav_data(temp_aux_data, grid_degrees)

            # loop to load each variable in the file and process it
            for variable_name in expected_vars[each_file]:

                LOG.debug("Processing variable: " + variable_name)

                # load the variable
                file_object, var_data = io_manager.load_variable_from_file(variable_name,
                                                                           file_path=full_file_path,
                                                                           file_object=file_object)

                # split the variable by day/night
                day_var_data = var_data[temp_aux_data[DAY_MASK_KEY]]
                night_var_data = var_data[temp_aux_data[NIGHT_MASK_KEY]]

                # space grid the data using the indexes we calculated earlier
                # (the max depth that comes back is not needed here)
                day_space_grid, day_density_map, day_nobs, _ = \
                    space_gridding.space_grid_data(grid_lon_size, grid_lat_size,
                                                   day_var_data, day_lon_index, day_lat_index)
                night_space_grid, night_density_map, night_nobs, _ = \
                    space_gridding.space_grid_data(grid_lon_size, grid_lat_size,
                                                   night_var_data, night_lon_index, night_lat_index)

                # save the space grids, density maps and nobs for this variable;
                # the default file mode is used so each input file's results
                # accumulate in the same temporary files (day first, then night)
                temp_outputs = (
                    (io_manager.DAY_TEMP_SUFFIX,           day_space_grid),
                    (io_manager.DAY_DENSITY_TEMP_SUFFIX,   day_density_map),
                    (io_manager.DAY_NOBS_TEMP_SUFFIX,      day_nobs),
                    (io_manager.NIGHT_TEMP_SUFFIX,         night_space_grid),
                    (io_manager.NIGHT_DENSITY_TEMP_SUFFIX, night_density_map),
                    (io_manager.NIGHT_NOBS_TEMP_SUFFIX,    night_nobs),
                )
                for suffix, data_to_save in temp_outputs:
                    io_manager.save_data_to_file(stem_for(variable_name, suffix),
                                                 space_grid_shape, output_path,
                                                 data_to_save, TEMP_DATA_TYPE)
        finally:
            # make sure each file is closed when we're done with it,
            # even if processing one of its variables failed
            io_manager.close_file(full_file_path, file_object)

    def pack_and_save(workspace, var_name, density, label,
                      temp_suffix, nobs_temp_suffix, final_suffix, nobs_final_suffix):
        # collapse one half (day or night) of a variable's temporary data into its final files

        # only write files if we have some data
        if numpy.sum(density) > 0:

            # load the sparse space grid and collapse it
            sparse_data = workspace[stem_for(var_name, temp_suffix)][:]
            final_data = space_gridding.pack_space_grid(sparse_data, density)

            # save the final array to an appropriately named file
            io_manager.save_data_to_file(stem_for(var_name, final_suffix),
                                         space_grid_shape, output_path,
                                         final_data, TEMP_DATA_TYPE,
                                         file_permissions="w")

            # load the nobs file and collapse it along its first axis
            nobs_counts = workspace[stem_for(var_name, nobs_temp_suffix)][:]
            nobs_final = numpy.sum(nobs_counts, axis=0)

            # save the final nobs array to an appropriately named file
            io_manager.save_data_to_file(stem_for(var_name, nobs_final_suffix),
                                         space_grid_shape, output_path,
                                         nobs_final, TEMP_DATA_TYPE,
                                         file_permissions="w")
        else:
            LOG.warn("No " + label + " data was found for variable " + var_name
                     + ". " + label.capitalize() + " files will not be written.")

    # collapse the per variable space grids to remove excess NaNs
    for variable_name in all_vars:

        LOG.debug("Packing space data for variable: " + variable_name)

        # load the variable's density maps
        var_workspace = Workspace.Workspace(dir=output_path)
        day_var_density = var_workspace[stem_for(variable_name, io_manager.DAY_DENSITY_TEMP_SUFFIX)][:]
        night_var_density = var_workspace[stem_for(variable_name, io_manager.NIGHT_DENSITY_TEMP_SUFFIX)][:]

        pack_and_save(var_workspace, variable_name, day_var_density, "day",
                      io_manager.DAY_TEMP_SUFFIX, io_manager.DAY_NOBS_TEMP_SUFFIX,
                      io_manager.DAY_SUFFIX, io_manager.DAY_NOBS_SUFFIX)
        pack_and_save(var_workspace, variable_name, night_var_density, "night",
                      io_manager.NIGHT_TEMP_SUFFIX, io_manager.NIGHT_NOBS_TEMP_SUFFIX,
                      io_manager.NIGHT_SUFFIX, io_manager.NIGHT_NOBS_SUFFIX)

    # remove the extra temporary files in the output directory
    remove_suffixes = ["*" + p + "*" for p in io_manager.EXPECTED_TEMP_SUFFIXES]
    remove_file_patterns(output_path, remove_suffixes)
def space_griding_day(*args) : """grid one day of input files in space given an input directory that contains appropriate files, grid them in space and put the resulting gridded files for that day in the output directory. Note: the output directory will also be used for intermediary working files. """ # set up some of our input from the caller for easy access desired_variables = list(args) if len(args) > 0 else [ ] input_path = stg_util.clean_path(options.inputPath) output_path = stg_util.clean_path(options.outputPath) stg_util.setup_dir_if_needed(output_path, "output") min_scan_angle = options.minScanAngle grid_degrees = float(options.gridDegrees) do_day_night = not options.keep_day_night_together do_multi_overpass = options.allow_multiple_overpasses_per_cell temp_str = "will allow" if do_multi_overpass else "will not allow" LOG.debug("Space griding " + temp_str + " multiple overpasses per grid cell.") # determine the grid size in number of elements grid_lon_size = int(math.ceil(360.0 / grid_degrees)) grid_lat_size = int(math.ceil(180.0 / grid_degrees)) space_grid_shape = (grid_lat_size, grid_lon_size) # I've confirmed with Nadia that this is the correct order # look through our files and figure out what variables we expect from them possible_files = os.listdir(input_path) expected_vars = { } all_vars = set() date_time_temp = None expected_num_files = None satellite = None instrument = None for file_name in sorted(possible_files) : expected_vars[file_name] = general_guidebook.get_variable_names (file_name, user_requested_names=desired_variables) # if this file has no variables, remove it from our files for consideration if len(expected_vars[file_name]) <= 0 : del expected_vars[file_name] possible_files.remove(file_name) # otherwise, add the variables we found to our list of all variables and try to get a time from the file else : all_vars.update(expected_vars[file_name]) # if we don't have it yet, update some general information about this run based on the file 
name temp_sat, temp_inst = general_guidebook.get_satellite_from_filename(file_name) satellite = temp_sat if satellite is None else satellite instrument = temp_inst if instrument is None else instrument date_time_temp = general_guidebook.parse_datetime_from_filename(file_name) if date_time_temp is None else date_time_temp expected_num_files = general_guidebook.get_expected_files_per_day(instrument) if expected_num_files is None else expected_num_files # check to make sure our intermediate file names don't exist already expected_space_file_suffixes = io_manager.get_list_of_suffixes(DAILY_SPACE_TYPE, ALL_FILES_TYPE) for var_name in all_vars : for suffix in expected_space_file_suffixes : temp_stem = io_manager.build_name_stem(var_name, date_time=date_time_temp, satellite=satellite, suffix=suffix) temp_name = fbf.filename(temp_stem, TEMP_DATA_TYPE, shape=space_grid_shape) if os.path.exists(os.path.join(output_path, temp_name)) : LOG.warn ("Cannot process files because matching temporary or output files exist in the output directory.") return # loop to deal with data from each of the files failed_files = 0 successful_files = 0 abstract_data_sets = io_manager.get_expected_abstract_sets(instrument, separate_day_night=do_day_night) collected_data = { } for each_file in sorted(possible_files) : full_file_path = os.path.join(input_path, each_file) LOG.debug("Processing file: " + full_file_path) # load the aux data file_object, temp_aux_data = io_manager.load_aux_data(full_file_path, min_scan_angle) # figure out what data sets we need to process data_sets = io_manager.get_expected_data_sets_from_aux_data (instrument, temp_aux_data, do_separate_day_night=do_day_night) ok_file = True lon_indices = { } lat_indices = { } try : # calculate the indices for the space grid based on the navigation data # (we can do this now since the lon/lat is the same for each variable in the file) for set_key in data_sets.keys() : set_mask = data_sets[set_key][SET_MASK_KEY] temp_lon_data = 
data_sets[set_key][LON_KEY][set_mask] temp_lat_data = data_sets[set_key][LAT_KEY][set_mask] lat_index, lon_index = space_gridding.calculate_index_from_nav_data(temp_lat_data, temp_lon_data, grid_degrees) lat_indices[set_key] = lat_index lon_indices[set_key] = lon_index except Exception, e : LOG.warn("Unable to process basic space griding for file: " + full_file_path) LOG.warn("This file will not be processed.") exc_type, exc_value, exc_traceback = sys.exc_info() LOG.debug(traceback.format_exception(exc_type, exc_value, exc_traceback)) ok_file = False failed_files += 1 # if the file looks alright so far, continue processing it if ok_file : # loop to load each variable in the file and process it for variable_name in expected_vars[each_file] : LOG.debug("Processing variable: " + variable_name) # load the variable file_object, var_data = io_manager.load_variable_from_file (variable_name, file_path=full_file_path, file_object=file_object) # split the variable data by sets separated_data = { } separated_time = { } separated_angles = { } for set_key in data_sets.keys() : separated_data [set_key] = var_data[data_sets[set_key][SET_MASK_KEY]] separated_time [set_key] = data_sets[set_key][SCAN_LINE_TIME_KEY] [data_sets[set_key][SET_MASK_KEY]] separated_angles[set_key] = data_sets[set_key][SENSOR_ZENITH_ANGLE_KEY][data_sets[set_key][SET_MASK_KEY]] ok_file = True space_grids = { } density_maps = { } nobs = { } max_depths = { } aux_times = { } aux_angles = { } try : # space grid the data using the indexes we calculated earlier for set_key in data_sets.keys() : # note: also preserve useful aux data for this file temp_space_grid, temp_density_map, temp_nobs, temp_max_depth, temp_aux_time, temp_aux_angle = \ space_gridding.space_grid_data(grid_lat_size, grid_lon_size, separated_data[set_key], lat_indices[set_key], lon_indices[set_key], aux_time=separated_time[set_key], aux_sensor_zenith_angle=separated_angles[set_key]) space_grids [set_key] = temp_space_grid density_maps[set_key] = 
temp_density_map nobs [set_key] = temp_nobs max_depths [set_key] = temp_max_depth aux_times [set_key] = temp_aux_time # save the avg time for each cell aux_angles [set_key] = temp_aux_angle # save the max angle for each cell except Exception, e : LOG.warn("Unable to process variable data space griding for file: " + full_file_path) LOG.warn("This variable will not be processed.") exc_type, exc_value, exc_traceback = sys.exc_info() LOG.debug(traceback.format_exception(exc_type, exc_value, exc_traceback)) ok_file = False failed_files += 1 # if the data in the file looks ok so far, save it to the output if ok_file : #print("space grid shape: " + str(space_grids[set_key].shape)) # save the space grids and density info for this variable and it's density map to files for set_key in data_sets.keys() : if do_multi_overpass : # save temporary data to accumulate it as we go through all the files for a day # save the gridded data io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp, satellite=satellite, suffix=set_key + "-" + TEMP_SUFFIX_KEY), space_grid_shape, output_path, space_grids[set_key], TEMP_DATA_TYPE) # save the grid density map io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp, satellite=satellite, suffix=set_key + "-" + DENSITY_SUFFIX + "-" + TEMP_SUFFIX_KEY), space_grid_shape, output_path, density_maps[set_key], TEMP_DATA_TYPE) # save the number of observations grid io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp, satellite=satellite, suffix=set_key + "-" + NOBS_SUFFIX + "-" + TEMP_SUFFIX_KEY), space_grid_shape, output_path, nobs[set_key], TEMP_DATA_TYPE) else : # if we haven't processed this variable yet, add a dictionary for it if variable_name not in collected_data : collected_data[variable_name] = { } # if we have no measurements, expand the array to depth 1 to make numpy happy space_grids[set_key] = 
_expand_array_if_needed(space_grids[set_key], 1) # if there isn't any data for this set in our collection, just put what we have in to start with if set_key not in collected_data[variable_name] : collected_data[variable_name][set_key] = { } current_set = collected_data[variable_name][set_key] # save the the 2D arrays current_set["density"] = density_maps[set_key] current_set["nobs"] = nobs[set_key] current_set["times"] = aux_times[set_key] current_set["angles"] = aux_angles[set_key] # save the space gridded data (the 3D array) new_depth = int(space_grids[set_key].shape[0] * ARRAY_GROWTH_FACTOR) # expand the arrays a little extra current_set["space-gridded-data"] = _expand_array_if_needed(space_grids[set_key], new_depth) else : # when we already have data for this set key, incorporate the new overpass appropriately # there are several possible cases: # we have no data in that cell of the grid <- use data from the new file # we have data in that cell, and it's the same orbit <- add the new data to the end of the old data # we have data in that cell, it's a diff orbit <- either replace the data in that cell or ignore the new data # (whether you replace or ignore depends on whether the new or old data has the worst sensor zenith angle) # for convenience current_set = collected_data[variable_name][set_key] # pre-calculate where there is any data at all in our old and new data sets have_old_data = current_set["nobs"] > 0 have_new_data = nobs[set_key] > 0 both_have_data = have_old_data & have_new_data # some other calculations to support our masking better_new_angle = aux_angles[set_key] < current_set["angles"] time_diff = numpy.abs(current_set["times"] - aux_times[set_key]) # figure out the masks that will control how we change our data # mask of the places where there is data in the new file, but we had none before have_only_new_data_mask = (~ have_old_data) & have_new_data # mask of the places where there is data in both and it's the same orbits use_both_mask = 
both_have_data & (time_diff <= space_gridding.SAME_TIME_RANGE_SECONDS) # mask of the places where there is data in both and it's different orbit use_only_new_data_mask = both_have_data & (time_diff > space_gridding.SAME_TIME_RANGE_SECONDS) & better_new_angle # Note: We will choose the orbit with the smallest maximum observed sensor zenith angle # in the grid cell – especially necessary at high latitudes use_new = have_only_new_data_mask | use_only_new_data_mask # expand the arrays if needed o_depth = current_set["space-gridded-data"].shape[0] # the depth of the old array n_depth = space_grids[set_key].shape[0] # the depth of the new array c_depth = numpy.max(current_set["density"][use_both_mask] + density_maps[set_key][use_both_mask]) if numpy.any(use_both_mask) else 0 # the combined depth new_depth = o_depth if o_depth >= n_depth else int(n_depth * ARRAY_GROWTH_FACTOR) new_depth = new_depth if new_depth >= c_depth else int(c_depth * ARRAY_GROWTH_FACTOR) new_space = _expand_array_if_needed(space_grids[set_key], new_depth) current_set["space-gridded-data"] = _expand_array_if_needed(current_set["space-gridded-data"], new_depth) # replace any data where we are going to use just the new set current_set["space-gridded-data"][:, use_new] = new_space[:, use_new] current_set["times"] [use_new] = aux_times[set_key][use_new] current_set["angles"] [use_new] = aux_angles[set_key][use_new] current_set["density"] [use_new] = density_maps[set_key][use_new] current_set["nobs"] [use_new] = nobs[set_key][use_new] # combine the data where we want to use both sets TODO, how can I do this in a more numpy and python friendly way? 
temp_shape = current_set["space-gridded-data"].shape for lat in range(temp_shape[1]) : for lon in range(temp_shape[2]) : if use_both_mask[lat, lon] : prev_num = current_set["density"][lat, lon] num_adding = density_maps[set_key] [lat, lon] new_total = prev_num + num_adding current_set["space-gridded-data"][prev_num:new_total, lat, lon] = new_space[:num_adding, lat, lon] # build a combined average of the times current_set["times"] [use_both_mask] = ((aux_times[set_key][use_both_mask] * nobs[set_key][use_both_mask]) + \ (current_set["times"][use_both_mask] * current_set["nobs"][use_both_mask])) \ / (nobs[set_key][use_both_mask] + current_set["nobs"][use_both_mask]) temp_new_angles = aux_angles[set_key][use_both_mask] temp_old_angles = current_set["angles"][use_both_mask] current_set["angles"] [use_both_mask] = numpy.where(temp_new_angles > temp_old_angles, temp_new_angles, temp_old_angles) # select the largest angle from the two sets current_set["density"][use_both_mask] += density_maps[set_key][use_both_mask] current_set["nobs"] [use_both_mask] += nobs[set_key][use_both_mask] # if we got to here we processed the file successfully successful_files += 1