Example #1
def save_data_to_file (stem_name, grid_shape, output_path, data_array, data_type, file_permissions="a") :
    """
    Save numpy data to the appropriate file name, given the information about the stem and path.
    By default this will append to the end of the file; you can instead pass in a different
    file_permissions value, using the standard mode strings from Python's open function.
    """
    temp_file = fbf.filename(stem_name, data_type, shape=grid_shape)
    temp_path = os.path.join(os.path.abspath(os.path.expanduser(output_path)), temp_file)
    temp_file_obj = open(temp_path, file_permissions)
Example #2
 def space_day(*args) :
     """grid one day of input files in space
     given an input directory that contains appropriate files,
     grid them in space and put the resulting gridded files
     for that day in the output directory.
     Note: the output directory will also be used for intermediary working files.
     """
     # set up some of our input from the caller for easy access
     desired_variables = list(args) if len(args) > 0 else [ ]
     input_path        = options.inputPath
     output_path       = options.outputPath
     min_scan_angle    = options.minScanAngle
     grid_degrees      = float(options.gridDegrees)
     # determine the grid size in number of elements
     grid_lon_size    = int(math.ceil(360.0 / grid_degrees))
     grid_lat_size    = int(math.ceil(180.0 / grid_degrees))
     space_grid_shape = (grid_lon_size, grid_lat_size) # TODO, is this the correct order?
     # look through our files and figure out what variables we expect from them
     possible_files    = os.listdir(input_path)
     expected_vars     = { }
     all_vars          = set()
     date_time_temp    = None
     for file_name in sorted(possible_files) :
         expected_vars[file_name] = general_guidebook.get_variable_names (file_name, user_requested_names=desired_variables)
         # if this file has no variables, remove it from our files for consideration
         if len(expected_vars[file_name]) <= 0 :
             del expected_vars[file_name]
         # otherwise, add the variables we found to our list of all variables and try to get a time from the file
         else :
             date_time_temp = general_guidebook.parse_datetime_from_filename(file_name) if date_time_temp is None else date_time_temp
     # check to make sure our intermediate file names don't exist already
     for var_name in all_vars :
         for suffix in io_manager.ALL_EXPECTED_SUFFIXES :
             # TODO, pull satellite and algorithm too
             temp_stem = io_manager.build_name_stem(var_name, date_time=date_time_temp, satellite=None, algorithm=None, suffix=suffix)
             temp_name = fbf.filename(temp_stem, TEMP_DATA_TYPE, shape=(space_grid_shape))
             if os.path.exists(os.path.join(output_path, temp_name)) :
                 LOG.warn ("Cannot process files because matching temporary or output files exist in the output directory.")
     # loop to deal with data from each of the files
     for each_file in sorted(possible_files) :
         full_file_path = os.path.join(input_path, each_file)
         LOG.debug("Processing file: " + full_file_path)
         # load the aux data
         file_object, temp_aux_data = io_manager.load_aux_data(full_file_path,
         # calculate the indices for the space grid based on the aux data
         # (we can do this now since the lon/lat is the same for each variable in the file)
         day_lon_index, day_lat_index, night_lon_index, night_lat_index = space_gridding.calculate_index_from_nav_data(temp_aux_data,
         # loop to load each variable in the file and process it
         for variable_name in expected_vars[each_file] :
             LOG.debug("Processing variable: " + variable_name)
             # load the variable
             file_object, var_data = io_manager.load_variable_from_file (variable_name,
             # split the variable by day/night
             day_var_data   = var_data[temp_aux_data[DAY_MASK_KEY]]
             night_var_data = var_data[temp_aux_data[NIGHT_MASK_KEY]]
             # space grid the data using the indexes we calculated earlier
             day_space_grid,   day_density_map,   day_nobs,   day_max_depth   = space_gridding.space_grid_data (grid_lon_size, grid_lat_size,
                                                                                                                day_lon_index, day_lat_index)
             night_space_grid, night_density_map, night_nobs, night_max_depth = space_gridding.space_grid_data (grid_lon_size, grid_lat_size,
                                                                                                                night_lon_index, night_lat_index)
             # save the space grids and density info for this variable and its density map to files
             # day related files
             io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
                                          space_grid_shape, output_path, day_space_grid, TEMP_DATA_TYPE)
             io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
                                          space_grid_shape, output_path, day_density_map, TEMP_DATA_TYPE)
             io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
                                          space_grid_shape, output_path, day_nobs, TEMP_DATA_TYPE)
             # night related files
             io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
                                          space_grid_shape, output_path, night_space_grid, TEMP_DATA_TYPE)
             io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
                                          space_grid_shape, output_path, night_density_map, TEMP_DATA_TYPE)
             io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
                                          space_grid_shape, output_path, night_nobs, TEMP_DATA_TYPE)
         # make sure each file is closed when we're done with it
         io_manager.close_file(full_file_path, file_object)
     # collapse the per variable space grids to remove excess NaNs
     for variable_name in all_vars :
         LOG.debug("Packing space data for variable: " + variable_name)
         # load the variable's density maps
         var_workspace     = Workspace.Workspace(dir=output_path)
         day_var_density   = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
         night_var_density = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                      satellite=None, algorithm=None,
         # only do the day data if we have some
         if numpy.sum(day_var_density) > 0 :
             # load the sparse space grid
             day_var_data      = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                          satellite=None, algorithm=None,
             # collapse the space grid
             final_day_data    = space_gridding.pack_space_grid(day_var_data,   day_var_density)
             # save the final array to an appropriately named file
             io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                     satellite=None, algorithm=None,
                                          space_grid_shape, output_path, final_day_data,
                                          TEMP_DATA_TYPE, file_permissions="w")
             # load the nobs file
             nobs_counts       = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                          satellite=None, algorithm=None,
             # collapse the nobs
             nobs_final        = numpy.sum(nobs_counts, axis=0)
             # save the final nobs array to an appropriately named file
             io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                     satellite=None, algorithm=None,
                                          space_grid_shape, output_path,
                                          nobs_final, TEMP_DATA_TYPE, file_permissions="w")
         else :
             LOG.warn("No day data was found for variable " + variable_name + ". Day files will not be written.")
         # only do night data if we have some
         if numpy.sum(night_var_density) > 0 :
             # load the sparse space grid
             night_var_data      = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                          satellite=None, algorithm=None,
             # collapse the space grid
             final_night_data    = space_gridding.pack_space_grid(night_var_data,   night_var_density)
             # save the final array to an appropriately named file
             io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                     satellite=None, algorithm=None,
                                          space_grid_shape, output_path, final_night_data,
                                          TEMP_DATA_TYPE, file_permissions="w")
             # load the nobs file
             nobs_counts       = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                          satellite=None, algorithm=None,
             # collapse the nobs
             nobs_final        = numpy.sum(nobs_counts, axis=0)
             # save the final nobs array to an appropriately named file
             io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
                                                                     satellite=None, algorithm=None,
                                          space_grid_shape, output_path,
                                          nobs_final, TEMP_DATA_TYPE, file_permissions="w")
         else :
             LOG.warn("No night data was found for variable " + variable_name + ". Night files will not be written.")
     # remove the extra temporary files in the output directory
     remove_suffixes = ["*" + p + "*" for p in io_manager.EXPECTED_TEMP_SUFFIXES]
     remove_file_patterns(output_path, remove_suffixes)
Example #3
    def space_griding_day(*args) :
        """grid one day of input files in space
        given an input directory that contains appropriate files,
        grid them in space and put the resulting gridded files
        for that day in the output directory.
        Note: the output directory will also be used for intermediary working files.
        """
        # set up some of our input from the caller for easy access
        desired_variables  = list(args) if len(args) > 0 else [ ]
        input_path         = stg_util.clean_path(options.inputPath)
        output_path        = stg_util.clean_path(options.outputPath)
        stg_util.setup_dir_if_needed(output_path, "output")
        min_scan_angle     = options.minScanAngle
        grid_degrees       = float(options.gridDegrees)
        do_day_night       = not options.keep_day_night_together
        do_multi_overpass  = options.allow_multiple_overpasses_per_cell

        temp_str = "will allow" if do_multi_overpass else "will not allow"
        LOG.debug("Space griding " + temp_str + " multiple overpasses per grid cell.")

        # determine the grid size in number of elements
        grid_lon_size      = int(math.ceil(360.0 / grid_degrees))
        grid_lat_size      = int(math.ceil(180.0 / grid_degrees))
        space_grid_shape   = (grid_lat_size, grid_lon_size) # I've confirmed with Nadia that this is the correct order
        # look through our files and figure out what variables we expect from them
        possible_files     = os.listdir(input_path)
        expected_vars      = { }
        all_vars           = set()
        date_time_temp     = None
        expected_num_files = None
        satellite          = None
        instrument         = None
        for file_name in sorted(possible_files) :
            expected_vars[file_name] = general_guidebook.get_variable_names (file_name, user_requested_names=desired_variables)

            # if this file has no variables, remove it from our files for consideration
            if len(expected_vars[file_name]) <= 0 :
                del expected_vars[file_name]

            # otherwise, add the variables we found to our list of all variables and try to get a time from the file
            else :
                # if we don't have it yet, update some general information about this run based on the file name
                temp_sat, temp_inst   = general_guidebook.get_satellite_from_filename(file_name)
                satellite             = temp_sat  if satellite  is None else satellite
                instrument            = temp_inst if instrument is None else instrument
                date_time_temp        = general_guidebook.parse_datetime_from_filename(file_name) if date_time_temp     is None else date_time_temp
                expected_num_files    = general_guidebook.get_expected_files_per_day(instrument)  if expected_num_files is None else expected_num_files
        # check to make sure our intermediate file names don't exist already
        expected_space_file_suffixes = io_manager.get_list_of_suffixes(DAILY_SPACE_TYPE, ALL_FILES_TYPE)
        for var_name in all_vars :

            for suffix in expected_space_file_suffixes :
                temp_stem = io_manager.build_name_stem(var_name, date_time=date_time_temp, satellite=satellite, suffix=suffix)
                temp_name = fbf.filename(temp_stem, TEMP_DATA_TYPE, shape=space_grid_shape)
                if os.path.exists(os.path.join(output_path, temp_name)) :
                    LOG.warn ("Cannot process files because matching temporary or output files exist in the output directory.")

        # loop to deal with data from each of the files
        failed_files       = 0
        successful_files   = 0
        abstract_data_sets = io_manager.get_expected_abstract_sets(instrument, separate_day_night=do_day_night)
        collected_data     = { }
        for each_file in sorted(possible_files) :

            full_file_path = os.path.join(input_path, each_file)

            LOG.debug("Processing file: " + full_file_path)

            # load the aux data
            file_object, temp_aux_data = io_manager.load_aux_data(full_file_path, min_scan_angle)
            # figure out what data sets we need to process
            data_sets = io_manager.get_expected_data_sets_from_aux_data (instrument, temp_aux_data, do_separate_day_night=do_day_night)

            ok_file     = True
            lon_indices = { }
            lat_indices = { }
            try :

                # calculate the indices for the space grid based on the navigation data
                # (we can do this now since the lon/lat is the same for each variable in the file)
                for set_key in data_sets.keys() :

                    set_mask      = data_sets[set_key][SET_MASK_KEY]
                    temp_lon_data = data_sets[set_key][LON_KEY][set_mask]
                    temp_lat_data = data_sets[set_key][LAT_KEY][set_mask]

                    lat_index, lon_index = space_gridding.calculate_index_from_nav_data(temp_lat_data, temp_lon_data, grid_degrees)
                    lat_indices[set_key] = lat_index
                    lon_indices[set_key] = lon_index

            except Exception, e :

                LOG.warn("Unable to process basic space griding for file: " + full_file_path)
                LOG.warn("This file will not be processed.")

                exc_type, exc_value, exc_traceback = sys.exc_info()
                LOG.debug(traceback.format_exception(exc_type, exc_value, exc_traceback))

                ok_file       = False
                failed_files += 1

            # if the file looks alright so far, continue processing it
            if ok_file :

                # loop to load each variable in the file and process it
                for variable_name in expected_vars[each_file] :

                    LOG.debug("Processing variable: " + variable_name)

                    # load the variable
                    file_object, var_data = io_manager.load_variable_from_file (variable_name,

                    # split the variable data by sets
                    separated_data   = { }
                    separated_time   = { }
                    separated_angles = { }
                    for set_key in data_sets.keys() :

                        separated_data  [set_key] =                                    var_data[data_sets[set_key][SET_MASK_KEY]]
                        separated_time  [set_key] = data_sets[set_key][SCAN_LINE_TIME_KEY]     [data_sets[set_key][SET_MASK_KEY]]
                        separated_angles[set_key] = data_sets[set_key][SENSOR_ZENITH_ANGLE_KEY][data_sets[set_key][SET_MASK_KEY]]

                    ok_file = True
                    space_grids  = { }
                    density_maps = { }
                    nobs         = { }
                    max_depths   = { }
                    aux_times    = { }
                    aux_angles   = { }
                    try :

                        # space grid the data using the indexes we calculated earlier
                        for set_key in data_sets.keys() :

                            # note: also preserve useful aux data for this file
                            temp_space_grid, temp_density_map, temp_nobs, temp_max_depth, temp_aux_time, temp_aux_angle = \
                            space_grids [set_key] = temp_space_grid
                            density_maps[set_key] = temp_density_map
                            nobs        [set_key] = temp_nobs
                            max_depths  [set_key] = temp_max_depth
                            aux_times   [set_key] = temp_aux_time  # save the avg time for each cell
                            aux_angles  [set_key] = temp_aux_angle # save the max angle for each cell

                    except Exception, e :

                        LOG.warn("Unable to process variable data space griding for file: " + full_file_path)
                        LOG.warn("This variable will not be processed.")

                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        LOG.debug(traceback.format_exception(exc_type, exc_value, exc_traceback))

                        ok_file       = False
                        failed_files += 1

                    # if the data in the file looks ok so far, save it to the output
                    if ok_file :

                        #print("space grid shape: " + str(space_grids[set_key].shape))

                        # save the space grids and density info for this variable and its density map to files
                        for set_key in data_sets.keys() :

                            if do_multi_overpass :

                                # save temporary data to accumulate it as we go through all the files for a day

                                # save the gridded data
                                io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                                        suffix=set_key + "-" + TEMP_SUFFIX_KEY),
                                                            space_grid_shape, output_path, space_grids[set_key], TEMP_DATA_TYPE)
                                # save the grid density map
                                io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                                        suffix=set_key + "-" + DENSITY_SUFFIX + "-" + TEMP_SUFFIX_KEY),
                                                            space_grid_shape, output_path, density_maps[set_key], TEMP_DATA_TYPE)
                                # save the number of observations grid
                                io_manager.save_data_to_file(io_manager.build_name_stem (variable_name, date_time=date_time_temp,
                                                                                        suffix=set_key + "-" + NOBS_SUFFIX + "-" + TEMP_SUFFIX_KEY),
                                                            space_grid_shape, output_path, nobs[set_key], TEMP_DATA_TYPE)

                            else :

                                # if we haven't processed this variable yet, add a dictionary for it
                                if variable_name not in collected_data :
                                    collected_data[variable_name] = { }

                                # if we have no measurements, expand the array to depth 1 to make numpy happy
                                space_grids[set_key] = _expand_array_if_needed(space_grids[set_key], 1)

                                # if there isn't any data for this set in our collection, just put what we have in to start with
                                if set_key not in collected_data[variable_name] :

                                    collected_data[variable_name][set_key] = { }
                                    current_set = collected_data[variable_name][set_key]

                                    # save the 2D arrays
                                    current_set["density"] = density_maps[set_key]
                                    current_set["nobs"]    = nobs[set_key]
                                    current_set["times"]   = aux_times[set_key]
                                    current_set["angles"]  = aux_angles[set_key]

                                    # save the space gridded data (the 3D array)
                                    new_depth = int(space_grids[set_key].shape[0] * ARRAY_GROWTH_FACTOR) # expand the arrays a little extra
                                    current_set["space-gridded-data"] = _expand_array_if_needed(space_grids[set_key], new_depth)

                                else : # when we already have data for this set key, incorporate the new overpass appropriately

                                    # there are several possible cases:
                                    #           we have no data in that cell of the grid           <- use data from the new file
                                    #           we have data in that cell, and it's the same orbit <- add the new data to the end of the old data
                                    #           we have data in that cell, it's a diff orbit       <- either replace the data in that cell or ignore the new data
                                    #                    (whether you replace or ignore depends on whether the new or old data has the worst sensor zenith angle)

                                    # for convenience
                                    current_set = collected_data[variable_name][set_key]

                                    # pre-calculate where there is any data at all in our old and new data sets
                                    have_old_data    = current_set["nobs"] > 0
                                    have_new_data    = nobs[set_key]       > 0
                                    both_have_data   = have_old_data & have_new_data

                                    # some other calculations to support our masking
                                    better_new_angle = aux_angles[set_key] < current_set["angles"]
                                    time_diff        = numpy.abs(current_set["times"] - aux_times[set_key])

                                    # figure out the masks that will control how we change our data

                                    # mask of the places where there is data in the new file, but we had none before
                                    have_only_new_data_mask = (~ have_old_data) & have_new_data
                                    # mask of the places where there is data in both and it's the same orbits
                                    use_both_mask = both_have_data & (time_diff <= space_gridding.SAME_TIME_RANGE_SECONDS)
                                    # mask of the places where there is data in both and it's different orbit
                                    use_only_new_data_mask = both_have_data & (time_diff > space_gridding.SAME_TIME_RANGE_SECONDS) & better_new_angle
                                    # Note: We will choose the orbit with the smallest maximum observed sensor zenith angle
                                    # in the grid cell - especially necessary at high latitudes
                                    use_new = have_only_new_data_mask | use_only_new_data_mask

                                    # expand the arrays if needed
                                    o_depth   = current_set["space-gridded-data"].shape[0] # the depth of the old array
                                    n_depth   = space_grids[set_key].shape[0]              # the depth of the new array
                                    c_depth   = numpy.max(current_set["density"][use_both_mask] + density_maps[set_key][use_both_mask]) if numpy.any(use_both_mask) else 0 # the combined depth
                                    new_depth = o_depth   if o_depth   >= n_depth else int(n_depth * ARRAY_GROWTH_FACTOR)
                                    new_depth = new_depth if new_depth >= c_depth else int(c_depth * ARRAY_GROWTH_FACTOR)
                                    new_space                         = _expand_array_if_needed(space_grids[set_key],              new_depth)
                                    current_set["space-gridded-data"] = _expand_array_if_needed(current_set["space-gridded-data"], new_depth)

                                    # replace any data where we are going to use just the new set
                                    current_set["space-gridded-data"][:, use_new] =          new_space[:, use_new]
                                    current_set["times"]                [use_new] =    aux_times[set_key][use_new]
                                    current_set["angles"]               [use_new] =   aux_angles[set_key][use_new]
                                    current_set["density"]              [use_new] = density_maps[set_key][use_new]
                                    current_set["nobs"]                 [use_new] =         nobs[set_key][use_new]

                                    # combine the data where we want to use both sets TODO, how can I do this in a more numpy and python friendly way?
                                    temp_shape = current_set["space-gridded-data"].shape
                                    for lat in range(temp_shape[1]) :
                                        for lon in range(temp_shape[2]) :
                                            if use_both_mask[lat, lon] :
                                                prev_num     = current_set["density"][lat, lon]
                                                num_adding   = density_maps[set_key] [lat, lon]
                                                new_total    = prev_num + num_adding
                                                current_set["space-gridded-data"][prev_num:new_total, lat, lon] =  new_space[:num_adding, lat, lon]
                                    # build a combined average of the times
                                    current_set["times"]  [use_both_mask] =  ((aux_times[set_key][use_both_mask] * nobs[set_key][use_both_mask]) + \
                                                                             (current_set["times"][use_both_mask] * current_set["nobs"][use_both_mask])) \
                                                                             / (nobs[set_key][use_both_mask] + current_set["nobs"][use_both_mask])
                                    temp_new_angles = aux_angles[set_key][use_both_mask]
                                    temp_old_angles = current_set["angles"][use_both_mask]
                                    current_set["angles"] [use_both_mask] = numpy.where(temp_new_angles > temp_old_angles, temp_new_angles, temp_old_angles) # select the largest angle from the two sets
                                    current_set["density"][use_both_mask] += density_maps[set_key][use_both_mask]
                                    current_set["nobs"]   [use_both_mask] +=         nobs[set_key][use_both_mask]

                        # if we got to here we processed the file successfully
                        successful_files += 1