def prepare_results_zipfile(collection_name,
                            png_location,
                            png_location_type="local",
                            json_location=None,
                            json_location_type="local"):
    """
    Create a zipfile called <results_long_lat_collection> containing the
    'results_summary.json', and the outputs of the analysis.

    Parameters
    ==========
    collection_name: str, typically "Sentinel2" or "Landsat8" or similar
    png_location: str, directory containing analysis/ subdirectory
    png_location_type: str, either "local" or "azure"
    json_location: str, directory containing "results_summary.json".
                   If not specified, assume same as png_location
    json_location_type: str, either "local" or "azure"

    Returns
    =======
    zip_filename: str, location of the produced zipfile
    """
    tmpdir = tempfile.mkdtemp()
    zip_filename = os.path.join(tmpdir, "results_")
    coords_string = find_coords_string(png_location)
    if coords_string:
        zip_filename += coords_string + "_"
    zip_filename += collection_name + ".zip"
    zf = ZipFile(zip_filename, mode="w")
    if not json_location:
        # assume json and png are in the same directory
        json_location = png_location
    if json_location_type == "local":
        if not os.path.isdir(json_location):
            raise RuntimeError("{} is not a directory".format(json_location))
        if not os.path.exists(
                os.path.join(json_location, "results_summary.json")):
            raise RuntimeError(
                "Could not find results_summary.json in {}".format(
                    json_location))
        zf.write(os.path.join(json_location, "results_summary.json"),
                 arcname="results_summary.json")
    if png_location_type == "local":
        if not os.path.exists(os.path.join(png_location, "analysis")):
            raise RuntimeError(
                "Could not find analysis dir in {}".format(png_location))
        for root, dirnames, filenames in os.walk(
                os.path.join(png_location, "analysis")):
            for filename in filenames:
                full_filepath = os.path.join(root, filename)
                short_filepath = get_filepath_after_directory(
                    full_filepath, "analysis")
                zf.write(full_filepath, arcname=short_filepath)
    zf.close()
    return zip_filename
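# Hedged usage sketch (not from the source): assumes <png_location> holds an
# "analysis/" subdirectory and a "results_summary.json", as the checks above
# require; the path and coordinates below are purely illustrative.
#
#     zip_path = prepare_results_zipfile(
#         "Sentinel2",
#         "/tmp/output/11.58_27.95_Sentinel2",  # hypothetical results dir
#     )
#     # -> <tmpdir>/results_11.58_27.95_Sentinel2.zip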
def process_single_date(self, date_string):
    """
    Each date will have a subdirectory called 'SPLIT' with ~400 NDVI
    sub-images.  Process each sub-image that passes the quality check,
    and save the resulting values to 'ndvi_values.json'.
    """
    # if we are given a list of date strings to process, and this isn't
    # one of them, skip it.
    if self.dates_to_process and date_string not in self.dates_to_process:
        logger.info("{} will not process date {}".format(
            self.name, date_string))
        return True
    # see if there is already an ndvi_values.json file in
    # the output location - if so, skip
    output_location = self.join_path(self.output_location, date_string,
                                     *(self.output_location_subdirs))
    if (not self.replace_existing_files) and self.check_for_existing_files(
            output_location, self.num_files_per_point):
        return True
    input_path = self.join_path(self.input_location, date_string,
                                *(self.input_location_subdirs))
    all_input_files = self.list_directory(input_path,
                                          self.input_location_type)
    logger.info("input path is {}".format(input_path))

    # list all the "NDVI" sub-images where RGB image passes quality check
    input_files = [
        filename for filename in all_input_files
        if "_NDVI" in filename and self.check_sub_image(filename, input_path)
    ]
    if len(input_files) == 0:
        logger.info("{}: No sub-images for date {}".format(
            self.name, date_string))
        return
    else:
        logger.info("{}: found {} sub-images".format(
            self.name, len(input_files)))
    # if we only want a subset of sub-images, truncate the list here
    if self.n_sub_images > 0:
        input_files = input_files[:self.n_sub_images]

    ndvi_vals = []
    for ndvi_file in input_files:
        coords_string = find_coords_string(ndvi_file)
        ndvi_dict = self.process_sub_image(
            self.join_path(input_path, ndvi_file), date_string,
            coords_string)
        ndvi_vals.append(ndvi_dict)

    self.save_json(ndvi_vals, "ndvi_values.json", output_location,
                   self.output_location_type)
    return True
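# Minimal driving sketch (assumptions flagged): the concrete processor class
# and the attribute values below are illustrative; in the real pipeline these
# are set by the surrounding configuration, not by hand.
#
#     calculator = NDVICalculator()                    # hypothetical class
#     calculator.input_location = "/tmp/gee_output"    # illustrative path
#     calculator.output_location = "/tmp/ndvi_output"  # illustrative path
#     calculator.dates_to_process = []    # empty => process every date
#     calculator.replace_existing_files = False
#     calculator.n_sub_images = -1        # <= 0 => keep all sub-images
#     calculator.process_single_date("2020-01-01")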
def process_single_date(self, date_string):
    """
    Each date will have a subdirectory called 'SPLIT' with ~400 BWNDVI
    sub-images.  Process each of them in parallel, then consolidate the
    per-sub-image results into a single 'network_centralities.json'.
    """
    logger.info("{}: processing {}".format(self.name, date_string))
    # if we are given a list of date strings to process, and this isn't
    # one of them, skip it.
    if self.dates_to_process and date_string not in self.dates_to_process:
        logger.info("{} will not process date {}".format(
            self.name, date_string))
        return True
    # see if there is already a network_centralities.json file in
    # the output location - if so, skip
    output_location = self.join_path(self.output_location, date_string,
                                     *(self.output_location_subdirs))
    if (not self.replace_existing_files) and self.check_for_existing_files(
            output_location, self.num_files_per_point):
        return True
    input_path = self.join_path(self.input_location, date_string,
                                *(self.input_location_subdirs))
    all_input_files = self.list_directory(input_path,
                                          self.input_location_type)
    # list all the "BWNDVI" sub-images where RGB image passes quality check
    input_files = [
        filename for filename in all_input_files
        if "BWNDVI" in filename and self.check_sub_image(filename, input_path)
    ]
    if len(input_files) == 0:
        logger.info("{}: No sub-images for date {}".format(
            self.name, date_string))
        return
    else:
        logger.info("{}: found {} sub-images".format(
            self.name, len(input_files)))
    tmp_json_dir = tempfile.mkdtemp()
    # if we only want a subset of sub-images, truncate the list here
    if self.n_sub_images > 0:
        input_files = input_files[:self.n_sub_images]
    # create a multiprocessing pool to handle each sub-image in parallel
    with Pool(processes=self.n_threads) as pool:
        # prepare the arguments for the process_sub_image function
        arguments = [(
            i,
            self.get_file(self.join_path(input_path, filename),
                          self.input_location_type),
            tmp_json_dir,
            date_string,
            find_coords_string(filename),
        ) for i, filename in enumerate(input_files)]
        pool.starmap(process_sub_image, arguments)
    # put all the output json files for subimages together into one for this date
    logger.info("Consolidating json from all subimages")
    all_subimages = consolidate_json_to_list(tmp_json_dir)
    self.save_json(
        all_subimages,
        "network_centralities.json",
        output_location,
        self.output_location_type,
    )
    shutil.rmtree(tmp_json_dir)
    return True
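# pool.starmap above unpacks each 5-element tuple into positional arguments,
# so the module-level process_sub_image function is assumed to accept a
# signature of the following shape (inferred from the tuples built above,
# not confirmed against its definition):
#
#     def process_sub_image(index, image, tmp_json_dir, date_string,
#                           coords_string):
#         ...  # analyse one sub-image, write one json file into tmp_json_dir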
def process_single_date(self, date_string):
    """
    For a single set of .tif files corresponding to a date range (normally
    a sub-range of the full date range for the pipeline), construct RGB
    and NDVI greyscale images.  Then do processing and thresholding to
    make black+white NDVI images.  Split the RGB and black+white NDVI
    images into small (50x50 pixel) sub-images.

    Parameters
    ==========
    date_string: str, format YYYY-MM-DD

    Returns
    =======
    True if everything was processed and saved OK, False otherwise.
    """
    # first see if there are already files in the output location
    # (in which case we can skip this date)

    # normally the coordinates will be part of the file path
    coords_string = find_coords_string(self.input_location)
    # if not though, we might have coords set explicitly
    if (not coords_string) and "coords" in vars(self):
        coords_string = "{}_{}".format(self.coords[0], self.coords[1])
    if not coords_string or not date_string:
        raise RuntimeError(
            "{}: coords and date need to be defined, through file path "
            "or explicitly set".format(self.name))

    output_location = os.path.dirname(
        self.construct_image_savepath(date_string, coords_string))
    if (not self.replace_existing_files) and self.check_for_existing_files(
            output_location, self.num_files_per_point):
        return True

    # If no files already there, proceed.
    input_filepath = self.join_path(self.input_location, date_string,
                                    *(self.input_location_subdirs))
    logger.info("{} processing files in {}".format(self.name,
                                                   input_filepath))
    filenames = [
        filename for filename in self.list_directory(
            input_filepath, self.input_location_type)
        if filename.endswith(".tif")
    ]
    if len(filenames) == 0:
        return True

    # extract this to feed into `convert_to_rgb()`
    band_dict = {}
    for icol, col in enumerate("rgb"):
        band = self.RGB_bands[icol]
        filename = self.get_file(
            self.join_path(input_filepath, "download.{}.tif".format(band)),
            self.input_location_type,
        )
        band_dict[col] = {"band": band, "filename": filename}

    logger.info("Found tif files: {}".format(filenames))
    tif_filebase = self.join_path(input_filepath,
                                  filenames[0].split(".")[0])

    # save the rgb image
    rgb_ok = self.save_rgb_image(band_dict, date_string, coords_string)
    if not rgb_ok:
        logger.info("Problem with the rgb image?")
        return False

    # save the NDVI image
    ndvi_tif = self.get_file(
        self.join_path(input_filepath, "download.NDVI.tif"),
        self.input_location_type)
    ndvi_image = scale_tif(ndvi_tif)
    ndvi_filepath = self.construct_image_savepath(date_string, coords_string,
                                                  "NDVI")
    self.save_image(ndvi_image, os.path.dirname(ndvi_filepath),
                    os.path.basename(ndvi_filepath))

    # preprocess and threshold the NDVI image
    processed_ndvi = process_and_threshold(ndvi_image)
    ndvi_bw_filepath = self.construct_image_savepath(date_string,
                                                     coords_string, "BWNDVI")
    self.save_image(
        processed_ndvi,
        os.path.dirname(ndvi_bw_filepath),
        os.path.basename(ndvi_bw_filepath),
    )

    # split and save sub-images
    self.split_and_save_sub_images(ndvi_image, date_string, coords_string,
                                   "NDVI")
    self.split_and_save_sub_images(processed_ndvi, date_string,
                                   coords_string, "BWNDVI")
    return True
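# Expected input layout for one date, inferred from the get_file calls above.
# The band filenames depend on self.RGB_bands; "B4"/"B3"/"B2" are illustrative
# values, not confirmed defaults:
#
#     <input_location>/<date_string>/<input_location_subdirs...>/
#         download.B4.tif    # red band (illustrative)
#         download.B3.tif    # green band (illustrative)
#         download.B2.tif    # blue band (illustrative)
#         download.NDVI.tif  # NDVI greyscale image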