Example #1
import os
import shutil
import tempfile
from multiprocessing import Pool
from zipfile import ZipFile

# NOTE: find_coords_string, get_filepath_after_directory, scale_tif,
# process_and_threshold, process_sub_image, consolidate_json_to_list and
# logger are project-level helpers, assumed to be importable from this
# package's utility modules (their definitions are not shown here).

def prepare_results_zipfile(collection_name,
                            png_location,
                            png_location_type="local",
                            json_location=None,
                            json_location_type="local"):
    """
    Create a zipfile named "results_<coords>_<collection_name>.zip" containing
    'results_summary.json' and the outputs of the analysis.

    Parameters
    ==========
    collection_name: str, typically "Sentinel2" or "Landsat8" or similar
    png_location: str, directory containing the analysis/ subdirectory
    png_location_type: str, either "local" or "azure"
    json_location: str, directory containing "results_summary.json".
                   If not specified, assume the same as png_location
    json_location_type: str, either "local" or "azure"

    Returns
    =======
    zip_filename: str, location of the produced zipfile
    """
    tmpdir = tempfile.mkdtemp()
    zip_filename = os.path.join(tmpdir, "results_")
    coords_string = find_coords_string(png_location)
    if coords_string:
        zip_filename += coords_string + "_"
    zip_filename += collection_name + ".zip"
    zf = ZipFile(zip_filename, mode="w")
    if not json_location:
        # assume json and png are in the same directory
        json_location = png_location
    if json_location_type == "local":
        if not os.path.isdir(json_location):
            raise RuntimeError("{} is not a directory".format(json_location))
        if not os.path.exists(
                os.path.join(json_location, "results_summary.json")):
            raise RuntimeError(
                "Could not find results_summary.json in {}".format(
                    json_location))
        zf.write(os.path.join(json_location, "results_summary.json"),
                 arcname="results_summary.json")
    if png_location_type == "local":
        if not os.path.exists(os.path.join(png_location, "analysis")):
            raise RuntimeError(
                "Could not find analysis dir in {}".format(png_location))
        for root, dirnames, filenames in os.walk(
                os.path.join(png_location, "analysis")):
            for filename in filenames:
                full_filepath = os.path.join(root, filename)
                short_filepath = get_filepath_after_directory(
                    full_filepath, "analysis")
                zf.write(full_filepath, arcname=short_filepath)
    zf.close()
    return zip_filename
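
# Usage sketch for prepare_results_zipfile (illustrative only: the directory
# path below is a hypothetical example, and would need to contain
# results_summary.json plus an analysis/ subdirectory):
#
#     zip_path = prepare_results_zipfile("Sentinel2", "/tmp/output/my_region")
#     print(zip_path)  # e.g. <tmpdir>/results_Sentinel2.zip
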
    def process_single_date(self, date_string):
        """
        Each date will have a subdirectory called 'SPLIT' with ~400 NDVI
        sub-images.
        """
        # if we are given a list of date strings to process, and this isn't
        # one of them, skip it.
        if self.dates_to_process and date_string not in self.dates_to_process:
            logger.info("{} will not process date {}".format(
                self.name, date_string))
            return True

        # see if there is already a ndvi_values.json file in
        # the output location - if so, skip
        output_location = self.join_path(self.output_location, date_string,
                                         *(self.output_location_subdirs))
        if (not self.replace_existing_files) and self.check_for_existing_files(
                output_location, self.num_files_per_point):
            return True

        input_path = self.join_path(self.input_location, date_string,
                                    *(self.input_location_subdirs))
        all_input_files = self.list_directory(input_path,
                                              self.input_location_type)
        logger.info("input path is {}".format(input_path))

        # list all the "NDVI" sub-images where RGB image passes quality check
        input_files = [
            filename for filename in all_input_files if "_NDVI" in filename
            and self.check_sub_image(filename, input_path)
        ]

        if len(input_files) == 0:
            logger.info("{}: No sub-images for date {}".format(
                self.name, date_string))
            return True
        else:
            logger.info("{}: found {} sub-images".format(
                self.name, len(input_files)))
        # if we only want a subset of sub-images, truncate the list here
        if self.n_sub_images > 0:
            input_files = input_files[:self.n_sub_images]

        ndvi_vals = []
        for ndvi_file in input_files:
            coords_string = find_coords_string(ndvi_file)
            ndvi_dict = self.process_sub_image(
                self.join_path(input_path, ndvi_file), date_string,
                coords_string)
            ndvi_vals.append(ndvi_dict)

        self.save_json(ndvi_vals, "ndvi_values.json", output_location,
                       self.output_location_type)

        return True
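
    # A minimal sketch (an assumption, not the package's real implementation)
    # of the kind of check that check_for_existing_files() performs above for
    # local output locations: a date is skipped if its output location
    # already holds at least num_files_per_point files.
    def _check_for_existing_files_sketch(self, output_location,
                                         num_files_per_point):
        """Return True if output_location already contains enough files."""
        if not os.path.isdir(output_location):
            return False
        # count regular files only, ignoring any subdirectories
        n_files = sum(
            os.path.isfile(os.path.join(output_location, f))
            for f in os.listdir(output_location))
        return n_files >= num_files_per_point
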
    def process_single_date(self, date_string):
        """
        Each date will have a subdirectory called 'SPLIT' with ~400 BWNDVI
        sub-images.
        """
        logger.info("{}: processing {}".format(self.name, date_string))
        # if we are given a list of date strings to process, and this isn't
        # one of them, skip it.
        if self.dates_to_process and date_string not in self.dates_to_process:
            logger.info("{} will not process date {}".format(
                self.name, date_string))
            return True
        # see if there is already a network_centralities.json file in
        # the output location - if so, skip
        output_location = self.join_path(self.output_location, date_string,
                                         *(self.output_location_subdirs))
        if (not self.replace_existing_files) and self.check_for_existing_files(
                output_location, self.num_files_per_point):
            return True

        input_path = self.join_path(self.input_location, date_string,
                                    *(self.input_location_subdirs))
        all_input_files = self.list_directory(input_path,
                                              self.input_location_type)

        # list all the "BWNDVI" sub-images where RGB image passes quality check
        input_files = [
            filename for filename in all_input_files if "BWNDVI" in filename
            and self.check_sub_image(filename, input_path)
        ]
        if len(input_files) == 0:
            logger.info("{}: No sub-images for date {}".format(
                self.name, date_string))
            return True
        else:
            logger.info("{} found {} sub-images".format(
                self.name, len(input_files)))
        tmp_json_dir = tempfile.mkdtemp()

        # if we only want a subset of sub-images, truncate the list here
        if self.n_sub_images > 0:
            input_files = input_files[:self.n_sub_images]

        # create a multiprocessing pool to handle each sub-image in parallel
        with Pool(processes=self.n_threads) as pool:
            # prepare the arguments for the process_sub_image function
            arguments = [(
                i,
                self.get_file(self.join_path(input_path, filename),
                              self.input_location_type),
                tmp_json_dir,
                date_string,
                find_coords_string(filename),
            ) for i, filename in enumerate(input_files)]
            pool.starmap(process_sub_image, arguments)
        # combine the per-sub-image json files into a single file for this date
        logger.info("Consolidating json from all sub-images")
        all_subimages = consolidate_json_to_list(tmp_json_dir)
        self.save_json(
            all_subimages,
            "network_centralities.json",
            output_location,
            self.output_location_type,
        )
        shutil.rmtree(tmp_json_dir)
        return True
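
# The parallel pattern used above, in isolation: each worker writes one small
# JSON fragment into a shared temporary directory, and the fragments are
# merged once the Pool has finished. _worker_sketch and _consolidate_sketch
# are illustrative stand-ins for process_sub_image and
# consolidate_json_to_list; the real functions have different signatures.
import glob
import json


def _worker_sketch(index, value, tmp_dir):
    # no shared state: each task owns its own output file
    with open(os.path.join(tmp_dir, "part_{}.json".format(index)), "w") as f:
        json.dump({"index": index, "value": value}, f)


def _consolidate_sketch(tmp_dir):
    results = []
    for path in sorted(glob.glob(os.path.join(tmp_dir, "*.json"))):
        with open(path) as f:
            results.append(json.load(f))
    return results


# usage (the __main__ guard matters when spawning worker processes):
#     if __name__ == "__main__":
#         tmp_dir = tempfile.mkdtemp()
#         with Pool(processes=4) as pool:
#             pool.starmap(_worker_sketch,
#                          [(i, i * i, tmp_dir) for i in range(8)])
#         print(_consolidate_sketch(tmp_dir))
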
    def process_single_date(self, date_string):
        """
        For a single set of .tif files corresponding to a date range
        (normally a sub-range of the full date range for the pipeline),
        construct RGB and NDVI greyscale images.
        Then do processing and thresholding to make black+white NDVI images.
        Split the RGB and black+white NDVI images into small (50x50 pixel)
        sub-images.

        Parameters
        ==========
        date_string: str, format YYYY-MM-DD

        Returns
        =======
        True if everything was processed and saved OK, False otherwise.
        """
        # first see if there are already files in the output location
        # (in which case we can skip this date)

        # normally the coordinates will be part of the file path
        coords_string = find_coords_string(self.input_location)
        # if not though, we might have coords set explicitly
        if (not coords_string) and "coords" in vars(self):
            coords_string = "{}_{}".format(self.coords[0], self.coords[1])

        if not (coords_string and date_string):
            raise RuntimeError(
                "{}: coords and date need to be defined, through file path "
                "or explicitly set".format(self.name))

        output_location = os.path.dirname(
            self.construct_image_savepath(date_string, coords_string))
        if (not self.replace_existing_files) and self.check_for_existing_files(
                output_location, self.num_files_per_point):
            return True

        # If no files already there, proceed.
        input_filepath = self.join_path(self.input_location, date_string,
                                        *(self.input_location_subdirs))
        logger.info("{} processing files in {}".format(self.name,
                                                       input_filepath))
        filenames = [
            filename for filename in self.list_directory(
                input_filepath, self.input_location_type)
            if filename.endswith(".tif")
        ]
        if len(filenames) == 0:
            logger.info("{}: no .tif files for date {}".format(
                self.name, date_string))
            return True

        # extract this to feed into `convert_to_rgb()`
        band_dict = {}
        for icol, col in enumerate("rgb"):
            band = self.RGB_bands[icol]
            filename = self.get_file(
                self.join_path(input_filepath, "download.{}.tif".format(band)),
                self.input_location_type,
            )
            band_dict[col] = {"band": band, "filename": filename}

        logger.info("Found tif files: {}".format(filenames))

        # save the rgb image
        rgb_ok = self.save_rgb_image(band_dict, date_string, coords_string)
        if not rgb_ok:
            logger.info("Problem with the rgb image?")
            return False

        # save the NDVI image
        ndvi_tif = self.get_file(
            self.join_path(input_filepath, "download.NDVI.tif"),
            self.input_location_type)
        ndvi_image = scale_tif(ndvi_tif)
        ndvi_filepath = self.construct_image_savepath(date_string,
                                                      coords_string, "NDVI")
        self.save_image(ndvi_image, os.path.dirname(ndvi_filepath),
                        os.path.basename(ndvi_filepath))

        # preprocess and threshold the NDVI image
        processed_ndvi = process_and_threshold(ndvi_image)
        ndvi_bw_filepath = self.construct_image_savepath(
            date_string, coords_string, "BWNDVI")
        self.save_image(
            processed_ndvi,
            os.path.dirname(ndvi_bw_filepath),
            os.path.basename(ndvi_bw_filepath),
        )

        # split and save sub-images
        self.split_and_save_sub_images(ndvi_image, date_string, coords_string,
                                       "NDVI")

        self.split_and_save_sub_images(processed_ndvi, date_string,
                                       coords_string, "BWNDVI")

        return True
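
# A minimal sketch of the 50x50-pixel tiling that split_and_save_sub_images
# presumably performs (an assumption about the approach; the real method
# also names and saves each tile, and its details are not shown here).
# `image` is expected to be a PIL.Image.Image.
def _split_image_sketch(image, tile_size=50):
    """Yield (x, y, tile) for every full tile_size x tile_size crop."""
    width, height = image.size
    for x in range(0, width - tile_size + 1, tile_size):
        for y in range(0, height - tile_size + 1, tile_size):
            # PIL crop boxes are (left, upper, right, lower)
            yield x, y, image.crop((x, y, x + tile_size, y + tile_size))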