Example #1
0
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}

    """
    logger.info("CREATE_OUTPUT")
    full_metadata = data[1]
    dataset = xr.open_dataset(data[0], autoclose=True)
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    task.result_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.result_filled_path = os.path.join(task.get_result_path(), "filled_png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    task.animation_path = os.path.join(task.get_result_path(),
                                       "animation.gif") if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements()
    png_bands = [task.query_type.red, task.query_type.green, task.query_type.blue]

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset.astype('int32'), bands=bands, no_data=task.satellite.no_data_value)
    write_png_from_xr(
        task.result_path,
        dataset,
        bands=png_bands,
        png_filled_path=task.result_filled_path,
        fill_color=task.query_type.fill,
        scale=task.satellite.get_scale(),
        low_res=True,
        no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            valid_range = reversed(
                range(len(full_metadata))) if task.animated_product.animation_id == "scene" and task.get_reverse_time(
                ) else range(len(full_metadata))
            for index in valid_range:
                path = os.path.join(task.get_temp_path(), "animation_{}.png".format(index))
                if os.path.exists(path):
                    image = imageio.imread(path)
                    writer.append_data(image)

    dates = list(map(lambda x: datetime.strptime(x, "%m/%d/%Y"), task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    shutil.rmtree(task.get_temp_path())
    return True
Example #2
0
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # load selected scene and mosaic just in case we got two scenes (handles scene boundaries/overlapping data)
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)

    if check_cancel_task(self, task): return

    if len(full_dataset) == 0 or 'time' not in selected_scene:
        return None

    #concat individual slices over time, compute metadata + mosaic
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    metadata = task.metadata_from_dataset(metadata, baseline_data,
                                          baseline_clear_mask, parameters)

    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(
        selected_scene)
    metadata = task.metadata_from_dataset(metadata, selected_scene,
                                          selected_scene_clear_mask,
                                          parameters)
    selected_scene = task.get_processing_method()(
        selected_scene,
        clean_mask=selected_scene_clear_mask,
        intermediate_product=None,
        no_data=task.satellite.no_data_value)
    # we need to re generate the clear mask using the mosaic now.
    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(
        selected_scene)

    if check_cancel_task(self, task): return

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask,
        no_data=task.satellite.no_data_value)
    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save(update_fields=['scenes_processed'])

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #3
0
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    def combine_intermediates(dataset, dataset_intermediate):
        """
        functions used to combine time sliced data after being combined geographically.
        This compounds the results of the time slice and recomputes the normalized data.
        """
        # total data/clean refers to tsm
        dataset_intermediate['total_data'] += dataset.total_data
        dataset_intermediate['total_clean'] += dataset.total_clean
        dataset_intermediate['normalized_data'] = dataset_intermediate['total_data'] / dataset_intermediate[
            'total_clean']
        dataset_intermediate['min'] = xr.concat(
            [dataset_intermediate['min'], dataset['min']], dim='time').min(
                dim='time', skipna=True)
        dataset_intermediate['max'] = xr.concat(
            [dataset_intermediate['max'], dataset['max']], dim='time').max(
                dim='time', skipna=True)
        dataset_intermediate['wofs'] += dataset.wofs
        dataset_intermediate['wofs_total_clean'] += dataset.wofs_total_clean

    def generate_animation(index, combined_data):
        base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * index
        for index in range((task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1)):
            path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + index))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path)
                if task.animated_product.animation_id != "scene" and combined_data:
                    combine_intermediates(combined_data, animated_data)
                # need to wait until last step to mask out wofs < 0.8
                path = os.path.join(task.get_temp_path(), "animation_final_{}.nc".format(base_index + index))
                animated_data.to_netcdf(path)

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        combine_intermediates(data, combined_data)
        if check_cancel_task(self, task): return
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Example #4
0
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}

    """
    logger.info("CREATE_OUTPUT")
    full_metadata = data[1]
    dataset = xr.open_dataset(data[0], autoclose=True)
    task = NdviAnomalyTask.objects.get(pk=task_id)

    task.result_path = os.path.join(task.get_result_path(),
                                    "ndvi_difference.png")
    task.scene_ndvi_path = os.path.join(task.get_result_path(),
                                        "scene_ndvi.png")
    task.baseline_ndvi_path = os.path.join(task.get_result_path(),
                                           "baseline_ndvi.png")
    task.ndvi_percentage_change_path = os.path.join(
        task.get_result_path(), "ndvi_percentage_change.png")
    task.result_mosaic_path = os.path.join(task.get_result_path(),
                                           "result_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(),
                                         "data_netcdf.nc")
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements() + [
        'scene_ndvi', 'baseline_ndvi', 'ndvi_difference',
        'ndvi_percentage_change'
    ]

    dataset.to_netcdf(task.data_netcdf_path)

    write_geotiff_from_xr(task.data_path,
                          dataset.astype('float64'),
                          bands=bands,
                          no_data=task.satellite.no_data_value)
    write_single_band_png_from_xr(
        task.result_path,
        dataset,
        'ndvi_difference',
        color_scale=task.color_scales['ndvi_difference'],
        no_data=task.satellite.no_data_value)
    write_single_band_png_from_xr(
        task.ndvi_percentage_change_path,
        dataset,
        'ndvi_percentage_change',
        color_scale=task.color_scales['ndvi_percentage_change'],
        no_data=task.satellite.no_data_value)
    write_single_band_png_from_xr(task.scene_ndvi_path,
                                  dataset,
                                  'scene_ndvi',
                                  color_scale=task.color_scales['scene_ndvi'],
                                  no_data=task.satellite.no_data_value)
    write_single_band_png_from_xr(
        task.baseline_ndvi_path,
        dataset,
        'baseline_ndvi',
        color_scale=task.color_scales['baseline_ndvi'],
        no_data=task.satellite.no_data_value)

    write_png_from_xr(task.result_mosaic_path,
                      dataset,
                      bands=['red', 'green', 'blue'],
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)

    dates = list(
        map(lambda x: datetime.strptime(x, "%m/%d/%Y"),
            task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(task.plot_path,
                       dates=dates,
                       datasets=task._get_field_as_list(
                           'clean_pixel_percentages_per_acquisition'),
                       data_labels="Clean Pixel Percentage (%)",
                       titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    task.rewrite_pathnames()
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK",
        "All products have been generated. Your result will be loaded on the map."
    )
    shutil.rmtree(task.get_temp_path())
    return True
Example #5
0
def recombine_geographic_chunks(chunks, task_id=None):
    """Recombine processed data over the geographic indices

    For each geographic chunk process spawned by the main task, open the resulting dataset
    and combine it into a single dataset. Combine metadata as well, writing to disk.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    logger.info("RECOMBINE_GEO")
    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    task = AppNameTask.objects.get(pk=task_id)

    chunk_data = []

    for index, chunk in enumerate(total_chunks):
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0], autoclose=True))

    combined_data = combine_geographic_chunks(chunk_data)

    # if we're animating, combine it all and save to disk.
    # TODO: If there is no animation, delete this block. Otherwise, recombine all the geo chunks for each time chunk
    #       and save the result to disk.
    if task.animated_product.animation_id != "none":
        base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                      ['time'] is not None else 1) * time_chunk_id
        for index in range((task.get_chunk_size()['time'] if
                            task.get_chunk_size()['time'] is not None else 1)):
            animated_data = []
            for chunk in total_chunks:
                geo_chunk_index = chunk[2]['geo_chunk_id']
                # if we're animating, combine it all and save to disk.
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}_{}.nc".format(str(geo_chunk_index),
                                                str(base_index + index)))
                if os.path.exists(path):
                    animated_data.append(xr.open_dataset(path, autoclose=True))
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + index))
            if len(animated_data) > 0:
                combine_geographic_chunks(animated_data).to_netcdf(path)

    path = os.path.join(task.get_temp_path(),
                        "recombined_geo_{}.nc".format(time_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining geographic chunks for time: " +
                str(time_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #6
0
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}

    """
    logger.info("CREATE_OUTPUT")
    full_metadata = data[1]
    dataset = xr.open_dataset(data[0], autoclose=True)
    task = AppNameTask.objects.get(pk=task_id)

    # TODO: Add any paths that you've added in your models.py Result model and remove the ones that aren't there.
    task.result_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.result_filled_path = os.path.join(task.get_result_path(),
                                           "filled_png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(),
                                         "data_netcdf.nc")
    task.animation_path = os.path.join(task.get_result_path(
    ), "animation.gif") if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    # TODO: Set the bands that should be written to the final products
    bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'cf_mask'
             ] if 'cf_mask' in dataset else [
                 'blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa'
             ]

    # TODO: If you're creating pngs, specify the RGB bands
    png_bands = [
        task.query_type.red, task.query_type.green, task.query_type.blue
    ]

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset.astype('int32'), bands=bands)
    write_png_from_xr(task.result_path,
                      dataset,
                      bands=png_bands,
                      png_filled_path=task.result_filled_path,
                      fill_color=task.query_type.fill,
                      scale=(0, 4096))

    # TODO: if there is no animation, remove this. Otherwise, open each time iteration slice and write to disk.
    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I',
                                duration=1.0) as writer:
            valid_range = reversed(
                range(len(full_metadata))
            ) if task.animated_product.animation_id == "scene" and task.get_reverse_time(
            ) else range(len(full_metadata))
            for index in valid_range:
                path = os.path.join(task.get_temp_path(),
                                    "animation_{}.png".format(index))
                if os.path.exists(path):
                    image = imageio.imread(path)
                    writer.append_data(image)

    # TODO: if you're capturing more tabular metadata, plot it here by converting these to lists.
    # an example of this is the current water detection app.
    dates = list(
        map(lambda x: datetime.strptime(x, "%m/%d/%Y"),
            task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(task.plot_path,
                       dates=dates,
                       datasets=task._get_field_as_list(
                           'clean_pixel_percentages_per_acquisition'),
                       data_labels="Clean Pixel Percentage (%)",
                       titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK",
        "All products have been generated. Your result will be loaded on the map."
    )
    shutil.rmtree(task.get_temp_path())
    return True
Example #7
0
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}

    """
    logger.info("CREATE_OUTPUT")
    full_metadata = data[1]
    dataset = xr.open_dataset(data[0], autoclose=True).astype('float64')
    task = WaterDetectionTask.objects.get(pk=task_id)

    task.result_path = os.path.join(task.get_result_path(), "water_percentage.png")
    task.water_observations_path = os.path.join(task.get_result_path(), "water_observations.png")
    task.clear_observations_path = os.path.join(task.get_result_path(), "clear_observations.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    task.animation_path = os.path.join(task.get_result_path(),
                                       "animation.gif") if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = ['normalized_data', 'total_data', 'total_clean']
    band_paths = [task.result_path, task.water_observations_path, task.clear_observations_path]

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset, bands=bands, no_data=task.satellite.no_data_value)

    for band, band_path in zip(bands, band_paths):
        write_single_band_png_from_xr(
            band_path,
            dataset,
            band,
            color_scale=task.color_scales[band],
            fill_color=task.query_type.fill,
            interpolate=False,
            no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            valid_range = range(len(full_metadata))
            for index in valid_range:
                path = os.path.join(task.get_temp_path(), "animation_{}.png".format(index))
                if os.path.exists(path):
                    image = imageio.imread(path)
                    writer.append_data(image)

    dates = list(map(lambda x: datetime.strptime(x, "%m/%d/%Y"), task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=[
                task._get_field_as_list('clean_pixel_percentages_per_acquisition'), [
                    int(x) / max(int(y), 1)
                    for x, y in zip(
                        task._get_field_as_list('water_pixels_per_acquisition'),
                        task._get_field_as_list('clean_pixels_per_acquisition'))
                ]
            ],
            data_labels=["Clean Pixel Percentage (%)", "Water Pixel Percentage (%)"],
            titles=["Clean Pixel Percentage Per Acquisition", "Water Pixels Percentage Per Acquisition"])

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    shutil.rmtree(task.get_temp_path())
    return True
Example #8
0
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}

    """
    logger.info("CREATE_OUTPUT")
    full_metadata = data[1]
    dataset = xr.open_dataset(data[0], autoclose=True)
    task = FractionalCoverTask.objects.get(pk=task_id)

    task.result_path = os.path.join(task.get_result_path(), "band_math.png")
    task.mosaic_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(),
                                         "data_netcdf.nc")
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = [
        'blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'cf_mask', 'pv',
        'npv', 'bs'
    ] if 'cf_mask' in dataset else [
        'blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa', 'pv',
        'npv', 'bs'
    ]

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset.astype('int32'), bands=bands)
    write_png_from_xr(task.mosaic_path,
                      dataset,
                      bands=['red', 'green', 'blue'],
                      scale=(0, 4096))
    write_png_from_xr(task.result_path, dataset, bands=['bs', 'pv', 'npv'])

    dates = list(
        map(lambda x: datetime.strptime(x, "%m/%d/%Y"),
            task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(task.plot_path,
                       dates=dates,
                       datasets=task._get_field_as_list(
                           'clean_pixel_percentages_per_acquisition'),
                       data_labels="Clean Pixel Percentage (%)",
                       titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK",
        "All products have been generated. Your result will be loaded on the map."
    )
    shutil.rmtree(task.get_temp_path())
    return True
Example #9
0
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CoastalChangeTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    starting_year = _get_datetime_range_containing(*time_chunk[0])
    comparison_year = _get_datetime_range_containing(*time_chunk[1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    def _compute_mosaic(time):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            return None, None

        clear_mask = task.satellite.get_clean_mask_func()(data)
        metadata = task.metadata_from_dataset({}, data, clear_mask,
                                              updated_params)
        return task.get_processing_method()(
            data, clean_mask=clear_mask,
            no_data=task.satellite.no_data_value), metadata

    old_mosaic, old_metadata = _compute_mosaic(starting_year)
    new_mosaic, new_metadata = _compute_mosaic(comparison_year)

    if old_mosaic is None or new_mosaic is None:
        return None

    metadata = {**old_metadata, **new_metadata}

    output_product = compute_coastal_change(
        old_mosaic, new_mosaic, no_data=task.satellite.no_data_value)

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    output_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #10
0
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}

    """
    logger.info("CREATE_OUTPUT")
    full_metadata = data[1]
    dataset = xr.open_dataset(data[0], autoclose=True)
    task = CoastalChangeTask.objects.get(pk=task_id)

    task.result_path = os.path.join(task.get_result_path(),
                                    "coastline_change.png")
    task.result_coastal_change_path = os.path.join(task.get_result_path(),
                                                   "coastal_change.png")
    task.result_mosaic_path = os.path.join(task.get_result_path(),
                                           "mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(),
                                         "data_netcdf.nc")
    task.animation_path = os.path.join(task.get_result_path(
    ), "animation.gif") if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements() + [
        'coastal_change', 'coastline_old', 'coastline_new'
    ]

    png_bands = ['red', 'green', 'blue']

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path,
                          dataset.astype('int32'),
                          bands=bands,
                          no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_path,
                      mask_mosaic_with_coastlines(dataset),
                      bands=png_bands,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_coastal_change_path,
                      mask_mosaic_with_coastal_change(dataset),
                      bands=png_bands,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_mosaic_path,
                      dataset,
                      bands=png_bands,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I',
                                duration=1.0) as writer:
            for index in range(task.time_end - task.time_start):
                path = os.path.join(task.get_temp_path(),
                                    "animation_{}.png".format(index))
                if os.path.exists(path):
                    image = imageio.imread(path)
                    writer.append_data(image)

    logger.info("All products created.")
    task.rewrite_pathnames()
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK",
        "All products have been generated. Your result will be loaded on the map."
    )
    shutil.rmtree(task.get_temp_path())
    return True
Example #11
0
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids

    """
    logger.info("RECOMBINE_TIME")
    #sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None

    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = SlipTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    combined_data = None
    combined_slip = None
    for index, chunk in enumerate(reversed(total_chunks)):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            combined_data = data.drop('slip')
            # since this is going to interact with data/mosaicking, it needs a time dim
            combined_slip = xr.concat([data.slip.copy(deep=True)], 'time')
            continue
        #give time an indice to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        # modify clean mask so that only slip pixels that are still zero will be used. This will show all the pixels that caused the flag.
        clear_mask[xr.concat([combined_slip], 'time').values == 1] = False
        combined_data = create_mosaic(data.drop('slip'),
                                      clean_mask=clear_mask,
                                      intermediate_product=combined_data)
        combined_slip.values[combined_slip.values == 0] = data.slip.values[
            combined_slip.values == 0]

    # Since we added a time dim to combined_slip, we need to remove it here.
    combined_data['slip'] = combined_slip.isel(time=0, drop=True)
    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #12
0
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk) if task.get_iterative(
        ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    iteration_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                  ['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask,
                                              updated_params)

        iteration_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        if check_cancel_task(self, task): return

        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id),
                                            str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                #need to clear out all the metadata..
                clear_attrs(data)
                #can't reindex on time - weird?
                export_xarray_to_netcdf(data.isel(time=0).drop('time'), path)
            elif task.animated_product.animation_id == "cumulative":
                export_xarray_to_netcdf(iteration_data, path)

        task.scenes_processed = F('scenes_processed') + 1
        # Avoid overwriting the task's status if it is cancelled.
        task.save(update_fields=['scenes_processed'])

    if iteration_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(iteration_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #13
0
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    geographic_chunk=None,
                    num_scn_per_chk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        num_scn_per_chk: A dictionary of the number of scenes per chunk for the baseline
                         and analysis extents. Used to determine task progress.
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = str(geo_chunk_id)
    task = SpectralAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # For both the baseline and analysis time ranges for this
    # geographic chunk, load, calculate the spectral index, composite,
    # and filter the data according to user-supplied parameters -
    # recording where the data was out of the filter's range so we can
    # create the output product (an image).
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    spectral_index = task.query_type.result_id
    composites = {}
    composites_out_of_range = {}
    no_data_value = task.satellite.no_data_value
    for composite_name in ['baseline', 'analysis']:
        if check_cancel_task(self, task): return

        # Use the corresponding time range for the baseline and analysis data.
        updated_params['time'] = \
            updated_params['baseline_time' if composite_name == 'baseline' else 'analysis_time']
        time_column_data = dc.get_dataset_by_extent(**updated_params)
        # If this geographic chunk is outside the data extents, return None.
        if len(time_column_data.dims) == 0: return None

        # Obtain the clean mask for the satellite.
        time_column_clean_mask = task.satellite.get_clean_mask_func()(
            time_column_data)
        measurements_list = task.satellite.measurements.replace(" ",
                                                                "").split(",")
        # Obtain the mask for valid Landsat values.
        time_column_invalid_mask = landsat_clean_mask_invalid(\
            time_column_data, platform=task.satellite.platform,
            collection=task.satellite.collection, level=task.satellite.level).values
        # Also exclude data points with the no_data value.
        no_data_mask = time_column_data[
            measurements_list[0]].values != no_data_value
        # Combine the clean masks.
        time_column_clean_mask = time_column_clean_mask | time_column_invalid_mask | no_data_mask

        # Obtain the composite.
        composite = task.get_processing_method()(
            time_column_data,
            clean_mask=time_column_clean_mask,
            no_data=task.satellite.no_data_value)
        # Obtain the mask for valid Landsat values.
        composite_invalid_mask = landsat_clean_mask_invalid(\
            composite, platform=task.satellite.platform,
            collection=task.satellite.collection, level=task.satellite.level).values
        # Also exclude data points with the no_data value via the compositing mask.
        composite_no_data_mask = composite[
            measurements_list[0]].values != no_data_value
        composite_clean_mask = composite_invalid_mask | composite_no_data_mask

        # Compute the spectral index for the composite.
        spec_ind_params = dict()
        if spectral_index == 'fractional_cover':
            spec_ind_params = dict(clean_mask=composite_clean_mask,
                                   no_data=no_data_value)
        spec_ind_result = spectral_indices_function_map[spectral_index](
            composite, **spec_ind_params)
        if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']:
            composite[spectral_index] = spec_ind_result
        else:  # Fractional Cover
            composite = xr.merge([composite, spec_ind_result])
            # Fractional Cover is supposed to have a range of [0, 100], with its bands -
            # 'bs', 'pv', and 'npv' - summing to 100. However, the function we use
            # can have the sum of those bands as high as 106.
            # frac_cov_min, frac_cov_max = spectral_indices_range_map[spectral_index]
            frac_cov_min, frac_cov_max = 0, 106
            for band in ['bs', 'pv', 'npv']:
                composite[band].values = \
                    np.interp(composite[band].values, (frac_cov_min, frac_cov_max),
                              spectral_indices_range_map[spectral_index])

        composites[composite_name] = composite

        # Determine where the composite is out of range.
        # We rename the resulting xarray.DataArray because calling to_netcdf()
        # on it at the end of this function will save it as a Dataset
        # with one data variable with the same name as the DataArray.
        if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']:
            composites_out_of_range[composite_name] = \
                xr_or(composite[spectral_index] < task.composite_threshold_min,
                      task.composite_threshold_max < composite[spectral_index]).rename(spectral_index)
        else:  # Fractional Cover
            # For fractional cover, a composite pixel is out of range if any of its
            # fractional cover bands are out of range.
            composites_out_of_range[composite_name] = xr_or(
                xr_or(
                    xr_or(composite['bs'] < task.composite_threshold_min,
                          task.composite_threshold_max < composite['bs']),
                    xr_or(composite['pv'] < task.composite_threshold_min,
                          task.composite_threshold_max < composite['pv'])),
                xr_or(composite['npv'] < task.composite_threshold_min,
                      task.composite_threshold_max <
                      composite['npv'])).rename(spectral_index)

        # Update the metadata with the current data (baseline or analysis).
        metadata = task.metadata_from_dataset(metadata, time_column_data,
                                              time_column_clean_mask,
                                              parameters)
        # Record task progress (baseline or analysis composite data obtained).
        task.scenes_processed = F(
            'scenes_processed') + num_scn_per_chk[composite_name]
        task.save(update_fields=['scenes_processed'])
    dc.close()

    if check_cancel_task(self, task): return
    # Create a difference composite.
    diff_composite = composites['analysis'] - composites['baseline']
    # Find where either the baseline or analysis composite was out of range for a pixel.
    composite_out_of_range = xr_or(*composites_out_of_range.values())
    # Find where either the baseline or analysis composite was no_data.
    if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']:
        composite_no_data = xr_or(
            composites['baseline'][measurements_list[0]] == no_data_value,
            composites['analysis'][measurements_list[0]] == no_data_value)
        if spectral_index == 'evi':  # EVI returns no_data for values outside [-1,1].
            composite_no_data = xr_or(
                composite_no_data,
                xr_or(composites['baseline'][spectral_index] == no_data_value,
                      composites['analysis'][spectral_index] == no_data_value))
    else:  # Fractional Cover
        composite_no_data = xr_or(
            xr_or(
                xr_or(composites['baseline']['bs'] == no_data_value,
                      composites['baseline']['pv'] == no_data_value),
                composites['baseline']['npv'] == no_data_value),
            xr_or(
                xr_or(composites['baseline']['bs'] == no_data_value,
                      composites['baseline']['pv'] == no_data_value),
                composites['baseline']['npv'] == no_data_value))
    composite_no_data = composite_no_data.rename(spectral_index)

    # Drop unneeded data variables.
    diff_composite = diff_composite.drop(measurements_list)

    if check_cancel_task(self, task): return

    composite_path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(diff_composite, composite_path)
    composite_out_of_range_path = os.path.join(task.get_temp_path(),
                                               chunk_id + "_out_of_range.nc")
    logger.info("composite_out_of_range:" + str(composite_out_of_range))
    export_xarray_to_netcdf(composite_out_of_range,
                            composite_out_of_range_path)
    composite_no_data_path = os.path.join(task.get_temp_path(),
                                          chunk_id + "_no_data.nc")
    export_xarray_to_netcdf(composite_no_data, composite_no_data_path)
    return composite_path, composite_out_of_range_path, composite_no_data_path, \
           metadata, {'geo_chunk_id': geo_chunk_id}
Example #14
0
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    times = list(
        map(_get_datetime_range_containing, time_chunk) if task.get_iterative(
        ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    iteration_data = None
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask,
                                              updated_params)

        iteration_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        if check_cancel_task(self, task): return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])
    if iteration_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(iteration_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #15
0
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = WaterDetectionTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    #sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    def combine_intermediates(dataset, dataset_intermediate):
        """
        functions used to combine time sliced data after being combined geographically.
        This compounds the results of the time slice and recomputes the normalized data.
        """
        dataset_intermediate['total_data'] += dataset.total_data
        dataset_intermediate['total_clean'] += dataset.total_clean
        dataset_intermediate['normalized_data'] = dataset_intermediate[
            'total_data'] / dataset_intermediate['total_clean']

    def generate_animation(index, combined_data):
        base_index = (task.get_chunk_size()['time'] if
                      task.get_chunk_size()['time'] is not None else 1) * index
        for index in range((task.get_chunk_size()['time'] if
                            task.get_chunk_size()['time'] is not None else 1)):
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + index))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path)
                if task.animated_product.animation_id != "scene" and combined_data:
                    combine_intermediates(combined_data, animated_data)
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}.png".format(base_index + index))

                write_single_band_png_from_xr(
                    path,
                    animated_data,
                    task.animated_product.data_variable,
                    color_scale=task.color_scales[
                        task.animated_product.data_variable],
                    fill_color=task.query_type.fill,
                    interpolate=False,
                    no_data=task.satellite.no_data_value)

    metadata = {}
    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        combine_intermediates(data, combined_data)
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #16
0
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = FractionalCoverTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk) if task.get_iterative(
        ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    iteration_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                  ['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask,
                                              updated_params)

        iteration_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=iteration_data)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #17
0
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    #grab dem data as well
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if 'time' not in data or 'time' not in dem_data:
        return None

    #target data is most recent, with the baseline being everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = task.satellite.get_clean_mask_func()(target_data)
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    combined_baseline = task.get_processing_method()(baseline_data,
                                                     clean_mask=baseline_clear_mask,
                                                     no_data=task.satellite.no_data_value,
                                                     reverse_time=task.get_reverse_time())

    target_data = create_mosaic(
        target_data,
        clean_mask=target_clear_mask,
        no_data=task.satellite.no_data_value,
        reverse_time=task.get_reverse_time())

    slip_data = compute_slip(combined_baseline, target_data, dem_data, no_data=task.satellite.no_data_value)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    target_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Example #18
0
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = AppNameTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk) if task.get_iterative(
        ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    iteration_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                  ['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        # TODO: If this is not a multisensory app replace get_stacked_datasets_by_extent with get_dataset_by_extent
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        # TODO: Replace anything here with your processing - do you need to create additional masks? Apply bandmaths? etc.
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask,
                                              updated_params)

        # TODO: Make sure you're producing everything required for your algorithm.
        iteration_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=iteration_data)

        # TODO: If there is no animation you can remove this block. Otherwise, save off the data that you need.
        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id),
                                            str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                #need to clear out all the metadata..
                clear_attrs(data)
                #can't reindex on time - weird?
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #19
0
def recombine_geographic_chunks(self, chunks, task_id=None):
    """Recombine processed data over the geographic indices

    For each geographic chunk process spawned by the main task, open the resulting dataset
    and combine it into a single dataset. Combine metadata as well, writing to disk.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}
        num_scn_per_chk: The number of scenes per chunk. Used to determine task progress.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    if len(total_chunks) == 0:
        return None
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    chunk_data = []
    for index, chunk in enumerate(total_chunks):
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0]))
    combined_data = combine_geographic_chunks(chunk_data)

    # if we're animating, combine it all and save to disk.
    if task.animated_product.animation_id != "none":
        base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                      ['time'] is not None else 1) * time_chunk_id
        for index in range((task.get_chunk_size()['time'] if
                            task.get_chunk_size()['time'] is not None else 1)):
            animated_data = []
            for chunk in total_chunks:
                geo_chunk_index = chunk[2]['geo_chunk_id']
                # if we're animating, combine it all and save to disk.
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}_{}.nc".format(str(geo_chunk_index),
                                                str(base_index + index)))
                if os.path.exists(path):
                    animated_data.append(xr.open_dataset(path))
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + index))
            if len(animated_data) > 0:
                export_xarray_to_netcdf(
                    combine_geographic_chunks(animated_data), path)

    path = os.path.join(task.get_temp_path(),
                        "recombined_geo_{}.nc".format(time_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining geographic chunks for time: " +
                str(time_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #20
0
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids

    """
    logger.info("RECOMBINE_TIME")
    #sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    total_chunks = sorted(chunks, key=lambda x: x[0]) if isinstance(
        chunks, list) else [chunks]
    task = AppNameTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    #TODO: If there is no animation, remove this block. Otherwise, compute the data needed to create each frame.
    def generate_animation(index, combined_data):
        base_index = (task.get_chunk_size()['time'] if
                      task.get_chunk_size()['time'] is not None else 1) * index
        for index in range((task.get_chunk_size()['time'] if
                            task.get_chunk_size()['time'] is not None else 1)):
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + index))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path, autoclose=True)
                if task.animated_product.animation_id == "cumulative":
                    animated_data = xr.concat([animated_data], 'time')
                    animated_data['time'] = [0]
                    clear_mask = create_cfmask_clean_mask(
                        animated_data.cf_mask
                    ) if 'cf_mask' in animated_data else create_bit_mask(
                        animated_data.pixel_qa, [1, 2])
                    animated_data = task.get_processing_method()(
                        animated_data,
                        clean_mask=clear_mask,
                        intermediate_product=combined_data)
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}.png".format(base_index + index))
                write_png_from_xr(path,
                                  animated_data,
                                  bands=[
                                      task.query_type.red,
                                      task.query_type.green,
                                      task.query_type.blue
                                  ],
                                  scale=(0, 4096))

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            # TODO: If there is no animation, remove this.
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        #give time an indice to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        combined_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=combined_data)
        # if we're animating, combine it all and save to disk.
        # TODO: If there is no animation, remove this.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #21
0
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    def generate_animation(index, combined_data):
        base_index = (task.get_chunk_size()['time'] if
                      task.get_chunk_size()['time'] is not None else 1) * index
        for index in range((task.get_chunk_size()['time'] if
                            task.get_chunk_size()['time'] is not None else 1)):
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + index))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path)
                if task.animated_product.animation_id == "cumulative":
                    animated_data = xr.concat([animated_data], 'time')
                    animated_data['time'] = [0]
                    clear_mask = task.satellite.get_clean_mask_func()(
                        animated_data)
                    animated_data = task.get_processing_method()(
                        animated_data,
                        clean_mask=clear_mask,
                        intermediate_product=combined_data,
                        no_data=task.satellite.no_data_value)
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}.png".format(base_index + index))
                write_png_from_xr(path,
                                  animated_data,
                                  bands=[
                                      task.query_type.red,
                                      task.query_type.green,
                                      task.query_type.blue
                                  ],
                                  scale=task.satellite.get_scale(),
                                  no_data=task.satellite.no_data_value)

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        #give time an index to keep compositing from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        combined_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value)
        if check_cancel_task(self, task): return
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #22
0
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 enforce_float64=True,
                                                 no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis, no_data=task.satellite.no_data_value)

        clear_mask[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=task.satellite.no_data_value)

        if check_cancel_task(self, task): return

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)
        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])
    if combined_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Example #23
0
def recombine_time_chunks(self, chunks, task_id=None, num_scn_per_chk=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}
        num_scn_per_chk: The number of scenes per chunk. Used to determine task progress.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = FractionalCoverTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    #sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            combined_data = data
            task.scenes_processed = F('scenes_processed') + num_scn_per_chk
            task.save(update_fields=['scenes_processed'])
            continue
        #give time an indice to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)

        combined_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())
        if check_cancel_task(self, task): return
        task.scenes_processed = F('scenes_processed') + num_scn_per_chk
        task.save(update_fields=['scenes_processed'])
    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #24
0
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to variuos formats and register all values in the task model
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    full_metadata = data[1]
    dataset = xr.open_dataset(data[0]).astype('float64')
    dataset['variability'] = dataset['max'] - dataset['normalized_data']
    dataset['wofs'] = dataset.wofs / dataset.wofs_total_clean
    nan_to_num(dataset, 0)
    dataset_masked = mask_water_quality(dataset, dataset.wofs)

    task.result_path = os.path.join(task.get_result_path(), "tsm.png")
    task.clear_observations_path = os.path.join(task.get_result_path(), "clear_observations.png")
    task.water_percentage_path = os.path.join(task.get_result_path(), "water_percentage.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    task.animation_path = os.path.join(task.get_result_path(),
                                       "animation.gif") if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset_masked)
    task.metadata_from_dict(full_metadata)

    bands = [task.query_type.data_variable, 'total_clean', 'wofs']
    band_paths = [task.result_path, task.clear_observations_path, task.water_percentage_path]

    dataset_masked.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset_masked, bands=bands, no_data=task.satellite.no_data_value)

    for band, band_path in zip(bands, band_paths):
        write_single_band_png_from_xr(
            band_path,
            dataset_masked,
            band,
            color_scale=task.color_scales[band],
            fill_color='black',
            interpolate=False,
            no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            valid_range = range(len(full_metadata))
            for index in valid_range:
                path = os.path.join(task.get_temp_path(), "animation_final_{}.nc".format(index))
                if os.path.exists(path):
                    png_path = os.path.join(task.get_temp_path(), "animation_{}.png".format(index))
                    animated_data = mask_water_quality(
                        xr.open_dataset(path).astype('float64'),
                        dataset.wofs) if task.animated_product.animation_id != "scene" else xr.open_dataset(
                            path)
                    write_single_band_png_from_xr(
                        png_path,
                        animated_data,
                        task.animated_product.data_variable,
                        color_scale=task.color_scales[task.animated_product.data_variable],
                        fill_color='black',
                        interpolate=False,
                        no_data=task.satellite.no_data_value)
                    image = imageio.imread(png_path)
                    writer.append_data(image)

    dates = list(map(lambda x: datetime.strptime(x, "%m/%d/%Y"), task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    task.update_bounds_from_dataset(dataset_masked)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    return True
Example #25
0
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = WaterDetectionTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    times = list(
        map(_get_datetime_range_containing, time_chunk) if task.get_iterative(
        ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    water_analysis = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                  ['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        # Ensure data variables have the range of Landsat 7 Collection 1 Level 2
        # since the color scales are tailored for that dataset.
        platform = task.satellite.platform
        collection = task.satellite.collection
        level = task.satellite.level
        if (platform, collection) != ('LANDSAT_7', 'c1'):
            data = \
                convert_range(data, from_platform=platform,
                            from_collection=collection, from_level=level,
                            to_platform='LANDSAT_7', to_collection='c1', to_level='l2')

        wofs_data = task.get_processing_method()(
            data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data,
            'wofs',
            intermediate_product=water_analysis,
            no_data=task.satellite.no_data_value)

        metadata = task.metadata_from_dataset(metadata, wofs_data,
                                              clear_mask.data, updated_params)
        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id),
                                            str(base_index + time_index)))
            animated_data = wofs_data.isel(
                time=0, drop=True
            ) if task.animated_product.animation_id == "scene" else water_analysis
            export_xarray_to_netcdf(animated_data, path)

        if check_cancel_task(self, task): return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])
    if water_analysis is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(water_analysis, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }