Example #1
def perform_task_chunking(self, parameters, task_id=None):
    """Chunk parameter sets into more manageable sizes

    Uses functions provided by the task model to create a group of
    parameter sets that make up the arg.

    Args:
        parameters: parameter stream containing all kwargs to load data

    Returns:
        parameters with a list of geographic and time ranges
    """
    if parameters is None:
        return None

    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    dc = DataAccessApi(config=task.config_path)
    dates = dc.list_combined_acquisition_dates(**parameters)
    task_chunk_sizing = task.get_chunk_size()

    geographic_chunks = create_geographic_chunks(
        longitude=parameters['longitude'],
        latitude=parameters['latitude'],
        geographic_chunk_size=task_chunk_sizing['geographic'])

    time_chunks = create_time_chunks(dates,
                                     _reversed=task.get_reverse_time(),
                                     time_chunk_size=task_chunk_sizing['time'])
    logger.info("Time chunks: {}, Geo chunks: {}".format(
        len(time_chunks), len(geographic_chunks)))

    dc.close()
    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Chunked parameter set.")
    return {
        'parameters': parameters,
        'geographic_chunks': geographic_chunks,
        'time_chunks': time_chunks
    }
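
As an aside, geographic chunking here just splits the latitude/longitude extent into sub-extents. Below is a minimal, self-contained sketch of the idea; the `split_range` helper is hypothetical and only illustrates the concept, not the actual `create_geographic_chunks` implementation:

def split_range(bounds, num_chunks):
    """Split a (min, max) range into num_chunks contiguous sub-ranges."""
    low, high = bounds
    step = (high - low) / num_chunks
    return [(low + i * step, low + (i + 1) * step) for i in range(num_chunks)]

# Split a 2x2 degree extent into four 0.5x2 degree chunks along latitude.
latitude = (0.0, 2.0)
longitude = (10.0, 12.0)
geographic_chunks = [{'latitude': lat, 'longitude': longitude}
                     for lat in split_range(latitude, 4)]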
Example #2
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asyncrhonous processing pipeline from paramters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered, completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = TsmTask.objects.get(pk=task_id)

    # Get an estimate of the amount of work to be done: the number of scenes
    # to process, also considering intermediate chunks to be combined.
    num_scenes = len(geographic_chunks) * sum([len(time_chunk) for time_chunk in time_chunks])
    logger.info("num_scenes: {}".format(num_scenes))
    # recombine_geographic_chunks() scenes:
    # num_scn_per_chk * len(time_chunks) * len(geographic_chunks)
    num_scn_per_chk = round(num_scenes / (len(time_chunks) * len(geographic_chunks)))
    # Scene processing progress is tracked in processing_task() and recombine_geographic_chunks().
    task.total_scenes = 2 * num_scenes
    logger.info("task.total_scenes: {}"
                .format(task.total_scenes))
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (group([
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=geo_index,
                time_chunk_id=time_index,
                geographic_chunk=geographic_chunk,
                time_chunk=time_chunk,
                **parameters) for geo_index, geographic_chunk in enumerate(geographic_chunks)
        ]) | recombine_geographic_chunks.s(task_id=task_id, num_scn_per_chk=num_scn_per_chk)
        for time_index, time_chunk in enumerate(time_chunks)
    ]) | recombine_time_chunks.s(task_id=task_id) | create_output_products.s(task_id=task_id)\
       | task_clean_up.si(task_id=task_id, task_model='TsmTask')).apply_async()

    return True
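
Piping a Celery group into a signature creates a chord, so the structure above runs all geographic chunks for a time chunk, recombines them, and only then moves down the chain. Here is a stripped-down sketch of the same canvas shape with toy tasks; the task names and broker/backend are assumptions for illustration only:

from celery import Celery, group

app = Celery('sketch', broker='memory://', backend='cache+memory://')

@app.task
def process(geo, time): return (geo, time)

@app.task
def recombine_geo(results): return results   # receives the inner group's results

@app.task
def recombine_time(results): return results  # receives one result per time chunk

@app.task
def finalize(): return True

num_geo, num_time = 2, 3
pipeline = (group([
    group([process.s(geo=g, time=t) for g in range(num_geo)])
    | recombine_geo.s()                      # chord: runs after all geo chunks for time t
    for t in range(num_time)
]) | recombine_time.s()                      # merge per-time results over the time index
   | finalize.si()                           # .si() ignores the parent result
).apply_async()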
Example #3
def validate_parameters(self, parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = CloudCoverageTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    dc = DataAccessApi(config=task.config_path)

    # validate against any number of criteria here - number of acquisitions, etc.
    acquisitions = dc.list_acquisition_dates(**parameters)

    if len(acquisitions) < 1:
        task.complete = True
        task.update_status("ERROR",
                           "There are no acquisitions for this parameter set.")
        dc.close()
        return None

    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Validated parameters.")

    if not dc.validate_measurements(parameters['product'],
                                    parameters['measurements']):
        task.complete = True
        task.update_status(
            "ERROR",
            "The provided Satellite model measurements aren't valid for the product. Please check the measurements listed in the {} model."
            .format(task.satellite.name))
        dc.close()
        return None

    dc.close()
    return parameters
Example #4
def recombine_geographic_chunks(self, chunks, task_id=None, num_scn_per_chk=None):
    """Recombine processed data over the geographic indices

    For each geographic chunk process spawned by the main task, open the resulting dataset
    and combine it into a single dataset. Combine metadata as well, writing to disk.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}
        num_scn_per_chk: The number of scenes per chunk. Used to determine task progress.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    if len(total_chunks) == 0:
        return None
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    chunk_data = []
    for index, chunk in enumerate(total_chunks):
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0]))
        task.scenes_processed = F('scenes_processed') + num_scn_per_chk
        task.save(update_fields=['scenes_processed'])
    combined_data = combine_geographic_chunks(chunk_data)

    if task.animated_product.animation_id != "none":
        base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
        for index in range((task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1)):
            animated_data = []
            for chunk in total_chunks:
                geo_chunk_index = chunk[2]['geo_chunk_id']
                # if we're animating, combine it all and save to disk.
                path = os.path.join(task.get_temp_path(),
                                    "animation_{}_{}.nc".format(str(geo_chunk_index), str(base_index + index)))
                if os.path.exists(path):
                    animated_data.append(xr.open_dataset(path))
            path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + index))
            if len(animated_data) > 0:
                combine_geographic_chunks(animated_data).to_netcdf(path)

    path = os.path.join(task.get_temp_path(), "recombined_geo_{}.nc".format(time_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining geographic chunks for time: " + str(time_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Example #5
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asyncrhonous processing pipeline from paramters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered, completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = SpectralIndicesTask.objects.get(pk=task_id)

    # Track task progress.
    num_scenes = len(geographic_chunks) * sum(
        [len(time_chunk) for time_chunk in time_chunks])
    # Scene processing progress is tracked in processing_task().
    task.total_scenes = num_scenes
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (
        group([
            group([
                processing_task.s(task_id=task_id,
                                  geo_chunk_id=geo_index,
                                  time_chunk_id=time_index,
                                  geographic_chunk=geographic_chunk,
                                  time_chunk=time_chunk,
                                  **parameters)
                for time_index, time_chunk in enumerate(time_chunks)
            ]) | recombine_time_chunks.s(task_id=task_id)
            | process_band_math.s(task_id=task_id)
            for geo_index, geographic_chunk in enumerate(geographic_chunks)
        ]) | recombine_geographic_chunks.s(task_id=task_id)
        | create_output_products.s(task_id=task_id)
        | task_clean_up.si(task_id=task_id,
                           task_model='SpectralIndicesTask')).apply_async()

    return True
Example #6
def recombine_geographic_chunks(self, chunks, task_id=None):
    """Recombine processed data over the geographic indices

    For each geographic chunk process spawned by the main task, open the resulting dataset
    and combine it into a single dataset. Combine metadata as well, writing to disk.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = CoastalChangeTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    if len(total_chunks) == 0:
        return None
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    chunk_data = []
    for index, chunk in enumerate(total_chunks):
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0]))
    combined_data = combine_geographic_chunks(chunk_data)

    if task.animated_product.animation_id != "none":
        path = os.path.join(task.get_temp_path(),
                            "animation_{}.png".format(time_chunk_id))
        animated_data = mask_mosaic_with_coastlines(
            combined_data
        ) if task.animated_product.animation_id == "coastline_change" else mask_mosaic_with_coastal_change(
            combined_data)
        write_png_from_xr(path,
                          animated_data,
                          bands=['red', 'green', 'blue'],
                          scale=task.satellite.get_scale(),
                          no_data=task.satellite.no_data_value)

    path = os.path.join(task.get_temp_path(),
                        "recombined_geo_{}.nc".format(time_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining geographic chunks for time: " +
                str(time_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #7
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to various formats and register all values in the task model.
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
    """
    task = FractionalCoverTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    full_metadata = data[1]
    dataset = xr.open_dataset(data[0])

    task.result_path = os.path.join(task.get_result_path(), "band_math.png")
    task.mosaic_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements() + ['pv', 'npv', 'bs']

    export_xarray_to_netcdf(dataset, task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset.astype('int32'), bands=bands, no_data=task.satellite.no_data_value)
    write_png_from_xr(
        task.mosaic_path,
        dataset,
        bands=['red', 'green', 'blue'],
        scale=task.satellite.get_scale(),
        no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_path, dataset, bands=['bs', 'pv', 'npv'])

    dates = list(map(lambda x: datetime.strptime(x, "%m/%d/%Y"), task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    return True
Example #8
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = UrbanizationTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sort by time id - earlier chunks come first, since ids increment, e.g. 0, 1, 2, ...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            combined_data = data
            continue
        # give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        combined_data = task.get_processing_method()(data,
                                                     clean_mask=clear_mask,
                                                     intermediate_product=combined_data,
                                                     no_data=task.satellite.no_data_value,
                                                     reverse_time=task.get_reverse_time())
    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
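
The loop above is effectively a fold: each chunk is merged into the running `combined_data` by the same compositing method that produced it. A hedged sketch of that pattern with a toy gap-filling mosaic; the `mosaic` function below is hypothetical and stands in for `task.get_processing_method()`:

import numpy as np
import xarray as xr

def mosaic(data, intermediate_product=None, no_data=-9999):
    """Toy iterative composite: keep existing pixels, fill no-data from new data."""
    if intermediate_product is None:
        return data.copy(deep=True)
    out = intermediate_product.copy(deep=True)
    for var in out.data_vars:
        fill = out[var].values == no_data
        out[var].values[fill] = data[var].values[fill]
    return out

a = xr.Dataset({'red': (('y', 'x'), np.array([[1, -9999], [3, 4]]))})
b = xr.Dataset({'red': (('y', 'x'), np.array([[9, 2], [9, 9]]))})

combined = None
for chunk in [a, b]:  # chunks sorted by time id, as above
    combined = mosaic(chunk, intermediate_product=combined)
# combined.red == [[1, 2], [3, 4]]: the gap in the first chunk is filled by the second.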
Example #9
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asyncrhonous processing pipeline from paramters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered, completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = CoastalChangeTask.objects.get(pk=task_id)

    # This calculation does not account for time chunking because this app
    # does not support time chunking.
    num_times_fst_lst_yrs = len(time_chunks[0][0]) + len(time_chunks[0][1])
    task.total_scenes = len(geographic_chunks) * len(
        time_chunks) * num_times_fst_lst_yrs
    task.scenes_processed = 0
    task.save()

    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (group([
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=geo_index,
                time_chunk_id=time_index,
                geographic_chunk=geographic_chunk,
                time_chunk=time_chunk,
                **parameters) for geo_index, geographic_chunk in enumerate(geographic_chunks)
        ]) | recombine_geographic_chunks.s(task_id=task_id) for time_index, time_chunk in enumerate(time_chunks)
    ]) | recombine_time_chunks.s(task_id=task_id) | create_output_products.s(task_id=task_id)\
       | task_clean_up.si(task_id=task_id, task_model='CoastalChangeTask')).apply_async()

    return True
Example #10
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asyncrhonous processing pipeline from paramters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered, completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    assert len(time_chunks) == 1, "There should only be one time chunk for NDVI anomaly operations."

    task = NdviAnomalyTask.objects.get(pk=task_id)
    task.total_scenes = len(geographic_chunks) * len(time_chunks) * (
        task.get_chunk_size()['time']
        if task.get_chunk_size()['time'] is not None else len(time_chunks[0]))
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])
    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (group([
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=geo_index,
                time_chunk_id=time_index,
                geographic_chunk=geographic_chunk,
                time_chunk=time_chunk,
                **parameters) for time_index, time_chunk in enumerate(time_chunks)
        ]) for geo_index, geographic_chunk in enumerate(geographic_chunks)
    ]) | recombine_geographic_chunks.s(task_id=task_id) | create_output_products.s(task_id=task_id) \
       | task_clean_up.si(task_id=task_id, task_model='NdviAnomalyTask')).apply_async()

    return True
Example #11
def recombine_geographic_chunks(self, chunks, task_id=None):
    """Recombine processed data over the geographic indices

    For each geographic chunk process spawned by the main task, open the resulting dataset
    and combine it into a single dataset. Combine metadata as well, writing to disk.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    if len(total_chunks) == 0: return None

    task = SpectralAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    metadata = {}
    composite_chunk_data = []
    out_of_range_chunk_data = []
    no_data_chunk_data = []
    for index, chunk in enumerate(total_chunks):
        metadata = task.combine_metadata(metadata, chunk[3])
        composite_chunk_data.append(xr.open_dataset(chunk[0]))
        out_of_range_chunk_data.append(xr.open_dataset(chunk[1]))
        no_data_chunk_data.append(xr.open_dataset(chunk[2]))

    combined_composite_data = combine_geographic_chunks(composite_chunk_data)
    combined_out_of_range_data = combine_geographic_chunks(
        out_of_range_chunk_data)
    combined_no_data = combine_geographic_chunks(no_data_chunk_data)

    composite_path = os.path.join(task.get_temp_path(), "full_composite.nc")
    export_xarray_to_netcdf(combined_composite_data, composite_path)
    composite_out_of_range_path = os.path.join(
        task.get_temp_path(), "full_composite_out_of_range.nc")
    export_xarray_to_netcdf(combined_out_of_range_data,
                            composite_out_of_range_path)
    no_data_path = os.path.join(task.get_temp_path(),
                                "full_composite_no_data.nc")
    export_xarray_to_netcdf(combined_no_data, no_data_path)
    return composite_path, composite_out_of_range_path, no_data_path, metadata
Example #12
def process_band_math(self, chunk, task_id=None):
    """Apply some band math to a chunk and return the args

    Opens the chunk dataset and applies the band math defined by _apply_band_math(dataset).
    _apply_band_math creates a product using the bands already present in the dataset and
    returns the data array. The data array is then appended under 'band_math', and the
    result is saved to disk at the same path as the existing nc file.
    """
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    spectral_indices_map = {
        'ndvi':
        lambda ds: (ds.nir - ds.red) / (ds.nir + ds.red),
        'evi':
        lambda ds: 2.5 * (ds.nir - ds.red) /
        (ds.nir + 6 * ds.red - 7.5 * ds.blue + 1),
        'savi':
        lambda ds: (ds.nir - ds.red) / (ds.nir + ds.red + 0.5) * (1.5),
        'nbr':
        lambda ds: (ds.nir - ds.swir2) / (ds.nir + ds.swir2),
        'nbr2':
        lambda ds: (ds.swir1 - ds.swir2) / (ds.swir1 + ds.swir2),
        'ndwi':
        lambda ds: (ds.nir - ds.swir1) / (ds.nir + ds.swir1),
        'ndbi':
        lambda ds: (ds.swir1 - ds.nir) / (ds.nir + ds.swir1),
    }

    def _apply_band_math(dataset):
        return spectral_indices_map[task.query_type.result_id](dataset)

    if chunk is None:
        return None

    dataset = xr.open_dataset(chunk[0]).load()
    dataset['band_math'] = _apply_band_math(dataset)
    #remove previous nc and write band math to disk
    os.remove(chunk[0])
    export_xarray_to_netcdf(dataset, chunk[0])
    return chunk
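
The entries in `spectral_indices_map` are plain xarray expressions, so they can be exercised directly on synthetic data. A small self-contained check of the NDVI lambda; the arrays below are made up for illustration:

import numpy as np
import xarray as xr

ds = xr.Dataset({
    'red': (('y', 'x'), np.array([[0.1, 0.2], [0.3, 0.4]])),
    'nir': (('y', 'x'), np.array([[0.5, 0.6], [0.7, 0.8]])),
})

ndvi = lambda ds: (ds.nir - ds.red) / (ds.nir + ds.red)
ds['band_math'] = ndvi(ds)  # appended under 'band_math', as in process_band_math()
print(ds.band_math.values)  # [[0.667 0.5], [0.4 0.333]] (rounded)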
Example #13
def recombine_geographic_chunks(self, chunks, task_id=None):
    """Recombine processed data over the geographic indices

    For each geographic chunk process spawned by the main task, open the resulting dataset
    and combine it into a single dataset. Combine metadata as well, writing to disk.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    logger.info("recombine_geographic_chunks() begin!")

    task = CloudCoverageTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    if len(total_chunks) == 0:
        return None
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    chunk_data = []
    for index, chunk in enumerate(total_chunks):
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0]))
    combined_data = combine_geographic_chunks(chunk_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_geo_{}.nc".format(time_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining geographic chunks for time: " +
                str(time_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #14
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    logger.info("RECOMBINE_TIME")

    task = CoastalChangeTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sort by time id - earlier chunks come first, since ids increment, e.g. 0, 1, 2, ...
    total_chunks = sorted(chunks, key=lambda x: x[0]) if isinstance(
        chunks, list) else [chunks]
    if len(total_chunks) == 0:
        return None
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])

    # If we've computed an animation, only the last chunk is needed for the next pass.
    # If there is no animation, this is fine anyway.
    path = total_chunks[-1][0]

    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #15
def parse_parameters_from_task(self, task_id=None):
    """Parse out required DC parameters from the task model.

    See the DataAccessApi docstrings for more information.
    Parses out platforms, products, etc. to be used with DataAccessApi calls.

    If this is a multisensor app, platform and product should be pluralized and used
    with the get_stacked_datasets_by_extent call rather than the normal get.

    Returns:
        parameter dict with all keyword args required to load data.

    """
    task = SpectralAnomalyTask.objects.get(pk=task_id)

    parameters = {
        'platform':
        task.satellite.datacube_platform,
        'product':
        task.satellite.get_products(task.area_id)[0],
        'time': (task.time_start, task.time_end),
        'baseline_time': (task.baseline_time_start, task.baseline_time_end),
        'analysis_time': (task.analysis_time_start, task.analysis_time_end),
        'longitude': (task.longitude_min, task.longitude_max),
        'latitude': (task.latitude_min, task.latitude_max),
        'measurements':
        task.satellite.get_measurements(),
        'composite_range':
        (task.composite_threshold_min, task.composite_threshold_max),
        'change_range': (task.change_threshold_min, task.change_threshold_max),
    }

    task.execution_start = datetime.now()
    if check_cancel_task(self, task): return
    task.update_status("WAIT", "Parsed out parameters.")

    return parameters
Example #16
def process_band_math(self, chunk, task_id=None):
    """Apply some band math to a chunk and return the args

    Opens the chunk dataset and applies the band math defined by _apply_band_math(dataset).
    _apply_band_math creates a product using the bands already present in the dataset and
    returns the data array. The data array is then appended under 'band_math', and the
    result is saved to disk at the same path as the existing nc file.
    """
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    def _apply_band_math(dataset):
        return spectral_indices_map[task.query_type.result_id](dataset)

    if chunk is None:
        return None

    dataset = xr.open_dataset(chunk[0]).load()
    dataset['band_math'] = _apply_band_math(dataset)
    
    #remove previous nc and write band math to disk
    os.remove(chunk[0])
    export_xarray_to_netcdf(dataset, chunk[0])
    return chunk
Example #17
def process_band_math(self, chunk, task_id=None, num_scn_per_chk=None):
    """Apply some band math to a chunk and return the args

    Opens the chunk dataset and applies the band math defined by _apply_band_math(dataset).
    _apply_band_math creates a product using the bands already present in the dataset and
    returns the data array. The data array is then appended under 'band_math', and the
    result is saved to disk at the same path as the existing nc file.

    Args:
        chunk: The return from the recombine_time_chunks function - path, metadata, and {chunk ids}
        num_scn_per_chk: The number of scenes per chunk. Used to determine task progress.
    """
    task = FractionalCoverTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    def _apply_band_math(dataset):
        clear_mask = task.satellite.get_clean_mask_func()(dataset).values
        # mask out water manually. Necessary for frac. cover.
        wofs = wofs_classify(dataset, clean_mask=clear_mask, mosaic=True)
        clear_mask[wofs.wofs.values == 1] = False
        return frac_coverage_classify(\
            dataset, clean_mask=clear_mask, no_data=task.satellite.no_data_value,
            platform=task.satellite.platform, collection=task.satellite.collection
        )

    if chunk is None:
        return None

    dataset = xr.open_dataset(chunk[0]).load()
    dataset = xr.merge([dataset, _apply_band_math(dataset)])
    #remove previous nc and write band math to disk
    os.remove(chunk[0])
    export_xarray_to_netcdf(dataset, chunk[0])
    task.scenes_processed = F('scenes_processed') + num_scn_per_chk
    task.save(update_fields=['scenes_processed'])
    return chunk
Example #18
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to various formats and register all values in the task model.
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
    """
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    full_metadata = data[1]
    dataset = xr.open_dataset(data[0])

    task.result_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.result_filled_path = os.path.join(task.get_result_path(),
                                           "filled_png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(),
                                         "data_netcdf.nc")
    task.animation_path = os.path.join(
        task.get_result_path(), "animation.gif"
    ) if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements()
    png_bands = [
        task.query_type.red, task.query_type.green, task.query_type.blue
    ]

    export_xarray_to_netcdf(dataset, task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path,
                          dataset.astype('int32'),
                          bands=bands,
                          no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_path,
                      dataset,
                      bands=png_bands,
                      png_filled_path=task.result_filled_path,
                      fill_color=task.query_type.fill,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I',
                                duration=1.0) as writer:
            valid_range = range(len(full_metadata))
            if task.animated_product.animation_id == "scene" and task.get_reverse_time():
                valid_range = reversed(valid_range)
            for index in valid_range:
                path = os.path.join(task.get_temp_path(),
                                    "animation_{}.png".format(index))
                if os.path.exists(path):
                    image = imageio.imread(path)
                    writer.append_data(image)

    dates = list(
        map(lambda x: datetime.strptime(x, "%m/%d/%Y"),
            task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(task.plot_path,
                       dates=dates,
                       datasets=task._get_field_as_list(
                           'clean_pixel_percentages_per_acquisition'),
                       data_labels="Clean Pixel Percentage (%)",
                       titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK",
        "All products have been generated. Your result will be loaded on the map."
    )
    return True
Example #19
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative()
        else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    iteration_data = None
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask,
                                              updated_params)

        iteration_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        if check_cancel_task(self, task): return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])
    if iteration_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(iteration_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
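
Example #19 calls a module-level `_get_datetime_range_containing`; Examples #21 and #23 below define the same helper inline. Its effect on the `times` list is easy to see in isolation:

from datetime import datetime, timedelta

def _get_datetime_range_containing(*time_ranges):
    # Same helper as defined inline in Examples #21 and #23.
    return (min(time_ranges) - timedelta(microseconds=1),
            max(time_ranges) + timedelta(microseconds=1))

time_chunk = [datetime(2018, 1, 1), datetime(2018, 1, 17), datetime(2018, 2, 2)]

# Iterative algorithms load one acquisition at a time; non-iterative
# algorithms load the whole chunk in a single range.
iterative = list(map(_get_datetime_range_containing, time_chunk))
all_at_once = [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])]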
Example #20
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])
    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    #grab dem data as well
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if data is None or dem_data is None or 'time' not in data or 'time' not in dem_data:
        return None

    #target data is most recent, with the baseline being everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = task.satellite.get_clean_mask_func()(target_data)
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    combined_baseline = task.get_processing_method()(baseline_data,
                                                     clean_mask=baseline_clear_mask,
                                                     no_data=task.satellite.no_data_value,
                                                     reverse_time=task.get_reverse_time())

    if check_cancel_task(self, task): return

    target_data = create_mosaic(
        target_data,
        clean_mask=target_clear_mask,
        no_data=task.satellite.no_data_value,
        reverse_time=task.get_reverse_time())

    if check_cancel_task(self, task): return

    slip_data = compute_slip(combined_baseline, target_data, dem_data, no_data=task.satellite.no_data_value)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    if check_cancel_task(self, task): return

    task.scenes_processed = F('scenes_processed') + 1
    task.save(update_fields=['scenes_processed'])

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    export_xarray_to_netcdf(target_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Example #21
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # load selected scene and mosaic just in case we got two scenes (handles scene boundaries/overlapping data)
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)

    if check_cancel_task(self, task): return

    if len(full_dataset) == 0 or 'time' not in selected_scene:
        return None

    #concat individual slices over time, compute metadata + mosaic
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    metadata = task.metadata_from_dataset(metadata, baseline_data,
                                          baseline_clear_mask, parameters)

    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(
        selected_scene)
    metadata = task.metadata_from_dataset(metadata, selected_scene,
                                          selected_scene_clear_mask,
                                          parameters)
    selected_scene = task.get_processing_method()(
        selected_scene,
        clean_mask=selected_scene_clear_mask,
        intermediate_product=None,
        no_data=task.satellite.no_data_value)
    # we need to regenerate the clear mask using the mosaic now.
    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(
        selected_scene)

    if check_cancel_task(self, task): return

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask,
        no_data=task.satellite.no_data_value)
    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save(update_fields=['scenes_processed'])

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
Example #22
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to various formats and register all values in the task model.
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
    """
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    full_metadata = data[1]
    dataset = xr.open_dataset(data[0])

    task.result_path = os.path.join(task.get_result_path(), "band_math.png")
    task.mosaic_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements() + ['band_math']

    export_xarray_to_netcdf(dataset, task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset.astype('int32'), bands=bands, no_data=task.satellite.no_data_value)

    # Ensure data variables have the range of Landsat 7 Collection 1 Level 2
    # since the color scales are tailored for that dataset.
    platform = task.satellite.platform
    collection = task.satellite.collection
    level = task.satellite.level
    if (platform, collection) != ('LANDSAT_7', 'c1'):
        dataset = convert_range(dataset, from_platform=platform,
                                from_collection=collection, from_level=level,
                                to_platform='LANDSAT_7', to_collection='c1', to_level='l2')

    write_png_from_xr(
        task.mosaic_path,
        dataset,
        bands=['red', 'green', 'blue'],
        scale=task.satellite.get_scale(),
        no_data=task.satellite.no_data_value)
    write_single_band_png_from_xr(
        task.result_path,
        dataset,
        band='band_math',
        color_scale=task.color_scale_path.get(task.query_type.result_id),
        no_data=task.satellite.no_data_value)

    dates = list(map(lambda x: datetime.strptime(x, "%m/%d/%Y"), task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    return True
Example #23
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 enforce_float64=True,
                                                 no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis, no_data=task.satellite.no_data_value)

        clear_mask[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=task.satellite.no_data_value)

        if check_cancel_task(self, task): return

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)
        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])
    if combined_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
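
The `base_index + time_index` arithmetic above gives every animation frame a globally unique number across time chunks. A tiny worked example, assuming a time chunk size of 5 for `task.get_chunk_size()['time']`:

time_chunk_size = 5  # assumed value of task.get_chunk_size()['time']
for time_chunk_id in range(3):
    base_index = time_chunk_size * time_chunk_id
    frames = [base_index + time_index for time_index in range(time_chunk_size)]
    print(time_chunk_id, frames)
# 0 [0, 1, 2, 3, 4]
# 1 [5, 6, 7, 8, 9]
# 2 [10, 11, 12, 13, 14]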
Example #24
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sort by time id - earlier chunks come first, since ids increment, e.g. 0, 1, 2, ...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    def combine_intermediates(dataset, dataset_intermediate):
        """
        Combine time-sliced data after it has been combined geographically.
        This compounds the results of the time slices and recomputes the normalized data.
        """
        # total data/clean refers to tsm
        dataset_intermediate['total_data'] += dataset.total_data
        dataset_intermediate['total_clean'] += dataset.total_clean
        dataset_intermediate['normalized_data'] = dataset_intermediate['total_data'] / dataset_intermediate[
            'total_clean']
        dataset_intermediate['min'] = xr.concat(
            [dataset_intermediate['min'], dataset['min']], dim='time').min(
                dim='time', skipna=True)
        dataset_intermediate['max'] = xr.concat(
            [dataset_intermediate['max'], dataset['max']], dim='time').max(
                dim='time', skipna=True)
        dataset_intermediate['wofs'] += dataset.wofs
        dataset_intermediate['wofs_total_clean'] += dataset.wofs_total_clean

    def generate_animation(index, combined_data):
        time_chunk_size = task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1
        base_index = time_chunk_size * index
        # Use a distinct loop variable so the chunk index argument isn't shadowed.
        for offset in range(time_chunk_size):
            path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + offset))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path)
                if task.animated_product.animation_id != "scene" and combined_data:
                    combine_intermediates(combined_data, animated_data)
                # need to wait until the last step to mask out wofs < 0.8
                path = os.path.join(task.get_temp_path(), "animation_final_{}.nc".format(base_index + offset))
                animated_data.to_netcdf(path)

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        combine_intermediates(data, combined_data)
        if check_cancel_task(self, task): return
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
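The running-sum approach in combine_intermediates is what makes recombination order-independent for the mean: only total_data and total_clean are accumulated, and normalized_data is recomputed from the sums rather than by averaging averages. A self-contained sketch of the same idea on toy xarray data (variable values are made up):

import numpy as np
import xarray as xr

a = xr.Dataset({'total_data': ('x', np.array([10.0, 4.0])),
                'total_clean': ('x', np.array([5.0, 4.0]))})
b = xr.Dataset({'total_data': ('x', np.array([3.0, 6.0])),
                'total_clean': ('x', np.array([1.0, 2.0]))})

a['total_data'] += b.total_data    # accumulate sums, not means
a['total_clean'] += b.total_clean
a['normalized_data'] = a.total_data / a.total_clean  # per-pixel mean from the sums
print(a.normalized_data.values)    # [13/6, 10/6] regardless of combination order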
Example #25
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to various formats and register all values in the task model.
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
    """
    task = CoastalChangeTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    full_metadata = data[1]
    dataset = xr.open_dataset(data[0])

    task.result_path = os.path.join(task.get_result_path(),
                                    "coastline_change.png")
    task.result_coastal_change_path = os.path.join(task.get_result_path(),
                                                   "coastal_change.png")
    task.result_mosaic_path = os.path.join(task.get_result_path(),
                                           "mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(),
                                         "data_netcdf.nc")
    task.animation_path = (os.path.join(task.get_result_path(), "animation.gif")
                           if task.animated_product.animation_id != 'none' else "")
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements() + [
        'coastal_change', 'coastline_old', 'coastline_new'
    ]
    png_bands = ['red', 'green', 'blue']

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path,
                          dataset.astype('int32'),
                          bands=bands,
                          no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_path,
                      mask_mosaic_with_coastlines(dataset),
                      bands=png_bands,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_coastal_change_path,
                      mask_mosaic_with_coastal_change(dataset),
                      bands=png_bands,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)
    write_png_from_xr(task.result_mosaic_path,
                      dataset,
                      bands=png_bands,
                      scale=task.satellite.get_scale(),
                      no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I',
                                duration=1.0) as writer:
            for index in range(task.time_end - task.time_start):
                path = os.path.join(task.get_temp_path(),
                                    "animation_{}.png".format(index))
                if os.path.exists(path):
                    image = imageio.imread(path)
                    writer.append_data(image)

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK",
        "All products have been generated. Your result will be loaded on the map."
    )
    return True
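The GIF assembly pattern above is generic imageio usage: open a multi-image writer and append one frame per PNG that was actually produced. A standalone sketch, assuming a directory of frame_<index>.png files (paths here are hypothetical):

import os
import imageio

frame_dir = "/tmp/frames"  # assumed frame location
with imageio.get_writer("/tmp/animation.gif", mode='I', duration=1.0) as writer:
    for index in range(10):
        path = os.path.join(frame_dir, "frame_{}.png".format(index))
        # Skip indices whose chunks produced no frame, just like the task does.
        if os.path.exists(path):
            writer.append_data(imageio.imread(path))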
Example #26
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CoastalChangeTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    starting_year = _get_datetime_range_containing(*time_chunk[0])
    comparison_year = _get_datetime_range_containing(*time_chunk[1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    def _compute_mosaic(time):
        """
        Loads data for some time range for the current geographic chunk,
        returning 3 objects - the mosaic, the task metadata, and the number of
        acquisitions that were in the retrieved data.
        """
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            return None, None, None

        clear_mask = task.satellite.get_clean_mask_func()(data)
        metadata = task.metadata_from_dataset({}, data, clear_mask,
                                              updated_params)
        mosaic = task.get_processing_method()(
            data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        return mosaic, metadata, len(data['time'])

    if check_cancel_task(self, task): return
    old_mosaic, old_metadata, num_scenes_old = _compute_mosaic(starting_year)
    if old_mosaic is None: return None
    task.scenes_processed = F('scenes_processed') + num_scenes_old
    # Avoid overwriting the task's status if it is cancelled.
    task.save(update_fields=['scenes_processed'])

    if check_cancel_task(self, task): return
    new_mosaic, new_metadata, num_scenes_new = _compute_mosaic(comparison_year)
    if new_mosaic is None: return None
    task.scenes_processed = F('scenes_processed') + num_scenes_new
    task.save(update_fields=['scenes_processed'])

    if check_cancel_task(self, task): return

    metadata = {**old_metadata, **new_metadata}

    output_product = compute_coastal_change(
        old_mosaic, new_mosaic, no_data=task.satellite.no_data_value)

    if check_cancel_task(self, task): return

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    output_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
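The one-microsecond padding in _get_datetime_range_containing widens the range just enough that the boundary acquisitions themselves cannot be dropped by an exclusive time query. The helper in isolation, with a worked example:

from datetime import datetime, timedelta

def get_datetime_range_containing(*times):
    # Pad each end by one microsecond so the earliest and latest
    # acquisitions fall strictly inside the returned range.
    return (min(times) - timedelta(microseconds=1),
            max(times) + timedelta(microseconds=1))

start, end = get_datetime_range_containing(
    datetime(2015, 3, 20), datetime(2015, 1, 12), datetime(2015, 6, 3))
print(start)  # 2015-01-11 23:59:59.999999
print(end)    # 2015-06-03 00:00:00.000001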
Example #27
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open the time-chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm; if the algorithm
    is not iterative, the data is simply returned unchanged.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = WaterDetectionTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sort on time id - earlier chunks come first since ids increment, e.g. 0, 1, 2, ...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    def combine_intermediates(dataset, dataset_intermediate):
        """
        functions used to combine time sliced data after being combined geographically.
        This compounds the results of the time slice and recomputes the normalized data.
        """
        dataset_intermediate['total_data'] += dataset.total_data
        dataset_intermediate['total_clean'] += dataset.total_clean
        dataset_intermediate['normalized_data'] = dataset_intermediate[
            'total_data'] / dataset_intermediate['total_clean']

    def generate_animation(index, combined_data):
        time_chunk_size = (task.get_chunk_size()['time']
                           if task.get_chunk_size()['time'] is not None else 1)
        base_index = time_chunk_size * index
        # Use a distinct loop variable so the chunk index argument isn't shadowed.
        for offset in range(time_chunk_size):
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + offset))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path)
                if task.animated_product.animation_id != "scene" and combined_data:
                    combine_intermediates(combined_data, animated_data)
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}.png".format(base_index + offset))

                write_single_band_png_from_xr(
                    path,
                    animated_data,
                    task.animated_product.data_variable,
                    color_scale=task.color_scales[
                        task.animated_product.data_variable],
                    fill_color=task.query_type.fill,
                    interpolate=False,
                    no_data=task.satellite.no_data_value)

    metadata = {}
    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        combine_intermediates(data, combined_data)
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
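Chunk results arrive unordered from the Celery group, so every recombine task normalizes its input the same way: wrap a single result in a list, drop Nones, and sort on the NetCDF path, which embeds the incrementing chunk ids. A toy illustration of that step (paths and ids are made up):

chunks = [
    ("/tmp/0_2.nc", {}, {'geo_chunk_id': 0, 'time_chunk_id': 2}),
    ("/tmp/0_0.nc", {}, {'geo_chunk_id': 0, 'time_chunk_id': 0}),
    ("/tmp/0_1.nc", {}, {'geo_chunk_id': 0, 'time_chunk_id': 1}),
]
chunks = chunks if isinstance(chunks, list) else [chunks]
chunks = [chunk for chunk in chunks if chunk is not None]
# Lexicographic path sort matches time-id order for single-digit ids;
# ids of 10 or more would interleave (e.g. "0_10.nc" sorts before "0_2.nc").
total_chunks = sorted(chunks, key=lambda x: x[0])
print([c[2]['time_chunk_id'] for c in total_chunks])  # [0, 1, 2]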
Example #28
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = (list(map(_get_datetime_range_containing, time_chunk))
             if task.get_iterative()
             else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    time_chunk_size = (task.get_chunk_size()['time']
                       if task.get_chunk_size()['time'] is not None else 1)
    base_index = time_chunk_size * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if check_cancel_task(self, task): return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask,
                                              updated_params)

        iteration_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        if check_cancel_task(self, task): return

        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id),
                                            str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # need to clear out all of the metadata.
                clear_attrs(data)
                # can't reindex on time, so select the first scene and drop the coordinate.
                export_xarray_to_netcdf(data.isel(time=0).drop('time'), path)
            elif task.animated_product.animation_id == "cumulative":
                export_xarray_to_netcdf(iteration_data, path)

        task.scenes_processed = F('scenes_processed') + 1
        # Avoid overwriting the task's status if it is cancelled.
        task.save(update_fields=['scenes_processed'])

    if iteration_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(iteration_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
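The iterative/non-iterative switch above reduces to one decision: wrap each acquisition date in its own tiny range (one composite step per scene) or wrap the whole chunk in a single range (one load for everything). A sketch of that branching with plain datetimes, where the boolean stands in for task.get_iterative():

from datetime import datetime, timedelta

def range_containing(*times):
    return (min(times) - timedelta(microseconds=1),
            max(times) + timedelta(microseconds=1))

time_chunk = [datetime(2016, 1, 1), datetime(2016, 1, 17), datetime(2016, 2, 2)]
iterative = True  # stand-in for task.get_iterative()

times = (list(map(range_containing, time_chunk)) if iterative
         else [range_containing(time_chunk[0], time_chunk[-1])])
print(len(times))  # 3 one-date ranges when iterative, 1 covering range otherwise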
Example #29
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to various formats and register all values in the task model.
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    full_metadata = data[1]
    dataset = xr.open_dataset(data[0]).astype('float64')
    dataset['variability'] = dataset['max'] - dataset['normalized_data']
    dataset['wofs'] = dataset.wofs / dataset.wofs_total_clean
    nan_to_num(dataset, 0)
    dataset_masked = mask_water_quality(dataset, dataset.wofs)

    task.result_path = os.path.join(task.get_result_path(), "tsm.png")
    task.clear_observations_path = os.path.join(task.get_result_path(), "clear_observations.png")
    task.water_percentage_path = os.path.join(task.get_result_path(), "water_percentage.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    task.animation_path = os.path.join(task.get_result_path(),
                                       "animation.gif") if task.animated_product.animation_id != 'none' else ""
    task.final_metadata_from_dataset(dataset_masked)
    task.metadata_from_dict(full_metadata)

    bands = [task.query_type.data_variable, 'total_clean', 'wofs']
    band_paths = [task.result_path, task.clear_observations_path, task.water_percentage_path]

    dataset_masked.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset_masked, bands=bands, no_data=task.satellite.no_data_value)

    for band, band_path in zip(bands, band_paths):
        write_single_band_png_from_xr(
            band_path,
            dataset_masked,
            band,
            color_scale=task.color_scales[band],
            fill_color='black',
            interpolate=False,
            no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            valid_range = range(len(full_metadata))
            for index in valid_range:
                path = os.path.join(task.get_temp_path(), "animation_final_{}.nc".format(index))
                if os.path.exists(path):
                    png_path = os.path.join(task.get_temp_path(), "animation_{}.png".format(index))
                    animated_data = xr.open_dataset(path)
                    if task.animated_product.animation_id != "scene":
                        animated_data = mask_water_quality(
                            animated_data.astype('float64'), dataset.wofs)
                    write_single_band_png_from_xr(
                        png_path,
                        animated_data,
                        task.animated_product.data_variable,
                        color_scale=task.color_scales[task.animated_product.data_variable],
                        fill_color='black',
                        interpolate=False,
                        no_data=task.satellite.no_data_value)
                    image = imageio.imread(png_path)
                    writer.append_data(image)

    dates = list(map(lambda x: datetime.strptime(x, "%m/%d/%Y"), task._get_field_as_list('acquisition_list')))
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    task.update_bounds_from_dataset(dataset_masked)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    return True
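Two derived products are computed in this task before masking: variability, the per-pixel spread of TSM above its mean, and wofs recast from a raw water-observation count into a fraction of clean observations. A toy recomputation with made-up values, assuming the same variable names:

import numpy as np
import xarray as xr

ds = xr.Dataset({
    'max': ('x', np.array([8.0, 5.0])),                # per-pixel max TSM
    'normalized_data': ('x', np.array([6.0, 2.0])),    # per-pixel mean TSM
    'wofs': ('x', np.array([3.0, 1.0])),               # water observation count
    'wofs_total_clean': ('x', np.array([4.0, 2.0])),   # clean observation count
})
ds['variability'] = ds['max'] - ds['normalized_data']  # spread above the mean
ds['wofs'] = ds.wofs / ds.wofs_total_clean             # water fraction of clean obs
print(ds.variability.values, ds.wofs.values)           # [2. 3.] [0.75 0.5]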
Example #30
def recombine_time_chunks(self, chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open the time-chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm; if the algorithm
    is not iterative, the data is simply returned unchanged.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task): return

    # sort on time id - earlier chunks come first since ids increment, e.g. 0, 1, 2, ...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    def generate_animation(index, combined_data):
        time_chunk_size = (task.get_chunk_size()['time']
                           if task.get_chunk_size()['time'] is not None else 1)
        base_index = time_chunk_size * index
        # Use a distinct loop variable so the chunk index argument isn't shadowed.
        for offset in range(time_chunk_size):
            path = os.path.join(task.get_temp_path(),
                                "animation_{}.nc".format(base_index + offset))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path)
                if task.animated_product.animation_id == "cumulative":
                    animated_data = xr.concat([animated_data], 'time')
                    animated_data['time'] = [0]
                    clear_mask = task.satellite.get_clean_mask_func()(animated_data)
                    animated_data = task.get_processing_method()(
                        animated_data,
                        clean_mask=clear_mask,
                        intermediate_product=combined_data,
                        no_data=task.satellite.no_data_value)
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}.png".format(base_index + offset))
                write_png_from_xr(path,
                                  animated_data,
                                  bands=[
                                      task.query_type.red,
                                      task.query_type.green,
                                      task.query_type.blue
                                  ],
                                  scale=task.satellite.get_scale(),
                                  no_data=task.satellite.no_data_value)

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        # give time an index to keep compositing from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        combined_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value)
        if check_cancel_task(self, task): return
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
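The xr.concat([data], 'time') / data['time'] = [0] idiom used twice above re-adds a length-1 time dimension so the compositing function, which expects time-stacked input, can fold an already-flattened intermediate back in. The reshaping step in isolation:

import numpy as np
import xarray as xr

flat = xr.Dataset({'red': (('y', 'x'), np.zeros((2, 2)))})  # no time dimension
stacked = xr.concat([flat], 'time')  # prepend a new time dimension of length 1
stacked['time'] = [0]                # give it a dummy coordinate value
print(stacked.red.dims)              # ('time', 'y', 'x')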