Example no. 1
 def generate_animation(index, combined_data):
     base_index = (task.get_chunk_size()['time'] if
                   task.get_chunk_size()['time'] is not None else 1) * index
     for index in range((task.get_chunk_size()['time'] if
                         task.get_chunk_size()['time'] is not None else 1)):
         path = os.path.join(task.get_temp_path(),
                             "animation_{}.nc".format(base_index + index))
         if os.path.exists(path):
             animated_data = xr.open_dataset(path, autoclose=True)
             if task.animated_product.animation_id == "cumulative":
                 animated_data = xr.concat([animated_data], 'time')
                 animated_data['time'] = [0]
                 clear_mask = create_cfmask_clean_mask(
                     animated_data.cf_mask
                 ) if 'cf_mask' in animated_data else create_bit_mask(
                     animated_data.pixel_qa, [1, 2])
                 animated_data = task.get_processing_method()(
                     animated_data,
                     clean_mask=clear_mask,
                     intermediate_product=combined_data)
             path = os.path.join(
                 task.get_temp_path(),
                 "animation_{}.png".format(base_index + index))
             write_png_from_xr(path,
                               animated_data,
                               bands=[
                                   task.query_type.red,
                                   task.query_type.green,
                                   task.query_type.blue
                               ],
                               scale=(0, 4096))
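
A note on the mask-selection ternary used above and throughout these examples: the dataset either carries a cf_mask variable or a pixel_qa variable, and the matching cleaning function is chosen accordingly. Below is a minimal, self-contained sketch of that pattern; the helper name and the exact bit semantics are placeholders, since the real rules live in create_cfmask_clean_mask and create_bit_mask.

import numpy as np

def pick_clean_mask(dataset, qa_bits=(1, 2)):
    # Illustrative sketch only - placeholder semantics, not the Data Cube utilities.
    # cf_mask == 0 is treated as "clear"; for pixel_qa, any of the listed bit
    # positions being set marks the pixel as clear.
    if 'cf_mask' in dataset:
        return dataset.cf_mask.values == 0
    qa = dataset.pixel_qa.values
    mask = np.zeros(qa.shape, dtype=bool)
    for bit in qa_bits:
        mask |= (qa & (1 << bit)) > 0
    return mask
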
Example no. 2
    def _apply_band_math(dataset):
        clear_mask = create_cfmask_clean_mask(dataset.cf_mask) if 'cf_mask' in dataset else create_bit_mask(
            dataset.pixel_qa, [1, 2])
        # mask out water manually. Necessary for frac. cover.
        wofs = wofs_classify(dataset, clean_mask=clear_mask, mosaic=True)
        clear_mask[wofs.wofs.values == 1] = False

        return frac_coverage_classify(dataset, clean_mask=clear_mask)
Example no. 3
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids

    """
    logger.info("RECOMBINE_TIME")
    # sorted by time id - earlier chunks are processed first since ids increment, e.g. 0, 1, 2...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None

    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = SlipTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    combined_data = None
    combined_slip = None
    for index, chunk in enumerate(reversed(total_chunks)):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            combined_data = data.drop('slip')
            # since this is going to interact with data/mosaicking, it needs a time dim
            combined_slip = xr.concat([data.slip.copy(deep=True)], 'time')
            continue
        # give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa,
                                                                                                      [1, 2])
        # modify clean mask so that only slip pixels that are still zero will be used. This will show all the pixels that caused the flag.
        clear_mask[xr.concat([combined_slip], 'time').values == 1] = False
        combined_data = create_mosaic(data.drop('slip'), clean_mask=clear_mask, intermediate_product=combined_data)
        combined_slip.values[combined_slip.values == 0] = data.slip.values[combined_slip.values == 0]

    # Since we added a time dim to combined_slip, we need to remove it here.
    combined_data['slip'] = combined_slip.isel(time=0, drop=True)
    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
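
The chunks are walked newest-first here, and the slip accumulation only overwrites pixels that are still zero in the running composite, so the most recent detection wins. The same "fill only where still empty" idea in plain numpy, as an illustrative sketch with made-up values:

import numpy as np

# Illustrative sketch: newer data seeds the composite, older data only fills the gaps.
newest = np.array([[1, 0],
                   [0, 0]])
older = np.array([[1, 1],
                  [0, 1]])

combined = newest.copy()
combined[combined == 0] = older[combined == 0]
print(combined)  # [[1 1]
                 #  [0 1]]
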
Example no. 4
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids

    """
    logger.info("RECOMBINE_TIME")
    # sorted by time id - earlier chunks are processed first since ids increment, e.g. 0, 1, 2...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None

    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = BandMathTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            combined_data = data
            continue
        # give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        combined_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=combined_data)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
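
Structurally, the recombination step is a fold over the sorted chunks: the first chunk seeds the composite and every later chunk is merged into it through the task's processing method with intermediate_product set to the running result. A schematic sketch of that control flow; open_chunk and combine are stand-ins, not Data Cube functions:

def recombine(chunk_paths, open_chunk, combine):
    # Illustrative sketch of the fold used above; `combine` stands in for
    # task.get_processing_method() and `open_chunk` for xr.open_dataset.
    combined = None
    for path in chunk_paths:
        data = open_chunk(path)
        if combined is None:
            combined = data  # the first chunk seeds the composite
            continue
        combined = combine(data, intermediate_product=combined)
    return combined
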
Example no. 5
    def _compute_mosaic(time):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            return None, None

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        metadata = task.metadata_from_dataset({}, data, clear_mask,
                                              updated_params)
        return task.get_processing_method()(data,
                                            clean_mask=clear_mask), metadata
Example no. 6
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids

    """
    logger.info("RECOMBINE_TIME")
    # sorted by time id - earlier chunks are processed first since ids increment, e.g. 0, 1, 2...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None

    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    def generate_animation(index, combined_data):
        base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * index
        for index in range((task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1)):
            path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + index))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path, autoclose=True)
                if task.animated_product.animation_id == "cumulative":
                    animated_data = xr.concat([animated_data], 'time')
                    animated_data['time'] = [0]
                    clear_mask = create_cfmask_clean_mask(
                        animated_data.cf_mask) if 'cf_mask' in animated_data else create_bit_mask(
                            animated_data.pixel_qa, [1, 2])
                    animated_data = task.get_processing_method()(animated_data,
                                                                 clean_mask=clear_mask,
                                                                 intermediate_product=combined_data)
                path = os.path.join(task.get_temp_path(), "animation_{}.png".format(base_index + index))
                write_png_from_xr(
                    path,
                    animated_data,
                    bands=[task.query_type.red, task.query_type.green, task.query_type.blue],
                    scale=(0, 4096))

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        # give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa,
                                                                                                      [1, 2])
        combined_data = task.get_processing_method()(data, clean_mask=clear_mask, intermediate_product=combined_data)
        # if we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
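
The animation helper maps each chunk to a block of global frame numbers with base_index = chunk_size * index, then offsets within the chunk. A tiny arithmetic sketch of that numbering, using an assumed chunk size of 5:

# Illustrative sketch: chunk-local offsets mapped to global animation frame numbers.
chunk_size = 5  # stand-in for task.get_chunk_size()['time'], defaulting to 1 when None
for chunk_index in range(3):
    base_index = chunk_size * chunk_index
    frames = [base_index + offset for offset in range(chunk_size)]
    print(chunk_index, frames)
# 0 [0, 1, 2, 3, 4]
# 1 [5, 6, 7, 8, 9]
# 2 [10, 11, 12, 13, 14]
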
Example no. 7
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with the time and geographic ranges and then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    iteration_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})

        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa,
                                                                                                      [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(data, clean_mask=clear_mask, intermediate_product=iteration_data)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                #need to clear out all the metadata..
                clear_attrs(data)
                #can't reindex on time - weird?
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")

    if iteration_data is None:
        return None
    iteration_data.to_netcdf(path)

    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
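
_get_datetime_range_containing pads the earliest and latest acquisitions by a microsecond so a single acquisition still falls inside the loaded range; iterative tasks then get one range per acquisition, while non-iterative tasks get a single range spanning the whole chunk. A standalone sketch of the two cases, with example dates:

from datetime import datetime, timedelta

# Illustrative sketch of the time-range construction used above.
def range_containing(*times):
    return (min(times) - timedelta(microseconds=1),
            max(times) + timedelta(microseconds=1))

acquisitions = [datetime(2017, 1, 1), datetime(2017, 1, 17), datetime(2017, 2, 2)]

iterative_ranges = [range_containing(t) for t in acquisitions]        # one range per scene
single_range = [range_containing(acquisitions[0], acquisitions[-1])]  # one range for the chunk
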
Example no. 8
def generate_tsm_chunk(time_num,
                       chunk_num,
                       processing_options=None,
                       query=None,
                       acquisition_list=None,
                       lat_range=None,
                       lon_range=None):
    """
    Responsible for generating a piece of a water detection/TSM product. Grabs the x/y area specified by the lat/lon ranges, gets all data
    from acquisition_list (a list of acquisition dates), and runs the water and TSM analyses using the function named in processing_options.
    Saves the results to disk using the time/chunk num and returns the paths and the acquisition-date-keyed metadata.
    """
    time_index = 0
    wofs_data = None
    tsm_data = None
    water_analysis = None
    tsm_analysis = None
    acquisition_metadata = {}
    print("Starting chunk: " + str(time_num) + " " + str(chunk_num))
    # holds some acquisition based metadata.
    while time_index < len(acquisition_list):
        # check if the task has been cancelled. if the result obj doesn't exist anymore then return.
        try:
            result = Result.objects.get(query_id=query.query_id)
        except Result.DoesNotExist:
            print("Cancelled task as result does not exist")
            return
        if result.status == "CANCEL":
            print("Cancelling...")
            return "CANCEL"

        # time ranges set based on if the acquisition_list has been reversed or not. If it has, then the 'start' index is the later date, and must be handled appropriately.
        start = acquisition_list[time_index] + datetime.timedelta(
            seconds=1) if processing_options[
                'reverse_time'] else acquisition_list[time_index]
        if processing_options['time_slices_per_iteration'] is not None and (
                time_index + processing_options['time_slices_per_iteration'] -
                1) < len(acquisition_list):
            end = acquisition_list[
                time_index + processing_options['time_slices_per_iteration'] -
                1]
        else:
            end = acquisition_list[-1] if processing_options[
                'reverse_time'] else acquisition_list[-1] + datetime.timedelta(
                    seconds=1)
        time_range = (end,
                      start) if processing_options['reverse_time'] else (start,
                                                                         end)

        raw_data = dc.get_dataset_by_extent(query.product,
                                            product_type=None,
                                            platform=query.platform,
                                            time=time_range,
                                            longitude=lon_range,
                                            latitude=lat_range)

        # get the actual data and perform analysis.
        if "cf_mask" not in raw_data:
            time_index = time_index + processing_options[
                'time_slices_per_iteration']
            continue
        clean_mask = create_cfmask_clean_mask(raw_data.cf_mask)

        wofs_data = processing_options['processing_method'](
            raw_data, clean_mask=clean_mask, enforce_float64=True)
        water_analysis = perform_timeseries_analysis_iterative(
            wofs_data, intermediate_product=water_analysis)
        # filter for swir2 < 1%: valid range is [0, 10000] (scale 0.0001), so 1% = 100
        clean_mask[raw_data.swir2.values > 100] = False
        #manually set all non-water pixels to nodata.
        clean_mask[wofs_data.wofs.values == 0] = False
        tsm_data = tsm(raw_data, clean_mask=clean_mask, no_data=-1)
        tsm_analysis = perform_timeseries_analysis_iterative(
            tsm_data, intermediate_product=tsm_analysis, no_data=-1)

        # here the clear mask has all the clean pixels for each acquisition.
        # add to the comma separated list of data.
        for timeslice in range(clean_mask.shape[0]):
            time = acquisition_list[time_index + timeslice]
            clean_pixels = np.sum(clean_mask[timeslice, :, :] == True)
            if time not in acquisition_metadata:
                acquisition_metadata[time] = {}
                acquisition_metadata[time]['clean_pixels'] = 0
            acquisition_metadata[time]['clean_pixels'] += clean_pixels

            # create the files required for animation.
            # if the dir doesn't exist, create it, then fill with a .png/.tif
            # from the scene data.
            if query.animated_product != "None":
                animated_product = AnimationType.objects.get(
                    type_id=query.animated_product)
                animated_data = tsm_data.isel(time=timeslice).drop(
                    "time"
                ) if animated_product.type_id == "scene" else tsm_analysis
                if not os.path.exists(base_temp_path + query.query_id + '/' +
                                      str(time_num)):
                    os.mkdir(base_temp_path + query.query_id + '/' +
                             str(time_num))
                animated_data.to_netcdf(base_temp_path + query.query_id + '/' +
                                        str(time_num) + '/' + str(chunk_num) +
                                        str(time_index + timeslice) + ".nc")
        time_index = time_index + processing_options[
            'time_slices_per_iteration']

    # Save this geographic chunk to disk.
    geo_path = base_temp_path + query.query_id + "/geo_chunk_" + \
        str(time_num) + "_" + str(chunk_num)
    if water_analysis is None:
        return [None, None, None]
    water_analysis.to_netcdf(geo_path + "_water.nc")
    tsm_analysis.to_netcdf(geo_path + "_tsm.nc")
    print("Done with chunk: " + str(time_num) + " " + str(chunk_num))
    return [geo_path + "_water.nc", geo_path + "_tsm.nc", acquisition_metadata]
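
The while loop advances through acquisition_list in windows of time_slices_per_iteration dates. Ignoring the reverse_time handling, the windowing itself can be sketched as follows; iterate_windows is a hypothetical helper, not part of the codebase:

# Illustrative sketch of the time-window iteration used above (reverse_time ignored).
def iterate_windows(acquisitions, slices_per_iteration):
    step = slices_per_iteration if slices_per_iteration is not None else len(acquisitions)
    index = 0
    while index < len(acquisitions):
        window = acquisitions[index:index + step]
        yield window[0], window[-1]
        index += step

print(list(iterate_windows([1, 2, 3, 4, 5], 2)))  # [(1, 2), (3, 4), (5, 5)]
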
Example no. 9
def generate_water_chunk(time_num, chunk_num, processing_options=None, query=None, acquisition_list=None, lat_range=None, lon_range=None):
    """
    Responsible for generating a piece of a water detection product. Grabs the x/y area specified by the lat/lon ranges, gets all data
    from acquisition_list (a list of acquisition dates), and creates the water classification using the function named in processing_options.
    Saves the result to disk using the time/chunk num and returns the path and the acquisition-date-keyed metadata.
    """
    time_index = 0
    wofs_data = None
    water_analysis = None
    acquisition_metadata = {}
    print("Starting chunk: " + str(time_num) + " " + str(chunk_num))
    # holds some acquisition based metadata.
    while time_index < len(acquisition_list):
        # check if the task has been cancelled. if the result obj doesn't exist anymore then return.
        try:
            result = Result.objects.get(query_id=query.query_id)
        except Result.DoesNotExist:
            print("Cancelled task as result does not exist")
            return
        if result.status == "CANCEL":
            print("Cancelling...")
            return "CANCEL"

        # time ranges set based on if the acquisition_list has been reversed or not. If it has, then the 'start' index is the later date, and must be handled appropriately.
        start = acquisition_list[time_index] + datetime.timedelta(seconds=1) if processing_options['reverse_time'] else acquisition_list[time_index]
        if processing_options['time_slices_per_iteration'] is not None and (time_index + processing_options['time_slices_per_iteration'] - 1) < len(acquisition_list):
            end = acquisition_list[time_index + processing_options['time_slices_per_iteration'] - 1]
        else:
            end = acquisition_list[-1] if processing_options['reverse_time'] else acquisition_list[-1] + datetime.timedelta(seconds=1)
        time_range = (end, start) if processing_options['reverse_time'] else (start, end)

        raw_data = dc.get_dataset_by_extent(query.product, product_type=None, platform=query.platform, time=time_range, longitude=lon_range, latitude=lat_range)

        # get the actual data and perform analysis.
        if "cf_mask" not in raw_data:
            time_index = time_index + processing_options['time_slices_per_iteration']
            continue
        clean_mask = create_cfmask_clean_mask(raw_data.cf_mask)

        wofs_data = processing_options['processing_method'](raw_data, clean_mask=clean_mask, enforce_float64=True)
        water_analysis = perform_timeseries_analysis_iterative(wofs_data, intermediate_product=water_analysis)

        # here the clear mask has all the clean pixels for each acquisition.
        # add to the comma separated list of data.
        for timeslice in range(clean_mask.shape[0]):
            time = acquisition_list[time_index + timeslice]
            clean_pixels = np.sum(clean_mask[timeslice, :, :] == True)
            water_pixels = np.sum(wofs_data.wofs.values[timeslice, :, :] == 1)
            if time not in acquisition_metadata:
                acquisition_metadata[time] = {}
                acquisition_metadata[time]['clean_pixels'] = 0
                acquisition_metadata[time]['water_pixels'] = 0
            acquisition_metadata[time]['clean_pixels'] += clean_pixels
            acquisition_metadata[time]['water_pixels'] += water_pixels

            # create the files required for animation.
            # if the dir doesn't exist, create it, then fill with a .png/.tif
            # from the scene data.
            if query.animated_product != "None":
                animated_product = AnimationType.objects.get(
                    type_id=query.animated_product)
                animated_data = wofs_data.isel(time=timeslice).drop(
                    "time") if animated_product.type_id == "scene_water" else water_analysis
                if not os.path.exists(base_temp_path + query.query_id + '/' + str(time_num)):
                    os.mkdir(base_temp_path + query.query_id +
                             '/' + str(time_num))
                animated_data.to_netcdf(base_temp_path + query.query_id + '/' + str(
                    time_num) + '/' + str(chunk_num) + str(time_index + timeslice) + ".nc")
        time_index = time_index + processing_options['time_slices_per_iteration']

    # Save this geographic chunk to disk.
    geo_path = base_temp_path + query.query_id + "/geo_chunk_" + \
        str(time_num) + "_" + str(chunk_num) + ".nc"
    if water_analysis is None:
        return [None, None]
    water_analysis.to_netcdf(geo_path)
    print("Done with chunk: " + str(time_num) + " " + str(chunk_num))
    return [geo_path, acquisition_metadata]
Example no. 10
def generate_mosaic_chunk(time_num,
                          chunk_num,
                          processing_options=None,
                          query=None,
                          acquisition_list=None,
                          lat_range=None,
                          lon_range=None,
                          measurements=None):
    """
    Responsible for generating a piece of a custom mosaic product. Grabs the x/y area specified by the lat/lon ranges, gets all data
    from acquisition_list (a list of acquisition dates), and creates the custom mosaic using the function named in processing_options.
    Saves the result to disk using the time/chunk num and returns the path and the acquisition-date-keyed metadata.
    """
    time_index = 0
    iteration_data = None
    acquisition_metadata = {}
    print("Starting chunk: " + str(time_num) + " " + str(chunk_num))
    # holds some acquisition based metadata.
    while time_index < len(acquisition_list):
        # check if the task has been cancelled. if the result obj doesn't exist anymore then return.
        try:
            result = Result.objects.get(query_id=query.query_id)
        except Result.DoesNotExist:
            print("Cancelled task as result does not exist")
            return
        if result.status == "CANCEL":
            print("Cancelling...")
            return "CANCEL"

        # time ranges set based on if the acquisition_list has been reversed or not. If it has, then the 'start' index is the later date, and must be handled appropriately.
        start = acquisition_list[time_index] + datetime.timedelta(
            seconds=1) if processing_options[
                'reverse_time'] else acquisition_list[time_index]
        if processing_options['time_slices_per_iteration'] is not None and (
                time_index + processing_options['time_slices_per_iteration'] -
                1) < len(acquisition_list):
            end = acquisition_list[
                time_index + processing_options['time_slices_per_iteration'] -
                1]
        else:
            end = acquisition_list[-1] if processing_options[
                'reverse_time'] else acquisition_list[-1] + datetime.timedelta(
                    seconds=1)
        time_range = (end,
                      start) if processing_options['reverse_time'] else (start,
                                                                         end)

        raw_data = dc.get_dataset_by_extent(query.product,
                                            product_type=None,
                                            platform=query.platform,
                                            time=time_range,
                                            longitude=lon_range,
                                            latitude=lat_range,
                                            measurements=measurements)

        if "cf_mask" not in raw_data:
            time_index = time_index + (
                processing_options['time_slices_per_iteration']
                if processing_options['time_slices_per_iteration'] is not None
                else 10000)
            continue
        clear_mask = create_cfmask_clean_mask(raw_data.cf_mask)

        # update metadata - here the clear mask has all the clean
        # pixels for each acquisition.
        for timeslice in range(clear_mask.shape[0]):
            time = acquisition_list[time_index + timeslice]
            clean_pixels = np.sum(clear_mask[timeslice, :, :] == True)
            if time not in acquisition_metadata:
                acquisition_metadata[time] = {}
                acquisition_metadata[time]['clean_pixels'] = 0
            acquisition_metadata[time]['clean_pixels'] += clean_pixels

        # Removes the cf mask variable from the dataset after the clear mask has been created.
        # prevents the cf mask from being put through the mosaicing function as it doesn't fit
        # the correct format w/ nodata values for mosaicing.
        raw_data = raw_data.drop('cf_mask')

        iteration_data = processing_options['processing_method'](
            raw_data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data)
        time_index = time_index + (
            processing_options['time_slices_per_iteration']
            if processing_options['time_slices_per_iteration'] is not None else
            10000)

    # Save this geographic chunk to disk.
    geo_path = base_temp_path + query.query_id + "/geo_chunk_" + \
        str(time_num) + "_" + str(chunk_num) + ".nc"
    # if this is an empty chunk, just return an empty dataset.
    if iteration_data is None:
        return [None, None]
    iteration_data.to_netcdf(geo_path)
    print("Done with chunk: " + str(time_num) + " " + str(chunk_num))
    return [geo_path, acquisition_metadata]
Example no. 11
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with the time and geographic ranges and then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # load selected scene and mosaic just in case we got two scenes (handles scene boundaries/overlapping data)
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)

    if len(full_dataset) == 0 or 'time' not in selected_scene:
        return None

    #concat individual slices over time, compute metadata + mosaic
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = create_cfmask_clean_mask(
        baseline_data.cf_mask
    ) if 'cf_mask' in baseline_data else create_bit_mask(
        baseline_data.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset(metadata, baseline_data,
                                          baseline_clear_mask, parameters)

    selected_scene_clear_mask = create_cfmask_clean_mask(
        selected_scene.cf_mask
    ) if 'cf_mask' in selected_scene else create_bit_mask(
        selected_scene.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset(metadata, selected_scene,
                                          selected_scene_clear_mask,
                                          parameters)
    selected_scene = task.get_processing_method()(
        selected_scene,
        clean_mask=selected_scene_clear_mask,
        intermediate_product=None)
    # we need to re generate the clear mask using the mosaic now.
    selected_scene_clear_mask = create_cfmask_clean_mask(
        selected_scene.cf_mask
    ) if 'cf_mask' in selected_scene else create_bit_mask(
        selected_scene.pixel_qa, [1, 2])

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask)

    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
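
The baseline is assembled by concatenating the per-date slices along time before comparing against the mosaicked selected scene. As a rough illustration of the shape of such a comparison (a simplified mean-baseline difference, not the actual compute_ndvi_anomaly implementation):

import numpy as np
import xarray as xr

# Illustrative sketch only - a simplified anomaly, not compute_ndvi_anomaly.
baseline = xr.Dataset(
    {'ndvi': (('time', 'y', 'x'), np.random.rand(3, 2, 2))},
    coords={'time': [0, 1, 2]})
selected = xr.Dataset({'ndvi': (('y', 'x'), np.random.rand(2, 2))})

# selected scene minus the per-pixel baseline mean over time
anomaly = selected.ndvi - baseline.ndvi.mean(dim='time')
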
Example no. 12
def generate_TOOL_chunk(time_num, chunk_num, processing_options=None, query=None, acquisition_list=None, lat_range=None, lon_range=None, measurements=None):
    """
    Responsible for generating a piece of a custom mosaic product. Grabs the x/y area specified by the lat/lon ranges, gets all data
    from acquisition_list (a list of acquisition dates), and creates the custom mosaic using the function named in processing_options.
    Saves the result to disk using the time/chunk num and returns the path and the acquisition-date-keyed metadata.
    """
    time_index = 0
    iteration_data = None
    acquisition_metadata = {}
    print("Starting chunk: " + str(time_num) + " " + str(chunk_num))
    # holds some acquisition based metadata.
    while time_index < len(acquisition_list):
        # check if the task has been cancelled. if the result obj doesn't exist anymore then return.
        try:
            result = Result.objects.get(query_id=query.query_id)
        except Result.DoesNotExist:
            print("Cancelled task as result does not exist")
            return
        if result.status == "CANCEL":
            print("Cancelling...")
            return "CANCEL"

        # time ranges set based on if the acquisition_list has been reversed or not. If it has, then the 'start' index is the later date, and must be handled appropriately.
        start = acquisition_list[time_index] + datetime.timedelta(seconds=1) if processing_options['reverse_time'] else acquisition_list[time_index]
        if processing_options['time_slices_per_iteration'] is not None and (time_index + processing_options['time_slices_per_iteration'] - 1) < len(acquisition_list):
            end = acquisition_list[time_index + processing_options['time_slices_per_iteration'] - 1]
        else:
            end = acquisition_list[-1] if processing_options['reverse_time'] else acquisition_list[-1] + datetime.timedelta(seconds=1)
        time_range = (end, start) if processing_options['reverse_time'] else (start, end)

        raw_data = dc.get_dataset_by_extent(query.product, product_type=None, platform=query.platform, time=time_range, longitude=lon_range, latitude=lat_range, measurements=measurements)

        if "cf_mask" not in raw_data:
            time_index = time_index + (processing_options['time_slices_per_iteration'] if processing_options['time_slices_per_iteration'] is not None else 10000)
            continue
        clear_mask = create_cfmask_clean_mask(raw_data.cf_mask)

        # update metadata - here the clear mask has all the clean
        # pixels for each acquisition.
        for timeslice in range(clear_mask.shape[0]):
            time = acquisition_list[time_index + timeslice]
            clean_pixels = np.sum(clear_mask[timeslice, :, :] == True)
            if time not in acquisition_metadata:
                acquisition_metadata[time] = {}
                acquisition_metadata[time]['clean_pixels'] = 0
            acquisition_metadata[time]['clean_pixels'] += clean_pixels

        # Removes the cf mask variable from the dataset after the clear mask has been created.
        # prevents the cf mask from being put through the mosaicing function as it doesn't fit
        # the correct format w/ nodata values for mosaicing.
        raw_data = raw_data.drop('cf_mask')

        iteration_data = processing_options['processing_method'](
            raw_data, clean_mask=clear_mask, intermediate_product=iteration_data)
        time_index = time_index + (processing_options['time_slices_per_iteration'] if processing_options['time_slices_per_iteration'] is not None else 10000)

    # Save this geographic chunk to disk.
    geo_path = base_temp_path + query.query_id + "/geo_chunk_" + \
        str(time_num) + "_" + str(chunk_num) + ".nc"
    # if this is an empty chunk, just return an empty dataset.
    if iteration_data is None:
        return [None, None]
    iteration_data.to_netcdf(geo_path)
    print("Done with chunk: " + str(time_num) + " " + str(chunk_num))
    return [geo_path, acquisition_metadata]
Example no. 13
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with the time and geographic ranges and then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or whether all data needs to be loaded at once.

    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    #grab dem data as well
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if 'time' not in data or 'time' not in dem_data:
        return None

    #target data is most recent, with the baseline being everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = create_cfmask_clean_mask(target_data.cf_mask) if 'cf_mask' in target_data else create_bit_mask(
        target_data.pixel_qa, [1, 2])
    baseline_clear_mask = create_cfmask_clean_mask(
        baseline_data.cf_mask) if 'cf_mask' in baseline_data else create_bit_mask(baseline_data.pixel_qa, [1, 2])
    combined_baseline = task.get_processing_method()(baseline_data, clean_mask=baseline_clear_mask)

    target_data = create_mosaic(target_data, clean_mask=target_clear_mask)

    slip_data = compute_slip(combined_baseline, target_data, dem_data)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    target_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
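
The target/baseline split leans on xarray's positional indexing: the newest acquisition becomes the target (re-wrapped in a length-1 time dimension so downstream mosaicking still sees a time axis) and everything before it is the baseline. A small standalone sketch of that split with dummy data:

import numpy as np
import xarray as xr

# Illustrative sketch of the target/baseline split used above.
data = xr.Dataset(
    {'red': (('time', 'y', 'x'), np.arange(12).reshape(3, 2, 2))},
    coords={'time': [0, 1, 2]})

target = xr.concat([data.isel(time=-1)], 'time')   # newest acquisition, time dim kept
baseline = data.isel(time=slice(None, -1))         # everything before the newest
print(target.sizes['time'], baseline.sizes['time'])  # 1 2
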
Example no. 14
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with the time and geographic ranges and then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk) if task.get_iterative(
        ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()
                  ['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 enforce_float64=True)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis)

        clear_mask[(data.swir2.values > 100) |
                   (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=-9999)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=-9999)

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask,
                                              updated_params)
        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id),
                                            str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True
            ) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
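
TSM is only meaningful over clear water, so the clear mask is narrowed twice: pixels whose swir2 exceeds 1% reflectance (values are scaled by 10000, so 1% corresponds to 100) and pixels WOfS did not classify as water are both dropped. A toy numpy sketch of that compound narrowing with made-up values:

import numpy as np

# Illustrative sketch of the compound mask narrowing used above.
clear_mask = np.array([True, True, True, True])
swir2 = np.array([50, 250, 80, 20])   # scaled reflectance; > 100 means > 1 %
wofs = np.array([1, 1, 0, 1])         # 1 = water, 0 = not water

clear_mask[(swir2 > 100) | (wofs == 0)] = False
print(clear_mask)  # [ True False False  True]
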