Example 1
def perform_task_chunking(parameters, task_id=None):
    """Chunk parameter sets into more manageable sizes

    Uses functions provided by the task model to create a group of
    parameter sets that make up the argument.

    Args:
        parameters: parameter stream containing all kwargs to load data

    Returns:
        parameters with a list of geographic and time ranges

    """

    if parameters is None:
        return None

    task = SlipTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    dates = dc.list_acquisition_dates(**parameters)
    task_chunk_sizing = task.get_chunk_size()

    geographic_chunks = create_geographic_chunks(
        longitude=parameters['longitude'],
        latitude=parameters['latitude'],
        geographic_chunk_size=task_chunk_sizing['geographic'])

    time_chunks = generate_baseline(dates, task.baseline_length)

    logger.info("Time chunks: {}, Geo chunks: {}".format(len(time_chunks), len(geographic_chunks)))

    dc.close()
    task.update_status("WAIT", "Chunked parameter set.")
    return {'parameters': parameters, 'geographic_chunks': geographic_chunks, 'time_chunks': time_chunks}
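
Note: create_geographic_chunks is provided by the Data Cube UI utilities. As a rough sketch only - the even-split strategy and signature details here are assumptions, not the library's exact algorithm - the chunking it performs looks something like this:

# Hypothetical sketch: split the latitude extent into evenly sized sub-ranges.
# The real create_geographic_chunks may split along either axis or use a
# different strategy; this only illustrates the idea.
import math

def sketch_geographic_chunks(longitude, latitude, geographic_chunk_size):
    lat_span = latitude[1] - latitude[0]
    num_chunks = max(1, math.ceil(lat_span / geographic_chunk_size))
    step = lat_span / num_chunks
    return [{'longitude': longitude,
             'latitude': (latitude[0] + i * step, latitude[0] + (i + 1) * step)}
            for i in range(num_chunks)]

# e.g. sketch_geographic_chunks((34.0, 35.0), (-1.0, 1.0), 0.5) yields four
# chunks, each covering 0.5 degrees of latitude over the full longitude range.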
Example 2
def perform_task_chunking(parameters, task_id=None):
    """Chunk parameter sets into more manageable sizes

    Uses functions provided by the task model to create a group of
    parameter sets that make up the argument.

    Args:
        parameters: parameter stream containing all kwargs to load data

    Returns:
        parameters with a list of geographic and time ranges

    """

    if parameters is None:
        return None

    task = CoastalChangeTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    dates = dc.list_acquisition_dates(**parameters)
    task_chunk_sizing = task.get_chunk_size()

    geographic_chunks = create_geographic_chunks(
        longitude=parameters['longitude'],
        latitude=parameters['latitude'],
        geographic_chunk_size=task_chunk_sizing['geographic'])

    grouped_dates = group_datetimes_by_year(dates)
    # we need to pair the first year with each subsequent year.
    time_chunks = None
    if task.animated_product.animation_id == 'none':
        # first and last only
        time_chunks = [[
            grouped_dates[task.time_start], grouped_dates[task.time_end]
        ]]
    else:
        initial_year = grouped_dates.pop(task.time_start)
        time_chunks = [[initial_year, grouped_dates[year]]
                       for year in grouped_dates]
    logger.info("Time chunks: {}, Geo chunks: {}".format(
        len(time_chunks), len(geographic_chunks)))

    dc.close()
    task.update_status("WAIT", "Chunked parameter set.")

    return {
        'parameters': parameters,
        'geographic_chunks': geographic_chunks,
        'time_chunks': time_chunks
    }
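
Note: a minimal sketch of group_datetimes_by_year and of the first-vs-subsequent pairing described in the comment above, assuming the helper returns an ordered year-to-acquisitions mapping (the real key type may differ):

# Hypothetical sketch of group_datetimes_by_year: bucket acquisition datetimes
# by calendar year. The real helper may key the mapping differently.
from collections import OrderedDict
from datetime import datetime

def sketch_group_datetimes_by_year(dates):
    grouped = OrderedDict()
    for date in sorted(dates):
        grouped.setdefault(date.year, []).append(date)
    return grouped

# Pairing used above: pop the initial year, then pair it with each later year.
dates = [datetime(2015, 6, 1), datetime(2016, 7, 1), datetime(2017, 8, 1)]
grouped = sketch_group_datetimes_by_year(dates)
initial_year = grouped.pop(2015)
time_chunks = [[initial_year, grouped[year]] for year in grouped]
# -> [[2015 scenes, 2016 scenes], [2015 scenes, 2017 scenes]]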
Example 3
def update_data_cube_details(ingested_only=True):
    dataset_types = DatasetType.objects.using('agdc').filter(
        Q(definition__has_keys=['managed'])
        & Q(definition__has_keys=['measurements']))

    dc = DataAccessApi(config='/home/' + settings.LOCAL_USER +
                       '/Datacube/data_cube_ui/config/.datacube.conf')

    for dataset_type in dataset_types:
        ingestion_details, created = IngestionDetails.objects.get_or_create(
            dataset_type_ref=dataset_type.id,
            product=dataset_type.name,
            platform=dataset_type.metadata['platform']['code'])
        ingestion_details.update_with_query_metadata(
            dc.get_datacube_metadata(dataset_type.name))
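
Note: assuming Django's Postgres JSONField has_keys lookup (which requires all listed keys to be present), the two ANDed Q objects above can be collapsed into a single lookup:

# Equivalent filter under that assumption: has_keys with a list requires
# every listed key to be present at the top level of the JSON document.
dataset_types = DatasetType.objects.using('agdc').filter(
    definition__has_keys=['managed', 'measurements'])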
Example 4
def init_worker(**kwargs):
    """
    Creates a DataAccessApi instance for the worker process.
    """

    print("Creating DC instance for worker.")
    global dc
    dc = DataAccessApi()
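
Note: in a Celery deployment this kind of initializer is typically attached to the worker_process_init signal so each forked worker process builds its own DataAccessApi; how the Data Cube UI actually registers it is an assumption here:

# Sketch: wire init_worker to Celery's per-process init signal. The signal
# calls receivers with keyword arguments, which init_worker(**kwargs) accepts.
from celery.signals import worker_process_init

worker_process_init.connect(init_worker)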
Example 5
def validate_parameters(parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = FractionalCoverTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    # validate for any number of criteria here - number of acquisitions, etc.
    acquisitions = dc.list_combined_acquisition_dates(**parameters)

    if len(acquisitions) < 1:
        task.complete = True
        task.update_status("ERROR", "There are no acquistions for this parameter set.")
        return None

    if task.compositor.id == "median_pixel" and (task.time_end - task.time_start).days > 367:
        task.complete = True
        task.update_status("ERROR", "Median pixel operations are only supported for single year time periods.")
        return None

    task.update_status("WAIT", "Validated parameters.")

    if not dc.validate_measurements(parameters['products'][0], parameters['measurements']):
        parameters['measurements'] = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa']

    dc.close()
    return parameters
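
Note: the 367-day cutoff presumably allows a full single year - including a leap year - with a day of slack; a quick worked check:

# Worked example of the single-year bound: a leap-year span is 366 days,
# so 367 leaves one day of headroom before the task is rejected.
from datetime import date

assert (date(2017, 1, 1) - date(2016, 1, 1)).days == 366   # leap year, allowed
assert (date(2017, 1, 2) - date(2016, 1, 1)).days == 367   # still allowed
assert (date(2017, 1, 3) - date(2016, 1, 1)).days > 367    # rejected above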
Example 6
def validate_parameters(parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = CloudCoverageTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    # validate for any number of criteria here - number of acquisitions, etc.
    acquisitions = dc.list_acquisition_dates(**parameters)

    if len(acquisitions) < 1:
        task.complete = True
        task.update_status("ERROR",
                           "There are no acquistions for this parameter set.")
        return None

    task.update_status("WAIT", "Validated parameters.")

    if not dc.validate_measurements(parameters['product'],
                                    parameters['measurements']):
        parameters['measurements'] = ['blue', 'green', 'red', 'pixel_qa']

    dc.close()
    return parameters
Example 7
def validate_parameters(parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = NdviAnomalyTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    acquisitions = dc.list_acquisition_dates(**parameters)

    if len(acquisitions) < 1:
        task.complete = True
        task.update_status("ERROR",
                           "There are no acquistions for this parameter set.")
        return None

    # the actual acquisition exists, let's try the baseline:
    validation_params = {**parameters}
    # baseline period: the five years immediately preceding the selected scene
    validation_params.update({
        'time': (task.time_start.replace(year=task.time_start.year - 5),
                 task.time_start - timedelta(microseconds=1))
    })
    acquisitions = dc.list_acquisition_dates(**validation_params)

    # list/map/int chain required to cast each baseline month to int - the grouping won't work if they're strings.
    grouped_dates = group_datetimes_by_month(
        acquisitions,
        months=list(map(int, task.baseline_selection.split(","))))

    if not grouped_dates:
        task.complete = True
        task.update_status("ERROR",
                           "There are no acquistions for this parameter set.")
        return None
    task.update_status("WAIT", "Validated parameters.")

    if not dc.validate_measurements(parameters['product'],
                                    parameters['measurements']):
        parameters['measurements'] = [
            'blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa'
        ]

    dc.close()
    return parameters
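
Note: a hedged sketch of group_datetimes_by_month as used above - bucketing acquisitions by month and discarding months outside the baseline selection (the real helper's key type and ordering may differ):

# Hypothetical sketch: keep only acquisitions whose month is in `months`,
# bucketed by month number.
def sketch_group_datetimes_by_month(dates, months=None):
    grouped = {}
    for date in sorted(dates):
        if months is None or date.month in months:
            grouped.setdefault(date.month, []).append(date)
    return grouped

# e.g. task.baseline_selection == "1,2,12" -> months=[1, 2, 12], so only
# acquisitions from those months of the five baseline years survive.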
Example 8
def perform_task_chunking(parameters, task_id=None):
    """Chunk parameter sets into more manageable sizes

    Uses functions provided by the task model to create a group of
    parameter sets that make up the argument.

    Args:
        parameters: parameter stream containing all kwargs to load data

    Returns:
        parameters with a list of geographic and time ranges

    """

    if parameters is None:
        return None

    task = NdviAnomalyTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)
    dates = dc.list_acquisition_dates(**parameters)
    task_chunk_sizing = task.get_chunk_size()

    geographic_chunks = create_geographic_chunks(
        longitude=parameters['longitude'],
        latitude=parameters['latitude'],
        geographic_chunk_size=task_chunk_sizing['geographic'])

    grouped_dates_params = {**parameters}
    grouped_dates_params.update({
        'time': (datetime(1000, 1, 1),
                 task.time_start - timedelta(microseconds=1))
    })
    acquisitions = dc.list_acquisition_dates(**grouped_dates_params)
    grouped_dates = group_datetimes_by_month(
        acquisitions,
        months=list(map(int, task.baseline_selection.split(","))))
    # create a single monolithic list of all acquisition dates - there should be only one time chunk.
    time_chunks = []
    for date_group in grouped_dates:
        time_chunks.extend(grouped_dates[date_group])
    # time chunks cast to a single-element list, essentially.
    time_chunks = [time_chunks]

    logger.info("Time chunks: {}, Geo chunks: {}".format(
        len(time_chunks), len(geographic_chunks)))

    dc.close()
    task.update_status("WAIT", "Chunked parameter set.")

    return {
        'parameters': parameters,
        'geographic_chunks': geographic_chunks,
        'time_chunks': time_chunks
    }
Example 9
def validate_parameters(parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = SlipTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    acquisitions = dc.list_acquisition_dates(**parameters)

    if len(acquisitions) < 1:
        task.complete = True
        task.update_status("ERROR", "There are no acquistions for this parameter set.")
        return None

    if len(acquisitions) < task.baseline_length + 1:
        task.complete = True
        task.update_status("ERROR", "There are an insufficient number of acquisitions for your baseline length.")
        return None

    validation_parameters = {**parameters}
    validation_parameters.pop('time')
    validation_parameters.pop('measurements')
    validation_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    if len(dc.list_acquisition_dates(**validation_parameters)) < 1:
        task.complete = True
        task.update_status("ERROR", "There is no elevation data for this parameter set.")
        return None

    task.update_status("WAIT", "Validated parameters.")

    if not dc.validate_measurements(parameters['product'], parameters['measurements']):
        parameters['measurements'] = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa']

    dc.close()
    return parameters
Example 10
def validate_parameters(parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = AppNameTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    # validate for any number of criteria here - number of acquisitions, etc.
    # TODO: if this is not a multi-sensor app, replace list_combined_acquisition_dates with list_acquisition_dates
    acquisitions = dc.list_combined_acquisition_dates(**parameters)

    # TODO: are there any additional validations that need to be done here?
    if len(acquisitions) < 1:
        task.complete = True
        task.update_status("ERROR",
                           "There are no acquistions for this parameter set.")
        return None

    if task.animated_product.animation_id != "none" and task.compositor.id == "median_pixel":
        task.complete = True
        task.update_status(
            "ERROR",
            "Animations cannot be generated for median pixel operations.")
        return None

    task.update_status("WAIT", "Validated parameters.")

    # TODO: Check that the measurements exist - for Landsat, we're making sure that cf_mask/pixel_qa are interchangeable.
    # replace ['products'][0] with ['product'] if this is not a multi-sensor app.
    if not dc.validate_measurements(parameters['products'][0],
                                    parameters['measurements']):
        parameters['measurements'] = [
            'blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa'
        ]

    dc.close()
    return parameters
Example 11
def validate_parameters(parameters, task_id=None):
    """Validate parameters generated by the parameter parsing task

    All validation should be done here - are there data restrictions?
    Combinations that aren't allowed? etc.

    Returns:
        parameter dict with all keyword args required to load data.
        -or-
        updates the task with ERROR and a message, returning None

    """
    task = CoastalChangeTask.objects.get(pk=task_id)
    dc = DataAccessApi(config=task.config_path)

    validation_params = dict(parameters)
    # verify that both the start and end year have acquisitions
    for year in parameters['time']:
        validation_params.update(
            {'time': (year, year.replace(year=year.year + 1))})
        acquisitions = dc.list_acquisition_dates(**validation_params)
        if len(acquisitions) < 1:
            task.complete = True
            task.update_status(
                "ERROR",
                "There must be at least one acquisition in both the start and ending year."
            )
            return None

    task.update_status("WAIT", "Validated parameters.")

    if not dc.validate_measurements(parameters['product'],
                                    parameters['measurements']):
        parameters['measurements'] = [
            'blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'pixel_qa'
        ]

    dc.close()
    return parameters
Example 12
def init_worker(**kwargs):
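    """
    Creates a DataAccessApi instance for the worker process.
    """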
    print("Creating DC instance for worker.")
    global dc
    dc = DataAccessApi()
Example 13
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CoastalChangeTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    starting_year = _get_datetime_range_containing(*time_chunk[0])
    comparison_year = _get_datetime_range_containing(*time_chunk[1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    def _compute_mosaic(time):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            return None, None

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])
        metadata = task.metadata_from_dataset({}, data, clear_mask,
                                              updated_params)
        return task.get_processing_method()(data,
                                            clean_mask=clear_mask), metadata

    old_mosaic, old_metadata = _compute_mosaic(starting_year)
    new_mosaic, new_metadata = _compute_mosaic(comparison_year)

    if old_mosaic is None or new_mosaic is None:
        return None

    metadata = {**old_metadata, **new_metadata}

    output_product = compute_coastal_change(old_mosaic, new_mosaic)

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    output_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
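
Note: the microsecond padding in _get_datetime_range_containing guarantees the returned range strictly brackets every acquisition, so an inclusive time filter cannot drop a boundary scene; for example:

# Worked example of the padding around a set of acquisition timestamps.
from datetime import datetime, timedelta

acquisitions = [datetime(2016, 1, 14), datetime(2016, 2, 15), datetime(2016, 3, 2)]
start = min(acquisitions) - timedelta(microseconds=1)  # 2016-01-13 23:59:59.999999
end = max(acquisitions) + timedelta(microseconds=1)    # 2016-03-02 00:00:00.000001
assert all(start < acq < end for acq in acquisitions)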
Example 14
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # load selected scene and mosaic just in case we got two scenes (handles scene boundaries/overlapping data)
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)

    if len(full_dataset) == 0 or selected_scene is None or 'time' not in selected_scene:
        return None

    # concat individual slices over time, compute metadata + mosaic
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = create_cfmask_clean_mask(
        baseline_data.cf_mask
    ) if 'cf_mask' in baseline_data else create_bit_mask(
        baseline_data.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset(metadata, baseline_data,
                                          baseline_clear_mask, parameters)

    selected_scene_clear_mask = create_cfmask_clean_mask(
        selected_scene.cf_mask
    ) if 'cf_mask' in selected_scene else create_bit_mask(
        selected_scene.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset(metadata, selected_scene,
                                          selected_scene_clear_mask,
                                          parameters)
    selected_scene = task.get_processing_method()(
        selected_scene,
        clean_mask=selected_scene_clear_mask,
        intermediate_product=None)
    # we need to regenerate the clear mask using the mosaic now.
    selected_scene_clear_mask = create_cfmask_clean_mask(
        selected_scene.cf_mask
    ) if 'cf_mask' in selected_scene else create_bit_mask(
        selected_scene.pixel_qa, [1, 2])

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask)

    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
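
Note: create_bit_mask(data.pixel_qa, [1, 2]) keeps pixels whose pixel_qa bits 1 (clear) or 2 (water) are set, following the Landsat collection-1 convention; a numpy sketch of the idea (the real utility operates on the xarray band and may differ in detail):

# Hypothetical sketch of a pixel_qa bit mask: a pixel passes when any of the
# requested bits is set in its QA value.
import numpy as np

def sketch_bit_mask(pixel_qa, valid_bits):
    mask = np.zeros(pixel_qa.shape, dtype=bool)
    for bit in valid_bits:
        mask |= (pixel_qa & (1 << bit)) > 0
    return mask

# e.g. sketch_bit_mask(np.array([2, 4, 8]), [1, 2]) -> [True, True, False]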
Example 15
def get_acquisition_list(task, area_id, platform, date):
    dc = DataAccessApi(config=task.config_path)
    # lists all acquisition dates for use in single timeslice queries.
    product = Satellite.objects.get(datacube_platform=platform).product_prefix + area_id
    acquisitions = dc.list_acquisition_dates(product, platform, time=(datetime(1900, 1, 1), date))
    return acquisitions
Example 16
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    # grab DEM data as well
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if data is None or dem_data is None or 'time' not in data or 'time' not in dem_data:
        return None

    # target data is the most recent acquisition, with the baseline being everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = create_cfmask_clean_mask(target_data.cf_mask) if 'cf_mask' in target_data else create_bit_mask(
        target_data.pixel_qa, [1, 2])
    baseline_clear_mask = create_cfmask_clean_mask(
        baseline_data.cf_mask) if 'cf_mask' in baseline_data else create_bit_mask(baseline_data.pixel_qa, [1, 2])
    combined_baseline = task.get_processing_method()(baseline_data, clean_mask=baseline_clear_mask)

    target_data = create_mosaic(target_data, clean_mask=target_clear_mask)

    slip_data = compute_slip(combined_baseline, target_data, dem_data)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    target_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
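
Note: the target/baseline split above leans on two xarray idioms - isel(time=-1) pulls the newest scene (dropping the time dimension), and the single-element concat restores time as a length-1 dimension. A toy illustration on synthetic data:

# Toy illustration of the split used above.
import numpy as np
import xarray as xr

ds = xr.Dataset({'red': (('time', 'y'), np.arange(8).reshape(4, 2))},
                coords={'time': np.arange(4)})
target = xr.concat([ds.isel(time=-1)], 'time')  # newest scene, time dim restored
baseline = ds.isel(time=slice(None, -1))        # everything older
assert target.sizes['time'] == 1 and baseline.sizes['time'] == 3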
Example 17
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    iteration_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})

        data = dc.get_stacked_datasets_by_extent(**updated_params)

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa,
                                                                                                      [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(data, clean_mask=clear_mask, intermediate_product=iteration_data)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # need to clear out all the metadata.
                clear_attrs(data)
                # can't reindex on time, so drop the time dim before writing.
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")

    if iteration_data is None:
        return None
    iteration_data.to_netcdf(path)

    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Example 18
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time']
                  if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(
                data.pixel_qa, [1, 2])

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 enforce_float64=True)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis)

        clear_mask[(data.swir2.values > 100) |
                   (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=-9999)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=-9999)

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask,
                                              updated_params)
        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id),
                                            str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True
            ) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
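
Note: the in-place mask refinement above (clear_mask[...] = False) removes bright swir2 pixels and non-water pixels before TSM is computed; a worked numpy example with hypothetical values:

# Boolean-index assignment drops pixels that are bright in swir2 or that
# WOfS did not classify as water.
import numpy as np

clear_mask = np.array([True, True, True, True])
swir2 = np.array([50, 150, 60, 70])  # hypothetical swir2 values
wofs = np.array([1, 1, 0, 1])        # 1 = water, 0 = not water
clear_mask[(swir2 > 100) | (wofs == 0)] = False
assert clear_mask.tolist() == [True, False, False, True]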