# Imports assumed by the task functions below: `os`, `timedelta`, and Django's `F`
# are certain from usage; the logger setup is an assumption, and the task models
# (TsmTask, SpectralIndicesTask, ...) plus the Data Cube helpers (DataAccessApi,
# perform_timeseries_analysis, tsm, check_cancel_task, export_xarray_to_netcdf, ...)
# come from each app's models module and the shared data cube utilities.
import os
import logging
from datetime import timedelta

from django.db.models import F

logger = logging.getLogger(__name__)


def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load
    data. The task model holds the iterative property, which signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and
            the chunk being processed.
        geographic_chunk: range of latitude and longitude to load - dict with keys
            latitude, longitude.
        time_chunk: list of acquisition dates.
        parameters: all required kwargs to load data.

    Returns:
        Path to the output product, a metadata dict, and a dict containing the
        geo/time chunk ids.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time']
                  if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 enforce_float64=True,
                                                 no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data,
            'wofs',
            intermediate_product=water_analysis,
            no_data=task.satellite.no_data_value)

        # Mask out pixels that are bright in SWIR2 or that WOfS classified as dry.
        clear_mask[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data,
            'tsm',
            intermediate_product=tsm_analysis,
            no_data=task.satellite.no_data_value)
        if check_cancel_task(self, task):
            return

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)

        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0,
                drop=True) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
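
# `check_cancel_task` is called between the expensive steps above but is defined in
# the shared task utilities, not here. A minimal sketch of the pattern it implements,
# assuming the task model exposes a `status` field (field and value names are
# assumptions, not the project's actual API):
def check_cancel_task_sketch(self, task):
    """Return True if the user cancelled the task, so the worker can bail out early."""
    task.refresh_from_db(fields=['status'])  # pick up status changes made elsewhere
    if task.status == "CANCELLED":
        self.request.callbacks = None  # Celery: drop chained callbacks for this task
        return True
    return False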
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load
    data. The task model holds the iterative property, which signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and
            the chunk being processed.
        geographic_chunk: range of latitude and longitude to load - dict with keys
            latitude, longitude.
        time_chunk: list of acquisition dates.
        parameters: all required kwargs to load data.

    Returns:
        Path to the output product, a metadata dict, and a dict containing the
        geo/time chunk ids.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = AppNameTask.objects.get(pk=task_id)
    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    base_index = (task.get_chunk_size()['time']
                  if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        # TODO: If this is not a multi-sensor app, replace get_stacked_datasets_by_extent
        # with get_dataset_by_extent.
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        # TODO: Replace anything here with your processing - do you need to create
        # additional masks? Apply band maths? etc.
        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data \
            else create_bit_mask(data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)
        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        # TODO: Make sure you're producing everything required by your algorithm.
        iteration_data = task.get_processing_method()(data,
                                                      clean_mask=clear_mask,
                                                      intermediate_product=iteration_data)

        # TODO: If there is no animation, you can remove this block. Otherwise, save
        # off the data that you need.
        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # Need to clear out all the metadata before writing a single scene.
                clear_attrs(data)
                # Can't reindex on time, so drop it instead.
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
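
# `create_cfmask_clean_mask` and `create_bit_mask` above come from the shared Data
# Cube utilities. A minimal sketch of the bit-mask idea, assuming pixel_qa flags
# "clear" and "water" in bits 1 and 2 as in Landsat Collection 1 (the bit positions
# and the helper name here are illustrative, not the library's implementation):
import numpy as np

def bit_mask_sketch(pixel_qa, valid_bits):
    """True wherever any of `valid_bits` is set in the QA band."""
    mask = np.zeros(pixel_qa.shape, dtype=bool)
    for bit in valid_bits:
        mask |= (np.asarray(pixel_qa) & (1 << bit)) > 0
    return mask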
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load
    data. The task model holds the iterative property, which signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and
            the chunk being processed.
        geographic_chunk: range of latitude and longitude to load - dict with keys
            latitude, longitude.
        time_chunk: list of acquisition dates.
        parameters: all required kwargs to load data.

    Returns:
        Path to the output product, a metadata dict, and a dict containing the
        geo/time chunk ids.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = FractionalCoverTask.objects.get(pk=task_id)
    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    base_index = (task.get_chunk_size()['time']
                  if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data \
            else create_bit_mask(data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)
        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(data,
                                                      clean_mask=clear_mask,
                                                      intermediate_product=iteration_data)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load
    data. The task model holds the iterative property, which signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and
            the chunk being processed.
        geographic_chunk: range of latitude and longitude to load - dict with keys
            latitude, longitude.
        time_chunk: list of acquisition dates.
        parameters: all required kwargs to load data.

    Returns:
        Path to the output product, a metadata dict, and a dict containing the
        geo/time chunk ids.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # This variant referenced the helper without defining it; the inline definition
    # below matches the sibling tasks.
    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    iteration_data = None
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)
        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(data,
                                                      clean_mask=clear_mask,
                                                      intermediate_product=iteration_data,
                                                      no_data=task.satellite.no_data_value,
                                                      reverse_time=task.get_reverse_time())
        if check_cancel_task(self, task):
            return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(iteration_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
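
# Illustration of the time windows built by `_get_datetime_range_containing`: in
# iterative mode each acquisition gets its own window padded by one microsecond on
# each side; otherwise a single window spans the whole chunk (dates are examples):
from datetime import datetime, timedelta

acquisitions = [datetime(2017, 1, 1), datetime(2017, 1, 17), datetime(2017, 2, 2)]
pad = timedelta(microseconds=1)
per_scene = [(t - pad, t + pad) for t in acquisitions]           # task.get_iterative() is True
whole_chunk = [(acquisitions[0] - pad, acquisitions[-1] + pad)]  # iterative is False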
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load
    data. The task model holds the iterative property, which signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and
            the chunk being processed.
        geographic_chunk: range of latitude and longitude to load - dict with keys
            latitude, longitude.
        time_chunk: list of acquisition dates.
        parameters: all required kwargs to load data.

    Returns:
        Path to the output product, a metadata dict, and a dict containing the
        geo/time chunk ids.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    base_index = (task.get_chunk_size()['time']
                  if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)
        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(data,
                                                      clean_mask=clear_mask,
                                                      intermediate_product=iteration_data,
                                                      no_data=task.satellite.no_data_value,
                                                      reverse_time=task.get_reverse_time())
        if check_cancel_task(self, task):
            return

        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # Need to clear out all the metadata before writing a single scene.
                clear_attrs(data)
                # Can't reindex on time, so drop it instead.
                export_xarray_to_netcdf(data.isel(time=0).drop('time'), path)
            elif task.animated_product.animation_id == "cumulative":
                export_xarray_to_netcdf(iteration_data, path)

        task.scenes_processed = F('scenes_processed') + 1
        # Avoid overwriting the task's status if it is cancelled.
        task.save(update_fields=['scenes_processed'])

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(iteration_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
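
# `clear_attrs` strips dataset metadata so the per-scene animation frame serializes
# cleanly to NetCDF. A minimal sketch, assuming it clears attrs on the Dataset and
# on each data variable (the real helper lives in the shared utilities):
def clear_attrs_sketch(dataset):
    dataset.attrs = {}
    for var in dataset.data_vars:
        dataset[var].attrs = {}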
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with the time and geographic ranges, then used to load
    data. The task model holds the iterative property, which signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and
            the chunk being processed.
        geographic_chunk: range of latitude and longitude to load - dict with keys
            latitude, longitude.
        time_chunk: list of acquisition dates.
        parameters: all required kwargs to load data.

    Returns:
        Path to the output product, a metadata dict, and a dict containing the
        geo/time chunk ids.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = WaterDetectionTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # This variant referenced the helper without defining it; the inline definition
    # below matches the sibling tasks.
    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else
        [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    water_analysis = None
    base_index = (task.get_chunk_size()['time']
                  if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        # Ensure data variables have the range of Landsat 7 Collection 1 Level 2,
        # since the color scales are tailored to that dataset.
        platform = task.satellite.platform
        collection = task.satellite.collection
        level = task.satellite.level
        if (platform, collection) != ('LANDSAT_7', 'c1'):
            data = convert_range(
                data,
                from_platform=platform,
                from_collection=collection,
                from_level=level,
                to_platform='LANDSAT_7',
                to_collection='c1',
                to_level='l2')

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data,
            'wofs',
            intermediate_product=water_analysis,
            no_data=task.satellite.no_data_value)
        metadata = task.metadata_from_dataset(metadata, wofs_data, clear_mask.data, updated_params)

        if task.animated_product.animation_id != "none":
            path = os.path.join(
                task.get_temp_path(),
                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = wofs_data.isel(
                time=0,
                drop=True) if task.animated_product.animation_id == "scene" else water_analysis
            export_xarray_to_netcdf(animated_data, path)

        if check_cancel_task(self, task):
            return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if water_analysis is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(water_analysis, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
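
# A hypothetical invocation showing the shape of the chunk arguments these tasks
# expect. In the apps, a chunker computes these values and dispatches the bound
# Celery task; `.delay(...)` assumes the function is registered as such a task.
# Every value below is illustrative only:
from datetime import datetime

example_parameters = {
    'platform': 'LANDSAT_7',                      # assumed load kwargs
    'product': 'ls7_collection1_sample_product',  # hypothetical product name
    'measurements': ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'],
}
result = processing_task.delay(
    task_id=42,
    geo_chunk_id=0,
    time_chunk_id=0,
    geographic_chunk={'latitude': (0.0, 0.5), 'longitude': (35.0, 35.5)},
    time_chunk=[datetime(2017, 1, 1), datetime(2017, 1, 17)],
    **example_parameters)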