def generate_animation(index, combined_data):
    # Each time chunk owns a contiguous block of global animation frame indices.
    time_chunk_size = task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1
    base_index = time_chunk_size * index
    for frame_index in range(time_chunk_size):
        path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + frame_index))
        if os.path.exists(path):
            animated_data = xr.open_dataset(path, autoclose=True)
            if task.animated_product.animation_id == "cumulative":
                # Restore a length-1 time dimension so the iterative mosaic can run.
                animated_data = xr.concat([animated_data], 'time')
                animated_data['time'] = [0]
                clear_mask = create_cfmask_clean_mask(
                    animated_data.cf_mask) if 'cf_mask' in animated_data else create_bit_mask(
                        animated_data.pixel_qa, [1, 2])
                animated_data = task.get_processing_method()(
                    animated_data, clean_mask=clear_mask, intermediate_product=combined_data)
            path = os.path.join(task.get_temp_path(), "animation_{}.png".format(base_index + frame_index))
            write_png_from_xr(
                path,
                animated_data,
                bands=[task.query_type.red, task.query_type.green, task.query_type.blue],
                scale=(0, 4096))
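
# -----------------------------------------------------------------------------
# Illustrative sketch (standalone, not part of the pipeline): how the chunked
# frame numbering above works. Each time chunk of size `chunk_size` owns a
# contiguous block of global frame indices, so chunk 2 with a chunk size of 5
# renders animation_10 through animation_14. The helper name is hypothetical.
def _frame_indices(chunk_index, chunk_size):
    """Return the global animation frame indices owned by one time chunk."""
    size = chunk_size if chunk_size is not None else 1
    base_index = size * chunk_index
    return [base_index + offset for offset in range(size)]

assert _frame_indices(2, 5) == [10, 11, 12, 13, 14]
# -----------------------------------------------------------------------------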
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not,
    then it will simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    logger.info("RECOMBINE_TIME")
    # Sort by the chunk's output path, which embeds the time id - earlier chunks sort first (e.g. 0, 1, 2...).
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = BandMathTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}
    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            combined_data = data
            continue
        # Give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa, [1, 2])
        combined_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=combined_data)

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
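
# -----------------------------------------------------------------------------
# Illustrative sketch (standalone, hypothetical data): the single-element
# xr.concat trick used above. A processed chunk comes back with its time
# dimension collapsed; concatenating a one-element list along 'time' restores
# a length-1 time axis so mosaicking code that indexes time keeps working.
import numpy as np
import xarray as xr

flat = xr.Dataset({'red': (('y', 'x'), np.zeros((2, 2)))})
restored = xr.concat([flat], 'time')   # adds a time dimension of length 1
restored['time'] = [0]                 # give it a dummy coordinate value
assert restored.red.dims == ('time', 'y', 'x')
# -----------------------------------------------------------------------------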
def _compute_mosaic(time):
    updated_params.update({'time': time})
    data = dc.get_dataset_by_extent(**updated_params)
    if data is None or 'time' not in data:
        logger.info("Invalid chunk.")
        return None, None
    clear_mask = create_cfmask_clean_mask(
        data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset({}, data, clear_mask, updated_params)
    return task.get_processing_method()(data, clean_mask=clear_mask), metadata
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not,
    then it will simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    logger.info("RECOMBINE_TIME")
    # Sort by the chunk's output path, which embeds the time id - earlier chunks sort first (e.g. 0, 1, 2...).
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = CustomMosaicToolTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    def generate_animation(index, combined_data):
        # Each time chunk owns a contiguous block of global animation frame indices.
        time_chunk_size = task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1
        base_index = time_chunk_size * index
        for frame_index in range(time_chunk_size):
            path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + frame_index))
            if os.path.exists(path):
                animated_data = xr.open_dataset(path, autoclose=True)
                if task.animated_product.animation_id == "cumulative":
                    animated_data = xr.concat([animated_data], 'time')
                    animated_data['time'] = [0]
                    clear_mask = create_cfmask_clean_mask(
                        animated_data.cf_mask) if 'cf_mask' in animated_data else create_bit_mask(
                            animated_data.pixel_qa, [1, 2])
                    animated_data = task.get_processing_method()(
                        animated_data, clean_mask=clear_mask, intermediate_product=combined_data)
                path = os.path.join(task.get_temp_path(), "animation_{}.png".format(base_index + frame_index))
                write_png_from_xr(
                    path,
                    animated_data,
                    bands=[task.query_type.red, task.query_type.green, task.query_type.blue],
                    scale=(0, 4096))

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            if task.animated_product.animation_id != "none":
                generate_animation(index, combined_data)
            combined_data = data
            continue
        # Give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa, [1, 2])
        combined_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=combined_data)
        # If we're animating, combine it all and save to disk.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    # Iterative algorithms load one acquisition at a time; otherwise load the full range at once.
    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    iteration_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(
            data, clean_mask=clear_mask, intermediate_product=iteration_data)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # Need to clear out all the metadata before a per-scene write.
                clear_attrs(data)
                # Can't reindex on time, so drop the dimension instead.
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
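
# -----------------------------------------------------------------------------
# Illustrative sketch (standalone): what _get_datetime_range_containing above
# produces. Padding by one microsecond on each side turns exact acquisition
# timestamps into a range that is guaranteed to include the boundary
# acquisitions themselves when used as a (start, end) datacube query.
from datetime import datetime, timedelta

def _range_containing(*times):
    return (min(times) - timedelta(microseconds=1), max(times) + timedelta(microseconds=1))

start, end = _range_containing(datetime(2016, 1, 1), datetime(2016, 6, 1))
assert start < datetime(2016, 1, 1) and end > datetime(2016, 6, 1)
# -----------------------------------------------------------------------------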
def _run_ccd_on_pixel(ds):
    """Performs CCD on a 1x1xn dataset. Returns CCD results.

    Creates a CCD result from a 1x1xn dimensioned dataset. Flattens all bands
    to perform analysis. Missing SR bands are tolerated; pixel_qa is required
    to build the QA input.

    Args:
        ds: xArray dataset with dimensions 1x1xn with any number of SR bands, pixel_qa required.

    Returns:
        The result of ccd.detect
    """
    if 'time' not in ds.dims:
        raise ValueError("The dataset is missing its time dimension.")
    available_bands = ds.data_vars
    scene_count = ds.dims['time']
    date = [_n64_to_datetime(t).date().toordinal() for t in ds.time.values]
    # Fall back to an all-ones series for any missing SR band.
    red = np.ones(scene_count) if 'red' not in available_bands else ds.red.values
    green = np.ones(scene_count) if 'green' not in available_bands else ds.green.values
    blue = np.ones(scene_count) if 'blue' not in available_bands else ds.blue.values
    nir = np.ones(scene_count) if 'nir' not in available_bands else ds.nir.values
    swir1 = np.ones(scene_count) if 'swir1' not in available_bands else ds.swir1.values
    swir2 = np.ones(scene_count) if 'swir2' not in available_bands else ds.swir2.values
    # Default thermal to 273.15 K (scaled by 10) when the band is absent.
    thermals = np.ones(scene_count) * 273.15 * 10 if 'thermal' not in available_bands else ds.thermal.values

    # Generate the QA series from pixel_qa - bits 1 and 2 flag clear land/water.
    cloud_mask = utils.create_bit_mask(ds.pixel_qa, [1, 2])
    qa = cloud_mask.astype(int).ravel()

    params = {'QA_BITPACKED': False, 'QA_FILL': 255, 'QA_CLEAR': 0, 'QA_CLOUD': 1}

    return ccd.detect(date, blue, green, red, nir, swir1, swir2, thermals, qa, params=params)
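
# -----------------------------------------------------------------------------
# Illustrative sketch (an assumption, not the library's implementation): the
# create_bit_mask(ds.pixel_qa, [1, 2]) calls throughout this module treat
# pixel_qa as a bit-packed QA band where, as in Landsat Collection 1 surface
# reflectance, bit 1 flags clear land and bit 2 flags water. A pixel counts as
# clean if either bit is set. A plain-numpy equivalent might look like this:
import numpy as np

def _bit_mask(qa_array, bits):
    """True where any of the given QA bits is set."""
    mask = np.zeros(qa_array.shape, dtype=bool)
    for bit in bits:
        mask |= (qa_array & (1 << bit)) > 0
    return mask

qa = np.array([0b0010, 0b0100, 0b1000])  # clear land, water, neither
assert _bit_mask(qa, [1, 2]).tolist() == [True, True, False]
# -----------------------------------------------------------------------------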
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time.
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # Load the selected scene and mosaic in case we got two scenes (handles scene boundaries/overlapping data).
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)

    if len(full_dataset) == 0 or selected_scene is None or 'time' not in selected_scene:
        return None

    # Concat individual slices over time, compute metadata + mosaic.
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = create_cfmask_clean_mask(
        baseline_data.cf_mask) if 'cf_mask' in baseline_data else create_bit_mask(baseline_data.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset(metadata, baseline_data, baseline_clear_mask, parameters)

    selected_scene_clear_mask = create_cfmask_clean_mask(
        selected_scene.cf_mask) if 'cf_mask' in selected_scene else create_bit_mask(selected_scene.pixel_qa, [1, 2])
    metadata = task.metadata_from_dataset(metadata, selected_scene, selected_scene_clear_mask, parameters)

    selected_scene = task.get_processing_method()(
        selected_scene, clean_mask=selected_scene_clear_mask, intermediate_product=None)

    # The mosaic changes pixel values, so regenerate the clear mask from the mosaicked scene.
    selected_scene_clear_mask = create_cfmask_clean_mask(
        selected_scene.cf_mask) if 'cf_mask' in selected_scene else create_bit_mask(selected_scene.pixel_qa, [1, 2])

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask)

    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
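
# -----------------------------------------------------------------------------
# Illustrative sketch (hypothetical, not the project's compute_ndvi_anomaly):
# the anomaly concept above reduces to "NDVI of the selected scene minus NDVI
# of the baseline mosaic", with NDVI = (nir - red) / (nir + red).
import numpy as np
import xarray as xr

def _ndvi(ds):
    return (ds.nir - ds.red) / (ds.nir + ds.red)

baseline = xr.Dataset({'red': (('y', 'x'), np.full((2, 2), 600.0)),
                       'nir': (('y', 'x'), np.full((2, 2), 3000.0))})
selected = xr.Dataset({'red': (('y', 'x'), np.full((2, 2), 900.0)),
                       'nir': (('y', 'x'), np.full((2, 2), 2000.0))})
anomaly = _ndvi(selected) - _ndvi(baseline)  # negative values = vegetation loss
# -----------------------------------------------------------------------------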
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not,
    then it will simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    logger.info("RECOMBINE_TIME")
    # Sort by the chunk's output path, which embeds the time id - earlier chunks sort first (e.g. 0, 1, 2...).
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = SlipTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    combined_data = None
    combined_slip = None
    # Iterate newest-first so the most recent slip detections take precedence.
    for index, chunk in enumerate(reversed(total_chunks)):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            combined_data = data.drop('slip')
            # Since this is going to interact with data/mosaicking, it needs a time dim.
            combined_slip = xr.concat([data.slip.copy(deep=True)], 'time')
            continue
        # Give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa, [1, 2])
        # Modify the clean mask so that only pixels whose slip value is still zero will be
        # used - this keeps every pixel that triggered the slip flag visible in the mosaic.
        clear_mask[xr.concat([combined_slip], 'time').values == 1] = False
        combined_data = create_mosaic(data.drop('slip'), clean_mask=clear_mask, intermediate_product=combined_data)
        combined_slip.values[combined_slip.values == 0] = data.slip.values[combined_slip.values == 0]

    # Since we added a time dim to combined_slip, we need to remove it here.
    combined_data['slip'] = combined_slip.isel(time=0, drop=True)
    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
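
# -----------------------------------------------------------------------------
# Illustrative sketch (standalone): the "first flag wins" merge used above for
# the slip band. Chunks are visited newest-first, and a pixel's slip value is
# only filled from an older chunk while it is still zero, so the most recent
# detection is preserved.
import numpy as np

combined_slip = np.array([0, 1, 0, 0])  # newest chunk
older_slip = np.array([1, 0, 1, 0])     # older chunk
combined_slip[combined_slip == 0] = older_slip[combined_slip == 0]
assert combined_slip.tolist() == [1, 1, 1, 0]
# -----------------------------------------------------------------------------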
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    # Grab DEM data as well - SLIP needs elevation to screen for slope.
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if data is None or dem_data is None or 'time' not in data or 'time' not in dem_data:
        return None

    # Target data is the most recent acquisition, with the baseline being everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = create_cfmask_clean_mask(
        target_data.cf_mask) if 'cf_mask' in target_data else create_bit_mask(target_data.pixel_qa, [1, 2])
    baseline_clear_mask = create_cfmask_clean_mask(
        baseline_data.cf_mask) if 'cf_mask' in baseline_data else create_bit_mask(baseline_data.pixel_qa, [1, 2])
    combined_baseline = task.get_processing_method()(baseline_data, clean_mask=baseline_clear_mask)

    target_data = create_mosaic(target_data, clean_mask=target_clear_mask)

    slip_data = compute_slip(combined_baseline, target_data, dem_data)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    target_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
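
# -----------------------------------------------------------------------------
# Illustrative sketch (standalone, hypothetical data): the target/baseline
# split used above. The most recent acquisition becomes the target, re-wrapped
# in a length-1 time dimension so mosaicking still works, and every earlier
# acquisition forms the baseline.
import numpy as np
import xarray as xr

data = xr.Dataset({'red': (('time', 'y', 'x'), np.arange(12.0).reshape(3, 2, 2))},
                  coords={'time': [0, 1, 2]})
target = xr.concat([data.isel(time=-1)], 'time')  # newest scene, time dim kept
baseline = data.isel(time=slice(None, -1))        # all earlier scenes
assert target.sizes['time'] == 1 and baseline.sizes['time'] == 2
# -----------------------------------------------------------------------------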
def _apply_band_math(dataset):
    clear_mask = create_cfmask_clean_mask(
        dataset.cf_mask) if 'cf_mask' in dataset else create_bit_mask(dataset.pixel_qa, [1, 2])
    # Mask out water manually - fractional cover is only valid over land.
    wofs = wofs_classify(dataset, clean_mask=clear_mask, mosaic=True)
    clear_mask[wofs.wofs.values == 1] = False
    return frac_coverage_classify(dataset, clean_mask=clear_mask)
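
# -----------------------------------------------------------------------------
# Illustrative sketch (standalone): the mask-narrowing step used above.
# Pixels that WOfS classifies as water (wofs == 1) are removed from the clean
# mask so the downstream classifier only ever sees land pixels.
import numpy as np

clear_mask = np.array([True, True, True, False])
wofs_band = np.array([0, 1, 0, 0])  # 1 = water
clear_mask[wofs_band == 1] = False  # drop water from the analysis
assert clear_mask.tolist() == [True, False, True, False]
# -----------------------------------------------------------------------------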
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(
            data.cf_mask) if 'cf_mask' in data else create_bit_mask(data.pixel_qa, [1, 2])

        wofs_data = task.get_processing_method()(data, clean_mask=clear_mask, enforce_float64=True)
        water_analysis = perform_timeseries_analysis(wofs_data, 'wofs', intermediate_product=water_analysis)

        # Restrict TSM to open water: drop bright (high SWIR2) pixels and anything WOfS calls dry.
        clear_mask[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=-9999)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=-9999)

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
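
# -----------------------------------------------------------------------------
# Illustrative sketch (hypothetical, not the project's
# perform_timeseries_analysis): the intermediate_product pattern above is a
# running reduction - each chunk folds its per-pixel totals into an
# accumulator instead of holding every scene in memory at once. The
# total_data/total_clean names mirror the fields used above.
import numpy as np

def _fold_chunk(chunk_sum, chunk_count, accumulator=None):
    if accumulator is None:
        return {'total_data': chunk_sum, 'total_clean': chunk_count}
    return {'total_data': accumulator['total_data'] + chunk_sum,
            'total_clean': accumulator['total_clean'] + chunk_count}

acc = None
for sums, counts in [(np.ones((2, 2)), np.ones((2, 2))), (np.ones((2, 2)), np.zeros((2, 2)))]:
    acc = _fold_chunk(sums, counts, acc)
assert acc['total_data'][0, 0] == 2.0 and acc['total_clean'][0, 0] == 1.0
# -----------------------------------------------------------------------------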