def csv_import(task_id):  # pragma: no cover
    task = ImportTask.objects.get(pk=task_id)
    log = StringIO()

    task.task_id = csv_import.request.id
    task.log("Started import at %s" % timezone.now())
    task.log("--------------------------------")
    task.save()

    try:
        with transaction.atomic():
            model = class_from_string(task.model_class)
            records = model.import_csv(task, log)
            task.save()

        task.log(log.getvalue())
        task.log("Import finished at %s" % timezone.now())
        task.log("%d record(s) added." % len(records))
    except Exception as e:
        import traceback
        traceback.print_exc()

        task.log("\nError: %s\n" % e)
        task.log(log.getvalue())
        raise e

    return task
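The class_from_string helper used above (and in the other csv_import variants below) is not shown in this section. A minimal sketch, assuming it resolves a dotted path such as "app.models.Contact" with importlib; the name and behaviour here are assumptions, not the project's actual implementation:

import importlib


def class_from_string(dotted_path):
    # Split "package.module.ClassName" into module path and class name,
    # import the module, and return the class object.
    module_path, _, class_name = dotted_path.rpartition('.')
    module = importlib.import_module(module_path)
    return getattr(module, class_name)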
def download_album(user_id, community_id, album_id, task_id):
    logger.info('download_album user=%s community=%s album=%s task=%s',
                user_id, community_id, album_id, task_id)
    try:
        user = Users.objects.get(id=user_id)
    except (DoesNotExist, ValidationError):
        raise Exception('User does not exist')

    api = API(user.access_token, v=5.95)
    response = api.photos.get(owner_id=f'-{community_id}', album_id=album_id, count=50)
    items = response['items']
    count = response['count']

    folder = generate_uuid1()
    photo_album = PhotoAlbum(folder)
    photo_album.add(items)
    path = photo_album.make_archive()

    task = Tasks.objects.get(id=task_id)
    with open(path, 'rb') as archive:
        task.archive.put(archive, content_type='application/zip')
        task.save()
    return
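PhotoAlbum and generate_uuid1 are external helpers not included in this snippet. A minimal sketch of what they might look like, inferred only from how they are called above (download each photo into a folder, then zip the folder); the item structure and field names are assumptions:

import os
import shutil
import uuid

import requests


def generate_uuid1():
    # Hypothetical helper: a unique folder name derived from uuid1.
    return uuid.uuid1().hex


class PhotoAlbum:
    def __init__(self, folder):
        self.folder = folder
        os.makedirs(folder, exist_ok=True)

    def add(self, items):
        # Assumes each item carries a 'sizes' list; download the largest size.
        for item in items:
            url = max(item['sizes'], key=lambda s: s['width'] * s['height'])['url']
            filename = os.path.join(self.folder, '{}.jpg'.format(item['id']))
            with open(filename, 'wb') as fh:
                fh.write(requests.get(url).content)

    def make_archive(self):
        # Zip the folder and return the archive path.
        return shutil.make_archive(self.folder, 'zip', self.folder)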
def create_task_and_maps(task_parameters, include_heatmap=True):
    '''task_parameters:
        'basemap_ending_year': (int, "ending_year"),
        'basemap_sample_size': (int, "sample_size"),
        'basemap_starting_year': (int, "starting_year"),
        'number_of_terms': int,
        'ranking_algorithm': int,
        'similarity_algorithm': int,
        'filtering_algorithm': int,
        'basemap_author': (str, 'author'),
        'basemap_institution': (str, 'institution'),
        'basemap_term_type': (int, 'term_type'),

    if include_heatmap, then also pass the following:
        'heatmap_starting_year': (int, "starting_year"),
        'heatmap_ending_year': (int, "ending_year"),
        'heatmap_sample_size': (int, "sample_size"),
        'heatmap_author': (str, 'author'),
        'heatmap_institution': (str, 'institution'),
        'heatmap_term_type': (int, 'term_type'),
    '''
    # set up new objects
    basemap = Basemap(finished=False, **filter_basemap_args(task_parameters))
    basemap.save()

    if include_heatmap:
        heatmap = Heatmap(finished=False, **filter_heatmap_args(task_parameters))
        heatmap.save()
        task = Task(basemap=basemap, heatmap=heatmap)
    else:
        task = Task(basemap=basemap)

    task.save()
    return task
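filter_basemap_args and filter_heatmap_args are not shown in this section. A minimal sketch of one of them, assuming it strips the prefix and coerces values according to the mapping documented in the docstring; the mapping dict below is illustrative, not the project's actual table:

BASEMAP_ARG_MAP = {
    'basemap_starting_year': (int, 'starting_year'),
    'basemap_ending_year': (int, 'ending_year'),
    'basemap_sample_size': (int, 'sample_size'),
    'basemap_author': (str, 'author'),
    'basemap_institution': (str, 'institution'),
    'basemap_term_type': (int, 'term_type'),
    'number_of_terms': (int, 'number_of_terms'),
    'ranking_algorithm': (int, 'ranking_algorithm'),
    'similarity_algorithm': (int, 'similarity_algorithm'),
    'filtering_algorithm': (int, 'filtering_algorithm'),
}


def filter_basemap_args(task_parameters):
    # Keep only the basemap-related keys, rename them to the model field
    # names, and coerce each value to the documented type.
    out = {}
    for key, (cast, field) in BASEMAP_ARG_MAP.items():
        if key in task_parameters and task_parameters[key] != '':
            out[field] = cast(task_parameters[key])
    return out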
def setup_task_ispaid(booking):
    global notpaid_manager

    event_date = booking.selection_set.all().first().datetime_init
    diff = (event_date - datetime.now()).days
    task_date = (datetime.now() + timedelta(hours=24)) if diff >= 1 else (event_date - timedelta(hours=1))

    schedule, _ = CrontabSchedule.objects.get_or_create(
        minute=task_date.minute,
        hour=task_date.hour,
        day_of_week=get_cron_weekday(task_date.strftime("%A")),
        day_of_month=task_date.day,
        month_of_year=task_date.month,
        timezone=pytz.timezone('Europe/Madrid'))

    task = PeriodicTask.objects.create(
        crontab=schedule,
        name="Check after 24h if " + str(booking.id) + " is_paid",
        task='trainWellApp.tasks.booking_notpaid',
        args=json.dumps([booking.id]),
    )

    notpaid_manager[booking.id] = task.id
    task.kwargs = json.dumps({'id': task.id})
    task.save()
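get_cron_weekday, also used by the other setup_task_* helpers further down, is not defined in this section. A minimal sketch, assuming it translates strftime("%A") names into the numeric day-of-week form that CrontabSchedule accepts (0 = Sunday); this is an assumption about its behaviour:

def get_cron_weekday(weekday_name):
    # Map an English weekday name to the crontab day-of-week number,
    # where Sunday is 0 and Saturday is 6.
    days = {
        'Sunday': 0, 'Monday': 1, 'Tuesday': 2, 'Wednesday': 3,
        'Thursday': 4, 'Friday': 5, 'Saturday': 6,
    }
    return days[weekday_name]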
def process_band_math(self, chunk, task_id=None, num_scn_per_chk=None):
    """Apply some band math to a chunk and return the args

    Opens the chunk dataset and applies some band math defined by _apply_band_math(dataset).
    _apply_band_math creates some product using the bands already present in the dataset and
    returns the data array. The data array is then appended under 'band_math', and the result
    is saved to disk at the same path as the existing .nc file.

    Args:
        chunk: The return from the recombine_time_chunks function - path, metadata, and {chunk ids}
        num_scn_per_chk: The number of scenes per chunk. Used to determine task progress.
    """
    task = FractionalCoverTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    def _apply_band_math(dataset):
        clear_mask = task.satellite.get_clean_mask_func()(dataset)
        # mask out water manually. Necessary for frac. cover.
        wofs = wofs_classify(dataset, clean_mask=clear_mask, mosaic=True)
        clear_mask[wofs.wofs.values == 1] = False
        return frac_coverage_classify(dataset, clean_mask=clear_mask, no_data=task.satellite.no_data_value)

    if chunk is None:
        return None

    dataset = xr.open_dataset(chunk[0]).load()
    dataset = xr.merge([dataset, _apply_band_math(dataset)])
    # remove previous nc and write band math to disk
    os.remove(chunk[0])
    dataset.to_netcdf(chunk[0])

    task.scenes_processed = F('scenes_processed') + num_scn_per_chk
    task.save(update_fields=['scenes_processed'])
    return chunk
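check_cancel_task appears throughout these tasks but is not defined in this section. A minimal sketch of the pattern it suggests, assuming it re-reads the task status and drops the rest of the Celery chain when the user has cancelled; the status field and value are assumptions:

def check_cancel_task(self, task):
    # Hypothetical helper: report whether the user cancelled the task and,
    # if so, drop any queued downstream tasks from this Celery chain.
    task.refresh_from_db(fields=['status'])
    if task.status == 'CANCELLED':
        self.request.chain = None
        return True
    return False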
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered,
    completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = FractionalCoverTask.objects.get(pk=task_id)

    # Get an estimate of the amount of work to be done: the number of scenes
    # to process, also considering intermediate chunks to be combined.
    num_scenes = len(geographic_chunks) * sum([len(time_chunk) for time_chunk in time_chunks])
    # recombine_time_chunks() and process_band_math() scenes:
    # num_scn_per_chk * len(time_chunks) * len(geographic_chunks)
    num_scn_per_chk = round(num_scenes / (len(time_chunks) * len(geographic_chunks)))
    # recombine_geographic_chunks() and create_output_products() scenes:
    # num_scn_per_chk_geo * len(geographic_chunks)
    num_scn_per_chk_geo = round(num_scenes / len(geographic_chunks))
    # Scene processing progress is tracked in: processing_task(), recombine_time_chunks(),
    # and process_band_math(). Scenes in process_band_math() are counted twice
    # for the sake of tracking progress because it takes so long to run. So 1 + 1 + 2 = 4.
    task.total_scenes = 4 * num_scenes
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (
        group([
            group([
                processing_task.s(
                    task_id=task_id,
                    geo_chunk_id=geo_index,
                    time_chunk_id=time_index,
                    geographic_chunk=geographic_chunk,
                    time_chunk=time_chunk,
                    **parameters) for time_index, time_chunk in enumerate(time_chunks)
            ]) | recombine_time_chunks.s(task_id=task_id, num_scn_per_chk=num_scn_per_chk)
               | process_band_math.s(task_id=task_id, num_scn_per_chk=2 * num_scn_per_chk_geo)
            for geo_index, geographic_chunk in enumerate(geographic_chunks)
        ]) | recombine_geographic_chunks.s(task_id=task_id)
           | create_output_products.s(task_id=task_id)
           | task_clean_up.si(task_id=task_id, task_model='FractionalCoverTask')).apply_async()

    return True
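The pipeline above relies on Celery's canvas: piping a group into a signature creates a chord, so each inner group's results are gathered and fed to the time recombination, and the outer group's results are fed to the geographic recombination. A stripped-down sketch of the same wiring with placeholder tasks (the task names and payloads here are illustrative only, not part of the project):

from celery import group, shared_task


@shared_task
def process(geo, t):
    return (geo, t)


@shared_task
def recombine_time(results, geo=None):
    return (geo, results)


@shared_task
def recombine_geo(results):
    return results


# One chord per geographic chunk (inner group | callback), all of them
# gathered by an outer chord whose callback recombines geographically.
pipeline = (group([
    group([process.s(geo, t) for t in range(3)]) | recombine_time.s(geo=geo)
    for geo in range(2)
]) | recombine_geo.s())
result = pipeline.apply_async()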
def csv_import(task_id):  # pragma: no cover
    from django.db import transaction

    # there is a possible race condition between this task starting and the
    # ImportTask row being committed, so loop a bit while fetching the task
    tries = 0
    task = None
    while tries < 5 and not task:
        try:
            task = ImportTask.objects.get(pk=task_id)
        except Exception as e:
            # this object just doesn't exist yet, sleep a bit then try again
            tries += 1
            if tries >= 5:
                raise e
            else:
                sleep(1)

    transaction.enter_transaction_management()
    transaction.managed()

    log = StringIO.StringIO()

    try:
        task.task_id = csv_import.request.id
        task.log("Started import at %s" % timezone.now())
        task.log("--------------------------------")
        task.save()
        transaction.commit()

        model = class_from_string(task.model_class)
        records = model.import_csv(task, log)

        task.log(log.getvalue())
        task.log("Import finished at %s" % timezone.now())
        task.log("%d record(s) added." % len(records))
        transaction.commit()

    except Exception as e:
        transaction.rollback()

        import traceback
        traceback.print_exc()

        task.log("\nError: %s\n" % e)
        task.log(log.getvalue())
        transaction.commit()

        raise e
    finally:
        transaction.leave_transaction_management()

    return task
def recombine_time_chunks(self, chunks, task_id=None, num_scn_per_chk=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not,
    then it will simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}
        num_scn_per_chk: The number of scenes per chunk. Used to determine task progress.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    task = FractionalCoverTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    # sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None

    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            combined_data = data
            task.scenes_processed = F('scenes_processed') + num_scn_per_chk
            task.save(update_fields=['scenes_processed'])
            continue
        # give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        combined_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())
        if check_cancel_task(self, task):
            return

        task.scenes_processed = F('scenes_processed') + num_scn_per_chk
        task.save(update_fields=['scenes_processed'])

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered,
    completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = SpectralIndicesTask.objects.get(pk=task_id)

    # Track task progress.
    num_scenes = len(geographic_chunks) * sum([len(time_chunk) for time_chunk in time_chunks])
    # Scene processing progress is tracked in processing_task().
    task.total_scenes = num_scenes
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (
        group([
            group([
                processing_task.s(
                    task_id=task_id,
                    geo_chunk_id=geo_index,
                    time_chunk_id=time_index,
                    geographic_chunk=geographic_chunk,
                    time_chunk=time_chunk,
                    **parameters) for time_index, time_chunk in enumerate(time_chunks)
            ]) | recombine_time_chunks.s(task_id=task_id)
               | process_band_math.s(task_id=task_id)
            for geo_index, geographic_chunk in enumerate(geographic_chunks)
        ]) | recombine_geographic_chunks.s(task_id=task_id)
           | create_output_products.s(task_id=task_id)
           | task_clean_up.si(task_id=task_id, task_model='SpectralIndicesTask')).apply_async()

    return True
def recombine_geographic_chunks(self, chunks, task_id=None, num_scn_per_chk=None): """Recombine processed data over the geographic indices For each geographic chunk process spawned by the main task, open the resulting dataset and combine it into a single dataset. Combine metadata as well, writing to disk. Args: chunks: list of the return from the processing_task function - path, metadata, and {chunk ids} num_scn_per_chk: The number of scenes per chunk. Used to determine task progress. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ task = TsmTask.objects.get(pk=task_id) if check_cancel_task(self, task): return total_chunks = [chunks] if not isinstance(chunks, list) else chunks total_chunks = [chunk for chunk in total_chunks if chunk is not None] if len(total_chunks) == 0: return None geo_chunk_id = total_chunks[0][2]['geo_chunk_id'] time_chunk_id = total_chunks[0][2]['time_chunk_id'] metadata = {} chunk_data = [] for index, chunk in enumerate(total_chunks): metadata = task.combine_metadata(metadata, chunk[1]) chunk_data.append(xr.open_dataset(chunk[0])) task.scenes_processed = F('scenes_processed') + num_scn_per_chk task.save(update_fields=['scenes_processed']) combined_data = combine_geographic_chunks(chunk_data) if task.animated_product.animation_id != "none": base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id for index in range((task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1)): animated_data = [] for chunk in total_chunks: geo_chunk_index = chunk[2]['geo_chunk_id'] # if we're animating, combine it all and save to disk. path = os.path.join(task.get_temp_path(), "animation_{}_{}.nc".format(str(geo_chunk_index), str(base_index + index))) if os.path.exists(path): animated_data.append(xr.open_dataset(path)) path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + index)) if len(animated_data) > 0: combine_geographic_chunks(animated_data).to_netcdf(path) path = os.path.join(task.get_temp_path(), "recombined_geo_{}.nc".format(time_chunk_id)) combined_data.to_netcdf(path) logger.info("Done combining geographic chunks for time: " + str(time_chunk_id)) return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def generateOutput(output_task_id):
    task = OutputTask.objects.get(id=output_task_id)
    search_data = cPickle.loads(task.search_form.encode('utf-8'))
    experiments = find(search_data)

    return_data = search_data['out']['data']
    if return_data == 'I':
        pass
    elif return_data == 'P':
        pass
    elif return_data == 'S':
        pass
    elif return_data == 'A':
        pass
    else:
        task.is_finished = True
        task.save()
        return

    # TODO -- write the results of the search

    # create the temporary directory to store the script results
    tmp_dir = tempfile.mkdtemp()
    out_dir = os.path.join(os.path.dirname(__file__), 'site_media', 'tasks')

    # TODO -- run the script
    script_name = 'test.sh'
    script = os.path.join(settings.TASK_SCRIPT_DIR, script_name)
    subprocess.call(script, shell=True, cwd=tmp_dir)

    # collect all files and zip them
    result_files = os.listdir(tmp_dir)
    tmp_tar = tempfile.NamedTemporaryFile(delete=False)
    t = tarfile.open(fileobj=tmp_tar, mode='w:bz2')
    for f in result_files:
        filename = os.path.join(tmp_dir, f)
        t.add(filename, arcname=f)
    t.close()
    tmp_tar.close()

    # move the results to a django-accessible directory
    shutil.move(tmp_tar.name, task.output_abs_path())

    # delete all files (and temp_dir)
    shutil.rmtree(tmp_dir)

    # mark the task as finished
    task.is_finished = True
    task.save()
def create_task_with_existing_basemap(basemap_id, heatmap_task_parameters):
    """heatmap_task_parameters:
        'heatmap_starting_year': (int, "starting_year"),
        'heatmap_ending_year': (int, "ending_year"),
        'heatmap_sample_size': (int, "sample_size"),
        'heatmap_author': (str, 'author'),
        'heatmap_conference': (str, 'conference'),
        'heatmap_journal': (str, 'journal'),
        'heatmap_term_type': (int, 'term_type'),
    """
    basemap = Basemap.objects.get(id=basemap_id)
    heatmap = Heatmap(finished=False, **filter_heatmap_args(heatmap_task_parameters))
    heatmap.save()
    task = Task(basemap=basemap, heatmap=heatmap)
    task.save()
    return task
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered,
    completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = CoastalChangeTask.objects.get(pk=task_id)

    # This calculation does not account for time chunking because this app
    # does not support time chunking.
    num_times_fst_lst_yrs = len(time_chunks[0][0]) + len(time_chunks[0][1])
    task.total_scenes = len(geographic_chunks) * len(time_chunks) * num_times_fst_lst_yrs
    task.scenes_processed = 0
    task.save()

    if check_cancel_task(self, task):
        return
    task.update_status("WAIT", "Starting processing.")

    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (
        group([
            group([
                processing_task.s(
                    task_id=task_id,
                    geo_chunk_id=geo_index,
                    time_chunk_id=time_index,
                    geographic_chunk=geographic_chunk,
                    time_chunk=time_chunk,
                    **parameters) for geo_index, geographic_chunk in enumerate(geographic_chunks)
            ]) | recombine_geographic_chunks.s(task_id=task_id)
            for time_index, time_chunk in enumerate(time_chunks)
        ]) | recombine_time_chunks.s(task_id=task_id)
           | create_output_products.s(task_id=task_id)
           | task_clean_up.si(task_id=task_id, task_model='CoastalChangeTask')).apply_async()

    return True
def delay_task(*args, **kwargs):
    """ Run task at time
        args = PeriodicTask.id
    """
    try:
        task_id = args[0]
        logger.info('task id = %s', task_id)
        task = PeriodicTask.objects.get(
            task='w3af_webui.tasks.delay_task',
            name='delay_%s' % task_id,
        )
        task.interval = None
        task.enabled = False
        task.save()
        scan_create_start(task_id)
    except Exception as e:
        logger.error("delay task exception %s" % e)
        raise
def csv_import(task):  # pragma: no cover
    from django.db import transaction
    transaction.enter_transaction_management()
    transaction.managed()

    log = StringIO.StringIO()

    try:
        task.task_id = csv_import.request.id
        task.log("Started import at %s" % datetime.now())
        task.log("--------------------------------")
        task.save()
        transaction.commit()

        model = class_from_string(task.model_class)
        records = model.import_csv(task.csv_file.file, task.created_by, log)

        task.log(log.getvalue())
        task.log("Import finished at %s" % datetime.now())
        task.log("%d record(s) added." % len(records))
        transaction.commit()

    except Exception as e:
        transaction.rollback()

        import traceback
        traceback.print_exc()

        task.log("\nError: %s\n" % e)
        task.log(log.getvalue())
        transaction.commit()

        raise e
    finally:
        transaction.leave_transaction_management()

    return task
def monthly_task(*args, **kwargs):
    try:
        task_id = args[0]
        task = PeriodicTask.objects.get(
            task='w3af_webui.tasks.monthly_task',
            name=task_id,
        )
        now = datetime.now()
        next_time = now + relativedelta(months=+1)
        delta = next_time - now
        interval = IntervalSchedule.from_schedule(schedule(delta))
        interval.save()
        logger.info('set interval %s for celery task %s' % (
            interval,
            task.name,
        ))
        task.interval = interval
        task.save()
        scan_create_start(task_id)
    except Exception as e:
        logger.error("monthly task exception %s" % e)
        raise
def expire_tasks():
    """Find any tasks that are past their expiration date and unassign them.

    We currently run this once per day (at 7 AM server time).
    """
    from teams.models import Task

    expired_tasks = Task.objects.incomplete().filter(
        expiration_date__isnull=False,
        expiration_date__lt=datetime.now(),
    )
    for task in expired_tasks:
        task.assignee = task.expiration_date = None
        # run each inside a try/except so that one
        # rotten apple doesn't make a huge mess
        try:
            task.save()
        except Exception as e:
            logger.error('Error on expiring tasks', extra={
                'task': task,
                'exception': e,
            })
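To run this once a day at 7 AM as the docstring describes, the project presumably registers it with Celery beat. A minimal sketch of such an entry; the task path and setting name shown are assumptions, not the project's actual configuration:

from celery.schedules import crontab

CELERYBEAT_SCHEDULE = {
    'expire-tasks-daily': {
        # Run expire_tasks every day at 07:00 server time.
        'task': 'teams.tasks.expire_tasks',
        'schedule': crontab(hour=7, minute=0),
    },
}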
def save_failed_task(self, exc, task_id, args, kwargs, traceback):
    """
    :type exc: Exception
    """
    task = FailedTask()
    task.celery_task_id = task_id
    task.full_name = self.name
    task.name = self.name.split('.')[-1]
    task.exception_class = exc.__class__.__name__
    task.exception_msg = unicode(exc).strip()
    task.traceback = unicode(traceback).strip()
    task.updated_at = timezone.now()

    if args:
        task.args = json.dumps(list(args))
    if kwargs:
        task.kwargs = json.dumps(kwargs)

    # Find out whether a task with the same args, name and exception already exists.
    # If it does, update the failure count and last updated_at.
    #: :type: FailedTask
    existing_task = FailedTask.objects.filter(
        args=task.args,
        kwargs=task.kwargs,
        full_name=task.full_name,
        exception_class=task.exception_class,
        exception_msg=task.exception_msg,
    )
    if len(existing_task):
        existing_task = existing_task[0]
        existing_task.failures += 1
        existing_task.updated_at = task.updated_at
        existing_task.save(force_update=True,
                           update_fields=('updated_at', 'failures'))
    else:
        task.save(force_insert=True)
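A FailedTask record like this is typically paired with a way to re-queue the original task. A minimal sketch of such a helper, assuming the stored full_name, args and kwargs are enough to re-send it; this function is an illustration, not part of the original model:

import json

from celery import current_app


def retry_failed_task(failed_task):
    # Hypothetical companion helper: re-send the original task using the
    # recorded name and arguments. Deleting or resetting the failure record
    # on success is left to the caller.
    args = json.loads(failed_task.args) if failed_task.args else []
    kwargs = json.loads(failed_task.kwargs) if failed_task.kwargs else {}
    return current_app.send_task(failed_task.full_name, args=args, kwargs=kwargs)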
def setup_task_event_done(booking):
    global events_done_manager

    event_date = booking.selection_set.all().last()
    task_date = event_date.datetime_init + timedelta(hours=1)

    schedule, _ = CrontabSchedule.objects.get_or_create(
        minute=task_date.minute,
        hour=task_date.hour,
        day_of_week=get_cron_weekday(task_date.strftime("%A")),
        day_of_month=task_date.day,
        month_of_year=task_date.month,
        timezone=pytz.timezone('Europe/Madrid'))

    task = PeriodicTask.objects.create(
        crontab=schedule,
        name="Booking " + str(booking.id) + " happened",
        task='trainWellApp.tasks.event_done',
        args=json.dumps([booking.id]),
    )

    events_done_manager[booking.id] = task.id
    task.kwargs = json.dumps({'id': task.id})
    task.save()
def setup_task_invoice(invoice):
    global invoices_manager

    curr_year = datetime.now().year
    task_date = datetime.now().replace(year=curr_year + 2)  # By law 2 years.

    schedule, _ = CrontabSchedule.objects.get_or_create(
        minute=task_date.minute,
        hour=task_date.hour,
        day_of_week=get_cron_weekday(task_date.strftime("%A")),
        day_of_month=task_date.day,
        month_of_year=task_date.month,
        timezone=pytz.timezone('Europe/Madrid'))

    task = PeriodicTask.objects.create(
        crontab=schedule,
        name="Invoice " + str(invoice.id) + " deleted",
        task='trainWellApp.tasks.invoice_timeout',
        args=json.dumps([invoice.id]),
    )

    invoices_manager[invoice.id] = task.id
    task.kwargs = json.dumps({'id': task.id})
    task.save()
def processing_task(self, task_id=None, geo_chunk_id=None, time_chunk_id=None, geographic_chunk=None, time_chunk=None, **parameters): """Process a parameter set and save the results to disk. Uses the geographic and time chunk id to identify output products. **params is updated with time and geographic ranges then used to load data. the task model holds the iterative property that signifies whether the algorithm is iterative or if all data needs to be loaded at once. Args: task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude time_chunk: list of acquisition dates parameters: all required kwargs to load data. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)]) task = SpectralIndicesTask.objects.get(pk=task_id) if check_cancel_task(self, task): return logger.info("Starting chunk: " + chunk_id) if not os.path.exists(task.get_temp_path()): return None metadata = {} times = list( map(_get_datetime_range_containing, time_chunk) if task.get_iterative( ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])]) dc = DataAccessApi(config=task.config_path) updated_params = parameters updated_params.update(geographic_chunk) iteration_data = None for time_index, time in enumerate(times): updated_params.update({'time': time}) data = dc.get_dataset_by_extent(**updated_params) if check_cancel_task(self, task): return if data is None: logger.info("Empty chunk.") continue if 'time' not in data: logger.info("Invalid chunk.") continue clear_mask = task.satellite.get_clean_mask_func()(data) add_timestamp_data_to_xr(data) metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params) iteration_data = task.get_processing_method()( data, clean_mask=clear_mask, intermediate_product=iteration_data, no_data=task.satellite.no_data_value, reverse_time=task.get_reverse_time()) if check_cancel_task(self, task): return task.scenes_processed = F('scenes_processed') + 1 task.save(update_fields=['scenes_processed']) if iteration_data is None: return None path = os.path.join(task.get_temp_path(), chunk_id + ".nc") export_xarray_to_netcdf(iteration_data, path) dc.close() logger.info("Done with chunk: " + chunk_id) return path, metadata, { 'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id }
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)

    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # load selected scene and mosaic just in case we got two scenes (handles scene boundaries/overlapping data)
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)
    if check_cancel_task(self, task):
        return

    if len(full_dataset) == 0 or 'time' not in selected_scene:
        return None

    # concat individual slices over time, compute metadata + mosaic
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    metadata = task.metadata_from_dataset(metadata, baseline_data, baseline_clear_mask, parameters)

    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(selected_scene)
    metadata = task.metadata_from_dataset(metadata, selected_scene, selected_scene_clear_mask, parameters)

    selected_scene = task.get_processing_method()(
        selected_scene,
        clean_mask=selected_scene_clear_mask,
        intermediate_product=None,
        no_data=task.satellite.no_data_value)

    # we need to regenerate the clear mask using the mosaic now.
    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(selected_scene)
    if check_cancel_task(self, task):
        return

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask,
        no_data=task.satellite.no_data_value)

    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save(update_fields=['scenes_processed'])

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(self, task_id=None, geo_chunk_id=None, time_chunk_id=None, geographic_chunk=None, time_chunk=None, **parameters): """Process a parameter set and save the results to disk. Uses the geographic and time chunk id to identify output products. **params is updated with time and geographic ranges then used to load data. the task model holds the iterative property that signifies whether the algorithm is iterative or if all data needs to be loaded at once. Args: task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude time_chunk: list of acquisition dates parameters: all required kwargs to load data. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)]) task = CustomMosaicToolTask.objects.get(pk=task_id) if check_cancel_task(self, task): return logger.info("Starting chunk: " + chunk_id) if not os.path.exists(task.get_temp_path()): return None iteration_data = None metadata = {} def _get_datetime_range_containing(*time_ranges): return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1)) times = list( map(_get_datetime_range_containing, time_chunk) if task.get_iterative( ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])]) dc = DataAccessApi(config=task.config_path) updated_params = parameters updated_params.update(geographic_chunk) #updated_params.update({'products': parameters['']}) iteration_data = None base_index = (task.get_chunk_size()['time'] if task.get_chunk_size() ['time'] is not None else 1) * time_chunk_id for time_index, time in enumerate(times): updated_params.update({'time': time}) data = dc.get_stacked_datasets_by_extent(**updated_params) if check_cancel_task(self, task): return if data is None or 'time' not in data: logger.info("Invalid chunk.") continue clear_mask = task.satellite.get_clean_mask_func()(data) add_timestamp_data_to_xr(data) metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params) iteration_data = task.get_processing_method()( data, clean_mask=clear_mask, intermediate_product=iteration_data, no_data=task.satellite.no_data_value, reverse_time=task.get_reverse_time()) if check_cancel_task(self, task): return if task.animated_product.animation_id != "none": path = os.path.join( task.get_temp_path(), "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index))) if task.animated_product.animation_id == "scene": #need to clear out all the metadata.. clear_attrs(data) #can't reindex on time - weird? export_xarray_to_netcdf(data.isel(time=0).drop('time'), path) elif task.animated_product.animation_id == "cumulative": export_xarray_to_netcdf(iteration_data, path) task.scenes_processed = F('scenes_processed') + 1 # Avoid overwriting the task's status if it is cancelled. task.save(update_fields=['scenes_processed']) if iteration_data is None: return None path = os.path.join(task.get_temp_path(), chunk_id + ".nc") export_xarray_to_netcdf(iteration_data, path) dc.close() logger.info("Done with chunk: " + chunk_id) return path, metadata, { 'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id }
def faq_csv_import(org_id, task_id):  # pragma: no cover
    task = ImportTask.objects.get(id=task_id)

    org = Org.objects.get(id=org_id)

    task.task_id = faq_csv_import.request.id
    task.log("Started import at %s" % timezone.now())
    task.log("--------------------------------")
    task.save()

    try:
        # the transaction prevents a partial csv import
        with transaction.atomic(), open(task.csv_file.path) as csv_file:
            # Load csv into Dict
            records = csv.DictReader(csv_file)
            lines = 0

            for line in records:
                lines += 1
                # Get or create parent Language object
                parent_lang = line["Parent Language"]

                # Get label objects
                labels = get_labels(task, org, line["Labels"])

                # Create parent FAQ
                parent_faq = FAQ.create(org, line["Parent Question"], line["Parent Answer"], parent_lang, None, labels)

                # Start creation of translation FAQs
                # get a list of the csv keys
                keys = list(line)
                # remove non-translation keys
                parent_keys = ["Parent Question", "Parent Language", "Parent Answer", "Parent ID", "Labels"]
                [keys.remove(parent_key) for parent_key in parent_keys]
                # get a set of unique translation language codes
                lang_codes = set()
                for key in keys:
                    lang_code, name = key.split(" ")
                    lang_codes.add(lang_code)

                # Loop through for each translation
                for lang_code in lang_codes:
                    # Create translation FAQ
                    FAQ.create(
                        org,
                        line["%s Question" % lang_code],
                        line["%s Answer" % lang_code],
                        lang_code,
                        parent_faq,
                        labels,
                    )

        task.save()
        task.log("Import finished at %s" % timezone.now())
        task.log("%d FAQ(s) added." % lines)
    except Exception as e:
        if not settings.TESTING:
            traceback.print_exc()

        task.log("\nError: %s\n" % e)
        raise e

    return task
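get_labels is referenced above but not defined in this section. A minimal sketch, assuming it turns the comma-separated "Labels" column into label objects for the org and logs unknown names on the task; the names and model API used here are assumptions:

def get_labels(task, org, label_field):
    # Split the CSV "Labels" cell on commas and resolve each name to an
    # existing label for this org, logging anything that cannot be found.
    labels = []
    for name in [part.strip() for part in label_field.split(",") if part.strip()]:
        label = org.labels.filter(name=name).first()
        if label is None:
            task.log("Skipping unknown label: %s" % name)
            continue
        labels.append(label)
    return labels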
def perform_sync_task(task): """ Run sync_task TODO: move to SyncTask object """ logger.info('start with sync of server %s' % task.server.name) data_set = task.data_set if task.server.is_local_server: path_and_parameters = task.server.url.split('?') path = path_and_parameters[0] regex = r'^' + path + r'/?$' external_server = ServerMapping.objects.get( relative_path__regex=regex, ).external_server if len(path_and_parameters) > 1: parameters = path_and_parameters[1] service_url = external_server + '?' + parameters else: service_url = external_server else: service_url = task.server.url password = task.server.password username = task.server.username wms = WebMapService( service_url, version='1.1.1', password=password, username=username, ) if task.tag: tag, new = task.tag, False else: tag, new = Tag.objects.get_or_create( slug='server_%s' % task.server.name) # layers = Layer.objects.filter(server=task.server) layers = Layer.objects.filter(source_ident=task.source_ident) layer_dict = dict(layers.values_list('layers', 'id')) #update server info task.server.title = wms.identification.title task.server.abstract = wms.identification.abstract task.server.save() new = 0 new_names = [] removed = 0 removed_names = [] updated = 0 for wmslayer in wms.contents: if wmslayer in layer_dict: layer = layers.get(pk=layer_dict[wmslayer]) del layer_dict[wmslayer] updated += 1 else: layer = Layer() layer.server = task.server layer.layers = wmslayer layer.name = wms[wmslayer].title layer.slug = slugify(layer.name) layer.save() layer.tags.add(tag) new += 1 new_names.append(layer.name) layer.data_set = data_set layer.is_local_server = task.server.is_local_server layer.is_clickable = task.server.is_clickable if not layer.js_popup_class and task.server.js_popup_class: layer.js_popup_class = task.server.js_popup_class layer.valid = True if data_set: layer.owner_type = Layer.OWNER_TYPE_DATASET else: layer.owner_type = Layer.OWNER_TYPE_PUBLIC layer.source_ident = task.source_ident layer.save() #nog iets met styles? for name, id in layer_dict.items(): layer = layers.get(pk=id) layer.valid = False layer.save() removed += 1 removed_names.append(layer.name) logger.info('%i new layers: %s.' % (new, str(', '.join(new_names)))) logger.info('%i updated layers.' % (updated)) logger.info('%i removed layers: %s.' % ( removed, str(', '.join(removed_names)))) task.last_sync = datetime.datetime.now() task.last_result = '%i new, %i updated, %i removed' % ( new, updated, removed) task.save()
def processing_task(self, task_id=None, geo_chunk_id=None, geographic_chunk=None, num_scn_per_chk=None, **parameters): """Process a parameter set and save the results to disk. Uses the geographic and time chunk id to identify output products. **params is updated with time and geographic ranges then used to load data. the task model holds the iterative property that signifies whether the algorithm is iterative or if all data needs to be loaded at once. Args: task_id, geo_chunk_id: identification for the main task and what chunk this is processing geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude num_scn_per_chk: A dictionary of the number of scenes per chunk for the baseline and analysis extents. Used to determine task progress. parameters: all required kwargs to load data. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ chunk_id = str(geo_chunk_id) task = SpectralAnomalyTask.objects.get(pk=task_id) if check_cancel_task(self, task): return if not os.path.exists(task.get_temp_path()): return None metadata = {} # For both the baseline and analysis time ranges for this # geographic chunk, load, calculate the spectral index, composite, # and filter the data according to user-supplied parameters - # recording where the data was out of the filter's range so we can # create the output product (an image). dc = DataAccessApi(config=task.config_path) updated_params = parameters updated_params.update(geographic_chunk) spectral_index = task.query_type.result_id composites = {} composites_out_of_range = {} no_data_value = task.satellite.no_data_value for composite_name in ['baseline', 'analysis']: if check_cancel_task(self, task): return # Use the corresponding time range for the baseline and analysis data. updated_params['time'] = \ updated_params['baseline_time' if composite_name == 'baseline' else 'analysis_time'] time_column_data = dc.get_dataset_by_extent(**updated_params) # If this geographic chunk is outside the data extents, return None. if len(time_column_data.dims) == 0: return None # Obtain the clean mask for the satellite. time_column_clean_mask = task.satellite.get_clean_mask_func()( time_column_data) measurements_list = task.satellite.measurements.replace(" ", "").split(",") # Obtain the mask for valid Landsat values. time_column_invalid_mask = landsat_clean_mask_invalid(\ time_column_data, platform=task.satellite.platform, collection=task.satellite.collection, level=task.satellite.level).values # Also exclude data points with the no_data value. no_data_mask = time_column_data[ measurements_list[0]].values != no_data_value # Combine the clean masks. time_column_clean_mask = time_column_clean_mask | time_column_invalid_mask | no_data_mask # Obtain the composite. composite = task.get_processing_method()( time_column_data, clean_mask=time_column_clean_mask, no_data=task.satellite.no_data_value) # Obtain the mask for valid Landsat values. composite_invalid_mask = landsat_clean_mask_invalid(\ composite, platform=task.satellite.platform, collection=task.satellite.collection, level=task.satellite.level).values # Also exclude data points with the no_data value via the compositing mask. composite_no_data_mask = composite[ measurements_list[0]].values != no_data_value composite_clean_mask = composite_invalid_mask | composite_no_data_mask # Compute the spectral index for the composite. 
spec_ind_params = dict() if spectral_index == 'fractional_cover': spec_ind_params = dict(clean_mask=composite_clean_mask, no_data=no_data_value) spec_ind_result = spectral_indices_function_map[spectral_index]( composite, **spec_ind_params) if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']: composite[spectral_index] = spec_ind_result else: # Fractional Cover composite = xr.merge([composite, spec_ind_result]) # Fractional Cover is supposed to have a range of [0, 100], with its bands - # 'bs', 'pv', and 'npv' - summing to 100. However, the function we use # can have the sum of those bands as high as 106. # frac_cov_min, frac_cov_max = spectral_indices_range_map[spectral_index] frac_cov_min, frac_cov_max = 0, 106 for band in ['bs', 'pv', 'npv']: composite[band].values = \ np.interp(composite[band].values, (frac_cov_min, frac_cov_max), spectral_indices_range_map[spectral_index]) composites[composite_name] = composite # Determine where the composite is out of range. # We rename the resulting xarray.DataArray because calling to_netcdf() # on it at the end of this function will save it as a Dataset # with one data variable with the same name as the DataArray. if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']: composites_out_of_range[composite_name] = \ xr_or(composite[spectral_index] < task.composite_threshold_min, task.composite_threshold_max < composite[spectral_index]).rename(spectral_index) else: # Fractional Cover # For fractional cover, a composite pixel is out of range if any of its # fractional cover bands are out of range. composites_out_of_range[composite_name] = xr_or( xr_or( xr_or(composite['bs'] < task.composite_threshold_min, task.composite_threshold_max < composite['bs']), xr_or(composite['pv'] < task.composite_threshold_min, task.composite_threshold_max < composite['pv'])), xr_or(composite['npv'] < task.composite_threshold_min, task.composite_threshold_max < composite['npv'])).rename(spectral_index) # Update the metadata with the current data (baseline or analysis). metadata = task.metadata_from_dataset(metadata, time_column_data, time_column_clean_mask, parameters) # Record task progress (baseline or analysis composite data obtained). task.scenes_processed = F( 'scenes_processed') + num_scn_per_chk[composite_name] task.save(update_fields=['scenes_processed']) dc.close() if check_cancel_task(self, task): return # Create a difference composite. diff_composite = composites['analysis'] - composites['baseline'] # Find where either the baseline or analysis composite was out of range for a pixel. composite_out_of_range = xr_or(*composites_out_of_range.values()) # Find where either the baseline or analysis composite was no_data. if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']: composite_no_data = xr_or( composites['baseline'][measurements_list[0]] == no_data_value, composites['analysis'][measurements_list[0]] == no_data_value) if spectral_index == 'evi': # EVI returns no_data for values outside [-1,1]. 
composite_no_data = xr_or( composite_no_data, xr_or(composites['baseline'][spectral_index] == no_data_value, composites['analysis'][spectral_index] == no_data_value)) else: # Fractional Cover composite_no_data = xr_or( xr_or( xr_or(composites['baseline']['bs'] == no_data_value, composites['baseline']['pv'] == no_data_value), composites['baseline']['npv'] == no_data_value), xr_or( xr_or(composites['baseline']['bs'] == no_data_value, composites['baseline']['pv'] == no_data_value), composites['baseline']['npv'] == no_data_value)) composite_no_data = composite_no_data.rename(spectral_index) # Drop unneeded data variables. diff_composite = diff_composite.drop(measurements_list) if check_cancel_task(self, task): return composite_path = os.path.join(task.get_temp_path(), chunk_id + ".nc") export_xarray_to_netcdf(diff_composite, composite_path) composite_out_of_range_path = os.path.join(task.get_temp_path(), chunk_id + "_out_of_range.nc") logger.info("composite_out_of_range:" + str(composite_out_of_range)) export_xarray_to_netcdf(composite_out_of_range, composite_out_of_range_path) composite_no_data_path = os.path.join(task.get_temp_path(), chunk_id + "_no_data.nc") export_xarray_to_netcdf(composite_no_data, composite_no_data_path) return composite_path, composite_out_of_range_path, composite_no_data_path, \ metadata, {'geo_chunk_id': geo_chunk_id}
def processing_task(task_id=None, geo_chunk_id=None, time_chunk_id=None, geographic_chunk=None, time_chunk=None, **parameters): """Process a parameter set and save the results to disk. Uses the geographic and time chunk id to identify output products. **params is updated with time and geographic ranges then used to load data. the task model holds the iterative property that signifies whether the algorithm is iterative or if all data needs to be loaded at once. Computes a single SLIP baseline comparison - returns a slip mask and mosaic. Args: task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude time_chunk: list of acquisition dates parameters: all required kwargs to load data. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)]) task = SlipTask.objects.get(pk=task_id) logger.info("Starting chunk: " + chunk_id) if not os.path.exists(task.get_temp_path()): return None metadata = {} def _get_datetime_range_containing(*time_ranges): return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1)) time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1]) dc = DataAccessApi(config=task.config_path) updated_params = {**parameters} updated_params.update(geographic_chunk) updated_params.update({'time': time_range}) data = dc.get_dataset_by_extent(**updated_params) #grab dem data as well dem_parameters = {**updated_params} dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'}) dem_parameters.pop('time') dem_parameters.pop('measurements') dem_data = dc.get_dataset_by_extent(**dem_parameters) if 'time' not in data or 'time' not in dem_data: return None #target data is most recent, with the baseline being everything else. target_data = xr.concat([data.isel(time=-1)], 'time') baseline_data = data.isel(time=slice(None, -1)) target_clear_mask = task.satellite.get_clean_mask_func()(target_data) baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data) combined_baseline = task.get_processing_method()(baseline_data, clean_mask=baseline_clear_mask, no_data=task.satellite.no_data_value, reverse_time=task.get_reverse_time()) target_data = create_mosaic( target_data, clean_mask=target_clear_mask, no_data=task.satellite.no_data_value, reverse_time=task.get_reverse_time()) slip_data = compute_slip(combined_baseline, target_data, dem_data, no_data=task.satellite.no_data_value) target_data['slip'] = slip_data metadata = task.metadata_from_dataset( metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1]) task.scenes_processed = F('scenes_processed') + 1 task.save() path = os.path.join(task.get_temp_path(), chunk_id + ".nc") clear_attrs(target_data) target_data.to_netcdf(path) dc.close() logger.info("Done with chunk: " + chunk_id) return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(task_id=None, geo_chunk_id=None, time_chunk_id=None, geographic_chunk=None, time_chunk=None, **parameters): """Process a parameter set and save the results to disk. Uses the geographic and time chunk id to identify output products. **params is updated with time and geographic ranges then used to load data. the task model holds the iterative property that signifies whether the algorithm is iterative or if all data needs to be loaded at once. Args: task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude time_chunk: list of acquisition dates parameters: all required kwargs to load data. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)]) task = CoastalChangeTask.objects.get(pk=task_id) logger.info("Starting chunk: " + chunk_id) if not os.path.exists(task.get_temp_path()): return None iteration_data = None def _get_datetime_range_containing(*time_ranges): return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1)) starting_year = _get_datetime_range_containing(*time_chunk[0]) comparison_year = _get_datetime_range_containing(*time_chunk[1]) dc = DataAccessApi(config=task.config_path) updated_params = parameters updated_params.update(geographic_chunk) def _compute_mosaic(time): updated_params.update({'time': time}) data = dc.get_dataset_by_extent(**updated_params) if data is None or 'time' not in data: logger.info("Invalid chunk.") return None, None clear_mask = task.satellite.get_clean_mask_func()(data) metadata = task.metadata_from_dataset({}, data, clear_mask, updated_params) return task.get_processing_method()( data, clean_mask=clear_mask, no_data=task.satellite.no_data_value), metadata old_mosaic, old_metadata = _compute_mosaic(starting_year) new_mosaic, new_metadata = _compute_mosaic(comparison_year) if old_mosaic is None or new_mosaic is None: return None metadata = {**old_metadata, **new_metadata} output_product = compute_coastal_change( old_mosaic, new_mosaic, no_data=task.satellite.no_data_value) task.scenes_processed = F('scenes_processed') + 1 task.save() path = os.path.join(task.get_temp_path(), chunk_id + ".nc") output_product.to_netcdf(path) dc.close() logger.info("Done with chunk: " + chunk_id) return path, metadata, { 'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id }
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        wofs_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 enforce_float64=True,
                                                 no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis, no_data=task.satellite.no_data_value)

        clear_mask[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=task.satellite.no_data_value)
        if check_cancel_task(self, task):
            return

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)
        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(task_id=None, geo_chunk_id=None, time_chunk_id=None, geographic_chunk=None, time_chunk=None, **parameters): """Process a parameter set and save the results to disk. Uses the geographic and time chunk id to identify output products. **params is updated with time and geographic ranges then used to load data. the task model holds the iterative property that signifies whether the algorithm is iterative or if all data needs to be loaded at once. Args: task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude time_chunk: list of acquisition dates parameters: all required kwargs to load data. Returns: path to the output product, metadata dict, and a dict containing the geo/time ids """ chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)]) task = FractionalCoverTask.objects.get(pk=task_id) logger.info("Starting chunk: " + chunk_id) if not os.path.exists(task.get_temp_path()): return None iteration_data = None metadata = {} def _get_datetime_range_containing(*time_ranges): return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1)) times = list( map(_get_datetime_range_containing, time_chunk) if task.get_iterative( ) else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])]) dc = DataAccessApi(config=task.config_path) updated_params = parameters updated_params.update(geographic_chunk) #updated_params.update({'products': parameters['']}) iteration_data = None base_index = (task.get_chunk_size()['time'] if task.get_chunk_size() ['time'] is not None else 1) * time_chunk_id for time_index, time in enumerate(times): updated_params.update({'time': time}) data = dc.get_stacked_datasets_by_extent(**updated_params) if data is None or 'time' not in data: logger.info("Invalid chunk.") continue clear_mask = create_cfmask_clean_mask( data.cf_mask) if 'cf_mask' in data else create_bit_mask( data.pixel_qa, [1, 2]) add_timestamp_data_to_xr(data) metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params) iteration_data = task.get_processing_method()( data, clean_mask=clear_mask, intermediate_product=iteration_data) task.scenes_processed = F('scenes_processed') + 1 task.save() if iteration_data is None: return None path = os.path.join(task.get_temp_path(), chunk_id + ".nc") iteration_data.to_netcdf(path) dc.close() logger.info("Done with chunk: " + chunk_id) return path, metadata, { 'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id }
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = AppNameTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    #updated_params.update({'products': parameters['']})
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        # TODO: If this is not a multi-sensor app, replace get_stacked_datasets_by_extent with get_dataset_by_extent.
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        # TODO: Replace anything here with your processing - do you need to create additional masks? Apply band maths? etc.
        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data else create_bit_mask(
            data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        # TODO: Make sure you're producing everything required for your algorithm.
        iteration_data = task.get_processing_method()(data, clean_mask=clear_mask, intermediate_product=iteration_data)

        # TODO: If there is no animation you can remove this block. Otherwise, save off the data that you need.
        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # Need to clear out all the metadata.
                clear_attrs(data)
                # Can't reindex on time - weird?
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
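# A self-contained sketch (plain numpy, not the Data Cube utilities) of the pixel_qa
# fallback used in the template above. Bits 1 and 2 of Landsat Collection 1 pixel_qa
# flag clear and water pixels, and either one counts as "clean" for masking purposes.
# The helper name here is illustrative, not an existing library function.
import numpy as np


def bit_mask(qa_band, bits):
    """Return True where any of the given bit positions is set in the QA band."""
    mask = np.zeros(qa_band.shape, dtype=bool)
    for bit in bits:
        mask |= (qa_band & (1 << bit)) > 0
    return mask


if __name__ == '__main__':
    # 0b010 = clear, 0b100 = water, 0b000 = neither (e.g. cloud or fill).
    pixel_qa = np.array([0b010, 0b100, 0b000, 0b110])
    print(bit_mask(pixel_qa, [1, 2]))  # [ True  True False  True]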
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    The most efficient way to do this is to create a group of time chunks for each geographic chunk,
    recombine over the time index, then combine geographic last.
    If we create an animation, this needs to be reversed - e.g. group of geographic for each time,
    recombine over geographic, then recombine time last.

    The full processing pipeline is completed, then the create_output_products task is triggered, completing the task.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')

    task = SpectralAnomalyTask.objects.get(pk=task_id)
    api = DataAccessApi(config=task.config_path)

    # Get an estimate of the amount of work to be done: the number of scenes
    # to process, also considering intermediate chunks to be combined.
    # Determine the number of scenes for the baseline and analysis extents.
    num_scenes = {}
    params_temp = parameters.copy()
    for composite_name in ['baseline', 'analysis']:
        num_scenes[composite_name] = 0
        for geographic_chunk in geographic_chunks:
            params_temp.update(geographic_chunk)
            params_temp['measurements'] = []
            # Use the corresponding time range for the baseline and analysis data.
            params_temp['time'] = \
                params_temp['baseline_time' if composite_name == 'baseline' else 'analysis_time']
            params_temp_clean = params_temp.copy()
            del params_temp_clean['baseline_time'], params_temp_clean['analysis_time'], \
                params_temp_clean['composite_range'], params_temp_clean['change_range']
            data = api.dc.load(**params_temp_clean)
            if 'time' in data.coords:
                num_scenes[composite_name] += len(data.time)
    # The number of scenes per geographic chunk for the baseline and analysis extents.
    num_scn_per_chk_geo = {k: round(v / len(geographic_chunks)) for k, v in num_scenes.items()}
    # Scene processing progress is tracked in processing_task().
    task.total_scenes = sum(num_scenes.values())
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return

    task.update_status("WAIT", "Starting processing.")

    processing_pipeline = (group([
        processing_task.s(
            task_id=task_id,
            geo_chunk_id=geo_index,
            geographic_chunk=geographic_chunk,
            num_scn_per_chk=num_scn_per_chk_geo,
            **parameters) for geo_index, geographic_chunk in enumerate(geographic_chunks)
    ]) | recombine_geographic_chunks.s(task_id=task_id)
       | create_output_products.s(task_id=task_id)
       | task_clean_up.si(task_id=task_id, task_model='SpectralAnomalyTask')).apply_async()

    return True
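# A minimal Celery canvas sketch of the pipeline shape used above: a group of per-chunk
# tasks whose results are funnelled into a single reduce step, followed by a cleanup step
# that ignores the previous result (the .si() immutable signature). Task names and the
# broker URL are illustrative only; they are not part of the application above.
from celery import Celery, group

app = Celery('pipeline_sketch', broker='memory://')


@app.task
def process_chunk(chunk_id):
    return chunk_id * 2


@app.task
def recombine(chunk_results):
    return sum(chunk_results)


@app.task
def clean_up(task_id):
    return 'cleaned {}'.format(task_id)


# Mirrors group | recombine_geographic_chunks.s | create_output_products.s | task_clean_up.si above.
pipeline = (group(process_chunk.s(i) for i in range(4))
            | recombine.s()
            | clean_up.si(task_id=42))
# pipeline.apply_async() would execute it against a real broker/result backend.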
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    The task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = WaterDetectionTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # _get_datetime_range_containing is assumed to be a module-level helper here,
    # matching the inline definition used by the other processing tasks above.
    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    water_analysis = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        # Ensure data variables have the range of Landsat 7 Collection 1 Level 2
        # since the color scales are tailored for that dataset.
        platform = task.satellite.platform
        collection = task.satellite.collection
        level = task.satellite.level
        if (platform, collection) != ('LANDSAT_7', 'c1'):
            data = \
                convert_range(data, from_platform=platform, from_collection=collection, from_level=level,
                              to_platform='LANDSAT_7', to_collection='c1', to_level='l2')

        wofs_data = task.get_processing_method()(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis, no_data=task.satellite.no_data_value)

        metadata = task.metadata_from_dataset(metadata, wofs_data, clear_mask.data, updated_params)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = wofs_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else water_analysis
            export_xarray_to_netcdf(animated_data, path)

        if check_cancel_task(self, task):
            return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if water_analysis is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(water_analysis, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
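# A hedged sketch of the progress-counter pattern above, using a hypothetical ChunkTask
# model in place of the real task models. F('scenes_processed') + 1 pushes the increment
# into a single SQL UPDATE so concurrent chunk workers do not overwrite each other's
# counts, and update_fields keeps the save from clobbering unrelated columns.
from django.db import models
from django.db.models import F


class ChunkTask(models.Model):      # hypothetical model, for illustration only
    total_scenes = models.IntegerField(default=0)
    scenes_processed = models.IntegerField(default=0)

    class Meta:
        app_label = 'sketch'        # placeholder app label so the sketch is importable


def record_scene_processed(task_id):
    task = ChunkTask.objects.get(pk=task_id)
    task.scenes_processed = F('scenes_processed') + 1
    task.save(update_fields=['scenes_processed'])
    # The instance still holds the unevaluated F() expression, so refresh before
    # reading the counter back in Python.
    task.refresh_from_db(fields=['scenes_processed'])
    return task.scenes_processed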
def csv_import(task_id):  # pragma: no cover
    from django.db import transaction

    # There is a possible race condition between this task starting and the
    # ImportTask row becoming visible, so we loop for a bit to fetch the task.
    tries = 0
    task = None
    while tries < 5 and not task:
        try:
            task = ImportTask.objects.get(pk=task_id)
        except Exception as e:
            # this object just doesn't exist yet, sleep a bit then try again
            tries += 1
            if tries >= 5:
                raise e
            else:
                sleep(1)

    log = StringIO()

    if StrictVersion(django.get_version()) < StrictVersion('1.6'):
        transaction.enter_transaction_management()
        transaction.managed()

        try:
            task.task_id = csv_import.request.id
            task.log("Started import at %s" % timezone.now())
            task.log("--------------------------------")
            task.save()
            transaction.commit()

            model = class_from_string(task.model_class)
            records = model.import_csv(task, log)
            task.save()

            task.log(log.getvalue())
            task.log("Import finished at %s" % timezone.now())
            task.log("%d record(s) added." % len(records))
            transaction.commit()
        except Exception as e:
            transaction.rollback()

            import traceback
            traceback.print_exc()

            task.log("\nError: %s\n" % e)
            task.log(log.getvalue())
            transaction.commit()

            raise e
        finally:
            transaction.leave_transaction_management()
    else:
        task.task_id = csv_import.request.id
        task.log("Started import at %s" % timezone.now())
        task.log("--------------------------------")
        task.save()

        try:
            with transaction.atomic():
                model = class_from_string(task.model_class)
                records = model.import_csv(task, log)
                task.save()

                task.log(log.getvalue())
                task.log("Import finished at %s" % timezone.now())
                task.log("%d record(s) added." % len(records))
        except Exception as e:
            import traceback
            traceback.print_exc()

            task.log("\nError: %s\n" % e)
            task.log(log.getvalue())

            raise e

    return task
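# A hedged alternative sketch for the fetch-retry loop above: a bound Celery task can
# hand the wait back to the broker with self.retry() instead of sleeping in the worker.
# ImportTask refers to the model used in the snippet above; the decorator arguments and
# task name are assumptions for illustration, not the project's actual configuration.
from celery import shared_task


@shared_task(bind=True, max_retries=5, default_retry_delay=1)
def csv_import_retrying(self, task_id):
    try:
        task = ImportTask.objects.get(pk=task_id)
    except ImportTask.DoesNotExist as exc:
        # The row may not be visible yet if the enqueuing transaction has not committed;
        # requeue this task rather than blocking a worker with sleep().
        raise self.retry(exc=exc)
    # ... proceed with the import as in csv_import() above ...
    return task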