def load_process_save_chunk(output_files: OutputDriver, chunk: Tuple[slice, slice, slice],
                            task: StatsTask, timer: MultiTimer):
    """
    Load one chunk of source data, compute every output product's statistic
    over it, and write each result through `output_files`.

    Input data is dropped as soon as the final product has consumed it, to
    keep peak memory down. An empty chunk (everything masked / no data) is
    logged at debug level and skipped rather than treated as a failure.

    :param output_files: open output driver the results are written to
    :param chunk: (time, y, x) slices selecting the sub-tile to process
    :param task: task describing sources and output products
    :param timer: accumulates per-phase timings (loading/compute/writing)
    """
    try:
        with timer.time('loading_data'):
            geom = geometry_for_task(task)
            data = load_data(chunk, task.sources, geom=geom)

        # Human-readable chunk description is loop-invariant; build it once.
        chunk_description = "({})".format(", ".join(prettier_slice(c) for c in chunk))
        final_index = len(task.output_products) - 1

        for i, (product_name, stat) in enumerate(task.output_products.items()):
            _LOG.debug("Computing %s in tile %s %s; %s",
                       product_name, task.spatial_id, chunk_description, timer)
            output_measurements = stat.data_measurements

            with timer.time(product_name):
                computed = stat.compute(data)

            if i == final_index:
                # Last product has consumed the input — release it early.
                del data

            # Restore nodata values that the computation may have replaced.
            computed = cast_back(computed, output_measurements)

            # Write this chunk of every data variable into the output files.
            with timer.time('writing_data'):
                output_files.write_chunk(product_name, chunk, computed)
    except EmptyChunkException:
        _LOG.debug('Error: No data returned while loading %s for %s. May have all been masked',
                   chunk, task)
def main(index, stats_config_file, qsub, runner, save_tasks, load_tasks,
         tile_index, tile_index_file, output_location, year, task_slice, batch):
    """
    CLI entry point for the stats application.

    Depending on the options given, this either (a) fans the work out as a
    batch of qsub child jobs, (b) submits a single qsub job, or (c) runs the
    tasks in-process: save them to a file, or load/generate them and execute.

    Returns 0 on success, a child's non-zero exit code on batch failure, or
    terminates the process with exit status 1 on any unexpected exception.
    """
    try:
        _log_setup()

        if qsub is not None and batch is not None:
            # Batch mode: clone the qsub job once per slice so each child
            # processes an interleaved subset ('i::batch') of the tasks.
            for i in range(batch):
                child = qsub.clone()
                child.reset_internal_args()
                child.add_internal_args('--task-slice', '{}::{}'.format(i, batch))
                click.echo(repr(child))
                exit_code, _ = child(auto=True, auto_clean=[('--batch', 1)])
                if exit_code != 0:
                    # Stop fanning out as soon as one submission fails.
                    return exit_code
            return 0
        elif qsub is not None:
            # TODO: verify config before calling qsub submit
            click.echo(repr(qsub))
            exit_code, _ = qsub(auto=True)
            return exit_code

        timer = MultiTimer().start('main')

        config = normalize_config(read_config(stats_config_file),
                                  tile_index, tile_index_file, year, output_location)
        app = StatsApp(config, index)
        app.log_config()

        if save_tasks is not None:
            # Only serialise the tasks for later; nothing is executed.
            app.save_tasks_to_file(save_tasks, index)
            failed = 0
        else:
            if load_tasks is not None:
                tasks = unpickle_stream(load_tasks)
            else:
                tasks = app.generate_tasks(index)
            successful, failed = app.run_tasks(tasks, runner, task_slice)

        timer.pause('main')
        _LOG.info('Stats processing completed in %s seconds.', timer.run_times['main'])

        if failed > 0:
            raise click.ClickException(
                '%s of %s tasks not completed successfully.' % (failed, successful + failed))
    except Exception as e:
        # Top-level boundary: log the full traceback, not just the message,
        # so failures are diagnosable from the log (fix: was _LOG.error(e)).
        _LOG.exception(e)
        sys.exit(1)
    return 0
def execute_task(task: StatsTask, output_driver, chunking) -> StatsTask:
    """
    Load data, run the statistical operations and write results out to the
    filesystem.

    :param datacube_stats.models.StatsTask task:
    :type output_driver: OutputDriver
    :param chunking: dict of dimension sizes to chunk the computation by;
        an empty dict means process the whole tile in one chunk
    :return: the (completed) task, for result bookkeeping by the caller
    :raises StatsProcessingException: if processing fails unexpectedly
    """
    timer = MultiTimer().start('total')

    # Iterative tasks stream data through incremental procs; others load
    # the whole chunk and compute in one pass.
    process_chunk = load_process_save_chunk_iteratively if task.is_iterative else load_process_save_chunk

    try:
        with output_driver(task=task) as output_files:
            # currently for polygons process will load entirely
            if len(chunking) == 0:
                chunking = {'x': task.sample_tile.shape[2],
                            'y': task.sample_tile.shape[1]}
            for sub_tile_slice in tile_iter(task.sample_tile, chunking):
                process_chunk(output_files, sub_tile_slice, task, timer)
    except OutputFileAlreadyExists as e:
        _LOG.warning(str(e))
    except OutputDriverResult as e:
        # was run interactively
        # re-raise result to be caught again by StatsApp.execute_task
        raise e
    except Exception as e:
        _LOG.error("Error processing task: %s", task)
        # Chain the original exception so the root cause stays attached
        # (fix: was raised without 'from e').
        raise StatsProcessingException("Error processing task: %s" % task) from e

    timer.pause('total')
    _LOG.debug('Completed %s %s task with %s data sources; %s',
               task.spatial_id,
               [d.strftime('%Y-%m-%d') for d in task.time_period],
               task.data_sources_length(), timer)
    return task
def load_process_save_chunk_iteratively(output_files: OutputDriver, chunk: Tuple[slice, slice, slice],
                                        task: StatsTask, timer: MultiTimer):
    """
    Stream one chunk of source data through each output product's iterative
    statistic, then write the finalised results through `output_files`.

    Each product contributes an incremental proc: calling it with a dataset
    folds that dataset in; calling it with no arguments yields the result.

    :param output_files: open output driver the results are written to
    :param chunk: (time, y, x) slices selecting the sub-tile to process
    :param task: task describing sources and output products
    :param timer: accumulates per-product and writing timings
    """
    pipelines = [(stat.make_iterative_proc(), name, stat)
                 for name, stat in task.output_products.items()]

    def write_dataset(product_name, dataset):
        # Emit every data variable of the finalised result for this chunk.
        for var_name, var in dataset.data_vars.items():
            output_files.write_data(product_name, var_name, chunk, var.values)

    geom = geometry_for_task(task)

    # Fold each lazily-loaded dataset into every product's running proc.
    for dataset in load_data_lazy(chunk, task.sources, geom=geom, timer=timer):
        for proc, product_name, _ in pipelines:
            with timer.time(product_name):
                proc(dataset)

    with timer.time('writing_data'):
        for proc, product_name, stat in pipelines:
            # Finalise the proc, restore nodata values, then write out.
            write_dataset(product_name, cast_back(proc(), stat.data_measurements))