def check_existing(input_filename: str):
    config, tasks = task_app.load_tasks(input_filename)
    _LOG.info('Checking for existing output files.')
    # tile_index is (X, Y, T)
    task_app.check_existing_files(
        _get_filename(config, *task['tile_index']) for task in tasks)
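# `_get_filename` is referenced above but not defined in this section. A
# hypothetical sketch, assuming the config carries a 'location' directory and
# a 'file_path_template' keyed by tile index (illustrative assumption only,
# not the actual helper):
def _get_filename(config, x, y, t):
    template = str(Path(config['location'], config['file_path_template']))
    return template.format(tile_index=(x, y, t), start_time=t)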
def ingest_cmd(driver_manager, config_file, year, queue_size, save_tasks,
               load_tasks, dry_run, executor):
    index = driver_manager.index
    if config_file:
        config = load_config_from_file(index, config_file)
        variable_params = get_variable_params(config)
        source_type, output_type = make_output_type(driver_manager, config)
        tasks = create_task_list(driver_manager, output_type, year,
                                 source_type, config)
    elif load_tasks:
        config, tasks = load_tasks_(load_tasks)
        source_type, output_type = make_output_type(driver_manager, config)
    else:
        click.echo('Must specify exactly one of --config-file, --load-tasks')
        return 1

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
        return 0

    if save_tasks:
        save_tasks_(config, tasks, save_tasks)
        return 0

    successful, failed = process_tasks(driver_manager, config, source_type,
                                       output_type, tasks, queue_size, executor)
    click.echo('%d successful, %d failed' % (successful, failed))
    return 0
def run(index, dry_run: bool, tag: str, task_desc_file: str,
        qsub: QSubLauncher, runner: TaskRunner, *args, **kwargs):
    _LOG.info('Starting Fractional Cover processing...')
    _LOG.info('Tag: %r', tag)

    task_desc = serialise.load_structure(Path(task_desc_file), TaskDescription)
    config, tasks = task_app.load_tasks(
        task_desc.runtime_state.task_serialisation_path)

    if dry_run:
        task_app.check_existing_files(task['filename'] for task in tasks)
        return 0

    task_func = partial(do_fc_task, config)
    process_func = partial(process_result, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    finally:
        runner.stop()
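# The run() functions in this section all hand work to `runner` with the same
# four arguments. A minimal sketch of the contract they appear to assume (an
# illustration, not the actual TaskRunner): apply `task_func` to every task
# and pass each result to `process_func`.
def naive_runner(task_desc, tasks, task_func, process_func):
    for task in tasks:
        result = task_func(task)   # e.g. partial(do_fc_task, config)
        process_func(result)       # e.g. partial(process_result, index)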
def ingest_cmd(index, config_file, year, queue_size, save_tasks, load_tasks,
               dry_run, allow_product_changes, executor):
    # pylint: disable=too-many-locals
    if config_file:
        config = load_config_from_file(config_file)
    elif load_tasks:
        config, tasks = load_tasks_(load_tasks)
    else:
        click.echo('Must specify exactly one of --config-file, --load-tasks')
        sys.exit(-1)

    try:
        # Ignore the added 'filename' key, which is not part of the schema.
        filename = config['filename']
        del config['filename']
        IngestorConfig.validate(config)
        config['filename'] = filename
    except InvalidDocException as e:
        exception, = e.args
        _LOG.error(exception.message)
        sys.exit(-1)

    if config_file:
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
        tasks = create_task_list(index, output_type, year, source_type, config)
    elif load_tasks:
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
    elif save_tasks:
        save_tasks_(config, tasks, save_tasks)
    else:
        successful, failed = process_tasks(index, config, source_type,
                                           output_type, tasks, queue_size,
                                           executor)
        click.echo('%d successful, %d failed' % (successful, failed))
        sys.exit(failed)
def ingest_cmd(index, config_file, year, queue_size, save_tasks, load_tasks,
               dry_run, allow_product_changes, executor):
    # pylint: disable=too-many-locals
    try:
        if config_file:
            config, tasks = load_config_from_file(config_file), None
        elif load_tasks:
            config, tasks = load_tasks_(load_tasks)
        else:
            click.echo('Must specify exactly one of --config-file, --load-tasks')
            sys.exit(-1)
    except InvalidDocException as e:
        exception, = e.args
        _LOG.error(exception.message)
        sys.exit(-1)

    driver = get_driver_from_config(config)

    try:
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
    except ValueError as e:
        _LOG.error(str(e))
        sys.exit(-1)

    if tasks is None:
        tasks = create_task_list(index, output_type, year, source_type, config)

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
    elif save_tasks:
        save_tasks_(config, tasks, save_tasks)
    else:
        successful, failed = process_tasks(index, config, source_type,
                                           output_type, tasks, queue_size,
                                           executor)
        click.echo('%d successful, %d failed' % (successful, failed))
        sys.exit(failed)
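# A minimal sketch of how ingest_cmd might be exposed as a click command. The
# option names, types, and defaults are illustrative assumptions, not the
# project's actual CLI definition; in the real application `index` and
# `executor` are injected by the datacube UI wiring.
import click

@click.command('ingest')
@click.option('--config-file', type=click.Path(exists=True, dir_okay=False))
@click.option('--year', type=int)
@click.option('--queue-size', type=int, default=32)
@click.option('--save-tasks', type=click.Path(dir_okay=False))
@click.option('--load-tasks', type=click.Path(exists=True, dir_okay=False))
@click.option('--dry-run', is_flag=True, default=False)
@click.option('--allow-product-changes', is_flag=True, default=False)
def cli(**kwargs):
    ingest_cmd(index=None, executor=None, **kwargs)  # placeholders for the injected objects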
def ndvi_app(index, config, tasks, executor, dry_run, queue_size, *args, **kwargs):
    click.echo('Starting NDVI processing...')
    if dry_run:
        check_existing_files(task['filename'] for task in tasks)
        return 0

    results = []
    # Pre-fill the queue with up to `queue_size` tasks.
    task_queue = itertools.islice(tasks, queue_size)
    for task in task_queue:
        _LOG.info('Running task: %s', task['tile_index'])
        results.append(executor.submit(do_ndvi_task, config=config, task=task))
    click.echo('Task queue filled, waiting for first result...')

    successful = failed = 0
    while results:
        result, results = executor.next_completed(results, None)

        # Submit a new task to replace the one we just finished.
        task = next(tasks, None)
        if task:
            _LOG.info('Running task: %s', task['tile_index'])
            results.append(executor.submit(do_ndvi_task, config=config, task=task))

        # Process the result.
        try:
            datasets = executor.result(result)
            for dataset in datasets.values:
                index.datasets.add(dataset, skip_sources=True)
                _LOG.info('Dataset added')
            successful += 1
        except Exception as err:  # pylint: disable=broad-except
            _LOG.exception('Task failed: %s', err)
            failed += 1
            continue
        finally:
            # Release the task to free memory so there is no leak in
            # executor/scheduler/worker process.
            executor.release(result)

    click.echo('%d successful, %d failed' % (successful, failed))
    _LOG.info('Completed: %d successful, %d failed', successful, failed)
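# The fill-then-refill loop above keeps at most `queue_size` tasks in flight.
# A minimal standalone sketch of the same back-pressure idea, using only the
# standard library rather than the datacube executor (illustrative only):
import itertools
import logging
from concurrent.futures import FIRST_COMPLETED, ProcessPoolExecutor, wait

def run_with_backpressure(do_task, tasks, queue_size=4):
    """Keep at most `queue_size` tasks in flight, refilling as tasks finish."""
    tasks = iter(tasks)
    with ProcessPoolExecutor() as pool:
        pending = {pool.submit(do_task, t)
                   for t in itertools.islice(tasks, queue_size)}
        while pending:
            done, pending = wait(pending, return_when=FIRST_COMPLETED)
            for fut in done:
                nxt = next(tasks, None)  # replace each finished task with a new one
                if nxt is not None:
                    pending.add(pool.submit(do_task, nxt))
                try:
                    fut.result()  # re-raises the task's exception, if any
                except Exception:  # pylint: disable=broad-except
                    logging.exception('Task failed')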
def run(index, dry_run: bool, task_desc_file: str, runner: TaskRunner, qsub):
    _LOG.info('Starting DEA Stacker processing...')

    task_desc = serialise.load_structure(Path(task_desc_file), TaskDescription)
    config, tasks = task_app.load_tasks(
        task_desc.runtime_state.task_serialisation_path)

    if dry_run:
        task_app.check_existing_files(task['filename'] for task in tasks)
        return

    task_func = partial(stacker.do_stack_task, config)
    process_func = partial(stacker.process_result, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    finally:
        runner.stop()
def run(index, dry_run: bool, input_filename: str, runner: TaskRunner,
        skip_indexing: bool, **kwargs):
    config, tasks = task_app.load_tasks(input_filename)
    work_dir = Path(input_filename).parent

    # TODO: Get rid of this completely
    task_desc = TaskDescription(
        type_='fc',
        task_dt=datetime.utcnow().astimezone(timezone.utc),
        events_path=work_dir,
        logs_path=work_dir,
        jobs_path=work_dir,
        parameters=None,
        runtime_state=None,
    )

    if dry_run:
        _LOG.info('Starting Fractional Cover Dry Run...')
        task_app.check_existing_files(
            task['filename_dataset'] for task in tasks)
        return 0

    _LOG.info('Starting Fractional Cover processing...')
    task_func = partial(_do_fc_task, config)
    if skip_indexing:
        process_func = _skip_indexing_and_only_log
    else:
        process_func = partial(_index_datasets, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    except Exception as err:  # pylint: disable=broad-except
        # `err` is an exception object, so match against its string form.
        if "Error 104" in str(err):
            _LOG.info("Processing completed and shutdown was initiated. "
                      "Exception: %s", str(err))
        else:
            _LOG.info("Exception during processing: %s", err)
    finally:
        runner.stop()
    return 0
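# `_skip_indexing_and_only_log` is referenced above but not shown. A plausible
# minimal implementation (an assumption, not the actual source) logs each
# produced dataset instead of adding it to the index:
def _skip_indexing_and_only_log(result):
    for dataset in result:  # assumed: the task result yields Dataset objects
        _LOG.info('Dataset produced (indexing skipped): id=%s path=%s',
                  dataset.id, dataset.local_path)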
def ingest_cmd(index, config_file, year, queue_size, save_tasks, load_tasks,
               dry_run, executor, allow_product_changes):
    # pylint: disable=too-many-locals
    if config_file:
        config = load_config_from_file(config_file)
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
        tasks = create_task_list(index, output_type, year, source_type, config)
    elif load_tasks:
        config, tasks = load_tasks_(load_tasks)
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
    else:
        click.echo('Must specify exactly one of --config-file, --load-tasks')
        sys.exit(-1)

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
    elif save_tasks:
        save_tasks_(config, tasks, save_tasks)
    else:
        successful, failed = process_tasks(index, config, source_type,
                                           output_type, tasks, queue_size,
                                           executor)
        click.echo('%d successful, %d failed' % (successful, failed))
        sys.exit(failed)
def wofs_app(index, config, tasks, executor, dry_run, queue_size,
             print_output_product, skip_indexing, *args, **kwargs):
    if dry_run:
        check_existing_files(task['file_path'] for task in tasks)
        return 0

    if not skip_indexing:
        # Ensure the output product is in the index (add is idempotent).
        config['wofs_dataset_type'] = index.products.add(config['wofs_dataset_type'])

    if print_output_product:
        click.echo(json.dumps(config['wofs_dataset_type'].definition, indent=4))
        return 0

    click.echo('Starting processing...')
    results = []

    def submit_task(task):
        _LOG.info('Queuing task: %s', task['tile_index'])
        results.append(executor.submit(do_wofs_task, config=config, **task))

    task_queue = itertools.islice(tasks, queue_size)
    for task in task_queue:
        submit_task(task)
    click.echo('Queue filled, waiting for first result...')

    successful = failed = 0
    while results:
        result, results = executor.next_completed(results, None)

        # Submit a new task to replace the one we just finished.
        task = next(tasks, None)
        if task:
            submit_task(task)

        # Process the result.
        try:
            datasets = executor.result(result)
            for dataset in datasets:
                if not skip_indexing:
                    # time.clock() was removed in Python 3.8; use a monotonic timer.
                    start = time.perf_counter()
                    index.datasets.add(dataset, sources_policy='skip')
                    indexing_time = time.perf_counter() - start
                    _LOG.info('Dataset added to index in %fs: id=%s path=%s',
                              indexing_time, dataset.id, dataset.local_path)
                else:
                    _LOG.info('Dataset completed: id=%s path=%s',
                              dataset.id, dataset.local_path)
            successful += 1
        except Exception as err:  # pylint: disable=broad-except
            _LOG.exception('Task failed: %s', err)
            failed += 1
            continue
        finally:
            # Release the task to free memory so there is no leak in
            # executor/scheduler/worker process.
            executor.release(result)

    click.echo('%d successful, %d failed' % (successful, failed))
    _LOG.info('Completed: %d successful, %d failed', successful, failed)
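# submit_task above expands the task dict with **task, so do_wofs_task must
# accept the task's keys as keyword arguments. A toy stand-in showing that
# contract (the name and keys here are assumptions drawn from this section):
def do_wofs_task_stub(config, tile_index=None, file_path=None, **other_keys):
    _LOG.info('Processing tile %s -> %s', tile_index, file_path)
    return []  # the real task returns the datasets it produced

# e.g. do_wofs_task_stub(config={}, **{'tile_index': (15, -40),
#                                      'file_path': 'wofs_15_-40.nc'})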