Example 1
def main(index, stats_config_file, qsub, runner, save_tasks, load_tasks,
         tile_index, tile_index_file, output_location, year, task_slice,
         batch):

    try:
        _log_setup()

        # Batch mode: resubmit this command `batch` times, each child processing
        # an interleaved slice of the tasks ('i::batch').
        if qsub is not None and batch is not None:
            for i in range(batch):
                child = qsub.clone()
                child.reset_internal_args()
                child.add_internal_args('--task-slice',
                                        '{}::{}'.format(i, batch))
                click.echo(repr(child))
                exit_code, _ = child(auto=True, auto_clean=[('--batch', 1)])
                if exit_code != 0:
                    return exit_code
            return 0

        elif qsub is not None:
            # TODO: verify config before calling qsub submit
            click.echo(repr(qsub))
            exit_code, _ = qsub(auto=True)
            return exit_code

        timer = MultiTimer().start('main')

        config = normalize_config(read_config(stats_config_file), tile_index,
                                  tile_index_file, year, output_location)

        app = StatsApp(config, index)
        app.log_config()

        if save_tasks is not None:
            app.save_tasks_to_file(save_tasks, index)
            failed = 0
        else:
            if load_tasks is not None:
                tasks = unpickle_stream(load_tasks)
            else:
                tasks = app.generate_tasks(index)

            successful, failed = app.run_tasks(tasks, runner, task_slice)

        timer.pause('main')
        _LOG.info('Stats processing completed in %s seconds.',
                  timer.run_times['main'])

        if failed > 0:
            raise click.ClickException(
                '%s of %s tasks not completed successfully.' %
                (failed, successful + failed))

    except Exception as e:
        _LOG.error(e)
        sys.exit(1)

    return 0
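The batch branch above resubmits the same command `batch` times, each child receiving `--task-slice` in the form `i::batch`, so the submissions together cover the full task stream in interleaved strides. Below is a minimal sketch of how such a slice string could be turned back into a Python `slice`; the helper name and the exact accepted forms are assumptions for illustration, not part of the original code.

def parse_task_slice(spec):
    # Parse 'start:stop:step' (empty fields mean None), e.g. '0::4' -> slice(0, None, 4).
    parts = spec.split(':')
    if len(parts) != 3:
        raise ValueError('expected start:stop:step, got {!r}'.format(spec))
    start, stop, step = (int(p) if p else None for p in parts)
    return slice(start, stop, step)

# '0::4', '1::4', ... pick every 4th task at a different offset, so 4 batch
# submissions together process each task exactly once.
assert parse_task_slice('1::4') == slice(1, None, 4)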
Example 2
def prune_tasks(quartile):
    # Prune the task pickle produced by save_tasks(): keep only the tasks whose
    # dates appear in this quartile's date list, and write them to a new pickle file.

    # Load the netCDF file with the list of dates for this quartile
    spei_q_dates = xr.open_dataset(f"spei_q{quartile}_dates.nc")

    pruned = (transform_task(task, spei_q_dates, quartile)
              for task in unpickle_stream('task.pickle'))
    pruned = (task for task in pruned if task is not None)

    pickle_stream(pruned, f'task_q{quartile}.pickle')
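prune_tasks builds a lazy pipeline: each task from task.pickle is passed through transform_task together with the quartile's date list, tasks that come back as None are dropped, and the survivors are streamed into a new per-quartile pickle. A hedged usage sketch, assuming the spei_q{n}_dates.nc files are numbered 1 through 4:

# Hypothetical driver: produce one pruned task file per quartile.
for q in (1, 2, 3, 4):
    prune_tasks(q)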
Example 3
def execute_tasks(quartile):
    # Run datacube statistics, configured by stats_config.yaml, over the
    # pickle file of tasks / dates previously pruned for this quartile.
    with open('stats_config.yaml') as fl:
        config = yaml.safe_load(fl)

    print(yaml.dump(config, indent=4))

    task_file = f'task_q{quartile}.pickle'

    print('executing tasks')
    app = StatsApp(config)

    with multiprocessing.Pool() as p:
        p.map(app.execute_task, list(unpickle_stream(task_file)))
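Pool.map above materialises the whole task stream with list() before dispatching it. For long task files a memory-bounded alternative is to stream the tasks with imap_unordered; this sketch assumes results are not needed in order and reuses the same unpickle_stream helper (the function name execute_tasks_lazy is illustrative, not part of the original):

def execute_tasks_lazy(app, task_file, workers=None):
    # Feed tasks to the pool lazily instead of building a full list;
    # imap_unordered hands work out as tasks are unpickled and keeps memory flat.
    with multiprocessing.Pool(processes=workers) as pool:
        for _ in pool.imap_unordered(app.execute_task,
                                     unpickle_stream(task_file)):
            pass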
Example 4
    def run(self, runner, task_file=None, task_slice=None):
        if task_file:
            tasks = unpickle_stream(task_file)
        else:
            tasks = self.generate_tasks(self.configure_outputs())

        if task_slice is not None:
            tasks = islice(tasks, task_slice.start, task_slice.stop,
                           task_slice.step)

        app_info = _get_app_metadata(self.config_file)

        output_driver = partial(self.output_driver,
                                output_path=self.location,
                                app_info=app_info,
                                storage=self.storage,
                                global_attributes=self.global_attributes,
                                var_attributes=self.var_attributes)
        task_runner = partial(execute_task,
                              output_driver=output_driver,
                              chunking=self.computation.get('chunking', {}))

        # The task description does not need to be thorough for now
        task_desc = TaskDescription(
            type_='datacube_stats',
            task_dt=datetime.utcnow().replace(tzinfo=tz.tzutc()),
            events_path=Path(self.location),
            logs_path=Path(self.location),
            parameters=DefaultJobParameters(query={},
                                            source_products=[],
                                            output_products=[]))

        result = runner(task_desc, tasks, task_runner)

        _LOG.debug('Stopping runner.')
        runner.stop()
        _LOG.debug('Runner stopped.')

        return result
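run() accepts an optional task_slice with start/stop/step attributes, which is applied to the task stream with itertools.islice before any work is dispatched. A hedged usage sketch (the runner object and task file name are assumptions), mirroring the '--task-slice 1::4' convention from Example 1:

# Hypothetical call: replay every 4th task (offset 1) from a saved task file.
result = app.run(runner, task_file='task.pickle', task_slice=slice(1, None, 4))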