def check_existing(input_filename: str):
    config, tasks = task_app.load_tasks(input_filename)
    _LOG.info('Checking for existing output files.')
    # tile_index is (X, Y, T)
    task_app.check_existing_files(
        _get_filename(config, *task['tile_index']) for task in tasks)
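# `_get_filename` is referenced above but not defined in this section. A
# hypothetical sketch, assuming the config carries a 'location' directory and
# a 'file_path_template' keyed by tile index (illustrative assumption only,
# not the actual helper):
def _get_filename(config, x, y, t):
    template = str(Path(config['location'], config['file_path_template']))
    return template.format(tile_index=(x, y, t), start_time=t)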
def ingest_cmd(driver_manager, config_file, year, queue_size, save_tasks,
               load_tasks, dry_run, executor):
    index = driver_manager.index
    if config_file:
        config = load_config_from_file(index, config_file)
        variable_params = get_variable_params(config)
        source_type, output_type = make_output_type(driver_manager, config)
        tasks = create_task_list(driver_manager, output_type, year,
                                 source_type, config)
    elif load_tasks:
        config, tasks = load_tasks_(load_tasks)
        source_type, output_type = make_output_type(driver_manager, config)
    else:
        click.echo('Must specify exactly one of --config-file, --load-tasks')
        return 1

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
        return 0

    if save_tasks:
        save_tasks_(config, tasks, save_tasks)
        return 0

    successful, failed = process_tasks(driver_manager, config, source_type,
                                       output_type, tasks, queue_size, executor)
    click.echo('%d successful, %d failed' % (successful, failed))
    return 0
def run(index, dry_run: bool, tag: str, task_desc_file: str,
        qsub: QSubLauncher, runner: TaskRunner, *args, **kwargs):
    _LOG.info('Starting Fractional Cover processing...')
    _LOG.info('Tag: %r', tag)

    task_desc = serialise.load_structure(Path(task_desc_file), TaskDescription)
    config, tasks = task_app.load_tasks(
        task_desc.runtime_state.task_serialisation_path)

    if dry_run:
        task_app.check_existing_files(task['filename'] for task in tasks)
        return 0

    task_func = partial(do_fc_task, config)
    process_func = partial(process_result, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    finally:
        runner.stop()
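# The run() functions in this section all hand work to `runner` with the same
# four arguments. A minimal sketch of the contract they appear to assume (an
# illustration, not the actual TaskRunner): apply `task_func` to every task
# and pass each result to `process_func`.
def naive_runner(task_desc, tasks, task_func, process_func):
    for task in tasks:
        result = task_func(task)   # e.g. partial(do_fc_task, config)
        process_func(result)       # e.g. partial(process_result, index)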
def ingest_cmd(index, config_file, year, queue_size, save_tasks, load_tasks,
               dry_run, allow_product_changes, executor):
    # pylint: disable=too-many-locals
    if config_file:
        config = load_config_from_file(config_file)
    elif load_tasks:
        config, tasks = load_tasks_(load_tasks)
    else:
        click.echo('Must specify exactly one of --config-file, --load-tasks')
        sys.exit(-1)

    try:
        # Ignore the added 'filename' key, which is not part of the schema.
        filename = config['filename']
        del config['filename']
        IngestorConfig.validate(config)
        config['filename'] = filename
    except InvalidDocException as e:
        exception, = e.args
        _LOG.error(exception.message)
        sys.exit(-1)

    if config_file:
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
        tasks = create_task_list(index, output_type, year, source_type, config)
    elif load_tasks:
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
    elif save_tasks:
        save_tasks_(config, tasks, save_tasks)
    else:
        successful, failed = process_tasks(index, config, source_type,
                                           output_type, tasks, queue_size,
                                           executor)
        click.echo('%d successful, %d failed' % (successful, failed))
        sys.exit(failed)
def ingest_cmd(index, config_file, year, queue_size, save_tasks, load_tasks,
               dry_run, allow_product_changes, executor):
    # pylint: disable=too-many-locals
    try:
        if config_file:
            config, tasks = load_config_from_file(config_file), None
        elif load_tasks:
            config, tasks = load_tasks_(load_tasks)
        else:
            click.echo('Must specify exactly one of --config-file, --load-tasks')
            sys.exit(-1)
    except InvalidDocException as e:
        exception, = e.args
        _LOG.error(exception.message)
        sys.exit(-1)

    driver = get_driver_from_config(config)

    try:
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
    except ValueError as e:
        _LOG.error(str(e))
        sys.exit(-1)

    if tasks is None:
        tasks = create_task_list(index, output_type, year, source_type, config)

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
    elif save_tasks:
        save_tasks_(config, tasks, save_tasks)
    else:
        successful, failed = process_tasks(index, config, source_type,
                                           output_type, tasks, queue_size,
                                           executor)
        click.echo('%d successful, %d failed' % (successful, failed))
        sys.exit(failed)
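# A minimal sketch of how ingest_cmd might be exposed as a click command. The
# option names, types, and defaults are illustrative assumptions, not the
# project's actual CLI definition; in the real application `index` and
# `executor` are injected by the datacube UI wiring.
import click

@click.command('ingest')
@click.option('--config-file', type=click.Path(exists=True, dir_okay=False))
@click.option('--year', type=int)
@click.option('--queue-size', type=int, default=32)
@click.option('--save-tasks', type=click.Path(dir_okay=False))
@click.option('--load-tasks', type=click.Path(exists=True, dir_okay=False))
@click.option('--dry-run', is_flag=True, default=False)
@click.option('--allow-product-changes', is_flag=True, default=False)
def cli(**kwargs):
    ingest_cmd(index=None, executor=None, **kwargs)  # placeholders for the injected objects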
def ndvi_app(index, config, tasks, executor, dry_run, queue_size, *args, **kwargs):
    click.echo('Starting NDVI processing...')
    if dry_run:
        check_existing_files(task['filename'] for task in tasks)
        return 0

    results = []
    # Pre-fill the queue with up to `queue_size` tasks.
    task_queue = itertools.islice(tasks, queue_size)
    for task in task_queue:
        _LOG.info('Running task: %s', task['tile_index'])
        results.append(executor.submit(do_ndvi_task, config=config, task=task))
    click.echo('Task queue filled, waiting for first result...')

    successful = failed = 0
    while results:
        result, results = executor.next_completed(results, None)

        # Submit a new task to replace the one we just finished.
        task = next(tasks, None)
        if task:
            _LOG.info('Running task: %s', task['tile_index'])
            results.append(executor.submit(do_ndvi_task, config=config, task=task))

        # Process the result.
        try:
            datasets = executor.result(result)
            for dataset in datasets.values:
                index.datasets.add(dataset, skip_sources=True)
                _LOG.info('Dataset added')
            successful += 1
        except Exception as err:  # pylint: disable=broad-except
            _LOG.exception('Task failed: %s', err)
            failed += 1
            continue
        finally:
            # Release the task to free memory so there is no leak in
            # executor/scheduler/worker process.
            executor.release(result)

    click.echo('%d successful, %d failed' % (successful, failed))
    _LOG.info('Completed: %d successful, %d failed', successful, failed)
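# The fill-then-refill loop above keeps at most `queue_size` tasks in flight.
# A minimal standalone sketch of the same back-pressure idea, using only the
# standard library rather than the datacube executor (illustrative only):
import itertools
import logging
from concurrent.futures import FIRST_COMPLETED, ProcessPoolExecutor, wait

def run_with_backpressure(do_task, tasks, queue_size=4):
    """Keep at most `queue_size` tasks in flight, refilling as tasks finish."""
    tasks = iter(tasks)
    with ProcessPoolExecutor() as pool:
        pending = {pool.submit(do_task, t)
                   for t in itertools.islice(tasks, queue_size)}
        while pending:
            done, pending = wait(pending, return_when=FIRST_COMPLETED)
            for fut in done:
                nxt = next(tasks, None)  # replace each finished task with a new one
                if nxt is not None:
                    pending.add(pool.submit(do_task, nxt))
                try:
                    fut.result()  # re-raises the task's exception, if any
                except Exception:  # pylint: disable=broad-except
                    logging.exception('Task failed')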
def run(index, dry_run: bool, task_desc_file: str, runner: TaskRunner, qsub):
    _LOG.info('Starting DEA Stacker processing...')

    task_desc = serialise.load_structure(Path(task_desc_file), TaskDescription)
    config, tasks = task_app.load_tasks(
        task_desc.runtime_state.task_serialisation_path)

    if dry_run:
        task_app.check_existing_files(task['filename'] for task in tasks)
        return

    task_func = partial(stacker.do_stack_task, config)
    process_func = partial(stacker.process_result, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    finally:
        runner.stop()
def run(index, dry_run: bool, input_filename: str, runner: TaskRunner,
        skip_indexing: bool, **kwargs):
    config, tasks = task_app.load_tasks(input_filename)
    work_dir = Path(input_filename).parent

    # TODO: Get rid of this completely
    task_desc = TaskDescription(
        type_='fc',
        task_dt=datetime.utcnow().astimezone(timezone.utc),
        events_path=work_dir,
        logs_path=work_dir,
        jobs_path=work_dir,
        parameters=None,
        runtime_state=None,
    )

    if dry_run:
        _LOG.info('Starting Fractional Cover Dry Run...')
        task_app.check_existing_files(
            task['filename_dataset'] for task in tasks)
        return 0

    _LOG.info('Starting Fractional Cover processing...')
    task_func = partial(_do_fc_task, config)
    if skip_indexing:
        process_func = _skip_indexing_and_only_log
    else:
        process_func = partial(_index_datasets, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    except Exception as err:  # pylint: disable=broad-except
        # `err` is an exception object, so match against its string form.
        if "Error 104" in str(err):
            _LOG.info("Processing completed and shutdown was initiated. "
                      "Exception: %s", str(err))
        else:
            _LOG.info("Exception during processing: %s", err)
    finally:
        runner.stop()
    return 0
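# `_skip_indexing_and_only_log` is referenced above but not shown. A plausible
# minimal implementation (an assumption, not the actual source) logs each
# produced dataset instead of adding it to the index:
def _skip_indexing_and_only_log(result):
    for dataset in result:  # assumed: the task result yields Dataset objects
        _LOG.info('Dataset produced (indexing skipped): id=%s path=%s',
                  dataset.id, dataset.local_path)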
def ingest_cmd(index, config_file, year, queue_size, save_tasks, load_tasks,
               dry_run, executor, allow_product_changes):
    # pylint: disable=too-many-locals
    if config_file:
        config = load_config_from_file(config_file)
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
        tasks = create_task_list(index, output_type, year, source_type, config)
    elif load_tasks:
        config, tasks = load_tasks_(load_tasks)
        driver = get_driver_from_config(config)
        source_type, output_type = ensure_output_type(
            index, config, driver.format,
            allow_product_changes=allow_product_changes)
    else:
        click.echo('Must specify exactly one of --config-file, --load-tasks')
        sys.exit(-1)

    if dry_run:
        check_existing_files(
            get_filename(config, task['tile_index'], task['tile'].sources)
            for task in tasks)
    elif save_tasks:
        save_tasks_(config, tasks, save_tasks)
    else:
        successful, failed = process_tasks(index, config, source_type,
                                           output_type, tasks, queue_size,
                                           executor)
        click.echo('%d successful, %d failed' % (successful, failed))
        sys.exit(failed)
def wofs_app(index, config, tasks, executor, dry_run, queue_size,
             print_output_product, skip_indexing, *args, **kwargs):
    if dry_run:
        check_existing_files(task['file_path'] for task in tasks)
        return 0

    if not skip_indexing:
        # Ensure the output product is in the index (add is idempotent).
        config['wofs_dataset_type'] = index.products.add(config['wofs_dataset_type'])

    if print_output_product:
        click.echo(json.dumps(config['wofs_dataset_type'].definition, indent=4))
        return 0

    click.echo('Starting processing...')
    results = []

    def submit_task(task):
        _LOG.info('Queuing task: %s', task['tile_index'])
        results.append(executor.submit(do_wofs_task, config=config, **task))

    task_queue = itertools.islice(tasks, queue_size)
    for task in task_queue:
        submit_task(task)
    click.echo('Queue filled, waiting for first result...')

    successful = failed = 0
    while results:
        result, results = executor.next_completed(results, None)

        # Submit a new task to replace the one we just finished.
        task = next(tasks, None)
        if task:
            submit_task(task)

        # Process the result.
        try:
            datasets = executor.result(result)
            for dataset in datasets:
                if not skip_indexing:
                    # time.clock() was removed in Python 3.8; use a monotonic timer.
                    start = time.perf_counter()
                    index.datasets.add(dataset, sources_policy='skip')
                    indexing_time = time.perf_counter() - start
                    _LOG.info('Dataset added to index in %fs: id=%s path=%s',
                              indexing_time, dataset.id, dataset.local_path)
                else:
                    _LOG.info('Dataset completed: id=%s path=%s',
                              dataset.id, dataset.local_path)
            successful += 1
        except Exception as err:  # pylint: disable=broad-except
            _LOG.exception('Task failed: %s', err)
            failed += 1
            continue
        finally:
            # Release the task to free memory so there is no leak in
            # executor/scheduler/worker process.
            executor.release(result)

    click.echo('%d successful, %d failed' % (successful, failed))
    _LOG.info('Completed: %d successful, %d failed', successful, failed)
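# submit_task above expands the task dict with **task, so do_wofs_task must
# accept the task's keys as keyword arguments. A toy stand-in showing that
# contract (the name and keys here are assumptions drawn from this section):
def do_wofs_task_stub(config, tile_index=None, file_path=None, **other_keys):
    _LOG.info('Processing tile %s -> %s', tile_index, file_path)
    return []  # the real task returns the datasets it produced

# e.g. do_wofs_task_stub(config={}, **{'tile_index': (15, -40),
#                                      'file_path': 'wofs_15_-40.nc'})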