Example #1
def load_structure(path: pathlib.Path, expected_type):
    """
    Load the expected NamedTuple (with type hints) from a YAML/JSON document.

    :param expected_type: the class/type you expect to get back.
    """
    return dict_to_type(paths.read_document(path), expected_type)
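A self-contained sketch of the same idea, assuming paths.read_document parses a YAML/JSON file into a dict and dict_to_type maps that dict onto the target NamedTuple; the Settings type, file contents, and PyYAML usage below are illustrative, not part of the original module:

import pathlib
from typing import NamedTuple

import yaml  # PyYAML reads both YAML and JSON documents


class Settings(NamedTuple):  # hypothetical target type
    name: str
    retries: int


def load_settings(path: pathlib.Path) -> Settings:
    # The equivalent of dict_to_type(paths.read_document(path), Settings):
    # parse the document into a dict, then map its keys onto the NamedTuple.
    with path.open() as f:
        raw = yaml.safe_load(f)
    return Settings(**raw)

# A settings.yaml containing "name: demo" and "retries: 3" yields
# Settings(name='demo', retries=3).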
Example #2
def ensure_products(index, app_config_files):
    for app_config_file in app_config_files:
        # TODO: Add more validation of config?

        click.secho(f"Loading {app_config_file}", bold=True)
        app_config = paths.read_document(app_config_file)
        in_product, out_product = _ensure_products(app_config, index)
        click.secho(f"Product {in_product.name} → {out_product.name}")
Example #3
def ensure_products(index, app_config, dry_run):
    """
    Ensure the products exist for the given FC config, creating them if necessary.
    If dry run is disabled, the validated output product definition will be added to the database.
    """
    # TODO: Add more validation of config?
    click.secho(f"Loading {app_config}", bold=True)
    app_config_file = paths.read_document(app_config)
    _, out_product = _ensure_products(app_config_file, index, dry_run)
    click.secho(
        f"Output product definition for {out_product.name} product exits in the database for the given "
        f"FC input config file")
Example #4
def _make_config_and_description(index: Index, task_desc_path: Path) -> Tuple[dict, TaskDescription]:
    task_desc = serialise.load_structure(task_desc_path, TaskDescription)

    task_time: datetime = task_desc.task_dt
    app_config = task_desc.runtime_state.config_path

    config = paths.read_document(app_config)

    # TODO: This carries over the old behaviour of each load; it should probably be replaced with *tag*
    config['task_timestamp'] = int(task_time.timestamp())
    config['app_config_file'] = Path(app_config)
    config = make_fc_config(index, config)

    return config, task_desc
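For reference, the attributes read from task_desc here and in the next example imply a structure roughly like the following; only the fields used by these two loaders are shown, and the class shapes are assumptions:

from datetime import datetime
from pathlib import Path
from typing import NamedTuple


class RuntimeState(NamedTuple):
    config_path: Path  # where the app config document lives


class TaskDescription(NamedTuple):
    task_dt: datetime            # timestamp recorded for the task
    runtime_state: RuntimeState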
Example #5
def _make_config_and_description(
        index: Index, task_desc_path: Path) -> Tuple[dict, TaskDescription]:
    task_desc = serialise.load_structure(task_desc_path, TaskDescription)

    app_config = task_desc.runtime_state.config_path

    config = paths.read_document(app_config)

    # TODO: Temporary until ODC code is updated
    config['output_type'] = config['output_product']
    config['app_config_file'] = str(app_config)
    config = stacker.make_stacker_config(index, config)
    config['taskfile_version'] = make_tag(task_desc)
    config['version'] = digitalearthau.__version__ + ' ' + datacube.__version__

    return config, task_desc
Example #6
def submit(index: Index,
           app_config: str,
           project: str,
           queue: str,
           no_qsub: bool,
           time_range: Tuple[datetime, datetime],
           tag: str):
    _LOG.info('Tag: %s', tag)

    app_config_path = Path(app_config).resolve()
    app_config = paths.read_document(app_config_path)

    task_desc, task_path = init_task_app(
        job_type="fc",
        source_products=[app_config['source_product']],
        output_products=[app_config['output_product']],
        # TODO: Use @datacube.ui.click.parsed_search_expressions to allow params other than time from the cli?
        datacube_query_args=Query(index=index, time=time_range).search_terms,
        app_config_path=app_config_path,
        pbs_project=project,
        pbs_queue=queue
    )
    _LOG.info("Created task description: %s", task_path)

    if no_qsub:
        _LOG.info('Skipping submission due to --no-qsub')
        return 0

    submit_subjob(
        name='generate',
        task_desc=task_desc,
        command=[
            'generate', '-v', '-v',
            '--task-desc', str(task_path),
            '--tag', tag
        ],
        qsub_params=dict(
            mem='20G',
            wd=True,
            ncpus=1,
            walltime='1h',
            name='fc-generate-{}'.format(tag)
        )
    )
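submit takes what look like command-line parameters; a hedged sketch of how such a function might be wired up with click (the option names are inferred from the parameter names, not taken from the real CLI, and the time-range option is omitted since the TODO above suggests it may move to datacube's parsed search expressions):

import click


@click.command()
@click.argument('app_config')
@click.option('--project', help='PBS project code')
@click.option('--queue', help='PBS queue name')
@click.option('--no-qsub', is_flag=True, help='Prepare everything but skip PBS submission')
@click.option('--tag', help='Unique tag for this run')
def cli(app_config, project, queue, no_qsub, tag):
    # Wiring only: a real command would open an index and call submit().
    click.echo(f'would submit {app_config} to {project}/{queue} (tag={tag}, no_qsub={no_qsub})')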
Example #7
def generate(index: Index, app_config: str, output_filename: str,
             dry_run: bool, time_range: Tuple[datetime, datetime]):
    """
    Generate tasks into a file and queue a PBS job to process them.

    By default, also ensures the Output Product is present in the database.

    --dry-run will still generate a tasks file, but not add the output product to the database.
    """
    app_config_file = Path(app_config).resolve()
    app_config = paths.read_document(app_config_file)

    wofs_config = _make_wofs_config(index, app_config, dry_run)

    # Patch in config file location, for recording in dataset metadata
    wofs_config['app_config_file'] = app_config_file

    wofs_tasks = _make_wofs_tasks(index, wofs_config, time_range)
    num_tasks_saved = task_app.save_tasks(wofs_config, wofs_tasks,
                                          output_filename)
    _LOG.info('Found %d tasks', num_tasks_saved)
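_make_wofs_tasks is not shown; a task generator over the queried time range could plausibly look like this (Query and index.datasets.search are real datacube API, mirroring the search-term usage in the submit examples, but the config key and task shape are illustrative):

from datacube.api.query import Query


def _make_tasks_sketch(index, config, time_range):
    # Build search terms the same way the submit() examples do,
    # then yield one task per matching source dataset.
    query_args = Query(index=index, time=time_range).search_terms
    for dataset in index.datasets.search(product=config['source_product'],
                                         **query_args):
        yield {'dataset': dataset}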
Example #8
def generate(index: Index, app_config: str, output_filename: str,
             dry_run: bool):
    """
    Generate tasks into a queue file.

    By default, also ensures the Output Product is present in the database.

    --dry-run will still generate a tasks file, but not add the output product to the database.
    """
    app_config_file = Path(app_config).resolve()
    app_config = paths.read_document(app_config_file)

    fc_config = _make_fc_config(index, app_config, dry_run)

    # Patch in config file location, for recording in dataset metadata
    fc_config['app_config_file'] = app_config_file

    fc_tasks = _make_fc_tasks(index, fc_config)

    num_tasks_saved = save_tasks(fc_config, fc_tasks, output_filename)
    _LOG.info('Found %d tasks', num_tasks_saved)
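save_tasks streams the generated tasks to the output file and reports how many were written; a minimal stand-in with the same contract (a sketch only, not the datacube implementation, which does more; the config-then-tasks layout is the essential idea assumed here):

import pickle


def save_tasks_sketch(config: dict, tasks, taskfile: str) -> int:
    # Write the config first, then each task in turn, so a worker can
    # re-read the stream sequentially; return the number of tasks saved.
    count = 0
    with open(taskfile, 'wb') as out:
        pickle.dump(config, out)
        for task in tasks:
            pickle.dump(task, out)
            count += 1
    return count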
Example #9
def submit(index: Index, app_config: str, project: str, queue: str,
           no_qsub: bool, time_range: Tuple[datetime, datetime], tag: str,
           email_options: str, email_id: str, dry_run: bool):
    """
    Kick off a two-stage PBS job.

    Stage 1 (Generate task file):
        The task-app machinery loads a config file, from a path specified on the
        command line, into a dict.

        If dry run is enabled, a dummy DatasetType is created for task generation without
        indexing the product in the database.
        If dry run is disabled, tasks are generated into a file and a PBS job is queued to
        process them.

    Stage 2 (Run):
        During a normal run, the following steps are performed:
           1) Yield tasks for dispatch to workers
           2) Load data
           3) Run the FC algorithm
           4) Attach metadata
           5) Write output files, and
           6) Finally, index the newly created FC output netCDF files

        If dry run is enabled, the application only prepares a list of output files to be created and does not
        record anything in the database.
    """
    _LOG.info('Tag: %s', tag)

    app_config_path = Path(app_config).resolve()
    app_config = paths.read_document(app_config_path)

    if not time_range or not all(time_range):
        query_args = Query(index=index).search_terms
    else:
        query_args = Query(index=index, time=time_range).search_terms

    task_desc, task_path = init_task_app(
        job_type="fc",
        source_products=[app_config['source_product']],
        output_products=[app_config['output_product']],
        # TODO: Use @datacube.ui.click.parsed_search_expressions to allow params other than time from the cli?
        datacube_query_args=query_args,
        app_config_path=app_config_path,
        pbs_project=project,
        pbs_queue=queue)
    _LOG.info("Created task description: %s", task_path)

    if no_qsub:
        _LOG.info('Skipping submission due to --no-qsub')
        return 0

    # If dry run is not enabled, just pass the verbose option
    dry_run_option = '--dry-run' if dry_run else '-v'
    extra_qsub_args = '-M {0} -m {1}'.format(email_id, email_options)

    # Append the email options and email id to the PbsParameters extra_qsub_args list
    task_desc.runtime_state.pbs_parameters.extra_qsub_args.extend(
        extra_qsub_args.split(' '))

    submit_subjob(name='generate',
                  task_desc=task_desc,
                  command=[
                      'generate',
                      '-vv',
                      '--task-desc',
                      str(task_path),
                      '--tag',
                      tag,
                      '--log-queries',
                      '--email-id',
                      email_id,
                      '--email-options',
                      email_options,
                      dry_run_option,
                  ],
                  qsub_params=dict(name='fc-generate-{}'.format(tag),
                                   mem='medium',
                                   wd=True,
                                   nodes=1,
                                   walltime='1h'))
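A small worked example of the qsub email-flag handling above (the values are hypothetical; -m abe is standard PBS shorthand for mail on abort, begin and end):

email_id = 'user@example.com'
email_options = 'abe'
extra_qsub_args = '-M {0} -m {1}'.format(email_id, email_options)

# Splitting produces the individual arguments appended to
# pbs_parameters.extra_qsub_args:
assert extra_qsub_args.split(' ') == ['-M', 'user@example.com', '-m', 'abe']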