Beispiel #1
0
def diff(ctx):
    """Compare the config lockfile against the current config.

    Reads ``cfg-lock.json`` from ``CONFIG_DIR``, exports the current
    configuration as JSON, and compares the two line-by-line.

    Args:
        ctx: Task context passed in by the task runner. Not used here.

    Raises:
        RuntimeError: If the current configuration differs from the lockfile.
            The message contains the unified diff.
    """
    init_config()
    config = get_config()

    with open(CONFIG_DIR / 'cfg-lock.json', 'r') as lockfile:
        # Remove trailing newlines to match `json.dumps` behavior
        lockfile_config = lockfile.read().rstrip('\n')
    current_config = export_config_json(config)

    # Named `diff_lines` (not `diff`) to avoid shadowing this function.
    diff_lines = list(
        difflib.unified_diff(
            lockfile_config.splitlines(keepends=True),
            current_config.splitlines(keepends=True),
            fromfile='lockfile',
            tofile='current_config',
        ))

    if diff_lines:
        diff_str = ''.join(diff_lines)
        # Raising is the failure signal; nothing placed after it would run.
        raise RuntimeError(
            f'Configuration differs from lockfile:\n{diff_str}\n\n'
            'Please re-export the config (`inv config.export`).', )

    print('🎉🦆 Configuration comparison passed.')
Beispiel #2
0
def fetch(pattern, dry_run, workers) -> None:
    """Fetch assets for datasets matching PATTERN."""
    # These imports are deferred into the function body to work around an
    # issue with sphinx-click:
    #     https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )
    from qgreenland.util.luigi import fetch_tasks_from_dataset

    init_config()
    config = get_config()

    def _id_matches(item):
        # `item[1]` is the dataset object; its id is glob-matched.
        return fnmatch(item[1].id, pattern)

    matched_datasets = select(_id_matches, config.datasets).values()

    # Always report what was selected, even for a dry run.
    print('Fetching all assets for the following datasets:')
    id_listing = '\n'.join(dataset.id for dataset in matched_datasets)
    print(textwrap.indent(id_listing, '  - '))

    if dry_run:
        print('DRY RUN enabled. Aborting fetch.')
        return

    # Flatten the per-dataset task lists into a single list of tasks.
    fetch_tasks = lmapcat(fetch_tasks_from_dataset, matched_datasets)

    build_result = luigi.build(
        fetch_tasks,
        workers=workers,
        # Unlike CLI, running tasks from Python does not feature an "identical
        # process lock" by default.
        no_lock=False,
        detailed_summary=True,
    )

    if build_result.scheduling_succeeded:
        return
    raise click.UsageError('Scheduling failed. See error log above.')
Beispiel #3
0
def provenance(layer_id):
    """List steps that are taken to process layer LAYER_ID."""
    # These imports are deferred into the function body to work around an
    # issue with sphinx-click:
    #     https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )

    init_config()
    config = get_config()

    try:
        layer_cfg = config.layers[layer_id]
    except KeyError:
        # Unknown layer: report it and stop with a non-zero exit status.
        print(f'Could not find layer {layer_id}.')
        sys.exit(1)
    else:
        provenance_text = layer_provenance_text(layer_cfg)
        print(provenance_text)
Beispiel #4
0
def validate(ctx, verbose=False):
    """Validate the configuration files.

    The validation is built-in to the code that loads the config files, and this
    happens when initializing the configuration. Any validation errors will be
    raised from the import statement.
    """
    # No explicit checks are made here; getting past these two calls without
    # an exception is what counts as "passed".
    init_config()
    config = get_config()

    if verbose:
        # Pretty-print the two collections, each under its own heading.
        for heading, attr_name in (
            ('Layers:', 'layers'),
            ('Datasets:', 'datasets'),
        ):
            print(heading)
            pprint(getattr(config, attr_name))
            print()

        # The layer tree supplies its own text rendering.
        print('Layer Tree:')
        print(config.layer_tree.render())
        print()

    print('🎉🦆 Configuration validation passed.')
Beispiel #5
0
def layers(
    pattern: str,
    format: str,
) -> None:
    """List available layers matching PATTERN."""
    # These imports are deferred into the function body to work around an
    # issue with sphinx-click:
    #     https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )
    init_config()
    config = get_config()

    def _id_matches(item):
        # `item[1]` is the layer object; its id is glob-matched.
        return fnmatch(item[1].id, pattern)

    matching_layers = select(_id_matches, config.layers).values()

    if format == 'ids':
        for lyr in matching_layers:
            print(lyr.id)
        return

    if format == 'titles':
        for lyr in matching_layers:
            print(f'{lyr.id}: {lyr.title}')
        return

    if format == 'json':
        # TODO: This doesn't use the __json__ helper and dumps the full layer
        # config, as opposed to the filtered version that goes in the lockfile.
        # This is more useful for debugging.
        # (The filtered dump would serialize `list(matching_layers)` instead.)
        full_dump = [lyr.dict() for lyr in matching_layers]
        serialized = json.dumps(
            full_dump,
            cls=MagicJSONEncoder,
            indent=2,
            sort_keys=True,
        )
        print(serialized)
Beispiel #6
0
"""Exports layer configuration as a CSV file."""

# Hack to import from qgreenland
import os
import sys
# Put this file's parent directory at the front of the module search path so
# that the `qgreenland` imports below can be attempted from there.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
PARENT_DIR = os.path.dirname(THIS_DIR)
sys.path.insert(0, PARENT_DIR)


def _main():
    """Initialize the configuration and pass it to the CSV exporter."""
    # Imported here, after the `sys.path` tweak above has taken effect.
    from qgreenland.util.config.config import (
        init_config,
        get_config,
    )
    from qgreenland.util.config.export import export_config_csv

    init_config()
    loaded_config = get_config()
    export_config_csv(loaded_config)


if __name__ == '__main__':
    _main()
Beispiel #7
0
def export(ctx):
    """Export the config as a JSON string."""
    init_config()

    # Serialize the loaded configuration and write it to stdout.
    exported_json = export_config_json(get_config())
    print(exported_json)
Beispiel #8
0
def run(
    include: tuple[str, ...],
    exclude: tuple[str, ...],
    exclude_manual_assets: bool,
    force_package_zip,
    force_no_package_zip,
    dry_run: bool,
    fetch_only: bool,
    workers: int,
) -> None:
    """Run pipelines for layers matching filters."""
    # These imports are deferred into the function body to work around an
    # issue with sphinx-click:
    #     https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.luigi.tasks.pipeline import (
        LayerPipelines,
        QGreenlandAll,
        QGreenlandNoZip,
    )
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )

    # The two "force" flags contradict each other; refuse to guess.
    if force_package_zip and force_no_package_zip:
        raise RuntimeError('Can not force zip AND no zip.')

    init_config(
        include_patterns=include,
        exclude_patterns=exclude,
        exclude_manual_assets=exclude_manual_assets,
    )
    config = get_config()

    has_layer_filter = include or exclude

    if fetch_only:
        # Don't do anything except fetch the input asset for each layer.
        tasks = [LayerPipelines(fetch_only=fetch_only)]
    elif force_no_package_zip or (has_layer_filter and not force_package_zip):
        # Zipping is skipped when explicitly disabled, or when layer filters
        # are in effect and zipping was not explicitly forced.
        tasks = [QGreenlandNoZip()]
    else:
        tasks = [QGreenlandAll()]

    print(f'Running tasks: {tasks!s}')
    print()

    if has_layer_filter or exclude_manual_assets or dry_run:
        if fetch_only:
            action = 'Fetching data'
        else:
            action = 'Running pipelines'
        print(f'{action} for the following layers:')
        for layer_key in config.layers:
            print(f'  - {layer_key}')
        print()

    if dry_run:
        print('DRY RUN enabled. Aborting run.')
        return

    build_result = luigi.build(
        tasks,
        workers=workers,
        # Unlike CLI, running tasks from Python does not feature an "identical
        # process lock" by default.
        no_lock=False,
        detailed_summary=True,
    )

    if build_result.scheduling_succeeded:
        return
    raise SystemExit('Scheduling failed. See log above for details.')