def test_dataset_and_asset_indexes():
    """Every dataset and asset must be keyed by its own id."""
    config = get_config()

    for dataset_id, dataset_cfg in config.datasets.items():
        assert dataset_cfg.id == dataset_id
        for asset_id, asset_cfg in dataset_cfg.assets.items():
            assert asset_cfg.id == asset_id
def diff(ctx):
    """Compare the config lockfile against the current config.

    Prints a success message when the lockfile matches the freshly-exported
    config; otherwise raises RuntimeError with a unified diff of the two.
    """
    init_config()
    config = get_config()

    with open(CONFIG_DIR / 'cfg-lock.json', 'r') as lockfile:
        # Remove trailing newlines to match `json.dumps` behavior
        lockfile_config = lockfile.read().rstrip('\n')

    current_config = export_config_json(config)

    diff = list(
        difflib.unified_diff(
            lockfile_config.splitlines(keepends=True),
            current_config.splitlines(keepends=True),
            fromfile='lockfile',
            tofile='current_config',
        ))

    if len(diff) == 0:
        print('🎉🦆 Configuration comparison passed.')
    else:
        diff_str = ''.join(diff)
        # NOTE(review): the original code called `ctx.exit(1)` *after* this
        # `raise`, which made it unreachable dead code; it has been removed.
        # The uncaught exception already aborts with a non-zero exit status.
        raise RuntimeError(
            f'Configuration differs from lockfile:\n{diff_str}\n\n'
            'Please re-export the config (`inv config.export`).',
        )
def test_immutable_model():
    """Assigning to a field of an immutable model must raise TypeError."""
    config = get_config()

    # Immutable models raise a TypeError on item assignment with a message
    # like: `TypeError: "Layer" is immutable and does not support item
    # assignment`
    background_layer = config.layers['background']
    with pytest.raises(TypeError):
        # TODO: Remove type-ignore below.
        # https://github.com/pytest-dev/pytest/issues/8984
        background_layer.description = 'override'  # type:ignore
def make_qgis_project_file(path: Path) -> None:
    """Create a QGIS project file with the correct stuff in it.

    path: the desired path to .qgs/.qgz project file, e.g.:
        /luigi/data/qgreenland/qgreenland.qgs

    Developed from examples:
        https://docs.qgis.org/testing/en/docs/pyqgis_developer_cookbook/intro.html#using-pyqgis-in-standalone-scripts
    """
    config = get_config()

    # Create a new project; initializes basic structure
    project = qgc.QgsProject.instance()
    # Write immediately so the project has an on-disk location before any
    # settings or layers are added.
    project.write(str(path))

    # CRS comes from the project configuration (e.g. an EPSG code string —
    # exact format depends on `config.project.crs`; not visible here).
    project_crs = qgc.QgsCoordinateReferenceSystem(config.project.crs)
    project.setCrs(project_crs)

    # Set the map background color to be gray (same color as Quantarctica)
    project.setBackgroundColor(QColor(200, 200, 200))

    # Set the default extent. Eventually we may want to pull the extent directly
    # from the configured 'map frame' layer.
    view = project.viewSettings()
    project_rectangle = qgc.QgsReferencedRectangle(
        qgc.QgsRectangle(
            config.project.boundaries['data'].bbox.min_x,
            config.project.boundaries['data'].bbox.min_y,
            config.project.boundaries['data'].bbox.max_x,
            config.project.boundaries['data'].bbox.max_y,
        ),
        project_crs,
    )
    view.setDefaultViewExtent(project_rectangle)

    _add_decorations(project)

    # Only layers selected for the package end up in the project layer tree.
    package_layer_tree = prune_layers_not_in_package(config.layer_tree)
    _add_layers_and_groups(project, package_layer_tree)

    # TODO: is it normal to write multiple times?
    project.write()

    # Release all file locks! If we don't do this, we won't be able to clean up
    # layer source files after zipping the project.
    project.clear()
def generate_layer_pipelines(
    *,
    fetch_only: bool = False,
) -> list[luigi.Task]:
    """Generate a list of pre-configured tasks based on layer configuration.

    Instead of calling tasks now, we return a list of callables with the
    arguments already populated.
    """
    config = get_config()
    tasks: list[luigi.Task] = []

    for layer_cfg in config.layers.values():
        # Online layers have no fetching or processing pipeline; skip them.
        if isinstance(layer_cfg.input.asset, OnlineAsset):
            continue

        # Build the chain, each task depending on the previous one.
        task = fetch_task_from_layer(layer_cfg)

        if fetch_only:
            tasks.append(task)
            continue

        # A layer without steps is simply fetched and finalized.
        if layer_cfg.steps:
            for step_number in range(len(layer_cfg.steps)):
                task = ChainableTask(
                    requires_task=task,
                    layer_id=layer_cfg.id,
                    step_number=step_number,
                )

        # Only the last task in the layer pipeline is needed to trigger all
        # "required" tasks in that pipeline.
        task = FinalizeTask(
            requires_task=task,
            layer_id=layer_cfg.id,
        )

        tasks.append(task)

    return tasks
def provenance(layer_id):
    """List steps that are taken to process layer LAYER_ID."""
    # Hack to work around issue with sphinx-click:
    # https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )

    init_config()
    config = get_config()

    if layer_id not in config.layers:
        print(f'Could not find layer {layer_id}.')
        sys.exit(1)

    print(layer_provenance_text(config.layers[layer_id]))
def fetch(pattern, dry_run, workers) -> None:
    """Fetch assets for datasets matching PATTERN."""
    # Hack to work around issue with sphinx-click:
    # https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )
    from qgreenland.util.luigi import fetch_tasks_from_dataset

    init_config()
    config = get_config()

    dataset_matches = select(
        lambda i: fnmatch(i[1].id, pattern),
        config.datasets,
    ).values()

    matched_ids = '\n'.join(dataset.id for dataset in dataset_matches)
    print('Fetching all assets for the following datasets:')
    print(textwrap.indent(matched_ids, ' - '))

    if dry_run:
        print('DRY RUN enabled. Aborting fetch.')
        return

    fetch_tasks = lmapcat(
        lambda i: fetch_tasks_from_dataset(i),
        dataset_matches,
    )

    result = luigi.build(
        fetch_tasks,
        workers=workers,
        # Unlike CLI, running tasks from Python does not feature an "identical
        # process lock" by default.
        no_lock=False,
        detailed_summary=True,
    )

    if not result.scheduling_succeeded:
        raise click.UsageError('Scheduling failed. See error log above.')
def validate(ctx, verbose=False):
    """Validate the configuration files.

    The validation is built-in to the code that loads the config files, and
    this happens when initializing the configuration. Any validation errors
    will be raised from the import statement.
    """
    init_config()
    config = get_config()

    if verbose:
        # Dump each major config section, separated by blank lines.
        for heading, section in (
            ('Layers:', config.layers),
            ('Datasets:', config.datasets),
        ):
            print(heading)
            pprint(section)
            print()

        print('Layer Tree:')
        print(config.layer_tree.render())
        print()

    print('🎉🦆 Configuration validation passed.')
def layers(
    pattern: str,
    format: str,
) -> None:
    """List available layers matching PATTERN."""
    # Hack to work around issue with sphinx-click:
    # https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )

    init_config()
    config = get_config()

    matching_layers = select(
        lambda i: fnmatch(i[1].id, pattern),
        config.layers,
    ).values()

    if format == 'ids':
        for layer_cfg in matching_layers:
            print(layer_cfg.id)
    elif format == 'titles':
        for layer_cfg in matching_layers:
            print(f'{layer_cfg.id}: {layer_cfg.title}')
    elif format == 'json':
        # TODO: This doesn't use the __json__ helper and dumps the full layer
        # config, as opposed to the filtered version that goes in the lockfile.
        # This is more useful for debugging.
        dump = json.dumps(
            # Full dump:
            [layer_cfg.dict() for layer_cfg in matching_layers],
            # Filtered dump:
            # list(matching_layers),
            cls=MagicJSONEncoder,
            indent=2,
            sort_keys=True,
        )
        print(dump)
def run(self):
    """Export the config manifest into this task's output location."""
    config = get_config()

    # Write via a temporary path so the output appears atomically.
    with self.output().temporary_path() as manifest_path:
        export_config_manifest(config, output_path=manifest_path)
"""Exports layer configuration as a CSV file.""" # Hack to import from qgreenland import os, sys # noqa: E401 THIS_DIR = os.path.dirname(os.path.abspath(__file__)) PARENT_DIR = os.path.dirname(THIS_DIR) sys.path.insert(0, PARENT_DIR) if __name__ == '__main__': from qgreenland.util.config.config import ( init_config, get_config, ) from qgreenland.util.config.export import export_config_csv init_config() export_config_csv(get_config())
def export(ctx):
    """Export the config as a JSON string."""
    init_config()
    print(export_config_json(get_config()))
def node(self):
    """Find the corresponding LayerNode in the config tree."""
    tree = get_config().layer_tree
    return leaf_lookup(tree, target_node_name=self.layer_id)
def test_get_layer_config_one():
    """Looking up a known layer by id must succeed."""
    config = get_config()

    # A missing layer would raise a KeyError here, and pytest would
    # appropriately fail the test.
    assert config.layers['background']
def test_get_layer_config_all():
    """The full layer config contains at least two layers."""
    layer_config = get_config().layers

    # There are at least 2 layers.
    assert len(layer_config) >= 2
def layer_cfg(self):
    """Find the config associated with this layer."""
    all_layers = get_config().layers
    return all_layers[self.layer_id]
def test_layer_indexes():
    """Each layer must be stored under its own id."""
    config = get_config()

    for layer_id, layer_cfg in config.layers.items():
        assert layer_cfg.id == layer_id
def dataset_cfg(self):
    """Look up this task's dataset configuration by id."""
    return get_config().datasets[self.dataset_id]
def run(
    include: tuple[str, ...],
    exclude: tuple[str, ...],
    exclude_manual_assets: bool,
    force_package_zip,
    force_no_package_zip,
    dry_run: bool,
    fetch_only: bool,
    workers: int,
) -> None:
    """Run pipelines for layers matching filters."""
    # Hack to work around issue with sphinx-click:
    # https://github.com/click-contrib/sphinx-click/issues/86#issuecomment-991196764
    from qgreenland.util.luigi.tasks.pipeline import (
        LayerPipelines,
        QGreenlandAll,
        QGreenlandNoZip,
    )
    from qgreenland.util.config.config import (
        get_config,
        init_config,
    )

    if force_package_zip and force_no_package_zip:
        raise RuntimeError('Can not force zip AND no zip.')

    init_config(
        include_patterns=include,
        exclude_patterns=exclude,
        exclude_manual_assets=exclude_manual_assets,
    )
    config = get_config()

    # Zipping is skipped when explicitly forced off, or when the layer set is
    # filtered (unless zipping is explicitly forced on).
    filtered = include or exclude
    skip_zip = force_no_package_zip or (filtered and not force_package_zip)

    if fetch_only:
        # Don't do anything except fetch the input asset for each layer.
        tasks = [LayerPipelines(fetch_only=fetch_only)]
    else:
        tasks = [QGreenlandNoZip() if skip_zip else QGreenlandAll()]

    print(f'Running tasks: {str(tasks)}')
    print()

    if include or exclude or exclude_manual_assets or dry_run:
        action = 'Fetching data' if fetch_only else 'Running pipelines'
        print(f'{action} for the following layers:')
        for layer_id in config.layers:
            print(f' - {layer_id}')
        print()

    if dry_run:
        print('DRY RUN enabled. Aborting run.')
        return

    result = luigi.build(
        tasks,
        workers=workers,
        # Unlike CLI, running tasks from Python does not feature an "identical
        # process lock" by default.
        no_lock=False,
        detailed_summary=True,
    )

    if not result.scheduling_succeeded:
        raise SystemExit('Scheduling failed. See log above for details.')