def validate_designs(
    relations: List[RelationDescription],
    keep_going=False,
    skip_sources=False,
    skip_dependencies=False,
) -> None:
    """
    Make sure that all table design files pass the validation checks.

    See module documentation for list of checks.
    """
    config = etl.config.get_dw_config()
    _error_occurred.clear()

    # Semantic, ordering, and reload checks always run.
    semantically_valid = validate_semantics(relations, keep_going=keep_going)
    execution_ordered = validate_execution_order(semantically_valid, keep_going=keep_going)
    validate_reload(config.schemas, semantically_valid, keep_going=keep_going)

    # Checking against upstream sources and the warehouse are each optional.
    if not skip_sources:
        with Timer() as timer:
            validate_upstream_sources(config.schemas, execution_ordered, keep_going=keep_going)
        logger.info("Validated designs against upstream sources (%s)", timer)
    else:
        logger.info("Skipping validation of designs against upstream sources")

    if not skip_dependencies:
        with Timer() as timer:
            validate_transforms(config.dsn_etl, execution_ordered, keep_going=keep_going)
        logger.info("Validated transforms against data warehouse (%s)", timer)
    else:
        logger.info("Skipping validation of transforms against data warehouse")

    # With "keep going", individual failures only set the flag; surface them now.
    if _error_occurred.is_set():
        raise ETLDelayedExit(
            "At least one error occurred while validating with 'keep going' option"
        )
def unload_to_s3(
    config: DataWarehouseConfig,
    relations: List[RelationDescription],
    allow_overwrite: bool,
    keep_going: bool,
    dry_run: bool,
) -> None:
    """
    Create CSV files for selected tables based on the S3 path in an "unload" source.
    """
    logger.info("Loading table design for %d relation(s) to look for unloadable relations", len(relations))
    etl.relation.RelationDescription.load_in_parallel(relations)

    candidates = [relation for relation in relations if relation.is_unloadable]
    if not candidates:
        logger.warning("Found no relations that are unloadable.")
        return
    logger.info("Starting to unload %s relation(s)", len(candidates))

    # Every unloadable relation must name a schema that is declared as an unload target.
    targets_by_name = {schema.name: schema for schema in config.schemas if schema.is_an_unload_target}
    work_items = []
    for candidate in candidates:
        if candidate.unload_target not in targets_by_name:
            raise TableDesignSemanticError("Unload target specified, but not defined: '%s'" % candidate.unload_target)
        work_items.append((candidate, targets_by_name[candidate.unload_target]))

    failed = False
    total = len(work_items)
    connection = etl.db.connection(config.dsn_etl, autocommit=True, readonly=True)
    with closing(connection) as conn:
        for position, (candidate, target_schema) in enumerate(work_items, start=1):
            try:
                unload_relation(
                    conn,
                    candidate,
                    target_schema,
                    {"current": position, "final": total},
                    allow_overwrite=allow_overwrite,
                    dry_run=dry_run,
                )
            except Exception as exc:
                if not keep_going:
                    raise DataUnloadError(exc) from exc
                # Best-effort mode: record the failure, log it, and move on.
                failed = True
                logger.warning("Unload failed for '%s'", candidate.identifier)
                logger.exception("Ignoring this exception and proceeding as requested:")

    if failed:
        raise ETLDelayedExit("At least one error occurred while unloading with 'keep going' option")