Example 1
def validate_designs(
    relations: List[RelationDescription],
    keep_going: bool = False,
    skip_sources: bool = False,
    skip_dependencies: bool = False,
) -> None:
    """
    Make sure that all table design files pass the validation checks.

    See module documentation for list of checks.
    """
    config = etl.config.get_dw_config()
    # Reset the module-level flag that the validators set when keep_going
    # swallows an error; it is checked again at the end of this run.
    _error_occurred.clear()

    valid_descriptions = validate_semantics(relations, keep_going=keep_going)
    ordered_descriptions = validate_execution_order(valid_descriptions,
                                                    keep_going=keep_going)

    validate_reload(config.schemas, valid_descriptions, keep_going=keep_going)

    if skip_sources:
        logger.info("Skipping validation of designs against upstream sources")
    else:
        with Timer() as timer:
            validate_upstream_sources(config.schemas,
                                      ordered_descriptions,
                                      keep_going=keep_going)
            logger.info("Validated designs against upstream sources (%s)",
                        timer)

    if skip_dependencies:
        logger.info("Skipping validation of transforms against data warehouse")
    else:
        with Timer() as timer:
            validate_transforms(config.dsn_etl,
                                ordered_descriptions,
                                keep_going=keep_going)
            logger.info("Validated transforms against data warehouse (%s)",
                        timer)

    if _error_occurred.is_set():
        raise ETLDelayedExit(
            "At least one error occurred while validating with 'keep going' option"
        )
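
The "keep going" behavior above hinges on a shared flag: with keep_going set, the individual validators record failures on the module-level _error_occurred event instead of raising, and a single ETLDelayedExit surfaces once every check has run. Below is a minimal, self-contained sketch of that pattern; run_check, run_all, and the check callables are hypothetical stand-ins, not names from the source module.

import logging
import threading

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-level event shared by all checks, mirroring _error_occurred above.
_error_occurred = threading.Event()


class ETLDelayedExit(Exception):
    """Raised at the end of a run when failures were deferred via keep_going."""


def run_check(check, keep_going=False):
    # With keep_going, record the failure and move on; otherwise fail fast.
    try:
        check()
    except Exception:
        if not keep_going:
            raise
        _error_occurred.set()
        logger.exception("Ignoring this exception and proceeding as requested:")


def run_all(checks, keep_going=False):
    _error_occurred.clear()
    for check in checks:
        run_check(check, keep_going=keep_going)
    if _error_occurred.is_set():
        raise ETLDelayedExit("At least one error occurred while validating with 'keep going' option")


if __name__ == "__main__":
    # The failing second check is logged, the loop finishes, and only then
    # does the single ETLDelayedExit propagate.
    run_all([lambda: None, lambda: 1 / 0], keep_going=True)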
Example 2
def unload_to_s3(config: DataWarehouseConfig, relations: List[RelationDescription],
                 allow_overwrite: bool, keep_going: bool, dry_run: bool) -> None:
    """
    Create CSV files for selected tables based on the S3 path in an "unload" source.
    """
    logger.info("Loading table design for %d relation(s) to look for unloadable relations", len(relations))
    etl.relation.RelationDescription.load_in_parallel(relations)

    unloadable_relations = [d for d in relations if d.is_unloadable]
    if not unloadable_relations:
        logger.warning("Found no relations that are unloadable.")
        return
    logger.info("Starting to unload %s relation(s)", len(unloadable_relations))

    # Pair each unloadable relation with its target schema up front so that a
    # misconfigured unload target fails fast, before any data is unloaded.
    target_lookup = {schema.name: schema for schema in config.schemas if schema.is_an_unload_target}
    relation_target_tuples = []
    for relation in unloadable_relations:
        if relation.unload_target not in target_lookup:
            raise TableDesignSemanticError("Unload target specified, but not defined: '%s'" % relation.unload_target)
        relation_target_tuples.append((relation, target_lookup[relation.unload_target]))
    

    error_occurred = False
    conn = etl.db.connection(config.dsn_etl, autocommit=True, readonly=True)
    with closing(conn) as conn:
        for i, (relation, unload_schema) in enumerate(relation_target_tuples):
            try:
                index = {"current": i + 1, "final": len(relation_target_tuples)}
                unload_relation(conn, relation, unload_schema, index,
                                allow_overwrite=allow_overwrite, dry_run=dry_run)
            except Exception as exc:
                if keep_going:
                    error_occurred = True
                    logger.warning("Unload failed for '%s'", relation.identifier)
                    logger.exception("Ignoring this exception and proceeding as requested:")
                else:
                    raise DataUnloadError(exc) from exc

    if error_occurred:
        raise ETLDelayedExit("At least one error occurred while unloading with 'keep going' option")
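
One detail worth noting in the example above: the relation-to-target pairing is resolved in full before the database connection is opened, so a single misconfigured unload_target aborts the run before any relation is unloaded. The sketch below isolates that fail-fast pairing step; Schema, Relation, and pair_with_targets are hypothetical stand-ins for the configuration and RelationDescription objects used in the source.

from dataclasses import dataclass
from typing import List, Tuple


class TableDesignSemanticError(Exception):
    """Raised when a relation names an unload target that is not configured."""


@dataclass
class Schema:
    name: str
    is_an_unload_target: bool = False


@dataclass
class Relation:
    identifier: str
    unload_target: str


def pair_with_targets(relations: List[Relation], schemas: List[Schema]) -> List[Tuple[Relation, Schema]]:
    # Build the lookup once, then resolve every relation before doing any work.
    target_lookup = {schema.name: schema for schema in schemas if schema.is_an_unload_target}
    pairs = []
    for relation in relations:
        if relation.unload_target not in target_lookup:
            raise TableDesignSemanticError(
                "Unload target specified, but not defined: '%s'" % relation.unload_target
            )
        pairs.append((relation, target_lookup[relation.unload_target]))
    return pairs


# Resolves cleanly; replacing "export" with an unknown name in either list
# raises TableDesignSemanticError before anything else happens.
pairs = pair_with_targets(
    [Relation("www.orders", unload_target="export")],
    [Schema("export", is_an_unload_target=True)],
)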