geo_checks.clear()

        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{subject}",
                pass_value=2,
                params=dict(table_name=f"{dag_id}_{subject}_new"),
                result_checker=operator.ge,
            )
        )

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{subject}",
                params=dict(
                    table_name=f"{dag_id}_{subject}_new",
                    geotype=["POLYGON", "MULTIPOLYGON"],
                ),
                pass_value=1,
            )
        )

        total_checks = count_checks + geo_checks
        check_name[f"{subject}"] = total_checks

    # 9. Execute bundled checks (step 7) on database
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{subject}", checks=check_name[f"{subject}"]
        )
        for subject in files_to_proces.keys()
    ]
    )

    # PREPARE CHECKS
    # The count check and the geometry check both run against this table.
    checked_table = f"{dag_id}_{dag_id}"

    count_checks.append(
        COUNT_CHECK.make_check(
            check_id="count_check",
            pass_value=25,
            params={"table_name": checked_table},
            result_checker=operator.ge,
        )
    )

    geo_checks.append(
        GEO_CHECK.make_check(
            check_id="geo_check",
            params={"table_name": checked_table, "geotype": ["POINT"]},
            pass_value=1,
        )
    )

    # Combined list that is handed to the multi-check operator below.
    total_checks = count_checks + geo_checks

    # 10. RUN bundled CHECKS
    multi_checks = PostgresMultiCheckOperator(
        task_id="multi_check",
        checks=total_checks,
    )

    # 11. Grant database permissions
    grant_db_permissions = PostgresPermissionsOperator(
        task_id="grants",
        dag_name=dag_id,
    )

(slack_at_start >> mkdir >> download_data >> create_SQL >> create_table >>
Example #3
0
        count_checks.clear()
        geo_checks.clear()

        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{key}",
                pass_value=2,
                params=dict(table_name=key),
                result_checker=operator.ge,
            ))

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{key}",
                params=dict(
                    table_name=key,
                    geotype=["POINT", "MULTIPOLYGON"],
                ),
                pass_value=1,
            ))

        total_checks = count_checks + geo_checks
        check_name[key] = total_checks

    # 13. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(task_id=f"multi_check_{key}",
                                   checks=check_name[key])
        for key in files_to_download.keys()
    ]

    # 14. Grant database permissions
        # XXX Get colnames from schema (provenance info)
        checks.append(
            COLNAMES_CHECK.make_check(
                check_id=f"colname_check_{table_name}",
                parameters=["pte", table_name],
                pass_value=field_names,
                result_checker=operator.ge,
            )
        )

        checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{table_name}",
                params=dict(
                    table_name=f"pte.{table_name}",
                    geo_column="geometrie",
                    geotype=geo_type,
                ),
                pass_value=1,
            )
        )

    multi_check = PostgresMultiCheckOperator(task_id="multi_check", checks=checks)

    rename_columns = ProvenanceRenameOperator(
        task_id="rename_columns", dataset_name="rioolnetwerk", pg_schema="pte"
    )

    rename_tables = PostgresOperator(task_id="rename_tables", sql=RENAME_TABLES_SQL,)

        geo_checks.clear()

        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{file_name}",
                pass_value=2,
                params=dict(table_name=file_name),
                result_checker=operator.ge,
            )
        )

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{file_name}",
                params=dict(
                    table_name=file_name,
                    geotype=["POLYGON", "MULTIPOLYGON"],
                ),
                pass_value=1,
            )
        )

        total_checks = count_checks + geo_checks
        check_name[file_name] = total_checks

    # 8. Execute bundled checks (step 7) on database
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{file_name}", checks=check_name[file_name]
        )
        for file_name in data_endpoints.keys()
    ]
        count_checks.clear()
        geo_checks.clear()

        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{key}",
                pass_value=500,
                params=dict(table_name=f"{dag_id}_{key}_new"),
                result_checker=operator.ge,
            ))

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{key}",
                params=dict(
                    table_name=f"{dag_id}_{key}_new",
                    geotype=["MULTILINESTRING"],
                ),
                pass_value=1,
            ))

        total_checks = count_checks + geo_checks
        check_name[f"{key}"] = total_checks

    # 9. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(task_id=f"multi_check_{key}",
                                   checks=check_name[f"{key}"])
        for key in files_to_download.keys()
    ]

    # 10. Rename TABLE
Example #7
0
        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{table_name}",
                pass_value=10,
                params=dict(table_name=f"{dag_id}_{table_name}_new"),
                result_checker=operator.ge,
            )
        )

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{table_name}",
                params=dict(
                    table_name=f"{dag_id}_{table_name}_new",
                    geotype=[
                        "MULTIPOLYGON",
                    ],
                    geo_column="geometrie",
                ),
                pass_value=1,
            )
        )

        total_checks = count_checks + geo_checks
        check_name["{table_name}"] = total_checks

    # 7. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{table_name}",
            checks=check_name["{table_name}"],
Example #8
0
                "id",
                "naam",
                "status",
                "aanwijzingsdatum",
                "intrekkingsdatum",
                "geometry",
            },
            result_checker=operator.ge,
        )
    )

    # Geometry check for the imported table, configured with MULTIPOLYGON.
    checks.append(
        GEO_CHECK.make_check(
            check_id="geo_check",
            params={
                "table_name": "pte.beschermde_stadsdorpsgezichten",
                "geotype": "MULTIPOLYGON",
            },
            pass_value=1,
        )
    )

    correct_geo = PostgresOperator(
        task_id="correct_geo",
        sql=CORRECT_GEO,
    )
    # A single operator receives every check collected in `checks`.
    multi_check = PostgresMultiCheckOperator(
        task_id="multi_check",
        checks=checks,
    )

    rename_table = PostgresOperator(
        task_id="rename_table",
        sql=RENAME_TABLES_SQL,
    )

(
    slack_at_start
    >> drop_imported_table
    >> swift_load_task
    >> correct_geo
Example #9
0
    # PREPARE CHECKS
    for resource in variables:
        # Single source for the table name so that the count check and the
        # geometry check address exactly the same table (no stray whitespace).
        new_table = f"{dag_id}_{resource}_new"

        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{resource}",
                pass_value=50,
                params=dict(table_name=new_table),
                result_checker=operator.ge,
            ))

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{resource}",
                params=dict(
                    table_name=new_table,
                    geotype=["POINT"],
                    geo_column="geometrie",
                ),
                pass_value=1,
            ))

    # Built after the loop: holds the checks of all resources together.
    total_checks = count_checks + geo_checks

    # 9. RUN bundled CHECKS
    # NOTE(review): every per-resource operator receives the complete
    # total_checks list, i.e. the checks of all resources -- confirm that
    # this is intended rather than one bundle per resource.
    multi_checks = [
        PostgresMultiCheckOperator(task_id=f"multi_check_{resource}",
                                   checks=total_checks)
        for resource in variables
    ]

    # 10. Rename COLUMNS based on provenance (if specified)
Example #10
0
            # Both checks address the "_new" table derived from the file name.
            checked_table = f"{dag_id}_{file_name}_new"

            count_checks.append(
                COUNT_CHECK.make_check(
                    check_id=f"count_check_{file_name}",
                    pass_value=2,
                    params={"table_name": checked_table},
                    result_checker=operator.ge,
                )
            )

            geo_checks.append(
                GEO_CHECK.make_check(
                    check_id=f"geo_check_{file_name}",
                    params={
                        "table_name": checked_table,
                        "geotype": [
                            "POINT",
                            "POLYGON",
                            "MULTIPOLYGON",
                            "MULTILINESTRING",
                            "LINESTRING",
                        ],
                    },
                    pass_value=1,
                )
            )

            # Register the combined checks under the file name.
            total_checks = count_checks + geo_checks
            check_name[f"{file_name}"] = total_checks

    # 14. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(task_id=f"multi_check_{file_name}",
                                   checks=check_name[f"{file_name}"])
        for data in files_to_SQL.values() for file_name in data.keys()
                geo_checks.clear()

                count_checks.append(
                    COUNT_CHECK.make_check(
                        check_id=f"count_check_{splitted_tablename}",
                        pass_value=2,
                        params=dict(
                            table_name=f"{dag_id}_{splitted_tablename}_new"),
                        result_checker=operator.ge,
                    ))

                geo_checks.append(
                    GEO_CHECK.make_check(
                        check_id=f"geo_check_{splitted_tablename}",
                        params=dict(
                            table_name=f"{dag_id}_{splitted_tablename}_new",
                            geotype=["MULTIPOLYGON"],
                        ),
                        pass_value=1,
                    ))

            total_checks = count_checks + geo_checks
            check_name[f"{splitted_tablename}"] = total_checks

    # 11. Execute bundled checks on database (see checks definition here above)
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{splitted_tablename}",
            checks=check_name[f"{splitted_tablename}"],
        ) for key in files_to_download.keys()
        for splitted_tablename in key.split("-") if "themas" not in key
    ]
Example #12
0
                result_checker=operator.ge,
            ))

        colname_checks.append(
            COLNAMES_CHECK.make_check(
                check_id=f"colname_check_{route.name}",
                parameters=["public", route.tmp_db_table_name],
                pass_value=set(route.columns),
                result_checker=operator.ge,
            ))

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{route.name}",
                params=dict(
                    table_name=route.tmp_db_table_name,
                    geotype=route.geometry_type.upper(),
                ),
                pass_value=1,
            ))

    checks = count_checks + colname_checks + geo_checks
    multi_check = PostgresMultiCheckOperator(task_id="multi_check",
                                             checks=checks)

    renames = [
        PostgresTableRenameOperator(
            task_id=f"rename_{route.name}",
            old_table_name=route.tmp_db_table_name,
            new_table_name=route.db_table_name,
        ) for route in ROUTES
    ]
    # Both checks are defined against the same "_new" table.
    checked_table = f"{schema_name}_{table_name}_new"

    count_checks.append(
        COUNT_CHECK.make_check(
            check_id="count_check",
            pass_value=1,
            params={"table_name": checked_table},
            result_checker=operator.ge,
        )
    )

    # NOTE(review): this geometry check is added to geo_checks, but only
    # count_checks is stored in check_name below -- confirm that leaving the
    # geometry check out of the operator is intended.
    geo_checks.append(
        GEO_CHECK.make_check(
            check_id="geo_check",
            params={
                "table_name": checked_table,
                "geotype": ["POLYGON", "MULTIPOLYGON"],
            },
            pass_value=1,
        )
    )

    # Stores the count_checks list itself (no copy) under the DAG id.
    check_name[dag_id] = count_checks

    # 7. Execute bundled checks on database (in this case just a count check)
    multi_checks = PostgresMultiCheckOperator(
        task_id="count_check",
        checks=check_name[dag_id],
    )

    # 8. Create the DB target table (as specified in the JSON data schema)
    # if table not exists yet
    create_table = SqlAlchemyCreateObjectOperator(
Example #14
0
            check_id="count_check",
            pass_value=25,
            params=dict(table_name=f"{dag_id}_{dag_id}_new"),
            result_checker=operator.ge,
        )
    )

    # Geometry check on the "geometrie" column, configured with six geotypes.
    geo_checks.append(
        GEO_CHECK.make_check(
            check_id="geo_check",
            params={
                "table_name": f"{dag_id}_{dag_id}_new",
                "geotype": [
                    "MULTIPOLYGON",
                    "POLYGON",
                    "POINT",
                    "MULTILINESTRING",
                    "LINESTRING",
                    "GEOMETRYCOLLECTION",
                ],
                "geo_column": "geometrie",
            },
            pass_value=1,
        )
    )

    # Combined list that is handed to the multi-check operator below.
    total_checks = count_checks + geo_checks

    # 12. RUN bundled CHECKS
    multi_checks = PostgresMultiCheckOperator(
        task_id="multi_check",
        checks=total_checks,
    )

    # 13. Create the DB target table (as specified in the JSON data schema)
Example #15
0
        count_checks.clear()
        geo_checks.clear()

        count_checks.append(
            COUNT_CHECK.make_check(
                check_id=f"count_check_{key}",
                pass_value=2,
                params=dict(table_name=f"{key}"),
                result_checker=operator.ge,
            )
        )

        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{key}",
                params=dict(table_name=f"{key}", geotype=["POINT"],),
                pass_value=1,
            )
        )

        total_checks = count_checks + geo_checks
        check_name[f"{key}"] = total_checks

    # 11. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{key}", checks=check_name[f"{key}"]
        )
        for key in files_to_download.keys()
    ]
Example #16
0
        check_id="count_check",
        pass_value=1587,
        params=dict(table_name="fietspaaltjes"),
        result_checker=operator.ge,
    )

    # Column-name check for "fietspaaltjes" with the set {"id"} as pass value.
    colname_check = COLNAMES_CHECK.make_check(
        check_id="colname_check",
        parameters=["fietspaaltjes"],
        pass_value={"id"},
        result_checker=operator.ge,
    )

    # Geometry check for the same table, configured with geotype POINT.
    geo_check = GEO_CHECK.make_check(
        check_id="geo_check",
        params={"table_name": "fietspaaltjes", "geotype": "POINT"},
        pass_value=1,
    )

    # The operator gets the three checks plus params derived from them.
    checks = [count_check, colname_check, geo_check]
    multi = PostgresMultiCheckOperator(
        task_id="multi",
        checks=checks,
        params=make_params(checks),
    )

    # swift_task
    # sqls = [
    #     "delete from biz_data where biz_id = {{ params.tba }}",
    #     "insert into biz_data (biz_id, naam) values (123456789, 'testje')",
    # ]
    # pgtest = PostgresOperator(task_id="pgtest", sql=sqls)