# NOTE(review): flattened fragment — the per-subject check building below almost
# certainly ran inside a loop binding `subject`; original indentation is lost,
# confirm nesting against the full DAG file.
geo_checks.clear()
count_checks.append(
    COUNT_CHECK.make_check(
        # Freshly loaded *_new table must hold at least 2 rows (operator.ge).
        check_id=f"count_check_{subject}",
        pass_value=2,
        params=dict(table_name=f"{dag_id}_{subject}_new"),
        result_checker=operator.ge,
    )
)
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects POLYGON/MULTIPOLYGON geotypes
        # (pass_value semantics defined by GEO_CHECK elsewhere in the project).
        check_id=f"geo_check_{subject}",
        params=dict(
            table_name=f"{dag_id}_{subject}_new",
            geotype=["POLYGON", "MULTIPOLYGON"],
        ),
        pass_value=1,
    )
)
# Bundle count + geo checks per subject so each multi-check task below
# runs only its own subject's checks.
total_checks = count_checks + geo_checks
check_name[f"{subject}"] = total_checks
# 9. Execute bundled checks (step 7) on database
multi_checks = [
    PostgresMultiCheckOperator(
        task_id=f"multi_check_{subject}", checks=check_name[f"{subject}"]
    )
    for subject in files_to_proces.keys()
]
# NOTE(review): flattened fragment — the leading ")" closes a call opened before
# this chunk, and the final ">>" task chain is cut off mid-expression.
)
# PREPARE CHECKS
count_checks.append(
    COUNT_CHECK.make_check(
        # Target table must contain at least 25 rows (operator.ge).
        check_id="count_check",
        pass_value=25,
        params=dict(table_name=f"{dag_id}_{dag_id}"),
        result_checker=operator.ge,
    ))
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects POINT geotype
        # (pass_value semantics defined by GEO_CHECK elsewhere in the project).
        check_id="geo_check",
        params=dict(
            table_name=f"{dag_id}_{dag_id}",
            geotype=["POINT"],
        ),
        pass_value=1,
    ))
total_checks = count_checks + geo_checks
# 10. RUN bundled CHECKS
multi_checks = PostgresMultiCheckOperator(task_id="multi_check", checks=total_checks)
# 11. Grant database permissions
grant_db_permissions = PostgresPermissionsOperator(task_id="grants", dag_name=dag_id)
# DAG task ordering (the chain continues past this chunk).
(slack_at_start >> mkdir >> download_data >> create_SQL >> create_table >>
# NOTE(review): flattened fragment — the check building below likely sat inside
# a loop binding `key`; original indentation is lost, verify against full file.
count_checks.clear()
geo_checks.clear()
count_checks.append(
    COUNT_CHECK.make_check(
        # Table `key` must contain at least 2 rows (operator.ge).
        check_id=f"count_check_{key}",
        pass_value=2,
        params=dict(table_name=key),
        result_checker=operator.ge,
    ))
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects POINT/MULTIPOLYGON geotypes.
        check_id=f"geo_check_{key}",
        params=dict(
            table_name=key,
            geotype=["POINT", "MULTIPOLYGON"],
        ),
        pass_value=1,
    ))
# Bundle per key so each multi-check task gets only its own checks.
total_checks = count_checks + geo_checks
check_name[key] = total_checks
# 13. Execute bundled checks on database
multi_checks = [
    PostgresMultiCheckOperator(task_id=f"multi_check_{key}", checks=check_name[key])
    for key in files_to_download.keys()
]
# 14. Grant database permissions
# XXX Get colnames from schema (provenance info) checks.append( COLNAMES_CHECK.make_check( check_id=f"colname_check_{table_name}", parameters=["pte", table_name], pass_value=field_names, result_checker=operator.ge, ) ) checks.append( GEO_CHECK.make_check( check_id=f"geo_check_{table_name}", params=dict( table_name=f"pte.{table_name}", geo_column="geometrie", geotype=geo_type, ), pass_value=1, ) ) multi_check = PostgresMultiCheckOperator(task_id="multi_check", checks=checks) rename_columns = ProvenanceRenameOperator( task_id="rename_columns", dataset_name="rioolnetwerk", pg_schema="pte" ) rename_tables = PostgresOperator(task_id="rename_tables", sql=RENAME_TABLES_SQL,)
# NOTE(review): flattened fragment — the per-file_name check building below
# presumably ran inside a loop binding `file_name`; indentation is lost.
geo_checks.clear()
count_checks.append(
    COUNT_CHECK.make_check(
        # Table must contain at least 2 rows (operator.ge).
        check_id=f"count_check_{file_name}",
        pass_value=2,
        params=dict(table_name=file_name),
        result_checker=operator.ge,
    )
)
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects POLYGON/MULTIPOLYGON geotypes.
        check_id=f"geo_check_{file_name}",
        params=dict(
            table_name=file_name,
            geotype=["POLYGON", "MULTIPOLYGON"],
        ),
        pass_value=1,
    )
)
# Bundle per file so each multi-check task gets only its own checks.
total_checks = count_checks + geo_checks
check_name[file_name] = total_checks
# 8. Execute bundled checks (step 7) on database
multi_checks = [
    PostgresMultiCheckOperator(
        task_id=f"multi_check_{file_name}", checks=check_name[file_name]
    )
    for file_name in data_endpoints.keys()
]
# NOTE(review): flattened fragment — the check building below likely sat inside
# a loop binding `key`; original indentation is lost.
count_checks.clear()
geo_checks.clear()
count_checks.append(
    COUNT_CHECK.make_check(
        # Freshly loaded *_new table must hold at least 500 rows (operator.ge).
        check_id=f"count_check_{key}",
        pass_value=500,
        params=dict(table_name=f"{dag_id}_{key}_new"),
        result_checker=operator.ge,
    ))
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects MULTILINESTRING geotype.
        check_id=f"geo_check_{key}",
        params=dict(
            table_name=f"{dag_id}_{key}_new",
            geotype=["MULTILINESTRING"],
        ),
        pass_value=1,
    ))
# Bundle per key so each multi-check task gets only its own checks.
total_checks = count_checks + geo_checks
check_name[f"{key}"] = total_checks
# 9. Execute bundled checks on database
multi_checks = [
    PostgresMultiCheckOperator(task_id=f"multi_check_{key}", checks=check_name[f"{key}"])
    for key in files_to_download.keys()
]
# 10. Rename TABLE
count_checks.append( COUNT_CHECK.make_check( check_id=f"count_check_{table_name}", pass_value=10, params=dict(table_name=f"{dag_id}_{table_name}_new"), result_checker=operator.ge, ) ) geo_checks.append( GEO_CHECK.make_check( check_id=f"geo_check_{table_name}", params=dict( table_name=f"{dag_id}_{table_name}_new", geotype=[ "MULTIPOLYGON", ], geo_column="geometrie", ), pass_value=1, ) ) total_checks = count_checks + geo_checks check_name["{table_name}"] = total_checks # 7. Execute bundled checks on database multi_checks = [ PostgresMultiCheckOperator( task_id=f"multi_check_{table_name}", checks=check_name["{table_name}"],
"id", "naam", "status", "aanwijzingsdatum", "intrekkingsdatum", "geometry", }, result_checker=operator.ge, ) ) checks.append( GEO_CHECK.make_check( check_id="geo_check", params=dict( table_name="pte.beschermde_stadsdorpsgezichten", geotype="MULTIPOLYGON", ), pass_value=1, ) ) correct_geo = PostgresOperator(task_id="correct_geo", sql=CORRECT_GEO,) multi_check = PostgresMultiCheckOperator(task_id="multi_check", checks=checks) rename_table = PostgresOperator(task_id=f"rename_table", sql=RENAME_TABLES_SQL,) ( slack_at_start >> drop_imported_table >> swift_load_task >> correct_geo
# PREPARE CHECKS
# Build one count check and one geo check per resource.
for resource in variables:
    count_checks.append(
        COUNT_CHECK.make_check(
            # Freshly loaded *_new table must hold at least 50 rows (operator.ge).
            check_id=f"count_check_{resource}",
            pass_value=50,
            # BUG FIX: the table name carried a trailing space
            # (f"{dag_id}_{resource}_new ") which can never match the real
            # table, so the count check could not succeed.
            params=dict(table_name=f"{dag_id}_{resource}_new"),
            result_checker=operator.ge,
        ))
    geo_checks.append(
        GEO_CHECK.make_check(
            # Geometry check on column `geometrie`: expects POINT geotype.
            check_id=f"geo_check_{resource}",
            params=dict(
                table_name=f"{dag_id}_{resource}_new",
                geotype=["POINT"],
                geo_column="geometrie",
            ),
            pass_value=1,
        ))
total_checks = count_checks + geo_checks
# 9. RUN bundled CHECKS
# NOTE(review): every task receives the full cumulative list, so each
# multi_check task re-runs the checks of ALL resources — confirm whether
# per-resource check lists were intended (sibling DAGs use a check_name dict).
multi_checks = [
    PostgresMultiCheckOperator(task_id=f"multi_check_{resource}", checks=total_checks)
    for resource in variables
]
# 10. Rename COLUMNS based on provenance (if specified)
# NOTE(review): flattened fragment — the check building below likely sat inside
# a loop binding `file_name`; the closing "]" of the final comprehension falls
# past this chunk.
count_checks.append(
    COUNT_CHECK.make_check(
        # Freshly loaded *_new table must hold at least 2 rows (operator.ge).
        check_id=f"count_check_{file_name}",
        pass_value=2,
        params=dict(table_name=f"{dag_id}_{file_name}_new"),
        result_checker=operator.ge,
    ))
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: any of the listed geotypes is acceptable.
        check_id=f"geo_check_{file_name}",
        params=dict(
            table_name=f"{dag_id}_{file_name}_new",
            geotype=[
                "POINT",
                "POLYGON",
                "MULTIPOLYGON",
                "MULTILINESTRING",
                "LINESTRING",
            ],
        ),
        pass_value=1,
    ))
# Bundle per file so each multi-check task gets only its own checks.
total_checks = count_checks + geo_checks
check_name[f"{file_name}"] = total_checks
# 14. Execute bundled checks on database
multi_checks = [
    PostgresMultiCheckOperator(task_id=f"multi_check_{file_name}", checks=check_name[f"{file_name}"])
    for data in files_to_SQL.values()
    for file_name in data.keys()
# NOTE(review): flattened fragment — the check building below likely sat inside
# a loop binding `splitted_tablename`; original indentation is lost.
geo_checks.clear()
count_checks.append(
    COUNT_CHECK.make_check(
        # Freshly loaded *_new table must hold at least 2 rows (operator.ge).
        check_id=f"count_check_{splitted_tablename}",
        pass_value=2,
        params=dict(
            table_name=f"{dag_id}_{splitted_tablename}_new"),
        result_checker=operator.ge,
    ))
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects MULTIPOLYGON geotype.
        check_id=f"geo_check_{splitted_tablename}",
        params=dict(
            table_name=f"{dag_id}_{splitted_tablename}_new",
            geotype=["MULTIPOLYGON"],
        ),
        pass_value=1,
    ))
# Bundle per table part so each multi-check task gets only its own checks.
total_checks = count_checks + geo_checks
check_name[f"{splitted_tablename}"] = total_checks
# 11. Execute bundled checks on database (see checks definition here above)
# Note: file keys look like "a-b"; each dash-separated part becomes a table,
# and keys containing "themas" are excluded.
multi_checks = [
    PostgresMultiCheckOperator(
        task_id=f"multi_check_{splitted_tablename}",
        checks=check_name[f"{splitted_tablename}"],
    )
    for key in files_to_download.keys()
    for splitted_tablename in key.split("-")
    if "themas" not in key
]
# NOTE(review): flattened fragment — the count check whose tail appears below
# opens before this chunk; `route` is presumably bound by an enclosing loop.
        result_checker=operator.ge,
    ))
colname_checks.append(
    COLNAMES_CHECK.make_check(
        # Columns of the tmp table must be >= the route's declared columns
        # (for sets, operator.ge means superset-or-equal).
        check_id=f"colname_check_{route.name}",
        parameters=["public", route.tmp_db_table_name],
        pass_value=set(route.columns),
        result_checker=operator.ge,
    ))
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check against the route's declared geometry type.
        check_id=f"geo_check_{route.name}",
        params=dict(
            table_name=route.tmp_db_table_name,
            geotype=route.geometry_type.upper(),
        ),
        pass_value=1,
    ))
# Run all count + colname + geo checks in a single task.
checks = count_checks + colname_checks + geo_checks
multi_check = PostgresMultiCheckOperator(task_id="multi_check", checks=checks)
# Promote each tmp table to its final name once the checks have passed.
renames = [
    PostgresTableRenameOperator(
        task_id=f"rename_{route.name}",
        old_table_name=route.tmp_db_table_name,
        new_table_name=route.db_table_name,
    )
    for route in ROUTES
]
count_checks.append( COUNT_CHECK.make_check( check_id="count_check", pass_value=1, params=dict(table_name=f"{schema_name}_{table_name}_new"), result_checker=operator.ge, ) ) geo_checks.append( GEO_CHECK.make_check( check_id="geo_check", params=dict( table_name=f"{schema_name}_{table_name}_new", geotype=[ "POLYGON", "MULTIPOLYGON", ], ), pass_value=1, ) ) check_name[dag_id] = count_checks # 7. Execute bundled checks on database (in this case just a count check) multi_checks = PostgresMultiCheckOperator(task_id="count_check", checks=check_name[dag_id]) # 8. Create the DB target table (as specified in the JSON data schema) # if table not exists yet create_table = SqlAlchemyCreateObjectOperator(
check_id="count_check", pass_value=25, params=dict(table_name=f"{dag_id}_{dag_id}_new"), result_checker=operator.ge, ) ) geo_checks.append( GEO_CHECK.make_check( check_id="geo_check", params=dict( table_name=f"{dag_id}_{dag_id}_new", geotype=[ "MULTIPOLYGON", "POLYGON", "POINT", "MULTILINESTRING", "LINESTRING", "GEOMETRYCOLLECTION", ], geo_column="geometrie", ), pass_value=1, ) ) total_checks = count_checks + geo_checks # 12. RUN bundled CHECKS multi_checks = PostgresMultiCheckOperator(task_id="multi_check", checks=total_checks) # 13. Create the DB target table (as specified in the JSON data schema)
# NOTE(review): flattened fragment — the check building below likely sat inside
# a loop binding `key`; original indentation is lost.
count_checks.clear()
geo_checks.clear()
count_checks.append(
    COUNT_CHECK.make_check(
        # Table `key` must contain at least 2 rows (operator.ge).
        check_id=f"count_check_{key}",
        pass_value=2,
        params=dict(table_name=f"{key}"),
        result_checker=operator.ge,
    )
)
geo_checks.append(
    GEO_CHECK.make_check(
        # Geometry check: expects POINT geotype.
        check_id=f"geo_check_{key}",
        params=dict(table_name=f"{key}", geotype=["POINT"],),
        pass_value=1,
    )
)
# Bundle per key so each multi-check task gets only its own checks.
total_checks = count_checks + geo_checks
check_name[f"{key}"] = total_checks
# 11. Execute bundled checks on database
multi_checks = [
    PostgresMultiCheckOperator(
        task_id=f"multi_check_{key}", checks=check_name[f"{key}"]
    )
    for key in files_to_download.keys()
]
check_id="count_check", pass_value=1587, params=dict(table_name="fietspaaltjes"), result_checker=operator.ge, ) colname_check = COLNAMES_CHECK.make_check( check_id="colname_check", parameters=["fietspaaltjes"], pass_value=set(["id"]), result_checker=operator.ge, ) geo_check = GEO_CHECK.make_check( check_id="geo_check", params=dict(table_name="fietspaaltjes", geotype="POINT"), pass_value=1, ) checks = [count_check, colname_check, geo_check] multi = PostgresMultiCheckOperator(task_id="multi", checks=checks, params=make_params(checks)) # swift_task # sqls = [ # "delete from biz_data where biz_id = {{ params.tba }}", # "insert into biz_data (biz_id, naam) values (123456789, 'testje')", # ] # pgtest = PostgresOperator(task_id="pgtest", sql=sqls)