Example #1
0
    # Convert the downloaded GeoJSON into a PGDump SQL file, forcing
    # MULTILINESTRING geometries and reprojecting EPSG:4326 -> EPSG:28992.
    extract_geojson = BashOperator(
        task_id="extract_geojson",
        bash_command=(
            f"ogr2ogr -f 'PGDump' -nlt MULTILINESTRING "
            "-t_srs EPSG:28992 -s_srs EPSG:4326 "
            f"-nln {dag_id}_new "
            f"{tmp_file_prefix}.sql {tmp_file_prefix}.json"
        ),
    )

    # Feed the generated SQL dump straight into Postgres.
    load_table = BashOperator(
        task_id="load_table",
        bash_command=f"psql {pg_params} < {tmp_file_prefix}.sql",
    )

    # Guard: the freshly loaded staging table must have at least 3 rows.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": f"{dag_id}_new", "mincount": 3},
    )

    # Guard: geometry type must be ST_MultiLineString.
    check_geo = PostgresCheckOperator(
        task_id="check_geo",
        sql=SQL_CHECK_GEO,
        params={"tablename": f"{dag_id}_new", "geotype": "ST_MultiLineString"},
    )

    # Guard: column names must match the expected `colnames`.
    check_colnames = PostgresValueCheckOperator(
        task_id="check_colnames",
        sql=SQL_CHECK_COLNAMES,
        pass_value=colnames,
        params={"tablename": f"{dag_id}_new"},
    )
    fetch_csv = SwiftOperator(
        task_id="fetch_csv",
        container="grex",
        object_id=csv_file,
        output_path=f"{tmp_dir}/{csv_file}",
    )

    load_data = PythonOperator(
        task_id="load_data",
        python_callable=load_grex,
        op_args=[f"{tmp_dir}/{csv_file}", table_name_new],
    )

    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params=dict(tablename=table_name_new, mincount=400),
    )

    check_geo = PostgresCheckOperator(
        task_id="check_geo",
        sql=SQL_CHECK_GEO,
        params=dict(tablename=table_name_new,
                    geotype="ST_MultiPolygon",
                    geo_column="geometry"),
    )

    rename_table = PostgresOperator(task_id="rename_table",
                                    sql=SQL_TABLE_RENAME)

mk_tmp_dir >> fetch_csv >> load_data >> check_count >> check_geo >> rename_table
        task_id="convert_shp",
        bash_command=f"iconv -f iso-8859-1 -t utf-8  {tmp_dir}/{dag_id}.sql > "
        f"{tmp_dir}/{dag_id}.utf8.sql",
    )

    # Run the UTF-8-converted dump plus the post-processing SQL in one go.
    create_tables = PostgresOperator(
        task_id="create_tables",
        sql=[
            f"{tmp_dir}/{dag_id}.utf8.sql",
            PROCESS_TABLE,
        ],
    )

    # Guard: expect at least 110 rows in the staging table.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": f"{dag_id}_new", "mincount": 110},
    )

    # Guard: column names must match `colnames`, validated by a custom checker.
    check_colnames = PostgresValueCheckOperator(
        task_id="check_colnames",
        sql=SQL_CHECK_COLNAMES,
        pass_value=colnames,
        result_checker=checker,
        params={"tablename": f"{dag_id}_new"},
    )

    check_geo = PostgresCheckOperator(
        task_id="check_geo",
        sql=SQL_CHECK_GEO,
        params=dict(
Example #4
0
        ],
    )

    # Create the target tables from the checked-in DDL script.
    create_tables = BashOperator(
        task_id="create_tables",
        bash_command=f"psql {pg_params} < {sql_path}/biz_data_create.sql",
    )

    # Load the generated insert statements.
    import_data = BashOperator(
        task_id="import_data",
        bash_command=f"psql {pg_params} < {tmp_dir}/biz_data_insert.sql",
    )

    # Guard: the view must expose at least 48 rows.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": "biz_view_new", "mincount": 48},
    )

    # Guard: the "geometrie" column must hold ST_Polygon values.
    check_geo = PostgresCheckOperator(
        task_id="check_geo",
        sql=SQL_CHECK_GEO,
        params={
            "tablename": "biz_view_new",
            "geotype": "ST_Polygon",
            "geo_column": "geometrie",
        },
    )

    check_colnames = PostgresValueCheckOperator(
        task_id="check_colnames",
        sql=SQL_CHECK_COLNAMES,
        pass_value=colnames,
                                     dataset_name="huishoudelijkafval",
                                     subset_tables=tables['dump_file'])

    # 6. DWH STADSDELEN SOURCE
    # Load voertuigenplanning data into DB (into the `..._new` staging table).
    load_dwh = PythonOperator(
        task_id="load_from_dwh_stadsdelen",
        python_callable=load_from_dwh,
        op_args=[f"{dag_id}_{to_snake_case(tables['dwh_stadsdelen'])}_new"],
    )

    # 7. Check minimum number of records
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params=dict(
            tablename=f"{dag_id}_{to_snake_case(tables['dwh_stadsdelen'])}_new",
            mincount=1000),
    )

    # 8. DWH STADSDELEN SOURCE
    # Rename COLUMNS based on provenance (if specified)
    provenance_dwh_data = ProvenanceRenameOperator(
        task_id="provenance_dwh",
        dataset_name=dag_id,
        prefix_table_name=f"{dag_id}_",
        postfix_table_name="_new",
        # The f-string already yields a str; the previous `"".join(...)`
        # wrapper around it was a no-op and has been removed.
        subset_tables=[f"{tables['dwh_stadsdelen']}"],
        rename_indexes=False,
        pg_schema="public",
    )
Example #6
0
                bash_command=f"iconv -f iso-8859-1 -t utf-8 "
                f"{tmp_dir}/{dag_id}_{name}.sql > "
                f"{tmp_dir}/{dag_id}_{name}.utf8.sql",
            ))

        # Strip DROP TABLE statements so a dump can never clobber live tables.
        remove_drops.append(
            BashOperator(
                task_id=f"remove_drops_{name}",
                bash_command=f'perl -i -ne "print unless /DROP TABLE/" '
                f"{tmp_dir}/{name}.utf8.sql",
            ))

        # Guard per table: minimum row count after the load.
        check_counts.append(
            PostgresCheckOperator(
                task_id=f"check_count_{name}",
                sql=SQL_CHECK_COUNT,
                params={
                    "tablename": f"{dag_id}_{name}_new",
                    "mincount": mincount,
                },
            ))

        # Guard per table: column names, validated by a custom checker.
        check_colnames.append(
            PostgresValueCheckOperator(
                task_id=f"check_colnames_{name}",
                sql=SQL_CHECK_COLNAMES,
                pass_value=colnames,
                result_checker=checker,
                params={"tablename": f"{dag_id}_{name}_new"},
            ))

    load_dumps = PostgresOperator(
        task_id="load_dumps",
        sql=[
Example #7
0
    )

    # Remove any leftover tables from a previous run.
    drop_tables = PostgresOperator(
        task_id="drop_tables",
        sql=SQL_DROPS,
        params={"tablename": dag_id},
    )

    # Replay the prepared SQL file into the database.
    import_table = PostgresFilesOperator(
        task_id="import_table",
        sql_files=[sql_file_new_path],
    )

    # Guard: expect at least 90 rows in the staging table.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": "bb_quotum_new", "mincount": 90},
    )

    # Guard: the "geo" column must hold ST_MultiPolygon values.
    check_geo = PostgresCheckOperator(
        task_id="check_geo",
        sql=SQL_CHECK_GEO,
        params={
            "tablename": "bb_quotum_new",
            "geotype": "ST_MultiPolygon",
            "geo_column": "geo",
        },
    )

    # Guard: SQL_CHECK_COLS must return exactly [[3]].
    check_cols = PostgresValueCheckOperator(
        task_id="check_cols",
        sql=SQL_CHECK_COLS,
        pass_value=[[3]],
    )

with DAG(
    "horeca_exploitatievergunning",
    default_args=default_args,
    description="Horeca Exploitatievergunning",
    schedule_interval="0 9 * * *",  # daily at 09:00
) as dag:

    # Load the source rows from the DWH into the staging table.
    load_data = PythonOperator(
        task_id="load_data",
        python_callable=load_from_dwh,
        op_args=[table_name_new],
    )

    # Guard: expect at least 4000 rows after the load.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": table_name_new, "mincount": 4000},
    )

    # Guard: "terrasgeometrie" must be ST_MultiPolygon.
    # notnull=False: presumably NULL geometries are tolerated here —
    # confirm against SQL_CHECK_GEO.
    check_geo1 = PostgresCheckOperator(
        task_id="check_geo1",
        sql=SQL_CHECK_GEO,
        params={
            "tablename": table_name_new,
            "geotype": "ST_MultiPolygon",
            "geo_column": "terrasgeometrie",
            "notnull": False,
        },
    )

    check_geo2 = PostgresCheckOperator(
        f"{tmp_dir}/{dag_id}.utf8.sql",
    )

    # Load the UTF-8 converted dump into Postgres.
    create_table = BashOperator(
        task_id="create_table",
        bash_command=f"psql {pg_params} < {tmp_dir}/{dag_id}.utf8.sql",
    )

    # Apply the category enrichment script.
    add_category = BashOperator(
        task_id="add_category",
        bash_command=f"psql {pg_params} < {sql_path}/add_categorie.sql",
    )

    # Guard: expect at least 75 rows in the staging table.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": f"{dag_id}_new", "mincount": 75},
    )

    # Guard: geometry may be ST_Polygon or ST_MultiPolygon;
    # check_valid=False skips the geometry-validity test.
    check_geo = PostgresCheckOperator(
        task_id="check_geo",
        sql=SQL_CHECK_GEO,
        params={
            "tablename": f"{dag_id}_new",
            "geotype": ["ST_Polygon", "ST_MultiPolygon"],
            "check_valid": False,
        },
    )

    rename_table = PostgresOperator(
        task_id="rename_table",
    # Convert the fetched GeoJSON into a PGDump SQL file; PG_USE_COPY
    # makes ogr2ogr emit COPY statements instead of per-row INSERTs.
    extract_geojson = BashOperator(
        task_id="extract_geojson",
        bash_command=(
            f"ogr2ogr --config PG_USE_COPY YES -f 'PGDump' "
            f"-nln {dag_id}_new "
            f"{tmp_dir}/objects.sql {tmp_dir}/objects.geo.json"
        ),
    )

    # Load the generated dump into Postgres.
    create_table = BashOperator(
        task_id="create_table",
        bash_command=f"psql {pg_params} < {tmp_dir}/objects.sql",
    )

    # Guard: expect at least 129410 rows in the staging table.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params={"tablename": f"{dag_id}_new", "mincount": 129410},
    )

    # Swap the staging table into place.
    rename_table = PostgresOperator(
        task_id="rename_table",
        sql=SQL_TABLE_RENAME,
        params={"tablename": f"{dag_id}"},
    )

(
    slack_at_start
    >> objects_fetch
    >> types_fetch
    >> import_data
    >> extract_geojson