Example #1
                    f"create or replace view {v['dag'] + '.' + v['name']} as ({v['select']})"
                ],
                postgres_conn_id='etl_postgres')
            opr_pause.set_downstream(opr_make_view)

            if 'id' in v and v['id'] is not None:
                opr_socrata_upload = PythonOperator(
                    task_id=f"upload_{v['name']}",
                    python_callable=destinations.upload_to_socrata,
                    provide_context=True,
                    op_kwargs={
                        "id": v['id'],
                        "method": v['method'],
                        "table": v['dag'] + '.' + v['name']
                    })
                opr_make_view.set_downstream(opr_socrata_upload)

            # opr_csv_dump = PostgresOperator(
            #     task_id=f"dump_to_csv_{v['name']}",
            #     sql=f"""COPY (select distinct * from {v['dag'] + '.' + v['name']}) TO '/tmp/{v['name']}.csv' WITH (FORMAT CSV, FORCE_QUOTE *, QUOTE '"', HEADER);""",
            #     postgres_conn_id='etl_postgres'
            # )

            # opr_make_view.set_downstream(opr_csv_dump)

            # opr_fix_dupes = BashOperator(
            #     task_id=f"dedupe_rows_{v['name']}",
            #     bash_command=f"cat /tmp/{v['name']}.csv | awk '!seen[$0]++' >| /tmp/accela_extract/{v['name']}_deduped.csv",
            # )

            # opr_csv_dump.set_downstream(opr_fix_dupes)
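Not shown above: each `v` is a per-view config dict produced by an enclosing loop. The keys this snippet reads are `select` (the view's query) plus, optionally, a Socrata dataset `id` and upload `method`; `name` and `dag` are presumably filled in by the loop itself. A minimal hypothetical shape, with invented values:

v = {
    "select": "select * from some_schema.some_table",  # hypothetical view query
    "id": "abcd-1234",                                 # optional Socrata dataset id
    "method": "replace",                               # upload method for upload_to_socrata
}
v['name'] = "example_view"  # set by the enclosing loop
v['dag'] = "example_dag"    # set by the enclosing loop (typically dag.dag_id)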
Example #2
        for d, v in od_config['views'].items():

            v['name'] = f"{od_name}_{d}"
            v['dag'] = dag.dag_id

            # Make view & set downstream of opr_pause
            opr_make_view = PostgresOperator(
                task_id=f"make_view_{v['name']}",
                sql=f"create or replace view {v['dag'] + '.' + v['name']} as ({v['select']})",
                postgres_conn_id='etl_postgres')
            opr_pause.set_downstream(opr_make_view)

            # Get appropriate Operator & set downstream of make_view
            opr_dump_file = destinations.pg_to_file(v)
            opr_make_view.set_downstream(opr_dump_file)

            if v['export'] == 'shapefile':
                filepath = f"/tmp/{v['name']}.zip"
            elif v['export'] == 'geojson':
                filepath = f"/tmp/{v['name']}.json"

            if 'id' in v and len(v['id']) > 0 and v['destination'] == 'ago':
                # Upload to AGO and set downstream of dump_file
                opr_upload = PythonOperator(
                    task_id=f"upload_{v['name']}",
                    python_callable=destinations.upload_to_ago,
                    op_kwargs={
                        "id": v['id'],
                        "filepath": filepath
Example #3
# create the dag and schedule a daily load at midnight (7am UTC)
dag = DAG('extract_and_load',
          catchup=False,
          default_args=default_args,
          schedule_interval=timedelta(days=1),
          max_active_runs=1)

# task to create table if it does not exist
task_create_table = PostgresOperator(
    task_id='task_create_table',
    sql='./extract_load_pipeline/sql/create_postgres_table.sql',
    postgres_conn_id='my_local_db',
    dag=dag)

# extracts BigQuery results to a GCS bucket as CSV
task_bq_to_gcs = PythonOperator(
    task_id='task_bq_to_gcs',
    python_callable=bq_to_gcs,
    provide_context=True,
    op_kwargs={'start_date': default_args['start_date']},
    dag=dag)

# loads postgres table from csv
task_gcs_to_postgres = PythonOperator(task_id='task_gcs_to_postgres',
                                      python_callable=load_table,
                                      provide_context=True,
                                      dag=dag)

task_create_table.set_downstream(task_bq_to_gcs)
task_bq_to_gcs.set_downstream(task_gcs_to_postgres)
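The snippet above reuses `start_date` from a `default_args` dict that is defined before this excerpt. A minimal sketch of what it might contain, with placeholder values:

from datetime import datetime, timedelta

# Hypothetical default_args; only start_date is actually read by the snippet above.
default_args = {
    'owner': 'airflow',
    'start_date': datetime(2019, 1, 1),
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}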
Example #4
            DROP VIEW IF EXISTS model_draft.final_result_hoes CASCADE;
            DROP MATERIALIZED VIEW IF EXISTS model_draft.substations_to_drop_hoes CASCADE;
            DROP MATERIALIZED VIEW IF EXISTS model_draft.buffer_75_hoes CASCADE;
            DROP MATERIALIZED VIEW IF EXISTS model_draft.buffer_75_a_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.summary_de_hoes CASCADE;
            DROP MATERIALIZED VIEW IF EXISTS model_draft.summary_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.summary_total_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.substation_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.relation_substations_with_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.node_substations_with_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.way_substations_with_hoes CASCADE;
            DROP VIEW IF EXISTS model_draft.way_substations CASCADE; ''',
    )


    create_table_ego_grid_ehv_substation.set_downstream(create_view_way_substations)
    create_view_way_substations.set_downstream(create_view_way_substations_with_hoes)
    create_view_way_substations_with_hoes.set_downstream(create_view_node_substations_with_hoes)
    create_view_node_substations_with_hoes.set_downstream(create_view_relation_substations_with_hoes)
    create_view_relation_substations_with_hoes.set_downstream(create_view_substation_hoes)
    create_view_substation_hoes.set_downstream(create_view_summary_total_hoes)
    create_view_summary_total_hoes.set_downstream(create_view_summary_hoes)
    create_view_summary_hoes.set_downstream(create_index_gist)
    create_index_gist.set_downstream(create_view_summary_de_hoes)
    create_view_summary_de_hoes.set_downstream(create_view_buffer_75_hoes)
    create_view_buffer_75_hoes.set_downstream(create_view_buffer_75_a_hoes)
    create_view_buffer_75_a_hoes.set_downstream(create_view_substations_to_drop_hoes)
    create_view_substations_to_drop_hoes.set_downstream(create_view_final_result_hoes)
    create_view_final_result_hoes.set_downstream(insert_into_ego_grid_ehv_substation)
    insert_into_ego_grid_ehv_substation.set_downstream(drop)
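A long linear chain of pairwise set_downstream() calls like this can also be written with Airflow's chain helper (airflow.utils.helpers.chain in 1.x, airflow.models.baseoperator.chain in 2.x), which wires each task to the next. A sketch assuming the same task objects are in scope:

from airflow.utils.helpers import chain  # airflow.models.baseoperator.chain in Airflow 2.x

# Equivalent to the pairwise set_downstream() calls above.
chain(
    create_table_ego_grid_ehv_substation,
    create_view_way_substations,
    create_view_way_substations_with_hoes,
    create_view_node_substations_with_hoes,
    create_view_relation_substations_with_hoes,
    create_view_substation_hoes,
    create_view_summary_total_hoes,
    create_view_summary_hoes,
    create_index_gist,
    create_view_summary_de_hoes,
    create_view_buffer_75_hoes,
    create_view_buffer_75_a_hoes,
    create_view_substations_to_drop_hoes,
    create_view_final_result_hoes,
    insert_into_ego_grid_ehv_substation,
    drop,
)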
Example #5
            "create index demos_pid_idx on graphql.demos using btree(parcelno)",
            "alter table graphql.demos add constraint demos_to_pid foreign key (parcelno) references graphql.parcels(parcelno)"
        ],
        "property_sales": [
            "drop table if exists graphql.sales cascade",
            "create table graphql.sales as (select * from assessor.sales s where s.pnum in (select parcelno from graphql.parcels))",
            "alter table graphql.sales rename column pnum to parcelno",
            "create index sales_pid_idx on graphql.sales using btree(parcelno)",
            "alter table graphql.sales add constraint sales_to_pid foreign key (parcelno) references graphql.parcels(parcelno)"
        ],
        "ownership": [
            "drop table if exists graphql.ownership cascade",
            "create table graphql.ownership as (select * from assessor.parcelmaster pm where pm.pnum in (select parcelno from graphql.parcels))",
            "alter table graphql.ownership rename column pnum to parcelno",
            "create index ownership_pid_idx on graphql.ownership using btree(parcelno)",
            "alter table graphql.ownership add constraint ownership_to_pid foreign key (parcelno) references graphql.parcels(parcelno)"
        ]
    }

    for key, value in views.items():

        opr_make_views = PostgresOperator(task_id=f"make_{key}",
                                          sql=value,
                                          postgres_conn_id='etl_postgres')

        opr_make_parcels.set_downstream(opr_make_views)
        opr_make_views.set_downstream(opr_make_pgfunctions)

opr_make_pgfunctions >> opr_dump_schema >> opr_transfer_schema >> opr_transfer_script >> opr_kill_postgraphile >> opr_create_db >> opr_restart_postgraphile
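The two set_downstream() calls in the loop fan each per-table task out from opr_make_parcels and back into opr_make_pgfunctions. With the bit-shift operators the same wiring can be expressed in one line per iteration; a sketch using the same task objects:

for key, value in views.items():
    opr_make_views = PostgresOperator(task_id=f"make_{key}",
                                      sql=value,
                                      postgres_conn_id='etl_postgres')
    # Same dependencies as the set_downstream() calls above, just using >>.
    opr_make_parcels >> opr_make_views >> opr_make_pgfunctions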
Example #6
    t.set_upstream(dummy_op)
    t.set_downstream(create_temp_scores_table_op)

archive_trained_models_op = BashOperator(
    task_id='archive_trained_models',
    bash_command='scripts/bash/archive_trained_models.sh',
    dag=dag
)

notify_processing_completion_op = SlackAPIPostOperator(
    task_id='notify_processing_completion',
    token=Variable.get('slack_token'),
    channel='#engineering-commits',
    username='******',
    icon_url=Variable.get('tia_slack_icon_url'),
    text='*user_work_experience_job_posting_similarity_scores* has been refreshed on {{ts}}',
    dag=dag
)

create_temp_scores_table_op.set_downstream(copy_scores_to_temp_table_op)
copy_scores_to_temp_table_op.set_downstream(remove_scores_op)
copy_scores_to_temp_table_op.set_downstream(update_scores_op)
delete_temp_scores_table_op.set_upstream(remove_scores_op)
delete_temp_scores_table_op.set_upstream(update_scores_op)
delete_temp_scores_table_op.set_downstream(notify_processing_completion_op)

dummy_op.set_upstream(compute_title_feature_op)
dummy_op.set_upstream(compute_skill_feature_op)
dummy_op.set_upstream(compute_description_feature_op)
dummy_op.set_downstream(archive_trained_models_op)
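The dummy_op wiring at the bottom is a fan-in from the three feature tasks. Recent Airflow versions accept a list of tasks on either side of the bit-shift operators, so the same dependencies could be written as:

# Same dependencies as the set_upstream()/set_downstream() calls on dummy_op above.
[compute_title_feature_op,
 compute_skill_feature_op,
 compute_description_feature_op] >> dummy_op
dummy_op >> archive_trained_models_op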
Example #7
dag = DAG('transfer_data',
          catchup=False,
          default_args=default_args,
          schedule_interval=None,
          max_active_runs=1)

task_create_tables = PostgresOperator(
    task_id='task_create_tables',
    sql=SQL_PATH + 'create_transfer_and_aggregate_tables.sql',
    postgres_conn_id='my_local_db',
    dag=dag)

task_create_views = PostgresOperator(task_id='task_create_views',
                                     sql=SQL_PATH + 'create_views.sql',
                                     postgres_conn_id='my_local_db',
                                     dag=dag)

task_load_transfer_table = PythonOperator(task_id='task_load_transfer_table',
                                          python_callable=load_transfer,
                                          provide_context=True,
                                          dag=dag)

task_transfer_to_aggregate_table = PostgresOperator(
    task_id='task_transfer_to_aggregate_table',
    sql=SQL_PATH + 'insert_into_aggregate.sql',
    postgres_conn_id='my_local_db',
    dag=dag)

task_create_tables.set_downstream(task_create_views)
task_create_views.set_downstream(task_load_transfer_table)
task_load_transfer_table.set_downstream(task_transfer_to_aggregate_table)
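The .sql paths here are resolved relative to the DAG file's folder when the templates are rendered. If the SQL files lived elsewhere, a hypothetical variant could register that directory through the DAG's template_searchpath instead of prefixing every path with SQL_PATH:

# Hypothetical: let Airflow find bare .sql filenames via template_searchpath.
dag = DAG('transfer_data',
          catchup=False,
          default_args=default_args,
          schedule_interval=None,
          max_active_runs=1,
          template_searchpath=['/opt/airflow/sql'])  # assumed location of the .sql files

task_create_tables = PostgresOperator(
    task_id='task_create_tables',
    sql='create_transfer_and_aggregate_tables.sql',
    postgres_conn_id='my_local_db',
    dag=dag)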
Example #8
    send_email(to=[
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
    ],
               subject=f"Dockless Stats for { kwargs['yesterday_ds'] }",
               html_content=email_template)

    return True


email_task = PythonOperator(task_id='scoot_stat_email',
                            python_callable=email_callback,
                            provide_context=True,
                            dag=dag)
set_xcom = PythonOperator(task_id="computing_stats",
                          provide_context=True,
                          python_callable=set_xcom_variables,
                          dag=dag)

task1.set_downstream(set_xcom)
task2.set_downstream(set_xcom)
email_task.set_upstream(set_xcom)
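set_xcom_variables and the rest of email_callback are not shown here, but with provide_context=True both callables receive the task instance as kwargs['ti'], so the stats are presumably handed off via XCom. A minimal hypothetical sketch of that pattern (key and value invented):

def set_xcom_variables(**kwargs):
    # Push a computed stat so the email task can read it.
    kwargs['ti'].xcom_push(key='trip_count', value=12345)

# Inside email_callback, the pushed value could then be pulled with:
#   trip_count = kwargs['ti'].xcom_pull(task_ids='computing_stats', key='trip_count')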
Example #9
    bash_command='echo "works" ',
    dag=dag,
)

# t7 = BashOperator(
#     task_id='delete_previousdays_records',
#     depends_on_past=False,
#     bash_command='echo "works" ',
#     dag=dag,
# )

# Setting up dependencies

# This means that t3 will depend on t1
# running successfully to run.
t1.set_downstream(t3)

# similar to above, where t3 will depend on t2
t3.set_upstream(t2)
t3.set_downstream(t4)
t4.set_downstream(t5)

# The bit shift operator can also be
# used to chain operations:
# t1 >> t2

# And the upstream dependency with the
# bit shift operator:
# t2 << t1
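Applying the bit-shift notation from the comments above to this particular graph would look roughly like:

# Same dependencies as the set_downstream()/set_upstream() calls above.
[t1, t2] >> t3
t3 >> t4 >> t5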
Example #10
            task_id=f"truncate_{tables[t]}_update",
            sql=f"truncate table accela.{tables[t]}_update",
            postgres_conn_id='etl_postgres')

        opr_table_pause = BashOperator(task_id=f"pause_{tables[t]}",
                                       bash_command="echo 'Fake'")

        for f in txtfile:
            opr_copy_table = PostgresOperator(
                task_id=f"copy_{tables[t]}_{f.split('_')[0]}",
                sql=[
                    f"""COPY accela.{tables[t]}_update FROM '/tmp/accela_extract/{f}' (FORMAT CSV, HEADER TRUE, QUOTE '"', DELIMITER '|', ENCODING 'latin-1')"""
                ],
                postgres_conn_id='etl_postgres')

            opr_truncate_table.set_downstream(opr_copy_table)
            opr_copy_table.set_downstream(opr_table_pause)

        opr_insert_update = PostgresOperator(
            task_id=f"insert_{tables[t]}",
            sql=f"insert into accela.{tables[t]} select * from accela.{tables[t]}_update",
            postgres_conn_id='etl_postgres')

        opr_table_pause.set_downstream(opr_insert_update)

    # # Create Postgres tables
    # for t in tables:
    #     opr_make_pgtables = PythonOperator(
    #         task_id='csv_to_pg_'+t.lower().replace('_',''),
    #         provide_context=True,