def workflow(src_conn_id, src_schema, dt, target_conn_id, target_table):
    """Copy one day of ``dbo.Orders`` rows from an MSSQL source into a Vertica table.

    Opens an ETL-bookkeeping session (Postgres-backed), extracts the rows whose
    ``start_time`` falls on date ``dt``, adds service columns, and bulk-loads the
    result into ``target_table`` via Vertica COPY FROM STDIN.

    :param src_conn_id: Airflow connection id of the MSSQL source.
    :param src_schema: source schema name; also recorded as ``etl_source``.
    :param dt: ISO date string selecting the day of data to load.
    :param target_conn_id: Airflow connection id of the Vertica target.
    :param target_table: fully-qualified Vertica table to COPY into.
    :raises AirflowSkipException: when the source query returns no rows.
    """
    etl_conn = PostgresHook(postgres_conn_id='etl').get_conn()
    with Session(etl_conn, f'{dag.dag_id}__{src_schema}') as session:
        df = _extract_orders(src_conn_id, src_schema, dt)
        if df.empty:
            # Nothing for this day — mark the task skipped, not failed.
            raise AirflowSkipException('No rows to load')
        _add_service_fields(df, src_schema, session.id)
        session.loaded_rows = _copy_to_vertica(df, target_conn_id, target_table)
        session.successful = True


def _extract_orders(src_conn_id, src_schema, dt):
    """Fetch one day of Orders rows from the MSSQL source as a DataFrame."""
    # `dt` is bound as a query parameter (pymssql %s placeholder) instead of
    # being interpolated into the SQL text — fixes a SQL-injection hole.
    query = """
        SELECT id, start_time, end_time, type, data
        FROM dbo.Orders
        WHERE CONVERT(DATE, start_time) = %s
    """
    source_conn = MsSqlHook(mssql_conn_id=src_conn_id, schema=src_schema).get_conn()
    try:
        return pd.read_sql_query(query, source_conn, params=(dt,))
    finally:
        # Original leaked this connection; close it even if the read fails.
        source_conn.close()


def _add_service_fields(df, src_schema, etl_id):
    """Add ETL lineage columns (source name, run id, stable row hash) in place."""
    df['etl_source'] = src_schema
    df['etl_id'] = etl_id
    df['hash_id'] = hash_pandas_object(df[['etl_source', 'id']])


def _copy_to_vertica(df, target_conn_id, target_table):
    """Serialize *df* to a pipe-delimited CSV buffer and COPY it into Vertica.

    Returns the cursor's reported row count after the COPY.
    """
    buffer = StringIO()
    df.to_csv(buffer, index=False, sep='|', na_rep='NUL',
              quoting=csv.QUOTE_MINIMAL, header=False,
              float_format='%.8f', doublequote=False, escapechar='\\')
    buffer.seek(0)

    # BUG FIX: the original interpolated df.columns.to_list() directly, which
    # renders a Python list repr ("['id', 'start_time', ...]") — invalid SQL.
    # Join the column names into a proper comma-separated list instead.
    column_list = ', '.join(df.columns)
    copy_stmt = f"""
        COPY {target_table}({column_list})
        FROM STDIN
        DELIMITER '|'
        ENCLOSED '"'
        ABORT ON ERROR
        NULL 'NUL'
    """
    target_conn = VerticaHook(vertica_conn_id=target_conn_id).get_conn()
    try:
        cursor = target_conn.cursor()
        cursor.copy(copy_stmt, buffer)
        # NOTE(review): rowcount semantics after COPY are driver-dependent —
        # confirm vertica-python reports the number of loaded rows here.
        return cursor.rowcount
    finally:
        # Original leaked this connection as well.
        target_conn.close()