def load_data(**kwargs):
    """Append a small demo DataFrame to TABLE_NAME and report the table size.

    Inserts three sample house rows via a SQLAlchemy engine, then re-reads
    the table to count the total rows accumulated so far.

    Args:
        **kwargs: Airflow context injected by the PythonOperator
            (``provide_context=True``); not used directly here.

    Returns:
        int: total number of rows in TABLE_NAME after the append.
    """
    connection = create_engine(CONNECTION_URI)  # this will come from ds
    df = pd.DataFrame({
        "house_id": [1, 2, 3],
        # NOTE(review): "14 - 5" evaluates to 9 — possibly a typo for 14.5.
        # Kept as-is to preserve existing behavior; TODO confirm intent.
        "price": [11.1, 12.3, 14 - 5],
        "typex": ["A", "B", "C"],
        "timex": [datetime.now(), datetime.now(), datetime.now()],
    })
    try:
        df.to_sql(
            TABLE_NAME,
            connection,
            if_exists="append",
        )
        dt = pd.read_sql(f"SELECT * FROM {TABLE_NAME}", connection)
    finally:
        # Always release the engine's connection pool, even on failure.
        connection.dispose()
    print(f"data loaded {len(df)} rows, total rows {len(dt)}")
    return len(dt)
def daily_query():
    """Build query parameters selecting records modified within the last day.

    Returns:
        dict: a single ``date_modified__gte`` key whose value is the
        ISO-8601 timestamp of 24 hours before now (UTC).
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=1)
    return {"date_modified__gte": cutoff.isoformat()}
def load_data(**kwargs):
    """Append a small demo DataFrame to TABLE_NAME and report the table size.

    Inserts three sample house rows via a SQLAlchemy engine, then re-reads
    the table to count the total rows accumulated so far.

    Args:
        **kwargs: Airflow context injected by the PythonOperator
            (``provide_context=True``); not used directly here.

    Returns:
        int: total number of rows in TABLE_NAME after the append.
    """
    connection = create_engine(CONNECTION_URI)  # this will come from ds
    df = pd.DataFrame({
        'house_id': [1, 2, 3],
        # NOTE(review): "14-5" evaluates to 9 — possibly a typo for 14.5.
        # Kept as-is to preserve existing behavior; TODO confirm intent.
        'price': [11.1, 12.3, 14-5],
        'typex': ['A', 'B', 'C'],
        'timex': [datetime.now(), datetime.now(), datetime.now()],
    })
    try:
        df.to_sql(TABLE_NAME, connection, if_exists='append')
        dt = pd.read_sql(f'SELECT * FROM {TABLE_NAME}', connection)
    finally:
        # Bug fix: previously the engine was never disposed when to_sql or
        # read_sql raised, leaking the connection pool on every failed run.
        connection.dispose()
    print(f'data loaded {len(df)} rows, total rows {len(dt)}')
    return len(dt)
print(f"postal data with {df.shape} exits") engine.dispose() except sqlalchemy.exc.ProgrammingError as e: print(e.__dict__["statement"]) return False return True with DAG( dag_id="populate_postal_codes", description=f"Populate postal code to {TABLE_NAME}", default_args=args, # Start 10 minutes ago # days_ago(2) start_date=datetime.now(), schedule_interval="@once", ) as dag: push_postal_data = PythonOperator( task_id="load_postal_data", python_callable=get_postal, op_args=[ False, ], dag=dag, provide_context=True, ) check_postal_data = PythonOperator( task_id="check_postal_data",
with connection.connect() as conn: conn.execute(f"DELETE FROM {TABLE_NAME};") print(f'data removed {repr(data_size)} rows') connection.dispose() return f'[+] data removing task completed' with DAG( dag_id='df_to_postgres_sqlalchemy', description=f'Load data to postgress table {repr("boliga")}', default_args=args, start_date=datetime.now() - timedelta(minutes=10), # Start 10 minutes ago # days_ago(2) schedule_interval='*/10 * * * *', ) as dag: load_dataframe = PythonOperator( task_id='load_data_with_sqlalchemy', python_callable=load_data, dag=dag, provide_context=True ) remove_dataframe = PythonOperator( task_id='remove_data_after_50_rows', dag=dag,
if data_size and data_size > 50: with connection.connect() as conn: conn.execute(f"DELETE FROM {TABLE_NAME};") print(f"data removed {repr(data_size)} rows") finally: connection.dispose() return "[+] data removing task completed" with DAG( dag_id="df_to_postgres_sqlalchemy", description=f'Load data to postgress table {repr("boliga")}', default_args=args, start_date=( datetime.now() - timedelta(minutes=10)), # Start 10 minutes ago # days_ago(2) schedule_interval="*/10 * * * *", ) as dag: load_dataframe = PythonOperator( task_id="load_data_with_sqlalchemy", python_callable=load_data, dag=dag, provide_context=True, ) remove_dataframe = PythonOperator( task_id="remove_data_after_50_rows", dag=dag, python_callable=remove_data,