예제 #1
0
def load_data(**kwargs):
    """Append three sample house rows to ``TABLE_NAME`` and count its rows.

    An engine is created from ``CONNECTION_URI``, a fixed three-row frame is
    appended to the table, and the whole table is read back to count it.
    The engine is disposed whether or not the database calls succeed.

    Args:
        **kwargs: Accepted but not used by this function.

    Returns:
        Number of rows in ``TABLE_NAME`` after the append.
    """
    engine = create_engine(CONNECTION_URI)

    # this will come from ds
    sample_columns = {
        "house_id": [1, 2, 3],
        "price": [11.1, 12.3, 14 - 5],
        "typex": ["A", "B", "C"],
        # One datetime.now() call per row, so each row gets its own timestamp.
        "timex": [datetime.now() for _ in range(3)],
    }
    new_rows = pd.DataFrame(sample_columns)

    try:
        new_rows.to_sql(TABLE_NAME, engine, if_exists="append")
        stored_rows = pd.read_sql(f"SELECT * FROM {TABLE_NAME}", engine)
    finally:
        # Release pooled connections even when the write or read fails.
        engine.dispose()

    total = len(stored_rows)
    print(f"data loaded {len(new_rows)} rows, total rows {total}")
    return total
예제 #2
0
def daily_query():
    """Build query parameters that get endpoints starting one day before the call.

    Returns:
        A dict with the single key ``date_modified__gte`` whose value is the
        ISO 8601 string for the current UTC time minus one day.
    """
    one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)
    return {"date_modified__gte": one_day_ago.isoformat()}
def load_data(**kwargs):
    """Append three sample house rows to ``TABLE_NAME`` and count its rows.

    An engine is created from ``CONNECTION_URI``, a fixed three-row frame is
    appended to the table, and the whole table is read back to count it.

    Args:
        **kwargs: Accepted but not used by this function.

    Returns:
        Number of rows in ``TABLE_NAME`` after the append.

    Raises:
        Any exception raised by ``DataFrame.to_sql`` or ``pandas.read_sql``
        propagates to the caller; the engine is disposed first.
    """
    connection = create_engine(CONNECTION_URI)
    # this will come from ds

    df = pd.DataFrame({
        'house_id': [1, 2, 3],
        # NOTE(review): ``14-5`` evaluates to 9 -- confirm 14.5 was not intended.
        'price': [11.1, 12.3, 14-5],
        'typex': ['A', 'B', 'C'],
        'timex': [datetime.now(), datetime.now(), datetime.now()],
    })

    try:
        df.to_sql(TABLE_NAME, connection, if_exists='append')
        dt = pd.read_sql(f'SELECT * FROM {TABLE_NAME}', connection)
    finally:
        # Dispose in ``finally`` so a failed write or read does not leave the
        # engine's pooled connections open.
        connection.dispose()

    print(f'data loaded {len(df)} rows, total rows {len(dt)}')

    return len(dt)
예제 #4
0
        print(f"postal data with {df.shape} exits")
        engine.dispose()

    except sqlalchemy.exc.ProgrammingError as e:
        print(e.__dict__["statement"])
        return False

    return True


with DAG(
        dag_id="populate_postal_codes",
        description=f"Populate postal code to {TABLE_NAME}",
        default_args=args,
        # Start 10 minutes ago # days_ago(2)
        start_date=datetime.now(),
        schedule_interval="@once",
) as dag:

    push_postal_data = PythonOperator(
        task_id="load_postal_data",
        python_callable=get_postal,
        op_args=[
            False,
        ],
        dag=dag,
        provide_context=True,
    )

    check_postal_data = PythonOperator(
        task_id="check_postal_data",
        with connection.connect() as conn:
            conn.execute(f"DELETE FROM {TABLE_NAME};")

        print(f'data removed {repr(data_size)} rows')

    connection.dispose()
    return f'[+] data removing task completed'
    



with DAG(
    dag_id='df_to_postgres_sqlalchemy',
    description=f'Load data to postgress table {repr("boliga")}',
    default_args=args,
    start_date=datetime.now() - timedelta(minutes=10), # Start 10 minutes ago # days_ago(2)
    schedule_interval='*/10 * * * *',
    ) as dag:

    
    load_dataframe = PythonOperator(
        task_id='load_data_with_sqlalchemy',
        python_callable=load_data,
        dag=dag,
        provide_context=True

    )

    remove_dataframe = PythonOperator(
        task_id='remove_data_after_50_rows',
        dag=dag,
예제 #6
0
        if data_size and data_size > 50:
            with connection.connect() as conn:
                conn.execute(f"DELETE FROM {TABLE_NAME};")

            print(f"data removed {repr(data_size)} rows")
    finally:
        connection.dispose()
    return "[+] data removing task completed"


with DAG(
        dag_id="df_to_postgres_sqlalchemy",
        description=f'Load data to postgress table {repr("boliga")}',
        default_args=args,
        start_date=(
            datetime.now() -
            timedelta(minutes=10)),  # Start 10 minutes ago # days_ago(2)
        schedule_interval="*/10 * * * *",
) as dag:

    load_dataframe = PythonOperator(
        task_id="load_data_with_sqlalchemy",
        python_callable=load_data,
        dag=dag,
        provide_context=True,
    )

    remove_dataframe = PythonOperator(
        task_id="remove_data_after_50_rows",
        dag=dag,
        python_callable=remove_data,