# [START load_function]
def load(**kwargs):
    ti = kwargs['ti']
    total_value_string = ti.xcom_pull(task_ids='transform', key='total_order_value')
    total_order_value = json.loads(total_value_string)

    print(total_order_value)
# [END load_function]

# [START main_flow]
extract_task = PythonOperator(
    task_id='extract',
    python_callable=extract,
)
extract_task.doc_md = """\
#### Extract task
A simple Extract task to get data ready for the rest of the data pipeline.
In this case, getting data is simulated by reading from a hardcoded JSON string.
This data is then put into xcom, so that it can be processed by the next task.
"""

transform_task = PythonOperator(
    task_id='transform',
    python_callable=transform,
)
transform_task.doc_md = """\
#### Transform task
A simple Transform task which takes in the collection of order data from xcom
and computes the total order value.
This computed value is then put into xcom, so that it can be processed by the next task.
"""
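# A minimal sketch (not part of the original file) of the `transform` callable
# that pairs with `load` above: it pulls the order data that `extract` pushed
# to XCom (the key 'order_data' is an assumption), sums the order values, and
# pushes the JSON-encoded total under the 'total_order_value' key that `load`
# pulls.
def transform(**kwargs):
    ti = kwargs['ti']
    extract_data_string = ti.xcom_pull(task_ids='extract', key='order_data')
    order_data_dict = json.loads(extract_data_string)

    total_order_value = sum(order_data_dict.values())
    total_value_dict = {"total_order_value": total_order_value}

    ti.xcom_push('total_order_value', json.dumps(total_value_dict))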
    dag_id,
    default_args=default_args(),
    schedule_interval="10 * * * *",
    start_date=datetime(2021, 1, 1, tzinfo=pendulum.timezone("Asia/Tokyo")),
) as dag:
    dag.doc_md = __doc__

    start = DummyOperator(task_id="start")

    a = PythonOperator(
        task_id="a",
        params={},
        python_callable=task_sample,
    )
    a.doc_md = task_sample.__doc__

    b = BranchPythonOperator(
        task_id="b",
        params={},
        python_callable=task_branch,
    )

    c = DummyOperator(task_id="c")
    d = DummyOperator(task_id="d")

    e = ShortCircuitOperator(
        task_id="e",
        params={},
        trigger_rule="none_failed",
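# Hypothetical sketch of the `task_branch` callable wired into the
# BranchPythonOperator "b" above (its real body is not shown in this
# fragment). A branch callable returns the task_id, or list of task_ids,
# of the downstream task(s) to follow; every other directly downstream
# task is skipped.
def task_branch(**kwargs):
    # Follow "c" on even days, "d" otherwise (placeholder condition).
    return "c" if kwargs["execution_date"].day % 2 == 0 else "d"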
        echo There are $NUM_TO_PROCESS files to process.
        test $NUM_TO_PROCESS -gt 0
        '''),
        params={'product': product},
    )

    # Thanks https://stackoverflow.com/questions/48580341/how-to-add-manual-tasks-in-an-apache-airflow-dag
    manual_sign_off = PythonOperator(
        task_id=f"manual_sign_off_{product}",
        python_callable=task_to_fail,
        retries=1,
        retry_delay=TIMEOUT,
    )
    manual_sign_off.doc_md = dedent("""
        ## Instructions
        Perform some manual checks that the number of COGs to be generated seems to be about right.
        You can also do spot checks that files don't already exist in S3.
        Once you're happy, mark this job as **Success** for the DAG to continue running.
    """)

    submit_task_id = f'submit_cog_convert_job_{product}'
    submit_bulk_cog_convert = SSHOperator(
        task_id=submit_task_id,
        command=dedent(COMMON + """
            cd {{work_dir}}
            mkdir out
            qsub <<EOF
            #!/bin/bash
            #PBS -l wd,walltime=5:00:00,mem=190GB,ncpus=48,jobfs=1GB
            #PBS -P {{params.project}}
            #PBS -q {{params.queue}}
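# The manual sign-off pattern referenced above (see the linked Stack Overflow
# answer) hinges on a callable that always fails, so the task stays red and
# keeps retrying until a human marks it Success in the UI. A minimal sketch of
# such a `task_to_fail` (the original body is not shown in this fragment):
from airflow.exceptions import AirflowException


def task_to_fail():
    raise AirflowException(
        "Please perform the manual checks described in the task docs, "
        "then mark this task as Success to let the DAG continue."
    )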
    df.to_sql(f'clean_{tablename}', con, if_exists='replace', index=False)


for table in tables:
    clean_data = PythonOperator(
        task_id=f'clean_data_{table}',
        python_callable=clean_data_df,
        op_kwargs={'tablename': table},
        dag=dag,
    )
    load_data >> clean_data

# [START documentation]
dag.doc_md = __doc__

load_data.doc_md = """\
#### Load Data
This task loads data from the csv files in the data directory (set as an
environment variable DATA_DIR) into the database Airflow creates.
"""

read_data.doc_md = """\
#### Read Data
This task does nothing. It demonstrates how to use the SQLite operator.
"""

clean_data.doc_md = """\
#### Clean Data
This task removes a column with pandas. It demonstrates how to alter data
and write it back into the same table.
"""
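# A hedged reconstruction of `clean_data_df`, whose last line
# (`df.to_sql(...)`) opens this fragment; the connection setup and the choice
# of column to drop are assumptions based on the "Clean Data" doc_md above.
def clean_data_df(tablename, **kwargs):
    import sqlite3

    import pandas as pd

    con = sqlite3.connect('airflow.db')                    # assumed database file
    df = pd.read_sql(f'SELECT * FROM {tablename}', con)    # read the raw table
    df = df.drop(columns=[df.columns[-1]])                 # drop one column (placeholder choice)
    df.to_sql(f'clean_{tablename}', con, if_exists='replace', index=False)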
re_parse_authors_data = PythonOperator(
    task_id='re_parse_authors',
    dag=dag,
    provide_context=True,
    python_callable=helpers.load_authors,
    op_kwargs={
        'aws_credentials_id': 'aws_credentials',
        'redshift_connection_id': 'redshift',
        's3_credentials_id': 's3_credentials',
        'region': 'us-east-1',
        'bucket': 'arxiv-etl',
        'file_name': 'staging/authors/authors-parsed.json'
    },
)

re_parse_authors_data.doc_md = """
# Parses data from S3 locally and re-formats it to easily work with Redshift COPY, then saves it back to S3
"""

stage_authors_to_redshift = StageFromS3ToRedshiftOperator(
    task_id='stage_authors',
    dag=dag,
    provide_context=True,
    table="staging.authors",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="arxiv-etl",
    s3_key="staging/authors/authors_parsed.csv",
    region="us-east-1",
    file_type="csv")

stage_authors_to_redshift.doc_md = """
    dag=math_dag
)

t2 = PythonOperator(
    task_id="subtraction_task",
    python_callable=sub_nos,
    depends_on_past=False,
    retries=3,
    dag=math_dag
)

square_task = PythonOperator(
    task_id="square_task",
    python_callable=square_no,
    depends_on_past=True,
    retries=3,
    dag=math_dag
)

math_dag.doc_md = __doc__

t1.doc_md = """\
#### Addition Task Documentation
A simple task to add two numbers
![miztiik-success-green](https://img.shields.io/badge/Miztiik:Automation:Airflow:Level-300-blue)
"""

# Configure Task Dependencies
t1 >> t2
t1 >> square_task
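# The "Addition Task Documentation" above describes t1 as a simple task that
# adds two numbers. The callable behind t1 is not shown in this fragment, so
# `add_nos` below is an assumed name and a minimal sketch only; its return
# value is pushed to XCom by the PythonOperator by default, so downstream
# tasks could reuse it.
def add_nos(a=5, b=3, **kwargs):
    result = a + b
    print(f"Adding {a} + {b} = {result}")
    return result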
    task_id='get_listings',
    python_callable=get_listings,
    dag=dag,
)

t2 = PythonOperator(
    task_id='send_email',
    provide_context=True,
    python_callable=send_email,
    dag=dag,
)

# noinspection PyStatementEffect
t1 >> t2

# Documentation
dag.doc_md = f"""
#### DAG Documentation
{dag.description}
"""

t1.doc_md = """
#### Task Documentation
Retrieves and stores Zoopla data
"""

t2.doc_md = """
#### Task Documentation
Sends email notification when new data is available
"""
    dag=dag,
)

t2 = PythonOperator(
    task_id='Fetch_Data_and_Create_CSV',
    python_callable=task2,
    retries=3,
    dag=dag,
)

t3 = PythonOperator(
    task_id='Upload_Big_Query',
    python_callable=task3,
    retries=3,
    dag=dag,
)

dag.doc_md = __doc__

t1.doc_md = """\
#### Task 1 : Install Requirements.
Install requirements present in requirements.txt
"""

t2.doc_md = """\
#### Task 2 : Fetch data from API & create a local csv.
The API provides the change in Covid-19 cases state-wise every day
"""

t1 >> t2 >> t3
    file.columns = file.columns.map(
        lambda x: x.replace('(', '').replace(')', ''))  # strip parenthesis characters from the column names
    engine = PostgresHook(
        postgres_conn_id='postgres_local').get_sqlalchemy_engine()
    file.to_sql('airflow_stg_mining_po', con=engine, index=True,
                if_exists='replace', schema='beeline')


# read the file and write it into a temporary staging table in the target database
process_file = PythonOperator(task_id='process_file',
                              provide_context=True,
                              python_callable=process_xls_file)

process_file.doc_md = """\
#### Task Documentation
You can document your task using the attributes `doc_md` (markdown),
`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets
rendered in the UI's Task Instance Details page.
![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)
"""

# update the target table
update_target_table = PostgresOperator(task_id='update_target_table',
                                       sql='''
                                           insert into beeline.airflow_mining_po
                                           select * from beeline.airflow_stg_mining_po
                                           on conflict do nothing;
                                       ''',
                                       postgres_conn_id='postgres_local',
                             python_callable=createlog,
                             dag=dag)

ExtracttoDF = PythonOperator(task_id='sqlite_to_df',
                             python_callable=getdf,
                             dag=dag)

LoadTask = PythonOperator(task_id='Destinationdb',
                          python_callable=createdb,
                          dag=dag)

UpsertTask = PythonOperator(task_id='Destinationdb_Upsert',
                            python_callable=updatedb,
                            dag=dag)

dag.doc_md = __doc__

ExtracttoDF.doc_md = """\
Extract data from source DB
"""

templated_command = """
{% for i in range(5) %}
    echo "{{ ds }}"
    echo "{{ macros.ds_add(ds, 7) }}"
    echo "{{ params.my_param }}"
{% endfor %}
"""

[ExtracttoDF, CreateLog] >> LoadTask >> UpsertTask
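# `templated_command` above is a Jinja template, but the operator that runs it
# is not included in this fragment. A hedged sketch of how it would typically
# be attached to a BashOperator, supplying the `params.my_param` value the
# template references (the task_id and param value are placeholders):
from airflow.operators.bash import BashOperator  # Airflow 2.x import path

TemplatedEcho = BashOperator(task_id='templated_echo',
                             bash_command=templated_command,
                             params={'my_param': 'my_value'},
                             dag=dag)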
""" message_task(SQL_CONN_STRING, KEY_WORDS, FREQ) with DAG( 'create_postgres_db', description="Creates Postgres DB for tweets if it doesn't already exist", schedule_interval="@once", default_args=default_args ) as create_pgdb_dag: create_db = PythonOperator( task_id='create_db', python_callable=create_postgres_db, dag=create_pgdb_dag ) create_db.doc_md = """\ #### CREATE PGDB Creates a database in Postgres for the transformed tweet data, \ if one does not already exist """ create_db with DAG( 'tweetl_dag', description='Performs ETL round and triggers slackbot', schedule_interval=timedelta(seconds=FREQ), catchup=False, default_args=default_args ) as tweetl_dag: