# [START upload_sheet_to_gcs]
upload_sheet_to_gcs = GoogleSheetsToGCSOperator(
    task_id="upload_sheet_to_gcs",
    destination_bucket=GCS_BUCKET,
    spreadsheet_id=SPREADSHEET_ID,
)
# [END upload_sheet_to_gcs]

# [START create_spreadsheet]
create_spreadsheet = GoogleSheetsCreateSpreadsheetOperator(
    task_id="create_spreadsheet",
    spreadsheet=SPREADSHEET,
)
# [END create_spreadsheet]

# [START print_spreadsheet_url]
print_spreadsheet_url = BashOperator(
    task_id="print_spreadsheet_url",
    bash_command=f"echo {create_spreadsheet.output['spreadsheet_url']}",
)
# [END print_spreadsheet_url]

# [START upload_gcs_to_sheet]
upload_gcs_to_sheet = GCSToGoogleSheetsOperator(
    task_id="upload_gcs_to_sheet",
    bucket_name=GCS_BUCKET,
    object_name="{{ task_instance.xcom_pull('upload_sheet_to_gcs')[0] }}",
    spreadsheet_id=NEW_SPREADSHEET_ID,
)
# [END upload_gcs_to_sheet]

create_spreadsheet >> print_spreadsheet_url
upload_sheet_to_gcs >> upload_gcs_to_sheet

"retry_delay": timedelta(minutes=5), } def _wait_for_file(): return os.path.exists("/opt/airflow/data/wait.txt") with DAG( "08_sensor", default_args=default_args, description="A simple tutorial DAG", schedule_interval=timedelta(days=1), ) as dag: t1 = BashOperator( task_id="touch_file_1", bash_command="touch /opt/airflow/data/1.txt", ) wait = PythonSensor( task_id="wait_for_file", python_callable=_wait_for_file, timeout=6000, poke_interval=10, retries=100, mode="poke", ) t3 = BashOperator( task_id="touch_file_3", depends_on_past=True, bash_command="touch /opt/airflow/data/2.txt",
    schedule_interval='@once',
    start_date=START_DATE,
    catchup=False,
    tags=["example"],
) as build_dag:
    # [START howto_operator_create_build_from_storage]
    create_build_from_storage = CloudBuildCreateBuildOperator(
        task_id="create_build_from_storage",
        project_id=GCP_PROJECT_ID,
        build=create_build_from_storage_body,
    )
    # [END howto_operator_create_build_from_storage]

    # [START howto_operator_create_build_from_storage_result]
    create_build_from_storage_result = BashOperator(
        bash_command=f"echo {create_build_from_storage.output['results']}",
        task_id="create_build_from_storage_result",
    )
    # [END howto_operator_create_build_from_storage_result]

    # [START howto_operator_create_build_from_repo]
    create_build_from_repo = CloudBuildCreateBuildOperator(
        task_id="create_build_from_repo",
        project_id=GCP_PROJECT_ID,
        build=create_build_from_repo_body,
    )
    # [END howto_operator_create_build_from_repo]

    # [START howto_operator_create_build_from_repo_result]
    create_build_from_repo_result = BashOperator(
        bash_command=f"echo {create_build_from_repo.output['results']}",
        task_id="create_build_from_repo_result",
    )

dag = DAG(
    dag_id='example_bash_operator',
    default_args=args,
    schedule_interval='0 0 * * *',
    start_date=days_ago(2),
    dagrun_timeout=timedelta(minutes=60),
    tags=['example'],
)

run_this_last = DummyOperator(
    task_id='run_this_last',
    dag=dag,
)

# [START howto_operator_bash]
run_this = BashOperator(
    task_id='run_after_loop',
    bash_command='echo 1',
    dag=dag,
)
# [END howto_operator_bash]

run_this >> run_this_last

for i in range(3):
    task = BashOperator(
        task_id='runme_' + str(i),
        bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        dag=dag,
    )
    task >> run_this

# [START howto_operator_bash_template]

) as dag:
    # [START howto_operator_video_intelligence_detect_labels]
    detect_video_label = CloudVideoIntelligenceDetectVideoLabelsOperator(
        input_uri=INPUT_URI,
        output_uri=None,
        video_context=None,
        timeout=5,
        task_id="detect_video_label",
    )
    # [END howto_operator_video_intelligence_detect_labels]

    # [START howto_operator_video_intelligence_detect_labels_result]
    detect_video_label_result = BashOperator(
        bash_command="echo {{ task_instance.xcom_pull('detect_video_label')"
        "['annotationResults'][0]['shotLabelAnnotations'][0]['entity'] }}",
        task_id="detect_video_label_result",
    )
    # [END howto_operator_video_intelligence_detect_labels_result]

    # [START howto_operator_video_intelligence_detect_explicit_content]
    detect_video_explicit_content = CloudVideoIntelligenceDetectVideoExplicitContentOperator(
        input_uri=INPUT_URI,
        output_uri=None,
        video_context=None,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id="detect_video_explicit_content",
    )
    # [END howto_operator_video_intelligence_detect_explicit_content]

@task
def print_value(value, ts=None):
    """Dummy function"""
    log.info("The knights of Ni say: %s (at %s)", value, ts)


with DAG(
    dag_id='example_xcom_args',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule_interval=None,
    tags=['example'],
) as dag:
    print_value(generate_value())

with DAG(
    "example_xcom_args_with_operators",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule_interval=None,
    tags=['example'],
) as dag2:
    bash_op1 = BashOperator(task_id="c", bash_command="echo c")
    bash_op2 = BashOperator(task_id="d", bash_command="echo c")
    xcom_args_a = print_value("first!")
    xcom_args_b = print_value("second!")

    bash_op1 >> xcom_args_a >> xcom_args_b >> bash_op2

    schedule_interval=None,
    tags=['example'],
) as dag:
    create_bucket1 = GCSCreateBucketOperator(
        task_id="create_bucket1", bucket_name=BUCKET_1, project_id=PROJECT_ID
    )
    create_bucket2 = GCSCreateBucketOperator(
        task_id="create_bucket2", bucket_name=BUCKET_2, project_id=PROJECT_ID
    )

    list_buckets = GCSListObjectsOperator(task_id="list_buckets", bucket=BUCKET_1)

    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_buckets') }}\"",
    )

    upload_file = LocalFilesystemToGCSOperator(
        task_id="upload_file",
        src=PATH_TO_UPLOAD_FILE,
        dst=BUCKET_FILE_LOCATION,
        bucket=BUCKET_1,
    )

    transform_file = GCSFileTransformOperator(
        task_id="transform_file",
        source_bucket=BUCKET_1,
        source_object=BUCKET_FILE_LOCATION,
        transform_script=["python", PATH_TO_TRANSFORM_SCRIPT],
    )
    # [START howto_operator_gcs_bucket_create_acl_entry_task]

with DAG(
    "example_passing_params_via_test_command",
    schedule_interval='*/1 * * * *',
    start_date=days_ago(1),
    dagrun_timeout=timedelta(minutes=4),
    tags=['example'],
) as dag:
    my_templated_command = dedent(
        """
        echo " 'foo was passed in via Airflow CLI Test command with value {{ params.foo }} "
        echo " 'miff was passed in via BashOperator with value {{ params.miff }} "
        """
    )

    run_this = PythonOperator(
        task_id='run_this',
        python_callable=my_py_command,
        params={"miff": "agg"},
    )

    also_run_this = BashOperator(
        task_id='also_run_this',
        bash_command=my_templated_command,
        params={"miff": "agg"},
    )

    env_var_test_task = PythonOperator(task_id='env_var_test_task', python_callable=print_env_vars)

    run_this >> also_run_this

    date = str(ds)
    prices = prices_json['bpi'][date]
    df = pd.DataFrame(views_json['items'])
    # single-bracket assignment: a double-bracket column list raises a KeyError
    # in pandas when the column does not yet exist
    df['bitcoin_price_index'] = prices
    # convert DF to CSV and store in /tmp/home/bitcoin
    df.to_csv(f"/tmp/home/bitcoin/viewsAndFiles_{ds_nodash}.csv", index=False)


with DAG(
    dag_id="bitcoin-views-and-price-pipeline",
    start_date=airflow.utils.dates.days_ago(5),
    schedule_interval="@daily",
) as dag:
    fetch_bitcoin_views = BashOperator(
        task_id="fetch_daily_bitcoin_views",
        bash_command=(
            "curl -o /tmp/views_{{ ds }}.json -L "
            "'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
            "en.wikipedia/all-access/all-agents/Bitcoin/daily/{{ ds_nodash }}00/{{ ds_nodash }}00'"
        ),
    )

    fetch_bitcoin_prices = BashOperator(
        task_id="fetch_daily_bitcoin_prices",
        bash_command=(
            "curl -o /tmp/prices_{{ ds }}.json -L "
            "'https://api.coindesk.com/v1/bpi/historical/close.json?start={{ ds }}&end={{ ds }}'"
        ),
    )

    save_to_csv = PythonOperator(
        task_id="save_to_csv",
        python_callable=_save_to_csv,
        provide_context=True,
    )

    [fetch_bitcoin_views, fetch_bitcoin_prices] >> save_to_csv

"example_gcp_cloud_build", default_args=dict(start_date=dates.days_ago(1)), schedule_interval='@once', tags=['example'], ) as dag: # [START howto_operator_create_build_from_storage] create_build_from_storage = CloudBuildCreateBuildOperator( task_id="create_build_from_storage", project_id=GCP_PROJECT_ID, body=create_build_from_storage_body) # [END howto_operator_create_build_from_storage] # [START howto_operator_create_build_from_storage_result] create_build_from_storage_result = BashOperator( bash_command= "echo '{{ task_instance.xcom_pull('create_build_from_storage')['images'][0] }}'", task_id="create_build_from_storage_result", ) # [END howto_operator_create_build_from_storage_result] create_build_from_repo = CloudBuildCreateBuildOperator( task_id="create_build_from_repo", project_id=GCP_PROJECT_ID, body=create_build_from_repo_body) create_build_from_repo_result = BashOperator( bash_command= "echo '{{ task_instance.xcom_pull('create_build_from_repo')['images'][0] }}'", task_id="create_build_from_repo_result", )
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator

dag = DAG(
    dag_id="09_no_catchup",
    schedule_interval="@daily",
    start_date=dt.datetime(year=2019, month=1, day=1),
    end_date=dt.datetime(year=2019, month=1, day=5),
    catchup=False,
)

fetch_events = BashOperator(
    task_id="fetch_events",
    bash_command=(
        "mkdir -p /data/events && "
        "curl -o /data/events/{{ds}}.json "
        "http://events_api:5000/events?"
        "start_date={{ds}}&"
        "end_date={{next_ds}}"
    ),
    dag=dag,
)


def _calculate_stats(**context):
    """Calculates event statistics."""
    input_path = context["templates_dict"]["input_path"]
    output_path = context["templates_dict"]["output_path"]

    events = pd.read_json(input_path)
    stats = events.groupby(["date", "user"]).size().reset_index()

    Path(output_path).parent.mkdir(exist_ok=True)

) as dag:
    with open(dag.params["region_cfg"], 'r') as stream:
        regions = yaml.safe_load(stream)

    last_exec_date = dag.get_latest_execution_date()
    if last_exec_date is None:
        last_exec_date = datetime.datetime(year=1970, month=1, day=1)
    unique_id = str(round(last_exec_date.timestamp()))

    directory_output = WORKING_DIR + "/data/exports/whole-genome-clades/" + unique_id + "/"

    mk_dir_task = BashOperator(
        task_id='make_directory',
        bash_command='mkdir -p {{params.directory_output}}',
        params={"directory_output": directory_output},
        dag=dag,
    )

    clades = [
        "B.1.2", "B.1.596", "B.1", "B.1.1.519", "B.1.243", "B.1.234",
        "B.1.526.1", "B.1.1", "B.1.526.2", "B.1.575", "R.1", "B.1.1.7",
        "B.1.429", "B.1.427", "B.1.351", "P.1", "B.1.526", "P.2",
        "B.1.525", "B.1.617", "B.1.617.1", "B.1.617.2"
    ]

    for clade in clades:
        params = {}
        params[

    task_id='is_forex_currencies_file_available',
    fs_conn_id='forex_path',
    filepath='forex_currencies.csv',
    poke_interval=5,
    timeout=20,
)

downloading_rates = PythonOperator(
    task_id='downloading_rates',
    python_callable=_download_rates,
)

saving_rates = BashOperator(
    task_id='saving_rates',
    bash_command="""
        hdfs dfs -mkdir -p /forex && \
        hdfs dfs -put -f $AIRFLOW_HOME/dags/files/forex_rates.json /forex
    """,
)

creating_forex_rates_table = HiveOperator(
    task_id="creating_forex_rates_table",
    hive_cli_conn_id="hive_default",
    hql="""
        CREATE EXTERNAL TABLE IF NOT EXISTS forex_rates(
            base STRING,
            last_update DATE,
            eur DOUBLE,
            usd DOUBLE,
            nzd DOUBLE,
            gbp DOUBLE,

    location=location,
)

# [START howto_operator_bigquery_get_data]
get_data = BigQueryGetDataOperator(
    task_id="get_data",
    dataset_id=DATASET_NAME,
    table_id=TABLE_1,
    max_results=10,
    selected_fields="value,name",
    location=location,
)
# [END howto_operator_bigquery_get_data]

get_data_result = BashOperator(
    task_id="get_data_result",
    bash_command=f"echo {get_data.output}",
)

# [START howto_operator_bigquery_check]
check_count = BigQueryCheckOperator(
    task_id="check_count",
    sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
    use_legacy_sql=False,
    location=location,
)
# [END howto_operator_bigquery_check]

# [START howto_operator_bigquery_value_check]
check_value = BigQueryValueCheckOperator(
    task_id="check_value",
    sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",

with DAG(
    'pipeline',
    start_date=datetime(2022, 3, 28),
    schedule_interval='@daily',
    default_args=default_args,
    catchup=False,
) as dag:

    is_csv_available = FileSensor(
        task_id='is_csv_available',
        fs_conn_id='path',
        filepath="owid-covid-data.csv",
        poke_interval=5,
        timeout=20,
    )

    push_to_hive = BashOperator(
        task_id="push_to_hive",
        bash_command="""
            hdfs dfs -mkdir -p /covidData && \
            hdfs dfs -put -f $AIRFLOW_HOME/dags/files/owid-covid-data.csv /covidData
        """,
    )

    create_hive_table = HiveOperator(
        task_id='create_hive_table',
        hive_cli_conn_id='hive_conn',
        hql="""
            CREATE EXTERNAL TABLE IF NOT EXISTS cov_data(
                iso_code STRING,
                continent STRING,
                location STRING,
                `date` STRING,
                total_cases BIGINT,
                new_cases BIGINT,
                new_cases_smoothed FLOAT,
                total_deaths BIGINT,

"email": "*****@*****.**", "start_date": datetime(2021, 3, 12, 17), "depends_on_past": False, "retries": 1, "retry_delay": timedelta(minutes=5), "email_on_retry": False, "email_on_failure": IS_PROD } with DAG("gojek-assignment", default_args=DEFAULT_ARGS, schedule_interval="0 22 * * *", max_active_runs=1, catchup=True, dagrun_timeout=timedelta(minutes=90)) as dag: q1_job = BashOperator(task_id="gojek-assignment_q1", bash_command="python q1.py", dag=dag, execution_timeout=timedelta(minutes=60), retry_delay=timedelta(minutes=10), retries=2) q2_job = BashOperator(task_id="gojek-assignment_q2", bash_command="python q2.py", dag=dag, execution_timeout=timedelta(minutes=60), retry_delay=timedelta(minutes=10), retries=2) q1_job >> q2_job
)
# [END howto_operator_gcp_pubsub_create_subscription]

# [START howto_operator_gcp_pubsub_pull_message_with_sensor]
subscription = "{{ task_instance.xcom_pull('subscribe_task') }}"

pull_messages = PubSubPullSensor(
    task_id="pull_messages",
    ack_messages=True,
    project_id=GCP_PROJECT_ID,
    subscription=subscription,
)
# [END howto_operator_gcp_pubsub_pull_message_with_sensor]

# [START howto_operator_gcp_pubsub_pull_messages_result]
pull_messages_result = BashOperator(task_id="pull_messages_result", bash_command=echo_cmd)
# [END howto_operator_gcp_pubsub_pull_messages_result]

# [START howto_operator_gcp_pubsub_publish]
publish_task = PubSubPublishMessageOperator(
    task_id="publish_task",
    project_id=GCP_PROJECT_ID,
    topic=TOPIC_FOR_SENSOR_DAG,
    messages=[MESSAGE] * 10,
)
# [END howto_operator_gcp_pubsub_publish]

# [START howto_operator_gcp_pubsub_unsubscribe]
unsubscribe_task = PubSubDeleteSubscriptionOperator(
    task_id="unsubscribe_task",
    project_id=GCP_PROJECT_ID,

from datetime import datetime
from pathlib import Path

import pandas as pd

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator

dag = DAG(
    dag_id="01_unscheduled",
    start_date=datetime(2019, 1, 1),
    schedule_interval=None,
)

fetch_events = BashOperator(
    task_id="fetch_events",
    bash_command=(
        "mkdir -p /data/events && "
        "curl -o /data/events.json http://events_api:5000/events"
    ),
    dag=dag,
)


def _calculate_stats(input_path, output_path):
    """Calculates event statistics."""
    Path(output_path).parent.mkdir(exist_ok=True)

    events = pd.read_json(input_path)
    stats = events.groupby(["date", "user"]).size().reset_index()
    stats.to_csv(output_path, index=False)

    'example_gcp_vision_annotate_image',
    default_args=default_args,
    schedule_interval=None,
) as dag_annotate_image:
    # ############################## #
    # ### Annotate image example ### #
    # ############################## #

    # [START howto_operator_vision_annotate_image]
    annotate_image = CloudVisionImageAnnotateOperator(
        request=annotate_image_request,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id='annotate_image',
    )
    # [END howto_operator_vision_annotate_image]

    # [START howto_operator_vision_annotate_image_result]
    annotate_image_result = BashOperator(
        bash_command="echo {{ task_instance.xcom_pull('annotate_image')"
        "['logoAnnotations'][0]['description'] }}",
        task_id='annotate_image_result',
    )
    # [END howto_operator_vision_annotate_image_result]

    # [START howto_operator_vision_detect_text]
    detect_text = CloudVisionDetectTextOperator(
        image=DETECT_IMAGE,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id="detect_text",
        language_hints="en",
        web_detection_params={'include_geo_results': True},
    )
    # [END howto_operator_vision_detect_text]

    schedule_interval='@once',
    tags=['example'],
) as dag:
    create_bucket1 = GCSCreateBucketOperator(
        task_id="create_bucket1", bucket_name=BUCKET_1, project_id=PROJECT_ID
    )
    create_bucket2 = GCSCreateBucketOperator(
        task_id="create_bucket2", bucket_name=BUCKET_2, project_id=PROJECT_ID
    )

    list_buckets = GCSListObjectsOperator(task_id="list_buckets", bucket=BUCKET_1)

    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command=f"echo {list_buckets.output}",
    )

    upload_file = LocalFilesystemToGCSOperator(
        task_id="upload_file",
        src=PATH_TO_UPLOAD_FILE,
        dst=BUCKET_FILE_LOCATION,
        bucket=BUCKET_1,
    )

    transform_file = GCSFileTransformOperator(
        task_id="transform_file",
        source_bucket=BUCKET_1,
        source_object=BUCKET_FILE_LOCATION,
        transform_script=["python", PATH_TO_TRANSFORM_SCRIPT],
    )

"name": MODEL_NAME, }, ) # [END howto_operator_gcp_mlengine_create_model] # [START howto_operator_gcp_mlengine_get_model] get_model = MLEngineGetModelOperator( task_id="get-model", project_id=PROJECT_ID, model_name=MODEL_NAME, ) # [END howto_operator_gcp_mlengine_get_model] # [START howto_operator_gcp_mlengine_print_model] get_model_result = BashOperator( bash_command="echo \"{{ task_instance.xcom_pull('get-model') }}\"", task_id="get-model-result", ) # [END howto_operator_gcp_mlengine_print_model] # [START howto_operator_gcp_mlengine_create_version1] create_version = MLEngineCreateVersionOperator( task_id="create-version", project_id=PROJECT_ID, model_name=MODEL_NAME, version={ "name": "v1", "description": "First-version", "deployment_uri": '{}/keras_export/'.format(JOB_DIR), "runtime_version": "1.15", "machineType": "mls1-c1-m2", "framework": "TENSORFLOW",
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'schedule_interval': timedelta(1),
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG('tutorial', default_args=default_args)

# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)

t1.doc_md = """\
#### Task Documentation
You can document your task using the attributes `doc_md` (markdown),
`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml`, which get
rendered in the UI's Task Details page.
![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)
"""

dag.doc_md = __doc__

t2 = BashOperator(task_id='sleep', depends_on_past=False, bash_command='sleep 5', dag=dag)

labels={"foo": "bar"}, name="airflow-private-image-pod", is_delete_operator_pod=True, in_cluster=True, task_id="task-two", get_logs=True, ) # [END howto_operator_k8s_private_image] # [START howto_operator_k8s_write_xcom] write_xcom = KubernetesPodOperator( namespace='default', image='alpine', cmds=[ "sh", "-c", "mkdir -p /airflow/xcom/;echo '[1,2,3,4]' > /airflow/xcom/return.json" ], name="write-xcom", do_xcom_push=True, is_delete_operator_pod=True, in_cluster=True, task_id="write-xcom", get_logs=True, ) pod_task_xcom_result = BashOperator( bash_command="echo \"{{ task_instance.xcom_pull('write-xcom')[0] }}\"", task_id="pod_task_xcom_result", ) # [END howto_operator_k8s_write_xcom]
    con = sqlite3.connect(data_dir / "commit.db")
    with con:
        commits.to_sql(valid_json.stem[:-6] + "_commits", con, if_exists="replace")
        files_changed.to_sql(
            valid_json.stem[:-6] + "_files_changed", con, if_exists="replace"
        )


git_log_etl = DAG("git_log_etl", default_args={"start_date": "2021-01-01"})

clear_data_dir = BashOperator(
    task_id="clear_data_dir",
    bash_command="""
    cd {{ var.value.data_dir }}
    rm -rf *.json
    rm -rf *.csv
    rm -rf *.db
    """,
    dag=git_log_etl,
)

clear_repos_dir = BashOperator(
    task_id="clear_repos_dir",
    bash_command="""
    cd {{ var.value.repos_dir }}
    rm -rf *
    """,
    dag=git_log_etl,
)

git_clone = BashOperator(

        'processing_tasks.training_model_c',
    ])
    print(accuracies)


def _is_accurate():
    return 'accurate'


with DAG(
    'xcom_dag',
    schedule_interval='@daily',
    default_args=default_args,
    catchup=False,
) as dag:

    downloading_data = BashOperator(
        task_id='downloading_data',
        bash_command='sleep 3',
        do_xcom_push=False,
    )

    with TaskGroup('processing_tasks') as processing_tasks:
        training_model_a = PythonOperator(
            task_id='training_model_a',
            python_callable=_training_model,
        )
        training_model_b = PythonOperator(
            task_id='training_model_b',
            python_callable=_training_model,
        )
        training_model_c = PythonOperator(
            task_id='training_model_c',
            python_callable=_training_model,
        )

    choose_model = PythonOperator(
        task_id='task_4',
        python_callable=_choose_best_model,
    )

    tags=['lambda', 'imageprocessing'],
)

# arg = json.dumps(kwargs['dag_run'].conf)
# print(arg)

face_detection = BranchPythonOperator(
    depends_on_past=False,
    task_id='face_detection',
    python_callable=face_detection,
    provide_context=True,
    dag=dag,
)

# [START howto_operator_bash]
photo_not_meet_requirement = BashOperator(
    task_id='photo_not_meet_requirement',
    bash_command='echo photo_not_meet_requirement',
    dag=dag,
)

check_duplicate = BranchPythonOperator(
    task_id='check_duplicate',
    python_callable=check_duplicate,
    provide_context=True,
    dag=dag,
)

duplicate_face = BashOperator(
    task_id='duplicate_face',
    bash_command='echo duplicate_face',
    dag=dag,
)

failure = BashOperator(
    task_id='failure',

to_channels = ['toTwitter_A', 'toTwitter_B', 'toTwitter_C', 'toTwitter_D']

yesterday = date.today() - timedelta(days=1)
dt = yesterday.strftime("%Y-%m-%d")

# define where you want to store the tweets csv file in your local directory
local_dir = "/tmp/"
# define the location where you want to store in HDFS
# (the leading space is load-bearing: it separates the local file from the
# HDFS path when the bash command is concatenated below)
hdfs_dir = " /tmp/"

for channel in to_channels:
    file_name = "to_" + channel + "_" + yesterday.strftime("%Y-%m-%d") + ".csv"

    load_to_hdfs = BashOperator(
        task_id="put_" + channel + "_to_hdfs",
        bash_command="HADOOP_USER_NAME=hdfs hadoop fs -put -f " +
                     local_dir + file_name +
                     hdfs_dir + channel + "/",
    )

    load_to_hdfs << analyze_tweets

    load_to_hive = HiveOperator(
        task_id="load_" + channel + "_to_hive",
        hql="LOAD DATA INPATH '" + hdfs_dir + channel + "/" + file_name + "' "
            "INTO TABLE " + channel + " "
            "PARTITION(dt='" + dt + "')",
    )

    load_to_hive << load_to_hdfs
    load_to_hive >> hive_to_mysql

upload_sheet_to_gcs = GoogleSheetsToGCSOperator(
    task_id="upload_sheet_to_gcs",
    destination_bucket=GCS_BUCKET,
    spreadsheet_id=SPREADSHEET_ID,
)
# [END upload_sheet_to_gcs]

# [START create_spreadsheet]
create_spreadsheet = GoogleSheetsCreateSpreadsheetOperator(
    task_id="create_spreadsheet",
    spreadsheet=SPREADSHEET,
)
# [END create_spreadsheet]

# [START print_spreadsheet_url]
print_spreadsheet_url = BashOperator(
    task_id="print_spreadsheet_url",
    bash_command="echo {{ task_instance.xcom_pull('create_spreadsheet', key='spreadsheet_url') }}",
)
# [END print_spreadsheet_url]

# [START upload_gcs_to_sheet]
upload_gcs_to_sheet = GCSToGoogleSheetsOperator(
    task_id="upload_gcs_to_sheet",
    bucket_name=GCS_BUCKET,
    object_name="{{ task_instance.xcom_pull('upload_sheet_to_gcs')[0] }}",
    spreadsheet_id=NEW_SPREADSHEET_ID,
)
# [END upload_gcs_to_sheet]

create_spreadsheet >> print_spreadsheet_url
upload_sheet_to_gcs >> upload_gcs_to_sheet

        firstname TEXT NOT NULL,
        lastname TEXT NOT NULL,
        country TEXT NOT NULL,
        username TEXT NOT NULL,
        password TEXT NOT NULL,
        email TEXT NOT NULL PRIMARY KEY
    );
    ''')

is_api_available = HttpSensor(
    task_id='is_api_available',
    http_conn_id='user_api',
    endpoint='api/',
)

extracting_users = SimpleHttpOperator(
    task_id='extracting_user',
    http_conn_id='user_api',
    endpoint='api/',
    method='GET',
    response_filter=lambda response: json.loads(response.text),
    log_response=True,
)

processing_user = PythonOperator(
    task_id='processing_user',
    python_callable=_processing_user,
)

storing_user = BashOperator(
    task_id='storing_user',
    bash_command='echo -e ".separator ","\n.import /tmp/processed_user.csv users" | sqlite3 /home/airflow/airflow/airflow.db',
)

creating_table >> is_api_available >> extracting_users >> processing_user >> storing_user

seven_days_ago = datetime.combine(
    datetime.today() - timedelta(7), datetime.min.time()
)

args = {
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(dag_id='example_bash_operator', default_args=args, schedule_interval=None)

cmd = 'ls -l'

run_this_last = DummyOperator(task_id='run_this_last', dag=dag)

run_this = BashOperator(task_id='run_after_loop', bash_command='echo 1', dag=dag)
run_this.set_downstream(run_this_last)

for i in range(3):
    i = str(i)
    task = BashOperator(
        task_id='runme_' + i,
        bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        dag=dag,
    )
    task.set_downstream(run_this)

task = BashOperator(
    task_id='also_run_this',
    bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
    dag=dag,
)