def test_execute(self, mock_hook):
    """Executing the download operator must delegate to the GCS hook."""
    # Build the operator under test from the module-level fixtures.
    download_op = GoogleCloudStorageDownloadOperator(
        task_id=TASK_ID,
        bucket=TEST_BUCKET,
        object=TEST_OBJECT,
        filename=LOCAL_FILE_PATH,
    )
    download_op.execute(None)
    # The mocked hook must have been asked to download exactly once,
    # with the same bucket/object/filename the operator was given.
    mock_hook.return_value.download.assert_called_once_with(
        bucket=TEST_BUCKET,
        object=TEST_OBJECT,
        filename=LOCAL_FILE_PATH,
    )
def test_execute(self, mock_hook):
    """A single execute() call should produce a single hook.download call."""
    # The operator is configured and the hook asserted with the same
    # keyword set, so spell it out once and reuse it.
    expected = dict(bucket=TEST_BUCKET, object=TEST_OBJECT, filename=LOCAL_FILE_PATH)
    task = GoogleCloudStorageDownloadOperator(task_id=TASK_ID, **expected)
    task.execute(None)
    mock_hook.return_value.download.assert_called_once_with(**expected)
def shakespeare_subdag(parent_dag, subdag_task_id, play_name):
    """Return a SubDAG that downloads, decrypts and word-counts one play.

    The SubDAG inherits schedule, start date and default args from
    *parent_dag*, and is named '<parent_dag_id>.<subdag_task_id>' as the
    SubDagOperator naming convention requires.
    """
    subdag_id = '{}.{}'.format(parent_dag.dag_id, subdag_task_id)
    with DAG(subdag_id,
             schedule_interval=parent_dag.schedule_interval,
             start_date=parent_dag.start_date,
             default_args=parent_dag.default_args) as subdag:
        # Fetch the encrypted play into the Composer-mounted data dir.
        fetch_play = GoogleCloudStorageDownloadOperator(
            task_id='download',
            bucket='smenyc2018-subdag-data',
            object='{}.enc'.format(play_name),
            filename='/home/airflow/gcs/data/{}.enc'.format(play_name))
        # Decrypt with the shared demo passphrase.
        decrypt_play = BashOperator(
            task_id='decrypt',
            bash_command=(
                'openssl enc -in /home/airflow/gcs/data/{play_name}.enc '
                '-out /home/airflow/gcs/data/{play_name}.txt -d -aes-128-cbc -k "hello-nyc"'
            ).format(play_name=play_name))
        # Count words and persist the count next to the text.
        count_words = BashOperator(
            task_id='wordcount',
            bash_command=(
                'wc -w /home/airflow/gcs/data/{play_name}.txt | tee /home/airflow/gcs/data/{play_name}_wordcount.txt'
            ).format(play_name=play_name))
        fetch_play >> decrypt_play >> count_words
    return subdag
# NOTE(review): fragment — this `return 2` belongs to a function whose
# `def` line is outside this chunk (presumably `produce_data`; confirm).
return 2
# Daily DAG (01:00) wiring download -> produce -> upload tasks.
dag = DAG('ejercicio_7', description='Ejercicio 7',
          schedule_interval='0 1 * * *',
          start_date=datetime(2020, 1, 1), catchup=False)
dummy_operator = DummyOperator(task_id='dummy_task', retries=3, dag=dag)
# Pull the source dataset from the 'fictizia' bucket to GLOBAL_PATH
# (GLOBAL_PATH / GLOBAL_OUTPUT_PATH are defined elsewhere in this file).
download_operator = GoogleCloudStorageDownloadOperator(
    task_id='downloader', bucket='fictizia',
    object='breast-cancer-wisconsin.data',
    google_cloud_storage_conn_id='google_cloud_default',
    filename=GLOBAL_PATH, dag=dag)
# Push the locally produced JSON back to the same bucket.
load_operator = FileToGoogleCloudStorageOperator(
    task_id='uploader', bucket='fictizia', src=GLOBAL_OUTPUT_PATH,
    dst='my_file.json',
    google_cloud_storage_conn_id='google_cloud_default', dag=dag)
producer_operator = PythonOperator(task_id='producer',
                                   python_callable=produce_data, dag=dag)
# NOTE(review): fragment — these are the trailing kwargs of a
# `with DAG(...)` statement whose opening is outside this chunk.
schedule_interval=None, default_args=DEFAULT_ARGS) as dag:
    # Run the Dataflow wordcount jar over input.txt.
    DATAFLOW_EXECUTION = DataFlowJavaOperator(
        task_id='wordcount-run',
        jar=DATAFLOW_JAR_LOCATION,
        options={
            'autoscalingAlgorithm': 'THROUGHPUT_BASED',
            'maxNumWorkers': '3',
            'inputFile': f'{INPUT_BUCKET}/input.txt',
            'output': f'{OUTPUT_BUCKET}/{OUTPUT_PREFIX}'
        })
    # The downloads below use store_to_xcom_key, so the file contents go
    # into XCom (not to disk) for later comparison against the reference.
    DOWNLOAD_EXPECTED = GoogleCloudStorageDownloadOperator(
        task_id='download_ref_string',
        bucket=REF_BUCKET,
        object='ref.txt',
        store_to_xcom_key='ref_str',
    )
    DOWNLOAD_RESULT_ONE = GoogleCloudStorageDownloadOperator(
        task_id=DOWNLOAD_TASK_PREFIX + '_1',
        bucket=OUTPUT_BUCKET_NAME,
        object=OUTPUT_PREFIX + '-00000-of-00003',
        store_to_xcom_key='res_str_1',
    )
    # NOTE(review): fragment — this call is cut off; its closing paren
    # is outside this chunk.
    DOWNLOAD_RESULT_TWO = GoogleCloudStorageDownloadOperator(
        task_id=DOWNLOAD_TASK_PREFIX + '_2',
        bucket=OUTPUT_BUCKET_NAME,
        object=OUTPUT_PREFIX + '-00001-of-00003',
        store_to_xcom_key='res_str_2',
# NOTE(review): fragment — this lone `)` closes a call that begins
# outside this chunk.
)
# [END howto_operator_gcs_bucket_create_acl_entry_task]
# [START howto_operator_gcs_object_create_acl_entry_task]
# Grant GCS_ACL_OBJECT_ROLE on the uploaded object to GCS_ACL_ENTITY.
gcs_object_create_acl_entry_task = GoogleCloudStorageObjectCreateAclEntryOperator(
    bucket=BUCKET_1,
    object_name=BUCKET_FILE_LOCATION,
    entity=GCS_ACL_ENTITY,
    role=GCS_ACL_OBJECT_ROLE,
    task_id="gcs_object_create_acl_entry_task",
)
# [END howto_operator_gcs_object_create_acl_entry_task]
# NOTE(review): this uses `object_name=` while other snippets in the repo
# pass `object=` to the download operator — verify against the Airflow
# version in use.
download_file = GoogleCloudStorageDownloadOperator(
    task_id="download_file",
    object_name=BUCKET_FILE_LOCATION,
    bucket=BUCKET_1,
    filename=PATH_TO_SAVED_FILE,
)
copy_file = GoogleCloudStorageToGoogleCloudStorageOperator(
    task_id="copy_file",
    source_bucket=BUCKET_1,
    source_object=BUCKET_FILE_LOCATION,
    destination_bucket=BUCKET_2,
    destination_object=BUCKET_FILE_LOCATION,
)
# Empty prefix matches every object in BUCKET_1.
delete_files = GoogleCloudStorageDeleteOperator(task_id="delete_files", bucket_name=BUCKET_1, prefix="")
# Shared task defaults: one retry after 5 minutes, no e-mail alerts.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': YESTERDAY,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
with DAG('subdag_example_before', default_args=default_args, catchup=False) as dag:
    start = DummyOperator(task_id='start')
    # One download task per encrypted play.
    download_romeo = GoogleCloudStorageDownloadOperator(task_id='download_romeo', bucket='smenyc2018-subdag-data', object='romeo.enc', filename='/home/airflow/gcs/data/romeo.enc')
    download_othello = GoogleCloudStorageDownloadOperator(task_id='download_othello', bucket='smenyc2018-subdag-data', object='othello.enc', filename='/home/airflow/gcs/data/othello.enc')
    download_hamlet = GoogleCloudStorageDownloadOperator(task_id='download_hamlet', bucket='smenyc2018-subdag-data', object='hamlet.enc', filename='/home/airflow/gcs/data/hamlet.enc')
    # One decrypt task per play, using the shared demo passphrase.
    decrypt_romeo = BashOperator(task_id='decrypt_romeo', bash_command='openssl enc -in /home/airflow/gcs/data/romeo.enc -out /home/airflow/gcs/data/romeo.txt -d -aes-128-cbc -k "hello-nyc"')
    decrypt_othello = BashOperator(task_id='decrypt_othello', bash_command='openssl enc -in /home/airflow/gcs/data/othello.enc -out /home/airflow/gcs/data/othello.txt -d -aes-128-cbc -k "hello-nyc"')
    # NOTE(review): fragment — this call is cut off mid-arguments; the
    # rest of the statement is outside this chunk.
    decrypt_hamlet = BashOperator(task_id='decrypt_hamlet',
"""DAG: download one GCS file, then post-process it with a Python callable."""
from datetime import datetime, timedelta

from airflow import models
from airflow.contrib.operators.gcs_download_operator import GoogleCloudStorageDownloadOperator
from airflow.operators.python_operator import PythonOperator

from transformations import transform_account

# Task defaults: retry once after 30 minutes, never alert by e-mail.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 11, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
}

# Manually triggered only (schedule_interval=None).
with models.DAG("gcs_transform", default_args=default_args, schedule_interval=None) as dag:
    # Pull the raw account batch file down to the worker.
    download_file = GoogleCloudStorageDownloadOperator(
        task_id="download_file",
        bucket='tpc-di_data',
        object='Batch2/Account.txt',
        filename='account_download.txt',
        google_cloud_storage_conn_id='google_cloud_default',
    )
    # Run the project-local transformation over the downloaded file.
    transform_file = PythonOperator(
        task_id='run_script',
        python_callable=transform_account.main,
    )

    download_file >> transform_file
# NOTE(review): fragment — these are the trailing kwargs of an operator
# call (likely a DataFlowJavaOperator) whose opening is outside this chunk.
jar=dataflow_jar_location,
start_date=yesterday,
options={
    'autoscalingAlgorithm': 'THROUGHPUT_BASED',
    'maxNumWorkers': '3',
    'inputFile': input_bucket + '/input.txt',
    'output': output_bucket + '/' + output_prefix
})
# Reference string for comparison; store_to_xcom_key puts the file
# contents into XCom rather than writing them to disk.
download_expected = GoogleCloudStorageDownloadOperator(
    task_id='download_ref_string',
    bucket=ref_bucket,
    object='ref.txt',
    store_to_xcom_key='ref_str',
    start_date=yesterday)
# One download per Dataflow output shard (3 shards total).
download_result_one = GoogleCloudStorageDownloadOperator(
    task_id=download_task_prefix + '_1',
    bucket=output_bucket_name,
    object=output_prefix + '-00000-of-00003',
    store_to_xcom_key='res_str_1',
    start_date=yesterday)
download_result_two = GoogleCloudStorageDownloadOperator(
    task_id=download_task_prefix + '_2',
    bucket=output_bucket_name,
    object=output_prefix + '-00001-of-00003',
    store_to_xcom_key='res_str_2',
    start_date=yesterday)
# NOTE(review): fragment — this call is cut off right after the open
# paren; its arguments are outside this chunk.
download_result_three = GoogleCloudStorageDownloadOperator(