Example #1
    def test_execute(self, mock_hook):
        operator = GoogleCloudStorageDownloadOperator(task_id=TASK_ID,
                                                      bucket=TEST_BUCKET,
                                                      object=TEST_OBJECT,
                                                      filename=LOCAL_FILE_PATH)

        operator.execute(None)
        mock_hook.return_value.download.assert_called_once_with(
            bucket=TEST_BUCKET, object=TEST_OBJECT, filename=LOCAL_FILE_PATH)
Example #2
    def test_execute(self, mock_hook):
        operator = GoogleCloudStorageDownloadOperator(task_id=TASK_ID,
                                                      bucket=TEST_BUCKET,
                                                      object=TEST_OBJECT,
                                                      filename=LOCAL_FILE_PATH)

        operator.execute(None)
        mock_hook.return_value.download.assert_called_once_with(
            bucket=TEST_BUCKET, object=TEST_OBJECT, filename=LOCAL_FILE_PATH
        )
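Both test excerpts receive mock_hook from a @mock.patch decorator that the listing cuts off above. A minimal sketch of the surrounding harness, assuming the Airflow 1.10 contrib module path and illustrative constant values:

import unittest
from unittest import mock

from airflow.contrib.operators.gcs_download_operator import GoogleCloudStorageDownloadOperator

# Illustrative values only; the real tests define their own constants.
TASK_ID = 'test-gcs-download-operator'
TEST_BUCKET = 'test-bucket'
TEST_OBJECT = 'path/to/object.txt'
LOCAL_FILE_PATH = '/tmp/object.txt'


class TestGoogleCloudStorageDownloadOperator(unittest.TestCase):

    # Patching the hook keeps the test offline; the mock is injected as mock_hook.
    @mock.patch('airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageHook')
    def test_execute(self, mock_hook):
        ...  # body as in the excerpts above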
Example #3
def shakespeare_subdag(parent_dag, subdag_task_id, play_name):
    with DAG('{}.{}'.format(parent_dag.dag_id, subdag_task_id),
             schedule_interval=parent_dag.schedule_interval,
             start_date=parent_dag.start_date,
             default_args=parent_dag.default_args) as subdag:
        download = GoogleCloudStorageDownloadOperator(
            task_id='download',
            bucket='smenyc2018-subdag-data',
            object='{}.enc'.format(play_name),
            filename='/home/airflow/gcs/data/{}.enc'.format(play_name))
        decrypt = BashOperator(
            task_id='decrypt',
            bash_command=
            'openssl enc -in /home/airflow/gcs/data/{play_name}.enc '
            '-out /home/airflow/gcs/data/{play_name}.txt -d -aes-128-cbc -k "hello-nyc"'
            .format(play_name=play_name))
        wordcount = BashOperator(
            task_id='wordcount',
            bash_command=
            'wc -w /home/airflow/gcs/data/{play_name}.txt | tee /home/airflow/gcs/data/{play_name}_wordcount.txt'
            .format(play_name=play_name))
        download >> decrypt >> wordcount
    return subdag
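The factory above only builds and returns the sub-DAG; in the parent DAG it would typically be attached with a SubDagOperator. A minimal sketch, assuming an illustrative parent DAG and the play names used in Example #7:

from datetime import datetime

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator

with DAG('shakespeare_parent',
         schedule_interval=None,
         start_date=datetime(2020, 1, 1)) as parent_dag:
    for play in ('romeo', 'othello', 'hamlet'):
        # The task_id must match the suffix used in the sub-DAG's dag_id.
        SubDagOperator(
            task_id='process_{}'.format(play),
            subdag=shakespeare_subdag(parent_dag, 'process_{}'.format(play), play))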
Example #4
    return 2


dag = DAG('ejercicio_7',
          description='Ejercicio 7',
          schedule_interval='0 1 * * *',
          start_date=datetime(2020, 1, 1),
          catchup=False)

dummy_operator = DummyOperator(task_id='dummy_task', retries=3, dag=dag)

download_operator = GoogleCloudStorageDownloadOperator(
    task_id='downloader',
    bucket='fictizia',
    object='breast-cancer-wisconsin.data',
    google_cloud_storage_conn_id='google_cloud_default',
    filename=GLOBAL_PATH,
    dag=dag)

load_operator = FileToGoogleCloudStorageOperator(
    task_id='uploader',
    bucket='fictizia',
    src=GLOBAL_OUTPUT_PATH,
    dst='my_file.json',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)

producer_operator = PythonOperator(task_id='producer',
                                   python_callable=produce_data,
                                   dag=dag)
Example #5
                schedule_interval=None,
                default_args=DEFAULT_ARGS) as dag:

    DATAFLOW_EXECUTION = DataFlowJavaOperator(
        task_id='wordcount-run',
        jar=DATAFLOW_JAR_LOCATION,
        options={
            'autoscalingAlgorithm': 'THROUGHPUT_BASED',
            'maxNumWorkers': '3',
            'inputFile': f'{INPUT_BUCKET}/input.txt',
            'output': f'{OUTPUT_BUCKET}/{OUTPUT_PREFIX}'
        })

    DOWNLOAD_EXPECTED = GoogleCloudStorageDownloadOperator(
        task_id='download_ref_string',
        bucket=REF_BUCKET,
        object='ref.txt',
        store_to_xcom_key='ref_str',
    )

    DOWNLOAD_RESULT_ONE = GoogleCloudStorageDownloadOperator(
        task_id=DOWNLOAD_TASK_PREFIX + '_1',
        bucket=OUTPUT_BUCKET_NAME,
        object=OUTPUT_PREFIX + '-00000-of-00003',
        store_to_xcom_key='res_str_1',
    )

    DOWNLOAD_RESULT_TWO = GoogleCloudStorageDownloadOperator(
        task_id=DOWNLOAD_TASK_PREFIX + '_2',
        bucket=OUTPUT_BUCKET_NAME,
        object=OUTPUT_PREFIX + '-00001-of-00003',
        store_to_xcom_key='res_str_2',
Example #6
    )
    # [END howto_operator_gcs_bucket_create_acl_entry_task]

    # [START howto_operator_gcs_object_create_acl_entry_task]
    gcs_object_create_acl_entry_task = GoogleCloudStorageObjectCreateAclEntryOperator(
        bucket=BUCKET_1,
        object_name=BUCKET_FILE_LOCATION,
        entity=GCS_ACL_ENTITY,
        role=GCS_ACL_OBJECT_ROLE,
        task_id="gcs_object_create_acl_entry_task",
    )
    # [END howto_operator_gcs_object_create_acl_entry_task]

    download_file = GoogleCloudStorageDownloadOperator(
        task_id="download_file",
        object_name=BUCKET_FILE_LOCATION,
        bucket=BUCKET_1,
        filename=PATH_TO_SAVED_FILE,
    )

    copy_file = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id="copy_file",
        source_bucket=BUCKET_1,
        source_object=BUCKET_FILE_LOCATION,
        destination_bucket=BUCKET_2,
        destination_object=BUCKET_FILE_LOCATION,
    )

    delete_files = GoogleCloudStorageDeleteOperator(task_id="delete_files",
                                                    bucket_name=BUCKET_1,
                                                    prefix="")
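The excerpt omits the dependency chain; a plausible ordering, assuming the ACL entry is created before the file is downloaded and copied, with cleanup last:

    gcs_object_create_acl_entry_task >> download_file >> copy_file >> delete_files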
Example #7

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': YESTERDAY,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

with DAG('subdag_example_before', default_args=default_args, catchup=False) as dag:
  start = DummyOperator(task_id='start')
  download_romeo = GoogleCloudStorageDownloadOperator(task_id='download_romeo',
                                                      bucket='smenyc2018-subdag-data',
                                                      object='romeo.enc',
                                                      filename='/home/airflow/gcs/data/romeo.enc')
  download_othello = GoogleCloudStorageDownloadOperator(task_id='download_othello',
                                                      bucket='smenyc2018-subdag-data',
                                                      object='othello.enc',
                                                      filename='/home/airflow/gcs/data/othello.enc')
  download_hamlet = GoogleCloudStorageDownloadOperator(task_id='download_hamlet',
                                                      bucket='smenyc2018-subdag-data',
                                                      object='hamlet.enc',
                                                      filename='/home/airflow/gcs/data/hamlet.enc')

  decrypt_romeo = BashOperator(task_id='decrypt_romeo',
                               bash_command='openssl enc -in /home/airflow/gcs/data/romeo.enc -out /home/airflow/gcs/data/romeo.txt -d -aes-128-cbc -k "hello-nyc"')
  decrypt_othello = BashOperator(task_id='decrypt_othello',
                                 bash_command='openssl enc -in /home/airflow/gcs/data/othello.enc -out /home/airflow/gcs/data/othello.txt -d -aes-128-cbc -k "hello-nyc"')
  decrypt_hamlet = BashOperator(task_id='decrypt_hamlet',
                                bash_command='openssl enc -in /home/airflow/gcs/data/hamlet.enc -out /home/airflow/gcs/data/hamlet.txt -d -aes-128-cbc -k "hello-nyc"')
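The listing cuts off at this point; a plausible continuation of the wiring, assuming each play is downloaded and decrypted as an independent chain starting from start:

  start >> download_romeo >> decrypt_romeo
  start >> download_othello >> decrypt_othello
  start >> download_hamlet >> decrypt_hamlet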
Example #8
from datetime import timedelta, datetime
from airflow import models
from airflow.contrib.operators.gcs_download_operator import GoogleCloudStorageDownloadOperator
from airflow.operators.python_operator import PythonOperator

from transformations import transform_account

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 11, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
}

with models.DAG("gcs_transform",
                default_args=default_args,
                schedule_interval=None) as dag:
    download_file = GoogleCloudStorageDownloadOperator(
        task_id="download_file",
        bucket='tpc-di_data',
        object='Batch2/Account.txt',
        filename='account_download.txt',
        google_cloud_storage_conn_id='google_cloud_default')

    transform_file = PythonOperator(task_id='run_script',
                                    python_callable=transform_account.main)

    download_file >> transform_file
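The transformations.transform_account module is project-specific and not shown. A hypothetical sketch of its main callable, assuming it reads the pipe-delimited account_download.txt that the download task writes to the worker's working directory and emits a JSON file:

# Hypothetical stand-in for transformations.transform_account; names and format are assumptions.
import csv
import json


def main():
    # TPC-DI batch files are pipe-delimited; adjust the delimiter if the real file differs.
    with open('account_download.txt', newline='') as src:
        rows = list(csv.reader(src, delimiter='|'))
    with open('account_transformed.json', 'w') as dst:
        json.dump(rows, dst)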
Example #9
        jar=dataflow_jar_location,
        start_date=yesterday,
        options={
            'autoscalingAlgorithm': 'THROUGHPUT_BASED',
            'maxNumWorkers': '3',
            'inputFile': input_bucket + '/input.txt',
            'output': output_bucket + '/' + output_prefix
        })
    download_expected = GoogleCloudStorageDownloadOperator(
        task_id='download_ref_string',
        bucket=ref_bucket,
        object='ref.txt',
        store_to_xcom_key='ref_str',
        start_date=yesterday)
    download_result_one = GoogleCloudStorageDownloadOperator(
        task_id=download_task_prefix + '_1',
        bucket=output_bucket_name,
        object=output_prefix + '-00000-of-00003',
        store_to_xcom_key='res_str_1',
        start_date=yesterday)
    download_result_two = GoogleCloudStorageDownloadOperator(
        task_id=download_task_prefix + '_2',
        bucket=output_bucket_name,
        object=output_prefix + '-00001-of-00003',
        store_to_xcom_key='res_str_2',
        start_date=yesterday)
    download_result_three = GoogleCloudStorageDownloadOperator(