# GCSHook is patched where the operator imports it, so no real GCS call is made.
@mock.patch("airflow.providers.google.cloud.transfers.gcs_to_local.GCSHook")
def test_execute(self, mock_hook):
    operator = GCSToLocalFilesystemOperator(
        task_id=TASK_ID,
        bucket=TEST_BUCKET,
        object_name=TEST_OBJECT,
        filename=LOCAL_FILE_PATH,
    )
    operator.execute(None)
    mock_hook.return_value.download.assert_called_once_with(
        bucket_name=TEST_BUCKET, object_name=TEST_OBJECT, filename=LOCAL_FILE_PATH
    )
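The operator can also hand the downloaded bytes to XCom instead of writing them to disk. A minimal sketch, assuming the operator's `store_to_xcom_key` parameter (when it is set, `filename` is omitted and the object's content is pushed to XCom under that key; the key name here is hypothetical):

download_to_xcom = GCSToLocalFilesystemOperator(
    task_id="download_to_xcom",
    bucket=TEST_BUCKET,
    object_name=TEST_OBJECT,
    # Hypothetical key name; the file content goes to XCom, not a local file.
    store_to_xcom_key="downloaded_file",
)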
        pipeline_options={
            'output': GCS_OUTPUT,
        },
        job_class='org.apache.beam.examples.WordCount',
        dataflow_config={
            "check_if_running": CheckJobRunning.IgnoreJob,
            "location": 'europe-west3',
            "poll_sleep": 10,
        },
    )
    # [END howto_operator_start_java_job_jar_on_gcs]

    # [START howto_operator_start_java_job_local_jar]
    jar_to_local = GCSToLocalFilesystemOperator(
        task_id="jar-to-local",
        bucket=GCS_JAR_BUCKET_NAME,
        object_name=GCS_JAR_OBJECT_NAME,
        filename="/tmp/dataflow-{{ ds_nodash }}.jar",
    )

    start_java_job_local = BeamRunJavaPipelineOperator(
        task_id="start-java-job-local",
        jar="/tmp/dataflow-{{ ds_nodash }}.jar",
        pipeline_options={
            'output': GCS_OUTPUT,
        },
        job_class='org.apache.beam.examples.WordCount',
        dataflow_config={
            "check_if_running": CheckJobRunning.WaitForRun,
            "location": 'europe-west3',
            "poll_sleep": 10,
        },
    )
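The Beam task can only read the jar once the download task has written it, so the two tasks have to be chained, as the direct-runner example below does:

jar_to_local >> start_java_job_local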
    # [END howto_operator_gcs_bucket_create_acl_entry_task]

    # [START howto_operator_gcs_object_create_acl_entry_task]
    gcs_object_create_acl_entry_task = GCSObjectCreateAclEntryOperator(
        bucket=BUCKET_1,
        object_name=BUCKET_FILE_LOCATION,
        entity=GCS_ACL_ENTITY,
        role=GCS_ACL_OBJECT_ROLE,
        task_id="gcs_object_create_acl_entry_task",
    )
    # [END howto_operator_gcs_object_create_acl_entry_task]

    # [START howto_operator_gcs_download_file_task]
    download_file = GCSToLocalFilesystemOperator(
        task_id="download_file",
        object_name=BUCKET_FILE_LOCATION,
        bucket=BUCKET_1,
        filename=PATH_TO_SAVED_FILE,
    )
    # [END howto_operator_gcs_download_file_task]

    copy_file = GCSToGCSOperator(
        task_id="copy_file",
        source_bucket=BUCKET_1,
        source_object=BUCKET_FILE_LOCATION,
        destination_bucket=BUCKET_2,
        destination_object=BUCKET_FILE_LOCATION,
    )

    delete_files = GCSDeleteObjectsOperator(
        task_id="delete_files", bucket_name=BUCKET_1, objects=[BUCKET_FILE_LOCATION]
    )
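These tasks only make sense in sequence: the ACL entry is created on the uploaded object, the file is then downloaded and copied, and the source object is deleted last. A plausible chaining sketch (the exact ordering in the full example DAG may differ):

gcs_object_create_acl_entry_task >> download_file >> copy_file >> delete_files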
        'output': '/tmp/example_beam',
    },
    "trigger_rule": "all_done",
}

with models.DAG(
    "example_beam_native_java_direct_runner",
    schedule_interval=None,  # Override to match your needs
    start_date=days_ago(1),
    tags=['example'],
) as dag_native_java_direct_runner:
    # [START howto_operator_start_java_direct_runner_pipeline]
    jar_to_local_direct_runner = GCSToLocalFilesystemOperator(
        task_id="jar_to_local_direct_runner",
        bucket=GCS_JAR_DIRECT_RUNNER_BUCKET_NAME,
        object_name=GCS_JAR_DIRECT_RUNNER_OBJECT_NAME,
        filename="/tmp/beam_wordcount_direct_runner_{{ ds_nodash }}.jar",
    )

    start_java_pipeline_direct_runner = BeamRunJavaPipelineOperator(
        task_id="start_java_pipeline_direct_runner",
        jar="/tmp/beam_wordcount_direct_runner_{{ ds_nodash }}.jar",
        pipeline_options={
            'output': '/tmp/start_java_pipeline_direct_runner',
            'inputFile': GCS_INPUT,
        },
        job_class='org.apache.beam.examples.WordCount',
    )

    jar_to_local_direct_runner >> start_java_pipeline_direct_runner
    # [END howto_operator_start_java_direct_runner_pipeline]
# under the License.
import os

from airflow import models
from airflow.providers.google.cloud.transfers.gcs_to_local import GCSToLocalFilesystemOperator
from airflow.utils.dates import days_ago

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-id")
BUCKET = os.environ.get("GCP_GCS_BUCKET", "test-gcs-example-bucket")
PATH_TO_REMOTE_FILE = os.environ.get("GCP_GCS_PATH_TO_UPLOAD_FILE", "test-gcs-example-remote.txt")
PATH_TO_LOCAL_FILE = os.environ.get("GCP_GCS_PATH_TO_SAVED_FILE", "test-gcs-example-local.txt")

with models.DAG(
    "example_gcs_to_local",
    start_date=days_ago(1),
    schedule_interval=None,
    tags=['example'],
) as dag:
    # [START howto_operator_gcs_download_file_task]
    download_file = GCSToLocalFilesystemOperator(
        task_id="download_file",
        object_name=PATH_TO_REMOTE_FILE,
        bucket=BUCKET,
        filename=PATH_TO_LOCAL_FILE,
    )
    # [END howto_operator_gcs_download_file_task]
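To consume the downloaded file in a follow-up task, a minimal sketch using `PythonOperator`; the `print_file` callable and the `print_file_task` task are assumptions for illustration, not part of the original example:

from airflow.operators.python import PythonOperator

def print_file():
    # Read the file that download_file wrote to the local filesystem.
    with open(PATH_TO_LOCAL_FILE) as file:
        print(file.read())

# Inside the same `with models.DAG(...)` block as download_file:
print_file_task = PythonOperator(task_id="print_file", python_callable=print_file)
download_file >> print_file_task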