def upload_file():
    """A callable to upload file to AWS bucket"""
    s3_hook = S3Hook()
    s3_hook.load_file(filename=UPLOAD_FILE, key=PREFIX, bucket_name=S3BUCKET_NAME)


with models.DAG(
    'example_s3_to_gcs',
    schedule_interval=None,
    start_date=days_ago(2),
    tags=['example'],
) as dag:
    create_s3_bucket = S3CreateBucketOperator(
        task_id="create_s3_bucket", bucket_name=S3BUCKET_NAME, region_name='us-east-1'
    )

    upload_to_s3 = PythonOperator(task_id='upload_file_to_s3', python_callable=upload_file)

    create_gcs_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )

    # [START howto_transfer_s3togcs_operator]
    transfer_to_gcs = S3ToGCSOperator(
        task_id='s3_to_gcs_task',
        bucket=S3BUCKET_NAME,
        prefix=PREFIX,
        dest_gcs="gs://" + GCS_BUCKET
    )
    # [END howto_transfer_s3togcs_operator]
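    # Not part of the original snippet: a plausible ordering of the tasks above,
    # assuming both buckets must exist and the file must be in S3 before the
    # transfer runs.
    create_s3_bucket >> upload_to_s3 >> create_gcs_bucket >> transfer_to_gcs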
BUCKET_NAME = os.environ.get('BUCKET_NAME', 'test-s3-bucket-tagging')
TAG_KEY = os.environ.get('TAG_KEY', 'test-s3-bucket-tagging-key')
TAG_VALUE = os.environ.get('TAG_VALUE', 'test-s3-bucket-tagging-value')

with DAG(
    dag_id='s3_bucket_tagging_dag',
    schedule_interval=None,
    start_date=days_ago(2),
    max_active_runs=1,
    tags=['example'],
) as dag:
    create_bucket = S3CreateBucketOperator(
        task_id='s3_bucket_tagging_dag_create',
        bucket_name=BUCKET_NAME,
        region_name='us-east-1',
    )

    delete_bucket = S3DeleteBucketOperator(
        task_id='s3_bucket_tagging_dag_delete',
        bucket_name=BUCKET_NAME,
        force_delete=True,
    )

    # [START howto_operator_s3_bucket_tagging]
    get_tagging = S3GetBucketTaggingOperator(
        task_id='s3_bucket_tagging_dag_get_tagging', bucket_name=BUCKET_NAME)

    put_tagging = S3PutBucketTaggingOperator(
        task_id='s3_bucket_tagging_dag_put_tagging',
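        # The original snippet cuts off inside this call. A minimal assumed completion,
        # using the single key/value form of S3PutBucketTaggingOperator together with
        # the TAG_KEY / TAG_VALUE variables defined at the top:
        bucket_name=BUCKET_NAME,
        key=TAG_KEY,
        value=TAG_VALUE,
    )

    # Assumed ordering, not shown in the original: create the bucket, tag it,
    # read the tags back, then tear the bucket down again.
    create_bucket >> put_tagging >> get_tagging >> delete_bucket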
string_data="input", key=f"path/data{i}", bucket_name=BUCKET_NAME, ) with DAG(dag_id=os.path.basename(__file__).replace(".py", ""), tags=['python'], default_args=default_args, schedule_interval=None, max_active_runs=1) as dag: # [START howto_operator_s3_bucket] create_bucket = S3CreateBucketOperator( task_id='s3_bucket_dag_create', bucket_name=BUCKET_NAME, region_name='ap-southeast-2', ) add_keys_to_bucket = PythonOperator( task_id="s3_bucket_dag_add_keys_to_bucket", python_callable=upload_keys) delete_bucket = S3DeleteBucketOperator( task_id='s3_bucket_dag_delete', bucket_name=BUCKET_NAME, force_delete=True, ) # [END howto_operator_s3_bucket] create_bucket >> add_keys_to_bucket
key=f"path/data{i}", bucket_name=BUCKET_NAME, ) with DAG( dag_id='s3_bucket_dag', schedule_interval=None, start_date=days_ago(2), default_args={"bucket_name": BUCKET_NAME}, max_active_runs=1, tags=['example'], ) as dag: # [START howto_operator_s3_bucket] create_bucket = S3CreateBucketOperator( task_id='s3_bucket_dag_create', region_name='us-east-1', ) # Using a task-decorated function to add keys add_keys_to_bucket = upload_keys() delete_bucket = S3DeleteBucketOperator( task_id='s3_bucket_dag_delete', force_delete=True, ) # [END howto_operator_s3_bucket] create_bucket >> add_keys_to_bucket >> delete_bucket
    def setUp(self):
        self.create_bucket_operator = S3CreateBucketOperator(
            task_id=TASK_ID,
            bucket_name=BUCKET_NAME,
        )
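    # A sketch, not from the original, of how a test method might exercise the operator
    # without touching AWS: patch S3Hook where the operator imports it and check that a
    # missing bucket triggers a create call. The patch target and hook methods used here
    # are assumptions and depend on the provider version; `from unittest import mock` is
    # assumed at the top of the module.
    @mock.patch("airflow.providers.amazon.aws.operators.s3.S3Hook")
    def test_execute_creates_missing_bucket(self, mock_hook):
        mock_hook.return_value.check_for_bucket.return_value = False
        self.create_bucket_operator.execute({})
        mock_hook.return_value.create_bucket.assert_called_once()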
    'email_on_retry': False
}

dag = DAG('upload_raw_data',
          default_args=default_args,
          description='Upload data to S3 with Airflow',
          schedule_interval='@daily',
          max_active_runs=1)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# create buckets
create_s3_bucket2quotes = S3CreateBucketOperator(
    task_id='Create_quotes_bucket',
    bucket_name=Variable.get('S3_RAW_QUOTES_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag)

create_s3_bucket2options = S3CreateBucketOperator(
    task_id='Create_options_bucket',
    bucket_name=Variable.get('S3_RAW_OPTIONS_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag)

# upload daily data
upload_quotes2s3 = S3UploadOperator(
    task_id='Upload_quotes_to_s3',
    aws_credentials_id='aws_credentials',
dag = DAG('create_datalake',
          default_args=DEFAULT_ARGS,
          description='Upload data to S3 with Airflow',
          schedule_interval='@daily',
          max_active_runs=3
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# create buckets
create_s3_bucket2misc = S3CreateBucketOperator(
    task_id='Create_misc_bucket',
    bucket_name=Variable.get('S3_CODES_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag
)

create_s3_bucket2data = S3CreateBucketOperator(
    task_id='Create_output_bucket',
    bucket_name=Variable.get('S3_DATA_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag
)

# upload etl code to be used by emr
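# The snippet ends before the upload tasks and task dependencies are declared.
# A plausible (assumed) wiring for what is shown so far: both bucket-creation tasks
# can run in parallel once the DAG starts.
start_operator >> [create_s3_bucket2misc, create_s3_bucket2data]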