def upload_file():
    """A callable to upload file to AWS bucket"""
    s3_hook = S3Hook()
    s3_hook.load_file(filename=UPLOAD_FILE, key=PREFIX, bucket_name=S3BUCKET_NAME)


with models.DAG(
    'example_s3_to_gcs',
    schedule_interval=None,
    start_date=days_ago(2),
    tags=['example'],
) as dag:
    create_s3_bucket = S3CreateBucketOperator(
        task_id="create_s3_bucket", bucket_name=S3BUCKET_NAME, region_name='us-east-1'
    )

    upload_to_s3 = PythonOperator(task_id='upload_file_to_s3', python_callable=upload_file)

    create_gcs_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )
    # [START howto_transfer_s3togcs_operator]
    transfer_to_gcs = S3ToGCSOperator(
        task_id='s3_to_gcs_task', bucket=S3BUCKET_NAME, prefix=PREFIX, dest_gcs="gs://" + GCS_BUCKET
    )
    # [END howto_transfer_s3togcs_operator]
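The snippet above is cut off before its imports, constants, and task ordering. A minimal sketch of the assumed preamble, using recent provider import paths (the bucket names, file path, and prefix are placeholders, not values from the original):

import os

from airflow import models
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator
from airflow.providers.google.cloud.transfers.s3_to_gcs import S3ToGCSOperator
from airflow.utils.dates import days_ago

GCP_PROJECT_ID = os.environ.get('GCP_PROJECT_ID', 'example-project')  # placeholder
S3BUCKET_NAME = os.environ.get('S3BUCKET_NAME', 's3bucket-example')   # placeholder
GCS_BUCKET = os.environ.get('GCS_BUCKET', 'gcsbucket-example')        # placeholder
UPLOAD_FILE = '/tmp/example-upload.txt'                               # placeholder local file
PREFIX = 'TESTS'                                                      # placeholder S3 prefix

A plausible task ordering, which also appears to have been trimmed from the listing, would be create_s3_bucket >> upload_to_s3 >> create_gcs_bucket >> transfer_to_gcs.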
Example #2
BUCKET_NAME = os.environ.get('BUCKET_NAME', 'test-s3-bucket-tagging')
TAG_KEY = os.environ.get('TAG_KEY', 'test-s3-bucket-tagging-key')
TAG_VALUE = os.environ.get('TAG_VALUE', 'test-s3-bucket-tagging-value')

with DAG(
        dag_id='s3_bucket_tagging_dag',
        schedule_interval=None,
        start_date=days_ago(2),
        max_active_runs=1,
        tags=['example'],
) as dag:

    create_bucket = S3CreateBucketOperator(
        task_id='s3_bucket_tagging_dag_create',
        bucket_name=BUCKET_NAME,
        region_name='us-east-1',
    )

    delete_bucket = S3DeleteBucketOperator(
        task_id='s3_bucket_tagging_dag_delete',
        bucket_name=BUCKET_NAME,
        force_delete=True,
    )

    # [START howto_operator_s3_bucket_tagging]
    get_tagging = S3GetBucketTaggingOperator(
        task_id='s3_bucket_tagging_dag_get_tagging', bucket_name=BUCKET_NAME)

    put_tagging = S3PutBucketTaggingOperator(
        task_id='s3_bucket_tagging_dag_put_tagging', key=TAG_KEY, value=TAG_VALUE, bucket_name=BUCKET_NAME)
    # [END howto_operator_s3_bucket_tagging]
Example #3
def upload_keys():
    """Upload a few test keys into the S3 bucket."""
    s3_hook = S3Hook()
    for i in range(0, 3):
        s3_hook.load_string(string_data="input", key=f"path/data{i}", bucket_name=BUCKET_NAME)


with DAG(dag_id=os.path.basename(__file__).replace(".py", ""),
         tags=['python'],
         default_args=default_args,
         schedule_interval=None,
         max_active_runs=1) as dag:

    # [START howto_operator_s3_bucket]
    create_bucket = S3CreateBucketOperator(
        task_id='s3_bucket_dag_create',
        bucket_name=BUCKET_NAME,
        region_name='ap-southeast-2',
    )

    add_keys_to_bucket = PythonOperator(
        task_id="s3_bucket_dag_add_keys_to_bucket",
        python_callable=upload_keys)

    delete_bucket = S3DeleteBucketOperator(
        task_id='s3_bucket_dag_delete',
        bucket_name=BUCKET_NAME,
        force_delete=True,
    )
    # [END howto_operator_s3_bucket]

    create_bucket >> add_keys_to_bucket >> delete_bucket
Example #4
            key=f"path/data{i}",
            bucket_name=BUCKET_NAME,
        )


with DAG(
        dag_id='s3_bucket_dag',
        schedule_interval=None,
        start_date=days_ago(2),
        default_args={"bucket_name": BUCKET_NAME},
        max_active_runs=1,
        tags=['example'],
) as dag:

    # [START howto_operator_s3_bucket]
    create_bucket = S3CreateBucketOperator(
        task_id='s3_bucket_dag_create',
        region_name='us-east-1',
    )

    # Using a task-decorated function to add keys
    add_keys_to_bucket = upload_keys()

    delete_bucket = S3DeleteBucketOperator(
        task_id='s3_bucket_dag_delete',
        force_delete=True,
    )
    # [END howto_operator_s3_bucket]

    create_bucket >> add_keys_to_bucket >> delete_bucket
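Note that create_bucket and delete_bucket above omit bucket_name: it is injected through default_args={"bucket_name": BUCKET_NAME}, while the @task-decorated upload_keys passes it explicitly. A sketch of the imports and constant this example assumes (recent amazon provider paths; the fallback bucket name is a placeholder):

import os

from airflow import DAG
from airflow.decorators import task
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, S3DeleteBucketOperator
from airflow.utils.dates import days_ago

BUCKET_NAME = os.environ.get('BUCKET_NAME', 'test-airflow-bucket')  # placeholder fallback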
Example #5
    def setUp(self):
        self.create_bucket_operator = S3CreateBucketOperator(task_id=TASK_ID, bucket_name=BUCKET_NAME)
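Example #5 stops at the fixture. A hypothetical sketch of how such a test might be completed, assuming moto mocks the S3 backend and recent amazon provider import paths (TASK_ID, BUCKET_NAME, the class name, and the test body are assumptions, not code from the original project):

import os
from unittest import TestCase

from moto import mock_aws  # moto >= 5.0; earlier releases expose moto.mock_s3 instead

from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator

TASK_ID = 'test_s3_create_bucket'    # assumed
BUCKET_NAME = 'test-airflow-bucket'  # assumed

# Fake credentials and a default AWS connection so nothing reaches a real account.
os.environ.setdefault('AWS_ACCESS_KEY_ID', 'testing')
os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'testing')
os.environ.setdefault('AWS_DEFAULT_REGION', 'us-east-1')
os.environ.setdefault('AIRFLOW_CONN_AWS_DEFAULT', 'aws://')


class TestS3CreateBucketOperator(TestCase):
    def setUp(self):
        self.create_bucket_operator = S3CreateBucketOperator(task_id=TASK_ID, bucket_name=BUCKET_NAME)

    @mock_aws
    def test_execute_creates_bucket(self):
        # Run the operator against the mocked S3 backend and verify the bucket now exists.
        self.create_bucket_operator.execute({})
        assert S3Hook().check_for_bucket(BUCKET_NAME)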
Example #6
    'email_on_retry': False
}

dag = DAG('upload_raw_data',
          default_args=default_args,
          description='Upload data to S3 with Airflow',
          schedule_interval='@daily',
          max_active_runs=1)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# create buckets

create_s3_bucket2quotes = S3CreateBucketOperator(
    task_id='Create_quotes_bucket',
    bucket_name=Variable.get('S3_RAW_QUOTES_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag)

create_s3_bucket2options = S3CreateBucketOperator(
    task_id='Create_options_bucket',
    bucket_name=Variable.get('S3_RAW_OPTIONS_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag)

# upload daily data

upload_quotes2s3 = S3UploadOperator(
    task_id='Upload_quotes_to_s3',
    aws_credentials_id='aws_credentials',
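S3UploadOperator is not part of the Amazon provider, so it is presumably a custom operator defined elsewhere in that project; the listing is cut off mid-instantiation. A hypothetical sketch of what such an operator might look like, assuming it pushes a local file to S3 through S3Hook (every constructor argument except aws_credentials_id is a guess):

from airflow.models import BaseOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook


class S3UploadOperator(BaseOperator):
    """Hypothetical operator: upload a local file to s3://<bucket_name>/<key>."""

    def __init__(self, *, aws_credentials_id='aws_credentials', bucket_name=None, filename=None, key=None, **kwargs):
        super().__init__(**kwargs)
        self.aws_credentials_id = aws_credentials_id
        self.bucket_name = bucket_name
        self.filename = filename
        self.key = key

    def execute(self, context):
        # Copy the local file into the target bucket, overwriting any existing object.
        s3_hook = S3Hook(aws_conn_id=self.aws_credentials_id)
        s3_hook.load_file(filename=self.filename, key=self.key, bucket_name=self.bucket_name, replace=True)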
Example #7
dag = DAG('create_datalake',
          default_args=DEFAULT_ARGS,
          description='Upload data to S3 with Airflow',
          schedule_interval='@daily',
          max_active_runs=3
          )

start_operator = DummyOperator(task_id='Begin_execution',  dag=dag)


# create buckets

create_s3_bucket2misc = S3CreateBucketOperator(
    task_id='Create_misc_bucket',
    bucket_name=Variable.get('S3_CODES_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag
    )

create_s3_bucket2data = S3CreateBucketOperator(
    task_id='Create_output_bucket',
    bucket_name=Variable.get('S3_DATA_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    region_name='us-west-2',
    dag=dag
    )


# upload etl code to be used by emr
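The listing ends here, just before the upload step the comment announces. A hypothetical continuation, assuming the ETL script sits on the local filesystem and is pushed to the codes bucket with the provider's LocalFilesystemToS3Operator (the path, key, and task id are placeholders):

from airflow.providers.amazon.aws.transfers.local_to_s3 import LocalFilesystemToS3Operator

upload_etl_code = LocalFilesystemToS3Operator(
    task_id='Upload_etl_code',
    filename='/usr/local/airflow/dags/scripts/etl.py',  # placeholder local path
    dest_key='scripts/etl.py',
    dest_bucket=Variable.get('S3_CODES_BUCKET_NAME'),
    aws_conn_id='aws_credentials',
    replace=True,
    dag=dag)

start_operator >> [create_s3_bucket2misc, create_s3_bucket2data] >> upload_etl_code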