Example #1
 def test_parse_bucket_key(self, key, bucket, parsed_key, parsed_bucket):
     op = S3KeySensor(
         task_id='s3_key_sensor',
         bucket_key=key,
         bucket_name=bucket,
     )
     self.assertEqual(op.bucket_key, parsed_key)
     self.assertEqual(op.bucket_name, parsed_bucket)
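The key, bucket, parsed_key and parsed_bucket arguments come from a parametrize decorator that the snippet omits. A minimal sketch of how the test could be parametrized, assuming the parameterized package and an Airflow version that splits the s3:// URL when the sensor is constructed (newer providers defer the split to poke(), as Example #6 shows); the data rows below are illustrative, not taken from the source:

import unittest

from parameterized import parameterized
# Import path varies by Airflow version; this is the one Example #12 uses.
from airflow.providers.amazon.aws.sensors.s3_key import S3KeySensor


class TestS3KeySensorParse(unittest.TestCase):

    @parameterized.expand([
        # (bucket_key, bucket_name, expected parsed key, expected parsed bucket)
        ('s3://bucket/key', None, 'key', 'bucket'),
        ('key', 'bucket', 'key', 'bucket'),
    ])
    def test_parse_bucket_key(self, key, bucket, parsed_key, parsed_bucket):
        op = S3KeySensor(task_id='s3_key_sensor', bucket_key=key, bucket_name=bucket)
        self.assertEqual(op.bucket_key, parsed_key)
        self.assertEqual(op.bucket_name, parsed_bucket)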
Example #2
 def test_bucket_name_none_and_bucket_key_as_relative_path(self):
     """
     Test if exception is raised when bucket_name is None
     and bucket_key is provided as relative path rather than s3:// url.
     :return:
     """
     with self.assertRaises(AirflowException):
         S3KeySensor(task_id='s3_key_sensor', bucket_key="file_in_bucket")
Example #3
 def test_bucket_name_provided_and_bucket_key_is_s3_url(self):
     """
     Test if exception is raised when bucket_name is provided
     while bucket_key is provided as a full s3:// url.
     :return:
     """
     with self.assertRaises(AirflowException):
         S3KeySensor(task_id='s3_key_sensor',
                     bucket_key="s3://test_bucket/file",
                     bucket_name='test_bucket')
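For contrast with the two failing cases above, a short sketch of the combinations the constructor accepts without raising; the task ids and names are illustrative:

# Full s3:// URL: the bucket name is derived from the URL, so bucket_name is omitted.
S3KeySensor(task_id='key_from_url', bucket_key='s3://test_bucket/file')

# Relative key: the bucket must then be named explicitly.
S3KeySensor(task_id='relative_key', bucket_key='file_in_bucket', bucket_name='test_bucket')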
Example #4
    def test_poke(self, mock_hook):
        op = S3KeySensor(task_id='s3_key_sensor',
                         bucket_key='s3://test_bucket/file')

        mock_check_for_key = mock_hook.return_value.check_for_key
        mock_check_for_key.return_value = False
        self.assertFalse(op.poke(None))
        mock_check_for_key.assert_called_once_with(op.bucket_key,
                                                   op.bucket_name)

        mock_check_for_key.return_value = True
        self.assertTrue(op.poke(None))
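The mock_hook parameter in this and the next example is injected by a patch decorator that the snippets omit. A minimal sketch of the usual setup, assuming unittest.mock; the dotted path being patched is a placeholder and depends on which provider version the tests target:

import unittest
from unittest import mock


class TestS3KeySensorPoke(unittest.TestCase):

    # Placeholder target: patch the S3Hook name that the sensor module imports,
    # so S3KeySensor.poke() talks to a MagicMock instead of AWS.
    @mock.patch('airflow.providers.amazon.aws.sensors.s3_key.S3Hook')
    def test_poke(self, mock_hook):
        ...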
Example #5
    def test_poke_wildcard(self, mock_hook):
        op = S3KeySensor(task_id='s3_key_sensor',
                         bucket_key='s3://test_bucket/file',
                         wildcard_match=True)

        mock_check_for_wildcard_key = mock_hook.return_value.check_for_wildcard_key
        mock_check_for_wildcard_key.return_value = False
        assert not op.poke(None)
        mock_check_for_wildcard_key.assert_called_once_with(
            op.bucket_key, op.bucket_name)

        mock_check_for_wildcard_key.return_value = True
        assert op.poke(None)
Example #6
    def test_parse_bucket_key(self, key, bucket, parsed_key, parsed_bucket,
                              mock_hook):
        mock_hook.return_value.check_for_key.return_value = False

        op = S3KeySensor(
            task_id='s3_key_sensor',
            bucket_key=key,
            bucket_name=bucket,
        )

        op.poke(None)

        self.assertEqual(op.bucket_key, parsed_key)
        self.assertEqual(op.bucket_name, parsed_bucket)
Example #7
def Sensor_0(config):
    sensor = S3KeySensor(
        bucket_key="s3://abinitio-spark-redshift-testing/prophecy-libs-assembly-2.jar",
        wildcard_match=False,
        aws_conn_id="aws_default_pankaj",
        verify=False,
        soft_fail=False,
        poke_interval=60,
        timeout=604800,
        mode="poke",
        task_id="Sensor_0",
        exponential_backoff=False)

    return sensor
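Sensor_0 only builds and returns the sensor; it still needs a DAG to run in. A hedged sketch of how such a factory might be used, with an illustrative DAG name and an empty config dict, since the function ignores its argument:

from datetime import datetime
from airflow import DAG

with DAG("prophecy_jar_pipeline", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    # Tasks created inside the `with DAG(...)` block are attached to it automatically.
    wait_for_jar = Sensor_0(config={})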
Example #8
def check_minio_2():
    @task()
    def log():
        print("Something happened in bucket")

    sensor = S3KeySensor(
        task_id="check_s3_for_file_in_s3",
        bucket_key="s3://my-first-bucket/my-test-file-2.txt-*",
        wildcard_match=True,
        aws_conn_id="local_minio",
    )

    log_task = log()

    sensor >> log_task
Example #9
def Wait_for_Data(config):
    sensor = S3KeySensor(
        bucket_key="s3://abinitio-spark-redshift-testing/gates/2021-05-03",
        wildcard_match=False,
        aws_conn_id="aws_default",
        verify=False,
        soft_fail=False,
        poke_interval=60,
        timeout=604800,
        mode="poke",
        task_id="Wait_for_Data",
        exponential_backoff=False
    )

    return sensor
Example #10
    def test_parse_bucket_key_from_jinja(self, mock_hook):
        mock_hook.return_value.check_for_key.return_value = False

        Variable.set("test_bucket_key", "s3://bucket/key")

        execution_date = datetime(2020, 1, 1)

        dag = DAG("test_s3_key", start_date=execution_date)
        op = S3KeySensor(
            task_id='s3_key_sensor',
            bucket_key='{{ var.value.test_bucket_key }}',
            bucket_name=None,
            dag=dag,
        )

        ti = TaskInstance(task=op, execution_date=execution_date)
        context = ti.get_template_context()
        ti.render_templates(context)

        op.poke(None)

        self.assertEqual(op.bucket_key, "key")
        self.assertEqual(op.bucket_name, "bucket")
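The same templating pattern works outside a test: bucket_key is a templated field, so the Jinja expression is only rendered (and the s3:// URL only split) when the task runs. A hedged sketch with an illustrative DAG name; it assumes an Airflow Variable named test_bucket_key holding a full s3:// URL, as in the test above:

from datetime import datetime
from airflow import DAG
from airflow.providers.amazon.aws.sensors.s3_key import S3KeySensor

with DAG("s3_key_from_variable", start_date=datetime(2020, 1, 1), schedule_interval=None) as dag:
    wait_for_key = S3KeySensor(
        task_id="wait_for_variable_key",
        # Rendered to something like s3://bucket/key at runtime, then split into
        # bucket_name and bucket_key during poke().
        bucket_key="{{ var.value.test_bucket_key }}",
    )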
Example #11
            "Value": "Advertiser MR PID Airflow Project"
        },
    ],
}

cluster_creator = EmrCreateJobFlowOperator(
    task_id="create_emr_cluster",
    aws_conn_id="aws_default",
    emr_conn_id="emr_default",
    job_flow_overrides=JOB_FLOW_OVERRIDES,
    dag=dag,
)

sensor_stage1_key = S3KeySensor(
    task_id="s3_sensor_stage1_key",
    bucket_name="{{ dag_run.conf['metaBucketName'] }}",
    bucket_key="{{ dag_run.conf['instanceId'] }}/step_1_meta_enc_kc/_SUCCESS",
)

SPARK_STEP_1 = [{
    "Name": "adv-mr-pid-stage1",
    "ActionOnFailure": "TERMINATE_JOB_FLOW",
    "HadoopJarStep": {
        "Jar":
        "command-runner.jar",
        "Args": [
            "spark-submit",
            "--deploy-mode",
            "cluster",
            "--master",
            "yarn",
Example #12
from airflow.providers.amazon.aws.sensors.s3_key import S3KeySensor

default_args = {
    "owner": "iotoi",
    "start_date": days_ago(1),
}

dag = DAG(
    "check_minio_1",
    default_args=default_args,
    schedule_interval="@once",
    tags=["iotoi-samples"],
)

t1 = BashOperator(
    task_id="bash_test",
    bash_command='echo "hello, it should work" > s3_conn_test.txt',
    dag=dag,
)

sensor = S3KeySensor(
    task_id="check_s3_for_file_in_s3",
    bucket_key="*",
    bucket_name="my-first-bucket",
    wildcard_match=True,
    aws_conn_id="local_minio",
    poke_interval=10,
    dag=dag,
)

t1.set_upstream(sensor)
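t1.set_upstream(sensor) makes the sensor run first, so the echo only executes once a matching key exists in the bucket; the same dependency is more commonly written with the bit-shift operator:

sensor >> t1  # equivalent to t1.set_upstream(sensor)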

stage1_checker = EmrStepSensor(
    task_id="watch_stage1",
    job_flow_id="{{ task_instance.xcom_pull(task_ids='create_emr_cluster', key='return_value') }}",
    step_id="{{ task_instance.xcom_pull(task_ids='add_stage_1', key='return_value')[0] }}",
    aws_conn_id="aws_default",
    dag=dag,
)

sensor_stage2_key = S3KeySensor(
    task_id="s3_sensor_stage2_key",
    bucket_name="{{ dag_run.conf['advBucketName'] }}",
    bucket_key="{{ dag_run.conf['instanceId'] }}/step_1_meta_enc_kc_kp/_SUCCESS",
)

SPARK_STEP_2 = [{
    "Name": "meta-mr-pid-stage2",
    "ActionOnFailure": "TERMINATE_JOB_FLOW",
    "HadoopJarStep": {
        "Jar":
        "command-runner.jar",
        "Args": [
            "spark-submit",
            "--deploy-mode",
            "cluster",
            "--master",
            "yarn",