Example #1
    def setUp(self):
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'
        self.dag = dag

        self.sensor = GCSUploadSessionCompleteSensor(task_id='sensor_1',
                                                     bucket='test-bucket',
                                                     prefix='test-prefix/path',
                                                     inactivity_period=12,
                                                     poke_interval=10,
                                                     min_objects=1,
                                                     allow_delete=False,
                                                     dag=self.dag)

        self.last_mocked_date = datetime(2019, 4, 24, 0, 0, 0)
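
The setUp above relies on a few module-level names that this listing does not show. A minimal sketch of plausible imports and constants follows; the concrete values are assumptions chosen for illustration, not the originals:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.gcs import GCSUploadSessionCompleteSensor

# Illustrative values only -- the real test module defines its own.
TEST_DAG_ID = 'unit_test_dag'
DEFAULT_DATE = datetime(2015, 1, 1)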
Example #2
    def test_files_deleted_between_pokes_allow_delete(self):
        self.sensor = GCSUploadSessionCompleteSensor(task_id='sensor',
                                                     bucket='test-bucket',
                                                     prefix='test-prefix/path',
                                                     inactivity_period=12,
                                                     poke_interval=10,
                                                     min_objects=1,
                                                     allow_delete=True,
                                                     previous_num_objects=0,
                                                     dag=self.dag)
        self.sensor.is_bucket_updated(2)
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated(1)
        self.assertEqual(self.sensor.previous_num_objects, 1)
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated(2)
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated(2)
        self.assertEqual(self.sensor.inactivity_seconds, 10)
        self.assertTrue(self.sensor.is_bucket_updated(2))
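
This variant appears to come from an older release of the sensor, in which is_bucket_updated() received the current object count as an integer and the sensor tracked previous_num_objects; Example #3 and the later examples use the newer set-based API, where previous_objects holds a set of object names.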
Example #3
    def test_files_deleted_between_pokes_allow_delete(self):
        self.sensor = GCSUploadSessionCompleteSensor(
            task_id='sensor_2',
            bucket='test-bucket',
            prefix='test-prefix/path',
            inactivity_period=12,
            poke_interval=10,
            min_objects=1,
            allow_delete=True,
            dag=self.dag,
        )
        self.sensor.is_bucket_updated({'a', 'b'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a'})
        assert len(self.sensor.previous_objects) == 1
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'c'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'd'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'd'})
        assert self.sensor.inactivity_seconds == 10
        assert self.sensor.is_bucket_updated({'a', 'd'})
Example #4
    def setUp(self):
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'
        self.dag = dag

        self.sensor = GCSUploadSessionCompleteSensor(
            task_id='sensor_1',
            bucket='test-bucket',
            prefix='test-prefix/path',
            inactivity_period=12,
            poke_interval=10,
            min_objects=1,
            allow_delete=False,
            google_cloud_conn_id=TEST_GCP_CONN_ID,
            delegate_to=TEST_DELEGATE_TO,
            impersonation_chain=TEST_IMPERSONATION_CHAIN,
            dag=self.dag)

        self.last_mocked_date = datetime(2019, 4, 24, 0, 0, 0)
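
Example #4 additionally wires in connection, delegation, and impersonation settings taken from module-level constants. The values below are placeholders chosen for illustration, not the originals:

# Placeholder values for the connection-related constants (assumptions).
TEST_GCP_CONN_ID = 'google_cloud_default'
TEST_DELEGATE_TO = 'test-delegate-to'
TEST_IMPERSONATION_CHAIN = ['impersonated-account@example-project.iam.gserviceaccount.com']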
Example #5
class TestGCSUploadSessionCompleteSensor(TestCase):
    def setUp(self):
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'
        self.dag = dag

        self.sensor = GCSUploadSessionCompleteSensor(
            task_id='sensor_1',
            bucket='test-bucket',
            prefix='test-prefix/path',
            inactivity_period=12,
            poke_interval=10,
            min_objects=1,
            allow_delete=False,
            google_cloud_conn_id=TEST_GCP_CONN_ID,
            delegate_to=TEST_DELEGATE_TO,
            impersonation_chain=TEST_IMPERSONATION_CHAIN,
            dag=self.dag,
        )

        self.last_mocked_date = datetime(2019, 4, 24, 0, 0, 0)

    @mock.patch("airflow.providers.google.cloud.sensors.gcs.GCSHook")
    def test_get_gcs_hook(self, mock_hook):
        self.sensor._get_gcs_hook()
        mock_hook.assert_called_once_with(
            gcp_conn_id=TEST_GCP_CONN_ID,
            delegate_to=TEST_DELEGATE_TO,
            impersonation_chain=TEST_IMPERSONATION_CHAIN,
        )
        assert mock_hook.return_value == self.sensor.hook

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time',
                mock_time)
    def test_files_deleted_between_pokes_throw_error(self):
        self.sensor.is_bucket_updated({'a', 'b'})
        with pytest.raises(AirflowException):
            self.sensor.is_bucket_updated({'a'})

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time',
                mock_time)
    def test_files_deleted_between_pokes_allow_delete(self):
        self.sensor = GCSUploadSessionCompleteSensor(
            task_id='sensor_2',
            bucket='test-bucket',
            prefix='test-prefix/path',
            inactivity_period=12,
            poke_interval=10,
            min_objects=1,
            allow_delete=True,
            dag=self.dag,
        )
        self.sensor.is_bucket_updated({'a', 'b'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a'})
        assert len(self.sensor.previous_objects) == 1
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'c'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'd'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'd'})
        assert self.sensor.inactivity_seconds == 10
        assert self.sensor.is_bucket_updated({'a', 'd'})

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time',
                mock_time)
    def test_incoming_data(self):
        self.sensor.is_bucket_updated({'a'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'b'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a', 'b', 'c'})
        assert self.sensor.inactivity_seconds == 0

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time',
                mock_time)
    def test_no_new_data(self):
        self.sensor.is_bucket_updated({'a'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a'})
        assert self.sensor.inactivity_seconds == 10

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time',
                mock_time)
    def test_no_new_data_success_criteria(self):
        self.sensor.is_bucket_updated({'a'})
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated({'a'})
        assert self.sensor.inactivity_seconds == 10
        assert self.sensor.is_bucket_updated({'a'})

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time',
                mock_time)
    def test_not_enough_objects(self):
        self.sensor.is_bucket_updated(set())
        assert self.sensor.inactivity_seconds == 0
        self.sensor.is_bucket_updated(set())
        assert self.sensor.inactivity_seconds == 10
        assert not self.sensor.is_bucket_updated(set())
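
The poke-related tests above patch airflow.providers.google.cloud.sensors.gcs.get_time with a mock_time callable defined elsewhere in the test module and not shown in this listing. A minimal sketch of such a helper, assuming each call should advance a fake clock by the 10-second poke interval (the start timestamp and names are illustrative):

from datetime import datetime, timedelta
from itertools import count
from unittest import mock

_START = datetime(2019, 4, 24, 0, 0, 0)  # assumed start, matching last_mocked_date above
_ticks = count()

def next_time_side_effect():
    # Each call returns a timestamp 10 seconds later than the previous one,
    # so two consecutive pokes appear exactly poke_interval seconds apart.
    return _START + timedelta(seconds=10 * next(_ticks))

mock_time = mock.Mock(side_effect=next_time_side_effect)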
Example #6
class TestGCSUploadSessionCompleteSensor(TestCase):

    def setUp(self):
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'
        self.dag = dag

        self.sensor = GCSUploadSessionCompleteSensor(
            task_id='sensor_1',
            bucket='test-bucket',
            prefix='test-prefix/path',
            inactivity_period=12,
            poke_interval=10,
            min_objects=1,
            allow_delete=False,
            dag=self.dag
        )

        self.last_mocked_date = datetime(2019, 4, 24, 0, 0, 0)

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time', mock_time)
    def test_files_deleted_between_pokes_throw_error(self):
        self.sensor.is_bucket_updated({'a', 'b'})
        with self.assertRaises(AirflowException):
            self.sensor.is_bucket_updated({'a'})

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time', mock_time)
    def test_files_deleted_between_pokes_allow_delete(self):
        self.sensor = GCSUploadSessionCompleteSensor(
            task_id='sensor_2',
            bucket='test-bucket',
            prefix='test-prefix/path',
            inactivity_period=12,
            poke_interval=10,
            min_objects=1,
            allow_delete=True,
            dag=self.dag
        )
        self.sensor.is_bucket_updated({'a', 'b'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a'})
        self.assertEqual(len(self.sensor.previous_objects), 1)
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a', 'c'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a', 'd'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a', 'd'})
        self.assertEqual(self.sensor.inactivity_seconds, 10)
        self.assertTrue(self.sensor.is_bucket_updated({'a', 'd'}))

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time', mock_time)
    def test_incoming_data(self):
        self.sensor.is_bucket_updated({'a'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a', 'b'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a', 'b', 'c'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time', mock_time)
    def test_no_new_data(self):
        self.sensor.is_bucket_updated({'a'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a'})
        self.assertEqual(self.sensor.inactivity_seconds, 10)

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time', mock_time)
    def test_no_new_data_success_criteria(self):
        self.sensor.is_bucket_updated({'a'})
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated({'a'})
        self.assertEqual(self.sensor.inactivity_seconds, 10)
        self.assertTrue(self.sensor.is_bucket_updated({'a'}))

    @mock.patch('airflow.providers.google.cloud.sensors.gcs.get_time', mock_time)
    def test_not_enough_objects(self):
        self.sensor.is_bucket_updated(set())
        self.assertEqual(self.sensor.inactivity_seconds, 0)
        self.sensor.is_bucket_updated(set())
        self.assertEqual(self.sensor.inactivity_seconds, 10)
        self.assertFalse(self.sensor.is_bucket_updated(set()))
Example #7
    # [END howto_sensor_object_exists_task]
    # [START howto_sensor_object_with_prefix_exists_task]
    gcs_object_with_prefix_exists = GCSObjectsWithPrefixExistenceSensor(
        bucket=BUCKET_1,
        prefix=PATH_TO_UPLOAD_FILE_PREFIX,
        mode='poke',
        task_id="gcs_object_with_prefix_exists_task",
    )
    # [END howto_sensor_object_with_prefix_exists_task]

    # [START howto_sensor_gcs_upload_session_complete_task]
    gcs_upload_session_complete = GCSUploadSessionCompleteSensor(
        bucket=BUCKET_1,
        prefix=PATH_TO_MANUAL_UPLOAD_FILE_PREFIX,
        inactivity_period=60,
        min_objects=1,
        allow_delete=True,
        previous_objects=set(),
        task_id="gcs_upload_session_complete_task",
    )
    # [END howto_sensor_gcs_upload_session_complete_task]

    # [START howto_sensor_object_update_exists_task]
    gcs_update_object_exists = GCSObjectUpdateSensor(
        bucket=BUCKET_1,
        object=BUCKET_MANUAL_UPLOAD_FILE_LOCATION,
        task_id="gcs_object_update_sensor_task",
    )
    # [END howto_sensor_object_update_exists_task]

    delete_bucket = GCSDeleteBucketOperator(task_id="delete_bucket",
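
The howto fragment above comes from a larger example DAG and omits the surrounding DAG context. A minimal, self-contained sketch of how the upload-session sensor might be wired up on its own; the bucket name, prefix, and DAG settings below are assumptions, not values from the original example:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.gcs import GCSUploadSessionCompleteSensor

BUCKET_1 = 'example-upload-bucket'             # assumed value
PATH_TO_MANUAL_UPLOAD_FILE_PREFIX = 'manual/'  # assumed value

with DAG(
    dag_id='example_gcs_upload_session',
    start_date=datetime(2021, 1, 1),
    schedule_interval='@once',
    catchup=False,
) as dag:
    gcs_upload_session_complete = GCSUploadSessionCompleteSensor(
        task_id='gcs_upload_session_complete_task',
        bucket=BUCKET_1,
        prefix=PATH_TO_MANUAL_UPLOAD_FILE_PREFIX,
        inactivity_period=60,   # seconds without new objects before the session counts as complete
        min_objects=1,          # at least one object must have arrived under the prefix
        allow_delete=True,      # deletions between pokes are tolerated instead of raising an error
        previous_objects=set(),
    )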