Example #1
    def test_init(self):
        sensor = WasbPrefixSensor(task_id='wasb_sensor',
                                  dag=self.dag,
                                  **self._config)
        self.assertEqual(sensor.container_name, self._config['container_name'])
        self.assertEqual(sensor.prefix, self._config['prefix'])
        self.assertEqual(sensor.wasb_conn_id, self._config['wasb_conn_id'])
        self.assertEqual(sensor.check_options, {})
        self.assertEqual(sensor.timeout, self._config['timeout'])

        sensor = WasbPrefixSensor(task_id='wasb_sensor',
                                  dag=self.dag,
                                  check_options={'timeout': 2},
                                  **self._config)
        self.assertEqual(sensor.check_options, {'timeout': 2})
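Both this test and test_poke in the next example lean on a self._config fixture that the page does not show. Here is a minimal sketch of what it plausibly contains; the container/prefix values are inferred from the strings asserted in test_poke, and everything else (class name, dates, connection id) is an assumption:

import datetime
import unittest

from airflow.models import DAG

# Plausible fixture (inferred, not from the source): container/prefix match
# the strings asserted in test_poke below.
class TestWasbPrefixSensor(unittest.TestCase):
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': datetime.datetime(2018, 1, 1)}
        self.dag = DAG('test_dag_id', default_args=args)
        self._config = {
            'container_name': 'container',
            'prefix': 'prefix',
            'wasb_conn_id': 'conn_id',
            'timeout': 100,
        }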
Example #2
    @mock.patch('airflow.contrib.sensors.wasb_sensor.WasbHook')  # patch target implied by the mock_hook argument
    def test_poke(self, mock_hook):
        mock_instance = mock_hook.return_value
        sensor = WasbPrefixSensor(task_id='wasb_sensor',
                                  dag=self.dag,
                                  check_options={'timeout': 2},
                                  **self._config)
        sensor.poke(None)
        mock_instance.check_for_prefix.assert_called_once_with('container',
                                                               'prefix',
                                                               timeout=2)
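This pins down the delegation: poke() calls WasbHook.check_for_prefix with the sensor's container_name and prefix, expanding check_options as extra keyword arguments.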
Example #3
    def transform(self, src_operator: BaseOperator,
                  parent_fragment: DAGFragment,
                  upstream_fragments: List[DAGFragment]) -> DAGFragment:
        """
        You need to add the ``wasb_conn_id`` to the source operator (or preferably DAG) for this to work.
        The ``container_name`` and ``prefix`` for the blob based sensors are coped from the ``bucket_name``
        and ``bucket_key`` of the s3 sensor, so make sure they are templatized for changing between
        `s3://` and `wasb://` paths, etc. using config
        """
        s3_key_sensor: S3KeySensor = src_operator
        wasb_conn_id = s3_key_sensor.params.get('wasb_conn_id', None)
        if not wasb_conn_id:
            wasb_conn_id = self.dag.params.get('wasb_conn_id', None)

        if not wasb_conn_id:
            raise TransformerException(
                "Could not find wasb_conn_id in operator or DAG params")

        if s3_key_sensor.wildcard_match:
            wasb_sensor_op = WasbWildcardPrefixSensor(
                task_id=src_operator.task_id,
                wasb_conn_id=wasb_conn_id,
                container_name=s3_key_sensor.bucket_name,
                wildcard_prefix=s3_key_sensor.bucket_key,
                dag=self.dag)
        else:
            wasb_sensor_op = WasbPrefixSensor(
                task_id=src_operator.task_id,
                wasb_conn_id=wasb_conn_id,
                container_name=s3_key_sensor.bucket_name,
                prefix=s3_key_sensor.bucket_key,
                dag=self.dag)

        self.copy_op_attrs(wasb_sensor_op, src_operator)
        self.sign_op(wasb_sensor_op)

        return DAGFragment([wasb_sensor_op])
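To make the docstring's requirements concrete, here is a hedged sketch of a source DAG this transformer could handle; the dag_id, task_id, and Variable names are illustrative assumptions, not from the source:

import airflow.utils.dates
from airflow.models import DAG
from airflow.sensors.s3_key_sensor import S3KeySensor

# wasb_conn_id travels in the DAG params, where transform() looks it up.
src_dag = DAG(dag_id='s3_landing',
              params={'wasb_conn_id': 'wasb_default'},
              start_date=airflow.utils.dates.days_ago(1),
              schedule_interval='@daily')

# bucket_name/bucket_key are templated so config can flip between s3:// and
# wasb:// style values; transform() copies them into container_name/prefix.
wait_for_raw = S3KeySensor(task_id='wait_for_raw',
                           bucket_name='{{ var.value.landing_bucket }}',
                           bucket_key='{{ var.value.landing_prefix }}',
                           dag=src_dag)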
Example #4
# Imports implied by the excerpt (the top of the original file is not shown):
from datetime import timedelta
from airflow.models import DAG
from airflow.contrib.hooks.wasb_hook import WasbHook
from airflow.contrib.sensors.wasb_sensor import WasbPrefixSensor

output_container = '222'
processing_file_prefix = ''

blob_service = WasbHook(wasb_conn_id=wasb_connection_id)

dag = DAG(
    dag_id='azure_blob_reader',
    default_args=default_args,
    description='A dag to pull new images from blob and process them',
    schedule_interval=timedelta(days=1),
)

new_files = WasbPrefixSensor(
    task_id='new_files_sensor',
    container_name=input_container,
    prefix=processing_file_prefix,
    wasb_conn_id=wasb_connection_id,
    dag=dag,
)


def move_blobs_to_processing(**context):
    results = blob_service.connection.list_blobs(
        input_container, processing_file_prefix)
    blobs_moved = 0
    blob_urls = []
    for blob in results:
        print("\t Blob name: " + blob.name)
        # Generate a SAS token for blob access
        blob_input_url = blob_service.connection.make_blob_url(
            input_container, blob.name)  # blob.name assumed; excerpt truncated here
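The excerpt ends inside move_blobs_to_processing; one plausible way to wire the callable into the DAG (this wiring is assumed, not shown in the source):

from airflow.operators.python_operator import PythonOperator

# Assumed wiring: run the mover once the sensor fires, passing the Airflow
# context through to the callable's **context.
move_blobs = PythonOperator(task_id='move_blobs_to_processing',
                            python_callable=move_blobs_to_processing,
                            provide_context=True,
                            dag=dag)

new_files >> move_blobs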
Example #5
import airflow.utils.dates
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator

from airflow.contrib.sensors.wasb_sensor import WasbBlobSensor, WasbPrefixSensor

dag = DAG(dag_id="azure_blob_sensor",
          start_date=airflow.utils.dates.days_ago(3),
          schedule_interval="@once")

data_arrival_sensor = WasbBlobSensor(task_id="data_arrival_sensor",
                                     container_name="landing",
                                     blob_name="raw_data.csv",
                                     wasb_conn_id="blob_default",
                                     poke_interval=60,
                                     timeout=60 * 60 * 24,
                                     dag=dag)

data_file_prefix_sensor = WasbPrefixSensor(task_id="data_file_prefix_sensor",
                                           container_name="landing",
                                           prefix="raw_",
                                           wasb_conn_id="blob_default",
                                           poke_interval=60,
                                           timeout=60 * 60 * 24,
                                           dag=dag)

data_has_arrived = BashOperator(task_id="data_has_arrived",
                                bash_command="echo 'The data has arrived!'",
                                dag=dag)

[data_arrival_sensor, data_file_prefix_sensor] >> data_has_arrived
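The final line uses the list form of >>: both sensors become upstream of data_has_arrived, so the echo only fires once the exact blob and at least one raw_-prefixed blob have both landed in the landing container.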