Ejemplo n.º 1
0
 def test_execute(self, mock_hook):
     """Execute the operator and verify the single ``load_file`` call.

     The operator is built from ``self._config`` plus a ``load_options``
     dict. After ``execute(None)``, the object returned by ``mock_hook``
     must have received exactly one ``load_file`` call with the positional
     arguments ``'file'``, ``'container'``, ``'blob'`` and ``timeout=2``.
     """
     hook = mock_hook.return_value
     task = FileToWasbOperator(
         task_id='wasb_sensor',
         dag=self.dag,
         load_options={'timeout': 2},
         **self._config
     )
     task.execute(None)
     hook.load_file.assert_called_once_with(
         'file',
         'container',
         'blob',
         timeout=2,
     )
 def test_execute(self, mock_hook):
     """Check that executing the operator triggers one ``load_file`` call.

     The expected call on ``mock_hook.return_value`` carries the positional
     arguments ``'file'``, ``'container'``, ``'blob'`` and the keyword
     ``timeout=2`` taken from the ``load_options`` given to the operator.
     """
     expected_args = ('file', 'container', 'blob')
     hook = mock_hook.return_value
     # Build the operator and run it in one expression; the context
     # argument passed to execute() is None.
     FileToWasbOperator(
         task_id='wasb_sensor',
         dag=self.dag,
         load_options={'timeout': 2},
         **self._config
     ).execute(None)
     hook.load_file.assert_called_once_with(*expected_args, timeout=2)
Ejemplo n.º 3
0
    def _get_test_dag(self):
        """Build and return the ``test_dag`` DAG.

        Ten tasks (``op1`` .. ``op10``) of assorted operator types are
        created inside the DAG context and wired together as:

            op1 -> op2, op3, op4
            op2 -> op5, op6
            op6 -> op7, op8, op9
            op3 -> op7, op8
            op8 -> op9, op10
        """
        with DAG(dag_id='test_dag', default_args=DEFAULT_DAG_ARGS) as dag:
            # Tasks are instantiated in task_id order (op1 first, op10 last).
            spark_submit = SparkSubmitOperator(task_id='op1')
            add_steps = EmrAddStepsOperator(task_id='op2', job_flow_id='foo')
            list_keys = S3ListOperator(task_id='op3', bucket='foo')
            create_job_flow = EmrCreateJobFlowOperator(task_id='op4')
            trigger = TriggerDagRunOperator(
                task_id='op5',
                trigger_dag_id='foo',
            )
            to_wasb = FileToWasbOperator(
                task_id='op6',
                container_name='foo',
                blob_name='foo',
                file_path='foo',
            )
            email = EmailOperator(
                task_id='op7',
                subject='foo',
                to='foo',
                html_content='foo',
            )
            copy_object = S3CopyObjectOperator(
                task_id='op8',
                dest_bucket_key='foo',
                source_bucket_key='foo',
            )
            branch = BranchPythonOperator(task_id='op9', python_callable=print)
            python_op = PythonOperator(task_id='op10', python_callable=range)

            # Dependency edges: each upstream task fans out to a list.
            spark_submit >> [add_steps, list_keys, create_job_flow]
            add_steps >> [trigger, to_wasb]
            to_wasb >> [email, copy_object, branch]
            list_keys >> [email, copy_object]
            copy_object >> [branch, python_op]

        return dag
Ejemplo n.º 4
0
    def test_init(self):
        """Verify constructor arguments are exposed as operator attributes.

        Without ``load_options`` the attribute must equal ``{}``; when a
        ``load_options`` dict is supplied it must be stored as given. The
        remaining checked attributes must match the corresponding entries
        of ``self._config``.
        """
        default_op = FileToWasbOperator(
            task_id='wasb_operator', dag=self.dag, **self._config
        )
        # Attributes that are compared directly against self._config.
        config_fields = (
            'file_path',
            'container_name',
            'blob_name',
            'wasb_conn_id',
        )
        for field in config_fields:
            self.assertEqual(getattr(default_op, field), self._config[field])
        self.assertEqual(default_op.load_options, {})
        self.assertEqual(default_op.retries, self._config['retries'])

        custom_op = FileToWasbOperator(
            task_id='wasb_operator',
            dag=self.dag,
            load_options={'timeout': 2},
            **self._config
        )
        self.assertEqual(custom_op.load_options, {'timeout': 2})
Ejemplo n.º 5
0
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.contrib.operators.file_to_wasb import FileToWasbOperator
from airflow.utils.dates import days_ago

# DAG "upload_test_file": download a gzipped CSV with wget, decompress it
# with gunzip, then pass the resulting file to FileToWasbOperator.
default_args = {
    "owner": "me",
    "start_date": days_ago(2),
}

with DAG(
    "upload_test_file",
    description="uploading a test file",
    default_args=default_args,
    schedule_interval="@once",
) as dag:
    # Step 1: fetch the compressed listings file into /tmp.
    download_task = BashOperator(
        task_id="download_file",
        bash_command=(
            "wget http://data.insideairbnb.com/denmark/hovedstaden/copenhagen/"
            "2020-06-26/data/listings.csv.gz -O /tmp/listings.csv.gz"
        ),
    )

    # Step 2: decompress in place; -f forces overwrite of an existing file.
    unzip = BashOperator(
        task_id="unzip",
        bash_command="gunzip -f /tmp/listings.csv.gz",
    )

    # Step 3: upload the decompressed CSV as blob "my_test.csv" in "raw".
    upload_task = FileToWasbOperator(
        task_id="test_upload",
        file_path="/tmp/listings.csv",
        container_name="raw",
        blob_name="my_test.csv",
    )

    # Linear pipeline: download -> unzip -> upload.
    download_task >> unzip
    unzip >> upload_task