Ejemplo n.º 1
0
 def test_file_sensore(self, patched_poke):
     """Assert that poking a ``QuboleFileSensor`` is truthy when the stub returns True.

     ``patched_poke`` is presumably injected by a ``mock.patch`` decorator that
     is outside this view -- confirm which attribute it replaces.
     """
     patched_poke.return_value = True

     file_data = {"files": ["s3://some_bucket/some_file"]}
     sensor = QuboleFileSensor(task_id='test_qubole_file_sensor', data=file_data)

     # An empty dict stands in for the context argument passed to poke().
     poke_result = sensor.poke({})
     self.assertTrue(poke_result)
Ejemplo n.º 2
0
 def test_file_sensore(self, patched_poke):
     """Assert that poking a ``QuboleFileSensor`` is truthy when the stub returns True.

     ``patched_poke`` is presumably injected by a ``mock.patch`` decorator that
     is outside this view -- confirm which attribute it replaces.
     """
     patched_poke.return_value = True

     file_data = {"files": ["s3://some_bucket/some_file"]}
     sensor = QuboleFileSensor(task_id='test_qubole_file_sensor', data=file_data)

     # An empty dict stands in for the context argument passed to poke().
     poke_result = sensor.poke({})
     self.assertTrue(poke_result)
Ejemplo n.º 3
0
# Example DAG that wires up Qubole sensors. The module docstring is reused as
# the DAG's documentation through ``doc_md``; ``default_args`` is defined
# outside this snippet.
with DAG(
    dag_id='example_qubole_sensor',
    default_args=default_args,
    schedule_interval=None,
    doc_md=__doc__,
    tags=['example'],
) as dag:

    # File sensor over two S3 paths. The second path is a template string;
    # ``ds.split('-')[2]`` is presumably the day part of the run date
    # (assumes ``ds`` renders as YYYY-MM-DD -- TODO confirm).
    # NOTE(review): poke_interval/timeout look like seconds -- confirm.
    t1 = QuboleFileSensor(
        task_id='check_s3_file',
        qubole_conn_id='qubole_default',
        poke_interval=60,
        timeout=600,
        data={
            "files":
                [
                    "s3://paid-qubole/HadoopAPIExamples/jars/hadoop-0.20.1-dev-streaming.jar",
                    "s3://paid-qubole/HadoopAPITests/data/{{ ds.split('-')[2] }}.tsv"
                ]  # checks that all of the files in this list are available
        }
    )

    t2 = QubolePartitionSensor(
        task_id='check_hive_partition',
        poke_interval=10,
        timeout=60,
        data={"schema": "default",
              "table": "my_partitioned_table",
              "columns": [
                  {"column": "month", "values":
    'start_date': dates.days_ago(2),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False
}

# Create the example DAG with no schedule interval, then reuse the module
# docstring as the DAG's documentation.
dag = DAG(
    dag_id='example_qubole_sensor',
    default_args=default_args,
    schedule_interval=None,
)
dag.doc_md = __doc__

# File sensor over two S3 paths, attached to ``dag`` explicitly. The second
# path is a template string rendered at run time.
t1 = QuboleFileSensor(
    task_id='check_s3_file',
    dag=dag,
    qubole_conn_id='qubole_default',
    poke_interval=60,
    timeout=600,
    data={
        "files": [
            "s3://paid-qubole/HadoopAPIExamples/jars/hadoop-0.20.1-dev-streaming.jar",
            "s3://paid-qubole/HadoopAPITests/data/{{ ds.split('-')[2] }}.tsv",
        ],  # will check for availability of all the files in array
    },
)

t2 = QubolePartitionSensor(
    task_id='check_hive_partition',
    poke_interval=10,
    timeout=60,
    data={"schema":"default",
          "table":"my_partitioned_table",
          "columns":[
              {"column" : "month", "values" : ["{{ ds.split('-')[1] }}"]},
              {"column" : "day", "values" : ["{{ ds.split('-')[2] }}" , "{{ yesterday_ds.split('-')[2] }}"]}