Beispiel #1
0
    def test_init_with_template_connection(self):
        """A templated qubole_conn_id should resolve once template fields render."""
        with DAG(DAG_ID, start_date=DEFAULT_DATE):
            operator = QuboleOperator(task_id=TASK_ID, qubole_conn_id="{{ qubole_conn_id }}")

        # Render with a context that supplies the templated connection id.
        operator.render_template_fields({'qubole_conn_id': TEMPLATE_CONN})
        assert operator.task_id == TASK_ID
        assert operator.qubole_conn_id == TEMPLATE_CONN
Beispiel #2
0
    def test_notify(self):
        """notify=True must surface as the --notify CLI flag in the command args."""
        with DAG(DAG_ID, start_date=DEFAULT_DATE) as dag:
            operator = QuboleOperator(task_id=TASK_ID, command_type='sparkcmd', notify=True, dag=dag)

        cmd_args = operator.get_hook().create_cmd_args({'run_id': 'dummy'})
        assert cmd_args[0] == "--notify"
Beispiel #3
0
    def test_get_hook(self):
        """get_hook() on the operator should produce a QuboleHook."""
        with DAG(DAG_ID, start_date=DEFAULT_DATE) as dag:
            operator = QuboleOperator(task_id=TASK_ID, command_type='hivecmd', dag=dag)

        assert operator.get_hook().__class__ == QuboleHook
Beispiel #4
0
    def test_hyphen_args_note_id(self):
        """An underscored kwarg (note_id) must be emitted as a hyphenated CLI arg.

        Converted from unittest-style ``self.assertEqual`` to a plain ``assert``
        for consistency with the other command-arg tests in this file.
        """
        dag = DAG(DAG_ID, start_date=DEFAULT_DATE)

        with dag:
            task = QuboleOperator(task_id=TASK_ID,
                                  command_type='sparkcmd',
                                  note_id="123",
                                  dag=dag)

        assert task.get_hook().create_cmd_args({'run_id': 'dummy'})[0] == "--note-id=123"
Beispiel #5
0
    def test_extra_serialized_field(self):
        """qubole_conn_id must survive DAG (de)serialization, and operator extra
        links must still resolve on the deserialized task.

        Converted from unittest-style assertions to plain ``assert`` for
        consistency with the rest of this file's tests.
        """
        dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
        with dag:
            QuboleOperator(
                task_id=TASK_ID,
                command_type='shellcmd',
                qubole_conn_id=TEST_CONN,
            )

        serialized_dag = SerializedDAG.to_dict(dag)
        # The extra field must be part of the serialized task representation.
        assert "qubole_conn_id" in serialized_dag["dag"]["tasks"][0]

        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict[TASK_ID]
        assert getattr(simple_task, "qubole_conn_id") == TEST_CONN

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        assert isinstance(list(simple_task.operator_extra_links)[0], QDSLink)

        ti = TaskInstance(task=simple_task, execution_date=DEFAULT_DATE)
        ti.xcom_push('qbol_cmd_id', 12345)

        # check for positive case
        url = simple_task.get_extra_links(DEFAULT_DATE, 'Go to QDS')
        assert url == 'http://localhost/v2/analyze?command_id=12345'

        # check for negative case: a different execution date has no XCom value
        url2 = simple_task.get_extra_links(datetime(2017, 1, 2), 'Go to QDS')
        assert url2 == ''
Beispiel #6
0
    def test_init_with_template_cluster_label(self):
        """A templated cluster_label should render from the task params.

        Converted from ``self.assertEqual`` to a plain ``assert`` for
        consistency with the other template tests in this file.
        """
        dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
        task = QuboleOperator(task_id=TASK_ID,
                              dag=dag,
                              cluster_label='{{ params.cluster_label }}',
                              params={'cluster_label': 'default'})

        ti = TaskInstance(task, DEFAULT_DATE)
        ti.render_templates()

        assert task.cluster_label == 'default'
Beispiel #7
0
    def test_get_redirect_url(self):
        """'Go to QDS' extra link points at the pushed command id, and is empty
        for an execution date with no XCom value.

        Converted from ``self.assertEqual`` to plain ``assert`` for consistency
        with the rest of this file's tests.
        """
        dag = DAG(DAG_ID, start_date=DEFAULT_DATE)

        with dag:
            task = QuboleOperator(task_id=TASK_ID,
                                  qubole_conn_id=TEST_CONN,
                                  command_type='shellcmd',
                                  parameters="param1 param2",
                                  dag=dag)

        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.xcom_push('qbol_cmd_id', 12345)

        # check for positive case
        url = task.get_extra_links(DEFAULT_DATE, 'Go to QDS')
        assert url == 'http://localhost/v2/analyze?command_id=12345'

        # check for negative case: no command id pushed for this execution date
        url2 = task.get_extra_links(datetime(2017, 1, 2), 'Go to QDS')
        assert url2 == ''
Beispiel #8
0
    def test_position_args_parameters(self):
        """Positional ``parameters`` and hadoop ``sub_command`` must be split
        into individual command args.

        Improvement: the original rebuilt the arg list via
        ``create_cmd_args`` once per assertion (seven calls); build it once
        per task instead.
        """
        dag = DAG(DAG_ID, start_date=DEFAULT_DATE)

        with dag:
            task = QuboleOperator(
                task_id=TASK_ID, command_type='pigcmd', parameters="key1=value1 key2=value2", dag=dag
            )

        args = task.get_hook().create_cmd_args({'run_id': 'dummy'})
        assert args[1] == "key1=value1"
        assert args[2] == "key2=value2"

        cmd = "s3distcp --src s3n://airflow/source_hadoopcmd --dest s3n://airflow/destination_hadoopcmd"
        task = QuboleOperator(task_id=TASK_ID + "_1", command_type='hadoopcmd', dag=dag, sub_command=cmd)

        args = task.get_hook().create_cmd_args({'run_id': 'dummy'})
        assert args[1] == "s3distcp"
        assert args[2] == "--src"
        assert args[3] == "s3n://airflow/source_hadoopcmd"
        assert args[4] == "--dest"
        assert args[5] == "s3n://airflow/destination_hadoopcmd"
Beispiel #9
0
        :param ti: The TaskInstance object.
        :type ti: airflow.models.TaskInstance
        :return: True if the files are the same, False otherwise.
        :rtype: bool
        """
        qubole_result_1 = hive_show_table.get_results(ti)
        qubole_result_2 = hive_s3_location.get_results(ti)
        return filecmp.cmp(qubole_result_1, qubole_result_2)

    # Hive command that lists tables on the cluster selected via a templated label.
    hive_show_table = QuboleOperator(
        task_id='hive_show_table',
        command_type='hivecmd',
        query='show tables',
        cluster_label='{{ params.cluster_label }}',
        # fetch_logs=True pulls the Qubole command logs and concatenates them
        # into the corresponding Airflow task logs.
        fetch_logs=True,
        # Extra tag on the Qubole command; dag_id, task_id and run_id are
        # attached as tags automatically.
        tags='airflow_example_run',
        params={'cluster_label': 'default'},
    )

    hive_s3_location = QuboleOperator(
        task_id='hive_s3_location',
        command_type="hivecmd",
        script_location=
        "s3n://public-qubole/qbol-library/scripts/show_table.hql",
        notify=True,
        tags=['tag1', 'tag2'],
        # If the script at s3 location has any qubole specific macros to be replaced
Beispiel #10
0
 def test_init_with_default_connection(self):
     """Without an explicit conn id, the operator uses the default connection.

     Converted from ``self.assertEqual`` to plain ``assert`` for consistency
     with the assert-style variant of this test elsewhere in the file.
     """
     op = QuboleOperator(task_id=TASK_ID)
     assert op.task_id == TASK_ID
     assert op.qubole_conn_id == DEFAULT_CONN
Beispiel #11
0
 def test_parameter_pool_passed(self):
     """A ``pool`` kwarg must be forwarded to the operator's ``pool`` attribute.

     Converted from ``self.assertEqual`` to plain ``assert`` for consistency
     with the assert-style variant of this test elsewhere in the file.
     """
     test_pool = 'test_pool'
     op = QuboleOperator(task_id=TASK_ID, pool=test_pool)
     assert op.pool == test_pool
Beispiel #12
0
 def test_init_with_default_connection(self):
     """Omitting qubole_conn_id selects the default connection id."""
     operator = QuboleOperator(task_id=TASK_ID)
     assert operator.task_id == TASK_ID
     assert operator.qubole_conn_id == DEFAULT_CONN
Beispiel #13
0
 def test_parameter_include_header_missing(self, mock_get_results):
     """Calling get_results() without include_headers should default it to False.

     BUG FIX: the original called ``mock_get_results.asset_called_with(...)``.
     ``asset_called_with`` is not a Mock method, so Mock silently created a
     no-op attribute and the test never asserted anything. Corrected to
     ``assert_called_with``.
     """
     dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
     qubole_operator = QuboleOperator(task_id=TASK_ID, dag=dag, command_type='prestocmd')
     qubole_operator.get_results()
     # NOTE(review): argument form kept from the original call; confirm it
     # matches the actual signature the mock receives.
     mock_get_results.assert_called_with('include_headers', False)
Beispiel #14
0
 def test_parameter_pool_passed(self):
     """The pool supplied at construction is stored on the operator."""
     pool_name = 'test_pool'
     operator = QuboleOperator(task_id=TASK_ID, pool=pool_name)
     assert operator.pool == pool_name