def setUp(self):
    """Prepare a test DAG plus a partitioned Hive table by running a
    templated DDL task once against the mock Hive CLI hook."""
    default_args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG('test_dag_id', default_args=default_args)
    # ISO date string for the day after the default execution date.
    self.next_day = (DEFAULT_DATE + timedelta(days=1)).isoformat()[:10]
    self.database = 'airflow'
    self.partition_by = 'ds'
    self.table = 'static_babynames_partitioned'
    self.hql = """
        CREATE DATABASE IF NOT EXISTS {{ params.database }};
        USE {{ params.database }};
        DROP TABLE IF EXISTS {{ params.table }};
        CREATE TABLE IF NOT EXISTS {{ params.table }} (
            state string,
            year string,
            name string,
            gender string,
            num int)
        PARTITIONED BY ({{ params.partition_by }} string);
        ALTER TABLE {{ params.table }}
        ADD PARTITION({{ params.partition_by }}='{{ ds }}');
        """
    self.hook = MockHiveMetastoreHook()
    # Random suffix keeps task ids unique across repeated setUp calls.
    task = MockHiveOperator(
        task_id='HiveHook_' + str(random.randint(1, 10000)),
        params={
            'database': self.database,
            'table': self.table,
            'partition_by': self.partition_by,
        },
        hive_cli_conn_id='hive_cli_default',
        hql=self.hql,
        dag=self.dag,
    )
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
             ignore_ti_state=True)
def test_hiveconf_jinja_translate(self):
    """With hiveconf_jinja_translate on, ${...} placeholders are
    rewritten into Jinja {{ ... }} expressions by prepare_template()."""
    raw_hql = "SELECT ${num_col} FROM ${hiveconf:table};"
    operator = MockHiveOperator(
        hiveconf_jinja_translate=True,
        task_id='dry_run_basic_hql',
        hql=raw_hql,
        dag=self.dag,
    )
    operator.prepare_template()
    self.assertEqual(operator.hql,
                     "SELECT {{ num_col }} FROM {{ table }};")
def test_hiveconf(self):
    """Without jinja translation, ${hiveconf:...} placeholders are left
    untouched by prepare_template() (they are resolved at run time)."""
    raw_hql = "SELECT * FROM ${hiveconf:table} PARTITION (${hiveconf:day});"
    operator = MockHiveOperator(
        hiveconfs={'table': 'static_babynames', 'day': '{{ ds }}'},
        task_id='dry_run_basic_hql',
        hql=raw_hql,
        dag=self.dag,
    )
    operator.prepare_template()
    self.assertEqual(
        operator.hql,
        "SELECT * FROM ${hiveconf:table} PARTITION (${hiveconf:day});")
def test_hive_airflow_default_config_queue_override(self):
    """An explicit mapred_queue on the operator overrides the queue
    configured in airflow.cfg and is propagated to the hook."""
    override_queue = 'default'
    operator = MockHiveOperator(
        task_id='test_default_config_queue',
        hql=self.hql,
        mapred_queue=override_queue,
        mapred_queue_priority='HIGH',
        mapred_job_name='airflow.test_default_config_queue',
        dag=self.dag,
    )
    self.assertEqual(operator.get_hook().mapred_queue, override_queue)
def test_hive_airflow_default_config_queue(self):
    """When no mapred_queue is given, the hook falls back to the
    default_hive_mapred_queue value from the test configuration."""
    operator = MockHiveOperator(
        task_id='test_default_config_queue',
        hql=self.hql,
        mapred_queue_priority='HIGH',
        mapred_job_name='airflow.test_default_config_queue',
        dag=self.dag,
    )
    # just check that the correct default value in test_default.cfg is used
    expected_queue = conf.get('hive', 'default_hive_mapred_queue')
    self.assertEqual(operator.get_hook().mapred_queue, expected_queue)
def test_mapred_job_name(self, mock_get_hook):
    """execute() must set a descriptive mapred job name on the hook,
    built from hostname, dag id, task id and execution date.

    ``mock_get_hook`` is injected by the ``mock.patch`` decorator applied
    to this test and replaces the operator's get_hook().
    """
    mock_hook = mock.MagicMock()
    mock_get_hook.return_value = mock_hook
    op = MockHiveOperator(
        task_id='test_mapred_job_name',
        hql=self.hql,
        dag=self.dag,
    )
    fake_execution_date = timezone.datetime(2018, 6, 19)
    fake_ti = TaskInstance(task=op, execution_date=fake_execution_date)
    fake_ti.hostname = 'fake_hostname'
    fake_context = {'ti': fake_ti}
    op.execute(fake_context)
    expected_job_name = "Airflow HiveOperator task for {}.{}.{}.{}".format(
        fake_ti.hostname, self.dag.dag_id, op.task_id,
        fake_execution_date.isoformat())
    # Use assertEqual (not a bare assert) for consistency with the other
    # tests in this class and a useful diff on failure.
    self.assertEqual(expected_job_name, mock_hook.mapred_job_name)