def _run_task_instance(self, ti, mark_success, pool, session=None):
    """
    Mark a task instance as running, persist that state, and run it in-process.

    Any exception raised by ``ti._run_raw_task`` propagates to the caller after
    the cleanup in the ``finally`` clause has run (there is no ``except``).

    :param ti: task instance to run; a ``SimpleTaskInstance`` is first replaced
        by a full ``TaskInstance`` built from the DAG found in ``self.dag_bag``
    :param mark_success: forwarded unchanged to ``ti._run_raw_task``
    :param pool: forwarded unchanged to ``ti._run_raw_task``
    :param session: database session used to merge and commit the updated task
        instance. NOTE(review): defaults to ``None`` but is used
        unconditionally -- presumably injected by a decorator that is not
        visible here; confirm.
    """
    # set proper state and try number to keep logger in sync
    if isinstance(ti, SimpleTaskInstance):
        # Imported locally rather than at module level.
        from airflow.models import TaskInstance
        dag = self.dag_bag.get_dag(ti.dag_id)
        task = dag.get_task(ti.task_id)
        ti = TaskInstance(task, ti.execution_date)
    ti.state = State.RUNNING
    ti._try_number += 1
    # Update hostname to allow runtime logging across all environments
    ti.hostname = get_hostname()
    # Persist the updated state before the task starts running
    session.merge(ti)
    session.commit()
    # backward compatible with airflow loggers
    from airflow.utils.log import logging_mixin
    logging_mixin.set_context(logging.root, ti)

    try:
        ti._run_raw_task(mark_success=mark_success, job_id=ti.job_id, pool=pool)
    finally:
        # Close only the root-logger handlers named "task", whether or not
        # the task run raised.
        for handler in logging.root.handlers:
            if handler.name == "task":
                handler.close()
        # NOTE(review): the original formatting of this block was lost; the
        # log sync is assumed to belong to the ``finally`` clause -- confirm.
        self._sync_remote_logs(ti)
def test_mapred_job_name(self, mock_get_hook):
    """
    After ``execute``, the mocked hook's ``mapred_job_name`` must equal the
    "Airflow HiveOperator task for <hostname>.<dag_id>.<task_id>.<date>" string.
    """
    hook = mock.MagicMock()
    mock_get_hook.return_value = hook

    operator = MockHiveOperator(task_id='test_mapred_job_name', hql=self.hql, dag=self.dag)
    run_date = timezone.datetime(2018, 6, 19)
    task_instance = TaskInstance(task=operator, execution_date=run_date)
    task_instance.hostname = 'fake_hostname'

    # The operator only needs the task instance from the context here.
    operator.execute({'ti': task_instance})

    expected_name = "Airflow HiveOperator task for {}.{}.{}.{}".format(
        task_instance.hostname,
        self.dag.dag_id,
        operator.task_id,
        run_date.isoformat(),
    )
    assert expected_name == hook.mapred_job_name
def test_heartbeat_failed_fast(self, mock_getpid):
    """
    Consecutive heartbeat callbacks of the local task job must be spaced one
    ``heartrate`` apart (within 0.05 seconds).
    """
    mock_getpid.return_value = 1

    # Wall-clock time of every heartbeat callback invocation.
    beat_times = []

    with create_session() as session:
        dag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        ).get_dag('test_heartbeat_failed_fast')
        task = dag.get_task('test_heartbeat_failed_fast_op')

        dag.create_dagrun(
            run_id="test_heartbeat_failed_fast_run",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

        task_instance = TI(task=task, execution_date=DEFAULT_DATE)
        task_instance.refresh_from_db()
        task_instance.state = State.RUNNING
        task_instance.hostname = get_hostname()
        task_instance.pid = 1
        session.commit()

        job = LocalTaskJob(
            task_instance=task_instance,
            executor=MockExecutor(do_update=False),
        )
        job.heartrate = 2
        # The callback ignores whatever keyword arguments it is given.
        job.heartbeat_callback = lambda **kwargs: beat_times.append(timezone.utcnow())
        job._execute()

        self.assertGreater(len(beat_times), 1)
        # Compare each heartbeat with the one immediately before it.
        for earlier, later in zip(beat_times, beat_times[1:]):
            gap = (later - earlier).total_seconds()
            self.assertAlmostEqual(gap, job.heartrate, delta=0.05)
def test_mapred_job_name(self, mock_get_hook):
    """
    After ``execute``, the mocked hook's ``mapred_job_name`` must equal the
    "Airflow HiveOperator task for <hostname>.<dag_id>.<task_id>.<date>" string.
    """
    hive_hook = mock.MagicMock()
    mock_get_hook.return_value = hive_hook

    hive_op = HiveOperator(task_id='test_mapred_job_name', hql=self.hql, dag=self.dag)
    when = timezone.datetime(2018, 6, 19)
    instance = TaskInstance(task=hive_op, execution_date=when)
    instance.hostname = 'fake_hostname'

    # The operator only needs the task instance from the context here.
    hive_op.execute({'ti': instance})

    expected_name = "Airflow HiveOperator task for {}.{}.{}.{}".format(
        instance.hostname,
        self.dag.dag_id,
        hive_op.task_id,
        when.isoformat(),
    )
    self.assertEqual(expected_name, hive_hook.mapred_job_name)
def test_heartbeat_failed_fast(self, mock_getpid):
    """
    Consecutive recorded ``latest_heartbeat`` values of the local task job must
    be at least one ``heartrate`` apart.
    """
    mock_getpid.return_value = 1
    # Make the mocked base-job sleep really sleep.
    self.mock_base_job_sleep.side_effect = time.sleep

    recorded = []

    with create_session() as session:
        dag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        ).get_dag('test_heartbeat_failed_fast')
        task = dag.get_task('test_heartbeat_failed_fast_op')

        dag.create_dagrun(
            run_id="test_heartbeat_failed_fast_run",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

        task_instance = TI(task=task, execution_date=DEFAULT_DATE)
        task_instance.refresh_from_db()
        task_instance.state = State.RUNNING
        task_instance.hostname = get_hostname()
        task_instance.pid = 1
        session.commit()

        job = LocalTaskJob(
            task_instance=task_instance,
            executor=MockExecutor(do_update=False),
        )
        job.heartrate = 2

        def record_heartbeat(session):
            # Store the job's own heartbeat timestamp on each callback.
            recorded.append(job.latest_heartbeat)

        job.heartbeat_callback = record_heartbeat
        job._execute()

        self.assertGreater(len(recorded), 2)
        # Compare each heartbeat with the one immediately before it.
        for earlier, later in zip(recorded, recorded[1:]):
            self.assertGreaterEqual((later - earlier).total_seconds(), job.heartrate)