def result(self, context: Context) -> Tuple[RunnerStatus, dict]:
    if self.error_msg:
        context.set_data(f'failed_{self.task_id}', {'error': self.error_msg})
        return RunnerStatus.FAILED, {}
    if not self.started:
        return RunnerStatus.RUNNING, {}
    resp = self.spark_service.get_sparkapp_info(self.sparkapp_name)
    logging.info(f'sparkapp resp: {resp.__dict__}')
    if not resp.state:
        return RunnerStatus.RUNNING, {}
    return self.__class__.SPARKAPP_STATE_TO_RUNNER_STATUS.get(
        resp.state, RunnerStatus.FAILED), resp.to_dict()
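# A minimal sketch of the class-level mapping referenced above. The exact
# SparkApplication state strings are assumptions (modeled on the common
# spark-on-k8s operator states), not confirmed by this snippet; any state
# missing from the mapping falls back to RunnerStatus.FAILED via `.get()`.
SPARKAPP_STATE_TO_RUNNER_STATUS = {
    'SUBMITTED': RunnerStatus.RUNNING,
    'RUNNING': RunnerStatus.RUNNING,
    'COMPLETED': RunnerStatus.DONE,
    'FAILED': RunnerStatus.FAILED,
}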
def test_cronjob_alone(self):
    cronjob = WorkflowCronJob(task_id=self.test_id)
    context = Context(data={}, internal={}, db_engine=db.engine)
    cronjob.start(context)
    status, output = cronjob.result(context)
    self.assertEqual(status, RunnerStatus.DONE)
    self.assertIsNotNone(output['msg'])
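# Hedged sketch of how a runner's start()/result() pair is typically
# driven: start() is called once, then result() is polled until the
# status leaves RUNNING. This loop is illustrative only; it is not the
# real composer's dispatch logic.
def poll_runner(runner, context, interval_sec=1):
    runner.start(context)
    while True:
        status, output = runner.result(context)
        if status != RunnerStatus.RUNNING:
            return status, output
        time.sleep(interval_sec)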
def _check_items(self):
    with get_session(self.db_engine) as session:
        items = session.query(SchedulerItem).filter_by(
            status=ItemStatus.ON.value).all()
        for item in items:
            if not item.need_run():
                continue
            # NOTE: use `func.now()` to let sqlalchemy handle
            # the timezone.
            item.last_run_at = func.now()
            if item.interval_time < 0:
                # finish run-once item automatically
                item.status = ItemStatus.OFF.value
            pp = Pipeline(**(json.loads(item.pipeline)))
            context = Context(data=pp.meta,
                              internal={},
                              db_engine=self.db_engine)
            runner = SchedulerRunner(
                item_id=item.id,
                pipeline=item.pipeline,
                context=ContextEncoder().encode(context),
            )
            session.add(runner)
            try:
                logging.info(
                    f'[composer] insert runner, item_id: {item.id}')
                session.commit()
            except Exception as e:  # pylint: disable=broad-except
                logging.error(
                    f'[composer] failed to create scheduler_runner, '
                    f'item_id: {item.id}, exception: {e}')
                session.rollback()
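# Hypothetical sketch of SchedulerItem.need_run() as implied by the loop
# above: a run-once item (interval_time < 0) runs only if it has never
# run, while a periodic item runs once its interval has elapsed since
# last_run_at. Field names come from the snippet; the body is an
# assumption, shown under the guess that times are stored in UTC.
import datetime

def need_run(self) -> bool:
    now = datetime.datetime.utcnow()
    if self.interval_time < 0:
        # run-once item: fire only on the first check
        return self.last_run_at is None
    return (self.last_run_at is None or
            self.last_run_at +
            datetime.timedelta(seconds=self.interval_time) <= now)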
def test_thread_reaper(self):
    tr = ThreadReaper(worker_num=1)
    runner = TaskRunner(1)
    tr.enqueue('1', runner,
               Context(data={}, internal={}, db_engine=db.engine))
    self.assertTrue(tr.is_full(), 'should be full')
    ok = tr.enqueue('2', runner,
                    Context(data={}, internal={}, db_engine=db.engine))
    self.assertFalse(ok, 'should not be enqueued')
    time.sleep(10)
    self.assertFalse(tr.is_full(), 'should not be full')
    ok = tr.enqueue('3', runner,
                    Context(data={}, internal={}, db_engine=db.engine))
    self.assertTrue(ok, 'should be enqueued')
    tr.stop(wait=True)
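# Hedged sketch of the contract this test exercises: ThreadReaper acts
# as a bounded worker pool whose enqueue() returns False while the pool
# is full and frees a slot when a runner finishes. The class below is an
# illustration of that contract under assumed semantics, not the real
# ThreadReaper implementation.
import threading
from concurrent.futures import ThreadPoolExecutor

class BoundedPool:
    def __init__(self, worker_num: int):
        self._capacity = worker_num
        self._running = 0
        self._lock = threading.Lock()
        self._pool = ThreadPoolExecutor(max_workers=worker_num)

    def is_full(self) -> bool:
        with self._lock:
            return self._running >= self._capacity

    def enqueue(self, name, runner, context) -> bool:
        with self._lock:
            if self._running >= self._capacity:
                # reject instead of queueing, matching the test above
                return False
            self._running += 1
        fut = self._pool.submit(runner.start, context)
        fut.add_done_callback(self._release_slot)
        return True

    def _release_slot(self, _fut):
        with self._lock:
            self._running -= 1

    def stop(self, wait: bool = True):
        self._pool.shutdown(wait=wait)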
def result(self, context: Context) -> Tuple[RunnerStatus, dict]:
    time.sleep(2)
    now = datetime.datetime.utcnow()
    timeout = random.randint(0, 10)  # mock timeout
    if self._start_at is not None and self._start_at + datetime.timedelta(
            seconds=timeout) < now:
        # kill runner
        logging.info(f'[memory_runner] {self.task_id} is timeout, '
                     f'start at: {self._start_at}')
        return RunnerStatus.FAILED, {}
    # use `get_session` to query database
    with get_session(context.db_engine) as session:
        count = session.query(SchedulerRunner).count()
        # write data to context
        context.set_data(f'is_done_{self.task_id}', {
            'status': 'OK',
            'count': count
        })
    return RunnerStatus.DONE, {}
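# Hedged sketch of the `get_session` helper used throughout these
# snippets: presumably a contextmanager that yields a SQLAlchemy session
# bound to the given engine and closes it on exit. The body is an
# assumption, included only to keep the snippets self-contained.
from contextlib import contextmanager
from sqlalchemy.orm import sessionmaker

@contextmanager
def get_session(db_engine):
    session = sessionmaker(bind=db_engine)()
    try:
        yield session
    finally:
        session.close()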