def _handle_log_stream(self, stream):
    """Publish the log lines of a build stream in batches and ping the build heartbeat.

    Each entry of ``stream`` is passed to ``self._prepare_log_lines``, which
    returns ``(new_log_lines, new_status)``. The lines are buffered and handed
    to ``self._handle_logs`` once the buffer holds at least
    ``publisher.MESSAGES_COUNT`` lines, or once a non-empty buffer is older
    than ``publisher.MESSAGES_TIMEOUT``. Whatever is still buffered when the
    stream ends is published as a final batch.

    While consuming the stream, ``RedisHeartBeat.build_ping`` is called for
    ``self.build_job.id`` whenever more than ``self.HEART_BEAT_INTERVAL`` has
    elapsed since the previous ping.

    Args:
        stream: iterable of raw log entries.

    Returns:
        ``False`` if a ``BuildError`` or ``APIError`` was raised while
        processing (an error log entry is published first); otherwise the
        accumulated status, which stays ``True`` unless one of the
        ``_prepare_log_lines`` calls reported a falsy status.
    """
    log_lines = []
    last_emit_time = time.time()
    last_heart_beat = time.time()
    status = True
    try:
        for log_line in stream:
            new_log_lines, new_status = self._prepare_log_lines(log_line)
            log_lines += new_log_lines
            if not new_status:
                # A falsy status is sticky: later good lines never reset it.
                status = new_status
            # `>=` rather than `==`: a single raw entry may add several lines,
            # so the buffer length can jump past the threshold without ever
            # being exactly equal to it.
            publish_cond = (
                len(log_lines) >= publisher.MESSAGES_COUNT or
                (log_lines and
                 time.time() - last_emit_time > publisher.MESSAGES_TIMEOUT))
            if publish_cond:
                self._handle_logs(log_lines)
                log_lines = []
                last_emit_time = time.time()
            if time.time() - last_heart_beat > self.HEART_BEAT_INTERVAL:
                last_heart_beat = time.time()
                RedisHeartBeat.build_ping(build_id=self.build_job.id)
        if log_lines:
            # Flush the remainder that never reached a publish condition.
            self._handle_logs(log_lines)
    except (BuildError, APIError) as e:
        self._handle_logs(
            LogSpec(log_line='Build Error {}'.format(e), log_level=publisher.ERROR))
        return False
    return status
def set_status(self, status, message=None, traceback=None, **kwargs):
    """Ping the heartbeat for heartbeat statuses and record the status if allowed.

    When ``status`` is one of ``ExperimentLifeCycle.HEARTBEAT_STATUS`` a
    heartbeat ping is sent for this experiment. A new ``ExperimentStatus`` row
    is created only when ``ExperimentLifeCycle.can_transition`` accepts the
    move from ``self.last_status`` to ``status``.

    Args:
        status: the status to move to.
        message: optional message stored with the status.
        traceback: optional traceback stored with the status.
        **kwargs: accepted but not used by this method.
    """
    needs_ping = status in ExperimentLifeCycle.HEARTBEAT_STATUS
    if needs_ping:
        RedisHeartBeat.experiment_ping(self.id)

    transition_allowed = ExperimentLifeCycle.can_transition(
        status_from=self.last_status,
        status_to=status)
    if not transition_allowed:
        return

    ExperimentStatus.objects.create(
        experiment=self,
        status=status,
        message=message,
        traceback=traceback)
def set_status(self, status, created_at=None, message=None, traceback=None, **kwargs):
    """Ping the heartbeat for heartbeat statuses and record the status if allowed.

    The transition is checked against the status returned by
    ``self.last_status_before(status_date=created_at)``. When the transition
    is accepted, an ``ExperimentStatus`` row is created; ``created_at`` is
    forwarded to it only when it is truthy.

    Args:
        status: the status to move to.
        created_at: optional timestamp for the status entry.
        message: optional message stored with the status.
        traceback: optional traceback stored with the status.
        **kwargs: accepted but not used by this method.
    """
    if status in ExperimentLifeCycle.HEARTBEAT_STATUS:
        RedisHeartBeat.experiment_ping(self.id)

    previous_status = self.last_status_before(status_date=created_at)
    if not ExperimentLifeCycle.can_transition(status_from=previous_status,
                                              status_to=status):
        return

    fields = {
        'experiment': self,
        'status': status,
        'message': message,
        'traceback': traceback,
    }
    if created_at:
        # Only pass the timestamp through when the caller supplied one.
        fields['created_at'] = created_at
    ExperimentStatus.objects.create(**fields)
def test_redis_heartbeat_raises_for_wrong_values(self):
    # Each of these keyword combinations (several entities at once, or none
    # at all) is expected to be rejected by the constructor.
    rejected_kwargs = (
        {'experiment': 1, 'job': 1},
        {'job': 1, 'build': 1},
        {'experiment': 1, 'job': 1, 'build': 1},
        {},
    )
    for kwargs in rejected_kwargs:
        with self.assertRaises(ValueError):
            RedisHeartBeat(**kwargs)
def test_build_jobs_check_heartbeat(self):
    # A running build that has reported a heartbeat.
    pinged_build = BuildJobFactory()
    BuildJobStatusFactory(job=pinged_build, status=JobLifeCycle.RUNNING)
    RedisHeartBeat.build_ping(build_id=pinged_build.id)

    # A running build that never reported a heartbeat.
    silent_build = BuildJobFactory()
    BuildJobStatusFactory(job=silent_build, status=JobLifeCycle.RUNNING)

    # The check leaves the pinged build running and fails the silent one.
    expectations = (
        (pinged_build, JobLifeCycle.RUNNING),
        (silent_build, JobLifeCycle.FAILED),
    )
    for build, expected_status in expectations:
        build_jobs_check_heartbeat(build.id)
        build.refresh_from_db()
        self.assertEqual(build.last_status, expected_status)
def test_jobs_check_heartbeat(self):
    # A running job that has reported a heartbeat.
    pinged_job = JobFactory()
    JobStatusFactory(job=pinged_job, status=JobLifeCycle.RUNNING)
    RedisHeartBeat.job_ping(job_id=pinged_job.id)

    # A running job that never reported a heartbeat.
    silent_job = JobFactory()
    JobStatusFactory(job=silent_job, status=JobLifeCycle.RUNNING)

    # The check leaves the pinged job running and fails the silent one.
    expectations = (
        (pinged_job, JobLifeCycle.RUNNING),
        (silent_job, JobLifeCycle.FAILED),
    )
    for job, expected_status in expectations:
        jobs_check_heartbeat(job.id)
        job.refresh_from_db()
        self.assertEqual(job.last_status, expected_status)
def test_experiments_check_heartbeat(self):
    # A running experiment that has reported a heartbeat.
    pinged_experiment = ExperimentFactory()
    ExperimentStatusFactory(experiment=pinged_experiment,
                            status=ExperimentLifeCycle.RUNNING)
    RedisHeartBeat.experiment_ping(experiment_id=pinged_experiment.id)

    # A running experiment that never reported a heartbeat.
    silent_experiment = ExperimentFactory()
    ExperimentStatusFactory(experiment=silent_experiment,
                            status=ExperimentLifeCycle.RUNNING)

    # The check leaves the pinged experiment running and fails the silent one.
    expectations = (
        (pinged_experiment, ExperimentLifeCycle.RUNNING),
        (silent_experiment, ExperimentLifeCycle.FAILED),
    )
    for experiment, expected_status in expectations:
        experiments_check_heartbeat(experiment.id)
        experiment.refresh_from_db()
        self.assertEqual(experiment.last_status, expected_status)
def jobs_check_heartbeat(job_id):
    """Mark the job as failed when no heartbeat is reported for it.

    Nothing happens when ``RedisHeartBeat.job_is_alive`` reports the job as
    alive, or when ``get_valid_job`` returns a falsy value for ``job_id``.
    """
    if not RedisHeartBeat.job_is_alive(job_id=job_id):
        job = get_valid_job(job_id=job_id)
        if job:
            # No heartbeat: the job is treated as a zombie.
            job.set_status(
                JobLifeCycle.FAILED,
                message='Job is in zombie state (no heartbeat was reported).')
def build_jobs_check_heartbeat(build_job_id):
    """Mark the build job as failed when no heartbeat is reported for it.

    Nothing happens when ``RedisHeartBeat.build_is_alive`` reports the build
    as alive, or when ``get_valid_build_job`` returns a falsy value for
    ``build_job_id``.
    """
    heartbeat_seen = RedisHeartBeat.build_is_alive(build_id=build_job_id)
    if not heartbeat_seen:
        build_job = get_valid_build_job(build_job_id=build_job_id)
        if build_job:
            # No heartbeat: the build job is treated as a zombie.
            build_job.set_status(
                JobLifeCycle.FAILED,
                message='BuildJob is in zombie state (no heartbeat was reported).')
def experiments_check_heartbeat(experiment_id):
    """Mark the experiment as failed when no heartbeat is reported for it.

    Nothing happens when ``RedisHeartBeat.experiment_is_alive`` reports the
    experiment as alive, or when ``get_valid_experiment`` returns a falsy
    value for ``experiment_id``.
    """
    heartbeat_seen = RedisHeartBeat.experiment_is_alive(experiment_id=experiment_id)
    # The experiment is only looked up when the heartbeat is missing.
    experiment = None if heartbeat_seen else get_valid_experiment(
        experiment_id=experiment_id)
    if experiment:
        # No heartbeat: the experiment is treated as a zombie.
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='Experiment is in zombie state (no heartbeat was reported).')
def _handle_log_stream(self, stream):
    """Publish the log lines of a build stream entry by entry and ping the build heartbeat.

    Each entry of ``stream`` is passed to ``self._prepare_log_lines``, which
    returns ``(new_log_lines, new_status)``. The resulting lines are handed to
    ``self._handle_logs`` immediately; entries that produce no lines are not
    published.

    While consuming the stream, ``RedisHeartBeat.build_ping`` is called for
    ``self.build_job.id`` whenever more than ``self.HEART_BEAT_INTERVAL`` has
    elapsed since the previous ping.

    Args:
        stream: iterable of raw log entries.

    Returns:
        ``False`` if a ``BuildError`` or ``APIError`` was raised while
        processing (an error message is published first); otherwise the
        accumulated status, which stays ``True`` unless one of the
        ``_prepare_log_lines`` calls reported a falsy status.
    """
    last_heart_beat = time.time()
    status = True
    try:
        for log_line in stream:
            new_log_lines, new_status = self._prepare_log_lines(log_line)
            if not new_status:
                # A falsy status is sticky: later good lines never reset it.
                status = new_status
            # Lines are published per entry, so nothing is ever left over
            # after the loop; empty batches are skipped instead of published.
            log_lines = list(new_log_lines)
            if log_lines:
                self._handle_logs(log_lines)
            if time.time() - last_heart_beat > self.HEART_BEAT_INTERVAL:
                last_heart_beat = time.time()
                RedisHeartBeat.build_ping(build_id=self.build_job.id)
    except (BuildError, APIError) as e:
        self._handle_logs('{}: Could not build the image, '
                          'encountered {}'.format(LogLevels.ERROR, e))
        return False
    return status
def _ping_heartbeat(self) -> None:
    """Report a heartbeat for ``self.id`` through ``RedisHeartBeat.build_ping``."""
    build_id = self.id
    RedisHeartBeat.build_ping(build_id)
def post(self, request, *args, **kwargs):
    """Report a heartbeat for ``self.experiment`` and respond with HTTP 200.

    ``request`` and the extra arguments are not inspected.
    """
    experiment_id = self.experiment.id
    RedisHeartBeat.experiment_ping(experiment_id=experiment_id)
    return Response(status=status.HTTP_200_OK)
def test_post_internal_build_heartbeat(self):
    # No heartbeat is expected before the endpoint is called.
    alive_before = RedisHeartBeat.build_is_alive(self.build.id)
    self.assertEqual(alive_before, False)

    response = self.internal_client.post(self.url)
    assert response.status_code == status.HTTP_200_OK

    # Posting to the endpoint must have recorded a heartbeat.
    alive_after = RedisHeartBeat.build_is_alive(self.build.id)
    self.assertEqual(alive_after, True)
def post(self, request, *args, **kwargs):
    """Report a heartbeat for ``self.job`` and respond with HTTP 200.

    ``request`` and the extra arguments are not inspected.
    """
    job_id = self.job.id
    RedisHeartBeat.job_ping(job_id=job_id)
    return Response(status=status.HTTP_200_OK)
def post(self, request, *args, **kwargs):
    """Report a heartbeat for the build returned by ``self.get_object()``.

    Responds with HTTP 200; ``request`` and the extra arguments are not
    inspected.
    """
    build_id = self.get_object().id
    RedisHeartBeat.build_ping(build_id=build_id)
    return Response(status=status.HTTP_200_OK)
def _ping_heartbeat(self) -> None:
    """Report a heartbeat for ``self.id`` through ``RedisHeartBeat.job_ping``."""
    job_id = self.id
    RedisHeartBeat.job_ping(job_id)
def test_post_job_heartbeat(self):
    # No heartbeat is expected before the endpoint is called.
    alive_before = RedisHeartBeat.job_is_alive(self.job.id)
    self.assertEqual(alive_before, False)

    response = self.auth_client.post(self.url)
    assert response.status_code == status.HTTP_200_OK

    # Posting to the endpoint must have recorded a heartbeat.
    alive_after = RedisHeartBeat.job_is_alive(self.job.id)
    self.assertEqual(alive_after, True)
def test_redis_heartbeat_build(self):
    heartbeat = RedisHeartBeat(build=1)
    self.assertEqual(heartbeat.redis_key, RedisHeartBeat.KEY_BUILD.format(1))

    def check_alive(expected):
        # The instance view and the class-level view must agree.
        self.assertEqual(heartbeat.is_alive(), expected)
        self.assertEqual(RedisHeartBeat.build_is_alive(1), expected)

    check_alive(False)

    heartbeat.ping()
    check_alive(True)

    heartbeat.clear()
    check_alive(False)

    # Pinging through the class-level shortcut is visible on the instance too.
    RedisHeartBeat.build_ping(1)
    check_alive(True)
def test_redis_heartbeat_job(self):
    heartbeat = RedisHeartBeat(job=1)
    self.assertEqual(heartbeat.redis_key, RedisHeartBeat.KEY_JOB.format(1))

    def check_alive(expected):
        # The instance view and the class-level view must agree.
        self.assertEqual(heartbeat.is_alive(), expected)
        self.assertEqual(RedisHeartBeat.job_is_alive(1), expected)

    check_alive(False)

    heartbeat.ping()
    check_alive(True)

    heartbeat.clear()
    check_alive(False)

    # Pinging through the class-level shortcut is visible on the instance too.
    RedisHeartBeat.job_ping(1)
    check_alive(True)
def test_redis_heartbeat_experiment(self):
    heartbeat = RedisHeartBeat(experiment=1)
    self.assertEqual(heartbeat.redis_key, RedisHeartBeat.KEY_EXPERIMENT.format(1))

    def check_alive(expected):
        # The instance view and the class-level view must agree.
        self.assertEqual(heartbeat.is_alive(), expected)
        self.assertEqual(RedisHeartBeat.experiment_is_alive(1), expected)

    check_alive(False)

    heartbeat.ping()
    check_alive(True)

    heartbeat.clear()
    check_alive(False)

    # Pinging through the class-level shortcut is visible on the instance too.
    RedisHeartBeat.experiment_ping(1)
    check_alive(True)