def setUp(self):
    logger.addHandler(YabiDBHandler())
    create_workflow_with_job_and_a_task(self)
    self.wfl_logger = create_workflow_logger(logger, self.workflow.pk)
    self.job_logger = create_job_logger(logger, self.job.pk)
    self.task_logger = create_task_logger(logger, self.task.pk)
    self.other_wfl_logger = create_workflow_logger(logger, self.workflow.pk + 1)
    self.other_job_logger = create_job_logger(logger, self.job.pk + 1)
    self.other_task_logger = create_task_logger(logger, self.task.pk + 1)
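# Note: create_task_logger itself is not defined in this section. A minimal
# sketch of what such a factory could look like, assuming it wraps
# logging.LoggerAdapter and that a handler like YabiDBHandler reads the
# table_name/table_id attributes off each record (the field names the tests
# below assert on):
import logging

def create_task_logger(base_logger, task_id):
    # LoggerAdapter attaches the extra dict to every record it emits, so a
    # DB handler can route the message to the right Syslog row.
    return logging.LoggerAdapter(base_logger, {'table_name': 'task', 'table_id': task_id})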
def stage_out_files(self):
    """Stage out files from the FS backend to the stageout area."""
    task_logger = create_task_logger(logger, self.task.pk)
    # First we need a stageout directory
    task_logger.info("Stageout to %s", self.task.stageout)
    backend_for_stageout = FSBackend.urifactory(self.yabiusername, self.task.stageout)
    backend_for_stageout.mkdir(self.task.stageout)
    # Now the stageout proper. lcopy is only valid when src and dst live on
    # the same backend; fall back to a remote copy otherwise.
    method = self.task.job.preferred_stageout_method
    src = self.working_output_dir_uri()
    dst = self.task.stageout
    if method == 'lcopy' and not is_same_location(src, dst):
        method = 'copy'
    task_logger.info("Stageout: %sing '%s' to '%s'", method, src, dst)
    if method == 'lcopy':
        return self.local_copy_recursive(src, dst)
    if method == 'copy':
        return FSBackend.remote_copy(self.yabiusername, src, dst)
    raise RuntimeError("Invalid stageout method %s for task %s" % (method, self.task.pk))
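# is_same_location is not shown in this section. A plausible sketch, assuming
# two URIs count as co-located (and hence lcopy-able) when scheme and host
# match:
try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse      # Python 2

def is_same_location(src_uri, dst_uri):
    src, dst = urlparse(src_uri), urlparse(dst_uri)
    return (src.scheme, src.netloc) == (dst.scheme, dst.netloc)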
def stage_in(self, stagein):
    """Perform a single stagein."""
    task_logger = create_task_logger(logger, stagein.task.pk)
    task_logger.info("Stagein: %sing '%s' to '%s'", stagein.method, stagein.src, stagein.dst)
    # Degrade gracefully: link -> lcopy -> copy, depending on what the
    # backend supports.
    method = stagein.method
    if method == 'link' and not self.link_supported:
        method = 'lcopy'
    if method == 'lcopy' and not self.lcopy_supported:
        method = 'copy'
    if stagein.method != method:
        task_logger.info("Preferred stagein method '%s' not supported, changing it to '%s'",
                         stagein.method, method)
        stagein.method = method
        stagein.save()
    if stagein.method == 'copy':
        if stagein.src.endswith('/'):
            FSBackend.remote_copy(self.yabiusername, stagein.src, stagein.dst)
        else:
            FSBackend.remote_file_copy(self.yabiusername, stagein.src, stagein.dst)
    if stagein.method == 'lcopy':
        if stagein.src.endswith('/'):
            self.local_copy_recursive(stagein.src, stagein.dst)
        else:
            self.local_copy(stagein.src, stagein.dst)
    if stagein.method == 'link':
        if stagein.src.endswith('/'):
            listing = self.ls(stagein.src).values()[0]
            for entry in listing['files'] + listing['directories']:
                name, _, _, _ = entry
                self.symbolic_link(url_join(stagein.src, name), url_join(stagein.dst, name))
        else:
            self.symbolic_link(stagein.src, stagein.dst)
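# url_join is also undefined in this section; a minimal sketch that joins
# listing entry names onto a directory URI without doubling the slash:
def url_join(base, *parts):
    return '/'.join([base.rstrip('/')] + [p.strip('/') for p in parts])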
def change_task_status(task_id, status):
    task_logger = create_task_logger(logger, task_id)
    try:
        task_logger.debug("Setting status of task {0} to {1}".format(task_id, status))
        task = Task.objects.get(pk=task_id)
        task.set_status(status)
        task.save()
        job_old_status = task.job.status
        job_status = task.job.update_status()
        job_status_changed = (job_old_status != job_status)
        if job_status_changed:
            old_status = task.job.workflow.status
            task.job.workflow.update_status()
            if task.job.is_finished:
                on_job_finished.apply_async((task.job.pk,))
            new_status = task.job.workflow.status
            if old_status != new_status and new_status == STATUS_COMPLETE:
                on_workflow_completed.apply_async((task.job.workflow.pk,))
        else:
            process_workflow_jobs_if_needed(task)
    except Exception:
        task_logger.exception("Exception when updating task {0}'s status to {1}".format(task_id, status))
        raise
def mark_task_as_error(task_id, error_msg="Some error occurred"):
    task_logger = create_task_logger(logger, task_id)
    task_logger.error("Task chain for Task {0} failed with '{1}'".format(task_id, error_msg))
    remove_task_retrying_mark(task_id)
    change_task_status(task_id, STATUS_ERROR)
    set_task_error_message(task_id, error_msg)
def decorated_function(task_id, *args, **kwargs):
    task_logger = create_task_logger(logger, task_id)
    original_function_name = original_function.__name__
    retry_celery_task = partial(retry_current_celery_task, original_function_name, task_id)
    try:
        result = original_function(task_id, *args, **kwargs)
    except RetryPollingException as exc:
        # Polling retries use a constant countdown instead of backoff
        countdown = 30
        retry_celery_task(exc, countdown, polling_task=True)
    except Exception as exc:
        task_logger.exception("Exception in celery task {0} for task {1}".format(
            original_function_name, task_id))
        mark_task_as_retrying(task_id)
        task = Task.objects.get(pk=task_id)
        countdown = backoff(task.retry_count)
        retry_celery_task(exc, countdown)
    if task_id is not None and is_task_retrying(task_id):
        task_logger.info('Task %s recovered from error' % task_id)
        remove_task_retrying_mark(task_id)
    return result
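# backoff is not defined here either. A typical capped exponential
# implementation, purely illustrative, would look like:
def backoff(retry_count):
    # 5s, 25s, 125s, ... capped at one hour
    return min(5 ** (retry_count + 1), 3600)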
def test_logging_should_work_for_every_yabi_logger(self):
    NAMES = ('yabi', 'yabi.backend', 'yabi.yabi.models')
    loggers = map(logging.getLogger, NAMES)
    task_loggers = map(lambda l: create_task_logger(l, TASK_ID), loggers)
    for task_logger in task_loggers:
        task_logger.debug(MSG)
    self.assertEquals(len(NAMES), Syslog.objects.filter(message=MSG).count(),
                      "All loggers should log to Syslog")
def clean_up_task(self):
    # Remove the working directory
    working_dir_backend = FSBackend.urifactory(self.yabiusername, self.working_dir_uri())
    # Don't fail just because we can't delete the working dir.
    # We do log the error and proceed.
    try:
        working_dir_backend.rm(self.working_dir_uri())
    except Exception:
        task_logger = create_task_logger(logger, self.task.pk)
        task_logger.exception("Couldn't delete working dir '%s' of task %s.",
                              self.working_dir_uri(), self.task.pk)
def stage_in_files(self):
    self.before_stage_in_files()
    task_logger = create_task_logger(logger, self.task.pk)
    stageins = self.task.get_stageins()
    task_logger.info("About to stagein %d stageins", len(stageins))
    for stagein in stageins:
        backend = FSBackend.urifactory(self.yabiusername, stagein.src)
        backend.stage_in(stagein)
        if stagein.matches_filename(ENVVAR_FILENAME):
            self.save_envvars(self.task, stagein.src)
def test_logging_with_task_logger(self):
    logger = logging.getLogger('yabi.yabiengine')
    task_logger = create_task_logger(logger, TASK_ID)
    task_logger.debug(MSG)
    self.assertEquals(1, Syslog.objects.filter(message=MSG).count(),
                      "Message should have been syslogged")
    syslog = Syslog.objects.get(message=MSG)
    self.assertEquals('task', syslog.table_name)
    self.assertEquals(TASK_ID, syslog.table_id)
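# YabiDBHandler is added in setUp() above but not defined in this section.
# A sketch of a Django logging handler consistent with these tests, assuming
# a Syslog model with message/table_name/table_id fields (the import path is
# hypothetical):
import logging

class YabiDBHandler(logging.Handler):
    def emit(self, record):
        from yabi.yabiengine.models import Syslog  # assumed location
        try:
            Syslog.objects.create(
                message=record.getMessage(),
                table_name=getattr(record, 'table_name', None),
                table_id=getattr(record, 'table_id', None))
        except Exception:
            self.handleError(record)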
def spawn_task(task):
    task_logger = create_task_logger(logger, task.pk)
    task_logger.info("Starting spawn_task for task %s.", task.pk)
    if _tasks_per_user_reached_limit(task):
        return False
    task.set_status('requested')
    task.save()
    task_chain = chain(stage_in_files.s(task.pk), submit_task.s(),
                       poll_task_status.s(), stage_out_files.s(), clean_up_task.s())
    task_chain.apply_async()
    task_logger.info("Finished spawn_task for task %s.", task.pk)
    return True
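# For the chain above to work, each chained celery task presumably returns the
# task pk it was given: chain() feeds each task's return value to the next
# signature, so stage_in_files.s(task.pk) seeds the pk and submit_task.s(),
# poll_task_status.s(), etc. receive it. Illustrative only:
from celery import shared_task

@shared_task
def submit_task(task_id):
    # ... submit the task to the execution backend ...
    return task_id  # keep the pk flowing down the chain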
def retry_current_celery_task(original_function_name, task_id, exc, countdown, polling_task=False):
    task_logger = create_task_logger(logger, task_id)
    task_logger.warning('{0}.retry {1} in {2} seconds'.format(
        original_function_name, task_id, countdown))
    try:
        current_task = get_current_celery_task()
        current_task.retry(exc=exc, countdown=countdown)
    except celery.exceptions.RetryTaskError:
        # This is normal operation: Celery signals the Worker that this task
        # should be retried by raising a RetryTaskError.
        # Just re-raise it after recording the retry count.
        try:
            if not polling_task:
                task = Task.objects.get(pk=task_id)
                task.retry_count = current_task.request.retries + 1
                logger.debug("Retry is: %s", current_task.request.retries)
                task.save()
        except Exception:
            # Never fail on saving this
            logger.exception("Failed on saving retry_count for task %d", task_id)
        raise
    except Exception as ex:
        if ex is exc:
            # The same exception we passed to retry() has been re-raised,
            # which means the max retry limit has been exceeded.
            task_logger.error("{0}.retry {1} exceeded retry limit - changing status to error".format(
                original_function_name, task_id))
        else:
            # Some other exception occurred; log the details.
            task_logger.exception("{0}.retry {1} failed - changing status to error".format(
                original_function_name, task_id))
        mark_task_as_error(task_id, str(ex))
        raise
def task(self, val):
    self._task = val
    self.executer.uri = self._task.job.exec_backend
    self.task_logger = create_task_logger(logger, self._task.pk)