def stop_operation(operation_id):
    """
    Cancel an HPC operation: abort its remote job (if one is registered and still running),
    stop the local thread associated with the operation and persist the canceled status.

    :param operation_id: identifier of the operation to stop
    :return: True in every case, including when the operation cannot be found or has
        already finished
    :raises KeyError: when a remote job is registered for the operation but the login token
        environment variable is not set
    """
    import time

    # Pause between two checks of the thread state, so waiting does not spin the CPU
    # or flood the log with identical messages.
    poll_interval_seconds = 0.1

    # TODO: Review this implementation after DAINT maintenance
    operation = dao.get_operation_by_id(operation_id)
    if not operation or operation.has_finished:
        # Nothing left to cancel; this is reported to the caller as success.
        LOGGER.warning("Operation already stopped: %s", operation_id)
        return True

    LOGGER.debug("Stopping HPC operation: %s", operation_id)
    op_ident = OperationDAO().get_operation_process_for_operation(operation_id)
    if op_ident is not None:
        # TODO: Handle login
        transport = unicore_client.Transport(os.environ[HPCSchedulerClient.CSCS_LOGIN_TOKEN_ENV_KEY])
        # Abort HPC job
        job = Job(transport, op_ident.job_id)
        if job.is_running():
            job.abort()

    # Kill thread
    operation_thread = get_op_thread(operation_id)
    if operation_thread is None:
        LOGGER.warning("Thread for operation {} is not available".format(operation_id))
    else:
        operation_thread.stop()
        # Block until the thread reports that it has stopped.
        while not operation_thread.stopped():
            LOGGER.info("Thread for operation {} is stopping".format(operation_id))
            time.sleep(poll_interval_seconds)

    BurstService().persist_operation_state(operation, STATUS_CANCELED)
    return True
def check_operations_job():
    """
    Synchronize every operation returned by ``dao.get_operations()`` with the status of its
    remote HPC job.

    For each operation that has a registered job:

    - job still running: the operation is marked as started when it is still pending and the
      job reports READY; nothing else is done for it in this pass;
    - job reports SUCCESSFUL: the operation is finalized using its ``view_model_gid``;
    - any other status: the operation is marked as failed.

    Operations without a registered job are skipped. A failure while handling one operation
    is logged with its traceback and does not prevent the remaining operations from being
    processed.

    :return: None
    """
    operations = dao.get_operations()
    if not operations:
        return

    for operation in operations:
        HPCOperationService.LOGGER.info("Start processing operation {}".format(operation.id))
        try:
            op_ident = dao.get_operation_process_for_operation(operation.id)
            if op_ident is None:
                continue

            transport = Transport(os.environ[HPCSchedulerClient.CSCS_LOGIN_TOKEN_ENV_KEY])
            job = Job(transport, op_ident.job_id)
            job_status = job.properties['status']

            if job.is_running():
                if operation.status == STATUS_PENDING and job_status == HPCJobStatus.READY.value:
                    HPCOperationService._operation_started(operation)
                HPCOperationService.LOGGER.info(
                    "CSCS job status: {} for operation {}.".format(job_status, operation.id))
                # Only this operation is done for now; the remaining operations must still
                # be checked, so move on instead of leaving the whole pass.
                continue

            HPCOperationService.LOGGER.info(
                "Job for operation {} has status {}".format(operation.id, job_status))
            if job_status == HPCJobStatus.SUCCESSFUL.value:
                simulator_gid = operation.view_model_gid
                HPCOperationService._operation_finished(operation, simulator_gid)
            else:
                HPCOperationService._operation_error(operation)
        except Exception:
            # Boundary of the background pass: record the failure and carry on with the
            # next operation.
            HPCOperationService.LOGGER.error(
                "There was an error on background processing process for operation {}".format(operation.id),
                exc_info=True)