Example #1
0
    def stop_operation(operation_id):
        """
        Cancel an HPC operation: abort its remote job (if one is still running),
        stop the local thread associated with it and persist the operation as
        canceled.

        :param operation_id: identifier of the operation to stop
        :return: True, both when the operation was already stopped/finished (or
                 could not be found) and after a successful cancellation
        :raises KeyError: if a job process is recorded for the operation but the
                 login token environment variable is not set
        """
        # TODO: Review this implementation after DAINT maintenance
        import time

        operation = dao.get_operation_by_id(operation_id)
        if not operation or operation.has_finished:
            LOGGER.warning("Operation already stopped: %s" % operation_id)
            return True

        LOGGER.debug("Stopping HPC operation: %s" % str(operation_id))
        op_ident = OperationDAO().get_operation_process_for_operation(
            operation_id)
        if op_ident is not None:
            # TODO: Handle login
            transport = unicore_client.Transport(
                os.environ[HPCSchedulerClient.CSCS_LOGIN_TOKEN_ENV_KEY])
            # Abort HPC job, but only while it is still reported as running
            job = Job(transport, op_ident.job_id)
            if job.is_running():
                job.abort()

        # Kill thread
        operation_thread = get_op_thread(operation_id)
        if operation_thread is None:
            LOGGER.warning("Thread for operation {} is not available".format(
                operation_id))
        else:
            operation_thread.stop()
            # Poll until the thread reports it has stopped. Sleep between checks
            # and log only once, so the wait neither spins the CPU nor floods
            # the log with identical messages.
            announced = False
            while not operation_thread.stopped():
                if not announced:
                    LOGGER.info(
                        "Thread for operation {} is stopping".format(operation_id))
                    announced = True
                time.sleep(0.1)
        BurstService().persist_operation_state(operation, STATUS_CANCELED)
        return True
    def check_operations_job():
        """
        Go through the operations returned by ``dao.get_operations()`` and
        synchronize each one with the status of its remote job.

        For every operation that has a job process recorded:
        - while the job is running, a pending operation whose job status is
          READY is marked as started; nothing else is done for it in this pass;
        - once the job is no longer running, a SUCCESSFUL status triggers the
          "finished" handling and any other status triggers the "error" handling.

        Operations without a recorded job process are skipped. Any exception
        raised while handling one operation is logged (with traceback) and does
        not prevent the remaining operations from being processed.
        """
        operations = dao.get_operations()
        if not operations:
            return

        for operation in operations:
            HPCOperationService.LOGGER.info("Start processing operation {}".format(operation.id))
            try:
                op_ident = dao.get_operation_process_for_operation(operation.id)
                if op_ident is None:
                    continue

                transport = Transport(os.environ[HPCSchedulerClient.CSCS_LOGIN_TOKEN_ENV_KEY])
                job = Job(transport, op_ident.job_id)
                job_status = job.properties['status']

                if job.is_running():
                    if operation.status == STATUS_PENDING and job_status == HPCJobStatus.READY.value:
                        HPCOperationService._operation_started(operation)
                    HPCOperationService.LOGGER.info(
                        "CSCS job status: {} for operation {}.".format(job_status, operation.id))
                    # Move on to the next operation; returning here would leave
                    # every remaining operation unchecked while this job runs.
                    continue

                HPCOperationService.LOGGER.info(
                    "Job for operation {} has status {}".format(operation.id, job_status))
                if job_status == HPCJobStatus.SUCCESSFUL.value:
                    HPCOperationService._operation_finished(operation, operation.view_model_gid)
                else:
                    HPCOperationService._operation_error(operation)
            except Exception:
                HPCOperationService.LOGGER.error(
                    "There was an error on background processing process for operation {}".format(operation.id),
                    exc_info=True)