Beispiel #1
0
def main():
    """Run a single job through the job queue and exit with its status.

    The debug level is read from the ``GNT_DEBUG`` environment variable and
    the job id and livelock are obtained from ``_GetMasterInfo``. The job
    queue is asked to pick up the job and is then polled once per second
    until it reports the job as finalized. A SIGTERM received in the
    meantime is turned into a cancel request for the job. Afterwards the
    queue is shut down.

    The livelock file is removed whether or not an exception occurred. The
    process exits with status 0 if the whole sequence completed without an
    exception and with status 1 otherwise.
    """
    debug_level = int(os.environ["GNT_DEBUG"])
    log_file = pathutils.GetLogFilename("jobs")

    # Log under a generic program name until the job id is known.
    utils.SetupLogging(log_file, "job-startup", debug=debug_level)
    job_id, lock = _GetMasterInfo()
    utils.SetupLogging(log_file, "job-%s" % (job_id, ), debug=debug_level)

    status = 1
    try:
        logging.debug("Preparing the context and the configuration")
        context = masterd.GanetiContext(lock)

        logging.debug("Registering a SIGTERM handler")

        # One-element list so that the nested handler can flip the flag
        # without rebinding a name of the enclosing scope.
        cancel_flag = [False]

        def _OnTerm(signum, _frame):
            logging.info("Killed by signal %d", signum)
            cancel_flag[0] = True

        signal.signal(signal.SIGTERM, _OnTerm)

        logging.debug("Picking up job %d", job_id)
        queue = context.jobqueue
        queue.PickupJob(job_id)

        # Poll until the job is finalized, sleeping before every check.
        while True:
            time.sleep(1)
            if queue.HasJobBeenFinalized(job_id):
                break
            if cancel_flag[0]:
                logging.debug("Got cancel request, cancelling job %d", job_id)
                outcome = queue.CancelJob(job_id)
                logging.debug("CancelJob result for job %d: %s",
                              job_id, outcome)
                # The request has been forwarded; wait for the next signal.
                cancel_flag[0] = False

        # Keep polling for as long as PrepareShutdown() returns a true value.
        logging.debug("Waiting for the queue to finish")
        while queue.PrepareShutdown():
            time.sleep(1)
        logging.debug("Shutting the queue down")
        queue.Shutdown()
    except Exception:  # pylint: disable=W0703
        logging.exception("Exception when trying to run job %d", job_id)
    else:
        # Only reached when nothing above raised.
        status = 0
    finally:
        logging.debug("Job %d finalized", job_id)
        logging.debug("Removing livelock file %s", lock.GetPath())
        os.remove(lock.GetPath())

    sys.exit(status)
Beispiel #2
0
def main():
    """Run the global post hooks for a job, reporting it as disappeared.

    The debug level is read from the ``GNT_DEBUG`` environment variable and
    the job id is obtained from ``_GetMasterInfo``. The job is loaded
    read-only from disk and the last opcode that has a start timestamp is
    looked up. If no opcode was ever started, nothing is done. Otherwise a
    livelock and a Ganeti context are created, a detached configuration
    snapshot is taken, and ``hooksmaster.ExecGlobalPostHooks`` is called
    with ``constants.POST_HOOKS_STATUS_DISAPPEARED``.

    Exceptions are logged rather than propagated, and the process exits
    with status 0. The livelock file is removed only if the livelock was
    actually created.
    """
    debug = int(os.environ["GNT_DEBUG"])

    logname = pathutils.GetLogFilename("jobs")
    # Log under a generic program name until the job id is known.
    utils.SetupLogging(logname, "job-post-hooks-startup", debug=debug)
    job_id = _GetMasterInfo()
    utils.SetupLogging(logname, "job-%s-post-hooks" % (job_id, ), debug=debug)

    # Stays None until the livelock has really been created, so that the
    # cleanup in the finally clause never touches an unbound name on the
    # early-exit path or when loading the job fails.
    livelock_name = None
    try:
        job = JobQueue.SafeLoadJobFromDisk(None,
                                           job_id,
                                           try_archived=False,
                                           writable=False)
        assert job.id == job_id, (
            "The job id received %d differs " % job_id +
            "from the serialized one %d" % job.id)

        # Find the last opcode whose execution has been started.
        target_op = None
        for op in job.ops:
            if op.start_timestamp is None:
                break
            target_op = op

        # We should run post hooks only if opcode execution has been started.
        # Note that currently the opcodes inside a job execute sequentially.
        if target_op is None:
            # SystemExit is not an Exception subclass: it bypasses the
            # handler below and only the finally clause runs.
            sys.exit(0)

        livelock_name = livelock.LiveLockName("post-hooks-executor-%d" %
                                              job_id)
        context = masterd.GanetiContext(livelock_name)
        cfg_tmp = context.GetConfig(job_id)
        # Get static snapshot of the config and release it in order to prevent
        # further synchronizations.
        cfg = cfg_tmp.GetDetachedConfig()
        cfg_tmp.OutDate()

        hooksmaster.ExecGlobalPostHooks(
            target_op.input.OP_ID, cfg.GetMasterNodeName(),
            context.GetRpc(cfg).call_hooks_runner, logging.warning,
            cfg.GetClusterName(), cfg.GetMasterNode(), job_id,
            constants.POST_HOOKS_STATUS_DISAPPEARED)
    except Exception:  # pylint: disable=W0703
        logging.exception("Exception when trying to run post hooks of job %d",
                          job_id)
    finally:
        logging.debug("Post hooks exec for disappeared job %d finalized",
                      job_id)
        # Nothing to clean up if we never got as far as creating the lock.
        if livelock_name is not None:
            logging.debug("Removing livelock file %s",
                          livelock_name.GetPath())
            os.remove(livelock_name.GetPath())

    sys.exit(0)
Beispiel #3
0
def main():
  """Execute one job inside this process, reacting to control signals.

  The debug level is read from the ``GNT_DEBUG`` environment variable. The
  job id, the livelock and the serialized secret parameters come from
  ``_SetupJob``. Serialized secret parameters, if present, are decoded and
  copied, by position, to the ``osparams_secret`` attribute of those opcode
  inputs that have one. The job is then driven by calling a
  ``_JobProcessor`` repeatedly until it returns ``_JobProcessor.FINISHED``.

  Signals handled while the job runs:
    - SIGTERM requests cancellation of the job,
    - SIGHUP sets ``mcpu.sighupReceived[0]``,
    - SIGUSR1 requests a priority change; the new priority is read from
      the file ``<job id>.prio`` in ``pathutils.LUXID_MESSAGE_DIR``.

  An exception raised while running the job is logged, the livelock file is
  removed in either case, and the process exits with status 0.
  """
  debug_level = int(os.environ["GNT_DEBUG"])
  log_file = pathutils.GetLogFilename("jobs")

  # Log under a generic program name until the job id is known.
  utils.SetupLogging(log_file, "job-startup", debug=debug_level)

  job_id, lock, serialized_secrets = _SetupJob()

  op_secrets = ""
  if serialized_secrets:
    op_secrets = RestorePrivateValueWrapping(
      serializer.LoadJson(serialized_secrets))

  utils.SetupLogging(log_file, "job-%s" % (job_id,), debug=debug_level)

  try:
    logging.debug("Preparing the context and the configuration")
    context = masterd.GanetiContext(lock)

    logging.debug("Registering signal handlers")

    # One-element lists so that the nested handlers can flip the flags
    # without rebinding names of the enclosing scope.
    cancel_flag = [False]
    prio_flag = [False]

    def _OnTerm(signum, _frame):
      logging.info("Killed by signal %d", signum)
      cancel_flag[0] = True

    def _OnHup(signum, _frame):
      logging.debug("Received signal %d, old flag was %s, will set to True",
                    signum, mcpu.sighupReceived)
      mcpu.sighupReceived[0] = True

    def _OnUsr1(signum, _frame):
      logging.info("Received signal %d, indicating priority change", signum)
      prio_flag[0] = True

    # Installed in this order: SIGTERM, SIGHUP, SIGUSR1.
    for signo, handler in ((signal.SIGTERM, _OnTerm),
                           (signal.SIGHUP, _OnHup),
                           (signal.SIGUSR1, _OnUsr1)):
      signal.signal(signo, handler)

    queue = context.jobqueue
    job = queue.SafeLoadJobFromDisk(job_id, False)

    job.SetPid(os.getpid())

    if op_secrets:
      # The n-th secret belongs to the n-th opcode of the job.
      for idx, secret in enumerate(op_secrets):
        op_input = job.ops[idx].input
        if hasattr(op_input, "osparams_secret"):
          op_input.osparams_secret = secret

    exec_opcode = mcpu.Processor(context, job_id, job_id).ExecOpCode
    processor = _JobProcessor(queue, exec_opcode, job)
    state = _JobProcessor.DEFER
    while state != _JobProcessor.FINISHED:
      state = processor()

      if state == _JobProcessor.WAITDEP and not cancel_flag[0]:
        # Normally, the scheduler should avoid starting a job where the
        # dependencies are not yet finalised. So warn, but wait and continue.
        logging.warning("Got started despite a dependency not yet finished")
        time.sleep(5)

      if cancel_flag[0]:
        logging.debug("Got cancel request, cancelling job %d", job_id)
        outcome = queue.CancelJob(job_id)
        # Reload the job and build a new processor for the reloaded object.
        job = queue.SafeLoadJobFromDisk(job_id, False)
        processor = _JobProcessor(queue, exec_opcode, job)
        logging.debug("CancelJob result for job %d: %s", job_id, outcome)
        cancel_flag[0] = False

      if prio_flag[0]:
        logging.debug("Received priority-change request")
        try:
          prio_file = os.path.join(pathutils.LUXID_MESSAGE_DIR,
                                   "%d.prio" % job_id)
          wanted_prio = int(utils.ReadFile(prio_file))
          utils.RemoveFile(prio_file)
          logging.debug("Changing priority of job %d to %d", job_id,
                        wanted_prio)
          outcome = queue.ChangeJobPriority(job_id, wanted_prio)
          # Reload the job and build a new processor, as after a cancel.
          job = queue.SafeLoadJobFromDisk(job_id, False)
          processor = _JobProcessor(queue, exec_opcode, job)
          logging.debug("Result of changing priority of %d to %d: %s", job_id,
                        wanted_prio, outcome)
        except Exception: # pylint: disable=W0703
          logging.warning("Informed of priority change, but could not"
                          " read new priority")
        prio_flag[0] = False

  except Exception: # pylint: disable=W0703
    logging.exception("Exception when trying to run job %d", job_id)
  finally:
    logging.debug("Job %d finalized", job_id)
    logging.debug("Removing livelock file %s", lock.GetPath())
    os.remove(lock.GetPath())

  sys.exit(0)
Beispiel #4
0
def main():
  """Run a single job through the job queue, reacting to control signals.

  The debug level is read from the ``GNT_DEBUG`` environment variable and
  the job id and livelock are obtained from ``_GetMasterInfo``. The job
  queue is asked to pick up the job and is then polled once per second
  until it reports the job as finalized; afterwards the queue is shut down.

  Signals handled while waiting:
    - SIGTERM requests cancellation of the job,
    - SIGHUP sets ``mcpu.sighupReceived[0]``,
    - SIGUSR1 requests a priority change; the new priority is read from
      the file ``<job id>.prio`` in ``pathutils.LUXID_MESSAGE_DIR``.

  The livelock file is removed whether or not an exception occurred. The
  process exits with status 0 if the whole sequence completed without an
  exception and with status 1 otherwise.
  """
  debug_level = int(os.environ["GNT_DEBUG"])
  log_file = pathutils.GetLogFilename("jobs")

  # Log under a generic program name until the job id is known.
  utils.SetupLogging(log_file, "job-startup", debug=debug_level)
  job_id, lock = _GetMasterInfo()
  utils.SetupLogging(log_file, "job-%s" % (job_id,), debug=debug_level)

  status = 1
  try:
    logging.debug("Preparing the context and the configuration")
    context = masterd.GanetiContext(lock)

    logging.debug("Registering signal handlers")

    # One-element lists so that the nested handlers can flip the flags
    # without rebinding names of the enclosing scope.
    cancel_flag = [False]
    prio_flag = [False]

    def _OnTerm(signum, _frame):
      logging.info("Killed by signal %d", signum)
      cancel_flag[0] = True

    def _OnHup(signum, _frame):
      logging.debug("Received signal %d, old flag was %s, will set to True",
                    signum, mcpu.sighupReceived)
      mcpu.sighupReceived[0] = True

    def _OnUsr1(signum, _frame):
      logging.info("Received signal %d, indicating priority change", signum)
      prio_flag[0] = True

    # Installed in this order: SIGTERM, SIGHUP, SIGUSR1.
    for signo, handler in ((signal.SIGTERM, _OnTerm),
                           (signal.SIGHUP, _OnHup),
                           (signal.SIGUSR1, _OnUsr1)):
      signal.signal(signo, handler)

    logging.debug("Picking up job %d", job_id)
    queue = context.jobqueue
    queue.PickupJob(job_id)

    # Poll until the job is finalized, sleeping before every check.
    while True:
      time.sleep(1)
      if queue.HasJobBeenFinalized(job_id):
        break

      if cancel_flag[0]:
        logging.debug("Got cancel request, cancelling job %d", job_id)
        outcome = queue.CancelJob(job_id)
        logging.debug("CancelJob result for job %d: %s", job_id, outcome)
        cancel_flag[0] = False

      if prio_flag[0]:
        logging.debug("Received priority-change request")
        try:
          prio_file = os.path.join(pathutils.LUXID_MESSAGE_DIR,
                                   "%d.prio" % job_id)
          wanted_prio = int(utils.ReadFile(prio_file))
          utils.RemoveFile(prio_file)
          logging.debug("Changing priority of job %d to %d", job_id,
                        wanted_prio)
          outcome = queue.ChangeJobPriority(job_id, wanted_prio)
          logging.debug("Result of changing priority of %d to %d: %s", job_id,
                        wanted_prio, outcome)
        except Exception: # pylint: disable=W0703
          logging.warning("Informed of priority change, but could not"
                          " read new priority")
        prio_flag[0] = False

    # Keep polling for as long as PrepareShutdown() returns a true value.
    logging.debug("Waiting for the queue to finish")
    while queue.PrepareShutdown():
      time.sleep(1)
    logging.debug("Shutting the queue down")
    queue.Shutdown()
  except Exception: # pylint: disable=W0703
    logging.exception("Exception when trying to run job %d", job_id)
  else:
    # Only reached when nothing above raised.
    status = 0
  finally:
    logging.debug("Job %d finalized", job_id)
    logging.debug("Removing livelock file %s", lock.GetPath())
    os.remove(lock.GetPath())

  sys.exit(status)