Example #1
def main():

    debug = int(os.environ["GNT_DEBUG"])

    logname = pathutils.GetLogFilename("jobs")
    utils.SetupLogging(logname, "job-startup", debug=debug)

    (job_id, livelock_name) = _GetMasterInfo()

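    # Re-initialise logging now that the job id is known, presumably so that
    # subsequent log entries are tagged with this specific job.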
    utils.SetupLogging(logname, "job-%s" % (job_id, ), debug=debug)

    exit_code = 1
    try:
        logging.debug("Preparing the context and the configuration")
        context = masterd.GanetiContext(livelock_name)

        logging.debug("Registering a SIGTERM handler")

        cancel = [False]

        def _TermHandler(signum, _frame):
            logging.info("Killed by signal %d", signum)
            cancel[0] = True

        signal.signal(signal.SIGTERM, _TermHandler)

        logging.debug("Picking up job %d", job_id)
        context.jobqueue.PickupJob(job_id)

        # waiting for the job to finish
        time.sleep(1)
        while not context.jobqueue.HasJobBeenFinalized(job_id):
            if cancel[0]:
                logging.debug("Got cancel request, cancelling job %d", job_id)
                r = context.jobqueue.CancelJob(job_id)
                logging.debug("CancelJob result for job %d: %s", job_id, r)
                cancel[0] = False
            time.sleep(1)

        # wait until the queue finishes
        logging.debug("Waiting for the queue to finish")
        while context.jobqueue.PrepareShutdown():
            time.sleep(1)
        logging.debug("Shutting the queue down")
        context.jobqueue.Shutdown()
        exit_code = 0
    except Exception:  # pylint: disable=W0703
        logging.exception("Exception when trying to run job %d", job_id)
    finally:
        logging.debug("Job %d finalized", job_id)
        logging.debug("Removing livelock file %s", livelock_name.GetPath())
        os.remove(livelock_name.GetPath())

    sys.exit(exit_code)
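Every example on this page resolves its log file through pathutils.GetLogFilename. For orientation, here is a minimal sketch of what such a helper plausibly looks like; the implementation and the default LOG_DIR value below are assumptions, only the call signature comes from the examples.

import os

LOG_DIR = "/var/log/ganeti/"  # assumed default; the real value lives in pathutils

def GetLogFilename(daemon_name):
    # Builds "<LOG_DIR>/<daemon_name>.log", e.g. "jobs" -> "/var/log/ganeti/jobs.log"
    return os.path.join(LOG_DIR, daemon_name + ".log")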
Example #2
def main():

    debug = int(os.environ["GNT_DEBUG"])

    logname = pathutils.GetLogFilename("jobs")
    utils.SetupLogging(logname, "job-post-hooks-startup", debug=debug)
    job_id = _GetMasterInfo()
    utils.SetupLogging(logname, "job-%s-post-hooks" % (job_id, ), debug=debug)

    try:
        job = JobQueue.SafeLoadJobFromDisk(None,
                                           job_id,
                                           try_archived=False,
                                           writable=False)
        assert job.id == job_id, "The job id received %d differs " % job_id + \
          "from the serialized one %d" % job.id

        target_op = None
        for op in job.ops:
            if op.start_timestamp is None:
                break
            target_op = op

        # We should run post hooks only if opcode execution has been started.
        # Note that currently the opcodes inside a job execute sequentially.
        if target_op is None:
            sys.exit(0)

        livelock_name = livelock.LiveLockName("post-hooks-executor-%d" %
                                              job_id)
        context = masterd.GanetiContext(livelock_name)
        cfg_tmp = context.GetConfig(job_id)
        # Get static snapshot of the config and release it in order to prevent
        # further synchronizations.
        cfg = cfg_tmp.GetDetachedConfig()
        cfg_tmp.OutDate()

        hooksmaster.ExecGlobalPostHooks(
            target_op.input.OP_ID, cfg.GetMasterNodeName(),
            context.GetRpc(cfg).call_hooks_runner, logging.warning,
            cfg.GetClusterName(), cfg.GetMasterNode(), job_id,
            constants.POST_HOOKS_STATUS_DISAPPEARED)
    except Exception:  # pylint: disable=W0703
        logging.exception("Exception when trying to run post hooks of job %d",
                          job_id)
    finally:
        logging.debug("Post hooks exec for disappeared job %d finalized",
                      job_id)
        logging.debug("Removing livelock file %s", livelock_name.GetPath())
        os.remove(livelock_name.GetPath())

    sys.exit(0)
Example #3
from qa import qa_performance
from qa import qa_job
from qa import qa_rapi
from qa import qa_tags
from qa import qa_utils

from ganeti import utils
from ganeti import rapi  # pylint: disable=W0611
from ganeti import constants
from ganeti import netutils
from ganeti import pathutils

import ganeti.rapi.client  # pylint: disable=W0611
from ganeti.rapi.client import UsesRapiClient

import colors  # needed by _FormatHeader below (exact import form assumed)

_QA_PROFILE = pathutils.GetLogFilename("qa-profile")
_PROFILE_LOG_INDENT = ""


def _FormatHeader(line, end=72, mark="-", color=None):
    """Fill a line up to the end column.

  """
    line = (mark * 4) + " " + line + " "
    line += "-" * (end - len(line))
    line = line.rstrip()
    line = colors.colorize(line, color=color)
    return line

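# Illustrative usage, not part of the original module (assumes colors.colorize
# returns the line unchanged when color is None):
#   banner = _FormatHeader("Running QA tests")
#   # e.g. "---- Running QA tests ------------------------------------------"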

def _DescriptionOf(fn):
Example #4
from ganeti import pathutils
from ganeti import utils  # needed for utils.NewUUID() below
from ganeti import vcluster

import colors
import qa_config
import qa_error

from qa_logging import FormatInfo

_MULTIPLEXERS = {}

#: Unique ID per QA run
_RUN_UUID = utils.NewUUID()

#: Path to the QA query output log file
_QA_OUTPUT = pathutils.GetLogFilename("qa-output")

(INST_DOWN, INST_UP) = range(500, 502)

(FIRST_ARG, RETURN_VALUE) = range(1000, 1002)


def _RaiseWithInfo(msg, error_desc):
    """Raises a QA error with the given content, and adds a message if present.

  """
    if msg:
        output = "%s: %s" % (msg, error_desc)
    else:
        output = error_desc
    raise qa_error.Error(output)
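A brief usage sketch for _RaiseWithInfo as defined above; the only assumption is that qa_error.Error behaves like an ordinary exception.

try:
    _RaiseWithInfo("Instance start failed", "timeout after 60s")
except qa_error.Error as err:
    print(err)  # "Instance start failed: timeout after 60s"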
Example #5
def main():

  debug = int(os.environ["GNT_DEBUG"])

  logname = pathutils.GetLogFilename("jobs")
  utils.SetupLogging(logname, "job-startup", debug=debug)

  (job_id, llock, secret_params_serialized) = _SetupJob()

  secret_params = ""
  if secret_params_serialized:
    secret_params_json = serializer.LoadJson(secret_params_serialized)
    secret_params = RestorePrivateValueWrapping(secret_params_json)

  utils.SetupLogging(logname, "job-%s" % (job_id,), debug=debug)

  try:
    logging.debug("Preparing the context and the configuration")
    context = masterd.GanetiContext(llock)

    logging.debug("Registering signal handlers")

    cancel = [False]
    prio_change = [False]

    def _TermHandler(signum, _frame):
      logging.info("Killed by signal %d", signum)
      cancel[0] = True
    signal.signal(signal.SIGTERM, _TermHandler)

    def _HupHandler(signum, _frame):
      logging.debug("Received signal %d, old flag was %s, will set to True",
                    signum, mcpu.sighupReceived)
      mcpu.sighupReceived[0] = True
    signal.signal(signal.SIGHUP, _HupHandler)

    def _User1Handler(signum, _frame):
      logging.info("Received signal %d, indicating priority change", signum)
      prio_change[0] = True
    signal.signal(signal.SIGUSR1, _User1Handler)

    job = context.jobqueue.SafeLoadJobFromDisk(job_id, False)

    job.SetPid(os.getpid())

    if secret_params:
      for i in range(0, len(secret_params)):
        if hasattr(job.ops[i].input, "osparams_secret"):
          job.ops[i].input.osparams_secret = secret_params[i]

    execfun = mcpu.Processor(context, job_id, job_id).ExecOpCode
    proc = _JobProcessor(context.jobqueue, execfun, job)
    result = _JobProcessor.DEFER
    while result != _JobProcessor.FINISHED:
      result = proc()
      if result == _JobProcessor.WAITDEP and not cancel[0]:
        # Normally, the scheduler should avoid starting a job where the
        # dependencies are not yet finalised. So warn, but wait and continue.
        logging.warning("Got started despite a dependency not yet finished")
        time.sleep(5)
      if cancel[0]:
        logging.debug("Got cancel request, cancelling job %d", job_id)
        r = context.jobqueue.CancelJob(job_id)
        job = context.jobqueue.SafeLoadJobFromDisk(job_id, False)
        proc = _JobProcessor(context.jobqueue, execfun, job)
        logging.debug("CancelJob result for job %d: %s", job_id, r)
        cancel[0] = False
      if prio_change[0]:
        logging.debug("Received priority-change request")
        try:
          fname = os.path.join(pathutils.LUXID_MESSAGE_DIR, "%d.prio" % job_id)
          new_prio = int(utils.ReadFile(fname))
          utils.RemoveFile(fname)
          logging.debug("Changing priority of job %d to %d", job_id, new_prio)
          r = context.jobqueue.ChangeJobPriority(job_id, new_prio)
          job = context.jobqueue.SafeLoadJobFromDisk(job_id, False)
          proc = _JobProcessor(context.jobqueue, execfun, job)
          logging.debug("Result of changing priority of %d to %d: %s", job_id,
                        new_prio, r)
        except Exception: # pylint: disable=W0703
          logging.warning("Informed of priority change, but could not"
                          " read new priority")
        prio_change[0] = False

  except Exception: # pylint: disable=W0703
    logging.exception("Exception when trying to run job %d", job_id)
  finally:
    logging.debug("Job %d finalized", job_id)
    logging.debug("Removing livelock file %s", llock.GetPath())
    os.remove(llock.GetPath())

  sys.exit(0)
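The SIGUSR1 handler above implies a small file-based protocol for priority changes: another process drops the new priority into LUXID_MESSAGE_DIR/<job_id>.prio and then signals the job process (whose PID was published via job.SetPid). Below is a hedged sketch of that sender side; the function name and the way the PID is obtained are assumptions, only the file name and the signal come from the handler above.

import os
import signal

def RequestJobPriorityChange(message_dir, job_id, job_pid, new_prio):
    # Write the new priority where the job's SIGUSR1 handler expects it ...
    fname = os.path.join(message_dir, "%d.prio" % job_id)
    with open(fname, "w") as fd:
        fd.write("%d" % new_prio)
    # ... then wake the job process so it re-reads and applies the priority.
    os.kill(job_pid, signal.SIGUSR1)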
Example #6
def GetPaths():
    """Returns a tuple of path objects to process.

  """
    getent = runtime.GetEnts()
    masterd_log = constants.DAEMONS_LOGFILES[constants.MASTERD]
    noded_log = constants.DAEMONS_LOGFILES[constants.NODED]
    confd_log = constants.DAEMONS_LOGFILES[constants.CONFD]
    wconfd_log = constants.DAEMONS_LOGFILES[constants.WCONFD]
    luxid_log = constants.DAEMONS_LOGFILES[constants.LUXID]
    rapi_log = constants.DAEMONS_LOGFILES[constants.RAPI]
    mond_log = constants.DAEMONS_LOGFILES[constants.MOND]
    metad_log = constants.DAEMONS_LOGFILES[constants.METAD]

    mond_extra_log = constants.DAEMONS_EXTRA_LOGFILES[constants.MOND]
    metad_extra_log = constants.DAEMONS_EXTRA_LOGFILES[constants.METAD]

    jobs_log = pathutils.GetLogFilename("jobs")

    rapi_dir = os.path.join(pathutils.DATA_DIR, "rapi")
    cleaner_log_dir = os.path.join(pathutils.LOG_DIR, "cleaner")
    master_cleaner_log_dir = os.path.join(pathutils.LOG_DIR, "master-cleaner")

    # A note on the ordering: The parent directory (type C{DIR}) must always be
    # listed before files (type C{FILE}) in that directory. Once the directory is
    # set, only files directly in that directory can be listed.
    paths = [
        (pathutils.DATA_DIR, DIR, 0o755, getent.masterd_uid,
         getent.masterd_gid),
        (pathutils.CLUSTER_DOMAIN_SECRET_FILE, FILE, 0o640, getent.masterd_uid,
         getent.masterd_gid, False),
        (pathutils.CLUSTER_CONF_FILE, FILE, 0o640, getent.masterd_uid,
         getent.confd_gid, False),
        (pathutils.LOCK_STATUS_FILE, FILE, 0o640, getent.masterd_uid,
         getent.confd_gid, False),
        (pathutils.TEMP_RES_STATUS_FILE, FILE, 0o640, getent.masterd_uid,
         getent.confd_gid, False),
        (pathutils.CONFD_HMAC_KEY, FILE, 0o440, getent.confd_uid,
         getent.masterd_gid, False),
        (pathutils.SSH_KNOWN_HOSTS_FILE, FILE, 0o644, getent.masterd_uid,
         getent.masterd_gid, False),
        (pathutils.RAPI_CERT_FILE, FILE, 0o440, getent.rapi_uid,
         getent.masterd_gid, False),
        (pathutils.SPICE_CERT_FILE, FILE, 0o440, getent.noded_uid,
         getent.masterd_gid, False),
        (pathutils.SPICE_CACERT_FILE, FILE, 0o440, getent.noded_uid,
         getent.masterd_gid, False),
        (pathutils.NODED_CERT_FILE, FILE, pathutils.NODED_CERT_MODE,
         getent.masterd_uid, getent.masterd_gid, False),
        (pathutils.NODED_CLIENT_CERT_FILE, FILE, pathutils.NODED_CERT_MODE,
         getent.masterd_uid, getent.masterd_gid, False),
        (pathutils.WATCHER_PAUSEFILE, FILE, 0o644, getent.masterd_uid,
         getent.masterd_gid, False),
    ]

    ss = ssconf.SimpleStore()
    for ss_path in ss.GetFileList():
        paths.append((ss_path, FILE, constants.SS_FILE_PERMS, getent.noded_uid,
                      getent.noded_gid, False))

    paths.extend([
        (pathutils.QUEUE_DIR, DIR, 0o750, getent.masterd_uid,
         getent.daemons_gid),
        (pathutils.QUEUE_DIR, QUEUE_DIR, constants.JOB_QUEUE_FILES_PERMS,
         getent.masterd_uid, getent.daemons_gid),
        (pathutils.JOB_QUEUE_DRAIN_FILE, FILE, 0o644, getent.masterd_uid,
         getent.daemons_gid, False),
        (pathutils.JOB_QUEUE_LOCK_FILE, FILE, constants.JOB_QUEUE_FILES_PERMS,
         getent.masterd_uid, getent.daemons_gid, False),
        (pathutils.JOB_QUEUE_SERIAL_FILE, FILE,
         constants.JOB_QUEUE_FILES_PERMS, getent.masterd_uid,
         getent.daemons_gid, False),
        (pathutils.JOB_QUEUE_VERSION_FILE, FILE,
         constants.JOB_QUEUE_FILES_PERMS, getent.masterd_uid,
         getent.daemons_gid, False),
        (pathutils.JOB_QUEUE_ARCHIVE_DIR, DIR, 0o750, getent.masterd_uid,
         getent.daemons_gid),
        (rapi_dir, DIR, 0o750, getent.rapi_uid, getent.masterd_gid),
        (pathutils.RAPI_USERS_FILE, FILE, 0o640, getent.rapi_uid,
         getent.masterd_gid, False),
        (pathutils.RUN_DIR, DIR, 0o775, getent.masterd_uid,
         getent.daemons_gid),
        (pathutils.SOCKET_DIR, DIR, 0o770, getent.masterd_uid,
         getent.daemons_gid),
        (pathutils.MASTER_SOCKET, FILE, 0o660, getent.masterd_uid,
         getent.daemons_gid, False),
        (pathutils.QUERY_SOCKET, FILE, 0o660, getent.luxid_uid,
         getent.daemons_gid, False),
        (pathutils.BDEV_CACHE_DIR, DIR, 0o755, getent.noded_uid,
         getent.masterd_gid),
        (pathutils.UIDPOOL_LOCKDIR, DIR, 0o750, getent.noded_uid,
         getent.masterd_gid),
        (pathutils.DISK_LINKS_DIR, DIR, 0o755, getent.noded_uid,
         getent.masterd_gid),
        (pathutils.CRYPTO_KEYS_DIR, DIR, 0o700, getent.noded_uid,
         getent.masterd_gid),
        (pathutils.IMPORT_EXPORT_DIR, DIR, 0o755, getent.noded_uid,
         getent.masterd_gid),
        (pathutils.LOG_DIR, DIR, 0o770, getent.masterd_uid,
         getent.daemons_gid),
        (masterd_log, FILE, 0o600, getent.masterd_uid, getent.masterd_gid,
         False),
        (confd_log, FILE, 0o600, getent.confd_uid, getent.masterd_gid, False),
        (wconfd_log, FILE, 0o600, getent.wconfd_uid, getent.masterd_gid,
         False),
        (luxid_log, FILE, 0o600, getent.luxid_uid, getent.masterd_gid, False),
        (noded_log, FILE, 0o600, getent.noded_uid, getent.masterd_gid, False),
        (rapi_log, FILE, 0o600, getent.rapi_uid, getent.masterd_gid, False),
        (mond_log, FILE, 0o600, getent.mond_uid, getent.masterd_gid, False),
        (mond_extra_log["access"], FILE, 0o600, getent.mond_uid,
         getent.masterd_gid, False),
        (mond_extra_log["error"], FILE, 0o600, getent.mond_uid,
         getent.masterd_gid, False),
        (metad_log, FILE, 0o600, getent.metad_uid, getent.metad_gid, False),
        (metad_extra_log["access"], FILE, 0o600, getent.metad_uid,
         getent.metad_gid, False),
        (metad_extra_log["error"], FILE, 0o600, getent.metad_uid,
         getent.metad_gid, False),
        (jobs_log, FILE, 0o600, getent.luxid_uid, getent.luxid_gid, False),
        (pathutils.LOG_OS_DIR, DIR, 0o750, getent.noded_uid,
         getent.daemons_gid),
        (pathutils.LOG_XEN_DIR, DIR, 0o750, getent.noded_uid,
         getent.daemons_gid),
        (pathutils.LOG_KVM_DIR, DIR, 0o750, getent.noded_uid,
         getent.daemons_gid),
        (cleaner_log_dir, DIR, 0o750, getent.noded_uid, getent.noded_gid),
        (master_cleaner_log_dir, DIR, 0o750, getent.masterd_uid,
         getent.masterd_gid),
        (pathutils.INSTANCE_REASON_DIR, DIR, 0o755, getent.noded_uid,
         getent.noded_gid),
        (pathutils.LIVELOCK_DIR, DIR, 0o750, getent.masterd_uid,
         getent.daemons_gid),
        (pathutils.LUXID_MESSAGE_DIR, DIR, 0o750, getent.masterd_uid,
         getent.daemons_gid),
    ])

    return paths
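GetPaths only describes the desired state; the caller is expected to walk the list in order (directories before the files inside them, as the comment above notes) and enforce mode and ownership. A minimal, hypothetical consumer sketch follows; EnsurePaths and the handling of the trailing per-file flag are assumptions, not taken from the snippet.

import os

def EnsurePaths(paths):
    # Apply mode and ownership to every path that already exists; the real
    # tool would also create missing directories and honour the per-file flag.
    for entry in paths:
        path, _kind, mode, uid, gid = entry[:5]
        if os.path.exists(path):
            os.chmod(path, mode)
            os.chown(path, uid, gid)

# EnsurePaths(GetPaths()) would then normalise permissions on a node.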
Example #7
def main():

  debug = int(os.environ["GNT_DEBUG"])

  logname = pathutils.GetLogFilename("jobs")
  utils.SetupLogging(logname, "job-startup", debug=debug)

  (job_id, livelock_name) = _GetMasterInfo()

  utils.SetupLogging(logname, "job-%s" % (job_id,), debug=debug)

  exit_code = 1
  try:
    logging.debug("Preparing the context and the configuration")
    context = masterd.GanetiContext(livelock_name)

    logging.debug("Registering signal handlers")

    cancel = [False]
    prio_change = [False]

    def _TermHandler(signum, _frame):
      logging.info("Killed by signal %d", signum)
      cancel[0] = True
    signal.signal(signal.SIGTERM, _TermHandler)

    def _HupHandler(signum, _frame):
      logging.debug("Received signal %d, old flag was %s, will set to True",
                    signum, mcpu.sighupReceived)
      mcpu.sighupReceived[0] = True
    signal.signal(signal.SIGHUP, _HupHandler)

    def _User1Handler(signum, _frame):
      logging.info("Received signal %d, indicating priority change", signum)
      prio_change[0] = True
    signal.signal(signal.SIGUSR1, _User1Handler)

    logging.debug("Picking up job %d", job_id)
    context.jobqueue.PickupJob(job_id)

    # waiting for the job to finish
    time.sleep(1)
    while not context.jobqueue.HasJobBeenFinalized(job_id):
      if cancel[0]:
        logging.debug("Got cancel request, cancelling job %d", job_id)
        r = context.jobqueue.CancelJob(job_id)
        logging.debug("CancelJob result for job %d: %s", job_id, r)
        cancel[0] = False
      if prio_change[0]:
        logging.debug("Received priority-change request")
        try:
          fname = os.path.join(pathutils.LUXID_MESSAGE_DIR, "%d.prio" % job_id)
          new_prio = int(utils.ReadFile(fname))
          utils.RemoveFile(fname)
          logging.debug("Changing priority of job %d to %d", job_id, new_prio)
          r = context.jobqueue.ChangeJobPriority(job_id, new_prio)
          logging.debug("Result of changing priority of %d to %d: %s", job_id,
                        new_prio, r)
        except Exception: # pylint: disable=W0703
          logging.warning("Informed of priority change, but could not"
                          " read new priority")
        prio_change[0] = False
      time.sleep(1)

    # wait until the queue finishes
    logging.debug("Waiting for the queue to finish")
    while context.jobqueue.PrepareShutdown():
      time.sleep(1)
    logging.debug("Shutting the queue down")
    context.jobqueue.Shutdown()
    exit_code = 0
  except Exception: # pylint: disable=W0703
    logging.exception("Exception when trying to run job %d", job_id)
  finally:
    logging.debug("Job %d finalized", job_id)
    logging.debug("Removing livelock file %s", livelock_name.GetPath())
    os.remove(livelock_name.GetPath())

  sys.exit(exit_code)
Example #8
# The wildcard imports below are meant to be re-exported, but pylint
# complains because the imported names are not actually used in this module.

import re
import socket

from ganeti._constants import *
from ganeti._vcsversion import *
from ganeti import compat
from ganeti import pathutils

ALLOCATABLE_KEY = "allocatable"
FAILED_KEY = "failed"

DAEMONS_LOGFILES = \
    dict((daemon, pathutils.GetLogFilename(DAEMONS_LOGBASE[daemon]))
         for daemon in DAEMONS_LOGBASE)

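# Per-daemon extra log files, keyed first by the daemon and then by the name
# of the extra log (elsewhere accessed with keys such as "access" and "error").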
DAEMONS_EXTRA_LOGFILES = \
  dict((daemon, dict((extra,
       pathutils.GetLogFilename(DAEMONS_EXTRA_LOGBASE[daemon][extra]))
       for extra in DAEMONS_EXTRA_LOGBASE[daemon]))
         for daemon in DAEMONS_EXTRA_LOGBASE)

# When the Xen toolstack used is "xl", live migration requires the source host
# to connect to the target host via ssh (xl runs this command). We need to pass
# the command xl runs some extra info so that it can use Ganeti's key
# verification and not fail. Note that this string is incomplete: it must be
# filled with the cluster name before being used.
XL_SSH_CMD = ("ssh -l %s -oGlobalKnownHostsFile=%s"
              " -oUserKnownHostsFile=/dev/null"