def waitForVmwareVmx():
    """!
    Block until the vmware-vmx process for <esxi dir>/esxi.vmx has exited.

    The process id is looked up once, up front; the process is then polled
    every 5 seconds until it is no longer reported as running.
    """
    vmx_pid = Vsp.get_vmx_proc_id("%s/esxi.vmx" % Vsp.get_esxi_dir())

    while True:
        if not Vsp.is_process_running(vmx_pid):
            break
        Logging.log(Logging.LOG_DEBUG, "vmware-vmx is still running")
        time.sleep(5)

    # Leaving the loop means the process is gone
    Logging.log(Logging.LOG_DEBUG, "vmware-vmx is not running")
def main():
    """!
    Entry point of the wrapper. Initializes the logger and the signal
    handlers, starts vmware-vmx unless an instance is already running for
    the VM configuration, and then monitors it.

    Exits with status 1 when the VM configuration file does not exist.
    """

    Logging.log_init('vmware_vmx_wrapper', 'vmware_vmx_wrapper', 0,
                     Logging.component_id(Logging.LCI_VSP), Logging.LOG_DEBUG,
                     Logging.LOG_LOCAL0, Logging.LCT_SYSLOG)

    Logging.log(Logging.LOG_INFO,
                "vsp_vmware_vmx_wrapper started")

    signal.signal(signal.SIGINT, terminate_term_handler)
    signal.signal(signal.SIGTERM, terminate_term_handler)
    signal.signal(signal.SIGQUIT, terminate_quit_handler)
    signal.signal(signal.SIGUSR1, terminate_usr1_handler)

    # Locate the VM configuration inside the esxi directory
    esxi_dir = Vsp.get_esxi_dir()
    vmx_conf_file_path = "%s/%s" % (esxi_dir, ESXI_VMX_NAME)

    # Nothing can be started or monitored without the VM configuration
    if not os.path.exists(vmx_conf_file_path):
        Logging.log(Logging.LOG_ERR,
                    "VM configuration %s doesn't exist" % vmx_conf_file_path)
        sys.exit(1)

    # Check if vmware-vmx is already running. The function returns None if no
    # process id exists or if there are multiple process ids associated with
    # the vmx_conf. However, it's not possible to launch two running
    # vmware-vmx instances using the same vmx_conf, so the chance that this
    # returns None because of multiple process ids is almost nonexistent.
    proc_id = Vsp.get_vmx_proc_id(vmx_conf_file_path)

    # Start vmware-vmx only if an instance has not been started yet
    if proc_id is None:
        # Clean up the "shutting down" file used for ESXi HPN dependency
        if os.path.exists(SHUTDOWN_MARKER):
            os.remove(SHUTDOWN_MARKER)
        cleanup_locks(esxi_dir)
        start_vmware_vmx(vmx_conf_file_path)

    # Look the process id up again so that a freshly started instance is
    # picked up; the result is handed to the monitor as-is.
    proc_id = Vsp.get_vmx_proc_id(vmx_conf_file_path)

    monitor_vmware_vmx(proc_id)
Esempio n. 3
0
def event(reason, version_info):
    """!
    State machine event loop. The three possible states are:
    1. initial 
    2. disconnected
    3. connected
                                               
    The graphical FSM may look like:           ___ 
                                              |   |(resend_event)
                                           ___|___|________
    ____________     init_connect_timeout  |              |
    |          | ------------------------->| disconnected |
    | initial  |-------------------------->|______________|
    |__________|   (wdt_trigger)            |          |
              |                             |          | (wdt_trigger)   
              |               (wdt_trigger) |          |
              |                            _|__________|__
              |       wdt_connect          |             |
              |--------------------------->| connected   |    
                                           |_____________|
                                                |   |
                                                |___|  (resend_event)

    Both arguments are forwarded unchanged to the handler of the current
    state and then to either set_state() (when the handler returns a
    different state) or check_resend_event() (when the state is unchanged).

    The process exits if the mgmtd pid recorded in g_mgmtd_pid is no longer
    among the running mgmtd pids, or if the current state has no handler.
    """
    global g_curr_wdt_state

    old_wdt_state = g_curr_wdt_state
    new_wdt_state = None

    # The watchdog must not outlive the mgmtd instance it was started with
    mgmtd_pids = Vsp.get_pids('mgmtd')
    if mgmtd_pids is None or g_mgmtd_pid not in mgmtd_pids:
        Logging.log(Logging.LOG_ERR, "Unexpected termination of mgmtd, kill watchdog!")
        sys.exit()

    # Dispatch on the current state: the handler inspects the given input,
    # decides whether the state needs to change and returns the state to be
    # in afterwards.
    if g_curr_wdt_state in STATE_HANDLER_DICT:
        new_wdt_state = \
            STATE_HANDLER_DICT[g_curr_wdt_state](reason, version_info)
    else:
        # Unknown state detected. NOTE: the message text says "reset to
        # initial", but the process actually exits here; the text is left
        # untouched so existing log consumers keep matching it.
        error_str = "Unknown state: %s, reset to initial" % g_curr_wdt_state
        Logging.log(Logging.LOG_ERR, error_str)
        sys.exit()

    if old_wdt_state != new_wdt_state:
        set_state(new_wdt_state, reason, version_info)
    else:
        # No state change detected. Verify if we need to resend event.
        check_resend_event(reason, version_info)
def start_vmware_vmx(path):
    """!
    Start vmware-vmx with the given vm configuration.

    A fresh VSP ramfs is mounted first (an already mounted one is unmounted
    beforehand). If the ramfs cannot be mounted, vmware-vmx is NOT started
    and the function simply returns; the caller is expected to check the
    vmx status itself.

    The binary to run is selected through get_debug_option() and is launched
    with a copy of the current environment; when memlock is enabled the
    performance library is prepended to LD_PRELOAD. The function waits for
    the launched command to return.
    """

    Logging.log(Logging.LOG_INFO, "Starting vm %s" % path)

    vsp_ramfs = RamFs.RamFs(vsp_ramfs_path)
    if vsp_ramfs.is_mounted():
        # we generally should not hit this path, we unmount the ramfs when
        # we stop vmware-vmx
        Logging.log(Logging.LOG_INFO,
                    "VSP ramfs is already mounted %s, unmounting" % \
                    vsp_ramfs_path)
        try:
            vsp_ramfs.unmount_ramfs()
        except RamFs.RamFsCmdException as e:
            # we'll proceed with starting vmx even if we can't unmount
            Logging.log(Logging.LOG_ERR, e.msg)

    # Re-check: the unmount above may have failed, in which case the
    # existing mount is reused.
    if not vsp_ramfs.is_mounted():
        try:
            vsp_ramfs.mount_ramfs(vsp_ovhd_ramfs_min_size_mb)
        except (OSError, RamFs.RamFsCmdException) as e:
            Logging.log(Logging.LOG_ERR, str(e))
            Logging.log(Logging.LOG_ERR,
                        "Unable to create ramfs %s" \
                        " not starting VMX" % vsp_ramfs_path)
            # skip starting VMX, the caller will look for vmx status
            return

    # Link in performance tweaks library
    env_dict = os.environ.copy()

    # The two queries below run inside a Mgmt session (presumably they need
    # it). try/finally guarantees the session is closed even if one of them
    # raises.
    Mgmt.open()
    try:
        if Vsp.is_memlock_enabled():
            if "LD_PRELOAD" in env_dict:
                env_dict["LD_PRELOAD"] = \
                    vmperf_path + " " + env_dict["LD_PRELOAD"]
            else:
                env_dict["LD_PRELOAD"] = vmperf_path

        # Check the ESXi debug option to see which binary we need to run
        vmx_option = get_debug_option()
    finally:
        Mgmt.close()

    binary_path = option_to_path[vmx_option]
    Logging.log(Logging.LOG_DEBUG, "BINARY PATH: %s" % binary_path)

    pobj = subprocess.Popen([binary_path, "-qx", path], env = env_dict)
    pobj.wait()
def stop_vmware_vmx():
    """!
    Stop the vm with "vmrun stop" and flag the shutdown as requested.

    The flag (g_shutdown_requested) is set once the vmrun command has
    returned, whatever its exit status was.
    """
    global g_shutdown_requested

    # For now the vm is simply terminated through vmrun stop; this is
    # expected to change once graceful shutdown is handled.
    vmx_path = "%s/%s" % (Vsp.get_esxi_dir(), ESXI_VMX_NAME)
    Logging.log(Logging.LOG_INFO, "Stopping vm %s" % vmx_path)

    stop_cmd = [vmrun_path, "stop", vmx_path]
    subprocess.Popen(stop_cmd).wait()

    g_shutdown_requested = True
Esempio n. 6
0
def main():
    """!
    Entry point to the watchdog. Initializes the logger, records the pid of
    mgmtd, installs the signal handlers and starts monitoring ESXi.

    The process exits right away if a single mgmtd pid cannot be determined
    within the settle retries.
    """
    global g_mgmtd_pid

    g_mgmtd_pid = None

    Logging.log_init('esxi_watchdog', 'esxi_watchdog', 0,
                     Logging.component_id(Logging.LCI_VSP), Logging.LOG_DEBUG,
                     Logging.LOG_LOCAL0, Logging.LCT_SYSLOG)

    Logging.log(Logging.LOG_INFO, "esxi watchdog started")

    # Bug 117274: It may happen that we get multiple pids for mgmtd process,
    # pidof ran between fork-exec call, retry to allow mgmtd to settle.
    # A missing pid list (None or empty) is treated the same way instead of
    # crashing on len()/[0], so the "not ready" handling below is reached.
    # NOTE(review): range(1, N) yields N - 1 attempts - confirm intended.
    for _ in range(1, MAX_MGMTD_SETTLE_RETRY):
        mgmtd_pids = Vsp.get_pids('mgmtd')
        if mgmtd_pids and len(mgmtd_pids) == 1:
            g_mgmtd_pid = mgmtd_pids[0]
            break
        # none or multiple pids detected, give mgmtd some time to settle
        time.sleep(MGMTD_SETTLE_TIMEOUT)

    # Bug 112192: monitor mgmtd pid, if mgmtd crashes/exits
    # terminate watchdog as well
    if g_mgmtd_pid is None:
        # mgmtd not up kill watchdog process
        Logging.log(Logging.LOG_ERR, "Mgmtd is not ready, kill watchdog!")
        sys.exit()

    Mgmt.open()
    signal.signal(signal.SIGINT, terminate_handler)
    signal.signal(signal.SIGTERM, terminate_handler)
    signal.signal(signal.SIGQUIT, terminate_handler)

    # Invalidate the session file if it exists on startup
    if os.path.exists(SESSION_FILE):
        os.remove(SESSION_FILE)

    monitor_esxi()
    Mgmt.close()
Esempio n. 7
0
def run_esxcli_command(command, use_session):
    """!
    Run esxcli command to determine connectivity.

    command is handed to subprocess.Popen as-is. When use_session is False
    the command runs with the environment built by
    Vsp.make_esxcli_env_vars(); otherwise it inherits the current
    environment.

    Returns a (reason, version_info) tuple:
      - reason is None when no problem was detected, "invalid ESXi password"
        when the environment could not be built (the command is not run),
        "disconnected" when the command exited with 99, or the REASONS_DICT
        key whose message appears in the first line of stdout (this takes
        precedence over "disconnected").
      - version_info is "<version>.<build>" when reason is None (each part
        is "unknown" if not found in the output), otherwise "unknown".
    """
    version = "unknown"
    build = "unknown"
    version_info = "unknown"
    curr_reason = None
    env_vars = None

    # Kept as an explicit comparison: only a value equal to False requests
    # the credential environment.
    if use_session == False:
        env_vars = Vsp.make_esxcli_env_vars()
        if env_vars is None:
            curr_reason = "invalid ESXi password"
            return (curr_reason, version_info)

    pobj = subprocess.Popen(command, env=env_vars, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child; a plain
    # wait() can block forever once the child fills a pipe buffer.
    stdout_data, _ = pobj.communicate()
    ret_code = pobj.returncode

    # run_for will return 99 if timed out
    if ret_code == 99:
        Logging.log(Logging.LOG_INFO, "Watchdog timed out connecting to ESXi")
        curr_reason = "disconnected"

    output_lines = stdout_data.splitlines()

    # A known error message is only looked for in the first line of stdout.
    # No early exit: if several messages match, the last one iterated wins.
    first_line = output_lines[0] if output_lines else ""
    for (reason, message) in REASONS_DICT.items():
        if message in first_line:
            curr_reason = reason

    # if we can successfully get the version
    # extract Version and Build information from stdout:
    # Product: VMware ESXi
    # Version: major-ver.minor-ver.maintenance-ver
    # Build: build#
    if not curr_reason:
        # The first line was consumed by the error check above
        for line in output_lines[1:]:
            fields = line.split()
            if len(fields) < 2:
                # no value on this line, nothing to extract
                continue
            if "Version" in line:
                version = fields[1]
            if "Build" in line:
                build = fields[1].replace('Releasebuild-', '')
        version_info = version + "." + build

    return (curr_reason, version_info)
def monitor_vmware_vmx(proc_id):
    """!
    Poll vmware-vmx until it is no longer running, then clean up after it.

    The process is checked every vmx_poll_time seconds. Once it is gone the
    VSP ramfs is shut down and the shutdown marker file is removed. Unless a
    shutdown had been requested (g_shutdown_requested), the wrapper then
    exits with status 1.
    """

    while True:
        if not Vsp.is_process_running(proc_id):
            break
        Logging.log(Logging.LOG_DEBUG, "vmware-vmx is running")
        time.sleep(vmx_poll_time)

    # Out of the loop: the process has exited
    Logging.log(Logging.LOG_DEBUG, "vmware-vmx is not running")

    # Every time vmware-vmx stops the ramfs has to go away; per the original
    # notes the vix logs are saved off to the vmware-admin directory before
    # the unmount.
    shutdown_vsp_ramfs()

    # Clean up the "shutting down" file used for ESXi HPN dependency
    marker_present = os.path.exists(SHUTDOWN_MARKER)
    if marker_present:
        os.remove(SHUTDOWN_MARKER)

    if g_shutdown_requested:
        return
    sys.exit(1)
def terminate_term_handler(signum, frame):
    """!
    Signal handler for SIGTERM and SIGINT. Whenever one of the
    signals occur, we will attempt to make the vicfg-hostops call to gracefully
    power down the host. If there is an issue with the password, we will
    immediately power down the host with vmrun stop. If the issue is connection
    related, we will not do anything and will let PM retry again (if possible)
    when the next signal is sent.

    Sequence performed by the code below:
      1. Build the vicfg environment inside a short-lived Mgmt session.
      2. Call stop_migrate_deploy().
      3. Go forceful (stop_vmware_vmx) when the environment is missing, the
         connected marker file does not exist, or check_connectivity() fails.
      4. Otherwise: refresh the IQN cache file, create the shutdown marker,
         run the enable-SSH / save-state / disable-SSH commands and finally
         the shutdown command. g_shutdown_requested is set only when the
         shutdown command exits with 0.

    signum and frame are the standard signal handler arguments; neither is
    used here.
    """

    do_forceful = False

    Logging.log(Logging.LOG_DEBUG, "Wrapper: got TERM signal")

    # We used to open and close the session in main(). However, if mgmtd
    # crashes and is restarted by PM, the session we created will now be
    # stale and any queries will fail miserably. To mitigate this issue,
    # we'll open and close the session as tightly as possible
    Mgmt.open()

    env_vars = Vsp.make_vicfg_env_vars()
    # NOTE(review): env_vars presumably carries the ESXi credentials, so this
    # debug line may write them to the log - confirm this is acceptable.
    Logging.log(Logging.LOG_DEBUG, "Wrapper env: %s" % env_vars)

    Mgmt.close()

    # Send signal to active VM migration task so it can clean up
    stop_migrate_deploy()

    # No environment (logged below as "cannot get password") or no connected
    # marker file: a graceful shutdown cannot be attempted.
    if env_vars == None or not os.path.exists(CONNECTED_MARKER):
        Logging.log(Logging.LOG_NOTICE,
                  "Cannot get password or currently disconnected from ESXi")
        do_forceful = True

    # Do one last check for connectivity just in case the watchdog
    # says we are connected when really we are not due to a change of
    # IP or password on the ESXi side (but our sessionfile is still valid)
    # XXX/rcenteno Enhance with the session file
    if not do_forceful and not check_connectivity(env_vars):
        Logging.log(Logging.LOG_NOTICE, "ESXi connectivity not found")
        do_forceful = True


    if do_forceful:
        Logging.log(Logging.LOG_NOTICE, "Performing forceful power off")
        # We cannot get the ESXi password or cannot connect to ESXi,
        # so we must forcibly power down
        stop_vmware_vmx();

    else:
        global g_shutdown_requested

        # update IQN cache value
        iqn = Vsp.get_iqn(env_vars, RUNFOR_TIMEOUT)

        if iqn:
            # Best effort: a failure to write the cache is only logged and
            # does not stop the shutdown sequence.
            # NOTE(review): the file handle is not closed if write() raises.
            try:
                iqn_cache = open(iqn_cache_path, 'w')
                iqn_cache.write(iqn)
                iqn_cache.close()
            except Exception as e:
                Logging.log(Logging.LOG_ERR,
                    "Exception while updating IQN cache file")

        # Create the shutdown marker that the hpn will use to know it should
        # wait for ESXi shutdown first
        open(SHUTDOWN_MARKER, 'w').close()

        # NOTE(review): every command below is run with stdout and stderr
        # piped and then wait()ed on without reading the pipes; a command
        # producing a lot of output could block - consider communicate().

        # Enable SSH on the host
        pobj = subprocess.Popen(ENABLE_SSH_COMMAND, env=env_vars,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        ret_code = pobj.wait()

        # The state is saved only if SSH could be enabled
        if ret_code == 0:
            # Save the state on the host
            pobj = subprocess.Popen(SAVE_STATE_COMMAND, env=env_vars,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ret_code = pobj.wait()

            if ret_code == 0:
                Logging.log(Logging.LOG_INFO, "Saved state on the host")

            # Disable SSH on the host (done whether or not the save worked;
            # its exit status is not checked)
            pobj = subprocess.Popen(DISABLE_SSH_COMMAND, env=env_vars,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ret_code = pobj.wait()

        # Regardless of what happened earlier, send the host operations command
        Logging.log(Logging.LOG_INFO, "ESXi graceful shutdown in progress")
        pobj = subprocess.Popen(SHUTDOWN_COMMAND, env=env_vars,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        ret_code = pobj.wait()

        # Only a successful request marks the shutdown as requested
        if ret_code == 0:
            g_shutdown_requested = True

        # the command timed out
        elif ret_code == 99:
            Logging.log(Logging.LOG_INFO,
                        "Timed out trying to send graceful shutdown request")