def Run( vars, log ):
    """
    Report this node's run level to PLC.

    Replaces the behavior of UpdateBootStateWithPLC.  Where previously, the
    boot_state of a node would be altered by the BM, now the run_level is
    updated, and the boot_state is preserved as a record of a User's
    preference.

    The current value of the RUN_LEVEL key in vars is used.  The boot_state
    style values 'diag', 'diagnose', 'disabled' and 'disable' are first
    rewritten (in vars itself) to the run level 'safeboot'.

    A BootManagerException raised by the ReportRunlevel call is logged and
    swallowed, so a failed report does not abort the caller.  A missing
    RUN_LEVEL key propagates as KeyError.

    NOTE(review): this function does not read STATE_CHANGE_NOTIFY or
    STATE_CHANGE_NOTIFY_MESSAGE and sends no notification itself.

    Return 1.
    """

    log.write( "\n\nStep: Updating node run level at PLC.\n" )

    # translate boot_state values to run_level value
    if vars['RUN_LEVEL'] in ['diag', 'diagnose', 'disabled', 'disable']:
        vars['RUN_LEVEL'] = 'safeboot'
    update_vals = {'run_level': vars['RUN_LEVEL']}

    try:
        BootAPI.call_api_function( vars, "ReportRunlevel", (update_vals,) )
        log.write( "Successfully updated run level for this node at PLC\n" )
    except BootManagerException as e:
        # best effort: a failed report is logged, never fatal
        log.write( "Unable to update run level for this node at PLC: %s.\n" % e )

    return 1
def Run( vars, log ):
    """
    Report this node's boot state to PLC.

    The only valid transition is from reinstall to boot.  All other changes to
    the boot state of a node should be performed by the Admin, Tech or PI
    through the API or Web interface.

    The current value of the BOOT_STATE key in vars is sent as 'boot_state'
    through the BootUpdateNode API call.

    A BootManagerException raised by that call is logged and swallowed, so a
    failed update does not abort the caller.  A missing BOOT_STATE key
    propagates as KeyError.

    NOTE(review): this function does not read STATE_CHANGE_NOTIFY or
    STATE_CHANGE_NOTIFY_MESSAGE and sends no notification itself.

    Return 1.
    """

    log.write( "\n\nStep: Updating node boot state at PLC.\n" )

    update_vals = {'boot_state': vars['BOOT_STATE']}
    try:
        BootAPI.call_api_function( vars, "BootUpdateNode", (update_vals,) )
        log.write( "Successfully updated boot state for this node at PLC\n" )
    except BootManagerException as e:
        # best effort: a failed update is logged, never fatal
        log.write( "Unable to update boot state for this node at PLC: %s.\n" % e )

    return 1
def Run(vars, log):
    """
    Stop the RunlevelAgent.py script and report run level 'boot' to PLC.

    Should precede kexec, so that run_level is reset to 'boot' before the
    kexec happens.

    Expect the following variables:
    BM_SOURCE_DIR         directory that contains RunlevelAgent.py

    Raise BootManagerException if BM_SOURCE_DIR is missing from vars.  The
    OSError raised by os.stat propagates if the script is not present.  A
    BootManagerException from the ReportRunlevel call is only logged.

    Return 1.
    """

    log.write("\n\nStep: Stopping RunlevelAgent.py\n")

    try:
        cmd = "{}/RunlevelAgent.py".format(vars['BM_SOURCE_DIR'])
        # raise error if script is not present.
        os.stat(cmd)
        # the exit status of the stop command is deliberately not checked
        os.system("/usr/bin/python2 {} stop".format(cmd))
    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    try:
        update_vals = {'run_level': 'boot'}
        BootAPI.call_api_function(vars, "ReportRunlevel", (update_vals, ))
    except BootManagerException as e:
        # this call reports the run level, so say so in the log
        log.write(
            "Unable to update run level for this node at PLC: {}.\n".format(
                e))

    return 1
def Run(vars, log):
    """
    Report this node's run level to PLC, optionally notifying contacts.

    Supersedes UpdateBootStateWithPLC: the BM now updates run_level and
    leaves boot_state alone as the record of the user's preference.

    The RUN_LEVEL key of vars supplies the value.  The boot_state style
    values 'diag', 'diagnose', 'disabled' and 'disable' are rewritten in
    vars to 'safeboot' before being reported.

    To have the contacts notified of the change, set in vars beforehand:
    STATE_CHANGE_NOTIFY = 1
    STATE_CHANGE_NOTIFY_MESSAGE = "<notify message>"
    where the message is one of those from notify_messages.py.

    BootManagerExceptions from either API call are logged, not raised.

    Return 1.
    """

    log.write("\n\nStep: Updating node run level at PLC.\n")

    # boot_state style values all collapse onto the 'safeboot' run level
    if vars['RUN_LEVEL'] in ('diag', 'diagnose', 'disabled', 'disable'):
        vars['RUN_LEVEL'] = 'safeboot'
    report = {'run_level': vars['RUN_LEVEL']}

    try:
        BootAPI.call_api_function(vars, "ReportRunlevel", (report, ))
        log.write("Successfully updated run level for this node at PLC\n")
    except BootManagerException as err:
        log.write(
            "Unable to update run level for this node at PLC: {}.\n".format(err))

    # nothing more to do unless the caller asked for a notification
    if not vars.get("STATE_CHANGE_NOTIFY", 0):
        return 1

    message = vars['STATE_CHANGE_NOTIFY_MESSAGE']
    # positional flags after the message: include_pis, include_techs,
    # include_support
    notify_args = (message, 0, 1, 0)

    sent = 0
    try:
        sent = BootAPI.call_api_function(vars, "BootNotifyOwners", notify_args)
    except BootManagerException as err:
        log.write("Call to BootNotifyOwners failed: {}.\n".format(err))

    if sent == 0:
        log.write("Unable to notify site contacts of state change.\n")

    return 1
예제 #5
0
def Run(vars, log):
    """
    Report this node's boot state to PLC, optionally notifying contacts.

    The only valid transition is from reinstall to boot.  Every other
    boot state change is meant to be made by the Admin, Tech or PI through
    the API or Web interface.

    The BOOT_STATE key of vars supplies the value sent as 'boot_state'.

    To have the contacts notified of the change, set in vars beforehand:
    STATE_CHANGE_NOTIFY = 1
    STATE_CHANGE_NOTIFY_MESSAGE = "<notify message>"
    where the message is one of those from notify_messages.py.

    BootManagerExceptions from either API call are logged, not raised.

    Return 1.
    """

    log.write("\n\nStep: Updating node boot state at PLC.\n")

    new_state = {'boot_state': vars['BOOT_STATE']}
    try:
        BootAPI.call_api_function(vars, "BootUpdateNode", (new_state, ))
        log.write("Successfully updated boot state for this node at PLC\n")
    except BootManagerException as err:
        log.write(
            "Unable to update boot state for this node at PLC: {}.\n".format(err))

    # nothing more to do unless the caller asked for a notification
    if not vars.get("STATE_CHANGE_NOTIFY", 0):
        return 1

    message = vars['STATE_CHANGE_NOTIFY_MESSAGE']
    # positional flags after the message: include_pis, include_techs,
    # include_support
    notify_args = (message, 0, 1, 0)

    sent = 0
    try:
        sent = BootAPI.call_api_function(vars, "BootNotifyOwners", notify_args)
    except BootManagerException as err:
        log.write("Call to BootNotifyOwners failed: {}.\n".format(err))

    if sent == 0:
        log.write("Unable to notify site contacts of state change.\n")

    return 1
예제 #6
0
def Run( vars, log ):
    """
        UpdateLastBootOnce will update the last_* values for the node only
        once per boot.  This helps calculate last_time_spent_online and
        last_time_spent_offline for collecting run-time metrics.

        The marker file /tmp/UPDATE_LAST_BOOT_ONCE records that the update
        was already sent; while it exists, nothing is sent to PLC.
        (presumably /tmp does not survive a reboot -- TODO confirm)

        A BootManagerException from the BootUpdateNode call is logged and
        swallowed; in that case the marker is not created.

        Return 1.
    """

    log.write( "\n\nStep: Updating node last boot times at PLC.\n" )

    # an empty update is sent on purpose: no node field is changed here
    update_vals = {}
    try:
        if not os.path.isfile("/tmp/UPDATE_LAST_BOOT_ONCE"):
            BootAPI.call_api_function( vars, "BootUpdateNode", (update_vals,) )
            log.write( "Successfully updated boot state for this node at PLC\n" )
            os.system("touch /tmp/UPDATE_LAST_BOOT_ONCE")
    except BootManagerException as e:
        log.write( "Unable to update last boot times for this node at PLC: %s.\n" % e )

    return 1
예제 #7
0
def Run(vars, log):
    """
    Refresh the node's last_* values at PLC, at most once per boot.

    These values help calculate last_time_spent_online and
    last_time_spent_offline for collecting run-time metrics.

    The marker file /tmp/UPDATE_LAST_BOOT_ONCE records that the update was
    already sent; while it exists, nothing is sent to PLC.
    (presumably /tmp does not survive a reboot -- TODO confirm)

    A BootManagerException from the BootUpdateNode call is logged and
    swallowed; in that case the marker is not created.

    Return 1.
    """

    log.write("\n\nStep: Updating node last boot times at PLC.\n")

    try:
        already_done = os.path.isfile("/tmp/UPDATE_LAST_BOOT_ONCE")
        if not already_done:
            # an empty update is sent on purpose: no node field is changed
            BootAPI.call_api_function(vars, "BootUpdateNode", ({}, ))
            log.write("Successfully updated boot state for this node at PLC\n")
            os.system("touch /tmp/UPDATE_LAST_BOOT_ONCE")
    except BootManagerException as err:
        failure = "Unable to update last boot times for this node at PLC: {}.\n"
        log.write(failure.format(err))

    return 1
예제 #8
0
def create_raid_partition(partitions, vars, log):
    """
    Build a software raid array out of the given partitions.

    Nothing is done unless the node (vars['NODE_ID']) carries a node tag
    'raid_enabled' whose value is '1'; the tags are fetched with the
    GetNodeTags API call.

    When enabled, /dev/md0 is stopped, the md superblock of every partition
    is zeroed, and a new /dev/md0 is created with mdadm: raid1 for exactly
    two partitions, raid5 for more.  Fewer than two partitions is an error.

    Return "/dev/md0" on success, or None when raid is not enabled or when
    a BootManagerException occurred while building the array (it is logged,
    not propagated).
    """
    node_tags = BootAPI.call_api_function(
        vars, "GetNodeTags", ({'node_id': vars['NODE_ID']}, ))
    raid_enabled = any(
        tag['tagname'] == 'raid_enabled' and tag['value'] == '1'
        for tag in node_tags)
    if not raid_enabled:
        return None

    raid_part = None
    try:
        log.write("Software raid enabled.\n")

        # wipe everything
        utils.sysexec_noerr("mdadm --stop /dev/md0", log)
        time.sleep(1)
        for part_path in partitions:
            wipe_cmd = "mdadm --zero-superblock {} ".format(part_path)
            utils.sysexec_noerr(wipe_cmd, log)

        # assume each partition is on a separate disk
        num_parts = len(partitions)
        if num_parts < 2:
            too_few = "Not enough disks for raid. Found: {}\n".format(partitions)
            log.write(too_few)
            raise BootManagerException(too_few)
        lvl = 1 if num_parts == 2 else 5

        # make the array
        raid_part = "/dev/md0"
        template = "mdadm --create {raid_part} --chunk=128 --level=raid{lvl} "\
                   "--raid-devices={num_parts} {part_list}"
        cmd = template.format(raid_part=raid_part,
                              lvl=lvl,
                              num_parts=num_parts,
                              part_list=" ".join(partitions))
        utils.sysexec(cmd, log)

    except BootManagerException:
        log.write("create_raid_partition failed.\n")
        raid_part = None

    return raid_part
def create_raid_partition(partitions, vars, log):
    """
    create raid array using specified partitions.

    Nothing is done unless the node (vars["NODE_ID"]) carries a node tag
    "raid_enabled" whose value is "1"; the tags are fetched with the
    GetNodeTags API call.

    When enabled, /dev/md0 is stopped, the md superblock of every partition
    is zeroed, and a new /dev/md0 is created with mdadm: raid1 for exactly
    two partitions, raid5 for more.  Fewer than two partitions is an error.

    Return "/dev/md0" on success, or None when raid is not enabled or when
    a BootManagerException occurred while building the array (it is logged,
    not propagated).
    """
    raid_part = None
    raid_enabled = False
    node_tags = BootAPI.call_api_function(vars, "GetNodeTags", ({"node_id": vars["NODE_ID"]},))
    for node_tag in node_tags:
        if node_tag["tagname"] == "raid_enabled" and node_tag["value"] == "1":
            raid_enabled = True
            break
    if not raid_enabled:
        return raid_part

    try:
        log.write("Software raid enabled.\n")
        # wipe everything
        utils.sysexec_noerr("mdadm --stop /dev/md0", log)
        time.sleep(1)
        for part_path in partitions:
            utils.sysexec_noerr("mdadm --zero-superblock %s " % part_path, log)

        # assume each partition is on a separate disk
        num_parts = len(partitions)
        if num_parts < 2:
            log.write("Not enough disks for raid. Found: %s\n" % partitions)
            raise BootManagerException("Not enough disks for raid. Found: %s\n" % partitions)
        if num_parts == 2:
            lvl = 1
        else:
            lvl = 5

        # make the array
        raid_part = "/dev/md0"
        cmd = (
            "mdadm --create %(raid_part)s --chunk=128 --level=raid%(lvl)s "
            "--raid-devices=%(num_parts)s %(part_list)s"
        ) % {
            "raid_part": raid_part,
            "lvl": lvl,
            "num_parts": num_parts,
            "part_list": " ".join(partitions),
        }
        utils.sysexec(cmd, log)

    except BootManagerException:
        log.write("create_raid_partition failed.\n")
        raid_part = None

    # hand the array device (or None on failure) back to the caller
    return raid_part
def Run(vars, log):
    """
    Make sure the hardware we are running on is sufficient for
    the PlanetLab OS to be installed on. In the process, identify
    the list of block devices that may be used for a node installation.

    Return 1 if requirements met, 0 if requirements not met. Raise
    BootManagerException if any problems occur that prevent the requirements
    from being checked.

    Expect the following variables from the store:

    MINIMUM_MEMORY          minimum amount of memory in kb required
                            for install
    NODE_ID                 the node_id from the database for this node
                            (only checked for being present and non-blank)
    MINIMUM_DISK_SIZE       any disks smaller than this size, in GB, are not used
    virt                    prefix selecting which total size variable is read
    <virt>_TOTAL_MINIMUM_DISK_SIZE
                            total disk size in GB; if the usable disks together
                            are smaller than this, there isn't enough disk
                            space for this node to be usable after install
    SKIP_HARDWARE_REQUIREMENT_CHECK
                            If non-zero, unmet memory and total disk size
                            requirements are logged but do not fail the step.
                            Finding no usable block device always fails.
    Sets the following variables:
    INSTALL_BLOCK_DEVICES    list of block devices to install onto
    """

    log.write("\n\nStep: Checking if hardware requirements met.\n")

    try:
        MINIMUM_MEMORY = int(vars["MINIMUM_MEMORY"])

        if vars["NODE_ID"] == "":
            raise ValueError("NODE_ID")

        MINIMUM_DISK_SIZE = int(vars["MINIMUM_DISK_SIZE"])

        # use vs_ or lxc_variants
        varname = vars['virt'] + "_TOTAL_MINIMUM_DISK_SIZE"
        TOTAL_MINIMUM_DISK_SIZE = int(vars[varname])

        SKIP_HARDWARE_REQUIREMENT_CHECK = int(vars["SKIP_HARDWARE_REQUIREMENT_CHECK"])

    except KeyError as var:
        raise BootManagerException("Missing variable in install store: {}".format(var))
    except ValueError as var:
        raise BootManagerException("Variable in install store blank, shouldn't be: {}".format(var))

    # lets see if we have enough memory to run
    log.write("Checking for available memory.\n")

    total_mem = systeminfo.get_total_phsyical_mem(vars, log)
    if total_mem is None:
        raise BootManagerException("Unable to read total physical memory")

    if total_mem >= MINIMUM_MEMORY:
        log.write("Looks like we have enough memory: {} kb\n".format(total_mem))
    elif SKIP_HARDWARE_REQUIREMENT_CHECK:
        log.write("Memory requirements not met, but running anyway: {} kb\n"
                  .format(total_mem))
    else:
        log.write("Insufficient memory to run node: {} kb\n".format(total_mem))
        log.write("Required memory: {} kb\n".format(MINIMUM_MEMORY))
        _notify_techs_of_problem(vars, log,
                                 notify_messages.MSG_INSUFFICIENT_MEMORY,
                                 log_success=True)
        return 0

    # get a list of block devices to attempt to install on
    # (may include cdrom devices)
    install_devices = systeminfo.get_block_devices_dict(vars, log)

    # save the list of block devices in the log
    log.write("Detected block devices:\n")
    log.write(repr(install_devices) + "\n")

    if not install_devices:
        log.write("No block devices detected.\n")
        _notify_techs_of_problem(vars, log,
                                 notify_messages.MSG_INSUFFICIENT_DISK)
        return 0

    # now, lets remove any block devices we know won't work (readonly,cdroms),
    # or could be other writable removable disks (usb keychains, zip disks, etc)
    # i'm not aware of anything that helps with the latter test, so,
    # what we'll probably do is simply not use any block device below
    # some size threshold (set in installstore)

    # also, keep track of the total size for all devices that appear usable
    total_size = 0

    # do not modify subject of current loop
    ignored_devices = []
    for device, details in install_devices.items():

        major, minor, blocks, gb_size, readonly = details

        # if the device string starts with
        # planetlab or dm- (device mapper), ignore it (could be old lvm setup)
        if device.startswith(("/dev/planetlab", "/dev/dm-")):
            ignored_devices.append(device)
            continue

        if gb_size < MINIMUM_DISK_SIZE:
            log.write("Device is too small to use: {} \n"
                      "(appears to be {:4.2f} Gb)\n".format(device, gb_size))
            ignored_devices.append(device)
            continue

        if readonly:
            log.write("Device is readonly, not using: {}\n".format(device))
            ignored_devices.append(device)
            continue

        # add this device's size (in GB) to the total usable size found
        total_size = total_size + gb_size

    # delayed erasure
    for device in ignored_devices:
        install_devices.pop(device, None)

    if len(install_devices) == 0:
        log.write("No suitable block devices found for install.\n")
        _notify_techs_of_problem(vars, log,
                                 notify_messages.MSG_INSUFFICIENT_DISK)
        return 0

    # store a real list, not a dict view, so that later steps can index it
    usable_devices = list(install_devices.keys())

    # show the devices we found that are usable
    log.write("Usable block devices:\n")
    log.write(repr(usable_devices) + "\n")

    # save the list of devices for the following steps
    vars["INSTALL_BLOCK_DEVICES"] = usable_devices

    # ensure the total disk size is large enough. if
    # not, we need to email the tech contacts the problem, and
    # put the node into debug mode.
    if total_size < TOTAL_MINIMUM_DISK_SIZE:
        if not SKIP_HARDWARE_REQUIREMENT_CHECK:
            log.write("The total usable disk size of all disks is " \
                       "insufficient to be usable as a PlanetLab node.\n")
            _notify_techs_of_problem(vars, log,
                                     notify_messages.MSG_INSUFFICIENT_DISK)
            return 0

        log.write("The total usable disk size of all disks is " \
                   "insufficient, but running anyway.\n")

    log.write("Total size for all usable block devices: {:4.2f} Gb\n".format(total_size))

    return 1


def _notify_techs_of_problem(vars, log, message, log_success=False):
    """
    Send message through the BootNotifyOwners API call (techs only).

    A BootManagerException from the call is logged and swallowed.  A result
    of 0 is logged as a failure to notify; any other result is logged as
    success only when log_success is true.
    """
    include_pis = 0
    include_techs = 1
    include_support = 0

    sent = 0
    try:
        sent = BootAPI.call_api_function(vars, "BootNotifyOwners",
                                         (message,
                                          include_pis,
                                          include_techs,
                                          include_support))
    except BootManagerException as e:
        log.write("Call to BootNotifyOwners failed: {}.\n".format(e))

    if sent == 0:
        log.write("Unable to notify site contacts of problem.\n")
    elif log_success:
        log.write("Notified contacts of problem.\n")
        raise BootManagerException, "Unable to read total physical memory"
        
    if total_mem < MINIMUM_MEMORY:
        if not SKIP_HARDWARE_REQUIREMENT_CHECK:
            log.write( "Insufficient memory to run node: %s kb\n" % total_mem )
            log.write( "Required memory: %s kb\n" % MINIMUM_MEMORY )

            include_pis= 0
            include_techs= 1
            include_support= 0
            
            sent= 0
            try:
                sent= BootAPI.call_api_function( vars, "BootNotifyOwners",
                                         (notify_messages.MSG_INSUFFICIENT_MEMORY,
                                          include_pis,
                                          include_techs,
                                          include_support) )
            except BootManagerException, e:
                log.write( "Call to BootNotifyOwners failed: %s.\n" % e )
                
            if sent == 0:
                log.write( "Unable to notify site contacts of problem.\n" )
            else:
                log.write( "Notified contacts of problem.\n" )
                
            return 0
        else:
            log.write( "Memory requirements not met, but running anyway: %s kb\n"
                       % total_mem )
    else:
예제 #12
0
    # the following step should be done by NM
    UpdateNodeConfiguration.Run( vars, log )

    log.write( "Updating ssh public host key with PLC.\n" )
    ssh_host_key= ""
    try:
        ssh_host_key_file= file("%s/etc/ssh/ssh_host_rsa_key.pub"%SYSIMG_PATH,"r")
        ssh_host_key= ssh_host_key_file.read().strip()
        ssh_host_key_file.close()
        ssh_host_key_file= None
    except IOError, e:
        pass

    update_vals= {}
    update_vals['ssh_rsa_key']= ssh_host_key
    BootAPI.call_api_function( vars, "BootUpdateNode", (update_vals,) )


    # get the kernel version
    option = ''
    if NODE_MODEL_OPTIONS & ModelOptions.SMP:
        option = 'smp'

    log.write( "Copying kernel and initrd for booting.\n" )
    utils.sysexec( "cp %s/boot/kernel-boot%s /tmp/kernel" % (SYSIMG_PATH,option), log )
    utils.sysexec( "cp %s/boot/initrd-boot%s /tmp/initrd" % (SYSIMG_PATH,option), log )

    BootAPI.save(vars)

    log.write( "Unmounting disks.\n" )
    utils.sysexec( "umount %s/vservers" % SYSIMG_PATH, log )
예제 #13
0
    # make sure the sysimg dir is present
    utils.makedirs(SYSIMG_PATH)

    log.write("mounting root file system\n")
    utils.sysexec("mount -t ext3 %s %s" % (PARTITIONS["root"], SYSIMG_PATH), log)

    log.write("mounting vserver partition in root file system\n")
    utils.makedirs(SYSIMG_PATH + "/vservers")
    utils.sysexec("mount -t ext3 %s %s/vservers" % (PARTITIONS["vservers"], SYSIMG_PATH), log)

    vars["ROOT_MOUNTED"] = 1

    # call getNodeFlavour
    try:
        node_flavour = BootAPI.call_api_function(vars, "GetNodeFlavour", (NODE_ID,))
        nodefamily = node_flavour["nodefamily"]
        extensions = node_flavour["extensions"]
        plain = node_flavour["plain"]
    except:
        raise BootManagerException("Could not call GetNodeFlavour - need PLCAPI-5.0")

    # the 'plain' option is for tests mostly
    if plain:
        download_suffix = ".tar"
        uncompress_option = ""
        log.write("Using plain bootstrapfs images\n")
    else:
        download_suffix = ".tar.bz2"
        uncompress_option = "-j"
        log.write("Using compressed bootstrapfs images\n")
예제 #14
0
        vars['RUN_LEVEL']='safeboot'
    update_vals['run_level']=vars['RUN_LEVEL']
    try:
        BootAPI.call_api_function( vars, "ReportRunlevel", (update_vals,) )
        log.write( "Successfully updated run level for this node at PLC\n" )
    except BootManagerException, e:
        log.write( "Unable to update run level for this node at PLC: %s.\n" % e )

    notify = vars.get("STATE_CHANGE_NOTIFY",0)

    if notify:
        message= vars['STATE_CHANGE_NOTIFY_MESSAGE']
        include_pis= 0
        include_techs= 1
        include_support= 0

        sent= 0
        try:
            sent= BootAPI.call_api_function( vars, "BootNotifyOwners",
                                             (message,
                                              include_pis,
                                              include_techs,
                                              include_support) )
        except BootManagerException, e:
            log.write( "Call to BootNotifyOwners failed: %s.\n" % e )

        if sent == 0:
            log.write( "Unable to notify site contacts of state change.\n" )

    return 1
def Run(vars, log):
    """
    Authenticate this node with PLC. This ensures that the node can operate
    as normal, and that our management authority has authorized it.

    For this, just call the PLC api function BootCheckAuthentication

    Return 1 if authorized.  Also return 1 (after clearing
    vars['API_SERVER_INST']) when the call fails with something other than
    a BootManagerException while vars['DISCONNECTED_OPERATION'] is set;
    without that flag such an error is re-raised unchanged.

    Otherwise the failure counter kept in AUTH_FAILURE_COUNT_FILE is
    incremented and a BootManagerException is raised.  Once the counter
    reaches NUM_AUTH_FAILURES_BEFORE_DEBUG this is additionally noted in
    the log; this function itself only logs and raises.  A successful
    authentication removes the counter file.

    Expect the following variables from the store:
    NUM_AUTH_FAILURES_BEFORE_DEBUG    How many failures before debug
    """

    log.write("\n\nStep: Authenticating node with PLC.\n")

    # make sure we have the variables we need
    try:
        NUM_AUTH_FAILURES_BEFORE_DEBUG = int(
            vars["NUM_AUTH_FAILURES_BEFORE_DEBUG"])
    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    try:
        authorized = BootAPI.call_api_function(vars, "BootCheckAuthentication",
                                               ())
        if authorized == 1:
            log.write("Authentication successful.\n")

            # forget earlier failures; it is fine if there were none
            try:
                os.unlink(AUTH_FAILURE_COUNT_FILE)
            except OSError:
                pass

            return 1
    except BootManagerException as e:
        log.write("Authentication failed: {}.\n".format(e))
    except:
        # This is ugly.
        if vars['DISCONNECTED_OPERATION']:
            vars['API_SERVER_INST'] = None
            return 1
        else:
            raise

    # increment auth failure; an unreadable or garbled counter counts as 0
    auth_failure_count = 0
    try:
        with open(AUTH_FAILURE_COUNT_FILE, "r") as count_file:
            auth_failure_count = int(count_file.read().strip())
    except (IOError, ValueError):
        pass

    auth_failure_count += 1

    # best effort: failing to persist the counter must not hide the
    # authentication failure reported below
    try:
        with open(AUTH_FAILURE_COUNT_FILE, "w") as count_file:
            count_file.write(str(auth_failure_count))
    except IOError:
        pass

    if auth_failure_count >= NUM_AUTH_FAILURES_BEFORE_DEBUG:
        log.write("Maximum number of authentication failures reached.\n")
        log.write("Canceling boot process and going into debug mode.\n")

    raise BootManagerException("Unable to authenticate node.")
def Run(vars, log):
    """

    Contact PLC and get the attributes for this node. Also, parse in
    options from the node model string.

    The node record (GetNodes), its interfaces (GetInterfaces) and its
    flavour (GetNodeFlavour) are fetched for vars['NODE_ID'].

    Expect the following keys to be set (present and non-blank):
    SKIP_HARDWARE_REQUIREMENT_CHECK     Whether or not we should skip hardware
                                        requirement checks
    INTERFACE_SETTINGS
    WAS_NODE_ID_IN_CONF
    WAS_NODE_KEY_IN_CONF
    NODE_ID                             The node to look up at PLC

    The following keys are set/updated:
    BOOT_STATE                          The current node boot state
    RUN_LEVEL                           Initialised to the node boot state
    NODE_MODEL                          The user specified model of this node
    SITE_ID                             The site_id of the node record
    NODE_MODEL_OPTIONS                  The options extracted from the user specified
                                                model of this node
    SKIP_HARDWARE_REQUIREMENT_CHECK     Set to 1 if the model options carry
                                                ModelOptions.MINHW
    INTERFACES                          The network interfaces associated with this node
    virt, nodefamily, extensions, plain Node flavour values; each is only
                                                set when not already in vars

    Return 1 if able to contact PLC and get node info.
    Raise a BootManagerException if a required variable is missing or blank,
    if the node has no primary network, or if a flavour value is neither in
    vars nor available from GetNodeFlavour.
    """

    log.write("\n\nStep: Retrieving details of node from PLC.\n")

    # make sure we have the variables we need
    try:
        for required in ("SKIP_HARDWARE_REQUIREMENT_CHECK",
                         "INTERFACE_SETTINGS",
                         "WAS_NODE_ID_IN_CONF",
                         "WAS_NODE_KEY_IN_CONF"):
            if vars[required] == "":
                raise ValueError(required)

    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    node_details = BootAPI.call_api_function(
        vars, "GetNodes", (vars['NODE_ID'], [
            'boot_state', 'nodegroup_ids', 'interface_ids', 'model', 'site_id'
        ]))[0]

    vars['BOOT_STATE'] = node_details['boot_state']
    vars['RUN_LEVEL'] = node_details['boot_state']
    vars['NODE_MODEL'] = node_details['model'].strip()
    vars['SITE_ID'] = node_details['site_id']
    log.write("Successfully retrieved node record.\n")
    log.write("Current boot state: {}\n".format(vars['BOOT_STATE']))
    log.write("Node make/model: {}\n".format(vars['NODE_MODEL']))

    # parse in the model options from the node_model string
    model = vars['NODE_MODEL']
    options = ModelOptions.Get(model)
    vars['NODE_MODEL_OPTIONS'] = options

    # Check if we should skip hardware requirement check
    if options & ModelOptions.MINHW:
        vars['SKIP_HARDWARE_REQUIREMENT_CHECK'] = 1
        log.write("node model indicates override to hardware requirements.\n")

    # this contains all the node networks, for now, we are only concerned
    # in the primary network
    interfaces = BootAPI.call_api_function(vars, "GetInterfaces",
                                           (node_details['interface_ids'], ))
    got_primary = 0
    for network in interfaces:
        if network['is_primary'] == 1:
            log.write(
                "Primary network as returned from PLC: {}\n".format(network))
            got_primary = 1
            break

    if not got_primary:
        raise BootManagerException("Node did not have a primary network.")

    vars['INTERFACES'] = interfaces

    # call getNodeFlavour; stays None when the call fails, so that the
    # lookups and the log line below never hit an unbound name
    node_flavour = None
    try:
        node_flavour = BootAPI.call_api_function(vars, "GetNodeFlavour",
                                                 (vars['NODE_ID'], ))
    except Exception:
        log.write(
            "GetNodeFlavour failed, not fatal if the node flavour is available in ``configuration''\n"
        )

    flavour_keys = [
        'virt',  # 'vs' or 'lxc'
        'nodefamily',  # the basename for downloading nodeimage
        'extensions',  # extensions to be applied on top of the base nodeimage
        'plain'  # false if compressed image, true if not
    ]

    # MyPLC 5.0 workaround
    # make sure to define 'extensions' even if not yet set
    if 'extensions' not in vars or vars['extensions'] == '':
        vars['extensions'] = []

    for k in flavour_keys:
        # Support MyPLC<5.2
        if k not in vars:
            try:
                vars[k] = node_flavour[k]
            except Exception:
                # log the full traceback before reporting the failure
                for line in traceback.format_exception(*sys.exc_info()):
                    log.write(line)
                raise BootManagerException(
                    "Could not call GetNodeFlavour - need PLCAPI-5.2")

    log.write("NodeFlavour as returned from PLC: {}\n".format(node_flavour))

    return 1
예제 #17
0
    Expect the following variables from the store:
    NUM_AUTH_FAILURES_BEFORE_DEBUG    How many failures before debug
    """

    log.write("\n\nStep: Authenticating node with PLC.\n")

    # make sure we have the variables we need
    try:
        NUM_AUTH_FAILURES_BEFORE_DEBUG = int(vars["NUM_AUTH_FAILURES_BEFORE_DEBUG"])
    except KeyError, var:
        raise BootManagerException, "Missing variable in vars: %s\n" % var
    except ValueError, var:
        raise BootManagerException, "Variable in vars, shouldn't be: %s\n" % var

    try:
        authorized = BootAPI.call_api_function(vars, "BootCheckAuthentication", ())
        if authorized == 1:
            log.write("Authentication successful.\n")

            try:
                os.unlink(AUTH_FAILURE_COUNT_FILE)
            except OSError, e:
                pass

            return 1
    except BootManagerException, e:
        log.write("Authentication failed: %s.\n" % e)
    except:
        # This is ugly.
        if vars["DISCONNECTED_OPERATION"]:
            vars["API_SERVER_INST"] = None
예제 #18
0
 def call(self, func, *args):
     """Invoke the API function named func with args, using self.vars."""
     api_call = BootAPI.call_api_function
     return api_call(self.vars, func, args)
예제 #19
0
 def call(self, func, *args):
     """Invoke the API function named func with args, using self.vars."""
     api_call = BootAPI.call_api_function
     return api_call(self.vars, func, args)
예제 #20
0
def Run(vars, log):
    """
    Load the kernel off of a node and boot to it.

    The node partitions are mounted on SYSIMG_PATH here unless ROOT_MOUNTED
    says they already are.  If successful, this function will not return.
    If it returns, no chain booting has occurred.

    Expect the following variables:
    SYSIMG_PATH           the path where the system image will be mounted
                          (always starts with TEMP_PATH)
    PLCONF_DIR            The directory to store PL configuration files in
    NODE_SESSION          the unique session val set when we requested
                          the current boot state (may be blank)
    NODE_MODEL_OPTIONS    tested against ModelOptions.SMP with '&'
    PARTITIONS            indexed with "root" and "vservers"
    ONE_PARTITION         when '1', conf_files / NodeUpdate and the
                          vservers umount are skipped
    virt                  'vs' regenerates the initrd and boots the
                          kernel-boot / initrd-boot images
    ROOT_MOUNTED          (optional) the node root file system is mounted

    Sets the following variables:
    ROOT_MOUNTED          the node root file system is mounted

    Raises BootManagerException if SYSIMG_PATH, PLCONF_DIR, NODE_SESSION,
    NODE_MODEL_OPTIONS or PARTITIONS is missing, if SYSIMG_PATH or
    PLCONF_DIR is empty, or if PARTITIONS is None.  Raises Exception if no
    initrd image can be located.  ONE_PARTITION and virt are read without
    validation, so a missing key surfaces as a plain KeyError.

    Returns None.
    """

    log.write("\n\nStep: Chain booting node.\n")

    # make sure we have the variables we need
    try:
        SYSIMG_PATH = vars["SYSIMG_PATH"]
        if SYSIMG_PATH == "":
            raise ValueError("SYSIMG_PATH")

        PLCONF_DIR = vars["PLCONF_DIR"]
        if PLCONF_DIR == "":
            raise ValueError("PLCONF_DIR")

        # its ok if this is blank
        NODE_SESSION = vars["NODE_SESSION"]

        NODE_MODEL_OPTIONS = vars["NODE_MODEL_OPTIONS"]

        PARTITIONS = vars["PARTITIONS"]
        if PARTITIONS is None:
            raise ValueError("PARTITIONS")

    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    ROOT_MOUNTED = 0
    if 'ROOT_MOUNTED' in vars:
        ROOT_MOUNTED = vars['ROOT_MOUNTED']

    if ROOT_MOUNTED == 0:
        log.write("Mounting node partitions\n")

        # simply creating an instance of this class and listing the system
        # block devices will make them show up so vgscan can find the planetlab
        # volume group
        systeminfo.get_block_devices_dict(vars, log)

        utils.sysexec("vgscan", log)
        utils.sysexec("vgchange -ay planetlab", log)

        utils.makedirs(SYSIMG_PATH)

        cmd = "mount {} {}".format(PARTITIONS["root"], SYSIMG_PATH)
        utils.sysexec(cmd, log)
        cmd = "mount -t proc none {}/proc".format(SYSIMG_PATH)
        utils.sysexec(cmd, log)
        # NOTE(review): vservers is mounted unconditionally here but only
        # unmounted below when ONE_PARTITION != '1' - confirm this is intended.
        cmd = "mount {} {}/vservers".format(PARTITIONS["vservers"],
                                            SYSIMG_PATH)
        utils.sysexec(cmd, log)

        vars['ROOT_MOUNTED'] = 1

    utils.display_disks_status(PARTITIONS, "In ChainBootNode", log)

    # write out the session value /etc/planetlab/session
    try:
        session_file_path = "{}/{}/session".format(SYSIMG_PATH, PLCONF_DIR)
        # 'with' closes the file even when the write itself fails
        with open(session_file_path, "w") as session_file:
            session_file.write(str(NODE_SESSION))
        log.write("Updated /etc/planetlab/session\n")
    except IOError:
        log.write(
            "Unable to write out /etc/planetlab/session, continuing anyway\n")

    # update configuration files
    log.write("Updating configuration files.\n")
    # avoid using conf_files initscript as we're moving to systemd on some platforms

    if vars['ONE_PARTITION'] != '1':
        try:
            cmd = "/usr/bin/env python /usr/share/NodeManager/conf_files.py --noscripts"
            utils.sysexec_chroot(SYSIMG_PATH, cmd, log)
        except IOError as e:
            log.write("conf_files failed with \n {}".format(e))

        # update node packages
        log.write("Running node update.\n")
        if os.path.exists(SYSIMG_PATH + "/usr/bin/NodeUpdate.py"):
            cmd = "/usr/bin/NodeUpdate.py start noreboot"
        else:
            # for backwards compatibility
            cmd = "/usr/local/planetlab/bin/NodeUpdate.py start noreboot"
        utils.sysexec_chroot(SYSIMG_PATH, cmd, log)

    # Re-generate initrd right before kexec call
    # this is not required anymore on recent depls.
    if vars['virt'] == 'vs':
        MakeInitrd.Run(vars, log)

    # the following step should be done by NM
    UpdateNodeConfiguration.Run(vars, log)

    log.write("Updating ssh public host key with PLC.\n")
    ssh_host_key = ""
    try:
        key_path = "{}/etc/ssh/ssh_host_rsa_key.pub".format(SYSIMG_PATH)
        with open(key_path, "r") as ssh_host_key_file:
            ssh_host_key = ssh_host_key_file.read().strip()
    except IOError:
        # best effort: an empty key is reported, but leave a trace in the log
        log.write("Unable to read ssh public host key, continuing anyway\n")

    update_vals = {}
    update_vals['ssh_rsa_key'] = ssh_host_key
    BootAPI.call_api_function(vars, "BootUpdateNode", (update_vals, ))

    # get the kernel version
    option = ''
    if NODE_MODEL_OPTIONS & ModelOptions.SMP:
        option = 'smp'

    log.write("Copying kernel and initrd for booting.\n")
    if vars['virt'] == 'vs':
        utils.sysexec(
            "cp {}/boot/kernel-boot{} /tmp/kernel".format(SYSIMG_PATH, option),
            log)
        utils.sysexec(
            "cp {}/boot/initrd-boot{} /tmp/initrd".format(SYSIMG_PATH, option),
            log)
    else:
        # Use chroot to call rpm, b/c the bootimage&nodeimage rpm-versions may not work together
        try:
            kversion = os.popen(
                "chroot {} rpm -qa kernel | tail -1 | cut -c 8-"
                .format(SYSIMG_PATH)).read().rstrip()
            # Check if the string looks like a kernel version: an empty
            # or non-numeric start raises and triggers the fallback below
            int(kversion[0])
        except Exception:
            # Try a different method for non-rpm-based distributions.
            # The awk braces are doubled so that str.format emits them
            # literally instead of parsing them as a replacement field.
            kversion = os.popen(
                "ls -lrt {}/lib/modules | tail -1 | awk '{{print $9;}}'"
                .format(SYSIMG_PATH)).read().rstrip()

        utils.sysexec(
            "cp {}/boot/vmlinuz-{} /tmp/kernel".format(SYSIMG_PATH, kversion),
            log)
        candidates = []
        # f16/18: expect initramfs image here
        candidates.append("/boot/initramfs-{}.img".format(kversion))
        # f20: uses a uid of some kind, e.g. /boot/543f88c129de443baaa65800cf3927ce/<kversion>/initrd
        candidates.append("/boot/*/{}/initrd".format(kversion))
        # Ubuntu:
        candidates.append("/boot/initrd.img-{}".format(kversion))

        def find_file_in_sysimg(candidates):
            # return the first glob match under SYSIMG_PATH, or None
            import glob
            for pattern in candidates:
                matches = glob.glob(SYSIMG_PATH + pattern)
                log.write("locating initrd: found {} matches in {}\n".format(
                    len(matches), pattern))
                if matches:
                    return matches[0]
            return None

        initrd = find_file_in_sysimg(candidates)
        if initrd:
            utils.sysexec("cp {} /tmp/initrd".format(initrd), log)
        else:
            raise Exception("Unable to locate initrd - bailing out")

    BootAPI.save(vars)

    log.write("Unmounting disks.\n")

    if vars['ONE_PARTITION'] != '1':
        utils.sysexec("umount {}/vservers".format(SYSIMG_PATH), log)
    utils.sysexec("umount {}/proc".format(SYSIMG_PATH), log)
    utils.sysexec_noerr("umount {}/dev".format(SYSIMG_PATH), log)
    utils.sysexec_noerr("umount {}/sys".format(SYSIMG_PATH), log)
    utils.sysexec("umount {}".format(SYSIMG_PATH), log)
    utils.sysexec("vgchange -an", log)

    vars['ROOT_MOUNTED'] = 0

    # Change runlevel to 'boot' prior to kexec.
    StopRunlevelAgent.Run(vars, log)

    log.write("Unloading modules and chain booting to new kernel.\n")

    # further use of log after Upload will only output to screen
    log.Upload("/root/.bash_eternal_history")

    # regardless of whether kexec works or not, we need to stop trying to
    # run anything
    cancel_boot_flag = "/tmp/CANCEL_BOOT"
    utils.sysexec("touch {}".format(cancel_boot_flag), log)

    # on 2.x cds (2.4 kernel) for sure, we need to shutdown everything
    # to get kexec to work correctly. Even on 3.x cds (2.6 kernel),
    # there are a few buggy drivers that don't disable their hardware
    # correctly unless they are first unloaded.

    utils.sysexec_noerr("ifconfig eth0 down", log)

    utils.sysexec_noerr("killall dhclient", log)

    if vars['virt'] == 'vs':
        utils.sysexec_noerr("umount -a -r -t ext2,ext3", log)
    else:
        utils.sysexec_noerr("umount -a -r -t ext2,ext3,btrfs", log)
    utils.sysexec_noerr("modprobe -r lvm-mod", log)

    # modules that should not get unloaded
    # unloading cpqphp causes a kernel panic
    blacklist = ["floppy", "cpqphp", "i82875p_edac", "mptspi"]
    try:
        with open("/tmp/loadedmodules", "r") as modules:
            for line in modules:
                module = line.strip()
                if module in blacklist:
                    log.write(
                        "Skipping unload of kernel module '{}'.\n".format(
                            module))
                elif module != "":
                    log.write("Unloading {}\n".format(module))
                    utils.sysexec_noerr("modprobe -r {}".format(module), log)
                    if "e1000" in module:
                        log.write(
                            "Unloading e1000 driver; sleeping 4 seconds...\n")
                        time.sleep(4)
    except IOError:
        log.write("Couldn't read /tmp/loadedmodules, continuing.\n")

    # Each entry is one whitespace-split line of /proc/modules:
    # Module Size UsageCount UsedBy State LoadAddress
    proc_modules = []
    try:
        with open("/proc/modules", "r") as modules:
            proc_modules = [line.split() for line in modules]
    except IOError:
        log.write("Couldn't read /proc/modules, continuing.\n")

    # Get usage count for USB
    usb_usage = 0
    for parts in proc_modules:
        try:
            if parts[0] == "usb_storage":
                usb_usage += int(parts[2])
        except (IndexError, ValueError):
            # short line or non-numeric usage count: skip, do not abort
            log.write("Couldn't parse /proc/modules, continuing.\n")

    # While we would like to remove all "unused" modules,
    # you can't trust usage count, especially for things
    # like network drivers or RAID array drivers. Just try
    # and unload a few specific modules that we know cause
    # problems during chain boot, such as USB host
    # controller drivers (HCDs) (PL6577).
    # This unloading is switched off (the original condition was
    # hard-wired to False); the flag keeps that state explicit.
    unload_usb_hcds = False
    for parts in proc_modules:
        try:
            if unload_usb_hcds and re.search('_hcd$', parts[0]):
                if usb_usage > 0:
                    log.write("NOT unloading {} since USB may be in use\n".
                              format(parts[0]))
                else:
                    log.write("Unloading {}\n".format(parts[0]))
                    utils.sysexec_noerr("modprobe -r {}".format(parts[0]),
                                        log)
        except IndexError:
            log.write("Couldn't parse /proc/modules, continuing.\n")

    kargs = "root={} ramdisk_size=8192".format(PARTITIONS["root"])
    if NODE_MODEL_OPTIONS & ModelOptions.SMP:
        kargs = kargs + " " + "acpi=off"
    try:
        with open("/kargs.txt", "r") as kargsfb:
            moreargs = kargsfb.readline()
        moreargs = moreargs.strip()
        log.write(
            'Parsed in "{}" kexec args from /kargs.txt\n'.format(moreargs))
        kargs = kargs + " " + moreargs
    except IOError:
        # /kargs.txt does not exist, which is fine. Just kexec with default
        # kargs, which is ramdisk_size=8192
        pass

    utils.sysexec_noerr('hwclock --systohc --utc ', log)
    #    utils.breakpoint("Before kexec");
    try:
        utils.sysexec(
            'kexec --force --initrd=/tmp/initrd --append="{}" /tmp/kernel'.
            format(kargs), log)
    except BootManagerException as e:
        # if kexec fails, we've shut the machine down to a point where nothing
        # can run usefully anymore (network down, all modules unloaded, file
        # systems unmounted. write out the error, and cancel the boot process

        log.write("\n\n")
        log.write("-------------------------------------------------------\n")
        log.write("kexec failed with the following error. Please report\n")
        log.write("this problem to [email protected].\n\n")
        log.write(str(e) + "\n\n")
        log.write("The boot process has been canceled.\n")
        log.write(
            "-------------------------------------------------------\n\n")

    return
        WAS_NODE_ID_IN_CONF = vars["WAS_NODE_ID_IN_CONF"]
        if WAS_NODE_ID_IN_CONF == "":
            raise ValueError, "WAS_NODE_ID_IN_CONF"

        WAS_NODE_KEY_IN_CONF = vars["WAS_NODE_KEY_IN_CONF"]
        if WAS_NODE_KEY_IN_CONF == "":
            raise ValueError, "WAS_NODE_KEY_IN_CONF"

    except KeyError, var:
        raise BootManagerException, "Missing variable in vars: %s\n" % var
    except ValueError, var:
        raise BootManagerException, "Variable in vars, shouldn't be: %s\n" % var

    node_details = BootAPI.call_api_function(
        vars, "GetNodes", (vars["NODE_ID"], ["boot_state", "nodegroup_ids", "interface_ids", "model", "site_id"])
    )[0]

    vars["BOOT_STATE"] = node_details["boot_state"]
    vars["RUN_LEVEL"] = node_details["boot_state"]
    vars["NODE_MODEL"] = string.strip(node_details["model"])
    vars["SITE_ID"] = node_details["site_id"]
    log.write("Successfully retrieved node record.\n")
    log.write("Current boot state: %s\n" % vars["BOOT_STATE"])
    log.write("Node make/model: %s\n" % vars["NODE_MODEL"])

    # parse in the model options from the node_model string
    model = vars["NODE_MODEL"]
    options = ModelOptions.Get(model)
    vars["NODE_MODEL_OPTIONS"] = options
예제 #22
0

def Run( vars, log ):
    """
    Stop the RunlevelAgent.py script.  Should precede kexec, so that
    run_level is reset to 'boot' before kexec.

    Expect the following variables:
    BM_SOURCE_DIR         the directory that holds RunlevelAgent.py

    Raises a BootManagerException if BM_SOURCE_DIR is missing from vars.
    os.stat raises OSError if the script is not present.  A failure to
    report the run level to PLC is only logged.

    Return 1.
    """

    log.write( "\n\nStep: Stopping RunlevelAgent.py\n" )

    try:
        cmd = "%s/RunlevelAgent.py" % vars['BM_SOURCE_DIR']
        # raise error if script is not present.
        os.stat(cmd)
        os.system("/usr/bin/python %s stop" % cmd)
    except KeyError as var:
        raise BootManagerException("Missing variable in vars: %s\n" % var)
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: %s\n" % var)

    try:
        update_vals = {'run_level': 'boot'}
        BootAPI.call_api_function( vars, "ReportRunlevel", (update_vals,) )
    except BootManagerException as e:
        log.write( "Unable to update boot state for this node at PLC: %s.\n" % e )

    return 1