Exemple #1
0
def Run(vars, log):
    """
    Find any new large block devices we can add to the vservers volume group
    
    Expect the following variables to be set:
    SYSIMG_PATH          the path where the system image will be mounted
    MINIMUM_DISK_SIZE       any disks smaller than this size, in GB, are not used
    NODE_MODEL_OPTIONS   the node's model options
    
    Set the following variables upon successfully running:
    ROOT_MOUNTED             the node root file system is mounted
    """

    log.write("\n\nStep: Checking for unused disks to add to LVM.\n")

    # make sure we have the variables we need
    try:
        SYSIMG_PATH = vars["SYSIMG_PATH"]
        if SYSIMG_PATH == "":
            raise ValueError("SYSIMG_PATH")

        MINIMUM_DISK_SIZE = int(vars["MINIMUM_DISK_SIZE"])

        PARTITIONS = vars["PARTITIONS"]
        if PARTITIONS == None:
            raise ValueError("PARTITIONS")

        NODE_MODEL_OPTIONS = vars["NODE_MODEL_OPTIONS"]
    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    devices_dict = systeminfo.get_block_devices_dict(vars, log)

    # will contain the new devices to add to the volume group
    new_devices = []

    # total amount of new space in gb
    extended_gb_size = 0

    utils.display_disks_status(PARTITIONS, "In CheckForNewDisks", log)

    for device, details in devices_dict.items():

        (major, minor, blocks, gb_size, readonly) = details

        if device[:14] == "/dev/planetlab":
            log.write("Skipping device {} in volume group.\n".format(device))
            continue

        if readonly:
            log.write("Skipping read only device {}\n".format(device))
            continue

        if gb_size < MINIMUM_DISK_SIZE:
            log.write("Skipping too small device {} ({:4.2f}) Gb\n"\
                      .format(device, gb_size))
            continue

        log.write("Checking device {} to see if it is part " \
                   "of the volume group.\n".format(device))

        # Thierry - June 2015
        # when introducing the 'upgrade' verb, we ran into the situation
        # where 'pvdisplay' at this point displays e.g. /dev/sda, instead
        # of /dev/sda1
        # we thus consider that if either of these is known, then
        # the disk is already part of LVM
        first_partition = InstallPartitionDisks.get_partition_path_from_device(
            device, vars, log)
        probe_first_part = "pvdisplay {} | grep -q planetlab".format(
            first_partition)
        probe_device = "pvdisplay {} | grep -q planetlab".format(device)
        already_added = utils.sysexec_noerr(probe_first_part, log, shell=True) \
                     or utils.sysexec_noerr(probe_device, log, shell=True)

        if already_added:
            log.write("It appears {} is part of the volume group, continuing.\n"\
                      .format(device))
            continue

        # just to be extra paranoid, ignore the device if it already has
        # an lvm partition on it (new disks won't have this, and that is
        # what this code is for, so it should be ok).
        cmd = "parted --script --list {} | grep -q lvm$".format(device)
        has_lvm = utils.sysexec_noerr(cmd, log, shell=True)
        if has_lvm:
            log.write(
                "It appears {} has lvm already setup on it.\n".format(device))
            paranoid = False
            if paranoid:
                log.write(
                    "Too paranoid to add {} to vservers lvm.\n".format(device))
                continue

        if not InstallPartitionDisks.single_partition_device(
                device, vars, log):
            log.write("Unable to partition {}, not using it.\n".format(device))
            continue

        log.write("Successfully partitioned {}\n".format(device))

        if NODE_MODEL_OPTIONS & ModelOptions.RAWDISK:
            log.write("Running on a raw disk node, not using it.\n")
            continue

        part_path = InstallPartitionDisks.get_partition_path_from_device(
            device, vars, log)

        log.write("Attempting to add {} to the volume group\n".format(device))

        if not InstallPartitionDisks.create_lvm_physical_volume(
                part_path, vars, log):
            log.write("Unable to create lvm physical volume {}, not using it.\n"\
                      .format(part_path))
            continue

        log.write("Adding {} to list of devices to add to "
                  "planetlab volume group.\n".format(device))

        extended_gb_size = extended_gb_size + gb_size
        new_devices.append(part_path)

    if len(new_devices) > 0:

        log.write("Extending planetlab volume group.\n")

        log.write("Unmounting disks.\n")
        try:
            # backwards compat, though, we should never hit this case post PL 3.2
            os.stat("{}/rcfs/taskclass".format(SYSIMG_PATH))
            utils.sysexec_chroot_noerr(SYSIMG_PATH, "umount /rcfs", log)
        except OSError as e:
            pass

        # umount in order to extend disk size
        utils.sysexec_noerr("umount {}/proc".format(SYSIMG_PATH), log)
        utils.sysexec_noerr("umount {}/vservers".format(SYSIMG_PATH), log)
        utils.sysexec_noerr("umount {}".format(SYSIMG_PATH), log)
        utils.sysexec("vgchange -an", log)

        vars['ROOT_MOUNTED'] = 0

        while True:
            cmd = "vgextend planetlab {}".format(" ".join(new_devices))
            if not utils.sysexec_noerr(cmd, log):
                log.write("Failed to add physical volumes {} to "\
                           "volume group, continuing.\n".format(" ".join(new_devices)))
                res = 1
                break

            # now, get the number of unused extents, and extend the vserver
            # logical volume by that much.
            remaining_extents = \
               InstallPartitionDisks.get_remaining_extents_on_vg(vars, log)

            log.write("Extending vservers logical volume.\n")
            utils.sysexec("vgchange -ay", log)
            cmd = "lvextend -l +{} {}".format(remaining_extents,
                                              PARTITIONS["vservers"])
            if not utils.sysexec_noerr(cmd, log):
                log.write(
                    "Failed to extend vservers logical volume, continuing\n")
                res = 1
                break

            log.write(
                "making the ext filesystem match new logical volume size.\n")

            vars['ROOT_MOUNTED'] = 1
            cmd = "mount {} {}".format(PARTITIONS["root"], SYSIMG_PATH)
            utils.sysexec_noerr(cmd, log)
            cmd = "mount {} {}/vservers".format(PARTITIONS["vservers"],
                                                SYSIMG_PATH)
            utils.sysexec_noerr(cmd, log)
            cmd = "resize2fs {}".format(PARTITIONS["vservers"])
            resize = utils.sysexec_noerr(cmd, log)
            utils.sysexec_noerr("umount {}/vservers".format(SYSIMG_PATH), log)
            utils.sysexec_noerr("umount {}".format(SYSIMG_PATH), log)
            vars['ROOT_MOUNTED'] = 0

            utils.sysexec("vgchange -an", log)

            if not resize:
                log.write("Failed to resize vservers partition, continuing.\n")
                res = 1
                break
            else:
                log.write("Extended vservers partition by {:4.2f} Gb\n"\
                          .format(extended_gb_size))
                res = 1
                break

    else:
        log.write("No new disk devices to add to volume group.\n")
        res = 1

    return res
Exemple #2
0
def Run(vars, log):
    """
    See if a node installation is valid. More checks should certainly be
    done in the future, but for now, make sure that the sym links kernel-boot
    exist in /boot
    
    Expect the following variables to be set:
    SYSIMG_PATH              the path where the system image will be mounted
                             (always starts with TEMP_PATH)
    ROOT_MOUNTED             the node root file system is mounted
    NODE_ID                  The db node_id for this machine
    PLCONF_DIR               The directory to store the configuration file in
    
    Set the following variables upon successfully running:
    ROOT_MOUNTED             the node root file system is mounted
    """

    log.write("\n\nStep: Validating node installation.\n")

    # make sure we have the variables we need
    try:
        SYSIMG_PATH = vars["SYSIMG_PATH"]
        if SYSIMG_PATH == "":
            raise ValueError("SYSIMG_PATH")

        NODE_ID = vars["NODE_ID"]
        if NODE_ID == "":
            raise ValueError("NODE_ID")

        PLCONF_DIR = vars["PLCONF_DIR"]
        if PLCONF_DIR == "":
            raise ValueError("PLCONF_DIR")
        
        NODE_MODEL_OPTIONS = vars["NODE_MODEL_OPTIONS"]

        PARTITIONS = vars["PARTITIONS"]
        if PARTITIONS == None:
            raise ValueError("PARTITIONS")

    except KeyError as var:
        raise BootManagerException("Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException("Variable in vars, shouldn't be: {}\n".format(var))


    ROOT_MOUNTED = 0
    if vars.has_key('ROOT_MOUNTED'):
        ROOT_MOUNTED = vars['ROOT_MOUNTED']

    # mount the root system image if we haven't already.
    # capture BootManagerExceptions during the vgscan/change and mount
    # calls, so we can return 0 instead
    if ROOT_MOUNTED == 0:
            
        # simply creating an instance of this class and listing the system
        # block devices will make them show up so vgscan can find the planetlab
        # volume group
        systeminfo.get_block_devices_dict(vars, log)

        try:
            utils.sysexec("vgscan", log)
            utils.sysexec("vgchange -ay planetlab", log)
        except BootManagerException as e:
            log.write("BootManagerException during vgscan/vgchange: {}\n".format(e))
            return 0
            
        utils.makedirs(SYSIMG_PATH)

        # xxx - TODO - need to fsck the btrfs partition
        if vars['virt'] == 'vs':
            filesystems_tocheck = ['root', 'vservers']
        else:
            filesystems_tocheck = ['root']

        for filesystem in filesystems_tocheck:
            try:
                # first run fsck to prevent fs corruption from hanging mount...
                log.write("fsck {} file system\n".format(filesystem))
                utils.sysexec("e2fsck -v -p {}".format(PARTITIONS[filesystem]), log, fsck=True)
            except BootManagerException as e:
                log.write("BootManagerException during fsck of {} ({}) filesystem : {}\n"\
                          .format(filesystem, PARTITIONS[filesystem], str(e)))
                try:
                    log.write("Trying to recover filesystem errors on {}\n".format(filesystem))
                    utils.sysexec("e2fsck -v -y {}".format(PARTITIONS[filesystem]), log, fsck=True)
                except BootManagerException as e:
                    log.write("BootManagerException while trying to recover"
                              "filesystem errors on {} ({}) filesystem : {}\n"
                              .format(filesystem, PARTITIONS[filesystem], str(e)))
                    return -1
            else:
                # disable time/count based filesystems checks
                utils.sysexec_noerr("tune2fs -c -1 -i 0 {}".format(PARTITIONS[filesystem]), log)

        try:
            # then attempt to mount them
            log.write("mounting root file system\n")
            utils.sysexec("mount -t ext3 {} {}".format(PARTITIONS["root"], SYSIMG_PATH),log)
        except BootManagerException as e:
            log.write("BootManagerException during mount of /root: {}\n".format(str(e)))
            return -2
            
        try:
            PROC_PATH = "{}/proc".format(SYSIMG_PATH)
            utils.makedirs(PROC_PATH)
            log.write("mounting /proc\n")
            utils.sysexec("mount -t proc none {}".format(PROC_PATH), log)
        except BootManagerException as e:
            log.write("BootManagerException during mount of /proc: {}\n".format(str(e)))
            return -2


        one_partition = vars['ONE_PARTITION']=='1'

        if (not one_partition):
            try:
                VSERVERS_PATH = "{}/vservers".format(SYSIMG_PATH)
                utils.makedirs(VSERVERS_PATH)
                log.write("mounting vservers partition in root file system\n")
                if vars['virt'] == 'vs':
                    utils.sysexec("mount -t ext3 {} {}".format(PARTITIONS["vservers"], VSERVERS_PATH), log)
                else:
                    utils.sysexec("mount -t btrfs {} {}".format(PARTITIONS["vservers"], VSERVERS_PATH), log)
            except BootManagerException as e:
                log.write("BootManagerException while mounting /vservers: {}\n".format(str(e)))
                return -2

        ROOT_MOUNTED = 1
        vars['ROOT_MOUNTED'] = 1
        
    # check if the base kernel is installed 
    # these 2 links are created by our kernel's post-install scriplet
    log.write("Checking for a custom kernel\n")
    try:
        if vars['virt'] == 'vs':
            os.stat("{}/boot/kernel-boot".format(SYSIMG_PATH))
        else:
            try:
                kversion = os.popen("chroot {} rpm -qa kernel | tail -1 | cut -c 8-"\
                                    .format(SYSIMG_PATH)).read().rstrip()
                os.stat("{}/boot/vmlinuz-{}".format(SYSIMG_PATH, kversion))
                major_version = int(kversion[0]) # Check if the string looks like a kernel version
            except:
                kversion = os.popen("ls -lrt {}/lib/modules | tail -1 | awk '{print $9;}'"\
                                    .format(SYSIMG_PATH)).read().rstrip()
    except OSError as e:            
        log.write("Couldn't locate base kernel (you might be using the stock kernel).\n")
        return -3

    # check if the model specified kernel is installed
    option = ''
    if NODE_MODEL_OPTIONS & ModelOptions.SMP:
        option = 'smp'
        try:
            os.stat("{}/boot/kernel-boot{}".format(SYSIMG_PATH, option))
        except OSError as e:
            # smp kernel is not there; remove option from modeloptions
            # such that the rest of the code base thinks we are just
            # using the base kernel.
            NODE_MODEL_OPTIONS = NODE_MODEL_OPTIONS & ~ModelOptions.SMP
            vars["NODE_MODEL_OPTIONS"] = NODE_MODEL_OPTIONS
            log.write("WARNING: Couldn't locate smp kernel.\n")
            
    # write out the node id to /etc/planetlab/node_id. if this fails, return
    # 0, indicating the node isn't a valid install.
    try:
        node_id_file_path = "{}/{}/node_id".format(SYSIMG_PATH, PLCONF_DIR)
        node_id_file = file(node_id_file_path, "w")
        node_id_file.write(str(NODE_ID))
        node_id_file.close()
        node_id_file = None
        log.write("Updated /etc/planetlab/node_id\n")
    except IOError as e:
        log.write("Unable to write out /etc/planetlab/node_id\n")
        return 0

    log.write("Node installation appears to be ok\n")
    
    return 1
Exemple #3
0
def Run(vars, log):
    """
    Load the kernel off of a node and boot to it.
    This step assumes the disks are mounted on SYSIMG_PATH.
    If successful, this function will not return. If it returns, no chain
    booting has occurred.
    
    Expect the following variables:
    SYSIMG_PATH           the path where the system image will be mounted
                          (always starts with TEMP_PATH)
    ROOT_MOUNTED          the node root file system is mounted
    NODE_SESSION             the unique session val set when we requested
                             the current boot state
    PLCONF_DIR               The directory to store PL configuration files in
    
    Sets the following variables:
    ROOT_MOUNTED          the node root file system is mounted
    """

    log.write("\n\nStep: Chain booting node.\n")

    # make sure we have the variables we need
    try:
        SYSIMG_PATH = vars["SYSIMG_PATH"]
        if SYSIMG_PATH == "":
            raise ValueError("SYSIMG_PATH")

        PLCONF_DIR = vars["PLCONF_DIR"]
        if PLCONF_DIR == "":
            raise ValueError("PLCONF_DIR")

        # its ok if this is blank
        NODE_SESSION = vars["NODE_SESSION"]

        NODE_MODEL_OPTIONS = vars["NODE_MODEL_OPTIONS"]

        PARTITIONS = vars["PARTITIONS"]
        if PARTITIONS == None:
            raise ValueError("PARTITIONS")

    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    ROOT_MOUNTED = 0
    if vars.has_key('ROOT_MOUNTED'):
        ROOT_MOUNTED = vars['ROOT_MOUNTED']

    if ROOT_MOUNTED == 0:
        log.write("Mounting node partitions\n")

        # simply creating an instance of this class and listing the system
        # block devices will make them show up so vgscan can find the planetlab
        # volume group
        systeminfo.get_block_devices_dict(vars, log)

        utils.sysexec("vgscan", log)
        utils.sysexec("vgchange -ay planetlab", log)

        utils.makedirs(SYSIMG_PATH)

        cmd = "mount {} {}".format(PARTITIONS["root"], SYSIMG_PATH)
        utils.sysexec(cmd, log)
        cmd = "mount -t proc none {}/proc".format(SYSIMG_PATH)
        utils.sysexec(cmd, log)
        cmd = "mount {} {}/vservers".format(PARTITIONS["vservers"],
                                            SYSIMG_PATH)
        utils.sysexec(cmd, log)

        ROOT_MOUNTED = 1
        vars['ROOT_MOUNTED'] = 1

    utils.display_disks_status(PARTITIONS, "In ChainBootNode", log)

    # write out the session value /etc/planetlab/session
    try:
        session_file_path = "{}/{}/session".format(SYSIMG_PATH, PLCONF_DIR)
        session_file = file(session_file_path, "w")
        session_file.write(str(NODE_SESSION))
        session_file.close()
        session_file = None
        log.write("Updated /etc/planetlab/session\n")
    except IOError as e:
        log.write(
            "Unable to write out /etc/planetlab/session, continuing anyway\n")

    # update configuration files
    log.write("Updating configuration files.\n")
    # avoid using conf_files initscript as we're moving to systemd on some platforms

    if (vars['ONE_PARTITION'] != '1'):
        try:
            cmd = "/usr/bin/env python /usr/share/NodeManager/conf_files.py --noscripts"
            utils.sysexec_chroot(SYSIMG_PATH, cmd, log)
        except IOError as e:
            log.write("conf_files failed with \n {}".format(e))

        # update node packages
        log.write("Running node update.\n")
        if os.path.exists(SYSIMG_PATH + "/usr/bin/NodeUpdate.py"):
            cmd = "/usr/bin/NodeUpdate.py start noreboot"
        else:
            # for backwards compatibility
            cmd = "/usr/local/planetlab/bin/NodeUpdate.py start noreboot"
        utils.sysexec_chroot(SYSIMG_PATH, cmd, log)

    # Re-generate initrd right before kexec call
    # this is not required anymore on recent depls.
    if vars['virt'] == 'vs':
        MakeInitrd.Run(vars, log)

    # the following step should be done by NM
    UpdateNodeConfiguration.Run(vars, log)

    log.write("Updating ssh public host key with PLC.\n")
    ssh_host_key = ""
    try:
        ssh_host_key_file = file(
            "{}/etc/ssh/ssh_host_rsa_key.pub".format(SYSIMG_PATH), "r")
        ssh_host_key = ssh_host_key_file.read().strip()
        ssh_host_key_file.close()
        ssh_host_key_file = None
    except IOError as e:
        pass

    update_vals = {}
    update_vals['ssh_rsa_key'] = ssh_host_key
    BootAPI.call_api_function(vars, "BootUpdateNode", (update_vals, ))

    # get the kernel version
    option = ''
    if NODE_MODEL_OPTIONS & ModelOptions.SMP:
        option = 'smp'

    log.write("Copying kernel and initrd for booting.\n")
    if vars['virt'] == 'vs':
        utils.sysexec(
            "cp {}/boot/kernel-boot{} /tmp/kernel".format(SYSIMG_PATH, option),
            log)
        utils.sysexec(
            "cp {}/boot/initrd-boot{} /tmp/initrd".format(SYSIMG_PATH, option),
            log)
    else:
        # Use chroot to call rpm, b/c the bootimage&nodeimage rpm-versions may not work together
        try:
            kversion = os.popen("chroot {} rpm -qa kernel | tail -1 | cut -c 8-"\
                                .format(SYSIMG_PATH)).read().rstrip()
            major_version = int(
                kversion[0])  # Check if the string looks like a kernel version
        except:
            # Try a different method for non-rpm-based distributions
            kversion = os.popen("ls -lrt {}/lib/modules | tail -1 | awk '{print $9;}'"\
                                .format(SYSIMG_PATH)).read().rstrip()

        utils.sysexec(
            "cp {}/boot/vmlinuz-{} /tmp/kernel".format(SYSIMG_PATH, kversion),
            log)
        candidates = []
        # f16/18: expect initramfs image here
        candidates.append("/boot/initramfs-{}.img".format(kversion))
        # f20: uses a uid of some kind, e.g. /boot/543f88c129de443baaa65800cf3927ce/<kversion>/initrd
        candidates.append("/boot/*/{}/initrd".format(kversion))
        # Ubuntu:
        candidates.append("/boot/initrd.img-{}".format(kversion))

        def find_file_in_sysimg(candidates):
            import glob
            for pattern in candidates:
                matches = glob.glob(SYSIMG_PATH + pattern)
                log.write("locating initrd: found {} matches in {}\n".format(
                    len(matches), pattern))
                if matches:
                    return matches[0]

        initrd = find_file_in_sysimg(candidates)
        if initrd:
            utils.sysexec("cp {} /tmp/initrd".format(initrd), log)
        else:
            raise Exception("Unable to locate initrd - bailing out")

    BootAPI.save(vars)

    log.write("Unmounting disks.\n")

    if (vars['ONE_PARTITION'] != '1'):
        utils.sysexec("umount {}/vservers".format(SYSIMG_PATH), log)
    utils.sysexec("umount {}/proc".format(SYSIMG_PATH), log)
    utils.sysexec_noerr("umount {}/dev".format(SYSIMG_PATH), log)
    utils.sysexec_noerr("umount {}/sys".format(SYSIMG_PATH), log)
    utils.sysexec("umount {}".format(SYSIMG_PATH), log)
    utils.sysexec("vgchange -an", log)

    ROOT_MOUNTED = 0
    vars['ROOT_MOUNTED'] = 0

    # Change runlevel to 'boot' prior to kexec.
    StopRunlevelAgent.Run(vars, log)

    log.write("Unloading modules and chain booting to new kernel.\n")

    # further use of log after Upload will only output to screen
    log.Upload("/root/.bash_eternal_history")

    # regardless of whether kexec works or not, we need to stop trying to
    # run anything
    cancel_boot_flag = "/tmp/CANCEL_BOOT"
    utils.sysexec("touch {}".format(cancel_boot_flag), log)

    # on 2.x cds (2.4 kernel) for sure, we need to shutdown everything
    # to get kexec to work correctly. Even on 3.x cds (2.6 kernel),
    # there are a few buggy drivers that don't disable their hardware
    # correctly unless they are first unloaded.

    utils.sysexec_noerr("ifconfig eth0 down", log)

    utils.sysexec_noerr("killall dhclient", log)

    if vars['virt'] == 'vs':
        utils.sysexec_noerr("umount -a -r -t ext2,ext3", log)
    else:
        utils.sysexec_noerr("umount -a -r -t ext2,ext3,btrfs", log)
    utils.sysexec_noerr("modprobe -r lvm-mod", log)

    # modules that should not get unloaded
    # unloading cpqphp causes a kernel panic
    blacklist = ["floppy", "cpqphp", "i82875p_edac", "mptspi"]
    try:
        modules = file("/tmp/loadedmodules", "r")

        for line in modules:
            module = string.strip(line)
            if module in blacklist:
                log.write(
                    "Skipping unload of kernel module '{}'.\n".format(module))
            elif module != "":
                log.write("Unloading {}\n".format(module))
                utils.sysexec_noerr("modprobe -r {}".format(module), log)
                if "e1000" in module:
                    log.write(
                        "Unloading e1000 driver; sleeping 4 seconds...\n")
                    time.sleep(4)

        modules.close()
    except IOError:
        log.write("Couldn't read /tmp/loadedmodules, continuing.\n")

    try:
        modules = file("/proc/modules", "r")

        # Get usage count for USB
        usb_usage = 0
        for line in modules:
            try:
                # Module Size UsageCount UsedBy State LoadAddress
                parts = string.split(line)

                if parts[0] == "usb_storage":
                    usb_usage += int(parts[2])
            except IndexError as e:
                log.write("Couldn't parse /proc/modules, continuing.\n")

        modules.seek(0)

        for line in modules:
            try:
                # Module Size UsageCount UsedBy State LoadAddress
                parts = string.split(line)

                # While we would like to remove all "unused" modules,
                # you can't trust usage count, especially for things
                # like network drivers or RAID array drivers. Just try
                # and unload a few specific modules that we know cause
                # problems during chain boot, such as USB host
                # controller drivers (HCDs) (PL6577).
                # if int(parts[2]) == 0:
                if False and re.search('_hcd$', parts[0]):
                    if usb_usage > 0:
                        log.write("NOT unloading {} since USB may be in use\n".
                                  format(parts[0]))
                    else:
                        log.write("Unloading {}\n".format(parts[0]))
                        utils.sysexec_noerr("modprobe -r {}".format(parts[0]),
                                            log)
            except IndexError as e:
                log.write("Couldn't parse /proc/modules, continuing.\n")
    except IOError:
        log.write("Couldn't read /proc/modules, continuing.\n")

    kargs = "root={} ramdisk_size=8192".format(PARTITIONS["root"])
    if NODE_MODEL_OPTIONS & ModelOptions.SMP:
        kargs = kargs + " " + "acpi=off"
    try:
        kargsfb = open("/kargs.txt", "r")
        moreargs = kargsfb.readline()
        kargsfb.close()
        moreargs = moreargs.strip()
        log.write(
            'Parsed in "{}" kexec args from /kargs.txt\n'.format(moreargs))
        kargs = kargs + " " + moreargs
    except IOError:
        # /kargs.txt does not exist, which is fine. Just kexec with default
        # kargs, which is ramdisk_size=8192
        pass

    utils.sysexec_noerr('hwclock --systohc --utc ', log)
    #    utils.breakpoint("Before kexec");
    try:
        utils.sysexec(
            'kexec --force --initrd=/tmp/initrd --append="{}" /tmp/kernel'.
            format(kargs), log)
    except BootManagerException as e:
        # if kexec fails, we've shut the machine down to a point where nothing
        # can run usefully anymore (network down, all modules unloaded, file
        # systems unmounted. write out the error, and cancel the boot process

        log.write("\n\n")
        log.write("-------------------------------------------------------\n")
        log.write("kexec failed with the following error. Please report\n")
        log.write("this problem to [email protected].\n\n")
        log.write(str(e) + "\n\n")
        log.write("The boot process has been canceled.\n")
        log.write(
            "-------------------------------------------------------\n\n")

    return
def Run(vars, log):
    """
    Make sure the hardware we are running on is sufficient for
    the PlanetLab OS to be installed on. In the process, identify
    the list of block devices that may be used for a node installation,
    and identify the cdrom device that we booted off of.

    Return 1 if requiremenst met, 0 if requirements not met. Raise
    BootManagerException if any problems occur that prevent the requirements
    from being checked.

    Expect the following variables from the store:

    MINIMUM_MEMORY          minimum amount of memory in kb required
                            for install
    NODE_ID                 the node_id from the database for this node
    MINIMUM_DISK_SIZE       any disks smaller than this size, in GB, are not used
    TOTAL_MINIMUM_DISK_SIZE total disk size in GB, if all usable disks
                            meet this number, there isn't enough disk space for
                            this node to be usable after install
    SKIP_HARDWARE_REQUIREMENT_CHECK
                            If set, don't check if minimum requirements are met
    Sets the following variables:
    INSTALL_BLOCK_DEVICES    list of block devices to install onto
    """

    log.write("\n\nStep: Checking if hardware requirements met.\n")        
        
    try:
        MINIMUM_MEMORY = int(vars["MINIMUM_MEMORY"])
        if MINIMUM_MEMORY == "":
            raise ValueError("MINIMUM_MEMORY")

        NODE_ID = vars["NODE_ID"]
        if NODE_ID == "":
            raise ValueError("NODE_ID")

        MINIMUM_DISK_SIZE = int(vars["MINIMUM_DISK_SIZE"])

        # use vs_ or lxc_variants
        varname = vars['virt'] + "_TOTAL_MINIMUM_DISK_SIZE"
        TOTAL_MINIMUM_DISK_SIZE = int(vars[varname])

        SKIP_HARDWARE_REQUIREMENT_CHECK = int(vars["SKIP_HARDWARE_REQUIREMENT_CHECK"])
        
    except KeyError as var:
        raise BootManagerException("Missing variable in install store: {}".format(var))
    except ValueError as var:
        raise BootManagerException("Variable in install store blank, shouldn't be: {}".format(var))

    # lets see if we have enough memory to run
    log.write("Checking for available memory.\n")

    total_mem = systeminfo.get_total_phsyical_mem(vars, log)
    if total_mem is None:
        raise BootManagerException("Unable to read total physical memory")
        
    if total_mem < MINIMUM_MEMORY:
        if not SKIP_HARDWARE_REQUIREMENT_CHECK:
            log.write("Insufficient memory to run node: {} kb\n".format(total_mem))
            log.write("Required memory: {} kb\n".format(MINIMUM_MEMORY))

            include_pis = 0
            include_techs = 1
            include_support = 0
            
            sent = 0
            try:
                sent = BootAPI.call_api_function(vars, "BootNotifyOwners",
                                                 (notify_messages.MSG_INSUFFICIENT_MEMORY,
                                                  include_pis,
                                                  include_techs,
                                                  include_support))
            except BootManagerException as e:
                log.write("Call to BootNotifyOwners failed: {}.\n".format(e))
                
            if sent == 0:
                log.write("Unable to notify site contacts of problem.\n")
            else:
                log.write("Notified contacts of problem.\n")
                
            return 0
        else:
            log.write("Memory requirements not met, but running anyway: {} kb\n"
                      .format(total_mem))
    else:
        log.write("Looks like we have enough memory: {} kb\n".format(total_mem))



    # get a list of block devices to attempt to install on
    # (may include cdrom devices)
    install_devices = systeminfo.get_block_devices_dict(vars, log)

    # save the list of block devices in the log
    log.write("Detected block devices:\n")
    log.write(repr(install_devices) + "\n")

    if not install_devices or len(install_devices) == 0:
        log.write("No block devices detected.\n")
        
        include_pis = 0
        include_techs = 1
        include_support = 0
        
        sent = 0
        try:
            sent = BootAPI.call_api_function(vars, "BootNotifyOwners",
                                             (notify_messages.MSG_INSUFFICIENT_DISK,
                                              include_pis,
                                              include_techs,
                                              include_support))
        except BootManagerException as e:
            log.write("Call to BootNotifyOwners failed: {}.\n".format(e))
            
        if sent == 0:
            log.write("Unable to notify site contacts of problem.\n")

        return 0

    # now, lets remove any block devices we know won't work (readonly,cdroms),
    # or could be other writable removable disks (usb keychains, zip disks, etc)
    # i'm not aware of anything that helps with the latter test, so,
    # what we'll probably do is simply not use any block device below
    # some size threshold (set in installstore)

    # also, keep track of the total size for all devices that appear usable
    total_size = 0

    # do not modify subject of current loop
    ignored_devices = []
    for device, details in install_devices.items():

        major, minor, blocks, gb_size, readonly = details
        
        # if the device string starts with
        # planetlab or dm- (device mapper), ignore it (could be old lvm setup)
        if device[:14] == "/dev/planetlab" or device[:8] == "/dev/dm-":
            ignored_devices.append(device)
            continue

        if gb_size < MINIMUM_DISK_SIZE:
            log.write("Device is too small to use: {} \n"
                      "(appears to be {:4.2f} Gb)\n".format(device, gb_size))
            ignored_devices.append(device)
            continue

        if readonly:
            log.write("Device is readonly, not using: {}\n".format(device))
            ignored_devices.append(device)
            continue
            
        # add this sector count to the total count of usable
        # sectors we've found.
        total_size = total_size + gb_size

    # delayed erasure
    for device in ignored_devices:
        try:
            del install_devices[device]
        except KeyError as e:
            pass

    if len(install_devices) == 0:
        log.write("No suitable block devices found for install.\n")

        include_pis = 0
        include_techs = 1
        include_support = 0
        
        sent = 0
        try:
            sent = BootAPI.call_api_function(vars, "BootNotifyOwners",
                                             (notify_messages.MSG_INSUFFICIENT_DISK,
                                              include_pis,
                                              include_techs,
                                              include_support))
        except BootManagerException as e:
            log.write("Call to BootNotifyOwners failed: {}.\n".format(e))
            
        if sent == 0:
            log.write("Unable to notify site contacts of problem.\n")

        return 0


    # show the devices we found that are usable
    log.write("Usable block devices:\n")
    log.write(repr(install_devices.keys()) + "\n")

    # save the list of devices for the following steps
    vars["INSTALL_BLOCK_DEVICES"] = install_devices.keys()


    # ensure the total disk size is large enough. if
    # not, we need to email the tech contacts the problem, and
    # put the node into debug mode.
    if total_size < TOTAL_MINIMUM_DISK_SIZE:
        if not SKIP_HARDWARE_REQUIREMENT_CHECK:
            log.write("The total usable disk size of all disks is " \
                       "insufficient to be usable as a PlanetLab node.\n")
            include_pis = 0
            include_techs = 1
            include_support = 0
            
            sent = 0
            try:
                sent = BootAPI.call_api_function(vars, "BootNotifyOwners",
                                                 (notify_messages.MSG_INSUFFICIENT_DISK,
                                                  include_pis,
                                                  include_techs,
                                                  include_support))
            except BootManagerException as e:
                log.write("Call to BootNotifyOwners failed: {}.\n".format(e))
            
            if sent == 0:
                log.write("Unable to notify site contacts of problem.\n")

            return 0
        
        else:
            log.write("The total usable disk size of all disks is " \
                       "insufficient, but running anyway.\n")
            
    log.write("Total size for all usable block devices: {:4.2f} Gb\n".format(total_size))

    return 1
Exemple #5
0
def Run(vars, upgrade, log):
    """
    Download core + extensions bootstrapfs tarballs and install on the hard drive

    the upgrade boolean is True when we are upgrading a node root install while 
    preserving its slice contents; in that case we just perform extra cleanup
    before unwrapping the bootstrapfs
    this is because the running system may have extraneous files
    that is to say, files that are *not* present in the bootstrapfs
    and that can impact/clobber the resulting upgrade
    
    Expect the following variables from the store:
    SYSIMG_PATH          the path where the system image will be mounted
    PARTITIONS           dictionary of generic part. types (root/swap)
                         and their associated devices.
    NODE_ID              the id of this machine
    
    Sets the following variables:
    TEMP_BOOTCD_PATH     where the boot cd is remounted in the temp
                         path
    ROOT_MOUNTED         set to 1 when the the base logical volumes
                         are mounted.
    """

    log.write("\n\nStep: Install: bootstrapfs tarball (upgrade={}).\n".format(
        upgrade))

    # make sure we have the variables we need
    try:
        SYSIMG_PATH = vars["SYSIMG_PATH"]
        if SYSIMG_PATH == "":
            raise ValueError("SYSIMG_PATH")

        PARTITIONS = vars["PARTITIONS"]
        if PARTITIONS == None:
            raise ValueError("PARTITIONS")

        NODE_ID = vars["NODE_ID"]
        if NODE_ID == "":
            raise ValueError("NODE_ID")

        VERSION = vars['VERSION'] or 'unknown'

    except KeyError as var:
        raise BootManagerException(
            "Missing variable in vars: {}\n".format(var))
    except ValueError as var:
        raise BootManagerException(
            "Variable in vars, shouldn't be: {}\n".format(var))

    try:
        # make sure the required partitions exist
        val = PARTITIONS["root"]
        val = PARTITIONS["swap"]
        val = PARTITIONS["vservers"]
    except KeyError as part:
        log.write("Missing partition in PARTITIONS: {}\n".format(part))
        return 0

    bs_request = BootServerRequest.BootServerRequest(vars)

    # in upgrade mode, since we skip InstallPartitionDisks
    # we need to run this
    if upgrade:
        log.write("Upgrade mode init : Scanning for devices\n")
        systeminfo.get_block_devices_dict(vars, log)
        utils.sysexec_noerr("vgscan --mknodes", log)
        utils.sysexec_noerr("vgchange -ay", log)

    # debugging info - show in either mode
    utils.display_disks_status(PARTITIONS, "In InstallBootstrapFS", log)

    utils.breakpoint("we need to make /dev/mapper/* appear")

    log.write("turning on swap space\n")
    utils.sysexec("swapon {}".format(PARTITIONS["swap"]), log)

    # make sure the sysimg dir is present
    utils.makedirs(SYSIMG_PATH)

    log.write("mounting root file system\n")
    utils.sysexec(
        "mount -t ext3 {} {}".format(PARTITIONS["root"], SYSIMG_PATH), log)

    fstype = 'ext3' if vars['virt'] == 'vs' else 'btrfs'

    one_partition = vars['ONE_PARTITION'] == '1'

    if (not one_partition):
        log.write("mounting vserver partition in root file system (type {})\n".
                  format(fstype))
        utils.makedirs(SYSIMG_PATH + "/vservers")
        utils.sysexec("mount -t {} {} {}/vservers"\
                      .format(fstype, PARTITIONS["vservers"], SYSIMG_PATH), log)

        if vars['virt'] == 'lxc':
            # NOTE: btrfs quota is supported from version: >= btrfs-progs-0.20 (f18+)
            #       older versions will not recongize the 'quota' command.
            log.write(
                "Enabling btrfs quota on {}/vservers\n".format(SYSIMG_PATH))
            utils.sysexec_noerr(
                "btrfs quota enable {}/vservers".format(SYSIMG_PATH))

    vars['ROOT_MOUNTED'] = 1

    # this is now retrieved in GetAndUpdateNodeDetails
    nodefamily = vars['nodefamily']
    extensions = vars['extensions']

    # in upgrade mode: we need to cleanup the disk to make
    # it safe to just untar the new bootstrapfs tarball again
    # on top of the hard drive
    if upgrade:
        CleanupSysimgBeforeUpgrade(SYSIMG_PATH, nodefamily, log)

    # the 'plain' option is for tests mostly
    plain = vars['plain']
    if plain:
        download_suffix = ".tar"
        uncompress_option = ""
        log.write("Using plain bootstrapfs images\n")
    else:
        download_suffix = ".tar.bz2"
        uncompress_option = "-j"
        log.write("Using compressed bootstrapfs images\n")

    log.write("Using nodefamily={}\n".format(nodefamily))
    if not extensions:
        log.write("Installing only core software\n")
    else:
        log.write("Requested extensions {}\n".format(extensions))

    bootstrapfs_names = [nodefamily] + extensions

    for name in bootstrapfs_names:
        tarball = "bootstrapfs-{}{}".format(name, download_suffix)
        source_file = "/boot/{}".format(tarball)
        dest_file = "{}/{}".format(SYSIMG_PATH, tarball)

        source_hash_file = "/boot/{}.sha1sum".format(tarball)
        dest_hash_file = "{}/{}.sha1sum".format(SYSIMG_PATH, tarball)

        time_beg = time.time()
        log.write("downloading {}\n".format(source_file))
        # 30 is the connect timeout, 14400 is the max transfer time in
        # seconds (4 hours)
        result = bs_request.DownloadFile(source_file, None, None, 1, 1,
                                         dest_file, 30, 14400)
        time_end = time.time()
        duration = int(time_end - time_beg)
        log.write("Done downloading ({} seconds)\n".format(duration))
        if result:
            # Download SHA1 checksum file
            log.write("downloading sha1sum for {}\n".format(source_file))
            result = bs_request.DownloadFile(source_hash_file, None, None, 1,
                                             1, dest_hash_file, 30, 14400)

            log.write("verifying sha1sum for {}\n".format(source_file))
            if not utils.check_file_hash(dest_file, dest_hash_file):
                raise BootManagerException(
                    "FATAL: SHA1 checksum does not match between {} and {}"\
                    .format(source_file, source_hash_file))

            time_beg = time.time()
            log.write("extracting {} in {}\n".format(dest_file, SYSIMG_PATH))
            result = utils.sysexec(
                "tar -C {} -xpf {} {}".format(SYSIMG_PATH, dest_file,
                                              uncompress_option), log)
            time_end = time.time()
            duration = int(time_end - time_beg)
            log.write("Done extracting ({} seconds)\n".format(duration))
            utils.removefile(dest_file)
        else:
            # the main tarball is required
            if name == nodefamily:
                raise BootManagerException(
                    "FATAL: Unable to download main tarball {} from server."\
                    .format(source_file))
            # for extensions, just issue a warning
            else:
                log.write(
                    "WARNING: tarball for extension {} not found\n".format(
                        name))

    # copy resolv.conf from the base system into our temp dir
    # so DNS lookups work correctly while we are chrooted
    log.write("Copying resolv.conf to temp dir\n")
    utils.sysexec("cp /etc/resolv.conf {}/etc/".format(SYSIMG_PATH), log)

    # Copy the boot server certificate(s) and GPG public key to
    # /usr/boot in the temp dir.
    log.write("Copying boot server certificates and public key\n")

    if os.path.exists("/usr/boot"):
        # do nothing in case of upgrade
        if not os.path.exists(SYSIMG_PATH + "/usr/boot"):
            utils.makedirs(SYSIMG_PATH + "/usr")
            shutil.copytree("/usr/boot", SYSIMG_PATH + "/usr/boot")
    elif os.path.exists("/usr/bootme"):
        # do nothing in case of upgrade
        if not os.path.exists(SYSIMG_PATH + "/usr/bootme"):
            utils.makedirs(SYSIMG_PATH + "/usr/boot")
            boot_server = file("/usr/bootme/BOOTSERVER").readline().strip()
            shutil.copy("/usr/bootme/cacert/" + boot_server + "/cacert.pem",
                        SYSIMG_PATH + "/usr/boot/cacert.pem")
            file(SYSIMG_PATH + "/usr/boot/boot_server", "w").write(boot_server)
            shutil.copy("/usr/bootme/pubring.gpg",
                        SYSIMG_PATH + "/usr/boot/pubring.gpg")

    # For backward compatibility
    if os.path.exists("/usr/bootme"):
        # do nothing in case of upgrade
        if not os.path.exists(SYSIMG_PATH + "/mnt/cdrom/bootme"):
            utils.makedirs(SYSIMG_PATH + "/mnt/cdrom")
            shutil.copytree("/usr/bootme", SYSIMG_PATH + "/mnt/cdrom/bootme")

    # ONE_PARTITION => new distribution type
    if (vars['ONE_PARTITION'] != '1'):
        # Import the GPG key into the RPM database so that RPMS can be verified
        utils.makedirs(SYSIMG_PATH + "/etc/pki/rpm-gpg")
        utils.sysexec(
            "gpg --homedir=/root --export --armor"
            " --no-default-keyring --keyring {}/usr/boot/pubring.gpg"
            " > {}/etc/pki/rpm-gpg/RPM-GPG-KEY-planetlab".format(
                SYSIMG_PATH, SYSIMG_PATH), log)
        utils.sysexec_chroot(
            SYSIMG_PATH, "rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-planetlab",
            log)

    # keep a log on the installed hdd
    stamp = file(SYSIMG_PATH + "/bm-install.txt", 'a')
    now = time.strftime("%Y-%b-%d @ %H:%M %Z", time.gmtime())
    stamp.write("Hard drive installed by BootManager {}\n".format(VERSION))
    stamp.write("Finished extraction of bootstrapfs on {}\n".format(now))
    # do not modify this, the upgrade code uses this line for checking compatibility
    stamp.write("Using nodefamily {}\n".format(nodefamily))
    stamp.close()

    return 1