Example #1
    def create_raid_array(self, dry=False):
#       if self.status != 'stopped' and not dry:
#           raise rrdm_error ('RAID device %s is already running' % self.dev_name)

        if self.found_devices == self.num_drives:
            dev_list = self.__form_mdadm_create_device_list()
            if len(dev_list) == 0:
                raise rrdm_error ('Insufficient raid disks to create array [%s]' %
                                  self.dev_name)

            opt_list = self.__form_mdadm_create_opt_list()
            command = self.__form_mdadm_cmd_line(opt_list, dev_list)

            if dry:
                print 'running:', command
            else:
                try:
                    ret = run_shell_cmd(command)
                except rrdm_error:
                    try:
                        ret = run_shell_cmd('/mfg/'+command)
                    except rrdm_error:
                        raise rrdm_error('failed to start RAID with cmdline : %s' % command)
        else:
            raise rrdm_error ('Unable to create raid array with missing disks [%d/%d]' %
                              (self.found_devices, self.num_drives))

        rlog_notice ('Created array [%s:%s]' % (self.name, self.dev_name))
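
A minimal usage sketch; `make_raid_array` is a hypothetical helper, and only
`create_raid_array` and `rrdm_error` come from the code above:

    # dry-run first to inspect the mdadm command line, then do the real create
    array = make_raid_array('md0')          # hypothetical factory
    array.create_raid_array(dry=True)       # prints 'running: <mdadm command>'
    try:
        array.create_raid_array()
    except rrdm_error, e:
        print 'array create failed:', e
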
Example #2
    def write_config(self, system):
        name = self.config_tmp_file
        self.config_valid = False

        try:
            rlog_debug ('Writing disk configuration to : ' + name)
            tfile = open (name, "w+")
            tfile.write('<rrdm version="1">\n')
            tfile.write('<config rebuild_rate="%s" auto_rebuild="%s"/>\n' %
                        (system.get_rebuild_rate(), "true"))
            tfile.write('<drives>\n')

            self.disk_list = []
            for disk in system.disk_array.get_drive_list():
                # figure out the raid port corresponding to this drive.
                if not disk.is_missing():
                    if disk.has_valid_superblock():
                        # read the rport from the superblock
                        rport = '%s' % disk.superblock.get_raid_port()
                    else:
                        # disk is present but has no rport recorded, so we
                        # don't know what it is yet; it should be the old one.
                        rport = self.get_disk_rport(disk.portnum)
                else:
                    # disk is missing, so fill in the previous raid port.
                    rport = self.get_disk_rport(disk.portnum)

                tfile.write ('<disk port="%s" rport="%s" serial="%s"/>\n' % (
                             disk.portnum, rport, disk.serialnum))
                self.disk_list.append ((disk.portnum, disk.serialnum))

            tfile.write('</drives>\n')

            tfile.write('<raid-arrays>\n')
            # reset the array list
            self.array_list = []
            for array in system.raid_arrays.get_array_list():
                tfile.write ('<array name="%s" uid="%s"/>\n' % (
                             array.name, array.uuid))
                self.array_list.append ((array.name, array.uuid))

            tfile.write('</raid-arrays>\n')
            tfile.write('</rrdm>\n')
            tfile.close()
        except IOError:
            raise rrdm_error ('unable to create configuration file %s' % name)

        try:
            rename (name, self.config_file)
        except Exception:
            raise rrdm_error ('unable to update configuration file %s' %
                              self.config_file)

        self.config_valid = True
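
For reference, the file written above has the following shape (port, serial,
and uid values are illustrative):

    <rrdm version="1">
    <config rebuild_rate="100000" auto_rebuild="true"/>
    <drives>
    <disk port="0" rport="0" serial="WD-XYZ"/>
    </drives>
    <raid-arrays>
    <array name="md0" uid="..."/>
    </raid-arrays>
    </rrdm>
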
Example #3
    def add_sb_kvp(self, kvp):
        if len(kvp) != 2:
            raise rrdm_error ('Invalid key value pair parameter')
    
        cmdline = '%s -a %s=%s %s' % (self.super_path, kvp[0], kvp[1], self.dev_name)
        err = run_shell_cmd (cmdline)
        if err != 0:
            raise rrdm_error ('Unable to update superblock on %s' % self.dev_name)
        
        self.__sb_kvp[kvp[0]] = kvp[1]
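
A short usage sketch on a hypothetical `disk` object; the `owner` key and its
value are invented examples of a superblock key/value pair, not keys the tool
is known to define:

    # appends '-a owner=md0' to the rvbd_super command line and, on
    # success, caches the pair in the local kvp map
    disk.add_sb_kvp(('owner', 'md0'))
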
Example #4
    def start_raid_array(self, dry=False):
        if self.status != "stopped" and not dry:
            # if it's already running, just return ok
            return

        plist = filter(lambda dev: not dev.hd.is_missing(), self.part_list)
        all_uuids = map(lambda dev: dev.raid_super().uuid(), plist)
        all_uuids = filter(lambda u: u is not None and u != "", all_uuids)

        # remove duplicates in uuids, keeping the full list around so we
        # can count how many members report each uuid below...
        uuids = sorted(all_uuids)
        nuuids = []
        prev = None
        for u in uuids:
            if u != prev:
                nuuids.append(u)

            prev = u

        uuids = nuuids

        # get our "expected uuid"
        uuid = SystemConfig().get_array_uid(self.name)
        array_started = False
        while len(uuids) > 0:
            # first priority our uuid...
            if uuids.count(uuid) > 0:
                u = uuid
            else:
                # next priority: the uuid reported by the most members...
                maxu = max(map(lambda a: all_uuids.count(a), uuids))
                u = filter(lambda a: all_uuids.count(a) == maxu, uuids)[0]

            uuids.remove(u)
            if self.__start_raid_array_with_uuid(u, dry):
                array_started = True
                break

        if not array_started:
            raise rrdm_error("failed to start RAID")
        else:
            # raid array has started. If this raid array is a vecache then set the RAID
            # disable_queue_plugging sysfs param for this array
            if self.__sysfscfg_list != []:
                # Setting sysfs param to disable queueing on RAID10 writes on the VE blockstore
                try:
                    for entry in self.__sysfscfg_list:
                        cmd = "echo %s > %s/%s/%s" % (entry.value, entry.type, self.dev_name, entry.path)
                        run_shell_cmd(cmd)
                except (IOError, OSError):
                    raise rrdm_error(
                        "Could not set sysfs param disable_queue_plugging for vecache device %s" % self.dev_name
                    )
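
The uuid selection order above can be exercised in isolation. A minimal
standalone sketch (all values made up, no rrdm objects) showing the expected
uuid taking priority over the majority uuid:

    all_uuids = ['b', 'b', 'a', 'b', 'a']   # one uuid per present member
    expected  = 'a'                         # what SystemConfig says we should be

    candidates = sorted(set(all_uuids))     # the deduplicated list
    if expected in candidates:
        pick = expected                     # first priority: our own uuid
    else:
        pick = max(candidates, key=all_uuids.count)   # else the majority uuid
    print pick                              # -> 'a'
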
Example #5
    def sync_disk_sb (self):
        retries = 0
        while retries < 5 and not exists (self.dev_name):
            sleep (1)
            retries = retries + 1
            
        cmdline = '%s -u -s %s -p %d -r %d %s' % (self.super_path, self.serial,
                                                  self.port_num, self.raid_port_num, self.dev_name)
        err = run_shell_cmd (cmdline)
        if err != 0:
            if err == errno.EINVAL:
                raise rrdm_error ('%s: Invalid argument provided.' % self.super_path)
            raise rrdm_error ('Unable to update superblock on %s' % self.dev_name)
Example #6
    def __fail_disk(self, hd_target):
        dconfig_path = '/config/disk'
        if not isdir(dconfig_path):
            try:
                mkdir(dconfig_path)
            except OSError:
                raise rrdm_error('Unable to create disk state directory %s' % \
                                 dconfig_path)

        dfile = '%s/disk%s_failed' % (dconfig_path, hd_target.portnum)
        try:
            open(dfile, "w").close()
        except IOError:
            raise rrdm_error ('Unable to create disk state file: %s' % dfile)
Example #7
    def fail(self):

        if not isdir(self.__cfg_status_dir):
            try:
                mkdir(self.__cfg_status_dir)
            except OSError:
                raise rrdm_error('Unable to create disk state directory %s' % \
                                 self.__cfg_status_dir)

        try:
            open(self.__cfg_status_name, "w").close()
        except IOError:
            raise rrdm_error ('Unable to create disk state file: %s' % \
                              self.__cfg_status_name)
Example #8
    def __assemble_raid_array(self, uuid, dry):
        dev_list = self.__form_mdadm_assemble_device_list(uuid)
        opt_list = self.__form_mdadm_assemble_opt_list()
        cmd_line = self.__form_mdadm_cmd_line(opt_list, dev_list)
        started_array = False
        if dry:
            print 'Running:', cmd_line
        else:
            try:
                if len(dev_list) == 0:
                    raise rrdm_error ('Insufficient raid disks to start array [%s]' % self.dev_name)
                rlog_notice ('Raid Assemble: [%s]' % cmd_line)
                run_shell_cmd(cmd_line)
                started_array = True
            except rrdm_error:
                rlog_notice ('Failed to start array with command [%s]' % cmd_line)
                # md often leaves some badness around when it fails an
                # assemble; remove it.
                self.stop_raid_array(True)

        # a failed assemble can leave stale MD state behind, so rescan our
        # raid state regardless of the outcome.
        self.determine_array_status()

        return started_array
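
Note that a dry run always returns False, since started_array is only set
after run_shell_cmd succeeds. A hypothetical in-class caller should therefore
key off dry as well:

    started = self.__assemble_raid_array(uuid, dry)
    if not started and not dry:
        rlog_notice ('assemble failed for [%s]' % self.dev_name)
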
Example #9
    def find_dev_by_raid_id(self, id):
        # if it's a failed raid drive, raid doesn't tell us where it came from.
        id_str = '%d' % id

        for part in self.part_list:
            if part.raid_port == id_str:
                return part

        raise rrdm_error ('No raid device with id %s' % id_str)
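
A usage sketch against a hypothetical `array` object; `get_devname` is the
same accessor used in Example #13. Raid ids are integers, compared against
the string raid_port field, which is why the method formats the id first:

    try:
        part = array.find_dev_by_raid_id(2)
        print 'raid slot 2 is', part.get_devname()
    except rrdm_error:
        print 'no member in raid slot 2'
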
Example #10
    def stop_raid_array(self, force=False):
        if force or not self.is_stopped():
            cmd_line = 'mdadm --stop /dev/%s' % self.dev_name
            try:
                run_shell_cmd(cmd_line)
            except rrdm_error:
                raise rrdm_error('failed to stop RAID with cmdline : %s' % cmd_line)
        else:
            print 'Array %s is already stopped' % self.dev_name
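
A brief usage sketch on a hypothetical `array` object. force=True issues
mdadm --stop even if the array already looks stopped, which is how Example #8
clears half-assembled MD state after a failed assemble:

    array.stop_raid_array()             # prints a message if already stopped
    array.stop_raid_array(force=True)   # always issues mdadm --stop
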
Example #11
    def __add_disk(self, hd_target):
        dconfig_path = '/config/disk'
        if not isdir(dconfig_path):
            try:
                mkdir(dconfig_path)
            except OSError:
                raise rrdm_error('Unable to create disk state directory %s' % \
                                 dconfig_path)

        dfile = '%s/disk%s_failed' % (dconfig_path, hd_target.portnum)

        if exists(dfile):
            remove(dfile)
Example #12
    def get_rebuild_rate(self):
        try:
            f = open (self.raid_rebuild_max_proc, 'r')
            try:
                rate = f.read()
            finally:
                f.close()
        except (IOError, OSError):
            raise rrdm_error ('Unable to read rebuild rate from proc')

        return rate.strip()
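
The same open/read/close-in-finally pattern, generalized into a standalone
sketch for any one-line proc value. The path shown is the standard MD
rebuild-rate knob, given only as an illustration; `raid_rebuild_max_proc`
may point elsewhere:

    def read_proc_value(path):
        try:
            f = open(path, 'r')
            try:
                return f.read().strip()
            finally:
                f.close()
        except (IOError, OSError):
            raise rrdm_error ('Unable to read %s' % path)

    rate = read_proc_value('/proc/sys/dev/raid/speed_limit_max')
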
Example #13
    def check_consistency(self):
        in_sync = True
        for part in self.part_list:
            # only check actively in-sync drives, since MD will use a spare
            # drive number during the rebuild sequence, which can trick this
            # into thinking it's out of sync.
            if part.check_consistency() and not part.is_ok():
                in_sync = False
                rlog_notice(
                    "On array [%s] disk [%s] device [%s] is out of sync"
                    % (self.get_devname(), part.hd.get_devname(), part.get_devname())
                )

        if not in_sync:
            rlog_notice("System may be vulnerable to single disk failure")
            raise rrdm_error("Array %s is not properly spread across system drives" % (self.get_devname()))

        return in_sync
Example #14
    def fill_from_rvbd_super(self,
                             wait_for_device=False):
        super_path = RVBD_SUPER_PATH
        if not exists (super_path):
            super_path = RVBD_MFG_SUPER_PATH
            if not exists (super_path):
                raise rrdm_error ('Unable to locate rvbd_super tool.')

        retries = 0
        if wait_for_device:
            while not exists(self.dev_name) and retries < 3:
                sleep (1)
                retries += 1

        if not exists (self.dev_name):
            raise rrdm_error ('Device does not exist %s' % self.dev_name)

        self.super_path = super_path

        cmdline = '%s -v %s' % (super_path, self.dev_name)
        try:
            output = run_shell_cmd (cmdline, True)
        except rrdm_error:
            raise rrdm_error ('Unable to execute rvbd super tool.')

        if output == '':
            raise rrdm_error ('No output returned from rvbd super tool.')

        ver_kvp = output.split('=')
        if ver_kvp[0] != 'version':
            raise rrdm_error ('Invalid output returned from rvbd super tool')

        self.version = int (ver_kvp[1], 10)
        # we only support version 1 superblocks today; if we add more, these
        # routines should probably be abstracted into a per-version class.
        if self.version == 1:
            # we have a valid SB version, so just fetch the output.
            cmdline = '%s -g %s' % (super_path, self.dev_name)
            output = run_shell_cmd (cmdline, True)
            try:
                sb_lines = output.split('\n')
                for line in sb_lines:
                    sb_kvp = line.split('=')
                    self.update_from_kvp(sb_kvp)
            except IndexError:
                raise rrdm_error ('invalid SB output returned from rvbd_super')
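
A standalone sketch of the key=value parsing this method depends on; the
sample output lines are invented, and the real field names come from the
rvbd_super tool:

    output = 'version=1\nserial=XYZ123\nraid_port=3'
    for line in output.split('\n'):
        kvp = line.split('=')
        if len(kvp) == 2:
            print '%s -> %s' % (kvp[0], kvp[1])
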
Example #15
    def fail(self):
        # once you've failed the disk, it disappears from the sysfs entry;
        # you can only fail a drive once, and because of that we read the
        # device name first.
        #
        # failing is a 2-stage process: set the drive to faulty, then remove
        # it from the array.
        #
        array_name      = self.raid_array.get_devname()

        # XXX currently assumes that the disk in port X is raid X
        #
        if self.raid_port == 'unknown':
            # if this drive isn't in the system, assume it's on the hard drive.
            rlog_debug ('drive has been removed using drive-raid map')
            sysconfig = SystemConfig()
            if sysconfig.is_config_valid():
                portnum = sysconfig.get_disk_rport(self.hd.portnum)
            else:
                # if we don't know which raid port to fail, don't just
                # continue on.  skip out and log a msg.
                #
                rlog_notice ('Unable to determine rport when failing disk [%s]' %
                             self.hd.portnum)
                return
        else:
            portnum = self.raid_port

        state_cmd   = "faulty"
        remove_cmd  = "remove"

        md_devname_path = '/sys/block/%s/md/rd%s/device' % (array_name, portnum)

        try:
            md_dev_name = get_sysfs_param (md_devname_path)
        except IOError:
            raise rrdm_error ('unable to read raid device : %s' % md_devname_path)

        # use the device name indicated by RAID: if the drive is missing,
        # md might still have a reference to the device, but we don't have a
        # scsi device we can use to figure out the name of the device that
        # used to be in the array.
        md_state_path  = '/sys/block/%s/md/dev-%s/state' % (array_name, md_dev_name)

        rlog_notice ('Failing array [%s] device [%s:%s]' % (array_name,
                      portnum, md_dev_name))
        retries = 0

        while retries < 3:
            try:
                if exists (md_state_path):
                    sys_file = open (md_state_path, "w")
                    try:
                        sys_file.write(state_cmd)
                    finally:
                        sys_file.close()

                    sleep (0.5)

                    sys_file = open (md_state_path, "w")
                    try:
                        sys_file.write(remove_cmd)
                    finally:
                        sys_file.close()

                    # if we succeed, give a grace period to allow for the
                    # request to complete.
                    sleep (0.5)

                # bail out: either it failed already or we succeeded.
                # make sure the drive is really gone, and if it's not, retry.
                if not exists (md_state_path):
                    break

            except IOError:
                pass

            # count every attempt, not just IOError failures, so a sysfs
            # entry that refuses to go away can't spin this loop forever.
            retries += 1

        if exists (md_state_path):
            rlog_debug('Unable to fail %s on %s with cmd [%s:%s]' % (
                       self.raid_port, array_name, md_state_path, remove_cmd))
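
For reference, the faulty-then-remove sequence above reduces to the following
sketch; the array and device names are illustrative, and `sleep` is the same
helper used in the method:

    state = '/sys/block/md0/md/dev-sdb1/state'   # illustrative path
    for cmd in ('faulty', 'remove'):
        sys_file = open (state, 'w')
        try:
            sys_file.write(cmd)
        finally:
            sys_file.close()
        sleep (0.5)     # grace period between the two steps
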
Example #16
    def check_config(self):
        if self.num_arrays != self.expected_num_arrays:
            raise rrdm_error ('Raid Configuration Mismatch')
Example #17
    def __init__(self, 
                 spec,
                 model,
                 mfg_mode=False,
                 profile = None):
        self.spec           = spec
        self.model          = model
        self.appliance_serial = ''
        
        if profile in [ None, '' ]:
            # None if there is no storage profile or profiles
            # are not supported
            self.__cur_sprofile = get_storage_profile()
        else:
            # if the user has specified a profile on the cmd line 
            # we'll use that profile (most notably for the mfg option,
            # where we want to reconfigure a system for a new profile)
            self.__cur_sprofile = profile

        # if the user has not specified the storage profile and we have
        # not read it from disk, assume the storage profile is the default
        # one in the spec definition
        if self.spec.has_storage_cfg() and \
           self.__cur_sprofile in [ None, '' ]:
            self.__cur_sprofile = self.spec.get_default_scfg_name()
            if self.__cur_sprofile in [ None, '' ]:
                raise AssertionError('Unable to determine storage profile')

        # we need to associate our view of the spec
        # with the profile we are currently configured for
        self.spec.set_spec_profile_view(self.__cur_sprofile)
        
        self.fill_appliance_serial()
        self.get_rebuild_rate()

        # grab the motherboard from hwtool
        self.motherboard    = get_hwtool_motherboard()
        self.phy_mobo       = get_hwtool_phy_motherboard()
        
        # gather all associated physical disk information
        # should we fill this from the spec info, or the available system info
        # split it out as a query?
        self.disk_array = DiskArray()
        num_physical_disks = hwtool_disk_map.get_num_hard_disk()
        if num_physical_disks <= 0:
            # how did we boot?
            raise rrdm_error ('Appliance detects an invalid number' \
                              ' of disks %d.' % num_physical_disks)

        self.disk_array.fill_from_system_info (self.spec)

        self.volumes        = []
        self.raid_arrays    = RaidDevices()
        self.__ftraid_arrays = []
        self.__zone_map     = {}

        # populate a zone map with pointers to physical disks in the zones.
        #
        if not self.spec.has_storage_cfg():
            # fall back to the legacy config mode, where the zones
            # describe exported volumes
            for zone in self.spec.get_zone_list():
                self.__legacy_volume_setup(zone)
        else:
            for z in self.spec.get_zone_list():
                dz = DiskZone(z, self.disk_array)
                self.__zone_map[dz.get_name()] = dz
    
            # Storage config/ storage profile describe the set of exported
            # volumes
            lvm_list = self.spec.get_storage_cfg(). \
                             get_logical_volumes(self.__cur_sprofile)

            for lvm in lvm_list:
                if lvm.get_type() == LogicalVolumeConfig.direct_type:
                    self.__lvm_direct_setup(lvm)
                if lvm.get_type() in LogicalVolumeConfig.raid_types:
                    self.__lvm_raid_setup(lvm)
                if lvm.get_type() == LogicalVolumeConfig.ftraid_type:
                    self.__lvm_ftraid_setup(lvm)

        # update drive status based on raid status.
        self.disk_array.update_status_by_zone(self.raid_arrays.get_array_list(),
                                              self.__ftraid_arrays)
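
The profile resolution order implemented at the top of this constructor can
be summarized as a standalone sketch; the function name is hypothetical,
while get_storage_profile and the spec accessors are the ones used above:

    def resolve_profile(cmdline_profile, spec):
        # 1. an explicit profile from the caller (e.g. mfg reconfiguration)
        if cmdline_profile not in (None, ''):
            return cmdline_profile
        # 2. the profile previously recorded on disk
        profile = get_storage_profile()
        if profile not in (None, ''):
            return profile
        # 3. the spec default, when the spec carries a storage config
        if spec.has_storage_cfg():
            return spec.get_default_scfg_name()
        return None
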
Example #18
    def validate(self):
        (valid, msg) = validate_spec(self.spec,
                                     self.disk_array)
        if not valid:
            raise rrdm_error(msg)