def _update_wear_counter(dev, sysfs_val, dc_disks):
    """Fold the data written to *dev* since the last poll into its superblock.

    dev       -- device name; the superblock is read from '/dev/<dev>p1'
    sysfs_val -- current written-data counter reported for the disk
    dc_disks  -- mapping indexed by superblock port that yields the disk
                 serial (the result of DiskConfig.get_disks())

    PREV_STATE[port] is refreshed to (serial, sysfs_val) and the
    'current_data_stored' superblock key is rewritten.  Returns None.
    TypeError and rrdm_error are left to propagate; main() treats both as
    "skip this disk until the next poll".
    """
    sb_obj = RvbdSuperBlock('/dev/%sp1' % dev)
    sb_val = sb_obj.get_sb_kvp('current_data_stored')
    port_val = sb_obj.get_port()

    # The stored value is used for addition and needs to be an int;
    # treat a missing/empty value as zero.
    if sb_val:
        sb_val = int(sb_val)
    else:
        sb_val = 0

    # The disk_config.xml file has no idea about this disk, skip it and retry
    if port_val not in dc_disks:
        return
    disk_serial = dc_disks[port_val]

    # Same disk as on the previous poll?  The serial must match and the
    # counter must not have gone backwards.  A smaller counter means the disk
    # was removed and re-inserted during the poll interval.  (A re-inserted
    # disk that has already seen more IO than the old value cannot be told
    # apart from one that never left.)
    #
    # The comparison is <= on purpose: an idle disk whose counter did not move
    # is still the same disk and contributes a delta of zero.  Treating it as
    # a new disk would add the whole counter to the superblock a second time.
    prev = PREV_STATE.get(port_val)
    if prev is not None and prev[0] == disk_serial and prev[1] <= sysfs_val:
        delta = sysfs_val - prev[1]
    else:
        # New disk, start of script, or same disk removed and reinserted in
        # the poll interval: everything the counter shows is unaccounted for.
        delta = sysfs_val

    PREV_STATE[port_val] = (disk_serial, sysfs_val)
    sb_obj.add_sb_kvp(['current_data_stored', sb_val + delta])


def main():
    """Poll all drives forever and persist how much data was written to each.

    Each pass sleeps via sleep(1800), re-reads /config/disk_config.xml, walks
    the hwtool drive list and, for every disk that is at least MIN_HD_SIZE,
    adds the data written since the previous pass to the
    'current_data_stored' key of the disk's superblock.  PREV_STATE carries
    the per-port (serial, counter) pair between passes.

    Never returns.
    """
    while True:
        sleep(1800)

        try:
            dc = DiskConfig('/config/disk_config.xml')
            dc_disks = dc.get_disks()
        except rrdm_error:
            # Couldn't read/write the config file, retry in the next interval
            continue

        drive_list = HwtoolDriveList().get_drive_list()
        size_map = get_disk_list()

        for elem in drive_list:
            dev = elem[1]
            dev_name = elem[2]

            try:
                sysfs_val = get_current_data_written_to_disk(dev_name)
            except ValueError:
                continue
            except rrdm_error:
                # Disk no longer in system: forget its state and move on.
                # There is no counter value for this disk, so it must never
                # reach the superblock update below.
                # NOTE(review): this key is the string left after dropping the
                # first four characters of dev, while other PREV_STATE entries
                # are keyed by sb_obj.get_port() -- confirm the types match.
                PREV_STATE.pop(dev[4:], None)
                continue

            # If the disk size is < MIN_HD_SIZE and hwtool says its a disk
            # it has to be a virtual disk. None of our SSD's and HD's are
            # less than MIN_HD_SIZE in size. Skip these disks for wear
            # level update or else it will mess up the partitions
            if int(size_map[dev]) < MIN_HD_SIZE:
                continue

            try:
                _update_wear_counter(dev, sysfs_val, dc_disks)
            except TypeError:
                # Riverbed superblock missing
                continue
            except rrdm_error:
                # Could not get the current value in the partition
                # Just retry
                continue
class HardDisk:
    """One physical drive slot: identity, zone placement and health status.

    Instances start out blank (see __init__) and are populated from the
    running system by the fill_* methods.  `status` is a plain string; the
    values used by this class are 'online', 'failed', 'rebuilding',
    'missing', 'degraded' and 'invalid'.
    """

    def __init__(self):
        self.type = ''
        self.name = ''
        self.portnum = ''
        # NOTE(review): fill_disk_params stores the sysfs 'vendor' value in
        # self.type; nothing in the visible code assigns self.vendor.
        self.vendor = ''
        self.bus_num = ''
        self.size = '0'
        self.cfg_size = '0'
        self.status = ''
        self.model = 'unknown'
        self.__media = 'unknown'
        # the partition table we expect is driven by the config.
        self.part_tbl = None
        self.__zone = None
        # the logical offset of a drive within its zone.
        self.__logical_port = -1
        # the logical offset the drive thinks it is within its zone
        # this info is read from the drive SB, -1 until a superblock is read
        self.__drive_logical_port = -1
        self.superblock = None
        self.disk_led = None
        # flag to indicate if the disk is managed by someone else
        # (eg. writeimage.sh)
        self.__managed = False
        # extended info filled in at request is the serial, model, fw, info
        self.serialnum = ''
        self.firmware = ''
        # branding info
        self.licensed = True

    def __str__(self):
        """Return a one-line, colon-separated summary ending in a newline.

        Fields: disk<port>, zone logical port, drive logical port, zone name
        ('unknown' without a zone), model, serial, firmware, status.
        """
        if self.__zone:
            zname = self.__zone.get_name()
        else:
            zname = 'unknown'
        # portnum is formatted with %s (as get_devname does): it is '' until a
        # fill_* method assigns it, and %d would raise TypeError on that.
        # Integer ports render identically with either conversion.
        return 'disk%s:%d:%d:%s:%s:%s:%s:%s\n' % (self.portnum,
                                                  self.__logical_port,
                                                  self.__drive_logical_port,
                                                  zname,
                                                  self.model,
                                                  self.serialnum,
                                                  self.firmware,
                                                  self.status)

    def get_media(self):
        """Return the media type string ('unknown' until filled in)."""
        return self.__media

    def get_zone(self):
        """Return the zone object this disk belongs to, or None."""
        return self.__zone

    ## has_volume_by_name
    # @param name Name of a volume/raid/ftraid array
    # Return True if a zone contains a given volume
    def has_volume_by_name(self, name):
        # A disk without a zone (e.g. one set up via fill_system_spare_info,
        # which never assigns a zone) cannot hold any volume.
        if self.__zone is None:
            return False
        return self.__zone.has_volume_by_name(name)

    def get_portnum(self):
        """Return the physical port number of the disk."""
        return self.portnum

    def get_logical_port(self):
        """Return the logical offset of the drive within its zone (-1 if unset)."""
        return self.__logical_port

    def get_drive_logical_port(self):
        """Return the logical port recorded on the drive itself (-1 if unset)."""
        return self.__drive_logical_port

    def get_base_scsi_name(self):
        """Return the device name hwtool_disk_map reports for this port."""
        return hwtool_disk_map.find_devname_by_port(self.portnum)

    def get_scsi_state(self):
        """Return the state hwtool_disk_map reports for this port."""
        return hwtool_disk_map.find_state_by_port(self.portnum)

    def get_devname(self):
        """Return the 'disk<port>' device name."""
        return 'disk%s' % self.portnum

    def get_drive_id(self):
        """Return the serial number, or 'unknown' when none is recorded."""
        if self.serialnum == '':
            return 'unknown'
        return self.serialnum

    def get_license(self):
        """Return the licensed (branding) flag."""
        return self.licensed

    def managed(self):
        """Return True if the disk is flagged as externally managed."""
        return self.__managed

    ###########################################################################
    # Status Routines for Disks
    #
    # disk status is one of "online" "failed" "rebuilding" "missing" "degraded"
    #
    ###########################################################################
    def is_online(self):
        return self.status == 'online'

    def is_rebuilding(self):
        return self.status == 'rebuilding'

    def is_missing(self):
        return self.status == 'missing'

    def is_failed(self):
        return self.status == 'failed'

    def is_invalid(self):
        return self.status == 'invalid'

    def degrade_drive(self):
        self.status = 'degraded'

    def fail_drive(self):
        self.status = 'failed'

    def invalid_drive(self):
        self.status = 'invalid'

    def rebuild_drive(self):
        self.status = 'rebuilding'

    ###########################################################################
    #
    # A hard disks info is filled in via 2 phases when read from the system.
    # initially the physical information is read from the system and stored.
    # then this information is used to build up the raid information,
    # and finally, the status of the disk is determined by the state of the
    # raids on the disk itself.
    # update status chooses the correct status for a disk based on the raid
    # arrays that live on the disk.
    #
    ###########################################################################
    def update_status_by_zone(self, raid_arrays, ftraid_arrays):
        """Derive the disk status from the arrays that live on it.

        raid_arrays   -- objects providing get_drive_raid_status(disk)
        ftraid_arrays -- objects providing get_drive_status(disk)

        An array that returns None for this disk is ignored.  Arrays are
        applied in order, so a later array can overwrite the status chosen by
        an earlier one.
        """
        # NOTE: the original guard "if (self.status == 'online'):" is disabled,
        # so the lookup runs whatever the current status is.
        for array in raid_arrays:
            drv_status = array.get_drive_raid_status(self)
            if drv_status in ('failed', 'missing'):
                self.degrade_drive()
            elif drv_status == 'rebuilding':
                self.rebuild_drive()

        for ftarr in ftraid_arrays:
            drv_status = ftarr.get_drive_status(self)
            if drv_status is not None and drv_status != 'online':
                self.degrade_drive()

    # This function reads information off the sysfs and other sources to
    # fill in disk related params.
    def fill_disk_params(self, port):
        """Populate type, bus, name, model, size, serial, firmware and media.

        port -- physical port number of the disk

        Raises rrdm_error when no bus can be found for the port.  Errors from
        get_scsi_sysfs_param are not handled here; callers in this class
        catch IOError and mark the disk 'missing'.
        """
        found_bus = hwtool_disk_map.find_bus_by_port(port)
        if found_bus is None:
            raise rrdm_error('Unable to determine bus number for port [%s]'
                             % port)

        licensed = hwtool_dlm.is_licensed('%s' % port)

        # disk params read from sysfs based on the bus.
        # if any of these fail, assume the disk has gone missing.
        #
        # XXX/munirb: Bug 38786
        # Dont set the status to failed as the disk may be missing
        # Check for that by doing an IO for vendor name which will
        # fail for missing drives
        self.type = get_scsi_sysfs_param(found_bus, 'vendor')
        self.bus_num = found_bus
        self.name = get_scsi_sysfs_param(found_bus, 'device_name')
        self.model = get_scsi_sysfs_param(found_bus, 'model')
        self.size = get_scsi_sysfs_param(found_bus, 'block/size')

        # XXX/munirb: Bug 38665
        # Set the status to online even though the disk is unbranded
        # If the disk is already being used, no point raising the RAID alarm
        # Hardware alarm will be triggered via self.licensed
        # Also clear all disk details as we want to give the impression
        # that it is a big thing to use that disk
        self.status = "online"

        if self.model == "Virtual disk":
            self.serialnum = ""
            self.firmware = ""
            self.__media = "disk"
        else:
            dinfo = disk_info_map.get_disk_info(self.get_devname())
            if dinfo is not None and dinfo.have_valid_info():
                self.serialnum = dinfo.get_serial()
                self.firmware = dinfo.get_fw()
                self.__media = dinfo.get_media()
            else:
                self.serialnum = 'unknown'
                self.firmware = 'unknown'
                self.__media = 'unknown'

        self.licensed = bool(licensed)

    # Fill spare info from system and diskinfo
    def fill_system_spare_info(self, port):
        """Populate a spare disk on *port*; mark it missing on IOError."""
        self.type = 'ATA'
        self.portnum = port
        try:
            self.fill_disk_params(self.portnum)
        except IOError:
            self.status = "missing"
            self.licensed = False

    # the part tbl we expect is driven by the configuration
    # so it is set by its own API.
def fill_from_system_info(self, port, zone, part_tbl, mfg_mode): self.type='ATA' self.__zone = zone self.part_tbl = part_tbl self.portnum = port try: # Check if the disk is in a managed zone (for VSH) ? dl = zone.get_layout() if dl.managed_disk() == 1: rlog_debug ('Drive %d is externally managed' % self.portnum) self.__managed = True except rrdm_error: print what pass self.__logical_port = zone.physical_to_zone_logical(self.portnum) if self.__logical_port == None or self.__logical_port < 0: raise rrdm_error ('Invalid drive %d for zone %s' % (self.portnum, zone.get_name())) try: self.fill_disk_params(self.portnum) except IOError: self.status="missing" self.licensed=False if not self.is_missing() and \ not self.__zone.is_media_valid(self.__media): self.invalid_drive() # initialize the LED self.disk_led = DiskLED(port) # if the drive is here & not managed, read the SB if not self.is_missing() and self.managed() == False: self.read_superblock() if mfg_mode == False and self.superblock == None: rlog_notice ('Drive %s does not have a valid superblock' % self.name) self.fail_drive() else: if SystemDiskStatus(self.portnum).is_failed(): self.fail_drive() if self.__zone and self.__zone.get_name() == 'fts': if self.superblock: if self.superblock.get_serial() == spare_serial_num: # Its a spare disk, keeping the debug statement if we add # anything in the future rlog_debug ('Drive %s is a spare disk' % self.name) else: if self.superblock.get_serial() != ApplianceInfo().get_serial() or \ self.__logical_port != self.superblock.get_raid_port(): # out of position drive, this requires force add to # get the system to use the disk self.invalid_drive() def is_valid_media(self): return self.__zone.is_media_valid(self.__media) def has_valid_superblock(self): return self.superblock != None def display_drive_info(self): print '\t----------------------------------------\t' print '\tPhysical Drive %s' % self.portnum print '\t----------------------------------------\t' size_blocks = int 
(self.size, 10) size_gb = size_blocks * 512 / (1024*1024*1024) print '\tStatus: %s\t\tType: %s' % (self.status, self.get_media()) print '\tProduct: %s\t\tCapacity: %d GB' % (self.model, size_gb) print '\tSerial: %s\t\tFirmware: %s' % (self.serialnum, self.firmware) print '\tLicensed: %s' % self.get_license() print '' ########################################################################### # Disk LED Control Params # ########################################################################### def turn_on_led(self): self.disk_led.set_led_state (True) def turn_off_led(self): self.disk_led.set_led_state (False) def get_led_state(self): return self.disk_led.get_led_state() def read_superblock(self, wait_for_device = False): part = self.part_tbl.find_partition_by_name ('rvbd') if part != None and self.status != 'missing': rvbd_dev = '/dev/%sp%s' % (self.get_devname(), part.part_id) try: self.superblock = RvbdSuperBlock(rvbd_dev, wait_for_device) rlog_debug ('Read SB for %s [%d:r%d]' % (rvbd_dev, self.superblock.get_port(), self.superblock.get_raid_port())) self.__drive_logical_port = self.superblock.get_raid_port() except rrdm_error, error_msg: self.superblock = None else: