예제 #1
0
파일: fscd.py 프로젝트: hanzf00/openbmc
 def __init__(self, config=RAMFS_CONFIG, zone_config=CONFIG_DIR,
              log_level="warning"):
     """Start the logger, load the FSC configuration and seed all tunables.

     Args:
         config: handed to ``get_fsc_config`` to load the FSC settings.
         zone_config: stored unchanged as ``self.zone_config``.
         log_level: level name passed to ``Logger.start``.
     """
     Logger.start("fscd", log_level)
     Logger.info("Starting fscd")
     self.zone_config = zone_config
     self.fsc_config = self.get_fsc_config(config)  # json dump from config

     # Tunables that start from the class-level DEFAULT_* constants.
     self.boost = self.DEFAULT_BOOST
     self.boost_type = self.DEFAULT_BOOST_TYPE
     self.transitional = self.DEFAULT_TRANSITIONAL
     self.ramp_rate = self.DEFAULT_RAMP_RATE

     # Optional settings: None until something assigns a real value.
     for unset_attr in (
             "non_fanfail_limited_boost",
             "sensor_fail",
             "ssd_progressive_algorithm",
             "sensor_valid_check",
             "fail_sensor_type",
             "fan_dead_boost",
             "fan_fail",
             "fan_recovery_time",
             "fan_limit_upper_pwm",
             "fan_limit_lower_pwm",
             "pwm_sensor_boost_value",
     ):
         setattr(self, unset_attr, None)

     # Boolean switches, all off initially.
     self.fan_recovery_pending = False
     self.sensor_filter_all = False
     self.output_max_boost_pwm = False
예제 #2
0
파일: fscd.py 프로젝트: hanzf00/openbmc
 def get_config_params(self):
     """Copy settings from ``self.fsc_config`` onto attributes of ``self``.

     ``pwm_transition_value``, ``pwm_boost_value``, ``watchdog`` and
     ``sample_interval_ms`` are required (a missing one raises ``KeyError``).
     Every other key is optional: when absent the attribute is either left
     untouched or set to ``False``, as noted inline.  When ``watchdog`` is
     truthy the watchdog is kicked once before returning.
     """
     cfg = self.fsc_config

     self.transitional = cfg["pwm_transition_value"]
     self.boost = cfg["pwm_boost_value"]

     # Optional PWM limits: only overwrite the attribute when configured.
     for key in ("fan_limit_upper_pwm", "fan_limit_lower_pwm"):
         if key in cfg:
             setattr(self, key, cfg[key])
     if "non_fanfail_limited_boost_value" in cfg:
         self.non_fanfail_limited_boost = cfg[
             "non_fanfail_limited_boost_value"]
     self.sensor_filter_all = cfg.get("sensor_filter_all", False)

     # The "boost" section is optional; an empty dict means "no sub-keys".
     boost_cfg = cfg["boost"] if "boost" in cfg else {}
     if "fan_fail" in boost_cfg:
         self.fan_fail = boost_cfg["fan_fail"]
     if "progressive" in boost_cfg and boost_cfg["progressive"]:
         self.boost_type = "progressive"
     if "fan_dead_boost" in cfg:
         self.fan_dead_boost = cfg["fan_dead_boost"]
         # The all-fans-failed counter only exists when fan_dead_boost does.
         self.all_fan_fail_counter = 0
     if "output_max_boost_pwm" in cfg:
         self.output_max_boost_pwm = cfg["output_max_boost_pwm"]
     if "sensor_fail" in boost_cfg:
         self.sensor_fail = boost_cfg["sensor_fail"]
         if self.sensor_fail:
             # These are only read when sensor-fail boosting is enabled.
             for key in ("pwm_sensor_boost_value",
                         "fail_sensor_type",
                         "ssd_progressive_algorithm"):
                 if key in cfg:
                     setattr(self, key, cfg[key])
     if "sensor_valid_check" in cfg:
         self.sensor_valid_check = cfg["sensor_valid_check"]

     self.watchdog = cfg["watchdog"]
     # Feature flags default to False when not configured.
     self.fanpower = cfg.get("fanpower", False)
     self.chassis_intrusion = cfg.get("chassis_intrusion", False)
     self.enable_fsc_sensor_check = cfg.get("enable_fsc_sensor_check", False)
     if "ramp_rate" in cfg:
         self.ramp_rate = cfg["ramp_rate"]
     if self.watchdog:
         Logger.info("watchdog pinging enabled")
         kick_watchdog()
     # Configured in milliseconds, stored in seconds.
     self.interval = cfg["sample_interval_ms"] / 1000.0
     if "fan_recovery_time" in cfg:
         self.fan_recovery_time = cfg["fan_recovery_time"]
예제 #3
0
    def build_zones(self):
        """Compile every configured zone expression into a ``Zone`` object.

        For each entry under ``fsc_config["zones"]`` the expression file is
        read from ``self.zone_config``, compiled with
        ``fsc_expr.make_eval_tree`` and wrapped in a ``Zone``.  The board
        part of every external variable is registered in
        ``self.machine.frus``; a third ``:``-separated field, when present,
        is appended to ``self.machine.nums[board]``.
        """
        self.zones = []
        zone_entries = list(self.fsc_config["zones"].values())
        for counter, data in enumerate(zone_entries):
            expr_path = os.path.join(self.zone_config, data["expr_file"])
            with open(expr_path, "r") as exf:
                source = exf.read()
            Logger.info("Compiling FSC expression for zone:")
            Logger.info(source)
            expr, inf = fsc_expr.make_eval_tree(source, self.profiles)

            for ext_var in inf["ext_vars"]:
                fields = ext_var.split(":")
                board = fields[0]
                # First sighting of a board: start its list in machine.nums.
                if board not in self.machine.frus:
                    self.machine.nums[board] = []
                self.machine.frus.add(board)
                if len(fields) == 3:
                    self.machine.nums[board].append(fields[2])

            self.zones.append(
                Zone(
                    data["pwm_output"],
                    expr,
                    inf,
                    self.transitional,
                    counter,
                    self.boost,
                    self.sensor_fail,
                    self.sensor_valid_check,
                    self.fail_sensor_type,
                    self.ssd_progressive_algorithm,
                )
            )
예제 #4
0
파일: fscd.py 프로젝트: khalex7/rikor-aztec
def handle_term(signum, frame):
    """Signal handler: put the fans in the init state, log, then exit.

    The watchdog is stopped only when the signal is SIGQUIT and the
    module-level ``wdfile`` is truthy.  Always ends with
    ``sys.exit('killed')``.
    """
    global wdfile
    board_callout(callout='init_fans', boost=DEFAULT_INIT_TRANSITIONAL)
    Logger.warn("killed by signal %d" % (signum, ))
    quit_with_watchdog = signum == signal.SIGQUIT and wdfile
    if quit_with_watchdog:
        Logger.info("Killed with SIGQUIT - stopping watchdog.")
        stop_watchdog()
    sys.exit('killed')
예제 #5
0
def board_callout(callout="None", **kwargs):
    """Run a board specific callout.

    Only callouts containing ``"init_fans"`` are handled: all PWMs are set
    to ``kwargs["boost"]`` (100 when not supplied) and the result of
    ``yamp_set_all_pwm`` is returned.  Any other callout is logged as a
    warning and ``None`` is returned.
    """
    if "init_fans" not in callout:
        Logger.warn("Need to perform callout action %s" % callout)
        return None

    boost = kwargs.get("boost", 100)
    Logger.info("FSC init fans to boost=%s " % str(boost))
    return yamp_set_all_pwm(boost)
예제 #6
0
def host_shutdown():
    """Power the host off by writing 0 to two syscpld sysfs attributes.

    The userver power enable is cleared first; after a 5 second pause the
    main power attribute is cleared as well.

    Returns:
        The captured stdout (bytes) of the second shell command.
    """
    MAIN_POWER = "/sys/bus/i2c/drivers/syscpld/12-0031/pwr_main_n"
    USERVER_POWER = "/sys/bus/i2c/drivers/syscpld/12-0031/pwr_usrv_en"

    cmd = 'echo 0 > ' + USERVER_POWER
    Logger.info("host_shutdown() executing {}".format(cmd))
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so the shell is reaped instead of being left behind as a zombie.
    Popen(cmd, shell=True, stdout=PIPE).communicate()
    time.sleep(5)
    cmd = 'echo 0 > ' + MAIN_POWER
    Logger.info("host_shutdown() executing {}".format(cmd))
    response, _ = Popen(cmd, shell=True, stdout=PIPE).communicate()
    return response
예제 #7
0
파일: fscd.py 프로젝트: hanzf00/openbmc
def stop_watchdog():
    """Stop the watchdog device by running ``WDTCLI_CMD stop``.

    Any output on stderr is treated as failure and logged as an error;
    otherwise success is logged.
    """
    proc = subprocess.Popen(
        WDTCLI_CMD + " stop",
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    _, err = proc.communicate()
    if err:
        Logger.error("failed to stop watchdog device")
        return
    Logger.info("watchdog stopped")
예제 #8
0
def host_shutdown():
    """Power the host off by writing 0 to two CPLD sysfs attributes.

    The COM-e power enable (scmcpld) is cleared first; after a 5 second
    pause the smbcpld attribute is cleared as well.

    Returns:
        The captured stdout (bytes) of the second shell command.
    """
    MAIN_POWER = "/sys/bus/i2c/drivers/smbcpld/12-003e/cpld_in_p1220"
    USERVER_POWER = "/sys/bus/i2c/drivers/scmcpld/2-0035/com_exp_pwr_enable"

    cmd = 'echo 0 > ' + USERVER_POWER
    Logger.info("host_shutdown() executing {}".format(cmd))
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so the shell is reaped instead of being left behind as a zombie.
    Popen(cmd, shell=True, stdout=PIPE).communicate()
    time.sleep(5)
    cmd = 'echo 0 > ' + MAIN_POWER
    Logger.info("host_shutdown() executing {}".format(cmd))
    response, _ = Popen(cmd, shell=True, stdout=PIPE).communicate()
    return response
def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    A callout containing "init_fans" sets every PWM to the ``boost`` keyword
    argument (100 when omitted) and returns the result of ``set_all_pwm``.
    Anything else is only logged as a warning and yields ``None``.
    """
    if "init_fans" not in callout:
        Logger.warn("Need to perform callout action %s" % callout)
        return None

    boost = kwargs.get("boost", 100)
    Logger.info("FSC init fans to boost=%s " % str(boost))
    return set_all_pwm(boost)
예제 #10
0
def board_callout(callout='None', **kwargs):
    '''
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    Handles 'init_fans' by driving all PWMs to the requested boost
    (default 100); every other callout is logged and ignored.
    '''
    wants_fan_init = 'init_fans' in callout
    if wants_fan_init:
        boost = kwargs['boost'] if 'boost' in kwargs else 100
        Logger.info("FSC init fans to boost=%s " % str(boost))
        return set_all_pwm(boost)

    Logger.warn("Need to perform callout action %s" % callout)
    return None
예제 #11
0
def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    "read_power" returns ``bmc_read_power()``.  "init_fans" sets all PWMs
    to the ``boost`` keyword argument (100 when omitted) and returns the
    result of ``set_all_pwm``.  Unknown callouts return ``None`` silently.
    """
    if "read_power" in callout:
        return bmc_read_power()

    if "init_fans" in callout:
        # Platform default boost, unless fscd passed an override.
        boost = kwargs.get("boost", 100)
        Logger.info("FSC init fans to boost=%s " % str(boost))
        return set_all_pwm(boost)

    return None
예제 #12
0
def board_callout(callout='None', **kwargs):
    '''
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    Supported callouts: 'read_power' (returns bmc_read_power()) and
    'init_fans' (sets all PWMs to the boost value and returns the result
    of set_all_pwm()).  Any other callout returns None.
    '''
    if 'read_power' in callout:
        return bmc_read_power()
    if 'init_fans' not in callout:
        return None

    # Use the caller-supplied boost when given, else the platform default.
    boost = kwargs['boost'] if 'boost' in kwargs else 100
    Logger.info("FSC init fans to boost=%s " % str(boost))
    return set_all_pwm(boost)
예제 #13
0
파일: fscd.py 프로젝트: khalex7/rikor-aztec
    def update_dead_fans(self, dead_fans):
        '''
        Re-read the fan speeds and update the set of dead fans in place.

        A fan is dead when its RPM is below ``fsc_config['min_rpm']``.
        When at least one fan newly died, every currently dead fan is
        logged and gets an empty record file under FAN_FAIL_RECORD_DIR.
        Recovered fans are logged and their record file is removed.

        Returns the same (mutated) ``dead_fans`` set.
        '''
        def notify(message):
            # Fan events are warnings when fanpower is set, critical if not.
            if self.fanpower:
                Logger.warn(message)
            else:
                Logger.crit(message)

        previously_dead = dead_fans.copy()
        speeds = self.machine.read_fans(self.fans)
        # ANSI escape: clear the screen and move the cursor home.
        print("\x1b[2J\x1b[H")
        sys.stdout.flush()

        for fan, rpms in list(speeds.items()):
            Logger.info("%s speed: %d RPM" % (fan.label, rpms))
            if rpms < self.fsc_config['min_rpm']:
                dead_fans.add(fan)
                self.fsc_fan_action(fan, action='dead')
                continue
            dead_fans.discard(fan)

        recovered_fans = previously_dead - dead_fans
        if dead_fans - previously_dead:
            notify("%d fans failed" % (len(dead_fans), ))
            for dead_fan in dead_fans:
                notify("%s dead, %d RPM" %
                       (dead_fan.label, speeds[dead_fan]))
                Logger.usbdbg("%s fail" % (dead_fan.label))
                record_path = FAN_FAIL_RECORD_DIR + '%s' % (dead_fan.label)
                if not os.path.isfile(record_path):
                    # Create the empty marker file for this failed fan.
                    with open(record_path, 'w'):
                        pass

        for fan in recovered_fans:
            notify("%s has recovered" % (fan.label, ))
            Logger.usbdbg("%s recovered" % (fan.label))
            self.fsc_fan_action(fan, action='recover')
            record_path = FAN_FAIL_RECORD_DIR + '%s' % (fan.label)
            if os.path.isfile(record_path):
                os.remove(record_path)
        return dead_fans
예제 #14
0
 def __init__(self, config=RAMFS_CONFIG, zone_config=CONFIG_DIR,
              log_level='warning'):
     """Start the logger, load the FSC configuration and set defaults.

     Args:
         config: handed to ``get_fsc_config`` to load the FSC settings.
         zone_config: stored unchanged as ``self.zone_config``.
         log_level: level name passed to ``Logger.start``.
     """
     Logger.start("fscd", log_level)
     Logger.info("Starting fscd")
     self.zone_config = zone_config
     self.fsc_config = self.get_fsc_config(config)  # json dump from config

     # Tunables that start from the class-level DEFAULT_* constants.
     self.boost = self.DEFAULT_BOOST
     self.boost_type = self.DEFAULT_BOOST_TYPE
     self.transitional = self.DEFAULT_TRANSITIONAL
     self.ramp_rate = self.DEFAULT_RAMP_RATE

     # Optional settings: None until something assigns a real value.
     for unset_attr in ("sensor_fail", "ssd_progressive_algorithm",
                        "sensor_valid_check", "fail_sensor_type",
                        "fan_dead_boost", "fan_fail"):
         setattr(self, unset_attr, None)
예제 #15
0
파일: fscd.py 프로젝트: hanzf00/openbmc
    def run(self):
        """
        Main FSCD method that builds from the fscd config and runs.

        After building all internal structures the fans are set to the
        transitional PWM, the fan LEDs to "led_blue" and the fan mode to
        "trans_mode".  The method then loops forever: kick the watchdog
        (when enabled), sleep one sample interval, refresh the dead-fan
        set (when fan-fail handling is enabled) and update every zone.
        It never returns.
        """

        # Get everything from json and build profiles, fans, zones
        self.builder()

        self.fail_record_dir()

        self.machine.set_all_pwm(self.fans, self.transitional)
        self.fsc_set_all_fan_led(color="led_blue")

        mode = fan_mode["trans_mode"]
        self.zones[0].get_set_fan_mode(mode, action="write")

        # Use the monotonic clock for the loop interval: wall-clock time can
        # jump when the system clock is set, which would produce a negative
        # or huge time difference for the zones.
        last = time.monotonic()
        dead_fans = set()

        if self.fanpower:
            time.sleep(30)

        while True:
            if self.watchdog:
                kick_watchdog()

            time.sleep(self.interval)

            if self.fanpower and not self.get_fan_power_status():
                # Fan power is off: skip this cycle and remember that the
                # fans need time to spin up once power returns.
                self.fan_recovery_pending = True
                continue
            if self.fan_fail:
                if (self.fan_recovery_pending
                        and self.fan_recovery_time is not None):
                    # Accelerating, wait for a while
                    time.sleep(self.fan_recovery_time)
                    self.fan_recovery_pending = False
                # Get dead fans for determining speed
                dead_fans = self.update_dead_fans(dead_fans)

            now = time.monotonic()
            time_difference = now - last
            last = now
            Logger.info("time_difference: %f" % (time_difference))

            # Check sensors and update zones
            self.update_zones(dead_fans, time_difference)
예제 #16
0
def _count_present(kind):
    """Count the ``kind`` units that ``presence_util.sh`` reports as 1."""
    cmd = "presence_util.sh " + kind
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so the shell is reaped instead of being left behind as a zombie.
    output, _ = Popen(cmd, shell=True, stdout=PIPE).communicate()
    pattern = re.compile(kind + r".*\s:\s+(\d+)")
    present = 0
    for line in output.decode().split("\n"):
        m = pattern.match(line)
        if m is not None and int(m.group(1)) == 1:
            present += 1
    return present


def board_callout(callout="None", **kwargs):
    """
    Override this method for defining board specific callouts:
    - Example: chassis intrusion

    "init_fans" sets all PWMs to the ``boost`` keyword argument (100 when
    omitted) and returns the result of ``set_all_pwm``.

    "chassis_intrusion" counts the fans and PSUs reported present by
    ``presence_util.sh``.  Fewer than 4 fans logs a warning and returns 1;
    fewer than 2 PSUs only logs a warning.  Otherwise 0 is returned.

    Any other callout is logged as a warning and returns ``None``.
    """
    if "init_fans" in callout:
        boost = kwargs.get("boost", 100)
        Logger.info("FSC init fans to boost=%s " % str(boost))
        return set_all_pwm(boost)

    if "chassis_intrusion" in callout:
        fan_presence = _count_present("fan")
        psu_presence = _count_present("psu")
        tray_pull_out = 0

        if fan_presence < 4:
            Logger.warn("chassis_intrusion Found Fan absent (%d/4)" %
                        (fan_presence))
            tray_pull_out = 1
        if psu_presence < 2:
            # A missing PSU is reported but does not count as tray pull-out.
            Logger.warn("chassis_intrusion Found PSU absent (%d/2)" %
                        (psu_presence))
        return tray_pull_out

    Logger.warn("Need to perform callout action %s" % callout)
    return None
예제 #17
0
    def build_zones(self):
        """Compile every configured zone expression into a ``Zone`` object.

        For each entry under ``fsc_config['zones']`` the expression file is
        read from ``self.zone_config`` and compiled with
        ``fsc_expr.make_eval_tree``.  The board part of each
        ``board:sensor`` external variable is added to
        ``self.machine.frus``.
        """
        self.zones = []
        for counter, data in enumerate(self.fsc_config['zones'].values()):
            expr_path = os.path.join(self.zone_config, data['expr_file'])
            with open(expr_path, 'r') as exf:
                source = exf.read()
            Logger.info("Compiling FSC expression for zone:")
            Logger.info(source)
            expr, inf = fsc_expr.make_eval_tree(source, self.profiles)

            for ext_var in inf['ext_vars']:
                # Unpacking requires exactly two ':'-separated fields.
                board, _ = ext_var.split(':')
                self.machine.frus.add(board)

            self.zones.append(
                Zone(data['pwm_output'], expr, inf, self.transitional,
                     counter, self.boost, self.fail_sensor_type,
                     self.ssd_progressive_algorithm))
예제 #18
0
    def get_config_params(self):
        """Copy settings from ``self.fsc_config`` onto attributes of ``self``.

        ``pwm_transition_value``, ``pwm_boost_value``, ``watchdog`` and
        ``sample_interval_ms`` are required (a missing one raises
        ``KeyError``); all other keys are optional.

        When ``watchdog`` is truthy, ``/dev/watchdog`` is opened and ``'V'``
        is written to it.  If the device cannot be opened an error is logged
        and ``self.wdfile`` stays ``None``.
        """
        cfg = self.fsc_config
        self.transitional = cfg['pwm_transition_value']
        self.boost = cfg['pwm_boost_value']

        # The 'boost' section is optional; an empty dict means "no sub-keys".
        boost_cfg = cfg['boost'] if 'boost' in cfg else {}
        if 'fan_fail' in boost_cfg:
            self.fan_fail = boost_cfg['fan_fail']
        if 'progressive' in boost_cfg:
            if boost_cfg['progressive']:
                self.boost_type = 'progressive'
            # fan_dead_boost is only picked up when boost.progressive is
            # present in the config (whatever its value).
            if 'fan_dead_boost' in cfg:
                self.fan_dead_boost = cfg['fan_dead_boost']
        if 'sensor_fail' in boost_cfg and boost_cfg['sensor_fail']:
            if 'fail_sensor_type' in cfg:
                self.fail_sensor_type = cfg['fail_sensor_type']
            if 'ssd_progressive_algorithm' in cfg:
                self.ssd_progressive_algorithm = cfg[
                    'ssd_progressive_algorithm']

        self.watchdog = cfg['watchdog']
        # Feature flags default to False when not configured.
        self.fanpower = cfg.get('fanpower', False)
        self.chassis_intrusion = cfg.get('chassis_intrusion', False)
        if 'ramp_rate' in cfg:
            self.ramp_rate = cfg['ramp_rate']

        self.wdfile = None
        if self.watchdog:
            Logger.info("watchdog pinging enabled")
            # open() raises on failure instead of returning a falsy value,
            # so the failure has to be caught for the error to be logged.
            try:
                self.wdfile = open('/dev/watchdog', 'w+')
            except (IOError, OSError):
                Logger.error("couldn't open watchdog device")
            else:
                self.wdfile.write('V')
                self.wdfile.flush()
        # Configured in milliseconds, stored in seconds.
        self.interval = cfg['sample_interval_ms'] / 1000.0
예제 #19
0
파일: fscd.py 프로젝트: khalex7/rikor-aztec
 def get_config_params(self):
     """Copy settings from ``self.fsc_config`` onto attributes of ``self``.

     ``pwm_transition_value``, ``pwm_boost_value``, ``watchdog`` and
     ``sample_interval_ms`` are required (a missing one raises ``KeyError``).
     Optional keys either leave the attribute untouched or default to
     ``False``, as noted inline.  When ``watchdog`` is truthy the watchdog
     is kicked once before returning.
     """
     cfg = self.fsc_config

     def adopt(key):
         # Copy cfg[key] onto the same-named attribute, only if configured.
         if key in cfg:
             setattr(self, key, cfg[key])

     self.transitional = cfg['pwm_transition_value']
     self.boost = cfg['pwm_boost_value']

     # The 'boost' section is optional; an empty dict means "no sub-keys".
     boost_cfg = cfg['boost'] if 'boost' in cfg else {}
     if 'fan_fail' in boost_cfg:
         self.fan_fail = boost_cfg['fan_fail']
     if 'progressive' in boost_cfg and boost_cfg['progressive']:
         self.boost_type = 'progressive'
     if 'fan_dead_boost' in cfg:
         self.fan_dead_boost = cfg['fan_dead_boost']
         # The all-fans-failed counter only exists when fan_dead_boost does.
         self.all_fan_fail_counter = 0
     if 'sensor_fail' in boost_cfg:
         self.sensor_fail = boost_cfg['sensor_fail']
         if self.sensor_fail:
             adopt('fail_sensor_type')
             adopt('ssd_progressive_algorithm')
     adopt('sensor_valid_check')

     self.watchdog = cfg['watchdog']
     # Feature flags default to False when not configured.
     self.fanpower = cfg.get('fanpower', False)
     self.chassis_intrusion = cfg.get('chassis_intrusion', False)
     adopt('ramp_rate')
     if self.watchdog:
         Logger.info("watchdog pinging enabled")
         kick_watchdog()
     # Configured in milliseconds, stored in seconds.
     self.interval = cfg['sample_interval_ms'] / 1000.0
     adopt('fan_recovery_time')
예제 #20
0
    def update_dead_fans(self, dead_fans):
        '''
        Re-read the fan speeds and update the set of dead fans in place.

        A fan is dead when its RPM is below ``fsc_config['min_rpm']``.
        When at least one fan newly died, every currently dead fan is
        logged; recovered fans are logged and get the 'recover' action.

        Returns the same (mutated) ``dead_fans`` set.
        '''
        def report(text):
            # Warning level when fanpower is set, critical otherwise.
            (Logger.warn if self.fanpower else Logger.crit)(text)

        before = dead_fans.copy()
        speeds = self.machine.read_fans(self.fans)
        # ANSI escape: clear the screen and move the cursor home.
        print("\x1b[2J\x1b[H")
        sys.stdout.flush()

        for fan, rpms in speeds.items():
            Logger.info("%s speed: %d RPM" % (fan.label, rpms))
            if rpms < self.fsc_config['min_rpm']:
                dead_fans.add(fan)
                self.fsc_fan_action(fan, action='dead')
                continue
            dead_fans.discard(fan)

        recovered_fans = before - dead_fans
        if dead_fans - before:
            report("%d fans failed" % (len(dead_fans), ))
            for dead_fan in dead_fans:
                report("%s dead, %d RPM" %
                       (dead_fan.label, speeds[dead_fan]))
                Logger.usbdbg("%s fail" % (dead_fan.label))

        for fan in recovered_fans:
            report("%s has recovered" % (fan.label, ))
            Logger.usbdbg("%s recovered" % (fan.label))
            self.fsc_fan_action(fan, action='recover')
        return dead_fans
예제 #21
0
def host_shutdown():
    """Power off the SCM and then, for a known board type, the switch.

    The SCM command (watchdog kick followed by ``wedge_power.sh off``) is
    always run.  After a 5 second pause the switch power-off command for
    "Wedge400" or "Wedge400C" is run; for any other board type a critical
    message is logged and the switch is left alone.

    Returns:
        The captured stdout (bytes) of the last shell command executed.
    """
    SCM_POWER_COMMAND = "/usr/local/bin/wdtcli kick &> /dev/null; /usr/local/bin/wedge_power.sh off"
    TH_SWITCH_POWER_COMMAND = "source /usr/local/bin/openbmc-utils.sh; echo 0 > $SMBCPLD_SYSFS_DIR/th3_turn_on"
    GB_SWITCH_POWER_COMMAND = "source /usr/local/bin/openbmc-utils.sh; echo 0 > $SMBCPLD_SYSFS_DIR/gb_turn_on"
    switch_poweroff_cmd = ""
    brd_type = pal_get_board_type()
    if brd_type == "Wedge400":
        switch_poweroff_cmd = TH_SWITCH_POWER_COMMAND
    elif brd_type == "Wedge400C":
        switch_poweroff_cmd = GB_SWITCH_POWER_COMMAND
    else:
        Logger.crit("Cannot identify board type: %s" % brd_type)
        Logger.crit("Switch won't be resetting!")

    Logger.info("host_shutdown() executing {}".format(SCM_POWER_COMMAND))
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so the shell is reaped instead of being left behind as a zombie.
    response, _ = Popen(SCM_POWER_COMMAND, shell=True,
                        stdout=PIPE).communicate()
    time.sleep(5)
    if switch_poweroff_cmd != "":
        Logger.info("host_shutdown() executing {}".format(switch_poweroff_cmd))
        response, _ = Popen(switch_poweroff_cmd, shell=True,
                            stdout=PIPE).communicate()

    return response
예제 #22
0
def yamp_host_shutdown():
    """Best-effort shutdown: power off the SCD, then the X86 CPU.

    Both commands go through ``yamp_force_run_cmd``, so that as much of the
    system as possible is turned off even if one of the CPLDs/FPGAs is
    already malfunctioning.

    The PSUs are deliberately left on: once every PSU is off the unit is
    hard to recover in a data center, so that step waits until FSCD has
    proven stable.

    Always returns 0.
    """
    scd_power_reg = "/sys/bus/i2c/drivers/scdcpld/4-0023/scd_power_en"
    cpu_off_cmd = "/usr/local/bin/wedge_power.sh off"
    scd_off_cmd = "echo 0 > " + scd_power_reg

    # Step 1: turn off most of the switch board.
    Logger.info("host_shutdown() executing {}".format(scd_off_cmd))
    yamp_force_run_cmd(scd_off_cmd)
    time.sleep(3)

    # Step 2: turn off the X86 CPU.
    Logger.info("host_shutdown() executing {}".format(cpu_off_cmd))
    yamp_force_run_cmd(cpu_off_cmd)

    return 0
예제 #23
0
 def builder(self):
     '''
     Extract everything from the json config and build all internal data
     structures: machine, config parameters, fans, profiles and zones.
     '''
     # Machine object first (read/write sensors), then the json settings.
     self.build_machine()
     self.get_config_params()
     self.build_fans()
     self.build_profiles()

     profile_names = ", ".join(list(self.profiles.keys()))
     Logger.info("Available profiles: " + profile_names)

     self.build_zones()
     Logger.info("Read %d zones" % (len(self.zones)))

     fru_names = ", ".join(self.machine.frus)
     Logger.info("Including sensors from: " + fru_names)
예제 #24
0
    def run(self, sensors, dt):
        """
        Evaluate this zone's expression against the current sensor readings.

        Args:
            sensors: indexed as sensors[board][sensor_name]; each entry is
                read for its .value, .status and .name attributes.
            dt: stored in the evaluation context under the key 'dt'
                (presumably the time since the previous run -- confirm
                against the caller).

        Returns:
            The expression result, raised to at least the floor `outmin`
            computed here and then clamped to the range 0..100.
        """
        ctx = {'dt': dt}
        # Floor for the final output; raised to the transitional or boost
        # value when a sensor reports a problem.
        outmin = 0
        fail_ssd_count = 0
        missing = set()

        for v in self.expr_meta['ext_vars']:
            # External variables are named "board:sensor_name".
            board, sname = v.split(":")
            if sname in sensors[board]:
                sensor = sensors[board][sname]
                ctx[v] = sensor.value
                if sensor.status in ['ucr']:
                    Logger.warn('Sensor %s reporting status %s' %
                                (sensor.name, sensor.status))
                    outmin = self.transitional

                # Sensor-failure handling applies only when
                # 'standby_sensor_fail' is enabled and the sensor status
                # is 'na'.
                if self.fail_sensor_type != None:
                    if 'standby_sensor_fail' in self.fail_sensor_type.keys():
                        if self.fail_sensor_type[
                                'standby_sensor_fail'] == True:
                            if sensor.status in ['na']:
                                # Sensor names starting with "SOC": boost
                                # only if 'server_sensor_fail' is enabled and
                                # fsc_board.get_power_status(board) is truthy.
                                if re.match(r'SOC', sensor.name) != None:
                                    if 'server_sensor_fail' in self.fail_sensor_type.keys(
                                    ):
                                        if self.fail_sensor_type[
                                                'server_sensor_fail'] == True:
                                            ret = fsc_board.get_power_status(
                                                board)
                                            if ret:
                                                Logger.debug(
                                                    "Server Sensor Fail")
                                                outmin = self.boost
                                                # NOTE: break leaves the loop,
                                                # so the remaining ext_vars are
                                                # never added to ctx.
                                                break
                                # Sensor names starting with "SSD": only
                                # counted here; handled after evaluation.
                                elif re.match(r'SSD', sensor.name) != None:
                                    if 'SSD_sensor_fail' in self.fail_sensor_type.keys(
                                    ):
                                        if self.fail_sensor_type[
                                                'SSD_sensor_fail'] == True:
                                            fail_ssd_count = fail_ssd_count + 1
                                # Any other failed sensor boosts right away.
                                else:
                                    Logger.debug("Standby Sensor Fail")
                                    outmin = self.boost
                                    # NOTE: as above, the remaining ext_vars
                                    # are skipped.
                                    break
            else:
                missing.add(v)
                # evaluation tries to ignore the effects of None values
                # (e.g. acts as 0 in max/+)
                ctx[v] = None
        if missing:
            Logger.warn('Missing sensors: %s' % (', '.join(missing), ))
        # `verbose` is not defined in this method; presumably a module-level
        # flag -- confirm.
        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))
        # If *all* sensors in the top level max() report None, the
        # expression will report None
        # NOTE(review): `not exprout` is also true for a result of 0, not
        # only for None.
        if not exprout:
            # transitional_assert_flag makes the ASSERT/DEASSERT messages
            # fire only on a state change, not on every cycle.
            if not self.transitional_assert_flag:
                Logger.crit('ASSERT: Zone%d No sane fan speed could be \
                    calculated! Using transitional speed.' % (self.counter))
            exprout = self.transitional
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit('DEASSERT: Zone%d No sane fan speed could be \
                    calculated! Using transitional speed.' % (self.counter))
            self.transitional_assert_flag = False

        # SSD progressive algorithm: each 'offset_algorithm' entry is indexed
        # as i[0] (failed-SSD count limit) and i[1] (offset added to the
        # output).  The first entry whose limit covers fail_ssd_count wins;
        # if the last entry is reached without a match, the output floor is
        # raised to boost instead.
        if self.fail_sensor_type != None:
            if 'SSD_sensor_fail' in self.fail_sensor_type.keys():
                if self.fail_sensor_type['SSD_sensor_fail'] == True:
                    if fail_ssd_count != 0:
                        if self.ssd_progressive_algorithm != None:
                            if 'offset_algorithm' in self.ssd_progressive_algorithm.keys(
                            ):
                                list_index = 0
                                for i in self.ssd_progressive_algorithm[
                                        'offset_algorithm']:
                                    list_index = list_index + 1
                                    if fail_ssd_count <= i[0]:
                                        exprout = exprout + i[1]
                                        break
                                    else:
                                        if list_index == len(
                                                self.ssd_progressive_algorithm[
                                                    'offset_algorithm']):
                                            outmin = self.boost

        # Apply the floor, then keep the result within 0..100.
        if exprout < outmin:
            exprout = outmin
        exprout = clamp(exprout, 0, 100)
        return exprout
예제 #25
0
 def get_fsc_config(self, fsc_config):
     """Load and return the JSON configuration stored at ``fsc_config``.

     Returns ``None`` when the path is not an existing regular file.
     """
     if not os.path.isfile(fsc_config):
         return None

     Logger.info("Started, reading configuration from %s" %
                 (fsc_config))
     with open(fsc_config, 'r') as config_file:
         parsed = json.load(config_file)
     return parsed
예제 #26
0
    def update_zones(self, dead_fans, time_difference):
        """
        Run one control cycle: compute a PWM value per zone and apply it.

        Sensors are read once and handed to fsc_safe_guards(). For every
        zone the PWM comes from zone.run(), unless chassis intrusion handling
        is enabled and board_callout() reports 1, in which case self.boost is
        used. When self.fan_fail is set the value is then adjusted for failed
        fans (progressive bump or full boost). Finally the change relative to
        zone.last_pwm is limited to self.ramp_rate and the result is written
        to the zone's PWM output(s).

        dead_fans: collection of failed fans; each entry exposes .label.
        time_difference: forwarded to zone.run() as dt.

        TODO: Need to change logic here. The intended flow is: on platforms
        with chassis_intrusion enabled, derive a boost flag and run boost
        mode when it is set, normal mode otherwise; platforms without
        chassis_intrusion always run normal mode.
        """
        readings = self.machine.read_sensors(self.sensors)
        self.fsc_safe_guards(readings)
        for zone in self.zones:
            Logger.info("PWM: %s" % (json.dumps(zone.pwm_output)))

            # The board callout is only consulted when the feature is enabled.
            intrusion_boost = False
            if self.chassis_intrusion:
                intrusion_boost = (
                    board_callout(callout='chassis_intrusion') == 1)

            if intrusion_boost:
                pwmval = self.boost
            else:
                pwmval = zone.run(sensors=readings, dt=time_difference)

            if self.fan_fail:
                if self.boost_type == 'progressive' and self.fan_dead_boost:
                    # Progressive mode: bump the PWM per failed fan using the
                    # first table row whose fan count covers the failures.
                    dead_count = len(dead_fans)
                    if dead_count > 0:
                        labels = ', '.join([str(fan.label) for fan in dead_fans])
                        Logger.info("Progressive mode: Failed fans: %s" %
                                    (labels))
                        matched = False
                        for fan_count, rate in self.fan_dead_boost["data"]:
                            if dead_count <= fan_count:
                                pwmval = clamp(
                                    pwmval + (dead_count * rate), 0, 100)
                                matched = True
                                break
                        if not matched:
                            # More failures than any table row covers.
                            pwmval = self.boost
                else:
                    if dead_fans:
                        # Non-progressive: a single failed fan boosts all fans.
                        labels = ', '.join([str(fan.label) for fan in dead_fans])
                        Logger.info("Failed fans: %s" % (labels))
                        pwmval = self.boost
                    if self.fan_dead_boost:
                        if len(dead_fans) != len(self.fans):
                            # At least one fan is working: reset the counter.
                            self.all_fan_fail_counter = 0
                        else:
                            # All fans failed: act after enough cycles.
                            self.all_fan_fail_counter += 1
                            Logger.warn(
                                "Currently all fans failed for {} cycles".format(
                                    self.all_fan_fail_counter))
                            threshold = self.fan_dead_boost["threshold"]
                            if (threshold and self.fan_dead_boost["action"]
                                    and self.all_fan_fail_counter >= threshold):
                                self.fsc_host_action(
                                    action=self.fan_dead_boost["action"],
                                    cause="All fans are bad for more than "
                                          + str(threshold)
                                          + " cycles"
                                    )

            # Limit the per-cycle change to ramp_rate in either direction.
            previous_pwm = zone.last_pwm
            if abs(previous_pwm - pwmval) > self.ramp_rate:
                if pwmval < previous_pwm:
                    pwmval = previous_pwm - self.ramp_rate
                else:
                    pwmval = previous_pwm + self.ramp_rate
            zone.last_pwm = pwmval

            if not hasattr(zone.pwm_output, '__iter__'):
                self.machine.set_pwm(self.fans[zone.pwm_output], pwmval)
            else:
                for output in zone.pwm_output:
                    self.machine.set_pwm(self.fans.get(str(output)), pwmval)
예제 #27
0
    def run(self, sensors, dt):
        """
        Evaluate this zone's fan expression and return the PWM to apply.

        Every variable listed in self.expr_meta["ext_vars"] ("board:sensor")
        is looked up in `sensors` and copied into the evaluation context.
        While doing so a floor value (`outmin`) is tracked: a sensor with
        status "ucr" raises the floor to self.transitional and, when
        self.sensor_fail is enabled, a non-SSD sensor with status "na" raises
        it to self.boost. Failed SSD sensors are only counted; they are
        handled afterwards through the "offset_algorithm" table of
        self.ssd_progressive_algorithm.

        Args:
            sensors: mapping of board name to a mapping of sensor name to a
                sensor object exposing .value, .status and .name.
            dt: value stored in the evaluation context under "dt".

        Returns:
            The larger of the expression result and the floor, passed through
            clamp(value, 0, 100).

        Side effects: updates the per-sensor validity / missing-sensor
        bookkeeping on self, creates or removes marker files under
        SENSOR_FAIL_RECORD_DIR and RECORD_DIR, logs assert/deassert events
        and writes the resulting fan mode via self.get_set_fan_mode().
        """
        ctx = {"dt": dt}
        outmin = 0
        fail_ssd_count = 0
        cause_boost_count = 0
        no_sane_flag = 0
        mode = 0

        for sensor_index, v in enumerate(self.expr_meta["ext_vars"]):
            sensor_valid_flag = 1
            sdata = v.split(":")
            board = sdata[0]
            sname = sdata[1]
            if self.sensor_valid_check is not None:
                for check_name in self.sensor_valid_check:
                    if re.match(check_name, sname, re.IGNORECASE) is None:
                        continue
                    self.sensor_valid_cur[sensor_index] = (
                        fsc_board.sensor_valid_check(
                            board,
                            sname,
                            check_name,
                            self.sensor_valid_check[check_name]["attribute"],
                        ))
                    # If current or previous sensor valid status is 0, ignore
                    # this sensor reading. Only when both are 1 does the
                    # sensor go through the checks below.
                    if (self.sensor_valid_cur[sensor_index] == 0
                            or self.sensor_valid_pre[sensor_index] == 0):
                        sensor_valid_flag = 0
                        self.missing_sensor_assert_retry[sensor_index] = 0
                    # Only the first matching check pattern is applied.
                    break

            if sensor_valid_flag == 1:
                if sname in sensors[board]:
                    self.missing_sensor_assert_retry[sensor_index] = 0
                    if self.missing_sensor_assert_flag[sensor_index]:
                        Logger.crit("DEASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = False

                    sensor = sensors[board][sname]
                    ctx[v] = sensor.value
                    if sensor.status == "ucr":
                        Logger.warn("Sensor %s reporting status %s" %
                                    (sensor.name, sensor.status))
                        outmin = max(outmin, self.transitional)
                        if outmin == self.transitional:
                            mode = fan_mode["trans_mode"]
                    # Equality (not truthiness) is kept so that only True/1
                    # enables sensor-fail handling, as before.
                    elif self.sensor_fail == True:
                        sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
                        if not os.path.isdir(SENSOR_FAIL_RECORD_DIR):
                            os.mkdir(SENSOR_FAIL_RECORD_DIR)
                        if (sensor.status == "na"
                                and self.sensor_valid_cur[sensor_index] != -1):
                            if re.match(r"SSD", sensor.name) is not None:
                                # SSD failures are handled progressively below.
                                fail_ssd_count += 1
                            else:
                                Logger.warn("%s Fail" % v)
                                outmin = max(outmin, self.boost)
                                cause_boost_count += 1
                            if not os.path.isfile(sensor_fail_record_path):
                                # Create an empty marker file for this sensor.
                                with open(sensor_fail_record_path, "w"):
                                    pass
                            if outmin == self.boost:
                                mode = fan_mode["boost_mode"]
                        elif os.path.isfile(sensor_fail_record_path):
                            os.remove(sensor_fail_record_path)
                else:
                    # Assert "missing" only once the retry budget is used up.
                    if (not self.missing_sensor_assert_flag[sensor_index]
                            and self.missing_sensor_assert_retry[sensor_index]
                            >= 2):
                        Logger.crit("ASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = True
                    if self.missing_sensor_assert_retry[sensor_index] < 2:
                        self.missing_sensor_assert_retry[sensor_index] += 1
                    # evaluation tries to ignore the effects of None values
                    # (e.g. acts as 0 in max/+)
                    ctx[v] = None
            self.sensor_valid_pre[sensor_index] = self.sensor_valid_cur[
                sensor_index]

        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))
        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if (not exprout) and (outmin == 0):
            if not self.transitional_assert_flag:
                # Single-line literal: a backslash continuation inside the
                # string would embed the source indentation in the message.
                Logger.crit(
                    "ASSERT: Zone%d No sane fan speed could be calculated! "
                    "Using transitional speed." % (self.counter))
            exprout = self.transitional
            mode = fan_mode["trans_mode"]
            no_sane_flag = 1
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit(
                    "DEASSERT: Zone%d No sane fan speed could be calculated! "
                    "Using transitional speed." % (self.counter))
            self.transitional_assert_flag = False

        # Progressive handling of failed SSD sensors: add the offset of the
        # first table row covering the failure count, or fall back to boost
        # when the count exceeds every row.
        if (self.fail_sensor_type is not None
                and "SSD_sensor_fail" in self.fail_sensor_type
                and self.fail_sensor_type["SSD_sensor_fail"] == True
                and fail_ssd_count != 0
                and self.ssd_progressive_algorithm is not None
                and "offset_algorithm" in self.ssd_progressive_algorithm):
            offsets = self.ssd_progressive_algorithm["offset_algorithm"]
            for list_index, entry in enumerate(offsets, start=1):
                if fail_ssd_count <= entry[0]:
                    # The expression may have evaluated to None while the
                    # floor is non-zero (transitional fallback skipped);
                    # treat that as 0 instead of raising TypeError.
                    if exprout is None:
                        exprout = 0
                    exprout = exprout + entry[1]
                    no_sane_flag = 0
                    break
                if list_index == len(offsets):
                    outmin = max(outmin, self.boost)
                    cause_boost_count += 1
                    if outmin == self.boost:
                        mode = fan_mode["boost_mode"]

        # Marker file exists exactly while some sensor failure forces boost.
        boost_record_path = RECORD_DIR + "sensor_fail_boost"
        if cause_boost_count != 0:
            if not os.path.isfile(boost_record_path):
                with open(boost_record_path, "w"):
                    pass
        elif os.path.isfile(boost_record_path):
            os.remove(boost_record_path)

        if not exprout:
            exprout = 0
        if exprout < outmin:
            exprout = outmin
        elif no_sane_flag != 1:
            mode = fan_mode["normal_mode"]
        self.get_set_fan_mode(mode, action="write")
        exprout = clamp(exprout, 0, 100)
        return exprout
예제 #28
0
파일: fscd.py 프로젝트: hanzf00/openbmc
    def update_zones(self, dead_fans, time_difference):
        """
        Run one control cycle: compute a PWM value and fan mode per zone.

        Sensors are read either once for all zones or, when
        self.sensor_filter_all is set, once per zone with that zone's
        expr_meta passed to read_sensors(); each reading is handed to
        fsc_safe_guards(). A zone is driven at self.boost when chassis
        intrusion is reported or fsc_sensor_check() returns non-zero;
        otherwise the value comes from zone.run(). The value is then adjusted
        for failed fans, limited by the optional upper/lower PWM limits and
        the non-fan-fail boost cap, rate limited by self.ramp_rate, written
        to the zone's PWM output(s), and the mode is stored through
        zone.get_set_fan_mode().

        dead_fans: collection of failed fans; each entry exposes .label.
        time_difference: stored in the evaluation context under "dt".

        TODO: Need to change logic here. The intended flow is: platforms
        with chassis_intrusion and/or enable_fsc_sensor_check derive a flag
        and run boost mode when it is set, normal mode otherwise; platforms
        without those features always run normal mode.
        """

        def _sync_marker(path, wanted):
            # Make the marker file exist exactly when `wanted` is true.
            present = os.path.isfile(path)
            if wanted and not present:
                with open(path, "w"):
                    pass
            elif present and not wanted:
                os.remove(path)

        ctx = {}
        if not self.sensor_filter_all:
            readings = self.machine.read_sensors(self.sensors, None)
            self.fsc_safe_guards(readings)
        for zone in self.zones:
            if self.sensor_filter_all:
                readings = self.machine.read_sensors(self.sensors,
                                                     zone.expr_meta)
                self.fsc_safe_guards(readings)
            Logger.info("PWM: %s" % (json.dumps(zone.pwm_output)))
            mode = 0

            intrusion_boost = False
            if self.chassis_intrusion:
                intrusion_boost = (
                    board_callout(callout="chassis_intrusion") == 1)
            sensor_violated = False
            if self.enable_fsc_sensor_check:
                Logger.info("enable_fsc_sensor_check")
                sensor_violated = self.fsc_sensor_check(readings) != 0
            Logger.debug(" dead_fans(%d) " % len(dead_fans))
            Logger.debug("Calculate")

            if intrusion_boost or sensor_violated:
                pwmval = self.boost
                mode = fan_mode["boost_mode"]
            else:
                ctx["dt"] = time_difference
                ctx["dead_fans"] = dead_fans
                ctx["last_pwm"] = zone.last_pwm
                # With a limited boost configured and fans dead, the zone
                # must not write its own mode.
                ignore_fan_mode = bool(self.non_fanfail_limited_boost
                                       and dead_fans)
                pwmval = zone.run(sensors=readings,
                                  ctx=ctx,
                                  ignore_mode=ignore_fan_mode)
                mode = zone.get_set_fan_mode(mode, action="read")
                # if we set pwm_sensor_boost_value option, assign it to pwmval
                if (self.pwm_sensor_boost_value is not None
                        and int(mode) == fan_mode["boost_mode"]
                        and pwmval == self.boost):
                    pwmval = self.pwm_sensor_boost_value

            if self.fan_fail:
                boost_record_path = RECORD_DIR + "fan_fail_boost"
                if self.boost_type == "progressive" and self.fan_dead_boost:
                    # Cases where we want to progressively bump PWMs
                    dead_count = len(dead_fans)
                    if dead_count > 0:
                        labels = ", ".join([str(fan.label) for fan in dead_fans])
                        Logger.info(
                            "Progressive mode: Failed fans: %s" % (labels))
                        matched = False
                        for fan_count, rate in self.fan_dead_boost["data"]:
                            if dead_count <= fan_count:
                                pwmval = clamp(
                                    pwmval + (dead_count * rate), 0, 100)
                                matched = True
                                break
                        if matched:
                            mode = fan_mode["normal_mode"]
                        else:
                            # More failures than any table row covers.
                            pwmval = self.boost
                            mode = fan_mode["boost_mode"]
                        _sync_marker(boost_record_path, not matched)
                    else:
                        _sync_marker(boost_record_path, False)
                else:
                    if dead_fans:
                        # If not progressive, a single failed fan boosts all fans
                        labels = ", ".join([str(fan.label) for fan in dead_fans])
                        Logger.info("Failed fans: %s" % (labels))
                        # choose the higher PWM
                        if self.output_max_boost_pwm:
                            if pwmval < self.boost:
                                pwmval = self.boost
                        else:
                            pwmval = self.boost
                        mode = fan_mode["boost_mode"]
                        _sync_marker(boost_record_path, True)
                    else:
                        _sync_marker(boost_record_path, False)
                    if self.fan_dead_boost:
                        if len(dead_fans) != len(self.fans):
                            # If at least 1 fan is working reset the counter
                            self.all_fan_fail_counter = 0
                        else:
                            # If all the fans failed take action after a few cycles
                            self.all_fan_fail_counter += 1
                            Logger.warn(
                                "Currently all fans failed for {} cycles".
                                format(self.all_fan_fail_counter))
                            threshold = self.fan_dead_boost["threshold"]
                            if (threshold and self.fan_dead_boost["action"]
                                    and self.all_fan_fail_counter >= threshold):
                                self.fsc_host_action(
                                    action=self.fan_dead_boost["action"],
                                    cause="All fans are bad for more than "
                                    + str(threshold) + " cycles",
                                )

            if (self.fan_limit_upper_pwm
                    and pwmval > self.fan_limit_upper_pwm):
                pwmval = self.fan_limit_upper_pwm

            if (self.fan_limit_lower_pwm
                    and pwmval < self.fan_limit_lower_pwm):
                pwmval = self.fan_limit_lower_pwm

            # if no fan fail, the max of pwm is non_fanfail_limited_boost pwm:
            if self.non_fanfail_limited_boost and not dead_fans:
                pwmval = clamp(pwmval, 0, self.non_fanfail_limited_boost)

            # Limit the per-cycle change to ramp_rate in either direction.
            previous_pwm = zone.last_pwm
            if abs(previous_pwm - pwmval) > self.ramp_rate:
                if pwmval < previous_pwm:
                    pwmval = previous_pwm - self.ramp_rate
                else:
                    pwmval = previous_pwm + self.ramp_rate
            zone.last_pwm = pwmval

            if not hasattr(zone.pwm_output, "__iter__"):
                self.machine.set_pwm(self.fans[zone.pwm_output], pwmval)
            else:
                for output in zone.pwm_output:
                    self.machine.set_pwm(self.fans.get(str(output)), pwmval)

            zone.get_set_fan_mode(mode, action="write")
예제 #29
0
파일: fsc_zone.py 프로젝트: hanzf00/openbmc
    def run(self, sensors, ctx, ignore_mode):
        """
        Evaluate this zone's fan expression and return the PWM to apply.

        Every variable listed in self.expr_meta["ext_vars"] ("board:sensor")
        is looked up in `sensors` and copied into `ctx`. While doing so a
        floor value (`outmin`) is tracked: a sensor with status "ucr" raises
        the floor to self.transitional and, when self.sensor_fail is enabled,
        a failed ("na") sensor that is neither an SSD nor a "temp_dev" sensor
        raises it to self.boost. Failed SSD / M.2 sensors are counted and
        handled afterwards according to self.fail_sensor_type and the
        "offset_algorithm" table of self.ssd_progressive_algorithm.

        Args:
            sensors: mapping of board name to a mapping of sensor name to a
                sensor object exposing .value, .status and .name.
            ctx: evaluation context dict; it is mutated in place with one
                entry per expression variable.
            ignore_mode: when true the computed fan mode is not written.

        Returns:
            The larger of the expression result and the floor, passed through
            clamp(value, 0, 100).

        Side effects: updates the per-sensor validity / missing-sensor
        bookkeeping on self, creates or removes marker files under
        SENSOR_FAIL_RECORD_DIR and RECORD_DIR, logs assert/deassert events
        and (unless ignore_mode) writes the fan mode via
        self.get_set_fan_mode().
        """
        outmin = 0
        fail_ssd_count = 0
        valid_m2_count = 0
        cause_boost_count = 0
        no_sane_flag = 0
        display_progressive_flag = 0
        mode = 0

        for sensor_index, v in enumerate(self.expr_meta["ext_vars"]):
            sensor_valid_flag = 1
            sdata = v.split(":")
            board = sdata[0]
            sname = sdata[1]
            if self.sensor_valid_check is not None:
                for check_name in self.sensor_valid_check:
                    if re.match(check_name, sname, re.IGNORECASE) is None:
                        continue
                    self.sensor_valid_cur[sensor_index] = (
                        fsc_board.sensor_valid_check(
                            board,
                            sname,
                            check_name,
                            self.sensor_valid_check[check_name]["attribute"],
                        ))
                    # If current or previous sensor valid status is 0, ignore
                    # this sensor reading. Only when both are 1 does the
                    # sensor go through the checks below.
                    if (self.sensor_valid_cur[sensor_index] == 0
                            or self.sensor_valid_pre[sensor_index] == 0):
                        sensor_valid_flag = 0
                        self.missing_sensor_assert_retry[sensor_index] = 0
                    # Only the first matching check pattern is applied.
                    break

            if sensor_valid_flag == 1:
                if sname in sensors[board]:
                    self.missing_sensor_assert_retry[sensor_index] = 0
                    if self.missing_sensor_assert_flag[sensor_index]:
                        Logger.crit("DEASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = False

                    sensor = sensors[board][sname]
                    ctx[v] = sensor.value
                    is_m2_sensor = re.match(r".*temp_dev", sname) is not None
                    if is_m2_sensor:
                        valid_m2_count += 1
                    if sensor.status == "ucr":
                        Logger.warn("Sensor %s reporting status %s" %
                                    (sensor.name, sensor.status))
                        outmin = max(outmin, self.transitional)
                        if outmin == self.transitional:
                            mode = fan_mode["trans_mode"]
                    # Equality (not truthiness) is kept so that only True/1
                    # enables sensor-fail handling, as before.
                    elif self.sensor_fail == True:
                        sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
                        if not os.path.isdir(SENSOR_FAIL_RECORD_DIR):
                            os.mkdir(SENSOR_FAIL_RECORD_DIR)
                        if (sensor.status == "na"
                                and self.sensor_valid_cur[sensor_index] != -1):
                            if (re.match(r"SSD", sensor.name) is not None
                                    or is_m2_sensor):
                                # Handled progressively after evaluation.
                                fail_ssd_count += 1
                                Logger.warn("M.2 Device %s Fail" % v)
                            else:
                                Logger.warn("%s Fail" % v)
                                outmin = max(outmin, self.boost)
                                cause_boost_count += 1
                            if not os.path.isfile(sensor_fail_record_path):
                                # Create an empty marker file for this sensor.
                                with open(sensor_fail_record_path, "w"):
                                    pass
                            if outmin == self.boost:
                                mode = fan_mode["boost_mode"]
                        elif os.path.isfile(sensor_fail_record_path):
                            os.remove(sensor_fail_record_path)
                else:
                    # Assert "missing" only once the retry budget is used up.
                    if (not self.missing_sensor_assert_flag[sensor_index]
                            and self.missing_sensor_assert_retry[sensor_index]
                            >= 2):
                        Logger.crit("ASSERT: Zone%d Missing sensors: %s" %
                                    (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = True
                    if self.missing_sensor_assert_retry[sensor_index] < 2:
                        self.missing_sensor_assert_retry[sensor_index] += 1
                    # evaluation tries to ignore the effects of None values
                    # (e.g. acts as 0 in max/+)
                    ctx[v] = None
            elif sname in sensors[board] and self.sensor_fail == True:
                # Reading is ignored this cycle: drop any stale fail record.
                sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
                if os.path.isfile(sensor_fail_record_path):
                    os.remove(sensor_fail_record_path)

            self.sensor_valid_pre[sensor_index] = self.sensor_valid_cur[
                sensor_index]

        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval_driver(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))
        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if (not exprout) and (outmin == 0):
            if not self.transitional_assert_flag:
                Logger.crit(
                    "ASSERT: Zone%d No sane fan speed could be calculated! Using transitional speed."
                    % (self.counter))
            exprout = self.transitional
            mode = fan_mode["trans_mode"]
            no_sane_flag = 1
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit(
                    "DEASSERT: Zone%d No sane fan speed could be calculated! Using transitional speed."
                    % (self.counter))
            self.transitional_assert_flag = False

        if self.fail_sensor_type is not None:
            progressive_mode = True
            if ("M2_sensor_fail" in self.fail_sensor_type
                    and "M2_sensor_count" in self.fail_sensor_type):
                if (self.fail_sensor_type["M2_sensor_fail"] == True
                        and self.fail_sensor_type["M2_sensor_count"] > 0):
                    if valid_m2_count == 0:
                        if fsc_board.all_slots_power_off() == False:
                            # Missing all module (no M.2 device)
                            outmin = max(outmin, self.boost)
                            cause_boost_count += 1
                            mode = fan_mode["boost_mode"]
                        # Either way (including all slots powered off, which
                        # does not boost) progressive handling is skipped.
                        progressive_mode = False
                    elif (valid_m2_count !=
                          self.fail_sensor_type["M2_sensor_count"]):
                        # Missing some module (M.2 devices partially populated)
                        progressive_mode = False
                        cause_boost_count += 1
                    elif cause_boost_count != 0:
                        # M.2 devices fully populated, but another boost
                        # reason exists (e.g. a non-M.2 sensor failed to read)
                        progressive_mode = False
                    elif fail_ssd_count != 0:
                        # M.2 devices progressive mode: handle M.2 devices/SSD
                        # fail-to-read case
                        cause_boost_count += 1  # show out sensor fail record
                        display_progressive_flag = 1  # do not override by normal mode
                        mode = fan_mode["progressive_mode"]
                    else:
                        # M.2 devices normal mode
                        progressive_mode = False

            # Progressive handling of failed SSD/M.2 sensors: add the offset
            # of the first table row covering the failure count, or fall
            # back to boost when the count exceeds every row.
            if (progressive_mode
                    and "SSD_sensor_fail" in self.fail_sensor_type
                    and self.fail_sensor_type["SSD_sensor_fail"] == True
                    and fail_ssd_count != 0
                    and self.ssd_progressive_algorithm is not None
                    and "offset_algorithm" in self.ssd_progressive_algorithm):
                offsets = self.ssd_progressive_algorithm["offset_algorithm"]
                for list_index, entry in enumerate(offsets, start=1):
                    if fail_ssd_count <= entry[0]:
                        # The expression may have evaluated to None while the
                        # floor is non-zero (transitional fallback skipped);
                        # treat that as 0 instead of raising TypeError.
                        if exprout is None:
                            exprout = 0
                        exprout = exprout + entry[1]
                        no_sane_flag = 0
                        break
                    if list_index == len(offsets):
                        outmin = max(outmin, self.boost)
                        cause_boost_count += 1
                        if outmin == self.boost:
                            mode = fan_mode["boost_mode"]

        # Marker file exists exactly while some sensor failure is recorded.
        boost_record_path = RECORD_DIR + "sensor_fail_boost"
        if cause_boost_count != 0:
            if not os.path.isfile(boost_record_path):
                with open(boost_record_path, "w"):
                    pass
        elif os.path.isfile(boost_record_path):
            os.remove(boost_record_path)

        if not exprout:
            exprout = 0
        if exprout < outmin:
            exprout = outmin
        elif (no_sane_flag != 1) and (display_progressive_flag != 1):
            mode = fan_mode["normal_mode"]
        if not ignore_mode:
            self.get_set_fan_mode(mode, action="write")
        exprout = clamp(exprout, 0, 100)
        return exprout
예제 #30
0
    def run(self, sensors, dt):
        ctx = {'dt': dt}
        outmin = 0
        fail_ssd_count = 0
        sensor_index = 0

        for v in self.expr_meta['ext_vars']:
            sensor_valid_flag = 1
            board, sname = v.split(":")
            if self.sensor_valid_check != None:
                for check_name in self.sensor_valid_check:
                    if re.match(check_name, sname, re.IGNORECASE) != None:
                        self.sensor_valid_cur[sensor_index] = fsc_board.sensor_valid_check(board, sname, check_name, self.sensor_valid_check[check_name]["attribute"])
                        #If current or previous sensor valid status is 0, ignore this sensor reading.
                        #Only when both are 1, goes to sensor check process
                        if (self.sensor_valid_cur[sensor_index] == 0) or (self.sensor_valid_pre[sensor_index] == 0):
                            sensor_valid_flag = 0
                            self.missing_sensor_assert_retry[sensor_index] = 0
                        break

            if sensor_valid_flag == 1:
                if sname in sensors[board]:
                    self.missing_sensor_assert_retry[sensor_index] = 0
                    if self.missing_sensor_assert_flag[sensor_index]:
                        Logger.crit('DEASSERT: Zone%d Missing sensors: %s' % (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = False

                    sensor = sensors[board][sname]
                    ctx[v] = sensor.value
                    if sensor.status in ['ucr']:
                        Logger.warn('Sensor %s reporting status %s' % (sensor.name, sensor.status))
                        outmin = max(outmin, self.transitional)
                    else:
                        if self.sensor_fail == True:
                            if (sensor.status in ['na']) and (self.sensor_valid_cur[sensor_index] != -1):
                                if re.match(r'.+_C[2-4]_[0-3]_NVME_.+', sensor.name) != None:
                                    Logger.warn("%s Fail" % v)
                                    outmin = max(outmin, self.boost)
                                elif re.match(r'SSD', sensor.name) != None or re.match(r'(.*)nvme(.*)', sname) != None:
                                    fail_ssd_count = fail_ssd_count + 1
                                else:
                                    Logger.warn("%s Fail" % v)
                                    outmin = max(outmin, self.boost)
                else:
                    if (not self.missing_sensor_assert_flag[sensor_index]) and (self.missing_sensor_assert_retry[sensor_index] >= 2):
                        Logger.crit('ASSERT: Zone%d Missing sensors: %s' % (self.counter, v))
                        self.missing_sensor_assert_flag[sensor_index] = True
                    if (self.missing_sensor_assert_retry[sensor_index] < 2):
                        self.missing_sensor_assert_retry[sensor_index] += 1
                    # evaluation tries to ignore the effects of None values
                    # (e.g. acts as 0 in max/+)
                    ctx[v] = None
            self.sensor_valid_pre[sensor_index] = self.sensor_valid_cur[sensor_index]
            sensor_index += 1

        if verbose:
            (exprout, dxstr) = self.expr.dbgeval(ctx)
            Logger.info(dxstr + " = " + str(exprout))
        else:
            exprout = self.expr.eval(ctx)
            Logger.info(self.expr_str + " = " + str(exprout))
        # If *all* sensors in the top level max() report None, the
        # expression will report None
        if (not exprout) and (outmin == 0):
            if not self.transitional_assert_flag:
                Logger.crit('ASSERT: Zone%d No sane fan speed could be \
                    calculated! Using transitional speed.' % (self.counter))
            exprout = self.transitional
            self.transitional_assert_flag = True
        else:
            if self.transitional_assert_flag:
                Logger.crit('DEASSERT: Zone%d No sane fan speed could be \
                    calculated! Using transitional speed.' % (self.counter))
            self.transitional_assert_flag = False

        if self.fail_sensor_type != None:
            if 'SSD_sensor_fail' in list(self.fail_sensor_type.keys()):
                if self.fail_sensor_type['SSD_sensor_fail'] == True:
                    if fail_ssd_count != 0:
                        if self.ssd_progressive_algorithm != None:
                            if 'offset_algorithm' in list(self.ssd_progressive_algorithm.keys()):
                                list_index = 0
                                for i in self.ssd_progressive_algorithm['offset_algorithm']:
                                    list_index = list_index + 1
                                    if fail_ssd_count <= i[0]:
                                        exprout = exprout + i[1]
                                        break
                                    else:
                                        if list_index == len(self.ssd_progressive_algorithm['offset_algorithm']):
                                           outmin = max(outmin, self.boost)
        if not exprout:
            exprout = 0
        if exprout < outmin:
            exprout = outmin
        exprout = clamp(exprout, 0, 100)
        return exprout