def calculate(self, peripheralState, ignition): try: now = sec_since_boot() # If peripheralState is None, we're probably not in a car, so we don't care if peripheralState is None or peripheralState.pandaType == log.PandaState.PandaType.unknown: with self.integration_lock: self.last_measurement_time = None self.next_pulsed_measurement_time = None self.power_used_uWh = 0 return # Low-pass battery voltage self.car_voltage_instant_mV = peripheralState.voltage self.car_voltage_mV = ( (peripheralState.voltage * CAR_VOLTAGE_LOW_PASS_K) + (self.car_voltage_mV * (1 - CAR_VOLTAGE_LOW_PASS_K))) statlog.gauge("car_voltage", self.car_voltage_mV / 1e3) # Cap the car battery power and save it in a param every 10-ish seconds self.car_battery_capacity_uWh = max(self.car_battery_capacity_uWh, 0) self.car_battery_capacity_uWh = min(self.car_battery_capacity_uWh, CAR_BATTERY_CAPACITY_uWh) if now - self.last_save_time >= 10: put_nonblocking("CarBatteryCapacity", str(int(self.car_battery_capacity_uWh))) self.last_save_time = now # First measurement, set integration time with self.integration_lock: if self.last_measurement_time is None: self.last_measurement_time = now return if ignition: # If there is ignition, we integrate the charging rate of the car with self.integration_lock: self.power_used_uWh = 0 integration_time_h = (now - self.last_measurement_time) / 3600 if integration_time_h < 0: raise ValueError( f"Negative integration time: {integration_time_h}h" ) self.car_battery_capacity_uWh += (CAR_CHARGING_RATE_W * 1e6 * integration_time_h) self.last_measurement_time = now else: # Get current power draw somehow current_power = HARDWARE.get_current_power_draw() # Do the integration self._perform_integration(now, current_power) except Exception: cloudlog.exception("Power monitoring calculation failed")
def thermald_thread(end_event, hw_queue): pm = messaging.PubMaster(['deviceState']) sm = messaging.SubMaster([ "peripheralState", "gpsLocationExternal", "controlsState", "pandaStates" ], poll=["pandaStates"]) count = 0 onroad_conditions: Dict[str, bool] = { "ignition": False, } startup_conditions: Dict[str, bool] = {} startup_conditions_prev: Dict[str, bool] = {} off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green last_hw_state = HardwareState( network_type=NetworkType.none, network_metered=False, network_strength=NetworkStrength.unknown, network_info=None, nvme_temps=[], modem_temps=[], wifi_address='N/A', ) current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML) should_start_prev = False in_car = False engaged_prev = False params = Params() power_monitor = PowerMonitoring() HARDWARE.initialize_hardware() thermal_config = HARDWARE.get_thermal_config() fan_controller = None restart_triggered_ts = 0. panda_state_ts = 0. while not end_event.is_set(): sm.update(PANDA_STATES_TIMEOUT) pandaStates = sm['pandaStates'] peripheralState = sm['peripheralState'] msg = read_thermal(thermal_config) # neokii if sec_since_boot() - restart_triggered_ts < 5.: onroad_conditions["not_restart_triggered"] = False else: onroad_conditions["not_restart_triggered"] = True if params.get_bool("SoftRestartTriggered"): params.put_bool("SoftRestartTriggered", False) restart_triggered_ts = sec_since_boot() if sm.updated['pandaStates'] and len(pandaStates) > 0: # Set ignition based on any panda connected onroad_conditions["ignition"] = any( ps.ignitionLine or ps.ignitionCan for ps in pandaStates if ps.pandaType != log.PandaState.PandaType.unknown) pandaState = pandaStates[0] if pandaState.pandaType != log.PandaState.PandaType.unknown: panda_state_ts = sec_since_boot() in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected # Setup fan handler on first connect to panda if fan_controller is None and peripheralState.pandaType != log.PandaState.PandaType.unknown: if TICI: fan_controller = TiciFanController() try: last_hw_state = hw_queue.get_nowait() except queue.Empty: pass msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int( round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = [ int(round(n)) for n in psutil.cpu_percent(percpu=True) ] msg.deviceState.gpuUsagePercent = int( round(HARDWARE.get_gpu_usage_percent())) msg.deviceState.networkType = last_hw_state.network_type msg.deviceState.networkMetered = last_hw_state.network_metered msg.deviceState.networkStrength = last_hw_state.network_strength if last_hw_state.network_info is not None: msg.deviceState.networkInfo = last_hw_state.network_info msg.deviceState.nvmeTempC = last_hw_state.nvme_temps msg.deviceState.modemTempC = last_hw_state.modem_temps msg.deviceState.wifiIpAddress = last_hw_state.wifi_address msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness( ) msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.usbOnline = HARDWARE.get_usb_present() current_filter.update(msg.deviceState.batteryCurrent / 1e6) max_comp_temp = temp_filter.update( max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))) if fan_controller is not None: msg.deviceState.fanSpeedPercentDesired = fan_controller.update( max_comp_temp, onroad_conditions["ignition"]) is_offroad_for_5_min = (started_ts is None) and ( (not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP: # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 thermal_status = ThermalStatus.danger else: current_band = THERMAL_BANDS[thermal_status] band_idx = list(THERMAL_BANDS.keys()).index(thermal_status) if current_band.min_temp is not None and max_comp_temp < current_band.min_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1] elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1] # **** starting logic **** # Ensure date/time are valid now = datetime.datetime.utcnow() startup_conditions[ "time_valid"] = True #(now.year > 2020) or (now.year == 2020 and now.month >= 10) set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) startup_conditions[ "up_to_date"] = True #params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate") startup_conditions["not_uninstalling"] = not params.get_bool( "DoUninstall") startup_conditions["accepted_terms"] = params.get( "HasAcceptedTerms") == terms_version # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ params.get_bool("Passive") startup_conditions["not_driver_view"] = not params.get_bool( "IsDriverViewEnabled") startup_conditions["not_taking_snapshot"] = not params.get_bool( "IsTakingSnapshot") # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 onroad_conditions[ "device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed( "Offroad_TemperatureTooHigh", (not onroad_conditions["device_temp_good"])) # TODO: this should move to TICI.initialize_hardware, but we currently can't import params there if TICI: if not os.path.isfile("/persist/comma/living-in-the-moment"): if not Path("/data/media").is_mount(): set_offroad_alert_if_changed("Offroad_StorageMissing", True) else: # check for bad NVMe try: with open("/sys/block/nvme0n1/device/model") as f: model = f.read().strip() if not model.startswith( "Samsung SSD 980") and params.get( "Offroad_BadNvme") is None: set_offroad_alert_if_changed( "Offroad_BadNvme", True) cloudlog.event("Unsupported NVMe", model=model, error=True) except Exception: pass # Handle offroad/onroad transition should_start = all(onroad_conditions.values()) if started_ts is None: should_start = should_start and all(startup_conditions.values()) if should_start != should_start_prev or (count == 0): params.put_bool("IsOnroad", should_start) params.put_bool("IsOffroad", not should_start) params.put_bool("IsEngaged", False) engaged_prev = False HARDWARE.set_power_save(not should_start) if sm.updated['controlsState']: engaged = sm['controlsState'].enabled if engaged != engaged_prev: params.put_bool("IsEngaged", engaged) engaged_prev = engaged try: with open('/dev/kmsg', 'w') as kmsg: kmsg.write(f"<3>[thermald] engaged: {engaged}\n") except Exception: pass if should_start: off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions) started_ts = None if off_ts is None: off_ts = sec_since_boot() # Offroad power monitoring power_monitor.calculate(peripheralState, onroad_conditions["ignition"]) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used() msg.deviceState.carBatteryCapacityUwh = max( 0, power_monitor.get_car_battery_capacity()) current_power_draw = HARDWARE.get_current_power_draw() # pylint: disable=assignment-from-none if current_power_draw is not None: statlog.sample("power_draw", current_power_draw) msg.deviceState.powerDrawW = current_power_draw else: msg.deviceState.powerDrawW = 0 # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging( onroad_conditions["ignition"], in_car, off_ts) # Check if we need to shut down if power_monitor.should_shutdown(peripheralState, onroad_conditions["ignition"], in_car, off_ts, started_seen): cloudlog.warning(f"shutting device down, offroad since {off_ts}") params.put_bool("DoShutdown", True) msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0)) last_ping = params.get("LastAthenaPingTime") if last_ping is not None: msg.deviceState.lastAthenaPingTime = int(last_ping) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # Log to statsd statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent) statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent) statlog.gauge("memory_usage_percent", msg.deviceState.memoryUsagePercent) for i, usage in enumerate(msg.deviceState.cpuUsagePercent): statlog.gauge(f"cpu{i}_usage_percent", usage) for i, temp in enumerate(msg.deviceState.cpuTempC): statlog.gauge(f"cpu{i}_temperature", temp) for i, temp in enumerate(msg.deviceState.gpuTempC): statlog.gauge(f"gpu{i}_temperature", temp) statlog.gauge("memory_temperature", msg.deviceState.memoryTempC) statlog.gauge("ambient_temperature", msg.deviceState.ambientTempC) for i, temp in enumerate(msg.deviceState.pmicTempC): statlog.gauge(f"pmic{i}_temperature", temp) for i, temp in enumerate(last_hw_state.nvme_temps): statlog.gauge(f"nvme_temperature{i}", temp) for i, temp in enumerate(last_hw_state.modem_temps): statlog.gauge(f"modem_temperature{i}", temp) statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired) statlog.gauge("screen_brightness_percent", msg.deviceState.screenBrightnessPercent) # report to server once every 10 minutes if (count % int(600. / DT_TRML)) == 0: cloudlog.event( "STATUS_PACKET", count=count, pandaStates=[ strip_deprecated_keys(p.to_dict()) for p in pandaStates ], peripheralState=strip_deprecated_keys( peripheralState.to_dict()), location=(strip_deprecated_keys( sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None), deviceState=strip_deprecated_keys(msg.to_dict())) count += 1
def calculate(self, peripheralState, ignition): try: now = sec_since_boot() # If peripheralState is None, we're probably not in a car, so we don't care if peripheralState is None or peripheralState.pandaType == log.PandaState.PandaType.unknown: with self.integration_lock: self.last_measurement_time = None self.next_pulsed_measurement_time = None self.power_used_uWh = 0 return # Low-pass battery voltage self.car_voltage_instant_mV = peripheralState.voltage self.car_voltage_mV = ( (peripheralState.voltage * CAR_VOLTAGE_LOW_PASS_K) + (self.car_voltage_mV * (1 - CAR_VOLTAGE_LOW_PASS_K))) statlog.gauge("car_voltage", self.car_voltage_mV / 1e3) # Cap the car battery power and save it in a param every 10-ish seconds self.car_battery_capacity_uWh = max(self.car_battery_capacity_uWh, 0) self.car_battery_capacity_uWh = min(self.car_battery_capacity_uWh, CAR_BATTERY_CAPACITY_uWh) if now - self.last_save_time >= 10: put_nonblocking("CarBatteryCapacity", str(int(self.car_battery_capacity_uWh))) self.last_save_time = now # First measurement, set integration time with self.integration_lock: if self.last_measurement_time is None: self.last_measurement_time = now return if ignition: # If there is ignition, we integrate the charging rate of the car with self.integration_lock: self.power_used_uWh = 0 integration_time_h = (now - self.last_measurement_time) / 3600 if integration_time_h < 0: raise ValueError( f"Negative integration time: {integration_time_h}h" ) self.car_battery_capacity_uWh += (CAR_CHARGING_RATE_W * 1e6 * integration_time_h) self.last_measurement_time = now else: # No ignition, we integrate the offroad power used by the device is_uno = peripheralState.pandaType == log.PandaState.PandaType.uno # Get current power draw somehow current_power = HARDWARE.get_current_power_draw() # pylint: disable=assignment-from-none if current_power is not None: pass elif HARDWARE.get_battery_status() == 'Discharging': # If the battery is discharging, we can use this measurement # On C2: this is low by about 10-15%, probably mostly due to UNO draw not being factored in current_power = ( (HARDWARE.get_battery_voltage() / 1000000) * (HARDWARE.get_battery_current() / 1000000)) elif (self.next_pulsed_measurement_time is not None) and ( self.next_pulsed_measurement_time <= now): # TODO: Figure out why this is off by a factor of 3/4??? FUDGE_FACTOR = 1.33 # Turn off charging for about 10 sec in a thread that does not get killed on SIGINT, and perform measurement here to avoid blocking thermal def perform_pulse_measurement(now): try: HARDWARE.set_battery_charging(False) time.sleep(5) # Measure for a few sec to get a good average voltages = [] currents = [] for _ in range(6): voltages.append(HARDWARE.get_battery_voltage()) currents.append(HARDWARE.get_battery_current()) time.sleep(1) current_power = ((mean(voltages) / 1000000) * (mean(currents) / 1000000)) self._perform_integration( now, current_power * FUDGE_FACTOR) # Enable charging again HARDWARE.set_battery_charging(True) except Exception: cloudlog.exception( "Pulsed power measurement failed") # Start pulsed measurement and return threading.Thread(target=perform_pulse_measurement, args=(now, )).start() self.next_pulsed_measurement_time = None return elif self.next_pulsed_measurement_time is None and not is_uno: # On a charging EON with black panda, or drawing more than 400mA out of a white/grey one # Only way to get the power draw is to turn off charging for a few sec and check what the discharging rate is # We shouldn't do this very often, so make sure it has been some long-ish random time interval self.next_pulsed_measurement_time = now + random.randint( 120, 180) return else: # Do nothing return # Do the integration self._perform_integration(now, current_power) except Exception: cloudlog.exception("Power monitoring calculation failed")