def thermald_thread() -> NoReturn: pm = messaging.PubMaster(['deviceState']) pandaState_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected pandaState frequency pandaState_sock = messaging.sub_sock('pandaStates', timeout=pandaState_timeout) sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "managerState"]) fan_speed = 0 count = 0 onroad_conditions: Dict[str, bool] = { "ignition": False, } startup_conditions: Dict[str, bool] = {} startup_conditions_prev: Dict[str, bool] = {} off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green usb_power = True network_type = NetworkType.none network_strength = NetworkStrength.unknown network_info = None modem_version = None registered_count = 0 nvme_temps = None modem_temps = None current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML) pandaState_prev = None should_start_prev = False in_car = False handle_fan = None is_uno = False ui_running_prev = False params = Params() power_monitor = PowerMonitoring() no_panda_cnt = 0 HARDWARE.initialize_hardware() thermal_config = HARDWARE.get_thermal_config() # TODO: use PI controller for UNO controller = PIController(k_p=0, k_i=2e-3, neg_limit=-80, pos_limit=0, rate=(1 / DT_TRML)) # Leave flag for loggerd to indicate device was left onroad if params.get_bool("IsOnroad"): params.put_bool("BootedOnroad", True) while True: pandaStates = messaging.recv_sock(pandaState_sock, wait=True) sm.update(0) peripheralState = sm['peripheralState'] msg = read_thermal(thermal_config) if pandaStates is not None and len(pandaStates.pandaStates) > 0: pandaState = pandaStates.pandaStates[0] # If we lose connection to the panda, wait 5 seconds before going offroad if pandaState.pandaType == log.PandaState.PandaType.unknown: no_panda_cnt += 1 if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML: if onroad_conditions["ignition"]: cloudlog.error("Lost panda connection while onroad") onroad_conditions["ignition"] = False else: no_panda_cnt = 0 onroad_conditions["ignition"] = pandaState.ignitionLine or pandaState.ignitionCan in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected usb_power = peripheralState.usbPowerMode != log.PeripheralState.UsbPowerMode.client # Setup fan handler on first connect to panda if handle_fan is None and peripheralState.pandaType != log.PandaState.PandaType.unknown: is_uno = peripheralState.pandaType == log.PandaState.PandaType.uno if TICI: cloudlog.info("Setting up TICI fan handler") handle_fan = handle_fan_tici elif is_uno or PC: cloudlog.info("Setting up UNO fan handler") handle_fan = handle_fan_uno else: cloudlog.info("Setting up EON fan handler") setup_eon_fan() handle_fan = handle_fan_eon # Handle disconnect if pandaState_prev is not None: if pandaState.pandaType == log.PandaState.PandaType.unknown and \ pandaState_prev.pandaType != log.PandaState.PandaType.unknown: params.clear_all(ParamKeyType.CLEAR_ON_PANDA_DISCONNECT) pandaState_prev = pandaState # these are expensive calls. update every 10s if (count % int(10. / DT_TRML)) == 0: try: network_type = HARDWARE.get_network_type() network_strength = HARDWARE.get_network_strength(network_type) network_info = HARDWARE.get_network_info() # pylint: disable=assignment-from-none nvme_temps = HARDWARE.get_nvme_temperatures() modem_temps = HARDWARE.get_modem_temperatures() # Log modem version once if modem_version is None: modem_version = HARDWARE.get_modem_version() # pylint: disable=assignment-from-none if modem_version is not None: cloudlog.warning(f"Modem version: {modem_version}") if TICI and (network_info.get('state', None) == "REGISTERED"): registered_count += 1 else: registered_count = 0 if registered_count > 10: cloudlog.warning(f"Modem stuck in registered state {network_info}. nmcli conn up lte") os.system("nmcli conn up lte") registered_count = 0 except Exception: cloudlog.exception("Error getting network status") msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)] msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent())) msg.deviceState.networkType = network_type msg.deviceState.networkStrength = network_strength if network_info is not None: msg.deviceState.networkInfo = network_info if nvme_temps is not None: msg.deviceState.nvmeTempC = nvme_temps if modem_temps is not None: msg.deviceState.modemTempC = modem_temps msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness() msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.usbOnline = HARDWARE.get_usb_present() current_filter.update(msg.deviceState.batteryCurrent / 1e6) max_comp_temp = temp_filter.update( max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC)) ) if handle_fan is not None: fan_speed = handle_fan(controller, max_comp_temp, fan_speed, onroad_conditions["ignition"]) msg.deviceState.fanSpeedPercentDesired = fan_speed is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP: # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 thermal_status = ThermalStatus.danger else: current_band = THERMAL_BANDS[thermal_status] band_idx = list(THERMAL_BANDS.keys()).index(thermal_status) if current_band.min_temp is not None and max_comp_temp < current_band.min_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1] elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1] # **** starting logic **** # Ensure date/time are valid now = datetime.datetime.utcnow() startup_conditions["time_valid"] = (now.year > 2020) or (now.year == 2020 and now.month >= 10) set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) startup_conditions["up_to_date"] = params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate") startup_conditions["not_uninstalling"] = not params.get_bool("DoUninstall") startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ params.get_bool("Passive") startup_conditions["not_driver_view"] = not params.get_bool("IsDriverViewEnabled") startup_conditions["not_taking_snapshot"] = not params.get_bool("IsTakingSnapshot") # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 onroad_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not onroad_conditions["device_temp_good"])) if TICI: set_offroad_alert_if_changed("Offroad_StorageMissing", (not Path("/data/media").is_mount())) # Handle offroad/onroad transition should_start = all(onroad_conditions.values()) if started_ts is None: should_start = should_start and all(startup_conditions.values()) if should_start != should_start_prev or (count == 0): params.put_bool("IsOnroad", should_start) params.put_bool("IsOffroad", not should_start) HARDWARE.set_power_save(not should_start) if should_start: off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions) started_ts = None if off_ts is None: off_ts = sec_since_boot() # Offroad power monitoring power_monitor.calculate(peripheralState, onroad_conditions["ignition"]) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used() msg.deviceState.carBatteryCapacityUwh = max(0, power_monitor.get_car_battery_capacity()) current_power_draw = HARDWARE.get_current_power_draw() # pylint: disable=assignment-from-none msg.deviceState.powerDrawW = current_power_draw if current_power_draw is not None else 0 # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging(onroad_conditions["ignition"], in_car, off_ts) # Check if we need to shut down if power_monitor.should_shutdown(peripheralState, onroad_conditions["ignition"], in_car, off_ts, started_seen): cloudlog.info(f"shutting device down, offroad since {off_ts}") # TODO: add function for blocking cloudlog instead of sleep time.sleep(10) HARDWARE.shutdown() # If UI has crashed, set the brightness to reasonable non-zero value ui_running = "ui" in (p.name for p in sm["managerState"].processes if p.running) if ui_running_prev and not ui_running: HARDWARE.set_screen_brightness(20) ui_running_prev = ui_running msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9*(started_ts or 0)) last_ping = params.get("LastAthenaPingTime") if last_ping is not None: msg.deviceState.lastAthenaPingTime = int(last_ping) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) if EON and not is_uno: set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power)) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # report to server once every 10 minutes if (count % int(600. / DT_TRML)) == 0: if EON and started_ts is None and msg.deviceState.memoryUsagePercent > 40: cloudlog.event("High offroad memory usage", mem=msg.deviceState.memoryUsagePercent) cloudlog.event("STATUS_PACKET", count=count, pandaStates=(strip_deprecated_keys(pandaStates.to_dict()) if pandaStates else None), peripheralState=strip_deprecated_keys(peripheralState.to_dict()), location=(strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None), deviceState=strip_deprecated_keys(msg.to_dict())) count += 1
def thermald_thread(end_event, hw_queue): pm = messaging.PubMaster(['deviceState']) sm = messaging.SubMaster([ "peripheralState", "gpsLocationExternal", "controlsState", "pandaStates" ], poll=["pandaStates"]) count = 0 onroad_conditions: Dict[str, bool] = { "ignition": False, } startup_conditions: Dict[str, bool] = {} startup_conditions_prev: Dict[str, bool] = {} off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green last_hw_state = HardwareState( network_type=NetworkType.none, network_metered=False, network_strength=NetworkStrength.unknown, network_info=None, nvme_temps=[], modem_temps=[], wifi_address='N/A', ) current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML) should_start_prev = False in_car = False engaged_prev = False params = Params() power_monitor = PowerMonitoring() HARDWARE.initialize_hardware() thermal_config = HARDWARE.get_thermal_config() fan_controller = None restart_triggered_ts = 0. panda_state_ts = 0. while not end_event.is_set(): sm.update(PANDA_STATES_TIMEOUT) pandaStates = sm['pandaStates'] peripheralState = sm['peripheralState'] msg = read_thermal(thermal_config) # neokii if sec_since_boot() - restart_triggered_ts < 5.: onroad_conditions["not_restart_triggered"] = False else: onroad_conditions["not_restart_triggered"] = True if params.get_bool("SoftRestartTriggered"): params.put_bool("SoftRestartTriggered", False) restart_triggered_ts = sec_since_boot() if sm.updated['pandaStates'] and len(pandaStates) > 0: # Set ignition based on any panda connected onroad_conditions["ignition"] = any( ps.ignitionLine or ps.ignitionCan for ps in pandaStates if ps.pandaType != log.PandaState.PandaType.unknown) pandaState = pandaStates[0] if pandaState.pandaType != log.PandaState.PandaType.unknown: panda_state_ts = sec_since_boot() in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected # Setup fan handler on first connect to panda if fan_controller is None and peripheralState.pandaType != log.PandaState.PandaType.unknown: if TICI: fan_controller = TiciFanController() try: last_hw_state = hw_queue.get_nowait() except queue.Empty: pass msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int( round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = [ int(round(n)) for n in psutil.cpu_percent(percpu=True) ] msg.deviceState.gpuUsagePercent = int( round(HARDWARE.get_gpu_usage_percent())) msg.deviceState.networkType = last_hw_state.network_type msg.deviceState.networkMetered = last_hw_state.network_metered msg.deviceState.networkStrength = last_hw_state.network_strength if last_hw_state.network_info is not None: msg.deviceState.networkInfo = last_hw_state.network_info msg.deviceState.nvmeTempC = last_hw_state.nvme_temps msg.deviceState.modemTempC = last_hw_state.modem_temps msg.deviceState.wifiIpAddress = last_hw_state.wifi_address msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness( ) msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.usbOnline = HARDWARE.get_usb_present() current_filter.update(msg.deviceState.batteryCurrent / 1e6) max_comp_temp = temp_filter.update( max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))) if fan_controller is not None: msg.deviceState.fanSpeedPercentDesired = fan_controller.update( max_comp_temp, onroad_conditions["ignition"]) is_offroad_for_5_min = (started_ts is None) and ( (not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP: # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 thermal_status = ThermalStatus.danger else: current_band = THERMAL_BANDS[thermal_status] band_idx = list(THERMAL_BANDS.keys()).index(thermal_status) if current_band.min_temp is not None and max_comp_temp < current_band.min_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1] elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1] # **** starting logic **** # Ensure date/time are valid now = datetime.datetime.utcnow() startup_conditions[ "time_valid"] = True #(now.year > 2020) or (now.year == 2020 and now.month >= 10) set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) startup_conditions[ "up_to_date"] = True #params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate") startup_conditions["not_uninstalling"] = not params.get_bool( "DoUninstall") startup_conditions["accepted_terms"] = params.get( "HasAcceptedTerms") == terms_version # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ params.get_bool("Passive") startup_conditions["not_driver_view"] = not params.get_bool( "IsDriverViewEnabled") startup_conditions["not_taking_snapshot"] = not params.get_bool( "IsTakingSnapshot") # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 onroad_conditions[ "device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed( "Offroad_TemperatureTooHigh", (not onroad_conditions["device_temp_good"])) # TODO: this should move to TICI.initialize_hardware, but we currently can't import params there if TICI: if not os.path.isfile("/persist/comma/living-in-the-moment"): if not Path("/data/media").is_mount(): set_offroad_alert_if_changed("Offroad_StorageMissing", True) else: # check for bad NVMe try: with open("/sys/block/nvme0n1/device/model") as f: model = f.read().strip() if not model.startswith( "Samsung SSD 980") and params.get( "Offroad_BadNvme") is None: set_offroad_alert_if_changed( "Offroad_BadNvme", True) cloudlog.event("Unsupported NVMe", model=model, error=True) except Exception: pass # Handle offroad/onroad transition should_start = all(onroad_conditions.values()) if started_ts is None: should_start = should_start and all(startup_conditions.values()) if should_start != should_start_prev or (count == 0): params.put_bool("IsOnroad", should_start) params.put_bool("IsOffroad", not should_start) params.put_bool("IsEngaged", False) engaged_prev = False HARDWARE.set_power_save(not should_start) if sm.updated['controlsState']: engaged = sm['controlsState'].enabled if engaged != engaged_prev: params.put_bool("IsEngaged", engaged) engaged_prev = engaged try: with open('/dev/kmsg', 'w') as kmsg: kmsg.write(f"<3>[thermald] engaged: {engaged}\n") except Exception: pass if should_start: off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions) started_ts = None if off_ts is None: off_ts = sec_since_boot() # Offroad power monitoring power_monitor.calculate(peripheralState, onroad_conditions["ignition"]) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used() msg.deviceState.carBatteryCapacityUwh = max( 0, power_monitor.get_car_battery_capacity()) current_power_draw = HARDWARE.get_current_power_draw() # pylint: disable=assignment-from-none if current_power_draw is not None: statlog.sample("power_draw", current_power_draw) msg.deviceState.powerDrawW = current_power_draw else: msg.deviceState.powerDrawW = 0 # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging( onroad_conditions["ignition"], in_car, off_ts) # Check if we need to shut down if power_monitor.should_shutdown(peripheralState, onroad_conditions["ignition"], in_car, off_ts, started_seen): cloudlog.warning(f"shutting device down, offroad since {off_ts}") params.put_bool("DoShutdown", True) msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0)) last_ping = params.get("LastAthenaPingTime") if last_ping is not None: msg.deviceState.lastAthenaPingTime = int(last_ping) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # Log to statsd statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent) statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent) statlog.gauge("memory_usage_percent", msg.deviceState.memoryUsagePercent) for i, usage in enumerate(msg.deviceState.cpuUsagePercent): statlog.gauge(f"cpu{i}_usage_percent", usage) for i, temp in enumerate(msg.deviceState.cpuTempC): statlog.gauge(f"cpu{i}_temperature", temp) for i, temp in enumerate(msg.deviceState.gpuTempC): statlog.gauge(f"gpu{i}_temperature", temp) statlog.gauge("memory_temperature", msg.deviceState.memoryTempC) statlog.gauge("ambient_temperature", msg.deviceState.ambientTempC) for i, temp in enumerate(msg.deviceState.pmicTempC): statlog.gauge(f"pmic{i}_temperature", temp) for i, temp in enumerate(last_hw_state.nvme_temps): statlog.gauge(f"nvme_temperature{i}", temp) for i, temp in enumerate(last_hw_state.modem_temps): statlog.gauge(f"modem_temperature{i}", temp) statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired) statlog.gauge("screen_brightness_percent", msg.deviceState.screenBrightnessPercent) # report to server once every 10 minutes if (count % int(600. / DT_TRML)) == 0: cloudlog.event( "STATUS_PACKET", count=count, pandaStates=[ strip_deprecated_keys(p.to_dict()) for p in pandaStates ], peripheralState=strip_deprecated_keys( peripheralState.to_dict()), location=(strip_deprecated_keys( sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None), deviceState=strip_deprecated_keys(msg.to_dict())) count += 1
def thermald_thread(): health_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected health frequency # now loop thermal_sock = messaging.pub_sock('thermal') health_sock = messaging.sub_sock('health', timeout=health_timeout) location_sock = messaging.sub_sock('gpsLocation') fan_speed = 0 count = 0 startup_conditions = { "ignition": False, } startup_conditions_prev = startup_conditions.copy() off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green usb_power = True current_branch = get_git_branch() network_type = NetworkType.none network_strength = NetworkStrength.unknown current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) cpu_temp_filter = FirstOrderFilter(0., CPU_TEMP_TAU, DT_TRML) health_prev = None should_start_prev = False handle_fan = None is_uno = False has_relay = False params = Params() pm = PowerMonitoring() no_panda_cnt = 0 thermal_config = get_thermal_config() while 1: health = messaging.recv_sock(health_sock, wait=True) location = messaging.recv_sock(location_sock) location = location.gpsLocation if location else None msg = read_thermal(thermal_config) if health is not None: usb_power = health.health.usbPowerMode != log.HealthData.UsbPowerMode.client # If we lose connection to the panda, wait 5 seconds before going offroad if health.health.hwType == log.HealthData.HwType.unknown: no_panda_cnt += 1 if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML: if startup_conditions["ignition"]: cloudlog.error("Lost panda connection while onroad") startup_conditions["ignition"] = False else: no_panda_cnt = 0 startup_conditions["ignition"] = health.health.ignitionLine or health.health.ignitionCan # Setup fan handler on first connect to panda if handle_fan is None and health.health.hwType != log.HealthData.HwType.unknown: is_uno = health.health.hwType == log.HealthData.HwType.uno has_relay = health.health.hwType in [log.HealthData.HwType.blackPanda, log.HealthData.HwType.uno, log.HealthData.HwType.dos] if (not EON) or is_uno: cloudlog.info("Setting up UNO fan handler") handle_fan = handle_fan_uno else: cloudlog.info("Setting up EON fan handler") setup_eon_fan() handle_fan = handle_fan_eon # Handle disconnect if health_prev is not None: if health.health.hwType == log.HealthData.HwType.unknown and \ health_prev.health.hwType != log.HealthData.HwType.unknown: params.panda_disconnect() health_prev = health # get_network_type is an expensive call. update every 10s if (count % int(10. / DT_TRML)) == 0: try: network_type = HARDWARE.get_network_type() network_strength = HARDWARE.get_network_strength(network_type) except Exception: cloudlog.exception("Error getting network status") msg.thermal.freeSpace = get_available_percent(default=100.0) / 100.0 msg.thermal.memUsedPercent = int(round(psutil.virtual_memory().percent)) msg.thermal.cpuPerc = int(round(psutil.cpu_percent())) msg.thermal.networkType = network_type msg.thermal.networkStrength = network_strength msg.thermal.batteryPercent = get_battery_capacity() msg.thermal.batteryStatus = get_battery_status() msg.thermal.batteryCurrent = get_battery_current() msg.thermal.batteryVoltage = get_battery_voltage() msg.thermal.usbOnline = get_usb_present() # Fake battery levels on uno for frame if (not EON) or is_uno: msg.thermal.batteryPercent = 100 msg.thermal.batteryStatus = "Charging" msg.thermal.bat = 0 current_filter.update(msg.thermal.batteryCurrent / 1e6) # TODO: add car battery voltage check max_cpu_temp = cpu_temp_filter.update(max(msg.thermal.cpu)) max_comp_temp = max(max_cpu_temp, msg.thermal.mem, max(msg.thermal.gpu)) bat_temp = msg.thermal.bat if handle_fan is not None: fan_speed = handle_fan(max_cpu_temp, bat_temp, fan_speed, startup_conditions["ignition"]) msg.thermal.fanSpeed = fan_speed # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 # We only do this if there is a relay that prevents the car from faulting is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if max_cpu_temp > 107. or bat_temp >= 63. or (has_relay and is_offroad_for_5_min and max_cpu_temp > 70.0): # onroad not allowed thermal_status = ThermalStatus.danger elif max_comp_temp > 96.0 or bat_temp > 60.: # hysteresis between onroad not allowed and engage not allowed thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger) elif max_cpu_temp > 94.0: # hysteresis between engage not allowed and uploader not allowed thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red) elif max_cpu_temp > 80.0: # uploader not allowed thermal_status = ThermalStatus.yellow elif max_cpu_temp > 75.0: # hysteresis between uploader not allowed and all good thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow) else: # all good thermal_status = ThermalStatus.green # **** starting logic **** # Check for last update time and display alerts if needed now = datetime.datetime.utcnow() # show invalid date/time alert startup_conditions["time_valid"] = now.year >= 2019 set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) # Show update prompt try: last_update = datetime.datetime.fromisoformat(params.get("LastUpdateTime", encoding='utf8')) except (TypeError, ValueError): last_update = now dt = now - last_update update_failed_count = params.get("UpdateFailedCount") update_failed_count = 0 if update_failed_count is None else int(update_failed_count) last_update_exception = params.get("LastUpdateException", encoding='utf8') if update_failed_count > 15 and last_update_exception is not None: if current_branch in ["release2", "dashcam"]: extra_text = "Ensure the software is correctly installed" else: extra_text = last_update_exception set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) set_offroad_alert_if_changed("Offroad_UpdateFailed", True, extra_text=extra_text) elif dt.days > DAYS_NO_CONNECTIVITY_MAX and update_failed_count > 1: set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", True) elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT: remaining_time = str(max(DAYS_NO_CONNECTIVITY_MAX - dt.days, 0)) set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", True, extra_text=f"{remaining_time} days.") else: set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) startup_conditions["not_uninstalling"] = not params.get("DoUninstall") == b"1" startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version completed_training = params.get("CompletedTrainingVersion") == training_version panda_signature = params.get("PandaFirmware") startup_conditions["fw_version_match"] = (panda_signature is None) or (panda_signature == FW_SIGNATURE) # don't show alert is no panda is connected (None) set_offroad_alert_if_changed("Offroad_PandaFirmwareMismatch", (not startup_conditions["fw_version_match"])) # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.thermal.freeSpace > 0.02 startup_conditions["completed_training"] = completed_training or (current_branch in ['dashcam', 'dashcam-staging']) startup_conditions["not_driver_view"] = not params.get("IsDriverViewEnabled") == b"1" startup_conditions["not_taking_snapshot"] = not params.get("IsTakingSnapshot") == b"1" # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 startup_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not startup_conditions["device_temp_good"])) should_start = all(startup_conditions.values()) if should_start: if not should_start_prev: params.delete("IsOffroad") off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True os.system('echo performance > /sys/class/devfreq/soc:qcom,cpubw/governor') else: if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions) if should_start_prev or (count == 0): params.put("IsOffroad", "1") started_ts = None if off_ts is None: off_ts = sec_since_boot() os.system('echo powersave > /sys/class/devfreq/soc:qcom,cpubw/governor') # Offroad power monitoring pm.calculate(health) msg.thermal.offroadPowerUsage = pm.get_power_used() msg.thermal.carBatteryCapacity = max(0, pm.get_car_battery_capacity()) # Check if we need to disable charging (handled by boardd) msg.thermal.chargingDisabled = pm.should_disable_charging(health, off_ts) # Check if we need to shut down if pm.should_shutdown(health, off_ts, started_seen, LEON): cloudlog.info(f"shutting device down, offroad since {off_ts}") # TODO: add function for blocking cloudlog instead of sleep time.sleep(10) os.system('LD_LIBRARY_PATH="" svc power shutdown') msg.thermal.chargingError = current_filter.x > 0. and msg.thermal.batteryPercent < 90 # if current is positive, then battery is being discharged msg.thermal.started = started_ts is not None msg.thermal.startedTs = int(1e9*(started_ts or 0)) msg.thermal.thermalStatus = thermal_status thermal_sock.send(msg.to_bytes()) set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power)) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # report to server once per minute if (count % int(60. / DT_TRML)) == 0: cloudlog.event("STATUS_PACKET", count=count, health=(health.to_dict() if health else None), location=(location.to_dict() if location else None), thermal=msg.to_dict()) count += 1
def thermald_thread(): pm = messaging.PubMaster(['deviceState']) pandaState_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected pandaState frequency pandaState_sock = messaging.sub_sock('pandaState', timeout=pandaState_timeout) location_sock = messaging.sub_sock('gpsLocationExternal') managerState_sock = messaging.sub_sock('managerState', conflate=True) fan_speed = 0 count = 0 startup_conditions = { "ignition": False, } startup_conditions_prev = startup_conditions.copy() off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green usb_power = True current_branch = get_git_branch() network_type = NetworkType.none network_strength = NetworkStrength.unknown current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) cpu_temp_filter = FirstOrderFilter(0., CPU_TEMP_TAU, DT_TRML) pandaState_prev = None should_start_prev = False handle_fan = None is_uno = False ui_running_prev = False params = Params() power_monitor = PowerMonitoring() no_panda_cnt = 0 thermal_config = HARDWARE.get_thermal_config() # CPR3 logging if EON: base_path = "/sys/kernel/debug/cpr3-regulator/" cpr_files = [p for p in Path(base_path).glob("**/*") if p.is_file()] cpr_data = {} for cf in cpr_files: with open(cf, "r") as f: try: cpr_data[str(cf)] = f.read().strip() except Exception: pass cloudlog.event("CPR", data=cpr_data) while 1: pandaState = messaging.recv_sock(pandaState_sock, wait=True) msg = read_thermal(thermal_config) if pandaState is not None: usb_power = pandaState.pandaState.usbPowerMode != log.PandaState.UsbPowerMode.client # If we lose connection to the panda, wait 5 seconds before going offroad if pandaState.pandaState.pandaType == log.PandaState.PandaType.unknown: no_panda_cnt += 1 if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML: if startup_conditions["ignition"]: cloudlog.error("Lost panda connection while onroad") startup_conditions["ignition"] = False else: no_panda_cnt = 0 startup_conditions[ "ignition"] = pandaState.pandaState.ignitionLine or pandaState.pandaState.ignitionCan startup_conditions[ "hardware_supported"] = pandaState.pandaState.pandaType not in [ log.PandaState.PandaType.whitePanda, log.PandaState.PandaType.greyPanda ] set_offroad_alert_if_changed( "Offroad_HardwareUnsupported", not startup_conditions["hardware_supported"]) # Setup fan handler on first connect to panda if handle_fan is None and pandaState.pandaState.pandaType != log.PandaState.PandaType.unknown: is_uno = pandaState.pandaState.pandaType == log.PandaState.PandaType.uno if (not EON) or is_uno: cloudlog.info("Setting up UNO fan handler") handle_fan = handle_fan_uno else: cloudlog.info("Setting up EON fan handler") setup_eon_fan() handle_fan = handle_fan_eon # Handle disconnect if pandaState_prev is not None: if pandaState.pandaState.pandaType == log.PandaState.PandaType.unknown and \ pandaState_prev.pandaState.pandaType != log.PandaState.PandaType.unknown: params.panda_disconnect() pandaState_prev = pandaState # get_network_type is an expensive call. update every 10s if (count % int(10. / DT_TRML)) == 0: try: network_type = HARDWARE.get_network_type() network_strength = HARDWARE.get_network_strength(network_type) except Exception: cloudlog.exception("Error getting network status") msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int( round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = int(round(psutil.cpu_percent())) msg.deviceState.networkType = network_type msg.deviceState.networkStrength = network_strength msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryStatus = HARDWARE.get_battery_status() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.batteryVoltage = HARDWARE.get_battery_voltage() msg.deviceState.usbOnline = HARDWARE.get_usb_present() # Fake battery levels on uno for frame if (not EON) or is_uno: msg.deviceState.batteryPercent = 100 msg.deviceState.batteryStatus = "Charging" msg.deviceState.batteryTempC = 0 current_filter.update(msg.deviceState.batteryCurrent / 1e6) # TODO: add car battery voltage check max_cpu_temp = cpu_temp_filter.update(max(msg.deviceState.cpuTempC)) max_comp_temp = max(max_cpu_temp, msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC)) bat_temp = msg.deviceState.batteryTempC if handle_fan is not None: fan_speed = handle_fan(max_cpu_temp, bat_temp, fan_speed, startup_conditions["ignition"]) msg.deviceState.fanSpeedPercentDesired = fan_speed # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 # We only do this if there is a relay that prevents the car from faulting is_offroad_for_5_min = (started_ts is None) and ( (not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if max_cpu_temp > 107. or bat_temp >= 63. or (is_offroad_for_5_min and max_cpu_temp > 70.0): # onroad not allowed thermal_status = ThermalStatus.danger elif max_comp_temp > 96.0 or bat_temp > 60.: # hysteresis between onroad not allowed and engage not allowed thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger) elif max_cpu_temp > 94.0: # hysteresis between engage not allowed and uploader not allowed thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red) elif max_cpu_temp > 80.0: # uploader not allowed thermal_status = ThermalStatus.yellow elif max_cpu_temp > 75.0: # hysteresis between uploader not allowed and all good thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow) else: thermal_status = ThermalStatus.green # default to good condition # **** starting logic **** # Check for last update time and display alerts if needed now = datetime.datetime.utcnow() # show invalid date/time alert startup_conditions["time_valid"] = (now.year > 2020) or ( now.year == 2020 and now.month >= 10) set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) # Show update prompt try: last_update = datetime.datetime.fromisoformat( params.get("LastUpdateTime", encoding='utf8')) except (TypeError, ValueError): last_update = now dt = now - last_update update_failed_count = params.get("UpdateFailedCount") update_failed_count = 0 if update_failed_count is None else int( update_failed_count) last_update_exception = params.get("LastUpdateException", encoding='utf8') if update_failed_count > 15 and last_update_exception is not None: if current_branch in ["release2", "dashcam"]: extra_text = "Ensure the software is correctly installed" else: extra_text = last_update_exception set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) set_offroad_alert_if_changed("Offroad_UpdateFailed", True, extra_text=extra_text) elif dt.days > DAYS_NO_CONNECTIVITY_MAX and update_failed_count > 1: set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", True) elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT: remaining_time = str(max(DAYS_NO_CONNECTIVITY_MAX - dt.days, 0)) set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", True, extra_text=f"{remaining_time} days.") else: set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) startup_conditions["up_to_date"] = params.get( "Offroad_ConnectivityNeeded") is None or params.get( "DisableUpdates") == b"1" startup_conditions["not_uninstalling"] = not params.get( "DoUninstall") == b"1" startup_conditions["accepted_terms"] = params.get( "HasAcceptedTerms") == terms_version panda_signature = params.get("PandaFirmware") startup_conditions["fw_version_match"] = (panda_signature is None) or ( panda_signature == FW_SIGNATURE ) # don't show alert is no panda is connected (None) set_offroad_alert_if_changed( "Offroad_PandaFirmwareMismatch", (not startup_conditions["fw_version_match"])) # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ (current_branch in ['dashcam', 'dashcam-staging']) startup_conditions["not_driver_view"] = not params.get( "IsDriverViewEnabled") == b"1" startup_conditions["not_taking_snapshot"] = not params.get( "IsTakingSnapshot") == b"1" # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 startup_conditions[ "device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed( "Offroad_TemperatureTooHigh", (not startup_conditions["device_temp_good"])) # Handle offroad/onroad transition should_start = all(startup_conditions.values()) if should_start: if not should_start_prev: params.delete("IsOffroad") if TICI and DISABLE_LTE_ONROAD: os.system("sudo systemctl stop --no-block lte") off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions) if should_start_prev or (count == 0): params.put("IsOffroad", "1") if TICI and DISABLE_LTE_ONROAD: os.system("sudo systemctl start --no-block lte") started_ts = None if off_ts is None: off_ts = sec_since_boot() # Offroad power monitoring power_monitor.calculate(pandaState) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used() msg.deviceState.carBatteryCapacityUwh = max( 0, power_monitor.get_car_battery_capacity()) # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging( pandaState, off_ts) # Check if we need to shut down if power_monitor.should_shutdown(pandaState, off_ts, started_seen): cloudlog.info(f"shutting device down, offroad since {off_ts}") # TODO: add function for blocking cloudlog instead of sleep time.sleep(10) HARDWARE.shutdown() # If UI has crashed, set the brightness to reasonable non-zero value manager_state = messaging.recv_one_or_none(managerState_sock) if manager_state is not None: ui_running = "ui" in (p.name for p in manager_state.managerState.processes if p.running) if ui_running_prev and not ui_running: HARDWARE.set_screen_brightness(20) ui_running_prev = ui_running msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0)) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) if EON and not is_uno: set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power)) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # report to server once every 10 minutes if (count % int(600. / DT_TRML)) == 0: location = messaging.recv_sock(location_sock) cloudlog.event("STATUS_PACKET", count=count, pandaState=(strip_deprecated_keys( pandaState.to_dict()) if pandaState else None), location=(strip_deprecated_keys( location.gpsLocationExternal.to_dict()) if location else None), deviceState=strip_deprecated_keys(msg.to_dict())) count += 1
def thermald_thread(): pm = messaging.PubMaster(['deviceState']) pandaState_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected pandaState frequency pandaState_sock = messaging.sub_sock('pandaStates', timeout=pandaState_timeout) sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "managerState"]) fan_speed = 0 count = 0 startup_conditions = { "ignition": False, } startup_conditions_prev = startup_conditions.copy() off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green usb_power = True network_type = NetworkType.none network_strength = NetworkStrength.unknown network_info = None modem_version = None registered_count = 0 nvme_temps = None modem_temps = None current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML) pandaState_prev = None should_start_prev = False in_car = False handle_fan = None is_uno = False ui_running_prev = False power_monitor = PowerMonitoring() no_panda_cnt = 0 HARDWARE.initialize_hardware() thermal_config = HARDWARE.get_thermal_config() # TODO: use PI controller for UNO controller = PIController(k_p=0, k_i=2e-3, neg_limit=-80, pos_limit=0, rate=(1 / DT_TRML)) # Leave flag for loggerd to indicate device was left onroad if params.get_bool("IsOnroad"): params.put_bool("BootedOnroad", True) # dp peripheralStateLast = None dp_no_batt = params.get_bool("dp_no_batt") dp_temp_monitor = True dp_last_modified_temp_monitor = None dp_auto_shutdown = False dp_last_modified_auto_shutdown = None dp_auto_shutdown_in = 90 dp_last_modified_auto_shutdown_in = None dp_fan_mode = 0 dp_fan_mode_last = None modified = None last_modified = None last_modified_check = None dp_no_offroad_fix = params.get_bool('dp_no_offroad_fix') if JETSON: handle_fan = handle_fan_jetson while True: # dp - load temp monitor conf last_modified_check, modified = get_last_modified(LAST_MODIFIED_THERMALD, last_modified_check, modified) if last_modified != modified: dp_temp_monitor, dp_last_modified_temp_monitor = param_get_if_updated("dp_temp_monitor", "bool", dp_temp_monitor, dp_last_modified_temp_monitor) dp_auto_shutdown, dp_last_modified_auto_shutdown = param_get_if_updated("dp_auto_shutdown", "bool", dp_auto_shutdown, dp_last_modified_auto_shutdown) dp_auto_shutdown_in, dp_last_modified_auto_shutdown_in = param_get_if_updated("dp_auto_shutdown_in", "int", dp_auto_shutdown_in, dp_last_modified_auto_shutdown_in) dp_fan_mode, dp_fan_mode_last = param_get_if_updated("dp_fan_mode", "int", dp_fan_mode, dp_fan_mode_last) last_modified = modified pandaStates = messaging.recv_sock(pandaState_sock, wait=True) sm.update(0) peripheralState = sm['peripheralState'] msg = read_thermal(thermal_config) if pandaStates is not None and len(pandaStates.pandaStates) > 0: pandaState = pandaStates.pandaStates[0] # If we lose connection to the panda, wait 5 seconds before going offroad if pandaState.pandaType == log.PandaState.PandaType.unknown: no_panda_cnt += 1 if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML: if startup_conditions["ignition"]: cloudlog.error("Lost panda connection while onroad") startup_conditions["ignition"] = False else: no_panda_cnt = 0 startup_conditions["ignition"] = pandaState.ignitionLine or pandaState.ignitionCan in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected usb_power = peripheralState.usbPowerMode != log.PeripheralState.UsbPowerMode.client # Setup fan handler on first connect to panda if not JETSON and handle_fan is None and peripheralState.pandaType != log.PandaState.PandaType.unknown: is_uno = peripheralState.pandaType == log.PandaState.PandaType.uno if TICI: cloudlog.info("Setting up TICI fan handler") handle_fan = handle_fan_tici elif is_uno or PC: cloudlog.info("Setting up UNO fan handler") handle_fan = handle_fan_uno else: cloudlog.info("Setting up EON fan handler") setup_eon_fan() handle_fan = handle_fan_eon # Handle disconnect if pandaState_prev is not None: if pandaState.pandaType == log.PandaState.PandaType.unknown and \ pandaState_prev.pandaType != log.PandaState.PandaType.unknown: params.clear_all(ParamKeyType.CLEAR_ON_PANDA_DISCONNECT) pandaState_prev = pandaState # these are expensive calls. update every 10s if (count % int(10. / DT_TRML)) == 0: try: network_type = HARDWARE.get_network_type() network_strength = HARDWARE.get_network_strength(network_type) network_info = HARDWARE.get_network_info() # pylint: disable=assignment-from-none nvme_temps = HARDWARE.get_nvme_temperatures() modem_temps = HARDWARE.get_modem_temperatures() # Log modem version once if modem_version is None: modem_version = HARDWARE.get_modem_version() # pylint: disable=assignment-from-none if modem_version is not None: cloudlog.warning(f"Modem version: {modem_version}") if TICI and (network_info.get('state', None) == "REGISTERED"): registered_count += 1 else: registered_count = 0 if registered_count > 10: cloudlog.warning(f"Modem stuck in registered state {network_info}. nmcli conn up lte") os.system("nmcli conn up lte") registered_count = 0 except Exception: cloudlog.exception("Error getting network status") msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)] msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent())) msg.deviceState.networkType = network_type msg.deviceState.networkStrength = network_strength if network_info is not None: msg.deviceState.networkInfo = network_info if nvme_temps is not None: msg.deviceState.nvmeTempC = nvme_temps if modem_temps is not None: msg.deviceState.modemTempC = modem_temps msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness() msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.usbOnline = HARDWARE.get_usb_present() # Fake battery levels on uno for frame if dp_no_batt: msg.deviceState.batteryPercent = 100 current_filter.update(msg.deviceState.batteryCurrent / 1e6) max_comp_temp = temp_filter.update( max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC)) ) if handle_fan is not None: fan_speed = handle_fan(dp_fan_mode, controller, max_comp_temp, fan_speed, startup_conditions["ignition"]) msg.deviceState.fanSpeedPercentDesired = fan_speed is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP: # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 thermal_status = ThermalStatus.danger else: current_band = THERMAL_BANDS[thermal_status] band_idx = list(THERMAL_BANDS.keys()).index(thermal_status) if current_band.min_temp is not None and max_comp_temp < current_band.min_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1] elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1] if not dp_temp_monitor and thermal_status in [ThermalStatus.red, ThermalStatus.danger]: thermal_status = ThermalStatus.yellow # **** starting logic **** # Check for last update time and display alerts if needed # now = datetime.datetime.utcnow() # # # show invalid date/time alert # startup_conditions["time_valid"] = (now.year > 2020) or (now.year == 2020 and now.month >= 10) # set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) # # # Show update prompt # try: # last_update = datetime.datetime.fromisoformat(params.get("LastUpdateTime", encoding='utf8')) # except (TypeError, ValueError): # last_update = now # dt = now - last_update # # update_failed_count = params.get("UpdateFailedCount") # update_failed_count = 0 if update_failed_count is None else int(update_failed_count) # last_update_exception = params.get("LastUpdateException", encoding='utf8') # # if update_failed_count > 15 and last_update_exception is not None: # if tested_branch: # extra_text = "Ensure the software is correctly installed" # else: # extra_text = last_update_exception # # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) # set_offroad_alert_if_changed("Offroad_UpdateFailed", True, extra_text=extra_text) # elif dt.days > DAYS_NO_CONNECTIVITY_MAX and update_failed_count > 1: # set_offroad_alert_if_changed("Offroad_UpdateFailed", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", True) # elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT: # remaining_time = str(max(DAYS_NO_CONNECTIVITY_MAX - dt.days, 0)) # set_offroad_alert_if_changed("Offroad_UpdateFailed", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", True, extra_text=f"{remaining_time} days.") # else: # set_offroad_alert_if_changed("Offroad_UpdateFailed", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) startup_conditions["up_to_date"] = params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") startup_conditions["not_uninstalling"] = not params.get_bool("DoUninstall") startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version panda_signature = params.get("PandaFirmware") startup_conditions["fw_version_match"] = (panda_signature is None) or (panda_signature == FW_SIGNATURE) # don't show alert is no panda is connected (None) set_offroad_alert_if_changed("Offroad_PandaFirmwareMismatch", (not startup_conditions["fw_version_match"])) # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ params.get_bool("Passive") startup_conditions["not_driver_view"] = not params.get_bool("IsDriverViewEnabled") startup_conditions["not_taking_snapshot"] = not params.get_bool("IsTakingSnapshot") # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 startup_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not startup_conditions["device_temp_good"])) if TICI: set_offroad_alert_if_changed("Offroad_NvmeMissing", (not Path("/data/media").is_mount())) # Handle offroad/onroad transition should_start = all(startup_conditions.values()) # dp - check usb_present to fix not going offroad on "EON/LEON + battery - Comma Power" if dp_no_offroad_fix: should_start = should_start and HARDWARE.get_usb_present() if should_start != should_start_prev or (count == 0): params.put_bool("IsOnroad", should_start) params.put_bool("IsOffroad", not should_start) HARDWARE.set_power_save(not should_start) if should_start: off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions) started_ts = None if off_ts is None: off_ts = sec_since_boot() # Offroad power monitoring if not dp_no_batt: power_monitor.calculate(peripheralState, startup_conditions["ignition"]) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used() msg.deviceState.carBatteryCapacityUwh = max(0, power_monitor.get_car_battery_capacity()) # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging(startup_conditions["ignition"], in_car, off_ts, dp_auto_shutdown, dp_auto_shutdown_in) # dp - for battery powered device # when peripheralState is not changing (panda offline), and usb is not present (not charging) if dp_no_offroad_fix and (peripheralStateLast == peripheralState) and not msg.deviceState.usbOnline: if (sec_since_boot() - off_ts) > dp_auto_shutdown_in * 60: time.sleep(10) HARDWARE.shutdown() peripheralStateLast = peripheralState # Check if we need to shut down if power_monitor.should_shutdown(peripheralState, startup_conditions["ignition"], in_car, off_ts, started_seen, LEON, dp_auto_shutdown, dp_auto_shutdown_in): cloudlog.info(f"shutting device down, offroad since {off_ts}") # TODO: add function for blocking cloudlog instead of sleep time.sleep(10) HARDWARE.shutdown() # If UI has crashed, set the brightness to reasonable non-zero value ui_running = "ui" in (p.name for p in sm["managerState"].processes if p.running) if ui_running_prev and not ui_running: HARDWARE.set_screen_brightness(20) ui_running_prev = ui_running msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9*(started_ts or 0)) last_ping = params.get("LastAthenaPingTime") if last_ping is not None: msg.deviceState.lastAthenaPingTime = int(last_ping) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) if EON and not is_uno: set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power)) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # report to server once every 10 minutes # if (count % int(600. / DT_TRML)) == 0: # if EON and started_ts is None and msg.deviceState.memoryUsagePercent > 40: # cloudlog.event("High offroad memory usage", mem=msg.deviceState.memoryUsagePercent) # # cloudlog.event("STATUS_PACKET", # count=count, # pandaStates=(strip_deprecated_keys(pandaStates.to_dict()) if pandaStates else None), # peripheralState=strip_deprecated_keys(peripheralState.to_dict()), # location=(strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None), # deviceState=strip_deprecated_keys(msg.to_dict())) count += 1
def thermald_thread(): pm = messaging.PubMaster(['deviceState']) pandaState_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected pandaState frequency pandaState_sock = messaging.sub_sock('pandaState', timeout=pandaState_timeout) location_sock = messaging.sub_sock('gpsLocationExternal') managerState_sock = messaging.sub_sock('managerState', conflate=True) fan_speed = 0 count = 0 startup_conditions = { "ignition": False, } startup_conditions_prev = startup_conditions.copy() off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green usb_power = True network_type = NetworkType.none network_strength = NetworkStrength.unknown network_info = None modem_version = None registered_count = 0 current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML) pandaState_prev = None should_start_prev = False handle_fan = None is_uno = False ui_running_prev = False params = Params() power_monitor = PowerMonitoring() no_panda_cnt = 0 HARDWARE.initialize_hardware() thermal_config = HARDWARE.get_thermal_config() # TODO: use PI controller for UNO controller = PIController(k_p=0, k_i=2e-3, neg_limit=-80, pos_limit=0, rate=(1 / DT_TRML)) if params.get_bool("IsOnroad"): cloudlog.event("onroad flag not cleared") # CPR3 logging if EON: base_path = "/sys/kernel/debug/cpr3-regulator/" cpr_files = [p for p in Path(base_path).glob("**/*") if p.is_file()] cpr_files = ["/sys/kernel/debug/regulator/pm8994_s11/voltage" ] + cpr_files cpr_data = {} for cf in cpr_files: with open(cf, "r") as f: try: cpr_data[str(cf)] = f.read().strip() except Exception: pass cloudlog.event("CPR", data=cpr_data) while 1: pandaState = messaging.recv_sock(pandaState_sock, wait=True) msg = read_thermal(thermal_config) if pandaState is not None: usb_power = pandaState.pandaState.usbPowerMode != log.PandaState.UsbPowerMode.client # If we lose connection to the panda, wait 5 seconds before going offroad if pandaState.pandaState.pandaType == log.PandaState.PandaType.unknown: no_panda_cnt += 1 if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML: if startup_conditions["ignition"]: cloudlog.error("Lost panda connection while onroad") startup_conditions["ignition"] = False else: no_panda_cnt = 0 startup_conditions[ "ignition"] = pandaState.pandaState.ignitionLine or pandaState.pandaState.ignitionCan startup_conditions[ "hardware_supported"] = pandaState.pandaState.pandaType not in [ log.PandaState.PandaType.whitePanda, log.PandaState.PandaType.greyPanda ] set_offroad_alert_if_changed( "Offroad_HardwareUnsupported", not startup_conditions["hardware_supported"]) # Setup fan handler on first connect to panda if handle_fan is None and pandaState.pandaState.pandaType != log.PandaState.PandaType.unknown: is_uno = pandaState.pandaState.pandaType == log.PandaState.PandaType.uno if TICI: cloudlog.info("Setting up TICI fan handler") handle_fan = handle_fan_tici elif is_uno or PC: cloudlog.info("Setting up UNO fan handler") handle_fan = handle_fan_uno else: cloudlog.info("Setting up EON fan handler") setup_eon_fan() handle_fan = handle_fan_eon # Handle disconnect if pandaState_prev is not None: if pandaState.pandaState.pandaType == log.PandaState.PandaType.unknown and \ pandaState_prev.pandaState.pandaType != log.PandaState.PandaType.unknown: params.clear_all(ParamKeyType.CLEAR_ON_PANDA_DISCONNECT) pandaState_prev = pandaState # get_network_type is an expensive call. update every 10s if (count % int(10. / DT_TRML)) == 0: try: network_type = HARDWARE.get_network_type() network_strength = HARDWARE.get_network_strength(network_type) network_info = HARDWARE.get_network_info() # pylint: disable=assignment-from-none # Log modem version once if modem_version is None: modem_version = HARDWARE.get_modem_version() # pylint: disable=assignment-from-none if modem_version is not None: cloudlog.warning(f"Modem version: {modem_version}") if TICI and (network_info.get('state', None) == "REGISTERED"): registered_count += 1 else: registered_count = 0 if registered_count > 10: cloudlog.warning( f"Modem stuck in registered state {network_info}. nmcli conn up lte" ) os.system("nmcli conn up lte") registered_count = 0 except Exception: cloudlog.exception("Error getting network status") msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int( round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = [ int(round(n)) for n in psutil.cpu_percent(percpu=True) ] msg.deviceState.gpuUsagePercent = int( round(HARDWARE.get_gpu_usage_percent())) msg.deviceState.networkType = network_type msg.deviceState.networkStrength = network_strength if network_info is not None: msg.deviceState.networkInfo = network_info msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.usbOnline = HARDWARE.get_usb_present() current_filter.update(msg.deviceState.batteryCurrent / 1e6) max_comp_temp = temp_filter.update( max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))) if handle_fan is not None: fan_speed = handle_fan(controller, max_comp_temp, fan_speed, startup_conditions["ignition"]) msg.deviceState.fanSpeedPercentDesired = fan_speed is_offroad_for_5_min = (started_ts is None) and ( (not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP: # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 thermal_status = ThermalStatus.danger else: current_band = THERMAL_BANDS[thermal_status] band_idx = list(THERMAL_BANDS.keys()).index(thermal_status) if current_band.min_temp is not None and max_comp_temp < current_band.min_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1] elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp: thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1] # **** starting logic **** # Check for last update time and display alerts if needed now = datetime.datetime.utcnow() # show invalid date/time alert startup_conditions["time_valid"] = (now.year > 2020) or ( now.year == 2020 and now.month >= 10) set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) # Show update prompt try: last_update = datetime.datetime.fromisoformat( params.get("LastUpdateTime", encoding='utf8')) except (TypeError, ValueError): last_update = now dt = now - last_update update_failed_count = params.get("UpdateFailedCount") update_failed_count = 0 if update_failed_count is None else int( update_failed_count) last_update_exception = params.get("LastUpdateException", encoding='utf8') if update_failed_count > 15 and last_update_exception is not None: if tested_branch: extra_text = "Ensure the software is correctly installed" else: extra_text = last_update_exception set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) set_offroad_alert_if_changed("Offroad_UpdateFailed", True, extra_text=extra_text) elif dt.days > DAYS_NO_CONNECTIVITY_MAX and update_failed_count > 1: set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", True) elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT: remaining_time = str(max(DAYS_NO_CONNECTIVITY_MAX - dt.days, 0)) set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", True, extra_text=f"{remaining_time} days.") else: set_offroad_alert_if_changed("Offroad_UpdateFailed", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) startup_conditions["up_to_date"] = params.get( "Offroad_ConnectivityNeeded") is None or params.get_bool( "DisableUpdates") startup_conditions["not_uninstalling"] = not params.get_bool( "DoUninstall") startup_conditions["accepted_terms"] = params.get( "HasAcceptedTerms") == terms_version panda_signature = params.get("PandaFirmware") startup_conditions["fw_version_match"] = (panda_signature is None) or ( panda_signature == FW_SIGNATURE ) # don't show alert is no panda is connected (None) set_offroad_alert_if_changed( "Offroad_PandaFirmwareMismatch", (not startup_conditions["fw_version_match"])) # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ params.get_bool("Passive") startup_conditions["not_driver_view"] = not params.get_bool( "IsDriverViewEnabled") startup_conditions["not_taking_snapshot"] = not params.get_bool( "IsTakingSnapshot") # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 startup_conditions[ "device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed( "Offroad_TemperatureTooHigh", (not startup_conditions["device_temp_good"])) if TICI: set_offroad_alert_if_changed("Offroad_NvmeMissing", (not Path("/data/media").is_mount())) # Handle offroad/onroad transition should_start = all(startup_conditions.values()) if should_start != should_start_prev or (count == 0): params.put_bool("IsOnroad", should_start) params.put_bool("IsOffroad", not should_start) HARDWARE.set_power_save(not should_start) if should_start: off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions) started_ts = None if off_ts is None: off_ts = sec_since_boot() # Offroad power monitoring power_monitor.calculate(pandaState) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used() msg.deviceState.carBatteryCapacityUwh = max( 0, power_monitor.get_car_battery_capacity()) # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging( pandaState, off_ts) # Check if we need to shut down if power_monitor.should_shutdown(pandaState, off_ts, started_seen): cloudlog.info(f"shutting device down, offroad since {off_ts}") # TODO: add function for blocking cloudlog instead of sleep time.sleep(10) HARDWARE.shutdown() # If UI has crashed, set the brightness to reasonable non-zero value manager_state = messaging.recv_one_or_none(managerState_sock) if manager_state is not None: ui_running = "ui" in (p.name for p in manager_state.managerState.processes if p.running) if ui_running_prev and not ui_running: HARDWARE.set_screen_brightness(20) ui_running_prev = ui_running msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0)) last_ping = params.get("LastAthenaPingTime") if last_ping is not None: msg.deviceState.lastAthenaPingTime = int(last_ping) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) if EON and not is_uno: set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power)) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # report to server once every 10 minutes if (count % int(600. / DT_TRML)) == 0: if EON and started_ts is None and msg.deviceState.memoryUsagePercent > 40: cloudlog.event("High offroad memory usage", mem=msg.deviceState.memoryUsagePercent) location = messaging.recv_sock(location_sock) cloudlog.event("STATUS_PACKET", count=count, pandaState=(strip_deprecated_keys( pandaState.to_dict()) if pandaState else None), location=(strip_deprecated_keys( location.gpsLocationExternal.to_dict()) if location else None), deviceState=strip_deprecated_keys(msg.to_dict())) count += 1
def thermald_thread(): pm = messaging.PubMaster(['deviceState']) pandaState_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected pandaState frequency pandaState_sock = messaging.sub_sock('pandaState', timeout=pandaState_timeout) location_sock = messaging.sub_sock('gpsLocationExternal') managerState_sock = messaging.sub_sock('managerState', conflate=True) fan_speed = 0 count = 0 startup_conditions = { "ignition": False, } startup_conditions_prev = startup_conditions.copy() off_ts = None started_ts = None started_seen = False thermal_status = ThermalStatus.green usb_power = True network_type = NetworkType.none network_strength = NetworkStrength.unknown network_info = None modem_version = None registered_count = 0 current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) cpu_temp_filter = FirstOrderFilter(0., CPU_TEMP_TAU, DT_TRML) pandaState_prev = None should_start_prev = False handle_fan = None is_uno = False ui_running_prev = False power_monitor = PowerMonitoring() no_panda_cnt = 0 HARDWARE.initialize_hardware() thermal_config = HARDWARE.get_thermal_config() if params.get_bool("IsOnroad"): cloudlog.event("onroad flag not cleared") # dp dp_no_batt = params.get_bool("dp_no_batt") dp_temp_monitor = True dp_last_modified_temp_monitor = None dp_auto_shutdown = False dp_last_modified_auto_shutdown = None dp_auto_shutdown_last = False dp_auto_shutdown_in = 90 dp_last_modified_auto_shutdown_in = None dp_auto_shutdown_in_last = 90 dp_fan_mode = 0 dp_fan_mode_last = None modified = None last_modified = None last_modified_check = None #dp prebuilt_file = '/data/openpilot/prebuilt' dp_prebuilt = params.get_bool("dp_prebuilt") if not os.path.isfile(prebuilt_file) and dp_prebuilt: os.system("touch /data/openpilot/prebuilt") elif os.path.isfile(prebuilt_file) and not dp_prebuilt: os.system("rm -fr /data/openpilot/prebuilt") # CPR3 logging if EON: base_path = "/sys/kernel/debug/cpr3-regulator/" cpr_files = [p for p in Path(base_path).glob("**/*") if p.is_file()] cpr_files = ["/sys/kernel/debug/regulator/pm8994_s11/voltage" ] + cpr_files cpr_data = {} for cf in cpr_files: with open(cf, "r") as f: try: cpr_data[str(cf)] = f.read().strip() except Exception: pass cloudlog.event("CPR", data=cpr_data) # dp - light up = not started if JETSON: HARDWARE.led(True) while 1: # dp - load temp monitor conf last_modified_check, modified = get_last_modified( LAST_MODIFIED_THERMALD, last_modified_check, modified) if last_modified != modified: dp_temp_monitor, dp_last_modified_temp_monitor = param_get_if_updated( "dp_temp_monitor", "bool", dp_temp_monitor, dp_last_modified_temp_monitor) dp_auto_shutdown, dp_last_modified_auto_shutdown = param_get_if_updated( "dp_auto_shutdown", "bool", dp_auto_shutdown, dp_last_modified_auto_shutdown) dp_auto_shutdown_in, dp_last_modified_auto_shutdown_in = param_get_if_updated( "dp_auto_shutdown_in", "int", dp_auto_shutdown_in, dp_last_modified_auto_shutdown_in) dp_fan_mode, dp_fan_mode_last = param_get_if_updated( "dp_fan_mode", "int", dp_fan_mode, dp_fan_mode_last) last_modified = modified pandaState = messaging.recv_sock(pandaState_sock, wait=True) msg = read_thermal(thermal_config) if pandaState is not None: usb_power = pandaState.pandaState.usbPowerMode != log.PandaState.UsbPowerMode.client # If we lose connection to the panda, wait 5 seconds before going offroad if pandaState.pandaState.pandaType == log.PandaState.PandaType.unknown: no_panda_cnt += 1 if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML: if startup_conditions["ignition"]: cloudlog.error("Lost panda connection while onroad") startup_conditions["ignition"] = False else: no_panda_cnt = 0 startup_conditions[ "ignition"] = pandaState.pandaState.ignitionLine or pandaState.pandaState.ignitionCan startup_conditions[ "hardware_supported"] = pandaState.pandaState.pandaType not in [ log.PandaState.PandaType.whitePanda, log.PandaState.PandaType.greyPanda ] set_offroad_alert_if_changed( "Offroad_HardwareUnsupported", not startup_conditions["hardware_supported"]) # Setup fan handler on first connect to panda if not JETSON and handle_fan is None and pandaState.pandaState.pandaType != log.PandaState.PandaType.unknown: is_uno = pandaState.pandaState.pandaType == log.PandaState.PandaType.uno if (not EON) or is_uno: cloudlog.info("Setting up UNO fan handler") handle_fan = handle_fan_uno else: cloudlog.info("Setting up EON fan handler") setup_eon_fan() handle_fan = handle_fan_eon # Handle disconnect if pandaState_prev is not None: if pandaState.pandaState.pandaType == log.PandaState.PandaType.unknown and \ pandaState_prev.pandaState.pandaType != log.PandaState.PandaType.unknown: params.clear_all(ParamKeyType.CLEAR_ON_PANDA_DISCONNECT) pandaState_prev = pandaState # get_network_type is an expensive call. update every 10s if (count % int(10. / DT_TRML)) == 0: try: network_type = HARDWARE.get_network_type() network_strength = HARDWARE.get_network_strength(network_type) network_info = HARDWARE.get_network_info() # pylint: disable=assignment-from-none # Log modem version once if modem_version is None: modem_version = HARDWARE.get_modem_version() # pylint: disable=assignment-from-none if modem_version is not None: cloudlog.warning(f"Modem version: {modem_version}") if TICI and (network_info.get('state', None) == "REGISTERED"): registered_count += 1 else: registered_count = 0 if registered_count > 10: cloudlog.warning( f"Modem stuck in registered state {network_info}. nmcli conn up lte" ) os.system("nmcli conn up lte") registered_count = 0 except Exception: cloudlog.exception("Error getting network status") msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) msg.deviceState.memoryUsagePercent = int( round(psutil.virtual_memory().percent)) msg.deviceState.cpuUsagePercent = int(round(psutil.cpu_percent())) msg.deviceState.gpuUsagePercent = int( round(HARDWARE.get_gpu_usage_percent())) msg.deviceState.networkType = network_type msg.deviceState.networkStrength = network_strength if network_info is not None: msg.deviceState.networkInfo = network_info msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() msg.deviceState.batteryStatus = HARDWARE.get_battery_status() msg.deviceState.batteryCurrent = HARDWARE.get_battery_current() msg.deviceState.batteryVoltage = HARDWARE.get_battery_voltage() msg.deviceState.usbOnline = HARDWARE.get_usb_present() # Fake battery levels on uno for frame if (not EON) or is_uno or dp_no_batt: msg.deviceState.batteryPercent = 100 msg.deviceState.batteryStatus = "Charging" msg.deviceState.batteryTempC = 0 current_filter.update(msg.deviceState.batteryCurrent / 1e6) # TODO: add car battery voltage check max_cpu_temp = cpu_temp_filter.update(max(msg.deviceState.cpuTempC)) max_comp_temp = max(max_cpu_temp, msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC)) bat_temp = msg.deviceState.batteryTempC if handle_fan is not None: fan_speed = handle_fan(dp_fan_mode, max_cpu_temp, bat_temp, fan_speed, startup_conditions["ignition"]) msg.deviceState.fanSpeedPercentDesired = fan_speed # If device is offroad we want to cool down before going onroad # since going onroad increases load and can make temps go over 107 # We only do this if there is a relay that prevents the car from faulting is_offroad_for_5_min = (started_ts is None) and ( (not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) if max_cpu_temp > 107. or bat_temp >= 63. or (is_offroad_for_5_min and max_cpu_temp > 70.0): # onroad not allowed thermal_status = ThermalStatus.danger elif max_comp_temp > 96.0 or bat_temp > 60.: # hysteresis between onroad not allowed and engage not allowed thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger) elif max_cpu_temp > 94.0: # hysteresis between engage not allowed and uploader not allowed thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red) elif max_cpu_temp > 80.0: # uploader not allowed thermal_status = ThermalStatus.yellow elif max_cpu_temp > 75.0: # hysteresis between uploader not allowed and all good thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow) else: thermal_status = ThermalStatus.green # default to good condition if not dp_temp_monitor and thermal_status in [ ThermalStatus.red, ThermalStatus.danger ]: thermal_status = ThermalStatus.yellow # **** starting logic **** # Check for last update time and display alerts if needed # now = datetime.datetime.utcnow() # # # show invalid date/time alert # startup_conditions["time_valid"] = (now.year > 2020) or (now.year == 2020 and now.month >= 10) # set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"])) # # # Show update prompt # try: # last_update = datetime.datetime.fromisoformat(params.get("LastUpdateTime", encoding='utf8')) # except (TypeError, ValueError): # last_update = now # dt = now - last_update # # update_failed_count = params.get("UpdateFailedCount") # update_failed_count = 0 if update_failed_count is None else int(update_failed_count) # last_update_exception = params.get("LastUpdateException", encoding='utf8') # # if update_failed_count > 15 and last_update_exception is not None: # if tested_branch: # extra_text = "Ensure the software is correctly installed" # else: # extra_text = last_update_exception # # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) # set_offroad_alert_if_changed("Offroad_UpdateFailed", True, extra_text=extra_text) # elif dt.days > DAYS_NO_CONNECTIVITY_MAX and update_failed_count > 1: # set_offroad_alert_if_changed("Offroad_UpdateFailed", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", True) # elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT: # remaining_time = str(max(DAYS_NO_CONNECTIVITY_MAX - dt.days, 0)) # set_offroad_alert_if_changed("Offroad_UpdateFailed", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", True, extra_text=f"{remaining_time} days.") # else: # set_offroad_alert_if_changed("Offroad_UpdateFailed", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False) # set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False) startup_conditions["up_to_date"] = params.get( "Offroad_ConnectivityNeeded") is None or params.get_bool( "DisableUpdates") startup_conditions["not_uninstalling"] = not params.get_bool( "DoUninstall") startup_conditions["accepted_terms"] = params.get( "HasAcceptedTerms") == terms_version panda_signature = params.get("PandaFirmware") startup_conditions["fw_version_match"] = (panda_signature is None) or ( panda_signature == FW_SIGNATURE ) # don't show alert is no panda is connected (None) set_offroad_alert_if_changed( "Offroad_PandaFirmwareMismatch", (not startup_conditions["fw_version_match"])) # with 2% left, we killall, otherwise the phone will take a long time to boot startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2 startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \ params.get_bool("Passive") startup_conditions["not_driver_view"] = not params.get_bool( "IsDriverViewEnabled") startup_conditions["not_taking_snapshot"] = not params.get_bool( "IsTakingSnapshot") # if any CPU gets above 107 or the battery gets above 63, kill all processes # controls will warn with CPU above 95 or battery above 60 startup_conditions[ "device_temp_good"] = thermal_status < ThermalStatus.danger set_offroad_alert_if_changed( "Offroad_TemperatureTooHigh", (not startup_conditions["device_temp_good"])) if TICI: set_offroad_alert_if_changed("Offroad_NvmeMissing", (not Path("/data/media").is_mount())) # Handle offroad/onroad transition should_start = all(startup_conditions.values()) if should_start != should_start_prev or (count == 0): params.put_bool("IsOnroad", should_start) params.put_bool("IsOffroad", not should_start) HARDWARE.set_power_save(not should_start) if should_start: off_ts = None if started_ts is None: started_ts = sec_since_boot() started_seen = True else: if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev): cloudlog.event("Startup blocked", startup_conditions=startup_conditions) started_ts = None if off_ts is None: off_ts = sec_since_boot() # dp - show led when off and vice versa if should_start != should_start_prev: HARDWARE.led(False if should_start else True) # Offroad power monitoring if not dp_no_batt: power_monitor.calculate(pandaState) msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used( ) msg.deviceState.carBatteryCapacityUwh = max( 0, power_monitor.get_car_battery_capacity()) # Check if we need to disable charging (handled by boardd) msg.deviceState.chargingDisabled = power_monitor.should_disable_charging( pandaState, off_ts) # Check if we need to shut down if power_monitor.should_shutdown(pandaState, off_ts, started_seen, LEON): cloudlog.info(f"shutting device down, offroad since {off_ts}") # TODO: add function for blocking cloudlog instead of sleep time.sleep(10) HARDWARE.shutdown() # dp - auto shutdown # reset off_ts if we change auto shutdown related params if off_ts is not None: if dp_auto_shutdown: shutdown_sec = dp_auto_shutdown_in * 60 sec_now = sec_since_boot() - off_ts if (shutdown_sec - 5) < sec_now: msg.deviceState.chargingDisabled = True if shutdown_sec < sec_now: time.sleep(10) HARDWARE.shutdown() if dp_auto_shutdown_in_last != dp_auto_shutdown_in or dp_auto_shutdown_last != dp_auto_shutdown: off_ts = sec_since_boot() dp_auto_shutdown_last = dp_auto_shutdown dp_auto_shutdown_in_last = dp_auto_shutdown_in # If UI has crashed, set the brightness to reasonable non-zero value manager_state = messaging.recv_one_or_none(managerState_sock) if manager_state is not None: ui_running = "ui" in (p.name for p in manager_state.managerState.processes if p.running) if ui_running_prev and not ui_running: HARDWARE.set_screen_brightness(20) ui_running_prev = ui_running msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged msg.deviceState.started = started_ts is not None msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0)) last_ping = params.get("LastAthenaPingTime") if last_ping is not None: msg.deviceState.lastAthenaPingTime = int(last_ping) msg.deviceState.thermalStatus = thermal_status pm.send("deviceState", msg) if EON and not is_uno: set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power)) should_start_prev = should_start startup_conditions_prev = startup_conditions.copy() # report to server once every 10 minutes # if (count % int(600. / DT_TRML)) == 0: # if EON and started_ts is None and msg.deviceState.memoryUsagePercent > 40: # cloudlog.event("High offroad memory usage", mem=msg.deviceState.memoryUsagePercent) # # location = messaging.recv_sock(location_sock) # cloudlog.event("STATUS_PACKET", # count=count, # pandaState=(strip_deprecated_keys(pandaState.to_dict()) if pandaState else None), # location=(strip_deprecated_keys(location.gpsLocationExternal.to_dict()) if location else None), # deviceState=strip_deprecated_keys(msg.to_dict())) count += 1