def uploader_fn(exit_event):
    """Upload pending files in a loop until ``exit_event`` is set.

    Each iteration re-reads the raw-upload setting and the network state,
    asks the uploader for the next file and uploads it. Failed uploads are
    retried with a jittered exponential backoff capped at 120 seconds.

    Args:
        exit_event: event-like object; the loop returns once ``is_set()``
            is true.

    Raises:
        Exception: if the "DongleId" param is not set.
    """
    cloudlog.info("uploader_fn")

    params = Params()
    # Check for a missing id *before* decoding: calling .decode() on None
    # raised AttributeError and bypassed the intended error path below.
    raw_dongle_id = params.get("DongleId")
    if raw_dongle_id is None:
        cloudlog.info("uploader missing dongle_id")
        raise Exception("uploader can't start without dongle id")
    dongle_id = raw_dongle_id.decode('utf8')

    uploader = Uploader(dongle_id, ROOT)

    backoff = 0.1
    while True:
        allow_raw_upload = (params.get("IsUploadRawEnabled") != b"0")
        on_hotspot = is_on_hotspot()
        on_wifi = is_on_wifi()
        # raw files are only requested on wifi that is not a hotspot
        should_upload = on_wifi and not on_hotspot

        if exit_event.is_set():
            return

        d = uploader.next_file_to_upload(with_raw=allow_raw_upload and should_upload)
        if d is None:
            time.sleep(5)
            continue

        key, fn = d

        cloudlog.event("uploader_netcheck", is_on_hotspot=on_hotspot, is_on_wifi=on_wifi)
        cloudlog.info("to upload %r", d)
        success = uploader.upload(key, fn)
        if success:
            backoff = 0.1
        else:
            cloudlog.info("backoff %r", backoff)
            # sleep between 1x and 2x the current backoff, then double it
            time.sleep(backoff + random.uniform(0, backoff))
            backoff = min(backoff * 2, 120)
        cloudlog.info("upload done, success=%r", success)
def uploader_fn(exit_event):
    """Upload pending files in a loop until ``exit_event`` is set.

    While offroad the network state is only re-checked every 12th
    iteration; otherwise it is checked on every iteration. Failed uploads
    are retried with a jittered exponential backoff capped at 120 seconds.

    Args:
        exit_event: event-like object; the loop stops once ``is_set()``
            is true.

    Raises:
        Exception: if the "DongleId" param is not set.
    """
    cloudlog.info("uploader_fn")

    params = Params()
    # Check for a missing id *before* decoding: calling .decode() on None
    # raised AttributeError and bypassed the intended error path below.
    raw_dongle_id = params.get("DongleId")
    if raw_dongle_id is None:
        cloudlog.info("uploader missing dongle_id")
        raise Exception("uploader can't start without dongle id")
    dongle_id = raw_dongle_id.decode('utf8')

    uploader = Uploader(dongle_id, ROOT)

    backoff = 0.1
    counter = 0
    should_upload = False
    while not exit_event.is_set():
        offroad = params.get("IsOffroad") == b'1'
        # raw uploads additionally require being offroad
        allow_raw_upload = (params.get("IsUploadRawEnabled") != b"0") and offroad
        # counter starts at 0, so the first iteration always checks the network
        check_network = (counter % 12 == 0 if offroad else True)
        if check_network:
            on_hotspot = is_on_hotspot()
            on_wifi = is_on_wifi()
            should_upload = on_wifi and not on_hotspot

        d = uploader.next_file_to_upload(with_raw=allow_raw_upload and should_upload)
        counter += 1
        if d is None:  # Nothing to upload
            time.sleep(60 if offroad else 5)
            continue

        key, fn = d

        cloudlog.event("uploader_netcheck", is_on_hotspot=on_hotspot, is_on_wifi=on_wifi)
        cloudlog.info("to upload %r", d)
        success = uploader.upload(key, fn)
        if success:
            backoff = 0.1
        else:
            cloudlog.info("backoff %r", backoff)
            # sleep between 1x and 2x the current backoff, then double it
            time.sleep(backoff + random.uniform(0, backoff))
            backoff = min(backoff * 2, 120)
        cloudlog.info("upload done, success=%r", success)
def get_dirty() -> bool:
    """Return True when the checkout should be treated as dirty.

    The tree counts as dirty when origin or branch is unknown, when
    ``git diff-index`` reports changes against the branch (only checked
    for non-prebuilt installs), when the remote is not a comma remote,
    when the branch name contains 'master', or when a git subprocess
    raises CalledProcessError.
    """
    origin = get_origin()
    branch = get_branch()
    if origin is None or branch is None:
        return True

    is_dirty = False
    try:
        # Actually check dirty files
        if not get_prebuilt():
            # Refresh the index first, otherwise touched files might show
            # up as modified
            try:
                subprocess.check_call(["git", "update-index", "--refresh"])
            except subprocess.CalledProcessError:
                pass

            diff_rc = subprocess.call(["git", "diff-index", "--quiet", branch, "--"])
            is_dirty = (diff_rc != 0)

            # Log dirty files
            if is_dirty and get_comma_remote():
                try:
                    changed = run_cmd(["git", "diff-index", branch, "--"])
                    cloudlog.event(
                        "dirty comma branch",
                        version=get_version(),
                        dirty=is_dirty,
                        origin=origin,
                        branch=branch,
                        dirty_files=changed,
                        commit=get_commit(),
                        origin_commit=get_commit(branch),
                    )
                except subprocess.CalledProcessError:
                    pass

        # Non-comma remotes and master-like branches are always dirty
        if not is_dirty:
            is_dirty = not get_comma_remote()
        if not is_dirty:
            is_dirty = 'master' in branch
    except subprocess.CalledProcessError:
        cloudlog.exception("git subprocess failed while checking dirty")
        is_dirty = True

    return is_dirty
def list_upload_files(self):
    """Yield ``(name, key, fn)`` for every file not yet marked as uploaded.

    Directories under ``self.root`` are visited in the order given by
    ``listdir_by_creation``; a directory containing any ``.lock`` file is
    skipped entirely. As files are visited, the immediate/raw count and
    size counters on ``self`` are reset and re-accumulated.
    """
    if not os.path.isdir(self.root):
        return

    self.raw_size = 0
    self.raw_count = 0
    self.immediate_size = 0
    self.immediate_count = 0

    for segment in listdir_by_creation(self.root):
        segment_dir = os.path.join(self.root, segment)
        try:
            entries = os.listdir(segment_dir)
        except OSError:
            continue

        locked = any(entry.endswith(".lock") for entry in entries)
        if locked:
            continue

        for name in sorted(entries, key=self.get_upload_sort):
            key = os.path.join(segment, name)
            fn = os.path.join(segment_dir, name)

            # skip files already uploaded
            try:
                already_uploaded = getxattr(fn, UPLOAD_ATTR_NAME)
            except OSError:
                cloudlog.event("uploader_getxattr_failed", exc=self.last_exc, key=key, fn=fn)
                # deleter could have deleted the file; treat it as done
                already_uploaded = True
            if already_uploaded:
                continue

            # The count is bumped before getsize(), so it still increases
            # when the size lookup fails.
            try:
                if name in self.immediate_priority:
                    self.immediate_count += 1
                    self.immediate_size += os.path.getsize(fn)
                else:
                    self.raw_count += 1
                    self.raw_size += os.path.getsize(fn)
            except OSError:
                pass

            yield (name, key, fn)
def main():
    """Keep an athenad process running, restarting it whenever it exits.

    Binds the dongle id to logging and crash reporting, then repeatedly
    launches 'selfdrive.athena.athenad' in a child process, waiting five
    seconds between restarts. The manager pid param is deleted on the way
    out.
    """
    params = Params()
    dongle_id = params.get("DongleId").decode('utf-8')

    cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty)
    crash.bind_user(id=dongle_id)
    crash.bind_extra(version=version, dirty=dirty)
    crash.install()

    try:
        while True:
            cloudlog.info("starting athena daemon")
            daemon = Process(
                name='athenad',
                target=launcher,
                args=('selfdrive.athena.athenad',),
            )
            daemon.start()
            daemon.join()
            cloudlog.event("athenad exited", exitcode=daemon.exitcode)
            time.sleep(5)
    except Exception:
        cloudlog.exception("manage_athenad.exception")
    finally:
        params.delete(ATHENA_MGR_PID_PARAM)
def add(self, alert_type, enabled=True):
    """Append the alert registered under ``alert_type`` to the active list.

    When not enabled, alerts above NO_ENTRY are replaced by a NO_ENTRY
    alert. When enabled, alerts below WARNING are dropped. The active
    list is re-sorted after the append.
    """
    alert_type = str(alert_type)
    candidate = self.alerts[alert_type]

    # downgrade the alert if we aren't enabled
    if not enabled and candidate.alert_type > ET.NO_ENTRY:
        if candidate.alert_text_1 != "":
            headline = "Comma Unavailable"
        else:
            headline = ""
        candidate = alert(headline, candidate.alert_text_2, ET.NO_ENTRY, None, "chimeDouble", .4, 0., 3.)

    # ignore no entries if we are enabled
    if enabled and candidate.alert_type < ET.WARNING:
        return

    # if new alert is different, log it
    current = self.current_alert
    if current is None or current.alert_text_2 != candidate.alert_text_2:
        cloudlog.event('alert_add', alert_type=alert_type, enabled=enabled)

    self.activealerts.append(candidate)
    self.activealerts.sort()
def main():
    """Watch critical android services and the modem, once per second.

    Logs an event when a watched process object changes, when the modem
    state changes, and when the modem crash count grows. After more than
    MAX_MODEM_CRASHES crashes it writes "put" to the modem subsystem
    debug file, once.
    """
    set_core_affinity(1)
    set_realtime_priority(1)

    known_procs = {}
    last_crash_count = 0
    modem_killed = False
    last_modem_state = "ONLINE"

    while True:
        # check critical android services
        needs_rescan = (
            any(p is None or not p.is_running() for p in known_procs.values())
            or not len(known_procs)
        )
        if needs_rescan:
            found = dict.fromkeys(WATCHED_PROCS)
            for proc in psutil.process_iter(attrs=['cmdline']):
                argv = proc.info['cmdline']
                exe = argv[0] if len(argv) else None
                if exe in WATCHED_PROCS:
                    found[exe] = proc

            # nothing to compare against on the very first scan
            if len(known_procs):
                for name in WATCHED_PROCS:
                    if found[name] != known_procs[name]:
                        cloudlog.event("android service pid changed", proc=name, cur=found[name],
                                       prev=known_procs[name], error=True)
            known_procs.update(found)

        if os.path.exists(MODEM_PATH):
            # check modem state
            current_state = get_modem_state()
            if current_state != last_modem_state and not modem_killed:
                cloudlog.event("modem state changed", state=current_state)
            last_modem_state = current_state

            # check modem crashes
            crashes = get_modem_crash_count()
            if crashes is not None:
                if crashes > last_crash_count:
                    cloudlog.event("modem crash", count=crashes)
                last_crash_count = crashes

            # handle excessive modem crashes
            if last_crash_count > MAX_MODEM_CRASHES and not modem_killed:
                cloudlog.event("killing modem", error=True)
                with open("/sys/kernel/debug/msm_subsys/modem", "w") as subsys:
                    subsys.write("put")
                modem_killed = True

        time.sleep(1)
def main():
    """Connect to the athena websocket and service it, reconnecting forever.

    Each failed attempt other than a ``socket.timeout`` increases the
    retry counter passed to ``backoff()``. The loop only ends on
    KeyboardInterrupt or SystemExit.
    """
    try:
        set_core_affinity([0, 1, 2, 3])
    except Exception:
        cloudlog.exception("failed to set core affinity")

    params = Params()
    dongle_id = params.get("DongleId", encoding='utf-8')
    UploadQueueCache.initialize(upload_queue)

    ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
    api = Api(dongle_id)

    failed_attempts = 0
    while True:
        try:
            cloudlog.event("athenad.main.connecting_ws", ws_uri=ws_uri)
            auth_cookie = "jwt=" + api.get_token()
            conn = create_connection(
                ws_uri,
                cookie=auth_cookie,
                enable_multithread=True,
                timeout=30.0,
            )
            cloudlog.event("athenad.main.connected_ws", ws_uri=ws_uri)

            # connected: reset the retry count and drop stale upload state
            failed_attempts = 0
            cur_upload_items.clear()

            handle_long_poll(conn)
        except (KeyboardInterrupt, SystemExit):
            break
        except (ConnectionError, TimeoutError, WebSocketException):
            failed_attempts += 1
            params.delete("LastAthenaPingTime")
        except socket.timeout:
            # a socket timeout does not count as a failed attempt
            params.delete("LastAthenaPingTime")
        except Exception:
            cloudlog.exception("athenad.main.exception")
            failed_attempts += 1
            params.delete("LastAthenaPingTime")

        time.sleep(backoff(failed_attempts))
def plannerd_thread(sm=None, pm=None):
    """Run the lateral and longitudinal planners in an endless loop.

    Blocks until the "CarParams" param is available, builds both
    planners, then updates and publishes each one whenever its triggering
    message ('modelV2' or 'radarState') has been updated.

    Args:
        sm: optional SubMaster; a default one is created when None.
        pm: optional PubMaster; a default one is created when None.
    """
    if TICI:
        core = 5
    elif JETSON:
        core = 1
    else:
        core = 2
    config_realtime_process(core, Priority.CTRL_LOW)

    cloudlog.info("plannerd is waiting for CarParams")
    params = Params()
    CP = car.CarParams.from_bytes(params.get("CarParams", block=True))
    cloudlog.info("plannerd got CarParams: %s", CP.carName)

    use_lanelines = not params.get_bool('EndToEndToggle')
    wide_camera = params.get_bool('EnableWideCamera') if TICI else False

    # NOTE(review): this reports on=True when lanelines are used, i.e. when
    # the end-to-end toggle is off — looks inverted; confirm with consumers.
    cloudlog.event("e2e mode", on=use_lanelines)

    lon_planner = Planner(CP)
    lat_planner = LateralPlanner(CP, use_lanelines=use_lanelines, wide_camera=wide_camera)

    if sm is None:
        services = ['carState', 'controlsState', 'radarState', 'modelV2', 'dragonConf']
        sm = messaging.SubMaster(services,
                                 poll=['radarState', 'modelV2'],
                                 ignore_avg_freq=['radarState'])

    if pm is None:
        topics = ['longitudinalPlan', 'liveLongitudinalMpc', 'lateralPlan', 'liveMpc']
        pm = messaging.PubMaster(topics)

    while True:
        sm.update()

        if sm.updated['modelV2']:
            lat_planner.update(sm, CP)
            lat_planner.publish(sm, pm)

        if sm.updated['radarState']:
            lon_planner.update(sm, CP)
            lon_planner.publish(sm, pm)
def uploader_fn(exit_event):
    """Upload pending files in a loop until ``exit_event`` is set.

    Raw uploads are requested when the raw-upload param allows it and
    either cellular upload is allowed or the device is on non-hotspot
    wifi. Failed uploads back off exponentially, capped at 120 seconds.

    Raises:
        Exception: if the "DongleId" or "AccessToken" param is not set.
    """
    cloudlog.info("uploader_fn")

    params = Params()
    dongle_id = params.get("DongleId")
    access_token = params.get("AccessToken")

    credentials_missing = dongle_id is None or access_token is None
    if credentials_missing:
        cloudlog.info("uploader MISSING DONGLE_ID or ACCESS_TOKEN")
        raise Exception("uploader can't start without dongle id and access token")

    uploader = Uploader(dongle_id, access_token, ROOT)

    backoff = 0.1
    while True:
        raw_enabled = params.get("IsUploadRawEnabled") != "0"
        allow_cellular = params.get("IsUploadVideoOverCellularEnabled") != "0"
        on_hotspot = is_on_hotspot()
        on_wifi = is_on_wifi()
        network_ok = allow_cellular or (on_wifi and not on_hotspot)

        if exit_event.is_set():
            return

        candidate = uploader.next_file_to_upload(with_raw=raw_enabled and network_ok)
        if candidate is None:
            time.sleep(5)
            continue

        key, fn, _ = candidate

        cloudlog.event("uploader_netcheck", allow_cellular=allow_cellular,
                       is_on_hotspot=on_hotspot, is_on_wifi=on_wifi)
        cloudlog.info("to upload %r", candidate)

        success = uploader.upload(key, fn)
        if not success:
            cloudlog.info("backoff %r", backoff)
            # sleep between 1x and 2x the current backoff, then double it
            time.sleep(backoff + random.uniform(0, backoff))
            backoff = min(backoff*2, 120)
        else:
            backoff = 0.1
        cloudlog.info("upload done, success=%r", success)
def upload(self, key, fn):
    """Upload ``fn`` under ``key`` and delete it locally on success.

    Zero-size files are deleted without uploading and count as success.
    Returns True on success, False when the size lookup or upload fails.
    """
    try:
        sz = os.path.getsize(fn)
    except OSError:
        cloudlog.exception("upload: getsize failed")
        return False

    cloudlog.event("upload", key=key, fn=fn, sz=sz)
    cloudlog.info("checking %r with size %r", key, sz)

    if sz == 0:
        # can't upload files of 0 size, so just delete the file
        os.unlink(fn)
        uploaded = True
    else:
        cloudlog.info("uploading %r", fn)
        response = self.normal_upload(key, fn)
        uploaded = response is not None and response.status_code in (200, 201)
        if uploaded:
            cloudlog.event("upload_success", key=key, fn=fn, sz=sz)
            os.unlink(fn)  # delete the file
        else:
            cloudlog.event("upload_failed", stat=response, exc=self.last_exc, key=key, fn=fn, sz=sz)

    self.clean_dirs()
    return uploaded
def main():
    """Connect to the athena websocket and service it, reconnecting forever.

    Crash reporting is initialised with the dongle id and build info
    first. Every failed attempt increases the retry counter passed to
    ``backoff()``; the loop only ends on KeyboardInterrupt or SystemExit.
    """
    params = Params()
    dongle_id = params.get("DongleId", encoding='utf-8')

    crash.init()
    crash.bind_user(id=dongle_id)
    crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
                     device=HARDWARE.get_device_type())

    ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
    api = Api(dongle_id)

    failed_attempts = 0
    while True:
        try:
            cloudlog.event("athenad.main.connecting_ws", ws_uri=ws_uri)
            auth_cookie = "jwt=" + api.get_token()
            conn = create_connection(
                ws_uri,
                cookie=auth_cookie,
                enable_multithread=True,
                timeout=1.0,
            )
            cloudlog.event("athenad.main.connected_ws", ws_uri=ws_uri)

            conn.settimeout(1)
            failed_attempts = 0
            handle_long_poll(conn)
        except (KeyboardInterrupt, SystemExit):
            break
        except (ConnectionError, TimeoutError, WebSocketException):
            failed_attempts += 1
            params.delete("LastAthenaPingTime")
        except Exception:
            # unexpected failure: report it as a crash as well as logging it
            crash.capture_exception()
            cloudlog.exception("athenad.main.exception")
            failed_attempts += 1
            params.delete("LastAthenaPingTime")

        time.sleep(backoff(failed_attempts))
def uploader_fn(exit_event):
    """Upload pending files in a loop until ``exit_event`` is set.

    Network type is read from the 'deviceState' message. Raw uploads are
    requested only when enabled, on wifi (or ``force_wifi``) and offroad.
    Sleeping, both when idle and for backoff, is skipped when the
    module-level ``allow_sleep`` flag is false.

    Args:
        exit_event: event-like object; the loop stops once ``is_set()``
            is true.

    Raises:
        Exception: if the "DongleId" param is not set.
    """
    cloudlog.info("uploader_fn")

    params = Params()
    # Check for a missing id *before* decoding: calling .decode() on None
    # raised AttributeError and bypassed the intended error path below.
    raw_dongle_id = params.get("DongleId")
    if raw_dongle_id is None:
        cloudlog.info("uploader missing dongle_id")
        raise Exception("uploader can't start without dongle id")
    dongle_id = raw_dongle_id.decode('utf8')

    sm = messaging.SubMaster(['deviceState'])
    uploader = Uploader(dongle_id, ROOT)

    backoff = 0.1
    while not exit_event.is_set():
        sm.update(0)
        on_wifi = force_wifi or sm['deviceState'].networkType == NetworkType.wifi
        offroad = params.get("IsOffroad") == b'1'
        allow_raw_upload = params.get("IsUploadRawEnabled") != b"0"

        d = uploader.next_file_to_upload(with_raw=allow_raw_upload and on_wifi and offroad)
        if d is None:  # Nothing to upload
            if allow_sleep:
                time.sleep(60 if offroad else 5)
            continue

        key, fn = d

        cloudlog.event("uploader_netcheck", is_on_wifi=on_wifi)
        cloudlog.info("to upload %r", d)
        success = uploader.upload(key, fn)
        if success:
            backoff = 0.1
        elif allow_sleep:
            cloudlog.info("backoff %r", backoff)
            # sleep between 1x and 2x the current backoff, then double it
            time.sleep(backoff + random.uniform(0, backoff))
            backoff = min(backoff * 2, 120)
        cloudlog.info("upload done, success=%r", success)
def upload(self, key, fn):
    """Upload ``fn`` under ``key`` and tag it as uploaded via an xattr.

    Zero-size files are tagged without uploading. Status codes 200, 201,
    403 and 412 all count as done; 412 is logged as "upload_ignored". On
    a completed upload the last filename, duration and speed are stored
    on ``self``. Returns True when the file is considered done, False when
    the size lookup or the upload fails.
    """
    try:
        sz = os.path.getsize(fn)
    except OSError:
        cloudlog.exception("upload: getsize failed")
        return False

    def _tag_uploaded():
        # Best effort: a failure to set the attribute is only logged.
        try:
            setxattr(fn, UPLOAD_ATTR_NAME, UPLOAD_ATTR_VALUE)
        except OSError:
            cloudlog.event("uploader_setxattr_failed", exc=self.last_exc, key=key, fn=fn, sz=sz)

    cloudlog.event("upload", key=key, fn=fn, sz=sz)
    cloudlog.debug("checking %r with size %r", key, sz)

    if sz == 0:
        # tag files of 0 size as uploaded
        _tag_uploaded()
        return True

    started = time.monotonic()
    cloudlog.debug("uploading %r", fn)
    response = self.normal_upload(key, fn)

    accepted = response is not None and response.status_code in (200, 201, 403, 412)
    if not accepted:
        cloudlog.event("upload_failed", stat=response, exc=self.last_exc, key=key, fn=fn, sz=sz, debug=True)
        return False

    if response.status_code != 412:
        event_name = "upload_success"
    else:
        event_name = "upload_ignored"
    cloudlog.event(event_name, key=key, fn=fn, sz=sz, debug=True)

    # tag file as uploaded
    _tag_uploaded()

    self.last_filename = fn
    self.last_time = time.monotonic() - started
    self.last_speed = (sz / 1e6) / self.last_time
    return True
def main(gctx=None):
    """Fetch application updates with ``git fetch`` once per hour.

    Each round first pings 8.8.8.8; when the ping or the fetch fails the
    round is retried after 60 seconds.

    Args:
        gctx: unused by this function.
    """
    while True:
        # try network
        r = subprocess.call(["ping", "-W", "4", "-c", "1", "8.8.8.8"])
        if r:
            time.sleep(60)
            continue

        # download application update
        try:
            r = subprocess.check_output(NICE_LOW_PRIORITY + ["git", "fetch"], stderr=subprocess.STDOUT)
        # "except X as e" is valid on Python 2.6+ and 3; the old
        # "except X, e" form is a SyntaxError on Python 3.
        except subprocess.CalledProcessError as e:
            cloudlog.event("git fetch failed",
                           cmd=e.cmd,
                           output=e.output,
                           returncode=e.returncode)
            time.sleep(60)
            continue
        cloudlog.info("git fetch success: %s", r)

        time.sleep(60 * 60)
def add(self, frame, alert_type, enabled=True, extra_text_1='', extra_text_2=''):
    """Activate a copy of the alert registered under ``alert_type``.

    The extra texts are appended to the copy's two text fields and its
    start time is set to ``frame * DT_CTRL``. The active list is kept
    sorted by priority, then start time, both descending.
    """
    alert_type = str(alert_type)

    new_alert = copy.copy(self.alerts[alert_type])
    new_alert.alert_text_1 += extra_text_1
    new_alert.alert_text_2 += extra_text_2
    new_alert.start_time = frame * DT_CTRL

    # if new alert is higher priority, log it
    if not self.alertPresent() or new_alert.alert_priority > self.activealerts[0].alert_priority:
        cloudlog.event('alert_add', alert_type=alert_type, enabled=enabled)

    self.activealerts.append(new_alert)

    def _rank(entry):
        # sort by priority first and then by start_time
        return (entry.alert_priority, entry.start_time)

    self.activealerts.sort(key=_rank, reverse=True)
def add(self, alert_type, enabled=True, extra_text=''):
    """Activate a copy of the alert registered under ``alert_type``.

    ``extra_text`` is appended to the copy's second text line. When not
    enabled, WARNING / SOFT_DISABLE / IMMEDIATE_DISABLE alerts other than
    'fcw' are replaced by a NO_ENTRY alert. When enabled, ENABLE and
    NO_ENTRY alerts are dropped.
    """
    alert_type = str(alert_type)
    candidate = copy.copy(self.alerts[alert_type])
    candidate.alert_text_2 += extra_text

    # downgrade the alert if we aren't enabled, except if it's FCW, which remains the same
    # TODO: remove this 'if' by adding more alerts
    downgradable = (ET.WARNING, ET.SOFT_DISABLE, ET.IMMEDIATE_DISABLE)
    if not enabled and candidate.alert_type in downgradable and candidate != self.alerts['fcw']:
        if candidate.alert_text_1 != "":
            headline = "Comma Unavailable"
        else:
            headline = ""
        candidate = alert(headline, candidate.alert_text_2, ET.NO_ENTRY, None, "chimeDouble", .4, 0., 3.)

    # ignore no entries if we are enabled
    if enabled and candidate.alert_type in (ET.ENABLE, ET.NO_ENTRY):
        return

    # if new alert is higher priority, log it
    if self.current_alert is None or candidate > self.current_alert:
        cloudlog.event('alert_add', alert_type=alert_type, enabled=enabled)

    self.activealerts.append(candidate)
    self.activealerts.sort()
def startLocalProxy(global_end_event, remote_ws_uri, local_port):
    """Start proxying between a remote websocket and a local TCP port.

    Connects to ``remote_ws_uri`` with a JWT cookie, connects to
    127.0.0.1:``local_port``, and starts one receive and one send thread.

    Args:
        global_end_event: event passed to the receive thread.
        remote_ws_uri: websocket URI to connect to.
        local_port: local TCP port; must be in LOCAL_PORT_WHITELIST.

    Returns:
        ``{"success": 1}`` once both threads have been started.

    Raises:
        Exception: if the port is not whitelisted; any other error is
            printed and re-raised.
    """
    try:
        cloudlog.event("athena startLocalProxy", remote_ws_uri=remote_ws_uri, local_port=local_port)

        if local_port not in LOCAL_PORT_WHITELIST:
            raise Exception("Requested local port not whitelisted")

        params = Params()
        dongle_id = params.get("DongleId")
        identity_token = Api(dongle_id).get_token()
        ws = create_connection(remote_ws_uri,
                               cookie="jwt=" + identity_token,
                               enable_multithread=True)

        ssock, csock = socket.socketpair()
        local_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        local_sock.connect(('127.0.0.1', local_port))
        local_sock.setblocking(0)

        proxy_end_event = threading.Event()
        threads = [
            threading.Thread(target=ws_proxy_recv, args=(ws, local_sock, ssock, proxy_end_event, global_end_event)),
            threading.Thread(target=ws_proxy_send, args=(ws, local_sock, csock, proxy_end_event))
        ]

        # An explicit loop is required: on Python 3 map() is lazy, so
        # map(lambda t: t.start(), threads) never started the threads.
        for thread in threads:
            thread.start()

        return {"success": 1}
    except Exception:
        traceback.print_exc()
        # bare raise keeps the original traceback intact
        raise
def main():
    """Connect to the athena websocket and service it, reconnecting forever.

    Any exception other than KeyboardInterrupt / SystemExit is logged and
    increases the retry counter passed to ``backoff()``.
    """
    params = Params()
    dongle_id = params.get("DongleId").decode('utf-8')

    ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
    api = Api(dongle_id)

    failed_attempts = 0
    while True:
        try:
            auth_cookie = "jwt=" + api.get_token()
            conn = create_connection(ws_uri, cookie=auth_cookie, enable_multithread=True)
            cloudlog.event("athenad.main.connected_ws", ws_uri=ws_uri)

            conn.settimeout(1)
            failed_attempts = 0
            handle_long_poll(conn)
        except (KeyboardInterrupt, SystemExit):
            break
        except Exception:
            cloudlog.exception("athenad.main.exception")
            failed_attempts += 1

        time.sleep(backoff(failed_attempts))
def upload(self, key, fn):
    """Compress (for logs), then upload ``fn`` and delete it locally.

    Files ending in "log" are bzip2-compressed first and ".bz2" is
    appended to both ``key`` and ``fn``. Zero-size files, and every file
    when the "RecordFront" param equals "0", are deleted without
    uploading and count as success. Returns True on success, False when
    compression, the size lookup or the upload fails.
    """
    # write out the bz2 compress
    if fn.endswith("log"):
        ext = ".bz2"
        cloudlog.info("compressing %r to %r", fn, fn+ext)
        compress_cmd = "nice -n 19 bzip2 -c %s > %s.tmp && mv %s.tmp %s%s && rm %s" % (fn, fn, fn, fn, ext, fn)
        if os.system(compress_cmd) != 0:
            cloudlog.exception("upload: bzip2 compression failed")
            return False

        # assuming file is named properly
        key += ext
        fn += ext

    try:
        sz = os.path.getsize(fn)
    except OSError:
        cloudlog.exception("upload: getsize failed")
        return False

    cloudlog.event("upload", key=key, fn=fn, sz=sz)
    cloudlog.info("checking %r with size %r", key, sz)

    # tristate 3 means: discard the file instead of uploading it
    params = Params()
    tristate = 3 if params.get("RecordFront") == "0" else 0

    if sz == 0 or tristate == 3:
        # zero-size files can't be uploaded; either way, delete the file
        os.unlink(fn)
        uploaded = True
    else:
        cloudlog.info("uploading %r", fn)
        response = self.normal_upload(key, fn)
        uploaded = response is not None and response.status_code in (200, 201)
        if uploaded:
            cloudlog.event("upload_success", key=key, fn=fn, sz=sz)
            os.unlink(fn)  # delete the file
        else:
            cloudlog.event("upload_failed", stat=response, exc=self.last_exc, key=key, fn=fn, sz=sz)

    self.clean_dirs()
    return uploaded
def upload(self, key, fn):
    """Compress (for logs), then upload ``fn`` and delete it locally.

    Files ending in "log" are bzip2-compressed first and ".bz2" is
    appended to both ``key`` and ``fn``. Zero-size files are deleted
    without uploading. Only status code 200 counts as a successful
    upload. Returns True on success, False when compression, the size
    lookup or the upload fails.
    """
    # write out the bz2 compress
    if fn.endswith("log"):
        ext = ".bz2"
        cloudlog.info("compressing %r to %r", fn, fn + ext)
        compress_cmd = "nice -n 19 bzip2 -c %s > %s.tmp && mv %s.tmp %s%s && rm %s" % (fn, fn, fn, fn, ext, fn)
        if os.system(compress_cmd) != 0:
            cloudlog.exception("upload: bzip2 compression failed")
            return False

        # assuming file is named properly
        key += ext
        fn += ext

    try:
        sz = os.path.getsize(fn)
    except OSError:
        cloudlog.exception("upload: getsize failed")
        return False

    cloudlog.event("upload", key=key, fn=fn, sz=sz)
    cloudlog.info("checking %r with size %r", key, sz)

    if sz == 0:
        # can't upload files of 0 size, so just delete the file
        os.unlink(fn)
        uploaded = True
    else:
        cloudlog.info("uploading %r", fn)
        response = self.normal_upload(key, fn)
        uploaded = response is not None and response.status_code == 200
        if uploaded:
            cloudlog.event("upload_success", key=key, fn=fn, sz=sz)
            os.unlink(fn)  # delete the file
        else:
            cloudlog.event("upload_failed", stat=response, exc=self.last_exc, key=key, fn=fn, sz=sz)

    self.clean_dirs()
    return uploaded
dirty = dirty or (subprocess.call( ["git", "diff-index", "--quiet", branch, "--"]) != 0) if dirty: dirty_files = subprocess.check_output( ["git", "diff-index", branch, "--"], encoding='utf8') commit = subprocess.check_output( ["git", "rev-parse", "--verify", "HEAD"], encoding='utf8').rstrip() origin_commit = subprocess.check_output( ["git", "rev-parse", "--verify", branch], encoding='utf8').rstrip() cloudlog.event("dirty comma branch", version=version, dirty=dirty, origin=origin, branch=branch, dirty_files=dirty_files, commit=commit, origin_commit=origin_commit) except subprocess.CalledProcessError: dirty = True cloudlog.exception("git subprocess failed while checking dirty") if __name__ == "__main__": print("Dirty: %s" % dirty) print("Version: %s" % version) print("Remote: %s" % origin) print("Branch %s" % branch)
def thermald_thread(end_event, hw_queue):
    """Publish 'deviceState' and drive the onroad/offroad transition.

    Runs until ``end_event`` is set. Each iteration reads thermal data,
    merges in the latest hardware state received over ``hw_queue``,
    derives the thermal status, evaluates the onroad and startup
    conditions, writes the IsOnroad / IsOffroad / IsEngaged params,
    runs offroad power monitoring, sends the 'deviceState' message and
    reports gauges to statlog.

    Args:
        end_event: event-like object; the loop stops once it is set.
        hw_queue: queue from which the latest hardware state is read with
            ``get_nowait()``.
    """
    pm = messaging.PubMaster(['deviceState'])
    sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates"], poll=["pandaStates"])

    count = 0

    # every onroad condition must hold for the device to be onroad
    onroad_conditions: Dict[str, bool] = {
        "ignition": False,
    }
    # startup conditions are only required while not yet started
    startup_conditions: Dict[str, bool] = {}
    startup_conditions_prev: Dict[str, bool] = {}

    off_ts = None
    started_ts = None
    started_seen = False
    thermal_status = ThermalStatus.green

    # placeholder used until the first state arrives over hw_queue
    last_hw_state = HardwareState(
        network_type=NetworkType.none,
        network_metered=False,
        network_strength=NetworkStrength.unknown,
        network_info=None,
        nvme_temps=[],
        modem_temps=[],
        wifi_address='N/A',
    )

    current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
    temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML)
    should_start_prev = False
    in_car = False
    engaged_prev = False

    params = Params()
    power_monitor = PowerMonitoring()

    HARDWARE.initialize_hardware()
    thermal_config = HARDWARE.get_thermal_config()

    fan_controller = None

    restart_triggered_ts = 0.
    panda_state_ts = 0.

    while not end_event.is_set():
        sm.update(PANDA_STATES_TIMEOUT)

        pandaStates = sm['pandaStates']
        peripheralState = sm['peripheralState']

        msg = read_thermal(thermal_config)

        # neokii
        # Hold the "not_restart_triggered" condition false for 5 seconds
        # after a soft restart was requested through the params flag.
        if sec_since_boot() - restart_triggered_ts < 5.:
            onroad_conditions["not_restart_triggered"] = False
        else:
            onroad_conditions["not_restart_triggered"] = True

            if params.get_bool("SoftRestartTriggered"):
                params.put_bool("SoftRestartTriggered", False)
                restart_triggered_ts = sec_since_boot()

        if sm.updated['pandaStates'] and len(pandaStates) > 0:

            # Set ignition based on any panda connected
            onroad_conditions["ignition"] = any(ps.ignitionLine or ps.ignitionCan for ps in pandaStates if ps.pandaType != log.PandaState.PandaType.unknown)

            pandaState = pandaStates[0]

            if pandaState.pandaType != log.PandaState.PandaType.unknown:
                panda_state_ts = sec_since_boot()

            in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected

            # Setup fan handler on first connect to panda
            if fan_controller is None and peripheralState.pandaType != log.PandaState.PandaType.unknown:
                if TICI:
                    fan_controller = TiciFanController()

        # Take a newer hardware state if one is queued, else keep the last
        try:
            last_hw_state = hw_queue.get_nowait()
        except queue.Empty:
            pass

        msg.deviceState.freeSpacePercent = get_available_percent(default=100.0)
        msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent))
        msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)]
        msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent()))

        msg.deviceState.networkType = last_hw_state.network_type
        msg.deviceState.networkMetered = last_hw_state.network_metered
        msg.deviceState.networkStrength = last_hw_state.network_strength
        if last_hw_state.network_info is not None:
            msg.deviceState.networkInfo = last_hw_state.network_info

        msg.deviceState.nvmeTempC = last_hw_state.nvme_temps
        msg.deviceState.modemTempC = last_hw_state.modem_temps
        msg.deviceState.wifiIpAddress = last_hw_state.wifi_address

        msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness()
        msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity()
        msg.deviceState.batteryCurrent = HARDWARE.get_battery_current()
        msg.deviceState.usbOnline = HARDWARE.get_usb_present()
        current_filter.update(msg.deviceState.batteryCurrent / 1e6)

        # Filtered maximum over the CPU, memory and GPU temperatures
        max_comp_temp = temp_filter.update(
            max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))
        )

        if fan_controller is not None:
            msg.deviceState.fanSpeedPercentDesired = fan_controller.update(max_comp_temp, onroad_conditions["ignition"])

        is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5))
        if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP:
            # If device is offroad we want to cool down before going onroad
            # since going onroad increases load and can make temps go over 107
            thermal_status = ThermalStatus.danger
        else:
            # Move at most one band down or up per iteration, depending on
            # which limit of the current band was crossed.
            current_band = THERMAL_BANDS[thermal_status]
            band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
            if current_band.min_temp is not None and max_comp_temp < current_band.min_temp:
                thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
            elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp:
                thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]

        # **** starting logic ****

        # Ensure date/time are valid
        # NOTE: the time check is disabled here (forced True), so `now` is
        # currently unused.
        now = datetime.datetime.utcnow()
        startup_conditions["time_valid"] = True #(now.year > 2020) or (now.year == 2020 and now.month >= 10)
        set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"]))

        # NOTE: the update check is disabled here as well (forced True)
        startup_conditions["up_to_date"] = True #params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate")
        startup_conditions["not_uninstalling"] = not params.get_bool("DoUninstall")
        startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version

        # with 2% left, we killall, otherwise the phone will take a long time to boot
        startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2
        startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \
                                                   params.get_bool("Passive")
        startup_conditions["not_driver_view"] = not params.get_bool("IsDriverViewEnabled")
        startup_conditions["not_taking_snapshot"] = not params.get_bool("IsTakingSnapshot")
        # if any CPU gets above 107 or the battery gets above 63, kill all processes
        # controls will warn with CPU above 95 or battery above 60
        onroad_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger
        set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not onroad_conditions["device_temp_good"]))

        # TODO: this should move to TICI.initialize_hardware, but we currently can't import params there
        if TICI:
            if not os.path.isfile("/persist/comma/living-in-the-moment"):
                if not Path("/data/media").is_mount():
                    set_offroad_alert_if_changed("Offroad_StorageMissing", True)
                else:
                    # check for bad NVMe
                    try:
                        with open("/sys/block/nvme0n1/device/model") as f:
                            model = f.read().strip()
                        if not model.startswith("Samsung SSD 980") and params.get("Offroad_BadNvme") is None:
                            set_offroad_alert_if_changed("Offroad_BadNvme", True)
                            cloudlog.event("Unsupported NVMe", model=model, error=True)
                    except Exception:
                        pass

        # Handle offroad/onroad transition
        should_start = all(onroad_conditions.values())
        if started_ts is None:
            should_start = should_start and all(startup_conditions.values())

        # Write the params on every change and on the very first iteration
        if should_start != should_start_prev or (count == 0):
            params.put_bool("IsOnroad", should_start)
            params.put_bool("IsOffroad", not should_start)

            params.put_bool("IsEngaged", False)
            engaged_prev = False
            HARDWARE.set_power_save(not should_start)

        if sm.updated['controlsState']:
            engaged = sm['controlsState'].enabled
            if engaged != engaged_prev:
                params.put_bool("IsEngaged", engaged)
                engaged_prev = engaged

                # Best effort: record the change in the kernel log too
                try:
                    with open('/dev/kmsg', 'w') as kmsg:
                        kmsg.write(f"<3>[thermald] engaged: {engaged}\n")
                except Exception:
                    pass

        if should_start:
            off_ts = None
            if started_ts is None:
                started_ts = sec_since_boot()
                started_seen = True
        else:
            # Only log when the set of blocking conditions changed
            if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev):
                cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions)

            started_ts = None
            if off_ts is None:
                off_ts = sec_since_boot()

        # Offroad power monitoring
        power_monitor.calculate(peripheralState, onroad_conditions["ignition"])
        msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used()
        msg.deviceState.carBatteryCapacityUwh = max(0, power_monitor.get_car_battery_capacity())
        current_power_draw = HARDWARE.get_current_power_draw()  # pylint: disable=assignment-from-none
        if current_power_draw is not None:
            statlog.sample("power_draw", current_power_draw)
            msg.deviceState.powerDrawW = current_power_draw
        else:
            msg.deviceState.powerDrawW = 0

        # Check if we need to disable charging (handled by boardd)
        msg.deviceState.chargingDisabled = power_monitor.should_disable_charging(onroad_conditions["ignition"], in_car, off_ts)

        # Check if we need to shut down
        if power_monitor.should_shutdown(peripheralState, onroad_conditions["ignition"], in_car, off_ts, started_seen):
            cloudlog.warning(f"shutting device down, offroad since {off_ts}")
            params.put_bool("DoShutdown", True)

        msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90  # if current is positive, then battery is being discharged
        msg.deviceState.started = started_ts is not None
        msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0))

        last_ping = params.get("LastAthenaPingTime")
        if last_ping is not None:
            msg.deviceState.lastAthenaPingTime = int(last_ping)

        msg.deviceState.thermalStatus = thermal_status
        pm.send("deviceState", msg)

        should_start_prev = should_start
        # copy, because startup_conditions is mutated in place each iteration
        startup_conditions_prev = startup_conditions.copy()

        # Log to statsd
        statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent)
        statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent)
        statlog.gauge("memory_usage_percent", msg.deviceState.memoryUsagePercent)
        for i, usage in enumerate(msg.deviceState.cpuUsagePercent):
            statlog.gauge(f"cpu{i}_usage_percent", usage)
        for i, temp in enumerate(msg.deviceState.cpuTempC):
            statlog.gauge(f"cpu{i}_temperature", temp)
        for i, temp in enumerate(msg.deviceState.gpuTempC):
            statlog.gauge(f"gpu{i}_temperature", temp)
        statlog.gauge("memory_temperature", msg.deviceState.memoryTempC)
        statlog.gauge("ambient_temperature", msg.deviceState.ambientTempC)
        for i, temp in enumerate(msg.deviceState.pmicTempC):
            statlog.gauge(f"pmic{i}_temperature", temp)
        for i, temp in enumerate(last_hw_state.nvme_temps):
            statlog.gauge(f"nvme_temperature{i}", temp)
        for i, temp in enumerate(last_hw_state.modem_temps):
            statlog.gauge(f"modem_temperature{i}", temp)
        statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired)
        statlog.gauge("screen_brightness_percent", msg.deviceState.screenBrightnessPercent)

        # report to server once every 10 minutes
        if (count % int(600. / DT_TRML)) == 0:
            cloudlog.event("STATUS_PACKET",
                           count=count,
                           pandaStates=[strip_deprecated_keys(p.to_dict()) for p in pandaStates],
                           peripheralState=strip_deprecated_keys(peripheralState.to_dict()),
                           location=(strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None),
                           deviceState=strip_deprecated_keys(msg.to_dict()))

        count += 1
def hw_state_thread(end_event, hw_queue):
    """Handles non critical hardware state, and sends over queue

    Runs until ``end_event`` is set, sleeping DT_TRML between iterations.
    Every 10 seconds it polls HARDWARE for network, modem and NVMe
    information and offers the resulting HardwareState to ``hw_queue``
    without blocking. Any exception raised during a poll is logged and
    the loop continues.
    """
    count = 0
    registered_count = 0
    prev_hw_state = None

    modem_version = None
    modem_nv = None
    modem_configured = False

    while not end_event.is_set():
        # these are expensive calls. update every 10s
        if (count % int(10. / DT_TRML)) == 0:
            try:
                network_type = HARDWARE.get_network_type()
                modem_temps = HARDWARE.get_modem_temperatures()
                # fall back to the previous reading when none was returned
                if len(modem_temps) == 0 and prev_hw_state is not None:
                    modem_temps = prev_hw_state.modem_temps

                # Log modem version once
                if TICI and ((modem_version is None) or (modem_nv is None)):
                    modem_version = HARDWARE.get_modem_version()  # pylint: disable=assignment-from-none
                    modem_nv = HARDWARE.get_modem_nv()  # pylint: disable=assignment-from-none

                    if (modem_version is not None) and (modem_nv is not None):
                        cloudlog.event("modem version", version=modem_version, nv=modem_nv)

                hw_state = HardwareState(
                    network_type=network_type,
                    network_metered=HARDWARE.get_network_metered(network_type),
                    network_strength=HARDWARE.get_network_strength(network_type),
                    network_info=HARDWARE.get_network_info(),
                    nvme_temps=HARDWARE.get_nvme_temperatures(),
                    modem_temps=modem_temps,
                    wifi_address=HARDWARE.get_ip_address(),
                )

                # drop this sample if the queue is full
                try:
                    hw_queue.put_nowait(hw_state)
                except queue.Full:
                    pass

                # Count consecutive polls in which the modem reports the
                # "REGISTERED" state; any other state resets the counter.
                if TICI and (hw_state.network_info is not None) and (hw_state.network_info.get('state', None) == "REGISTERED"):
                    registered_count += 1
                else:
                    registered_count = 0

                # after more than 10 such polls in a row, bring the lte
                # connection up via nmcli
                if registered_count > 10:
                    cloudlog.warning(f"Modem stuck in registered state {hw_state.network_info}. nmcli conn up lte")
                    os.system("nmcli conn up lte")
                    registered_count = 0

                # TODO: remove this once the config is in AGNOS
                # configure the modem once, when a non-empty sim id is reported
                if not modem_configured and len(HARDWARE.get_sim_info().get('sim_id', '')) > 0:
                    cloudlog.warning("configuring modem")
                    HARDWARE.configure_modem()
                    modem_configured = True

                prev_hw_state = hw_state
            except Exception:
                cloudlog.exception("Error getting hardware state")

        count += 1
        time.sleep(DT_TRML)
def uploader_fn(exit_event):
    """Upload pending files one at a time until ``exit_event`` is set.

    Returns immediately, without raising, when the "DongleId" param is not
    stored. Otherwise each pass asks the ``Uploader`` for the next file and
    uploads it. Raw uploads are only requested while offroad, with
    "IsUploadRawEnabled" not equal to ``b"0"``, on wifi and not on a hotspot.
    Failed uploads back off exponentially with jitter, capped at 120 seconds;
    a success resets the backoff to 0.1 seconds.
    """
    cloudlog.info("uploader_fn")

    params = Params()
    raw_dongle_id = params.get("DongleId")
    if raw_dongle_id is None:
        # dp: leave quietly instead of logging and raising
        return

    uploader = Uploader(raw_dongle_id.decode('utf8'), ROOT)

    # dp
    # NOTE(review): the two dp_upload_* flags below are refreshed from params
    # but nothing in this function reads them afterwards - confirm intent.
    upload_on_mobile, upload_on_mobile_ts = False, None
    upload_on_hotspot, upload_on_hotspot_ts = False, None
    conf_modified = None
    conf_applied = None
    conf_checked_at = None

    retry_delay = 0.1
    passes = 0
    should_upload = False
    while not exit_event.is_set():
        offroad = params.get("IsOffroad") == b'1'
        raw_enabled = params.get("IsUploadRawEnabled") != b"0"
        allow_raw_upload = raw_enabled and offroad

        # onroad: re-check the network every pass; offroad: every 12th pass
        if not offroad or passes % 12 == 0:
            hotspot = is_on_hotspot()
            wifi = is_on_wifi()

            # dp - load temp monitor conf
            conf_checked_at, conf_modified = get_last_modified(
                LAST_MODIFIED_UPLOADER, conf_checked_at, conf_modified)
            if conf_applied != conf_modified:
                upload_on_mobile, upload_on_mobile_ts = param_get_if_updated(
                    "dp_upload_on_mobile", "bool", upload_on_mobile, upload_on_mobile_ts)
                upload_on_hotspot, upload_on_hotspot_ts = param_get_if_updated(
                    "dp_upload_on_hotspot", "bool", upload_on_hotspot, upload_on_hotspot_ts)
                conf_applied = conf_modified

            should_upload = wifi and not hotspot

        pending = uploader.next_file_to_upload(with_raw=allow_raw_upload and should_upload)
        passes += 1
        if pending is None:
            # Nothing to upload
            time.sleep(60 if offroad else 5)
            continue

        key, fn = pending

        cloudlog.event("uploader_netcheck", is_on_hotspot=hotspot, is_on_wifi=wifi)
        cloudlog.info("to upload %r", pending)
        success = uploader.upload(key, fn)
        if not success:
            cloudlog.info("backoff %r", retry_delay)
            time.sleep(retry_delay + random.uniform(0, retry_delay))
            retry_delay = min(retry_delay*2, 120)
        else:
            retry_delay = 0.1
        cloudlog.info("upload done, success=%r", success)
def thermald_thread() -> NoReturn:
    """Run the thermald main loop; never returns.

    Each pass blocks on the ``pandaStates`` socket (with a timeout), reads the
    thermal sensors into a fresh message, refreshes slow hardware readings
    every ``int(10. / DT_TRML)`` passes, drives the fan handler, derives the
    thermal status and the onroad/offroad decision, runs offroad power
    monitoring, and publishes the result as ``deviceState``.
    """
    pm = messaging.PubMaster(['deviceState'])

    pandaState_timeout = int(1000 * 2.5 * DT_TRML)  # 2.5x the expected pandaState frequency
    pandaState_sock = messaging.sub_sock('pandaStates', timeout=pandaState_timeout)
    sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "managerState"])

    fan_speed = 0
    count = 0  # loop iteration counter, used to schedule the periodic work

    # Conditions that must all hold to stay onroad.
    onroad_conditions: Dict[str, bool] = {
        "ignition": False,
    }
    # Conditions that are additionally required to go onroad (only checked
    # while started_ts is None, see the transition logic below).
    startup_conditions: Dict[str, bool] = {}
    startup_conditions_prev: Dict[str, bool] = {}

    off_ts = None  # sec_since_boot() when we last went offroad
    started_ts = None  # sec_since_boot() when we last went onroad
    started_seen = False
    thermal_status = ThermalStatus.green
    usb_power = True

    network_type = NetworkType.none
    network_strength = NetworkStrength.unknown
    network_info = None
    modem_version = None
    registered_count = 0
    nvme_temps = None
    modem_temps = None

    current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
    temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML)
    pandaState_prev = None
    should_start_prev = False
    in_car = False
    handle_fan = None  # chosen on first contact with a known panda type
    is_uno = False
    ui_running_prev = False

    params = Params()
    power_monitor = PowerMonitoring()
    no_panda_cnt = 0

    HARDWARE.initialize_hardware()
    thermal_config = HARDWARE.get_thermal_config()

    # TODO: use PI controller for UNO
    controller = PIController(k_p=0, k_i=2e-3, neg_limit=-80, pos_limit=0, rate=(1 / DT_TRML))

    # Leave flag for loggerd to indicate device was left onroad
    if params.get_bool("IsOnroad"):
        params.put_bool("BootedOnroad", True)

    while True:
        pandaStates = messaging.recv_sock(pandaState_sock, wait=True)

        sm.update(0)
        peripheralState = sm['peripheralState']

        msg = read_thermal(thermal_config)

        if pandaStates is not None and len(pandaStates.pandaStates) > 0:
            # Only the first panda in the message is considered here.
            pandaState = pandaStates.pandaStates[0]

            # If we lose connection to the panda, wait 5 seconds before going offroad
            if pandaState.pandaType == log.PandaState.PandaType.unknown:
                no_panda_cnt += 1
                if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML:
                    if onroad_conditions["ignition"]:
                        cloudlog.error("Lost panda connection while onroad")
                    onroad_conditions["ignition"] = False
            else:
                no_panda_cnt = 0
                onroad_conditions["ignition"] = pandaState.ignitionLine or pandaState.ignitionCan

            in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected
            usb_power = peripheralState.usbPowerMode != log.PeripheralState.UsbPowerMode.client

            # Setup fan handler on first connect to panda
            if handle_fan is None and peripheralState.pandaType != log.PandaState.PandaType.unknown:
                is_uno = peripheralState.pandaType == log.PandaState.PandaType.uno

                if TICI:
                    cloudlog.info("Setting up TICI fan handler")
                    handle_fan = handle_fan_tici
                elif is_uno or PC:
                    cloudlog.info("Setting up UNO fan handler")
                    handle_fan = handle_fan_uno
                else:
                    cloudlog.info("Setting up EON fan handler")
                    setup_eon_fan()
                    handle_fan = handle_fan_eon

            # Handle disconnect
            # (transition from a known panda type to unknown clears the
            # CLEAR_ON_PANDA_DISCONNECT params)
            if pandaState_prev is not None:
                if pandaState.pandaType == log.PandaState.PandaType.unknown and \
                   pandaState_prev.pandaType != log.PandaState.PandaType.unknown:
                    params.clear_all(ParamKeyType.CLEAR_ON_PANDA_DISCONNECT)
            pandaState_prev = pandaState

        # these are expensive calls. update every 10s
        if (count % int(10. / DT_TRML)) == 0:
            try:
                network_type = HARDWARE.get_network_type()
                network_strength = HARDWARE.get_network_strength(network_type)
                network_info = HARDWARE.get_network_info()  # pylint: disable=assignment-from-none
                nvme_temps = HARDWARE.get_nvme_temperatures()
                modem_temps = HARDWARE.get_modem_temperatures()

                # Log modem version once
                if modem_version is None:
                    modem_version = HARDWARE.get_modem_version()  # pylint: disable=assignment-from-none
                    if modem_version is not None:
                        cloudlog.warning(f"Modem version: {modem_version}")

                # NOTE(review): network_info is None-checked further below, so
                # it may be None here; on TICI the resulting AttributeError
                # would land in the except clause of this try - confirm intent.
                if TICI and (network_info.get('state', None) == "REGISTERED"):
                    registered_count += 1
                else:
                    registered_count = 0

                # More than 10 consecutive "REGISTERED" samples: re-up LTE.
                if registered_count > 10:
                    cloudlog.warning(f"Modem stuck in registered state {network_info}. nmcli conn up lte")
                    os.system("nmcli conn up lte")
                    registered_count = 0

            except Exception:
                cloudlog.exception("Error getting network status")

        msg.deviceState.freeSpacePercent = get_available_percent(default=100.0)
        msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent))
        msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)]
        msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent()))
        msg.deviceState.networkType = network_type
        msg.deviceState.networkStrength = network_strength
        # Optional readings are only written once a value has been obtained.
        if network_info is not None:
            msg.deviceState.networkInfo = network_info
        if nvme_temps is not None:
            msg.deviceState.nvmeTempC = nvme_temps
        if modem_temps is not None:
            msg.deviceState.modemTempC = modem_temps

        msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness()
        msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity()
        msg.deviceState.batteryCurrent = HARDWARE.get_battery_current()
        msg.deviceState.usbOnline = HARDWARE.get_usb_present()
        current_filter.update(msg.deviceState.batteryCurrent / 1e6)

        # Filtered maximum over the CPU, memory and GPU temperatures.
        max_comp_temp = temp_filter.update(
            max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))
        )

        if handle_fan is not None:
            fan_speed = handle_fan(controller, max_comp_temp, fan_speed, onroad_conditions["ignition"])
            msg.deviceState.fanSpeedPercentDesired = fan_speed

        # True when not started and either never started, no off timestamp
        # yet, or offroad for more than 5 minutes.
        is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5))
        if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP:
            # If device is offroad we want to cool down before going onroad
            # since going onroad increases load and can make temps go over 107
            thermal_status = ThermalStatus.danger
        else:
            # Step one band down/up when the temperature leaves the current
            # band's [min_temp, max_temp] range; otherwise keep the status.
            current_band = THERMAL_BANDS[thermal_status]
            band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
            if current_band.min_temp is not None and max_comp_temp < current_band.min_temp:
                thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
            elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp:
                thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]

        # **** starting logic ****

        # Ensure date/time are valid
        now = datetime.datetime.utcnow()
        startup_conditions["time_valid"] = (now.year > 2020) or (now.year == 2020 and now.month >= 10)
        set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"]))

        startup_conditions["up_to_date"] = params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate")
        startup_conditions["not_uninstalling"] = not params.get_bool("DoUninstall")
        startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version

        # with 2% left, we killall, otherwise the phone will take a long time to boot
        startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2
        startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \
                                                   params.get_bool("Passive")
        startup_conditions["not_driver_view"] = not params.get_bool("IsDriverViewEnabled")
        startup_conditions["not_taking_snapshot"] = not params.get_bool("IsTakingSnapshot")
        # if any CPU gets above 107 or the battery gets above 63, kill all processes
        # controls will warn with CPU above 95 or battery above 60
        onroad_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger
        set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not onroad_conditions["device_temp_good"]))

        if TICI:
            set_offroad_alert_if_changed("Offroad_StorageMissing", (not Path("/data/media").is_mount()))

        # Handle offroad/onroad transition
        # Startup conditions only gate the initial start; once started, only
        # the onroad conditions are consulted.
        should_start = all(onroad_conditions.values())
        if started_ts is None:
            should_start = should_start and all(startup_conditions.values())

        # Persist the state on every change and on the very first pass.
        if should_start != should_start_prev or (count == 0):
            params.put_bool("IsOnroad", should_start)
            params.put_bool("IsOffroad", not should_start)
            HARDWARE.set_power_save(not should_start)

        if should_start:
            off_ts = None
            if started_ts is None:
                started_ts = sec_since_boot()
                started_seen = True
        else:
            # Log only when ignition is on and the blocking conditions changed.
            if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev):
                cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions)

            started_ts = None
            if off_ts is None:
                off_ts = sec_since_boot()

        # Offroad power monitoring
        power_monitor.calculate(peripheralState, onroad_conditions["ignition"])
        msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used()
        msg.deviceState.carBatteryCapacityUwh = max(0, power_monitor.get_car_battery_capacity())
        current_power_draw = HARDWARE.get_current_power_draw()  # pylint: disable=assignment-from-none
        msg.deviceState.powerDrawW = current_power_draw if current_power_draw is not None else 0

        # Check if we need to disable charging (handled by boardd)
        msg.deviceState.chargingDisabled = power_monitor.should_disable_charging(onroad_conditions["ignition"], in_car, off_ts)

        # Check if we need to shut down
        if power_monitor.should_shutdown(peripheralState, onroad_conditions["ignition"], in_car, off_ts, started_seen):
            cloudlog.info(f"shutting device down, offroad since {off_ts}")
            # TODO: add function for blocking cloudlog instead of sleep
            time.sleep(10)
            HARDWARE.shutdown()

        # If UI has crashed, set the brightness to reasonable non-zero value
        ui_running = "ui" in (p.name for p in sm["managerState"].processes if p.running)
        if ui_running_prev and not ui_running:
            HARDWARE.set_screen_brightness(20)
        ui_running_prev = ui_running

        msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90  # if current is positive, then battery is being discharged
        msg.deviceState.started = started_ts is not None
        msg.deviceState.startedMonoTime = int(1e9*(started_ts or 0))

        last_ping = params.get("LastAthenaPingTime")
        if last_ping is not None:
            msg.deviceState.lastAthenaPingTime = int(last_ping)

        msg.deviceState.thermalStatus = thermal_status
        pm.send("deviceState", msg)

        if EON and not is_uno:
            set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power))

        # Remember this pass's decision for edge detection on the next pass.
        should_start_prev = should_start
        startup_conditions_prev = startup_conditions.copy()

        # report to server once every 10 minutes
        if (count % int(600. / DT_TRML)) == 0:
            if EON and started_ts is None and msg.deviceState.memoryUsagePercent > 40:
                cloudlog.event("High offroad memory usage", mem=msg.deviceState.memoryUsagePercent)

            cloudlog.event("STATUS_PACKET",
                           count=count,
                           pandaStates=(strip_deprecated_keys(pandaStates.to_dict()) if pandaStates else None),
                           peripheralState=strip_deprecated_keys(peripheralState.to_dict()),
                           location=(strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None),
                           deviceState=strip_deprecated_keys(msg.to_dict()))

        count += 1
def main():
    """Entry point of the updater daemon.

    Refuses to run when the "DisableUpdates" param is set, when (on EON) it is
    not root, or when the overlay lock is already held. After an initial
    30 second wait it loops until ``wait_helper.shutdown`` is set: while the
    device is onroad or the clock reads a year before 2019 it just waits;
    otherwise it initialises the overlay, checks for an update, fetches at
    most every 10 minutes (or immediately when the ready event was set), and
    records the outcome with ``set_params``. The overlay is dismounted on the
    way out.

    Raises:
        RuntimeError: for any of the three start-up refusals listed above.
    """
    params = Params()

    if params.get_bool("DisableUpdates"):
        raise RuntimeError("updates are disabled by the DisableUpdates param")

    if EON and os.geteuid() != 0:
        raise RuntimeError("updated must be launched as root!")

    # Set low io priority
    this_process = psutil.Process()
    if psutil.LINUX:
        this_process.ionice(psutil.IOPRIO_CLASS_BE, value=7)

    # The handle stays referenced for the rest of the function so the
    # exclusive lock taken on it is held while the loop runs.
    lock_handle = open(LOCK_FILE, 'w')
    try:
        fcntl.flock(lock_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError as err:
        raise RuntimeError("couldn't get overlay lock; is another updated running?") from err

    # Wait for IsOffroad to be set before our first update attempt
    wait_helper = WaitTimeHelper(this_process)
    wait_helper.sleep(30)

    # Drop a stale marker left behind by a previous run.
    overlay_marker = Path(os.path.join(BASEDIR, ".overlay_init"))
    if overlay_marker.exists():
        overlay_marker.unlink()

    is_first_fetch = True
    fetched_at = 0
    failures = 0

    # Run the update loop
    #  * every 1m, do a lightweight internet/update check
    #  * every 10m, do a full git fetch
    while not wait_helper.shutdown:
        fetch_requested = wait_helper.ready_event.is_set()
        wait_helper.ready_event.clear()

        # Don't run updater while onroad or if the time's wrong
        clock_invalid = datetime.datetime.utcnow().year < 2019
        onroad = not params.get_bool("IsOffroad")
        if onroad or clock_invalid:
            wait_helper.sleep(30)
            cloudlog.info("not running updater, not offroad")
            continue

        # Attempt an update
        failure_text = None
        got_new_version = False
        failures += 1  # assume failure; reset below on success
        try:
            init_overlay()

            internet_ok, update_available = check_for_update()
            if internet_ok and not update_available:
                failures = 0

            # Fetch updates at most every 10 minutes
            if internet_ok and (fetch_requested or time.monotonic() - fetched_at > 60 * 10):
                got_new_version = fetch_update(wait_helper)
                failures = 0
                fetched_at = time.monotonic()

                if is_first_fetch and not got_new_version and os.path.isdir(NEOSUPDATE_DIR):
                    shutil.rmtree(NEOSUPDATE_DIR)
                is_first_fetch = False
        except subprocess.CalledProcessError as err:
            cloudlog.event(
                "update process failed",
                cmd=err.cmd,
                output=err.output,
                returncode=err.returncode
            )
            failure_text = f"command failed: {err.cmd}\n{err.output}"
        except Exception as err:
            cloudlog.exception("uncaught updated exception, shouldn't happen")
            failure_text = str(err)

        set_params(got_new_version, failures, failure_text)
        wait_helper.sleep(60)

    dismount_overlay()
def thermald_thread():
    """Run the legacy thermald loop (sysfs battery readings, ``thermal`` socket).

    Each pass blocks on the ``health`` socket, reads the thermal sensors and
    the battery/usb state from ``/sys/class/power_supply``, drives the fan,
    derives the thermal status and the should-start decision, publishes the
    ``thermal`` message, and powers the device off on low battery or after
    usb has been absent for ``_SHUTDOWN_AT`` consecutive passes.
    The loop has no exit condition.
    """
    setup_eon_fan()

    # prevent LEECO from undervoltage
    BATT_PERC_OFF = 10 if LEON else 3

    # now loop
    thermal_sock = messaging.pub_sock(service_list['thermal'].port)
    health_sock = messaging.sub_sock(service_list['health'].port)
    location_sock = messaging.sub_sock(service_list['gpsLocation'].port)
    fan_speed = 0
    count = 0  # loop iteration counter
    shutdown_count = 0  # consecutive passes without usb present

    off_ts = None  # sec_since_boot() when we last stopped
    started_ts = None  # sec_since_boot() when we last started
    ignition_seen = False
    started_seen = False
    thermal_status = ThermalStatus.green
    health_sock.RCVTIMEO = 1500
    current_filter = FirstOrderFilter(0., CURRENT_TAU, 1.)
    health_prev = None

    # Make sure charging is enabled
    charging_disabled = False
    os.system('echo "1" > /sys/class/power_supply/battery/charging_enabled')

    params = Params()

    while 1:
        health = messaging.recv_sock(health_sock, wait=True)
        location = messaging.recv_sock(location_sock)
        location = location.gpsLocation if location else None
        msg = read_thermal()

        # clear car params when panda gets disconnected
        if health is None and health_prev is not None:
            params.panda_disconnect()
        health_prev = health

        # loggerd is gated based on free space
        avail = get_available_percent() / 100.0

        # thermal message now also includes free space
        msg.thermal.freeSpace = avail
        with open("/sys/class/power_supply/battery/capacity") as f:
            msg.thermal.batteryPercent = int(f.read())
        with open("/sys/class/power_supply/battery/status") as f:
            msg.thermal.batteryStatus = f.read().strip()
        with open("/sys/class/power_supply/battery/current_now") as f:
            msg.thermal.batteryCurrent = int(f.read())
        with open("/sys/class/power_supply/battery/voltage_now") as f:
            msg.thermal.batteryVoltage = int(f.read())
        with open("/sys/class/power_supply/usb/present") as f:
            msg.thermal.usbOnline = bool(int(f.read()))
            # kept for the usb-absent shutdown counter at the end of the pass
            usb_online = msg.thermal.usbOnline

        current_filter.update(msg.thermal.batteryCurrent / 1e6)

        # TODO: add car battery voltage check
        # Raw sensor values are scaled here: cpu/mem/gpu by 10, battery by 1000.
        max_cpu_temp = max(msg.thermal.cpu0, msg.thermal.cpu1,
                           msg.thermal.cpu2, msg.thermal.cpu3) / 10.0
        max_comp_temp = max(max_cpu_temp, msg.thermal.mem / 10., msg.thermal.gpu / 10.)
        bat_temp = msg.thermal.bat / 1000.
        fan_speed = handle_fan(max_cpu_temp, bat_temp, fan_speed)
        msg.thermal.fanSpeed = fan_speed

        # thermal logic with hysterisis
        # clip() keeps the previous status when it already lies between the
        # two bounds, which is what provides the hysteresis.
        if max_cpu_temp > 107. or bat_temp >= 63.:
            # onroad not allowed
            thermal_status = ThermalStatus.danger
        elif max_comp_temp > 95. or bat_temp > 60.:
            # hysteresis between onroad not allowed and engage not allowed
            thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger)
        elif max_cpu_temp > 90.0:
            # hysteresis between engage not allowed and uploader not allowed
            thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red)
        elif max_cpu_temp > 85.0:
            # uploader not allowed
            thermal_status = ThermalStatus.yellow
        elif max_cpu_temp > 75.0:
            # hysteresis between uploader not allowed and all good
            thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow)
        else:
            # all good
            thermal_status = ThermalStatus.green

        # **** starting logic ****

        # start constellation of processes when the car starts
        ignition = health is not None and health.health.started
        ignition_seen = ignition_seen or ignition

        # add voltage check for ignition
        # (only used until a real ignition signal has been seen once)
        if not ignition_seen and health is not None and health.health.voltage > 13500:
            ignition = True

        # NOTE(review): these compare params.get() results against str "1";
        # other versions of this code compare against bytes - confirm which
        # type Params.get returns here.
        do_uninstall = params.get("DoUninstall") == "1"
        accepted_terms = params.get("HasAcceptedTerms") == "1"
        completed_training = params.get("CompletedTrainingVersion") == training_version

        should_start = ignition

        # have we seen a panda?
        passive = (params.get("Passive") == "1")

        # with 2% left, we killall, otherwise the phone will take a long time to boot
        should_start = should_start and msg.thermal.freeSpace > 0.02

        # confirm we have completed training and aren't uninstalling
        should_start = should_start and accepted_terms and (passive or completed_training) and (not do_uninstall)

        # if any CPU gets above 107 or the battery gets above 63, kill all processes
        # controls will warn with CPU above 95 or battery above 60
        if thermal_status >= ThermalStatus.danger:
            # TODO: Add a better warning when this is happening
            should_start = False

        if should_start:
            off_ts = None
            if started_ts is None:
                started_ts = sec_since_boot()
                started_seen = True
                os.system('echo performance > /sys/class/devfreq/soc:qcom,cpubw/governor')
        else:
            started_ts = None
            if off_ts is None:
                off_ts = sec_since_boot()
                os.system('echo powersave > /sys/class/devfreq/soc:qcom,cpubw/governor')

            # shutdown if the battery gets lower than 3%, it's discharging, we aren't running for
            # more than a minute but we were running
            if msg.thermal.batteryPercent < BATT_PERC_OFF and msg.thermal.batteryStatus == "Discharging" and \
               started_seen and (sec_since_boot() - off_ts) > 60:
                os.system('LD_LIBRARY_PATH="" svc power shutdown')

        #charging_disabled = check_car_battery_voltage(should_start, health, charging_disabled)

        # With the call above commented out, charging_disabled is always False.
        msg.thermal.chargingDisabled = charging_disabled
        msg.thermal.chargingError = current_filter.x > 0. and msg.thermal.batteryPercent < 90  # if current is positive, then battery is being discharged
        msg.thermal.started = started_ts is not None
        msg.thermal.startedTs = int(1e9 * (started_ts or 0))

        msg.thermal.thermalStatus = thermal_status
        thermal_sock.send(msg.to_bytes())
        print(msg)

        # report to server once per minute
        if (count % 60) == 0:
            cloudlog.event("STATUS_PACKET",
                           count=count,
                           health=(health.to_dict() if health else None),
                           location=(location.to_dict() if location else None),
                           thermal=msg.to_dict())

        count += 1

        # shutdown EON if usb is not present after certain time
        if not usb_online:
            shutdown_count += 1
        else:
            shutdown_count = 0

        if shutdown_count >= _SHUTDOWN_AT:
            os.system('LD_LIBRARY_PATH="" svc power shutdown')
def main():
    """Entry point of the legacy updater daemon.

    Refuses to run when the "DisableUpdates" param is ``b"1"``, when not root,
    or when the overlay lock is already held. After a 30 second start-up delay
    it loops forever: when a ping to 8.8.8.8 succeeds and the clock reads 2019
    or later it (re)creates the overlay if needed and, while "IsOffroad" is
    ``b"1"``, attempts an update. The running failure count is written to the
    "UpdateFailedCount" param after every pass. The loop ends when
    ``wait_helper.shutdown`` is set, after which the overlay is dismounted.

    Raises:
        RuntimeError: updates disabled, not running as root, or the overlay
            lock could not be acquired (chained to the underlying IOError).
    """
    update_failed_count = 0
    overlay_init_done = False
    params = Params()

    if params.get("DisableUpdates") == b"1":
        raise RuntimeError("updates are disabled by param")

    if os.geteuid() != 0:
        raise RuntimeError("updated must be launched as root!")

    # Set low io priority
    p = psutil.Process()
    if psutil.LINUX:
        p.ionice(psutil.IOPRIO_CLASS_BE, value=7)

    # The handle stays referenced for the rest of the function so the
    # exclusive lock taken on it is held while the loop runs.
    ov_lock_fd = open('/tmp/safe_staging_overlay.lock', 'w')
    try:
        fcntl.flock(ov_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError as e:
        # Don't leak the handle when we bail out, and keep the original
        # error attached as the explicit cause.
        ov_lock_fd.close()
        raise RuntimeError("couldn't get overlay lock; is another updated running?") from e

    # Wait a short time before our first update attempt
    # Avoids race with IsOffroad not being set, reduces manager startup load
    time.sleep(30)
    wait_helper = WaitTimeHelper()

    time_offroad = 0
    need_reboot = False
    while True:
        # Assume failure; reset below once an update attempt completes.
        update_failed_count += 1
        time_wrong = datetime.datetime.utcnow().year < 2019
        # subprocess.call returns the exit status: non-zero means the ping failed
        ping_failed = subprocess.call(["ping", "-W", "4", "-c", "1", "8.8.8.8"])

        # Wait until we have a valid datetime to initialize the overlay
        if not (ping_failed or time_wrong):
            try:
                # If the git directory has modifications after we created the overlay
                # we need to recreate the overlay
                if overlay_init_done:
                    overlay_init_fn = os.path.join(BASEDIR, ".overlay_init")
                    git_dir_path = os.path.join(BASEDIR, ".git")
                    new_files = run(["find", git_dir_path, "-newer", overlay_init_fn])

                    if len(new_files.splitlines()):
                        cloudlog.info(".git directory changed, recreating overlay")
                        overlay_init_done = False

                if not overlay_init_done:
                    init_ovfs()
                    overlay_init_done = True

                if params.get("IsOffroad") == b"1":
                    need_reboot = attempt_update(time_offroad, need_reboot)
                    update_failed_count = 0
                else:
                    time_offroad = sec_since_boot()
                    cloudlog.info("not running updater, openpilot running")

            except subprocess.CalledProcessError as e:
                cloudlog.event(
                    "update process failed",
                    cmd=e.cmd,
                    output=e.output,
                    returncode=e.returncode
                )
                # Force the overlay to be rebuilt on the next pass.
                overlay_init_done = False
            except Exception:
                cloudlog.exception("uncaught updated exception, shouldn't happen")

        params.put("UpdateFailedCount", str(update_failed_count))
        wait_between_updates(wait_helper.ready_event)
        if wait_helper.shutdown:
            break

    # We've been signaled to shut down
    dismount_ovfs()
def thermald_thread():
    """Run the thermald loop for the ``health``/``thermal`` message generation.

    Each pass blocks on the ``health`` socket (with a timeout), reads the
    thermal sensors, refreshes the network type/strength every
    ``int(10. / DT_TRML)`` passes, drives the fan handler, derives the
    thermal status, maintains the update/connectivity offroad alerts,
    evaluates all startup conditions, runs offroad power monitoring and
    publishes the ``thermal`` message. The loop has no exit condition.
    """
    health_timeout = int(1000 * 2.5 * DT_TRML)  # 2.5x the expected health frequency

    # now loop
    thermal_sock = messaging.pub_sock('thermal')
    health_sock = messaging.sub_sock('health', timeout=health_timeout)
    location_sock = messaging.sub_sock('gpsLocation')

    fan_speed = 0
    count = 0  # loop iteration counter, used to schedule the periodic work

    # Every entry must be True for should_start to be True.
    startup_conditions = {
        "ignition": False,
    }
    startup_conditions_prev = startup_conditions.copy()

    off_ts = None  # sec_since_boot() when we last went offroad
    started_ts = None  # sec_since_boot() when we last started
    started_seen = False
    thermal_status = ThermalStatus.green
    usb_power = True
    current_branch = get_git_branch()

    network_type = NetworkType.none
    network_strength = NetworkStrength.unknown

    current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
    cpu_temp_filter = FirstOrderFilter(0., CPU_TEMP_TAU, DT_TRML)
    health_prev = None
    should_start_prev = False
    handle_fan = None  # chosen on first contact with a known hardware type
    is_uno = False
    has_relay = False

    params = Params()
    pm = PowerMonitoring()
    no_panda_cnt = 0

    thermal_config = get_thermal_config()

    while 1:
        health = messaging.recv_sock(health_sock, wait=True)
        location = messaging.recv_sock(location_sock)
        location = location.gpsLocation if location else None
        msg = read_thermal(thermal_config)

        if health is not None:
            usb_power = health.health.usbPowerMode != log.HealthData.UsbPowerMode.client

            # If we lose connection to the panda, wait 5 seconds before going offroad
            if health.health.hwType == log.HealthData.HwType.unknown:
                no_panda_cnt += 1
                if no_panda_cnt > DISCONNECT_TIMEOUT / DT_TRML:
                    if startup_conditions["ignition"]:
                        cloudlog.error("Lost panda connection while onroad")
                    startup_conditions["ignition"] = False
            else:
                no_panda_cnt = 0
                startup_conditions["ignition"] = health.health.ignitionLine or health.health.ignitionCan

            # Setup fan handler on first connect to panda
            if handle_fan is None and health.health.hwType != log.HealthData.HwType.unknown:
                is_uno = health.health.hwType == log.HealthData.HwType.uno
                has_relay = health.health.hwType in [log.HealthData.HwType.blackPanda, log.HealthData.HwType.uno, log.HealthData.HwType.dos]

                if (not EON) or is_uno:
                    cloudlog.info("Setting up UNO fan handler")
                    handle_fan = handle_fan_uno
                else:
                    cloudlog.info("Setting up EON fan handler")
                    setup_eon_fan()
                    handle_fan = handle_fan_eon

            # Handle disconnect
            # (transition from a known hardware type to unknown)
            if health_prev is not None:
                if health.health.hwType == log.HealthData.HwType.unknown and \
                   health_prev.health.hwType != log.HealthData.HwType.unknown:
                    params.panda_disconnect()
            health_prev = health

        # get_network_type is an expensive call. update every 10s
        if (count % int(10. / DT_TRML)) == 0:
            try:
                network_type = HARDWARE.get_network_type()
                network_strength = HARDWARE.get_network_strength(network_type)
            except Exception:
                cloudlog.exception("Error getting network status")

        msg.thermal.freeSpace = get_available_percent(default=100.0) / 100.0
        msg.thermal.memUsedPercent = int(round(psutil.virtual_memory().percent))
        msg.thermal.cpuPerc = int(round(psutil.cpu_percent()))
        msg.thermal.networkType = network_type
        msg.thermal.networkStrength = network_strength
        msg.thermal.batteryPercent = get_battery_capacity()
        msg.thermal.batteryStatus = get_battery_status()
        msg.thermal.batteryCurrent = get_battery_current()
        msg.thermal.batteryVoltage = get_battery_voltage()
        msg.thermal.usbOnline = get_usb_present()

        # Fake battery levels on uno for frame
        if (not EON) or is_uno:
            msg.thermal.batteryPercent = 100
            msg.thermal.batteryStatus = "Charging"
            msg.thermal.bat = 0

        current_filter.update(msg.thermal.batteryCurrent / 1e6)

        # TODO: add car battery voltage check
        max_cpu_temp = cpu_temp_filter.update(max(msg.thermal.cpu))
        max_comp_temp = max(max_cpu_temp, msg.thermal.mem, max(msg.thermal.gpu))
        bat_temp = msg.thermal.bat

        if handle_fan is not None:
            fan_speed = handle_fan(max_cpu_temp, bat_temp, fan_speed, startup_conditions["ignition"])
            msg.thermal.fanSpeed = fan_speed

        # If device is offroad we want to cool down before going onroad
        # since going onroad increases load and can make temps go over 107
        # We only do this if there is a relay that prevents the car from faulting
        is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5))
        # clip() keeps the previous status when it already lies between the
        # two bounds, which is what provides the hysteresis.
        if max_cpu_temp > 107. or bat_temp >= 63. or (has_relay and is_offroad_for_5_min and max_cpu_temp > 70.0):
            # onroad not allowed
            thermal_status = ThermalStatus.danger
        elif max_comp_temp > 96.0 or bat_temp > 60.:
            # hysteresis between onroad not allowed and engage not allowed
            thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger)
        elif max_cpu_temp > 94.0:
            # hysteresis between engage not allowed and uploader not allowed
            thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red)
        elif max_cpu_temp > 80.0:
            # uploader not allowed
            thermal_status = ThermalStatus.yellow
        elif max_cpu_temp > 75.0:
            # hysteresis between uploader not allowed and all good
            thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow)
        else:
            # all good
            thermal_status = ThermalStatus.green

        # **** starting logic ****

        # Check for last update time and display alerts if needed
        now = datetime.datetime.utcnow()

        # show invalid date/time alert
        startup_conditions["time_valid"] = now.year >= 2019
        set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"]))

        # Show update prompt
        try:
            last_update = datetime.datetime.fromisoformat(params.get("LastUpdateTime", encoding='utf8'))
        except (TypeError, ValueError):
            # missing or unparsable timestamp: treat as "updated just now"
            last_update = now
        dt = now - last_update

        update_failed_count = params.get("UpdateFailedCount")
        update_failed_count = 0 if update_failed_count is None else int(update_failed_count)
        last_update_exception = params.get("LastUpdateException", encoding='utf8')

        # Exactly one of the three update/connectivity alerts is shown at a
        # time (or none); the branches below are checked in priority order.
        if update_failed_count > 15 and last_update_exception is not None:
            if current_branch in ["release2", "dashcam"]:
                extra_text = "Ensure the software is correctly installed"
            else:
                extra_text = last_update_exception

            set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False)
            set_offroad_alert_if_changed("Offroad_UpdateFailed", True, extra_text=extra_text)
        elif dt.days > DAYS_NO_CONNECTIVITY_MAX and update_failed_count > 1:
            set_offroad_alert_if_changed("Offroad_UpdateFailed", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", True)
        elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT:
            remaining_time = str(max(DAYS_NO_CONNECTIVITY_MAX - dt.days, 0))
            set_offroad_alert_if_changed("Offroad_UpdateFailed", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", True, extra_text=f"{remaining_time} days.")
        else:
            set_offroad_alert_if_changed("Offroad_UpdateFailed", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeeded", False)
            set_offroad_alert_if_changed("Offroad_ConnectivityNeededPrompt", False)

        startup_conditions["not_uninstalling"] = not params.get("DoUninstall") == b"1"
        startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version
        completed_training = params.get("CompletedTrainingVersion") == training_version

        panda_signature = params.get("PandaFirmware")
        startup_conditions["fw_version_match"] = (panda_signature is None) or (panda_signature == FW_SIGNATURE)   # don't show alert is no panda is connected (None)
        set_offroad_alert_if_changed("Offroad_PandaFirmwareMismatch", (not startup_conditions["fw_version_match"]))

        # with 2% left, we killall, otherwise the phone will take a long time to boot
        startup_conditions["free_space"] = msg.thermal.freeSpace > 0.02
        startup_conditions["completed_training"] = completed_training or (current_branch in ['dashcam', 'dashcam-staging'])
        startup_conditions["not_driver_view"] = not params.get("IsDriverViewEnabled") == b"1"
        startup_conditions["not_taking_snapshot"] = not params.get("IsTakingSnapshot") == b"1"
        # if any CPU gets above 107 or the battery gets above 63, kill all processes
        # controls will warn with CPU above 95 or battery above 60
        startup_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger
        set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not startup_conditions["device_temp_good"]))
        should_start = all(startup_conditions.values())

        if should_start:
            # rising edge: clear the offroad flag
            if not should_start_prev:
                params.delete("IsOffroad")

            off_ts = None
            if started_ts is None:
                started_ts = sec_since_boot()
                started_seen = True
                os.system('echo performance > /sys/class/devfreq/soc:qcom,cpubw/governor')
        else:
            # Log only when ignition is on and the blocking conditions changed.
            if startup_conditions["ignition"] and (startup_conditions != startup_conditions_prev):
                cloudlog.event("Startup blocked", startup_conditions=startup_conditions)
            # falling edge, or the very first pass: set the offroad flag
            if should_start_prev or (count == 0):
                params.put("IsOffroad", "1")

            started_ts = None
            if off_ts is None:
                off_ts = sec_since_boot()
                os.system('echo powersave > /sys/class/devfreq/soc:qcom,cpubw/governor')

        # Offroad power monitoring
        pm.calculate(health)
        msg.thermal.offroadPowerUsage = pm.get_power_used()
        msg.thermal.carBatteryCapacity = max(0, pm.get_car_battery_capacity())

        # Check if we need to disable charging (handled by boardd)
        msg.thermal.chargingDisabled = pm.should_disable_charging(health, off_ts)

        # Check if we need to shut down
        if pm.should_shutdown(health, off_ts, started_seen, LEON):
            cloudlog.info(f"shutting device down, offroad since {off_ts}")
            # TODO: add function for blocking cloudlog instead of sleep
            time.sleep(10)
            os.system('LD_LIBRARY_PATH="" svc power shutdown')

        msg.thermal.chargingError = current_filter.x > 0. and msg.thermal.batteryPercent < 90  # if current is positive, then battery is being discharged
        msg.thermal.started = started_ts is not None
        msg.thermal.startedTs = int(1e9*(started_ts or 0))

        msg.thermal.thermalStatus = thermal_status
        thermal_sock.send(msg.to_bytes())

        set_offroad_alert_if_changed("Offroad_ChargeDisabled", (not usb_power))

        # Remember this pass's decision for edge detection on the next pass.
        should_start_prev = should_start
        startup_conditions_prev = startup_conditions.copy()

        # report to server once per minute
        if (count % int(60. / DT_TRML)) == 0:
            cloudlog.event("STATUS_PACKET",
                           count=count,
                           health=(health.to_dict() if health else None),
                           location=(location.to_dict() if location else None),
                           thermal=msg.to_dict())

        count += 1