def with_datastore(request):
    """Fixture which returns an initialized datastore."""
    # Remove any stale datastore file left behind by a previous run.
    try:
        os.unlink(FNAME)
    except FileNotFoundError:
        pass

    # Datastore writes are only permitted from the internal process,
    # so mark this process accordingly before opening for write.
    wandb._set_internal_process()

    store = datastore.DataStore()
    store.open_for_write(FNAME)

    def _cleanup():
        # Delete the datastore file once the requesting test finishes.
        os.unlink(FNAME)

    request.addfinalizer(_cleanup)
    return store
def test_proto_write_partial():
    """Serialize a proto into a partial block."""
    payload = dict(this=2, that=4)

    # Build a HistoryRecord whose items mirror the payload dict,
    # with each value JSON-encoded.
    history = wandb_internal_pb2.HistoryRecord()
    for key, value in payload.items():
        entry = history.item.add()
        entry.key = key
        entry.value_json = json.dumps(value)

    record = wandb_internal_pb2.Record()
    record.history.CopyFrom(history)

    # Writing requires this process to be flagged as internal.
    wandb._set_internal_process()
    store = datastore.DataStore()
    store.open_for_write(FNAME)
    store.write(record)
    store.close()
def __init__( self, sync_list, project=None, entity=None, run_id=None, view=None, verbose=None, mark_synced=None, app_url=None, ): threading.Thread.__init__(self) # mark this process as internal wandb._set_internal_process(disable=True) self._sync_list = sync_list self._project = project self._entity = entity self._run_id = run_id self._view = view self._verbose = verbose self._mark_synced = mark_synced self._app_url = app_url
def wandb_internal(
    settings,
    record_q,
    result_q,
):
    """Internal process function entrypoint.

    Read from record queue and dispatch work to various threads.

    Arguments:
        settings: dictionary of configuration parameters.
        record_q: records to be handled
        result_q: for sending results back
    """
    # mark this process as internal
    wandb._set_internal_process()
    started = time.time()

    # register the exit handler only when wandb_internal is called, not on import
    @atexit.register
    def handle_exit(*args):
        logger.info("Internal process exited")

    # Lets make sure we dont modify settings so use a static object
    _settings = settings_static.SettingsStatic(settings)
    if _settings.log_internal:
        configure_logging(_settings.log_internal, _settings._log_level)

    # Remember the parent (user) pid so we can detect if it dies.
    parent_pid = os.getppid()
    pid = os.getpid()

    logger.info(
        "W&B internal server running at pid: %s, started at: %s",
        pid,
        datetime.fromtimestamp(started),
    )

    publish_interface = interface.BackendSender(record_q=record_q)

    # Shared stop signal for all worker threads below.
    stopped = threading.Event()
    threads = []

    # Pipeline wiring: the handler consumes record_q and fans out to the
    # sender (send_record_q) and the writer (write_record_q).
    send_record_q = queue.Queue()
    record_sender_thread = SenderThread(
        settings=_settings,
        record_q=send_record_q,
        result_q=result_q,
        stopped=stopped,
        interface=publish_interface,
        debounce_interval_ms=30000,
    )
    threads.append(record_sender_thread)

    write_record_q = queue.Queue()
    record_writer_thread = WriterThread(
        settings=_settings,
        record_q=write_record_q,
        result_q=result_q,
        stopped=stopped,
        writer_q=write_record_q,
    )
    threads.append(record_writer_thread)

    record_handler_thread = HandlerThread(
        settings=_settings,
        record_q=record_q,
        result_q=result_q,
        stopped=stopped,
        sender_q=send_record_q,
        writer_q=write_record_q,
        interface=publish_interface,
    )
    threads.append(record_handler_thread)

    # Watches the parent process so we can shut down if it disappears.
    process_check = ProcessCheck(settings=_settings, pid=parent_pid)

    for thread in threads:
        thread.start()

    interrupt_count = 0

    # Main loop: idle until a stop is requested (or the parent dies).
    # The outer loop re-enters the wait after a single KeyboardInterrupt;
    # a second interrupt (interrupt_count >= 2) forces shutdown.
    while not stopped.is_set():
        try:
            # wait for stop event
            while not stopped.is_set():
                time.sleep(1)
                if process_check.is_dead():
                    logger.error("Internal process shutdown.")
                    stopped.set()
        except KeyboardInterrupt:
            interrupt_count += 1
            logger.warning("Internal process interrupt: {}".format(interrupt_count))
        finally:
            if interrupt_count >= 2:
                logger.error("Internal process interrupted.")
                stopped.set()

    for thread in threads:
        thread.join()

    # Surface any exception captured by a worker thread; treat it as a
    # fatal sync failure and exit non-zero.
    for thread in threads:
        exc_info = thread.get_exception()
        if exc_info:
            logger.error("Thread {}:".format(thread.name), exc_info=exc_info)
            print("Thread {}:".format(thread.name), file=sys.stderr)
            traceback.print_exception(*exc_info)
            sentry_exc(exc_info, delay=True)
            wandb.termerror("Internal wandb error: file data was not synced")
            sys.exit(-1)
def ensure_launched(self):
    """Launch backend worker if not running."""
    settings = dict(self._settings or ())
    settings["_log_level"] = self._log_level or logging.DEBUG

    # TODO: this is brittle and should likely be handled directly on the
    # settings object.  Multi-processing blows up when it can't pickle
    # objects.
    if "_early_logger" in settings:
        del settings["_early_logger"]

    self.record_q = self._multiprocessing.Queue()
    self.result_q = self._multiprocessing.Queue()

    if settings.get("start_method") != "thread":
        process_class = self._multiprocessing.Process
    else:
        process_class = BackendThread
        # disable internal process checks since we are one process
        wandb._set_internal_process(disable=True)

    self.wandb_process = process_class(
        target=wandb_internal,
        kwargs=dict(
            settings=settings,
            record_q=self.record_q,
            result_q=self.result_q,
        ),
    )
    self.wandb_process.name = "wandb_internal"

    # Support running code without a: __name__ == "__main__"
    # The child process re-imports __main__; temporarily point the main
    # module's spec/file at wandb.mpmain so that re-import is harmless.
    save_mod_name = None
    save_mod_path = None
    main_module = sys.modules["__main__"]
    main_mod_spec = getattr(main_module, "__spec__", None)
    main_mod_path = getattr(main_module, "__file__", None)
    main_mod_name = None
    if main_mod_spec is None:
        # hack for pdb
        # ModuleSpec only exists on py3 — on py2 leave the spec as None.
        main_mod_spec = (
            importlib.machinery.ModuleSpec(
                name="wandb.mpmain", loader=importlib.machinery.BuiltinImporter
            )
            if sys.version_info[0] > 2
            else None
        )
        main_module.__spec__ = main_mod_spec
    main_mod_name = getattr(main_mod_spec, "name", None)
    if main_mod_name is not None:
        save_mod_name = main_mod_name
        main_module.__spec__.name = "wandb.mpmain"
    elif main_mod_path is not None:
        save_mod_path = main_module.__file__
        fname = os.path.join(os.path.dirname(wandb.__file__), "mpmain", "__main__.py")
        main_module.__file__ = fname

    logger.info("starting backend process...")
    # Start the process with __name__ == "__main__" workarounds
    self.wandb_process.start()
    self._internal_pid = self.wandb_process.pid
    logger.info(
        "started backend process with pid: {}".format(self.wandb_process.pid)
    )

    # Undo temporary changes from: __name__ == "__main__"
    if save_mod_name:
        main_module.__spec__.name = save_mod_name
    elif save_mod_path:
        main_module.__file__ = save_mod_path

    self.interface = interface.BackendSender(
        process=self.wandb_process,
        record_q=self.record_q,
        result_q=self.result_q,
    )
def ensure_launched(self) -> None:
    """Launch backend worker if not running."""
    settings: Dict[str, Any] = dict()
    if self._settings is not None:
        settings = self._settings.make_static()
    settings["_log_level"] = self._log_level or logging.DEBUG

    # TODO: this is brittle and should likely be handled directly on the
    # settings object.  Multi-processing blows up when it can't pickle
    # objects.
    if "_early_logger" in settings:
        del settings["_early_logger"]

    start_method = settings.get("start_method")

    # If a manager is configured, delegate launching entirely to it.
    if self._manager:
        self._ensure_launched_manager()
        return

    self.record_q = self._multiprocessing.Queue()
    self.result_q = self._multiprocessing.Queue()
    user_pid = os.getpid()

    if start_method == "thread":
        # Run the backend in-process as a thread; disable the internal
        # process checks since user and backend share one process.
        wandb._set_internal_process(disable=True)  # type: ignore
        wandb_thread = BackendThread(
            target=wandb_internal,
            kwargs=dict(
                settings=settings,
                record_q=self.record_q,
                result_q=self.result_q,
                user_pid=user_pid,
            ),
        )
        # TODO: risky cast, assumes BackendThread Process ducktyping
        self.wandb_process = wandb_thread  # type: ignore
    else:
        self.wandb_process = self._multiprocessing.Process(
            target=wandb_internal,
            kwargs=dict(
                settings=settings,
                record_q=self.record_q,
                result_q=self.result_q,
                user_pid=user_pid,
            ),
        )
    self.wandb_process.name = "wandb_internal"

    # Temporarily patch __main__ so the spawned process can re-import it
    # safely; undone below after start().
    self._module_main_install()

    logger.info("starting backend process...")
    # Start the process with __name__ == "__main__" workarounds
    assert self.wandb_process
    self.wandb_process.start()
    self._internal_pid = self.wandb_process.pid
    logger.info(
        "started backend process with pid: {}".format(self.wandb_process.pid)
    )

    self._module_main_uninstall()

    self.interface = InterfaceQueue(
        process=self.wandb_process,
        record_q=self.record_q,
        result_q=self.result_q,
    )