def test_settings(test_dir, mocker):
    """Yield a ``wandb.Settings`` object for use in tests.

    Resets a few pieces of wandb module state, makes sure a ``wandb``
    directory exists under the current working directory, and builds settings
    that point at ``http://localhost`` with ``console="off"``.  Once the test
    is done, a run still registered as ``wandb.run`` is joined.

    ``test_dir`` and ``mocker`` are requested but not referenced in the body.
    """
    # TODO: likely not the right thing to do, we shouldn't be setting this
    wandb._IS_INTERNAL_PROCESS = False
    wandb.wandb_sdk.wandb_run.EXIT_TIMEOUT = 15
    wandb.wandb_sdk.wandb_setup._WandbSetup.instance = None

    mkdir_exists_ok(os.path.join(os.getcwd(), "wandb"))

    # TODO: consider making a debugable directory that stays around...
    settings_kwargs = dict(
        _start_time=time.time(),
        base_url="http://localhost",
        root_dir=os.getcwd(),
        save_code=True,
        project="test",
        console="off",
        host="test",
        api_key=DUMMY_API_KEY,
        run_id=wandb.util.generate_id(),
        _start_datetime=datetime.datetime.now(),
    )
    settings = wandb.Settings(**settings_kwargs)
    settings.setdefaults()
    yield settings

    # Just in case someone forgets to join in tests
    active_run = wandb.run
    if active_run is not None:
        active_run.join()
def setup(self, kwargs):
    """Store ``kwargs`` and start building the login settings.

    The optional ``"_settings"`` entry is removed from ``kwargs``; when it is
    truthy it is applied on top of a fresh ``wandb.Settings()``.
    """
    self.kwargs = kwargs

    # build up login settings
    login_settings: Settings = wandb.Settings()
    settings_param = kwargs.pop("_settings", None)
    if not settings_param:
        return
    login_settings._apply_settings(settings_param)
def __init__(
    self,
    entity: str,
    project: str,
    queues: Iterable[str] = None,
    max_jobs: int = None,
):
    """Set up local agent state and register the agent with the server.

    Args:
        entity: Entity the agent is created under.
        project: Project the agent is created under.
        queues: Queue names passed to ``create_launch_agent``; when falsy,
            ``self._queues`` is set to ``["default"]``.
        max_jobs: Stored as ``self._max_jobs``; a falsy value becomes ``1``.
    """
    self._entity = entity
    self._project = project
    self._api = Api()
    self._settings = wandb.Settings()
    self._base_url = self._api.settings().get("base_url")
    self._jobs: Dict[Union[int, str], AbstractRun] = {}
    self._ticks = 0
    self._running = 0
    self._cwd = os.getcwd()
    self._namespace = wandb.util.generate_id()
    self._access = _convert_access("project")
    self._max_jobs = max_jobs if max_jobs else 1

    # serverside creation
    introspection = self._api.launch_agent_introspection()
    self.gorilla_supports_agents = introspection is not None
    # NOTE: the raw ``queues`` argument (possibly None) is what gets sent.
    agent_info = self._api.create_launch_agent(
        entity, project, queues, self.gorilla_supports_agents
    )
    self._id = agent_info["launchAgentId"]
    # hacky: want to display this to the user but we don't get it back from
    # gql until polling starts. fix later
    self._name = ""
    self._queues = queues or ["default"]
def update_session(self, key):
    """Apply ``key`` as a login-sourced API key and refresh user settings.

    Args:
        key: API key to record in the settings.
    """
    login_values = {"api_key": key}
    new_settings: Settings = wandb.Settings()
    new_settings._apply_source_login(login_values)
    self._wl._update(settings=new_settings)

    # Whenever the key changes, make sure to pull in user settings
    # from server.
    self._wl._update_user_settings()
def test_settings(test_dir, mocker, live_mock_server):
    """Yield a ``wandb.Settings`` object rooted at ``test_dir``.

    Resets a few pieces of wandb module state, makes sure
    ``<test_dir>/wandb`` exists, and builds settings whose ``base_url`` is
    taken from ``live_mock_server``.  After the test, a run still registered
    as ``wandb.run`` is finished.

    ``mocker`` is requested but not referenced in the body.
    """
    # TODO: likely not the right thing to do, we shouldn't be setting this
    wandb._IS_INTERNAL_PROCESS = False
    wandb.wandb_sdk.wandb_run.EXIT_TIMEOUT = 15
    wandb.wandb_sdk.wandb_setup._WandbSetup.instance = None

    mkdir_exists_ok(os.path.join(test_dir, "wandb"))

    settings_kwargs = dict(
        _start_datetime=datetime.datetime.now(),
        _start_time=time.time(),
        api_key=DUMMY_API_KEY,
        base_url=live_mock_server.base_url,
        console="off",
        host="test",
        project="test",
        root_dir=test_dir,
        run_id=wandb.util.generate_id(),
        save_code=False,
    )
    yield wandb.Settings(**settings_kwargs)

    # Just in case someone forgets to join in tests. ...well, please don't!
    leftover_run = wandb.run
    if leftover_run is not None:
        leftover_run.finish()
def __init__(self, log_dir, log_name, post_fix='', tb_on=True, wandb_on=True,
             project_name='faster-rcnn', resume=False):
    """Configure file logging and the optional TensorBoard / W&B sinks.

    Args:
        log_dir: Directory that receives ``log<post_fix>.txt`` and, when
            TensorBoard is enabled, a ``tensorboard`` sub-directory.
        log_name: Used as both the W&B run ``id`` and ``name``.
        post_fix: Suffix inserted into the log file name.
        tb_on: Create a TensorBoard ``FileWriter`` when true.
        wandb_on: Call ``wandb.init`` when true.
        project_name: W&B project name.
        resume: Append to the log file (``'a'``) instead of truncating it
            (``'w'``).
    """
    self.log_dir = log_dir
    self.log_name = log_name
    self.tb_on = tb_on
    self.wandb_on = wandb_on

    log_filemode = 'a' if resume else 'w'
    logging.basicConfig(
        level=logging.INFO,
        filename=os.path.join(self.log_dir, 'log{}.txt'.format(post_fix)),
        filemode=log_filemode,
    )
    self.logger = logging.getLogger("Train")

    if self.tb_on:
        tb_dir = os.path.join(log_dir, 'tensorboard')
        # exist_ok avoids the check-then-create race when several
        # processes start against the same log directory.
        os.makedirs(tb_dir, exist_ok=True)
        self.tbWriter = tb.writer.FileWriter(tb_dir)

    if self.wandb_on:
        wandb.init(
            project=project_name,
            settings=wandb.Settings(console='off'),
            id=self.log_name,
            name=self.log_name,
        )
def agent(sweep_id, function=None, entity=None, project=None, count=None):
    """Generic agent entrypoint, used for CLI or jupyter.

    Args:
        sweep_id (dict): Sweep ID generated by CLI or sweep API
        function (func, optional): A function to call instead of the
            "program" specified in the config
        entity (str, optional): W&B Entity
        project (str, optional): W&B Project
        count (int, optional): the number of trials to run.
    """
    in_jupyter = wandb._get_python_type() != "python"
    if in_jupyter:
        os.environ[wandb.env.JUPYTER] = "true"
        internal_api = InternalApi()
        if not internal_api.api_key:
            wandb._jupyter_login(api=internal_api)

    # The instance is discarded; only the constructor call is kept.
    wandb.Settings()

    agent_kwargs = dict(
        function=function,
        in_jupyter=in_jupyter,
        entity=entity,
        project=project,
        count=count,
    )
    return run_agent(sweep_id, **agent_kwargs)
def login(key, host, cloud, relogin, anonymously, no_offline=False):
    """CLI login: persist the ``base_url`` choice, then call ``wandb.login``.

    Args:
        key: Sequence of API keys; only the first element is used.
        host: Base URL of the server, or ``None``.
        cloud: When true and ``host`` is ``None``, the stored ``base_url``
            setting is cleared.
        relogin: Forwarded to ``wandb.login``.
        anonymously: Selects anonymous mode ``"must"`` instead of ``"never"``.
        no_offline: Currently unused.

    Raises:
        ClickException: If ``host`` is set but does not start with ``http``.
    """
    # TODO: handle no_offline
    if anonymously:
        anonymous = "must"
    else:
        anonymous = "never"

    cli_settings = wandb.Settings(
        _cli_only_mode=True, anonymous=anonymous, base_url=host
    )
    wandb.setup(settings=cli_settings)
    api = _get_cling_api()

    reset_base_url = host == "https://api.wandb.ai" or (host is None and cloud)
    if reset_base_url:
        api.clear_setting("base_url", globally=True, persist=True)
        # To avoid writing an empty local settings file, we only clear if it
        # exists
        if os.path.exists(Settings._local_path()):
            api.clear_setting("base_url", persist=True)
    elif host:
        if not host.startswith("http"):
            raise ClickException("host must start with http(s)://")
        api.set_setting("base_url", host.strip("/"), globally=True, persist=True)

    if len(key) > 0:
        key = key[0]
    else:
        key = None
    wandb.login(
        relogin=relogin, key=key, anonymous=anonymous, host=host, force=True
    )
def test_multiproc_strict(live_mock_server, parse_ctx):
    """With ``strict="true"``, child processes that call ``train`` must fail.

    The parent logs through ``train(0)``; two child processes run ``train``
    with ``add_val=100`` and are each expected to exit non-zero, so the
    summary only reflects the parent's values.
    """
    strict_settings = wandb.Settings(strict="true")
    run = wandb.init(settings=strict_settings)
    train(0)

    workers = [
        multiprocessing.Process(target=train, kwargs=dict(add_val=100))
        for _ in range(2)
    ]
    try:
        for worker in workers:
            worker.start()
    finally:
        for worker in workers:
            worker.join()
            # expect fail
            assert worker.exitcode != 0
    run.finish()

    ctx_util = parse_ctx(live_mock_server.get_ctx())
    # Drop the internal keys (those starting with "_") before comparing.
    public_summary = {
        key: value
        for key, value in dict(ctx_util.summary).items()
        if not key.startswith("_")
    }
    assert dict(val=3, val2=1, mystep=3) == public_summary
def _get_cling_api():
    """Return the module-level internal api, creating it on first use.

    On the first call ``wandb.setup`` is invoked with ``_cli_only_mode=True``
    before the ``InternalApi`` is constructed and cached in ``_api``.
    """
    global _api
    if _api is not None:
        return _api

    # TODO(jhr): make a settings object that is better for non runs.
    cli_settings = wandb.Settings(_cli_only_mode=True)
    wandb.setup(settings=cli_settings)
    _api = InternalApi()
    return _api
def test_no_dirs(test_settings, runner):
    """A run in ``mode="disabled"`` must not create a ``wandb`` directory."""
    with runner.isolated_filesystem():
        disabled = wandb.Settings(mode="disabled")
        test_settings._apply_settings(disabled)

        run = wandb.init(settings=test_settings)
        run.log({"acc": 0.9})
        run.finish()

        wandb_dir_exists = os.path.isdir("wandb")
        assert not wandb_dir_exists
def build_wandb(self):
    """Initialise W&B when ``args['wandb']['name']`` is set.

    Sets ``self.wandb`` to the ``wandb`` module after a successful init, or
    to ``None`` when no project name is configured.
    """
    project = self.args['wandb']['name']
    if project:
        wandb.init(project=project, settings=wandb.Settings(console='off'))
        wandb.config.update(self.args)
        self.wandb = wandb
        return
    self.wandb = None
def model_provider():
    """Build the model.

    Builds a ``GPT2Model`` when ``args.pipe_parallel_size`` is 0 and a
    ``GPT2ModelPipe`` otherwise, then starts a W&B run when an API key is
    available.

    Returns:
        The constructed model.
    """
    args = get_args()

    print_rank_0('building GPT2 model ...')
    if args.pipe_parallel_size == 0:
        model = GPT2Model(num_tokentypes=0, parallel_output=True)
    else:
        model = GPT2ModelPipe(num_tokentypes=0, parallel_output=True,
                              topology=mpu.get_topology())
        # This is a hack to give us a reference to get_batch_pipe from within
        # training.py. We need to call model.set_batch_fn after
        # deepspeed.initialize
        model._megatron_batch_fn = get_batch_pipe

    # Wandb
    use_wandb = get_wandb_api_key() is not None
    set_use_wandb(use_wandb)
    args_dict = vars(args)
    if use_wandb:
        # only display system stats from one worker per machine
        if is_local_main():
            wandb_settings = wandb.Settings()
        else:
            wandb_settings = wandb.Settings(_disable_stats=True)
        group_name = args_dict.get('wandb_group')
        name = f'{socket.gethostname()}-{local_rank()}' if group_name else None

        try:
            wandb.init(project="neox", group=group_name, name=name,
                       save_code=False, force=False,
                       entity=args_dict.get('wandb_team'),
                       settings=wandb_settings)
        except UsageError as e:
            set_use_wandb(False)
            # Clear the local flag as well; otherwise the config update
            # below would still run although no run was initialised.
            use_wandb = False
            print(e)
            print(
                'Skipping wandb. Execute `wandb login` on local or main node machine to enable.'
            )

    if use_wandb:
        wandb.config.update(args_dict)

    return model
def run_start(self, run_id):
    """Create settings for ``run_id``, make their directories and inform init.

    Args:
        run_id: Run id stored on the new settings object.
    """
    run_settings = wandb.Settings()
    run_settings._start_run()
    run_settings.update(run_id=run_id)

    # Both paths are created with os.makedirs, which raises if they exist.
    os.makedirs(run_settings.files_dir)
    os.makedirs(run_settings.log_user)

    self._inform_init(run_settings)
def init_wandb_func():
    """Call ``wandb.init`` with values from ``cfg`` and the enclosing scope.

    The run ``id`` and ``name`` are both ``wandb_unique_id``; the run is
    started with ``resume=True`` and ``start_method='fork'``.
    """
    init_kwargs = dict(
        project=cfg.wandb_project,
        entity=cfg.wandb_user,
        sync_tensorboard=True,
        id=wandb_unique_id,
        name=wandb_unique_id,
        group=wandb_group,
        job_type=cfg.wandb_job_type,
        tags=cfg.wandb_tags,
        resume=True,
        settings=wandb.Settings(start_method='fork'),
    )
    wandb.init(**init_kwargs)
def setup(self, context):
    """Start (or resume) the W&B run for ``context`` and prepare its folder.

    A new run gets a generated id and a name extended with two random words
    and that id; both are written back to ``context``.  A run whose metadata
    already carries ``wandb_id`` is resumed with ``resume='allow'``.

    Args:
        context: Object providing ``metadata``, ``name``, ``get_config()``
            and ``file_paths``.

    Raises:
        RuntimeError: If ``wandb.init`` still fails after all retries.
    """
    wandb_params = {
        'project': self.project_name,
        'settings': wandb.Settings(symlink=False),
    }
    if self.group_name is not None:
        wandb_params['group'] = self.group_name

    resuming_previous_run = 'wandb_id' in context.metadata
    if not resuming_previous_run:
        wandb_params['id'] = context.metadata["wandb_id"] = wandb.util.generate_id()
        rw = RandomWords()
        context.name = f'{context.name}-{rw.random_word()}-{rw.random_word()}-{context.metadata["wandb_id"]}'
        wandb_params['name'] = context.name
        config = context.get_config()
        config = flatten_nested_dict(config)
        wandb_params['config'] = config
    else:
        wandb_params['id'] = context.metadata["wandb_id"]
        wandb_params['resume'] = 'allow'

    # Initialize directories for saving data
    if self.group_name is None:
        self.save_folder = os.path.join(self.logging_dir, self.project_name,
                                        context.name)
    else:
        self.save_folder = os.path.join(self.logging_dir, self.project_name,
                                        self.group_name, context.name)
    # exist_ok avoids the check-then-create race between processes.
    os.makedirs(self.save_folder, exist_ok=True)
    wandb_params['dir'] = self.save_folder

    last_error = None
    for _ in range(100):
        try:
            wandb.init(**wandb_params)
            break
        except Exception as e:
            # Keep a reference: ``e`` is unbound once the handler exits.
            last_error = e
            print(f"wandb.init failed due to {e}\nRetrying in 10s...")
            time.sleep(10)
    else:
        # Every attempt failed; stop here instead of continuing without a run.
        raise RuntimeError("wandb.init failed after 100 attempts") from last_error

    wandb.define_metric("*", summary="max")
    wandb.define_metric("*", summary="min")
    wandb.define_metric("*", summary="mean")

    # Save code on first iteration
    if not resuming_previous_run:
        for file_path in context.file_paths:
            WandbLogger.wandb_save(file_path)
    print(str(context))
def update_session(self, key):
    """Apply ``key`` as a login setting, or switch to offline mode.

    A truthy ``key`` is stored as ``api_key``; otherwise ``mode="offline"``
    is applied.  User settings are pulled from the server only when the
    resulting settings are not offline.

    Args:
        key: API key, or a falsy value to go offline.
    """
    _logger = wandb.setup()._get_logger()
    new_settings: Settings = wandb.Settings()

    if key:
        login_values = {"api_key": key}
    else:
        login_values = {"mode": "offline"}
    new_settings._apply_source_login(login_values, _logger=_logger)
    self._wl._update(settings=new_settings)

    # Whenever the key changes, make sure to pull in user settings
    # from server.
    if self._wl.settings._offline:
        return
    self._wl._update_user_settings()
def new(cls, entity: Optional[str] = None, project: Optional[str] = None) -> WandbLogger:
    """
    Initializes a W&B run (``start_method='fork'``) and creates a new logger.

    :param entity: The W&B entity passed to ``wandb.init``.
    :param project: The W&B project passed to ``wandb.init``.
    :return: The logger.
    """
    fork_settings = wandb.Settings(start_method='fork')
    wandb.init(entity=entity, project=project, settings=fork_settings)
    logger = cls()
    return logger
def setup(self, kwargs):
    """Build login settings from ``kwargs`` and apply them globally.

    The optional ``"_settings"`` entry is removed from ``kwargs`` and, when
    truthy, applied first; the remaining ``kwargs`` are applied as login
    values.  The result is handed to ``wandb.setup`` and its ``_settings``
    are kept on ``self._settings``.
    """
    self.kwargs = kwargs

    # build up login settings
    login_settings = wandb.Settings()
    override = kwargs.pop("_settings", None)
    if override:
        login_settings._apply_settings(override)
    login_settings._apply_login(kwargs)

    # make sure they are applied globally
    wl = wandb.setup(settings=login_settings)
    self._wl = wl
    self._settings = wl._settings
def test_settings_unexpected_args_telemetry(runner, live_mock_server,
                                            parse_ctx, capsys):
    """Unknown ``Settings`` arguments are reported on stderr and in telemetry."""
    expected_warning = "Ignoring unexpected arguments: ['blah']"
    with runner.isolated_filesystem():
        run = wandb.init(settings=wandb.Settings(blah=3))

        stderr_text = capsys.readouterr().err
        assert expected_warning in stderr_text

        ctx_util = parse_ctx(live_mock_server.get_ctx())
        # TelemetryRecord field 11 is Issues,
        # whose field 2 corresponds to unexpected arguments in Settings
        issues = ctx_util.telemetry.get("11", [])
        assert 2 in issues
        run.finish()
def train():
    """Agent target: start a run and check that it picked up the sweep id."""
    # Here we're in a different process. It's hard to communicate
    # back to the main process for assertions.
    local_settings = wandb.Settings(
        base_url="http://localhost",
        api_key=dummy_api_key,
    )
    # TODO: Fix this.
    # There is an issue here, the agent sets the environment variable
    # WANDB_SWEEP_ID and wandb.init() should pick that up. But it doesn't,
    # I think because the settings object has been frozen at some other time.
    run = wandb.init(settings=local_settings)

    # If this assertion fails, the test will timeout (because we
    # never complete 1 agent run)
    sweep_id = run.sweep_id
    assert sweep_id == 'test-sweep-id'
def init(*args, **kwargs):
    """Call ``wandb.init`` offline against the mocked backend.

    Extra positional and keyword arguments are forwarded to ``wandb.init``.
    ``unset_globals()`` runs afterwards whether or not the call succeeds.
    """
    try:
        mocks_from_args(mocker, default_wandb_args(), mock_server)
        # TODO: likely not the right thing to do, we shouldn't be setting this
        wandb._IS_INTERNAL_PROCESS = False
        # We want to run setup every time in tests
        wandb.wandb_sdk.wandb_setup._WandbSetup._instance = None
        mocker.patch("wandb.wandb_sdk.wandb_init.Backend", utils.BackendMock)

        offline_settings = wandb.Settings(
            console="off",
            mode="offline",
            _except_exit=False,
        )
        return wandb.init(*args, settings=offline_settings, **kwargs)
    finally:
        unset_globals()
def setup(self, kwargs):
    """Build login settings from ``kwargs`` and apply them globally.

    ``"_settings"`` (a ``Settings`` instance or a dict) and ``"relogin"`` are
    removed from ``kwargs``; the remaining entries are applied as login
    values.  The result is handed to ``wandb.setup`` and its ``settings``
    are kept on ``self._settings``.
    """
    self.kwargs = kwargs

    # build up login settings
    login_settings: Settings = wandb.Settings()
    override = kwargs.pop("_settings", None)
    # note that this case does not come up anywhere except for the tests;
    # None (or any other type) matches neither branch and is ignored
    if isinstance(override, Settings):
        login_settings._apply_settings(override)
    elif isinstance(override, dict):
        login_settings.update(override, source=Source.LOGIN)

    _logger = wandb.setup()._get_logger()
    # Do not save relogin into settings as we just want to relogin once
    self._relogin = kwargs.pop("relogin", None)
    login_settings._apply_login(kwargs, _logger=_logger)

    # make sure they are applied globally
    wl = wandb.setup(settings=login_settings)
    self._wl = wl
    self._settings = wl.settings
def wrapper(self, *args, settings=settings, **kwargs):
    """Run ``func`` inside a W&B run and record its inputs and outputs.

    ``run_group`` and ``run_job_type`` are filled in from ``current`` through
    ``coalesce`` before the run starts.  ``func`` receives an
    ``ArtifactProxy`` in place of ``self``; afterwards every proxy input is
    passed to ``wandb_use`` and every proxy output to ``wandb_track``.
    """
    wb_settings = wandb.sdk.wandb_settings
    if not isinstance(settings, wb_settings.Settings):
        settings = wandb.Settings()

    default_group = f"{current.flow_name}/{current.run_id}"
    settings.update(
        run_group=coalesce(settings.run_group, default_group),
        source=wandb.sdk.wandb_settings.Source.INIT,
    )
    settings.update(
        run_job_type=coalesce(settings.run_job_type, current.step_name),
        source=wandb.sdk.wandb_settings.Source.INIT,
    )

    with wandb.init(settings=settings) as run:
        with wb_telemetry.context(run=run) as tel:
            tel.feature.metaflow = True

        proxy = ArtifactProxy(self)
        run.config.update(proxy.params)
        func(proxy, *args, **kwargs)

        # Keyword arguments shared by the wandb_use and wandb_track calls.
        shared = dict(datasets=datasets, models=models, others=others, run=run)
        for input_name, input_data in proxy.inputs.items():
            wandb_use(input_name, input_data, **shared)
        for output_name, output_data in proxy.outputs.items():
            wandb_track(output_name, output_data, **shared)
def wandb_start_run(
    self,
    w_run_name: str | None = None,
    w_job_type: str | None = None,
    run_args: RunArgsType | None = None,
) -> Run:
    """Start a new W&B run in the configured project.

    Args:
        w_run_name: Run name passed to ``wandb.init``.
        w_job_type: Job type passed to ``wandb.init``.
        run_args: Passed to ``wandb.init`` as the run ``config``.

    Returns:
        The initialized run.

    Raises:
        RuntimeError: If a run is already registered as ``wandb.run``, or if
            ``wandb.init`` returns something that is not a ``Run``.
    """
    if wandb.run is not None:
        raise RuntimeError(f"W&B has registerred run {wandb.run.name}")

    with switched_aws_cfg(self._s3_credentials_file):
        wandb_run = wandb.init(
            project=self._wab_project_name,
            name=w_run_name,
            job_type=w_job_type,
            settings=wandb.Settings(start_method="fork"),
            config=run_args,  # type: ignore
            tags=self._try_get_neuro_tags(),
        )

    if not isinstance(wandb_run, Run):
        # ``!r`` is the repr conversion. The former ``:r`` was a format
        # spec, which made the f-string fail before this error was raised.
        raise RuntimeError(
            f"Failed to initialize W&B run, got: {wandb_run!r}")
    return wandb_run
def wandb_init_run(request, runner, mocker, mock_server):
    """Yield an offline W&B run backed by the mocked backend.

    Default arguments come from ``default_wandb_args()`` and can be
    overridden through a ``wandb_args`` marker on the test.  The ``env``
    entries are exported for the duration of the test and removed afterwards.
    """
    marker = request.node.get_closest_marker('wandb_args')
    args = default_wandb_args()
    if marker:
        args.update(marker.kwargs)
    try:
        mocks_from_args(mocker, args, mock_server)
        for k, v in args["env"].items():
            os.environ[k] = v
        # TODO: likely not the right thing to do, we shouldn't be setting this
        wandb._IS_INTERNAL_PROCESS = False
        # We want to run setup every time in tests
        wandb.wandb_sdk.wandb_setup._WandbSetup._instance = None
        mocker.patch('wandb.wandb_sdk.wandb_init.Backend', utils.BackendMock)
        run = wandb.init(
            settings=wandb.Settings(console="off", mode="offline",
                                    _except_exit=False),
            **args["wandb_init"],
        )
        yield run
        wandb.join()
    finally:
        unset_globals()
        # pop() rather than del: if setup failed before the variables were
        # exported, a KeyError here would mask the original error.
        for k in args["env"]:
            os.environ.pop(k, None)
def agent(sweep_id, function=None, entity=None, project=None, count=None):
    """Generic agent entrypoint, used for CLI or jupyter.

    Args:
        sweep_id (dict): Sweep ID generated by CLI or sweep API
        function (optional): Forwarded to ``run_agent`` as ``function``
        entity (str, optional): W&B Entity
        project (str, optional): W&B Project
        count (optional): Forwarded to ``run_agent`` as ``count``
    """
    python_type = wandb._get_python_type()
    in_jupyter = python_type != "python"
    if in_jupyter:
        os.environ[wandb.env.JUPYTER] = "true"
        internal_api = InternalApi()
        if not internal_api.api_key:
            wandb._jupyter_login(api=internal_api)

    # The settings object is constructed but not used afterwards.
    settings = wandb.Settings()

    agent_kwargs = dict(
        function=function,
        in_jupyter=in_jupyter,
        entity=entity,
        project=project,
        count=count,
    )
    return run_agent(sweep_id, **agent_kwargs)
def _send_tensorboard(self, tb_root, tb_logdirs, send_manager):
    """Send a run record plus the given TensorBoard log dirs via ``send_manager``.

    Args:
        tb_root: Passed to ``watcher.add`` for every log directory.
        tb_logdirs: Iterable of TensorBoard log directories to add.
        send_manager: Object used to send records; its ``_api`` and
            ``_interface`` attributes are accessed directly.
    """
    # Fall back to the viewer's entity when none was configured.
    if self._entity is None:
        viewer, server_info = send_manager._api.viewer_server_info()
        self._entity = viewer.get("entity")
    # Build the run record, generating an id / project name when unset.
    proto_run = wandb_internal_pb2.RunRecord()
    proto_run.run_id = self._run_id or wandb.util.generate_id()
    proto_run.project = self._project or wandb.util.auto_project_name(None)
    proto_run.entity = self._entity

    url = "{}/{}/{}/runs/{}".format(
        self._app_url,
        url_quote(proto_run.entity),
        url_quote(proto_run.project),
        url_quote(proto_run.run_id),
    )
    print("Syncing: %s ..." % url)
    sys.stdout.flush()
    # The run record is sent before any TensorBoard data.
    record = send_manager._interface._make_record(run=proto_run)
    send_manager.send(record)
    settings = wandb.Settings(
        root_dir=TMPDIR.name,
        run_id=proto_run.run_id,
        _start_datetime=datetime.datetime.now(),
        _start_time=time.time(),
    )
    watcher = tb_watcher.TBWatcher(
        settings, proto_run, send_manager._interface, True
    )
    for tb in tb_logdirs:
        watcher.add(tb, True, tb_root)
        sys.stdout.flush()
    watcher.finish()
    # send all of our records like a boss
    # (drains whatever is queued on the interface's record_q)
    while not send_manager._interface.record_q.empty():
        data = send_manager._interface.record_q.get(block=True)
        send_manager.send(data)
    sys.stdout.flush()
    send_manager.finish()
def test_offline_compression(console_settings, capfd, runner):
    """Check what console output ends up in the binary log of an offline run.

    Drives a tqdm progress bar and prints text followed by terminal escape
    sequences, then inspects the ``wandb sync --view --verbose`` output of
    the run's ``.wandb`` file.
    """
    with capfd.disabled():
        s = wandb.Settings(mode="offline")
        console_settings._apply_settings(s)
        run = wandb.init(settings=console_settings)

        for i in tqdm.tqdm(range(100), ncols=139, ascii=" 123456789#"):
            time.sleep(0.05)
        print("\n" * 1000)
        print("QWERT")
        print("YUIOP")
        print("12345")
        # NOTE(review): these look like ANSI cursor-up / erase sequences that
        # wipe most of the text printed above -- confirm.
        print("\x1b[A\r\x1b[J\x1b[A\r\x1b[1J")
        time.sleep(1)
        run.finish()
        # The binary log sits next to run.dir and is named "run-<id>.wandb".
        binary_log_file = (
            os.path.join(os.path.dirname(run.dir), "run-" + run.id) + ".wandb")
        binary_log = runner.invoke(
            cli.sync, ["--view", "--verbose", binary_log_file]).stdout
        # Only a single output record per stream is written when the run finishes
        assert binary_log.count("Record: output") == 2
        # Only final state of progress bar is logged
        assert binary_log.count("#") == 100, binary_log.count
        # Intermediate states are not logged
        assert "QWERT" not in binary_log
        assert "YUIOP" not in binary_log
        assert "12345" not in binary_log
        assert "UIOP" in binary_log
def __init__(self, config, variant):
    """Resolve the logger configuration and start the W&B run.

    Args:
        config: Overrides applied on top of
            ``WandBLogger.get_default_config()``.
        variant: Mapping passed to ``wandb.init`` as the run config; a
            shallow copy is stored and given a ``hostname`` entry when
            missing.
    """
    cfg = WandBLogger.get_default_config()
    self.config = cfg
    cfg.update(config)

    # An empty experiment id means "generate one".
    if cfg.experiment_id == '':
        cfg.experiment_id = uuid.uuid4().hex

    if cfg.prefix != '':
        cfg.project = '{}--{}'.format(cfg.prefix, cfg.project)

    # No output dir configured: use a fresh temporary directory; otherwise
    # create a per-experiment sub-directory.
    if cfg.output_dir != '':
        cfg.output_dir = os.path.join(cfg.output_dir, cfg.experiment_id)
        os.makedirs(cfg.output_dir, exist_ok=True)
    else:
        cfg.output_dir = tempfile.mkdtemp()

    self._variant = copy(variant)
    if 'hostname' not in self._variant:
        self._variant['hostname'] = gethostname()

    # Optional random delay before wandb.init.
    if cfg.random_delay > 0:
        time.sleep(np.random.uniform(0, cfg.random_delay))

    if cfg.online:
        run_mode = 'online'
    else:
        run_mode = 'offline'
    wandb.init(
        config=self._variant,
        project=cfg.project,
        dir=cfg.output_dir,
        id=cfg.experiment_id,
        settings=wandb.Settings(
            start_method="thread",
            _disable_stats=True,
        ),
        mode=run_mode,
    )