def _command_run(self, command):
    """Launch the run described by ``command`` through ``internal_cli.py``.

    Builds a ``wandb_run.Run`` for this agent's sweep, copies the values
    found in ``command['args']`` into the run's config, and spawns a child
    process that receives the program and its ``--name=value`` flags as a
    JSON payload. The child is stored in ``self._run_processes`` keyed by
    the run id.

    Args:
        command: Mapping providing ``'args'`` (name -> mapping with a
            ``'value'`` entry) and ``'program'``. ``'run_storage_id'`` and
            ``'run_id'`` are read with ``.get`` and may be missing.
    """
    run = wandb_run.Run(mode='run',
                        sweep_id=self._sweep_id,
                        storage_id=command.get('run_storage_id'),
                        run_id=command.get('run_id'))

    # Persist the wandb config so that it mirrors the state of the run
    # that the server generated.
    run.config.set_run_dir(run.dir)
    sweep_args = command['args']
    config_values = {}
    for key, spec in sweep_args.items():
        config_values[key] = spec['value']
    run.config.update(config_values)

    child_env = dict(os.environ)
    run.set_environment(child_env)

    cli_flags = []
    for name, spec in sweep_args.items():
        cli_flags.append("--{0}={1}".format(name, spec['value']))

    payload = {
        'command': 'agent-run',
        'program': command['program'],
        'args': cli_flags
    }
    cli_script = os.path.join(os.path.dirname(__file__), 'internal_cli.py')
    child = subprocess.Popen(
        ['/usr/bin/env', 'python', cli_script, json.dumps(payload)],
        env=child_env)
    self._run_processes[run.id] = child

    # Count how often the user has asked to stop this run so that
    # self._command_stop() can escalate how hard it tries to kill it.
    child.num_times_stopped = 0
def _command_run(self, command):
    """Start the program named in ``command`` as a child process.

    Logs the argument values, creates a ``wandb_run.Run`` for this agent's
    sweep, mirrors the argument values into the run's config and launches
    ``command['program']`` with one ``--name=value`` flag per argument.
    The child is stored in ``self._run_processes`` keyed by the run id.

    Args:
        command: Mapping providing ``'args'`` (name -> mapping with a
            ``'value'`` entry) and ``'program'``. ``'run_storage_id'``,
            ``'run_id'`` and ``'env'`` are read with ``.get``.
    """
    sweep_args = command['args']

    config_lines = []
    for key, spec in sweep_args.items():
        config_lines.append('\t{}: {}'.format(key, spec['value']))
    logger.info('Agent starting run with config:\n' + '\n'.join(config_lines))

    run = wandb_run.Run(mode='run',
                        sweep_id=self._sweep_id,
                        storage_id=command.get('run_storage_id'),
                        run_id=command.get('run_id'))

    # Persist the wandb config so that it mirrors the state of the run
    # that the server generated.
    run.config.set_run_dir(run.dir)
    config_values = {}
    for key, spec in sweep_args.items():
        config_values[key] = spec['value']
    run.config.update(config_values)

    # Child environment: this process's environment overlaid with any
    # variables supplied by the command.
    child_env = dict(os.environ)
    child_env.update(command.get('env', {}))
    run.set_environment(child_env)

    argv = ['/usr/bin/env', 'python', command['program']]
    for name, spec in sweep_args.items():
        argv.append("--{}={}".format(name, spec['value']))

    child = subprocess.Popen(argv, env=child_env, preexec_fn=os.setpgrp)
    self._run_processes[run.id] = child

    # Remember when sigterm was sent so the process gets a chance to handle
    # the signal before sigkill is sent on a later heartbeat.
    child.last_sigterm_time = None
    self._last_report_time = None
def run(ctx, program, args, id, resume, dir, configs, message, name, notes,
        show, tags, run_group, job_type):
    """Create a ``clirun`` run and execute ``program`` under a RunManager.

    Run parameters that were not supplied are looked up in the process
    environment. If run setup raises ``run_manager.Error`` the error is
    reported on the terminal, the traceback is logged, and the process
    exits with status 1.

    Args:
        program: Program handed to ``run_user_process``.
        args: Arguments handed to ``run_user_process`` with the program.
        id, resume, message, name, notes, run_group, job_type: Run
            parameters; a falsy value falls back to the environment.
        dir: wandb directory; falsy means ``wandb.wandb_dir()``.
        configs: Comma-separated config paths, or a falsy value for none.
        show: When truthy, ``env.SHOW_RUN`` is set for the user process.
        tags: Comma-separated tags; empty entries are dropped.
    """
    wandb.ensure_configured()

    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())

    if tags:
        tags = [piece for piece in tags.split(",") if piece]
    else:
        tags = None

    # Anything not given explicitly is taken from the environment.
    id = id or os.environ.get(env.RUN_ID)
    message = message or os.environ.get(env.DESCRIPTION)
    tags = tags or env.get_tags()
    run_group = run_group or os.environ.get(env.RUN_GROUP)
    job_type = job_type or os.environ.get(env.JOB_TYPE)
    name = name or os.environ.get(env.NAME)
    notes = notes or os.environ.get(env.NOTES)
    resume = resume or os.environ.get(env.RESUME)

    cli_run = wandb_run.Run(run_id=id,
                            mode='clirun',
                            config=config,
                            description=message,
                            program=program,
                            tags=tags,
                            group=run_group,
                            job_type=job_type,
                            name=name,
                            notes=notes,
                            resume=resume)
    cli_run.enable_logging()

    environ = dict(os.environ)
    if configs:
        environ[env.CONFIG_PATHS] = configs
    if show:
        environ[env.SHOW_RUN] = 'True'

    if not cli_run.api.api_key:
        util.prompt_api_key(cli_run.api, input_callback=click.prompt)

    try:
        manager = run_manager.RunManager(cli_run)
        manager.init_run(environ)
    except run_manager.Error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        reason = str(exc_value)
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        formatted = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
        logger.error('\n'.join(formatted))
        sys.exit(1)

    manager.run_user_process(program, args, environ)
def test_get_url(git_repo, loggedin):
    """Run and project URLs carry the API key; an empty entity raises."""
    api = InternalApi({"entity": "cool"})
    api.set_setting("anonymous", "true")
    run = wandb_run.Run(api=api)

    fake_key = "X" * 40

    run_url = run.get_url()
    expected_run_url = (
        "https://app.wandb.ai/cool/uncategorized/runs/" + run.id
        + "?apiKey=" + fake_key
    )
    assert run_url == expected_run_url

    project_url = run.get_project_url()
    expected_project_url = (
        "https://app.wandb.ai/cool/uncategorized?apiKey=" + fake_key
    )
    assert project_url == expected_project_url

    # With the entity cleared, asking for the URL must raise CommError.
    api.set_setting("entity", "")
    with pytest.raises(CommError):
        run.get_url()
def _command_run(self, command):
    """Start the run described by ``command`` inside an ``AgentProcess``.

    Logs the argument values (and prints them when ``self._in_jupyter`` is
    set), creates a ``wandb_run.Run`` for this agent's sweep, mirrors the
    argument values into the run's config and then starts either
    ``self._function`` or ``command['program']`` as an ``AgentProcess``.
    The process is stored in ``self._run_processes`` keyed by the run id.

    Args:
        command: Mapping providing ``'args'`` (name -> mapping with a
            ``'value'`` entry). ``'program'`` is required when no function
            is configured; ``'run_storage_id'``, ``'run_id'`` and ``'env'``
            are read with ``.get``.
    """
    sweep_args = command['args']

    # The same listing is used for the log record and the notebook output.
    config_listing = '\n'.join(
        ['\t{}: {}'.format(key, spec['value'])
         for key, spec in sweep_args.items()])
    logger.info('Agent starting run with config:\n' + config_listing)
    if self._in_jupyter:
        banner = 'wandb: Agent Starting Run: {} with config:\n'.format(
            command.get('run_id'))
        print(banner + config_listing)

    run = wandb_run.Run(mode='run',
                        sweep_id=self._sweep_id,
                        storage_id=command.get('run_storage_id'),
                        run_id=command.get('run_id'))

    # Persist the wandb config so that it mirrors the state of the run
    # that the server generated.
    run.config.set_run_dir(run.dir)
    config_values = {}
    for key, spec in sweep_args.items():
        config_values[key] = spec['value']
    run.config.update(config_values)

    # Child environment: this process's environment overlaid with any
    # variables supplied by the command.
    child_env = dict(os.environ)
    child_env.update(command.get('env', {}))
    run.set_environment(child_env)

    cli_flags = []
    for name, spec in sweep_args.items():
        cli_flags.append("--{}={}".format(name, spec['value']))

    if self._function:
        proc = AgentProcess(function=self._function,
                            env=child_env,
                            run_id=command.get('run_id'),
                            in_jupyter=self._in_jupyter)
    else:
        # The /usr/bin/env prefix is left out on Windows.
        if platform.system() != "Windows":
            argv = ['/usr/bin/env']
        else:
            argv = []
        argv.extend(['python', command['program']])
        argv.extend(cli_flags)
        proc = AgentProcess(command=argv, env=child_env)

    self._run_processes[run.id] = proc

    # Remember when sigterm was sent so the process gets a chance to handle
    # the signal before sigkill is sent on a later heartbeat.
    proc.last_sigterm_time = None
    self._last_report_time = None
def _command_run(self, command):
    """Start the run described by ``command`` inside an ``AgentProcess``.

    Logs the argument values (and prints them when ``self._in_jupyter`` is
    set), creates a ``wandb_run.Run`` for this agent's sweep and mirrors
    the argument values into the run's config. It then starts either
    ``self._function`` or a command line expanded from
    ``self._sweep_command`` (a default template is used when that is
    falsy). The process is stored in ``self._run_processes`` keyed by the
    run id.

    Args:
        command: Mapping providing ``'args'`` (name -> mapping with a
            ``'value'`` entry). ``'program'`` is required when no function
            is configured; ``'run_storage_id'``, ``'run_id'`` and ``'env'``
            are read with ``.get``.
    """
    sweep_args = command['args']

    # The same listing is used for the log record and the notebook output.
    config_listing = '\n'.join(
        ['\t{}: {}'.format(key, spec['value'])
         for key, spec in sweep_args.items()])
    logger.info('Agent starting run with config:\n' + config_listing)
    if self._in_jupyter:
        banner = 'wandb: Agent Starting Run: {} with config:\n'.format(
            command.get('run_id'))
        print(banner + config_listing)

    run = wandb_run.Run(mode='run',
                        sweep_id=self._sweep_id,
                        storage_id=command.get('run_storage_id'),
                        run_id=command.get('run_id'))

    # Persist the wandb config so that it mirrors the state of the run
    # that the server generated.
    run.config.set_run_dir(run.dir)
    config_values = {}
    for key, spec in sweep_args.items():
        config_values[key] = spec['value']
    run.config.update(config_values)

    # Child environment: this process's environment overlaid with any
    # variables supplied by the command.
    child_env = dict(os.environ)
    child_env.update(command.get('env', {}))
    run.set_environment(child_env)

    cli_flags = []
    for name, spec in sweep_args.items():
        cli_flags.append("--{}={}".format(name, spec['value']))

    if self._function:
        proc = AgentProcess(function=self._function,
                            env=child_env,
                            run_id=command.get('run_id'),
                            in_jupyter=self._in_jupyter)
    else:
        # Values substituted for "${name}" placeholders in the template.
        # NOTE: "interpretter" is the key the default template looks up,
        # so its spelling must stay exactly as it is.
        substitutions = {
            "interpretter": ["python"],
            "program": [command['program']],
            "args": cli_flags,
            "env": ["/usr/bin/env"],
        }
        if platform.system() == "Windows":
            # Without this entry "${env}" expands to nothing on Windows.
            substitutions.pop("env")

        template = self._sweep_command or [
            "${env}", "${interpretter}", "${program}", "${args}"
        ]
        argv = []
        for token in template:
            is_placeholder = token.startswith("${") and token.endswith("}")
            if not is_placeholder:
                argv.append(token)
                continue
            # Unknown or empty placeholders contribute no arguments.
            expansion = substitutions.get(token[2:-1])
            argv.extend(expansion or [])

        logger.info('About to run command: {}'.format(' '.join(argv)))
        proc = AgentProcess(command=argv, env=child_env)

    self._run_processes[run.id] = proc

    # Remember when sigterm was sent so the process gets a chance to handle
    # the signal before sigkill is sent on a later heartbeat.
    proc.last_sigterm_time = None
    self._last_report_time = None
def run(ctx, program, args, id, resume, dir, configs, message, name, notes,
        show, tags, run_group, job_type):
    """Create a ``clirun`` run and execute ``program`` under a RunManager.

    If run setup raises ``run_manager.Error`` the error is reported on the
    terminal, the traceback is logged, and the process exits with status 1.

    Args:
        program: Program handed to ``run_user_process``.
        args: Arguments handed to ``run_user_process`` with the program.
        id, resume, message, name, notes, run_group, job_type: Passed to
            ``wandb_run.Run`` unchanged.
        dir: wandb directory; falsy means ``wandb.wandb_dir()``.
        configs: Comma-separated config paths, or a falsy value for none.
        show: When truthy, ``env.SHOW_RUN`` is set for the user process.
        tags: Comma-separated tags; empty entries are dropped.
    """
    wandb.ensure_configured()

    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())

    if tags:
        tags = [piece for piece in tags.split(",") if piece]
    else:
        tags = None

    cli_run = wandb_run.Run(run_id=id,
                            mode='clirun',
                            config=config,
                            description=message,
                            program=program,
                            tags=tags,
                            group=run_group,
                            job_type=job_type,
                            name=name,
                            notes=notes,
                            resume=resume)
    cli_run.enable_logging()

    environ = dict(os.environ)
    if configs:
        environ[env.CONFIG_PATHS] = configs
    if show:
        environ[env.SHOW_RUN] = 'True'

    cli_run.check_anonymous()

    try:
        manager = run_manager.RunManager(cli_run)
        manager.init_run(environ)
    except run_manager.Error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        reason = str(exc_value)
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        formatted = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
        logger.error('\n'.join(formatted))
        sys.exit(1)

    manager.run_user_process(program, args, environ)
def test_history_updates_keys_until_summary_writes(git_repo):
    """History rows update a summary key until that key is set directly."""
    run = wandb_run.Run()

    def add_row(row, expected_summary, check_history=True):
        # Add one history row, then verify the latest history values (which
        # always equal the row just added) and the expected summary values.
        run.history.add(row)
        if check_history:
            for key, value in row.items():
                assert get_last_val(run.history, key) == value
        for key, value in expected_summary.items():
            assert run.summary[key] == value

    # Before any direct summary write, the summary follows history.
    add_row({'a': 5, 'b': 9}, {'a': 5, 'b': 9})
    add_row({'a': 6, 'b': 10}, {'a': 6, 'b': 10})

    # After writing 'a' directly, history no longer overrides it.
    run.summary['a'] = 112491
    assert run.summary['a'] == 112491

    add_row({'a': 1, 'b': 3}, {'a': 112491, 'b': 3})
    # 'b' still reflects the most recent history value.
    add_row({'a': -40, 'b': -49}, {'a': 112491, 'b': -49})

    # The same holds for a key first seen via a direct summary write.
    run.summary['c'] = 100
    assert run.summary['c'] == 100

    add_row({'c': 200, 'd': 300}, {'c': 100, 'd': 300}, check_history=False)

    add_row({'a': 1000, 'b': 2000, 'c': 200, 'd': 300},
            {'a': 112491, 'b': 2000, 'c': 100, 'd': 300})
def run(ctx, program, args, id, resume, dir, configs, message, show):
    """Create a ``clirun`` run and execute ``program`` under a RunManager.

    If run setup raises ``run_manager.Error`` the error is reported on the
    terminal, the traceback is logged, and the process exits with status 1.

    Args:
        program: Program handed to ``run_user_process``.
        args: Arguments handed to ``run_user_process`` with the program.
        id, resume, message: Passed to ``wandb_run.Run``.
        dir: wandb directory; falsy means ``wandb.wandb_dir()``.
        configs: Comma-separated config paths, or a falsy value for none.
        show: When truthy, ``WANDB_SHOW_RUN`` is set for the user process.
    """
    api.ensure_configured()

    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())

    cli_run = wandb_run.Run(run_id=id,
                            mode='clirun',
                            config=config,
                            description=message,
                            program=program,
                            resume=resume)
    api.set_current_run_id(cli_run.id)

    child_env = dict(os.environ)
    if configs:
        child_env['WANDB_CONFIG_PATHS'] = configs
    if show:
        child_env['WANDB_SHOW_RUN'] = 'True'

    try:
        manager = run_manager.RunManager(api, cli_run)
        manager.init_run(child_env)
    except run_manager.Error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        reason = str(exc_value)
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        formatted = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
        logger.error('\n'.join(formatted))
        sys.exit(1)

    manager.run_user_process(program, args, child_env)
def summary():
    """Yield the summary of a fresh run, pre-populated with ``foo='init'``.

    The run is created inside an isolated filesystem, which stays active
    while the consumer uses the yielded summary.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        fresh_run = wandb_run.Run()
        initial_values = {"foo": "init"}
        fresh_run.summary.update(initial_values)
        yield fresh_run.summary