def run(ctx, program, args, id, resume, dir, configs, message, name, notes,
        show, tags, run_group, job_type):
    """Create a ``clirun``-mode Run from the CLI options and launch ``program``.

    Any of ``id``, ``message``, ``tags``, ``run_group``, ``job_type``,
    ``name``, ``notes`` and ``resume`` that is falsy is filled in from the
    corresponding ``env.*`` environment variable (``env.get_tags()`` for
    tags).

    Arguments:
        program: Passed to the Run and to ``run_user_process``.
        args: Forwarded unchanged to ``run_user_process``.
        dir: Used as ``wandb_dir`` for the Config; falls back to
            ``wandb.wandb_dir()`` when falsy.
        configs (str): Comma-separated config paths; also exported to the
            child environment under ``env.CONFIG_PATHS``.
        show: When truthy, ``env.SHOW_RUN`` is set to ``'True'`` in the
            child environment.
        tags (str): Comma-separated tags; empty entries are dropped.

    Exits the process with status 1 if ``run_manager.Error`` is raised
    while the run is being set up.
    """
    wandb.ensure_configured()

    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())

    if tags:
        tags = [tag for tag in tags.split(",") if tag]
    else:
        tags = None

    # populate run parameters from env if not specified
    id = id or os.environ.get(env.RUN_ID)
    message = message or os.environ.get(env.DESCRIPTION)
    tags = tags or env.get_tags()
    run_group = run_group or os.environ.get(env.RUN_GROUP)
    job_type = job_type or os.environ.get(env.JOB_TYPE)
    name = name or os.environ.get(env.NAME)
    notes = notes or os.environ.get(env.NOTES)
    resume = resume or os.environ.get(env.RESUME)

    cli_run = wandb_run.Run(
        run_id=id,
        mode='clirun',
        config=config,
        description=message,
        program=program,
        tags=tags,
        group=run_group,
        job_type=job_type,
        name=name,
        notes=notes,
        resume=resume,
    )
    cli_run.enable_logging()

    # The user process gets a copy of our environment plus the CLI switches.
    child_env = dict(os.environ)
    if configs:
        child_env[env.CONFIG_PATHS] = configs
    if show:
        child_env[env.SHOW_RUN] = 'True'

    if not cli_run.api.api_key:
        util.prompt_api_key(cli_run.api, input_callback=click.prompt)

    try:
        manager = run_manager.RunManager(cli_run)
        manager.init_run(child_env)
    except run_manager.Error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        reason = str(exc_value)
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        # The full traceback only goes to the log file, not the terminal.
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.error('\n'.join(lines))
        sys.exit(1)

    manager.run_user_process(program, args, child_env)
def __init__(self, run_id=None, mode=None, dir=None, config=None,
             sweep_id=None, storage_id=None, description=None, resume=None,
             program=None, wandb_dir=None):
    """Initialise the run's identity, directory, config and description.

    Arguments:
        run_id: Run identifier; a new one is generated when falsy.
        mode: Run mode; defaults to ``'run'`` when falsy.
        dir: Run directory. When ``None`` it is derived from the run id via
            ``run_dir_path`` (dry when mode is ``'dryrun'``).
        config: Config object to use; a fresh ``Config()`` when ``None``.
        sweep_id: Stored as ``self.sweep_id``.
        storage_id: The GQL ID of the run.
        description: Assigned to ``self.description`` when not ``None``.
        resume: Resume policy; defaults to ``'never'`` when falsy.
        program: Program name; when falsy, ``__main__.__file__`` is used
            if available.
        wandb_dir: Stored as ``self.wandb_dir``.
    """
    # self.id is actually stored in the "name" attribute in GQL
    self.id = run_id or generate_id()
    self.resume = resume or 'never'
    self.mode = mode or 'run'

    self.program = program
    if not self.program:
        try:
            import __main__
            self.program = __main__.__file__
        except (ImportError, AttributeError):
            # probably `python -c`, an embedded interpreter or something
            self.program = '<python with no main file>'

    self.wandb_dir = wandb_dir

    if dir is not None:
        self._dir = os.path.abspath(dir)
    else:
        self._dir = run_dir_path(self.id, dry=self.mode == 'dryrun')
    self._mkdir()

    self.config = Config() if config is None else config

    # this is the GQL ID:
    self.storage_id = storage_id
    # socket server, currently only available in headless mode
    self.socket = None

    if description is not None:
        self.description = description
    # An empty description.md may have been created by RunManager() so it's
    # important that we overwrite empty strings here.
    if not self.description:
        self.description = self.id

    self.sweep_id = sweep_id

    # Internal state, all unset at construction time.
    self._history = None
    self._events = None
    self._summary = None
    self._meta = None
    self._user_accessed_summary = False
    self._examples = None
def run(ctx, program, args, id, resume, dir, configs, message, name, notes,
        show, tags, run_group, job_type):
    """Create a ``clirun``-mode Run from the CLI options and launch ``program``.

    Arguments:
        program: Passed to the Run and to ``run_user_process``.
        args: Forwarded unchanged to ``run_user_process``.
        dir: Used as ``wandb_dir`` for the Config; falls back to
            ``wandb.wandb_dir()`` when falsy.
        configs (str): Comma-separated config paths; also exported to the
            child environment under ``env.CONFIG_PATHS``.
        show: When truthy, ``env.SHOW_RUN`` is set to ``'True'`` in the
            child environment.
        tags (str): Comma-separated tags; empty entries are dropped.

    The remaining options are handed to ``wandb_run.Run`` as-is. Exits the
    process with status 1 if ``run_manager.Error`` is raised during setup.
    """
    wandb.ensure_configured()

    config_paths = configs.split(',') if configs else []
    config = Config(config_paths=config_paths,
                    wandb_dir=dir or wandb.wandb_dir())

    if tags:
        tags = [tag for tag in tags.split(",") if tag]
    else:
        tags = None

    cli_run = wandb_run.Run(
        run_id=id,
        mode='clirun',
        config=config,
        description=message,
        program=program,
        tags=tags,
        group=run_group,
        job_type=job_type,
        name=name,
        notes=notes,
        resume=resume,
    )
    cli_run.enable_logging()

    # The user process gets a copy of our environment plus the CLI switches.
    child_env = dict(os.environ)
    if configs:
        child_env[env.CONFIG_PATHS] = configs
    if show:
        child_env[env.SHOW_RUN] = 'True'

    cli_run.check_anonymous()

    try:
        manager = run_manager.RunManager(cli_run)
        manager.init_run(child_env)
    except run_manager.Error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        reason = str(exc_value)
        wandb.termerror(
            'An Exception was raised during setup, see %s for full traceback.'
            % util.get_log_file_path())
        wandb.termerror(reason)
        if 'permission' in reason:
            wandb.termerror(
                'Are you sure you provided the correct API key to "wandb login"?'
            )
        # The full traceback only goes to the log file, not the terminal.
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logger.error('\n'.join(lines))
        sys.exit(1)

    manager.run_user_process(program, args, child_env)
if __name__ == "__main__": # Parse for configuration file parser = argparse.ArgumentParser(description=None) parser.add_argument( "-c", "--config", type=str, default="vertical_nn.yaml", help="Select configuration file", ) args = vars(parser.parse_args()) # Config config = Config(config_paths=[args["config"]]) wandb.init(config=config, project="vertical") # Environment env = gym.make( config["environment"]["name"], obs_noise=config["environment"]["obsNoise"], init_rand=config["environment"]["initRand"], init_state=config["environment"]["initState"], delay=config["environment"]["delay"], reward_mods=config["environment"]["rewardMods"], state_bounds=[config["environment"]["altBounds"], None], total_steps=config["environment"]["steps"], goal_obs=config["environment"]["goalObs"], state_obs=config["environment"]["stateObs"], action_bounds=config["environment"]["actionBounds"],
def __init__(self, run_id=None, mode=None, dir=None, group=None,
             job_type=None, config=None, sweep_id=None, storage_id=None,
             description=None, resume=None, program=None, args=None,
             wandb_dir=None, tags=None, name=None, notes=None, api=None):
    """Create a Run.

    Arguments:
        run_id: Run identifier; generated with ``util.generate_id()`` when
            falsy.
        dir: Run directory. When ``None`` it is derived from the run id via
            ``run_dir_path`` (dry when mode is ``'dryrun'``).
        description (str): This is the old, deprecated style of
            description: the run's name followed by a newline, followed by
            multiline notes.
        args: Program arguments; ``sys.argv[1:]`` when ``None``.
        name, notes: Assigned to ``self.name`` / ``self.notes`` when not
            ``None``.
        api: Optional Api object to bind to this run.

    Raises:
        RuntimeError: If ``api`` is already bound to a different run id.
    """
    # self.storage_id is "id" in GQL.
    self.storage_id = storage_id
    # self.id is "name" in GQL.
    self.id = run_id or util.generate_id()
    # self._name is "display_name" in GQL.
    self._name = None
    self.notes = None

    self.resume = resume or 'never'
    self.mode = mode or 'run'
    self.group = group
    self.job_type = job_type
    self.pid = os.getpid()
    self.resumed = False  # we set resume when history is first accessed

    if api:
        if api.current_run_id and api.current_run_id != self.id:
            raise RuntimeError(
                'Api object passed to run {} is already being used by run {}'
                .format(self.id, api.current_run_id))
        api.set_current_run_id(self.id)
    self._api = api

    if dir is not None:
        self._dir = os.path.abspath(dir)
    else:
        self._dir = run_dir_path(self.id, dry=self.mode == 'dryrun')
    self._mkdir()

    # self.name and self.notes used to be combined into a single field.
    # Now if name and notes don't have their own values, we get them from
    # self._name_and_description, but we don't update description.md
    # if they're changed. This is to discourage relying on self.description
    # and self._name_and_description so that we can drop them later.
    #
    # This needs to be set before name and notes because name and notes may
    # influence it. They have higher precedence.
    self._name_and_description = None
    if description:
        wandb.termwarn(
            'Run.description is deprecated. Please use wandb.init(notes="long notes") instead.'
        )
        self._name_and_description = description
    elif os.path.exists(self.description_path):
        with open(self.description_path) as d_file:
            self._name_and_description = d_file.read()

    if name is not None:
        self.name = name
    if notes is not None:
        self.notes = notes

    self.program = program
    if not self.program:
        try:
            import __main__
            self.program = __main__.__file__
        except (ImportError, AttributeError):
            # probably `python -c`, an embedded interpreter or something
            self.program = '<python with no main file>'

    self.args = args
    if self.args is None:
        self.args = sys.argv[1:]

    self.wandb_dir = wandb_dir

    with configure_scope() as scope:
        self.project = self.api.settings("project")
        scope.set_tag("project", self.project)
        scope.set_tag("entity", self.entity)
        try:
            # TODO: Move this somewhere outside of init
            scope.set_tag("url", self.get_url(self.api, network=False))
        except CommError:
            pass

    if self.resume == "auto":
        # Record this run's id in the wandb directory's resume file.
        util.mkdir_exists_ok(wandb.wandb_dir())
        resume_path = os.path.join(wandb.wandb_dir(), RESUME_FNAME)
        with open(resume_path, "w") as f:
            f.write(json.dumps({"run_id": self.id}))

    self.config = Config() if config is None else config

    # socket server, currently only available in headless mode
    self.socket = None

    self.tags = tags or []

    self.sweep_id = sweep_id

    # Internal state, all unset at construction time.
    self._history = None
    self._events = None
    self._summary = None
    self._meta = None
    self._run_manager = None
    self._jupyter_agent = None
def __init__(self, run_id=None, mode=None, dir=None, group=None,
             job_type=None, config=None, sweep_id=None, storage_id=None,
             description=None, resume=None, program=None, args=None,
             wandb_dir=None, tags=None):
    """Initialise the run's identity, directory, config and description.

    Arguments:
        run_id: Run identifier; generated with ``util.generate_id()`` when
            falsy. Also used as the initial ``display_name``.
        mode: Run mode; defaults to ``'run'`` when falsy.
        dir: Run directory. When ``None`` it is derived from the run id via
            ``run_dir_path`` (dry when mode is ``'dryrun'``).
        config: Config object to use; a fresh ``Config()`` when ``None``.
        storage_id: The GQL ID of the run.
        description: Stored as ``self.name_and_description``; when ``None``
            the file at ``self.description_path`` is read if it exists.
        resume: Resume policy; defaults to ``'never'`` when falsy.
        program: Program name; when falsy, ``__main__.__file__`` is used
            if available.
        args: Program arguments; ``sys.argv[1:]`` when ``None``.
        tags: Run tags; an empty list when falsy.
    """
    # self.id is actually stored in the "name" attribute in GQL
    self.id = run_id or util.generate_id()
    self.display_name = self.id
    self.resume = resume or 'never'
    self.mode = mode or 'run'
    self.group = group
    self.job_type = job_type
    self.pid = os.getpid()
    self.resumed = False  # we set resume when history is first accessed

    self.program = program
    if not self.program:
        try:
            import __main__
            self.program = __main__.__file__
        except (ImportError, AttributeError):
            # probably `python -c`, an embedded interpreter or something
            self.program = '<python with no main file>'

    self.args = args
    if self.args is None:
        self.args = sys.argv[1:]

    self.wandb_dir = wandb_dir

    with configure_scope() as scope:
        api = InternalApi()
        self.project = api.settings("project")
        self.entity = api.settings("entity")
        scope.set_tag("project", self.project)
        scope.set_tag("entity", self.entity)
        scope.set_tag("url", self.get_url(api))

    if dir is not None:
        self._dir = os.path.abspath(dir)
    else:
        self._dir = run_dir_path(self.id, dry=self.mode == 'dryrun')
    self._mkdir()

    if self.resume == "auto":
        # Record this run's id in the wandb directory's resume file.
        util.mkdir_exists_ok(wandb.wandb_dir())
        resume_path = os.path.join(wandb.wandb_dir(), RESUME_FNAME)
        with open(resume_path, "w") as f:
            f.write(json.dumps({"run_id": self.id}))

    self.config = Config() if config is None else config

    # this is the GQL ID:
    self.storage_id = storage_id
    # socket server, currently only available in headless mode
    self.socket = None

    # An explicit description wins over whatever is stored on disk.
    self.name_and_description = ""
    if description is not None:
        self.name_and_description = description
    elif os.path.exists(self.description_path):
        with open(self.description_path) as d_file:
            self.name_and_description = d_file.read()

    self.tags = tags or []

    self.sweep_id = sweep_id

    # Internal state, all unset at construction time.
    self._history = None
    self._events = None
    self._summary = None
    self._meta = None
    self._run_manager = None
    self._jupyter_agent = None
def restore(ctx, run, no_git, branch, project, entity):
    """Restore the code, config and (optionally) docker image of a run.

    Arguments:
        ctx: Click context, used to invoke the ``docker`` command.
        run (str): Run reference. ``entity/project:run`` and
            ``project:run`` forms are split here; anything else is handed
            to ``api.parse_slug``.
        no_git: When truthy, the git checkout and patch steps are skipped.
        branch: When truthy, check out a ``wandb/<run>`` branch instead of
            a detached commit.
        project, entity: Defaults, overridden by values parsed from ``run``.

    Returns:
        Tuple ``(commit, json_config, patch_content, repo, metadata)``.

    Raises:
        ClickException: If git is not enabled but the run recorded a repo,
            or if neither the original commit nor an upstream fallback
            commit can be found.
    """
    if ":" in run:
        slug = run
        if "/" in slug:
            entity, slug = slug.split("/", 1)
        project, run = slug.split(":", 1)
    elif run.count("/") > 1:
        entity, run = run.split("/", 1)

    project, run = api.parse_slug(run, project=project)
    commit, json_config, patch_content, metadata = api.run_config(
        project, run=run, entity=entity)
    repo = metadata.get("git", {}).get("repo")
    image = metadata.get("docker")
    restore_hint = (
        "`wandb restore` needs to be run from the same git repository as the original run. "
        "Run `git clone %s` and restore from there or pass the --no-git flag."
    ) % repo

    if no_git:
        commit = None
    elif not api.git.enabled:
        if repo:
            raise ClickException(restore_hint)
        elif image:
            wandb.termlog(
                "Original run has no git history. Just restoring config and docker"
            )

    if commit and api.git.enabled:
        subprocess.check_call(['git', 'fetch', '--all'])
        try:
            api.git.repo.commit(commit)
        except ValueError:
            wandb.termlog("Couldn't find original commit: {}".format(commit))
            commit = None
            # Look for a diff against an upstream commit that exists locally.
            files = api.download_urls(project, run=run, entity=entity)
            prefix, suffix = 'upstream_diff_', '.patch'
            for filename in files:
                if not (filename.startswith(prefix)
                        and filename.endswith(suffix)):
                    continue
                commit = filename[len(prefix):-len(suffix)]
                try:
                    api.git.repo.commit(commit)
                except ValueError:
                    commit = None
                else:
                    break

            if not commit:
                raise ClickException(restore_hint)
            wandb.termlog(
                "Falling back to upstream commit: {}".format(commit))
            patch_path, _ = api.download_write_file(files[filename])
        else:
            patch_path = None
            if patch_content:
                patch_path = os.path.join(wandb.wandb_dir(), 'diff.patch')
                with open(patch_path, "w") as f:
                    f.write(patch_content)

        branch_name = "wandb/%s" % run
        if not branch:
            wandb.termlog("Checking out %s in detached mode" % commit)
            api.git.repo.git.checkout(commit)
        elif branch_name not in api.git.repo.branches:
            api.git.repo.git.checkout(commit, b=branch_name)
            wandb.termlog("Created branch %s" %
                          click.style(branch_name, bold=True))
        else:
            wandb.termlog(
                "Using existing branch, run `git branch -D %s` from master for a clean checkout"
                % branch_name)
            api.git.repo.git.checkout(branch_name)

        if patch_path:
            # we apply the patch from the repository root so git doesn't exclude
            # things outside the current directory
            root = api.git.root
            patch_rel_path = os.path.relpath(patch_path, start=root)

            # --reject is necessary or else this fails any time a binary file
            # occurs in the diff
            # we use .call() instead of .check_call() for the same reason
            # TODO(adrian): this means there is no error checking here
            subprocess.call(['git', 'apply', '--reject', patch_rel_path],
                            cwd=root)
            wandb.termlog("Applied patch")

    # TODO: we should likely respect WANDB_DIR here.
    util.mkdir_exists_ok("wandb")
    config = Config(run_dir="wandb")
    config.load_json(json_config)
    config.persist()
    wandb.termlog("Restored config variables to %s" % config._config_path())

    if image:
        has_command = (not metadata["program"].startswith("<")
                       and metadata.get("args") is not None)
        if has_command:
            # TODO: we may not want to default to python here.
            runner = util.find_runner(metadata["program"]) or ["python"]
            command = runner + [metadata["program"]] + metadata["args"]
            cmd = " ".join(command)
        else:
            wandb.termlog(
                "Couldn't find original command, just restoring environment")
            cmd = None
        wandb.termlog("Docker image found, attempting to start")
        ctx.invoke(docker, docker_run_args=[image], cmd=cmd)

    return commit, json_config, patch_content, repo, metadata
def restore(run, branch, project, entity):
    """Check out the code of a run and restore its config.

    Arguments:
        run (str): Run reference, resolved with ``api.parse_slug``.
        branch: When truthy, check out a ``wandb/<run>`` branch instead of
            a detached commit.
        project: Default project passed to ``api.parse_slug``.
        entity: Entity passed to the API calls.

    Raises:
        ClickException: If neither the original commit nor an upstream
            fallback commit can be found in the local repository.
    """
    project, run = api.parse_slug(run, project=project)
    commit, json_config, patch_content = api.run_config(
        project, run=run, entity=entity)
    subprocess.check_call(['git', 'fetch', '--all'])

    if commit:
        try:
            api.git.repo.commit(commit)
        except ValueError:
            click.echo("Couldn't find original commit: {}".format(commit))
            commit = None
            # Look for a diff against an upstream commit that exists locally.
            files = api.download_urls(project, run=run, entity=entity)
            prefix, suffix = 'upstream_diff_', '.patch'
            for filename in files:
                if not (filename.startswith(prefix)
                        and filename.endswith(suffix)):
                    continue
                commit = filename[len(prefix):-len(suffix)]
                try:
                    api.git.repo.commit(commit)
                except ValueError:
                    commit = None
                else:
                    break

            if not commit:
                raise ClickException(
                    "Can't find commit from which to restore code")
            click.echo(
                "Falling back to upstream commit: {}".format(commit))
            patch_path, _ = api.download_write_file(files[filename])
        else:
            patch_path = None
            if patch_content:
                patch_path = os.path.join(wandb.wandb_dir(), 'diff.patch')
                with open(patch_path, "w") as f:
                    f.write(patch_content)

        branch_name = "wandb/%s" % run
        if not branch:
            click.secho("Checking out %s in detached mode" % commit)
            api.git.repo.git.checkout(commit)
        elif branch_name not in api.git.repo.branches:
            api.git.repo.git.checkout(commit, b=branch_name)
            click.echo("Created branch %s" %
                       click.style(branch_name, bold=True))
        else:
            click.secho(
                "Using existing branch, run `git branch -D %s` from master for a clean checkout"
                % branch_name,
                fg="red")
            api.git.repo.git.checkout(branch_name)

        if patch_path:
            # we apply the patch from the repository root so git doesn't exclude
            # things outside the current directory
            root = api.git.root
            patch_rel_path = os.path.relpath(patch_path, start=root)

            # --reject is necessary or else this fails any time a binary file
            # occurs in the diff
            # we use .call() instead of .check_call() for the same reason
            # TODO(adrian): this means there is no error checking here
            subprocess.call(['git', 'apply', '--reject', patch_rel_path],
                            cwd=root)
            click.echo("Applied patch")

    config = Config()
    config.load_json(json_config)
    config.persist()
    click.echo("Restored config variables")