def __init__(self, settings=None, interface=None):
    """Set up empty metadata state for a run.

    Args:
        settings: Settings object; read for ``files_dir`` (attribute access)
            and, when present, the ``"git_remote"`` key (item access).
        interface: Object stored for later use when publishing files.
    """
    self._settings = settings
    self.data = {}
    self.fname = os.path.join(self._settings.files_dir, METADATA_FNAME)
    self._interface = interface

    # Use the configured remote when the settings carry one, else "origin".
    if "git_remote" in self._settings.keys():
        git_remote = self._settings["git_remote"]
    else:
        git_remote = "origin"
    self._git = GitRepo(remote=git_remote)

    # Location under "code" directory in files where program was saved.
    self._saved_program = None
    # Locations under files directory where diff patches were saved.
    self._saved_patches = []
def _init_run(self, run, config_dict):
    """Upsert ``run`` on the server and copy the server's answer back onto it.

    Args:
        run: Run record; its identifying fields are sent to the server and it
            is then stored as ``self._run`` and updated in place.
        config_dict: Run configuration, sent with the upsert and copied into
            ``self._run.config``.
    """
    # We subtract the previous runs runtime when resuming
    start_time = run.start_time.ToSeconds() - self._resume_state["runtime"]
    repo = GitRepo(remote=self._settings.git_remote)

    # Empty/falsy values are sent as None.
    upsert_kwargs = dict(
        name=run.run_id,
        entity=run.entity or None,
        project=run.project or None,
        group=run.run_group or None,
        job_type=run.job_type or None,
        display_name=run.display_name or None,
        notes=run.notes or None,
        tags=run.tags[:] or None,
        config=config_dict or None,
        sweep_name=run.sweep_id or None,
        host=run.host or None,
        program_path=self._settings.program or None,
        repo=repo.remote_url,
        commit=repo.last_commit,
    )
    # TODO: we don't check inserted currently, ultimately we should make
    # the upsert know the resume state and fail transactionally
    server_run, _inserted = self._api.upsert_run(**upsert_kwargs)

    self._run = run
    self._run.starting_step = self._resume_state["step"]
    self._run.start_time.FromSeconds(start_time)
    self._run.config.CopyFrom(self._interface._make_config(config_dict))
    resumed_summary = self._resume_state["summary"]
    if resumed_summary is not None:
        self._run.summary.CopyFrom(
            self._interface._make_summary_from_dict(resumed_summary))

    # Values reported by the server override what the run was created with.
    storage_id = server_run.get("id")
    if storage_id:
        self._run.storage_id = storage_id

    run_name = server_run.get("name")
    if run_name:
        self._api.set_current_run_id(run_name)

    display_name = server_run.get("displayName")
    if display_name:
        self._run.display_name = display_name

    project = server_run.get("project")
    # TODO: remove self._api.set_settings, and make self._project a property?
    if project:
        project_name = project.get("name")
        if project_name:
            self._run.project = project_name
            self._project = project_name
            self._api_settings["project"] = project_name
            self._api.set_setting("project", project_name)

        entity = project.get("entity")
        entity_name = entity.get("name") if entity else None
        if entity_name:
            self._run.entity = entity_name
            self._entity = entity_name
            self._api_settings["entity"] = entity_name
            self._api.set_setting("entity", entity_name)

    sweep_id = server_run.get("sweepName")
    if sweep_id:
        self._run.sweep_id = sweep_id
def git_repo(runner):
    """Yield a non-lazy ``GitRepo`` for a fresh one-commit repository.

    The repository is created inside ``runner.isolated_filesystem()`` and
    contains a ``wandb/settings`` file plus a committed, empty ``README``.
    """
    with runner.isolated_filesystem():
        repository = git.Repo.init(".")
        mkdir_exists_ok("wandb")
        # Because the forked process doesn't use my monkey patch above
        with open("wandb/settings", "w") as settings_file:
            settings_file.write("[default]\nproject: test")
        # Create an empty README so there is something to commit.
        with open("README", "wb"):
            pass
        repository.index.add(["README"])
        repository.index.commit("Initial commit")
        yield GitRepo(lazy=False)
def _get_program_relpath_from_gitrepo(program):
    """Return the path of ``program`` relative to the git root.

    The git root comes from ``GitRepo().root``; when that is falsy the
    current working directory is used instead. ``program`` is resolved
    relative to the current working directory.

    Args:
        program: Path of the program, relative to the current directory.

    Returns:
        The path relative to the root, or ``None`` (after logging a warning)
        when the program does not exist or lies above the root.
    """
    repo = GitRepo()
    root = repo.root
    if not root:
        root = os.getcwd()
    full_path_to_program = os.path.join(
        root, os.path.relpath(os.getcwd(), root), program
    )
    if os.path.exists(full_path_to_program):
        relative_path = os.path.relpath(full_path_to_program, start=root)
        # relpath() returns a normalized path using the native separator, so
        # a location above ``root`` always shows up as a leading ".."
        # component. Checking the component (instead of the substring "../")
        # also works on Windows and does not misfire on names like "a../b".
        above_root = (
            relative_path == os.pardir
            or relative_path.startswith(os.pardir + os.sep)
        )
        if above_root:
            logger.warning("could not save program above cwd: %s" % program)
            return None
        return relative_path

    logger.warning("could not find program at %s" % program)
    return None
def auto_project_name(program):
    """Derive a default project name from the git repository and ``program``.

    Args:
        program: Path to the program being run, or ``None``.

    Returns:
        ``"uncategorized"`` when ``GitRepo().root_dir`` is ``None``; the
        repository directory name when ``program`` is ``None`` or lives
        outside the repository; otherwise the repository name followed by
        ``-`` and the program directory's path relative to the repository
        root, with path separators replaced by ``_``.
    """
    # if we're in git, set project name to git repo name + relative path within repo
    root_dir = GitRepo().root_dir
    if root_dir is None:
        return "uncategorized"
    # On windows, GitRepo returns paths in unix style, but os.path is windows
    # style. Coerce here.
    root_dir = to_native_slash_path(root_dir)
    repo_name = os.path.basename(root_dir)
    if program is None:
        return repo_name
    if not os.path.isabs(program):
        program = os.path.join(os.curdir, program)
    prog_dir = os.path.dirname(os.path.abspath(program))
    if not prog_dir.startswith(root_dir):
        return repo_name
    sub_path = os.path.relpath(prog_dir, root_dir)
    # The startswith() test above is a plain string prefix check, so a sibling
    # directory such as "/x/repo2" passes it for root "/x/repo". Such a
    # directory has a relative path starting with "..", i.e. it is outside
    # the repository and must not leak into the project name.
    if sub_path == os.pardir or sub_path.startswith(os.pardir + os.sep):
        return repo_name
    project = repo_name
    if sub_path != '.':
        project += '-' + sub_path
    return project.replace(os.sep, '_')
class Meta(object):
    """Used to store metadata during and after a run."""

    def __init__(self, settings=None, interface=None):
        """Set up empty metadata state.

        Args:
            settings: Settings object; read for ``files_dir`` and, when
                present, the ``"git_remote"`` key.
            interface: Object whose ``publish_files`` is called by ``write``.
        """
        self._settings = settings
        self.data = {}
        self.fname = os.path.join(self._settings.files_dir, METADATA_FNAME)
        self._interface = interface
        self._git = GitRepo(
            remote=self._settings["git_remote"]
            if "git_remote" in self._settings.keys() else "origin")
        # Location under "code" directory in files where program was saved.
        self._saved_program = None
        # Locations under files directory where diff patches were saved.
        self._saved_patches = []

    def _save_pip(self):
        """Saves the current working set of pip packages to {REQUIREMENTS_FNAME}"""
        # Best effort: any failure is logged and otherwise ignored.
        try:
            import pkg_resources

            installed_packages = list(pkg_resources.working_set)
            installed_packages_list = sorted(
                ["%s==%s" % (i.key, i.version) for i in installed_packages])
            with open(
                    os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME),
                    "w") as f:
                f.write("\n".join(installed_packages_list))
        except Exception:
            logger.error("Error saving pip packages")

    def _save_code(self):
        """Copy the program file into ``<files_dir>/code/<program_relpath>``.

        Does nothing (beyond logging a warning) when the settings carry no
        ``program_relpath`` or the program file cannot be found. An already
        existing copy is not overwritten.
        """
        if self._settings.program_relpath is None:
            logger.warning("unable to save code -- program entry not found")
            return

        root = self._git.root or os.getcwd()
        program_relative = self._settings.program_relpath
        util.mkdir_exists_ok(
            os.path.join(self._settings.files_dir, "code",
                         os.path.dirname(program_relative)))
        program_absolute = os.path.join(root, program_relative)
        if not os.path.exists(program_absolute):
            logger.warning("unable to save code -- can't find %s" %
                           program_absolute)
            return
        saved_program = os.path.join(self._settings.files_dir, "code",
                                     program_relative)
        self._saved_program = program_relative

        if not os.path.exists(saved_program):
            copyfile(program_absolute, saved_program)

    def _save_patches(self):
        """Save the current state of this repository to one or more patches.

        Makes one patch against HEAD and another one against the most recent
        commit that occurs in an upstream branch. This way we can be robust to
        history editing as long as the user never does "push -f" to break
        history on an upstream branch.

        Writes the first patch to <files_dir>/<DIFF_FNAME> and the second to
        <files_dir>/upstream_diff_<commit_id>.patch.

        Returns ``False`` when git is not enabled and ``None`` otherwise.
        Diff failures are logged, not raised.
        """
        if not self._git.enabled:
            return False

        try:
            root = self._git.root
            diff_args = ["git", "diff"]
            if self._git.has_submodule_diff:
                diff_args.append("--submodule=diff")

            if self._git.dirty:
                patch_path = os.path.join(self._settings.files_dir, DIFF_FNAME)
                with open(patch_path, "wb") as patch:
                    # we diff against HEAD to ensure we get changes in the index
                    subprocess.check_call(diff_args + ["HEAD"],
                                          stdout=patch,
                                          cwd=root,
                                          timeout=5)
                    self._saved_patches.append(
                        os.path.relpath(patch_path,
                                        start=self._settings.files_dir))

            upstream_commit = self._git.get_upstream_fork_point()
            if upstream_commit and upstream_commit != self._git.repo.head.commit:
                sha = upstream_commit.hexsha
                upstream_patch_path = os.path.join(
                    self._settings.files_dir,
                    "upstream_diff_{}.patch".format(sha))
                with open(upstream_patch_path, "wb") as upstream_patch:
                    subprocess.check_call(diff_args + [sha],
                                          stdout=upstream_patch,
                                          cwd=root,
                                          timeout=5)
                    self._saved_patches.append(
                        os.path.relpath(upstream_patch_path,
                                        start=self._settings.files_dir))
        # TODO: A customer saw `ValueError: Reference at 'refs/remotes/origin/foo'
        # does not exist` so we now catch ValueError. Catching this error feels
        # too generic.
        except (
                ValueError,
                subprocess.CalledProcessError,
                subprocess.TimeoutExpired,
        ) as e:
            logger.error("Error generating diff: %s" % e)

    def _setup_sys(self):
        """Fill ``self.data`` with system, timing, GPU and CPU information."""
        self.data["os"] = self._settings._os
        self.data["python"] = self._settings._python
        self.data["heartbeatAt"] = datetime.utcnow().isoformat()
        self.data["startedAt"] = datetime.utcfromtimestamp(
            self._settings._start_time).isoformat()

        self.data["docker"] = self._settings.docker

        # GPU information is optional; NVML errors are ignored.
        try:
            pynvml.nvmlInit()
            self.data["gpu"] = pynvml.nvmlDeviceGetName(
                pynvml.nvmlDeviceGetHandleByIndex(0)).decode("utf8")
            self.data["gpu_count"] = pynvml.nvmlDeviceGetCount()
        except pynvml.NVMLError:
            pass
        try:
            self.data["cpu_count"] = multiprocessing.cpu_count()
        except NotImplementedError:
            pass

        self.data["cuda"] = self._settings._cuda
        self.data["args"] = self._settings._args
        self.data["state"] = "running"

    def _setup_git(self):
        """Record git remote, commit, email and root when git is enabled.

        The root is the git root if truthy, else a previously stored
        ``"root"`` entry, else the current working directory.
        """
        if self._git.enabled:
            self.data["git"] = {
                "remote": self._git.remote_url,
                "commit": self._git.last_commit,
            }
            self.data["email"] = self._git.email
            # "root" is only pre-populated on one Jupyter branch of probe();
            # use .get() so a missing entry falls through to the cwd instead
            # of raising KeyError.
            self.data["root"] = (
                self._git.root or self.data.get("root") or os.getcwd())

    def probe(self):
        """Collect all metadata into ``self.data`` and save code artifacts.

        Which pieces are collected or saved is controlled by the
        ``disable_code``, ``anonymous``, ``save_code`` and
        ``_save_requirements`` settings.
        """
        self._setup_sys()
        if not self._settings.disable_code:
            if self._settings.program_relpath is not None:
                self.data["codePath"] = self._settings.program_relpath
            if self._settings.program is not None:
                self.data["program"] = self._settings.program
            else:
                self.data["program"] = "<python with no main file>"
                if self._settings._jupyter:
                    if self._settings.notebook_name:
                        self.data["program"] = self._settings.notebook_name
                    else:
                        if self._settings._jupyter_path:
                            if "fileId=" in self._settings._jupyter_path:
                                self.data["colab"] = (
                                    "https://colab.research.google.com/drive/"
                                    + self._settings._jupyter_path.split(  # noqa
                                        "fileId=")[1])
                                self.data[
                                    "program"] = self._settings._jupyter_name
                            else:
                                self.data[
                                    "program"] = self._settings._jupyter_path
                                self.data[
                                    "root"] = self._settings._jupyter_root
            self._setup_git()

        if self._settings.anonymous != "true":
            self.data["host"] = self._settings.host
            self.data["username"] = self._settings.username
            self.data["executable"] = sys.executable
        else:
            # Anonymous runs drop these two entries.
            self.data.pop("email", None)
            self.data.pop("root", None)

        if self._settings.save_code:
            self._save_code()
            self._save_patches()

        if self._settings._save_requirements:
            self._save_pip()

    def write(self):
        """Write ``self.data`` as JSON to ``self.fname`` and publish files.

        The metadata file, the saved program (if any) and every saved patch
        are passed to ``self._interface.publish_files``.
        """
        with open(self.fname, "w") as f:
            s = json.dumps(self.data, indent=4)
            f.write(s)
            f.write("\n")
        base_name = os.path.basename(self.fname)
        files = dict(files=[(base_name, "now")])

        if self._saved_program:
            saved_program = os.path.join("code", self._saved_program)
            files["files"].append((saved_program, "now"))
        for patch in self._saved_patches:
            files["files"].append((patch, "now"))

        self._interface.publish_files(files)
def test_no_remote(self):
    """A ``GitRepo`` built with ``remote=None`` is expected to be disabled."""
    enabled = GitRepo(remote=None).enabled
    assert not enabled
def test_no_repo(self):
    """A ``GitRepo`` rooted at ``/tmp`` is expected to be disabled."""
    enabled = GitRepo(root="/tmp").enabled
    assert not enabled