def _handle_status_error(self, e):
    """Convert the error ``e`` into a ``remotelib.OperationError``.

    ``e.output`` is decoded to text. When that text mentions
    ``NoSuchBucket`` the message says the bucket does not exist;
    otherwise the decoded output is included in the message as is.

    Raises:
        remotelib.OperationError: always.
    """
    details = e.output.decode()
    if "NoSuchBucket" not in details:
        raise remotelib.OperationError(
            "%s is not available: %s" % (self.name, details)
        )
    raise remotelib.OperationError(
        "%s is not available - %s does not exist" % (self.name, self.bucket)
    )
def _ssh_host(self):
    """Return the ``host`` value reported by ``self._output``.

    Raises:
        remotelib.OperationError: if the lookup raises ``LookupError``
            or if the returned value is empty/falsy.
    """
    try:
        address = self._output("host")
    except LookupError:
        raise remotelib.OperationError(
            "cannot get host for %s - is the remote started?" % self.name
        )
    if address:
        return address
    # A successful lookup with an empty value is reported separately
    # from a failed lookup.
    raise remotelib.OperationError(
        "cannot get host for %s - the instance "
        "appears to be stopped" % self.name
    )
def _op_src(opspec):
    """Return the guildfile directory for ``opspec``, or ``None``.

    The operation definition is looked up with
    ``op_util.opdef_for_opspec`` and its guildfile directory is used as
    the source location. ``None`` is returned when that directory is
    ``None``.

    Raises:
        remotelib.OperationError: if the directory does not exist or
            does not contain ``guild.yml``.
    """
    source_dir = op_util.opdef_for_opspec(opspec).guildfile.dir
    if source_dir is None:
        return None
    if not os.path.isdir(source_dir):
        raise remotelib.OperationError(
            "cannot find source location for operation '%s'" % opspec
        )
    guildfile_path = os.path.join(source_dir, "guild.yml")
    if not os.path.exists(guildfile_path):
        raise remotelib.OperationError(
            "source location for operation '%s' (%s) does not "
            "contain guild.yml" % (opspec, source_dir)
        )
    return source_dir
def _op_src(opspec):
    """Return the guildfile directory for ``opspec``, or ``None``.

    The model for the operation is resolved with
    ``run_impl.resolve_model_op`` and the directory of its model
    definition's guildfile is used as the source location. ``None`` is
    returned when that directory is ``None``.

    Raises:
        remotelib.OperationError: if the directory does not exist or
            does not contain ``guild.yml``.
    """
    from guild.commands import run_impl

    resolved_model, _unused_op_name = run_impl.resolve_model_op(opspec)
    source_dir = resolved_model.modeldef.guildfile.dir
    if source_dir is None:
        return None
    if not os.path.isdir(source_dir):
        raise remotelib.OperationError(
            "cannot find source location for operation '%s'" % opspec
        )
    guildfile_path = os.path.join(source_dir, "guild.yml")
    if not os.path.exists(guildfile_path):
        raise remotelib.OperationError(
            "source location for operation '%s' (%s) does not "
            "contain guild.yml" % (opspec, source_dir)
        )
    return source_dir
def _terraform_destroy(self):
    """Run ``terraform destroy -auto-approve`` in ``self.working_dir``.

    Raises:
        remotelib.OperationError: if the command exits with a non-zero
            status.
    """
    exit_code = subprocess.call(
        ["terraform", "destroy", "-auto-approve"],
        cwd=self.working_dir,
    )
    if exit_code == 0:
        return
    raise remotelib.OperationError(
        "error destroying Terraform state in %s" % self.working_dir
    )
def _terraform_apply(self):
    """Run ``terraform apply -auto-approve`` in ``self.working_dir``.

    Raises:
        remotelib.OperationError: if the command exits with a non-zero
            status.
    """
    exit_code = subprocess.call(
        ["terraform", "apply", "-auto-approve"],
        cwd=self.working_dir,
    )
    if exit_code == 0:
        return
    raise remotelib.OperationError(
        "error applying Terraform config in %s" % self.working_dir
    )
def _aws_cmd():
    """Return the result of ``util.which("aws")``.

    Raises:
        remotelib.OperationError: if ``util.which`` returns a falsy
            value, i.e. the ``aws`` command could not be located.
    """
    aws_path = util.which("aws")
    if aws_path:
        return aws_path
    raise remotelib.OperationError(
        "AWS Command Line Interface (CLI) is not available\n"
        "Refer to https://docs.aws.amazon.com/cli for help installing it."
    )
def _create_gist(gist_remote_user, gist_remote_name, gist_readme_name, env):
    """Create a public gist containing a single readme file.

    The access token is obtained from ``env`` via
    ``_required_gist_access_token`` and the readme content is generated
    by ``_gist_readme_content``. The gist is created with a POST to
    ``https://api.github.com/gists``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        remotelib.OperationError: if the response status is neither 200
            nor 201.
    """
    import requests

    token = _required_gist_access_token(env)
    readme_file = {
        "filename": gist_readme_name,
        "type": "text/markdown",
        "language": "Markdown",
        "content": _gist_readme_content(gist_remote_user, gist_remote_name),
    }
    payload = {
        "accept": "application/vnd.github.v3+json",
        "description": "Guild AI Repository",
        "public": True,
        "files": {gist_readme_name: readme_file},
    }
    auth_headers = {"Authorization": "token %s" % token}
    response = requests.post(
        "https://api.github.com/gists", json=payload, headers=auth_headers
    )
    if response.status_code in (200, 201):
        return response.json()
    raise remotelib.OperationError(
        "error creating gist: (%i) %s" % (response.status_code, response.text)
    )
def _validate_extracted_run(dir, run_id, archive):
    """Return the extracted run directory ``dir/run_id`` if it is valid.

    A run directory is accepted only if it contains ``.guild/opref``.
    ``archive`` is used only in the logged error message.

    Raises:
        remotelib.OperationError: if ``.guild/opref`` is missing.
    """
    run_dir = os.path.join(dir, run_id)
    # RUN_DIR/.guild/opref is required for a run.
    if os.path.exists(os.path.join(run_dir, ".guild", "opref")):
        return run_dir
    log.error("%s does not contain expected run %s", archive, run_id)
    raise remotelib.OperationError("invalid run archive in gist")
def _ensure_terraform_init(self):
    """Run ``terraform init`` in ``self.working_dir`` when needed.

    Nothing is run if a ``.terraform`` entry already exists in the
    working directory.

    Raises:
        remotelib.OperationError: if ``terraform init`` exits with a
            non-zero status.
    """
    terraform_marker = os.path.join(self.working_dir, ".terraform")
    if os.path.exists(terraform_marker):
        return
    exit_code = subprocess.call(["terraform", "init"], cwd=self.working_dir)
    if exit_code == 0:
        return
    raise remotelib.OperationError(
        "unable to initialize Terraform in %s" % self.working_dir
    )
def _azcopy_cmd():
    """Return the result of ``util.which("azcopy")``.

    Raises:
        remotelib.OperationError: if ``util.which`` returns a falsy
            value, i.e. the ``azcopy`` command could not be located.
    """
    azcopy_path = util.which("azcopy")
    if azcopy_path:
        return azcopy_path
    raise remotelib.OperationError(
        "AzCopy is not available\n"
        "Refer to https://docs.microsoft.com/en-us/azure/storage/"
        "common/storage-use-azcopy-v10 for help installing it."
    )
def _git_cmd():
    """Return the result of ``util.which("git")``.

    Raises:
        remotelib.OperationError: if ``util.which`` returns a falsy
            value, i.e. the ``git`` command could not be located.
    """
    git_path = util.which("git")
    if git_path:
        return git_path
    raise remotelib.OperationError(
        "git command is not available\n"
        "Refer to https://git-scm.com/book/en/v2/Getting-Started-Installing-Git "
        "for help installing it."
    )
def _output(self, name):
    """Return the ``value`` of the Terraform output called ``name``.

    Runs ``terraform output -json`` in ``self.working_dir`` and parses
    the result as JSON. A missing ``name`` surfaces as the ``KeyError``
    raised by the lookup on the parsed data.

    Raises:
        remotelib.OperationError: if the ``terraform`` command fails.
    """
    try:
        raw = subprocess.check_output(
            ["terraform", "output", "-json"], cwd=self.working_dir
        )
    except subprocess.CalledProcessError:
        raise remotelib.OperationError(
            "unable to get Terraform output in %s" % self.working_dir
        )
    outputs = json.loads(raw.decode())
    return outputs[name]["value"]
def _init_remote_restart_run_dir(self, remote_run_id):
    """Prepare the remote run directory for a restart and return its path.

    The remote command exits with status 3 if ``.guild/LOCK`` exists in
    the run directory. Otherwise it touches ``.guild/PENDING`` and
    writes the current ``date +%s`` value followed by ``000000`` to
    ``.guild/attrs/started``.

    Raises:
        remotelib.OperationError: with args ``("running", remote_run_id)``
            when the remote command exits with status 3.
        remotelib.RemoteProcessError: re-raised for any other exit status.
    """
    run_dir = os.path.join(self.guild_home, "runs", remote_run_id)
    cmd_template = (
        "set -e; "
        "test ! -e {run_dir}/.guild/LOCK || exit 3; "
        "touch {run_dir}/.guild/PENDING; "
        "echo \"$(date +%s)000000\" > {run_dir}/.guild/attrs/started"
    )
    restart_cmd = cmd_template.format(run_dir=run_dir)
    log.info("Initializing remote run for restart")
    try:
        self._ssh_cmd(restart_cmd)
    except remotelib.RemoteProcessError as e:
        if e.exit_status != 3:
            raise
        # Exit status 3 is the command's signal that the LOCK file exists.
        raise remotelib.OperationError("running", remote_run_id)
    return run_dir
def _delete_gist(gist, env):
    """Delete ``gist`` through the GitHub gists API.

    The access token is obtained from ``env`` via
    ``_required_gist_access_token``. ``gist`` must provide the gist ID
    under the ``"id"`` key.

    Raises:
        remotelib.OperationError: if the response status is neither 200
            nor 204.
    """
    import requests

    access_token = _required_gist_access_token(env)
    data = {
        "accept": "application/vnd.github.v3+json",
        "gist_id": gist["id"],
    }
    headers = {
        "Authorization": "token %s" % access_token,
    }
    resp = requests.delete(
        "https://api.github.com/gists/%s" % gist["id"], json=data, headers=headers
    )
    if resp.status_code not in (200, 204):
        # This is the delete path, so the message must say "deleting";
        # it previously said "creating".
        raise remotelib.OperationError(
            "error deleting gist: (%i) %s" % (resp.status_code, resp.text)
        )
def start(self):
    """Create the gist for this remote unless one already exists.

    ``self._repo_gist()`` is used to look up an existing gist. When it
    raises ``NoSuchGist``, a new gist is created with
    ``self._create_gist()`` and ``self._sync_runs_meta()`` is called.

    Raises:
        remotelib.OperationError: if the lookup succeeds, i.e. the gist
            already exists.
    """
    remote_util.remote_activity("Getting %s status", self.name)
    try:
        existing = self._repo_gist()
    except NoSuchGist:
        log.info("Creating gist")
        created = self._create_gist()
        log.info(
            "Created %s (gist %s) for user %s",
            self.name,
            created["id"],
            self.user,
        )
        self._sync_runs_meta()
        return
    raise remotelib.OperationError(
        "%s (gist %s) already exists for user %s"
        % (self.name, existing["id"], self.user)
    )
def stop(self):
    """Remove the S3 bucket ``self.bucket`` using ``rb --force``.

    Raises:
        remotelib.OperationError: with no arguments, if the S3 command
            raises ``remotelib.RemoteProcessError``.
    """
    log.info("Deleting S3 bucket %s", self.bucket)
    bucket_url = "s3://%s" % self.bucket
    try:
        self._s3_cmd("rb", ["--force", bucket_url])
    except remotelib.RemoteProcessError:
        # NOTE(review): raised without a message - presumably the failed
        # command has already reported details; confirm with callers.
        raise remotelib.OperationError()
def start(self):
    """Create the S3 bucket ``self.bucket`` using ``mb``.

    Raises:
        remotelib.OperationError: with no arguments, if the S3 command
            raises ``remotelib.RemoteProcessError``.
    """
    log.info("Creating S3 bucket %s", self.bucket)
    bucket_url = "s3://%s" % self.bucket
    try:
        self._s3_cmd("mb", [bucket_url])
    except remotelib.RemoteProcessError:
        # NOTE(review): raised without a message - presumably the failed
        # command has already reported details; confirm with callers.
        raise remotelib.OperationError()
def require_env(name):
    """Ensure the environment variable ``name`` is defined.

    Only presence in ``os.environ`` is checked; a variable set to an
    empty string is accepted.

    Raises:
        remotelib.OperationError: if ``name`` is not in ``os.environ``.
    """
    if name in os.environ:
        return
    raise remotelib.OperationError(
        "missing required %s environment variable" % name
    )
def _verify_terraform():
    """Ensure the ``terraform`` command can be located.

    Raises:
        remotelib.OperationError: if ``util.which("terraform")`` returns
            a falsy value.
    """
    if util.which("terraform"):
        return
    raise remotelib.OperationError(
        "Terraform is required for this operation - refer to "
        "https://www.terraform.io/intro/getting-started/install.html "
        "for more information."
    )