def __init__(self, collect_existing_tpus=True, **kwargs):
    """Initialise the instance and verify that the ``gcloud`` CLI can be run.

    Args:
        collect_existing_tpus: Not read by this constructor.
        **kwargs: Forwarded unchanged to the parent class constructor.

    Raises:
        Exception: If invoking ``gcloud --version`` through ``utils.call``
            raises; the original error is attached as ``__cause__``.
    """
    super().__init__(**kwargs)

    # Check for dependencies
    try:
        utils.call(["gcloud", "--version"])
    except Exception as err:
        # Catch ``Exception`` instead of using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not misreported as a missing
        # tool, and chain the cause so the underlying failure stays visible.
        raise Exception("Missing commandline utility: gcloud") from err

    self.tpu = TPUManager(self)
    self.resource_managers = [self.tpu]
def __init__(self, collect_existing_tpus=True, **kwargs):
    """Initialise the instance, check for ``gcloud`` and apply GCB overrides.

    When the ``config`` keyword argument has a truthy ``is_gcb`` entry, the
    instance name is fixed to ``"cloud-build"`` and the zone is taken from
    ``config['zone']``.

    Args:
        collect_existing_tpus: Not read by this constructor.
        **kwargs: Forwarded unchanged to the parent class constructor. Must
            contain a ``config`` entry supporting ``.get`` and item access.

    Raises:
        KeyError: If ``config`` is not among the keyword arguments.
        Exception: Whatever ``utils.call`` raises for ``gcloud --version``
            propagates to the caller unchanged.
    """
    super().__init__(**kwargs)

    # Check for dependencies. Any failure propagates as-is; wrapping the call
    # in a handler that merely re-raises would only add traceback noise.
    utils.call(["gcloud", "--version"])

    config = kwargs['config']
    if config.get('is_gcb', False):
        self._name = "cloud-build"
        self._zone = config['zone']

    self.tpu = TPUManager(self)
    self.resource_managers = [self.tpu]
def get_all_tpu_names(self):
    """Return the listed TPU names that contain this instance's name.

    Runs ``gcloud compute tpus list`` for ``self.zone``, skips the first
    output line (presumably the column header -- confirm against the gcloud
    output format) and treats the first whitespace-separated field of every
    remaining non-blank line as a TPU name.

    Returns:
        list: The names that contain ``self.instance.name`` as a substring,
        in output order. A concrete list is returned so the result can be
        iterated more than once and supports ``len()``.
    """
    _, output, _ = utils.call([
        "gcloud", "compute", "tpus", "list",
        "--zone={}".format(self.zone),
    ])
    own_name = self.instance.name

    # ``line.split()`` yields an empty list for blank or whitespace-only
    # lines, so those are skipped instead of failing on the ``[0]`` lookup.
    rows = (line.split() for line in output.splitlines()[1:])
    return [fields[0] for fields in rows if fields and own_name in fields[0]]
def details(self):
    """Describe this TPU via ``gcloud`` and return the output as a dict.

    Runs ``gcloud compute tpus describe`` for ``self.name`` in
    ``self.manager.zone`` and parses the second element of the
    ``utils.call`` result line by line.

    Returns:
        dict: Maps the stripped text before ``": "`` to the stripped text
        after it. Lines that do not split into exactly two parts on
        ``": "`` are ignored; a repeated key keeps its last value.
    """
    command = [
        "gcloud", "compute", "tpus", "describe",
        "--zone={}".format(self.manager.zone),
        self.name,
    ]
    _, output, _ = utils.call(command)

    pairs = (row.split(": ") for row in output.split("\n"))
    return {
        key.strip(): value.strip()
        for key, value in (pair for pair in pairs if len(pair) == 2)
    }
def _up(self, name, ip, preemptible, version, zone, background):
    """Create a TPU with ``gcloud compute tpus create``.

    Args:
        name: Name passed to the create command and to the returned ``TPU``.
        ip: Value substituted into the ``--range=10.0.{}.0`` option.
        preemptible: When truthy, ``--preemptible`` is added.
        version: Value for ``--accelerator-type``.
        zone: When truthy, added as ``--zone``.
        background: When truthy, ``--async`` is added.

    Returns:
        TPU: A ``TPU`` bound to this manager when the first element of the
        ``utils.call`` result equals 0.

    Raises:
        Exception: When that first element is not 0; the message includes
            the third element of the ``utils.call`` result.
    """
    logger.info("Trying to acquire TPU with name: {} ip: {}".format(name, ip))

    command = ["gcloud", "compute", "tpus", "create", name]
    command.append("--range=10.0.{}.0".format(ip))
    command.append("--accelerator-type={}".format(version))
    command.append("--version={}".format(self.tf_version))
    command.append("--network=default")

    # Optional flags keep the same relative order: zone, preemptible, async.
    if zone:
        command.append("--zone={}".format(zone))
    if preemptible:
        command.append("--preemptible")
    if background:
        command.append("--async")

    status, _, stderr = utils.call(command)
    if status != 0:
        raise Exception(
            "Failed to create TPU with name: {} ip: {} error: \n{}".format(
                name, ip, stderr))
    return TPU(name=name, manager=self)
def name(self):
    """Return the instance name, looking it up on first use.

    If ``self._name`` is missing or ``None``, the stripped second element of
    ``utils.call(["hostname"])`` is stored in ``self._name`` first.
    """
    cached = getattr(self, '_name', None)
    if cached is not None:
        return cached

    _, hostname_output = utils.call(["hostname"])[:2]
    self._name = hostname_output.strip()
    return self._name