def _initialize(self):
    """Bring the task's remote machine into an initialized, usable state.

    Steps, in order:

    1. Poll ``self.public_ip`` every ``TIMEOUT_SEC`` seconds until it is
       truthy.
    2. Call ``u.ssh_to_host`` every ``TIMEOUT_SEC`` seconds until it returns
       something other than ``None``; the result is kept in
       ``self.ssh_client``.
    3. Call ``self._setup_tmux()``, create ``self.remote_scratch`` with
       ``mkdir -p`` and, unless ``self.skip_efs_mount`` is set, call
       ``self._mount_efs()``.
    4. Install software:
       * if ``self._is_initialized_file_present()`` is already true, reuse
         the existing state;
       * else if ``self.install_script`` is non-empty, append a marker
         command to it, upload it as ``install.sh`` and run it with
         ``bash -e``;
       * otherwise just write ``/tmp/is_initialized`` directly.
    5. Store ``self.connect_instructions``, set ``self.initialized`` and log
       completion.

    Neither wait loop has an upper bound, so this call blocks for as long as
    the machine has no public IP or refuses SSH.

    Side effects: sets ``initialize_called``, ``ssh_client``,
    ``connect_instructions`` and ``initialized``; may extend
    ``install_script`` in place.

    Raises:
        AssertionError: the install script was run but
            ``self._is_initialized_file_present()`` is still false.
    """
    self.log("Running initialize")
    self.initialize_called = True

    # Wait for the machine to be assigned a public IP.
    while True:
        public_ip = self.public_ip  # todo: add retry logic to public_ip property
        if public_ip:
            break
        print(
            f"Trying to initialize, but task {self.name} doesn't have public_ip, sleeping"
        )
        time.sleep(TIMEOUT_SEC)

    # Wait for SSH to come up; a None client is treated as "try again".
    # self.public_ip is re-read on every attempt, as in the original code.
    while True:
        self.ssh_client = u.ssh_to_host(self.public_ip, self.keypair_fn,
                                        self.username)
        if self.ssh_client is None:
            self.log("SSH into %s:%s failed, retrying in %d seconds" %
                     (self.job.name, self.id, TIMEOUT_SEC))
            time.sleep(TIMEOUT_SEC)
        else:
            break

    # todo: install tmux
    self._setup_tmux()
    self.run('mkdir -p ' + self.remote_scratch)
    if not self.skip_efs_mount:
        self._mount_efs()

    # run initialization commands here
    if self._is_initialized_file_present():
        self.log("reusing previous initialized state")
    elif self.install_script:
        self.log("running install script")
        # The trailing command records success on the remote machine; because
        # the script runs under `bash -e`, it is only reached when every
        # earlier command succeeded.
        # NOTE: this extends self.install_script in place, so a repeated call
        # appends the (harmless) marker command again.
        self.install_script += '\necho ok > /tmp/is_initialized\n'
        self.file_write('install.sh', u._add_echo(self.install_script))
        self.run('bash -e install.sh', max_wait_sec=2400)  # fail on errors
        # TODO(y): propagate error messages printed on console to the user
        # right now had to log into tmux to see it
        #
        # Explicit raise rather than `assert`: assert statements are removed
        # under `python -O`, which would let a failed install pass silently.
        # AssertionError is kept so existing handlers see the same type.
        if not self._is_initialized_file_present():
            raise AssertionError(
                "install.sh was run but the task is still not marked as "
                "initialized")
    else:
        self.log('No install script. Skipping to end')
        # installation happens through user-data instead of install script
        # if neither one is passed, manually create is_initialized
        self.run('echo ok > /tmp/is_initialized')

    self.connect_instructions = """ ssh -i %s -o StrictHostKeyChecking=no %s@%s tmux a """.strip() % (
        self.keypair_fn, self.username, self.public_ip)
    self.initialized = True
    self.log("Initialize complete")
    self.log(self.connect_instructions)
def _initialize(self):
    """Connect to the task's machine over SSH and run its install script.

    Steps, in order:

    1. Call ``u.ssh_to_host`` every ``TIMEOUT_SEC`` seconds until it returns
       something other than ``None``; the result is kept in
       ``self.ssh_client``.
    2. Call ``self._setup_tmux()``, create ``self.remote_scratch`` with
       ``mkdir -p`` and call ``self._mount_efs()``.
    3. Unless ``self._is_initialized_file_present()`` is already true,
       append a marker command to ``self.install_script``, upload it as
       ``install.sh`` and run it with ``bash -e``.
    4. Store ``self.connect_instructions`` and log completion.

    The SSH wait loop has no upper bound, so this call blocks for as long as
    the machine refuses SSH.

    Side effects: sets ``initialize_called``, ``ssh_client`` and
    ``connect_instructions``; may extend ``install_script`` in place.

    Raises:
        AssertionError: the install script was run but
            ``self._is_initialized_file_present()`` is still false.
    """
    self.log("Running initialize")
    self.initialize_called = True

    # todo: add retry logic to public_ip property
    # Wait for SSH to come up; a None client is treated as "try again".
    while True:
        self.ssh_client = u.ssh_to_host(self.public_ip, self.keypair_fn,
                                        self.username)
        if self.ssh_client is None:
            self.log("SSH into %s:%s failed, retrying in %d seconds" %
                     (self.job.name, self.id, TIMEOUT_SEC))
            time.sleep(TIMEOUT_SEC)
        else:
            break

    # todo: install tmux
    self._setup_tmux()
    self.run('mkdir -p ' + self.remote_scratch)
    self._mount_efs()

    # run initialization commands here
    if self._is_initialized_file_present():
        self.log("reusing previous initialized state")
    else:
        self.log("running install script")
        # Tolerate a missing install script (None) instead of failing with a
        # TypeError on `+=`; the uploaded script then only writes the marker.
        # The marker command is reached only when every earlier command
        # succeeded, because the script runs under `bash -e`.
        self.install_script = ((self.install_script or '') +
                               '\necho ok > /tmp/is_initialized\n')
        self.file_write('install.sh', u._add_echo(self.install_script))
        self.run('bash -e install.sh')  # fail on errors
        # TODO(y): propagate error messages printed on console to the user
        # right now had to log into tmux to see it
        #
        # Explicit raise rather than `assert`: assert statements are removed
        # under `python -O`, which would let a failed install pass silently.
        # AssertionError is kept so existing handlers see the same type.
        if not self._is_initialized_file_present():
            raise AssertionError(
                "install.sh was run but the task is still not marked as "
                "initialized")

    self.connect_instructions = """ ssh -i %s -o StrictHostKeyChecking=no %s@%s tmux a """.strip() % (
        self.keypair_fn, self.username, self.public_ip)
    self.log("Initialize complete")