def wait_for_application(app, vms, timeout=None):
    """Wait until an application is up and connectable over ssh.

    Starts a console progress bar, waits for ``app`` to reach the
    ``STARTED`` state, then waits for the VMs named in ``vms`` to accept
    ssh connections.

    :param app: application record to wait for.
    :param vms: names of the VMs that must become reachable over ssh.
    :param timeout: overall time budget in seconds. A falsy value means
        no explicit budget is passed on to the wait helpers; any other
        value is raised to at least 120 seconds.
    :returns: the application record returned by
        ``wait_until_application_is_in_state``.
    """
    banner = textwrap.dedent("""\
        Waiting until application is ready...
        Progress: 'P' = Publishing, 'S' = Starting, 'C' = Connecting
        ===> """)
    console.start_progressbar(banner)

    # XXX: On first boot cloud-init installs our authorized keys file, and
    # that can complete after ssh is already listening. The images should
    # be fixed so that cloud-init is done before ssh starts.
    state = get_application_state(app)

    timeleft = None
    if timeout:
        # A budget below 120 seconds does not make sense.
        timeleft = max(120, timeout)
        start = time.time()

    extra_sleep = 0
    if state == 'PUBLISHING':
        # Fudge factor. A freshly published application still has to
        # generate its ssh host keys. In theory ssh only binds port 22
        # after the keys exist, so no extra wait should be needed; in
        # practice something does not quite work out. Until that is
        # understood, take the easy way and sleep a little longer.
        extra_sleep = 30
        if timeleft:
            # Reserve the extra sleep out of the caller's budget.
            timeleft -= 30

    console.debug('State {0}, extra sleep {1}.', state, extra_sleep)

    app = wait_until_application_is_in_state(app, 'STARTED', timeleft)
    if timeleft:
        # Hand only the unspent part of the budget to the ssh wait.
        elapsed = time.time() - start
        timeleft = max(0, timeleft - elapsed)
    wait_until_application_accepts_ssh(app, vms, timeleft)

    console.end_progressbar('DONE')
    time.sleep(extra_sleep)
    return app
def synchronize_on_task(taskname, timeout=600):
    """Wait until all instances of ``taskname`` have completed.

    The VMs waited for are those in ``env.appdef['vms']`` whose name is in
    ``env.vms`` and that define a task called ``taskname``. The shared
    state is polled every 5 seconds. Polling stops as soon as every such
    VM lists ``taskname`` in its ``completed_tasks``, or as soon as any VM
    in the application definition reports ``exited``.

    :param taskname: name of the task to synchronize on.
    :param timeout: maximum number of seconds to wait; when exceeded,
        ``error.raise_error`` is called.
    :returns: a dictionary mapping VM name to the shared state of the VMs
        that were waited for (plus the state of a VM that exited, if that
        is what ended the wait). Empty if there was nothing to wait for.
    """
    waitfor = set()
    for vmdef in env.appdef['vms']:
        if vmdef['name'] not in env.vms:
            continue
        if any(taskdef['name'] == taskname for taskdef in vmdef['tasks']):
            waitfor.add(vmdef['name'])

    state = {}
    end_time = time.time() + timeout
    while True:
        # Track the exit condition explicitly instead of peeking at the
        # loop variable afterwards: with no VMs defined the loop body never
        # runs and there would be no ``vmstate`` to inspect.
        exited = False
        for vmdef in env.appdef['vms']:
            vmname = vmdef['name']
            vmstate = env.shared_state[vmname]  # resync
            if vmstate['exited']:
                state[vmname] = vmstate
                exited = True
                break
            if vmname not in waitfor:
                continue
            if taskname in vmstate['completed_tasks']:
                waitfor.remove(vmname)
                state[vmname] = vmstate
        if exited or not waitfor:
            break
        console.debug('Waiting for {0!r}', waitfor)
        time.sleep(5)
        if time.time() > end_time:
            error.raise_error("Timeout waiting for task `{0}`.", taskname)
    return state
def wait_until_application_accepts_ssh(app, vms, timeout=None,
                                       poll_timeout=None):
    """Wait until an application is reachable by ssh.

    The application counts as reachable once every VM in ``app['vms']``
    whose name is in ``vms`` accepts a TCP connection on port 22 at its
    ``dynamicMetadata.externalIp`` address.

    :param app: application record; ``app['vms']`` is iterated.
    :param vms: collection of VM names that must become reachable.
    :param timeout: overall time budget in seconds (default 300).
    :param poll_timeout: length of one connect/poll round in seconds
        (default 5).
    :returns: ``None`` once all addresses are reachable. Returns without
        opening any socket when no VM matches ``vms``.

    If the budget runs out, ``error.raise_error`` is called with the names
    of the VMs that are still unreachable. An unexpected ``select()``
    failure is re-raised.
    """
    if timeout is None:
        timeout = 300
    if poll_timeout is None:
        poll_timeout = 5
    waitaddrs = set(vm['dynamicMetadata']['externalIp']
                    for vm in app['vms'] if vm['name'] in vms)
    end_time = time.time() + timeout
    # For the intricate details on non-blocking connect()'s, see Stevens,
    # UNIX network programming, volume 1, chapter 16.3 and following.
    while time.time() <= end_time:
        # Start one non-blocking connect per address still outstanding.
        waitfds = {}
        for addr in waitaddrs:
            sock = socket.socket()
            sock.setblocking(False)
            try:
                sock.connect((addr, 22))
            except socket.error as e:
                if e.errno not in nb_connect_errors:
                    console.debug('connect(): errno {.errno}'.format(e))
                    # Hard failure: release the descriptor, retry next round.
                    sock.close()
                    continue
            waitfds[sock.fileno()] = (sock, addr)
        poll_end_time = time.time() + poll_timeout
        try:
            # Only poll while connects are pending; select() with three
            # empty sets is an error on Windows and a plain sleep elsewhere
            # (the sleep below already covers the rest of the round).
            while waitfds:
                # Separate name: ``timeout`` must keep the overall budget
                # so the final error message reports the right value.
                poll_timeleft = poll_end_time - time.time()
                if poll_timeleft < 0:
                    break
                try:
                    _, wfds, _ = select.select([], list(waitfds), [],
                                               poll_timeleft)
                except select.error as e:
                    if e.args[0] == errno.EINTR:
                        continue
                    console.debug('select(): errno {0}'.format(e.args[0]))
                    raise
                for fd in wfds:
                    sock, addr = waitfds.pop(fd)
                    # A writable socket means the connect finished; SO_ERROR
                    # tells whether it succeeded.
                    try:
                        err = sock.getsockopt(socket.SOL_SOCKET,
                                              socket.SO_ERROR)
                    except socket.error as e:
                        err = e.errno
                    sock.close()
                    if not err:
                        waitaddrs.discard(addr)
        finally:
            # Close whatever is still pending (round expired or select()
            # failed) so no descriptors leak between rounds.
            for sock, _ in waitfds.values():
                sock.close()
        if not waitaddrs:
            return
        console.show_progress('C')  # 'C' = Connecting
        time.sleep(max(0, poll_end_time - time.time()))
    unreachable = set(vm['name'] for vm in app['vms']
                      if vm['dynamicMetadata']['externalIp'] in waitaddrs)
    noun = inflect.plural_noun('VM', len(unreachable))
    # The message template supplies the outer backticks; join with inner
    # ones so each name ends up quoted exactly once.
    vmnames = '`, `'.join(sorted(unreachable))
    error.raise_error('{0} `{1}` did not become reachable within {2} seconds.',
                      noun, vmnames, timeout)
def debug(message, *args, **kwargs):
    """Emit a debug line tagged with the current VM name.

    ``message`` is expanded with ``str.format`` using ``args`` and
    ``kwargs`` and then handed to ``console.debug`` behind a ``[VM name]``
    prefix. ``vmname`` is not a parameter; it is resolved from the
    surrounding scope.
    """
    formatted = message.format(*args, **kwargs)
    console.debug('[VM {0}] {1}', vmname, formatted)
def do_ssh(args, env):
    """The "ravello ssh" command.

    Resolves ``args.application`` (``defname``, ``defname:instance`` or
    ``project:defname:instance``) to exactly one application, checks that
    VM ``args.vm`` exists and uses the current public key, starts the
    application and waits for it, and then runs ``~/bin/run <testid>`` on
    the VM over ssh.

    With a local openssh and an interactive stdin, the ssh client is run
    directly: via ``os.execve`` on Unix (which replaces this process) or
    via ``subprocess`` followed by ``error.exit`` on Windows. Otherwise
    the command is run through Fabric.

    :param args: parsed command line; ``application``, ``vm`` and
        ``testid`` are read.
    :param env: environment object; ``let()``, ``public_key`` and
        ``private_key_file`` are used.
    :returns: the return code of the remote command when the Fabric
        fallback is used.
    """
    with env.let(quiet=True):
        login.default_login()
        keypair.default_keypair()

    if manifest.manifest_exists():
        with env.let(quiet=True):
            manif = manifest.default_manifest()
    else:
        manif = None

    parts = args.application.split(':')
    if len(parts) in (1, 2) and manif is None:
        error.raise_error('No manifest found ({0}).\n'
                          'Please specify the fully qualified app name.\n'
                          'Use `ravtest ps --all` for a list.',
                          manifest.manifest_name())
    if len(parts) in (1, 2):
        # Short form: the project name comes from the manifest.
        project = manif['project']['name']
        console.info('Project name is `{0}`.', project)
        defname = parts[0]
        instance = parts[1] if len(parts) == 2 else None
    elif len(parts) == 3:
        project, defname, instance = parts
    else:
        # Report the name as given; ``appname`` is only assigned further
        # down, once the application has been looked up.
        error.raise_error('Illegal application name: `{0}`.',
                          args.application)

    apps = cache.find_applications(project, defname, instance)
    if not apps:
        error.raise_error('No instances of application `{0}` exist.',
                          defname)
    elif len(apps) > 1:
        error.raise_error('Multiple instances of `{0}` exist.\n'
                          'Use `ravtest ps` to list the instances and then\n'
                          'specify the application with its instance id.',
                          defname)
    app = cache.get_application(apps[0]['id'])
    appname = app['name']
    _, _, instance = appname.split(':')

    vmname = args.vm
    vm = application.get_vm(app, vmname)
    if vm is None:
        error.raise_error('Application `{0}:{1}` has no VM named `{2}`.\n'
                          'Use `ravtest ps --full` to see a list of VMs.',
                          defname, instance, vmname)
    console.info("Connecting to VM `{0}` of application `{1}:{2}`...",
                 vmname, defname, instance)

    # Start up the application and wait for it if we need to.

    state = application.get_application_state(app)
    if state not in ('PUBLISHING', 'STARTING', 'STOPPED', 'STARTED'):
        error.raise_error("VM `{0}` is in an unknown state.", vmname)

    userdata = vm.get('customVmConfigurationData', {})
    vmkey = userdata.get('keypair', {})

    if vmkey.get('id') != env.public_key['id']:
        error.raise_error("VM uses unknown public key `{0}`.",
                          vmkey.get('name'))

    application.start_application(app)
    started = application.wait_for_application(app, [vmname])
    # The VM record above was fetched before the application was started,
    # so its external IP may be missing or stale. Prefer the record from
    # the application returned by the wait, and keep the old one if none
    # is returned.
    # NOTE(review): assumes wait_for_application returns the refreshed
    # application record -- confirm.
    if started is not None:
        vm = application.get_vm(started, vmname) or vm

    # Now run ssh. Prefer openssh but fall back to using Fabric/Paramiko.

    host = 'ravello@{0}'.format(vm['dynamicMetadata']['externalIp'])
    command = '~/bin/run {0}'.format(args.testid)

    openssh = util.find_openssh()
    interactive = os.isatty(sys.stdin.fileno())

    if interactive and openssh:
        if not sys.platform.startswith('win'):
            # On Unix use execve(). This is the most efficient.
            argv = ['ssh', '-i', env.private_key_file,
                    '-o', 'UserKnownHostsFile=/dev/null',
                    '-o', 'StrictHostKeyChecking=no',
                    '-o', 'LogLevel=quiet',
                    '-t', host, command]
            console.debug('Starting {0}', ' '.join(argv))
            os.execve(openssh, argv, os.environ)
        else:
            # Windows has execve() but for some reason it does not work
            # well with arguments with spaces in it. So use subprocess
            # instead.
            command = [openssh, '-i', env.private_key_file,
                       '-o', 'UserKnownHostsFile=NUL',
                       '-o', 'StrictHostKeyChecking=no',
                       '-o', 'LogLevel=quiet',
                       '-t', host, command]
            ssh = subprocess.Popen(command)
            ret = ssh.wait()
            error.exit(ret)

    # TODO: should also support PuTTY on Windows

    console.info(textwrap.dedent("""\
        Warning: no local openssh installation found.
        Falling back to Fabric/Paramiko for an interactive shell.
        However, please note:

        * CTRL-C and terminal resize signals may not work.
        * Output of programs that repaint the screen may
          be garbled (e.g. progress bars).
        """))

    fab.env.host_string = host
    fab.env.key_filename = env.private_key_file
    fab.env.disable_known_hosts = True
    fab.env.remote_interrupt = True
    fab.env.output_prefix = None
    fabric.state.output.running = None
    fabric.state.output.status = None

    ret = fab.run(command, warn_only=True)
    return ret.return_code