def list_instances(args): launcher = Launcher(region=get_region(args.ec2_region)) reservations = launcher.get_reservations() count = sum(len(r.instances) for r in reservations) print "%d instance%s%s" % (count, 's' if count != 1 else '', ':' if count > 0 else '') for r in reservations: for i in r.instances: print_instance(i)
def list_instances(args): launcher = Launcher(region=get_region(args.ec2_region)) reservations = launcher.get_reservations() count = sum(len(r.instances) for r in reservations) print "%d instance%s%s" % ( count, 's' if count != 1 else '', ':' if count > 0 else '' ) for r in reservations: for i in r.instances: print_instance(i)
def list_instances(self):
    """Returns the current instances in the cluster.

    Returns:
        :class:`Instances`: The collection of instances in the cluster,
        split into ``manager`` and ``workers`` using ``self._is_manager``,
        plus the ``unfulfilled_spot_requests`` entry of the reservations.
    """
    reservations = Launcher(self.name, self.region).get_reservations()

    # Partition in a single pass: each instance lands in exactly one list.
    managers = []
    workers = []
    for instance in self._running_or_pending_instances(reservations):
        if self._is_manager(instance):
            managers.append(instance)
        else:
            workers.append(instance)

    return Instances(
        manager=managers,
        workers=workers,
        unfulfilled=reservations['unfulfilled_spot_requests'],
    )
def worker_logs(args): launcher = Launcher(**launcher_args(args)) instances = running_or_pending_instances(launcher.get_reservations()) identity_file = get_identity_file(args.identity_file) def grep(instance): #note that we have to swap "A" and "B" because tac has reversed the order of the lines. command = '"source ufora_setup.sh; tac \\$LOG_DIR/logs/ufora-worker.log | grep -m %s -B %s -A %s -e %s" | tac' % (args.N, args.A, args.B, args.expression) return (pad(instance.ip_address + "> ", 25), ssh_output(identity_file, instance.ip_address, command)) for ip, res in parallel_for(instances, grep): for line in res.split("\n"): print ip, line
def list_instances(self):
    """Returns the current instances in the cluster.

    Returns:
        :class:`Instances`: The collection of instances in the cluster.
    """
    launcher = Launcher(self.name, self.region)
    reservations = launcher.get_reservations()
    candidates = self._running_or_pending_instances(reservations)

    # Managers and workers are complementary subsets of the same candidates.
    manager_nodes = [node for node in candidates if self._is_manager(node)]
    worker_nodes = [node for node in candidates if not self._is_manager(node)]
    pending_spot = reservations['unfulfilled_spot_requests']

    return Instances(manager=manager_nodes,
                     workers=worker_nodes,
                     unfulfilled=pending_spot)
def list_instances(args): launcher = Launcher(**launcher_args(args)) reservations = launcher.get_reservations() instances = running_or_pending_instances(reservations) count = len(instances) print "%d instance%s%s" % (count, 's' if count != 1 else '', ':' if count > 0 else '') for i in instances: print_instance(i) if reservations['unfulfilled_spot_requests']: print "" count = len(reservations['unfulfilled_spot_requests']) print "%d unfulfilled spot instance request%s:" % (count, 's' if count != 1 else '') for r in reservations['unfulfilled_spot_requests']: print_spot_request(r)
def stop_instances(args): launcher = Launcher(region=get_region(args.ec2_region)) instances = running_or_pending_instances(launcher.get_reservations()) count = len(instances) if count == 0: print "No running instances to stop" return verb = 'Terminating' if args.terminate else 'Stopping' print '%s %d instances:' % (verb, count) for i in instances: print_instance(i) if args.terminate: i.terminate() else: i.stop()
def add_instances(args): launcher = Launcher(**launcher_args(args)) manager = [ i for i in running_or_pending_instances(launcher.get_reservations()) if 'manager' in i.tags.get('Name', '') ] if len(manager) > 1: print "There is more than one Manager instance. Can't add workers.", \ "Managers:" for m in manager: print_instance(m) return 1 elif len(manager) == 0: print "No manager instances are running. Can't add workers." return 1 if args.num_instances < 1: print "--num-instances must be greater or equal to 1." return 1 manager = manager[0] launcher.vpc_id = manager.vpc_id launcher.subnet_id = manager.subnet_id launcher.instance_type = manager.instance_type launcher.security_group_id = manager.groups[0].id print "Launching worker instance(s):" status_printer = StatusPrinter() workers = launcher.launch_workers(args.num_instances, manager.key_name, manager.id, args.spot_price, callback=status_printer.on_status) status_printer.done() print "Workers started:" for worker in workers: print_instance(worker, 'worker') print "" print "Waiting for services:" if launcher.wait_for_services(workers, callback=status_printer.on_status): status_printer.done() else: status_printer.failed()
def restart_instances(args): launcher = Launcher(**launcher_args(args)) instances = running_or_pending_instances(launcher.get_reservations()) identity_file = get_identity_file(args.identity_file) def restart_instance(instance): is_manager = 'manager' in instance.tags.get('Name', '') if is_manager: command = '"source ufora_setup.sh; \\$DOCKER stop ufora_manager; sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_manager"' else: command = '"source ufora_setup.sh; \\$DOCKER stop ufora_worker; sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_worker"' return (pad(instance.ip_address + "> ", 25), ssh_output(identity_file, instance.ip_address, command)) for ip, res in parallel_for(instances, restart_instance): for line in res.split("\n"): print ip, line
def add_instances(args): launcher = Launcher(**launcher_args(args)) manager = [i for i in running_or_pending_instances(launcher.get_reservations()) if 'manager' in i.tags.get('Name', '')] if len(manager) > 1: print "There is more than one Manager instance. Can't add workers.", \ "Managers:" for m in manager: print_instance(m) return 1 elif len(manager) == 0: print "No manager instances are running. Can't add workers." return 1 if args.num_instances < 1: print "--num-instances must be greater or equal to 1." return 1 manager = manager[0] launcher.vpc_id = manager.vpc_id launcher.subnet_id = manager.subnet_id launcher.instance_type = manager.instance_type launcher.security_group_id = manager.groups[0].id print "Launching worker instance(s):" status_printer = StatusPrinter() workers = launcher.launch_workers(args.num_instances, manager.key_name, manager.id, args.spot_price, callback=status_printer.on_status) status_printer.done() print "Workers started:" for worker in workers: print_instance(worker, 'worker') print "" print "Waiting for services:" if launcher.wait_for_services(workers, callback=status_printer.on_status): status_printer.done() else: status_printer.failed()
def worker_logs(args): launcher = Launcher(**launcher_args(args)) instances = running_or_pending_instances(launcher.get_reservations()) identity_file = get_identity_file(args.identity_file) def grep(instance): #note that we have to swap "A" and "B" because tac has reversed the order of the lines. command = ( '"source ufora_setup.sh; tac \\$LOG_DIR/logs/ufora-worker.log ' '| grep -m %s -B %s -A %s -e %s" | tac') % (args.N, args.A, args.B, args.expression) return (pad(instance.ip_address + "> ", 25), ssh_output(identity_file, instance.ip_address, command)) for ip, res in parallel_for(instances, grep): for line in res.split("\n"): print ip, line
def deploy_package(args): launcher = Launcher(**launcher_args(args)) instances = running_instances(launcher.get_reservations()) if len(instances) == 0: print "No running instances" return print "Running instances:" for i in instances: print_instance(i) print '' def is_failure(result): return isinstance(result, basestring) def any_failures(results): return any(is_failure(x) for x in results) def print_failures(results): for ix in xrange(len(results)): if is_failure(results[ix]): print instances[ix].id, "|", instances[ ix].ip_address, ':', results[ix] print "Uploading package..." results = upload_package(args.package, instances, get_identity_file(args.identity_file)) if any_failures(results): print "Failed to upload package:" print_failures(results) return print "Package uploaded successfully" print '' print "Updating service..." results = update_ufora_service(instances, get_identity_file(args.identity_file)) if any_failures(results): print "Failed to update service:" print_failures(results) return print "Service updated successfully"
def worker_load(args): cmd_to_run = 'tail -f /mnt/ufora/logs/ufora-worker.log' if args.logs else \ 'sudo apt-get install htop\\; htop' launcher = Launcher(**launcher_args(args)) instances = running_or_pending_instances(launcher.get_reservations()) identity_file = get_identity_file(args.identity_file) session = os.getenv("USER") def sh(cmd, **kwargs): try: print "CMD =", cmd.format(SESSION=session, **kwargs) subprocess.check_output(cmd.format(SESSION=session, **kwargs), shell=True) except subprocess.CalledProcessError: import traceback traceback.print_exc() sh("tmux -2 kill-session -t {SESSION}") sh("tmux -2 new-session -d -s {SESSION}") # Setup a window for tailing log files sh("tmux new-window -t {SESSION}:1 -n 'pyfora_htop'") for ix in xrange((len(instances) - 1) / 2): sh("tmux split-window -v -t 0 -l 20") for ix in xrange(len(instances) / 2): sh("tmux split-window -h -t {ix}", ix=ix) # for ix in xrange(len(instances)-1,0,-1): # sh('tmux resize-pane -t {ix} -y 20', ix=ix) for ix in xrange(len(instances)): sh('tmux send-keys -t {ix} "ssh ubuntu@%s -t -i %s %s" C-m' % (instances[ix].ip_address, identity_file, cmd_to_run), ix=ix) # Attach to session sh('tmux -2 attach-session -t {SESSION}')
def worker_load(args): cmd_to_run = 'tail -f /mnt/ufora/logs/ufora-worker.log' if args.logs else \ 'sudo apt-get install htop\\; htop' launcher = Launcher(**launcher_args(args)) instances = running_or_pending_instances(launcher.get_reservations()) identity_file = get_identity_file(args.identity_file) session = os.getenv("USER") def sh(cmd, **kwargs): try: print "CMD =", cmd.format(SESSION=session, **kwargs) subprocess.check_output(cmd.format(SESSION=session, **kwargs), shell=True) except subprocess.CalledProcessError: import traceback traceback.print_exc() sh("tmux -2 kill-session -t {SESSION}") sh("tmux -2 new-session -d -s {SESSION}") # Setup a window for tailing log files sh("tmux new-window -t {SESSION}:1 -n 'pyfora_htop'") for ix in xrange((len(instances)-1)/2): sh("tmux split-window -v -t 0 -l 20") for ix in xrange(len(instances)/2): sh("tmux split-window -h -t {ix}", ix=ix) # for ix in xrange(len(instances)-1,0,-1): # sh('tmux resize-pane -t {ix} -y 20', ix=ix) for ix in xrange(len(instances)): sh('tmux send-keys -t {ix} "ssh ubuntu@%s -t -i %s %s" C-m' % (instances[ix].ip_address, identity_file, cmd_to_run), ix=ix) # Attach to session sh('tmux -2 attach-session -t {SESSION}')
def deploy_package(args): launcher = Launcher(**launcher_args(args)) instances = running_instances(launcher.get_reservations()) if len(instances) == 0: print "No running instances" return print "Running instances:" for i in instances: print_instance(i) print '' def is_failure(result): return isinstance(result, basestring) def any_failures(results): return any(is_failure(x) for x in results) def print_failures(results): for ix in xrange(len(results)): if is_failure(results[ix]): print instances[ix].id, "|", instances[ix].ip_address, ':', results[ix] print "Uploading package..." results = upload_package(args.package, instances, get_identity_file(args.identity_file)) if any_failures(results): print "Failed to upload package:" print_failures(results) return print "Package uploaded successfully" print '' print "Updating service..." results = update_ufora_service(instances, get_identity_file(args.identity_file)) if any_failures(results): print "Failed to update service:" print_failures(results) return print "Service updated successfully"
def stop_instances(args): launcher = Launcher(**launcher_args(args)) reservations = launcher.get_reservations() instances = running_or_pending_instances(reservations) count = len(instances) if count == 0: print "No running instances to stop" else: verb = 'Terminating' if args.terminate else 'Stopping' print '%s %d instances:' % (verb, count) for i in instances: print_instance(i) if args.terminate: i.terminate() else: i.stop() spot_requests = reservations['unfulfilled_spot_requests'] if spot_requests: print "Cancelling %d unfulfilled spot instance requests:" % len(spot_requests) for r in spot_requests: print_spot_request(r) r.cancel()
def restart_instances(args): launcher = Launcher(**launcher_args(args)) instances = running_or_pending_instances(launcher.get_reservations()) identity_file = get_identity_file(args.identity_file) def restart_instance(instance): is_manager = 'manager' in instance.tags.get('Name', '') if is_manager: command = ( '"source ufora_setup.sh; \\$DOCKER stop ufora_manager; ' 'sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_manager"') else: command = ( '"source ufora_setup.sh; \\$DOCKER stop ufora_worker; ' 'sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_worker"') return (pad(instance.ip_address + "> ", 25), ssh_output(identity_file, instance.ip_address, command)) for ip, res in parallel_for(instances, restart_instance): for line in res.split("\n"): print ip, line
def stop_instances(args): launcher = Launcher(**launcher_args(args)) reservations = launcher.get_reservations() instances = running_or_pending_instances(reservations) count = len(instances) if count == 0: print "No running instances to stop" else: verb = 'Terminating' if args.terminate else 'Stopping' print '%s %d instances:' % (verb, count) for i in instances: print_instance(i) if args.terminate: i.terminate() else: i.stop() spot_requests = reservations['unfulfilled_spot_requests'] if spot_requests: print "Cancelling %d unfulfilled spot instance requests:" % len( spot_requests) for r in spot_requests: print_spot_request(r) r.cancel()