Esempio n. 1
0
def list_instances(args):
    launcher = Launcher(region=get_region(args.ec2_region))
    reservations = launcher.get_reservations()
    count = sum(len(r.instances) for r in reservations)
    print "%d instance%s%s" % (count, 's' if count != 1 else '',
                               ':' if count > 0 else '')
    for r in reservations:
        for i in r.instances:
            print_instance(i)
Esempio n. 2
0
def list_instances(args):
    launcher = Launcher(region=get_region(args.ec2_region))
    reservations = launcher.get_reservations()
    count = sum(len(r.instances) for r in reservations)
    print "%d instance%s%s" % (
        count, 's' if count != 1 else '', ':' if count > 0 else ''
        )
    for r in reservations:
        for i in r.instances:
            print_instance(i)
Esempio n. 3
0
    def list_instances(self):
        """Returns the current instances in the cluster.

        Reservations are fetched through a ``Launcher`` built from
        ``self.name`` and ``self.region``, narrowed with
        ``self._running_or_pending_instances`` and split with
        ``self._is_manager``.

        Returns:
            :class:`Instances`: The manager instances, the worker instances
            and the reservations' ``'unfulfilled_spot_requests'`` entry.
        """
        reservations = Launcher(self.name, self.region).get_reservations()
        candidates = self._running_or_pending_instances(reservations)

        managers = [inst for inst in candidates if self._is_manager(inst)]
        workers = [inst for inst in candidates if not self._is_manager(inst)]
        pending_requests = reservations['unfulfilled_spot_requests']

        return Instances(manager=managers,
                         workers=workers,
                         unfulfilled=pending_requests)
Esempio n. 4
0
def worker_logs(args):
    launcher = Launcher(**launcher_args(args))
    instances = running_or_pending_instances(launcher.get_reservations())
    identity_file = get_identity_file(args.identity_file)

    def grep(instance):
        #note that we have to swap "A" and "B" because tac has reversed the order of the lines.
        command = '"source ufora_setup.sh; tac \\$LOG_DIR/logs/ufora-worker.log | grep -m %s -B %s -A %s -e %s" | tac' % (args.N, args.A, args.B, args.expression)
        
        return (pad(instance.ip_address + "> ", 25), ssh_output(identity_file, instance.ip_address, command))

    for ip, res in parallel_for(instances, grep):
        for line in res.split("\n"):
            print ip, line
Esempio n. 5
0
    def list_instances(self):
        """Returns the current instances in the cluster.

        The running or pending instances found in the launcher's
        reservations are divided into managers and workers according to
        ``self._is_manager``.

        Returns:
            :class:`Instances`: The collection of instances in the cluster,
            together with the reservations' ``'unfulfilled_spot_requests'``
            entry.
        """
        launcher = Launcher(self.name, self.region)
        reservations = launcher.get_reservations()
        instances = self._running_or_pending_instances(reservations)

        managers = []
        for instance in instances:
            if self._is_manager(instance):
                managers.append(instance)

        workers = []
        for instance in instances:
            if not self._is_manager(instance):
                workers.append(instance)

        unfulfilled = reservations['unfulfilled_spot_requests']
        return Instances(manager=managers, workers=workers,
                         unfulfilled=unfulfilled)
Esempio n. 6
0
def list_instances(args):
    launcher = Launcher(**launcher_args(args))
    reservations = launcher.get_reservations()
    instances = running_or_pending_instances(reservations)
    count = len(instances)
    print "%d instance%s%s" % (count, 's' if count != 1 else '', ':' if count > 0 else '')
    for i in instances:
        print_instance(i)

    if reservations['unfulfilled_spot_requests']:
        print ""
        count = len(reservations['unfulfilled_spot_requests'])
        print "%d unfulfilled spot instance request%s:" % (count, 's' if count != 1 else '')
        for r in reservations['unfulfilled_spot_requests']:
            print_spot_request(r)
Esempio n. 7
0
def stop_instances(args):
    launcher = Launcher(region=get_region(args.ec2_region))
    instances = running_or_pending_instances(launcher.get_reservations())
    count = len(instances)
    if count == 0:
        print "No running instances to stop"
        return

    verb = 'Terminating' if args.terminate else 'Stopping'
    print '%s %d instances:' % (verb, count)
    for i in instances:
        print_instance(i)
        if args.terminate:
            i.terminate()
        else:
            i.stop()
Esempio n. 8
0
def stop_instances(args):
    launcher = Launcher(region=get_region(args.ec2_region))
    instances = running_or_pending_instances(launcher.get_reservations())
    count = len(instances)
    if count == 0:
        print "No running instances to stop"
        return

    verb = 'Terminating' if args.terminate else 'Stopping'
    print '%s %d instances:' % (verb, count)
    for i in instances:
        print_instance(i)
        if args.terminate:
            i.terminate()
        else:
            i.stop()
Esempio n. 9
0
def list_instances(args):
    launcher = Launcher(**launcher_args(args))
    reservations = launcher.get_reservations()
    instances = running_or_pending_instances(reservations)
    count = len(instances)
    print "%d instance%s%s" % (count, 's' if count != 1 else '',
                               ':' if count > 0 else '')
    for i in instances:
        print_instance(i)

    if reservations['unfulfilled_spot_requests']:
        print ""
        count = len(reservations['unfulfilled_spot_requests'])
        print "%d unfulfilled spot instance request%s:" % (count, 's' if
                                                           count != 1 else '')
        for r in reservations['unfulfilled_spot_requests']:
            print_spot_request(r)
Esempio n. 10
0
def add_instances(args):
    launcher = Launcher(**launcher_args(args))
    manager = [
        i for i in running_or_pending_instances(launcher.get_reservations())
        if 'manager' in i.tags.get('Name', '')
    ]
    if len(manager) > 1:
        print "There is more than one Manager instance. Can't add workers.", \
            "Managers:"
        for m in manager:
            print_instance(m)
        return 1
    elif len(manager) == 0:
        print "No manager instances are running. Can't add workers."
        return 1

    if args.num_instances < 1:
        print "--num-instances must be greater or equal to 1."
        return 1

    manager = manager[0]
    launcher.vpc_id = manager.vpc_id
    launcher.subnet_id = manager.subnet_id
    launcher.instance_type = manager.instance_type
    launcher.security_group_id = manager.groups[0].id

    print "Launching worker instance(s):"
    status_printer = StatusPrinter()
    workers = launcher.launch_workers(args.num_instances,
                                      manager.key_name,
                                      manager.id,
                                      args.spot_price,
                                      callback=status_printer.on_status)
    status_printer.done()

    print "Workers started:"
    for worker in workers:
        print_instance(worker, 'worker')

    print ""
    print "Waiting for services:"
    if launcher.wait_for_services(workers, callback=status_printer.on_status):
        status_printer.done()
    else:
        status_printer.failed()
Esempio n. 11
0
def restart_instances(args):
    launcher = Launcher(**launcher_args(args))
    instances = running_or_pending_instances(launcher.get_reservations())
    identity_file = get_identity_file(args.identity_file)

    def restart_instance(instance):
        is_manager = 'manager' in instance.tags.get('Name', '')

        if is_manager:
            command = '"source ufora_setup.sh; \\$DOCKER stop ufora_manager; sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_manager"'
        else:
            command = '"source ufora_setup.sh; \\$DOCKER stop ufora_worker; sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_worker"'

        return (pad(instance.ip_address + "> ", 25), ssh_output(identity_file, instance.ip_address, command))

    for ip, res in parallel_for(instances, restart_instance):
        for line in res.split("\n"):
            print ip, line
Esempio n. 12
0
def add_instances(args):
    launcher = Launcher(**launcher_args(args))
    manager = [i for i in running_or_pending_instances(launcher.get_reservations())
               if 'manager' in i.tags.get('Name', '')]
    if len(manager) > 1:
        print "There is more than one Manager instance. Can't add workers.", \
            "Managers:"
        for m in manager:
            print_instance(m)
        return 1
    elif len(manager) == 0:
        print "No manager instances are running. Can't add workers."
        return 1

    if args.num_instances < 1:
        print "--num-instances must be greater or equal to 1."
        return 1

    manager = manager[0]
    launcher.vpc_id = manager.vpc_id
    launcher.subnet_id = manager.subnet_id
    launcher.instance_type = manager.instance_type
    launcher.security_group_id = manager.groups[0].id

    print "Launching worker instance(s):"
    status_printer = StatusPrinter()
    workers = launcher.launch_workers(args.num_instances,
                                      manager.key_name,
                                      manager.id,
                                      args.spot_price,
                                      callback=status_printer.on_status)
    status_printer.done()

    print "Workers started:"
    for worker in workers:
        print_instance(worker, 'worker')

    print ""
    print "Waiting for services:"
    if launcher.wait_for_services(workers, callback=status_printer.on_status):
        status_printer.done()
    else:
        status_printer.failed()
Esempio n. 13
0
def worker_logs(args):
    launcher = Launcher(**launcher_args(args))
    instances = running_or_pending_instances(launcher.get_reservations())
    identity_file = get_identity_file(args.identity_file)

    def grep(instance):
        #note that we have to swap "A" and "B" because tac has reversed the order of the lines.
        command = (
            '"source ufora_setup.sh; tac \\$LOG_DIR/logs/ufora-worker.log '
            '| grep -m %s -B %s -A %s -e %s" | tac') % (args.N, args.A, args.B,
                                                        args.expression)

        return (pad(instance.ip_address + "> ",
                    25), ssh_output(identity_file, instance.ip_address,
                                    command))

    for ip, res in parallel_for(instances, grep):
        for line in res.split("\n"):
            print ip, line
Esempio n. 14
0
def deploy_package(args):
    launcher = Launcher(**launcher_args(args))
    instances = running_instances(launcher.get_reservations())
    if len(instances) == 0:
        print "No running instances"
        return

    print "Running instances:"
    for i in instances:
        print_instance(i)
    print ''

    def is_failure(result):
        return isinstance(result, basestring)

    def any_failures(results):
        return any(is_failure(x) for x in results)

    def print_failures(results):
        for ix in xrange(len(results)):
            if is_failure(results[ix]):
                print instances[ix].id, "|", instances[
                    ix].ip_address, ':', results[ix]

    print "Uploading package..."
    results = upload_package(args.package, instances,
                             get_identity_file(args.identity_file))
    if any_failures(results):
        print "Failed to upload package:"
        print_failures(results)
        return
    print "Package uploaded successfully"
    print ''

    print "Updating service..."
    results = update_ufora_service(instances,
                                   get_identity_file(args.identity_file))
    if any_failures(results):
        print "Failed to update service:"
        print_failures(results)
        return
    print "Service updated successfully"
Esempio n. 15
0
def worker_load(args):
    cmd_to_run = 'tail -f /mnt/ufora/logs/ufora-worker.log' if args.logs else \
        'sudo apt-get install htop\\; htop'
    launcher = Launcher(**launcher_args(args))
    instances = running_or_pending_instances(launcher.get_reservations())
    identity_file = get_identity_file(args.identity_file)

    session = os.getenv("USER")

    def sh(cmd, **kwargs):
        try:
            print "CMD =", cmd.format(SESSION=session, **kwargs)
            subprocess.check_output(cmd.format(SESSION=session, **kwargs),
                                    shell=True)
        except subprocess.CalledProcessError:
            import traceback
            traceback.print_exc()

    sh("tmux -2 kill-session -t {SESSION}")

    sh("tmux -2 new-session -d -s {SESSION}")

    # Setup a window for tailing log files
    sh("tmux new-window -t {SESSION}:1 -n 'pyfora_htop'")

    for ix in xrange((len(instances) - 1) / 2):
        sh("tmux split-window -v -t 0 -l 20")

    for ix in xrange(len(instances) / 2):
        sh("tmux split-window -h -t {ix}", ix=ix)

    # for ix in xrange(len(instances)-1,0,-1):
    #     sh('tmux resize-pane -t {ix} -y 20', ix=ix)

    for ix in xrange(len(instances)):
        sh('tmux send-keys -t {ix} "ssh ubuntu@%s -t -i %s %s" C-m' %
           (instances[ix].ip_address, identity_file, cmd_to_run),
           ix=ix)

    # Attach to session
    sh('tmux -2 attach-session -t {SESSION}')
Esempio n. 16
0
def worker_load(args):
    cmd_to_run = 'tail -f /mnt/ufora/logs/ufora-worker.log' if args.logs else \
        'sudo apt-get install htop\\; htop'
    launcher = Launcher(**launcher_args(args))
    instances = running_or_pending_instances(launcher.get_reservations())
    identity_file = get_identity_file(args.identity_file)

    session = os.getenv("USER")
    def sh(cmd, **kwargs):
        try:
            print "CMD =", cmd.format(SESSION=session, **kwargs)
            subprocess.check_output(cmd.format(SESSION=session, **kwargs), shell=True)
        except subprocess.CalledProcessError:
            import traceback
            traceback.print_exc()

    sh("tmux -2 kill-session -t {SESSION}")

    sh("tmux -2 new-session -d -s {SESSION}")

    # Setup a window for tailing log files
    sh("tmux new-window -t {SESSION}:1 -n 'pyfora_htop'")

    for ix in xrange((len(instances)-1)/2):
        sh("tmux split-window -v -t 0 -l 20")

    for ix in xrange(len(instances)/2):
        sh("tmux split-window -h -t {ix}", ix=ix)

    # for ix in xrange(len(instances)-1,0,-1):
    #     sh('tmux resize-pane -t {ix} -y 20', ix=ix)

    for ix in xrange(len(instances)):
        sh('tmux send-keys -t {ix} "ssh ubuntu@%s -t -i %s %s" C-m' % (instances[ix].ip_address,
                                                                       identity_file,
                                                                       cmd_to_run),
           ix=ix)


    # Attach to session
    sh('tmux -2 attach-session -t {SESSION}')
Esempio n. 17
0
def deploy_package(args):
    launcher = Launcher(**launcher_args(args))
    instances = running_instances(launcher.get_reservations())
    if len(instances) == 0:
        print "No running instances"
        return

    print "Running instances:"
    for i in instances:
        print_instance(i)
    print ''

    def is_failure(result):
        return isinstance(result, basestring)

    def any_failures(results):
        return any(is_failure(x) for x in results)

    def print_failures(results):
        for ix in xrange(len(results)):
            if is_failure(results[ix]):
                print instances[ix].id, "|", instances[ix].ip_address, ':', results[ix]

    print "Uploading package..."
    results = upload_package(args.package, instances, get_identity_file(args.identity_file))
    if any_failures(results):
        print "Failed to upload package:"
        print_failures(results)
        return
    print "Package uploaded successfully"
    print ''

    print "Updating service..."
    results = update_ufora_service(instances, get_identity_file(args.identity_file))
    if any_failures(results):
        print "Failed to update service:"
        print_failures(results)
        return
    print "Service updated successfully"
Esempio n. 18
0
def stop_instances(args):
    launcher = Launcher(**launcher_args(args))
    reservations = launcher.get_reservations()
    instances = running_or_pending_instances(reservations)
    count = len(instances)
    if count == 0:
        print "No running instances to stop"
    else:
        verb = 'Terminating' if args.terminate else 'Stopping'
        print '%s %d instances:' % (verb, count)
        for i in instances:
            print_instance(i)
            if args.terminate:
                i.terminate()
            else:
                i.stop()

    spot_requests = reservations['unfulfilled_spot_requests']
    if spot_requests:
        print "Cancelling %d unfulfilled spot instance requests:" % len(spot_requests)
        for r in spot_requests:
            print_spot_request(r)
            r.cancel()
Esempio n. 19
0
def restart_instances(args):
    launcher = Launcher(**launcher_args(args))
    instances = running_or_pending_instances(launcher.get_reservations())
    identity_file = get_identity_file(args.identity_file)

    def restart_instance(instance):
        is_manager = 'manager' in instance.tags.get('Name', '')

        if is_manager:
            command = (
                '"source ufora_setup.sh; \\$DOCKER stop ufora_manager; '
                'sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_manager"')
        else:
            command = (
                '"source ufora_setup.sh; \\$DOCKER stop ufora_worker; '
                'sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_worker"')

        return (pad(instance.ip_address + "> ",
                    25), ssh_output(identity_file, instance.ip_address,
                                    command))

    for ip, res in parallel_for(instances, restart_instance):
        for line in res.split("\n"):
            print ip, line
Esempio n. 20
0
def stop_instances(args):
    launcher = Launcher(**launcher_args(args))
    reservations = launcher.get_reservations()
    instances = running_or_pending_instances(reservations)
    count = len(instances)
    if count == 0:
        print "No running instances to stop"
    else:
        verb = 'Terminating' if args.terminate else 'Stopping'
        print '%s %d instances:' % (verb, count)
        for i in instances:
            print_instance(i)
            if args.terminate:
                i.terminate()
            else:
                i.stop()

    spot_requests = reservations['unfulfilled_spot_requests']
    if spot_requests:
        print "Cancelling %d unfulfilled spot instance requests:" % len(
            spot_requests)
        for r in spot_requests:
            print_spot_request(r)
            r.cancel()