예제 #1
0
def add_instances(args):
    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.list_instances()

    if len(instances.manager) > 1:
        print "There is more than one Manager instance. Can't add workers.", \
            "Managers:"
        for m in instances.manager:
            print_instance(m)
        return 1
    elif len(instances.manager) == 0:
        print "No manager instance is running. Can't add workers."
        return 1

    if args.num_instances < 1:
        print "--num-instances must be greater or equal to 1."
        return 1

    status_printer = StatusPrinter()
    cluster.add_workers(instances.manager[0],
                        args.num_instances,
                        args.spot_price,
                        status_printer.on_event)


    status_printer.done()
예제 #2
0
def start_instances(args):
    assert args.num_instances > 0
    ssh_keyname = get_ssh_keyname(args.ssh_keyname)
    open_public_port = args.open_public_port or ssh_keyname is None
    if ssh_keyname is None and not args.yes_all:
        response = raw_input(
            "You are launching instances without specifying an ssh key-pair name.\n"
            "You will not be able to log into the launched instances.\n"
            "You can specify a key-pair using the --ssh-keyname option.\n"
            "Do you want to continue without a keypair (Y/n)? "
            )
        if response not in ('Y', 'y'):
            return

    if args.name is None:
        args.name = 'pyfora'
        print '--name argument was not specified. Using default name: ' + args.name

    status_printer = StatusPrinter(args.open_public_port)

    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.launch(args.instance_type,
                               ssh_keyname,
                               args.num_instances,
                               open_public_port,
                               args.vpc_id,
                               args.subnet_id,
                               args.security_group_id,
                               args.spot_price,
                               status_printer.on_event)


    if not instances.manager:
        list_instances(args)
        return
예제 #3
0
def stop_instances(args):
    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.list_instances()

    count = len(instances.manager) + len(instances.workers)
    if count == 0:
        print "No running instances to stop"
    else:
        verb = 'Terminating' if args.terminate else 'Stopping'
        print '%s %d instances:' % (verb, count)
        for i in itertools.chain(instances.workers, instances.manager):
            print_instance(i)

    if instances.unfulfilled:
        print "Cancelling %d unfulfilled spot instance requests:" % len(instances.unfulfilled)
        for r in instances.unfulfilled:
            print_spot_request(r)

    cluster.stop(instances, args.terminate)
예제 #4
0
def worker_load(args):
    cmd_to_run = 'tail -f /mnt/ufora/logs/ufora-worker.log' if args.logs else \
        'sudo apt-get install htop\\; htop'
    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.list_instances()
    instances = instances.manager + instances.workers
    identity_file = get_identity_file(args.identity_file)

    session = os.getenv("USER")
    def sh(cmd, **kwargs):
        try:
            print "CMD =", cmd.format(SESSION=session, **kwargs)
            subprocess.check_output(cmd.format(SESSION=session, **kwargs), shell=True)
        except subprocess.CalledProcessError:
            import traceback
            traceback.print_exc()

    sh("tmux -2 kill-session -t {SESSION}")

    sh("tmux -2 new-session -d -s {SESSION}")

    # Setup a window for tailing log files
    sh("tmux new-window -t {SESSION}:1 -n 'pyfora_htop'")

    for ix in xrange((len(instances)-1)/2):
        sh("tmux split-window -v -t 0 -l 20")

    for ix in xrange(len(instances)/2):
        sh("tmux split-window -h -t {ix}", ix=ix)

    # for ix in xrange(len(instances)-1,0,-1):
    #     sh('tmux resize-pane -t {ix} -y 20', ix=ix)

    for ix in xrange(len(instances)):
        sh('tmux send-keys -t {ix} "ssh ubuntu@%s -t -i %s %s" C-m' % (instances[ix].ip_address,
                                                                       identity_file,
                                                                       cmd_to_run),
           ix=ix)


    # Attach to session
    sh('tmux -2 attach-session -t {SESSION}')
예제 #5
0
def worker_logs(args):
    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.list_instances()
    instances = instances.manager + instances.workers
    identity_file = get_identity_file(args.identity_file)

    def grep(instance):
        #note that we have to swap "A" and "B" because tac has reversed the order of the lines.
        command = ('"source ufora_setup.sh; tac \\$LOG_DIR/logs/ufora-worker.log '
                   '| grep -m %s -B %s -A %s -e %s" | tac') % (args.N,
                                                               args.A,
                                                               args.B,
                                                               args.expression)

        return (pad(instance.ip_address + "> ", 25),
                ssh_output(identity_file, instance.ip_address, command))

    for ip, res in parallel_for(instances, grep):
        for line in res.split("\n"):
            print ip, line
예제 #6
0
def list_instances(args):
    cluster = Cluster(args.name, get_region(args.ec2_region))
    instances = cluster.list_instances()
    count = len(instances.workers)
    if instances.manager:
        count += len(instances.manager)
        if len(instances.manager) > 1:
            print "Something is wrong! This cluster has more than one manager!"

    print "%d instance%s%s" % (count, 's' if count != 1 else '', ':' if count > 0 else '')
    for manager in instances.manager:
        print_instance(manager)
    for i in instances.workers:
        print_instance(i)

    if instances.unfulfilled:
        print ""
        count = len(instances.unfulfilled)
        print "%d unfulfilled spot instance request%s:" % (count, 's' if count != 1 else '')
        for r in instances.unfulfilled:
            print_spot_request(r)
예제 #7
0
def deploy_package(args):
    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.list_instances()
    instances = instances.manager + instances.workers
    if len(instances) == 0:
        print "No running instances"
        return

    print "Running instances:"
    for i in instances:
        print_instance(i)
    print ''

    def is_failure(result):
        return isinstance(result, basestring)

    def any_failures(results):
        return any(is_failure(x) for x in results)

    def print_failures(results):
        for ix in xrange(len(results)):
            if is_failure(results[ix]):
                print instances[ix].id, "|", instances[ix].ip_address, ':', results[ix]

    print "Uploading package..."
    results = upload_package(args.package, instances, get_identity_file(args.identity_file))
    if any_failures(results):
        print "Failed to upload package:"
        print_failures(results)
        return
    print "Package uploaded successfully"
    print ''

    print "Updating service..."
    results = update_ufora_service(instances, get_identity_file(args.identity_file))
    if any_failures(results):
        print "Failed to update service:"
        print_failures(results)
        return
    print "Service updated successfully"
예제 #8
0
def restart_instances(args):
    cluster = Cluster(args.name, args.ec2_region)
    instances = cluster.list_instances()
    instances = instances.manager + instances.workers
    identity_file = get_identity_file(args.identity_file)

    def restart_instance(instance):
        is_manager = 'manager' in instance.tags.get('Name', '')

        if is_manager:
            command = ('"source ufora_setup.sh; \\$DOCKER stop ufora_manager; '
                       'sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_manager"')
        else:
            command = ('"source ufora_setup.sh; \\$DOCKER stop ufora_worker; '
                       'sudo rm -rf \\$LOG_DIR/*; \\$DOCKER start ufora_worker"')

        return (pad(instance.ip_address + "> ", 25),
                ssh_output(identity_file, instance.ip_address, command))

    for ip, res in parallel_for(instances, restart_instance):
        for line in res.split("\n"):
            print ip, line