Example #1
def import_nodes(*args, **kwargs):
    """ec2 nodes import name_or_config instance1_id instance2_id ... [--wait-timeout=300 --pem=/path/to/key.pem]

    Import instances from a spot reservation and then perform the boot sequence on them.
    The command will block for wait-timeout seconds, or until all
    nodes reach a ready state (currently defined as being pingable
    and containing files indicating readiness). A wait-timeout of 0
    disables this. A pem file, passed either on the command line or
    through the environment, is required for the timeout to work
    properly. Note that with timeouts enabled, this will verify that
    the nodes reach a ready state.
    """

    name_or_config, instances_to_add = arguments.parse_or_die(import_nodes, [object], rest=True, *args)
    timeout = config.kwarg_or_default('wait-timeout', kwargs, default=600)
    # Note pemfile is different from other places since it's only required with wait-timeout.
    pemfile = config.kwarg_or_get('pem', kwargs, 'SIRIKATA_CLUSTER_PEMFILE', default=None)
    name, cc = name_and_config(name_or_config)

    if 'spot' not in cc.state:
        print "It looks like this cluster hasn't made a spot reservation..."
        return 1

    conn = EC2Connection(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)

    instances_to_add = list(instances_to_add)
    if len(instances_to_add) == 0:
        print "No instances specified, trying to use full list of account instances..."
        reservations = conn.get_all_instances()
        for res in reservations:
            instances_to_add += [inst.id for inst in res.instances if inst.state == 'running']
    if len(instances_to_add) != cc.size:
        print "Number of instances doesn't match the cluster size. Make sure you explicitly specify %d instances" % (cc.size)
        return 1

    cc.state['instances'] = instances_to_add

    # Verify the instances are valid, just checking that we get valid
    # objects back when we look them up with AWS
    print "Verifying instances are valid..."
    instances = get_all_instances(cc, conn)
    if len(instances) != len(instances_to_add):
        print "Only got %d instances back, you'll need to manually clean things up..." % len(instances)
        return 1

    # Cache some information about the instances which shouldn't change
    cc.state['instance_props'] = dict(
        [
            (instid, {
                    'id' : instances[instid].id,
                    'ip' : instances[instid].ip_address,
                    'hostname' : instances[instid].dns_name,
                    'private_ip' : instances[instid].private_ip_address,
                    'private_hostname' : instances[instid].private_dns_name,
                    }) for instid in instances_to_add])
    cc.save()

    return name_and_boot_nodes(cc, conn, pemfile, timeout)
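
Because parse_or_die and the kwarg helpers pull everything out of *args and **kwargs, the command can also be driven directly from Python. A minimal sketch, assuming kwarg_or_default/kwarg_or_get look options up under their literal hyphenated names; the cluster name, instance ids, and pem path are placeholders:

# Hypothetical direct call; 'mycluster', the instance ids and the pem path are made up.
import_nodes('mycluster', 'i-0123abcd', 'i-0456ef01',
             **{'wait-timeout': 300, 'pem': '/home/ubuntu/keys/cluster.pem'})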
Example #2
def sync_sirikata(*args, **kwargs):
    """ec2 sync sirikata /path/to/installed/sirikata [--puppet-path=/etc/puppet] [--notify-puppets=cluster_name_or_config]

    Package a version of Sirikata installed in the given path and set
    it up with Puppet for distribution to puppet agent nodes.

    If you already have puppets running, add
    --notify-puppets=cluster_name to trigger a puppet update (runs the
    equivalent of sirikata-cluster.py puppet slaves restart cluster_name)
    """

    installed_path = arguments.parse_or_die(sync_sirikata, [str], *args)
    puppet_base_path = config.kwarg_or_get('puppet-path', kwargs, 'PUPPET_PATH', default='/etc/puppet')
    notify_puppets = config.kwarg_or_default('notify-puppets', kwargs)
    # Note pemfile is different from other places since it's only required with notify-puppets.
    pemfile = config.kwarg_or_get('pem', kwargs, 'SIRIKATA_CLUSTER_PEMFILE', default=None)

    # Generate the archive if given a directory
    gen_file = installed_path
    if os.path.isdir(installed_path):
        retcode = util_sirikata.package(installed_path)
        if retcode != 0: return retcode
        gen_file = os.path.join(installed_path, 'sirikata.tar.bz2')

    # Make sure we have a place to put the file
    dest_dir = os.path.join(puppet_base_path, 'modules', 'sirikata', 'files', 'home', 'ubuntu')
    if not os.path.exists(dest_dir):
        # Need root for this, so we have to do it through subprocess
        subprocess.call(['sudo', 'mkdir', '-p', dest_dir])

    # And copy it into place
    print "Copying archive into puppet"
    dest_file = os.path.join(dest_dir, 'sirikata.tar.bz2')
    subprocess.call(['sudo', 'cp', gen_file, dest_file])

    if notify_puppets:
        print "Notifying puppets"
        slaves_restart_kwargs = {}
        if pemfile is not None: slaves_restart_kwargs['pem'] = pemfile
        # notify_puppets == cluster name
        # Nuke old tar.bz2's so new ones will be downloaded
        nodes.ssh(notify_puppets, 'rm', '-f', 'sirikata.tar.bz2', **slaves_restart_kwargs)
        puppet.slaves_restart(notify_puppets, **slaves_restart_kwargs)
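
Putting the pieces together, a typical call packages an installed tree, drops the archive into the puppet module, and then pokes the running agents. A sketch under the same assumption about the kwarg helpers; the install path, pem path, and cluster name are placeholders:

# Hypothetical invocation; paths and cluster name are invented for illustration.
sync_sirikata('/home/ubuntu/sirikata/installed',
              **{'puppet-path': '/etc/puppet',
                 'notify-puppets': 'mycluster',
                 'pem': '/home/ubuntu/keys/cluster.pem'})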
Example #3
def add_service(*args, **kwargs):
    """ec2 add service cluster_name_or_config service_id target_node|any [--user=user] [--cwd=/path/to/execute] [--] command to run

    Add a service to run on the cluster. The service needs to be
    assigned a unique id (a string) and takes the form of a command
    (which should be able to be shut down via signals). If the command
    requires parameters of the form --setting=value, make sure you add
    -- before the command so they aren't used as arguments to this
    command. You should also be sure that the command's binary is
    specified as a full path.

    user specifies the user account that should execute the service.
    cwd sets the working directory for the service.

    To make handling PID files easier, any appearance of PIDFILE in
    your command arguments will be replaced with the path to the PID
    file selected. For example, you might add --pid-file=PIDFILE as an
    argument.
    """

    name_or_config, service_name, target_node, service_cmd = arguments.parse_or_die(add_service, [object, str, str], rest=True, *args)
    cname, cc = name_and_config(name_or_config)

    user = config.kwarg_or_default('user', kwargs, default=None)
    cwd = config.kwarg_or_default('cwd', kwargs, default=None)
    force_daemonize = bool(config.kwarg_or_default('force-daemonize', kwargs, default=False))

    if not len(service_cmd):
        print "You need to specify a command for the service"
        return 1

    if 'services' not in cc.state: cc.state['services'] = {}
    if service_name in cc.state['services']:
        print "The requested service already exists."
        return 1

    if not os.path.isabs(service_cmd[0]):
        print "The path to the service's binary isn't absolute (%s)" % service_cmd[0]
        print args
        print service_cmd
        return 1

    conn = EC2Connection(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)
    target_node_inst = get_node(cc, conn, target_node)
    target_node_pacemaker_id = get_node_pacemaker_id(cc, conn, target_node)
    target_node_hostname = get_node_hostname(cc, conn, target_node)

    # Can now get default values that depend on the node
    if user is None: user = cc.user(target_node)
    if cwd is None: cwd = cc.default_working_path(target_node)

    service_binary = service_cmd[0]

    pidfile = os.path.join(cc.workspace_path(), 'sirikata_%s.pid' % (service_name) )

    daemon_cmd = ['start-stop-daemon', '--start',
                  '--pidfile', pidfile,
                  '--user', user,
                  '--chdir', cwd,
                  # '--test'
                  ]
    if force_daemonize:
        daemon_cmd += ['--background', '--make-pidfile']
    daemon_cmd += ['--exec', service_binary,
                   '--'] + [arg.replace('PIDFILE', pidfile).replace('FQDN', target_node_hostname) for arg in service_cmd[1:]]
    retcode = node_ssh(cc, target_node_inst.id,
                       *daemon_cmd)
    if retcode != 0:
        print "Failed to add cluster service"
        return retcode

    # Save a record of this service so we can find it again when we need to stop it.
    cc.state['services'][service_name] = {
        'node' : target_node_inst.id,
        'binary' : service_binary
        }
    cc.save()

    return retcode
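
The PIDFILE and FQDN placeholders are handled by the plain string replacement over service_cmd[1:] shown above, so the substitution is easy to illustrate in isolation (the binary, arguments, pid file, and hostname below are invented):

# Sketch of the placeholder substitution; all values are hypothetical.
pidfile = '/home/ubuntu/sirikata_myspace.pid'
hostname = 'ec2-203-0-113-7.compute-1.amazonaws.com'
service_cmd = ['/home/ubuntu/sirikata/bin/space',
               '--pid-file=PIDFILE', '--servermap-options=--host=FQDN']
substituted = [arg.replace('PIDFILE', pidfile).replace('FQDN', hostname)
               for arg in service_cmd[1:]]
# substituted == ['--pid-file=/home/ubuntu/sirikata_myspace.pid',
#                 '--servermap-options=--host=ec2-203-0-113-7.compute-1.amazonaws.com']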
Example #4
def boot(*args, **kwargs):
    """ec2 nodes boot name_or_config [--wait-timeout=300 --pem=/path/to/key.pem]

    Boot a cluster's nodes. The command will block for wait-timeout
    seconds, or until all nodes reach a ready state (currently defined
    as being pingable and containing files indicating readiness).
    A wait-timeout of 0 disables this. A pem file, passed either on
    the command line or through the environment, is required for the
    timeout to work properly. Note that with timeouts enabled, this
    will verify that the nodes reach a ready state.
    """

    name_or_config = arguments.parse_or_die(boot, [object], *args)
    timeout = config.kwarg_or_default('wait-timeout', kwargs, default=600)
    # Note pemfile is different from other places since it's only required with wait-timeout.
    pemfile = config.kwarg_or_get('pem', kwargs, 'SIRIKATA_CLUSTER_PEMFILE', default=None)
    name, cc = name_and_config(name_or_config)

    if 'reservation' in cc.state or 'spot' in cc.state or 'instances' in cc.state:
        print "It looks like you already have active nodes for this cluster..."
        exit(1)

    if timeout > 0 and not pemfile:
        print "You need to specify a pem file to use timeouts."
        exit(1)

    # Load the setup script template, replace puppet master info
    user_data = data.load('ec2-user-data', 'node-setup.sh')
    user_data = user_data.replace('{{{PUPPET_MASTER}}}', cc.puppet_master)

    # Unlike spot instances, where we can easily request that any
    # availability zone be used but that all instances land in the same
    # AZ, here we have to specify an AZ directly. We just choose one
    # randomly for now...
    conn = EC2Connection(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)
    zones = conn.get_all_zones()
    zone = random.choice(zones).name

    # Now create the nodes
    reservation = conn.run_instances(cc.ami,
                                     placement=zone,
                                     min_count=cc.size, max_count=cc.size,
                                     key_name=cc.keypair,
                                     instance_type=cc.instance_type,
                                     security_groups=[cc.group],
                                     user_data=user_data
                                     )

    # Save reservation, instance info
    cc.state['reservation'] = reservation.id
    cc.state['instances'] = [inst.id for inst in reservation.instances]
    cc.save()
    # Cache some information about the instances which shouldn't
    # change. However, this can take some time to come up properly, so
    # we may need to poll a few times before we get the right info
    print "Collecting node information..."
    while True:
        new_instances = get_all_instances(cc, conn)
        if any([inst.ip_address is None or inst.dns_name is None or inst.private_ip_address is None or inst.private_dns_name is None for inst in new_instances.values()]):
            time.sleep(5)
            continue
        cc.state['instance_props'] = dict(
            [
                (inst.id, {
                        'id' : inst.id,
                        'ip' : inst.ip_address,
                        'hostname' : inst.dns_name,
                        'private_ip' : inst.private_ip_address,
                        'private_hostname' : inst.private_dns_name,
                        }) for inst in new_instances.values()])
        break
    cc.save()
    return name_and_boot_nodes(cc, conn, pemfile, timeout)
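
Once the polling loop has seen addresses for every instance, cc.state['instance_props'] holds one entry per node with the fields cached above. Roughly, the structure looks like this (instance ids and addresses are invented for illustration):

# Illustrative shape of the cached instance data; all values are hypothetical.
cc.state['instance_props'] = {
    'i-0123abcd': {
        'id': 'i-0123abcd',
        'ip': '203.0.113.10',
        'hostname': 'ec2-203-0-113-10.compute-1.amazonaws.com',
        'private_ip': '10.0.0.12',
        'private_hostname': 'ip-10-0-0-12.ec2.internal',
    },
    # ...one entry per instance id in the reservation
}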