def import_nodes(*args, **kwargs):
    """ec2 nodes import name_or_config instance1_id instance2_id ... [--wait-timeout=300 --pem=/path/to/key.pem]

    Import instances from a spot reservation and then perform the
    boot sequence on them. The command will block for wait-timeout
    seconds, or until all nodes reach a ready state (currently
    defined as being pingable and containing files indicating
    readiness). A wait-timeout of 0 disables this. A pem file, either
    passed on the command line or through the environment, is
    required for the timeout to work properly. Note that with
    timeouts enabled, this will check that the nodes reach a ready
    state.
    """

    name_or_config, instances_to_add = arguments.parse_or_die(import_nodes, [object], rest=True, *args)
    timeout = config.kwarg_or_default('wait-timeout', kwargs, default=600)
    # Note pemfile is different from other places since it's only required with wait-timeout.
    pemfile = config.kwarg_or_get('pem', kwargs, 'SIRIKATA_CLUSTER_PEMFILE', default=None)

    name, cc = name_and_config(name_or_config)

    if 'spot' not in cc.state:
        print "It looks like this cluster hasn't made a spot reservation..."
        return 1

    conn = EC2Connection(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)

    instances_to_add = list(instances_to_add)
    if len(instances_to_add) == 0:
        print "No instances specified, trying to use full list of account instances..."
        reservations = conn.get_all_instances()
        for res in reservations:
            instances_to_add += [inst.id for inst in res.instances if inst.state == 'running']

    if len(instances_to_add) != cc.size:
        print "Number of instances doesn't match the cluster size. Make sure you explicitly specify %d instances" % (cc.size)
        return 1

    cc.state['instances'] = instances_to_add

    # Verify the instances are valid, just checking that we get valid
    # objects back when we look them up with AWS
    print "Verifying instances are valid..."
    instances = get_all_instances(cc, conn)
    if len(instances) != len(instances_to_add):
        print "Only got %d instances back, you'll need to manually clean things up..." % len(instances)
        return 1

    # Cache some information about the instances which shouldn't change
    cc.state['instance_props'] = dict(
        [ (instid, {
              'id' : instances[instid].id,
              'ip' : instances[instid].ip_address,
              'hostname' : instances[instid].dns_name,
              'private_ip' : instances[instid].private_ip_address,
              'private_hostname' : instances[instid].private_dns_name,
              }) for instid in instances_to_add])

    cc.save()

    return name_and_boot_nodes(cc, conn, pemfile, timeout)
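# A hypothetical invocation of the command above (the cluster name, instance
# ids, and key path are placeholders), assuming the sirikata-cluster.py entry
# point referenced in sync_sirikata's docstring:
#
#   ./sirikata-cluster.py ec2 nodes import mycluster i-0123abcd i-0456ef01 \
#       --wait-timeout=300 --pem=/path/to/key.pem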
def sync_sirikata(*args, **kwargs):
    """ec2 sync sirikata /path/to/installed/sirikata [--puppet-path=/etc/puppet] [--notify-puppets=cluster_name_or_config]

    Package a version of Sirikata installed in the given path and set
    it up with Puppet for distribution to puppet agent nodes. If you
    already have puppets running, add --notify-puppets=cluster_name
    to trigger a puppet update (runs the equivalent of
    sirikata-cluster.py puppet slaves restart cluster_name)
    """

    installed_path = arguments.parse_or_die(sync_sirikata, [str], *args)
    puppet_base_path = config.kwarg_or_get('puppet-path', kwargs, 'PUPPET_PATH', default='/etc/puppet')
    notify_puppets = config.kwarg_or_default('notify-puppets', kwargs)
    # Note pemfile is different from other places since it's only required with notify-puppets.
    pemfile = config.kwarg_or_get('pem', kwargs, 'SIRIKATA_CLUSTER_PEMFILE', default=None)

    # Generate the archive if given a directory
    gen_file = installed_path
    if os.path.isdir(installed_path):
        retcode = util_sirikata.package(installed_path)
        if retcode != 0:
            return retcode
        gen_file = os.path.join(installed_path, 'sirikata.tar.bz2')

    # Make sure we have a place to put the file
    dest_dir = os.path.join(puppet_base_path, 'modules', 'sirikata', 'files', 'home', 'ubuntu')
    if not os.path.exists(dest_dir):
        # Need root for this, so we have to do it through subprocess
        subprocess.call(['sudo', 'mkdir', '-p', dest_dir])

    # And copy it into place
    print "Copying archive into puppet"
    dest_file = os.path.join(dest_dir, 'sirikata.tar.bz2')
    subprocess.call(['sudo', 'cp', gen_file, dest_file])

    if notify_puppets:
        print "Notifying puppets"
        slaves_restart_kwargs = {}
        if pemfile is not None:
            slaves_restart_kwargs['pem'] = pemfile
        # notify_puppets == cluster name
        # Nuke old tar.bz2's so new ones will be downloaded
        nodes.ssh(notify_puppets, 'rm', '-f', 'sirikata.tar.bz2', **slaves_restart_kwargs)
        puppet.slaves_restart(notify_puppets, **slaves_restart_kwargs)
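# Hypothetical usage of the command above (the install path and cluster name
# are placeholders): package an installed Sirikata tree, copy the archive into
# the puppet master's file tree, and poke running puppets to pick it up.
#
#   ./sirikata-cluster.py ec2 sync sirikata /path/to/installed/sirikata \
#       --notify-puppets=mycluster --pem=/path/to/key.pem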
def add_service(*args, **kwargs):
    """ec2 add service cluster_name_or_config service_id target_node|any [--user=user] [--cwd=/path/to/execute] [--force-daemonize] [--] command to run

    Add a service to run on the cluster. The service needs to be
    assigned a unique id (a string) and takes the form of a command
    (which should be able to be shutdown via signals). If the command
    requires parameters of the form --setting=value, make sure you
    add -- before the command so they aren't used as arguments to
    this command.

    You should also be sure that the command's binary is specified as
    a full path.

    user specifies the user account that should execute the service

    cwd sets the working directory for the service

    To make handling PID files easier, any appearance of PIDFILE in
    your command arguments will be replaced with the path to the PID
    file selected. For example, you might add --pid-file=PIDFILE as
    an argument. Similarly, FQDN is replaced with the target node's
    public hostname.
    """

    name_or_config, service_name, target_node, service_cmd = arguments.parse_or_die(add_service, [object, str, str], rest=True, *args)
    cname, cc = name_and_config(name_or_config)
    user = config.kwarg_or_default('user', kwargs, default=None)
    cwd = config.kwarg_or_default('cwd', kwargs, default=None)
    force_daemonize = bool(config.kwarg_or_default('force-daemonize', kwargs, default=False))

    if not len(service_cmd):
        print "You need to specify a command for the service"
        return 1

    if 'services' not in cc.state:
        cc.state['services'] = {}

    if service_name in cc.state['services']:
        print "The requested service already exists."
        return 1

    if not os.path.isabs(service_cmd[0]):
        print "The path to the service's binary isn't absolute (%s)" % service_cmd[0]
        print args
        print service_cmd
        return 1

    conn = EC2Connection(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)

    target_node_inst = get_node(cc, conn, target_node)
    target_node_pacemaker_id = get_node_pacemaker_id(cc, conn, target_node)
    target_node_hostname = get_node_hostname(cc, conn, target_node)

    # Can now get default values that depend on the node
    if user is None:
        user = cc.user(target_node)
    if cwd is None:
        cwd = cc.default_working_path(target_node)

    service_binary = service_cmd[0]
    pidfile = os.path.join(cc.workspace_path(), 'sirikata_%s.pid' % (service_name))

    daemon_cmd = ['start-stop-daemon', '--start',
                  '--pidfile', pidfile,
                  '--user', user,
                  '--chdir', cwd,
                  # '--test'
                  ]
    if force_daemonize:
        daemon_cmd += ['--background', '--make-pidfile']
    daemon_cmd += ['--exec', service_binary, '--'] + [arg.replace('PIDFILE', pidfile).replace('FQDN', target_node_hostname) for arg in service_cmd[1:]]

    retcode = node_ssh(cc, target_node_inst.id, *daemon_cmd)
    if retcode != 0:
        print "Failed to add cluster service"
        return retcode

    # Save a record of this service so we can find it again when we need to stop it.
    cc.state['services'][service_name] = {
        'node' : target_node_inst.id,
        'binary' : service_binary
        }
    cc.save()

    return retcode
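# Sketch of a hypothetical invocation of the command above (the service id,
# target node, user, and binary path are placeholders); note the -- separating
# the service's own --flags from this command's arguments and the PIDFILE
# substitution described in the docstring:
#
#   ./sirikata-cluster.py ec2 add service mycluster myservice any --user=ubuntu -- \
#       /home/ubuntu/sirikata/bin/space --pid-file=PIDFILE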
def boot(*args, **kwargs):
    """ec2 nodes boot name_or_config [--wait-timeout=300 --pem=/path/to/key.pem]

    Boot a cluster's nodes. The command will block for wait-timeout
    seconds, or until all nodes reach a ready state (currently
    defined as being pingable and containing files indicating
    readiness). A wait-timeout of 0 disables this. A pem file, either
    passed on the command line or through the environment, is
    required for the timeout to work properly. Note that with
    timeouts enabled, this will check that the nodes reach a ready
    state.
    """

    name_or_config = arguments.parse_or_die(boot, [object], *args)
    timeout = config.kwarg_or_default('wait-timeout', kwargs, default=600)
    # Note pemfile is different from other places since it's only required with wait-timeout.
    pemfile = config.kwarg_or_get('pem', kwargs, 'SIRIKATA_CLUSTER_PEMFILE', default=None)

    name, cc = name_and_config(name_or_config)

    if 'reservation' in cc.state or 'spot' in cc.state or 'instances' in cc.state:
        print "It looks like you already have active nodes for this cluster..."
        exit(1)

    if timeout > 0 and not pemfile:
        print "You need to specify a pem file to use timeouts."
        exit(1)

    # Load the setup script template, replace puppet master info
    user_data = data.load('ec2-user-data', 'node-setup.sh')
    user_data = user_data.replace('{{{PUPPET_MASTER}}}', cc.puppet_master)

    # Unlike spot instances, where we can easily request that any
    # availability zone be used but that all be in the same AZ, here
    # we have to specify an AZ directly. We just choose one randomly
    # for now...
    conn = EC2Connection(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)
    zones = conn.get_all_zones()
    zone = random.choice(zones).name

    # Now create the nodes
    reservation = conn.run_instances(cc.ami,
                                     placement=zone,
                                     min_count=cc.size, max_count=cc.size,
                                     key_name=cc.keypair,
                                     instance_type=cc.instance_type,
                                     security_groups=[cc.group],
                                     user_data=user_data
                                     )

    # Save reservation, instance info
    cc.state['reservation'] = reservation.id
    cc.state['instances'] = [inst.id for inst in reservation.instances]
    cc.save()

    # Cache some information about the instances which shouldn't
    # change. However, this can take some time to come up properly, so
    # we may need to poll a few times before we get the right info
    print "Collecting node information..."
    while True:
        new_instances = get_all_instances(cc, conn)
        if any([inst.ip_address is None or inst.dns_name is None or
                inst.private_ip_address is None or inst.private_dns_name is None
                for inst in new_instances.values()]):
            time.sleep(5)
            continue
        cc.state['instance_props'] = dict(
            [ (inst.id, {
                  'id' : inst.id,
                  'ip' : inst.ip_address,
                  'hostname' : inst.dns_name,
                  'private_ip' : inst.private_ip_address,
                  'private_hostname' : inst.private_dns_name,
                  }) for inst in new_instances.values()])
        break
    cc.save()

    return name_and_boot_nodes(cc, conn, pemfile, timeout)
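# Hypothetical usage of the command above (cluster name and key path are
# placeholders); passing --wait-timeout=0 skips the readiness wait, in which
# case no pem file is needed:
#
#   ./sirikata-cluster.py ec2 nodes boot mycluster --wait-timeout=300 --pem=/path/to/key.pem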