def register_ebs_ami(self, snapshot_id, arch='x86_64', default_ephem_map=True,
                     img_name=None, img_desc=None):
    # register against snapshot
    try:
        aki = PVGRUB_AKIS[self.region.name][arch]
    except KeyError:
        raise Exception("Unable to determine pvgrub hd00 AKI for region (%s) arch (%s)" % (self.region.name, arch))

    if not img_name:
        rand_id = random.randrange(2 ** 32)
        # These names need to be unique, hence the pseudo-uuid
        img_name = 'EBSHelper AMI - %s - uuid-%x' % (snapshot_id, rand_id)
    if not img_desc:
        img_desc = 'Created directly from volume snapshot %s' % (snapshot_id)

    self.log.debug("Registering snapshot (%s) as new EBS AMI" % (snapshot_id))
    ebs = EBSBlockDeviceType()
    ebs.snapshot_id = snapshot_id
    ebs.delete_on_termination = True
    block_map = BlockDeviceMapping()
    block_map['/dev/sda'] = ebs
    # The ephemeral mappings are automatic with S3 images
    # For EBS images we need to make them explicit
    # These settings are required to make the same fstab work on both S3 and EBS images
    if default_ephem_map:
        e0 = EBSBlockDeviceType()
        e0.ephemeral_name = 'ephemeral0'
        e1 = EBSBlockDeviceType()
        e1.ephemeral_name = 'ephemeral1'
        block_map['/dev/sdb'] = e0
        block_map['/dev/sdc'] = e1
    result = self.conn.register_image(name=img_name, description=img_desc,
                                      architecture=arch, kernel_id=aki,
                                      root_device_name='/dev/sda',
                                      block_device_map=block_map)
    return str(result)
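# A minimal standalone sketch of the same snapshot-to-AMI registration flow,
# outside of any helper class. The region name, snapshot id, AKI, and function
# name below are hypothetical placeholders; the boto 2 calls themselves
# (connect_to_region, register_image, BlockDeviceMapping, EBSBlockDeviceType)
# are the same ones the snippet above relies on.
import boto.ec2
from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType

def register_snapshot_as_ami(region_name, snapshot_id, kernel_id, name):
    conn = boto.ec2.connect_to_region(region_name)
    root = EBSBlockDeviceType()
    root.snapshot_id = snapshot_id
    root.delete_on_termination = True
    bdm = BlockDeviceMapping()
    bdm['/dev/sda'] = root
    return conn.register_image(name=name,
                               description='Registered from %s' % snapshot_id,
                               architecture='x86_64',
                               kernel_id=kernel_id,
                               root_device_name='/dev/sda',
                               block_device_map=bdm)

# Example call (hypothetical ids):
# ami_id = register_snapshot_as_ami('us-east-1', 'snap-12345678', 'aki-88aa75e1', 'my-ebs-ami')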
def _register_image(self, snapshot_id):
    conn = self.platform.new_ec2_conn()
    instance_id = self.platform.get_instance_id()
    instance = conn.get_all_instances([instance_id])[0].instances[0]

    block_device_map = BlockDeviceMapping(conn)
    root_vol = EBSBlockDeviceType(snapshot_id=snapshot_id)
    root_vol.delete_on_termination = True

    # Adding ephemeral devices
    for eph, device in EPH_STORAGE_MAPPING[linux.os['arch']].items():
        bdt = EBSBlockDeviceType(conn)
        bdt.ephemeral_name = eph
        block_device_map[device] = bdt

    root_partition = instance.root_device_name[:-1]
    if root_partition in self.platform.get_block_device_mapping().values():
        block_device_map[root_partition] = root_vol
    else:
        block_device_map[instance.root_device_name] = root_vol

    return conn.register_image(
        name=self.image_name,
        root_device_name=instance.root_device_name,
        block_device_map=block_device_map,
        kernel_id=instance.kernel,
        virtualization_type=instance.virtualization_type,
        ramdisk_id=self.platform.get_ramdisk_id(),
        architecture=instance.architecture)
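# The EPH_STORAGE_MAPPING constant iterated above is not included in this
# snippet. A hypothetical shape consistent with how it is used here
# (architecture -> {ephemeral_name: device path}); the actual device letters in
# the original module may differ:
EPH_STORAGE_MAPPING = {
    'i386':   {'ephemeral0': '/dev/sda2'},
    'x86_64': {'ephemeral0': '/dev/sdb',
               'ephemeral1': '/dev/sdc'},
}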
def register_ebs_ami(self, snapshot_id, arch="x86_64", default_ephem_map=True,
                     img_name=None, img_desc=None):
    # register against snapshot
    try:
        aki = PVGRUB_AKIS[self.region.name][arch]
    except KeyError:
        raise Exception("Unable to find pvgrub hd00 AKI for %s, arch (%s)" % (self.region.name, arch))

    if not img_name:
        rand_id = random.randrange(2 ** 32)
        # These names need to be unique, hence the pseudo-uuid
        img_name = "EBSHelper AMI - %s - uuid-%x" % (snapshot_id, rand_id)
    if not img_desc:
        img_desc = "Created directly from volume snapshot %s" % snapshot_id

    self.log.debug("Registering %s as new EBS AMI" % snapshot_id)
    self.create_sgroup("ec2helper-vnc-ssh-%x" % random.randrange(2 ** 32),
                       allow_vnc=True)
    ebs = EBSBlockDeviceType()
    ebs.snapshot_id = snapshot_id
    ebs.delete_on_termination = True
    block_map = BlockDeviceMapping()
    block_map["/dev/sda"] = ebs
    # The ephemeral mappings are automatic with S3 images
    # For EBS images we need to make them explicit
    # These settings are required to make the same fstab work on both S3
    # and EBS images
    if default_ephem_map:
        e0 = EBSBlockDeviceType()
        e0.ephemeral_name = "ephemeral0"
        e1 = EBSBlockDeviceType()
        e1.ephemeral_name = "ephemeral1"
        block_map["/dev/sdb"] = e0
        block_map["/dev/sdc"] = e1
    result = self.conn.register_image(
        name=img_name,
        description=img_desc,
        architecture=arch,
        kernel_id=aki,
        root_device_name="/dev/sda",
        block_device_map=block_map,
    )
    sleep(10)
    new_amis = self.conn.get_all_images([result])
    # resource_tag is assumed to be defined elsewhere in the original module
    new_amis[0].add_tag("Name", resource_tag)
    return str(result)
def parse_block_device_args(self, block_device_maps_args):
    block_device_map = BlockDeviceMapping()
    for block_device_map_arg in block_device_maps_args:
        parts = block_device_map_arg.split('=')
        if len(parts) > 1:
            device_name = parts[0]
            block_dev_type = EBSBlockDeviceType()
            value_parts = parts[1].split(':')
            if value_parts[0].startswith('snap'):
                block_dev_type.snapshot_id = value_parts[0]
            else:
                if value_parts[0].startswith('ephemeral'):
                    block_dev_type.ephemeral_name = value_parts[0]
            if len(value_parts) > 1:
                block_dev_type.size = int(value_parts[1])
            if len(value_parts) > 2:
                if value_parts[2] == 'true':
                    block_dev_type.delete_on_termination = True
            block_device_map[device_name] = block_dev_type
    return block_device_map
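# Hedged usage sketch for the parser above. The argument strings follow the
# "<device>=<snap-id|ephemeralN>[:<size-GB>[:<delete-on-termination>]]" form it
# expects; `cli` stands in for an instance of whichever class defines the
# method, and the ids/devices are hypothetical placeholders.
# bdm = cli.parse_block_device_args(['/dev/sdb=snap-12345678:20:true',
#                                    '/dev/sdc=ephemeral0'])
# bdm['/dev/sdb'].snapshot_id            -> 'snap-12345678'
# bdm['/dev/sdb'].size                   -> 20
# bdm['/dev/sdb'].delete_on_termination  -> True
# bdm['/dev/sdc'].ephemeral_name         -> 'ephemeral0'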
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    slave_group.owner_id = os.getenv('EC2_USER_ID')
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    zoo_group.owner_id = os.getenv('EC2_USER_ID')
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        master_group.authorize('tcp', 40000, 40000, '0.0.0.0/0')  # apache hama
        master_group.authorize('tcp', 40013, 40013, '0.0.0.0/0')  # apache hama
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
        slave_group.authorize('tcp', 40015, 40015, '0.0.0.0/0')  # apache hama web UI
    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves, existing_zoos = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)

    print "Launching instances..."
    try:
        image = conn.get_all_images(image_ids=[opts.emi])[0]
    except:
        print >> stderr, "Could not find emi " + opts.emi
        sys.exit(1)
    try:
        image_master = conn.get_all_images(image_ids=[opts.emi_master])[0]
    except:
        print >> stderr, "Could not find emi " + opts.emi_master
        sys.exit(1)
    if (opts.emi_zoo != ""):
        try:
            image_zoo = conn.get_all_images(image_ids=[opts.emi_zoo])[0]
        except:
            print >> stderr, "Could not find emi " + opts.emi_zoo
            sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    logging.debug(" Printing block_map..")
    #print block_map
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        #print "device: ", device
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        #block_map["/dev/sdv"] = device
        #block_map["/dev/sdv"] = device
        block_map["/dev/vdb"] = device

    if opts.user_data_file != None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
            #print "user data (encoded) = ", opts.user_data
        finally:
            user_data_file.close()

    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
        num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
        if num_slaves_this_zone > 0:
            slave_res = image.run(key_name=opts.key_pair,
                                  security_groups=[slave_group],
                                  instance_type=opts.instance_type,
                                  placement=zone,
                                  min_count=num_slaves_this_zone,
                                  max_count=num_slaves_this_zone,
                                  block_device_map=block_map,
                                  user_data=opts.user_data)
            slave_nodes += slave_res.instances
            print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                            zone, slave_res.id)
        i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image_master.run(key_name=opts.key_pair,
                                      security_groups=[master_group],
                                      instance_type=master_type,
                                      placement=opts.zone,
                                      min_count=1,
                                      max_count=1,
                                      block_device_map=block_map,
                                      user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Launch ZooKeeper nodes if required
    if int(opts.ft) > 1:
        print "Running " + opts.ft + " zookeepers"
        zoo_res = image_zoo.run(key_name=opts.key_pair,
                                security_groups=[zoo_group],
                                instance_type=opts.instance_type,
                                placement=opts.zone,
                                min_count=3,
                                max_count=3,
                                block_device_map=block_map,
                                user_data=opts.user_data)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
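# get_or_make_group is called throughout the launch_cluster variants but is not
# defined in this section. A hedged sketch of what it presumably does, based on
# how it is used (look up a security group by name and create it if missing);
# the description string is a placeholder:
def get_or_make_group(conn, name):
    groups = conn.get_all_security_groups()
    matching = [g for g in groups if g.name == name]
    if matching:
        return matching[0]
    print "Creating security group " + name
    return conn.create_security_group(name, name + " group")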
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    slave_group.owner_id = os.getenv('EC2_USER_ID')
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s" % (master_group.name, slave_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)

    print "Launching instances..."
    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    logging.debug(" Printing block_map..")
    #print block_map
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        #print "device: ", device
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        #block_map["/dev/sdv"] = device
        #block_map["/dev/sdv"] = device
        block_map["/dev/vdb"] = device

    if opts.user_data_file != None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
            #print "user data (encoded) = ", opts.user_data
        finally:
            user_data_file.close()

    # Launch slaves
    if opts.spot_price != None:
        # Launch spot instances with the requested price
        print ("Requesting %d slaves as spot instances with price $%.3f" %
               (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group],
                instance_type=opts.instance_type,
                block_device_map=block_map)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves)
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" % running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group],
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=opts.user_data)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                                zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Return all the instances
    return (master_nodes, slave_nodes)
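# get_zones and get_partition are helpers the launch functions above rely on but
# that are not included in this section. A hedged sketch consistent with how they
# are called (this is roughly how spark-ec2 defines them; treat it as an
# approximation rather than the exact upstream code):
def get_zones(conn, opts):
    if opts.zone == 'all':
        return [z.name for z in conn.get_all_zones()]
    return [opts.zone]

def get_partition(total, num_partitions, current_partitions):
    # Split `total` instances as evenly as possible across the availability zones
    num_slaves_this_zone = total / num_partitions
    if (total % num_partitions) - current_partitions > 0:
        num_slaves_this_zone += 1
    return num_slaves_this_zone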
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    slave_group.owner_id = os.getenv('EC2_USER_ID')
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    zoo_group.owner_id = os.getenv('EC2_USER_ID')
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves, existing_zoos = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_ami(opts)

    print "Launching instances..."
    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    logging.debug(" Printing block_map..")
    #print block_map
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        #print "device: ", device
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        #block_map["/dev/sdv"] = device
        #block_map["/dev/sdv"] = device
        block_map["/dev/vdb"] = device

    if opts.user_data_file != None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
            #print "user data (encoded) = ", opts.user_data
        finally:
            user_data_file.close()

    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
        num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
        if num_slaves_this_zone > 0:
            slave_res = image.run(key_name=opts.key_pair,
                                  security_groups=[slave_group],
                                  instance_type=opts.instance_type,
                                  placement=zone,
                                  min_count=num_slaves_this_zone,
                                  max_count=num_slaves_this_zone,
                                  block_device_map=block_map,
                                  user_data=opts.user_data)
            slave_nodes += slave_res.instances
            print "Launched %d slaves in %s, regid = %s" % (
                num_slaves_this_zone, zone, slave_res.id)
        i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Launch ZooKeeper nodes if required
    if int(opts.ft) > 1:
        print "Running " + opts.ft + " zookeepers"
        zoo_res = image.run(key_name=opts.key_pair,
                            security_groups=[zoo_group],
                            instance_type=opts.instance_type,
                            placement=opts.zone,
                            min_count=3,
                            max_count=3,
                            block_device_map=block_map,
                            user_data=opts.user_data)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    print "Setting up security groups..."
    if opts.one_security_group:
        master_group = get_or_make_group(conn, cluster_name + "-group")
        master_group.owner_id = os.getenv('EC2_USER_ID')
        slave_group = master_group
        zoo_group = master_group
    else:
        master_group = get_or_make_group(conn, cluster_name + "-master")
        master_group.owner_id = os.getenv('EC2_USER_ID')
        slave_group = get_or_make_group(conn, cluster_name + "-slaves")
        slave_group.owner_id = os.getenv('EC2_USER_ID')
        zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
        zoo_group.owner_id = os.getenv('EC2_USER_ID')
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50031, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        master_group.authorize('tcp', 40000, 40000, '0.0.0.0/0')  # apache hama
        master_group.authorize('tcp', 40013, 40013, '0.0.0.0/0')  # apache hama
        master_group.authorize('tcp', 8020, 8020, '0.0.0.0/0')    # hdfs HA nameservice
        master_group.authorize('tcp', 8485, 8485, '0.0.0.0/0')    # journal nodes
        master_group.authorize('tcp', 8023, 8023, '0.0.0.0/0')    # jt HA
        master_group.authorize('tcp', 8021, 8021, '0.0.0.0/0')    # jt HA
        master_group.authorize('tcp', 8018, 8019, '0.0.0.0/0')    # zkfc
        master_group.authorize('tcp', 2812, 2812, '0.0.0.0/0')    # monit web ui
        # If cohosted with zookeeper, open the necessary ports
        if opts.cohost:
            print "Opening additional ports for zookeeper..."
            master_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
            master_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
            master_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 80, 80, '0.0.0.0/0')
            # Also needed 8649 and 8651 but check if only for master
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
        slave_group.authorize('tcp', 40015, 40015, '0.0.0.0/0')  # apache hama web UI
        slave_group.authorize('tcp', 2812, 2812, '0.0.0.0/0')    # monit web ui
        slave_group.authorize('tcp', 31000, 32000, '0.0.0.0/0')  # task tracker web ui
    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 8018, 8020, '0.0.0.0/0')  # hdfs HA nameservice
        zoo_group.authorize('tcp', 8485, 8485, '0.0.0.0/0')  # journal nodes
        zoo_group.authorize('tcp', 8023, 8023, '0.0.0.0/0')  # jt HA
        zoo_group.authorize('tcp', 2812, 2812, '0.0.0.0/0')  # monit web ui

    # Check if instances are already running in our groups.
    # Grouped instances are instances that run in the same security group in order
    # to allow communication using private IPs and without DNS resolving
    existing_masters, existing_slaves, existing_zoos, existing_grouped = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master) or existing_grouped:
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)

    print "Launching instances..."
    try:
        image = conn.get_all_images(image_ids=[opts.emi])[0]
    except:
        print >> stderr, "Could not find emi " + opts.emi
        sys.exit(1)
    try:
        image_master = conn.get_all_images(image_ids=[opts.emi_master])[0]
    except:
        print >> stderr, "Could not find emi " + opts.emi_master
        sys.exit(1)

    # Work out how many additional ZooKeeper nodes are required - e.g. if 2 mesos
    # masters are specified and zoo_num=3 (the default)
    if int(opts.ft) > 1:
        if (opts.cohost):
            zoo_num = str(int(opts.zoo_num) - int(opts.ft))  # extra zoo instances needed
        else:
            zoo_num = opts.zoo_num
    else:
        zoo_num = opts.zoo_num

    if int(zoo_num) > 0:  # zoo_num is kept as a string; compare numerically
        if opts.emi_zoo == "":
            emi_zoo = opts.emi_master
        else:
            emi_zoo = opts.emi_zoo
        try:
            image_zoo = conn.get_all_images(image_ids=[emi_zoo])[0]
        except:
            print >> stderr, "Could not find emi " + emi_zoo
            sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    logging.debug(" Printing block_map..")
    #print block_map
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        #print "device: ", device
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        #block_map["/dev/sdv"] = device
        #block_map["/dev/sdv"] = device
        block_map["/dev/vdb"] = device

    if opts.user_data_file != None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
            #print "user data (encoded) = ", opts.user_data
        finally:
            user_data_file.close()

    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
        num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
        if num_slaves_this_zone > 0:
            slave_res = image.run(key_name=opts.key_pair,
                                  security_groups=[slave_group],
                                  instance_type=opts.instance_type,
                                  placement=zone,
                                  min_count=num_slaves_this_zone,
                                  max_count=num_slaves_this_zone,
                                  block_device_map=block_map,
                                  user_data=opts.user_data)
            slave_nodes += slave_res.instances
            print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                            zone, slave_res.id)
        i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        print "Running " + opts.ft + " masters"
        master_res = image_master.run(key_name=opts.key_pair,
                                      security_groups=[master_group],
                                      instance_type=master_type,
                                      placement=opts.zone,
                                      min_count=opts.ft,
                                      max_count=opts.ft,
                                      block_device_map=block_map,
                                      user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Launch additional ZooKeeper nodes if required
    if int(zoo_num) > 0:
        print "Running additional " + zoo_num + " zookeepers"
        zoo_res = image_zoo.run(key_name=opts.key_pair,
                                security_groups=[zoo_group],
                                instance_type=opts.instance_type,
                                placement=opts.zone,
                                min_count=zoo_num,
                                max_count=zoo_num,
                                block_device_map=block_map,
                                user_data=opts.user_data)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []
        if (opts.cohost):
            print "Zookeepers are co-hosted on mesos instances..."

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)