Example #1
    def register_ebs_ami(self, snapshot_id, arch='x86_64', default_ephem_map=True,
                         img_name=None, img_desc=None):
        # register against snapshot
        try:
            aki = PVGRUB_AKIS[self.region.name][arch]
        except KeyError:
            raise Exception("Unable to determine pvgrub hd00 AKI for region (%s) arch (%s)" % (self.region.name, arch))

        if not img_name:
            rand_id = random.randrange(2**32)
            # These names need to be unique, hence the pseudo-uuid
            img_name = 'EBSHelper AMI - %s - uuid-%x' % (snapshot_id, rand_id)
        if not img_desc:
            img_desc = 'Created directly from volume snapshot %s' % snapshot_id

        self.log.debug("Registering snapshot (%s) as new EBS AMI" % snapshot_id)
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = snapshot_id
        ebs.delete_on_termination = True
        block_map = BlockDeviceMapping()
        block_map['/dev/sda'] = ebs
        # The ephemeral mappings are automatic with S3 images
        # For EBS images we need to make them explicit
        # These settings are required to make the same fstab work on both S3 and EBS images
        if default_ephem_map:
            e0 = EBSBlockDeviceType()
            e0.ephemeral_name = 'ephemeral0'
            e1 = EBSBlockDeviceType()
            e1.ephemeral_name = 'ephemeral1'
            block_map['/dev/sdb'] = e0
            block_map['/dev/sdc'] = e1
        result = self.conn.register_image(name=img_name, description=img_desc,
                                          architecture=arch, kernel_id=aki,
                                          root_device_name='/dev/sda',
                                          block_device_map=block_map)
        return str(result)
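
A minimal usage sketch (assumptions: a hypothetical EC2Helper class exposing the self.conn, self.region and self.log attributes used above, and placeholder AKI IDs; the real PVGRUB_AKIS table maps region name and architecture to pvgrub AKIs):

# Hypothetical setup; the AKI IDs below are placeholders, not real values.
PVGRUB_AKIS = {
    'us-east-1': {'x86_64': 'aki-xxxxxxxx', 'i386': 'aki-yyyyyyyy'},
}

helper = EC2Helper(region='us-east-1')            # assumed constructor
ami_id = helper.register_ebs_ami('snap-12345678')
print "Registered AMI:", ami_id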
Example #2
    def _register_image(self, snapshot_id):
        conn = self.platform.new_ec2_conn()

        instance_id = self.platform.get_instance_id()
        instance = conn.get_all_instances([instance_id])[0].instances[0]

        block_device_map = BlockDeviceMapping(conn)

        root_vol = EBSBlockDeviceType(snapshot_id=snapshot_id)
        root_vol.delete_on_termination = True
        # Adding ephemeral devices
        for eph, device in EPH_STORAGE_MAPPING[linux.os['arch']].items():
            bdt = EBSBlockDeviceType(conn)
            bdt.ephemeral_name = eph
            block_device_map[device] = bdt

        # Strip a trailing partition number (e.g. '/dev/sda1' -> '/dev/sda') and
        # attach the root volume under whichever name the platform's mapping uses
        root_partition = instance.root_device_name[:-1]
        if root_partition in self.platform.get_block_device_mapping().values():
            block_device_map[root_partition] = root_vol
        else:
            block_device_map[instance.root_device_name] = root_vol

        return conn.register_image(
            name=self.image_name,
            root_device_name=instance.root_device_name,
            block_device_map=block_device_map,
            kernel_id=instance.kernel,
            virtualization_type=instance.virtualization_type,
            ramdisk_id=self.platform.get_ramdisk_id(),
            architecture=instance.architecture)
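
EPH_STORAGE_MAPPING is not shown in this example. A plausible shape, inferred only from the loop above (ephemeral name to device node, keyed by architecture), would be:

# Assumed shape only; actual device names vary by platform and instance type.
EPH_STORAGE_MAPPING = {
    'x86_64': {
        'ephemeral0': '/dev/sdb',
        'ephemeral1': '/dev/sdc',
    },
}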
Example #3
    def register_ebs_ami(self, snapshot_id, arch="x86_64", default_ephem_map=True, img_name=None, img_desc=None):
        # register against snapshot
        try:
            aki = PVGRUB_AKIS[self.region.name][arch]
        except KeyError:
            raise Exception("Unable to find pvgrub hd00 AKI for %s, arch (%s)" % (self.region.name, arch))
        if not img_name:
            rand_id = random.randrange(2 ** 32)
            # These names need to be unique, hence the pseudo-uuid
            img_name = "EBSHelper AMI - %s - uuid-%x" % (snapshot_id, rand_id)
        if not img_desc:
            img_desc = "Created directly from volume snapshot %s" % snapshot_id

        self.log.debug("Registering %s as new EBS AMI" % snapshot_id)
        # Side effect: create a throwaway security group that allows VNC/SSH access
        self.create_sgroup("ec2helper-vnc-ssh-%x" % random.randrange(2 ** 32), allow_vnc=True)
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = snapshot_id
        ebs.delete_on_termination = True
        block_map = BlockDeviceMapping()
        block_map["/dev/sda"] = ebs
        # The ephemeral mappings are automatic with S3 images
        # For EBS images we need to make them explicit
        # These settings are required to make the same fstab work on both S3
        # and EBS images
        if default_ephem_map:
            e0 = EBSBlockDeviceType()
            e0.ephemeral_name = "ephemeral0"
            e1 = EBSBlockDeviceType()
            e1.ephemeral_name = "ephemeral1"
            block_map["/dev/sdb"] = e0
            block_map["/dev/sdc"] = e1
        result = self.conn.register_image(
            name=img_name,
            description=img_desc,
            architecture=arch,
            kernel_id=aki,
            root_device_name="/dev/sda",
            block_device_map=block_map,
        )
        # Give EC2 a moment to make the new AMI visible, then tag it
        sleep(10)
        new_amis = self.conn.get_all_images([result])
        new_amis[0].add_tag("Name", resource_tag)  # resource_tag: assumed module-level constant

        return str(result)
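
The fixed sleep(10) is racy if registration takes longer. A sketch of a more robust tail for this method, assuming the same sleep import and resource_tag constant as above, and using boto's Image.update() to refresh the image state:

        # Sketch: poll until the AMI leaves 'pending' instead of sleeping once.
        image = self.conn.get_all_images([result])[0]
        while image.state == 'pending':
            sleep(5)
            image.update()  # refreshes image.state from EC2
        if image.state == 'available':
            image.add_tag("Name", resource_tag)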
Example #4
    def parse_block_device_args(self, block_device_maps_args):
        block_device_map = BlockDeviceMapping()
        for block_device_map_arg in block_device_maps_args:
            # Expected form: <device>=<snapshot-id|ephemeralN>[:<size>[:<delete-on-termination>]]
            parts = block_device_map_arg.split('=', 1)
            if len(parts) > 1:
                device_name = parts[0]
                block_dev_type = EBSBlockDeviceType()
                value_parts = parts[1].split(':')
                if value_parts[0].startswith('snap'):
                    block_dev_type.snapshot_id = value_parts[0]
                elif value_parts[0].startswith('ephemeral'):
                    block_dev_type.ephemeral_name = value_parts[0]
                if len(value_parts) > 1:
                    block_dev_type.size = int(value_parts[1])
                if len(value_parts) > 2 and value_parts[2] == 'true':
                    block_dev_type.delete_on_termination = True
                block_device_map[device_name] = block_dev_type
        return block_device_map
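
For instance, the argument strings below would map /dev/sdb to a 100 GiB volume restored from a snapshot and deleted on termination, and /dev/sdc to the first ephemeral disk (a sketch; 'helper' is a hypothetical instance of the class defining this method):

args = ['/dev/sdb=snap-12345678:100:true', '/dev/sdc=ephemeral0']
bdm = helper.parse_block_device_args(args)
# bdm['/dev/sdb'].snapshot_id == 'snap-12345678', size == 100,
# delete_on_termination == True; bdm['/dev/sdc'].ephemeral_name == 'ephemeral0'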
Example #5
def launch_cluster(conn, opts, cluster_name):
  if opts.identity_file is None:
    print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
    sys.exit(1)
  if opts.key_pair is None:
    print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
    sys.exit(1)
  print "Setting up security groups..."
  master_group = get_or_make_group(conn, cluster_name + "-master")
  master_group.owner_id = os.getenv('EC2_USER_ID')
  slave_group = get_or_make_group(conn, cluster_name + "-slaves")
  slave_group.owner_id = os.getenv('EC2_USER_ID')
  zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
  zoo_group.owner_id = os.getenv('EC2_USER_ID')
  
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
    master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
    master_group.authorize('tcp', 40000, 40000, '0.0.0.0/0') #apache hama
    master_group.authorize('tcp', 40013, 40013, '0.0.0.0/0') #apache hama
    if opts.ganglia:
      master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    slave_group.authorize('tcp', 40015, 40015, '0.0.0.0/0') #apache hama web UI
  
  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize(src_group=master_group)
    zoo_group.authorize(src_group=slave_group)
    zoo_group.authorize(src_group=zoo_group)
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

  # Check if instances are already running in our groups
  existing_masters, existing_slaves, existing_zoos = get_existing_cluster(conn, opts, cluster_name,
                                                           die_on_error=False)
  if existing_slaves or (existing_masters and not opts.use_existing_master):
    print >> stderr, ("ERROR: There are already instances running in " +
        "group %s or %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
    sys.exit(1)

  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.emi])[0]
  except Exception:
    print >> stderr, "Could not find emi " + opts.emi
    sys.exit(1)

  try:
    image_master = conn.get_all_images(image_ids=[opts.emi_master])[0]
  except Exception:
    print >> stderr, "Could not find emi " + opts.emi_master
    sys.exit(1)
  
  # NOTE: image_zoo is referenced later when launching ZooKeeper nodes, so
  # opts.emi_zoo must be set whenever int(opts.ft) > 1
  if opts.emi_zoo != "":
    try:
      image_zoo = conn.get_all_images(image_ids=[opts.emi_zoo])[0]
    except Exception:
      print >> stderr, "Could not find emi " + opts.emi_zoo
      sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  logging.debug("Calling boto BlockDeviceMapping()...")
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    logging.debug("Calling boto EBSBlockDeviceType()...")
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    device.ephemeral_name = "ephemeral0"
    block_map["/dev/vdb"] = device

  if opts.user_data_file is not None:
    user_data_file = open(opts.user_data_file)
    try:
      opts.user_data = user_data_file.read()
    finally:
      user_data_file.close()

  # Launch non-spot instances
  zones = get_zones(conn, opts)
  num_zones = len(zones)
  i = 0
  slave_nodes = []
  for zone in zones:
    num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
    if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_groups = [slave_group],
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map,
                              user_data = opts.user_data)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
    i += 1  

  # Launch or resume masters
  if existing_masters:
    print "Starting master..."
    for inst in existing_masters:
      if inst.state not in ["shutting-down", "terminated"]:  
        inst.start()
    master_nodes = existing_masters
  else:
    master_type = opts.master_instance_type
    if master_type == "":
      master_type = opts.instance_type
    if opts.zone == 'all':
      opts.zone = random.choice(conn.get_all_zones()).name
    master_res = image_master.run(key_name = opts.key_pair,
                           security_groups = [master_group],
                           instance_type = master_type,
                           placement = opts.zone,
                           min_count = 1,
                           max_count = 1,
                           block_device_map = block_map,
                           user_data = opts.user_data)
    master_nodes = master_res.instances
    print "Launched master in %s, regid = %s" % (zone, master_res.id)
    
  # Launch ZooKeeper nodes if required
  if int(opts.ft) > 1:
    # Note: min_count/max_count below launch a fixed quorum of 3, regardless of opts.ft
    print "Running " + opts.ft + " zookeepers"
    zoo_res = image_zoo.run(key_name = opts.key_pair,
                        security_groups = [zoo_group],
                        instance_type = opts.instance_type,
                        placement = opts.zone,
                        min_count = 3,
                        max_count = 3,
                        block_device_map = block_map,
                        user_data = opts.user_data)
    zoo_nodes = zoo_res.instances
    print "Launched zoo, regid = " + zoo_res.id
  else:
    zoo_nodes = []

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
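
The get_or_make_group helper is referenced throughout but never shown. A plausible implementation on top of boto (a sketch, not necessarily the original):

def get_or_make_group(conn, name):
    # Return the security group with the given name, creating it if needed.
    groups = [g for g in conn.get_all_security_groups() if g.name == name]
    if groups:
        return groups[0]
    print "Creating security group " + name
    return conn.create_security_group(name, "Cluster group %s" % name)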
Example #6
def launch_cluster(conn, opts, cluster_name):
  if opts.identity_file is None:
    print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
    sys.exit(1)
  if opts.key_pair is None:
    print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
    sys.exit(1)
  print "Setting up security groups..."
  master_group = get_or_make_group(conn, cluster_name + "-master")
  master_group.owner_id = os.getenv('EC2_USER_ID')
  slave_group = get_or_make_group(conn, cluster_name + "-slaves")
  slave_group.owner_id = os.getenv('EC2_USER_ID')
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
    if opts.ganglia:
      master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

  # Check if instances are already running in our groups
  existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                           die_on_error=False)
  if existing_slaves or (existing_masters and not opts.use_existing_master):
    print >> stderr, ("ERROR: There are already instances running in " +
        "group %s or %s" % (master_group.name, slave_group.name))
    sys.exit(1)

  # Figure out Spark AMI
  if opts.ami is None:
    opts.ami = get_spark_ami(opts)
  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except Exception:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  logging.debug("Calling boto BlockDeviceMapping()...")
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    logging.debug("Calling boto EBSBlockDeviceType()...")
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    device.ephemeral_name = "ephemeral0"
    block_map["/dev/vdb"] = device

  if opts.user_data_file is not None:
    user_data_file = open(opts.user_data_file)
    try:
      opts.user_data = user_data_file.read()
    finally:
      user_data_file.close()

  # Launch slaves
  if opts.spot_price is not None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    my_req_ids = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      slave_reqs = conn.request_spot_instances(
          price = opts.spot_price,
          image_id = opts.ami,
          launch_group = "launch-group-%s" % cluster_name,
          placement = zone,
          count = num_slaves_this_zone,
          key_name = opts.key_pair,
          security_groups = [slave_group],
          instance_type = opts.instance_type,
          block_device_map = block_map)
      my_req_ids += [req.id for req in slave_reqs]
      i += 1

    print "Waiting for spot instances to be granted..."
    try:
      while True:
        time.sleep(10)
        reqs = conn.get_all_spot_instance_requests()
        id_to_req = {}
        for r in reqs:
          id_to_req[r.id] = r
        active_instance_ids = []
        for req_id in my_req_ids:
          if req_id in id_to_req and id_to_req[req_id].state == "active":
            active_instance_ids.append(id_to_req[req_id].instance_id)
        if len(active_instance_ids) == opts.slaves:
          print "All %d slaves granted" % opts.slaves
          reservations = conn.get_all_instances(active_instance_ids)
          slave_nodes = []
          for r in reservations:
            slave_nodes += r.instances
          break
        else:
          print "%d of %d slaves granted, waiting longer" % (
            len(active_instance_ids), opts.slaves)
    except:  # bare except intentionally catches Ctrl-C so the requests get canceled
      print "Canceling spot instance requests"
      conn.cancel_spot_instance_requests(my_req_ids)
      # Log a warning if any of these requests actually launched instances:
      (master_nodes, slave_nodes) = get_existing_cluster(
          conn, opts, cluster_name, die_on_error=False)
      running = len(master_nodes) + len(slave_nodes)
      if running:
        print >> stderr, ("WARNING: %d instances are still running" % running)
      sys.exit(0)
  else:
    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_groups = [slave_group],
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map,
                              user_data = opts.user_data)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
      i += 1

  # Launch or resume masters
  if existing_masters:
    print "Starting master..."
    for inst in existing_masters:
      if inst.state not in ["shutting-down", "terminated"]:
        inst.start()
    master_nodes = existing_masters
  else:
    master_type = opts.master_instance_type
    if master_type == "":
      master_type = opts.instance_type
    if opts.zone == 'all':
      opts.zone = random.choice(conn.get_all_zones()).name
    master_res = image.run(key_name = opts.key_pair,
                           security_groups = [master_group],
                           instance_type = master_type,
                           placement = opts.zone,
                           min_count = 1,
                           max_count = 1,
                           block_device_map = block_map,
                           user_data = opts.user_data)
    master_nodes = master_res.instances
    print "Launched master in %s, regid = %s" % (zone, master_res.id)

  # Return all the instances
  return (master_nodes, slave_nodes)
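
get_partition, used above to spread opts.slaves across the available zones, is also assumed. A definition consistent with how it is called (a sketch):

def get_partition(total, num_partitions, current_partition):
    # Evenly divide 'total' across partitions; the first 'total % num_partitions'
    # partitions (0-based index) each get one extra.
    num = total / num_partitions  # integer division under Python 2
    if current_partition < total % num_partitions:
        num += 1
    return num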
Example #7
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    slave_group.owner_id = os.getenv('EC2_USER_ID')
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    zoo_group.owner_id = os.getenv('EC2_USER_ID')

    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves, existing_zoos = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, (
            "ERROR: There are already instances running in " +
            "group %s or %s or %s" %
            (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except Exception:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        block_map["/dev/vdb"] = device

    if opts.user_data_file is not None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
        finally:
            user_data_file.close()

    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
        num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
        if num_slaves_this_zone > 0:
            slave_res = image.run(key_name=opts.key_pair,
                                  security_groups=[slave_group],
                                  instance_type=opts.instance_type,
                                  placement=zone,
                                  min_count=num_slaves_this_zone,
                                  max_count=num_slaves_this_zone,
                                  block_device_map=block_map,
                                  user_data=opts.user_data)
            slave_nodes += slave_res.instances
            print "Launched %d slaves in %s, regid = %s" % (
                num_slaves_this_zone, zone, slave_res.id)
        i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Launch ZooKeeper nodes if required
    if int(opts.ft) > 1:
        print "Running " + opts.ft + " zookeepers"
        zoo_res = image.run(key_name=opts.key_pair,
                            security_groups=[zoo_group],
                            instance_type=opts.instance_type,
                            placement=opts.zone,
                            min_count=3,
                            max_count=3,
                            block_device_map=block_map,
                            user_data=opts.user_data)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
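
get_zones is likewise assumed. A sketch matching its use above: return every availability zone when opts.zone is 'all', otherwise just the requested one.

def get_zones(conn, opts):
    # List of availability zone names to launch into.
    if opts.zone == 'all':
        return [z.name for z in conn.get_all_zones()]
    return [opts.zone]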
Example #8
def launch_cluster(conn, opts, cluster_name):
  if opts.identity_file is None:
    print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
    sys.exit(1)
  if opts.key_pair is None:
    print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
    sys.exit(1)
  print "Setting up security groups..."
  
  if opts.one_security_group:
    master_group = get_or_make_group(conn, cluster_name + "-group")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = master_group
    zoo_group = master_group
  
  else:
      master_group = get_or_make_group(conn, cluster_name + "-master")
      master_group.owner_id = os.getenv('EC2_USER_ID')
      slave_group = get_or_make_group(conn, cluster_name + "-slaves")
      slave_group.owner_id = os.getenv('EC2_USER_ID')
      zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
      zoo_group.owner_id = os.getenv('EC2_USER_ID')
      
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
    master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50031, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
    master_group.authorize('tcp', 40000, 40000, '0.0.0.0/0') #apache hama
    master_group.authorize('tcp', 40013, 40013, '0.0.0.0/0') #apache hama
    master_group.authorize('tcp', 8020, 8020, '0.0.0.0/0') #hdfs HA nameservice
    master_group.authorize('tcp', 8485, 8485, '0.0.0.0/0') #journal nodes
    master_group.authorize('tcp', 8023, 8023, '0.0.0.0/0') #jt HA   
    master_group.authorize('tcp', 8021, 8021, '0.0.0.0/0') #jt HA
    master_group.authorize('tcp', 8018, 8019, '0.0.0.0/0') #zkfc
    master_group.authorize('tcp', 2812, 2812, '0.0.0.0/0') #monit web ui

    # If co-hosted with ZooKeeper, open the necessary ports
    if opts.cohost:
      print "Opening additional ports for zookeeper... "
      master_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
      master_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
      master_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    if opts.ganglia:
      master_group.authorize('tcp', 80, 80, '0.0.0.0/0')
      # Also needed: 8649 and 8651, but check if only for master
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    slave_group.authorize('tcp', 40015, 40015, '0.0.0.0/0') #apache hama web UI
    slave_group.authorize('tcp', 2812, 2812, '0.0.0.0/0') #monit web ui
    slave_group.authorize('tcp', 31000, 32000, '0.0.0.0/0') #task tracker web ui

  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize(src_group=master_group)
    zoo_group.authorize(src_group=slave_group)
    zoo_group.authorize(src_group=zoo_group)
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 8018, 8020, '0.0.0.0/0') #hdfs HA nameservice
    zoo_group.authorize('tcp', 8485, 8485, '0.0.0.0/0') #journal nodes
    zoo_group.authorize('tcp', 8023, 8023, '0.0.0.0/0') #jt HA
    zoo_group.authorize('tcp', 2812, 2812, '0.0.0.0/0') #monit web ui

  # Check if instances are already running in our groups
  # Grouped instances are instances that run on the same security group in order to allow communication
  # using private IPs and without DNS resolving
  existing_masters, existing_slaves, existing_zoos, existing_grouped = get_existing_cluster(
      conn, opts, cluster_name, die_on_error=False)
  if existing_slaves or (existing_masters and not opts.use_existing_master) or existing_grouped:
    print >> stderr, ("ERROR: There are already instances running in " +
        "group %s or %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
    sys.exit(1)

  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.emi])[0]
  except Exception:
    print >> stderr, "Could not find emi " + opts.emi
    sys.exit(1)

  try:
    image_master = conn.get_all_images(image_ids=[opts.emi_master])[0]
  except Exception:
    print >> stderr, "Could not find emi " + opts.emi_master
    sys.exit(1)
  
  # Launch additional ZooKeeper nodes if required - ex: if mesos masters
  # specified are 2 and the zoo_num=3 (default)
  # Keep zoo_num an int so the comparison and instance counts below work
  if int(opts.ft) > 1 and opts.cohost:
    zoo_num = int(opts.zoo_num) - int(opts.ft)  # extra zoo instances needed
  else:
    zoo_num = int(opts.zoo_num)

  if zoo_num > 0:
    if opts.emi_zoo == "":
      emi_zoo = opts.emi_master
    else:
      emi_zoo = opts.emi_zoo

    try:
      image_zoo = conn.get_all_images(image_ids=[emi_zoo])[0]
    except Exception:
      print >> stderr, "Could not find emi " + emi_zoo
      sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  logging.debug("Calling boto BlockDeviceMapping()...")
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    logging.debug("Calling boto EBSBlockDeviceType()...")
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    device.ephemeral_name = "ephemeral0"
    block_map["/dev/vdb"] = device

  if opts.user_data_file is not None:
    user_data_file = open(opts.user_data_file)
    try:
      opts.user_data = user_data_file.read()
    finally:
      user_data_file.close()
  
  # Launch non-spot instances
  zones = get_zones(conn, opts)    
  num_zones = len(zones)
  i = 0
  slave_nodes = []
  for zone in zones:
    num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
    if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_groups = [slave_group],
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map,
                              user_data = opts.user_data)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
    i += 1  

  # Launch or resume masters
  if existing_masters:
    print "Starting master..."
    for inst in existing_masters:
      if inst.state not in ["shutting-down", "terminated"]:  
        inst.start()
    master_nodes = existing_masters
  else:
    master_type = opts.master_instance_type
    if master_type == "":
      master_type = opts.instance_type
    if opts.zone == 'all':
      opts.zone = random.choice(conn.get_all_zones()).name
    
    print "Running " + opts.ft + " masters"
    master_res = image_master.run(key_name = opts.key_pair,
                           security_groups = [master_group],
                           instance_type = master_type,
                           placement = opts.zone,
                           min_count = opts.ft,
                           max_count = opts.ft,
                           block_device_map = block_map,
                           user_data = opts.user_data)
    master_nodes = master_res.instances
    print "Launched master in %s, regid = %s" % (zone, master_res.id)

  if zoo_num > 0:
    print "Running additional %d zookeepers" % zoo_num
    zoo_res = image_zoo.run(key_name = opts.key_pair,
                        security_groups = [zoo_group],
                        instance_type = opts.instance_type,
                        placement = opts.zone,
                        min_count = zoo_num,
                        max_count = zoo_num,
                        block_device_map = block_map,
                        user_data = opts.user_data)
    zoo_nodes = zoo_res.instances
    print "Launched zoo, regid = " + zoo_res.id
  else:
    zoo_nodes = []
    
  if opts.cohost:
    print "Zookeepers are co-hosted on mesos instances..."

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
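
Callers of launch_cluster typically wait for the returned instances to reach the 'running' state before using them. A minimal sketch using boto's Instance.update(), which refreshes and returns the instance state:

import time

def wait_for_instances(instances):
    # Poll until every instance reports 'running'.
    while not all(i.update() == 'running' for i in instances):
        time.sleep(5)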