Ejemplo n.º 1
0
    def test_run_instances_block_device_mapping(self):
        """run_instances must serialize a BlockDeviceMapping into EC2-style
        request parameters (these differ from the Autoscaling wire format)."""
        # Mirrors ``unit/ec2/autoscale/test_group.py:TestLaunchConfiguration``,
        # but with modified request parameters (due to a mismatch between EC2 &
        # Autoscaling).
        self.set_http_response(status_code=200)

        sdf_device = BlockDeviceType(snapshot_id='snap-12345')
        sdg_device = BlockDeviceType(snapshot_id='snap-12346',
                                     delete_on_termination=True,
                                     encrypted=True)

        # OrderedDict keeps the device order deterministic so the numbered
        # BlockDeviceMapping.N.* parameters below are stable.
        mapping = BlockDeviceMapping()
        mapping.update(OrderedDict([('/dev/sdf', sdf_device),
                                    ('/dev/sdg', sdg_device)]))

        self.service_connection.run_instances(
            image_id='123456',
            instance_type='m1.large',
            security_groups=['group1', 'group2'],
            block_device_map=mapping
        )

        expected_params = {
            'Action': 'RunInstances',
            'BlockDeviceMapping.1.DeviceName': '/dev/sdf',
            'BlockDeviceMapping.1.Ebs.DeleteOnTermination': 'false',
            'BlockDeviceMapping.1.Ebs.SnapshotId': 'snap-12345',
            'BlockDeviceMapping.2.DeviceName': '/dev/sdg',
            'BlockDeviceMapping.2.Ebs.DeleteOnTermination': 'true',
            'BlockDeviceMapping.2.Ebs.SnapshotId': 'snap-12346',
            'BlockDeviceMapping.2.Ebs.Encrypted': 'true',
            'ImageId': '123456',
            'InstanceType': 'm1.large',
            'MaxCount': 1,
            'MinCount': 1,
            'SecurityGroup.1': 'group1',
            'SecurityGroup.2': 'group2',
        }
        self.assert_request_parameters(expected_params, ignore_params_values=[
            'Version', 'AWSAccessKeyId', 'SignatureMethod', 'SignatureVersion',
            'Timestamp'
        ])
Ejemplo n.º 2
0
class BlockDeviceMappingTests(unittest.TestCase):
    """Unit tests for BlockDeviceMapping's SAX parsing hooks
    (startElement/endElement)."""

    def setUp(self):
        # Fresh mapping per test; BlockDeviceMapping is dict-like.
        self.block_device_mapping = BlockDeviceMapping()

    def block_device_type_eq(self, b1, b2):
        """Field-by-field equality helper for two BlockDeviceType objects.

        Returns True when both arguments are BlockDeviceType instances with
        identical attributes, False otherwise.

        Bug fix: the original implicitly returned None when either argument
        was not a BlockDeviceType; now it returns an explicit False.
        """
        if isinstance(b1, BlockDeviceType) and isinstance(b2, BlockDeviceType):
            return all([b1.connection == b2.connection,
                        b1.ephemeral_name == b2.ephemeral_name,
                        b1.no_device == b2.no_device,
                        b1.volume_id == b2.volume_id,
                        b1.snapshot_id == b2.snapshot_id,
                        b1.status == b2.status,
                        b1.attach_time == b2.attach_time,
                        b1.delete_on_termination == b2.delete_on_termination,
                        b1.size == b2.size,
                        b1.encrypted == b2.encrypted])
        return False

    def test_startElement_with_name_ebs_sets_and_returns_current_value(self):
        # "ebs" starts a new BlockDeviceType bound to the mapping.
        retval = self.block_device_mapping.startElement("ebs", None, None)
        assert self.block_device_type_eq(retval, BlockDeviceType(self.block_device_mapping))

    def test_startElement_with_name_virtualName_sets_and_returns_current_value(self):
        # "virtualName" likewise starts a new BlockDeviceType.
        retval = self.block_device_mapping.startElement("virtualName", None, None)
        assert self.block_device_type_eq(retval, BlockDeviceType(self.block_device_mapping))

    def test_endElement_with_name_device_sets_current_name_dev_null(self):
        self.block_device_mapping.endElement("device", "/dev/null", None)
        self.assertEqual(self.block_device_mapping.current_name, "/dev/null")

    def test_endElement_with_name_device_sets_current_name(self):
        self.block_device_mapping.endElement("deviceName", "some device name", None)
        self.assertEqual(self.block_device_mapping.current_name, "some device name")

    def test_endElement_with_name_item_sets_current_name_key_to_current_value(self):
        # Closing an "item" commits current_name -> current_value into the dict.
        self.block_device_mapping.current_name = "some name"
        self.block_device_mapping.current_value = "some value"
        self.block_device_mapping.endElement("item", "some item", None)
        self.assertEqual(self.block_device_mapping["some name"], "some value")
Ejemplo n.º 3
0
def launch_cluster(conn, opts, cluster_name):
  """Launch a Spark cluster: opts.slaves slave nodes plus one master.

  conn: boto EC2 connection.
  opts: parsed command-line options (key pair, AMI, instance types, spot
        pricing, zones, ...).
  cluster_name: prefix used to derive the "-master"/"-slaves" security groups.

  Returns (master_nodes, slave_nodes) as lists of boto Instance objects.
  Exits the process on unrecoverable errors.
  """

  # Remove known hosts to avoid "Offending key for IP ..." errors.
  known_hosts = os.environ['HOME'] + "/.ssh/known_hosts"
  if os.path.isfile(known_hosts):
    os.remove(known_hosts)

  # Fall back to host-derived defaults when key pair / IAM profile not given.
  if opts.key_pair is None:
    opts.key_pair = keypair()
    if opts.key_pair is None:
      print("ERROR: Must provide a key pair name (-k) to use on instances.", file=sys.stderr)
      sys.exit(1)

  if opts.profile is None:
    opts.profile = profile()
    if opts.profile is None:
      # Fixed grammar of the original message ("It be provided").
      print("ERROR: No profile found in current host. It must be provided with -p option.", file=sys.stderr)
      sys.exit(1)

  # cloud-init script: authorize our public key for root and ec2-user.
  public_key = pub_key()
  user_data = Template("""#!/bin/bash
  set -e -x
  echo '$public_key' >> ~root/.ssh/authorized_keys
  echo '$public_key' >> ~ec2-user/.ssh/authorized_keys""").substitute(public_key=public_key)

  print("Setting up security groups...")
  master_group = get_or_make_group(conn, cluster_name + "-master")
  slave_group = get_or_make_group(conn, cluster_name + "-slaves")

  # Security group of the machine running this script, read from EC2 instance
  # metadata, so the notebook host can reach the cluster.
  security_group = os.popen("curl -s http://169.254.169.254/latest/meta-data/security-groups").read()
  sparknotebook_group = get_or_make_group(conn, security_group)

  if master_group.rules == []:  # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=sparknotebook_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 18080, 18080, '0.0.0.0/0')
    master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
    master_group.authorize('tcp', 7077, 7077, '0.0.0.0/0')
    if opts.ganglia:
      master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
  if slave_group.rules == []:  # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=sparknotebook_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

  # Let the notebook group accept TCP/ICMP from the cluster groups (only
  # authorized once; checked against existing grants).
  if not any(r for r in sparknotebook_group.rules for g in r.grants if master_group.id == g.group_id):
    sparknotebook_group.authorize(ip_protocol="tcp", from_port="1", to_port="65535", src_group=master_group)
    sparknotebook_group.authorize(ip_protocol="icmp", from_port="-1", to_port="-1", src_group=master_group)

  if not any(r for r in sparknotebook_group.rules for g in r.grants if slave_group.id == g.group_id):
    sparknotebook_group.authorize(ip_protocol="tcp", from_port="1", to_port="65535", src_group=slave_group)
    sparknotebook_group.authorize(ip_protocol="icmp", from_port="-1", to_port="-1", src_group=slave_group)

  # Check if instances are already running in our groups
  existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                           die_on_error=False)
  if existing_slaves or (existing_masters and not opts.use_existing_master):
    print(("ERROR: There are already instances running in " +
        "group %s or %s" % (master_group.name, slave_group.name)), file=sys.stderr)
    sys.exit(1)

  # Figure out Spark AMI
  if opts.ami is None:
    opts.ami = get_spark_ami(opts)
  print("Launching instances...")

  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except Exception:  # narrowed from bare except: don't swallow Ctrl-C/SystemExit
    print("Could not find AMI " + opts.ami, file=sys.stderr)
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342),
  # so explicitly map the instance-store (ephemeral) disks ourselves.
  if opts.instance_type.startswith('m3.'):
    for i in range(get_num_disks(opts.instance_type)):
      dev = BlockDeviceType()
      dev.ephemeral_name = 'ephemeral%d' % i
      # The first ephemeral drive is /dev/sdb.
      name = '/dev/sd' + string.ascii_letters[i + 1]
      block_map[name] = dev

  # Launch slaves
  if opts.spot_price != None:
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    my_req_ids = []

    for zone in zones:
      best_price = find_best_price(conn, opts.instance_type, zone, opts.spot_price)
      # Launch spot instances with the requested price
      print(("Requesting %d slaves as spot instances with price $%.3f/hour each (total $%.3f/hour)" %
           (opts.slaves, best_price, opts.slaves * best_price)), file=sys.stderr)

      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(subnet_id=subnetId(), groups=[slave_group.id], associate_public_ip_address=True)
      interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(interface)

      slave_reqs = conn.request_spot_instances(
          price = best_price,
          image_id = opts.ami,
          launch_group = "launch-group-%s" % cluster_name,
          placement = zone,
          count = num_slaves_this_zone,
          key_name = opts.key_pair,
          instance_type = opts.instance_type,
          block_device_map = block_map,
          user_data = user_data,
          instance_profile_arn = opts.profile,
          network_interfaces = interfaces)
      my_req_ids += [req.id for req in slave_reqs]
      i += 1

    print("Waiting for spot instances to be granted", file=sys.stderr)
    try:
      while True:
        time.sleep(10)
        reqs = conn.get_all_spot_instance_requests()
        id_to_req = {}
        for r in reqs:
          id_to_req[r.id] = r
        active_instance_ids = []
        for i in my_req_ids:
          if i in id_to_req and id_to_req[i].state == "active":
            active_instance_ids.append(id_to_req[i].instance_id)
        if len(active_instance_ids) == opts.slaves:
          print("All %d slaves granted" % opts.slaves, file=sys.stderr)
          reservations = conn.get_all_instances(active_instance_ids)
          slave_nodes = []
          for r in reservations:
            slave_nodes += r.instances
          break
        else:
          print("%d of %d slaves granted, waiting longer" % (
            len(active_instance_ids), opts.slaves))
    # Intentionally broad: must also catch KeyboardInterrupt so outstanding
    # spot requests get canceled when the user hits Ctrl-C.
    except BaseException:
      print("Canceling spot instance requests", file=sys.stderr)
      conn.cancel_spot_instance_requests(my_req_ids)
      # Log a warning if any of these requests actually launched instances:
      (master_nodes, slave_nodes) = get_existing_cluster(
          conn, opts, cluster_name, die_on_error=False)
      running = len(master_nodes) + len(slave_nodes)
      if running:
        print(("WARNING: %d instances are still running" % running), file=sys.stderr)
      sys.exit(0)
  else:
    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_group_ids = [slave_group.id],
                              instance_type = opts.instance_type,
                              subnet_id = subnetId(),
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map,
                              user_data = user_data,
                              instance_profile_arn = opts.profile)
        slave_nodes += slave_res.instances
        print("Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id), file=sys.stderr)
      i += 1

  # Launch or resume masters
  if existing_masters:
    print("Starting master...")
    for inst in existing_masters:
      if inst.state not in ["shutting-down", "terminated"]:
        inst.start()
    master_nodes = existing_masters
  else:
    master_type = opts.master_instance_type
    if master_type == "":
      master_type = opts.instance_type
    if opts.zone == 'all':
      opts.zone = random.choice(conn.get_all_zones()).name
    if opts.spot_price != None:
      best_price = find_best_price(conn, master_type, opts.zone, opts.spot_price)
      # Launch spot instances with the requested price
      print(("Requesting master as spot instances with price $%.3f/hour" % (best_price)), file=sys.stderr)

      interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(subnet_id=subnetId(), groups=[master_group.id], associate_public_ip_address=True)
      interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(interface)

      master_reqs = conn.request_spot_instances(
        price = best_price,
        image_id = opts.ami,
        launch_group = "launch-group-%s" % cluster_name,
        placement = opts.zone,
        count = 1,
        key_name = opts.key_pair,
        instance_type = master_type,
        block_device_map = block_map,
        user_data = user_data,
        instance_profile_arn = opts.profile,
        network_interfaces = interfaces)
      my_req_ids = [r.id for r in master_reqs]
      print("Waiting for spot instance to be granted", file=sys.stderr)
      try:
        while True:
          time.sleep(10)
          reqs = conn.get_all_spot_instance_requests(request_ids=my_req_ids)
          id_to_req = {}
          for r in reqs:
            id_to_req[r.id] = r
          active_instance_ids = []
          for i in my_req_ids:
            if i in id_to_req and id_to_req[i].state == "active":
              active_instance_ids.append(id_to_req[i].instance_id)
          if len(active_instance_ids) == 1:
            print("Master granted", file=sys.stderr)
            reservations = conn.get_all_instances(active_instance_ids)
            master_nodes = []
            for r in reservations:
              master_nodes += r.instances
            break
          else:
            print("%d of %d masters granted, waiting longer" % (
              len(active_instance_ids), 1))
      # Intentionally broad: must also catch KeyboardInterrupt so the spot
      # request gets canceled when the user hits Ctrl-C.
      except BaseException:
        print("Canceling spot instance requests", file=sys.stderr)
        conn.cancel_spot_instance_requests(my_req_ids)
        # Log a warning if any of these requests actually launched instances.
        # Bug fix: the original unpacked into (master_nodes, master_nodes),
        # dropping the slave list and double-counting masters below.
        (master_nodes, slave_nodes) = get_existing_cluster(
            conn, opts, cluster_name, die_on_error=False)
        running = len(master_nodes) + len(slave_nodes)
        if running:
          print(("WARNING: %d instances are still running" % running), file=sys.stderr)
        sys.exit(0)
    else:
      master_res = image.run(key_name = opts.key_pair,
                             security_group_ids = [master_group.id],
                             instance_type = master_type,
                             subnet_id = subnetId(),
                             placement = opts.zone,
                             min_count = 1,
                             max_count = 1,
                             block_device_map = block_map,
                             user_data = user_data,
                             instance_profile_arn = opts.profile)
      master_nodes = master_res.instances
      # Bug fix: report the master's zone (opts.zone); the original printed the
      # stale loop variable `zone` left over from the slave-launch loop.
      print("Launched master in %s, regid = %s" % (opts.zone, master_res.id), file=sys.stderr)

  # Return all the instances
  return (master_nodes, slave_nodes)
Ejemplo n.º 4
0
def launch_cluster(conn, opts, cluster_name):
  print "Setting up security groups..."
  master_group = get_or_make_group(conn, cluster_name + "-master")
  slave_group = get_or_make_group(conn, cluster_name + "-slaves")
  zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    if opts.cluster_type == "mesos":
      master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    if opts.ganglia:
      master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize(src_group=master_group)
    zoo_group.authorize(src_group=slave_group)
    zoo_group.authorize(src_group=zoo_group)
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

  # Check if instances are already running in our groups
  active_nodes = get_existing_cluster(conn, opts, cluster_name,
                                      die_on_error=False)
  if any(active_nodes):
    print >> stderr, ("ERROR: There are already instances running in " +
        "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
    sys.exit(1)

  # Figure out the latest AMI from our static URL
  if opts.ami == "latest":
    try:
      opts.ami = urllib2.urlopen(LATEST_AMI_URL).read().strip()
      print "Latest Spark AMI: " + opts.ami
    except:
      print >> stderr, "Could not read " + LATEST_AMI_URL
      sys.exit(1)

  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # Launch slaves
  if opts.spot_price != None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    my_req_ids = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      slave_reqs = conn.request_spot_instances(
          price = opts.spot_price,
          image_id = opts.ami,
          launch_group = "launch-group-%s" % cluster_name,
          placement = zone,
          count = num_slaves_this_zone,
          key_name = opts.key_pair,
          security_groups = [slave_group],
          instance_type = opts.instance_type,
          block_device_map = block_map)
      my_req_ids += [req.id for req in slave_reqs]
      i += 1
    
    print "Waiting for spot instances to be granted..."
    try:
      while True:
        time.sleep(10)
        reqs = conn.get_all_spot_instance_requests()
        id_to_req = {}
        for r in reqs:
          id_to_req[r.id] = r
        active_instance_ids = []
        for i in my_req_ids:
          if i in id_to_req and id_to_req[i].state == "active":
            active_instance_ids.append(id_to_req[i].instance_id)
        if len(active_instance_ids) == opts.slaves:
          print "All %d slaves granted" % opts.slaves
          reservations = conn.get_all_instances(active_instance_ids)
          slave_nodes = []
          for r in reservations:
            slave_nodes += r.instances
          break
        else:
          print "%d of %d slaves granted, waiting longer" % (
            len(active_instance_ids), opts.slaves)
    except:
      print "Canceling spot instance requests"
      conn.cancel_spot_instance_requests(my_req_ids)
      # Log a warning if any of these requests actually launched instances:
      (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
          conn, opts, cluster_name, die_on_error=False)
      running = len(master_nodes) + len(slave_nodes) + len(zoo_nodes)
      if running:
        print >> stderr, ("WARNING: %d instances are still running" % running)
      sys.exit(0)
  else:
    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_groups = [slave_group],
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
      i += 1

  # Launch masters
  master_type = opts.master_instance_type
  if master_type == "":
    master_type = opts.instance_type
  if opts.zone == 'all':
    opts.zone = random.choice(conn.get_all_zones()).name
  master_res = image.run(key_name = opts.key_pair,
                         security_groups = [master_group],
                         instance_type = master_type,
                         placement = opts.zone,
                         min_count = 1,
                         max_count = 1,
                         block_device_map = block_map)
  master_nodes = master_res.instances
  print "Launched master in %s, regid = %s" % (zone, master_res.id)

  zoo_nodes = []

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
Ejemplo n.º 5
0
def create_image(module, ec2):
    """
    Create (or register) a new AMI and wait until it is available.

    module: AnsibleModule object (supplies params, exit_json/fail_json)
    ec2: authenticated boto EC2 connection object

    Uses CreateImage when an instance_id is given, RegisterImage otherwise.
    Calls module.exit_json on success; module.fail_json on any error.
    """

    instance_id = module.params.get('instance_id')
    name = module.params.get('name')
    wait = module.params.get('wait')
    wait_timeout = int(module.params.get('wait_timeout'))
    description = module.params.get('description')
    architecture = module.params.get('architecture')
    kernel_id = module.params.get('kernel_id')
    root_device_name = module.params.get('root_device_name')
    virtualization_type = module.params.get('virtualization_type')
    no_reboot = module.params.get('no_reboot')
    device_mapping = module.params.get('device_mapping')
    tags = module.params.get('tags')
    launch_permissions = module.params.get('launch_permissions')

    try:
        params = {'name': name, 'description': description}

        images = ec2.get_all_images(filters={'name': name})

        if images and images[0]:
            # ensure that launch_permissions are up to date
            update_image(module, ec2, images[0].id)

        # Translate the device_mapping list of dicts into a boto
        # BlockDeviceMapping; 'device_name' keys the mapping, the remaining
        # keys become BlockDeviceType constructor kwargs.
        bdm = None
        if device_mapping:
            bdm = BlockDeviceMapping()
            for device in device_mapping:
                if 'device_name' not in device:
                    module.fail_json(msg='Device name must be set for volume')
                device_name = device['device_name']
                del device['device_name']
                bd = BlockDeviceType(**device)
                bdm[device_name] = bd

        if instance_id:
            # CreateImage path: snapshot an existing instance.
            params['instance_id'] = instance_id
            params['no_reboot'] = no_reboot
            if bdm:
                params['block_device_mapping'] = bdm
            image_id = ec2.create_image(**params)
        else:
            # RegisterImage path: build the AMI from explicit attributes.
            params['architecture'] = architecture
            params['virtualization_type'] = virtualization_type
            if kernel_id:
                params['kernel_id'] = kernel_id
            if root_device_name:
                params['root_device_name'] = root_device_name
            if bdm:
                params['block_device_map'] = bdm
            image_id = ec2.register_image(**params)
    except boto.exception.BotoServerError as e:
        module.fail_json(msg="%s: %s" % (e.error_code, e.error_message))

    # Wait until the image is recognized. EC2 API has eventual consistency,
    # such that a successful CreateImage API call doesn't guarantee the success
    # of subsequent DescribeImages API call using the new image id returned.
    #
    # Bug fix: initialize img so the post-loop check cannot raise NameError
    # when every get_image attempt fails (or wait_timeout is 0).
    img = None
    for i in range(wait_timeout):
        try:
            img = ec2.get_image(image_id)

            if img.state == 'available':
                break
            elif img.state == 'failed':
                module.fail_json(
                    msg=
                    "AMI creation failed, please see the AWS console for more details"
                )
        except boto.exception.EC2ResponseError as e:
            # NotFound/Unavailable are expected transient errors while the new
            # AMI propagates; anything else only aborts on the final attempt
            # when the caller asked to wait.
            if ('InvalidAMIID.NotFound' not in e.error_code
                    and 'InvalidAMIID.Unavailable'
                    not in e.error_code) and wait and i == wait_timeout - 1:
                module.fail_json(
                    msg=
                    "Error while trying to find the new image. Using wait=yes and/or a longer wait_timeout may help. %s: %s"
                    % (e.error_code, e.error_message))
        finally:
            time.sleep(1)

    if img is None or img.state != 'available':
        module.fail_json(
            msg=
            "Error while trying to find the new image. Using wait=yes and/or a longer wait_timeout may help."
        )

    if tags:
        try:
            ec2.create_tags(image_id, tags)
        except boto.exception.EC2ResponseError as e:
            module.fail_json(msg="Image tagging failed => %s: %s" %
                             (e.error_code, e.error_message))
    if launch_permissions:
        try:
            img = ec2.get_image(image_id)
            img.set_launch_permissions(**launch_permissions)
        except boto.exception.BotoServerError as e:
            module.fail_json(msg="%s: %s" % (e.error_code, e.error_message),
                             image_id=image_id)

    module.exit_json(msg="AMI creation operation complete",
                     changed=True,
                     **get_ami_info(img))
Ejemplo n.º 6
0
                        interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(
                            subnet_id=vpc_subnet_id,
                            groups=group_id,
                            associate_public_ip_address=assign_public_ip)
                    interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
                        interface)
                    params['network_interfaces'] = interfaces
            else:
                params['subnet_id'] = vpc_subnet_id
                if vpc_subnet_id:
                    params['security_group_ids'] = group_id
                else:
                    params['security_groups'] = group_name

            if volumes:
                bdm = BlockDeviceMapping()
                for volume in volumes:
                    if 'device_name' not in volume:
                        module.fail_json(
                            msg='Device name must be set for volume')
                    # Minimum volume size is 1GB. We'll use volume size explicitly set to 0
                    # to be a signal not to create this volume
                    if 'volume_size' not in volume or int(
                            volume['volume_size']) > 0:
                        bdm[volume['device_name']] = create_block_device(
                            module, ec2, volume)

                params['block_device_map'] = bdm

            # check to see if we're using spot pricing first before starting instances
            if not spot_price:
Ejemplo n.º 7
0
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    user_data_content = None
    if opts.user_data:
        with open(opts.user_data) as user_data_file:
            user_data_content = user_data_file.read()

    print "Setting up security groups..."
    if opts.security_group_prefix is None:
        master_group = get_or_make_group(conn, cluster_name + "-master")
        slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    else:
        master_group = get_or_make_group(conn, opts.security_group_prefix + "-master")
        slave_group = get_or_make_group(conn, opts.security_group_prefix + "-slaves")
    authorized_address = opts.authorized_address
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, authorized_address)
        master_group.authorize('tcp', 8080, 8081, authorized_address)
        master_group.authorize('tcp', 18080, 18080, authorized_address)
        master_group.authorize('tcp', 19999, 19999, authorized_address)
        master_group.authorize('tcp', 50030, 50030, authorized_address)
        master_group.authorize('tcp', 50070, 50070, authorized_address)
        master_group.authorize('tcp', 60070, 60070, authorized_address)
        master_group.authorize('tcp', 4040, 4045, authorized_address)
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, authorized_address)
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)
        slave_group.authorize('tcp', 8080, 8081, authorized_address)
        slave_group.authorize('tcp', 50060, 50060, authorized_address)
        slave_group.authorize('tcp', 50075, 50075, authorized_address)
        slave_group.authorize('tcp', 60060, 60060, authorized_address)
        slave_group.authorize('tcp', 60075, 60075, authorized_address)

    # Check if instances are already running with the cluster name
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances for name: %s " % cluster_name)
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)


    additional_groups = []
    if opts.additional_security_group:
        additional_groups = [sg
                             for sg in conn.get_all_security_groups()
                             if opts.additional_security_group in (sg.name, sg.id)]
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add EBS volumes if asked to.
    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        for i in range(opts.ebs_vol_num):
            device = EBSBlockDeviceType()
            device.size = opts.ebs_vol_size
            device.volume_type=opts.ebs_vol_type
            device.delete_on_termination = True
            block_map["/dev/sd" + chr(ord('s') + i)] = device

    # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
    if opts.instance_type.startswith('m3.'):
        for i in range(get_num_disks(opts.instance_type)):
            dev = BlockDeviceType()
            dev.ephemeral_name = 'ephemeral%d' % i
            # The first ephemeral drive is /dev/sdb.
            name = '/dev/sd' + string.letters[i + 1]
            block_map[name] = dev

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print ("Requesting %d slaves as spot instances with price $%.3f" %
               (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group] + additional_groups,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                user_data=user_data_content)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                outstanding_request_ids = []
                for i in my_req_ids:
                    if i in id_to_req:
                        if id_to_req[i].state == "active":
                            active_instance_ids.append(id_to_req[i].instance_id)
                        else:
                            outstanding_request_ids.append(i)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer for request ids including %s" % (
                        len(active_instance_ids), opts.slaves, outstanding_request_ids[0:10])
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" % running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group] + additional_groups,
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=user_data_content)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                                zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group] + additional_groups,
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=user_data_content)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Give the instances descriptive names
    for master in master_nodes:
        name = '{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)
        tag_instance(master, name)

    for slave in slave_nodes:
        name = '{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)
        tag_instance(slave, name)

    # Return all the instances
    return (master_nodes, slave_nodes)
Ejemplo n.º 8
0
def create_ami(host_instance, args, config, instance_config, ssh_key,
               key_filename, instance_data, deploypass, cert, pkey,
               ami_name_prefix):
    """Build and register a new AMI from a running host instance.

    Attaches a fresh work volume to *host_instance* (all shell commands run
    remotely on it via fabric's ``run``/``put``), installs a base system into
    the volume (debootstrap for Debian/Ubuntu, ``yum groupinstall`` for
    CentOS), syncs per-target config files, installs grub, then either
    bundles/uploads an instance-store image or snapshots the volume and
    registers an EBS-backed AMI.  The registered image is tagged and the
    boto Image object is returned.

    Cleanup of the work volume and host instance is skipped when
    ``args.keep_volume`` / ``args.keep_host_instance`` are set.
    """
    connection = host_instance.connection
    setup_fabric_env(instance=host_instance,
                     abort_on_prompts=True,
                     disable_known_hosts=True,
                     key_filename=key_filename)

    target_name = args.config
    virtualization_type = config.get("virtualization_type")
    config_dir = "%s/%s" % (AMI_CONFIGS_DIR, target_name)
    if ami_name_prefix:
        prefix = ami_name_prefix
    else:
        prefix = args.config
    # Image names are made unique with a UTC timestamp suffix.
    dated_target_name = "{}-{}".format(
        prefix, time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))

    if config.get('distro') in ('debian', 'ubuntu'):
        ubuntu_release = config.get("release", "precise")
    int_dev_name = config['target']['int_dev_name']
    mount_dev = int_dev_name
    grub_dev = int_dev_name
    mount_point = config['target']['mount_point']
    boot_mount_dev = None
    host_packages_file = os.path.join(config_dir, "host_packages")
    packages_file = os.path.join(config_dir, "packages")
    if os.path.exists(host_packages_file):
        install_packages(host_packages_file, config.get('distro'))

    v = attach_and_wait(host_instance, config['target']['size'],
                        config['target']['aws_dev_name'], int_dev_name)

    # Step 0: install required packages
    if config.get('distro') == "centos":
        run('which MAKEDEV >/dev/null || yum -d 1 install -y MAKEDEV')

    # Step 1: prepare target FS
    run('mkdir -p %s' % mount_point)
    if config.get("root_device_type") == "instance-store":
        # Use file image
        mount_dev = "/dev/cloud_root/lv_root"
        grub_dev = "/dev/loop0"
        boot_mount_dev = "/dev/mapper/loop0p1"
        img_file = dated_target_name
        partition_image(mount_dev=mount_dev,
                        int_dev_name=int_dev_name,
                        img_file=img_file)

    elif virtualization_type == "hvm":
        # use EBS volume
        mount_dev = "/dev/cloud_root/lv_root"
        boot_mount_dev = "%s1" % int_dev_name
        partition_ebs_volume(int_dev_name=int_dev_name)

    run('/sbin/mkfs.{fs_type} {args} {dev}'.format(
        fs_type=config['target']['fs_type'],
        args=config['target'].get("mkfs_args", ""),
        dev=mount_dev))
    run('/sbin/e2label {dev} {label}'.format(
        dev=mount_dev, label=config['target']['e2_label']))
    run('mount {dev} {mount_point}'.format(dev=mount_dev,
                                           mount_point=mount_point))
    run('mkdir {0}/dev {0}/proc {0}/etc {0}/boot {0}/sys'.format(mount_point))
    run('mount -t sysfs sys %s/sys' % mount_point)

    if config.get('distro') not in ('debian', 'ubuntu'):
        run('mount -t proc proc %s/proc' % mount_point)
        run('for i in console null zero random urandom; '
            'do /sbin/MAKEDEV -d %s/dev -x $i ; done' % mount_point)
    if boot_mount_dev:
        run('mount {} {}/boot'.format(boot_mount_dev, mount_point))

    # Step 2: install base system
    if config.get('distro') in ('debian', 'ubuntu'):
        run("debootstrap %s %s "
            "http://puppet/repos/apt/ubuntu/" % (ubuntu_release, mount_point))
        run('chroot %s mount -t proc none /proc' % mount_point)
        run('mount -o bind /dev %s/dev' % mount_point)
        put('%s/releng-public-%s.list' % (AMI_CONFIGS_DIR, ubuntu_release),
            '%s/etc/apt/sources.list' % mount_point)
        with lcd(config_dir):
            put('usr/sbin/policy-rc.d',
                '%s/usr/sbin/' % mount_point,
                mirror_local_mode=True)
        install_packages(packages_file,
                         config.get('distro'),
                         chroot=mount_point)
    else:
        with lcd(config_dir):
            put('etc/yum-local.cfg', '%s/etc/yum-local.cfg' % mount_point)
        yum = 'yum -d 1 -c {0}/etc/yum-local.cfg -y --installroot={0} '.format(
            mount_point)
        # this groupinstall emulates the %packages section of the kickstart
        # config, which defaults to Core and Base.
        run('%s groupinstall Core Base' % yum)
        run('%s clean packages' % yum)
        # Rebuild RPM DB for cases when versions mismatch
        run('chroot %s rpmdb --rebuilddb || :' % mount_point)

    # Step 3: upload custom configuration files
    run('chroot %s mkdir -p /boot/grub' % mount_point)
    for directory in ('boot', 'etc', 'usr'):
        local_directory = os.path.join(config_dir, directory)
        remote_directory = os.path.join(mount_point, directory)
        if not os.path.exists(local_directory):
            # Fix: was `pass`, which made this check a no-op and let sync()
            # run against a directory this target does not ship.
            continue

        sync(local_directory, remote_directory)

    # Step 4: tune configs
    run('sed -i -e s/@ROOT_DEV_LABEL@/{label}/g -e s/@FS_TYPE@/{fs}/g '
        '{mnt}/etc/fstab'.format(label=config['target']['e2_label'],
                                 fs=config['target']['fs_type'],
                                 mnt=mount_point))
    if config.get('distro') in ('debian', 'ubuntu'):
        if virtualization_type == "hvm":
            run("chroot {mnt} grub-install {int_dev_name}".format(
                mnt=mount_point, int_dev_name=int_dev_name))
            run("chroot {mnt} update-grub".format(mnt=mount_point))
        else:
            run("chroot {mnt} update-grub -y".format(mnt=mount_point))
            run("sed  -i 's/^# groot.*/# groot=(hd0)/g' "
                "{mnt}/boot/grub/menu.lst".format(mnt=mount_point))
            run("chroot {mnt} update-grub".format(mnt=mount_point))
    else:
        run('ln -s grub.conf %s/boot/grub/menu.lst' % mount_point)
        run('ln -s ../boot/grub/grub.conf %s/etc/grub.conf' % mount_point)
        if config.get('kernel_package') == 'kernel-PAE':
            run('sed -i s/@VERSION@/`chroot %s rpm -q '
                '--queryformat "%%{version}-%%{release}.%%{arch}.PAE" '
                '%s | tail -n1`/g %s/boot/grub/grub.conf' %
                (mount_point, config.get('kernel_package',
                                         'kernel'), mount_point))
        else:
            run('sed -i s/@VERSION@/`chroot %s rpm -q '
                '--queryformat "%%{version}-%%{release}.%%{arch}" '
                '%s | tail -n1`/g %s/boot/grub/grub.conf' %
                (mount_point, config.get('kernel_package',
                                         'kernel'), mount_point))
        if config.get("root_device_type") == "instance-store":
            # files normally copied by grub-install
            run("cp -va /usr/share/grub/x86_64-redhat/* /mnt/boot/grub/")
            put(os.path.join(config_dir, "grub.cmd"), "/tmp/grub.cmd")
            run("sed -i s/@IMG@/{}/g /tmp/grub.cmd".format(img_file))
            run("cat /tmp/grub.cmd | grub --device-map=/dev/null")
        elif virtualization_type == "hvm":
            # See https://bugs.archlinux.org/task/30241 for the details,
            # grub-install doesn't handle /dev/xvd* devices properly
            grub_install_patch = os.path.join(config_dir, "grub-install.diff")
            if os.path.exists(grub_install_patch):
                put(grub_install_patch, "/tmp/grub-install.diff")
                run('which patch >/dev/null || yum -d 1 install -y patch')
                run('patch -p0 -i /tmp/grub-install.diff /sbin/grub-install')
            run("grub-install --root-directory=%s --no-floppy %s" %
                (mount_point, grub_dev))

    # Allow key-based root logins and skip reverse DNS lookups on ssh.
    run("sed -i -e '/PermitRootLogin/d' -e '/UseDNS/d' "
        "-e '$ a PermitRootLogin without-password' "
        "-e '$ a UseDNS no' "
        "%s/etc/ssh/sshd_config" % mount_point)

    if config.get('distro') in ('debian', 'ubuntu'):
        pass
    else:
        manage_service("network", mount_point, "on")
        manage_service("rc.local", mount_point, "on")

    if config.get("root_device_type") == "instance-store" and \
            config.get("distro") == "centos":
        instance_data = instance_data.copy()
        instance_data['name'] = host_instance.tags.get("Name")
        instance_data['hostname'] = host_instance.tags.get("FQDN")
        run("cp /etc/resolv.conf {}/etc/resolv.conf".format(mount_point))
        # make puppet happy
        # disable ipv6
        run("/sbin/service ip6tables stop")
        # mount /dev to let sshd start
        run('mount -o bind /dev %s/dev' % mount_point)
        assimilate_instance(host_instance,
                            instance_config,
                            ssh_key,
                            instance_data,
                            deploypass,
                            chroot=mount_point,
                            reboot=False)
        ami_cleanup(mount_point=mount_point, distro=config["distro"])
        # kill chroot processes
        put('%s/kill_chroot.sh' % AMI_CONFIGS_DIR, '/tmp/kill_chroot.sh')
        run('bash /tmp/kill_chroot.sh {}'.format(mount_point))
        run('swapoff -a')
    run('umount %s/dev || :' % mount_point)
    if config.get("distro") == "ubuntu":
        run('rm -f %s/usr/sbin/policy-rc.d' % mount_point)
        run('chroot %s ln -s /sbin/MAKEDEV /dev/' % mount_point)
        for dev in ('zero', 'null', 'console', 'generic'):
            run('chroot %s sh -c "cd /dev && ./MAKEDEV %s"' %
                (mount_point, dev))
    run('umount %s/sys || :' % mount_point)
    run('umount %s/proc || :' % mount_point)
    run('umount %s/dev  || :' % mount_point)
    run('umount %s/boot || :' % mount_point)
    run('umount %s' % mount_point)
    if config.get("root_device_type") == "instance-store" \
            and config.get("distro") == "centos":
        # create bundle
        run("yum -d 1 install -y ruby "
            "http://s3.amazonaws.com/ec2-downloads/ec2-ami-tools.noarch.rpm")
        bundle_location = "{b}/{d}/{t}/{n}".format(
            b=config["bucket"],
            d=config["bucket_dir"],
            t=config["target"]["tags"]["moz-type"],
            n=dated_target_name)
        manifest_location = "{}/{}.manifest.xml".format(
            bundle_location, dated_target_name)
        run("mkdir -p /mnt-tmp/out")
        put(cert, "/mnt-tmp/cert.pem")
        put(pkey, "/mnt-tmp/pk.pem")
        run("ec2-bundle-image -c /mnt-tmp/cert.pem -k /mnt-tmp/pk.pem "
            "-u {uid} -i /mnt-tmp/{img_file} -d /mnt-tmp/out -r x86_64".format(
                img_file=img_file, uid=config["aws_user_id"]))

        # hide() keeps the AWS credentials out of the fabric command log.
        with hide('running', 'stdout', 'stderr'):
            log.info("uploading bundle")
            run("ec2-upload-bundle -b {bundle_location}"
                " --access-key {access_key} --secret-key {secret_key}"
                " --region {region}"
                " -m /mnt-tmp/out/{img_file}.manifest.xml  --retry".format(
                    bundle_location=bundle_location,
                    access_key=boto.config.get("Credentials",
                                               "aws_access_key_id"),
                    secret_key=boto.config.get("Credentials",
                                               "aws_secret_access_key"),
                    region=connection.region.name,
                    img_file=img_file))

    v.detach(force=True)
    wait_for_status(v, "status", "available", "update")
    if not config.get("root_device_type") == "instance-store":
        # Step 5: Create a snapshot
        log.info('Creating a snapshot')
        snapshot = v.create_snapshot(dated_target_name)
        wait_for_status(snapshot, "status", "completed", "update")
        snapshot.add_tag('Name', dated_target_name)
        snapshot.add_tag('moz-created', str(int(time.mktime(time.gmtime()))))

    # Step 6: Create an AMI
    log.info('Creating AMI')
    if config.get("root_device_type") == "instance-store":
        ami_id = connection.register_image(
            dated_target_name,
            '%s AMI' % dated_target_name,
            architecture=config['arch'],
            virtualization_type=virtualization_type,
            image_location=manifest_location,
        )
    else:
        # EBS-backed image: root device maps to the snapshot taken above.
        host_img = connection.get_image(config['ami'])
        block_map = BlockDeviceMapping()
        block_map[host_img.root_device_name] = BlockDeviceType(
            snapshot_id=snapshot.id)
        root_device_name = host_img.root_device_name
        if virtualization_type == "hvm":
            # HVM images boot their own kernel; no AKI/ARI needed.
            kernel_id = None
            ramdisk_id = None
        else:
            kernel_id = host_img.kernel_id
            ramdisk_id = host_img.ramdisk_id

        ami_id = connection.register_image(
            dated_target_name,
            '%s AMI' % dated_target_name,
            architecture=config['arch'],
            kernel_id=kernel_id,
            ramdisk_id=ramdisk_id,
            root_device_name=root_device_name,
            block_device_map=block_map,
            virtualization_type=virtualization_type,
        )
    # The freshly registered AMI is not immediately describable; retry the
    # tagging until EC2 knows about it.
    while True:
        try:
            ami = connection.get_image(ami_id)
            ami.add_tag('Name', dated_target_name)
            ami.add_tag('moz-created', str(int(time.mktime(time.gmtime()))))
            if config["target"].get("tags"):
                for tag, value in config["target"]["tags"].items():
                    log.info("Tagging %s: %s", tag, value)
                    ami.add_tag(tag, value)
            log.info('AMI created')
            log.info('ID: {id}, name: {name}'.format(id=ami.id, name=ami.name))
            break
        # Narrowed from a bare except so Ctrl-C can break out of the retry
        # loop instead of being swallowed and retried forever.
        except Exception:
            log.info('Waiting for AMI')
            time.sleep(10)

    # Step 7: Cleanup
    if not args.keep_volume:
        log.info('Deleting volume')
        v.delete()
    if not args.keep_host_instance:
        log.info('Terminating host instance')
        host_instance.terminate()

    return ami
Ejemplo n.º 9
0
    def vmcreate(self, obj_attr_list):
        '''
        Provision a new EC2 instance described by *obj_attr_list*.

        Connects to the EC2 endpoint if needed, refuses to proceed when an
        instance with the same cloud_vm_name is already running, builds a
        block-device mapping for the root volume (/dev/sda1), issues
        run_instances, tags the instance, optionally attaches a data volume
        (as /dev/xvdc), and waits for the instance to become ready/booted.
        Records provisioning timing marks in *obj_attr_list* along the way.
        '''
        try:
            # Pessimistic defaults; overwritten on success.
            _status = 100
            _fmsg = "An error has occurred, but no error message was captured"

            _instance = False
            _reservation = False

            self.determine_instance_name(obj_attr_list)
            self.determine_key_name(obj_attr_list)

            obj_attr_list[
                "last_known_state"] = "about to connect to " + self.get_description(
                ) + " manager"

            self.take_action_if_requested("VM", obj_attr_list,
                                          "provision_originated")

            # Lazily establish the EC2 connection on first use.
            if not self.ec2conn:
                self.connect(obj_attr_list["access"], obj_attr_list["credentials"], \
                             obj_attr_list["vmc_name"])

            # Instance names must be unique per cloud; bail out on collision.
            if self.is_vm_running(obj_attr_list):
                _msg = "An instance named \"" + obj_attr_list["cloud_vm_name"]
                _msg += " is already running. It needs to be destroyed first."
                _status = 187
                cberr(_msg)
                raise CldOpsException(_msg, _status)

            # "Security groups" must be a list
            _security_groups = []
            _security_groups.append(obj_attr_list["security_groups"])

            # Timing mark: provisioning request sent (seconds since origination).
            _time_mark_prs = int(time())
            obj_attr_list[
                "mgt_002_provisioning_request_sent"] = _time_mark_prs - int(
                    obj_attr_list["mgt_001_provisioning_request_originated"])

            self.vm_placement(obj_attr_list)

            obj_attr_list["last_known_state"] = "about to send create request"

            self.get_images(obj_attr_list)
            self.get_networks(obj_attr_list)

            obj_attr_list["config_drive"] = False

            # We need the instance placemente information before creating the actual volume
            #self.vvcreate(obj_attr_list)

            # Default root-volume type when the caller did not specify one.
            if "cloud_rv_type" not in obj_attr_list:
                obj_attr_list["cloud_rv_type"] = "standard"

            _bdm = BlockDeviceMapping()
            '''
            Options:
            gp2 (== ssd)
            io1 (also ssd)
            st1 (not sure)
            sc1 (cold?)
            standard (spinners)
            '''

            # IOPS of "0" means "use the volume type's default".
            if obj_attr_list["cloud_rv_iops"] == "0":
                _iops = None
            else:
                _iops = obj_attr_list["cloud_rv_iops"]

            # Root-volume size of "0" (or absent) means "use the image default".
            if "cloud_rv" in obj_attr_list and obj_attr_list["cloud_rv"] != "0":
                _size = obj_attr_list["cloud_rv"]
            else:
                _size = None

            # Root device mapping; the volume is reclaimed with the instance.
            _bdm['/dev/sda1'] = BlockDeviceType(
                volume_type=obj_attr_list["cloud_rv_type"],
                delete_on_termination=True,
                iops=_iops,
                size=_size)

            self.common_messages("VM", obj_attr_list, "creating", 0, '')

            self.pre_vmcreate_process(obj_attr_list)
            _reservation = self.ec2conn.run_instances(image_id = obj_attr_list["boot_volume_imageid1"], \
                                                      instance_type = obj_attr_list["size"], \
                                                      key_name = obj_attr_list["key_name"], \
                                                      user_data = self.populate_cloudconfig(obj_attr_list),
                                                      block_device_map = _bdm,
                                                      security_groups = _security_groups)

            if _reservation:

                sleep(int(obj_attr_list["update_frequency"]))

                _instance = _reservation.instances[0]

                _instance.add_tag("Name", obj_attr_list["cloud_vm_name"])

                obj_attr_list["cloud_vm_uuid"] = '{0}'.format(_instance.id)
                obj_attr_list["instance_obj"] = _instance

                # Create the data volume now that placement is known.
                self.vvcreate(obj_attr_list)

                self.take_action_if_requested("VM", obj_attr_list,
                                              "provision_started")

                _time_mark_prc = self.wait_for_instance_ready(
                    obj_attr_list, _time_mark_prs)

                # Attach the data volume (if one was created) as /dev/xvdc.
                if obj_attr_list["cloud_vv_instance"]:
                    self.common_messages("VV", obj_attr_list, "attaching",
                                         _status, _fmsg)
                    obj_attr_list["cloud_vv_instance"].attach(
                        obj_attr_list["cloud_vm_uuid"], "/dev/xvdc")

                self.wait_for_instance_boot(obj_attr_list, _time_mark_prc)

                obj_attr_list["host_name"] = "unknown"

                self.take_action_if_requested("VM", obj_attr_list,
                                              "provision_finished")

                _status = 0

                # Test hook: deliberately report failure after a successful create.
                if obj_attr_list["force_failure"].lower() == "true":
                    _fmsg = "Forced failure (option FORCE_FAILURE set \"true\")"
                    _status = 916

        except CldOpsException, obj:
            _status = obj.status
            _fmsg = str(obj.msg)
Ejemplo n.º 10
0
    dst_sg = ec2dst.create_security_group(args.name, 'AMI Copy')
    cleanup.add(dst_sg, 'delete',
                'Removing destination security group: %s' % args.name)
    info('Allowing SSH access from 0.0.0.0/0')
    dst_sg.authorize('tcp', 22, 22, '0.0.0.0/0')

    # Set up device mapping variables
    info('Generating a list of EBS volumes to copy')
    # Create a list of devices for the copying instances
    tmp_dev = valid_block_devs[:]
    # Grab the source AMI BDM
    src_ami_bdm = src_ami.block_device_mapping
    # Use the source AMI BDM as the base for the destination AMI BDM
    dst_ami_bdm = src_ami.block_device_mapping
    # The instance BDMs should be empty to start with
    src_inst_bdm = BlockDeviceMapping()
    dst_inst_bdm = BlockDeviceMapping()
    device_map = {}

    # Generate the instance BDMs and keep track of the mappings
    for b in src_ami_bdm.keys():
        if src_ami_bdm[b].snapshot_id:
            d = tmp_dev.pop(0)
            src_inst_bdm[d] = BlockDeviceType(
                    snapshot_id = src_ami_bdm[b].snapshot_id,
                    size = src_ami_bdm[b].size,
                    delete_on_termination = True,
                    volume_type = src_ami_bdm[b].volume_type,
                    iops = src_ami_bdm[b].iops,)
            dst_inst_bdm[d] = BlockDeviceType(
                    size = src_ami_bdm[b].size,
Ejemplo n.º 11
0
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    slave_group.owner_id = os.getenv('EC2_USER_ID')
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    zoo_group.owner_id = os.getenv('EC2_USER_ID')

    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves, existing_zoos = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, (
            "ERROR: There are already instances running in " +
            "group %s or %s" %
            (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    logging.debug(" Printing block_map..")
    #print block_map
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        #print "device: ", device
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        #block_map["/dev/sdv"] = device
        #block_map["/dev/sdv"] = device
        block_map["/dev/vdb"] = device

    if opts.user_data_file != None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
            #print "user data (encoded) = ", opts.user_data
        finally:
            user_data_file.close()

    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
        num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
        if num_slaves_this_zone > 0:
            slave_res = image.run(key_name=opts.key_pair,
                                  security_groups=[slave_group],
                                  instance_type=opts.instance_type,
                                  placement=zone,
                                  min_count=num_slaves_this_zone,
                                  max_count=num_slaves_this_zone,
                                  block_device_map=block_map,
                                  user_data=opts.user_data)
            slave_nodes += slave_res.instances
            print "Launched %d slaves in %s, regid = %s" % (
                num_slaves_this_zone, zone, slave_res.id)
        i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Launch ZooKeeper nodes if required
    if int(opts.ft) > 1:
        print "Running " + opts.ft + " zookeepers"
        zoo_res = image.run(key_name=opts.key_pair,
                            security_groups=[zoo_group],
                            instance_type=opts.instance_type,
                            placement=opts.zone,
                            min_count=3,
                            max_count=3,
                            block_device_map=block_map,
                            user_data=opts.user_data)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
Ejemplo n.º 12
0
    # Snapshot the machine's root volume; `volume`, `description`, `m`,
    # `args`, `depl` and `check` come from earlier in this script.
    snapshot = volume.create_snapshot(description=description)
    print >> sys.stderr, "created snapshot {0}".format(snapshot.id)

    # Poll (via `check`) until the snapshot is ready, up to 120 tries.
    nixops.util.check_wait(check, max_tries=120)

    # Name the snapshot after the AMI we are about to register.
    m._conn.create_tags([snapshot.id], {'Name': ami_name})

    # Tear down the deployment resources unless --keep was requested.
    if not args.keep: depl.destroy_resources()

    # Register the image.
    # Pick the PV-GRUB kernel image (AKI) matching this manifest pattern.
    aki = m._conn.get_all_images(
        filters={'manifest-location': '*pv-grub-hd0_1.03-x86_64*'})[0]
    print >> sys.stderr, "using kernel image {0} - {1}".format(
        aki.id, aki.location)

    # Root device comes from the snapshot; sdb-sde are instance-store
    # (ephemeral) disks mapped in at launch time.
    block_map = BlockDeviceMapping()
    block_map['/dev/sda'] = BlockDeviceType(snapshot_id=snapshot.id,
                                            delete_on_termination=True)
    block_map['/dev/sdb'] = BlockDeviceType(ephemeral_name="ephemeral0")
    block_map['/dev/sdc'] = BlockDeviceType(ephemeral_name="ephemeral1")
    block_map['/dev/sdd'] = BlockDeviceType(ephemeral_name="ephemeral2")
    block_map['/dev/sde'] = BlockDeviceType(ephemeral_name="ephemeral3")

    # Register an x86_64 PV AMI backed by the snapshot created above.
    ami_id = m._conn.register_image(name=ami_name,
                                    description=description,
                                    architecture="x86_64",
                                    root_device_name="/dev/sda",
                                    kernel_id=aki.id,
                                    block_device_map=block_map)

print >> sys.stderr, "registered AMI {0}".format(ami_id)
Ejemplo n.º 13
0
def create_launch_config(connection, module):
    """Create an AWS autoscaling launch configuration from Ansible params.

    Builds a BlockDeviceMapping from the ``volumes`` parameter, assembles
    a boto LaunchConfiguration, optionally appends an MD5 hash to its
    name, and creates it via ``connection`` if no launch configuration
    with that name exists yet.  Fails the Ansible module on errors.
    """
    name = module.params.get('name')
    image_id = module.params.get('image_id')
    key_name = module.params.get('key_name')
    security_groups = module.params['security_groups']
    user_data = module.params.get('user_data')
    volumes = module.params['volumes']
    instance_type = module.params.get('instance_type')
    spot_price = module.params.get('spot_price')
    instance_monitoring = module.params.get('instance_monitoring')
    assign_public_ip = module.params.get('assign_public_ip')
    kernel_id = module.params.get('kernel_id')
    ramdisk_id = module.params.get('ramdisk_id')
    instance_profile_name = module.params.get('instance_profile_name')
    ebs_optimized = module.params.get('ebs_optimized')
    classic_link_vpc_id = module.params.get('classic_link_vpc_id')
    classic_link_vpc_security_groups = module.params.get(
        'classic_link_vpc_security_groups')
    append_hash = module.params.get('append_hash')
    bdm = BlockDeviceMapping()

    if volumes:
        for volume in volumes:
            if 'device_name' not in volume:
                module.fail_json(msg='Device name must be set for volume')
            # Minimum volume size is 1GB. We'll use volume size explicitly set to 0
            # to be a signal not to create this volume
            if 'volume_size' not in volume or int(volume['volume_size']) > 0:
                bdm[volume['device_name']] = create_block_device(
                    module, volume)

    lc = LaunchConfiguration(
        name=name,
        image_id=image_id,
        key_name=key_name,
        security_groups=security_groups,
        user_data=user_data,
        block_device_mappings=[bdm],
        instance_type=instance_type,
        kernel_id=kernel_id,
        spot_price=spot_price,
        instance_monitoring=instance_monitoring,
        associate_public_ip_address=assign_public_ip,
        ramdisk_id=ramdisk_id,
        instance_profile_name=instance_profile_name,
        ebs_optimized=ebs_optimized,
        classic_link_vpc_security_groups=classic_link_vpc_security_groups,
        classic_link_vpc_id=classic_link_vpc_id,
    )

    if append_hash:
        # MD5 of launch configuration properties
        # NOTE(review): frozenset(lc.__dict__) iterates the attribute
        # *names* only, so this hash does not change when attribute
        # values change — confirm this is the intended behavior.
        h = hashlib.md5()
        h.update(str(frozenset(lc.__dict__)))

        # Update name variables with md5 hash
        name = '-'.join((name, h.hexdigest()))
        lc.name = name

    # Only create the launch configuration if one with this (possibly
    # hash-suffixed) name does not already exist.
    launch_configs = connection.get_all_launch_configurations(names=[name])
    changed = False
    if not launch_configs:
        try:
            connection.create_launch_configuration(lc)
            launch_configs = connection.get_all_launch_configurations(
                names=[name])
            changed = True
        except BotoServerError, e:
            module.fail_json(msg=str(e))
Ejemplo n.º 14
0
def create_instance_args():
    """
    Looks up the security group and subnet for the requested stack/play,
    renders the cloud-init user-data shell script, and returns keyword
    arguments to pass into ec2.run_instances().

    Relies on module-level state: ``args``, ``stack_name``, ``base_ami``,
    ``run_id``, ``extra_vars_yml``, ``git_refs_yml`` and
    ``secure_vars_file``.
    """

    # Locate the subnet tagged for this CloudFormation stack and play.
    vpc = VPCConnection()
    subnet = vpc.get_all_subnets(filters={
        'tag:aws:cloudformation:stack-name': stack_name,
        'tag:play': args.play
    })
    if len(subnet) < 1:
        sys.stderr.write(
            "ERROR: Expected at least one subnet, got {}\n".format(
                len(subnet)))
        sys.exit(1)
    subnet_id = subnet[0].id
    vpc_id = subnet[0].vpc_id

    security_group_id = get_instance_sec_group(vpc_id)

    # Read the deploy key if one was supplied; otherwise mark the run as
    # non-secure and use a placeholder so the template still renders.
    if args.identity:
        config_secure = 'true'
        with open(args.identity) as f:
            identity_contents = f.read()
    else:
        config_secure = 'false'
        identity_contents = "dummy"

    # Shell script executed by cloud-init on first boot: clones the
    # configuration repos and runs the ansible play locally.
    # (Template text below is runtime data — do not edit casually.)
    user_data = """#!/bin/bash
set -x
set -e
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
base_dir="/var/tmp/edx-cfg"
extra_vars="$base_dir/extra-vars-$$.yml"
secure_identity="$base_dir/secure-identity"
git_ssh="$base_dir/git_ssh.sh"
configuration_version="{configuration_version}"
configuration_secure_version="{configuration_secure_version}"
configuration_private_version="{configuration_private_version}"
environment="{environment}"
deployment="{deployment}"
play="{play}"
config_secure={config_secure}
git_repo_name="configuration"
git_repo="https://github.com/edx/$git_repo_name"
git_repo_secure="{configuration_secure_repo}"
git_repo_secure_name="{configuration_secure_repo_basename}"
git_repo_private="{configuration_private_repo}"
git_repo_private_name=$(basename $git_repo_private .git)
secure_vars_file={secure_vars_file}
environment_deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{environment}-{deployment}.yml"
deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{deployment}.yml"
instance_id=\\
$(curl http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null)
instance_ip=\\
$(curl http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null)
instance_type=\\
$(curl http://169.254.169.254/latest/meta-data/instance-type 2>/dev/null)
playbook_dir="$base_dir/{playbook_dir}"

if $config_secure; then
    git_cmd="env GIT_SSH=$git_ssh git"
else
    git_cmd="git"
fi

ANSIBLE_ENABLE_SQS=true
SQS_NAME={queue_name}
SQS_REGION=us-east-1
SQS_MSG_PREFIX="[ $instance_id $instance_ip $environment-$deployment $play ]"
PYTHONUNBUFFERED=1

# environment for ansible
export ANSIBLE_ENABLE_SQS SQS_NAME SQS_REGION SQS_MSG_PREFIX PYTHONUNBUFFERED

if [[ ! -x /usr/bin/git || ! -x /usr/bin/pip ]]; then
    echo "Installing pkg dependencies"
    /usr/bin/apt-get update
    /usr/bin/apt-get install -y git python-pip python-apt \\
        git-core build-essential python-dev libxml2-dev \\
        libxslt-dev curl --force-yes
fi


rm -rf $base_dir
mkdir -p $base_dir
cd $base_dir

cat << EOF > $git_ssh
#!/bin/sh
exec /usr/bin/ssh -o StrictHostKeyChecking=no -i "$secure_identity" "\$@"
EOF

chmod 755 $git_ssh

if $config_secure; then
    cat << EOF > $secure_identity
{identity_contents}
EOF
fi

cat << EOF >> $extra_vars
---
# extra vars passed into
# abbey.py including versions
# of all the repositories
{extra_vars_yml}

{git_refs_yml}

# abbey will always run fake migrations
# this is so that the application can come
# up healthy
fake_migrations: true

# Use the build number an the dynamic cache key.
EDXAPP_UPDATE_STATIC_FILES_KEY: true
edxapp_dynamic_cache_key: {deployment}-{environment}-{play}-{cache_id}

disable_edx_services: true

# abbey should never take instances in
# and out of elbs
elb_pre_post: false
EOF

chmod 400 $secure_identity

$git_cmd clone $git_repo $git_repo_name
cd $git_repo_name
$git_cmd checkout $configuration_version
cd $base_dir

if $config_secure; then
    $git_cmd clone $git_repo_secure $git_repo_secure_name
    cd $git_repo_secure_name
    $git_cmd checkout $configuration_secure_version
    cd $base_dir
fi

if [[ ! -z $git_repo_private ]]; then
    $git_cmd clone $git_repo_private $git_repo_private_name
    cd $git_repo_private_name
    $git_cmd checkout $configuration_private_version
    cd $base_dir
fi


cd $base_dir/$git_repo_name
sudo pip install -r requirements.txt

cd $playbook_dir

if [[ -r "$deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$deployment_secure_vars"
fi

if [[ -r "$environment_deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_secure_vars"
fi

if $secure_vars_file; then
    extra_args_opts+=" -e@$secure_vars_file"
fi

extra_args_opts+=" -e@$extra_vars"

ansible-playbook -vvvv -c local -i "localhost," $play.yml $extra_args_opts
ansible-playbook -vvvv -c local -i "localhost," stop_all_edx_services.yml $extra_args_opts

rm -rf $base_dir

    """.format(
        configuration_version=args.configuration_version,
        configuration_secure_version=args.configuration_secure_version,
        configuration_secure_repo=args.configuration_secure_repo,
        configuration_secure_repo_basename=os.path.basename(
            args.configuration_secure_repo),
        configuration_private_version=args.configuration_private_version,
        configuration_private_repo=args.configuration_private_repo,
        environment=args.environment,
        deployment=args.deployment,
        play=args.play,
        playbook_dir=args.playbook_dir,
        config_secure=config_secure,
        identity_contents=identity_contents,
        queue_name=run_id,
        extra_vars_yml=extra_vars_yml,
        git_refs_yml=git_refs_yml,
        secure_vars_file=secure_vars_file,
        cache_id=args.cache_id)

    # Root EBS volume sized from the command line.
    mapping = BlockDeviceMapping()
    root_vol = BlockDeviceType(size=args.root_vol_size)
    mapping['/dev/sda1'] = root_vol

    # Keyword arguments for ec2.run_instances().
    ec2_args = {
        'security_group_ids': [security_group_id],
        'subnet_id': subnet_id,
        'key_name': args.keypair,
        'image_id': base_ami,
        'instance_type': args.instance_type,
        'instance_profile_name': args.role_name,
        'user_data': user_data,
        'block_device_map': mapping,
    }

    return ec2_args
Ejemplo n.º 15
0
 def setUp(self):
     """Give each test a fresh, empty BlockDeviceMapping instance."""
     fresh_mapping = BlockDeviceMapping()
     self.block_device_mapping = fresh_mapping
Ejemplo n.º 16
0
def launch_cluster(conn, opts, cluster_name):
    """Launch an "ampcamp" cluster on EC2.

    Sets up the shared ampcamp master/slave/zoo security groups, refuses
    to run if instances tagged with ``cluster_name`` already exist,
    launches slaves (spot or on-demand) and a master, tags them, and
    returns a tuple ``(master_nodes, slave_nodes, zoo_nodes)`` of boto
    Instance objects (zoo_nodes is always empty here).
    """
    print "Setting up security groups..."

    master_group = get_or_make_group(conn, "ampcamp-master")
    slave_group = get_or_make_group(conn, "ampcamp-slaves")
    zoo_group = get_or_make_group(conn, "ampcamp-zoo")
    # Only authorize rules on freshly-created groups; re-authorizing an
    # existing rule raises an EC2 error.
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        if opts.cluster_type == "mesos":
            master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
            master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
            master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
            master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        if opts.cluster_type == "mesos":
            slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
            slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
            slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
            slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    print "Checking for running cluster..."
    reservations = conn.get_all_instances()
    for res in reservations:
        for instance in res.instances:
            # Conflict detection is tag-based: any active instance tagged
            # with this cluster name aborts the launch.
            if 'tags' in instance.__dict__ and 'cluster' in instance.tags:
                if instance.tags['cluster'] == cluster_name and is_active(
                        instance):
                    print >> stderr, (
                        "ERROR: Instances %s is already running in cluster %s"
                        % (instance.id, cluster_name))
                    sys.exit(1)

    # "latest"/"standalone" are symbolic AMI names resolved by get_ami().
    if opts.ami in ["latest", "standalone"]:
        opts.ami = get_ami(opts.ami)

    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
    # consider narrowing to Exception.
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price != None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        slave_reqs = conn.request_spot_instances(
            price=opts.spot_price,
            image_id=opts.ami,
            launch_group="launch-group-%s" % cluster_name,
            placement=opts.zone,
            count=opts.slaves,
            key_name=opts.key_pair,
            security_groups=[slave_group],
            instance_type=opts.instance_type,
            block_device_map=block_map)
        my_req_ids = [req.id for req in slave_reqs]
        print "Waiting for spot instances to be granted..."
        # Poll every 10s until every spot request is active; loops forever
        # if requests are never granted.
        while True:
            time.sleep(10)
            reqs = conn.get_all_spot_instance_requests()
            id_to_req = {}
            for r in reqs:
                id_to_req[r.id] = r
            active = 0
            instance_ids = []
            for i in my_req_ids:
                if id_to_req[i].state == "active":
                    active += 1
                    instance_ids.append(id_to_req[i].instance_id)
            if active == opts.slaves:
                print "All %d slaves granted" % opts.slaves
                reservations = conn.get_all_instances(instance_ids)
                slave_nodes = []
                for r in reservations:
                    slave_nodes += r.instances
                break
            else:
                print "%d of %d slaves granted, waiting longer" % (active,
                                                                   opts.slaves)
    else:
        # Launch non-spot instances
        slave_res = image.run(key_name=opts.key_pair,
                              security_groups=[slave_group],
                              instance_type=opts.instance_type,
                              placement=opts.zone,
                              min_count=opts.slaves,
                              max_count=opts.slaves,
                              block_device_map=block_map)
        slave_nodes = slave_res.instances
        print "Launched slaves, regid = " + slave_res.id
Ejemplo n.º 17
0
def launch_spot_request(conn, request, tenant, job):
    """Submit a spot-instance request for `job` in `tenant`'s subnet.

    Builds a block device mapping (10GB root plus four ephemeral disks),
    requests spot instances via boto, tags and records each request in
    the provisioner database, and returns the list of request ids.
    Returns None if boto raises EC2ResponseError (logged, not re-raised).
    """
    try:
        logger.debug("%s = %s. tenants vpc = %s" %
                     (request.zone, tenant.subnets[request.zone], tenant.vpc))

        # NOTE(review): all three of these alias job.cost_aware — the
        # drafts_* variables look like they should come from different
        # job attributes; confirm whether this is intentional.
        cost_aware_req = job.cost_aware
        drafts_req = job.cost_aware
        drafts_avg = job.cost_aware
        # Root volume plus four instance-store (ephemeral) disks.
        mapping = BlockDeviceMapping()
        sda1 = BlockDeviceType()
        eph0 = BlockDeviceType()
        eph1 = BlockDeviceType()
        eph2 = BlockDeviceType()
        eph3 = BlockDeviceType()
        sda1.size = 10
        eph0.ephemeral_name = 'ephemeral0'
        eph1.ephemeral_name = 'ephemeral1'
        eph2.ephemeral_name = 'ephemeral2'
        eph3.ephemeral_name = 'ephemeral3'
        mapping['/dev/sda1'] = sda1
        mapping['/dev/sdb'] = eph0
        mapping['/dev/sdc'] = eph1
        mapping['/dev/sdd'] = eph2
        mapping['/dev/sde'] = eph3

        inst_req = None

        inst_req = conn.request_spot_instances(
            price=request.bid,
            image_id=request.ami,
            subnet_id=tenant.subnets[request.zone],
            count=request.count,
            key_name=tenant.key_pair,
            security_group_ids=[tenant.security_group],
            instance_type=request.instance_type,
            user_data=customise_cloudinit(tenant, job),
            block_device_map=mapping)
        my_req_ids = [req.id for req in inst_req]
        # address = ""
        for req in my_req_ids:
            insert_launch_stats(req, request, tenant)
            # tag each request
            tag_requests(req, tenant.name, conn)
            # NOTE(review): SQL is built by string interpolation; if any of
            # these values can be externally influenced this is injectable —
            # prefer a parameterized query (DB-API placeholders).
            ProvisionerConfig().dbconn.execute((
                "insert into instance_request (tenant, instance_type, " +
                "price, job_runner_id, request_type, request_id, " +
                "subnet, cost_aware_ins, cost_aware_bid, cost_aware_subnet," +
                " drafts_ins, drafts_bid, drafts_subnet, selected_avg_price,"
                " cost_aware_avg_price, drafts_avg_price, drafts_avg_ins, " +
                "drafts_avg_bid, drafts_avg_subnet, drafts_avg_avg_price) " +
                "values ('%s', '%s', %s, %s, '%s', '%s', %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            ) % (tenant.db_id, request.instance.db_id, request.price, job.id,
                 "spot", req, tenant.subnets_db_id[request.zone],
                 cost_aware_req.instance.db_id, cost_aware_req.bid,
                 tenant.subnets_db_id[cost_aware_req.zone],
                 drafts_req.instance.db_id, drafts_req.DrAFTS,
                 tenant.subnets_db_id[drafts_req.zone], request.AvgPrice,
                 cost_aware_req.AvgPrice, drafts_req.AvgPrice,
                 drafts_avg.instance.db_id, drafts_avg.DrAFTS,
                 tenant.subnets_db_id[drafts_avg.zone], drafts_avg.AvgPrice))

        return my_req_ids
    except boto.exception.EC2ResponseError:
        # Logged and swallowed; caller receives None in this case.
        logger.exception("There was an error communicating with EC2.")
Ejemplo n.º 18
0
            #print i.platform
            #print i.instance_type
            #print i.instance_profile
            print '-----'


# Find the us-west-1 region object; `r` keeps the matched region after
# the break (NOTE(review): if the name never matches, `r` is the last
# region iterated — confirm that is acceptable).
region_name = 'us-west-1'
for r in boto.ec2.regions():
    if r.name == region_name:
        break
conn = boto.connect_ec2(region=r)

#print conn.run_instances(image_id='ami-75287b30')
#print conn.run_instances(image_id='ami-71287b34')

# Map two instance-store (ephemeral) disks onto xvdc/xvdd.
mapping = BlockDeviceMapping()
eph0 = BlockDeviceType()
eph1 = BlockDeviceType()
eph0.ephemeral_name = 'ephemeral0'
eph1.ephemeral_name = 'ephemeral1'
mapping['/dev/xvdc'] = eph0
mapping['/dev/xvdd'] = eph1

# Launch a single m1.medium from the given AMI with the mapping above.
print conn.run_instances(image_id='ami-75287b30',
                         instance_type='m1.medium',
                         key_name='debian6',
                         block_device_map=mapping)

#print conn.terminate_instances(instance_ids=['i-8bd812d3'])
#print sys.argv[1:]
Ejemplo n.º 19
0
 def startElement(self, name, attrs, connection):
     """Handle an opening XML element during boto response parsing.

     For a 'blockDeviceMapping' element, store a fresh BlockDeviceMapping
     under self.attrs['block_device_mapping'] and return it so nested
     elements are delegated to it; any other element returns None.
     """
     if name != 'blockDeviceMapping':
         return None
     bdm = BlockDeviceMapping()
     self.attrs['block_device_mapping'] = bdm
     return bdm
Ejemplo n.º 20
0
def create_instance_args():
    """
    Looks up security group, subnet
    and returns arguments to pass into
    ec2.run_instances() including
    user data
    """

    vpc = boto.vpc.connect_to_region(args.region)
    subnet = vpc.get_all_subnets(
        filters={
            'tag:aws:cloudformation:stack-name': stack_name,
            'tag:play': args.play}
    )

    if len(subnet) < 1:
        #
        # try scheme for non-cloudformation builds
        #

        subnet = vpc.get_all_subnets(
            filters={
                'tag:play': args.play,
                'tag:environment': args.environment,
                'tag:deployment': args.deployment}
        )

    if len(subnet) < 1:
        sys.stderr.write("ERROR: Expected at least one subnet, got {} for {}-{}-{}\n".format(
            len(subnet), args.environment, args.deployment, args.play))
        sys.exit(1)
    subnet_id = subnet[0].id
    vpc_id = subnet[0].vpc_id

    security_group_id = get_instance_sec_group(vpc_id)

    if args.identity:
        config_secure = 'true'
        with open(args.identity) as f:
            identity_contents = f.read()
    else:
        config_secure = 'false'
        identity_contents = "dummy"

    user_data = """#!/bin/bash
set -x
set -e
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
base_dir="/var/tmp/edx-cfg"
extra_vars="$base_dir/extra-vars-$$.yml"
secure_identity="$base_dir/secure-identity"
git_ssh="$base_dir/git_ssh.sh"
configuration_version="{configuration_version}"
configuration_secure_version="{configuration_secure_version}"
configuration_private_version="{configuration_private_version}"
configuration_internal_version="{configuration_internal_version}"
environment="{environment}"
deployment="{deployment}"
play="{play}"
cluster="{play}"
config_secure={config_secure}
git_repo_name="configuration"
git_repo="https://github.com/edx/$git_repo_name"
git_repo_secure="{configuration_secure_repo}"
git_repo_secure_name=$(basename $git_repo_secure .git)
git_repo_private="{configuration_private_repo}"
git_repo_private_name=$(basename $git_repo_private .git)
git_repo_internal="{configuration_internal_repo}"
git_repo_internal_name=$(basename $git_repo_internal .git)
secure_vars_file={secure_vars_file}
environment_deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{environment}-{deployment}.yml"
deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{deployment}.yml"
environment_deployment_internal_vars="$base_dir/$git_repo_internal_name/ansible/vars/{environment}-{deployment}.yml"
deployment_internal_vars="$base_dir/$git_repo_internal_name/ansible/vars/{deployment}.yml"
instance_id=\\
$(curl http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null)
instance_ip=\\
$(curl http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null)
instance_type=\\
$(curl http://169.254.169.254/latest/meta-data/instance-type 2>/dev/null)
playbook_dir="$base_dir/{playbook_dir}"

if $config_secure; then
    git_cmd="env GIT_SSH=$git_ssh git"
else
    git_cmd="git"
fi

ANSIBLE_ENABLE_SQS=true
SQS_NAME={queue_name}
SQS_REGION={region}
SQS_MSG_PREFIX="[ $instance_id $instance_ip $environment-$deployment $play ]"
PYTHONUNBUFFERED=1
HIPCHAT_TOKEN={hipchat_token}
HIPCHAT_ROOM={hipchat_room}
HIPCHAT_MSG_PREFIX="$environment-$deployment-$play: "
HIPCHAT_FROM="ansible-$instance_id"
HIPCHAT_MSG_COLOR=$(echo -e "yellow\\ngreen\\npurple\\ngray" | shuf | head -1)
DATADOG_API_KEY={datadog_api_key}
# environment for ansible
export ANSIBLE_ENABLE_SQS SQS_NAME SQS_REGION SQS_MSG_PREFIX PYTHONUNBUFFERED
export HIPCHAT_TOKEN HIPCHAT_ROOM HIPCHAT_MSG_PREFIX HIPCHAT_FROM
export HIPCHAT_MSG_COLOR DATADOG_API_KEY


#################################### Lifted from ansible-bootstrap.sh
if [[ -z "$ANSIBLE_REPO" ]]; then
  ANSIBLE_REPO="https://github.com/edx/ansible.git"
fi

if [[ -z "$ANSIBLE_VERSION" ]]; then
  ANSIBLE_VERSION="master"
fi

if [[ -z "$CONFIGURATION_REPO" ]]; then
  CONFIGURATION_REPO="https://github.com/edx/configuration.git"
fi

if [[ -z "$CONFIGURATION_VERSION" ]]; then
  CONFIGURATION_VERSION="master"
fi

if [[ -z "$UPGRADE_OS" ]]; then
  UPGRADE_OS=false
fi

#
# Bootstrapping constants
#
VIRTUAL_ENV_VERSION="15.0.2"
PIP_VERSION="8.1.2"
SETUPTOOLS_VERSION="24.0.3"
EDX_PPA="deb http://ppa.edx.org precise main"
EDX_PPA_KEY_SERVER="keyserver.ubuntu.com"
EDX_PPA_KEY_ID="B41E5E3969464050"

cat << EOF
******************************************************************************

Running the abbey with the following arguments:

ANSIBLE_REPO="$ANSIBLE_REPO"
ANSIBLE_VERSION="$ANSIBLE_VERSION"
CONFIGURATION_REPO="$CONFIGURATION_REPO"
CONFIGURATION_VERSION="$CONFIGURATION_VERSION"

******************************************************************************
EOF


if [[ $(id -u) -ne 0 ]] ;then
    echo "Please run as root";
    exit 1;
fi

if grep -q 'Precise Pangolin' /etc/os-release
then
    SHORT_DIST="precise"
elif grep -q 'Trusty Tahr' /etc/os-release
then
    SHORT_DIST="trusty"
elif grep -q 'Xenial Xerus' /etc/os-release
then
    SHORT_DIST="xenial"
else
    cat << EOF

    This script is only known to work on Ubuntu Precise, Trusty and Xenial,
    exiting.  If you are interested in helping make installation possible
    on other platforms, let us know.

EOF
   exit 1;
fi

EDX_PPA="deb http://ppa.edx.org $SHORT_DIST main"

# Upgrade the OS
apt-get update -y
apt-key update -y

if [ "$UPGRADE_OS" = true ]; then
    echo "Upgrading the OS..."
    apt-get upgrade -y
fi

# Required for add-apt-repository
apt-get install -y software-properties-common python-software-properties

# Add git PPA
add-apt-repository -y ppa:git-core/ppa

# For older distributions we need to install a PPA for Python 2.7.10
if [[ "precise" = "$SHORT_DIST" || "trusty" = "$SHORT_DIST" ]]; then

    # Add python PPA
    apt-key adv --keyserver "$EDX_PPA_KEY_SERVER" --recv-keys "$EDX_PPA_KEY_ID"
    add-apt-repository -y "$EDX_PPA"
fi

# Install python 2.7 latest, git and other common requirements
# NOTE: This will install the latest version of python 2.7 and
# which may differ from what is pinned in virtualenvironments
apt-get update -y

apt-get install -y python2.7 python2.7-dev python-pip python-apt python-yaml python-jinja2 build-essential sudo git-core libmysqlclient-dev libffi-dev libssl-dev

# Workaround for a 16.04 bug, need to upgrade to latest and then
# potentially downgrade to the preferred version.
# https://github.com/pypa/pip/issues/3862
if [[ "xenial" = "$SHORT_DIST" ]]; then
    pip install --upgrade pip
    pip install --upgrade pip=="$PIP_VERSION"
else
    pip install --upgrade pip=="$PIP_VERSION"
fi

# pip moves to /usr/local/bin when upgraded
hash -r   #pip may have moved from /usr/bin/ to /usr/local/bin/. This clears bash's path cache.
PATH=/usr/local/bin:$PATH
pip install setuptools=="$SETUPTOOLS_VERSION"
pip install virtualenv=="$VIRTUAL_ENV_VERSION"


##################### END Lifted from ansible-bootstrap.sh


# python3 is required for certain other things
# (currently xqwatcher so it can run python2 and 3 grader code,
# but potentially more in the future). It's not available on Ubuntu 12.04,
# but in those cases we don't need it anyways.
if [[ -n "$(apt-cache search --names-only '^python3-pip$')" ]]; then
    /usr/bin/apt-get update
    /usr/bin/apt-get install -y python3-pip python3-dev
fi

# this is missing on 14.04 (base package on 12.04)
# we need to do this on any build, since the above apt-get
# only runs on a build from scratch
/usr/bin/apt-get install -y python-httplib2 --force-yes

rm -rf $base_dir
mkdir -p $base_dir
cd $base_dir

cat << EOF > $git_ssh
#!/bin/sh
exec /usr/bin/ssh -o StrictHostKeyChecking=no -i "$secure_identity" "\$@"
EOF

chmod 755 $git_ssh

if $config_secure; then
    cat << EOF > $secure_identity
{identity_contents}
EOF
fi

cat << EOF >> $extra_vars
---
# extra vars passed into
# abbey.py including versions
# of all the repositories
{extra_vars_yml}

# abbey will always run fake migrations
# this is so that the application can come
# up healthy
fake_migrations: true

disable_edx_services: true
COMMON_TAG_EC2_INSTANCE: true

# abbey should never take instances in
# and out of elbs
elb_pre_post: false
EOF

chmod 400 $secure_identity

$git_cmd clone $git_repo $git_repo_name
cd $git_repo_name
$git_cmd checkout $configuration_version
cd $base_dir

if $config_secure; then
    $git_cmd clone $git_repo_secure $git_repo_secure_name
    cd $git_repo_secure_name
    $git_cmd checkout $configuration_secure_version
    cd $base_dir
fi

if [[ ! -z $git_repo_private ]]; then
    $git_cmd clone $git_repo_private $git_repo_private_name
    cd $git_repo_private_name
    $git_cmd checkout $configuration_private_version
    cd $base_dir
fi

if [[ ! -z $git_repo_internal ]]; then
    $git_cmd clone $git_repo_internal $git_repo_internal_name
    cd $git_repo_internal_name
    $git_cmd checkout $configuration_internal_version
    cd $base_dir
fi


cd $base_dir/$git_repo_name
sudo pip install -r pre-requirements.txt
sudo pip install -r requirements.txt

cd $playbook_dir

if [[ -r "$deployment_internal_vars" ]]; then
    extra_args_opts+=" -e@$deployment_internal_vars"
fi

if [[ -r "$environment_deployment_internal_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_internal_vars"
fi

if [[ -r "$deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$deployment_secure_vars"
fi

if [[ -r "$environment_deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_secure_vars"
fi

if $secure_vars_file; then
    extra_args_opts+=" -e@$secure_vars_file"
fi

extra_args_opts+=" -e@$extra_vars"

ansible-playbook -vvvv -c local -i "localhost," $play.yml $extra_args_opts
ansible-playbook -vvvv -c local -i "localhost," stop_all_edx_services.yml $extra_args_opts

rm -rf $base_dir

    """.format(
                hipchat_token=args.hipchat_api_token,
                hipchat_room=args.ansible_hipchat_room_id,
                configuration_version=args.configuration_version,
                configuration_secure_version=args.configuration_secure_version,
                configuration_secure_repo=args.configuration_secure_repo,
                configuration_private_version=args.configuration_private_version,
                configuration_private_repo=args.configuration_private_repo,
                configuration_internal_version=args.configuration_internal_version,
                configuration_internal_repo=args.configuration_internal_repo,
                environment=args.environment,
                deployment=args.deployment,
                play=args.play,
                playbook_dir=args.playbook_dir,
                config_secure=config_secure,
                identity_contents=identity_contents,
                queue_name=run_id,
                extra_vars_yml=extra_vars_yml,
                secure_vars_file=secure_vars_file,
                cache_id=args.cache_id,
                datadog_api_key=args.datadog_api_key,
                region=args.region)

    mapping = BlockDeviceMapping()
    root_vol = BlockDeviceType(size=args.root_vol_size,
                               volume_type='gp2')
    mapping['/dev/sda1'] = root_vol

    ec2_args = {
        'security_group_ids': [security_group_id],
        'subnet_id': subnet_id,
        'key_name': args.keypair,
        'image_id': base_ami,
        'instance_type': args.instance_type,
        'instance_profile_name': args.role_name,
        'user_data': user_data,
        'block_device_map': mapping,
    }

    return ec2_args
Ejemplo n.º 21
0
def launch_cluster(conn, opts, cluster_name):
  """Launch a Mesos cluster on EC2: masters, slaves and (optionally)
  dedicated ZooKeeper nodes.

  Creates or reuses the cluster's security groups, refuses to run if
  instances already exist in them, then boots the instances from the
  configured EMIs (Eucalyptus machine images).

  Returns a tuple (master_nodes, slave_nodes, zoo_nodes) of boto
  instance objects; zoo_nodes is [] when no separate zookeepers launch.
  """
  # Fail fast on the two SSH credentials we cannot proceed without.
  if opts.identity_file is None:
    print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
    sys.exit(1)
  if opts.key_pair is None:
    print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
    sys.exit(1)
  print "Setting up security groups..."
  
  # With --one-security-group, all three roles alias a single group so
  # instances can talk over private IPs without per-role rules.
  if opts.one_security_group:
    master_group = get_or_make_group(conn, cluster_name + "-group")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = master_group
    zoo_group = master_group
  
  else:
      master_group = get_or_make_group(conn, cluster_name + "-master")
      master_group.owner_id = os.getenv('EC2_USER_ID')
      slave_group = get_or_make_group(conn, cluster_name + "-slaves")
      slave_group.owner_id = os.getenv('EC2_USER_ID')
      zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
      zoo_group.owner_id = os.getenv('EC2_USER_ID')
      
  # An empty rule list means the group was freshly created; populate it.
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
    master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50031, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
    master_group.authorize('tcp', 40000, 40000, '0.0.0.0/0') #apache hama
    master_group.authorize('tcp', 40013, 40013, '0.0.0.0/0') #apache hama
    master_group.authorize('tcp', 8020, 8020, '0.0.0.0/0') #hdfs HA nameservice
    master_group.authorize('tcp', 8485, 8485, '0.0.0.0/0') #journal nodes
    master_group.authorize('tcp', 8023, 8023, '0.0.0.0/0') #jt HA   
    master_group.authorize('tcp', 8021, 8021, '0.0.0.0/0') #jt HA
    master_group.authorize('tcp', 8018, 8019, '0.0.0.0/0') #zkfc
    master_group.authorize('tcp', 2812, 2812, '0.0.0.0/0') #monit web ui    
    
    #If cohosted with zookeeper open necessary ports
    if opts.cohost:
        print "Opening additional ports for zookeeper... "
        master_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        master_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        master_group.authorize('tcp', 3888, 3888, '0.0.0.0/0') 
        
    if opts.ganglia:
      master_group.authorize('tcp', 80, 80, '0.0.0.0/0')
      #Also needed 8649 and 8651 but check if only for master
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    slave_group.authorize('tcp', 40015, 40015, '0.0.0.0/0') ##apache hama web UI
    slave_group.authorize('tcp', 2812, 2812, '0.0.0.0/0') #monit web ui
    slave_group.authorize('tcp', 31000, 32000, '0.0.0.0/0') #task tracker web ui    
  
  if zoo_group.rules == []: # Group was just now created
      zoo_group.authorize(src_group=master_group)
      zoo_group.authorize(src_group=slave_group)
      zoo_group.authorize(src_group=zoo_group)
      zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
      zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
      zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
      zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')
      zoo_group.authorize('tcp', 8018, 8020, '0.0.0.0/0') #hdfs HA nameservic
      zoo_group.authorize('tcp', 8485, 8485, '0.0.0.0/0') #journal nodes
      zoo_group.authorize('tcp', 8023, 8023, '0.0.0.0/0') #jt HA
      zoo_group.authorize('tcp', 2812, 2812, '0.0.0.0/0') #monit web ui        
   


  # Check if instances are already running in our groups
  # Grouped instances are instances that run on the same security group in order to allow communication
  # using private IPs and without DNS resolving
  existing_masters, existing_slaves, existing_zoos, existing_grouped = get_existing_cluster(conn, opts, cluster_name, die_on_error=False)
  if existing_slaves or (existing_masters and not opts.use_existing_master) or existing_grouped:
    print >> stderr, ("ERROR: There are already instances running in " +
        "group %s or %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
    sys.exit(1)

  print "Launching instances..."

  # Resolve the slave image; any lookup failure is treated as "not found".
  try:
    image = conn.get_all_images(image_ids=[opts.emi])[0]
  except:
    print >> stderr, "Could not find emi " + opts.emi
    sys.exit(1)
    
  try:
    image_master = conn.get_all_images(image_ids=[opts.emi_master])[0]
  except:
    print >> stderr, "Could not find emi " + opts.emi_master
    sys.exit(1)
  
  # Launch additional ZooKeeper nodes if required - ex: if mesos masters specified are 2 and the zoo_num=3 (default)
  if int(opts.ft) > 1:
    if(opts.cohost):
        # NOTE(review): this branch produces a *string* zoo_num while the
        # other branches pass opts.zoo_num through unchanged — confirm
        # the expected type of opts.zoo_num against the option parser.
        zoo_num = str(int(opts.zoo_num) - int(opts.ft)) #extra zoo instances needed
    else:
        zoo_num = opts.zoo_num
  else:
      zoo_num = opts.zoo_num
      
  # NOTE(review): zoo_num may be a str here; under Python 2 'str > 0' is
  # always True, so this guard may not behave as intended — confirm.
  if (zoo_num > 0):
      # Dedicated zookeepers reuse the master image unless one is given.
      if opts.emi_zoo == "":
          emi_zoo = opts.emi_master 
      else:
          emi_zoo = opts.emi_zoo
              
      try:
        image_zoo = conn.get_all_images(image_ids=[emi_zoo])[0]
      except:
        print >> stderr, "Could not find emi " + emi_zoo
        sys.exit(1)
       

  # Create block device mapping so that we can add an EBS volume if asked to
  logging.debug( "Calling boto BlockDeviceMapping()...")
  block_map = BlockDeviceMapping()
  logging.debug(" Printing block_map..") 
  #print block_map
  if opts.ebs_vol_size > 0:
    logging.debug("Calling boto EBSBlockDeviceType()...")
    device = EBSBlockDeviceType()
    #print "device: ", device
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    device.ephemeral_name = "ephemeral0"
    #block_map["/dev/sdv"] = device
    #block_map["/dev/sdv"] = device
    block_map["/dev/vdb"] = device
    
  # A user-data file, when given, replaces opts.user_data wholesale.
  if opts.user_data_file != None:
      user_data_file = open(opts.user_data_file)
      try:
          opts.user_data = user_data_file.read()
          #print "user data (encoded) = ", opts.user_data
      finally:
          user_data_file.close()
  
  # Launch non-spot instances
  # Slaves are spread across availability zones via get_partition().
  zones = get_zones(conn, opts)    
  num_zones = len(zones)
  i = 0
  slave_nodes = []
  for zone in zones:
    num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
    if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_groups = [slave_group],
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map,
                              user_data = opts.user_data)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
    i += 1  

  # Launch or resume masters
  if existing_masters:
    print "Starting master..."
    for inst in existing_masters:
      if inst.state not in ["shutting-down", "terminated"]:  
        inst.start()
    master_nodes = existing_masters
  else:
    master_type = opts.master_instance_type
    if master_type == "":
      master_type = opts.instance_type
    if opts.zone == 'all':
      opts.zone = random.choice(conn.get_all_zones()).name
    
    print "Running " + opts.ft + " masters"
    master_res = image_master.run(key_name = opts.key_pair,
                           security_groups = [master_group],
                           instance_type = master_type,
                           placement = opts.zone,
                           min_count = opts.ft,
                           max_count = opts.ft,
                           block_device_map = block_map,
                           user_data = opts.user_data)
    master_nodes = master_res.instances
    # NOTE(review): masters are placed in opts.zone but this message
    # prints 'zone', the leftover loop variable from the slave loop —
    # confirm which zone should be reported.
    print "Launched master in %s, regid = %s" % (zone, master_res.id)

  if(zoo_num > 0):
    
    print "Running additional " + zoo_num + " zookeepers"
    zoo_res = image_zoo.run(key_name = opts.key_pair,
                        security_groups = [zoo_group],
                        instance_type = opts.instance_type,
                        placement = opts.zone,
                        min_count = zoo_num,
                        max_count = zoo_num,
                        block_device_map = block_map,
                        user_data = opts.user_data)
    zoo_nodes = zoo_res.instances
    print "Launched zoo, regid = " + zoo_res.id
  else:
    zoo_nodes = []
    
  if (opts.cohost):
      print "Zookeepers are co-hosted on mesos instances..."

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
Ejemplo n.º 22
0
def launch_cluster(conn, opts, cluster_name):
    """Launch a Spark cluster on EC2.

    Sets up the master and slave security groups, refuses to run if
    conflicting instances already exist, then launches slaves (spot or
    on-demand, per opts.spot_price) and launches or resumes the master.

    Returns a tuple (master_nodes, slave_nodes) of boto instance objects.
    """
    # Fail fast on the two SSH credentials we cannot proceed without.
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    # An empty rule list means the group was freshly created; populate it.
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 18080, 18080, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s" % (master_group.name, slave_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)
    print "Launching instances..."

    # Any AMI lookup failure is treated as "not found".
    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print ("Requesting %d slaves as spot instances with price $%.3f" %
               (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        # Spread the requested slave count across availability zones.
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group],
                instance_type=opts.instance_type,
                block_device_map=block_map)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        # Poll every 10s until all requests are active; the broad except
        # also catches KeyboardInterrupt so requests get canceled on ^C.
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves)
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" % running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group],
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                                zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map)
        master_nodes = master_res.instances
        # NOTE(review): the master is placed in opts.zone but this message
        # prints 'zone', the leftover loop variable from the slave loop —
        # confirm which zone should be reported.
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Give the instances descriptive names
    for master in master_nodes:
        master.add_tag(
            key='Name',
            value='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id))
    for slave in slave_nodes:
        slave.add_tag(
            key='Name',
            value='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id))

    # Return all the instances
    return (master_nodes, slave_nodes)
Ejemplo n.º 23
0
def start_node():
    """Launch one SLURM compute node on EC2 (on-demand or spot).

    Reads the node name from argv[1], builds the run_instances /
    request_spot_instances keyword arguments from the cluster
    configuration, and tags the launched instance(s).

    Returns a process exit code: 0 on success, 1 on usage or EC2
    connection errors.

    NOTE(review): uses xrange and passes a str to b64encode — this code
    is Python 2 only; confirm before porting.
    """
    start_logging()

    print(" ".join(argv))

    # Exactly one argument is expected: the SLURM node name.
    if len(argv) != 2:
        print("Usage: %s <nodename>" % (argv[0], ), file=sys.stderr)
        return 1

    nodename = argv[1]

    cc = ClusterConfiguration.from_config()
    region = get_region()
    ec2 = boto.ec2.connect_to_region(region)

    if not ec2:
        print("Could not connect to EC2 endpoint in region %r" % (region, ),
              file=sys.stderr)
        return 1

    # kw accumulates the keyword arguments for run_instances /
    # request_spot_instances.
    kw = {}
    slurm_s3_root = cc.slurm_s3_root

    # Fall back to the stock Amazon Linux AMI for this region when no
    # compute AMI is configured.
    kw['image_id'] = (cc.compute_ami if cc.compute_ami is not None else
                      amazon_linux_ami[region])
    # An instance profile may be given either as a full ARN or by name.
    if cc.instance_profile is not None:
        if cc.instance_profile.startswith("arn:"):
            kw['instance_profile_arn'] = cc.instance_profile
        else:
            kw['instance_profile_name'] = cc.instance_profile
    kw['key_name'] = cc.key_name
    kw['instance_type'] = cc.compute_instance_type

    # A configured bid price switches the launch to a spot request valid
    # for the next 24 hours.
    if cc.compute_bid_price is not None:
        end = time() + 24 * 60 * 60  # FIXME: Don't hardcode this.
        kw['price'] = cc.compute_bid_price
        kw['valid_until'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime(end))

    # Fill the boot script template (init_script, defined elsewhere in
    # this file) with this node's identity and package lists.
    node_address = cc.get_address_for_nodename(nodename)
    node_subnet = cc.get_subnet_for_address(node_address)
    user_data = init_script % {
        "region":
        region,
        "nodename":
        nodename,
        "os_packages":
        " ".join(cc.compute_os_packages if cc.
                 compute_os_packages is not None else []),
        "external_packages":
        " ".join(cc.compute_external_packages if cc.
                 compute_external_packages is not None else []),
        "slurm_ec2_conf":
        cc.slurm_ec2_configuration,
        "slurm_s3_root":
        slurm_s3_root,
    }
    user_data = b64encode(user_data)
    kw['user_data'] = user_data

    # Map the ethernet interface to the correct IP address
    eth0 = NetworkInterfaceSpecification(associate_public_ip_address=True,
                                         delete_on_termination=True,
                                         device_index=0,
                                         groups=cc.security_groups,
                                         private_ip_address=str(node_address),
                                         subnet_id=node_subnet.id)

    kw['network_interfaces'] = NetworkInterfaceCollection(eth0)

    # Attach any ephemeral storage devices
    block_device_map = BlockDeviceMapping()
    # 32 GB gp2 root volume on /dev/xvda.
    block_device_map['/dev/xvda'] = BlockDeviceType(size=32, volume_type="gp2")
    devices = cc.ephemeral_stores[cc.compute_instance_type]

    # Instance-store volumes are mapped to /dev/sdb, /dev/sdc, ...
    for i, device in enumerate(devices):
        drive = "/dev/sd" + chr(ord('b') + i)
        block_device_map[drive] = BlockDeviceType(
            ephemeral_name="ephemeral%d" % i)

    kw['block_device_map'] = block_device_map

    if cc.compute_bid_price is None:
        # On-demand path: launch now and tag the instances.
        print("run_instances: %r" % kw)
        reservation = ec2.run_instances(**kw)
        tags = {
            'SLURMHostname': nodename,
            'SLURMS3Root': slurm_s3_root,
            'Name': "SLURM Computation Node %s" % nodename,
        }

        print("instances: %s" %
              " ".join([instance.id for instance in reservation.instances]))

        # create-tags can fail at times since the tag resource database is
        # a bit behind EC2's actual state.
        # Retry up to 10 times with a linearly growing backoff.
        for i in xrange(10):
            try:
                ec2.create_tags(
                    [instance.id for instance in reservation.instances], tags)
                break
            except Exception as e:
                print("Failed to tag instance: %s" % e, file=sys.stderr)
                sleep(0.5 * i)
    else:
        # Spot path: submit the request; tagging happens elsewhere (if at
        # all) once the request is fulfilled.
        print("request_spot_instances: %r" % kw, file=sys.stderr)
        requests = ec2.request_spot_instances(**kw)
        print("requests: %s" % " ".join([request.id for request in requests]))

    return 0
Ejemplo n.º 24
0
def create_launch_config(connection, module):
    """Create an autoscaling launch configuration if one with the given
    name does not already exist, then exit the Ansible module reporting
    the (possibly pre-existing) configuration's attributes.

    Note: this function terminates via module.exit_json()/fail_json()
    rather than returning a value.

    :param connection: boto autoscaling connection used for all API calls
    :param module: AnsibleModule carrying the user-supplied parameters
    """
    name = module.params.get('name')
    image_id = module.params.get('image_id')
    key_name = module.params.get('key_name')
    security_groups = module.params['security_groups']
    user_data = module.params.get('user_data')
    user_data_path = module.params.get('user_data_path')
    volumes = module.params['volumes']
    instance_type = module.params.get('instance_type')
    spot_price = module.params.get('spot_price')
    instance_monitoring = module.params.get('instance_monitoring')
    assign_public_ip = module.params.get('assign_public_ip')
    kernel_id = module.params.get('kernel_id')
    ramdisk_id = module.params.get('ramdisk_id')
    instance_profile_name = module.params.get('instance_profile_name')
    ebs_optimized = module.params.get('ebs_optimized')
    classic_link_vpc_id = module.params.get('classic_link_vpc_id')
    classic_link_vpc_security_groups = module.params.get(
        'classic_link_vpc_security_groups')
    bdm = BlockDeviceMapping()

    # A user-data file, when given, overrides the inline user_data string.
    if user_data_path:
        try:
            with open(user_data_path, 'r') as user_data_file:
                user_data = user_data_file.read()
        except IOError as e:
            module.fail_json(msg=str(e), exception=traceback.format_exc())

    if volumes:
        for volume in volumes:
            if 'device_name' not in volume:
                module.fail_json(msg='Device name must be set for volume')
            # Minimum volume size is 1GB. We'll use volume size explicitly set to 0
            # to be a signal not to create this volume
            if 'volume_size' not in volume or int(volume['volume_size']) > 0:
                bdm[volume['device_name']] = create_block_device(
                    module, volume)

    lc = LaunchConfiguration(
        name=name,
        image_id=image_id,
        key_name=key_name,
        security_groups=security_groups,
        user_data=user_data,
        block_device_mappings=[bdm],
        instance_type=instance_type,
        kernel_id=kernel_id,
        spot_price=spot_price,
        instance_monitoring=instance_monitoring,
        associate_public_ip_address=assign_public_ip,
        ramdisk_id=ramdisk_id,
        instance_profile_name=instance_profile_name,
        ebs_optimized=ebs_optimized,
        classic_link_vpc_security_groups=classic_link_vpc_security_groups,
        classic_link_vpc_id=classic_link_vpc_id,
    )

    # Only create the configuration when none with this name exists yet;
    # an existing one is reported unchanged (changed=False).
    launch_configs = connection.get_all_launch_configurations(names=[name])
    changed = False
    if not launch_configs:
        try:
            connection.create_launch_configuration(lc)
            launch_configs = connection.get_all_launch_configurations(
                names=[name])
            changed = True
        except BotoServerError as e:
            module.fail_json(msg=str(e))

    # Serialize the launch config's attributes, excluding those that are
    # unserializable or handled specially below.
    result = dict(
        ((a[0], a[1]) for a in vars(launch_configs[0]).items()
         if a[0] not in ('connection', 'created_time', 'instance_monitoring',
                         'block_device_mappings')))
    result['created_time'] = str(launch_configs[0].created_time)
    # Looking at boto's launchconfig.py, it looks like this could be a boolean
    # value or an object with an enabled attribute.  The enabled attribute
    # could be a boolean or a string representation of a boolean.  Since
    # I can't test all permutations myself to see if my reading of the code is
    # correct, have to code this *very* defensively
    if launch_configs[0].instance_monitoring is True:
        result['instance_monitoring'] = True
    else:
        try:
            result['instance_monitoring'] = module.boolean(
                launch_configs[0].instance_monitoring.enabled)
        except AttributeError:
            result['instance_monitoring'] = False
    if launch_configs[0].block_device_mappings is not None:
        result['block_device_mappings'] = []
        # Loop variable renamed from 'bdm' so it no longer shadows the
        # BlockDeviceMapping built above.
        for mapping in launch_configs[0].block_device_mappings:
            result['block_device_mappings'].append(
                dict(device_name=mapping.device_name,
                     virtual_name=mapping.virtual_name))
            if mapping.ebs is not None:
                result['block_device_mappings'][-1]['ebs'] = dict(
                    snapshot_id=mapping.ebs.snapshot_id,
                    volume_size=mapping.ebs.volume_size)

    if user_data_path:
        result[
            'user_data'] = "hidden"  # Otherwise, we dump binary to the user's terminal

    module.exit_json(changed=changed,
                     name=result['name'],
                     created_time=result['created_time'],
                     image_id=result['image_id'],
                     arn=result['launch_configuration_arn'],
                     security_groups=result['security_groups'],
                     instance_type=result['instance_type'],
                     result=result)
Ejemplo n.º 25
0
def test_create_launch_configuration_with_block_device_mappings():
    """Block device mappings (ephemeral, snapshot-backed, and io1 EBS)
    round-trip through create/describe of a launch configuration."""
    mapping = BlockDeviceMapping()

    # Instance-store (ephemeral) drive.
    eph_drive = BlockDeviceType()
    eph_drive.ephemeral_name = 'ephemeral0'
    mapping['/dev/xvdb'] = eph_drive

    # EBS volume restored from a snapshot.
    snap_drive = BlockDeviceType()
    snap_drive.snapshot_id = "snap-1234abcd"
    snap_drive.volume_type = "standard"
    mapping['/dev/xvdp'] = snap_drive

    # Provisioned-IOPS EBS volume that survives instance termination.
    io1_drive = BlockDeviceType()
    io1_drive.volume_type = "io1"
    io1_drive.size = 100
    io1_drive.iops = 1000
    io1_drive.delete_on_termination = False
    mapping['/dev/xvdh'] = io1_drive

    conn = boto.connect_autoscale(use_block_device_types=True)
    conn.create_launch_configuration(LaunchConfiguration(
        name='tester',
        image_id='ami-abcd1234',
        instance_type='m1.small',
        key_name='the_keys',
        security_groups=["default", "default2"],
        user_data=b"This is some user_data",
        instance_monitoring=True,
        instance_profile_name='arn:aws:iam::123456789012:instance-profile/testing',
        spot_price=0.1,
        block_device_mappings=[mapping]))

    # Everything we configured should come back from the describe call.
    lc = conn.get_all_launch_configurations()[0]
    lc.name.should.equal('tester')
    lc.image_id.should.equal('ami-abcd1234')
    lc.instance_type.should.equal('m1.small')
    lc.key_name.should.equal('the_keys')
    set(lc.security_groups).should.equal(set(['default', 'default2']))
    lc.user_data.should.equal(b"This is some user_data")
    lc.instance_monitoring.enabled.should.equal('true')
    lc.instance_profile_name.should.equal(
        'arn:aws:iam::123456789012:instance-profile/testing')
    lc.spot_price.should.equal(0.1)
    len(lc.block_device_mappings).should.equal(3)

    returned = lc.block_device_mappings
    set(returned.keys()).should.equal(
        set(['/dev/xvdb', '/dev/xvdp', '/dev/xvdh']))

    returned['/dev/xvdh'].iops.should.equal(1000)
    returned['/dev/xvdh'].size.should.equal(100)
    returned['/dev/xvdh'].volume_type.should.equal("io1")
    returned['/dev/xvdh'].delete_on_termination.should.be.false

    returned['/dev/xvdp'].snapshot_id.should.equal("snap-1234abcd")
    returned['/dev/xvdp'].volume_type.should.equal("standard")

    returned['/dev/xvdb'].ephemeral_name.should.equal('ephemeral0')
Ejemplo n.º 26
0
 def setUp(self):
     """Create a fresh, empty BlockDeviceMapping before each test."""
     self.block_device_mapping = BlockDeviceMapping()
Ejemplo n.º 27
0
def test_create_launch_configuration_with_block_device_mappings():
    """A launch configuration's block device mappings (ephemeral drive,
    snapshot-backed volume, and io1 EBS volume) survive a create/describe
    round trip."""

    def _drive(**attrs):
        # Small builder: BlockDeviceType is configured attribute-by-attribute.
        drive = BlockDeviceType()
        for attr_name, attr_value in attrs.items():
            setattr(drive, attr_name, attr_value)
        return drive

    mapping = BlockDeviceMapping()
    mapping["/dev/xvdb"] = _drive(ephemeral_name="ephemeral0")
    mapping["/dev/xvdp"] = _drive(snapshot_id="snap-1234abcd",
                                  volume_type="standard")
    mapping["/dev/xvdh"] = _drive(volume_type="io1",
                                  size=100,
                                  iops=1000,
                                  delete_on_termination=False)

    conn = boto.connect_autoscale(use_block_device_types=True)
    launch_config = LaunchConfiguration(
        name="tester",
        image_id="ami-abcd1234",
        instance_type="m1.small",
        key_name="the_keys",
        security_groups=["default", "default2"],
        user_data=b"This is some user_data",
        instance_monitoring=True,
        instance_profile_name="arn:aws:iam::{}:instance-profile/testing".
        format(ACCOUNT_ID),
        spot_price=0.1,
        block_device_mappings=[mapping],
    )
    conn.create_launch_configuration(launch_config)

    # Everything we configured should come back from the describe call.
    lc = conn.get_all_launch_configurations()[0]
    lc.name.should.equal("tester")
    lc.image_id.should.equal("ami-abcd1234")
    lc.instance_type.should.equal("m1.small")
    lc.key_name.should.equal("the_keys")
    set(lc.security_groups).should.equal(set(["default", "default2"]))
    lc.user_data.should.equal(b"This is some user_data")
    lc.instance_monitoring.enabled.should.equal("true")
    lc.instance_profile_name.should.equal(
        "arn:aws:iam::{}:instance-profile/testing".format(ACCOUNT_ID))
    lc.spot_price.should.equal(0.1)
    len(lc.block_device_mappings).should.equal(3)

    bdms = lc.block_device_mappings
    set(bdms.keys()).should.equal(
        set(["/dev/xvdb", "/dev/xvdp", "/dev/xvdh"]))

    bdms["/dev/xvdh"].iops.should.equal(1000)
    bdms["/dev/xvdh"].size.should.equal(100)
    bdms["/dev/xvdh"].volume_type.should.equal("io1")
    bdms["/dev/xvdh"].delete_on_termination.should.be.false

    bdms["/dev/xvdp"].snapshot_id.should.equal("snap-1234abcd")
    bdms["/dev/xvdp"].volume_type.should.equal("standard")

    bdms["/dev/xvdb"].ephemeral_name.should.equal("ephemeral0")
Ejemplo n.º 28
0
def spawn_worker_instance():
    """Launch a telemetry-analysis worker EC2 instance for the current user.

    Validates the submitted form (name, token, and an OpenSSH public key
    file), uploads the key to S3, renders the boot script, starts the
    instance with the configured ephemeral-drive mapping, tags it, and
    emails the user a monitoring link.

    Returns the worker form again (with errors) on validation failure,
    otherwise redirects to the instance monitor page.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()

    errors = {}

    # Check required fields.  Use .get() so a missing field is collected
    # as a form error instead of Flask aborting with a 400.
    for f in ['name', 'token']:
        val = request.form.get(f)
        if val is None or val.strip() == '':
            errors[f] = "This field is required"

    # Check required file.
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey_file = request.files.get('public-ssh-key')
    if not pubkey_file:
        # Fixed: this error was previously filed under 'code-tarball',
        # so the form never showed it next to the public-key field.
        errors['public-ssh-key'] = "Public key file is required"
    else:
        # Only read the upload when it actually exists.
        pubkey = pubkey_file.read()
        if not validate_public_key(pubkey):
            errors[
                'public-ssh-key'] = "Supplied file does not appear to be a valid OpenSSH public key."

    if errors:
        return get_worker_params(errors, request.form)

    # Upload s3 key to bucket, named after the request token.
    sshkey = bucket.new_key("keys/%s.pub" % request.form['token'])
    sshkey.set_contents_from_string(pubkey)

    ephemeral = app.config.get("EPHEMERAL_MAP", None)
    # Render the bootstrap script baked into the instance's user data.
    boot_script = render_template(
        'boot-script.sh',
        aws_region=app.config['AWS_REGION'],
        temporary_bucket=app.config['TEMPORARY_BUCKET'],
        ssh_key=sshkey.key,
        ephemeral_map=ephemeral)

    mapping = None
    if ephemeral:
        mapping = BlockDeviceMapping()
        # .items() (not the Py2-only .iteritems()) works on both 2 and 3.
        for device, eph_name in ephemeral.items():
            mapping[device] = BlockDeviceType(ephemeral_name=eph_name)

    # Create EC2 instance
    reservation = ec2.run_instances(
        image_id=
        'ami-eb4608db',  # ubuntu/images/hvm/ubuntu-utopic-14.10-amd64-server-20141022.3
        security_groups=app.config['SECURITY_GROUPS'],
        user_data=boot_script,
        block_device_map=mapping,
        instance_type=app.config['INSTANCE_TYPE'],
        instance_initiated_shutdown_behavior='terminate',
        client_token=request.form['token'],
        instance_profile_name=app.config['INSTANCE_PROFILE'])
    instance = reservation.instances[0]

    # Associate a few tags
    ec2.create_tags(
        [instance.id], {
            "Owner": current_user.email,
            "Name": request.form['name'],
            "Application": app.config['INSTANCE_APP_TAG']
        })

    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('monitor', instance_id=instance.id)
    }
    ses.send_email(
        source=app.config['EMAIL_SOURCE'],
        subject=("telemetry-analysis worker instance: %s (%s) launched" %
                 (request.form['name'], instance.id)),
        format='html',
        body=render_template('instance-launched-email.html', **params),
        to_addresses=[current_user.email])
    return redirect(url_for('monitor', instance_id=instance.id))
Ejemplo n.º 29
0
def launch_cluster(conn, opts, cluster_name):
    """Launch (or resume) a Spark cluster's instances on EC2.

    Sets up the master/slave security groups, resolves a default Ubuntu
    AMI for the region when none was given, builds the EBS/ephemeral
    block device mapping, launches the slaves (spot or on-demand) and the
    master (restarting existing masters when allowed), tags everything,
    and returns ``(master_nodes, slave_nodes)``.

    Exits the process on any unrecoverable configuration error.
    """
    if opts.identity_file is None:
        print("ERROR: Must provide an identity file (-i) for ssh connections.",
              file=stderr)
        sys.exit(1)

    if opts.key_pair is None:
        print("ERROR: Must provide a key pair name (-k) to use on instances.",
              file=stderr)
        sys.exit(1)

    user_data_content = None

    print("Setting up security groups...")
    master_group = get_or_make_group(conn, cluster_name + "-master",
                                     opts.vpc_id)
    slave_group = get_or_make_group(conn, cluster_name + "-slaves",
                                    opts.vpc_id)
    authorized_address = opts.authorized_address
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, authorized_address)
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print("ERROR: There are already instances running in group %s or %s" %
              (master_group.name, slave_group.name),
              file=stderr)
        sys.exit(1)

    # Use the default Ubuntu AMI.
    if opts.ami is None:
        if opts.region == "us-east-1":
            opts.ami = "ami-2d39803a"
        elif opts.region == "us-west-1":
            opts.ami = "ami-06116566"
        elif opts.region == "us-west-2":
            opts.ami = "ami-9abea4fb"
        elif opts.region == "eu-west-1":
            opts.ami = "ami-f95ef58a"
        elif opts.region == "eu-central-1":
            opts.ami = "ami-87564feb"
        elif opts.region == "ap-northeast-1":
            opts.ami = "ami-a21529cc"
        elif opts.region == "ap-northeast-2":
            opts.ami = "ami-09dc1267"
        elif opts.region == "ap-southeast-1":
            opts.ami = "ami-25c00c46"
        elif opts.region == "ap-southeast-2":
            opts.ami = "ami-6c14310f"
        elif opts.region == "ap-south-1":
            opts.ami = "ami-4a90fa25"
        elif opts.region == "sa-east-1":
            opts.ami = "ami-0fb83963"
        else:
            raise Exception("The specified region is unknown.")

    # we use group ids to work around https://github.com/boto/boto/issues/350
    additional_group_ids = []
    if opts.additional_security_group:
        additional_group_ids = [
            sg.id for sg in conn.get_all_security_groups()
            if opts.additional_security_group in (sg.name, sg.id)
        ]
    print("Launching instances...")

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except Exception:  # narrowed from a bare except; lookup failure is fatal
        print("Could not find AMI " + opts.ami, file=stderr)
        sys.exit(1)

    # Create block device mapping so that we can add EBS volumes if asked to.
    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        for i in range(opts.ebs_vol_num):
            device = EBSBlockDeviceType()
            device.size = opts.ebs_vol_size
            device.volume_type = opts.ebs_vol_type
            device.delete_on_termination = True
            block_map["/dev/sd" + chr(ord('s') + i)] = device

    # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
    if opts.instance_type.startswith('m3.'):
        for i in range(get_num_disks(opts.instance_type)):
            dev = BlockDeviceType()
            dev.ephemeral_name = 'ephemeral%d' % i
            # The first ephemeral drive is /dev/sdb.
            name = '/dev/sd' + string.ascii_letters[i + 1]
            block_map[name] = dev

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_group_ids=[slave_group.id] + additional_group_ids,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                subnet_id=opts.subnet_id,
                placement_group=opts.placement_group,
                user_data=user_data_content,
                instance_profile_name=opts.instance_profile_name)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print("Waiting for spot instances to be granted...")
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print("All %d slaves granted" % opts.slaves)
                    reservations = conn.get_all_reservations(
                        active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print("%d of %d slaves granted, waiting longer" %
                          (len(active_instance_ids), opts.slaves))
        except:  # deliberately broad: Ctrl-C must also cancel the requests
            print("Canceling spot instance requests")
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes,
             slave_nodes) = get_existing_cluster(conn,
                                                 opts,
                                                 cluster_name,
                                                 die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print(("WARNING: %d instances are still running" % running),
                      file=stderr)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(
                    key_name=opts.key_pair,
                    security_group_ids=[slave_group.id] + additional_group_ids,
                    instance_type=opts.instance_type,
                    placement=zone,
                    min_count=num_slaves_this_zone,
                    max_count=num_slaves_this_zone,
                    block_device_map=block_map,
                    subnet_id=opts.subnet_id,
                    placement_group=opts.placement_group,
                    user_data=user_data_content,
                    instance_initiated_shutdown_behavior=opts.
                    instance_initiated_shutdown_behavior,
                    instance_profile_name=opts.instance_profile_name)
                slave_nodes += slave_res.instances
                print(
                    "Launched {s} slave{plural_s} in {z}, regid = {r}".format(
                        s=num_slaves_this_zone,
                        plural_s=('' if num_slaves_this_zone == 1 else 's'),
                        z=zone,
                        r=slave_res.id))
            i += 1

    # Launch or resume masters
    if existing_masters:
        print("Starting master...")
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(
            key_name=opts.key_pair,
            security_group_ids=[master_group.id] + additional_group_ids,
            instance_type=master_type,
            placement=opts.zone,
            min_count=1,
            max_count=1,
            block_device_map=block_map,
            subnet_id=opts.subnet_id,
            placement_group=opts.placement_group,
            user_data=user_data_content,
            instance_initiated_shutdown_behavior=opts.
            instance_initiated_shutdown_behavior,
            instance_profile_name=opts.instance_profile_name)

        master_nodes = master_res.instances
        # Fixed: report opts.zone -- that is where the master was placed
        # (the slave loop's `zone` variable is stale or unset here).
        print("Launched master in %s, regid = %s" % (opts.zone, master_res.id))

    # This wait time corresponds to SPARK-4983
    print("Waiting for AWS to propagate instance metadata...")
    time.sleep(15)

    # Give the instances descriptive names and set additional tags
    additional_tags = {}
    if opts.additional_tags.strip():
        additional_tags = dict(
            map(str.strip, tag.split(':', 1))
            for tag in opts.additional_tags.split(','))

    for master in master_nodes:
        master.add_tags(
            dict(additional_tags,
                 Name='{cn}-master-{iid}'.format(cn=cluster_name,
                                                 iid=master.id)))

    for slave in slave_nodes:
        slave.add_tags(
            dict(additional_tags,
                 Name='{cn}-slave-{iid}'.format(cn=cluster_name,
                                                iid=slave.id)))

    # Return all the instances
    return (master_nodes, slave_nodes)
Ejemplo n.º 30
0
def create_instance_args():
    """
    Looks up security group, subnet
    and returns arguments to pass into
    ec2.run_instances() including
    user data

    Uses names defined elsewhere in this script: ``args``, ``stack_name``,
    ``base_ami``, ``run_id``, ``extra_vars_yml``, ``secure_vars_file`` and
    ``get_instance_sec_group`` -- assumed to be set up before this is
    called (TODO confirm against caller).
    """

    # Prefer the CloudFormation-tagged subnet for this stack/play.
    vpc = boto.vpc.connect_to_region(args.region)
    subnet = vpc.get_all_subnets(filters={
        'tag:aws:cloudformation:stack-name': stack_name,
        'tag:play': args.play
    })

    if len(subnet) < 1:
        #
        # try scheme for non-cloudformation builds
        #

        subnet = vpc.get_all_subnets(
            filters={
                'tag:play': args.play,
                'tag:environment': args.environment,
                'tag:deployment': args.deployment
            })

    if len(subnet) < 1:
        sys.stderr.write(
            "ERROR: Expected at least one subnet, got {} for {}-{}-{}\n".
            format(len(subnet), args.environment, args.deployment, args.play))
        sys.exit(1)
    # Only the first matching subnet is used.
    subnet_id = subnet[0].id
    vpc_id = subnet[0].vpc_id

    security_group_id = get_instance_sec_group(vpc_id)

    # An identity file enables the "secure" configuration repos; its
    # contents are embedded into the user-data script below.
    if args.identity:
        config_secure = 'true'
        with open(args.identity) as f:
            identity_contents = f.read()
    else:
        config_secure = 'false'
        identity_contents = "dummy"

    # Shell bootstrap script passed to the instance as EC2 user data.
    # It clones the configuration repos and runs the play's ansible
    # playbook locally on first boot.  NOTE: this is a runtime string --
    # literal {braces} below are .format() placeholders.
    user_data = """#!/bin/bash
set -x
set -e
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
base_dir="/var/tmp/edx-cfg"
extra_vars="$base_dir/extra-vars-$$.yml"
secure_identity="$base_dir/secure-identity"
git_ssh="$base_dir/git_ssh.sh"
configuration_version="{configuration_version}"
configuration_secure_version="{configuration_secure_version}"
configuration_private_version="{configuration_private_version}"
configuration_internal_version="{configuration_internal_version}"
environment="{environment}"
deployment="{deployment}"
play="{play}"
cluster="{play}"
config_secure={config_secure}
git_repo_name="configuration"
git_repo="https://github.com/edx/$git_repo_name"
git_repo_secure="{configuration_secure_repo}"
git_repo_secure_name=$(basename $git_repo_secure .git)
git_repo_private="{configuration_private_repo}"
git_repo_private_name=$(basename $git_repo_private .git)
git_repo_internal="{configuration_internal_repo}"
git_repo_internal_name=$(basename $git_repo_internal .git)
secure_vars_file={secure_vars_file}
environment_deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{environment}-{deployment}.yml"
deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{deployment}.yml"
environment_deployment_internal_vars="$base_dir/$git_repo_internal_name/ansible/vars/{environment}-{deployment}.yml"
deployment_internal_vars="$base_dir/$git_repo_internal_name/ansible/vars/{deployment}.yml"
instance_id=\\
$(curl http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null)
instance_ip=\\
$(curl http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null)
instance_type=\\
$(curl http://169.254.169.254/latest/meta-data/instance-type 2>/dev/null)
playbook_dir="$base_dir/{playbook_dir}"

if $config_secure; then
    git_cmd="env GIT_SSH=$git_ssh git"
else
    git_cmd="git"
fi

ANSIBLE_ENABLE_SQS=true
SQS_NAME={queue_name}
SQS_REGION={region}
SQS_MSG_PREFIX="[ $instance_id $instance_ip $environment-$deployment $play ]"
PYTHONUNBUFFERED=1
HIPCHAT_TOKEN={hipchat_token}
HIPCHAT_ROOM={hipchat_room}
HIPCHAT_MSG_PREFIX="$environment-$deployment-$play: "
HIPCHAT_FROM="ansible-$instance_id"
HIPCHAT_MSG_COLOR=$(echo -e "yellow\\ngreen\\npurple\\ngray" | shuf | head -1)
DATADOG_API_KEY={datadog_api_key}
# environment for ansible
export ANSIBLE_ENABLE_SQS SQS_NAME SQS_REGION SQS_MSG_PREFIX PYTHONUNBUFFERED
export HIPCHAT_TOKEN HIPCHAT_ROOM HIPCHAT_MSG_PREFIX HIPCHAT_FROM
export HIPCHAT_MSG_COLOR DATADOG_API_KEY

if [[ ! -x /usr/bin/git || ! -x /usr/bin/pip ]]; then
    echo "Installing pkg dependencies"
    /usr/bin/apt-get update
    /usr/bin/apt-get install -y git python-pip python-apt \\
        git-core build-essential python-dev libxml2-dev \\
        libxslt-dev curl libmysqlclient-dev --force-yes
fi

# python3 is required for certain other things
# (currently xqwatcher so it can run python2 and 3 grader code,
# but potentially more in the future). It's not available on Ubuntu 12.04,
# but in those cases we don't need it anyways.
if [[ -n "$(apt-cache search --names-only '^python3-pip$')" ]]; then
    /usr/bin/apt-get update
    /usr/bin/apt-get install -y python3-pip python3-dev
fi

# this is missing on 14.04 (base package on 12.04)
# we need to do this on any build, since the above apt-get
# only runs on a build from scratch
/usr/bin/apt-get install -y python-httplib2 --force-yes

# Must upgrade to latest before pinning to work around bug
# https://github.com/pypa/pip/issues/3862
pip install --upgrade pip
hash -r   #pip may have moved from /usr/bin/ to /usr/local/bin/. This clears bash's path cache.
pip install --upgrade pip==8.1.2

# upgrade setuptools early to avoid no distribution errors
pip install --upgrade setuptools==24.0.3

rm -rf $base_dir
mkdir -p $base_dir
cd $base_dir

cat << EOF > $git_ssh
#!/bin/sh
exec /usr/bin/ssh -o StrictHostKeyChecking=no -i "$secure_identity" "\$@"
EOF

chmod 755 $git_ssh

if $config_secure; then
    cat << EOF > $secure_identity
{identity_contents}
EOF
fi

cat << EOF >> $extra_vars
---
# extra vars passed into
# abbey.py including versions
# of all the repositories
{extra_vars_yml}

# abbey will always run fake migrations
# this is so that the application can come
# up healthy
fake_migrations: true

disable_edx_services: true
COMMON_TAG_EC2_INSTANCE: true

# abbey should never take instances in
# and out of elbs
elb_pre_post: false
EOF

chmod 400 $secure_identity

$git_cmd clone $git_repo $git_repo_name
cd $git_repo_name
$git_cmd checkout $configuration_version
cd $base_dir

if $config_secure; then
    $git_cmd clone $git_repo_secure $git_repo_secure_name
    cd $git_repo_secure_name
    $git_cmd checkout $configuration_secure_version
    cd $base_dir
fi

if [[ ! -z $git_repo_private ]]; then
    $git_cmd clone $git_repo_private $git_repo_private_name
    cd $git_repo_private_name
    $git_cmd checkout $configuration_private_version
    cd $base_dir
fi

if [[ ! -z $git_repo_internal ]]; then
    $git_cmd clone $git_repo_internal $git_repo_internal_name
    cd $git_repo_internal_name
    $git_cmd checkout $configuration_internal_version
    cd $base_dir
fi


cd $base_dir/$git_repo_name
sudo pip install -r pre-requirements.txt
sudo pip install -r requirements.txt

cd $playbook_dir

if [[ -r "$deployment_internal_vars" ]]; then
    extra_args_opts+=" -e@$deployment_internal_vars"
fi

if [[ -r "$environment_deployment_internal_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_internal_vars"
fi

if [[ -r "$deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$deployment_secure_vars"
fi

if [[ -r "$environment_deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_secure_vars"
fi

if $secure_vars_file; then
    extra_args_opts+=" -e@$secure_vars_file"
fi

extra_args_opts+=" -e@$extra_vars"

ansible-playbook -vvvv -c local -i "localhost," $play.yml $extra_args_opts
ansible-playbook -vvvv -c local -i "localhost," stop_all_edx_services.yml $extra_args_opts

rm -rf $base_dir

    """.format(
        hipchat_token=args.hipchat_api_token,
        hipchat_room=args.ansible_hipchat_room_id,
        configuration_version=args.configuration_version,
        configuration_secure_version=args.configuration_secure_version,
        configuration_secure_repo=args.configuration_secure_repo,
        configuration_private_version=args.configuration_private_version,
        configuration_private_repo=args.configuration_private_repo,
        configuration_internal_version=args.configuration_internal_version,
        configuration_internal_repo=args.configuration_internal_repo,
        environment=args.environment,
        deployment=args.deployment,
        play=args.play,
        playbook_dir=args.playbook_dir,
        config_secure=config_secure,
        identity_contents=identity_contents,
        queue_name=run_id,
        extra_vars_yml=extra_vars_yml,
        secure_vars_file=secure_vars_file,
        cache_id=args.cache_id,
        datadog_api_key=args.datadog_api_key,
        region=args.region)

    # Root EBS volume for the instance (attached at /dev/sda1).
    mapping = BlockDeviceMapping()
    root_vol = BlockDeviceType(size=args.root_vol_size, volume_type='gp2')
    mapping['/dev/sda1'] = root_vol

    # Keyword arguments for ec2.run_instances().
    ec2_args = {
        'security_group_ids': [security_group_id],
        'subnet_id': subnet_id,
        'key_name': args.keypair,
        'image_id': base_ami,
        'instance_type': args.instance_type,
        'instance_profile_name': args.role_name,
        'user_data': user_data,
        'block_device_map': mapping,
    }

    return ec2_args
Ejemplo n.º 31
0
def node_install(cn=def_cn,
                 inst_type_idx=def_inst_type,
                 idn=0,
                 avz=def_default_avz,
                 rt=def_default_requesttype,
                 group_name='oggmssh',
                 ssh_port=22,
                 cidr='0.0.0.0/0'):
    """
    Request and prepare single instance

    cn            -- cluster name; the node is tagged '<cn>_node<idn>'
    inst_type_idx -- index into instance_infos selecting the instance type
    idn           -- node index within the cluster
    avz           -- availability zone, e.g. 'eu-west-1a' (avz[:-1] is the
                     region name)
    rt            -- request type: 'spot' uses spot requests, anything else
                     launches an on-demand instance
    group_name    -- security group to create/reuse for SSH access
    ssh_port/cidr -- SSH ingress rule added to that group
    """
    # FSO---connect
    cloud = boto.ec2.connect_to_region(avz[:-1], profile_name=ec2Profile)
    aminfo = cloud.get_image(def_ami[avz[:-1]])
    vpcconn = VPCConnection(region=cloud.region, profile_name=ec2Profile)

    # Fall back to EC2-Classic-style launch when no subnet is configured
    # for this AZ.
    try:
        vpc_id, subnet_id = def_subnet[avz]
        vpc = vpcconn.get_all_vpcs(vpc_ids=[vpc_id])[0]
    except:
        vpc_id = None
        subnet_id = None
        vpc = None

    # FSO---check if node with same name already exists
    if node_exists(cn + '_node' + str(idn)):
        print("Node already exists")
        sys.exit()

    # Check if ssh keypair exists
    key_name = get_keypair_name(avz[:-1])
    check_keypair(cloud, key_name)

    # FSO---create a bigger root device
    dev_sda1 = EBSBlockDeviceType()
    dev_sda1.size = rootfs_size_gb
    dev_sda1.delete_on_termination = True
    bdm = BlockDeviceMapping()
    bdm['/dev/sda1'] = dev_sda1

    # Optional persistent user volume, attached later as /dev/sdf.
    dev_sdf_vol = get_user_persist_ebs(cloud, avz)

    # Check to see if specified security group already exists.
    # If we get an InvalidGroup.NotFound error back from EC2,
    # it means that it doesn't exist and we need to create it.
    try:
        group = cloud.get_all_security_groups(groupnames=[group_name])[0]
    except cloud.ResponseError as e:
        if e.code == 'InvalidGroup.NotFound':
            print('Creating Security Group: %s' % group_name)
            # Create a security group to control access to instance via SSH.
            group = cloud.create_security_group(
                group_name, 'A group that allows SSH access')
        else:
            raise

    # Authorize all Intra-VPC traffic
    if vpc is not None:
        try:
            group.authorize('-1', -1, -1, vpc.cidr_block)
        except cloud.ResponseError as e:
            # Rule may already exist from a previous run; anything else
            # is a real error.
            if e.code != 'InvalidPermission.Duplicate':
                raise

    # Add a rule to the security group to authorize SSH traffic
    # on the specified port.
    try:
        group.authorize('tcp', ssh_port, ssh_port, cidr)
    except cloud.ResponseError as e:
        if e.code == 'InvalidPermission.Duplicate':
            print('Security Group: %s already authorized' % group_name)
        else:
            raise

    log_with_ts("request node " + str(idn))
    print('Reserving instance for node', aminfo.id,
          instance_infos[inst_type_idx]['type'], aminfo.name, aminfo.region)

    if rt == 'spot':
        # Spot path: place a one-time request at def_price and wait for
        # it to be fulfilled before continuing.
        print("placing node in ", avz)
        requests = cloud.request_spot_instances(
            def_price,
            def_ami[avz[:-1]],
            count=1,
            type='one-time',
            security_group_ids=[group.id],
            key_name=key_name,
            placement=avz,
            subnet_id=subnet_id,
            ebs_optimized=True,
            instance_type=instance_infos[inst_type_idx]['type'],
            block_device_map=bdm)
        req_ids = [request.id for request in requests]
        instance_ids = wait_for_fulfillment(cloud, req_ids)
        instances = cloud.get_only_instances(instance_ids=instance_ids)
        node = instances[0]
        log_with_ts("fullfilled spot node " + str(idn))
    else:
        # On-demand path: run_instances returns immediately.
        print("placing node in ", avz)
        reservation = cloud.run_instances(
            image_id=def_ami[avz[:-1]],
            key_name=key_name,
            placement=avz,
            subnet_id=subnet_id,
            security_group_ids=[group.id],
            ebs_optimized=True,
            instance_type=instance_infos[inst_type_idx]['type'],
            block_device_map=bdm)
        node = reservation.instances[0]
        log_with_ts("fullfilled ondemand node " + str(idn))

    # Poll until the instance is running.
    time.sleep(2)
    while not node.update() == 'running':
        print('waiting for', cn, 'node', idn, 'to boot...')
        time.sleep(5)

    log_with_ts("booted node " + str(idn))

    if dev_sdf_vol is not None:
        cloud.attach_volume(dev_sdf_vol.id, node.id, "/dev/sdf")

    node.add_tag('Name', cn + '_node' + str(idn))
    node.add_tag('type', cn + 'node')
    node.add_tag('node-owner', user_identifier)

    # FSO---set delete on termination flag to true for ebs block device
    node.modify_attribute('blockDeviceMapping', {'/dev/sda1': True})

    # FSO--- test socket connect to ssh service
    ssh_test(node)
    log_with_ts("reachable node " + str(idn))

    update_key_filename(node.region.name)

    # Mount potential user volume
    if dev_sdf_vol is not None:
        use_user_volume(node.dns_name)

    log_with_ts("finished node " + str(idn))
Ejemplo n.º 32
0
def do_build(ctxt, **kwargs):
    conn = ctxt.cnx_ec2
    if 'template' in kwargs and kwargs['template']:
        template_file_name = kwargs['template']
        kwargs = parse_template(ctxt, template_file_name, kwargs)
    del kwargs['template']

    defaultrun = {'instance_type': 'm1.large', 'key_name': ctxt.key_name }
    for key in defaultrun:
        if key not in kwargs or kwargs[key] == None:
            kwargs[key] = defaultrun[key]
                        
    (remote_user, kwargs) = get_remote_user(ctxt, **kwargs)
    (key_file, kwargs) = get_key_file(ctxt, **kwargs)

    (tags,kwargs) = do_tags(**kwargs)

    do_run_scripts =  kwargs.pop('run')

    ###########
    # Check VM naming
    ###########
    if 'Name' not in tags and kwargs['hostname'] is not None:
        tags['Name'] = kwargs['hostname']
    if 'Name' not in tags:
        yield "instance name is mandatory"
        return
    
    try:
        oslib.ec2_objects.Instance(ctxt, name=tags['Name']).get()
        # if get succed, the name already exist, else get throws an exception
        yield "duplicate name %s" % tags['Name']
        return 
    except:
        pass
        
    user_data_properties = {}
    
    image = kwargs.pop('image_id', None)

    ###########
    # Check device mapping
    ###########
    volumes = BlockDeviceMapping(conn)
    first_volume = 'f'
    l = first_volume

    ebs_optimized = False
    for volume_info in kwargs.pop('volume_size', []):
        # yaml is not typed, volume_info can be a string or a number
        if isinstance(volume_info, basestring):
            options = volume_info.split(',')
            size = int(oslib.parse_size(options[0], 'G', default_suffix='G'))
        else:
            options = []
            size = int(volume_info)
        vol_kwargs = {"connection":conn, "size": size}
        if len(options) > 1:
            for opt in options[1:]:
                parsed = opt.split('=')
                key = parsed[0]
                if len(parsed) == 2:
                    value = parsed[1]
                elif len(parsed) == 1:
                    value = True
                else:
                    raise OSLibError("can't parse volume argument %s", opt)
                if key == 'iops':
                    ebs_optimized = True
                    vol_kwargs['volume_type'] = 'io1'
                vol_kwargs[key] = value
        volumes["/dev/sd%s"%l] = BlockDeviceType(**vol_kwargs)
        l = chr( ord(l[0]) + 1)
    kwargs['ebs_optimized'] = ebs_optimized

    # if drive letter is not f, some volumes definition was found
    if l != first_volume:
        kwargs['block_device_map'] = volumes
        user_data_properties['volumes'] = ' '.join(volumes.keys())

    # after user_data_properties['volumes'] otherwise they will be lvm'ed
    for snapshot_id in kwargs.pop('snap_id', []):
        volumes["/dev/sd%s"%l] = BlockDeviceType(connection=conn, snapshot_id=snapshot_id)
        l = chr( ord(l[0]) + 1)
    
    kwargs = build_user_data(user_data_properties, **kwargs)

    ###########
    # Check elastic IP
    ###########
    if kwargs['elastic_ip']:
        eip = True
    else:
        eip = False
    del kwargs['elastic_ip']

    for k in kwargs.keys()[:]:
        value = kwargs[k]
        if kwargs[k] == None:
            del(kwargs[k])
        elif value.__class__ == [].__class__ and len(value) == 0:
            del(kwargs[k])
    
    if 'private_ip_address' in kwargs and kwargs['private_ip_address']:
        netif_specification = NetworkInterfaceCollection()
        netif_kwargs = {}
        if kwargs['private_ip_address']:
            netif_kwargs['private_ip_address'] = kwargs['private_ip_address']
            del kwargs['private_ip_address']
        if 'associate_public_ip_address' in kwargs and kwargs['associate_public_ip_address']:
            netif_kwargs['associate_public_ip_address'] = kwargs['associate_public_ip_address']
            del kwargs['associate_public_ip_address']
        if 'security_groups' in kwargs and kwargs['security_groups']:
            netif_kwargs['groups'] = kwargs['security_groups']
            del kwargs['security_groups']
        
        netif_kwargs['subnet_id'] = kwargs['subnet_id']
        del kwargs['subnet_id']
        print netif_kwargs
        spec = NetworkInterfaceSpecification(**netif_kwargs)
        netif_specification.append(spec)   
        kwargs['network_interfaces'] = netif_specification

    reservation = conn.run_instances(image, **kwargs)
    instance = reservation.instances[0]
    # Quick hack to keep the selected remote user
    instance.remote_user = remote_user
    
    if len(tags) > 0:
        conn.create_tags([ instance.id ], tags)
        
    if instance.interfaces and len(instance.interfaces) > 0:
        for interface in instance.interfaces:
            conn.create_tags([ interface.id ], {'creator': tags['creator']})

    while instance.state != 'running' and instance.state != 'terminated':
        instance.update(True)
        yield (".")
        time.sleep(1)
    yield ("\n")
    
    if eip:
        ip = conn.allocate_address().public_ip
        conn.associate_address(instance_id = instance.id, public_ip=ip)
        conn.create_tags([instance.id], {"EIP": ip})

    #Update tag for this instance's volumes
    for device in instance.block_device_mapping:
        device_type = instance.block_device_mapping[device]
        (vol_tags, vol_kwargs) = do_tags(name='%s/%s' % (tags['Name'], device.replace('/dev/','')))
        conn.create_tags([ device_type.volume_id ], vol_tags)
    instance.update(True)

    windows_instance = instance.platform == 'Windows'

    if do_run_scripts and not windows_instance:
        while instance.state != 'terminated':
            try:
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                s.settimeout(1.0)
                s.connect((instance.public_dns_name, 22))
                s.close()
                break
            except socket.error, msg:
                yield (".")
                s.close()
                time.sleep(1)
        yield ("\n")
        instance.key_file = key_file

        remote_setup(instance, remote_user, key_file)
Ejemplo n.º 33
0
def register(snapshot_id,
             region,
             arch,
             size=None,
             name=None,
             desc=None,
             pvm=False):
    """Register an AMI in *region* from a root filesystem snapshot.

    snapshot_id -- EBS snapshot holding the root filesystem
    region      -- AWS region to register the image in
    arch        -- image architecture ('amd64' is mapped to 'x86_64')
    size        -- root volume size in GB (defaults to the snapshot's size)
    name        -- image name (defaults to the snapshot description)
    desc        -- optional image description
    pvm         -- register a paravirtual image instead of HVM

    Returns (ami_id, name).
    """
    conn = utils.connect(region)

    # Fetch the snapshot only when we need its size or description.
    if None in (name, size):
        log.debug('getting snapshot - %s', snapshot_id)
        snapshot = conn.get_all_snapshots(snapshot_ids=[snapshot_id])[0]
        size = size if size else snapshot.volume_size
        name = name if name else snapshot.description

    virt = 'hvm'
    kernel_id = None
    device_base = '/dev/xvd'
    ec2_arch = "x86_64" if arch == "amd64" else arch

    if pvm:
        # Paravirtual images need an AKI kernel and use /dev/sdX names.
        kernel_id = utils.get_kernel(region, arch)
        virt = 'paravirtual'
        device_base = '/dev/sd'
        name += '-pvm'

    rootfs_device_name = device_base + 'a'
    ephemeral_device_name = device_base + 'b'

    log.debug('registering image - %s', name)
    client3 = utils.connect_boto3(region)

    # BUG FIX: device names now follow device_base instead of being
    # hard-coded to /dev/xvd*, so PVM registrations get /dev/sd* names
    # consistent with RootDeviceName.  (The unused boto2
    # BlockDeviceMapping that was built here was dead code and is gone.)
    register_kwargs = {
        'Name': name,
        'Architecture': ec2_arch,
        'RootDeviceName': rootfs_device_name,
        'BlockDeviceMappings': [{
            'DeviceName': rootfs_device_name,
            'Ebs': {
                'DeleteOnTermination': True,
                'VolumeSize': size,
                'SnapshotId': snapshot_id,
            },
        }, {
            'DeviceName': ephemeral_device_name,
            'VirtualName': 'ephemeral0',
        }],
        'VirtualizationType': virt,
    }
    if desc:
        # BUG FIX: desc was accepted but silently ignored.
        register_kwargs['Description'] = desc
    if pvm:
        # BUG FIX: the kernel was looked up but never passed, so PVM
        # registration could not have worked.
        register_kwargs['KernelId'] = kernel_id
    else:
        # ENA is only valid for HVM images; do not send it for paravirtual.
        register_kwargs['EnaSupport'] = True

    response = client3.register_image(**register_kwargs)

    ami_id = response['ImageId']

    log.info('registered image - %s %s %s', ami_id, name, region)
    return ami_id, name