Exemple #1
0
  def attach_disk(self, parameters, disk_name, instance_id):
    """ Attaches the Elastic Block Store volume specified in 'disk_name' to this
    virtual machine.

    The volume is always requested at /dev/sdc. After issuing the request,
    this method polls the volume's status once per second until it is reported
    as 'in-use'; there is no upper bound on how long it will wait.

    Args:
      parameters: A dict with keys for each parameter needed to connect to AWS.
      disk_name: A str naming the EBS mount to attach to this machine.
      instance_id: A str naming the id of the instance that the disk should be
        attached to. In practice, callers add disks to their own instances.
    Returns:
      The str '/dev/sdc' once the volume reports itself as 'in-use'. If an
      EC2ResponseError occurs, it is reported through self.handle_failure
      instead (presumably raising - confirm against handle_failure).
    """
    try:
      conn = self.open_connection(parameters)
      utils.log('Attaching volume {0} to instance {1}, at /dev/sdc'.format(
        disk_name, instance_id))
      conn.attach_volume(disk_name, instance_id, '/dev/sdc')

      while True:
        utils.log('Waiting for disk to finish attaching.')
        status = conn.get_all_volumes(disk_name)[0].status
        utils.log('Volume {0} reports its status as {1}'.format(disk_name,
          status))
        if status == 'in-use':
          break
        utils.sleep(1)

      # The original message was never formatted, so the log showed a literal
      # '{0}' instead of the volume name.
      utils.log('Volume {0} is attached and ready for use.'.format(disk_name))
      return '/dev/sdc'
    except EC2ResponseError as exception:
      utils.log('An error occurred when trying to attach volume {0} to ' \
        'instance {1} at /dev/sdc'.format(disk_name, instance_id))
      self.handle_failure('EC2 response error while attaching volume:' +
        exception.error_message)
  def attach_disk(self, parameters, disk_name, instance_id):
    """ Attaches the Elastic Block Store (EBS) volume named by 'disk_name' to
    this virtual machine and reports where it showed up.

    Unlike the EC2 version, the device location is not chosen by the caller:
    in Euca the cloud decides where the disk is placed. The set of entries
    under /dev is therefore snapshotted before the attach request, and /dev is
    polled afterwards until a new entry appears.

    Args:
      parameters: A dict with keys for each parameter needed to connect to AWS.
      disk_name: A str naming the EBS volume to attach to this machine.
      instance_id: A str naming the id of the instance that the disk should be
        attached to. In practice, callers add disks to their own instances.
    Returns:
      The location on the local filesystem where the disk has been attached.
    """
    known_devices = glob.glob('/dev/*')
    EC2Agent.attach_disk(self, parameters, disk_name, instance_id)
    while True:
      fresh_devices = utils.diff(glob.glob('/dev/*'), known_devices)

      # Nothing new yet: wait a second and look again.
      if not fresh_devices:
        utils.log("Still waiting for attached device to appear.")
        utils.sleep(1)
        continue

      utils.log("Found new attached devices: {0}".format(fresh_devices))

      # More than one new entry means we cannot tell which one is our disk.
      if len(fresh_devices) != 1:
        self.handle_failure("Found too many new attached devices - not sure" \
          " which one is the device we attached. New devices are {0}".format(
          fresh_devices))
        continue

      attached_at = fresh_devices[0]
      utils.log("Found exactly one new attached device at {0}".format(
        attached_at))
      return attached_at
  def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, ssl=True):
    """
    Initialize a new instance of the EAGER service.

    Construction blocks until the deployment secret key can be read and, when
    SSL is enabled, until both the certificate and the private key exist on
    disk. Neither wait has a timeout.

    Args:
      host  Hostname to which the service should bind (Optional). Defaults
            to DEFAULT_HOST.
      port  Port of the service (Optional). Defaults to DEFAULT_PORT.
      ssl   True if SSL should be engaged or False otherwise (Optional).
            Defaults to True. When engaged, this implementation expects
            to find certs/mycert.pem and certs/mykey.pem under
            self.APPSCALE_DIR.
    """
    self.host = host
    self.port = port

    # Wait for the secret key to be readable. The value itself is not needed
    # here and is deliberately NOT logged: writing credentials to the log
    # exposes them to anyone who can read the log files.
    while True:
      try:
        utils.get_secret(self.APPSCALE_DIR + 'secret.key')
        break
      except Exception:
        utils.log('Waiting for the secret key to become available')
        utils.sleep(5)
    utils.log('Found the secret key')

    SOAPpy.Config.simplify_objects = True

    if ssl:
      utils.log('Checking for the certificate and private key')
      cert = self.APPSCALE_DIR + 'certs/mycert.pem'
      key = self.APPSCALE_DIR + 'certs/mykey.pem'
      while not (os.path.exists(cert) and os.path.exists(key)):
        utils.log('Waiting for certificates')
        utils.sleep(5)

      ssl_context = SSL.Context()
      ssl_context.load_cert(cert, key)
      self.server = SOAPpy.SOAPServer((host, port), ssl_context=ssl_context)
    else:
      self.server = SOAPpy.SOAPServer((host, port))

    # Expose the Eager operations over SOAP.
    e = Eager()
    self.server.registerFunction(e.ping)
    self.server.registerFunction(e.validate_application_for_deployment)
    self.server.registerFunction(e.publish_api_list)
    self.server.registerFunction(e.add_policy)
    self.server.registerFunction(e.remove_policy)
    self.server.registerFunction(e.enable_policy)
    self.server.registerFunction(e.disable_policy)
    self.server.registerFunction(e.list_policy)
    self.server.registerFunction(e.info_policy)

    self.started = False
Exemple #4
0
    def attach_disk(self, parameters, disk_name, instance_id):
        """ Attaches the Elastic Block Store (EBS) volume named by 'disk_name'
        to this virtual machine and reports where it showed up.

        Unlike the EC2 version, the device location is not chosen by the
        caller: in Euca the cloud decides where the disk is placed. The set of
        entries under /dev is therefore snapshotted before the attach request,
        and /dev is polled afterwards until a new entry appears.

        Args:
          parameters: A dict with keys for each parameter needed to connect to
            AWS.
          disk_name: A str naming the EBS volume to attach to this machine.
          instance_id: A str naming the id of the instance that the disk should
            be attached to. In practice, callers add disks to their own
            instances.
        Returns:
          The location on the local filesystem where the disk has been
          attached.
        """
        known_devices = glob.glob('/dev/*')
        EC2Agent.attach_disk(self, parameters, disk_name, instance_id)
        while True:
            fresh_devices = utils.diff(glob.glob('/dev/*'), known_devices)

            # Nothing new yet: wait a second and look again.
            if not fresh_devices:
                utils.log("Still waiting for attached device to appear.")
                utils.sleep(1)
                continue

            utils.log("Found new attached devices: {0}".format(fresh_devices))

            # More than one new entry: we cannot tell which one is our disk.
            if len(fresh_devices) != 1:
                self.handle_failure("Found too many new attached devices - not sure" \
                  " which one is the device we attached. New devices are {0}".format(
                  fresh_devices))
                continue

            attached_at = fresh_devices[0]
            utils.log("Found exactly one new attached device at {0}".format(
                attached_at))
            return attached_at
Exemple #5
0
  def run_instances(self, count, parameters, security_configured):
    """
    Spawns the specified number of EC2 instances using the parameters
    provided, handing each of them a generated bash user-data script. This
    method is blocking: after issuing the request it polls
    describe_instances_old until 'count' new public IPs are visible or
    self.MAX_VM_CREATION_TIME seconds have elapsed. (Also see documentation
    for the BaseAgent class)

    The user-data script exports the AWS credentials and the StochKit paths
    and starts a celery worker. Queue head nodes additionally get a RabbitMQ
    user (and are bumped to 'c3.large' if the requested instance type is one
    of the listed small types); every other node receives a copy of the local
    celeryconfig.py. The script is also written to a file named 'userfile' in
    the current working directory - note that it contains the credentials.

    Args:
      count               No. of VMs to spawned
      parameters          A dictionary of parameters. This must contain the
                          PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE, PARAM_KEYNAME,
                          PARAM_GROUP, PARAM_SPOT and PARAM_CREDENTIALS keys
                          (plus PARAM_SPOT_PRICE for spot requests).
                          PARAM_QUEUE_HEAD and PARAM_WORKER_QUEUE are optional.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    spot = parameters[self.PARAM_SPOT]
    # The launch decision compares against the string 'True'. Evaluate the flag
    # once so that the later timing/alarm branch agrees with what was actually
    # launched (a bare truthiness test treats the string 'False' as spot).
    use_spot = spot == 'True'

    utils.log('[{0}] [{1}] [{2}] [{3}] [ec2] [{4}] [{5}]'.format(count,
      image_id, instance_type, keyname, group, spot))

    credentials = parameters[self.PARAM_CREDENTIALS]
    access_key = str(credentials['EC2_ACCESS_KEY'])
    secret_key = str(credentials['EC2_SECRET_KEY'])
    stochkit_home = '/home/ubuntu/StochKit/'
    stochkit_ode = '/home/ubuntu/ode/'

    # Collect the script in pieces and join once at the end.
    script = [
      """#!/bin/bash \nset -x\nexec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1\ntouch anand3.txt\necho "testing logfile"\necho BEGIN\ndate '+%Y-%m-%d %H:%M:%S'\necho END\ntouch anand2.txt\n""",
      'export AWS_ACCESS_KEY_ID={0}\n'.format(access_key),
      'export AWS_SECRET_ACCESS_KEY={0}\n'.format(secret_key),
      'echo export AWS_ACCESS_KEY_ID={0} >> ~/.bashrc\n'.format(access_key),
      'echo export AWS_SECRET_ACCESS_KEY={0} >> ~/.bashrc\n'.format(secret_key),
      'echo export AWS_ACCESS_KEY_ID={0} >> /home/ubuntu/.bashrc\n'.format(access_key),
      'echo export AWS_SECRET_ACCESS_KEY={0} >> /home/ubuntu/.bashrc\n'.format(secret_key),
      'export STOCHKIT_HOME={0}\n'.format(stochkit_home),
      'export STOCHKIT_ODE={0}\n'.format(stochkit_ode),
      'echo export STOCHKIT_HOME={0} >> ~/.bashrc\n'.format(stochkit_home),
      'echo export STOCHKIT_HOME={0} >> /home/ubuntu/.bashrc\n'.format(stochkit_home),
      'echo export STOCHKIT_ODE={0} >> ~/.bashrc\n'.format(stochkit_ode),
      'echo export STOCHKIT_ODE={0} >> /home/ubuntu/.bashrc\n'.format(stochkit_ode),
      'source ~/.bashrc \n',
      'source /home/ubuntu/.bashrc \n',
    ]

    # Workers need an alarm...
    skip_alarm = False
    if parameters.get(self.PARAM_QUEUE_HEAD):
      # ...but the queue head doesnt
      skip_alarm = True
      # Queue head, needs to have at least two cores
      insufficient_cores = ['t1.micro', 'm1.small', 'm1.medium', 'm3.medium']
      if instance_type in insufficient_cores:
        instance_type = 'c3.large'
      # Create the user that we want to use to connect to the broker
      # and configure its permissions on the default vhost.
      script.append("rabbitmqctl add_user stochss ucsb\n")
      script.append('rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"\n')
    else:
      # Update celery config file...it should have the correct IP
      # of the Queue head node, which should already be running.
      celery_config_filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "../celeryconfig.py"
      )
      # Pass it line by line so theres no weird formatting errors from
      # trying to echo a multi-line file directly on the command line
      with open(celery_config_filename, 'r') as celery_config_file:
        lines = celery_config_file.readlines()
      # Make sure we overwrite the file with our first write
      script.append(
        "echo '{0}' > /home/ubuntu/celeryconfig.py\n".format(lines[0]))
      for line in lines[1:]:
        script.append(
          "echo '{0}' >> /home/ubuntu/celeryconfig.py\n".format(line))

    # Even the queue head gets a celery worker
    # NOTE: We only need to use the -n argument to celery command if we are starting
    #       multiple workers on the same machine. Instead, we are starting one worker
    #       per machine and letting that one worker execute one task per core, using
    #       the configuration in celeryconfig.py to ensure that Celery detects the
    #       number of cores and enforces this desired behavior.
    if self.PARAM_WORKER_QUEUE in parameters:
      script.append("nohup celery -A tasks worker --autoreload --loglevel=info -Q {0} --workdir /home/ubuntu > /home/ubuntu/nohup.log 2>&1 & \n".format(
          parameters[self.PARAM_WORKER_QUEUE]
      ))
    else:
      script.append("nohup celery -A tasks worker --autoreload --loglevel=info --workdir /home/ubuntu > /home/ubuntu/nohup.log 2>&1 & \n")
    userstr = ''.join(script)

    # Write the script out only once it is complete, and let the context
    # manager close the handle even if the write fails.
    with open('userfile', 'w') as user_file:
      user_file.write(userstr)

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    try:
      attempts = 1
      while True:
        instance_info = self.describe_instances_old(parameters)
        active_public_ips = instance_info[0]
        active_private_ips = instance_info[1]
        active_instances = instance_info[2]

        # If security has been configured on this agent just now,
        # that's an indication that this is a fresh cloud deployment.
        # As such it's not expected to have any running VMs.
        if len(active_instances) > 0 or security_configured:
          break
        elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
          self.handle_failure('Failed to invoke describe_instances')
        attempts += 1

      conn = self.open_connection(parameters)
      if use_spot:
        price = parameters[self.PARAM_SPOT_PRICE]
        conn.request_spot_instances(str(price), image_id, key_name=keyname,
          security_groups=[group], instance_type=instance_type, count=count,
          user_data=userstr)
      else:
        conn.run_instances(image_id, count, count, key_name=keyname,
          security_groups=[group], instance_type=instance_type,
          user_data=userstr)

      instance_ids = []
      public_ips = []
      private_ips = []
      utils.sleep(10)
      end_time = datetime.datetime.now() + datetime.timedelta(0,
        self.MAX_VM_CREATION_TIME)
      now = datetime.datetime.now()

      # Poll until every requested VM is visible or the deadline passes. Only
      # instances that were not running before the request are counted.
      while now < end_time:
        time_left = (end_time - now).seconds
        utils.log('[{0}] {1} seconds left...'.format(now, time_left))
        instance_info = self.describe_instances_old(parameters)
        public_ips = utils.diff(instance_info[0], active_public_ips)
        private_ips = utils.diff(instance_info[1], active_private_ips)
        instance_ids = utils.diff(instance_info[2], active_instances)
        if count == len(public_ips):
          break
        time.sleep(self.SLEEP_TIME)
        now = datetime.datetime.now()

      if not public_ips:
        self.handle_failure('No public IPs were able to be procured '
                            'within the time limit')

      # Fewer VMs than requested: terminate those that never got a public IP.
      if len(public_ips) != count:
        for index, public_ip in enumerate(public_ips):
          if public_ip == '0.0.0.0':
            instance_to_term = instance_ids[index]
            utils.log('Instance {0} failed to get a public IP address and' \
                      ' is being terminated'.format(instance_to_term))
            conn.terminate_instances([instance_to_term])

      end_time = datetime.datetime.now()
      total_time = end_time - start_time
      if use_spot:
        utils.log('TIMING: It took {0} seconds to spawn {1} spot ' \
                  'instances'.format(total_time.seconds, count))
      else:
        utils.log('TIMING: It took {0} seconds to spawn {1} ' \
                  'regular instances'.format(total_time.seconds, count))
        if not skip_alarm:
          utils.log('Creating Alarms for the instances')
          for machineid in instance_ids:
            self.make_sleepy(parameters, machineid)
      return instance_ids, public_ips, private_ips
    except EC2ResponseError as exception:
      self.handle_failure('EC2 response error while starting VMs: ' +
                          exception.error_message)
    except AgentRuntimeException:
      # Already an agent-level failure: propagate it with its traceback intact.
      raise
    except Exception as exception:
      self.handle_failure('Error while starting VMs: ' + exception.message)
Exemple #6
0
  def run_instances(self, count, parameters, security_configured):
    """
    Spawns the specified number of EC2 instances using the parameters
    provided. This method is blocking: after issuing the request it polls
    describe_instances until 'count' new public IPs are visible or
    self.MAX_VM_CREATION_TIME seconds have elapsed. If no public IP at all
    could be procured in that time, the failure is reported through
    handle_failure. (Also see documentation for the BaseAgent class)

    Args:
      count               No. of VMs to spawned
      parameters          A dictionary of parameters. This must contain the
                          PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE, PARAM_KEYNAME,
                          PARAM_GROUP and PARAM_SPOT keys (plus
                          PARAM_SPOT_PRICE for spot requests).
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    spot = parameters[self.PARAM_SPOT]
    # The launch decision compares against the string 'True'. Evaluate the flag
    # once so the timing log below agrees with what was actually launched (a
    # bare truthiness test treats the string 'False' as spot).
    use_spot = spot == 'True'

    utils.log('[{0}] [{1}] [{2}] [{3}] [ec2] [{4}] [{5}]'.format(count,
      image_id, instance_type, keyname, group, spot))

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    try:
      attempts = 1
      while True:
        instance_info = self.describe_instances(parameters)
        active_public_ips = instance_info[0]
        active_private_ips = instance_info[1]
        active_instances = instance_info[2]

        # If security has been configured on this agent just now,
        # that's an indication that this is a fresh cloud deployment.
        # As such it's not expected to have any running VMs.
        if len(active_instances) > 0 or security_configured:
          break
        elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
          self.handle_failure('Failed to invoke describe_instances')
        attempts += 1

      conn = self.open_connection(parameters)
      if use_spot:
        price = parameters[self.PARAM_SPOT_PRICE]
        conn.request_spot_instances(str(price), image_id, key_name=keyname,
          security_groups=[group], instance_type=instance_type, count=count)
      else:
        conn.run_instances(image_id, count, count, key_name=keyname,
          security_groups=[group], instance_type=instance_type)

      instance_ids = []
      public_ips = []
      private_ips = []
      utils.sleep(10)
      end_time = datetime.datetime.now() + datetime.timedelta(0,
        self.MAX_VM_CREATION_TIME)
      now = datetime.datetime.now()

      # Poll until every requested VM is visible or the deadline passes. Only
      # instances that were not running before the request are counted.
      while now < end_time:
        time_left = (end_time - now).seconds
        utils.log('[{0}] {1} seconds left...'.format(now, time_left))
        instance_info = self.describe_instances(parameters)
        public_ips = utils.diff(instance_info[0], active_public_ips)
        private_ips = utils.diff(instance_info[1], active_private_ips)
        instance_ids = utils.diff(instance_info[2], active_instances)
        if count == len(public_ips):
          break
        time.sleep(self.SLEEP_TIME)
        now = datetime.datetime.now()

      if not public_ips:
        self.handle_failure('No public IPs were able to be procured '
                            'within the time limit')

      # Fewer VMs than requested: terminate those that never got a public IP.
      if len(public_ips) != count:
        for index, public_ip in enumerate(public_ips):
          if public_ip == '0.0.0.0':
            instance_to_term = instance_ids[index]
            utils.log('Instance {0} failed to get a public IP address and' \
                      ' is being terminated'.format(instance_to_term))
            conn.terminate_instances([instance_to_term])

      end_time = datetime.datetime.now()
      total_time = end_time - start_time
      if use_spot:
        utils.log('TIMING: It took {0} seconds to spawn {1} spot ' \
                  'instances'.format(total_time.seconds, count))
      else:
        utils.log('TIMING: It took {0} seconds to spawn {1} ' \
                  'regular instances'.format(total_time.seconds, count))
      return instance_ids, public_ips, private_ips
    except EC2ResponseError as exception:
      self.handle_failure('EC2 response error while starting VMs: ' +
                          exception.error_message)
    except AgentRuntimeException:
      # Already an agent-level failure: propagate it with its traceback intact.
      raise
    except Exception as exception:
      self.handle_failure('Error while starting VMs: ' + exception.message)
Exemple #7
0
    def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, ssl=True):
        """
        Initialize a new instance of the infrastructure manager service.

        Construction blocks until the deployment secret key can be read and,
        when SSL is enabled, until both the certificate and the private key
        exist on disk. Neither wait has a timeout. If a JSON file named
        self.CONFIG_FILE exists next to the running script and contains the
        PersistentStoreFactory.PARAM_STORE_TYPE key, its contents are passed
        to InfrastructureManager; otherwise the manager is built with no
        arguments.

        Args:
          host  Hostname to which the service should bind (Optional). Defaults
                to DEFAULT_HOST.
          port  Port of the service (Optional). Defaults to DEFAULT_PORT.
          ssl   True if SSL should be engaged or False otherwise (Optional).
                Defaults to True. When engaged, this implementation expects
                to find certs/mycert.pem and certs/mykey.pem under
                self.APPSCALE_DIR.
        """
        self.host = host
        self.port = port

        # Wait for the secret key to be readable. The value itself is not
        # needed here and is deliberately NOT logged: writing credentials to
        # the log exposes them to anyone who can read the log files.
        while True:
            try:
                utils.get_secret(self.APPSCALE_DIR + 'secret.key')
                break
            except Exception:
                logging.info('Waiting for the secret key to become available')
                utils.sleep(5)
        logging.info('Found the secret key')

        SOAPpy.Config.simplify_objects = True

        if ssl:
            logging.info('Checking for the certificate and private key')
            cert = self.APPSCALE_DIR + 'certs/mycert.pem'
            key = self.APPSCALE_DIR + 'certs/mykey.pem'
            while not (os.path.exists(cert) and os.path.exists(key)):
                logging.info('Waiting for certificates')
                utils.sleep(5)

            ssl_context = SSL.Context()
            ssl_context.load_cert(cert, key)
            self.server = SOAPpy.SOAPServer((host, port),
                                            ssl_context=ssl_context)
        else:
            self.server = SOAPpy.SOAPServer((host, port))

        # The optional configuration file lives beside the entry-point script.
        parent_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        config_file = os.path.join(parent_dir, self.CONFIG_FILE)
        if os.path.exists(config_file):
            with open(config_file) as file_handle:
                params = json.load(file_handle)
            # 'in' replaces dict.has_key, which no longer exists on Python 3.
            if PersistentStoreFactory.PARAM_STORE_TYPE in params:
                logging.info(
                    'Loading infrastructure manager configuration from ' +
                    config_file)
                i = InfrastructureManager(params)
            else:
                i = InfrastructureManager()
        else:
            i = InfrastructureManager()

        # Expose the infrastructure operations over SOAP.
        self.server.registerFunction(i.describe_operation)
        self.server.registerFunction(i.run_instances)
        self.server.registerFunction(i.terminate_instances)
        self.server.registerFunction(i.attach_disk)

        system_manager = SystemManager()

        # Expose the system monitoring operations over SOAP.
        self.server.registerFunction(system_manager.get_cpu_usage)
        self.server.registerFunction(system_manager.get_disk_usage)
        self.server.registerFunction(system_manager.get_memory_usage)
        self.server.registerFunction(system_manager.get_service_summary)
        self.server.registerFunction(system_manager.get_swap_usage)
        self.server.registerFunction(system_manager.get_loadavg)

        self.started = False
Exemple #8
0
    def run_instances(self, count, parameters, security_configured):
        """
        Spawns the specified number of EC2 instances using the parameters
        provided. This method is blocking: after issuing the request it polls
        the cloud until 'count' new public IPs are visible or
        self.MAX_VM_CREATION_TIME seconds have elapsed. If no public IP at all
        could be procured in that time, or if any instance started with the
        given keyname is seen in the 'terminated' state, the failure is
        reported through handle_failure. (Also see documentation for the
        BaseAgent class)

        A non-spot request that fails with an EC2ResponseError is retried, up
        to self.RUN_INSTANCES_RETRY_COUNT attempts in total, sleeping ten
        seconds between attempts.

        Args:
          count               No. of VMs to spawned
          parameters          A dictionary of parameters. This must contain
                              the PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE,
                              PARAM_KEYNAME, PARAM_GROUP, PARAM_SPOT and
                              PARAM_ZONE keys (plus PARAM_SPOT_PRICE for spot
                              requests).
          security_configured Uses this boolean value as an heuristic to
                              detect brand new AppScale deployments.

        Returns:
          A tuple of the form (instances, public_ips, private_ips)
        """
        image_id = parameters[self.PARAM_IMAGE_ID]
        instance_type = parameters[self.PARAM_INSTANCE_TYPE]
        keyname = parameters[self.PARAM_KEYNAME]
        group = parameters[self.PARAM_GROUP]
        spot = parameters[self.PARAM_SPOT]
        zone = parameters[self.PARAM_ZONE]
        # The launch decision compares against the string 'True'. Evaluate the
        # flag once so the timing log below agrees with what was actually
        # launched (a bare truthiness test treats the string 'False' as spot).
        use_spot = spot == 'True'

        utils.log("Starting {0} machines with machine id {1}, with " \
          "instance type {2}, keyname {3}, in security group {4}, in zone {5}" \
          .format(count, image_id, instance_type, keyname, group, zone))

        start_time = datetime.datetime.now()
        active_public_ips = []
        active_private_ips = []
        active_instances = []

        try:
            attempts = 1
            while True:
                instances = self.__describe_instances(parameters)
                term_instance_info = self.__get_instance_info(
                    instances, 'terminated', keyname)
                if len(term_instance_info[2]):
                    self.handle_failure('One or more nodes started with key {0} have '\
                                        'been terminated'.format(keyname))
                instance_info = self.__get_instance_info(
                    instances, 'running', keyname)
                active_public_ips = instance_info[0]
                active_private_ips = instance_info[1]
                active_instances = instance_info[2]

                # If security has been configured on this agent just now,
                # that's an indication that this is a fresh cloud deployment.
                # As such it's not expected to have any running VMs.
                if len(active_instances) > 0 or security_configured:
                    break
                elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
                    self.handle_failure('Failed to invoke describe_instances')
                attempts += 1

            conn = self.open_connection(parameters)
            if use_spot:
                price = parameters[self.PARAM_SPOT_PRICE]
                conn.request_spot_instances(str(price),
                                            image_id,
                                            key_name=keyname,
                                            security_groups=[group],
                                            instance_type=instance_type,
                                            count=count,
                                            placement=zone)
            else:
                retries_left = self.RUN_INSTANCES_RETRY_COUNT
                while True:
                    try:
                        conn.run_instances(image_id,
                                           count,
                                           count,
                                           key_name=keyname,
                                           security_groups=[group],
                                           instance_type=instance_type,
                                           placement=zone)
                        break
                    except EC2ResponseError as exception:
                        # Decrement (the original assigned -1, which gave up
                        # after the very first error) and do so before logging
                        # so the reported number is what actually remains.
                        retries_left -= 1
                        utils.log("Couldn't start {0} instances because of error: {1}. " \
                          "{2} retries left.".format(count, exception.error_message,
                          retries_left))
                        if retries_left <= 0:
                            self.handle_failure(exception.error_message)
                        utils.sleep(10)

            instance_ids = []
            public_ips = []
            private_ips = []
            utils.sleep(10)
            end_time = datetime.datetime.now() + datetime.timedelta(
                0, self.MAX_VM_CREATION_TIME)
            now = datetime.datetime.now()

            # Poll until every requested VM is visible or the deadline passes.
            # Only instances that were not running before the request count.
            while now < end_time:
                time_left = (end_time - now).seconds
                utils.log('[{0}] {1} seconds left...'.format(now, time_left))
                instances = self.__describe_instances(parameters)
                term_instance_info = self.__get_instance_info(
                    instances, 'terminated', keyname)
                if len(term_instance_info[2]):
                    self.handle_failure('One or more nodes started with key {0} have '\
                                        'been terminated'.format(keyname))
                instance_info = self.__get_instance_info(
                    instances, 'running', keyname)
                public_ips = utils.diff(instance_info[0], active_public_ips)
                private_ips = utils.diff(instance_info[1], active_private_ips)
                instance_ids = utils.diff(instance_info[2], active_instances)
                if count == len(public_ips):
                    break
                time.sleep(self.SLEEP_TIME)
                now = datetime.datetime.now()

            if not public_ips:
                self.handle_failure('No public IPs were able to be procured '
                                    'within the time limit')

            # Fewer VMs than requested: terminate those without a public IP.
            if len(public_ips) != count:
                for index, public_ip in enumerate(public_ips):
                    if public_ip == '0.0.0.0':
                        instance_to_term = instance_ids[index]
                        utils.log('Instance {0} failed to get a public IP address and' \
                                  ' is being terminated'.format(instance_to_term))
                        conn.terminate_instances([instance_to_term])

            end_time = datetime.datetime.now()
            total_time = end_time - start_time
            if use_spot:
                utils.log('TIMING: It took {0} seconds to spawn {1} spot ' \
                          'instances'.format(total_time.seconds, count))
            else:
                utils.log('TIMING: It took {0} seconds to spawn {1} ' \
                          'regular instances'.format(total_time.seconds, count))
            return instance_ids, public_ips, private_ips
        except EC2ResponseError as exception:
            self.handle_failure('EC2 response error while starting VMs: ' +
                                exception.error_message)
        except AgentRuntimeException:
            # Already an agent-level failure: propagate it with its traceback
            # intact.
            raise
        except Exception as exception:
            self.handle_failure('Error while starting VMs: ' +
                                exception.message)
  def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, ssl=True):
    """
    Initialize a new instance of the infrastructure manager service.

    Construction blocks until the deployment secret key can be read and, when
    SSL is enabled, until both the certificate and the private key exist on
    disk. Neither wait has a timeout. If a JSON file named self.CONFIG_FILE
    exists next to the running script and contains the
    PersistentStoreFactory.PARAM_STORE_TYPE key, its contents are passed to
    InfrastructureManager; otherwise the manager is built with no arguments.

    Args:
      host  Hostname to which the service should bind (Optional). Defaults
            to DEFAULT_HOST.
      port  Port of the service (Optional). Defaults to DEFAULT_PORT.
      ssl   True if SSL should be engaged or False otherwise (Optional).
            Defaults to True. When engaged, this implementation expects
            to find certs/mycert.pem and certs/mykey.pem under
            self.APPSCALE_DIR.
    """
    self.host = host
    self.port = port

    # Wait for the secret key to be readable. The value itself is not needed
    # here and is deliberately NOT logged: writing credentials to the log
    # exposes them to anyone who can read the log files.
    while True:
      try:
        utils.get_secret(self.APPSCALE_DIR + 'secret.key')
        break
      except Exception:
        logging.info('Waiting for the secret key to become available')
        utils.sleep(5)
    logging.info('Found the secret key')

    SOAPpy.Config.simplify_objects = True

    if ssl:
      logging.info('Checking for the certificate and private key')
      cert = self.APPSCALE_DIR + 'certs/mycert.pem'
      key = self.APPSCALE_DIR + 'certs/mykey.pem'
      while not (os.path.exists(cert) and os.path.exists(key)):
        logging.info('Waiting for certificates')
        utils.sleep(5)

      ssl_context = SSL.Context()
      ssl_context.load_cert(cert, key)
      self.server = SOAPpy.SOAPServer((host, port), ssl_context=ssl_context)
    else:
      self.server = SOAPpy.SOAPServer((host, port))

    # The optional configuration file lives beside the entry-point script.
    parent_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    config_file = os.path.join(parent_dir, self.CONFIG_FILE)
    if os.path.exists(config_file):
      with open(config_file) as file_handle:
        params = json.load(file_handle)
      # 'in' replaces dict.has_key, which no longer exists on Python 3.
      if PersistentStoreFactory.PARAM_STORE_TYPE in params:
        logging.info('Loading infrastructure manager configuration from ' +
                  config_file)
        i = InfrastructureManager(params)
      else:
        i = InfrastructureManager()
    else:
      i = InfrastructureManager()

    # Expose the infrastructure operations over SOAP.
    self.server.registerFunction(i.describe_instances)
    self.server.registerFunction(i.run_instances)
    self.server.registerFunction(i.terminate_instances)
    self.server.registerFunction(i.attach_disk)

    system_manager = SystemManager()

    # Expose the system monitoring operations over SOAP.
    self.server.registerFunction(system_manager.get_cpu_usage)
    self.server.registerFunction(system_manager.get_disk_usage)
    self.server.registerFunction(system_manager.get_memory_usage)
    self.server.registerFunction(system_manager.get_service_summary)
    self.server.registerFunction(system_manager.get_swap_usage)
    self.server.registerFunction(system_manager.get_loadavg)

    self.started = False
Exemple #10
0
  def run_instances(self, count, parameters, security_configured):
    """
    Spawn the specified number of EC2 instances using the parameters
    provided. This method relies on the '<prefix>-run-instances' command
    line tool (where <prefix> is self.prefix) to spawn the actual VMs in
    the cloud. This method is blocking in that it waits until the requested
    VMs report a public IP address. However, if the requested VMs cannot be
    procured within self.MAX_VM_CREATION_TIME seconds, this method stops
    waiting and works with whatever has come up so far. (Also see
    documentation for the BaseAgent class)

    Args:
      count               Number of VMs to spawn
      parameters          A dictionary of parameters. This must contain 'keyname',
                          'group', 'image_id' and 'instance_type' parameters.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips), restricted
      to the entries that were not already reported by describe_instances
      before the new VMs were requested.

    Raises:
      SystemExit if no public IP address could be procured within the
      time limit.
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    # Spot instances are never requested by this method; the spot branches
    # below are only reachable if this flag is changed.
    spot = False

    utils.log('[{0}] [{1}] [{2}] [{3}] [ec2] [{4}] [{5}]'.format(count,
      image_id, instance_type, keyname, group, spot))

    start_time = datetime.datetime.now()
    if 'EC2_URL' in os.environ:
      utils.log('EC2_URL = [{0}]'.format(os.environ['EC2_URL']))
    else:
      utils.log('Warning: EC2_URL environment not found in the process runtime!')

    # Record what is already running so that the new VMs can be told apart
    # from the pre-existing ones later on.
    # NOTE(review): there is no delay between polls here, so this spins on
    # describe_instances until something is running -- confirm intended.
    while True:
      active_public_ips, active_private_ips, active_instances = \
        self.describe_instances(parameters)
      # If security has been configured on this agent just now,
      # that's an indication that this is a fresh cloud deployment.
      # As such it's not expected to have any running VMs.
      if active_instances or security_configured:
        break

    # NOTE(review): these values are interpolated into a shell command
    # without quoting; callers must only pass trusted parameters.
    args = '-k {0} -n {1} --instance-type {2} --group {3} {4}'.format(keyname,
      count, instance_type, group, image_id)
    if spot:
      price = self.get_optimal_spot_price(instance_type)
      command_to_run = '{0}-request-spot-instances -p {1} {2}'.format(
        self.prefix, price, args)
    else:
      command_to_run = '{0}-run-instances {1}'.format(self.prefix, args)

    # Keep issuing the request until run_instances_response accepts the
    # output; it may also hand back an adjusted command for the next try.
    while True:
      run_instances = utils.shell(command_to_run)
      utils.log('Run instances says {0}'.format(run_instances))
      status, command_to_run = self.run_instances_response(command_to_run,
        run_instances)
      if status:
        break
      utils.log('sleepy time')
      utils.sleep(5)

    instances = []
    public_ips = []
    private_ips = []
    utils.sleep(10)

    # Both patterns are loop invariant, so compile them once. The key name
    # is escaped so that characters such as '.' or '+' in it are matched
    # literally instead of being interpreted as regex syntax.
    fqdn_regex = re.compile(r'\s+({0})\s+({0})\s+running\s+{1}\s'.format(
      self.FQDN_REGEX, re.escape(keyname)))
    instance_regex = re.compile(r'INSTANCE\s+(i-\w+)')

    deadline = datetime.datetime.now() + \
      datetime.timedelta(0, self.MAX_VM_CREATION_TIME)
    now = datetime.datetime.now()
    while now < deadline:
      describe_instances = utils.shell(self.prefix + '-describe-instances 2>&1')
      utils.log('[{0}] {1} seconds left...'.format(now, (deadline - now).seconds))
      utils.log(describe_instances)
      all_ip_addresses = utils.flatten(fqdn_regex.findall(describe_instances))
      instances = utils.flatten(instance_regex.findall(describe_instances))
      public_ips, private_ips = self.get_ip_addresses(all_ip_addresses)
      # Drop everything that was already there before this request.
      public_ips = utils.diff(public_ips, active_public_ips)
      private_ips = utils.diff(private_ips, active_private_ips)
      instances = utils.diff(instances, active_instances)
      if count == len(public_ips):
        break
      time.sleep(self.SLEEP_TIME)
      now = datetime.datetime.now()

    if not public_ips:
      sys.exit('No public IPs were able to be procured within the time limit')

    if len(public_ips) != count:
      # '0.0.0.0' marks a VM that never received a public address.
      for index, public_ip in enumerate(public_ips):
        if public_ip == '0.0.0.0':
          instance_to_term = instances[index]
          utils.log('Instance {0} failed to get a public IP address and is being terminated'.\
          format(instance_to_term))
          utils.shell(self.prefix + '-terminate-instances ' + instance_to_term)

    total_time = datetime.datetime.now() - start_time
    if spot:
      utils.log('TIMING: It took {0} seconds to spawn {1} spot instances'.format(
        total_time.seconds, count))
    else:
      utils.log('TIMING: It took {0} seconds to spawn {1} regular instances'.format(
        total_time.seconds, count))
    return instances, public_ips, private_ips