Ejemplo n.º 1
0
    def __deregister_flex_vm(self,
                             ip,
                             username,
                             keyfile,
                             parameters,
                             queue_head_ip,
                             force=False):
        """Run the deregistration script on a Flex VM, then mark it terminated.

        Args
            ip              Public IP of the Flex VM to deregister.
            username        SSH username used to reach the VM.
            keyfile         Path to the SSH private key for the VM.
            parameters      Parameter dictionary forwarded to
                            VMStateModel.set_state().
            queue_head_ip   Unused in this body — presumably kept for
                            interface symmetry with callers; TODO confirm.
            force           Unused in this body; TODO confirm intent.

        The database update sits in the finally block, so the VM is recorded
        as STATE_TERMINATED even when the remote command fails or the VM is
        unreachable.
        """

        try:
            # Only attempt the remote script if the VM answers on SSH (22).
            if self.check_network_ports(ip, [22]):
                deregister_command = self.get_remote_command_string(
                    ip=ip,
                    username=username,
                    keyfile=keyfile,
                    command=
                    "sudo ~/stochss/release-tools/flex-cloud/deregister_flex_vm.sh"
                )
                logging.debug(
                    'deregister_command =\n{}'.format(deregister_command))
                os.system(deregister_command)
            else:
                logging.debug(
                    'Flex VM is not accessible via SSH, can not execute deregister command'
                )

        except Exception as e:
            # Best-effort: log and fall through to the state update below.
            logging.exception('Failed to deregister Flex VM: {0}'.format(e))
#            logging.error(sys.exc_info())

        finally:
            # Always record the VM as terminated, regardless of whether the
            # remote deregister script could actually be executed.
            VMStateModel.set_state(
                params=parameters,
                ins_ids=[self.get_flex_instance_id(public_ip=ip)],
                state=VMStateModel.STATE_TERMINATED,
                description='VM Deregistered.')
Ejemplo n.º 2
0
    def __configure_celery(self, params, public_ips, instance_ids):
        """
        Private method used for uploading the current celery configuration to each instance
        that is running and ssh connectable.

        Args
            params          A dictionary of parameters; must contain 'keyname'
                            and 'credentials'
            public_ips      A list of public ips that are going to be configed
            instance_ids    A list of instance_ids that are used for terminating instances and update
                            database if fail on configuration by some reason

        Raises
            Exception       If the ssh key file is missing, or celery could
                            not be started on one of the instances (that
                            instance is deregistered and marked failed first).
        """
        # Update celery config file...it should have the correct IP
        # of the Queue head node, which should already be running.
        # Pass it line by line so theres no weird formatting errors from
        # trying to echo a multi-line file directly on the command line

        key_file = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(params["keyname"]))
        logging.debug("key_file = {0}".format(key_file))

        if not os.path.exists(key_file):
            raise Exception("ssh key_file file not found: {0}".format(key_file))

        credentials = params["credentials"]

        # Commands shared by every instance. Per-instance exports are added
        # to a fresh copy inside the loop below.
        base_commands = []
        base_commands.append("source /home/ubuntu/.bashrc")
        base_commands.append("export AWS_ACCESS_KEY_ID={0}".format(str(credentials["EC2_ACCESS_KEY"])))
        base_commands.append("export AWS_SECRET_ACCESS_KEY={0}".format(str(credentials["EC2_SECRET_KEY"])))

        for ip, ins_id in zip(public_ips, instance_ids):
            # helper.wait_for_ssh_connection(key_file, ip)
            ins_type = VMStateModel.get_instance_type(params, ins_id)
            # BUGFIX: the INSTANCE_TYPE export was previously appended to one
            # shared list, so every later VM received the exports of all
            # earlier VMs as well. Each VM now gets exactly one export.
            commands = base_commands + ["export INSTANCE_TYPE={0}".format(ins_type)]
            success = helper.start_celery_on_vm(
                instance_type=ins_type,
                ip=ip,
                key_file=key_file,
                agent_type=self.agent_type,
                worker_name=ip.replace(".", "_"),
                prepend_commands=commands,
            )
            if success == 0:
                # update db with successful running vms
                logging.info("celery started! ")
                logging.info("host ip: {0}".format(ip))
                VMStateModel.set_state(params, [ins_id], VMStateModel.STATE_RUNNING, VMStateModel.DESCRI_SUCCESS)
            else:
                # Celery failed to start: tear the VM down and record failure
                # before aborting the whole configuration pass.
                self.agent.deregister_some_instances(params, [ins_id])
                VMStateModel.set_state(
                    params, [ins_id], VMStateModel.STATE_FAILED, VMStateModel.DESCRI_FAIL_TO_COFIGURE_CELERY
                )
                raise Exception("Failure to start celery on {0}".format(ip))

        # get all intstance types and configure the celeryconfig.py locally
        instance_types = VMStateModel.get_running_instance_types(params)
        helper.config_celery_queues(agent_type=self.agent_type, instance_types=instance_types)
    def __verify_ec2_instances_via_ssh(self, instance_ids, parameters,
                                       public_ips):
        """Filter the given instances down to the ones reachable over ssh.

        Args
            instance_ids    Instance ids to probe, parallel to public_ips
            parameters      A dictionary of parameters; must contain 'keyname'
            public_ips      Public ips to probe via ssh

        Return
            A tuple (connected_public_ips, connected_instance_ids) containing
            only the instances that answered on ssh. Unreachable instances
            are deregistered and marked STATE_FAILED in the database.

        Raises
            Exception if the ssh keyfile is missing, or if terminating the
            unreachable instances fails.
        """
        keyfile = os.path.join(os.path.dirname(__file__), '..',
                               '{0}.key'.format(parameters['keyname']))
        logging.info('keyfile = {0}'.format(keyfile))

        if not os.path.exists(keyfile):
            raise Exception("ssh keyfile file not found: {0}".format(keyfile))

        connected_public_ips = []
        connected_instance_ids = []

        for (pub_ip, ins_id) in zip(public_ips, instance_ids):
            logging.info('connecting to {0}...'.format(pub_ip))
            success = helper.wait_for_ssh_connection(key_file=keyfile,
                                                     ip=pub_ip)

            if success:
                logging.info('{0} is successfully added'.format(pub_ip))
                connected_public_ips.append(pub_ip)
                connected_instance_ids.append(ins_id)

        # if there are some vms not able to be connected via ssh,
        # just shut them down explicitly
        if len(public_ips) != len(connected_public_ips):
            logging.info(
                'Time out on ssh to {0} instances. They will be terminated.'.
                format(len(public_ips) - len(connected_public_ips)))

            try:
                terminate_ins_ids = [ins_id for ins_id in instance_ids
                                     if ins_id not in connected_instance_ids]
                self.agent.deregister_some_instances(parameters,
                                                     terminate_ins_ids)
                # update db with failed vms
                VMStateModel.set_state(parameters, terminate_ins_ids,
                                       VMStateModel.STATE_FAILED,
                                       VMStateModel.DESCRI_TIMEOUT_ON_SSH)
            except Exception:
                # BUGFIX: was a bare `except:` which also caught SystemExit/
                # KeyboardInterrupt and discarded the original traceback.
                logging.exception(
                    'Failed to terminate instances unreachable via ssh.')
                raise Exception(
                    "Errors in terminating instances that cannot be connected via ssh."
                )

        return connected_public_ips, connected_instance_ids
Ejemplo n.º 4
0
    def __verify_ec2_instances_via_ssh(self, instance_ids, parameters, public_ips):
        """Filter the given instances down to the ones reachable over ssh.

        Args
            instance_ids    Instance ids to probe, parallel to public_ips
            parameters      A dictionary of parameters; must contain 'keyname'
            public_ips      Public ips to probe via ssh

        Return
            A tuple (connected_public_ips, connected_instance_ids) containing
            only the instances that answered on ssh. Unreachable instances
            are deregistered and marked STATE_FAILED in the database.

        Raises
            Exception if the ssh keyfile is missing, or if terminating the
            unreachable instances fails.
        """
        keyfile = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(parameters["keyname"]))
        logging.info("keyfile = {0}".format(keyfile))

        if not os.path.exists(keyfile):
            raise Exception("ssh keyfile file not found: {0}".format(keyfile))

        connected_public_ips = []
        connected_instance_ids = []

        for (pub_ip, ins_id) in zip(public_ips, instance_ids):
            logging.info("connecting to {0}...".format(pub_ip))
            success = helper.wait_for_ssh_connection(key_file=keyfile, ip=pub_ip)

            if success:
                logging.info("{0} is successfully added".format(pub_ip))
                connected_public_ips.append(pub_ip)
                connected_instance_ids.append(ins_id)

        # if there are some vms not able to be connected via ssh,
        # just shut them down explicitly
        if len(public_ips) != len(connected_public_ips):
            logging.info(
                "Time out on ssh to {0} instances. They will be terminated.".format(
                    len(public_ips) - len(connected_public_ips)
                )
            )

            try:
                terminate_ins_ids = [ins_id for ins_id in instance_ids
                                     if ins_id not in connected_instance_ids]
                self.agent.deregister_some_instances(parameters, terminate_ins_ids)
                # update db with failed vms
                VMStateModel.set_state(
                    parameters, terminate_ins_ids, VMStateModel.STATE_FAILED, VMStateModel.DESCRI_TIMEOUT_ON_SSH
                )
            except Exception:
                # BUGFIX: was a bare `except:` which also caught SystemExit/
                # KeyboardInterrupt and discarded the original traceback.
                logging.exception("Failed to terminate instances unreachable via ssh.")
                raise Exception("Errors in terminating instances that cannot be connected via ssh.")

        return connected_public_ips, connected_instance_ids
Ejemplo n.º 5
0
    def __deregister_flex_vm(self, ip, username, keyfile, parameters, queue_head_ip, force=False):
        """Execute the deregister script on a Flex VM, then mark it terminated.

        The state update lives in the finally clause, so the database records
        STATE_TERMINATED whether or not the remote command could be issued.
        """
        try:
            if not self.check_network_ports(ip, [22]):
                logging.debug('Flex VM is not accessible via SSH, can not execute deregister command')
            else:
                remote_cmd = self.get_remote_command_string(
                    ip=ip,
                    username=username,
                    keyfile=keyfile,
                    command="sudo ~/stochss/release-tools/flex-cloud/deregister_flex_vm.sh")
                logging.debug('deregister_command =\n{}'.format(remote_cmd))
                os.system(remote_cmd)

        except Exception as e:
            # Best effort only: log and continue to the state update.
            logging.exception('Failed to deregister Flex VM: {0}'.format(e))

        finally:
            instance_id = self.get_flex_instance_id(public_ip=ip)
            VMStateModel.set_state(params=parameters,
                                   ins_ids=[instance_id],
                                   state=VMStateModel.STATE_TERMINATED,
                                   description='VM Deregistered.')
Ejemplo n.º 6
0
    def prepare_instances(self,
                          parameters,
                          count=None,
                          security_configured=True):
        """
        prepares the specified number of Flex instances using the parameters
        provided. This method is blocking in that it waits until the
        requested VMs are properly booted up. However if the requested
        VMs cannot be procured within 1800 seconds, this method will treat
        it as an error and return. (Also see documentation for the BaseAgent
        class)

        Args:
          parameters          A dictionary of parameters. This must contain 'keyname',
                              'group', 'image_id' and 'instance_type' parameters.
          security_configured Uses this boolean value as an heuristic to
                              detect brand new AppScale deployments.

        Returns:
          A tuple of the form (instances, public_ips, private_ips)
        """
        logging.debug(
            'flex_agent.prepare_instances() parameters={0}'.format(parameters))
        try:

            flex_cloud_machine_info = parameters[
                self.PARAM_FLEX_CLOUD_MACHINE_INFO]
            logging.debug('flex_cloud_machine_info =\n{}'.format(
                pprint.pformat(flex_cloud_machine_info)))

            queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
            logging.debug('queue_head = {}'.format(queue_head))
            queue_head_keyfile = queue_head['keyfile']
            remote_queue_head_keyfile = os.path.join(
                FlexConfig.QUEUE_HEAD_KEY_DIR,
                os.path.basename(queue_head_keyfile))

            for machine in flex_cloud_machine_info:
                ip = machine['ip']
                keyfile = machine['keyfile']

                username = machine['username']
                is_queue_head = machine[self.PARAM_QUEUE_HEAD]
                # NOTE: renamed from `id` to avoid shadowing the builtin.
                instance_id = self.get_flex_instance_id(public_ip=ip)

                # BUGFIX: the existence check must run BEFORE os.chmod —
                # chmod on a missing keyfile raised OSError and skipped the
                # graceful DESCRI_INVALID_KEYFILE handling entirely.
                if not os.path.exists(keyfile):
                    logging.error(
                        'Keyfile: {0} does not exist!'.format(keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_INVALID_KEYFILE)
                    continue

                # ssh refuses keys with permissive modes; force 0600.
                os.chmod(keyfile, int('600', 8))

                logging.debug("[{0}] [{1}] [{2}] [is_queue_head:{3}]".format(
                    ip, keyfile, username, is_queue_head))

                # Copy the queue head's key onto the worker so it can reach
                # the queue head later.
                scp_command = \
                    'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                        keyfile=keyfile,
                        source=queue_head_keyfile,
                        target="{username}@{ip}:{remote_queue_head_keyfile}".format(
                            username=username, ip=ip, remote_queue_head_keyfile=remote_queue_head_keyfile
                        )
                    )

                logging.debug(
                    'scp command for queue head keyfile =\n{}'.format(
                        scp_command))
                res = os.system(scp_command)
                if res != 0:
                    # BUGFIX: message had a no-op .format() with no
                    # placeholder, so the keyfile was never logged.
                    logging.error(
                        'scp for queue head keyfile failed! keyfile: {0}'.format(
                            keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                # Build the per-VM init script, uploaded and executed below.
                script_lines = []
                script_lines.append("#!/bin/bash")

                script_lines.append(
                    "echo export STOCHKIT_HOME={0} >> ~/.bashrc".format(
                        "~/stochss/StochKit/"))
                script_lines.append(
                    "echo export STOCHKIT_ODE={0} >> ~/.bashrc".format(
                        "~/stochss/ode/"))
                script_lines.append(
                    "echo export R_LIBS={0} >> ~/.bashrc".format(
                        "~/stochss/stochoptim/library"))
                # BUGFIX: dropped a stray no-op .format(...) argument here.
                script_lines.append(
                    "echo export C_FORCE_ROOT=1 >> ~/.bashrc")
                script_lines.append(
                    "chmod 600 {remote_queue_head_keyfile}".format(
                        remote_queue_head_keyfile=remote_queue_head_keyfile))

                if is_queue_head:
                    logging.debug(
                        'Adding extra commands for configuring queue head...')
                    script_lines.append(
                        "sudo rabbitmqctl add_user stochss ucsb")
                    script_lines.append(
                        'sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"'
                    )

                    reset_mysql_script = '~/stochss/release-tools/flex-cloud/reset_mysql_pwd.sh'
                    script_lines.append(
                        "sudo {reset_mysql_script} root {flex_db_password}".
                        format(reset_mysql_script=reset_mysql_script,
                               flex_db_password=parameters[
                                   self.PARAM_FLEX_DB_PASSWORD]))

                bash_script = '\n'.join(script_lines)
                logging.debug(
                    "\n\n\nbash_script =\n{0}\n\n\n".format(bash_script))

                bash_script_filename = os.path.join(AgentConfig.TMP_DIRNAME,
                                                    'stochss_init.sh')
                with open(bash_script_filename, 'w') as bash_script_file:
                    bash_script_file.write(bash_script)

                scp_command = 'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                    keyfile=keyfile,
                    source=bash_script_filename,
                    target="{username}@{ip}:~/stochss_init.sh".format(
                        username=username, ip=ip))

                logging.debug('scp command =\n{}'.format(scp_command))
                res = os.system(scp_command)

                os.remove(bash_script_filename)

                if res != 0:
                    # BUGFIX: message had a no-op .format() (no placeholder).
                    logging.error('scp failed! keyfile: {0}'.format(keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                commands = ['chmod +x ~/stochss_init.sh', '~/stochss_init.sh']
                command = ';'.join(commands)

                remote_command_string = self.get_remote_command_string(
                    ip=ip, username=username, keyfile=keyfile, command=command)

                logging.debug('remote_command_string =\n{}'.format(
                    remote_command_string))
                res = os.system(remote_command_string)

                if res != 0:
                    # BUGFIX: message had a no-op .format() (no placeholder).
                    logging.error(
                        'remote command failed! keyfile: {0}'.format(keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue
        except Exception as e:
            logging.exception(e)
            raise
    def __configure_celery(self, params, public_ips, instance_ids):
        '''
        Private method used for uploading the current celery configuration to each instance
        that is running and ssh connectable.

        Args
            params          A dictionary of parameters; must contain 'keyname'
                            and 'credentials'
            public_ips      A list of public ips that are going to be configed
            instance_ids    A list of instance_ids that are used for terminating instances and update
                            database if fail on configuration by some reason

        Raises
            Exception       If the ssh key file is missing, or celery could
                            not be started on one of the instances (that
                            instance is deregistered and marked failed first).
        '''
        # Update celery config file...it should have the correct IP
        # of the Queue head node, which should already be running.
        # Pass it line by line so theres no weird formatting errors from
        # trying to echo a multi-line file directly on the command line

        key_file = os.path.join(os.path.dirname(__file__), '..',
                                '{0}.key'.format(params['keyname']))
        logging.debug("key_file = {0}".format(key_file))

        if not os.path.exists(key_file):
            raise Exception(
                "ssh key_file file not found: {0}".format(key_file))

        credentials = params['credentials']

        # Commands shared by every instance. Per-instance exports are added
        # to a fresh copy inside the loop below.
        base_commands = []
        base_commands.append('source /home/ubuntu/.bashrc')
        base_commands.append('export AWS_ACCESS_KEY_ID={0}'.format(
            str(credentials['EC2_ACCESS_KEY'])))
        base_commands.append('export AWS_SECRET_ACCESS_KEY={0}'.format(
            str(credentials['EC2_SECRET_KEY'])))

        for ip, ins_id in zip(public_ips, instance_ids):
            # helper.wait_for_ssh_connection(key_file, ip)
            ins_type = VMStateModel.get_instance_type(params, ins_id)
            # BUGFIX: the INSTANCE_TYPE export was previously appended to one
            # shared list, so every later VM received the exports of all
            # earlier VMs as well. Each VM now gets exactly one export.
            commands = base_commands + [
                'export INSTANCE_TYPE={0}'.format(ins_type)]
            success = helper.start_celery_on_vm(instance_type=ins_type,
                                                ip=ip,
                                                key_file=key_file,
                                                agent_type=self.agent_type,
                                                worker_name=ip.replace(
                                                    '.', '_'),
                                                prepend_commands=commands)
            if success == 0:
                # update db with successful running vms
                logging.info("celery started! ")
                logging.info("host ip: {0}".format(ip))
                VMStateModel.set_state(params, [ins_id],
                                       VMStateModel.STATE_RUNNING,
                                       VMStateModel.DESCRI_SUCCESS)
            else:
                # Celery failed to start: tear the VM down and record failure
                # before aborting the whole configuration pass.
                self.agent.deregister_some_instances(params, [ins_id])
                VMStateModel.set_state(
                    params, [ins_id], VMStateModel.STATE_FAILED,
                    VMStateModel.DESCRI_FAIL_TO_COFIGURE_CELERY)
                raise Exception("Failure to start celery on {0}".format(ip))

        # get all intstance types and configure the celeryconfig.py locally
        instance_types = VMStateModel.get_running_instance_types(params)
        helper.config_celery_queues(agent_type=self.agent_type,
                                    instance_types=instance_types)
    def __poll_instances_status(self, num_vms, parameters):
        '''
        Private method that polls the state of instances that have already
        spawned, sleeping between polls, until the requested number of VMs
        is running or the poll count is exhausted.

        Args
            num_vms         Number of virtual machines that need to be polled
            parameters      A dictionary of parameters

        Return
            A tuple of (public ips, private ips, instance ids). Each of the
            three is a list. Instances that never reached running state are
            deregistered and marked STATE_FAILED.
        '''
        logging.info('Start polling task for infrastructure = {0}'.format(
            parameters['infrastructure']))

        ins_ids = self.agent.describe_instances_launched(parameters)
        logging.info("ins_ids = {0}".format(ins_ids))

        # update db with new instance ids and 'pending'
        VMStateModel.update_ins_ids(parameters,
                                    ins_ids,
                                    self.reservation_id,
                                    from_state=VMStateModel.STATE_CREATING,
                                    to_state=VMStateModel.STATE_PENDING)

        public_ips = None
        private_ips = None
        instance_ids = None
        keyfiles = None

        for x in xrange(EC2BackendWorker.POLL_COUNT):
            # get the ips and ids of this keyname
            public_ips, private_ips, instance_ids, instance_types, keyfiles = self.agent.describe_instances_running(
                parameters)

            logging.info("public_ips = {0}".format(public_ips))
            logging.debug("private_ips = {0}".format(private_ips))
            logging.info("instance_ids = {0}".format(instance_ids))
            logging.info("instance_types = {0}".format(instance_types))
            logging.info("keyfiles = {0}".format(keyfiles))

            # if we get the requested number of vms (the requested number will be 1 if this is queue head),
            # update reservation information and send a message to the backend server
            if num_vms == len(public_ips):
                # update db with new public ips and private ips
                VMStateModel.update_ips(parameters, instance_ids, public_ips,
                                        private_ips, instance_types, keyfiles)
                break

            else:
                if x < EC2BackendWorker.POLL_COUNT - 1:
                    time.sleep(EC2BackendWorker.POLL_WAIT_TIME)
                    # BUGFIX: message hard-coded "5 seconds" even though the
                    # actual sleep is POLL_WAIT_TIME; report the real value.
                    logging.info('Polling task: sleep {0} seconds...'.format(
                        EC2BackendWorker.POLL_WAIT_TIME))

                else:
                    VMStateModel.update_ips(parameters, instance_ids,
                                            public_ips, private_ips,
                                            instance_types, keyfiles)

                    logging.info(
                        'Polling timeout. About to terminate some instances:')
                    terminate_ins_ids = []
                    for ins_id in ins_ids:
                        if ins_id not in instance_ids:
                            logging.info(
                                'instance {0} to be terminated'.format(ins_id))
                            terminate_ins_ids.append(ins_id)
                    # terminate timeout instances
                    self.agent.deregister_some_instances(
                        parameters, terminate_ins_ids)
                    # update db with failure information
                    VMStateModel.set_state(parameters, terminate_ins_ids,
                                           VMStateModel.STATE_FAILED,
                                           VMStateModel.DESCRI_FAIL_TO_RUN)

        return public_ips, private_ips, instance_ids
Ejemplo n.º 9
0
    def prepare_instances(self, parameters, count=None, security_configured=True):
        """
        prepares the specified number of Flex instances using the parameters
        provided. This method is blocking in that it waits until the
        requested VMs are properly booted up. However if the requested
        VMs cannot be procured within 1800 seconds, this method will treat
        it as an error and return. (Also see documentation for the BaseAgent
        class)

        Args:
          parameters          A dictionary of parameters. This must contain 'keyname',
                              'group', 'image_id' and 'instance_type' parameters.
          security_configured Uses this boolean value as an heuristic to
                              detect brand new AppScale deployments.

        Returns:
          A tuple of the form (instances, public_ips, private_ips)
        """
        logging.debug('flex_agent.prepare_instances() parameters={0}'.format(parameters))
        try:

            flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]
            logging.debug('flex_cloud_machine_info =\n{}'.format(pprint.pformat(flex_cloud_machine_info)))

            queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
            logging.debug('queue_head = {}'.format(queue_head))
            queue_head_keyfile = queue_head['keyfile']
            remote_queue_head_keyfile = os.path.join(FlexConfig.QUEUE_HEAD_KEY_DIR,
                                                     os.path.basename(queue_head_keyfile))

            for machine in flex_cloud_machine_info:
                ip = machine['ip']
                keyfile = machine['keyfile']

                username = machine['username']
                is_queue_head = machine[self.PARAM_QUEUE_HEAD]
                # NOTE: renamed from `id` to avoid shadowing the builtin.
                instance_id = self.get_flex_instance_id(public_ip=ip)

                # BUGFIX: the existence check must run BEFORE os.chmod —
                # chmod on a missing keyfile raised OSError and skipped the
                # graceful DESCRI_INVALID_KEYFILE handling entirely.
                if not os.path.exists(keyfile):
                    logging.error('Keyfile: {0} does not exist!'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_INVALID_KEYFILE)
                    continue

                # ssh refuses keys with permissive modes; force 0600.
                os.chmod(keyfile, int('600', 8))

                logging.debug("[{0}] [{1}] [{2}] [is_queue_head:{3}]".format(ip, keyfile, username, is_queue_head))

                # Copy the queue head's key onto the worker so it can reach
                # the queue head later.
                scp_command = \
                    'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                        keyfile=keyfile,
                        source=queue_head_keyfile,
                        target="{username}@{ip}:{remote_queue_head_keyfile}".format(
                            username=username, ip=ip, remote_queue_head_keyfile=remote_queue_head_keyfile
                        )
                    )

                logging.debug('scp command for queue head keyfile =\n{}'.format(scp_command))
                res = os.system(scp_command)
                if res != 0:
                    # BUGFIX: message had a no-op .format() with no
                    # placeholder, so the keyfile was never logged.
                    logging.error('scp for queue head keyfile failed! keyfile: {0}'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                # Build the per-VM init script, uploaded and executed below.
                script_lines = []
                script_lines.append("#!/bin/bash")

                script_lines.append("echo export STOCHKIT_HOME={0} >> ~/.bashrc".format("~/stochss/StochKit/"))
                script_lines.append("echo export STOCHKIT_ODE={0} >> ~/.bashrc".format("~/stochss/ode/"))
                script_lines.append("echo export R_LIBS={0} >> ~/.bashrc".format("~/stochss/stochoptim/library"))
                # BUGFIX: dropped a stray no-op .format(...) argument here.
                script_lines.append("echo export C_FORCE_ROOT=1 >> ~/.bashrc")
                script_lines.append("chmod 600 {remote_queue_head_keyfile}".format(
                                                            remote_queue_head_keyfile=remote_queue_head_keyfile))

                if is_queue_head:
                    logging.debug('Adding extra commands for configuring queue head...')
                    script_lines.append("sudo rabbitmqctl add_user stochss ucsb")
                    script_lines.append('sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"')

                    reset_mysql_script = '~/stochss/release-tools/flex-cloud/reset_mysql_pwd.sh'
                    script_lines.append("sudo {reset_mysql_script} root {flex_db_password}".format(
                        reset_mysql_script=reset_mysql_script,
                        flex_db_password=parameters[self.PARAM_FLEX_DB_PASSWORD]))

                bash_script = '\n'.join(script_lines)
                logging.debug("\n\n\nbash_script =\n{0}\n\n\n".format(bash_script))

                bash_script_filename = os.path.join(AgentConfig.TMP_DIRNAME, 'stochss_init.sh')
                with open(bash_script_filename, 'w') as bash_script_file:
                    bash_script_file.write(bash_script)

                scp_command = 'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                    keyfile=keyfile,
                    source=bash_script_filename,
                    target="{username}@{ip}:~/stochss_init.sh".format(username=username,
                                                                      ip=ip))

                logging.debug('scp command =\n{}'.format(scp_command))
                res = os.system(scp_command)

                os.remove(bash_script_filename)

                if res != 0:
                    # BUGFIX: message had a no-op .format() (no placeholder).
                    logging.error('scp failed! keyfile: {0}'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                commands = ['chmod +x ~/stochss_init.sh',
                            '~/stochss_init.sh']
                command = ';'.join(commands)

                remote_command_string = self.get_remote_command_string(ip=ip, username=username,
                                                                       keyfile=keyfile, command=command)

                logging.debug('remote_command_string =\n{}'.format(remote_command_string))
                res = os.system(remote_command_string)

                if res != 0:
                    # BUGFIX: message had a no-op .format() (no placeholder).
                    logging.error('remote command failed! keyfile: {0}'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue
        except Exception as e:
            logging.exception(e)
            raise
Ejemplo n.º 10
0
    def __poll_instances_status(self, num_vms, parameters):
        """
        Private method that polls the state of instances that have already
        spawned, sleeping between polls, until the requested number of VMs
        is running or the poll count is exhausted.

        Args
            num_vms         Number of virtual machines that need to be polled
            parameters      A dictionary of parameters

        Return
            A tuple of (public ips, private ips, instance ids). Each of the
            three is a list. Instances that never reached running state are
            deregistered and marked STATE_FAILED.
        """
        logging.info("Start polling task for infrastructure = {0}".format(parameters["infrastructure"]))

        ins_ids = self.agent.describe_instances_launched(parameters)
        logging.info("ins_ids = {0}".format(ins_ids))

        # update db with new instance ids and 'pending'
        VMStateModel.update_ins_ids(
            parameters,
            ins_ids,
            self.reservation_id,
            from_state=VMStateModel.STATE_CREATING,
            to_state=VMStateModel.STATE_PENDING,
        )

        public_ips = None
        private_ips = None
        instance_ids = None
        keyfiles = None

        for x in xrange(EC2BackendWorker.POLL_COUNT):
            # get the ips and ids of this keyname
            public_ips, private_ips, instance_ids, instance_types, keyfiles = self.agent.describe_instances_running(
                parameters
            )

            logging.info("public_ips = {0}".format(public_ips))
            logging.debug("private_ips = {0}".format(private_ips))
            logging.info("instance_ids = {0}".format(instance_ids))
            logging.info("instance_types = {0}".format(instance_types))
            logging.info("keyfiles = {0}".format(keyfiles))

            # if we get the requested number of vms (the requested number will be 1 if this is queue head),
            # update reservation information and send a message to the backend server
            if num_vms == len(public_ips):
                # update db with new public ips and private ips
                VMStateModel.update_ips(parameters, instance_ids, public_ips, private_ips, instance_types, keyfiles)
                break

            else:
                if x < EC2BackendWorker.POLL_COUNT - 1:
                    time.sleep(EC2BackendWorker.POLL_WAIT_TIME)
                    # BUGFIX: message hard-coded "5 seconds" even though the
                    # actual sleep is POLL_WAIT_TIME; report the real value.
                    logging.info("Polling task: sleep {0} seconds...".format(EC2BackendWorker.POLL_WAIT_TIME))

                else:
                    VMStateModel.update_ips(parameters, instance_ids, public_ips, private_ips, instance_types, keyfiles)

                    logging.info("Polling timeout. About to terminate some instances:")
                    terminate_ins_ids = []
                    for ins_id in ins_ids:
                        if ins_id not in instance_ids:
                            logging.info("instance {0} to be terminated".format(ins_id))
                            terminate_ins_ids.append(ins_id)
                    # terminate timeout instances
                    self.agent.deregister_some_instances(parameters, terminate_ins_ids)
                    # update db with failure information
                    VMStateModel.set_state(
                        parameters, terminate_ins_ids, VMStateModel.STATE_FAILED, VMStateModel.DESCRI_FAIL_TO_RUN
                    )

        return public_ips, private_ips, instance_ids