def __deregister_flex_vm(self,
                             ip,
                             username,
                             keyfile,
                             parameters,
                             queue_head_ip,
                             force=False):

        try:
            if self.check_network_ports(ip, [22]):
                deregister_command = self.get_remote_command_string(
                    ip=ip,
                    username=username,
                    keyfile=keyfile,
                    command=
                    "sudo ~/stochss/release-tools/flex-cloud/deregister_flex_vm.sh"
                )
                logging.debug(
                    'deregister_command =\n{}'.format(deregister_command))
                os.system(deregister_command)
            else:
                logging.debug(
                    'Flex VM is not accessible via SSH, can not execute deregister command'
                )

        except Exception as e:
            logging.exception('Failed to deregister Flex VM: {0}'.format(e))
#            logging.error(sys.exc_info())

        finally:
            VMStateModel.set_state(
                params=parameters,
                ins_ids=[self.get_flex_instance_id(public_ip=ip)],
                state=VMStateModel.STATE_TERMINATED,
                description='VM Deregistered.')
 def _run(self):
     """Run one VM-state synchronization pass; re-arm the timer on success."""
     logging.debug(
         'SynchronizeDB._run() thread_id={0} agent_type={1} parameters={2}'.
         format(self.thread_id, self.agent_type, self.parameters))
     self.is_start = False
     VMStateModel.synchronize(agent=self.agent, parameters=self.parameters)
     # Only restart the periodic sync when the DB update reports success.
     should_restart = self.update_vm_state_db()
     if should_restart:
         self._start()
Example #3
0
 def _run(self):
     """Run one VM-state synchronization pass and restart the timer on success."""
     logging.debug(
         "SynchronizeDB._run() thread_id={0} agent_type={1} parameters={2}".format(
             self.thread_id, self.agent_type, self.parameters
         )
     )
     # Mark the timer as no longer pending before doing the work.
     self.is_start = False
     VMStateModel.synchronize(agent=self.agent, parameters=self.parameters)
     # Re-arm the periodic synchronization only if the DB update succeeded.
     if self.update_vm_state_db():
         self._start()
    def synchronize_db(self, params, force=False):
        """Schedule (and optionally force) a VM state DB synchronization.

        Reads the last synchronization timestamp from the VMStateSyn
        entity; if more than SynchronizeDB.PAUSE seconds have elapsed,
        enqueues a 'start_db_syn' task on the backend queue. With
        force=True a synchronization is also performed immediately.

        Raises:
          Exception if the last synchronization time cannot be determined.
        """
        logging.debug('synchronize_db(force={0}) param={1}'.format(
            force, params))
        last_time = None
        try:
            # Renamed from 'e' so the except clause below no longer shadows it.
            sync_entry = db.GqlQuery("SELECT * FROM VMStateSyn").get()
            if sync_entry:
                last_time = sync_entry.last_syn
            else:
                # No sync record yet: pretend the last sync was a day ago so
                # the gap check below passes and a task gets scheduled.
                last_time = datetime.datetime.now() - datetime.timedelta(1)
        except Exception as e:
            logging.error(
                'Error: have errors in opening db_syn file. {0}'.format(e))
            return

        if last_time is None:
            raise Exception(
                'Error: cannot read last synchronization information of db!')

        now = datetime.datetime.now()
        gap = (now - last_time).total_seconds()

        logging.info('Time now: {0}'.format(now))
        logging.info('Time last synchronization: {0}'.format(last_time))
        logging.info('Time in between: {0}'.format(gap))

        infrastructure = params[self.PARAM_INFRASTRUCTURE]
        agent = self.agent_factory.create_agent(infrastructure)

        if force:
            VMStateModel.synchronize(agent=agent, parameters=params)

        # Throttle: skip scheduling if a sync happened within the last period.
        if gap < backend_handler.SynchronizeDB.PAUSE + 1:
            logging.info('Less than {0} seconds to synchronize db.'.format(
                backend_handler.SynchronizeDB.PAUSE))
            return

        logging.info('Start synchronize db every {0} seconds.'.format(
            backend_handler.SynchronizeDB.PAUSE))

        # Agent and params are pickled into the task payload.
        from_fields = {
            'op': 'start_db_syn',
            'agent': pickle.dumps(agent),
            'parameters': pickle.dumps(params),
        }

        logging.info('\n\nAdding db syn task for agent = {}'.format(
            agent.AGENT_NAME))
        taskqueue.add(url=InfrastructureManager.BACKEND_QUEUE_URL,
                      params=from_fields,
                      method='GET')
    def __verify_ec2_instances_via_ssh(self, instance_ids, parameters,
                                       public_ips):
        """Check SSH reachability of newly launched EC2 instances.

        Attempts an SSH connection to each public IP; instances that
        cannot be reached are terminated via the agent and marked FAILED
        in VMStateModel.

        Returns:
          (connected_public_ips, connected_instance_ids) for the subset of
          instances that accepted an SSH connection.

        Raises:
          Exception if the ssh keyfile is missing, or if terminating the
          unreachable instances fails.
        """
        keyfile = os.path.join(os.path.dirname(__file__), '..',
                               '{0}.key'.format(parameters['keyname']))
        logging.info('keyfile = {0}'.format(keyfile))

        if not os.path.exists(keyfile):
            raise Exception("ssh keyfile file not found: {0}".format(keyfile))

        connected_public_ips = []
        connected_instance_ids = []

        for (pub_ip, ins_id) in zip(public_ips, instance_ids):
            logging.info('connecting to {0}...'.format(pub_ip))
            success = helper.wait_for_ssh_connection(key_file=keyfile,
                                                     ip=pub_ip)
            if success:
                logging.info('{0} is successfully added'.format(pub_ip))
                connected_public_ips.append(pub_ip)
                connected_instance_ids.append(ins_id)

        # if there are some vms not able to be connected via ssh,
        # just shut them down explicitly
        if len(public_ips) != len(connected_public_ips):
            logging.info(
                'Time out on ssh to {0} instances. They will be terminated.'.
                format(len(public_ips) - len(connected_public_ips)))

            try:
                terminate_ins_ids = [
                    ins_id for ins_id in instance_ids
                    if ins_id not in connected_instance_ids
                ]
                self.agent.deregister_some_instances(parameters,
                                                     terminate_ins_ids)
                # update db with failed vms
                VMStateModel.set_state(parameters, terminate_ins_ids,
                                       VMStateModel.STATE_FAILED,
                                       VMStateModel.DESCRI_TIMEOUT_ON_SSH)
            except Exception as e:
                # The previous bare 'except:' discarded the root cause; log it
                # and include it in the raised message.
                logging.exception(e)
                raise Exception(
                    "Errors in terminating instances that cannot be connected "
                    "via ssh: {0}".format(e))

        return connected_public_ips, connected_instance_ids
    def synchronize_db(self, params, force=False):
        """Schedule (and optionally force) a VM state DB synchronization.

        Looks up the last synchronization timestamp in the VMStateSyn
        entity; if more than SynchronizeDB.PAUSE seconds have elapsed, a
        'start_db_syn' task is queued on the backend task queue. When
        *force* is True, VMStateModel.synchronize is also invoked
        immediately.
        """
        logging.debug('synchronize_db(force={0}) param={1}'.format(force, params))
        last_time = None
        set_gap_large = False
        try:
            # NOTE(review): 'e' here is shadowed by the except-clause variable
            # below; renaming would be safer.
            e = db.GqlQuery("SELECT * FROM VMStateSyn").get()
            if e:
                last_time = e.last_syn
            else:
                # No sync record yet: pretend the last sync was a day ago so
                # the gap check below passes and a task gets scheduled.
                last_time = datetime.datetime.now() - datetime.timedelta(1)
        except Exception as e:
            logging.error('Error: have errors in opening db_syn file. {0}'.format(e))
            return

        if last_time is None:
            raise Exception('Error: cannot read last synchronization information of db!')

        else:
            now = datetime.datetime.now()
            delta = now - last_time
            gap = delta.total_seconds()

            logging.info('Time now: {0}'.format(now))
            logging.info('Time last synchronization: {0}'.format(last_time))
            logging.info('Time in between: {0}'.format(gap))

            infrastructure = params[self.PARAM_INFRASTRUCTURE]
            agent = self.agent_factory.create_agent(infrastructure)

            if force:
                VMStateModel.synchronize(agent = agent, parameters = params)

            # Throttle: skip if a sync happened within the last PAUSE seconds.
            if gap < backend_handler.SynchronizeDB.PAUSE + 1:
                logging.info('Less than {0} seconds to synchronize db.'.format(backend_handler.SynchronizeDB.PAUSE))
                return

            logging.info('Start synchronize db every {0} seconds.'.format(backend_handler.SynchronizeDB.PAUSE))

            # Agent and params are pickled into the task payload.
            from_fields = {
                'op': 'start_db_syn',
                'agent': pickle.dumps(agent),
                'parameters': pickle.dumps(params),
            }

            logging.info('\n\nAdding db syn task for agent = {}'.format(agent.AGENT_NAME))
            taskqueue.add(url=InfrastructureManager.BACKEND_QUEUE_URL, params=from_fields, method='GET')
Example #7
0
    def deregister_flex_cloud(self, user_id):
        """Tear down the user's Flex Cloud and update the stored user state,
        then redirect back to the Flex Cloud credentials page."""
        logging.debug('deregister_flex_cloud')

        # infrastructure=AgentTypes.FLEX
        service = backendservices(self.user_data)
        credentials = self.user_data.getCredentials()
        params = {
            'infrastructure': AgentTypes.FLEX,
            'flex_cloud_machine_info': self.user_data.get_flex_cloud_machine_info(),
            'flex_queue_head': self.user_data.get_flex_queue_head_machine(),
            'key_prefix': '',  # no prefix
            'keyname': '',
            'email': [user_id],
            'credentials': credentials,
            'user_id': user_id,
            'reservation_id': self.user_data.reservation_id,
        }

        # Let the UI know a stop is in progress before kicking it off.
        self.user_data.flex_cloud_status = True
        self.user_data.flex_cloud_info_msg = 'Stopping Flex Cloud'
        self.user_data.put()

        result = service.deregister_flex_cloud(parameters=params, blocking=True)

        if result == True:
            logging.debug('deregister_flex_cloud succeeded!')
            self.user_data.valid_flex_cloud_info = False
            self.user_data.is_flex_cloud_info_set = False
            self.user_data.reservation_id = None
            self.user_data.flex_db_password = None
            self.user_data.flex_cloud_status = True
            self.user_data.flex_cloud_info_msg = 'Flex Cloud Stopped'
        else:
            logging.error('deregister_flex_cloud failed!')
            self.user_data.flex_cloud_status = True
            self.user_data.flex_cloud_info_msg = 'Error when deregistering Flex Cloud'
        # Persist whichever outcome we recorded above.
        self.user_data.put()

        logging.debug("Cleaning up old flex-cloud entries in the DB")
        VMStateModel.cleanup_flex_old_flex_entries(user_id)

        self.redirect('/flexCloudCredentials')
Example #8
0
    def __verify_ec2_instances_via_ssh(self, instance_ids, parameters, public_ips):
        """Check SSH reachability of newly launched EC2 instances.

        Instances that cannot be reached are terminated via the agent and
        marked FAILED in VMStateModel.

        Returns:
          (connected_public_ips, connected_instance_ids) for the subset of
          instances that accepted an SSH connection.

        Raises:
          Exception if the ssh keyfile is missing, or if terminating the
          unreachable instances fails.
        """
        keyfile = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(parameters["keyname"]))
        logging.info("keyfile = {0}".format(keyfile))

        if not os.path.exists(keyfile):
            raise Exception("ssh keyfile file not found: {0}".format(keyfile))

        connected_public_ips = []
        connected_instance_ids = []

        for (pub_ip, ins_id) in zip(public_ips, instance_ids):
            logging.info("connecting to {0}...".format(pub_ip))
            success = helper.wait_for_ssh_connection(key_file=keyfile, ip=pub_ip)

            if success == True:
                connected_public_ips.append(pub_ip)
                logging.info("{0} is successfully added".format(pub_ip))
                connected_instance_ids.append(ins_id)
    def __configure_celery(self, params):
        '''
        Start a celery worker on each Flex machine, with environment setup
        commands prepended so the worker sees the right INSTANCE_TYPE.

        Args
            parameters      A dictionary of parameters
        '''
        # The celery config must point at the queue head (already running);
        # commands are passed line by line to avoid shell quoting problems
        # with multi-line files.
        logging.debug('__configure_celery() params={0}'.format(params))
        flex_cloud_machine_info = params[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        instance_types = []
        for machine in flex_cloud_machine_info:
            vm = VMStateModel.get_by_ip(
                machine['ip'], reservation_id=params['reservation_id'])
            commands = ['source ~/.bashrc']

            if vm is None:
                # No state record for this IP: skip it.
                logging.error('VMStateModel.get_by_ip({0}) in None'.format(
                    machine['ip']))
                continue

            my_ins_type = vm.ins_type
            commands.append('export INSTANCE_TYPE={0}'.format(vm.ins_type))
            if vm.ins_type not in instance_types:
                instance_types.append(vm.ins_type)

            success = helper.start_celery_on_vm(
                instance_type=my_ins_type,
                ip=machine['ip'],
                key_file=machine['keyfile'],
                username=machine['username'],
                agent_type=self.agent_type,
                worker_name=machine['ip'].replace('.', '_'),
                prepend_commands=commands)

            if success != 0:
                raise Exception("Fail to start celery on {0}".format(
                    machine['ip']))
            # update db with successful running vms
            logging.info("celery started on host ip: {0}".format(
                machine['ip']))

        # Collect all instance types seen and configure celeryconfig.py locally.
        logging.info('For local celery setup, instance_types = {0}'.format(
            instance_types))
        helper.config_celery_queues(agent_type=self.agent_type,
                                    instance_types=instance_types)
Example #10
0
    def __deregister_flex_vm(self, ip, username, keyfile, parameters, queue_head_ip, force=False):
        """Run the deregister script on a Flex VM via SSH, then mark the VM
        terminated in VMStateModel (always, via the finally clause)."""
        try:
            # Only run the remote script when SSH (port 22) is reachable.
            if self.check_network_ports(ip, [22]):
                deregister_command = self.get_remote_command_string(ip=ip, username=username, keyfile=keyfile,
                command="sudo ~/stochss/release-tools/flex-cloud/deregister_flex_vm.sh")
                logging.debug('deregister_command =\n{}'.format(deregister_command))
                os.system(deregister_command)
            else:
                logging.debug('Flex VM is not accessible via SSH, can not execute deregister command')

        except Exception as e:
            logging.exception('Failed to deregister Flex VM: {0}'.format(e))
#            logging.error(sys.exc_info())

        finally:
            # Always record the VM as terminated, even if the command failed.
            VMStateModel.set_state(params=parameters, ins_ids=[self.get_flex_instance_id(public_ip=ip)],
                                   state=VMStateModel.STATE_TERMINATED, description='VM Deregistered.')
Example #11
0
    def update_flex_cloud_machine_info_from_db(self, service):
        """Refresh the stored flex-cloud machine list from VMStateModel.

        Copies each machine's state/description from the matching
        VMStateModel entry (matched by IP and reservation id), recomputes
        valid_flex_cloud_info (True iff the queue head is RUNNING), and
        persists the result with put().
        """
        logging.debug('update_flex_cloud_machine_info_from_db')

        if self.is_flex_cloud_info_set:
            flex_cloud_machine_info = self.get_flex_cloud_machine_info()

            # Nothing to update when no machines are configured.
            if flex_cloud_machine_info is None or len(flex_cloud_machine_info) == 0:
                return

            all_vms = self.__get_all_vms(AgentTypes.FLEX, service)
            #logging.debug('flex: all_vms =\n{0}'.format(pprint.pformat(all_vms)))
            logging.debug('flex: all_vms =\n{0}'.format(all_vms))

            all_vms_map = {vm['pub_ip']: vm for vm in all_vms}
            #logging.debug('flex: all_vms_map =\n{0}'.format(pprint.pformat(all_vms_map)))
            logging.debug('flex: all_vms_map =\n{0}'.format(all_vms_map))

            for machine in flex_cloud_machine_info:
                # NOTE: get_by_ip returns a single entity despite the plural name.
                vms = VMStateModel.get_by_ip(machine['ip'], reservation_id=self.reservation_id)
                if vms is None:
                    logging.debug('machine={0} vms=NONE'.format(machine))
                else:
                    logging.debug('machine={0} vms={1} {2}'.format(machine, vms.pub_ip, vms.state))
                if vms and vms.res_id == self.reservation_id:
                    machine['state'] = vms.state
                    machine['description'] = vms.description
                else:
                    # Stale or missing entry: mark the machine state unknown.
                    if vms:
                        logging.error('From VMStateModel, reservation_id = {0} != user_data.reservation_id'.format(
                            vms.res_id
                        ))
                    machine['state'] = VMStateModel.STATE_UNKNOWN
                    machine['description'] = VMStateModel.STATE_UNKNOWN

            for machine in flex_cloud_machine_info:
                machine['key_file_id'] = int(machine['key_file_id'])

            logging.debug('After updating from VMStateModel, flex_cloud_machine_info =\n{0}'.format(
                                                                pprint.pformat(flex_cloud_machine_info)))

            # Update Flex Cloud Status: valid iff the queue head is running.
            valid_flex_cloud_info = False
            for machine in flex_cloud_machine_info:
                if machine['queue_head'] and machine['state'] == VMStateModel.STATE_RUNNING:
                    valid_flex_cloud_info = True

            self.valid_flex_cloud_info = valid_flex_cloud_info
            self.set_flex_cloud_machine_info(flex_cloud_machine_info)
            self.put()

            logging.debug('valid_flex_cloud_info = {0}'.format(self.valid_flex_cloud_info))

        else:
            # for clearing out db syn requests
            all_vms = self.__get_all_vms(AgentTypes.FLEX, service)
            logging.debug('flex: all_vms =\n{0}'.format(pprint.pformat(all_vms)))
Example #12
0
    def __create_vm_state_model_entries(self, infrastructure, num_vms,
                                        ec2_secret_key, ec2_access_key,
                                        user_id, reservation_id):
        """Insert num_vms VMStateModel rows in state CREATING and return the
        list of their datastore entity ids."""
        logging.debug('__create_vm_state_model_entries')
        logging.debug('num_vms = {0} user_id = {1} reservation_id = {2}'.format(num_vms, user_id, reservation_id))

        ids = []
        for _ in xrange(num_vms):
            entry = VMStateModel(state=VMStateModel.STATE_CREATING,
                                 infra=infrastructure,
                                 ec2_access_key=ec2_access_key,
                                 ec2_secret_key=ec2_secret_key,
                                 user_id=user_id,
                                 res_id=reservation_id)
            # Persist first: the entity only gets a key id after put().
            entry.put()
            ids.append(entry.key().id())

        logging.debug('__create_vm_state_model_entries: ids = {0}'.format(ids))
        return ids
Example #13
0
    def deregister_flex_cloud(self, user_id):
        """Tear down the user's Flex Cloud, record the outcome in user_data,
        clean up old VMStateModel entries, and redirect to the credentials
        page."""
        logging.debug('deregister_flex_cloud')

        service = backendservices(self.user_data) #infrastructure=AgentTypes.FLEX)
        credentials = self.user_data.getCredentials()
        params = {
            'infrastructure': AgentTypes.FLEX,
            'flex_cloud_machine_info': self.user_data.get_flex_cloud_machine_info(),
            'flex_queue_head': self.user_data.get_flex_queue_head_machine(),
            'key_prefix': '', # no prefix
            'keyname': '',
            'email': [user_id],
            'credentials': credentials,
            'user_id': user_id,
            'reservation_id': self.user_data.reservation_id
        }
        # Let the UI know a stop is in progress before kicking it off.
        self.user_data.flex_cloud_status = True
        self.user_data.flex_cloud_info_msg = 'Stopping Flex Cloud'
        self.user_data.put()

        result = service.deregister_flex_cloud(parameters=params, blocking=True)

        if result == True:
            logging.debug('deregister_flex_cloud succeeded!')
            # Clear out the flex-cloud configuration on success.
            self.user_data.valid_flex_cloud_info = False
            self.user_data.is_flex_cloud_info_set = False

            self.user_data.reservation_id = None
            self.user_data.flex_db_password = None
            self.user_data.flex_cloud_status = True
            self.user_data.flex_cloud_info_msg = 'Flex Cloud Stopped'
            self.user_data.put()
        else:
            logging.error('deregister_flex_cloud failed!')
            self.user_data.flex_cloud_status = True
            self.user_data.flex_cloud_info_msg = 'Error when deregistering Flex Cloud'
            self.user_data.put()

        logging.debug("Cleaning up old flex-cloud entries in the DB")
        VMStateModel.cleanup_flex_old_flex_entries(user_id)

        self.redirect('/flexCloudCredentials')
Example #14
0
    def __configure_celery(self, params):
        """
        Start a celery worker on every Flex machine that has a VMStateModel
        entry, prepending environment-setup commands so the worker sees the
        correct INSTANCE_TYPE.

        Args
            parameters      A dictionary of parameters

        Raises
            Exception if celery fails to start on any machine.
        """
        # Update celery config file...it should have the correct IP
        # of the Queue head node, which should already be running.
        # Pass it line by line so theres no weird formatting errors from
        # trying to echo a multi-line file directly on the command line

        logging.debug("__configure_celery() params={0}".format(params))
        flex_cloud_machine_info = params[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        instance_types = []
        for machine in flex_cloud_machine_info:
            vm = VMStateModel.get_by_ip(machine["ip"], reservation_id=params["reservation_id"])
            commands = []
            my_ins_type = "Unknown"
            commands.append("source ~/.bashrc")
            if vm is None:
                # No state record for this IP: skip it.
                logging.error("VMStateModel.get_by_ip({0}) in None".format(machine["ip"]))
                continue
            else:
                my_ins_type = vm.ins_type
                commands.append("export INSTANCE_TYPE={0}".format(vm.ins_type))
                if vm.ins_type not in instance_types:
                    instance_types.append(vm.ins_type)

            ip = machine["ip"]
            keyfile = machine["keyfile"]
            username = machine["username"]

            success = helper.start_celery_on_vm(
                instance_type=my_ins_type,
                ip=ip,
                key_file=keyfile,
                username=username,
                agent_type=self.agent_type,
                worker_name=ip.replace(".", "_"),
                prepend_commands=commands,
            )
            if success == 0:
                # update db with successful running vms
                logging.info("celery started on host ip: {0}".format(ip))

            else:
                raise Exception("Fail to start celery on {0}".format(ip))

        # Collect all instance types seen and configure celeryconfig.py locally.
        logging.info("For local celery setup, instance_types = {0}".format(instance_types))
        helper.config_celery_queues(agent_type=self.agent_type, instance_types=instance_types)
Example #15
0
 def describe_machines_from_db(self, infrastructure, force=False):
     """Synchronize the VM state DB for *infrastructure* and return every
     VM entry recorded for the current user."""
     user_id = self.user_data.user_id
     parameters = {
         "infrastructure": infrastructure,
         "credentials": self.get_credentials(),
         "key_prefix": user_id,
         "user_id": user_id,
     }
     # Flex needs the machine list and reservation id in the query params.
     if infrastructure == AgentTypes.FLEX:
         parameters['flex_cloud_machine_info'] = self.user_data.get_flex_cloud_machine_info()
         parameters['reservation_id'] = self.user_data.reservation_id
     manager = InfrastructureManager()
     manager.synchronize_db(parameters, force=force)
     return VMStateModel.get_all(parameters)
Example #16
0
    def __configure_celery(self, params, public_ips, instance_ids):
        """
        Private method used for uploading the current celery configuration to
        each instance that is running and ssh connectable.

        Args
            parameters      A dictionary of parameters
            public_ips      A list of public ips that are going to be configed
            instance_ids    A list of instance_ids that are used for terminating
                            instances and updating the database if configuration
                            fails for some reason

        Raises
            Exception if the ssh key_file is missing or celery fails to start
            on any instance (that instance is deregistered and marked FAILED
            first).
        """
        # Update celery config file...it should have the correct IP
        # of the Queue head node, which should already be running.
        # Pass it line by line so theres no weird formatting errors from
        # trying to echo a multi-line file directly on the command line

        key_file = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(params["keyname"]))
        logging.debug("key_file = {0}".format(key_file))

        if not os.path.exists(key_file):
            raise Exception("ssh key_file file not found: {0}".format(key_file))

        credentials = params["credentials"]

        # Commands shared by every instance.
        base_commands = [
            "source /home/ubuntu/.bashrc",
            "export AWS_ACCESS_KEY_ID={0}".format(str(credentials["EC2_ACCESS_KEY"])),
            "export AWS_SECRET_ACCESS_KEY={0}".format(str(credentials["EC2_SECRET_KEY"])),
        ]

        for ip, ins_id in zip(public_ips, instance_ids):
            ins_type = VMStateModel.get_instance_type(params, ins_id)
            # BUGFIX: build a fresh command list per instance. Previously the
            # INSTANCE_TYPE export was appended to one shared list, so every
            # instance after the first received the exports for all earlier
            # instances as well (only the last export took effect in the shell).
            commands = base_commands + ["export INSTANCE_TYPE={0}".format(ins_type)]
            success = helper.start_celery_on_vm(
                instance_type=ins_type,
                ip=ip,
                key_file=key_file,
                agent_type=self.agent_type,
                worker_name=ip.replace(".", "_"),
                prepend_commands=commands,
            )
            if success == 0:
                # update db with successful running vms
                logging.info("celery started! ")
                logging.info("host ip: {0}".format(ip))
                VMStateModel.set_state(params, [ins_id], VMStateModel.STATE_RUNNING, VMStateModel.DESCRI_SUCCESS)
            else:
                # Shut the instance down and record the failure before raising.
                self.agent.deregister_some_instances(params, [ins_id])
                VMStateModel.set_state(
                    params, [ins_id], VMStateModel.STATE_FAILED, VMStateModel.DESCRI_FAIL_TO_COFIGURE_CELERY
                )
                raise Exception("Failure to start celery on {0}".format(ip))

        # get all instance types and configure the celeryconfig.py locally
        instance_types = VMStateModel.get_running_instance_types(params)
        helper.config_celery_queues(agent_type=self.agent_type, instance_types=instance_types)
Example #17
0
    def start_ec2_vms(self, params, blocking=False):
        '''
        This method instantiates EC2 vm instances.

        Creates VMStateModel entries in state CREATING for every requested
        VM, then asks the InfrastructureManager to prepare the instances and
        ensures the DynamoDB stochss table exists.

        Returns a (success, error_message) tuple: (True, None) on success,
        (False, <message>) on failure.
        '''
        logging.debug("start_ec2_vms : inside method with params : \n%s", pprint.pformat(params))
        try:
            # make sure that any keynames we use are prefixed with stochss so that
            # we can do a terminate all based on keyname prefix
            key_prefix = AgentConfig.get_agent_key_prefix(agent_type=AgentTypes.EC2,
                                                          key_prefix=params.get('key_prefix', ''))

            key_name = params["keyname"]
            if not key_name.startswith(key_prefix):
                params['keyname'] = key_prefix + key_name

            # NOTE: We are forcing blocking mode within the InfrastructureManager class
            # for the launching of VMs because of how GAE joins on all threads before
            # returning a response from a request.
            i = InfrastructureManager(blocking=blocking)
            res = {}

            # 1. change the status of 'failed' in the previous launch in db to 'terminated'
            # NOTE: We need to make sure that the RabbitMQ server is running if any compute
            # nodes are running as we are using the AMQP broker option for Celery.

            ins_ids = VMStateModel.terminate_not_active(params)

            # 2. get user_id, infra, ec2 credentials

            user_id = self.__get_required_parameter(parameter_key='user_id', params=params)
            infrastructure = self.__get_required_parameter(parameter_key='infrastructure', params=params)
            reservation_id = self.__get_required_parameter(parameter_key='reservation_id', params=params)

            logging.debug('ec2: reservation_id = {0}'.format(reservation_id))

            if 'credentials' in params:
                if 'EC2_ACCESS_KEY' in params['credentials'] and 'EC2_SECRET_KEY' in params['credentials']:
                    ec2_access_key = params['credentials']['EC2_ACCESS_KEY']
                    ec2_secret_key = params['credentials']['EC2_SECRET_KEY']
                else:
                    raise Exception('VMStateModel ERROR: Cannot get access key or secret.')
            else:
                raise Exception('VMStateModel ERROR: No credentials are provided.')

            if ec2_access_key is None or ec2_secret_key is None:
                raise Exception('VMStateModel ERROR: ec2 credentials are not valid.')

            # 3. create exact number of entities in db for this launch, and set the status to 'creating'
            num_vms = 0
            if 'vms' in params:
                for vm in params['vms']:
                    logging.debug('vm: {0}, num: {1}'.format(vm['instance_type'], vm['num_vms']))
                    num_vms += vm['num_vms']
            # The optional head node counts as one extra VM.
            if 'head_node' in params:
                num_vms += 1

            logging.debug('num = {0}'.format(num_vms))

            ids = self.__create_vm_state_model_entries(ec2_access_key=ec2_access_key, ec2_secret_key=ec2_secret_key,
                                                       infrastructure=infrastructure, num_vms=num_vms, user_id=user_id,
                                                       reservation_id=reservation_id)

            # 4. Prepare Instances
            params[VMStateModel.IDS] = ids
            res = i.prepare_instances(params)

            # 5. check and create stochss table if it does not exist
            self.__create_dynamodb_stochss_table(ec2_access_key=ec2_access_key, ec2_secret_key=ec2_secret_key)

            logging.debug("start_ec2_vms : exiting method with result : %s", str(res))
            return True, None

        except Exception as e:
            logging.exception("start_ec2_vms : exiting method with error : {0}".format(str(e)))
            return False, 'Errors occur in starting machines:' + str(e)
Example #18
0
        '''
        key_prefix = AgentConfig.get_agent_key_prefix(agent_type=AgentTypes.EC2,
                                                      key_prefix=params.get('key_prefix', ''))
        try:
            logging.debug("Stopping compute nodes with key_prefix: {0}".format(key_prefix))
            i = InfrastructureManager(blocking=blocking)
            res = i.deregister_instances(parameters=params, terminate=True)
            ret = True

        except Exception, e:
            logging.error("Terminate machine failed with error : %s", str(e))
            ret = False

        finally:
            # update db
            VMStateModel.terminate_all(params)

        return ret

#    def describeMachines(self, params):
#        '''
#        This method gets the status of all the instances
#        '''
#        # add calls to the infrastructure manager for getting details of machines
#        logging.debug("describeMachines() params =\n%s", pprint.pformat(params))
#
#        key_prefix = AgentConfig.get_agent_key_prefix(agent_type=self.infrastructure,
#                                                      key_prefix=params.get('key_prefix', ''))
#        logging.debug('key_prefix = {0}'.format(key_prefix))
#
#        params["key_prefix"] = key_prefix
    def prepare_instances(self,
                          parameters,
                          count=None,
                          security_configured=True):
        """
        Prepares the Flex instances described in the given parameters.

        For every machine listed in PARAM_FLEX_CLOUD_MACHINE_INFO this
        method:
          1. copies the queue head's keyfile onto the machine via scp,
          2. uploads a generated ~/stochss_init.sh bootstrap script, and
          3. executes that script remotely over ssh.

        Machines that cannot be prepared (missing keyfile, failed scp, or
        failed remote command) are marked STATE_FAILED in VMStateModel and
        skipped; the remaining machines are still processed. Nothing is
        returned.

        Args:
          parameters          A dictionary of parameters. Must contain the
                              PARAM_FLEX_CLOUD_MACHINE_INFO and
                              PARAM_FLEX_QUEUE_HEAD entries.
          count               Unused; kept for interface compatibility.
          security_configured Unused; kept for interface compatibility.

        Raises:
          Re-raises any unexpected exception after logging it.
        """
        logging.debug(
            'flex_agent.prepare_instances() parameters={0}'.format(parameters))
        try:
            flex_cloud_machine_info = parameters[
                self.PARAM_FLEX_CLOUD_MACHINE_INFO]
            logging.debug('flex_cloud_machine_info =\n{}'.format(
                pprint.pformat(flex_cloud_machine_info)))

            queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
            logging.debug('queue_head = {}'.format(queue_head))
            queue_head_keyfile = queue_head['keyfile']
            # The queue head's key is placed at a fixed remote path on every
            # machine so workers can reach the queue head later.
            remote_queue_head_keyfile = os.path.join(
                FlexConfig.QUEUE_HEAD_KEY_DIR,
                os.path.basename(queue_head_keyfile))

            for machine in flex_cloud_machine_info:
                ip = machine['ip']
                keyfile = machine['keyfile']
                username = machine['username']
                is_queue_head = machine[self.PARAM_QUEUE_HEAD]
                # Renamed from 'id' to avoid shadowing the builtin.
                instance_id = self.get_flex_instance_id(public_ip=ip)

                # Bug fix: check existence *before* chmod. Previously
                # os.chmod() ran first and raised OSError on a missing
                # keyfile instead of marking the VM as failed.
                if not os.path.exists(keyfile):
                    logging.error(
                        'Keyfile: {0} does not exist!'.format(keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_INVALID_KEYFILE)
                    continue

                # ssh refuses private keys with permissive modes.
                os.chmod(keyfile, int('600', 8))

                logging.debug("[{0}] [{1}] [{2}] [is_queue_head:{3}]".format(
                    ip, keyfile, username, is_queue_head))

                scp_command = \
                    'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                        keyfile=keyfile,
                        source=queue_head_keyfile,
                        target="{username}@{ip}:{remote_queue_head_keyfile}".format(
                            username=username, ip=ip, remote_queue_head_keyfile=remote_queue_head_keyfile
                        )
                    )

                logging.debug(
                    'scp command for queue head keyfile =\n{}'.format(
                        scp_command))
                res = os.system(scp_command)
                if res != 0:
                    # Fixed broken log call: the message had no placeholder
                    # for the keyfile argument.
                    logging.error(
                        'scp for queue head keyfile {0} failed!'.format(
                            keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                # Build the bootstrap script executed on the remote machine.
                script_lines = []
                script_lines.append("#!/bin/bash")
                script_lines.append(
                    "echo export STOCHKIT_HOME={0} >> ~/.bashrc".format(
                        "~/stochss/StochKit/"))
                script_lines.append(
                    "echo export STOCHKIT_ODE={0} >> ~/.bashrc".format(
                        "~/stochss/ode/"))
                script_lines.append(
                    "echo export R_LIBS={0} >> ~/.bashrc".format(
                        "~/stochss/stochoptim/library"))
                # Fixed: this line had a stray .format() argument and no
                # placeholder.
                script_lines.append(
                    "echo export C_FORCE_ROOT=1 >> ~/.bashrc")
                script_lines.append(
                    "chmod 600 {remote_queue_head_keyfile}".format(
                        remote_queue_head_keyfile=remote_queue_head_keyfile))

                if is_queue_head:
                    logging.debug(
                        'Adding extra commands for configuring queue head...')
                    script_lines.append(
                        "sudo rabbitmqctl add_user stochss ucsb")
                    script_lines.append(
                        'sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"'
                    )

                    reset_mysql_script = '~/stochss/release-tools/flex-cloud/reset_mysql_pwd.sh'
                    script_lines.append(
                        "sudo {reset_mysql_script} root {flex_db_password}".
                        format(reset_mysql_script=reset_mysql_script,
                               flex_db_password=parameters[
                                   self.PARAM_FLEX_DB_PASSWORD]))

                bash_script = '\n'.join(script_lines)
                logging.debug(
                    "\n\n\nbash_script =\n{0}\n\n\n".format(bash_script))

                bash_script_filename = os.path.join(AgentConfig.TMP_DIRNAME,
                                                    'stochss_init.sh')
                with open(bash_script_filename, 'w') as bash_script_file:
                    bash_script_file.write(bash_script)

                scp_command = 'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                    keyfile=keyfile,
                    source=bash_script_filename,
                    target="{username}@{ip}:~/stochss_init.sh".format(
                        username=username, ip=ip))

                logging.debug('scp command =\n{}'.format(scp_command))
                res = os.system(scp_command)

                os.remove(bash_script_filename)

                if res != 0:
                    # Fixed broken log call (missing placeholder).
                    logging.error(
                        'scp of init script with keyfile {0} failed!'.format(
                            keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                commands = ['chmod +x ~/stochss_init.sh', '~/stochss_init.sh']
                command = ';'.join(commands)

                remote_command_string = self.get_remote_command_string(
                    ip=ip, username=username, keyfile=keyfile, command=command)

                logging.debug('remote_command_string =\n{}'.format(
                    remote_command_string))
                res = os.system(remote_command_string)

                if res != 0:
                    # Fixed broken log call (missing placeholder).
                    logging.error(
                        'remote command with keyfile {0} failed!'.format(
                            keyfile))
                    VMStateModel.set_state(
                        params=parameters,
                        ins_ids=[instance_id],
                        state=VMStateModel.STATE_FAILED,
                        description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue
        except Exception as e:
            logging.exception(e)
            raise
Example #20
0
    def prepare_instances(self, parameters, count=None, security_configured=True):
        """
        Prepares the Flex instances described in the given parameters.

        For every machine listed in PARAM_FLEX_CLOUD_MACHINE_INFO this method
        copies the queue head's keyfile onto the machine via scp, uploads a
        generated ~/stochss_init.sh bootstrap script, and executes it
        remotely over ssh. Machines that cannot be prepared are marked
        STATE_FAILED in VMStateModel and skipped. Nothing is returned.

        Args:
          parameters          A dictionary of parameters. Must contain the
                              PARAM_FLEX_CLOUD_MACHINE_INFO and
                              PARAM_FLEX_QUEUE_HEAD entries.
          count               Unused; kept for interface compatibility.
          security_configured Unused; kept for interface compatibility.

        Raises:
          Re-raises any unexpected exception after logging it.
        """
        logging.debug('flex_agent.prepare_instances() parameters={0}'.format(parameters))
        try:
            flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]
            logging.debug('flex_cloud_machine_info =\n{}'.format(pprint.pformat(flex_cloud_machine_info)))

            queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
            logging.debug('queue_head = {}'.format(queue_head))
            queue_head_keyfile = queue_head['keyfile']
            # Fixed remote path where each machine receives the queue head's key.
            remote_queue_head_keyfile = os.path.join(FlexConfig.QUEUE_HEAD_KEY_DIR,
                                                     os.path.basename(queue_head_keyfile))

            for machine in flex_cloud_machine_info:
                ip = machine['ip']
                keyfile = machine['keyfile']
                username = machine['username']
                is_queue_head = machine[self.PARAM_QUEUE_HEAD]
                # Renamed from 'id' to avoid shadowing the builtin.
                instance_id = self.get_flex_instance_id(public_ip=ip)

                # Bug fix: existence check must precede chmod, otherwise a
                # missing keyfile raises OSError instead of being recorded
                # as an invalid-keyfile failure.
                if not os.path.exists(keyfile):
                    logging.error('Keyfile: {0} does not exist!'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_INVALID_KEYFILE)
                    continue

                # ssh refuses private keys with permissive modes.
                os.chmod(keyfile, int('600', 8))

                logging.debug("[{0}] [{1}] [{2}] [is_queue_head:{3}]".format(ip, keyfile, username, is_queue_head))

                scp_command = \
                    'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                        keyfile=keyfile,
                        source=queue_head_keyfile,
                        target="{username}@{ip}:{remote_queue_head_keyfile}".format(
                            username=username, ip=ip, remote_queue_head_keyfile=remote_queue_head_keyfile
                        )
                    )

                logging.debug('scp command for queue head keyfile =\n{}'.format(scp_command))
                res = os.system(scp_command)
                if res != 0:
                    # Fixed broken log call: the message had no placeholder.
                    logging.error('scp for queue head keyfile {0} failed!'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                # Build the bootstrap script executed on the remote machine.
                script_lines = []
                script_lines.append("#!/bin/bash")
                script_lines.append("echo export STOCHKIT_HOME={0} >> ~/.bashrc".format("~/stochss/StochKit/"))
                script_lines.append("echo export STOCHKIT_ODE={0} >> ~/.bashrc".format("~/stochss/ode/"))
                script_lines.append("echo export R_LIBS={0} >> ~/.bashrc".format("~/stochss/stochoptim/library"))
                # Fixed: this line had a stray .format() argument and no placeholder.
                script_lines.append("echo export C_FORCE_ROOT=1 >> ~/.bashrc")
                script_lines.append("chmod 600 {remote_queue_head_keyfile}".format(
                                                            remote_queue_head_keyfile=remote_queue_head_keyfile))

                if is_queue_head:
                    logging.debug('Adding extra commands for configuring queue head...')
                    script_lines.append("sudo rabbitmqctl add_user stochss ucsb")
                    script_lines.append('sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"')

                    reset_mysql_script = '~/stochss/release-tools/flex-cloud/reset_mysql_pwd.sh'
                    script_lines.append("sudo {reset_mysql_script} root {flex_db_password}".format(
                        reset_mysql_script=reset_mysql_script,
                        flex_db_password=parameters[self.PARAM_FLEX_DB_PASSWORD]))

                bash_script = '\n'.join(script_lines)
                logging.debug("\n\n\nbash_script =\n{0}\n\n\n".format(bash_script))

                bash_script_filename = os.path.join(AgentConfig.TMP_DIRNAME, 'stochss_init.sh')
                with open(bash_script_filename, 'w') as bash_script_file:
                    bash_script_file.write(bash_script)

                scp_command = 'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                    keyfile=keyfile,
                    source=bash_script_filename,
                    target="{username}@{ip}:~/stochss_init.sh".format(username=username,
                                                                      ip=ip))

                logging.debug('scp command =\n{}'.format(scp_command))
                res = os.system(scp_command)

                os.remove(bash_script_filename)

                if res != 0:
                    # Fixed broken log call (missing placeholder).
                    logging.error('scp of init script with keyfile {0} failed!'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue

                commands = ['chmod +x ~/stochss_init.sh',
                            '~/stochss_init.sh']
                command = ';'.join(commands)

                remote_command_string = self.get_remote_command_string(ip=ip, username=username,
                                                                       keyfile=keyfile, command=command)

                logging.debug('remote_command_string =\n{}'.format(remote_command_string))
                res = os.system(remote_command_string)

                if res != 0:
                    # Fixed broken log call (missing placeholder).
                    logging.error('remote command with keyfile {0} failed!'.format(keyfile))
                    VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                           state=VMStateModel.STATE_FAILED,
                                           description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                    continue
        except Exception as e:
            logging.exception(e)
            raise
    def prepare_vms(self, parameters):
        """
        Deploys a Flex Cloud: validates the machine list, records each VM's
        accessibility in VMStateModel, prepares the queue head, prepares all
        worker machines, and configures celery.

        Progress and errors are reported to the user by updating the
        user_data entity's flex_cloud_status / flex_cloud_info_msg fields;
        the method returns None in all cases.

        Args:
            parameters  A dictionary that must contain 'user_id',
                        'credentials', 'reservation_id',
                        PARAM_FLEX_QUEUE_HEAD and
                        PARAM_FLEX_CLOUD_MACHINE_INFO entries.
        """
        logging.debug('prepare_vms(): parameters={0}'.format(parameters))

        queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]

        user_data = self.__get_user_data(parameters['user_id'])

        if self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters \
                or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] is None \
                or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == []:

            logging.error('Error: No {0} param!'.format(
                self.PARAM_FLEX_CLOUD_MACHINE_INFO))
            # Report Error
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Invalid Parameters'
            user_data.put()
            return

        flex_cloud_machine_info = parameters[
            self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        # Set the user message to "configuring..."
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = 'Flex Cloud configured. Waiting for workers to become available...'
        user_data.put()

        # Initialize the VMStateModel db: each machine must expose both ssh
        # (22) and https (443) to count as accessible.
        all_accessible = True
        for machine in flex_cloud_machine_info:
            if self.agent.check_network_ports(machine['ip'], [22, 443]):
                state = VMStateModel.STATE_ACCESSIBLE
            else:
                state = VMStateModel.STATE_INACCESSIBLE
                all_accessible = False
            vm_state = VMStateModel(state=state,
                                    infra=self.agent_type,
                                    ins_type=FlexConfig.INSTANCE_TYPE,
                                    pri_ip=machine['ip'],
                                    pub_ip=machine['ip'],
                                    username=machine['username'],
                                    keyfile=machine['keyfile'],
                                    ins_id=self.agent.get_flex_instance_id(
                                        machine['ip']),
                                    user_id=parameters['user_id'],
                                    res_id=self.reservation_id)
            vm_state.put()

        if not all_accessible:
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Error: not all workers are accessible'
            user_data.put()
            return

        if queue_head_machine is None or not helper.wait_for_ssh_connection(
                queue_head_machine['keyfile'],
                queue_head_machine['ip'],
                username=queue_head_machine['username']):
            logging.error(
                'Found no viable ssh-able/running queue head machine!')
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Error: Can not connect {0} (queue head) via SSH'.format(
                queue_head_machine['ip'])
            user_data.put()
            return

        if not self.__prepare_queue_head(queue_head_machine, parameters):
            logging.error('Error: could not prepare queue head!')
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Error preparing the queue head'
            user_data.put()
            return

        # Collect all ssh-able worker machines first; abort on the first
        # worker that cannot be reached.
        flex_cloud_workers = []
        for machine in parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
            if machine[self.PARAM_IS_QUEUE_HEAD] != True:
                if helper.wait_for_ssh_connection(
                        machine['keyfile'],
                        machine['ip'],
                        username=machine['username']):
                    flex_cloud_workers.append(machine)
                else:
                    # Report Failure
                    user_data.flex_cloud_status = False
                    user_data.flex_cloud_info_msg = 'Error: Can not connect to {0} via SSH'.format(
                        machine['ip'])
                    user_data.put()
                    return

        # Bug fix: this block used to sit *inside* the loop above, so
        # prepare_instances() was invoked once per machine with a partially
        # built worker list. Prepare the complete worker set exactly once.
        if len(flex_cloud_workers) > 0:
            logging.debug(
                'Preparing workers: {0}'.format(flex_cloud_workers))
            params = {
                'infrastructure': AgentTypes.FLEX,
                self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
                'credentials': parameters['credentials'],
                'user_id': parameters['user_id'],
                self.PARAM_FLEX_QUEUE_HEAD:
                    parameters[self.PARAM_FLEX_QUEUE_HEAD],
                'reservation_id': parameters['reservation_id']
            }
            self.agent.prepare_instances(params)

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head_machine['ip'], agent_type=self.agent_type)

        self.__configure_celery(params=parameters)

        # Report Success
        logging.debug('Flex Cloud Deployed')
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = 'Flex Cloud Deployed'
        user_data.put()

        # Force the update of the instance status
        VMStateModel.synchronize(agent=self.agent, parameters=parameters)

        return
Example #22
0
    def __poll_instances_status(self, num_vms, parameters):
        """
        Private method that polls the state of instances that have already
        spawned, periodically checking whether they are running and
        reachable, until either num_vms are running or the poll budget
        (EC2BackendWorker.POLL_COUNT rounds of POLL_WAIT_TIME seconds) is
        exhausted. Instances that never appear are deregistered and marked
        STATE_FAILED.

        Args
            num_vms         Number of virtual machines expected to come up
            parameters      A dictionary of parameters

        Return
            A tuple of (public ips, private ips, instance ids). Each of the three is a list
        """
        logging.info("Start polling task for infrastructure = {0}".format(parameters["infrastructure"]))

        ins_ids = self.agent.describe_instances_launched(parameters)
        logging.info("ins_ids = {0}".format(ins_ids))

        # update db with new instance ids and 'pending'
        VMStateModel.update_ins_ids(
            parameters,
            ins_ids,
            self.reservation_id,
            from_state=VMStateModel.STATE_CREATING,
            to_state=VMStateModel.STATE_PENDING,
        )

        public_ips = None
        private_ips = None
        instance_ids = None
        keyfiles = None

        for x in xrange(EC2BackendWorker.POLL_COUNT):
            # get the ips and ids of this keyname
            public_ips, private_ips, instance_ids, instance_types, keyfiles = self.agent.describe_instances_running(
                parameters
            )

            logging.info("public_ips = {0}".format(public_ips))
            logging.debug("private_ips = {0}".format(private_ips))
            logging.info("instance_ids = {0}".format(instance_ids))
            logging.info("instance_types = {0}".format(instance_types))
            logging.info("keyfiles = {0}".format(keyfiles))

            # if we get the requested number of vms (the requested number will be 1 if this is queue head),
            # update reservation information and send a message to the backend server
            if num_vms == len(public_ips):
                # update db with new public ips and private ips
                VMStateModel.update_ips(parameters, instance_ids, public_ips, private_ips, instance_types, keyfiles)
                break

            else:
                if x < EC2BackendWorker.POLL_COUNT - 1:
                    # Fixed: the message used to hardcode "5 seconds" and was
                    # emitted only *after* the wait; report the real wait
                    # time before sleeping.
                    logging.info(
                        "Polling task: sleep {0} seconds...".format(EC2BackendWorker.POLL_WAIT_TIME)
                    )
                    time.sleep(EC2BackendWorker.POLL_WAIT_TIME)

                else:
                    VMStateModel.update_ips(parameters, instance_ids, public_ips, private_ips, instance_types, keyfiles)

                    logging.info("Polling timeout. About to terminate some instances:")
                    terminate_ins_ids = []
                    for ins_id in ins_ids:
                        if ins_id not in instance_ids:
                            logging.info("instance {0} to be terminated".format(ins_id))
                            terminate_ins_ids.append(ins_id)
                    # terminate timeout instances
                    self.agent.deregister_some_instances(parameters, terminate_ins_ids)
                    # update db with failure information
                    VMStateModel.set_state(
                        parameters, terminate_ins_ids, VMStateModel.STATE_FAILED, VMStateModel.DESCRI_FAIL_TO_RUN
                    )

        return public_ips, private_ips, instance_ids
    def __configure_celery(self, params, public_ips, instance_ids):
        '''
        Private method used for uploading the current celery configuration to each instance 
        that is running and ssh connectable.
        
        Args
            params          A dictionary of parameters; must contain 'keyname'
                            and 'credentials'
            public_ips      A list of public ips that are going to be configed
            instance_ids    A list of instance_ids that are used for terminating instances and update
                            database if fail on configuration by some reason

        Raises
            Exception       if the ssh key file is missing, or if celery fails
                            to start on any VM
        '''
        # Update celery config file...it should have the correct IP
        # of the Queue head node, which should already be running.
        # Pass it line by line so theres no weird formatting errors from
        # trying to echo a multi-line file directly on the command line

        key_file = os.path.join(os.path.dirname(__file__), '..',
                                '{0}.key'.format(params['keyname']))
        logging.debug("key_file = {0}".format(key_file))

        if not os.path.exists(key_file):
            raise Exception(
                "ssh key_file file not found: {0}".format(key_file))

        credentials = params['credentials']

        # Commands shared by every VM; the per-VM INSTANCE_TYPE export is
        # added below.
        base_commands = []
        base_commands.append('source /home/ubuntu/.bashrc')
        base_commands.append('export AWS_ACCESS_KEY_ID={0}'.format(
            str(credentials['EC2_ACCESS_KEY'])))
        base_commands.append('export AWS_SECRET_ACCESS_KEY={0}'.format(
            str(credentials['EC2_SECRET_KEY'])))

        for ip, ins_id in zip(public_ips, instance_ids):
            # helper.wait_for_ssh_connection(key_file, ip)
            ins_type = VMStateModel.get_instance_type(params, ins_id)
            # Bug fix: the INSTANCE_TYPE export used to be appended to a
            # single shared list, so each successive VM received all the
            # stale exports of the previous VMs. Build a fresh per-VM list.
            commands = base_commands + [
                'export INSTANCE_TYPE={0}'.format(ins_type)
            ]
            success = helper.start_celery_on_vm(instance_type=ins_type,
                                                ip=ip,
                                                key_file=key_file,
                                                agent_type=self.agent_type,
                                                worker_name=ip.replace(
                                                    '.', '_'),
                                                prepend_commands=commands)
            if success == 0:
                # update db with successful running vms
                logging.info("celery started! ")
                logging.info("host ip: {0}".format(ip))
                VMStateModel.set_state(params, [ins_id],
                                       VMStateModel.STATE_RUNNING,
                                       VMStateModel.DESCRI_SUCCESS)
            else:
                self.agent.deregister_some_instances(params, [ins_id])
                VMStateModel.set_state(
                    params, [ins_id], VMStateModel.STATE_FAILED,
                    VMStateModel.DESCRI_FAIL_TO_COFIGURE_CELERY)
                raise Exception("Failure to start celery on {0}".format(ip))

        # get all intstance types and configure the celeryconfig.py locally
        instance_types = VMStateModel.get_running_instance_types(params)
        helper.config_celery_queues(agent_type=self.agent_type,
                                    instance_types=instance_types)
Example #24
0
    def prepare_vms(self, parameters):
        """
        Deploys a Flex Cloud: validates the machine list, records each VM's
        accessibility in VMStateModel, prepares the queue head, prepares all
        worker machines, and configures celery.

        Progress and errors are reported to the user via the user_data
        entity's flex_cloud_status / flex_cloud_info_msg fields; returns
        None in all cases.

        Args:
            parameters  A dictionary that must contain 'user_id',
                        'credentials', 'reservation_id',
                        PARAM_FLEX_QUEUE_HEAD and
                        PARAM_FLEX_CLOUD_MACHINE_INFO entries.
        """
        logging.debug("prepare_vms(): parameters={0}".format(parameters))

        queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]

        user_data = self.__get_user_data(parameters["user_id"])

        if (
            self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters
            or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] is None
            or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == []
        ):

            logging.error("Error: No {0} param!".format(self.PARAM_FLEX_CLOUD_MACHINE_INFO))
            # Report Error
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Invalid Parameters"
            user_data.put()
            return

        flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        # Set the user message to "configuring..."
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = "Flex Cloud configured. Waiting for workers to become available..."
        user_data.put()

        # Initialize the VMStateModel db: each machine must expose both ssh
        # (22) and https (443) to count as accessible.
        all_accessible = True
        for machine in flex_cloud_machine_info:
            if self.agent.check_network_ports(machine["ip"], [22, 443]):
                state = VMStateModel.STATE_ACCESSIBLE
            else:
                state = VMStateModel.STATE_INACCESSIBLE
                all_accessible = False
            vm_state = VMStateModel(
                state=state,
                infra=self.agent_type,
                ins_type=FlexConfig.INSTANCE_TYPE,
                pri_ip=machine["ip"],
                pub_ip=machine["ip"],
                username=machine["username"],
                keyfile=machine["keyfile"],
                ins_id=self.agent.get_flex_instance_id(machine["ip"]),
                user_id=parameters["user_id"],
                res_id=self.reservation_id,
            )
            vm_state.put()

        if not all_accessible:
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Error: not all workers are accessible"
            user_data.put()
            return

        if queue_head_machine is None or not helper.wait_for_ssh_connection(
            queue_head_machine["keyfile"], queue_head_machine["ip"], username=queue_head_machine["username"]
        ):
            logging.error("Found no viable ssh-able/running queue head machine!")
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Error: Can not connect {0} (queue head) via SSH".format(
                queue_head_machine["ip"]
            )
            user_data.put()
            return

        if not self.__prepare_queue_head(queue_head_machine, parameters):
            logging.error("Error: could not prepare queue head!")
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Error preparing the queue head"
            user_data.put()
            return

        # Collect all ssh-able worker machines first; abort on the first
        # worker that cannot be reached.
        flex_cloud_workers = []
        for machine in parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
            if machine[self.PARAM_IS_QUEUE_HEAD] != True:
                if helper.wait_for_ssh_connection(machine["keyfile"], machine["ip"], username=machine["username"]):
                    flex_cloud_workers.append(machine)
                else:
                    # Report Failure
                    user_data.flex_cloud_status = False
                    user_data.flex_cloud_info_msg = "Error: Can not connect to {0} via SSH".format(machine["ip"])
                    user_data.put()
                    return

        # Bug fix: this block used to sit *inside* the loop above, so
        # prepare_instances() was invoked once per machine with a partially
        # built worker list. Prepare the complete worker set exactly once.
        if len(flex_cloud_workers) > 0:
            logging.debug("Preparing workers: {0}".format(flex_cloud_workers))
            params = {
                "infrastructure": AgentTypes.FLEX,
                self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
                "credentials": parameters["credentials"],
                "user_id": parameters["user_id"],
                self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
                "reservation_id": parameters["reservation_id"],
            }
            self.agent.prepare_instances(params)

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head_machine["ip"], agent_type=self.agent_type
        )

        self.__configure_celery(params=parameters)

        # Report Success
        logging.debug("Flex Cloud Deployed")
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = "Flex Cloud Deployed"
        user_data.put()

        # Force the update of the instance status
        VMStateModel.synchronize(agent=self.agent, parameters=parameters)

        return
    def __poll_instances_status(self, num_vms, parameters):
        '''
        Poll the state of already-spawned instances until the requested
        number of VMs is running, or the poll budget is exhausted.

        The launched instance ids are first recorded in the VMStateModel as
        'pending'. On each poll iteration the agent reports which instances
        are running; once num_vms of them are up, their ips, types and
        keyfiles are stored and polling stops. If the poll budget runs out,
        whatever is running is recorded, and every instance that never
        reached the running state is deregistered and marked as failed.

        Args
            num_vms         Number of virtual machines expected to be running
            parameters      A dictionary of parameters

        Return
            A tuple of (public ips, private ips, instance ids).
            Each of the three is a list.
        '''
        logging.info('Start polling task for infrastructure = {0}'.format(
            parameters['infrastructure']))

        ins_ids = self.agent.describe_instances_launched(parameters)
        logging.info("ins_ids = {0}".format(ins_ids))

        # update db with new instance ids and 'pending'
        VMStateModel.update_ins_ids(parameters,
                                    ins_ids,
                                    self.reservation_id,
                                    from_state=VMStateModel.STATE_CREATING,
                                    to_state=VMStateModel.STATE_PENDING)

        public_ips = None
        private_ips = None
        instance_ids = None

        for attempt in range(EC2BackendWorker.POLL_COUNT):
            # get the ips and ids of this keyname
            public_ips, private_ips, instance_ids, instance_types, keyfiles = \
                self.agent.describe_instances_running(parameters)

            logging.info("public_ips = {0}".format(public_ips))
            logging.debug("private_ips = {0}".format(private_ips))
            logging.info("instance_ids = {0}".format(instance_ids))
            logging.info("instance_types = {0}".format(instance_types))
            logging.info("keyfiles = {0}".format(keyfiles))

            # if we get the requested number of vms (the requested number will
            # be 1 if this is a queue head), update reservation information
            # and stop polling
            if num_vms == len(public_ips):
                # update db with new public ips and private ips
                VMStateModel.update_ips(parameters, instance_ids, public_ips,
                                        private_ips, instance_types, keyfiles)
                break

            if attempt < EC2BackendWorker.POLL_COUNT - 1:
                # log the real wait time (the old message hard-coded
                # "5 seconds" regardless of POLL_WAIT_TIME)
                logging.info('Polling task: sleep {0} seconds...'.format(
                    EC2BackendWorker.POLL_WAIT_TIME))
                time.sleep(EC2BackendWorker.POLL_WAIT_TIME)
            else:
                # out of poll budget: record whatever did come up
                VMStateModel.update_ips(parameters, instance_ids,
                                        public_ips, private_ips,
                                        instance_types, keyfiles)

                logging.info(
                    'Polling timeout. About to terminate some instances:')
                terminate_ins_ids = [ins_id for ins_id in ins_ids
                                     if ins_id not in instance_ids]
                for ins_id in terminate_ins_ids:
                    logging.info(
                        'instance {0} to be terminated'.format(ins_id))
                # terminate timeout instances
                self.agent.deregister_some_instances(
                    parameters, terminate_ins_ids)
                # update db with failure information
                VMStateModel.set_state(parameters, terminate_ins_ids,
                                       VMStateModel.STATE_FAILED,
                                       VMStateModel.DESCRI_FAIL_TO_RUN)

        return public_ips, private_ips, instance_ids