def __prepare_queue_head(self, queue_head_machine, parameters):
        """Prepare the flex cloud queue head and create its job table.

        Checks that the queue head key file exists and that the machine
        accepts SSH connections, asks the agent to prepare the machine, and
        then creates the job table in the flex database on the queue head.

        Args:
            queue_head_machine: mapping with the 'keyfile', 'ip' and
                'username' entries of the queue head.
            parameters: mapping providing 'credentials', 'user_id' and the
                entries keyed by self.PARAM_FLEX_DB_PASSWORD and
                self.PARAM_FLEX_QUEUE_HEAD.

        Returns:
            True when the queue head was prepared and the table created,
            False when the key file is missing, SSH fails, or preparation
            raises.
        """

        def log_footer():
            # Closing banner that pairs with the '*' banner logged on entry;
            # emitted on every exit path so log sections stay balanced.
            logging.debug('-' * 80)
            logging.debug('-' * 80)

        logging.debug('*' * 80)
        logging.debug('*' * 80)
        # NOTE(review): parameters carries 'credentials' and the flex DB
        # password, so this debug line writes them to the log - confirm that
        # is acceptable for the deployment's log handling.
        logging.debug(
            '__prepare_queue_head(queue_head_machine={0}, parameters={1})'.
            format(queue_head_machine, parameters))

        keyfile = queue_head_machine['keyfile']
        if not os.path.exists(keyfile):
            logging.error(
                'Queue head keyfile: {0} does not exist!'.format(keyfile))
            log_footer()
            return False

        # Truthiness test, consistent with how prepare_vms interprets the
        # result of the same helper.
        if not helper.wait_for_ssh_connection(
                key_file=keyfile,
                ip=queue_head_machine['ip'],
                username=queue_head_machine['username']):
            logging.error('Queue Head ssh failed!')
            log_footer()
            return False

        logging.info(
            'Queue Head with ip {0} is successfully ssh-able'.format(
                queue_head_machine['ip']))

        try:
            queue_head_prepare_params = {
                'infrastructure': AgentTypes.FLEX,
                self.PARAM_FLEX_CLOUD_MACHINE_INFO: [queue_head_machine],
                'credentials': parameters['credentials'],
                'user_id': parameters['user_id'],
                self.PARAM_FLEX_DB_PASSWORD:
                parameters[self.PARAM_FLEX_DB_PASSWORD],
                self.PARAM_FLEX_QUEUE_HEAD:
                parameters[self.PARAM_FLEX_QUEUE_HEAD],
            }
            self.agent.prepare_instances(queue_head_prepare_params)

            # Create stochss table in flex db in queue head
            database = FlexDB(password=parameters[self.PARAM_FLEX_DB_PASSWORD],
                              ip=queue_head_machine['ip'])
            database.createtable(JobDatabaseConfig.TABLE_NAME)
        except Exception as e:
            # Deliberate catch-all: callers only act on the boolean result.
            # logging.exception keeps the traceback that logging.error(e)
            # would drop.
            logging.exception(e)
            log_footer()
            return False

        log_footer()
        return True
    def __verify_ec2_instances_via_ssh(self, instance_ids, parameters,
                                       public_ips):
        """Keep only the instances that can be reached over SSH.

        Tries an SSH connection to every (public ip, instance id) pair using
        the key file '<directory of this module>/../<keyname>.key'. Instances
        that cannot be reached are deregistered through the agent and marked
        as failed (timeout on ssh) in VMStateModel.

        Args:
            instance_ids: instance ids, paired positionally with public_ips.
            parameters: mapping that provides 'keyname'; it is also passed
                through to the agent and to VMStateModel.set_state.
            public_ips: public ips of the instances to check.

        Returns:
            A tuple (connected_public_ips, connected_instance_ids).

        Raises:
            Exception: if the key file does not exist, or if deregistering /
                flagging the unreachable instances fails.
        """
        keyfile = os.path.join(os.path.dirname(__file__), '..',
                               '{0}.key'.format(parameters['keyname']))
        logging.info('keyfile = {0}'.format(keyfile))

        if not os.path.exists(keyfile):
            raise Exception("ssh keyfile file not found: {0}".format(keyfile))

        connected_public_ips = []
        connected_instance_ids = []

        for pub_ip, ins_id in zip(public_ips, instance_ids):
            logging.info('connecting to {0}...'.format(pub_ip))
            if helper.wait_for_ssh_connection(key_file=keyfile, ip=pub_ip):
                logging.info('{0} is successfully added'.format(pub_ip))
                connected_public_ips.append(pub_ip)
                connected_instance_ids.append(ins_id)

        # if there are some vms not able to be connected via ssh,
        # just shut them down explicitly
        if len(public_ips) != len(connected_public_ips):
            logging.info(
                'Time out on ssh to {0} instances. They will be terminated.'.
                format(len(public_ips) - len(connected_public_ips)))

            terminate_ins_ids = [
                ins_id for ins_id in instance_ids
                if ins_id not in connected_instance_ids
            ]
            try:
                self.agent.deregister_some_instances(parameters,
                                                     terminate_ins_ids)
                # update db with failed vms
                VMStateModel.set_state(parameters, terminate_ins_ids,
                                       VMStateModel.STATE_FAILED,
                                       VMStateModel.DESCRI_TIMEOUT_ON_SSH)
            except Exception:
                # Record the real cause before raising the generic error that
                # callers see; a bare except would also trap
                # KeyboardInterrupt/SystemExit.
                logging.exception(
                    'Failed to terminate unreachable instances: {0}'.format(
                        terminate_ins_ids))
                raise Exception(
                    "Errors in terminating instances that cannot be connected via ssh."
                )

        return connected_public_ips, connected_instance_ids
# Example #3
0
    def __prepare_queue_head(self, queue_head_machine, parameters):
        """Prepare the flex cloud queue head and create its job table.

        Checks that the queue head key file exists and that the machine
        accepts SSH connections, asks the agent to prepare the machine, and
        then creates the job table in the flex database on the queue head.

        Args:
            queue_head_machine: mapping with the "keyfile", "ip" and
                "username" entries of the queue head.
            parameters: mapping providing "credentials", "user_id" and the
                entries keyed by self.PARAM_FLEX_DB_PASSWORD and
                self.PARAM_FLEX_QUEUE_HEAD.

        Returns:
            True when the queue head was prepared and the table created,
            False when the key file is missing, SSH fails, or preparation
            raises.
        """

        def log_footer():
            # Closing banner that pairs with the "*" banner logged on entry;
            # emitted on every exit path so log sections stay balanced.
            logging.debug("-" * 80)
            logging.debug("-" * 80)

        logging.debug("*" * 80)
        logging.debug("*" * 80)
        # NOTE(review): parameters carries "credentials" and the flex DB
        # password, so this debug line writes them to the log - confirm that
        # is acceptable for the deployment's log handling.
        logging.debug(
            "__prepare_queue_head(queue_head_machine={0}, parameters={1})".format(queue_head_machine, parameters)
        )

        keyfile = queue_head_machine["keyfile"]
        if not os.path.exists(keyfile):
            logging.error("Queue head keyfile: {0} does not exist!".format(keyfile))
            log_footer()
            return False

        # Truthiness test, consistent with how prepare_vms interprets the
        # result of the same helper.
        ssh_ok = helper.wait_for_ssh_connection(
            key_file=keyfile, ip=queue_head_machine["ip"], username=queue_head_machine["username"]
        )
        if not ssh_ok:
            logging.error("Queue Head ssh failed!")
            log_footer()
            return False

        logging.info("Queue Head with ip {0} is successfully ssh-able".format(queue_head_machine["ip"]))

        try:
            queue_head_prepare_params = {
                "infrastructure": AgentTypes.FLEX,
                self.PARAM_FLEX_CLOUD_MACHINE_INFO: [queue_head_machine],
                "credentials": parameters["credentials"],
                "user_id": parameters["user_id"],
                self.PARAM_FLEX_DB_PASSWORD: parameters[self.PARAM_FLEX_DB_PASSWORD],
                self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
            }
            self.agent.prepare_instances(queue_head_prepare_params)

            # Create stochss table in flex db in queue head
            database = FlexDB(password=parameters[self.PARAM_FLEX_DB_PASSWORD], ip=queue_head_machine["ip"])
            database.createtable(JobDatabaseConfig.TABLE_NAME)
        except Exception as e:
            # Deliberate catch-all: callers only act on the boolean result.
            # logging.exception keeps the traceback that logging.error(e)
            # would drop.
            logging.exception(e)
            log_footer()
            return False

        log_footer()
        return True
# Example #4
0
    def __verify_ec2_instances_via_ssh(self, instance_ids, parameters, public_ips):
        """Keep only the instances that can be reached over SSH.

        Tries an SSH connection to every (public ip, instance id) pair using
        the key file "<directory of this module>/../<keyname>.key". Instances
        that cannot be reached are deregistered through the agent and marked
        as failed (timeout on ssh) in VMStateModel.

        Args:
            instance_ids: instance ids, paired positionally with public_ips.
            parameters: mapping that provides "keyname"; it is also passed
                through to the agent and to VMStateModel.set_state.
            public_ips: public ips of the instances to check.

        Returns:
            A tuple (connected_public_ips, connected_instance_ids).

        Raises:
            Exception: if the key file does not exist, or if deregistering /
                flagging the unreachable instances fails.
        """
        keyfile = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(parameters["keyname"]))
        logging.info("keyfile = {0}".format(keyfile))

        if not os.path.exists(keyfile):
            raise Exception("ssh keyfile file not found: {0}".format(keyfile))

        connected_public_ips = []
        connected_instance_ids = []

        for pub_ip, ins_id in zip(public_ips, instance_ids):
            logging.info("connecting to {0}...".format(pub_ip))
            if helper.wait_for_ssh_connection(key_file=keyfile, ip=pub_ip):
                logging.info("{0} is successfully added".format(pub_ip))
                connected_public_ips.append(pub_ip)
                connected_instance_ids.append(ins_id)

        # if there are some vms not able to be connected via ssh,
        # just shut them down explicitly
        if len(public_ips) != len(connected_public_ips):
            logging.info(
                "Time out on ssh to {0} instances. They will be terminated.".format(
                    len(public_ips) - len(connected_public_ips)
                )
            )

            terminate_ins_ids = [ins_id for ins_id in instance_ids if ins_id not in connected_instance_ids]
            try:
                self.agent.deregister_some_instances(parameters, terminate_ins_ids)
                # update db with failed vms
                VMStateModel.set_state(
                    parameters, terminate_ins_ids, VMStateModel.STATE_FAILED, VMStateModel.DESCRI_TIMEOUT_ON_SSH
                )
            except Exception:
                # Record the real cause before raising the generic error that
                # callers see; a bare except would also trap
                # KeyboardInterrupt/SystemExit.
                logging.exception("Failed to terminate unreachable instances: {0}".format(terminate_ins_ids))
                raise Exception("Errors in terminating instances that cannot be connected via ssh.")

        return connected_public_ips, connected_instance_ids
    def prepare_vms(self, parameters):
        """Deploy the flex cloud described by `parameters`.

        Steps, in order: record every listed machine in VMStateModel with its
        port-check result, verify SSH access to the queue head and prepare
        it, verify SSH access to the remaining machines and prepare them as
        workers, update the celery configuration, and finally force a
        VMStateModel synchronization. The outcome is written to the user's
        data record (flex_cloud_status / flex_cloud_info_msg); on any failure
        the method writes the error there and stops. Nothing is returned.

        Args:
            parameters: mapping providing 'user_id', 'credentials', the
                entries keyed by self.PARAM_FLEX_QUEUE_HEAD and
                self.PARAM_FLEX_CLOUD_MACHINE_INFO, and 'reservation_id'
                (read only when there are workers to prepare).
        """
        logging.debug('prepare_vms(): parameters={0}'.format(parameters))

        queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]
        user_data = self.__get_user_data(parameters['user_id'])

        def report(status, message):
            # Persist the deployment status on the user's data record.
            user_data.flex_cloud_status = status
            user_data.flex_cloud_info_msg = message
            user_data.put()

        if self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters \
                or not parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
            logging.error('Error: No {0} param!'.format(
                self.PARAM_FLEX_CLOUD_MACHINE_INFO))
            report(False, 'Invalid Parameters')
            return

        flex_cloud_machine_info = parameters[
            self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        # Set the user message to "configuring..."
        report(True,
               'Flex Cloud configured. Waiting for workers to become available...')

        # Initialize the VMstateModel db
        all_accessible = True
        for machine in flex_cloud_machine_info:
            if self.agent.check_network_ports(machine['ip'], [22, 443]):
                state = VMStateModel.STATE_ACCESSIBLE
            else:
                state = VMStateModel.STATE_INACCESSIBLE
                all_accessible = False
            vm_state = VMStateModel(state=state,
                                    infra=self.agent_type,
                                    ins_type=FlexConfig.INSTANCE_TYPE,
                                    pri_ip=machine['ip'],
                                    pub_ip=machine['ip'],
                                    username=machine['username'],
                                    keyfile=machine['keyfile'],
                                    ins_id=self.agent.get_flex_instance_id(
                                        machine['ip']),
                                    user_id=parameters['user_id'],
                                    res_id=self.reservation_id)
            vm_state.put()

        if not all_accessible:
            report(False, 'Error: not all workers are accessible')
            return

        # Handled separately from the SSH check: the SSH failure message
        # reads queue_head_machine['ip'], which would raise on None.
        if queue_head_machine is None:
            logging.error(
                'Found no viable ssh-able/running queue head machine!')
            report(False, 'Error: No queue head machine specified')
            return

        if not helper.wait_for_ssh_connection(
                queue_head_machine['keyfile'],
                queue_head_machine['ip'],
                username=queue_head_machine['username']):
            logging.error(
                'Found no viable ssh-able/running queue head machine!')
            report(False,
                   'Error: Can not connect {0} (queue head) via SSH'.format(
                       queue_head_machine['ip']))
            return

        if not self.__prepare_queue_head(queue_head_machine, parameters):
            logging.error('Error: could not prepare queue head!')
            report(False, 'Error preparing the queue head')
            return

        flex_cloud_workers = []
        for machine in flex_cloud_machine_info:
            # Equality test kept as-is so non-boolean flag values are treated
            # exactly as before.
            if machine[self.PARAM_IS_QUEUE_HEAD] != True:
                if not helper.wait_for_ssh_connection(
                        machine['keyfile'],
                        machine['ip'],
                        username=machine['username']):
                    report(False,
                           'Error: Can not connect to {0} via SSH'.format(
                               machine['ip']))
                    return
                flex_cloud_workers.append(machine)

        # Prepare the workers once, after every one of them passed the SSH
        # check; doing this inside the loop re-prepared the already collected
        # workers on each iteration.
        if flex_cloud_workers:
            logging.debug(
                'Preparing workers: {0}'.format(flex_cloud_workers))
            params = {
                'infrastructure': AgentTypes.FLEX,
                self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
                'credentials': parameters['credentials'],
                'user_id': parameters['user_id'],
                self.PARAM_FLEX_QUEUE_HEAD:
                parameters[self.PARAM_FLEX_QUEUE_HEAD],
                'reservation_id': parameters['reservation_id'],
            }
            self.agent.prepare_instances(params)

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head_machine['ip'], agent_type=self.agent_type)

        self.__configure_celery(params=parameters)

        # Report Success
        logging.debug('Flex Cloud Deployed')
        report(True, 'Flex Cloud Deployed')

        # Force the update of the instance status
        VMStateModel.synchronize(agent=self.agent, parameters=parameters)
# Example #6
0
    def prepare_vms(self, parameters):
        """Deploy the flex cloud described by `parameters`.

        Steps, in order: record every listed machine in VMStateModel with its
        port-check result, verify SSH access to the queue head and prepare
        it, verify SSH access to the remaining machines and prepare them as
        workers, update the celery configuration, and finally force a
        VMStateModel synchronization. The outcome is written to the user's
        data record (flex_cloud_status / flex_cloud_info_msg); on any failure
        the method writes the error there and stops. Nothing is returned.

        Args:
            parameters: mapping providing "user_id", "credentials", the
                entries keyed by self.PARAM_FLEX_QUEUE_HEAD and
                self.PARAM_FLEX_CLOUD_MACHINE_INFO, and "reservation_id"
                (read only when there are workers to prepare).
        """
        logging.debug("prepare_vms(): parameters={0}".format(parameters))

        queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]
        user_data = self.__get_user_data(parameters["user_id"])

        def report(status, message):
            # Persist the deployment status on the user's data record.
            user_data.flex_cloud_status = status
            user_data.flex_cloud_info_msg = message
            user_data.put()

        if (
            self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters
            or not parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]
        ):
            logging.error("Error: No {0} param!".format(self.PARAM_FLEX_CLOUD_MACHINE_INFO))
            report(False, "Invalid Parameters")
            return

        flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        # Set the user message to "configuring..."
        report(True, "Flex Cloud configured. Waiting for workers to become available...")

        # Initialize the VMstateModel db
        all_accessible = True
        for machine in flex_cloud_machine_info:
            if self.agent.check_network_ports(machine["ip"], [22, 443]):
                state = VMStateModel.STATE_ACCESSIBLE
            else:
                state = VMStateModel.STATE_INACCESSIBLE
                all_accessible = False
            vm_state = VMStateModel(
                state=state,
                infra=self.agent_type,
                ins_type=FlexConfig.INSTANCE_TYPE,
                pri_ip=machine["ip"],
                pub_ip=machine["ip"],
                username=machine["username"],
                keyfile=machine["keyfile"],
                ins_id=self.agent.get_flex_instance_id(machine["ip"]),
                user_id=parameters["user_id"],
                res_id=self.reservation_id,
            )
            vm_state.put()

        if not all_accessible:
            report(False, "Error: not all workers are accessible")
            return

        # Handled separately from the SSH check: the SSH failure message
        # reads queue_head_machine["ip"], which would raise on None.
        if queue_head_machine is None:
            logging.error("Found no viable ssh-able/running queue head machine!")
            report(False, "Error: No queue head machine specified")
            return

        if not helper.wait_for_ssh_connection(
            queue_head_machine["keyfile"], queue_head_machine["ip"], username=queue_head_machine["username"]
        ):
            logging.error("Found no viable ssh-able/running queue head machine!")
            report(False, "Error: Can not connect {0} (queue head) via SSH".format(queue_head_machine["ip"]))
            return

        if not self.__prepare_queue_head(queue_head_machine, parameters):
            logging.error("Error: could not prepare queue head!")
            report(False, "Error preparing the queue head")
            return

        flex_cloud_workers = []
        for machine in flex_cloud_machine_info:
            # Equality test kept as-is so non-boolean flag values are treated
            # exactly as before.
            if machine[self.PARAM_IS_QUEUE_HEAD] != True:
                if not helper.wait_for_ssh_connection(
                    machine["keyfile"], machine["ip"], username=machine["username"]
                ):
                    report(False, "Error: Can not connect to {0} via SSH".format(machine["ip"]))
                    return
                flex_cloud_workers.append(machine)

        # Prepare the workers once, after every one of them passed the SSH
        # check; doing this inside the loop re-prepared the already collected
        # workers on each iteration.
        if flex_cloud_workers:
            logging.debug("Preparing workers: {0}".format(flex_cloud_workers))
            params = {
                "infrastructure": AgentTypes.FLEX,
                self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
                "credentials": parameters["credentials"],
                "user_id": parameters["user_id"],
                self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
                "reservation_id": parameters["reservation_id"],
            }
            self.agent.prepare_instances(params)

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head_machine["ip"], agent_type=self.agent_type
        )

        self.__configure_celery(params=parameters)

        # Report Success
        logging.debug("Flex Cloud Deployed")
        report(True, "Flex Cloud Deployed")

        # Force the update of the instance status
        VMStateModel.synchronize(agent=self.agent, parameters=parameters)