コード例 #1
0
    def handle_state_request_constraint_encountered( self, spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid ):
        """ A constraint was encountered after the spot request was initiated but before it
            was fulfilled, i.e. the time limit expired
            Close out this spot request row and ask the master to submit another spot request

        :param spot_request_msg: message whose spot_master_uuid and spot_request_uuid are copied into the resubmit message
        :param spot_request_item: spot request row handed to spot_request_row_partial_save
        :param spot_request_uuid: used only to build the log message header
        :param spot_master_uuid: not read here; the master uuid is taken from spot_request_msg

        """
        logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'handle_state_request_constraint_encountered' )

        # Mark the row as closed and complete, stamped with the current epoch second
        closed_at = int( time.time() )
        closing_values = {
            TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
            TableSpotRequest.is_open: 0,
            TableSpotRequest.ts_end: closed_at,
        }
        spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, closing_values,
                                       region_name=self.region_name, profile_name=self.profile_name )

        # Queue a resubmit message, on the master queue, for the request that was just closed
        resubmit_msg = SpotMasterMsg( spot_master_uuid=spot_request_msg.spot_master_uuid,
                                      spot_master_msg_type=SpotMasterMsg.TYPE_RESUBMIT_FAILED_REQUEST,
                                      spot_request_uuid=spot_request_msg.spot_request_uuid )
        resubmit_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest )
        master_queue = SqsMessageDurable( self.spot_master_queue_name, self.region_name,
                                          profile_name=self.profile_name )
        master_queue.send_message( resubmit_msg.to_json(), message_attributes=resubmit_attributes )
コード例 #2
0
    def handle_state_instance_force_termination_pending( self, spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid ):
        """ AWS has started the termination process for this instance, i.e. the price has increased
            This is the start of the two minute warning before forced termination
            Close out this spot request row and ask the master to start another spot request

        :param spot_request_msg: message whose spot_master_uuid and spot_request_uuid are copied into the resubmit message
        :param spot_request_item: spot request row handed to spot_request_row_partial_save
        :param spot_request_uuid: used only to build the log message header
        :param spot_master_uuid: not read here; the master uuid is taken from spot_request_msg

        """
        logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'handle_state_instance_force_termination_pending' )

        # Record the end of this request: complete, no longer open, ended now (epoch seconds)
        ended_at = int( time.time() )
        final_row_values = {
            TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
            TableSpotRequest.is_open: 0,
            TableSpotRequest.ts_end: ended_at,
        }
        spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, final_row_values,
                                       region_name=self.region_name, profile_name=self.profile_name )

        # Tell the master to create a replacement for the spot request that is being terminated
        replacement_request_msg = SpotMasterMsg(
            spot_master_uuid=spot_request_msg.spot_master_uuid,
            spot_master_msg_type=SpotMasterMsg.TYPE_RESUBMIT_FAILED_REQUEST,
            spot_request_uuid=spot_request_msg.spot_request_uuid )
        attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest )
        durable_master_queue = SqsMessageDurable( self.spot_master_queue_name, self.region_name,
                                                  profile_name=self.profile_name )
        durable_master_queue.send_message( replacement_request_msg.to_json(), message_attributes=attributes )
    def process(self, message):
        """
            Spot Request has completed, write completion info to SpotRequestItem in DynamoDB,
            let master know this request has completed so the master can determine if the job has completed

            The incoming message is deleted from the spot request queue only after the row has been
            saved and the master has been notified. When a StandardError is raised the error is
            logged and the message is left on the queue (it is not deleted and nothing is re-raised).

        :param message: SQS Message instance

        """
        # Bound before the try block so the except handler can always build its log header,
        # even when parsing the message or fetching the row is what failed
        spot_request_item = None
        try:
            spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
            spot_request_item = get_spot_request_item(
                self.spot_request_table_name,
                spot_request_msg.spot_request_uuid,
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
            name_value_pairs = spot_request_msg.name_value_pairs
            ts_cmd_complete = name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP]
            cmd_returncode = name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_RETURNCODE]
            cmd_std_out = name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_OUT]
            cmd_std_err = name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_ERR]
            key_value_pairs = {
                TableSpotRequest.is_open: 0,
                TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
                TableSpotRequest.ts_cmd_complete: ts_cmd_complete,
                TableSpotRequest.cmd_returncode: cmd_returncode,
            }
            # Only save stdout/stderr when there is something to save (skips None and empty values)
            if cmd_std_out:
                key_value_pairs[TableSpotRequest.cmd_std_out] = cmd_std_out
            if cmd_std_err:
                key_value_pairs[TableSpotRequest.cmd_std_err] = cmd_std_err
            spot_request_row_partial_save(
                self.spot_request_table_name,
                spot_request_item,
                key_value_pairs,
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
            # let the Master increment the completion count to determine if the job is complete
            master_msg_incr_instance_success = SpotMasterMsg(
                spot_master_uuid=spot_request_msg.spot_master_uuid,
                spot_master_msg_type=SpotMasterMsg.TYPE_INCR_INSTANCE_SUCCESS_CNT,
            )
            message_attributes = create_microsvc_message_attributes(
                awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageIncrSuccessCnt
            )
            spot_master_sqs_message_durable = SqsMessageDurable(
                self.spot_master_queue_name, self.region_name, profile_name=self.profile_name
            )
            spot_master_sqs_message_durable.send_message(
                master_msg_incr_instance_success.to_json(), message_attributes=message_attributes
            )
            self.spot_request_sqs_message_durable.delete_message(message)

        except StandardError as e:
            # The row may never have been fetched; fall back to an empty header rather than
            # raising a second error inside the handler and losing the original one
            if spot_request_item is not None:
                msg_hdr = fmt_request_item_msg_hdr(spot_request_item)
            else:
                msg_hdr = ""
            logger.error(msg_hdr + "Exiting SpotRequestDispatcher due to exception")
            logger.error(msg_hdr + str(e))
            logger.error(msg_hdr + traceback.format_exc())
コード例 #4
0
    def process( self, message ) :
        """ Process a CheckStatus message
            Read the Master row from DynamoDB, stamp ts_last_state_check, run the handler that
            matches the current Master state, delete the incoming message and, unless the state is
            cleanup_complete, queue the next CheckStatus message
            The next message is delayed 60 seconds by default and 5 seconds for the
            master_resources_in_progress, master_role_policy_in_progress and
            waiting_for_master_resources_terminated states
            A state code that matches none of the handled states runs no handler but still
            queues the next CheckStatus message
            When a StandardError is raised it is logged; the message is not deleted unless the
            delete had already run, and nothing is re-raised

        :param message: SQS Message instance

        """
        # Bound before the try block so the except handler can always log, even when
        # the message body cannot be parsed into a SpotMasterMsg
        spot_master_uuid = None
        try: 
            spot_master_msg = SpotMasterMsg( raw_json=message.get_body() )
            spot_master_uuid = spot_master_msg.spot_master_uuid
            logger.info( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'process_check_status' )
            # Get master row from DynamoDB and process based on state
            dynamodb_conn = boto.dynamodb2.connect_to_region( self.region_name, profile_name=self.profile_name )
            spot_master_table = Table( self.spot_master_table_name, connection=dynamodb_conn ) 
            spot_master_item = spot_master_table.get_item( spot_master_uuid=spot_master_uuid )
            master_state_code = spot_master_item[TableSpotMaster.spot_master_state_code]
            logger.info( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'master state=' + master_state_code )
            
            next_status_msg_delay_secs = 60
            is_send_master_msg_check_status = True
            # One timestamp for both the in-memory item and the saved row so they cannot disagree
            ts_last_state_check = int( time.time() )
            spot_master_item[ TableSpotMaster.ts_last_state_check ] = ts_last_state_check
            spot_master_row_partial_save( self.spot_master_table_name, spot_master_item, 
                                  {TableSpotMaster.ts_last_state_check:ts_last_state_check},
                                  region_name=self.region_name, profile_name=self.profile_name )
            
            # Process based on the current Master State
            if master_state_code == SpotMasterStateCode.master_resources_in_progress:
                self.handle_state_master_resources_in_progress( spot_master_item )
                next_status_msg_delay_secs = 5
            elif master_state_code == SpotMasterStateCode.master_role_policy_in_progress:
                self.handle_state_master_role_policy_in_progress( spot_master_item, dynamodb_conn )
                next_status_msg_delay_secs = 5
            elif master_state_code == SpotMasterStateCode.waiting_for_instances_complete:
                self.handle_state_waiting_for_instances_complete( spot_master_item )
            elif master_state_code == SpotMasterStateCode.waiting_for_instances_terminated:
                self.handle_state_waiting_for_instances_terminated( spot_master_item )
            elif master_state_code == SpotMasterStateCode.waiting_for_master_resources_terminated:
                self.handle_state_waiting_for_master_resources_terminated( spot_master_item )
                next_status_msg_delay_secs = 5
            elif master_state_code == SpotMasterStateCode.cleanup_in_progress:
                self.handle_state_cleanup_in_progress( spot_master_item )
            elif master_state_code == SpotMasterStateCode.cleanup_complete:
                self.handle_state_cleanup_complete( spot_master_item )
                # Final state - stop the chain of CheckStatus messages
                is_send_master_msg_check_status = False
            
            self.spot_master_sqs_message_durable.delete_message(message)        
            
            if is_send_master_msg_check_status:
                spot_master_msg_check_status = SpotMasterMsg( spot_master_uuid=spot_master_uuid, spot_master_msg_type=SpotMasterMsg.TYPE_CHECK_STATUS )
                message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageCheckStatus )
                self.spot_master_sqs_message_durable.send_message( spot_master_msg_check_status.to_json(), 
                                                              delay_seconds=next_status_msg_delay_secs,
                                                              message_attributes=message_attributes )
        except StandardError as e:
            # The uuid is unknown when the failure happened while parsing the message; log
            # without a header rather than raising a second error inside the handler
            if spot_master_uuid is not None:
                msg_hdr = fmt_master_uuid_msg_hdr( spot_master_uuid )
            else:
                msg_hdr = ''
            logger.error( msg_hdr + str(e) )
            logger.error( msg_hdr + traceback.format_exc() )
コード例 #5
0
def submit_spot_batch_job( argv ):
    """ Submit a user's spot batch job
        Submit an SQS message containing the 2 parm files - Batch Job and User Parm

        The command line is read from sys.argv:
            sys.argv[1] - path/name.ext of the log configuration file (required)
            sys.argv[2] - path/name.ext of the Batch Job Parm file (required)
            sys.argv[3] - path/name.ext of the User Job Parm file (optional, used only when
                          exactly four arguments are present)

        Exits the process with status 8 when a required argument is missing, or when a
        StandardError is raised while reading the parm files or queueing the message

    :param argv: not read by this function; the arguments are taken from sys.argv

    """
    import logging.config
    if len(sys.argv) == 1:
        # Logging is not configured yet, so report with print; a single parenthesized
        # argument prints the same text under both the Python 2 statement and the Python 3 function
        print( 'ERROR: Missing log configuration file, first argument must be path/name.ext of the log configuration file' )
        sys.exit(8)
    logging.config.fileConfig( sys.argv[1], disable_existing_loggers=False)
    logger = logging.getLogger(__name__)
    
    if len(sys.argv) == 2:
        logger.error( 'ERROR: Missing Batch Job Parm file, second argument must be path/name.ext of the Batch Job Parm file' )
        sys.exit(8)              
    
    try:
        logger.info("Starting")
        
        path_batch_job_parm_file = sys.argv[2]
        path_user_job_parm_file = sys.argv[3] if len(sys.argv) == 4 else None
        
        with open( path_batch_job_parm_file ) as parm_file:
            raw_batch_job_parm_item = parm_file.read()
            
        # The User Job Parm file is optional; None is passed along when it was not supplied
        raw_user_job_parm_item = None
        if path_user_job_parm_file is not None:
            with open( path_user_job_parm_file ) as parm_file:
                raw_user_job_parm_item = parm_file.read()

        # Parsed only to find the region and profile used to reach the master queue;
        # the message itself carries the raw file text
        batch_job_parm_item = BatchJobParmItem( stringParmFile=raw_batch_job_parm_item )

        spot_master_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_MASTER_QUEUE_NAME, 
                                                             batch_job_parm_item.primary_region_name, 
                                                             profile_name=batch_job_parm_item.profile_name )
 
        spot_master_uuid = str(uuid.uuid1())
        logger.info('Submitting test batch message, spot_master_uuid=' + spot_master_uuid )
        spot_master_msg = SpotMasterMsg( spot_master_uuid=spot_master_uuid, spot_master_msg_type=SpotMasterMsg.TYPE_SUBMIT_BATCH,
                                         raw_batch_job_parm_item=raw_batch_job_parm_item, raw_user_job_parm_item=raw_user_job_parm_item)
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageSubmitBatch )
        spot_master_sqs_message_durable.send_message( spot_master_msg.to_json(),
                                                      message_attributes=message_attributes )
        logger.info( 'Completed Successfully' )

    except StandardError as e:
        logger.error( e )
        logger.error( traceback.format_exc() )
        sys.exit(8)
コード例 #6
0
    def send_check_status( self, spot_master_uuid ):
        """ Queue the first CheckStatus message for this Master, delayed by 5 seconds
            Per the original notes, SpotMasterMessageCheckStatus.process() then keeps queueing
            further CheckStatus messages (with a delay that depends on the state) until the
            job completes

        :param spot_master_uuid: uuid of the Master that the CheckStatus message refers to

        """
        check_status_msg = SpotMasterMsg( spot_master_uuid=spot_master_uuid,
                                          spot_master_msg_type=SpotMasterMsg.TYPE_CHECK_STATUS )
        check_status_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageCheckStatus )
        master_queue = self.spot_master_sqs_message_durable
        master_queue.send_message( check_status_msg.to_json(),
                                   delay_seconds=5,
                                   message_attributes=check_status_attributes )
コード例 #7
0
ファイル: queuemgr.py プロジェクト: hugocalean/awsspotbatch
    def send_test_data( self ):         
        """ Queue a SubmitBatch and then a CheckStatus test message on the spot master queue
            Both messages carry the same newly generated uuid1 value as spot_master_uuid
            On a StandardError the error and traceback are logged and the process exits with status 8

        """
        try:
            test_master_uuid = str( uuid.uuid1() )
            # Built in send order: SubmitBatch first, CheckStatus second
            test_msgs = (
                SpotMasterMsg( test_master_uuid, SpotMasterMsg.TYPE_SUBMIT_BATCH ),
                SpotMasterMsg( test_master_uuid, SpotMasterMsg.TYPE_CHECK_STATUS ),
            )
            master_queue = SqsMessageDurable( self.spot_master_queue_name, region_name=self.region_name,
                                              profile_name=self.profile_name )
            for test_msg in test_msgs:
                master_queue.send_message( test_msg.to_json() )

        except StandardError as err:
            logger.error( err )
            logger.error( traceback.format_exc() )
            sys.exit(8)