def process( self, message ) :
        """ Process the message

        :param message: SQS Message instance

        """
        try:
            spot_request_msg = SpotRequestMsg( raw_json=message.get_body() )
            spot_request_uuid = spot_request_msg.spot_request_uuid
            spot_master_uuid = spot_request_msg.spot_master_uuid
            spot_request_id = spot_request_msg.spot_request_id
            logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'process_check_status' )
            # Get spot request row from DynamoDB and process based on state
            spot_request_item = get_spot_request_item( self.spot_request_table_name, spot_request_uuid, region_name=self.region_name, profile_name=self.profile_name )
            logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'spot request state=' + spot_request_item[TableSpotRequest.spot_request_state_code])
             
            next_status_msg_delay_secs = 60
            is_send_request_msg_check_status = True
            spot_request_state_code = spot_request_item[TableSpotRequest.spot_request_state_code]
            # Update the LastStateCheck timestamp
            spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, {
                                            TableSpotRequest.ts_last_state_check:int( time.time() ),
                                            },
                                            region_name=self.region_name, profile_name=self.profile_name )
             
            if SpotRequestStateCode.spot_request_in_progress == spot_request_state_code:
                self.handle_state_request_spot_request_in_progress( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
            elif SpotRequestStateCode.instance_starting == spot_request_state_code:
                self.handle_state_request_instance_starting( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
            elif SpotRequestStateCode.instance_running == spot_request_state_code:
                self.handle_state_request_instance_running( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
            elif SpotRequestStateCode.instance_complete == spot_request_state_code:
                self.handle_state_request_instance_complete( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
                is_send_request_msg_check_status = False
            elif SpotRequestStateCode.instance_state_unknown == spot_request_state_code:
                self.handle_state_request_instance_state_unknown( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
            elif SpotRequestStateCode.constraint_encountered == spot_request_state_code:
                self.handle_state_request_constraint_encountered( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )       
            elif SpotRequestStateCode.instance_force_termination_pending == spot_request_state_code:
                self.handle_state_instance_force_termination_pending( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
            elif SpotRequestStateCode.instance_force_terminated == spot_request_state_code:
                self.handle_state_request_instance_force_terminated( spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid )
                is_send_request_msg_check_status = False
            
            if is_send_request_msg_check_status:
                spot_request_msg_check_status = SpotRequestMsg( spot_request_uuid=spot_request_uuid, 
                                                                spot_master_uuid=spot_master_uuid, 
                                                                spot_request_msg_type=SpotRequestMsg.TYPE_CHECK_STATUS, 
                                                                spot_request_id=spot_request_id )
                message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageCheckStatus )
                self.spot_request_sqs_message_durable.send_message( spot_request_msg_check_status.to_json(), 
                                                                    delay_seconds=next_status_msg_delay_secs, 
                                                                    message_attributes=message_attributes  )
        
            self.spot_request_sqs_message_durable.delete_message(message)

        except StandardError as e:
            logger.error( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'Exiting SpotRequestDispatcher due to exception'  )
            logger.error( fmt_request_uuid_msg_hdr( spot_request_uuid ) + str(e) )
            logger.error( fmt_request_uuid_msg_hdr( spot_request_uuid ) + traceback.format_exc() )    
    def handle_state_request_constraint_encountered( self, spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid ):
        """ Constraint encountered after spot request initiated but before request fullfilled, 
            i.e. time limit expired
            Submit another spot request

        :param spot_request_msg: 
        :param spot_request_item: 
        :param spot_request_uuid: 
        :param spot_master_uuid: 

        """
        logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'handle_state_request_constraint_encountered' )
        ts_now = int( time.time() )
        spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, {
                                                                TableSpotRequest.spot_request_state_code:SpotRequestStateCode.instance_complete,
                                                                TableSpotRequest.is_open:0, 
                                                                TableSpotRequest.ts_end:ts_now
                                                                 },
                                                                 region_name=self.region_name, profile_name=self.profile_name )
        # Create a new spot request based on the spot request that just failed
        master_msg_resubmit_failed_request = SpotMasterMsg( spot_master_uuid=spot_request_msg.spot_master_uuid, 
                                                  spot_master_msg_type=SpotMasterMsg.TYPE_RESUBMIT_FAILED_REQUEST,
                                                  spot_request_uuid=spot_request_msg.spot_request_uuid
                                                   )
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest )
        spot_master_sqs_message_durable = SqsMessageDurable( self.spot_master_queue_name, self.region_name, profile_name=self.profile_name )
        spot_master_sqs_message_durable.send_message( master_msg_resubmit_failed_request.to_json(),
                                                           message_attributes=message_attributes )
    def handle_state_instance_force_termination_pending( self, spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid ):
        """ AWS has started the termination process for this instance, i.e. the price has increased
            This is the beginning of the two minute warning pending forced termination
            Terminate the instance and start another spot request

        :param spot_request_msg: 
        :param spot_request_item: 
        :param spot_request_uuid: 
        :param spot_master_uuid: 

        """
        logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'handle_state_instance_force_termination_pending' )
        ts_now = int( time.time() )
        spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, {
                                                                TableSpotRequest.spot_request_state_code:SpotRequestStateCode.instance_complete,
                                                                TableSpotRequest.is_open:0, 
                                                                TableSpotRequest.ts_end:ts_now
                                                                 },
                                                                 region_name=self.region_name, profile_name=self.profile_name )
        # Create a new spot request based on the spot request that just failed
        master_msg_resubmit_failed_request = SpotMasterMsg( spot_master_uuid=spot_request_msg.spot_master_uuid, 
                                                  spot_master_msg_type=SpotMasterMsg.TYPE_RESUBMIT_FAILED_REQUEST,
                                                  spot_request_uuid=spot_request_msg.spot_request_uuid
                                                   )
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest )
        spot_master_sqs_message_durable = SqsMessageDurable( self.spot_master_queue_name, self.region_name, profile_name=self.profile_name )
        spot_master_sqs_message_durable.send_message( master_msg_resubmit_failed_request.to_json(),
                                                           message_attributes=message_attributes )
    def process(self, message):
        """ 
            Spot Request has completed, write completion info to SpotRequestItem in DynamoDB,
            let master know this request has completed so the master can determine if the job has completed

        :param message: SQS Message instance

        """
        try:
            spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
            spot_request_item = get_spot_request_item(
                self.spot_request_table_name,
                spot_request_msg.spot_request_uuid,
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
            ts_cmd_complete = spot_request_msg.name_value_pairs[
                SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP
            ]
            cmd_returncode = spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_RETURNCODE]
            cmd_std_out = spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_OUT]
            cmd_std_err = spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_ERR]
            key_value_pairs = {
                TableSpotRequest.is_open: 0,
                TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
                TableSpotRequest.ts_cmd_complete: ts_cmd_complete,
                TableSpotRequest.cmd_returncode: cmd_returncode,
            }
            if cmd_std_out != None and len(cmd_std_out) > 0:
                key_value_pairs[TableSpotRequest.cmd_std_out] = cmd_std_out
            if cmd_std_err != None and len(cmd_std_err) > 0:
                key_value_pairs[TableSpotRequest.cmd_std_err] = cmd_std_err
            spot_request_row_partial_save(
                self.spot_request_table_name,
                spot_request_item,
                key_value_pairs,
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
            # let the Master increment the completion count to determine if the job is complete
            master_msg_incr_instance_success = SpotMasterMsg(
                spot_master_uuid=spot_request_msg.spot_master_uuid,
                spot_master_msg_type=SpotMasterMsg.TYPE_INCR_INSTANCE_SUCCESS_CNT,
            )
            message_attributes = create_microsvc_message_attributes(
                awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageIncrSuccessCnt
            )
            spot_master_sqs_message_durable = SqsMessageDurable(
                self.spot_master_queue_name, self.region_name, profile_name=self.profile_name
            )
            spot_master_sqs_message_durable.send_message(
                master_msg_incr_instance_success.to_json(), message_attributes=message_attributes
            )
            self.spot_request_sqs_message_durable.delete_message(message)

        except StandardError as e:
            logger.error(fmt_request_item_msg_hdr(spot_request_item) + "Exiting SpotRequestDispatcher due to exception")
            logger.error(fmt_request_item_msg_hdr(spot_request_item) + str(e))
            logger.error(fmt_request_item_msg_hdr(spot_request_item) + traceback.format_exc())
    def process( self, message ) :
        """ Process the message

        :param message: SQS Message instance

        """
        try: 
            spot_master_msg = SpotMasterMsg( raw_json=message.get_body() )
            spot_master_uuid = spot_master_msg.spot_master_uuid
            logger.info( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'process_check_status' )
            # Get master row from DynamoDB and process based on state
            dynamodb_conn = boto.dynamodb2.connect_to_region( self.region_name, profile_name=self.profile_name )
            spot_master_table = Table( self.spot_master_table_name, connection=dynamodb_conn ) 
            spot_master_item = spot_master_table.get_item( spot_master_uuid=spot_master_uuid )
            logger.info( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'master state=' + spot_master_item[TableSpotMaster.spot_master_state_code])
            
            next_status_msg_delay_secs = 60
            is_send_master_msg_check_status = True
            master_state_code = spot_master_item[TableSpotMaster.spot_master_state_code]
            spot_master_item[ TableSpotMaster.ts_last_state_check ] = int( time.time() )
            spot_master_row_partial_save( self.spot_master_table_name, spot_master_item, 
                                  {TableSpotMaster.ts_last_state_check:int( time.time() )},
                                  region_name=self.region_name, profile_name=self.profile_name )
            
            # Process based on the current Master State
            if SpotMasterStateCode.master_resources_in_progress == master_state_code:
                self.handle_state_master_resources_in_progress( spot_master_item )
                next_status_msg_delay_secs = 5
            elif SpotMasterStateCode.master_role_policy_in_progress == master_state_code:
                self.handle_state_master_role_policy_in_progress( spot_master_item, dynamodb_conn )
                next_status_msg_delay_secs = 5
            elif SpotMasterStateCode.waiting_for_instances_complete == master_state_code:
                self.handle_state_waiting_for_instances_complete( spot_master_item )
            elif SpotMasterStateCode.waiting_for_instances_terminated == master_state_code:
                self.handle_state_waiting_for_instances_terminated( spot_master_item )
            elif SpotMasterStateCode.waiting_for_master_resources_terminated == master_state_code:
                self.handle_state_waiting_for_master_resources_terminated( spot_master_item )
                next_status_msg_delay_secs = 5
            elif SpotMasterStateCode.cleanup_in_progress == master_state_code:
                self.handle_state_cleanup_in_progress( spot_master_item )
            elif SpotMasterStateCode.cleanup_complete == master_state_code:
                self.handle_state_cleanup_complete( spot_master_item )
                is_send_master_msg_check_status = False
            
            self.spot_master_sqs_message_durable.delete_message(message)        
            
            if is_send_master_msg_check_status:
                spot_master_msg_check_status = SpotMasterMsg( spot_master_uuid=spot_master_uuid, spot_master_msg_type=SpotMasterMsg.TYPE_CHECK_STATUS )
                message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageCheckStatus )
                self.spot_master_sqs_message_durable.send_message( spot_master_msg_check_status.to_json(), 
                                                              delay_seconds=next_status_msg_delay_secs,
                                                              message_attributes=message_attributes )
        except StandardError as e:
            logger.error( fmt_master_uuid_msg_hdr( spot_master_uuid ) + str(e) )
            logger.error( fmt_master_uuid_msg_hdr( spot_master_uuid ) + traceback.format_exc() )
Ejemplo n.º 6
0
def submit_spot_batch_job( argv ):
    """ Submit a users' spot batch job
        Submit an SQS message containing the 2 parm files - Batch Job and User Parm

    :param argv: 

    """
    import logging.config
    if len(sys.argv) == 1:
        print 'ERROR: Missing log configuration file, first argument must be path/name.ext of the log configuration file'
        sys.exit(8)
    logging.config.fileConfig( sys.argv[1], disable_existing_loggers=False)
    logger = logging.getLogger(__name__)
    
    if len(sys.argv) == 2:
        logger.error( 'ERROR: Missing Batch Job Parm file, second argument must be path/name.ext of the log Batch Job Parm file' )
        sys.exit(8)              
    
    try:
        logger.info("Starting")
        
        path_batch_job_parm_file = sys.argv[2]
        if len(sys.argv) == 4: path_user_job_parm_file = sys.argv[3]
        else: path_user_job_parm_file = None
        
        with open( path_batch_job_parm_file ) as parm_file:
            raw_batch_job_parm_item = parm_file.read()
            
        if path_user_job_parm_file != None:   
            with open( path_user_job_parm_file ) as parm_file:
                raw_user_job_parm_item = parm_file.read()
        else: raw_user_job_parm_item = None

        batch_job_parm_item = BatchJobParmItem( stringParmFile=raw_batch_job_parm_item )

        spot_master_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_MASTER_QUEUE_NAME, 
                                                             batch_job_parm_item.primary_region_name, 
                                                             profile_name=batch_job_parm_item.profile_name )
 
        spot_master_uuid = str(uuid.uuid1())
        logger.info('Submitting test batch message, spot_master_uuid=' + spot_master_uuid )
        spot_master_msg = SpotMasterMsg( spot_master_uuid=spot_master_uuid, spot_master_msg_type=SpotMasterMsg.TYPE_SUBMIT_BATCH,
                                         raw_batch_job_parm_item=raw_batch_job_parm_item, raw_user_job_parm_item=raw_user_job_parm_item)
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageSubmitBatch )
        spot_master_sqs_message_durable.send_message( spot_master_msg.to_json(),
                                                      message_attributes=message_attributes )
        logger.info( 'Completed Successfully' )

    except StandardError as e:
        logger.error( e )
        logger.error( traceback.format_exc() )
        sys.exit(8)
    def process( self, message ) :
        """ Start SpotRequest process
            1. Create item in SpotRequestItem table
            2. queue up potRequestMessageCheckStatus, this will start the state-based process

        :param message: SQS Message instance

        """
        try:
            spot_request_msg = SpotRequestMsg( raw_json=message.get_body() )
            logger.info( fmt_request_uuid_msg_hdr( spot_request_msg.spot_request_uuid ) + 'process_spot_request_initiated for spot_master_uuid: ' + spot_request_msg.spot_master_uuid )
            ts_now = int( time.time() )
            dict_create_spot_request_item = {
                                        TableSpotRequest.spot_request_uuid:spot_request_msg.spot_request_uuid,
                                        TableSpotRequest.spot_master_uuid:spot_request_msg.spot_master_uuid,
                                        TableSpotRequest.spot_request_id:spot_request_msg.spot_request_id,
                                        TableSpotRequest.ts_last_state_check:ts_now,
                                        TableSpotRequest.attempt_number:spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER ],
                                        TableSpotRequest.spot_price:spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_SPOT_PRICE ],
                                        TableSpotRequest.instance_username:spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME ],
                                        TableSpotRequest.is_open:1,
                                        TableSpotRequest.spot_request_state_code:SpotRequestStateCode.spot_request_in_progress,
                                        TableSpotRequest.ts_start:ts_now,
                                       }
            put_attempt_cnt = 0
            put_attempt_max = 10
            while True:
                dynamodb_conn = boto.dynamodb2.connect_to_region( self.region_name, profile_name=self.profile_name )
                spot_request_table = Table( self.spot_request_table_name, connection=dynamodb_conn ) 
                result_spot_request_put = spot_request_table.put_item(data=dict_create_spot_request_item)
                if result_spot_request_put: break
                put_attempt_cnt += 1
                if put_attempt_cnt == put_attempt_max: 
                    raise awsspotbatch.common.exception.DynamoDbPutItemMaxAttemptsExceeded('Failed attempt to insert item in: ' + self.spot_request_table_name + 
                                                             ' for spot_request_uuid: ' + spot_request_msg.spot_request_uuid, self.spot_request_table_name )
                time.sleep(6)
            
            next_status_msg_delay_secs = 30
            spot_request_msg_check_status = SpotRequestMsg( 
                                                           spot_request_uuid=spot_request_msg.spot_request_uuid, 
                                                           spot_master_uuid=spot_request_msg.spot_master_uuid,
                                                           spot_request_msg_type=SpotRequestMsg.TYPE_CHECK_STATUS, 
                                                           spot_request_id=spot_request_msg.spot_request_id )
            message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageCheckStatus )
            self.spot_request_sqs_message_durable.send_message( spot_request_msg_check_status.to_json(), delay_seconds=next_status_msg_delay_secs, message_attributes=message_attributes )
            self.spot_request_sqs_message_durable.delete_message(message) 
            
        except StandardError as e:
            logger.error( fmt_request_uuid_msg_hdr( spot_request_msg.spot_request_uuid ) + 'Exiting SpotRequestDispatcher due to exception'  )
            logger.error( fmt_request_uuid_msg_hdr( spot_request_msg.spot_request_uuid ) + str(e) )
            logger.error( fmt_request_uuid_msg_hdr( spot_request_msg.spot_request_uuid ) + traceback.format_exc() )    
    def send_check_status( self, spot_master_uuid ):
        """ Queue a Message to do a CheckStatus on this Master in the near future, i.e. in 5 seconds
            This is the first message that will do CheckStatus to check/transition the Master status, 
            in SpotMasterMessageCheckStatus.process() it will continue to queue up another CheckStatus
            message (with a variable message delay based on the state) until the job completes

        :param spot_master_uuid: 

        """
        spot_master_msg_check_status = SpotMasterMsg( spot_master_uuid=spot_master_uuid, spot_master_msg_type=SpotMasterMsg.TYPE_CHECK_STATUS )
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageCheckStatus )
        self.spot_master_sqs_message_durable.send_message( spot_master_msg_check_status.to_json(), 
                                                           delay_seconds=5,
                                                           message_attributes=message_attributes )
    def handle_state_master_role_policy_in_progress(self, spot_master_item, dynamodb_conn ):
        """ Verify the Policy is added to the Role 

        :param spot_master_item: 
        :param dynamodb_conn: 

        """
        logger.info( fmt_master_item_msg_hdr( spot_master_item ) + 'handle_state_master_role_policy_in_progress')
        iam_conn = awsext.iam.connect_to_region( self.region_name, profile_name=self.profile_name )
        is_role_policy_added = iam_conn.is_role_policy_added( role_name=spot_master_item[ TableSpotMaster.role_name ], 
                                       policy_name=spot_master_item[ TableSpotMaster.policy_name ])
        if not is_role_policy_added: return
        # For some bizarre timing reason, is_role_policy_added can return True but the spot request fails on IAM role not attached to instance profile
        #  - give it a few seconds to clear
        time.sleep(5)
    
        spot_master_state_code = SpotMasterStateCode.waiting_for_instances_complete
        
        # Request spot instances
        spot_instance_requests = submit_request_spot_instances( spot_master_item, self.region_name, self.profile_name )
        
        # Queue up a SpotRequestMsg for each spot request - this will manage all states for SpotRequest        
        if spot_instance_requests != None:
            spot_request_sqs_message_durable = SqsMessageDurable( self.spot_request_queue_name, self.region_name, profile_name=self.profile_name )
            for spot_instance_request in spot_instance_requests:
                spot_request_uuid = str(uuid.uuid1())
                spot_request_msg = SpotRequestMsg( spot_request_uuid=spot_request_uuid, 
                                                   spot_master_uuid=spot_master_item[ TableSpotMaster.spot_master_uuid ], 
                                                   spot_request_msg_type=SpotRequestMsg.TYPE_SPOT_REQUEST_INITIATED, 
                                                   spot_request_id=spot_instance_request.id )
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_SPOT_PRICE ] = str( spot_master_item[TableSpotMaster.cheapest_price])
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME ] = spot_master_item[ TableSpotMaster.instance_username ]
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER ] = 1
                message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageSpotRequestInitiated )
                spot_request_sqs_message_durable.send_message( spot_request_msg.to_json(), message_attributes=message_attributes  )
        else: spot_master_state_code = SpotMasterStateCode.no_instances_available
        
        spot_master_row_partial_save( self.spot_master_table_name, spot_master_item, {
                                                              TableSpotMaster.spot_master_state_code:spot_master_state_code
                                                              },
                                      region_name=self.region_name, profile_name=self.profile_name  )
def main():
    """ """
    import logging.config
    logging.config.fileConfig( '../../../../config/consoleandfile.conf', disable_existing_loggers=False)
    logger = logging.getLogger(__name__)
    
    try:
        logger.info( 'Starting' )
        master_parm_item = MasterParmItem( sys.argv[1] )
         
        spot_master_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_MASTER_QUEUE_NAME, master_parm_item.region_name, profile_name=master_parm_item.profile_name )
        spot_request_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_REQUEST_QUEUE_NAME, master_parm_item.region_name, profile_name=master_parm_item.profile_name )

        # TEST TEST TEST - only during development
        spot_master_sqs_message_durable.purge_queue()
        spot_request_sqs_message_durable.purge_queue()
        
        spot_master_dispatcher = SpotMasterDispatcher( region_name=master_parm_item.region_name, 
                                                       profile_name=master_parm_item.profile_name )
        spot_request_dispatcher = SpotRequestDispatcher( region_name=master_parm_item.region_name, 
                                                       profile_name=master_parm_item.profile_name )
        
        spot_master_dispatcher.start()
        spot_request_dispatcher.start()

        spot_master_msg_batch_submit = create_spot_master_msg_batch_submit( sys.argv[2], sys.argv[3] )
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageSubmitBatch )
        spot_master_sqs_message_durable.send_message( spot_master_msg_batch_submit.to_json(),
                                                      message_attributes=message_attributes )
         
        spot_master_dispatcher.join()
        
        logger.info( 'Completed Successfully' )

    except StandardError as e:
        logger.error( e )
        logger.error( traceback.format_exc() )
        sys.exit(8)
    def process( self, message ) :
        """ Try to submit another Spot Request based on the one that just failed

        :param message: SQS Message instance

        """
        try: 
            spot_master_msg = SpotMasterMsg( raw_json=message.get_body() )
            spot_master_uuid = spot_master_msg.spot_master_uuid       
            logger.info( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'process_resubmit_failed_request')
            dynamodb_conn = boto.dynamodb2.connect_to_region( self.region_name, profile_name=self.profile_name )
            spot_master_table = Table( self.spot_master_table_name, connection=dynamodb_conn ) 
            spot_master_item = spot_master_table.get_item( spot_master_uuid=spot_master_uuid )
            spot_request_table = Table( self.spot_request_table_name, connection=dynamodb_conn ) 
            failed_spot_request_item = spot_request_table.get_item( spot_request_uuid=spot_master_msg.spot_request_uuid )
    
            # Request spot instance
            spot_instance_request = self.resubmit_failed_request_spot_instance( spot_master_item, failed_spot_request_item, dynamodb_conn )
    
            # Queue up a SpotRequestMsg     
            if spot_instance_request != None:
                spot_request_uuid = str(uuid.uuid1())
                spot_request_msg = SpotRequestMsg( spot_request_uuid=spot_request_uuid, 
                                                   spot_master_uuid=spot_master_item[ TableSpotMaster.spot_master_uuid ], 
                                                   spot_request_msg_type=SpotRequestMsg.TYPE_SPOT_REQUEST_INITIATED, 
                                                   spot_request_id=spot_instance_request.id )
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_SPOT_PRICE ] = str( spot_instance_request.price )
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME ] = spot_master_item[ TableSpotMaster.instance_username ]
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER ] = int( failed_spot_request_item[ TableSpotRequest.attempt_number ] + 1 )
                
                spot_request_sqs_message_durable = SqsMessageDurable( self.spot_request_queue_name, self.region_name, profile_name=self.profile_name )
                spot_request_sqs_message_durable.send_message( spot_request_msg.to_json(), message_attributes=create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageSpotRequestInitiated ) )
                self.spot_master_sqs_message_durable.delete_message(message) 
            # No instances available - resubmit this message with a delay timer so it will get reprocessed in future
            else:
                logger.warning( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'No spot instances available, will try again in ' + str(awsspotbatch.common.const.NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES) + ' minutes')
                delay_seconds = awsspotbatch.common.const.NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES * 60
                self.spot_master_sqs_message_durable.send_message( message.get_body(), 
                                                                   message_attributes=create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest ), 
                                                                   delay_seconds=delay_seconds )
                self.spot_master_sqs_message_durable.delete_message(message)

        except StandardError as e:
            logger.error( fmt_master_item_msg_hdr( spot_master_item ) + str(e) )
            logger.error( fmt_master_item_msg_hdr( spot_master_item ) + traceback.format_exc() )
Ejemplo n.º 12
0
def main():     
    """ """
    if( len(sys.argv) < 2 ):
        print 'Invalid format, execution cancelled'
        print 'Correct format: python awsspotbatch.spotclientlaunch <parmFile.json>'
        sys.exit(8)
    
    logging.basicConfig( format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] [%(message)s]', level=logging.INFO )
    logger = logging.getLogger(__name__)

    try:
        spot_client_parm_item = SpotClientParmItem( pathInParmFile=sys.argv[1] )    
        logger.info( 'Starting, region_name=' + spot_client_parm_item.region_name )
        spot_instance_status_thread = SpotInstanceStatusThread( 
                                                             spot_client_parm_item.spot_request_queue_name, 
                                                             spot_client_parm_item.region_name, 
                                                             spot_request_uuid=spot_client_parm_item.spot_request_uuid, 
                                                             spot_master_uuid=spot_client_parm_item.spot_master_uuid, 
                                                             spot_request_id=spot_client_parm_item.spot_request_id )
        spot_instance_status_thread.start()
        child_process = subprocess.Popen( spot_client_parm_item.script_name_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
        std_out, std_err = child_process.communicate( )
        returncode = child_process.returncode   
        std_out, std_err = awsspotbatch.common.util.trimStdOutErrSqsPayload( std_out, std_err )
        sqs_message_send_durable = SqsMessageDurable( spot_client_parm_item.spot_request_queue_name,
                                                      spot_client_parm_item.region_name)
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageInstanceBatchProcessComplete )
        sqs_message_send_durable.send_message( SpotRequestMsg( spot_request_uuid=spot_client_parm_item.spot_request_uuid,
                                                               spot_master_uuid=spot_client_parm_item.spot_master_uuid,
                                                               spot_request_msg_type=SpotRequestMsg.TYPE_INSTANCE_BATCH_PROCESS_COMPLETE,
                                                               spot_request_id=spot_client_parm_item.spot_request_id,
                                                               name_value_pairs={
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP:int(time.time()),
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_RETURNCODE:str(returncode),
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_OUT:std_out,
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_ERR:std_err                                                                                        
                                                                                 } ).to_json(),
                                              message_attributes=message_attributes )
        spot_instance_status_thread.shutdown()
        spot_instance_status_thread.join( 60 )
        logger.info( 'Completed Successfully, child_process returncode=' + str(returncode) )

    except StandardError as e:
        spot_instance_status_thread.is_shutdown = True;
        message = ''
        for arg in e.args:
            message = arg + '|'
        logger.error( message )
        logger.error( traceback.format_exc() )
        sqs_message_send_durable = SqsMessageDurable( spot_client_parm_item.spot_request_queue_name,
                                                      spot_client_parm_item.region_name)
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageInstanceBatchProcessStartException )
        sqs_message_send_durable.send_message( SpotRequestMsg( spot_request_uuid=spot_client_parm_item.spot_request_uuid,
                                                               spot_master_uuid=spot_client_parm_item.spot_master_uuid,
                                                               spot_request_msg_type=SpotRequestMsg.TYPE_INSTANCE_BATCH_PROCESS_START_EXCEPTION,
                                                               spot_request_id=spot_client_parm_item.spot_request_id,
                                                               name_value_pairs={
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP:int(time.time()),
                                                                                 SpotRequestMsg.PAIR_NAME_INSTANCE_BATCH_PROCESS_START_EXCEPTION_MESSAGE:message,
                                                                                 SpotRequestMsg.PAIR_NAME_INSTANCE_BATCH_PROCESS_START_EXCEPTION_TRACEBACK:traceback.format_exc(),                                          
                                                                                 } ).to_json(),
                                              message_attributes=message_attributes  )
        spot_instance_status_thread.join( 60 )
        
        sys.exit(8)