def process(self, message):
    """ Check the current state of a spot request and dispatch to the matching state handler

    Steps, all performed inside a single try block:
        1. Parse the SQS message body into a SpotRequestMsg
        2. Fetch the spot request item via get_spot_request_item and record the
           time of this check in TableSpotRequest.ts_last_state_check
        3. Call the handle_state_* method that matches the item's state code
        4. Unless the state is instance_complete or instance_force_terminated, queue a
           new TYPE_CHECK_STATUS message delayed by 60 seconds
        5. Delete the incoming message from the queue

    Any StandardError is logged and not re-raised; in that case the incoming message is not deleted.

    :param message: SQS Message instance
    """
    # Bound before the try block so the except handler can always build a log header,
    # even when the message body fails to parse and the real uuid is never read.
    # assumes fmt_request_uuid_msg_hdr accepts an arbitrary string - TODO confirm
    spot_request_uuid = 'unknown'
    try:
        spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
        spot_request_uuid = spot_request_msg.spot_request_uuid
        spot_master_uuid = spot_request_msg.spot_master_uuid
        spot_request_id = spot_request_msg.spot_request_id
        logger.info(fmt_request_uuid_msg_hdr(spot_request_uuid) + 'process_check_status')

        # Get spot request row from DynamoDB and process based on state
        spot_request_item = get_spot_request_item(
            self.spot_request_table_name,
            spot_request_uuid,
            region_name=self.region_name,
            profile_name=self.profile_name)
        spot_request_state_code = spot_request_item[TableSpotRequest.spot_request_state_code]
        logger.info(fmt_request_uuid_msg_hdr(spot_request_uuid) +
                    'spot request state=' + spot_request_state_code)

        next_status_msg_delay_secs = 60
        # Cleared below for the two states after which no further status check is queued
        is_send_request_msg_check_status = True

        # Update the LastStateCheck timestamp
        spot_request_row_partial_save(
            self.spot_request_table_name,
            spot_request_item,
            {TableSpotRequest.ts_last_state_check: int(time.time())},
            region_name=self.region_name,
            profile_name=self.profile_name)

        if SpotRequestStateCode.spot_request_in_progress == spot_request_state_code:
            self.handle_state_request_spot_request_in_progress(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
        elif SpotRequestStateCode.instance_starting == spot_request_state_code:
            self.handle_state_request_instance_starting(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
        elif SpotRequestStateCode.instance_running == spot_request_state_code:
            self.handle_state_request_instance_running(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
        elif SpotRequestStateCode.instance_complete == spot_request_state_code:
            self.handle_state_request_instance_complete(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
            is_send_request_msg_check_status = False
        elif SpotRequestStateCode.instance_state_unknown == spot_request_state_code:
            self.handle_state_request_instance_state_unknown(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
        elif SpotRequestStateCode.constraint_encountered == spot_request_state_code:
            self.handle_state_request_constraint_encountered(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
        elif SpotRequestStateCode.instance_force_termination_pending == spot_request_state_code:
            self.handle_state_instance_force_termination_pending(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
        elif SpotRequestStateCode.instance_force_terminated == spot_request_state_code:
            self.handle_state_request_instance_force_terminated(
                spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid)
            is_send_request_msg_check_status = False
        # A state code matching none of the branches falls through with no handler call
        # and still gets another status check queued below.

        if is_send_request_msg_check_status:
            spot_request_msg_check_status = SpotRequestMsg(
                spot_request_uuid=spot_request_uuid,
                spot_master_uuid=spot_master_uuid,
                spot_request_msg_type=SpotRequestMsg.TYPE_CHECK_STATUS,
                spot_request_id=spot_request_id)
            message_attributes = create_microsvc_message_attributes(
                awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageCheckStatus)
            self.spot_request_sqs_message_durable.send_message(
                spot_request_msg_check_status.to_json(),
                delay_seconds=next_status_msg_delay_secs,
                message_attributes=message_attributes)

        # Reached only when everything above succeeded
        self.spot_request_sqs_message_durable.delete_message(message)

    except StandardError as e:
        msg_hdr = fmt_request_uuid_msg_hdr(spot_request_uuid)
        logger.error(msg_hdr + 'Exiting SpotRequestDispatcher due to exception')
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())
def process(self, message):
    """ Start SpotRequest process
        1. Create item in SpotRequestItem table
        2. Queue up SpotRequestMessageCheckStatus, this will start the state-based process

    The put into the spot request table is attempted up to 10 times, sleeping 6 seconds
    between attempts, and DynamoDbPutItemMaxAttemptsExceeded is raised inside the try block
    once the last attempt has failed. The incoming message is deleted only after the
    check-status message has been sent.

    Any StandardError is logged and not re-raised.

    :param message: SQS Message instance
    """
    # Bound before the try block so the except handler can always build a log header,
    # even when the message body fails to parse and spot_request_msg is never assigned.
    # assumes fmt_request_uuid_msg_hdr accepts an arbitrary string - TODO confirm
    spot_request_uuid = 'unknown'
    try:
        spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
        spot_request_uuid = spot_request_msg.spot_request_uuid
        logger.info(fmt_request_uuid_msg_hdr(spot_request_uuid) +
                    'process_spot_request_initiated for spot_master_uuid: ' +
                    spot_request_msg.spot_master_uuid)

        ts_now = int(time.time())
        name_value_pairs = spot_request_msg.name_value_pairs
        dict_create_spot_request_item = {
            TableSpotRequest.spot_request_uuid: spot_request_uuid,
            TableSpotRequest.spot_master_uuid: spot_request_msg.spot_master_uuid,
            TableSpotRequest.spot_request_id: spot_request_msg.spot_request_id,
            TableSpotRequest.ts_last_state_check: ts_now,
            TableSpotRequest.attempt_number: name_value_pairs[SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER],
            TableSpotRequest.spot_price: name_value_pairs[SpotRequestMsg.PAIR_NAME_SPOT_PRICE],
            TableSpotRequest.instance_username: name_value_pairs[SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME],
            TableSpotRequest.is_open: 1,
            TableSpotRequest.spot_request_state_code: SpotRequestStateCode.spot_request_in_progress,
            TableSpotRequest.ts_start: ts_now,
        }

        put_attempt_cnt = 0
        put_attempt_max = 10
        while True:
            # A fresh connection and Table are created for every attempt
            dynamodb_conn = boto.dynamodb2.connect_to_region(self.region_name, profile_name=self.profile_name)
            spot_request_table = Table(self.spot_request_table_name, connection=dynamodb_conn)
            result_spot_request_put = spot_request_table.put_item(data=dict_create_spot_request_item)
            if result_spot_request_put:
                break
            put_attempt_cnt += 1
            if put_attempt_cnt == put_attempt_max:
                raise awsspotbatch.common.exception.DynamoDbPutItemMaxAttemptsExceeded(
                    'Failed attempt to insert item in: ' + self.spot_request_table_name +
                    ' for spot_request_uuid: ' + spot_request_uuid,
                    self.spot_request_table_name)
            time.sleep(6)

        next_status_msg_delay_secs = 30
        spot_request_msg_check_status = SpotRequestMsg(
            spot_request_uuid=spot_request_uuid,
            spot_master_uuid=spot_request_msg.spot_master_uuid,
            spot_request_msg_type=SpotRequestMsg.TYPE_CHECK_STATUS,
            spot_request_id=spot_request_msg.spot_request_id)
        message_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageCheckStatus)
        self.spot_request_sqs_message_durable.send_message(
            spot_request_msg_check_status.to_json(),
            delay_seconds=next_status_msg_delay_secs,
            message_attributes=message_attributes)

        # Reached only when everything above succeeded
        self.spot_request_sqs_message_durable.delete_message(message)

    except StandardError as e:
        msg_hdr = fmt_request_uuid_msg_hdr(spot_request_uuid)
        logger.error(msg_hdr + 'Exiting SpotRequestDispatcher due to exception')
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())
def process(self, message):
    """ Try to submit another Spot Request based on the one that just failed

    Loads the spot master item and the failed spot request item from DynamoDB, then calls
    resubmit_failed_request_spot_instance. When that returns a spot instance request, a
    TYPE_SPOT_REQUEST_INITIATED SpotRequestMsg with a new uuid and an incremented attempt
    number is sent to the spot request queue. When it returns None, the original message
    body is re-sent to the spot master queue with a delay of
    NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES minutes. In both cases the incoming
    message is then deleted.

    Any StandardError is logged and not re-raised; in that case the incoming message is not deleted.

    :param message: SQS Message instance
    """
    # Both names are bound before the try block so the except handler can always build
    # a log header, even when the failure happens before the master item has been loaded.
    # assumes fmt_master_uuid_msg_hdr accepts an arbitrary string - TODO confirm
    spot_master_uuid = 'unknown'
    spot_master_item = None
    try:
        spot_master_msg = SpotMasterMsg(raw_json=message.get_body())
        spot_master_uuid = spot_master_msg.spot_master_uuid
        logger.info(fmt_master_uuid_msg_hdr(spot_master_uuid) + 'process_resubmit_failed_request')

        dynamodb_conn = boto.dynamodb2.connect_to_region(self.region_name, profile_name=self.profile_name)
        spot_master_table = Table(self.spot_master_table_name, connection=dynamodb_conn)
        spot_master_item = spot_master_table.get_item(spot_master_uuid=spot_master_uuid)
        spot_request_table = Table(self.spot_request_table_name, connection=dynamodb_conn)
        failed_spot_request_item = spot_request_table.get_item(spot_request_uuid=spot_master_msg.spot_request_uuid)

        # Request spot instance
        spot_instance_request = self.resubmit_failed_request_spot_instance(
            spot_master_item, failed_spot_request_item, dynamodb_conn)

        if spot_instance_request is not None:
            # Queue up a SpotRequestMsg
            spot_request_uuid = str(uuid.uuid1())
            spot_request_msg = SpotRequestMsg(
                spot_request_uuid=spot_request_uuid,
                spot_master_uuid=spot_master_item[TableSpotMaster.spot_master_uuid],
                spot_request_msg_type=SpotRequestMsg.TYPE_SPOT_REQUEST_INITIATED,
                spot_request_id=spot_instance_request.id)
            spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_SPOT_PRICE] = str(spot_instance_request.price)
            spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME] = \
                spot_master_item[TableSpotMaster.instance_username]
            spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER] = \
                int(failed_spot_request_item[TableSpotRequest.attempt_number] + 1)
            spot_request_sqs_message_durable = SqsMessageDurable(
                self.spot_request_queue_name, self.region_name, profile_name=self.profile_name)
            spot_request_sqs_message_durable.send_message(
                spot_request_msg.to_json(),
                message_attributes=create_microsvc_message_attributes(
                    awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageSpotRequestInitiated))
        else:
            # No instances available - resubmit this message with a delay timer so it will get reprocessed in future
            logger.warning(fmt_master_uuid_msg_hdr(spot_master_uuid) +
                           'No spot instances available, will try again in ' +
                           str(awsspotbatch.common.const.NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES) + ' minutes')
            delay_seconds = awsspotbatch.common.const.NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES * 60
            self.spot_master_sqs_message_durable.send_message(
                message.get_body(),
                message_attributes=create_microsvc_message_attributes(
                    awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest),
                delay_seconds=delay_seconds)

        # Both branches finish by removing the incoming message; it is only reached
        # when the send in the chosen branch did not raise.
        self.spot_master_sqs_message_durable.delete_message(message)

    except StandardError as e:
        # Prefer the item-based header when the master item was loaded, otherwise
        # fall back to the uuid-based header.
        if spot_master_item is not None:
            msg_hdr = fmt_master_item_msg_hdr(spot_master_item)
        else:
            msg_hdr = fmt_master_uuid_msg_hdr(spot_master_uuid)
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())
def handle_state_master_role_policy_in_progress(self, spot_master_item, dynamodb_conn):
    """ Verify the Policy is added to the Role, then submit the spot instance requests

    Returns without changing anything while the role policy is not yet added. Otherwise
    it sleeps 5 seconds, calls submit_request_spot_instances, and:
        - when requests come back, sends one TYPE_SPOT_REQUEST_INITIATED SpotRequestMsg per
          request (attempt number 1) and saves state waiting_for_instances_complete
        - when None comes back, saves state no_instances_available

    :param spot_master_item: spot master item; role_name, policy_name, spot_master_uuid,
        cheapest_price and instance_username are read from it
    :param dynamodb_conn: not used by this method
    """
    logger.info(fmt_master_item_msg_hdr(spot_master_item) + 'handle_state_master_role_policy_in_progress')

    iam_conn = awsext.iam.connect_to_region(self.region_name, profile_name=self.profile_name)
    is_role_policy_added = iam_conn.is_role_policy_added(
        role_name=spot_master_item[TableSpotMaster.role_name],
        policy_name=spot_master_item[TableSpotMaster.policy_name])
    if not is_role_policy_added:
        return

    # For some bizarre timing reason, is_role_policy_added can return True but the spot request fails
    # on IAM role not attached to instance profile - give it a few seconds to clear
    time.sleep(5)

    # Request spot instances
    spot_instance_requests = submit_request_spot_instances(spot_master_item, self.region_name, self.profile_name)

    if spot_instance_requests is not None:
        # NOTE(review): an empty (non-None) result also takes this branch, so the master moves to
        # waiting_for_instances_complete with no request messages queued - confirm that is intended
        spot_master_state_code = SpotMasterStateCode.waiting_for_instances_complete

        # Queue up a SpotRequestMsg for each spot request - this will manage all states for SpotRequest
        spot_request_sqs_message_durable = SqsMessageDurable(
            self.spot_request_queue_name, self.region_name, profile_name=self.profile_name)
        for spot_instance_request in spot_instance_requests:
            spot_request_uuid = str(uuid.uuid1())
            spot_request_msg = SpotRequestMsg(
                spot_request_uuid=spot_request_uuid,
                spot_master_uuid=spot_master_item[TableSpotMaster.spot_master_uuid],
                spot_request_msg_type=SpotRequestMsg.TYPE_SPOT_REQUEST_INITIATED,
                spot_request_id=spot_instance_request.id)
            spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_SPOT_PRICE] = \
                str(spot_master_item[TableSpotMaster.cheapest_price])
            spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME] = \
                spot_master_item[TableSpotMaster.instance_username]
            spot_request_msg.name_value_pairs[SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER] = 1
            message_attributes = create_microsvc_message_attributes(
                awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageSpotRequestInitiated)
            spot_request_sqs_message_durable.send_message(
                spot_request_msg.to_json(), message_attributes=message_attributes)
    else:
        spot_master_state_code = SpotMasterStateCode.no_instances_available

    # Persist whichever state was chosen above
    spot_master_row_partial_save(
        self.spot_master_table_name,
        spot_master_item,
        {TableSpotMaster.spot_master_state_code: spot_master_state_code},
        region_name=self.region_name,
        profile_name=self.profile_name)