def check_spot_termination(self):
    """Ask the GCE metadata server whether this instance has been preempted.

    GCE offers several ways to detect preemption, see
    https://cloud.google.com/compute/docs/instances/create-start-preemptible-instance#detecting_if_an_instance_was_preempted
    The internal metadata endpoint is used here because fetching zone
    operations is not implemented in Apache Libcloud yet.

    The query is sent with ``wait_for_change=true`` and a timeout of
    SPOT_TERMINATION_METADATA_CHECK_TIMEOUT seconds. A SpotTerminationEvent
    is raised only when the answer flips to "true" while the previously
    remembered state was not preempted.

    :return: 0 if the metadata query failed, otherwise
             SPOT_TERMINATION_CHECK_DELAY.
    """
    try:
        metadata_query = (
            'curl "http://metadata.google.internal/computeMetadata/v1/instance/preempted'
            '?wait_for_change=true&timeout_sec=%d" -H "Metadata-Flavor: Google"'
            % SPOT_TERMINATION_METADATA_CHECK_TIMEOUT)
        answer = self.remoter.run(metadata_query, verbose=False).stdout.strip()
    except Exception as details:  # pylint: disable=broad-except
        self.log.warning(
            'Error during getting spot termination notification %s', details)
        return 0

    is_preempted = answer.lower() == 'true'
    # Report only the transition into the preempted state, not every poll
    # that still sees it.
    if is_preempted and not self._preempted_last_state:
        self.log.warning('Got spot termination notification from GCE')
        SpotTerminationEvent(node=self, message='Instance was preempted.')
    self._preempted_last_state = is_preempted

    return SPOT_TERMINATION_CHECK_DELAY
def monitor_aws_termination_thread(self):
    """Poll for an AWS spot termination notice until told to stop.

    Each iteration waits for SSH to be reachable, fetches the notice via
    ``get_aws_termination_notification()`` and, when one is present, raises
    a SpotTerminationEvent carrying the parsed notice plus a ``time-left``
    entry (seconds until the announced termination time).

    The loop sleeps 5 seconds between polls. After a notice it sleeps until
    15 seconds before the announced time instead, falling back to 5 seconds
    when that moment has already passed. The loop ends once
    ``self.termination_event`` is set.
    """
    # Local import keeps this block self-contained; calendar is stdlib.
    import calendar

    while True:
        duration = 5
        if self.termination_event.is_set():
            break
        try:
            self.wait_ssh_up(verbose=False)
        except Exception as ex:
            logger.warning(
                "Unable to connect to '%s'. Probably the node was terminated or is still booting. "
                "Error details: '%s'", self.name, ex)
            continue
        aws_message = self.get_aws_termination_notification()
        if aws_message:
            self.log.warning(
                'Got spot termination notification from AWS %s', aws_message)
            terminate_action = json.loads(aws_message)
            # The notice time is UTC (trailing "Z"). calendar.timegm reads
            # the struct_time as UTC, whereas time.mktime would interpret it
            # as local time and skew the result by the host's UTC offset.
            terminate_action_timestamp = calendar.timegm(
                datetime.strptime(terminate_action['time'],
                                  "%Y-%m-%dT%H:%M:%SZ").timetuple())
            time_left = terminate_action_timestamp - time.time()
            duration = time_left - 15
            if duration <= 0:
                duration = 5
            terminate_action['time-left'] = time_left
            SpotTerminationEvent(node=self, aws_message=terminate_action)
        time.sleep(duration)
def check_spot_termination(self):
    """Query the EC2 instance metadata for a pending spot instance action.

    When a notice is present, a SpotTerminationEvent is raised with the
    parsed notice, extended by a ``time-left`` entry holding the seconds
    until the announced time.

    :return: delay in seconds before the next check. 0 when the query
             failed, when the endpoint answered "404 - Not Found" (no
             action scheduled) or when the answer could not be parsed;
             otherwise the time left minus SPOT_TERMINATION_CHECK_OVERHEAD,
             never below 0.
    """
    # Local import keeps this block self-contained; calendar is stdlib.
    import calendar

    try:
        result = self.remoter.run(
            'curl http://169.254.169.254/latest/meta-data/spot/instance-action',
            verbose=False)
        status = result.stdout.strip()
    except Exception as details:  # pylint: disable=broad-except
        self.log.warning(
            'Error during getting spot termination notification %s', details)
        return 0

    if '404 - Not Found' in status:
        return 0

    try:
        terminate_action = json.loads(status)
        # The notice time is UTC (trailing "Z"). calendar.timegm reads the
        # struct_time as UTC, whereas time.mktime would interpret it as
        # local time and skew the result by the host's UTC offset.
        terminate_action_timestamp = calendar.timegm(
            datetime.strptime(terminate_action['time'],
                              "%Y-%m-%dT%H:%M:%SZ").timetuple())
    except (ValueError, KeyError, TypeError) as details:
        # Anything that is neither the 404 page nor a valid notice (e.g.
        # empty curl output) is handled like a failed query instead of
        # raising out of the checker.
        self.log.warning(
            'Error during getting spot termination notification %s', details)
        return 0

    self.log.warning('Got spot termination notification from AWS %s', status)
    next_check_delay = terminate_action_timestamp - time.time()
    terminate_action['time-left'] = next_check_delay
    SpotTerminationEvent(node=self, message=terminate_action)
    return max(next_check_delay - SPOT_TERMINATION_CHECK_OVERHEAD, 0)
def test_spot_termination():
    """Smoke-check that a SpotTerminationEvent can be built and rendered with str()."""
    event = SpotTerminationEvent(
        node='test',
        message='{"action": "terminate", "time": "2017-09-18T08:22:00Z"}')
    str(event)
def test_spot_termination(self):  # pylint: disable=no-self-use
    """Smoke-check that a SpotTerminationEvent can be built and rendered with str()."""
    event = SpotTerminationEvent(
        node='test',
        message='{"action": "terminate", "time": "2017-09-18T08:22:00Z"}')
    str(event)