def check_spot_termination(self):
        """Check if a spot instance termination was initiated by the cloud.

        There are few different methods how to detect this event in GCE:

            https://cloud.google.com/compute/docs/instances/create-start-preemptible-instance#detecting_if_an_instance_was_preempted

        but we use internal metadata because the getting of zone operations is not implemented in Apache Libcloud yet.
        """
        # Long-poll the metadata server: the request blocks until the
        # "preempted" flag changes or timeout_sec expires.
        query = (
            'curl "http://metadata.google.internal/computeMetadata/v1/instance/preempted'
            '?wait_for_change=true&timeout_sec=%d" -H "Metadata-Flavor: Google"'
            % SPOT_TERMINATION_METADATA_CHECK_TIMEOUT)
        try:
            response = self.remoter.run(query, verbose=False)
            flag = response.stdout.strip()
        except Exception as exc:  # pylint: disable=broad-except
            self.log.warning(
                'Error during getting spot termination notification %s',
                exc)
            return 0
        is_preempted = flag.lower() == 'true'
        # Publish only on the False -> True transition so repeated polls
        # don't emit duplicate events for the same preemption.
        if is_preempted and not self._preempted_last_state:
            self.log.warning('Got spot termination notification from GCE')
            SpotTerminationEvent(node=self,
                                 message='Instance was preempted.').publish()
        self._preempted_last_state = is_preempted
        return SPOT_TERMINATION_CHECK_DELAY
    def check_spot_termination(self):
        """Check AWS instance metadata for a scheduled spot interruption.

        Queries the ``spot/instance-action`` metadata endpoint, which returns
        404 until an interruption is actually scheduled.  When a notice is
        present, publishes a :class:`SpotTerminationEvent` and returns the
        number of seconds to wait before the next check (time left until the
        action, minus the check overhead, floored at 0).  Returns 0 when no
        interruption is scheduled or the metadata query failed.
        """
        import calendar  # local: only needed for the UTC timestamp conversion

        try:
            result = self.remoter.run(
                'curl http://169.254.169.254/latest/meta-data/spot/instance-action', verbose=False)
            status = result.stdout.strip()
        except Exception as details:  # pylint: disable=broad-except
            self.log.warning('Error during getting spot termination notification %s', details)
            return 0

        # Endpoint responds 404 until AWS actually schedules an interruption.
        if '404 - Not Found' in status:
            return 0

        self.log.warning('Got spot termination notification from AWS %s', status)
        terminate_action = json.loads(status)
        # The metadata 'time' field is UTC (trailing "Z"), so convert with
        # calendar.timegm(); time.mktime() would wrongly interpret the parsed
        # struct_time as local time, skewing 'time-left' by the UTC offset.
        terminate_action_timestamp = calendar.timegm(datetime.strptime(
            terminate_action['time'], "%Y-%m-%dT%H:%M:%SZ").timetuple())
        # Chained assignment: record the remaining seconds on the event payload
        # and reuse it to compute the next polling delay.
        next_check_delay = terminate_action['time-left'] = terminate_action_timestamp - time.time()
        SpotTerminationEvent(node=self, message=terminate_action).publish()

        return max(next_check_delay - SPOT_TERMINATION_CHECK_OVERHEAD, 0)
# Example #3
    def test_file_logger(self):
        """Start the file logger, publish events per severity, verify counts."""
        start_events_logger(_registry=self.events_processes_registry)
        file_logger = get_events_logger(_registry=self.events_processes_registry)

        time.sleep(EVENTS_SUBSCRIBERS_START_DELAY)

        try:
            self.assertIsInstance(file_logger, EventsFileLogger)
            self.assertTrue(file_logger.is_alive())
            self.assertEqual(file_logger._registry, self.events_main_device._registry)
            self.assertEqual(file_logger._registry, self.events_processes_registry)

            # One event per severity; severity #N is published N times
            # (1 NORMAL + 2 WARNING + 3 ERROR + 4 CRITICAL = 10 total).
            severities = (Severity.NORMAL, Severity.WARNING,
                          Severity.ERROR, Severity.CRITICAL)
            planned = []
            for num, severity in enumerate(severities, start=1):
                event = SpotTerminationEvent(node="n%d" % num, message="m%d" % num)
                event.severity = severity
                planned.append((event, num))

            with self.wait_for_n_events(file_logger, count=10, timeout=3):
                for event, repeats in planned:
                    for _ in range(repeats):
                        self.events_main_device.publish_event(event)

            self.assertEqual(self.events_main_device.events_counter, file_logger.events_counter)

            summary = get_logger_event_summary(_registry=self.events_processes_registry)
            self.assertDictEqual(
                summary,
                {severity.name: num for num, severity in enumerate(severities, start=1)})

            grouped = get_events_grouped_by_category(_registry=self.events_processes_registry)
            for num, severity in enumerate(severities, start=1):
                self.assertEqual(len(grouped[severity.name]), num)
        finally:
            file_logger.stop(timeout=1)
 def test_spot_termination_event(self):
     """SpotTerminationEvent: string form and pickle round-trip equality."""
     evt = SpotTerminationEvent(node="node1", message="m1")
     expected = "(SpotTerminationEvent Severity.CRITICAL): node=node1 message=m1"
     self.assertEqual(str(evt), expected)
     roundtripped = pickle.loads(pickle.dumps(evt))
     self.assertEqual(evt, roundtripped)
    def test_file_logger(self) -> None:
        """Publish N events for the N-th severity and verify counts, summary, grouping."""
        # Severity #N is published N times: 1+2+3+4+5 = 15 events total.
        severities = (Severity.NORMAL, Severity.WARNING, Severity.ERROR,
                      Severity.CRITICAL, Severity.DEBUG)
        planned = []
        for num, severity in enumerate(severities, start=1):
            event = SpotTerminationEvent(node="n%d" % num, message="m%d" % num)
            event.severity = severity
            planned.append((event, num))

        with self.wait_for_n_events(self.file_logger, count=15, timeout=3):
            for event, repeats in planned:
                for _ in range(repeats):
                    self.events_main_device.publish_event(event)

        self.assertEqual(self.events_main_device.events_counter,
                         self.file_logger.events_counter)

        summary = get_logger_event_summary(
            _registry=self.events_processes_registry)
        self.assertDictEqual(
            summary,
            {severity.name: num for num, severity in enumerate(severities, start=1)})

        grouped = get_events_grouped_by_category(
            _registry=self.events_processes_registry)
        for num, severity in enumerate(severities, start=1):
            self.assertEqual(len(grouped[severity.name]), num)