Example #1
0
def _create_collect_logs_handler(iterations=1, systemd_present=True):
    """
    Creates an instance of CollectLogsHandler that
        * Uses a mock_wire_protocol for network requests,
        * Runs its main loop only the number of times given in the 'iterations' parameter, and
        * Does not sleep at the end of each iteration

    The returned CollectLogsHandler is augmented with 2 methods:
        * get_mock_wire_protocol() - returns the mock protocol
        * run_and_wait() - invokes run() and wait() on the CollectLogsHandler

    """
    with mock_wire_protocol(DATA_FILE) as protocol:
        protocol_util = MagicMock()
        protocol_util.get_protocol = Mock(return_value=protocol)
        with patch("azurelinuxagent.ga.collect_logs.get_protocol_util", return_value=protocol_util):
            with patch("azurelinuxagent.ga.collect_logs.CollectLogsHandler.stopped", side_effect=[False] * iterations + [True]):
                with patch("time.sleep"):
                    with patch("azurelinuxagent.common.osutil.systemd.is_systemd", return_value=systemd_present):
                        with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", return_value=True):
                            def run_and_wait():
                                collect_logs_handler.run()
                                collect_logs_handler.join()

                            collect_logs_handler = get_collect_logs_handler()
                            collect_logs_handler.get_mock_wire_protocol = lambda: protocol
                            collect_logs_handler.run_and_wait = run_and_wait
                            yield collect_logs_handler
Example #2
0
def _create_collect_logs_handler(iterations=1,
                                 cgroups_enabled=True,
                                 collect_logs_conf=True):
    """
    Creates an instance of CollectLogsHandler that
        * Uses a mock_wire_protocol for network requests,
        * Runs its main loop only the number of times given in the 'iterations' parameter, and
        * Does not sleep at the end of each iteration

    The returned CollectLogsHandler is augmented with 2 methods:
        * get_mock_wire_protocol() - returns the mock protocol
        * run_and_wait() - invokes run() and wait() on the CollectLogsHandler

    """
    with mock_wire_protocol(DATA_FILE) as protocol:
        protocol_util = MagicMock()
        protocol_util.get_protocol = Mock(return_value=protocol)
        with patch("azurelinuxagent.ga.collect_logs.get_protocol_util",
                   return_value=protocol_util):
            with patch(
                    "azurelinuxagent.ga.collect_logs.CollectLogsHandler.stopped",
                    side_effect=[False] * iterations + [True]):
                with patch("time.sleep"):

                    # Grab the singleton to patch it
                    cgroups_configurator_singleton = CGroupConfigurator.get_instance(
                    )
                    with patch.object(cgroups_configurator_singleton,
                                      "enabled",
                                      return_value=cgroups_enabled):
                        with patch(
                                "azurelinuxagent.ga.collect_logs.conf.get_collect_logs",
                                return_value=collect_logs_conf):

                            def run_and_wait():
                                collect_logs_handler.run()
                                collect_logs_handler.join()

                            collect_logs_handler = get_collect_logs_handler()
                            collect_logs_handler.get_mock_wire_protocol = lambda: protocol
                            collect_logs_handler.run_and_wait = run_and_wait
                            yield collect_logs_handler
Example #3
0
    def run(self, debug=False):
        """
        This is the main loop which watches for agent and extension updates.
        """

        try:
            logger.info(u"Agent {0} is running as the goal state agent",
                        CURRENT_AGENT)

            #
            # Fetch the goal state one time; some components depend on information provided by the goal state and this
            # call ensures the required info is initialized (e.g telemetry depends on the container ID.)
            #
            protocol = self.protocol_util.get_protocol()
            protocol.update_goal_state()

            # Initialize the common parameters for telemetry events
            initialize_event_logger_vminfo_common_parameters(protocol)

            # Log OS-specific info.
            os_info_msg = u"Distro: {dist_name}-{dist_ver}; "\
                u"OSUtil: {util_name}; AgentService: {service_name}; "\
                u"Python: {py_major}.{py_minor}.{py_micro}; "\
                u"systemd: {systemd}; "\
                u"LISDrivers: {lis_ver}; "\
                u"logrotate: {has_logrotate};".format(
                    dist_name=DISTRO_NAME, dist_ver=DISTRO_VERSION,
                    util_name=type(self.osutil).__name__,
                    service_name=self.osutil.service_name,
                    py_major=PY_VERSION_MAJOR, py_minor=PY_VERSION_MINOR,
                    py_micro=PY_VERSION_MICRO, systemd=systemd.is_systemd(),
                    lis_ver=get_lis_version(), has_logrotate=has_logrotate()
            )

            logger.info(os_info_msg)
            add_event(AGENT_NAME,
                      op=WALAEventOperation.OSInfo,
                      message=os_info_msg)

            #
            # Perform initialization tasks
            #
            from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
            exthandlers_handler = get_exthandlers_handler(protocol)
            migrate_handler_state()

            from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
            remote_access_handler = get_remote_access_handler(protocol)

            self._ensure_no_orphans()
            self._emit_restart_event()
            self._emit_changes_in_default_configuration()
            self._ensure_partition_assigned()
            self._ensure_readonly_files()
            self._ensure_cgroups_initialized()
            self._ensure_extension_telemetry_state_configured_properly(
                protocol)
            self._ensure_firewall_rules_persisted(
                dst_ip=protocol.get_endpoint())

            # Get all thread handlers
            telemetry_handler = get_send_telemetry_events_handler(
                self.protocol_util)
            all_thread_handlers = [
                get_monitor_handler(),
                get_env_handler(), telemetry_handler,
                get_collect_telemetry_events_handler(telemetry_handler)
            ]

            if is_log_collection_allowed():
                all_thread_handlers.append(get_collect_logs_handler())

            # Launch all monitoring threads
            for thread_handler in all_thread_handlers:
                thread_handler.run()

            goal_state_interval = conf.get_goal_state_period(
            ) if conf.get_extensions_enabled(
            ) else GOAL_STATE_INTERVAL_DISABLED

            while self.running:
                #
                # Check that the parent process (the agent's daemon) is still running
                #
                if not debug and self._is_orphaned:
                    logger.info("Agent {0} is an orphan -- exiting",
                                CURRENT_AGENT)
                    break

                #
                # Check that all the threads are still running
                #
                for thread_handler in all_thread_handlers:
                    if not thread_handler.is_alive():
                        logger.warn("{0} thread died, restarting".format(
                            thread_handler.get_thread_name()))
                        thread_handler.start()

                #
                # Process the goal state
                #
                if not protocol.try_update_goal_state():
                    self._heartbeat_update_goal_state_error_count += 1
                else:
                    if self._upgrade_available(protocol):
                        available_agent = self.get_latest_agent()
                        if available_agent is None:
                            logger.info(
                                "Agent {0} is reverting to the installed agent -- exiting",
                                CURRENT_AGENT)
                        else:
                            logger.info(
                                u"Agent {0} discovered update {1} -- exiting",
                                CURRENT_AGENT, available_agent.name)
                        break

                    utc_start = datetime.utcnow()

                    last_etag = exthandlers_handler.last_etag
                    exthandlers_handler.run()

                    remote_access_handler.run()

                    if last_etag != exthandlers_handler.last_etag:
                        self._ensure_readonly_files()
                        duration = elapsed_milliseconds(utc_start)
                        activity_id, correlation_id, gs_creation_time = exthandlers_handler.get_goal_state_debug_metadata(
                        )
                        msg = 'ProcessGoalState completed [Incarnation: {0}; {1} ms; Activity Id: {2}; Correlation Id: {3}; GS Creation Time: {4}]'.format(
                            exthandlers_handler.last_etag, duration,
                            activity_id, correlation_id, gs_creation_time)
                        logger.info(msg)
                        add_event(AGENT_NAME,
                                  op=WALAEventOperation.ProcessGoalState,
                                  duration=duration,
                                  message=msg)

                self._send_heartbeat_telemetry(protocol)
                time.sleep(goal_state_interval)

        except Exception as error:
            msg = u"Agent {0} failed with exception: {1}".format(
                CURRENT_AGENT, ustr(error))
            self._set_sentinel(msg=msg)
            logger.warn(msg)
            logger.warn(traceback.format_exc())
            sys.exit(1)
            # additional return here because sys.exit is mocked in unit tests
            return

        self._shutdown()
        sys.exit(0)