def test_read_output_should_return_no_content(self):
     """read_output is expected to return an empty string when TELEMETRY_MESSAGE_MAX_LEN is patched to 0."""
     max_len_attribute = 'azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN'

     with patch(max_len_attribute, 0):
         output = read_output(self.stdout, self.stderr)
         self.assertEqual("", output)
 def test_read_output_it_should_truncate_the_content(self):
     """
     read_output is expected to shorten both streams when TELEMETRY_MESSAGE_MAX_LEN is patched to 10.

     The expected text keeps 10 characters under each of the [stdout] and [stderr] headers.
     """
     max_len_attribute = 'azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN'
     truncated_output = "[stdout]\nThe quick \n\n[stderr]\nThe five b"

     with patch(max_len_attribute, 10):
         self.assertEqual(truncated_output, read_output(self.stdout, self.stderr))
    def test_save_event_message_with_non_ascii_characters(self):
        """
        Emit an event whose message comes from fixture files with non-ASCII characters and verify it.

        The fixture stdout/stderr files are passed through read_output, the non-empty lines of the result are
        sent with add_event (with the event module's datetime, os.getpid and Thread.getName patched to fixed
        values), and every file found in self.tmp_dir is then loaded with MonitorHandler.collect_event and
        checked: it must hold exactly 15 parameters, each with the expected value, and no unknown parameter.
        """
        fixtures_dir = os.path.join(data_dir, "events", "collect_and_send_extension_stdout_stderror")
        stdout_path = os.path.join(fixtures_dir, "dummy_stdout_with_non_ascii_characters")
        stderr_path = os.path.join(fixtures_dir, "dummy_stderr_with_non_ascii_characters")

        with open(stdout_path, mode="r+b") as stdout:
            with open(stderr_path, mode="r+b") as stderr:
                msg = read_output(stdout, stderr)

        duration = elapsed_milliseconds(datetime.utcnow())
        non_empty_lines = [line for line in msg.split('\n') if line != ""]
        log_msg = "{0}\n{1}".format("DummyCmd", "\n".join(non_empty_lines))

        with patch("azurelinuxagent.common.event.datetime") as patch_datetime:
            frozen_time = datetime.strptime("2019-01-01 01:30:00", '%Y-%m-%d %H:%M:%S')
            patch_datetime.utcnow = Mock(return_value=frozen_time)
            with patch('os.getpid', return_value=42):
                with patch("threading.Thread.getName", return_value="HelloWorldTask"):
                    add_event('test_extension', message=log_msg, duration=duration)

        # The contents passed above, plus the default values for everything that was not passed explicitly.
        expected_values = {
            "Name": "test_extension",
            "Message": log_msg,
            "Version": str(CURRENT_VERSION),
            'IsInternal': False,
            'Operation': 'Unknown',
            'OperationSuccess': True,
            'Duration': 0,
            'ExtensionType': '',
            'ContainerId': 'UNINITIALIZED',
            'OpcodeName': '2019-01-01 01:30:00',
            'EventTid': threading.current_thread().ident,
            'EventPid': 42,
            'TaskName': 'HelloWorldTask',
            'KeywordName': '',
            'GAVersion': str(CURRENT_AGENT),
        }

        for tld_file in os.listdir(self.tmp_dir):
            event_path = os.path.join(self.tmp_dir, tld_file)
            event_json = json.loads(MonitorHandler.collect_event(event_path))
            parameters = event_json["parameters"]

            self.assertEqual(len(parameters), 15)

            for parameter in parameters:
                name = parameter["name"]
                if name not in expected_values:
                    # Any parameter that is not listed above fails the test.
                    self.assertFalse(True, "Contains a field outside the defaults expected. Field Name: {0}".
                                     format(name))
                self.assertEqual(parameter["value"], expected_values[name])
 def test_read_output_should_not_truncate_the_content(self):
     """read_output is expected to return both streams in full when TELEMETRY_MESSAGE_MAX_LEN is patched to 90."""
     max_len_attribute = 'azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN'
     full_output = ("[stdout]\nThe quick brown fox jumps over the lazy dog.\n\n"
                    "[stderr]\nThe five boxing wizards jump quickly.")

     with patch(max_len_attribute, 90):
         self.assertEqual(full_output, read_output(self.stdout, self.stderr))
Exemple #5
0
    def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr,
                                error_code=ExtensionErrorCodes.PluginUnknownFailure):
        """
        Launch the extension command through systemd-run, in a scope of its own, and return its output.

        The scope is registered with CGroupsTelemetry for the CPU and memory controllers that are mounted;
        any problem while setting up the tracking is only logged. If the invocation raises ExtensionError and
        _is_systemd_failure attributes it to systemd-run, a telemetry event is emitted, stdout and stderr are
        truncated, and the command is executed a second time without systemd-run.

        :param extension_name: used to derive the scope name and to label the tracked cgroups
        :param command: command line appended to the systemd-run invocation
        :param timeout: forwarded to handle_process_completion
        :param shell: forwarded to subprocess.Popen
        :param cwd: forwarded to subprocess.Popen
        :param env: forwarded to subprocess.Popen
        :param stdout: file object given to the process as stdout; truncated before the fallback invocation
        :param stderr: file object given to the process as stderr; truncated before the fallback invocation
        :param error_code: forwarded to handle_process_completion
        :return: the value returned by handle_process_completion
        :raises ExtensionError: re-raised when the failure is not attributed to systemd-run; errors from the
            fallback invocation propagate unchanged
        """
        scope = "{0}_{1}".format(self._get_extension_cgroup_name(extension_name), uuid.uuid4())
        systemd_run_command = "systemd-run --unit={0} --scope {1}".format(scope, command)

        process = subprocess.Popen(systemd_run_command, shell=shell, cwd=cwd, stdout=stdout, stderr=stderr,
                                   env=env, preexec_fn=os.setsid)

        scope_name = scope + '.scope'
        logger.info("Started extension in unit '{0}'", scope_name)

        try:
            # systemd-run creates the scope under the system slice by default
            relative_path = os.path.join('system.slice', scope_name)

            cpu_root, memory_root = self.get_cgroup_mount_points()

            if cpu_root is not None:
                cpu_path = os.path.join(cpu_root, relative_path)
                CGroupsTelemetry.track_cgroup(CpuCgroup(extension_name, cpu_path))
            else:
                logger.info("The CPU controller is not mounted; will not track resource usage")

            if memory_root is not None:
                memory_path = os.path.join(memory_root, relative_path)
                CGroupsTelemetry.track_cgroup(MemoryCgroup(extension_name, memory_path))
            else:
                logger.info("The memory controller is not mounted; will not track resource usage")

        except Exception as e:
            # An IOError with errno 2 ('No such file or directory') gets an extra explanatory message
            if isinstance(e, IOError) and e.errno == 2:
                logger.info("The extension command already completed; will not track resource usage")
            logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))

        # Wait for process completion or timeout
        try:
            process_output = handle_process_completion(process=process, command=command, timeout=timeout,
                                                       stdout=stdout, stderr=stderr, error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            process_output = read_output(stdout, stderr)

            if not self._is_systemd_failure(scope, process_output):
                # There was an extension error; it either timed out or returned a non-zero exit code.
                raise

            # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
            if isinstance(e, ExtensionOperationError):
                err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output)
            else:
                err_msg = "Systemd timed-out, output: %s" % process_output
            event_msg = ('Failed to run systemd-run for unit {0}.scope. '
                         'Will retry invoking the extension without systemd. '
                         'Systemd-run error: {1}').format(scope, err_msg)
            add_event(op=WALAEventOperation.InvokeCommandUsingSystemd, is_success=False, log_event=False,
                      message=event_msg)
            logger.warn(event_msg)

            # Reset the stdout and stderr
            stdout.truncate(0)
            stderr.truncate(0)

            # Try invoking the process again, this time without systemd-run
            logger.info('Extension invocation using systemd failed, falling back to regular invocation '
                        'without cgroups tracking.')
            process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr,
                                       preexec_fn=os.setsid)

            process_output = handle_process_completion(process=process, command=command, timeout=timeout,
                                                       stdout=stdout, stderr=stderr, error_code=error_code)

        # Either the first invocation or the fallback invocation completed in time and successfully
        return process_output
Exemple #6
0
    def start_extension_command(
            self,
            extension_name,
            command,
            cmd_name,
            timeout,
            shell,
            cwd,
            env,
            stdout,
            stderr,
            error_code=ExtensionErrorCodes.PluginUnknownFailure):
        """
        Launch the extension command through systemd-run, inside the extension's slice, and return its output.

        The pid of the started process is kept in self._systemd_run_commands (guarded by
        self._systemd_run_commands_lock) for as long as this method is waiting on it. The extension slice is
        registered with CGroupsTelemetry when the CPU controller is mounted; any problem while setting up the
        tracking is only logged.

        :param extension_name: used to look up the extension slice name and to label the tracked cgroup
        :param command: command line appended to the systemd-run invocation
        :param cmd_name: prefix of the generated scope name
        :param timeout: forwarded to handle_process_completion
        :param shell: forwarded to subprocess.Popen
        :param cwd: forwarded to subprocess.Popen
        :param env: forwarded to subprocess.Popen
        :param stdout: file object given to the process as stdout; truncated when systemd-run itself failed
        :param stderr: file object given to the process as stderr; truncated when systemd-run itself failed
        :param error_code: forwarded to handle_process_completion
        :return: the value returned by handle_process_completion
        :raises ExtensionError: re-raised when the failure is not attributed to systemd-run
        :raises SystemdRunError: when _is_systemd_failure attributes the failure to systemd-run
        """
        scope = "{0}_{1}".format(cmd_name, uuid.uuid4())
        extension_slice_name = self.get_extension_slice_name(extension_name)
        systemd_run_command = "systemd-run --unit={0} --scope --slice={1} {2}".format(
            scope, extension_slice_name, command)

        with self._systemd_run_commands_lock:
            process = subprocess.Popen(  # pylint: disable=W1509
                systemd_run_command, shell=shell, cwd=cwd, stdout=stdout, stderr=stderr, env=env,
                preexec_fn=os.setsid)

            # We start systemd-run with shell == True so process.pid is the shell's pid, not the pid for systemd-run
            self._systemd_run_commands.append(process.pid)

        scope_name = scope + '.scope'
        logger.info("Started extension in unit '{0}'", scope_name)

        try:
            relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name)

            cpu_root, _ = self.get_cgroup_mount_points()

            if cpu_root is not None:
                cpu_path = os.path.join(cpu_root, relative_path)
                CGroupsTelemetry.track_cgroup(CpuCgroup(extension_name, cpu_path))
            else:
                logger.info("The CPU controller is not mounted; will not track resource usage")

        except Exception as e:
            # An IOError with errno 2 ('No such file or directory') gets an extra explanatory message
            if isinstance(e, IOError) and e.errno == 2:
                logger.info("The extension command already completed; will not track resource usage")
            logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))

        # Wait for process completion or timeout
        try:
            return handle_process_completion(process=process, command=command, timeout=timeout,
                                             stdout=stdout, stderr=stderr, error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            if not self._is_systemd_failure(scope, stderr):
                # There was an extension error; it either timed out or returned a non-zero exit code.
                raise

            # There was an issue with systemd-run. Capture the output before resetting stdout and stderr.
            process_output = read_output(stdout, stderr)
            stdout.truncate(0)
            stderr.truncate(0)

            if not isinstance(e, ExtensionOperationError):
                failure_details = "Systemd timed-out, output: %s" % process_output
            else:
                # no-member is disabled on the next statement: e is actually an ExtensionOperationError here
                failure_details = 'Systemd process exited with code %s and output %s' % (
                    e.exit_code, process_output)  # pylint: disable=no-member
            raise SystemdRunError(failure_details)
        finally:
            # Runs on every exit path: the pid is no longer being waited on
            with self._systemd_run_commands_lock:
                self._systemd_run_commands.remove(process.pid)
 def test_read_output_it_should_handle_exceptions(self):
     """
     read_output is expected to report a read failure in its return value instead of raising.

     TELEMETRY_MESSAGE_MAX_LEN is patched with a string rather than a number; the returned text must then
     contain "Cannot read stdout/stderr".
     """
     max_len_attribute = 'azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN'

     with patch(max_len_attribute, "type error"):
         self.assertIn("Cannot read stdout/stderr", read_output(self.stdout, self.stderr))
    def start_extension_command(
            self,
            extension_name,
            command,
            timeout,
            shell,
            cwd,
            env,
            stdout,
            stderr,
            error_code=ExtensionErrorCodes.PluginUnknownFailure):
        """
        Launch the extension command through systemd-run, in a scope of its own, and track its cgroups.

        One cgroup per controller is created via self._foreach_controller and handed to self.track_cgroups.
        If the invocation raises ExtensionError and _is_systemd_failure attributes it to systemd-run, a
        telemetry event is emitted, stdout and stderr are truncated, and the command is executed a second
        time without systemd-run.

        :param extension_name: used to derive the scope name and to label the created cgroups
        :param command: command line appended to the systemd-run invocation
        :param timeout: forwarded to handle_process_completion
        :param shell: forwarded to subprocess.Popen
        :param cwd: forwarded to subprocess.Popen
        :param env: forwarded to subprocess.Popen
        :param stdout: file object given to the process as stdout; truncated before the fallback invocation
        :param stderr: file object given to the process as stderr; truncated before the fallback invocation
        :param error_code: forwarded to handle_process_completion
        :return: (extension_cgroups, process_output); the cgroup list is empty when the fallback
            invocation was used
        :raises ExtensionError: re-raised when the failure is not attributed to systemd-run; errors from the
            fallback invocation propagate unchanged
        """
        scope_name = "{0}_{1}".format(self._get_extension_cgroup_name(extension_name), uuid.uuid4())
        systemd_run_command = "systemd-run --unit={0} --scope {1}".format(scope_name, command)

        process = subprocess.Popen(systemd_run_command, shell=shell, cwd=cwd, stdout=stdout, stderr=stderr,
                                   env=env, preexec_fn=os.setsid)

        logger.info("Started extension using scope '{0}'", scope_name)
        extension_cgroups = []

        def create_cgroup(controller):
            # Callback for _foreach_controller: records the cgroup created for the given controller
            cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, 'system.slice',
                                       scope_name + ".scope")
            extension_cgroups.append(CGroup.create(cgroup_path, controller, extension_name))

        creation_failure_message = ('Cannot create cgroup for extension {0}; '
                                    'resource usage will not be tracked.').format(extension_name)
        self._foreach_controller(create_cgroup, creation_failure_message)
        self.track_cgroups(extension_cgroups)

        # Wait for process completion or timeout
        try:
            process_output = handle_process_completion(process=process, command=command, timeout=timeout,
                                                       stdout=stdout, stderr=stderr, error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            process_output = read_output(stdout, stderr)

            if not self._is_systemd_failure(scope_name, process_output):
                # There was an extension error; it either timed out or returned a non-zero exit code.
                raise

            # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
            if isinstance(e, ExtensionOperationError):
                err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output)
            else:
                err_msg = "Systemd timed-out, output: %s" % process_output
            event_msg = ('Failed to run systemd-run for unit {0}.scope. '
                         'Will retry invoking the extension without systemd. '
                         'Systemd-run error: {1}').format(scope_name, err_msg)
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.InvokeCommandUsingSystemd,
                      is_success=False,
                      log_event=False,
                      message=event_msg)
            logger.warn(event_msg)

            # Reset the stdout and stderr
            stdout.truncate(0)
            stderr.truncate(0)

            # Try invoking the process again, this time without systemd-run
            logger.info('Extension invocation using systemd failed, falling back to regular invocation '
                        'without cgroups tracking.')
            process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr,
                                       preexec_fn=os.setsid)

            process_output = handle_process_completion(process=process, command=command, timeout=timeout,
                                                       stdout=stdout, stderr=stderr, error_code=error_code)

            # The fallback invocation is not tracked, so no cgroups are reported for it
            return [], process_output

        # The process terminated in time and successfully
        return extension_cgroups, process_output