Example #1
    def test_non_ascii_data_non_utf_locale_coding_orjson(self):
        util.set_json_lib("orjson")
        original_data = "čććžšđ"

        # Default (UTF-8) locale
        result = util.json_encode(original_data)
        self.assertEqual(result, '"%s"' % (original_data))

        loaded = util.json_decode(result)
        self.assertEqual(loaded, original_data)

        # Non UTF-8 Locale
        os.environ["LC_ALL"] = "invalid"

        result = util.json_encode(original_data)
        self.assertEqual(result, '"%s"' % (original_data))

        loaded = util.json_decode(result)
        self.assertEqual(loaded, original_data)

        # Invalid UTF-8, should fall back to standard json implementation
        original_data = "\ud800"
        result = util.json_encode(original_data)
        self.assertEqual(result, '"\\ud800"')

        loaded = util.json_decode('"\ud800"')
        self.assertEqual(loaded, original_data)
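For context, here is a minimal standard-library sketch of the fallback behaviour the test above relies on: a lone surrogate such as "\ud800" cannot be encoded to UTF-8, so byte-oriented encoders like orjson reject it, while the pure-Python json module escapes it and round-trips it. This mirrors, but is not, the library's own fallback code.

import json

lone_surrogate = "\ud800"
try:
    lone_surrogate.encode("utf-8")
except UnicodeEncodeError:
    pass  # byte-oriented encoders cannot represent a lone surrogate

encoded = json.dumps(lone_surrogate)        # '"\\ud800"'
assert encoded == '"\\ud800"'
assert json.loads(encoded) == lone_surrogate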
        def __runtest(library):
            self._setlib(library)

            if library == FALLBACK or not isinstance(obj, (JsonArray, JsonObject)):
                text2 = util.json_encode(obj)
                self.assertEquals(text, text2)
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEquals(text, text3)
            else:
                self.assertRaises(TypeError, lambda: util.json_encode(obj))
        def __runtest(library):
            original_lib = util.get_json_lib()

            self._setlib(library)
            try:
                text2 = util.json_encode(obj)
                self.assertEquals(text, text2)
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEquals(text, text3)
            finally:
                util._set_json_lib(original_lib)
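A hedged sketch of how the enclosing test might drive the nested helper above across the supported encoders ("json", "ujson" and "orjson" all appear in this code base; the exact list used by the real test is an assumption):

        for library in ["json", "ujson", "orjson"]:
            __runtest(library)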
Example #5
    def create_configuration(extra_toplevel_config=None):
        """Creates a blank configuration file with default values. Optionally overwrites top-level key/values.
        Sets api_key to 'fake' unless defined in extra_toplevel_config

        @param extra_toplevel_config: Dict of top-level key/value objects to overwrite.
        @return: The configuration object
        @rtype: Configuration
        """
        config_dir = tempfile.mkdtemp()
        config_file = os.path.join(config_dir, "agentConfig.json")
        config_fragments_dir = os.path.join(config_dir, "configs.d")
        os.makedirs(config_fragments_dir)

        toplevel_config = {"api_key": "fake"}
        if extra_toplevel_config:
            toplevel_config.update(extra_toplevel_config)

        fp = open(config_file, "w")
        fp.write(scalyr_util.json_encode(toplevel_config))
        fp.close()

        default_paths = DefaultPaths(
            "/var/log/scalyr-agent-2",
            "/etc/scalyr-agent-2/agent.json",
            "/var/lib/scalyr-agent-2",
        )

        config = Configuration(config_file, default_paths, None)
        config.parse()

        # we need to delete the config dir when done
        atexit.register(shutil.rmtree, config_dir)

        return config
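Example usage of the helper above, as it might appear inside a test; the overridden key is illustrative only and is not a required agent option:

        # Illustrative only: override one top-level option in addition to the
        # default fake api_key.
        config = create_configuration(extra_toplevel_config={"debug_level": 5})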
def _write_bad_checkpoint_file(path):
    """
    Write invalid JSON in file located in "path"
    """
    fp = open(path, "w")
    fp.write(scalyr_util.json_encode("}data{,:,,{}"))
    fp.close()
Example #7
    def _format_key_value(self, format, key, value, emit_raw_details,
                          detect_escaped_strings):
        if emit_raw_details or (detect_escaped_strings
                                and value.startswith('"')
                                and value.endswith('"')):
            return format % (key, value)
        else:
            return format % (key, scalyr_util.json_encode(value))
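A standalone sketch of the same formatting rule, useful for seeing the produced output; the function name and format string here are illustrative and the standard json module stands in for scalyr_util:

import json

def format_key_value(fmt, key, value, emit_raw_details=False,
                     detect_escaped_strings=False):
    # Mirror of the method above: emit the value as-is when raw details are
    # requested or the value is already quoted, otherwise JSON-encode it.
    if emit_raw_details or (detect_escaped_strings
                            and value.startswith('"')
                            and value.endswith('"')):
        return fmt % (key, value)
    return fmt % (key, json.dumps(value))

print(format_key_value("%s=%s", "path", 'C:\\logs'))
# path="C:\\logs"   (the value is JSON-escaped)
print(format_key_value("%s=%s", "path", '"already quoted"',
                       detect_escaped_strings=True))
# path="already quoted"   (left as-is because it is already quoted)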
    def test_str_conversion(self):
        self.assertEquals(self.get(1, convert_to=six.text_type), "1")
        self.assertEquals(self.get("ah", convert_to=six.text_type), "ah")
        self.assertEquals(self.get(False, convert_to=six.text_type), "False")
        self.assertEquals(self.get(1.3, convert_to=six.text_type), "1.3")
        self.assertEquals(self.get(1, convert_to=six.text_type), "1")

        test_array = ["a", "b", "c"]

        # str -> ArrayOfStrings (must support different variations)
        arr = ArrayOfStrings(test_array)
        self.assertEquals(self.get("a,b,c", convert_to=ArrayOfStrings), arr)
        self.assertEquals(self.get("a,b,  c", convert_to=ArrayOfStrings), arr)
        self.assertEquals(self.get('"a", "b", "c"', convert_to=ArrayOfStrings),
                          arr)
        self.assertEquals(self.get("'a', 'b', 'c'", convert_to=ArrayOfStrings),
                          arr)
        self.assertEquals(self.get("[a, b, c]", convert_to=ArrayOfStrings),
                          arr)
        self.assertEquals(
            self.get("['a', \"b\", c]", convert_to=ArrayOfStrings), arr)

        # str -> JsonArray
        self.assertEquals(
            self.get(scalyr_util.json_encode(test_array),
                     convert_to=JsonArray),
            JsonArray(*test_array),
        )
        self.assertRaises(
            BadMonitorConfiguration,
            # single quotes are invalid JSON
            lambda: self.assertEquals(
                self.get(six.text_type(test_array), convert_to=JsonArray),
                JsonArray(*test_array),
            ),
        )

        # str -> JsonObject
        test_obj = {"a": 1, "b": "two", "c": [1, 2, 3]}
        self.assertEquals(
            self.get(scalyr_util.json_encode(test_obj), convert_to=JsonObject),
            scalyr_util.json_scalyr_config_decode(
                scalyr_util.json_encode(test_obj)),
        )
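A standalone sketch of the kind of comma-separated parsing the ArrayOfStrings conversions above accept; this is illustrative only and is not the library's implementation:

def parse_string_list(raw):
    # Illustrative: strip optional brackets, split on commas, and drop
    # surrounding quotes and whitespace from each element.
    raw = raw.strip()
    if raw.startswith("[") and raw.endswith("]"):
        raw = raw[1:-1]
    return [item.strip().strip("'\"") for item in raw.split(",")]

for variant in ("a,b,c", "a,b,  c", '"a", "b", "c"', "['a', \"b\", c]"):
    assert parse_string_list(variant) == ["a", "b", "c"]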
Example #9
def parse_scalyr_request(payload):
    """Parses a payload encoded with the Scalyr-specific JSON optimizations. The only place these optimizations
    are used are creating `AddEvent` requests.

    NOTE:  This method is very fragile and just does enough conversion to support the tests.  It could lead to
    erroneous results if patterns like "`s" and colons are used in strings content.  It is also not optimized
    for performance.

    :param payload: The request payload
    :type payload: bytes
    :return: The parsed request body
    :rtype: dict
    """
    # Our general strategy is to rewrite the payload to be standard JSON and then use the
    # standard JSON libraries to parse it.  There are two main optimizations we need to undo
    # here: length-prefixed strings and not using quotes around key names in JSON objects.

    # First, look for the length-prefixed strings.  These are marked by "`sXXXX" where XXXX is a four
    # byte integer holding the number of bytes in the string.  This precedes the string.  So we find
    # all of those and replace them with quotes.  We also have to escape the string.
    # NOTE: It is very important all of our regex work against byte strings because our input is in bytes.
    length_prefix_re = re.compile(b"`s(....)", re.DOTALL)

    # Accumulate the rewrite of `payload` here.  We will eventually parse this as standard JSON.
    rewritten_payload = b""
    # The index of `payload` up to which we have processed (copied into `rewritten_payload`).
    last_processed_index = -1

    for x in length_prefix_re.finditer(payload):
        # First add in the bytes between the last processed and the start of this match.
        rewritten_payload += payload[last_processed_index + 1:x.start(0)]
        # Read the 4 bytes that describe the length, which is stored in regex group 1.
        # 2->TODO struct.pack|unpack in python < 2.7.7 does not allow unicode format string.
        length = compat.struct_unpack_unicode(">i", x.group(1))[0]
        # Grab the string content as raw bytes.
        raw_string = payload[x.end(1):x.end(1) + length]
        text_string = raw_string.decode("utf-8", "replace")
        rewritten_payload += scalyr_util.json_encode(text_string, binary=True)
        last_processed_index = x.end(1) + length - 1
    rewritten_payload += payload[last_processed_index + 1:len(payload)]

    # Now convert all places where we do not have quotes around key names to have quotes.
    # This is pretty fragile.. we look for anything like
    #      foo:
    #  and convert it to
    #      "foo":
    rewritten_payload = re.sub(b"([\\w\\-]+):", b'"\\1":', rewritten_payload)

    # NOTE: Special case for Windows where path is C:\ which we don't want to convert
    rewritten_payload = rewritten_payload.replace(b'"C":\\', b"C:\\")
    # Do the same for the lower-case drive letter.
    rewritten_payload = rewritten_payload.replace(b'"c":\\', b"c:\\")

    return scalyr_util.json_decode(rewritten_payload.decode(
        "utf-8", "replace"))
Example #10
        def __runtest(library):
            original_lib = util.get_json_lib()

            self._setlib(library)
            try:
                text2 = util.json_encode(obj)
                self.assertEquals(
                    sorted(six.ensure_text(text)),
                    sorted(text2),
                    "%s != %s" % (str(text), str(text2)),
                )
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEquals(
                    sorted(six.ensure_text(text)),
                    sorted(text3),
                    "%s != %s" % (str(text), str(text3)),
                )
                obj3 = util.json_decode(text)
                self.assertEquals(obj3, obj)

                # Sanity test to ensure curly brace is always the last character when serializing
                # a dict.
                # Our "rewind to last curly brace" logic in scalyr_agent/scalyr_client.py relies on
                # this behavior.
                values = [
                    {},
                    {
                        "a": "b"
                    },
                    {
                        "a": 1,
                        "b": 2
                    },
                ]

                for value in values:
                    result = util.json_encode(value)
                    self.assertEqual(result[-1], "}")
            finally:
                util.set_json_lib(original_lib)
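A minimal illustration of the invariant the sanity check above protects; the helper name is hypothetical, and the agent's actual rewind logic lives in scalyr_agent/scalyr_client.py:

def rewind_to_last_curly_brace(serialized):
    # Hypothetical helper: truncate a serialized request at the last '}'.
    # This only works if dict serialization always ends with a closing brace,
    # which is exactly the invariant asserted above.
    return serialized[: serialized.rindex("}") + 1]

assert rewind_to_last_curly_brace('{"a": 1}  trailing junk') == '{"a": 1}'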
Example #11
    def test_non_ascii_data_non_utf_locale_coding_default_json_lib(self):
        util.set_json_lib("json")
        original_data = "čććžšđ"

        # Default (UTF-8) locale
        result = util.json_encode(original_data)
        self.assertEqual(result,
                         '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"')

        loaded = util.json_decode(result)
        self.assertEqual(loaded, original_data)

        # Non UTF-8 Locale
        os.environ["LC_ALL"] = "invalid"

        result = util.json_encode(original_data)
        self.assertEqual(result,
                         '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"')

        loaded = util.json_decode(result)
        self.assertEqual(loaded, original_data)
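The escaped output asserted above is the standard library's default ensure_ascii=True behaviour; a minimal standalone illustration:

import json

assert json.dumps("č") == '"\\u010d"'
assert json.dumps("č", ensure_ascii=False) == '"č"'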
Example #12
    def format_msg(
        self,
        metric_name,
        metric_value,
        extra_fields=None,
    ):
        string_buffer = StringIO()

        string_buffer.write(
            "%s %s" % (metric_name, scalyr_util.json_encode(metric_value)))

        if extra_fields is not None:
            for field_name in extra_fields:
                field_value = extra_fields[field_name]
                string_buffer.write(
                    " %s=%s" %
                    (field_name, scalyr_util.json_encode(field_value)))

        msg = string_buffer.getvalue()
        string_buffer.close()
        return msg
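A standalone sketch of the message layout produced above, using the standard json module in place of scalyr_util (the output shape is the point, not the exact encoder):

import json

def format_msg(metric_name, metric_value, extra_fields=None):
    # Metric value and every extra field value are JSON-encoded, matching the
    # method above.
    parts = ["%s %s" % (metric_name, json.dumps(metric_value))]
    for field_name, field_value in (extra_fields or {}).items():
        parts.append("%s=%s" % (field_name, json.dumps(field_value)))
    return " ".join(parts)

print(format_msg("cpu.usage", 0.25, {"core": "0"}))
# cpu.usage 0.25 core="0"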
Example #13
    def create_copying_manager(self, config):

        if 'api_key' not in config:
            config['api_key'] = 'fake'

        f = open(self._config_file, "w")
        if f:

            f.write(scalyr_util.json_encode(config))
            f.close()

        default_paths = DefaultPaths(self._log_dir, self._config_file,
                                     self._data_dir)

        configuration = Configuration(self._config_file, default_paths, None)
        configuration.parse()
        self._manager = TestableCopyingManager(configuration, [])
        self._controller = self._manager.controller
    def create_copying_manager(self, config, monitor_agent_log=False):

        if "api_key" not in config:
            config["api_key"] = "fake"

        if not monitor_agent_log:
            config["implicit_agent_log_collection"] = False

        f = open(self._config_file, "w")
        if f:

            f.write(scalyr_util.json_encode(config))
            f.close()

        default_paths = DefaultPaths(self._log_dir, self._config_file,
                                     self._data_dir)

        configuration = Configuration(self._config_file, default_paths, None)
        configuration.parse()
        self._manager = TestableCopyingManager(configuration, [])
        self._controller = self._manager.controller
def _add_non_utf8_to_checkpoint_file(path):
    """
    Add a unicode character to the checkpoint data stored in file located in "path"
    """
    fp = open(path, "r")
    data = scalyr_util.json_decode(fp.read())
    fp.close()
    # 2->TODO: JSON libraries do not allow serializing byte strings containing invalid UTF-8 (ujson), or bytes
    # at all (json), so to test this case we must write the non-UTF-8 byte directly, without serializing.

    # This placeholder string will be replaced with an invalid UTF-8 byte after encoding.
    data["test"] = "__replace_me__"

    json_string = scalyr_util.json_encode(data, binary=True)

    # Replace the prepared placeholder with an invalid byte.
    json_string = json_string.replace(b"__replace_me__", b"\x96")

    fp = open(path, "wb")
    fp.write(json_string)
    fp.close()
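A small, self-contained check of the helper above: seed a temporary file with valid JSON, inject the byte, and confirm that a strict UTF-8 decode now fails (the temporary-file handling here is illustrative):

import os
import tempfile

fd, path = tempfile.mkstemp(suffix=".json")
os.close(fd)
with open(path, "w") as fp:
    fp.write(scalyr_util.json_encode({"existing": "data"}))

_add_non_utf8_to_checkpoint_file(path)

with open(path, "rb") as fp:
    raw = fp.read()
assert b"\x96" in raw
try:
    raw.decode("utf-8")
    raise AssertionError("expected a UnicodeDecodeError")
except UnicodeDecodeError:
    pass  # expected: the injected byte is not valid UTF-8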
Example #16
    def run(self):
        """Begins executing the monitor, writing metric output to logger.
        """
        if self.__disable_monitor:
            global_log.info(
                "kubernetes_events_monitor exiting because it has been disabled."
            )
            return

        try:
            self._global_config.k8s_api_url
            self._global_config.k8s_verify_api_queries

            # We only create the k8s_cache while we are the leader
            k8s_cache = None

            if self.__log_watcher:
                self.log_config = self.__log_watcher.add_log_config(
                    self.module_name, self.log_config
                )

            # First instance of k8s api uses the main rate limiter.  Leader election related API calls to the k8s
            # masters will go through this api/rate limiter.
            k8s_api_main = KubernetesApi.create_instance(
                self._global_config, rate_limiter_key="K8S_CACHE_MAIN_RATELIMITER"
            )

            # Second instance of k8s api uses an ancillary ratelimiter (for exclusive use by events monitor)
            k8s_api_events = KubernetesApi.create_instance(
                self._global_config, rate_limiter_key="K8S_EVENTS_RATELIMITER"
            )

            # k8s_cache is initialized with the main rate limiter. However, streaming-related API calls should go
            # through the ancillary ratelimiter. This is achieved by passing ApiQueryOptions with desired rate_limiter.
            k8s_events_query_options = ApiQueryOptions(
                max_retries=self._global_config.k8s_controlled_warmer_max_query_retries,
                rate_limiter=k8s_api_events.default_query_options.rate_limiter,
            )

            pod_name = k8s_api_main.get_pod_name()
            self._node_name = k8s_api_main.get_node_name(pod_name)
            cluster_name = k8s_api_main.get_cluster_name()

            last_event = None
            last_resource = 0

            last_check = time.time() - self._leader_check_interval

            last_reported_leader = None
            while not self._is_thread_stopped():
                current_time = time.time()

                # if we are the leader, we could be going through this loop before the leader_check_interval
                # has expired, so make sure to only check for a new leader if the interval has expired
                if last_check + self._leader_check_interval <= current_time:
                    last_check = current_time
                    # check if we are the leader
                    if not self._is_leader(k8s_api_main):
                        # if not, then sleep and try again
                        global_log.log(
                            scalyr_logging.DEBUG_LEVEL_1,
                            "Leader is %s" % (six.text_type(self._current_leader)),
                        )
                        if (
                            self._current_leader is not None
                            and last_reported_leader != self._current_leader
                        ):
                            global_log.info(
                                "Kubernetes event leader is %s"
                                % six.text_type(self._current_leader)
                            )
                            last_reported_leader = self._current_leader
                        if k8s_cache is not None:
                            k8s_cache.stop()
                            k8s_cache = None
                        self._sleep_but_awaken_if_stopped(self._leader_check_interval)
                        continue

                    global_log.log(
                        scalyr_logging.DEBUG_LEVEL_1,
                        "Leader is %s" % (six.text_type(self._current_leader)),
                    )
                try:
                    if last_reported_leader != self._current_leader:
                        global_log.info("Acting as Kubernetes event leader")
                        last_reported_leader = self._current_leader

                    if k8s_cache is None:
                        # create the k8s cache
                        k8s_cache = k8s_utils.cache(self._global_config)

                    # start streaming events
                    lines = k8s_api_events.stream_events(last_event=last_event)

                    json = {}
                    for line in lines:
                        try:
                            json = scalyr_util.json_decode(line)
                        except Exception as e:
                            global_log.warning(
                                "Error parsing event json: %s, %s, %s"
                                % (line, six.text_type(e), traceback.format_exc())
                            )
                            continue

                        try:
                            # check to see if the resource version we are using has expired
                            if self._is_resource_expired(json):
                                last_event = None
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1, "K8S resource expired"
                                )
                                continue

                            obj = json.get("object", dict())
                            event_type = json.get("type", "UNKNOWN")

                            # resource version hasn't expired, so update it to the most recently seen version
                            last_event = last_resource

                            metadata = obj.get("metadata", dict())

                            # skip any events with resourceVersions higher than ones we've already seen
                            resource_version = metadata.get("resourceVersion", None)
                            if resource_version is not None:
                                resource_version = int(resource_version)
                            if resource_version and resource_version <= last_resource:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_2,
                                    "Skipping older resource events",
                                )
                                continue

                            last_resource = resource_version
                            last_event = resource_version

                            # see if this event is about an object we are interested in
                            (kind, namespace, name) = self._get_involved_object(obj)

                            if kind is None:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Ignoring event due to None kind",
                                )
                                continue

                            # exclude any events that don't involve objects we are interested in
                            if (
                                self.__event_object_filter
                                and kind not in self.__event_object_filter
                            ):
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Ignoring event due to unknown kind %s - %s"
                                    % (kind, six.text_type(metadata)),
                                )
                                continue

                            # ignore events that belong to namespaces we are not interested in
                            if namespace not in self.__k8s_namespaces_to_include:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Ignoring event due to belonging to an excluded namespace '%s'"
                                    % (namespace),
                                )
                                continue

                            # get cluster and deployment information
                            extra_fields = {
                                "k8s-cluster": cluster_name,
                                "watchEventType": event_type,
                            }
                            if kind:
                                if kind == "Pod":
                                    extra_fields["pod_name"] = name
                                    extra_fields["pod_namespace"] = namespace
                                    pod = k8s_cache.pod(
                                        namespace,
                                        name,
                                        current_time,
                                        query_options=k8s_events_query_options,
                                    )
                                    if pod and pod.controller:
                                        extra_fields[
                                            "k8s-controller"
                                        ] = pod.controller.name
                                        extra_fields["k8s-kind"] = pod.controller.kind
                                elif kind != "Node":
                                    controller = k8s_cache.controller(
                                        namespace,
                                        name,
                                        kind,
                                        current_time,
                                        query_options=k8s_events_query_options,
                                    )
                                    if controller:
                                        extra_fields["k8s-controller"] = controller.name
                                        extra_fields["k8s-kind"] = controller.kind

                            # if so, log to disk
                            self.__disk_logger.info(
                                "event=%s extra=%s"
                                % (
                                    six.text_type(scalyr_util.json_encode(obj)),
                                    six.text_type(
                                        scalyr_util.json_encode(extra_fields)
                                    ),
                                )
                            )

                            # see if we need to check for a new leader
                            if last_check + self._leader_check_interval <= current_time:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Time to check for a new event leader",
                                )
                                break

                        except Exception as e:
                            global_log.exception(
                                "Failed to process single k8s event line due to following exception: %s, %s, %s"
                                % (repr(e), six.text_type(e), traceback.format_exc()),
                                limit_once_per_x_secs=300,
                                limit_key="k8s-stream-events-general-exception",
                            )
                except K8sApiAuthorizationException:
                    global_log.warning(
                        "Could not stream K8s events due to an authorization error.  The "
                        "Scalyr Service Account does not have permission to watch available events.  "
                        "Please recreate the role with the latest definition which can be found "
                        "at https://raw.githubusercontent.com/scalyr/scalyr-agent-2/release/k8s/scalyr-service-account.yaml "
                        "K8s event collection will be disabled until this is resolved.  See the K8s install "
                        "directions for instructions on how to create the role "
                        "https://www.scalyr.com/help/install-agent-kubernetes",
                        limit_once_per_x_secs=300,
                        limit_key="k8s-stream-events-no-permission",
                    )
                except ConnectionError:
                    # ignore these, and just carry on querying in the next loop
                    pass
                except Exception as e:
                    global_log.exception(
                        "Failed to stream k8s events due to the following exception: %s, %s, %s"
                        % (repr(e), six.text_type(e), traceback.format_exc())
                    )

            if k8s_cache is not None:
                k8s_cache.stop()
                k8s_cache = None

        except Exception:
            # TODO:  Maybe remove this catch here and let the higher layer catch it.  However, we do not
            # right now join on the monitor threads, so no one would catch it.  We should change that.
            global_log.exception(
                "Monitor died due to exception:", error_code="failedMonitor"
            )
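The resourceVersion bookkeeping in the loop above can be illustrated in isolation; this is a hedged, standalone sketch with made-up event dicts, not the monitor's code:

def should_process(event, last_resource):
    # Skip events whose resourceVersion is not newer than the last one seen.
    version = event.get("object", {}).get("metadata", {}).get("resourceVersion")
    if version is None:
        return True, last_resource
    version = int(version)
    if version <= last_resource:
        return False, last_resource
    return True, version

last = 0
for event in [{"object": {"metadata": {"resourceVersion": "5"}}},
              {"object": {"metadata": {"resourceVersion": "3"}}}]:
    process, last = should_process(event, last)
    # The second event is skipped because 3 <= 5.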
def run_benchmark():
    return json_encode(data)
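The fragment above looks like a micro-benchmark body; a hedged sketch of how it might be timed with the standard library (data and json_encode are stand-ins for whatever the real harness provides):

import json
import timeit

# Stand-ins for the names the fragment assumes are already defined; the real
# harness presumably binds json_encode to the library under test.
data = {"metric": "cpu.usage", "value": 0.25}
json_encode = json.dumps

print(timeit.timeit(run_benchmark, number=100000))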