Beispiel #1
0
    def test_non_ascii_data_non_utf_locale_coding_orjson(self):
        """Round-trip non-ASCII text through util.json_encode / util.json_decode with "orjson" selected.

        The round trip is checked first with the environment untouched and then with LC_ALL set to
        an invalid value.  A lone surrogate is also encoded and decoded while that override is active.
        The previous LC_ALL value is restored on exit so the override does not leak into other tests.
        """
        util.set_json_lib("orjson")
        original_data = "čććžšđ"

        # Default (UTF-8) locale
        result = util.json_encode(original_data)
        self.assertEqual(result, '"%s"' % (original_data))

        loaded = util.json_decode(result)
        self.assertEqual(loaded, original_data)

        # Remember the current value (None when unset) so it can be restored afterwards.
        previous_lc_all = os.environ.get("LC_ALL")
        try:
            # Non UTF-8 Locale
            os.environ["LC_ALL"] = "invalid"

            result = util.json_encode(original_data)
            self.assertEqual(result, '"%s"' % (original_data))

            loaded = util.json_decode(result)
            self.assertEqual(loaded, original_data)

            # Invalid UTF-8, should fall back to standard json implementation
            original_data = "\ud800"
            result = util.json_encode(original_data)
            self.assertEqual(result, '"\\ud800"')

            loaded = util.json_decode('"\ud800"')
            self.assertEqual(loaded, original_data)
        finally:
            # Put LC_ALL back exactly as it was so later tests see the original environment.
            if previous_lc_all is None:
                os.environ.pop("LC_ALL", None)
            else:
                os.environ["LC_ALL"] = previous_lc_all
Beispiel #2
0
        def __runtest(library):
            """Check that `obj` and `text` round-trip through the given JSON library.

            Encoding `obj` must produce `text`, re-encoding the decoded result must produce `text`
            again, and decoding `text` must produce `obj`.  The library that was active on entry is
            restored afterwards, even when an assertion fails.

            :param library: identifier of the JSON library to activate via `self._setlib`
            """
            original_lib = util.get_json_lib()

            self._setlib(library)
            try:
                # assertEqual is used instead of the assertEquals alias, which was removed in Python 3.12.
                text2 = util.json_encode(obj)
                self.assertEqual(six.ensure_text(text), text2)
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEqual(six.ensure_text(text), text3)
                obj3 = util.json_decode(text)
                self.assertEqual(obj3, obj)
            finally:
                util._set_json_lib(original_lib)
Beispiel #3
0
    def query_api(self, path, pretty=0):
        """ Performs a GET against the k8s API at 'path' and returns the JSON-decoded body of a 200 response.

        A `pretty=<n>` query parameter is always appended to the request url.

        :param path: API path appended to the configured host; may already contain a query string
        :param pretty: integer value sent as the `pretty` query parameter
        :return: the result of `util.json_decode` applied to the response text
        :raises K8sApiException: if the response status code is not 200
        """
        self._ensure_session()
        query_arg = 'pretty=%d' % pretty
        # Extend an existing query string with '&', otherwise start a new one with '?'.
        query_suffix = ('&%s' if "?" in path else '?%s') % query_arg

        url = self._http_host + path + query_suffix
        response = self._session.get(
            url,
            verify=self._verify_connection(),
            timeout=self._timeout,
        )
        response.encoding = "utf-8"

        # Happy path first: a 200 response is decoded and returned immediately.
        if response.status_code == 200:
            return util.json_decode(response.text)

        debug_message = (
            "Invalid response from K8S API.\n\turl: %s\n\tstatus: %d\n\tresponse length: %d"
            % (url, response.status_code, len(response.text))
        )
        global_log.log(
            scalyr_logging.DEBUG_LEVEL_3,
            debug_message,
            limit_once_per_x_secs=300,
            limit_key='k8s_api_query',
        )
        raise K8sApiException(
            "Invalid response from Kubernetes API when querying '%s': %s" %
            (path, str(response)))
def test_json_encode_with_custom_options(benchmark, json_lib, keys_count,
                                         sort_keys):
    """Benchmark json_encode for a given JSON library and SORT_KEYS setting.

    The module-level `scalyr_agent.util.SORT_KEYS` flag is overridden for the duration of the
    benchmark and restored afterwards so the override does not leak into other tests.

    :param benchmark: benchmark fixture exposing `pedantic`
    :param json_lib: name of the JSON library to activate
    :param keys_count: number of keys passed to `generate_random_dict`
    :param sort_keys: value assigned to `scalyr_agent.util.SORT_KEYS` while benchmarking
    """
    # NOTE: orjson doesn't support sort_keys=True
    if json_lib == "orjson":
        if not six.PY3:
            pytest.skip(
                "Skipping under Python 2, orjson is only available for Python 3"
            )
        elif sort_keys is True:
            pytest.skip("orjson doesn't support sort_keys=True")

    previous_sort_keys = scalyr_agent.util.SORT_KEYS
    try:
        set_json_lib(json_lib)
        scalyr_agent.util.SORT_KEYS = sort_keys

        data = generate_random_dict(keys_count=keys_count)

        def run_benchmark():
            return json_encode(data)

        result = benchmark.pedantic(run_benchmark, iterations=50, rounds=100)

        assert get_json_lib() == json_lib
        assert scalyr_agent.util.SORT_KEYS == sort_keys
        assert isinstance(result, six.text_type)
        assert json_decode(result) == data
    finally:
        # Restore the global flag even when the benchmark or an assertion fails.
        scalyr_agent.util.SORT_KEYS = previous_sort_keys
Beispiel #5
0
def parse_scalyr_request(payload):
    """Parses a payload encoded with the Scalyr-specific JSON optimizations. The only place these optimizations
    are used are creating `AddEvent` requests.

    NOTE:  This method is very fragile and just does enough conversion to support the tests.  It could lead to
    erroneous results if colons are used in string content, because unquoted key names are detected with a
    simple regex.  It is also not optimized for performance.

    :param payload: The request payload
    :type payload: bytes
    :return: The parsed request body
    :rtype: dict
    """
    # Our general strategy is to rewrite the payload to be standard JSON and then use the
    # standard JSON libraries to parse it.  There are two main optimizations we need to undo
    # here: length-prefixed strings and not using quotes around key names in JSON objects.

    # First, look for the length-prefixed strings.  These are marked by "`sXXXX" where XXXX is a four
    # byte integer holding the number of bytes in the string.  This precedes the string.  So we find
    # all of those and replace them with quotes.  We also have to escape the string.
    # NOTE: It is very important all of our regex work against byte strings because our input is in bytes.
    length_prefix_re = re.compile(b"`s(....)", re.DOTALL)

    # Accumulate the pieces of the rewritten payload here and join them once at the end.
    # We will eventually parse the joined result as standard JSON.
    rewritten_pieces = []
    # The index of `payload` from which we still have to process (everything before it has been copied).
    next_unprocessed_index = 0

    # Search from the end of the previous string's content rather than iterating over every match, so
    # that a "`s" sequence occurring inside a string's content is not mistaken for a new length prefix.
    match = length_prefix_re.search(payload, next_unprocessed_index)
    while match is not None:
        # First add in the bytes between the last processed and the start of this match.
        rewritten_pieces.append(payload[next_unprocessed_index:match.start(0)])
        # Read the 4 bytes that describe the length, which is stored in regex group 1.
        # 2->TODO struct.pack|unpack in python < 2.7.7 does not allow unicode format string.
        length = compat.struct_unpack_unicode(">i", match.group(1))[0]
        # Grab the string content as raw bytes.  A negative length (malformed input) is treated as
        # an empty string so that the scan always moves forward.
        content_start = match.end(1)
        content_end = content_start + max(length, 0)
        raw_string = payload[content_start:content_end]
        text_string = raw_string.decode("utf-8", "replace")
        rewritten_pieces.append(scalyr_util.json_encode(text_string, binary=True))
        next_unprocessed_index = content_end
        match = length_prefix_re.search(payload, next_unprocessed_index)
    rewritten_pieces.append(payload[next_unprocessed_index:])
    rewritten_payload = b"".join(rewritten_pieces)

    # Now convert all places where we do not have quotes around key names to have quotes.
    # This is pretty fragile.. we look for anything like
    #      foo:
    #  and convert it to
    #      "foo":
    rewritten_payload = re.sub(b"([\\w\\-]+):", b'"\\1":', rewritten_payload)

    # NOTE: Special case for Windows where path is C:\ which we don't want to convert
    rewritten_payload = rewritten_payload.replace(b'"C":\\', b"C:\\")
    # do the same for the low-case.
    rewritten_payload = rewritten_payload.replace(b'"c":\\', b"c:\\")

    return scalyr_util.json_decode(rewritten_payload.decode(
        "utf-8", "replace"))
        def __runtest(library):
            """Check encoding of `obj` against `text` with the given JSON library.

            With the FALLBACK library, or when `obj` is not a JsonArray / JsonObject, encoding must
            produce `text` and survive a decode / encode round trip.  Otherwise encoding is expected
            to raise TypeError.

            :param library: identifier of the JSON library to activate via `self._setlib`
            """
            self._setlib(library)

            if library == FALLBACK or not isinstance(obj, (JsonArray, JsonObject)):
                # assertEqual is used instead of the assertEquals alias, which was removed in Python 3.12.
                text2 = util.json_encode(obj)
                self.assertEqual(text, text2)
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEqual(text, text3)
            else:
                self.assertRaises(TypeError, lambda: util.json_encode(obj))
        def __runtest(library):
            """Check encoding of `obj` against `text` with the given JSON library.

            With the FALLBACK library, or when `obj` is not a JsonArray / JsonObject, encoding must
            produce `text` and survive a decode / encode round trip.  Otherwise encoding is expected
            to raise TypeError.

            :param library: identifier of the JSON library to activate via `self._setlib`
            """
            self._setlib(library)

            if library == FALLBACK or not isinstance(obj, (JsonArray, JsonObject)):
                # assertEqual is used instead of the assertEquals alias, which was removed in Python 3.12.
                text2 = util.json_encode(obj)
                self.assertEqual(text, text2)
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEqual(text, text3)
            else:
                self.assertRaises(TypeError, lambda: util.json_encode(obj))
Beispiel #8
0
        def __runtest(library):
            """Check that `obj` and `text` round-trip through the given JSON library.

            The encoded text is compared to `text` as sorted character sequences, so the comparison
            does not depend on the order in which the characters appear.  Decoding `text` must
            produce `obj`.  A few dicts are also encoded to confirm the output always ends with a
            closing curly brace.  The library that was active on entry is restored afterwards.

            :param library: identifier of the JSON library to activate via `self._setlib`
            """
            original_lib = util.get_json_lib()

            self._setlib(library)
            try:
                # assertEqual is used instead of the assertEquals alias, which was removed in Python 3.12.
                text2 = util.json_encode(obj)
                self.assertEqual(
                    sorted(six.ensure_text(text)),
                    sorted(text2),
                    "%s != %s" % (str(text), str(text2)),
                )
                obj2 = util.json_decode(text2)
                text3 = util.json_encode(obj2)
                self.assertEqual(
                    sorted(six.ensure_text(text)),
                    sorted(text3),
                    "%s != %s" % (str(text), str(text3)),
                )
                obj3 = util.json_decode(text)
                self.assertEqual(obj3, obj)

                # Sanity test to ensure curly brace is always the last character when serializing
                # a dict.
                # Our "rewind to last curly brace" logic in scalyr_agent/scalyr_client.py relies on
                # this behavior.
                values = [
                    {},
                    {
                        "a": "b"
                    },
                    {
                        "a": 1,
                        "b": 2
                    },
                ]

                for value in values:
                    result = util.json_encode(value)
                    self.assertEqual(result[-1], "}")
            finally:
                util.set_json_lib(original_lib)
Beispiel #9
0
    def test_non_ascii_data_non_utf_locale_coding_default_json_lib(self):
        """Round-trip non-ASCII text through util.json_encode / util.json_decode with "json" selected.

        The encoded form is expected to use \\uXXXX escapes.  The round trip is checked first with
        the environment untouched and then with LC_ALL set to an invalid value.  The previous
        LC_ALL value is restored on exit so the override does not leak into other tests.
        """
        util.set_json_lib("json")
        original_data = "čććžšđ"

        # Default (UTF-8) locale
        result = util.json_encode(original_data)
        self.assertEqual(result,
                         '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"')

        loaded = util.json_decode(result)
        self.assertEqual(loaded, original_data)

        # Remember the current value (None when unset) so it can be restored afterwards.
        previous_lc_all = os.environ.get("LC_ALL")
        try:
            # Non UTF-8 Locale
            os.environ["LC_ALL"] = "invalid"

            result = util.json_encode(original_data)
            self.assertEqual(result,
                             '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"')

            loaded = util.json_decode(result)
            self.assertEqual(loaded, original_data)
        finally:
            # Put LC_ALL back exactly as it was so later tests see the original environment.
            if previous_lc_all is None:
                os.environ.pop("LC_ALL", None)
            else:
                os.environ["LC_ALL"] = previous_lc_all
Beispiel #10
0
    def query_api(self, path):
        """ Performs a GET against the kubelet API at 'path' and returns the JSON-decoded body of a 200 response.

        :param path: API path appended to the configured host
        :return: the result of `util.json_decode` applied to the response text
        :raises KubeletApiException: if the response status code is not 200
        """
        url = self._http_host + path
        response = self._session.get(url, timeout=self._timeout)
        response.encoding = "utf-8"

        # Happy path first: a 200 response is decoded and returned immediately.
        if response.status_code == 200:
            return util.json_decode(response.text)

        debug_message = (
            "Invalid response from Kubelet API.\n\turl: %s\n\tstatus: %d\n\tresponse length: %d"
            % (url, response.status_code, len(response.text))
        )
        global_log.log(
            scalyr_logging.DEBUG_LEVEL_3,
            debug_message,
            limit_once_per_x_secs=300,
            limit_key='kubelet_api_query',
        )
        raise KubeletApiException(
            "Invalid response from Kubelet API when querying '%s': %s" %
            (path, str(response)))
def _add_non_utf8_to_checkpoint_file(path):
    """
    Add an invalid UTF-8 byte to the checkpoint data stored in file located in "path"

    The file is read and decoded as JSON, a "test" entry holding a placeholder is added, the
    data is re-encoded to bytes, and the placeholder is replaced with the raw byte 0x96 before
    the result is written back to the same path.

    :param path: path of the checkpoint file to rewrite in place
    """
    # Context managers make sure the handles are closed even if decoding or writing fails.
    with open(path, "r") as fp:
        data = scalyr_util.json_decode(fp.read())
    # 2-> TODO json libraries do not allow serialize bytes string with invalid UTF-8(ujson)or even bytes in general(json).
    # so to test this case we must write non-utf8 byte directly, without serializing.

    # this string will be replaced with invalid utf8 byte after encoding.
    data["test"] = "__replace_me__"

    json_string = scalyr_util.json_encode(data, binary=True)

    # replace prepared substring to invalid byte.
    json_string = json_string.replace(b"__replace_me__", b"\x96")

    with open(path, "wb") as fp:
        fp.write(json_string)
Beispiel #12
0
    def run(self):
        """Begins executing the monitor, writing metric output to logger.

        Returns immediately if the monitor has been disabled.  Otherwise loops until the thread is
        stopped.  Each pass checks leadership at most once per `_leader_check_interval`; when this
        instance is not the leader it stops any existing k8s cache, sleeps and retries.  When it is
        the leader it streams events from the k8s API, filters them by resource version, involved
        object kind and namespace, and writes each accepted event plus extra fields to the disk
        logger.  Any exception that escapes the loop is logged rather than propagated.
        """
        if self.__disable_monitor:
            global_log.info(
                "kubernetes_events_monitor exiting because it has been disabled."
            )
            return

        try:
            # NOTE(review): these bare attribute reads discard their values; presumably they exist to
            # surface a configuration error early -- confirm.
            self._global_config.k8s_api_url
            self._global_config.k8s_verify_api_queries

            # We only create the k8s_cache while we are the leader
            k8s_cache = None

            if self.__log_watcher:
                self.log_config = self.__log_watcher.add_log_config(
                    self.module_name, self.log_config
                )

            # First instance of k8s api uses the main rate limiter.  Leader election related API calls to the k8s
            # masters will go through this api/rate limiter.
            k8s_api_main = KubernetesApi.create_instance(
                self._global_config, rate_limiter_key="K8S_CACHE_MAIN_RATELIMITER"
            )

            # Second instance of k8s api uses an ancillary ratelimiter (for exclusive use by events monitor)
            k8s_api_events = KubernetesApi.create_instance(
                self._global_config, rate_limiter_key="K8S_EVENTS_RATELIMITER"
            )

            # k8s_cache is initialized with the main rate limiter. However, streaming-related API calls should go
            # through the ancillary ratelimiter. This is achieved by passing ApiQueryOptions with desired rate_limiter.
            k8s_events_query_options = ApiQueryOptions(
                max_retries=self._global_config.k8s_controlled_warmer_max_query_retries,
                rate_limiter=k8s_api_events.default_query_options.rate_limiter,
            )

            pod_name = k8s_api_main.get_pod_name()
            self._node_name = k8s_api_main.get_node_name(pod_name)
            cluster_name = k8s_api_main.get_cluster_name()

            # last_event is passed to stream_events on each pass; last_resource is the highest
            # resource version accepted so far.
            last_event = None
            last_resource = 0

            # Backdate the last check so the first loop iteration performs a leader check immediately.
            last_check = time.time() - self._leader_check_interval

            # Tracks the leader most recently announced at info level, to avoid repeating the message.
            last_reported_leader = None
            while not self._is_thread_stopped():
                current_time = time.time()

                # if we are the leader, we could be going through this loop before the leader_check_interval
                # has expired, so make sure to only check for a new leader if the interval has expired
                if last_check + self._leader_check_interval <= current_time:
                    last_check = current_time
                    # check if we are the leader
                    if not self._is_leader(k8s_api_main):
                        # if not, then sleep and try again
                        global_log.log(
                            scalyr_logging.DEBUG_LEVEL_1,
                            "Leader is %s" % (six.text_type(self._current_leader)),
                        )
                        if (
                            self._current_leader is not None
                            and last_reported_leader != self._current_leader
                        ):
                            global_log.info(
                                "Kubernetes event leader is %s"
                                % six.text_type(self._current_leader)
                            )
                            last_reported_leader = self._current_leader
                        # The cache is only kept while we are the leader, so drop it here.
                        if k8s_cache is not None:
                            k8s_cache.stop()
                            k8s_cache = None
                        self._sleep_but_awaken_if_stopped(self._leader_check_interval)
                        continue

                    global_log.log(
                        scalyr_logging.DEBUG_LEVEL_1,
                        "Leader is %s" % (six.text_type(self._current_leader)),
                    )
                try:
                    if last_reported_leader != self._current_leader:
                        global_log.info("Acting as Kubernetes event leader")
                        last_reported_leader = self._current_leader

                    if k8s_cache is None:
                        # create the k8s cache
                        k8s_cache = k8s_utils.cache(self._global_config)

                    # start streaming events
                    lines = k8s_api_events.stream_events(last_event=last_event)

                    # NOTE: the local name `json` holds the decoded event for the rest of this method.
                    json = {}
                    for line in lines:
                        try:
                            json = scalyr_util.json_decode(line)
                        except Exception as e:
                            # A line that cannot be decoded is logged and skipped.
                            global_log.warning(
                                "Error parsing event json: %s, %s, %s"
                                % (line, six.text_type(e), traceback.format_exc())
                            )
                            continue

                        try:
                            # check to see if the resource version we are using has expired
                            if self._is_resource_expired(json):
                                last_event = None
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1, "K8S resource expired"
                                )
                                continue

                            obj = json.get("object", dict())
                            event_type = json.get("type", "UNKNOWN")

                            # resource version hasn't expired, so update it to the most recently seen version
                            last_event = last_resource

                            metadata = obj.get("metadata", dict())

                            # skip any events with resourceVersions higher than ones we've already seen
                            resource_version = metadata.get("resourceVersion", None)
                            if resource_version is not None:
                                resource_version = int(resource_version)
                            if resource_version and resource_version <= last_resource:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_2,
                                    "Skipping older resource events",
                                )
                                continue

                            # NOTE(review): resource_version is None when the metadata has no
                            # "resourceVersion", which stores None in last_resource; a later
                            # `resource_version <= last_resource` comparison against an int would then
                            # raise TypeError on Python 3 -- confirm whether this can occur.
                            last_resource = resource_version
                            last_event = resource_version

                            # see if this event is about an object we are interested in
                            (kind, namespace, name) = self._get_involved_object(obj)

                            if kind is None:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Ignoring event due to None kind",
                                )
                                continue

                            # exclude any events that don't involve objects we are interested in
                            if (
                                self.__event_object_filter
                                and kind not in self.__event_object_filter
                            ):
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Ignoring event due to unknown kind %s - %s"
                                    % (kind, six.text_type(metadata)),
                                )
                                continue

                            # ignore events that belong to namespaces we are not interested in
                            if namespace not in self.__k8s_namespaces_to_include:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Ignoring event due to belonging to an excluded namespace '%s'"
                                    % (namespace),
                                )
                                continue

                            # get cluster and deployment information
                            extra_fields = {
                                "k8s-cluster": cluster_name,
                                "watchEventType": event_type,
                            }
                            if kind:
                                if kind == "Pod":
                                    extra_fields["pod_name"] = name
                                    extra_fields["pod_namespace"] = namespace
                                    pod = k8s_cache.pod(
                                        namespace,
                                        name,
                                        current_time,
                                        query_options=k8s_events_query_options,
                                    )
                                    if pod and pod.controller:
                                        extra_fields[
                                            "k8s-controller"
                                        ] = pod.controller.name
                                        extra_fields["k8s-kind"] = pod.controller.kind
                                elif kind != "Node":
                                    # For kinds other than Pod and Node, look the object up as a controller.
                                    controller = k8s_cache.controller(
                                        namespace,
                                        name,
                                        kind,
                                        current_time,
                                        query_options=k8s_events_query_options,
                                    )
                                    if controller:
                                        extra_fields["k8s-controller"] = controller.name
                                        extra_fields["k8s-kind"] = controller.kind

                            # if so, log to disk
                            self.__disk_logger.info(
                                "event=%s extra=%s"
                                % (
                                    six.text_type(scalyr_util.json_encode(obj)),
                                    six.text_type(
                                        scalyr_util.json_encode(extra_fields)
                                    ),
                                )
                            )

                            # see if we need to check for a new leader
                            # NOTE(review): current_time is captured once per outer loop pass and last_check
                            # is set equal to it after a leader check, so this only holds on passes that
                            # skipped the check -- confirm this is the intended trigger.
                            if last_check + self._leader_check_interval <= current_time:
                                global_log.log(
                                    scalyr_logging.DEBUG_LEVEL_1,
                                    "Time to check for a new event leader",
                                )
                                break

                        except Exception as e:
                            # A failure on one event is logged (rate limited) and the stream continues.
                            global_log.exception(
                                "Failed to process single k8s event line due to following exception: %s, %s, %s"
                                % (repr(e), six.text_type(e), traceback.format_exc()),
                                limit_once_per_x_secs=300,
                                limit_key="k8s-stream-events-general-exception",
                            )
                except K8sApiAuthorizationException:
                    global_log.warning(
                        "Could not stream K8s events due to an authorization error.  The "
                        "Scalyr Service Account does not have permission to watch available events.  "
                        "Please recreate the role with the latest definition which can be found "
                        "at https://raw.githubusercontent.com/scalyr/scalyr-agent-2/release/k8s/scalyr-service-account.yaml "
                        "K8s event collection will be disabled until this is resolved.  See the K8s install "
                        "directions for instructions on how to create the role "
                        "https://www.scalyr.com/help/install-agent-kubernetes",
                        limit_once_per_x_secs=300,
                        limit_key="k8s-stream-events-no-permission",
                    )
                except ConnectionError:
                    # ignore these, and just carry on querying in the next loop
                    pass
                except Exception as e:
                    global_log.exception(
                        "Failed to stream k8s events due to the following exception: %s, %s, %s"
                        % (repr(e), six.text_type(e), traceback.format_exc())
                    )

            # The thread has been asked to stop: release the cache if we still hold one.
            if k8s_cache is not None:
                k8s_cache.stop()
                k8s_cache = None

        except Exception:
            # TODO:  Maybe remove this catch here and let the higher layer catch it.  However, we do not
            # right now join on the monitor threads, so no one would catch it.  We should change that.
            global_log.exception(
                "Monitor died due to exception:", error_code="failedMonitor"
            )
 def parse_file_as_json(self, filename):
     """Read `filename` in text mode and return its content decoded by `scalyr_util.json_decode`.

     :param filename: path of the file to read
     :return: whatever `scalyr_util.json_decode` returns for the file content
     """
     with open(filename, "r") as json_file:
         return scalyr_util.json_decode(json_file.read())
 def parse_file_as_json( self, filename ):
     """Read `filename` in text mode and return its content decoded by `scalyr_util.json_decode`.

     :param filename: path of the file to read
     :return: whatever `scalyr_util.json_decode` returns for the file content
     """
     with open( filename, 'r' ) as json_file:
         return scalyr_util.json_decode( json_file.read() )
 def run_benchmark():
     """Decode the enclosing scope's `data` with `json_decode` and return the result."""
     decoded = json_decode(data)
     return decoded