Example no. 1
0
def _postprocess_yaml(data):
    '''
    Hack that lets us use datetimes in JSON-formatted feedback objects:
    keys ending in "!!timestamp" have their string values parsed into real
    datetime objects and the suffix stripped from the key name. Otherwise
    the datetimes would remain strings after the YAML is loaded.
    Also performs other YAML massaging (string coercion of selected fields).
    Modifies the YAML object in place.
    '''

    TIMESTAMP_SUFFIX = '!!timestamp'

    # Gather the affected paths up front rather than mutating the object
    # mid-walk (which might risk the walk changing...?).
    pending = []
    for path, raw in utils.objwalk(data):
        if str(path[-1]).endswith(TIMESTAMP_SUFFIX):
            pending.append((path, raw))

    # Swap each timestamp string for a real datetime and rename its key
    # to drop the suffix.
    for path, raw in pending:
        key = path[-1]
        stripped_key = key[:key.rindex(TIMESTAMP_SUFFIX)]
        parsed = datetime.datetime.strptime(raw, '%Y-%m-%dT%H:%M:%S.%fZ')
        utils.rename_key_in_obj_at_path(data, path, stripped_key)
        utils.assign_value_to_obj_at_path(
            data, list(path[:-1]) + [stripped_key], parsed)

    # If a hex ID happens to be all digits, YAML decodes it as an integer
    # rather than a string, which could mess up processing later on.
    _ensure_field_is_string(str, data, ('Metadata', 'id'))

    # Likewise fix the data type of other fields: a feedback message that is
    # just a number should still be interpreted as a string.
    _ensure_field_is_string(unicode, data, ('Feedback', 'email'))
    _ensure_field_is_string(unicode, data, ('Feedback', 'Message', 'text'))
def _postprocess_yaml(data):
    '''
    This function is a hack to let us use datetimes in JSON-formatted feedback
    objects. Otherwise the datetimes will remain strings after loading the YAML.
    Keys ending in "!!timestamp" have their string values parsed into real
    datetime objects and the suffix stripped from the key name.
    Modifies the YAML object directly.
    It's also used for any other YAML massaging.
    '''

    TIMESTAMP_SUFFIX = '!!timestamp'

    # First just collect the paths to change, so we're not modifying while
    # walking the object (which might risk the walk changing...?).
    timestamps = [(path, val) for path, val in utils.objwalk(data)
                  if str(path[-1]).endswith(TIMESTAMP_SUFFIX)]

    # Replace the timestamp strings with actual datetimes and change the key name.
    for path, val in timestamps:
        new_path = list(path[:-1])
        new_path.append(path[-1][:path[-1].rindex(TIMESTAMP_SUFFIX)])
        new_val = datetime.datetime.strptime(val, '%Y-%m-%dT%H:%M:%S.%fZ')
        utils.rename_key_in_obj_at_path(data, path, new_path[-1])
        utils.assign_value_to_obj_at_path(data, new_path, new_val)

    #
    # Fix integer-looking IDs
    #
    # If a hex ID happens to have all numbers, YAML will decode it as an
    # integer rather than a string. This could mess up processing later on.
    # Fix: guard the lookup -- feedback objects without a Metadata section or
    # an id field previously raised KeyError here.
    metadata = data.get('Metadata')
    if isinstance(metadata, dict) and 'id' in metadata:
        metadata['id'] = str(metadata['id'])
def _windows_redact_panic_logs(obj, path, val):
    '''
    Redact the tail of any panic line: a string value beginning with
    "core panic: " is replaced so that everything after the prefix
    becomes "[REDACTED]".
    See `_windows_redact_panic_logs_test()` for examples.
    '''
    if not isinstance(val, utils.string_types):
        return
    panic_prefix = "core panic: "
    if val.startswith(panic_prefix):
        utils.assign_value_to_obj_at_path(
            obj, path, panic_prefix + "[REDACTED]")
def _shorten_ints(data):
    """
    Python can handle integer values (longs) that are larger than MongoDB will
    store; it will throw an exception if an attempt is made to insert them.
    We'll go through all the integer values and convert them to floats if
    their magnitude is too big. Modifies `data` in place.
    We have seen this occur with freeVirtualMemoryKB in Windows feedback.
    """
    # Values whose magnitude exceeds this are converted. Kept from the
    # original code; slightly conservative relative to the true 64-bit range.
    limit = sys.maxsize - 1
    for path, value in utils.objwalk(data):
        # Fix: also catch large *negative* integers, which equally exceed
        # MongoDB's signed 64-bit integer range and would fail on insert.
        # (bools are harmless here: abs(True) can never exceed the limit.)
        if isinstance(value, (int, long)) and abs(value) > limit:
            utils.assign_value_to_obj_at_path(data, path, float(value))
def _redact_upstream_proxy_errors(obj, path, val):
    '''
    Redacts any text which follows the target upstream proxy error string.

    If the diagnostic message is of the format "<prefix>: <json object>",
    as defined by `diagnostic_msg_regex`, then the JSON is deserialized
    and an attempt is made to preserve the JSON structure by traversing
    the values of the dictionary and performing the redaction in place,
    instead of truncating the text following the upstream proxy error
    string and breaking the JSON structure.
    '''
    if isinstance(val, utils.string_types):

        target = "upstreamproxy error: proxyURI url.Parse: parse "

        # An optimization to avoid deserializing the JSON string contained
        # within the diagnostic message if there is no match.
        #
        # Warnings:
        # - This search will fail if the target string is contained
        #   within the inner JSON, but represented with escaped unicode
        #   characters -- ref. https://tools.ietf.org/html/rfc8259#section-8.3.
        #   There is no attempt to address this because we do not currently
        #   expect our clients to generate any diagnostic logs with escaped
        #   unicode characters.
        # - Structural JSON characters will be escaped in the inner JSON and
        #   the target string should reflect this if it is updated to include
        #   any -- ref. https://tools.ietf.org/html/rfc8259#section-7.
        index = val.find(target)
        if index == -1:
            return

        # NOTE(review): assumes group(1) is the message prefix and group(2)
        # is the JSON payload -- `diagnostic_msg_regex` is defined elsewhere
        # in this module; confirm against its definition.
        result = diagnostic_msg_regex.match(val)
        if result is not None:
            try:
                j = json.loads(result.group(2))
                redacted = _redact_text_proceeding_target_from_dict(target, j)
                if redacted:
                    # Re-serialize the redacted structure, keeping the
                    # original "<prefix>: " framing intact.
                    redacted_val = result.group(1) + ": " + json.dumps(j)
                    utils.assign_value_to_obj_at_path(obj, path, redacted_val)
                    return
            except ValueError:
                # Malformed inner JSON; fall through to the blunt truncation.
                pass

        # Fallback on a less finessed redaction: keep everything up to and
        # including the target string, drop the rest.
        redacted_val = val[:index + len(target)] + "<redacted>"
        utils.assign_value_to_obj_at_path(obj, path, redacted_val)
def _ios_vpn_redact_start_tunnel_with_options(obj, path, val):
    '''
    Redact target fields from startTunnelWithOptions log.
    Only acts on string values of the form "ExtensionInfo: <json>" whose
    payload describes a PacketTunnelProvider "Start" event; all other values
    are left untouched.
    See `_redact_sensitive_values_test()` for examples.
    '''
    if isinstance(val, utils.string_types):

        extensionInfoPrefix = "ExtensionInfo: "

        if val.startswith(extensionInfoPrefix):

            # Best-effort parse of the JSON payload after the prefix; a
            # malformed payload is left as-is.
            try:
                j = json.loads(val[len(extensionInfoPrefix):])
            except ValueError:
                return

            try:
                event = j["PacketTunnelProvider"]["Event"]

                # Only "Start" events carry the sensitive tunnel options.
                if event == "Start":

                    redacted = _redact_start_tunnel_with_options(
                        j["PacketTunnelProvider"])

                    if not _validate_start_tunnel_with_options(redacted):
                        # Invalid log, redact for safe measure.
                        utils.assign_value_to_obj_at_path(
                            obj, path, "[REDACTED]")

                    else:
                        # Re-serialize with the original prefix framing.
                        redacted_val = extensionInfoPrefix + json.dumps(
                            {"PacketTunnelProvider": redacted})
                        utils.assign_value_to_obj_at_path(
                            obj, path, redacted_val)

            # Payload lacks the expected structure (missing keys or
            # non-dict values); leave the original value untouched.
            except KeyError:
                return

            except TypeError:
                return
Example no. 7
0
def _ensure_field_is_string(stringtype, data, fieldpath):
    # Coerce the value at `fieldpath` (if present) to `stringtype`,
    # writing the result back in place. Missing fields are left alone.
    current = utils.coalesce(data, fieldpath)
    if current is None:
        return
    utils.assign_value_to_obj_at_path(data, fieldpath, stringtype(current))
Example no. 8
0
def _ensure_field_is_string(stringtype, data, fieldpath):
    '''
    Coerce the value at `fieldpath` in `data` to `stringtype`, in place.
    Does nothing when the field is absent (coalesce yields None).
    '''
    existing = utils.coalesce(data, fieldpath)
    if existing is not None:
        coerced = stringtype(existing)
        utils.assign_value_to_obj_at_path(data, fieldpath, coerced)