예제 #1
0
    def test25array_machine_started_from_json_machine(self):
        """Test if the array_machine is started from the json_machine (arrays of scalars and arrays of objects)."""
        def check_value(parsed):  # skipcq: PY-D0003
            self.assertEqual(parsed, compare_value)

        def feed(payload):  # skipcq: PY-D0003
            # Push every byte through the state machine and hand back the state reached at the end.
            machine = json_machine(check_value)
            for byte in payload:
                machine = machine(byte)
            return machine

        # Array of scalar values: once on a single line, once spread over several lines with tabs.
        compare_value = {
            'values_array': ['string', 22, 22.5, True, False, None]
        }
        scalar_payloads = (
            b'{"values_array": ["string", 22, 22.50, true, false, null]}',
            b'{"values_array": [\n\t\t"string",\n\t\t22,\n\t\t22.50,\n\t\ttrue,\n\t\tfalse,\n\t\tnull]}',
        )
        for payload in scalar_payloads:
            self.assertEqual(feed(payload).__name__, '_value')

        # Array whose members are objects themselves.
        compare_value = {'objects_array': [{'value': 22}, {'value': 'string'}]}
        final_state = feed(b'{"objects_array": [{"value": 22}, {"value": "string"}]}')
        self.assertEqual(final_state.__name__, '_value')
예제 #2
0
    def test18constant_machine_started_from_json_machine(self):
        """
        Test if the constant_machine is started from the json_machine.
        Due to changes in the json_machine all values must be objects, so every constant is fed as the value of the key "var".
        """
        def check_value(parsed):  # skipcq: PY-D0003
            self.assertEqual(parsed, {'var': value})

        # Iterating bytes yields the same integer code points the machine is fed one at a time.
        object_prefix = b'{"var": '
        constants = ((True, b'true'), (False, b'false'), (None, b'null'))
        for value, literal in constants:
            machine = json_machine(check_value)
            for byte in object_prefix + literal:
                machine = machine(byte)
            # Closing the object must leave the machine in the '_value' state.
            closed = machine(ord('}'))
            self.assertEqual(closed.__name__, '_value')
    def test28object_machine_started_from_json_machine(self):
        """Test if the object_machine is started from the json_machine, using three layouts of the same object."""
        def check_value(parsed):  # skipcq: PY-D0003
            self.assertEqual(parsed, compare_value)

        def feed(payload):  # skipcq: PY-D0003
            # Push every byte through the state machine and hand back the state reached at the end.
            machine = json_machine(check_value)
            for byte in payload:
                machine = machine(byte)
            return machine

        compare_value = {
            'string': 'Hello World',
            'integer': 22,
            'float': 22.23,
            'bool': True,
            'array': ['Hello', 'World']
        }
        payloads = (
            # single line, no spaces
            b'{"string":"Hello World","integer":22,"float":22.23,"bool":true,"array":["Hello","World"]}',
            # single line with spaces
            b'{"string": "Hello World", "integer": 22, "float": 22.23, "bool": true, "array": ["Hello", "World"]}',
            # multiline with tabs
            b'{\n\t"string": "Hello World",\n\t"integer": 22,\n\t"float": 22.23,\n\t"bool": true,\n\t"array": ['
            b'\n\t\t"Hello",\n\t\t"World"]}',
        )
        for payload in payloads:
            self.assertEqual(feed(payload).__name__, '_value')
예제 #4
0
    def test29json_machine_only_allow_objects_at_start(self):
        """The json_machine must only allow objects at the start."""
        def raise_error(_):  # skipcq: PY-D0003
            raise Exception("Invalid returned as valid.")

        # Start bytes of strings, signed/unsigned numbers, arrays and the constants true/false/null.
        for forbidden in b'"+-1[tfn':
            self.assertIsNone(json_machine(raise_error)(forbidden))
        # An opening brace is the only accepted first byte checked here.
        self.assertIsNotNone(json_machine(raise_error)(ord('{')))
예제 #5
0
    def test28object_machine_started_from_json_machine(self):
        """Test if the object_machine is started from the json_machine."""
        def check_value(data):  # skipcq: PY-D0003
            self.assertEqual(data, compare_value)

        # single line, no spaces
        value = b'{"string":"Hello World","integer":22,"float":22.23,"bool":true,"array":["Hello","World"]}'
        compare_value = {
            'string': 'Hello World',
            'integer': 22,
            'float': 22.23,
            'bool': True,
            'array': ['Hello', 'World']
        }
        state = json_machine(check_value)
        for c in value:
            state = state(c)
        self.assertEqual(state.__name__, '_value')
        # single line with spaces
        value = b'{"string": "Hello World", "integer": 22, "float": 22.23, "bool": true, "array": ["Hello", "World"]}'
        state = json_machine(check_value)
        for c in value:
            state = state(c)
        self.assertEqual(state.__name__, '_value')
        # multiline with tabs
        value = b'{\n\t"string": "Hello World",\n\t"integer": 22,\n\t"float": 22.23,\n\t"bool": true,\n\t"array": [' \
                b'\n\t\t"Hello",\n\t\t"World"]}'
        state = json_machine(check_value)
        for c in value:
            state = state(c)
        self.assertEqual(state.__name__, '_value')

        value = b"""{"HistogramData": {"Bins": {"...-0]": 0, "[0-1]": 0, "[1-2]": 0, "[2-3]": 0, "[3-...]": 0\n}, "BinNames": ["...-0]",
"[0-1]", "[1-2]", "[2-3]", "[3-...]"]}}"""
        compare_value = {
            "HistogramData": {
                "Bins": {
                    '...-0]': 0,
                    '[0-1]': 0,
                    '[1-2]': 0,
                    '[2-3]': 0,
                    '[3-...]': 0
                },
                "BinNames": ['...-0]', '[0-1]', '[1-2]', '[2-3]', '[3-...]']
            }
        }
        state = json_machine(check_value)
        for c in value:
            state = state(c)
        self.assertEqual(state.__name__, '_value')
예제 #6
0
 def check_number_machine_from_json_machine(self, check_int_value, value,
                                            end_sign):  # skipcq: PY-D0003
     """
     Feed value element by element into a fresh json_machine, then feed end_sign and assert the resulting state is '_value'.
     @param check_int_value the callback handed to json_machine.
     @param value the iterable of input elements fed to the machine.
     @param end_sign the final element fed after value.
     """
     machine = json_machine(check_int_value)
     for element in value:
         machine = machine(element)
     final_state = machine(end_sign)
     self.assertEqual(final_state.__name__, '_value')
    def consume_data(self, stream_data, end_of_stream_flag=False):
        """
        Consume data from the underlying stream for atomizing.
        The data is cut into atoms at self.eol_sep; when self.json_format is set, a JSON object recognised by json_machine is
        treated as one atom as well. Every atom is wrapped into a LogAtom, parsed with self.parsing_model if one is set and
        handed to self.dispatch_atom. Overlong and incomplete lines are reported via self.dispatch_event instead.
        @param stream_data the buffered stream data, processed from offset 0 (presumably bytes, as it is searched for b'{'
        and self.eol_sep - verify against the caller).
        @param end_of_stream_flag when True, an unterminated tail is reported as an event instead of being left unconsumed.
        @return the number of consumed bytes, 0 if the atomizer would need more data for a complete atom or -1 when no data was
        consumed at the moment but data might be consumed later on.
        """
        # Loop until as much streamData as possible was processed and then return a result. The correct processing of endOfStreamFlag
        # is tricky: by default, even when all data was processed, do one more iteration to handle also the flag.
        consumed_length = 0
        while True:
            # An atom that could not be dispatched earlier has to be delivered before any new data is looked at.
            if self.last_unconsumed_log_atom is not None:
                # Keep length before dispatching: dispatch will reset the field.
                data_length = len(self.last_unconsumed_log_atom.raw_data)
                if self.dispatch_atom(self.last_unconsumed_log_atom):
                    consumed_length += data_length + len(self.eol_sep)
                    continue
                # Nothing consumed, tell upstream to wait if appropriate.
                if consumed_length == 0:
                    consumed_length = -1
                break

            line_end = None
            # Module-level globals are reset before each JSON scan.
            # NOTE(review): presumably the found_json callback (defined outside this block) sets them when an object
            # is complete - confirm.
            global breakout  # skipcq: PYL-W0603
            breakout = False
            global data  # skipcq: PYL-W0603
            data = None
            valid_json = False
            if self.json_format:
                state = json_machine(found_json)
                # Pre-initialise i so it is defined even if the remaining slice is empty and the loop body never runs.
                i = 0
                for i, char in enumerate(stream_data[consumed_length:]):
                    state = state(char)
                    # Stop on the breakout flag, on a rejected byte (state is None) or once the length limit is exceeded.
                    if breakout or state is None or i > self.max_line_length:
                        break
                # check if the json is still valid, but the stream_data is at the end
                if not breakout and state is not None and i + consumed_length == len(
                        stream_data) - 1 and not end_of_stream_flag:
                    return consumed_length
                # Accept the scanned range as one JSON atom only if it is within the length limit, contains an opening
                # brace and the global data was set during the scan.
                if 0 < i <= self.max_line_length and b'{' in stream_data[
                        consumed_length:consumed_length + i +
                        1] and data is not None:
                    line_end = consumed_length + i + 1
                    valid_json = True
                elif i > self.max_line_length:
                    self.in_overlong_line_flag = True
            # No JSON atom found (or JSON mode disabled): fall back to searching for the line separator.
            if line_end is None:
                line_end = stream_data.find(self.eol_sep, consumed_length)

            # Inside an overlong line: discard data up to and including the next separator, or everything if none is found.
            if self.in_overlong_line_flag:
                if line_end < 0:
                    consumed_length = len(stream_data)
                    if end_of_stream_flag:
                        self.dispatch_event(
                            'Overlong line terminated by end of stream',
                            stream_data)
                        self.in_overlong_line_flag = False
                    break
                consumed_length = line_end + len(self.eol_sep)
                self.in_overlong_line_flag = False
                continue

            # This is the valid start of a normal/incomplete/overlong line.
            if line_end < 0:
                tail_length = len(stream_data) - consumed_length
                if tail_length > self.max_line_length:
                    self.dispatch_event('Start of overlong line detected',
                                        stream_data[consumed_length:])
                    self.in_overlong_line_flag = True
                    consumed_length = len(stream_data)
                    # Stay in loop to handle also endOfStreamFlag!
                    continue
                if end_of_stream_flag and (tail_length != 0):
                    self.dispatch_event('Incomplete last line',
                                        stream_data[consumed_length:])
                    consumed_length = len(stream_data)
                break

            # This is at least a complete/overlong line.
            line_length = line_end + len(self.eol_sep) - consumed_length
            # Atoms accepted by the JSON scan are exempt from this length check.
            if line_length > self.max_line_length and not valid_json:
                self.dispatch_event('Overlong line detected',
                                    stream_data[consumed_length:line_end])
                consumed_length = line_end + len(self.eol_sep)
                continue

            # This is a normal line.
            line_data = stream_data[consumed_length:line_end]
            log_atom = LogAtom(line_data, None, None, self)
            if self.parsing_model is not None:
                match_context = MatchContext(line_data)
                match_element = self.parsing_model.get_match_element(
                    '', match_context)
                # Attach the match only if the model matched and match_context.match_data is empty afterwards.
                if (match_element
                        is not None) and not match_context.match_data:
                    log_atom.parser_match = ParserMatch(match_element)
                    # Use the first configured timestamp path present in the match dictionary as the atom timestamp.
                    for default_timestamp_path in self.default_timestamp_paths:
                        ts_match = log_atom.parser_match.get_match_dictionary(
                        ).get(default_timestamp_path, None)
                        if ts_match is not None:
                            log_atom.set_timestamp(ts_match.match_object)
                            break
            if self.dispatch_atom(log_atom):
                # The parenthesised boolean is used as an integer: if a JSON atom is not directly followed by eol_sep,
                # 1 is subtracted from the advanced position.
                # NOTE(review): this subtracts 1 rather than len(self.eol_sep) - confirm the behaviour for separators
                # longer than one byte.
                consumed_length = line_end + len(self.eol_sep) - (
                    valid_json
                    and stream_data[line_end:line_end + len(self.eol_sep)] !=
                    self.eol_sep)
                continue
            if consumed_length == 0:
                # Downstream did not want the data, so tell upstream to block for a while.
                consumed_length = -1
            break
        return consumed_length