def test25array_machine_started_from_json_machine(self):
    """Test if the array_machine is started from the json_machine."""
    def check_value(data):  # skipcq: PY-D0003
        self.assertEqual(data, compare_value)

    value = b'{"values_array": ["string", 22, 22.50, true, false, null]}'
    compare_value = {'values_array': ['string', 22, 22.5, True, False, None]}
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

    value = b'{"values_array": [\n\t\t"string",\n\t\t22,\n\t\t22.50,\n\t\ttrue,\n\t\tfalse,\n\t\tnull]}'
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

    value = b'{"objects_array": [{"value": 22}, {"value": "string"}]}'
    compare_value = {'objects_array': [{'value': 22}, {'value': 'string'}]}
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

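# A minimal sketch (not part of the test flow) of the byte-at-a-time driving pattern exercised
# above; the import path aminer.input.JsonStateMachine is assumed here and the variable names
# are purely illustrative:
#
#     from aminer.input.JsonStateMachine import json_machine
#
#     parsed = []
#     state = json_machine(parsed.append)   # callback fires once the top-level object is complete
#     for byte in b'{"values_array": [1, 2.5, true, null]}':
#         state = state(byte)               # each byte yields the next state function
#     # parsed == [{'values_array': [1, 2.5, True, None]}]
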
def test18constant_machine_started_from_json_machine(self):
    """Test if the constant_machine is started from the json_machine.

    Due to changes in the json_machine all values must be objects.
    """
    def check_value(data):  # skipcq: PY-D0003
        self.assertEqual(data, {'var': value})

    OBJECT_PREFIX = [0x7b, 0x22, 0x76, 0x61, 0x72, 0x22, 0x3a, 0x20]  # {"var":
    TRUE = [0x74, 0x72, 0x75, 0x65]
    FALSE = [0x66, 0x61, 0x6c, 0x73, 0x65]
    NULL = [0x6e, 0x6c, 0x6c] if False else [0x6e, 0x75, 0x6c, 0x6c]

    value = True
    state = json_machine(check_value)
    for t in OBJECT_PREFIX + TRUE:
        state = state(t)
    self.assertEqual(state(ord('}')).__name__, '_value')

    value = False
    state = json_machine(check_value)
    for f in OBJECT_PREFIX + FALSE:
        state = state(f)
    self.assertEqual(state(ord('}')).__name__, '_value')

    value = None
    state = json_machine(check_value)
    for n in OBJECT_PREFIX + NULL:
        state = state(n)
    self.assertEqual(state(ord('}')).__name__, '_value')

def test29json_machine_only_allow_objects_at_start(self):
    """The json_machine must only allow objects at the start."""
    def raise_error(_):  # skipcq: PY-D0003
        raise Exception("Invalid returned as valid.")

    # ", +, -, 1, [, t, f, n - strings, signed numbers, arrays and constants must be rejected as top-level values.
    forbidden_values = [0x22, 0x2b, 0x2d, 0x31, 0x5b, 0x74, 0x66, 0x6e]
    for value in forbidden_values:
        state = json_machine(raise_error)
        self.assertIsNone(state(value))

    state = json_machine(raise_error)
    self.assertIsNotNone(state(ord('{')))

def test28object_machine_started_from_json_machine(self):
    """Test if the object_machine is started from the json_machine."""
    def check_value(data):  # skipcq: PY-D0003
        self.assertEqual(data, compare_value)

    # single line, no spaces
    value = b'{"string":"Hello World","integer":22,"float":22.23,"bool":true,"array":["Hello","World"]}'
    compare_value = {
        'string': 'Hello World', 'integer': 22, 'float': 22.23, 'bool': True, 'array': ['Hello', 'World']}
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

    # single line with spaces
    value = b'{"string": "Hello World", "integer": 22, "float": 22.23, "bool": true, "array": ["Hello", "World"]}'
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

    # multiline with tabs
    value = b'{\n\t"string": "Hello World",\n\t"integer": 22,\n\t"float": 22.23,\n\t"bool": true,\n\t"array": [' \
            b'\n\t\t"Hello",\n\t\t"World"]}'
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

    # nested objects with an embedded newline
    value = b"""{"HistogramData": {"Bins": {"...-0]": 0, "[0-1]": 0, "[1-2]": 0, "[2-3]": 0, "[3-...]": 0\n}, "BinNames": ["...-0]", "[0-1]", "[1-2]", "[2-3]", "[3-...]"]}}"""
    compare_value = {
        "HistogramData": {
            "Bins": {'...-0]': 0, '[0-1]': 0, '[1-2]': 0, '[2-3]': 0, '[3-...]': 0},
            "BinNames": ['...-0]', '[0-1]', '[1-2]', '[2-3]', '[3-...]']
        }
    }
    state = json_machine(check_value)
    for c in value:
        state = state(c)
    self.assertEqual(state.__name__, '_value')

def check_number_machine_from_json_machine(self, check_int_value, value, end_sign):  # skipcq: PY-D0003
    # The trailing end_sign (e.g. the closing brace) is required because a JSON number is only
    # terminated by the byte that follows it.
    state = json_machine(check_int_value)
    for c in value:
        state = state(c)
    self.assertEqual(state(end_sign).__name__, '_value')

def consume_data(self, stream_data, end_of_stream_flag=False):
    """
    Consume data from the underlying stream for atomizing.

    @return the number of consumed bytes, 0 if the atomizer would need more data for a complete atom or -1 when no data was
    consumed at the moment but data might be consumed later on.
    """
    # Loop until as much streamData as possible was processed and then return a result. The correct processing of
    # endOfStreamFlag is tricky: by default, even when all data was processed, do one more iteration to handle also the flag.
    consumed_length = 0
    while True:
        if self.last_unconsumed_log_atom is not None:
            # Keep length before dispatching: dispatch will reset the field.
            data_length = len(self.last_unconsumed_log_atom.raw_data)
            if self.dispatch_atom(self.last_unconsumed_log_atom):
                consumed_length += data_length + len(self.eol_sep)
                continue
            # Nothing consumed, tell upstream to wait if appropriate.
            if consumed_length == 0:
                consumed_length = -1
            break

        line_end = None
        global breakout  # skipcq: PYL-W0603
        breakout = False
        global data  # skipcq: PYL-W0603
        data = None
        valid_json = False
        if self.json_format:
            state = json_machine(found_json)
            i = 0
            for i, char in enumerate(stream_data[consumed_length:]):
                state = state(char)
                if breakout or state is None or i > self.max_line_length:
                    break
            # check if the json is still valid, but the stream_data is at the end
            if not breakout and state is not None and i + consumed_length == len(stream_data) - 1 and not end_of_stream_flag:
                return consumed_length
            if 0 < i <= self.max_line_length and b'{' in stream_data[consumed_length:consumed_length + i + 1] \
                    and data is not None:
                line_end = consumed_length + i + 1
                valid_json = True
            elif i > self.max_line_length:
                self.in_overlong_line_flag = True

        if line_end is None:
            line_end = stream_data.find(self.eol_sep, consumed_length)

        if self.in_overlong_line_flag:
            if line_end < 0:
                consumed_length = len(stream_data)
                if end_of_stream_flag:
                    self.dispatch_event('Overlong line terminated by end of stream', stream_data)
                    self.in_overlong_line_flag = False
                break
            consumed_length = line_end + len(self.eol_sep)
            self.in_overlong_line_flag = False
            continue

        # This is the valid start of a normal/incomplete/overlong line.
        if line_end < 0:
            tail_length = len(stream_data) - consumed_length
            if tail_length > self.max_line_length:
                self.dispatch_event('Start of overlong line detected', stream_data[consumed_length:])
                self.in_overlong_line_flag = True
                consumed_length = len(stream_data)
                # Stay in loop to handle also endOfStreamFlag!
                continue
            if end_of_stream_flag and (tail_length != 0):
                self.dispatch_event('Incomplete last line', stream_data[consumed_length:])
                consumed_length = len(stream_data)
            break

        # This is at least a complete/overlong line.
        line_length = line_end + len(self.eol_sep) - consumed_length
        if line_length > self.max_line_length and not valid_json:
            self.dispatch_event('Overlong line detected', stream_data[consumed_length:line_end])
            consumed_length = line_end + len(self.eol_sep)
            continue

        # This is a normal line.
        line_data = stream_data[consumed_length:line_end]
        log_atom = LogAtom(line_data, None, None, self)
        if self.parsing_model is not None:
            match_context = MatchContext(line_data)
            match_element = self.parsing_model.get_match_element('', match_context)
            if (match_element is not None) and not match_context.match_data:
                log_atom.parser_match = ParserMatch(match_element)
                for default_timestamp_path in self.default_timestamp_paths:
                    ts_match = log_atom.parser_match.get_match_dictionary().get(default_timestamp_path, None)
                    if ts_match is not None:
                        log_atom.set_timestamp(ts_match.match_object)
                        break
        if self.dispatch_atom(log_atom):
            consumed_length = line_end + len(self.eol_sep) - (
                valid_json and stream_data[line_end:line_end + len(self.eol_sep)] != self.eol_sep)
            continue
        if consumed_length == 0:
            # Downstream did not want the data, so tell upstream to block for a while.
            consumed_length = -1
        break
    return consumed_length
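
# A minimal sketch of how a caller might interpret the consume_data() return contract described
# in the docstring above; the atomizer/stream objects and the chunk size are hypothetical and
# only illustrate the positive / 0 / -1 semantics:
#
#     pending = b''
#     while True:
#         chunk = stream.read(65536)          # hypothetical byte source
#         pending += chunk
#         consumed = atomizer.consume_data(pending, end_of_stream_flag=not chunk)
#         if consumed > 0:                    # that many bytes were turned into atoms, drop them
#             pending = pending[consumed:]
#         elif consumed == 0 and not chunk:   # needs more data, but the stream is exhausted
#             break
#         elif consumed < 0:                  # -1: downstream blocked, retry the same data later
#             time.sleep(0.1)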