def consume_data(self, stream_data: bytes, end_of_stream_flag: bool = False) -> int:
    """
    Consume data from the underlying stream for atomizing.
    Atoms are newline-separated lines; lines longer than self.max_line_length are reported via dispatch_event
    instead of being turned into atoms.
    @param stream_data the raw byte buffer currently available from the stream.
    @param end_of_stream_flag when True, no more data will follow stream_data.
    @return the number of consumed bytes, 0 if the atomizer would need more data for a complete atom or -1 when no data was consumed
    at the moment but data might be consumed later on.
    """
    # Loop until as much streamData as possible was processed and then return a result. The correct processing of endOfStreamFlag
    # is tricky: by default, even when all data was processed, do one more iteration to handle also the flag.
    consumed_length = 0
    while True:
        # First retry an atom that a downstream handler refused on an earlier call.
        if self.last_unconsumed_log_atom is not None:
            # Keep length before dispatching: dispatch will reset the field.
            data_length = len(self.last_unconsumed_log_atom.raw_data)
            if self.dispatch_atom(self.last_unconsumed_log_atom):
                # + 1 accounts for the separator byte after the atom's raw data
                # (presumably the newline the atom was split on — matches the b'\n' search below).
                consumed_length += data_length + 1
                continue
            # Nothing consumed, tell upstream to wait if appropriate.
            if consumed_length == 0:
                consumed_length = -1
            break
        # Locate the end of the next line; -1 means no complete line in the buffer yet.
        line_end = stream_data.find(b'\n', consumed_length)
        if self.in_overlong_line_flag:
            # We are inside an already-reported overlong line: discard bytes until its terminating newline.
            if line_end < 0:
                consumed_length = len(stream_data)
                if end_of_stream_flag:
                    self.dispatch_event('Overlong line terminated by end of stream', stream_data)
                    self.in_overlong_line_flag = False
                break
            consumed_length = line_end + 1
            self.in_overlong_line_flag = False
            continue
        # This is the valid start of a normal/incomplete/overlong line.
        if line_end < 0:
            # No newline found: the tail is either a not-yet-complete line, the start of an
            # overlong line, or (at end of stream) an incomplete last line.
            tail_length = len(stream_data) - consumed_length
            if tail_length > self.max_line_length:
                self.dispatch_event('Start of overlong line detected', stream_data[consumed_length:])
                self.in_overlong_line_flag = True
                consumed_length = len(stream_data)
                # Stay in loop to handle also endOfStreamFlag!
                continue
            if end_of_stream_flag and (tail_length != 0):
                self.dispatch_event('Incomplete last line', stream_data[consumed_length:])
                consumed_length = len(stream_data)
            break
        # This is at least a complete/overlong line.
        line_length = line_end + 1 - consumed_length
        if line_length > self.max_line_length:
            # Complete but overlong: report it and skip past it without producing an atom.
            self.dispatch_event('Overlong line detected', stream_data[consumed_length:line_end])
            consumed_length = line_end + 1
            continue
        # This is a normal line.
        line_data = stream_data[consumed_length:line_end]
        log_atom = LogAtom(line_data, None, None, self)
        if self.parsing_model is not None:
            # Try to parse the line; only a full match (no leftover match_data) is attached.
            match_context = MatchContext(line_data)
            match_element = self.parsing_model.get_match_element('', match_context)
            if (match_element is not None) and not match_context.match_data:
                log_atom.parser_match = ParserMatch(match_element)
                # Use the first configured timestamp path that matched to stamp the atom.
                for default_timestamp_path in self.default_timestamp_paths:
                    ts_match = log_atom.parser_match.get_match_dictionary().get(default_timestamp_path, None)
                    if ts_match is not None:
                        log_atom.set_timestamp(ts_match.match_object)
                        break
        if self.dispatch_atom(log_atom):
            consumed_length = line_end + 1
            continue
        if consumed_length == 0:
            # Downstream did not want the data, so tell upstream to block for a while.
            consumed_length = -1
        break
    return consumed_length
def consume_data(self, stream_data: bytes, end_of_stream_flag: bool = False) -> int:
    """
    Consume data from the underlying stream for atomizing.
    Atoms are separated by self.eol_sep; when self.json_format is set, a streaming JSON state machine
    is tried first so that a complete JSON object (possibly spanning multiple lines) becomes one atom.
    Lines longer than self.max_line_length that are not valid JSON are reported via dispatch_event.
    @param stream_data the raw byte buffer currently available from the stream.
    @param end_of_stream_flag when True, no more data will follow stream_data.
    @return the number of consumed bytes, 0 if the atomizer would need more data for a complete atom or -1 when no data was consumed
    at the moment but data might be consumed later on.
    """
    # Loop until as much streamData as possible was processed and then return a result. The correct processing of endOfStreamFlag
    # is tricky: by default, even when all data was processed, do one more iteration to handle also the flag.
    consumed_length = 0
    while True:
        # First retry an atom that a downstream handler refused on an earlier call.
        if self.last_unconsumed_log_atom is not None:
            # Keep length before dispatching: dispatch will reset the field.
            data_length = len(self.last_unconsumed_log_atom.raw_data)
            if self.dispatch_atom(self.last_unconsumed_log_atom):
                # Also skip the separator that followed the atom's raw data.
                consumed_length += data_length + len(self.eol_sep)
                continue
            # Nothing consumed, tell upstream to wait if appropriate.
            if consumed_length == 0:
                consumed_length = -1
            break
        line_end = None
        # breakout/data are module-level globals shared with the json_machine/found_json
        # machinery: presumably found_json stores the decoded object into `data` and sets
        # `breakout` to abort scanning — confirm in the module defining json_machine.
        global breakout  # skipcq: PYL-W0603
        breakout = False
        global data  # skipcq: PYL-W0603
        data = None
        valid_json = False
        if self.json_format:
            # Feed the buffer byte-by-byte into the JSON state machine; each state call
            # returns the next state function, or None when the machine terminates.
            state = json_machine(found_json)
            i = 0
            for i, char in enumerate(stream_data[consumed_length:]):
                state = state(char)
                if breakout or state is None or i > self.max_line_length:
                    break
            # check if the json is still valid, but the stream_data is at the end
            if not breakout and state is not None and i + consumed_length == len(
                    stream_data) - 1 and not end_of_stream_flag:
                # JSON may continue in the next chunk: ask upstream for more data first.
                return consumed_length
            if 0 < i <= self.max_line_length and b'{' in stream_data[
                    consumed_length:consumed_length + i + 1] and data is not None:
                # A complete JSON object was decoded: treat its span as one atom.
                line_end = consumed_length + i + 1
                valid_json = True
            elif i > self.max_line_length:
                self.in_overlong_line_flag = True
        if line_end is None:
            # No JSON atom found: fall back to plain separator-delimited scanning.
            line_end = stream_data.find(self.eol_sep, consumed_length)
        if self.in_overlong_line_flag:
            # Inside an already-reported overlong line: discard bytes until its terminator.
            if line_end < 0:
                consumed_length = len(stream_data)
                if end_of_stream_flag:
                    self.dispatch_event(
                        'Overlong line terminated by end of stream',
                        stream_data)
                    self.in_overlong_line_flag = False
                break
            consumed_length = line_end + len(self.eol_sep)
            self.in_overlong_line_flag = False
            continue
        # This is the valid start of a normal/incomplete/overlong line.
        if line_end < 0:
            tail_length = len(stream_data) - consumed_length
            if tail_length > self.max_line_length:
                self.dispatch_event('Start of overlong line detected', stream_data[consumed_length:])
                self.in_overlong_line_flag = True
                consumed_length = len(stream_data)
                # Stay in loop to handle also endOfStreamFlag!
                continue
            if end_of_stream_flag and (tail_length != 0):
                self.dispatch_event('Incomplete last line', stream_data[consumed_length:])
                consumed_length = len(stream_data)
            break
        # This is at least a complete/overlong line.
        line_length = line_end + len(self.eol_sep) - consumed_length
        if line_length > self.max_line_length and not valid_json:
            # Complete but overlong (and not a recognized JSON atom): report and skip it.
            self.dispatch_event('Overlong line detected', stream_data[consumed_length:line_end])
            consumed_length = line_end + len(self.eol_sep)
            continue
        # This is a normal line.
        line_data = stream_data[consumed_length:line_end]
        log_atom = LogAtom(line_data, None, None, self)
        if self.parsing_model is not None:
            # Try to parse the atom; only a full match (no leftover match_data) is attached.
            match_context = MatchContext(line_data)
            match_element = self.parsing_model.get_match_element(
                '', match_context)
            if (match_element is not None) and not match_context.match_data:
                log_atom.parser_match = ParserMatch(match_element)
                # Use the first configured timestamp path that matched to stamp the atom.
                for default_timestamp_path in self.default_timestamp_paths:
                    ts_match = log_atom.parser_match.get_match_dictionary(
                    ).get(default_timestamp_path, None)
                    if ts_match is not None:
                        log_atom.set_timestamp(ts_match.match_object)
                        break
        if self.dispatch_atom(log_atom):
            # Bool arithmetic: subtract 1 when the JSON atom is NOT followed by eol_sep
            # (the separator was never consumed by the JSON scan), otherwise subtract 0.
            # For valid_json=False the parenthesized expression evaluates to False, i.e. 0.
            consumed_length = line_end + len(self.eol_sep) - (
                valid_json and stream_data[line_end:line_end + len(self.eol_sep)] != self.eol_sep)
            continue
        if consumed_length == 0:
            # Downstream did not want the data, so tell upstream to block for a while.
            consumed_length = -1
        break
    return consumed_length