    def test3timestamp_higher_than_last_timestamp(self):
        """This test case checks that nothing happens when the timestamp is, as expected, higher than the last one."""
        description = "Test3TimestampsUnsortedDetector"
        match_context_fixed_dme = MatchContext(self.pid)
        fixed_dme = FixedDataModelElement('s1', self.pid)
        match_element_fixed_dme = fixed_dme.get_match_element(
            "match", match_context_fixed_dme)
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time()
        log_atom = LogAtom(fixed_dme.fixed_data,
                           ParserMatch(match_element_fixed_dme), t,
                           new_match_path_detector)
        timestamp_unsorted_detector = TimestampsUnsortedDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            False,  # exit_on_error
            output_log_line=False)
        self.analysis_context.register_component(timestamp_unsorted_detector,
                                                 description + "2")
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

        log_atom.set_timestamp(t)
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

        log_atom.set_timestamp(t + 10000)
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

    def test2timestamp_lower_than_last_timestamp_exit_on_error(self):
        """This test case checks that the program exits when the timestamp is lower than the last one and the exit_on_error flag is set."""
        description = "Test2TimestampsUnsortedDetector"
        match_context_fixed_dme = MatchContext(self.pid)
        fixed_dme = FixedDataModelElement('s1', self.pid)
        match_element_fixed_dme = fixed_dme.get_match_element(
            "match", match_context_fixed_dme)
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time()
        log_atom = LogAtom(fixed_dme.fixed_data,
                           ParserMatch(match_element_fixed_dme), t,
                           new_match_path_detector)
        timestamp_unsorted_detector = TimestampsUnsortedDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            True,  # exit_on_error
            output_log_line=False)
        self.analysis_context.register_component(timestamp_unsorted_detector,
                                                 description + "2")
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

        log_atom.set_timestamp(t - 10000)
        with self.assertRaises(SystemExit) as cm:
            timestamp_unsorted_detector.receive_atom(log_atom)
        self.assertEqual(cm.exception.code, 1)
        self.assertEqual(
            self.output_stream.getvalue(), self.__expected_string %
            (datetime.fromtimestamp(t - 10000).strftime(
                self.datetime_format_string),
             datetime.fromtimestamp(t - 10000).strftime(
                 self.datetime_format_string),
             datetime.fromtimestamp(t).strftime(self.datetime_format_string),
             timestamp_unsorted_detector.__class__.__name__, description + "2",
             1, "b' pid='"))
Code example #3
    def consume_data(self, stream_data, end_of_stream_flag=False):
        """
        Consume data from the underlying stream for atomizing.
        @return the number of consumed bytes; 0 if the atomizer needs more data for a complete atom; or -1 when no data was
        consumed at the moment but data might be consumed later on.
        """
        # Loop until as much of stream_data as possible has been processed, then return a result. Correct processing of
        # end_of_stream_flag is tricky: by default, even when all data was processed, one more iteration is done to also handle the flag.
        consumed_length = 0
        while True:
            if self.last_unconsumed_log_atom is not None:
                # Keep length before dispatching: dispatch will reset the field.
                data_length = len(self.last_unconsumed_log_atom.raw_data)
                if self.dispatch_atom(self.last_unconsumed_log_atom):
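                    # The +1 also consumes the b'\n' separator that terminated the atom.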
                    consumed_length += data_length + 1
                    continue
                # Nothing consumed, tell upstream to wait if appropriate.
                if consumed_length == 0:
                    consumed_length = -1
                break

            line_end = stream_data.find(b'\n', consumed_length)
            if self.in_overlong_line_flag:
                if line_end < 0:
                    consumed_length = len(stream_data)
                    if end_of_stream_flag:
                        self.dispatch_event('Overlong line terminated by end of stream', stream_data)
                        self.in_overlong_line_flag = False
                    break
                consumed_length = line_end + 1
                self.in_overlong_line_flag = False
                continue

            # This is the valid start of a normal/incomplete/overlong line.
            if line_end < 0:
                tail_length = len(stream_data) - consumed_length
                if tail_length > self.max_line_length:
                    self.dispatch_event('Start of overlong line detected', stream_data[consumed_length:])
                    self.in_overlong_line_flag = True
                    consumed_length = len(stream_data)
                    # Stay in the loop to also handle end_of_stream_flag!
                    continue
                if end_of_stream_flag and (tail_length != 0):
                    self.dispatch_event('Incomplete last line', stream_data[consumed_length:])
                    consumed_length = len(stream_data)
                break

            # This is at least a complete/overlong line.
            line_length = line_end + 1 - consumed_length
            if line_length > self.max_line_length:
                self.dispatch_event('Overlong line detected', stream_data[consumed_length:line_end])
                consumed_length = line_end + 1
                continue

            # This is a normal line.
            line_data = stream_data[consumed_length:line_end]
            log_atom = LogAtom(line_data, None, None, self)
            if self.parsing_model is not None:
                match_context = MatchContext(line_data)
                match_element = self.parsing_model.get_match_element('', match_context)
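                # Accept only a complete match, i.e. the parser consumed all of line_data.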
                if (match_element is not None) and not match_context.match_data:
                    log_atom.parser_match = ParserMatch(match_element)
                    for default_timestamp_path in self.default_timestamp_paths:
                        ts_match = log_atom.parser_match.get_match_dictionary().get(default_timestamp_path, None)
                        if ts_match is not None:
                            log_atom.set_timestamp(ts_match.match_object)
                            break
            if self.dispatch_atom(log_atom):
                consumed_length = line_end + 1
                continue
            if consumed_length == 0:
                # Downstream did not want the data, so tell upstream to block for a while.
                consumed_length = -1
            break
        return consumed_length
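
A note on the contract: consume_data() returns n > 0 bytes consumed, 0 when it needs more data for a complete atom, or -1 when downstream refused the data for now. A caller therefore has to keep a buffer and honor all three cases. The sketch below is not aminer code; read_chunk() and EchoAtomizer are hypothetical stand-ins that only illustrate the feeding loop:

import time

_FEED = iter([b'line one\nline tw', b'o\nline three\n', b''])

def read_chunk():
    # Hypothetical stand-in for a socket/file read; b'' signals end of stream.
    return next(_FEED)

class EchoAtomizer:
    # Hypothetical stand-in exposing the same consume_data() contract as above.
    def consume_data(self, stream_data, end_of_stream_flag=False):
        consumed = 0
        while True:
            line_end = stream_data.find(b'\n', consumed)
            if line_end < 0:
                return consumed               # 0 means: need more data
            print(stream_data[consumed:line_end])
            consumed = line_end + 1           # line plus its separator consumed

buffer, atomizer = b'', EchoAtomizer()
while True:
    chunk = read_chunk()
    end_of_stream = (chunk == b'')
    buffer += chunk
    consumed = atomizer.consume_data(buffer, end_of_stream)
    if consumed > 0:
        buffer = buffer[consumed:]            # drop the fully atomized bytes
    elif consumed < 0:
        time.sleep(0.2)                       # -1: downstream busy, retry later
    if end_of_stream:
        break                                 # the flag already forced a final iteration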
Code example #4
    def consume_data(self, stream_data, end_of_stream_flag=False):
        """
        Consume data from the underlying stream for atomizing.
        @return the number of consumed bytes; 0 if the atomizer needs more data for a complete atom; or -1 when no data was
        consumed at the moment but data might be consumed later on.
        """
        # Loop until as much of stream_data as possible has been processed, then return a result. Correct processing of
        # end_of_stream_flag is tricky: by default, even when all data was processed, one more iteration is done to also handle the flag.
        consumed_length = 0
        while True:
            if self.last_unconsumed_log_atom is not None:
                # Keep length before dispatching: dispatch will reset the field.
                data_length = len(self.last_unconsumed_log_atom.raw_data)
                if self.dispatch_atom(self.last_unconsumed_log_atom):
                    consumed_length += data_length + len(self.eol_sep)
                    continue
                # Nothing consumed, tell upstream to wait if appropriate.
                if consumed_length == 0:
                    consumed_length = -1
                break

            line_end = None
            global breakout  # skipcq: PYL-W0603
            breakout = False
            global data  # skipcq: PYL-W0603
            data = None
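            # breakout and data are module-level flags set by the found_json callback handed to
            # json_machine() below once a complete JSON document has been parsed.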
            valid_json = False
            if self.json_format:
                state = json_machine(found_json)
                i = 0
                for i, char in enumerate(stream_data[consumed_length:]):
                    state = state(char)
                    if breakout or state is None or i > self.max_line_length:
                        break
                # The JSON is still valid but stream_data ended before the document was complete: wait for more data.
                if (not breakout and state is not None
                        and i + consumed_length == len(stream_data) - 1
                        and not end_of_stream_flag):
                    return consumed_length
                if (0 < i <= self.max_line_length
                        and b'{' in stream_data[consumed_length:consumed_length + i + 1]
                        and data is not None):
                    line_end = consumed_length + i + 1
                    valid_json = True
                elif i > self.max_line_length:
                    self.in_overlong_line_flag = True
            if line_end is None:
                line_end = stream_data.find(self.eol_sep, consumed_length)

            if self.in_overlong_line_flag:
                if line_end < 0:
                    consumed_length = len(stream_data)
                    if end_of_stream_flag:
                        self.dispatch_event(
                            'Overlong line terminated by end of stream',
                            stream_data)
                        self.in_overlong_line_flag = False
                    break
                consumed_length = line_end + len(self.eol_sep)
                self.in_overlong_line_flag = False
                continue

            # This is the valid start of a normal/incomplete/overlong line.
            if line_end < 0:
                tail_length = len(stream_data) - consumed_length
                if tail_length > self.max_line_length:
                    self.dispatch_event('Start of overlong line detected',
                                        stream_data[consumed_length:])
                    self.in_overlong_line_flag = True
                    consumed_length = len(stream_data)
                    # Stay in the loop to also handle end_of_stream_flag!
                    continue
                if end_of_stream_flag and (tail_length != 0):
                    self.dispatch_event('Incomplete last line',
                                        stream_data[consumed_length:])
                    consumed_length = len(stream_data)
                break

            # This is at least a complete/overlong line.
            line_length = line_end + len(self.eol_sep) - consumed_length
            if line_length > self.max_line_length and not valid_json:
                self.dispatch_event('Overlong line detected',
                                    stream_data[consumed_length:line_end])
                consumed_length = line_end + len(self.eol_sep)
                continue

            # This is a normal line.
            line_data = stream_data[consumed_length:line_end]
            log_atom = LogAtom(line_data, None, None, self)
            if self.parsing_model is not None:
                match_context = MatchContext(line_data)
                match_element = self.parsing_model.get_match_element('', match_context)
                if match_element is not None and not match_context.match_data:
                    log_atom.parser_match = ParserMatch(match_element)
                    for default_timestamp_path in self.default_timestamp_paths:
                        ts_match = log_atom.parser_match.get_match_dictionary().get(default_timestamp_path, None)
                        if ts_match is not None:
                            log_atom.set_timestamp(ts_match.match_object)
                            break
            if self.dispatch_atom(log_atom):
                consumed_length = line_end + len(self.eol_sep)
                if valid_json and stream_data[line_end:line_end + len(self.eol_sep)] != self.eol_sep:
                    # A JSON atom need not be terminated by the eol separator; line_end already points one
                    # past the document, so back off one byte again (this assumes a single-byte separator).
                    consumed_length -= 1
                continue
            if consumed_length == 0:
                # Downstream did not want the data, so tell upstream to block for a while.
                consumed_length = -1
            break
        return consumed_length
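
The json_machine() used above is a push parser: each state is a callable taking one byte and returning the next state, or None once a complete document has been read, while a callback (found_json here) receives the parsed result and sets the breakout/data flags. The toy below is not the aminer implementation; it only tracks balanced braces (ignoring strings and escapes) to show the shape of such a machine:

def brace_machine(emit):
    # Toy push parser in the style of json_machine(): feed bytes one at a time,
    # get back the next state callable, or None when a balanced top-level {...}
    # block has been consumed and handed to emit().
    depth = 0
    buf = bytearray()

    def state(char):
        nonlocal depth
        buf.append(char)
        if char == ord('{'):
            depth += 1
        elif char == ord('}'):
            depth -= 1
            if depth == 0:
                emit(bytes(buf))
                return None                   # complete document consumed
        return state

    return state

docs = []
state = brace_machine(docs.append)
for byte in b'{"a": {"b": 1}} trailing data':
    state = state(byte)
    if state is None:
        break
print(docs)                                   # [b'{"a": {"b": 1}}']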