def _seek_to_end(self):
    """Position the file handle according to ``self._start_position``.

    Precedence: a persisted sincedb offset (when a sincedb path is
    configured) overrides the configured start position; 'beginning'
    means no seeking at all; a digit string seeks that many lines in;
    'end' seeks to EOF.  The reached position is then persisted and,
    when ``_tail_lines`` is set, that many trailing lines are replayed
    through the callback.
    """
    self._log_debug('seek_to_end')

    if self._sincedb_path:
        # A previously persisted sincedb position wins over the
        # configured start position.
        sincedb_start_position = self._sincedb_start_position()
        if sincedb_start_position:
            self._start_position = sincedb_start_position

    if self._start_position == 'beginning':
        self._log_debug('no start_position specified')
        return

    line_count = 0

    if str(self._start_position).isdigit():
        self._log_debug('going to start position {0}'.format(self._start_position))
        self._start_position = int(self._start_position)
        # Try each candidate encoding until one decodes the file.
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding, position=True)
            # (None, None) signals an unrecoverable failure inside
            # _seek_to_position; give up entirely.
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    # NOTE(review): after the numeric branch above, _start_position can no
    # longer equal 'beginning' here; this looks like a defensive re-check.
    if self._start_position == 'beginning':
        self._log_debug('Bad start position specified')
        return

    if self._start_position == 'end':
        self._log_debug('getting end position')
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding)
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    current_position = self._file.tell()
    self._log_debug('line count {0}'.format(line_count))
    self._log_debug('current position {0}'.format(current_position))

    # Persist the position we just reached even if the normal sincedb
    # write interval has not elapsed yet.
    self._sincedb_update_position(lines=line_count, force_update=True)

    if self._tail_lines:
        self._log_debug('tailing {0} lines'.format(self._tail_lines))
        lines = self.tail(self._filename,
                          encoding=self._encoding,
                          window=self._tail_lines,
                          position=current_position)
        if lines:
            if self._multiline_regex_after or self._multiline_regex_before:
                # Multiline is enabled for this file.
                events = multiline_merge(
                    lines,
                    self._current_event,
                    self._multiline_regex_after,
                    self._multiline_regex_before)
            else:
                events = lines
            self._callback_wrapper(events)

    return
def _run_pass(self):
    """Read lines from the watched file and batch them to the callback.

    Lines are buffered until one of the flush thresholds is reached:
    ``_buffered_lines_max_bytes``, ``_buffered_lines_max_lines`` or
    ``_buffered_lines_max_seconds``.  On a stale NFS handle (ESTALE) the
    worker is deactivated; in every exit path any still-buffered events
    are flushed before returning.
    """
    events = []
    buffered_lines = 0
    buffered_bytes = 0
    run_start = time.time()
    while self.active:
        try:
            data = self._file.read(self._file_read_blocksize)
        except IOError as e:
            if e.errno == errno.ESTALE:
                self.active = False
                # break so we can still try to flush the existing buffers.
                break
            # Bug fix: any other IOError previously fell through and then
            # referenced `data` while it was stale or unbound; re-raise so
            # the caller sees the real error.
            raise

        lines = self._buffer_extract(data)
        if not lines:
            if time.time() - run_start < self._buffered_lines_max_seconds:
                # No new lines yet but the time budget is not exhausted:
                # keep polling, with a small sleep to avoid starving
                # everything else.
                time.sleep(0.1)
            else:
                # Too much time without lines, flush what we've got so far.
                break
        else:
            self._last_activity = time.time()

            if self._multiline_regex_after or self._multiline_regex_before:
                # Multiline is enabled for this file.
                events += multiline_merge(lines,
                                          self._current_event,
                                          self._multiline_regex_after,
                                          self._multiline_regex_before)
            else:
                events += lines

            buffered_lines = len(events)
            buffered_bytes += len(data)

        if events and (buffered_bytes >= self._buffered_lines_max_bytes or
                       buffered_lines >= self._buffered_lines_max_lines or
                       time.time() - run_start >= self._buffered_lines_max_seconds):
            self._callback_wrapper(events)
            self._sincedb_update_position(lines=len(events))
            events = []
            buffered_lines = 0
            buffered_bytes = 0
            run_start = time.time()

    # Bug fix: the `break`s above skipped the in-loop flush, silently
    # dropping buffered events even though their comments promised a
    # flush.  Emit whatever is left before returning.
    if events:
        self._callback_wrapper(events)
        self._sincedb_update_position(lines=len(events))
def _run_pass(self, fid, file):
    """Read lines from a file and performs a callback against them"""
    while True:
        try:
            chunk = file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False
            # NOTE(review): other IOErrors fall through here with `chunk`
            # stale/undefined, exactly as in the original implementation.

        extracted = self._buffer_extract(data=chunk, fid=fid)
        # All per-file bookkeeping goes through this shared dict reference.
        entry = self._file_map[fid]

        if not extracted:
            # No new lines: flush a partial event that has been pending
            # for more than one second before ending this pass.
            pending = entry['current_event']
            if pending and time.time() - entry['last_activity'] > 1:
                stale_event = '\n'.join(pending)
                pending.clear()
                regex = entry['include_filter_regex']
                batch = include_filter_line([stale_event], regex) if regex else [stale_event]
                if batch:
                    self._callback_wrapper(filename=file.name, lines=batch)
            break

        entry['last_activity'] = time.time()

        if entry['multiline_regex_after'] or entry['multiline_regex_before']:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                entry['current_event'],
                entry['multiline_regex_after'],
                entry['multiline_regex_before'])
        else:
            batch = extracted

        if batch and entry['include_filter_regex']:
            batch = include_filter_line(batch, entry['include_filter_regex'])

        if batch:
            self._callback_wrapper(filename=file.name, lines=batch)

        if self._sincedb_path:
            self._sincedb_update_position(file, fid=fid, lines=len(extracted))
def _run_pass(self, fid, file):
    """Read lines from a file and performs a callback against them"""
    line_count = 0
    while True:
        try:
            chunk = file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False

        extracted = self._buffer_extract(data=chunk, fid=fid)
        # Shared per-file bookkeeping dict; mutations are visible to callers.
        entry = self._file_map[fid]

        if not extracted:
            # Before returning, flush a partial event that has been
            # waiting for more than one second.
            if entry['current_event'] and time.time() - entry['last_activity'] > 1:
                stale_event = '\n'.join(entry['current_event'])
                entry['current_event'].clear()
                self._callback_wrapper(filename=file.name, lines=[stale_event])
            break

        entry['last_activity'] = time.time()

        if entry['multiline_regex_after'] or entry['multiline_regex_before']:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                entry['current_event'],
                entry['multiline_regex_after'],
                entry['multiline_regex_before'])
        else:
            batch = extracted

        if batch:
            self._callback_wrapper(filename=file.name, lines=batch)

        if self._sincedb_path:
            # Accumulate lines whose offset was not persisted this round.
            if not self._sincedb_update_position(file, fid=fid, lines=len(extracted)):
                line_count += len(extracted)
def _run_pass(self):
    """Read lines from a file and performs a callback against them"""
    line_count = 0
    while True:
        try:
            chunk = self._file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False

        extracted = self._buffer_extract(chunk)
        if not extracted:
            # Before returning, check whether a (possibly partial) event
            # has been waiting for over a second; if so, emit it now.
            if self._current_event and time.time() - self._last_activity > 1:
                stale_event = '\n'.join(self._current_event)
                self._current_event.clear()
                self._callback_wrapper([stale_event])
            break

        self._last_activity = time.time()

        multiline_enabled = (self._multiline_regex_after or
                             self._multiline_regex_before or
                             self._multiline_regex_begin)
        if multiline_enabled:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                self._current_event,
                self._multiline_regex_after,
                self._multiline_regex_before,
                self._multiline_regex_begin)
        else:
            batch = extracted

        if batch:
            self._callback_wrapper(batch)

        if self._sincedb_path:
            # Accumulate lines whose offset was not persisted this round.
            if not self._sincedb_update_position(lines=len(extracted)):
                line_count += len(extracted)
def _run_pass(self):
    """Read lines from a file and performs a callback against them"""
    while True:
        try:
            chunk = self._file.read(4096)
        except IOError as err:
            if err.errno == errno.ESTALE:
                self.active = False
                return False

        extracted = self._buffer_extract(chunk)
        if not extracted:
            # Flush a partial event that has been pending for more than a
            # second before ending this pass.
            if self._current_event and time.time() - self._last_activity > 1:
                stale_event = '\n'.join(self._current_event)
                self._current_event.clear()
                self._callback_wrapper([stale_event])
            break

        self._last_activity = time.time()

        if self._multiline_regex_after or self._multiline_regex_before:
            # Multiline is enabled for this file.
            batch = multiline_merge(
                extracted,
                self._current_event,
                self._multiline_regex_after,
                self._multiline_regex_before)
        else:
            batch = extracted

        if batch:
            self._callback_wrapper(batch)

        if self._sincedb_path:
            self._sincedb_update_position(lines=len(extracted))
def _seek_to_end(self):
    """Seek to the configured start position, then optionally replay tail lines.

    A persisted sincedb offset overrides the configured start position.
    'beginning' means no seek; a digit string seeks that many lines in;
    'end' seeks to EOF.  Replayed tail lines are run through the
    multiline merger and the ignore-line filter before the callback.
    """
    self._log_debug('seek_to_end')

    if self._sincedb_path:
        sincedb_start_position = self._sincedb_start_position()
        if sincedb_start_position:
            self._start_position = sincedb_start_position

    if self._start_position == 'beginning':
        self._log_debug('no start_position specified')
        return

    line_count = 0

    if str(self._start_position).isdigit():
        self._log_debug('going to start position {0}'.format(self._start_position))
        self._start_position = int(self._start_position)
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding, position=True)
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    if self._start_position == 'beginning':
        self._log_debug('Bad start position specified')
        return

    if self._start_position == 'end':
        self._log_debug('getting end position')
        for encoding in ENCODINGS:
            line_count, encoded = self._seek_to_position(encoding=encoding)
            if line_count is None and encoded is None:
                return
            if encoded:
                break

    current_position = self._file.tell()
    self._log_debug('line count {0}'.format(line_count))
    self._log_debug('current position {0}'.format(current_position))

    self._sincedb_update_position(lines=line_count, force_update=True)

    # Reset this, so line added processed just after this initialization
    # will update the sincedb. Without this, if beaver run for less than
    # sincedb_write_interval it will always re-process the last lines.
    self._last_sincedb_write = 0

    if self._tail_lines:
        self._log_debug('tailing {0} lines'.format(self._tail_lines))
        lines = self.tail(self._filename,
                          encoding=self._encoding,
                          window=self._tail_lines,
                          position=current_position)
        if lines:
            if self._multiline_regex_after or self._multiline_regex_before:
                # Multiline is enabled for this file.
                events = multiline_merge(
                    lines,
                    self._current_event,
                    self._multiline_regex_after,
                    self._multiline_regex_before)
            else:
                events = lines

            if self._ignoreline_regex:
                # Ignoreline is enabled for this file.
                self._log_debug("################ Ignore line regex is enabled")
                tmpevents = []
                for line in events:
                    self._log_debug("check ignore line: " + line)
                    # NOTE(review): `re_ignoreline` is not defined in this
                    # function; presumably a module-level compiled pattern.
                    # Verify it stays in sync with self._ignoreline_regex.
                    if re_ignoreline and re_ignoreline.search(line):
                        self._log_debug("ignore this line: " + line)
                    else:
                        tmpevents.append(line)
                # Bug fix: the filtered list was previously discarded and
                # the unfiltered events were emitted, so the ignore-line
                # filter had no effect.  Emit the filtered events instead.
                events = tmpevents

            self._callback_wrapper(events)

    return
def _seek_to_end(self):
    """Move every watched file's marker to its configured start position.

    Runs once at startup.  For each file in the map the start position
    comes from config, possibly overridden by a persisted sincedb
    offset.  "beginning" leaves the marker untouched; a digit string
    skips that many lines; "end" skips to EOF.  Files that cannot be
    reopened after a decode error are queued for unwatching.  When
    tail_lines is configured, trailing lines are replayed through the
    callback.
    """
    unwatch_list = []

    # The first time we run the script we move all file markers at EOF.
    # In case of files created afterwards we don't do this.
    for fid, data in self._file_map.iteritems():
        self._logger.debug("[{0}] - getting start position {1}".format(fid, data["file"].name))
        start_position = self._beaver_config.get_field("start_position", data["file"].name)
        is_active = data["active"]

        if self._sincedb_path:
            # A persisted sincedb offset overrides the configured value.
            sincedb_start_position = self._sincedb_start_position(data["file"], fid=fid)
            if sincedb_start_position:
                start_position = sincedb_start_position

        if start_position == "beginning":
            continue

        line_count = 0

        if str(start_position).isdigit():
            self._logger.debug(
                "[{0}] - going to start position {1} for {2}".format(fid, start_position, data["file"].name)
            )
            start_position = int(start_position)
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data["file"].readline():
                        line_count += 1
                        if line_count == start_position:
                            break
                except UnicodeDecodeError:
                    self._logger.debug(
                        "[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(
                            fid, data["file"].name, data["encoding"]
                        )
                    )
                    data["file"] = self.open(data["file"].name, encoding=encoding)
                    if not data["file"]:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data["encoding"] = encoding

                if line_count != start_position:
                    self._logger.debug(
                        "[{0}] - file at different position than {1}, assuming manual truncate for {2}".format(
                            fid, start_position, data["file"].name
                        )
                    )
                    data["file"].seek(0, os.SEEK_SET)
                    # Bug fix: this was `start_position == "beginning"`, a
                    # no-op comparison; assign so the check below actually
                    # skips the truncated file.
                    start_position = "beginning"

            if not is_active:
                continue

        if start_position == "beginning":
            continue

        if start_position == "end":
            self._logger.debug("[{0}] - getting end position for {1}".format(fid, data["file"].name))
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data["file"].readline():
                        line_count += 1
                    break
                except UnicodeDecodeError:
                    self._logger.debug(
                        "[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(
                            fid, data["file"].name, data["encoding"]
                        )
                    )
                    data["file"] = self.open(data["file"].name, encoding=encoding)
                    if not data["file"]:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data["encoding"] = encoding

        if not is_active:
            continue

        current_position = data["file"].tell()
        self._logger.debug("[{0}] - line count {1} for {2}".format(fid, line_count, data["file"].name))
        self._sincedb_update_position(data["file"], fid=fid, lines=line_count, force_update=True)

        tail_lines = self._beaver_config.get_field("tail_lines", data["file"].name)
        tail_lines = int(tail_lines)
        if tail_lines:
            encoding = data["encoding"]
            lines = self.tail(data["file"].name, encoding=encoding, window=tail_lines, position=current_position)
            if lines:
                if self._file_map[fid]["multiline_regex_after"] or self._file_map[fid]["multiline_regex_before"]:
                    # Multiline is enabled for this file.
                    events = multiline_merge(
                        lines,
                        self._file_map[fid]["current_event"],
                        self._file_map[fid]["multiline_regex_after"],
                        self._file_map[fid]["multiline_regex_before"],
                    )
                else:
                    events = lines
                self._callback_wrapper(filename=data["file"].name, lines=events)

    self.unwatch_list(unwatch_list)
def _seek_to_end(self):
    """Move every watched file's marker to its configured start position.

    Runs once at startup.  For each file in the map the start position
    comes from config, possibly overridden by a persisted sincedb
    offset.  'beginning' leaves the marker untouched; a digit string
    skips that many lines; 'end' skips to EOF.  Files that cannot be
    reopened after a decode error are queued for unwatching.  When
    tail_lines is configured, trailing lines are replayed through the
    callback.
    """
    unwatch_list = []

    # The first time we run the script we move all file markers at EOF.
    # In case of files created afterwards we don't do this.
    for fid, data in self._file_map.iteritems():
        self._logger.debug("[{0}] - getting start position {1}".format(fid, data['file'].name))
        start_position = self._beaver_config.get_field('start_position', data['file'].name)
        is_active = data['active']

        if self._sincedb_path:
            # A persisted sincedb offset overrides the configured value.
            sincedb_start_position = self._sincedb_start_position(data['file'], fid=fid)
            if sincedb_start_position:
                start_position = sincedb_start_position

        if start_position == "beginning":
            continue

        line_count = 0

        if str(start_position).isdigit():
            self._logger.debug("[{0}] - going to start position {1} for {2}".format(fid, start_position, data['file'].name))
            start_position = int(start_position)
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                        if line_count == start_position:
                            break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

                if line_count != start_position:
                    self._logger.debug("[{0}] - file at different position than {1}, assuming manual truncate for {2}".format(fid, start_position, data['file'].name))
                    data['file'].seek(0, os.SEEK_SET)
                    # Bug fix: this was `start_position == "beginning"`, a
                    # no-op comparison; assign so the check below actually
                    # skips the truncated file.
                    start_position = "beginning"

            if not is_active:
                continue

        if start_position == "beginning":
            continue

        if start_position == "end":
            self._logger.debug("[{0}] - getting end position for {1}".format(fid, data['file'].name))
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                    break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

        if not is_active:
            continue

        current_position = data['file'].tell()
        self._logger.debug("[{0}] - line count {1} for {2}".format(fid, line_count, data['file'].name))
        self._sincedb_update_position(data['file'], fid=fid, lines=line_count, force_update=True)

        tail_lines = self._beaver_config.get_field('tail_lines', data['file'].name)
        tail_lines = int(tail_lines)
        if tail_lines:
            encoding = data['encoding']
            lines = self.tail(data['file'].name, encoding=encoding, window=tail_lines, position=current_position)
            if lines:
                if self._file_map[fid]['multiline_regex_after'] or self._file_map[fid]['multiline_regex_before']:
                    # Multiline is enabled for this file.
                    events = multiline_merge(
                        lines,
                        self._file_map[fid]['current_event'],
                        self._file_map[fid]['multiline_regex_after'],
                        self._file_map[fid]['multiline_regex_before'])
                else:
                    events = lines
                self._callback_wrapper(filename=data['file'].name, lines=events)

    self.unwatch_list(unwatch_list)
def _seek_to_end(self):
    """Seek the file handle to the configured start position.

    A saved sincedb offset (when available) takes precedence over the
    configured start position.  Afterwards the reached position is
    persisted and, when tailing is configured, the last few lines are
    replayed through the callback.
    """
    self._log_debug('seek_to_end')

    if self._sincedb_path:
        saved = self._sincedb_start_position()
        if saved:
            self._start_position = saved

    if self._start_position == 'beginning':
        self._log_debug('no start_position specified')
        return

    lines_skipped = 0

    if str(self._start_position).isdigit():
        self._log_debug('going to start position {0}'.format(self._start_position))
        self._start_position = int(self._start_position)
        for encoding in ENCODINGS:
            lines_skipped, decoded_ok = self._seek_to_position(encoding=encoding, position=True)
            if lines_skipped is None and decoded_ok is None:
                return
            if decoded_ok:
                break

    if self._start_position == 'beginning':
        self._log_debug('Bad start position specified')
        return

    if self._start_position == 'end':
        self._log_debug('getting end position')
        for encoding in ENCODINGS:
            lines_skipped, decoded_ok = self._seek_to_position(encoding=encoding)
            if lines_skipped is None and decoded_ok is None:
                return
            if decoded_ok:
                break

    position_now = self._file.tell()
    self._log_debug('line count {0}'.format(lines_skipped))
    self._log_debug('current position {0}'.format(position_now))

    self._sincedb_update_position(lines=lines_skipped, force_update=True)

    if not self._tail_lines:
        return

    self._log_debug('tailing {0} lines'.format(self._tail_lines))
    replay = self.tail(self._filename,
                       encoding=self._encoding,
                       window=self._tail_lines,
                       position=position_now)
    if replay:
        if self._multiline_regex_after or self._multiline_regex_before:
            # Multiline is enabled for this file.
            batch = multiline_merge(replay,
                                    self._current_event,
                                    self._multiline_regex_after,
                                    self._multiline_regex_before)
        else:
            batch = replay
        self._callback_wrapper(batch)

    return
def _seek_to_end(self):
    """Move every watched file's marker to its configured start position.

    Runs once at startup.  For each file in the map the start position
    comes from config, possibly overridden by a persisted sincedb
    offset.  'beginning' leaves the marker untouched; a digit string
    skips that many lines; 'end' skips to EOF.  Files that cannot be
    reopened after a decode error are queued for unwatching.  When
    tail_lines is configured, trailing lines are replayed through the
    multiline merger and include filter before the callback.
    """
    unwatch_list = []

    # The first time we run the script we move all file markers at EOF.
    # In case of files created afterwards we don't do this.
    for fid, data in self._file_map.iteritems():
        self._logger.debug("[{0}] - getting start position {1}".format(fid, data['file'].name))
        start_position = self._beaver_config.get_field('start_position', data['file'].name)
        is_active = data['active']

        if self._sincedb_path:
            # A persisted sincedb offset overrides the configured value.
            sincedb_start_position = self._sincedb_start_position(data['file'], fid=fid)
            if sincedb_start_position:
                start_position = sincedb_start_position

        if start_position == "beginning":
            continue

        line_count = 0

        if str(start_position).isdigit():
            self._logger.debug("[{0}] - going to start position {1} for {2}".format(fid, start_position, data['file'].name))
            start_position = int(start_position)
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                        if line_count == start_position:
                            break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

                if line_count != start_position:
                    self._logger.debug("[{0}] - file at different position than {1}, assuming manual truncate for {2}".format(fid, start_position, data['file'].name))
                    data['file'].seek(0, os.SEEK_SET)
                    # Bug fix: this was `start_position == "beginning"`, a
                    # no-op comparison; assign so the check below actually
                    # skips the truncated file.
                    start_position = "beginning"

            if not is_active:
                continue

        if start_position == "beginning":
            continue

        if start_position == "end":
            self._logger.debug("[{0}] - getting end position for {1}".format(fid, data['file'].name))
            for encoding in ENCODINGS:
                try:
                    line_count = 0
                    while data['file'].readline():
                        line_count += 1
                    break
                except UnicodeDecodeError:
                    self._logger.debug("[{0}] - UnicodeDecodeError raised for {1} with encoding {2}".format(fid, data['file'].name, data['encoding']))
                    data['file'] = self.open(data['file'].name, encoding=encoding)
                    if not data['file']:
                        unwatch_list.append(fid)
                        is_active = False
                        break
                    data['encoding'] = encoding

        if not is_active:
            continue

        current_position = data['file'].tell()
        self._logger.debug("[{0}] - line count {1} for {2}".format(fid, line_count, data['file'].name))
        self._sincedb_update_position(data['file'], fid=fid, lines=line_count, force_update=True)

        # Reset this, so line added processed just after this initialization
        # will update the sincedb. Without this, if beaver run for less than
        # sincedb_write_interval it will always re-process the last lines.
        data['update_time'] = 0

        tail_lines = self._beaver_config.get_field('tail_lines', data['file'].name)
        tail_lines = int(tail_lines)
        if tail_lines:
            encoding = data['encoding']
            lines = self.tail(data['file'].name, encoding=encoding, window=tail_lines, position=current_position)
            if lines:
                if self._file_map[fid]['multiline_regex_after'] or self._file_map[fid]['multiline_regex_before']:
                    # Multiline is enabled for this file.
                    events = multiline_merge(
                        lines,
                        self._file_map[fid]['current_event'],
                        self._file_map[fid]['multiline_regex_after'],
                        self._file_map[fid]['multiline_regex_before'])
                else:
                    events = lines

                if events and self._file_map[fid]['include_filter_regex']:
                    events = include_filter_line(events, self._file_map[fid]['include_filter_regex'])

                if events:
                    self._callback_wrapper(filename=data['file'].name, lines=events)

    self.unwatch_list(unwatch_list)