    def test_counting_with_date_and_time_columns_without_groupby(self):
        q = get_queue()
        conf = {
            "logpath": "/tmp/count.log",
            "columns": ["c0", "c1", "date", "time", "method"],
            "delimiter": "\t",
            "date_column": "date",
            "time_column": "time",
            "counts": [{"column": "method", "match": "GET", "period": 1}],
        }
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process
        time.sleep(0.1)

        messages = []
        while q.qsize() > 0:
            messages.append(q.get())

        result = [
            (
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["aggregation_type"],
                m.content["value"],
            )
            for m in messages
        ]

        self.assertIn((7, "count", 1), result)
        self.assertIn((8, "count", 2), result)
        self.assertIn((9, "count", 0), result)
        self.assertIn((10, "count", 0), result)
        self.assertIn((11, "count", 0), result)
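
    # A hypothetical companion test (a sketch, not part of the original
    # suite) pinning down the ms-epoch-to-minute conversion the tests here
    # rely on; the timestamp value below is made up for illustration:
    def test_interval_started_at_minute_conversion_sketch(self):
        interval_started_at = 1420070820000  # 2015-01-01 00:07:00 UTC, in ms
        minute = datetime.datetime.utcfromtimestamp(interval_started_at / 1000).minute
        self.assertEqual(7, minute)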
    def test_summing_2_columns_without_groupby(self):
        q = get_queue()
        conf = {
            "logpath": "/tmp/sum.log",
            "columns": ["c0", "c1", "datetime", "primes", "evens"],
            "delimiter": "\t",
            "datetime_column": "datetime",
            "sums": [{"column": "primes", "period": 1}, {"column": "evens", "period": 1}],
        }
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process
        time.sleep(0.1)

        messages = []
        while q.qsize() > 0:
            messages.append(q.get())

        # it should generate messages for the
        # minutes: 7, 8, 9, 10, 11 (x 2, since there are 2 sums)
        self.assertEqual(10, len(messages))

        evens = [m for m in messages if m.content["column_name"] == "evens"]
        primes = [m for m in messages if m.content["column_name"] == "primes"]

        # assert that all minutes were delivered for each sum
        self.assertEqual(5, len(primes))
        self.assertEqual(5, len(evens))

        result = [
            (
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["aggregation_type"],
                m.content["value"],
            )
            for m in primes
        ]
        self.assertIn((7, "sum", 5), result)
        self.assertIn((8, "sum", 18), result)
        self.assertIn((9, "sum", 0), result)
        self.assertIn((10, "sum", 0), result)
        self.assertIn((11, "sum", 13), result)

        result = [
            (
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["aggregation_type"],
                m.content["value"],
            )
            for m in evens
        ]
        self.assertIn((7, "sum", 4), result)
        self.assertIn((8, "sum", 6), result)
        self.assertIn((9, "sum", 0), result)
        self.assertIn((10, "sum", 0), result)
        self.assertIn((11, "sum", 2), result)
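
    # The drain loop above repeats in every aggregation test; a hypothetical
    # helper (a sketch, not part of the original class) that could replace it:
    def _drain_queue(self, q):
        # qsize() is advisory on some platforms, but these tests have already
        # slept long enough for the reader to finish producing messages.
        messages = []
        while q.qsize() > 0:
            messages.append(q.get())
        return messages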
    def test_reading_log_with_delimiters_and_columns_and_saving_into_queue(self):
        q = get_queue()
        conf = {"logpath": self.logpath, "delimiter": "\t", "columns": ["col0", "col1", "col2"]}
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process log lines
        time.sleep(0.1)

        msg = q.get()
        self.assertEqual({"col0": "a", "col1": "b", "col2": "c"}, msg.content)

        msg = q.get()
        self.assertEqual({"col0": "x", "col1": "y", "col2": "z"}, msg.content)
    def test_reading_log_with_delimiters_and_saving_into_queue(self):
        # starting reader
        q = get_queue()
        conf = {"logpath": self.logpath, "delimiter": "\t"}
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process log lines
        time.sleep(0.1)

        msg = q.get()
        self.assertEqual(["a", "b", "c"], msg.content)

        msg = q.get()
        self.assertEqual(["x", "y", "z"], msg.content)
    def test_reading_log_and_saving_into_queue(self):
        # starting reader
        q = get_queue()
        conf = {"logpath": self.logpath, "checkpoint_path": self.reader_checkpoint, "checkpoint_enabled": True}
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process log lines
        time.sleep(0.1)

        msg = q.get()
        self.assertEqual("a\tb\tc\n", msg.content)

        msg = q.get()
        self.assertEqual("x\ty\tz\n", msg.content)
    def test_saving_checkpoint_in_bytes_read(self):
        # starting reader
        # the checkpoint file is expected to exist already; opening it verifies that
        open(self.reader_checkpoint, "rb").close()
        q = get_queue()
        conf = {"logpath": self.logpath, "checkpoint_path": self.reader_checkpoint, "checkpoint_enabled": True}
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process log lines
        time.sleep(0.1)

        msg = q.get()
        self.assertEqual(6, msg.checkpoint["bytes_read"])

        msg = q.get()
        self.assertEqual(12, msg.checkpoint["bytes_read"])
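
    # A hypothetical companion check (a sketch, not from the original suite):
    # the checkpoint values above equal the cumulative byte length of the
    # fixture lines, newline included, assuming ASCII content:
    def test_bytes_read_arithmetic_sketch(self):
        self.assertEqual(6, len("a\tb\tc\n"))
        self.assertEqual(12, len("a\tb\tc\n") + len("x\ty\tz\n"))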
    def test_counting_with_groupby_and_regexp(self):
        q = get_queue()
        conf = {
            "logpath": "/tmp/count.log",
            "columns": ["host", "unknown", "datetime", "method"],
            "delimiter": "\t",
            "datetime_column": "datetime",
            "counts": [
                {
                    "column": "method",
                    "match": "GET",
                    "period": 1,
                    "groupby": {"column": "host", "match": "^(host\d).*$"},
                }
            ],
        }

        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process
        time.sleep(0.1)

        messages = []

        while q.qsize() > 0:
            messages.append(q.get())

        result = [
            (
                m.content["host"],
                m.content["aggregation_type"],
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["value"],
            )
            for m in messages
        ]
        self.assertIn(("host1", "count", 7, 2), result)
        self.assertIn(("host1", "count", 8, 0), result)
        self.assertIn(("host1", "count", 9, 1), result)
        self.assertIn(("host1", "count", 10, 0), result)
        self.assertIn(("host1", "count", 11, 0), result)
        self.assertIn(("host2", "count", 8, 0), result)
        self.assertIn(("host2", "count", 9, 0), result)
        self.assertIn(("host2", "count", 10, 0), result)
        self.assertIn(("host2", "count", 11, 0), result)
        self.assertIn(("host3", "count", 11, 1), result)
    def test_removing_log_file_during_reading(self):
        q = get_queue()
        retry_log = "/tmp/retry.log"
        conf = {
            "logpath": retry_log,
            "checkpoint_path": self.reader_checkpoint,
            "retry_open_file_period": 1,
            "period": 1,
            "checkpoint_enabled": True,
        }

        def remove_file():
            os.remove(retry_log)

        def create_file():
            open(retry_log, "w").close()

        create_file()
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # allow for startup overhead
        time.sleep(0.1)

        self.assertFalse(myreader.log_not_found)

        remove_file()

        # time for the reader to notice the file was removed
        time.sleep(1)

        self.assertTrue(myreader.log_not_found)

        create_file()

        # wait out the retry-open-file period
        time.sleep(1.1)

        # assert the file was found again
        self.assertFalse(myreader.log_not_found)
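
    # A sketch of the retry behaviour this test exercises, assuming the
    # reader polls for the file every retry_open_file_period seconds and
    # flips log_not_found accordingly (an assumption about internals, not
    # the project's actual implementation):
    def _open_with_retry_sketch(self, path, retry_period):
        while True:
            try:
                return open(path)
            except IOError:
                time.sleep(retry_period)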
    def test_summing_with_groupby(self):
        q = get_queue()
        conf = {
            "logpath": "/tmp/sum.log",
            "columns": ["host", "unknown", "datetime", "x"],
            "delimiter": "\t",
            "datetime_column": "datetime",
            "sums": [{"column": "x", "period": 1, "groupby": {"column": "host", "match": "(.*)"}}],
        }
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process
        time.sleep(0.1)

        messages = []
        while q.qsize() > 0:
            messages.append(q.get())

        result = [
            (
                m.content["host"],
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["aggregation_type"],
                m.content["value"],
            )
            for m in messages
        ]
        self.assertIn(("host1", 7, "sum", 5), result)
        self.assertIn(("host1", 8, "sum", 0), result)
        self.assertIn(("host1", 9, "sum", 0), result)
        self.assertIn(("host1", 10, "sum", 42), result)
        self.assertIn(("host1", 11, "sum", 0), result)
        self.assertIn(("host2", 7, "sum", 5.1), result)
        self.assertIn(("host2", 8, "sum", 0), result)
        self.assertIn(("host2", 9, "sum", 0), result)
        self.assertIn(("host2", 10, "sum", 0), result)
        self.assertIn(("host2", 11, "sum", 0), result)
        self.assertIn(("host3", 11, "sum", 2), result)
    def test_counting_2_columns_without_groupby(self):
        q = get_queue()
        conf = {
            "logpath": "/tmp/count.log",
            "columns": ["c0", "c1", "datetime", "method", "status"],
            "delimiter": "\t",
            "datetime_column": "datetime",
            "counts": [
                {"column": "method", "match": "GET", "period": 1},
                {"column": "status", "match": "200", "period": 1},
            ],
        }
        myreader = LogReader(q, conf=conf)
        myreader.start()

        # time to process
        time.sleep(0.1)

        messages = []
        while q.qsize() > 0:
            messages.append(q.get())

        # it should generate messages for the
        # minutes: 7, 8, 9, 10, 11 (x 2, since there are 2 counts)
        self.assertEqual(10, len(messages))

        status = [m for m in messages if m.content["column_name"] == "status"]
        methods = [m for m in messages if m.content["column_name"] == "method"]

        # assert that all minutes were delivered for each count
        self.assertEqual(5, len(status))
        self.assertEqual(5, len(methods))

        result = [
            (
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["aggregation_type"],
                m.content["value"],
            )
            for m in methods
        ]
        self.assertIn((7, "count", 1), result)
        self.assertIn((8, "count", 2), result)
        self.assertIn((9, "count", 0), result)
        self.assertIn((10, "count", 0), result)
        self.assertIn((11, "count", 0), result)

        result = [
            (
                datetime.datetime.utcfromtimestamp(m.content["interval_started_at"] / 1000).minute,
                m.content["aggregation_type"],
                m.content["value"],
            )
            for m in status
        ]
        self.assertIn((7, "count", 1), result)
        self.assertIn((8, "count", 1), result)
        self.assertIn((9, "count", 0), result)
        self.assertIn((10, "count", 0), result)
        self.assertIn((11, "count", 0), result)
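
    # For orientation only: a hypothetical /tmp/count.log fixture consistent
    # with the assertions above (the real fixture and its datetime format are
    # produced elsewhere in the suite; values here are illustrative):
    #
    #   c0 <TAB> c1 <TAB> <minute 07> <TAB> GET <TAB> 200
    #   c0 <TAB> c1 <TAB> <minute 08> <TAB> GET <TAB> 200
    #   c0 <TAB> c1 <TAB> <minute 08> <TAB> GET <TAB> 404
    #
    # yielding GET counts of 1 and 2, and status-200 counts of 1 and 1, for
    # minutes 7 and 8 respectively, with the remaining minutes flushed as 0.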