def test_parseline_upstream_no_status(self):
    # Sample line whose upstream status, connect time and header time fields
    # are all '-', while an upstream response time (0.412) is present.
    line = "1|1611749281.126|critiquebrainz.org|s|-|499|0|-|408|0.414|10.2.2.39:13032|-|0.412|-|-"
    row, _, _ = parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)
    self.assertIn('upstreams', row)

    upstreams = row['upstreams']
    addr = '10.2.2.39:13032'
    # The response time is recorded, the two missing timings are not.
    self.assertIn(addr, upstreams['response_time'])
    for absent_field in ('connect_time', 'header_time'):
        self.assertNotIn(addr, upstreams[absent_field])
    # The address is still expected among the servers and the status entries.
    for present_field in ('servers', 'status'):
        self.assertIn(addr, upstreams[present_field])
def test_parseline_upstream_addr_valid_one_redirect(self):
    # Two upstream addresses separated by ' : '. The assertions below expect
    # this to be reported as one contacted server plus one internal redirect,
    # with the first address present in every per-upstream mapping.
    line = self.get_sample_line(PosField.upstream_addr, replace_with='6.6.6.6 : 7.7.7.7')
    row, _, _ = parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)
    self.assertIn('upstreams', row)

    upstreams = row['upstreams']
    first_addr = '6.6.6.6'
    for field in ('status', 'response_time', 'connect_time', 'header_time'):
        self.assertIn(first_addr, upstreams[field])
    self.assertEqual(upstreams['servers_contacted'], 1)
    self.assertEqual(upstreams['internal_redirects'], 1)
    self.assertIn(first_addr, upstreams['servers'])
def test_parseline_upstream_addr_valid_multiple(self):
    # Real-looking line with two ' : '-separated upstreams, each carrying its
    # own status and timing values.
    line = "1|1611817838.597|musicbrainz.org|-|ws|200|6110|-|516|0.726|10.2.2.36:65412 : 195.201.47.148:80|200 : 200|0.012 : 0.712|0.000 : 0.000|0.012 : 0.712"
    row, _, _ = parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)
    self.assertIn('upstreams', row)

    # Every per-upstream mapping must contain both addresses.
    per_upstream_fields = (
        'response_time',
        'connect_time',
        'header_time',
        'response_time_count',
        'connect_time_count',
        'header_time_count',
        'servers',
        'status',
    )
    for addr in ('10.2.2.36:65412', '195.201.47.148:80'):
        for field in per_upstream_fields:
            self.assertIn(addr, row['upstreams'][field])
        # Each upstream answered 200 exactly once.
        self.assertEqual(row['upstreams']['status'][addr], {'200': 1})
        self.assertEqual(row['upstreams']['servers_contacted'], 1)
        self.assertEqual(row['upstreams']['internal_redirects'], 1)
def test_parseline_2(self):
    # Line with four ', '-separated upstreams that all answered 502. The last
    # one ('caa-redirect') has '-' as connect time, and no upstream has a
    # header time.
    line = ("1|1612013386.275|coverartarchive.org|s|-|502|664|-|49|0.019|10.2.2.23:62080, 10.2.2.37:62080, 10.2.2.40:62080,"
            " caa-redirect|502, 502, 502, 502|0.000, 0.000, 0.000, 0.000|0.000, 0.000, 0.000, -|-, -, -, -")
    row, last_msec, _ = parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)

    # Upstreams with a numeric connect time, then the full server list.
    with_connect_time = ['10.2.2.23:62080', '10.2.2.37:62080', '10.2.2.40:62080']
    all_servers = with_connect_time + ['caa-redirect']

    expected = {
        'servers': all_servers,
        'servers_contacted': 4,
        'internal_redirects': 0,
        'status': {addr: {'502': 1} for addr in all_servers},
        'response_time': {addr: 0.0 for addr in all_servers},
        'response_time_count': {addr: 1 for addr in all_servers},
        'connect_time': {addr: 0.0 for addr in with_connect_time},
        'connect_time_count': {addr: 1 for addr in with_connect_time},
        'header_time': {},
        'header_time_count': {},
    }
    self.assertEqual(row['upstreams'], expected)
    self.assertEqual(last_msec, 1612013386.275)
def test_parseline(self):
    # Full check of the row, timestamp and bucket produced for one
    # well-formed line with a single upstream.
    line = '1|1568962563.374|musicbrainz.org|s|ws|200|2799|2.5|289|0.026|10.2.2.31:65412|200|0.024|0.000|0.024'
    row, last_msec, bucket = parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)

    upstream = '10.2.2.31:65412'
    expected_upstreams = {
        'servers': [upstream],
        'servers_contacted': 1,
        'internal_redirects': 0,
        'status': {upstream: {'200': 1}},
        'response_time': {upstream: 0.024},
        'response_time_count': {upstream: 1},
        'connect_time': {upstream: 0.0},
        'connect_time_count': {upstream: 1},
        'header_time': {upstream: 0.024},
        'header_time_count': {upstream: 1},
    }
    expected = {
        'vhost': 'musicbrainz.org',
        'protocol': 's',
        'loctag': 'ws',
        'status': 200,
        'bytes_sent': 2799,
        'request_length': 289,
        'gzip_ratio': 2.5,
        'request_time': 0.026,
        'upstreams': expected_upstreams,
    }

    self.assertEqual(row, expected)
    self.assertEqual(last_msec, 1568962563.374)
    self.assertEqual(bucket, 1568962564)
def test_parseline_upstream_header_time_invalid(self):
    # A non-numeric upstream header time must make parseline raise ParseSkip
    # carrying the float conversion error message.
    line = self.get_sample_line(PosField.upstream_header_time, replace_with='xxx')
    expected_message = "^could not convert string to float: 'xxx'$"
    with self.assertRaisesRegex(ParseSkip, expected_message):
        # The call is expected to raise, so its result is deliberately not
        # unpacked: the names would never be bound nor read.
        parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)
def test_parseline_upstream_addr_invalid(self):
    # With '-' as upstream address, the parsed row must not contain any
    # 'upstreams' entry.
    sample = self.get_sample_line(PosField.upstream_addr, replace_with='-')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    self.assertNotIn('upstreams', parsed_row)
def test_parseline_upstream_status_valid(self):
    # A numeric upstream status is counted once, keyed by its string form,
    # under the upstream address of the sample line.
    sample = self.get_sample_line(PosField.upstream_status, replace_with='444')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    status_by_upstream = parsed_row['upstreams']['status']
    self.assertEqual(status_by_upstream['10.2.2.31:65412'], {'444': 1})
def test_parseline_request_time_valid(self):
    # The request time field must come back as the float 12.34.
    sample = self.get_sample_line(PosField.request_time, replace_with='12.34')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    request_time = parsed_row['request_time']
    self.assertEqual(request_time, 12.34)
def test_parseline_bytes_sent_valid(self):
    # The bytes sent field must come back as the integer 12345.
    sample = self.get_sample_line(PosField.bytes_sent, replace_with='12345')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    bytes_sent = parsed_row['bytes_sent']
    self.assertEqual(bytes_sent, 12345)
def test_parseline_status_valid(self):
    # The response status field must come back as the integer 444.
    sample = self.get_sample_line(PosField.status, replace_with='444')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    status_code = parsed_row['status']
    self.assertEqual(status_code, 444)
def test_parseline_loctag(self):
    # The location tag is returned verbatim as a string.
    sample = self.get_sample_line(PosField.loctag, replace_with='xxx')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    loctag = parsed_row['loctag']
    self.assertEqual(loctag, 'xxx')
def test_parseline_msec_invalid(self):
    # A non-numeric timestamp must make parseline raise ParseSkip carrying
    # the float conversion error message.
    # NOTE(review): relies on get_sample_line's default replacement value,
    # presumably 'xxx' as the expected message implies -- confirm in helper.
    line = self.get_sample_line(PosField.msec)
    # The pattern is anchored at the start only.
    expected_message = "^could not convert string to float: 'xxx'"
    with self.assertRaisesRegex(ParseSkip, expected_message):
        # The call is expected to raise, so its result is deliberately not
        # unpacked: the names would never be bound nor read.
        parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)
def test_parseline_msec_valid(self):
    # The second element returned by parseline is the timestamp of the
    # unmodified sample line, as a float.
    _, parsed_msec, _ = parseline(
        self.sample_line,
        ignore_before=0,
        bucket_duration=1,
        last_msec=0,
    )
    self.assertEqual(parsed_msec, 1568962563.374)
def test_parseline_version_invalid(self):
    # An unsupported log version must make parseline raise ParseSkip with an
    # explicit "invalid log version" message.
    # NOTE(review): relies on get_sample_line's default replacement value,
    # presumably 'xxx' as the expected message implies -- confirm in helper.
    line = self.get_sample_line(PosField.version)
    expected_message = "^invalid log version: xxx$"
    with self.assertRaisesRegex(ParseSkip, expected_message):
        # The call is expected to raise, so its result is deliberately not
        # unpacked: the names would never be bound nor read.
        parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)
def test_parseline_upstream_header_time_valid(self):
    # A numeric upstream header time is stored as a float under the upstream
    # address of the sample line.
    sample = self.get_sample_line(PosField.upstream_header_time, replace_with='12.34')
    parsed_row, _, _ = parseline(sample, ignore_before=0, bucket_duration=1, last_msec=0)
    header_time_by_upstream = parsed_row['upstreams']['header_time']
    self.assertEqual(header_time_by_upstream['10.2.2.31:65412'], 12.34)
def test_parser_all(self):
    # End-to-end scenario: parse 20 generated lines into storage buckets,
    # process the buckets one by one, then check the aggregated counters.
    storage = get_storage()
    mbs = mbsdict()
    bucket_duration = 5
    status = get_default_status(bucket_duration, 0, 30)

    sample_line = '1|1568962563.374|musicbrainz.org|s|ws|200|2799|2.5|289|0.026|10.2.2.31:65412|200|0.027|0.057|0.024'
    # Replacements applied to lines 11..13: two upstreams, both without
    # header time.
    no_header_time_fields = (
        (PosField.upstream_addr, '10.2.2.31:65412, 10.2.2.32:65412'),
        (PosField.upstream_status, '500, 500'),
        (PosField.upstream_response_time, '0.000, 0.000'),
        (PosField.upstream_connect_time, '0.000, 0.000'),
        (PosField.upstream_header_time, '-, -'),
    )

    # Timestamps advance by a third of the bucket duration per line.
    start = 1568962563.374
    for i in range(20):
        line = self.get_sample_line(PosField.msec, replace_with=str(start), line=sample_line)
        if i == 3:
            # just to ensure upstream header time absence gives correct result
            line = self.get_sample_line(PosField.upstream_header_time, replace_with='-', line=line)
        elif i == 6:
            # invalid header time: this line is expected to be skipped
            line = self.get_sample_line(PosField.upstream_header_time, replace_with='x', line=line)
        elif 11 <= i <= 13:
            # empty upstream header time, that could lead to division by zero
            for field, value in no_header_time_fields:
                line = self.get_sample_line(field, replace_with=value, line=line)
        if i % 2:
            # odd lines get a 302 status, even lines keep the sample's 200
            line = self.get_sample_line(PosField.status, replace_with='302', line=line)
        try:
            row, last_msec, bucket = parseline(line, ignore_before=0,
                                               bucket_duration=bucket_duration, last_msec=0)
            storage[bucket].append(row)
            self.assertEqual(last_msec, start)
        except ParseSkip:
            pass
        start += bucket_duration / 3.0

    buckets = list(storage)
    self.assertEqual(
        buckets,
        [313792513, 313792514, 313792515, 313792516,
         313792517, 313792518, 313792519, 313792520],
    )
    self.assertEqual([len(rows) for rows in storage.values()], [1, 3, 2, 3, 3, 3, 3, 1])
    self.assertEqual(
        [bucket2time(bucket_id, bucket_duration) for bucket_id in storage],
        ['2019-09-20T06:56:05Z', '2019-09-20T06:56:10Z', '2019-09-20T06:56:15Z',
         '2019-09-20T06:56:20Z', '2019-09-20T06:56:25Z', '2019-09-20T06:56:30Z',
         '2019-09-20T06:56:35Z', '2019-09-20T06:56:40Z'],
    )

    def process_and_check_bytes(bucket_id, expected_bytes):
        # Process one bucket: its key must appear in mbs['bytes_sent'] only
        # after processing, and the bucket must be emptied from storage.
        key = (bucket_id, 'musicbrainz.org', 's', 'ws')
        self.assertNotIn(key, mbs['bytes_sent'])
        process_bucket(bucket_id, storage, status, mbs)
        self.assertEqual(len(storage[bucket_id]), 0)
        self.assertIn(key, mbs['bytes_sent'])
        self.assertEqual(mbs['bytes_sent'][key], expected_bytes)

    def check_header_time(bucket_id, expected_sum, expected_count):
        # Check the header time sum and sample count kept before the mean is
        # computed, for the sample upstream in the given bucket.
        upstream_key = (bucket_id, 'musicbrainz.org', 's', 'ws', '10.2.2.31:65412')
        # NOTE: float imprecision can lead to a result not exactly equal to expected value
        self.assertAlmostEqual(mbs['_upstreams_header_time_premean'][upstream_key],
                               expected_sum, places=4)
        self.assertEqual(mbs['_upstreams_header_time_count_premean'][upstream_key],
                         expected_count)

    # process first bucket
    process_and_check_bytes(buckets[0], 2799)

    # process second bucket
    process_and_check_bytes(buckets[1], 2799 * 3)
    # NOTE: for this bucket, we actually made a variation for upstream header time (see if i == 3 above)
    check_header_time(buckets[1], 0.0480, 2)

    # process third bucket
    # NOTE: one was skipped due to forced parse error (see if i == 6 above)
    process_and_check_bytes(buckets[2], 2799 * 2)
    check_header_time(buckets[2], 0.0480, 2)

    # process fourth bucket
    process_and_check_bytes(buckets[3], 2799 * 3)
    check_header_time(buckets[3], 0.0720, 3)

    # now process all buckets
    for bucket in buckets:
        process_bucket(bucket, storage, status, mbs)
    mbspostprocess(mbs)

    # Sum the per-bucket counters for each response status.
    totals = {200: 0, 302: 0}
    for bucket in buckets:
        for code in (200, 302):
            totals[code] += mbs['status'][(bucket, 'musicbrainz.org', 's', 'ws', code)]

    # 20 lines, one was skipped due to forced parse error, we expect 19 total
    self.assertEqual(totals[200], 9)
    self.assertEqual(totals[302], 10)
def test_parseline_upstream_status_invalid(self):
    # A non-numeric upstream status must make parseline raise ParseSkip
    # carrying the int conversion error message.
    line = self.get_sample_line(PosField.upstream_status, replace_with='xxx')
    # Raw string: the pattern escapes the parentheses of "int()", and "\("
    # in a plain string literal is an invalid escape sequence. The resulting
    # pattern text is unchanged.
    expected_message = r"^invalid literal for int\(\) with base 10: 'xxx'$"
    with self.assertRaisesRegex(ParseSkip, expected_message):
        # The call is expected to raise, so its result is deliberately not
        # unpacked: the names would never be bound nor read.
        parseline(line, ignore_before=0, bucket_duration=1, last_msec=0)