def test_ttop_allocation_report(self):
    """ttop parser allocation test"""
    test_file = os.path.join(current_dir(__file__), "testdata", "ttop-cpu.out")
    ttop = TTopAnalyzer([test_file])

    def run():
        ttop.print_report(alloc=True)

    self.maxDiff = None
    output = steal_output(run)
    self.assertIn(
        """================================================================================
CoreThread 20 2.14 mb -------------
RMI TCP Connection(2) 4 1.14 mb -------
DseGossipStateUpdater 4 38.00 kb
ScheduledTasks 4 24.00 kb
NodeHealthPlugin-Scheduler-thread 4 19.00 kb
ContainerBackgroundProcessor[StandardEngine[Solr]] 4 6.01 kb
JMX server connection timeout 425 4 4.18 kb
PO-thread 4 2.47 kb
AsyncAppender-Worker-ASYNCDEBUGLOG 4 1.90 kb
LeasePlugin 4 1.38 kb
NonPeriodicTasks 4 1.07 kb
GossipTasks 4 841 bytes
RxSchedulerPurge 4 710 bytes
OptionalTasks 4 323 bytes
ParkedThreadsMonitor 4 317 bytes
internode-messaging RemoteMessageServer acceptor 4 0 byte""",
        output,
    )
def test_sperf_68(self):
    """integration test; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(current_dir(__file__), "testdata", "dse68")
    args.files = []
    args.top = 3
    args.interval = 3600
    args.start = None
    args.end = None

    def run():
        slowquery.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        output,
        "sperf core slowquery version: %s" % (VERSION)
        + """
. <5073ms + >5073ms ! >5073ms X >5073ms
------------------------------
2020-07-22 13:39:05.889000+00:00 X
worst period: 2020-07-22 13:39:05.889000+00:00 (5074ms)
1 slow queries, 0 cross-node
Top 3 slow queries:
------------------------------
5074ms: <SELECT config FROM dse_insights.insights_config WHERE key = 1>""",
    )
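# The hand-built SimpleNamespace above mimics the argparse.Namespace that the
# sperf CLI would normally pass to slowquery.run(). The helper below is a
# hypothetical sketch (not part of the actual test suite; its name and default
# values are assumptions) showing the pattern the slowquery tests in this file
# repeat when they set up args by hand.
def make_slowquery_args(diag_dir, top=3, interval=3600):
    """Build a minimal args namespace with the attributes slowquery.run() reads."""
    args = types.SimpleNamespace()
    args.diag_dir = diag_dir
    args.files = []
    args.top = top
    args.interval = interval
    args.start = None
    args.end = None
    return args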
def test_sperf(self):
    """integration test"""
    args = make_67_diag_args()

    def run():
        sperf_default.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        "\n" + output,
        """
nodes 3
dse version(s) (startup logs) { 6.7.7 }
cassandra version(s) (startup logs) { DSE private fork }
solr version(s) (startup logs) { 6.0.1.2.2647 }
spark version(s) (startup logs) { 2.2.3.9 }
worst gc pause (system logs) 800.00 ms (10.101.35.71)
worst read latency (cfstats) 6.11 ms (system_schema.keyspaces 10.101.35.102)
worst write latency (cfstats) 4.72 ms (system_schema.dropped_columns 10.101.35.102)
worst tombstones query (cfstats) 319 (system_distributed.nodesync_status 10.101.35.71)
worst live cells query (cfstats) 447 (system_schema.columns 10.101.35.71)
largest table (cfstats) my_solr.my_table (133.46 mb 10.101.35.102)
busiest table reads (cfstats) my_solr.my_table (99.78% of reads)
busiest table writes (cfstats) my_solr.my_table (95.80% of writes)
largest partition (cfstats) 28.83 kb system.size_estimates (10.101.35.71)
errors parsing
--------------
No parsing errors
recommendations
---------------
* There were 16 incidents of GC over 500ms. Run `sperf core gc` for more analysis.""",
    )
def test_run_jarcheck_with_empty_file(self):
    """integration test for a single file with no jar data in it"""
    test_file_1 = os.path.join(current_dir(__file__), "testdata", "empty.log")
    parser = jarcheck.JarCheckParser(files=[test_file_1])
    output = steal_output(parser.print_report)
    self.assertEqual(output, "Analyzed 1 file")
def test_ttop_report(self):
    """ttop parser test"""
    test_file = os.path.join(current_dir(__file__), "testdata", "ttop-cpu.out")
    ttop = TTopAnalyzer([test_file])
    output = steal_output(ttop.print_report)
    self.maxDiff = None
    self.assertIn(
        """2020-01-09 16:08:06 Threads CPU% Total: 28.06%
================================================================================
ParkedThreadsMonitor 4 23.52 -----------------
RMI TCP Connection(2) 4 2.90 --
CoreThread 20 1.20 -
DseGossipStateUpdater 4 0.10
ScheduledTasks 4 0.08
NodeHealthPlugin-Scheduler-thread 4 0.06
OptionalTasks 4 0.05
JMX server connection timeout 425 4 0.04
ContainerBackgroundProcessor[StandardEngine[Solr]] 4 0.02
PO-thread 4 0.02
GossipTasks 4 0.01
AsyncAppender-Worker-ASYNCDEBUGLOG 4 0.01
LeasePlugin 4 0.01
NonPeriodicTasks 4 0.01
RxSchedulerPurge 4 0.01
internode-messaging RemoteMessageServer acceptor 4 0.00""",
        output,
    )
def test_ttop_allocation_report(self):
    """ttop parser allocation test"""
    test_file = os.path.join(current_dir(__file__), "testdata", "ttop-cpu.out")
    ttop = TTopAnalyzer([test_file])

    def run():
        ttop.print_report(alloc=True)

    self.maxDiff = None
    output = steal_output(run)
    self.assertIn(
        """2020-01-09 16:08:46 Threads Alloc/s Total: 3.24 mb
================================================================================
CoreThread 7 2.15 mb -------------
RMI TCP Connection(2) 1 1.05 mb ------
ScheduledTasks 1 24.00 kb
ContainerBackgroundProcessor[StandardEngine[Solr]] 1 4.18 kb
JMX server connection timeout 425 1 4.08 kb
BatchlogTasks 1 1.55 kb
LeasePlugin 1 1.49 kb
GossipTasks 1 916 bytes
RxSchedulerPurge 1 646 bytes
ParkedThreadsMonitor 1 317 bytes
OptionalTasks 1 305 bytes
http-bio 1 57 bytes
AsyncFileHandlerWriter 1 32 bytes
internode-messaging RemoteMessageServer acceptor 1 0 byte""",
        output,
        output.replace("\\n", "\n"),
    )
def test_sperf(self):
    """integration test; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(
        current_dir(__file__), "testdata", "diag", "DSE_CLUSTER"
    )
    args.files = []
    args.stages = "all"
    args.start = None
    args.end = None
    args.debug_log_prefix = "debug.log"
    args.reporter = "summary"
    args.system_log_prefix = "system.log"

    def run():
        statuslogger.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        output,
        "sperf core statuslogger version: %s\n" % (VERSION)
        + """
Summary (22,054 lines)
Summary (444 skipped lines)
dse versions: {'6.7.7'}
cassandra versions: {'DSE Private Fork'}
first log time: 2020-01-10 15:27:58.554000+00:00
last log time: 2020-01-10 17:21:13.549000+00:00
duration: 1.89 hours
total stages analyzed: 2
total nodes analyzed: 3
GC pauses  max  p99  p75  p50  p25  min
           ---  ---  ---  ---  ---  ---
ms         800  729  358  282  243  201
total GC events: 236
busiest tables by ops across all nodes
------------------------------
* 10.101.35.102: OpsCenter.rollups60: 66,464 ops / 3.38 mb data
busiest table by data across all nodes
------------------------------
* 10.101.35.102: my_solr.my_table: 37,132 ops / 9.37 mb data
busiest stages across all nodes
------------------------------
* CompactionExecutor active: 1 (10.101.35.102)
* TPC/0/WRITE_REMOTE active: 1 (10.101.35.102)
* CompactionExecutor pending: 1 (10.101.35.102)
busiest stages in PENDING
------------------------------
10.101.35.102:
    CompactionExecutor: 1""",
    )
def test_gcinspector():
    """test gcinspector analysis"""
    g = GCInspector(test_dse_tarball())
    g.analyze()
    assert len(g.pauses) == 3
    assert len(g.all_pauses()) == 236
    output = steal_output(g.print_report)
    assert (
        "!!++.+.+.!++.+.+...+.+..+.+.+.+..+++....++..+++....+..++.+++.+!+..+.+.+.+!......+++....+"
        in output
    )
def test_gcinspector(self):
    """test gcinspector analysis"""
    g = GCInspector(test_dse_tarball())
    g.analyze()
    self.assertEqual(len(g.pauses), 3)
    self.assertEqual(len(g.all_pauses()), 236)
    output = steal_output(g.print_report)
    self.assertIn(
        "!!++.+.+.!++.+.+...+.+..+.+.+.+..+++....++..+++....+..++.+++.+!+..+.+.+.+!......+++....+",
        output,
    )
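# steal_output() and current_dir() come from the shared test helpers and are not
# defined in this file. The sketches below are assumptions for illustration, not
# the project's actual implementations: steal_output() presumably captures what a
# callable prints to stdout, and current_dir() resolves a module's directory.
import contextlib
import io
import os


def steal_output_sketch(func, *args, **kwargs):
    """Call func, capturing and returning everything it prints to stdout."""
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        func(*args, **kwargs)
    return buf.getvalue().strip()


def current_dir_sketch(module_file):
    """Return the absolute directory containing the given module file."""
    return os.path.dirname(os.path.abspath(module_file))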
def test_run_jarcheck_diff_only():
    """integration test for the full command with diff_only"""
    test_file_1 = os.path.join(current_dir(__file__), "testdata", "simple-output.log")
    test_file_2 = os.path.join(current_dir(__file__), "testdata", "simple-output2.log")
    parser = jarcheck.JarCheckParser(files=[test_file_1, test_file_2])
    output = steal_output(parser.print_report, diff_only=True)
    assert (
        """Inconsistent jars
------------------------------
dse-advrep-6.7.1.jar
Analyzed 2 files"""
        in output
    )
def test_sperf():
    """integration test"""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(
        current_dir(__file__), "testdata", "diag", "DSE_CLUSTER"
    )
    args.output_log_prefix = "output.log"
    args.debug_log_prefix = "debug.log"
    args.system_log_prefix = "system.log"
    args.node_info_prefix = "node_info.json"
    args.block_dev_prefix = "blockdev_report"
    args.cfstats_prefix = "cfstats"

    def run():
        sperf_default.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    assert "\n" + output == """
def test_sperf_68(self):
    """integration test"""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(current_dir(__file__), "testdata", "dse68")
    args.output_log_prefix = "output.log"
    args.debug_log_prefix = "debug.log"
    args.system_log_prefix = "system.log"
    args.node_info_prefix = "node_info.json"
    args.block_dev_prefix = "blockdev_report"
    args.cfstats_prefix = "cfstats"

    def run():
        sperf_default.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        "\n" + output,
        """
nodes 1
dse version(s) (startup logs) { 6.8.1 }
cassandra version(s) (startup logs) { DSE private fork }
solr version(s) (startup logs) { 6.0.1.4.2746 }
spark version(s) (startup logs) { 2.4.0.14 }
worst gc pause (system logs) 2.07 seconds (172.17.0.2)
worst read latency (cfstats) 0.19 ms (dse_insights.insights_config 172.17.0.2)
worst write latency (cfstats) 0.42 ms (system.sstable_activity 172.17.0.2)
worst tombstones query (cfstats) 1 (dse_system.shared_data 172.17.0.2)
worst live cells query (cfstats) 639 (system_schema.columns 172.17.0.2)
largest table (cfstats) keyspace1.standard1 (1.79 gb 172.17.0.2)
busiest table reads (cfstats) system.paxos (64.62% of reads)
busiest table writes (cfstats) keyspace1.standard1 (99.27% of writes)
largest partition (cfstats) 41.51 kb keyspace1.standard1 (172.17.0.2)
errors parsing
--------------
* missing output logs: all nodes
* unable to read 'node_info.json'
recommendations
---------------
* There were 5 incidents of GC over 500ms. Run `sperf core gc` for more analysis.""",
    )
def test_sperf(self):
    """integration test; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    self.maxDiff = None
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(
        current_dir(__file__), "testdata", "diag", "DSE_CLUSTER"
    )
    args.files = []
    args.top = 3
    args.interval = 3600
    args.start = None
    args.end = None

    def run():
        slowquery.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        output,
        "sperf core slowquery version: %s" % (VERSION)
        + "\n\n"
        + """this is not a very accurate report, use it to discover basics, but I suggest analyzing the logs by hand for any outliers
. <574ms + >661ms ! >1285ms X >1687ms
------------------------------
2020-01-10 16:58:55.839000+00:00 ........................!!.!!++++!+!.!!.++!+!+..+..X.++!+!!.+!.!+!++!!.+.+.
worst period: 2020-01-10 16:58:55.839000+00:00 (65342ms)
slow query breakdown
--------------------
75 total, 48 cross-node, 0 timeouts
Top 3 slow queries:
------------------------------
1688ms: SELECT * FROM my_solr.my_table WHERE id = 00000000-0041-d584-0000-0000004956fd LIMIT 5000
1535ms: SELECT * FROM my_solr.my_table WHERE id = 00000000-0047-87cc-0000-0000004e94af LIMIT 5000
1522ms: SELECT * FROM my_solr.my_table WHERE id = 00000000-0008-b249-0000-00000044c1f3 LIMIT 5000""",
    )
def test_sperf_68(self):
    """integration test for the DSE 6.8 format; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(current_dir(__file__), "testdata", "dse68")
    args.files = []
    args.top = 3
    args.interval = 3600
    args.start = None
    args.end = None

    def run():
        slowquery.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        output,
        "sperf core slowquery version: %s" % (VERSION)
        + "\n\n"
        + """this is not a very accurate report, use it to discover basics, but I suggest analyzing the logs by hand for any outliers
. <10000ms + >10003ms ! >10005ms X >10057ms
------------------------------
2020-07-22 13:39:05.889000+00:00 .X.+.!!.
worst period: 2020-07-22 13:39:05.889000+00:00 (72158ms)
slow query breakdown
--------------------
8 total, 0 cross-node, 7 timeouts
Top 3 slow queries:
------------------------------
10058ms: SELECT * FROM keyspace1.standard1 WHERE key= 1
10006ms: SELECT * FROM keyspace1.standard1 WHERE C3 = 30783739393164656164636535346463653436633764343738393962313463616366396262623565643135366538613864386630396562336233343235623662373464386563 AND LIMIT 1
10006ms: SELECT * FROM keyspace1.standard1 WHERE C2 = 307836313933373336353935666436333031643163 AND LIMIT 1""",
    )
def test_sperf(self):
    """integration test; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(
        current_dir(__file__), "testdata", "diag", "DSE_CLUSTER"
    )
    args.files = []

    def run():
        schema.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    header = "sperf core schema version: %s" % (VERSION)
    self.assertEqual(
        output,
        header
        + """
Schema read     : %s
Keyspace Count  : 15
Table Count     : 61
2i Count        : 1
MV Count        : 0
UDT Count       : 6
Solr Index Count: 1
Solr Table Count: 1"""
        % os.path.join(
            current_dir(__file__),
            "testdata",
            "diag",
            "DSE_CLUSTER",
            "nodes",
            "10.101.33.205",
            "driver",
            "schema",
        ),
    )
def test_sperf(self):
    """integration test; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(
        current_dir(__file__), "testdata", "diag", "DSE_CLUSTER"
    )
    args.files = []
    args.top = 3
    args.interval = 3600
    args.start = None
    args.end = None

    def run():
        slowquery.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        output,
        "sperf core slowquery version: %s" % (VERSION)
        + """
. <5000ms + >5000ms ! >5004ms X >5004ms
------------------------------
2020-01-10 16:58:55.839000+00:00 XXXXXXXXXXXXXXXXXXXXXXXX++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
worst period: 2020-01-10 16:58:55.839000+00:00 (930282ms)
3 slow queries, 1 cross-node
Top 3 slow queries:
------------------------------
5005ms: <SELECT * FROM my_solr.my_table WHERE id = 00000000-0057-1aa2-0000-0000002c726f LIMIT 5000>
5001ms: <SELECT * FROM my_solr.my_table WHERE id = 00000000-004f-c914-0000-0000004d6abe LIMIT 5000>
5001ms: <SELECT * FROM my_solr.my_table WHERE id = 00000000-0040-c812-0000-0000002016a4 LIMIT 5000>""",
    )
def test_run_jarcheck_found_no_files():
    """no files found output"""
    parser = jarcheck.JarCheckParser()
    output = steal_output(parser.print_report, diff_only=False)
    assert output == "Nothing analyzed"
def test_core_diag_integration(self):
    """integration test with 6.7 tarball"""
    args = make_67_diag_args()

    def run():
        diag_cmd.run(args)

    output = steal_output(run)
    self.assertEqual(
        output,
        "sperf core diag version: %s" % VERSION
        + """
configuration #1
----------------
nodes count 2
cass drive(s) read ahead N/A
dse version 6.7.7
cassandra version N/A
solr version 6.0.1.2.2647
spark version 2.2.3.9
spark connector version N/A
disk access mode standard
disk optimization ssd
memtable cleanup threshold default
flush writers default
compaction throughput 16 mb
concurrent compactors default
memtable size (heap) default
memtable size (offheap) default
memtable allocation type offheap_objects
file cache size default
heap size 3759M
gc G1GC
total ram 15 gb
total cpu cores (real) 8
threads per core 1
worst gc pause 800.00 ms (10.101.35.71)
worst write latency (all) 4.22 ms (system_schema.dropped_columns 10.101.35.71)
worst read latency (all) 2.88 ms (system.local 10.101.35.71)
worst tombstones query (all) 319 (system_distributed.nodesync_status 10.101.35.71)
worst live cells query (all) 447 (system_schema.columns 10.101.35.71)
largest table my_solr.my_table (133.32 mb 10.101.35.71)
busiest table reads my_solr.my_table (99.76% of reads)
busiest table writes my_solr.my_table (96.18% of writes)
worst partition in system.size_estimates (10.101.35.71)
* max partition size 28.83 kb
* mean partition size 8.10 kb
* min partition size 2.25 kb
nodes
-----
10.101.35.71, 10.101.33.205
config diff from common:
* listen_interface: eth0
configuration #2
----------------
nodes count 1
cass drive(s) read ahead N/A
dse version 6.7.7
cassandra version N/A
solr version 6.0.1.2.2647
spark version 2.2.3.9
spark connector version N/A
disk access mode standard
disk optimization ssd
memtable cleanup threshold default
flush writers default
compaction throughput 16 mb
concurrent compactors default
memtable size (heap) default
memtable size (offheap) default
memtable allocation type offheap_objects
file cache size default
heap size 3759M
gc G1GC
total ram 15 gb
total cpu cores (real) 8
threads per core 1
worst gc pause 258.00 ms (10.101.35.102)
worst write latency (all) 4.72 ms (system_schema.dropped_columns 10.101.35.102)
worst read latency (all) 6.11 ms (system_schema.keyspaces 10.101.35.102)
worst tombstones query (all) 7 (OpsCenter.events 10.101.35.102)
worst live cells query (all) 447 (system_schema.columns 10.101.35.102)
largest table my_solr.my_table (133.46 mb 10.101.35.102)
busiest table reads my_solr.my_table (99.81% of reads)
busiest table writes my_solr.my_table (95.04% of writes)
worst partition in system_schema.columns (10.101.35.102)
* max partition size 9.66 kb
* mean partition size 2.16 kb
* min partition size 180 bytes
nodes
-----
10.101.35.102
config diff from common:
* listen_interface: default
parser warnings
---------------
no warnings""",
    )
def test_sperf_68(self):
    """integration test; this is not the best test and only verifies that calculations do not change as the codebase changes."""
    args = types.SimpleNamespace()
    args.diag_dir = os.path.join(current_dir(__file__), "testdata", "dse68")
    args.files = []
    args.start = None
    args.end = None
    args.stages = "all"
    args.reporter = "summary"
    args.debug_log_prefix = "debug.log"
    args.system_log_prefix = "system.log"
    self.maxDiff = None

    def run():
        statuslogger.run(args)

    output = steal_output(run)
    # reads better with the extra newline
    self.assertEqual(
        output,
        "sperf core statuslogger version: %s\n" % (VERSION)
        + """
Summary (20,245 lines)
Summary (2,204 skipped lines)
dse versions: {'6.8.1'}
cassandra versions: {'DSE Private Fork'}
first log time: 2020-07-20 09:09:27.757000+00:00
last log time: 2020-07-22 13:49:40.782000+00:00
duration: 2.19 days
total stages analyzed: 17
total nodes analyzed: 1
GC pauses  max   p99   p75   p50   p25   min
           ---   ---   ---   ---   ---   ---
ms         2066  2066  1371  615   436   251
total GC events: 7
busiest tables by ops across all nodes
------------------------------
* 172.17.0.2: system.paxos: 103,106 ops / 309 bytes data
busiest table by data across all nodes
------------------------------
* 172.17.0.2: keyspace1.standard1: 4,237 ops / 45.40 mb data
busiest stages across all nodes
------------------------------
* TPC/1/READ_LOCAL pending: 100 (172.17.0.2)
* TPC/1/READ_LOCAL active: 32 (172.17.0.2)
* MemtablePostFlush pending: 6 (172.17.0.2)
* MemtableFlushWriter active: 3 (172.17.0.2)
* PerDiskMemtableFlushWriter_0 active: 2 (172.17.0.2)
* CompactionExecutor active: 1 (172.17.0.2)
* MemtablePostFlush active: 1 (172.17.0.2)
* TPC/all/EXECUTE_STATEMENT active: 1 (172.17.0.2)
* LwtStage active: 1 (172.17.0.2)
* TPC/other active: 1 (172.17.0.2)
* TPC/other/EXECUTE_STATEMENT active: 1 (172.17.0.2)
* TPC/0/TIMED_TIMEOUT active: 1 (172.17.0.2)
busiest stages in PENDING
------------------------------
172.17.0.2:
    TPC/1/READ_LOCAL: 100
    MemtablePostFlush: 6""",
    )