def bytes_consumed(output, human_readable):
    # Print the bytes-consumed figure reported by the events summary built
    # from ``output``. When ``human_readable`` is truthy the value is passed
    # through bytes2human() first; otherwise the raw value is printed.
    total = EventsSummary(output).get_bytes_consumed()
    print(bytes2human(total) if human_readable else total)
def exec_time(output, human_readable):
    # Print the config execution time reported by the events summary built
    # from ``output``.
    seconds = EventsSummary(output).get_config_exec_time()
    if not human_readable:
        # Raw value, exactly as returned by the summary.
        print(seconds)
        return
    # Render the value as a timedelta for easier reading.
    print(datetime.timedelta(seconds=seconds))
def test_event_summary__show_events(test_data_dir, capsys):
    """Showing unhandled-error events prints them to the terminal.

    Only the "australia" job output directory is loaded, so the captured
    stdout must mention it and must not mention "united_states".
    """
    job_output = os.path.join(test_data_dir, "events", "job-outputs", "australia")
    EventsSummary(job_output).show_events(EVENT_NAME_UNHANDLED_ERROR)

    stdout = capsys.readouterr().out
    assert "Exception" in stdout
    assert "australia" in stdout
    assert "united_states" not in stdout
def collect(duration, force, interval, output):
    """Collect resource utilization stats."""
    # Parameters:
    #   duration: seconds to collect for; None means run until interrupted.
    #   force:    when truthy, delete an existing output directory first.
    #   interval: seconds to sleep between samples.
    #   output:   directory that receives the stats event log.
    # Exits with status 1 if ``output`` exists and ``force`` is not set.
    if os.path.exists(output):
        if not force:
            print(
                f"The directory {output} already exists. Delete it or run with --force",
                file=sys.stderr,
            )
            sys.exit(1)
        shutil.rmtree(output)

    os.makedirs(output)
    event_file = os.path.join(output, "stats_events.log")
    setup_event_logging(event_file)
    monitor = ResourceMonitorLogger("ResourceMonitor")

    # Use a monotonic clock so that wall-clock adjustments (NTP, DST, manual
    # changes) cannot shorten or extend the requested duration.
    start_time = time.monotonic()
    show_cmd = f"jade stats show -o {output} [STATS]"
    print(f"Collecting stats. When complete run '{show_cmd}' to view stats.")

    try:
        while True:
            monitor.log_resource_stats()
            time.sleep(interval)
            if duration is not None and time.monotonic() - start_time > duration:
                print(f"Exceeded {duration} seconds. Exiting.")
                break
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop an open-ended collection; fall
        # through so the summary below is still built.
        # NOTE(review): an earlier TODO here reported that click intercepts
        # KeyboardInterrupt before this handler runs. If that is the case the
        # summary is only built for the duration-based exit - confirm.
        pass

    # Always build the events summary once collection has stopped, regardless
    # of whether the loop ended by timeout or by interruption.
    EventsSummary(output)
def show(stats, output):
    """Shows stats from a run.

    \b
    Examples:
    jade stats show
    jade stats show cpu
    jade stats show disk
    jade stats show mem
    jade stats show net
    jade stats show cpu disk mem
    """
    # ``stats`` is an iterable of stat names; when empty, every name in STATS
    # is shown. ``output`` is the directory handed to EventsSummary.
    events = EventsSummary(output)
    viewer_classes = {
        "cpu": CpuStatsViewer,
        "disk": DiskStatsViewer,
        "mem": MemoryStatsViewer,
        "net": NetworkStatsViewer,
    }

    for stat in stats or STATS:
        viewer_class = viewer_classes.get(stat)
        if viewer_class is None:
            # Report on stderr, matching the other stats commands, so the
            # error does not get mixed into the stats written to stdout.
            print(f"Invalid stat={stat}", file=sys.stderr)
            sys.exit(1)
        viewer_class(events).show_stats()
def plot(stats, output):
    """Plot stats from a run to files.

    \b
    Examples:
    jade stats plot
    jade stats plot cpu
    jade stats plot disk
    jade stats plot mem
    jade stats plot net
    jade stats plot cpu disk mem
    """
    # ``stats`` is an iterable of stat names; when empty, every name in STATS
    # is plotted. Plots are written below ``output`` in the STATS_DIR folder.
    events = EventsSummary(output)
    selected = stats if stats else STATS

    plot_dir = Path(output) / STATS_DIR
    plot_dir.mkdir(exist_ok=True)

    viewer_classes = {
        "cpu": CpuStatsViewer,
        "disk": DiskStatsViewer,
        "mem": MemoryStatsViewer,
        "net": NetworkStatsViewer,
    }
    for stat in selected:
        if stat not in viewer_classes:
            print(f"Invalid stat={stat}", file=sys.stderr)
            sys.exit(1)
        viewer = viewer_classes[stat](events)
        viewer.plot_to_file(plot_dir)
def test_try_add_blocked_jobs(cleanup):
    """Check HPC submit events with and without --try-add-blocked-jobs.

    Five identical commands are configured and the last job is marked as
    blocked by {1, 2, 3, 4}. With --try-add-blocked-jobs a single submission
    of all five jobs is expected; with --no-try-add-blocked-jobs two
    submissions are expected (four jobs, then the remaining one).
    """
    num_commands = 5
    commands = ["ls ."] * num_commands
    with open(TEST_FILENAME, "w") as f_out:
        for command in commands:
            f_out.write(command + "\n")

    inputs = GenericCommandInputs(TEST_FILENAME)
    config = GenericCommandConfiguration(job_inputs=inputs)
    jobs = list(inputs.iter_jobs())
    for i, job_param in enumerate(jobs):
        if i == num_commands - 1:
            job_param.blocked_by = {1, 2, 3, 4}
        config.add_job(job_param)
    config.dump(CONFIG_FILE)

    # NOTE(review): this environment variable is not restored afterwards, so
    # it stays set for any test that runs later in the same process.
    os.environ["FAKE_HPC_CLUSTER"] = "True"

    for option in ("--try-add-blocked-jobs", "--no-try-add-blocked-jobs"):
        cmd = f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} -p 0.1 {option}"
        ret = run_command(cmd)
        assert ret == 0

        events_summary = EventsSummary(OUTPUT, preload=True)
        submit_events = events_summary.list_events(EVENT_NAME_HPC_SUBMIT)
        if option == "--try-add-blocked-jobs":
            assert len(submit_events) == 1
            event = submit_events[0]
            assert event.data["batch_size"] == num_commands
            assert event.data["num_blocked"] == 0
            # Clear the output so the second option starts from scratch.
            shutil.rmtree(OUTPUT)
        else:
            assert len(submit_events) == 2
            event1 = submit_events[0]
            event2 = submit_events[1]
            assert event1.data["batch_size"] == num_commands - 1
            assert event2.data["batch_size"] == 1
            assert event1.data["num_blocked"] == 1
            assert event2.data["num_blocked"] == 0
def _show_periodic_stats(stats, json_summary, output, summary_only):
    """Show or summarize the requested stats from the events under ``output``.

    For each name in ``stats`` the matching viewer is created. When
    ``json_summary`` is truthy the viewers' summaries are accumulated and
    printed once as indented JSON; otherwise each viewer shows its stats,
    including all timestamps unless ``summary_only`` is truthy. An
    unrecognized stat name is reported on stderr and exits with status 1.
    """
    events = EventsSummary(output)
    viewer_classes = {
        "cpu": CpuStatsViewer,
        "disk": DiskStatsViewer,
        "mem": MemoryStatsViewer,
        "net": NetworkStatsViewer,
    }

    collected = []
    for stat in stats:
        if stat not in viewer_classes:
            print(f"Invalid stat={stat}", file=sys.stderr)
            sys.exit(1)
        viewer = viewer_classes[stat](events)
        if not json_summary:
            viewer.show_stats(show_all_timestamps=not summary_only)
            continue
        collected.extend(viewer.get_stats_summary())

    if json_summary:
        print(json.dumps(collected, indent=2))
def test_resource_stats():
    """Log resource stats, then verify the events, viewers, and CLI output.

    Stats are logged ``count`` times into a temporary directory. The test
    then checks that each stat type produced ``count`` events, that every
    viewer returns a dataframe of that length, that the memory viewer's
    batch averages match the dataframe column means, and that
    ``jade stats show`` prints the expected terms.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        event_file = os.path.join(tmpdir, "events.log")
        setup_logging("event", event_file, console_level=logging.ERROR, file_level=logging.INFO)

        resource_monitor = ResourceMonitor("test")
        count = 2
        for _ in range(count):
            resource_monitor.log_resource_stats()

        summary = EventsSummary(tmpdir)
        assert len(summary.list_events(EVENT_NAME_CPU_STATS)) == count
        assert len(summary.list_events(EVENT_NAME_DISK_STATS)) == count
        assert len(summary.list_events(EVENT_NAME_MEMORY_STATS)) == count
        assert len(summary.list_events(EVENT_NAME_NETWORK_STATS)) == count

        viewers = [
            CpuStatsViewer(summary),
            DiskStatsViewer(summary),
            MemoryStatsViewer(summary),
            NetworkStatsViewer(summary),
        ]
        for viewer in viewers:
            df = viewer.get_dataframe("test")
            # One row per logged sample.
            assert len(df) == count
            if isinstance(viewer, MemoryStatsViewer):
                averages = viewer._calc_batch_averages("test")
                for field, val in averages.items():
                    assert val == df[field].mean()

        # The directory must still exist while the CLI reads from it.
        output = {}
        cmd = f"jade stats show -o {tmpdir} cpu disk mem net"
        ret = run_command(cmd, output=output)
        assert ret == 0
        for term in ("IOPS", "read_bytes", "bytes_recv", "idle"):
            assert term in output["stdout"]
def show_events(
    output,
    names,
    categories=False,
    json_fmt=False,
    names_only=False,
    categories_only=False,
    verbose=False,
):
    """Shows the events after jobs run.

    \b
    Examples:
    jade show-events
    jade show-events unhandled_error
    jade show-events error -c
    jade show-events --names-only
    jade show-events --categories-only
    """
    # Console logging is DEBUG when verbose, WARNING otherwise; no log file.
    console_level = logging.DEBUG if verbose else logging.WARNING
    setup_logging("show_results", None, console_level=console_level)
    summary = EventsSummary(output)

    # The listing modes are mutually exclusive and checked in this priority.
    if names_only:
        summary.show_event_names()
        return
    if categories_only:
        summary.show_event_categories()
        return
    if json_fmt:
        print(summary.to_json())
        return

    # No explicit names means "show everything the summary knows about".
    selected = names if names else summary.list_unique_names()
    # With ``categories`` set, each given name is treated as a category.
    display = summary.show_events_in_category if categories else summary.show_events
    for name in selected:
        display(name)