def compute_metrics(folder):
    """Compute error metrics and CPU time for every application run in *folder*.

    NOTE(review): a second ``compute_metrics`` defined later in this module
    shadows this one, so this definition is effectively dead code — confirm
    and remove.

    :param folder: path of the benchmark output directory
    :return: None
    """
    print(folder)
    if folder[-1] != "/":
        folder += "/"
    config = load_config(folder)
    print(config)
    global PLOT_SID_STAGE
    PLOT_SID_STAGE = 1 if config["HDFS"] else 0
    # Application logs: driver stderr plus any recorded .dat files.
    app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
    app_info = {}
    for log_path in sorted(app_logs):
        # NOTE(review): each iteration overwrites app_info — only the last
        # log's data survives; confirm this is intended.
        app_info = load_app_data(log_path)
    for app_id in app_info:
        compute_errors(app_id, app_info[app_id], folder, config)
    worker_logs = glob.glob(folder + "*worker*.out")
    cpu_logs = glob.glob(folder + "sar*.log")
    if len(worker_logs) != len(cpu_logs):
        # Worker and sar logs must pair up one-to-one.
        print("ERROR: SAR != WORKER LOGS")
        return
    workers_dict = {}
    for worker_log, cpu_log in zip(sorted(worker_logs), sorted(cpu_logs)):
        workers_dict[worker_log] = load_worker_data(worker_log, cpu_log, config)
    for app_id in app_info:
        compute_cpu_time(app_id, app_info, workers_dict, config, folder)
def compute_metrics(folder):
    """Compute error metrics and CPU time for the latest application run in *folder*.

    Only the highest app_id found in the logs is processed, so the initial
    data-generation runs recorded in the same log are skipped (DB- Dagsymb).

    :param folder: path of the benchmark output directory
    :return: None
    """
    print(folder)
    if folder[-1] != "/":
        folder += "/"
    config = load_config(folder)
    print(config)
    global PLOT_SID_STAGE
    PLOT_SID_STAGE = 1 if config["HDFS"] else 0
    # Skip "*.scheduling-throughput.err" logs; keep all other .err and .dat logs.
    app_logs = [
        x for x in glob.glob(folder + "*.err")
        if not x.split('.')[-2].endswith('scheduling-throughput')
    ] + glob.glob(folder + "*.dat")
    app_info = {}
    for app_log in sorted(app_logs):
        # NOTE(review): each iteration overwrites app_info — only the last
        # log's data survives; confirm this is intended.
        app_info = load_app_data(app_log)
    # BUGFIX(perf): the original looped over app_info and recomputed
    # max(app_info.keys()) on every iteration just to select the latest
    # app_id — compute it once and guard against an empty dict instead.
    if app_info:
        latest_app_id = max(app_info)
        compute_errors(latest_app_id, app_info[latest_app_id], folder, config)
    worker_logs = glob.glob(folder + "*worker*.out")
    cpu_logs = glob.glob(folder + "sar*.log")
    if len(worker_logs) == len(cpu_logs):
        workers_dict = {}
        for worker_log, cpu_log in zip(sorted(worker_logs), sorted(cpu_logs)):
            workers_dict[worker_log] = load_worker_data(worker_log, cpu_log, config)
        # Same latest-run-only rule for the CPU-time computation.
        if app_info:
            compute_cpu_time(max(app_info), app_info, workers_dict, config, folder)
    else:
        # Worker and sar logs must pair up one-to-one.
        print("ERROR: SAR != WORKER LOGS")
def plot(folder):
    """Render per-application and per-worker plots for a benchmark run directory.

    :param folder: path of the benchmark output directory
    :return: None
    """
    print(folder)
    if folder[-1] != "/":
        folder += "/"
    config = load_config(folder)
    print(config)
    global PLOT_SID_STAGE
    PLOT_SID_STAGE = 1 if config["HDFS"] else 0
    app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
    app_info = {}
    for log_path in sorted(app_logs):
        # NOTE(review): each iteration overwrites app_info — only the last
        # log's data survives; confirm this is intended.
        app_info = load_app_data(log_path)
    for app_id in app_info:
        plot_app_overview(app_id, app_info[app_id], folder, config)
    worker_logs = glob.glob(folder + "*worker*.out")
    cpu_logs = glob.glob(folder + "sar*.log")
    if len(worker_logs) != len(cpu_logs):
        # Worker and sar logs must pair up one-to-one.
        print("ERROR: SAR != WORKER LOGS")
        return
    log_pairs = list(zip(sorted(worker_logs), sorted(cpu_logs)))
    workers_dict = {}
    for worker_log, cpu_log in log_pairs:
        workers_dict[worker_log] = load_worker_data(worker_log, cpu_log, config)
    # Resolve the earliest worker timestamp once, lazily, on the first
    # (worker, app) pair; abort if it cannot be found.
    first_ts_worker = -1.0
    for worker_log, cpu_log in log_pairs:
        for app_id in app_info:
            if first_ts_worker == -1.0:
                first_ts_worker = find_first_ts_worker(app_id, workers_dict)
                if first_ts_worker == -1.0:
                    print("ERROR FIRST TS WORKER")
                    exit(1)
            plot_worker(app_id, app_info, worker_log,
                        workers_dict[worker_log], config, first_ts_worker)
    for app_id in app_info:
        plot_overview_cpu(app_id, app_info, workers_dict, config, folder)
def plot_mean_comparision(folders):
    """Plot a grouped bar chart comparing mean CPU usage across run folders.

    Runs are bucketed by the percentage tag in their path ("0%", "20%",
    "40%") and only runs whose ``Control.TSample`` is 5000 contribute; at
    most one CPU series is kept per bucket. The series are zero-padded to a
    common length, averaged over windows of 4 samples, and saved to
    "hist.png".

    :param folders: iterable of benchmark output directory paths
    :return: None
    """
    import numpy as np
    x_multi = []
    for folder in folders:
        print(folder)
        if folder[-1] != "/":
            folder += "/"
        config = load_config(folder)
        print(config)
        global PLOT_SID_STAGE
        PLOT_SID_STAGE = 1 if config["HDFS"] else 0
        app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
        app_info = {}
        for app_log in sorted(app_logs):
            # NOTE(review): each iteration overwrites app_info — only the
            # last log's data survives; confirm this is intended.
            app_info = load_app_data(app_log)
        worker_logs = glob.glob(folder + "*worker*.out")
        cpu_logs = glob.glob(folder + "sar*.log")
        if len(worker_logs) == len(cpu_logs):
            workers_dict = {}
            for worker_log, cpu_log in zip(sorted(worker_logs), sorted(cpu_logs)):
                workers_dict[worker_log] = load_worker_data(worker_log, cpu_log, config)
            for app_id in app_info:
                cpus, ts_cpu = plot_overview_cpu(app_id, app_info, workers_dict,
                                                 config, folder)
                # Keep at most one series per percentage bucket
                # (slots: 0 -> "0%", 1 -> "20%", 2 -> "40%").
                if "/0%" in folder and len(x_multi) > 0:
                    break
                elif "20%" in folder and len(x_multi) > 1:
                    break
                elif "40%" in folder and len(x_multi) > 2:
                    break
                elif config["Control"]["TSample"] == 5000:
                    print("FOLDER", folder)
                    print("XMULTI", x_multi)
                    print("XMULTI LEN", len(x_multi))
                    x_multi.append(cpus)
        else:
            # Worker and sar logs must pair up one-to-one.
            print("ERROR: SAR != WORKER LOGS")
    # Zero-pad every series to the longest one so they reshape together.
    max_len = max((len(cpus) for cpus in x_multi), default=0)
    for cpus in x_multi:
        if len(cpus) < max_len:
            print(max_len - len(cpus))
            # BUGFIX: pad the iterated list directly. The original used
            # x_multi[x_multi.index(cpus)].append(0), and list.index compares
            # by equality, so it could pad a different-but-equal series.
            cpus.extend([0] * (max_len - len(cpus)))
    print(len(x_multi[0]))
    # Average over non-overlapping windows of 4 samples. Assumes exactly three
    # series were collected and max_len is a multiple of 4 — TODO confirm.
    zero = np.mean(np.array(x_multi[0]).reshape(-1, 4), axis=1)
    twenty = np.mean(np.array(x_multi[1]).reshape(-1, 4), axis=1)
    fourty = np.mean(np.array(x_multi[2]).reshape(-1, 4), axis=1)
    ind = np.arange(len(zero))
    width = 0.35  # the width of the bars
    fig, ax1 = plt.subplots(figsize=(16, 5), dpi=300)
    ax1.bar(ind, zero, width, color='r', label="0%")
    ax1.bar(ind + width, twenty, width, color='b', label="20%")
    ax1.bar(ind + 2 * width, fourty, width, color='green', label="40%")
    ax1.set_ylabel('Core [avg]')
    ax1.set_xlabel('TimeSlot [10s]')
    ax1.set_xticks(ind + 2 * width)
    labels = ax1.get_xticklabels()
    plt.setp(labels, rotation=45)
    plt.legend()
    plt.tight_layout()
    fig.savefig("hist.png")