Beispiel #1
0
def compute_metrics(folder):
    """
    Run the error and CPU-time computations for the applications of a run folder.

    The run configuration is loaded from ``folder`` and the module-level
    ``PLOT_SID_STAGE`` flag is set to 1 when ``config["HDFS"]`` is truthy and
    to 0 otherwise. Every ``*.err`` / ``*.dat`` log in the folder is parsed
    (in sorted order) and ``compute_errors`` is called for each application
    id it contains.

    If the folder holds as many ``*worker*.out`` logs as ``sar*.log`` files,
    the two sorted lists are paired one-to-one, loaded, and
    ``compute_cpu_time`` is called for each application id of the *last*
    parsed application log. Otherwise an error line is printed.

    :param folder: path of the run folder; a trailing "/" is appended if missing
    :return: None
    """
    global PLOT_SID_STAGE

    print(folder)
    if folder[-1] != "/":
        folder = folder + "/"
    config = load_config(folder)
    print(config)

    PLOT_SID_STAGE = int(bool(config["HDFS"]))

    log_paths = sorted(
        glob.glob(folder + "*.err") + glob.glob(folder + "*.dat"))
    # app_info is rebound for every log, so after the loop it describes only
    # the last log that was parsed.
    app_info = {}
    for log_path in log_paths:
        app_info = load_app_data(log_path)
        for app_id, app_data in app_info.items():
            compute_errors(app_id, app_data, folder, config)

    worker_logs = sorted(glob.glob(folder + "*worker*.out"))
    cpu_logs = sorted(glob.glob(folder + "sar*.log"))

    # Worker logs and SAR logs are matched purely by sorted position, which
    # only makes sense when there is exactly one SAR log per worker log.
    if len(worker_logs) != len(cpu_logs):
        print("ERROR: SAR != WORKER LOGS")
        return

    workers_dict = {
        worker_log: load_worker_data(worker_log, cpu_log, config)
        for worker_log, cpu_log in zip(worker_logs, cpu_logs)
    }
    for app_id in app_info:
        compute_cpu_time(app_id, app_info, workers_dict, config, folder)
Beispiel #2
0
def compute_metrics(folder):
    """
    Run the error and CPU-time computations for the latest application of a run folder.

    The run configuration is loaded from ``folder`` and the module-level
    ``PLOT_SID_STAGE`` flag is set to 1 when ``config["HDFS"]`` is truthy and
    to 0 otherwise. Every ``*.dat`` log and every ``*.err`` log whose name
    part before the extension does not end in ``scheduling-throughput`` is
    parsed in sorted order.

    For each parsed log only the largest application id is processed
    (Dagsymb: the earlier ids belong to the initial data-generation runs and
    are discarded).

    If the folder holds as many ``*worker*.out`` logs as ``sar*.log`` files,
    the two sorted lists are paired one-to-one, loaded, and
    ``compute_cpu_time`` is called for the largest application id of the
    *last* parsed application log. Otherwise an error line is printed.

    :param folder: path of the run folder; a trailing "/" is appended if missing
    :return: None
    """
    global PLOT_SID_STAGE

    print(folder)
    if folder[-1] != "/":
        folder += "/"
    config = load_config(folder)
    print(config)

    PLOT_SID_STAGE = 1 if config["HDFS"] else 0

    # Skip the scheduling-throughput *.err files; they are not application logs.
    app_logs = [
        x for x in glob.glob(folder + "*.err")
        if not x.split('.')[-2].endswith('scheduling-throughput')
    ] + glob.glob(folder + "*.dat")

    # app_info is rebound for every log, so after the loop it describes only
    # the last log that was parsed.
    app_info = {}
    for app_log in sorted(app_logs):
        app_info = load_app_data(app_log)

        # DB - Dagsymb: consider only the latest app_id in the log (i.e.
        # discard all initial data-generation app_ids). The maximum is looked
        # up once instead of being recomputed for every key.
        if app_info:
            latest_app_id = max(app_info)
            compute_errors(latest_app_id, app_info[latest_app_id], folder,
                           config)

    worker_logs = glob.glob(folder + "*worker*.out")
    cpu_logs = glob.glob(folder + "sar*.log")

    if len(worker_logs) == len(cpu_logs):
        workers_dict = {}
        # Worker logs and SAR logs are matched purely by sorted position.
        for worker_log, cpu_log in zip(sorted(worker_logs), sorted(cpu_logs)):
            worker_dict = load_worker_data(worker_log, cpu_log, config)
            workers_dict[worker_log] = worker_dict
        # Same "latest app_id only" rule as above.
        if app_info:
            latest_app_id = max(app_info)
            compute_cpu_time(latest_app_id, app_info, workers_dict, config,
                             folder)
    else:
        print("ERROR: SAR != WORKER LOGS")
Beispiel #3
0
def plot(folder):
    """
    Produce the application, worker and CPU overview plots for a run folder.

    The run configuration is loaded from ``folder`` and the module-level
    ``PLOT_SID_STAGE`` flag is set to 1 when ``config["HDFS"]`` is truthy and
    to 0 otherwise. Every ``*.err`` / ``*.dat`` log in the folder is parsed
    (in sorted order) and ``plot_app_overview`` is called for each
    application id it contains.

    If the folder holds as many ``*worker*.out`` logs as ``sar*.log`` files,
    the two sorted lists are paired one-to-one and loaded. ``plot_worker`` is
    then called for every (worker log, application id) combination and
    ``plot_overview_cpu`` for every application id, using the application
    ids of the *last* parsed application log. Otherwise an error line is
    printed.

    :param folder: path of the run folder; a trailing "/" is appended if missing
    :return: None
    :raises SystemExit: with exit code 1 when ``find_first_ts_worker``
        returns -1.0 for the first application id
    """
    global PLOT_SID_STAGE

    print(folder)
    if folder[-1] != "/":
        folder += "/"
    config = load_config(folder)
    print(config)

    PLOT_SID_STAGE = 1 if config["HDFS"] else 0

    app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
    # app_info is rebound for every log, so after the loop it describes only
    # the last log that was parsed.
    app_info = {}
    for app_log in sorted(app_logs):
        app_info = load_app_data(app_log)

        for app_id in app_info:
            plot_app_overview(app_id, app_info[app_id], folder, config)

    # Sort once; both the loading pass and the plotting pass use this order.
    worker_logs = sorted(glob.glob(folder + "*worker*.out"))
    cpu_logs = sorted(glob.glob(folder + "sar*.log"))

    if len(worker_logs) != len(cpu_logs):
        print("ERROR: SAR != WORKER LOGS")
        return

    workers_dict = {}
    # Worker logs and SAR logs are matched purely by sorted position.
    for worker_log, cpu_log in zip(worker_logs, cpu_logs):
        workers_dict[worker_log] = load_worker_data(worker_log, cpu_log,
                                                    config)

    # -1.0 is the "not determined yet" sentinel. The value is looked up
    # lazily for the first (worker, app) pair and then reused for all plots.
    first_ts_worker = -1.0
    for worker_log in worker_logs:
        for app_id in app_info:
            if first_ts_worker == -1.0:
                first_ts_worker = find_first_ts_worker(app_id, workers_dict)
                if first_ts_worker == -1.0:
                    print("ERROR FIRST TS WORKER")
                    # raise SystemExit directly: the exit() helper is
                    # injected by the site module for interactive use and is
                    # not guaranteed to exist in every interpreter setup.
                    raise SystemExit(1)
            plot_worker(app_id, app_info, worker_log,
                        workers_dict[worker_log], config, first_ts_worker)
    for app_id in app_info:
        plot_overview_cpu(app_id, app_info, workers_dict, config, folder)
Beispiel #4
0
def plot_mean_comparision(folders):
    """
    Draw a grouped bar chart comparing the CPU series of three runs.

    For every folder the run configuration is loaded, the module-level
    ``PLOT_SID_STAGE`` flag is set (1 when ``config["HDFS"]`` is truthy, else
    0), the application and worker/SAR logs are loaded, and
    ``plot_overview_cpu`` is called for the application ids of the last
    parsed application log. The first element of its return value is
    collected when ``config["Control"]["TSample"] == 5000``.

    Collection for a folder stops as soon as the series for its category is
    already present: a folder whose path contains "/0%" contributes only
    while no series has been collected, "20%" only while at most one has,
    and "40%" only while at most two have. The order of ``folders``
    therefore decides which series is drawn as 0%, 20% and 40%.

    Shorter series are zero-padded (in place) to the longest length. The
    first three series are averaged over groups of 4 consecutive samples and
    drawn side by side; the figure is written to "hist.png" in the current
    working directory.

    :param folders: iterable of run-folder paths; a trailing "/" is appended
        to each one if missing
    :return: None
    :raises IndexError: when fewer than three series were collected
    """
    import numpy as np

    global PLOT_SID_STAGE

    x_multi = []
    for folder in folders:
        print(folder)
        if folder[-1] != "/":
            folder += "/"
        config = load_config(folder)
        print(config)

        PLOT_SID_STAGE = 1 if config["HDFS"] else 0

        app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
        # Only the data of the last (sorted) application log is kept.
        app_info = {}
        for app_log in sorted(app_logs):
            app_info = load_app_data(app_log)

        worker_logs = sorted(glob.glob(folder + "*worker*.out"))
        cpu_logs = sorted(glob.glob(folder + "sar*.log"))

        if len(worker_logs) != len(cpu_logs):
            print("ERROR: SAR != WORKER LOGS")
            continue

        workers_dict = {}
        # Worker logs and SAR logs are matched purely by sorted position.
        for worker_log, cpu_log in zip(worker_logs, cpu_logs):
            workers_dict[worker_log] = load_worker_data(
                worker_log, cpu_log, config)

        for app_id in app_info:
            # The second returned value (timestamps) is not needed here.
            cpus, _ = plot_overview_cpu(app_id, app_info, workers_dict,
                                        config, folder)
            # Stop once the series for this folder's category is already
            # present; only the first run per category is kept.
            if "/0%" in folder and len(x_multi) > 0:
                break
            elif "20%" in folder and len(x_multi) > 1:
                break
            elif "40%" in folder and len(x_multi) > 2:
                break

            if config["Control"]["TSample"] == 5000:
                print("FOLDER", folder)
                print("XMULTI", x_multi)
                print("XMULTI LEN", len(x_multi))
                x_multi.append(cpus)

    # Zero-pad every series to the longest one. Each list is extended
    # directly: looking it up again with x_multi.index(cpu) matches by
    # equality and can pad a different, equal-content series instead.
    max_len = max([len(cpu) for cpu in x_multi] + [0])
    for cpu in x_multi:
        missing = max_len - len(cpu)
        if missing > 0:
            print(missing)
            cpu.extend([0] * missing)

    if len(x_multi) < 3:
        raise IndexError(
            "expected 3 CPU series (0%, 20%, 40%), got {}".format(
                len(x_multi)))

    print(len(x_multi[0]))
    # NOTE: reshape(-1, 4) requires the padded length to be a multiple of 4;
    # numpy raises ValueError otherwise.
    zero = np.mean(np.array(x_multi[0]).reshape(-1, 4), axis=1)
    twenty = np.mean(np.array(x_multi[1]).reshape(-1, 4), axis=1)
    fourty = np.mean(np.array(x_multi[2]).reshape(-1, 4), axis=1)
    ind = np.arange(len(zero))
    width = 0.35  # the width of the bars

    fig, ax1 = plt.subplots(figsize=(16, 5), dpi=300)
    # Three bars per time slot, shifted by one bar width each.
    ax1.bar(ind, zero, width, color='r', label="0%")
    ax1.bar(ind + width, twenty, width, color='b', label="20%")
    ax1.bar(ind + width + width, fourty, width, color='green', label="40%")
    ax1.set_ylabel('Core [avg]')
    ax1.set_xlabel('TimeSlot [10s]')
    ax1.set_xticks(ind + width + width)
    labels = ax1.get_xticklabels()
    plt.setp(labels, rotation=45)
    plt.legend()
    plt.tight_layout()
    fig.savefig("hist.png")