Example #1
import logging

import pandas as pd

import util  # project-local helper module providing pretty_log_df


def calc_cost_to_generate_dfs(dfs, df_labels, logger=None):
    if logger is None:
        logger = logging.getLogger()

    total_wall_seconds = 0
    total_node_seconds = 0
    for df, df_label in zip(dfs, df_labels):
        if isinstance(df.columns, pd.MultiIndex):
            subset = df
            makespan = subset.makespan["median"]
            num_nodes = subset.num_nodes["median"]
        else:
            subset = df[df.repetition == 0]
            makespan = subset.makespan
            num_nodes = subset.num_nodes
        df_wall_seconds = makespan.sum()
        df_node_seconds = (makespan * num_nodes).sum()
        total_wall_seconds += df_wall_seconds
        total_node_seconds += df_node_seconds
        logger.debug(
            "DF %s took %f wall seconds and %f node seconds to build (single repetition)",
            df_label,
            df_wall_seconds,
            df_node_seconds,
        )
        util.pretty_log_df(subset, "DF subset used for cost calculation",
                           logger)
    return total_wall_seconds, total_node_seconds
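
A minimal usage sketch for the function above, assuming the project-local util module (pretty_log_df) is importable; the toy DataFrames and labels here are hypothetical, with the single-repetition columns the function expects:

import pandas as pd

df_a = pd.DataFrame({"repetition": [0, 0],
                     "makespan": [10.0, 20.0],
                     "num_nodes": [4, 8]})
df_b = pd.DataFrame({"repetition": [0],
                     "makespan": [5.0],
                     "num_nodes": [2]})

wall, node = calc_cost_to_generate_dfs([df_a, df_b], ["run_a", "run_b"])
# wall == 35.0 (10 + 20 + 5)
# node == 210.0 (10*4 + 20*8 + 5*2)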
Example #2
    def __init__(self, model_df, setup_df, sleep0_df, logger=None):
        super().__init__(logger=logger)

        self.depth_1_model_df = agg_by(
            model_df[(model_df.num_levels == 1) & (model_df.num_nodes == 1)],
            ["num_jobs"],
        )
        util.pretty_log_df(self.depth_1_model_df, "depth 1 model DF:",
                           self.logger)

        self.empty_hierarchy_df = agg_by(
            setup_df[(setup_df["just_setup"] == True)
                     & (setup_df["num_levels"] == 1)],
            ["num_levels"],
        )
        util.pretty_log_df(self.empty_hierarchy_df, "Empty hierarchy DF:",
                           self.logger)
        assert len(self.empty_hierarchy_df) == 1  # should only contain the first level

        # Needed only for the raw scheduler overhead of launching num_jobs
        # equal to the topology branching factors (to estimate the hierarchy
        # setup cost)
        self.sleep0_df = agg_by(
            sleep0_df[(sleep0_df.num_levels == 1)
                      & (sleep0_df.num_nodes == 1)],
            ["num_jobs"],
        )
        util.pretty_log_df(self.sleep0_df, "Sleep0 job launch model DF:",
                           self.logger)
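
The constructors in Examples #2 and #3 rely on an agg_by helper that is not shown. A plausible sketch, assuming it collapses repeated measurements into per-group medians; the MultiIndex column shape is inferred from how Example #1 reads subset.makespan["median"], and the real helper may aggregate more columns and statistics:

import pandas as pd

def agg_by(df, group_cols):
    # Assumption: aggregate repetitions per group into medians, producing
    # MultiIndex columns such as ("makespan", "median").
    value_cols = [c for c in ("makespan", "num_nodes") if c in df.columns]
    return df.groupby(group_cols)[value_cols].agg(["median"]).reset_index()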
Example #3
    def __init__(self, model_df, setup_df, depth_1_real_df, logger=None):
        super().__init__(logger=logger)

        self.depth_1_real_df = depth_1_real_df

        self.depth_two_leaf_perf_df = agg_by(
            model_df[(model_df.num_levels == 1) & (model_df.num_nodes == 1)],
            ["num_jobs"],
        )
        util.pretty_log_df(self.depth_two_leaf_perf_df,
                           "Depth-2 leaf performance", self.logger)

        self.depth_three_leaf_perf_df = agg_by(
            model_df[(model_df.num_levels == 2)
                     & (model_df.num_nodes == 1)
                     & (model_df.first_branch_factor == 36)],
            ["num_jobs"],
        )
        util.pretty_log_df(self.depth_three_leaf_perf_df,
                           "Depth-3 leaf performance", self.logger)

        self.empty_hierarchy_df = agg_by(
            setup_df[(setup_df["just_setup"] == True)],
            ["num_levels", "first_branch_factor"],
        )
        util.pretty_log_df(self.empty_hierarchy_df, "Empty hierarchy DF:",
                           self.logger)
        assert len(self.empty_hierarchy_df) == 3  # should contain all three levels
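
Judging by the commented-out call in Example #5, this constructor belongs to the EmpiricalModel class; a hypothetical instantiation, assuming DataFrames prepared as in Example #5:

empirical_model = EmpiricalModel(model_df, setup_df, depth_1_real_df,
                                 logger=logger)
empirical_model_df = pd.concat([
    empirical_model.get_interpolated_predictions(topology)
    for topology in topologies
])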
Example #4
def main():
    num_nodes = 4500
    # Model parameters are exposed as module-level globals, presumably so
    # that the forked pool workers (parallel_gen_df) inherit them.
    global cores_per_node
    cores_per_node = 44
    global total_cores
    total_cores = num_nodes * cores_per_node
    global sched_rate
    sched_rate = 3.6
    global sched_create_cost
    sched_create_cost = 3.4

    topologies = [
        [1],
        [1, 32],
        #[1, num_nodes],
        [1, 32, 36],
        #[1, 444, 444],
        [1, num_nodes, cores_per_node],
        [1, 55, 60, 60],
    ]
    unique_ids = ["sleep0", "sleep5", "firestarter", "stream"]
    func_args = itertools.product(topologies, unique_ids)
    with mp.Pool() as pool:
        dfs = pool.map(parallel_gen_df, func_args)

    model = create_model("sleep5")
    for topology in topologies:
        create_time = model.get_empty_hierarchy_init_cost(topology)
        logger.info("Topology %s is predicted to take %f seconds to create",
                    topology, create_time)

    new_model_df = pd.concat(dfs, sort=True)
    new_model_df["model_type"] = "analyticalWithContention"
    util.pretty_log_df(new_model_df, "Final DataFrame", logger)

    logger.info("Saving data to {}".format(args.output_pkl))
    new_model_df.to_pickle(args.output_pkl)
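
This main() reads module-level names that the snippet omits (logger, args, create_model, and the pool worker). Because pool.map is fed tuples from itertools.product, parallel_gen_df must accept a single (topology, unique_id) pair; a hypothetical stub with a placeholder body:

import pandas as pd

def parallel_gen_df(func_arg):
    # Assumption: pool.map hands the worker one (topology, unique_id) tuple
    # per call, so the worker unpacks the pair itself. The real function
    # presumably builds a DataFrame of model predictions.
    topology, unique_id = func_arg
    return pd.DataFrame({"topology": [str(topology)],
                         "unique_id": [unique_id]})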
Example #5
def main():
    real_df = pd.read_pickle(args.df_pkl)
    model_df = pd.read_pickle(args.model_pkl)

    unique_id = util.unique_id(real_df)
    logger.info("App: %s", unique_id)
    sleep0_df = model_df[(model_df.unique_id == "sleep0")
                         & (model_df.num_jobs.isin([1, 32, 36]))]
    setup_df = model_df[(model_df.unique_id == "setup")]
    model_df = model_df[model_df.unique_id == unique_id]

    real_df = trim_max_jobs_per_leaf(real_df, args.max_jobs_per_leaf)
    model_df = trim_max_jobs_per_leaf(model_df, args.max_jobs_per_leaf)
    setup_df = trim_max_jobs_per_leaf(setup_df, args.max_jobs_per_leaf)
    sleep0_df = trim_max_jobs_per_leaf(sleep0_df, args.max_jobs_per_leaf)

    num_nodes = 32
    cores_per_node = 36
    total_cores = num_nodes * cores_per_node
    subset_df = real_df[(real_df.num_nodes == num_nodes)
                        & (real_df.cores_per_node == cores_per_node)]
    sched_rate = 4.5
    sched_create_cost = 4.3

    agg_real_df = agg_by(subset_df, ["num_levels", "num_jobs", "just_setup"])
    depth_1_real_df = agg_real_df[agg_real_df.num_levels == 1]
    topologies = real_df["topology"].unique()

    header = """
=========================
====ANALYTICAL MODEL=====
========================="""
    logger.debug(header)
    analytical_model = PurelyAnalyticalModel(
        sched_rate=sched_rate,
        sched_create_cost=sched_create_cost,
        job_runtime=modelling.get_avg_runtime(unique_id),
        resource_cap=total_cores,
        logger=logger)
    analytical_model_df = pd.concat([
        analytical_model.get_interpolated_predictions(topology)
        for topology in topologies
    ])
    analytical_model_df["model_type"] = "analytical"
    logger.info(
        "Analytical model cost (single rep): {:.2f} wall seconds, {:.2f} node seconds"
        .format(*analytical_model.calc_model_cost()))

    header = """
====================================
=ANALYTICAL MODEL w/ CONTENTION=====
===================================="""
    logger.debug(header)
    analyticalWithContention_model = AnalyticalModelContentedRuntime(
        sched_rate=sched_rate,
        sched_create_cost=sched_create_cost,
        resource_cap=total_cores,
        avg_runtime_func=functools.partial(modelling.get_avg_runtime,
                                           unique_id),
        cores_per_node=cores_per_node,
        logger=logger,
    )
    analyticalWithContention_model_df = pd.concat([
        analyticalWithContention_model.get_interpolated_predictions(topology)
        for topology in topologies
    ])
    analyticalWithContention_model_df["model_type"] = "analyticalWithContention"
    logger.info(
        "AnalyticalWithContention model cost (single rep): {:.2f} wall seconds, {:.2f} node seconds"
        .format(*analyticalWithContention_model.calc_model_cost()))

    #     header = """
    # =========================
    # ======SIMPLE MODEL=======
    # ========================="""
    #     logger.debug(header)
    #     simple_model = SimpleModel(
    #         model_df[model_df.num_nodes == 1],
    #         setup_df[(setup_df.num_nodes == 1) & (setup_df.num_levels == 1)],
    #         sleep0_df[sleep0_df.num_nodes == 1],
    #     )
    #     simple_model_df = pd.concat(
    #         [simple_model.get_interpolated_predictions(topology) for topology in topologies]
    #     )
    #     simple_model_df["model_type"] = "simple"
    #     logger.info(
    #         "Simple model cost (single rep): {:.2f} wall second, {:.2f} node seconds".format(
    #             *simple_model.calc_model_cost()
    #         )
    #     )

    #     header = """
    # =========================
    # =====EMPIRICAL MODEL=====
    # ========================="""
    #     logger.debug(header)
    #     empirical_model = EmpiricalModel(model_df, setup_df, depth_1_real_df)
    #     empirical_model_df = pd.concat(
    #         [
    #             empirical_model.get_interpolated_predictions(topology)
    #             for topology in topologies
    #         ]
    #     )
    #     empirical_model_df["model_type"] = "empirical"
    #     logger.info(
    #         "Empirical model cost (single rep): {:.2f} wall second, {:.2f} node seconds".format(
    #             *empirical_model.calc_model_cost()
    #         )
    #     )

    new_model_df = pd.concat(
        [
            #simple_model_df,
            #empirical_model_df,
            analytical_model_df,
            analyticalWithContention_model_df,
        ],
        sort=True,
    )
    new_model_df["unique_id"] = unique_id
    new_model_df["throughput_upperbound"] = new_model_df["num_jobs"].apply(
        lambda x: modelling.calc_upperbound(unique_id, x, total_cores))
    util.pretty_log_df(new_model_df, "Final DataFrame", logger)

    logger.info("Saving data to {}".format(args.output_pkl))
    new_model_df.to_pickle(args.output_pkl)
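
Examples #4 and #5 both assume a module-level args and logger; a minimal sketch of that scaffolding, with argument names inferred from the attribute accesses above (the real script's options may differ):

import argparse
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

parser = argparse.ArgumentParser()
parser.add_argument("--df-pkl")
parser.add_argument("--model-pkl")
parser.add_argument("--output-pkl")
parser.add_argument("--max-jobs-per-leaf", type=int, default=None)
args = parser.parse_args()

if __name__ == "__main__":
    main()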