    # add the checkpoint-all result to the global evaluation points so the plot extends down to zero overhead
    rounded_eval_points.append(
        roundup(ILP_ROUND_FACTOR, check_all_sched.peak_ram))
    rounded_eval_points.append(
        roundup(ILP_ROUND_FACTOR, check_all_sched.peak_ram - g.cost_ram_fixed))

    return rounded_eval_points


if __name__ == "__main__":
    args = extract_params()
    key = "_".join(
        map(str, [
            args.platform, args.model_name, args.batch_size, args.input_shape
        ]))
    log_base = checkmate_data_dir() / "budget_sweep" / key / str(
        datetime.datetime.now().isoformat())
    shutil.rmtree(log_base, ignore_errors=True)
    pathlib.Path(log_base).mkdir(parents=True, exist_ok=True)

    logging.basicConfig(
        filename=log_base / "budget_sweep.log",
        filemode="a",
        format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
        level=logging.DEBUG,
    )
    logger = logging.getLogger("budget_sweep")

    # due to bug on havoc, limit parallelism on high-core machines
    if os.cpu_count() > 48:
        os.environ["OMP_NUM_THREADS"] = "1"
    _args = parser.parse_args()
    _args.input_shape = _args.input_shape if _args.input_shape else None
    return _args


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    # due to bug on havoc, limit parallelism on high-core machines
    if os.cpu_count() > 48:
        os.environ["OMP_NUM_THREADS"] = "1"
    args = extract_params()

    key = "_".join(map(str,
                       [args.platform, args.model_name, args.input_shape]))
    log_base = checkmate_data_dir() / "max_batch_size_ilp" / key / str(
        datetime.datetime.now().isoformat())
    shutil.rmtree(log_base, ignore_errors=True)
    pathlib.Path(log_base).mkdir(parents=True, exist_ok=True)
    result_dict = defaultdict(
        lambda: defaultdict(list))  # type: Dict[int, Dict[SolveStrategy, List[ScheduledResult]]]
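    # outer key is (presumably) the candidate batch size; inner dict maps solve strategy -> scheduled results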
    model_name = args.model_name

    # load profiled costs (and optionally plot them) if the platform is not "flops"
    logging.info("Loading costs")
    if args.platform == "flops":
        cost_model = None
    else:
        cost_model = CostModel(model_name,
                               args.platform,
                               log_base,
def compare_checkpoint_loss_curves(dataset: str, model_name: str, n_epochs: int = 1, batch_size: int = 32):
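    """Train a baseline model and checkpoint-rematerialized variants on the same data,
    then plot and save their loss curves for comparison."""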
    def test_baseline(train_ds, test_ds, epochs=5):
        logging.info("Configuring basic model")
        model = make_model(dataset=dataset, model=model_name)
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        optimizer = tf.keras.optimizers.Adam()

        train_loss = tf.keras.metrics.Mean(name="train_loss")
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="train_accuracy")
        test_loss = tf.keras.metrics.Mean(name="test_loss")
        test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="test_accuracy")

        @tf.function
        def train_step(images, labels):
            with tf.GradientTape() as tape:
                predictions = model(images)
                loss = loss_object(labels, predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            train_loss(loss)
            train_accuracy(labels, predictions)

        @tf.function
        def test_step(images, labels):
            predictions = model(images)
            t_loss = loss_object(labels, predictions)
            test_loss(t_loss)
            test_accuracy(labels, predictions)

        logging.info("Training baseline model")
        orig_losses = train_model(
            train_ds, test_ds, train_step, test_step, train_loss, train_accuracy, test_loss, test_accuracy, n_epochs=epochs
        )
        return orig_losses

    def test_checkpointed(train_ds, test_ds, solver, epochs=1):
        check_model = _build_model_via_solver(dataset, model_name, train_ds.element_spec, solver)
        _, train_step_check, test_step_check, train_loss, train_accuracy, test_loss, test_accuracy = check_model
        logging.info("Training checkpointed model")
        sqrtn_losses = train_model(
            train_ds,
            test_ds,
            train_step_check,
            test_step_check,
            train_loss,
            train_accuracy,
            test_loss,
            test_accuracy,
            n_epochs=epochs,
        )
        return sqrtn_losses

    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.set_style("darkgrid")

    train_ds, test_ds = get_data(dataset, batch_size=batch_size)
    data = {
        "baseline": (test_baseline(train_ds, test_ds, n_epochs)),
        "checkpoint_all": (test_checkpointed(train_ds, test_ds, solve_checkpoint_all, epochs=n_epochs)),
        "checkpoint_sqrtn_ap": (test_checkpointed(train_ds, test_ds, solve_chen_sqrtn_ap, epochs=n_epochs)),
        # "checkpoint_sqrtn_noap": (test_checkpointed(train_ds, test_ds, solve_chen_sqrtn_noap, epochs=n_epochs)),
    }

    for loss_name, loss_data in data.items():
        plt.plot(loss_data, label=loss_name)
    plt.legend(loc="upper right")
    (checkmate_data_dir() / "exec").mkdir(parents=True, exist_ok=True)
    plt.savefig(checkmate_data_dir() / "exec" / "{}_{}_bs{}_epochs{}.pdf".format(dataset, model_name, batch_size, n_epochs))
    with (checkmate_data_dir() / "exec" / "{}_{}_bs{}_epochs{}.json".format(dataset, model_name, batch_size, n_epochs)).open(
        "w"
    ) as f:
        json.dump(data, f)
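

# A minimal invocation sketch; the dataset and model names below are assumptions -- pass whatever
# get_data() and make_model() accept in this repository:
#     compare_checkpoint_loss_curves(dataset="mnist", model_name="MLP", n_epochs=1, batch_size=32)
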
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    # Set parameters
    args = parse_args()
    N = args.num_layers
    IMPOSED_SCHEDULE = args.imposed_schedule
    APPROX = False
    EPS_NOISE = 0
    SOLVE_R = False

    # Compute integrality gap for each budget
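    # (the integrality gap is the ratio between the optimal ILP objective and the LP-relaxation lower bound)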
    for B in reversed(range(4, N + 3)):  # Try several budgets
        g = gen_linear_graph(N)
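        # g is a synthetic linear (chain) dependency graph with N layers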
        scratch_dir = (
            checkmate_data_dir()
            / "scratch_integrality_gap_linear"
            / "{}_layers".format(N)
            / str(IMPOSED_SCHEDULE)
            / "{}_budget".format(B)
        )
        scratch_dir.mkdir(parents=True, exist_ok=True)
        data = []

        logging.error("Skipping Griewank baselines as it was broken in parasj/checkmate#65")
        # griewank = solve_griewank(g, B)

        logging.info("--- Solving LP relaxation for lower bound")
        lb_lp = lower_bound_lp_relaxation(g, B, approx=APPROX, eps_noise=EPS_NOISE, imposed_schedule=IMPOSED_SCHEDULE)
        plot_schedule(lb_lp, False, save_file=scratch_dir / "CHECKMATE_LB_LP.png")
import re

import numpy as np
import pandas as pd

from experiments.common.definitions import checkmate_data_dir

# compute aggregated tables of max and geomean lp approximation ratios
exp_name_re = re.compile(
    r"^(?P<platform>.+?)_(?P<model_name>.+?)_(?P<batch_size>[0-9]+?)_(?P<input_shape>None|.+?)$"
)
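# experiment directories are named "<platform>_<model_name>_<batch_size>_<input_shape>" (cf. the key built in the budget sweep)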
dfs = []
for path in (checkmate_data_dir() / "budget_sweep").glob("**/slowdowns.csv"):
    slowdown_df = pd.read_csv(path)
    matches = exp_name_re.match(path.parents[0].name)
    model_name = matches.group("model_name")
    slowdown_df["Model name"] = [model_name] * len(slowdown_df)
    dfs.append(slowdown_df)
df = pd.concat(dfs)
del df["Unnamed: 0"]
for valuekey in ["geomean_slowdown", "max"]:
    pivot_df = pd.pivot_table(df,
                              values=valuekey,
                              index=["Model name"],
                              columns=["method"])
    pivot_df.to_csv(checkmate_data_dir() / "budget_sweep" /
                    "{}_aggr.csv".format(valuekey))

# compute lp relaxation speedups
ilp_runtime_dict = {}
lp_runtime_dict = {}
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from checkmate.core.solvers.strategy_optimal_ilp import solve_ilp_gurobi
from checkmate.tf2_keras.extraction import dfgraph_from_keras
from experiments.common.definitions import checkmate_data_dir
from checkmate.tf2.load_keras_model import get_keras_model

if __name__ == "__main__":
    # get sample network and generate a graph on it
    model = get_keras_model("VGG16")
    g = dfgraph_from_keras(mod=model)
    budget = sum(g.cost_ram.values()) + g.cost_ram_parameters
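    # budget = total RAM of all activations plus parameter memory, i.e. enough to keep everything resident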

    # solve for a schedule
    scheduler_result = solve_ilp_gurobi(g, budget)
    R = scheduler_result.schedule_aux_data.R

    # compute costs for 1000 runs
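    # r[i] counts how many times node i is (re)computed in the ILP schedule; each of the 1000 trials below
    # perturbs the per-node costs with Gaussian noise (sigma = 1e7 flops) and re-evaluates the schedule cost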
    r = R.sum(axis=0)
    C = [g.cost_cpu[key] for key in sorted(g.cost_cpu)]
    results = [np.random.normal(C, 1e7) @ r for i in range(1000)]
    x = pd.Series(results, name="Cost in flops")

    # plot costs
    plt.figure()
    sns.distplot(x)
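    # note: seaborn.distplot is deprecated in newer seaborn releases; sns.histplot(x, kde=True) is the modern equivalent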
    checkmate_data_dir().mkdir(parents=True, exist_ok=True)
    plt.savefig(checkmate_data_dir() /
                "distribution_of_perturbed_cpu_costs.pdf")
    return parser.parse_args()


def b2gb(data):
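    """Convert bytes to gigabytes, element-wise for iterables."""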
    if hasattr(data, "__iter__"):
        return [d * 1e-9 for d in data]
    return data * 1e-9


if __name__ == "__main__":
    args = extract_params()

    if args.model_name == "linear16":
        N = 16
        B = 8
        scratch_dir = checkmate_data_dir() / "approxcomparison" / args.model_name / f"budget{B}"
        scratch_dir.mkdir(parents=True, exist_ok=True)

        g = gen_linear_graph(N)
    else:
        B = platform_memory(args.platform)
        scratch_dir = checkmate_data_dir() / "approxcomparison" / args.model_name / f"budget{B}"
        scratch_dir.mkdir(parents=True, exist_ok=True)

        # load profiled costs (and optionally plot them) if the platform is not "flops"
        print("Loading costs")
        if args.platform == "flops":
            cost_model = None
        else:
            cost_model = CostModel(args.model_name,
from experiments.common.definitions import checkmate_data_dir
from experiments.common.graph_plotting import plot_schedule
from checkmate.core.solvers.strategy_checkpoint_all import solve_checkpoint_all
from checkmate.core.solvers.strategy_chen import solve_chen_sqrtn
from checkmate.core.solvers.strategy_optimal_ilp import solve_ilp_gurobi
from checkmate.core.utils.timer import Timer
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == "__main__":
    N = 16
    for B in range(4, 12):
        # model = get_keras_model("MobileNet")
        # g = dfgraph_from_keras(mod=model)
        g = gen_linear_graph(N)
        scratch_dir = checkmate_data_dir() / "scratch_linear" / str(N) / str(B)
        scratch_dir.mkdir(parents=True, exist_ok=True)
        data = []

        scheduler_result_all = solve_checkpoint_all(g)
        scheduler_result_sqrtn = solve_chen_sqrtn(g, True)
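        # baselines: checkpoint-all keeps every activation; solve_chen_sqrtn is the Chen et al. sqrt(n)
        # heuristic (the boolean flag presumably selects the articulation-point variant)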
        plot_schedule(scheduler_result_all,
                      False,
                      save_file=scratch_dir / "CHECKPOINT_ALL.png")
        plot_schedule(scheduler_result_sqrtn,
                      False,
                      save_file=scratch_dir / "CHEN_SQRTN.png")
        data.append({
            "Strategy":
            str(scheduler_result_all.solve_strategy.value),
            "Name":