def test_ilp():
    """Check that the Gurobi ILP solver finds feasible schedules on linear graphs.

    Returns early, after logging the import error and a warning, when
    ``gurobipy`` cannot be imported. Otherwise, for every length in
    ``test_points`` a linear graph is generated and solved with a budget equal
    to the sum of its RAM costs, and the result must be feasible. When
    ``SAVE_DEBUG_PLOTS`` is truthy the same graph is re-solved at budget
    fractions 0, 0.25, 0.5 and 0.75 (15 second time limit each) and every
    feasible schedule is plotted.
    """
    try:
        import gurobipy as _
    except ImportError as import_error:
        logging.exception(import_error)
        logging.warning("Continuing with tests, gurobi not installed")
        return

    from checkmate.core.solvers.strategy_optimal_ilp import solve_ilp_gurobi

    # Logging options shared by every solver invocation in this test.
    quiet = dict(print_to_console=False, write_log_file=None)

    for n_layers in test_points:
        graph = gen_linear_graph(n_layers)
        assert graph.size == 2 * n_layers + 1

        full_budget = sum(graph.cost_ram.values())
        result = solve_ilp_gurobi(graph, full_budget, **quiet)
        assert result.feasible

        if not SAVE_DEBUG_PLOTS:
            continue

        for fraction in np.arange(0, 1, 0.25):
            result = solve_ilp_gurobi(graph, full_budget * fraction, time_limit=15, **quiet)
            if not result.feasible:
                continue
            plot_path = "/tmp/test_checkmate/plot_ilp/{}_{}.png".format(n_layers, fraction)
            plot_schedule(result, save_file=plot_path)
def test_checkpoint_all_ap():
    """Check that ``solve_checkpoint_all_ap`` is feasible on linear graphs.

    For every length in ``test_points`` a linear graph is generated, its size
    is checked against ``2 * length + 1``, and the resulting schedule must be
    feasible. When ``SAVE_DEBUG_PLOTS`` is truthy the schedule is also plotted.
    """
    for n_layers in test_points:
        graph = gen_linear_graph(n_layers)
        assert graph.size == 2 * n_layers + 1

        result = solve_checkpoint_all_ap(graph)
        assert result.feasible

        if not SAVE_DEBUG_PLOTS:
            continue
        plot_path = "/tmp/test_checkmate/plot_checkpoint_all_ap/{}.png".format(n_layers)
        plot_schedule(result, save_file=plot_path)
def test_chen_sqrtn():
    """Check that ``solve_chen_sqrtn`` is feasible on linear graphs.

    For every length in ``test_points`` a linear graph is generated, its size
    is checked against ``2 * length + 1``, and the resulting schedule must be
    feasible. When ``SAVE_DEBUG_PLOTS`` is truthy the schedule is also plotted.
    """
    for graph_length in test_points:
        g = gen_linear_graph(graph_length)
        assert g.size == 2 * graph_length + 1

        # NOTE(review): the second argument appears to be a boolean flag; other
        # call sites use ``solve_chen_sqrtn(g, True)``. This test used to pass
        # the summed RAM cost there, which only worked through truthiness.
        # Confirm against the solver's signature.
        scheduler_result = solve_chen_sqrtn(g, True)
        assert scheduler_result.feasible

        if SAVE_DEBUG_PLOTS:
            plot_schedule(
                scheduler_result,
                save_file="/tmp/test_checkmate/plot_chen_sqrtn/{}.png".format(graph_length),
            )
def test_chen_greedy_ap():
    """Check that ``solve_chen_greedy`` with its flag set to ``True`` is feasible.

    For each of the fixed graph lengths a linear graph is generated and solved
    with a budget equal to the sum of its RAM costs; the result must be
    feasible. When ``SAVE_DEBUG_PLOTS`` is truthy the graph is re-solved at
    budget fractions 0.0, 0.1, ..., 0.9 and every feasible schedule is plotted
    under ``plot_chen_greedy_ap``.
    """
    for graph_length in [2, 4, 5, 7, 8]:
        g = gen_linear_graph(graph_length)
        assert g.size == 2 * graph_length + 1

        total_cost = sum(g.cost_ram.values())
        scheduler_result = solve_chen_greedy(g, total_cost, True)
        assert scheduler_result.feasible

        if SAVE_DEBUG_PLOTS:
            for budget in np.arange(0, 1, 0.1):
                # Use the same flag value as the asserted solve above. The
                # plots were previously generated with ``False``, so the files
                # under plot_chen_greedy_ap did not show the variant under test.
                scheduler_result = solve_chen_greedy(g, total_cost * budget, True)
                if scheduler_result.feasible:
                    plot_schedule(
                        scheduler_result,
                        save_file="/tmp/test_checkmate/plot_chen_greedy_ap/{}_{}.png".format(graph_length, budget),
                    )
# Bookkeeping containers; nothing in this section populates them.
bs_futures = defaultdict(list)  # type: Dict[int, List]
bs_fwd2xcost = {}  # type: Dict[int, int]

# load model at batch size
g = dfgraph_from_keras(model, batch_size=1, cost_model=cost_model, loss_cpu_cost=0, loss_ram_cost=(4))
plot_dfgraph(g, log_base, name=model_name)

# Solver artifacts (model file, solve log) are written under log_base and
# named after model_name.
model_file = str(log_base / "max_bs_{}.mps".format(model_name))
param_dict = {
    "LogToConsole": 1,
    "LogFile": str(log_base / "max_bs_{}.solve.log".format(model_name)),
    "Threads": os.cpu_count(),
    "TimeLimit": math.inf,
}
ilp_solver = MaxBatchILPSolver(
    g,
    # The budget is the platform memory minus the graph's fixed RAM cost.
    budget=platform_memory("p32xlarge") - g.cost_ram_fixed,
    model_file=model_file,
    gurobi_params=param_dict,
    cpu_fwd_factor=2,
)
ilp_solver.build_model()
result, batch_size = ilp_solver.solve()
logging.info("Max batch size = {}".format(batch_size))

# Name the plot after model_name like the other artifacts above. Formatting
# `model` (the object handed to dfgraph_from_keras) would embed that object's
# string form in the file name.
save_file = log_base / "{}_plot.png".format(model_name)
plot_schedule(result, plot_mem_usage=True, save_file=save_file)
scratch_dir = ( checkmate_data_dir() / "scratch_integrality_gap_linear" / "{}_layers".format(N) / str(IMPOSED_SCHEDULE) / "{}_budget".format(B) ) scratch_dir.mkdir(parents=True, exist_ok=True) data = [] logging.error("Skipping Griewank baselines as it was broken in parasj/checkmate#65") # griewank = solve_griewank(g, B) logging.info("--- Solving LP relaxation for lower bound") lb_lp = lower_bound_lp_relaxation(g, B, approx=APPROX, eps_noise=EPS_NOISE, imposed_schedule=IMPOSED_SCHEDULE) plot_schedule(lb_lp, False, save_file=scratch_dir / "CHECKMATE_LB_LP.png") logging.info("--- Solving ILP") ilp = solve_ilp_gurobi(g, B, approx=APPROX, eps_noise=EPS_NOISE, imposed_schedule=IMPOSED_SCHEDULE, solve_r=SOLVE_R) ilp_feasible = ilp.schedule_aux_data.activation_ram <= B plot_schedule(ilp, False, save_file=scratch_dir / "CHECKMATE_ILP.png") integrality_gap = ilp.schedule_aux_data.cpu / lb_lp.schedule_aux_data.cpu speedup = ilp.solve_time_s / lb_lp.solve_time_s approx_ratio_actual, approx_ratio_ub = float("inf"), float("inf") try: logging.info("--- Solving deterministic rounting of LP") approx_lp_determinstic = solve_approx_lp_deterministic_sweep( g, B, approx=APPROX, eps_noise=EPS_NOISE, imposed_schedule=IMPOSED_SCHEDULE )
loss_ram_cost=(4 * args.batch_size)) common_kwargs = dict(g=g, budget=B, print_to_console=False, eps_noise=0, approx=False) print("Common args:", common_kwargs) data = [] # Checkpoint all scheduler_result_all = solve_checkpoint_all(g) plot_schedule(scheduler_result_all, False, save_file=scratch_dir / "ALL.png") data.append({ "Strategy": str(scheduler_result_all.solve_strategy.value), "Name": "CHECKPOINT_ALL", "CPU": scheduler_result_all.schedule_aux_data.cpu, "Activation RAM": scheduler_result_all.schedule_aux_data.activation_ram, }) if args.model_name in LINEAR_MODELS: # Sqrt(n) scheduler_result_sqrtn = solve_chen_sqrtn(g, True)
import matplotlib.pyplot as plt if __name__ == "__main__": N = 16 for B in range(4, 12): # model = get_keras_model("MobileNet") # g = dfgraph_from_keras(mod=model) g = gen_linear_graph(N) scratch_dir = checkmate_data_dir() / "scratch_linear" / str(N) / str(B) scratch_dir.mkdir(parents=True, exist_ok=True) data = [] scheduler_result_all = solve_checkpoint_all(g) scheduler_result_sqrtn = solve_chen_sqrtn(g, True) plot_schedule(scheduler_result_all, False, save_file=scratch_dir / "CHECKPOINT_ALL.png") plot_schedule(scheduler_result_sqrtn, False, save_file=scratch_dir / "CHEN_SQRTN.png") data.append({ "Strategy": str(scheduler_result_all.solve_strategy.value), "Name": "CHECKPOINT_ALL", "CPU": scheduler_result_all.schedule_aux_data.cpu, "Activation RAM": scheduler_result_all.schedule_aux_data.activation_ram, }) data.append({