예제 #1
0
    def make_constraints(self, budget):
        """Build the list of cvxpy constraints for the scheduling program.

        The constraints are emitted in four timed groups: variable bounds,
        correctness (dependency / checkpoint availability), ``Free_E``
        bookkeeping, and the memory-accounting recurrence on ``U``.

        Args:
            budget: Upper bound applied element-wise to ``self.U``.

        Returns:
            A list of cvxpy constraint objects.
        """
        T = self.T
        ram_costs = self.g.cost_ram
        ram_cost_vec = np.asarray([ram_costs[i] for i in range(T)])
        out = []

        with Timer("Var bounds"):
            # Every decision matrix is boxed into [0, 1]; U is boxed into [0, budget].
            for var in (self.R, self.Sram, self.Ssd, self.Min, self.Mout, self.Free_E):
                out += [var >= 0, var <= 1]
            out += [self.U >= 0, self.U <= budget]

            # Structural pattern: R has a unit diagonal, the two storage
            # matrices have a zero diagonal, and the entries selected by
            # cp.upper_tri are pinned to zero for all five matrices.
            out.append(cp.diag(self.R) == 1)
            out.append(cp.upper_tri(self.R) == 0)
            for storage in (self.Sram, self.Ssd):
                out.append(cp.diag(storage) == 0)
                out.append(cp.upper_tri(storage) == 0)
            for transfer in (self.Min, self.Mout):
                out.append(cp.upper_tri(transfer) == 0)

        with Timer("Correctness constraints"):
            # A node can only be computed in a stage where each parent is
            # either computed or resident in RAM.
            out.extend(
                self.R[:, dst] <= self.R[:, src] + self.Sram[:, src]
                for (src, dst) in self.g.edge_list
            )

            # A value stored at stage t+1 must have been available at stage t.
            out.append(
                self.Sram[1:, :] <= self.R[:-1, :] + self.Sram[:-1, :] + self.Min[:-1, :]
            )
            out.append(self.Ssd[1:, :] <= self.Ssd[:-1, :] + self.Mout[:-1, :])
            out.append(self.Min <= self.Ssd)
            out.append(self.Mout <= self.Sram)

        with Timer("Free_E constraints"):
            # Constraint: sum_k Free_{t,i,k} <= 1
            for node in range(T):
                outgoing = [
                    self.Free_E[:, eidx]
                    for eidx, (src, _) in enumerate(self.g.edge_list)
                    if src == node
                ]
                if not outgoing:
                    continue
                out.append(cp.sum(outgoing, axis=0) <= 1)

            # Constraint: Free_{t,i,k} <= 1 - S_{t+1, i}
            out.extend(
                self.Free_E[:-1, eidx] + self.Sram[1:, src] <= 1
                for eidx, (src, _dst) in enumerate(self.g.edge_list)
            )

            # Constraint: Free_{t,i,k} <= 1 - R_{t, j}
            out.extend(
                self.Free_E[:, eidx] + self.R[:, later] <= 1
                for eidx, (src, dst) in enumerate(self.g.edge_list)
                for later in self.g.successors(src)
                if later > dst
            )

        with Timer("U constraints"):
            # Base case of the memory recurrence (column 0 of U).
            out.append(self.U[:, 0] == self.R[:, 0] * ram_costs[0] + ram_cost_vec @ self.Sram.T)
            # Inductive step: add the cost of the next node, subtract what was freed.
            for nxt in range(1, T):
                prev = nxt - 1
                mem_freed = cp.sum(
                    [ram_costs[i] * self.Free_E[:, eidx] for (eidx, i) in self.g.predecessors_indexed(prev)]
                )
                out.append(self.U[:, nxt] == self.U[:, prev] + self.R[:, nxt] * ram_costs[nxt] - mem_freed)
        return out
예제 #2
0
def solve_chen_greedy(g: DFGraph,
                      segment_mem_B: int,
                      use_actuation_points: bool = True):
    """Pick checkpoints greedily by walking the forward topological order.

    RAM cost is accumulated node by node; whenever the running total exceeds
    ``segment_mem_B`` at a candidate node, that node becomes a checkpoint and
    the running total is reset to zero.

    Args:
        g: Graph to schedule.
        segment_mem_B: Running-cost threshold that triggers a checkpoint.
        use_actuation_points: If true, only ``g.articulation_points`` are
            checkpoint candidates; otherwise every node in ``g.v`` is.

    Returns:
        A ``ScheduledResult`` tagged ``CHEN_GREEDY`` (or ``CHEN_GREEDY_NOAP``).
    """
    with Timer("solve_chen_greedy") as timer_solve:
        # Materialise the candidates as a set once so the membership test in
        # the loop does not rescan a list for every node.
        candidates = set(g.articulation_points if use_actuation_points else g.v)
        segment_ram = 0
        checkpoints = set()
        for v in g.topological_order_fwd:
            segment_ram += g.cost_ram[v]
            if v in candidates and segment_ram > segment_mem_B:
                segment_ram = 0
                checkpoints.add(v)
        S = gen_s_matrix_fixed_checkpoints(g, checkpoints)
        R = solve_r_opt(g, S)
    # Schedule construction is deliberately outside the timed region.
    schedule, aux_data = schedule_from_rs(g, R, S)
    return ScheduledResult(
        solve_strategy=SolveStrategy.CHEN_GREEDY
        if use_actuation_points else SolveStrategy.CHEN_GREEDY_NOAP,
        solver_budget=segment_mem_B,
        feasible=True,
        schedule=schedule,
        schedule_aux_data=aux_data,
        solve_time_s=timer_solve.elapsed,
    )
    def solve(self):
        """Optimize the Gurobi model and package the solution.

        Returns:
            A ``(ScheduledResult, batch_size)`` pair, where ``batch_size`` is
            the solved value of ``self.batch_size``.

        Raises:
            ValueError: If the model status is ``GRB.INFEASIBLE`` or the
                solution attribute ``X`` cannot be read from the variables.
        """
        T = self.g.size
        with self.profiler("Gurobi model optimization", extra_data={"T": str(T), "budget": str(self.budget)}):
            with Timer("ILPSolve") as solve_ilp:
                self.m.optimize()
            self.solve_time = solve_ilp.elapsed

        infeasible = self.m.status == GRB.INFEASIBLE
        try:
            # Probe one entry of each variable family: reading ``.X`` raises
            # AttributeError when no solution is available.
            _ = self.R[0, 0].X
            _ = self.S[0, 0].X
            _ = self.U[0, 0].X
            _ = self.batch_size.X
        except AttributeError:
            infeasible = True

        if infeasible:
            raise ValueError("Infeasible model, check constraints carefully. Insufficient memory?")

        num_edges = len(self.g.edge_list)
        Rout = np.zeros((T, T), dtype=SOLVER_DTYPE)
        Sout = np.zeros((T, T), dtype=SOLVER_DTYPE)
        Uout = np.zeros((T, T), dtype=SOLVER_DTYPE)
        Free_Eout = np.zeros((T, num_edges), dtype=SOLVER_DTYPE)
        batch_size = self.batch_size.X
        try:
            for t in range(T):
                for i in range(T):
                    # Solver values are floats that are only integral up to a
                    # tolerance (e.g. 0.99999); round instead of truncating so
                    # such a value is not silently turned into 0.
                    Rout[t][i] = int(round(self.R[t, i].X))
                    Sout[t][i] = int(round(self.S[t, i].X))
                    Uout[t][i] = self.U[t, i].X * self.ram_gcd
                for e in range(num_edges):
                    Free_Eout[t][e] = int(round(self.Free_E[t, e].X))
        except AttributeError as e:
            logging.exception(e)
            # NOTE(review): this fallback yields four values while the normal
            # path yields two; kept as-is for backward compatibility, but
            # callers that unpack two values will fail here -- confirm intent.
            return None, None, None, None

        Rout = solve_r_opt(self.g, Sout)  # prune R using optimal recomputation solver

        ilp_aux_data = ILPAuxData(
            U=Uout,
            Free_E=Free_Eout,
            ilp_approx=False,
            ilp_time_limit=0,
            ilp_eps_noise=0,
            ilp_num_constraints=self.m.numConstrs,
            ilp_num_variables=self.m.numVars,
        )
        schedule, aux_data = schedule_from_rs(self.g, Rout, Sout)
        return (
            ScheduledResult(
                solve_strategy=SolveStrategy.OPTIMAL_ILP_GC,
                solver_budget=self.budget,
                feasible=True,
                schedule=schedule,
                schedule_aux_data=aux_data,
                solve_time_s=self.solve_time,
                ilp_aux_data=ilp_aux_data,
            ),
            batch_size,
        )
예제 #4
0
def schedule_from_rs(
        g: DFGraph, r: np.ndarray, s: np.ndarray
) -> Tuple[Optional[Schedule], Optional[SchedulerAuxData]]:
    """Turn an (R, S) matrix pair into a concrete schedule plus statistics.

    Args:
        g: Graph being scheduled; ``g.size`` gives the number of stages/nodes.
        r: Matrix where ``r[t, i] == 1`` means node ``i`` is run in stage ``t``.
        s: Matrix where ``s[t, i] == 1`` means node ``i`` is retained into
            stage ``t``.

    Returns:
        ``(schedule, aux_data)``, or ``(None, None)`` when either matrix is
        ``None`` (treated as an infeasible solve).
    """
    debug_collect_ram_usage = "DEBUG_SCHEDULER_RAM" in active_env_var_flags
    if r is None or s is None:
        return None, None  # infeasible
    T = g.size

    def _used_after(t_, u_, i_):
        """Returns True if v_u is used after v_i in stage t"""
        is_retained_snapshot = t_ < T - 1 and s[t_ + 1, u_] == 1
        # Some successor of u that comes after i is still computed this stage.
        is_used_by_successor = any(
            r[t_, v] != 0 and v > i_ for v in g.successors(u_))
        return is_retained_snapshot or is_used_by_successor

    with Timer("schedule_rs_matrix") as schedule_timer:
        # compute last usage to determine whether to update auxiliary variables
        # last_used = {i: max([t for t in range(T) if r[t, i] == 1]) for i in range(T)}
        # Builtin ``int`` is what the removed ``np.int`` alias pointed to.
        mem_usage = np.zeros((T, T), dtype=int)
        sb = ScheduleBuilder(g, verbosity=1)
        for t in range(T):
            # Free unused checkpoints
            # NOTE(review): all deallocation below is gated on the debug flag,
            # so without it nothing is ever freed -- confirm this is intended.
            if debug_collect_ram_usage:
                # ``filter`` is lazy, so the cache check for each node happens
                # after the deallocations of the nodes before it.
                for i in filter(sb.is_op_cached, range(T)):
                    if not _used_after(t, i, i):
                        sb.deallocate_register(i)

            for i in range(T):
                if r[t, i] == 1:
                    # sb.run_operator(i, last_used[i] == t)
                    sb.run_operator(
                        i, False
                    )  # todo(paras) prune away last_used in favor of recompute blacklist
                if debug_collect_ram_usage:
                    mem_usage[t, i] = sb.current_ram + g.cost_ram_fixed

                # Free memory
                if debug_collect_ram_usage:
                    for u in filter(sb.is_op_cached,
                                    itertools.chain(g.predecessors(i), [i])):
                        if not _used_after(t, u, i):
                            sb.deallocate_register(u)
        total_ram = sb.max_ram + g.cost_ram_fixed
        ram_timeline = [mem + g.cost_ram_fixed for mem in sb.ram_timeline]

    return (
        sb.schedule,
        SchedulerAuxData(
            R=r,
            S=s,
            cpu=sb.total_cpu,
            peak_ram=total_ram,
            activation_ram=sb.max_ram,
            mem_grid=mem_usage,
            mem_timeline=ram_timeline,
            schedule_time_s=schedule_timer.elapsed,
        ),
    )
def solve_checkpoint_all(g: DFGraph):
    """Baseline strategy that checkpoints every node in ``g.vfwd``.

    Returns:
        A ``ScheduledResult`` tagged ``CHECKPOINT_ALL`` with a budget of 0.
    """
    with Timer("solve_checkpoint_all") as timer_solve:
        checkpoint_matrix = gen_s_matrix_fixed_checkpoints(g, g.vfwd)
        recompute_matrix = solve_r_opt(g, checkpoint_matrix)

    # Only the matrix construction above counts towards the reported time.
    plan, plan_aux = schedule_from_rs(g, recompute_matrix, checkpoint_matrix)
    result_fields = {
        "solve_strategy": SolveStrategy.CHECKPOINT_ALL,
        "solver_budget": 0,
        "feasible": True,
        "schedule": plan,
        "schedule_aux_data": plan_aux,
        "solve_time_s": timer_solve.elapsed,
    }
    return ScheduledResult(**result_fields)
예제 #6
0
def solve_checkpoint_last_node(g: DFGraph):
    """Checkpoint only one node between stages.

    The checkpoint matrix is all zeros except for ones on the diagonal of its
    rows-from-1 view, i.e. at positions ``[i + 1, i]``.

    Returns:
        A ``ScheduledResult`` tagged ``CHECKPOINT_LAST_NODE`` with a budget of 0.
    """
    with Timer("solve_checkpoint_last_node") as timer_solve:
        shape = (g.size, g.size)
        checkpoint_matrix = np.zeros(shape, dtype=SOLVER_DTYPE)
        # Writing through the row-shifted view fills the sub-diagonal in place.
        shifted_view = checkpoint_matrix[1:]
        np.fill_diagonal(shifted_view, 1)
        recompute_matrix = solve_r_opt(g, checkpoint_matrix)

    plan, plan_aux = schedule_from_rs(g, recompute_matrix, checkpoint_matrix)
    result_fields = {
        "solve_strategy": SolveStrategy.CHECKPOINT_LAST_NODE,
        "solver_budget": 0,
        "feasible": True,
        "schedule": plan,
        "schedule_aux_data": plan_aux,
        "solve_time_s": timer_solve.elapsed,
    }
    return ScheduledResult(**result_fields)
예제 #7
0
 def solve(self, solver_override=None, verbose=False, num_threads=os.cpu_count()):
     """Solve ``self.problem`` with the first suitable cvxpy backend.

     Backend preference: an explicit ``solver_override``, then MOSEK, GUROBI,
     CBC (which also receives ``numberThreads``), and finally cvxpy's default.

     Returns:
         The values of ``R``, ``S``, ``U`` and ``Free_E`` as a 4-tuple.

     Raises:
         ValueError: If the problem status is "infeasible" or "unbounded".
     """
     installed_solvers = cp.installed_solvers()
     with Timer("Solve", print_results=verbose) as solve_timer:
         # Collect keyword arguments first, then issue one solve call.
         solve_kwargs = {"verbose": verbose}
         if solver_override is not None:
             solve_kwargs["solver"] = solver_override
         elif "MOSEK" in installed_solvers:
             solve_kwargs["solver"] = cp.MOSEK
         elif "GUROBI" in installed_solvers:
             solve_kwargs["solver"] = cp.GUROBI
         elif "CBC" in installed_solvers:
             solve_kwargs["solver"] = cp.CBC
             solve_kwargs["numberThreads"] = num_threads
         self.problem.solve(**solve_kwargs)
     self.solve_time = solve_timer.elapsed

     failed_statuses = ("infeasible", "unbounded")
     if self.problem.status in failed_statuses:
         raise ValueError("Model infeasible")
     return self.R.value, self.S.value, self.U.value, self.Free_E.value
예제 #8
0
def solve_chen_sqrtn(g: DFGraph,
                     use_actuation_points: bool = True) -> ScheduledResult:
    """Checkpoint every k-th candidate node, with k = floor(sqrt(#candidates)).

    Args:
        g: Graph to schedule.
        use_actuation_points: If true, candidates are ``g.articulation_points``;
            otherwise every node in ``g.v``.

    Returns:
        A ``ScheduledResult`` tagged ``CHEN_SQRTN`` (or ``CHEN_SQRTN_NOAP``).
    """
    with Timer("solve_chen_sqrtn") as timer_solve:
        if use_actuation_points:
            candidates = g.articulation_points
        else:
            candidates = g.v
        stride = int(math.sqrt(len(candidates)))
        # Positions are counted from 1, so the stride-th, 2*stride-th, ...
        # candidates are selected.
        chosen = {
            node
            for position, node in enumerate(candidates, start=1)
            if position % stride == 0
        }
        S = gen_s_matrix_fixed_checkpoints(g, chosen)
        R = solve_r_opt(g, S)

    schedule, aux_data = schedule_from_rs(g, R, S)
    if use_actuation_points:
        strategy = SolveStrategy.CHEN_SQRTN
    else:
        strategy = SolveStrategy.CHEN_SQRTN_NOAP
    return ScheduledResult(
        solve_strategy=strategy,
        solver_budget=0,
        feasible=True,
        schedule=schedule,
        schedule_aux_data=aux_data,
        solve_time_s=timer_solve.elapsed,
    )
예제 #9
0
def _load_griewank(graph_size: int) -> pd.DataFrame:
    """Load the Griewank solution table for ``graph_size``, downloading if needed.

    A cached pickle under the checkmate cache directory is used when it can be
    read; otherwise the file is fetched from S3 into the cache and then read.

    NOTE(security): the file is unpickled via ``pd.read_pickle``; unpickling
    executes code embedded in the file, so the remote bucket must be trusted.
    """
    fname = "{}.pkl.gz".format(graph_size)
    cache_dir = checkmate_cache_dir() / "griewank_solutions"
    cached_file = cache_dir / fname
    remote_path = "https://optimalcheckpointing.s3.amazonaws.com/griewank_solutions/pickle/{}".format(
        fname)

    if cached_file.exists():
        try:
            return pd.read_pickle(cached_file)
        except Exception as e:
            # An unreadable cache entry is not fatal: fall through and re-download.
            logging.exception(e)
            logging.warning(
                "Error loading cached griewank solution, corrupt file? Reloading from S3"
            )

    with Timer("griewank_dl") as dl_timer:
        cache_dir.mkdir(parents=True, exist_ok=True)
        urllib.request.urlretrieve(remote_path, cached_file)
    message = "Loaded graph from {} and saving to {} in {:.2f}s".format(
        remote_path, cached_file, dl_timer.elapsed)
    logging.info(message)
    return pd.read_pickle(cached_file)
예제 #10
0
import tensorflow as tf
import logging

from checkmate.core.solvers.strategy_chen import solve_chen_sqrtn
from checkmate.core.utils.timer import Timer
from checkmate.tf2.extraction import dfgraph_from_tf_function
from checkmate.tf2.util.load_keras_model import get_keras_model

# Batch size used as the leading dimension of both traced input specs below.
BS = 128

# Script entry point: build a ResNet50 gradient function, trace it with fixed
# input shapes, and convert the traced function into a checkmate DFGraph.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    logging.info("building graph")
    with Timer("build_graph", print_results=True):
        model = get_keras_model("ResNet50")

        # Forward pass plus gradient of the loss w.r.t. the trainable variables.
        def grads(images, labels):
            with tf.GradientTape() as tape:
                pred = model(images)
                # Mean of (prediction - label) is used as the scalar loss.
                loss = tf.reduce_mean(pred - labels)
            gradient = tape.gradient(loss, model.trainable_variables)
            return loss, gradient

        # Specialise the function to (BS, 224, 224, 3) images and (BS, 1000) labels.
        grad_fn = tf.function(grads).get_concrete_function(
            tf.TensorSpec(shape=(BS, 224, 224, 3)),
            tf.TensorSpec(shape=(BS, 1000)))
    logging.info("tracing graph")
    with Timer("trace_graph", print_results=True):
        g = dfgraph_from_tf_function(grad_fn)
    # Alternative solvers, currently disabled:
    # sched_result = solve_ilp_gurobi(g, budget=platform_memory("p2xlarge"), approx=False, eps_noise=0.0)
    # sched_result = solve_approx_lp_deterministic_05_threshold(g, budget=platform_memory("p2xlarge"))
예제 #11
0
    def build_model(self):
        """Populate the Gurobi model ``self.m`` with its objective and constraints.

        RAM costs and the budget are divided by ``self.ram_gcd`` and CPU costs
        by ``self.g.cpu_gcd()`` before being used as coefficients. If
        ``self.eps_noise`` is truthy, the CPU costs are additionally perturbed
        with random multiplicative noise.

        Constraints are added in this order: optional seed constraints on S,
        imposed-schedule constraints, correctness constraints, the two bounds
        tying ``Free_E`` to the hazard count, and the memory accounting for
        ``U``. If ``self.model_file`` is set and the graph has fewer than 200
        nodes, the model is also written to that file.

        Returns:
            None.
        """
        T = self.g.size
        # Helper: divide every value of a cost dict by ``divisor``.
        dict_val_div = lambda cost_dict, divisor: {
            k: v / divisor
            for k, v in cost_dict.items()
        }
        permute_ram = dict_val_div(self.g.cost_ram, self.ram_gcd)
        budget = self.budget / self.ram_gcd

        # Helper: scale every value by (1 + eps * z), z drawn from np.random.randn().
        permute_eps = lambda cost_dict, eps: {
            k: v * (1.0 + eps * np.random.randn())
            for k, v in cost_dict.items()
        }
        permute_cpu = dict_val_div(self.g.cost_cpu, self.g.cpu_gcd())
        if self.eps_noise:
            permute_cpu = permute_eps(permute_cpu, self.eps_noise)

        with Timer("Gurobi model construction",
                   extra_data={
                       "T": str(T),
                       "budget": str(budget)
                   }):
            with Timer("Objective construction",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                # seed solver with a baseline strategy
                # Every S entry whose seed value is below 1 is pinned to 0; the
                # constraint handles are collected in self.init_constraints.
                if self.seed_s is not None:
                    for x in range(T):
                        for y in range(T):
                            if self.seed_s[x, y] < 1:
                                self.init_constraints.append(
                                    self.m.addLConstr(self.S[x, y], GRB.EQUAL,
                                                      0))
                    self.m.update()

                # define objective function
                # Minimise the (scaled) CPU cost summed over every R[t, i].
                self.m.setObjective(
                    quicksum(self.R[t, i] * permute_cpu[i] for t in range(T)
                             for i in range(T)), GRB.MINIMIZE)

            with Timer("Variable initialization",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                if self.imposed_schedule == ImposedSchedule.FULL_SCHEDULE:
                    # R is zero strictly above the diagonal, S is zero on and
                    # above the diagonal, and the diagonal of R sums to T.
                    self.m.addLConstr(
                        quicksum(self.R[t, i] for t in range(T)
                                 for i in range(t + 1, T)), GRB.EQUAL, 0)
                    self.m.addLConstr(
                        quicksum(self.S[t, i] for t in range(T)
                                 for i in range(t, T)), GRB.EQUAL, 0)
                    self.m.addLConstr(quicksum(self.R[t, t] for t in range(T)),
                                      GRB.EQUAL, T)
                elif self.imposed_schedule == ImposedSchedule.COVER_ALL_NODES:
                    # Row 0 of S sums to zero; every node is computed at least once.
                    self.m.addLConstr(quicksum(self.S[0, i] for i in range(T)),
                                      GRB.EQUAL, 0)
                    for i in range(T):
                        self.m.addLConstr(
                            quicksum(self.R[t, i] for t in range(T)),
                            GRB.GREATER_EQUAL, 1)
                elif self.imposed_schedule == ImposedSchedule.COVER_LAST_NODE:
                    self.m.addLConstr(quicksum(self.S[0, i] for i in range(T)),
                                      GRB.EQUAL, 0)
                    # note: the integrality gap is very large as this constraint
                    # is only applied to the last node (last column of self.R).
                    self.m.addLConstr(
                        quicksum(self.R[t, T - 1] for t in range(T)),
                        GRB.GREATER_EQUAL, 1)

            with Timer("Correctness constraints",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                # ensure all checkpoints are in memory
                for t in range(T - 1):
                    for i in range(T):
                        self.m.addLConstr(self.S[t + 1, i], GRB.LESS_EQUAL,
                                          self.S[t, i] + self.R[t, i])
                # ensure all computations are possible
                for (u, v) in self.g.edge_list:
                    for t in range(T):
                        self.m.addLConstr(self.R[t, v], GRB.LESS_EQUAL,
                                          self.R[t, u] + self.S[t, u])

            # define memory constraints
            # Linear expression for edge (i, k) in stage t: (1 - R[t, k]), plus
            # S[t + 1, i] when a next stage exists, plus R[t, j] for every
            # successor j of i with j > k.
            def _num_hazards(t, i, k):
                if t + 1 < T:
                    return 1 - self.R[t, k] + self.S[t + 1, i] + quicksum(
                        self.R[t, j] for j in self.g.successors(i) if j > k)
                return 1 - self.R[t, k] + quicksum(
                    self.R[t, j] for j in self.g.successors(i) if j > k)

            # Constant upper bound on _num_hazards(t, i, k): each term counted as 1.
            def _max_num_hazards(t, i, k):
                num_uses_after_k = sum(1 for j in self.g.successors(i)
                                       if j > k)
                if t + 1 < T:
                    return 2 + num_uses_after_k
                return 1 + num_uses_after_k

            with Timer("Constraint: upper bound for 1 - Free_E",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                for t in range(T):
                    for eidx, (i, k) in enumerate(self.g.edge_list):
                        self.m.addLConstr(1 - self.Free_E[t, eidx],
                                          GRB.LESS_EQUAL,
                                          _num_hazards(t, i, k))
            with Timer("Constraint: lower bound for 1 - Free_E",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                for t in range(T):
                    for eidx, (i, k) in enumerate(self.g.edge_list):
                        self.m.addLConstr(
                            _max_num_hazards(t, i, k) *
                            (1 - self.Free_E[t, eidx]), GRB.GREATER_EQUAL,
                            _num_hazards(t, i, k))
            with Timer(
                    "Constraint: initialize memory usage (includes spurious checkpoints)",
                    extra_data={
                        "T": str(T),
                        "budget": str(budget)
                    },
            ):
                # U[t, 0] = cost of node 0 if computed + cost of everything in S[t, :].
                for t in range(T):
                    self.m.addLConstr(
                        self.U[t, 0],
                        GRB.EQUAL,
                        self.R[t, 0] * permute_ram[0] +
                        quicksum(self.S[t, i] * permute_ram[i]
                                 for i in range(T)),
                    )
            with Timer("Constraint: memory recurrence",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                # U[t, k + 1] = U[t, k] + cost of node k + 1 if computed
                #               - cost freed over the edges from predecessors_indexed(k).
                for t in range(T):
                    for k in range(T - 1):
                        mem_freed = quicksum(
                            permute_ram[i] * self.Free_E[t, eidx]
                            for (eidx, i) in self.g.predecessors_indexed(k))
                        self.m.addLConstr(
                            self.U[t, k + 1], GRB.EQUAL, self.U[t, k] +
                            self.R[t, k + 1] * permute_ram[k + 1] - mem_freed)

        if self.model_file is not None and self.g.size < 200:  # skip for big models to save runtime
            with Timer("Saving model",
                       extra_data={
                           "T": str(T),
                           "budget": str(budget)
                       }):
                self.m.write(self.model_file)
        return None  # return value ensures ray remote call can be chained
예제 #12
0
    def solve(self):
        """Optimize the Gurobi model and extract the solution matrices.

        If a seed ``self.seed_s`` was supplied, a first time-limited solve is
        run with the seed constraints in place; those constraints are then
        removed and the model is solved again.

        Returns:
            ``(R, S, U, Free_E)`` as numpy arrays, with ``U`` scaled by
            ``self.ram_gcd``. Returns four ``None`` values if reading the
            solution raises ``AttributeError``.

        Raises:
            ValueError: If the model is infeasible, or if it holds no solution
                after optimization.
        """
        T = self.g.size
        with Timer("Gurobi model optimization",
                   extra_data={
                       "T": str(T),
                       "budget": str(self.budget)
                   }):
            if self.seed_s is not None:
                self.m.Params.TimeLimit = self.GRB_CONSTRAINED_PRESOLVE_TIME_LIMIT
                self.m.optimize()
                if self.m.status == GRB.INFEASIBLE:
                    print("Infeasible ILP seed at budget {:.2E}".format(
                        self.budget))
                self.m.remove(self.init_constraints)
            # NOTE(review): the fallback time limit is 0 when no "TimeLimit"
            # parameter was supplied -- confirm that is the intended default.
            self.m.Params.TimeLimit = self.gurobi_params.get("TimeLimit", 0)
            self.m.message("\n\nRestarting solve\n\n")
            with Timer("ILPSolve") as solve_ilp:
                self.m.optimize()
            self.solve_time = solve_ilp.elapsed

        infeasible = self.m.status == GRB.INFEASIBLE
        if infeasible:
            raise ValueError(
                "Infeasible model, check constraints carefully. Insufficient memory?"
            )

        if self.m.solCount < 1:
            raise ValueError(
                "Model status is {} (not infeasible), but solCount is {}".
                format(self.m.status, self.m.solCount))

        solver_dtype = checkmate.core.utils.solver_common.SOLVER_DTYPE
        # Builtin ``float`` is what the removed ``np.float`` alias pointed to.
        matrix_dtype = solver_dtype if self.integral else float
        num_edges = len(self.g.edge_list)
        Rout = np.zeros((T, T), dtype=matrix_dtype)
        Sout = np.zeros((T, T), dtype=matrix_dtype)
        Uout = np.zeros((T, T), dtype=matrix_dtype)
        Free_Eout = np.zeros((T, num_edges), dtype=solver_dtype)

        def _solved_value(var):
            # Entries may be solver variables (value in ``.X``) or plain numbers.
            try:
                return var.X
            except (AttributeError, TypeError):
                return var

        def _cast(value):
            # Integral solutions are only integral up to the solver's
            # tolerance (e.g. 0.99999); round instead of truncating.
            if self.integral:
                return int(round(value))
            return float(value)

        try:
            for t in range(T):
                for i in range(T):
                    Rout[t][i] = _cast(_solved_value(self.R[t, i]))
                    Sout[t][i] = _cast(_solved_value(self.S[t, i]))
                    Uout[t][i] = _solved_value(self.U[t, i]) * self.ram_gcd
                # No exception is bound to a name here, so the edge index can
                # never be clobbered by an ``except ... as`` clause.
                for e in range(num_edges):
                    Free_Eout[t][e] = _cast(_solved_value(self.Free_E[t, e]))
        except AttributeError as err:
            logging.exception(err)
            return None, None, None, None

        # prune R using closed-form solver
        if self.solve_r and self.integral:
            Rout = solve_r_opt(self.g, Sout)

        return Rout, Sout, Uout, Free_Eout
예제 #13
0
        logging.error(
            "Skipping Griewank baselines as it was broken in parasj/checkmate#65"
        )
        # scheduler_result_griewank = solve_griewank(g, B)
        # plot_schedule(scheduler_result_griewank, False, save_file=scratch_dir / "GRIEWANK.png")
        # data.append(
        #     {
        #         "Strategy": str(scheduler_result_griewank.solve_strategy.value),
        #         "Name": "GRIEWANK",
        #         "CPU": scheduler_result_griewank.schedule_aux_data.cpu,
        #         "Activation RAM": scheduler_result_griewank.schedule_aux_data.activation_ram,
        #     }
        # )

        with Timer("ilp") as timer_ilp:
            scheduler_result_ilp = solve_ilp_gurobi(
                g, B, seed_s=scheduler_result_sqrtn.schedule_aux_data.S)
            plot_schedule(scheduler_result_ilp,
                          False,
                          save_file=scratch_dir / "CHECKMATE_ILP.png")
            data.append({
                "Strategy":
                str(scheduler_result_ilp.solve_strategy.value),
                "Name":
                "CHECKMATE_ILP",
                "CPU":
                scheduler_result_ilp.schedule_aux_data.cpu,
                "Activation RAM":
                scheduler_result_ilp.schedule_aux_data.activation_ram,
            })