def share_blobs(
    net,
    heads,
    namescope,
    dont_share_blobs=None,
    blob_shapes=None,
):
    external_input = set(net.Proto().external_input)

    def is_new_blob(b):
        name = str(b)
        # Note: need to look at _{namescope} pattern as it matches
        # to handle the auto-split gradients
        return b not in external_input and (
            name.startswith(namescope) or name.startswith("_" + namescope)
        )

    log.warn("NOTE: Executing memonger to optimize memory usage")

    # Normalize the namescope so the prefix matching below works
    if namescope != "" and not namescope.endswith("/"):
        namescope += "/"

    netproto = copy.deepcopy(net.Proto())

    # All ops participate in blob sharing
    shared_op_indices = list(range(len(netproto.op)))

    shared_blobs = set()
    for op in net.Proto().op:
        for b in list(op.input) + list(op.output):
            if is_new_blob(b):
                shared_blobs.add(b)
    print(external_input)
    print(shared_blobs)

    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(),
        [str(s).encode('utf-8') for s in heads],
        shared_op_indices,
        set(str(s).encode('utf-8') for s in shared_blobs),
        namescope.encode('utf-8'),
        set() if dont_share_blobs is None else dont_share_blobs,
        {} if blob_shapes is None else blob_shapes)

    log.info("Memonger memory optimization took {} secs".format(
        time.time() - start_time),
    )

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
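# A minimal usage sketch for share_blobs (illustrative only). The "heads"
# argument is the set of root blobs the DAG traversal starts from (the net
# input here); the blob name "data" is hypothetical and depends on how the
# net was built.
def _example_share_blobs(net):
    optimized_proto = share_blobs(
        net,
        heads=["data"],   # hypothetical root/input blob of the net
        namescope="",     # empty scope: consider every non-external-input blob
    )
    # share_blobs returns a new NetDef; swap it in for the original proto.
    net.Proto().CopyFrom(optimized_proto)
    return net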
def optimize_inference_for_dag(net, input_blobs, namescope=""):
    netproto = copy.deepcopy(net.Proto())
    external_input = set(net.Proto().external_input)
    external_output = set(net.Proto().external_output)

    def is_activation_blob(b):
        return b not in external_input and b not in external_output

    activation_blobs = set()
    seen_as_output = set()
    ops = list(net.Proto().op)
    op_indices = [index for index, op in enumerate(net.Proto().op)]

    # Sanity check: check that all external inputs are properly accounted
    # and that no gradient ops are included in 'net'
    for op in ops:
        for b in op.input:
            if is_activation_blob(b):
                activation_blobs.add(b)
                if b not in seen_as_output:
                    assert False, "{} not in external input".format(b)
        for b in op.output:
            if is_activation_blob(b):
                activation_blobs.add(b)
        seen_as_output = seen_as_output.union(set(op.output))
        assert not op.is_gradient_op, \
            "You can only pass inference-only nets to optimize_inference_for_dag"

    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(),
        [str(s).encode('utf-8') for s in input_blobs],
        op_indices,
        set(str(s).encode('utf-8') for s in activation_blobs),
        namescope.encode('utf-8'),
        set(),
        {}
    )

    log.info("Memonger memory optimization took {} secs".format(
        time.time() - start_time),
    )

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
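# A minimal, runnable sketch of optimize_inference_for_dag (illustrative
# only): build a tiny inference-only net with brew, recycle its activation
# blobs, and run the optimized net. Layer and blob names ("data", "fc1",
# "pred", ...) are hypothetical.
def _example_optimize_inference_for_dag():
    import numpy as np
    from caffe2.python import brew, model_helper, workspace

    model = model_helper.ModelHelper(name="inference_example")
    fc1 = brew.fc(model, "data", "fc1", dim_in=16, dim_out=16)
    relu1 = brew.relu(model, fc1, "relu1")
    pred = brew.fc(model, relu1, "pred", dim_in=16, dim_out=4)
    # Mark the prediction as an external output so it is never recycled.
    model.net.AddExternalOutput(pred)

    optimized_proto = optimize_inference_for_dag(model.net, ["data"])
    model.net.Proto().CopyFrom(optimized_proto)

    workspace.FeedBlob("data", np.random.rand(2, 16).astype(np.float32))
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
    return workspace.FetchBlob("pred")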
def share_grad_blobs(
    net,
    losses,
    param_grads,
    namescope,
    dont_share_blobs=None,
    share_activations=False,
    blob_shapes=None,
):
    '''
    Implements similar optimization as Torch's shareGradInput():
    for the gradients that are passed between layers, share blobs between
    operators when possible. This yields significant memory savings with
    deep networks.

    Returns an optimized protobuf (assign to net._net)
    '''
    def is_grad_blob(b):
        name = str(b)
        # Note: need to look at _{namescope} pattern as it matches
        # to handle the auto-split gradients
        return name.endswith("_grad") and (
            name.startswith(namescope) or name.startswith("_" + namescope)
        ) and name not in param_grads

    def is_grad_op(op):
        # TODO: something smarter
        for b in list(op.input) + list(op.output):
            if is_grad_blob(b):
                return True
        return False

    log.warn("NOTE: Executing memonger to optimize gradient memory")

    # Collect ops that have something to do with gradients
    if namescope != "" and not namescope.endswith("/"):
        namescope += "/"

    netproto = copy.deepcopy(net.Proto())
    activations = []
    external_output = set(net.Proto().external_output)

    # Hacky way to get activations, think of a better way
    for op in net.Proto().op:
        for b in op.output:
            if b + "_w" in op.input and b not in external_output:
                activations.append(b)

    # Remove last activations, as they are usually accessed externally
    activations = set(activations[:-2])

    # Gradient ops
    grad_op_indices = []
    for idx, op in enumerate(netproto.op):
        if (is_grad_op(op)):
            grad_op_indices.append(idx)

    shared_blobs = set()
    for op in net.Proto().op:
        for b in list(op.input) + list(op.output):
            if is_grad_blob(b) or (share_activations and b in activations):
                shared_blobs.add(b)

    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(),
        [str(s).encode('utf-8') for s in losses],
        grad_op_indices,
        set(str(s).encode('utf-8') for s in shared_blobs),
        namescope.encode('utf-8'),
        set() if dont_share_blobs is None else dont_share_blobs,
        {} if blob_shapes is None else blob_shapes
    )

    log.info("Memonger memory optimization took {} secs".format(
        time.time() - start_time),
    )

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
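# A usage sketch for share_grad_blobs (illustrative only), mirroring the
# typical training flow: build a model under a namescope, add gradient
# operators, then share the intermediate gradient blobs. The namescope
# "name_x" and all layer names are hypothetical.
def _example_share_grad_blobs():
    from caffe2.python import brew, core, model_helper

    model = model_helper.ModelHelper(name="train_example")
    with core.NameScope("name_x"):
        fc1 = brew.fc(model, "data", "fc1", dim_in=16, dim_out=16)
        fc2 = brew.fc(model, fc1, "fc2", dim_in=16, dim_out=16)
        fc2.Relu([], fc2) \
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
    model.AddGradientOperators(["name_x/loss"])

    optimized_proto = share_grad_blobs(
        model.net,
        ["name_x/loss"],
        set(str(g) for g in model.param_to_grad.values()),
        "name_x",
        share_activations=False,
    )
    # As the docstring suggests, assign the optimized proto to net._net.
    model.net._net = optimized_proto
    return model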
def share_freeze_blobs_c2(
    net,
    namescope,
):
    log.warn("NOTE: Executing memonger to optimize memory of frozen blobs")

    # Normalize the namescope so the prefix matching below works
    if namescope != "" and not namescope.endswith("/"):
        namescope += "/"

    netproto = copy.deepcopy(net.Proto())
    activations = []
    external_input = set(net.Proto().external_input)
    external_output = set(net.Proto().external_output)
    start_idx = -1
    end_idx = -1

    # Find the frozen op range: from the first Conv up to the StopGradient
    # op under this namescope
    for idx, op in enumerate(netproto.op):
        # print(op)
        if namescope not in op.input[0]:
            continue
        if op.type == 'Conv' and start_idx < 0:
            start_idx = idx
        if op.type == 'StopGradient':
            end_idx = idx
    print(namescope, 'start_idx: ', start_idx, ' end_idx: ', end_idx)

    # Hacky way to get activations, think of a better way
    for idx, op in enumerate(netproto.op[start_idx:end_idx]):
        for b in op.output:
            if b not in external_output:
                activations.append(b)
    print('activations: ', activations)

    # Note: share_pool and map_pool are currently unused
    share_pool = [namescope + '_shared_' + str(i) for i in range(1000, 10000)]
    map_pool = {}
    heads = [namescope + 'data']
    print('heads: ', heads)

    # Remove last activations, as they are usually accessed externally
    activations = set(activations[:-1])
    print('activations: ', activations)

    shared_blobs = activations
    dont_share_blobs = None
    blob_shapes = None

    # NB: enumerating the slice yields indices relative to start_idx,
    # not absolute indices into netproto.op
    op_indices = [
        index for index, op in enumerate(netproto.op[start_idx:end_idx + 2])
    ]
    print(op_indices)

    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(),
        [str(s).encode('utf-8') for s in heads],
        op_indices,
        set(str(s).encode('utf-8') for s in shared_blobs),
        namescope.encode('utf-8'),
        set() if dont_share_blobs is None else dont_share_blobs,
        {} if blob_shapes is None else blob_shapes)

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
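# A usage sketch for share_freeze_blobs_c2 (illustrative only). It assumes a
# net whose frozen backbone starts with a Conv op and ends with a
# StopGradient op under the given namescope, and whose input blob is named
# "<namescope>data", as the code above expects; the default namescope
# "gpu_0/" is hypothetical.
def _example_share_freeze_blobs_c2(net, namescope="gpu_0/"):
    optimized_proto = share_freeze_blobs_c2(net, namescope)
    # Swap the optimized proto in for the original net definition.
    net._net = optimized_proto
    return net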