Example #1
def share_blobs(
    net,
    heads,
    namescope,
    dont_share_blobs=None,
    blob_shapes=None,
):
    external_input = set(net.Proto().external_input)

    def is_new_blob(b):
        name = str(b)
        # Note: also match the "_" + namescope prefix so that the
        # auto-split gradient blobs are picked up as well
        return b not in external_input and (name.startswith(namescope) or
                                            name.startswith("_" + namescope))

    log.warn("NOTE: Executing memonger to optimize gradient memory")

    # Make sure the namescope ends with '/' so the prefix matching works
    if namescope != "" and not namescope.endswith("/"):
        namescope += "/"

    netproto = copy.deepcopy(net.Proto())

    # Every op in the net is considered for blob recycling
    shared_op_indices = []
    for idx, op in enumerate(netproto.op):
        shared_op_indices.append(idx)

    shared_blobs = set()
    for op in net.Proto().op:
        for b in list(op.input) + list(op.output):
            if is_new_blob(b):
                shared_blobs.add(b)
    print(external_input)
    print(shared_blobs)
    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(), [str(s).encode('utf-8') for s in heads],
        shared_op_indices, set(str(s).encode('utf-8') for s in shared_blobs),
        namescope.encode('utf-8'),
        set() if dont_share_blobs is None else dont_share_blobs,
        {} if blob_shapes is None else blob_shapes)

    log.info("Memonger memory optimization took {} secs".format(
        time.time() - start_time),
    )

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
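share_blobs marks every op in the net and every non-external blob under the given namescope as eligible for recycling before handing the graph to the C++ memonger. A hypothetical call pattern follows; train_net, the head blob, and the "gpu_0" scope are assumptions, not part of the example above.

# Hypothetical call pattern; train_net, "gpu_0/loss" and "gpu_0" are assumptions.
optim_proto = share_blobs(
    train_net,                # a core.Net built under the "gpu_0" name scope
    heads=["gpu_0/loss"],     # blobs from which the lifetime analysis starts
    namescope="gpu_0",        # a trailing "/" is appended automatically
)
train_net._net.CopyFrom(optim_proto)  # install the blob-recycled NetDef in place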
Example #2
def optimize_inference_for_dag(net, input_blobs, namescope=""):
    netproto = copy.deepcopy(net.Proto())
    external_input = set(net.Proto().external_input)
    external_output = set(net.Proto().external_output)

    def is_activation_blob(b):
        return b not in external_input and b not in external_output

    activation_blobs = set()
    seen_as_output = set()
    ops = list(net.Proto().op)
    op_indices = [index for index, op in enumerate(net.Proto().op)]

    # Sanity check: every non-external input must be produced by an earlier op,
    # and no gradient ops may be included in 'net'
    for op in ops:
        for b in op.input:
            if is_activation_blob(b):
                activation_blobs.add(b)
                if b not in seen_as_output:
                    assert False, "{} not in external input".format(b)
        for b in op.output:
            if is_activation_blob(b):
                activation_blobs.add(b)
        seen_as_output = seen_as_output.union(set(op.output))
        assert not op.is_gradient_op, \
            "You can only pass inference-only nets to optimize_inference_for_dag"
    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(),
        [str(s).encode('utf-8') for s in input_blobs],
        op_indices,
        set(str(s).encode('utf-8') for s in activation_blobs),
        namescope.encode('utf-8'),
        set(),
        {}
    )

    log.info("Memonger memory optimization took {} secs".format(
        time.time() - start_time),
    )

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)

    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
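A minimal usage sketch for the function above, assuming it is the optimize_inference_for_dag exported by caffe2.python.memonger; the toy net and every blob name below are invented for illustration.

from caffe2.python import core, memonger

# Toy inference-only net; all blob and operator names are assumptions.
net = core.Net("toy_infer")
net.AddExternalInput("data", "fc_w", "fc_b")
hidden = net.FC(["data", "fc_w", "fc_b"], "hidden")
hidden_relu = net.Relu(hidden, "hidden_relu")
pred = net.FC([hidden_relu, "fc_w", "fc_b"], "pred")
net.AddExternalOutput(pred)

# Recycle intermediate activations; "data" is the head the DAG traversal starts from.
optim_proto = memonger.optimize_inference_for_dag(net, ["data"], "")
net._net.CopyFrom(optim_proto)  # install the optimized NetDef in place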
Example #3
def share_grad_blobs(
    net,
    losses,
    param_grads,
    namescope,
    dont_share_blobs=None,
    share_activations=False,
    blob_shapes=None,
):
    '''
    Implements an optimization similar to Torch's shareGradInput():
    for the gradients passed between layers, share blobs between
    operators when possible. This yields significant memory savings with
    deep networks.

    Returns an optimized protobuf (assign it to net._net)
    '''
    def is_grad_blob(b):
        name = str(b)
        # Note: also match the "_" + namescope prefix so that the
        # auto-split gradient blobs are picked up as well
        return name.endswith("_grad") and (name.startswith(namescope) or
            name.startswith("_" + namescope)) and name not in param_grads

    def is_grad_op(op):
        # TODO: something smarter
        for b in list(op.input) + list(op.output):
            if is_grad_blob(b):
                return True
        return False

    log.warn("NOTE: Executing memonger to optimize gradient memory")

    # Collect ops that have something to do with gradients
    if namescope != "" and not namescope.endswith("/"):
        namescope += "/"

    netproto = copy.deepcopy(net.Proto())
    activations = []
    external_output = set(net.Proto().external_output)

    # Hacky way to get activations, think of a better way
    for op in net.Proto().op:
        for b in op.output:
            if b + "_w" in op.input and b not in external_output:
                activations.append(b)

    # Drop the last two activations, as they are usually accessed externally
    activations = set(activations[:-2])

    # Gradient ops
    grad_op_indices = []
    for idx, op in enumerate(netproto.op):
        if is_grad_op(op):
            grad_op_indices.append(idx)

    shared_blobs = set()
    for op in net.Proto().op:
        for b in list(op.input) + list(op.output):
            if is_grad_blob(b) or (share_activations and b in activations):
                shared_blobs.add(b)
    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(),
        [str(s).encode('utf-8') for s in losses],
        grad_op_indices,
        set(str(s).encode('utf-8') for s in shared_blobs),
        namescope.encode('utf-8'),
        set() if dont_share_blobs is None else dont_share_blobs,
        {} if blob_shapes is None else blob_shapes
    )

    log.info("Memonger memory optimization took {} secs".format(
        time.time() - start_time),
    )

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
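A usage sketch for share_grad_blobs following the pattern the docstring describes, assuming the function above is the one shipped in caffe2.python.memonger; the toy model, blob names, and dimensions are assumptions.

from caffe2.python import brew, memonger, model_helper

# Toy training model; names and dimensions are assumptions.
model = model_helper.ModelHelper(name="toy_train")
fc1 = brew.fc(model, "data", "fc1", dim_in=8, dim_out=8)
relu1 = brew.relu(model, fc1, "relu1")
pred = brew.fc(model, relu1, "pred", dim_in=8, dim_out=2)
softmax, loss = model.net.SoftmaxWithLoss([pred, "label"], ["softmax", "loss"])
model.AddGradientOperators([loss])

optimized_proto = memonger.share_grad_blobs(
    model.net,
    [loss],                                             # losses the backward pass starts from
    set(str(g) for g in model.param_to_grad.values()),  # parameter gradients must not be recycled
    namescope="",                                       # this toy model uses no name scope
    share_activations=False,
)
model.net._net.CopyFrom(optimized_proto)  # assign back, as the docstring suggests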
Example #4
def share_freeze_blobs_c2(
    net,
    namescope,
):

    log.warn("NOTE: Executing memonger to optimize gradient memory")

    # Make sure the namescope ends with '/' so the prefix matching works
    if namescope != "" and not namescope.endswith("/"):
        namescope += "/"

    netproto = copy.deepcopy(net.Proto())
    activations = []
    external_input = set(net.Proto().external_input)
    external_output = set(net.Proto().external_output)

    start_idx = -1
    end_idx = -1

    # Locate the frozen section: from the first Conv op up to the StopGradient op
    for idx, op in enumerate(netproto.op):
        # print(op)
        if namescope not in op.input[0]:
            continue
        if op.type == 'Conv' and start_idx < 0:
            start_idx = idx
        if op.type == 'StopGradient':
            end_idx = idx

    print(namescope, 'start_idx: ', start_idx, ' end_idx: ', end_idx)

    # Hacky way to get activations, think of a better way
    for idx, op in enumerate(netproto.op[start_idx:end_idx]):
        for b in op.output:
            if b not in external_output:
                activations.append(b)

    print('activations: ', activations)

    share_pool = [namescope + '_shared_' + str(i) for i in range(1000, 10000)]
    map_pool = {}

    heads = [namescope + 'data']
    print('heads: ', heads)

    # Remove last activations, as they are usually accessed externally
    activations = set(activations[:-1])
    print('activations: ', activations)

    shared_blobs = activations
    dont_share_blobs = None
    blob_shapes = None
    op_indices = [
        index for index, op in enumerate(netproto.op[start_idx:end_idx + 2])
    ]

    print(op_indices)

    start_time = time.time()
    optim_str = C.memonger_compute_blob_recycling_for_dag(
        netproto.SerializeToString(), [str(s).encode('utf-8') for s in heads],
        op_indices, set(str(s).encode('utf-8') for s in shared_blobs),
        namescope.encode('utf-8'),
        set() if dont_share_blobs is None else dont_share_blobs,
        {} if blob_shapes is None else blob_shapes)

    optim = caffe2_pb2.NetDef()
    optim.ParseFromString(optim_str)
    assert verify_graph_equality(net.Proto(), optim), \
        "Memonger graph is not equal to original."
    assert verify_inplace_blobs(net.Proto(), optim), \
        "Inplace assignments differ in memonger net."
    return optim
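share_freeze_blobs_c2 only recycles activations produced between the first Conv op and the StopGradient op under the given scope, i.e. the frozen front of a network, and it hard-codes namescope + 'data' as the head blob. A hypothetical call pattern, where the model and the "gpu_0/" scope are assumptions:

# Hypothetical call pattern; model and the "gpu_0/" scope are assumptions.
# The net must contain a Conv ... StopGradient frozen section and a blob
# named "gpu_0/data".
optim_proto = share_freeze_blobs_c2(model.net, "gpu_0/")
model.net._net.CopyFrom(optim_proto)  # install the recycled NetDef in place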