Example #1
def test_dual_chain():
    """Runs regular chain gradient, makes sure memory usage makes sense."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 5
    nodes1 = make_chain_tanh_constant(n, "a")
    nodes2 = make_chain_tanh_constant(n, "b")

    a0, b0 = nodes1[0], nodes2[0]
    a, b = nodes1[-1], nodes2[-1]
    grad = tf.gradients([a + b], [a0, b0])

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun([grad[0].op, grad[1].op])

    peak_memory = cpu_peak()
    expected_peak = (2 * n + 1) * 10**6
    util.report_memory(peak_memory, expected_peak)

    # 1 unit of memory slack since parallel computation chains add
    # scheduling variability
    if not REMOVE_ASSERTS:
        assert (peak_memory -
                expected_peak) < 1.1 * 10**6, "Difference too large."
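All of these tests build their graphs with make_chain_tanh_constant, which lives in the repo's util module. A minimal sketch of what it plausibly does, inferred from how it is used here (the 1 MB-per-activation size and the exact op naming are assumptions, matching the a00/a01/... names in the comments below):

import tensorflow as tf

def make_chain_tanh_constant(n, name_prefix="a", node_mbs=1):
    """Sketch: builds a chain of n tanh nodes, ~node_mbs MB of float32 each."""
    n_entries = int(node_mbs * 10**6 / 4)  # float32 -> 4 bytes per entry
    node = tf.ones((n_entries,), dtype=tf.float32, name=name_prefix + "00")
    nodes = [node]
    for i in range(1, n):
        node = tf.tanh(node, name="%s%02d" % (name_prefix, i))
        nodes.append(node)
    return nodes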
Example #2
def test_dual_chain_rewrite():
    """Runs regular chain gradient, makes sure memory usage makes sense."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 5
    nodes1 = make_chain_tanh_constant(n, "a")
    nodes2 = make_chain_tanh_constant(n, "b")

    a0, b0 = nodes1[0], nodes2[0]
    a, b = nodes1[-1], nodes2[-1]

    grad = memory_saving_gradients.gradients(
        [a + b], [a0, b0], checkpoints=[nodes1[2], nodes2[2]])

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun([grad[0].op, grad[1].op])

    peak_memory = cpu_peak()
    # normal usage comes from 2*n nodes + the default ygrad node + 2 gradient nodes;
    # here we save 2 units of memory by temporarily dropping 2 activations (a1/b1).
    # This also moves the "peak memory" scenario lower down the chain, where the
    # final addition node's activations are no longer needed (another -1)
    expected_peak = (2 * (n - 1) + 1) * 10**6
    util.report_memory(peak_memory, expected_peak)

    # two independent chains add scheduling variability, so allow 4MB of slack
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 4.1e6, "Difference too large."
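For context, the checkpoints argument is what makes memory_saving_gradients.gradients a drop-in replacement for tf.gradients: activations between checkpointed tensors are discarded after the forward pass and recomputed when the backward pass needs them. A hedged usage sketch outside the test harness (TF 1.x graph mode assumed; the layer sizes and names are illustrative, not from these tests):

import tensorflow as tf
import memory_saving_gradients

x = tf.placeholder(tf.float32, shape=[None, 1024])
h1 = tf.layers.dense(x, 1024, activation=tf.nn.tanh, name="h1")
h2 = tf.layers.dense(h1, 1024, activation=tf.nn.tanh, name="h2")
loss = tf.reduce_sum(h2)

# checkpoint h1 explicitly; anything between checkpoints is recomputed
grads = memory_saving_gradients.gradients([loss], [x], checkpoints=[h1])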
Example #5
def test_long_chain_tarjan(linearize=False):
    """Like test_chain, but use automatic rewriting with checkpoints="tarjan" 
  strategy."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 100

    nodes = make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    grad = memory_saving_gradients.gradients_tarjan([a], [a0])[0]

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun(grad.op)

    if linearize:
        added = linearize_lib.linearize()

    peak_memory = cpu_peak()
    # points picked:
    # a09:0, a19:0, a29:0, a39:0, a49:0, a58:0, a68:0, a78:0, a88:0, a97:0
    expected_peak = 18e6
    util.report_memory(peak_memory, expected_peak)

    # todo: remove "REMOVE_ASSERTS"
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
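The comment above lists the tensors the "tarjan" strategy picked on its own: roughly every tenth node, chosen among the chain's articulation points (compare test_articulation_points further down). A hedged sketch of the equivalent manual call, reusing a and a0 from the test above (tensor names follow make_chain_tanh_constant's convention):

g = tf.get_default_graph()
picked = [g.get_tensor_by_name(name) for name in
          ["a09:0", "a19:0", "a29:0", "a39:0", "a49:0",
           "a58:0", "a68:0", "a78:0", "a88:0", "a97:0"]]
# should land near the same 18MB peak as the automatic strategy
grad = memory_saving_gradients.gradients([a], [a0], checkpoints=picked)[0]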
Example #6
def test_long_chain_memory(linearize=False):
    """Like test_chain, but use automatic rewriting with checkpoints="memory" 
  strategy."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 100

    nodes = make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    tf.add_to_collection("checkpoints", nodes[10])
    tf.add_to_collection("checkpoints", nodes[20])
    #grad = memory_saving_gradients.gradients_collection([a], [a0])[0]
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun(grad.op)

    if linearize:
        added = linearize_lib.linearize()

    peak_memory = cpu_peak()
    # 20 MB used, with the following tensors picked automatically as bottlenecks
    # ['a10:0', 'a19:0', 'a28:0', 'a37:0', 'a46:0', 'a55:0', 'a64:0', 'a73:0',
    # 'a82:0', 'a91:0']
    expected_peak = 20 * 10**6
    util.report_memory(peak_memory, expected_peak)

    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
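Note the two tf.add_to_collection("checkpoints", ...) calls above: the commented-out line suggests the module can also read checkpoints from that graph collection rather than picking its own. A hedged sketch of that variant, assuming gradients_collection mirrors gradients_memory's signature as the commented-out line implies:

# register hand-picked tensors in the "checkpoints" graph collection ...
tf.add_to_collection("checkpoints", nodes[10])
tf.add_to_collection("checkpoints", nodes[20])
# ... then have the rewriter read them back instead of choosing automatically
grad = memory_saving_gradients.gradients_collection([a], [a0])[0]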
Example #7
def test_chain_memory(linearize=False):
    """Like test_chain, but use automatic rewriting with checkpoints="memory" strat."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 6  # for n=5, the only choice that saves memory is a2, yet the alg picks a3;
           # hence use n>5 to avoid this edge condition

    nodes = make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    grad = memory_saving_gradients.gradients_memory([a], [a0])[0]

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun(grad.op)

    if linearize:
        linearize_lib.linearize()

    peak_memory = cpu_peak()
    # 1 unit for each node + 1 for the generated "loss" grad tensor - 1 saved
    expected_peak = (n + 1 - 1) * 10**6
    util.report_memory(peak_memory, expected_peak)
    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 10000, "Difference too large."
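The expected_peak arithmetic in these tests all follows the same accounting, assuming roughly 1 MB per activation: each live node costs one unit, the generated gradient for the "loss" tensor costs one more, and every activation the rewrite drops (and later recomputes) saves one. For this test with n = 6:

MB = 10**6
n = 6
baseline_peak = (n + 1) * MB       # n activations + generated "loss" gradient
rewritten_peak = (n + 1 - 1) * MB  # one activation dropped and recomputed
assert rewritten_peak == 6 * MB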
Example #11
def test_targets():
    tf.reset_default_graph()
    n = 5
    g = tf.get_default_graph()
    nodes1 = util.make_chain_tanh_constant(n, "a")
    nodes2 = util.make_chain_tanh_constant(n, "b")

    a0, b0 = nodes1[0], nodes2[0]
    a, b = nodes1[-1], nodes2[-1]
    grad1 = tf.gradients([a], [a0, b0])
    grad2 = tf.gradients([b], [a0, b0])
    # linearize returns the number of control dependencies it added
    assert linearize_lib.linearize(grad1) == 3
    old_version = g._version
    # second call is a no-op: nothing added, graph version unchanged
    assert linearize_lib.linearize(grad1) == 0
    assert g._version == old_version

    assert linearize_lib.linearize(grad2) == 3
    assert linearize_lib.linearize(grad2) == 0
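The pattern under test: linearize returns how many control dependencies it added, so a second call on an unchanged graph returns 0, and the unchanged g._version confirms no ops were created. The same idempotence should hold for the whole-graph form used by the linearize=True branches above (a hedged sketch):

added = linearize_lib.linearize()      # pins an execution order on the graph
assert linearize_lib.linearize() == 0  # second call finds nothing to add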
Example #13
def test_chain_linearize():
    tf.reset_default_graph()
    n = 5
    nodes = util.make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    order1 = linearize_lib.obtain_linear_order()
    observed_order1 = [node.name for node in order1]  # op names in execution order

    # a plain chain is already linearly ordered, so no control deps are needed
    num_new_deps = linearize_lib.linearize()
    assert num_new_deps == 0
Example #14
def test_articulation_points():
    tf.reset_default_graph()
    n = 5
    nodes = util.make_chain_tanh_constant(n)
    a0 = nodes[0]
    a = nodes[-1]
    points = linearize_lib.sorted_articulation_points(None)
    # the chain nodes are ['a00', 'a01', 'a02', 'a03', 'a04']; end-points are
    # not considered separators, so only the interior nodes remain
    assert util.format_ops(points) == ['a01', 'a02', 'a03']

    tf.reset_default_graph()
    n = 5
    nodes = _make_simple_caterpillar_graph(n)
    a0 = nodes[0]
    a = nodes[-1]
    points = linearize_lib.sorted_articulation_points(None)

    assert util.format_ops(points) == [
        'merge0', 'merge1', 'merge2', 'merge3', 'merge4', 'merge5'
    ]
Example #15
def test_chain_rewrite_save_one_before_last():
    """Take chain of length 5, save first node."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 5

    a0, a1, a2, a3, a4 = make_chain_tanh_constant(n)
    grad = memory_saving_gradients.gradients([a4], [a0], checkpoints=[a2])[0]
    expected_peak = (n + 1 - 2) * 10**6

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun(grad.op)

    peak_memory = cpu_peak()
    util.report_memory(peak_memory, expected_peak)

    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
def test_chain_rewrite_save_first():
    """Take chain of length 5, checkpoint nodes a1 and a3."""

    tf.reset_default_graph()
    tf_dev = tf.device('/cpu:0')
    tf_dev.__enter__()

    n = 5

    a0, a1, a2, a3, a4 = make_chain_tanh_constant(n)
    grad = memory_saving_gradients.gradients([a4], [a0],
                                             checkpoints=[a1, a3])[0]
    expected_peak = (n + 1 - 2) * 10**6

    sess = create_session()
    sessrun(tf.global_variables_initializer())

    sessrun(grad.op)

    peak_memory = cpu_peak()
    util.report_memory(peak_memory, expected_peak)

    if not REMOVE_ASSERTS:
        assert (peak_memory - expected_peak) < 1.1e6, "Difference too large."
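These last two tests pick checkpoints by hand. A common rule of thumb, from standard gradient-checkpointing analysis rather than from these tests: checkpoint roughly every sqrt(n)-th node of an n-node chain, which bounds peak memory at O(sqrt(n)) activations in exchange for about one extra forward pass of recompute. A hedged sketch with a hypothetical helper:

import math

def sqrt_checkpoints(nodes):
    """Hypothetical helper: evenly spaced checkpoints, ~sqrt(n) apart."""
    stride = max(1, int(math.sqrt(len(nodes))))
    return nodes[stride::stride]

nodes = make_chain_tanh_constant(25)
grad = memory_saving_gradients.gradients(
    [nodes[-1]], [nodes[0]], checkpoints=sqrt_checkpoints(nodes))[0]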