Exemple #1
0
    def testChainOfMatmul(self):
        """Check the peak GPU memory reported after two chained matmuls.

        Runs ``matmul(matmul(a, b), b)`` on 64x64 float32 matrices and asserts
        that ``MaxBytesInUse`` reports at least three and fewer than four
        matrices' worth of bytes. Returns without asserting anything when no
        GPU is available.
        """
        # The MaxBytesInUse kernel is registered on GPU only
        # (see kernels/memory_stats_ops.cc).
        gpu_present = test.is_gpu_available()
        if not gpu_present:
            return

        with self.test_session(use_gpu=True) as session:
            side = 64
            element_type = dtypes.float32
            shape = tensor_shape.TensorShape([side, side])
            bytes_per_matrix = shape.num_elements() * element_type.size

            # Two random operands feed a chain of two matmuls; `rhs` is
            # reused as the right-hand operand of both products.
            lhs = random_ops.random_uniform(shape, dtype=element_type)
            rhs = random_ops.random_uniform(shape, dtype=element_type)
            first_product = math_ops.matmul(lhs, rhs)
            second_product = math_ops.matmul(first_product, rhs)
            session.run(second_product)

            # Read the peak in a separate run, after the chain has executed.
            peak_op = memory_stats_ops.MaxBytesInUse()
            peak_bytes = session.run(peak_op)
            self.assertGreaterEqual(peak_bytes, 3 * bytes_per_matrix)
            self.assertLess(peak_bytes, 4 * bytes_per_matrix)
    def testChainOfMatmul(self):
        """Check MaxBytesInUse and BytesInUse around chained GPU matmuls.

        First runs ``matmul(matmul(a, b), b)`` on 64x64 float32 matrices and
        asserts the reported peak is at least 3 and fewer than 4 matrices'
        worth of bytes. Then runs a single matmul whose second operand is
        gated behind a ``BytesInUse`` op via control dependencies, asserts
        that reading is at least 1 and fewer than 2 matrices, and asserts the
        peak fetched in the same run is still at least 3 matrices. Returns
        without asserting anything when no GPU is available.
        """
        # The MaxBytesInUse kernel is registered on GPU only
        # (see kernels/memory_stats_ops.cc).
        gpu_present = test.is_gpu_available()
        if not gpu_present:
            return

        with self.test_session(use_gpu=True) as session:
            side = 64
            shape = tensor_shape.TensorShape([side, side])
            element_type = dtypes.float32
            bytes_per_matrix = shape.num_elements() * element_type.size

            # Phase 1: chain of two matmuls sharing the right-hand operand.
            lhs = random_ops.random_uniform(shape, dtype=element_type)
            rhs = random_ops.random_uniform(shape, dtype=element_type)
            inner_product = math_ops.matmul(lhs, rhs)
            outer_product = math_ops.matmul(inner_product, rhs)
            session.run(outer_product)

            peak_op = memory_stats_ops.MaxBytesInUse()
            peak_bytes = session.run(peak_op)
            self.assertGreaterEqual(peak_bytes, 3 * bytes_per_matrix)
            self.assertLess(peak_bytes, 4 * bytes_per_matrix)

            # Phase 2: a single matmul. The control dependencies order the
            # BytesInUse op after the first operand and before the second
            # operand and the product, so it samples intermediate usage.
            first_operand = random_ops.random_uniform(
                shape, dtype=element_type)
            with ops.control_dependencies([first_operand]):
                current_op = memory_stats_ops.BytesInUse()
            with ops.control_dependencies([current_op]):
                second_operand = random_ops.random_uniform(
                    shape, dtype=element_type)
                product = math_ops.matmul(first_operand, second_operand)

            fetched = session.run([product, current_op, peak_op])
            current_bytes = fetched[1]
            peak_bytes = fetched[2]

            # When sampled, the intermediate state is expected to hold one
            # matrix: at least 1 and fewer than 2 matrices' worth of bytes.
            self.assertGreaterEqual(current_bytes, 1 * bytes_per_matrix)
            self.assertLess(current_bytes, 2 * bytes_per_matrix)

            # The peak is still at least 3 matrices because it reflects the
            # maximum reached during the previous .run call.
            self.assertGreaterEqual(peak_bytes, 3 * bytes_per_matrix)
Exemple #3
0
import numpy as np
# Synthetic all-ones batch fed in place of real data: `batch_size` rows of
# 200 * 200 float32 values (presumably flattened 200x200 images - confirm
# against the placeholder X) plus one float32 label per row.
picture = np.ones([batch_size, 200 * 200], dtype=np.float32)
picture_label = np.ones([batch_size], dtype=np.float32)

# Build the memory-stats op once, outside the training loop. Calling
# memory_stats_ops.MaxBytesInUse() on every step would add a new node to the
# graph each iteration.
max_bytes_in_use_op = memory_stats_ops.MaxBytesInUse()

with tf.Session(config=config) as sess:
    init.run()
    for epoch in range(n_epochs):
        # A fixed 5 steps per epoch on the synthetic batch; the MNIST variant
        # iterated mnist.train.num_examples // batch_size times and pulled
        # X_batch, y_batch from mnist.train.next_batch(batch_size).
        for iteration in range(5):
            # To profile, additionally pass options=run_options and
            # run_metadata=run_metadata to this call.
            sess.run(training_op, feed_dict={
                X: picture,
                y: picture_label
            })
            # Peak bytes in use so far, converted to MB for printing.
            max_bytes_in_use = sess.run(max_bytes_in_use_op) / 1e6
            print("step:%i, Max Memory used: %.2f MB " %
                  (iteration, max_bytes_in_use))
            # Disabled profiling code (requires run_metadata from the
            # sess.run call above):
            #
            # for device in run_metadata.step_stats.dev_stats:
            #     device_name = device.device
            #     print(".........device:", device.device)
            #     for node in device.node_stats:
            #         print("   ................node_stats:", str(node))
            #
            # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            # chrome_trace = fetched_timeline.generate_chrome_trace_format()
            # with open('timeline_step_%d.json' % iteration, 'w') as f:
            #     f.write(chrome_trace)
Exemple #4
0
def main(_):
    """Train the MNIST deep net with Large Model Support and report memory.

    Builds the graph (placeholders, the ``deepnn`` model, loss, Adam update
    and accuracy), runs ``LMS`` over the ``adam_optimizer`` scope, writes the
    resulting graph to a temporary directory, then trains for 30 steps on
    batches of 2000 examples. Every 10th step prints the training accuracy
    and the ``MaxBytesInUse`` reading in MB; the test accuracy is printed at
    the end.

    Args:
        _: Unused positional argument (presumably the argv list supplied by
            the application runner - confirm at the call site).
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_,
                                                               logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        optimizer = tf.train.AdamOptimizer()
        # Gradients are computed through an explicit tf.gradients call rather
        # than optimizer.minimize(); the original author marked this as
        # important (presumably so the gradient function can be swapped for a
        # memory-saving implementation - TODO confirm).
        trainable_vars = tf.trainable_variables()
        grads = tf.gradients(cross_entropy, trainable_vars)
        grads_and_vars = list(zip(grads, trainable_vars))
        train_step = optimizer.apply_gradients(grads_and_vars)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    # Enable Large Model Support on the optimizer scope. Imported here, after
    # the model graph has been built, exactly as in the original code.
    from tensorflow_large_model_support import LMS
    # Alternative configuration tried earlier:
    #   LMS({'adam_optimizer'},
    #       excl_scopes={'loss', 'accuracy', 'dropout'},
    #       lb=3)
    lms_model = LMS({'adam_optimizer'})
    lms_model.run(graph=tf.get_default_graph())

    # Dump the graph for inspection. The writer is closed right away because
    # nothing else is written to it; closing ensures the graph event is
    # flushed to disk instead of being left in the writer's buffer.
    graph_location = tempfile.mkdtemp()
    train_writer = tf.summary.FileWriter(graph_location)
    try:
        train_writer.add_graph(tf.get_default_graph())
    finally:
        train_writer.close()

    # Created once, after the graph dump, and reused for every reading.
    max_use = memory_stats_ops.MaxBytesInUse()

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True  # left disabled by the author
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(30):
            batch = mnist.train.next_batch(2000)
            if i % 10 == 0:
                # Evaluate with keep_prob 1.0 before taking the training step.
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })
                print('step %d, training accuracy %g' % (i, train_accuracy))
                # Peak bytes in use so far, printed in MB.
                print(sess.run(max_use) / 1e6)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            })

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0
        }))
Exemple #5
0
                     },
                     options=run_options,
                     run_metadata=run_metadata)
            if iteration % check_interval == 0:
                loss_val = loss.eval(feed_dict={
                    X: mnist.validation.images,
                    y: mnist.validation.labels
                })
                if loss_val < best_loss_val:
                    best_loss_val = loss_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    checks_since_last_progress += 1
                max_bytes_in_use = sess.run(
                    memory_stats_ops.MaxBytesInUse()) / 1e6
                print("Max Memory used: %.2f MB " % (max_bytes_in_use))
                #mem_use = mem_util.peak_memory(run_metadata)['/gpu:0']/1e6
                #print("Memory used: %.2f MB "%(mem_use))

        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={
            X: mnist.validation.images,
            y: mnist.validation.labels
        })
        print(
            "Epoch {}, train accuracy: {:.4f}%, valid. accuracy: {:.4f}%, valid. best loss: {:.6f}"
            .format(epoch, acc_train * 100, acc_val * 100, best_loss_val))
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            break