def main(_):
    """Build the VGG16 classification graph and its distributed training op.

    Initializes the `tr` distributed runtime, loads the dataset through
    `load_data()`, creates placeholders for 32x32x3 images and their labels,
    projects the VGG16 output through a 512x10 linear layer, and wires a
    gradient-descent optimizer wrapped in `tr.DistributedOptimizer`.

    Args:
        _: Unused positional argument (ignored).
    """
    # Initialize
    print("ok")
    tr.init()
    # The data is loaded here but not consumed further in this function.
    train_images, train_labels, test_images, test_labels = load_data()
    print("ok2")

    global_step = tf.Variable(0, name="global_step", trainable=False)
    with tf.name_scope('input'):
        # input #
        x_input = tf.placeholder(shape=[None, 32, 32, 3], dtype=tf.float32)
        # output #
        ans = tf.placeholder(shape=None, dtype=tf.float32)
        # From here on `ans` names the cast/squeezed tensor, not the placeholder.
        ans = tf.squeeze(tf.cast(ans, tf.float32))
    print("ok3")

    # use VGG16 network
    vgg = VGG16()
    # params for converting to answer-label-size
    w = tf.Variable(tf.truncated_normal([512, 10], 0.0, 1.0) * 0.01, name='w_last')
    b = tf.Variable(tf.truncated_normal([10], 0.0, 1.0) * 0.01, name='b_last')
    print("ok4")

    fmap = vgg.build(x_input, is_training=True)
    logits = tf.add(tf.matmul(fmap, w), b)
    # Class probabilities; kept so they remain available in the graph.
    predict = tf.nn.softmax(logits)
    # Cross entropy computed from logits via log_softmax: taking tf.log of the
    # softmax output yields -inf (and NaN gradients) once a probability
    # underflows to 0, whereas log_softmax is the same quantity computed stably.
    loss = tf.reduce_mean(
        -tf.reduce_sum(ans * tf.nn.log_softmax(logits), axis=1))
    print("ok5")

    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        #add our code
        optimizer = tr.DistributedOptimizer(optimizer)
        train_step = optimizer.minimize(loss, global_step=global_step)
    print("ok7")
def main(_):
    """Train a VGG16-based 10-class classifier with the `tr` distributed optimizer.

    Builds the graph (32x32x3 image placeholder, VGG16 feature extractor, a
    512x10 linear layer and a cross-entropy loss), wraps a gradient-descent
    optimizer in `tr.DistributedOptimizer`, and runs training inside a
    `tf.train.MonitoredTrainingSession` until the `StopAtStepHook`
    (last_step=10000) requests a stop. Step and loss are logged every 10
    iterations. Checkpointing is disabled (`checkpoint_dir` is None).

    Args:
        _: Unused positional argument (ignored).
    """
    # Initialize
    print("ok")
    tr.init()
    train_images, train_labels, test_images, test_labels = load_data()
    print("ok2")

    global_step = tf.Variable(0, name="global_step", trainable=False)
    with tf.name_scope('input'):
        # input #
        x_input = tf.placeholder(shape=[None, 32, 32, 3], dtype=tf.float32)
        # output #
        ans = tf.placeholder(shape=None, dtype=tf.float32)
        # From here on `ans` names the cast/squeezed tensor, not the placeholder.
        ans = tf.squeeze(tf.cast(ans, tf.float32))
    print("ok3")

    # use VGG16 network
    vgg = VGG16()
    # params for converting to answer-label-size
    w = tf.Variable(tf.truncated_normal([512, 10], 0.0, 1.0) * 0.01, name='w_last')
    b = tf.Variable(tf.truncated_normal([10], 0.0, 1.0) * 0.01, name='b_last')
    print("ok4")

    fmap = vgg.build(x_input, is_training=True)
    logits = tf.add(tf.matmul(fmap, w), b)
    # Class probabilities; kept so they remain available in the graph.
    predict = tf.nn.softmax(logits)
    # Cross entropy computed from logits via log_softmax: taking tf.log of the
    # softmax output yields -inf (and NaN gradients) once a probability
    # underflows to 0, whereas log_softmax is the same quantity computed stably.
    loss = tf.reduce_mean(
        -tf.reduce_sum(ans * tf.nn.log_softmax(logits), axis=1))
    print("ok5")

    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        #add our code
        optimizer = tr.DistributedOptimizer(optimizer)
        train_step = optimizer.minimize(loss, global_step=global_step)
    print("ok7")

    #add our code
    hooks = [
        # Presumably synchronizes initial variable values from rank 0 to the
        # other workers -- confirm against the `tr` module.
        tr.BroadcastGlobalVariablesHook(0),
        tf.train.StopAtStepHook(last_step=10000),
        tf.train.LoggingTensorHook(
            tensors={'step': global_step, 'loss': loss},
            every_n_iter=10),
    ]

    # Allocate GPU memory on demand. Per-rank GPU pinning
    # (gpu_options.visible_device_list) is currently not configured.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Checkpointing is disabled; if enabled, only worker 0 should save so that
    # other workers cannot corrupt the checkpoints.
    checkpoint_dir = None

    # The MonitoredTrainingSession takes care of session initialization,
    # restoring from a checkpoint, saving to a checkpoint, and closing when done
    # or an error occurs.
    print("ok 9")
    with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                           hooks=hooks,
                                           config=config) as mon_sess:
        while not mon_sess.should_stop():
            # Run a training step synchronously.
            # NOTE(review): the third argument is the full label count; if it
            # is the batch size, every step uses the whole training set -- confirm.
            batch, actuals = get_next_batch(train_images, train_labels,
                                            len(train_labels))
            # NOTE(review): `ans` is the squeezed/cast tensor rather than the
            # placeholder, so fed values appear to bypass the cast/squeeze --
            # confirm this is intended.
            mon_sess.run(train_step, feed_dict={x_input: batch, ans: actuals})
def main(_):
    """Train the MNIST convolutional model with the Bcube distributed optimizer.

    Sets up the `tr` runtime, reads MNIST into a rank-specific directory,
    builds the model via `conv_model`, and trains with RMSProp wrapped in
    `tr.DistributedOptimizer` inside a `tf.train.MonitoredTrainingSession`.
    The elapsed wall-clock time is printed after every 1000 training steps.

    Args:
        _: Unused positional argument (ignored).
    """
    # Bring up Bcube first.
    tr.init()

    # Fetch/load MNIST; the directory name carries this process's rank.
    data_dir = 'MNIST-data-%d' % tr.rank()
    mnist = learn.datasets.mnist.read_data_sets(data_dir)

    # Model inputs and the model itself.
    with tf.name_scope('input'):
        image = tf.placeholder(tf.float32, [None, 784], name='image')
        label = tf.placeholder(tf.float32, [None], name='label')
    predict, loss = conv_model(image, label, tf.contrib.learn.ModeKeys.TRAIN)

    # RMSProp wrapped by the Bcube distributed optimizer.
    base_opt = tf.train.RMSPropOptimizer(0.01)
    opt = tr.DistributedOptimizer(base_opt)

    global_step = tf.contrib.framework.get_or_create_global_step()
    train_op = opt.minimize(loss, global_step=global_step)

    # The broadcast hook pushes rank 0's initial variable state to every other
    # process so that all workers start from identical weights, whether those
    # come from random initialization or a restored checkpoint.
    logged_tensors = {'step': global_step, 'loss': loss}
    hooks = [
        tr.BroadcastGlobalVariablesHook(0),
        tf.train.StopAtStepHook(last_step=10000),
        tf.train.LoggingTensorHook(tensors=logged_tensors, every_n_iter=10),
    ]

    # GPU memory grows on demand; per-rank device pinning is not configured.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # No checkpoints are written (only worker 0 should ever write them).
    checkpoint_dir = None

    # MonitoredTrainingSession handles initialization, checkpoint restore and
    # save, and shutdown on completion or error.
    window_start = time.time()
    steps_done = 0
    with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                           hooks=hooks,
                                           config=config) as mon_sess:
        while not mon_sess.should_stop():
            if steps_done == 0:
                # Restart the clock so session start-up time is not counted.
                window_start = time.time()

            # One synchronous training step.
            image_, label_ = mnist.train.next_batch(batch_size)
            mon_sess.run(train_op, feed_dict={image: image_, label: label_})
            steps_done += 1

            if steps_done % 1000 != 0:
                continue
            window_end = time.time()
            time_period = window_end - window_start
            print("cnt = %d time_period = %f seconds" % (steps_done, time_period))
            window_start = time.time()
def main(_):
    """Train a VGG19 network with the `tr` distributed optimizer.

    Builds the VGG19 graph on fixed-size `[batch_size, 224, 224, 3]` inputs,
    runs one forward pass as a sanity check, then trains against a
    sum-of-squares loss on `vgg.prob` inside a
    `tf.train.MonitoredTrainingSession` until the `StopAtStepHook`
    (last_step=10000) requests a stop. The elapsed time is printed every 100
    steps and the total step count at the end.

    Args:
        _: Unused positional argument (ignored).
    """
    tr.init()

    # Session options are built up front so that the sanity-check session and
    # the training session are created with the same GPU settings (memory is
    # allocated on demand; per-rank GPU pinning is not configured).
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    global_step = tf.Variable(0, name="global_step", trainable=False)
    with tf.name_scope('input'):
        images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
        true_out = tf.placeholder(tf.float32, [batch_size, 1000])
        train_mode = tf.placeholder(tf.bool)

        vgg = vgg19.Vgg19()
        vgg.build(images, train_mode)

        # print number of variables used: 143667240 variables, i.e. ideal size = 548MB
        print(vgg.get_var_count())

        # test classification: one forward pass in inference mode. The session
        # is context-managed so it is closed before training starts instead of
        # being leaked for the lifetime of the process.
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            batch, _ = get_next_batch(batch_size)
            sess.run(vgg.prob, feed_dict={images: batch, train_mode: False})
    print("ok5")

    with tf.name_scope('train'):
        loss = tf.reduce_sum((vgg.prob - true_out)**2)
        optimizer = tf.train.GradientDescentOptimizer(0.0001)
        optimizer = tr.DistributedOptimizer(optimizer)
        train_step = optimizer.minimize(loss, global_step=global_step)
    print("ok7")

    #add our code
    hooks = [
        # Presumably synchronizes initial variable values from rank 0 to the
        # other workers -- confirm against the `tr` module.
        tr.BroadcastGlobalVariablesHook(0),
        tf.train.StopAtStepHook(last_step=10000),
        tf.train.LoggingTensorHook(
            tensors={'step': global_step, 'loss': loss},
            every_n_iter=1),
    ]

    # Checkpointing is disabled; if enabled, only worker 0 should save so that
    # other workers cannot corrupt the checkpoints.
    checkpoint_dir = None

    # The MonitoredTrainingSession takes care of session initialization,
    # restoring from a checkpoint, saving to a checkpoint, and closing when done
    # or an error occurs.
    print("ok 9")
    cnt = 0
    start_t = 0
    with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                           hooks=hooks,
                                           config=config) as mon_sess:
        start_t = time.time()
        while not mon_sess.should_stop():
            # Run a training step synchronously.
            batch, actuals = get_next_batch(batch_size)
            mon_sess.run(train_step, feed_dict={
                images: batch,
                true_out: actuals,
                train_mode: True
            })
            cnt = cnt + 1
            if cnt % 100 == 0:
                end_t = time.time()
                inter_val = end_t - start_t
                start_t = end_t
                # NOTE(review): %d truncates the elapsed seconds to an integer.
                print("cnt = %d interval = %d" % (cnt, inter_val))

    print("DONE")
    print(cnt)
def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor):
    """Append a trainable fully-connected + softmax head for the new classes.

    The top layer has to be retrained to recognise the new categories, so this
    function adds the required variables and ops to the graph (weights, biases,
    logits, softmax, cross entropy) together with a distributed gradient-descent
    training op for the backward pass.

    The softmax / fully-connected set-up follows:
    https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html

    Args:
        class_count: Integer number of categories to recognise.
        final_tensor_name: Name string for the new final node that produces
            results.
        bottleneck_tensor: The output of the main CNN graph.

    Returns:
        A tuple `(train_step, cross_entropy_mean, bottleneck_input,
        ground_truth_input, final_tensor)`: the training op, the mean
        cross-entropy tensor, the bottleneck and ground-truth inputs, and the
        softmax output tensor.
    """
    with tf.name_scope('input'):
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor,
            shape=[None, BOTTLENECK_TENSOR_SIZE],
            name='BottleneckInputPlaceholder')
        ground_truth_input = tf.placeholder(
            tf.float32, [None, class_count], name='GroundTruthInput')

    # Everything for the new layer lives under a single `final_training_ops`
    # scope so it shows up as one group in TensorBoard.
    with tf.name_scope('final_training_ops'):
        with tf.name_scope('weights'):
            weights_init = tf.truncated_normal(
                [BOTTLENECK_TENSOR_SIZE, class_count], stddev=0.001)
            layer_weights = tf.Variable(weights_init, name='final_weights')
            variable_summaries(layer_weights)

        with tf.name_scope('biases'):
            biases_init = tf.zeros([class_count])
            layer_biases = tf.Variable(biases_init, name='final_biases')
            variable_summaries(layer_biases)

        with tf.name_scope('Wx_plus_b'):
            logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
            tf.summary.histogram('pre_activations', logits)

    final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
    tf.summary.histogram('activations', final_tensor)

    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input,
            logits=logits)
        with tf.name_scope('total'):
            cross_entropy_mean = tf.reduce_mean(cross_entropy)
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    with tf.name_scope('train'):
        # Plain gradient descent, wrapped so it runs through the `tr`
        # distributed optimizer.
        base_optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        distributed_optimizer = tr.DistributedOptimizer(base_optimizer)
        train_step = distributed_optimizer.minimize(cross_entropy_mean)

    outputs = (train_step, cross_entropy_mean, bottleneck_input,
               ground_truth_input, final_tensor)
    return outputs
tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) tf.summary.histogram('activations', final_tensor) with tf.name_scope('cross_entropy'): cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=ground_truth_input, logits=logits) with tf.name_scope('total'): cross_entropy_mean = tf.reduce_mean(cross_entropy) tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'): optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) #add our code optimizer = tr.DistributedOptimizer(optimizer) train_step = optimizer.minimize(cross_entropy_mean, global_step = global_step) # Create the operations we need to evaluate the accuracy of our new layer. evaluation_step, prediction = add_evaluation_step( final_tensor, ground_truth_input) # Merge all the summaries and write them out to the summaries_dir merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)