def train(model='fcn5'): config = tf.ConfigProto(allow_soft_placement=False,log_device_placement=FLAGS.log_device_placement) device_id = FLAGS.device_id device_str = '' if int(device_id) >= 0: device_str = '/gpu:%d'%int(device_id) else: device_str = '/cpu:0' with tf.Graph().as_default(), tf.device(device_str), tf.Session(config=config) as sess: feature_dim = models.feature_dim label_dim = models.label_dim images = tf.placeholder(tf.float32, [None, feature_dim]) labels = tf.placeholder(tf.float32, [None, label_dim]) logits = None if model == 'fcn5': logits = models.model_fcn5(images) else: logits = models.model_fcn8(images) loss = models.loss(logits, labels) predictionCorrectness = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) accuracy = tf.reduce_mean(tf.cast(predictionCorrectness, "float")) lr = 0.05 #optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss) optimizer = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss) init = tf.initialize_all_variables() sess.run(init) tf.train.start_queue_runners(sess=sess) batch_size_per_epoch = int((EPOCH_SIZE + FLAGS.batch_size - 1)/ FLAGS.batch_size) iterations = FLAGS.epochs * batch_size_per_epoch average_batch_time = 0.0 epochs_info = [] average_loss = 0.0 for step in range(iterations): start_time = time.time() imgs, labs = get_real_batch_data(FLAGS.batch_size, 10) _, loss_value = sess.run([optimizer, loss], feed_dict={images:imgs,labels:labs}) average_loss += loss_value duration = time.time() - start_time average_batch_time += float(duration) assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % FLAGS.log_step == 0: examples_per_sec = FLAGS.batch_size / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step > 0 and step % (FLAGS.eval_step * batch_size_per_epoch) == 0: average_loss /= FLAGS.eval_step * batch_size_per_epoch 
accuracy_value = accuracy.eval(feed_dict={images: mnist.test.images, labels: mnist.test.labels}) print("test accuracy %g"%accuracy_value) epochs_info.append('%d:%g:%s'%(step/(FLAGS.eval_step*batch_size_per_epoch), accuracy_value, average_loss)) average_loss = 0.0 average_batch_time /= iterations print 'average_batch_time: ', average_batch_time print ('epoch_info: %s' % ','.join(epochs_info))
def train(model='fcn5'):
    """Train fcn5 on MNIST using in-graph data parallelism over multiple GPUs.

    Each GPU builds its own 'tower' (placeholders + model + loss), gradients
    are averaged across towers on the CPU, and a single apply step updates
    the shared variables.  Requires FLAGS.num_gpus >= 2.
    """
    if FLAGS.num_gpus < 2:
        print("The number of GPU should be 2 or more, if you use one GPU, please use fcn5_mnist.py to train")
        return
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=FLAGS.log_device_placement)
    # Variables and gradient averaging live on the CPU; towers go on GPUs.
    with tf.Graph().as_default(), tf.device("/cpu:0"):
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        device_ids = FLAGS.device_ids.split(',')
        if len(device_ids) > FLAGS.num_gpus:
            print('The device_ids should have the same number of GPUs with num_gpus')
            return
        lr = 0.05
        #optimizer = tf.train.GradientDescentOptimizer(lr)
        optimizer = tf.train.MomentumOptimizer(lr, 0.9)
        tower_grads = []         # per-tower (gradient, variable) lists
        feed_vars = []           # per-tower (images, labels) placeholders
        average_loss_tensor = [] # per-tower total-loss tensors
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%s' % device_ids[i]):
                with tf.name_scope('%s_%s' % ('TOWER', device_ids[i])) as scope:
                    feature_dim = models.feature_dim
                    label_dim = models.label_dim
                    # Each tower gets its own input placeholders so one big
                    # batch can be split across GPUs at feed time.
                    images = tf.placeholder(tf.float32, [None, feature_dim], name='images')
                    labels = tf.placeholder(tf.float32, [None, label_dim], name='labels')
                    feed_vars.append((images, labels))
                    logits = models.model_fcn5(images)
                    loss = models.loss(logits, labels)
                    tf.add_to_collection('losses', loss)
                    #tf.add_n(tf.get_collection('losses'), name='total_loss')
                    # Restrict the collection lookup to this tower's scope so
                    # each tower sums only its own losses.
                    losses = tf.get_collection('losses', scope)
                    total_loss = tf.add_n(losses, name='total_loss')
                    average_loss_tensor.append(total_loss)
                    # After the first tower is built, later towers reuse its
                    # variables instead of creating fresh copies.
                    tf.get_variable_scope().reuse_variables()
                    grads = optimizer.compute_gradients(total_loss)
                    tower_grads.append(grads)
        print('tower_grads: ', tower_grads, '\nlen: ', len(tower_grads))
        print ('total_loss: ', total_loss)
        # Average the per-tower gradients and apply them once.
        grads = average_gradients(tower_grads)
        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
        train_op = apply_gradient_op
        # Scalar mean of the per-tower losses, used for logging.
        average_op = tf.reduce_mean(average_loss_tensor, 0)
        saver = tf.train.Saver(tf.all_variables())
        init = tf.initialize_all_variables()
        sess = tf.Session(config=config)
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)
        # The effective batch is the per-GPU batch times the GPU count.
        real_batch_size = FLAGS.batch_size * FLAGS.num_gpus
        num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1) / real_batch_size)
        iterations = FLAGS.epochs * num_batches_per_epoch
        average_batch_time = 0.0
        epochs_info = []
        step = 0
        average_loss = 0.0
        for step in range(iterations):
            start_time = time.time()
            imgs, labs = get_real_batch_data(real_batch_size, 10)
            feed_dict = {}
            # Slice the fetched batch so each tower sees its own shard.
            for i in range(FLAGS.num_gpus):
                feed_dict[feed_vars[i][0]] = imgs[i*FLAGS.batch_size:(i+1)*FLAGS.batch_size]
                feed_dict[feed_vars[i][1]] = labs[i*FLAGS.batch_size:(i+1)*FLAGS.batch_size]
            # _, loss_value = sess.run([train_op, total_loss], feed_dict=feed_dict)
            _, loss_value = sess.run([train_op, average_op], feed_dict=feed_dict)
            duration = time.time() - start_time
            average_batch_time += float(duration)
            average_loss += loss_value
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            if step % FLAGS.log_step == 0:
                examples_per_sec = (FLAGS.batch_size * FLAGS.num_gpus) / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch))
            if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0:
                # End of an eval window: log the mean loss over the window.
                average_loss /= num_batches_per_epoch * FLAGS.eval_step
                print ('epoch: %d, loss: %.2f' % (step/(FLAGS.eval_step*num_batches_per_epoch), average_loss))
                epochs_info.append('%d:-:%s'%(step/(FLAGS.eval_step*num_batches_per_epoch), average_loss))
                average_loss = 0.0
        # Save a single checkpoint at the end of training.
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
        average_batch_time /= iterations
        print 'average_batch_time: ', average_batch_time
        print ('epoch_info: %s' % ','.join(epochs_info))
def train(model='fcn5'): config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=FLAGS.log_device_placement) if FLAGS.xla: # Turns on XLA. XLA is not included in the standard build. For single GPU this shows ~5% improvement config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 with tf.Graph().as_default(), tf.device("/" + FLAGS.local_ps_device + ":0"): global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) device_ids = FLAGS.device_ids if not device_ids: device_ids = [str(i) for i in range(FLAGS.num_gpus)] else: device_ids = device_ids.split(',') lr = 0.05 #optimizer = tf.train.GradientDescentOptimizer(lr) optimizer = tf.train.MomentumOptimizer(lr, 0.9) def assign_to_device(device, ps_device=FLAGS.local_ps_device): worker_device = device ps_sizes = [0] if FLAGS.local_ps_device.lower == 'gpu': ps_sizes = [0] * FLAGS.num_gpus def _assign(op): if op.device: return op.device if op.type not in ['Variable', 'VariableV2']: return worker_device device_index, _ = min(enumerate( ps_sizes), key=operator.itemgetter(1)) device_name = '/' + FLAGS.local_ps_device +':' + str(device_index) var_size = op.outputs[0].get_shape().num_elements() ps_sizes[device_index] += var_size return device_name return _assign images = None labels = None if FLAGS.use_dataset: with tf.device('/CPU:0'): d_features = mnist.train.images d_labels = mnist.train.labels dataset = tf.contrib.data.Dataset.from_tensor_slices((d_features, d_labels)) dataset = dataset.shuffle(buffer_size=60000) dataset = dataset.repeat() dataset = dataset.batch(FLAGS.batch_size) # Trick to get datasets to buffer the next epoch. This is needed because # the data loading is occuring outside DataSets in python. Normally preprocessing # would occur in DataSets and this odd looking line is not needed. 
dataset = dataset.map(lambda x,y:(x,y), num_threads=FLAGS.num_gpus, output_buffer_size=FLAGS.num_gpus) iterator = dataset.make_initializable_iterator() images,labels = iterator.get_next() tower_grads = [] feed_vars = [] average_loss_tensor = [] reuse_variables = False accuracy = None for i in xrange(FLAGS.num_gpus): with tf.device(assign_to_device('/gpu:%s'%device_ids[i])): with tf.name_scope('%s_%s' % ('TOWER', device_ids[i])) as scope: if not FLAGS.use_dataset: feature_dim = models.feature_dim label_dim = models.label_dim images = tf.placeholder(tf.float32, [None, feature_dim], name='images') labels = tf.placeholder(tf.int64, [None, label_dim], name='labels') feed_vars.append((images, labels)) with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables): logits = models.model_fcn5(images) if i == 0: # Prediction only on GPU:0 predictionCorrectness = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) accuracy = tf.reduce_mean(tf.cast(predictionCorrectness, "float")) loss = models.loss(logits, labels) reuse_variables = True average_loss_tensor.append(loss) grads = optimizer.compute_gradients(loss) tower_grads.append(grads) grads = average_gradients(tower_grads) apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) train_op = apply_gradient_op average_op = tf.reduce_mean(average_loss_tensor) saver = tf.train.Saver(tf.global_variables()) init = tf.global_variables_initializer() sess = tf.Session(config=config) sess.run(init) if FLAGS.use_dataset: sess.run(iterator.initializer) real_batch_size = FLAGS.batch_size * FLAGS.num_gpus num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1)/ real_batch_size) iterations = FLAGS.epochs * num_batches_per_epoch average_batch_time = 0.0 epochs_info = [] step = 0 average_loss = 0.0 for step in range(iterations): start_time = time.time() feed_dict = {} if not FLAGS.use_dataset: imgs, labs = get_real_batch_data(real_batch_size, 10) for i in range(FLAGS.num_gpus): feed_dict[feed_vars[i][0]] 
= imgs[i*FLAGS.batch_size:(i+1)*FLAGS.batch_size] feed_dict[feed_vars[i][1]] = labs[i*FLAGS.batch_size:(i+1)*FLAGS.batch_size] _, loss_value = sess.run([train_op, average_op], feed_dict=feed_dict) duration = time.time() - start_time average_batch_time += float(duration) average_loss += loss_value assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % FLAGS.log_step == 0: examples_per_sec = (FLAGS.batch_size * FLAGS.num_gpus) / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0: average_loss /= num_batches_per_epoch * FLAGS.eval_step print ('epoch: %d, loss: %.2f' % (step/(FLAGS.eval_step*num_batches_per_epoch), average_loss)) epochs_info.append('%d:-:%s'%(step/(FLAGS.eval_step*num_batches_per_epoch), average_loss)) average_loss = 0.0 feed_dict = { images: mnist.test.images, labels :mnist.test.labels } if not FLAGS.use_dataset: feed_dict = {} feed_dict[feed_vars[0][0]] = mnist.test.images feed_dict[feed_vars[0][1]] = mnist.test.labels accuracy_value = accuracy.eval(session=sess, feed_dict=feed_dict) print("test accuracy %g"%accuracy_value) checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) average_batch_time /= iterations print 'average_batch_time: ', average_batch_time print ('epoch_info: %s' % ','.join(epochs_info))
def train(model='fcn5'): config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement) device_id = FLAGS.device_id device_str = '' if int(device_id) >= 0: device_str = '/gpu:%d' % int(device_id) config.allow_soft_placement = True config.intra_op_parallelism_threads = 1 config.inter_op_parallelism_threads = 0 else: device_str = '/cpu:0' num_threads = os.getenv('OMP_NUM_THREADS', 1) config = tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=int(num_threads)) if FLAGS.xla: # Turns on XLA. XLA is not included in the standard build. For single GPU this shows ~5% improvement config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 with tf.Graph().as_default(), tf.device(device_str), tf.Session( config=config) as sess: feature_dim = models.feature_dim label_dim = models.label_dim images = None labels = None iterator = None if FLAGS.use_dataset: with tf.device('/CPU:0'): d_features = mnist.train.images d_labels = mnist.train.labels dataset = tf.contrib.data.Dataset.from_tensor_slices( (d_features, d_labels)) dataset = dataset.repeat() dataset = dataset.shuffle(buffer_size=60000) dataset = dataset.batch(FLAGS.batch_size) # Trick to get datasets to buffer the next epoch. This is needed because # the data loading is occuring outside DataSets in python. Normally preprocessing # would occur in DataSets and this odd looking line is not needed. 
dataset = dataset.map(lambda x, y: (x, y), num_threads=1, output_buffer_size=1) iterator = dataset.make_initializable_iterator() images, labels = iterator.get_next() else: images = tf.placeholder(tf.float32, [None, feature_dim], name="images_placeholder") labels = tf.placeholder(tf.int64, [None, label_dim], name="labels_placeholder") logits = None loss = None if model == 'fcn5': logits = models.model_fcn5(images) else: logits = models.model_fcn8(images) loss = models.loss(logits, labels) predictionCorrectness = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) accuracy = tf.reduce_mean(tf.cast(predictionCorrectness, "float")) lr = 0.05 optimizer = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss) init = tf.global_variables_initializer() sess.run(init) if FLAGS.use_dataset: sess.run(iterator.initializer) batch_size_per_epoch = int( (EPOCH_SIZE + FLAGS.batch_size - 1) / FLAGS.batch_size) iterations = FLAGS.epochs * batch_size_per_epoch average_batch_time = 0.0 epochs_info = [] average_loss = 0.0 for step in range(iterations): start_time = time.time() imgs = None labs = None if FLAGS.use_dataset: _, loss_value = sess.run([optimizer, loss]) else: imgs, labs = get_real_batch_data(FLAGS.batch_size, 10) _, loss_value = sess.run([optimizer, loss], feed_dict={ images: imgs, labels: labs }) duration = time.time() - start_time average_loss += loss_value average_batch_time += float(duration) assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % FLAGS.log_step == 0: examples_per_sec = FLAGS.batch_size / duration sec_per_batch = float(duration) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)' ) print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step > 0 and step % (FLAGS.eval_step * batch_size_per_epoch) == 0: average_loss /= FLAGS.eval_step * batch_size_per_epoch accuracy_value = accuracy.eval(feed_dict={ images: mnist.test.images, labels: mnist.test.labels }) print("test accuracy %g" 
% accuracy_value) epochs_info.append('%d:%g:%s' % (step / (FLAGS.eval_step * batch_size_per_epoch), accuracy_value, average_loss)) average_loss = 0.0 average_batch_time /= iterations print 'average_batch_time: ', average_batch_time print('epoch_info: %s' % ','.join(epochs_info))