def train(H, test_images):
    '''
    Setup computation graph, run 2 prefetch data threads, and then run the main loop
    '''
    if not os.path.exists(H['save_dir']):
        os.makedirs(H['save_dir'])
    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        shapes = (
            [H['image_height'], H['image_width'], 3],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {
            x_in: d['image'],
            confs_in: d['confs'],
            boxes_in: d['boxes'],
            learning_rate: H['solver']['learning_rate']
        }

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, smooth_op,
     global_step, learning_rate) = build(H, q)
    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(logdir=H['save_dir'], flush_secs=10)

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter'])
            d = gen.next()
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)
        else:
            init_fn = slim.assign_from_checkpoint_fn(
                '%s/data/%s' % (os.path.dirname(os.path.realpath(__file__)), H['slim_ckpt']),
                [x for x in tf.global_variables()
                 if x.name.startswith(H['slim_basename'])
                 and H['solver']['opt'] not in x.name])
            init_fn(sess)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 800000)
        for i in xrange(max_iter):
            display_iter = H['logging']['display_iter']
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i / H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str,
                 _, _) = sess.run([loss['train'], accuracy['test'], summary_op,
                                   train_op, smooth_op],
                                  feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Softmax Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                print(print_str % (i, adjusted_lr, train_loss,
                                   test_accuracy * 100,
                                   dt * 1000 if i > 0 else 0))

            if (global_step.eval() % H['logging']['save_iter'] == 0
                    or global_step.eval() == max_iter - 1):
                saver.save(sess, ckpt_file, global_step=global_step)
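
# For reference: a minimal sketch of the hypes dict H that the train() above
# reads. The key names are taken from the lookups in the function; every value
# below is an illustrative placeholder, not a default from the project.
example_hypes = {
    'save_dir': 'output/run01',        # hypes.json, checkpoints, and summaries go here
    'image_height': 480,
    'image_width': 640,
    'grid_height': 15,
    'grid_width': 20,
    'rnn_len': 1,
    'num_classes': 2,
    'batch_size': 16,
    'slim_ckpt': 'inception_v1.ckpt',  # used only when solver.weights is empty
    'slim_basename': 'InceptionV1',
    'solver': {
        'learning_rate': 1e-3,
        'learning_rate_step': 100000,  # lr halves per step of this size after a warm-up period
        'weights': '',                 # checkpoint to restore, or '' to init from the slim checkpoint
        'opt': 'RMS',                  # optimizer name; variables containing it are skipped on restore
        'use_jitter': False,
        'rnd_seed': 1,
        'max_iter': 800000,
    },
    'logging': {
        'display_iter': 50,
        'save_iter': 5000,
    },
}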
def train(H, test_images):
    '''
    Setup computation graph, run 2 prefetch data threads, and then run the main loop
    '''
    if not os.path.exists(H['save_dir']):
        os.makedirs(H['save_dir'])
    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        shapes = (
            [H['image_height'], H['image_width'], 3],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: H['solver']['learning_rate']}

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, smooth_op,
     global_step, learning_rate, encoder_net) = build(H, q)
    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.train.SummaryWriter(logdir=H['save_dir'], flush_secs=10)

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter'])
            d = gen.next()
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.initialize_all_variables())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 10000000)
        for i in xrange(max_iter):
            display_iter = H['logging']['display_iter']
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i / H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str,
                 _, _) = sess.run([loss['train'], accuracy['test'], summary_op,
                                   train_op, smooth_op],
                                  feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                print(print_str % (i, adjusted_lr, train_loss,
                                   test_accuracy * 100,
                                   dt * 1000 if i > 0 else 0))

            if (global_step.eval() % H['logging']['save_iter'] == 0
                    or global_step.eval() == max_iter - 1):
                saver.save(sess, ckpt_file, global_step=global_step)
def train(H, test_images):
    if not os.path.exists(H['save_dir']):
        os.makedirs(H['save_dir'])
    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['arch']['grid_width'] * H['arch']['grid_height']
        shapes = (
            [H['arch']['image_height'], H['arch']['image_width'], 3],
            [grid_size, H['arch']['rnn_len'], H['arch']['num_classes']],
            [grid_size, H['arch']['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: H['solver']['learning_rate']}

    def MyLoop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, W_norm,
     test_image, test_pred_boxes, test_pred_confidences,
     test_true_boxes, test_true_confidences,
     smooth_op, global_step, learning_rate) = build(H, q)
    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.train.SummaryWriter(logdir=H['save_dir'], flush_secs=10)

    test_image_to_log = tf.placeholder(
        tf.uint8, [H['arch']['image_height'], H['arch']['image_width'], 3])
    log_image_name = tf.placeholder(tf.string)
    log_image = tf.image_summary(log_image_name,
                                 tf.expand_dims(test_image_to_log, 0))

    with tf.Session(config=config) as sess:
        threads = []
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            threads.append(threading.Thread(target=MyLoop,
                                            args=(sess, enqueue_op, phase, gen)))
            threads[-1].start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.initialize_all_variables())
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)

        # train model for N iterations
        for i in range(10000000):
            display_iter = 10
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i / H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter == 0:
                if i > 0:
                    dt = (time.time() - start) / (H['arch']['batch_size'] * display_iter)
                start = time.time()
                (batch_loss_train, test_accuracy, weights_norm, summary_str,
                 np_test_image, np_test_pred_boxes, np_test_pred_confidences,
                 np_test_true_boxes, np_test_true_confidences,
                 _, _) = sess.run([
                     loss['train'], accuracy['test'], W_norm, summary_op,
                     test_image, test_pred_boxes, test_pred_confidences,
                     test_true_boxes, test_true_confidences,
                     train_op, smooth_op,
                 ], feed_dict=lr_feed)
                pred_true = [
                    ("%d_pred_output" % (i % 3), np_test_pred_boxes, np_test_pred_confidences),
                    ("%d_true_output" % (i % 3), np_test_true_boxes, np_test_true_confidences),
                ]
                for name, boxes, confidences in pred_true:
                    test_output_to_log = train_utils.add_rectangles(
                        np_test_image, confidences, boxes, H["arch"])[0]
                    assert test_output_to_log.shape == (
                        H['arch']['image_height'], H['arch']['image_width'], 3)
                    feed = {test_image_to_log: test_output_to_log,
                            log_image_name: name}
                    test_image_summary_str = sess.run(log_image, feed_dict=feed)
                    writer.add_summary(test_image_summary_str,
                                       global_step=global_step.eval())
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                print(print_str % (i, adjusted_lr, batch_loss_train,
                                   test_accuracy * 100,
                                   dt * 1000 if i > 0 else 0))
            else:
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)

            if global_step.eval() % 1000 == 0:
                saver.save(sess, ckpt_file, global_step=global_step)
def train(self, output_model_path):
    train_images = self.train_images
    test_images = self.test_images
    settings = self.settings
    logger.info('Invoking train() with {} training images and {} test images'.format(
        len(train_images), len(test_images)))

    tf.reset_default_graph()
    start_ts = time.time()
    num_steps = settings['num_steps']

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    queue = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = settings['grid_width'] * settings['grid_height']
        shapes = (
            [settings['image_height'], settings['image_width'], 3],
            [grid_size, settings['rnn_len'], settings['num_classes']],
            [grid_size, settings['rnn_len'], 4],
        )
        queue[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = queue[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: settings['solver']['learning_rate']}

    def thread_loop(sess, enqueue_op, phase, gen, stop_event):
        for d in gen:
            if stop_event.is_set():
                return
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, train_op, smooth_op,
     global_step, learning_rate) = build(settings, queue)
    saver = tf.train.Saver(max_to_keep=None)
    logger.info('train() initialization took {}s'.format(time.time() - start_ts))

    threads = []
    with tf.Session(config=config) as sess:
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            if phase == 'train':
                gen = train_utils.load_data_gen(settings, train_images,
                                                jitter=settings['solver']['use_jitter'])
            if phase == 'test':
                gen = train_utils.load_data_gen(settings, test_images)
            d = gen.next()
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            thread_stop_event = threading.Event()
            thread = threading.Thread(target=thread_loop,
                                      args=(sess, enqueue_op, phase, gen, thread_stop_event))
            thread.stop_event = thread_stop_event
            threads.append(thread)
            thread.daemon = True
            thread.start()

        tf.set_random_seed(settings['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        init_fn = slim.assign_from_checkpoint_fn(
            settings['base_model_ckpt'],
            [x for x in tf.global_variables()
             if x.name.startswith(settings['base_name'])
             and settings['solver']['opt'] not in x.name])
        init_fn(sess)

        # train model for N iterations
        start = time.time()
        for i in xrange(num_steps):
            display_iter = settings['logging']['display_iter']
            adjusted_lr = (settings['solver']['learning_rate'] *
                           0.5 ** max(0, (i / settings['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (settings['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy,
                 _, _) = sess.run([loss['train'], accuracy['test'],
                                   train_op, smooth_op],
                                  feed_dict=lr_feed)
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Softmax Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                logger.info(print_str % (i, adjusted_lr, train_loss,
                                         test_accuracy * 100,
                                         dt * 1000 if i > 0 else 0))

        for thread in threads:
            thread.stop_event.set()

        create_empty_model(output_model_path)
        transfer_model_meta(self.scaffold_path, output_model_path)
        save_model_state(sess, output_model_path)

        benchmark_info = {
            'adjusted_lr': float(adjusted_lr),
            'train_loss': float(train_loss),
            'test_accuracy': float(test_accuracy),
        }
        save_model_benchmark_info(output_model_path, benchmark_info)
        return benchmark_info