def main(argv=None): # load config file and setup params = {} config = configparser.ConfigParser() config_file = "experiments/mv2_cpm.cfg" if len(argv) != 1: config_file = argv[1] config.read(config_file) for _ in config.options("Train"): params[_] = eval(config.get("Train", _)) os.environ['CUDA_VISIBLE_DEVICES'] = params['visible_devices'] gpus_index = params['visible_devices'].split(",") params['gpus'] = len(gpus_index) if not os.path.exists(params['modelpath']): os.makedirs(params['modelpath']) if not os.path.exists(params['logpath']): os.makedirs(params['logpath']) dataset.set_config(params) set_network_input_wh(params['input_width'], params['input_height']) set_network_scale(params['scale']) gpus = 'gpus' if platform.system() == 'Darwin': gpus = 'cpu' training_name = '{}_batch-{}_lr-{}_{}-{}_{}x{}_{}'.format( params['model'], params['batchsize'], params['lr'], gpus, params['gpus'], params['input_width'], params['input_height'], config_file.replace("/", "-").replace(".cfg", "")) with tf.Graph().as_default(), tf.device("/cpu:0"): train_dataset = get_train_dataset_pipeline(params['batchsize'], params['max_epoch'], buffer_size=5) valid_dataset = get_valid_dataset_pipeline(params['batchsize'], params['max_epoch'], buffer_size=5) train_iterator = train_dataset.make_one_shot_iterator() ''' sess2 = tf.Session() coord2 = tf.train.Coordinator() #input_image, input_heat = sess2.run(train_iterator.get_next()) #print(input_image) #print(input_heat) train_queue = tf.FIFOQueue(capacity=10, dtypes=(tf.float32, tf.float32)) enqueue_op = train_queue.enqueue(train_iterator.get_next()) numberOfThreads = 1 qr = tf.train.QueueRunner(train_queue, [enqueue_op] * numberOfThreads) enqueue_threads = qr.create_threads(sess2, coord=coord2, start=True) # tf.train.add_queue_runner(qr) input = train_queue.dequeue() print("wait data prepare: %d" % sess2.run(train_queue.size())) time.sleep(20) for i in range(1000): #print("wait 5 second data prepare: %d" % sess2.run(train_queue.size())) print("dequeue begin:%d , queue size: %d " % (i, sess2.run(train_queue.size())) ) img1, heat1 = sess2.run(input) print("dequeue end:%d" % i) #print('image:', img1) #print('heat:', heat1) coord2.request_stop() # And wait for them to actually do it. coord2.join(enqueue_threads) ''' valid_iterator = valid_dataset.make_one_shot_iterator() #handle = tf.placeholder(tf.string, shape=[]) input_image_array = tf.placeholder(tf.float32, shape=(None, 192, 192, 3)) input_heat_array = tf.placeholder(tf.float32, shape=(None, 96, 96, 14)) #input_iterator = tf.data.Iterator.from_string_handle(handle, train_dataset.output_types, train_dataset.output_shapes) #print(input_iterator) global_step = tf.Variable(0, trainable=False) learning_rate = tf.train.exponential_decay(float(params['lr']), global_step, decay_steps=10000, decay_rate=float( params['decay_rate']), staircase=True) opt = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8) tower_grads = [] reuse_variable = False if platform.system() == 'Darwin': # cpu (mac only) with tf.device("/cpu:0"): with tf.name_scope("CPU_0"): #input_image, input_heat = input_iterator.get_next() input_image = tf.convert_to_tensor(input_image_array) input_heat = tf.convert_to_tensor(input_heat_array) loss, last_heat_loss, pred_heat = get_loss_and_output( params['model'], params['batchsize'], input_image, input_heat, reuse_variable) reuse_variable = True grads = opt.compute_gradients(loss) tower_grads.append(grads) else: # multiple gpus for i in range(params['gpus']): with tf.device("/gpu:%d" % i): with tf.name_scope("GPU_%d" % i): #input_image, input_heat = input_iterator.get_next() #print(input_image) input_image = input_image_array input_heat = input_heat_array #if input_image.device == '/device:CPU:0': # input_image, input_heat = input_iterator.get_next() #input_heat = tf.convert_to_tensor(input_heat_array) loss, last_heat_loss, pred_heat = get_loss_and_output( params['model'], params['batchsize'], input_image, input_heat, reuse_variable) reuse_variable = True grads = opt.compute_gradients(loss) tower_grads.append(grads) grads = average_gradients(tower_grads) for grad, var in grads: if grad is not None: tf.summary.histogram("gradients_on_average/%s" % var.op.name, grad) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) for var in tf.trainable_variables(): tf.summary.histogram(var.op.name, var) MOVING_AVERAGE_DECAY = 0.99 variable_averages = tf.train.ExponentialMovingAverage( MOVING_AVERAGE_DECAY, global_step) variable_to_average = (tf.trainable_variables() + tf.moving_average_variables()) variables_averages_op = variable_averages.apply(variable_to_average) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = tf.group(apply_gradient_op, variables_averages_op) saver = tf.train.Saver(max_to_keep=100) tf.summary.scalar("learning_rate", learning_rate) tf.summary.scalar("loss", loss) tf.summary.scalar("loss_lastlayer_heat", last_heat_loss) summary_merge_op = tf.summary.merge_all() pred_result_image = tf.placeholder( tf.float32, shape=[params['batchsize'], 480, 640, 3]) pred_result__summary = tf.summary.image("pred_result_image", pred_result_image, params['batchsize']) init = tf.global_variables_initializer() config = tf.ConfigProto() # occupy gpu gracefully config.gpu_options.allow_growth = True ''' sess_q = tf.Session() coord_q = tf.train.Coordinator() train_queue = tf.FIFOQueue(capacity=10, dtypes=(tf.float32, tf.float32)) enqueue_op = train_queue.enqueue(train_iterator.get_next()) numberOfThreads = 1 qr = tf.train.QueueRunner(train_queue, [enqueue_op] * numberOfThreads) enqueue_threads = qr.create_threads(sess_q, coord=coord_q, start=True) #tf.train.add_queue_runner(qr) ''' train_queue = tf.FIFOQueue(capacity=5, dtypes=(tf.float32, tf.float32)) train_enqueue_op = train_queue.enqueue(train_iterator.get_next()) valid_queue = tf.FIFOQueue(capacity=5, dtypes=(tf.float32, tf.float32)) valid_enqueue_op = valid_queue.enqueue(valid_iterator.get_next()) with tf.Session(config=config) as sess: init.run() train_data_input = train_queue.dequeue() valid_data_input = valid_queue.dequeue() numberOfThreads = 1 train_qr = tf.train.QueueRunner(train_queue, [train_enqueue_op] * numberOfThreads) valid_qr = tf.train.QueueRunner(valid_queue, [valid_enqueue_op] * numberOfThreads) tf.train.add_queue_runner(train_qr) tf.train.add_queue_runner(valid_qr) #train_handle = sess.run(train_iterator.string_handle()) #valid_handle = sess.run(valid_iterator.string_handle()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) summary_writer = tf.summary.FileWriter( os.path.join(params['logpath'], training_name), sess.graph) total_step_num = params['num_train_samples'] * params[ 'max_epoch'] // (params['batchsize'] * params['gpus']) print("Start training...") for step in range(total_step_num): start_time = time.time() #print("dequeue a batchsize begin") input_image_array_h, input_heat_array_h = sess.run( train_data_input) #print('image shape:', input_image_array_h.shape) end_q_time = time.time() #print("dequeue a batchsize end: %d" % (end_q_time - start_time)) _, loss_value, lh_loss = sess.run( [train_op, loss, last_heat_loss], feed_dict={ input_image_array: input_image_array_h, input_heat_array: input_heat_array_h }) ''' _, loss_value, lh_loss = sess.run([train_op, loss, last_heat_loss], feed_dict={handle: train_handle} ) ''' duration = time.time() - start_time #print('step: %d, duration:%d' % (step, duration)) if step != 0 and step % params[ 'per_update_tensorboard_step'] == 0: # False will speed up the training time. if params['pred_image_on_tensorboard'] is True: input_image_array_h, input_heat_array_h = sess.run( valid_data_input) valid_loss_value, valid_lh_loss, valid_in_image, valid_in_heat, valid_p_heat = sess.run( [ loss, last_heat_loss, input_image, input_heat, pred_heat ], feed_dict={ input_image_array: input_image_array_h, input_heat_array: input_heat_array_h }) ''' valid_loss_value, valid_lh_loss, valid_in_image, valid_in_heat, valid_p_heat = sess.run( [loss, last_heat_loss, input_image, input_heat, pred_heat], feed_dict={handle: valid_handle} ) ''' result = [] for index in range(params['batchsize']): r = CocoPose.display_image( valid_in_image[index, :, :, :], valid_in_heat[index, :, :, :], valid_p_heat[index, :, :, :], True) result.append(r.astype(np.float32)) comparsion_of_pred_result = sess.run( pred_result__summary, feed_dict={pred_result_image: np.array(result)}) summary_writer.add_summary(comparsion_of_pred_result, step) # print train info num_examples_per_step = params['batchsize'] * params['gpus'] examples_per_sec = num_examples_per_step / duration sec_per_batch = duration / params['gpus'] format_str = ( '%s: step %d, loss = %.2f, last_heat_loss = %.2f (%.1f examples/sec; %.3f sec/batch)' ) print(format_str % (datetime.now(), step, loss_value, lh_loss, examples_per_sec, sec_per_batch)) # tensorboard visualization #merge_op = sess.run(summary_merge_op, feed_dict={handle: valid_handle}) input_image_array_h, input_heat_array_h = sess.run( valid_data_input) merge_op = sess.run(summary_merge_op, feed_dict={ input_image_array: input_image_array_h, input_heat_array: input_heat_array_h }) summary_writer.add_summary(merge_op, step) # save model if step != 0 and step % params['per_saved_model_step'] == 0: checkpoint_path = os.path.join(params['modelpath'], training_name, 'model') saver.save(sess, checkpoint_path, global_step=step) coord.request_stop() coord.join(threads) '''
def main(argv=None): # load config file and setup params = {} config = configparser.ConfigParser() config_file = "experiments/mv2_cpm.cfg" if len(argv) != 1: config_file = argv[1] config.read(config_file) for _ in config.options("Train"): params[_] = eval(config.get("Train", _)) os.environ['CUDA_VISIBLE_DEVICES'] = params['visible_devices'] gpus_index = params['visible_devices'].split(",") params['gpus'] = len(gpus_index) if not os.path.exists(params['modelpath']): os.makedirs(params['modelpath']) if not os.path.exists(params['logpath']): os.makedirs(params['logpath']) dataset.set_config(params) set_network_input_wh(params['input_width'], params['input_height']) set_network_scale(params['scale']) training_name = '{}_batch-{}_lr-{}_gpus-{}_{}x{}_{}'.format( params['model'], params['batchsize'], params['lr'], params['gpus'], params['input_width'], params['input_height'], config_file.replace("/", "-").replace(".cfg", "")) with tf.Graph().as_default(), tf.device("/cpu:0"): input_image, input_heat = get_input(params['batchsize'], params['max_epoch'], is_train=True) valid_input_image, valid_input_heat = get_input(params['batchsize'], params['max_epoch'], is_train=False) global_step = tf.Variable(0, trainable=False) learning_rate = tf.train.exponential_decay(float(params['lr']), global_step, decay_steps=10000, decay_rate=float( params['decay_rate']), staircase=True) opt = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8) tower_grads = [] reuse_variable = False # multiple gpus for i in range(params['gpus']): with tf.device("/gpu:%d" % i): with tf.name_scope("GPU_%d" % i): loss, last_heat_loss, pred_heat = get_loss_and_output( params['model'], params['batchsize'], input_image, input_heat, reuse_variable) reuse_variable = True grads = opt.compute_gradients(loss) tower_grads.append(grads) valid_loss, valid_last_heat_loss, valid_pred_heat = get_loss_and_output( params['model'], params['batchsize'], valid_input_image, valid_input_heat, reuse_variable) grads = average_gradients(tower_grads) for grad, var in grads: if grad is not None: tf.summary.histogram("gradients_on_average/%s" % var.op.name, grad) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) for var in tf.trainable_variables(): tf.summary.histogram(var.op.name, var) MOVING_AVERAGE_DECAY = 0.99 variable_averages = tf.train.ExponentialMovingAverage( MOVING_AVERAGE_DECAY, global_step) variable_to_average = (tf.trainable_variables() + tf.moving_average_variables()) variables_averages_op = variable_averages.apply(variable_to_average) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = tf.group(apply_gradient_op, variables_averages_op) saver = tf.train.Saver(max_to_keep=100) tf.summary.scalar("learning_rate", learning_rate) tf.summary.scalar("loss", loss) tf.summary.scalar("loss_lastlayer_heat", last_heat_loss) summary_merge_op = tf.summary.merge_all() pred_result_image = tf.placeholder( tf.float32, shape=[params['batchsize'], 480, 640, 3]) pred_result__summary = tf.summary.image("pred_result_image", pred_result_image, params['batchsize']) init = tf.global_variables_initializer() config = tf.ConfigProto() # occupy gpu gracefully config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: init.run() coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) summary_writer = tf.summary.FileWriter( os.path.join(params['logpath'], training_name), sess.graph) total_step_num = params['num_train_samples'] * params[ 'max_epoch'] // (params['batchsize'] * params['gpus']) print("Start training...") for step in range(total_step_num): start_time = time.time() _, loss_value, lh_loss, in_image, in_heat, p_heat = sess.run([ train_op, loss, last_heat_loss, input_image, input_heat, pred_heat ]) duration = time.time() - start_time if step != 0 and step % params[ 'per_update_tensorboard_step'] == 0: # False will speed up the training time. if params['pred_image_on_tensorboard'] is True: valid_loss_value, valid_lh_loss, valid_in_image, valid_in_heat, valid_p_heat = sess.run( [ valid_loss, valid_last_heat_loss, valid_input_image, valid_input_heat, valid_pred_heat ]) result = [] for index in range(params['batchsize']): r = CocoPose.display_image( valid_in_image[index, :, :, :], valid_in_heat[index, :, :, :], valid_p_heat[index, :, :, :], True) result.append(r.astype(np.float32)) comparsion_of_pred_result = sess.run( pred_result__summary, feed_dict={pred_result_image: np.array(result)}) summary_writer.add_summary(comparsion_of_pred_result, step) # print train info num_examples_per_step = params['batchsize'] * params['gpus'] examples_per_sec = num_examples_per_step / duration sec_per_batch = duration / params['gpus'] format_str = ( '%s: step %d, loss = %.2f, last_heat_loss = %.2f (%.1f examples/sec; %.3f sec/batch)' ) print(format_str % (datetime.now(), step, loss_value, lh_loss, examples_per_sec, sec_per_batch)) # tensorboard visualization merge_op = sess.run(summary_merge_op) summary_writer.add_summary(merge_op, step) # save model if step % params['per_saved_model_step'] == 0: checkpoint_path = os.path.join(params['modelpath'], training_name, 'model') saver.save(sess, checkpoint_path, global_step=step) coord.request_stop() coord.join(threads)