def get_input(batchsize, epoch, is_train=True):
    """Create a one-shot iterator over a dataset pipeline and read from it.

    Args:
        batchsize: Forwarded to the pipeline builder as ``batch_size``.
        epoch: Forwarded to the pipeline builder as ``epoch``.
        is_train: Only the literal ``True`` selects
            ``get_train_dataset_pipeline``; every other value (including
            truthy non-bool values) selects ``get_valid_dataset_pipeline``.

    Returns:
        A 2-tuple holding elements 0 and 1 of the iterator's ``get_next()``
        result (presumably the image and heatmap tensors -- confirm against
        the pipeline definition).
    """
    # Identity comparison is kept on purpose so the selection rule does not
    # change for callers passing non-bool flags.
    build_pipeline = (get_train_dataset_pipeline
                      if is_train is True else get_valid_dataset_pipeline)
    pipeline = build_pipeline(batch_size=batchsize,
                              epoch=epoch,
                              buffer_size=100)
    next_element = pipeline.make_one_shot_iterator().get_next()
    return next_element[0], next_element[1]
Beispiel #2
0
def _load_train_params(argv):
    """Parse the ``Train`` section of the experiment config file.

    Args:
        argv: Command-line style argument list (e.g. ``sys.argv``) or None.
            When it holds more than one entry, ``argv[1]`` is used as the
            config file path; otherwise ``experiments/mv2_cpm.cfg`` is read.

    Returns:
        A ``(params, config_file)`` tuple: a dict mapping every option of the
        ``Train`` section to its evaluated value, and the path that was read.

    Raises:
        configparser.NoSectionError: If the file cannot be read or has no
            ``Train`` section (``ConfigParser.read`` skips missing files).
    """
    config_file = "experiments/mv2_cpm.cfg"
    # Guard against argv=None (the declared default of main) and against an
    # empty list, both of which previously crashed before any config was read.
    if argv is not None and len(argv) > 1:
        config_file = argv[1]

    parser = configparser.ConfigParser()
    parser.read(config_file)

    params = {}
    for option in parser.options("Train"):
        # NOTE(security): eval() executes whatever expression the config file
        # contains. Only run this with trusted config files.
        params[option] = eval(parser.get("Train", option))
    return params, config_file


def main(argv=None):
    """Build the training graph and run the training loop.

    Loads the hyper-parameters from the config file, builds one model tower
    per visible GPU (a single CPU tower on macOS), averages the tower
    gradients and trains for a fixed number of steps while periodically
    printing progress, writing TensorBoard summaries and saving checkpoints.

    Training and validation batches are produced by dataset pipelines, moved
    through FIFO queues by queue-runner threads, dequeued in Python and fed
    back into the graph through placeholders.

    Args:
        argv: Command-line style argument list or None; ``argv[1]``, when
            present, overrides the default config file path.
    """
    params, config_file = _load_train_params(argv)

    os.environ['CUDA_VISIBLE_DEVICES'] = params['visible_devices']
    # One tower is built per entry of the comma-separated device list.
    params['gpus'] = len(params['visible_devices'].split(","))

    for directory in (params['modelpath'], params['logpath']):
        if not os.path.exists(directory):
            os.makedirs(directory)

    dataset.set_config(params)
    set_network_input_wh(params['input_width'], params['input_height'])
    set_network_scale(params['scale'])

    # macOS runs have no CUDA devices, so a single CPU tower is built there.
    on_mac = platform.system() == 'Darwin'
    training_name = '{}_batch-{}_lr-{}_{}-{}_{}x{}_{}'.format(
        params['model'], params['batchsize'], params['lr'],
        'cpu' if on_mac else 'gpus', params['gpus'], params['input_width'],
        params['input_height'],
        config_file.replace("/", "-").replace(".cfg", ""))

    # (device string, name scope) for every tower to build.
    if on_mac:
        towers = [("/cpu:0", "CPU_0")]
    else:
        towers = [("/gpu:%d" % i, "GPU_%d" % i)
                  for i in range(params['gpus'])]

    with tf.Graph().as_default(), tf.device("/cpu:0"):
        train_dataset = get_train_dataset_pipeline(params['batchsize'],
                                                   params['max_epoch'],
                                                   buffer_size=5)
        valid_dataset = get_valid_dataset_pipeline(params['batchsize'],
                                                   params['max_epoch'],
                                                   buffer_size=5)
        train_iterator = train_dataset.make_one_shot_iterator()
        valid_iterator = valid_dataset.make_one_shot_iterator()

        # Batches dequeued in the training loop are fed through these.
        # NOTE(review): the shapes are hard-coded; presumably they must agree
        # with input_width/input_height/scale from the config -- confirm.
        input_image_array = tf.placeholder(tf.float32,
                                           shape=(None, 192, 192, 3))
        input_heat_array = tf.placeholder(tf.float32, shape=(None, 96, 96, 14))

        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(
            float(params['lr']),
            global_step,
            decay_steps=10000,
            decay_rate=float(params['decay_rate']),
            staircase=True)
        opt = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)

        # NOTE(review): every tower is wired to the same two placeholders, so
        # all towers receive the same fed batch -- confirm whether distinct
        # per-tower batches were intended.
        input_image = input_image_array
        input_heat = input_heat_array
        tower_grads = []
        reuse_variable = False
        for device, scope in towers:
            with tf.device(device), tf.name_scope(scope):
                loss, last_heat_loss, pred_heat = get_loss_and_output(
                    params['model'], params['batchsize'], input_image,
                    input_heat, reuse_variable)
                # Presumably makes later towers reuse the variables created
                # by the first one -- see get_loss_and_output.
                reuse_variable = True
                tower_grads.append(opt.compute_gradients(loss))

        grads = average_gradients(tower_grads)
        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram("gradients_on_average/%s" % var.op.name,
                                     grad)

        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        MOVING_AVERAGE_DECAY = 0.99
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variable_to_average = (tf.trainable_variables() +
                               tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variable_to_average)

        # Run the collected UPDATE_OPS together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = tf.group(apply_gradient_op, variables_averages_op)

        saver = tf.train.Saver(max_to_keep=100)

        # loss / last_heat_loss refer to the last tower built above.
        tf.summary.scalar("learning_rate", learning_rate)
        tf.summary.scalar("loss", loss)
        tf.summary.scalar("loss_lastlayer_heat", last_heat_loss)
        summary_merge_op = tf.summary.merge_all()

        # Created after merge_all() so it is not part of summary_merge_op and
        # is only evaluated when rendered prediction images are fed in.
        pred_result_image = tf.placeholder(
            tf.float32, shape=[params['batchsize'], 480, 640, 3])
        pred_result__summary = tf.summary.image("pred_result_image",
                                                pred_result_image,
                                                params['batchsize'])

        init = tf.global_variables_initializer()
        session_config = tf.ConfigProto()
        # occupy gpu gracefully
        session_config.gpu_options.allow_growth = True

        train_queue = tf.FIFOQueue(capacity=5, dtypes=(tf.float32, tf.float32))
        train_enqueue_op = train_queue.enqueue(train_iterator.get_next())
        valid_queue = tf.FIFOQueue(capacity=5, dtypes=(tf.float32, tf.float32))
        valid_enqueue_op = valid_queue.enqueue(valid_iterator.get_next())

        with tf.Session(config=session_config) as sess:
            init.run()
            train_data_input = train_queue.dequeue()
            valid_data_input = valid_queue.dequeue()
            num_enqueue_threads = 1
            train_qr = tf.train.QueueRunner(
                train_queue, [train_enqueue_op] * num_enqueue_threads)
            valid_qr = tf.train.QueueRunner(
                valid_queue, [valid_enqueue_op] * num_enqueue_threads)
            tf.train.add_queue_runner(train_qr)
            tf.train.add_queue_runner(valid_qr)

            summary_writer = tf.summary.FileWriter(
                os.path.join(params['logpath'], training_name), sess.graph)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            checkpoint_path = os.path.join(params['modelpath'], training_name,
                                           'model')
            # NOTE(review): the step count is divided by params['gpus'] even
            # though each step dequeues a single batch -- confirm intended.
            total_step_num = params['num_train_samples'] * params[
                'max_epoch'] // (params['batchsize'] * params['gpus'])
            print("Start training...")
            try:
                for step in range(total_step_num):
                    # The measured duration includes the dequeue time.
                    start_time = time.time()
                    input_image_array_h, input_heat_array_h = sess.run(
                        train_data_input)
                    _, loss_value, lh_loss = sess.run(
                        [train_op, loss, last_heat_loss],
                        feed_dict={
                            input_image_array: input_image_array_h,
                            input_heat_array: input_heat_array_h
                        })
                    duration = time.time() - start_time

                    if step != 0 and step % params[
                            'per_update_tensorboard_step'] == 0:
                        # False will speed up the training time.
                        if params['pred_image_on_tensorboard'] is True:
                            input_image_array_h, input_heat_array_h = sess.run(
                                valid_data_input)
                            (_valid_loss, _valid_lh_loss, valid_in_image,
                             valid_in_heat, valid_p_heat) = sess.run(
                                 [
                                     loss, last_heat_loss, input_image,
                                     input_heat, pred_heat
                                 ],
                                 feed_dict={
                                     input_image_array: input_image_array_h,
                                     input_heat_array: input_heat_array_h
                                 })
                            # One rendered comparison image per batch entry.
                            result = [
                                CocoPose.display_image(
                                    valid_in_image[index, :, :, :],
                                    valid_in_heat[index, :, :, :],
                                    valid_p_heat[index, :, :, :],
                                    True).astype(np.float32)
                                for index in range(params['batchsize'])
                            ]
                            comparsion_of_pred_result = sess.run(
                                pred_result__summary,
                                feed_dict={
                                    pred_result_image: np.array(result)
                                })
                            summary_writer.add_summary(
                                comparsion_of_pred_result, step)

                        # print train info
                        num_examples_per_step = (params['batchsize'] *
                                                 params['gpus'])
                        examples_per_sec = num_examples_per_step / duration
                        sec_per_batch = duration / params['gpus']
                        format_str = (
                            '%s: step %d, loss = %.2f, last_heat_loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                        )
                        print(format_str %
                              (datetime.now(), step, loss_value, lh_loss,
                               examples_per_sec, sec_per_batch))

                        # tensorboard visualization, on a fresh validation
                        # batch
                        input_image_array_h, input_heat_array_h = sess.run(
                            valid_data_input)
                        merge_op = sess.run(summary_merge_op,
                                            feed_dict={
                                                input_image_array:
                                                input_image_array_h,
                                                input_heat_array:
                                                input_heat_array_h
                                            })
                        summary_writer.add_summary(merge_op, step)

                    # save model
                    if step != 0 and step % params['per_saved_model_step'] == 0:
                        saver.save(sess, checkpoint_path, global_step=step)
            finally:
                # Flush pending summaries and stop the enqueue threads even
                # when the loop aborts with an exception.
                summary_writer.close()
                coord.request_stop()
                coord.join(threads)
        '''        
def main(argv=None):
    # load config file and setup
    params = {}
    device_name = tf.test.gpu_device_name()
    print('Found GPU at: {}'.format(device_name))
    config = configparser.ConfigParser()
    config_file = "experiments/mv2_cpm.cfg"
    if len(argv) != 1:
        config_file = argv[1]
    config.read(config_file)
    for _ in config.options("Train"):
        params[_] = eval(config.get("Train", _))

    os.environ['CUDA_VISIBLE_DEVICES'] = params['visible_devices']

    gpus_index = params['visible_devices'].split(",")
    params['gpus'] = len(gpus_index)

    if not os.path.exists(params['modelpath']):
        os.makedirs(params['modelpath'])
    if not os.path.exists(params['logpath']):
        os.makedirs(params['logpath'])

    dataset.set_config(params)
    set_network_input_wh(params['input_width'], params['input_height'])
    set_network_scale(params['scale'])

    device_name = tf.test.gpu_device_name()
    print('2.Found GPU at: {}'.format(device_name))
    gpus = 'gpus'
    training_name = '{}_batch-{}_lr-{}_{}-{}_{}x{}_{}'.format(
        params['model'], params['batchsize'], params['lr'], gpus,
        params['gpus'], params['input_width'], params['input_height'],
        config_file.replace("/", "-").replace(".cfg", ""))

    with tf.Graph().as_default(), tf.device("/cpu:0"):
        train_dataset = get_train_dataset_pipeline(params['batchsize'],
                                                   params['max_epoch'],
                                                   buffer_size=100)
        valid_dataset = get_valid_dataset_pipeline(params['batchsize'],
                                                   params['max_epoch'],
                                                   buffer_size=100)

        train_iterator = train_dataset.make_one_shot_iterator()
        valid_iterator = valid_dataset.make_one_shot_iterator()

        handle = tf.compat.v1.placeholder(tf.string, shape=[])
        input_iterator = tf.compat.v1.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)

        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.compat.v1.train.exponential_decay(
            float(params['lr']),
            global_step,
            decay_steps=10000,
            decay_rate=float(params['decay_rate']),
            staircase=True)
        opt = tf.compat.v1.train.AdamOptimizer(learning_rate, epsilon=1e-8)
        tower_grads = []
        reuse_variable = False

        device_name = tf.test.gpu_device_name()
        print('3.Found GPU at: {}'.format(device_name))

        # multiple gpus
        for i in range(params['gpus']):
            #            with tf.device("/gpu:%d" % i):
            with tf.device("/device:GPU:%d" % i):
                #                with tf.name_scope("GPU_%d" % i):
                input_image, input_heat = input_iterator.get_next()
                loss, last_heat_loss, pred_heat = get_loss_and_output(
                    params['model'], params['batchsize'], input_image,
                    input_heat, reuse_variable)
                reuse_variable = True
                grads = opt.compute_gradients(loss)
                tower_grads.append(grads)

        grads = average_gradients(tower_grads)
        for grad, var in grads:
            if grad is not None:
                tf.compat.v1.summary.histogram(
                    "gradients_on_average/%s" % var.op.name, grad)

        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        for var in tf.compat.v1.trainable_variables():
            tf.compat.v1.summary.histogram(var.op.name, var)

        MOVING_AVERAGE_DECAY = 0.99
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variable_to_average = (tf.compat.v1.trainable_variables() +
                               tf.compat.v1.moving_average_variables())
        variables_averages_op = variable_averages.apply(variable_to_average)

        update_ops = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = tf.group(apply_gradient_op, variables_averages_op)

        saver = tf.compat.v1.train.Saver(max_to_keep=100)

        tf.compat.v1.summary.scalar("learning_rate", learning_rate)
        tf.compat.v1.summary.scalar("loss", loss)
        tf.compat.v1.summary.scalar("loss_lastlayer_heat", last_heat_loss)
        summary_merge_op = tf.compat.v1.summary.merge_all()

        pred_result_image = tf.compat.v1.placeholder(
            tf.float32, shape=[params['batchsize'], 480, 640, 3])
        pred_result__summary = tf.compat.v1.summary.image(
            "pred_result_image", pred_result_image, params['batchsize'])

        init = tf.compat.v1.global_variables_initializer()
        config = tf.compat.v1.ConfigProto()
        # occupy gpu gracefully
        config.gpu_options.allow_growth = True
        with tf.compat.v1.Session(config=config) as sess:
            init.run()
            train_handle = sess.run(train_iterator.string_handle())
            valid_handle = sess.run(valid_iterator.string_handle())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            summary_writer = tf.summary.FileWriter(
                os.path.join(params['logpath'], training_name), sess.graph)
            total_step_num = params['num_train_samples'] * params[
                'max_epoch'] // (params['batchsize'] * params['gpus'])
            print("Start training...")
            for step in range(total_step_num):
                start_time = time.time()
                _, loss_value, lh_loss = sess.run(
                    [train_op, loss, last_heat_loss],
                    feed_dict={handle: train_handle})
                duration = time.time() - start_time

                if step != 0 and step % params[
                        'per_update_tensorboard_step'] == 0:
                    # False will speed up the training time.
                    if params['pred_image_on_tensorboard'] is True:
                        valid_loss_value, valid_lh_loss, valid_in_image, valid_in_heat, valid_p_heat = sess.run(
                            [
                                loss, last_heat_loss, input_image, input_heat,
                                pred_heat
                            ],
                            feed_dict={handle: valid_handle})
                        result = []
                        for index in range(params['batchsize']):
                            r = CocoPose.display_image(
                                valid_in_image[index, :, :, :],
                                valid_in_heat[index, :, :, :],
                                valid_p_heat[index, :, :, :], True)
                            result.append(r.astype(np.float32))

                        comparsion_of_pred_result = sess.run(
                            pred_result__summary,
                            feed_dict={pred_result_image: np.array(result)})
                        summary_writer.add_summary(comparsion_of_pred_result,
                                                   step)

                    # print train info
                    num_examples_per_step = params['batchsize'] * params['gpus']
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / params['gpus']
                    format_str = (
                        '%s: step %d, loss = %.2f, last_heat_loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                    )
                    print(format_str %
                          (datetime.now(), step, loss_value, lh_loss,
                           examples_per_sec, sec_per_batch))

                    # tensorboard visualization
                    merge_op = sess.run(summary_merge_op,
                                        feed_dict={handle: valid_handle})
                    summary_writer.add_summary(merge_op, step)

                # save model
                if step != 0 and step % params['per_saved_model_step'] == 0:
                    checkpoint_path = os.path.join(params['modelpath'],
                                                   training_name, 'model')
                    saver.save(sess, checkpoint_path, global_step=step)
            coord.request_stop()
            coord.join(threads)