Example #1
    parser.add_argument('--quant-delay', type=int, default=-1)
    args = parser.parse_args()

    modelpath = logpath = '../models/train/'

    if args.gpus <= 0:
        raise Exception('gpus <= 0')

    # define input placeholder
    set_network_input_wh(args.input_width, args.input_height)
    scale = 4

    if args.model in ['cmu', 'vgg'] or 'mobilenet' in args.model:
        scale = 8

    set_network_scale(scale)
    output_w, output_h = args.input_width // scale, args.input_height // scale

    logger.info('define model+')
    with tf.device(tf.DeviceSpec(device_type="CPU")):
        input_node = tf.placeholder(tf.float32, shape=(args.batchsize, args.input_height, args.input_width, 3), name='image')
        vectmap_node = tf.placeholder(tf.float32, shape=(args.batchsize, output_h, output_w, 38), name='vectmap')
        heatmap_node = tf.placeholder(tf.float32, shape=(args.batchsize, output_h, output_w, 19), name='heatmap')

        # prepare data
        df = get_dataflow_batch(args.datapath, True, args.batchsize, img_path=args.imgpath)
        enqueuer = DataFlowToQueue(df, [input_node, heatmap_node, vectmap_node], queue_size=100)
        q_inp, q_heat, q_vect = enqueuer.dequeue()

    df_valid = get_dataflow_batch(args.datapath, False, args.batchsize, img_path=args.imgpath)
    df_valid.reset_state()
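Here the tensors returned by enqueuer.dequeue() are wired straight into the graph, so each training step consumes a prefetched batch without a feed_dict. A minimal self-contained sketch of the same pattern with a plain TF1 FIFOQueue (all names below are illustrative, not from the code above):

import numpy as np
import tensorflow as tf

inp = tf.placeholder(tf.float32, shape=(None, 4), name='inp')
queue = tf.FIFOQueue(capacity=100, dtypes=[tf.float32])
enqueue_op = queue.enqueue(inp)
q_inp = queue.dequeue()                  # graph input; no feed_dict at step time
loss = tf.reduce_mean(tf.square(q_inp))  # any op built on the dequeued tensor

with tf.Session() as sess:
    sess.run(enqueue_op, feed_dict={inp: np.random.rand(8, 4)})
    print(sess.run(loss))                # running loss dequeues the batch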
Example #2
                    self.close_op.run()
                except Exception:
                    pass
                logger.info("{} Exited.".format(self.name))

    def dequeue(self):
        return self.queue.dequeue()


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    from pose_augment import set_network_input_wh, set_network_scale
    # set_network_input_wh(368, 368)
    set_network_input_wh(480, 320)
    set_network_scale(8)

    # df = get_dataflow('/data/public/rw/coco/annotations', True, '/data/public/rw/coco/')
    df = _get_dataflow_onlyread('/data/public/rw/coco/annotations', True,
                                '/data/public/rw/coco/')
    # df = get_dataflow('/root/coco/annotations', False, img_path='http://gpu-twg.kakaocdn.net/braincloud/COCO/')

    from tensorpack.dataflow.common import TestDataSpeed
    TestDataSpeed(df).start()
    sys.exit(0)
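    # NOTE: the code below never runs because of the sys.exit(0) above.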

    with tf.Session() as sess:
        df.reset_state()
        t1 = time.time()
        for idx, dp in enumerate(df.get_data()):
            if idx == 0:
Example #3
def train():
    parser = argparse.ArgumentParser(
        description='Training codes for Openpose using Tensorflow')
    parser.add_argument('--batch_size', type=int, default=10)
    parser.add_argument('--continue_training', action='store_true', default=False)
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default='checkpoints/train/mn_sepconv_33')
    # parser.add_argument('--backbone_net_ckpt_path', type=str, default='checkpoints/vgg/vgg_19.ckpt')
    parser.add_argument(
        '--backbone_net_ckpt_path',
        type=str,
        default='checkpoints/mobilenet/mobilenet_v2_1.0_96.ckpt')
    parser.add_argument('--train_vgg', type=lambda s: s.lower() in ('true', '1'),
                        default=True)  # argparse cannot parse bool values directly
    parser.add_argument('--annot_path',
                        type=str,
                        default='./COCO/annotations/')
    parser.add_argument('--img_path', type=str, default='./COCO/images/')
    # parser.add_argument('--annot_path_val', type=str,
    #                     default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/'
    #                             'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/'
    #                             'person_keypoints_val2017.json')
    # parser.add_argument('--img_path_val', type=str,
    #                     default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/'
    #                             'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/val2017/')
    parser.add_argument('--save_checkpoint_frequency', type=int, default=1000)
    parser.add_argument('--save_summary_frequency', type=int, default=100)
    parser.add_argument('--stage_num', type=int, default=6)
    parser.add_argument('--hm_channels', type=int, default=19)
    parser.add_argument('--paf_channels', type=int, default=38)
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--max_epochs', type=int, default=5)
    parser.add_argument('--use_bn', action='store_true', default=False)
    parser.add_argument('--loss_func', type=str, default='l2')
    args = parser.parse_args()

    if not args.continue_training:
        start_time = time.localtime(time.time())
        checkpoint_path = args.checkpoint_path + ('%d-%d-%d-%d-%d-%d' %
                                                  start_time[0:6])
        os.mkdir(checkpoint_path)
    else:
        checkpoint_path = args.checkpoint_path

    logger = logging.getLogger('train')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(checkpoint_path + '/train_log.log')
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    logger.info(args)
    logger.info('checkpoint_path: ' + checkpoint_path)

    # define input placeholder
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32,
                                 shape=[args.batch_size, 368, 368, 3])
        # mask_hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels])
        # mask_paf = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels])
        hm = tf.placeholder(dtype=tf.float32,
                            shape=[args.batch_size, 46, 46, args.hm_channels])
        paf = tf.placeholder(
            dtype=tf.float32,
            shape=[args.batch_size, 46, 46, args.paf_channels])

    # define data loader
    logger.info('initializing data loader...')
    set_network_input_wh(args.input_width, args.input_height)
    scale = 8
    set_network_scale(scale)
    df = get_dataflow_batch(args.annot_path,
                            True,
                            args.batch_size,
                            img_path=args.img_path)
    steps_per_epoch = df.size()
    enqueuer = DataFlowToQueue(df, [raw_img, hm, paf], queue_size=100)
    q_inp, q_heat, q_vect = enqueuer.dequeue()
    q_inp_split, q_heat_split, q_vect_split = tf.split(q_inp, 1), tf.split(
        q_heat, 1), tf.split(q_vect, 1)
    img_normalized = q_inp_split[0] / 255 - 0.5  # [-0.5, 0.5]

    df_valid = get_dataflow_batch(args.annot_path,
                                  False,
                                  args.batch_size,
                                  img_path=args.img_path)
    df_valid.reset_state()
    validation_cache = []

    logger.info('initializing model...')
    # define vgg19
    # with slim.arg_scope(vgg.vgg_arg_scope()):
    #     vgg_outputs, end_points = vgg.vgg_19(img_normalized)
    #     with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    #         logits, endpoints = mobilenet_v2.mobilenet(img_normalized)
    layers = {}
    name = ""
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        logits, endpoints = mobilenet_v2.mobilenet(img_normalized)
        for k, tensor in sorted(list(endpoints.items()), key=lambda x: x[0]):
            layers['%s%s' % (name, k)] = tensor
            # print(k, tensor.shape)

    def upsample(input, target):
        # bilinear-resize `input` to the spatial size of `target`
        return tf.image.resize_bilinear(
            input,
            tf.constant([target.shape[1].value, target.shape[2].value]),
            align_corners=False)

    # fuse backbone features: upsample the deeper layer_14 map to layer_7's
    # spatial resolution, then concatenate channel-wise
    mobilenet_feature = tf.concat([
        layers['layer_7/output'],
        upsample(layers['layer_14/output'], layers['layer_7/output'])
    ], 3)

    # pdb.set_trace()
    # get net graph
    net = PafNet(inputs_x=mobilenet_feature,
                 stage_num=args.stage_num,
                 hm_channel_num=args.hm_channels,
                 use_bn=args.use_bn)
    hm_pre, paf_pre, added_layers_out = net.gen_net()

    # two kinds of loss
    losses = []
    with tf.name_scope('loss'):
        for idx, (l1, l2) in enumerate(zip(hm_pre, paf_pre)):
            if args.loss_func == 'square':
                hm_loss = tf.reduce_sum(
                    tf.square(tf.concat(l1, axis=0) - q_heat_split[0]))
                paf_loss = tf.reduce_sum(
                    tf.square(tf.concat(l2, axis=0) - q_vect_split[0]))
                losses.append(tf.reduce_sum([hm_loss, paf_loss]))
                logger.info('use square loss')
            else:
                hm_loss = tf.nn.l2_loss(
                    tf.concat(l1, axis=0) - q_heat_split[0])
                paf_loss = tf.nn.l2_loss(
                    tf.concat(l2, axis=0) - q_vect_split[0])
                losses.append(tf.reduce_mean([hm_loss, paf_loss]))
                logger.info('use l2 loss')
        loss = tf.reduce_sum(losses) / args.batch_size

    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(1e-4,
                                               global_step,
                                               steps_per_epoch,
                                               0.5,
                                               staircase=True)
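    # with staircase=True the learning rate halves once per epoch:
    #   lr(step) = 1e-4 * 0.5 ** (step // steps_per_epoch)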
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if args.train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='MobilenetV2')
    with tf.name_scope('train'):
        train = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=1e-8).minimize(
                                           loss=loss,
                                           global_step=global_step,
                                           var_list=trainable_var_list)
    logger.info('initialize saver...')
    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='MobilenetV2'),
                              name='mobilenet_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize tensorboard')
    tf.summary.scalar("lr", learning_rate)
    tf.summary.scalar("loss2", loss)
    tf.summary.histogram('img_normalized', img_normalized)
    tf.summary.histogram('mobilenet_outputs', logits)
    tf.summary.histogram('added_layers_out', added_layers_out)
    tf.summary.image('mobilenet_out',
                     tf.transpose(logits[0:1, :, :, :], perm=[3, 1, 2, 0]),
                     max_outputs=512)
    tf.summary.image('added_layers_out',
                     tf.transpose(added_layers_out[0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=128)
    tf.summary.image('paf_gt',
                     tf.transpose(q_vect_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=38)
    tf.summary.image('hm_gt',
                     tf.transpose(q_heat_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=19)
    for i in range(args.stage_num):
        tf.summary.image('hm_pre_stage_%d' % i,
                         tf.transpose(hm_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=19)
        tf.summary.image('paf_pre_stage_%d' % i,
                         tf.transpose(paf_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=38)
    tf.summary.image('input', img_normalized, max_outputs=4)

    logger.info('initialize session...')
    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter(checkpoint_path, sess.graph)
        sess.run(tf.group(tf.global_variables_initializer()))
        if args.backbone_net_ckpt_path is not None:
            logger.info('restoring mobilenet weights from %s' %
                        args.backbone_net_ckpt_path)
            restorer.restore(sess, args.backbone_net_ckpt_path)
        if args.continue_training:
            saver.restore(
                sess,
                tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
            logger.info('restoring from checkpoint...')
        logger.info('start training...')
        coord = tf.train.Coordinator()
        enqueuer.set_coordinator(coord)
        enqueuer.start()
        best_checkpoint = float('inf')  # must persist across epochs, not reset each pass
        while True:
            for _ in tqdm(range(steps_per_epoch)):
                total_loss, _, gs_num = sess.run([loss, train, global_step])
                epoch = gs_num / steps_per_epoch

                if gs_num % args.save_summary_frequency == 0:
                    total_loss, gs_num, summary, lr = sess.run(
                        [loss, global_step, merged, learning_rate])
                    writer.add_summary(summary, gs_num)
                    logger.info('epoch=%f, step=%d, total_loss=%f, lr=%f' %
                                (epoch, gs_num, total_loss, lr))

                if gs_num % args.save_checkpoint_frequency == 0:
                    valid_loss = 0
                    if len(validation_cache) == 0:
                        for images_test, heatmaps, vectmaps in tqdm(
                                df_valid.get_data()):
                            validation_cache.append(
                                (images_test, heatmaps, vectmaps))
                        df_valid.reset_state()
                        df_valid = None  # release the dataflow; batches live on in validation_cache

                    for images_test, heatmaps, vectmaps in validation_cache:
                        valid_loss += sess.run(loss,
                                               feed_dict={
                                                   q_inp: images_test,
                                                   q_vect: vectmaps,
                                                   q_heat: heatmaps
                                               })

                    if valid_loss / len(validation_cache) <= best_checkpoint:
                        best_checkpoint = valid_loss / len(validation_cache)
                        saver.save(sess,
                                   save_path=checkpoint_path + '/' + 'model',
                                   global_step=gs_num)
                        logger.info(
                            'best_checkpoint = %f, saving checkpoint to ' %
                            best_checkpoint + checkpoint_path + '/' +
                            'model-%d' % gs_num)

                    else:
                        logger.info('validation loss = %f, not improved' %
                                    (valid_loss / len(validation_cache)))

                if epoch >= args.max_epochs:
                    sess.close()
                    return 0
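Note on the validation loop above: TF1 allows values to be fed for non-placeholder tensors, so the cached validation batches are pushed in through the dequeued q_inp/q_heat/q_vect tensors via feed_dict, bypassing the training queue.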
Example #4
from pose_dataset import get_dataflow_batch
from pose_augment import set_network_input_wh, set_network_scale

if __name__ == '__main__':
    """
    OpenPose data preparation can be a bottleneck during training.
    You can run multiple workers on separate nodes to generate input batches and speed training up.
    """
    parser = argparse.ArgumentParser(
        description='Worker for preparing input batches.')
    parser.add_argument('--datapath', type=str, default='/coco/annotations/')
    parser.add_argument('--imgpath', type=str, default='/coco/')
    parser.add_argument('--batchsize', type=int, default=64)
    parser.add_argument('--train', type=lambda s: s.lower() in ('true', '1'),
                        default=True)  # argparse cannot parse bool values directly
    parser.add_argument('--master',
                        type=str,
                        default='tcp://csi-cluster-gpu20.dakao.io:1027')
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--scale-factor', type=int, default=2)
    args = parser.parse_args()

    set_network_input_wh(args.input_width, args.input_height)
    set_network_scale(args.scale_factor)

    df = get_dataflow_batch(args.datapath, args.train, args.batchsize,
                            args.imgpath)

    send_dataflow_zmq(df, args.master, hwm=10)
Example #5
import argparse

from tensorpack.dataflow.remote import send_dataflow_zmq

from pose_dataset import get_dataflow_batch
from pose_augment import set_network_input_wh, set_network_scale

if __name__ == '__main__':
    """
    OpenPose data preparation can be a bottleneck during training.
    You can run multiple workers on separate nodes to generate input batches and speed training up.
    """
    parser = argparse.ArgumentParser(description='Worker for preparing input batches.')
    parser.add_argument('--datapath', type=str, default='/coco/annotations/')
    parser.add_argument('--imgpath', type=str, default='/coco/')
    parser.add_argument('--batchsize', type=int, default=64)
    parser.add_argument('--train', type=lambda s: s.lower() in ('true', '1'),
                        default=True)  # argparse cannot parse bool values directly
    parser.add_argument('--master', type=str, default='tcp://csi-cluster-gpu20.dakao.io:1027')
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--scale-factor', type=int, default=2)
    args = parser.parse_args()

    set_network_input_wh(args.input_width, args.input_height)
    set_network_scale(args.scale_factor)

    df = get_dataflow_batch(args.datapath, args.train, args.batchsize, args.imgpath)

    send_dataflow_zmq(df, args.master, hwm=10)
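On the training side, the stream published by this worker can be consumed with tensorpack's RemoteDataZMQ, as Example #6 below does. A minimal sketch of the receiving end (the address must match the worker's --master flag):

from tensorpack.dataflow.remote import RemoteDataZMQ

df = RemoteDataZMQ('tcp://csi-cluster-gpu20.dakao.io:1027', hwm=3)
df.reset_state()
for dp in df.get_data():  # each dp is one (image, heatmap, vectmap) batch
    pass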
Example #6
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    args = parser.parse_args()

    if args.gpus <= 0:
        raise Exception('gpus <= 0')

    # define input placeholder
    set_network_input_wh(args.input_width, args.input_height)
    scale = 4

    if args.model in ['cmu', 'vgg', 'mobilenet_thin', 'mobilenet_try', 'mobilenet_try2', 'mobilenet_try3', 'hybridnet_try']:
        scale = 8

    set_network_scale(scale)
    output_w, output_h = args.input_width // scale, args.input_height // scale

    logger.info('define model+')
    with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
        input_node = tf.placeholder(tf.float32, shape=(args.batchsize, args.input_height, args.input_width, 3), name='image')
        vectmap_node = tf.placeholder(tf.float32, shape=(args.batchsize, output_h, output_w, 38), name='vectmap')
        heatmap_node = tf.placeholder(tf.float32, shape=(args.batchsize, output_h, output_w, 19), name='heatmap')

        # prepare data
        if not args.remote_data:
            df = get_dataflow_batch(args.datapath, True, args.batchsize, img_path=args.imgpath)
        else:
            # transfer inputs from ZMQ
            df = RemoteDataZMQ(args.remote_data, hwm=3)
        enqueuer = DataFlowToQueue(df, [input_node, heatmap_node, vectmap_node], queue_size=100)
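When --remote-data is set, the queue here is filled with batches streamed from standalone workers such as the ones in Examples #4 and #5 (send_dataflow_zmq on the worker side, RemoteDataZMQ here).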