예제 #1
0
 def get_roidb(imdb_name):
     """Load the named imdb and build its training roidb.

     Args:
         imdb_name: dataset key registered with the imdb factory,
             e.g. 'voc_2007_trainval'.

     Returns:
         The roidb (per-image annotation records) prepared for training.
     """
     imdb = get_imdb(imdb_name)
     # print() with a single argument behaves identically on Python 2 and 3;
     # the original Python-2-only `print` statements fail to parse under Py3.
     print('Loaded dataset `{:s}` for training'.format(imdb.name))
     # Proposal source (e.g. gt / selective_search) comes from the config.
     imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
     print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
     roidb = get_training_roidb(imdb)
     return roidb
예제 #2
0
def train_main(data_dir, model_dir, train_steps, input_yaml):
    """Configure, build, and launch CTPN training end to end.

    NOTE(review): data_dir, model_dir and train_steps are accepted for
    interface compatibility but the effective settings are read from
    input_yaml via cfg — confirm with callers before relying on them.
    """
    # Load the YAML config and echo the effective settings.
    cfg_from_file(input_yaml)
    print('Using config:')
    pprint.pprint(cfg)

    # Dataset plus its region-of-interest database for training.
    dataset = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(dataset.name))
    rois = get_training_roidb(dataset)

    # Destinations for checkpoints and log summaries.
    ckpt_dir = get_output_dir(dataset, None)
    summaries_dir = get_log_dir(dataset)
    print('Output will be saved to `{:s}`'.format(ckpt_dir))
    print('Logs will be saved to `{:s}`'.format(summaries_dir))
    device = '/gpu:0'
    print(device)

    # Build the trainable VGG-backbone network and run the training loop.
    net = get_network('VGGnet_train')

    train_net(net, dataset, rois,
              output_dir=ckpt_dir,
              log_dir=summaries_dir,
              pretrained_model='data/pretrain_model/VGG_imagenet.npy',
              max_iters=int(cfg.TRAIN.max_steps),
              restore=bool(int(cfg.TRAIN.restore)))
# NOTE(review): this example is truncated in the source — the train_net(...)
# call at the bottom is cut off mid-argument-list, so the snippet does not
# parse as-is; it is preserved verbatim for reference.
sys.path.append(os.getcwd())
this_dir = os.path.dirname(__file__)

from lib.fast_rcnn.train import get_training_roidb, train_net
from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir
from lib.datasets.factory import get_imdb
from lib.networks.factory import get_network
from lib.fast_rcnn.config import cfg

if __name__ == '__main__':
    # Load the CTPN text-detection config and echo the effective settings.
    cfg_from_file('ctpn/text.yml')
    print('Using config:')
    pprint.pprint(cfg)
    # Build the dataset and its training region-of-interest database.
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    # Checkpoint and log destinations derived from the dataset.
    output_dir = get_output_dir(imdb, None)
    log_dir = get_log_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))
    print('Logs will be saved to `{:s}`'.format(log_dir))

    device_name = '/gpu:0'
    print(device_name)

    network = get_network('VGGnet_train')

    # NOTE(review): truncated here — the remaining train_net keyword
    # arguments are missing from the source.
    train_net(network,
              imdb,
              roidb,
              output_dir=output_dir,
예제 #4
0
# NOTE(review): this example is truncated in the source — the train_net(...)
# call at the bottom is cut off mid-argument-list, so the snippet does not
# parse as-is; it is preserved verbatim for reference.
sys.path.append(os.getcwd())
this_dir = os.path.dirname(__file__)

from lib.fast_rcnn.train import get_training_roidb, train_net
from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir
from lib.datasets.factory import get_imdb
from lib.networks.factory import get_network
from lib.fast_rcnn.config import cfg

if __name__ == '__main__':
    # Load the CTPN text-detection config and echo the effective settings.
    cfg_from_file('ctpn/text.yml')
    print('Using config:')
    pprint.pprint(cfg)
    # Build the dataset and its training region-of-interest database.
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    # Checkpoint and log destinations derived from the dataset.
    output_dir = get_output_dir(imdb, None)
    log_dir = get_log_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))
    print('Logs will be saved to `{:s}`'.format(log_dir))

    device_name = '/gpu:0'
    print(device_name)

    network = get_network('VGGnet_train')

    # NOTE(review): truncated here — the remaining train_net keyword
    # arguments are missing from the source.
    train_net(network, imdb, roidb,
              output_dir=output_dir,
              log_dir=log_dir,
              pretrained_model='data/pretrain_model/VGG_imagenet.npy',
예제 #5
0
File: train.py    Project: UpCoder/MyCTPN
def train(max_step=50000,
          pretrained_model=None,
          restore=None,
          output_dir='./trained_model'):
    """Train the CTPN (VGG-backbone) text detector.

    Args:
        max_step: total number of optimisation steps to run.
        pretrained_model: optional .npy weight file to warm-start from;
            honoured only when `restore` is None.
        restore: optional checkpoint to resume from; restores the saved
            global step. Honoured only when `pretrained_model` is None.
        output_dir: directory where snapshots are written.
    """
    cfg_from_file('/home/give/PycharmProjects/MyCTPN/ctpn/text.yml')
    cfg.TRAIN.DISPLAY = 1
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    data_layer = get_data_layer(roidb, imdb.num_classes)

    vggModel = VGGTrainModel(trainable=True)
    dataset = Dataset('/home/give/Game/OCR/data/ICDAR2017/img',
                      '/home/give/Game/OCR/data/ICDAR2017/txt')
    train_generator = dataset.train_generator
    val_generator = dataset.val_generator
    total_loss, model_loss, rpn_regression_l1_loss, rpn_cross_entropy_loss, regularization_loss = vggModel.build_loss(
    )
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)

    global_step = tf.Variable(0, trainable=False)
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)
    else:
        # Default solver: SGD with momentum; `lr` is a tf.Variable so it
        # could be decayed externally.
        momentum = cfg.TRAIN.MOMENTUM
        opt = tf.train.MomentumOptimizer(lr, momentum)
    # Clip gradients by global norm (10.0) — presumably to keep the BiLSTM
    # stable; TODO confirm the threshold against the project's experiments.
    with_clip = True
    if with_clip:
        tvars = tf.trainable_variables()
        grads, norm = tf.clip_by_global_norm(tf.gradients(total_loss, tvars),
                                             10.0)
        train_op = opt.apply_gradients(list(zip(grads, tvars)),
                                       global_step=global_step)
    else:
        train_op = opt.minimize(total_loss, global_step=global_step)
    start = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=5)
        if pretrained_model is not None and restore is None:
            try:
                print(('Loading pretrained model '
                       'weights from {:s}').format(pretrained_model))
                load(pretrained_model, sess, saver, ignore_missing=True)
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt /
                # SystemExit are no longer swallowed.
                raise Exception('Check your pretrained model {:s}'.format(
                    pretrained_model))
        if pretrained_model is None and restore is not None:
            try:
                print(('Loading pretrained model '
                       'weights from {:s}').format(restore))
                global_step_restore = load(restore, sess, saver)
                sess.run(global_step.assign(global_step_restore))
                start = global_step_restore
            except Exception:
                raise Exception(
                    'Check your pretrained model {:s}'.format(restore))
        summary_op = tf.summary.merge_all()
        timer = Timer()
        for step in range(start, max_step):
            timer.tic()
            # next() works on both Python 2 and 3; the original used the
            # Python-2-only .next() method.
            imageGT = next(train_generator)
            blobs = data_layer.forward()  # advances the data layer's cursor
            fetch_list = [
                train_op, total_loss, model_loss, rpn_regression_l1_loss,
                rpn_cross_entropy_loss, regularization_loss, summary_op
            ]
            feed_dict_obj = {
                vggModel.input_img: imageGT.re_imgs,
                vggModel.input_gt: imageGT.gt_bboxes,
                vggModel.input_img_info: imageGT.im_info,
                vggModel.input_is_hard: imageGT.is_hard,
                vggModel.input_notcare: np.reshape(imageGT.notcare, [-1, 4]),
                vggModel.input_keepprob: 0.5
            }
            _, total_loss_val, model_loss_val, rpn_regression_l1_loss_value, rpn_cross_entropy_loss_value, regularization_loss_val, summary_op_value = sess.run(
                fetch_list, feed_dict=feed_dict_obj)
            if DEBUG:
                # Dump activation and parameter value ranges per layer to
                # help diagnose exploding / vanishing values.
                outputs = sess.run([
                    vggModel.input_img, vggModel.conv1_1, vggModel.conv1_2,
                    vggModel.conv2_1, vggModel.conv2_2, vggModel.conv3_1,
                    vggModel.conv3_2, vggModel.conv3_3, vggModel.conv4_1,
                    vggModel.conv4_2, vggModel.conv4_3, vggModel.conv5_1,
                    vggModel.conv5_2, vggModel.conv5_3, vggModel.rpn_conv,
                    vggModel.lstm_o, vggModel.lstm_bilstm, vggModel.lstm_fc,
                    vggModel.rpn_bbox_score, vggModel.rpn_bbox_pred,
                    vggModel.rpn_data[0], vggModel.rpn_data[1],
                    vggModel.rpn_data[2], vggModel.rpn_data[3],
                    vggModel.rpn_cls_prob
                ], feed_dict=feed_dict_obj)
                layer_names = [
                    'conv1_1', 'conv1_2',
                    'conv2_1', 'conv2_2',
                    'conv3_1', 'conv3_2', 'conv3_3',
                    'conv4_1', 'conv4_2', 'conv4_3',
                    'conv5_1', 'conv5_2', 'conv5_3',
                ]
                # Fetch the matching weight/bias variables via scope reuse.
                weights_biases_names = []
                for i in range(13):
                    with tf.variable_scope(layer_names[i], reuse=True):
                        weights_biases_names.append(tf.get_variable('weights'))
                        weights_biases_names.append(tf.get_variable('biases'))
                parameters_output = sess.run(weights_biases_names,
                                             feed_dict=feed_dict_obj)
                for i in range(13):
                    print('%s max is %.4f, min is %.4f' % (
                        layer_names[i], np.max(outputs[i + 1]),
                        np.min(outputs[i + 1])))
                    print('responding parameters max is %.4f, min is %.4f' % (
                        np.max(parameters_output[i]),
                        np.min(parameters_output[i])))
                print('rpn_conv', np.max(outputs[14]), np.min(outputs[14]))
                print('lstm_o', np.max(outputs[15]), np.min(outputs[15]))
                print('lstm_bilstm', np.max(outputs[16]), np.min(outputs[16]))
                print('lstm_fc', np.max(outputs[17]), np.min(outputs[17]))
                print('rpn bbox score ', np.max(outputs[18]),
                      np.min(outputs[18]))
                print('rpn bbox pre', np.max(outputs[19]), np.min(outputs[19]))
                print('rpn_labels', np.max(outputs[20]), np.min(outputs[20]))
                print('rpn_bbox_targets', np.max(outputs[21]),
                      np.min(outputs[21]))
                print('rpn_bbox_inside_weights', np.max(outputs[22]),
                      np.min(outputs[22]))
                print('rpn_bbox_outside_weights', np.max(outputs[23]),
                      np.min(outputs[23]))
                print('rpn_cls_prob', np.max(outputs[24]), np.min(outputs[24]))
            _diff_time = timer.toc(average=False)
            if step % cfg.TRAIN.DISPLAY == 0:
                # BUG FIX: the original printed regularization_loss_val in the
                # rpn_loss_box slot; report the actual smooth-L1 box loss.
                print(
                    'iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
                    (step, max_step, total_loss_val, model_loss_val,
                     rpn_cross_entropy_loss_value,
                     rpn_regression_l1_loss_value, lr.eval()))
                print('speed: {:.3f}s / iter'.format(_diff_time))
            if np.isnan(total_loss_val):
                # Loss diverged: dump the tensors feeding the RPN
                # cross-entropy before aborting.
                label_value, score_value, rpn_cross_entropy_loss_value = sess.run(
                    [
                        vggModel.rpn_data[0], vggModel.rpn_cls_score_reshape,
                        vggModel.rpn_cross_entropy_loss
                    ],
                    feed_dict=feed_dict_obj)
                print('label value is \n', label_value)
                print('score value is \n', score_value)
                print('rpn_cross_entropy_loss_value is \n',
                      rpn_cross_entropy_loss_value)
                assert not np.isnan(total_loss_val)
            if (step + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = step
                save(sess,
                     saver=saver,
                     output_dir=output_dir,
                     prefix='VGG',
                     infix='_',
                     iter_index=last_snapshot_iter)
예제 #6
0
def train(rpn_data,
          roi_data,
          rpn_cls_score_reshape,
          rpn_bbox_pred,
          feature_map,
          cls_score,
          bbox_pred,
          input_image_tensor,
          input_gt_box_tensor,
          input_im_info_tensor,
          pretrain_model=None):
    """Build the Faster R-CNN losses and run the training loop.

    Args:
        rpn_data: anchor-target outputs; [0]=labels, [1]=bbox targets,
            [2]/[3]=inside/outside weights.
        roi_data: proposal-target outputs; [1]=labels, [2]=bbox targets,
            [3]/[4]=inside/outside weights.
        rpn_cls_score_reshape, rpn_bbox_pred: RPN head outputs.
        feature_map: shared conv features (unused here; kept for interface
            compatibility).
        cls_score, bbox_pred: Fast R-CNN head outputs.
        input_image_tensor, input_gt_box_tensor, input_im_info_tensor:
            placeholders fed from the data layer's blobs.
        pretrain_model: optional pretrained weight file to load.
    """
    output_dir = '/home/give/PycharmProjects/MyFasterRCNN/parameters'
    saver = tf.train.Saver(max_to_keep=5)
    # --- RPN classification loss (foreground vs background) ---
    rpn_cls_score = tf.reshape(rpn_cls_score_reshape, [-1, 2])
    rpn_label = tf.reshape(rpn_data[0], [-1])
    # Drop anchors labelled -1 ("don't care") from the loss.
    rpn_cls_score = tf.reshape(
        tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))),
        [-1, 2])
    rpn_label = tf.reshape(
        tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score,
                                                       labels=rpn_label))

    # --- RPN bounding-box regression (smooth L1) ---
    # (the original's no-op `rpn_bbox_pred = rpn_bbox_pred` was removed)
    rpn_bbox_targets = tf.transpose(rpn_data[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(rpn_data[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(rpn_data[3], [0, 2, 3, 1])

    rpn_smooth_l1 = _modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets,
                                        rpn_bbox_inside_weights,
                                        rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(
        tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # --- Fast R-CNN classification loss ---
    label = tf.reshape(roi_data[1], [-1])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                       labels=label))

    # --- Fast R-CNN bounding-box regression (smooth L1) ---
    smooth_l1 = _modified_smooth_l1(1.0, bbox_pred, roi_data[2], roi_data[3],
                                    roi_data[4])
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,
                                    global_step,
                                    cfg.TRAIN.STEPSIZE,
                                    0.001,
                                    staircase=True)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
        loss, global_step=global_step)
    with tf.Session() as sess:
        args = parse_args()
        # Imports kept function-local as in the original — presumably to
        # avoid import cycles at module load time; TODO confirm.
        from lib.datasets.factory import get_imdb
        from lib.fast_rcnn.train import get_training_roidb, filter_roidb
        from lib.fast_rcnn.train import get_data_layer
        import lib.roi_data_layer.roidb as rdl_roidb
        import numpy as np
        from lib.fast_rcnn.config import cfg_from_file, cfg_from_list
        if args.cfg_file is not None:
            cfg_from_file(args.cfg_file)
        if args.set_cfgs is not None:
            cfg_from_list(args.set_cfgs)
        imdb = get_imdb('voc_2007_trainval')
        roidb = get_training_roidb(imdb)
        roidb = filter_roidb(roidb)
        bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
        data_layer = get_data_layer(roidb, imdb.num_classes)
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        if pretrain_model is not None:
            # BUG FIX: the original called .format() on the return value of
            # print() (None under Python 3); format the message first.
            print(('Loading pretrained model '
                   'weights from {:s}').format(pretrain_model))
            load(pretrain_model, sess, saver, True)
        for iter_index in range(args.max_iters):
            # blobs carries the 'data', 'gt_boxes' and 'im_info' arrays.
            blobs = data_layer.forward()
            feed_dict = {
                input_image_tensor: blobs['data'],
                input_gt_box_tensor: blobs['gt_boxes'],
                input_im_info_tensor: blobs['im_info']
            }
            _, rpn_bbox_targets_value, rpn_bbox_inside_weights_value, rpn_bbox_outside_weights_value = sess.run(
                [
                    train_op, rpn_bbox_targets, rpn_bbox_inside_weights,
                    rpn_bbox_outside_weights
                ],
                feed_dict=feed_dict)
            rpn_label_value, loss_value, loss_box_value, cross_entropy_value, rpn_cross_entropy_value, rpn_loss_box_value = sess.run(
                [
                    rpn_label, loss, loss_box, cross_entropy,
                    rpn_cross_entropy, rpn_loss_box
                ],
                feed_dict=feed_dict)
            if iter_index % 100 == 0:
                print('iter: %d / %d' % (iter_index, args.max_iters))
                print('total loss: %.4f, rpn cross entropy: %.4f,  rpn_loss_box: %.4f, cross entroopy: %.4f, loss box: %.4f' % (
                    loss_value, rpn_cross_entropy_value, rpn_loss_box_value,
                    cross_entropy_value, loss_box_value))
                # NOTE(review): snapshots only happen on display iterations
                # (every 100 steps), matching the original control flow.
                if iter_index == 0:
                    continue
                infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                         if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
                filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix +
                            '_iter_{:d}'.format(iter_index + 1) + '.ckpt')
                filename = os.path.join(output_dir, filename)

                saver.save(sess, filename)