def __init__(self,
             sess,
             network,
             imdb,
             roidb,
             output_dir,
             logdir,
             pretrained_model=None):
    """Set up the solver wrapper.

    Stores the network/dataset handles, precomputes bounding-box
    regression targets (when enabled in the config), and creates the
    checkpoint saver plus the TensorBoard summary writer.
    """
    self.net = network
    self.imdb = imdb
    self.roidb = roidb
    self.pretrained_model = pretrained_model
    self.output_dir = output_dir

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        # Normalization stats are kept so predictions can be
        # denormalized later.
        (self.bbox_means,
         self.bbox_stds) = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')

    # Checkpointing: V2 checkpoint format, keep up to 100 snapshots.
    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=100)
    # TensorBoard summaries for the default graph, flushed every 5 s.
    self.writer = tf.summary.FileWriter(logdir=logdir,
                                        graph=tf.get_default_graph(),
                                        flush_secs=5)
# ---- Example #2 (score: 0) ----
    def __init__(self,
                 sess,
                 network,
                 imdb,
                 roidb,
                 output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Records the network/dataset handles, adds bounding-box regression
        targets to the roidb when enabled, and builds the checkpoint savers.
        """
        self.net = network
        self.imdb = imdb
        self.roidb = roidb
        self.pretrained_model = pretrained_model
        self.output_dir = output_dir

        print('Computing bounding-box regression targets...')
        if cfg.ZLRM.TRAIN.BBOX_REG:
            # Keep the normalization stats for later denormalization.
            (self.bbox_means,
             self.bbox_stds) = rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')

        # One general checkpoint saver plus a second saver used when
        # restoring RPN weights.
        self.saver = tf.train.Saver(max_to_keep=100)
        self.rpn_restor_saver = tf.train.Saver()
# ---- Example #3 (score: 0) ----
    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Builds a Caffe SGD solver from `solver_prototxt`, optionally loads
        pretrained weights, parses the solver parameters, and hands the
        roidb to the network's data layer.

        NOTE(review): Python 2 code (print statements) — runs under py2 only.
        """
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
            cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print 'Computing bounding-box regression targets...'
            # Stats kept so bbox predictions can be denormalized at snapshot
            # time.
            self.bbox_means, self.bbox_stds = \
                    rdl_roidb.add_bbox_regression_targets(roidb)
            print 'done'

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            # py2 print statement: prints the formatted expression.
            print ('Loading pretrained model '
                   'weights from {:s}').format(pretrained_model)
            self.solver.net.copy_from(pretrained_model)

        # Parse the solver prototxt into a SolverParameter message so fields
        # like snapshot_prefix are accessible later.
        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        # Layer 0 is assumed to be the roidb-driven data layer.
        self.solver.net.layers[0].set_roidb(roidb)
# ---- Example #4 (score: 0) ----
    def __init__(self, sess, network, imdb, roidb, output_dir, logdir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Keeps references to the network and dataset, precomputes bbox
        regression targets when enabled, and creates the checkpoint saver
        plus the TensorBoard summary writer.
        """
        self.net = network
        self.imdb = imdb
        self.roidb = roidb
        self.pretrained_model = pretrained_model
        self.output_dir = output_dir

        print('Computing bounding-box regression targets...')
        if cfg.TRAIN.BBOX_REG:
            # Stats are kept for later denormalization of predictions.
            (self.bbox_means,
             self.bbox_stds) = rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')

        # Checkpointing (V2 format, up to 100 snapshots kept).
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=100)
        # Summaries are flushed to disk every 5 seconds.
        self.writer = tf.summary.FileWriter(logdir=logdir,
                                            graph=tf.get_default_graph(),
                                            flush_secs=5)
# ---- Example #5 (score: 0) ----
    def __init__(self,
                 sess,
                 network,
                 imdb,
                 roidb,
                 output_dir,
                 pretrained_model=None):
        """Record training inputs, add bbox-regression targets, build savers."""
        self.net = network
        self.imdb = imdb
        self.roidb = roidb
        self.pretrained_model = pretrained_model
        self.output_dir = output_dir

        print('Computing bounding-box regression targets...')
        if cfg.ZLRM.TRAIN.BBOX_REG:
            # Normalization stats kept for later denormalization.
            (self.bbox_means,
             self.bbox_stds) = rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')

        # General checkpoint saver plus a saver used to restore RPN weights.
        self.saver = tf.train.Saver(max_to_keep=100)
        self.rpn_restor_saver = tf.train.Saver()
# ---- Example #6 (score: 0) ----
    def __init__(self,
                 sess,
                 saver,
                 network,
                 imdb,
                 roidb,
                 output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Stores the network, dataset and roidb, precomputes bounding-box
        regression targets when enabled, and keeps the externally created
        `saver` for checkpointing.

        NOTE(review): Python 2 code (print statements) — runs under py2 only.
        """
        self.net = network
        self.imdb = imdb
        self.roidb = roidb
        self.output_dir = output_dir
        self.pretrained_model = pretrained_model

        print 'Computing bounding-box regression targets...'
        if cfg.TRAIN.BBOX_REG:
            # Stats kept so predictions can be denormalized later.
            self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(
                roidb)
        print 'done'

        # For checkpoint
        self.saver = saver
# ---- Example #7 (score: 0) ----
    def __init__(self,
                 solver_prototxt,
                 roidb,
                 output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Unconditionally adds bounding-box regression targets, builds a Caffe
        SGD solver, optionally loads pretrained weights, parses the solver
        parameters, and hands the roidb to the data layer.

        NOTE(review): Python 2 code. Under py3 the
        `print('...').format(...)` line would call .format on None; as a
        py2 print statement it prints the formatted string.
        """
        self.output_dir = output_dir

        print 'Computing bounding-box regression targets...'
        self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
        print 'done'

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model '
                  'weights from {:s}').format(pretrained_model)
            self.solver.net.copy_from(pretrained_model)

        # Parse the solver prototxt so solver parameters (e.g. snapshot
        # settings) are available programmatically.
        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        # Layer 0 is assumed to be the roidb-driven data layer.
        self.solver.net.layers[0].set_roidb(roidb)
# ---- Example #8 (score: 0) ----
def train(max_step=50000,
          pretrained_model=None,
          restore=None,
          output_dir='./trained_model'):
    """Train the CTPN VGG model.

    Loads config and the voc_2007_trainval dataset, builds the loss and a
    (gradient-clipped) optimizer, then runs up to `max_step` iterations,
    periodically logging and snapshotting.

    NOTE(review): Python 2 code (print statements, `generator.next()`);
    paths are hard-coded to one machine.
    """
    cfg_from_file('/home/give/PycharmProjects/MyCTPN/ctpn/text.yml')
    cfg.TRAIN.DISPLAY = 1
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        # Stats computed but not used below — presumably kept for parity
        # with other trainers; verify before removing.
        bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    data_layer = get_data_layer(roidb, imdb.num_classes)

    vggModel = VGGTrainModel(trainable=True)
    dataset = Dataset('/home/give/Game/OCR/data/ICDAR2017/img',
                      '/home/give/Game/OCR/data/ICDAR2017/txt')
    train_generator = dataset.train_generator
    val_generator = dataset.val_generator
    total_loss, model_loss, rpn_regression_l1_loss, rpn_cross_entropy_loss, regularization_loss = vggModel.build_loss(
    )
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)

    global_step = tf.Variable(0, trainable=False)
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    # Optimizer selection from config; default branch uses momentum SGD.
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)
    else:
        # lr = tf.Variable(0.0, trainable=False)
        momentum = cfg.TRAIN.MOMENTUM
        opt = tf.train.MomentumOptimizer(lr, momentum)
    # Global-norm gradient clipping at 10.0 (always on: with_clip is True).
    with_clip = True
    if with_clip:
        tvars = tf.trainable_variables()
        grads, norm = tf.clip_by_global_norm(tf.gradients(total_loss, tvars),
                                             10.0)
        train_op = opt.apply_gradients(list(zip(grads, tvars)),
                                       global_step=global_step)
    else:
        train_op = opt.minimize(total_loss, global_step=global_step)
    # tf_config = tf.ConfigProto(allow_soft_placement=True)
    # tf_config.gpu_options.allow_growth = True
    # tf_config.gpu_options.per_process_gpu_memory_fraction = 0.75
    start = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=5)
        # Either warm-start from a pretrained model or resume from a
        # checkpoint — the two options are mutually exclusive here.
        if pretrained_model is not None and restore is None:
            try:
                print(('Loading pretrained model '
                       'weights from {:s}').format(pretrained_model))
                load(pretrained_model, sess, saver, ignore_missing=True)
            except:
                raise Exception('Check your pretrained model {:s}'.format(
                    pretrained_model))
        if pretrained_model is None and restore is not None:
            try:
                print(('Loading pretrained model '
                       'weights from {:s}').format(restore))
                global_step_restore = load(restore, sess, saver)
                sess.run(global_step.assign(global_step_restore))
                start = global_step_restore
            except:
                raise Exception(
                    'Check your pretrained model {:s}'.format(restore))
        summary_op = tf.summary.merge_all()
        timer = Timer()
        for iter in range(start, max_step):
            timer.tic()
            # py2 iterator protocol; under py3 this would be
            # next(train_generator).
            imageGT = train_generator.next()
            # NOTE(review): blobs is fetched but never used in this loop —
            # training feeds come from imageGT instead; verify intent.
            blobs = data_layer.forward()
            fetch_list = [
                train_op, total_loss, model_loss, rpn_regression_l1_loss,
                rpn_cross_entropy_loss, regularization_loss, summary_op
            ]
            feed_dict_obj = {
                vggModel.input_img: imageGT.re_imgs,
                vggModel.input_gt: imageGT.gt_bboxes,
                vggModel.input_img_info: imageGT.im_info,
                vggModel.input_is_hard: imageGT.is_hard,
                vggModel.input_notcare: np.reshape(imageGT.notcare, [-1, 4]),
                vggModel.input_keepprob: 0.5
            }
            _, total_loss_val, model_loss_val, rpn_regression_l1_loss_value, rpn_cross_entropy_loss_value, regularization_loss_val, summary_op_value = sess.run(
                fetch_list, feed_dict=feed_dict_obj)
            if DEBUG:
                # Debug path: dump activation and parameter ranges for every
                # layer (extra forward pass with the same feed dict).
                outputs = sess.run([
                    vggModel.input_img, vggModel.conv1_1, vggModel.conv1_2,
                    vggModel.conv2_1, vggModel.conv2_2, vggModel.conv3_1,
                    vggModel.conv3_2, vggModel.conv3_3, vggModel.conv4_1,
                    vggModel.conv4_2, vggModel.conv4_3, vggModel.conv5_1,
                    vggModel.conv5_2, vggModel.conv5_3, vggModel.rpn_conv,
                    vggModel.lstm_o, vggModel.lstm_bilstm, vggModel.lstm_fc,
                    vggModel.rpn_bbox_score, vggModel.rpn_bbox_pred,
                    vggModel.rpn_data[0], vggModel.rpn_data[1],
                    vggModel.rpn_data[2], vggModel.rpn_data[3],
                    vggModel.rpn_cls_prob
                ],
                                   feed_dict=feed_dict_obj)
                layer_names = [
                    'conv1_1',
                    'conv1_2',
                    'conv2_1',
                    'conv2_2',
                    'conv3_1',
                    'conv3_2',
                    'conv3_3',
                    'conv4_1',
                    'conv4_2',
                    'conv4_3',
                    'conv5_1',
                    'conv5_2',
                    'conv5_3',
                ]
                weights_biases_names = []
                # Collect the weight/bias variables of the 13 conv layers by
                # reusing their variable scopes.
                for i in range(13):
                    with tf.variable_scope(layer_names[i], reuse=True):
                        weights_biases_names.append(tf.get_variable('weights'))
                        weights_biases_names.append(tf.get_variable('biases'))
                # tf.gradients()
                parameters_output = sess.run(weights_biases_names,
                                             feed_dict=feed_dict_obj)
                # outputs[0] is the input image, so layer i maps to
                # outputs[i + 1].
                for i in range(13):
                    print '%s max is %.4f, min is %.4f' % (
                        layer_names[i], np.max(
                            outputs[i + 1]), np.min(outputs[i + 1]))
                    print 'responding parameters max is %.4f, min is %.4f' % (
                        np.max(parameters_output[i]),
                        np.min(parameters_output[i]))
                print 'rpn_conv', np.max(outputs[14]), np.min(outputs[14])
                print 'lstm_o', np.max(outputs[15]), np.min(outputs[15])
                print 'lstm_bilstm', np.max(outputs[16]), np.min(outputs[16])
                print 'lstm_fc', np.max(outputs[17]), np.min(outputs[17])
                print 'rpn bbox score ', np.max(outputs[18]), np.min(
                    outputs[18])
                print 'rpn bbox pre', np.max(outputs[19]), np.min(outputs[19])
                print 'rpn_labels', np.max(outputs[20]), np.min(outputs[20])
                print 'rpn_bbox_targets', np.max(outputs[21]), np.min(
                    outputs[21])
                print 'rpn_bbox_inside_weights', np.max(outputs[22]), np.min(
                    outputs[22])
                print 'rpn_bbox_outside_weights', np.max(outputs[23]), np.min(
                    outputs[23])
                print 'rpn_cls_prob', np.max(outputs[24]), np.min(outputs[24])
                # 1717
            _diff_time = timer.toc(average=False)
            if (iter) % (cfg.TRAIN.DISPLAY) == 0:
                # NOTE(review): the value printed under the "rpn_loss_box"
                # label is regularization_loss_val, and
                # rpn_regression_l1_loss_value is fetched but never printed —
                # the labels and values appear mismatched; confirm and fix.
                print(
                'iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' % \
                (iter, max_step, total_loss_val, model_loss_val, rpn_cross_entropy_loss_value,
                 regularization_loss_val, lr.eval()))
                print('speed: {:.3f}s / iter'.format(_diff_time))
            if np.isnan(total_loss_val):
                # Diagnostic dump before aborting on NaN loss (note: this is
                # a fresh forward pass, so values may differ from the one
                # that produced the NaN).
                label_value, score_value, rpn_cross_entropy_loss_value = sess.run(
                    [
                        vggModel.rpn_data[0], vggModel.rpn_cls_score_reshape,
                        vggModel.rpn_cross_entropy_loss
                    ],
                    feed_dict={
                        vggModel.input_img:
                        imageGT.re_imgs,
                        vggModel.input_gt:
                        imageGT.gt_bboxes,
                        vggModel.input_img_info:
                        imageGT.im_info,
                        vggModel.input_is_hard:
                        imageGT.is_hard,
                        vggModel.input_notcare:
                        np.reshape(imageGT.notcare, [-1, 4]),
                        vggModel.input_keepprob:
                        0.5
                    })
                print 'label value is \n', label_value
                print 'score value is \n', score_value
                print 'rpn_cross_entropy_loss_value is \n', rpn_cross_entropy_loss_value
                # print 'label value is \n', label_value
                assert not np.isnan(total_loss_val)
            # Periodic snapshot of the model parameters.
            if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                save(sess,
                     saver=saver,
                     output_dir=output_dir,
                     prefix='VGG',
                     infix='_',
                     iter_index=last_snapshot_iter)
# ---- Example #9 (score: 0) ----
    def __init__(self,
                 imdb,
                 roidb,
                 rpn_graph,
                 detect_graph,
                 shared_conv_graph,
                 rpn_output_dir,
                 detect_output_dir,
                 shared_conv_rpn_output_dir,
                 shared_conv_detect_output_dir,
                 train_step='train_step_rpn',
                 restore=False,
                 pretrained_model=None):
        """Initialize the SolverWrapper for multi-stage (RPN / detect /
        shared-conv) training.

        Stores all graph and output-directory handles, adds bbox-regression
        targets to the roidb when enabled, and creates per-graph savers.
        Which savers are created for the shared-conv graph depends on
        `restore` and `train_step`.
        """
        self.imdb = imdb
        self.roidb = roidb
        self.rpn_graph = rpn_graph
        self.detect_graph = detect_graph
        self.shared_conv_graph = shared_conv_graph
        self.rpn_output_dir = rpn_output_dir
        self.detect_output_dir = detect_output_dir
        self.shared_conv_rpn_output_dir = shared_conv_rpn_output_dir
        self.shared_conv_detect_output_dir = shared_conv_detect_output_dir
        self.train_step = train_step
        self.restore = restore
        self.pretrained_model = pretrained_model
        self.train_op = {}  # training op nodes used inside the session, e.g. rpn_train_op, rpn_init

        print('Computing bounding-box regression targets...')
        if cfg.ZLRM.TRAIN.BBOX_REG:
            # Stats kept so predictions can be denormalized later.
            self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(
                roidb)
        print('done')

        # Each graph gets its own saver, created inside that graph's context.
        with self.rpn_graph.as_default():
            self.rpn_restore_saver = tf.train.Saver()
        with self.detect_graph.as_default():
            self.detect_restore_saver = tf.train.Saver()
        with self.shared_conv_graph.as_default():
            if self.restore:
                self.shared_conv_rpn_restore_saver = tf.train.Saver()
                self.shared_conv_detect_restore_saver = tf.train.Saver()
            elif self.train_step == 'train_step_shared_conv_rpn' or self.train_step == 'train_step_shared_conv_detect':
                tvars = tf.all_variables()
                # Read the variable names present in the latest detect
                # checkpoint (original comment here was truncated).
                detect_ckpt = tf.train.get_checkpoint_state(
                    self.detect_output_dir)
                reader = tf.train.NewCheckpointReader(
                    detect_ckpt.model_checkpoint_path)
                detect_variables = reader.get_variable_to_shape_map()
                # Saver for the subset of variables that exist in the detect
                # checkpoint.
                tvars_detect = [
                    v for v in tvars
                    if (v.name.split(':')[0] in detect_variables)
                ]
                print('tvars_detect', tvars_detect)
                self.shared_conv_rpn_restore_detect_fraction_saver = tf.train.Saver(
                    var_list=tvars_detect)

                # Saver for the RPN-head variables (selected by top-level
                # scope name).
                tvars_rpn = [
                    v for v in tvars
                    if (v.name.split('/')[0] == 'rpn_conv'
                        or v.name.split('/')[0] == 'rpn_cls_score'
                        or v.name.split('/')[0] == 'rpn_bbox_pred')
                ]
                print('tvars_rpn', tvars_rpn)
                self.shared_conv_rpn_restore_rpn_fraction_saver = tf.train.Saver(
                    var_list=tvars_rpn)

                self.shared_conv_rpn_restore_saver = tf.train.Saver()

                self.shared_conv_detect_restore_saver = tf.train.Saver()
# ---- Example #10 (score: 0) ----
def train(rpn_data,
          roi_data,
          rpn_cls_score_reshape,
          rpn_bbox_pred,
          feature_map,
          cls_score,
          bbox_pred,
          input_image_tensor,
          input_gt_box_tensor,
          input_im_info_tensor,
          pretrain_model=None):
    """Build the Faster R-CNN losses from the given graph tensors and run
    momentum-SGD training on voc_2007_trainval, snapshotting every 100 iters.

    NOTE(review): Python 2 code (print statements; `print('...').format(...)`
    parses as a py2 print statement); output path is hard-coded.
    """
    output_dir = '/home/give/PycharmProjects/MyFasterRCNN/parameters'
    saver = tf.train.Saver(max_to_keep=5)
    # RPN
    # classification loss(fg bg)
    rpn_cls_score = tf.reshape(rpn_cls_score_reshape, [-1, 2])
    rpn_label = tf.reshape(rpn_data[0], [-1])
    # Ignore anchors labeled -1 ("don't care") in both scores and labels.
    rpn_cls_score = tf.reshape(
        tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))),
        [-1, 2])
    rpn_label = tf.reshape(
        tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score,
                                                       labels=rpn_label))

    # bounding box regression L1 loss
    rpn_bbox_pred = rpn_bbox_pred
    # rpn_data targets/weights come in NCHW; transpose to NHWC to match the
    # prediction layout.
    rpn_bbox_targets = tf.transpose(rpn_data[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(rpn_data[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(rpn_data[3], [0, 2, 3, 1])

    rpn_smooth_l1 = _modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets,
                                        rpn_bbox_inside_weights,
                                        rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(
        tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # classification loss
    label = tf.reshape(roi_data[1], [-1])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                       labels=label))

    smooth_l1 = _modified_smooth_l1(1.0, bbox_pred, roi_data[2], roi_data[3],
                                    roi_data[4])
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    # Total loss: RCNN cls + RCNN box + RPN cls + RPN box.
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    global_step = tf.Variable(0, trainable=False)
    # Stepwise exponential LR decay (factor 0.001 every STEPSIZE steps).
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,
                                    global_step,
                                    cfg.TRAIN.STEPSIZE,
                                    0.001,
                                    staircase=True)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
        loss, global_step=global_step)
    with tf.Session() as sess:
        args = parse_args()
        # Function-local imports: dataset/config helpers are only needed at
        # training time.
        from lib.datasets.factory import get_imdb
        from lib.fast_rcnn.train import get_training_roidb, filter_roidb
        from lib.fast_rcnn.train import get_data_layer
        import lib.roi_data_layer.roidb as rdl_roidb
        import numpy as np
        from lib.fast_rcnn.config import cfg_from_file, cfg_from_list
        if args.cfg_file is not None:
            cfg_from_file(args.cfg_file)
        if args.set_cfgs is not None:
            cfg_from_list(args.set_cfgs)
        imdb = get_imdb('voc_2007_trainval')
        roidb = get_training_roidb(imdb)
        roidb = filter_roidb(roidb)
        bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
        data_layer = get_data_layer(roidb, imdb.num_classes)
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        if pretrain_model is not None:
            # py2 print statement: prints the formatted expression.
            print('Loading pretrained model '
                  'weights from {:s}').format(pretrain_model)
            load(pretrain_model, sess, saver, True)
        for iter_index in range(args.max_iters):
            blobs = data_layer.forward()
            # blobs keys include: data, gt_boxesm im_info
            _, rpn_bbox_targets_value, rpn_bbox_inside_weights_value, rpn_bbox_outside_weights_value = sess.run(
                [
                    train_op, rpn_bbox_targets, rpn_bbox_inside_weights,
                    rpn_bbox_outside_weights
                ],
                feed_dict={
                    input_image_tensor: blobs['data'],
                    input_gt_box_tensor: blobs['gt_boxes'],
                    input_im_info_tensor: blobs['im_info']
                })
            # NOTE(review): this second run re-evaluates the losses in a
            # separate forward pass (after the weight update), so the logged
            # values are not the ones the update step used.
            rpn_label_value, loss_value, loss_box_value, cross_entropy_value, rpn_cross_entropy_value, rpn_loss_box_value = sess.run(
                [
                    rpn_label, loss, loss_box, cross_entropy,
                    rpn_cross_entropy, rpn_loss_box
                ],
                feed_dict={
                    input_image_tensor: blobs['data'],
                    input_gt_box_tensor: blobs['gt_boxes'],
                    input_im_info_tensor: blobs['im_info']
                })
            if iter_index % 100 == 0:
                print 'iter: %d / %d' % (iter_index, args.max_iters)
                print 'total loss: %.4f, rpn cross entropy: %.4f,  rpn_loss_box: %.4f, cross entroopy: %.4f, loss box: %.4f' % (
                    loss_value, rpn_cross_entropy_value, rpn_loss_box_value,
                    cross_entropy_value, loss_box_value)
                # print np.shape(rpn_label_value), rpn_label_value
                if iter_index == 0:
                    continue
                # Snapshot filename follows the cfg snapshot prefix/infix
                # convention.
                infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                         if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
                filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix +
                            '_iter_{:d}'.format(iter_index + 1) + '.ckpt')
                filename = os.path.join(output_dir, filename)

                saver.save(sess, filename)