コード例 #1
0
def run_train():
    """Build the multi-view detection graph and run its training loop.

    The function takes no arguments and returns nothing; everything is
    configured through constants in the body:

    * outputs (TensorBoard graph, checkpoints, text log) go under the
      hard-coded ``out_dir``;
    * training data comes from ``load_dummy_datas()``;
    * a top-view network, a front-view network and an rgb network are
      created and combined by ``fusion_net``;
    * the RPN losses, the fusion losses (regression weighted by 0.1) and an
      L2 term are minimised with plain gradient descent at a fixed rate.

    Each iteration samples one random frame, runs the proposal part of the
    graph, builds RPN / RCNN training targets from those proposals in
    Python, then runs one optimisation step and appends the losses to the
    log. Frames without ground-truth labels are re-sampled without
    advancing the iteration counter.

    All helpers used here (``make_bases``, ``rpn_target``, ``fusion_net``,
    ``IS_TRAIN_PHASE``, ...) are defined outside this function.
    """

    # output dir, etc
    out_dir = '/home/dongwoo/Project/MV3D/data/out'
    makedirs(out_dir + '/tf')
    makedirs(out_dir + '/check_points')
    # The log file is opened in append mode, so repeated runs share one file.
    log = Logger(out_dir + '/log.txt', mode='a')

    #lidar data -----------------
    # `if 1:` is an always-on toggle kept from the original script.
    if 1:
        # Anchor bases: 3 ratios x 3 scales are passed to make_bases.
        ratios = np.array([0.5, 1, 2], dtype=np.float32)
        scales = np.array([1, 2, 3], dtype=np.float32)
        bases = make_bases(base_size=16, ratios=ratios, scales=scales)
        num_bases = len(bases)
        # Also used as 1/stride for the ROI scale passed to fusion_net below.
        stride = 8

        rgbs, tops, fronts, gt_labels, gt_boxes3d, top_imgs, front_imgs, lidars = load_dummy_datas(
        )
        num_frames = len(rgbs)

        # Per-frame shapes are taken from the first frame; the placeholders
        # below are built from them, so every frame must share these shapes.
        top_shape = tops[0].shape
        front_shape = fronts[0].shape
        rgb_shape = rgbs[0].shape
        top_feature_shape = (top_shape[0] // stride, top_shape[1] // stride)
        # Shape of one regression target; presumably 8 box corners x (x, y, z)
        # -- TODO confirm against rcnn_target.
        out_shape = (8, 3)

        #-----------------------
        #check data
        # Disabled (`if 0:`) 3D visualisation of the first lidar frame.
        if 0:
            fig = mlab.figure(figure=None,
                              bgcolor=(0, 0, 0),
                              fgcolor=None,
                              engine=None,
                              size=(1000, 500))
            draw_lidar(lidars[0], fig=fig)
            draw_gt_boxes3d(gt_boxes3d[0], fig=fig)
            mlab.show(1)
            cv2.waitKey(1)

    # set anchor boxes

    num_class = 2  #include background
    anchors, inside_inds = make_anchors(bases, stride, top_shape[0:2],
                                        top_feature_shape[0:2])
    # The inside_inds returned by make_anchors is discarded: every anchor
    # index is used instead.  #<todo>
    inside_inds = np.arange(0, len(anchors), dtype=np.int32)  #use all  #<todo>
    print('out_shape=%s' % str(out_shape))
    print('num_frames=%d' % num_frames)

    #load model ####################################################################################################
    # Graph inputs. The leading None dimension leaves the batch size open;
    # the training loop below always feeds a single frame.
    top_anchors = tf.placeholder(shape=[None, 4],
                                 dtype=tf.int32,
                                 name='anchors')
    top_inside_inds = tf.placeholder(shape=[None],
                                     dtype=tf.int32,
                                     name='inside_inds')

    top_images = tf.placeholder(shape=[None, *top_shape],
                                dtype=tf.float32,
                                name='top')
    front_images = tf.placeholder(shape=[None, *front_shape],
                                  dtype=tf.float32,
                                  name='front')
    rgb_images = tf.placeholder(shape=[None, *rgb_shape],
                                dtype=tf.float32,
                                name='rgb')
    # ROIs have 5 columns; columns 1:5 are used as box coordinates when
    # drawing below, column 0 is presumably a batch index -- TODO confirm.
    top_rois = tf.placeholder(shape=[None, 5],
                              dtype=tf.float32,
                              name='top_rois')  #<todo> change to int32???
    front_rois = tf.placeholder(shape=[None, 5],
                                dtype=tf.float32,
                                name='front_rois')
    rgb_rois = tf.placeholder(shape=[None, 5],
                              dtype=tf.float32,
                              name='rgb_rois')

    top_features, top_scores, top_probs, top_deltas, proposals, proposal_scores = \
        top_feature_net(top_images, top_anchors, top_inside_inds, num_bases)

    front_features = front_feature_net(front_images)
    rgb_features = rgb_feature_net(rgb_images)

    # One [features, rois, 6, 6, 1/stride] entry per view; the front view is
    # passed 0,0 instead of 6,6, which the original comment marks as disabled.
    fuse_scores, fuse_probs, fuse_deltas = \
        fusion_net(
    ( [top_features,     top_rois,     6,6,1./stride],
     [front_features,   front_rois,   0,0,1./stride],  #disable by 0,0
     [rgb_features,     rgb_rois,     6,6,1./stride],),
            num_class, out_shape) #<todo>  add non max suppression

    #loss ########################################################################################################
    # RPN targets are computed in Python by rpn_target() and fed in here.
    top_inds = tf.placeholder(shape=[None], dtype=tf.int32, name='top_ind')
    top_pos_inds = tf.placeholder(shape=[None],
                                  dtype=tf.int32,
                                  name='top_pos_ind')
    top_labels = tf.placeholder(shape=[None], dtype=tf.int32, name='top_label')
    top_targets = tf.placeholder(shape=[None, 4],
                                 dtype=tf.float32,
                                 name='top_target')
    top_cls_loss, top_reg_loss = rpn_loss(top_scores, top_deltas, top_inds,
                                          top_pos_inds, top_labels,
                                          top_targets)

    # Fusion (RCNN) targets are computed in Python by rcnn_target().
    fuse_labels = tf.placeholder(shape=[None],
                                 dtype=tf.int32,
                                 name='fuse_label')
    fuse_targets = tf.placeholder(shape=[None, *out_shape],
                                  dtype=tf.float32,
                                  name='fuse_target')
    fuse_cls_loss, fuse_reg_loss = rcnn_loss(fuse_scores, fuse_deltas,
                                             fuse_labels, fuse_targets)

    #solver
    l2 = l2_regulariser(decay=0.001)
    # The learning rate is a placeholder so it can be fed per step; the loop
    # below always feeds the same constant.
    learning_rate = tf.placeholder(tf.float32, shape=[])
    #solver = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    solver = tf.train.GradientDescentOptimizer(learning_rate=learning_rate,
                                               use_locking=False,
                                               name='GradientDescent')
    #solver_step = solver.minimize(top_cls_loss+top_reg_loss+l2)
    # Total objective: both RPN losses, the fusion classification loss, the
    # fusion regression loss scaled by 0.1, and the L2 term.
    solver_step = solver.minimize(top_cls_loss + top_reg_loss + fuse_cls_loss +
                                  0.1 * fuse_reg_loss + l2)

    max_iter = 10000
    # Period (in iterations) of the two debug sections in the loop.
    iter_debug = 8

    # start training here  #########################################################################################
    log.write(
        'epoch     iter    rate   |  top_cls_loss   reg_loss   |  fuse_cls_loss  reg_loss  |  \n'
    )
    log.write(
        '-------------------------------------------------------------------------------------\n'
    )

    # One subplot per anchor base, used for the RPN score maps drawn in the
    # debug section of the loop.
    num_ratios = len(ratios)
    num_scales = len(scales)
    fig, axs = plt.subplots(num_ratios, num_scales)

    # NOTE: the session and the summary writer are never closed explicitly.
    sess = tf.InteractiveSession()
    with sess.as_default():
        sess.run(tf.global_variables_initializer(), {IS_TRAIN_PHASE: True})
        # Writes the graph definition for TensorBoard; no scalar summaries
        # are added to this writer anywhere in the function.
        summary_writer = tf.summary.FileWriter(out_dir + '/tf', sess.graph)
        saver = tf.train.Saver()

        batch_top_cls_loss = 0
        batch_top_reg_loss = 0
        batch_fuse_cls_loss = 0
        batch_fuse_reg_loss = 0
        # A while loop (not `for`) so that the `continue` below can retry
        # with another frame without consuming an iteration.
        iter = 0
        while iter < max_iter:
            #for iter in range(max_iter):
            # "epoch" is just the iteration number as a float.
            epoch = 1.0 * iter
            rate = 0.05

            ## generate train image -------------
            # Sample one frame uniformly at random; a leading batch axis of
            # size 1 is added to each view.
            idx = np.random.choice(num_frames)  #*10   #num_frames)  #0
            #print (idx)
            batch_top_images = tops[idx].reshape(1, *top_shape)
            batch_front_images = fronts[idx].reshape(1, *front_shape)
            batch_rgb_images = rgbs[idx].reshape(1, *rgb_shape)

            batch_gt_labels = gt_labels[idx]
            batch_gt_boxes3d = gt_boxes3d[idx]
            batch_gt_top_boxes = box3d_to_top_box(batch_gt_boxes3d)

            # Skip frames without ground truth. `iter` is not incremented
            # here, so the loop never terminates if every frame is empty.
            if len(batch_gt_labels) == 0:
                continue

            ## run proposal generation ------------
            # First pass: only the proposal outputs are fetched. The feed
            # also carries learning_rate and IS_TRAIN_PHASE because fd1 is
            # reused as the base of fd2 below.
            fd1 = {
                top_images: batch_top_images,
                top_anchors: anchors,
                top_inside_inds: inside_inds,
                learning_rate: rate,
                IS_TRAIN_PHASE: True
            }
            batch_proposals, batch_proposal_scores, batch_top_features = sess.run(
                [proposals, proposal_scores, top_features], fd1)

            ## generate  train rois  ------------
            #print (anchors)
            #print (inside_inds)
            #print (batch_gt_labels)
            #print (batch_gt_top_boxes)
            batch_top_inds, batch_top_pos_inds, batch_top_labels, batch_top_targets  = \
                rpn_target ( anchors, inside_inds, batch_gt_labels,  batch_gt_top_boxes)

            batch_top_rois, batch_fuse_labels, batch_fuse_targets  = \
                 rcnn_target(  batch_proposals, batch_gt_labels, batch_gt_top_boxes, batch_gt_boxes3d )

            # The top-view ROIs are converted to 3D ROIs, which are then
            # projected into the front and rgb views.
            batch_rois3d = project_to_roi3d(batch_top_rois)
            batch_front_rois = project_to_front_roi(batch_rois3d)
            batch_rgb_rois = project_to_rgb_roi(batch_rois3d)

            ##debug gt generation
            # Runs when iter % iter_debug == 1. The images are built but not
            # displayed: every imshow/waitKey call below is commented out.
            if 1 and iter % iter_debug == 1:
                top_image = top_imgs[idx]
                rgb = rgbs[idx]

                img_gt = draw_rpn_gt(top_image, batch_gt_top_boxes,
                                     batch_gt_labels)
                img_label = draw_rpn_labels(top_image, anchors, batch_top_inds,
                                            batch_top_labels)
                img_target = draw_rpn_targets(top_image, anchors,
                                              batch_top_pos_inds,
                                              batch_top_targets)
                #imshow('img_rpn_gt',img_gt)
                #imshow('img_rpn_label',img_label)
                #imshow('img_rpn_target',img_target)

                # img_label / img_target are overwritten with the RCNN views.
                img_label = draw_rcnn_labels(top_image, batch_top_rois,
                                             batch_fuse_labels)
                img_target = draw_rcnn_targets(top_image, batch_top_rois,
                                               batch_fuse_labels,
                                               batch_fuse_targets)
                #imshow('img_rcnn_label',img_label)
                #imshow('img_rcnn_target',img_target)

                img_rgb_rois = draw_boxes(rgb,
                                          batch_rgb_rois[:, 1:5],
                                          color=(255, 0, 255),
                                          thickness=1)
                #imshow('img_rgb_rois',img_rgb_rois)

                #cv2.waitKey(1)

            ## run classification and regression loss -----------
            # Second pass: everything from fd1 plus the other views, the
            # ROIs and the training targets. `top_images` is repeated after
            # **fd1 with the same value, so the repeat has no effect.
            fd2 = {
                **fd1,
                top_images: batch_top_images,
                front_images: batch_front_images,
                rgb_images: batch_rgb_images,
                top_rois: batch_top_rois,
                front_rois: batch_front_rois,
                rgb_rois: batch_rgb_rois,
                top_inds: batch_top_inds,
                top_pos_inds: batch_top_pos_inds,
                top_labels: batch_top_labels,
                top_targets: batch_top_targets,
                fuse_labels: batch_fuse_labels,
                fuse_targets: batch_fuse_targets,
            }
            #_, batch_top_cls_loss, batch_top_reg_loss = sess.run([solver_step, top_cls_loss, top_reg_loss],fd2)


            # One optimisation step; the four loss values are fetched in the
            # same run call.
            _, batch_top_cls_loss, batch_top_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss = \
               sess.run([solver_step, top_cls_loss, top_reg_loss, fuse_cls_loss, fuse_reg_loss],fd2)

            log.write('%3.1f   %d   %0.4f   |   %0.5f   %0.5f   |   %0.5f   %0.5f  \n' %\
    (epoch, iter, rate, batch_top_cls_loss, batch_top_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss))

            # debug: ------------------------------------

            # Every iter_debug iterations: re-run the network outputs (after
            # the update above) and refresh the RPN score-map figure.
            if iter % iter_debug == 0:
                top_image = top_imgs[idx]
                rgb = rgbs[idx]

                batch_top_probs, batch_top_scores, batch_top_deltas  = \
                    sess.run([ top_probs, top_scores, top_deltas ],fd2)

                batch_fuse_probs, batch_fuse_deltas = \
                    sess.run([ fuse_probs, fuse_deltas ],fd2)

                #batch_fuse_deltas=0*batch_fuse_deltas #disable 3d box prediction
                # probs / boxes3d are only consumed by the commented-out
                # drawing code further down.
                probs, boxes3d = rcnn_nms(batch_fuse_probs,
                                          batch_fuse_deltas,
                                          batch_rois3d,
                                          threshold=0.5)

                ## show rpn score maps
                # Reshape to (feature_h, feature_w, 2 * num_bases) and show
                # channel 2n+1 for each base n, scaled by 255; presumably the
                # foreground probability -- TODO confirm in top_feature_net.
                p = batch_top_probs.reshape(*(top_feature_shape[0:2]),
                                            2 * num_bases)
                for n in range(num_bases):
                    # Base n is drawn at subplot row n // num_scales,
                    # column n % num_scales.
                    r = n % num_scales
                    s = n // num_scales
                    pn = p[:, :, 2 * n + 1] * 255
                    axs[s, r].cla()
                    axs[s, r].imshow(pn, cmap='gray', vmin=0, vmax=255)
                plt.pause(0.01)

                ## show rpn(top) nms
                #img_rpn     = draw_rpn    (top_image, batch_top_probs, batch_top_deltas, anchors, inside_inds)
                img_rpn_nms = draw_rpn_nms(top_image, batch_proposals,
                                           batch_proposal_scores)
                #imshow('img_rpn',img_rpn)
                #imshow('img_rpn_nms',img_rpn_nms)
                #cv2.waitKey(1)

                ## show rcnn(fuse) nms
                #img_rcnn     = draw_rcnn (top_image, batch_fuse_probs, batch_fuse_deltas, batch_top_rois, batch_rois3d,darker=1)
                #img_rcnn_nms = draw_rcnn_nms(rgb, boxes3d, probs)
                #imshow('img_rcnn',img_rcnn)
                #imshow('img_rcnn_nms',img_rcnn_nms)
                #cv2.waitKey(1)

            # save: ------------------------------------
            # Every 500 iterations the checkpoint is written to the same
            # file name, so only the most recent snapshot is kept.
            if iter % 500 == 0:
                #saver.save(sess, out_dir + '/check_points/%06d.ckpt'%iter)  #iter
                saver.save(sess, out_dir + '/check_points/snap.ckpt')  #iter

            iter = iter + 1
コード例 #2
0
def run_train():
    """Build the multi-view detection graph and fine-tune it from a checkpoint.

    The function takes no arguments and returns nothing; everything is
    configured through constants in the body:

    * outputs go under the hard-coded ``out_dir`` (text log, TensorBoard
      summaries in ``out_dir/train``, checkpoint in ``out_dir/check_points``);
      the directories are not created here and must already exist;
    * data comes from ``load_dummy_datas(num_frames)``;
    * a top-view, a front-view and an rgb network are combined by
      ``fusion_net``, which also returns two auxiliary heads;
    * the total loss (RPN losses, fusion losses, both auxiliary losses) is
      minimised with a momentum optimiser at a fixed learning rate;
    * weights are restored from ``check_points/snap.ckpt`` before training,
      so that checkpoint must exist.

    Any existing ``out_dir/train`` directory is deleted at start-up.

    All helpers used here (``make_bases``, ``rpn_target``, ``fusion_net``,
    ``IS_TRAIN_PHASE``, ...) are defined outside this function.
    """
    # Text type accepted by the Logger: `unicode` on Python 2 (what the
    # original code used), `str` on Python 3 where `unicode` does not exist.
    try:
        text_type = unicode  # noqa: F821 -- Python 2 builtin
    except NameError:
        text_type = str

    # output dir, etc
    out_dir = '/root/sharefolder/sdcnd/didi1/output'
    # makedirs(out_dir +'/tf')
    # makedirs(out_dir +'/check_points')
    log = Logger(out_dir + '/log.txt', mode='a')

    #lidar data -----------------
    # `if 1:` is an always-on toggle kept from the original script.
    if 1:
        # Anchor bases: 3 ratios x 3 scales are passed to make_bases.
        ratios = np.array([0.5, 1, 2], dtype=np.float32)
        scales = np.array([1, 2, 3], dtype=np.float32)
        bases = make_bases(
            base_size=16,
            ratios=ratios,
            scales=scales
        )
        num_bases = len(bases)
        stride = 8

        # Requested frame count; replaced by the number actually loaded.
        num_frames = 154
        # num_frames = 2
        rgbs, tops, fronts, gt_labels, gt_boxes3d, top_imgs, front_imgs, lidars = \
            load_dummy_datas(num_frames)
        num_frames = len(rgbs)

        top_shape = tops[0].shape
        front_shape = fronts[0].shape
        rgb_shape = rgbs[0].shape
        top_feature_shape = (top_shape[0] // stride, top_shape[1] // stride)
        # Shape of one regression target; presumably 8 box corners x (x, y, z)
        # -- TODO confirm against rcnn_target.
        out_shape = (8, 3)

        #-----------------------
        #check data
        # Disabled (`if 0:`) 3D visualisation of the first lidar frame.
        if 0:
            fig = mlab.figure(figure=None, bgcolor=(0, 0, 0), fgcolor=None,
                              engine=None, size=(1000, 500))
            draw_lidar(lidars[0], fig=fig)
            draw_gt_boxes3d(gt_boxes3d[0], fig=fig)
            mlab.show(1)
            cv2.waitKey(1)

    # set anchor boxes
    num_class = 2  #include background
    anchors, inside_inds = make_anchors(bases, stride, top_shape[0:2],
                                        top_feature_shape[0:2])
    # The inside_inds returned by make_anchors is discarded: every anchor
    # index is used instead.  #<todo>
    inside_inds = np.arange(0, len(anchors), dtype=np.int32)
    print('out_shape=%s' % str(out_shape))
    print('num_frames=%d' % num_frames)

    #load model ####################################################################################################
    top_anchors = tf.placeholder(shape=[None, 4], dtype=tf.int32, name='anchors')
    top_inside_inds = tf.placeholder(shape=[None], dtype=tf.int32, name='inside_inds')

    # NOTE(review): the image placeholder shapes are hard-coded rather than
    # derived from top_shape / front_shape / rgb_shape; the loaded data must
    # match them exactly.
    top_images = tf.placeholder(shape=[None, 400, 400, 8], dtype=tf.float32, name='input_top')
    front_images = tf.placeholder(shape=[None, 1, 1], dtype=tf.float32, name='front')
    rgb_images = tf.placeholder(shape=[None, 375, 1242, 3], dtype=tf.float32, name='rgb')
    top_rois = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='top_rois')  #<todo> change to int32???
    front_rois = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='front_rois')
    rgb_rois = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='rgb_rois')

    top_features, top_scores, top_probs, top_deltas, proposals, proposal_scores = \
        top_feature_net(top_images, top_anchors, top_inside_inds, num_bases)

    front_features = front_feature_net(front_images)
    rgb_features = rgb_feature_net(rgb_images)

    # One [features, rois, 6, 6, 1/stride] entry per view; the front view is
    # passed 0,0 instead of 6,6, which the original comment marks as disabled.
    fuse_scores, fuse_probs, fuse_deltas, aux_fuse_scores, aux_fuse_probs, aux_fuse_deltas = \
        fusion_net(
            ([top_features, top_rois, 6, 6, 1. / stride],
             [front_features, front_rois, 0, 0, 1. / stride],  #disable by 0,0
             [rgb_features, rgb_rois, 6, 6, 1. / stride],),
            num_class, out_shape)  #<todo>  add non max suppression

    #loss ########################################################################################################
    # RPN targets are computed in Python by rpn_target() and fed in here.
    top_inds = tf.placeholder(shape=[None], dtype=tf.int32, name='top_ind')
    top_pos_inds = tf.placeholder(shape=[None], dtype=tf.int32, name='top_pos_ind')
    top_labels = tf.placeholder(shape=[None], dtype=tf.int32, name='top_label')
    top_targets = tf.placeholder(shape=[None, 4], dtype=tf.float32, name='top_target')
    with tf.variable_scope('rpn-loss') as scope:
        top_cls_loss, top_reg_loss = rpn_loss(top_scores, top_deltas, top_inds,
                                              top_pos_inds, top_labels, top_targets)
    tf.summary.scalar('top_cls_loss', top_cls_loss)
    tf.summary.scalar('top_reg_loss', top_reg_loss)

    # Fusion (RCNN) targets are computed in Python by rcnn_target().
    fuse_labels = tf.placeholder(shape=[None], dtype=tf.int32, name='fuse_label')
    fuse_targets = tf.placeholder(shape=[None, 8, 3], dtype=tf.float32, name='fuse_target')
    with tf.variable_scope('rcnn-loss') as scope:
        fuse_cls_loss, fuse_reg_loss = rcnn_loss(fuse_scores, fuse_deltas,
                                                 fuse_labels, fuse_targets)
    tf.summary.scalar('fuse_cls_loss', fuse_cls_loss)
    tf.summary.scalar('fuse_reg_loss', fuse_reg_loss)

    # The two auxiliary heads are trained against the same labels / targets
    # as the main fusion head.
    with tf.variable_scope('aux_rcnn_loss') as scope:
        with tf.variable_scope('aux_loss_1') as scope:
            aux_fuse_cls_loss_1, aux_fuse_reg_loss_1 = rcnn_loss(
                aux_fuse_scores[0], aux_fuse_deltas[0], fuse_labels, fuse_targets)
        tf.summary.scalar('aux_fuse_cls_loss_1', aux_fuse_cls_loss_1)
        tf.summary.scalar('aux_fuse_reg_loss_1', aux_fuse_reg_loss_1)
        with tf.variable_scope('aux_loss_2') as scope:
            aux_fuse_cls_loss_2, aux_fuse_reg_loss_2 = rcnn_loss(
                aux_fuse_scores[1], aux_fuse_deltas[1], fuse_labels, fuse_targets)
        tf.summary.scalar('aux_fuse_cls_loss_2', aux_fuse_cls_loss_2)
        # Fixed: this summary previously logged aux_fuse_reg_loss_1 under the
        # '..._2' tag, so the second head's regression loss was never shown.
        tf.summary.scalar('aux_fuse_reg_loss_2', aux_fuse_reg_loss_2)

    #solver
    # with tf.variable_scope('l2-reg') as scope:
    #     l2 = l2_regulariser(decay=0.0005)
    # tf.summary.scalar('total_l2reg', l2)

    # Only the main fusion regression loss is scaled by 0.1; the auxiliary
    # losses enter with weight 1. No L2 term is included (disabled above).
    with tf.variable_scope('total_loss') as scope:
        total_loss = top_cls_loss + top_reg_loss + fuse_cls_loss + 0.1 * fuse_reg_loss \
                    + aux_fuse_cls_loss_1 + aux_fuse_reg_loss_1 \
                    + aux_fuse_cls_loss_2 + aux_fuse_reg_loss_2
    tf.summary.scalar('total_loss', total_loss)

    # The learning rate is a placeholder so it can be fed per step; the loop
    # below always feeds the same constant.
    learning_rate = tf.placeholder(tf.float32, shape=[])
    solver = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    #solver_step = solver.minimize(top_cls_loss+top_reg_loss+l2)
    solver_step = solver.minimize(total_loss)

    max_iter = 10000
    iter_debug = 8

    # start training here  #########################################################################################
    log.write(text_type('epoch     iter    rate   |  top_cls_loss   reg_loss   |  fuse_cls_loss  reg_loss  |  \n'))
    log.write(text_type('-------------------------------------------------------------------------------------\n'))

    num_ratios = len(ratios)
    num_scales = len(scales)
    # fig, axs = plt.subplots(num_ratios,num_scales)

    sess = tf.InteractiveSession()
    # with sess.as_default():
    merged = tf.summary.merge_all()

    log_dir = out_dir + '/train'
    if tf.gfile.Exists(log_dir):
        #gotta be careful
        # Destructive: summaries from earlier runs are removed.
        tf.gfile.DeleteRecursively(log_dir)
        # Call form of print: valid on both Python 2 and Python 3.
        print('Removed files in {}'.format(log_dir))
    train_writer = tf.summary.FileWriter(log_dir, sess.graph)

    saver = tf.train.Saver()
    tf.global_variables_initializer().run()
    # sess.run( tf.global_variables_initializer(), { IS_TRAIN_PHASE : True } )

    #option: loading pretrained model
    # Overwrites the freshly initialised variables with the saved values.
    saver.restore(sess, '/root/sharefolder/sdcnd/didi1/output/check_points/snap.ckpt')

    batch_top_cls_loss = 0
    batch_top_reg_loss = 0
    batch_fuse_cls_loss = 0
    batch_fuse_reg_loss = 0
    # try/finally so the summary writer is closed even when a step raises.
    try:
        for step in range(max_iter):
            # "epoch" is just the iteration number as a float.
            epoch = 1.0 * step
            rate = 0.05

            ## generate train image -------------
            idx = np.random.choice(num_frames)     #*10   #num_frames)  #0
            # NOTE(review): the random choice is immediately overridden, so
            # every step trains on frame 87 (and fails if fewer than 88
            # frames were loaded). Kept as in the original -- confirm intent.
            idx = 87
            batch_top_images = tops[idx].reshape(1, *top_shape)
            batch_front_images = fronts[idx].reshape(1, *front_shape)
            batch_rgb_images = rgbs[idx].reshape(1, *rgb_shape)

            batch_gt_labels = gt_labels[idx]
            batch_gt_boxes3d = gt_boxes3d[idx]
            batch_gt_top_boxes = box3d_to_top_box(batch_gt_boxes3d)

            ## run proposal generation ------------
            # First pass: only the proposal outputs are fetched. The feed
            # also carries learning_rate and IS_TRAIN_PHASE because fd1 is
            # reused as the base of fd2 below.
            fd1 = {
                top_images:      batch_top_images,
                top_anchors:     anchors,
                top_inside_inds: inside_inds,

                learning_rate:   rate,
                IS_TRAIN_PHASE:  True
            }
            batch_proposals, batch_proposal_scores, batch_top_features = \
                sess.run([proposals, proposal_scores, top_features], fd1)

            ## generate  train rois  ------------
            batch_top_inds, batch_top_pos_inds, batch_top_labels, batch_top_targets = \
                rpn_target(anchors, inside_inds, batch_gt_labels, batch_gt_top_boxes)

            batch_top_rois, batch_fuse_labels, batch_fuse_targets = \
                rcnn_target(batch_proposals, batch_gt_labels, batch_gt_top_boxes, batch_gt_boxes3d)

            # The top-view ROIs are converted to 3D ROIs, which are then
            # projected into the front and rgb views.
            batch_rois3d = project_to_roi3d(batch_top_rois)
            batch_front_rois = project_to_front_roi(batch_rois3d)
            batch_rgb_rois = project_to_rgb_roi(batch_rois3d)

            ##debug gt generation
            # Disabled debug view of the generated targets.
            if False:
            # if 1 and step%iter_debug==0:
                top_image = top_imgs[idx]
                rgb = rgbs[idx]

                img_gt = draw_rpn_gt(top_image, batch_gt_top_boxes, batch_gt_labels)
                img_label = draw_rpn_labels(top_image, anchors, batch_top_inds, batch_top_labels)
                img_target = draw_rpn_targets(top_image, anchors, batch_top_pos_inds, batch_top_targets)
                #imshow('img_rpn_gt',img_gt)
                #imshow('img_rpn_label',img_label)
                #imshow('img_rpn_target',img_target)

                img_label = draw_rcnn_labels(top_image, batch_top_rois, batch_fuse_labels)
                img_target = draw_rcnn_targets(top_image, batch_top_rois, batch_fuse_labels, batch_fuse_targets)
                #imshow('img_rcnn_label',img_label)
                imshow('img_rcnn_target', img_target)

                img_rgb_rois = draw_boxes(rgb, batch_rgb_rois[:, 1:5], color=(255, 0, 255), thickness=1)
                imshow('img_rgb_rois', img_rgb_rois)

                cv2.waitKey(1)

            ## run classification and regression loss -----------
            # Second pass: everything from fd1 plus the other views, the
            # ROIs and the training targets. Built from fd1 instead of
            # re-typing its entries (the old literal listed top_images twice).
            fd2 = dict(fd1)
            fd2.update({
                front_images: batch_front_images,
                rgb_images:   batch_rgb_images,

                top_rois:   batch_top_rois,
                front_rois: batch_front_rois,
                rgb_rois:   batch_rgb_rois,

                top_inds:     batch_top_inds,
                top_pos_inds: batch_top_pos_inds,
                top_labels:   batch_top_labels,
                top_targets:  batch_top_targets,

                fuse_labels:  batch_fuse_labels,
                fuse_targets: batch_fuse_targets,
            })
            #_, batch_top_cls_loss, batch_top_reg_loss = sess.run([solver_step, top_cls_loss, top_reg_loss],fd2)

            # Tracing is switched off; swap in the two commented lines to
            # collect run metadata.
            # run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            # run_metadata = tf.RunMetadata()
            run_options = None
            run_metadata = None
            # One optimisation step; the merged summaries and the four main
            # loss values are fetched in the same run call.
            _, summary, batch_top_cls_loss, batch_top_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss = \
                sess.run([solver_step, merged, top_cls_loss, top_reg_loss, fuse_cls_loss, fuse_reg_loss],
                         feed_dict=fd2,
                         options=run_options,
                         run_metadata=run_metadata)
            # train_writer.add_run_metadata(run_metadata, 'step%03d' % step)
            train_writer.add_summary(summary, step)
            train_writer.flush()

            log.write(text_type('%3.1f   %d   %0.4f   |   %0.5f   %0.5f   |   %0.5f   %0.5f  \n' %
                                (epoch, step, rate, batch_top_cls_loss, batch_top_reg_loss,
                                 batch_fuse_cls_loss, batch_fuse_reg_loss)))

            #print('ok')
            # debug: ------------------------------------
            # Every 10 steps: re-run the fusion head (after the update above)
            # and display the detections.
            if step % 10 == 0:
                top_image = top_imgs[idx]
                rgb = rgbs[idx]

                batch_fuse_probs, batch_fuse_deltas = \
                    sess.run([fuse_probs, fuse_deltas], fd2)

                #batch_fuse_deltas=0*batch_fuse_deltas #disable 3d box prediction
                probs, boxes3d = rcnn_nms(batch_fuse_probs, batch_fuse_deltas, batch_rois3d, threshold=0.5)

                ## show rcnn(fuse) nms
                gt_2d_box = batch_gt_top_boxes
                img_rcnn = draw_rcnn(top_image, batch_fuse_probs, batch_fuse_deltas,
                                     batch_top_rois, batch_rois3d, gt_2d_box, darker=1)
                # NOTE(review): the result is never used; the call is kept in
                # case rcnn_result has side effects -- confirm and remove.
                boxes_3d = rcnn_result(batch_fuse_probs, batch_fuse_deltas,
                                       batch_top_rois, batch_rois3d, gt_2d_box)
                img_rcnn_nms = draw_rcnn_nms(rgb, boxes3d, probs)
                imshow('img_rcnn', img_rcnn)
                imshow('img_rcnn_nms', img_rcnn_nms)
                # cv2.imwrite('result.png', img_rcnn_nms)
                cv2.waitKey(1)

            # Disabled debug view of the RPN score maps and NMS results.
            if False:
            # if step%100==0:
            # if step%iter_debug==0:
                top_image = top_imgs[idx]
                rgb = rgbs[idx]

                batch_top_probs, batch_top_scores, batch_top_deltas = \
                    sess.run([top_probs, top_scores, top_deltas], fd2)

                batch_fuse_probs, batch_fuse_deltas = \
                    sess.run([fuse_probs, fuse_deltas], fd2)

                #batch_fuse_deltas=0*batch_fuse_deltas #disable 3d box prediction
                probs, boxes3d = rcnn_nms(batch_fuse_probs, batch_fuse_deltas, batch_rois3d, threshold=0.5)

                ## show rpn score maps
                fig, axs = plt.subplots(num_ratios, num_scales)
                # 50 x 50 is the hard-coded 400 x 400 top view divided by the
                # stride of 8.
                p = batch_top_probs.reshape(50, 50, 2 * num_bases)
                for n in range(num_bases):
                    r = n % num_scales
                    s = n // num_scales
                    pn = p[:, :, 2 * n + 1] * 255
                    axs[s, r].cla()
                    axs[s, r].imshow(pn, cmap='gray', vmin=0, vmax=255)
                plt.pause(0.01)

                ## show rpn(top) nms
                img_rpn = draw_rpn(top_image, batch_top_probs, batch_top_deltas, anchors, inside_inds)
                img_rpn_nms = draw_rpn_nms(top_image, batch_proposals, batch_proposal_scores)
                #imshow('img_rpn',img_rpn)
                imshow('img_rpn_nms', img_rpn_nms)
                cv2.waitKey(1)

                ## show rcnn(fuse) nms
                # NOTE(review): unlike the live call above, this draw_rcnn
                # call passes no gt_2d_box -- check before re-enabling.
                img_rcnn = draw_rcnn(top_image, batch_fuse_probs, batch_fuse_deltas,
                                     batch_top_rois, batch_rois3d, darker=1)
                img_rcnn_nms = draw_rcnn_nms(rgb, boxes3d, probs)
                imshow('img_rcnn', img_rcnn)
                imshow('img_rcnn_nms', img_rcnn_nms)
                cv2.waitKey(1)

            # save: ------------------------------------
            # Every 500 steps the checkpoint is written to the same file
            # name (the one restored at start-up), so it is overwritten.
            if step % 500 == 0:
                #saver.save(sess, out_dir + '/check_points/%06d.ckpt'%step)  #iter
                saver.save(sess, out_dir + '/check_points/snap.ckpt')  #iter
    finally:
        train_writer.close()
コード例 #3
0
ファイル: trainer.py プロジェクト: zbqq/didi-udacity-2017
def run_train():
    """Train the top-view + RGB fusion network on one dummy sample.

    The function takes no arguments and returns nothing.  It

    1. creates the (hard-coded) output directory and a text logger,
    2. loads a single sample via ``load_dummy_data`` and derives anchor
       boxes from it,
    3. builds the TensorFlow graph: ``top_lidar_feature_net`` on the top
       view, ``rgb_feature_net`` on the RGB image, ``fusion_net`` on the
       ROIs, plus ``rpn_loss`` / ``rcnn_loss`` and a momentum solver,
    4. runs ``max_iter`` optimisation steps on that same sample, showing
       debug windows (OpenCV / matplotlib) and writing the four loss values
       to the log on every iteration.

    ``IS_TRAIN_PHASE`` and all helper functions used here are not defined in
    this function; they are expected to come from the surrounding module.
    No checkpoint is written by this version of the trainer.
    """

    # output dir, etc
    out_dir = '/root/share/out/didi/xxx'
    makedirs(out_dir + '/tf')
    log = Logger(out_dir + '/log.txt', mode='a')

    # one lidar data -----------------
    # (active branch: `if 1`)
    if 1:
        # anchor bases: 3 ratios x 3 scales
        ratios = np.array([0.5, 1, 2], dtype=np.float32)
        scales = np.array([1, 2, 3], dtype=np.float32)
        bases = make_bases(base_size=16, ratios=ratios, scales=scales)
        num_bases = len(bases)
        stride = 8

        rgb, top, top_image, lidar, gt_labels, gt_boxes3d, gt_top_boxes = load_dummy_data(
        )
        top_shape = top.shape
        # spatial size of the top-view feature map: input size divided by stride
        top_feature_shape = (top_shape[0] // stride, top_shape[1] // stride)

        rgb_shape = rgb.shape
        # per-ROI regression target shape fed to fusion_net / fuse_targets
        # (presumably 8 corners x 3 coordinates -- confirm against rcnn_target)
        out_shape = (8, 3)

        #-----------------------
        # check data (disabled: `if 0`)
        if 0:
            fig = mlab.figure(figure=None,
                              bgcolor=(0, 0, 0),
                              fgcolor=None,
                              engine=None,
                              size=(1000, 500))
            draw_lidar(lidar, fig=fig)
            draw_gt_boxes3d(gt_boxes3d, fig=fig)
            mlab.show(1)

            draw_gt_boxes(top_image, gt_top_boxes)
            draw_projected_gt_boxes3d(rgb, gt_boxes3d)

            #imshow('top_image',top_image)
            #imshow('rgb',rgb)
            cv2.waitKey(1)

    # one dummy data -----------------
    # (disabled alternative data source: `if 0`)
    if 0:
        ratios = [0.5, 1, 2]
        scales = 2**np.arange(3, 6)
        bases = make_bases(base_size=16, ratios=ratios, scales=scales)
        num_bases = len(bases)
        stride = 8

        rgb, top, top_image, lidar, gt_labels, gt_boxes3d, gt_top_boxes = load_dummy_data1(
        )
        top_shape = top.shape
        top_feature_shape = (54, 72
                             )  #(top_shape[0]//stride, top_shape[1]//stride)

        rgb_shape = rgb.shape
        out_shape = (4, )

        # img_gt =draw_gt_boxes(top_image, gt_top_boxes)
        # imshow('img_gt',img_gt)
        # cv2.waitKey(1)

    # set anchor boxes
    dim = np.prod(out_shape)  # only printed below
    num_class = 2  # include background
    anchors, inside_inds = make_anchors(bases, stride, top_shape[0:2],
                                        top_feature_shape[0:2])
    # the inside_inds returned by make_anchors are discarded: use all anchors
    inside_inds = np.arange(0, len(anchors), dtype=np.int32)  #use all
    print('dim=%d' % dim)

    #load model ##############
    # top-view branch: images, anchors and the anchor indices to use
    top_images = tf.placeholder(shape=[None, *top_shape],
                                dtype=tf.float32,
                                name='top')
    top_anchors = tf.placeholder(shape=[None, 4],
                                 dtype=tf.int32,
                                 name='anchors')
    top_inside_inds = tf.placeholder(shape=[None],
                                     dtype=tf.int32,
                                     name='inside_inds')

    top_features, top_scores, top_probs, top_deltas, top_rois1, top_roi_scores1 = \
        top_lidar_feature_net(top_images, top_anchors, top_inside_inds, num_bases)

    # rgb branch
    rgb_images = tf.placeholder(shape=[None, *rgb_shape],
                                dtype=tf.float32,
                                name='rgb')
    rgb_features = rgb_feature_net(rgb_images)

    # ROIs are fed from Python (built by rcnn_target and the projection loop below)
    top_rois = tf.placeholder(shape=[None, 5],
                              dtype=tf.float32,
                              name='top_rois')  #<todo> change to int32???
    rgb_rois = tf.placeholder(shape=[None, 5],
                              dtype=tf.float32,
                              name='rgb_rois')
    # fusion head over both feature maps; each view gets a [6, 6, 1/stride] setting
    # (presumably pooled height, pooled width, spatial scale -- confirm in fusion_net)
    fuse_scores, fuse_probs, fuse_deltas = \
        fusion_net(
            (top_features,   rgb_features,),
            (top_rois,       rgb_rois,),
            ([6,6,1./stride],[6,6,1./stride],),
            num_class, out_shape)

    #loss ####################
    # inputs of the RPN loss (filled from rpn_target in the training loop)
    top_inds = tf.placeholder(shape=[None], dtype=tf.int32, name='top_ind')
    top_pos_inds = tf.placeholder(shape=[None],
                                  dtype=tf.int32,
                                  name='top_pos_ind')
    top_labels = tf.placeholder(shape=[None], dtype=tf.int32, name='top_label')
    top_targets = tf.placeholder(shape=[None, 4],
                                 dtype=tf.float32,
                                 name='top_target')
    top_cls_loss, top_reg_loss = rpn_loss(top_scores, top_deltas, top_inds,
                                          top_pos_inds, top_labels,
                                          top_targets)

    # inputs of the RCNN (fusion) loss (filled from rcnn_target in the loop)
    fuse_labels = tf.placeholder(shape=[None],
                                 dtype=tf.int32,
                                 name='fuse_label')
    fuse_targets = tf.placeholder(shape=[None, *out_shape],
                                  dtype=tf.float32,
                                  name='fuse_target')
    fuse_cls_loss, fuse_reg_loss = rcnn_loss(fuse_scores, fuse_deltas,
                                             fuse_labels, fuse_targets)

    #put your solver here
    l2 = l2_regulariser(decay=0.0005)
    # learning rate is a scalar placeholder so it can be set per step via feed_dict
    learning_rate = tf.placeholder(tf.float32, shape=[])
    solver = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                        momentum=0.9)
    #solver_step = solver.minimize(top_cls_loss+top_reg_loss+l2)
    # joint objective: unweighted sum of the four losses plus the L2 term
    solver_step = solver.minimize(top_cls_loss + top_reg_loss + fuse_cls_loss +
                                  fuse_reg_loss + l2)

    max_iter = 10000

    # start training here ------------------------------------------------
    log.write('epoch        iter      rate     |  train_mse   valid_mse  |\n')
    log.write(
        '----------------------------------------------------------------------------\n'
    )

    # one subplot per anchor base, used for the RPN score maps in the debug step
    num_ratios = len(ratios)
    num_scales = len(scales)
    fig, axs = plt.subplots(num_ratios, num_scales)

    sess = tf.InteractiveSession()
    with sess.as_default():
        sess.run(tf.global_variables_initializer(), {IS_TRAIN_PHASE: True})
        # writes the graph definition; no summaries are added to it below
        summary_writer = tf.summary.FileWriter(out_dir + '/tf', sess.graph)
        rate = 0.1  # fed as learning_rate; constant for the whole run

        batch_top_cls_loss = 0
        batch_top_reg_loss = 0
        batch_fuse_cls_loss = 0
        batch_fuse_reg_loss = 0
        for iter in range(max_iter):

            # "random sample train data": here every iteration uses the same
            # single sample, reshaped to a batch of one
            batch_top_images = top.reshape(1, *top_shape)
            batch_top_gt_labels = gt_labels
            batch_top_gt_boxes = gt_top_boxes

            batch_rgb_images = rgb.reshape(1, *rgb_shape)

            batch_fuse_gt_labels = gt_labels
            batch_fuse_gt_boxes = gt_top_boxes
            batch_fuse_gt_boxes3d = gt_boxes3d

            ##-------------------------------
            # first pass: run only the top-view net to obtain proposals
            fd = {
                top_images: batch_top_images,
                top_anchors: anchors,
                top_inside_inds: inside_inds,
                learning_rate: rate,
                IS_TRAIN_PHASE: True
            }
            batch_top_rois1, batch_top_roi_scores1, batch_top_features = sess.run(
                [top_rois1, top_roi_scores1, top_features], fd)

            ## generate ground truth
            batch_top_inds, batch_top_pos_inds, batch_top_labels, batch_top_targets  = \
                rpn_target ( anchors, inside_inds, batch_top_gt_labels,  batch_top_gt_boxes)

            batch_top_rois, batch_fuse_labels, batch_fuse_targets  = \
                 rcnn_target(  batch_top_rois1, batch_fuse_gt_labels, batch_fuse_gt_boxes, batch_fuse_gt_boxes3d )

            #project to rgb roi -------------------------------------------------
            # start from a copy of the top-view ROIs; column 0 is kept as copied and
            # columns 1:5 are replaced by the bounding rectangle of the projected box
            batch_rgb_rois = batch_top_rois.copy()
            num = len(batch_top_rois)
            for n in range(num):
                box3d = box_to_box3d(batch_top_rois[n, 1:5].reshape(
                    1, 4)).reshape(8, 3)
                qs = make_projected_box3d(box3d)

                # min/max over the projected points, columns 0 and 1
                minx = np.min(qs[:, 0])
                maxx = np.max(qs[:, 0])
                miny = np.min(qs[:, 1])
                maxy = np.max(qs[:, 1])
                batch_rgb_rois[n, 1:5] = minx, miny, maxx, maxy

            # draw the projected ROIs on a darkened copy of the rgb image
            darken = 0.7
            img_rgb_roi = rgb.copy() * darken
            for n in range(num):
                b = batch_rgb_rois[n, 1:5]
                cv2.rectangle(img_rgb_roi, (b[0], b[1]), (b[2], b[3]),
                              (0, 255, 255), 1)

            imshow('img_rgb_roi', img_rgb_roi)
            #--------------------------------------------------------------------

            ## debug: visualise the generated RPN / RCNN training targets
            if 1:
                img_gt = draw_rpn_gt(top_image, batch_top_gt_boxes,
                                     batch_top_gt_labels)
                img_label = draw_rpn_labels(top_image, anchors, batch_top_inds,
                                            batch_top_labels)
                img_target = draw_rpn_targets(top_image, anchors,
                                              batch_top_pos_inds,
                                              batch_top_targets)
                imshow('img_rpn_gt', img_gt)
                imshow('img_rpn_label', img_label)
                imshow('img_rpn_target', img_target)

                img_label = draw_rcnn_labels(top_image, batch_top_rois,
                                             batch_fuse_labels)
                img_target = draw_rcnn_targets(top_image, batch_top_rois,
                                               batch_fuse_labels,
                                               batch_fuse_targets)
                imshow('img_rcnn_label', img_label)
                imshow('img_rcnn_target', img_target)
                cv2.waitKey(1)

            #---------------------------------------------------
            # second pass: full feed dict (rebinding `fd`) for the optimisation step
            fd = {
                top_images: batch_top_images,
                top_anchors: anchors,
                top_inside_inds: inside_inds,
                top_inds: batch_top_inds,
                top_pos_inds: batch_top_pos_inds,
                top_labels: batch_top_labels,
                top_targets: batch_top_targets,
                top_rois: batch_top_rois,
                #front_rois1: batch_front_rois,
                rgb_images: batch_rgb_images,
                rgb_rois: batch_rgb_rois,
                fuse_labels: batch_fuse_labels,
                fuse_targets: batch_fuse_targets,
                learning_rate: rate,
                IS_TRAIN_PHASE: True
            }
            #_, batch_top_cls_loss, batch_top_reg_loss = sess.run([solver_step, top_cls_loss, top_reg_loss],fd)


            # one solver step; also fetch the four loss values for logging
            _, batch_top_cls_loss, batch_top_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss = \
               sess.run([solver_step, top_cls_loss, top_reg_loss, fuse_cls_loss, fuse_reg_loss],fd)

            #print('ok')
            # debug: ------------------------------------

            # every 4th iteration: re-run the nets (still with IS_TRAIN_PHASE True
            # in `fd`) and visualise the predictions
            if iter % 4 == 0:
                batch_top_probs, batch_top_scores, batch_top_deltas  = \
                    sess.run([ top_probs, top_scores, top_deltas ],fd)

                batch_fuse_probs, batch_fuse_deltas = \
                    sess.run([ fuse_probs, fuse_deltas ],fd)

                probs, boxes3d, priors, priors3d, deltas = rcnn_nms(
                    batch_fuse_probs, batch_fuse_deltas, batch_top_rois)

                ## show rpn score maps
                # reshape to (feature_h, feature_w, 2*num_bases); channel 2*n+1 is
                # shown for base n (presumably the foreground probability -- confirm)
                p = batch_top_probs.reshape(*(top_feature_shape[0:2]),
                                            2 * num_bases)
                for n in range(num_bases):
                    r = n % num_scales
                    s = n // num_scales
                    pn = p[:, :, 2 * n + 1] * 255
                    axs[s, r].cla()
                    axs[s, r].imshow(pn, cmap='gray', vmin=0, vmax=255)
                plt.pause(0.01)

                img_rpn = draw_rpn(top_image, batch_top_probs,
                                   batch_top_deltas, anchors, inside_inds)
                img_rpn_nms = draw_rpn_nms(
                    top_image, batch_top_rois1,
                    batch_top_roi_scores1)  # estimate after non-max
                imshow('img_rpn', img_rpn)
                imshow('img_rpn_nms', img_rpn_nms)
                cv2.waitKey(1)

                #draw rcnn results --------------------------------
                img_rcnn = draw_rcnn(top_image, batch_fuse_probs,
                                     batch_fuse_deltas, batch_top_rois)
                # return value is ignored here (presumably draws onto `rgb` in
                # place -- confirm in draw_projected_gt_boxes3d)
                draw_projected_gt_boxes3d(rgb,
                                          boxes3d,
                                          color=(255, 255, 255),
                                          thickness=1)

                imshow('img_rcnn', img_rcnn)
                cv2.waitKey(1)

            # debug: ------------------------------------

            # one log line per iteration: iter | rpn cls, rpn reg, fuse cls, fuse reg
            log.write('%d   | %0.5f   %0.5f  %0.5f   %0.5f : \n' %
                      (iter, batch_top_cls_loss, batch_top_reg_loss,
                       batch_fuse_cls_loss, batch_fuse_reg_loss))
コード例 #4
0
def run_train():
    """Train the top-view / front-view / RGB fusion network on a frame list.

    The function takes no arguments and returns nothing.  It reads the
    module-level names ``data_root``, ``vis`` and ``IS_TRAIN_PHASE`` (none of
    them is defined here) and performs the following steps:

    1. Create ``./outputs`` sub-directories and a time-stamped log file, and
       load the sorted training index from ``data_root + 'seg/train_list.npy'``.
    2. Load three frames to discover the input shapes and build the anchors
       from four hand-written base boxes.
    3. Build the TensorFlow graph (``top_feature_net``, ``front_feature_net``,
       ``rgb_feature_net``, ``fusion_net``), the RPN/RCNN losses, scalar
       summaries and an Adam solver on a weighted sum of the losses.
    4. Restore all variables from a fixed checkpoint and run ``max_iter``
       steps.  Frames are visited in a shuffled order that is redrawn every
       ``2 * num_frames`` iterations; data is loaded in chunks of ``freq``
       frames and every chunk is iterated twice before the next one is read.
    5. Log the losses on every step, write summaries every 10 iterations and
       save a checkpoint every 2000 iterations.

    Raises:
        Exception: if a freshly drawn permutation is identical to the
            previous frame order.
    """

    # output dir, etc
    out_dir = './outputs'
    makedirs(out_dir + '/tf')
    makedirs(out_dir + '/check_points')
    log = Logger(out_dir + '/log_%s.txt' % (time.strftime('%Y-%m-%d %H:%M:%S')), mode='a')
    index = np.load(data_root + 'seg/train_list.npy')
    index = sorted(index)
    index = np.array(index)
    num_frames = len(index)

    # lidar data -----------------
    if 1:
        ### generate anchor base
        # Alternative (disabled): derive the bases from ratios/scales.
        # ratios=np.array([0.4,0.6,1.7,2.4], dtype=np.float32)
        # scales=np.array([0.5,1,2,3],   dtype=np.float32)
        # bases = make_bases(
        #     base_size = 16,
        #     ratios=ratios,
        #     scales=scales
        # )
        # `ratios` / `scales` are only used below for their lengths, which give
        # the 2x2 subplot grid that holds one score map per base box.
        ratios = np.array([1.7, 2.4])
        scales = np.array([1.7, 2.4])
        bases = np.array([[-19.5, -8, 19.5, 8],
                          [-8, -19.5, 8, 19.5],
                          [-5, -3, 5, 3],
                          [-3, -5, 3, 5]
                          ])
        num_bases = len(bases)
        stride = 4

        out_shape = (8, 3)

        # Load a few frames up front only to learn the input shapes; the training
        # loop replaces these arrays with the first real chunk.
        rgbs, tops, fronts, gt_labels, gt_boxes3d, top_imgs, front_imgs, rgbs_norm, image_index = load_dummy_datas(index[:3])
        # rgbs, tops, fronts, gt_labels, gt_boxes3d, top_imgs, front_imgs, rgbs_norm, image_index, lidars = load_dummy_datas()
        top_shape = tops[0].shape
        front_shape = fronts[0].shape
        rgb_shape = rgbs[0].shape
        # ceil(size / stride) for both spatial dimensions
        top_feature_shape = ((top_shape[0] - 1) // stride + 1, (top_shape[1] - 1) // stride + 1)

        # set anchor boxes
        num_class = 2  # include background
        anchors, inside_inds = make_anchors(bases, stride, top_shape[0:2], top_feature_shape[0:2])
        # inside_inds = np.arange(0,len(anchors),dtype=np.int32)  #use all  #<todo>
        print('out_shape=%s' % str(out_shape))
        print('num_frames=%d' % num_frames)

        #-----------------------
        # check data (disabled; `lidars` is not loaded by the active call above)
        if 0:
            fig = mlab.figure(figure=None, bgcolor=(0, 0, 0), fgcolor=None, engine=None, size=(1000, 500))
            draw_lidar(lidars[0], fig=fig)
            draw_gt_boxes3d(gt_boxes3d[0], fig=fig)
            mlab.show(1)
            cv2.waitKey(1)

    #load model ####################################################################################################
    top_anchors     = tf.placeholder(shape=[None, 4], dtype=tf.int32,   name='anchors')
    top_inside_inds = tf.placeholder(shape=[None],    dtype=tf.int32,   name='inside_inds')

    top_images   = tf.placeholder(shape=[None, *top_shape],     dtype=tf.float32, name='top')
    front_images = tf.placeholder(shape=[None, *front_shape],   dtype=tf.float32, name='front')
    # rgb images may differ in size between frames, hence the open spatial dims
    rgb_images   = tf.placeholder(shape=[None, None, None, 3], dtype=tf.float32, name='rgb')
    top_rois     = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='top_rois')  #<todo> change to int32???
    front_rois   = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='front_rois')
    rgb_rois     = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='rgb_rois')

    top_features, top_scores, top_probs, top_deltas, proposals, proposal_scores = \
        top_feature_net(top_images, top_anchors, top_inside_inds, num_bases)
    front_features = front_feature_net(front_images)
    rgb_features   = rgb_feature_net(rgb_images)

    fuse_scores, fuse_probs, fuse_deltas = \
        fusion_net(
            ([top_features,     top_rois,     6, 6, 1./stride],
             [front_features,   front_rois,   0, 0, 1./stride],  #disable by 0,0
             [rgb_features,     rgb_rois,     6, 6, 1./(2*stride)],),
            num_class, out_shape)  #<todo>  add non max suppression

    #loss ########################################################################################################
    top_inds     = tf.placeholder(shape=[None],    dtype=tf.int32,   name='top_ind')
    top_pos_inds = tf.placeholder(shape=[None],    dtype=tf.int32,   name='top_pos_ind')
    top_labels   = tf.placeholder(shape=[None],    dtype=tf.int32,   name='top_label')
    top_targets  = tf.placeholder(shape=[None, 4], dtype=tf.float32, name='top_target')
    # note: the RPN scores are doubled before entering the loss
    top_cls_loss, top_reg_loss = rpn_loss(2*top_scores, top_deltas, top_inds, top_pos_inds, top_labels, top_targets)

    fuse_labels  = tf.placeholder(shape=[None],             dtype=tf.int32,   name='fuse_label')
    fuse_targets = tf.placeholder(shape=[None, *out_shape], dtype=tf.float32, name='fuse_target')
    fuse_cls_loss, fuse_reg_loss = rcnn_loss(fuse_scores, fuse_deltas, fuse_labels, fuse_targets)
    tf.summary.scalar('rpn_cls_loss', top_cls_loss)
    tf.summary.scalar('rpn_reg_loss', top_reg_loss)
    tf.summary.scalar('rcnn_cls_loss', fuse_cls_loss)
    tf.summary.scalar('rcnn_reg_loss', fuse_reg_loss)

    #solver
    l2 = l2_regulariser(decay=0.000005)
    tf.summary.scalar('l2', l2)
    # scalar placeholder so the (decaying) rate can be fed on every step
    learning_rate = tf.placeholder(tf.float32, shape=[])
    solver = tf.train.AdamOptimizer(learning_rate)
    # solver = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    #solver_step = solver.minimize(top_cls_loss+top_reg_loss+l2)
    solver_step = solver.minimize(1*top_cls_loss+1*top_reg_loss+1.5*fuse_cls_loss+2*fuse_reg_loss+l2)

    max_iter = 200000
    iter_debug = 1  # visualise every `iter_debug` iterations when `vis` is set

    # start training here  #########################################################################################
    log.write('epoch     iter    speed   rate   |  top_cls_loss   reg_loss   |  fuse_cls_loss  reg_loss  |  \n')
    log.write('-------------------------------------------------------------------------------------\n')

    num_ratios = len(ratios)
    num_scales = len(scales)
    fig, axs = plt.subplots(num_ratios, num_scales)

    merged = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    train_writer = tf.summary.FileWriter('./outputs/tensorboard/Res_Vgg_up',
                                         sess.graph)
    with sess.as_default():
        sess.run(tf.global_variables_initializer(), {IS_TRAIN_PHASE: True})
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        # summary_writer = tf.summary.FileWriter(out_dir+'/tf', sess.graph)
        # Resume: the initial values are overwritten by the stored checkpoint.
        saver = tf.train.Saver()
        saver.restore(sess, './outputs/check_points/snap_ResNet_vgg_up_NGT_060000.ckpt')
        # saver.restore(sess, './outputs/check_points/MobileNet.ckpt')

        # Disabled alternative: initialise sub-networks from pretrained weights.
        # var_lt_res=[v for v in tf.trainable_variables() if v.name.startswith('res')]#resnet_v1_50
        # saver_0=tf.train.Saver(var_lt_res)
        # saver_0.restore(sess, './outputs/check_points/resnet_v1_50.ckpt')
        # top_lt=[v for v in tf.trainable_variables() if v.name.startswith('top_base')]
        # top_lt.pop(0)
        # for v in top_lt:
        #     for v_rgb in var_lt:
        #         if v.name[9:]==v_rgb.name:
        #             print ("assign weights:%s"%v.name)
        #             v.assign(v_rgb)
        # var_lt_vgg=[v for v in tf.trainable_variables() if v.name.startswith('vgg')]
        # var_lt_vgg.pop(0)
        # saver_1=tf.train.Saver(var_lt_vgg)
        # saver_1.restore(sess, './outputs/check_points/vgg_16.ckpt')

        batch_top_cls_loss = 0
        batch_top_reg_loss = 0
        batch_fuse_cls_loss = 0
        batch_fuse_reg_loss = 0
        rate = 0.000005
        frame_range = np.arange(num_frames)
        idx = 0    # position inside the currently loaded chunk
        frame = 0  # offset of the current chunk inside `frame_range`
        for iter in range(max_iter):
            epoch = iter//num_frames+1
            # rate=0.001
            start_time = time.time()

            # generate train image -------------
            # idx = np.random.choice(num_frames)     #*10   #num_frames)  #0
            # Redraw the frame order every 2*num_frames iterations (each chunk is
            # iterated twice, so this is one full pass over the shuffled list).
            if iter % (num_frames*2) == 0:
                idx = 0
                frame = 0
                count = 0
                end_flag = 0
                frame_range1 = np.random.permutation(num_frames)
                if np.all(frame_range1 == frame_range):
                    # The original passed an undefined name here, which turned the
                    # intended error into a NameError; report the permutation that
                    # was actually drawn.
                    raise Exception("Invalid level!", frame_range1)
                frame_range = frame_range1

            # Chunked loading: `freq` frames are held in memory at a time.  `count`
            # advances by `freq` each time a chunk has been walked through, and a
            # new chunk is read only when `count` is a multiple of 2*freq, i.e.
            # after the current chunk has been used twice.
            freq = int(200)
            if idx % freq == 0:
                count += idx
                if count % (2*freq) == 0:
                    frame += idx
                    frame_end = min(frame+freq, num_frames)
                    if frame_end == num_frames:
                        end_flag = 1  # last (possibly shorter) chunk of this pass
                    # free the previous chunk before loading the next one
                    del rgbs, tops, fronts, gt_labels, gt_boxes3d, top_imgs, front_imgs, rgbs_norm, image_index
                    rgbs, tops, fronts, gt_labels, gt_boxes3d, top_imgs, front_imgs, rgbs_norm, image_index = load_dummy_datas(index[frame_range[frame:frame_end]])
                idx = 0
            # wrap around inside the last chunk, which can hold fewer than `freq` frames
            if (end_flag == 1) and (idx+frame) == num_frames:
                idx = 0
            print('processing image : %s' % image_index[idx])

            # learning-rate decay: multiply by 0.8 every 10000 iterations
            if (iter+1) % (10000) == 0:
                rate = 0.8*rate

            rgb_shape = rgbs[idx].shape
            batch_top_images    = tops[idx].reshape(1, *top_shape)
            batch_front_images  = fronts[idx].reshape(1, *front_shape)
            batch_rgb_images    = rgbs_norm[idx].reshape(1, *rgb_shape)
            # batch_rgb_images    = rgbs[idx].reshape(1,*rgb_shape)

            top_img = tops[idx]
            # restrict the anchors using the last channel of the top view
            inside_inds_filtered = anchor_filter(top_img[:, :, -1], anchors, inside_inds)

            batch_gt_labels = gt_labels[idx]
            if len(batch_gt_labels) == 0:
                # frame without ground truth: skip it (the iteration still counts)
                idx = idx+1
                continue
            batch_gt_boxes3d = gt_boxes3d[idx]
            batch_gt_top_boxes = box3d_to_top_box(batch_gt_boxes3d)

            ## run proposal generation ------------
            fd1 = {
                top_images:      batch_top_images,
                top_anchors:     anchors,
                top_inside_inds: inside_inds_filtered,

                learning_rate:   rate,
                IS_TRAIN_PHASE:  True
            }
            batch_proposals, batch_proposal_scores, batch_top_features = sess.run([proposals, proposal_scores, top_features], fd1)
            print(batch_proposal_scores[:50])

            ## generate  train rois  ------------
            batch_top_inds, batch_top_pos_inds, batch_top_labels, batch_top_targets = \
                rpn_target(anchors, inside_inds_filtered, batch_gt_labels, batch_gt_top_boxes)

            batch_top_rois, batch_fuse_labels, batch_fuse_targets = \
                rcnn_target(batch_proposals, batch_gt_labels, batch_gt_top_boxes, batch_gt_boxes3d)

            batch_rois3d     = project_to_roi3d(batch_top_rois)
            batch_front_rois = project_to_front_roi(batch_rois3d)
            batch_rgb_rois   = project_to_rgb_roi(batch_rois3d)

            # keep = np.where((batch_rgb_rois[:,1]>=-200) & (batch_rgb_rois[:,2]>=-200) & (batch_rgb_rois[:,3]<=(rgb_shape[1]+200)) & (batch_rgb_rois[:,4]<=(rgb_shape[0]+200)))[0]
            # batch_rois3d        = batch_rois3d[keep]
            # batch_front_rois    = batch_front_rois[keep]
            # batch_rgb_rois      = batch_rgb_rois[keep]
            # batch_proposal_scores=batch_proposal_scores[keep]
            # batch_top_rois      =batch_top_rois[keep]

            if len(batch_rois3d) == 0:
                # no ROI to train the fusion head on: skip this frame
                idx = idx+1
                continue

            ##debug gt generation
            if vis and iter % iter_debug == 0:
                top_image = top_imgs[idx]
                rgb       = rgbs[idx]

                # `img_gt` is reused by the prediction visualisation further down
                img_gt     = draw_rpn_gt(top_image, batch_gt_top_boxes, batch_gt_labels)
                img_label  = draw_rpn_labels(img_gt, anchors, batch_top_inds, batch_top_labels)
                img_target = draw_rpn_targets(top_image, anchors, batch_top_pos_inds, batch_top_targets)
                #imshow('img_rpn_gt',img_gt)
                imshow('img_anchor_label', img_label)
                #imshow('img_rpn_target',img_target)

                img_label  = draw_rcnn_labels(top_image, batch_top_rois, batch_fuse_labels)
                img_target = draw_rcnn_targets(top_image, batch_top_rois, batch_fuse_labels, batch_fuse_targets)
                #imshow('img_rcnn_label',img_label)
                if vis:
                    imshow('img_rcnn_target', img_target)

                img_rgb_rois = draw_boxes(rgb, batch_rgb_rois[:, 1:5], color=(255, 0, 255), thickness=1)
                if vis:
                    imshow('img_rgb_rois', img_rgb_rois)
                    cv2.waitKey(1)

            ## run classification and regression loss -----------
            # everything from fd1 plus the inputs of the fusion head and both losses
            fd2 = {
                **fd1,

                top_images: batch_top_images,
                front_images: batch_front_images,
                rgb_images: batch_rgb_images,

                top_rois:   batch_top_rois,
                front_rois: batch_front_rois,
                rgb_rois:   batch_rgb_rois,

                top_inds:     batch_top_inds,
                top_pos_inds: batch_top_pos_inds,
                top_labels:   batch_top_labels,
                top_targets:  batch_top_targets,

                fuse_labels:  batch_fuse_labels,
                fuse_targets: batch_fuse_targets,
            }
            #_, batch_top_cls_loss, batch_top_reg_loss = sess.run([solver_step, top_cls_loss, top_reg_loss],fd2)

            # one optimisation step; the loss values are fetched for logging
            _, batch_top_cls_loss, batch_top_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss = \
                sess.run([solver_step, top_cls_loss, top_reg_loss, fuse_cls_loss, fuse_reg_loss], fd2)

            speed = time.time()-start_time
            log.write('%5.1f   %5d    %0.4fs   %0.4f   |   %0.5f   %0.5f   |   %0.5f   %0.5f  \n' %
                      (epoch, iter, speed, rate, batch_top_cls_loss, batch_top_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss))

            #print('ok')
            # debug: ------------------------------------

            if vis and iter % iter_debug == 0:
                top_image = top_imgs[idx]
                rgb       = rgbs[idx]

                batch_top_probs, batch_top_scores, batch_top_deltas = \
                    sess.run([top_probs, top_scores, top_deltas], fd2)

                batch_fuse_probs, batch_fuse_deltas = \
                    sess.run([fuse_probs, fuse_deltas], fd2)

                #batch_fuse_deltas=0*batch_fuse_deltas #disable 3d box prediction
                probs, boxes3d = rcnn_nms(batch_fuse_probs, batch_fuse_deltas, batch_rois3d, threshold=0.05)

                ## show rpn score maps
                # one grey-scale map per base box, taken from channel 2*n+1
                p = batch_top_probs.reshape(*(top_feature_shape[0:2]), 2*num_bases)
                for n in range(num_bases):
                    r = n % num_scales
                    s = n//num_scales
                    pn = p[:, :, 2*n+1]*255
                    axs[s, r].cla()
                    if vis:
                        axs[s, r].imshow(pn, cmap='gray', vmin=0, vmax=255)
                        plt.pause(0.01)

                ## show rpn(top) nms
                img_rpn     = draw_rpn(top_image, batch_top_probs, batch_top_deltas, anchors, inside_inds)
                img_rpn_nms = draw_rpn_nms(img_gt, batch_proposals, batch_proposal_scores)
                #imshow('img_rpn',img_rpn)
                if vis:
                    imshow('img_rpn_nms', img_rpn_nms)
                    cv2.waitKey(1)

                ## show rcnn(fuse) nms
                img_rcnn     = draw_rcnn(top_image, batch_fuse_probs, batch_fuse_deltas, batch_top_rois, batch_rois3d, darker=1)
                img_rcnn_nms = draw_rcnn_nms(rgb, boxes3d, probs)
                if vis:
                    imshow('img_rcnn', img_rcnn)
                    imshow('img_rcnn_nms', img_rcnn_nms)
                    # waitKey(0) blocks until a key is pressed in an OpenCV window
                    cv2.waitKey(0)
            # tensorboard summaries every 10 iterations
            if (iter) % 10 == 0:
                summary = sess.run(merged, fd2)
                train_writer.add_summary(summary, iter)
            # save: ------------------------------------
            if (iter) % 2000 == 0 and (iter != 0):
                #saver.save(sess, out_dir + '/check_points/%06d.ckpt'%iter)  #iter
                saver.save(sess, out_dir + '/check_points/snap_ResNet_vgg_NGT_%06d.ckpt' % iter)  #iter
                # saver.save(sess, out_dir + '/check_points/MobileNet.ckpt')  #iter

            idx = idx+1

    # flush buffered summaries and release the event file
    train_writer.close()
コード例 #5
0
def run_train():
    """Train the RGB-only RPN + fusion head, one image per optimisation step.

    The function builds the TF1 graph (placeholders, ``top_feature_net``,
    ``fusion_net``, ``rpn_loss`` / ``rcnn_loss``, Adam solver), then runs
    ``max_iter`` steps.  Each step:

    1. runs the RPN forward pass and NMS to obtain proposals,
    2. builds RPN and R-CNN training targets from the ground truth,
    3. runs one solver step on the weighted sum of all losses,
    4. appends a line to the log; every 10 steps a TensorBoard summary is
       written and every 5000 steps (except step 0) a checkpoint is saved.

    Side effects: sets ``CFG.KEEPPROBS``, decays ``CFG.TRAIN.LEARNING_RATE``
    in place, creates directories under ``CFG.PATH.TRAIN.OUTPUT``, and writes
    log / summary / checkpoint files.

    Raises:
        Exception: if a freshly drawn frame permutation is identical to the
            order used in the previous epoch (see the note at the check).
    """
    CFG.KEEPPROBS = 0.5

    # Output directories for tensorboard, checkpoints and log.
    out_dir = CFG.PATH.TRAIN.OUTPUT
    makedirs(out_dir + '/tf')
    makedirs(out_dir + '/check_points')
    makedirs(out_dir + '/log')
    log = Logger(out_dir + '/log/log_%s.txt' %
                 (time.strftime('%Y-%m-%d %H:%M:%S')),
                 mode='a')

    # Sorted array of training sample identifiers; one entry per frame.
    index = np.load(CFG.PATH.TRAIN.TARGET + '/train.npy')
    index = sorted(index)
    index = np.array(index)
    num_frames = len(index)

    # Anchor bases for the RGB RPN: every (ratio, scale) combination.
    ratios_rgb = np.array([0.5, 1, 2], dtype=np.float32)
    scales_rgb = np.array([0.5, 1, 2, 4, 5], dtype=np.float32)
    bases_rgb = make_bases(base_size=48,
                           ratios=ratios_rgb,
                           scales=scales_rgb)

    num_bases_rgb = len(bases_rgb)
    stride = 8
    out_shape = (2, 2)

    # Initial load of a few fixed samples; the training loop reloads its own
    # chunk on the very first step, so this mainly binds the names up front.
    rgbs, gt_labels, gt_3dTo2Ds, gt_boxes2d, rgbs_norm, image_index = load_dummy_datas(
        index[10:13])

    rgb_shape = rgbs[0].shape

    num_class = 2  # includes background

    # load model ##################################################################################################

    rgb_anchors = tf.placeholder(shape=[None, 4],
                                 dtype=tf.int32,
                                 name='anchors_rgb')
    rgb_inside_inds = tf.placeholder(shape=[None],
                                     dtype=tf.int32,
                                     name='inside_inds_rgb')

    rgb_images = tf.placeholder(shape=[None, None, None, 3],
                                dtype=tf.float32,
                                name='rgb')
    rgb_rois = tf.placeholder(shape=[None, 5],
                              dtype=tf.float32,
                              name='rgb_rois')

    rgb_features, rgb_scores, rgb_probs, rgb_deltas = \
        top_feature_net(rgb_images, rgb_anchors, rgb_inside_inds, num_bases_rgb)

    # <todo> add non max suppression
    fuse_scores, fuse_probs, fuse_deltas, fuse_deltas_3dTo2D = \
        fusion_net(
            ([rgb_features, rgb_rois, 7, 7, 1. / (1 * stride)],), num_class, out_shape)

    # loss ########################################################################################################
    rgb_inds = tf.placeholder(shape=[None], dtype=tf.int32, name='rgb_ind')
    rgb_pos_inds = tf.placeholder(shape=[None],
                                  dtype=tf.int32,
                                  name='rgb_pos_ind')
    rgb_labels = tf.placeholder(shape=[None], dtype=tf.int32, name='rgb_label')
    rgb_targets = tf.placeholder(shape=[None, 4],
                                 dtype=tf.float32,
                                 name='rgb_target')
    rgb_cls_loss, rgb_reg_loss = rpn_loss(2 * rgb_scores, rgb_deltas, rgb_inds,
                                          rgb_pos_inds, rgb_labels,
                                          rgb_targets)

    fuse_labels = tf.placeholder(shape=[None],
                                 dtype=tf.int32,
                                 name='fuse_label')
    fuse_targets = tf.placeholder(shape=[None, 4],
                                  dtype=tf.float32,
                                  name='fuse_target')
    # NOTE(review): this reuses the name 'fuse_target' from the placeholder
    # above; the name is kept unchanged here in case anything looks the
    # tensor up by name -- consider giving it a distinct name.
    fuse_targets_3dTo2Ds = tf.placeholder(shape=[None, 16],
                                          dtype=tf.float32,
                                          name='fuse_target')

    fuse_cls_loss, fuse_reg_loss, fuse_reg_loss_3dTo2D = rcnn_loss(
        fuse_scores, fuse_deltas, fuse_labels, fuse_targets,
        fuse_deltas_3dTo2D, fuse_targets_3dTo2Ds)

    tf.summary.scalar('rcnn_cls_loss', fuse_cls_loss)
    tf.summary.scalar('rcnn_reg_loss', fuse_reg_loss)
    tf.summary.scalar('rcnn_reg_loss_3dTo2D', fuse_reg_loss_3dTo2D)
    tf.summary.scalar('rpn_cls_loss', rgb_cls_loss)
    tf.summary.scalar('rpn_reg_loss', rgb_reg_loss)

    # solver: Adam on the weighted sum of all losses plus L2 regularisation.
    l2 = l2_regulariser(decay=CFG.TRAIN.WEIGHT_DECAY)
    tf.summary.scalar('l2', l2)
    learning_rate = tf.placeholder(tf.float32, shape=[])
    solver = tf.train.AdamOptimizer(learning_rate)
    solver_step = solver.minimize(2 * rgb_cls_loss + 1 * rgb_reg_loss +
                                  2 * fuse_cls_loss + 1 * fuse_reg_loss +
                                  0.5 * fuse_reg_loss_3dTo2D + l2)

    max_iter = 200000
    iter_debug = 1  # visualise every `iter_debug` steps when enabled

    # start training here #########################################################################################
    log.write(
        'epoch     iter    speed   rate   |  top_cls_loss   reg_loss   |  fuse_cls_loss  reg_loss  |  \n'
    )
    log.write(
        '-------------------------------------------------------------------------------------\n'
    )

    merged = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    # NOTE(review): summaries go to this hard-coded path, not to the
    # out_dir + '/tf' directory created above -- confirm this is intended.
    train_writer = tf.summary.FileWriter(
        './outputs/tensorboard/V_2dTo3d_finetune', sess.graph)
    with sess.as_default():
        sess.run(tf.global_variables_initializer(), {IS_TRAIN_PHASE: True})
        saver = tf.train.Saver()

        ## Initialize network ##

        ## Initialize network from a relatively well trained model
        # saver.restore(sess, './outputs/check_points/snap_2dTo3D__data_augmentation090000trainval.ckpt')

        ## Initialize network from the 2D bounding box pretrained model which was trained on Last14000 datasets of UISEE
        # var_lt_res=[v for v in tf.global_variables() if  not v.name.startswith('fuse/3D')]
        # saver_0=tf.train.Saver(var_lt_res)
        # saver_0.restore(sess, './outputs/check_points/snap_2D_pretrain.ckpt')

        ## Initialize network from ResNet50
        # var_lt_res=[v for v in tf.trainable_variables() if v.name.startswith('resnet_v1')]#resnet_v1_50
        # saver_0=tf.train.Saver(var_lt_res)
        # saver_0.restore(sess, './outputs/check_points/resnet_v1_50.ckpt')

        frame_range = np.arange(num_frames)
        idx = 0  # position inside the currently loaded chunk
        frame = 0  # offset of the current chunk inside frame_range

        for step in range(max_iter):
            epoch = step // num_frames + 1
            start_time = time.time()

            # Start of an epoch: reset the chunk bookkeeping and reshuffle.
            if step % (num_frames * 1) == 0:
                idx = 0
                frame = 0
                count = 0
                end_flag = 0
                frame_range1 = np.random.permutation(num_frames)
                if np.all(frame_range1 == frame_range):
                    # The original code referenced an undefined name here and
                    # died with NameError; report the offending order instead.
                    # NOTE(review): an unchanged order is a legitimate (if
                    # unlikely) shuffle result and is certain when
                    # num_frames == 1 -- confirm that raising is intended.
                    raise Exception("Invalid level!", frame_range1)
                frame_range = frame_range1

            # Load the next chunk of up to `freq` frames once the current one
            # has been consumed (idx is 0 at epoch start, `freq` afterwards).
            freq = int(10)
            if idx % freq == 0:
                count += idx
                if count % (1 * freq) == 0:
                    frame += idx
                    frame_end = min(frame + freq, num_frames)
                    if frame_end == num_frames:
                        end_flag = 1  # this chunk reaches the end of the epoch
                    rgbs, gt_labels, gt_3dTo2Ds, gt_boxes2d, rgbs_norm, image_index = load_dummy_datas(
                        index[frame_range[frame:frame_end]])
                idx = 0
            # Wrap around inside the last chunk once it has been exhausted.
            if (end_flag == 1) and (idx + frame) == num_frames:
                idx = 0
            print('processing image : %s' % image_index[idx])

            # Step-wise learning-rate decay, stored back into the global config.
            if (step + 1) % (CFG.TRAIN.LEARNING_RATE_DECAY_STEP) == 0:
                CFG.TRAIN.LEARNING_RATE = CFG.TRAIN.LEARNING_RATE_DECAY_SCALE * CFG.TRAIN.LEARNING_RATE

            rgb_shape = rgbs[idx].shape
            batch_rgb_images = rgbs_norm[idx].reshape(1, *rgb_shape)
            batch_gt_labels = gt_labels[idx]
            batch_gt_3dTo2Ds = gt_3dTo2Ds[idx]
            batch_gt_boxes2d = gt_boxes2d[idx]

            # Frames without ground truth are skipped (the step still counts).
            if len(batch_gt_labels) == 0:
                idx = idx + 1
                continue

            # Anchors are rebuilt per image from its own shape; the feature
            # map size is the image size divided by `stride`, rounded up.
            rgb_feature_shape = ((rgb_shape[0] - 1) // stride + 1,
                                 (rgb_shape[1] - 1) // stride + 1)
            anchors_rgb, inside_inds_rgb = make_anchors(
                bases_rgb, stride, rgb_shape[0:2], rgb_feature_shape[0:2])

            ## run proposal generation ------------
            fd1 = {
                rgb_images: batch_rgb_images,
                rgb_anchors: anchors_rgb,
                rgb_inside_inds: inside_inds_rgb,
                learning_rate: CFG.TRAIN.LEARNING_RATE,
                IS_TRAIN_PHASE: True,
            }
            batch_rgb_probs, batch_deltas, batch_rgb_features = sess.run(
                [rgb_probs, rgb_deltas, rgb_features], fd1)

            rpn_nms = rpn_nms_generator(
                stride,
                rgb_shape[1],
                rgb_shape[0],
                img_scale=1,
                nms_thresh=0.7,
                min_size=stride,
                nms_pre_topn=CFG.TRAIN.RPN_NMS_PRE_TOPN,
                nms_post_topn=CFG.TRAIN.RPN_NMS_POST_TOPN)
            batch_proposals, batch_proposal_scores = rpn_nms(
                batch_rgb_probs, batch_deltas, anchors_rgb, inside_inds_rgb)

            ## generate train rois ------------
            batch_rgb_inds, batch_rgb_pos_inds, batch_rgb_labels, batch_rgb_targets = \
                rpn_target(anchors_rgb, inside_inds_rgb, batch_gt_labels, batch_gt_boxes2d)

            batch_rgb_rois, batch_fuse_labels, batch_fuse_targets2d, batch_fuse_targets_3dTo2Ds = rcnn_target(
                batch_proposals, batch_gt_labels, batch_gt_boxes2d,
                batch_gt_3dTo2Ds, rgb_shape[1], rgb_shape[0])

            print('nums of rcnn batch: %d' % len(batch_rgb_rois))

            ## debug gt generation (blocks on a key press in the OpenCV window)
            if CFG.TRAIN.VISUALIZATION and step % iter_debug == 0:
                rgb = rgbs[idx]

                img_gt = draw_rpn_gt(rgb, batch_gt_boxes2d, batch_gt_labels)
                rgb_label = draw_rpn_labels(img_gt, anchors_rgb,
                                            batch_rgb_inds, batch_rgb_labels)
                rgb_target = draw_rpn_targets(rgb, anchors_rgb,
                                              batch_rgb_pos_inds,
                                              batch_rgb_targets)
                imshow('img_rgb_label', rgb_label)
                imshow('img_rpn_target', rgb_target)

                img_label = draw_rcnn_labels(rgb, batch_rgb_rois,
                                             batch_fuse_labels)
                img_target = draw_rcnn_targets(rgb, batch_rgb_rois,
                                               batch_fuse_labels,
                                               batch_fuse_targets2d)
                imshow('img_rcnn_label', img_label)
                imshow('img_rcnn_target', img_target)

                img_rgb_rois = draw_boxes(rgb,
                                          batch_rgb_rois[:, 1:5],
                                          color=(255, 0, 255),
                                          thickness=1)
                imshow('img_rgb_rois', img_rgb_rois)

                projections = box_transform_3dTo2D_inv(
                    batch_rgb_rois[:, 1:], batch_fuse_targets_3dTo2Ds)
                img_rcnn_3dTo2D = draw_rgb_projections(rgb,
                                                       projections,
                                                       color=(0, 0, 255),
                                                       thickness=1)
                imshow('img_rcnn_3dTo2D', img_rcnn_3dTo2D)
                cv2.waitKey(0)

            ## run classification and regression loss -----------
            # fd1 already carries the image, anchors, learning rate and phase.
            fd2 = {
                **fd1,
                rgb_rois: batch_rgb_rois,
                rgb_inds: batch_rgb_inds,
                rgb_pos_inds: batch_rgb_pos_inds,
                rgb_labels: batch_rgb_labels,
                rgb_targets: batch_rgb_targets,
                fuse_labels: batch_fuse_labels,
                fuse_targets: batch_fuse_targets2d,
                fuse_targets_3dTo2Ds: batch_fuse_targets_3dTo2Ds,
            }

            _, rcnn_probs, batch_rgb_cls_loss, batch_rgb_reg_loss, batch_fuse_cls_loss, batch_fuse_reg_loss, batch_fuse_reg_loss_3dTo2D = \
                sess.run([solver_step, fuse_probs, rgb_cls_loss, rgb_reg_loss, fuse_cls_loss, fuse_reg_loss, fuse_reg_loss_3dTo2D], fd2)

            speed = time.time() - start_time
            log.write('%5.1f   %5d    %0.4fs   %0.6f   |   %0.5f   %0.5f   |   %0.5f   %0.5f  |%0.5f   \n' %
                      (epoch, step, speed, CFG.TRAIN.LEARNING_RATE, batch_rgb_cls_loss, batch_rgb_reg_loss,
                       batch_fuse_cls_loss, batch_fuse_reg_loss, batch_fuse_reg_loss_3dTo2D))

            # TensorBoard summary every 10 steps (costs one extra forward pass).
            if step % 10 == 0:
                summary = sess.run(merged, fd2)
                train_writer.add_summary(summary, step)

            # save: ------------------------------------
            if step % 5000 == 0 and step != 0:
                saver.save(sess, out_dir + '/check_points/' +
                           CFG.PATH.TRAIN.CHECKPOINT_NAME +
                           '%06d.ckpt' % step)

            idx = idx + 1