예제 #1
0
def plot_scene_with_3DBoxes(scene_res_dirs, dataset_name='tless', scene_id=1, save=False):
    """Overlay estimated 3D boxes on every test image of one scene.

    For each view of ``scene_id`` the RGB test image is loaded, the first
    pose estimate of the first ``<view>_<obj>.yml`` result file found in each
    matching result directory is rendered with the box renderer and blended
    into the image, and the result is shown in an OpenCV window. Pressing
    space (key code 32) pauses until the next key press.

    Args:
        scene_res_dirs: Glob pattern for per-scene result directories. Only
            matches whose last path component equals the zero-padded
            ``scene_id`` are used.
        dataset_name: Dataset identifier handed to
            ``dataset_params.get_dataset_params``.
        scene_id: Scene whose views are visualised.
        save: If true, each composited frame is written as ``<view>.png``
            into ``./<scene_id>`` when ``'icp'`` occurs in ``scene_res_dirs``
            and into ``./<scene_id>_rgb`` otherwise.

    Raises:
        IOError: If a test image cannot be read.
    """
    data_params = dataset_params.get_dataset_params(dataset_name, model_type='', train_type='')

    models_cad_files = sorted(
        glob.glob(os.path.join(os.path.dirname(data_params['model_mpath']), '*.ply')))
    W, H = data_params['test_im_size']

    # Function-local import kept from the original.
    from auto_pose.meshrenderer import box3d_renderer
    renderer_line = box3d_renderer.Renderer(models_cad_files, 1, W, H)

    scene_result_dirs = sorted(glob.glob(scene_res_dirs))
    print(data_params['test_rgb_mpath'])
    print(data_params['scene_gt_mpath'])

    scene_gts = inout.load_gt(data_params['scene_gt_mpath'].format(scene_id))
    scene_infos = inout.load_info(data_params['scene_info_mpath'].format(scene_id))

    scene_tag = '%02d' % scene_id
    scene_dirs = [d for d in scene_result_dirs if scene_tag == d.split('/')[-1]]
    print(scene_dirs)

    # Output directory depends only on the arguments, so resolve it once.
    out_dir = scene_tag if 'icp' in scene_res_dirs else scene_tag + '_rgb'
    if save and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for view in range(len(scene_infos)):
        sixd_img_path = data_params['test_rgb_mpath'].format(scene_id, view)
        img = cv2.imread(sixd_img_path)
        if img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; fail with the offending path.
            raise IOError('could not read test image: %s' % sixd_img_path)

        K = scene_infos[view]['cam_K']

        # NOTE(review): the ground-truth 2D boxes are drawn on a copy that is
        # never shown or saved; kept as in the original.
        box_img = img.copy()
        for bb in scene_gts[view]:
            xmin = int(bb['obj_bb'][0])
            ymin = int(bb['obj_bb'][1])
            xmax = int(bb['obj_bb'][0] + bb['obj_bb'][2])
            ymax = int(bb['obj_bb'][1] + bb['obj_bb'][3])

            cv2.rectangle(box_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            cv2.putText(box_img, '%s' % (bb['obj_id']), (xmin, ymax + 20),
                        cv2.FONT_ITALIC, .5, (0, 255, 0), 2)

        for scene_dir in scene_dirs:
            res_paths = glob.glob(os.path.join(scene_dir, '%04d_*.yml' % view))
            print(res_paths)
            if not res_paths:
                # No result file for this view in this directory.
                print('undetected obj: %s' % scene_dir)
                continue
            res_path = res_paths[0]

            try:
                # File names look like <view>_<obj_id>.yml.
                obj_id = int(res_path.split('_')[-1].split('.')[0])
                results = inout.load_results_sixd17(res_path)
                print(results)
                est = results['ests'][0]
                lines = renderer_line.render(obj_id - 1, K, est['R'], est['t'], 10, 5000)
                # True where the rendering is (nearly) black, i.e. where the
                # underlying image pixel must be kept.
                lines_mask = (np.sum(lines, axis=2) < 20)[:, :, None]
                # Vary the line colour with the object id by copying channels.
                if obj_id % 7 == 1:
                    lines[:, :, 0] = lines[:, :, 1]
                elif obj_id % 7 == 2:
                    lines[:, :, 2] = lines[:, :, 1]
                elif obj_id % 7 == 3:
                    lines[:, :, 0] = lines[:, :, 1]
                    lines[:, :, 1] = lines[:, :, 2]

                img = lines_mask * img + lines
            except Exception as exc:
                # Best effort: one broken result must not abort the whole
                # visualisation, but Ctrl-C still propagates.
                print('could not draw estimate from %s: %s' % (res_path, exc))

        cv2.imshow('', img)
        if cv2.waitKey(1) == 32:
            cv2.waitKey(0)
        if save:
            cv2.imwrite(os.path.join(out_dir, '%04d.png' % view), img)
예제 #2
0
def main():
    """Evaluate one trained experiment on the configured scenes and write a report.

    Command line: ``experiment_name`` (optionally ``<group>/<name>``),
    ``evaluation_name``, ``--eval_cfg`` (default ``eval.cfg``) and
    ``--at_step``. The workspace is read from the ``AE_WORKSPACE_PATH``
    environment variable.

    Steps, in order:
      1. Read the train/eval configs and the target and source dataset
         parameters, and compute the ratio of the target to the source
         model extent (norm of ``size_x/size_y/size_z``).
      2. Build the codebook, restore the checkpoint and, if bounding boxes
         are estimated, create the detector.
      3. For every scene and view, obtain the crops of object ``OBJ_ID``
         (detected or ground truth), estimate ``TOP_N`` poses per crop,
         optionally refine them with ICP, record the translation/rotation
         errors and save the estimates as
         ``<eval_dir>/<scene>/<view>_<obj>.yml``.
      4. Optionally compute/evaluate errors and produce the plots selected
         in the ``[PLOT]`` section, then write the LaTeX report.

    With ICP enabled the saved ``run_time`` includes the refinement time of
    all ``TOP_N`` hypotheses of the crop.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('experiment_name')
    parser.add_argument('evaluation_name')
    parser.add_argument('--eval_cfg', default='eval.cfg', required=False)
    parser.add_argument('--at_step', default=None, required=False)
    arguments = parser.parse_args()
    # experiment_name may be given as "<group>/<name>".
    full_name = arguments.experiment_name.split('/')
    experiment_name = full_name.pop()
    experiment_group = full_name.pop() if len(full_name) > 0 else ''
    evaluation_name = arguments.evaluation_name
    eval_cfg = arguments.eval_cfg
    at_step = arguments.at_step

    workspace_path = os.environ.get('AE_WORKSPACE_PATH')
    train_cfg_file_path = u.get_config_file_path(workspace_path,
                                                 experiment_name,
                                                 experiment_group)
    eval_cfg_file_path = u.get_eval_config_file_path(workspace_path,
                                                     eval_cfg=eval_cfg)

    train_args = configparser.ConfigParser()
    eval_args = configparser.ConfigParser()
    train_args.read(train_cfg_file_path)
    eval_args.read(eval_cfg_file_path)

    #[DATA]
    # target data params
    dataset_name = eval_args.get('DATA', 'DATASET')
    obj_id = eval_args.getint('DATA', 'OBJ_ID')
    # NOTE(review): SCENES is passed through eval(); only use trusted
    # configuration files. An empty list means "all scenes of the object".
    scenes = eval(eval_args.get('DATA', 'SCENES'))
    if len(scenes) == 0:
        scenes = eval_utils.get_all_scenes_for_obj(eval_args)
    cam_type = eval_args.get('DATA', 'cam_type')
    # model_type is set to 'reconst' only for tless.
    model_type = 'reconst' if dataset_name == 'tless' else ''

    data_params = dataset_params.get_dataset_params(dataset_name,
                                                    model_type=model_type,
                                                    train_type='',
                                                    test_type=cam_type,
                                                    cam_type=cam_type)
    target_models_info = inout.load_yaml(data_params['models_info_path'])

    # source data params
    source_dataset_name = 'toyotalight'
    # TODO workaround: the train config has no DATA section, so the source
    # object id is taken from the two characters preceding the last four
    # characters of the config path (presumably "NN" in "...NN.cfg").
    source_obj_id = int(train_cfg_file_path[-6:-4])
    source_data_params = dataset_params.get_dataset_params(source_dataset_name,
                                                           model_type='',
                                                           train_type='',
                                                           test_type='',
                                                           cam_type='')
    source_models_info = inout.load_yaml(
        source_data_params['models_info_path'])
    print("source_models_info_path: {}".format(
        source_data_params['models_info_path']))
    # 'diameter' is not equal to sqrt(x^2+y^2+z^2) for hinterstoisser,
    # rutgers, tless, tejaniDB etc. (it is for toyotalight), so the extent is
    # computed from the size_* entries instead.
    target_models_3Dlength = np.linalg.norm([
        target_models_info[obj_id][key]
        for key in ['size_x', 'size_y', 'size_z']
    ])
    source_models_3Dlength = np.linalg.norm([
        source_models_info[source_obj_id][key]
        for key in ['size_x', 'size_y', 'size_z']
    ])

    target_source_length_ratio = target_models_3Dlength / source_models_3Dlength
    print("target_source_length_ratio: {}".format(target_source_length_ratio))
    print("source id {:02d}, target id {:02d}".format(source_obj_id, obj_id))
    print('basepath: {}'.format(data_params['base_path']))
    #[BBOXES]
    estimate_bbs = eval_args.getboolean('BBOXES', 'ESTIMATE_BBS')
    #[METRIC]
    top_nn = eval_args.getint('METRIC', 'TOP_N')
    #[EVALUATION]
    icp = eval_args.getboolean('EVALUATION', 'ICP')

    if icp:
        evaluation_name = evaluation_name + '_icp'
    if estimate_bbs:
        evaluation_name = evaluation_name + '_bbest'

    data = dataset_name + '_' + cam_type if len(cam_type) > 0 else dataset_name

    log_dir = u.get_log_dir(workspace_path, experiment_name, experiment_group)
    ckpt_dir = u.get_checkpoint_dir(log_dir)
    eval_dir = u.get_eval_dir(log_dir, evaluation_name, data)

    if not os.path.exists(eval_dir):
        os.makedirs(eval_dir)
    shutil.copy2(eval_cfg_file_path, eval_dir)

    print('eval_args:  {}'.format(eval_args))

    codebook, dataset, decoder = factory.build_codebook_from_name(
        experiment_name,
        experiment_group,
        return_dataset=True,
        return_decoder=True)
    # Bare attribute access kept from the original; presumably it forces the
    # renderer to be created before the TF session - TODO confirm.
    dataset.renderer
    gpu_options = tf.GPUOptions(allow_growth=True,
                                per_process_gpu_memory_fraction=0.5)
    config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=config)
    factory.restore_checkpoint(sess,
                               tf.train.Saver(),
                               ckpt_dir,
                               at_step=at_step)

    if estimate_bbs:
        # Object detection, separate from the pose estimator.
        from rmcssd.bin import detector
        ssd = detector.Detector(eval_args.get('BBOXES', 'CKPT'))

    t_errors = []
    R_errors = []
    all_test_visibs = []

    test_embeddings = []
    for scene_id in scenes:

        test_imgs = eval_utils.load_scenes(scene_id, eval_args)
        test_imgs_depth = eval_utils.load_scenes(
            scene_id, eval_args, depth=True) if icp else None

        if estimate_bbs:
            external_bbs = eval_args.get('BBOXES', 'EXTERNAL')
            print(external_bbs)
            if external_bbs == 'False':
                # Run the detector on every test image of the scene.
                bb_preds = {}
                for i, img in enumerate(test_imgs):
                    print(img.shape)
                    bb_preds[i] = ssd.detectSceneBBs(img,
                                                     min_score=.2,
                                                     nms_threshold=.45)
                print(bb_preds)
            else:
                # EXTERNAL names a directory with one <scene>.yml per scene.
                bb_preds = inout.load_yaml(
                    os.path.join(external_bbs,
                                 '{:02d}.yml'.format(scene_id)))

            test_img_crops, test_img_depth_crops, bbs, bb_scores, visibilities = eval_utils.generate_scene_crops(
                test_imgs, test_imgs_depth, bb_preds, eval_args, train_args)
        else:
            # Crops are taken from the ground-truth boxes of the object.
            test_img_crops, test_img_depth_crops, bbs, bb_scores, visibilities = eval_utils.get_gt_scene_crops(
                scene_id, eval_args, train_args)

        if len(test_img_crops) == 0:
            print('ERROR: object %s not in scene %s' % (obj_id, scene_id))
            exit()

        info = inout.load_info(
            data_params['scene_info_mpath'].format(scene_id))
        # NOTE(review): indexed by view below, so this relies on info
        # iterating in view order.
        Ks_test = [np.array(v['cam_K']).reshape(3, 3) for v in info.values()]

        gts = inout.load_gt(data_params['scene_gt_mpath'].format(scene_id))
        W_test, H_test = data_params['test_im_size']

        icp_renderer = icp_utils.SynRenderer(train_args) if icp else None
        noof_scene_views = eval_utils.noof_scene_views(scene_id, eval_args)

        test_embeddings.append([])

        scene_res_dir = os.path.join(
            eval_dir, '{scene_id:02d}'.format(scene_id=scene_id))
        if not os.path.exists(scene_res_dir):
            os.makedirs(scene_res_dir)

        for view in range(noof_scene_views):
            try:
                # Only the configured object id is selected in every view.
                test_crops, test_crops_depth, test_bbs, test_scores, test_visibs = eval_utils.select_img_crops(
                    test_img_crops[view][obj_id],
                    test_img_depth_crops[view][obj_id] if icp else None,
                    bbs[view][obj_id], bb_scores[view][obj_id],
                    visibilities[view][obj_id], eval_args)
            except Exception:
                # Best effort: any failure to obtain crops for this view is
                # treated as "no detection"; Ctrl-C still propagates.
                print('no detections')
                continue

            print(view)
            preds = {}
            pred_views = []
            all_test_visibs.append(test_visibs[0])
            t_errors_crop = []
            R_errors_crop = []

            for i, (test_crop, test_bb, test_score) in enumerate(
                    zip(test_crops, test_bbs, test_scores)):
                if train_args.getint('Dataset', 'C') == 1:
                    # Single-channel model: convert the crop to grayscale.
                    test_crop = cv2.cvtColor(test_crop,
                                             cv2.COLOR_BGR2GRAY)[:, :, None]
                start = time.time()
                # Modify here to change the pose estimation algorithm.
                Rs_est, ts_est = codebook.auto_pose6d(
                    sess,
                    test_crop,
                    test_bb,
                    Ks_test[view].copy(),
                    top_nn,
                    train_args,
                    target_source_length_ratio=target_source_length_ratio)
                ae_time = time.time() - start
                run_time = ae_time
                if estimate_bbs:
                    run_time += bb_preds[view][0]['det_time']

                if eval_args.getboolean('PLOT', 'EMBEDDING_PCA'):
                    test_embeddings[-1].append(
                        codebook.test_embedding(sess,
                                                test_crop,
                                                normalized=True))

                # Keep the unrefined estimates for the error print-out/plots.
                Rs_est_old, ts_est_old = Rs_est.copy(), ts_est.copy()
                # Accumulated over all hypotheses so that run_time covers the
                # whole refinement, and defined even if the loop is empty.
                icp_time = 0.0
                for p in range(top_nn):
                    if icp:
                        start = time.time()
                        # icp only along tz
                        R_est_refined, t_est_refined = icp_utils.icp_refinement(
                            test_crops_depth[i],
                            icp_renderer,
                            Rs_est[p],
                            ts_est[p],
                            Ks_test[view].copy(), (W_test, H_test),
                            depth_only=True,
                            max_mean_dist_factor=5.0)
                        print(ts_est[p])
                        print(t_est_refined)
                        # x, y update; does not change tz
                        _, ts_est_refined = codebook.auto_pose6d(
                            sess,
                            test_crop,
                            test_bb,
                            Ks_test[view].copy(),
                            top_nn,
                            train_args,
                            depth_pred=t_est_refined[2])
                        t_est_refined = ts_est_refined[p]
                        # rotation icp
                        R_est_refined, _ = icp_utils.icp_refinement(
                            test_crops_depth[i],
                            icp_renderer,
                            R_est_refined,
                            t_est_refined,
                            Ks_test[view].copy(), (W_test, H_test),
                            no_depth=True)
                        print(Rs_est[p])
                        print(R_est_refined)
                        icp_time += time.time() - start
                        Rs_est[p], ts_est[p] = R_est_refined, t_est_refined
                    preds.setdefault('ests', []).append({
                        'score': test_score,
                        'R': Rs_est[p],
                        't': ts_est[p]
                    })
                if icp:
                    run_time = run_time + icp_time

                min_t_err, min_R_err = eval_plots.print_trans_rot_errors(
                    gts[view], obj_id, ts_est, ts_est_old, Rs_est, Rs_est_old)
                t_errors_crop.append(min_t_err)
                R_errors_crop.append(min_R_err)

                if eval_args.getboolean('PLOT', 'RECONSTRUCTION'):
                    eval_plots.plot_reconstruction_test(
                        sess, codebook._encoder, decoder, test_crop)
                if eval_args.getboolean('PLOT',
                                        'NEAREST_NEIGHBORS') and not icp:
                    for R_est, t_est in zip(Rs_est, ts_est):
                        pred_views.append(
                            dataset.render_rot(R_est, downSample=2))
                    eval_plots.show_nearest_rotation(pred_views, test_crop,
                                                     view)
                if eval_args.getboolean('PLOT', 'SCENE_WITH_ESTIMATE'):
                    eval_plots.plot_scene_with_estimate(
                        test_imgs[view].copy(),
                        icp_renderer.renderer if icp else dataset.renderer,
                        Ks_test[view].copy(), Rs_est_old[0], ts_est_old[0],
                        Rs_est[0], ts_est[0], test_bb, test_score, obj_id,
                        gts[view], bb_preds[view] if estimate_bbs else None)

                # Space (key code 32) pauses until the next key press.
                if cv2.waitKey(1) == 32:
                    cv2.waitKey(0)

            # Keep the errors of the crop with the smallest translation
            # error; the rotation error is taken from that same crop.
            best_crop = np.argmin(
                np.linalg.norm(np.array(t_errors_crop), axis=1))
            t_errors.append(t_errors_crop[best_crop])
            R_errors.append(R_errors_crop[best_crop])

            # save predictions in sixd format
            res_path = os.path.join(scene_res_dir,
                                    '%04d_%02d.yml' % (view, obj_id))
            inout.save_results_sixd17(res_path, preds, run_time=run_time)

    if not os.path.exists(os.path.join(eval_dir, 'latex')):
        os.makedirs(os.path.join(eval_dir, 'latex'))
    if not os.path.exists(os.path.join(eval_dir, 'figures')):
        os.makedirs(os.path.join(eval_dir, 'figures'))

    # Evaluation code. dataset.renderer renders the source object model; if
    # the target object model is needed for evaluation, a new renderer has to
    # be created.
    if eval_args.getboolean('EVALUATION', 'COMPUTE_ERRORS'):
        eval_calc_errors.eval_calc_errors(eval_args,
                                          eval_dir,
                                          dataset_renderer=dataset.renderer)
    if eval_args.getboolean('EVALUATION', 'EVALUATE_ERRORS'):
        eval_loc.match_and_eval_performance_scores(eval_args, eval_dir)

    # Plot code.
    cyclo = train_args.getint('Embedding', 'NUM_CYCLO')
    if eval_args.getboolean('PLOT', 'EMBEDDING_PCA'):
        embedding = sess.run(codebook.embedding_normalized)
        eval_plots.compute_pca_plot_embedding(eval_dir,
                                              embedding[::cyclo],
                                              np.array(test_embeddings[0]),
                                              obj_id=obj_id)
    if eval_args.getboolean('PLOT', 'VIEWSPHERE'):
        eval_plots.plot_viewsphere_for_embedding(
            dataset.viewsphere_for_embedding[::cyclo], eval_dir, obj_id=obj_id)
    if eval_args.getboolean('PLOT', 'CUM_T_ERROR_HIST'):
        eval_plots.plot_t_err_hist(np.array(t_errors), eval_dir, obj_id=obj_id)
        eval_plots.plot_t_err_hist2(np.array(t_errors),
                                    eval_dir,
                                    obj_id=obj_id)
    if eval_args.getboolean('PLOT', 'CUM_R_ERROR_HIST'):
        eval_plots.plot_R_err_hist(eval_args, eval_dir, scenes)
        eval_plots.plot_R_err_hist2(np.array(R_errors),
                                    eval_dir,
                                    obj_id=obj_id)
    if eval_args.getboolean('PLOT', 'CUM_VSD_ERROR_HIST'):
        eval_plots.plot_vsd_err_hist(eval_args, eval_dir, scenes)
    if eval_args.getboolean('PLOT', 'VSD_OCCLUSION'):
        eval_plots.plot_vsd_occlusion(eval_args, eval_dir, scenes,
                                      np.array(all_test_visibs))
    if eval_args.getboolean('PLOT', 'R_ERROR_OCCLUSION'):
        eval_plots.plot_re_rect_occlusion(eval_args, eval_dir, scenes,
                                          np.array(all_test_visibs))
    if eval_args.getboolean('PLOT', 'ANIMATE_EMBEDDING_PCA'):
        eval_plots.animate_embedding_path(test_embeddings[0])
    if eval_args.getboolean('PLOT', 'RECONSTRUCTION_TEST_BATCH'):
        eval_plots.plot_reconstruction_test_batch(sess,
                                                  codebook,
                                                  decoder,
                                                  test_img_crops,
                                                  noof_scene_views,
                                                  obj_id,
                                                  eval_dir=eval_dir)

    report = latex_report.Report(eval_dir, log_dir)
    report.write_configuration(train_cfg_file_path, eval_cfg_file_path)
    report.merge_all_tex_files()
    report.include_all_figures()
    report.save(open_pdf=False)
def eval_calc_errors(eval_args, eval_dir):
    """Calculate pose errors for all result files found under ``eval_dir``.

    Every sub-directory of ``eval_dir`` whose name consists of digits is
    treated as a scene; its ``<im_id>_<obj_id>.yml`` files are loaded as
    SIXD results. For each error type listed in ``[METRIC] ERROR_TYPE`` the
    selected top estimates are compared against every ground-truth pose of
    the same object in the same image, and the errors are saved to
    ``<eval_dir>/<error_sign>/errors_<scene_id>.yml``.

    Args:
        eval_args: Config parser providing the ``METHOD``, ``DATA``,
            ``EVALUATION`` and ``METRIC`` sections read below.
        eval_dir: Directory holding the per-scene result directories; also
            the root of the error output.

    Returns:
        True after all error types have been processed.
    """
    #[METHOD]
    method = eval_args.get('METHOD', 'METHOD')

    #[DATA]
    dataset = eval_args.get('DATA', 'DATASET')
    test_type = eval_args.get('DATA', 'CAM_TYPE')

    #[METRIC]
    # Top N pose estimates (with the highest score) to be evaluated for each
    # object in each image: 0 = all estimates, -1 = given by the number of
    # GT poses.
    n_top = eval_args.getint('EVALUATION', 'TOP_N_EVAL')
    # Only used to build the error signature.
    n_top_str = eval_args.getint('METRIC', 'TOP_N')
    # Pose error functions: 'vsd', 'adi', 'add', 'cou', 're', 'te', ...
    # NOTE(review): the value is passed through eval(); only use trusted
    # configuration files.
    error_types = eval(eval_args.get('METRIC', 'ERROR_TYPE'))
    # VSD parameters
    vsd_delta = eval_args.getint('METRIC', 'VSD_DELTA')
    vsd_tau = eval_args.getint('METRIC', 'VSD_TAU')
    vsd_cost = eval_args.get('METRIC', 'VSD_COST')  # 'step', 'tlinear'

    result_path = eval_dir
    print('Processing: ' + result_path)

    # Mask of the path to the output file with the calculated errors.
    errors_mpath = '{result_path}/{error_sign}/errors_{scene_id:02d}.yml'

    # Directories with results for individual scenes. The error output
    # directories are not all-digit names, so this list does not depend on
    # the error type and is collected once.
    scene_dirs = sorted([
        d for d in glob.glob(os.path.join(result_path, '*'))
        if os.path.isdir(d) and os.path.basename(d).isdigit()
    ])
    print(scene_dirs)

    # Label used in the progress output.
    dataset_str = dataset
    if test_type != '':
        dataset_str += ' - {}'.format(test_type)

    model_error_types = ['vsd', 'add', 'adi', 'cou', 'proj', 'projamb']
    camera_error_types = ['vsd', 'cou', 'proj', 'projamb']

    # Models are reloaded only when the model files differ from the ones
    # loaded for the previous error type.
    models = {}
    loaded_models_key = None

    for error_type in error_types:

        # Error signature
        error_sign = 'error=' + error_type + '_ntop=' + str(n_top_str)
        if error_type == 'vsd':
            error_sign += '_delta={}_tau={}_cost={}'.format(
                vsd_delta, vsd_tau, vsd_cost)

        # Select data type
        if dataset == 'tless':
            cam_type = test_type
            if error_type in ['adi', 'add']:
                model_type = 'cad_subdivided'
            else:
                model_type = 'cad'
        else:
            model_type = ''
            cam_type = ''

        # Load dataset parameters
        dp = get_dataset_params(dataset,
                                model_type=model_type,
                                test_type=test_type,
                                cam_type=cam_type)

        # Load object models
        if error_type in model_error_types:
            models_key = (dp['model_mpath'], dp['obj_count'])
            if models_key != loaded_models_key:
                print('Loading object models...')
                models = {
                    model_id: inout.load_ply(
                        dp['model_mpath'].format(model_id))
                    for model_id in range(1, dp['obj_count'] + 1)
                }
                loaded_models_key = models_key

        for scene_dir in scene_dirs:
            scene_id = int(os.path.basename(scene_dir))

            # Load info and GT poses for the current scene
            scene_info = inout.load_info(
                dp['scene_info_mpath'].format(scene_id))
            scene_gt = inout.load_gt(dp['scene_gt_mpath'].format(scene_id))

            res_paths = sorted(glob.glob(os.path.join(scene_dir, '*.yml')))

            errs = []
            im_id = -1
            depth_im = None
            for res_id, res_path in enumerate(res_paths):
                # Parse image ID and object ID from the filename
                filename = os.path.basename(res_path).split('.')[0]
                im_id_prev = im_id
                im_id, obj_id = map(int, filename.split('_'))

                if res_id % 10 == 0:
                    print('Calculating error: {}, {}, {}, {}, {}, {}'.format(
                        error_type, method, dataset_str, scene_id, im_id,
                        obj_id))

                # Load the depth image if VSD is selected; result files are
                # sorted, so it is reloaded only when the image changes.
                if error_type == 'vsd' and im_id != im_id_prev:
                    depth_path = dp['test_depth_mpath'].format(scene_id, im_id)
                    depth_im = inout.load_depth2(depth_path)
                    depth_im *= dp['cam']['depth_scale']  # to [mm]

                # Load camera matrix
                if error_type in camera_error_types:
                    K = scene_info[im_id]['cam_K']

                # Load pose estimates
                res = inout.load_results_sixd17(res_path)
                ests = res['ests']

                # Sort the estimates by score (in descending order), keeping
                # their original index as est_id.
                ests_sorted = sorted(enumerate(ests),
                                     key=lambda x: x[1]['score'],
                                     reverse=True)

                # Select the required number of top estimated poses
                if n_top == 0:  # All estimates are considered
                    n_top_curr = None
                elif n_top == -1:  # Given by the number of GT poses
                    n_top_curr = sum(
                        gt['obj_id'] == obj_id for gt in scene_gt[im_id])
                else:
                    n_top_curr = n_top
                ests_sorted = ests_sorted[:n_top_curr]

                for est_id, est in ests_sorted:
                    R_e = est['R']
                    t_e = est['t']

                    errs_gts = {}  # Errors w.r.t. GT poses of the same object
                    for gt_id, gt in enumerate(scene_gt[im_id]):
                        if gt['obj_id'] != obj_id:
                            continue

                        # Stays -1.0 for an error type not handled below.
                        e = -1.0
                        R_g = gt['cam_R_m2c']
                        t_g = gt['cam_t_m2c']

                        if error_type == 'vsd':
                            e = pose_error.vsd(R_e, t_e, R_g, t_g,
                                               models[obj_id], depth_im, K,
                                               vsd_delta, vsd_tau, vsd_cost)
                        elif error_type == 'add':
                            e = pose_error.add(R_e, t_e, R_g, t_g,
                                               models[obj_id])
                        elif error_type == 'adi':
                            e = pose_error.adi(R_e, t_e, R_g, t_g,
                                               models[obj_id])
                        elif error_type == 'proj':
                            e = pose_error.arp_2d(R_e, t_e, R_g, t_g,
                                                  models[obj_id], K)
                        elif error_type == 'projamb':
                            e = pose_error.arpi_2d(R_e, t_e, R_g, t_g,
                                                   models[obj_id], K)
                        elif error_type == 'cou':
                            e = pose_error.cou(R_e, t_e, R_g, t_g,
                                               models[obj_id],
                                               dp['test_im_size'], K)
                        elif error_type == 're':
                            e = pose_error.re(R_e, R_g)
                        elif error_type == 'te':
                            e = pose_error.te(t_e, t_g)

                        errs_gts[gt_id] = e

                    errs.append({
                        'im_id': im_id,
                        'obj_id': obj_id,
                        'est_id': est_id,
                        'score': est['score'],
                        'errors': errs_gts
                    })

            print('Saving errors...')
            errors_path = errors_mpath.format(result_path=result_path,
                                              error_sign=error_sign,
                                              scene_id=scene_id)

            misc.ensure_dir(os.path.dirname(errors_path))
            inout.save_errors(errors_path, errs)

            print('')
    print('Done.')
    return True
def main():
    """Evaluate a trained (joint) auto-encoder on the test scenes of a dataset.

    Command line arguments:
        experiment_name: '<group>/<name>' or '<name>' of the trained experiment.
        evaluation_name: base name of the evaluation; suffixes are appended for
            every enabled option (icp, bbest, maskest, gttrans, gtmasks,
            refined).
        --eval_cfg: evaluation config file name (default 'eval.cfg').
        --at_step: checkpoint step to restore; defaults to the 'NUM_ITER'
            option of the training config.
        --model_path: path of the object model (required).

    For every scene listed in the evaluation config (or all scenes returned by
    ``eval_utils.get_all_scenes_for_obj`` when the list is empty) the function
    obtains object crops (from detections or ground truth), estimates poses
    with ``codebook.auto_pose6d``, optionally refines them with ICP, and writes
    one result file per view via ``inout.save_results_sixd17``.  Afterwards it
    optionally computes/evaluates errors, creates the configured plots and
    builds the LaTeX report.

    The workspace is read from the ``AE_WORKSPACE_PATH`` environment variable.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('experiment_name')
    parser.add_argument('evaluation_name')
    parser.add_argument('--eval_cfg', default='eval.cfg', required=False)
    parser.add_argument('--at_step', default=None, type=str, required=False)
    parser.add_argument('--model_path', default=None, required=True)
    arguments = parser.parse_args()
    # 'group/name' -> name is the last path component, group the one before it.
    full_name = arguments.experiment_name.split('/')
    experiment_name = full_name.pop()
    experiment_group = full_name.pop() if len(full_name) > 0 else ''
    evaluation_name = arguments.evaluation_name
    eval_cfg = arguments.eval_cfg
    at_step = arguments.at_step
    model_path = arguments.model_path

    workspace_path = os.environ.get('AE_WORKSPACE_PATH')
    log_dir = u.get_log_dir(workspace_path, experiment_name, experiment_group)
    train_cfg_file_path = u.get_train_config_exp_file_path(
        log_dir, experiment_name)
    eval_cfg_file_path = u.get_eval_config_file_path(workspace_path,
                                                     eval_cfg=eval_cfg)

    train_args = configparser.ConfigParser(inline_comment_prefixes="#")
    eval_args = configparser.ConfigParser(inline_comment_prefixes="#")
    train_args.read(train_cfg_file_path)
    eval_args.read(eval_cfg_file_path)

    #[DATA]
    dataset_name = eval_args.get('DATA', 'DATASET')
    obj_id = eval_args.getint('DATA', 'OBJ_ID')
    # NOTE(review): SCENES is evaluated with eval(); this is only acceptable
    # because the evaluation config is a trusted, locally authored file.
    scenes = eval(eval_args.get('DATA', 'SCENES'))
    if len(scenes) == 0:
        # An empty list means "all scenes that contain the object".
        scenes = eval_utils.get_all_scenes_for_obj(eval_args)
    cam_type = eval_args.get('DATA', 'cam_type')
    data_params = dataset_params.get_dataset_params(dataset_name,
                                                    model_type='',
                                                    train_type='',
                                                    test_type=cam_type,
                                                    cam_type=cam_type)
    #[BBOXES]
    estimate_bbs = eval_args.getboolean('BBOXES', 'ESTIMATE_BBS')
    gt_masks = eval_args.getboolean('BBOXES', 'gt_masks')
    estimate_masks = eval_args.getboolean('BBOXES', 'estimate_masks')

    #[METRIC]
    top_nn = eval_args.getint('METRIC', 'TOP_N')
    #[EVALUATION]
    icp = eval_args.getboolean('EVALUATION', 'ICP')
    gt_trans = eval_args.getboolean('EVALUATION', 'gt_trans')
    iterative_code_refinement = eval_args.getboolean(
        'EVALUATION', 'iterative_code_refinement')

    H_AE = train_args.getint('Dataset', 'H')
    W_AE = train_args.getint('Dataset', 'W')

    # Encode the enabled options in the name of the evaluation directory.
    evaluation_name = evaluation_name + '_icp' if icp else evaluation_name
    evaluation_name = evaluation_name + '_bbest' if estimate_bbs else evaluation_name
    evaluation_name = evaluation_name + '_maskest' if estimate_masks else evaluation_name
    evaluation_name = evaluation_name + '_gttrans' if gt_trans else evaluation_name
    evaluation_name = evaluation_name + '_gtmasks' if gt_masks else evaluation_name
    evaluation_name = evaluation_name + '_refined' if iterative_code_refinement else evaluation_name

    data = dataset_name + '_' + cam_type if len(cam_type) > 0 else dataset_name

    if at_step is None:
        checkpoint_file = u.get_checkpoint_basefilename(
            log_dir,
            False,
            latest=train_args.getint('Training', 'NUM_ITER'),
            joint=True)
    else:
        checkpoint_file = u.get_checkpoint_basefilename(log_dir,
                                                        False,
                                                        latest=at_step,
                                                        joint=True)
    print(checkpoint_file)
    eval_dir = u.get_eval_dir(log_dir, evaluation_name, data)

    if not os.path.exists(eval_dir):
        os.makedirs(eval_dir)
    # Keep a copy of the evaluation config next to the results.
    shutil.copy2(eval_cfg_file_path, eval_dir)

    codebook, dataset = factory.build_codebook_from_name(experiment_name,
                                                         experiment_group,
                                                         return_dataset=True,
                                                         joint=True)
    dataset._kw['model_path'] = [model_path]
    # 'reconst' in the path selects the reconstructed model, otherwise 'cad'.
    dataset._kw['model'] = 'reconst' if 'reconst' in model_path else 'cad'

    gpu_options = tf.GPUOptions(allow_growth=True,
                                per_process_gpu_memory_fraction=0.5)
    config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_file)

    t_errors = []
    R_errors = []
    all_test_visibs = []

    external_path = eval_args.get('BBOXES', 'EXTERNAL')

    test_embeddings = []
    for scene_id in scenes:

        test_imgs = eval_utils.load_scenes(scene_id, eval_args)
        test_imgs_depth = eval_utils.load_scenes(
            scene_id, eval_args, depth=True) if icp else None

        if estimate_bbs:
            print(external_path)
            # Instance masks are only available for external Mask R-CNN
            # predictions; default to None so every branch below defines it.
            maskrcnn_scene_masks = None
            if external_path == 'False':
                # No external detections: run the detector on every image.
                bb_preds = {}
                for i, img in enumerate(test_imgs):
                    print((img.shape))
                    bb_preds[i] = ssd.detectSceneBBs(img,
                                                     min_score=.05,
                                                     nms_threshold=.45)
                print(bb_preds)
            else:
                if estimate_masks:
                    bb_preds = inout.load_yaml(
                        os.path.join(
                            external_path,
                            '{:02d}/mask_rcnn_predict.yml'.format(scene_id)))
                    print(list(bb_preds[0][0].keys()))
                    mask_paths = glob.glob(
                        os.path.join(external_path,
                                     '{:02d}/masks/*.npy'.format(scene_id)))
                    maskrcnn_scene_masks = [np.load(mp) for mp in mask_paths]
                else:
                    bb_preds = inout.load_yaml(
                        os.path.join(external_path,
                                     '{:02d}.yml'.format(scene_id)))

            test_img_crops, test_img_depth_crops, bbs, bb_scores, visibilities = eval_utils.generate_scene_crops(
                test_imgs,
                test_imgs_depth,
                bb_preds,
                eval_args, (H_AE, W_AE),
                inst_masks=maskrcnn_scene_masks)
        else:
            test_img_crops, test_img_depth_crops, bbs, bb_scores, visibilities = eval_utils.get_gt_scene_crops(
                scene_id,
                eval_args,
                train_args,
                load_gt_masks=external_path if gt_masks else gt_masks)

        if len(test_img_crops) == 0:
            print(('ERROR: object %s not in scene %s' % (obj_id, scene_id)))
            exit()

        info = inout.load_info(
            data_params['scene_info_mpath'].format(scene_id))
        # One 3x3 camera matrix per view of the scene.
        Ks_test = [
            np.array(v['cam_K']).reshape(3, 3) for v in list(info.values())
        ]

        ######remove
        gts = inout.load_gt(data_params['scene_gt_mpath'].format(scene_id))
        visib_gts = inout.load_yaml(data_params['scene_gt_stats_mpath'].format(
            scene_id, 15))
        #######
        W_test, H_test = data_params['test_im_size']

        icp_renderer = icp_utils.SynRenderer(
            train_args, dataset._kw['model_path'][0]) if icp else None
        noof_scene_views = eval_utils.noof_scene_views(scene_id, eval_args)

        # Embeddings of this scene's crops (only filled if PCA plot is enabled).
        test_embeddings.append([])

        scene_res_dir = os.path.join(
            eval_dir, '{scene_id:02d}'.format(scene_id=scene_id))
        if not os.path.exists(scene_res_dir):
            os.makedirs(scene_res_dir)

        for view in range(noof_scene_views):
            try:
                test_crops, test_crops_depth, test_bbs, test_scores, test_visibs = eval_utils.select_img_crops(
                    test_img_crops[view][obj_id],
                    test_img_depth_crops[view][obj_id] if icp else None,
                    bbs[view][obj_id], bb_scores[view][obj_id],
                    visibilities[view][obj_id], eval_args)
            except Exception:
                # Views without a crop for the object are skipped; catching
                # Exception (not a bare except) keeps Ctrl-C working.
                print('no detections')
                continue

            print(view)
            preds = {}
            pred_views = []
            all_test_visibs.append(test_visibs[0])
            t_errors_crop = []
            R_errors_crop = []

            for i, (test_crop, test_bb, test_score) in enumerate(
                    zip(test_crops, test_bbs, test_scores)):

                start = time.time()
                if train_args.getint('Dataset', 'C') == 1:
                    # Single-channel network: convert the crop to grayscale.
                    test_crop = cv2.cvtColor(test_crop,
                                             cv2.COLOR_BGR2GRAY)[:, :, None]
                Rs_est, ts_est, _ = codebook.auto_pose6d(
                    sess,
                    test_crop,
                    test_bb,
                    Ks_test[view].copy(),
                    top_nn,
                    train_args,
                    codebook._get_codebook_name(model_path),
                    refine=iterative_code_refinement)
                # Keep the unrefined estimates for error printing / plotting.
                Rs_est_old, ts_est_old = Rs_est.copy(), ts_est.copy()
                ae_time = time.time() - start

                if eval_args.getboolean('PLOT', 'EMBEDDING_PCA'):
                    test_embeddings[-1].append(
                        codebook.test_embedding(sess,
                                                test_crop,
                                                normalized=True))

                if gt_trans:
                    # Replace the estimated translation by the ground-truth
                    # translation of the instance whose 'obj_bb' is closest to
                    # its 'bbox_obj' entry in the visibility statistics.
                    # NOTE(review): gt_obj stays unbound (or stale) if the view
                    # has no ground truth for obj_id -- confirm this cannot
                    # happen for the evaluated scenes.
                    ts_est = np.empty((top_nn, 3))
                    for n in range(top_nn):
                        smallest_diff = np.inf
                        for visib_gt, gt in zip(visib_gts[view], gts[view]):
                            if gt['obj_id'] == obj_id:
                                diff = np.sum(
                                    np.abs(gt['obj_bb'] -
                                           np.array(visib_gt['bbox_obj'])))
                                if diff < smallest_diff:
                                    smallest_diff = diff
                                    gt_obj = gt.copy()
                                    print('Im there')
                        ts_est[n] = np.array(gt_obj['cam_t_m2c']).reshape(-1)

                # Add the detection time when the predictions provide one.
                run_time = ae_time
                if estimate_bbs:
                    try:
                        run_time = ae_time + bb_preds[view][0]['det_time']
                    except Exception:
                        # No 'det_time' recorded for this view: report the
                        # auto-encoder time only.
                        run_time = ae_time

                for p in range(top_nn):
                    if icp:
                        # note: In the CVPR paper a different ICP was used
                        start = time.time()
                        # depth icp
                        R_est_refined, t_est_refined = icp_utils.icp_refinement(
                            test_crops_depth[i],
                            icp_renderer,
                            Rs_est[p],
                            ts_est[p],
                            Ks_test[view].copy(), (W_test, H_test),
                            depth_only=True,
                            max_mean_dist_factor=5.0)
                        print(t_est_refined)

                        # x,y update,does not change tz:
                        _, ts_est_refined, _ = codebook.auto_pose6d(
                            sess,
                            test_crop,
                            test_bb,
                            Ks_test[view].copy(),
                            top_nn,
                            train_args,
                            codebook._get_codebook_name(model_path),
                            depth_pred=t_est_refined[2],
                            refine=iterative_code_refinement)

                        t_est_refined = ts_est_refined[p]

                        # rotation icp, only accepted if below 20 deg change
                        R_est_refined, _ = icp_utils.icp_refinement(
                            test_crops_depth[i],
                            icp_renderer,
                            R_est_refined,
                            t_est_refined,
                            Ks_test[view].copy(), (W_test, H_test),
                            no_depth=True)
                        print((Rs_est[p]))
                        print(R_est_refined)

                        icp_time = time.time() - start
                        Rs_est[p], ts_est[p] = R_est_refined, t_est_refined

                    preds.setdefault('ests', []).append({
                        'score': test_score,
                        'R': Rs_est[p],
                        't': ts_est[p]
                    })
                # NOTE(review): only the ICP time of the last hypothesis is
                # added here -- confirm whether the sum over all top_nn
                # hypotheses was intended.
                run_time = run_time + icp_time if icp else run_time

                min_t_err, min_R_err = eval_plots.print_trans_rot_errors(
                    gts[view], obj_id, ts_est, ts_est_old, Rs_est, Rs_est_old)
                t_errors_crop.append(min_t_err)
                R_errors_crop.append(min_R_err)

                if eval_args.getboolean('PLOT',
                                        'NEAREST_NEIGHBORS') and not icp:
                    for R_est, t_est in zip(Rs_est, ts_est):
                        pred_views.append(
                            dataset.render_rot(R_est, downSample=2))
                    eval_plots.show_nearest_rotation(pred_views, test_crop,
                                                     view)
                if eval_args.getboolean('PLOT', 'SCENE_WITH_ESTIMATE'):
                    eval_plots.plot_scene_with_estimate(
                        test_imgs[view].copy(),
                        icp_renderer.renderer if icp else dataset.renderer,
                        Ks_test[view].copy(), Rs_est_old[0], ts_est_old[0],
                        Rs_est[0], ts_est[0], test_bb, test_score, obj_id,
                        gts[view], bb_preds[view] if estimate_bbs else None)

                # Space bar (key code 32) pauses until the next key press.
                if cv2.waitKey(1) == 32:
                    cv2.waitKey(0)

            # Per view, keep the errors of the crop with the smallest
            # translation error (the same crop is used for the rotation error).
            best_crop = np.argmin(
                np.linalg.norm(np.array(t_errors_crop), axis=1))
            t_errors.append(t_errors_crop[best_crop])
            R_errors.append(R_errors_crop[best_crop])

            # save predictions in sixd format
            res_path = os.path.join(scene_res_dir,
                                    '%04d_%02d.yml' % (view, obj_id))
            inout.save_results_sixd17(res_path, preds, run_time=run_time)

    if not os.path.exists(os.path.join(eval_dir, 'latex')):
        os.makedirs(os.path.join(eval_dir, 'latex'))
    if not os.path.exists(os.path.join(eval_dir, 'figures')):
        os.makedirs(os.path.join(eval_dir, 'figures'))

    if eval_args.getboolean('EVALUATION', 'COMPUTE_ERRORS'):
        eval_calc_errors.eval_calc_errors(eval_args, eval_dir)
    if eval_args.getboolean('EVALUATION', 'EVALUATE_ERRORS'):
        eval_loc.match_and_eval_performance_scores(eval_args, eval_dir)

    cyclo = train_args.getint('Embedding', 'NUM_CYCLO')
    if eval_args.getboolean('PLOT', 'EMBEDDING_PCA'):
        embedding = sess.run(codebook.embedding_normalized)
        eval_plots.compute_pca_plot_embedding(eval_dir, embedding[::cyclo],
                                              np.array(test_embeddings[0]))
    if eval_args.getboolean('PLOT', 'VIEWSPHERE'):
        eval_plots.plot_viewsphere_for_embedding(
            dataset.viewsphere_for_embedding[::cyclo], eval_dir)
    if eval_args.getboolean('PLOT', 'CUM_T_ERROR_HIST'):
        eval_plots.plot_t_err_hist(np.array(t_errors), eval_dir)
        eval_plots.plot_t_err_hist2(np.array(t_errors), eval_dir)
    if eval_args.getboolean('PLOT', 'CUM_R_ERROR_HIST'):
        eval_plots.plot_R_err_recall(eval_args, eval_dir, scenes)
        eval_plots.plot_R_err_hist2(np.array(R_errors), eval_dir)
    # The following plots are best effort: a failure is reported but must not
    # prevent the report from being written.
    if eval_args.getboolean('PLOT', 'CUM_VSD_ERROR_HIST'):
        try:
            eval_plots.plot_vsd_err_hist(eval_args, eval_dir, scenes)
        except Exception as e:
            print('WARNING: VSD error histogram failed: {}'.format(e))
    if eval_args.getboolean('PLOT', 'VSD_OCCLUSION'):
        try:
            eval_plots.plot_vsd_occlusion(eval_args, eval_dir, scenes,
                                          np.array(all_test_visibs))
        except Exception as e:
            print('WARNING: VSD occlusion plot failed: {}'.format(e))
    if eval_args.getboolean('PLOT', 'R_ERROR_OCCLUSION'):
        try:
            eval_plots.plot_re_rect_occlusion(eval_args, eval_dir, scenes,
                                              np.array(all_test_visibs))
        except Exception as e:
            print('WARNING: rotation error occlusion plot failed: {}'.format(e))
    if eval_args.getboolean('PLOT', 'ANIMATE_EMBEDDING_PCA'):
        eval_plots.animate_embedding_path(test_embeddings[0])

    report = latex_report.Report(eval_dir, log_dir)
    report.write_configuration(train_cfg_file_path, eval_cfg_file_path)
    report.merge_all_tex_files()
    report.include_all_figures()
    report.save(open_pdf=True)
예제 #5
0
def main():
    """Render synthetic multi-object training scenes on random backgrounds.

    For each of ``num_train_imgs`` images, up to ``max_objects_in_scene``
    randomly chosen object training views are rotated in-plane, masked,
    cropped, randomly scaled and pasted at a random position.  An object is
    only kept if at least ``min_visib`` of its mask lands on pixels that are
    not yet covered by previously pasted objects.  The remaining pixels are
    filled with a random VOC background image, the result is augmented and
    written to ``output_path``; the object ids and 2D bounding boxes of all
    images are dumped to ``gt.yml`` in the parent directory of ``output_path``.

    All settings (dataset, paths, counts) are hard-coded at the top of the
    function.
    """
    visualize = True
    gt_masks = False
    dataset = 'tless'
    num_train_imgs = 80000
    max_objects_in_scene = 6
    noofvoc_imgs = 15000
    min_visib = 0.6
    blackness_thres = 16
    vocpath = '/home_local_nvme/sund_ma/data/VOCdevkit/VOC2012/JPEGImages/*.jpg'
    voc_img_pathes = glob.glob(vocpath)
    output_path = '/home_local_nvme/sund_ma/data/scene_renderings/linemod_real_imgs_voc_rotated/01/rgb'

    if dataset == 'linemod':
        sixd_train_path = '/home_local_nvme/sund_ma/data/train_linemod'
        cad_path = '/home_local/sund_ma/data/linemod_dataset/models'
        W, H = 640, 480
        noofobjects = 15
    elif dataset == 'tless':
        sixd_train_path = '/home_local/sund_ma/data/t-less/t-less_v2/train_primesense'
        cad_path = '/home_local/sund_ma/data/t-less/t-less_v2/models_reconst'
        W, H = 720, 540
        noofobjects = 30

    # with ground truth masks
    if gt_masks:
        from auto_pose.meshrenderer import meshrenderer_phong
        models_cad_files = sorted(glob.glob(os.path.join(cad_path, '*.ply')))
        renderer = meshrenderer_phong.Renderer(
            models_cad_files,
            1
        )
        obj_gts = []
        for obj_id in range(1, noofobjects + 1):
            obj_gts.append(inout.load_gt(os.path.join(sixd_train_path, '{:02d}'.format(obj_id), 'gt.yml')))

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    voc_imgs = []
    print('loading bg')
    for i, path in enumerate(voc_img_pathes[:noofvoc_imgs]):
        bg_img = cv2.imread(path)
        if bg_img is None:
            # cv2.imread returns None for unreadable files; skip them instead
            # of failing inside cv2.resize.
            continue
        voc_imgs.append(cv2.resize(bg_img, (W, H)))
        print('%s %s' % (i, voc_imgs[-1].shape))
    if not voc_imgs:
        raise IOError('no background images could be loaded from %s' % vocpath)

    obj_infos = []
    for obj_id in range(1, noofobjects + 1):
        obj_infos.append(inout.load_info(os.path.join(sixd_train_path, '{:02d}'.format(obj_id), 'info.yml')))

    augmenters = Sequential([
        Sometimes(0.2, GaussianBlur(0.4)),
        Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),
        Sometimes(0.4, Add((-15, 15), per_channel=0.5)),
        #Sometimes(0.3, Invert(0.2, per_channel=True)),
        Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),
        # Sometimes(0.5, Multiply((0.6, 1.4))),
        Sometimes(0.5, ContrastNormalization((0.5, 2.2), per_channel=0.3)),
        #Sometimes(0.2, CoarseDropout( p=0.1, size_px = 10, size_percent=0.001) )
    ], random_order=True)

    # image index -> list of {'obj_id', 'obj_bb'} entries
    new_scene_gt = {}

    bar = pb.ProgressBar(
        maxval=num_train_imgs,
        widgets=[' [', pb.Timer(), ' | ', pb.Counter('%0{}d / {}'.format(len(str(num_train_imgs)), num_train_imgs)), ' ] ',
        pb.Bar(), ' (', pb.ETA(), ') ']
    )

    for i in bar(range(num_train_imgs)):
        new_scene_gt[i] = []
        new_train_img = np.zeros((H, W, 3), dtype=np.uint8)
        new_train_mask = np.zeros((H, W, 1), dtype=np.uint8)
        for _ in range(max_objects_in_scene):
            rand_obj_id = np.random.randint(0, noofobjects)
            rand_view_id = np.random.randint(0, len(obj_infos[rand_obj_id]))
            img_path = os.path.join(sixd_train_path, '{:02d}'.format(rand_obj_id + 1), 'rgb', '{:04d}.png'.format(rand_view_id))

            rand_img = cv2.imread(img_path)

            # random rotate in-plane
            rot_angle = np.random.rand() * 360
            M = cv2.getRotationMatrix2D((int(rand_img.shape[1] / 2), int(rand_img.shape[0] / 2)), rot_angle, 1)
            rand_img = cv2.warpAffine(rand_img, M, (rand_img.shape[1], rand_img.shape[0]))

            # with ground truth masks
            if gt_masks:
                gt = obj_gts[rand_obj_id][rand_view_id][0]
                K = obj_infos[rand_obj_id][rand_view_id]['cam_K']
                _, depth = renderer.render(rand_obj_id, rand_img.shape[1], rand_img.shape[0], K, gt['cam_R_m2c'], gt['cam_t_m2c'], 10, 5000)
                depth = cv2.warpAffine(depth, M, (depth.shape[1], depth.shape[0]))
                mask = depth > 0
                rand_img[mask == False] = 0
            else:
                # Treat nearly black pixels as background.
                rand_img[(rand_img[:, :, 0] < blackness_thres) & (rand_img[:, :, 1] < blackness_thres) & (rand_img[:, :, 2] < blackness_thres)] = 0
                mask = np.all(rand_img > 0, axis=2)

            ys, xs = np.nonzero(mask)
            if xs.size == 0:
                # Nothing left of the object after masking: no bounding box
                # can be computed, so skip this object.
                continue
            new_bb = misc.calc_2d_bbox(xs, ys, mask.shape[:2])

            if dataset == 'tless':
                #tless specific
                crop_x = np.array([20, 380]) + np.random.randint(-15, 15)
                crop_y = np.array([20, 380]) + np.random.randint(-15, 15)
            elif dataset == 'linemod':
                #linemod specific
                crop_x = np.array([80, 560]) + np.random.randint(-20, 20)
                crop_y = np.array([0, 480])# + np.random.randint(-20,20)

            mask = mask[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1]]
            rand_img = rand_img[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1]]

            orig_H, orig_W = rand_img.shape[:2]
            s = 0.5 * np.random.rand() + 0.5
            new_H, new_W = int(s * orig_H), int(s * orig_W)
            scaled_img = cv2.resize(rand_img, (new_W, new_H), interpolation=cv2.INTER_NEAREST)
            # The mask is (rows, cols) = (orig_H, orig_W); reshaping with the
            # axes swapped would scramble non-square crops.
            scaled_mask = cv2.resize(mask.astype(np.int32).reshape(orig_H, orig_W, 1), (new_W, new_H), interpolation=cv2.INTER_NEAREST)
            y_offset = np.random.randint(0, H - scaled_img.shape[0])
            x_offset = np.random.randint(0, W - scaled_img.shape[1])

            y1, y2 = y_offset, y_offset + scaled_img.shape[0]
            x1, x2 = x_offset, x_offset + scaled_img.shape[1]

            # Visibility check, done on pixel counts (len() of a boolean array
            # would only return its number of rows): which share of the object
            # mask falls onto pixels no earlier object occupies?
            obj_pix = scaled_mask > 0
            new_mask_pix = np.count_nonzero(obj_pix)
            if new_mask_pix == 0:
                continue
            occupied = new_train_mask[y1:y2, x1:x2, 0] > 0
            added_pix = np.count_nonzero(obj_pix & ~occupied)
            if added_pix / float(new_mask_pix) < min_visib:
                # Rejected before anything was pasted, so no rollback needed.
                continue

            # alpha_s selects object pixels, alpha_l keeps what is underneath.
            alpha_s = np.dstack((scaled_mask, scaled_mask, scaled_mask)) > 0
            alpha_l = 1.0 - alpha_s
            new_train_mask[y1:y2, x1:x2, 0] = alpha_s[:, :, 0] * scaled_mask + alpha_l[:, :, 0] * new_train_mask[y1:y2, x1:x2, 0]
            new_train_img[y1:y2, x1:x2, :] = alpha_s * scaled_img + alpha_l * new_train_img[y1:y2, x1:x2, :]

            # Map the box from the uncropped view into the scene: undo the crop
            # origin, apply the scale, add the paste offset.  Plain Python ints
            # keep gt.yml free of numpy-specific YAML tags.
            x, y, w, h = [int(v) for v in np.round((np.array(new_bb) + np.array([-crop_x[0], -crop_y[0], 0, 0])) * s + np.array([x_offset, y_offset, 0, 0]))]
            new_scene_gt[i].append({'obj_id': rand_obj_id + 1, 'obj_bb': [x, y, w, h]})

        # Pick among the backgrounds that were actually loaded.
        bg = voc_imgs[np.random.randint(0, len(voc_imgs))]
        stacked_new_train_mask = np.dstack((new_train_mask, new_train_mask, new_train_mask))
        new_train_img[stacked_new_train_mask == 0] = bg[stacked_new_train_mask == 0]
        new_train_img = augmenters.augment_image(new_train_img)

        if visualize:
            print(new_scene_gt[i])
            for sc_gt in new_scene_gt[i]:
                x, y, w, h = sc_gt['obj_bb']
                cv2.rectangle(new_train_img, (x, y), (x + w, y + h), color=(32, 192, 192))
            cv2.imshow('new_train_img', new_train_img)
            cv2.imshow('new_train_mask', new_train_mask.astype(np.float32))
            cv2.waitKey(0)

        cv2.imwrite(os.path.join(output_path, '%s.png' % i), new_train_img)

    with open(os.path.join(os.path.dirname(output_path), 'gt.yml'), 'w') as f:
        yaml.dump(new_scene_gt, f, Dumper=yaml.CDumper, width=10000)