# Standard-library and third-party imports used by the snippets below.
# Project-specific symbols (FLAGS, ShapeNetEnv, ReplayMemory, trajectData,
# burn_in, select_action, evaluate, save, eval_log, dump_outputs, log_string,
# tf_util, the rollout objects, ...) are assumed to be provided by the
# surrounding package.
import gc
import os
import resource
import time

import numpy as np
import psutil
import tensorflow as tf


def train(agent):
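    """RL training loop: roll out episodes in ShapeNetEnv, push the completed
    trajectories into replay memory, and update the agent from sampled
    minibatches, periodically saving checkpoints and running evaluation."""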

    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    #### for debug
    #a = np.array([[1,0,1],[0,0,0]])
    #b = np.array([[1,0,1],[0,1,0]])
    #print('IoU: {}'.format(replay_mem.calu_IoU(a, b)))
    #sys.exit()
    #### for debug

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    #epsilon = FLAGS.init_eps
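    # Shared pinhole camera intrinsics for every rendered view, tiled to
    # shape (1, max_episode_length, 3, 3) so each frame in an episode gets
    # its own copy of K.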
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    for i_idx in range(FLAGS.max_iter):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros((FLAGS.voxel_resolution, FLAGS.voxel_resolution,
                             FLAGS.voxel_resolution),
                            dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## update vox_temp with the prediction that includes the new view
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                replay_mem.append(temp_traj)
                break

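        # Sample a minibatch from replay memory and take one optimization step.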
        rgb_batch, vox_batch, reward_batch, action_batch = replay_mem.get_batch(
            FLAGS.batch_size)
        #print 'reward_batch: {}'.format(reward_batch)
        #print 'rewards: {}'.format(rewards)
        feed_dict = {
            agent.is_training: True,
            agent.rgb_batch: rgb_batch,
            agent.vox_batch: vox_batch,
            agent.reward_batch: reward_batch,
            agent.action_batch: action_batch
        }
        opt_train, merge_summary, loss = agent.sess.run(
            [agent.opt, agent.merged_train, agent.loss], feed_dict=feed_dict)
        log_string(
            '+++++Iteration: {}, loss: {:.4f}, mean_reward: {:.4f}+++++'.
            format(i_idx, loss, np.mean(rewards)))
        tf_util.save_scalar(i_idx, 'episode_total_reward', np.sum(rewards),
                            agent.train_writer)
        agent.train_writer.add_summary(merge_summary, i_idx)

        if i_idx % FLAGS.save_every_step == 0 and i_idx > 0:
            save(agent, i_idx, i_idx, i_idx)

        if i_idx % FLAGS.test_every_step == 0 and i_idx > 0:
            eval_r_mean, eval_IoU_mean, eval_loss_mean = evaluate(
                agent, FLAGS.test_episode_num, replay_mem)
            tf_util.save_scalar(i_idx, 'eval_mean_reward', eval_r_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_IoU', eval_IoU_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_loss', eval_loss_mean,
                                agent.train_writer)


def evaluate_burnin(active_mv,
                    test_episode_num,
                    replay_mem,
                    train_i,
                    rollout_obj,
                    mode='random'):
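    """Burn-in evaluation: roll out `test_episode_num` episodes with the given
    rollout policy (without adding them to replay memory), predict a voxel
    grid after each view, and log mean reward, IoU and reconstruction loss."""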
    senv = ShapeNetEnv(FLAGS)

    #epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):

        ## use active policy
        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              mode=mode,
                                              is_train=False)
        #stop_idx = np.argwhere(np.asarray(actions)==8) ## find stop idx
        #if stop_idx.size == 0:
        #    pred_idx = -1
        #else:
        #    pred_idx = stop_idx[0, 0]

        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        vox_gt = np.squeeze(replay_mem.read_vox(voxel_name))

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        #final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr)
        #final_IoU = replay_mem.calu_IoU(np.squeeze(pred_out.vox_pred_test[-1]), vox_gt, FLAGS.iou_thres)
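        # IoU of the accumulated prediction after each observed view.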
        ious = []
        for vi in range(FLAGS.max_episode_length):
            final_IoU = replay_mem.calu_IoU(
                np.squeeze(pred_out.vox_pred_test[vi, ...]), vox_gt,
                FLAGS.iou_thres)
            ious.append(final_IoU)
        eval_log(i_idx, pred_out, ious)
        #eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(ious[-1])  # IoU after the final view
        loss_list.append(np.mean(pred_out.recon_loss_list_test))

        if FLAGS.if_save_eval:

            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }

            dump_outputs(save_dict, train_i, i_idx, mode)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)

    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)

    tf_util.save_scalar(train_i, 'burnin_eval_mean_reward_{}'.format(mode),
                        eval_r_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_IoU_{}'.format(mode),
                        eval_IoU_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_loss_{}'.format(mode),
                        eval_loss_mean, active_mv.train_writer)


def test(active_mv, test_episode_num, replay_mem, train_i, rollout_obj):
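    """Test-set evaluation: roll out episodes with the learned policy,
    reconstruct the voxel grid at the step where the agent emits the stop
    action (or at the last view if it never stops), and log mean reward,
    IoU and reconstruction loss."""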
    senv = ShapeNetEnv(FLAGS)

    #epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):

        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              is_train=False,
                                              test_idx=i_idx)
        stop_idx = np.argwhere(np.asarray(actions) == 8)  # index of the stop action
        if stop_idx.size == 0:
            pred_idx = -1  # the agent never stopped; fall back to the last view
        else:
            pred_idx = stop_idx[0, 0]

        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
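        # FLAGS.category == '1111' appears to denote the multi-category setting,
        # where model_id already carries its own '<category>/<model>' prefix.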
        if FLAGS.category == '1111':
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[pred_idx],
                                        vox_gtr, FLAGS.iou_thres)
        eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(pred_out.recon_loss_list_test)

        if FLAGS.if_save_eval:

            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }

            dump_outputs(save_dict, train_i, i_idx)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)

    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)

    tf_util.save_scalar(train_i, 'eval_mean_reward', eval_r_mean,
                        active_mv.train_writer)
    tf_util.save_scalar(train_i, 'eval_mean_IoU', eval_IoU_mean,
                        active_mv.train_writer)
    tf_util.save_scalar(train_i, 'eval_mean_loss', eval_loss_mean,
                        active_mv.train_writer)
def train(ae):
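    """Train the reconstruction autoencoder: pull batches from the queue-based
    data loader, optionally run the GAN discriminator/generator updates, and
    periodically save checkpoints and evaluate on the test split."""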

    #v = VisVox()

    #ae.opt_step = getattr(ae, FLAGS.opt_step_name)
    #ae.loss_tensor = getattr(ae, FLAGS.loss_name)

    i = 0
    try:
        while not ae.coord.should_stop():
            ae.sess.run(ae.assign_i_op, feed_dict={ae.set_i_to_pl: i})

            tic = time.time()
            feed_dict = {
                ae.is_training: True,
                ae.data_loader.is_training: True
            }

            ops_to_run = [
                ae.opt_step, ae.merge_train, ae.counter, ae.recon_loss
            ]

            if FLAGS.use_gan:
                ops_to_run = ops_to_run + [ae.opt_D, ae.D_loss, ae.G_loss]

            stuff = ae.sess.run(ops_to_run, feed_dict=feed_dict)
            if FLAGS.use_gan:
                opt, summary, step, recon_loss, opt_D, D_loss, G_loss = stuff
            else:
                opt, summary, step, recon_loss = stuff
            toc = time.time()

            log_string('Iteration: {} time {}, recon_loss: {}'.format(
                i, toc - tic, recon_loss))

            if FLAGS.use_gan:
                log_string('D_loss: {}, G_loss: {}'.format(D_loss, G_loss))

            log_string(' maxrss: {}'.format(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
            #gc.collect()

            # Memory figures are right-shifted by 30 bits, i.e. reported in GiB.
            print('cpu: {}%, vmem used: {} GiB, avail: {} GiB'.format(
                psutil.cpu_percent(),
                psutil.virtual_memory().used >> 30,
                psutil.virtual_memory().available >> 30))

            i += 1
            ae.train_writer.add_summary(summary, i)
            ae.train_writer.flush()

            if i % FLAGS.save_every_step == 0:
                save(ae, i, i, i)

            if i % FLAGS.test_every_step == 0:
                test_losses = evaluate(ae)
                for key, value in test_losses.items():
                    tf_util.save_scalar(i, 'test/' + key, value,
                                        ae.train_writer)

            gc.collect()

            #if i%FLAGS.vis_every_step == 0:
            #    v.process(vis, 'train', i)

            if i > FLAGS.max_iter:
                print('Done training')
                break
    except tf.errors.OutOfRangeError:
        print('Done training')
    finally:
        ae.coord.request_stop()

    ae.coord.join(ae.threads)
    ae.sess.close()


def evaluate_burnin(active_mv,
                    test_episode_num,
                    replay_mem,
                    train_i,
                    rollout_obj,
                    mode='random',
                    override_mvnet_input=None):
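    """Burn-in evaluation variant that also handles the segmentation setting
    (FLAGS.use_segs) and allows the network input to be overridden via
    `override_mvnet_input`; logs mean reward, IoU and reconstruction loss."""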

    senv = ShapeNetEnv(FLAGS)

    #epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):

        ## use active policy
        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              mode=mode,
                                              is_train=False)
        #stop_idx = np.argwhere(np.asarray(actions)==8) ## find stop idx
        #if stop_idx.size == 0:
        #    pred_idx = -1
        #else:
        #    pred_idx = stop_idx[0, 0]

        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        if FLAGS.category == '1111':
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        if FLAGS.use_segs and FLAGS.category == '3333':  # the only category with segmentation data
            seg1_name = os.path.join(
                'voxels', '{}/{}/obj1.binvox'.format(FLAGS.category, model_id))
            seg2_name = os.path.join(
                'voxels', '{}/{}/obj2.binvox'.format(FLAGS.category, model_id))
            seg1 = replay_mem.read_vox(seg1_name)
            seg2 = replay_mem.read_vox(seg2_name)
            mvnet_input.put_segs(seg1, seg2)

        mvnet_input.put_voxel(vox_gt)

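        # Optionally evaluate on a caller-supplied network input instead of the
        # freshly rolled-out one.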
        if override_mvnet_input is not None:
            mvnet_input = override_mvnet_input

        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        #final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr)
        final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr,
                                        FLAGS.iou_thres)
        eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(np.mean(pred_out.recon_loss_list_test))

        #import ipdb
        #ipdb.set_trace()

        if FLAGS.if_save_eval:

            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb,
            }

            if FLAGS.use_segs:
                save_dict['pred_seg1_test'] = pred_out.pred_seg1_test
                save_dict['pred_seg2_test'] = pred_out.pred_seg2_test

            dump_outputs(save_dict, train_i, i_idx, mode)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)

    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)

    tf_util.save_scalar(train_i, 'burnin_eval_mean_reward_{}'.format(mode),
                        eval_r_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_IoU_{}'.format(mode),
                        eval_IoU_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_loss_{}'.format(mode),
                        eval_loss_mean, active_mv.train_writer)