Code example #1
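These snippets are excerpted from a larger module and omit its imports. A minimal preamble they appear to rely on is sketched below; the project-specific names (FLAGS, ShapeNetEnv, ReplayMemory, select_action, log_string, trajectData, burn_in, save, tf_util) are assumed to be defined elsewhere in the surrounding repository.

import os

import numpy as np
import scipy.io as sio
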
def evaluate(agent, test_episode_num, replay_mem):
    senv = ShapeNetEnv(FLAGS)

    #epsilon = FLAGS.init_eps
    # pinhole camera intrinsics, tiled so every step of the episode shares
    # the same K
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1.0]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    rewards_list = []
    IoU_list = []
    loss_list = []
    for i_idx in range(test_episode_num):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros((FLAGS.voxel_resolution, FLAGS.voxel_resolution,
                             FLAGS.voxel_resolution),
                            dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent,
                                         RGB_temp_list[e_idx],
                                         vox_temp,
                                         is_training=False)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                # `rewards` and `traj_state` are only set if the episode
                # terminates within max_episode_length steps
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                #temp_traj = trajectData(traj_state, actions, rewards, model_id)
                break

        vox_final_list = np.squeeze(vox_temp_list)
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        vox_gt = replay_mem.read_vox(voxel_name)
        vox_final_ = vox_final_list[-1, ...]
        vox_final_[vox_final_ > 0.5] = 1
        vox_final_[vox_final_ <= 0.5] = 0
        final_IoU = replay_mem.calu_IoU(vox_final_, vox_gt)
        final_loss = replay_mem.calu_cross_entropy(vox_final_list[-1, ...],
                                                   vox_gt)
        log_string(
            '------Episode: {}, episode_reward: {:.4f}, IoU: {:.4f}, Loss: {:.4f}------'
            .format(i_idx, np.sum(rewards), final_IoU, final_loss))
        rewards_list.append(np.sum(rewards))
        IoU_list.append(final_IoU)
        loss_list.append(final_loss)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)

    return np.mean(rewards_list), np.mean(IoU_list), np.mean(loss_list)
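
For reference, replay_mem.calu_IoU is presumably the standard intersection-over-union between two binary occupancy grids, which is what the thresholding of vox_final_ above prepares its input for. A minimal NumPy sketch (the name voxel_iou is hypothetical, not part of the repository):

def voxel_iou(vox_pred, vox_gt):
    # both inputs are voxel grids of identical shape; values above 0.5 count as occupied
    occ_pred = vox_pred > 0.5
    occ_gt = vox_gt > 0.5
    intersection = np.logical_and(occ_pred, occ_gt).sum()
    union = np.logical_or(occ_pred, occ_gt).sum()
    return float(intersection) / union if union > 0 else 0.0
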
Code example #2
def test(agent, test_episode_num, model_iter):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    # pinhole camera intrinsics, tiled over the episode (same as in evaluate)
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1.0]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    for i_idx in range(test_episode_num):
        state, model_id = senv.reset(True)
        # hard-coded test model; the second assignment overrides the first
        #senv.current_model = '53180e91cd6651ab76e29c9c43bc7aa'
        senv.current_model = '41d9bd662687cf503ca22f17e86bab24'
        model_id = senv.current_model
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros((FLAGS.voxel_resolution, FLAGS.voxel_resolution,
                             FLAGS.voxel_resolution),
                            dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent,
                                         RGB_temp_list[e_idx],
                                         vox_temp,
                                         is_training=False)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                break

        vox_final_list = vox_temp_list

        result_path = os.path.join(FLAGS.LOG_DIR, 'results')
        if not os.path.exists(result_path):
            os.mkdir(result_path)

        # Only write .mat results when requested; result_path_iter is
        # undefined otherwise, so the save must stay inside this branch.
        if FLAGS.save_test_results:
            result_path_iter = os.path.join(result_path,
                                            '{}'.format(model_iter))
            if not os.path.exists(result_path_iter):
                os.mkdir(result_path_iter)

            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(FLAGS.category,
                                                      model_id))
            vox_gt = replay_mem.read_vox(voxel_name)

            mat_path = os.path.join(result_path_iter, '{}.mat'.format(i_idx))
            sio.savemat(
                mat_path, {
                    'vox_list': vox_final_list,
                    'vox_gt': vox_gt,
                    'RGB': RGB_temp_list,
                    'model_id': model_id,
                    'states': traj_state
                })
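
The .mat files written above can be inspected offline with scipy.io; a minimal sketch, assuming a file saved under FLAGS.LOG_DIR/results (the concrete path below is hypothetical):

import scipy.io as sio

result = sio.loadmat('results/10000/0.mat')  # hypothetical model_iter / episode index
vox_list = result['vox_list']   # per-step voxel predictions
vox_gt = result['vox_gt']       # ground-truth voxel grid
rgb = result['RGB']             # the rendered input views
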
Code example #3
def train(agent):

    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    #### for debug
    #a = np.array([[1,0,1],[0,0,0]])
    #b = np.array([[1,0,1],[0,1,0]])
    #print('IoU: {}'.format(replay_mem.calu_IoU(a, b)))
    #sys.exit()
    #### for debug

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    #epsilon = FLAGS.init_eps
    # pinhole camera intrinsics, tiled over the episode (same as in evaluate)
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1.0]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    for i_idx in range(FLAGS.max_iter):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros((FLAGS.voxel_resolution, FLAGS.voxel_resolution,
                             FLAGS.voxel_resolution),
                            dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                replay_mem.append(temp_traj)
                break

        rgb_batch, vox_batch, reward_batch, action_batch = replay_mem.get_batch(
            FLAGS.batch_size)
        #print('reward_batch: {}'.format(reward_batch))
        #print('rewards: {}'.format(rewards))
        feed_dict = {
            agent.is_training: True,
            agent.rgb_batch: rgb_batch,
            agent.vox_batch: vox_batch,
            agent.reward_batch: reward_batch,
            agent.action_batch: action_batch
        }
        opt_train, merge_summary, loss = agent.sess.run(
            [agent.opt, agent.merged_train, agent.loss], feed_dict=feed_dict)
        log_string(
            '+++++Iteration: {}, loss: {:.4f}, mean_reward: {:.4f}+++++'.
            format(i_idx, loss, np.mean(rewards)))
        tf_util.save_scalar(i_idx, 'episode_total_reward', np.sum(rewards),
                            agent.train_writer)
        agent.train_writer.add_summary(merge_summary, i_idx)

        if i_idx % FLAGS.save_every_step == 0 and i_idx > 0:
            save(agent, i_idx, i_idx, i_idx)

        if i_idx % FLAGS.test_every_step == 0 and i_idx > 0:
            eval_r_mean, eval_IoU_mean, eval_loss_mean = evaluate(
                agent, FLAGS.test_episode_num, replay_mem)
            tf_util.save_scalar(i_idx, 'eval_mean_reward', eval_r_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_IoU', eval_IoU_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_loss', eval_loss_mean,
                                agent.train_writer)
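
All three functions build the same intrinsics tensor: a single 3x3 pinhole matrix (focal length 420 px, principal point presumably at 112 px, i.e. the image center) broadcast over the episode length. A standalone sketch of just that step, with an illustrative episode length in place of FLAGS.max_episode_length:

import numpy as np

max_episode_length = 4  # illustrative value; the real value comes from FLAGS
K_single = np.asarray([[420.0, 0.0, 112.0],
                       [0.0, 420.0, 112.0],
                       [0.0, 0.0, 1.0]])
K_list = np.tile(K_single[None, None, ...], (1, max_episode_length, 1, 1))
print(K_list.shape)  # (1, 4, 3, 3): one copy of K per step in the episode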