def evaluate(agent, test_episode_num, replay_mem):
    """Run test_episode_num greedy episodes and return the mean episode reward,
    mean final IoU, and mean final cross-entropy loss."""
    senv = ShapeNetEnv(FLAGS)
    #epsilon = FLAGS.init_eps
    # Camera intrinsics, tiled across the episode length.
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros(
            (FLAGS.voxel_resolution, FLAGS.voxel_resolution,
             FLAGS.voxel_resolution),
            dtype=np.float32)

        # First observation and its camera pose, plus the initial voxel prediction.
        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list, 0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])

        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(
                agent, RGB_temp_list[e_idx], vox_temp, is_training=False)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0], next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                #temp_traj = trajectData(traj_state, actions, rewards, model_id)
                break

        vox_final_list = np.squeeze(vox_temp_list)
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        vox_gt = replay_mem.read_vox(voxel_name)

        # Binarize a copy of the final prediction for IoU; keep the raw
        # probabilities for the cross-entropy below (basic slicing returns a
        # view, so thresholding in place would also binarize vox_final_list).
        vox_final_ = vox_final_list[-1, ...].copy()
        vox_final_[vox_final_ > 0.5] = 1
        vox_final_[vox_final_ <= 0.5] = 0
        final_IoU = replay_mem.calu_IoU(vox_final_, vox_gt)
        final_loss = replay_mem.calu_cross_entropy(vox_final_list[-1, ...], vox_gt)
        log_string(
            '------Episode: {}, episode_reward: {:.4f}, IoU: {:.4f}, Loss: {:.4f}------'
            .format(i_idx, np.sum(rewards), final_IoU, final_loss))
        rewards_list.append(np.sum(rewards))
        IoU_list.append(final_IoU)
        loss_list.append(final_loss)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    return np.mean(rewards_list), np.mean(IoU_list), np.mean(loss_list)
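
# The IoU and cross-entropy used above are computed inside ReplayMemory
# (calu_IoU / calu_cross_entropy). The two helpers below are an illustrative,
# self-contained sketch of what such voxel metrics typically compute; they are
# not called anywhere in this file and may differ from the actual implementations.
def _voxel_iou_sketch(vox_pred_bin, vox_gt):
    """Intersection-over-union of two occupancy grids treated as {0,1} masks."""
    import numpy as np
    pred = vox_pred_bin > 0.5
    gt = vox_gt > 0.5
    inter = np.sum(np.logical_and(pred, gt))
    union = np.sum(np.logical_or(pred, gt))
    return float(inter) / float(union) if union > 0 else 0.0


def _voxel_cross_entropy_sketch(vox_prob, vox_gt, eps=1e-7):
    """Mean binary cross-entropy between predicted occupancy probabilities and ground truth."""
    import numpy as np
    p = np.clip(vox_prob, eps, 1.0 - eps)
    return float(np.mean(-(vox_gt * np.log(p) + (1.0 - vox_gt) * np.log(1.0 - p))))
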
def test(agent, test_episode_num, model_iter):
    """Roll out evaluation episodes on a hard-coded model and, if enabled,
    save the per-step voxel predictions, inputs, and trajectory to .mat files."""
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))

    for i_idx in range(test_episode_num):
        state, model_id = senv.reset(True)
        # Hard-coded test models; the second assignment overrides the first.
        senv.current_model = '53180e91cd6651ab76e29c9c43bc7aa'
        senv.current_model = '41d9bd662687cf503ca22f17e86bab24'
        model_id = senv.current_model
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros(
            (FLAGS.voxel_resolution, FLAGS.voxel_resolution,
             FLAGS.voxel_resolution),
            dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list, 0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])

        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(
                agent, RGB_temp_list[e_idx], vox_temp, is_training=False)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0], next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                break

        vox_final_list = vox_temp_list
        result_path = os.path.join(FLAGS.LOG_DIR, 'results')
        if not os.path.exists(result_path):
            os.mkdir(result_path)

        if FLAGS.save_test_results:
            result_path_iter = os.path.join(result_path, '{}'.format(model_iter))
            if not os.path.exists(result_path_iter):
                os.mkdir(result_path_iter)
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
            vox_gt = replay_mem.read_vox(voxel_name)
            mat_path = os.path.join(result_path_iter, '{}.mat'.format(i_idx))
            sio.savemat(
                mat_path, {
                    'vox_list': vox_final_list,
                    'vox_gt': vox_gt,
                    'RGB': RGB_temp_list,
                    'model_id': model_id,
                    'states': traj_state
                })
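
# The .mat files written by test() can be inspected offline. The helper below
# is a minimal sketch of how to load one back with scipy (imported as sio in
# this file); the function name and return convention are illustrative only,
# and it is not called anywhere in this file.
def _load_test_result_sketch(mat_path):
    """Load one saved test episode and return its predictions and ground truth."""
    import scipy.io as sio
    data = sio.loadmat(mat_path)
    vox_list = data['vox_list']  # per-step voxel predictions
    vox_gt = data['vox_gt']      # ground-truth voxel grid
    rgb = data['RGB']            # rendered input views
    return vox_list, vox_gt, rgb
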
def train(agent):
    """Burn in the replay memory, then alternate rollouts on ShapeNetEnv with
    mini-batch policy updates, periodically saving and evaluating the agent."""
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    #### for debug
    #a = np.array([[1,0,1],[0,0,0]])
    #b = np.array([[1,0,1],[0,1,0]])
    #print('IoU: {}'.format(replay_mem.calu_IoU(a, b)))
    #sys.exit()
    #### for debug

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    #epsilon = FLAGS.init_eps
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))

    for i_idx in range(FLAGS.max_iter):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros(
            (FLAGS.voxel_resolution, FLAGS.voxel_resolution,
             FLAGS.voxel_resolution),
            dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list, 0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])

        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0], next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                replay_mem.append(temp_traj)
                break

        rgb_batch, vox_batch, reward_batch, action_batch = replay_mem.get_batch(
            FLAGS.batch_size)
        #print('reward_batch: {}'.format(reward_batch))
        #print('rewards: {}'.format(rewards))
        feed_dict = {
            agent.is_training: True,
            agent.rgb_batch: rgb_batch,
            agent.vox_batch: vox_batch,
            agent.reward_batch: reward_batch,
            agent.action_batch: action_batch
        }
        opt_train, merge_summary, loss = agent.sess.run(
            [agent.opt, agent.merged_train, agent.loss], feed_dict=feed_dict)
        log_string(
            '+++++Iteration: {}, loss: {:.4f}, mean_reward: {:.4f}+++++'.format(
                i_idx, loss, np.mean(rewards)))
        tf_util.save_scalar(i_idx, 'episode_total_reward', np.sum(rewards),
                            agent.train_writer)
        agent.train_writer.add_summary(merge_summary, i_idx)

        if i_idx % FLAGS.save_every_step == 0 and i_idx > 0:
            save(agent, i_idx, i_idx, i_idx)

        if i_idx % FLAGS.test_every_step == 0 and i_idx > 0:
            eval_r_mean, eval_IoU_mean, eval_loss_mean = evaluate(
                agent, FLAGS.test_episode_num, replay_mem)
            tf_util.save_scalar(i_idx, 'eval_mean_reward', eval_r_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_IoU', eval_IoU_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_loss', eval_loss_mean,
                                agent.train_writer)
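
# The training step in train() feeds per-step rewards and actions into
# agent.loss, whose definition lives in the agent's graph. The helper below is
# a rough, framework-free sketch of the REINFORCE-style objective such a setup
# typically optimises (reward-weighted negative log-likelihood of the taken
# actions). Whether the rewards are discounted or baselined is an assumption;
# this function is illustrative only and is not called anywhere in this file.
def _policy_gradient_loss_sketch(action_probs, actions, rewards):
    """Mean reward-weighted negative log-probability of the taken actions.

    action_probs: (N, num_actions) action distribution per step
    actions:      (N,) indices of the actions actually taken
    rewards:      (N,) per-step returns used to weight each log-probability
    """
    import numpy as np
    eps = 1e-8
    taken = action_probs[np.arange(len(actions)), actions]
    return float(np.mean(-np.log(taken + eps) * rewards))
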