def evaluate(agent, test_episode_num, replay_mem):
    senv = ShapeNetEnv(FLAGS)
    #epsilon = FLAGS.init_eps
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    rewards_list = []
    IoU_list = []
    loss_list = []
    for i_idx in range(test_episode_num):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros(
            (FLAGS.voxel_resolution, FLAGS.voxel_resolution,
             FLAGS.voxel_resolution),
            dtype=np.float32)
        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp,
                                         is_training=False)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                #temp_traj = trajectData(traj_state, actions, rewards, model_id)
                break

        vox_final_list = np.squeeze(vox_temp_list)
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        vox_gt = replay_mem.read_vox(voxel_name)
        vox_final_ = vox_final_list[-1, ...]
        vox_final_[vox_final_ > 0.5] = 1
        vox_final_[vox_final_ <= 0.5] = 0
        final_IoU = replay_mem.calu_IoU(vox_final_, vox_gt)
        final_loss = replay_mem.calu_cross_entropy(vox_final_list[-1, ...],
                                                   vox_gt)
        log_string(
            '------Episode: {}, episode_reward: {:.4f}, IoU: {:.4f}, Loss: {:.4f}------'
            .format(i_idx, np.sum(rewards), final_IoU, final_loss))
        rewards_list.append(np.sum(rewards))
        IoU_list.append(final_IoU)
        loss_list.append(final_loss)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    return np.mean(rewards_list), np.mean(IoU_list), np.mean(loss_list)
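# A minimal, illustrative sketch of the threshold-then-IoU step that evaluate()
# performs before calling replay_mem.calu_IoU; the exact behaviour of calu_IoU is
# an assumption here (binarize predicted occupancies at a threshold, then take
# intersection over union against the binary ground-truth grid). Uses the
# module-level numpy import (np). Not the project's implementation.
def binary_voxel_iou_sketch(vox_pred, vox_gt, thres=0.5):
    pred = (np.asarray(vox_pred) > thres).astype(np.float32)
    gt = (np.asarray(vox_gt) > 0.5).astype(np.float32)
    intersection = np.sum(pred * gt)
    union = np.sum(np.clip(pred + gt, 0.0, 1.0))
    return intersection / (union + 1e-8)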
def test(agent, test_episode_num, model_iter):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))
    for i_idx in range(test_episode_num):
        state, model_id = senv.reset(True)
        ## hard-coded debug model ids; the second assignment overrides the first
        senv.current_model = '53180e91cd6651ab76e29c9c43bc7aa'
        senv.current_model = '41d9bd662687cf503ca22f17e86bab24'
        model_id = senv.current_model
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros(
            (FLAGS.voxel_resolution, FLAGS.voxel_resolution,
             FLAGS.voxel_resolution),
            dtype=np.float32)
        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp,
                                         is_training=False)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                break

        vox_final_list = vox_temp_list
        result_path = os.path.join(FLAGS.LOG_DIR, 'results')
        if not os.path.exists(result_path):
            os.mkdir(result_path)
        if FLAGS.save_test_results:
            result_path_iter = os.path.join(result_path,
                                            '{}'.format(model_iter))
            if not os.path.exists(result_path_iter):
                os.mkdir(result_path_iter)
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(FLAGS.category,
                                                      model_id))
            vox_gt = replay_mem.read_vox(voxel_name)
            mat_path = os.path.join(result_path_iter, '{}.mat'.format(i_idx))
            sio.savemat(
                mat_path, {
                    'vox_list': vox_final_list,
                    'vox_gt': vox_gt,
                    'RGB': RGB_temp_list,
                    'model_id': model_id,
                    'states': traj_state
                })
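# A small usage sketch for the .mat files written by test() above. scipy.io.loadmat
# returns a dict keyed by the names passed to sio.savemat; the comments on the
# fields are assumptions based on how the arrays are built in test().
def load_test_result_sketch(mat_path):
    import scipy.io as spio
    result = spio.loadmat(mat_path)
    vox_list = result['vox_list']  # per-view voxel predictions
    vox_gt = result['vox_gt']      # ground-truth voxel grid
    rgb = result['RGB']            # rendered input views over the episode
    return vox_list, vox_gt, rgb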
def evaluate_burnin(active_mv,
                    test_episode_num,
                    replay_mem,
                    train_i,
                    rollout_obj,
                    mode='random'):
    senv = ShapeNetEnv(FLAGS)
    #epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list = []
    loss_list = []
    for i_idx in range(test_episode_num):
        ## use active policy
        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              mode=mode,
                                              is_train=False)
        #stop_idx = np.argwhere(np.asarray(actions)==8)  ## find stop idx
        #if stop_idx.size == 0:
        #    pred_idx = -1
        #else:
        #    pred_idx = stop_idx[0, 0]
        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        vox_gt = np.squeeze(replay_mem.read_vox(voxel_name))

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        #final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr)
        #final_IoU = replay_mem.calu_IoU(np.squeeze(pred_out.vox_pred_test[-1]), vox_gt, FLAGS.iou_thres)
        ious = []
        for vi in range(FLAGS.max_episode_length):
            final_IoU = replay_mem.calu_IoU(
                np.squeeze(pred_out.vox_pred_test[vi, ...]), vox_gt,
                FLAGS.iou_thres)
            ious.append(final_IoU)

        eval_log(i_idx, pred_out, ious)
        #eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(np.mean(pred_out.recon_loss_list_test))

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }
            dump_outputs(save_dict, train_i, i_idx, mode)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_reward_{}'.format(mode),
                        eval_r_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_IoU_{}'.format(mode),
                        eval_IoU_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_loss_{}'.format(mode),
                        eval_loss_mean, active_mv.train_writer)
def train(agent):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    #### for debug
    #a = np.array([[1,0,1],[0,0,0]])
    #b = np.array([[1,0,1],[0,1,0]])
    #print('IoU: {}'.format(replay_mem.calu_IoU(a, b)))
    #sys.exit()
    #### for debug

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    #epsilon = FLAGS.init_eps
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))

    for i_idx in range(FLAGS.max_iter):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)
        vox_temp = np.zeros(
            (FLAGS.voxel_resolution, FLAGS.voxel_resolution,
             FLAGS.voxel_resolution),
            dtype=np.float32)
        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list, K_list,
                                                0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])
        ## run simulations and get memories
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            ## TODO: update vox_temp
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                replay_mem.append(temp_traj)
                break

        rgb_batch, vox_batch, reward_batch, action_batch = replay_mem.get_batch(
            FLAGS.batch_size)
        #print 'reward_batch: {}'.format(reward_batch)
        #print 'rewards: {}'.format(rewards)
        feed_dict = {
            agent.is_training: True,
            agent.rgb_batch: rgb_batch,
            agent.vox_batch: vox_batch,
            agent.reward_batch: reward_batch,
            agent.action_batch: action_batch
        }
        opt_train, merge_summary, loss = agent.sess.run(
            [agent.opt, agent.merged_train, agent.loss], feed_dict=feed_dict)
        log_string(
            '+++++Iteration: {}, loss: {:.4f}, mean_reward: {:.4f}+++++'.format(
                i_idx, loss, np.mean(rewards)))
        tf_util.save_scalar(i_idx, 'episode_total_reward', np.sum(rewards[:]),
                            agent.train_writer)
        agent.train_writer.add_summary(merge_summary, i_idx)

        if i_idx % FLAGS.save_every_step == 0 and i_idx > 0:
            save(agent, i_idx, i_idx, i_idx)

        if i_idx % FLAGS.test_every_step == 0 and i_idx > 0:
            eval_r_mean, eval_IoU_mean, eval_loss_mean = evaluate(
                agent, FLAGS.test_episode_num, replay_mem)
            tf_util.save_scalar(i_idx, 'eval_mean_reward', eval_r_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_IoU', eval_IoU_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_loss', eval_loss_mean,
                                agent.train_writer)
def test_active(active_mv, test_episode_num, replay_mem, train_i, rollout_obj):
    senv = ShapeNetEnv(FLAGS)
    #epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list_ = []
    loss_list_ = []
    for i_idx in range(test_episode_num):
        print('======testing on {}/{} model======'.format(
            i_idx + 1, test_episode_num))
        IoU_lists_ = []
        loss_list_ = []
        actions_list = []
        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              mode='active',
                                              is_train=False)
        #stop_idx = np.argwhere(np.asarray(actions)==8)  ## find stop idx
        #if stop_idx.size == 0:
        #    pred_idx = -1
        #else:
        #    pred_idx = stop_idx[0, 0]
        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        if FLAGS.category == '1111':
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        IoUs = []
        for vi in range(FLAGS.max_episode_length):
            IoUs.append(
                replay_mem.calu_IoU(np.squeeze(pred_out.vox_pred_test[vi]),
                                    np.squeeze(vox_gtr), FLAGS.iou_thres))
        #final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr)
        #eval_log(i_idx, pred_out, final_IoU)

        #rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_lists_.append(IoUs)
        loss_list_.append(pred_out.recon_loss_list_test)
        actions_list.append(actions)

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'voxel_rot_list': np.squeeze(pred_out.vox_pred_test_rot),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }
            dump_outputs_model(save_dict, train_i, i_idx, mode='active')

        IoU_lists_ = np.asarray(IoU_lists_)
        loss_list_ = np.asarray(loss_list_)
        actions_list = np.asarray(actions_list)
        save_dict = {
            'IoU_list': IoU_lists_,
            'loss_list': loss_list_,
            'actions_list': actions_list
        }
        dump_outputs(save_dict, train_i, i_idx, mode='active')
def train(active_mv):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    #### for debug
    #a = np.array([[1,0,1],[0,0,0]])
    #b = np.array([[1,0,1],[0,1,0]])
    #print('IoU: {}'.format(replay_mem.calu_IoU(a, b)))
    #sys.exit()
    #### for debug

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    #epsilon = FLAGS.init_eps
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))

    rollout_obj = Rollout(active_mv, senv, replay_mem, FLAGS)

    ### burn in (pretrain) for MVnet
    if FLAGS.burn_in_iter > 0:
        for i in range(FLAGS.burnin_start_iter,
                       FLAGS.burnin_start_iter + FLAGS.burn_in_iter):
            rollout_obj.go(i,
                           verbose=True,
                           add_to_mem=True,
                           mode='random',
                           is_train=True)
            if not FLAGS.random_pretrain:
                replay_mem.enable_gbl()
                mvnet_input = replay_mem.get_batch_list(FLAGS.batch_size)
            else:
                mvnet_input = replay_mem.get_batch_list_random(
                    senv, FLAGS.batch_size)
            tic = time.time()
            out_stuff = active_mv.run_step(mvnet_input,
                                           mode='burnin',
                                           is_training=True)
            burnin_summ = burnin_log(i, out_stuff, time.time() - tic)
            active_mv.train_writer.add_summary(burnin_summ, i)

            if (i + 1) % 5000 == 0 and i > FLAGS.burnin_start_iter:
                save_pretrain(active_mv, i + 1)

            if (i + 1) % 1000 == 0 and i > FLAGS.burnin_start_iter:
                evaluate_burnin(active_mv,
                                FLAGS.test_episode_num,
                                replay_mem,
                                i + 1,
                                rollout_obj,
                                mode='random')

    for i_idx in range(FLAGS.max_iter):
        t0 = time.time()
        rollout_obj.go(i_idx, verbose=True, add_to_mem=True, mode='random')
        t1 = time.time()
        replay_mem.enable_gbl()
        mvnet_input = replay_mem.get_batch_list(FLAGS.batch_size)
        t2 = time.time()
        out_stuff = active_mv.run_step(mvnet_input,
                                       mode='train_mv',
                                       is_training=True)
        replay_mem.disable_gbl()
        t3 = time.time()
        train_log(i_idx, out_stuff, (t0, t1, t2, t3))
        active_mv.train_writer.add_summary(out_stuff.merged_train, i_idx)

        if i_idx % FLAGS.save_every_step == 0 and i_idx > 0:
            save(active_mv, i_idx, i_idx, i_idx)

        if i_idx % FLAGS.test_every_step == 0 and i_idx > 0:
            #print('Evaluating active policy')
            #evaluate(active_mv, FLAGS.test_episode_num, replay_mem, i_idx, rollout_obj, mode='active')
            print('Evaluating random policy')
            evaluate(active_mv, FLAGS.test_episode_num, replay_mem, i_idx,
                     rollout_obj, mode='random')
    with open(pth, 'wb') as f:
        binvox_obj.write(f)


if __name__ == "__main__":
    #MODEL = importlib.import_module(FLAGS.model_file)  # import network module
    #MODEL_FILE = os.path.join(BASE_DIR, 'models', FLAGS.model_file+'.py')
    ####### log writing
    FLAGS.LOG_DIR = FLAGS.LOG_DIR + '/' + FLAGS.task_name
    #FLAGS.CHECKPOINT_DIR = os.path.join(FLAGS.CHECKPOINT_DIR, FLAGS.task_name)
    #tf_util.mkdir(FLAGS.CHECKPOINT_DIR)

    if not FLAGS.is_training:
        agent = ActiveMVnet(FLAGS)
        senv = ShapeNetEnv(FLAGS)
        if FLAGS.pretrain_restore:
            restore_pretrain(agent)
        else:
            restore_from_iter(agent, FLAGS.test_iter)
        replay_mem = ReplayMemory(FLAGS)
        rollout_obj = Rollout(agent, senv, replay_mem, FLAGS)
        if FLAGS.test_random:
            test_random(agent, FLAGS.test_episode_num, replay_mem,
                        FLAGS.test_iter, rollout_obj)
        elif FLAGS.test_oneway:
            test_oneway(agent, FLAGS.test_episode_num, replay_mem,
                        FLAGS.test_iter, rollout_obj)
        else:
            test_active(agent, FLAGS.test_episode_num, replay_mem,
                        FLAGS.test_iter, rollout_obj)
def test(active_mv, test_episode_num, replay_mem, train_i, rollout_obj):
    senv = ShapeNetEnv(FLAGS)
    # epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list = []
    loss_list = []
    for i_idx in range(test_episode_num):
        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              is_train=False,
                                              test_idx=i_idx)
        stop_idx = np.argwhere(np.asarray(actions) == 8)  ## find stop idx
        if stop_idx.size == 0:
            pred_idx = -1
        else:
            pred_idx = stop_idx[0, 0]
        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        if FLAGS.category == '1111':
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[pred_idx],
                                        vox_gtr, FLAGS.iou_thres)
        eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(pred_out.recon_loss_list_test)

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }
            dump_outputs(save_dict, train_i, i_idx)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)
    tf_util.save_scalar(train_i, 'eval_mean_reward', eval_r_mean,
                        active_mv.train_writer)
    tf_util.save_scalar(train_i, 'eval_mean_IoU', eval_IoU_mean,
                        active_mv.train_writer)
    tf_util.save_scalar(train_i, 'eval_mean_loss', eval_loss_mean,
                        active_mv.train_writer)
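# Minimal sketch of the stop-index selection used in test() above, under the
# assumption signalled by the '## find stop idx' comment that action id 8 is the
# agent's stop action; if the agent never stops, the final view's prediction is
# used. Illustration only, not the project's API.
def pick_pred_idx_sketch(actions, stop_action=8):
    stop_idx = np.argwhere(np.asarray(actions) == stop_action)
    return -1 if stop_idx.size == 0 else int(stop_idx[0, 0])

# e.g. pick_pred_idx_sketch([3, 1, 8, 2]) -> 2; pick_pred_idx_sketch([3, 1, 2]) -> -1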
def train(active_mv):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    rollout_obj = Rollout(active_mv, senv, replay_mem, FLAGS)

    # burn in (pretrain) for MVnet
    if FLAGS.burn_in_iter > 0:
        for i in range(FLAGS.burnin_start_iter,
                       FLAGS.burnin_start_iter + FLAGS.burn_in_iter):
            rollout_obj.go(i,
                           verbose=True,
                           add_to_mem=True,
                           mode=FLAGS.burnin_mode,
                           is_train=True)
            mvnet_input = replay_mem.get_batch_list(FLAGS.batch_size)
            tic = time.time()
            out_stuff = active_mv.run_step(mvnet_input,
                                           mode='burnin',
                                           is_training=True)

            if (i + 1) % FLAGS.save_every_step == 0 and i > FLAGS.burnin_start_iter:
                save_pretrain(active_mv, i + 1)

            if (((i + 1) % FLAGS.test_every_step == 0
                 and i > FLAGS.burnin_start_iter)
                    or (FLAGS.eval0 and i == FLAGS.burnin_start_iter)):
                evaluate_burnin(
                    active_mv,
                    FLAGS.test_episode_num,
                    replay_mem,
                    i + 1,
                    rollout_obj,
                    mode=FLAGS.burnin_mode,
                    override_mvnet_input=(batch_to_single_mvinput(mvnet_input)
                                          if FLAGS.reproj_mode else None))

    for i_idx in range(FLAGS.max_iter):
        t0 = time.time()
        if np.random.uniform() < FLAGS.epsilon:
            rollout_obj.go(i_idx,
                           verbose=True,
                           add_to_mem=True,
                           mode=FLAGS.explore_mode,
                           is_train=True)
        else:
            rollout_obj.go(i_idx, verbose=True, add_to_mem=True, is_train=True)
        t1 = time.time()
        mvnet_input = replay_mem.get_batch_list(FLAGS.batch_size)
        t2 = time.time()
        out_stuff = active_mv.run_step(mvnet_input,
                                       mode='train',
                                       is_training=True)
        t3 = time.time()
        train_log(i_idx, out_stuff, (t0, t1, t2, t3))

        if (i_idx + 1) % FLAGS.save_every_step == 0 and i_idx > 0:
            save(active_mv, i_idx + 1, i_idx + 1, i_idx + 1)

        if (i_idx + 1) % FLAGS.test_every_step == 0 and i_idx > 0:
            print('Evaluating active policy')
            evaluate(active_mv, FLAGS.test_episode_num, replay_mem, i_idx + 1,
                     rollout_obj, mode='active')
            print('Evaluating random policy')
            evaluate(active_mv, FLAGS.test_episode_num, replay_mem, i_idx + 1,
                     rollout_obj, mode='oneway')
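# Minimal sketch of the epsilon-greedy branch in the training loop above. It
# assumes that omitting the mode argument of rollout_obj.go falls back to the
# current (active) policy, so FLAGS.epsilon is the probability of rolling out
# with FLAGS.explore_mode instead. Illustrative names; not part of the codebase.
def choose_rollout_mode_sketch(epsilon, explore_mode, default_mode=None):
    # default_mode=None stands in for "use rollout_obj.go's default policy"
    return explore_mode if np.random.uniform() < epsilon else default_mode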
def evaluate_burnin(active_mv,
                    test_episode_num,
                    replay_mem,
                    train_i,
                    rollout_obj,
                    mode='random',
                    override_mvnet_input=None):
    senv = ShapeNetEnv(FLAGS)
    #epsilon = FLAGS.init_eps
    rewards_list = []
    IoU_list = []
    loss_list = []
    for i_idx in range(test_episode_num):
        ## use active policy
        mvnet_input, actions = rollout_obj.go(i_idx,
                                              verbose=False,
                                              add_to_mem=False,
                                              mode=mode,
                                              is_train=False)
        #stop_idx = np.argwhere(np.asarray(actions)==8)  ## find stop idx
        #if stop_idx.size == 0:
        #    pred_idx = -1
        #else:
        #    pred_idx = stop_idx[0, 0]
        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        if FLAGS.category == '1111':
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        if FLAGS.use_segs and FLAGS.category == '3333':
            # this is the only category for which we have seg data
            seg1_name = os.path.join(
                'voxels', '{}/{}/obj1.binvox'.format(FLAGS.category, model_id))
            seg2_name = os.path.join(
                'voxels', '{}/{}/obj2.binvox'.format(FLAGS.category, model_id))
            seg1 = replay_mem.read_vox(seg1_name)
            seg2 = replay_mem.read_vox(seg2_name)
            mvnet_input.put_segs(seg1, seg2)

        mvnet_input.put_voxel(vox_gt)

        if override_mvnet_input is not None:
            mvnet_input = override_mvnet_input

        pred_out = active_mv.predict_vox_list(mvnet_input)

        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        #final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr)
        final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr,
                                        FLAGS.iou_thres)
        eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(np.mean(pred_out.recon_loss_list_test))

        #import ipdb
        #ipdb.set_trace()

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb,
            }
            if FLAGS.use_segs:
                save_dict['pred_seg1_test'] = pred_out.pred_seg1_test
                save_dict['pred_seg2_test'] = pred_out.pred_seg2_test
            dump_outputs(save_dict, train_i, i_idx, mode)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_reward_{}'.format(mode),
                        eval_r_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_IoU_{}'.format(mode),
                        eval_IoU_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_loss_{}'.format(mode),
                        eval_loss_mean, active_mv.train_writer)
def train(active_mv):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    #### for debug
    #a = np.array([[1,0,1],[0,0,0]])
    #b = np.array([[1,0,1],[0,1,0]])
    #print('IoU: {}'.format(replay_mem.calu_IoU(a, b)))
    #sys.exit()
    #### for debug

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    #epsilon = FLAGS.init_eps
    K_single = np.asarray([[420.0, 0.0, 112.0], [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))

    rollout_obj = Rollout(active_mv, senv, replay_mem, FLAGS)

    ### burn in (pretrain) for MVnet
    if FLAGS.burn_in_iter > 0:
        for i in range(FLAGS.burnin_start_iter,
                       FLAGS.burnin_start_iter + FLAGS.burn_in_iter):
            if (not FLAGS.reproj_mode) or (i == FLAGS.burnin_start_iter):
                rollout_obj.go(i,
                               verbose=True,
                               add_to_mem=True,
                               mode=FLAGS.burnin_mode,
                               is_train=True)
                if not FLAGS.random_pretrain:
                    replay_mem.enable_gbl()
                    mvnet_input = replay_mem.get_batch_list(FLAGS.batch_size)
                else:
                    mvnet_input = replay_mem.get_batch_list_random(
                        senv, FLAGS.batch_size)
            tic = time.time()
            out_stuff = active_mv.run_step(mvnet_input,
                                           mode='burnin',
                                           is_training=True)
            #import ipdb
            #ipdb.set_trace()
            summs_burnin = burnin_log(i, out_stuff, time.time() - tic)
            for summ in summs_burnin:
                active_mv.train_writer.add_summary(summ, i)

            if (i + 1) % FLAGS.save_every_step == 0 and i > FLAGS.burnin_start_iter:
                save_pretrain(active_mv, i + 1)

            if (((i + 1) % FLAGS.test_every_step == 0
                 and i > FLAGS.burnin_start_iter)
                    or (FLAGS.eval0 and i == FLAGS.burnin_start_iter)):
                evaluate_burnin(
                    active_mv,
                    FLAGS.test_episode_num,
                    replay_mem,
                    i + 1,
                    rollout_obj,
                    mode=FLAGS.burnin_mode,
                    override_mvnet_input=(batch_to_single_mvinput(mvnet_input)
                                          if FLAGS.reproj_mode else None))

    for i_idx in range(FLAGS.max_iter):
        t0 = time.time()
        if np.random.uniform() < FLAGS.epsilon:
            rollout_obj.go(i_idx,
                           verbose=True,
                           add_to_mem=True,
                           mode=FLAGS.explore_mode,
                           is_train=True)
        else:
            rollout_obj.go(i_idx, verbose=True, add_to_mem=True, is_train=True)
        t1 = time.time()
        replay_mem.enable_gbl()
        mvnet_input = replay_mem.get_batch_list(FLAGS.batch_size)
        t2 = time.time()
        if FLAGS.finetune_dqn:
            out_stuff = active_mv.run_step(mvnet_input,
                                           mode='train_dqn',
                                           is_training=True)
        elif FLAGS.finetune_dqn_only:
            out_stuff = active_mv.run_step(mvnet_input,
                                           mode='train_dqn_only',
                                           is_training=True)
        else:
            out_stuff = active_mv.run_step(mvnet_input,
                                           mode='train',
                                           is_training=True)
        replay_mem.disable_gbl()
        t3 = time.time()
        train_log(i_idx, out_stuff, (t0, t1, t2, t3))
        active_mv.train_writer.add_summary(out_stuff.merged_train, i_idx)

        if (i_idx + 1) % FLAGS.save_every_step == 0 and i_idx > 0:
            save(active_mv, i_idx + 1, i_idx + 1, i_idx + 1)

        if (i_idx + 1) % FLAGS.test_every_step == 0 and i_idx > 0:
            print('Evaluating active policy')
            evaluate(active_mv, FLAGS.test_episode_num, replay_mem, i_idx + 1,
                     rollout_obj, mode='active')
            print('Evaluating random policy')
            evaluate(active_mv, FLAGS.test_episode_num, replay_mem, i_idx + 1,
                     rollout_obj, mode='oneway')