def train(agent):
    senv = ShapeNetEnv(FLAGS)
    replay_mem = ReplayMemory(FLAGS)

    log_string('====== Starting burning in memories ======')
    burn_in(senv, replay_mem)
    log_string('====== Done. {} trajectories burnt in ======'.format(
        FLAGS.burn_in_length))

    # Pinhole camera intrinsics (focal length 420, principal point at the
    # center of a 224x224 render), tiled once per episode step.
    K_single = np.asarray([[420.0, 0.0, 112.0],
                           [0.0, 420.0, 112.0],
                           [0.0, 0.0, 1.0]])
    K_list = np.tile(K_single[None, None, ...],
                     (1, FLAGS.max_episode_length, 1, 1))

    for i_idx in range(FLAGS.max_iter):
        state, model_id = senv.reset(True)
        actions = []
        RGB_temp_list = np.zeros(
            (FLAGS.max_episode_length, FLAGS.resolution, FLAGS.resolution, 3),
            dtype=np.float32)
        R_list = np.zeros((FLAGS.max_episode_length, 3, 4), dtype=np.float32)

        RGB_temp_list[0, ...], _ = replay_mem.read_png_to_uint8(
            state[0][0], state[1][0], model_id)
        R_list[0, ...] = replay_mem.get_R(state[0][0], state[1][0])
        vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                K_list, 0)
        vox_temp = np.squeeze(vox_temp_list[0, ...])

        # Run one simulated episode and append the trajectory to memory.
        # Note: `rewards` is only assigned on the `done` branch, so the
        # environment is expected to signal `done` within the episode.
        for e_idx in range(FLAGS.max_episode_length - 1):
            agent_action = select_action(agent, RGB_temp_list[e_idx], vox_temp)
            actions.append(agent_action)
            state, next_state, done, model_id = senv.step(actions[-1])
            RGB_temp_list[e_idx + 1, ...], _ = replay_mem.read_png_to_uint8(
                next_state[0], next_state[1], model_id)
            R_list[e_idx + 1, ...] = replay_mem.get_R(next_state[0],
                                                      next_state[1])
            vox_temp_list = replay_mem.get_vox_pred(RGB_temp_list, R_list,
                                                    K_list, e_idx + 1)
            vox_temp = np.squeeze(vox_temp_list[e_idx + 1, ...])
            if done:
                traj_state = state
                traj_state[0] += [next_state[0]]
                traj_state[1] += [next_state[1]]
                rewards = replay_mem.get_seq_rewards(RGB_temp_list, R_list,
                                                     K_list, model_id)
                temp_traj = trajectData(traj_state, actions, rewards, model_id)
                replay_mem.append(temp_traj)
                break

        rgb_batch, vox_batch, reward_batch, action_batch = replay_mem.get_batch(
            FLAGS.batch_size)
        feed_dict = {
            agent.is_training: True,
            agent.rgb_batch: rgb_batch,
            agent.vox_batch: vox_batch,
            agent.reward_batch: reward_batch,
            agent.action_batch: action_batch
        }
        opt_train, merge_summary, loss = agent.sess.run(
            [agent.opt, agent.merged_train, agent.loss], feed_dict=feed_dict)

        log_string(
            '+++++Iteration: {}, loss: {:.4f}, mean_reward: {:.4f}+++++'.format(
                i_idx, loss, np.mean(rewards)))
        tf_util.save_scalar(i_idx, 'episode_total_reward', np.sum(rewards),
                            agent.train_writer)
        agent.train_writer.add_summary(merge_summary, i_idx)

        if i_idx % FLAGS.save_every_step == 0 and i_idx > 0:
            save(agent, i_idx, i_idx, i_idx)

        if i_idx % FLAGS.test_every_step == 0 and i_idx > 0:
            eval_r_mean, eval_IoU_mean, eval_loss_mean = evaluate(
                agent, FLAGS.test_episode_num, replay_mem)
            tf_util.save_scalar(i_idx, 'eval_mean_reward', eval_r_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_IoU', eval_IoU_mean,
                                agent.train_writer)
            tf_util.save_scalar(i_idx, 'eval_mean_loss', eval_loss_mean,
                                agent.train_writer)
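
# --- Reference sketch (not used by the code above) --------------------------
# `replay_mem.calu_IoU(pred, gt, thres)` is defined elsewhere in the repo.
# A minimal stand-alone equivalent, assuming both inputs are voxel occupancy
# grids binarized at `thres`, might look like this hypothetical helper:
def voxel_iou(pred, gt, thres=0.5):
    """Intersection-over-union of two voxel grids after binarization."""
    pred_bin = pred > thres
    gt_bin = gt > thres
    inter = np.sum(np.logical_and(pred_bin, gt_bin))
    union = np.sum(np.logical_or(pred_bin, gt_bin))
    return inter / float(union) if union > 0 else 0.0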
def evaluate_burnin(active_mv, test_episode_num, replay_mem, train_i,
                    rollout_obj, mode='random'):
    senv = ShapeNetEnv(FLAGS)
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):
        # Roll out the current policy without writing to replay memory.
        mvnet_input, actions = rollout_obj.go(i_idx, verbose=False,
                                              add_to_mem=False, mode=mode,
                                              is_train=False)
        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        vox_gt = np.squeeze(replay_mem.read_vox(voxel_name))

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)
        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        # IoU against ground truth after each view; the last entry is the
        # IoU of the final prediction.
        ious = []
        for vi in range(FLAGS.max_episode_length):
            final_IoU = replay_mem.calu_IoU(
                np.squeeze(pred_out.vox_pred_test[vi, ...]), vox_gt,
                FLAGS.iou_thres)
            ious.append(final_IoU)
        eval_log(i_idx, pred_out, ious)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(np.mean(pred_out.recon_loss_list_test))

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }
            dump_outputs(save_dict, train_i, i_idx, mode)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_reward_{}'.format(mode),
                        eval_r_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_IoU_{}'.format(mode),
                        eval_IoU_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_loss_{}'.format(mode),
                        eval_loss_mean, active_mv.train_writer)
def test(active_mv, test_episode_num, replay_mem, train_i, rollout_obj):
    senv = ShapeNetEnv(FLAGS)
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):
        mvnet_input, actions = rollout_obj.go(i_idx, verbose=False,
                                              add_to_mem=False,
                                              is_train=False, test_idx=i_idx)
        # Action 8 is the stop action: score the prediction at the step
        # where the agent chose to stop, or at the last step if it never did.
        stop_idx = np.argwhere(np.asarray(actions) == 8)
        if stop_idx.size == 0:
            pred_idx = -1
        else:
            pred_idx = stop_idx[0, 0]

        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        if FLAGS.category == '1111':
            # The '1111' meta-category stores model ids as 'category/model'.
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        mvnet_input.put_voxel(vox_gt)
        pred_out = active_mv.predict_vox_list(mvnet_input)
        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[pred_idx],
                                        vox_gtr, FLAGS.iou_thres)
        eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(pred_out.recon_loss_list_test)

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb
            }
            dump_outputs(save_dict, train_i, i_idx)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)
    tf_util.save_scalar(train_i, 'eval_mean_reward', eval_r_mean,
                        active_mv.train_writer)
    tf_util.save_scalar(train_i, 'eval_mean_IoU', eval_IoU_mean,
                        active_mv.train_writer)
    tf_util.save_scalar(train_i, 'eval_mean_loss', eval_loss_mean,
                        active_mv.train_writer)
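
# --- Reference sketch (hypothetical) ----------------------------------------
# `tf_util.save_scalar(step, name, value, writer)` is called throughout but
# defined elsewhere in the repo. A typical TF1-style implementation consistent
# with that call signature writes a manual scalar summary, roughly:
def _save_scalar_sketch(step, name, value, writer):
    """Write one scalar to TensorBoard without a graph-side summary op."""
    summary = tf.Summary(
        value=[tf.Summary.Value(tag=name, simple_value=float(value))])
    writer.add_summary(summary, step)
    writer.flush()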
def train(ae):
    i = 0
    try:
        while not ae.coord.should_stop():
            ae.sess.run(ae.assign_i_op, feed_dict={ae.set_i_to_pl: i})
            tic = time.time()
            feed_dict = {
                ae.is_training: True,
                ae.data_loader.is_training: True
            }
            ops_to_run = [ae.opt_step, ae.merge_train, ae.counter,
                          ae.recon_loss]
            if FLAGS.use_gan:
                # Also step the discriminator and fetch the GAN losses.
                ops_to_run = ops_to_run + [ae.opt_D, ae.D_loss, ae.G_loss]
            stuff = ae.sess.run(ops_to_run, feed_dict=feed_dict)
            if FLAGS.use_gan:
                opt, summary, step, recon_loss, opt_D, D_loss, G_loss = stuff
            else:
                opt, summary, step, recon_loss = stuff
            toc = time.time()

            log_string('Iteration: {} time {}, recon_loss: {}'.format(
                i, toc - tic, recon_loss))
            if FLAGS.use_gan:
                log_string('D_loss: {}, G_loss: {}'.format(D_loss, G_loss))
            log_string(' maxrss: {}'.format(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
            print('cpu: {}, vmem: {}, avai: {}'.format(
                psutil.cpu_percent(),
                psutil.virtual_memory().used >> 30,
                psutil.virtual_memory().available >> 30))

            i += 1
            ae.train_writer.add_summary(summary, i)
            ae.train_writer.flush()

            if i % FLAGS.save_every_step == 0:
                save(ae, i, i, i)

            if i % FLAGS.test_every_step == 0:
                test_losses = evaluate(ae)
                for key, value in test_losses.items():
                    tf_util.save_scalar(i, 'test/' + key, value,
                                        ae.train_writer)
                gc.collect()

            if i > FLAGS.max_iter:
                print('Done training')
                break
    except tf.errors.OutOfRangeError:
        print('Done training')
    finally:
        ae.coord.request_stop()
        ae.coord.join(ae.threads)
        ae.sess.close()
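
# --- Usage sketch (hypothetical) ---------------------------------------------
# Model construction lives elsewhere in the repo; train(ae) only assumes an
# object exposing sess, coord, threads, a data_loader, and the ops fetched
# above. A minimal driver, with `AutoEncoder` as a placeholder name for
# whatever class builds the graph, session, and queue threads:
#
#     ae = AutoEncoder(FLAGS)  # hypothetical constructor
#     train(ae)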
def evaluate_burnin(active_mv, test_episode_num, replay_mem, train_i,
                    rollout_obj, mode='random', override_mvnet_input=None):
    senv = ShapeNetEnv(FLAGS)
    rewards_list = []
    IoU_list = []
    loss_list = []

    for i_idx in range(test_episode_num):
        # Roll out the current policy without writing to replay memory.
        mvnet_input, actions = rollout_obj.go(i_idx, verbose=False,
                                              add_to_mem=False, mode=mode,
                                              is_train=False)
        model_id = rollout_obj.env.current_model
        voxel_name = os.path.join(
            'voxels', '{}/{}/model.binvox'.format(FLAGS.category, model_id))
        if FLAGS.category == '1111':
            # The '1111' meta-category stores model ids as 'category/model'.
            category_, model_id_ = model_id.split('/')
            voxel_name = os.path.join(
                'voxels', '{}/{}/model.binvox'.format(category_, model_id_))
        vox_gt = replay_mem.read_vox(voxel_name)

        if FLAGS.use_segs and FLAGS.category == '3333':
            # '3333' is the only category with segmentation data.
            seg1_name = os.path.join(
                'voxels', '{}/{}/obj1.binvox'.format(FLAGS.category, model_id))
            seg2_name = os.path.join(
                'voxels', '{}/{}/obj2.binvox'.format(FLAGS.category, model_id))
            seg1 = replay_mem.read_vox(seg1_name)
            seg2 = replay_mem.read_vox(seg2_name)
            mvnet_input.put_segs(seg1, seg2)

        mvnet_input.put_voxel(vox_gt)
        if override_mvnet_input is not None:
            # Evaluate on a fixed, caller-supplied input instead.
            mvnet_input = override_mvnet_input

        pred_out = active_mv.predict_vox_list(mvnet_input)
        vox_gtr = np.squeeze(pred_out.rotated_vox_test)

        PRINT_SUMMARY_STATISTICS = False
        if PRINT_SUMMARY_STATISTICS:
            lastpred = pred_out.vox_pred_test[-1]
            print('prediction statistics')
            print('min', np.min(lastpred))
            print('max', np.max(lastpred))
            print('mean', np.mean(lastpred))
            print('std', np.std(lastpred))

        final_IoU = replay_mem.calu_IoU(pred_out.vox_pred_test[-1], vox_gtr,
                                        FLAGS.iou_thres)
        eval_log(i_idx, pred_out, final_IoU)

        rewards_list.append(np.sum(pred_out.reward_raw_test))
        IoU_list.append(final_IoU)
        loss_list.append(np.mean(pred_out.recon_loss_list_test))

        if FLAGS.if_save_eval:
            save_dict = {
                'voxel_list': np.squeeze(pred_out.vox_pred_test),
                'vox_gt': vox_gt,
                'vox_gtr': vox_gtr,
                'model_id': model_id,
                'states': rollout_obj.last_trajectory,
                'RGB_list': mvnet_input.rgb,
            }
            if FLAGS.use_segs:
                save_dict['pred_seg1_test'] = pred_out.pred_seg1_test
                save_dict['pred_seg2_test'] = pred_out.pred_seg2_test
            dump_outputs(save_dict, train_i, i_idx, mode)

    rewards_list = np.asarray(rewards_list)
    IoU_list = np.asarray(IoU_list)
    loss_list = np.asarray(loss_list)
    eval_r_mean = np.mean(rewards_list)
    eval_IoU_mean = np.mean(IoU_list)
    eval_loss_mean = np.mean(loss_list)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_reward_{}'.format(mode),
                        eval_r_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_IoU_{}'.format(mode),
                        eval_IoU_mean, active_mv.train_writer)
    tf_util.save_scalar(train_i, 'burnin_eval_mean_loss_{}'.format(mode),
                        eval_loss_mean, active_mv.train_writer)