def __call__(self, lock, history, sess, coord):
    assert isinstance(history, deque)
    assert isinstance(sess, tf.Session)
    assert isinstance(coord, tf.train.Coordinator)

    history_lock = lock

    env = environment.get_game_environment(
        self._maps,
        multiproc=FLAGS.multiproc,
        random_goal=FLAGS.random_goal,
        random_spawn=FLAGS.random_spawn,
        apple_prob=FLAGS.apple_prob,
        episode_length=FLAGS.episode_length)
    exp = Expert()

    with sess.as_default(), sess.graph.as_default():
        while not coord.should_stop():
            try:
                if not self._eval:
                    train_global_step, np_global_step, model_version = sess.run([
                        self._train_global_step,
                        self._update_explore_global_step_op,
                        self._model_version
                    ])
                    # Pull the latest trained weights if the training graph has moved on.
                    if model_version != train_global_step:
                        self._update_graph(sess)
                    # DAgger supervision rate decays exponentially with training steps.
                    random_rate = FLAGS.supervision_rate * np.exp(
                        -train_global_step / FLAGS.decay)
                    if FLAGS.learn_mapper:
                        # A rate > 1 means the expert action is always followed.
                        random_rate = 2
                else:
                    np_global_step = sess.run(self._update_explore_global_step_op)
                    random_rate = 0

                env.reset()
                obs, info = env.observations()

                # Seed the episode buffers with the first observation.
                episode = dict()
                episode['act'] = [np.argmax(exp.get_optimal_action(info))]
                episode['obs'] = [self._merge_depth(obs, info['depth'])]
                episode['ego'] = [[0., 0., 0.]]
                episode['est'] = [
                    exp.get_free_space_map(info, estimate_size=FLAGS.estimate_size)
                ]
                episode['gol'] = [
                    exp.get_goal_map(info, estimate_size=FLAGS.estimate_size)
                ]
                episode['rwd'] = [0.]
                episode['inf'] = [deepcopy(info)]

                estimate_map_list = [
                    np.zeros((1, FLAGS.estimate_size, FLAGS.estimate_size, 3))
                    for _ in xrange(FLAGS.estimate_scale)
                ]
                old_estimate_map_list = estimate_map_list

                for _ in xrange(FLAGS.episode_size):
                    prev_info = deepcopy(episode['inf'][-1])
                    optimal_action = exp.get_optimal_action(prev_info)

                    # Feed only the latest step, shaped [batch=1, time=1, ...].
                    expand_dim = lambda x: np.array([[x[-1]]])
                    feed_data = {
                        'sequence_length': np.array([1]),
                        'visual_input': expand_dim(episode['obs']),
                        'egomotion': expand_dim(episode['ego']),
                        'reward': expand_dim(episode['rwd']),
                        'space_map': expand_dim(episode['est']),
                        'goal_map': expand_dim(episode['gol']),
                        'estimate_map_list': estimate_map_list,
                        'optimal_action': expand_dim(episode['act']),
                        'optimal_estimate': expand_dim(episode['est']),
                        'is_training': False
                    }
                    feed_dict = prepare_feed_dict(self._net.input_tensors, feed_data)

                    results = sess.run(
                        [self._net.output_tensors['action']] +
                        self._net.intermediate_tensors['estimate_map_list'],
                        feed_dict=feed_dict)

                    predict_action = np.squeeze(results[0])
                    old_estimate_map_list = estimate_map_list
                    estimate_map_list = [m[0] for m in results[1:]]

                    # DAgger: follow the expert with probability `random_rate`,
                    # otherwise follow the network's prediction.
                    if np.random.rand() < random_rate and not self._eval:
                        dagger_action = optimal_action
                    else:
                        dagger_action = predict_action

                    action = np.argmax(dagger_action)
                    obs, reward, terminal, info = env.step(action)

                    if not terminal:
                        episode['act'].append(np.argmax(optimal_action))
                        episode['obs'].append(self._merge_depth(obs, info['depth']))
                        episode['ego'].append(
                            environment.calculate_egomotion(prev_info['POSE'],
                                                            info['POSE']))
                        episode['est'].append(
                            exp.get_free_space_map(
                                info, estimate_size=FLAGS.estimate_size))
                        episode['gol'].append(
                            exp.get_goal_map(
                                info, estimate_size=FLAGS.estimate_size))
                        episode['rwd'].append(deepcopy(reward))
                        episode['inf'].append(deepcopy(info))
                    else:
                        break

                if not self._eval:
                    history.append(episode)

                if np_global_step % FLAGS.save_every == 0 or self._eval:
                    feed_data = {
                        'sequence_length': np.array([1]),
                        'visual_input': expand_dim(episode['obs']),
                        'egomotion': expand_dim(episode['ego']),
                        'reward': expand_dim(episode['rwd']),
                        'space_map': expand_dim(episode['est']),
                        'goal_map': expand_dim(episode['gol']),
                        'estimate_map_list': old_estimate_map_list,
                        'optimal_action': expand_dim(episode['act']),
                        'optimal_estimate': expand_dim(episode['est']),
                        'is_training': False
                    }
                    feed_dict = prepare_feed_dict(self._net.input_tensors, feed_data)

                    summary_ops = (self._estimate_maps + self._goal_maps +
                                   self._reward_maps + self._value_maps)
                    results = sess.run(summary_ops, feed_dict=feed_dict)

                    # Split the flat result list back into its four groups.
                    estimate_maps_images = results[:len(self._estimate_maps)]
                    results = results[len(self._estimate_maps):]
                    goal_maps_images = results[:len(self._goal_maps)]
                    results = results[len(self._goal_maps):]
                    fused_maps_images = results[:len(self._reward_maps)]
                    results = results[len(self._reward_maps):]
                    value_maps_images = results[:len(self._value_maps)]
                    results = results[len(self._value_maps):]
                    assert len(results) == 0

                    postfix = '_eval' if self._eval else ''

                    self._writer.add_summary(
                        self._build_map_summary(
                            estimate_maps_images, episode['est'], goal_maps_images,
                            fused_maps_images, value_maps_images, postfix),
                        global_step=np_global_step)

                    # summary_text = ','.join('{}[{}]-{}={}'.format(key, idx, step, value)
                    #                         for step, info in enumerate(episode['inf'])
                    #                         for key in ('GOAL.LOC', 'SPAWN.LOC', 'POSE', 'env_name')
                    #                         for idx, value in enumerate(info[key]))
                    # step_episode_summary = sess.run(self._step_history_op,
                    #                                 feed_dict={self._step_history: summary_text})
                    # self._writer.add_summary(step_episode_summary, global_step=np_global_step)

                    self._writer.add_summary(
                        self._build_trajectory_summary(
                            episode['rwd'], episode['inf'], exp, random_rate, postfix),
                        global_step=np_global_step)

                if self._eval and FLAGS.total_steps <= np_global_step:
                    coord.request_stop()
            except Exception as e:
                print(e)
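
# The worker above relies on a `prepare_feed_dict` helper to map the named entries
# of `feed_data` onto the network's input placeholders. Its real implementation
# lives elsewhere in this repo; the function below is only a minimal sketch of what
# such a mapping could look like, assuming `input_tensors` is a dict keyed by the
# same names, with list-valued entries (e.g. 'estimate_map_list') holding one
# placeholder per map scale. It is an illustration, not the repo's actual code.
def prepare_feed_dict(input_tensors, feed_data):
    feed_dict = {}
    for name, value in feed_data.items():
        if name not in input_tensors:
            # Ignore entries the network does not expose as inputs.
            continue
        tensor = input_tensors[name]
        if isinstance(tensor, (list, tuple)):
            # One placeholder per map scale, e.g. 'estimate_map_list'.
            for placeholder, item in zip(tensor, value):
                feed_dict[placeholder] = item
        else:
            feed_dict[tensor] = value
    return feed_dict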
cv2.imshow("TB", img_tb_rgb) cv2.imwrite("init_player_view.png", img) cv2.imwrite("init_top_view.png", img_tb_rgb) cv2.waitKey(0) # print(obs["pose"]) # init episode = dict() episode['act'] = [np.argmax(exp.get_optimal_action(obs))] episode['obs'] = [obs] episode['ego'] = [[0., 0., 0.]] episode['est'] = [ exp.get_free_space_map(obs, estimate_size=estimate_size) ] episode['gol'] = [ exp.get_goal_map(obs, estimate_size=estimate_size) ] episode['rwd'] = [0.] estimate_map_list = [ np.zeros((1, estimate_size, estimate_size, 3)) for _ in xrange(estimate_scale) ] old_estimate_map_list = estimate_map_list # episode循环 for _ in xrange(episode_size): prev_obs = deepcopy(episode['obs'][-1]) optimal_action = exp.get_optimal_action(prev_obs) expand_dim = lambda x: np.array([[x[-1]]])