def __call__(self, lock, history, sess, coord):
        assert isinstance(history, deque)
        assert isinstance(sess, tf.Session)
        assert isinstance(coord, tf.train.Coordinator)

        history_lock = lock

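        # Create the game environment and the expert policy that supplies
        # optimal actions and ground-truth maps for supervision.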
        env = environment.get_game_environment(
            self._maps,
            multiproc=FLAGS.multiproc,
            random_goal=FLAGS.random_goal,
            random_spawn=FLAGS.random_spawn,
            apple_prob=FLAGS.apple_prob,
            episode_length=FLAGS.episode_length)
        exp = Expert()

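        # Keep rolling out episodes until the coordinator asks all threads to stop.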
        with sess.as_default(), sess.graph.as_default():
            while not coord.should_stop():
                try:
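                    # Training worker: advance the explore step, refresh the local
                    # graph if it is stale, and compute the exponentially decaying
                    # probability of following the expert (DAgger supervision rate).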
                    if not self._eval:
                        train_global_step, np_global_step, model_version = sess.run(
                            [
                                self._train_global_step,
                                self._update_explore_global_step_op,
                                self._model_version
                            ])

                        if model_version != train_global_step:
                            self._update_graph(sess)

                        random_rate = FLAGS.supervision_rate * np.exp(
                            -train_global_step / FLAGS.decay)
                        if FLAGS.learn_mapper:
                            random_rate = 2
                    else:
                        np_global_step = sess.run(
                            self._update_explore_global_step_op)
                        random_rate = 0

                    env.reset()
                    obs, info = env.observations()

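                    # Seed the episode buffers with the first observation, the
                    # expert's action, and the ground-truth free-space / goal maps.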
                    episode = dict()
                    episode['act'] = [np.argmax(exp.get_optimal_action(info))]
                    episode['obs'] = [self._merge_depth(obs, info['depth'])]
                    episode['ego'] = [[0., 0., 0.]]
                    episode['est'] = [
                        exp.get_free_space_map(
                            info, estimate_size=FLAGS.estimate_size)
                    ]
                    episode['gol'] = [
                        exp.get_goal_map(info,
                                         estimate_size=FLAGS.estimate_size)
                    ]
                    episode['rwd'] = [0.]
                    episode['inf'] = [deepcopy(info)]

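                    # Zero-initialised belief maps, one per estimate scale, carried
                    # across time steps.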
                    estimate_map_list = [
                        np.zeros(
                            (1, FLAGS.estimate_size, FLAGS.estimate_size, 3))
                        for _ in xrange(FLAGS.estimate_scale)
                    ]
                    old_estimate_map_list = estimate_map_list

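                    # Roll out the episode: run the network on the latest step,
                    # choose between expert and predicted actions, and record the
                    # expert's labels for training.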
                    for _ in xrange(FLAGS.episode_size):
                        prev_info = deepcopy(episode['inf'][-1])
                        optimal_action = exp.get_optimal_action(prev_info)

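                        # Wrap the most recent entry in [batch=1, time=1, ...] so a
                        # single step can be fed to the network.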
                        expand_dim = lambda x: np.array([[x[-1]]])
                        feed_data = {
                            'sequence_length': np.array([1]),
                            'visual_input': expand_dim(episode['obs']),
                            'egomotion': expand_dim(episode['ego']),
                            'reward': expand_dim(episode['rwd']),
                            'space_map': expand_dim(episode['est']),
                            'goal_map': expand_dim(episode['gol']),
                            'estimate_map_list': estimate_map_list,
                            'optimal_action': expand_dim(episode['act']),
                            'optimal_estimate': expand_dim(episode['est']),
                            'is_training': False
                        }
                        feed_dict = prepare_feed_dict(self._net.input_tensors,
                                                      feed_data)

                        results = sess.run(
                            [self._net.output_tensors['action']] +
                            self._net.intermediate_tensors['estimate_map_list'],
                            feed_dict=feed_dict)

                        predict_action = np.squeeze(results[0])
                        old_estimate_map_list = estimate_map_list
                        estimate_map_list = [m[0] for m in results[1:]]

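                        # DAgger mixing: follow the expert with probability
                        # random_rate, otherwise execute the predicted action.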
                        if np.random.rand() < random_rate and not self._eval:
                            dagger_action = optimal_action
                        else:
                            dagger_action = predict_action

                        action = np.argmax(dagger_action)
                        obs, reward, terminal, info = env.step(action)

                        if not terminal:
                            episode['act'].append(np.argmax(optimal_action))
                            episode['obs'].append(
                                self._merge_depth(obs, info['depth']))
                            episode['ego'].append(
                                environment.calculate_egomotion(
                                    prev_info['POSE'], info['POSE']))
                            episode['est'].append(
                                exp.get_free_space_map(
                                    info, estimate_size=FLAGS.estimate_size))
                            episode['gol'].append(
                                exp.get_goal_map(
                                    info, estimate_size=FLAGS.estimate_size))
                            episode['rwd'].append(deepcopy(reward))
                            episode['inf'].append(deepcopy(info))
                        else:
                            break

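                    # Only training workers push the finished episode into the
                    # shared replay history.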
                    if not self._eval:
                        history.append(episode)

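                    # Periodically (and always in eval mode) render the network's
                    # map estimates and the trajectory as TensorBoard summaries.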
                    if np_global_step % FLAGS.save_every == 0 or self._eval:
                        feed_data = {
                            'sequence_length': np.array([1]),
                            'visual_input': expand_dim(episode['obs']),
                            'egomotion': expand_dim(episode['ego']),
                            'reward': expand_dim(episode['rwd']),
                            'space_map': expand_dim(episode['est']),
                            'goal_map': expand_dim(episode['gol']),
                            'estimate_map_list': old_estimate_map_list,
                            'optimal_action': expand_dim(episode['act']),
                            'optimal_estimate': expand_dim(episode['est']),
                            'is_training': False
                        }
                        feed_dict = prepare_feed_dict(self._net.input_tensors,
                                                      feed_data)

                        summary_ops = (self._estimate_maps + self._goal_maps +
                                       self._reward_maps + self._value_maps)
                        results = sess.run(summary_ops, feed_dict=feed_dict)

                        estimate_maps_images = results[:len(self._estimate_maps)]
                        results = results[len(self._estimate_maps):]
                        goal_maps_images = results[:len(self._goal_maps)]
                        results = results[len(self._goal_maps):]
                        fused_maps_images = results[:len(self._reward_maps)]
                        results = results[len(self._reward_maps):]
                        value_maps_images = results[:len(self._value_maps)]
                        results = results[len(self._value_maps):]

                        assert len(results) == 0

                        postfix = '_eval' if self._eval else ''

                        self._writer.add_summary(
                            self._build_map_summary(
                                estimate_maps_images, episode['est'],
                                goal_maps_images, fused_maps_images,
                                value_maps_images, postfix),
                            global_step=np_global_step)

                        # summary_text = ','.join('{}[{}]-{}={}'.format(key, idx, step, value)
                        #                         for step, info in enumerate(episode['inf'])
                        #                         for key in ('GOAL.LOC', 'SPAWN.LOC', 'POSE', 'env_name')
                        #                         for idx, value in enumerate(info[key]))
                        # step_episode_summary = sess.run(self._step_history_op,
                        #                                 feed_dict={self._step_history: summary_text})
                        # self._writer.add_summary(step_episode_summary, global_step=np_global_step)
                        self._writer.add_summary(
                            self._build_trajectory_summary(
                                episode['rwd'], episode['inf'], exp,
                                random_rate, postfix),
                            global_step=np_global_step)

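                    # In eval mode, request a stop once the configured number of
                    # steps has been processed.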
                    if self._eval and FLAGS.total_steps <= np_global_step:
                        coord.request_stop()
                except Exception as e:
                    print(e)
Example 2
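            # Display and save the initial first-person and top-down views.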
            cv2.imshow("TB", img_tb_rgb)
            cv2.imwrite("init_player_view.png", img)
            cv2.imwrite("init_top_view.png", img_tb_rgb)
            cv2.waitKey(0)
            # print(obs["pose"])

            # init
            episode = dict()
            episode['act'] = [np.argmax(exp.get_optimal_action(obs))]
            episode['obs'] = [obs]
            episode['ego'] = [[0., 0., 0.]]
            episode['est'] = [
                exp.get_free_space_map(obs, estimate_size=estimate_size)
            ]
            episode['gol'] = [
                exp.get_goal_map(obs, estimate_size=estimate_size)
            ]
            episode['rwd'] = [0.]
            estimate_map_list = [
                np.zeros((1, estimate_size, estimate_size, 3))
                for _ in xrange(estimate_scale)
            ]
            old_estimate_map_list = estimate_map_list

            # episode loop
            for _ in xrange(episode_size):

                prev_obs = deepcopy(episode['obs'][-1])
                optimal_action = exp.get_optimal_action(prev_obs)
                expand_dim = lambda x: np.array([[x[-1]]])