Code Example #1
    def test_convergence(self):
        a3c = a3c_impl.A3C(
            model=a3c_impl.CreateModel(
                state_shape=(3, ),
                action_space_size=2,
                hidden_layer_sizes=(3, ),
            ),
            # optimizer=a3c_impl.CreateDefaultOptimizer(learning_rate=0.05),
        )
        s = numpy.array([[1, 2, 3]])
        a1 = numpy.array([[1, 0]])
        a2 = numpy.array([[0, 1]])

        for _ in range(10):
            # Need to train on both actions as one step; otherwise the values show a
            # "staggering" effect.
            a3c.UpdateFromTransitions([
                base.Transition(s=s, a=a1, r=1.0, sp=None),
            ])
            a3c.UpdateFromTransitions([
                base.Transition(s=s, a=a2, r=-1.0, sp=s),
            ])
            logging.printf('%s', a3c.GetValues(s))
        old_value_a1 = a3c.GetActionValues(a3c.GetValues(s), a1)
        # Trains for one step, for both actions.
        a3c.UpdateFromTransitions([
            base.Transition(s=s, a=a1, r=1.0, sp=None),
        ])
        a3c.UpdateFromTransitions([
            base.Transition(s=s, a=a2, r=-1.0, sp=s),
        ])
        self.assertGreaterEqual(a3c.GetActionValues(a3c.GetValues(s), a1),
                                old_value_a1)
Code Example #2
    def OnCompletionCallback(self):
        logging.printf(
            'Total: avg_reward = %3.2f, avg_steps=%3.2f',
            float(numpy.mean(self._episode_rewards)),
            float(numpy.mean(self._episode_steps)),
        )
        # Note that since "block=False" is used, if you run this on the CLI the
        # image will be shown and then disappear immediately. The result will
        # persist if you run it in a notebook.
        pyplot.title('Episode Rewards')
        pyplot.plot(self._episode_rewards)
        pyplot.show(block=False)

        if self._report_steps:
            pyplot.title('Episode Steps')
            pyplot.plot(self._episode_steps)
            pyplot.show(block=False)
Code Example #3
File: a3c_impl.py Project: ChihChiu29/deep_learning
    def __init__(
        self,
        model: keras.Model,
        optimizer: tensorflow.train.Optimizer = None,
        discount_factor: float = _DEFAULT_DISCOUNT_FACTOR,
        loss_v: float = _DEFAULT_LOSS_V,
        loss_entropy: float = _DEFAULT_LOSS_ENTROPY,
    ):
        """Ctor.

    Args:
      model: a Keras model whose second-to-last layer outputs the action
        probabilities (Pi) and whose last layer outputs the state value (V).
    """
        if _ACTIVE_INSTANCES:
            instance = _ACTIVE_INSTANCES[0]
            logging.printf(
                'WARNING: only one A3C instance can be active; the previous instance '
                '%s is now deactivated.', instance)
            instance.Deactivate()
            _ACTIVE_INSTANCES.pop()

        self._model = model
        self._optimizer = optimizer if optimizer else CreateDefaultOptimizer()
        self._gamma = discount_factor
        self._loss_v = loss_v
        self._loss_entropy = loss_entropy

        self._state_batch_shape = self._model.layers[0].input_shape
        # Layer -1 is the output for V, -2 is for the values of Pi.
        output_shape = self._model.layers[-2].output_shape[
            1:]  # type: t.Tuple[int]
        if len(output_shape) != 1:
            raise NotImplementedError(
                'Only supports 1D action space; got: %s' % str(output_shape))
        self._action_space_size = output_shape[0]

        self._graph = self._BuildGraph(self._model)

        self.session = tensorflow.Session()
        backend.set_session(self.session)
        self.session.run(tensorflow.global_variables_initializer())

        # Only one A3C instance can be active at a time.
        self._active = True
        _ACTIVE_INSTANCES.append(self)
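
The constructor above allows only one active A3C instance at a time: creating a new instance deactivates the previous one. Below is a minimal usage sketch of that behavior, reusing the CreateModel call shown in Code Example #1; the variable names, shapes, and sizes are placeholders, not values prescribed by the project.

a3c_first = a3c_impl.A3C(
    model=a3c_impl.CreateModel(
        state_shape=(3, ),
        action_space_size=2,
        hidden_layer_sizes=(3, ),
    ))
# Constructing a second instance logs a WARNING, calls Deactivate() on
# a3c_first, and replaces it in _ACTIVE_INSTANCES.
a3c_second = a3c_impl.A3C(
    model=a3c_impl.CreateModel(
        state_shape=(3, ),
        action_space_size=2,
        hidden_layer_sizes=(3, ),
    ))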
Code Example #4
    def OnCompletionCallback(self, env: Environment, qfunc: QFunction,
                             num_of_episodes: int):
        logging.printf(
            'Total: run %d episodes, avg_reward = %3.2f, avg_steps=%3.2f',
            num_of_episodes,
            float(numpy.mean(self._episode_rewards[-num_of_episodes:])),
            float(numpy.mean(self._episode_steps[-num_of_episodes:])),
        )
        # Note that since "block=False" is used, if you run this on the CLI the
        # image will be shown and then disappear immediately. The result will
        # persist if you run it in a notebook.
        pyplot.title('Episode Rewards')
        pyplot.plot(self._episode_rewards)
        pyplot.show(block=False)

        pyplot.title('Episode Steps')
        pyplot.plot(self._episode_steps)
        pyplot.show(block=False)
Code Example #5
    def OnEpisodeFinishedCallback(self, env: Environment, brain: Brain,
                                  episode_idx: int, num_of_episodes: int,
                                  episode_reward: float, steps: int):
        """Reports episode progress and rewards."""
        self._episode_rewards.append(episode_reward)
        self._episode_steps.append(steps)

        episode_idx += 1  # make it 1-based.
        if episode_idx % self._report_every_num_of_episodes == 0:
            logging.printf(
                'Episode %d/%d: avg_reward = %3.2f, '
                'avg_steps=%3.2f (over %d episodes)',
                episode_idx,
                num_of_episodes,
                float(numpy.mean(
                    self._episode_rewards[-self._report_every_num_of_episodes:])),
                float(numpy.mean(
                    self._episode_steps[-self._report_every_num_of_episodes:])),
                self._report_every_num_of_episodes,
            )
Code Example #6
File: shortcut.py Project: ChihChiu29/deep_learning
    def __init__(
        self,
        gym_env_name: t.Text,
        model_shape: t.Iterable[int] = (20, 20, 20),
        report_every_num_of_episodes: int = 100,
    ):
        """Ctor.

    Default implementations are provided for all objects. They can be changed
    by directly setting the public properties after creation.

    Args:
      gym_env_name: name of the gym environment, like "LunarLander-v2".
      model_shape: the number of nodes in each hidden layer.
      report_every_num_of_episodes: report progress every this many episodes.
    """
        self._gym_env_name = gym_env_name
        self._model_shape = tuple(model_shape)

        self.env = environment_impl.GymEnvironment(gym.make(gym_env_name))
        self.qfunc = qfunc_impl.DDQN(
            model_pair=(
                qfunc_impl.CreateModel(
                    state_shape=self.env.GetStateShape(),
                    action_space_size=self.env.GetActionSpaceSize(),
                    hidden_layer_sizes=model_shape),
                qfunc_impl.CreateModel(
                    state_shape=self.env.GetStateShape(),
                    action_space_size=self.env.GetActionSpaceSize(),
                    hidden_layer_sizes=model_shape),
            ),
            training_batch_size=DEFAULT_BATCH_SIZE,
            discount_factor=0.99,
        )
        logging.printf('Using qfunc implementation: %s',
                       string.GetClassName(self.qfunc))
        self.policy = policy_impl.GreedyPolicyWithDecreasingRandomness(
            initial_epsilon=1.0,
            final_epsilon=0.1,
            decay_by_half_after_num_of_episodes=500)
        logging.printf('Using policy implementation: %s',
                       string.GetClassName(self.policy))

        self.runner = runner_impl.ExperienceReplayRunner(
            experience_capacity=100000,
            experience_sample_batch_size=DEFAULT_BATCH_SIZE)
        logging.printf('Using runner implementation: %s',
                       string.GetClassName(self.runner))

        self._progress_tracer = runner_extension_impl.ProgressTracer(
            report_every_num_of_episodes=report_every_num_of_episodes)
        self._model_saver = runner_extension_impl.ModelSaver(
            self._GetModelWeightsFilepath())
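
The docstring above states that default implementations are exposed as public properties (env, qfunc, policy, runner) and can be replaced after construction. Below is a hedged sketch of that pattern; "shortcut_obj" is a placeholder for an already-constructed instance of the class whose __init__ is shown above, and the parameter values are illustrative only.

# Swap the default policy for one with a different exploration schedule.
# "shortcut_obj" and the epsilon values below are illustrative placeholders.
shortcut_obj.policy = policy_impl.GreedyPolicyWithDecreasingRandomness(
    initial_epsilon=0.5,
    final_epsilon=0.05,
    decay_by_half_after_num_of_episodes=1000)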
Code Example #7
def MainTest():
    images, labels = CreateImageData(num_blank_images=5,
                                     num_annotated_images=5)
    for idx in range(5):
        logging.printf('Label %d: %s', idx, labels[idx])
        PlotImage(images[idx])
Code Example #8
File: shortcut.py Project: ChihChiu29/deep_learning
    def __init__(
        self,
        gym_env_name: t.Text,
        gym_env=None,
        report_every_num_of_episodes: int = 1,
        use_ddqn: bool = True,
        use_large_model: bool = True,
    ):
        """Ctor.

    Args:
      gym_env_name: name of the gym environment that will be created.
      gym_env: Gym environment. If set, use the provided Gym environment and
        gym_env_name is only used as a tag.
      report_every_num_of_episodes: report progress every this many episodes.
      use_ddqn: whether to use DDQN or DQN_TargetNetwork.
      use_large_model: whether to use the larger model; it is very slow without
        a GPU.
    """
        self._gym_env_name = gym_env_name
        if gym_env:
            env = gym_env
        else:
            env = gym.make(gym_env_name)
        self.env = screen_learning.ScreenGymEnvironment(env)
        if use_large_model:
            model_pair = (
                screen_learning.CreateOriginalConvolutionModel(
                    action_space_size=self.env.GetActionSpaceSize()),
                screen_learning.CreateOriginalConvolutionModel(
                    action_space_size=self.env.GetActionSpaceSize()),
            )
        else:
            model_pair = (
                screen_learning.CreateConvolutionModel(
                    action_space_size=self.env.GetActionSpaceSize()),
                screen_learning.CreateConvolutionModel(
                    action_space_size=self.env.GetActionSpaceSize()),
            )
        if use_ddqn:
            self.qfunc = qfunc_impl.DDQN(
                model_pair=model_pair,
                training_batch_size=DEFAULT_BATCH_SIZE,
                discount_factor=0.99,
            )
        else:
            self.qfunc = qfunc_impl.DQN_TargetNetwork(
                model=model_pair[0],
                training_batch_size=DEFAULT_BATCH_SIZE,
                discount_factor=0.99)
        logging.printf('Using qfunc implementation: %s',
                       string.GetClassName(self.qfunc))
        self.policy = policy_impl.GreedyPolicyWithDecreasingRandomness(
            initial_epsilon=1.0,
            final_epsilon=0.1,
            decay_by_half_after_num_of_episodes=50)
        logging.printf('Using policy implementation: %s',
                       string.GetClassName(self.policy))

        self.runner = runner_impl.ExperienceReplayRunner(
            experience_capacity=100000,
            experience_sample_batch_size=DEFAULT_BATCH_SIZE)
        logging.printf('Using runner implementation: %s',
                       string.GetClassName(self.runner))

        self._progress_tracer = runner_extension_impl.ProgressTracer(
            report_every_num_of_episodes=report_every_num_of_episodes)
        self._model_saver = runner_extension_impl.ModelSaver(
            self._GetModelWeightsFilepath(),
            use_averaged_value_over_num_of_episodes=report_every_num_of_episodes
        )