Code example #1
File: runner.py  Project: twildhage/rl-demo
    def __init__(self, algo, game, kwargs_agent):
        self._algo = algo
        self._game = game
        self._kwargs_agent = kwargs_agent
        self.log = Logger.get_active_logger()
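For context, a self-contained sketch of how this constructor might be instantiated. Since the excerpt comes from runner.py, the enclosing class is assumed to be called Runner, and `Logger` is stubbed here so the snippet runs on its own; none of this is taken from the project.

import logging

class Logger:
    # Stand-in for the project's Logger (assumption), so the snippet runs.
    @staticmethod
    def get_active_logger():
        logging.basicConfig(level=logging.INFO)
        return logging.getLogger('rl-demo')

class Runner:
    def __init__(self, algo, game, kwargs_agent):
        self._algo = algo
        self._game = game
        self._kwargs_agent = kwargs_agent
        self.log = Logger.get_active_logger()

# Hypothetical argument values, for illustration only.
runner = Runner(algo='a3c', game='CartPole-v0',
                kwargs_agent={'share_weights': True})
runner.log.info('runner created for %s', runner._game)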
Code example #2
    def __init__(self, mode, algo, game, render, num_threads, kwargs_agent):
        self._mode = mode
        self._algo = algo
        self._game = game
        self._render = render
        self.num_threads = num_threads
        self._kwargs_agent = kwargs_agent
        self.log = Logger.get_active_logger()
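This variant extends the first constructor with `mode`, `render`, and `num_threads`, which suggests it dispatches single- or multi-threaded (A3C-style) runs. Below is a self-contained sketch of the call pattern; the class name `Runner`, the argument values, and the one-worker-per-thread loop are assumptions in the spirit of A3C, not project code.

import threading

class Runner:
    # Minimal stand-in mirroring the excerpt's signature (assumption); it
    # reproduces none of the project's behavior.
    def __init__(self, mode, algo, game, render, num_threads, kwargs_agent):
        self._mode = mode
        self._algo = algo
        self._game = game
        self._render = render
        self.num_threads = num_threads
        self._kwargs_agent = kwargs_agent

runner = Runner(mode='train', algo='a3c', game='CartPole-v0',
                render=False, num_threads=4,
                kwargs_agent={'share_weights': True})

def worker(idx):
    # Placeholder for an A3C worker loop (assumption).
    print(f'worker {idx} running {runner._game}')

# A3C typically spawns one worker per thread; num_threads plausibly sets that.
threads = [threading.Thread(target=worker, args=(i,))
           for i in range(runner.num_threads)]
for t in threads:
    t.start()
for t in threads:
    t.join()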
Code example #3
    def __init__(self,
                 game,
                 tensorboard,
                 checkpoint_dir,
                 share_weights,
                 presenter=False,
                 name='A3CAgent'):
        super(BaseAgent, self).__init__(name=name)
        self._game = game
        self.env = gym.make(game)
        self.set_env_info(self.env)
        self._tensorboard = tensorboard
        self._input_shape = self._observation_shape
        self.share_weights = share_weights
        self._presenter = presenter
        self.log = Logger.get_active_logger()

        # Use a convolutional base for image (frame) observations,
        # a dense base otherwise.
        if self._frame_based_observations:
            self._BaseNet = ConvBaseNet
        else:
            self._BaseNet = DenseBaseNet

        # Either share one base network between the policy and the value
        # function, or give each head its own base network.
        if share_weights:
            self._base_net = self._BaseNet(self._input_shape)
            self._policy = Policy(self._base_net, self._action_dim,
                                  self._actionspace_is_discrete)
            self._valuef = ValueFunction(self._base_net)
        else:
            self._base_net_policy = self._BaseNet(self._input_shape)
            self._policy = Policy(self._base_net_policy, self._action_dim,
                                  self._actionspace_is_discrete)
            self._base_net_valuef = self._BaseNet(self._input_shape)
            self._valuef = ValueFunction(self._base_net_valuef)

        self.discount_factor = 0.95
        self.discount_factors = []
        self.ActionDist = tf.contrib.distributions.Normal
        self.history = {'loss': []}

        if self._presenter:
            # Tensorboard
            self._tb_summary = tf.contrib.summary
            self._tb_path = os.path.join(Config.TENSORBOARD_PATH, self._game,
                                         RL_ALGORITHM)
            self._tb_record_intervall = Config.TENSORBOARD_RECORD_INTERVALL
            self._tb_writer = tf.contrib.summary.create_file_writer(
                self._tb_path)
            self._tb_writer.set_as_default()

        # Checkpoints
        # TODO fix conditional checkpoint for shared net
        path_to_this_file = os.path.abspath(os.path.dirname(__file__))
        self._checkpoint_dir = os.path.join(path_to_this_file, checkpoint_dir,
                                            self._game, RL_ALGORITHM)
        self._checkpoint = []
        self._agent_initialized = False
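Note that this example relies on `tf.contrib`, which exists only in TensorFlow 1.x. The constructor also stores `discount_factor = 0.95`, presumably used to compute discounted returns during the A3C update. As a reference, here is a minimal sketch of the standard discounted-return computation; the helper name `discounted_returns` is illustrative and not part of the project.

import numpy as np

def discounted_returns(rewards, gamma=0.95):
    """Compute G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ...

    Illustrative helper only; not taken from rl-demo.
    """
    returns = np.zeros(len(rewards), dtype=np.float64)
    running = 0.0
    # Accumulate from the last step backwards so each G_t reuses G_{t+1}.
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# Example: three steps of reward 1.0 with gamma = 0.95
# -> [1 + 0.95 + 0.9025, 1 + 0.95, 1] = [2.8525, 1.95, 1.0]
print(discounted_returns([1.0, 1.0, 1.0]))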