def __init__(self, algo, game, kwargs_agent):
    """Remember the run configuration and fetch the active logger.

    Args:
        algo: Stored unchanged on ``self._algo``.
        game: Stored unchanged on ``self._game``.
        kwargs_agent: Stored unchanged on ``self._kwargs_agent``
            (presumably keyword arguments for building the agent --
            confirm against the caller).
    """
    # Targets are assigned left to right, i.e. in the original order.
    self._algo, self._game, self._kwargs_agent = algo, game, kwargs_agent

    # The logger is looked up last, after the configuration is stored.
    self.log = Logger.get_active_logger()
def __init__(self, mode, algo, game, render, num_threads, kwargs_agent):
    """Remember the run configuration and fetch the active logger.

    Args:
        mode: Stored unchanged on ``self._mode``.
        algo: Stored unchanged on ``self._algo``.
        game: Stored unchanged on ``self._game``.
        render: Stored unchanged on ``self._render``.
        num_threads: Stored unchanged on the public attribute
            ``self.num_threads``.
        kwargs_agent: Stored unchanged on ``self._kwargs_agent``
            (presumably keyword arguments for building the agent --
            confirm against the caller).
    """
    # Targets are assigned left to right, i.e. in the original order.
    self._mode, self._algo, self._game = mode, algo, game
    self._render = render
    # Note: unlike its siblings this attribute has no leading underscore.
    self.num_threads = num_threads
    self._kwargs_agent = kwargs_agent

    # The logger is looked up last, after the configuration is stored.
    self.log = Logger.get_active_logger()
def __init__(self, game, tensorboard, checkpoint_dir, share_weights,
             presenter=False, name='A3CAgent'):
    """Create the environment, the policy/value networks and bookkeeping state.

    Args:
        game: Environment id handed to ``gym.make``; also used as a path
            component of the TensorBoard and checkpoint directories.
        tensorboard: Stored on ``self._tensorboard``; not used any further
            inside this constructor.
        checkpoint_dir: Path component joined below the directory that
            contains this source file to form ``self._checkpoint_dir``.
        share_weights: If truthy, the policy and the value function are
            built on one common base net; otherwise each gets its own.
        presenter: If truthy, a TensorBoard file writer is created and
            installed as the default summary writer.
        name: Forwarded to the parent constructor as ``name``.
    """
    super(BaseAgent, self).__init__(name=name)

    # --- Environment -----------------------------------------------------
    self._game = game
    self.env = gym.make(game)
    # Presumably populates _observation_shape, _frame_based_observations,
    # _action_dim and _actionspace_is_discrete, which are read below --
    # confirm in set_env_info.
    self.set_env_info(self.env)

    # --- Plain configuration ---------------------------------------------
    self._tensorboard = tensorboard
    self._input_shape = self._observation_shape
    self.share_weights = share_weights
    self._presenter = presenter
    self.log = Logger.get_active_logger()

    # --- Networks --------------------------------------------------------
    # Frame-based observations get the convolutional base, others the dense one.
    self._BaseNet = (ConvBaseNet if self._frame_based_observations
                     else DenseBaseNet)

    if share_weights:
        # A single base net feeds both heads.
        self._base_net = self._BaseNet(self._input_shape)
        self._policy = Policy(self._base_net,
                              self._action_dim,
                              self._actionspace_is_discrete)
        self._valuef = ValueFunction(self._base_net)
    else:
        # Independent base nets; construction order is policy first,
        # then value function.
        self._base_net_policy = self._BaseNet(self._input_shape)
        self._policy = Policy(self._base_net_policy,
                              self._action_dim,
                              self._actionspace_is_discrete)
        self._base_net_valuef = self._BaseNet(self._input_shape)
        self._valuef = ValueFunction(self._base_net_valuef)

    # --- Training state --------------------------------------------------
    self.discount_factor = 0.95
    self.discount_factors = []
    self.ActionDist = tf.contrib.distributions.Normal
    self.history = {'loss': []}

    # --- Tensorboard -----------------------------------------------------
    if self._presenter:
        summary_module = tf.contrib.summary
        self._tb_summary = summary_module
        self._tb_path = os.path.join(Config.TENSORBOARD_PATH,
                                     self._game,
                                     RL_ALGORITHM)
        self._tb_record_intervall = Config.TENSORBOARD_RECORD_INTERVALL
        self._tb_writer = summary_module.create_file_writer(self._tb_path)
        self._tb_writer.set_as_default()

    # --- Checkpoints -----------------------------------------------------
    # NOTE(review): the source formatting was lost, so it is not certain
    # whether this section originally sat inside the `if self._presenter:`
    # guard above. It is kept unconditional here so every instance has the
    # checkpoint attributes -- confirm against the upstream file.
    # TODO fix conditional checkpoint for shared net
    module_dir = os.path.abspath(os.path.dirname(__file__))
    self._checkpoint_dir = os.path.join(module_dir,
                                        checkpoint_dir,
                                        self._game,
                                        RL_ALGORITHM)
    self._checkpoint = []

    self._agent_initialized = False