Example 1
    def __init__(self, *args, **kwargs):
        super(GymDiscreteProblemWithAgent, self).__init__(*args, **kwargs)
        self._env = None
        self.debug_dump_frames_path = "debug_frames_env"
        self.make_extra_debug_info = True

        # Defaults.
        self.environment_spec = lambda: gym.make(self.env_name)
        self.real_env = self.environment_spec()
        self.in_graph_wrappers = []
        self.collect_hparams = rl.ppo_atari_base()
        self.settable_num_steps = 20000
        self.simulated_environment = None
        self.warm_up = 10  # TODO(piotrm): This should probably be removed.
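
The constructor stores environment_spec as a zero-argument factory around gym.make, so the environment can be re-created on demand, and then instantiates self.real_env eagerly. A minimal standalone sketch of that pattern, assuming the classic gym API and using "CartPole-v0" as a stand-in for whatever self.env_name holds:

    import gym

    # Zero-argument factory, mirroring self.environment_spec above.
    # "CartPole-v0" is a placeholder for self.env_name.
    environment_spec = lambda: gym.make("CartPole-v0")

    # Created eagerly, as the constructor does for self.real_env.
    real_env = environment_spec()
    observation = real_env.reset()  # classic gym: reset() returns the observation
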
Example 2
  def __init__(self, *args, **kwargs):
    super(GymDiscreteProblemWithAgent, self).__init__(*args, **kwargs)
    self._env = None
    self.debug_dump_frames_path = "debug_frames_env"
    self.make_extra_debug_info = True
    self.autoencoder_model = None

    # Defaults.
    self.environment_spec = lambda: gym.make(self.env_name)
    self.real_env = self.environment_spec()
    self.in_graph_wrappers = []
    self.collect_hparams = rl.ppo_atari_base()
    self.settable_num_steps = 20000
    self.simulated_environment = None
    self.warm_up = 10  # TODO(piotrm): This should probably be removed.

    # Debug info.
    self.dones = 0
    self.real_reward = 0
    self.real_env.reset()
    self.total_sim_reward, self.total_real_reward = 0.0, 0.0
    self.sum_of_rewards = 0.0
    self.successful_episode_reward_predictions = 0
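
Example 2 additionally initializes per-run debug counters (dones, real_reward, total_sim_reward, total_real_reward, sum_of_rewards) and resets the real environment up front. The update logic for those counters is not part of the snippet; the following is only a hedged sketch of how such counters are typically accumulated in a step loop, again assuming the classic gym step API:

    import gym

    env = gym.make("CartPole-v0")  # placeholder environment
    dones, total_real_reward = 0, 0.0
    env.reset()
    for _ in range(100):
        # Classic gym step API returns (observation, reward, done, info).
        _, reward, done, _ = env.step(env.action_space.sample())
        total_real_reward += reward  # counterpart of self.total_real_reward
        if done:
            dones += 1               # counterpart of self.dones
            env.reset()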