def __init__(self, registry, env_creator, config, logdir):
    env = env_creator(config["env_config"])
    env = wrap_dqn(registry, env, config["model"])
    self.env = env
    self.config = config
    tf_config = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=tf_config)
    self.dqn_graph = models.DQNGraph(registry, env, config, logdir)

    # Create the schedule for exploration starting from 1.
    self.exploration = LinearSchedule(
        schedule_timesteps=int(
            config["exploration_fraction"] *
            config["schedule_max_timesteps"]),
        initial_p=1.0,
        final_p=config["exploration_final_eps"])

    # Initialize the parameters and copy them to the target network.
    self.sess.run(tf.global_variables_initializer())
    self.dqn_graph.update_target(self.sess)

    self.global_timestep = 0
    self.local_timestep = 0

    # Note that this encompasses both the Q and target network
    self.variables = ray.experimental.TensorFlowVariables(
        tf.group(self.dqn_graph.q_t, self.dqn_graph.q_tp1), self.sess)

    self.episode_rewards = [0.0]
    self.episode_lengths = [0.0]
    self.saved_mean_reward = None
    self.obs = self.env.reset()

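# A minimal standalone sketch (hypothetical config values, not from the
# original source) of how the exploration LinearSchedule built above anneals
# epsilon: it interpolates linearly from initial_p to final_p over the first
# exploration_fraction * schedule_max_timesteps steps, then stays constant.
def _linear_schedule_example():
    schedule_timesteps = int(0.1 * 1000000)  # fraction * max timesteps
    initial_p, final_p = 1.0, 0.02
    for t in [0, 50000, 100000, 500000]:
        fraction = min(float(t) / schedule_timesteps, 1.0)
        eps = initial_p + fraction * (final_p - initial_p)
        print("t={}: eps={:.3f}".format(t, eps))
    # t=0: eps=1.000; t=50000: eps=0.510; t>=100000: eps=0.020
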
def _init(self):
    config = self.config
    env = gym.make(self.env_name)
    # TODO(ekl): replace this with RLlib preprocessors
    if "NoFrameskip" in self.env_name:
        env = ScaledFloatFrame(wrap_dqn(env))
    self.env = env
    num_cpu = config["num_cpu"]
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=num_cpu,
        intra_op_parallelism_threads=num_cpu)
    self.sess = tf.Session(config=tf_config)
    self.dqn_graph = models.DQNGraph(env, config)

    # Create the replay buffer
    if config["prioritized_replay"]:
        self.replay_buffer = PrioritizedReplayBuffer(
            config["buffer_size"],
            alpha=config["prioritized_replay_alpha"])
        prioritized_replay_beta_iters = (
            config["prioritized_replay_beta_iters"])
        if prioritized_replay_beta_iters is None:
            prioritized_replay_beta_iters = (
                config["schedule_max_timesteps"])
        self.beta_schedule = LinearSchedule(
            prioritized_replay_beta_iters,
            initial_p=config["prioritized_replay_beta0"],
            final_p=1.0)
    else:
        self.replay_buffer = ReplayBuffer(config["buffer_size"])
        self.beta_schedule = None

    # Create the schedule for exploration starting from 1.
    self.exploration = LinearSchedule(
        schedule_timesteps=int(
            config["exploration_fraction"] *
            config["schedule_max_timesteps"]),
        initial_p=1.0,
        final_p=config["exploration_final_eps"])

    # Initialize the parameters and copy them to the target network.
    self.sess.run(tf.global_variables_initializer())
    self.dqn_graph.update_target(self.sess)

    self.episode_rewards = [0.0]
    self.episode_lengths = [0.0]
    self.saved_mean_reward = None
    self.obs = self.env.reset()
    self.num_timesteps = 0
    self.num_iterations = 0
    self.file_writer = tf.summary.FileWriter(self.logdir, self.sess.graph)
    self.saver = tf.train.Saver(max_to_keep=None)

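# A hedged sketch of how the replay buffer and beta_schedule above are
# typically combined at training time. The dqn_graph.train signature and the
# config keys "train_batch_size" and "prioritized_replay_eps" are assumptions
# for illustration, and numpy is assumed imported as np. Annealing beta
# toward 1.0 makes the importance-sampling correction unbiased by the end of
# training.
def _prioritized_replay_step_sketch(self):
    beta = self.beta_schedule.value(self.num_timesteps)
    (obses_t, actions, rewards, obses_tp1, dones, weights,
     batch_indexes) = self.replay_buffer.sample(
         self.config["train_batch_size"], beta=beta)
    td_errors = self.dqn_graph.train(
        self.sess, obses_t, actions, rewards, obses_tp1, dones, weights)
    # Feed the new TD errors back as priorities for the sampled transitions.
    new_priorities = (
        np.abs(td_errors) + self.config["prioritized_replay_eps"])
    self.replay_buffer.update_priorities(batch_indexes, new_priorities)
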
def __init__(self, env_creator, config, logdir):
    env = env_creator()
    env = wrap_dqn(env, config["model"])
    self.env = env
    self.config = config
    tf_config = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=tf_config)
    self.dqn_graph = models.DQNGraph(env, config, logdir)

    # Create the replay buffer
    if config["prioritized_replay"]:
        self.replay_buffer = PrioritizedReplayBuffer(
            config["buffer_size"],
            alpha=config["prioritized_replay_alpha"])
        prioritized_replay_beta_iters = \
            config["prioritized_replay_beta_iters"]
        if prioritized_replay_beta_iters is None:
            prioritized_replay_beta_iters = \
                config["schedule_max_timesteps"]
        self.beta_schedule = LinearSchedule(
            prioritized_replay_beta_iters,
            initial_p=config["prioritized_replay_beta0"],
            final_p=1.0)
    else:
        self.replay_buffer = ReplayBuffer(config["buffer_size"])
        self.beta_schedule = None

    # Create the schedule for exploration starting from 1.
    self.exploration = LinearSchedule(
        schedule_timesteps=int(
            config["exploration_fraction"] *
            config["schedule_max_timesteps"]),
        initial_p=1.0,
        final_p=config["exploration_final_eps"])

    # Initialize the parameters and copy them to the target network.
    self.sess.run(tf.global_variables_initializer())
    self.dqn_graph.update_target(self.sess)

    self.set_weights_time = RunningStat(())
    self.sample_time = RunningStat(())
    self.grad_time = RunningStat(())

    # Note that workers don't need target vars to be synced
    self.variables = ray.experimental.TensorFlowVariables(
        tf.group(self.dqn_graph.q_t, self.dqn_graph.q_tp1), self.sess)

    self.episode_rewards = [0.0]
    self.episode_lengths = [0.0]
    self.saved_mean_reward = None
    self.obs = self.env.reset()
    self.file_writer = tf.summary.FileWriter(logdir, self.sess.graph)

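# A minimal sketch of how the TensorFlowVariables handle above is typically
# used to ship weights between a driver and a worker (the driver/worker
# objects here are illustrative, not from the original source):
# ray.experimental.TensorFlowVariables exposes get_weights()/set_weights()
# over the variables reachable from the grouped ops, so network weights can
# be synced without going through a checkpoint.
def _sync_weights_sketch(driver, worker):
    weights = driver.variables.get_weights()  # dict: variable name -> ndarray
    worker.variables.set_weights(weights)     # assigns in the worker's session
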
def __init__(self, env_creator, config, logdir):
    env = env_creator()
    # TODO(ekl): replace this with RLlib preprocessors
    if "NoFrameskip" in env.spec.id:
        env = ScaledFloatFrame(wrap_dqn(env))
    self.env = env
    self.config = config
    num_cpu = config["num_cpu"]
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=num_cpu,
        intra_op_parallelism_threads=num_cpu)
    self.sess = tf.Session(config=tf_config)
    self.dqn_graph = models.DQNGraph(env, config)

    # Create the replay buffer
    if config["prioritized_replay"]:
        self.replay_buffer = PrioritizedReplayBuffer(
            config["buffer_size"],
            alpha=config["prioritized_replay_alpha"])
        prioritized_replay_beta_iters = \
            config["prioritized_replay_beta_iters"]
        if prioritized_replay_beta_iters is None:
            prioritized_replay_beta_iters = \
                config["schedule_max_timesteps"]
        self.beta_schedule = LinearSchedule(
            prioritized_replay_beta_iters,
            initial_p=config["prioritized_replay_beta0"],
            final_p=1.0)
    else:
        self.replay_buffer = ReplayBuffer(config["buffer_size"])
        self.beta_schedule = None

    # Create the schedule for exploration starting from 1.
    self.exploration = LinearSchedule(
        schedule_timesteps=int(
            config["exploration_fraction"] *
            config["schedule_max_timesteps"]),
        initial_p=1.0,
        final_p=config["exploration_final_eps"])

    # Initialize the parameters and copy them to the target network.
    self.sess.run(tf.global_variables_initializer())
    self.dqn_graph.update_target(self.sess)

    self.variables = ray.experimental.TensorFlowVariables(
        tf.group(self.dqn_graph.q_tp1, self.dqn_graph.q_t), self.sess)

    self.episode_rewards = [0.0]
    self.episode_lengths = [0.0]
    self.saved_mean_reward = None
    self.obs = self.env.reset()
    self.file_writer = tf.summary.FileWriter(logdir, self.sess.graph)

def __init__(self, registry, env_creator, config, logdir, worker_index):
    env = env_creator(config["env_config"])
    env = wrap_dqn(registry, env, config["model"], config["random_starts"])
    self.env = env
    self.config = config

    if not isinstance(env.action_space, Discrete):
        raise UnsupportedSpaceException(
            "Action space {} is not supported for DQN.".format(
                env.action_space))

    tf_config = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=tf_config)
    self.dqn_graph = models.DQNGraph(registry, env, config, logdir)

    # Use either a different `eps` per worker, or a linear schedule.
    if config["per_worker_exploration"]:
        assert config["num_workers"] > 1, "This requires multiple workers"
        self.exploration = ConstantSchedule(
            0.4 ** (
                1 + worker_index / float(config["num_workers"] - 1) * 7))
    else:
        self.exploration = LinearSchedule(
            schedule_timesteps=int(
                config["exploration_fraction"] *
                config["schedule_max_timesteps"]),
            initial_p=1.0,
            final_p=config["exploration_final_eps"])

    # Initialize the parameters and copy them to the target network.
    self.sess.run(tf.global_variables_initializer())
    self.dqn_graph.update_target(self.sess)

    self.global_timestep = 0
    self.local_timestep = 0

    # Note that this encompasses both the Q and target network
    self.variables = ray.experimental.TensorFlowVariables(
        tf.group(self.dqn_graph.q_t, self.dqn_graph.q_tp1), self.sess)

    self.episode_rewards = [0.0]
    self.episode_lengths = [0.0]
    self.saved_mean_reward = None
    self.obs = self.env.reset()

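# A worked example (hypothetical num_workers) of the per-worker constant
# epsilons produced above. The formula eps_i = 0.4 ** (1 + i / (N - 1) * 7)
# matches the per-worker exploration used in the Ape-X paper: it spreads the
# workers over a geometric range of exploration rates, from 0.4 on worker 0
# down to about 0.00066 on the last worker.
def _per_worker_eps_example(num_workers=8):
    for worker_index in range(num_workers):
        eps = 0.4 ** (1 + worker_index / float(num_workers - 1) * 7)
        print("worker {}: eps={:.5f}".format(worker_index, eps))
    # worker 0: eps=0.40000 ... worker 7: eps=0.00066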