Example #1
    def __init__(self, env_creator, config, logdir):
        DQNEvaluator.__init__(self, env_creator, config, logdir)

        # Create extra workers if needed
        if self.config["num_workers"] > 1:
            remote_cls = ray.remote(num_cpus=1)(DQNEvaluator)
            self.workers = [
                remote_cls.remote(env_creator, config, logdir)
                for _ in range(self.config["num_workers"])
            ]
        else:
            self.workers = []

        # Create the replay buffer
        if config["prioritized_replay"]:
            self.replay_buffer = PrioritizedReplayBuffer(
                config["buffer_size"],
                alpha=config["prioritized_replay_alpha"])
            prioritized_replay_beta_iters = \
                config["prioritized_replay_beta_iters"]
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = \
                    config["schedule_max_timesteps"]
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=config["prioritized_replay_beta0"],
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(config["buffer_size"])
            self.beta_schedule = None

        self.samples_to_prioritize = None
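
All of the examples on this page build the same beta-annealing schedule for prioritized replay: beta starts at prioritized_replay_beta0 and rises linearly to 1.0 over prioritized_replay_beta_iters steps (or schedule_max_timesteps when that value is None). The following is a minimal, self-contained stand-in for the LinearSchedule helper they call; the real class ships with OpenAI Baselines / Ray, and only the constructor arguments used above are mirrored here.

# Minimal stand-in for the LinearSchedule helper used in these examples.
class LinearSchedule(object):
    def __init__(self, schedule_timesteps, initial_p=1.0, final_p=0.0):
        self.schedule_timesteps = schedule_timesteps
        self.initial_p = initial_p
        self.final_p = final_p

    def value(self, t):
        # Linear interpolation from initial_p to final_p, clamped at final_p.
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)


# Anneal beta from 0.4 (a typical prioritized_replay_beta0) up to 1.0
# over 100000 steps, as the branch above does.
beta_schedule = LinearSchedule(100000, initial_p=0.4, final_p=1.0)
print(beta_schedule.value(0))       # 0.4
print(beta_schedule.value(50000))   # 0.7
print(beta_schedule.value(200000))  # 1.0 (clamped)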
Example #2
File: dqn.py  Project: xgong/ray
    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "DQN"})
        Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)
        env = gym.make(env_name)
        env = ScaledFloatFrame(wrap_dqn(env))
        self.env = env
        model = models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                  hiddens=[256],
                                  dueling=True)
        sess = U.make_session(num_cpu=config["num_cpu"])
        sess.__enter__()

        def make_obs_ph(name):
            return U.BatchInput(env.observation_space.shape, name=name)

        self.act, self.optimize, self.update_target, self.debug = build_train(
            make_obs_ph=make_obs_ph,
            q_func=model,
            num_actions=env.action_space.n,
            optimizer=tf.train.AdamOptimizer(learning_rate=config["lr"]),
            gamma=config["gamma"],
            grad_norm_clipping=10)
        # Create the replay buffer
        if config["prioritized_replay"]:
            self.replay_buffer = PrioritizedReplayBuffer(
                config["buffer_size"],
                alpha=config["prioritized_replay_alpha"])
            prioritized_replay_beta_iters = (
                config["prioritized_replay_beta_iters"])
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = (
                    config["schedule_max_timesteps"])
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=config["prioritized_replay_beta0"],
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(config["buffer_size"])
            self.beta_schedule = None
        # Create the schedule for exploration starting from 1.
        self.exploration = LinearSchedule(
            schedule_timesteps=int(config["exploration_fraction"] *
                                   config["schedule_max_timesteps"]),
            initial_p=1.0,
            final_p=config["exploration_final_eps"])

        # Initialize the parameters and copy them to the target network.
        U.initialize()
        self.update_target()

        self.episode_rewards = [0.0]
        self.episode_lengths = [0.0]
        self.saved_mean_reward = None
        self.obs = self.env.reset()
        self.num_timesteps = 0
        self.num_iterations = 0
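
The exploration schedule built at the end of Example #2 decays epsilon from 1.0 to exploration_final_eps over exploration_fraction * schedule_max_timesteps environment steps, and is consumed one step at a time by epsilon-greedy action selection. A hedged sketch of that per-step usage follows; the epsilon_greedy_action helper and the sample q_values are illustrative, not part of the original code.

import random

def epsilon_greedy_action(q_values, epsilon):
    # With probability epsilon take a uniformly random action,
    # otherwise take the greedy (argmax-Q) action.
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])

# With exploration_fraction=0.1 and schedule_max_timesteps=1000000, epsilon
# reaches exploration_final_eps (e.g. 0.02) after the first 100000 steps.
q_values = [0.1, 0.5, 0.3]
print(epsilon_greedy_action(q_values, epsilon=0.02))  # almost always action 1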
Example #3
File: dqn.py  Project: meikei/ray
    def _init(self):
        config = self.config
        env = gym.make(self.env_name)
        # TODO(ekl): replace this with RLlib preprocessors
        if "NoFrameskip" in self.env_name:
            env = ScaledFloatFrame(wrap_dqn(env))
        self.env = env

        num_cpu = config["num_cpu"]
        tf_config = tf.ConfigProto(
            inter_op_parallelism_threads=num_cpu,
            intra_op_parallelism_threads=num_cpu)
        self.sess = tf.Session(config=tf_config)
        self.dqn_graph = models.DQNGraph(env, config)

        # Create the replay buffer
        if config["prioritized_replay"]:
            self.replay_buffer = PrioritizedReplayBuffer(
                config["buffer_size"],
                alpha=config["prioritized_replay_alpha"])
            prioritized_replay_beta_iters = (
                config["prioritized_replay_beta_iters"])
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = (
                    config["schedule_max_timesteps"])
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=config["prioritized_replay_beta0"],
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(config["buffer_size"])
            self.beta_schedule = None
        # Create the schedule for exploration starting from 1.
        self.exploration = LinearSchedule(
            schedule_timesteps=int(
                config["exploration_fraction"] *
                config["schedule_max_timesteps"]),
            initial_p=1.0,
            final_p=config["exploration_final_eps"])

        # Initialize the parameters and copy them to the target network.
        self.sess.run(tf.global_variables_initializer())
        self.dqn_graph.update_target(self.sess)

        self.episode_rewards = [0.0]
        self.episode_lengths = [0.0]
        self.saved_mean_reward = None
        self.obs = self.env.reset()
        self.num_timesteps = 0
        self.num_iterations = 0
        self.file_writer = tf.summary.FileWriter(self.logdir, self.sess.graph)
        self.saver = tf.train.Saver(max_to_keep=None)
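
Example #3 builds its TensorFlow session by hand rather than through baselines' U.make_session, capping both thread pools at config["num_cpu"]. Below is a minimal sketch of that setup, assuming a concrete num_cpu value; ConfigProto and Session are TensorFlow 1.x APIs, reached through tf.compat.v1 here so the snippet also runs under TensorFlow 2.

import tensorflow as tf

tf1 = tf.compat.v1
num_cpu = 4  # the example reads this from config["num_cpu"]
tf_config = tf1.ConfigProto(
    inter_op_parallelism_threads=num_cpu,   # threads for independent ops
    intra_op_parallelism_threads=num_cpu)   # threads within a single op
sess = tf1.Session(config=tf_config)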
Example #4
File: dqn.py  Project: zcli/ray
    def __init__(self, env_creator, config, logdir):
        env = env_creator()
        env = wrap_dqn(env, config["model"])
        self.env = env
        self.config = config

        tf_config = tf.ConfigProto(**config["tf_session_args"])
        self.sess = tf.Session(config=tf_config)
        self.dqn_graph = models.DQNGraph(env, config, logdir)

        # Create the replay buffer
        if config["prioritized_replay"]:
            self.replay_buffer = PrioritizedReplayBuffer(
                config["buffer_size"],
                alpha=config["prioritized_replay_alpha"])
            prioritized_replay_beta_iters = \
                config["prioritized_replay_beta_iters"]
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = \
                    config["schedule_max_timesteps"]
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=config["prioritized_replay_beta0"],
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(config["buffer_size"])
            self.beta_schedule = None
        # Create the schedule for exploration starting from 1.
        self.exploration = LinearSchedule(
            schedule_timesteps=int(config["exploration_fraction"] *
                                   config["schedule_max_timesteps"]),
            initial_p=1.0,
            final_p=config["exploration_final_eps"])

        # Initialize the parameters and copy them to the target network.
        self.sess.run(tf.global_variables_initializer())
        self.dqn_graph.update_target(self.sess)
        self.set_weights_time = RunningStat(())
        self.sample_time = RunningStat(())
        self.grad_time = RunningStat(())

        # Note that workers don't need target vars to be synced
        self.variables = ray.experimental.TensorFlowVariables(
            tf.group(self.dqn_graph.q_t, self.dqn_graph.q_tp1), self.sess)

        self.episode_rewards = [0.0]
        self.episode_lengths = [0.0]
        self.saved_mean_reward = None
        self.obs = self.env.reset()
        self.file_writer = tf.summary.FileWriter(logdir, self.sess.graph)
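
Examples #1 and #4 are the distributed variants: the driver process holds the graph weights (wrapped in ray.experimental.TensorFlowVariables) and pushes them to remote evaluator actors. A minimal, hedged sketch of that broadcast pattern with plain Ray actors follows; the Worker actor and the dummy weights dict are illustrative stand-ins, not the original DQNEvaluator API.

import ray

ray.init()

@ray.remote
class Worker(object):
    def __init__(self):
        self.weights = None

    def set_weights(self, weights):
        # In the original code this would call
        # self.variables.set_weights(weights) on the evaluator's graph.
        self.weights = weights

workers = [Worker.remote() for _ in range(2)]

# Put the weights into the object store once and share the handle,
# instead of serializing a separate copy for every actor.
weights_ref = ray.put({"q_func/kernel": [0.1, 0.2, 0.3]})
ray.get([w.set_weights.remote(weights_ref) for w in workers])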
Example #5
File: dqn.py  Project: cybermaster/ray
    def __init__(self, env_creator, config, logdir):
        env = env_creator()
        # TODO(ekl): replace this with RLlib preprocessors
        if "NoFrameskip" in env.spec.id:
            env = ScaledFloatFrame(wrap_dqn(env))
        self.env = env
        self.config = config

        num_cpu = config["num_cpu"]
        tf_config = tf.ConfigProto(inter_op_parallelism_threads=num_cpu,
                                   intra_op_parallelism_threads=num_cpu)
        self.sess = tf.Session(config=tf_config)
        self.dqn_graph = models.DQNGraph(env, config)

        # Create the replay buffer
        if config["prioritized_replay"]:
            self.replay_buffer = PrioritizedReplayBuffer(
                config["buffer_size"],
                alpha=config["prioritized_replay_alpha"])
            prioritized_replay_beta_iters = \
                config["prioritized_replay_beta_iters"]
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = \
                    config["schedule_max_timesteps"]
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=config["prioritized_replay_beta0"],
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(config["buffer_size"])
            self.beta_schedule = None
        # Create the schedule for exploration starting from 1.
        self.exploration = LinearSchedule(
            schedule_timesteps=int(config["exploration_fraction"] *
                                   config["schedule_max_timesteps"]),
            initial_p=1.0,
            final_p=config["exploration_final_eps"])

        # Initialize the parameters and copy them to the target network.
        self.sess.run(tf.global_variables_initializer())
        self.dqn_graph.update_target(self.sess)
        self.variables = ray.experimental.TensorFlowVariables(
            tf.group(self.dqn_graph.q_tp1, self.dqn_graph.q_t), self.sess)

        self.episode_rewards = [0.0]
        self.episode_lengths = [0.0]
        self.saved_mean_reward = None
        self.obs = self.env.reset()
        self.file_writer = tf.summary.FileWriter(logdir, self.sess.graph)
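
Taken together, the five snippets follow the same constructor skeleton and differ mainly in how distributed they are: wrap the Gym environment for DQN, create the TensorFlow session and Q-graph, build the (optionally prioritized) replay buffer with its beta schedule, build the linear exploration schedule, initialize the variables and sync the target network, and finish with episode bookkeeping plus, where present, remote workers and summary writers.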