Example #1
    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "PolicyGradient"})

        Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

        # TODO(ekl): preprocessor should be associated with the env elsewhere
        if self.env_name == "Pong-v0":
            preprocessor = AtariPixelPreprocessor()
        elif self.env_name == "Pong-ram-v3":
            preprocessor = AtariRamPreprocessor()
        elif self.env_name == "CartPole-v0" or self.env_name == "CartPole-v1":
            preprocessor = NoPreprocessor()
        elif self.env_name == "Hopper-v1":
            preprocessor = NoPreprocessor()
        elif self.env_name == "Walker2d-v1":
            preprocessor = NoPreprocessor()
        elif self.env_name == "Humanoid-v1":
            preprocessor = NoPreprocessor()
        else:
            preprocessor = AtariPixelPreprocessor()

        self.preprocessor = preprocessor
        self.global_step = 0
        self.j = 0
        self.kl_coeff = config["kl_coeff"]
        self.model = Agent(self.env_name, 1, self.preprocessor, self.config,
                           self.logdir, False)
        self.agents = [
            RemoteAgent.remote(self.env_name, 1, self.preprocessor,
                               self.config, self.logdir, True)
            for _ in range(config["num_agents"])
        ]
        self.start_time = time.time()
Example #2
  def __init__(self, env_name, config):
    Algorithm.__init__(self, env_name, config)

    # TODO(ekl) the preprocessor should be associated with the env elsewhere
    if self.env_name == "Pong-v0":
      preprocessor = AtariPixelPreprocessor()
    elif self.env_name == "Pong-ram-v3":
      preprocessor = AtariRamPreprocessor()
    elif self.env_name == "CartPole-v0":
      preprocessor = NoPreprocessor()
    elif self.env_name == "Walker2d-v1":
      preprocessor = NoPreprocessor()
    else:
      preprocessor = AtariPixelPreprocessor()

    self.preprocessor = preprocessor
    self.global_step = 0
    self.j = 0
    self.kl_coeff = config["kl_coeff"]
    self.model = Agent(
        self.env_name, 1, self.preprocessor, self.config, False)
    self.agents = [
        RemoteAgent.remote(
            self.env_name, 1, self.preprocessor, self.config, True)
        for _ in range(config["num_agents"])]
Example #3
    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "PolicyGradient"})

        Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

        self.global_step = 0
        self.j = 0
        self.kl_coeff = config["kl_coeff"]
        self.model = Agent(self.env_name, 1, self.config, self.logdir, False)
        self.agents = [
            RemoteAgent.remote(
                self.env_name, 1, self.config, self.logdir, True)
            for _ in range(config["num_agents"])]
        self.start_time = time.time()
Example #4
    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "PolicyGradient"})

        Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

        self.global_step = 0
        self.j = 0
        self.kl_coeff = config["kl_coeff"]
        self.model = Agent(self.env_name, 1, self.config, self.logdir, False)
        self.agents = [
            RemoteAgent.remote(
                self.env_name, 1, self.config, self.logdir, True)
            for _ in range(config["num_agents"])]
        self.start_time = time.time()
        # TF does not support writing logs to S3 at the moment
        write_tf_logs = config["write_logs"] and self.logdir.startswith("file")
        if write_tf_logs:
            self.file_writer = tf.summary.FileWriter(
                self.logdir, self.model.sess.graph)
        else:
            self.file_writer = None
        self.saver = tf.train.Saver(max_to_keep=None)
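Examples #1-#4 all show variants of the same constructor, but none of them shows how it is invoked. The sketch below is a hypothetical call, assuming the class is named PolicyGradient (suggested by config.update({"alg": "PolicyGradient"})) and that the config dict supplies at least the "kl_coeff", "num_agents", and "write_logs" keys that the constructors read directly; any other keys and their defaults are not visible in these excerpts.

# Hypothetical usage sketch, not the project's actual driver script.
import ray

ray.init()  # RemoteAgent.remote(...) requires Ray to be initialized

config = {
    "kl_coeff": 0.2,     # read directly by __init__
    "num_agents": 4,     # number of RemoteAgent actors created
    "write_logs": True,  # enables the tf.summary.FileWriter in Example #4
    # ... plus whatever model/optimizer keys Agent expects
}

alg = PolicyGradient("CartPole-v0", config)  # assumed class name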
Example #5
    # is detected
    "use_tf_debugger": False,
    # If True, we write checkpoints and TensorFlow logs
    "write_logs": True,
    # Name of the model checkpoint file
    "model_checkpoint_file": "iteration-%s.ckpt"
}

config = DEFAULT_CONFIG
config["model"]["fcnet_hiddens"] = [32, 32]

#dirname = "/tmp/ray/HIHIHI_PolicyGradient_2017-08-19_23-11-54x6m9b1il"
dirname = "/tmp/ray/HIHIHI_PolicyGradient_2017-08-20_02-53-02vwr5ax_v"
#dirname = "tmp_checkpoints/"

model = Agent("HIHIHI", 1, NoPreprocessor(), config, dirname, False)

saver = tf.train.Saver(max_to_keep=None)
saver.restore(model.sess, tf.train.latest_checkpoint(dirname))

##################

from pongjsenv import PongJSEnv
import matplotlib as mpl
import time
# mpl.use("MacOSX")

import matplotlib.pyplot as plt

game = PongJSEnv()
terminated = False
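
The excerpt ends just before the evaluation loop. The sketch below is one way such a loop might continue; it assumes PongJSEnv follows the usual gym reset()/step() convention and uses a hypothetical compute_action() stand-in, since the excerpt does not show which inference method the restored Agent exposes.

# Sketch only: gym-style env API assumed; compute_action() is a
# hypothetical stand-in for the restored model's inference call.
observation = game.reset()
while not terminated:
    action = compute_action(model, observation)
    observation, reward, terminated, info = game.step(action)
    time.sleep(0.05)  # slow the loop down enough to watch the game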