def __init__(self, env_name, config, upload_dir=None):
    """Initialise the policy-gradient trainer and its agents.

    Tags the config with the algorithm name, runs the base ``Algorithm``
    initialiser, picks an observation preprocessor from the environment
    name, then builds one local ``Agent`` and ``config["num_agents"]``
    ``RemoteAgent`` handles.

    Args:
        env_name: Environment identifier, forwarded to
            ``Algorithm.__init__``.
        config: Configuration dict. It is updated in place with
            ``"alg": "PolicyGradient"`` and must provide ``"kl_coeff"``
            and ``"num_agents"``.
        upload_dir: Forwarded unchanged to ``Algorithm.__init__``.
    """
    config.update({"alg": "PolicyGradient"})
    Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

    # TODO(ekl): preprocessor should be associated with the env elsewhere
    env = self.env_name
    raw_observation_envs = (
        "CartPole-v0",
        "CartPole-v1",
        "Hopper-v1",
        "Walker2d-v1",
        "Humanoid-v1",
    )
    if env == "Pong-ram-v3":
        self.preprocessor = AtariRamPreprocessor()
    elif env in raw_observation_envs:
        self.preprocessor = NoPreprocessor()
    else:
        # "Pong-v0" and every environment not listed above share the
        # pixel preprocessor.
        self.preprocessor = AtariPixelPreprocessor()

    self.global_step = self.j = 0
    self.kl_coeff = config["kl_coeff"]

    # NOTE(review): the literal 1 is the second positional argument of
    # Agent; its meaning is defined by Agent itself -- confirm there.
    prep, cfg, logdir = self.preprocessor, self.config, self.logdir
    # The trailing boolean is False for the local agent and True for
    # every agent created through RemoteAgent.remote.
    self.model = Agent(env, 1, prep, cfg, logdir, False)
    num_remote = config["num_agents"]
    self.agents = [
        RemoteAgent.remote(env, 1, prep, cfg, logdir, True)
        for _ in range(num_remote)
    ]
    self.start_time = time.time()
def __init__(self, env_name, config):
    """Initialise the trainer, its preprocessor and its agents.

    Runs the base ``Algorithm`` initialiser, selects an observation
    preprocessor from the environment name, then builds one local
    ``Agent`` and ``config["num_agents"]`` ``RemoteAgent`` handles.

    Args:
        env_name: Environment identifier, forwarded to
            ``Algorithm.__init__``.
        config: Configuration dict; must provide ``"kl_coeff"`` and
            ``"num_agents"``.
    """
    Algorithm.__init__(self, env_name, config)

    # TODO(ekl) the preprocessor should be associated with the env elsewhere
    env = self.env_name
    if env == "Pong-ram-v3":
        self.preprocessor = AtariRamPreprocessor()
    elif env in ("CartPole-v0", "Walker2d-v1"):
        self.preprocessor = NoPreprocessor()
    else:
        # "Pong-v0" and every environment not listed above share the
        # pixel preprocessor.
        self.preprocessor = AtariPixelPreprocessor()

    self.global_step = self.j = 0
    self.kl_coeff = config["kl_coeff"]

    # NOTE(review): the literal 1 is the second positional argument of
    # Agent; its meaning is defined by Agent itself -- confirm there.
    prep, cfg = self.preprocessor, self.config
    # The trailing boolean is False for the local agent and True for
    # every agent created through RemoteAgent.remote.
    self.model = Agent(env, 1, prep, cfg, False)
    num_remote = config["num_agents"]
    self.agents = [
        RemoteAgent.remote(env, 1, prep, cfg, True)
        for _ in range(num_remote)
    ]
def __init__(self, env_name, config, upload_dir=None):
    """Initialise the policy-gradient trainer and its agents.

    Tags the config with the algorithm name, runs the base ``Algorithm``
    initialiser, then builds one local ``Agent`` and
    ``config["num_agents"]`` ``RemoteAgent`` handles.

    Args:
        env_name: Environment identifier, forwarded to
            ``Algorithm.__init__``.
        config: Configuration dict. It is updated in place with
            ``"alg": "PolicyGradient"`` and must provide ``"kl_coeff"``
            and ``"num_agents"``.
        upload_dir: Forwarded unchanged to ``Algorithm.__init__``.
    """
    config.update({"alg": "PolicyGradient"})
    Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

    self.global_step = self.j = 0
    self.kl_coeff = config["kl_coeff"]

    # NOTE(review): the literal 1 is the second positional argument of
    # Agent; its meaning is defined by Agent itself -- confirm there.
    env, cfg, logdir = self.env_name, self.config, self.logdir
    # The trailing boolean is False for the local agent and True for
    # every agent created through RemoteAgent.remote.
    self.model = Agent(env, 1, cfg, logdir, False)
    num_remote = config["num_agents"]
    self.agents = [
        RemoteAgent.remote(env, 1, cfg, logdir, True)
        for _ in range(num_remote)
    ]
    self.start_time = time.time()
def __init__(self, env_name, config, upload_dir=None):
    """Initialise the trainer, its agents and its TensorFlow helpers.

    Tags the config with the algorithm name, runs the base ``Algorithm``
    initialiser, builds one local ``Agent`` and ``config["num_agents"]``
    ``RemoteAgent`` handles, then sets up an optional summary writer and
    a checkpoint saver.

    Args:
        env_name: Environment identifier, forwarded to
            ``Algorithm.__init__``.
        config: Configuration dict. It is updated in place with
            ``"alg": "PolicyGradient"`` and must provide ``"kl_coeff"``,
            ``"num_agents"`` and ``"write_logs"``.
        upload_dir: Forwarded unchanged to ``Algorithm.__init__``.
    """
    config.update({"alg": "PolicyGradient"})
    Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

    self.global_step = self.j = 0
    self.kl_coeff = config["kl_coeff"]

    # NOTE(review): the literal 1 is the second positional argument of
    # Agent; its meaning is defined by Agent itself -- confirm there.
    env, cfg, logdir = self.env_name, self.config, self.logdir
    # The trailing boolean is False for the local agent and True for
    # every agent created through RemoteAgent.remote.
    self.model = Agent(env, 1, cfg, logdir, False)
    num_remote = config["num_agents"]
    self.agents = [
        RemoteAgent.remote(env, 1, cfg, logdir, True)
        for _ in range(num_remote)
    ]
    self.start_time = time.time()

    # TensorFlow cannot write logs to S3 at the moment, so a summary
    # writer is only created when logging is enabled and the log
    # directory string starts with "file".
    log_locally = config["write_logs"] and self.logdir.startswith("file")
    self.file_writer = (
        tf.summary.FileWriter(self.logdir, self.model.sess.graph)
        if log_locally
        else None
    )
    self.saver = tf.train.Saver(max_to_keep=None)
# is detected "use_tf_debugger": False, # If True, we write checkpoints and tensorflow logging "write_logs": True, # Name of the model checkpoint file "model_checkpoint_file": "iteration-%s.ckpt" } config = DEFAULT_CONFIG config["model"]["fcnet_hiddens"] = [32, 32] #dirname = "/tmp/ray/HIHIHI_PolicyGradient_2017-08-19_23-11-54x6m9b1il" dirname = "/tmp/ray/HIHIHI_PolicyGradient_2017-08-20_02-53-02vwr5ax_v" #dirname = "tmp_checkpoints/" model = Agent("HIHIHI", 1, NoPreprocessor(), config, dirname, False) saver = tf.train.Saver(max_to_keep=None) saver.restore(model.sess, tf.train.latest_checkpoint(dirname)) ################## from pongjsenv import PongJSEnv import matplotlib as mpl import time # mpl.use("MacOSX") import matplotlib.pyplot as plt game = PongJSEnv() terminated = False