def start_a3c(cluster, role, task_index):
    """Run one node of a distributed TensorFlow cluster.

    A node whose role is ``'ps'`` only logs a message and waits on the
    in-process server. Any other role takes the worker path: under a
    replica device setter it creates the CartPole environment, a session
    bound to the local server, and an agent, then hands the environment
    and agent to ``start_game``.

    NOTE(review): despite the function name, the worker path constructs a
    ``PPO.Agent`` with model name ``'PPO'``. The worker device string
    always uses the ``worker`` job name, whatever ``role`` is.

    Args:
        cluster: Cluster description forwarded to ``tf.train.Server`` and
            ``tf.train.replica_device_setter``.
        role: Job name of this node; ``'ps'`` selects the parameter-server
            path, any other value the worker path.
        task_index: Index of this task within its job.
    """
    server = tf.train.Server(cluster, job_name=role, task_index=task_index)

    # Parameter-server path: nothing else to set up, just wait on the server.
    if role == 'ps':
        logging.warning('Parameter server started.')
        server.join()
        return

    worker_device = "/job:worker/task:{}".format(task_index)
    device_fn = tf.train.replica_device_setter(
        worker_device=worker_device,
        cluster=cluster,
    )

    with tf.device(device_fn):
        # Environment: seed the wrapped env first, then strip the wrappers.
        wrapped_env = gym.make('CartPole-v0')
        wrapped_env.seed(1)
        env = wrapped_env.unwrapped

        # Session connected to this node's in-process server.
        session = tf.Session(server.target)

        # Agent, sized from the environment's action/observation spaces.
        action_count = env.action_space.n
        observation_size = env.observation_space.shape[0]
        agent_options = {
            KEY_SESSION: session,
            KEY_MODEL_NAME: 'PPO',
            KEY_TRAIN_EPISODE: 1000,
        }
        agent = PPO.Agent(action_count, observation_size, **agent_options)

        start_game(env, agent)
def main(_):
    """Build a CartPole environment and an ``Agent``, then start the game.

    The agent is configured with model name ``'PolicyGradient'`` and
    ``KEY_TRAIN_EPISODE`` set to 10000.

    Args:
        _: Ignored. Presumably the argv list supplied by whatever app
            runner invokes this entry point -- confirm against the caller.
    """
    # Environment: seed the wrapped env first, then strip the wrappers.
    wrapped_env = gym.make('CartPole-v0')
    wrapped_env.seed(1)
    env = wrapped_env.unwrapped

    # Agent, sized from the environment's action/observation spaces.
    action_count = env.action_space.n
    observation_size = env.observation_space.shape[0]
    agent_options = {
        KEY_MODEL_NAME: 'PolicyGradient',
        KEY_TRAIN_EPISODE: 10000,
    }
    agent = Agent(action_count, observation_size, **agent_options)

    start_game(env, agent)