Example #1
    def _init(self):
        """Set up the local evaluator, the remote worker actors, the
        multi-GPU SGD optimizer, and a checkpoint saver."""
        self.global_step = 0

        # Evaluator that runs in this driver process (is_remote=False).
        self.local_evaluator = PPOEvaluator(self.env_creator, self.config,
                                            self.logdir, False)

        # Wrap PPOEvaluator as a Ray actor class with per-worker resources.
        remote_cls = ray.remote(
            num_cpus=self.config["num_cpus_per_worker"],
            num_gpus=self.config["num_gpus_per_worker"])(PPOEvaluator)
        self.remote_evaluators = []
        for _ in range(self.config["num_workers"]):
            self.remote_evaluators.append(
                remote_cls.remote(self.env_creator, self.config,
                                  self.logdir, True))

        # SGD-related settings forwarded to the optimizer.
        sgd_config = {
            "sgd_batch_size": self.config["sgd_batchsize"],
            "sgd_stepsize": self.config["sgd_stepsize"],
            "num_sgd_iter": self.config["num_sgd_iter"],
            "timesteps_per_batch": self.config["timesteps_per_batch"],
        }
        self.optimizer = LocalMultiGPUOptimizer(
            sgd_config, self.local_evaluator, self.remote_evaluators)

        # Keep every checkpoint (no rotation).
        self.saver = tf.train.Saver(max_to_keep=None)
Example #2
 def _init(self):
     """Initialize evaluators, bookkeeping state, and TF logging/saving
     helpers for this agent."""
     self.global_step = 0
     # Mutable copy of the initial KL penalty coefficient.
     self.kl_coeff = self.config["kl_coeff"]

     # Driver-local evaluator (is_remote=False) plus one remote actor
     # per configured worker (is_remote=True).
     self.local_evaluator = PPOEvaluator(
         self.registry, self.env_creator, self.config, self.logdir, False)
     self.remote_evaluators = [
         RemotePPOEvaluator.remote(
             self.registry, self.env_creator, self.config,
             self.logdir, True)
         for _ in range(self.config["num_workers"])]

     self.start_time = time.time()
     # Only attach a TF summary writer when log writing is enabled.
     self.file_writer = (
         tf.summary.FileWriter(
             self.logdir, self.local_evaluator.sess.graph)
         if self.config["write_logs"] else None)
     # Keep every checkpoint (no rotation).
     self.saver = tf.train.Saver(max_to_keep=None)