Example #1
    def testRemoteTrainingLoss(self):
        ray.init(num_workers=2)

        # One remote copy of the network (the actor) and one local copy on the
        # driver; the local copy is used to apply the averaged gradients.
        net = ray.actor(TrainActor)()
        loss, variables, _, sess, grads, train, placeholders = \
            TrainActor().values

        before_acc = sess.run(loss,
                              feed_dict=dict(
                                  zip(placeholders, [[2] * 100, [4] * 100])))

        for _ in range(3):
            # Ship the driver's current weights to the actor and gather the
            # gradients it computes; two results are averaged per iteration.
            gradients_list = ray.get(
                [net.training_step(variables.get_weights()) for _ in range(2)])
            mean_grads = [
                sum([gradients[i]
                     for gradients in gradients_list]) / len(gradients_list)
                for i in range(len(gradients_list[0]))
            ]
            # Feed the averaged gradients into the local apply-gradients op.
            feed_dict = {
                grad[0]: mean_grad
                for (grad, mean_grad) in zip(grads, mean_grads)
            }
            sess.run(train, feed_dict=feed_dict)
        after_acc = sess.run(loss,
                             feed_dict=dict(
                                 zip(placeholders, [[2] * 100, [4] * 100])))
        self.assertTrue(before_acc < after_acc)
        ray.worker.cleanup()
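The `TrainActor` used in this test is not shown on this page. A minimal sketch of what such a class might look like, assuming the TF 1.x API and `ray.experimental.TensorFlowVariables`; the linear model, the layout of `self.values`, and the data fed inside `training_step` are illustrative assumptions rather than the project's actual code:

import ray
import tensorflow as tf


class TrainActor(object):
    def __init__(self):
        # A small linear model; self.values exposes the pieces the test
        # unpacks: loss, variables, optimizer, session, gradients, train op
        # and the input placeholders.
        x_data = tf.placeholder(tf.float32, shape=[100])
        y_data = tf.placeholder(tf.float32, shape=[100])
        w = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
        b = tf.Variable(tf.zeros([1]))
        loss = tf.reduce_mean(tf.square(w * x_data + b - y_data))
        optimizer = tf.train.GradientDescentOptimizer(0.5)
        grads = optimizer.compute_gradients(loss)
        train = optimizer.apply_gradients(grads)
        sess = tf.Session()
        variables = ray.experimental.TensorFlowVariables(loss, sess)
        sess.run(tf.global_variables_initializer())
        self.values = [loss, variables, optimizer, sess, grads, train,
                       [x_data, y_data]]

    def get_weights(self):
        return self.values[1].get_weights()

    def training_step(self, weights):
        # Load the driver's weights and return the gradient values computed
        # on this actor's (assumed) local batch.
        loss, variables, _, sess, grads, _, placeholders = self.values
        variables.set_weights(weights)
        return sess.run([grad[0] for grad in grads],
                        feed_dict=dict(zip(placeholders,
                                           [[1] * 100, [2] * 100])))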
Example #2
    def testRemoteTrainingStep(self):
        ray.init(num_workers=1)

        net = ray.actor(TrainActor)()
        ray.get(net.training_step(net.get_weights()))

        ray.worker.cleanup()
Example #3
    def testNetworkDriverWorkerIndependent(self):
        ray.init(num_workers=1)

        # Create a network on the driver locally.
        sess1 = tf.Session()
        loss1, init1, _, _ = make_linear_network()
        net_vars1 = ray.experimental.TensorFlowVariables(loss1, sess1)
        sess1.run(init1)

        net2 = ray.actor(NetActor)()
        weights2 = ray.get(net2.get_weights())

        new_weights2 = ray.get(net2.set_and_get_weights(net2.get_weights()))
        self.assertEqual(weights2, new_weights2)

        ray.worker.cleanup()
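Neither `make_linear_network` nor `NetActor` is defined on this page. A plausible sketch under the assumption that `make_linear_network` returns `(loss, init_op, x_placeholder, y_placeholder)`, as the four-way unpacking above suggests; the toy model itself is an assumption:

import ray
import tensorflow as tf


def make_linear_network():
    # A toy linear model: returns the loss, an initializer op and the inputs.
    x_data = tf.placeholder(tf.float32, shape=[100])
    y_data = tf.placeholder(tf.float32, shape=[100])
    w = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
    b = tf.Variable(tf.zeros([1]))
    loss = tf.reduce_mean(tf.square(w * x_data + b - y_data))
    return loss, tf.global_variables_initializer(), x_data, y_data


class NetActor(object):
    def __init__(self):
        # Build the actor's network in its own graph so it stays independent
        # of any network created on the driver.
        with tf.Graph().as_default():
            loss, init, _, _ = make_linear_network()
            sess = tf.Session()
            self.variables = ray.experimental.TensorFlowVariables(loss, sess)
            sess.run(init)

    def get_weights(self):
        return self.variables.get_weights()

    def set_and_get_weights(self, weights):
        self.variables.set_weights(weights)
        return self.variables.get_weights()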
Example #4
File: agent.py  Project: zdoop/ray
class Agent(object):

  def __init__(self, name, batchsize, preprocessor, config, use_gpu):
    if not use_gpu:
      # Hide GPUs from TensorFlow so this agent runs on the CPU.
      os.environ["CUDA_VISIBLE_DEVICES"] = ""
    self.env = BatchedEnv(name, batchsize, preprocessor=preprocessor)
    if preprocessor.shape is None:
      preprocessor.shape = self.env.observation_space.shape
    self.sess = tf.Session()
    self.ppo = ProximalPolicyLoss(self.env.observation_space,
                                  self.env.action_space, preprocessor, config,
                                  self.sess)
    self.optimizer = tf.train.AdamOptimizer(config["sgd_stepsize"])
    self.train_op = self.optimizer.minimize(self.ppo.loss)
    self.variables = ray.experimental.TensorFlowVariables(self.ppo.loss,
                                                          self.sess)
    # Running mean/std filters for observations and rewards.
    self.observation_filter = MeanStdFilter(preprocessor.shape, clip=None)
    self.reward_filter = MeanStdFilter((), clip=5.0)
    self.sess.run(tf.global_variables_initializer())

  def get_weights(self):
    return self.variables.get_weights()

  def load_weights(self, weights):
    self.variables.set_weights(weights)

  def compute_trajectory(self, gamma, lam, horizon):
    trajectory = rollouts(self.ppo, self.env, horizon,
                          self.observation_filter, self.reward_filter)
    add_advantage_values(trajectory, gamma, lam, self.reward_filter)
    return trajectory

RemoteAgent = ray.actor(Agent)
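A sketch of how the resulting `RemoteAgent` handles might be driven, following the old `ray.actor` call style of the examples above; the helper name `collect_trajectories` and the gamma, lam and horizon defaults are illustrative assumptions, not values from the project:

import ray


def collect_trajectories(agents, gamma=0.995, lam=0.98, horizon=1000):
  # agents is a list of RemoteAgent handles, created elsewhere along the
  # lines of RemoteAgent(name, batchsize, preprocessor, config, use_gpu).
  # Broadcast one agent's weights so every copy samples with the same policy.
  weights = ray.get(agents[0].get_weights())
  for agent in agents:
    agent.load_weights(weights)
  # Actor method calls return object IDs, so the rollouts run concurrently
  # across the agents and are fetched together here.
  return ray.get(
      [agent.compute_trajectory(gamma, lam, horizon) for agent in agents])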