    def testRemoteTrainingLoss(self):
        ray.init(num_workers=2)

        net = ray.actor(TrainActor)()
        loss, variables, _, sess, grads, train, placeholders = (
            TrainActor().values)

        before_acc = sess.run(loss, feed_dict=dict(
            zip(placeholders, [[2] * 100, [4] * 100])))

        for _ in range(3):
            # Compute gradients remotely twice, average the returned
            # gradients, and apply the averaged gradients with the local
            # session.
            gradients_list = ray.get(
                [net.training_step(variables.get_weights())
                 for _ in range(2)])
            mean_grads = [
                sum([gradients[i] for gradients in gradients_list]) /
                len(gradients_list)
                for i in range(len(gradients_list[0]))]
            feed_dict = {grad[0]: mean_grad
                         for (grad, mean_grad) in zip(grads, mean_grads)}
            sess.run(train, feed_dict=feed_dict)

        after_acc = sess.run(loss, feed_dict=dict(
            zip(placeholders, [[2] * 100, [4] * 100])))
        self.assertTrue(before_acc < after_acc)
        ray.worker.cleanup()
    def testRemoteTrainingStep(self):
        ray.init(num_workers=1)

        net = ray.actor(TrainActor)()
        ray.get(net.training_step(net.get_weights()))

        ray.worker.cleanup()
    def testNetworkDriverWorkerIndependent(self):
        ray.init(num_workers=1)

        # Create a network on the driver locally.
        sess1 = tf.Session()
        loss1, init1, _, _ = make_linear_network()
        net_vars1 = ray.experimental.TensorFlowVariables(loss1, sess1)
        sess1.run(init1)

        # Create a second network on a remote actor and check that setting
        # and then getting its weights returns them unchanged.
        net2 = ray.actor(NetActor)()
        weights2 = ray.get(net2.get_weights())
        new_weights2 = ray.get(net2.set_and_get_weights(net2.get_weights()))
        self.assertEqual(weights2, new_weights2)

        ray.worker.cleanup()
class Agent(object):
    def __init__(self, name, batchsize, preprocessor, config, use_gpu):
        # Hide GPUs from TensorFlow when the agent should run on CPU only.
        if not use_gpu:
            os.environ["CUDA_VISIBLE_DEVICES"] = ""
        self.env = BatchedEnv(name, batchsize, preprocessor=preprocessor)
        if preprocessor.shape is None:
            preprocessor.shape = self.env.observation_space.shape
        self.sess = tf.Session()
        self.ppo = ProximalPolicyLoss(
            self.env.observation_space, self.env.action_space,
            preprocessor, config, self.sess)
        self.optimizer = tf.train.AdamOptimizer(config["sgd_stepsize"])
        self.train_op = self.optimizer.minimize(self.ppo.loss)
        self.variables = ray.experimental.TensorFlowVariables(
            self.ppo.loss, self.sess)
        self.observation_filter = MeanStdFilter(preprocessor.shape, clip=None)
        self.reward_filter = MeanStdFilter((), clip=5.0)
        self.sess.run(tf.global_variables_initializer())

    def get_weights(self):
        return self.variables.get_weights()

    def load_weights(self, weights):
        self.variables.set_weights(weights)

    def compute_trajectory(self, gamma, lam, horizon):
        # Roll out one trajectory and annotate it with advantage estimates.
        trajectory = rollouts(self.ppo, self.env, horizon,
                              self.observation_filter, self.reward_filter)
        add_advantage_values(trajectory, gamma, lam, self.reward_filter)
        return trajectory


RemoteAgent = ray.actor(Agent)
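
# A minimal sketch of how RemoteAgent might be driven from a training loop.
# It assumes the call style used in the tests above (ray.actor actors invoked
# without .remote()). The environment name, batch size, config contents,
# number of agents, and the gamma/lam/horizon values are illustrative
# placeholders, not values taken from the code above.
def train_sketch(preprocessor, config, num_agents=4, num_iterations=10):
    ray.init(num_workers=num_agents)
    # One local agent holds the authoritative weights and runs SGD; the
    # remote agents only collect trajectories.
    local = Agent("Pong-v0", 64, preprocessor, config, use_gpu=False)
    remotes = [RemoteAgent("Pong-v0", 64, preprocessor, config, False)
               for _ in range(num_agents)]
    for _ in range(num_iterations):
        # Broadcast the driver's current weights to every remote agent.
        weights = local.get_weights()
        for agent in remotes:
            agent.load_weights(weights)
        # Gather one trajectory per remote agent in parallel.
        trajectories = ray.get([agent.compute_trajectory(0.995, 0.97, 2000)
                                for agent in remotes])
        # ... run SGD on the collected trajectories with local.sess and
        # local.train_op, then repeat.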