def testPutGet(self):
    """Round-trip simple values through ``ray.put`` / ``ray.get``.

    Four families of values (ints, floats, strings and lists) are exercised
    with 100 values each; every value read back must compare equal to the
    value that was stored.
    """
    ray.init(start_ray_local=True, num_workers=0)

    # One builder per value family, listed in the order they are exercised.
    builders = [
        lambda k: k * 10 ** 6,        # integers
        lambda k: k * 10 ** 6 * 1.0,  # floats
        lambda k: "h" * k,            # strings of growing length
        lambda k: [1] * k,            # lists of growing length
    ]
    for build in builders:
        for k in range(100):
            stored = build(k)
            object_id = ray.put(stored)
            fetched = ray.get(object_id)
            self.assertEqual(stored, fetched)

    ray.worker.cleanup()
def testObjStore(self):
    """Check that objects survive put/get within and across object stores.

    A local cluster with two object stores is started and one extra worker
    is attached to each store. Every object in RAY_TEST_OBJECTS must be
    unchanged after being put by one worker and fetched by the same or the
    other worker. Finally, a value written into an allocated buffer must be
    visible once the buffer has been finished.
    """
    node_ip_address = "127.0.0.1"
    scheduler_address = ray.services.start_ray_local(
        num_objstores=2, num_workers=0, worker_path=None)
    ray.connect(node_ip_address, scheduler_address, mode=ray.SCRIPT_MODE)
    objstore_addresses = [
        info["address"] for info in ray.scheduler_info()["objstores"]
    ]

    w1 = ray.worker.Worker()
    w2 = ray.worker.Worker()
    # Attach worker k to object store k.
    for store_index, worker in enumerate((w1, w2)):
        # This is a hack to make the test run.
        ray.reusables._cached_reusables = []
        ray.connect(node_ip_address, scheduler_address,
                    objstore_address=objstore_addresses[store_index],
                    mode=ray.SCRIPT_MODE, worker=worker)

    registered = [Foo, Bar, Baz, Qux, SubQux, Exception, CustomError, Point,
                  NamedTupleExample]
    for cls in registered:
        ray.register_class(cls)

    # (writer, reader) pairs:
    #   1. putting and getting an object shouldn't change it
    #   2. putting an object, shipping it to another worker, and getting it
    #      shouldn't change it
    #   3. same as 2 with the roles of the two workers swapped
    for writer, reader in ((w1, w1), (w1, w2), (w2, w1)):
        for data in RAY_TEST_OBJECTS:
            objectid = ray.put(data, writer)
            result = ray.get(objectid, reader)
            assert_equal(result, data)

    # This test fails. See https://github.com/ray-project/ray/issues/159.
    # getting multiple times shouldn't matter
    # for data in [np.zeros([10, 20]), np.random.normal(size=[45, 25]),
    #              np.zeros([10, 20], dtype=np.dtype("float64")),
    #              np.zeros([10, 20], dtype=np.dtype("float32")),
    #              np.zeros([10, 20], dtype=np.dtype("int64")),
    #              np.zeros([10, 20], dtype=np.dtype("int32"))]:
    #   objectid = worker.put(data, w1)
    #   result = worker.get(objectid, w2)
    #   result = worker.get(objectid, w2)
    #   result = worker.get(objectid, w2)
    #   assert_equal(result, data)

    # Getting a buffer after modifying it before it finishes should return
    # updated buffer
    handle = w1.handle
    objectid = ray.libraylib.get_objectid(handle)
    buf = ray.libraylib.allocate_buffer(handle, objectid, 100)
    buf[0][0] = 1
    ray.libraylib.finish_buffer(handle, objectid, buf[1], 0)
    completedbuffer = ray.libraylib.get_buffer(handle, objectid)
    self.assertEqual(completedbuffer[0][0], 1)

    # We started multiple drivers manually, so we will disconnect them
    # manually.
    for worker in (w1, w2):
        ray.disconnect(worker=worker)

    ray.worker.cleanup()
def test_getting_and_putting(ray_start_sharded):
    """Stress ``ray.put`` / ``ray.get`` with zero arrays of growing size.

    For every power of ten from 10**0 to 10**7 elements, the array is put
    101 times and the last reference is fetched 1000 times. Afterwards the
    processes tracked by ``ray.services`` must still be alive.
    """
    for exponent in range(8):
        array = np.zeros(10**exponent)

        put_count = 0
        while put_count < 100:
            ray.put(array)
            put_count += 1

        array_id = ray.put(array)

        get_count = 0
        while get_count < 1000:
            ray.get(array_id)
            get_count += 1

    assert ray.services.remaining_processes_alive()
def testGettingAndPutting(self):
    """Stress ``ray.put`` / ``ray.get`` and verify all processes stay alive.

    Zero arrays with 10**0 .. 10**7 elements are each put 101 times and the
    final reference is fetched 1000 times.
    """
    ray.init(num_workers=1)

    for exponent in range(8):
        array = np.zeros(10 ** exponent)

        # 100 throw-away puts followed by one whose ID is kept.
        for _attempt in range(100):
            ray.put(array)
        array_id = ray.put(array)

        for _attempt in range(1000):
            ray.get(array_id)

    alive = ray.services.all_processes_alive()
    self.assertTrue(alive)

    ray.worker.cleanup()
def step(self):
    """Run one asynchronous-gradient optimization step.

    Every remote evaluator receives the current local weights and is asked
    for a gradient. Gradients are then consumed in FIFO order: each one that
    is not None is applied to the local evaluator, and while fewer than
    ``self.grads_per_step`` gradient tasks have been launched, the evaluator
    that just finished is given fresh weights and a new task.
    """
    weights = ray.put(self.local_evaluator.get_weights())
    pending = []  # FIFO of (gradient future, evaluator that computes it)
    launched = 0

    # Kick off the first wave of async tasks
    for evaluator in self.remote_evaluators:
        evaluator.set_weights.remote(weights)
        future = evaluator.compute_gradients.remote(evaluator.sample.remote())
        pending.append((future, evaluator))
        launched += 1

    # Note: can't use wait: https://github.com/ray-project/ray/issues/1128
    while pending:
        with self.wait_timer:
            future, evaluator = pending.pop(0)
            gradient = ray.get(future)

        if gradient is not None:
            with self.apply_timer:
                self.local_evaluator.apply_gradients(gradient)

        if launched < self.grads_per_step:
            with self.dispatch_timer:
                # Re-dispatch with the weights as they are right now.
                evaluator.set_weights.remote(
                    self.local_evaluator.get_weights())
                future = evaluator.compute_gradients.remote(
                    evaluator.sample.remote())
                pending.append((future, evaluator))
                launched += 1

    self.num_steps_sampled += self.grads_per_step * self.batch_size
    self.num_steps_trained += self.grads_per_step * self.batch_size
def modified_lu(q):
    """Perform a modified LU decomposition of a matrix.

    Given a matrix q with orthonormal columns, compute l, u, s such that
    q - s = l * u.

    Args:
        q: A two dimensional orthonormal matrix q. It must provide an
            ``assemble()`` method returning the dense array.

    Returns:
        A tuple ``(l, u, s)``: l is the lower triangular factor as returned
        by ``core.numpy_to_dist``, u is the upper triangular factor, and s
        is a vector representing a diagonal matrix.
    """
    dense = q.assemble()
    n_rows, n_cols = dense.shape[0], dense.shape[1]
    signs = np.zeros(n_cols)
    work = np.copy(dense)

    for k in range(n_cols):
        below = slice(k + 1, n_rows)
        right = slice(k + 1, n_cols)

        signs[k] = -1 * np.sign(work[k, k])
        work[k, k] -= signs[k]
        # Scale kth column of L by diagonal element.
        work[below, k] /= work[k, k]
        # Perform Schur complement update.
        work[below, right] -= np.outer(work[below, k], work[k, right])

    lower = np.tril(work)
    diagonal = np.arange(n_cols)
    lower[diagonal, diagonal] = 1  # unit diagonal for L
    upper = np.triu(work)[:n_cols, :]

    # TODO(rkn): Get rid of the put below.
    lower_dist = ray.get(core.numpy_to_dist.remote(ray.put(lower)))
    return lower_dist, upper, signs
def testRecursiveObjects(self):
    """Check that putting self-referential objects raises an exception."""
    ray.init(start_ray_local=True, num_workers=0)

    class ClassA(object):
        pass

    ray.register_class(ClassA)

    # A list that contains itself.
    self_list = []
    self_list.append(self_list)

    # An object that contains itself as a field.
    self_object = ClassA()
    self_object.field = self_object

    # Two objects that contain each other as fields.
    first = ClassA()
    second = ClassA()
    first.field = second
    second.field = first

    # A dictionary that contains itself.
    self_dict = {}
    self_dict["key"] = self_dict

    # Serializing any of the recursive objects must raise.
    for obj in [self_list, self_object, first, second, self_dict]:
        self.assertRaises(Exception, ray.put, obj)

    ray.worker.cleanup()
def step(self):
    """Run one synchronous sample-then-optimize step.

    Broadcasts the local weights to the remote evaluators (if any), gathers
    sample batches until their total ``count`` reaches
    ``self.train_batch_size``, then calls ``compute_apply`` on the
    concatenated batch ``self.num_sgd_iter`` times.

    Returns:
        The value returned by the last ``compute_apply`` call.
    """
    with self.update_weights_timer:
        if self.remote_evaluators:
            weights = ray.put(self.local_evaluator.get_weights())
            for evaluator in self.remote_evaluators:
                evaluator.set_weights.remote(weights)

    with self.sample_timer:
        collected = []
        while sum(b.count for b in collected) < self.train_batch_size:
            if self.remote_evaluators:
                requests = [
                    evaluator.sample.remote()
                    for evaluator in self.remote_evaluators
                ]
                collected.extend(ray.get(requests))
            else:
                collected.append(self.local_evaluator.sample())
        samples = SampleBatch.concat_samples(collected)
        self.sample_timer.push_units_processed(samples.count)

    with self.grad_timer:
        for iteration in range(self.num_sgd_iter):
            fetches = self.local_evaluator.compute_apply(samples)
            if "stats" in fetches:
                self.learner_stats = fetches["stats"]
            if self.num_sgd_iter > 1:
                logger.debug("{} {}".format(iteration, fetches))
        self.grad_timer.push_units_processed(samples.count)

    self.num_steps_sampled += samples.count
    self.num_steps_trained += samples.count
    return fetches
def modified_lu(q):
    """Perform the modified LU decomposition of an orthonormal matrix.

    Algorithm 5 from
    http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf

    Takes a matrix q with orthonormal columns and returns l, u, s such that
    q - s = l * u.

    Args:
        q: A two dimensional orthonormal matrix; ``q.assemble()`` is called
            to obtain the dense array.

    Returns:
        A tuple ``(l, u, s)`` where l is lower triangular (as returned by
        ``numpy_to_dist``), u is upper triangular, and s is a diagonal
        matrix represented by its diagonal.
    """
    matrix = q.assemble()
    height, width = matrix.shape[0], matrix.shape[1]
    diag_s = np.zeros(width)
    scratch = np.copy(matrix)

    for col in range(width):
        nxt = col + 1
        diag_s[col] = -1 * np.sign(scratch[col, col])
        scratch[col, col] -= diag_s[col]
        # scale ith column of L by diagonal element
        scratch[nxt:height, col] /= scratch[col, col]
        # perform Schur complement update
        scratch[nxt:height, nxt:width] -= np.outer(
            scratch[nxt:height, col], scratch[col, nxt:width])

    lower = np.tril(scratch)
    for col in range(width):
        lower[col, col] = 1
    upper = np.triu(scratch)[:width, :]

    # TODO(rkn): get rid of put
    return ray.get(numpy_to_dist.remote(ray.put(lower))), upper, diag_s
def numpy_to_dist(a):
    """Split a NumPy array into blocks and put each block in the object store.

    A ``DistArray`` with the same shape as ``a`` is created. For every block
    index of that array, the matching sub-array of ``a`` (bounded by
    ``DistArray.compute_block_lower`` / ``compute_block_upper``) is stored
    with ``ray.put`` and its ID is recorded in ``result.objectids``.

    Args:
        a: The NumPy array to distribute.

    Returns:
        The populated ``DistArray``.
    """
    result = DistArray(a.shape)
    for index in np.ndindex(*result.num_blocks):
        lower = DistArray.compute_block_lower(index, a.shape)
        upper = DistArray.compute_block_upper(index, a.shape)
        # Multi-dimensional indexing must use a *tuple* of slices. Indexing
        # with a list of slices was deprecated in NumPy 1.15 and is no longer
        # interpreted as one slice per axis in current releases.
        block = a[tuple(slice(lo, hi) for lo, hi in zip(lower, upper))]
        result.objectids[index] = ray.put(block)
    return result
def step(self):
    """Collect sample batches into the replay buffer, then maybe optimize.

    Broadcasts local weights to the remote evaluators (if any), samples one
    batch per remote evaluator (or one local batch), wraps plain
    ``SampleBatch`` objects as ``MultiAgentBatch``, appends them to
    ``self.replay_buffer`` and evicts the oldest batches while
    ``self.buffer_size`` exceeds ``self.max_buffer_size``. ``_optimize`` is
    called once ``self.num_steps_sampled`` has reached
    ``self.replay_starts``.
    """
    with self.update_weights_timer:
        if self.remote_evaluators:
            weights = ray.put(self.local_evaluator.get_weights())
            for evaluator in self.remote_evaluators:
                evaluator.set_weights.remote(weights)

    with self.sample_timer:
        if self.remote_evaluators:
            requests = [ev.sample.remote() for ev in self.remote_evaluators]
            batches = ray.get(requests)
        else:
            batches = [self.local_evaluator.sample()]

        # Handle everything as if multiagent
        batches = [
            MultiAgentBatch({
                DEFAULT_POLICY_ID: b
            }, b.count) if isinstance(b, SampleBatch) else b
            for b in batches
        ]

        for batch in batches:
            self.replay_buffer.append(batch)
            self.num_steps_sampled += batch.count
            self.buffer_size += batch.count
            # Drop the oldest batches until the buffer fits again.
            while self.buffer_size > self.max_buffer_size:
                oldest = self.replay_buffer.pop(0)
                self.buffer_size -= oldest.count

    if self.num_steps_sampled >= self.replay_starts:
        self._optimize()
def testRegisterClass(self):
    """Check the error and success paths of ``ray.register_class``.

    Verifies that (1) putting an instance of an unregistered class raises,
    (2) registering ``type(True)`` raises, and (3) after registering
    ``type(float)`` with ``pickle=True``, the ``float`` class object can be
    round-tripped through the object store.
    """
    ray.init(start_ray_local=True, num_workers=0)

    # Check that putting an object of a class that has not been registered
    # throws an exception.
    class TempClass(object):
        pass

    # Put an instance of the unregistered class defined above. Previously
    # the unrelated module-level ``Foo`` class object was put and TempClass
    # was never used, so the case described in the comment was not tested.
    self.assertRaises(Exception, lambda: ray.put(TempClass()))

    # Check that registering a class that Ray cannot serialize efficiently
    # raises an exception.
    self.assertRaises(Exception, lambda: ray.register_class(type(True)))

    # Check that registering a class with pickle works. Note that
    # ``type(float)`` is the metaclass ``type``, which is the class of the
    # ``float`` object that is put below.
    ray.register_class(type(float), pickle=True)
    self.assertEqual(ray.get(ray.put(float)), float)

    ray.worker.cleanup()
def step(self):
    """Sample once, store every row in the per-policy replay buffers.

    Broadcasts local weights to the remote evaluators (if any), gathers one
    batch (concatenated across remote evaluators, or sampled locally),
    treats it as a ``MultiAgentBatch`` and adds every row of every policy
    batch to ``self.replay_buffers[policy_id]``. ``_optimize`` runs when
    ``self.num_steps_sampled`` (before counting this batch) has reached
    ``self.replay_starts``.
    """
    with self.update_weights_timer:
        if self.remote_evaluators:
            weights = ray.put(self.local_evaluator.get_weights())
            for evaluator in self.remote_evaluators:
                evaluator.set_weights.remote(weights)

    with self.sample_timer:
        if self.remote_evaluators:
            requests = [ev.sample.remote() for ev in self.remote_evaluators]
            batch = SampleBatch.concat_samples(ray.get(requests))
        else:
            batch = self.local_evaluator.sample()

        # Handle everything as if multiagent
        if isinstance(batch, SampleBatch):
            batch = MultiAgentBatch({
                DEFAULT_POLICY_ID: batch
            }, batch.count)

        for policy_id, policy_batch in batch.policy_batches.items():
            for row in policy_batch.rows():
                self.replay_buffers[policy_id].add(
                    pack_if_needed(row["obs"]),
                    row["actions"],
                    row["rewards"],
                    pack_if_needed(row["new_obs"]),
                    row["dones"],
                    weight=None)

    if self.num_steps_sampled >= self.replay_starts:
        self._optimize()

    # The counter is advanced only after the replay_starts check above.
    self.num_steps_sampled += batch.count
def __setstate__(self, state):
    """Restore evaluator and optimizer state from ``state``.

    Args:
        state: Mapping that may contain an "evaluator" entry (restored on
            the local evaluator and on every remote evaluator) and an
            "optimizer" entry (restored on ``self.optimizer``). Missing
            entries are skipped.
    """
    if "evaluator" in state:
        evaluator_state = state["evaluator"]
        self.local_evaluator.restore(evaluator_state)
        # Put the state once and share the reference with all remotes.
        shared_state = ray.put(evaluator_state)
        for remote in self.remote_evaluators:
            remote.restore.remote(shared_state)

    if "optimizer" in state:
        self.optimizer.restore(state["optimizer"])
def Driver(success):
    """Run a driver and check that ``StateSummary()`` evolves as expected.

    ``success.value`` is set to True up front and flipped to False if the
    initial summary does not start with ``(0, 1)`` or if an expected summary
    is not observed within 100 polls spaced 0.1 s apart.

    Args:
        success: Object with a writable ``value`` attribute used to report
            the outcome.
    """
    success.value = True

    # Start driver.
    ray.init(redis_address=redis_address)
    summary_start = StateSummary()
    if (0, 1) != summary_start[:2]:
        success.value = False

    max_attempts_before_failing = 100

    def poll_until(expected):
        # Poll StateSummary() until it equals `expected`, or give up and
        # record a failure after max_attempts_before_failing tries.
        attempts = 0
        while expected != StateSummary():
            time.sleep(0.1)
            attempts += 1
            if attempts == max_attempts_before_failing:
                success.value = False
                break

    # Two new objects.
    ray.get(ray.put(1111))
    ray.get(ray.put(1111))
    poll_until((2, 1, summary_start[2]))

    @ray.remote
    def f():
        ray.put(1111)  # Yet another object.
        return 1111  # A returned object as well.

    # 1 new function.
    poll_until((2, 1, summary_start[2] + 1))

    ray.get(f.remote())
    poll_until((4, 2, summary_start[2] + 1))

    ray.shutdown()
def test_cache(ray_start_regular):
    """Compare a local dot product with one on arrays fetched from Ray.

    Both variants run 100 times. If the Ray variant takes more than 1.5x
    the local time, the test raises, unless the TRAVIS environment variable
    is set, in which case only a warning is printed.
    """
    A = np.random.rand(1, 1000000)
    v = np.random.rand(1000000)
    A_id = ray.put(A)
    v_id = ray.put(v)

    local_start = time.time()
    for _ in range(100):
        A.dot(v)
    b = time.time() - local_start

    store_start = time.time()
    for _ in range(100):
        ray.get(A_id).dot(ray.get(v_id))
    d = time.time() - store_start

    if not d > 1.5 * b:
        return

    report = "The caching test was too slow. " "d = {}, b = {}".format(d, b)
    if os.getenv("TRAVIS") is not None:
        print("WARNING: " + report)
    else:
        raise Exception(report)
def pin_in_object_store(obj):
    """Pin an object in the object store.

    It will be available as long as the pinning process is alive. The pinned
    object can be retrieved by calling get_pinned_object on the identifier
    returned by this call.

    Returns:
        A string made of PINNED_OBJECT_PREFIX followed by the base64
        encoding of the object ID's binary form.
    """
    obj_id = ray.put(_to_pinnable(obj))
    # Holding the fetched value in the module-level list is what keeps the
    # object pinned.
    fetched = ray.get(obj_id)
    _pinned_objects.append(fetched)

    encoded_id = base64.b64encode(obj_id.binary()).decode("utf-8")
    return "{}{}".format(PINNED_OBJECT_PREFIX, encoded_id)
def from_pandas(df, npartitions=None, chunksize=None):
    """Converts a pandas DataFrame to a Ray DataFrame.

    The frame is cut into consecutive row slices of ``chunksize`` rows; the
    final slice holds whatever remains (between 0 and ``chunksize`` rows).
    When ``npartitions`` is given, ``chunksize`` is derived as
    ``int(len(df) / npartitions)`` with a floor of one row, so the number of
    partitions produced can differ from ``npartitions``.

    Args:
        df (pandas.DataFrame): The pandas DataFrame to convert.
        npartitions (int): The number of partitions to split the DataFrame
            into. Has priority over chunksize.
        chunksize (int): The number of rows to put in each partition.

    Returns:
        A new Ray DataFrame object.

    Raises:
        ValueError: If neither ``npartitions`` nor ``chunksize`` is set, or
            if the one that is used is smaller than 1.
    """
    from .dataframe import DataFrame

    if npartitions is not None:
        if npartitions < 1:
            raise ValueError("npartitions must be a positive integer.")
        # Never go below one row per partition. A chunk size of 0 (more
        # partitions than rows) would make the loop below spin forever
        # because the remaining frame would never shrink.
        chunksize = max(1, int(len(df) / npartitions))
    elif chunksize is None:
        raise ValueError("The number of partitions or chunksize must be set.")
    elif chunksize < 1:
        # Same non-termination hazard as above for an explicit chunk size.
        raise ValueError("chunksize must be a positive integer.")

    partitions = []
    remaining = df
    while len(remaining) > chunksize:
        # reset_index here because we want a pd.RangeIndex
        # within the partitions. It is smaller and sometimes faster.
        head = remaining[:chunksize].reset_index(drop=True)
        partitions.append(ray.put(head))
        remaining = remaining[chunksize:]

    # The tail is always stored, even when it is empty.
    partitions.append(ray.put(remaining.reset_index(drop=True)))

    return DataFrame(partitions, df.columns, index=df.index)
def train(self, num_iter):
    """Run ``num_iter`` training iterations with periodic logging.

    Each iteration performs one ``train_step``, then merges the observation
    filters of all workers into the master filter and pushes the merged
    filter back to every worker. Every 10th iteration, rollouts are
    aggregated, the first worker's weights are saved and statistics are
    logged through ``logz``.

    Args:
        num_iter: Number of iterations to run.
    """
    start = time.time()
    for iteration in range(num_iter):
        step_start = time.time()
        self.train_step()
        step_end = time.time()
        print('total time of one step', step_end - step_start)
        print('iter ', iteration, ' done')

        # record statistics every 10 iterations
        if (iteration + 1) % 10 == 0:
            rewards = self.aggregate_rollouts(num_rollouts=100, evaluate=True)
            w = ray.get(self.workers[0].get_weights_plus_stats.remote())
            np.savez(self.logdir + "/lin_policy_plus", w)

            print(sorted(self.params.items()))
            rows = [
                ("Time", time.time() - start),
                ("Iteration", iteration + 1),
                ("AverageReward", np.mean(rewards)),
                ("StdRewards", np.std(rewards)),
                ("MaxRewardRollout", np.max(rewards)),
                ("MinRewardRollout", np.min(rewards)),
                ("timesteps", self.timesteps),
            ]
            for label, value in rows:
                logz.log_tabular(label, value)
            logz.dump_tabular()

        sync_start = time.time()
        # get statistics from all workers
        for worker_index in range(self.num_workers):
            worker_filter = ray.get(
                self.workers[worker_index].get_filter.remote())
            self.policy.observation_filter.update(worker_filter)
        self.policy.observation_filter.stats_increment()

        # make sure master filter buffer is clear
        self.policy.observation_filter.clear_buffer()

        # sync all workers
        filter_id = ray.put(self.policy.observation_filter)
        sync_ids = [w.sync_filter.remote(filter_id) for w in self.workers]
        # waiting for sync of all workers
        ray.get(sync_ids)

        increment_ids = [w.stats_increment.remote() for w in self.workers]
        # waiting for increment of all workers
        ray.get(increment_ids)
        sync_end = time.time()
        print('Time to sync statistics:', sync_end - sync_start)

    return
def _step(self):
    """Process completed sample, replay and learner work once.

    Three phases run in order, each under its own timer from
    ``self.timers``: forward completed sample batches to replay actors
    (refreshing evaluator weights when due), move completed replay results
    into the learner's input queue, and drain the learner's output queue to
    send priority updates back to the replay actors.

    Returns:
        A tuple ``(sample_timesteps, train_timesteps)``: the sum of the
        fetched counts of the completed sample tasks, and the sum of the
        counts read from the learner's output queue.
    """
    sample_timesteps, train_timesteps = 0, 0
    # Reference to the weights put during this call; created lazily below.
    weights = None

    with self.timers["sample_processing"]:
        completed = list(self.sample_tasks.completed())
        # Each completed entry is (ev, (sample_batch, count)); fetch all the
        # `count` values with a single ray.get call.
        counts = ray.get([c[1][1] for c in completed])
        for i, (ev, (sample_batch, count)) in enumerate(completed):
            sample_timesteps += counts[i]

            # Send the data to the replay buffer
            random.choice(
                self.replay_actors).add_batch.remote(sample_batch)

            # Update weights if needed
            self.steps_since_update[ev] += counts[i]
            if self.steps_since_update[ev] >= self.max_weight_sync_delay:
                # Note that it's important to pull new weights once
                # updated to avoid excessive correlation between actors
                if weights is None or self.learner.weights_updated:
                    self.learner.weights_updated = False
                    with self.timers["put_weights"]:
                        weights = ray.put(
                            self.local_evaluator.get_weights())
                ev.set_weights.remote(weights)
                self.num_weight_syncs += 1
                self.steps_since_update[ev] = 0

            # Kick off another sample request
            self.sample_tasks.add(ev, ev.sample_with_count.remote())

    with self.timers["replay_processing"]:
        for ra, replay in self.replay_tasks.completed():
            # Request the next replay before handling the current result.
            self.replay_tasks.add(ra, ra.replay.remote())
            if self.learner.inqueue.full():
                # The completed replay result is discarded, only counted.
                self.num_samples_dropped += 1
            else:
                with self.timers["get_samples"]:
                    samples = ray.get(replay)
                # Defensive copy against plasma crashes, see #2610 #3452
                # (`samples and ...` passes a falsy result through uncopied).
                self.learner.inqueue.put((ra, samples and samples.copy()))

    with self.timers["update_priorities"]:
        while not self.learner.outqueue.empty():
            ra, prio_dict, count = self.learner.outqueue.get()
            ra.update_priorities.remote(prio_dict)
            train_timesteps += count

    return sample_timesteps, train_timesteps
def testGet(self):
    """Check scheduler reference counts after ObjectIDs go out of scope.

    For each test value, the object is put and fetched, local references
    are deleted, and the scheduler's reference count for that object ID is
    asserted to be -1.

    NOTE(review): this block uses Python 2 literals (``1L``, ``u"hi"``), and
    no ``ray.worker.cleanup()`` call is visible at the end -- confirm it is
    not missing.
    """
    ray.init(start_ray_local=True, num_workers=3)

    for cls in [Foo, Bar, Baz, Qux, SubQux, Exception, CustomError, Point, NamedTupleExample]:
        ray.register_class(cls)

    # Remote objects should be deallocated when the corresponding ObjectID goes
    # out of scope, and all results of ray.get called on the ID go out of scope.
    for val in RAY_TEST_OBJECTS:
        x = ray.put(val)
        # Keep only the raw id so no ObjectID reference survives the `del`.
        objectid = x.id
        xval = ray.get(x)
        del x, xval
        self.assertEqual(ray.scheduler_info()["reference_counts"][objectid], -1)

    # Remote objects that do not contain numpy arrays should be deallocated when
    # the corresponding ObjectID goes out of scope, even if ray.get has been
    # called on the ObjectID.
    for val in [True, False, None, 1, 1.0, 1L, "hi", u"hi", [1, 2, 3], (1, 2, 3), [(), {(): ()}]]:
        x = ray.put(val)
        objectid = x.id
        # xval is deliberately kept alive here; only the ObjectID is deleted.
        xval = ray.get(x)
        del x
        self.assertEqual(ray.scheduler_info()["reference_counts"][objectid], -1)
def train():
    """Train a ResNet with one actor per GPU, averaging weights each round.

    Connects to (or starts) Ray, loads the train and test data through
    remote tasks, creates the training actors and one test actor, then loops
    until interrupted with Ctrl-C. Each round every training actor computes
    steps from the current weights and the results are averaged. Every 200
    steps the previously launched accuracy evaluation is fetched and printed
    and a new one is launched.
    """
    num_gpus = FLAGS.num_gpus

    if FLAGS.redis_address is not None:
        ray.init(redis_address=FLAGS.redis_address)
    else:
        ray.init(num_gpus=num_gpus)

    train_data = get_data.remote(FLAGS.train_data_path, 50000, FLAGS.dataset)
    test_data = get_data.remote(FLAGS.eval_data_path, 10000, FLAGS.dataset)

    # Creates an actor for each gpu, or one if only using the cpu. Each actor
    # has access to the dataset.
    if FLAGS.num_gpus > 0:
        train_actors = []
        for _ in range(num_gpus):
            train_actors.append(
                ResNetTrainActor.remote(train_data, FLAGS.dataset, num_gpus))
    else:
        train_actors = [ResNetTrainActor.remote(train_data, FLAGS.dataset, 0)]

    test_actor = ResNetTestActor.remote(test_data, FLAGS.dataset,
                                        FLAGS.eval_batch_count, FLAGS.eval_dir)
    log_ip = ray.get(test_actor.get_ip_addr.remote())
    print("The log files for tensorboard are stored at ip {}.".format(log_ip))

    step = 0
    weight_id = train_actors[0].get_weights.remote()
    acc_id = test_actor.accuracy.remote(weight_id, step)

    # Correction for dividing the weights by the number of gpus.
    divisor = 1 if num_gpus == 0 else num_gpus

    print("Starting training loop. Use Ctrl-C to exit.")
    try:
        while True:
            pending = [
                actor.compute_steps.remote(weight_id) for actor in train_actors
            ]
            all_weights = ray.get(pending)
            mean_weights = {
                key: (sum(weights[key] for weights in all_weights) / divisor)
                for key in all_weights[0]
            }
            weight_id = ray.put(mean_weights)
            step += 10
            if step % 200 == 0:
                # Retrieves the previously computed accuracy and launches a new
                # testing task with the current weights every 200 steps.
                acc = ray.get(acc_id)
                acc_id = test_actor.accuracy.remote(weight_id, step)
                print("Step {}: {:.6f}".format(step - 200, acc))
    except KeyboardInterrupt:
        pass
def _step(self):
    """Consume completed sample batches and drain the learner output queue.

    Every batch yielded by ``_augment_with_replay`` is appended to
    ``self.batch_buffer``; once the buffered count reaches
    ``self.train_batch_size`` the buffer is concatenated, queued for the
    learner and reset. Batches that come with an evaluator (``ev`` is not
    None) are additionally counted, optionally kept in
    ``self.replay_batches``, and their evaluator is given weights and a new
    sample task.

    Returns:
        A tuple ``(sample_timesteps, train_timesteps)``: the summed
        ``count`` of the non-replayed batches, and the sum of the counts
        read from the learner's output queue.
    """
    sample_timesteps, train_timesteps = 0, 0
    # Evaluators served with the current `weights` reference so far.
    num_sent = 0
    weights = None

    for ev, sample_batch in self._augment_with_replay(
            self.sample_tasks.completed_prefetch()):
        self.batch_buffer.append(sample_batch)
        if sum(b.count
               for b in self.batch_buffer) >= self.train_batch_size:
            train_batch = self.batch_buffer[0].concat_samples(
                self.batch_buffer)
            self.learner.inqueue.put(train_batch)
            self.batch_buffer = []

        # If the batch was replayed, skip the update below.
        if ev is None:
            continue

        sample_timesteps += sample_batch.count

        # Put in replay buffer if enabled
        if self.replay_buffer_num_slots > 0:
            self.replay_batches.append(sample_batch)
            # Evict the oldest batch once the slot limit is exceeded.
            if len(self.replay_batches) > self.replay_buffer_num_slots:
                self.replay_batches.pop(0)

        # Note that it's important to pull new weights once
        # updated to avoid excessive correlation between actors
        if weights is None or (self.learner.weights_updated
                               and num_sent >= self.broadcast_interval):
            self.learner.weights_updated = False
            weights = ray.put(self.local_evaluator.get_weights())
            num_sent = 0
        ev.set_weights.remote(weights)
        self.num_weight_syncs += 1
        num_sent += 1

        # Kick off another sample request
        self.sample_tasks.add(ev, ev.sample.remote())

    while not self.learner.outqueue.empty():
        count = self.learner.outqueue.get()
        train_timesteps += count

    return sample_timesteps, train_timesteps
def synchronize(local_filters, remotes):
    """Aggregates all filters from remote evaluators.

    Local copy is updated and then broadcasted to all remote evaluators.

    Args:
        local_filters (dict): Filters to be synchronized.
        remotes (list): Remote evaluators with filters.
    """
    flush_requests = [r.get_filters.remote(flush_after=True) for r in remotes]
    for remote_filter in ray.get(flush_requests):
        for name in local_filters:
            local_filters[name].apply_changes(
                remote_filter[name], with_buffer=False)

    serializable = {
        name: local.as_serializable()
        for name, local in local_filters.items()
    }
    remote_copy = ray.put(serializable)
    for remote in remotes:
        remote.sync_filters.remote(remote_copy)
def put_task():
    """Chain dependent tasks off a ``ray.put`` object, then read the first.

    Relies on ``object_size``, ``num_objects`` and ``single_dependency``
    from the enclosing scope.
    """
    # Launch num_objects instances of the remote task, each dependent
    # on the one before it. The result of the first task should get
    # evicted.
    chain = []
    previous = ray.put(np.zeros(object_size, dtype=np.uint8))
    for i in range(num_objects):
        previous = single_dependency.remote(i, previous)
        chain.append(previous)

    # Get the last value to force all tasks to finish.
    last_value = ray.get(chain[-1])
    assert last_value[0] == i

    # Get the first value (which should have been evicted) to force
    # reconstruction. Currently, since we're not able to reconstruct
    # `ray.put` objects that were evicted and whose originating tasks
    # are still running, this for-loop should hang and push an error to
    # the driver.
    ray.get(chain[0])
def warmup():
    """Warm up the object store with repeated 100MB puts and log latencies.

    Logs the average latency of 10 initial puts, then five times runs 100
    untimed puts followed by 10 timed puts whose average latency is logged.
    """
    logger.info("Warming up object store")
    zeros = np.zeros(int(100e6 / 8), dtype=np.float64)

    started = time.time()
    for _ in range(10):
        ray.put(zeros)
    logger.info("Initial latency for 100MB put {}".format(
        (time.time() - started) / 10))

    rounds_left = 5
    while rounds_left > 0:
        for _ in range(100):
            ray.put(zeros)

        started = time.time()
        for _ in range(10):
            ray.put(zeros)
        logger.info("Warmed up latency for 100MB put {}".format(
            (time.time() - started) / 10))
        rounds_left -= 1
def step(self):
    """Run one synchronous sample / compute-gradient / apply-gradient step.

    Broadcasts local weights to the remote evaluators (if any), gathers one
    batch (concatenated across remote evaluators, or sampled locally),
    computes gradients on it with the local evaluator and applies them.
    """
    with self.update_weights_timer:
        if self.remote_evaluators:
            weights = ray.put(self.local_evaluator.get_weights())
            for evaluator in self.remote_evaluators:
                evaluator.set_weights.remote(weights)

    with self.sample_timer:
        if self.remote_evaluators:
            requests = [ev.sample.remote() for ev in self.remote_evaluators]
            samples = SampleBatch.concat_samples(ray.get(requests))
        else:
            samples = self.local_evaluator.sample()

    with self.grad_timer:
        gradient = self.local_evaluator.compute_gradients(samples)
        self.local_evaluator.apply_gradients(gradient)
        self.grad_timer.push_units_processed(samples.count)

    self.num_steps_sampled += samples.count
    self.num_steps_trained += samples.count
def testPythonMode(self):
    """Check that PYTHON_MODE makes remote calls, get and put pass values."""
    reload(test_functions)
    ray.init(start_ray_local=True, driver_mode=ray.PYTHON_MODE)

    @ray.remote
    def f():
        return np.ones([3, 4, 5])

    # remote functions should return by value
    ones_result = f.remote()
    assert_equal(ones_result, np.ones([3, 4, 5]))
    # ray.get should be the identity
    assert_equal(ones_result, ray.get(ones_result))
    # ray.put should be the identity
    sample = np.random.normal(size=[11, 12])
    assert_equal(sample, ray.put(sample))

    # make sure objects are immutable, this example is why we need to copy
    # arguments before passing them into remote functions in python mode
    first = test_functions.python_mode_f.remote()
    assert_equal(first, np.array([0, 0]))
    second = test_functions.python_mode_g.remote(first)
    # python_mode_g should not mutate its argument
    assert_equal(first, np.array([0, 0]))
    assert_equal(second, np.array([1, 0]))

    ray.worker.cleanup()
def step(self):
    """Sample once, store every row in the replay buffer, maybe optimize.

    Broadcasts local weights to the remote evaluators (if any), gathers one
    batch (concatenated across remote evaluators, or sampled locally) and
    adds each of its rows to ``self.replay_buffer``. ``_optimize`` runs once
    the buffer holds at least ``self.replay_starts`` entries.
    """
    with self.update_weights_timer:
        if self.remote_evaluators:
            weights = ray.put(self.local_evaluator.get_weights())
            for evaluator in self.remote_evaluators:
                evaluator.set_weights.remote(weights)

    with self.sample_timer:
        if self.remote_evaluators:
            requests = [ev.sample.remote() for ev in self.remote_evaluators]
            batch = SampleBatch.concat_samples(ray.get(requests))
        else:
            batch = self.local_evaluator.sample()

        for row in batch.rows():
            self.replay_buffer.add(
                row["obs"],
                row["actions"],
                row["rewards"],
                row["new_obs"],
                row["dones"],
                row["weights"])

    if len(self.replay_buffer) >= self.replay_starts:
        self._optimize()

    self.num_steps_sampled += batch.count
def step(self):
    """Sample a batch, load it through ``self.par_opt`` and run SGD passes.

    Broadcasts local weights to the remote evaluators (if any), gathers one
    ``SampleBatch``, loads the columns named in ``self.loss_inputs`` with
    ``self.par_opt.load_data`` and then, for each of ``self.num_sgd_iter``
    iterations, calls ``self.par_opt.optimize`` on every mini-batch offset
    in a fresh random order.
    """
    with self.update_weights_timer:
        if self.remote_evaluators:
            weights = ray.put(self.local_evaluator.get_weights())
            for evaluator in self.remote_evaluators:
                evaluator.set_weights.remote(weights)

    with self.sample_timer:
        if self.remote_evaluators:
            requests = [ev.sample.remote() for ev in self.remote_evaluators]
            samples = SampleBatch.concat_samples(ray.get(requests))
        else:
            samples = self.local_evaluator.sample()
        assert isinstance(samples, SampleBatch)

    with self.load_timer:
        input_keys = [key for key, _ in self.loss_inputs]
        tuples_per_device = self.par_opt.load_data(
            self.local_evaluator.sess, samples.columns(input_keys))

    with self.grad_timer:
        for _sgd_iter in range(self.num_sgd_iter):
            num_batches = (
                int(tuples_per_device) // int(self.per_device_batch_size))
            permutation = np.random.permutation(num_batches)
            for position in range(num_batches):
                # TODO(ekl) support ppo's debugging features, e.g.
                # printing the current loss and tracing
                self.par_opt.optimize(
                    self.sess,
                    permutation[position] * self.per_device_batch_size)

    self.num_steps_sampled += samples.count
    self.num_steps_trained += samples.count
def test_workflow_storage(workflow_start_regular):
    """Exercise ``WorkflowStorage`` load helpers and ``inspect_step``.

    The first step ("some_step") has all of its artifacts written and is
    read back through the load helpers. Steps "some_step2" to "some_step6"
    each have fewer artifacts written, and ``inspect_step`` must report the
    matching ``StepInspectResult`` and recoverability for each.
    """
    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(workflow_id,
                                                  storage.get_global_storage())
    step_id = "some_step"
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "step_type": StepType.FUNCTION,
        "object_refs": ["abc"],
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "max_retries": 1,
        "catch_exceptions": False,
        "ray_options": {},
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    def save(key, payload, *extra):
        # Write one entry through the storage's coroutine-based _put.
        asyncio_run(wf_storage._put(key, payload, *extra))

    def save_input_metadata(sid):
        save(wf_storage._key_step_input_metadata(sid), input_metadata, True)

    def save_func_body(sid):
        save(wf_storage._key_step_function_body(sid), some_func)

    def save_output_metadata(sid):
        save(wf_storage._key_step_output_metadata(sid), output_metadata, True)

    def expected_from_input_metadata(**flags):
        # Inspect result built from the input metadata plus validity flags.
        return workflow_storage.StepInspectResult(
            step_type=StepType.FUNCTION,
            object_refs=input_metadata["object_refs"],
            workflows=input_metadata["workflows"],
            workflow_refs=input_metadata["workflow_refs"],
            ray_options={},
            **flags)

    # test basics
    save_input_metadata(step_id)
    save_func_body(step_id)
    save(wf_storage._key_step_args(step_id), flattened_args)
    save(wf_storage._key_obj_id(obj_ref.hex()), ray.get(obj_ref))
    save_output_metadata(step_id)
    save(wf_storage._key_step_output(step_id), output)

    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], [], []) == args
    assert wf_storage.load_step_func_body(step_id)(33) == 34
    assert ray.get(wf_storage.load_object_ref(
        obj_ref.hex())) == object_resolved

    # test "inspect_step"
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    # Everything except the step output.
    step_id = "some_step2"
    save_input_metadata(step_id)
    save_func_body(step_id)
    save(wf_storage._key_step_args(step_id), args)
    save_output_metadata(step_id)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    # Input metadata, function body and arguments only.
    step_id = "some_step3"
    save_input_metadata(step_id)
    save_func_body(step_id)
    save(wf_storage._key_step_args(step_id), args)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == expected_from_input_metadata(
        args_valid=True, func_body_valid=True)
    assert inspect_result.is_recoverable()

    # Input metadata and function body only.
    step_id = "some_step4"
    save_input_metadata(step_id)
    save_func_body(step_id)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == expected_from_input_metadata(
        func_body_valid=True)
    assert not inspect_result.is_recoverable()

    # Input metadata only.
    step_id = "some_step5"
    save_input_metadata(step_id)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == expected_from_input_metadata()
    assert not inspect_result.is_recoverable()

    # Nothing written for this step.
    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()
def put_small():
    """Put the integer 0 into the object store and discard the reference."""
    ray.put(0)
def put_large():
    """Put ``arr`` into the object store and discard the reference.

    ``arr`` is not defined in this function; it is read from an enclosing
    or module scope.
    """
    ray.put(arr)
def small_value_batch_arg(self, n):
    """Call ``small_value_arg`` n times on every server and wait for all.

    A single object-store reference to the value 0 is created and passed as
    the argument of every call.

    Args:
        n: Number of calls to submit per server.
    """
    shared_arg = ray.put(0)
    pending = [
        server.small_value_arg.remote(shared_arg)
        for server in self.servers
        for _ in range(n)
    ]
    ray.get(pending)
def main(results=None):
    """Run the Ray microbenchmark suite and return the accumulated results.

    Each benchmark is a small closure handed to ``timeit`` together with a
    label; whatever ``timeit`` returns is appended to ``results`` with ``+=``.
    The optional third argument passed to ``timeit`` is presumably the number
    of operations performed per call (used to scale the reported rate) --
    confirm against ``timeit``'s definition.

    Args:
        results: Optional list to accumulate into. A falsy value (``None`` or
            an empty list) is replaced by a fresh list; a non-empty list is
            extended in place.

    Returns:
        The list of accumulated benchmark results.
    """
    results = results or []

    check_optimized_build()

    print("Tip: set TESTS_TO_RUN='pattern' to run a subset of benchmarks")

    ray.init()

    # A single small object reused by the "get" benchmark.
    value = ray.put(0)

    def get_small():
        ray.get(value)

    def put_small():
        ray.put(0)

    @ray.remote
    def do_put_small():
        for _ in range(100):
            ray.put(0)

    def put_multi_small():
        ray.get([do_put_small.remote() for _ in range(10)])

    # 100 Mi int64 elements; used by the large-put benchmark below.
    arr = np.zeros(100 * 1024 * 1024, dtype=np.int64)

    results += timeit("single client get calls (Plasma Store)", get_small)

    results += timeit("single client put calls (Plasma Store)", put_small)

    # 10 tasks x 100 puts each = 1000 puts per call.
    results += timeit("multi client put calls (Plasma Store)", put_multi_small,
                      1000)

    def put_large():
        ray.put(arr)

    results += timeit("single client put gigabytes", put_large, 8 * 0.1)

    def small_value_batch():
        submitted = [small_value.remote() for _ in range(1000)]
        ray.get(submitted)
        return 0

    results += timeit("single client tasks and get batch", small_value_batch)

    @ray.remote
    def do_put():
        for _ in range(10):
            ray.put(np.zeros(10 * 1024 * 1024, dtype=np.int64))

    def put_multi():
        ray.get([do_put.remote() for _ in range(10)])

    results += timeit("multi client put gigabytes", put_multi, 10 * 8 * 0.1)

    obj_containing_ref = create_object_containing_ref.remote()

    def get_containing_object_ref():
        ray.get(obj_containing_ref)

    results += timeit("single client get object containing 10k refs",
                      get_containing_object_ref)

    def small_task():
        ray.get(small_value.remote())

    results += timeit("single client tasks sync", small_task)

    def small_task_async():
        ray.get([small_value.remote() for _ in range(1000)])

    results += timeit("single client tasks async", small_task_async, 1000)

    # NOTE: n, m, n_cpu, a, actors and client are rebound several times
    # below; each closure reads whatever binding is current when it runs,
    # so the statement order in this function matters.
    n = 10000
    m = 4
    actors = [Actor.remote() for _ in range(m)]

    def multi_task():
        submitted = [a.small_value_batch.remote(n) for a in actors]
        ray.get(submitted)

    results += timeit("multi client tasks async", multi_task, n * m)

    a = Actor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 actor calls sync", actor_sync)

    a = Actor.remote()

    def actor_async():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls async", actor_async, 1000)

    a = Actor.options(max_concurrency=16).remote()

    def actor_concurrent():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls concurrent", actor_concurrent, 1000)

    n = 5000
    # Use half of the machine's CPUs for the server-side actors.
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [Actor._remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def actor_async_direct():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n actor calls async", actor_async_direct,
                      n * len(actors))

    n_cpu = multiprocessing.cpu_count() // 2
    a = [Actor.remote() for _ in range(n_cpu)]

    @ray.remote
    def work(actors):
        # n and n_cpu are captured from the enclosing scope.
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def actor_multi2():
        ray.get([work.remote(a) for _ in range(m)])

    results += timeit("n:n actor calls async", actor_multi2, m * n)

    n = 1000
    actors = [Actor._remote() for _ in range(n_cpu)]
    # One client per actor; each client is constructed with a single actor.
    clients = [Client.remote(a) for a in actors]

    def actor_multi2_direct_arg():
        ray.get([c.small_value_batch_arg.remote(n) for c in clients])

    results += timeit("n:n actor calls with arg async",
                      actor_multi2_direct_arg, n * len(clients))

    a = AsyncActor.remote()

    # Deliberately reuses the name actor_sync; the earlier definition has
    # already been timed.
    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 async-actor calls sync", actor_sync)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 async-actor calls async", async_actor, 1000)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value_with_arg.remote(i) for i in range(1000)])

    results += timeit("1:1 async-actor calls with args async", async_actor,
                      1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [AsyncActor.remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def async_actor_async():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n async-actor calls async", async_actor_async,
                      n * len(actors))

    n = 5000
    m = 4
    n_cpu = multiprocessing.cpu_count() // 2
    a = [AsyncActor.remote() for _ in range(n_cpu)]

    @ray.remote
    def async_actor_work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def async_actor_multi():
        ray.get([async_actor_work.remote(a) for _ in range(m)])

    results += timeit("n:n async-actor calls async", async_actor_multi, m * n)
    ray.shutdown()

    # Placement-group benchmark runs against a fresh Ray instance that
    # advertises a custom resource.
    NUM_PGS = 100
    NUM_BUNDLES = 1
    ray.init(resources={"custom": 100})

    def placement_group_create_removal(num_pgs):
        pgs = [
            ray.util.placement_group(bundles=[{
                "custom": 0.001
            } for _ in range(NUM_BUNDLES)]) for _ in range(num_pgs)
        ]
        [pg.wait(timeout_seconds=30) for pg in pgs]
        # Include placement group removal here to clean up.
        # If we don't clean up placement groups, the whole performance
        # gets slower as it runs more.
        # Since timeit function runs multiple times without
        # the cleaning logic, we should have this method here.
        for pg in pgs:
            ray.util.remove_placement_group(pg)

    results += timeit("placement group create/removal",
                      lambda: placement_group_create_removal(NUM_PGS), NUM_PGS)
    ray.shutdown()

    # Hand the same results list to the client-mode benchmarks.
    client_microbenchmark_main(results)

    return results
def _train(self):
    """Run one training iteration: collect samples, run SGD, adapt the KL coefficient.

    Steps, in order:
      1. Put the local model's weights in the object store and send the
         reference to every remote evaluator.
      2. Collect samples and standardize the "advantages" column.
      3. Shuffle the samples and load them into the model.
      4. Run ``config["num_sgd_iter"]`` passes of minibatch SGD, printing the
         mean loss statistics of every pass.
      5. Scale ``self.kl_coeff`` up or down based on the mean KL of the last
         pass relative to ``config["kl_target"]``.
      6. Synchronize filters and return the remote evaluators' metrics with
         ``info`` replaced by this iteration's timing/KL statistics.

    NOTE(review): if ``config["num_sgd_iter"]`` is 0 the loop body never runs,
    so ``kl`` is unbound and ``sgd_time`` is 0 (division by zero in
    ``sample_throughput``) -- presumably callers always use >= 1; confirm.

    Returns:
        The result of ``self._fetch_metrics_from_remote_evaluators()`` with
        its ``info`` field replaced.
    """
    agents = self.remote_evaluators
    config = self.config
    model = self.local_evaluator

    print("===> iteration", self.iteration)

    iter_start = time.time()
    # Put the weights once and share the reference with every agent.
    weights = ray.put(model.get_weights())
    # Fire-and-forget: the returned refs are not waited on here.
    [a.set_weights.remote(weights) for a in agents]
    samples = collect_samples(agents, config, self.local_evaluator)

    def standardized(value):
        # Divide by the maximum of value.std() and 1e-4
        # to guard against the case where all values are equal
        return (value - value.mean()) / max(1e-4, value.std())

    samples.data["advantages"] = standardized(samples["advantages"])

    rollouts_end = time.time()
    print("Computing policy (iterations=" + str(config["num_sgd_iter"]) +
          ", stepsize=" + str(config["sgd_stepsize"]) + "):")
    names = [
        "iter", "total loss", "policy loss", "vf loss", "kl", "entropy"
    ]
    print(("{:>15}" * len(names)).format(*names))
    samples.shuffle()
    shuffle_end = time.time()
    tuples_per_device = model.load_data(
        samples, self.iteration == 0 and config["full_trace_data_load"])
    load_end = time.time()
    rollouts_time = rollouts_end - iter_start
    shuffle_time = shuffle_end - rollouts_end
    load_time = load_end - shuffle_end
    sgd_time = 0
    for i in range(config["num_sgd_iter"]):
        sgd_start = time.time()
        batch_index = 0
        num_batches = (int(tuples_per_device) //
                       int(model.per_device_batch_size))
        loss, policy_loss, vf_loss, kl, entropy = [], [], [], [], []
        # Visit the minibatches in a fresh random order on every pass.
        permutation = np.random.permutation(num_batches)
        # Prepare to drop into the debugger
        if self.iteration == config["tf_debug_iteration"]:
            model.sess = tf_debug.LocalCLIDebugWrapperSession(model.sess)
        while batch_index < num_batches:
            # Request a full trace only for one specific batch of the very
            # first SGD pass of the very first iteration.
            full_trace = (i == 0 and self.iteration == 0 and
                          batch_index == config["full_trace_nth_sgd_batch"])
            batch_loss, batch_policy_loss, batch_vf_loss, batch_kl, \
                batch_entropy = model.run_sgd_minibatch(
                    permutation[batch_index] * model.per_device_batch_size,
                    self.kl_coeff, full_trace, self.file_writer)
            loss.append(batch_loss)
            policy_loss.append(batch_policy_loss)
            vf_loss.append(batch_vf_loss)
            kl.append(batch_kl)
            entropy.append(batch_entropy)
            batch_index += 1
        # Collapse the per-batch lists into per-pass means (the names are
        # rebound from lists to scalars).
        loss = np.mean(loss)
        policy_loss = np.mean(policy_loss)
        vf_loss = np.mean(vf_loss)
        kl = np.mean(kl)
        entropy = np.mean(entropy)
        sgd_end = time.time()
        print("{:>15}{:15.5e}{:15.5e}{:15.5e}{:15.5e}{:15.5e}".format(
            i, loss, policy_loss, vf_loss, kl, entropy))

        values = []
        # Summaries are only produced for the final SGD pass.
        if i == config["num_sgd_iter"] - 1:
            metric_prefix = "ppo/sgd/final_iter/"
            values.append(
                tf.Summary.Value(tag=metric_prefix + "kl_coeff",
                                 simple_value=self.kl_coeff))
            values.extend([
                tf.Summary.Value(tag=metric_prefix + "mean_entropy",
                                 simple_value=entropy),
                tf.Summary.Value(tag=metric_prefix + "mean_loss",
                                 simple_value=loss),
                tf.Summary.Value(tag=metric_prefix + "mean_kl",
                                 simple_value=kl)
            ])
            if self.file_writer:
                sgd_stats = tf.Summary(value=values)
                self.file_writer.add_summary(sgd_stats, self.global_step)
        self.global_step += 1
        sgd_time += sgd_end - sgd_start
    # Adapt the KL penalty coefficient using the mean KL of the last pass.
    if kl > 2.0 * config["kl_target"]:
        self.kl_coeff *= 1.5
    elif kl < 0.5 * config["kl_target"]:
        self.kl_coeff *= 0.5

    info = {
        "kl_divergence": kl,
        "kl_coefficient": self.kl_coeff,
        "rollouts_time": rollouts_time,
        "shuffle_time": shuffle_time,
        "load_time": load_time,
        "sgd_time": sgd_time,
        "sample_throughput": len(samples["observations"]) / sgd_time
    }

    FilterManager.synchronize(self.local_evaluator.filters,
                              self.remote_evaluators)
    res = self._fetch_metrics_from_remote_evaluators()
    res = res._replace(info=info)
    return res
def aggregate_rollouts(self, num_rollouts=None, evaluate=False):
    """
    Aggregate update step from rollouts generated in parallel.

    The current policy weights are put in the object store and every worker
    is asked for ``num_deltas // num_workers`` rollouts; the first
    ``num_deltas % num_workers`` workers are asked for one extra rollout so
    that ``num_deltas`` rollouts are requested in total.

    Args:
        num_rollouts: Number of rollouts (deltas) to request in total.
            Defaults to ``self.num_deltas`` when ``None``.
        evaluate: Forwarded to the workers. When true, ``self.timesteps`` is
            not updated and the raw reward array is returned instead of a
            gradient estimate.

    Returns:
        When ``evaluate`` is true, the float64 array of collected rollout
        rewards. Otherwise ``g_hat``, the weighted sum of the selected deltas
        divided by the number of selected deltas.
    """

    if num_rollouts is None:
        num_deltas = self.num_deltas
    else:
        num_deltas = num_rollouts

    # put policy weights in the object store
    policy_id = ray.put(self.w_policy)

    t1 = time.time()
    # Per-worker share; the parameter name is reused for this value.
    num_rollouts = int(num_deltas / self.num_workers)

    # parallel generation of rollouts
    rollout_ids_one = [
        worker.do_rollouts.remote(policy_id,
                                  num_rollouts=num_rollouts,
                                  shift=self.shift,
                                  evaluate=evaluate)
        for worker in self.workers
    ]

    # Remainder: one extra rollout on the first (num_deltas % num_workers)
    # workers.
    rollout_ids_two = [
        worker.do_rollouts.remote(policy_id,
                                  num_rollouts=1,
                                  shift=self.shift,
                                  evaluate=evaluate)
        for worker in self.workers[:(num_deltas % self.num_workers)]
    ]

    # gather results
    results_one = ray.get(rollout_ids_one)
    results_two = ray.get(rollout_ids_two)

    rollout_rewards, deltas_idx = [], []

    for result in results_one:
        if not evaluate:
            self.timesteps += result["steps"]
        deltas_idx += result['deltas_idx']
        rollout_rewards += result['rollout_rewards']

    for result in results_two:
        if not evaluate:
            self.timesteps += result["steps"]
        deltas_idx += result['deltas_idx']
        rollout_rewards += result['rollout_rewards']

    deltas_idx = np.array(deltas_idx)
    rollout_rewards = np.array(rollout_rewards, dtype=np.float64)

    print('Maximum reward of collected rollouts:', rollout_rewards.max())
    t2 = time.time()

    print('Time to generate rollouts:', t2 - t1)

    if evaluate:
        return rollout_rewards

    # select top performing directions if deltas_used < num_deltas
    # (rollout_rewards is indexed as a 2-D array below: one row per delta,
    # with columns 0 and 1 holding the two rewards that are differenced.)
    max_rewards = np.max(rollout_rewards, axis=1)
    if self.deltas_used > self.num_deltas:
        self.deltas_used = self.num_deltas

    # NOTE(review): the percentile uses self.num_deltas, not the local
    # num_deltas derived from the num_rollouts argument -- confirm this is
    # intended when num_rollouts is passed explicitly.
    idx = np.arange(max_rewards.size)[max_rewards >= np.percentile(
        max_rewards, 100 * (1 - (self.deltas_used / self.num_deltas)))]
    deltas_idx = deltas_idx[idx]
    rollout_rewards = rollout_rewards[idx, :]

    # normalize rewards by their standard deviation
    # NOTE(review): if every selected reward is identical the standard
    # deviation is 0 and this division yields inf/nan -- confirm whether a
    # guard is needed.
    rollout_rewards /= np.std(rollout_rewards)

    t1 = time.time()
    # aggregate rollouts to form g_hat, the gradient used to compute SGD step
    g_hat, count = utils.batched_weighted_sum(
        rollout_rewards[:, 0] - rollout_rewards[:, 1],
        (self.deltas.get(idx, self.w_policy.size) for idx in deltas_idx),
        batch_size=500)
    g_hat /= deltas_idx.size
    t2 = time.time()
    print('time to aggregate rollouts', t2 - t1)
    return g_hat
def __init__(self):
    """Create a self-referencing instance that holds a large object ref.

    ``self.loop`` points back at the instance, forming a reference cycle;
    ``self.large_object`` is the object-store reference to a zeroed
    40 * 1024 * 1024-element uint8 array.
    """
    num_elements = 40 * 1024 * 1024
    self.loop = self
    self.large_object = ray.put(np.zeros(num_elements, dtype=np.uint8))
def test_global_gc_when_full(shutdown_only):
    """Check that filling the object store triggers GC on the driver and on workers.

    A two-node cluster is started with 100 * 1024 * 1024 bytes of object store
    per node. With Python's cyclic GC disabled, the driver and two actors each
    create a cyclic object that holds a 40 * 1024 * 1024-byte array in the
    object store and keep only a weak reference to it. The test then asserts
    that the weak references die after (1) the driver puts an 80 MiB array and
    (2) an actor returns an 80 MiB array.
    """
    cluster = ray.cluster_utils.Cluster()
    for _ in range(2):
        cluster.add_node(num_cpus=1,
                         num_gpus=0,
                         object_store_memory=100 * 1024 * 1024)
    ray.init(address=cluster.address)

    class LargeObjectWithCyclicRef:
        def __init__(self):
            # Self-reference: only the cyclic garbage collector can free
            # this instance (and the object ref it holds).
            self.loop = self
            self.large_object = ray.put(
                np.zeros(40 * 1024 * 1024, dtype=np.uint8))

    @ray.remote(num_cpus=1)
    class GarbageHolder:
        def __init__(self):
            # Disable automatic GC in the actor process so the cycle is not
            # collected on its own.
            gc.disable()
            x = LargeObjectWithCyclicRef()
            self.garbage = weakref.ref(x)

        def has_garbage(self):
            # True while the cyclic object has not been collected.
            return self.garbage() is not None

        def return_large_array(self):
            return np.zeros(80 * 1024 * 1024, dtype=np.uint8)

    try:
        gc.disable()

        # Local driver.
        local_ref = weakref.ref(LargeObjectWithCyclicRef())

        # Remote workers.
        actors = [GarbageHolder.remote() for _ in range(2)]
        assert local_ref() is not None
        assert all(ray.get([a.has_garbage.remote() for a in actors]))

        # GC should be triggered for all workers, including the local driver,
        # when the driver tries to ray.put a value that doesn't fit in the
        # object store. This should cause the captured ObjectRefs' numpy arrays
        # to be evicted.
        ray.put(np.zeros(80 * 1024 * 1024, dtype=np.uint8))

        def check_refs_gced():
            return (local_ref() is None and
                    not any(ray.get([a.has_garbage.remote() for a in actors])))

        wait_for_condition(check_refs_gced)

        # Local driver.
        local_ref = weakref.ref(LargeObjectWithCyclicRef())

        # Remote workers.
        actors = [GarbageHolder.remote() for _ in range(2)]
        assert all(ray.get([a.has_garbage.remote() for a in actors]))

        # GC should be triggered for all workers, including the local driver,
        # when a remote task tries to put a return value that doesn't fit in
        # the object store. This should cause the captured ObjectRefs' numpy
        # arrays to be evicted.
        ray.get(actors[0].return_large_array.remote())

        # Redefined so the closure sees the new local_ref / actors bindings.
        def check_refs_gced():
            return (local_ref() is None and
                    not any(ray.get([a.has_garbage.remote() for a in actors])))

        wait_for_condition(check_refs_gced)
    finally:
        # Always restore automatic GC in the driver process.
        gc.enable()
def __init__(self):
    """Print one log line, then store a small list in the object store.

    The resulting reference is kept on the instance as ``obj_ref``.
    """
    print("I also log a line")
    payload = [1, 2, 3]
    self.obj_ref = ray.put(payload)
def put(cls, obj):
    """Store ``obj`` in the Ray object store and wrap it in a partition.

    Args:
        obj: Object exposing ``index`` and ``columns``; their lengths become
            the partition's ``length`` and ``width``.

    Returns:
        An ``OmnisciOnRayFramePartition`` referring to the stored object.
    """
    stored_ref = ray.put(obj)
    row_count = len(obj.index)
    column_count = len(obj.columns)
    return OmnisciOnRayFramePartition(
        object_id=stored_ref,
        length=row_count,
        width=column_count,
    )
def test_object_broadcast(ray_start_cluster_with_resource):
    """Check that broadcasting an object to every node uses a sane number of transfers.

    Six 1 MiB objects are created (three via ``ray.put`` on the driver, three
    as return values of a remote task). Each is passed to ``10 * num_nodes``
    no-op tasks spread over the nodes through per-node custom resources named
    ``"0"``, ``"1"``, ... The object transfer timeline is then inspected to
    bound how many times each object was sent.

    Args:
        ray_start_cluster_with_resource: Fixture yielding ``(cluster,
            num_nodes)``.
    """
    cluster, num_nodes = ray_start_cluster_with_resource

    @ray.remote
    def f(x):
        # No-op task: only exists to force its argument onto the node.
        return

    x = np.zeros(1024 * 1024, dtype=np.uint8)

    @ray.remote
    def create_object():
        return np.zeros(1024 * 1024, dtype=np.uint8)

    object_refs = []

    for _ in range(3):
        # Broadcast an object to all machines.
        x_id = ray.put(x)
        object_refs.append(x_id)
        ray.get([
            f._remote(args=[x_id], resources={str(i % num_nodes): 1})
            for i in range(10 * num_nodes)
        ])

    for _ in range(3):
        # Broadcast an object to all machines.
        x_id = create_object.remote()
        object_refs.append(x_id)
        ray.get([
            f._remote(args=[x_id], resources={str(i % num_nodes): 1})
            for i in range(10 * num_nodes)
        ])

    # Wait for profiling information to be pushed to the profile table.
    time.sleep(1)
    transfer_events = ray.state.object_transfer_timeline()

    # Make sure that each object was transferred a reasonable number of times.
    for x_id in object_refs:
        # Keep only "send" events for this object whose third argument is 1.
        relevant_events = [
            event for event in transfer_events
            if event["cat"] == "transfer_send"
            and event["args"][0] == x_id.hex() and event["args"][2] == 1
        ]

        # NOTE: Each event currently appears twice because we duplicate the
        # send and receive boxes to underline them with a box (black if it is a
        # send and gray if it is a receive). So we need to remove these extra
        # boxes here.
        deduplicated_relevant_events = [
            event for event in relevant_events if event["cname"] != "black"
        ]
        assert len(deduplicated_relevant_events) * 2 == len(relevant_events)
        relevant_events = deduplicated_relevant_events

        # Each object must have been broadcast to each remote machine.
        assert len(relevant_events) >= num_nodes - 1
        # If more object transfers than necessary have been done, print a
        # warning.
        if len(relevant_events) > num_nodes - 1:
            warnings.warn("This object was transferred {} times, when only {} "
                          "transfers were required.".format(
                              len(relevant_events), num_nodes - 1))
        # Each object should not have been broadcast more than once from every
        # machine to every other machine. Also, a pair of machines should not
        # both have sent the object to each other.
        assert len(relevant_events) <= (num_nodes - 1) * num_nodes / 2

        # Make sure that no object was sent multiple times between the same
        # pair of object managers.
        send_counts = defaultdict(int)
        for event in relevant_events:
            # The pid identifies the sender and the tid identifies the
            # receiver.
            send_counts[(event["pid"], event["tid"])] += 1
        assert all(value == 1 for value in send_counts.values())
def __init__(self):
    """Store a zeroed 1024 * 1024-element uint8 array and keep its ref as ``x``."""
    num_elements = 1024 * 1024
    self.x = ray.put(np.zeros(num_elements, dtype=np.uint8))
def full_loss(theta):
    """Return the sum of every actor's loss evaluated at ``theta``.

    ``theta`` is put in the object store once and the same reference is
    passed to each actor in ``actors`` (a name from the enclosing scope).
    """
    shared_theta = ray.put(theta)
    pending = []
    for actor in actors:
        pending.append(actor.loss.remote(shared_theta))
    partial_losses = ray.get(pending)
    return sum(partial_losses)
def pin_in_object_store(obj):
    """Deprecated, use ray.put(value) instead.

    Puts ``obj`` in the object store, records the resulting reference in the
    module-level ``_pinned_objects`` list, and returns that reference.
    """
    pinned_ref = ray.put(obj)
    # Keeping the ref in the module-level list keeps it referenced after
    # this call returns.
    _pinned_objects.append(pinned_ref)
    return pinned_ref
def full_grad(theta):
    """Return the sum of every actor's gradient at ``theta`` as float64.

    ``theta`` is put in the object store once and shared by reference with
    each actor in ``actors`` (a name from the enclosing scope).
    """
    shared_theta = ray.put(theta)
    pending = [worker.grad.remote(shared_theta) for worker in actors]
    summed = sum(ray.get(pending))
    # The float64 conversion is necessary for use with fmin_l_bfgs_b.
    return summed.astype("float64")
def step(self):
    """Run one training step: collect rollouts, estimate a gradient, update the policy.

    The flat policy weights are shared with the workers through the object
    store, rollout results are gathered, the noisy returns are turned into
    centered ranks, and a weighted sum of the corresponding noise vectors
    gives the update direction that is handed to ``self.optimizer``.

    Returns:
        A dict with ``episode_reward_mean``, ``episode_len_mean``,
        ``timesteps_this_iter`` and an ``info`` sub-dict of norms and
        episode counts.
    """
    config = self.config

    theta = self.policy.get_flat_weights()
    assert theta.dtype == np.float32
    assert len(theta.shape) == 1

    # Put the current policy weights in the object store.
    theta_id = ray.put(theta)
    # Use the actors to do rollouts. Note that we pass in the ID of the
    # policy weights as these are shared.
    results, num_episodes, num_timesteps = self._collect_results(
        theta_id, config["episodes_per_batch"], config["train_batch_size"])
    # Update our sample steps counters.
    self._counters[NUM_AGENT_STEPS_SAMPLED] += num_timesteps
    self._counters[NUM_ENV_STEPS_SAMPLED] += num_timesteps

    all_noise_indices = []
    all_training_returns = []
    all_training_lengths = []
    all_eval_returns = []
    all_eval_lengths = []

    # Loop over the results.
    for result in results:
        all_eval_returns += result.eval_returns
        all_eval_lengths += result.eval_lengths

        all_noise_indices += result.noise_indices
        all_training_returns += result.noisy_returns
        all_training_lengths += result.noisy_lengths

    assert len(all_eval_returns) == len(all_eval_lengths)
    assert (len(all_noise_indices) == len(all_training_returns) ==
            len(all_training_lengths))

    self.episodes_so_far += num_episodes

    # Assemble the results.
    eval_returns = np.array(all_eval_returns)
    eval_lengths = np.array(all_eval_lengths)
    noise_indices = np.array(all_noise_indices)
    noisy_returns = np.array(all_training_returns)
    noisy_lengths = np.array(all_training_lengths)

    # Process the returns.
    proc_noisy_returns = utils.compute_centered_ranks(noisy_returns)

    # Compute and take a step.
    # Each noise vector is weighted by the difference between columns 0 and
    # 1 of its processed returns.
    g, count = utils.batched_weighted_sum(
        proc_noisy_returns[:, 0] - proc_noisy_returns[:, 1],
        (self.noise.get(index, self.policy.num_params)
         for index in noise_indices),
        batch_size=500,
    )
    g /= noisy_returns.size
    assert (g.shape == (self.policy.num_params, ) and g.dtype == np.float32
            and count == len(noise_indices))
    # Compute the new weights theta.
    theta, update_ratio = self.optimizer.update(-g +
                                                config["l2_coeff"] * theta)
    # Update our train steps counters.
    self._counters[NUM_AGENT_STEPS_TRAINED] += num_timesteps
    self._counters[NUM_ENV_STEPS_TRAINED] += num_timesteps

    # Set the new weights in the local copy of the policy.
    self.policy.set_flat_weights(theta)
    # Store the rewards
    if len(all_eval_returns) > 0:
        self.reward_list.append(np.mean(eval_returns))

    # Now sync the filters
    FilterManager.synchronize(
        {DEFAULT_POLICY_ID: self.policy.observation_filter}, self.workers)

    info = {
        "weights_norm": np.square(theta).sum(),
        "grad_norm": np.square(g).sum(),
        "update_ratio": update_ratio,
        "episodes_this_iter": noisy_lengths.size,
        "episodes_so_far": self.episodes_so_far,
    }

    # Mean over the most recent report_length stored evaluation rewards.
    reward_mean = np.mean(self.reward_list[-self.report_length:])
    result = dict(
        episode_reward_mean=reward_mean,
        episode_len_mean=eval_lengths.mean(),
        timesteps_this_iter=noisy_lengths.sum(),
        info=info,
    )

    return result
real_batch = next(iter(dataloader)) plt.figure(figsize=(8, 8)) plt.axis("off") plt.title("Original Images") plt.imshow( np.transpose( vutils.make_grid(real_batch[0][:64], padding=2, normalize=True).cpu(), (1, 2, 0))) plt.show() # load the pretrained mnist classification model for inception_score mnist_cnn = Net() mnist_cnn.load_state_dict(torch.load(MODEL_PATH)) mnist_cnn.eval() mnist_model_ref = ray.put(mnist_cnn) # __tune_begin__ scheduler = PopulationBasedTraining( time_attr="training_iteration", metric="is_score", mode="max", perturbation_interval=5, hyperparam_mutations={ # distribution for resampling "netG_lr": lambda: np.random.uniform(1e-2, 1e-5), "netD_lr": lambda: np.random.uniform(1e-2, 1e-5), }) tune_iter = 5 if args.smoke_test else 300 analysis = tune.run(
def create_object_containing_ref():
    """Return a list of 10000 object refs, each from a separate ``ray.put(1)``."""
    return [ray.put(1) for _ in range(10000)]
def child(*xs):
    """Put a zeroed 1024 * 1024-element uint8 array and return its ref.

    The positional arguments are accepted but not used.
    """
    num_elements = 1024 * 1024
    return ray.put(np.zeros(num_elements, dtype=np.uint8))
def do_put():
    """Put ten freshly allocated int64 arrays of 10 * 1024 * 1024 elements each.

    The returned refs are dropped immediately.
    """
    num_elements = 10 * 1024 * 1024
    for _ in range(10):
        ray.put(np.zeros(num_elements, dtype=np.int64))
def churn():
    """Put a zeroed 1024 * 1024-element uint8 array and return its ref."""
    num_elements = 1024 * 1024
    new_ref = ray.put(np.zeros(num_elements, dtype=np.uint8))
    return new_ref
def do_put_small():
    """Put the integer 0 into the object store 100 times, dropping each ref."""
    remaining = 100
    while remaining > 0:
        ray.put(0)
        remaining -= 1
def __setitem__(self, key, value):
    """Store ``value`` in the Ray object store and register its metadata.

    The value is put in the object store, a ``ChunkMeta`` holding the
    resulting object ref and the value's ``shape`` (``None`` when the value
    has no ``shape`` attribute) is built, and the meta store actor is asked
    to record it under ``key``. The call blocks until the metadata write has
    completed, so the entry is recorded before this method returns.

    Args:
        key: Key under which the metadata is registered in the meta store.
        value: Object to store.
    """
    object_id = ray.put(value)
    shape = getattr(value, 'shape', None)
    meta = ChunkMeta(shape=shape, object_id=object_id)
    set_meta = self.meta_store.set_meta.remote(key, meta)
    # ray.wait returns as soon as `num_returns` refs are ready and defaults
    # to num_returns=1. The ref from ray.put is ready immediately, so the
    # default would return without ever waiting for set_meta. Ask for both.
    ray.wait([object_id, set_meta], num_returns=2)
def _worker():
    """Put a random array in the object store, force it to spill, return its ref."""
    # 1024 * 1024 float64 values: 8 MB data
    spilled_ref = ray.put(np.random.rand(1024 * 1024))
    ray.experimental.force_spill_objects([spilled_ref])
    return spilled_ref
def f(y):
    """Return the memory summary taken while a small object ref is held.

    A ref to the string "HI" is created before ``memory_summary`` is called
    for ``address`` (a name from the enclosing scope) and deleted only
    afterwards. The argument ``y`` is not used.
    """
    from ray.internal.internal_api import memory_summary

    held_ref = ray.put("HI")
    summary = memory_summary(address)
    # Drop the ref only after the summary has been captured.
    del held_ref
    return summary
def nested_ref():
    """Put the integer 1 into the object store and return its ref."""
    inner_ref = ray.put(1)
    return inner_ref
def __init__(self):
    """Store a zeroed 100000-element array and keep its ref as ``ref``."""
    num_elements = 100000
    self.ref = ray.put(np.zeros(num_elements))
def __init__(self):
    """Store the list ``[1, 2, 3]`` and keep its ref as ``obj_ref``."""
    payload = [1, 2, 3]
    self.obj_ref = ray.put(payload)
def test_object_directory_basic(ray_start_cluster_with_resource):
    """Check that tasks on any node can locate objects owned by the driver or by actors.

    Tasks are pinned to nodes through per-node custom resources named
    ``"0"``, ``"1"``, ... and receive 1 MiB objects created either by the
    driver (``ray.put``) or by ``ObjectHolder`` actors living on other nodes.

    Args:
        ray_start_cluster_with_resource: Fixture yielding ``(cluster,
            num_nodes)``.
    """
    cluster, num_nodes = ray_start_cluster_with_resource

    @ray.remote
    def task(x):
        # No-op: only exists to force its argument to be resolved.
        pass

    # Test a single task.
    x_id = ray.put(np.zeros(1024 * 1024, dtype=np.uint8))
    # NOTE(review): hard-codes resource "3", so this presumably assumes the
    # fixture provides at least four nodes -- confirm.
    ray.get(task.options(resources={str(3): 1}).remote(x_id), timeout=10)

    # Test multiple tasks on all nodes can find locations properly.
    object_refs = []
    for _ in range(num_nodes):
        object_refs.append(ray.put(np.zeros(1024 * 1024, dtype=np.uint8)))
    ray.get([
        task.options(resources={
            str(i): 1
        }).remote(object_refs[i]) for i in range(num_nodes)
    ])
    del object_refs

    @ray.remote
    class ObjectHolder:
        def __init__(self):
            self.x = ray.put(np.zeros(1024 * 1024, dtype=np.uint8))

        def get_obj(self):
            return self.x

        def ready(self):
            return True

    # Test if tasks can find object location properly
    # when there are multiple owners
    object_holders = [
        ObjectHolder.options(num_cpus=0.01, resources={
            str(i): 1
        }).remote() for i in range(num_nodes)
    ]
    ray.get([o.ready.remote() for o in object_holders])

    object_refs = []
    for i in range(num_nodes):
        # Entry i comes from the holder pinned to the *next* node.
        object_refs.append(object_holders[(i + 1) %
                                          num_nodes].get_obj.remote())
    ray.get([
        task.options(num_cpus=0.01, resources={
            str(i): 1
        }).remote(object_refs[i]) for i in range(num_nodes)
    ])

    # Test a stressful scenario.
    object_refs = []
    repeat = 10
    for _ in range(num_nodes):
        for _ in range(repeat):
            object_refs.append(ray.put(np.zeros(1024 * 1024, dtype=np.uint8)))
    tasks = []
    for i in range(num_nodes):
        for r in range(repeat):
            # NOTE(review): `i * r` is 0 whenever i == 0 or r == 0 and never
            # reaches most of the num_nodes * repeat refs created above;
            # presumably `i * repeat + r` was intended -- confirm.
            tasks.append(
                task.options(num_cpus=0.01, resources={
                    str(i): 0.1
                }).remote(object_refs[i * r]))
    ray.get(tasks)

    object_refs = []
    for i in range(num_nodes):
        object_refs.append(object_holders[(i + 1) %
                                          num_nodes].get_obj.remote())
    tasks = []
    for i in range(num_nodes):
        for _ in range(10):
            tasks.append(
                task.options(num_cpus=0.01, resources={
                    str(i): 0.1
                }).remote(object_refs[(i + 1) % num_nodes]))