Example #1
File: runtest.py Project: amplab/ray
  def testPutGet(self):
    ray.init(start_ray_local=True, num_workers=0)

    for i in range(100):
      value_before = i * 10 ** 6
      objectid = ray.put(value_before)
      value_after = ray.get(objectid)
      self.assertEqual(value_before, value_after)

    for i in range(100):
      value_before = i * 10 ** 6 * 1.0
      objectid = ray.put(value_before)
      value_after = ray.get(objectid)
      self.assertEqual(value_before, value_after)

    for i in range(100):
      value_before = "h" * i
      objectid = ray.put(value_before)
      value_after = ray.get(objectid)
      self.assertEqual(value_before, value_after)

    for i in range(100):
      value_before = [1] * i
      objectid = ray.put(value_before)
      value_after = ray.get(objectid)
      self.assertEqual(value_before, value_after)

    ray.worker.cleanup()
Example #2
File: runtest.py Project: amplab/ray
  def testObjStore(self):
    node_ip_address = "127.0.0.1"
    scheduler_address = ray.services.start_ray_local(num_objstores=2, num_workers=0, worker_path=None)
    ray.connect(node_ip_address, scheduler_address, mode=ray.SCRIPT_MODE)
    objstore_addresses = [objstore_info["address"] for objstore_info in ray.scheduler_info()["objstores"]]
    w1 = ray.worker.Worker()
    w2 = ray.worker.Worker()
    ray.reusables._cached_reusables = [] # This is a hack to make the test run.
    ray.connect(node_ip_address, scheduler_address, objstore_address=objstore_addresses[0], mode=ray.SCRIPT_MODE, worker=w1)
    ray.reusables._cached_reusables = [] # This is a hack to make the test run.
    ray.connect(node_ip_address, scheduler_address, objstore_address=objstore_addresses[1], mode=ray.SCRIPT_MODE, worker=w2)

    for cls in [Foo, Bar, Baz, Qux, SubQux, Exception, CustomError, Point, NamedTupleExample]:
      ray.register_class(cls)

    # putting and getting an object shouldn't change it
    for data in RAY_TEST_OBJECTS:
      objectid = ray.put(data, w1)
      result = ray.get(objectid, w1)
      assert_equal(result, data)

    # putting an object, shipping it to another worker, and getting it shouldn't change it
    for data in RAY_TEST_OBJECTS:
      objectid = ray.put(data, w1)
      result = ray.get(objectid, w2)
      assert_equal(result, data)

    # putting an object, shipping it to another worker, and getting it shouldn't change it
    for data in RAY_TEST_OBJECTS:
      objectid = ray.put(data, w2)
      result = ray.get(objectid, w1)
      assert_equal(result, data)

    # This test fails. See https://github.com/ray-project/ray/issues/159.
    # getting multiple times shouldn't matter
    # for data in [np.zeros([10, 20]), np.random.normal(size=[45, 25]), np.zeros([10, 20], dtype=np.dtype("float64")), np.zeros([10, 20], dtype=np.dtype("float32")), np.zeros([10, 20], dtype=np.dtype("int64")), np.zeros([10, 20], dtype=np.dtype("int32"))]:
    #   objectid = worker.put(data, w1)
    #   result = worker.get(objectid, w2)
    #   result = worker.get(objectid, w2)
    #   result = worker.get(objectid, w2)
    #   assert_equal(result, data)

    # Modifying a buffer before finishing it and then getting it should return the updated buffer.
    objectid = ray.libraylib.get_objectid(w1.handle)
    buf = ray.libraylib.allocate_buffer(w1.handle, objectid, 100)
    buf[0][0] = 1
    ray.libraylib.finish_buffer(w1.handle, objectid, buf[1], 0)
    completedbuffer = ray.libraylib.get_buffer(w1.handle, objectid)
    self.assertEqual(completedbuffer[0][0], 1)

    # We started multiple drivers manually, so we will disconnect them manually.
    ray.disconnect(worker=w1)
    ray.disconnect(worker=w2)
    ray.worker.cleanup()
Example #3
def test_getting_and_putting(ray_start_sharded):
    for n in range(8):
        x = np.zeros(10**n)

        for _ in range(100):
            ray.put(x)

        x_id = ray.put(x)
        for _ in range(1000):
            ray.get(x_id)

    assert ray.services.remaining_processes_alive()
Example #4
    def testGettingAndPutting(self):
        ray.init(num_workers=1)

        for n in range(8):
            x = np.zeros(10 ** n)

            for _ in range(100):
                ray.put(x)

            x_id = ray.put(x)
            for _ in range(1000):
                ray.get(x_id)

        self.assertTrue(ray.services.all_processes_alive())
        ray.worker.cleanup()
Example #5
File: async.py Project: adgirish/ray
    def step(self):
        weights = ray.put(self.local_evaluator.get_weights())
        gradient_queue = []
        num_gradients = 0

        # Kick off the first wave of async tasks
        for e in self.remote_evaluators:
            e.set_weights.remote(weights)
            fut = e.compute_gradients.remote(e.sample.remote())
            gradient_queue.append((fut, e))
            num_gradients += 1

        # Note: can't use wait: https://github.com/ray-project/ray/issues/1128
        while gradient_queue:
            with self.wait_timer:
                fut, e = gradient_queue.pop(0)
                gradient = ray.get(fut)

            if gradient is not None:
                with self.apply_timer:
                    self.local_evaluator.apply_gradients(gradient)

            if num_gradients < self.grads_per_step:
                with self.dispatch_timer:
                    e.set_weights.remote(self.local_evaluator.get_weights())
                    fut = e.compute_gradients.remote(e.sample.remote())
                    gradient_queue.append((fut, e))
                    num_gradients += 1

        self.num_steps_sampled += self.grads_per_step * self.batch_size
        self.num_steps_trained += self.grads_per_step * self.batch_size
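
The optimizer above uses a common broadcast idiom: the local weights are placed in the object store once with ray.put, and only the resulting ID is passed to every remote call, so the large payload is shipped to each node at most once. Below is a minimal standalone sketch of that idiom; the function and variable names are illustrative and not taken from the example above.

import numpy as np
import ray

ray.init()

@ray.remote
def use_weights(weights, index):
    # Each task receives the deserialized array fetched from the object store.
    return index

weights = np.random.rand(100, 100)
weights_id = ray.put(weights)  # stored once, shared by ID
results = ray.get([use_weights.remote(weights_id, i) for i in range(10)])
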
Example #6
File: linalg.py Project: jamescasbon/ray
def modified_lu(q):
    """Perform a modified LU decomposition of a matrix.

    This takes a matrix q with orthonormal columns, returns l, u, s such that
    q - s = l * u.

    Args:
        q: A two dimensional orthonormal matrix q.

    Returns:
        A tuple of a lower triangular matrix l, an upper triangular matrix u,
            and a vector representing a diagonal matrix s such that
            q - s = l * u.
    """
    q = q.assemble()
    m, b = q.shape[0], q.shape[1]
    S = np.zeros(b)

    q_work = np.copy(q)

    for i in range(b):
        S[i] = -1 * np.sign(q_work[i, i])
        q_work[i, i] -= S[i]
        # Scale ith column of L by diagonal element.
        q_work[(i + 1):m, i] /= q_work[i, i]
        # Perform Schur complement update.
        q_work[(i + 1):m, (i + 1):b] -= np.outer(q_work[(i + 1):m, i],
                                                 q_work[i, (i + 1):b])

    L = np.tril(q_work)
    for i in range(b):
        L[i, i] = 1
    U = np.triu(q_work)[:b, :]
    # TODO(rkn): Get rid of the put below.
    return ray.get(core.numpy_to_dist.remote(ray.put(L))), U, S
Example #7
File: runtest.py Project: amplab/ray
  def testRecursiveObjects(self):
    ray.init(start_ray_local=True, num_workers=0)

    class ClassA(object):
      pass

    ray.register_class(ClassA)

    # Make a list that contains itself.
    l = []
    l.append(l)
    # Make an object that contains itself as a field.
    a1 = ClassA()
    a1.field = a1
    # Make two objects that contain each other as fields.
    a2 = ClassA()
    a3 = ClassA()
    a2.field = a3
    a3.field = a2
    # Make a dictionary that contains itself.
    d1 = {}
    d1["key"] = d1
    # Create a list of recursive objects.
    recursive_objects = [l, a1, a2, a3, d1]

    # Check that exceptions are thrown when we serialize the recursive objects.
    for obj in recursive_objects:
      self.assertRaises(Exception, lambda : ray.put(obj))

    ray.worker.cleanup()
Example #8
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            samples = []
            while sum(s.count for s in samples) < self.train_batch_size:
                if self.remote_evaluators:
                    samples.extend(
                        ray.get([
                            e.sample.remote() for e in self.remote_evaluators
                        ]))
                else:
                    samples.append(self.local_evaluator.sample())
            samples = SampleBatch.concat_samples(samples)
            self.sample_timer.push_units_processed(samples.count)

        with self.grad_timer:
            for i in range(self.num_sgd_iter):
                fetches = self.local_evaluator.compute_apply(samples)
                if "stats" in fetches:
                    self.learner_stats = fetches["stats"]
                if self.num_sgd_iter > 1:
                    logger.debug("{} {}".format(i, fetches))
            self.grad_timer.push_units_processed(samples.count)

        self.num_steps_sampled += samples.count
        self.num_steps_trained += samples.count
        return fetches
Example #9
File: linalg.py Project: amplab/ray
def modified_lu(q):
  """
  Algorithm 5 from http://www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-175.pdf
  takes a matrix q with orthonormal columns, returns l, u, s such that q - s = l * u
  arguments:
    q: a two dimensional orthonormal q
  return values:
    l: lower triangular
    u: upper triangular
    s: a diagonal matrix represented by its diagonal
  """
  q = q.assemble()
  m, b = q.shape[0], q.shape[1]
  S = np.zeros(b)

  q_work = np.copy(q)

  for i in range(b):
    S[i] = -1 * np.sign(q_work[i, i])
    q_work[i, i] -= S[i]
    q_work[(i + 1):m, i] /= q_work[i, i] # scale ith column of L by diagonal element
    q_work[(i + 1):m, (i + 1):b] -= np.outer(q_work[(i + 1):m, i], q_work[i, (i + 1):b]) # perform Schur complement update

  L = np.tril(q_work)
  for i in range(b):
    L[i, i] = 1
  U = np.triu(q_work)[:b, :]
  return ray.get(numpy_to_dist.remote(ray.put(L))), U, S # TODO(rkn): get rid of put
Example #10
File: core.py Project: amplab/ray
def numpy_to_dist(a):
  result = DistArray(a.shape)
  for index in np.ndindex(*result.num_blocks):
    lower = DistArray.compute_block_lower(index, a.shape)
    upper = DistArray.compute_block_upper(index, a.shape)
    result.objectids[index] = ray.put(a[[slice(l, u) for (l, u) in zip(lower, upper)]])
  return result
Example #11
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                batches = ray.get(
                    [e.sample.remote() for e in self.remote_evaluators])
            else:
                batches = [self.local_evaluator.sample()]

            # Handle everything as if multiagent
            tmp = []
            for batch in batches:
                if isinstance(batch, SampleBatch):
                    batch = MultiAgentBatch({
                        DEFAULT_POLICY_ID: batch
                    }, batch.count)
                tmp.append(batch)
            batches = tmp

            for batch in batches:
                self.replay_buffer.append(batch)
                self.num_steps_sampled += batch.count
                self.buffer_size += batch.count
                while self.buffer_size > self.max_buffer_size:
                    evicted = self.replay_buffer.pop(0)
                    self.buffer_size -= evicted.count

        if self.num_steps_sampled >= self.replay_starts:
            self._optimize()
Example #12
File: runtest.py Project: amplab/ray
  def testRegisterClass(self):
    ray.init(start_ray_local=True, num_workers=0)

    # Check that putting an object of a class that has not been registered
    # throws an exception.
    class TempClass(object):
      pass
    self.assertRaises(Exception, lambda : ray.put(TempClass()))
    # Check that registering a class that Ray cannot serialize efficiently
    # raises an exception.
    self.assertRaises(Exception, lambda : ray.register_class(type(True)))
    # Check that registering the same class with pickle works.
    ray.register_class(type(float), pickle=True)
    self.assertEqual(ray.get(ray.put(float)), float)

    ray.worker.cleanup()
Example #13
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                batch = SampleBatch.concat_samples(
                    ray.get(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                batch = self.local_evaluator.sample()

            # Handle everything as if multiagent
            if isinstance(batch, SampleBatch):
                batch = MultiAgentBatch({
                    DEFAULT_POLICY_ID: batch
                }, batch.count)

            for policy_id, s in batch.policy_batches.items():
                for row in s.rows():
                    self.replay_buffers[policy_id].add(
                        pack_if_needed(row["obs"]),
                        row["actions"],
                        row["rewards"],
                        pack_if_needed(row["new_obs"]),
                        row["dones"],
                        weight=None)

        if self.num_steps_sampled >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
Example #14
 def __setstate__(self, state):
     if "evaluator" in state:
         self.local_evaluator.restore(state["evaluator"])
         remote_state = ray.put(state["evaluator"])
         for r in self.remote_evaluators:
             r.restore.remote(remote_state)
     if "optimizer" in state:
         self.optimizer.restore(state["optimizer"])
Example #15
    def Driver(success):
        success.value = True
        # Start driver.
        ray.init(redis_address=redis_address)
        summary_start = StateSummary()
        if (0, 1) != summary_start[:2]:
            success.value = False

        max_attempts_before_failing = 100

        # Two new objects.
        ray.get(ray.put(1111))
        ray.get(ray.put(1111))
        attempts = 0
        while (2, 1, summary_start[2]) != StateSummary():
            time.sleep(0.1)
            attempts += 1
            if attempts == max_attempts_before_failing:
                success.value = False
                break

        @ray.remote
        def f():
            ray.put(1111)  # Yet another object.
            return 1111  # A returned object as well.

        # 1 new function.
        attempts = 0
        while (2, 1, summary_start[2] + 1) != StateSummary():
            time.sleep(0.1)
            attempts += 1
            if attempts == max_attempts_before_failing:
                success.value = False
                break

        ray.get(f.remote())
        attempts = 0
        while (4, 2, summary_start[2] + 1) != StateSummary():
            time.sleep(0.1)
            attempts += 1
            if attempts == max_attempts_before_failing:
                success.value = False
                break

        ray.shutdown()
Example #16
def test_cache(ray_start_regular):
    A = np.random.rand(1, 1000000)
    v = np.random.rand(1000000)
    A_id = ray.put(A)
    v_id = ray.put(v)
    a = time.time()
    for i in range(100):
        A.dot(v)
    b = time.time() - a
    c = time.time()
    for i in range(100):
        ray.get(A_id).dot(ray.get(v_id))
    d = time.time() - c

    if d > 1.5 * b:
        if os.getenv("TRAVIS") is None:
            raise Exception("The caching test was too slow. "
                            "d = {}, b = {}".format(d, b))
        else:
            print("WARNING: The caching test was too slow. "
                  "d = {}, b = {}".format(d, b))
Example #17
File: util.py Project: robertnishihara/ray
def pin_in_object_store(obj):
    """Pin an object in the object store.

    It will be available as long as the pinning process is alive. The pinned
    object can be retrieved by calling get_pinned_object on the identifier
    returned by this call.
    """

    obj_id = ray.put(_to_pinnable(obj))
    _pinned_objects.append(ray.get(obj_id))
    return "{}{}".format(PINNED_OBJECT_PREFIX,
                         base64.b64encode(obj_id.binary()).decode("utf-8"))
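
As the docstring says, an object pinned this way is read back with get_pinned_object using the returned identifier. A short usage sketch, assuming both helpers come from the same module:

pinned_id = pin_in_object_store({"weights": [1, 2, 3]})
# ... later, from a process connected to the same Ray cluster ...
restored = get_pinned_object(pinned_id)
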
Example #18
File: utils.py Project: adgirish/ray
def from_pandas(df, npartitions=None, chunksize=None):
    """Converts a pandas DataFrame to a Ray DataFrame.

    Args:
        df (pandas.DataFrame): The pandas DataFrame to convert.
        npartitions (int): The number of partitions to split the DataFrame
            into. Has priority over chunksize.
        chunksize (int): The number of rows to put in each partition.

    Returns:
        A new Ray DataFrame object.
    """
    from .dataframe import DataFrame

    if npartitions is not None:
        chunksize = int(len(df) / npartitions)
    elif chunksize is None:
        raise ValueError("The number of partitions or chunksize must be set.")

    temp_df = df

    dataframes = []
    lengths = []
    while len(temp_df) > chunksize:
        t_df = temp_df[:chunksize]
        lengths.append(len(t_df))
        # reset_index here because we want a pd.RangeIndex
        # within the partitions. It is smaller and sometimes faster.
        t_df = t_df.reset_index(drop=True)
        top = ray.put(t_df)
        dataframes.append(top)
        temp_df = temp_df[chunksize:]
    else:
        temp_df = temp_df.reset_index(drop=True)
        dataframes.append(ray.put(temp_df))
        lengths.append(len(temp_df))

    return DataFrame(dataframes, df.columns, index=df.index)
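
A brief usage sketch for the converter above (a hypothetical call, assuming from_pandas is importable from this module):

import pandas as pd

pandas_df = pd.DataFrame({"a": range(1000), "b": range(1000)})
ray_df = from_pandas(pandas_df, npartitions=4)  # four partitions stored via ray.put
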
Example #19
File: ars.py Project: zhan0903/ARS
    def train(self, num_iter):

        start = time.time()
        for i in range(num_iter):
            
            t1 = time.time()
            self.train_step()
            t2 = time.time()
            print('total time of one step', t2 - t1)           
            print('iter ', i,' done')

            # record statistics every 10 iterations
            if ((i + 1) % 10 == 0):
                
                rewards = self.aggregate_rollouts(num_rollouts = 100, evaluate = True)
                w = ray.get(self.workers[0].get_weights_plus_stats.remote())
                np.savez(self.logdir + "/lin_policy_plus", w)
                
                print(sorted(self.params.items()))
                logz.log_tabular("Time", time.time() - start)
                logz.log_tabular("Iteration", i + 1)
                logz.log_tabular("AverageReward", np.mean(rewards))
                logz.log_tabular("StdRewards", np.std(rewards))
                logz.log_tabular("MaxRewardRollout", np.max(rewards))
                logz.log_tabular("MinRewardRollout", np.min(rewards))
                logz.log_tabular("timesteps", self.timesteps)
                logz.dump_tabular()
                
            t1 = time.time()
            # get statistics from all workers
            for j in range(self.num_workers):
                self.policy.observation_filter.update(ray.get(self.workers[j].get_filter.remote()))
            self.policy.observation_filter.stats_increment()

            # make sure master filter buffer is clear
            self.policy.observation_filter.clear_buffer()
            # sync all workers
            filter_id = ray.put(self.policy.observation_filter)
            setting_filters_ids = [worker.sync_filter.remote(filter_id) for worker in self.workers]
            # waiting for sync of all workers
            ray.get(setting_filters_ids)
         
            increment_filters_ids = [worker.stats_increment.remote() for worker in self.workers]
            # waiting for increment of all workers
            ray.get(increment_filters_ids)            
            t2 = time.time()
            print('Time to sync statistics:', t2 - t1)
                        
        return 
Example #20
    def _step(self):
        sample_timesteps, train_timesteps = 0, 0
        weights = None

        with self.timers["sample_processing"]:
            completed = list(self.sample_tasks.completed())
            counts = ray.get([c[1][1] for c in completed])
            for i, (ev, (sample_batch, count)) in enumerate(completed):
                sample_timesteps += counts[i]

                # Send the data to the replay buffer
                random.choice(
                    self.replay_actors).add_batch.remote(sample_batch)

                # Update weights if needed
                self.steps_since_update[ev] += counts[i]
                if self.steps_since_update[ev] >= self.max_weight_sync_delay:
                    # Note that it's important to pull new weights once
                    # updated to avoid excessive correlation between actors
                    if weights is None or self.learner.weights_updated:
                        self.learner.weights_updated = False
                        with self.timers["put_weights"]:
                            weights = ray.put(
                                self.local_evaluator.get_weights())
                    ev.set_weights.remote(weights)
                    self.num_weight_syncs += 1
                    self.steps_since_update[ev] = 0

                # Kick off another sample request
                self.sample_tasks.add(ev, ev.sample_with_count.remote())

        with self.timers["replay_processing"]:
            for ra, replay in self.replay_tasks.completed():
                self.replay_tasks.add(ra, ra.replay.remote())
                if self.learner.inqueue.full():
                    self.num_samples_dropped += 1
                else:
                    with self.timers["get_samples"]:
                        samples = ray.get(replay)
                    # Defensive copy against plasma crashes, see #2610 #3452
                    self.learner.inqueue.put((ra, samples and samples.copy()))

        with self.timers["update_priorities"]:
            while not self.learner.outqueue.empty():
                ra, prio_dict, count = self.learner.outqueue.get()
                ra.update_priorities.remote(prio_dict)
                train_timesteps += count

        return sample_timesteps, train_timesteps
Example #21
File: runtest.py Project: amplab/ray
  def testGet(self):
    ray.init(start_ray_local=True, num_workers=3)

    for cls in [Foo, Bar, Baz, Qux, SubQux, Exception, CustomError, Point, NamedTupleExample]:
      ray.register_class(cls)

    # Remote objects should be deallocated when the corresponding ObjectID goes
    # out of scope, and all results of ray.get called on the ID go out of scope.
    for val in RAY_TEST_OBJECTS:
      x = ray.put(val)
      objectid = x.id
      xval = ray.get(x)
      del x, xval
      self.assertEqual(ray.scheduler_info()["reference_counts"][objectid], -1)

    # Remote objects that do not contain numpy arrays should be deallocated when
    # the corresponding ObjectID goes out of scope, even if ray.get has been
    # called on the ObjectID.
    for val in [True, False, None, 1, 1.0, 1L, "hi", u"hi", [1, 2, 3], (1, 2, 3), [(), {(): ()}]]:
      x = ray.put(val)
      objectid = x.id
      xval = ray.get(x)
      del x
      self.assertEqual(ray.scheduler_info()["reference_counts"][objectid], -1)
Example #22
def train():
    num_gpus = FLAGS.num_gpus
    if FLAGS.redis_address is None:
        ray.init(num_gpus=num_gpus)
    else:
        ray.init(redis_address=FLAGS.redis_address)
    train_data = get_data.remote(FLAGS.train_data_path, 50000, FLAGS.dataset)
    test_data = get_data.remote(FLAGS.eval_data_path, 10000, FLAGS.dataset)
    # Creates an actor for each gpu, or one if only using the cpu. Each actor
    # has access to the dataset.
    if FLAGS.num_gpus > 0:
        train_actors = [
            ResNetTrainActor.remote(train_data, FLAGS.dataset, num_gpus)
            for _ in range(num_gpus)
        ]
    else:
        train_actors = [ResNetTrainActor.remote(train_data, FLAGS.dataset, 0)]
    test_actor = ResNetTestActor.remote(test_data, FLAGS.dataset,
                                        FLAGS.eval_batch_count, FLAGS.eval_dir)
    print("The log files for tensorboard are stored at ip {}.".format(
        ray.get(test_actor.get_ip_addr.remote())))
    step = 0
    weight_id = train_actors[0].get_weights.remote()
    acc_id = test_actor.accuracy.remote(weight_id, step)
    # Correction for dividing the weights by the number of gpus.
    if num_gpus == 0:
        num_gpus = 1
    print("Starting training loop. Use Ctrl-C to exit.")
    try:
        while True:
            all_weights = ray.get([
                actor.compute_steps.remote(weight_id) for actor in train_actors
            ])
            mean_weights = {
                k: (sum(weights[k] for weights in all_weights) / num_gpus)
                for k in all_weights[0]
            }
            weight_id = ray.put(mean_weights)
            step += 10
            if step % 200 == 0:
                # Retrieves the previously computed accuracy and launches a new
                # testing task with the current weights every 200 steps.
                acc = ray.get(acc_id)
                acc_id = test_actor.accuracy.remote(weight_id, step)
                print("Step {}: {:.6f}".format(step - 200, acc))
    except KeyboardInterrupt:
        pass
Example #23
    def _step(self):
        sample_timesteps, train_timesteps = 0, 0
        num_sent = 0
        weights = None

        for ev, sample_batch in self._augment_with_replay(
                self.sample_tasks.completed_prefetch()):
            self.batch_buffer.append(sample_batch)
            if sum(b.count
                   for b in self.batch_buffer) >= self.train_batch_size:
                train_batch = self.batch_buffer[0].concat_samples(
                    self.batch_buffer)
                self.learner.inqueue.put(train_batch)
                self.batch_buffer = []

            # If the batch was replayed, skip the update below.
            if ev is None:
                continue

            sample_timesteps += sample_batch.count

            # Put in replay buffer if enabled
            if self.replay_buffer_num_slots > 0:
                self.replay_batches.append(sample_batch)
                if len(self.replay_batches) > self.replay_buffer_num_slots:
                    self.replay_batches.pop(0)

            # Note that it's important to pull new weights once
            # updated to avoid excessive correlation between actors
            if weights is None or (self.learner.weights_updated
                                   and num_sent >= self.broadcast_interval):
                self.learner.weights_updated = False
                weights = ray.put(self.local_evaluator.get_weights())
                num_sent = 0
            ev.set_weights.remote(weights)
            self.num_weight_syncs += 1
            num_sent += 1

            # Kick off another sample request
            self.sample_tasks.add(ev, ev.sample.remote())

        while not self.learner.outqueue.empty():
            count = self.learner.outqueue.get()
            train_timesteps += count

        return sample_timesteps, train_timesteps
Example #24
    def synchronize(local_filters, remotes):
        """Aggregates all filters from remote evaluators.

        Local copy is updated and then broadcasted to all remote evaluators.

        Args:
            local_filters (dict): Filters to be synchronized.
            remotes (list): Remote evaluators with filters.
        """
        remote_filters = ray.get(
            [r.get_filters.remote(flush_after=True) for r in remotes])
        for rf in remote_filters:
            for k in local_filters:
                local_filters[k].apply_changes(rf[k], with_buffer=False)
        copies = {k: v.as_serializable() for k, v in local_filters.items()}
        remote_copy = ray.put(copies)
        [r.sync_filters.remote(remote_copy) for r in remotes]
Example #25
        def put_task():
            # Launch num_objects instances of the remote task, each dependent
            # on the one before it. The result of the first task should get
            # evicted.
            args = []
            arg = ray.put(np.zeros(object_size, dtype=np.uint8))
            for i in range(num_objects):
                arg = single_dependency.remote(i, arg)
                args.append(arg)

            # Get the last value to force all tasks to finish.
            value = ray.get(args[-1])
            assert value[0] == i

            # Get the first value (which should have been evicted) to force
            # reconstruction. Currently, since we're not able to reconstruct
            # `ray.put` objects that were evicted and whose originating tasks
            # are still running, this for-loop should hang and push an error to
            # the driver.
            ray.get(args[0])
Example #26
File: util.py Project: jamescasbon/ray
def warmup():
    logger.info("Warming up object store")
    zeros = np.zeros(int(100e6 / 8), dtype=np.float64)
    start = time.time()
    for _ in range(10):
        ray.put(zeros)
    logger.info("Initial latency for 100MB put {}".format(
        (time.time() - start) / 10))
    for _ in range(5):
        for _ in range(100):
            ray.put(zeros)
        start = time.time()
        for _ in range(10):
            ray.put(zeros)
        logger.info("Warmed up latency for 100MB put {}".format(
            (time.time() - start) / 10))
Example #27
File: local_sync.py Project: adgirish/ray
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                samples = SampleBatch.concat_samples(
                    ray.get(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                samples = self.local_evaluator.sample()

        with self.grad_timer:
            grad = self.local_evaluator.compute_gradients(samples)
            self.local_evaluator.apply_gradients(grad)
            self.grad_timer.push_units_processed(samples.count)

        self.num_steps_sampled += samples.count
        self.num_steps_trained += samples.count
Example #28
File: runtest.py Project: amplab/ray
  def testPythonMode(self):
    reload(test_functions)
    ray.init(start_ray_local=True, driver_mode=ray.PYTHON_MODE)

    @ray.remote
    def f():
      return np.ones([3, 4, 5])
    xref = f.remote()
    assert_equal(xref, np.ones([3, 4, 5])) # remote functions should return by value
    assert_equal(xref, ray.get(xref)) # ray.get should be the identity
    y = np.random.normal(size=[11, 12])
    assert_equal(y, ray.put(y)) # ray.put should be the identity

    # Make sure objects are immutable. This example is why we need to copy
    # arguments before passing them into remote functions in Python mode.
    aref = test_functions.python_mode_f.remote()
    assert_equal(aref, np.array([0, 0]))
    bref = test_functions.python_mode_g.remote(aref)
    assert_equal(aref, np.array([0, 0])) # python_mode_g should not mutate aref
    assert_equal(bref, np.array([1, 0]))

    ray.worker.cleanup()
Example #29
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                batch = SampleBatch.concat_samples(
                    ray.get(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                batch = self.local_evaluator.sample()
            for row in batch.rows():
                self.replay_buffer.add(
                    row["obs"], row["actions"], row["rewards"], row["new_obs"],
                    row["dones"], row["weights"])

        if len(self.replay_buffer) >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
Example #30
File: multi_gpu.py Project: adgirish/ray
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                samples = SampleBatch.concat_samples(
                    ray.get(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                samples = self.local_evaluator.sample()
            assert isinstance(samples, SampleBatch)

        with self.load_timer:
            tuples_per_device = self.par_opt.load_data(
                self.local_evaluator.sess,
                samples.columns([key for key, _ in self.loss_inputs]))

        with self.grad_timer:
            for i in range(self.num_sgd_iter):
                batch_index = 0
                num_batches = (
                    int(tuples_per_device) // int(self.per_device_batch_size))
                permutation = np.random.permutation(num_batches)
                while batch_index < num_batches:
                    # TODO(ekl) support ppo's debugging features, e.g.
                    # printing the current loss and tracing
                    self.par_opt.optimize(
                        self.sess,
                        permutation[batch_index] * self.per_device_batch_size)
                    batch_index += 1

        self.num_steps_sampled += samples.count
        self.num_steps_trained += samples.count
Example #31
def test_workflow_storage(workflow_start_regular):
    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(workflow_id,
                                                  storage.get_global_storage())
    step_id = "some_step"
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "step_type": StepType.FUNCTION,
        "object_refs": ["abc"],
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "max_retries": 1,
        "catch_exceptions": False,
        "ray_options": {},
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    # test basics
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_args(step_id), flattened_args))

    asyncio_run(
        wf_storage._put(wf_storage._key_obj_id(obj_ref.hex()),
                        ray.get(obj_ref)))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_output_metadata(step_id),
                        output_metadata, True))
    asyncio_run(wf_storage._put(wf_storage._key_step_output(step_id), output))

    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], [], []) == args
    assert wf_storage.load_step_func_body(step_id)(33) == 34
    assert ray.get(wf_storage.load_object_ref(
        obj_ref.hex())) == object_resolved

    # test "inspect_step"
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    step_id = "some_step2"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_output_metadata(step_id),
                        output_metadata, True))

    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    step_id = "some_step3"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        step_type=StepType.FUNCTION,
        args_valid=True,
        func_body_valid=True,
        object_refs=input_metadata["object_refs"],
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        ray_options={})
    assert inspect_result.is_recoverable()

    step_id = "some_step4"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        step_type=StepType.FUNCTION,
        func_body_valid=True,
        object_refs=input_metadata["object_refs"],
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        ray_options={})
    assert not inspect_result.is_recoverable()

    step_id = "some_step5"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        step_type=StepType.FUNCTION,
        object_refs=input_metadata["object_refs"],
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        ray_options={})
    assert not inspect_result.is_recoverable()

    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()
Example #32
File: ray_perf.py Project: stjordanis/ray
 def put_small():
     ray.put(0)
Example #33
File: ray_perf.py Project: stjordanis/ray
 def put_large():
     ray.put(arr)
Example #34
File: ray_perf.py Project: stjordanis/ray
 def small_value_batch_arg(self, n):
     x = ray.put(0)
     results = []
     for s in self.servers:
         results.extend([s.small_value_arg.remote(x) for _ in range(n)])
     ray.get(results)
Example #35
File: ray_perf.py Project: stjordanis/ray
def main(results=None):
    results = results or []

    check_optimized_build()

    print("Tip: set TESTS_TO_RUN='pattern' to run a subset of benchmarks")

    ray.init()

    value = ray.put(0)

    def get_small():
        ray.get(value)

    def put_small():
        ray.put(0)

    @ray.remote
    def do_put_small():
        for _ in range(100):
            ray.put(0)

    def put_multi_small():
        ray.get([do_put_small.remote() for _ in range(10)])

    arr = np.zeros(100 * 1024 * 1024, dtype=np.int64)

    results += timeit("single client get calls (Plasma Store)", get_small)

    results += timeit("single client put calls (Plasma Store)", put_small)

    results += timeit("multi client put calls (Plasma Store)", put_multi_small,
                      1000)

    def put_large():
        ray.put(arr)

    results += timeit("single client put gigabytes", put_large, 8 * 0.1)

    def small_value_batch():
        submitted = [small_value.remote() for _ in range(1000)]
        ray.get(submitted)
        return 0

    results += timeit("single client tasks and get batch", small_value_batch)

    @ray.remote
    def do_put():
        for _ in range(10):
            ray.put(np.zeros(10 * 1024 * 1024, dtype=np.int64))

    def put_multi():
        ray.get([do_put.remote() for _ in range(10)])

    results += timeit("multi client put gigabytes", put_multi, 10 * 8 * 0.1)

    obj_containing_ref = create_object_containing_ref.remote()

    def get_containing_object_ref():
        ray.get(obj_containing_ref)

    results += timeit("single client get object containing 10k refs",
                      get_containing_object_ref)

    def small_task():
        ray.get(small_value.remote())

    results += timeit("single client tasks sync", small_task)

    def small_task_async():
        ray.get([small_value.remote() for _ in range(1000)])

    results += timeit("single client tasks async", small_task_async, 1000)

    n = 10000
    m = 4
    actors = [Actor.remote() for _ in range(m)]

    def multi_task():
        submitted = [a.small_value_batch.remote(n) for a in actors]
        ray.get(submitted)

    results += timeit("multi client tasks async", multi_task, n * m)

    a = Actor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 actor calls sync", actor_sync)

    a = Actor.remote()

    def actor_async():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls async", actor_async, 1000)

    a = Actor.options(max_concurrency=16).remote()

    def actor_concurrent():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 actor calls concurrent", actor_concurrent, 1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [Actor._remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def actor_async_direct():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n actor calls async", actor_async_direct,
                      n * len(actors))

    n_cpu = multiprocessing.cpu_count() // 2
    a = [Actor.remote() for _ in range(n_cpu)]

    @ray.remote
    def work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def actor_multi2():
        ray.get([work.remote(a) for _ in range(m)])

    results += timeit("n:n actor calls async", actor_multi2, m * n)

    n = 1000
    actors = [Actor._remote() for _ in range(n_cpu)]
    clients = [Client.remote(a) for a in actors]

    def actor_multi2_direct_arg():
        ray.get([c.small_value_batch_arg.remote(n) for c in clients])

    results += timeit("n:n actor calls with arg async",
                      actor_multi2_direct_arg, n * len(clients))

    a = AsyncActor.remote()

    def actor_sync():
        ray.get(a.small_value.remote())

    results += timeit("1:1 async-actor calls sync", actor_sync)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value.remote() for _ in range(1000)])

    results += timeit("1:1 async-actor calls async", async_actor, 1000)

    a = AsyncActor.remote()

    def async_actor():
        ray.get([a.small_value_with_arg.remote(i) for i in range(1000)])

    results += timeit("1:1 async-actor calls with args async", async_actor,
                      1000)

    n = 5000
    n_cpu = multiprocessing.cpu_count() // 2
    actors = [AsyncActor.remote() for _ in range(n_cpu)]
    client = Client.remote(actors)

    def async_actor_async():
        ray.get(client.small_value_batch.remote(n))

    results += timeit("1:n async-actor calls async", async_actor_async,
                      n * len(actors))

    n = 5000
    m = 4
    n_cpu = multiprocessing.cpu_count() // 2
    a = [AsyncActor.remote() for _ in range(n_cpu)]

    @ray.remote
    def async_actor_work(actors):
        ray.get([actors[i % n_cpu].small_value.remote() for i in range(n)])

    def async_actor_multi():
        ray.get([async_actor_work.remote(a) for _ in range(m)])

    results += timeit("n:n async-actor calls async", async_actor_multi, m * n)
    ray.shutdown()

    NUM_PGS = 100
    NUM_BUNDLES = 1
    ray.init(resources={"custom": 100})

    def placement_group_create_removal(num_pgs):
        pgs = [
            ray.util.placement_group(bundles=[{
                "custom": 0.001
            } for _ in range(NUM_BUNDLES)]) for _ in range(num_pgs)
        ]
        [pg.wait(timeout_seconds=30) for pg in pgs]
        # Include placement group removal here to clean up.
        # If we don't clean up placement groups, the whole performance
        # gets slower as it runs more.
        # Since timeit function runs multiple times without
        # the cleaning logic, we should have this method here.
        for pg in pgs:
            ray.util.remove_placement_group(pg)

    results += timeit("placement group create/removal",
                      lambda: placement_group_create_removal(NUM_PGS), NUM_PGS)
    ray.shutdown()

    client_microbenchmark_main(results)

    return results
Example #36
File: ppo.py Project: zwhinmedia/ray
    def _train(self):
        agents = self.remote_evaluators
        config = self.config
        model = self.local_evaluator

        print("===> iteration", self.iteration)

        iter_start = time.time()
        weights = ray.put(model.get_weights())
        [a.set_weights.remote(weights) for a in agents]
        samples = collect_samples(agents, config, self.local_evaluator)

        def standardized(value):
            # Divide by the maximum of value.std() and 1e-4
            # to guard against the case where all values are equal
            return (value - value.mean()) / max(1e-4, value.std())

        samples.data["advantages"] = standardized(samples["advantages"])

        rollouts_end = time.time()
        print("Computing policy (iterations=" + str(config["num_sgd_iter"]) +
              ", stepsize=" + str(config["sgd_stepsize"]) + "):")
        names = [
            "iter", "total loss", "policy loss", "vf loss", "kl", "entropy"
        ]
        print(("{:>15}" * len(names)).format(*names))
        samples.shuffle()
        shuffle_end = time.time()
        tuples_per_device = model.load_data(
            samples, self.iteration == 0 and config["full_trace_data_load"])
        load_end = time.time()
        rollouts_time = rollouts_end - iter_start
        shuffle_time = shuffle_end - rollouts_end
        load_time = load_end - shuffle_end
        sgd_time = 0
        for i in range(config["num_sgd_iter"]):
            sgd_start = time.time()
            batch_index = 0
            num_batches = (int(tuples_per_device) //
                           int(model.per_device_batch_size))
            loss, policy_loss, vf_loss, kl, entropy = [], [], [], [], []
            permutation = np.random.permutation(num_batches)
            # Prepare to drop into the debugger
            if self.iteration == config["tf_debug_iteration"]:
                model.sess = tf_debug.LocalCLIDebugWrapperSession(model.sess)
            while batch_index < num_batches:
                full_trace = (i == 0 and self.iteration == 0 and batch_index
                              == config["full_trace_nth_sgd_batch"])
                batch_loss, batch_policy_loss, batch_vf_loss, batch_kl, \
                    batch_entropy = model.run_sgd_minibatch(
                        permutation[batch_index] * model.per_device_batch_size,
                        self.kl_coeff, full_trace,
                        self.file_writer)
                loss.append(batch_loss)
                policy_loss.append(batch_policy_loss)
                vf_loss.append(batch_vf_loss)
                kl.append(batch_kl)
                entropy.append(batch_entropy)
                batch_index += 1
            loss = np.mean(loss)
            policy_loss = np.mean(policy_loss)
            vf_loss = np.mean(vf_loss)
            kl = np.mean(kl)
            entropy = np.mean(entropy)
            sgd_end = time.time()
            print("{:>15}{:15.5e}{:15.5e}{:15.5e}{:15.5e}{:15.5e}".format(
                i, loss, policy_loss, vf_loss, kl, entropy))

            values = []
            if i == config["num_sgd_iter"] - 1:
                metric_prefix = "ppo/sgd/final_iter/"
                values.append(
                    tf.Summary.Value(tag=metric_prefix + "kl_coeff",
                                     simple_value=self.kl_coeff))
                values.extend([
                    tf.Summary.Value(tag=metric_prefix + "mean_entropy",
                                     simple_value=entropy),
                    tf.Summary.Value(tag=metric_prefix + "mean_loss",
                                     simple_value=loss),
                    tf.Summary.Value(tag=metric_prefix + "mean_kl",
                                     simple_value=kl)
                ])
                if self.file_writer:
                    sgd_stats = tf.Summary(value=values)
                    self.file_writer.add_summary(sgd_stats, self.global_step)
            self.global_step += 1
            sgd_time += sgd_end - sgd_start
        if kl > 2.0 * config["kl_target"]:
            self.kl_coeff *= 1.5
        elif kl < 0.5 * config["kl_target"]:
            self.kl_coeff *= 0.5

        info = {
            "kl_divergence": kl,
            "kl_coefficient": self.kl_coeff,
            "rollouts_time": rollouts_time,
            "shuffle_time": shuffle_time,
            "load_time": load_time,
            "sgd_time": sgd_time,
            "sample_throughput": len(samples["observations"]) / sgd_time
        }

        FilterManager.synchronize(self.local_evaluator.filters,
                                  self.remote_evaluators)
        res = self._fetch_metrics_from_remote_evaluators()
        res = res._replace(info=info)

        return res
Example #37
File: ars.py Project: sgillen/ARS
    def aggregate_rollouts(self, num_rollouts=None, evaluate=False):
        """ 
        Aggregate update step from rollouts generated in parallel.
        """

        if num_rollouts is None:
            num_deltas = self.num_deltas
        else:
            num_deltas = num_rollouts

        # put policy weights in the object store
        policy_id = ray.put(self.w_policy)

        t1 = time.time()
        num_rollouts = int(num_deltas / self.num_workers)

        # parallel generation of rollouts
        rollout_ids_one = [
            worker.do_rollouts.remote(policy_id,
                                      num_rollouts=num_rollouts,
                                      shift=self.shift,
                                      evaluate=evaluate)
            for worker in self.workers
        ]

        rollout_ids_two = [
            worker.do_rollouts.remote(policy_id,
                                      num_rollouts=1,
                                      shift=self.shift,
                                      evaluate=evaluate)
            for worker in self.workers[:(num_deltas % self.num_workers)]
        ]

        # gather results
        results_one = ray.get(rollout_ids_one)
        results_two = ray.get(rollout_ids_two)

        rollout_rewards, deltas_idx = [], []

        for result in results_one:
            if not evaluate:
                self.timesteps += result["steps"]
            deltas_idx += result['deltas_idx']
            rollout_rewards += result['rollout_rewards']

        for result in results_two:
            if not evaluate:
                self.timesteps += result["steps"]
            deltas_idx += result['deltas_idx']
            rollout_rewards += result['rollout_rewards']

        deltas_idx = np.array(deltas_idx)
        rollout_rewards = np.array(rollout_rewards, dtype=np.float64)

        print('Maximum reward of collected rollouts:', rollout_rewards.max())
        t2 = time.time()

        print('Time to generate rollouts:', t2 - t1)

        if evaluate:
            return rollout_rewards

        # select top performing directions if deltas_used < num_deltas
        max_rewards = np.max(rollout_rewards, axis=1)
        if self.deltas_used > self.num_deltas:
            self.deltas_used = self.num_deltas

        idx = np.arange(max_rewards.size)[max_rewards >= np.percentile(
            max_rewards, 100 * (1 - (self.deltas_used / self.num_deltas)))]
        deltas_idx = deltas_idx[idx]
        rollout_rewards = rollout_rewards[idx, :]

        # normalize rewards by their standard deviation
        rollout_rewards /= np.std(rollout_rewards)

        t1 = time.time()
        # aggregate rollouts to form g_hat, the gradient used to compute SGD step
        g_hat, count = utils.batched_weighted_sum(
            rollout_rewards[:, 0] - rollout_rewards[:, 1],
            (self.deltas.get(idx, self.w_policy.size) for idx in deltas_idx),
            batch_size=500)
        g_hat /= deltas_idx.size
        t2 = time.time()
        print('time to aggregate rollouts', t2 - t1)
        return g_hat
Example #38
 def __init__(self):
     self.loop = self
     self.large_object = ray.put(
         np.zeros(40 * 1024 * 1024, dtype=np.uint8))
Example #39
def test_global_gc_when_full(shutdown_only):
    cluster = ray.cluster_utils.Cluster()
    for _ in range(2):
        cluster.add_node(num_cpus=1,
                         num_gpus=0,
                         object_store_memory=100 * 1024 * 1024)
    ray.init(address=cluster.address)

    class LargeObjectWithCyclicRef:
        def __init__(self):
            self.loop = self
            self.large_object = ray.put(
                np.zeros(40 * 1024 * 1024, dtype=np.uint8))

    @ray.remote(num_cpus=1)
    class GarbageHolder:
        def __init__(self):
            gc.disable()
            x = LargeObjectWithCyclicRef()
            self.garbage = weakref.ref(x)

        def has_garbage(self):
            return self.garbage() is not None

        def return_large_array(self):
            return np.zeros(80 * 1024 * 1024, dtype=np.uint8)

    try:
        gc.disable()

        # Local driver.
        local_ref = weakref.ref(LargeObjectWithCyclicRef())

        # Remote workers.
        actors = [GarbageHolder.remote() for _ in range(2)]
        assert local_ref() is not None
        assert all(ray.get([a.has_garbage.remote() for a in actors]))

        # GC should be triggered for all workers, including the local driver,
        # when the driver tries to ray.put a value that doesn't fit in the
        # object store. This should cause the captured ObjectRefs' numpy arrays
        # to be evicted.
        ray.put(np.zeros(80 * 1024 * 1024, dtype=np.uint8))

        def check_refs_gced():
            return (local_ref() is None and
                    not any(ray.get([a.has_garbage.remote() for a in actors])))

        wait_for_condition(check_refs_gced)

        # Local driver.
        local_ref = weakref.ref(LargeObjectWithCyclicRef())

        # Remote workers.
        actors = [GarbageHolder.remote() for _ in range(2)]
        assert all(ray.get([a.has_garbage.remote() for a in actors]))

        # GC should be triggered for all workers, including the local driver,
        # when a remote task tries to put a return value that doesn't fit in
        # the object store. This should cause the captured ObjectRefs' numpy
        # arrays to be evicted.
        ray.get(actors[0].return_large_array.remote())

        def check_refs_gced():
            return (local_ref() is None and
                    not any(ray.get([a.has_garbage.remote() for a in actors])))

        wait_for_condition(check_refs_gced)
    finally:
        gc.enable()
Example #40
 def __init__(self):
     print("I also log a line")
     self.obj_ref = ray.put([1, 2, 3])
Example #41
 def put(cls, obj):
     return OmnisciOnRayFramePartition(
         object_id=ray.put(obj),
         length=len(obj.index),
         width=len(obj.columns),
     )
Example #42
def test_object_broadcast(ray_start_cluster_with_resource):
    cluster, num_nodes = ray_start_cluster_with_resource

    @ray.remote
    def f(x):
        return

    x = np.zeros(1024 * 1024, dtype=np.uint8)

    @ray.remote
    def create_object():
        return np.zeros(1024 * 1024, dtype=np.uint8)

    object_refs = []

    for _ in range(3):
        # Broadcast an object to all machines.
        x_id = ray.put(x)
        object_refs.append(x_id)
        ray.get([
            f._remote(args=[x_id], resources={str(i % num_nodes): 1})
            for i in range(10 * num_nodes)
        ])

    for _ in range(3):
        # Broadcast an object to all machines.
        x_id = create_object.remote()
        object_refs.append(x_id)
        ray.get([
            f._remote(args=[x_id], resources={str(i % num_nodes): 1})
            for i in range(10 * num_nodes)
        ])

    # Wait for profiling information to be pushed to the profile table.
    time.sleep(1)
    transfer_events = ray.state.object_transfer_timeline()

    # Make sure that each object was transferred a reasonable number of times.
    for x_id in object_refs:
        relevant_events = [
            event for event in transfer_events
            if event["cat"] == "transfer_send"
            and event["args"][0] == x_id.hex() and event["args"][2] == 1
        ]

        # NOTE: Each event currently appears twice because we duplicate the
        # send and receive boxes to underline them with a box (black if it is a
        # send and gray if it is a receive). So we need to remove these extra
        # boxes here.
        deduplicated_relevant_events = [
            event for event in relevant_events if event["cname"] != "black"
        ]
        assert len(deduplicated_relevant_events) * 2 == len(relevant_events)
        relevant_events = deduplicated_relevant_events

        # Each object must have been broadcast to each remote machine.
        assert len(relevant_events) >= num_nodes - 1
        # If more object transfers than necessary have been done, print a
        # warning.
        if len(relevant_events) > num_nodes - 1:
            warnings.warn("This object was transferred {} times, when only {} "
                          "transfers were required.".format(
                              len(relevant_events), num_nodes - 1))
        # Each object should not have been broadcast more than once from every
        # machine to every other machine. Also, a pair of machines should not
        # both have sent the object to each other.
        assert len(relevant_events) <= (num_nodes - 1) * num_nodes / 2

        # Make sure that no object was sent multiple times between the same
        # pair of object managers.
        send_counts = defaultdict(int)
        for event in relevant_events:
            # The pid identifies the sender and the tid identifies the
            # receiver.
            send_counts[(event["pid"], event["tid"])] += 1
        assert all(value == 1 for value in send_counts.values())
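This test exercises the "put once, pass the ObjectRef many times" pattern: a single ray.put stores one copy that every node can fetch, whereas passing the raw array to each task typically serializes and stores it again per submission. A small hedged sketch of the two variants (hypothetical task name):

import numpy as np
import ray

ray.init()

@ray.remote
def consume(arr):
    return arr.sum()

x = np.zeros(1024 * 1024, dtype=np.uint8)

# Preferred: one object-store copy shared by all tasks.
x_ref = ray.put(x)
ray.get([consume.remote(x_ref) for _ in range(10)])

# Works, but the array is generally stored again for each call.
ray.get([consume.remote(x) for _ in range(10)])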
Example #43
0
def __init__(self):
    self.x = ray.put(np.zeros(1024 * 1024, dtype=np.uint8))
Example #44
0
def full_loss(theta):
    theta_id = ray.put(theta)
    loss_ids = [actor.loss.remote(theta_id) for actor in actors]
    return sum(ray.get(loss_ids))
Example #45
0
File: util.py Project: toydogcat/ray
def pin_in_object_store(obj):
    """Deprecated, use ray.put(value) instead."""

    obj_ref = ray.put(obj)
    _pinned_objects.append(obj_ref)
    return obj_ref
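The module-level _pinned_objects list exists only to keep the returned ObjectRef alive; with Ray's distributed reference counting, simply holding the ref yourself has the same effect, which is why the helper is deprecated. A minimal sketch of the recommended pattern:

import ray

ray.init()
ref = ray.put([0] * 1_000_000)   # object stays pinned while `ref` is reachable
# ... hand `ref` to tasks or actors ...
del ref                          # dropping the last reference lets Ray reclaim the object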
Example #46
0
def full_grad(theta):
    theta_id = ray.put(theta)
    grad_ids = [actor.grad.remote(theta_id) for actor in actors]
    # The float64 conversion is necessary for use with fmin_l_bfgs_b.
    return sum(ray.get(grad_ids)).astype("float64")
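full_loss (example #44) and full_grad form the objective/gradient pair consumed by SciPy's L-BFGS driver, which is why the gradient is cast to float64. A hedged driver sketch (assuming the actors and both functions above are already defined, and using a hypothetical parameter dimension):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

theta0 = np.zeros(100, dtype=np.float64)   # hypothetical flat initial weights
theta_opt, final_loss, info = fmin_l_bfgs_b(full_loss, theta0, fprime=full_grad, maxiter=50)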
Example #47
0
File: es.py Project: vishalbelsare/ray
    def step(self):
        config = self.config

        theta = self.policy.get_flat_weights()
        assert theta.dtype == np.float32
        assert len(theta.shape) == 1

        # Put the current policy weights in the object store.
        theta_id = ray.put(theta)
        # Use the actors to do rollouts. Note that we pass in the ID of the
        # policy weights as these are shared.
        results, num_episodes, num_timesteps = self._collect_results(
            theta_id, config["episodes_per_batch"], config["train_batch_size"])
        # Update our sample steps counters.
        self._counters[NUM_AGENT_STEPS_SAMPLED] += num_timesteps
        self._counters[NUM_ENV_STEPS_SAMPLED] += num_timesteps

        all_noise_indices = []
        all_training_returns = []
        all_training_lengths = []
        all_eval_returns = []
        all_eval_lengths = []

        # Loop over the results.
        for result in results:
            all_eval_returns += result.eval_returns
            all_eval_lengths += result.eval_lengths

            all_noise_indices += result.noise_indices
            all_training_returns += result.noisy_returns
            all_training_lengths += result.noisy_lengths

        assert len(all_eval_returns) == len(all_eval_lengths)
        assert (len(all_noise_indices) == len(all_training_returns) ==
                len(all_training_lengths))

        self.episodes_so_far += num_episodes

        # Assemble the results.
        eval_returns = np.array(all_eval_returns)
        eval_lengths = np.array(all_eval_lengths)
        noise_indices = np.array(all_noise_indices)
        noisy_returns = np.array(all_training_returns)
        noisy_lengths = np.array(all_training_lengths)

        # Process the returns.
        proc_noisy_returns = utils.compute_centered_ranks(noisy_returns)

        # Compute and take a step.
        g, count = utils.batched_weighted_sum(
            proc_noisy_returns[:, 0] - proc_noisy_returns[:, 1],
            (self.noise.get(index, self.policy.num_params)
             for index in noise_indices),
            batch_size=500,
        )
        g /= noisy_returns.size
        assert (g.shape == (self.policy.num_params, ) and g.dtype == np.float32
                and count == len(noise_indices))
        # Compute the new weights theta.
        theta, update_ratio = self.optimizer.update(-g +
                                                    config["l2_coeff"] * theta)

        # Update our train steps counters.
        self._counters[NUM_AGENT_STEPS_TRAINED] += num_timesteps
        self._counters[NUM_ENV_STEPS_TRAINED] += num_timesteps

        # Set the new weights in the local copy of the policy.
        self.policy.set_flat_weights(theta)
        # Store the rewards
        if len(all_eval_returns) > 0:
            self.reward_list.append(np.mean(eval_returns))

        # Now sync the filters
        FilterManager.synchronize(
            {DEFAULT_POLICY_ID: self.policy.observation_filter}, self.workers)

        info = {
            "weights_norm": np.square(theta).sum(),
            "grad_norm": np.square(g).sum(),
            "update_ratio": update_ratio,
            "episodes_this_iter": noisy_lengths.size,
            "episodes_so_far": self.episodes_so_far,
        }

        reward_mean = np.mean(self.reward_list[-self.report_length:])
        result = dict(
            episode_reward_mean=reward_mean,
            episode_len_mean=eval_lengths.mean(),
            timesteps_this_iter=noisy_lengths.sum(),
            info=info,
        )

        return result
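compute_centered_ranks is what makes the update above scale-invariant: raw returns are replaced by their ranks, mapped onto [-0.5, 0.5], before the weighted sum. A simplified NumPy stand-in (not RLlib's exact implementation) for a 2-D array of returns:

import numpy as np

def centered_ranks(returns):
    flat = returns.ravel()
    ranks = np.empty(flat.size, dtype=np.float32)
    ranks[flat.argsort()] = np.arange(flat.size, dtype=np.float32)
    ranks = ranks / (flat.size - 1) - 0.5    # map ranks onto [-0.5, 0.5]
    return ranks.reshape(returns.shape)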
Example #48
0
        real_batch = next(iter(dataloader))
        plt.figure(figsize=(8, 8))
        plt.axis("off")
        plt.title("Original Images")
        plt.imshow(
            np.transpose(
                vutils.make_grid(real_batch[0][:64], padding=2,
                                 normalize=True).cpu(), (1, 2, 0)))

        plt.show()

    # load the pretrained mnist classification model for inception_score
    mnist_cnn = Net()
    mnist_cnn.load_state_dict(torch.load(MODEL_PATH))
    mnist_cnn.eval()
    mnist_model_ref = ray.put(mnist_cnn)

    # __tune_begin__
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="is_score",
        mode="max",
        perturbation_interval=5,
        hyperparam_mutations={
            # distribution for resampling
            "netG_lr": lambda: np.random.uniform(1e-2, 1e-5),
            "netD_lr": lambda: np.random.uniform(1e-2, 1e-5),
        })

    tune_iter = 5 if args.smoke_test else 300
    analysis = tune.run(
Example #49
0
File: ray_perf.py Project: stjordanis/ray
def create_object_containing_ref():
    obj_refs = []
    for _ in range(10000):
        obj_refs.append(ray.put(1))
    return obj_refs
Example #50
0
def child(*xs):
    oid = ray.put(np.zeros(1024 * 1024, dtype=np.uint8))
    return oid
Example #51
0
File: ray_perf.py Project: stjordanis/ray
def do_put():
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024, dtype=np.int64))
Example #52
0
def churn():
    return ray.put(np.zeros(1024 * 1024, dtype=np.uint8))
Example #53
0
File: ray_perf.py Project: stjordanis/ray
def do_put_small():
    for _ in range(100):
        ray.put(0)
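These do_put* bodies are microbenchmark payloads from ray_perf.py; the real harness times them elsewhere in that file. A hedged stand-alone timing sketch (not the harness ray_perf.py actually uses):

import time
import ray

ray.init()
start = time.perf_counter()
for _ in range(100):
    ray.put(0)
elapsed = time.perf_counter() - start
print("small ray.put throughput: %.0f puts/s" % (100 / elapsed))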
Example #54
0
File: core.py Project: BobinMathew/mars
def __setitem__(self, key, value):
    object_id = ray.put(value)
    shape = getattr(value, 'shape', None)
    meta = ChunkMeta(shape=shape, object_id=object_id)
    set_meta = self.meta_store.set_meta.remote(key, meta)
    ray.wait([object_id, set_meta])
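The write path above puts the chunk, records a ChunkMeta in a meta-store actor, and then calls ray.wait (note that ray.wait defaults to num_returns=1, so it returns as soon as either ref is ready). A hedged sketch of a matching read path, with hypothetical names modeled on the snippet:

import ray

def read_chunk(meta_store, key):
    # Hypothetical counterpart to __setitem__: fetch the ChunkMeta, then the chunk it points to.
    meta = ray.get(meta_store.get_meta.remote(key))
    return ray.get(meta.object_id)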
Example #55
0
def _worker():
    arr = np.random.rand(1024 * 1024)  # 8 MB data
    ref = ray.put(arr)
    ray.experimental.force_spill_objects([ref])
    return ref
Example #56
0
def f(y):
    from ray.internal.internal_api import memory_summary
    x_id = ray.put("HI")
    info = memory_summary(address)
    del x_id
    return info
Example #57
0
def nested_ref():
    return ray.put(1)
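Returning ray.put(1) from a task produces a nested reference: the caller's ray.get yields the inner ObjectRef, which needs a second ray.get to resolve. A hedged sketch (assuming nested_ref is declared with @ray.remote):

import ray

ray.init()

@ray.remote
def nested_ref():
    return ray.put(1)

outer = nested_ref.remote()
inner = ray.get(outer)    # an ObjectRef, not the value
assert ray.get(inner) == 1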
Example #58
0
def __init__(self):
    self.ref = ray.put(np.zeros(100000))
Example #59
0
def __init__(self):
    self.obj_ref = ray.put([1, 2, 3])
Example #60
0
def test_object_directory_basic(ray_start_cluster_with_resource):
    cluster, num_nodes = ray_start_cluster_with_resource

    @ray.remote
    def task(x):
        pass

    # Test a single task.
    x_id = ray.put(np.zeros(1024 * 1024, dtype=np.uint8))
    ray.get(task.options(resources={str(3): 1}).remote(x_id), timeout=10)

    # Test multiple tasks on all nodes can find locations properly.
    object_refs = []
    for _ in range(num_nodes):
        object_refs.append(ray.put(np.zeros(1024 * 1024, dtype=np.uint8)))
    ray.get([
        task.options(resources={
            str(i): 1
        }).remote(object_refs[i]) for i in range(num_nodes)
    ])
    del object_refs

    @ray.remote
    class ObjectHolder:
        def __init__(self):
            self.x = ray.put(np.zeros(1024 * 1024, dtype=np.uint8))

        def get_obj(self):
            return self.x

        def ready(self):
            return True

    # Test if tasks can find object location properly
    # when there are multiple owners
    object_holders = [
        ObjectHolder.options(num_cpus=0.01, resources={
            str(i): 1
        }).remote() for i in range(num_nodes)
    ]
    ray.get([o.ready.remote() for o in object_holders])

    object_refs = []
    for i in range(num_nodes):
        object_refs.append(object_holders[(i + 1) %
                                          num_nodes].get_obj.remote())
    ray.get([
        task.options(num_cpus=0.01, resources={
            str(i): 1
        }).remote(object_refs[i]) for i in range(num_nodes)
    ])

    # Test a stressful scenario.
    object_refs = []
    repeat = 10
    for _ in range(num_nodes):
        for _ in range(repeat):
            object_refs.append(ray.put(np.zeros(1024 * 1024, dtype=np.uint8)))
    tasks = []
    for i in range(num_nodes):
        for r in range(repeat):
            tasks.append(
                task.options(num_cpus=0.01, resources={
                    str(i): 0.1
                }).remote(object_refs[i * r]))
    ray.get(tasks)

    object_refs = []
    for i in range(num_nodes):
        object_refs.append(object_holders[(i + 1) %
                                          num_nodes].get_obj.remote())
    tasks = []
    for i in range(num_nodes):
        for _ in range(10):
            tasks.append(
                task.options(num_cpus=0.01, resources={
                    str(i): 0.1
                }).remote(object_refs[(i + 1) % num_nodes]))