Example 1
  def testCachingReusables(self):
    # Test that we can define reusable variables before the driver is connected.
    def foo_initializer():
      return 1
    def bar_initializer():
      return []
    def bar_reinitializer(bar):
      return []
    ray.reusables.foo = ray.Reusable(foo_initializer)
    ray.reusables.bar = ray.Reusable(bar_initializer, bar_reinitializer)

    @ray.remote
    def use_foo():
      return ray.reusables.foo
    @ray.remote
    def use_bar():
      ray.reusables.bar.append(1)
      return ray.reusables.bar

    ray.init(start_ray_local=True, num_workers=2)

    self.assertEqual(ray.get(use_foo.remote()), 1)
    self.assertEqual(ray.get(use_foo.remote()), 1)
    self.assertEqual(ray.get(use_bar.remote()), [1])
    self.assertEqual(ray.get(use_bar.remote()), [1])

    ray.worker.cleanup()
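
The lifecycle this test relies on: each worker runs the initializer the first time a reusable is accessed, and runs the reinitializer (or, absent one, the initializer again) after each task so that mutations do not leak into the next task. A minimal plain-Python mock of that lifecycle, for illustration only (MockReusable is not part of Ray):

class MockReusable(object):
  def __init__(self, initializer, reinitializer=None):
    self.initializer = initializer
    # With no reinitializer, fall back to rerunning the initializer.
    self.reinitializer = reinitializer if reinitializer else (lambda value: initializer())
    self.initialized = False
    self.value = None

  def get(self):
    # Workers initialize lazily, on first access.
    if not self.initialized:
      self.value = self.initializer()
      self.initialized = True
    return self.value

  def reset_after_task(self):
    # What Ray does at the end of each task that touched the variable.
    if self.initialized:
      self.value = self.reinitializer(self.value)

bar = MockReusable(lambda: [], lambda old: [])
bar.get().append(1)
assert bar.get() == [1]
bar.reset_after_task()
assert bar.get() == []  # The next task sees a fresh list.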
Example 2
  def testFailImportingReusableVariable(self):
    ray.init(start_ray_local=True, num_workers=2, driver_mode=ray.SILENT_MODE)

    # This will throw an exception when the reusable variable is imported on the
    # workers.
    def initializer():
      if ray.worker.global_worker.mode == ray.WORKER_MODE:
        raise Exception("The initializer failed.")
      return 0
    ray.reusables.foo = ray.Reusable(initializer)
    for _ in range(100): # Retry if we need to wait longer.
      if len(ray.task_info()["failed_reusable_variable_imports"]) >= 1:
        break
      time.sleep(0.1)
    # Check that the error message is in the task info.
    self.assertTrue("The initializer failed." in ray.task_info()["failed_reusable_variable_imports"][0]["error_message"])

    ray.worker.cleanup()
Example 3
    def testFailImportingReusableVariable(self):
        ray.init(start_ray_local=True,
                 num_workers=2,
                 driver_mode=ray.SILENT_MODE)

        # This will throw an exception when the reusable variable is imported on the
        # workers.
        def initializer():
            if ray.worker.global_worker.mode == ray.WORKER_MODE:
                raise Exception("The initializer failed.")
            return 0

        ray.reusables.foo = ray.Reusable(initializer)
        wait_for_errors("ReusableVariableImportError", 1)
        # Check that the error message is in the task info.
        self.assertTrue("The initializer failed." in ray.error_info()
                        ["ReusableVariableImportError"][0]["message"])

        ray.worker.cleanup()
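
Example 3 calls a wait_for_errors helper that is not shown here. A plausible sketch, built only from the ray.error_info() polling pattern of Example 2 (the real helper in the test suite may differ):

import time

def wait_for_errors(error_type, num_errors, timeout=10):
    # Poll the driver's error table until num_errors errors of the given
    # type have been reported, or give up after the timeout.
    start_time = time.time()
    while time.time() - start_time < timeout:
        if len(ray.error_info().get(error_type, [])) >= num_errors:
            return
        time.sleep(0.1)
    raise Exception("Timed out waiting for errors of type {}.".format(error_type))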
Example 4
  def testFailReinitializingVariable(self):
    ray.init(start_ray_local=True, num_workers=2, driver_mode=ray.SILENT_MODE)

    def initializer():
      return 0
    def reinitializer(foo):
      raise Exception("The reinitializer failed.")
    ray.reusables.foo = ray.Reusable(initializer, reinitializer)
    @ray.remote
    def use_foo():
      ray.reusables.foo
    use_foo.remote()
    for _ in range(100): # Retry if we need to wait longer.
      if len(ray.task_info()["failed_reinitialize_reusable_variables"]) >= 1:
        break
      time.sleep(0.1)
    # Check that the error message is in the task info.
    self.assertTrue("The reinitializer failed." in ray.task_info()["failed_reinitialize_reusable_variables"][0]["error_message"])

    ray.worker.cleanup()
Example 5
def rnn_ray(argv):

  num_of_workers = 1  # Default number of workers, overridden by the -w option.
  scale = 10
  num_steps = 10
  try:
    opts, args = getopt.getopt(argv, "hw:s:n:", ["workers=","scale=","num_steps="])
  except getopt.GetoptError:
    print 'rnn_ray_loop -w <num_workers> -s <scale> -n <num_steps>'
    sys.exit(2)
  for opt, arg in opts:
    if opt == '-h':
      print 'rnn_ray_loop -w <num_workers> -s <scale> -n <num_steps>'
      sys.exit()
    elif opt in ("-w", "--workers"):
      num_of_workers = int(arg)
    elif opt in ("-s", "--scale"):
      scale = int(arg)
    elif opt in ("-n", "--num_steps"):
      print "num steps is {}".format(arg)
      num_steps = int(arg)
 
  ray.init(start_ray_local=True, num_workers=num_of_workers)
  start_time = time.time()
  scale = scale * 5
  batch_size = scale - 1

  xdim = scale * 10
  h1dim = (scale + 1) * 10
  h2dim = (scale + 2) * 10
  h3dim = (scale + 3) * 10
  h4dim = (scale + 4) * 10
  h5dim = (scale + 5) * 10
  ydim = (2 * scale + 6) * 10

  ray.reusables.net_vars = ray.Reusable(
      lambda: rnn.net_initialization(scale, num_steps, batch_size, xdim, h1dim,
                                     h2dim, h3dim, h4dim, h5dim, ydim),
      rnn.net_reinitialization)
  res = ray_rnn_int.remote(num_of_workers, scale, num_steps, batch_size, xdim, h1dim, h2dim, h3dim, h4dim, h5dim, ydim)
  ray.get(res)
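
A typical entry point for this script (assuming the usual sys, getopt, time, ray, and rnn imports at the top of the file) passes the command-line arguments straight through:

if __name__ == "__main__":
  rnn_ray(sys.argv[1:])  # e.g. python rnn_ray_loop.py -w 4 -s 10 -n 10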
Example 6
    def testReusableVariablesInPythonMode(self):
        reload(test_functions)
        ray.init(start_ray_local=True, driver_mode=ray.PYTHON_MODE)

        def l_init():
            return []

        def l_reinit(l):
            return []

        ray.reusables.l = ray.Reusable(l_init, l_reinit)

        @ray.remote
        def use_l():
            l = ray.reusables.l
            l.append(1)
            return l

        # Get the local copy of the reusable variable. This should be stateful.
        l = ray.reusables.l
        assert_equal(l, [])

        # Make sure the remote function does what we expect.
        assert_equal(ray.get(use_l.remote()), [1])
        assert_equal(ray.get(use_l.remote()), [1])

        # Make sure the local copy of the reusable variable has not been mutated.
        assert_equal(l, [])
        l = ray.reusables.l
        assert_equal(l, [])

        # Make sure that running a remote function does not reset the state of the
        # local copy of the reusable variable.
        l.append(2)
        assert_equal(ray.get(use_l.remote()), [1])
        assert_equal(l, [2])

        ray.worker.cleanup()
Example 7
    def testUsingReusablesOnDriver(self):
        ray.init(start_ray_local=True, num_workers=1)

        # Test that we can add a variable to the key-value store.

        def foo_initializer():
            return []

        def foo_reinitializer(foo):
            return []

        ray.reusables.foo = ray.Reusable(foo_initializer, foo_reinitializer)

        @ray.remote
        def use_foo():
            foo = ray.reusables.foo
            foo.append(1)
            return foo

        # Check that running a remote function does not reset the reusable variable
        # on the driver.
        foo = ray.reusables.foo
        self.assertEqual(foo, [])
        foo.append(2)
        self.assertEqual(foo, [2])
        foo.append(3)
        self.assertEqual(foo, [2, 3])

        self.assertEqual(ray.get(use_foo.remote()), [1])
        self.assertEqual(ray.get(use_foo.remote()), [1])
        self.assertEqual(ray.get(use_foo.remote()), [1])

        # Check that the copy of foo on the driver has not changed.
        self.assertEqual(foo, [2, 3])
        foo = ray.reusables.foo
        self.assertEqual(foo, [2, 3])

        ray.worker.cleanup()
Example 8
    def testFailReinitializingVariable(self):
        ray.init(start_ray_local=True,
                 num_workers=2,
                 driver_mode=ray.SILENT_MODE)

        def initializer():
            return 0

        def reinitializer(foo):
            raise Exception("The reinitializer failed.")

        ray.reusables.foo = ray.Reusable(initializer, reinitializer)

        @ray.remote
        def use_foo():
            ray.reusables.foo

        use_foo.remote()
        wait_for_errors("ReusableVariableReinitializeError", 1)
        # Check that the error message is in the task info.
        self.assertTrue("The reinitializer failed." in ray.error_info()
                        ["ReusableVariableReinitializeError"][0]["message"])

        ray.worker.cleanup()
Example 9
  def testReusables(self):
    ray.init(start_ray_local=True, num_workers=1)

    # Test that we can add a variable to the key-value store.

    def foo_initializer():
      return 1
    def foo_reinitializer(foo):
      return foo

    ray.reusables.foo = ray.Reusable(foo_initializer, foo_reinitializer)
    self.assertEqual(ray.reusables.foo, 1)

    @ray.remote
    def use_foo():
      return ray.reusables.foo
    self.assertEqual(ray.get(use_foo.remote()), 1)
    self.assertEqual(ray.get(use_foo.remote()), 1)
    self.assertEqual(ray.get(use_foo.remote()), 1)

    # Test that we can add a variable to the key-value store, mutate it, and reset it.

    def bar_initializer():
      return [1, 2, 3]

    ray.reusables.bar = ray.Reusable(bar_initializer)

    @ray.remote
    def use_bar():
      ray.reusables.bar.append(4)
      return ray.reusables.bar
    self.assertEqual(ray.get(use_bar.remote()), [1, 2, 3, 4])
    self.assertEqual(ray.get(use_bar.remote()), [1, 2, 3, 4])
    self.assertEqual(ray.get(use_bar.remote()), [1, 2, 3, 4])

    # Test that we can use the reinitializer.

    def baz_initializer():
      return np.zeros([4])
    def baz_reinitializer(baz):
      for i in range(len(baz)):
        baz[i] = 0
      return baz

    ray.reusables.baz = ray.Reusable(baz_initializer, baz_reinitializer)

    @ray.remote
    def use_baz(i):
      baz = ray.reusables.baz
      baz[i] = 1
      return baz
    assert_equal(ray.get(use_baz.remote(0)), np.array([1, 0, 0, 0]))
    assert_equal(ray.get(use_baz.remote(1)), np.array([0, 1, 0, 0]))
    assert_equal(ray.get(use_baz.remote(2)), np.array([0, 0, 1, 0]))
    assert_equal(ray.get(use_baz.remote(3)), np.array([0, 0, 0, 1]))

    # Make sure the reinitializer is actually getting called. Note that this is
    # not the correct usage of a reinitializer because it does not reset qux to
    # its original state. This is just for testing.

    def qux_initializer():
      return 0
    def qux_reinitializer(x):
      return x + 1

    ray.reusables.qux = ray.Reusable(qux_initializer, qux_reinitializer)

    @ray.remote
    def use_qux():
      return ray.reusables.qux
    self.assertEqual(ray.get(use_qux.remote()), 0)
    self.assertEqual(ray.get(use_qux.remote()), 1)
    self.assertEqual(ray.get(use_qux.remote()), 2)

    ray.worker.cleanup()
Example 10
# Arguments to specify where the imagenet data is stored.
parser = argparse.ArgumentParser(description="Run the AlexNet example.")
parser.add_argument("--s3-bucket", required=True, type=str, help="Name of the bucket that contains the image data.")
parser.add_argument("--key-prefix", default="ILSVRC2012_img_train/n015", type=str, help="Prefix for files to fetch.")
parser.add_argument("--label-file", default="train.txt", type=str, help="File containing labels.")

if __name__ == "__main__":
  args = parser.parse_args()

  ray.init(start_ray_local=True, num_workers=10)

  # Note we do not do sess.run(tf.initialize_all_variables()) because that would
  # result in a different initialization on each worker. Instead, we initialize
  # the weights on the driver and load the weights on the workers every time we
  # compute a gradient.
  ray.reusables.net_vars = ray.Reusable(alexnet.net_initialization, alexnet.net_reinitialization)

  # Prepare keys for downloading the data.
  s3_resource = boto3.resource("s3")
  imagenet_bucket = s3_resource.Bucket(args.s3_bucket)
  objects = imagenet_bucket.objects.filter(Prefix=args.key_prefix)
  image_tar_files = [str(obj.key) for obj in objects.all()]
  print "Images will be downloaded from {} files.".format(len(image_tar_files))

  # Download the label file and create a dictionary mapping the filenames of
  # the images to their labels.
  s3_client = boto3.client("s3")
  label_file = s3_client.get_object(Bucket=args.s3_bucket, Key=args.label_file)
  filename_label_lines = label_file["Body"].read().strip().split("\n")
  filename_label_pairs = [line.split(" ") for line in filename_label_lines]
  filename_label_dict = dict([(os.path.basename(name), label) for name, label in filename_label_pairs])
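
With filename_label_dict built, recovering the label for an extracted image is a plain dictionary lookup keyed on the basename. A small sketch (image_path is a hypothetical filename; labels are the strings parsed above):

  image_path = "n01440764_10026.JPEG"  # Hypothetical extracted image file.
  label = filename_label_dict[os.path.basename(image_path)]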
Example 11

# Function for initializing the gym environment.
def env_initializer():
    return gym.make("Pong-v0")


# Function for reinitializing the gym environment in order to guarantee that
# the state of the game is reset after each remote task.
def env_reinitializer(env):
    env.reset()
    return env


# Create a reusable variable for the gym environment.
ray.reusables.env = ray.Reusable(env_initializer, env_reinitializer)


def sigmoid(x):
    # Sigmoid "squashing" function to interval [0, 1].
    return 1.0 / (1.0 + np.exp(-x))


def preprocess(I):
    """preprocess 210x160x3 uint8 frame into 6400 (80x80) 1D float vector"""
    I = I[35:195]  # crop
    I = I[::2, ::2, 0]  # downsample by factor of 2
    I[I == 144] = 0  # erase background (background type 1)
    I[I == 109] = 0  # erase background (background type 2)
    I[I != 0] = 1  # everything else (paddles, ball) just set to 1
    return I.astype(np.float).ravel()
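

A remote rollout built on this reusable grabs the environment from ray.reusables.env; because the reinitializer calls env.reset() after each task, every task starts from a fresh game. A minimal sketch (rollout_reward and its random policy are illustrative, not part of the original script):

@ray.remote
def rollout_reward(num_steps):
    # Play up to num_steps frames with a random policy and sum the rewards.
    env = ray.reusables.env
    observation = env.reset()  # Safe on first use; the reinitializer also resets between tasks.
    total_reward = 0.0
    for _ in range(num_steps):
        observation, reward, done, info = env.step(env.action_space.sample())
        total_reward += reward
        if done:
            break
    return total_reward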
Example 12
        return sess, cross_entropy, cross_entropy_grads, x, y_, get_weights, set_weights

    # By default, when a reusable variable is used by a remote function, the
    # initialization code is rerun at the end of the remote task to ensure
    # that the state of the variable is not changed by the remote task.
    # However, the initialization code may be expensive; that is the case
    # here, where it constructs a TensorFlow network. So we pass in a special
    # reinitialization function that is run instead of the original
    # initialization code. If we pass in custom reinitialization code, we
    # must ensure that it leaks no state between tasks.
    def net_reinitialization(net_vars):
        return net_vars

    # Create a reusable variable for the network.
    ray.reusables.net_vars = ray.Reusable(net_initialization,
                                          net_reinitialization)

    # Load the weights into the network.
    def load_weights(theta):
        sess, _, _, _, _, get_weights, set_weights = ray.reusables.net_vars
        set_weights(
            [theta[:w_size].reshape(w_shape), theta[w_size:].reshape(b_shape)])

    # Compute the loss on a batch of data.
    @ray.remote
    def loss(theta, xs, ys):
        sess, cross_entropy, _, x, y_, _, _ = ray.reusables.net_vars
        load_weights(theta)
        return float(sess.run(cross_entropy, feed_dict={x: xs, y_: ys}))

    # Compute the gradient of the loss on a batch of data.
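    # (A sketch of what plausibly follows, mirroring loss() above and assuming
    # the same net_vars tuple layout; the original function is not shown.)
    @ray.remote
    def grad(theta, xs, ys):
        sess, _, cross_entropy_grads, x, y_, _, _ = ray.reusables.net_vars
        load_weights(theta)
        return sess.run(cross_entropy_grads, feed_dict={x: xs, y_: ys})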
Example 13
def rnn_ray(argv):

    num_of_workers = 1  # Default number of workers, overridden by the -w option.
    scale = 10
    num_steps = 10
    try:
        opts, args = getopt.getopt(argv, "hw:s:n:",
                                   ["workers=", "scale=", "num_steps="])
    except getopt.GetoptError:
        print 'rnn_ray_loop -w <num_workers> -s <scale> -n <num_steps>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'rnn_ray_loop -w <num_workers> -s <scale> -n <num_steps>'
            sys.exit()
        elif opt in ("-w", "--workers"):
            num_of_workers = int(arg)
        elif opt in ("-s", "--scale"):
            scale = int(arg)
        elif opt in ("-n", "--num_steps"):
            print "num steps is {}".format(arg)
            num_steps = int(arg)

    ray.init(start_ray_local=True, num_workers=num_of_workers)

    for k in range(1):

        scale = scale * 5
        batch_size = scale - 1

        xdim = scale * 10
        h1dim = (scale + 1) * 10
        h2dim = (scale + 2) * 10
        h3dim = (scale + 3) * 10
        h4dim = (scale + 4) * 10
        h5dim = (scale + 5) * 10
        ydim = (2 * scale + 6) * 10

        ray.reusables.net_vars = ray.Reusable(
            lambda: rnn.net_initialization(scale, num_steps, batch_size, xdim,
                                           h1dim, h2dim, h3dim, h4dim, h5dim,
                                           ydim), rnn.net_reinitialization)

        h1 = ra.zeros.remote([batch_size, h1dim])
        h2 = ra.zeros.remote([batch_size, h2dim])
        h3 = ra.zeros.remote([batch_size, h3dim])
        h4 = ra.zeros.remote([batch_size, h4dim])
        h5 = ra.zeros.remote([batch_size, h5dim])
        inputs = [
            ra.random.normal.remote([batch_size, xdim])
            for _ in range(num_steps)
        ]

        # Run distributed RNN
        elapsed_time_1_layers = []
        elapsed_time_2_layers = []
        elapsed_time_3_layers = []
        elapsed_time_4_layers = []
        elapsed_time_5_layers = []
        elapsed_time_6_layers = []
        for _ in range(10):
            start_time = time.time()
            for t in range(num_steps):
                h1 = rnn.first_layer.remote(inputs[t], h1)
            ray.get(h1)
            end_time = time.time()
            elapsed_time_1_layers.append(end_time - start_time)
            #print "Distributed RNN, 1 layer, elapsed_time = {} seconds.".format(end_time - start_time)

            start_time = time.time()
            for t in range(num_steps):
                h1 = rnn.first_layer.remote(inputs[t], h1)
                h2 = rnn.second_layer.remote(h1, h2)
            ray.get(h2)
            end_time = time.time()
            elapsed_time_2_layers.append(end_time - start_time)
            #print "Distributed RNN, 2 layer, elapsed_time = {} seconds.".format(end_time - start_time)

            start_time = time.time()
            for t in range(num_steps):
                h1 = rnn.first_layer.remote(inputs[t], h1)
                h2 = rnn.second_layer.remote(h1, h2)
                h3 = rnn.third_layer.remote(h2, h3)
            ray.get(h3)
            end_time = time.time()
            elapsed_time_3_layers.append(end_time - start_time)
            #print "Distributed RNN, 3 layer, elapsed_time = {} seconds.".format(end_time - start_time)

            start_time = time.time()
            for t in range(num_steps):
                h1 = rnn.first_layer.remote(inputs[t], h1)
                h2 = rnn.second_layer.remote(h1, h2)
                h3 = rnn.third_layer.remote(h2, h3)
                h4 = rnn.fourth_layer.remote(h3, h4)
            ray.get(h4)
            end_time = time.time()
            elapsed_time_4_layers.append(end_time - start_time)
            #print "Distributed RNN, 4 layer, elapsed_time = {} seconds.".format(end_time - start_time)

            start_time = time.time()
            for t in range(num_steps):
                h1 = rnn.first_layer.remote(inputs[t], h1)
                h2 = rnn.second_layer.remote(h1, h2)
                h3 = rnn.third_layer.remote(h2, h3)
                h4 = rnn.fourth_layer.remote(h3, h4)
                h5 = rnn.fifth_layer.remote(h4, h5)
            ray.get(h5)
            end_time = time.time()
            elapsed_time_5_layers.append(end_time - start_time)
            #print "Distributed RNN, 5 layer, elapsed_time = {} seconds.".format(end_time - start_time)

            start_time = time.time()
            outputs = []
            for t in range(num_steps):
                h1 = rnn.first_layer.remote(inputs[t], h1)
                h2 = rnn.second_layer.remote(h1, h2)
                h3 = rnn.third_layer.remote(h2, h3)
                h4 = rnn.fourth_layer.remote(h3, h4)
                h5 = rnn.fifth_layer.remote(h4, h5)
                outputs.append(rnn.sixth_layer.remote(h5))
            for t in range(num_steps):
                ray.get(outputs[t])
            end_time = time.time()
            elapsed_time_6_layers.append(end_time - start_time)
            #print "Distributed RNN, 6 layer, elapsed_time = {} seconds.".format(end_time - start_time)

        elapsed_time_1_layers = np.sort(elapsed_time_1_layers)
        elapsed_time_2_layers = np.sort(elapsed_time_2_layers)
        elapsed_time_3_layers = np.sort(elapsed_time_3_layers)
        elapsed_time_4_layers = np.sort(elapsed_time_4_layers)
        elapsed_time_5_layers = np.sort(elapsed_time_5_layers)
        elapsed_time_6_layers = np.sort(elapsed_time_6_layers)

        elapsed_time_1_layers_average = sum(elapsed_time_1_layers) / 10
        elapsed_time_2_layers_average = sum(elapsed_time_2_layers) / 10
        elapsed_time_3_layers_average = sum(elapsed_time_3_layers) / 10
        elapsed_time_4_layers_average = sum(elapsed_time_4_layers) / 10
        elapsed_time_5_layers_average = sum(elapsed_time_5_layers) / 10
        elapsed_time_6_layers_average = sum(elapsed_time_6_layers) / 10

        print ""
        print "Number of workers = {}.".format(num_of_workers)
        print "Scale = {}.".format(scale)
        print "Load measure (scale/num_workers) = {}.".format(scale /
                                                              num_of_workers)
        print "Time required for 1 layer RNN:"
        print "    Average: {}".format(elapsed_time_1_layers_average)
        print "    90th precentile: {}".format(elapsed_time_1_layers[8])
        print "    Worst: {}".format(elapsed_time_1_layers[9])

        print "Time required for 2 layer RNN:"
        print "    Average: {}".format(elapsed_time_2_layers_average)
        print "    90th precentile: {}".format(elapsed_time_2_layers[8])
        print "    Worst: {}".format(elapsed_time_2_layers[9])

        print "Time required for 3 layer RNN:"
        print "    Average: {}".format(elapsed_time_3_layers_average)
        print "    90th precentile: {}".format(elapsed_time_3_layers[8])
        print "    Worst: {}".format(elapsed_time_3_layers[9])

        print "Time required for 4 layer RNN:"
        print "    Average: {}".format(elapsed_time_4_layers_average)
        print "    90th precentile: {}".format(elapsed_time_4_layers[8])
        print "    Worst: {}".format(elapsed_time_4_layers[9])

        print "Time required for 5 layer RNN:"
        print "    Average: {}".format(elapsed_time_5_layers_average)
        print "    90th precentile: {}".format(elapsed_time_5_layers[8])
        print "    Worst: {}".format(elapsed_time_5_layers[9])

        print "Time required for 6 layer RNN:"
        print "    Average: {}".format(elapsed_time_6_layers_average)
        print "    90th precentile: {}".format(elapsed_time_6_layers[8])
        print "    Worst: {}".format(elapsed_time_6_layers[9])

        print "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}".format(
            num_of_workers, scale, num_steps, elapsed_time_1_layers_average,
            elapsed_time_1_layers[8], elapsed_time_1_layers[9],
            elapsed_time_2_layers_average, elapsed_time_2_layers[8],
            elapsed_time_2_layers[9], elapsed_time_3_layers_average,
            elapsed_time_3_layers[8], elapsed_time_3_layers[9],
            elapsed_time_4_layers_average, elapsed_time_4_layers[8],
            elapsed_time_4_layers[9], elapsed_time_5_layers_average,
            elapsed_time_5_layers[8], elapsed_time_5_layers[9],
            elapsed_time_6_layers_average, elapsed_time_6_layers[8],
            elapsed_time_6_layers[9])
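
Indexing the sorted arrays at [8] and [9] hard-codes the ten timing runs; np.percentile expresses comparable statistics independent of the run count. A sketch for one of the arrays:

times = np.array(elapsed_time_1_layers)
average = times.mean()
p90 = np.percentile(times, 90)  # rather than sorted times[8]
worst = times.max()             # rather than sorted times[9]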
Example 14
scale = 50
num_steps = 10
batch_size = scale - 1

xdim = scale * 10
h1dim = (scale + 1) * 10
h2dim = (scale + 2) * 10
h3dim = (scale + 3) * 10
h4dim = (scale + 4) * 10
h5dim = (scale + 5) * 10
ydim = (2 * scale + 6) * 10

ray.init(start_ray_local=True, num_workers=10)
ray.reusables.net_vars = ray.Reusable(
    lambda: rnn.net_initialization(scale, num_steps, batch_size, xdim, h1dim,
                                   h2dim, h3dim, h4dim, h5dim, ydim),
    rnn.net_reinitialization)
#ray.reusables.net_vars = ray.Reusable(rnn.net_initialization, rnn.net_reinitialization)

h1 = ra.zeros.remote([batch_size, h1dim])
h2 = ra.zeros.remote([batch_size, h2dim])
h3 = ra.zeros.remote([batch_size, h3dim])
h4 = ra.zeros.remote([batch_size, h4dim])
h5 = ra.zeros.remote([batch_size, h5dim])

inputs = [
    ra.random.normal.remote([batch_size, xdim]) for _ in range(num_steps)
]

# Run distributed RNN
start_time = time.time()