def benchmark_create_1000_partitions_with_100_parameter_servers(self):
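    # Measures the time to read and concatenate a variable split into 1000
    # partitions spread across 100 parameter servers, for several partition
    # sizes.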
    workers, _ = test.create_local_cluster(num_workers=1, num_ps=100)
    worker_sessions = [session_lib.Session(w.target) for w in workers]
    worker = worker_sessions[0]
    partition_sizes = (1, 512, 1024 * 32, 1024 * 128)

    partitioned = []

    for partition_size in partition_sizes:
      # max_shard_bytes is 4 * partition_size, so the variable of
      # 1000 * partition_size float32s should partition into 1000 shards,
      # each containing partition_size float32s.
      print("Building partitioned variable with %d floats per partition" %
            partition_size)
      with ops.device(device_setter.replica_device_setter(ps_tasks=100)):
        partitioned_ix = variable_scope.get_variable(
            "partitioned_%d" % partition_size,
            shape=[1000 * partition_size],
            dtype=dtypes.float32,
            # Each partition holds exactly partition_size float32s.
            partitioner=partitioned_variables.variable_axis_size_partitioner(
                max_shard_bytes=4 * partition_size))
        # Concatenates along axis 0
        partitioned.append(ops.convert_to_tensor(partitioned_ix))

    variables.global_variables_initializer().run(session=worker)

    for ix, partition_size in enumerate(partition_sizes):
      print("Running benchmark having partitions with %d floats" %
            partition_size)
      self.run_op_benchmark(
          worker,
          partitioned[ix],
          name=("read_concat_1000_partitions_from_"
                "100_parameter_servers_partsize_%d_floats" % partition_size))
  def testTensorArrayGetsDeviceFromFirstWriteInWhileLoop(self):
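    # The TensorArray is constructed on task 0, but its first write happens on
    # task 1 inside the while_loop body, so the TensorArray ops should be
    # placed on task 1.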
    with ops.device("/job:worker/task:0/cpu:0"):
      ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)

    def _body(i, ta_i):
      with ops.device("/job:worker/task:1/cpu:0"):
        return i + 1, ta_i.write(i, 0.0)

    _, ta_out = control_flow_ops.while_loop(
        lambda i, ta: i < 2, _body, loop_vars=[0, ta])

    workers, _ = test.create_local_cluster(num_workers=3, num_ps=0)
    session = session_lib.Session(workers[0].target)

    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()

    session.run(ta_out.flow, options=run_options, run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField("step_stats"))
    dev_stats = {d.device: d.node_stats
                 for d in run_metadata.step_stats.dev_stats}
    for d in dev_stats:
      if "/task:1/" in d:
        self.assertTrue(
            [s for s in dev_stats[d] if "/TensorArray" in s.node_name])
      else:
        self.assertFalse(
            [s for s in dev_stats[d] if "/TensorArray" in s.node_name])
  def benchmarkCreateLocalCluster(self):
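    # Reports the median wall time, over five iterations, of starting an
    # in-process cluster with one worker and ten parameter servers.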
    deltas = []
    iters = 5
    for _ in range(iters):
      start_time = time.time()
      test.create_local_cluster(num_workers=1, num_ps=10)
      end_time = time.time()
      deltas.append(end_time - start_time)

    median_deltas = np.median(deltas)
    print("\n\nbenchmark_create_local_cluster_1_worker_10_ps.  "
          "iterations: %d, median wall time: %g\n\n" % (iters, median_deltas))
    self.report_benchmark(
        iters=iters,
        wall_time=median_deltas,
        name="benchmark_create_local_cluster_1_worker_10_ps")
Example #5
  def testBasics(self):
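    # Compiles an XLA computation through an XRT backend that talks to a
    # single in-process worker, and checks that the output equals (a + b) * 3.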
    (worker,), _ = test.create_local_cluster(num_workers=1, num_ps=0)
    self.assertTrue(worker.target.startswith("grpc://"))
    tf_context = xrt.get_tf_context(worker.target[len("grpc://"):], "worker")
    backend = xrt.XrtBackend(tf_context, "XLA_CPU")

    a = np.arange(10)
    b = np.arange(10)

    c = BuildAddAndScaleComputation(
        xla_client.Shape.from_pyval(a), xla_client.Shape.from_pyval(b))

    executable = c.Compile(backend=backend)
    output = executable.ExecuteWithPythonValues((a, b))
    self.assertAllEqual(output, (a + b) * 3)
  def testTensorArrayGetsDeviceFromFirstWrite(self):
    with ops.device("/job:worker/task:0/cpu:0"):
      # This initial device will be ignored.
      ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
    with ops.device("/job:worker/task:1/cpu:0"):
      # The first write sets the op's device.
      ta = ta.write(0, 1.0)
    with ops.device("/job:worker/task:2/cpu:0"):
      # Subsequent writes do not modify the op's device.
      ta = ta.write(1, 1.0)

    # The gradient TA will sit on the same device as the forward TA.
    ta_grad = ta.grad("grad")
    flows = [ta.flow, ta_grad.flow]

    # Similar checks for unstack and split.
    with ops.device("/job:worker/task:0/cpu:0"):
      ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=3)
    with ops.device("/job:worker/task:1/cpu:0"):
      ta = ta.unstack([1.0, 2.0])
    with ops.device("/job:worker/task:2/cpu:0"):
      ta = ta.write(2, 3.0)
    flows.append(ta.flow)

    with ops.device("/job:worker/task:0/cpu:0"):
      ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
    with ops.device("/job:worker/task:1/cpu:0"):
      ta = ta.split([1.0, 2.0], [1, 1])
    flows.append(ta.flow)

    workers, _ = test.create_local_cluster(num_workers=3, num_ps=0)
    session = session_lib.Session(workers[0].target)

    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()

    session.run(flows, options=run_options, run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField("step_stats"))
    dev_stats = {d.device: d.node_stats
                 for d in run_metadata.step_stats.dev_stats}
    for d in dev_stats:
      if "/task:1/" in d:
        self.assertTrue(
            [s for s in dev_stats[d] if "/TensorArray" in s.node_name])
      else:
        self.assertFalse(
            [s for s in dev_stats[d] if "/TensorArray" in s.node_name])
Example #7
  def testTuples(self):
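    # Round-trips two arrays through an XRT tuple buffer and checks that
    # destructuring the tuple returns the original values.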
    (worker,), _ = test.create_local_cluster(num_workers=1, num_ps=0)
    self.assertTrue(worker.target.startswith("grpc://"))
    tf_context = xrt.get_tf_context(worker.target[len("grpc://"):], "worker")
    backend = xrt.XrtBackend(tf_context, "XLA_CPU")

    a = np.random.randn(10)
    b = np.random.randn(15, 3)
    pieces = [
        xla_client.Buffer.from_pyval(a, backend=backend),
        xla_client.Buffer.from_pyval(b, backend=backend)
    ]
    t = xla_client.Buffer.make_tuple(pieces, backend=backend)
    a_out, b_out = t.destructure()
    self.assertAllEqual(a, a_out.to_py())
    self.assertAllEqual(b, b_out.to_py())
Example #8
  def testBasics(self):
    (worker,), _ = test.create_local_cluster(num_workers=1, num_ps=0)
    self.assertTrue(worker.target.startswith("grpc://"))
    tf_context = xrt.get_tf_context(worker.target[len("grpc://"):], "worker")
    backend = xrt.XrtBackend(tf_context, "XLA_CPU")

    a = np.arange(10)
    b = np.arange(10)

    c = BuildAddAndScaleComputation(
        xla_client.shape_from_pyval(a), xla_client.shape_from_pyval(b))

    executable = c.Compile(backend=backend)
    output = xla_client.execute_with_python_values(
        executable, (a, b), backend=backend)
    self.assertAllEqual(output, (a + b) * 3)
  def testCreateLocalCluster(self):
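    # Places variables on both ps tasks, initializes them from one worker
    # session, and reads them back from the other worker's session to verify
    # that all tasks share the same cluster.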
    workers, _ = test.create_local_cluster(num_workers=2, num_ps=2)
    worker_sessions = [session_lib.Session(w.target) for w in workers]
    with ops.device("/job:ps/task:0"):
      var0 = variables.Variable(0.0)
    with ops.device("/job:ps/task:1"):
      var1 = variables.Variable(1.0)
    worker_sessions[0].run([var0.initializer, var1.initializer])
    with ops.device("/job:ps/task:0"):
      var2 = variables.Variable(2.0)
    with ops.device("/job:ps/task:1"):
      var3 = variables.Variable(3.0)
    worker_sessions[1].run([var2.initializer, var3.initializer])

    # Read values back in the opposite session
    self.assertAllEqual(0.0, var0.eval(session=worker_sessions[1]))
    self.assertAllEqual(1.0, var1.eval(session=worker_sessions[1]))
    self.assertAllEqual(2.0, var2.eval(session=worker_sessions[0]))
    self.assertAllEqual(3.0, var3.eval(session=worker_sessions[0]))
Example #13
  @classmethod
  def setUpClass(cls):
    super(TensorArrayTest, cls).setUpClass()
    # Share one in-process three-worker cluster across all tests in the class.
    cls._workers, _ = test.create_local_cluster(num_workers=3, num_ps=0)