def benchmark_create_1000_partitions_with_100_parameter_servers(self):
    workers, _ = test.create_local_cluster(num_workers=1, num_ps=100)
    worker_sessions = [session_lib.Session(w.target) for w in workers]
    worker = worker_sessions[0]
    partition_sizes = (1, 512, 1024 * 32, 1024 * 128)

    partitioned = []

    for partition_size in partition_sizes:
      # max_shard_bytes is 4 * partition_size and the shape holds
      # 1000 * partition_size float32s (4 bytes each), so the variable should
      # partition into 1000 shards, each containing partition_size float32s.
      print("Building partitioned variable with %d floats per partition" %
            partition_size)
      with ops.device(device_setter.replica_device_setter(ps_tasks=100)):
        partitioned_ix = variable_scope.get_variable(
            "partitioned_%d" % partition_size,
            shape=[1000 * partition_size],
            dtype=dtypes.float32,
            # Size each shard to hold exactly partition_size float32s.
            partitioner=partitioned_variables.variable_axis_size_partitioner(
                max_shard_bytes=4 * partition_size))
        # Concatenates along axis 0
        partitioned.append(ops.convert_to_tensor(partitioned_ix))

    variables.global_variables_initializer().run(session=worker)

    for ix, partition_size in enumerate(partition_sizes):
      print("Running benchmark having partitions with %d floats" %
            partition_size)
      self.run_op_benchmark(
          worker,
          partitioned[ix],
          name=("read_concat_1000_partitions_from_"
                "100_parameter_servers_partsize_%d_floats" % partition_size))
Example #2
  def testVariableCreationInALoop(self):
    """Tests the variable created inside a loop can be used outside the loop."""
    with self.test_session():
      with variable_scope.variable_scope("ascope") as scope:
        def Body(i, _):
          var_x = variable_scope.get_variable(
              "x",
              shape=[2],
              initializer=init_ops.ones_initializer(),
              partitioner=partitioned_variables.variable_axis_size_partitioner(
                  4))
          return (i + 1, var_x.as_tensor())

        cond = lambda i, _: i < 2
        _, x = control_flow_ops.while_loop(
            cond, Body, (0, constant_op.constant([7, 8], dtypes.float32)))
        variables.global_variables_initializer().run()
        self.assertAllClose([1.0, 1.0], x.eval())

        scope.reuse_variables()
        var_x = variable_scope.get_variable(
            "x",
            shape=[2],
            initializer=init_ops.ones_initializer(),
            partitioner=partitioned_variables.variable_axis_size_partitioner(4))

        self.assertAllClose([1.0, 1.0], var_x.as_tensor().eval())
Example #4
  def testControlDepsNone(self):
    with self.cached_session() as session:
      c = constant_op.constant(1.0)
      with ops.control_dependencies([c]):
        # d gets the control dependency.
        d = constant_op.constant(2.0)
        # Partitioned variables do not.
        var_x = variable_scope.get_variable(
            "x",
            shape=[2],
            initializer=init_ops.ones_initializer(),
            partitioner=partitioned_variables.variable_axis_size_partitioner(4))

        ops_before_read = session.graph.get_operations()
        var_x.as_tensor()  # Caches the ops for subsequent reads.
        reading_ops = [
            op for op in session.graph.get_operations()
            if op not in ops_before_read
        ]

      self.assertEqual([c.op], d.op.control_inputs)
      # Checks that reading a partitioned variable adds no control
      # dependencies, just like reading a regular variable.
      for op in reading_ops:
        self.assertEqual([], op.control_inputs)
  def testControlDepsNone(self):
    with self.test_session() as session:
      c = constant_op.constant(1.0)
      with ops.control_dependencies([c]):
        # d gets the control dependency.
        d = constant_op.constant(2.0)
        # Partitioned variables do not.
        var_x = variable_scope.get_variable(
            "x",
            shape=[2],
            initializer=init_ops.ones_initializer(),
            partitioner=partitioned_variables.variable_axis_size_partitioner(4))

        ops_before_read = session.graph.get_operations()
        var_x.as_tensor()  # Caches the ops for subsequent reads.
        reading_ops = [
            op for op in session.graph.get_operations()
            if op not in ops_before_read
        ]

      self.assertEqual([c.op], d.op.control_inputs)
      # Checks that reading a partitioned variable adds no control
      # dependencies, just like reading a regular variable.
      for op in reading_ops:
        self.assertEqual([], op.control_inputs)
Example #6
 def Body(i, _):
   var_x = variable_scope.get_variable(
       "x",
       shape=[2],
       initializer=init_ops.ones_initializer(),
       partitioner=partitioned_variables.variable_axis_size_partitioner(
           4))
   return (i + 1, var_x.as_tensor())
Example #8
 def testPartitions(self):
   shape = (10, 10)
   init = init_ops.identity_initializer()
   partitioner = partitioned_variables.variable_axis_size_partitioner(1)
   with self.test_session(graph=ops.Graph(), use_gpu=True):
     with variable_scope.variable_scope(
         "foo", partitioner=partitioner, initializer=init):
       v = array_ops.identity(variable_scope.get_variable("bar", shape=shape))
     variables.global_variables_initializer().run()
     self.assertAllClose(v.eval(), np.eye(*shape))
Example #10
 def testPartitionedVariableMasking(self):
     partitioner = partitioned_variables.variable_axis_size_partitioner(40)
     with self.test_session() as session:
         with variable_scope.variable_scope("", partitioner=partitioner):
             sparsity = variables.Variable(0.5, name="Sparsity")
             weights = variable_scope.get_variable(
                 "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
             masked_weights = pruning.apply_mask(
                 weights, scope=variable_scope.get_variable_scope())
         p = pruning.Pruning(sparsity=sparsity, partitioner=partitioner)
         p._spec.threshold_decay = 0.0
         mask_update_op = p.mask_update_op()
         variables.global_variables_initializer().run()
         masked_weights_val = masked_weights.eval()
         session.run(mask_update_op)
         masked_weights_val = masked_weights.eval()
         self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
  def _testVariableAxisSizePartitioner(self,
                                       name,
                                       axis,
                                       max_shard_bytes,
                                       expected_axis_shards,
                                       expected_partitions,
                                       max_shards=None):
    partitioner = partitioned_variables.variable_axis_size_partitioner(
        axis=axis, max_shard_bytes=max_shard_bytes, max_shards=max_shards)

    with variable_scope.variable_scope("root", partitioner=partitioner):
      v0 = variable_scope.get_variable(
          name, dtype=dtypes.float32, shape=(4, 8, 16, 32))
      v0_list = v0._get_variable_list()
      v0_part = v0._get_partitions()
      self.assertEqual(len(v0_list), expected_axis_shards)
      self.assertAllEqual(v0_part, expected_partitions)
Example #13
 def testPartitionedVariableMasking(self):
   partitioner = partitioned_variables.variable_axis_size_partitioner(40)
   with self.test_session() as session:
     with variable_scope.variable_scope("", partitioner=partitioner):
       sparsity = variables.Variable(0.5, name="Sparsity")
       weights = variable_scope.get_variable(
           "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
       masked_weights = pruning.apply_mask(
           weights, scope=variable_scope.get_variable_scope())
     p = pruning.Pruning(sparsity=sparsity)
     p._spec.threshold_decay = 0.0
     mask_update_op = p.mask_update_op()
     variables.global_variables_initializer().run()
     masked_weights_val = masked_weights.eval()
     session.run(mask_update_op)
     masked_weights_val = masked_weights.eval()
     self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
  def testReadInWhileLoop(self):
    """Tests the value is current (not cached) when read within a loop."""
    with self.test_session():
      var_x = variable_scope.get_variable(
          "x",
          shape=[2],
          initializer=init_ops.ones_initializer(),
          partitioner=partitioned_variables.variable_axis_size_partitioner(4))

      def Body(i, _):
        # Use an SGD step to update the variable's value.
        loss = math_ops.reduce_sum(var_x)
        optimizer = gradient_descent.GradientDescentOptimizer(1.0)
        minimize = optimizer.minimize(loss * 0.7)
        with ops.control_dependencies([minimize]):
          return (i + 1, var_x.as_tensor())

      cond = lambda i, _: i < 2
      _, x = control_flow_ops.while_loop(
          cond, Body, (0, constant_op.constant([7, 8], dtypes.float32)))
      variables.global_variables_initializer().run()
      # Two gradient steps of size 1.0 * 0.7 starting from ones:
      # 1.0 - 0.7 - 0.7 = -0.4 for each element.
      self.assertAllClose([-0.4, -0.4], x.eval())
Example #16
  def testConcat(self):
    with self.cached_session() as session:
      var_x = variable_scope.get_variable(
          "x",
          initializer=constant_op.constant([1., 2.]),
          partitioner=partitioned_variables.variable_axis_size_partitioner(4))

      c = constant_op.constant(1.0)
      with ops.control_dependencies([c]):
        ops_before_concat = session.graph.get_operations()
        value = var_x._concat()  # pylint: disable=protected-access
        concat_ops = [
            op for op in session.graph.get_operations()
            if op not in ops_before_concat
        ]

      concat_control_inputs = [
          ci for op in concat_ops for ci in op.control_inputs
      ]
      self.assertTrue(
          c.op in concat_control_inputs,
          "var_x._concat() should get control dependencies from its scope.")
      variables.global_variables_initializer().run()
      self.assertAllClose(value.eval(), var_x.as_tensor().eval())
  def testConcat(self):
    with self.test_session() as session:
      var_x = variable_scope.get_variable(
          "x",
          initializer=constant_op.constant([1., 2.]),
          partitioner=partitioned_variables.variable_axis_size_partitioner(4))

      c = constant_op.constant(1.0)
      with ops.control_dependencies([c]):
        ops_before_concat = session.graph.get_operations()
        value = var_x._concat()  # pylint: disable=protected-access
        concat_ops = [
            op for op in session.graph.get_operations()
            if op not in ops_before_concat
        ]

      concat_control_inputs = [
          ci for op in concat_ops for ci in op.control_inputs
      ]
      self.assertTrue(
          c.op in concat_control_inputs,
          "var_x._concat() should get control dependencies from its scope.")
      variables.global_variables_initializer().run()
      self.assertAllClose(value.eval(), var_x.as_tensor().eval())
  def testVariableAxisSizePartitioner(self):
    with self.test_session():
      # Create a partitioned variable of shape (4, 8, 16, 32) type float32
      # Bytes per slice along the given axes:

      # 8 * 16 * 32 * sizeof(float32) = 16384 / slice on axis 0
      # 4 * 16 * 32 * sizeof(float32) = 8192 / slice on axis 1
      # 4 * 8 * 32 * sizeof(float32) = 4096 / slice on axis 2
      # 4 * 8 * 16 * sizeof(float32) = 2048 / slice on axis 3

      # Now partition it in different ways...

      # No need to slice: bytes_per_slice * dim0 = 65536 < max_shard_bytes
      self._testVariableAxisSizePartitioner(
          "v0",
          axis=0,
          max_shard_bytes=131072,
          expected_axis_shards=1,
          expected_partitions=(1, 1, 1, 1))

      # Slice exactly once: bytes_per_slice * dim1 = 65536 = max_shard_bytes
      self._testVariableAxisSizePartitioner(
          "v1",
          axis=1,
          max_shard_bytes=65536,
          expected_axis_shards=1,
          expected_partitions=(1, 1, 1, 1))

      # Slice into 2 parts:
      # bytes_per_slice = 4096
      # slices_per_shard = 32768 / 4096 = 8
      # axis_shards = 16 / 8 = 2
      self._testVariableAxisSizePartitioner(
          "v2",
          axis=2,
          max_shard_bytes=32768,
          expected_axis_shards=2,
          expected_partitions=(1, 1, 2, 1))

      # This partitioner makes sure we maximize the number of shards along
      # axis 3. Slice it into 32 parts:
      # bytes_per_slice = 2048
      # slices_per_shard = 2048 / 2048 = 1
      # axis_shards = 32 / 1 = 32
      self._testVariableAxisSizePartitioner(
          "v3a",
          axis=3,
          max_shard_bytes=2048,
          expected_axis_shards=32,
          expected_partitions=(1, 1, 1, 32))

      # This partitioner makes sure we do not go past the bound of allowable
      # number of shards along axis 3.
      # Slice into 32 parts:
      # bytes_per_slice = 2048
      # slices_per_shard = max(1, 1024 / 2048) = 1
      # axis_shards = 32 / 1 = 32
      # Slice into max of 32 parts because: max_shard_bytes < bytes_per_slice
      self._testVariableAxisSizePartitioner(
          "v3b",
          axis=3,
          max_shard_bytes=1024,
          expected_axis_shards=32,
          expected_partitions=(1, 1, 1, 32))

      # Specify max_shards so that it won't affect sharding.
      self._testVariableAxisSizePartitioner(
          "v3c",
          axis=3,
          max_shard_bytes=1024,
          expected_axis_shards=32,
          expected_partitions=(1, 1, 1, 32),
          max_shards=33)

      # Specify max_shards so that it will affect sharding.
      self._testVariableAxisSizePartitioner(
          "v3d",
          axis=3,
          max_shard_bytes=1024,
          expected_axis_shards=2,
          expected_partitions=(1, 1, 1, 2),
          max_shards=2)

      # Use the partitioner with strings
      partitioner_axis3_str = partitioned_variables.variable_axis_size_partitioner(  # pylint: disable=line-too-long
          axis=3,
          max_shard_bytes=32768,
          bytes_per_string_element=8)

      with variable_scope.variable_scope(
          "root", partitioner=partitioner_axis3_str):
        v3str = variable_scope.get_variable(
            "v3str",
            initializer=np.array([""] * 4 * 8 * 16 * 32).reshape(4, 8, 16, 32),
            dtype=dtypes.string,
            shape=(4, 8, 16, 32))
        v3str_list = v3str._get_variable_list()
        v3str_part = v3str._get_partitions()

        # Now the estimated bytes_per_slice = 4*8*16*bytes_per_string_element,
        # which equals 4096. With max_shard_bytes set to 32768,
        # we should get a split of 4.
        # Slice into 4 parts:
        # bytes_per_slice = 4096
        # slices_per_shard = 32768 / 4096 = 8
        # axis_shards = 32 / 8 = 4
        self.assertEqual(len(v3str_list), 4)
        self.assertAllEqual(v3str_part, (1, 1, 1, 4))
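The per-axis arithmetic spelled out in the comments above can be condensed into a small model. The sketch below is an illustrative reimplementation of that arithmetic, not the actual TensorFlow partitioner: it assumes the shard count along the chosen axis is ceil(total_bytes / max_shard_bytes), capped by the axis dimension and, when supplied, by max_shards.

# Illustrative model of the sharding arithmetic described above (assumed
# behavior; the real logic lives in
# partitioned_variables.variable_axis_size_partitioner).
def sketch_axis_shards(shape, bytes_per_element, max_shard_bytes,
                       axis=0, max_shards=None):
  total_bytes = bytes_per_element
  for dim in shape:
    total_bytes *= dim
  shards = -(-total_bytes // max_shard_bytes)  # ceiling division
  if max_shards is not None:
    shards = min(shards, max_shards)
  return max(1, min(shards, shape[axis]))

# These reproduce the expected_axis_shards values asserted above.
assert sketch_axis_shards((4, 8, 16, 32), 4, 131072, axis=0) == 1   # v0
assert sketch_axis_shards((4, 8, 16, 32), 4, 65536, axis=1) == 1    # v1
assert sketch_axis_shards((4, 8, 16, 32), 4, 32768, axis=2) == 2    # v2
assert sketch_axis_shards((4, 8, 16, 32), 4, 2048, axis=3) == 32    # v3a
assert sketch_axis_shards((4, 8, 16, 32), 4, 1024, axis=3) == 32    # v3b
assert sketch_axis_shards((4, 8, 16, 32), 4, 1024, axis=3,
                          max_shards=2) == 2                        # v3d
assert sketch_axis_shards((4, 8, 16, 32), 8, 32768, axis=3) == 4    # v3str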
Example #20
 def __call__(self, shape, dtype, axis=0):
     return partitioned_variables.variable_axis_size_partitioner(
         max_shard_bytes=self._max_shard_bytes,
         max_shards=self._max_shards,
         bytes_per_string_element=self._bytes_per_string,
         axis=axis)(shape, dtype)
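
The __call__ above refers to attributes of an enclosing class that this example does not show. A minimal, hypothetical wrapper consistent with it could look like the following; the class name, constructor signature, and defaults are assumptions rather than part of the original source, and the module aliases (partitioned_variables, variable_scope, dtypes) are the same ones used throughout the examples above.

# Hypothetical enclosing class for the __call__ method above (illustrative only).
class MaxShardBytesPartitioner(object):
  """Callable wrapper around variable_axis_size_partitioner."""

  def __init__(self, max_shard_bytes, max_shards=None, bytes_per_string=16):
    self._max_shard_bytes = max_shard_bytes
    self._max_shards = max_shards
    self._bytes_per_string = bytes_per_string

  def __call__(self, shape, dtype, axis=0):
    # Delegate to the stock partitioner, which returns the per-dimension
    # partition counts for the given shape and dtype.
    return partitioned_variables.variable_axis_size_partitioner(
        max_shard_bytes=self._max_shard_bytes,
        max_shards=self._max_shards,
        bytes_per_string_element=self._bytes_per_string,
        axis=axis)(shape, dtype)

# Assumed usage: shard a [1024, 64] float32 variable into roughly 64 KiB pieces.
# partitioner = MaxShardBytesPartitioner(max_shard_bytes=64 * 1024)
# with variable_scope.variable_scope("wrapped", partitioner=partitioner):
#   v = variable_scope.get_variable(
#       "w", shape=[1024, 64], dtype=dtypes.float32)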