def benchmark_create_1000_partitions_with_100_parameter_servers(self):
  workers, _ = test.create_local_cluster(num_workers=1, num_ps=100)
  worker_sessions = [session_lib.Session(w.target) for w in workers]
  worker = worker_sessions[0]
  partition_sizes = (1, 512, 1024 * 32, 1024 * 128)

  partitioned = []
  for partition_size in partition_sizes:
    # max_shard_bytes is 4 * partition_size (each float32 is 4 bytes), so the
    # [1000 * partition_size] float32 variable should partition into 1000
    # shards, each containing partition_size float32s.
    print("Building partitioned variable with %d floats per partition" %
          partition_size)
    with ops.device(device_setter.replica_device_setter(ps_tasks=100)):
      partitioned_ix = variable_scope.get_variable(
          "partitioned_%d" % partition_size,
          shape=[1000 * partition_size],
          dtype=dtypes.float32,
          # Each partition to have exactly N float32s.
          partitioner=partitioned_variables.variable_axis_size_partitioner(
              max_shard_bytes=4 * partition_size))
      # Concatenates along axis 0.
      partitioned.append(ops.convert_to_tensor(partitioned_ix))

  variables.global_variables_initializer().run(session=worker)

  for ix, partition_size in enumerate(partition_sizes):
    print("Running benchmark having partitions with %d floats" %
          partition_size)
    self.run_op_benchmark(
        worker,
        partitioned[ix],
        name=("read_concat_1000_partitions_from_"
              "100_parameter_servers_partsize_%d_floats" % partition_size))
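# A standalone sanity check of the sharding arithmetic used in the benchmark
# above; this is a minimal sketch, not part of the original test, and the
# imports are the internal TensorFlow modules these snippets already rely on.
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import partitioned_variables

partition_size = 512  # any entry of partition_sizes behaves the same way
fn = partitioned_variables.variable_axis_size_partitioner(
    max_shard_bytes=4 * partition_size)
# Each float32 is 4 bytes, so one shard holds partition_size elements and the
# [1000 * partition_size] vector splits into 1000 shards along axis 0.
print(fn(tensor_shape.TensorShape([1000 * partition_size]), dtypes.float32))
# -> [1000]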
def testVariableCreationInALoop(self):
  """Tests that a variable created inside a loop can be used outside the loop."""
  with self.test_session():
    with variable_scope.variable_scope("ascope") as scope:
      def Body(i, _):
        var_x = variable_scope.get_variable(
            "x",
            shape=[2],
            initializer=init_ops.ones_initializer(),
            partitioner=partitioned_variables.variable_axis_size_partitioner(
                4))
        return (i + 1, var_x.as_tensor())

      cond = lambda i, _: i < 2
      _, x = control_flow_ops.while_loop(
          cond, Body, (0, constant_op.constant([7, 8], dtypes.float32)))
      variables.global_variables_initializer().run()
      self.assertAllClose([1.0, 1.0], x.eval())

      scope.reuse_variables()
      var_x = variable_scope.get_variable(
          "x",
          shape=[2],
          initializer=init_ops.ones_initializer(),
          partitioner=partitioned_variables.variable_axis_size_partitioner(4))

      self.assertAllClose([1.0, 1.0], var_x.as_tensor().eval())
def testControlDepsNone(self):
  with self.cached_session() as session:
    c = constant_op.constant(1.0)
    with ops.control_dependencies([c]):
      # d gets the control dependency.
      d = constant_op.constant(2.0)
      # Partitioned variables do not.
      var_x = variable_scope.get_variable(
          "x",
          shape=[2],
          initializer=init_ops.ones_initializer(),
          partitioner=partitioned_variables.variable_axis_size_partitioner(4))
      ops_before_read = session.graph.get_operations()
      var_x.as_tensor()  # Caches the ops for subsequent reads.
      reading_ops = [
          op for op in session.graph.get_operations()
          if op not in ops_before_read
      ]
    self.assertEqual([c.op], d.op.control_inputs)
    # Tests that no control dependencies are added when reading a partitioned
    # variable, which behaves the same as reading a regular variable.
    for op in reading_ops:
      self.assertEqual([], op.control_inputs)
def testPartitions(self):
  shape = (10, 10)
  init = init_ops.identity_initializer()
  partitioner = partitioned_variables.variable_axis_size_partitioner(1)
  with self.test_session(graph=ops.Graph(), use_gpu=True):
    with variable_scope.variable_scope(
        "foo", partitioner=partitioner, initializer=init):
      v = array_ops.identity(variable_scope.get_variable("bar", shape=shape))
    variables.global_variables_initializer().run()
    self.assertAllClose(v.eval(), np.eye(*shape))
def testPartitionedVariableMasking(self):
  partitioner = partitioned_variables.variable_axis_size_partitioner(40)
  with self.test_session() as session:
    with variable_scope.variable_scope("", partitioner=partitioner):
      sparsity = variables.Variable(0.5, name="Sparsity")
      weights = variable_scope.get_variable(
          "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
      masked_weights = pruning.apply_mask(
          weights, scope=variable_scope.get_variable_scope())
    p = pruning.Pruning(sparsity=sparsity, partitioner=partitioner)
    p._spec.threshold_decay = 0.0
    mask_update_op = p.mask_update_op()
    variables.global_variables_initializer().run()
    masked_weights_val = masked_weights.eval()
    session.run(mask_update_op)
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
def _testVariableAxisSizePartitioner(self,
                                     name,
                                     axis,
                                     max_shard_bytes,
                                     expected_axis_shards,
                                     expected_partitions,
                                     max_shards=None):
  partitioner = partitioned_variables.variable_axis_size_partitioner(
      axis=axis, max_shard_bytes=max_shard_bytes, max_shards=max_shards)

  with variable_scope.variable_scope("root", partitioner=partitioner):
    v0 = variable_scope.get_variable(
        name, dtype=dtypes.float32, shape=(4, 8, 16, 32))
    v0_list = v0._get_variable_list()
    v0_part = v0._get_partitions()
    self.assertEqual(len(v0_list), expected_axis_shards)
    self.assertAllEqual(v0_part, expected_partitions)
def testPartitionedVariableMasking(self):
  partitioner = partitioned_variables.variable_axis_size_partitioner(40)
  with self.test_session() as session:
    with variable_scope.variable_scope("", partitioner=partitioner):
      sparsity = variables.Variable(0.5, name="Sparsity")
      weights = variable_scope.get_variable(
          "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
      masked_weights = pruning.apply_mask(
          weights, scope=variable_scope.get_variable_scope())
    p = pruning.Pruning(sparsity=sparsity)
    p._spec.threshold_decay = 0.0
    mask_update_op = p.mask_update_op()
    variables.global_variables_initializer().run()
    masked_weights_val = masked_weights.eval()
    session.run(mask_update_op)
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
def testReadInWhileLoop(self):
  """Tests that the value is current (not cached) when read within a loop."""
  with self.test_session():
    var_x = variable_scope.get_variable(
        "x",
        shape=[2],
        initializer=init_ops.ones_initializer(),
        partitioner=partitioned_variables.variable_axis_size_partitioner(4))

    def Body(i, _):
      # Use an SGD step to update the variable's value.
      loss = math_ops.reduce_sum(var_x)
      optimizer = gradient_descent.GradientDescentOptimizer(1.0)
      minimize = optimizer.minimize(loss * 0.7)
      with ops.control_dependencies([minimize]):
        return (i + 1, var_x.as_tensor())

    cond = lambda i, _: i < 2
    _, x = control_flow_ops.while_loop(
        cond, Body, (0, constant_op.constant([7, 8], dtypes.float32)))
    variables.global_variables_initializer().run()
    self.assertAllClose([-0.4, -0.4], x.eval())
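# A quick sanity check (plain Python, not part of the test above) of the
# expected [-0.4, -0.4]: the loop body minimizes 0.7 * reduce_sum(var_x), so the
# gradient w.r.t. each element is 0.7, and two GradientDescentOptimizer(1.0)
# steps move each element of the ones-initialized variable from 1.0 to -0.4.
x = 1.0
for _ in range(2):  # the while_loop runs its body twice (i = 0, 1)
  x -= 1.0 * 0.7  # learning_rate * d(loss)/dx_i
print(x)  # approximately -0.4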
def testConcat(self):
  with self.cached_session() as session:
    var_x = variable_scope.get_variable(
        "x",
        initializer=constant_op.constant([1., 2.]),
        partitioner=partitioned_variables.variable_axis_size_partitioner(4))

    c = constant_op.constant(1.0)
    with ops.control_dependencies([c]):
      ops_before_concat = session.graph.get_operations()
      value = var_x._concat()  # pylint: disable=protected-access
      concat_ops = [
          op for op in session.graph.get_operations()
          if op not in ops_before_concat
      ]

    concat_control_inputs = [
        ci for op in concat_ops for ci in op.control_inputs
    ]
    self.assertTrue(
        c.op in concat_control_inputs,
        "var_x._concat() should get control dependencies from its scope.")
    variables.global_variables_initializer().run()
    self.assertAllClose(value.eval(), var_x.as_tensor().eval())
def testVariableAxisSizePartitioner(self):
  with self.test_session():
    # Create a partitioned variable of shape (4, 8, 16, 32), type float32.
    # Bytes per slice along the given axes:

    # 8 * 16 * 32 * sizeof(float32) = 16384 / slice on axis 0
    # 4 * 16 * 32 * sizeof(float32) = 8192 / slice on axis 1
    # 4 * 8 * 32 * sizeof(float32) = 4096 / slice on axis 2
    # 4 * 8 * 16 * sizeof(float32) = 2048 / slice on axis 3

    # Now partition it in different ways...

    # No need to slice: bytes_per_slice * dim0 = 65536 < max_shard_bytes
    self._testVariableAxisSizePartitioner(
        "v0",
        axis=0,
        max_shard_bytes=131072,
        expected_axis_shards=1,
        expected_partitions=(1, 1, 1, 1))

    # Fits in exactly one shard: bytes_per_slice * dim1 = 65536 = max_shard_bytes
    self._testVariableAxisSizePartitioner(
        "v1",
        axis=1,
        max_shard_bytes=65536,
        expected_axis_shards=1,
        expected_partitions=(1, 1, 1, 1))

    # Slice into 2 parts:
    # bytes_per_slice = 4096
    # slices_per_shard = 32768 / 4096 = 8
    # axis_shards = 16 / 8 = 2
    self._testVariableAxisSizePartitioner(
        "v2",
        axis=2,
        max_shard_bytes=32768,
        expected_axis_shards=2,
        expected_partitions=(1, 1, 2, 1))

    # This partitioner makes sure we maximize the number of shards along
    # axis 3. Slice it into 32 parts:
    # bytes_per_slice = 2048
    # slices_per_shard = 2048 / 2048 = 1
    # axis_shards = 32 / 1 = 32
    self._testVariableAxisSizePartitioner(
        "v3a",
        axis=3,
        max_shard_bytes=2048,
        expected_axis_shards=32,
        expected_partitions=(1, 1, 1, 32))

    # This partitioner makes sure we do not go past the bound of allowable
    # number of shards along axis 3.
    # Slice into 32 parts:
    # bytes_per_slice = 2048
    # slices_per_shard = max(1, 1024 / 2048) = 1
    # axis_shards = 32 / 1 = 32
    # Slice into a max of 32 parts because: max_shard_bytes < bytes_per_slice
    self._testVariableAxisSizePartitioner(
        "v3b",
        axis=3,
        max_shard_bytes=1024,
        expected_axis_shards=32,
        expected_partitions=(1, 1, 1, 32))

    # Specify max_shards so that it won't affect sharding.
    self._testVariableAxisSizePartitioner(
        "v3c",
        axis=3,
        max_shard_bytes=1024,
        expected_axis_shards=32,
        expected_partitions=(1, 1, 1, 32),
        max_shards=33)

    # Specify max_shards so that it will affect sharding.
    self._testVariableAxisSizePartitioner(
        "v3d",
        axis=3,
        max_shard_bytes=1024,
        expected_axis_shards=2,
        expected_partitions=(1, 1, 1, 2),
        max_shards=2)

    # Use the partitioner with strings.
    partitioner_axis3_str = partitioned_variables.variable_axis_size_partitioner(  # pylint: disable=line-too-long
        axis=3, max_shard_bytes=32768, bytes_per_string_element=8)

    with variable_scope.variable_scope(
        "root", partitioner=partitioner_axis3_str):
      v3str = variable_scope.get_variable(
          "v3str",
          initializer=np.array([""] * 4 * 8 * 16 * 32).reshape(4, 8, 16, 32),
          dtype=dtypes.string,
          shape=(4, 8, 16, 32))
      v3str_list = v3str._get_variable_list()
      v3str_part = v3str._get_partitions()

      # The estimated bytes_per_slice = 4 * 8 * 16 * bytes_per_string_element
      # = 4096, so with max_shard_bytes = 32768 we should get a split of 4.
      # Slice into 4 parts:
      # bytes_per_slice = 4096
      # slices_per_shard = 32768 / 4096 = 8
      # axis_shards = 32 / 8 = 4
      self.assertEqual(len(v3str_list), 4)
      self.assertAllEqual(v3str_part, (1, 1, 1, 4))
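# A standalone sketch reproducing the string-variable split checked above,
# calling the partitioner directly instead of through get_variable; the imports
# are the internal TensorFlow modules these snippets already use.
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import partitioned_variables

p = partitioned_variables.variable_axis_size_partitioner(
    axis=3, max_shard_bytes=32768, bytes_per_string_element=8)
# bytes_per_slice = 4 * 8 * 16 * 8 = 4096, slices_per_shard = 32768 // 4096 = 8,
# axis_shards = 32 // 8 = 4.
print(p(tensor_shape.TensorShape([4, 8, 16, 32]), dtypes.string))
# -> [1, 1, 1, 4]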
def __call__(self, shape, dtype, axis=0):
  return partitioned_variables.variable_axis_size_partitioner(
      max_shard_bytes=self._max_shard_bytes,
      max_shards=self._max_shards,
      bytes_per_string_element=self._bytes_per_string,
      axis=axis)(shape, dtype)