def test_stack_outside_push(self):
  """Pushing to a stack created outside the loop body raises ValueError."""
  outer_stack = data_flow_ops.stack_v2(max_size=4, elem_type=dtypes.int32)

  def push_to_outer_stack(_):
    return data_flow_ops.stack_push_v2(outer_stack, 7)

  expected_message = "StackPushV2 not allowed.*"
  with self.assertRaisesRegexp(ValueError, expected_message):
    pfor_control_flow_ops.pfor(push_to_outer_stack, iters=2)
def test_parallel_iterations_zero(self):
  """`parallel_iterations=0` is rejected by both `pfor` and `for_loop`."""
  expected_message = "positive integer"

  def always_one(i):
    return 1

  # pfor reports the problem as a ValueError ...
  with self.assertRaisesRegexp(ValueError, expected_message):
    pfor_control_flow_ops.pfor(always_one, 8, parallel_iterations=0)
  # ... while for_loop is expected to raise a TypeError.
  with self.assertRaisesRegexp(TypeError, expected_message):
    pfor_control_flow_ops.for_loop(
        always_one, dtypes.int32, 8, parallel_iterations=0)
def test_tile_loop_dependent(self):
  """Tiling with multiples that depend on the loop index is an error."""
  source = random_ops.random_uniform([3, 2, 3])

  def tile_by_index(i):
    slice_i = array_ops.gather(source, i)
    # The multiples argument depends on `i`, which pfor cannot vectorize.
    return array_ops.tile(slice_i, [i, 1])

  expected_message = "expected to be loop invariant"
  with self.assertRaisesRegexp(ValueError, expected_message):
    pfor_control_flow_ops.pfor(tile_by_index, 2)
def test_parallel_iterations(self):
  """Reductions combined with `parallel_iterations` raise ValueError."""
  data = random_ops.random_uniform([8, 3])

  # The second argument must keep the name `pfor_config`.
  def summed_row(i, pfor_config):
    row = array_ops.gather(data, i)
    return pfor_config.reduce_sum(row)

  expected_message = "parallel_iterations currently unsupported"
  with self.assertRaisesRegexp(ValueError, expected_message):
    pfor_control_flow_ops.pfor(summed_row, 8, parallel_iterations=2)
def test_tensor_array_grad(self):
  """Checks values and gradients of a TensorArray read in a nested loop."""
  inp = constant_op.constant(np.random.rand(3, 4, 2), dtype=dtypes.float32)
  empty_array = tensor_array_ops.TensorArray(dtypes.float32, size=3)
  ta = empty_array.unstack(inp)

  def loop_fn(i):

    def accumulate(j, running_sum):
      element = ta.gather([j])
      element = array_ops.reshape(element, [4, 2])
      element = array_ops.gather(element, i)
      return j + 1, running_sum + element

    initial_vars = (0, array_ops.zeros([2]))
    _, total = control_flow_ops.while_loop(
        lambda j, _: j < 3, accumulate, initial_vars)
    product = math_ops.reduce_prod(total)
    return product, gradient_ops.gradients(product, inp)[0]

  pfor_out, pfor_out_grad = pfor_control_flow_ops.pfor(loop_fn, 4)

  # tf.while_loop cannot express the setup above, so the reference value is
  # written out by hand: sum over the first axis, then product over the last.
  summed = math_ops.reduce_sum(inp, axis=[0])
  real_out = math_ops.reduce_prod(summed, axis=[1])
  # Gradients of real_out are accumulated over all output values, so the
  # per-iteration pfor gradients are summed the same way before comparing.
  real_out_grad = gradient_ops.gradients(real_out, inp)[0]
  sum_pfor_out_grad = math_ops.reduce_sum(pfor_out_grad, axis=[0])

  fetches = [pfor_out, real_out, sum_pfor_out_grad, real_out_grad]
  with session.Session() as sess:
    got, want, got_grad, want_grad = sess.run(fetches)
    self.assertAllClose(got, want)
    self.assertAllClose(got_grad, want_grad)
def create_lstm_per_eg_grad(batch_size, state_size, steps):
  """Builds per-example LSTM gradients with both pfor and for_loop.

  Args:
    batch_size: Number of examples; also the number of loop iterations.
    state_size: Width of each random input and of the `BasicLSTMCell`.
    steps: Number of unrolled time steps.

  Returns:
    A `(pfor_outputs, while_outputs)` pair holding the gradients of the loss
    w.r.t. the trainable variables, computed per example.
  """
  inputs = []
  for _ in range(steps):
    inputs.append(random_ops.random_normal([batch_size, state_size]))
  cell = rnn_cell.BasicLSTMCell(state_size)
  init_state = cell.zero_state(batch_size, dtypes.float32)

  def model_fn(inps, init_state):
    state = init_state
    for step_input in inps:
      _, state = cell(step_input, state)
    loss = nn.l2_loss(state.c)
    return gradient_ops.gradients(loss, variables.trainable_variables())

  def loop_fn(i):

    def example_slice(tensor):
      # Select example `i` and restore a leading batch dimension of 1.
      return array_ops.expand_dims(array_ops.gather(tensor, i), 0)

    loop_inputs = [example_slice(x) for x in inputs]
    state_parts = [example_slice(x) for x in init_state]
    loop_init_state = rnn_cell.LSTMStateTuple(*state_parts)
    return model_fn(loop_inputs, loop_init_state)

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  # Read the variable dtypes only after the pfor call above has run the model.
  loop_fn_dtypes = [v.dtype for v in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(
      loop_fn, loop_fn_dtypes, batch_size)
  return pfor_outputs, while_outputs
def test_external_while_loop_grad(self):
  """External while_loops extended by gradient calls are not converted.

  The while_loop is built outside pfor and only extended from inside it by
  the gradient computation. If it were converted, every pfor iteration would
  write to the same tensor array indices.
  """
  x = constant_op.constant(1.)

  def write_x(j, array):
    array = array.write(j, x)
    return j + 1, array

  loop_vars = (0, tensor_array_ops.TensorArray(dtypes.float32, size=4))
  _, filled = control_flow_ops.while_loop(
      lambda j, _: j < 4, write_x, loop_vars)
  out = filled.stack()

  def grad_of_entry(i):
    entry = array_ops.gather(out, i)
    return gradient_ops.gradients(entry, x)[0]

  with session.Session() as sess:
    # `out` holds four copies of x; the three entries read here each have
    # gradient 1 w.r.t. x.
    grads = pfor_control_flow_ops.pfor(grad_of_entry, 3)
    self.assertAllEqual([1, 1, 1], sess.run(grads))
def test_create_outside_and_write_and_scatter(self):
  """Writes and scatters into an outer TensorArray agree for both loops."""
  t = tensor_array_ops.TensorArray(dtypes.int32, 10, clear_after_read=False)
  handle = t.handle

  def loop_fn(i):
    written = t.write(i + 2, 2 * i)
    written = written.write(i, 5)
    scattered = written.scatter([4 + i], [4])
    scattered = scattered.scatter([6 + i, 8 + i], [6 + i, 8 + i])
    return scattered.flow

  def stack_from_flow(flow):
    # Rebuild a TensorArray on the shared handle from the given flow value.
    rebuilt = tensor_array_ops.TensorArray(
        dtypes.int32, handle=handle, flow=flow)
    return rebuilt.stack()

  pfor_flows = pfor_control_flow_ops.pfor(loop_fn, iters=2)
  pfor_result = self._run_targets(stack_from_flow(pfor_flows[-1]))

  loop_flows = pfor_control_flow_ops.for_loop(
      loop_fn, dtypes.float32, iters=2)
  loop_result = self._run_targets(stack_from_flow(loop_flows[-1]))

  self.assertAllClose(loop_result, pfor_result)
def test_assert(self):
  """Runs a pfor whose body is an `Assert` that holds for every index."""

  def checked_body(i):
    condition = i < 10
    payload = [i, [10], [i + 1]]
    return control_flow_ops.Assert(condition, payload)

  # TODO(agarwal): make this work with for_loop.
  with session.Session() as sess:
    vectorized = pfor_control_flow_ops.pfor(checked_body, 3)
    sess.run(vectorized)
def test_parse_single_example(self):
  """pfor over `parse_single_example` matches batched `parse_example`."""

  def _int64_feature(*values):
    int_list = feature_pb2.Int64List(value=values)
    return feature_pb2.Feature(int64_list=int_list)

  def _bytes_feature(*values):
    encoded = [v.encode("utf-8") for v in values]
    return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=encoded))

  def _serialized_example(i):
    feature_map = {
        "dense_int": _int64_feature(i),
        "dense_str": _bytes_feature(str(i)),
        "sparse_int": _int64_feature(i, i * 2, i * 4, i * 8),
        "sparse_str": _bytes_feature(*["abc"] * i),
    }
    proto = example_pb2.Example(
        features=feature_pb2.Features(feature=feature_map))
    return proto.SerializeToString()

  examples = constant_op.constant(
      [_serialized_example(i) for i in range(10)])

  features = {
      "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
      "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
      "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
      "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
  }

  def parse_one(i):
    serialized = array_ops.gather(examples, i)
    return parsing_ops.parse_single_example(serialized, features)

  pfor = pfor_control_flow_ops.pfor(parse_one, iters=10)
  manual = parsing_ops.parse_example(examples, features)
  self.run_and_assert_equal(pfor, manual)
def f():
  """Builds an 8-iteration pfor that subtracts the reduced mean per slice.

  `x` is captured from an enclosing scope that is not part of this block.
  """

  # The second argument must keep the name `pfor_config`.
  def centered_slice(i, pfor_config):
    slice_i = array_ops.gather(x, i)
    mean = pfor_config.reduce_mean(slice_i)
    return slice_i - mean

  return pfor_control_flow_ops.pfor(centered_slice, 8)
def create_lstm_per_eg_grad(batch_size, state_size, steps, inputs_size=None):
  """Builds per-example LSTM gradients with both pfor and for_loop.

  Args:
    batch_size: Number of examples; also the number of loop iterations.
    state_size: Number of units of the `BasicLSTMCell`.
    steps: Number of unrolled time steps.
    inputs_size: Width of each random input. A falsy value (the default
      `None`) means `state_size` is used.

  Returns:
    A `(pfor_outputs, while_outputs)` pair holding the gradients of the loss
    w.r.t. the trainable variables, computed per example.
  """
  if not inputs_size:
    inputs_size = state_size
  inputs = []
  for _ in range(steps):
    inputs.append(random_ops.random_normal([batch_size, inputs_size]))
  cell = rnn_cell.BasicLSTMCell(state_size)
  init_state = cell.zero_state(batch_size, dtypes.float32)

  def model_fn(inps, init_state):
    state = init_state
    for step_input in inps:
      _, state = cell(step_input, state)
    loss = nn.l2_loss(state.c)
    return gradient_ops.gradients(loss, variables.trainable_variables())

  def loop_fn(i):

    def example_slice(tensor):
      # Select example `i` and restore a leading batch dimension of 1.
      return array_ops.expand_dims(array_ops.gather(tensor, i), 0)

    loop_inputs = [example_slice(x) for x in inputs]
    state_parts = [example_slice(x) for x in init_state]
    loop_init_state = rnn_cell.LSTMStateTuple(*state_parts)
    return model_fn(loop_inputs, loop_init_state)

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  # Read the variable dtypes only after the pfor call above has run the model.
  loop_fn_dtypes = [v.dtype for v in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(
      loop_fn, loop_fn_dtypes, batch_size)
  return pfor_outputs, while_outputs
def test_while_jacobian(self):
  """Jacobian of a while_loop output computed through pfor."""
  x = random_ops.random_uniform([1, 3])
  y = random_ops.random_uniform([3, 3])

  def keep_going(i, _):
    return i < 4

  def multiply_by_y(i, acc):
    return i + 1, math_ops.matmul(acc, y)

  # while_out = x @ y @ y @ y @ y, where @ is the matmul operator.
  _, while_out = control_flow_ops.while_loop(
      keep_going, multiply_by_y, [0, x])

  def column_gradient(i):
    column = array_ops.gather(while_out, i, axis=1)
    grad = gradient_ops.gradients(column, x)[0]
    return array_ops.reshape(grad, [-1])

  jacobian = pfor_control_flow_ops.pfor(column_gradient, iters=3)

  # The code above does not work with tf.while_loop in place of pfor, so the
  # expected value is built by hand. The gradient of the output w.r.t. x is
  # (y @ y @ y @ y)^T.
  expected_output = y
  for _ in range(3):
    expected_output = math_ops.matmul(expected_output, y)
  expected_output = array_ops.transpose(expected_output, [1, 0])

  with session.Session() as sess:
    actual, expected = sess.run([jacobian, expected_output])
    self.assertAllClose(expected, actual)
def batch_jacobian(output, inp, use_pfor=True, parallel_iterations=None):
  """Computes and stacks per-example jacobians of `output` w.r.t. `inp`.

  For each index `i` along the first dimension, the jacobian of
  `output[i, ...]` w.r.t. `inp[i, ...]` is computed.

  Example:
    x = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    y = x * x
    jacobian = batch_jacobian(y, x)
    # => [[[2, 0], [0, 4]], [[6, 0], [0, 8]]]

  Args:
    output: A tensor with shape [b, y1, ..., y_n]. `output[i,...]` should
      only depend on `inp[i,...]`.
    inp: A tensor with shape [b, x1, ..., x_m].
    use_pfor: If true, uses pfor for computing the jacobian. Else uses a
      tf.while_loop.
    parallel_iterations: A knob to control how many iterations are dispatched
      in parallel. This knob can be used to control the total memory usage.

  Returns:
    A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
    is the jacobian of `output[i, ...]` w.r.t. `inp[i, ...]`, i.e. stacked
    per-example jacobians. `None` if the underlying loop returns `None`.

  Raises:
    ValueError: if first dimension of `output` and `inp` do not match.
  """
  static_shape = output.shape
  if not static_shape[0].is_compatible_with(inp.shape[0]):
    raise ValueError(
        "Need first dimension of output shape (%s) and inp shape "
        "(%s) to match." % (output.shape, inp.shape))

  if static_shape.is_fully_defined():
    # Everything is known statically, so use Python integers.
    leading_shape = static_shape
    batch_size = int(static_shape[0])
    row_size = static_shape.num_elements() // batch_size
  else:
    # Fall back to shapes computed in the graph.
    leading_shape = array_ops.shape(output)
    batch_size = leading_shape[0]
    row_size = array_ops.size(output) // batch_size
  inp_shape = array_ops.shape(inp)

  # Flatten output to 2-D, after checking that the batch sizes agree.
  batch_check = check_ops.assert_equal(batch_size, inp_shape[0])
  with ops.control_dependencies([batch_check]):
    flat_output = array_ops.reshape(output, [batch_size, row_size])

  def column_gradient(i):
    column = array_ops.gather(flat_output, i, axis=1)
    grads = gradient_ops.gradients(column, inp, unconnected_gradients='zero')
    return grads[0]

  if use_pfor:
    stacked = control_flow_ops.pfor(
        column_gradient, row_size, parallel_iterations=parallel_iterations)
  else:
    stacked = control_flow_ops.for_loop(
        column_gradient,
        flat_output.dtype,
        row_size,
        parallel_iterations=parallel_iterations)
  if stacked is None:
    return None

  # Move the batch dimension to the front, then restore the original
  # output and input dimensions.
  stacked = array_ops.reshape(stacked, [row_size, batch_size, -1])
  batch_major = array_ops.transpose(stacked, [1, 0, 2])
  final_shape = array_ops.concat([leading_shape, inp_shape[1:]], axis=0)
  return array_ops.reshape(batch_major, final_shape)
def batch_jacobian(output, inp, use_pfor=True):
  """Computes and stacks per-example jacobians of `output` w.r.t. `inp`.

  For each index `i` along the first dimension, the jacobian of
  `output[i, ...]` w.r.t. `inp[i, ...]` is computed.

  Example:
    x = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    y = x * x
    jacobian = batch_jacobian(y, x)
    # => [[[2, 0], [0, 4]], [[6, 0], [0, 8]]]

  Args:
    output: A tensor with shape [b, y1, ..., y_n]. `output[i,...]` should
      only depend on `inp[i,...]`.
    inp: A tensor with shape [b, x1, ..., x_m].
    use_pfor: If true, uses pfor for computing the jacobian. Else uses a
      tf.while_loop.

  Returns:
    A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
    is the jacobian of `output[i, ...]` w.r.t. `inp[i, ...]`, i.e. stacked
    per-example jacobians. `None` if the underlying loop returns `None`.

  Raises:
    ValueError: if first dimension of `output` and `inp` do not match.
  """
  static_shape = output.shape
  if not static_shape[0].is_compatible_with(inp.shape[0]):
    raise ValueError(
        "Need first dimension of output shape (%s) and inp shape "
        "(%s) to match." % (output.shape, inp.shape))

  if static_shape.is_fully_defined():
    # Everything is known statically, so use Python integers.
    leading_shape = static_shape
    batch_size = int(static_shape[0])
    row_size = static_shape.num_elements() // batch_size
  else:
    # Fall back to shapes computed in the graph.
    leading_shape = array_ops.shape(output)
    batch_size = leading_shape[0]
    row_size = array_ops.size(output) // batch_size
  inp_shape = array_ops.shape(inp)

  # Flatten output to 2-D, after checking that the batch sizes agree.
  batch_check = check_ops.assert_equal(batch_size, inp_shape[0])
  with ops.control_dependencies([batch_check]):
    flat_output = array_ops.reshape(output, [batch_size, row_size])

  def column_gradient(i):
    column = array_ops.gather(flat_output, i, axis=1)
    return gradient_ops.gradients(column, inp)[0]

  if use_pfor:
    stacked = control_flow_ops.pfor(column_gradient, row_size)
  else:
    stacked = control_flow_ops.for_loop(
        column_gradient, flat_output.dtype, row_size)
  if stacked is None:
    return None

  # Move the batch dimension to the front, then restore the original
  # output and input dimensions.
  stacked = array_ops.reshape(stacked, [row_size, batch_size, -1])
  batch_major = array_ops.transpose(stacked, [1, 0, 2])
  final_shape = array_ops.concat([leading_shape, inp_shape[1:]], axis=0)
  return array_ops.reshape(batch_major, final_shape)
def _test_loop_fn(self, loop_fn, iters, parallel_iterations=None):
  """Asserts that `pfor` and `for_loop` produce equal results for `loop_fn`.

  The dtypes passed to `for_loop` are read from the structure of the pfor
  output.
  """
  vectorized = pfor_control_flow_ops.pfor(
      loop_fn, iters=iters, parallel_iterations=parallel_iterations)
  output_dtypes = nest.map_structure(lambda t: t.dtype, vectorized)
  sequential = pfor_control_flow_ops.for_loop(
      loop_fn,
      output_dtypes,
      iters=iters,
      parallel_iterations=parallel_iterations)
  self.run_and_assert_equal(vectorized, sequential)
def test_var_loop_len(self):
  """pfor with a sparse result runs when the loop length is a placeholder."""
  num_iters = array_ops.placeholder(dtypes.int32)

  def constant_sparse(_):
    # Indices 0..2 hold the values 4, 5, 6 in a dense shape of [3].
    return sparse_tensor.SparseTensor([[0], [1], [2]], [4, 5, 6], [3])

  pfor = pfor_control_flow_ops.pfor(constant_sparse, num_iters)
  feeds = {num_iters: 3}
  with self.cached_session() as sess:
    sess.run(pfor, feed_dict=feeds)
def test_reduce_sum(self):
  """`pfor_config.reduce_sum` matches a `reduce_sum` over axis 0."""
  data = random_ops.random_uniform([8, 3])

  # The second argument must keep the name `pfor_config`.
  def row_minus_total(i, pfor_config):
    row = array_ops.gather(data, i)
    total = pfor_config.reduce_sum(row)
    return row - total

  output = pfor_control_flow_ops.pfor(row_minus_total, 8)
  expected = data - math_ops.reduce_sum(data, axis=0)
  output_val, expected_val = self.evaluate([output, expected])
  self.assertAllClose(expected_val, output_val)
def test_sparse_result_shapes_stacked(self):
  """Sparse results whose dense shape depends on the loop index."""
  num_iters = 10

  def loop_fn(i):
    index = math_ops.cast(i, dtypes.int64)
    index = array_ops.expand_dims(index, 0)
    # A single 1 at position 0; the dense shape is [i + 1].
    return sparse_tensor.SparseTensor([[0]], [1], index + 1)

  pfor = pfor_control_flow_ops.pfor(loop_fn, num_iters)

  # Expected result: [[1, 0, 0, ...], [1, 0, 0, ...], ...]
  expected_indices = [[row, 0] for row in range(num_iters)]
  expected_values = [1] * num_iters
  manual = sparse_tensor.SparseTensor(
      expected_indices, expected_values, (num_iters, num_iters))
  self.run_and_assert_equal(pfor, manual)
def jacobian(output, inputs, use_pfor=True, parallel_iterations=None):
  """Computes the jacobian of `output` w.r.t. `inputs`.

  Args:
    output: A tensor.
    inputs: A tensor or a nested structure of tensor objects.
    use_pfor: If true, uses pfor for computing the jacobian. Else uses
      tf.while_loop.
    parallel_iterations: A knob to control how many iterations are dispatched
      in parallel. This knob can be used to control the total memory usage.

  Returns:
    A tensor or a nested structure of tensors with the same structure as
    `inputs`. Each entry is the jacobian of `output` w.r.t. the corresponding
    value in `inputs`. If output has shape [y_1, ..., y_n] and inputs_i has
    shape [x_1, ..., x_m], the corresponding jacobian has shape
    [y_1, ..., y_n, x_1, ..., x_m]. Note that in cases where the gradient is
    sparse (IndexedSlices), jacobian function currently makes it dense and
    returns a Tensor instead. This may change in the future.
  """
  flat_inputs = nest.flatten(inputs)
  static_output_shape = output.shape
  dynamic_output_shape = array_ops.shape(output)
  flat_output = array_ops.reshape(output, [-1])

  def element_gradients(i):
    element = array_ops.gather(flat_output, i)
    return gradient_ops.gradients(element, flat_inputs)

  # Prefer a static iteration count; use the dynamic size when it is unknown.
  try:
    num_elements = int(flat_output.shape[0])
  except TypeError:
    num_elements = array_ops.shape(flat_output)[0]

  if use_pfor:
    per_input = control_flow_ops.pfor(
        element_gradients,
        num_elements,
        parallel_iterations=parallel_iterations)
  else:
    per_input = control_flow_ops.for_loop(
        element_gradients,
        [flat_output.dtype] * len(flat_inputs),
        num_elements,
        parallel_iterations=parallel_iterations)

  for idx, grad in enumerate(per_input):
    if not isinstance(grad, ops.Tensor):
      continue
    # Replace the flattened leading dimension with the output's own shape.
    target_shape = array_ops.concat(
        [dynamic_output_shape, array_ops.shape(grad)[1:]], axis=0)
    reshaped = array_ops.reshape(grad, target_shape)
    reshaped.set_shape(
        static_output_shape.concatenate(flat_inputs[idx].shape))
    per_input[idx] = reshaped

  return nest.pack_sequence_as(inputs, per_input)
def diag_jacobian_pfor(xs, ys=None, fn=None, sample_shape=None, use_pfor=True, name=None):
  """Computes Jacobian diagonals of `ys` w.r.t. `xs` using pfor or for_loop.

  Args:
    xs: A tensor-like value or a list-like of them; each is converted with
      `tf.convert_to_tensor`.
    ys: Optional value or list-like of values paired one-to-one with `xs`.
      When `None`, it is computed as `fn(*xs)`.
    fn: Optional callable used to produce `ys` when `ys` is `None`.
    sample_shape: Trailing shape used when flattening `y` and its gradient.
      Defaults to `[1]`.
    use_pfor: If true, uses `control_flow_ops.pfor`; otherwise
      `control_flow_ops.for_loop`.
    name: Name passed to `tf.name_scope`.

  Returns:
    A list with one tensor per `(y, x)` pair, each reshaped to the dynamic
    shape of the corresponding `x`.

  Raises:
    ValueError: if both `ys` and `fn` are `None`, or if `xs` and `ys` have
      different lengths.
  """
  with tf.name_scope(name, 'jacobians_diag', [xs, ys]):
    if sample_shape is None:
      sample_shape = [1]
    # Output Jacobian diagonal
    jacobians_diag_res = []
    # Convert input `xs` to a list
    xs = list(xs) if _is_list_like(xs) else [xs]
    xs = [tf.convert_to_tensor(x) for x in xs]
    if ys is None:
      if fn is None:
        raise ValueError('Both `ys` and `fn` can not be `None`')
      else:
        ys = fn(*xs)
    # Convert ys to a list
    ys = list(ys) if _is_list_like(ys) else [ys]
    if len(xs) != len(ys):
      raise ValueError('`xs` and `ys` should have the same length')

    for y, x in zip(ys, xs):
      shape_x = tf.shape(x)
      # Broadcast `y` to the shape of `x`.
      y_ = y + tf.zeros_like(x)
      # Change `event_shape` to one-dimension
      flat_y = tf.reshape(y_, shape=tf.concat([[-1], sample_shape], -1))

      # Number of loop iterations: size of `x` divided by the number of
      # elements in `sample_shape`.
      n = tf.size(x) / tf.to_int32(tf.reduce_prod(sample_shape))
      n = tf.to_int32(n)

      def grad_fn(i):
        # Gradient of the i-th flattened slice of `y` w.r.t. the whole of `x`.
        res = tf.gradients(tf.gather(flat_y, i), x)[0]
        if res is None:
          # No gradient was returned: substitute zeros shaped like `x`.
          res = tf.zeros(shape_x, dtype=x.dtype)  # pylint: disable=cell-var-from-loop
        flat_res = tf.reshape(res, tf.concat([[-1], sample_shape], -1))
        # Keep only the entry at the same flat index `i`.
        return tf.gather(flat_res, i)

      if use_pfor:
        jacobian_diag_res = control_flow_ops.pfor(grad_fn, n)
      else:
        jacobian_diag_res = control_flow_ops.for_loop(grad_fn, [y.dtype], n)
      reshaped_jacobian_diag = tf.reshape(jacobian_diag_res, shape_x)
      jacobians_diag_res.append(reshaped_jacobian_diag)
    return jacobians_diag_res
def create_dynamic_lstm(cell_fn, batch_size, state_size, max_steps):
  """Builds a per-example RNN via pfor alongside `rnn.dynamic_rnn`.

  Args:
    cell_fn: Callable invoked as `cell_fn(state_size)` to create the cell.
    batch_size: Number of examples; also the number of pfor iterations.
    state_size: Size passed to `cell_fn`; also the last dimension of the
      input tensor array's element shape.
    max_steps: Number of time steps run by the inner while_loop.

  Returns:
    A `(pfor_output, tf_output)` pair: the pfor result and the result of
    `rnn.dynamic_rnn` on the same cell, inputs and sequence lengths.
  """
  cell = cell_fn(state_size)
  # NOTE(review): `dynamic_lstm_input_fn` is defined outside this block;
  # `inputs` is presumably [batch, time, state_size], given the transpose
  # below -- confirm.
  inputs, sequence_length = dynamic_lstm_input_fn(batch_size, state_size,
                                                  max_steps)
  inputs_ta = tensor_array_ops.TensorArray(
      dtypes.float32, size=max_steps, element_shape=[batch_size, state_size])
  # Unstack along the first axis after swapping the first two dimensions, so
  # each array element has shape [batch_size, state_size].
  inputs_time_major = array_ops.transpose(inputs, [1, 0, 2])
  inputs_ta = inputs_ta.unstack(inputs_time_major)
  zeros = array_ops.zeros([state_size])

  def loop_fn(i):
    # Runs the cell over all time steps for example `i` only.
    sequence_length_i = array_ops.gather(sequence_length, i)

    def body_fn(t, state, ta):
      # Input of example `i` at step `t`, with a leading dimension of 1.
      inputs_t = array_ops.expand_dims(
          array_ops.gather(inputs_ta.read(t), i), 0)
      output, new_state = cell(inputs_t, state)
      output = array_ops.reshape(output, [-1])
      # TODO(agarwal): one optimization that dynamic_rnn uses is to avoid the
      # array_ops.where when t < min(sequence_length). Doing that requires
      # supporting tf.cond pfor conversion.
      done = t >= sequence_length_i
      # Past the sequence end: emit zeros and carry the old state forward.
      output = array_ops.where(done, zeros, output)
      ta = ta.write(t, output)
      new_state = [
          array_ops.where(done, s, ns)
          for s, ns in zip(nest.flatten(state), nest.flatten(new_state))
      ]
      new_state = nest.pack_sequence_as(state, new_state)
      return t + 1, new_state, ta

    def condition_fn(t, _, unused):
      del unused
      return t < max_steps

    initial_state = cell.zero_state(1, dtypes.float32)
    _, state, ta = control_flow_ops.while_loop(condition_fn, body_fn, [
        0, initial_state,
        tensor_array_ops.TensorArray(dtypes.float32, max_steps)
    ])

    # Flatten every state tensor to 1-D.
    new_state = [array_ops.reshape(x, [-1]) for x in nest.flatten(state)]
    new_state = nest.pack_sequence_as(initial_state, new_state)
    return ta.stack(), new_state

  pfor_output = pfor_control_flow_ops.pfor(loop_fn, batch_size)
  tf_output = rnn.dynamic_rnn(cell, inputs, sequence_length=sequence_length,
                              initial_state=cell.zero_state(
                                  batch_size, dtypes.float32))
  return pfor_output, tf_output
def test_reduce_functools_partial(self):
  """Reductions work when the loop function is a `functools.partial`."""
  data = random_ops.random_uniform([8, 3])

  # `pfor_config` and `dummy` must keep their names: the first is looked up
  # by pfor, the second is bound by keyword below.
  def centered_row(i, pfor_config, dummy=None):
    del dummy
    row = array_ops.gather(data, i)
    mean = pfor_config.reduce_mean(row)
    return row - mean

  loop_fn = functools.partial(centered_row, dummy=1)
  output = pfor_control_flow_ops.pfor(loop_fn, 8)
  expected = data - math_ops.reduce_mean(data, axis=0)
  output_val, expected_val = self.evaluate([output, expected])
  self.assertAllClose(expected_val, output_val)
def test_reduce_concat(self):
  """`pfor_config.reduce_concat` exposes all rows inside the loop body."""
  data = random_ops.random_uniform([8, 3])

  # The second argument must keep the name `pfor_config`.
  def centered_row(i, pfor_config):
    row = array_ops.gather(data, i)
    all_rows = pfor_config.reduce_concat(row)
    mean = math_ops.reduce_mean(all_rows, axis=0)
    return row - mean

  output = pfor_control_flow_ops.pfor(centered_row, 8)
  expected = data - math_ops.reduce_mean(data, axis=0)
  output_val, expected_val = self.evaluate([output, expected])
  self.assertAllClose(expected_val, output_val)
def _test_loop_fn(self, loop_fn, iters, loop_fn_dtypes=dtypes.float32, parallel_iterations=None):
  """Asserts that `pfor` and `for_loop` produce equal results for `loop_fn`.

  `loop_fn_dtypes` is forwarded to `for_loop` as the output dtype(s).
  """
  vectorized = pfor_control_flow_ops.pfor(
      loop_fn, iters=iters, parallel_iterations=parallel_iterations)
  sequential = pfor_control_flow_ops.for_loop(
      loop_fn,
      loop_fn_dtypes,
      iters=iters,
      parallel_iterations=parallel_iterations)
  self.run_and_assert_equal(vectorized, sequential)
def test_sparse_result_indices_stacked(self):
  """Sparse results whose indices depend on the loop index."""
  num_iters = 10

  def loop_fn(i):
    index = math_ops.cast(i, dtypes.int64)
    index = array_ops.expand_dims(index, 0)
    indices = array_ops.expand_dims(index, 0)
    # A single 1 placed at position i of a length-num_iters vector.
    return sparse_tensor.SparseTensor(indices, [1], [num_iters])

  pfor = pfor_control_flow_ops.pfor(loop_fn, num_iters)

  # Expected result: identity matrix of size num_iters * num_iters.
  diagonal = [[k, k] for k in range(num_iters)]
  ones = [1] * num_iters
  manual = sparse_tensor.SparseTensor(
      diagonal, ones, (num_iters, num_iters))
  self.run_and_assert_equal(pfor, manual)
def create_dynamic_lstm(cell_fn, batch_size, state_size, max_steps):
  """Builds a per-example RNN via pfor alongside `rnn.dynamic_rnn`.

  Args:
    cell_fn: Callable invoked as `cell_fn(state_size)` to create the cell.
    batch_size: Number of examples; also the number of pfor iterations.
    state_size: Size passed to `cell_fn`; also the last dimension of the
      input tensor array's element shape.
    max_steps: Number of time steps run by the inner while_loop.

  Returns:
    A `(pfor_output, tf_output)` pair: the pfor result and the result of
    `rnn.dynamic_rnn` on the same cell, inputs and sequence lengths.
  """
  cell = cell_fn(state_size)
  # NOTE(review): `dynamic_lstm_input_fn` is defined outside this block;
  # `inputs` is presumably [batch, time, state_size], given the transpose
  # below -- confirm.
  inputs, sequence_length = dynamic_lstm_input_fn(batch_size, state_size,
                                                  max_steps)
  inputs_ta = tensor_array_ops.TensorArray(
      dtypes.float32, size=max_steps, element_shape=[batch_size, state_size])
  # Unstack along the first axis after swapping the first two dimensions, so
  # each array element has shape [batch_size, state_size].
  inputs_time_major = array_ops.transpose(inputs, [1, 0, 2])
  inputs_ta = inputs_ta.unstack(inputs_time_major)
  zeros = array_ops.zeros([state_size])

  def loop_fn(i):
    # Runs the cell over all time steps for example `i` only.
    sequence_length_i = array_ops.gather(sequence_length, i)

    def body_fn(t, state, ta):
      # Input of example `i` at step `t`, with a leading dimension of 1.
      inputs_t = array_ops.expand_dims(
          array_ops.gather(inputs_ta.read(t), i), 0)
      output, new_state = cell(inputs_t, state)
      output = array_ops.reshape(output, [-1])
      # TODO(agarwal): one optimization that dynamic_rnn uses is to avoid the
      # array_ops.where when t < min(sequence_length). Doing that requires
      # supporting tf.cond pfor conversion.
      done = t >= sequence_length_i
      # Past the sequence end: emit zeros and carry the old state forward.
      output = array_ops.where(done, zeros, output)
      ta = ta.write(t, output)
      new_state = [array_ops.where(done, s, ns) for s, ns in
                   zip(nest.flatten(state), nest.flatten(new_state))]
      new_state = nest.pack_sequence_as(state, new_state)
      return t + 1, new_state, ta

    def condition_fn(t, _, unused):
      del unused
      return t < max_steps

    initial_state = cell.zero_state(1, dtypes.float32)
    _, state, ta = control_flow_ops.while_loop(condition_fn, body_fn, [
        0, initial_state,
        tensor_array_ops.TensorArray(dtypes.float32, max_steps)
    ])

    # Flatten every state tensor to 1-D.
    new_state = [array_ops.reshape(x, [-1]) for x in nest.flatten(state)]
    new_state = nest.pack_sequence_as(initial_state, new_state)
    return ta.stack(), new_state

  pfor_output = pfor_control_flow_ops.pfor(loop_fn, batch_size)
  tf_output = rnn.dynamic_rnn(
      cell,
      inputs,
      sequence_length=sequence_length,
      initial_state=cell.zero_state(batch_size, dtypes.float32))
  return pfor_output, tf_output
def test_sparse_result_all_stacked(self):
  """Sparse results whose indices, values and shape all depend on `i`."""
  num_iters = 10

  def loop_fn(i):
    index = math_ops.cast(i, dtypes.int64)
    index = array_ops.expand_dims(index, 0)
    indices = array_ops.expand_dims(index, 0)
    # Value i at position i of a vector of length i + 1: [0, ..., 0, i].
    return sparse_tensor.SparseTensor(indices, index, index + 1)

  pfor = pfor_control_flow_ops.pfor(loop_fn, num_iters)

  # Expected result: [[0], [0, 1], [0, 0, 2], [0, 0, 0, 3], ...]
  diagonal = [[k, k] for k in range(num_iters)]
  diagonal_values = list(range(num_iters))
  manual = sparse_tensor.SparseTensor(
      diagonal, diagonal_values, (num_iters, num_iters))
  self.run_and_assert_equal(pfor, manual)
def create_mnist_autobatch(batch_size, data_format, training):
  """Applies the Mnist model per image with pfor and for_loop, and batched.

  Returns:
    A `(pfor_outputs, while_outputs, manual)` triple, where `manual` is the
    model applied directly to the whole batch of random images.
  """
  images = random_ops.random_uniform([batch_size, 28, 28])
  model = Mnist(data_format)
  manual = model(images, training=training)

  def single_image_output(i):
    return model(array_ops.gather(images, i), training=training)

  pfor_outputs = control_flow_ops.pfor(single_image_output, batch_size)
  while_outputs = control_flow_ops.for_loop(
      single_image_output, dtypes.float32, batch_size)
  return pfor_outputs, while_outputs, manual
def create_mnist_autobatch(batch_size, data_format, training):
  """Applies the Mnist model per image with pfor and for_loop, and batched.

  Returns:
    A `(pfor_outputs, while_outputs, manual)` triple, where `manual` is the
    model applied directly to the whole batch of random images.
  """
  images = random_ops.random_uniform([batch_size, 28, 28])
  model = Mnist(data_format)
  manual = model(images, training=training)

  def single_image_output(i):
    return model(array_ops.gather(images, i), training=training)

  pfor_outputs = control_flow_ops.pfor(single_image_output, batch_size)
  while_outputs = control_flow_ops.for_loop(
      single_image_output, dtypes.float32, batch_size)
  return pfor_outputs, while_outputs, manual
def benchmark_basic_while(self):
  """Benchmarks a while_loop nested inside pfor and inside for_loop."""
  with ops.Graph().as_default():

    def loop_fn(i):

      def not_done(t, x):
        return t < i

      def step(t, x):
        return t + 1, x + i

      _, total = control_flow_ops.while_loop(not_done, step, [0, 0])
      return total

    iters = 50
    pfor_output = pfor_control_flow_ops.pfor(loop_fn, iters)
    for_loop_output = pfor_control_flow_ops.for_loop(
        loop_fn, dtypes.int32, iters)
    self._run(pfor_output, 100, name="pfor_basic")
    self._run(for_loop_output, 100, name="for_loop_basic")
def test_sparse_result_none_stacked(self):
  """Sparse results that do not depend on the loop index at all."""
  num_iters = 10

  def constant_sparse(_):
    # Indices 0..2 hold the values 4, 5, 6 in a dense shape of [3].
    return sparse_tensor.SparseTensor([[0], [1], [2]], [4, 5, 6], [3])

  pfor = pfor_control_flow_ops.pfor(constant_sparse, num_iters)

  # Expected result: [[4, 5, 6], [4, 5, 6], [4, 5, 6], ...]
  indices = []
  for row in range(num_iters):
    for col in range(3):
      indices.append([row, col])
  values = [4, 5, 6] * num_iters
  dense_shapes = [num_iters, 3]
  manual = sparse_tensor.SparseTensor(indices, values, dense_shapes)
  self.run_and_assert_equal(pfor, manual)
def benchmark_basic_while(self):
  """Benchmarks a while_loop nested inside pfor and inside for_loop."""
  with ops.Graph().as_default():

    def loop_fn(i):

      def not_done(t, x):
        return t < i

      def step(t, x):
        return t + 1, x + i

      _, total = control_flow_ops.while_loop(not_done, step, [0, 0])
      return total

    iters = 50
    pfor_output = pfor_control_flow_ops.pfor(loop_fn, iters)
    for_loop_output = pfor_control_flow_ops.for_loop(
        loop_fn, dtypes.int32, iters)
    self._run(pfor_output, 100, name="pfor_basic")
    self._run(for_loop_output, 100, name="for_loop_basic")
def test_reduce_class(self):
  """Reductions work when the loop function is a callable object."""
  data = random_ops.random_uniform([8, 3])

  class LoopFn(object):
    """Callable that subtracts the reduced mean from the i-th row."""

    def __init__(self):
      pass

    # The second argument must keep the name `pfor_config`.
    def __call__(self, i, pfor_config):
      row = array_ops.gather(data, i)
      mean = pfor_config.reduce_mean(row)
      return row - mean

  output = pfor_control_flow_ops.pfor(LoopFn(), 8)
  expected = data - math_ops.reduce_mean(data, axis=0)
  output_val, expected_val = self.evaluate([output, expected])
  self.assertAllClose(expected_val, output_val)
def test_grad(self):
  """Gradient through a TensorArray unstack/stack round trip inside pfor."""
  x = random_ops.random_uniform([3, 2])
  ta = tensor_array_ops.TensorArray(dtypes.float32, 3, clear_after_read=False)
  ta = ta.unstack(x)
  y = math_ops.square(ta.stack())

  def row_gradient(i):
    y_i = array_ops.gather(y, i)
    full_grad = gradient_ops.gradients(y_i, x)[0]
    return array_ops.gather(full_grad, i)

  pfor_grad = pfor_control_flow_ops.pfor(row_gradient, iters=3)

  # y = x * x. Hence dy/dx = 2 * x.
  analytic_grad = 2.0 * x
  with session.Session() as sess:
    computed_val, expected_val = sess.run([pfor_grad, analytic_grad])
    self.assertAllClose(computed_val, expected_val)
def create_mnist_per_eg_jacobian(batch_size, data_format, training):
  """Builds per-example jacobians of the Mnist logits w.r.t. the variables.

  Returns:
    A `(pfor_outputs, while_outputs)` pair; the first is computed with pfor
    at both loop levels, the second with for_loop / while_loop.
  """
  images = random_ops.random_uniform([batch_size, 28, 28])
  model = Mnist(data_format)

  # `use_pfor` is bound by keyword through functools.partial below.
  def loop_fn(i, use_pfor):
    image = array_ops.gather(images, i)
    logits = model(image, training=training)
    flat_logits = array_ops.reshape(logits, [-1])
    return gradients.jacobian(
        flat_logits, variables.trainable_variables(), use_pfor=use_pfor)

  pfor_body = functools.partial(loop_fn, use_pfor=True)
  pfor_outputs = control_flow_ops.pfor(pfor_body, batch_size)

  while_body = functools.partial(loop_fn, use_pfor=False)
  # Count the variables only after the pfor call above has run the model.
  output_dtypes = [dtypes.float32] * len(variables.trainable_variables())
  while_outputs = control_flow_ops.for_loop(
      while_body, output_dtypes, batch_size)
  return pfor_outputs, while_outputs
def create_mnist_per_eg_jacobian(batch_size, data_format, training):
  """Builds per-example jacobians of the Mnist logits w.r.t. the variables.

  Returns:
    A `(pfor_outputs, while_outputs)` pair; the first is computed with pfor
    at both loop levels, the second with for_loop / while_loop.
  """
  images = random_ops.random_uniform([batch_size, 28, 28])
  model = Mnist(data_format)

  # `use_pfor` is bound by keyword through functools.partial below.
  def loop_fn(i, use_pfor):
    image = array_ops.gather(images, i)
    logits = model(image, training=training)
    flat_logits = array_ops.reshape(logits, [-1])
    return gradients.jacobian(
        flat_logits, variables.trainable_variables(), use_pfor=use_pfor)

  pfor_body = functools.partial(loop_fn, use_pfor=True)
  pfor_outputs = control_flow_ops.pfor(pfor_body, batch_size)

  while_body = functools.partial(loop_fn, use_pfor=False)
  # Count the variables only after the pfor call above has run the model.
  output_dtypes = [dtypes.float32] * len(variables.trainable_variables())
  while_outputs = control_flow_ops.for_loop(
      while_body, output_dtypes, batch_size)
  return pfor_outputs, while_outputs
def jacobian(output, inputs, use_pfor=True):
  """Computes the jacobian of `output` w.r.t. `inputs`.

  Args:
    output: A tensor.
    inputs: A tensor or a nested structure of tensor objects.
    use_pfor: If true, uses pfor for computing the jacobian. Else uses
      tf.while_loop.

  Returns:
    A tensor or a nested structure of tensors with the same structure as
    `inputs`. Each entry is the jacobian of `output` w.r.t. the corresponding
    value in `inputs`. If output has shape [y_1, ..., y_n] and inputs_i has
    shape [x_1, ..., x_m], the corresponding jacobian has shape
    [y_1, ..., y_n, x_1, ..., x_m].
  """
  flat_inputs = nest.flatten(inputs)
  static_output_shape = output.shape
  dynamic_output_shape = array_ops.shape(output)
  flat_output = array_ops.reshape(output, [-1])

  def element_gradients(i):
    element = array_ops.gather(flat_output, i)
    return gradient_ops.gradients(element, flat_inputs)

  # Prefer a static iteration count; use the dynamic size when it is unknown.
  try:
    num_elements = int(flat_output.shape[0])
  except TypeError:
    num_elements = array_ops.shape(flat_output)[0]

  if use_pfor:
    per_input = control_flow_ops.pfor(element_gradients, num_elements)
  else:
    per_input = control_flow_ops.for_loop(
        element_gradients,
        [flat_output.dtype] * len(flat_inputs),
        num_elements)

  for idx, grad in enumerate(per_input):
    if grad is None:
      continue
    # Replace the flattened leading dimension with the output's own shape.
    target_shape = array_ops.concat(
        [dynamic_output_shape, array_ops.shape(grad)[1:]], axis=0)
    reshaped = array_ops.reshape(grad, target_shape)
    reshaped.set_shape(
        static_output_shape.concatenate(flat_inputs[idx].shape))
    per_input[idx] = reshaped

  return nest.pack_sequence_as(inputs, per_input)
def benchmark_matmul(self):
  """Benchmarks row-wise matmul: batched, via pfor, and via for_loop."""
  with ops.Graph().as_default():
    n = 1024
    params = 1000
    x = random_ops.random_normal([n, params])
    y = random_ops.random_normal([params, params])

    def row_matmul(i):
      row = array_ops.gather(x, i)
      row = array_ops.expand_dims(row, 0)
      return math_ops.matmul(row, y)

    pfor_outputs = pfor_control_flow_ops.pfor(row_matmul, n)
    while_outputs = pfor_control_flow_ops.for_loop(
        row_matmul, dtypes.float32, n)
    manual = math_ops.matmul(x, y)

    self._run(manual, 1000, name="manual_matmul")
    self._run(pfor_outputs, 1000, name="pfor_matmul")
    self._run(while_outputs, 100, name="while_matmul")
def benchmark_reduction(self):
  """Benchmarks a matmul fed by `reduce_concat` against a plain matmul."""
  n = 1024
  with ops.Graph().as_default():
    x = random_ops.random_uniform([n, n])
    w = random_ops.random_uniform([n, n])

    # The second argument must keep the name `pfor_config`.
    def loop_fn(i, pfor_config):
      row = array_ops.gather(x, i)
      all_rows = pfor_config.reduce_concat(row)
      product = math_ops.matmul(all_rows, w)
      return math_ops.reduce_sum(product)

    # Note that output_reduction will be tiled, so there may be some minor
    # overheads compared to output_no_reduction.
    output_reduction = pfor_control_flow_ops.pfor(loop_fn, n)
    output_no_reduction = math_ops.reduce_sum(math_ops.matmul(x, w))

    # Benchmark to test that reduction does not add overhead and its output
    # is treated as loop invariant.
    self._run(output_reduction, 30, name="matmul_reduction")
    self._run(output_no_reduction, 30, name="matmul_no_reduction")
def benchmark_add(self):
  """Benchmarks row-wise addition: batched, via pfor, and via for_loop."""
  with ops.Graph().as_default():
    n = 256
    params = 1000
    x = random_ops.random_normal([n, params])
    y = random_ops.random_normal([n, params])

    def row_sum(i):
      left = array_ops.gather(x, i)
      right = array_ops.gather(y, i)
      return left + right

    pfor_outputs = pfor_control_flow_ops.pfor(row_sum, n)
    while_outputs = pfor_control_flow_ops.for_loop(
        row_sum, dtypes.float32, n)
    manual = x + y

    self._run(manual, 1000, name="manual_add")
    self._run(pfor_outputs, 1000, name="pfor_add")
    self._run(while_outputs, 100, name="while_add")
def test_create_outside_and_write_and_scatter(self):
  """Writes and scatters into an outer TensorArray agree for both loops."""
  t = tensor_array_ops.TensorArray(dtypes.int32, 10, clear_after_read=False)
  handle = t.handle

  def loop_fn(i):
    written = t.write(i + 2, 2 * i)
    written = written.write(i, 5)
    scattered = written.scatter([4 + i], [4])
    scattered = scattered.scatter([6 + i, 8 + i], [6 + i, 8 + i])
    return scattered.flow

  def stack_from_flow(flow):
    # Rebuild a TensorArray on the shared handle from the given flow value.
    rebuilt = tensor_array_ops.TensorArray(
        dtypes.int32, handle=handle, flow=flow)
    return rebuilt.stack()

  pfor_flows = pfor_control_flow_ops.pfor(loop_fn, iters=2)
  pfor_result = self._run_targets(stack_from_flow(pfor_flows[-1]))

  loop_flows = pfor_control_flow_ops.for_loop(
      loop_fn, dtypes.float32, iters=2)
  loop_result = self._run_targets(stack_from_flow(loop_flows[-1]))

  self.assertAllClose(loop_result, pfor_result)
def create_fc_per_eg_jacobians(batch_size, activation_size, num_layers):
  """Builds batched and per-example jacobians of a fully connected model.

  Returns:
    A `(jacobians, per_eg_jacobians_pfor, per_eg_jacobians_while)` triple:
    the jacobian of the batched output w.r.t. the trainable variables,
    followed by the per-example jacobians built with pfor and with for_loop.
  """
  model = FullyConnectedModel(
      activation_size=activation_size, num_layers=num_layers)
  inp = random_ops.random_normal([batch_size, activation_size])
  batched_output = model(inp)
  jacobians = gradients.jacobian(
      batched_output, variables.trainable_variables())

  # `use_pfor` is bound by keyword through functools.partial below.
  def loop_fn(i, use_pfor):
    example = array_ops.expand_dims(array_ops.gather(inp, i), 0)
    flat_output = array_ops.reshape(model(example), [-1])
    return gradients.jacobian(
        flat_output, variables.trainable_variables(), use_pfor=use_pfor)

  pfor_body = functools.partial(loop_fn, use_pfor=True)
  per_eg_jacobians_pfor = control_flow_ops.pfor(pfor_body, batch_size)

  while_body = functools.partial(loop_fn, use_pfor=False)
  output_dtypes = [dtypes.float32] * len(variables.trainable_variables())
  per_eg_jacobians_while = control_flow_ops.for_loop(
      while_body, output_dtypes, batch_size)
  return jacobians, per_eg_jacobians_pfor, per_eg_jacobians_while
def create_mnist_per_eg_grad(batch_size, data_format, training):
  """Builds per-example loss gradients of the Mnist model.

  Returns:
    A `(pfor_outputs, while_outputs)` pair holding the gradients of the
    softmax cross-entropy loss w.r.t. the trainable variables, per example.
  """
  images = random_ops.random_uniform([batch_size, 28, 28])

  # Random class ids in [0, 10), expanded into one-hot float32 rows.
  sparse_labels = np.random.randint(low=0, high=10, size=[batch_size])
  sparse_labels = sparse_labels.astype(np.int32)
  labels = np.zeros((batch_size, 10)).astype(np.float32)
  labels[np.arange(batch_size), sparse_labels] = 1.

  model = Mnist(data_format)

  def loop_fn(i):
    image = array_ops.gather(images, i)
    label = array_ops.gather(labels, i)
    logits = model(image, training=training)
    logits = array_ops.reshape(logits, [-1])
    loss = losses.softmax_cross_entropy(
        logits=logits, onehot_labels=label, reduction=losses.Reduction.NONE)
    return gradient_ops.gradients(loss, variables.trainable_variables())

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  # Count the variables only after the pfor call above has run the model.
  output_dtypes = [dtypes.float32] * len(variables.trainable_variables())
  while_outputs = control_flow_ops.for_loop(
      loop_fn, output_dtypes, batch_size)
  return pfor_outputs, while_outputs
def create_fc_per_eg_grad(batch_size, activation_size, num_layers):
  """Builds per-example loss gradients of a stack of dense layers.

  Returns:
    A `(pfor_outputs, while_outputs)` pair holding the gradients of the L2
    loss w.r.t. the trainable variables, per example.
  """
  inp = random_ops.random_normal([batch_size, activation_size])
  layers = []
  for _ in range(num_layers):
    layers.append(tf_layers.Dense(activation_size, activation=nn.relu))
  projection = tf_layers.Dense(1)

  def model_fn(activation):
    hidden = activation
    for layer in layers:
      hidden = layer(hidden)
    projected = projection(hidden)
    loss = nn.l2_loss(projected)
    return gradient_ops.gradients(loss, variables.trainable_variables())

  def loop_fn(i):
    example = array_ops.gather(inp, i)
    # Restore a leading batch dimension of 1 for the dense layers.
    return model_fn(array_ops.expand_dims(example, 0))

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  # Read the variable dtypes only after the pfor call above has built the
  # layers.
  loop_fn_dtypes = [v.dtype for v in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(
      loop_fn, loop_fn_dtypes, batch_size)
  return pfor_outputs, while_outputs