def testAggregate(self): a = array_ops.constant([3., 4.]) b = array_ops.constant([5., 6.]) hint = op_hint.OpHint("agg") a0, a1 = array_ops.unstack(a) b0, b1 = array_ops.unstack(b) a0 = hint.add_input(a0, tag="c", aggregate=op_hint.OpHint.AGGREGATE_STACK) b0 = hint.add_input(b0, tag="n", aggregate=op_hint.OpHint.AGGREGATE_STACK) a1 = hint.add_input(a1, tag="c", aggregate=op_hint.OpHint.AGGREGATE_STACK) b1 = hint.add_input(b1, tag="n", aggregate=op_hint.OpHint.AGGREGATE_STACK) c0 = math_ops.add(a0, b0, name="addleft") c1 = math_ops.add(a1, b1, name="addright") c0 = hint.add_output( c0, tag="out", aggregate=op_hint.OpHint.AGGREGATE_STACK) c1 = hint.add_output( c1, tag="out", aggregate=op_hint.OpHint.AGGREGATE_STACK) curr = array_ops.stack([c0, c1]) output = array_ops.identity(curr, name="FINAL_OUTPUT") with self.cached_session() as sess: stubbed_graphdef = op_hint.convert_op_hints_to_stubs( graph_def=sess.graph_def) self.assertEqual( self._getGraphOpTypes( stubbed_graphdef, output_nodes=[op_hint._tensor_name_base(output.name)]), set(["agg", "Const", "Identity"]))
def _test_add(data): """ One iteration of add """ assert len(data) == 2 need_transpose = False if len(data[0].shape) == 1 or len(data[0].shape) == 2: tvm_data = data elif len(data[0].shape) == 3: need_transpose = True tvm_data = [np.transpose(d, axes=(0, 2, 1)) for d in data] elif len(data[0].shape) == 4: need_transpose = True tvm_data = [np.transpose(d, axes=(0, 3, 1, 2)) for d in data] else: raise NotImplementedError("Not support input shape {} of add : ". format(str(len(data.shape)))) # Test with two tensors with tf.Graph().as_default(): in_data = [array_ops.placeholder(shape=data[0].shape, dtype=data[0].dtype, name='in_0'), array_ops.placeholder(shape=data[1].shape, dtype=data[1].dtype, name='in_1')] out = math_ops.add(in_data[0], in_data[1]) compare_tflite_with_tvm(data, tvm_data, ['in_0:0','in_1:0'], in_data, [out], need_transpose) # Test with tensor and constant with tf.Graph().as_default(): in_data = [array_ops.placeholder(shape=data[0].shape, dtype=data[0].dtype, name='in')] out = math_ops.add(in_data[0], ops.convert_to_tensor(data[1], dtype=data[1].dtype)) compare_tflite_with_tvm([data[0]], [tvm_data[0]], ['in:0'], in_data, [out], need_transpose)
def GetParams(self): """Create a graph containing two segment.""" input_name = "input" input_dims = [2, 32, 32, 3] g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims, name=input_name) with g.device("/GPU:0"): n = inp for i in range(2): c = constant_op.constant(1.0, name="c%d" % i) n = math_ops.add(n, c, name="add%d" % i) n = math_ops.mul(n, n, name="mul%d" % i) edge = self.trt_incompatible_op(n, name="incompatible") with g.control_dependencies([edge]): c = constant_op.constant(1.0, name="c2") n = math_ops.add(n, c, name="add2") n = math_ops.mul(n, n, name="mul2") c = constant_op.constant(1.0, name="c3") n = math_ops.add(n, c, name="add3") n = math_ops.mul(n, n, name="mul3") array_ops.squeeze(n, name=self.output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], expected_engines={ # Only the first engine is built. "my_trt_op_0": ["c0", "c1", "add0", "add1", "mul0", "mul1"] }, expected_output_dims=tuple(input_dims), allclose_atol=1.e-06, allclose_rtol=1.e-06)
def testWhileWithScopedAllocator(self): group_size = 2 group_key = 1 instance_key0 = 1 instance_key1 = 2 config = config_pb2.ConfigProto(device_count={'CPU': group_size}) rewrite_options = config.graph_options.rewrite_options rewrite_options.scoped_allocator_optimization = ( rewriter_config_pb2.RewriterConfig.ON) del rewrite_options.scoped_allocator_opts.enable_op[:] rewrite_options.scoped_allocator_opts.enable_op.append('CollectiveReduce') with self.session(config=config) as sess: run_ops = [] for i in range(group_size): with ops.device('CPU:%d' % i): constant = constant_op.constant(0.) cond = lambda i: math_ops.less(i, 10.) body = lambda i: math_ops.add(i, 1.) input0 = control_flow_ops.while_loop(cond, body, [constant]) input1 = math_ops.add(constant, 5) colred0 = collective_ops.all_reduce(input0, group_size, group_key, instance_key0, 'Add', 'Id') colred1 = collective_ops.all_reduce(input1, group_size, group_key, instance_key1, 'Add', 'Id') run_ops.append(math_ops.add_n([colred0, colred1])) results = sess.run(run_ops) self.assertEqual(results, [30., 30.])
def testDebugMakeCallableFromOptionsWithCustomOptionsAndMetadataWorks(self): variable_1 = variables.Variable( 10.5, dtype=dtypes.float32, name="variable_1") a = math_ops.add(variable_1, variable_1, "callable_a") math_ops.add(a, a, "callable_b") self.sess.run(variable_1.initializer) wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( [["run"], ["run"]], self.sess, dump_root=self._tmp_dir) callable_options = config_pb2.CallableOptions() callable_options.fetch.append("callable_b") callable_options.run_options.trace_level = config_pb2.RunOptions.FULL_TRACE sess_callable = wrapped_sess._make_callable_from_options(callable_options) run_metadata = config_pb2.RunMetadata() # Call the callable with a custom run_metadata. callable_output = sess_callable(run_metadata=run_metadata) # Verify that step_stats is populated in the custom run_metadata. self.assertTrue(run_metadata.step_stats) self.assertAllClose(np.array(42.0, dtype=np.float32), callable_output[0]) debug_dumps = wrapped_sess.observers["debug_dumps"] self.assertEqual(1, len(debug_dumps)) debug_dump = debug_dumps[0] node_names = [datum.node_name for datum in debug_dump.dumped_tensor_data] self.assertItemsEqual( ["callable_a", "callable_b", "variable_1", "variable_1/read"], node_names)
def testDebuggingMakeCallableFromOptionsWithTwoFeedsWorks(self): ph1 = array_ops.placeholder(dtypes.float32, name="callable_ph1") ph2 = array_ops.placeholder(dtypes.float32, name="callable_ph2") a = math_ops.add(ph1, ph2, "callable_a") math_ops.add(a, a, "callable_b") wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( [["run"]] * 3, self.sess, dump_root=self._tmp_dir) callable_options = config_pb2.CallableOptions() callable_options.feed.append("callable_ph1") callable_options.feed.append("callable_ph2") callable_options.fetch.append("callable_b") sess_callable = wrapped_sess._make_callable_from_options(callable_options) ph1_value = np.array(5.0, dtype=np.float32) ph2_value = np.array(16.0, dtype=np.float32) for _ in range(2): callable_output = sess_callable(ph1_value, ph2_value) self.assertAllClose(np.array(42.0, dtype=np.float32), callable_output[0]) debug_dumps = wrapped_sess.observers["debug_dumps"] self.assertEqual(2, len(debug_dumps)) for debug_dump in debug_dumps: node_names = [datum.node_name for datum in debug_dump.dumped_tensor_data] self.assertItemsEqual(["callable_a", "callable_b"], node_names)
def _TestRandomGraphWithDevices(self, sess, seed, op_placement, devices, debug_mode=False): data = [] shape = (self._dim, self._dim) feed_dict = {} # Initialize the matrices for i in range(len(devices)): with ops.device(devices[i]): var = array_ops.placeholder(dtypes.float32, shape=shape) np.random.seed(seed + i) feed_dict[var] = np.random.uniform( low=0, high=0.1, size=shape).astype(np.float32) data.append(var) # Run the 'add' operations on those matrices for op in op_placement: with ops.device(devices[op[2]]): data[op[2]] = math_ops.add(data[op[0]], data[op[1]]) with ops.device('/cpu:0'): s = data[0] for i in range(1, len(data)): s = math_ops.add(s, data[i]) if debug_mode: logging.info(ops.get_default_graph().as_graph_def()) result = sess.run(s, feed_dict=feed_dict) self._LogMatrix(result, self._dim) return result
def _session_run_for_graph_structure_lookup(self): with session.Session() as sess: u_name = "testDumpGraphStructureLookup/u" v_name = "testDumpGraphStructureLookup/v" w_name = "testDumpGraphStructureLookup/w" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, name=w_name) u.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph( run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() sess.run(w, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) return u_name, v_name, w_name, dump
def GetParams(self): """Test for rank 2 input in TF-TRT.""" input_names = ["input", "input2"] # Two paths: first with rank 2 input, second with rank 4 input. input_dims = [[12, 5], [12, 5, 2, 2]] output_name = "output" g = ops.Graph() with g.as_default(): outputs = [] for i in range(2): x = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims[i], name=input_names[i]) c = constant_op.constant(1.0, name="c%d_1" % i) q = math_ops.add(x, c, name="add%d_1" % i) q = math_ops.abs(q, name="abs%d_1" % i) c = constant_op.constant(2.2, name="c%d_2" % i) q = math_ops.add(q, c, name="add%d_2" % i) q = math_ops.abs(q, name="abs%d_2" % i) c = constant_op.constant(3.0, name="c%d_3" % i) q = math_ops.add(q, c, name="add%d_3" % i) if i == 0: for j in range(2): q = array_ops.expand_dims(q, -1, name="expand%d_%d" % (i, j)) q = gen_math_ops.reciprocal(q, name="reciprocal%d" % i) outputs.append(q) # Combine both paths q = math_ops.add(outputs[0], outputs[1], name="add") array_ops.squeeze(q, name=output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=input_names, input_dims=input_dims, output_names=[output_name], expected_output_dims=[tuple(input_dims[1])])
def testIgnoredArguments(self): """Tests that JIT computations can ignore formal parameters.""" with self.session(config=NoRewriteSessionConfig()) as sess: x = array_ops.placeholder(dtypes.int32) y = array_ops.placeholder(dtypes.int32) with jit_scope(): z = math_ops.add(x, x) w = math_ops.add(y, y) # Pulls 'w' into the same compilation via control dependencies. with ops.control_dependencies([w]): n = control_flow_ops.no_op() with ops.control_dependencies([n]): t = math_ops.add(z, z) run_metadata = config_pb2.RunMetadata() out = test_utils.RunWithWarmup( sess, t, { x: np.int32(7), y: np.int32(404) }, run_metadata=run_metadata, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE)) self.assert_(MetadataHasXlaRunOp(run_metadata)) self.assertAllClose(28, out)
def body(value, denom, i, ret_rate): i += 1 ret_rate = r_(value, denom) with ops.control_dependencies([ret_rate]): value = math_ops.add(value, 2) denom = math_ops.add(denom, 1) return [value, denom, i, ret_rate]
def GetParams(self): """Create a graph containing multiple segment.""" input_name = "input" input_dims = [2, 32, 32, 3] g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims, name=input_name) with g.device("/GPU:0"): n = inp c = constant_op.constant(1.0, name="c") n = math_ops.add(n, c, name="add") n = math_ops.mul(n, n, name="mul") n = math_ops.add(n, n, name="add1") n = self.trt_incompatible_op(n, name="incompatible1") n = math_ops.add(n, c, name="add2") n = math_ops.mul(n, n, name="mul1") n = math_ops.add(n, n, name="add3") array_ops.squeeze(n, name=self.output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], expected_engines={ "my_trt_op_0": ["add2", "add3", "mul1"], # Why segment ["add", "add1", "mul"] was assigned segment id 1 # instead of 0: the parent node of this segment is actually const # node 'c', but it's removed later since it's const output of the # segment which is not allowed. "my_trt_op_1": ["add", "add1", "mul"] }, expected_output_dims=tuple(input_dims), allclose_atol=1.e-06, allclose_rtol=1.e-06)
def GetParams(self): """Create a graph containing multiple segment.""" input_name = "input" input_dims = [2, 32, 32, 3] output_name = "output" g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims, name=input_name) with g.device("/GPU:0"): n = inp c = constant_op.constant(1.0, name="c") # Adds control dependency from the constant op to a trt incompatible op, # and adds control dependency from the trt incompatible op to all other # ops, to make sure the constant op cannot be contracted with any trt # segment that depends on it. with g.control_dependencies([c]): d = self.trt_incompatible_op(n, name="incompatible") with g.control_dependencies([d]): n = math_ops.add(n, c, name="add") n = math_ops.mul(n, n, name="mul") n = math_ops.add(n, n, name="add1") n = self.trt_incompatible_op(n, name="incompatible1") with g.control_dependencies([d]): n = math_ops.add(n, c, name="add2") n = math_ops.mul(n, n, name="mul1") n = math_ops.add(n, n, name="add3") array_ops.squeeze(n, name=output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], output_names=[output_name], expected_output_dims=[tuple(input_dims)])
def GetParams(self): """Create a graph containing two segment.""" input_name = "input" input_dims = [2, 32, 32, 3] output_name = "output" g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims, name=input_name) with g.device("/GPU:0"): n = inp for i in range(2): c = constant_op.constant(1.0, name="c%d" % i) n = math_ops.add(n, c, name="add%d" % i) n = math_ops.mul(n, n, name="mul%d" % i) edge = self.trt_incompatible_op(n, name="incompatible") with g.control_dependencies([edge]): c = constant_op.constant(1.0, name="c2") n = math_ops.add(n, c, name="add2") n = math_ops.mul(n, n, name="mul2") c = constant_op.constant(1.0, name="c3") n = math_ops.add(n, c, name="add3") n = math_ops.mul(n, n, name="mul3") array_ops.squeeze(n, name=output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], output_names=[output_name], expected_output_dims=[tuple(input_dims)])
def GetParams(self): """Create a graph containing multiple segment.""" input_name = "input" input_dims = [2, 32, 32, 3] output_name = "output" g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims, name=input_name) with g.device("/GPU:0"): n = inp c = constant_op.constant(1.0, name="c") n = math_ops.add(n, c, name="add") n = math_ops.mul(n, n, name="mul") n = math_ops.add(n, n, name="add1") n = self.trt_incompatible_op(n, name="incompatible1") n = math_ops.add(n, c, name="add2") n = math_ops.mul(n, n, name="mul1") n = math_ops.add(n, n, name="add3") array_ops.squeeze(n, name=output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], output_names=[output_name], expected_output_dims=[tuple(input_dims)])
def GetParams(self): """Create a graph containing multiple segment.""" input_name = "input" input_dims = [2, 32, 32, 3] output_name = "output" g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtypes.float32, shape=input_dims, name=input_name) with g.device("/GPU:0"): c1 = constant_op.constant(1.0, name="c1") c2 = constant_op.constant(1.0, name="c2") d1 = constant_op.constant(1.0, name="d1") d2 = self.trt_incompatible_op(inp, name="d2") with g.control_dependencies([d1, d2]): add = math_ops.add(inp, c1, name="add") with g.control_dependencies([d1, d2]): mul = math_ops.mul(add, add, name="mul") with g.control_dependencies([d1, d2]): add1 = math_ops.add(mul, mul, name="add1") edge = self.trt_incompatible_op(add1, name="incompatible") with g.control_dependencies([d1, d2, add, mul]): add2 = math_ops.add(edge, c2, name="add2") with g.control_dependencies([d1, d2, add1, mul]): mul1 = math_ops.mul(add2, add2, name="mul1") with g.control_dependencies([d1, d2, add, add1]): add3 = math_ops.add(mul1, mul1, name="add3") array_ops.squeeze(add3, name=output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], output_names=[output_name], expected_output_dims=[tuple(input_dims)])
def fn(x): with context.device('/gpu:0'): b = constant_op.constant(2.0) c = math_ops.add(x.gpu(), b) # TODO(apassos): remove cpu below by making TensorVSPace aware # of devices. return math_ops.add(c, constant_op.constant(3.0)).cpu()
def fn(x): with context.device('/gpu:0'): b = tensor.Tensor(2.0) c = math_ops.add(x.as_gpu_tensor(), b) # TODO(apassos): remove as_cpu_tensor below by making TensorVSPace aware # of devices. return math_ops.add(c, tensor.Tensor(3.0)).as_cpu_tensor()
def test_multiple_outputs(self): # - + # / \y0 y1/ \ # x split z # | # y (nodes are ops; edges are going up) g = ops.Graph() with g.as_default(): x = array_ops.placeholder(dtypes.float32, shape=[1], name='x') y = array_ops.placeholder(dtypes.float32, shape=[2], name='y') y0, y1 = array_ops.split(y, num_or_size_splits=2, axis=0) z = array_ops.placeholder(dtypes.float32, shape=[1], name='z') math_ops.add(x, y0) math_ops.subtract(y1, z) y1_pattern = graph_matcher.OpTypePattern('*') minus_pattern = graph_matcher.OpTypePattern('Sub', inputs=[y1_pattern, '*']) matcher = graph_matcher.GraphMatcher(minus_pattern) match_results = list(matcher.match_graph(g)) self.assertEqual(1, len(match_results)) match_result = match_results[0] self.assertEqual(y0.op, y1.op) self.assertEqual(match_result.get_op(y1_pattern), y1.op) self.assertEqual(match_result.get_tensor(y1_pattern), y1)
def testDumpCausalityCheck(self): with session.Session() as sess: u_name = "testDumpCausalityCheck/u" v_name = "testDumpCausalityCheck/v" w_name = "testDumpCausalityCheck/w" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, name=w_name) u.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph( run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls="file://%s" % self._dump_root ) run_metadata = config_pb2.RunMetadata() sess.run(w, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) # First, loading the original dump without supplying the # partition_graphs should not cause a RuntimeError, validation occurs # only with partition_graphs loaded. debug_data.DebugDumpDir(self._dump_root) # Now, loading the original dump with partition graphs supplied should # succeed. The validation should pass quietly. dump = debug_data.DebugDumpDir(self._dump_root, partition_graphs=run_metadata.partition_graphs) # Get the dump file names and compute their timestamps. self.assertEqual(1, len(dump.get_tensor_file_paths(u_name, 0, "DebugIdentity"))) u_file_path = dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")[0] self.assertEqual(1, len(dump.get_tensor_file_paths(v_name, 0, "DebugIdentity"))) v_file_path = dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")[0] u_timestamp = int(u_file_path[u_file_path.rindex("_") + 1 :]) v_timestamp = int(v_file_path[v_file_path.rindex("_") + 1 :]) # Swap the time stamps new_u_file_path = u_file_path[: u_file_path.rindex("_")] + "_%d" % v_timestamp new_v_file_path = v_file_path[: v_file_path.rindex("_")] + "_%d" % u_timestamp os.rename(u_file_path, new_u_file_path) os.rename(v_file_path, new_v_file_path) # Load the dump directory again. Now a ValueError is expected to be # raised due to the timestamp swap. with self.assertRaisesRegexp(ValueError, "Causality violated"): dump = debug_data.DebugDumpDir(self._dump_root, partition_graphs=run_metadata.partition_graphs) # Loading the dump directory with kwarg "validate" set explicitly to # False should get rid of the error. dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs, validate=False )
def testStripUnusedMultipleInputs(self): input_graph_name = "input_graph.pb" output_graph_name = "output_graph.pb" # We'll create an input graph that multiplies two input nodes. with ops.Graph().as_default(): constant_node1 = constant_op.constant(1.0, name="constant_node1") constant_node2 = constant_op.constant(2.0, name="constant_node2") input_node1 = math_ops.subtract(constant_node1, 3.0, name="input_node1") input_node2 = math_ops.subtract(constant_node2, 5.0, name="input_node2") output_node = math_ops.multiply( input_node1, input_node2, name="output_node") math_ops.add(output_node, 2.0, name="later_node") sess = session.Session() output = sess.run(output_node) self.assertNear(6.0, output, 0.00001) graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name) # We save out the graph to disk, and then call the const conversion # routine. input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name) input_binary = False input_node_names = "input_node1,input_node2" input_node_types = [ dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum ] output_binary = True output_node_names = "output_node" output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name) strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary, output_graph_path, output_binary, input_node_names, output_node_names, input_node_types) # Now we make sure the variable is now a constant, and that the graph still # produces the expected result. with ops.Graph().as_default(): output_graph_def = graph_pb2.GraphDef() with open(output_graph_path, "rb") as f: output_graph_def.ParseFromString(f.read()) _ = importer.import_graph_def(output_graph_def, name="") self.assertEqual(3, len(output_graph_def.node)) for node in output_graph_def.node: self.assertNotEqual("Add", node.op) self.assertNotEqual("Sub", node.op) if node.name == input_node_names: self.assertTrue("shape" in node.attr) with session.Session() as sess: input_node1 = sess.graph.get_tensor_by_name("input_node1:0") input_node2 = sess.graph.get_tensor_by_name("input_node2:0") output_node = sess.graph.get_tensor_by_name("output_node:0") output = sess.run(output_node, feed_dict={input_node1: [10.0], input_node2: [-5.0]}) self.assertNear(-50.0, output, 0.00001)
def setUp(self): self.graph = ops.Graph() with self.graph.as_default(): c0 = constant_op.constant(1.0, shape=[10], name="Const") c1 = constant_op.constant(1.0, shape=[10], name="Const") c2 = constant_op.constant(1.0, shape=[10], name="Const") i = constant_op.constant(1.0, shape=[10], name="Input") self.o = math_ops.add(c2, math_ops.add(c1, math_ops.add(c0, i)))
def setUp(self): self.graph = ops.Graph() with self.graph.as_default(): c0 = constant_op.constant(1.0, shape=[10], name="Const") c0.op._set_attr("_foo", attr_value_pb2.AttrValue(s=b"foo")) c1 = constant_op.constant(1.0, shape=[10], name="Const") c2 = constant_op.constant(1.0, shape=[10], name="Const") i = constant_op.constant(1.0, shape=[10], name="Input") self.o = math_ops.add(c2, math_ops.add(c1, math_ops.add(c0, i)))
def testElementwiseOpUnknownRankError(self): if context.executing_eagerly(): return x = ragged_factory_ops.constant([[1, 2], [3]]) y = ragged_tensor.RaggedTensor.from_row_splits( array_ops.placeholder_with_default([1, 2, 3], shape=None), x.row_splits) with self.assertRaisesRegexp(ValueError, r'Unable to broadcast: unknown rank'): math_ops.add(x, y)
def testReonstructGraphWithCond(self): with session.Session(config=self._no_rewrite_session_config()) as sess: x = variables.Variable(10.0, name="x") y = variables.Variable(20.0, name="y") cond = control_flow_ops.cond( x > y, lambda: math_ops.add(x, 1), lambda: math_ops.add(y, 1)) sess.run(x.initializer) sess.run(y.initializer) self._compareOriginalAndReconstructedGraphDefs( sess, cond, expected_output=21.0)
def func(_): with variable_scope.variable_scope( "variable", reuse=variable_scope.AUTO_REUSE): with ops.device("/device:CPU:0"): a = variable_scope.get_variable( "a", (), dtypes.int32, use_resource=True) a = math_ops.add(a, 1) with ops.device("/device:CPU:1"): b = variable_scope.get_variable( "b", (), dtypes.int32, use_resource=True) return math_ops.add(a, b)
def setUp(self): self.graph = ops.Graph() with self.graph.as_default(): self.a0 = constant_op.constant(1.0, shape=[2], name="a0") self.b0 = constant_op.constant(2.0, shape=[2], name="b0") self.c0 = math_ops.add(self.a0, self.b0, name="c0") self.a1 = constant_op.constant(3.0, shape=[2], name="a1") self.b1 = constant_op.constant(4.0, shape=[2], name="b1") self.c1 = math_ops.add(self.a1, self.b1, name="c1") self.a2 = constant_op.constant(3.0, shape=[3], name="a2") self.b2 = constant_op.constant(4.0, shape=[3], name="b2") self.c2 = math_ops.add(self.a2, self.b2, name="c2")
def setUp(self): self.v = variables.Variable(10.0, name="v") self.p = math_ops.add(self.v, self.v, name="p") self.q = math_ops.multiply(self.p, self.p, name="q") self.delta = constant_op.constant(2.0, name="delta") self.v_add = state_ops.assign_add(self.v, self.delta, name="v_add") self.v_add_plus_one = math_ops.add(self.v_add, 1.0, name="v_add_plus_one") self.sess = session.Session() self.sess.run(self.v.initializer)
def tfadd_with_ckpt(out_dir): x = array_ops.placeholder(dtypes.int32, name='x_hold') y = variables.Variable(constant_op.constant([0]), name='y_saved') math_ops.add(x, y, name='x_y_sum') init_op = variables.initialize_all_variables() saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) with session.Session() as sess: sess.run(init_op) sess.run(y.assign(y + 42)) # Without the checkpoint, the variable won't be set to 42. ckpt = '%s/test_graph_tfadd_with_ckpt.ckpt' % out_dir saver.save(sess, ckpt)
def testFoldl_Simple(self): elems = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") r = functional_ops.foldl( lambda a, x: math_ops.multiply(math_ops.add(a, x), 2), elems) self.assertAllEqual(208, self.evaluate(r)) r = functional_ops.foldl( lambda a, x: math_ops.multiply(math_ops.add(a, x), 2), elems, initializer=10) self.assertAllEqual(880, self.evaluate(r))
def testDumpStringTensorsWorks(self): with session.Session() as sess: str1_init_val = np.array(b"abc") str2_init_val = np.array(b"def") str1_init = constant_op.constant(str1_init_val) str2_init = constant_op.constant(str2_init_val) str1_name = "str1" str2_name = "str2" str1 = variables.Variable(str1_init, name=str1_name) str2 = variables.Variable(str2_init, name=str2_name) # Concatenate str1 and str2 str_concat = math_ops.add(str1, str2, name="str_concat") str1.initializer.run() str2.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_urls = self._debug_urls() # Add debug tensor watch for u. debug_utils.add_debug_tensor_watch(run_options, "%s/read" % str1_name, 0, debug_urls=debug_urls) # Add debug tensor watch for v. debug_utils.add_debug_tensor_watch(run_options, "%s/read" % str2_name, 0, debug_urls=debug_urls) run_metadata = config_pb2.RunMetadata() sess.run(str_concat, options=run_options, run_metadata=run_metadata) # String ops are located on CPU. self.assertEqual(1, len(run_metadata.partition_graphs)) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) self.assertIn(str1_name, dump.nodes()) self.assertIn(str2_name, dump.nodes()) self.assertEqual(2, dump.size) self.assertEqual([str1_init_val], dump.get_tensors("%s/read" % str1_name, 0, "DebugIdentity")) self.assertEqual([str2_init_val], dump.get_tensors("%s/read" % str2_name, 0, "DebugIdentity")) self.assertGreaterEqual( dump.get_rel_timestamps("%s/read" % str1_name, 0, "DebugIdentity")[0], 0) self.assertGreaterEqual( dump.get_rel_timestamps("%s/read" % str2_name, 0, "DebugIdentity")[0], 0) self.assertGreater( dump.get_dump_sizes_bytes("%s/read" % str1_name, 0, "DebugIdentity")[0], 0) self.assertGreater( dump.get_dump_sizes_bytes("%s/read" % str2_name, 0, "DebugIdentity")[0], 0)
def testCausalityCheckOnDumpsDetectsWrongTemporalOrder(self): with session.Session() as sess: u_name = "testDumpCausalityCheck/u" v_name = "testDumpCausalityCheck/v" w_name = "testDumpCausalityCheck/w" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, name=w_name) u.initializer.run() run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() sess.run(w, options=run_options, run_metadata=run_metadata) self.assertEqual(self._expected_partition_graph_count, len(run_metadata.partition_graphs)) # First, loading the original dump without supplying the # partition_graphs should not cause a LookupError, validation occurs # only with partition_graphs loaded. debug_data.DebugDumpDir(self._dump_root) # Now, loading the original dump with partition graphs supplied should # succeed. The validation should pass quietly. dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Get the dump file names and compute their timestamps. self.assertEqual( 1, len(dump.get_tensor_file_paths(u_name, 0, "DebugIdentity"))) u_file_path = dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")[0] self.assertEqual( 1, len(dump.get_tensor_file_paths(v_name, 0, "DebugIdentity"))) v_file_path = dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")[0] u_timestamp = int(u_file_path[u_file_path.rindex("_") + 1:]) v_timestamp = int(v_file_path[v_file_path.rindex("_") + 1:]) # Swap the time stamps new_u_file_path = u_file_path[:u_file_path. rindex("_")] + "_%d" % v_timestamp new_v_file_path = v_file_path[:v_file_path. rindex("_")] + "_%d" % u_timestamp os.rename(u_file_path, new_u_file_path) os.rename(v_file_path, new_v_file_path) # Load the dump directory again. Now a ValueError is expected to be # raised due to the timestamp swap. with self.assertRaisesRegexp(ValueError, "Causality violated"): dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Loading the dump directory with kwarg "validate" set explicitly to # False should get rid of the error. dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs, validate=False)
def add_or_or(x1, x2): if x1.dtype == dtypes.bool: assert x2.dtype == dtypes.bool return math_ops.logical_or(x1, x2) return math_ops.add(x1, x2)
def fn(): with ops.device("/device:CPU:1"): c = math_ops.add(a, a, name="c") return c
def fn(): x_read = ops.get_collection("x")[0] y_read = ops.get_collection("y")[0] return math_ops.add(x_read, y_read)
def false_fn(): z = math_ops.add(x, y) return math_ops.mul(x, z)
def foo(a, b): return None, a * math_ops.add(a, b), None, 2 * a
def f(x): return math_ops.add(x, constant_op.constant(3))
def polynomial_decay(learning_rate, global_step, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name=None): """Applies a polynomial decay to the learning rate. It is commonly observed that a monotonically decreasing learning rate, whose degree of change is carefully chosen, results in a better performing model. This function applies a polynomial decay function to a provided initial `learning_rate` to reach an `end_learning_rate` in the given `decay_steps`. It requires a `global_step` value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step. The function returns a no-arg callable that outputs the decayed learning rate. This can be useful for changing the learning rate value across different invocations of optimizer functions. It is computed as: ```python global_step = min(global_step, decay_steps) decayed_learning_rate = (learning_rate - end_learning_rate) * (1 - global_step / decay_steps) ^ (power) + end_learning_rate ``` If `cycle` is True then a multiple of `decay_steps` is used, the first one that is bigger than `global_steps`. ```python decay_steps = decay_steps * ceil(global_step / decay_steps) decayed_learning_rate_fn = (learning_rate - end_learning_rate) * (1 - global_step / decay_steps) ^ (power) + end_learning_rate decayed_learning_rate = decayed_learning_rate_fn() ``` Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5): ```python ... global_step = tf.Variable(0, trainable=False) starter_learning_rate = 0.1 end_learning_rate = 0.01 decay_steps = 10000 learning_rate_fn = tf.train.polynomial_decay(starter_learning_rate, global_step, decay_steps, end_learning_rate, power=0.5) # Passing global_step to minimize() will increment it at each step. learning_step = ( tf.train.GradientDescentOptimizer(learning_rate_fn) .minimize(...my loss..., global_step=global_step) ) ``` Args: learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. The initial learning rate. global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global step to use for the decay computation. Must not be negative. decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must be positive. See the decay computation above. end_learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. The minimal end learning rate. power: A scalar `float32` or `float64` `Tensor` or a Python number. The power of the polynomial. Defaults to linear, 1.0. cycle: A boolean, whether or not it should cycle beyond decay_steps. name: String. Optional name of the operation. Defaults to 'PolynomialDecay'. Returns: A no-arg function that outputs the decayed learning rate, a scalar `Tensor` of the same type as `learning_rate`. Raises: ValueError: if `global_step` is not supplied. """ if global_step is None: raise ValueError("global_step is required for polynomial_decay.") with ops.name_scope( name, "PolynomialDecay", [learning_rate, global_step, decay_steps, end_learning_rate, power ]) as name: learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate") dtype = learning_rate.dtype end_learning_rate = math_ops.cast(end_learning_rate, dtype) power = math_ops.cast(power, dtype) global_step_recomp = math_ops.cast(global_step, dtype) decay_steps_recomp = math_ops.cast(decay_steps, dtype) if cycle: # Find the first multiple of decay_steps that is bigger than # global_step. If global_step is zero set the multiplier to 1 multiplier = control_flow_ops.cond( math_ops.equal(global_step_recomp, 0), lambda: 1.0, lambda: math_ops.ceil(global_step_recomp / decay_steps)) decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier) else: # Make sure that the global_step used is not bigger than decay_steps. global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps) p = math_ops.divide(global_step_recomp, decay_steps_recomp) return math_ops.add(math_ops.multiply( learning_rate - end_learning_rate, math_ops.pow(1 - p, power)), end_learning_rate, name=name)
def add(a, b): return math_ops.add(a, b)
def func(): return memoryview(math_ops.add(a, b))
def triplet_semihard_loss(labels, embeddings, margin=1.0): """Computes the triplet loss with semi-hard negative mining. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than the minimum negative distance among which are at least greater than the positive distance plus the margin constant (called semi-hard negative) in the mini-batch. If no such negative exists, uses the largest negative distance instead. See: https://arxiv.org/abs/1503.03832. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should be l2 normalized. margin: Float, margin term in the loss definition. Returns: triplet_loss: tf.float32 scalar. """ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pdist_matrix = pairwise_distance(embeddings, squared=True) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) # Compute the mask. pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1]) mask = math_ops.logical_and( array_ops.tile(adjacency_not, [batch_size, 1]), math_ops.greater( pdist_matrix_tile, array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1]))) mask_final = array_ops.reshape( math_ops.greater( math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True), 0.0), [batch_size, batch_size]) mask_final = array_ops.transpose(mask_final) adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32) mask = math_ops.cast(mask, dtype=dtypes.float32) # negatives_outside: smallest D_an where D_an > D_ap. negatives_outside = array_ops.reshape( masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]) negatives_outside = array_ops.transpose(negatives_outside) # negatives_inside: largest D_an. negatives_inside = array_ops.tile( masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]) semi_hard_negatives = array_ops.where(mask_final, negatives_outside, negatives_inside) loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives) mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # In lifted-struct, the authors multiply 0.5 for upper triangular # in semihard, they take all positive pairs except the diagonal. num_positives = math_ops.reduce_sum(mask_positives) _triplet_loss = math_ops.truediv(math_ops.reduce_sum( math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)), num_positives, name='triplet_semihard_loss') return _triplet_loss
def Add2(x, y): return math_ops.add(x, y)
def inner_fn(a, b): return a * math_ops.add(a, b)
def true_fn(): z = math_ops.add(x, y) ops.add_to_collection("z", 7) return math_ops.mul(x, z)
def f(x): return math_ops.add(x, three)
def fn(): x_const = constant_op.constant(ops.get_collection("x")[0]) y_const = constant_op.constant(ops.get_collection("y")[0]) return math_ops.add(x_const, y_const)
def g(x): tape.watch_variable(x) y = math_ops.add(x, three) f(y)
def fn(): with ops.colocate_with(b.op): c = math_ops.add(a, a, name="c") return c
def f(x): # This function intentionally takes a taped variable as input, # but does not return any values math_ops.add(x, three)
def testAllowsWatchingUnconnectedOutputTensor(self): """Watch an output slot not emitting any edges. (Not even control edges from the node.) """ with session.Session() as sess: x_init = constant_op.constant([2, 2, 3, 5, 5]) x = variables.Variable(x_init, name="unconnected/x") # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the # graph. Let the debugger watch the unused slot 1. unique_x, _ = array_ops.unique(x, name="unconnected/unique_x") y = math_ops.add(unique_x, [0, 1, 2], name="unconnected/y") x.initializer.run() # Verify that only slot 0 of unique_x has recipients, while slot 1 of the # same node does not have recipients. unique_x_slot_0_recipients = [] unique_x_slot_1_recipients = [] for op in sess.graph.get_operations(): for inp in op.inputs: if inp.name == "unconnected/unique_x:0": unique_x_slot_0_recipients.append(op.name) elif inp.name == "unconnected/unique_x:1": unique_x_slot_1_recipients.append(op.name) self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients) self.assertEqual([], unique_x_slot_1_recipients) run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() result = sess.run(y, options=run_options, run_metadata=run_metadata) self.assertAllClose([2, 4, 7], result) dump = debug_data.DebugDumpDir( self._dump_root, partition_graphs=run_metadata.partition_graphs) # Assert that the connected slot (slot 0) is dumped properly. unique_x_slot_0_dumps = dump.watch_key_to_data( "unconnected/unique_x:0:DebugIdentity") self.assertEqual(1, len(unique_x_slot_0_dumps)) self.assertEqual("unconnected/unique_x", unique_x_slot_0_dumps[0].node_name) self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot) self.assertAllClose([2, 3, 5], unique_x_slot_0_dumps[0].get_tensor()) # Assert that the unconnected slot (slot 1) is dumped properly. unique_x_slot_1_dumps = dump.watch_key_to_data( "unconnected/unique_x:1:DebugIdentity") self.assertEqual(1, len(unique_x_slot_1_dumps)) self.assertEqual("unconnected/unique_x", unique_x_slot_1_dumps[0].node_name) self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot) self.assertAllClose([0, 0, 1, 2, 2], unique_x_slot_1_dumps[0].get_tensor())
def f(x): return math_ops.add(x, x)
def body(i): new_u = state_ops.assign_add(u, v) new_i = math_ops.add(i, 1) op = control_flow_ops.group(new_u) new_i = control_flow_ops.with_dependencies([op], new_i) return [new_i]
def result(self): return math_ops.div_no_nan( self.true_positives, (math_ops.add(self.true_positives, self.false_negatives)))
def random_uniform(shape, minval=0, maxval=None, dtype=dtypes.float32, seed=None, name=None): """Outputs random values from a uniform distribution. The generated values follow a uniform distribution in the range `[minval, maxval)`. The lower bound `minval` is included in the range, while the upper bound `maxval` is excluded. For floats, the default range is `[0, 1)`. For ints, at least `maxval` must be specified explicitly. In the integer case, the random integers are slightly biased unless `maxval - minval` is an exact power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2**32` or `2**64`). Examples: >>> tf.random.uniform(shape=[2]) <tf.Tensor: shape=(2,), dtype=float32, numpy=array([..., ...], dtype=float32)> >>> tf.random.uniform(shape=[], minval=-1., maxval=0.) <tf.Tensor: shape=(), dtype=float32, numpy=-...> >>> tf.random.uniform(shape=[], minval=5, maxval=10, dtype=tf.int64) <tf.Tensor: shape=(), dtype=int64, numpy=...> The `seed` argument produces a deterministic sequence of tensors across multiple calls. To repeat that sequence, use `tf.random.set_seed`: >>> tf.random.set_seed(5) >>> tf.random.uniform(shape=[], maxval=3, dtype=tf.int32, seed=10) <tf.Tensor: shape=(), dtype=int32, numpy=2> >>> tf.random.uniform(shape=[], maxval=3, dtype=tf.int32, seed=10) <tf.Tensor: shape=(), dtype=int32, numpy=0> >>> tf.random.set_seed(5) >>> tf.random.uniform(shape=[], maxval=3, dtype=tf.int32, seed=10) <tf.Tensor: shape=(), dtype=int32, numpy=2> >>> tf.random.uniform(shape=[], maxval=3, dtype=tf.int32, seed=10) <tf.Tensor: shape=(), dtype=int32, numpy=0> Without `tf.random.set_seed` but with a `seed` argument is specified, small changes to function graphs or previously executed operations will change the returned value. See `tf.random.set_seed` for details. Args: shape: A 1-D integer Tensor or Python array. The shape of the output tensor. minval: A Tensor or Python value of type `dtype`, broadcastable with `maxval`. The lower bound on the range of random values to generate (inclusive). Defaults to 0. maxval: A Tensor or Python value of type `dtype`, broadcastable with `minval`. The upper bound on the range of random values to generate (exclusive). Defaults to 1 if `dtype` is floating point. dtype: The type of the output: `float16`, `float32`, `float64`, `int32`, or `int64`. seed: A Python integer. Used in combination with `tf.random.set_seed` to create a reproducible sequence of tensors across multiple calls. name: A name for the operation (optional). Returns: A tensor of the specified shape filled with random uniform values. Raises: ValueError: If `dtype` is integral and `maxval` is not specified. """ dtype = dtypes.as_dtype(dtype) if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64, dtypes.int32, dtypes.int64): raise ValueError("Invalid dtype %r" % dtype) if maxval is None: if dtype.is_integer: raise ValueError("Must specify maxval for integer dtype %r" % dtype) maxval = 1 with ops.name_scope(name, "random_uniform", [shape, minval, maxval]) as name: shape = tensor_util.shape_tensor(shape) # In case of [0,1) floating results, minval and maxval is unused. We do an # `is` comparison here since this is cheaper than isinstance or __eq__. minval_is_zero = minval is 0 # pylint: disable=literal-comparison maxval_is_one = maxval is 1 # pylint: disable=literal-comparison if not minval_is_zero or not maxval_is_one or dtype.is_integer: minval = ops.convert_to_tensor(minval, dtype=dtype, name="min") maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max") seed1, seed2 = random_seed.get_seed(seed) if dtype.is_integer: result = gen_random_ops.random_uniform_int(shape, minval, maxval, seed=seed1, seed2=seed2, name=name) else: result = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2) if minval_is_zero: if not maxval_is_one: result = math_ops.multiply(result, maxval) else: result = math_ops.add(result * (maxval - minval), minval, name=name) # TODO(b/132092188): C++ shape inference inside functional ops does not # cross FuncGraph boundaries since that information is only available in # python. So we manually get the static shape using # `constant_value_as_shape` which *does* cross function boundaries. tensor_util.maybe_set_static_shape(result, shape) return result
def testTensorsPlacedOnDevice(self): ds = Dataset.from_tensors([0., 1.]) with ops.device(test.gpu_device_name()): x = datasets.Iterator(ds).next() x = math_ops.add(x, x) self.assertAllEqual([0., 2.], x.numpy())
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): """Gradient for concat op. Args: op: An operation. grad: `Tensor` or `IndexedSlices` representing the gradients with respect to each output of the op. start_value_index: An integer index of the first value in the op.inputs. end_value_index: An integer index of the last value in the op.inputs. dim_index: An integer index of concat_dim or axis parameter in op.inputs. Returns: Tensors representing the partial gradients with respect to each input of the op. Raises: ValueError: if concat_dim/axis is not statically known. """ def _CreateDenseMaskAndBegin(sizes, concat_dim): """Create variables for iteratively slicing a dense gradients tensor.""" # Since shape is 1-D, shape_of_shape = [rank-of-inputs] shape_of_shape = array_ops.shape(sizes[0]) # Make a vector of length equal to the input's dimensions, # with 0's everywhere and 1 in the concat dim position. # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now) mask = array_ops.concat([ array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1], array_ops.fill(shape_of_shape - concat_dim - 1, 0) ], 0) begin = array_ops.fill(shape_of_shape, 0) return mask, begin def _ExtractInputShapes(inputs): """Extract the shapes of a set of input tensors.""" if context.executing_eagerly(): return array_ops.shape_n(inputs) sizes = [] fully_known = True for x in inputs: input_shape = array_ops.shape(x) if not isinstance(input_shape, ops.Tensor) or input_shape.op.type != "Const": fully_known = False break sizes.append(input_shape) if fully_known: return sizes else: return array_ops.shape_n(inputs) # Degenerate concatenation, just return grad. if len(op.inputs) == 2: return grad + [None] if end_value_index <= dim_index else [None] + grad concat_dim = op.inputs[dim_index] input_values = op.inputs[start_value_index:end_value_index] out_grads = [] if isinstance(grad, ops.Tensor): if context.executing_eagerly() or isinstance(concat_dim, ops.EagerTensor): # Using mod here for convenience since concat_dim is already verified # in concat implementation to be within the allowed [-rank, rank) range. non_neg_concat_dim = (concat_dim._numpy().item(0) % input_values[0]._rank()) # pylint: disable=protected-access # All inputs are guaranteed to be EagerTensors in eager mode sizes = pywrap_tfe.TFE_Py_TensorShapeSlice(input_values, non_neg_concat_dim) out_grads = array_ops.split(grad, sizes, non_neg_concat_dim) else: if constant_op.is_constant(concat_dim): # If concat_dim is a constant defined in a different context, # then we duplicate it in the current context to avoid passing it # through an Enter node. # This is a small optimization in general, but it is required when # compiling with XLA, as XLA needs the concat input to be folded into a # constant. grad_context = control_flow_util.GetOutputContext(grad.op) dim_context = control_flow_util.GetOutputContext(concat_dim.op) if dim_context != grad_context: value = tensor_util.constant_value(concat_dim) concat_dim = constant_op.constant(value=value, dtype=concat_dim.dtype) # Using mod here for convenience since concat_dim is already verified # in concat implementation to be within the allowed [-rank, rank) range. non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0]) # Get the inputs' tensor shapes sizes = _ExtractInputShapes(input_values) # The magic number of 16 was found through benchmarking a range of sizes # on CPUs and a Maxwell TitanX. A speedup was seen in a large majority of # cases when switching implementations at N=16, but it is possible that # there will be a small number of performance regressions. if len(sizes) > 16: # extract the size of each input along the concat dimension sizes = array_ops.squeeze( array_ops.slice(array_ops.stack(sizes, axis=1), [non_neg_concat_dim, 0], [1, -1])) out_grads = array_ops.split(grad, sizes, non_neg_concat_dim) else: offset = gen_array_ops.concat_offset(non_neg_concat_dim, sizes) for (begin, size) in zip(offset, sizes): out_grads.append(array_ops.slice(grad, begin, size)) elif isinstance(grad, ops.IndexedSlices): # Using mod here for convenience since concat_dim is already verified # in concat implementation to be within the allowed [-rank, rank) range. non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0]) concat_dim_static = tensor_util.constant_value(concat_dim) if concat_dim_static is None: raise ValueError("Can only compute IndexedSlices gradient with " "statically-known concat_dim") if concat_dim_static < 0: rank = tensor_util.constant_value(array_ops.rank(input_values[0])) if rank is None: raise ValueError( "Can only compute IndexedSlices gradient with " "negative concat_dim when first value rank is " "statically-known.") concat_dim_static %= rank # Get the inputs' tensor shapes sizes = [array_ops.shape(x) for x in input_values] if concat_dim_static > 0: # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices # gradients with all the indices, but with grad.values sliced accordingly. # This is like the Tensor case, except shape(grad.values)[0] is not equal # to shape(sizes[i])[0], since only a subset of the dim-0 values are # stored. mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim) for size in sizes: new_values = array_ops.slice( grad.values, begin, array_ops.concat( [[-1], array_ops.slice(size, [1], [-1])], 0)) out_grads.append( ops.IndexedSlices(new_values, grad.indices, size)) # Lint complains begin = begin + ... begin = math_ops.add(begin, size * mask) else: # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients # only for the relevant indices. start = constant_op.constant(0, dtype=grad.indices.dtype) for size in sizes: size_concat_dim = array_ops.gather(size, non_neg_concat_dim) if size_concat_dim.dtype != grad.indices.dtype: size_concat_dim = math_ops.cast(size_concat_dim, dtype=grad.indices.dtype) end = start + size_concat_dim # Compute the 1-D Tensor of indices relevant for this input. indices_to_select = array_ops.squeeze(array_ops.where( math_ops.logical_and(grad.indices >= start, grad.indices < end)), axis=[1]) new_indices = array_ops.gather(grad.indices, indices_to_select) - start new_values = array_ops.gather(grad.values, indices_to_select) out_grads.append( ops.IndexedSlices(new_values, new_indices, size)) start = end else: raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad)) return (out_grads + [None] if end_value_index <= dim_index else [None] + out_grads)
def _TestFoldDepthwiseConv2dWithoutScale(self, relu, relu_op_name, with_bypass): """Tests folding: inputs -> DepthwiseConv2d with batch norm -> Relu*. Args: relu: Callable that returns an Operation, a factory method for the Relu*. relu_op_name: String, name of the Relu* operation. with_bypass: Bool, when true there is an extra connection added from inputs to just before Relu*. """ g = ops.Graph() with g.as_default(): batch_size, height, width = 5, 128, 128 inputs = array_ops.zeros((batch_size, height, width, 3)) stride = 1 if with_bypass else 2 activation_fn = None if with_bypass else relu bn_params = copy.copy(_DEFAULT_BATCH_NORM_PARAMS) bn_params['scale'] = False scope = 'test/test2' if with_bypass else 'test' node = separable_conv2d(inputs, None, [5, 5], stride=stride, depth_multiplier=1.0, padding='SAME', weights_initializer=self._WeightInit(0.09), activation_fn=activation_fn, normalizer_fn=batch_norm, normalizer_params=bn_params, scope=scope) if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) fold_batch_norms.FoldBatchNorms(g) folded_mul = g.get_operation_by_name(scope + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') self._AssertInputOpsAre( folded_mul, [scope + '/depthwise_weights/read', scope + '/scale_reshape']) self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold']) scale_reshape = g.get_operation_by_name(scope + '/scale_reshape') self.assertEqual(scale_reshape.type, 'Reshape') self._AssertInputOpsAre(scale_reshape, [ scope + '/BatchNorm/batchnorm/Rsqrt', scope + '/scale_reshape/shape' ]) self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold']) folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold') self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative') self._AssertInputOpsAre(folded_conv, [scope + '/mul_fold', inputs.op.name]) self._AssertOutputGoesToOps(folded_conv, g, [scope + '/add_fold']) folded_add = g.get_operation_by_name(scope + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre( folded_add, [scope + '/depthwise_Fold', scope + '/BatchNorm/batchnorm/sub']) output_op_names = [ 'test/Add' if with_bypass else 'test/' + relu_op_name ] self._AssertOutputGoesToOps(folded_add, g, output_op_names)
def _ConcatGrad(op, grad): """Gradient for concat op.""" def _CreateDenseMaskAndBegin(sizes, concat_dim): """Create variables for iteratively slicing a dense gradients tensor.""" # Since shape is 1-D, shape_of_shape = [rank-of-inputs] shape_of_shape = array_ops.shape(sizes[0]) # Make a vector of length equal to the input's dimensions, # with 0's everywhere and 1 in the concat dim position. # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now) mask = array_ops.concat(0, [ array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1], array_ops.fill(shape_of_shape - concat_dim - 1, 0) ]) begin = array_ops.fill(shape_of_shape, 0) return mask, begin def _ExtractInputShapes(inputs): """Extract the shapes of a set of input tensors.""" sizes = [] fully_known = True for x in inputs: input_shape = array_ops.shape(x) if not isinstance(input_shape, ops.Tensor) or input_shape.op.type != "Const": fully_known = False break else: sizes.append(input_shape) if fully_known: return sizes else: return array_ops.shape_n(inputs) # Degenerate concatenation, just return grad. if len(op.inputs) == 2: return [None, grad] concat_dim = op.inputs[0] out_grads = [] if isinstance(grad, ops.Tensor): # Get the inputs' tensor shapes sizes = _ExtractInputShapes(op.inputs[1:]) # pylint: disable=protected-access offset = gen_array_ops._concat_offset(concat_dim, sizes) # pylint: enable=protected-access for (begin, size) in zip(offset, sizes): out_grads.append(array_ops.slice(grad, begin, size)) elif isinstance(grad, ops.IndexedSlices): concat_dim_static = tensor_util.constant_value(concat_dim) if concat_dim_static is None: raise ValueError("Can only compute IndexedSlices gradient with " "statically-known concat_dim") # Get the inputs' tensor shapes sizes = [array_ops.shape(x) for x in op.inputs[1:]] if concat_dim_static > 0: # IndexedSlices, concat_dim > 0. Each input gets IndexedSlices gradients # with all the indices, but with grad.values sliced accordingly. This # is like the Tensor case, except shape(grad.values)[0] is not equal to # shape(sizes[i])[0], since only a subset of the dim-0 values are stored. mask, begin = _CreateDenseMaskAndBegin(sizes, concat_dim) for size in sizes: new_values = array_ops.slice( grad.values, begin, array_ops.concat( 0, [[-1], array_ops.slice(size, [1], [-1])])) out_grads.append( ops.IndexedSlices(new_values, grad.indices, size)) # Lint complains begin = begin + ... begin = math_ops.add(begin, size * mask) else: # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients # only for the relevant indices. start = constant_op.constant(0, dtype=grad.indices.dtype) for size in sizes: size_concat_dim = array_ops.gather(size, concat_dim) if size_concat_dim.dtype != grad.indices.dtype: size_concat_dim = math_ops.cast(size_concat_dim, dtype=grad.indices.dtype) end = start + size_concat_dim # Compute the 1-D Tensor of indices relevant for this input. indices_to_select = array_ops.squeeze(array_ops.where( math_ops.logical_and(grad.indices >= start, grad.indices < end)), squeeze_dims=[1]) new_indices = array_ops.gather(grad.indices, indices_to_select) - start new_values = array_ops.gather(grad.values, indices_to_select) out_grads.append( ops.IndexedSlices(new_values, new_indices, size)) start = end else: raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad)) return [None] + out_grads
def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids, candidate_ids, margin_multiplier, margin_type): """Find the next centroid that maximizes the loss augmented inference. This function is a subroutine called from compute_augmented_facility_locations Args: pairwise_distances: 2-D Tensor of pairwise distances. labels: 1-D Tensor of ground truth cluster assignment. chosen_ids: 1-D Tensor of current centroid indices. candidate_ids: 1-D Tensor of candidate indices. margin_multiplier: multiplication constant. margin_type: Type of structured margin to use. Default is nmi. Returns: integer index. """ num_candidates = array_ops.shape(candidate_ids)[0] pairwise_distances_chosen = array_ops.gather(pairwise_distances, chosen_ids) pairwise_distances_candidate = array_ops.gather(pairwise_distances, candidate_ids) pairwise_distances_chosen_tile = array_ops.tile(pairwise_distances_chosen, [1, num_candidates]) candidate_scores = -1.0 * math_ops.reduce_sum(array_ops.reshape( math_ops.reduce_min(array_ops.concat([ pairwise_distances_chosen_tile, array_ops.reshape(pairwise_distances_candidate, [1, -1]) ], 0), axis=0, keepdims=True), [num_candidates, -1]), axis=1) nmi_scores = array_ops.zeros([num_candidates]) iteration = array_ops.constant(0) def func_cond(iteration, nmi_scores): del nmi_scores # Unused in func_cond() return iteration < num_candidates def func_body(iteration, nmi_scores): predictions = get_cluster_assignment( pairwise_distances, array_ops.concat([chosen_ids, [candidate_ids[iteration]]], 0)) nmi_score_i = compute_clustering_score(labels, predictions, margin_type) pad_before = array_ops.zeros([iteration]) pad_after = array_ops.zeros([num_candidates - 1 - iteration]) # return 1 - NMI score as the structured loss. # because NMI is higher the better [0,1]. return iteration + 1, nmi_scores + array_ops.concat( [pad_before, [1.0 - nmi_score_i], pad_after], 0) _, nmi_scores = control_flow_ops.while_loop(func_cond, func_body, [iteration, nmi_scores]) candidate_scores = math_ops.add(candidate_scores, margin_multiplier * nmi_scores) argmax_index = math_ops.to_int32( math_ops.argmax(candidate_scores, dimension=0)) return candidate_ids[argmax_index]