def build_graph(device, input_shape, filter_shape, strides, padding, num_iters):
  """Builds a graph that chains NHWC conv2d operations on one device.

  Every convolution reads the same input and filter variables. Each conv2d
  after the first is created under a control dependency on the previous one.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: number of iterations to run conv2d.

  Returns:
    The result of `control_flow_ops.group` applied to all conv2d outputs.
  """
  with ops.device("/%s:0" % device):
    image = variables.Variable(random_ops.truncated_normal(input_shape))
    kernel = variables.Variable(random_ops.truncated_normal(filter_shape))

    def _conv():
      # Single place that spells out the conv2d call used for every step.
      return nn_ops.conv2d(
          image, kernel, strides, padding, data_format="NHWC")

    latest = _conv()
    convs = [latest]
    for _ in range(1, num_iters):
      # Tie each new convolution to the one created just before it.
      with ops.control_dependencies([latest]):
        latest = _conv()
        convs.append(latest)
    return control_flow_ops.group(*convs)
def testSmallNetwork(self):
  """Checks that the cost report of a small conv net mentions its op types."""
  image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
  label = array_ops.placeholder(dtypes.float32, shape=[1, 10])

  # Convolution + ReLU.
  conv_weights = variables.Variable(
      random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
  conv_bias = variables.Variable(
      random_ops.truncated_normal([32], stddev=0.1))
  conv = nn_ops.conv2d(
      image, conv_weights, strides=[1, 1, 1, 1], padding="SAME")
  activated = nn_ops.relu(conv + conv_bias)
  flattened = array_ops.reshape(activated, [1, -1])

  # Fully connected layer + softmax.
  fc_weights = variables.Variable(
      random_ops.truncated_normal([25088, 10], stddev=0.1))
  fc_bias = variables.Variable(random_ops.truncated_normal([10], stddev=0.1))
  logits = math_ops.matmul(flattened, fc_weights) + fc_bias
  probabilities = nn_ops.softmax(logits)

  # Cross-entropy loss minimized with Adam so that backprop ops exist too.
  per_example_loss = -math_ops.reduce_sum(
      label * math_ops.log(probabilities), reduction_indices=[1])
  cross_entropy = math_ops.reduce_mean(per_example_loss)
  adam.AdamOptimizer(1e-4).minimize(cross_entropy)

  mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
  report = cost_analyzer.GenerateCostReport(mg)

  expected_ops = (
      b"MatMul",
      b"ApplyAdam",
      b"Conv2D",
      b"Conv2DBackpropInput",
      b"Conv2DBackpropFilter",
      b"Softmax",
  )
  for op_name in expected_ops:
    self.assertTrue(op_name in report)

  # Also print the report to make it easier to debug
  print("{}".format(report))
def testNoCSE(self):
  """Checks two truncated_normal ops with equal arguments give different data."""
  with self.test_session(use_gpu=True):
    shape = [2, 3, 4]
    first, second = [
        random_ops.truncated_normal(shape, 0.0, 1.0, dtypes.float32)
        for _ in range(2)
    ]
    delta = second - first
    # If the two ops had been merged into one, the difference would be zero.
    distance = np.linalg.norm(delta.eval())
    self.assertTrue(distance > 0.1)
def loop():
  """Maps `two_layer_model` over four [1, 784] truncated-normal tensors."""
  random_seed.set_random_seed(0)
  # Four separate ops, each created with the same shape and op-level seed.
  elems = tuple(
      random_ops.truncated_normal([1, 784], seed=0) for _ in range(4))
  return functional_ops.map_fn(two_layer_model, elems, dtype=dtypes.float32)
def _loop_with_vec_and_4d():
  """Maps `_model_with_vec_and_4d` over four [1, 784] truncated-normal tensors."""
  random_seed.set_random_seed(0)
  # Four separate ops, each created with the same shape and op-level seed.
  elems = tuple(
      random_ops.truncated_normal([1, 784], seed=0) for _ in range(4))
  return functional_ops.map_fn(
      _model_with_vec_and_4d, elems, dtype=dtypes.float32)
def testTruncatedNormal(self):
  """Checks the static shape of truncated_normal for three kinds of shape input."""
  # Fully known shape.
  fully_known = random_ops.truncated_normal([1, 2, 3])
  self.assertEqual([1, 2, 3], fully_known.get_shape())

  # Partially known shape.
  rank3_shape = array_ops.placeholder(dtypes.int32, shape=(3,))
  partially_known = random_ops.truncated_normal(rank3_shape)
  self.assertEqual([None, None, None], partially_known.get_shape().as_list())

  # Unknown shape.
  any_shape = array_ops.placeholder(dtypes.int32)
  unknown = random_ops.truncated_normal(any_shape)
  self.assertIs(None, unknown.get_shape().ndims)
def build_fused_conv_bias_relu_graph(device, input_shape, filter_shape, strides,
                                     padding, num_iters, data_format):
  """Builds a graph that chains fused conv2d + bias + Relu operations.

  Every fused op reads the same input, filter and bias variables. Each one
  after the first is created under a control dependency on the previous one.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: number of iterations to run conv2d.
    data_format: data format string of input, 'NHWC' and 'NCHW' are
    supported.

  Returns:
    The result of `control_flow_ops.group` applied to all fused outputs.
  """
  if data_format == "NCHW":
    # Move the last of the four entries into second position.
    input_shape = [input_shape[i] for i in (0, 3, 1, 2)]

  with ops.device("/%s:0" % device):
    inp = variables.Variable(random_ops.truncated_normal(input_shape))
    filt = variables.Variable(random_ops.truncated_normal(filter_shape))
    # One bias entry per element of the filter's last dimension.
    bias = variables.Variable(
        random_ops.truncated_normal([filter_shape[-1]]))

    def _fused_conv():
      # Single place that spells out the fused call used for every step.
      return fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          inp,
          filt,
          bias,
          strides,
          padding,
          data_format=data_format,
          activation_mode="Relu")

    latest = _fused_conv()
    fused_ops = [latest]
    for _ in range(1, num_iters):
      with ops.control_dependencies([latest]):
        latest = _fused_conv()
        fused_ops.append(latest)
    return control_flow_ops.group(*fused_ops)
def testSmallNetwork(self):
  """Checks op names and per-op counts in the cost report of a small conv net.

  The graph is one conv + ReLU layer, one dense + softmax layer, and an Adam
  training step on a cross-entropy loss. The report is searched for the
  expected op names, and for a few op types the report row is parsed with a
  regex to check the op count and one numeric column.
  """
  image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
  label = array_ops.placeholder(dtypes.float32, shape=[1, 10])
  w = variables.Variable(
      random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
  b = variables.Variable(random_ops.truncated_normal([32], stddev=0.1))
  conv = nn_ops.conv2d(image, w, strides=[1, 1, 1, 1], padding="SAME")
  h_conv = nn_ops.relu(conv + b)
  h_conv_flat = array_ops.reshape(h_conv, [1, -1])

  w_fc = variables.Variable(
      random_ops.truncated_normal([25088, 10], stddev=0.1))
  b_fc = variables.Variable(random_ops.truncated_normal([10], stddev=0.1))
  y_conv = nn_ops.softmax(math_ops.matmul(h_conv_flat, w_fc) + b_fc)

  cross_entropy = math_ops.reduce_mean(-math_ops.reduce_sum(
      label * math_ops.log(y_conv), reduction_indices=[1]))
  _ = adam.AdamOptimizer(1e-4).minimize(cross_entropy)

  mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
  report = cost_analyzer.GenerateCostReport(mg)

  # Print the report to make it easier to debug
  print("{}".format(report))

  # assertIn reports both operands on failure, unlike assertTrue(x in y).
  for op_name in (b"MatMul", b"ApplyAdam", b"Conv2D", b"Conv2DBackpropInput",
                  b"Conv2DBackpropFilter", b"Softmax"):
    self.assertIn(op_name, report)

  for op_type in [
      b"MatMul", b"Conv2D", b"Conv2DBackpropInput", b"Conv2DBackpropFilter"
  ]:
    matcher = re.compile(
        br"\s+" + op_type + br",\s*(\d+),\s*(\d+),\s*([\d\.eE+-]+)%,\s*" +
        br"([\d\.eE+-]+)%,\s*(-?\d+),\s*(\d+),", re.MULTILINE)
    m = matcher.search(report)
    # Fail with a readable message instead of AttributeError on `None.group`.
    self.assertIsNotNone(m, "no report row matched for %r" % op_type)
    op_count = int(m.group(1))
    # Group 5 is captured by the pattern but not checked here.
    lower = int(m.group(6))
    # Compare bytes by value: `is` only works when the interpreter happens to
    # share the two literals.
    if op_type == b"MatMul":
      self.assertEqual(3, op_count)
    else:
      self.assertEqual(1, op_count)
    self.assertGreaterEqual(lower, 0)
def sequence_softmax(inputs, noutput, scope=None, name=None, linear_name=None):
  """Run a softmax layer over all the time steps of an input sequence.

  One weight matrix and one bias vector are created and shared by every
  time step: each unstacked slice goes through `xw_plus_b` and `softmax`,
  and the per-step results are stacked back together.

  Args:
    inputs: (length, batch_size, depth) tensor
    noutput: output depth
    scope: optional scope name
    name: optional name for output tensor
    linear_name: name for linear (pre-softmax) output

  Returns:
    A tensor of size (length, batch_size, noutput).
  """
  length, _, ninputs = _shape(inputs)
  steps = array_ops.unstack(inputs)
  step_outputs = []
  with variable_scope.variable_scope(scope, "SequenceSoftmax", [inputs]):
    weights_init = random_ops.truncated_normal(
        [0 + ninputs, noutput], stddev=0.1)
    biases_init = constant_op.constant(0.1, shape=[noutput])
    w = variables.model_variable("weights", initializer=weights_init)
    b = variables.model_variable("biases", initializer=biases_init)
    for i in xrange(length):
      step_input = steps[i]
      with variable_scope.variable_scope(scope, "SequenceSoftmaxStep",
                                         [step_input]):
        # TODO(tmb) consider using slim.fully_connected(...,
        # activation_fn=tf.nn.softmax)
        linear = nn_ops.xw_plus_b(step_input, w, b, name=linear_name)
        step_outputs.append(nn_ops.softmax(linear))
    outputs = array_ops.stack(step_outputs, name=name)
  return outputs
def testTwoConvLayers(self):
  """Compares `two_layer_model` output with and without `get_config()`.

  Also checks the names and number of LayoutOptimizer transpose nodes in the
  cost graph of the configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  output = two_layer_model(features)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(
      1 for name in node_names if name.startswith('LayoutOptimizerTranspose'))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                node_names)
  self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1',
                node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testStridedSliceWithMask1011(self):
  """Compares a strided slice of `_two_layer_model` with/without `_get_config()`.

  Also checks the transpose and constant node names in the cost graph of the
  configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  # This will generate a StridedSlice op with begin mask and
  # end mask 11(1011).
  sliced = conv[:, :, 1:-1, :]
  output = array_ops.identity(sliced)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('strided_slice-0-0', node_names)
  for const_name in ('strided_slice-1-LayoutOptimizer',
                     'strided_slice-2-LayoutOptimizer',
                     'strided_slice-3-LayoutOptimizer'):
    self.assertIn(const_name, node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testTernaryOp(self):
  """Compares a Select over `_two_layer_model` output with/without `_get_config()`.

  The condition is an element-wise `less` against the mean. Also checks the
  transpose node names in the cost graph of the configured run. Does nothing
  without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  doubled = math_ops.add(conv, conv)
  mean = math_ops.reduce_mean(conv)
  below_mean = math_ops.less(conv, mean)
  select = gen_math_ops._select(below_mean, conv, doubled)
  output = array_ops.identity(select)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(
      1 for name in node_names if name.startswith('LayoutOptimizerTranspose'))

  self.assertEqual(3, transpose_count)
  self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', node_names)
  self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Select-0-0', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testPadWithNonConstPaddings(self):
  """Compares a Pad with fed paddings with/without `_get_config()`.

  Also checks the transpose and permute node names in the cost graph of the
  configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  paddings = array_ops.placeholder(dtype='int32')
  padded = array_ops.pad(conv, paddings)
  output = array_ops.identity(padded)
  feed = {paddings: [[1, 2], [3, 4], [5, 6], [7, 8]]}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(
      1 for name in node_names if name.startswith('LayoutOptimizerTranspose'))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', node_names)
  self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Pad-0-0', node_names)
  self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_Pad_1', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def __call__(self, shape, dtype=None, partition_info=None):
  """Samples a tensor of `shape` whose spread is scaled by the fan counts.

  Args:
    shape: Shape of the tensor to sample.
    dtype: Type of the result; `self.dtype` is used when this is None.
    partition_info: Optional object whose `full_shape` replaces `shape` when
      computing the fans. The sampled tensor still has `shape`.

  Returns:
    A tensor from `random_ops.truncated_normal`, `random_ops.random_normal`
    or `random_ops.random_uniform`, chosen by `self.distribution`.
  """
  if dtype is None:
    dtype = self.dtype

  fan_shape = shape if partition_info is None else partition_info.full_shape
  fan_in, fan_out = _compute_fans(fan_shape)

  # Pick the divisor according to the mode; it is never smaller than 1.
  if self.mode == "fan_in":
    divisor = max(1., fan_in)
  elif self.mode == "fan_out":
    divisor = max(1., fan_out)
  else:
    divisor = max(1., (fan_in + fan_out) / 2.)
  scale = self.scale
  scale /= divisor

  if self.distribution in ("normal", "truncated_normal"):
    # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
    stddev = math.sqrt(scale) / .87962566103423978
    return random_ops.truncated_normal(
        shape, 0.0, stddev, dtype, seed=self.seed)

  if self.distribution == "untruncated_normal":
    stddev = math.sqrt(scale)
    return random_ops.random_normal(
        shape, 0.0, stddev, dtype, seed=self.seed)

  # Any other distribution value falls through to a uniform sample.
  limit = math.sqrt(3.0 * scale)
  return random_ops.random_uniform(
      shape, -limit, limit, dtype, seed=self.seed)
def testGradient(self):
  """Checks that the optimized training graph's conv ops all use NCHW.

  Two conv2d layers are trained with gradient descent. The meta graph is
  passed to `tf_optimizer.OptimizeGraph` with `optimize_tensor_layout=True`,
  and every Conv2D / Conv2DBackprop* node in the result must carry the
  `data_format` attribute NCHW. Skipped when no CUDA GPU is available.
  """
  if not test.is_gpu_available(cuda_only=True):
    self.skipTest('GPU required')

  random_seed.set_random_seed(0)
  x = random_ops.truncated_normal([1, 200, 200, 3], seed=0)
  y = conv_layers.conv2d(x, 32, [3, 3])
  z = conv_layers.conv2d(y, 32, [3, 3])
  optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
  loss = math_ops.reduce_mean(z)
  train_op = optimizer.minimize(loss)
  graph = ops.get_default_graph()
  graph.add_to_collection('train_op', train_op)
  meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())

  rewrite_options = rewriter_config_pb2.RewriterConfig(
      optimize_tensor_layout=True)
  optimized_graph = tf_optimizer.OptimizeGraph(rewrite_options, meta_graph)

  found = 0
  for node in optimized_graph.node:
    if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
      found += 1
      # The attr's `s` field is bytes, which never equals a str on Python 3;
      # a bytes literal compares correctly on both Python 2 and Python 3.
      self.assertEqual(node.attr['data_format'].s, b'NCHW')
  self.assertEqual(found, 5)
def testGradient(self):
  """Checks shapes and gradients after importing a graph through an input_map.

  A dense + ReLU graph built on placeholders is exported, then imported into
  a new graph with its input, weights and biases remapped. The imported
  tensors and the gradients w.r.t. the remapped variables must have the
  expected static shapes.
  """
  with ops.Graph().as_default() as source_graph:
    inputs = array_ops.placeholder(
        dtypes.float32, shape=[None, 100], name="input")
    weights = array_ops.placeholder(
        dtypes.float32, shape=[100, 10], name="weights")
    biases = array_ops.placeholder(dtypes.float32, shape=[10], name="biases")
    pre_activation = math_ops.matmul(inputs, weights) + biases
    activations = nn_ops.relu(pre_activation, name="activations")
    math_ops.reduce_mean(activations, name="loss")

  gdef = source_graph.as_graph_def()

  with ops.Graph().as_default():
    input_placeholder = array_ops.placeholder(dtypes.float32, shape=[32, 100])
    weights_var = variables.Variable(
        random_ops.truncated_normal([100, 10]), name="weights")
    biases_var = variables.Variable(array_ops.zeros([10]), name="biases")
    replacements = {
        "input:0": input_placeholder,
        "weights:0": weights_var,
        "biases:0": biases_var
    }
    imported_activations, imported_loss = importer.import_graph_def(
        gdef,
        input_map=replacements,
        return_elements=["activations:0", "loss:0"])
    self.assertEqual([32, 10], imported_activations.get_shape())
    self.assertEqual([], imported_loss.get_shape())

    weights_grad, biases_grad = gradients_impl.gradients(
        imported_loss, [weights_var, biases_var])
    self.assertEqual([100, 10], weights_grad.get_shape())
    self.assertEqual([10], biases_grad.get_shape())
def testSliceWithNonConstAxis(self):
  """Compares a Slice with a fed size with/without `_get_config()`.

  Also checks the transpose and permute node names in the cost graph of the
  configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  size = array_ops.placeholder(dtype='int32')
  sliced = array_ops.slice(conv, [0, 0, 0, 0], size)
  output = array_ops.identity(sliced)
  feed = {size: [1, 2, 3, 4]}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('Slice-0-0', node_names)
  self._assert_vec_nhwc_to_nchw('Slice-2', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testSelectOpScalarCondition(self):
  """Compares a Select with a constant scalar condition with/without `_get_config()`.

  Also checks the transpose node names in the cost graph of the configured
  run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  doubled = math_ops.add(conv, conv)
  always_true = constant_op.constant(True)
  select = gen_math_ops._select(always_true, conv, doubled)
  output = array_ops.identity(select)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('Select-0-0', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testSplitWithNonConstAxis(self):
  """Compares a Split on a fed axis plus batch norm with/without `_get_config()`.

  Both halves of the split go through `fused_batch_norm` and are summed.
  Also checks the transpose and dimension-map node names in the cost graph
  of the configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  dim = array_ops.placeholder(dtype='int32')
  halves = array_ops.split(conv, 2, axis=dim)
  scale = constant_op.constant(0.1, shape=[32])
  offset = constant_op.constant(0.3, shape=[32])
  normed_first = nn.fused_batch_norm(halves[0], scale, offset)
  normed_second = nn.fused_batch_norm(halves[1], scale, offset)
  # Element 0 of each result is used; the remaining elements are ignored.
  total = normed_first[0] + normed_second[0]
  output = array_ops.identity(total)
  feed = {dim: 3}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('add_2-0-0', node_names)
  self._assert_map_nhwc_to_nchw('split-0', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testConcatWithControlDependency(self):
  """Compares a Concat created under a control dependency with/without `_get_config()`.

  Also checks the transpose and constant node names in the cost graph of the
  configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  axis = constant_op.constant(3)
  counter = variables.Variable(3)
  assign = state_ops.assign(counter, 6)
  # The concat is created with a control input on the assign op.
  with ops.control_dependencies([assign]):
    concat = array_ops.concat([conv, conv], axis)
  output = array_ops.identity(concat)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('concat-0-0', node_names)
  self.assertIn('concat-2-LayoutOptimizer', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testReduceSumAlongC(self):
  """Compares a reduce_sum over axis 3 with/without `_get_config()`.

  Also checks the transpose node names in the cost graph of the configured
  run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  summed = math_ops.reduce_sum(conv, axis=[3])
  output = array_ops.identity(summed)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  # Three transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(1, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testSelectOpConditionUnknownShape(self):
  """Compares a Select with a shapeless fed condition with/without `_get_config()`.

  Also checks the transpose node names in the cost graph of the configured
  run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  doubled = math_ops.add(conv, conv)
  # No static shape is given for the condition placeholder.
  condition = array_ops.placeholder(dtype='bool')
  select = gen_math_ops._select(condition, conv, doubled)
  output = array_ops.identity(select)
  feed = {condition: np.zeros((1, 7, 7, 64))}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  self.assertEqual(3, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testReverseWithConstDims(self):
  """Compares a reverse over constant dims with/without `_get_config()`.

  Also checks the transpose and constant node names in the cost graph of the
  configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  dims = constant_op.constant([3, 1], name='DimsConst')
  reversed_conv = array_ops.reverse(conv, dims)
  output = array_ops.identity(reversed_conv)

  with session.Session() as sess:
    expected = sess.run(output)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', node_names)
  self.assertIn('ReverseV2-1-LayoutOptimizer', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def _initializer(shape, dtype=dtype, partition_info=None):
  """Samples a tensor of `shape` with spread `sqrt(factor / n)`.

  `n` is the fan-in, fan-out or their average, selected by the enclosing
  `mode`. `partition_info` is accepted but not used.

  Raises:
    TypeError: if `dtype` is not a floating point type.
  """
  if not dtype.is_floating:
    raise TypeError('Cannot create initializer for non-floating point type.')

  # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
  # This is the right thing for matrix multiply and convolutions.
  fan_in = 1.0
  fan_out = 1.0
  if shape:
    fan_out = float(shape[-1])
    # A 1-D shape uses its only dimension for both fans.
    fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
  # Every dimension before the last two multiplies both fans.
  for dim in shape[:-2]:
    fan_in *= float(dim)
    fan_out *= float(dim)

  if mode == 'FAN_IN':
    # Count only number of input connections.
    n = fan_in
  elif mode == 'FAN_OUT':
    # Count only number of output connections.
    n = fan_out
  elif mode == 'FAN_AVG':
    # Average number of inputs and output connections.
    n = (fan_in + fan_out) / 2.0

  if not uniform:
    # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
    trunc_stddev = math.sqrt(1.3 * factor / n)
    return random_ops.truncated_normal(
        shape, 0.0, trunc_stddev, dtype, seed=seed)

  # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
  limit = math.sqrt(3.0 * factor / n)
  return random_ops.random_uniform(shape, -limit, limit, dtype, seed=seed)
def testSplitWithNonConstAxis(self):
  """Compares a Split on a fed axis with/without `_get_config()`.

  The first split output is summed. Also checks the transpose and dimension
  node names in the cost graph of the configured run. Does nothing without a
  CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  dim = array_ops.placeholder(dtype='int32')
  halves = array_ops.split(conv, 2, axis=dim)
  output = math_ops.reduce_sum(halves[0])
  feed = {dim: 3}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(
      1 for name in node_names if name.startswith('LayoutOptimizerTranspose'))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                node_names)
  self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-Sum-0', node_names)
  self.assertIn('LayoutOptimizerDim-split', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testSplitVWithNonConstAxis(self):
  """Compares a SplitV on a fed axis with/without `_get_config()`.

  The first split output is summed. Also checks the transpose and
  dimension-map node names in the cost graph of the configured run. Does
  nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  dim = array_ops.placeholder(dtype='int32')
  sizes = constant_op.constant([50, 10, 4], shape=[3])
  pieces = gen_array_ops._split_v(
      value=conv, size_splits=sizes, axis=dim, num_split=3)
  output = math_ops.reduce_sum(pieces[0])
  feed = {dim: 3}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  # Four transposes were initially added in the Expand phase of
  # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('SplitV-0-0', node_names)
  self._assert_map_nhwc_to_nchw('SplitV-2', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def testMaxPoolV2(self):
  """Compares a MaxPoolV2 with fed strides with/without `_get_config()`.

  The ksize is a constant and the strides come from a placeholder. Also
  checks the transpose, permute and constant node names in the cost graph of
  the configured run. Does nothing without a CUDA GPU.
  """
  if not test.is_gpu_available(cuda_only=True):
    return
  random_seed.set_random_seed(0)
  features = random_ops.truncated_normal([1, 784], seed=0)
  conv = _two_layer_model(features)
  ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
  strides = array_ops.placeholder(dtype='int32', shape=[4])
  pooled = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID')
  output = array_ops.identity(pooled)
  feed = {strides: [1, 3, 2, 1]}

  with session.Session() as sess:
    expected = sess.run(output, feed_dict=feed)

  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

  node_names = [node.name for node in metadata.cost_graph.node]
  transpose_count = sum(1 for name in node_names if _is_transpose(name))

  self.assertEqual(2, transpose_count)
  self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
  self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', node_names)
  self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', node_names)
  self.assertIn('MaxPoolV2-1-LayoutOptimizer', node_names)
  self.assertAllClose(expected, actual, atol=1e-3)
def func():
  """Runs one truncated_normal op ten times in a new graph.

  Returns:
    A `[10, num]` numpy array; row i holds the values from the i-th run.
  """
  with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
    sampler = random_ops.truncated_normal(
        [num], mean=mu, stddev=sigma, dtype=dtype, seed=seed)
    samples = np.empty([10, num])
    # Each row is a view into `samples`, so the assignment fills it in place.
    for row in samples:
      row[:] = sess.run(sampler)
  return samples
def _model_with_second_port():
  """Builds a model that uses two outputs of `fused_batch_norm`.

  The first output (normalized tensor) and the second output (mean) are
  added together; the third output is discarded.

  Returns:
    The identity of that sum.
  """
  random_seed.set_random_seed(0)
  images = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
  scale = constant_op.constant(0.1, shape=[4])
  offset = constant_op.constant(0.3, shape=[4])
  normalized, batch_mean, _ = nn.fused_batch_norm(images, scale, offset)
  shifted = math_ops.add(normalized, batch_mean)
  return array_ops.identity(shifted)
def testConv2DTransposeShapeInference(self):
  """Checks the static output shape of conv2d_transpose with a dynamic batch."""
  # Test case for 8972
  filter_init = random_ops.truncated_normal(
      [3, 3, 5, 1], mean=0.0, stddev=0.01, dtype=dtypes.float32)
  x = variables.Variable(random_ops.random_normal([3, 10, 5, 1]))
  f = variable_scope.get_variable("f", initializer=filter_init)
  # The batch size is read from the input at run time; the rest is fixed.
  batch_size = array_ops.shape(x)[0]
  output_shape = array_ops.stack([batch_size, 10, 5, 5])
  output = nn_ops.conv2d_transpose(
      x, f, output_shape, strides=[1, 1, 1, 1], padding="SAME")
  self.assertEqual(output.get_shape().as_list(), [None, 10, 5, 5])
def _add_scaled_noise_to_gradients(grads_and_vars, gradient_noise_scale): """Adds scaled noise from a 0-mean normal distribution to gradients.""" gradients, variables = zip(*grads_and_vars) noisy_gradients = [] for gradient in gradients: if gradient is None: noisy_gradients.append(None) continue if isinstance(gradient, ops.IndexedSlices): gradient_shape = gradient.dense_shape else: gradient_shape = gradient.get_shape() noise = random_ops.truncated_normal(gradient_shape) * gradient_noise_scale noisy_gradients.append(gradient + noise) return list(zip(noisy_gradients, variables))
def build_graph(device, input_shape, axes, num_layers, mode, scale, train):
  """Build a graph containing a sequence of batch normalizations.

  Args:
    device: string, the device to run on.
    input_shape: shape of the input tensor.
    axes: axes that are to be normalized across.
    num_layers: number of batch normalization layers in the graph.
    mode: "op", "py" or "slow" depending on the implementation.
    scale: scale after normalization.
    train: if true, also run backprop.

  Returns:
    When `train` is true, the gradients of the final tensor with respect to
    all trainable variables; otherwise a one-element list holding the final
    tensor.
  """
  # "py" and "slow" keep the normalized axes as size-1 dimensions; "op"
  # drops them from the moment shape.
  keep_dims = mode in ("py", "slow")
  if keep_dims:
    moment_shape = [
        1 if axis in axes else dim for axis, dim in enumerate(input_shape)
    ]
  else:
    moment_shape = [
        dim for axis, dim in enumerate(input_shape) if axis not in axes
    ]

  with ops.device("/%s:0" % device):
    tensor = variables.Variable(random_ops.truncated_normal(input_shape))
    for _ in range(num_layers):
      mean, variance = nn_impl.moments(tensor, axes, keep_dims=keep_dims)
      beta = variables.Variable(array_ops.zeros(moment_shape))
      gamma = variables.Variable(
          constant_op.constant(1.0, shape=moment_shape))
      # An unrecognized mode leaves the tensor unchanged for this layer.
      if mode == "py":
        tensor = batch_norm_py(tensor, mean, variance, beta, gamma, scale)
      elif mode == "op":
        tensor = batch_norm_op(tensor, mean, variance, beta, gamma, scale)
      elif mode == "slow":
        tensor = batch_norm_slow(tensor, mean, variance, beta, gamma, scale)
    if not train:
      return [tensor]
    return gradients_impl.gradients([tensor], variables.trainable_variables())
def testSecondGradient(self):
  """Checks that hessians of sparse softmax cross entropy raise LookupError."""
  images = array_ops.placeholder(dtypes.float32, shape=(3, 2))
  labels = array_ops.placeholder(dtypes.int32, shape=(3))
  weights = variables.Variable(random_ops.truncated_normal([2], stddev=1.0))
  # Pair the weights with a column of zeros to form the [2, 2] logits matrix.
  zero_column = array_ops.zeros([2])
  weights_with_zeros = array_ops.stack([zero_column, weights], axis=1)
  logits = math_ops.matmul(images, weights_with_zeros)
  cross_entropy = nn_ops.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits)
  loss = math_ops.reduce_mean(cross_entropy)

  # Taking the second gradient should fail, since it is not
  # yet supported.
  with self.assertRaisesRegexp(LookupError, "explicitly disabled"):
    gradients_impl.hessians(loss, [weights])
def testTruncatedNormalIsInRange(self): count = 10000000 # TODO(b/34339814): make this test work with 16 bit float types. for dtype in self._random_types() & {dtypes.float32, dtypes.float64}: with self.cached_session() as sess: with self.test_scope(): x = random_ops.truncated_normal(shape=[count], dtype=dtype) y = self.evaluate(x) def normal_cdf(x): return .5 * math.erfc(-x / math.sqrt(2)) def normal_pdf(x): return math.exp(-(x**2) / 2.) / math.sqrt(2 * math.pi) def probit(x, sess=sess): return self.evaluate(special_math.ndtri(x)) a = -2. b = 2. mu = 0. sigma = 1. alpha = (a - mu) / sigma beta = (b - mu) / sigma z = normal_cdf(beta) - normal_cdf(alpha) self.assertTrue((y >= a).sum() == count) self.assertTrue((y <= b).sum() == count) # For more information on these calculations, see: # Burkardt, John. "The Truncated Normal Distribution". # Department of Scientific Computing website. Florida State University. expected_mean = mu + (normal_pdf(alpha) - normal_pdf(beta)) / z * sigma actual_mean = np.mean(y) self.assertAllClose(actual_mean, expected_mean, atol=2e-3) expected_median = mu + probit( (normal_cdf(alpha) + normal_cdf(beta)) / 2.) * sigma actual_median = np.median(y) self.assertAllClose(actual_median, expected_median, atol=1e-2) expected_variance = sigma**2 * (1 + ( (alpha * normal_pdf(alpha) - beta * normal_pdf(beta)) / z) - ( (normal_pdf(alpha) - normal_pdf(beta)) / z)**2) actual_variance = np.var(y) self.assertAllClose(actual_variance, expected_variance, rtol=2*1e-3)
def __call__(self, shape, dtype=dtypes.float32):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.

    Returns:
      The result of `random_ops.truncated_normal` for `shape`, drawn with this
      object's `mean`, `stddev` and `seed`.

    Raises:
      ValueError: If the dtype is not floating point (the check is delegated
        to `_assert_float_dtype`).
    """
    float_dtype = _assert_float_dtype(dtype)
    sample = random_ops.truncated_normal(
        shape, self.mean, self.stddev, float_dtype, seed=self.seed)
    return sample
def _initializer(shape, dtype=dtype, partition_info=None):
    """Draw truncated-normal values scaled by channel count times basis size.

    `mode`, `factor` and `seed` are read from the enclosing scope.
    `partition_info` is accepted but not used.
    """
    # Total number of basis filters.
    num_basis = shape[0] * shape[1]
    if mode == 'FAN_IN':
        # Count number of input connections.
        channels = shape[-2]
    elif mode == 'FAN_OUT':
        # Count number of output connections.
        # NOTE(review): this reads shape[-2], the same axis as FAN_IN --
        # confirm that is intended rather than shape[-1].
        channels = shape[-2]
    connections = channels * num_basis
    # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
    target_stddev = math.sqrt(factor / connections)
    return random_ops.truncated_normal(
        shape, 0.0, target_stddev / .87962566103423978, dtype, seed=seed)
def _initializer(shape, dtype=dtype, partition_info=None):
    """Initializer function.

    `mode`, `factor`, `uniform`, `other`, `scale` and `seed` are read from the
    enclosing scope.

    Args:
      shape: Shape of the tensor to create.
      dtype: Floating point dtype of the result.
      partition_info: Accepted for interface compatibility; not used.

    Returns:
      A random tensor of `shape`: a scaled normal draw when `other` is truthy,
      otherwise a uniform or truncated-normal draw depending on `uniform`.

    Raises:
      TypeError: If `dtype` is not a floating point type.
    """
    if not dtype.is_floating:
        raise TypeError(
            'Cannot create initializer for non-floating point type.')
    # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
    # This is the right thing for matrix multiply and convolutions.
    if shape:
        fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
        fan_out = float(shape[-1])
    else:
        fan_in = 1.0
        fan_out = 1.0
    # Leading dimensions scale both fans.
    for dim in shape[:-2]:
        fan_in *= float(dim)
        fan_out *= float(dim)
    if mode == 'FAN_IN':
        # Count only number of input connections.
        n = fan_in
    elif mode == 'FAN_OUT':
        # Count only number of output connections.
        n = fan_out
    elif mode == 'FAN_AVG':
        # Average number of inputs and output connections.
        n = (fan_in + fan_out) / 2.0
    if other:
        w = scale / np.sqrt(fan_in + fan_out)
        logging.info('in xavier, use other type, scale is{}'.format(scale))
        # Pass dtype through: previously this branch ignored the requested
        # (and validated) dtype and always produced the default float type.
        return w * random_ops.random_normal(shape, dtype=dtype, seed=seed)
    if uniform:
        # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
        limit = math.sqrt(3.0 * factor / n)
        return random_ops.random_uniform(shape, -limit, limit,
                                         dtype, seed=seed)
    else:
        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                           seed=seed)
def __call__(self, shape, dtype=dtypes.float32):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.

    Returns:
      A random tensor of `shape` whose spread is derived from `self.scale`
      divided by the fan selected by `self.mode`.

    Raises:
      ValueError: If the dtype is not floating point (the check is delegated
        to `_assert_float_dtype`).
    """
    partition_info = None  # Keeps logic so can be readded later if necessary
    dtype = _assert_float_dtype(dtype)
    variance = self.scale
    fan_shape = shape if partition_info is None else partition_info.full_shape
    fan_in, fan_out = _compute_fans(fan_shape)

    if self.mode == "fan_in":
        fan = fan_in
    elif self.mode == "fan_out":
        fan = fan_out
    else:
        fan = (fan_in + fan_out) / 2.
    variance = variance / max(1., fan)

    if self.distribution == "truncated_normal":
        # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
        stddev = math.sqrt(variance) / .87962566103423978
        return random_ops.truncated_normal(
            shape, 0.0, stddev, dtype, seed=self.seed)
    if self.distribution == "untruncated_normal":
        stddev = math.sqrt(variance)
        return random_ops.random_normal(
            shape, 0.0, stddev, dtype, seed=self.seed)
    limit = math.sqrt(3.0 * variance)
    return random_ops.random_uniform(
        shape, -limit, limit, dtype, seed=self.seed)
def testStridedSliceGradWithNonConstAxis(self):
    """Compare StridedSliceGrad output with and without `_get_config()`.

    Runs only when a CUDA GPU is available. The `end` input is fed through a
    placeholder, then the test checks the transpose count and the expected
    node names recorded in the run's cost graph.
    """
    if test.is_gpu_available(cuda_only=True):
        random_seed.set_random_seed(0)
        x = random_ops.truncated_normal([1, 784], seed=0)
        conv = _two_layer_model(x)
        end = array_ops.placeholder(dtype='int32')
        shape = array_ops.shape(conv)
        end_val = [1, 2, 3, 4]
        s = array_ops.strided_slice(
            conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
        s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
                                              [1, 2, 3, 1], s)
        output = array_ops.identity(s_grad)

        # Reference run with a default session.
        with session.Session() as sess:
            output_val_ref = sess.run(output, feed_dict={end: end_val})

        # Run under the test config, collecting run metadata.
        with session.Session(config=_get_config()) as sess:
            metadata = config_pb2.RunMetadata()
            output_val = sess.run(
                output, run_metadata=metadata, feed_dict={
                    end: end_val
                })

        # Record every node name; count those that are layout transposes.
        nodes = []
        num_transposes = 0
        for node in metadata.cost_graph.node:
            if node.name.startswith('LayoutOptimizerTranspose'):
                num_transposes += 1
            nodes.append(node.name)

        # Four transposes were initially added in the Expand phase of
        # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
        expected_num_transposes = 2
        self.assertEqual(expected_num_transposes, num_transposes)
        self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
        self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-StridedSliceGrad-0-0',
                      nodes)
        self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_StridedSliceGrad_2',
                      nodes)
        self.assertIn('LayoutOptimizer-StridedSlice-StridedSliceGrad/begin',
                      nodes)
        self.assertIn('LayoutOptimizer-StridedSlice-StridedSliceGrad/strides',
                      nodes)
        self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def _xavier(n_inputs, n_outputs, shape, uniform, seed, dtype):
    """Draw a Xavier-style random tensor of `shape`.

    Args:
      n_inputs: Number of input connections.
      n_outputs: Number of output connections.
      shape: Shape of the tensor to create.
      uniform: If truthy sample uniformly, otherwise from a truncated normal.
      seed: Seed forwarded to the random op.
      dtype: dtype forwarded to the random op.

    Returns:
      The tensor produced by `random_uniform` or `truncated_normal`.
    """
    fan_total = n_inputs + n_outputs
    if not uniform:
        # 3 gives us approximately the same limits as the uniform case since
        # this repicks values greater than 2 standard deviations from the mean.
        stddev = math.sqrt(3.0 / fan_total)
        return random_ops.truncated_normal(
            shape, 0.0, stddev, dtype, seed=seed)
    # 6 was used in the paper.
    init_range = math.sqrt(6.0 / fan_total)
    return random_ops.random_uniform(
        shape, -init_range, init_range, dtype, seed=seed)
def _initializer(shape, dtype=dtype, partition_info=None):
    """Initializer function that computes the fans with TensorFlow ops.

    `mode`, `factor`, `uniform` and `seed` are read from the enclosing scope.
    `partition_info` is accepted but not used.

    Raises:
      TypeError: If `dtype` is not a floating point type.
    """
    if not dtype.is_floating:
        raise TypeError(
            'Cannot create initializer for non-floating point type.')
    # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
    # This is the right thing for matrix multiply and convolutions.
    if shape:
        # fan_in is shape[-2] when there is more than one dimension, else
        # shape[-1]; the choice is expressed as a tf.case over len(shape).
        f1 = lambda: tf.cast(shape[-2], dtype)
        f2 = lambda: tf.cast(shape[-1], dtype)
        fan_in = tf.case([(tf.greater(len(shape), 1), f1)], default=f2)
        fan_out = tf.cast(shape[-1], dtype)
    else:
        fan_in = 1.0
        fan_out = 1.0
    # Leading dimensions multiply both fans.
    # NOTE(review): `dim` is multiplied without a cast to `dtype` -- confirm
    # the element type of `shape` is compatible with the fans.
    for dim in shape[:-2]:
        fan_in = tf.multiply(dim, fan_in)
        fan_out = tf.multiply(dim, fan_out)
    if mode == 'FAN_IN':
        # Count only number of input connections.
        n = fan_in
    elif mode == 'FAN_OUT':
        # Count only number of output connections.
        n = fan_out
    elif mode == 'FAN_AVG':
        # Average number of inputs and output connections.
        n = tf.divide(tf.add(fan_in, fan_out), 2.0)
    if uniform:
        # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
        limit = tf.sqrt(tf.divide(tf.multiply(3.0, factor), n))
        return random_ops.random_uniform(shape, -limit, limit,
                                         dtype, seed=seed)
    else:
        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = tf.sqrt(tf.divide(tf.multiply(1.3, factor), n))
        return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                           seed=seed)
def _initializer(shape, dtype=dtype, partition_info=None):
    """Initializer function.

    `mode`, `factor`, `uniform` and `seed` are read from the enclosing scope.
    `partition_info` is accepted but not used.

    Raises:
      TypeError: If `dtype` is not a floating point type.
    """
    if not dtype.is_floating:
        raise TypeError(
            'Cannot create initializer for non-floating point type.')
    # fan_in is the second-to-last dimension (or the last one for rank-1
    # shapes); fan_out is the last dimension. An empty shape uses 1.0 for both.
    if shape:
        fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
        fan_out = float(shape[-1])
    else:
        fan_in = 1.0
        fan_out = 1.0
    # Leading dimensions multiply both fans.
    for dim in shape[:-2]:
        fan_in *= float(dim)
        fan_out *= float(dim)
    if mode == 'FAN_IN':
        # Count only number of input connections.
        n = fan_in
    elif mode == 'FAN_OUT':
        # Count only number of output connections.
        n = fan_out
    elif mode == 'FAN_AVG':
        # Average number of inputs and output connections.
        n = (fan_in + fan_out) / 2.0
    if uniform:
        # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
        limit = math.sqrt(3.0 * factor / n)
        return random_ops.random_uniform(shape, -limit, limit,
                                         dtype, seed=seed)
    else:
        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                           seed=seed)

# NOTE(review): this return appears to belong to the enclosing factory
# function, whose header is outside this view -- confirm its indentation.
return _initializer
def model_fn(features, labels, mode, params):
    """Model function defining an inpainting estimator.

    Creates a clipped latent variable, builds the GAN model on top of it and
    minimizes `loss_fn` with respect to that latent variable only.

    Args:
      features: Passed through to `loss_fn`.
      labels: Used as the GAN's `real_data` and passed to `loss_fn`.
      mode: Forwarded to the generator/discriminator functions and to the
        returned `EstimatorSpec`.
      params: Dict read for 'batch_size', 'z_shape', 'add_summaries',
        'input_clip', 'learning_rate' and 'opt_kwargs'.

    Returns:
      An `EstimatorSpec` with the generated data as predictions.
    """
    latent_shape = [params['batch_size']] + params['z_shape']
    add_summaries = params['add_summaries']
    clip_bound = params['input_clip']

    def _clip_latent(value):
        # Keeps the latent variable inside [-clip_bound, clip_bound].
        return clip_ops.clip_by_value(value, -clip_bound, clip_bound)

    z = variable_scope.get_variable(
        name=INPUT_NAME,
        initializer=random_ops.truncated_normal(latent_shape),
        constraint=_clip_latent)

    gan_model = tfgan_train.gan_model(
        generator_fn=functools.partial(generator_fn, mode=mode),
        discriminator_fn=functools.partial(discriminator_fn, mode=mode),
        real_data=labels,
        generator_inputs=z,
        check_shapes=False)

    loss = loss_fn(gan_model, features, labels, add_summaries)

    # Use a variable scope to make sure that estimator variables dont cause
    # save/load problems when restoring from ckpts.
    with variable_scope.variable_scope(OPTIMIZER_NAME):
        opt = optimizer(learning_rate=params['learning_rate'],
                        **params['opt_kwargs'])
        # Only the latent variable is optimized.
        train_op = opt.minimize(
            loss=loss,
            global_step=training_util.get_or_create_global_step(),
            var_list=[z])

    if add_summaries:
        latent_grads = gradients_impl.gradients(loss, z)
        summary.scalar('z_loss/z_grads', clip_ops.global_norm(latent_grads))
        summary.scalar('z_loss/loss', loss)

    return model_fn_lib.EstimatorSpec(
        mode=mode,
        predictions=gan_model.generated_data,
        loss=loss,
        train_op=train_op)
def _initializer(shape, dtype=_assert_float_dtype(dtype), partition_info=None):
    """Draw a variance-scaled random tensor of `shape`.

    `scale_`, `mode`, `distribution` and `seed` are read from the enclosing
    scope. When `partition_info` is given, the fans are computed from its
    `full_shape` instead of `shape`.
    """
    fan_shape = shape if partition_info is None else partition_info.full_shape
    fan_in, fan_out = _compute_fans(fan_shape)

    if mode == "fan_in":
        fan = fan_in
    elif mode == "fan_out":
        fan = fan_out
    else:
        fan = (fan_in + fan_out) / 2.
    variance = scale_ / max(1., fan)

    if distribution != "normal":
        limit = math.sqrt(3.0 * variance)
        return random_ops.random_uniform(shape, -limit, limit,
                                         dtype, seed=seed)
    stddev = math.sqrt(variance)
    return random_ops.truncated_normal(shape, 0.0, stddev,
                                       dtype, seed=seed)
def __call__for_keras_init_v1(self, shape, dtype=None, partition_info=None):
    """Making keras VarianceScaling initializers v1 support dynamic shape.

    Same flow as a static-shape variance-scaling call, except that the fans
    are cast to `dtype` and the arithmetic uses `math_ops`.

    Args:
      shape: Shape of the tensor to create.
      dtype: dtype of the result; falls back to `self.dtype` when None.
      partition_info: Optional; when given, its `full_shape` is used to
        compute the fans.

    Returns:
      A random tensor of `shape`.
    """
    out_dtype = self.dtype if dtype is None else dtype
    variance = self.scale
    fan_shape = shape if partition_info is None else partition_info.full_shape
    fan_in, fan_out = _compute_fans_for_keras_init_v1_v2(fan_shape)
    fan_in = math_ops.cast(fan_in, dtype=out_dtype)
    fan_out = math_ops.cast(fan_out, dtype=out_dtype)

    if self.mode == "fan_in":
        variance = variance / math_ops.maximum(1., fan_in)
    elif self.mode == "fan_out":
        variance = variance / math_ops.maximum(1., fan_out)
    else:
        variance = variance / math_ops.maximum(1., (fan_in + fan_out) / 2.)

    if self.distribution in ("normal", "truncated_normal"):
        # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
        stddev = math_ops.sqrt(variance) / .87962566103423978
        return random_ops.truncated_normal(
            shape, 0.0, stddev, out_dtype, seed=self.seed)
    if self.distribution == "untruncated_normal":
        stddev = math_ops.sqrt(variance)
        return random_ops.random_normal(
            shape, 0.0, stddev, out_dtype, seed=self.seed)
    limit = math_ops.sqrt(3.0 * variance)
    return random_ops.random_uniform(
        shape, -limit, limit, out_dtype, seed=self.seed)
def testSplitVWithNonConstAxis(self):
    """Compare SplitV output with and without `_get_config()`.

    Runs only when a CUDA GPU is available. The split axis is fed through a
    placeholder, then the test checks the transpose count and the expected
    node names recorded in the run's cost graph.
    """
    if test.is_gpu_available(cuda_only=True):
        random_seed.set_random_seed(0)
        x = random_ops.truncated_normal([1, 784], seed=0)
        conv = _two_layer_model(x)
        dim = array_ops.placeholder(dtype='int32')
        sizes = constant_op.constant([50, 10, 4], shape=[3])
        split = gen_array_ops._split_v(
            value=conv, size_splits=sizes, axis=dim, num_split=3)
        output = math_ops.reduce_sum(split[0])

        # Reference run with a default session.
        with session.Session() as sess:
            output_val_ref = sess.run(output, feed_dict={dim: 3})

        # Run under the test config, collecting run metadata.
        with session.Session(config=_get_config()) as sess:
            metadata = config_pb2.RunMetadata()
            output_val = sess.run(
                output, run_metadata=metadata, feed_dict={dim: 3})

        # Record every node name; count those that are layout transposes.
        nodes = []
        num_transposes = 0
        for node in metadata.cost_graph.node:
            if node.name.startswith('LayoutOptimizerTranspose'):
                num_transposes += 1
            nodes.append(node.name)

        # Four transposes were initially added in the Expand phase of
        # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
        expected_num_transposes = 2
        self.assertEqual(expected_num_transposes, num_transposes)
        self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
        self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-SplitV-0-0', nodes)
        self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_SplitV_2', nodes)
        self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def parameterized_vs_naive(shape, num_iters, use_gpu=False):
    """Time parameterized_truncated_normal against truncated_normal.

    Args:
      shape: Shape of the tensor each op generates.
      num_iters: Number of timed `sess.run` calls per op.
      use_gpu: If false the ops are pinned to "/cpu:0"; otherwise no device
        is requested.

    Returns:
      A `(param_dt, naive_dt)` tuple of total elapsed times from `timeit`.
    """
    np.random.seed(1618)  # Make it reproducible.

    # No CSE/CF.
    session_config = tf.ConfigProto(
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0)))

    with tf.Session(config=session_config) as sess:
        with tf.device(None if use_gpu else "/cpu:0"):
            param_op = tf.group(
                random_ops.parameterized_truncated_normal(shape))
            naive_op = tf.group(random_ops.truncated_normal(shape))

        def _timed(op):
            # Burn-in to avoid session setup costs in the timing.
            sess.run(op)
            sess.run(op)
            return timeit.timeit(lambda: sess.run(op), number=num_iters)

        param_dt = _timed(param_op)
        naive_dt = _timed(naive_op)
        return param_dt, naive_dt
def __call__(self, shape, dtype=None, partition_info=None):
    """Draw a variance-scaled random tensor of `shape`.

    Args:
      shape: Shape of the tensor to create.
      dtype: dtype of the result; falls back to `self.dtype` when None.
      partition_info: Optional; when given, its `full_shape` is used to
        compute the fans.

    Returns:
      A truncated-normal tensor when `self.distribution` is "normal",
      otherwise a uniform tensor.
    """
    out_dtype = self.dtype if dtype is None else dtype
    variance = self.scale
    fan_shape = shape if partition_info is None else partition_info.full_shape
    fan_in, fan_out = _compute_fans(fan_shape)

    if self.mode == "fan_in":
        fan = fan_in
    elif self.mode == "fan_out":
        fan = fan_out
    else:
        fan = (fan_in + fan_out) / 2.
    variance = variance / max(1., fan)

    if self.distribution != "normal":
        limit = math.sqrt(3.0 * variance)
        return random_ops.random_uniform(
            shape, -limit, limit, out_dtype, seed=self.seed)
    stddev = math.sqrt(variance)
    return random_ops.truncated_normal(
        shape, 0.0, stddev, out_dtype, seed=self.seed)
def testGradient(self):
    """Gradients flow through an imported graph with remapped inputs."""
    # Build a small relu(matmul + bias) graph and serialize it.
    with ops.Graph().as_default() as source_graph:
        inputs = array_ops.placeholder(
            dtypes.float32, shape=[None, 100], name="input")
        weights = array_ops.placeholder(
            dtypes.float32, shape=[100, 10], name="weights")
        biases = array_ops.placeholder(
            dtypes.float32, shape=[10], name="biases")
        activations = nn_ops.relu(
            math_ops.matmul(inputs, weights) + biases, name="activations")
        math_ops.reduce_mean(activations, name="loss")

    gdef = source_graph.as_graph_def()

    # Import it into a fresh graph, replacing the placeholders.
    with ops.Graph().as_default():
        input_placeholder = array_ops.placeholder(
            dtypes.float32, shape=[32, 100])
        weights_var = variables.Variable(
            random_ops.truncated_normal([100, 10]), name="weights")
        biases_var = variables.Variable(array_ops.zeros([10]), name="biases")
        replacements = {
            "input:0": input_placeholder,
            "weights:0": weights_var,
            "biases:0": biases_var
        }
        activations, loss = importer.import_graph_def(
            gdef,
            input_map=replacements,
            return_elements=["activations:0", "loss:0"])
        self.assertEqual([32, 10], activations.get_shape())
        self.assertEqual([], loss.get_shape())

        weights_grad, biases_grad = gradients_impl.gradients(
            loss, [weights_var, biases_var])
        self.assertEqual([100, 10], weights_grad.get_shape())
        self.assertEqual([10], biases_grad.get_shape())
def testMaxPoolGradV2(self):
    """Compare MaxPoolGradV2 output with and without `_get_config()`.

    Runs only when a CUDA GPU is available. The strides are fed through a
    placeholder, then the test checks the transpose count and the expected
    node names recorded in the run's cost graph.
    """
    if test.is_gpu_available(cuda_only=True):
        random_seed.set_random_seed(0)
        x = random_ops.truncated_normal([1, 784], seed=0)
        conv = _two_layer_model(x)
        ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
        strides = array_ops.placeholder(dtype='int32', shape=[4])
        # The same tensor is passed for all three tensor inputs of the op.
        max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
                                                    strides, 'VALID')
        output = array_ops.identity(max_pool_grad)

        strides_val = [1, 3, 2, 1]
        # Reference run with a default session.
        with session.Session() as sess:
            output_val_ref = sess.run(output, feed_dict={strides: strides_val})

        # Run under the test config, collecting run metadata.
        with session.Session(config=_get_config()) as sess:
            metadata = config_pb2.RunMetadata()
            output_val = sess.run(
                output, run_metadata=metadata, feed_dict={
                    strides: strides_val
                })

        # Record every node name; count those that are layout transposes.
        nodes = []
        num_transposes = 0
        for node in metadata.cost_graph.node:
            if node.name.startswith('LayoutOptimizerTranspose'):
                num_transposes += 1
            nodes.append(node.name)

        expected_num_transposes = 2
        self.assertEqual(expected_num_transposes, num_transposes)
        self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
        self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-MaxPoolGradV2-0-0',
                      nodes)
        self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_MaxPoolGradV2_4',
                      nodes)
        self.assertIn('LayoutOptimizer-MaxPoolGradV2-Const_2', nodes)
        self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def testStridedSliceWithNonConstAxis(self):
    """Compare StridedSlice output with and without `_get_config()`.

    Runs only when a CUDA GPU is available. The slice `end` is fed through a
    placeholder, then the test checks the transpose count and the expected
    node names recorded in the run's cost graph.
    """
    if test.is_gpu_available(cuda_only=True):
        random_seed.set_random_seed(0)
        x = random_ops.truncated_normal([1, 784], seed=0)
        conv = _two_layer_model(x)
        end = array_ops.placeholder(dtype='int32')
        s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
        output = array_ops.identity(s)

        end_val = [1, 2, 3, 4]
        # Reference run with a default session.
        with session.Session() as sess:
            output_val_ref = sess.run(output, feed_dict={end: end_val})

        # Run under the test config, collecting run metadata.
        with session.Session(config=_get_config()) as sess:
            metadata = config_pb2.RunMetadata()
            output_val = sess.run(
                output, run_metadata=metadata, feed_dict={
                    end: end_val
                })

        # Record every node name; count those `_is_transpose` accepts.
        nodes = []
        num_transposes = 0
        for node in metadata.cost_graph.node:
            if _is_transpose(node.name):
                num_transposes += 1
            nodes.append(node.name)

        # Four transposes were initially added in the Expand phase of
        # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
        expected_num_transposes = 2
        self.assertEqual(expected_num_transposes, num_transposes)
        self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
        self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
        self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
        self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
        self.assertIn('StridedSlice-3-LayoutOptimizer', nodes)
        self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def sequence_softmax(inputs, noutputs, scope=None, name=None, linear_name=None):
    """Run a softmax layer over all time_steps of an input sequence.

    One weight matrix and one bias vector are created and shared by every
    time step.

    Args:
      inputs: (seq_length, batch_size, depth) tensor
      noutputs: output_depth
      scope: optional scope name
      name: optional name for output tensor
      linear_name: optional name for linear (pre-softmax) output

    Returns:
      A tensor of size (seq_length, batch_size, noutputs)
    """
    seq_length, _, ninputs = _shape(inputs)
    steps = array_ops.unstack(inputs)
    step_outputs = []
    with variable_scope.variable_scope(scope, "Sequential_Softmax", [inputs]):
        w = variables.model_variable(
            "weights",
            initializer=random_ops.truncated_normal(
                [0 + ninputs, noutputs], stddev=0.1))
        b = variables.model_variable(
            "biases",
            initializer=constant_op.constant(0.1, shape=[noutputs]))
        for i in xrange(seq_length):
            with variable_scope.variable_scope(scope, "Sequence_Softmax_Step",
                                               [steps[i]]):
                linear = nn_ops.xw_plus_b_v1(steps[i], w, b, name=linear_name)
                step_outputs.append(nn_ops.softmax(linear))
        outputs = array_ops.stack(step_outputs, name=name)
    return outputs
def __call__(self, shape, dtype=None, partition_info=None):
    """Draw a variance-scaled random tensor of `shape`.

    Args:
      shape: Shape of the tensor to create.
      dtype: dtype of the result; falls back to `self.dtype` when None.
      partition_info: Optional; when given, its `full_shape` is used to
        compute the fans.

    Returns:
      A truncated-normal, normal or uniform tensor depending on
      `self.distribution`.
    """
    out_dtype = self.dtype if dtype is None else dtype
    variance = self.scale
    fan_shape = shape if partition_info is None else partition_info.full_shape
    fan_in, fan_out = _compute_fans(fan_shape)

    if self.mode == "fan_in":
        divisor = max(1., fan_in)
    elif self.mode == "fan_out":
        divisor = max(1., fan_out)
    else:
        divisor = max(1., (fan_in + fan_out) / 2.)
    variance = variance / divisor

    if self.distribution in ("normal", "truncated_normal"):
        # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
        stddev = math.sqrt(variance) / .87962566103423978
        return random_ops.truncated_normal(
            shape, 0.0, stddev, out_dtype, seed=self.seed)
    if self.distribution == "untruncated_normal":
        stddev = math.sqrt(variance)
        return random_ops.random_normal(
            shape, 0.0, stddev, out_dtype, seed=self.seed)
    limit = math.sqrt(3.0 * variance)
    return random_ops.random_uniform(
        shape, -limit, limit, out_dtype, seed=self.seed)
def testStridedSliceWithMask(self):
    """Compare a masked StridedSlice with and without `_get_config()`.

    Runs only when a CUDA GPU is available, then checks the transpose count
    and the expected node names recorded in the run's cost graph.
    """
    if test.is_gpu_available(cuda_only=True):
        random_seed.set_random_seed(0)
        x = random_ops.truncated_normal([1, 784], seed=0)
        conv = _two_layer_model(x)
        # This will generate a StridedSlice op with begin mask and end mask.
        s = conv[:, :, 1:-1, :]
        output = array_ops.identity(s)

        # Reference run with a default session.
        with session.Session() as sess:
            output_val_ref = sess.run(output)

        # Run under the test config, collecting run metadata.
        with session.Session(config=_get_config()) as sess:
            metadata = config_pb2.RunMetadata()
            output_val = sess.run(output, run_metadata=metadata)

        # Record every node name; count those that are layout transposes.
        nodes = []
        num_transposes = 0
        for node in metadata.cost_graph.node:
            if node.name.startswith('LayoutOptimizerTranspose'):
                num_transposes += 1
            nodes.append(node.name)

        # Four transposes were initially added in the Expand phase of
        # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
        expected_num_transposes = 2
        self.assertEqual(expected_num_transposes, num_transposes)
        self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
        self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-strided_slice-0-0',
                      nodes)
        self.assertIn('LayoutOptimizer-strided_slice-strided_slice/stack', nodes)
        self.assertIn('LayoutOptimizer-strided_slice-strided_slice/stack_1',
                      nodes)
        self.assertIn('LayoutOptimizer-strided_slice-strided_slice/stack_2',
                      nodes)
        self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def _initializer(shape, dtype=dtype, partition_info=None):
    """Draw a variance-scaled random tensor and multiply it by `scale_factor`.

    `mode`, `factor`, `uniform`, `seed` and `scale_factor` are read from the
    enclosing scope. `partition_info` is discarded.
    """
    del partition_info
    if not shape:
        fan_in = 1.
        fan_out = 1.
    else:
        fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
        fan_out = float(shape[-1])
    # Leading dimensions multiply both fans.
    for leading_dim in shape[:-2]:
        fan_in *= float(leading_dim)
        fan_out *= float(leading_dim)

    if mode == 'FAN_IN':
        n = fan_in
    elif mode == 'FAN_OUT':
        n = fan_out
    else:  # mode == 'FAN_AVG':
        n = (fan_in + fan_out) / 2.

    if not uniform:
        trunc_stddev = math.sqrt(1.3 * factor / n)
        sample = random_ops.truncated_normal(shape, 0., trunc_stddev, dtype,
                                             seed=seed)
    else:
        limit = math.sqrt(3.0 * factor / n)
        sample = random_ops.random_uniform(shape, -limit, limit, dtype,
                                           seed=seed)
    return sample * scale_factor
def __call__(self, shape, dtype=None, partition_info=None):
    """Draw a truncated-normal tensor of `shape`.

    Args:
      shape: Shape of the tensor to create.
      dtype: dtype of the result; falls back to `self.dtype` when None.
      partition_info: Accepted for interface compatibility; not used.

    Returns:
      The tensor produced by `random_ops.truncated_normal` with this object's
      `mean`, `stddev` and `seed`.
    """
    out_dtype = self.dtype if dtype is None else dtype
    return random_ops.truncated_normal(
        shape, self.mean, self.stddev, out_dtype, seed=self.seed)
def testLargeShape(self):
    """truncated_normal keeps a static shape whose first dim exceeds 2**32."""
    with self.test_session(use_gpu=True):
        big_var = variables.Variable(
            array_ops.zeros(dtype=dtypes.float32, shape=[2**33, 1]))
        sample = random_ops.truncated_normal(big_var.shape)
        self.assertEqual([8589934592, 1], sample.shape.as_list())
def _testScopedExport(self, test_dir, exported_filenames):
    """Build a three-layer graph and export each name scope separately.

    Args:
      test_dir: Directory the meta graph files are written into.
      exported_filenames: Three file names, one for each of the "hidden1",
        "hidden2" and "softmax_linear" scopes, in that order.

    Returns:
      A list with the three exported meta graphs, in the same order.
    """
    graph = ops.Graph()
    with graph.as_default():
        # Creates an inference graph.
        # Hidden 1
        colocate_constraint = constant_op.constant(1.2, name="constraint")
        images = constant_op.constant(1.2, dtypes.float32, shape=[100, 28], name="images")
        with ops.name_scope("hidden1"):
            with graph.colocate_with(colocate_constraint.op):
                weights1 = variables.Variable(random_ops.truncated_normal(
                    [28, 128], stddev=1.0 / math.sqrt(float(28))), name="weights")
            # The use of control_flow_ops.cond here is purely for adding test
            # coverage of the save and restore of control flow context (which
            # doesn't make any sense here from a machine learning perspective).
            # Typically the biases are a simple Variable without the conditions.
            biases1 = variables.Variable(control_flow_ops.cond(
                math_ops.less(random.random(), 0.5),
                lambda: array_ops.ones([128]),
                lambda: array_ops.zeros([128])), name="biases")
            hidden1 = nn_ops.relu(
                math_ops.matmul(images, weights1) + biases1)
        # Hidden 2
        with ops.name_scope("hidden2"):
            weights2 = variables.Variable(random_ops.truncated_normal(
                [128, 32], stddev=1.0 / math.sqrt(float(128))), name="weights")

            # The use of control_flow_ops.while_loop here is purely for adding
            # test coverage of the save and restore of control flow context
            # (which doesn't make any sense here from a machine learning
            # perspective). Typically the biases are a simple Variable without
            # the conditions.
            def loop_cond(it, _):
                return it < 2

            def loop_body(it, biases2):
                biases2 += constant_op.constant(0.1, shape=[32])
                return it + 1, biases2

            _, biases2 = control_flow_ops.while_loop(
                loop_cond, loop_body, [
                    constant_op.constant(0),
                    variables.Variable(array_ops.zeros([32]), name="biases")
                ])
            hidden2 = nn_ops.relu(
                math_ops.matmul(hidden1, weights2) + biases2)
        # Linear
        with ops.name_scope("softmax_linear"):
            weights3 = variables.Variable(random_ops.truncated_normal(
                [32, 10], stddev=1.0 / math.sqrt(float(32))), name="weights")
            biases3 = variables.Variable(array_ops.zeros([10]), name="biases")
            logits = math_ops.matmul(hidden2, weights3) + biases3
            ops.add_to_collection("logits", logits)

        # Exports each sub-graph.
        # Exports the first one with unbound_inputs_col_name set to default.
        orig_meta_graph1, var_list = meta_graph.export_scoped_meta_graph(
            filename=os.path.join(test_dir, exported_filenames[0]),
            graph=ops.get_default_graph(), export_scope="hidden1")
        # Keys of the returned var_list have the export scope stripped, while
        # the variables themselves keep their fully scoped names.
        self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys()))
        var_names = [v.name for _, v in var_list.items()]
        self.assertEqual(["hidden1/biases:0", "hidden1/weights:0"],
                         sorted(var_names))

        # Exports the rest with no unbound_inputs_col_name.
        orig_meta_graph2, _ = meta_graph.export_scoped_meta_graph(
            filename=os.path.join(test_dir, exported_filenames[1]),
            graph=ops.get_default_graph(), export_scope="hidden2",
            unbound_inputs_col_name=None)
        orig_meta_graph3, _ = meta_graph.export_scoped_meta_graph(
            filename=os.path.join(test_dir, exported_filenames[2]),
            graph=ops.get_default_graph(), export_scope="softmax_linear",
            unbound_inputs_col_name=None)

    return [orig_meta_graph1, orig_meta_graph2, orig_meta_graph3]
def __call__(self, shape, dtype=None, partition_info=None):
    """Draw a truncated-normal tensor and pass it through `unit`.

    Args:
      shape: Shape of the tensor to create.
      dtype: dtype of the result; falls back to `self.dtype` when None.
      partition_info: Accepted for interface compatibility; not used.

    Returns:
      The result of `unit(sample, self.eps)`.
    """
    out_dtype = self.dtype if dtype is None else dtype
    sample = random_ops.truncated_normal(
        shape, 0, 1.0, out_dtype, seed=self.seed)
    return unit(sample, self.eps)
def variance_scaling_initializer(shape, factor=2.0, mode='FAN_IN',
                                 uniform=False, seed=None,
                                 dtype=dtypes.float32, mask=None):
    """Returns a variance-scaled random tensor of `shape`, optionally masked.

    When initializing a deep network, it is in principle advantageous to keep
    the scale of the input variance constant, so it does not explode / diminish
    by reaching the final layer. This initializer use the following formula:

    ```python
      if mode='FAN_IN': # Count only number of input connections.
        n = fan_in
      elif mode='FAN_OUT': # Count only number of output connections.
        n = fan_out
      elif mode='FAN_AVG': # Average number of inputs and output connections.
        n = (fan_in + fan_out)/2.0

        truncated_normal(shape, 0.0, stddev=sqrt(factor / n))
    ```

    * To get [Delving Deep into Rectifiers](
       http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
      `factor=2.0 mode='FAN_IN' uniform=False`
    * To get [Convolutional Architecture for Fast Feature Embedding](
       http://arxiv.org/abs/1408.5093), use:<br/>
      `factor=1.0 mode='FAN_IN' uniform=True`
    * To get [Understanding the difficulty of training deep feedforward neural
      networks](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf),
      use:<br/>
      `factor=1.0 mode='FAN_AVG' uniform=True.`
    * To get `xavier_initializer` use either:<br/>
      `factor=1.0 mode='FAN_AVG' uniform=True`, or<br/>
      `factor=1.0 mode='FAN_AVG' uniform=False`.

    Args:
      shape: Shape of the tensor to create; also used to estimate the fans.
      factor: Float.  A multiplicative factor.
      mode: String.  'FAN_IN', 'FAN_OUT', 'FAN_AVG'.
      uniform: Whether to use uniform or normal distributed random.
      seed: A Python integer. Used to create random seeds. See
            @{tf.set_random_seed} for behavior.
      dtype: The data type. Only floating point types are supported.
      mask: Optional value multiplied element-wise with the random tensor.

    Returns:
      The random tensor, multiplied by `mask` when `mask` is not None.

    Raises:
      TypeError: if `dtype` is not a floating point type.
      TypeError: if `mode` is not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG'].
    """
    if not dtype.is_floating:
        raise TypeError(
            'Cannot create initializer for non-floating point type.')
    if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']:
        # Interpolate the mode into the message; it used to be passed as a
        # second exception argument and was never formatted in.
        raise TypeError(
            'Unknow mode %s [FAN_IN, FAN_OUT, FAN_AVG]' % (mode,))

    # Estimating fan_in and fan_out is not perfect, but we try.
    # This is the right thing for matrix multiply and convolutions.
    if shape:
        fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
        fan_out = float(shape[-1])
    else:
        fan_in = 1.0
        fan_out = 1.0
    # Leading dimensions multiply both fans.
    for dim in shape[:-2]:
        fan_in *= float(dim)
        fan_out *= float(dim)

    if mode == 'FAN_IN':
        # Count only number of input connections.
        n = fan_in
    elif mode == 'FAN_OUT':
        # Count only number of output connections.
        n = fan_out
    else:
        # mode == 'FAN_AVG' (validated above): average number of inputs and
        # output connections.
        n = (fan_in + fan_out) / 2.0

    if uniform:
        # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
        limit = math.sqrt(3.0 * factor / n)
        init = random_ops.random_uniform(shape, -limit, limit,
                                         dtype, seed=seed)
    else:
        # To get stddev = math.sqrt(factor / n) adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        init = random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                           seed=seed)
    if mask is not None:
        return mask * init
    return init