def call(self, inputs, **kwargs):
  if type(inputs) is list:
    features = inputs[0]
    mask = inputs[1]
  else:
    # if no mask is provided, get it from the features
    features = inputs
    mask = tf.expand_dims(tf.reduce_sum(features, axis=-1), axis=-1)
    mask = tf.where(tf.equal(mask, 0), tf.zeros_like(mask),
                    tf.ones_like(mask))

  features = tf.multiply(features, mask)
  features = nn_ops.convolution(features, self.kernel, self.padding.upper(),
                                self.strides, self.dilation_rate)

  kernel = tf.ones([*self.kernel_size, 1, 1])
  norm = nn_ops.convolution(mask, kernel, self.padding.upper(),
                            self.strides, self.dilation_rate)

  if self.binary:
    mask = nn_ops.pool(mask, self.kernel_size, 'MAX', self.padding.upper(),
                       self.dilation_rate, self.strides)
  else:
    mask = norm / np.prod(self.kernel_size)

  norm = tf.where(tf.equal(norm, 0), tf.zeros_like(norm),
                  tf.reciprocal(norm))
  features = tf.multiply(features, norm)

  if self.use_bias:
    features = tf.add(features, self.bias)
  return [features, mask]

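# Illustrative sketch (not part of the snippets in this collection): the
# renormalization used by the partial-convolution call() above can be checked
# with plain ops. Convolving the mask with an all-ones kernel counts the valid
# inputs under each window; multiplying by its reciprocal makes the output
# independent of how many inputs were masked out. All names here are ad hoc.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

features = tf.constant(np.random.rand(1, 5, 5, 1).astype(np.float32))
mask = tf.constant((np.random.rand(1, 5, 5, 1) > 0.3).astype(np.float32))
ones_kernel = tf.ones([3, 3, 1, 1])

conv = tf.nn.convolution(features * mask, ones_kernel, padding="SAME")
norm = tf.nn.convolution(mask, ones_kernel, padding="SAME")
# Zero out windows that contained no valid pixels instead of dividing by zero.
out = tf.where(tf.equal(norm, 0), tf.zeros_like(conv),
               conv / tf.maximum(norm, 1.0))

with tf.Session() as sess:
  print(sess.run(out).shape)  # (1, 5, 5, 1)
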
def testAtrousSequence(self):
  """Tests optimization of sequence of atrous convolutions.

  See the documentation of with_space_to_batch.
  """
  with self._delay_checks() as add_check:
    for padding in ["SAME", "VALID"]:
      for height in range(15, 17):
        for width in range(15, 17):
          x_shape = [3, height, width, 2]
          x = np.random.random_sample(x_shape).astype(np.float32)

          kernel_sizes = [1, 3] if padding == "SAME" else range(1, 3)
          for kernel in kernel_sizes:
            f_shape = [kernel, kernel, 2, 2]
            f1 = 1e-2 * np.random.random_sample(f_shape).astype(np.float32)
            f2 = 1e-2 * np.random.random_sample(f_shape).astype(np.float32)

            def combined_op(converted_input, num_spatial_dims, padding_arg):  # pylint: disable=unused-argument
              # pylint: disable=cell-var-from-loop
              result = nn_ops.convolution(
                  input=converted_input, filter=f1, padding=padding)
              result = nn_ops.convolution(
                  input=result, filter=f2, padding=padding)
              # pylint: enable=cell-var-from-loop
              return result

            for rate_height in range(2, 4):
              for rate_width in range(2, 4):
                dilation_rate = [rate_height, rate_width]
                y1 = nn_ops.convolution(
                    input=x,
                    filter=f1,
                    padding=padding,
                    dilation_rate=dilation_rate)
                y1 = nn_ops.convolution(
                    input=y1,
                    filter=f2,
                    padding=padding,
                    dilation_rate=dilation_rate)
                y2 = nn_ops.with_space_to_batch(
                    input=x,
                    dilation_rate=dilation_rate,
                    op=combined_op,
                    padding="VALID")

                def check(y1_eval, y2_eval):
                  self.assertAllClose(y1_eval, y2_eval, rtol=1e-2, atol=1e-2)

                add_check(check, y1, y2)

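# Sketch of the identity exercised by testAtrousSequence, reduced to a single
# convolution: with "VALID" padding, a dilated convolution matches
# with_space_to_batch wrapping an undilated one. Shapes and names below are
# illustrative only, not taken from the test above.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.constant(np.random.rand(1, 9, 9, 1).astype(np.float32))
f = tf.constant(np.random.rand(3, 3, 1, 1).astype(np.float32))
rate = [2, 2]

y1 = tf.nn.convolution(x, f, padding="VALID", dilation_rate=rate)
y2 = tf.nn.with_space_to_batch(
    x,
    dilation_rate=rate,
    padding="VALID",
    op=lambda inp, num_spatial_dims, pad: tf.nn.convolution(
        inp, f, padding=pad))

with tf.Session() as sess:
  a, b = sess.run([y1, y2])
  print(np.allclose(a, b, rtol=1e-5, atol=1e-5))  # True
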
def _test_atrous_convolution(self, input_shape, filter_shape, dilation_rate,
                             **kwargs):
  filters = np.arange(
      np.prod(filter_shape), dtype=np.float32).reshape(filter_shape)
  filters_upsampled = upsample_filters(filters, dilation_rate)
  x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape)
  y1 = nn_ops.convolution(
      input=x, filter=filters, dilation_rate=dilation_rate, **kwargs)
  y2 = nn_ops.convolution(input=x, filter=filters_upsampled, **kwargs)
  self.assertAllClose(y1.eval(), y2.eval(), rtol=1e-2, atol=1e-2)

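# The upsample_filters() helper used above is not shown in this snippet. A
# minimal version consistent with its use, inserting rate[i] - 1 zeros between
# consecutive taps along each spatial dimension i, could look like this:
import numpy as np

def upsample_filters(filters, rate):
  """Upsamples filters by inserting rate[i] - 1 zeros along spatial dim i."""
  num_spatial_dims = len(rate)
  spatial_shape = np.array(filters.shape[:num_spatial_dims])
  output_spatial_shape = (spatial_shape - 1) * rate + 1
  output = np.zeros(
      tuple(output_spatial_shape) + tuple(filters.shape[num_spatial_dims:]),
      filters.dtype)
  # Place the original taps on a strided grid; everything else stays zero.
  output[tuple(np.s_[::rate[i]] for i in range(num_spatial_dims))] = filters
  return output
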
def testDataLayout(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa1 = array_ops.placeholder(np.float32, [1, 14, 14, 64], name="a")
      pb1 = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
      bi1 = array_ops.placeholder(np.float32, [128], name="b")
      op1 = nn_ops.convolution(pa1, pb1, padding="SAME", data_format='NHWC')
      op1 = nn_ops.bias_add(op1, bi1, data_format='NHWC')

      pa2 = array_ops.placeholder(np.float32, [1, 64, 14, 14], name="a")
      pb2 = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
      bi2 = array_ops.placeholder(np.float32, [128], name="b")
      op2 = nn_ops.convolution(pa2, pb2, padding="SAME", data_format='NCHW')
      op2 = nn_ops.bias_add(op2, bi2, data_format='NCHW')

    report = tu.ReportJSON(self, sess)
    report.reset()

    fd = {
        pa1: np.zeros([1, 14, 14, 64]),
        pb1: np.zeros([3, 3, 64, 128]),
        bi1: np.zeros([128]),
        pa2: np.zeros([1, 64, 14, 14]),
        pb2: np.zeros([3, 3, 64, 128]),
        bi2: np.zeros([128]),
    }

    result = sess.run(op1, fd)
    self.assertAllClose(result, np.zeros([1, 14, 14, 128]))
    report.parse_log()
    mem_nhwc = report.get_total_tile_memory()

    result = sess.run(op2, fd)
    self.assertAllClose(result, np.zeros([1, 128, 14, 14]))
    report.parse_log()
    mem_nchw = report.get_total_tile_memory()

    self.assertTrue((mem_nhwc - mem_nchw) / mem_nhwc > -0.1)

def test_loop():
  size = int(2e8)
  while True:
    with self.test_session():
      # Force the compiled code to not be constant by feeding in a
      # parameter.
      p = array_ops.placeholder(dtypes.float32, shape=[2, 1, 1])
      with self.test_scope():
        # Create a computation that produces a large R1 tensor as an
        # intermediate result. Reduce it down so that if this file was
        # compiled without --config=cuda, we don't force a D2H copy of a
        # large tensor and potentially OOM the host.
        #
        # This is a bit tricky because XLA:GPU doesn't currently support RNG
        # ops. Here we rely on the fact that XLA doesn't do algebraic
        # simplifications on conv(<ones>, <filter>).
        c = math_ops.reduce_sum(
            nn_ops.convolution(
                array_ops.ones([1, size, 1]),
                p,
                padding='SAME',
                data_format='NWC'))

        c.eval(feed_dict={p: [[[1.0]], [[2.0]]]})
        size *= 2

def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes,
                          strides, dilations, padding, data_format, use_gpu,
                          err, mode):
  total_input_size = 1
  total_filter_size = 1
  for s in input_sizes:
    total_input_size *= s
  for s in filter_sizes:
    total_filter_size *= s
  # Initializes the input tensor with array containing incrementing
  # numbers from 1.
  x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
  x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
  default_dilations = (
      dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1)

  # If any dilation rate is larger than 1, only do test on the GPU
  # because we currently do not have a CPU implementation for arbitrary
  # dilation rates.
  if default_dilations or use_gpu:
    with self.cached_session(use_gpu=use_gpu) as sess:
      if data_format == "NCDHW":
        input_sizes = test_util.NHWCToNCHW(input_sizes)
      t1 = constant_op.constant(x1, shape=input_sizes)
      t2 = constant_op.constant(x2, shape=filter_sizes)
      full_strides = [1] + strides + [1]
      full_dilations = [1] + dilations + [1]
      if data_format == "NCDHW":
        full_strides = test_util.NHWCToNCHW(full_strides)
        full_dilations = test_util.NHWCToNCHW(full_dilations)
      actual = nn_ops.conv3d(
          t1,
          t2,
          strides=full_strides,
          dilations=full_dilations,
          padding=padding,
          data_format=data_format)
      expected = nn_ops.convolution(
          t1,
          t2,
          padding=padding,
          strides=strides,
          dilation_rate=dilations,
          data_format=data_format)
      if data_format == "NCDHW":
        actual = test_util.NCHWToNHWC(actual)
        expected = test_util.NCHWToNHWC(expected)
      actual_grad = gradients_impl.gradients(
          actual, t1 if mode == "input" else t2)[0]
      expected_grad = gradients_impl.gradients(
          expected, t1 if mode == "input" else t2)[0]
      # "values" consists of two tensors for two backprops
      actual_value = self.evaluate(actual_grad)
      expected_value = self.evaluate(expected_grad)
      self.assertShapeEqual(actual_value, actual_grad)
      self.assertShapeEqual(expected_value, expected_grad)
      print("expected = ", expected_value)
      print("actual = ", actual_value)
      self.assertArrayNear(expected_value.flatten(),
                           actual_value.flatten(), err)

def test3DConv3x3x3_WithBias(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [1, 14, 14, 14, 16], name="a")
      pb = array_ops.placeholder(np.float32, [3, 3, 3, 16, 32], name="b")
      bi = array_ops.placeholder(np.float32, [32], name="b")
      output = nn_ops.convolution(pa, pb, padding="SAME")
      output = nn_ops.bias_add(output, bi)

    report = tu.ReportJSON(self, sess)
    report.reset()

    fd = {
        pa: np.zeros([1, 14, 14, 14, 16]),
        pb: np.zeros([3, 3, 3, 16, 32]),
        bi: np.zeros([32]),
    }
    result = sess.run(output, fd)
    self.assertAllClose(result, np.zeros([1, 14, 14, 14, 32]))

    report.parse_log()
    ok = [
        '__seed*', 'host-exchange-local-copy-', 'Copy_',
        'convolution/convolution.*/Conv_3x3x3', 'BiasAdd/fusion/Op/Add'
    ]
    report.assert_all_compute_sets_and_list(ok)

def call(self, inputs, **kwargs):
  if type(inputs) is list:
    features = inputs[0]
    mask = inputs[1]
    # if mask has only one channel, repeat
    if self.mask_shape[-1] == 1:
      mask = tf.repeat(mask, tf.shape(features)[-1], axis=-1)
  else:
    # if no mask is provided, get it from the features
    features = inputs
    mask = tf.where(tf.equal(features, 0), 0.0, 1.0)

  if self.weightnorm:
    norm = tf.sqrt(
        tf.reduce_sum(tf.square(self.kernel), (0, 1, 2)) + self.eps)
    kernel = self.kernel / norm * self.wn_g
  else:
    kernel = self.kernel
  mask_kernel = self.mask_kernel

  features = tf.multiply(features, mask)
  features = nn_ops.convolution(features, kernel, self.padding.upper(),
                                self.strides, self.dilation_rate)
  norm = nn_ops.convolution(mask, mask_kernel, self.padding.upper(),
                            self.strides, self.dilation_rate)

  mask_fan_in = tf.cast(self.mask_fan_in, 'float32')

  if self.binary:
    mask = tf.where(tf.greater(norm, 0), 1.0, 0.0)
  else:
    mask = norm / mask_fan_in

  ratio = tf.where(tf.equal(norm, 0), 0.0, mask_fan_in / norm)
  features = tf.multiply(features, ratio)

  if self.use_bias:
    features = tf.add(features, self.bias)
  if self.activation is not None:
    features = self.activation(features)
  return [features, mask]

def testDataLayout(self):
  with ops.device("/device:IPU:0"):
    pa1 = array_ops.placeholder(np.float32, [1, 14, 14, 64], name="a")
    pb1 = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
    bi1 = array_ops.placeholder(np.float32, [128], name="b")
    op1 = nn_ops.convolution(pa1, pb1, padding="SAME", data_format='NHWC')
    op1 = nn_ops.bias_add(op1, bi1, data_format='NHWC')

    pa2 = array_ops.placeholder(np.float32, [1, 64, 14, 14], name="a")
    pb2 = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
    bi2 = array_ops.placeholder(np.float32, [128], name="b")
    op2 = nn_ops.convolution(pa2, pb2, padding="SAME", data_format='NCHW')
    op2 = nn_ops.bias_add(op2, bi2, data_format='NCHW')

  with ops.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()

  tu.configure_ipu_system()

  with tu.ipu_session() as sess:
    sess.run(report)

    fd = {
        pa1: np.zeros([1, 14, 14, 64]),
        pb1: np.zeros([3, 3, 64, 128]),
        bi1: np.zeros([128]),
        pa2: np.zeros([1, 64, 14, 14]),
        pb2: np.zeros([3, 3, 64, 128]),
        bi2: np.zeros([128]),
    }

    result = sess.run(op1, fd)
    self.assertAllClose(result, np.zeros([1, 14, 14, 128]))

    result = sess.run(report)
    s = tu.extract_all_strings_from_event_trace(result)
    mem_nhwc = tu.get_total_memory_from_report(s)

    result = sess.run(op2, fd)
    self.assertAllClose(result, np.zeros([1, 128, 14, 14]))

    result = sess.run(report)
    s = tu.extract_all_strings_from_event_trace(result)
    mem_nchw = tu.get_total_memory_from_report(s)

    self.assertTrue((mem_nhwc - mem_nchw) / mem_nhwc > -0.1)

def testConvolutionWith2SpatialDimensionsAndExpandedBatch(self):
  tensor_in_sizes_batch = [10, 2, 3, 1, 3]
  tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 1, 3]
  filter_in_sizes = [1, 1, 1, 3, 3]
  filter_in = self._CreateNumpyTensor(filter_in_sizes)
  x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
  x2 = x1.reshape(tensor_in_sizes_expanded_batch)
  conv1 = nn_ops.convolution(
      x1, filter_in, strides=[1, 1, 1], padding="VALID")
  conv2 = nn_ops.convolution(
      x2, filter_in, strides=[1, 1, 1], padding="VALID")
  self.assertEqual(conv1.shape, tensor_in_sizes_batch)
  self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
  self.assertAllEqual(conv1, self.evaluate(conv2).reshape(conv1.shape))

def test_unknown_spatial_dims_for_channel_first_format(self):
  x = array_ops.placeholder(dtypes.float32, [1, 10, None, None])
  w = array_ops.zeros([3, 3, 10, 20])
  y = nn_ops.convolution(
      x, w, "VALID", dilation_rate=[2, 2], data_format="NCHW")
  self.assertEqual(y.shape.as_list(), [1, 20, None, None])

def _test_gradient(self, x_shape, f_shape, dilation_rate, padding):
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  f_val = np.random.random_sample(f_shape).astype(np.float32)
  x = constant_op.constant(x_val, name="x", dtype=dtypes.float32)
  f = constant_op.constant(f_val, name="f", dtype=dtypes.float32)
  output = nn_ops.convolution(
      input=x, filter=f, dilation_rate=dilation_rate, padding=padding)
  y_shape = output.get_shape().as_list()
  err = gradient_checker.compute_gradient_error([x, f], [x_shape, f_shape],
                                                output, y_shape)
  err_tolerance = 1e-3
  self.assertLess(err, err_tolerance)

def test3DConv1x1x1_Stride2x1x1_In1x1x5(self):
  with ops.device("/device:IPU:0"):
    pa = array_ops.placeholder(np.float32, [1, 1, 1, 5, 1], name="a")
    pb = array_ops.placeholder(np.float32, [1, 1, 1, 1, 1], name="b")
    output = nn_ops.convolution(pa, pb, strides=[1, 1, 2], padding="VALID")

  with session_lib.Session() as sess:
    fd = {pa: [[[[[1], [2], [3], [4], [5]]]]], pb: [[[[[10]]]]]}
    result = sess.run(output, fd)
    self.assertAllClose(result, [[[[[10], [30], [50]]]]])

def call(self, inputs, **kwargs):
  if type(inputs) is list:
    features = inputs[0]
    mask = inputs[1]
  else:
    # if no mask is provided, get it from the features
    features = inputs
    mask = tf.where(
        tf.equal(tf.reduce_sum(features, axis=-1, keepdims=True), 0), 0.0,
        1.0)

  features = tf.multiply(features, mask)
  features = nn_ops.convolution(features, self.kernel, self.padding.upper(),
                                self.strides, self.dilation_rate)
  norm = nn_ops.convolution(mask, self.mask_kernel, self.padding.upper(),
                            self.strides, self.dilation_rate)

  mask_fan_in = tf.cast(self.mask_fan_in, 'float32')

  if self.binary:
    mask = tf.where(tf.greater(norm, 0), 1.0, 0.0)
  else:
    mask = norm / mask_fan_in

  # ratio = tf.where(tf.equal(norm, 0), 0.0, 1 / norm)
  # Note: The authors use this in the paper, but it would require special
  # initialization...
  ratio = tf.where(tf.equal(norm, 0), 0.0, mask_fan_in / norm)
  features = tf.multiply(features, ratio)

  if self.use_bias:
    features = tf.add(features, self.bias)
  if self.activation is not None:
    features = self.activation(features)
  return [features, mask]

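# Sketch (assumptions flagged inline): neither mask_kernel nor mask_fan_in is
# defined in the call() methods above. One plausible build() consistent with
# their use is an all-ones, non-trainable mask kernel whose fan-in is the
# number of taps times the input channels. The attribute names self.filters
# and self.kernel_size are assumed, not taken from the source.
def build(self, input_shape):
  feature_shape = input_shape[0] if isinstance(input_shape, list) else input_shape
  in_channels = int(feature_shape[-1])
  self.kernel = self.add_weight(
      'kernel', shape=[*self.kernel_size, in_channels, self.filters])
  # Assumption: all-ones kernel, so conv(mask, mask_kernel) counts the valid
  # inputs contributing to each output position and channel.
  self.mask_kernel = tf.ones([*self.kernel_size, in_channels, self.filters])
  self.mask_fan_in = np.prod(self.kernel_size) * in_channels
  if self.use_bias:
    self.bias = self.add_weight('bias', shape=[self.filters])
  super().build(input_shape)
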
def test3DConv3x3x3_Pad1x1x1(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [1, 14, 14, 14, 64], name="a")
      pb = array_ops.placeholder(np.float32, [3, 3, 3, 64, 128], name="b")
      output = nn_ops.convolution(pa, pb, padding="SAME")

    fd = {
        pa: np.zeros([1, 14, 14, 14, 64]),
        pb: np.zeros([3, 3, 3, 64, 128])
    }
    result = sess.run(output, fd)
    self.assertAllClose(result, np.zeros([1, 14, 14, 14, 128]))

def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
                                 stride, dilation, padding, data_format,
                                 use_gpu):
  total_size_tensor = 1
  total_size_filter = 1
  for s in tensor_in_sizes:
    total_size_tensor *= s
  for s in filter_in_sizes:
    total_size_filter *= s

  # Initializes the input tensor with array containing incrementing
  # numbers from 1.
  x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)]
  x2 = [f * 1.0 for f in range(1, total_size_filter + 1)]
  with self.cached_session(use_gpu=use_gpu):
    t1 = constant_op.constant(x1, shape=tensor_in_sizes)
    t2 = constant_op.constant(x2, shape=filter_in_sizes)
    if isinstance(stride, collections.abc.Iterable):
      strides = list(stride)
    else:
      strides = [stride, stride, stride]
    if data_format == "NCDHW":
      t1 = test_util.NHWCToNCHW(t1)
      full_strides = [1, 1] + strides
      full_dilation = [1, 1] + dilation
    else:
      full_strides = [1] + strides + [1]
      full_dilation = [1] + dilation + [1]
    expected = nn_ops.convolution(
        t1,
        t2,
        padding=padding,
        strides=strides,
        dilation_rate=dilation,
        data_format=data_format)
    computed = nn_ops.conv3d(
        t1,
        t2,
        strides=full_strides,
        dilations=full_dilation,
        padding=padding,
        data_format=data_format)
    if data_format == "NCDHW":
      expected = test_util.NCHWToNHWC(expected)
      computed = test_util.NCHWToNHWC(computed)
  return expected, computed

def testConv3x3_Pad1x1(self):
  for fmt in self.data_formats:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(
          np.float32, self._ip_shp([1, 14, 14, 64], fmt), name="a")
      pb = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
      output = nn_ops.convolution(
          pa, pb, padding="SAME", data_format=fmt, name='cnv2')

    with session_lib.Session() as sess:
      fd = {
          pa: np.zeros(self._ip_shp([1, 14, 14, 64], fmt)),
          pb: np.zeros([3, 3, 64, 128])
      }
      result = sess.run(output, fd)
      self.assertAllClose(result,
                          np.zeros(self._ip_shp([1, 14, 14, 128], fmt)))

def testExtractConvolutionPatches(self):
  with ops.Graph().as_default(), self.cached_session() as sess:
    batch_size = 10
    image_spatial_shape = [9, 10, 11]
    in_channels = out_channels = 32
    kernel_spatial_shape = [5, 3, 3]
    spatial_strides = [1, 2, 1]
    spatial_dilation = [1, 1, 1]
    padding = 'SAME'

    images = random_ops.random_uniform(
        [batch_size] + image_spatial_shape + [in_channels], seed=0)
    kernel_shape = kernel_spatial_shape + [in_channels, out_channels]
    kernel = random_ops.random_uniform(kernel_shape, seed=1)

    # Ensure shape matches expectation.
    patches = utils.extract_convolution_patches(
        images,
        kernel_shape,
        padding,
        strides=spatial_strides,
        dilation_rate=spatial_dilation)
    result_spatial_shape = (
        patches.shape.as_list()[1:1 + len(image_spatial_shape)])
    self.assertEqual(patches.shape.as_list(),
                     [batch_size] + result_spatial_shape +
                     kernel_spatial_shape + [in_channels])

    # Ensure extract...patches() + matmul() and convolution() implementation
    # give the same answer.
    outputs = nn_ops.convolution(
        images,
        kernel,
        padding,
        strides=spatial_strides,
        dilation_rate=spatial_dilation)

    patches_flat = array_ops.reshape(
        patches, [-1, np.prod(kernel_spatial_shape) * in_channels])
    kernel_flat = array_ops.reshape(kernel, [-1, out_channels])
    outputs_flat = math_ops.matmul(patches_flat, kernel_flat)

    outputs_, outputs_flat_ = sess.run([outputs, outputs_flat])
    self.assertAllClose(outputs_.flatten(), outputs_flat_.flatten())

def testConv3x3_WithBias(self):
  for fmt in self.data_formats:
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(
            np.float32, self._ip_shp([1, 14, 14, 64], fmt), name="a")
        pb = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
        bi = array_ops.placeholder(np.float32, [128], name="b")
        output = nn_ops.convolution(
            pa, pb, padding="SAME", data_format=fmt, name='cnv3')
        output = nn_ops.bias_add(output, bi, data_format=fmt, name='ba3')

      report = tu.ReportJSON(self, sess)
      report.reset()

      fd = {
          pa: np.zeros(self._ip_shp([1, 14, 14, 64], fmt)),
          pb: np.zeros([3, 3, 64, 128]),
          bi: np.zeros([128]),
      }
      result = sess.run(output, fd)
      self.assertAllClose(result,
                          np.zeros(self._ip_shp([1, 14, 14, 128], fmt)))

      report.parse_log()
      ok = [
          '__seed*', 'Copy_*actsRearranged', 'host-exchange-local-copy-',
          'cnv3*/convolution.*/Conv_3x3', 'ba3*/fusion/Op/Add'
      ]
      report.assert_all_compute_sets_and_list(ok)

def testConv1x1_Stride2x1_In1x5(self):
  for fmt in self.data_formats:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(
          np.float32, self._ip_shp([1, 1, 5, 1], fmt), name="a")
      pb = array_ops.placeholder(np.float32, [1, 1, 1, 1], name="b")
      output = nn_ops.convolution(
          pa,
          pb,
          strides=[1, 2],
          padding="VALID",
          data_format=fmt,
          name='cnv1')

    with session_lib.Session() as sess:
      fd = {
          pa: np.zeros(self._ip_shp([1, 1, 5, 1], fmt)),
          pb: np.zeros([1, 1, 1, 1])
      }
      result = sess.run(output, fd)
      self.assertAllClose(result, np.zeros(self._ip_shp([1, 1, 3, 1], fmt)))

def testConv3x3_WithBias(self):
  for fmt in self.data_formats:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(
          np.float32, self._ip_shp([1, 14, 14, 64], fmt), name="a")
      pb = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
      bi = array_ops.placeholder(np.float32, [128], name="b")
      output = nn_ops.convolution(
          pa, pb, padding="SAME", data_format=fmt, name='cnv3')
      output = nn_ops.bias_add(output, bi, data_format=fmt, name='ba3')

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      sess.run(report)

      fd = {
          pa: np.zeros(self._ip_shp([1, 14, 14, 64], fmt)),
          pb: np.zeros([3, 3, 64, 128]),
          bi: np.zeros([128]),
      }
      result = sess.run(output, fd)
      self.assertAllClose(result,
                          np.zeros(self._ip_shp([1, 14, 14, 128], fmt)))

      result = sess.run(report)
      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*', 'Copy_*actsRearranged', 'host-exchange-local-copy-',
          'cnv3*/convolution.*/Conv_3x3', 'ba3*/fusion/addToChannel'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))

def test3DConv3x3x3_WithBias(self):
  with ops.device("/device:IPU:0"):
    pa = array_ops.placeholder(np.float32, [1, 14, 14, 14, 16], name="a")
    pb = array_ops.placeholder(np.float32, [3, 3, 3, 16, 32], name="b")
    bi = array_ops.placeholder(np.float32, [32], name="b")
    output = nn_ops.convolution(pa, pb, padding="SAME")
    output = nn_ops.bias_add(output, bi)

  with ops.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()

  tu.configure_ipu_system()

  with tu.ipu_session() as sess:
    sess.run(report)

    fd = {
        pa: np.zeros([1, 14, 14, 14, 16]),
        pb: np.zeros([3, 3, 3, 16, 32]),
        bi: np.zeros([32]),
    }
    result = sess.run(output, fd)
    self.assertAllClose(result, np.zeros([1, 14, 14, 14, 32]))

    result = sess.run(report)
    s = tu.extract_all_strings_from_event_trace(result)
    cs_list = tu.get_compute_sets_from_report(s)

    ok = [
        '__seed*', 'host-exchange-local-copy-', 'Copy_',
        'convolution/convolution.*/Conv_3x3x3', 'BiasAdd/fusion/addToChannel'
    ]
    self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))

def _testQuantize_Conv2dWithBatchNorm(self, activation, activation_op_name,
                                      with_bypass, delay, use_ema):
  """Tests quantization: inputs -> Conv2d with batch norm -> Activation.

  Args:
    activation: Callable that returns an Operation, a factory method for the
      Activation.
    activation_op_name: String, name of the Activation operation.
    with_bypass: Bool, when true there is an extra connection added from
      inputs to just before Activation.
    delay: Int (optional), delay in number of steps until quantization starts.
    use_ema: Bool, when true uses EMA quantization for BN folded weights.
  """
  graph = ops.Graph()
  with graph.as_default():
    training.create_global_step(graph)

    batch_size, height, width, depth = 5, 128, 128, 3
    inputs = array_ops.zeros((batch_size, height, width, depth))
    stride = 1 if with_bypass else 2
    out_depth = 3 if with_bypass else 32
    scope = 'test/test2' if with_bypass else 'test'
    node = conv2d(
        inputs,
        out_depth, [5, 5],
        stride=stride,
        padding='SAME',
        weights_initializer=self._WeightInit(0.09),
        activation_fn=None,
        normalizer_fn=batch_norm,
        normalizer_params=_DEFAULT_BATCH_NORM_PARAMS,
        scope=scope)

    # Manually fold the batch norm.
    weights = graph.get_operation_by_name(scope + '/weights/read').outputs[0]
    bn_mult = (
        graph.get_operation_by_name(scope + '/BatchNorm/batchnorm/mul')
        .outputs[0])
    mul_fold = math_ops.multiply(weights, bn_mult, name=scope + '/mul_fold')

    stride = [stride, stride]
    conv_fold = nn_ops.convolution(
        input=inputs,
        filter=mul_fold,
        padding='SAME',
        strides=stride,
        data_format='NHWC',
        name=scope + '/convolution_Fold')

    bn_bias = (
        graph.get_operation_by_name(scope + '/BatchNorm/batchnorm/sub')
        .outputs[0])
    add_fold = math_ops.add(conv_fold, bn_bias, name=scope + '/add_fold')

    # Manually add a bypass (optionally) and an activation.
    if with_bypass:
      node = math_ops.add(inputs, add_fold, name='test/Add')
    else:
      node = add_fold

    node = activation(node, name='test/' + activation_op_name)

    update_barrier = control_flow_ops.no_op(name='update_barrier')
    with ops.control_dependencies([update_barrier]):
      array_ops.identity(node, name='control_dependency')

    quantize.Quantize(
        graph, quant_delay=delay, quantize_folded_weights_use_ema=use_ema)

  quantization_node_name = 'FakeQuantWithMinMaxVars'
  weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' +
                                              quantization_node_name)
  self.assertEqual(weights_quant.type, quantization_node_name)
  expected_inputs = [
      scope + '/weights_quant/' + ('min/read' if use_ema else 'Minimum'),
      scope + '/weights_quant/' + ('max/read' if use_ema else 'Maximum'),
      scope + '/mul_fold'
  ]
  self._AssertInputOpsAre(weights_quant, expected_inputs)
  output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1'
                            if (delay and use_ema) else '/convolution_Fold')
  self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name])

  if with_bypass:
    conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' +
                                             quantization_node_name)
    self.assertEqual(conv_quant.type, quantization_node_name)
    expected_inputs = [
        scope + '/conv_quant/min/read', scope + '/conv_quant/max/read',
        scope + '/add_fold'
    ]
    self._AssertInputOpsAre(conv_quant, expected_inputs)
    output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1'
                      if delay else 'test/Add')
    self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name])

  act_quant = graph.get_operation_by_name('test/act_quant/' +
                                          quantization_node_name)
  self.assertEqual(act_quant.type, quantization_node_name)
  expected_inputs = [
      'test/act_quant/min/read', 'test/act_quant/max/read',
      'test/' + activation_op_name
  ]
  self._AssertInputOpsAre(act_quant, expected_inputs)
  output_op_name = ('test/act_quant/delayed_quant/Switch_1'
                    if delay else 'control_dependency')
  self._AssertOutputGoesToOps(act_quant, graph, [output_op_name])

def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes,
                          strides, dilations, padding, data_format, use_gpu,
                          err, mode):
  total_input_size = 1
  total_filter_size = 1
  for s in input_sizes:
    total_input_size *= s
  for s in filter_sizes:
    total_filter_size *= s
  # Initializes the input tensor with array containing incrementing
  # numbers from 1.
  x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
  x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
  default_dilations = (dilations[0] == 1 and dilations[1] == 1 and
                       dilations[2] == 1)

  # If any dilation rate is larger than 1, only do test on the GPU
  # because we currently do not have a CPU implementation for arbitrary
  # dilation rates.
  # if default_dilations or use_gpu:
  with self.cached_session(use_gpu=use_gpu) as sess:
    t1_ph = tf.compat.v1.placeholder(np.float32, shape=input_sizes)
    t1 = constant_op.constant(x1, shape=input_sizes)
    t2_ph = tf.compat.v1.placeholder(np.float32, shape=filter_sizes)
    t2 = constant_op.constant(x2, shape=filter_sizes)
    full_strides = [1] + strides + [1]
    full_dilations = [1] + dilations + [1]
    actual = nn_ops.conv3d(
        t1_ph,
        t2_ph,
        strides=full_strides,
        dilations=full_dilations,
        padding=padding,
        data_format=data_format)
    expected = nn_ops.convolution(
        t1,
        t2,
        padding=padding,
        strides=strides,
        dilation_rate=dilations,
        data_format=data_format)
    actual_grad = gradients_impl.gradients(
        actual, t1_ph if mode == "input" else t2_ph)[0]
    expected_grad = gradients_impl.gradients(
        expected, t1 if mode == "input" else t2)[0]
    # "values" consists of two tensors for two backprops
    expected_value = self.evaluate(expected_grad)
    actual_sess_fn = lambda sess: sess.run(
        actual_grad, feed_dict={t1_ph: t1.eval(), t2_ph: t2.eval()})
    actual_value = self.with_ngraph(actual_sess_fn)
    self.assertShapeEqual(actual_value, actual_grad)
    self.assertShapeEqual(expected_value, expected_grad)
    print("expected = ", expected_value)
    print("actual = ", actual_value)
    self.assertArrayNear(expected_value.flatten(),
                         actual_value.flatten(), err)

def F(value):
  return nn_ops.convolution(value, k, "SAME")

def extract_convolution_patches(inputs,
                                filter_shape,
                                padding,
                                strides=None,
                                dilation_rate=None,
                                name=None,
                                data_format=None):
  """Extracts inputs to each output coordinate in tf.nn.convolution.

  This is a generalization of tf.extract_image_patches() to
  tf.nn.convolution(), where the number of spatial dimensions may be
  something other than 2.

  Assumes,
  - First dimension of inputs is batch_size
  - Convolution filter is applied to all input channels.

  Args:
    inputs: Tensor of shape [batch_size, ..spatial_image_shape..,
      ..spatial_filter_shape.., in_channels]. Inputs to tf.nn.convolution().
    filter_shape: List of ints. Shape of filter passed to tf.nn.convolution().
    padding: string. Padding method. One of "VALID", "SAME".
    strides: None or list of ints. Strides along spatial dimensions.
    dilation_rate: None or list of ints. Dilation along spatial dimensions.
    name: None or str. Name of Op.
    data_format: None or str. Format of data.

  Returns:
    Tensor of shape [batch_size, ..spatial_image_shape..,
      ..spatial_filter_shape.., in_channels]

  Raises:
    ValueError: If data_format does not put channel last.
    ValueError: If inputs and filter disagree on in_channels.
  """
  if not is_data_format_channel_last(data_format):
    raise ValueError("Channel must be last dimension.")
  with ops.name_scope(name, "extract_convolution_patches",
                      [inputs, filter_shape, padding, strides,
                       dilation_rate]):
    batch_size = inputs.shape.as_list()[0]
    in_channels = inputs.shape.as_list()[-1]

    # filter_shape = spatial_filter_shape + [in_channels, out_channels]
    spatial_filter_shape = filter_shape[:-2]
    if in_channels != filter_shape[-2]:
      raise ValueError("inputs and filter_shape must agree on in_channels.")

    # Map each input feature to a location in the output.
    out_channels = np.prod(spatial_filter_shape) * in_channels
    filters = linalg_ops.eye(out_channels)
    filters = array_ops.reshape(
        filters,
        list(spatial_filter_shape) + [in_channels, out_channels])

    result = nn_ops.convolution(
        inputs,
        filters,
        padding=padding,
        strides=strides,
        dilation_rate=dilation_rate)
    spatial_output_shape = result.shape.as_list()[1:-1]
    result = array_ops.reshape(
        result,
        [batch_size or -1] + spatial_output_shape +
        list(spatial_filter_shape) + [in_channels])

    return result

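# Usage note (shapes only, hedged): for images of shape [2, 8, 8, 3] and
# filter_shape [3, 3, 3, 16] with "SAME" padding and unit strides, the helper
# above returns patches of shape [2, 8, 8, 3, 3, 3], i.e.
# [batch_size, ..spatial_output_shape.., ..spatial_filter_shape.., in_channels].
# Reshaping the patches to [-1, 3 * 3 * 3] and multiplying by the kernel
# reshaped to [27, 16] reproduces nn_ops.convolution(), which is exactly what
# testExtractConvolutionPatches verifies earlier in this collection.
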