def _compareShapeN(self, x, use_gpu=False):
  np_ans = np.array(np.shape(x))
  with self.test_session(use_gpu=use_gpu) as sess:
    tf_ans = array_ops.shape_n([x, x, x])
    tf_ans_64 = array_ops.shape_n([x, x, x], out_type=dtypes.int64)
    result = sess.run(tf_ans)
    result_64 = sess.run(tf_ans_64)
  for i in range(3):
    self.assertAllEqual(np_ans, result[i])
    self.assertAllEqual(np_ans, result_64[i])
    self.assertShapeEqual(np_ans, tf_ans[i])
def testShapeN(self):
  if test.is_gpu_available(cuda_only=True):
    x = array_ops.placeholder(dtype='float32')
    conv = _two_layer_model(x)
    shapen = array_ops.shape_n([conv, conv])
    output = math_ops.add(shapen[0], shapen[1])

    x_val = [1.7] * 784
    with session.Session() as sess:
      output_val_ref = sess.run(output, feed_dict={x: x_val})

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(
          output, run_metadata=metadata, feed_dict={x: x_val})

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if node.name.startswith('LayoutOptimizerTranspose'):
        num_transposes += 1
      nodes.append(node.name)

    expected_num_transposes = 1
    self.assertEqual(expected_num_transposes, num_transposes)
    self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
    self.assertIn('LayoutOptimizerVecPermuteNCHWToNHWC-ShapeN-0-0', nodes)
    self.assertAllEqual(output_val_ref, output_val)
def _Conv2DGrad(op, grad):
  dilations = op.get_attr("dilations")
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")
  use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
  data_format = op.get_attr("data_format")
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
  return [
      nn_ops.conv2d_backprop_input(
          shape_0,
          op.inputs[1],
          grad,
          dilations=dilations,
          strides=strides,
          padding=padding,
          use_cudnn_on_gpu=use_cudnn_on_gpu,
          data_format=data_format),
      nn_ops.conv2d_backprop_filter(
          op.inputs[0],
          shape_1,
          grad,
          dilations=dilations,
          strides=strides,
          padding=padding,
          use_cudnn_on_gpu=use_cudnn_on_gpu,
          data_format=data_format)
  ]
def testShapeN(self):
  if test.is_gpu_available(cuda_only=True):
    x = array_ops.placeholder(dtype='float32')
    conv = _two_layer_model(x)
    shapen = array_ops.shape_n([conv, conv])
    output = math_ops.add(shapen[0], shapen[1])

    x_val = [1.7] * 784
    with session.Session() as sess:
      output_val_ref = sess.run(output, feed_dict={x: x_val})

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(
          output, run_metadata=metadata, feed_dict={x: x_val})

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if _is_transpose(node.name):
        num_transposes += 1
      nodes.append(node.name)

    expected_num_transposes = 1
    self.assertEqual(expected_num_transposes, num_transposes)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes)
    self.assertAllEqual(output_val_ref, output_val)
def _initialize_updated_shapes(self, session):
  shapes = array_ops.shape_n(self._vars)
  var_shapes = list(map(tuple, session.run(shapes)))

  if self._var_shapes is not None:
    new_old_shapes = zip(self._var_shapes, var_shapes)
    if all([old == new for old, new in new_old_shapes]):
      return

  self._var_shapes = var_shapes
  # Materialize the (var, shape) pairs so they can be iterated again after
  # building the dict below.
  vars_and_shapes = list(zip(self._vars, self._var_shapes))
  vars_and_shapes_dict = dict(vars_and_shapes)

  packed_bounds = None
  if self._var_to_bounds is not None:
    left_packed_bounds = []
    right_packed_bounds = []
    for var, var_shape in vars_and_shapes:
      shape = list(var_shape)
      bounds = (-np.infty, np.infty)
      if var in self._var_to_bounds:
        bounds = self._var_to_bounds[var]
      left_packed_bounds.extend(list(np.broadcast_to(bounds[0], shape).flat))
      right_packed_bounds.extend(list(np.broadcast_to(bounds[1], shape).flat))
    packed_bounds = list(zip(left_packed_bounds, right_packed_bounds))
  self._packed_bounds = packed_bounds

  self._update_placeholders = [
      array_ops.placeholder(var.dtype) for var in self._vars
  ]
  self._var_updates = [
      var.assign(array_ops.reshape(placeholder, vars_and_shapes_dict[var]))
      for var, placeholder in zip(self._vars, self._update_placeholders)
  ]

  loss_grads = _compute_gradients(self._loss, self._vars)
  equalities_grads = [
      _compute_gradients(equality, self._vars)
      for equality in self._equalities
  ]
  inequalities_grads = [
      _compute_gradients(inequality, self._vars)
      for inequality in self._inequalities
  ]

  self._packed_var = self._pack(self._vars)
  self._packed_loss_grad = self._pack(loss_grads)
  self._packed_equality_grads = [
      self._pack(equality_grads) for equality_grads in equalities_grads
  ]
  self._packed_inequality_grads = [
      self._pack(inequality_grads)
      for inequality_grads in inequalities_grads
  ]

  dims = [_prod(vars_and_shapes_dict[var]) for var in self._vars]
  accumulated_dims = list(_accumulate(dims))
  self._packing_slices = [
      slice(start, end)
      for start, end in zip(accumulated_dims[:-1], accumulated_dims[1:])
  ]
def _ExtractInputShapes(inputs):
  """Extract the shapes of a set of input tensors."""
  if context.executing_eagerly():
    return array_ops.shape_n(inputs)
  sizes = []
  fully_known = True
  for x in inputs:
    input_shape = array_ops.shape(x)
    if not isinstance(input_shape,
                      ops.Tensor) or input_shape.op.type != "Const":
      fully_known = False
      break
    sizes.append(input_shape)

  if fully_known:
    return sizes
  else:
    return array_ops.shape_n(inputs)
def testShapeN(self):
  with self.test_scope():
    # Shapes of directly constructed tensors
    shapes = array_ops.shape_n([
        constant_op.constant(1.0),
        constant_op.constant([1.0, 2.0, 3.0]),
        constant_op.constant([[1.0, 2.0], [3.0, 4.0]])])
    self.assertAllEqual(
        [[], [3], [2, 2]],
        [x.numpy().tolist() for x in shapes])

    # Shapes of tensors created by op running on device
    shapes = array_ops.shape_n([
        array_ops.ones([]),
        array_ops.ones([3]),
        array_ops.ones([2, 2])])
    self.assertAllEqual(
        [[], [3], [2, 2]],
        [x.numpy().tolist() for x in shapes])
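# A minimal standalone sketch (not one of the collected snippets) of the public
# tf.shape_n API that the tests above exercise; assumes TensorFlow 2.x with
# eager execution, so the returned shape tensors can be inspected directly.
import tensorflow as tf

def shape_n_demo():
  tensors = [tf.constant(1.0), tf.ones([3]), tf.ones([2, 2])]
  # A single ShapeN op returns one 1-D shape tensor per input (int32 by default).
  shapes_32 = tf.shape_n(tensors)
  shapes_64 = tf.shape_n(tensors, out_type=tf.int64)
  assert [s.numpy().tolist() for s in shapes_32] == [[], [3], [2, 2]]
  assert [s.numpy().tolist() for s in shapes_64] == [[], [3], [2, 2]]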
def _ExtractInputShapes(inputs):
  """Extract the shapes of a set of input tensors."""
  sizes = []
  fully_known = True
  for x in inputs:
    input_shape = array_ops.shape(x)
    if not isinstance(input_shape,
                      ops.Tensor) or input_shape.op.type != "Const":
      fully_known = False
      break
    else:
      sizes.append(input_shape)

  if fully_known:
    return sizes
  else:
    return array_ops.shape_n(inputs)
def _move_tensors(tensors, device):
  """Moves a list of tensors to a device by concatenating/splitting them."""
  # Reset the device setting to avoid weird interactions with device merging
  # logic.
  zero = constant_op.constant(0, dtype=dtypes.int32)
  with ops.device(None):
    if all(tensor.shape == tensor_shape.scalar() for tensor in tensors):
      with ops.device(tensors[0].device):
        values = array_ops.stack(tensors)
      with ops.device(device):
        return array_ops.unstack(values)
    else:
      with ops.device(tensors[0].device):
        sizes = array_ops.stack(array_ops.shape_n(tensors))[:, 0]
        values = array_ops.concat(tensors, axis=zero)
      with ops.device(device):
        sizes = array_ops.unstack(sizes)
        return list(array_ops.split(values, sizes, axis=zero))
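# A hedged sketch (not one of the collected snippets) of the concat/split
# round-trip that _move_tensors builds on: shape_n fetches every tensor's
# shape in one op, and the leading dimensions drive the split. Assumes
# TensorFlow 2.x and same-rank inputs.
import tensorflow as tf

def concat_split_roundtrip(tensors):
  # Leading dimension of each tensor, gathered with a single ShapeN op.
  sizes = tf.stack(tf.shape_n(tensors))[:, 0]
  merged = tf.concat(tensors, axis=0)
  # Split the merged value back into pieces of the original leading sizes.
  return tf.split(merged, tf.unstack(sizes), axis=0)

# Example: concat_split_roundtrip([tf.ones([2, 4]), tf.ones([3, 4])]) returns
# tensors of shape (2, 4) and (3, 4).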
def _ssim_per_channel(img1, img2, max_val=1.0):
  filter_size = constant_op.constant(11, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(1.5, dtype=img1.dtype)

  shape1, shape2 = array_ops.shape_n([img1, img2])
  checks = [
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape1[-3:-1], filter_size)),
          [shape1, filter_size], summarize=8),
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape2[-3:-1], filter_size)),
          [shape2, filter_size], summarize=8)
  ]

  # Enforce the check to run before computation.
  with ops.control_dependencies(checks):
    img1 = array_ops.identity(img1)

  kernel = _fspecial_gauss(filter_size, filter_sigma)
  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])

  compensation = 1.0

  def reducer(x):
    shape = array_ops.shape(x)
    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
    return array_ops.reshape(
        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))

  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation)

  # Average over the second and the third from the last: height, width.
  axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
  ssim_val = math_ops.reduce_mean(luminance * cs, axes)
  cs = math_ops.reduce_mean(cs, axes)
  luminance = math_ops.reduce_mean(luminance, axes)
  return ssim_val, cs, luminance
def _Conv2DGrad(op, grad):
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")
  use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
  data_format = op.get_attr("data_format")
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
  return [nn_ops.conv2d_backprop_input(shape_0, op.inputs[1], grad, strides,
                                       padding, use_cudnn_on_gpu, data_format),
          nn_ops.conv2d_backprop_filter(op.inputs[0], shape_1, grad, strides,
                                        padding, use_cudnn_on_gpu,
                                        data_format)]
def _Conv3DGrad(op, grad):
  """Weight sharing for symmetric lateral connections."""
  strides = op.get_attr('strides')
  padding = op.get_attr('padding')
  data_format = op.get_attr('data_format')
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
  dx = nn_ops.conv3d_backprop_input_v2(shape_0,
                                       op.inputs[1],
                                       grad,
                                       strides=strides,
                                       padding=padding,
                                       data_format=data_format)
  dw = nn_ops.conv3d_backprop_filter_v2(op.inputs[0],
                                        shape_1,
                                        grad,
                                        strides=strides,
                                        padding=padding,
                                        data_format=data_format)
  dw = 0.5 * (dw + tf.transpose(dw, (0, 1, 2, 4, 3)))
  return dx, dw
def _verify_compatible_image_shapes(img1, img2):
  """Checks if two image tensors are compatible for applying SSIM or PSNR.

  This function checks if two sets of images have ranks at least 3, and if the
  last three dimensions match.

  Args:
    img1: Tensor containing the first image batch.
    img2: Tensor containing the second image batch.

  Returns:
    A tuple containing: the first tensor shape, the second tensor shape, and a
    list of control_flow_ops.Assert() ops implementing the checks.

  Raises:
    ValueError: When static shape check fails.
  """
  shape1 = img1.get_shape().with_rank_at_least(3)
  shape2 = img2.get_shape().with_rank_at_least(3)
  shape1[-3:].assert_is_compatible_with(shape2[-3:])

  if shape1.ndims is not None and shape2.ndims is not None:
    for dim1, dim2 in zip(reversed(shape1[:-3]), reversed(shape2[:-3])):
      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
        raise ValueError('Two images are not compatible: %s and %s' %
                         (shape1, shape2))

  # Now assign shape tensors.
  shape1, shape2 = array_ops.shape_n([img1, img2])

  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
  checks = []
  checks.append(
      control_flow_ops.Assert(
          math_ops.greater_equal(array_ops.size(shape1), 3),
          [shape1, shape2], summarize=10))
  checks.append(
      control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
          [shape1, shape2], summarize=10))
  return shape1, shape2, checks
def _ssim_per_channel(img1, img2, alpha, beta_gamma, max_val=1.0):
  filter_size = constant_op.constant(11, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(1.5, dtype=img1.dtype)

  (shape1, shape2) = array_ops.shape_n([img1, img2])
  checks = [
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape1[-3:-1], filter_size)),
          [shape1, filter_size], summarize=8),
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape2[-3:-1], filter_size)),
          [shape2, filter_size], summarize=8)
  ]

  with ops.control_dependencies(checks):
    img1 = array_ops.identity(img1)

  kernel = _fspecial_gauss(filter_size, filter_sigma)
  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])

  compensation = 1.0

  def reducer(x):
    shape = array_ops.shape(x)
    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
    return array_ops.reshape(
        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))

  (luminance, cs) = _ssim_helper(img1, img2, reducer, max_val, alpha,
                                 beta_gamma, compensation)

  axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
  ssim_val = math_ops.reduce_mean(luminance * cs, axes)
  cs = math_ops.reduce_mean(cs, axes)
  return (ssim_val, cs)
def _verify_compatible_image_shapes(img1, img2):
  shape1 = img1.get_shape().with_rank_at_least(3)
  shape2 = img2.get_shape().with_rank_at_least(3)
  shape1[-3:].assert_is_compatible_with(shape2[-3:])

  if shape1.ndims is not None and shape2.ndims is not None:
    for (dim1, dim2) in zip(reversed(shape1[:-3]), reversed(shape2[:-3])):
      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
        raise ValueError('Two images are not compatible: %s and %s' %
                         (shape1, shape2))

  (shape1, shape2) = array_ops.shape_n([img1, img2])
  checks = []
  checks.append(
      control_flow_ops.Assert(
          math_ops.greater_equal(array_ops.size(shape1), 3),
          [shape1, shape2], summarize=10))
  checks.append(
      control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
          [shape1, shape2], summarize=10))
  return (shape1, shape2, checks)
def _Conv2DGrad(op, grad): """Gradient function for Conv2D.""" dilations = op.get_attr("dilations") strides = op.get_attr("strides") padding = op.get_attr("padding") explicit_paddings = op.get_attr("explicit_paddings") use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu") data_format = op.get_attr("data_format") shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]]) # We call the gen_nn_ops backprop functions instead of nn_ops backprop # functions for performance reasons in Eager mode. gen_nn_ops functions take a # `explicit_paddings` parameter, but nn_ops functions do not. So if were were # to use the nn_ops functions, we would have to convert `padding` and # `explicit_paddings` into a single `padding` parameter, increasing overhead # in Eager mode. return [ gen_nn_ops.conv2d_backprop_input( shape_0, op.inputs[1], grad, dilations=dilations, strides=strides, padding=padding, explicit_paddings=explicit_paddings, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format), gen_nn_ops.conv2d_backprop_filter( op.inputs[0], shape_1, grad, dilations=dilations, strides=strides, padding=padding, explicit_paddings=explicit_paddings, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) ]
def _SConv2DGrad(op, grad):
  """Weight sharing for symmetric lateral connections."""
  strides = op.get_attr('strides')
  padding = op.get_attr('padding')
  use_cudnn_on_gpu = op.get_attr('use_cudnn_on_gpu')
  data_format = op.get_attr('data_format')
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
  dx = nn_ops.conv2d_backprop_input(shape_0,
                                    op.inputs[1],
                                    grad,
                                    strides=strides,
                                    padding=padding,
                                    use_cudnn_on_gpu=use_cudnn_on_gpu,
                                    data_format=data_format)
  dw = nn_ops.conv2d_backprop_filter(op.inputs[0],
                                     shape_1,
                                     grad,
                                     strides=strides,
                                     padding=padding,
                                     use_cudnn_on_gpu=use_cudnn_on_gpu,
                                     data_format=data_format)
  dw = 0.5 * (dw + dw[::-1, ::-1, :, :])
  return dx, dw
def _Conv2DGrad(op, grad):
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")
  use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
  data_format = op.get_attr("data_format")
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
  dx = nn_ops.conv2d_backprop_input(shape_0,
                                    op.inputs[1],
                                    grad,
                                    strides=strides,
                                    padding=padding,
                                    use_cudnn_on_gpu=use_cudnn_on_gpu,
                                    data_format=data_format)
  dw = nn_ops.conv2d_backprop_filter(op.inputs[0],
                                     shape_1,
                                     grad,
                                     strides=strides,
                                     padding=padding,
                                     use_cudnn_on_gpu=use_cudnn_on_gpu,
                                     data_format=data_format)
  dw_t = tf.transpose(dw, (2, 3, 0, 1))
  dw_symm_t = 0.5 * (dw_t + tf.transpose(dw_t, (1, 0, 2, 3)))
  dw_symm = tf.transpose(dw_symm_t, (2, 3, 0, 1))
  return dx, dw_symm
def _ConcatGrad(op, grad):
  """Gradient for concat op."""

  def _CreateDenseMaskAndBegin(sizes, concat_dim):
    """Create variables for iteratively slicing a dense gradients tensor."""
    # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
    shape_of_shape = array_ops.shape(sizes[0])
    # Make a vector of length equal to the input's dimensions,
    # with 0's everywhere and 1 in the concat dim position.
    # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
    mask = array_ops.concat(0, [
        array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
        array_ops.fill(shape_of_shape - concat_dim - 1, 0)
    ])
    begin = array_ops.fill(shape_of_shape, 0)
    return mask, begin

  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    return [None, grad]

  concat_dim = op.inputs[0]
  out_grads = []
  if isinstance(grad, ops.Tensor):
    # Get the inputs' tensor shapes
    sizes = array_ops.shape_n(op.inputs[1:])
    # pylint: disable=protected-access
    offset = gen_array_ops._concat_offset(concat_dim, sizes)
    # pylint: enable=protected-access
    for (begin, size) in zip(offset, sizes):
      out_grads.append(array_ops.slice(grad, begin, size))
  elif isinstance(grad, ops.IndexedSlices):
    concat_dim_static = tensor_util.constant_value(concat_dim)
    if concat_dim_static is None:
      raise ValueError("Can only compute IndexedSlices gradient with "
                       "statically-known concat_dim")
    # Get the inputs' tensor shapes
    sizes = [array_ops.shape(x) for x in op.inputs[1:]]
    if concat_dim_static > 0:
      # IndexedSlices, concat_dim > 0. Each input gets IndexedSlices gradients
      # with all the indices, but with grad.values sliced accordingly. This
      # is like the Tensor case, except shape(grad.values)[0] is not equal to
      # shape(sizes[i])[0], since only a subset of the dim-0 values are stored.
      mask, begin = _CreateDenseMaskAndBegin(sizes, concat_dim)
      for size in sizes:
        new_values = array_ops.slice(
            grad.values, begin,
            array_ops.concat(0, [[-1], array_ops.slice(size, [1], [-1])]))
        out_grads.append(ops.IndexedSlices(new_values, grad.indices, size))
        # Lint complains begin = begin + ...
        begin = math_ops.add(begin, size * mask)
    else:
      # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
      # only for the relevant indices.
      start = constant_op.constant(0, dtype=grad.indices.dtype)
      for size in sizes:
        size_concat_dim = array_ops.gather(size, concat_dim)
        if size_concat_dim.dtype != grad.indices.dtype:
          size_concat_dim = math_ops.cast(size_concat_dim,
                                          dtype=grad.indices.dtype)
        end = start + size_concat_dim
        # Compute the 1-D Tensor of indices relevant for this input.
        indices_to_select = array_ops.squeeze(
            array_ops.where(math_ops.logical_and(grad.indices >= start,
                                                 grad.indices < end)),
            squeeze_dims=[1])
        new_indices = array_ops.gather(grad.indices, indices_to_select) - start
        new_values = array_ops.gather(grad.values, indices_to_select)
        out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
        start = end
  else:
    raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad))

  return [None] + out_grads
def loop_fn(i):
  x_i = array_ops.gather(x, i)
  y_i = array_ops.gather(y, i)
  return array_ops.shape_n([x_i, x, y, y_i]), array_ops.shape_n(
      [x_i, x, y, y_i], out_type=dtypes.int64)
def _ConcatGrad(op, grad):
  """Gradient for concat op."""

  def _CreateDenseMaskAndBegin(sizes, concat_dim):
    """Create variables for iteratively slicing a dense gradients tensor."""
    # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
    shape_of_shape = array_ops.shape(sizes[0])
    # Make a vector of length equal to the input's dimensions,
    # with 0's everywhere and 1 in the concat dim position.
    # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
    mask = array_ops.concat(0, [
        array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
        array_ops.fill(shape_of_shape - concat_dim - 1, 0)
    ])
    begin = array_ops.fill(shape_of_shape, 0)
    return mask, begin

  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    return [None, grad]

  concat_dim = op.inputs[0]
  out_grads = []
  if isinstance(grad, ops.Tensor):
    # Get the inputs' tensor shapes
    sizes = array_ops.shape_n(op.inputs[1:])
    # pylint: disable=protected-access
    offset = gen_array_ops._concat_offset(concat_dim, sizes)
    # pylint: enable=protected-access
    for (begin, size) in zip(offset, sizes):
      out_grads.append(array_ops.slice(grad, begin, size))
  elif isinstance(grad, ops.IndexedSlices):
    concat_dim_static = tensor_util.constant_value(concat_dim)
    if concat_dim_static is None:
      raise ValueError("Can only compute IndexedSlices gradient with "
                       "statically-known concat_dim")
    # Get the inputs' tensor shapes
    sizes = [array_ops.shape(x) for x in op.inputs[1:]]
    if concat_dim_static > 0:
      # IndexedSlices, concat_dim > 0. Each input gets IndexedSlices gradients
      # with all the indices, but with grad.values sliced accordingly. This
      # is like the Tensor case, except shape(grad.values)[0] is not equal to
      # shape(sizes[i])[0], since only a subset of the dim-0 values are stored.
      mask, begin = _CreateDenseMaskAndBegin(sizes, concat_dim)
      for size in sizes:
        new_values = array_ops.slice(
            grad.values, begin,
            array_ops.concat(0, [[-1], array_ops.slice(size, [1], [-1])]))
        out_grads.append(ops.IndexedSlices(new_values, grad.indices, size))
        # Lint complains begin = begin + ...
        begin = math_ops.add(begin, size * mask)
    else:
      # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
      # only for the relevant indices.
      start = constant_op.constant(0, dtype=grad.indices.dtype)
      for size in sizes:
        size_concat_dim = array_ops.gather(size, concat_dim)
        if size_concat_dim.dtype != grad.indices.dtype:
          size_concat_dim = math_ops.cast(size_concat_dim,
                                          dtype=grad.indices.dtype)
        end = start + size_concat_dim
        # Compute the 1-D Tensor of indices relevant for this input.
        indices_to_select = array_ops.squeeze(
            array_ops.where(math_ops.logical_and(grad.indices >= start,
                                                 grad.indices < end)),
            squeeze_dims=[1])
        new_indices = array_ops.gather(grad.indices, indices_to_select) - start
        new_values = array_ops.gather(grad.values, indices_to_select)
        out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
        start = end
  else:
    raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad))

  return [None] + out_grads
def _Conv2DGrad(op, grad): """Gradient function for Conv2D.""" dilations = op.get_attr("dilations") strides = op.get_attr("strides") padding = op.get_attr("padding") explicit_paddings = op.get_attr("explicit_paddings") use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu") data_format = op.get_attr("data_format") use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu") shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]]) enable_quantop_grad = int(os.getenv('ENABLE_QUANTOP_CONV_GRAD', 0)) enable_quantop_input = int(os.getenv('ENABLE_QUANTOP_CONV', 0)) enable_quantop_wtgrad = int(os.getenv('ENABLE_QUANTOP_CONV_WTGRAD', 0)) dformat = 'channels_last' inp_channels = op.inputs[0].get_shape()[3].value if data_format == b'NCHW': dformat = 'channels_first' inp_channels = op.inputs[0].get_shape()[1].value elif data_format == b'None': dformat = 'unknown' quant_input_copy = int(os.getenv('QUANTEMU_ALLOCATE_COPY_INPUTS', 23)) quant_filter_copy = int(os.getenv('QUANTEMU_ALLOCATE_COPY_FILTERS', 23)) quant_input_precision = int(os.getenv('QUANTEMU_PRECISION_CONV_INPUTS', 23)) quant_filter_precision = int( os.getenv('QUANTEMU_PRECISION_CONV_FILTERS', 23)) quant_grad_precision = int(os.getenv('QUANTEMU_PRECISION_CONV_GRADS', 23)) quant_wtgrad_precision = int( os.getenv('QUANTEMU_PRECISION_CONV_WTGRADS', 23)) if inp_channels == 3: quant_grad_precision = quant_input_precision = int( os.getenv('QUANTEMU_FIRST_LAYER_PRECISION', 23)) quant_filter_precision = int( os.getenv('QUANTEMU_FIRST_LAYER_PRECISION', 23)) if enable_quantop_grad == 1: grad = quantemu_ops.quantize_emu( grad, data_format=dformat, data_type=int(os.getenv('QUANTEMU_GRAD_DATA_TYPE', 0)), precision= quant_grad_precision, #int(os.getenv('QUANTEMU_PRECISION_CONV_GRADS', 23)), exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)), channel_blocking_type=int( os.getenv('QUANTEMU_CBLOCK_TYPE_CONV_GRADS', 0)), channels_per_block=int(os.getenv('QUANTEMU_CBLOCK_SIZE_GRADS', 0)), round_mode=int(os.getenv('QUANTEMU_RMODE_GRADS', 0))) if enable_quantop_input == 1: if quant_input_copy == 1: acts = quantemu_ops.quantize_emu( op.inputs[0], data_format=dformat, data_type=int(os.getenv('QUANTEMU_INPUT_DATA_TYPE', 0)), precision= quant_input_precision, #int(os.getenv('QUANTEMU_PRECISION_CONV_INPUTS', 23)), exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)), channel_blocking_type=int( os.getenv('QUANTEMU_CBLOCK_TYPE_CONV_INPUTS', 0)), channels_per_block=int( os.getenv('QUANTEMU_CBLOCK_SIZE_INPUTS', 0)), round_mode=int(os.getenv('QUANTEMU_RMODE_INPUTS', 0))) else: acts = op.inputs[0] if quant_filter_copy == 1: filters = quantemu_ops.quantize_emu( op.inputs[1], data_format=dformat, allocate_copy=int( os.getenv('QUANTEMU_ALLOCATE_COPY_FILTERS', 0)), data_type=int(os.getenv('QUANTEMU_FILTER_DATA_TYPE', 0)), precision= quant_filter_precision, #int(os.getenv('QUANTEMU_PRECISION_CONV_FILTERS', 23)), exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)), channel_blocking_type=int( os.getenv('QUANTEMU_CBLOCK_TYPE_CONV_FILTERS', 0)), channels_per_block=int( os.getenv('QUANTEMU_CBLOCK_SIZE_FILTERS', 0)), round_mode=int(os.getenv('QUANTEMU_RMODE_FILTERS', 0))) else: filters = op.inputs[1] outgrad = nn_ops.conv2d_backprop_input( shape_0, #op.inputs[1], filters, grad, dilations=dilations, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) wtgrad = nn_ops.conv2d_backprop_filter( #op.inputs[0], acts, shape_1, grad, dilations=dilations, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) if 
enable_quantop_wtgrad == 1: wtgrad = quantemu_ops.quantize_emu( wtgrad, data_format=dformat, data_type=int(os.getenv('QUANTEMU_WTGRAD_DATA_TYPE', 0)), precision= quant_wtgrad_precision, #int(os.getenv('QUANTEMU_PRECISION_CONV_GRADS', 23)), exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)), channel_blocking_type=int( os.getenv('QUANTEMU_CBLOCK_TYPE_CONV_WTGRADS', 0)), channels_per_block=int( os.getenv('QUANTEMU_CBLOCK_SIZE_WTGRADS', 0)), round_mode=int(os.getenv('QUANTEMU_RMODE_WTGRADS', 0))) return [outgrad, wtgrad] else: # No Quantization return [ nn_ops.conv2d_backprop_input(shape_0, op.inputs[1], grad, dilations=dilations, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format), nn_ops.conv2d_backprop_filter(op.inputs[0], shape_1, grad, dilations=dilations, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) ]
def _update_ensemble():
  """A method to update the tree ensemble."""
  # Get next stamp token.
  next_ensemble_stamp = ensemble_stamp + 1
  # Finalize bias stats.
  _, _, _, bias_grads, bias_hess = bias_stats_accumulator.flush(
      ensemble_stamp, next_ensemble_stamp)

  # Finalize handler splits.
  are_splits_ready_list = []
  partition_ids_list = []
  gains_list = []
  split_info_list = []
  for handler in handlers:
    (are_splits_ready, partition_ids, gains,
     split_info) = handler.make_splits(ensemble_stamp, next_ensemble_stamp,
                                       class_id)
    are_splits_ready_list.append(are_splits_ready)
    partition_ids_list.append(partition_ids)
    gains_list.append(gains)
    split_info_list.append(split_info)

  # Stack all the inputs to one tensor per type.
  # This is a workaround for the slowness of graph building in tf.cond.
  # See (b/36554864).
  split_sizes = array_ops.reshape(
      array_ops.shape_n(partition_ids_list), [len(partition_ids_list)])
  partition_ids = array_ops.concat(partition_ids_list, axis=0)
  gains = array_ops.concat(gains_list, axis=0)
  split_infos = array_ops.concat(split_info_list, axis=0)

  # Determine if all splits are ready.
  are_all_splits_ready = math_ops.reduce_all(
      array_ops.stack(
          are_splits_ready_list, axis=0, name="stack_handler_readiness"))

  # Define bias centering update operation.
  def _center_bias_fn():
    # Center tree ensemble bias.
    delta_updates = array_ops.where(bias_hess > 0, -bias_grads / bias_hess,
                                    array_ops.zeros_like(bias_grads))
    center_bias = training_ops.center_tree_ensemble_bias(
        tree_ensemble_handle=self._ensemble_handle,
        stamp_token=ensemble_stamp,
        next_stamp_token=next_ensemble_stamp,
        delta_updates=delta_updates,
        learner_config=self._learner_config_serialized)
    return continue_centering.assign(center_bias)

  # Define ensemble growing operations.
  def _grow_ensemble_ready_fn():
    # Grow the ensemble given the current candidates.
    sizes = array_ops.unstack(split_sizes)
    partition_ids_list = list(array_ops.split(partition_ids, sizes, axis=0))
    gains_list = list(array_ops.split(gains, sizes, axis=0))
    split_info_list = list(array_ops.split(split_infos, sizes, axis=0))
    return training_ops.grow_tree_ensemble(
        tree_ensemble_handle=self._ensemble_handle,
        stamp_token=ensemble_stamp,
        next_stamp_token=next_ensemble_stamp,
        learning_rate=learning_rate,
        partition_ids=partition_ids_list,
        gains=gains_list,
        splits=split_info_list,
        learner_config=self._learner_config_serialized,
        dropout_seed=dropout_seed,
        center_bias=self._center_bias)

  def _grow_ensemble_not_ready_fn():
    # Don't grow the ensemble, just update the stamp.
    return training_ops.grow_tree_ensemble(
        tree_ensemble_handle=self._ensemble_handle,
        stamp_token=ensemble_stamp,
        next_stamp_token=next_ensemble_stamp,
        learning_rate=0,
        partition_ids=[],
        gains=[],
        splits=[],
        learner_config=self._learner_config_serialized,
        dropout_seed=dropout_seed,
        center_bias=self._center_bias)

  def _grow_ensemble_fn():
    # Conditionally grow an ensemble depending on whether the splits
    # from all the handlers are ready.
    return control_flow_ops.cond(are_all_splits_ready,
                                 _grow_ensemble_ready_fn,
                                 _grow_ensemble_not_ready_fn)

  # Update ensemble.
  update_ops = [are_all_splits_ready]
  if self._center_bias:
    update_model = control_flow_ops.cond(continue_centering, _center_bias_fn,
                                         _grow_ensemble_fn)
  else:
    update_model = _grow_ensemble_fn()
  update_ops.append(update_model)

  # Update ensemble stats.
  with ops.control_dependencies([update_model]):
    stats = training_ops.tree_ensemble_stats(
        self._ensemble_handle, stamp_token=next_ensemble_stamp)
    update_ops.append(self._finalized_trees.assign(stats.num_trees))
    update_ops.append(self._attempted_trees.assign(stats.attempted_trees))
    update_ops.append(num_layers.assign(stats.num_layers))
    update_ops.append(active_tree.assign(stats.active_tree))
    update_ops.append(active_layer.assign(stats.active_layer))

  # Flush step stats.
  update_ops.extend(
      steps_accumulator.flush(ensemble_stamp, next_ensemble_stamp))
  return control_flow_ops.group(*update_ops, name="update_ensemble")
def _ssim_per_channel(img1, img2, img3, max_val=1.0, mode='test',
                      compensation=1):
  """Computes SSIM index between img1 and img2 per color channel.

  This function matches the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Details:
    - 11x11 Gaussian filter of width 1.5 is used.
    - k1 = 0.01, k2 = 0.03 as in the original paper.

  Args:
    img1: First image batch.
    img2: Second image batch.
    img3: Third image batch, forwarded to the `_my_ssim_helper` variants.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    mode: Selects the SSIM helper behavior; 'debug' additionally computes
      reference statistics with `_ssim_helper`.
    compensation: Compensation factor forwarded to the SSIM helpers.

  Returns:
    A pair of tensors containing the channel-wise SSIM and contrast-structure
    values with shape [..., channels], or, in 'debug' mode, the reference
    luminance/contrast-structure statistics as well.
  """
  filter_size = constant_op.constant(11, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(1.5, dtype=img1.dtype)

  shape1, shape2, shape3 = array_ops.shape_n([img1, img2, img3])
  checks = [
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape1[-3:-1], filter_size)),
          [shape1, filter_size], summarize=8),
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape2[-3:-1], filter_size)),
          [shape2, filter_size], summarize=8),
      control_flow_ops.Assert(math_ops.reduce_all(
          math_ops.greater_equal(shape3[-3:-1], filter_size)),
          [shape3, filter_size], summarize=8)
  ]

  # Enforce the check to run before computation.
  with ops.control_dependencies(checks):
    img1 = array_ops.identity(img1)

  # TODO(sjhwang): Try to cache kernels and compensation factor.
  kernel = _fspecial_gauss(filter_size, filter_sigma)
  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])

  # The correct compensation factor is
  # `1.0 - tf.reduce_sum(tf.square(kernel))`, but to match the MATLAB
  # implementation of MS-SSIM, 1.0 is used instead.
  # compensation = 1.0

  # TODO(sjhwang): Try FFT.
  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
  def reducer(x):
    shape = array_ops.shape(x)
    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
    return array_ops.reshape(
        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))

  # luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation)
  if mode == 'debug':
    luminance_gt, cs_gt = _ssim_helper(img1, img2, reducer, max_val,
                                       compensation)
    luminance, cs = _my_ssim_helper(img1, img1 - img2, img1 - img3, reducer,
                                    max_val, compensation, mode='train')
  else:
    luminance, cs = _my_ssim_helper(img1, img2, img3, reducer, max_val,
                                    compensation, mode)

  if mode == 'debug':
    axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
    ssim_val_gt = math_ops.reduce_mean(luminance_gt * cs_gt, axes)
    lm_gt = math_ops.reduce_mean(luminance_gt, axes)
    cs_gt = math_ops.reduce_mean(cs_gt, axes)
    lm = math_ops.reduce_mean(luminance, axes)
    cs = math_ops.reduce_mean(cs, axes)
    return lm_gt, cs_gt, lm, cs, ssim_val_gt
  else:
    # Average over the second and the third from the last: height, width.
    axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
    # ssim_val = math_ops.reduce_mean(luminance * cs, axes)
    ssim_val = math_ops.reduce_mean(luminance + cs, axes)
    print('ssim_shape', ssim_val.shape)
    cs = math_ops.reduce_mean(cs, axes)
    return ssim_val, cs
def _initialize_updated_shapes(self, session):
  shapes = array_ops.shape_n(self._vars)
  var_shapes = list(map(tuple, session.run(shapes)))

  if self._var_shapes is not None:
    new_old_shapes = zip(self._var_shapes, var_shapes)
    if all([old == new for old, new in new_old_shapes]):
      return

  self._var_shapes = var_shapes
  # Materialize the (var, shape) pairs so they can be iterated again after
  # building the dict below.
  vars_and_shapes = list(zip(self._vars, self._var_shapes))
  vars_and_shapes_dict = dict(vars_and_shapes)

  packed_bounds = None
  if self._var_to_bounds is not None:
    left_packed_bounds = []
    right_packed_bounds = []
    for var, var_shape in vars_and_shapes:
      shape = list(var_shape)
      bounds = (-np.infty, np.infty)
      if var in self._var_to_bounds:
        bounds = self._var_to_bounds[var]
      left_packed_bounds.extend(list(np.broadcast_to(bounds[0], shape).flat))
      right_packed_bounds.extend(list(np.broadcast_to(bounds[1], shape).flat))
    packed_bounds = list(zip(left_packed_bounds, right_packed_bounds))
  self._packed_bounds = packed_bounds

  self._update_placeholders = [
      array_ops.placeholder(var.dtype) for var in self._vars
  ]
  self._var_updates = [
      var.assign(array_ops.reshape(placeholder, vars_and_shapes_dict[var]))
      for var, placeholder in zip(self._vars, self._update_placeholders)
  ]

  loss_grads = _compute_gradients(self._loss, self._vars)
  equalities_grads = [
      _compute_gradients(equality, self._vars)
      for equality in self._equalities
  ]
  inequalities_grads = [
      _compute_gradients(inequality, self._vars)
      for inequality in self._inequalities
  ]

  self._packed_var = self._pack(self._vars)
  self._packed_loss_grad = self._pack(loss_grads)
  self._packed_equality_grads = [
      self._pack(equality_grads) for equality_grads in equalities_grads
  ]
  self._packed_inequality_grads = [
      self._pack(inequality_grads)
      for inequality_grads in inequalities_grads
  ]

  dims = [_prod(vars_and_shapes_dict[var]) for var in self._vars]
  accumulated_dims = list(_accumulate(dims))
  self._packing_slices = [
      slice(start, end)
      for start, end in zip(accumulated_dims[:-1], accumulated_dims[1:])
  ]