def GetParams(self):
    """Tests for scale & elementwise layers in TF-TRT.

    Builds a graph in which a float32 placeholder is repeatedly added to
    constants of several broadcastable shapes, once as ``x + a`` and once as
    ``a + x``. Every sum is passed through ``self.trt_incompatible_op`` and
    the final tensor is reshaped to ``[5, -1]``.

    Returns:
        The `TfTrtIntegrationTestParams` describing the graph, its input and
        the expected output dimensions.
    """
    input_name = "input"
    output_name = "output"
    input_dims = [10, 24, 24, 20]
    # Shapes of the constants that are broadcast against the input.
    weight_shapes = (
        (1,),  # scale
        (24, 1, 1),  # scale
        (24, 24, 20),  # scale
        (20,),  # elementwise
        (1, 24, 1, 1),  # elementwise
        (1, 24, 24, 1),  # elementwise
        (1, 24, 24, 20),  # elementwise
        (24, 20),  # elementwise
    )

    graph = ops.Graph()
    with graph.as_default():
        tensor = array_ops.placeholder(
            dtype=dtypes.float32, shape=input_dims, name=input_name)
        for weights_shape in weight_shapes:
            # Constant as the right-hand operand.
            rhs = self._ConstOp(weights_shape)
            tensor = self.trt_incompatible_op(tensor + rhs)
            # Constant as the left-hand operand.
            lhs = self._ConstOp(weights_shape)
            tensor = self.trt_incompatible_op(lhs + tensor)
        gen_array_ops.reshape(tensor, [5, -1], name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[[input_dims]],
        output_names=[output_name],
        expected_output_dims=[[[5, 23040]]])
def _dense_projection(inputs, shape, trainable=True):
    """Project `inputs` so that its trailing dimensions match `shape`.

    The expected static shape is ``_EXTRA_DIMS + list(shape)``. If `inputs`
    already has exactly that static shape it is returned untouched.
    Otherwise every dimension after the first two is flattened (only needed
    when the rank is above 3), a bias-free dense layer with
    ``product(shape)`` units is applied, and the result is reshaped to the
    two leading runtime dimensions followed by `shape`.

    Args:
        inputs: Tensor whose rank equals ``len(_EXTRA_DIMS) + len(shape)``.
        shape: Iterable of ints giving the desired trailing dimensions.
        trainable: Forwarded to `core.dense`.

    Returns:
        `inputs` itself, or the projected tensor.
    """
    shape = list(shape)
    flat_shape = array_utils.product(shape)
    target_shape = _EXTRA_DIMS + shape
    input_shape = inputs.get_shape().as_list()
    input_dims = len(input_shape)
    expected_dims = len(target_shape)
    # The original message reported the two counts swapped and, because of a
    # missing comma, glued both sentences together without a separator.
    assert_utils.assert_true(
        input_dims == expected_dims,
        '`inputs` must have the same number of dims as the number of '
        'expected dims: expected = {}, actual = {}'.format(
            expected_dims, input_dims))

    if not array_utils.all_equal(input_shape, target_shape):
        # Runtime shape, so unknown leading dimensions are handled.
        input_shape_ = array_ops.shape(inputs)
        if len(input_shape) > 3:
            inputs = gen_array_ops.reshape(
                inputs, [input_shape_[0], input_shape_[1], -1])
        inputs = core.dense(
            inputs, flat_shape, use_bias=False, trainable=trainable)
        inputs = gen_array_ops.reshape(
            inputs, [input_shape_[0], input_shape_[1]] + shape)
    return inputs
def GetParams(self):
    """Tests for scale & elementwise layers in TF-TRT.

    Builds a graph in which a float32 placeholder is repeatedly added to
    constants of several broadcastable shapes, once as ``x + a`` and once as
    ``a + x``, with a sigmoid after every sum. The final tensor is reshaped
    to ``[5, -1]``.

    Returns:
        The `TfTrtIntegrationTestParams` describing the graph, its input and
        the expected output dimensions.
    """
    input_name = "input"
    output_name = "output"
    input_dims = [10, 24, 24, 20]
    # Shapes of the constants that are broadcast against the input.
    weight_shapes = (
        (1,),  # scale
        (24, 1, 1),  # scale
        (24, 24, 20),  # scale
        (20,),  # elementwise
        (1, 24, 1, 1),  # elementwise
        (1, 24, 24, 1),  # elementwise
        (1, 24, 24, 20),  # elementwise
        (24, 20),  # elementwise
    )

    graph = ops.Graph()
    with graph.as_default():
        tensor = array_ops.placeholder(
            dtype=dtypes.float32, shape=input_dims, name=input_name)
        for weights_shape in weight_shapes:
            # Constant as the right-hand operand.
            rhs = self._ConstOp(weights_shape)
            tensor = math_ops.sigmoid(tensor + rhs)
            # Constant as the left-hand operand.
            lhs = self._ConstOp(weights_shape)
            tensor = math_ops.sigmoid(lhs + tensor)
        gen_array_ops.reshape(tensor, [5, -1], name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        output_names=[output_name],
        expected_output_dims=[(5, 23040)])
def update_state(self, y_true, y_pred, sample_weight=None):
    """Accumulate the normalised landmark error of one batch.

    Only acts when ``self.calc_fr == False``. The first
    ``landmark_num * 2`` columns of `y_true` and `y_pred` are taken as
    flattened (x, y) landmark coordinates; predictions go through a sigmoid
    first. The per-sample error is the sum of point-wise Euclidean
    distances divided by ``interocular_distance * landmark_num``.

    Updates ``self.landmark_error`` (sum of normalised errors),
    ``self.failure_num`` (count of samples whose normalised error exceeds
    0.1) and ``self.total`` (incremented by ``self.batch_size``).

    Args:
        y_true: Ground-truth tensor, one row per sample.
        y_pred: Prediction tensor, one row per sample.
        sample_weight: Accepted but not used by this method.
    """
    # NOTE(review): the original indentation was lost; the whole body is
    # assumed to sit under this guard -- confirm.
    if self.calc_fr == False:
        true_landmarks = y_true[:, :self.landmark_num * 2]
        pred_landmarks = y_pred[:, :self.landmark_num * 2]
        pred_landmarks = sigmoid(pred_landmarks)
        # calc landmark error: Euclidean distance per landmark (reduce over
        # the coordinate axis), then summed over the landmarks of a sample.
        error_all_points = reduce_sum(
            sqrt(
                reduce_sum(
                    square(
                        reshape(pred_landmarks,
                                (self.batch_size, self.landmark_num, 2)) -
                        reshape(true_landmarks,
                                (self.batch_size, self.landmark_num, 2))),
                    [2])), 1)
        # use interocular distance calc landmark error: columns 120:122 and
        # 144:146 are the (x, y) pairs of landmark indices 60 and 72.
        interocular_distance = sqrt(
            reduce_sum(
                square((true_landmarks[:, 120:122] -
                        true_landmarks[:, 144:146])), 1))
        error_norm = error_all_points / (interocular_distance *
                                         self.landmark_num)
        self.landmark_error.assign_add(reduce_sum(error_norm))
        # error norm > 0.1 ===> failure_number + 1
        self.failure_num.assign_add(
            reduce_sum(cast(error_norm > 0.1, self.dtype)))
        self.total.assign_add(self.batch_size)
def lovasz_jaccard_loss(y_true, y_pred, error_func=None, data_format=None):
    '''Lovasz extension for Jaccard index, or Intersection over Union (IoU). (loss)

    This function applies the theory of the Lovasz extension. Although the
    Lovasz extension can be used on any submodular set function, this
    implementation is aimed at constructing the trainable complement of IoU.
    To learn more about this topic, please refer to:
        The Lovasz-Softmax loss: A tractable surrogate for the optimization
        of the intersection-over-union measure in neural networks
        https://arxiv.org/abs/1705.08790
    This implementation is not adapted from the author's github code. It
    computes the Lovasz loss on each channel of each sample independently
    and then averages the results.
    NOTE THAT THIS IMPLEMENTATION IS THE COMPLEMENT OF THE JACCARD INDEX.

    Arguments:
        error_func: the function used to calculate errors. If None,
            `losses.mean_absolute_error` is used.
        data_format: 'channels_first' or 'channels_last'. If None, the value
            of `K.image_data_format()` is used.
    Input:
        y_true: label, tensor with at least 3 axes.
        y_pred: prediction, tensor with at least 3 axes.
    Output:
        scalar, the mean of the per-channel, per-sample losses.
    Raises:
        ValueError: if `y_true` has fewer than 3 axes.
    '''
    get_shapes = y_true.get_shape().as_list()
    get_dims = len(get_shapes)
    if get_dims < 3:
        raise ValueError(
            'The input tensor should has channel dimension, i.e. it should have at least 3 axes.'
        )
    if data_format is None:
        data_format = K.image_data_format()
    if data_format == 'channels_last':
        # Move the channel axis next to the batch axis (channels_first).
        get_permute_axes = (0, get_dims - 1, *range(1, get_dims - 1))
        get_length = _get_prod(get_shapes[1:-1])
        y_true = array_ops.transpose(y_true, perm=get_permute_axes)
        y_pred = array_ops.transpose(y_pred, perm=get_permute_axes)
    else:
        get_length = _get_prod(get_shapes[2:])
    # One row per (sample, channel) pair. The original calls omitted the
    # tensor argument and passed only the target shape to `reshape`.
    y_true = gen_array_ops.reshape(y_true, [-1, get_length])
    y_pred = gen_array_ops.reshape(y_pred, [-1, get_length])
    if error_func is None:
        error_func = losses.mean_absolute_error

    def split_process(inputs):
        # Loss of a single flattened (sample, channel) row.
        get_y_true, get_y_pred = inputs
        get_errors = error_func(get_y_true, get_y_pred)
        return _lovasz_jaccard_flat(get_errors, get_y_true)

    get_losses = functional_ops.map_fn(
        split_process, (y_true, y_pred), dtype=y_pred.dtype)
    return math_ops.reduce_mean(get_losses)
def q_quantile(q_dist, expected_q_dist):
    """Expand two quantile tensors into square quantile-by-quantile grids.

    Each input is tiled ``num_quantiles`` times along its last axis and
    reshaped to ``[batch, sequence, num_quantiles, num_quantiles]``. The
    grid built from `expected_q_dist` additionally has its last two axes
    swapped.

    Args:
        q_dist: Tensor whose static last dimension is the quantile count.
        expected_q_dist: Tensor tiled and reshaped the same way.

    Returns:
        Tuple ``(big_q_dist, big_expected_q_dist)``.
    """
    runtime_shape = array_ops.shape(q_dist)
    batch_size = runtime_shape[0]
    sequence_size = runtime_shape[1]
    num_quantiles = q_dist.get_shape()[-1].value
    multiples = [1, 1, num_quantiles]
    grid_shape = [batch_size, sequence_size, num_quantiles, num_quantiles]

    expected_tiled = array_ops.tile(expected_q_dist, multiples)
    expected_grid = gen_array_ops.reshape(expected_tiled, grid_shape)
    # Swap the two quantile axes of the expected grid.
    expected_grid = array_ops.transpose(expected_grid, perm=[0, 1, 3, 2])

    q_tiled = array_ops.tile(q_dist, multiples)
    q_grid = gen_array_ops.reshape(q_tiled, grid_shape)
    return (q_grid, expected_grid)
def GetParams(self):
    """Testing conversion of BatchMatMul in TF-TRT conversion.

    The graph multiplies the input by a constant and by two placeholder
    weights, combines the three branches and squeezes the sum. The third
    branch is wrapped in ``self.trt_incompatible_op`` on both ends.

    Returns:
        The `TfTrtIntegrationTestParams` for the graph.
    """
    dtype = dtypes.float32
    input_name, input_dims = "input", [12, 5, 8, 12]
    w1_name, w1_dims = "matmul_w1", [12, 5, 12, 7]
    w2_name, w2_dims = "matmul_w2", [12, 12, 7]
    output_name = "output"

    graph = ops.Graph()
    with graph.as_default():
        # Only the batch dimension of the input is left unknown.
        inp = array_ops.placeholder(
            dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
        w1 = array_ops.placeholder(dtype=dtype, shape=w1_dims, name=w1_name)
        w2 = array_ops.placeholder(dtype=dtype, shape=w2_dims, name=w2_name)
        with graph.device("/GPU:0"):
            # Branch 1: matmul with a constant, then broadcast add.
            const_weights = constant_op.constant(
                np.random.randn(12, 5, 12, 7), dtype=dtype)
            branch1 = math_ops.matmul(inp, const_weights)
            offset1 = constant_op.constant(
                np.random.randn(5, 1, 1), dtype=dtype)
            branch1 = branch1 + offset1

            # Branch 2: matmul with a placeholder, then broadcast multiply.
            branch2 = math_ops.matmul(inp, w1)
            scale2 = constant_op.constant(
                np.random.randn(5, 1, 1), dtype=dtype)
            branch2 = branch2 * scale2

            # Branch 3: rank-3 matmul between two incompatible ops.
            flat_inp = self.trt_incompatible_op(inp)
            flat_inp = gen_array_ops.reshape(flat_inp, [12, 40, 12])
            branch3 = math_ops.matmul(flat_inp, w2)
            offset3 = constant_op.constant(
                np.random.randn(40, 1), dtype=dtype)
            branch3 = branch3 + offset3
            branch3 = gen_array_ops.reshape(branch3, [12, 5, 8, 7])
            branch3 = self.trt_incompatible_op(branch3)

            total = branch1 + branch2 + branch3
        array_ops.squeeze(total, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(add_shapes=True),
        input_names=[input_name, w1_name, w2_name],
        input_dims=[[input_dims, w1_dims, w2_dims]],
        output_names=[output_name],
        expected_output_dims=[[[12, 5, 8, 7]]])
def GraphFn(self, x):
    """Combine `x` with twelve random constants and concatenate the results.

    Each constant is applied with a different binary op / operand order,
    the twelve results are concatenated along the last axis in two groups
    and the concatenation is reshaped to ``[2, -1]`` as ``output_0``.
    """
    dtype = x.dtype

    def rand_const(*dims):
        # Fresh random-normal constant with the dtype of the input.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    # scale
    group1 = [
        x / rand_const(3, 1, 1),
        rand_const(3, 1, 1) / x,
        rand_const(1, 3, 1) + x,
        x * rand_const(1, 3, 1),
        x - rand_const(3, 1, 1),
        rand_const(3, 1, 1) - x,
    ]
    group2 = [
        x - rand_const(3, 1),
        rand_const(3, 1) - x,
        gen_math_ops.maximum(x, rand_const(3, 1, 1)),
        gen_math_ops.minimum(rand_const(3, 1), x),
        x * rand_const(3),
        rand_const(1) * x,
    ]
    concat1 = array_ops.concat(group1, axis=-1)
    concat2 = array_ops.concat(group2, axis=3)
    merged = array_ops.concat([concat1, concat2], axis=-1)
    return gen_array_ops.reshape(merged, [2, -1], name="output_0")
def call(self, inputs, state, training=False):
    """Run one step of the fast-weights cell.

    Args:
        inputs: Input tensor for this step.
        state: Pair ``(hidden_state, fast_weights)``.
        training: Accepted but not used by this method.

    Returns:
        ``(hidden_state, FastWeightsStateTuple(hidden_state, fast_weights))``
        where `hidden_state` is reshaped to ``[batch_size, num_units]``.
    """
    previous_hidden, fast_weights = state
    batch_size = array_ops.shape(fast_weights)[0]

    # Slow-weight pre-activation, with a length-1 axis inserted at position 1.
    recurrent_term = math_ops.matmul(previous_hidden, self._kernel_w)
    input_term = nn_ops.bias_add(
        math_ops.matmul(inputs, self._kernel_c), self._bias_c)
    slow = array_ops.expand_dims(
        math_ops.add(recurrent_term, input_term), 1)
    hidden = self._activation(slow)

    # fast_weights <- decay * fast_weights + learning_rate * hidden^T hidden
    decayed = math_ops.scalar_mul(self._fast_decay_rate, fast_weights)
    outer = math_ops.matmul(
        array_ops.transpose(hidden, [0, 2, 1]), hidden)
    fast_weights = math_ops.add(
        decayed, math_ops.scalar_mul(self._fast_learning_rate, outer))

    # Inner step: add the fast-weight contribution to the slow term.
    inner_hidden = array_ops.identity(hidden)
    inner = math_ops.add(
        slow, math_ops.matmul(inner_hidden, fast_weights))
    if self._use_layer_norm:
        inner = layers.layer_norm(inner)
    inner_hidden = self._activation(inner)

    new_hidden = gen_array_ops.reshape(
        inner_hidden, [batch_size, self._num_units])
    return new_hidden, FastWeightsStateTuple(new_hidden, fast_weights)
def call(self, inputs, state):
    """c_k_RNN basic operations.

    `state` is the flattened history and is reshaped internally to
    ``[-1, c_n, num_units]``. The new state is
    ``activation([inputs, history[:, 0]] @ kernel + bias)`` plus
    ``einsum('ijk,jk->ik', history, kernel_A)``.

    Returns:
        ``(output, full_state)``. `full_state` is the new state followed by
        the first ``num_units * (c_n - 1)`` columns of the incoming state.
        `output` is the rows of ``self._kernel`` past the input width
        concatenated with ``self._kernel_A`` along axis 0.
    """
    # Keep everything except the last `num_units` columns of the flat
    # history; the new state is prepended to it below.
    carried = state[:, :self._num_units * (self._c_n - 1)]
    # [batch_size, c_n, num_units] view of the history.
    history = gen_array_ops.reshape(
        state, [-1, self._c_n, self._num_units])

    # activation(W[x, h] + b), with h the first slice of the history.
    cell_input = array_ops.concat([inputs, history[:, 0, :]], 1)
    new_state = math_ops.matmul(cell_input, self._kernel)
    new_state = nn_ops.bias_add(new_state, self._bias)
    new_state = self._activation(new_state)
    # Einstein summation over the history axis; the 'jk' subscript means
    # `_kernel_A` is used as a rank-2 tensor here.
    new_state += special_math_ops.einsum(
        'ijk,jk->ik', history, self._kernel_A)

    full_state = array_ops.concat([new_state, carried], axis=1)
    input_width = inputs.get_shape().as_list()[1]
    output = array_ops.concat(
        [self._kernel[input_width:, :], self._kernel_A], axis=0)
    return output, full_state
def testPrefixPathWithReshape(self):
    """Run a 1x1 Conv2D, reshape and add on the IPU device and check values.

    The convolution has 2 filters and a ones-initialised kernel; its output
    is reshaped to ``[32]`` and added to a second placeholder fed with ones.
    """
    with self.session() as sess:
        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            z = array_ops.placeholder(np.float32, shape=[32])
            with variable_scope.variable_scope("vs", use_resource=True):
                y = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())(x)
            res = gen_array_ops.reshape(y, [32]) + z
        # NOTE(review): the original indentation was lost; the IPU
        # configuration is assumed to sit outside the device scope -- confirm.
        opts = utils.create_ipu_config()
        utils.configure_ipu_system(opts)
        sess.run(variables.global_variables_initializer())
        result = sess.run(res, {
            x: np.reshape(np.arange(32), [1, 4, 4, 2]),
            z: np.ones([32])
        })
        # Confirmed with values on the CPU.
        self.assertAllClose(result, [
            2., 2., 6., 6., 10., 10., 14., 14., 18., 18., 22., 22., 26., 26.,
            30., 30., 34., 34., 38., 38., 42., 42., 46., 46., 50., 50., 54.,
            54., 58., 58., 62., 62.
        ])
def call(self, inputs):
    """Flatten `inputs` to rank 2, keeping the leading dimension.

    With ``self._channels_first`` set, axis 1 is moved to the end first.
    Rank-1 inputs in graph mode get a trailing axis of size 1 instead.
    """
    if self._channels_first:
        rank = inputs.shape.rank
        if rank and rank > 1:
            # Switch to channels-last format: (0, 2, ..., rank - 1, 1).
            permutation = [0, *range(2, rank), 1]
            inputs = array_ops.transpose(inputs, perm=permutation)

    if context.executing_eagerly():
        # Full static shape is guaranteed to be available.
        # Performance: Using `constant_op` is much faster than passing a list.
        eager_shape = constant_op.constant([inputs.shape[0], -1])
        return gen_array_ops.reshape(inputs, eager_shape)

    input_shape = inputs.shape
    rank = input_shape.rank
    if rank == 1:
        return array_ops.expand_dims_v2(inputs, axis=1)

    batch_dim = tensor_shape.dimension_value(input_shape[0])
    non_batch_dims = input_shape[1:]
    # Reshape in a way that preserves as much shape info as possible.
    if non_batch_dims.is_fully_defined():
        last_dim = int(functools.reduce(operator.mul, non_batch_dims))
        flattened_shape = constant_op.constant([-1, last_dim])
    elif batch_dim is not None:
        flattened_shape = constant_op.constant([int(batch_dim), -1])
    else:
        flattened_shape = [array_ops.shape_v2(inputs)[0], -1]
    return array_ops.reshape(inputs, flattened_shape)
def GraphFn(self, x, y):
    """Reshape ``y * y + y`` to the runtime shape of `x`.

    Returns:
        Tuple of the shape tensor (named ``output_0``) and the reshaped
        value (named ``output_1``).
    """
    x_shape = array_ops.shape(x)
    reshaped = gen_array_ops.reshape(y * y + y, x_shape)
    shape_out = array_ops.identity(x_shape, name="output_0")
    value_out = array_ops.identity(reshaped, name="output_1")
    return (shape_out, value_out)
def interaction_logit_fn(features):
    """Interaction model logit_fn.

    Computes ``0.5 * ((sum of embeddings)^2 - sum of (embeddings^2))`` over
    the column axis and sums the result over the last axis.

    Args:
        features: This is the first item returned from the `input_fn` passed
            to `train`, `evaluate`, and `predict`. This should be a single
            `Tensor` or `dict` of same.

    Returns:
        A `Tensor` representing the logits.
    """
    # shape(batch_size, column_num * embedding_size)
    flat_embeddings = feature_column.input_layer(features, feature_columns)
    embeddings = gen_array_ops.reshape(
        flat_embeddings, (-1, column_num, dimension),
        "interaction_embeddings")

    # Square of the sum over columns.
    column_sum = math_ops.reduce_sum(embeddings, 1)
    square_of_sum = math_ops.square(column_sum)

    # Sum of the squares over columns.
    squares = math_ops.square(embeddings)
    sum_of_squares = math_ops.reduce_sum(squares, 1)

    # Second-order term.
    difference = math_ops.subtract(square_of_sum, sum_of_squares)
    return math_ops.reduce_sum(0.5 * difference, -1)
def maxout(inputs, num_units, axis=None, outputs_collections=None, scope=None):
    """Adds a maxout op: max pooling performed in the filter/channel dimension.

    This can also be used after fully-connected layers to reduce the number
    of features.

    Args:
        inputs: A Tensor on which maxout will be performed.
        num_units: Number of features that remain after max pooling at the
            channel dimension. The number of channels must be a multiple of
            this value.
        axis: The dimension where max pooling will be performed. Default is
            the last dimension.
        outputs_collections: The collections to which the outputs are added.
        scope: Optional scope for name_scope.

    Returns:
        A `Tensor` representing the results of the pooling operation.

    Raises:
        ValueError: if the size of `axis` is not statically known, or if it
            is not a multiple of `num_units`.
    """
    with ops.name_scope(scope, 'MaxOut', [inputs]) as sc:
        inputs = ops.convert_to_tensor(inputs)
        shape = inputs.get_shape().as_list()
        if axis is None:
            # Assume that channel is the last dimension
            axis = -1
        num_channels = shape[axis]
        if num_channels is None:
            # Without this check the modulo below fails with a TypeError.
            raise ValueError('number of features along axis {} must be '
                             'statically known'.format(axis))
        if num_channels % num_units:
            raise ValueError('number of features({}) is not '
                             'a multiple of num_units({})'.format(
                                 num_channels, num_units))
        # Split the pooled axis into (num_units, group_size); the group axis
        # is appended last and reduced away below.
        shape[axis] = num_units
        shape += [num_channels // num_units]
        # Dimensions that are unknown statically (typically the batch size)
        # cannot be placed in the reshape target as `None`; read them from
        # the runtime shape instead.
        if any(dim is None for dim in shape):
            runtime_shape = gen_array_ops.shape(inputs)
            shape = [
                runtime_shape[i] if dim is None else dim
                for i, dim in enumerate(shape)
            ]
        # NOTE(review): for an `axis` other than the last one, this reshape
        # groups elements in memory order, not along `axis` -- confirm that
        # callers only rely on the last-axis case.
        # `keep_dims=False` was the default and is omitted.
        outputs = math_ops.reduce_max(
            gen_array_ops.reshape(inputs, shape), -1)
        return utils.collect_named_outputs(outputs_collections, sc, outputs)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply the gradients unless they are too stale, and count the drops.

    Staleness is ``global_step - self._local_step``. When it is at most
    ``self._staleness`` the wrapped optimizer applies the gradients and 0 is
    added to the ``stale_counter`` variable; otherwise the gradients are
    dropped and 1 is added. A scalar summary of the drop ratio is emitted.

    Args:
        grads_and_vars: Iterable of ``(gradient, variable)`` pairs. A
            gradient may be `None` for variables without gradient.
        global_step: Step variable used to measure staleness. The staleness
            computation uses it directly, so it must be provided.
        name: Optional name forwarded to the wrapped optimizer.

    Returns:
        The op that updates the stale-gradient counter, and through its
        control dependencies conditionally applies the gradients.
    """
    gradients = []
    # Number of stale gradients.
    stale_counter = variable_scope.get_variable(
        "stale_counter", [],
        initializer=init_ops.zeros_initializer(),
        trainable=False)

    def _AcceptGradientOp():
        # Apply the gradients, then contribute 0 to the stale counter.
        with ops.control_dependencies(
            [self._opt.apply_gradients(
                grads_and_vars, global_step=global_step, name=name)]):
            return gen_array_ops.identity(0.0)

    def _DropGradientOp():
        # Skip the update and contribute 1 to the stale counter.
        return gen_array_ops.identity(1.0)

    for grad_and_var in grads_and_vars:
        grad = grad_and_var[0]
        if isinstance(grad, ops.Tensor):
            gradients.append(grad)
        elif grad is not None:
            # Non-Tensor gradients are tracked through their op. `None`
            # gradients have nothing to wait on and used to crash on `.op`.
            gradients.append(grad.op)

    # Measure staleness only after all gradients have been computed.
    with ops.control_dependencies(gradients), ops.colocate_with(global_step):
        staleness = gen_array_ops.reshape(
            global_step - self._local_step, shape=())

    conditional_update = stale_counter.assign_add(control_flow_ops.cond(
        gen_math_ops.less_equal(staleness, self._staleness),
        _AcceptGradientOp, _DropGradientOp))

    summary.scalar(
        "Gradient staleness percentage",
        stale_counter / (math_ops.cast(global_step + 1, dtypes.float32)))
    return conditional_update
def call(self, inputs, state):
    """Run one step of the fast-weights LSTM cell.

    Args:
        inputs: Input tensor for this step.
        state: Pair ``((c, h), fast_weights)``.

    Returns:
        ``(new_h, FastWeightsStateTuple(LSTMStateTuple(new_c, new_h),
        fast_weights))``.
    """
    (c, h), fast_weights = state
    batch_size = array_ops.shape(fast_weights)[0]
    add = math_ops.add
    multiply = math_ops.multiply
    sigmoid = math_ops.sigmoid
    scalar_mul = math_ops.scalar_mul

    # Parameters of gates are concatenated into one multiply for efficiency.
    gate_inputs = math_ops.matmul(
        array_ops.concat([inputs, h], 1), self._kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
    if self._use_layer_norm:
        gate_inputs = layers.layer_norm(gate_inputs)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=4, axis=1)

    fast_j = self._activation(j)
    expand_fast_j = array_ops.expand_dims(fast_j, 1)
    # fast_weights <- decay * fast_weights + learning_rate * j^T j.
    # The two coefficients were previously swapped (the learning rate scaled
    # the old weights and the decay rate scaled the outer product).
    fast_weights = add(
        scalar_mul(self._fast_decay_rate, fast_weights),
        scalar_mul(
            self._fast_learning_rate,
            math_ops.matmul(
                array_ops.transpose(expand_fast_j, [0, 2, 1]),
                expand_fast_j)))

    # Contribution of the fast weights to the new input.
    fast_weights_j = math_ops.matmul(
        gen_array_ops.reshape(fast_j, [batch_size, 1, -1]), fast_weights)
    fast_weights_j = gen_array_ops.reshape(
        fast_weights_j, [batch_size, self._num_units])
    fast_j = self._activation(add(fast_j, fast_weights_j))

    # Note that using `add` and `multiply` instead of `+` and `*` gives a
    # performance improvement. So using those at the cost of readability.
    new_c = add(
        multiply(c, sigmoid(add(f, self._forget_bias))),
        multiply(sigmoid(i), fast_j))
    if self._use_layer_norm:
        new_c = layers.layer_norm(new_c)
    new_h = multiply(self._activation(new_c), sigmoid(o))

    return new_h, FastWeightsStateTuple(
        rnn_cell_impl.LSTMStateTuple(new_c, new_h), fast_weights)
def GetParams(self):
    """Testing Concatenation in TF-TRT conversion.

    Combines a placeholder with twelve random constants through different
    binary ops, concatenates the results along the last axis and reshapes
    the concatenation to ``[2, -1]``.

    Returns:
        The `TfTrtIntegrationTestParams` for the graph.
    """
    dtype = dtypes.float32
    input_name = "input"
    output_name = "output"
    input_dims = [2, 3, 3, 1]

    def rand_const(*dims):
        # Fresh random-normal constant of the test dtype.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    graph = ops.Graph()
    with graph.as_default():
        x = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        # scale
        group1 = [
            x / rand_const(3, 1, 1),
            rand_const(3, 1, 1) / x,
            rand_const(1, 3, 1) + x,
            x * rand_const(1, 3, 1),
            x - rand_const(3, 1, 1),
            rand_const(3, 1, 1) - x,
        ]
        group2 = [
            x - rand_const(3, 1),
            rand_const(3, 1) - x,
            gen_math_ops.maximum(x, rand_const(3, 1, 1)),
            gen_math_ops.minimum(rand_const(3, 1), x),
            x * rand_const(3),
            rand_const(1) * x,
        ]
        concat1 = array_ops.concat(group1, axis=-1)
        concat2 = array_ops.concat(group2, axis=3)
        merged = array_ops.concat([concat1, concat2], axis=-1)
        gen_array_ops.reshape(merged, [2, -1], name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        output_names=[output_name],
        expected_output_dims=[(2, 126)])
def unflatten(x, shapes):
    """Cut flat `x` into consecutive chunks and reshape each to its shape.

    Args:
        x: Flat tensor/array holding all values back to back.
        shapes: Iterable of shapes; chunk ``i`` has ``product(shapes[i])``
            elements.

    Returns:
        List with one reshaped chunk per entry of `shapes`.
    """
    pieces = []
    offset = 0
    for shape in shapes:
        size = product(shape)
        chunk = x[offset:offset + size]
        pieces.append(gen_array_ops.reshape(chunk, shape))
        offset += size
    return pieces
def GraphFn(self, x):
    """Build a graph that computes on the shape tensor of ``2 * x + 1``.

    The shape is reshaped to ``[2, 2]``, cast to float32, passed through
    ``self.trt_incompatible_op`` and mapped to ``q * 2 + q * q``, which is
    returned as ``output_0``.
    """
    affine = 2 * x + 1
    shape_tensor = array_ops.shape(affine)
    shape_matrix = gen_array_ops.reshape(shape_tensor, [2, 2])
    as_float = math_ops.cast(shape_matrix, dtypes.float32)
    blocked = self.trt_incompatible_op(as_float)
    result = blocked * 2 + blocked * blocked
    return array_ops.identity(result, name="output_0")
def gather_along_second_axis(data, indices):
    """Pick one entry along axis 2 of `data` for every (axis 0, axis 1) pair.

    For rank-3 `data` the result is ``data[i, j, indices[i, j]]`` with shape
    ``[d0, d1]``. For rank-4 `data` it is ``data[i, j, indices[i, j], :]``
    with shape ``[d0, d1, -1]``.

    Args:
        data: Tensor of static rank 3 or 4.
        indices: Tensor with ``d0 * d1`` elements, indexing axis 2 of `data`.

    Returns:
        The gathered tensor, reshaped back to the two leading dimensions.

    Raises:
        ValueError: if `data` is not of rank 3 or 4. Other ranks used to
            fail late with an unbound local variable.
    """
    ndims = len(data.get_shape().as_list())
    if ndims not in (3, 4):
        raise ValueError(
            'gather_along_second_axis supports `data` of rank 3 or 4, '
            'got rank {}.'.format(ndims))

    shape = array_ops.shape(data)
    # Merge the two leading axes so each row is one (i, j) pair.
    re_shape = [shape[0] * shape[1]]
    indices = array_ops.reshape(indices, re_shape)
    for idx in range(2, ndims):
        re_shape.append(shape[idx])
    data = array_ops.reshape(data, re_shape)

    # Pair every merged row number with the index to take from that row.
    batch_offset = math_ops.range(0, array_ops.shape(data)[0])
    flat_indices = array_ops.stack([batch_offset, indices], axis=1)
    two_d = gen_array_ops.gather_nd(data, flat_indices)

    if ndims == 4:
        return gen_array_ops.reshape(two_d, [shape[0], shape[1], -1])
    return gen_array_ops.reshape(two_d, [shape[0], shape[1]])
def GetParams(self):
    """Testing Concatenation in TF-TRT conversion.

    Combines a placeholder with twelve random constants through different
    binary ops, concatenates the results along the last axis and reshapes
    the concatenation to ``[2, -1]`` under the name ``self.output_name``.

    Returns:
        The `TfTrtIntegrationTestParams` for the graph.
    """
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [2, 3, 3, 1]

    def rand_const(*dims):
        # Fresh random-normal constant of the test dtype.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    graph = ops.Graph()
    with graph.as_default():
        x = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        # scale
        group1 = [
            x / rand_const(3, 1, 1),
            rand_const(3, 1, 1) / x,
            rand_const(1, 3, 1) + x,
            x * rand_const(1, 3, 1),
            x - rand_const(3, 1, 1),
            rand_const(3, 1, 1) - x,
        ]
        group2 = [
            x - rand_const(3, 1),
            rand_const(3, 1) - x,
            gen_math_ops.maximum(x, rand_const(3, 1, 1)),
            gen_math_ops.minimum(rand_const(3, 1), x),
            x * rand_const(3),
            rand_const(1) * x,
        ]
        concat1 = array_ops.concat(group1, axis=-1)
        concat2 = array_ops.concat(group2, axis=3)
        merged = array_ops.concat([concat1, concat2], axis=-1)
        gen_array_ops.reshape(merged, [2, -1], name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        expected_engines=["my_trt_op_0"],
        expected_output_dims=(2, 126),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def compute_gradient_Hs(self, y_pred, var_list=None,
                        gate_gradients=Optimizer.GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Computes the EKF linearized measurement matrix H for `var_list`.

    This is the first part of `minimize()`. It returns a list of
    (H, variable) pairs where "H" is the derivative of the measurement
    function h with respect to "variable".

    Args:
        y_pred: The prediction of the network. NOTE THAT THIS SHOULD BE A
            TENSOR AND NOT A SCALAR LIKE IN OTHER OPTIMIZERS. It is
            flattened before being handed to `self.calc_H`.
        var_list: Optional list of `tf.Variable` to update. Defaults to the
            trainable variables plus the collection
            `GraphKeys.TRAINABLE_RESOURCE_VARIABLES`.
        gate_gradients: How to gate the computation of gradients. Can be
            `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
        aggregation_method: Specifies the method used to combine gradient
            terms. Valid values are defined in the class
            `AggregationMethod`.
        colocate_gradients_with_ops: If True, try colocating gradients with
            the corresponding op.
        grad_loss: Optional `Tensor`; only its dtype is validated here.

    Returns:
        A list of (H, variable) pairs. Variable is always present, but H
        can be `None`.

    Raises:
        TypeError: If `var_list` contains anything else than `Variable`
            objects.
        ValueError: If some arguments are invalid.
    """
    allowed_gates = (
        Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH)
    if gate_gradients not in allowed_gates:
        raise ValueError(
            "gate_gradients must be one of: Optimizer.GATE_NONE, "
            "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
            gate_gradients)

    self._assert_valid_dtypes([y_pred])
    if grad_loss is not None:
        self._assert_valid_dtypes([grad_loss])

    if var_list is None:
        resource_vars = ops.get_collection(
            ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
        var_list = variables.trainable_variables() + resource_vars
    if not var_list:
        raise ValueError("No variables to optimize.")

    flat_prediction = gen_array_ops.reshape(y_pred, [-1])
    gate_per_op = gate_gradients == Optimizer.GATE_OP
    measurement_matrices = self.calc_H(
        flat_prediction, var_list, self.y_dim,
        gate_gradients=gate_per_op,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops)
    if gate_gradients == Optimizer.GATE_GRAPH:
        measurement_matrices = control_flow_ops.tuple(measurement_matrices)

    grads_and_vars = list(zip(measurement_matrices, var_list))
    # Only variables that actually received an H are dtype-checked.
    checked_vars = [var for h, var in grads_and_vars if h is not None]
    self._assert_valid_dtypes(checked_vars)
    return grads_and_vars
def conv2d_rnn_encoder(inputs, input_shape, filters, kernel_sizes, strides,
                       activation, latent_hidden_sizes,
                       latent_hidden_activation, rnn_hidden_sizes,
                       rnn_cell_fn, scope=None, reuse=None):
    """Encode a sequence of frames with a conv encoder, dense layers and RNNs.

    The two leading runtime dimensions of `inputs` (batch and sequence) are
    merged so every frame goes through `conv2d_encoder`. The flattened
    features are split back into ``[batch, sequence, features]``, passed
    through one dense layer per entry of `latent_hidden_sizes`, and fed to
    `stacked_rnn_impl.stacked_rnn`.

    Returns:
        Tuple ``(outputs, states, initial_state_phs, zero_states, encoder,
        shapes, output_shape, [x_size] + latent_hidden_sizes)``.
    """
    with variable_scope.variable_scope(scope, default_name=scope, reuse=reuse):
        runtime_shape = array_ops.shape(inputs)
        batch_size = runtime_shape[0]
        sequence_length = runtime_shape[1]

        # Fold the sequence axis into the batch axis for the conv encoder.
        frames = gen_array_ops.reshape(
            inputs, [batch_size * sequence_length] + input_shape)
        features, encoder, shapes = conv2d_encoder(
            frames, filters, kernel_sizes, strides,
            scope='encoder', activation=activation, reuse=reuse)
        output_shape = array_ops.shape(features)

        features = core.flatten(features)
        x_size = features.get_shape()[-1]
        # Restore the separate batch and sequence axes.
        features = gen_array_ops.reshape(
            features, [batch_size, sequence_length, x_size])

        for hidden in latent_hidden_sizes:
            features = core.dense(
                features, hidden, activation=latent_hidden_activation)

        rnn_result = stacked_rnn_impl.stacked_rnn(
            features, rnn_hidden_sizes, rnn_cell_fn,
            scope='stacked_rnn', reuse=reuse)
        outputs, states, initial_state_phs, zero_states = rnn_result

        # TODO(wenkesj): make this less crazy
        return (
            outputs, states, initial_state_phs, zero_states,
            encoder, shapes, output_shape,
            [x_size] + latent_hidden_sizes)
def GetParams(self):
    """Testing conversion of BatchMatMul in TF-TRT conversion.

    The graph multiplies the input by a constant and by two placeholder
    weights, combines the three branches and squeezes the sum under the
    name ``self.output_name``.

    Returns:
        The `TfTrtIntegrationTestParams` for the graph.
    """
    dtype = dtypes.float32
    input_name, input_dims = "input", [12, 5, 8, 12]
    w1_name, w1_dims = "matmul_w1", [12, 5, 12, 7]
    w2_name, w2_dims = "matmul_w2", [12, 12, 7]

    graph = ops.Graph()
    with graph.as_default():
        # Only the batch dimension of the input is left unknown.
        inp = array_ops.placeholder(
            dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
        w1 = array_ops.placeholder(dtype=dtype, shape=w1_dims, name=w1_name)
        w2 = array_ops.placeholder(dtype=dtype, shape=w2_dims, name=w2_name)
        with graph.device("/GPU:0"):
            const_weights = constant_op.constant(
                np.random.randn(12, 5, 12, 7), dtype=dtype)
            offset1 = constant_op.constant(
                np.random.randn(5, 1, 1), dtype=dtype)
            scale2 = constant_op.constant(
                np.random.randn(5, 1, 1), dtype=dtype)

            # Branch 1: matmul with a constant, then broadcast add.
            branch1 = math_ops.matmul(inp, const_weights)
            branch1 = branch1 + offset1

            # Branch 2: matmul with a placeholder, then broadcast multiply.
            branch2 = math_ops.matmul(inp, w1)
            branch2 = branch2 * scale2

            # Branch 3: rank-3 matmul on the reshaped input.
            flat_inp = gen_array_ops.reshape(inp, [12, 40, 12])
            branch3 = math_ops.matmul(flat_inp, w2)
            offset3 = constant_op.constant(
                np.random.randn(40, 1), dtype=dtype)
            branch3 = branch3 + offset3
            branch3 = gen_array_ops.reshape(branch3, [12, 5, 8, 7])

            total = branch1 + branch2 + branch3
        array_ops.squeeze(total, name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name, w1_name, w2_name],
        input_dims=[input_dims, w1_dims, w2_dims],
        num_expected_engines=1,
        expected_output_dims=(12, 5, 8, 7),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def GraphFn(self, x1, x2):
    """Build three chains of unary ops and combine them as ``a * b / c``.

    Chain `a` starts from `x1`, chain `b` from a random constant of shape
    ``(5, 8, 12)`` and chain `c` from `x2`. The combined result is squeezed
    and returned as ``output_0``.
    """
    # Chain a: unary ops on x1, squeezing axis -2 and then axis 3 on the way.
    x = x1
    q = math_ops.abs(x)
    q = q + 1.0
    q = gen_math_ops.exp(q)
    q = gen_math_ops.log(q)
    q = array_ops.squeeze(q, axis=-2)
    q = math_ops.abs(q)
    q = q + 2.2
    q = gen_math_ops.sqrt(q)
    q = gen_math_ops.rsqrt(q)
    q = math_ops.negative(q)
    q = array_ops.squeeze(q, axis=3)
    q = math_ops.abs(q)
    q = q + 3.0
    a = gen_math_ops.reciprocal(q)

    # this chain of operations has a batch size of 5, which is different from
    # the batch size for the other operations.
    x = constant_op.constant(np.random.randn(5, 8, 12), dtype=x.dtype)
    q = math_ops.abs(x)
    q = q + 2.0
    q = gen_math_ops.exp(q)
    q = gen_math_ops.log(q)
    q = math_ops.abs(q)
    q = q + 2.1
    q = gen_math_ops.sqrt(q)
    q = gen_math_ops.rsqrt(q)
    q = math_ops.negative(q)
    q = math_ops.abs(q)
    q = q + 4.0
    b = gen_math_ops.reciprocal(q)

    # TODO(jie): this one will break, broadcasting on batch.
    # Chain c: unary ops on x2 with a squeeze / reshape / squeeze round trip.
    x = x2
    q = math_ops.abs(x)
    q = q + 5.0
    q = gen_math_ops.exp(q)
    q = array_ops.squeeze(q, axis=[-1, -2, 3])
    q = gen_math_ops.log(q)
    q = math_ops.abs(q)
    q = q + 5.1
    # Re-insert size-1 axes, then drop three of them again.
    q = gen_array_ops.reshape(q, [12, 5, 1, 1, 8, 1, 12])
    q = array_ops.squeeze(q, axis=[5, 2, 3])
    q = gen_math_ops.sqrt(q)
    q = math_ops.abs(q)
    q = q + 5.2
    q = gen_math_ops.rsqrt(q)
    q = math_ops.negative(q)
    q = math_ops.abs(q)
    q = q + 5.3
    c = gen_math_ops.reciprocal(q)

    # Combine the three chains.
    q = a * b
    q = q / c
    return array_ops.squeeze(q, name="output_0")
def minimize(self, y_target, y_pred, global_step=None, var_list=None,
             gate_gradients=optimizer.Optimizer.GATE_OP,
             aggregation_method=None, colocate_gradients_with_ops=False,
             name=None, grad_loss=None):
    """Applies the EKF optimization using measurement y_target and prediction y_pred.

    Args:
        y_target: The target tensor which we would like the trained network
            to output.
        y_pred: The actual output Tensor of the network.
        global_step: Optional `Variable` to increment by one after the
            variables have been updated.
        var_list: Optional list of `Variable` objects to update. Defaults to
            the trainable variables plus the collection
            `GraphKeys.TRAINABLE_RESOURCE_VARIABLES`.
        gate_gradients: How to gate the computation of gradients. Can be
            `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
        aggregation_method: Specifies the method used to combine gradient
            terms. Valid values are defined in the class
            `AggregationMethod`.
        colocate_gradients_with_ops: If True, try colocating gradients with
            the corresponding op.
        name: Optional name for the returned operation.
        grad_loss: Optional `Tensor`, forwarded to `compute_gradient_Hs`.

    Returns:
        The result of `self.apply_gradients`, called with the
        (H, variable) pairs and the error ``y_target - y_pred`` reshaped to
        a column vector.

    Raises:
        ValueError: If no variable receives a gradient from `y_pred`.
    """
    if var_list is None:
        var_list = (
            variables.trainable_variables() +
            ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))

    # Plain gradients are only used to detect variables that `y_pred` does
    # not depend on.
    real_grads = gradients.gradients(y_pred, var_list)
    filter_none_gs = [g for g in real_grads if g is not None]
    n_nones = len(real_grads) - len(filter_none_gs)
    logger.info("Number of identically zero gradients: %s", n_nones)

    Hs_and_vars = self.compute_gradient_Hs(
        y_pred, var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)

    if not filter_none_gs:
        raise ValueError(
            "No gradients provided for any variable, check your graph for ops"
            " that do not support gradients, between variables %s and y_pred %s." %
            ([str(v) for _, v in Hs_and_vars], y_pred))

    error = gen_array_ops.reshape(y_target - y_pred, [-1, 1], name="error")
    return self.apply_gradients(
        Hs_and_vars, error, global_step=global_step, name=name)
def prune_by_bbb(variable_metadata, percentage):
    """Prune a percentage of variables based on their signal to noise ratios.

    All posterior estimates are flattened into one vector, the entries with
    the lowest signal-to-noise ratio are zeroed, and the vector is cut back
    into tensors of the original variable shapes.

    Arguments:
        variable_metadata: `list` of `bbb._VariableMetadata`, suggest using
            `bbb.get_variable_metadata()`.
        percentage: a scalar `tf.Tensor` giving the fraction of entries to
            prune.

    Returns:
        A list with one pruned tensor per metadata entry, named after the
        raw variable. An empty list if `variable_metadata` is empty.
    """
    if not variable_metadata:
        return []

    signal_to_noise_ratios = []
    variable_estimates = []
    variable_info = []
    # get signal to noise and mean posterior
    for meta in variable_metadata:
        posterior_dist = meta.posterior
        signal_to_noise_ratios.append(
            array_utils.flatten(
                distribution_utils.signal_to_noise_ratio(posterior_dist)))
        variable_estimates.append(array_utils.flatten(meta.posterior_estimate))
        variable_info.append((meta.raw_variable_name, meta.raw_variable_shape))

    # flatten variables
    flat_variable_estimates = array_ops.concat(variable_estimates, 0)
    flat_signal_to_noise_ratios = array_ops.concat(signal_to_noise_ratios, 0)
    flat_variable_size = flat_variable_estimates.get_shape().as_list()[-1]
    flat_drop_size = math_ops.cast(
        flat_variable_size * percentage, dtypes.int32)

    # Sort by ascending signal to noise ratio. `top_k` returns the largest
    # values first, so the ratios are negated to bring the lowest to the
    # front; without this the most certain weights were the ones zeroed.
    # NOTE(review): assumes `signal_to_noise_ratio` returns larger values
    # for more certain weights -- confirm against its implementation.
    _, indices = nn_ops.top_k(
        -flat_signal_to_noise_ratios, k=flat_variable_size, sorted=True)
    zero_indices = array_ops.expand_dims(indices[:flat_drop_size], -1)
    # 0 at every pruned position, 1 elsewhere.
    mask = math_ops.cast(
        sparse_ops.sparse_to_dense(
            zero_indices, [flat_variable_size],
            sparse_values=0, default_value=1, validate_indices=False),
        flat_variable_estimates.dtype)
    flat_variable_estimates *= mask

    # unflatten variables
    start = 0
    dsts = []
    for name, shape in variable_info:
        size = array_utils.product(shape)
        dst = gen_array_ops.reshape(
            flat_variable_estimates[start:start + size], shape, name=name)
        dsts.append(dst)
        start += size
    return dsts
def GetParams(self):
    """Testing conversion of BatchMatMul in TF-TRT conversion."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [12, 5, 8, 12]
    output_name = "output"
    w1_name = "matmul_w1"
    w1_dims = [12, 5, 12, 7]
    w2_name = "matmul_w2"
    w2_dims = [12, 12, 7]

    def _rand_const(*dims):
        # Random-valued constant of the test dtype.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    graph = ops.Graph()
    with graph.as_default():
        # The batch dimension of the main input is left unknown.
        inp = array_ops.placeholder(
            dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
        w1 = array_ops.placeholder(dtype=dtype, shape=w1_dims, name=w1_name)
        w2 = array_ops.placeholder(dtype=dtype, shape=w2_dims, name=w2_name)
        # NOTE(review): the original indentation was lost; the final squeeze
        # is assumed to sit outside the device scope -- confirm.
        with graph.device("/GPU:0"):
            # Branch 1: matmul against a constant, then a broadcast add.
            const_weights = _rand_const(12, 5, 12, 7)
            branch1 = math_ops.matmul(inp, const_weights)
            add_term = _rand_const(5, 1, 1)
            branch1 = branch1 + add_term

            # Branch 2: matmul against a fed weight, then a broadcast multiply.
            branch2 = math_ops.matmul(inp, w1)
            mul_term = _rand_const(5, 1, 1)
            branch2 = branch2 * mul_term

            # Branch 3: rank-3 matmul wrapped in TRT-incompatible ops.
            flat = self.trt_incompatible_op(inp)
            flat = gen_array_ops.reshape(flat, [12, 40, 12])
            branch3 = math_ops.matmul(flat, w2)
            add_term3 = _rand_const(40, 1)
            branch3 = branch3 + add_term3
            branch3 = gen_array_ops.reshape(branch3, [12, 5, 8, 7])
            branch3 = self.trt_incompatible_op(branch3)

            total = branch1 + branch2 + branch3
        array_ops.squeeze(total, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(add_shapes=True),
        input_names=[input_name, w1_name, w2_name],
        input_dims=[[input_dims, w1_dims, w2_dims]],
        output_names=[output_name],
        expected_output_dims=[[[12, 5, 8, 7]]])
def alphas(shape, alpha_value, name=None):
    """Creates a tensor with all elements set to `alpha_value`.

    This operation returns a tensor with shape `shape` whose dtype is the one
    `alpha_value` converts to, and with all elements set to alpha.

    Parameters
    ----------
    shape: A list of integers, a tuple of integers, or a 1-D `Tensor` of type `int32`.
        The shape of the desired tensor
    alpha_value: `float32`, `float64`, `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`
        The value used to fill the resulting `Tensor`.
    name: str
        A name for the operation (optional).

    Returns
    -------
    A `Tensor` with all elements set to alpha.

    Raises
    ------
    AssertionError
        If the dtype of the produced tensor differs from the dtype inferred
        from `alpha_value`.

    Examples
    --------
    >>> tl.alphas([2, 3], 0.5)  # [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]
    """
    with ops.name_scope(name, "alphas", [shape]) as scope:
        # The converted value is only used to discover the target dtype.
        value_dtype = dtypes.as_dtype(
            convert_to_tensor(alpha_value).dtype).base_dtype

        shape_tensor = shape
        if not isinstance(shape_tensor, ops.Tensor):
            try:
                shape_tensor = constant_op._tensor_shape_tensor_conversion_function(
                    tensor_shape.TensorShape(shape))
            except (TypeError, ValueError):
                # Fall back to a plain int32 conversion.
                shape_tensor = ops.convert_to_tensor(shape, dtype=dtypes.int32)

        if not shape_tensor._shape_tuple():
            # Ensure it's a vector
            shape_tensor = reshape(shape_tensor, [-1])

        try:
            result = constant(
                alpha_value, shape=shape_tensor, dtype=value_dtype, name=scope)
        except (TypeError, ValueError):
            # `constant` rejected the arguments; build the tensor with `fill`.
            scalar = constant(alpha_value, dtype=value_dtype)
            result = fill(shape_tensor, scalar, name=scope)

        if result.dtype.base_dtype != value_dtype:
            raise AssertionError(
                "Dtypes do not corresponds: %s and %s" %
                (result.dtype.base_dtype, value_dtype))
        return result
def GraphFn(self, inp, inp1, inp2):
    """Build the BatchMatMul test graph and return its squeezed output.

    Three matmul branches are built from `inp` (against a random constant,
    against `inp1`, and -- after a reshape between two TRT-incompatible ops --
    against `inp2`), summed, and squeezed into an op named "output_0".
    """
    dtype = inp.dtype

    def _rand_const(*dims):
        # Random-valued constant with the dtype of the main input.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    # Branch 1: constant weights followed by a broadcast add.
    const_weights = _rand_const(12, 5, 12, 7)
    branch1 = math_ops.matmul(inp, const_weights)
    add_term = _rand_const(5, 1, 1)
    branch1 = branch1 + add_term

    # Branch 2: fed weights followed by a broadcast multiply.
    branch2 = math_ops.matmul(inp, inp1)
    mul_term = _rand_const(5, 1, 1)
    branch2 = branch2 * mul_term

    # Branch 3: rank-3 matmul bracketed by TRT-incompatible ops.
    flat = self.trt_incompatible_op(inp)
    flat = gen_array_ops.reshape(flat, [12, 40, 12])
    branch3 = math_ops.matmul(flat, inp2)
    add_term3 = _rand_const(40, 1)
    branch3 = branch3 + add_term3
    branch3 = gen_array_ops.reshape(branch3, [12, 5, 8, 7])
    branch3 = self.trt_incompatible_op(branch3)

    total = branch1 + branch2 + branch3
    return array_ops.squeeze(total, name="output_0")
def maxout(inputs, num_units, axis):
    """Apply a maxout reduction along `axis`.

    The `axis` dimension (of size C) is split into `num_units` groups of
    `C // num_units` consecutive entries and the maximum of each group is
    returned.

    Args:
        inputs: Tensor, or value convertible to one. The size of `axis` must
            be statically known.
        num_units: Number of groups, i.e. the size of `axis` in the output.
        axis: The dimension to reduce; may be negative.

    Returns:
        A tensor shaped like `inputs` except that `axis` has size `num_units`.

    Raises:
        ValueError: If the size of `axis` is unknown or is not a multiple of
            `num_units`.
    """
    inputs = ops.convert_to_tensor(inputs)
    shape = inputs.get_shape().as_list()
    num_channels = shape[axis]
    if num_channels is None:
        raise ValueError(
            'The size of axis {} of `inputs` must be statically '
            'known.'.format(axis))
    if num_channels % num_units:
        raise ValueError('number of features({}) is not '
                         'a multiple of num_units({})'
                         .format(num_channels, num_units))

    # Dealing with batches with arbitrary sizes: replace statically unknown
    # dimensions with their run-time values.
    if any(dim is None for dim in shape):
        dynamic_shape = gen_array_ops.shape(inputs)
        shape = [dynamic_shape[i] if dim is None else dim
                 for i, dim in enumerate(shape)]

    # The per-group dimension has to sit directly after `axis`. Appending it
    # at the very end is only correct when `axis` is the last dimension; for
    # any other axis the reshape would mix unrelated elements into a group.
    group_axis = axis % len(shape) + 1
    shape[axis] = -1
    shape.insert(group_axis, num_channels // num_units)
    outputs = math_ops.reduce_max(
        gen_array_ops.reshape(inputs, shape), group_axis, keep_dims=False)
    return outputs
def GetParams(self):
    """Testing conversion of BatchMatMul in TF-TRT conversion."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [12, 5, 8, 12]
    w1_name = "matmul_w1"
    w1_dims = [12, 5, 12, 7]
    w2_name = "matmul_w2"
    w2_dims = [12, 12, 7]

    def _rand_const(*dims):
        # Random-valued constant of the test dtype.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    graph = ops.Graph()
    with graph.as_default():
        # The batch dimension of the main input is left unknown.
        inp = array_ops.placeholder(
            dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
        w1 = array_ops.placeholder(dtype=dtype, shape=w1_dims, name=w1_name)
        w2 = array_ops.placeholder(dtype=dtype, shape=w2_dims, name=w2_name)
        # NOTE(review): the original indentation was lost; the final squeeze
        # is assumed to sit outside the device scope -- confirm.
        with graph.device("/GPU:0"):
            # All three constants are created up front, in this order.
            const_weights = _rand_const(12, 5, 12, 7)
            add_term = _rand_const(5, 1, 1)
            mul_term = _rand_const(5, 1, 1)

            branch1 = math_ops.matmul(inp, const_weights)
            branch1 = branch1 + add_term

            branch2 = math_ops.matmul(inp, w1)
            branch2 = branch2 * mul_term

            flat = gen_array_ops.reshape(inp, [12, 40, 12])
            branch3 = math_ops.matmul(flat, w2)
            add_term3 = _rand_const(40, 1)
            branch3 = branch3 + add_term3
            branch3 = gen_array_ops.reshape(branch3, [12, 5, 8, 7])

            total = branch1 + branch2 + branch3
        array_ops.squeeze(total, name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name, w1_name, w2_name],
        input_dims=[input_dims, w1_dims, w2_dims],
        expected_engines=["my_trt_op_0"],
        expected_output_dims=(12, 5, 8, 7),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def _tile_batch(tensor, multiplier):
    """Core single-tensor implementation of tile_batch.

    A new axis is inserted after axis 0, tiled `multiplier` times and folded
    back into axis 0, so the leading dimension grows by a factor of
    `multiplier`. Rank-0 tensors are returned untouched.

    Raises:
        ValueError: If the rank of `tensor` is not statically known.
    """
    tensor = ops.convert_to_tensor(tensor, name='t')
    dynamic_shape = array_ops.shape(tensor)

    rank = tensor.shape.ndims
    if rank is None:
        raise ValueError('tensor must have statically known rank')
    if rank == 0:
        # We can't tile scalars (e.g. time)
        return tensor

    # Tile only the axis that is about to be inserted at position 1.
    multiples = [1, multiplier] + [1] * (rank - 1)

    # Static leading dimension of the result, when it is known.
    if tensor.shape[0].value is not None:
        static_batch = tensor.shape[0].value * multiplier
    else:
        static_batch = None

    expanded = array_ops.expand_dims(tensor, 1)
    tiled = gen_array_ops.tile(expanded, multiples)
    merged_shape = array_ops.concat(
        ([dynamic_shape[0] * multiplier], dynamic_shape[1:]), 0)
    tiled = gen_array_ops.reshape(tiled, merged_shape)

    # Restore the static shape information lost by the dynamic reshape.
    tiled.set_shape(
        tensor_shape.TensorShape([static_batch]).concatenate(tensor.shape[1:]))
    return tiled
def _apply_dense(self, H, var, error):
    """Build the grouped update op for `var` and its "P" slot.

    With Q and P the "Q" and "P" slots of `var` and R = `self._Rt`:

        S = R + H P H^T
        K = P H^T S^-1
        var += reshape(K error, shape(var))
        P   += Q - K S K^T

    Returns:
        A single op grouping the two `assign_add` updates.
    """
    process_noise = self.get_slot(var, "Q")  # Process noise
    covariance = self.get_slot(var, "P")  # Covariance matrix
    measurement_noise = self._Rt

    projected = math_ops.matmul(
        math_ops.matmul(H, covariance), H, transpose_b=True)
    innovation = measurement_noise + projected
    innovation_inv = linalg_ops.matrix_inverse(innovation, name="Sinv")

    cross = math_ops.matmul(covariance, H, transpose_b=True)
    gain = math_ops.matmul(cross, innovation_inv)

    # The weight step is computed as a matrix and reshaped to match `var`.
    step = math_ops.matmul(gain, error)
    step = gen_array_ops.reshape(step, gen_array_ops.shape(var))
    update_weights = state_ops.assign_add(
        var, step, use_locking=self._use_locking)

    gain_term = math_ops.matmul(
        math_ops.matmul(gain, innovation), gain, transpose_b=True)
    update_P = state_ops.assign_add(
        covariance, process_noise - gain_term, use_locking=self._use_locking)

    return control_flow_ops.group(update_weights, update_P)
def alphas(shape, alpha_value, name=None):
    """Creates a tensor with all elements set to `alpha_value`.

    This operation returns a tensor with shape `shape` whose dtype is the one
    `alpha_value` converts to, and with all elements set to alpha.

    Parameters
    ----------
    shape: A list of integers, a tuple of integers, or a 1-D `Tensor` of type `int32`.
        The shape of the desired tensor
    alpha_value: `float32`, `float64`, `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`
        The value used to fill the resulting `Tensor`.
    name: str
        A name for the operation (optional).

    Returns
    -------
    A `Tensor` with all elements set to alpha.

    Raises
    ------
    AssertionError
        If the dtype of the produced tensor differs from the dtype inferred
        from `alpha_value`.

    Examples
    --------
    >>> tl.alphas([2, 3], 0.5)  # [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]
    """
    with ops.name_scope(name, "alphas", [shape]) as op_scope:
        # The converted value is only needed for its dtype.
        probe = convert_to_tensor(alpha_value)
        fill_dtype = dtypes.as_dtype(probe.dtype).base_dtype

        dims = shape
        if not isinstance(dims, ops.Tensor):
            try:
                dims = constant_op._tensor_shape_tensor_conversion_function(
                    tensor_shape.TensorShape(shape))
            except (TypeError, ValueError):
                # Fall back to a plain int32 conversion.
                dims = ops.convert_to_tensor(shape, dtype=dtypes.int32)

        if not dims._shape_tuple():
            dims = reshape(dims, [-1])  # Ensure it's a vector

        try:
            filled = constant(
                alpha_value, shape=dims, dtype=fill_dtype, name=op_scope)
        except (TypeError, ValueError):
            # `constant` rejected the arguments; build the tensor with `fill`.
            filled = fill(
                dims, constant(alpha_value, dtype=fill_dtype), name=op_scope)

        produced_dtype = filled.dtype.base_dtype
        if produced_dtype != fill_dtype:
            raise AssertionError(
                "Dtypes do not corresponds: %s and %s" %
                (filled.dtype.base_dtype, fill_dtype))
        return filled
def call(self, inputs):
    """Apply the maxout reduction along `self.axis`.

    The `self.axis` dimension (of size C) is split into `self.num_units`
    groups of `C // self.num_units` consecutive entries and the maximum of
    each group is returned.

    Args:
        inputs: Tensor, or value convertible to one. The size of `self.axis`
            must be statically known.

    Returns:
        A tensor shaped like `inputs` except that `self.axis` has size
        `self.num_units`.

    Raises:
        ValueError: If the size of `self.axis` is unknown or is not a
            multiple of `self.num_units`.
    """
    inputs = ops.convert_to_tensor(inputs)
    shape = inputs.get_shape().as_list()
    num_channels = shape[self.axis]
    if num_channels is None:
        raise ValueError(
            'The size of axis {} of `inputs` must be statically '
            'known.'.format(self.axis))
    if num_channels % self.num_units:
        raise ValueError('number of features({}) is not '
                         'a multiple of num_units({})'
                         .format(num_channels, self.num_units))

    # Dealing with batches with arbitrary sizes: replace statically unknown
    # dimensions with their run-time values.
    if any(dim is None for dim in shape):
        dynamic_shape = gen_array_ops.shape(inputs)
        shape = [dynamic_shape[i] if dim is None else dim
                 for i, dim in enumerate(shape)]

    # The per-group dimension has to sit directly after the reduced axis.
    # Appending it at the very end is only correct when that axis is the last
    # dimension; for any other axis the reshape would mix unrelated elements
    # into a group.
    group_axis = self.axis % len(shape) + 1
    shape[self.axis] = -1
    shape.insert(group_axis, num_channels // self.num_units)
    outputs = math_ops.reduce_max(
        gen_array_ops.reshape(inputs, shape), group_axis, keep_dims=False)
    return outputs
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
    """Return histogram of values.

    Given the tensor `values`, this operation returns a rank 1 histogram
    counting the number of entries in `values` that fell into every bin. The
    bins are equal width and determined by the arguments `value_range` and
    `nbins`.

    Args:
        values: Numeric `Tensor`.
        value_range: Shape [2] `Tensor` of same `dtype` as `values`.
            values <= value_range[0] will be mapped to hist[0],
            values >= value_range[1] will be mapped to hist[-1].
        nbins: Scalar `int32 Tensor`. Number of histogram bins.
        dtype: dtype for returned histogram.
        name: A name for this operation (defaults to 'histogram').

    Returns:
        A 1-D `Tensor` holding histogram of values.
    """
    with ops.name_scope(name, 'histogram',
                        [values, value_range, nbins]) as scope:
        flat_values = gen_array_ops.reshape(
            ops.convert_to_tensor(values, name='values'), [-1])
        bounds = ops.convert_to_tensor(value_range, name='value_range')
        bin_count = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
        bin_count_cast = math_ops.cast(bin_count, flat_values.dtype)

        # Map tensor values that fall within value_range to [0, 1].
        offset = flat_values - bounds[0]
        span = bounds[1] - bounds[0]
        scaled = math_ops.truediv(offset, span, name='scaled_values')

        # Values inside the open interval land in {0, ..., nbins-1}; values
        # outside it land at zero or less, or at nbins or more.
        raw_bins = math_ops.floor(bin_count_cast * scaled, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        clipped = clip_ops.clip_by_value(raw_bins, 0, bin_count_cast - 1)
        bin_ids = math_ops.cast(clipped, np.int32)

        # Summing a one per entry, grouped by bin id, yields the counts.
        ones = array_ops.ones_like(bin_ids, dtype=dtype)
        return math_ops.unsorted_segment_sum(
            ones, bin_ids, bin_count, name=scope)
def GetParams(self):
    """Testing conversion of BiasAdd MatMul in TF-TRT conversion."""
    input_name = "input"
    input_matrix_rows = 4
    input_matrix_columns = 144
    # Note that tf.nn.bias_add supports up to 5 dimensions.
    input_dims = [input_matrix_rows, input_matrix_columns]
    output_name = "output"

    # (reshape target, bias shape, data_format) for the bias_add-only
    # branches, in graph construction order.
    bias_cases = (
        ([4, 24, 6], (6,), "NHWC"),
        ([4, 12, 4, 3], (3,), "NHWC"),
        ([4, 4, 3, 2, 6], (6,), "NHWC"),
        ([4, 12, 3, 2, 2], (12,), "NCHW"),
        ([4, 3, 4, 12], (3,), "NCHW"),
        ([4, 6, 24], (6,), "NCHW"),
    )

    graph = ops.Graph()
    with graph.as_default():
        x = array_ops.placeholder(
            dtype=dtypes.float32, shape=input_dims, name=input_name)

        # Plain matmul followed by a broadcast add.
        weights = self._ConstOp((input_matrix_columns, 4))
        x1 = math_ops.matmul(x, weights)
        weights = self._ConstOp((1, 4))
        x1 = x1 + weights

        # transpose_a matmul bracketed by TRT-incompatible ops.
        weights = self._ConstOp((input_matrix_rows, 144))
        x2 = self.trt_incompatible_op(x)
        x2 = math_ops.matmul(x2, weights, transpose_a=True)
        x2 = gen_array_ops.reshape(x2, [4, -1])
        x2 = self.trt_incompatible_op(x2)

        # transpose_b matmul.
        weights = self._ConstOp((4, input_matrix_columns))
        x3 = math_ops.matmul(x, weights, transpose_b=True)

        # Both operands transposed, bracketed by TRT-incompatible ops.
        weights = self._ConstOp((16, input_matrix_rows))
        x4 = self.trt_incompatible_op(x)
        x4 = math_ops.matmul(x4, weights, transpose_b=True, transpose_a=True)
        x4 = gen_array_ops.reshape(x4, [4, -1])
        x4 = self.trt_incompatible_op(x4)

        # matmul followed by bias_add with the default data format.
        weights = self._ConstOp((input_matrix_columns, 48))
        x5 = math_ops.matmul(x, weights)
        weights = self._ConstOp((48,))
        x5 = nn.bias_add(x5, weights)
        x5 = gen_array_ops.reshape(x5, [4, -1])

        # x6 .. x11: reshape the input, add a bias, flatten back to 4 rows.
        bias_branches = []
        for target_dims, bias_shape, data_format in bias_cases:
            branch = gen_array_ops.reshape(x, target_dims)
            bias = self._ConstOp(bias_shape)
            branch = nn.bias_add(branch, bias, data_format=data_format)
            branch = gen_array_ops.reshape(branch, [4, -1])
            bias_branches.append(branch)

        out = array_ops.concat(
            [x1, x2, x3, x4, x5] + bias_branches, axis=-1)
        out = array_ops.squeeze(out, name=output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[[input_dims]],
        output_names=[output_name],
        expected_output_dims=[[[4, 6680]]])
def GetParams(self):
    """Tests for scale & elementwise layers in TF-TRT."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [10, 24, 24, 20]

    # One entry per add+sigmoid stage, in graph construction order:
    # (shape of the random constant, True when the constant is the left
    # operand of the addition).
    stages = (
        # scale
        ((1,), False),
        ((1,), True),
        ((24, 1, 1), False),
        ((24, 1, 1), True),
        ((24, 24, 20), True),
        ((24, 24, 20), False),
        # elementwise
        ((20,), False),
        ((20,), True),
        ((1, 24, 1, 1), True),
        ((1, 24, 1, 1), False),
        ((1, 24, 24, 1), True),
        ((1, 24, 24, 1), False),
        ((1, 24, 24, 20), True),
        ((1, 24, 24, 20), False),
        ((24, 20), True),
        ((24, 20), False),
    )

    graph = ops.Graph()
    with graph.as_default():
        x = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        for const_shape, const_on_left in stages:
            a = constant_op.constant(
                np.random.randn(*const_shape), dtype=dtype)
            if const_on_left:
                f = a + x
            else:
                f = x + a
            x = math_ops.sigmoid(f)
        gen_array_ops.reshape(x, [5, -1], name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        num_expected_engines=16,
        expected_output_dims=(5, 23040),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def GetParams(self):
    """Testing conversion of BiasAdd MatMul in TF-TRT conversion."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [48, 12]

    def _rand_const(*dims):
        # Random-valued constant of the test dtype.
        return constant_op.constant(np.random.randn(*dims), dtype=dtype)

    # (reshape target, bias length, data_format) for the bias_add-only
    # branches, in graph construction order.
    bias_cases = (
        ([4, 12, 12], 12, "NHWC"),
        ([4, 12, 3, 4], 4, "NHWC"),
        ([4, 12, 3, 2, 2], 2, "NHWC"),
        ([4, 12, 3, 2, 2], 3, "NCHW"),
        ([4, 12, 3, 4], 12, "NCHW"),
        ([4, 12, 12], 4, "NCHW"),
    )

    graph = ops.Graph()
    with graph.as_default():
        x = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)

        # Plain matmul followed by a broadcast add.
        weights = _rand_const(12, 4)
        x1 = math_ops.matmul(x, weights)
        weights = _rand_const(1, 4)
        x1 = x1 + weights

        # transpose_a matmul.
        weights = _rand_const(48, 4)
        x2 = math_ops.matmul(x, weights, transpose_a=True)
        x2 = gen_array_ops.reshape(x2, [48, 1])

        # transpose_b matmul.
        weights = _rand_const(4, 12)
        x3 = math_ops.matmul(x, weights, transpose_b=True)

        # Both operands transposed.
        weights = _rand_const(16, 48)
        x4 = math_ops.matmul(x, weights, transpose_b=True, transpose_a=True)
        x4 = gen_array_ops.reshape(x4, [48, 4])

        # Reshape, matmul, then bias_add with the default data format.
        x5 = gen_array_ops.reshape(x, [4, 144])
        weights = _rand_const(144, 48)
        x5 = math_ops.matmul(x5, weights)
        weights = _rand_const(48)
        x5 = nn.bias_add(x5, weights)
        x5 = gen_array_ops.reshape(x5, [48, 4])

        # x6 .. x11: reshape the input, add a bias, flatten back to 48 rows.
        bias_branches = []
        for target_dims, bias_len, data_format in bias_cases:
            branch = gen_array_ops.reshape(x, target_dims)
            bias = _rand_const(bias_len)
            branch = nn.bias_add(branch, bias, data_format=data_format)
            branch = gen_array_ops.reshape(branch, [48, -1])
            bias_branches.append(branch)

        out = array_ops.concat(
            [x1, x2, x3, x4, x5] + bias_branches, axis=-1)
        out = array_ops.squeeze(out, name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        expected_engines=[
            "my_trt_op_0", "my_trt_op_1", "my_trt_op_2", "my_trt_op_3",
            "my_trt_op_4", "my_trt_op_5", "my_trt_op_6"
        ],
        expected_output_dims=(48, 89),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def GetParams(self):
    """Test for unary operations in TF-TRT."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [12, 5, 8, 1, 1, 12]
    input2_name = "input_2"
    input2_dims = [12, 5, 8, 1, 12, 1, 1]

    def _abs_plus(tensor, offset):
        # abs followed by adding a scalar offset.
        tensor = math_ops.abs(tensor)
        return tensor + offset

    graph = ops.Graph()
    with graph.as_default():
        # Branch a: chain of unary ops on the first placeholder.
        first = array_ops.placeholder(
            dtype=dtype, shape=input_dims, name=input_name)
        t = _abs_plus(first, 1.0)
        t = gen_math_ops.exp(t)
        t = gen_math_ops.log(t)
        t = array_ops.squeeze(t, axis=-2)
        t = _abs_plus(t, 2.2)
        t = gen_math_ops.sqrt(t)
        t = gen_math_ops.rsqrt(t)
        t = math_ops.negative(t)
        t = array_ops.squeeze(t, axis=3)
        t = _abs_plus(t, 3.0)
        branch_a = gen_math_ops.reciprocal(t)

        # Branch b: the same kind of chain on a random constant.
        const_input = constant_op.constant(
            np.random.randn(5, 8, 12), dtype=dtype)
        t = _abs_plus(const_input, 2.0)
        t = gen_math_ops.exp(t)
        t = gen_math_ops.log(t)
        t = _abs_plus(t, 2.1)
        t = gen_math_ops.sqrt(t)
        t = gen_math_ops.rsqrt(t)
        t = math_ops.negative(t)
        t = _abs_plus(t, 4.0)
        branch_b = gen_math_ops.reciprocal(t)

        # TODO(jie): this one will break, broadcasting on batch.
        second = array_ops.placeholder(
            dtype=dtype, shape=input2_dims, name=input2_name)
        t = _abs_plus(second, 5.0)
        t = gen_math_ops.exp(t)
        t = array_ops.squeeze(t, axis=[-1, -2, 3])
        t = gen_math_ops.log(t)
        t = _abs_plus(t, 5.1)
        t = gen_array_ops.reshape(t, [12, 5, 1, 1, 8, 1, 12])
        t = array_ops.squeeze(t, axis=[5, 2, 3])
        t = gen_math_ops.sqrt(t)
        t = _abs_plus(t, 5.2)
        t = gen_math_ops.rsqrt(t)
        t = math_ops.negative(t)
        t = _abs_plus(t, 5.3)
        branch_c = gen_math_ops.reciprocal(t)

        # Combine the three branches into the named output.
        combined = branch_a * branch_b
        combined = combined / branch_c
        array_ops.squeeze(combined, name=self.output_name)

    return trt_test.TfTrtIntegrationTestParams(
        gdef=graph.as_graph_def(),
        input_names=[input_name, input2_name],
        input_dims=[input_dims, input2_dims],
        num_expected_engines=5,
        expected_output_dims=(12, 5, 8, 12),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)