def clip_boxes(self, boxes):
    """Clamp box corners so they lie inside the output image.

    Args:
      boxes: tensor whose axis 1 holds exactly the four coordinates
        (ymin, xmin, ymax, xmax).

    Returns:
      A tensor with the same layout as `boxes`, with y values limited to
      [0, self._output_size[0] - 1] and x values limited to
      [0, self._output_size[1] - 1].
    """
    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    max_y = self._output_size[0] - 1
    max_x = self._output_size[1] - 1
    clipped_coords = [
        tf.clip_by_value(ymin, 0, max_y),
        tf.clip_by_value(xmin, 0, max_x),
        tf.clip_by_value(ymax, 0, max_y),
        tf.clip_by_value(xmax, 0, max_x),
    ]
    return tf.stack(clipped_coords, axis=1)
def _refine_motion_field(motion_field, layer):
    """Upsamples a motion field and refines it with features from `layer`.

    This is one element of a UNet-like decoder. `motion_field` has a lower
    spatial resolution than `layer`. It is first resized bilinearly to
    `layer`'s spatial size; convolutions are then applied to the concatenation
    of the upsampled field and `layer`, and their (bottlenecked) output is
    added back onto the upsampled field as a residual.

    The scheme is inspired by FlowNet (https://arxiv.org/abs/1504.06852) and by
    the observation that keeping the bottleneck at the same (low) channel count
    as the motion field pressures the network to gradually move detail from
    depth channels into space. It differs from FlowNet in two ways: two
    parallel towers are used (a single 3x3 convolution, and two successive 3x3
    convolutions) instead of one 3x3 convolution, and the result is added to
    the upscaled `motion_field` to form a residual connection. These changes
    seemed to improve the depth prediction metrics, but the exploration was far
    from exhaustive.

    Args:
      motion_field: a tf.Tensor of shape [B, h1, w1, m]. m is the number of
        dimensions in the motion field, for example, 3 in case of a 3D
        translation field.
      layer: tf.Tensor of shape [B, h2, w2, c].

    Returns:
      A tf.Tensor of shape [B, h2, w2, m], obtained by upscaling motion_field
      to h2, w2, and mixing it with layer using a few convolutions.
    """
    _, target_h, target_w, _ = tf.unstack(tf.shape(layer))
    upsampled = tf.image.resize_bilinear(motion_field, [target_h, target_w])
    tower_input = tf.concat([upsampled, layer], axis=3)

    # Every 3x3 convolution uses the same width: at least 4 channels.
    num_filters = max(4, layer.shape.as_list()[-1])

    # Tower 1: a single 3x3 convolution.
    shallow_tower = layers.conv2d(tower_input, num_filters, [3, 3], stride=1)
    # Tower 2: two successive 3x3 convolutions.
    deep_tower = layers.conv2d(tower_input, num_filters, [3, 3], stride=1)
    deep_tower = layers.conv2d(deep_tower, num_filters, [3, 3], stride=1)

    towers = tf.concat([shallow_tower, deep_tower], axis=-1)

    # Linear 1x1 bottleneck back to the motion field's channel count.
    residual = layers.conv2d(
        towers,
        motion_field.shape.as_list()[-1],
        [1, 1],
        stride=1,
        activation_fn=None,
        biases_initializer=None,
        scope=layer.op.name + '/MotionBottleneck')
    return upsampled + residual
def unroll(self, actions, env_outputs, core_state):
    """Unrolls the recurrent core over time, one step at a time.

    The torso is applied to every (time, batch) element, the core is stepped
    once per leading-axis slice, and the head is applied to the stacked core
    outputs.

    Args:
      actions: action tensor; `tf.shape(actions)[1]` is used as the batch size
        of the zero core state.
      env_outputs: a 4-tuple whose third element is the `done` flag tensor.
      core_state: core state to start the unroll from.

    Returns:
      A pair (head outputs for all steps, core state after the last step).
    """
    _, _, done, _ = env_outputs
    torso_outputs = snt.BatchApply(self._torso)((actions, env_outputs))

    # Note: CuDNN RNN cannot be used here because the state has to be reset
    # inside the sequence. This can be XLA-compiled (LSTMBlockCell needs to be
    # changed to implement snt.LSTMCell).
    zero_state = self._core.zero_state(tf.shape(actions)[1], tf.float32)

    step_outputs = []
    for step_input, step_done in zip(tf.unstack(torso_outputs),
                                     tf.unstack(done)):
        # Where the episode ended, restart from the zero state before stepping.
        select_state = functools.partial(tf.where, step_done)
        core_state = nest.map_structure(select_state, zero_state, core_state)
        step_output, core_state = self._core(step_input, core_state)
        step_outputs.append(step_output)

    head_outputs = snt.BatchApply(self._head)(tf.stack(step_outputs))
    return head_outputs, core_state
def buildModel(self, rnn_layer, is_dynamic_rnn):
    """Builds the MNIST recognition graph around the given RNN layer.

    Args:
      rnn_layer: The rnn layer, either a single rnn cell or a multi rnn cell.
      is_dynamic_rnn: Whether to run the layer through dynamic_rnn (True) or
        static_rnn (False).

    Returns:
      A tuple containing:

      - Input tensor of the model.
      - Prediction (logits) tensor of the model.
      - Output class (softmax) tensor of the model.
    """
    # Parameters of the dense layer that feeds the softmax.
    out_weights = tf.Variable(
        tf.random.normal([self.num_units, self.n_classes]))
    out_bias = tf.Variable(tf.random.normal([self.n_classes]))

    # Input images, shaped [batch_size, time_steps, num_inputs].
    x = tf.compat.v1.placeholder(
        "float", [None, self.time_steps, self.n_input], name="INPUT_IMAGE")

    if not is_dynamic_rnn:
        # static_rnn consumes a Python list with one tensor per time step.
        per_step_inputs = tf.unstack(x, self.time_steps, 1)
        outputs, _ = tf.compat.v1.nn.static_rnn(
            rnn_layer, per_step_inputs, dtype="float32")
    else:
        # dynamic_rnn is fed time-major input here: [time, batch, inputs].
        time_major_input = tf.transpose(x, perm=[1, 0, 2])
        outputs, _ = tf.compat.v1.lite.experimental.nn.dynamic_rnn(
            rnn_layer, time_major_input, dtype="float32")
        outputs = tf.unstack(outputs, axis=0)

    # Logits: last step's output [batch_size, num_units] times
    # out_weights [num_units, n_classes], plus out_bias.
    last_output = outputs[-1]
    prediction = tf.matmul(last_output, out_weights) + out_bias
    output_class = tf.nn.softmax(prediction, name="OUTPUT_CLASS")
    return x, prediction, output_class
def interpolate(features, hparams, decode_hp):
    """Interpolates between the first input frame and the last target frame.

    Both frames are encoded to latents, the latents of the levels listed in
    `decode_hp.level_interp` are interpolated, and the result is decoded back
    to frames. Levels that are not interpolated reuse the first frame's latent,
    tiled `decode_hp.num_interp` times.

    Args:
      features: dict of tensors with "inputs" and "targets" entries; frames are
        taken along axis 1 of each.
      hparams: HParams, training hparams.
      decode_hp: HParams, decode hparams.

    Returns:
      images: interpolated images, 4-D Tensor, shape=(num_interp, H, W, C)
      first_frame: image Tensor, documented upstream as shape=(1, H, W, C)
      last_frame: image Tensor, documented upstream as shape=(1, H, W, C)
    """
    input_frames = tf.unstack(features["inputs"], axis=1)
    target_frames = tf.unstack(features["targets"], axis=1)
    coeffs = np.linspace(0.0, 1.0, decode_hp.num_interp)

    # (X_1, X_t) -> (z_1, z_t)
    first_frame = input_frames[0]
    last_frame = target_frames[-1]
    first_top_z, first_level_eps = frame_to_latents(first_frame, hparams)
    last_top_z, last_level_eps = frame_to_latents(last_frame, hparams)

    # Per-level latents, with the top-level latent placed last.
    first_lats = first_level_eps + [first_top_z]
    last_lats = last_level_eps + [last_top_z]

    interp_lats = []
    for level_ind, (first_lat, last_lat) in enumerate(
            zip(first_lats, last_lats)):
        if level_ind not in decode_hp.level_interp:
            # Level excluded from interpolation: repeat the first latent.
            interp_lat = tf.tile(first_lat, [decode_hp.num_interp, 1, 1, 1])
        elif decode_hp.channel_interp == "all":
            interp_lat = glow_ops.linear_interpolate(
                first_lat, last_lat, coeffs)
        else:
            interp_lat = glow_ops.linear_interpolate_rank(
                first_lat, last_lat, coeffs, decode_hp.rank_interp)
        interp_lats.append(interp_lat)

    z_top_interp = interp_lats[-1]
    level_eps_interp = interp_lats[:hparams.n_levels - 1]
    images = latents_to_frames(z_top_interp, level_eps_interp, hparams)
    return images, first_frame, last_frame
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
    """A helper for using_motion_vector. See docstring therein.

    Projects every pixel of `depth` through a rotation and a translation and
    returns the resulting homogeneous pixel coordinates.

    Args:
      depth: tensor unpacked here as three dimensions (used as 'bhw' in the
        einsum expressions below).
      translation: tensor of rank 2 or 4 whose last dimension is 3; used as
        'bhwj' after a rank-2 input has been expanded with two singleton
        spatial axes.
      rotation_angles: tensor of rank 2 (one rotation per image) or rank 4 (a
        rotation per pixel), converted with
        `transform_utils.matrix_from_angles`.
      intrinsic_mat: camera intrinsics, used as 'bij' in the einsum expressions.

    Returns:
      A tuple (x / z, y / z, z) of tensors obtained by unstacking the projected
      coordinates along axis 1.

    Raises:
      ValueError: if `translation` is not of rank 2 or 4, if its last dimension
        is not 3, or if `rotation_angles` is not of rank 2 or 4.
    """
    if translation.shape.ndims not in (2, 4):
        raise ValueError('\'translation\' should have rank 2 or 4, not %s' %
                         translation.shape.ndims)
    if translation.shape[-1] != 3:
        # Report the dimension that was actually checked (the last one).
        raise ValueError('translation\'s last dimension should be 3, not %s' %
                         translation.shape[-1])
    rotation_rank = rotation_angles.shape.ndims
    if rotation_rank not in (2, 4):
        # Fail early with a clear message instead of hitting an unbound
        # `pcoords` after part of the graph has been built.
        raise ValueError('\'rotation_angles\' should have rank 2 or 4, not %s' %
                         rotation_rank)
    if translation.shape.ndims == 2:
        translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(
        tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3)
    grid = tf.to_float(grid)
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if rotation_rank == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However, we use einsum for better clarity. Under the hood, einsum
        # performs the reshaping and invocation of BatchMatMul, instead of
        # doing it manually, as in inverse_warp.
        projected_rotation = tf.einsum('bij,bjk,bkl->bil', intrinsic_mat,
                                       rot_mat, intrinsic_mat_inv)
        pcoords = tf.einsum('bij,jhw,bhw->bihw', projected_rotation, grid,
                            depth)
    else:
        # We push the H and W dimensions to the end, and transpose the rotation
        # matrix elements (as noted above).
        rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
        projected_rotation = tf.einsum('bij,bjkhw,bkl->bilhw', intrinsic_mat,
                                       rot_mat, intrinsic_mat_inv)
        pcoords = tf.einsum('bijhw,jhw,bhw->bihw', projected_rotation, grid,
                            depth)

    projected_translation = tf.einsum('bij,bhwj->bihw', intrinsic_mat,
                                      translation)
    pcoords += projected_translation
    x, y, z = tf.unstack(pcoords, axis=1)
    return x / z, y / z, z
def unit(hidden_memory_tuple):
    """Maps the hidden state through an MLP and returns the logits.

    `self.Wbo_list` is read as alternating (weight, bias) entries: all pairs
    except the last are ReLU hidden layers, and the last pair is the linear
    output layer.
    """
    # Only the hidden state (batch x hidden_dim) is used; the memory cell is
    # unpacked but ignored.
    activations, _ = tf.unstack(hidden_memory_tuple)

    params = self.Wbo_list
    num_hidden_layers = len(params) // 2 - 1
    for layer_idx in range(num_hidden_layers):
        weight = params[2 * layer_idx]
        bias = params[2 * layer_idx + 1]
        activations = tf.nn.relu(tf.nn.xw_plus_b(activations, weight, bias))

    return tf.nn.xw_plus_b(activations, params[-2], params[-1])
def unit(hidden_memory_tuple):
    """Maps the hidden state through highway layers to sigmoid rewards.

    `self.Wbo_list` is read as: entries 0-1 for the input ReLU layer, then four
    entries per highway layer (transform weight/bias, gate weight/bias), and
    the final two entries for the sigmoid output layer.
    """
    # The memory cell is unpacked but not used.
    features, _ = tf.unstack(hidden_memory_tuple)
    params = self.Wbo_list

    features = tf.nn.relu(tf.nn.xw_plus_b(features, params[0], params[1]))

    for layer_idx in range(num_highway):
        base = 2 + 4 * layer_idx
        transform = tf.nn.relu(
            tf.nn.xw_plus_b(features, params[base], params[base + 1]))
        gate = tf.nn.sigmoid(
            tf.nn.xw_plus_b(features, params[base + 2], params[base + 3]))
        # Highway mix: the gate picks between transformed and carried features.
        features = transform * gate + (1. - gate) * features

    return tf.nn.sigmoid(tf.nn.xw_plus_b(features, params[-2], params[-1]))
def _lstm(x, prev_c, prev_h, w_lstm, layer_masks):
    """Multi-layer LSTM, unrolled over time with `tf.while_loop`.

    Args:
      x: [batch_size, num_steps, hidden_size].
      prev_c: [[batch_size, hidden_size] * num_layers].
      prev_h: [[batch_size, hidden_size] * num_layers].
      w_lstm: [[2 * hidden_size, 4 * hidden_size] * num_layers].
      layer_masks: [([hidden_size, hidden_size] or None)* num_layers].

    Returns:
      next_c: [[batch_size, hidden_size] * num_layers].
      next_h: [[batch_size, hidden_size] * num_layers].
      all_h: a list with one tensor per layer; each is that layer's per-step
        `h` stacked over time and transposed with perm [1, 0, 2]
        (presumably [batch_size, num_steps, hidden_size] -- the TensorArrays
        are created with infer_shape=False, so this is not checked here).
    """
    _, num_steps, _ = tf.unstack(tf.shape(x))
    num_layers = len(w_lstm)

    # One TensorArray per layer, collecting that layer's h at every step.
    all_h = [
        tf.TensorArray(dtype=tf.float32, size=num_steps, infer_shape=False)
        for _ in range(num_layers)
    ]

    def _condition(step, *unused_args):
        return tf.less(step, num_steps)

    def _body(step, pprev_c, pprev_h, all_h):
        """Apply LSTM at each step."""
        next_c, next_h = [], []
        for layer_id, (p_c, p_h, w, m) in enumerate(
                zip(pprev_c, pprev_h, w_lstm, layer_masks)):
            # The first layer reads the input; deeper layers read the h just
            # produced by the layer below at this same step.
            inp = x[:, step, :] if layer_id == 0 else next_h[-1]
            if m is not None:
                inp *= m
            # A single matmul yields all four gate pre-activations.
            ifog = tf.matmul(tf.concat([inp, p_h], axis=1), w)
            i, f, o, g = tf.split(ifog, 4, axis=1)
            i = tf.sigmoid(i)
            f = tf.sigmoid(f)
            o = tf.sigmoid(o)
            g = tf.tanh(g)
            c = i * g + f * p_c
            h = o * tf.tanh(c)
            all_h[layer_id] = all_h[layer_id].write(step, h)
            next_c.append(c)
            next_h.append(h)
        return step + 1, next_c, next_h, all_h

    loop_inps = [tf.constant(0, dtype=tf.int32), prev_c, prev_h, all_h]
    _, next_c, next_h, all_h = tf.while_loop(_condition, _body, loop_inps,
                                             parallel_iterations=1)
    # TensorArray.stack() puts the step axis first; swap it with the next axis.
    all_h = [tf.transpose(h.stack(), [1, 0, 2]) for h in all_h]
    return next_c, next_h, all_h
def transformCropImage(opt, image, pMtrx):
    """Warps `image` with a homography and samples a crop bilinearly.

    A canonical [-1, 1] x [-1, 1] grid of opt.H x opt.W points is transformed
    by `opt.refMtrx_b @ pMtrx`, and the image is sampled at the resulting
    coordinates with bilinear interpolation. Samples whose integer neighbours
    fall outside [0, opt.dataW) x [0, opt.dataH) read from an extra all-zero
    row appended to the flattened image.

    Args:
      opt: options object; this function reads refMtrx_b, batchSize, H, W,
        dataH and dataW from it.
      image: image tensor; it is flattened to rows of 3 values, so it
        presumably has shape [batchSize, dataH, dataW, 3] -- TODO confirm.
      pMtrx: per-sample transform, right-multiplied onto the tiled reference
        matrix (presumably [batchSize, 3, 3] -- TODO confirm).

    Returns:
      The warped image; the broadcasted products below give it shape
      [batchSize, H, W, 3].
    """
    with tf.name_scope("transformImage"):
        refMtrx = tf.tile(tf.expand_dims(opt.refMtrx_b, axis=0),
                          [opt.batchSize, 1, 1])
        transMtrx = tf.matmul(refMtrx, pMtrx)
        # warp the canonical coordinates
        X, Y = np.meshgrid(np.linspace(-1, 1, opt.W),
                           np.linspace(-1, 1, opt.H))
        X, Y = X.flatten(), Y.flatten()
        XYhom = np.stack([X, Y, np.ones_like(X)], axis=1).T
        XYhom = np.tile(XYhom, [opt.batchSize, 1, 1]).astype(np.float32)
        XYwarpHom = tf.matmul(transMtrx, XYhom)
        XwarpHom, YwarpHom, ZwarpHom = tf.unstack(XYwarpHom, axis=1)
        # Dehomogenize; the 1e-8 guards against division by zero.
        Xwarp = tf.reshape(XwarpHom / (ZwarpHom + 1e-8),
                           [opt.batchSize, opt.H, opt.W])
        Ywarp = tf.reshape(YwarpHom / (ZwarpHom + 1e-8),
                           [opt.batchSize, opt.H, opt.W])
        # get the integer sampling coordinates
        Xfloor, Xceil = tf.floor(Xwarp), tf.ceil(Xwarp)
        Yfloor, Yceil = tf.floor(Ywarp), tf.ceil(Ywarp)
        XfloorInt, XceilInt = tf.to_int32(Xfloor), tf.to_int32(Xceil)
        YfloorInt, YceilInt = tf.to_int32(Yfloor), tf.to_int32(Yceil)
        imageIdx = np.tile(
            np.arange(opt.batchSize).reshape([opt.batchSize, 1, 1]),
            [1, opt.H, opt.W])
        imageVec = tf.reshape(image, [-1, 3])
        # Append one all-zero row; out-of-image samples are pointed at it.
        imageVecOut = tf.concat([imageVec, tf.zeros([1, 3])], axis=0)
        # Flat row index of each of the four neighbours: (b * dataH + y) * dataW + x.
        idxUL = (imageIdx * opt.dataH + YfloorInt) * opt.dataW + XfloorInt
        idxUR = (imageIdx * opt.dataH + YfloorInt) * opt.dataW + XceilInt
        idxBL = (imageIdx * opt.dataH + YceilInt) * opt.dataW + XfloorInt
        idxBR = (imageIdx * opt.dataH + YceilInt) * opt.dataW + XceilInt
        # Index used for out-of-image neighbours (the appended zero row,
        # assuming imageVec has batchSize * dataH * dataW rows).
        idxOutside = tf.fill([opt.batchSize, opt.H, opt.W],
                             opt.batchSize * opt.dataH * opt.dataW)

        def insideIm(Xint, Yint):
            return (Xint >= 0) & (Xint < opt.dataW) & (Yint >= 0) & (Yint < opt.dataH)
        idxUL = tf.where(insideIm(XfloorInt, YfloorInt), idxUL, idxOutside)
        idxUR = tf.where(insideIm(XceilInt, YfloorInt), idxUR, idxOutside)
        idxBL = tf.where(insideIm(XfloorInt, YceilInt), idxBL, idxOutside)
        idxBR = tf.where(insideIm(XceilInt, YceilInt), idxBR, idxOutside)
        # bilinear interpolation
        Xratio = tf.reshape(Xwarp - Xfloor, [opt.batchSize, opt.H, opt.W, 1])
        Yratio = tf.reshape(Ywarp - Yfloor, [opt.batchSize, opt.H, opt.W, 1])
        imageUL = tf.to_float(tf.gather(imageVecOut, idxUL)) * (1 - Xratio) * (1 - Yratio)
        imageUR = tf.to_float(tf.gather(imageVecOut, idxUR)) * (Xratio) * (1 - Yratio)
        imageBL = tf.to_float(tf.gather(imageVecOut, idxBL)) * (1 - Xratio) * (Yratio)
        imageBR = tf.to_float(tf.gather(imageVecOut, idxBR)) * (Xratio) * (Yratio)
        imageWarp = imageUL + imageUR + imageBL + imageBR
    return imageWarp
def time_distributed(incoming, fn, args=None, scope=None):
    """ Time Distributed.

    Applies `fn` independently to every timestep of the input tensor and
    concatenates the per-timestep results back along the time axis.

    The first argument of `fn` must be the input tensor of one timestep.
    Further positional parameters for `fn` may be given in `args` (as a list).

    Examples:
        ```python
        # Applying a fully_connected layer at every timestep
        x = time_distributed(input_tensor, fully_connected, [64])

        # Using a conv layer at every timestep with a scope
        x = time_distributed(input_tensor, conv_2d, [64, 3], scope='tconv')
        ```

    Input:
        (3+)-D Tensor [samples, timestep, input_dim].

    Output:
        (3+)-D Tensor [samples, timestep, output_dim].

    Arguments:
        incoming: `Tensor`. The incoming tensor. Anything that is not a
            `tf.Tensor` is stacked and transposed with perm [1, 0, 2] first.
        fn: `function`. A function to apply at every timestep. Its first
            parameter must be the input tensor per timestep.
        args: `list`. Extra positional parameters passed to `fn`.
        scope: `str`. A scope to give to each timestep tensor. Useful when
            sharing weights. Timestep i is called with scope 'scope'-'i'.
            Note that `fn` is then required to accept a 'scope' parameter.

    Returns:
        A Tensor.

    """
    args = args or []
    assert isinstance(args, list), "'args' must be a list."

    if not isinstance(incoming, tf.Tensor):
        incoming = tf.transpose(tf.stack(incoming), [1, 0, 2])

    timestep = utils.get_incoming_shape(incoming)[1]
    steps = tf.unstack(incoming, axis=1)

    # First apply `fn` to every timestep...
    outputs = []
    for i in range(timestep):
        if scope:
            outputs.append(fn(steps[i], *args, scope=scope + '-' + str(i)))
        else:
            outputs.append(fn(steps[i], *args))

    # ...then give every result a length-1 time axis so they can be joined.
    expanded = [
        tf.reshape(out, [-1, 1] + utils.get_incoming_shape(out)[1:])
        for out in outputs
    ]
    return tf.concat(expanded, 1)
def _match_templates(net_z, net_x, params_names_list, params_values_list):
    """Cross-correlates template features with search features.

    Each (batch, channel) slice of `net_z` is used as a depthwise filter over
    the matching slice of `net_x`; the per-channel responses are then summed
    into a single-channel score map, optionally followed by a frozen batch
    normalization whose parameters are looked up by name.

    Args:
      net_z: template features, [B, H, W, C].
      net_x: search features, [B, H, W, C].
      params_names_list: parameter names, parallel to `params_values_list`.
      params_values_list: parameter values; read only when the module-level
        `_bnorm_adjust` flag is truthy.

    Returns:
      A pair (net_final, candidates): the summed (and optionally normalized)
      score map, and the raw depthwise-convolution output.
    """
    # finalize network
    # z, x are [B, H, W, C]
    net_z = tf.transpose(net_z, perm=[1, 2, 0, 3])
    net_x = tf.transpose(net_x, perm=[1, 2, 0, 3])
    # z, x are [H, W, B, C]
    Hz, Wz, B, C = tf.unstack(tf.shape(net_z))
    Hx, Wx, Bx, Cx = tf.unstack(tf.shape(net_x))
    # assert B==Bx, ('Z and X should have same Batch size')
    # assert C==Cx, ('Z and X should have same Channels number')
    # Fold batch into channels so one depthwise conv correlates every pair.
    net_z = tf.reshape(net_z, (Hz, Wz, B * C, 1))
    net_x = tf.reshape(net_x, (1, Hx, Wx, B * C))
    net_final = tf.nn.depthwise_conv2d(net_x, net_z, strides=[1, 1, 1, 1],
                                       padding='VALID')
    # candidates
    candidates = net_final
    # final is [1, Hf, Wf, BC]
    # NOTE(review): the split count is hard-coded to 3, so the "[B, Hf, Wf, C]"
    # shape below only holds when B == 3 -- confirm against the callers.
    net_final = tf.concat(tf.split(net_final, 3, axis=3), axis=0)
    # final is [B, Hf, Wf, C]
    net_final = tf.expand_dims(tf.reduce_sum(net_final, axis=3), axis=3)
    # final is [B, Hf, Wf, 1]
    if _bnorm_adjust:
        bn_beta = params_values_list[params_names_list.index('fin_adjust_bnb')]
        bn_gamma = params_values_list[params_names_list.index(
            'fin_adjust_bnm')]
        bn_moments = params_values_list[params_names_list.index(
            'fin_adjust_bnx')]
        bn_moving_mean = bn_moments[:, 0]
        # The second stored column is squared before being used as variance.
        bn_moving_variance = bn_moments[:, 1]**2
        net_final = tf.layers.batch_normalization(
            net_final,
            beta_initializer=tf.constant_initializer(bn_beta),
            gamma_initializer=tf.constant_initializer(bn_gamma),
            moving_mean_initializer=tf.constant_initializer(bn_moving_mean),
            moving_variance_initializer=tf.constant_initializer(
                bn_moving_variance),
            training=False,
            trainable=False)
    return net_final, candidates
def _make_random_serialized_jpeg_images(self, num_frames, image_height,
                                        image_width):
    """Creates `num_frames` random RGB images and returns them JPEG-encoded.

    Args:
      num_frames: number of images to generate.
      image_height: height of every image in pixels.
      image_width: width of every image in pixels.

    Returns:
      The values obtained by evaluating one `tf.io.encode_jpeg` op per frame
      in a fresh session.
    """
    frames_shape = [num_frames, image_height, image_width, 3]
    random_pixels = tf.random.uniform(frames_shape, maxval=256, dtype=tf.int32)
    images = tf.cast(random_pixels, dtype=tf.uint8)

    encoded_images_list = []
    for frame in tf.unstack(images, axis=0):
        encoded_images_list.append(tf.io.encode_jpeg(frame))

    with tf.Session() as sess:
        encoded_images = sess.run(encoded_images_list)
    return encoded_images
def get_RNN_from_words(model, word_idxs, reuse, scope=None):
    """Encodes a word-index sequence with a GRU and returns its final state.

    Args:
      model: object providing `word_emb` (the embedding table) and
        `embed_size` (the GRU width).
      word_idxs: word indices; the embedded sequence is split into steps along
        axis 1.
      reuse: passed to the variable scope as `reuse`.
      scope: optional variable scope name; defaults to 'RNN_abstraction'.

    Returns:
      A pair (final GRU state, [embedded word vectors]).
    """
    scope_name = scope or 'RNN_abstraction'
    with variable_scope.variable_scope(scope_name, reuse=reuse):
        word_vecs = tf.nn.embedding_lookup(model.word_emb, word_idxs)
        gru_cell = tf.contrib.rnn.GRUCell(model.embed_size)
        # static_rnn expects a list with one tensor per time step.
        per_step_vecs = tf.unstack(word_vecs, axis=1)
        _, final_state = tf.nn.static_rnn(
            gru_cell, per_step_vecs, dtype=dtypes.float32)
        return final_state, [word_vecs]
def concat_tensor_display(tensor, axisToConcat, axisToSplit=3):
    """Lays the slices along one axis of `tensor` side by side along another.

    Args:
      tensor: tensor to rearrange.
      axisToConcat: axis (of the unstacked slices) along which the slices are
        concatenated.
      axisToSplit: axis of `tensor` to unstack; defaults to 3.

    Returns:
      The concatenation of the slices, e.g. 4 slices of [batch, 256, 256, 3]
      joined along axis 2 give [batch, 256, 256 * 4, 3].
    """
    # Unstacking removes `axisToSplit`; tf.split may be needed if it has to
    # be kept.
    slices = tf.unstack(tensor, axis=axisToSplit)
    return tf.concat(slices, axis=axisToConcat)
def graph_fn():
    """Builds one JPEG-encoding op per randomly generated RGB frame.

    Reads `num_frames`, `image_height` and `image_width` from the enclosing
    scope.

    Returns:
      A list with one `tf.io.encode_jpeg` tensor per frame.
    """
    frames_shape = [num_frames, image_height, image_width, 3]
    random_pixels = tf.random.uniform(frames_shape, maxval=256, dtype=tf.int32)
    images = tf.cast(random_pixels, dtype=tf.uint8)

    encoded_images = []
    for frame in tf.unstack(images, axis=0):
        encoded_images.append(tf.io.encode_jpeg(frame))
    return encoded_images
def tensormul(t1, t2):
    """Basically matmul, but t1 can have more dimensions than t2.

    All leading dimensions of `t1` are flattened into one, the result is
    multiplied by `t2`, and the leading dimensions are restored afterwards.

    Args:
      t1: tensor of statically known rank whose last dimension `dim1` is
        statically known.
      t2: matrix to right-multiply with (a 2-D tensor, since it is fed to
        `tf.matmul` against the flattened `t1`); its last dimension must be
        statically known.

    Returns:
      A tensor shaped like `t1` except that the last dimension is the last
      dimension of `t2`.
    """
    dim1 = t1.get_shape().as_list()[-1]
    dim2 = t2.get_shape().as_list()[-1]

    # Dynamic shape of the result: t1's shape with the last entry replaced.
    # The list is passed straight to tf.reshape, which packs it into a shape
    # tensor itself, so no separate tf.stack op is created.
    result_shape_tensors = tf.unstack(tf.shape(t1))
    result_shape_tensors[-1] = dim2

    flat_t1 = tf.reshape(t1, [-1, dim1])
    result = tf.matmul(flat_t1, t2)
    return tf.reshape(result, result_shape_tensors)
def RNN(X, weights, biases):
    """Runs an input projection, an LSTM and an output projection.

    Reads `n_steps`, `n_hidden_units` and `batch_size` from the enclosing
    scope.

    Args:
      X: input batch; it is flattened to rows of 28 values.
      weights: dict with 'in' and 'out' weight matrices.
      biases: dict with 'in' and 'out' bias vectors.

    Returns:
      The output projection of the LSTM output at the last time step.
    """
    # Input projection, applied to every row: (128 * 28, 28) -> (128 * 28, 128).
    flat_inputs = tf.reshape(X, [-1, 28])
    projected = tf.matmul(flat_inputs, weights['in']) + biases['in']
    # Back to a time series: (128, 28, 128).
    sequence = tf.reshape(projected, [-1, n_steps, n_hidden_units])

    cell = tf.nn.rnn_cell.BasicLSTMCell(
        n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    init_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    # time_major=False: the time axis is axis 1 of the input.
    outputs, _ = tf.nn.dynamic_rnn(
        cell, sequence, initial_state=init_state, time_major=False)

    # Make the outputs time-major, split per step, and keep the last step.
    per_step_outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    last_output = per_step_outputs[-1]
    # Alternative: project final_state[1] instead, which would presumably only
    # take the short-term memory into account.
    return tf.matmul(last_output, weights['out']) + biases['out']
def host_call_fn(step, scalar_values):
    """Writes the tensorboard scalars to the checkpoint directory.

    Reads `params` and `tensorboard_scalars` from the enclosing scope.
    Summaries are recorded only when the step is a multiple of
    `params['tpu_iterations_per_loop']`.

    Args:
      step: tensor whose first element is the current step.
      scalar_values: tensor holding one value per key of
        `tensorboard_scalars`, in the dict's iteration order.

    Returns:
      The result of `tf.summary.all_v2_summary_ops()`.
    """
    values = tf.unstack(scalar_values)
    writer = tf2.summary.create_file_writer(params['checkpoint_dir'])
    with writer.as_default(), tf2.summary.record_if(
            tf.math.equal(step[0] % params['tpu_iterations_per_loop'], 0)):
        for name, value in zip(tensorboard_scalars, values):
            tf2.summary.scalar(name, value, step=step[0])
        return tf.summary.all_v2_summary_ops()
def crop_image(image_bytes, label):
    """Randomly crops, resizes and flips an image for training.

    Reads `image_size` and `dtype` from the enclosing scope.

    Args:
      image_bytes: the image tensor to crop (its `tf.shape` is used as the
        image size for bounding-box sampling).
      label: passed through unchanged.

    Returns:
      A pair (image of shape [image_size, image_size, 3] converted to `dtype`,
      label).
    """
    # Sample a distorted crop window relative to the whole image.
    whole_image_box = tf.constant(
        [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
    crop_begin, crop_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image_bytes),
        whole_image_box,
        min_object_covered=0.1,
        aspect_ratio_range=(0.75, 1.33),
        area_range=(0.08, 1.0),
        max_attempts=10,
        use_image_if_no_bounding_boxes=True)

    # Both vectors are (y, x, channels); the channel entry is not needed.
    offset_y, offset_x, _ = tf.unstack(crop_begin)
    crop_height, crop_width, _ = tf.unstack(crop_size)
    cropped = tf.image.crop_to_bounding_box(
        image_bytes, offset_y, offset_x, crop_height, crop_width)

    # resize_bicubic works on batches, so wrap and unwrap a batch of one.
    resized = tf.image.resize_bicubic([cropped], [image_size, image_size])[0]
    flipped = tf.image.random_flip_left_right(resized)
    final_image = tf.reshape(flipped, [image_size, image_size, 3])
    return tf.image.convert_image_dtype(final_image, dtype), label
def gather_nd(params, indices, name=None):
    """Gathers elements of `params` addressed by full N-d indices.

    Implemented with a flat `tf.gather`: each N-d index is turned into an
    offset into the flattened `params` using row-major strides.

    Args:
      params: tensor of statically known rank; every dimension except the
        first must be statically known, because the strides are computed in
        Python.
      indices: integer tensor whose last axis holds one coordinate per
        dimension of `params`. NOTE(review): the transpose below uses the rank
        of `params` as the rank of `indices`, so the two are assumed to have
        equal rank -- confirm against the callers.
      name: optional op name, used for both the transpose and the gather.

    Returns:
      A tensor with the gathered values, shaped like `indices` without its
      last axis.
    """
    shape = params.get_shape().as_list()
    rank = len(shape)
    flat_params = tf.reshape(params, [-1])

    # Row-major strides: multipliers[i] is the product of shape[i + 1:].
    # shape[0] is never read, so an unknown leading dimension is fine.
    multipliers = [1] * rank
    for i in range(rank - 2, -1, -1):
        multipliers[i] = multipliers[i + 1] * shape[i + 1]

    # Move the coordinate axis to the front and split it into one tensor per
    # dimension. `range` has to be turned into a list before concatenation on
    # Python 3.
    perm = [rank - 1] + list(range(rank - 1))
    indices_unpacked = tf.unstack(tf.transpose(indices, perm, name))

    flat_indices = sum(
        [stride * coord for stride, coord in zip(multipliers, indices_unpacked)])
    return tf.gather(flat_params, flat_indices, name=name)
def reinforce_loss(disc_logits, bert_scores, gen_logprobs, gamma, decay):
    """The REINFORCE loss.

    The per-step reward is `2 * sigmoid(bert_scores) - 1`. The reward-to-go at
    step t is the discounted sum of the rewards of step t and all following
    steps, and an exponentially weighted moving average of its mean serves as
    the baseline.

    Args:
      disc_logits: float tensor, shape [batch_size, sequence_length]. Only its
        static shape is used; the discriminator term of the reward
        (1.5 * sigmoid(disc_logits) + 0.5 * bert_predictions - 1) is currently
        disabled.
      bert_scores: float tensor, shape [batch_size, sequence_length], the
        scores that define the reward.
      gen_logprobs: float32 tensor, shape [batch_size, sequence_length]
      gamma: a float, discount factor for cumulative reward.
      decay: a float, decay rate for the EWMA baseline of REINFORCE.

    Returns:
      A tuple (loss, cumulative_rewards, ewma_reward):
        loss: float tensor, shape [batch_size, sequence_length], the REINFORCE
          loss for each timestep.
        cumulative_rewards: float tensor, shape [batch_size, sequence_length],
          the discounted reward-to-go for each timestep.
        ewma_reward: the scalar baseline variable.
    """
    # Assume 1 logit for each timestep.
    batch_size, sequence_length = disc_logits.shape.as_list()
    gen_logprobs.shape.assert_is_compatible_with([batch_size, sequence_length])

    bert_predictions = tf.nn.sigmoid(bert_scores)

    # MaskGAN uses log(D), but this is more stable empirically.
    rewards = 2 * bert_predictions - 1

    # Compute cumulative rewards with a single backward pass:
    #   R_t = r_t + gamma * R_{t+1},  with R_T = 0.
    # This builds O(T) ops instead of the O(T^2) of summing every suffix
    # separately, and gives the same discounted sums up to float rounding.
    rewards_list = tf.unstack(rewards, axis=1)
    cumulative_rewards = [None] * sequence_length
    reward_to_go = tf.zeros(shape=[batch_size])
    for t in reversed(range(sequence_length)):
        reward_to_go = rewards_list[t] + gamma * reward_to_go
        cumulative_rewards[t] = reward_to_go
    cumulative_rewards = tf.stack(cumulative_rewards, axis=1)
    cumulative_rewards.shape.assert_is_compatible_with(
        [batch_size, sequence_length])

    with tf.variable_scope("reinforce", reuse=tf.AUTO_REUSE):
        ewma_reward = tf.get_variable("ewma_reward", initializer=0.0)

    mean_reward = tf.reduce_mean(cumulative_rewards)
    new_ewma_reward = decay * ewma_reward + (1.0 - decay) * mean_reward
    update_op = tf.assign(ewma_reward, new_ewma_reward)

    # REINFORCE
    with tf.control_dependencies([update_op]):
        advantage = cumulative_rewards - ewma_reward
        # The advantage is a constant weight; gradients flow only through
        # gen_logprobs.
        loss = -tf.stop_gradient(advantage) * gen_logprobs

    loss.shape.assert_is_compatible_with([batch_size, sequence_length])
    return loss, cumulative_rewards, ewma_reward
def cdna_transformation(prev_image, cdna_input, num_masks, color_channels,
                        dna_kernel_size, relu_shift):
    """Apply convolutional dynamic neural advection to previous image.

    Args:
      prev_image: previous image to be transformed.
      cdna_input: hidden layer to be used for computing CDNA kernels.
      num_masks: number of masks and hence the number of CDNA transformations.
      color_channels: the number of color channels in the images.
      dna_kernel_size: dna kernel size.
      relu_shift: shift for ReLU function.

    Returns:
      List of images transformed by the predicted CDNA kernels.
    """
    batch_size = tf.shape(cdna_input)[0]
    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])

    # Predict kernels using linear function of last hidden layer.
    cdna_kerns = tfl.dense(cdna_input,
                           dna_kernel_size * dna_kernel_size * num_masks,
                           name="cdna_params",
                           activation=None)

    # Reshape and normalize.
    cdna_kerns = tf.reshape(
        cdna_kerns, [batch_size, dna_kernel_size, dna_kernel_size, 1, num_masks])
    # Shifted ReLU: every kernel entry ends up >= relu_shift.
    cdna_kerns = (tf.nn.relu(cdna_kerns - relu_shift) + relu_shift)
    # Normalize so that each kernel sums to one over its spatial extent.
    norm_factor = tf.reduce_sum(cdna_kerns, [1, 2, 3], keep_dims=True)
    cdna_kerns /= norm_factor

    # Treat the color channel dimension as the batch dimension since the same
    # transformation is applied to each color channel.
    # Treat the batch dimension as the channel dimension so that
    # depthwise_conv2d can apply a different transformation to each sample.
    cdna_kerns = tf.transpose(cdna_kerns, [1, 2, 0, 4, 3])
    cdna_kerns = tf.reshape(
        cdna_kerns, [dna_kernel_size, dna_kernel_size, batch_size, num_masks])
    # Swap the batch and channel dimensions.
    prev_image = tf.transpose(prev_image, [3, 1, 2, 0])

    # Transform image.
    transformed = tf.nn.depthwise_conv2d(prev_image, cdna_kerns, [1, 1, 1, 1],
                                         "SAME")

    # Transpose the dimensions to where they belong.
    transformed = tf.reshape(
        transformed, [color_channels, height, width, batch_size, num_masks])
    transformed = tf.transpose(transformed, [3, 1, 2, 0, 4])
    # One transformed image per mask.
    transformed = tf.unstack(transformed, axis=-1)
    return transformed
def step(hprev, x):
    """Computes one LSTM step.

    Reads `weight` and `bias` from the enclosing scope.

    Args:
      hprev: stacked pair (previous hidden state, previous cell state).
      x: input of the current step.

    Returns:
      The stacked pair (new hidden state, new cell state).
    """
    prev_hidden, prev_cell = tf.unstack(hprev)

    # All four gate pre-activations come from one affine map of [x, h].
    cell_input = tf.concat([x, prev_hidden], -1)
    gates = tf.transpose(tf.matmul(weight, tf.transpose(cell_input))) + bias
    in_pre, forget_pre, cand_pre, out_pre = tf.split(gates, 4, axis=1)

    in_gate = tf.sigmoid(in_pre)
    forget_gate = tf.sigmoid(forget_pre)
    candidate = tf.tanh(cand_pre)
    out_gate = tf.sigmoid(out_pre)

    new_cell = prev_cell * forget_gate + candidate * in_gate
    new_hidden = tf.tanh(new_cell) * out_gate
    return tf.stack([new_hidden, new_cell])
def registerTensorboard(paths, images, nbInputsMax, nbTargets, loss_value,
                        batch_size, targetsRenderings, outputsRenderings):
    """Registers the image and loss summaries shown in TensorBoard.

    Args:
      paths: unused.
      images: sequence holding (inputs, targets, outputs) at indices 0, 1, 2.
      nbInputsMax: max number of input images to display.
      nbTargets: max number of target / output images to display.
      loss_value: scalar loss to log.
      batch_size: number of pieces the inputs and targets are split into along
        axis 0; only the first piece is displayed.
      targetsRenderings: [batchSize, nbRenderings, 256, 256, 3]; only the
        first batch element is displayed, in log space.
      outputsRenderings: output renderings, handled like `targetsRenderings`.
    """
    inputs, targets, outputs = images[0], images[1], images[2]

    first_targets = tf.split(targets, batch_size, axis=0)[0]
    first_inputs = tf.split(inputs, batch_size, axis=0)[0]

    tf.summary.image("targets", first_targets, max_outputs=nbTargets)
    tf.summary.image("inputs", first_inputs, max_outputs=nbInputsMax)
    tf.summary.image("outputs", outputs, max_outputs=nbTargets)
    tf.summary.scalar("loss", loss_value)

    # Renderings are displayed as log(x + 0.1), one image per rendering.
    log_targets = tf.log(targetsRenderings[0] + 0.1)
    tf.summary.image("targets renderings",
                     tf.unstack(log_targets, axis=0),
                     max_outputs=9)
    log_outputs = tf.log(outputsRenderings[0] + 0.1)
    tf.summary.image("outputs renderings",
                     tf.unstack(log_outputs, axis=0),
                     max_outputs=9)
def relevants(inputs, lengths, is_concat=True):
    '''Keeps only the valid (non-padded) time steps of each RNN output.

    For every batch element, the first `lengths[i]` time steps of its output
    are gathered.

    Args:
        :type inputs: tensor, shape = [batch_size, time_step, edim]
        :param inputs: the outputs of the RNN.
        :type lengths: tensor, shape = [batch_size]
        :param lengths: the real length of each input sequence.

    Returns:
        If `is_concat` is true, a single tensor of shape
        [sum of real lengths, edim] holding the valid outputs of all batch
        elements one after another; otherwise a list with one
        [real_len, edim] tensor per batch element.
    '''
    per_example_outputs = tf.unstack(inputs, axis=0)
    per_example_lengths = tf.unstack(lengths, axis=0)

    reles = [
        tf.gather(per_example_outputs[idx], tf.range(0, real_len))
        for idx, real_len in enumerate(per_example_lengths)
    ]
    return tf.concat(reles, axis=0) if is_concat else reles
def calibration_fn(class_predictions_with_background):
    """Calibrate predictions per class via 1-d linear interpolation.

    Prediction scores are linearly interpolated with class-specific function
    approximations. Note that after calibration, an anchor's class scores will
    not necessarily sum to 1, and score ordering may change, depending on each
    class' calibration parameters.

    Reads `calibration_config` from the enclosing scope.

    Args:
      class_predictions_with_background: tf.float32 tensor of shape
        [batch_size, num_anchors, num_classes + 1] containing scores on the
        interval [0,1]. This is usually produced by a sigmoid or softmax layer
        and the result of calling the `predict` method of a detection model.

    Returns:
      tf.float32 tensor of the same shape as the input with values on the
      interval [0, 1]. Classes without calibration parameters are passed
      through unchanged (this is logged, not raised).
    """
    class_id_function_dict = _get_class_id_function_dict(calibration_config)

    # Tensors are split by class and then recombined at the end to recover
    # the input's original shape. If a class id does not have calibration
    # parameters, it is left unchanged.
    class_tensors = tf.unstack(class_predictions_with_background, axis=-1)
    calibrated_class_tensors = []
    for class_id, class_tensor in enumerate(class_tensors):
        flat_class_tensor = tf.reshape(class_tensor, shape=[-1])
        if class_id in class_id_function_dict:
            fn_x, fn_y = (class_id_function_dict[class_id][0],
                          class_id_function_dict[class_id][1])
            output_tensor = _tf_linear_interp1d(
                x_to_interpolate=flat_class_tensor, fn_x=fn_x, fn_y=fn_y)
        else:
            tf.logging.info(
                'Calibration parameters for class id `%d` not found',
                class_id)
            output_tensor = flat_class_tensor
        calibrated_class_tensors.append(output_tensor)

    # Classes were flattened individually; stack them as the last axis and
    # restore the original leading dimensions.
    combined_calibrated_tensor = tf.stack(calibrated_class_tensors, axis=1)
    input_shape = shape_utils.combined_static_and_dynamic_shape(
        class_predictions_with_background)
    calibrated_class_predictions_with_background = tf.reshape(
        combined_calibrated_tensor,
        shape=input_shape,
        name='calibrate_scores')
    return calibrated_class_predictions_with_background
def get_extra_loss(self, latent_means=None, latent_stds=None,
                   true_frames=None, gen_frames=None):
    """Gets extra loss from VAE and GAN.

    Args:
      latent_means: forwarded to the parent class's KL computation.
      latent_stds: forwarded to the parent class's KL computation.
      true_frames: ground-truth frames, stacked along axis 0.
      gen_frames: generated frames, stacked along axis 0.

    Returns:
      0.0 when not training; otherwise the VAE loss plus the GAN and VAE-GAN
      discriminator losses weighted by their hparams multipliers.
    """
    if not self.is_training:
        return 0.0

    hparams = self.hparams
    vae_loss = 0.0
    d_vae_loss = 0.0
    d_gan_loss = 0.0

    # Use sv2p's KL divergence computation.
    if hparams.use_vae:
        vae_loss = super(NextFrameSavpBase, self).get_extra_loss(
            latent_means=latent_means, latent_stds=latent_stds)

    if hparams.use_gan:
        context_frames = hparams.video_num_input_frames

        def _drop_leading(frames, num_dropped):
            # Drops the first `num_dropped` frames along axis 0.
            return tf.stack(tf.unstack(frames, axis=0)[num_dropped:])

        # Strip out the first context_frames for the true_frames and the
        # first context_frames - 1 for the generated frames.
        true_frames = _drop_leading(true_frames, context_frames)

        # discriminator for VAE.
        if hparams.use_vae:
            gen_enc_frames = _drop_leading(gen_frames, context_frames - 1)
            d_vae_loss = self.get_gan_loss(
                true_frames, gen_enc_frames, name="vae")

        # discriminator for GAN.
        gen_prior_frames = _drop_leading(
            self.gen_prior_video, context_frames - 1)
        d_gan_loss = self.get_gan_loss(
            true_frames, gen_prior_frames, name="gan")

    return (vae_loss
            + hparams.gan_loss_multiplier * d_gan_loss
            + hparams.gan_vae_loss_multiplier * d_vae_loss)
def __call__(self, input_cls, input_reg, is_training=True):
    """Runs the classification and regression towers of the head.

    Args:
      input_cls: feature map fed to the classification tower.
      input_reg: feature map fed to the regression tower.
      is_training: forwarded to every sub-layer.

    Returns:
      A tuple (cls_score, ctr_score, bbox). The scores are reshaped to
      [B, H * W, C]; bbox concatenates, along axis 2, `self.fm_ctr` minus the
      first two offset channels and `self.fm_ctr` plus the remaining ones.
    """

    def _flatten_spatial(feature_map):
        # [B, H, W, C] -> [B, H * W, C], using the dynamic shape.
        b, h, w, c = tf.unstack(tf.shape(feature_map, name='shape'))
        return tf.reshape(feature_map, (b, h * w, c), name='reshape')

    output_conf = input_cls
    for conv in (self.cls_conv_1, self.cls_conv_2, self.cls_conv_3):
        output_conf = conv(output_conf, is_training=is_training)

    output_bbox = input_reg
    for conv in (self.reg_conv_1, self.reg_conv_2, self.reg_conv_3):
        output_bbox = conv(output_bbox, is_training=is_training)

    cls_score = self.cls_score(output_conf, is_training=is_training)
    with tf.name_scope('cls_score'):
        cls_score = _flatten_spatial(cls_score)

    ctr_score = self.ctr_score(output_conf, is_training=is_training)
    with tf.name_scope('ctr_score'):
        ctr_score = _flatten_spatial(ctr_score)

    offset = self.offset(output_bbox, is_training=is_training)
    with tf.name_scope('offset'):
        # Affine rescale, then exp and scaling by the total stride.
        offset = (self.si * offset + self.bi)
        offset = tf.exp(offset, name='exp') * self.total_stride
        offset = _flatten_spatial(offset)

    # Decode the box corners around the feature-map centers.
    xy0 = self.fm_ctr - offset[:, :, 0:2]
    xy1 = self.fm_ctr + offset[:, :, 2:]
    bbox = tf.concat([xy0, xy1], axis=2)

    return cls_score, ctr_score, bbox
def _clamp_and_filter_result(pixel_x, pixel_y, z):
    """Clamps and masks out out-of-bounds pixel coordinates.

    Args:
      pixel_x: a tf.Tensor containing x pixel coordinates in an image.
      pixel_y: a tf.Tensor containing y pixel coordinates in an image.
      z: a tf.Tensor containing the depth at each (pixel_y, pixel_x)

    All shapes are [B, H, W].

    Returns:
      pixel_x, pixel_y, mask, where pixel_x and pixel_y are the original ones,
      except:
      - Values that fall out of the image bounds, which are [0, W-1) in x and
        [0, H-1) in y, are clamped to the bounds
      - Positions where pixel_x or pixel_y is NaN are replaced by zeros in
        both pixel_x and pixel_y
      mask is False at all points where:
      - Clamping in pixel_x or pixel_y was performed
      - NaNs were replaced by zeros
      - z is non-positive,
      and True everywhere else, that is, where pixel_x, pixel_y are finite and
      fall within the frame.
    """
    with tf.name_scope("Clamp", values=[pixel_x, pixel_y, z]):
        _, height, width = tf.unstack(tf.shape(pixel_x))

        def _tensor(x):
            return tf.to_float(tf.convert_to_tensor(x))

        x_not_underflow = pixel_x >= 0.0
        y_not_underflow = pixel_y >= 0.0
        x_not_overflow = pixel_x < _tensor(width - 1)
        y_not_overflow = pixel_y < _tensor(height - 1)
        z_positive = z > 0.0
        x_not_nan = tf.math.logical_not(tf.is_nan(pixel_x))
        y_not_nan = tf.math.logical_not(tf.is_nan(pixel_y))
        not_nan = tf.logical_and(x_not_nan, y_not_nan)

        # Select zeros explicitly wherever a NaN is present. Multiplying by a
        # 0/1 mask does not work for this: NaN * 0 is still NaN.
        pixel_x = tf.where(not_nan, pixel_x, tf.zeros_like(pixel_x))
        pixel_y = tf.where(not_nan, pixel_y, tf.zeros_like(pixel_y))

        pixel_x = tf.clip_by_value(pixel_x, 0.0, _tensor(width - 1))
        pixel_y = tf.clip_by_value(pixel_y, 0.0, _tensor(height - 1))

        mask_stack = tf.stack(
            [
                x_not_underflow,
                y_not_underflow,
                x_not_overflow,
                y_not_overflow,
                z_positive,
                not_nan,
            ],
            axis=0,
        )
        # A pixel is valid only if every individual condition holds.
        mask = tf.reduce_all(mask_stack, axis=0)
        return pixel_x, pixel_y, mask