def create_topk_unique(inputs, k):
    """Pick the k largest entries of each row and decode their column ids.

    NaN entries are replaced by -inf before the search. The "index" of each
    selected value is read from the low-order bits of its float32 bit
    pattern, so this presumably expects `inputs` to come from
    `create_make_unique`, which stores the column index in those bits --
    confirm at the call sites.

    Args:
      inputs: rank-2 float32 tensor whose shape [height, width] is known
        statically (the width is converted with `int()`).
      k: Python int, number of entries to extract per row.

    Returns:
      A pair `(values, indices)`, both of shape [height, k]: the selected
      float32 values in descending order per row, and the int32 low-order
      bits of those values.
    """
    num_rows = inputs.shape[0]
    num_cols = inputs.shape[1]

    neg_inf_scalar = tf.constant(-np.inf, dtype=tf.float32)
    neg_inf_matrix = (
        tf.ones([num_rows, num_cols], dtype=tf.float32) * neg_inf_scalar)

    # NaN would poison reduce_max, so push it to the bottom of the ordering.
    inputs = tf.where(tf.is_nan(inputs), neg_inf_matrix, inputs)

    remaining = inputs
    top_values = tf.zeros([num_rows, k], dtype=tf.float32)
    for slot in range(k):
        row_max = tf.reduce_max(remaining, axis=1, keepdims=True)

        # Write this round's maximum into column `slot` of the result only.
        slot_selector = tf.equal(tf.range(k), tf.fill([k], slot))
        slot_mask = tf.tile(tf.expand_dims(slot_selector, 0), [num_rows, 1])
        top_values = tf.where(
            slot_mask, tf.tile(row_max, [1, k]), top_values)

        # Knock out everything at or above the current maximum so that the
        # next round finds the next-largest value.
        taken = tf.greater_equal(inputs, tf.tile(row_max, [1, num_cols]))
        remaining = tf.where(taken, neg_inf_matrix, inputs)

    # Number of low-order bits needed to address `num_cols` columns.
    index_bits = int(math.ceil(math.log(float(int(num_cols)), 2)))
    index_mask = (1 << index_bits) - 1
    index_mask_matrix = tf.fill([num_rows, k], tf.constant(index_mask))

    top_values_as_int = tf.bitcast(top_values, tf.int32)
    top_indices = tf.bitwise.bitwise_and(top_values_as_int, index_mask_matrix)
    return top_values, top_indices
def apply_gradients(self, gradvars, *args, **kwargs):
    """Scale every gradient by a per-variable local rate, then apply them.

    For each (gradient, variable) pair the local rate is
    `eta * ||variable|| / ||gradient||` when both L2 norms are non-zero and
    `epsilon` otherwise. When `self._clip` is set, the rate is divided by
    `self._learning_rate` and capped at 1. Each non-None gradient is
    multiplied by its rate and the result is handed to the wrapped
    optimizer.

    Args:
      gradvars: iterable of (gradient, variable) pairs; a gradient may be
        None. Any iterable is accepted, including one-shot iterators such
        as `zip(grads, variables)`.
      *args: forwarded to `self._optimizer.apply_gradients`.
      **kwargs: forwarded to `self._optimizer.apply_gradients`.

    Returns:
      Whatever `self._optimizer.apply_gradients` returns.
    """
    # `gradvars` is walked three times below. A one-shot iterator would be
    # exhausted after the first pass and silently yield no updates, so
    # materialize it once up front.
    gradvars = list(gradvars)

    v_list = [tf.norm(tensor=v, ord=2) for _, v in gradvars]
    g_list = [
        tf.norm(tensor=g, ord=2) if g is not None else 0.0
        for g, _ in gradvars
    ]
    v_norms = tf.stack(v_list)
    g_norms = tf.stack(g_list)
    zeds = tf.zeros_like(v_norms)

    # Use epsilon when the weight norm or the gradient norm is zero: this
    # avoids a division by zero and keeps zero-initialized biases from
    # getting stuck at their initial value.
    cond = tf.logical_and(tf.not_equal(v_norms, zeds),
                          tf.not_equal(g_norms, zeds))
    true_vals = tf.scalar_mul(self._eta, tf.div(v_norms, g_norms))
    false_vals = tf.fill(tf.shape(v_norms), self._epsilon)
    larc_local_lr = tf.where(cond, true_vals, false_vals)

    if self._clip:
        ones = tf.ones_like(v_norms)
        lr = tf.fill(tf.shape(v_norms), self._learning_rate)
        # The local rate needs the gradients, which come from the wrapped
        # optimizer whose learning rate is already fixed. So the gradients
        # are rescaled instead of the learning rate itself.
        larc_local_lr = tf.minimum(tf.div(larc_local_lr, lr), ones)

    gradvars = [
        (tf.multiply(larc_local_lr[i], g), v) if g is not None else (None, v)
        for i, (g, v) in enumerate(gradvars)
    ]
    return self._optimizer.apply_gradients(gradvars, *args, **kwargs)
def _get_rc_model_input(
    question_ids,
    question_mask,
    context_ids,
    context_mask,
    vocab,
):
    """Pack batched question and context pieces into one RC model input.

    Each row is first laid out as `[CLS] question [SEP] context [SEP]`
    (padding still sitting inside the question and context spans). The rows
    are then passed through `tensor_utils.boolean_mask` with the joined mask,
    which is relied on to drop masked positions and leave padding only at
    the end of each row.

    Args:
      question_ids: <int32> [batch_size, question_len]
      question_mask: <int32> [batch_size, question_len]
      context_ids: <int32> [batch_size, context_len]
      context_mask: <int32> [batch_size, context_len]
      vocab: Instance of text_utils.Vocab.

    Returns:
      input_ids: <int32> [batch_size, rc_input_len]
      input_mask: <int32> [batch_size, rc_input_len]
      segment_ids: <int32> [batch_size, rc_input_len]
    """
    num_examples = tensor_utils.shape(context_ids, 0)

    def _column(value):
        """Builds a [batch_size, 1] column holding `value` in every row."""
        return tf.fill([num_examples, 1], value)

    # Token ids: [CLS] question [SEP] context [SEP].
    cls_column = _column(vocab.t2i(vocab.CLS))
    sep_column = _column(vocab.t2i(vocab.SEP))
    joined_ids = tf.concat(
        [cls_column, question_ids, sep_column, context_ids, sep_column],
        axis=1)

    # Segment 0 spans [CLS] + question + [SEP]; segment 1 spans
    # context + [SEP].
    first_segment = tf.fill(
        [num_examples, tensor_utils.shape(question_ids, 1) + 2], 0)
    second_segment = tf.fill(
        [num_examples, tensor_utils.shape(context_ids, 1) + 1], 1)
    joined_segments = tf.concat([first_segment, second_segment], axis=1)

    # The special tokens are always "on" in the mask.
    always_on = _column(1)
    joined_mask = tf.concat(
        [always_on, question_mask, always_on, context_mask, always_on],
        axis=1)
    keep = tf.cast(joined_mask, tf.bool)

    # Before: [CLS] X X X [PAD] ... [SEP] Y Y Y [PAD] ... [SEP] [PAD] ...
    # After:  [CLS] X X X [SEP] Y Y Y [SEP] [PAD] ...
    input_ids = tensor_utils.boolean_mask(joined_ids, keep)
    input_mask = tensor_utils.boolean_mask(joined_mask, keep)
    segment_ids = tensor_utils.boolean_mask(joined_segments, keep)
    return input_ids, input_mask, segment_ids
def build_planner_inputs(question, answer, length, lookup_table):
    """Turn a question/answer string pair into fixed-length TextInputs.

    Both strings are whitespace-split, prefixed with a marker token ("[Q]"
    for the question, "[A]" for the answer), mapped to ids and concatenated.
    Question tokens get segment id 2, answer tokens segment id 1, and
    positions restart from 0 for the answer. All fields are cut to `length`
    and zero-padded on the right up to `length`.

    Args:
      question: <string>, space-separated token string.
      answer: <string>, space-separated token string.
      length: Length to pad or truncate to.
      lookup_table: Instance of contrib.lookup.index_table_from_tensor.

    Returns:
      Instance of TextInputs.
    """

    def _encode(text, marker):
        """Returns (token ids, token count) for `marker` followed by `text`."""
        pieces = tf.string_split([text]).values
        pieces = tf.concat([[marker], pieces], axis=0)
        ids = tf.cast(lookup_table.lookup(pieces), tf.int32)
        return ids, tensor_utils.shape(ids, 0)

    q_ids, q_size = _encode(question, "[Q]")
    a_ids, a_size = _encode(answer, "[A]")

    # Join the question part and the answer part of every field.
    fields = {
        "token_ids": tf.concat([q_ids, a_ids], axis=0),
        "mask": tf.concat(
            [tf.ones_like(q_ids), tf.ones_like(a_ids)], axis=0),
        "segment_ids": tf.concat(
            [tf.fill([q_size], 2), tf.fill([a_size], 1)], axis=0),
        "positions": tf.concat(
            [tf.range(q_size), tf.range(a_size)], axis=0),
    }

    # Truncate first so that the padding amount below is never negative.
    fields = {key: value[:length] for key, value in fields.items()}
    right_pad = [[0, length - tf.size(fields["token_ids"])]]

    return TextInputs(**{
        key: tf.ensure_shape(tf.pad(value, right_pad), [length])
        for key, value in fields.items()
    })
def binarize(self, input_):
    """Apply a step function at `self.pen_threshold`, elementwise.

    Args:
      input_: tensor of continuous values (documented by the original
        author as lying in [0, 1]).

    Returns:
      A tensor with the shape of `input_` holding 1.0 wherever the entry is
      strictly greater than `self.pen_threshold` and 0.0 everywhere else.
    """
    dims = tf.shape(input_)
    threshold = tf.fill(dims, self.pen_threshold)
    above_threshold = tf.greater(input_, threshold)
    return tf.where(above_threshold, tf.fill(dims, 1.0), tf.fill(dims, 0.0))
def optimization(logits, y, population, embedding, alpha):
    """Loss and optimization method.

    Builds per-example weights, normalizes them to mean 1, and uses them in
    a weighted hinge loss that is minimized with Adam.

    Args:
      logits: model outputs passed as `logits` to `tf.losses.hinge_loss`.
      y: labels passed as `labels` to `tf.losses.hinge_loss`.
      population: tensor compared against 0.01 to choose the base weight;
        its shape defines the shape of the weights.
      embedding: left operand of the matmul that produces the sigmoid
        weighting term.
      alpha: right operand of that matmul.

    Returns:
      A pair `(optimizer, loss)`, where `optimizer` is the result of
      `tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)`.
    """
    # NOTE(review): this function was recovered from a source whose
    # indentation was lost. The second `if` and the normalization are laid
    # out at function level here; confirm against the original that the
    # `if not FLAGS.propensity_weights` branch was not nested in the `else`.
    if FLAGS.uniform_weights:
        weights = tf.ones(shape=tf.shape(population))
    else:
        # Base weight: 0.16 where population > 0.01, 2.5 elsewhere.
        weights = tf.where(tf.greater(population, 0.01),
                           tf.fill(tf.shape(population), 0.16),
                           tf.fill(tf.shape(population), 2.5))
    if not FLAGS.propensity_weights:
        # Multiply the base weights by sigmoid(embedding @ alpha).
        weights = tf.sigmoid(tf.matmul(embedding, alpha)) * weights
    # Rescale so that the weights average to 1.
    weights /= tf.reduce_mean(weights)
    loss = tf.losses.hinge_loss(labels=y, logits=logits, weights=weights)
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
    return optimizer, loss
def testInitialStateComputation(self, tuple_state, mask):
    """Checks the values produced by `snt.TrainableInitialState`.

    The module must first reproduce the provided initial state exactly.
    After every trainable variable is overwritten with ones, only the leaves
    whose mask entry is truthy may change (to all ones); the others must
    still equal the provided initial state.

    Args:
      tuple_state: if truthy, use a nested tuple of three tensors as the
        initial state; otherwise use a single tensor.
      mask: optional structure of booleans selecting the trainable leaves;
        None means every leaf is trainable.
    """
    if not tuple_state:
        initial_state = tf.fill([BATCH_SIZE, 9], 10)
    else:
        initial_state = (
            tf.fill([BATCH_SIZE, 6], 2),
            (tf.fill([BATCH_SIZE, 7], 3), tf.fill([BATCH_SIZE, 8], 4)),
        )

    trainable_state_module = snt.TrainableInitialState(
        initial_state, mask=mask)
    trainable_state = trainable_state_module()
    flat_trainable_state = nest.flatten(trainable_state)
    nest.assert_same_structure(initial_state, trainable_state)
    flat_initial_state = nest.flatten(initial_state)
    flat_mask = (
        nest.flatten(mask) if mask is not None
        else (True,) * len(flat_initial_state))

    self.evaluate(tf.global_variables_initializer())

    # Freshly initialized, the module must hand back the provided state.
    for produced, provided in zip(flat_trainable_state, flat_initial_state):
        self.assertAllEqual(
            self.evaluate(produced), self.evaluate(provided))

    # Overwrite every trainable variable with ones.
    for variable in tf.trainable_variables():
        self.evaluate(tf.assign(variable, tf.ones_like(variable)))

    # In eager mode the module has to be re-connected to be re-evaluated.
    trainable_state = trainable_state_module()
    flat_trainable_state = nest.flatten(trainable_state)

    # A leaf must have changed if and only if it is trainable.
    for produced, provided, is_trainable in zip(
            flat_trainable_state, flat_initial_state, flat_mask):
        produced_value = self.evaluate(produced)
        provided_value = self.evaluate(provided)
        expected_value = (
            np.ones_like(provided_value) if is_trainable else provided_value)
        self.assertAllEqual(produced_value, expected_value)
def initialize(self, name=None):
    """Initialize the decoder.

    Builds the initial beam-search state: for every batch entry the first
    beam starts with log-probability 0 and all other beams with -inf, all
    lengths are 0, and the predicted-id buffer is filled with the end token.

    Args:
      name: Name scope for any created operations. It is accepted for
        interface compatibility but is not referenced in the body.

    Returns:
      `(finished, start_inputs, initial_state)`.
    """
    finished, start_inputs = self._finished, self._start_inputs

    # Take the dtype from the first tensor in the (possibly nested) cell
    # state.
    dtype = contrib_framework.nest.flatten(self._initial_cell_state)[0].dtype

    # `np.inf` is used rather than the alias `np.Inf`, which NumPy 2.0
    # removed; the value is identical.
    log_probs = tf.one_hot(  # shape(batch_sz, beam_sz)
        tf.zeros([self._batch_size], dtype=tf.int32),
        depth=self._beam_width,
        on_value=tf.convert_to_tensor(0.0, dtype=dtype),
        off_value=tf.convert_to_tensor(-np.inf, dtype=dtype),
        dtype=dtype)

    init_attention_probs = get_attention_probs(
        self._initial_cell_state, self._coverage_penalty_weight)
    if init_attention_probs is None:
        # Use an empty tuple as the "no attention" placeholder in the state.
        init_attention_probs = ()

    init_pred_ids = tf.fill(
        [self._batch_size, self._beam_width, self._max_tgt],
        self._end_token)

    initial_state = BeamSearchDecoderState(
        cell_state=self._initial_cell_state,
        log_probs=log_probs,
        finished=finished,
        lengths=tf.zeros(
            [self._batch_size, self._beam_width], dtype=tf.int64),
        accumulated_attention_probs=init_attention_probs,
        pred_ids=init_pred_ids)

    return (finished, start_inputs, initial_state)
def add_special_tokens(segment_tokens, cls_token, sep_token):
    """Wrap segments with CLS/SEP markers.

    A SEP token is appended to every row, and a single CLS token is
    prepended to the first row only.

    Args:
      segment_tokens (RaggedTensor): a 2-D RaggedTensor of strings. One row
        for each segment. Each row is a list of tokens.
      cls_token (unicode): string for CLS token.
      sep_token (unicode): string for SEP token.

    Returns:
      The row-wise concatenation of the CLS column, `segment_tokens` and the
      SEP column (the original docstring calls this a 2-D string Tensor;
      since the inputs are ragged it is presumably a RaggedTensor -- confirm).
    """
    row_count = tf.to_int32(segment_tokens.nrows())

    # Row lengths [1, 0, 0, ...] place the lone CLS value in row 0 and leave
    # every other row empty.
    cls_row_lengths = tf.one_hot(0, row_count, dtype=tf.int64)
    cls_column = tf.RaggedTensor.from_row_lengths(
        [cls_token], row_lengths=cls_row_lengths)

    # Exactly one SEP per row.
    sep_column = tf.fill([row_count, 1], sep_token)

    return tf.concat([cls_column, segment_tokens, sep_column], axis=1)
def testRandomDistort(self):
    """Checks that random_distortion keeps the image shape and the boxes.

    The returned image must have the input shape, and the first four
    columns of the returned boxes must equal the boxes that were passed in.
    """
    im_shape = (600, 900, 3)
    config = self._random_distort_config
    total_boxes = 5
    label = 3

    image, bboxes = self._get_image_with_boxes(im_shape, total_boxes)

    # Attach the same label to every box as a fifth column.
    label_column = tf.fill((bboxes.shape[0], 1), label)
    bboxes_w_label = tf.concat([bboxes, label_column], axis=1)

    ret_image, ret_bboxes = self._random_distort(
        image, config, bboxes_w_label)

    self.assertEqual(im_shape, ret_image.shape)
    self.assertAllEqual(bboxes, ret_bboxes[:, :4])
def testRandomResizeImageBboxes(self):
    """Checks the outputs of random_resize when bboxes are supplied.

    The number of boxes must be unchanged, and both spatial dimensions of
    the returned image must lie within [config.min_size, config.max_size].
    """
    im_shape = (600, 800, 3)
    config = self._random_resize_config
    total_boxes = 5
    label = 3

    image, bboxes = self._get_image_with_boxes(im_shape, total_boxes)

    # Attach the same label to every box as a fifth column.
    label_column = tf.fill((bboxes.shape[0], 1), label)
    bboxes_w_label = tf.concat([bboxes, label_column], axis=1)

    ret_image, ret_bboxes = self._random_resize(
        image, config, bboxes_w_label)

    self.assertEqual(ret_bboxes.shape[0], total_boxes)

    height_and_width = np.asarray(ret_image.shape[:2])
    self.assertTrue(np.all(height_and_width >= config.min_size))
    self.assertTrue(np.all(height_and_width <= config.max_size))
def p_sample_loop_trajectory(self, denoise_fn, *, shape,
                             noise_fn=tf.random_normal,
                             repeat_noise_steps=-1):
    """Generate samples and keep every intermediate image.

    Useful for visualizing how denoised images evolve over time. Sampling
    runs in two phases: first the steps that reuse the same noise across the
    batch, then the remaining steps with independent noise per batch
    element. After every step the timestep and the new image are appended to
    the running `times` / `imgs` stacks.

    Args:
      denoise_fn: forwarded to `self.p_sample`.
      shape: tuple or list giving the shape of one image batch; `shape[0]`
        is used as the batch size for the timestep vector.
      noise_fn: noise sampler forwarded to `noise_like` and `self.p_sample`.
      repeat_noise_steps (int): Number of denoising timesteps in which the
        same noise is used across the batch. If >= 0, the initial noise is
        the same for all batch elements.

    Returns:
      A pair `(times, imgs)` holding the stacked timesteps and the stacked
      images produced along the trajectory.
    """
    start_t = tf.constant(self.num_timesteps - 1, dtype=tf.int32)
    assert isinstance(shape, (tuple, list))
    start_img = noise_like(shape, noise_fn, repeat_noise_steps >= 0)
    times = tf.Variable([start_t])
    imgs = tf.Variable([start_img])

    def _invariants():
        """Both stacks grow along axis 0, so that axis is left unknown."""
        return [tf.TensorShape([None, *start_t.shape]),
                tf.TensorShape([None, *start_img.shape])]

    def _advance(times_, imgs_, repeat_noise):
        """Runs one denoising step and appends its outputs to the stacks."""
        next_times = tf.concat([times_, [times_[-1] - 1]], 0)
        sample = self.p_sample(
            denoise_fn=denoise_fn,
            x=imgs_[-1],
            t=tf.fill([shape[0]], times_[-1]),
            noise_fn=noise_fn,
            repeat_noise=repeat_noise)
        next_imgs = tf.concat([imgs_, [sample]], 0)
        return [next_times, next_imgs]

    # Phase 1: steps with repeated noise.
    times, imgs = tf.while_loop(
        cond=lambda times_, _: tf.less_equal(
            self.num_timesteps - times_[-1], repeat_noise_steps),
        body=lambda times_, imgs_: _advance(times_, imgs_, True),
        loop_vars=[times, imgs],
        shape_invariants=_invariants(),
        back_prop=False
    )

    # Phase 2: steps with different noise for each batch element.
    times, imgs = tf.while_loop(
        cond=lambda times_, _: tf.greater_equal(times_[-1], 0),
        body=lambda times_, imgs_: _advance(times_, imgs_, False),
        loop_vars=[times, imgs],
        shape_invariants=_invariants(),
        back_prop=False
    )

    assert imgs[-1].shape == shape
    return times, imgs
def create_make_unique(inputs):
    """Make the entries of every row distinct by embedding the column index.

    The input is reinterpreted as int32 bit patterns. Zeros (positive or
    negative) are first replaced by the smallest normal number of the same
    sign; then the low-order bits of every entry are overwritten with the
    entry's column index. The bit-width formula is the same one that
    `create_topk_unique` uses to read the index back out, so the two must be
    kept in sync.

    Args:
      inputs: rank-2 tensor with a statically known shape [height, width];
        presumably float32, since it is bitcast to int32.

    Returns:
      A float32 tensor of the same shape whose low-order mantissa bits hold
      the column index.

    Raises:
      ValueError: if `inputs` is not rank-2.
    """
    if inputs.shape.ndims != 2:
        raise ValueError("Input of top_k_with_unique must be rank-2 "
                         "but got: %s" % inputs.shape)

    height = inputs.shape[0]
    width = inputs.shape[1]
    zeros = tf.zeros([height, width], dtype=tf.int32)

    # count_mask clears the low-order bits that will carry the column index,
    # which is what makes every element in a row distinct.
    log2_ceiling = int(math.ceil(math.log(float(int(width)), 2)))
    next_power_of_two = 1 << log2_ceiling
    count_mask = ~(next_power_of_two - 1)
    count_mask_r0 = tf.constant(count_mask)
    count_mask_r2 = tf.fill([height, width], count_mask_r0)

    # smallest_normal is the bit pattern of the smallest positive normal
    # float: sign is zero, exponent is one, fraction is zero.
    smallest_normal = 1 << 23
    smallest_normal_r0 = tf.constant(smallest_normal, dtype=tf.int32)
    smallest_normal_r2 = tf.fill([height, width], smallest_normal_r0)

    # Mask that clears the sign bit, used to take the absolute value.
    # Python ints are unbounded, so `~(1 << 31)` evaluates to -2147483649,
    # which is not representable in int32. Spell out the 31 low bits
    # (0x7FFFFFFF) explicitly instead of relying on integer wrap-around.
    low_bit_mask = (1 << 31) - 1
    low_bit_mask_r0 = tf.constant(low_bit_mask, dtype=tf.int32)
    low_bit_mask_r2 = tf.fill([height, width], low_bit_mask_r0)

    # iota[r, c] == c for every row r.
    iota = tf.tile(tf.expand_dims(tf.range(width, dtype=tf.int32), 0),
                   [height, 1])

    # Compare the absolute value with positive zero to handle negative zero.
    #
    # Pseudocode: input_no_zeros = abs(input) == 0 ? FLT_MIN : input
    input_r2 = tf.bitcast(inputs, tf.int32)
    abs_r2 = tf.bitwise.bitwise_and(input_r2, low_bit_mask_r2)
    if_zero_r2 = tf.equal(abs_r2, zeros)
    smallest_normal_preserving_sign_r2 = tf.bitwise.bitwise_or(
        input_r2, smallest_normal_r2)
    input_no_zeros_r2 = tf.where(
        if_zero_r2, smallest_normal_preserving_sign_r2, input_r2)

    # Discard the low-order bits and replace them with the column index.
    and_r2 = tf.bitwise.bitwise_and(input_no_zeros_r2, count_mask_r2)
    or_r2 = tf.bitwise.bitwise_or(and_r2, iota)
    return tf.bitcast(or_r2, tf.float32)
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    seq_len,
    forget_bias=0.1,
):
    """Build the TF1 graph: stacked LSTM, self-alignment, pooling, regression.

    Creates the placeholders `self.X`, `self.Y` and `self.hidden_layer`,
    runs a dropout-wrapped multi-layer LSTM over `self.X`, uses the first
    output channel as a mask for a tanh alignment of `self.X` with itself,
    pools the aligned features (max and average), and fits a dense layer
    with a mean-squared-error cost minimized by Adam.

    Args:
      learning_rate: learning rate given to `tf.train.AdamOptimizer`.
      num_layers: number of stacked LSTM cells.
      size: size of the last dimension of the input placeholder `self.X`.
      size_layer: number of units in every LSTM cell.
      output_size: width of `self.Y` and of the final dense layer.
      seq_len: tile count used when expanding the masks; presumably must
        equal the sequence dimension of `self.X` -- TODO confirm.
      forget_bias: despite its name, this value is passed as
        `output_keep_prob` to the DropoutWrapper.
    """

    def lstm_cell(size_layer):
        # state_is_tuple=False: the cell state is one concatenated tensor.
        return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

    def global_pooling(x, func):
        # Pool over the whole of axis 1 (pool size == that axis' static
        # length, stride 1) and flatten the result to [batch, num_units].
        # Reads self.X at call time, so it must be called after self.X is
        # created.
        batch_size = tf.shape(self.X)[0]
        num_units = x.get_shape().as_list()[-1]
        x = func(x, x.get_shape().as_list()[1], 1)
        x = tf.reshape(x, [batch_size, num_units])
        return x

    rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    # NOTE(review): `forget_bias` (default 0.1) is used as the dropout keep
    # probability here -- confirm this is intended.
    drop = tf.nn.rnn_cell.DropoutWrapper(rnn_cells,
                                         output_keep_prob=forget_bias)
    # Flat initial state fed by the caller: 2 * size_layer values per layer.
    self.hidden_layer = tf.placeholder(tf.float32,
                                       (None, num_layers * 2 * size_layer))
    # NOTE(review): time_major=True is requested, while the code below
    # treats axis 0 of self.X as the batch axis -- confirm the layout.
    self.outputs, self.last_state = tf.nn.dynamic_rnn(
        drop,
        self.X,
        initial_state=self.hidden_layer,
        dtype=tf.float32,
        time_major=True,
    )
    # Keep only the first output channel; its sign serves as the mask.
    self.outputs = self.outputs[:, :, 0]
    x = self.X
    masks = tf.sign(self.outputs)
    # NOTE(review): this local is not referenced again in this scope
    # (global_pooling computes its own batch_size).
    batch_size = tf.shape(self.X)[0]
    # Pairwise dot products of the inputs along axis 1.
    align = tf.matmul(self.X, tf.transpose(self.X, [0, 2, 1]))
    paddings = tf.fill(tf.shape(align), float('-inf'))
    k_masks = tf.tile(tf.expand_dims(masks, 1), [1, seq_len, 1])
    # Where the mask is 0, replace the score with -inf (tanh maps it to -1).
    align = tf.where(tf.equal(k_masks, 0), paddings, align)
    align = tf.nn.tanh(align)
    q_masks = tf.to_float(masks)
    q_masks = tf.tile(tf.expand_dims(q_masks, -1), [1, 1, seq_len])
    # Scale each row of the alignment by the sign mask of that position.
    align *= q_masks
    x = tf.matmul(align, x)
    g_max = global_pooling(x, tf.layers.max_pooling1d)
    g_avg = global_pooling(x, tf.layers.average_pooling1d)
    # Final features: max-pooled and average-pooled vectors side by side.
    self.outputs = tf.concat([g_max, g_avg], 1)
    self.logits = tf.layers.dense(self.outputs, output_size)
    # Mean squared error between targets and predictions.
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
        self.cost)