def apply_dropout():
  # `inference` and `keep_prob` come from the enclosing scope.
  # Note: the original check `type(inference) in [list, np.array]` never
  # matches arrays (their type is np.ndarray, and np.array is a function),
  # so use isinstance with the correct types instead.
  if isinstance(inference, (list, np.ndarray)):
    t_res = []
    for no_care in inference:
      t_res.append(nn_ops.dropout(no_care, keep_prob=keep_prob))
    return t_res
  else:
    return nn_ops.dropout(inference, keep_prob=keep_prob)
def __call__(self, inputs, state, scope=None):
  """Run the cell with the declared dropouts."""
  if not isinstance(self._input_keep_prob, float) or self._input_keep_prob < 1:
    inputs = nn_ops.dropout(inputs, self._input_keep_prob, seed=self._seed)
  output, new_state = self._cell(inputs, state)
  if not isinstance(self._output_keep_prob, float) or self._output_keep_prob < 1:
    output = nn_ops.dropout(output, self._output_keep_prob, seed=self._seed)
  return output, new_state
def __call__(self, inputs, state, scope=None):
  """Run the cell with the declared dropouts."""
  if (not isinstance(self._keep_prob, float) or self._keep_prob < 1):
    inputs = nn_ops.dropout(inputs, self._keep_prob, seed=self._seed)
    c, h = state
    h = nn_ops.dropout(h, self._keep_prob, seed=self._seed)
    state = LSTMStateTuple(c, h)
  output, new_state = self._cell(inputs, state, scope)
  return output, new_state
def call(self, inputs, state):
  """Long short-term memory cell (LSTM) with HyperNetworks.

  Layer norm for hyperLSTM and mainLSTM.
  Recurrent dropout for mainLSTM.

  Args:
    inputs: `2-D` tensor with shape `[batch_size x input_size]`.
    state: An `LSTMStateTuple` of state tensors, each shaped
      `[batch_size x self.state_size (num_unit + num_unit_hyper)]`.
      This state includes [LSTM, hyperLSTM].

  Returns:
    A pair containing the new hidden state, and the new state.
  """
  c_concat, h_concat = state  # memory cell, hidden unit
  c, h = c_concat[:, 0:self._num_units], h_concat[:, 0:self._num_units]
  c_hyper, h_hyper = c_concat[:, self._num_units:], h_concat[:, self._num_units:]
  with vs.variable_scope("hyper_lstm"):
    inputs_hyper = array_ops.concat([inputs, h], 1)
    state_hyper = rnn_cell_impl.LSTMStateTuple(c_hyper, h_hyper)
    output_hyper, state_hyper = self._hyper_lstm_cell(inputs_hyper, state_hyper)
    (c_hyper, h_hyper) = state_hyper
  # embedding hidden state
  h_embed = self._embedding(h, h_hyper, scope="h")
  x_embed = self._embedding(inputs, h_hyper, scope="x")
  b_embed = self._embedding_bias(h_hyper, scope="b")
  cells = []
  for i, name in enumerate(["i", "j", "f", "o"]):
    cell = h_embed[i] + x_embed[i] + b_embed[i]
    if self._layer_norm:
      cell = self._layer_normalization(cell, scope="layer_norm_%s" % name)
    cells.append(cell)
  i, j, f, o = cells
  g = self._activation(j)  # gating
  if self._recurrent_dropout:
    # recurrent dropout (mask only the gating cell)
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
  else:
    # variational dropout (mask every gate pre-activation)
    i = nn_ops.dropout(i, self._keep_prob, seed=self._seed)
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
    f = nn_ops.dropout(f, self._keep_prob, seed=self._seed)
    o = nn_ops.dropout(o, self._keep_prob, seed=self._seed)
  gated_in = math_ops.sigmoid(i) * g
  memory = c * math_ops.sigmoid(f + self._forget_bias)
  c = memory + gated_in
  h = self._activation(c) * math_ops.sigmoid(o)
  c_concat = array_ops.concat([c, c_hyper], 1)
  h_concat = array_ops.concat([h, h_hyper], 1)
  state = rnn_cell_impl.LSTMStateTuple(c_concat, h_concat)
  return h, state
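# A minimal NumPy sketch (not the cell above) of the HyperLSTM idea it
# implements: a small "hyper" hidden state produces per-unit scaling vectors
# that modulate the main LSTM's gate pre-activations. All names below are
# illustrative assumptions, not taken from the snippet.
import numpy as np

rng = np.random.default_rng(0)
num_units, num_units_hyper, batch = 4, 3, 2
h_hyper = rng.standard_normal((batch, num_units_hyper))   # hyper hidden state
pre_act = rng.standard_normal((batch, num_units))         # main-LSTM gate pre-activation
W_hz = rng.standard_normal((num_units_hyper, num_units))  # hyper -> scaling projection

z = h_hyper @ W_hz   # per-unit scaling vector, shape [batch, num_units]
scaled = z * pre_act  # elementwise modulation of the gate pre-activation
print(scaled.shape)   # (2, 4)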
def train():
  cell_update = nn_ops.dropout(
      state[0], self._cell_out_prob, seed=self._seed) + nn_ops.dropout(
          new_state[0], 1 - self._cell_out_prob, seed=self._seed)
  state_update = nn_ops.dropout(
      state[1], self._state_out_prob, seed=self._seed) + nn_ops.dropout(
          new_state[1], 1 - self._state_out_prob, seed=self._seed)
  return cell_update, state_update
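# Minimal NumPy sketch of the inverted-dropout semantics the snippet above
# relies on: nn_ops.dropout keeps each element with probability keep_prob and
# scales kept elements by 1/keep_prob, so the output equals the input in
# expectation. (Pure NumPy; names are illustrative.)
import numpy as np

def inverted_dropout(x, keep_prob, rng):
  mask = rng.random(x.shape) < keep_prob
  return np.where(mask, x / keep_prob, 0.0)

rng = np.random.default_rng(0)
x = np.ones((100000,))
out = inverted_dropout(x, keep_prob=0.7, rng=rng)
print(out.mean())  # ~1.0: dropout is unbiased in expectation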
def __call__(self, inputs, state, scope=None):
  """Run the cell with the declared dropouts."""
  if (not isinstance(self._input_keep_prob, float) or self._input_keep_prob < 1):
    inputs = nn_ops.dropout(inputs, self._input_keep_prob, seed=self._seed)
  output, new_state = self._cell(inputs, state, scope)
  if (not isinstance(self._output_keep_prob, float) or self._output_keep_prob < 1):
    output = nn_ops.dropout(output, self._output_keep_prob, seed=self._seed)
  return output, new_state
def __call__(self, inputs, state, scope=None):
  """Run the cell with the declared dropouts."""
  if (not isinstance(self._input_keep_prob, float) or self._input_keep_prob < 1):
    do_inputs = dropout(inputs, self._input_keep_prob, seed=self._seed)
    inputs = tf.cond(self._is_train, lambda: do_inputs, lambda: inputs)
  output, new_state = self._cell(inputs, state)
  if (not isinstance(self._output_keep_prob, float) or self._output_keep_prob < 1):
    do_output = dropout(output, self._output_keep_prob, seed=self._seed)
    output = tf.cond(self._is_train, lambda: do_output, lambda: output)
  return output, new_state
def call(self, inputs, state):
  """Long short-term memory cell (LSTM).

  Args:
    inputs: `2-D` tensor with shape `[batch_size x input_size]`.
    state: An `LSTMStateTuple` of state tensors, each shaped
      `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
      `True`. Otherwise, a `Tensor` shaped `[batch_size x 2 * self.state_size]`.

  Returns:
    A pair containing the new hidden state, and the new state (either a
    `LSTMStateTuple` or a concatenated state, depending on `state_is_tuple`).

  A PEP 8 inspection warning appears because this signature does not match
  `call` in tensorflow/python/layers/base:
  https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/layers/base.py
  """
  c, h = state  # memory cell, hidden unit
  args = array_ops.concat([inputs, h], 1)
  concat = self._linear(args, [args.get_shape()[-1], 4 * self._num_units])
  i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
  if self._layer_norm:
    i = self._layer_normalization(i, "layer_norm_i")
    j = self._layer_normalization(j, "layer_norm_j")
    f = self._layer_normalization(f, "layer_norm_f")
    o = self._layer_normalization(o, "layer_norm_o")
  g = self._activation(j)  # gating
  # dropout (recurrent or variational)
  if self._recurrent_dropout:
    # recurrent dropout
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
  else:
    # variational dropout
    i = nn_ops.dropout(i, self._keep_prob, seed=self._seed)
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
    f = nn_ops.dropout(f, self._keep_prob, seed=self._seed)
    o = nn_ops.dropout(o, self._keep_prob, seed=self._seed)
  gated_in = math_ops.sigmoid(i) * g
  memory = c * math_ops.sigmoid(f + self._forget_bias)
  # Layer normalization for the memory cell (the original paper did not
  # normalize the memory cell):
  # if self._layer_norm:
  #   new_c = self._layer_normalization(new_c, "state")
  new_c = memory + gated_in
  new_h = self._activation(new_c) * math_ops.sigmoid(o)
  new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
  return new_h, new_state
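# Sketch (NumPy, illustrative names) of the two dropout regimes the cell
# above switches between. Recurrent dropout (Semeniuta et al.) masks only the
# candidate update g, leaving the additive cell path intact; the "variational"
# branch here masks all four gate pre-activations. Note that strict
# variational dropout (Gal & Ghahramani) would reuse one mask across all time
# steps, whereas the cell above samples a fresh mask per call.
import numpy as np

rng = np.random.default_rng(0)
drop = lambda x, keep: np.where(rng.random(x.shape) < keep, x / keep, 0.0)
i, j, f, o = (rng.standard_normal(5) for _ in range(4))
keep_prob = 0.8

# recurrent dropout: only the candidate update is masked
g_recurrent = drop(np.tanh(j), keep_prob)

# variational-style dropout: all four pre-activations are masked
i_v, g_v, f_v, o_v = (drop(t, keep_prob) for t in (i, np.tanh(j), f, o))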
def testInvalidKeepProb(self):
  x_dim = 40
  y_dim = 30
  t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
  with self.assertRaises(ValueError):
    nn_ops.dropout(t, -1.0)
  with self.assertRaises(ValueError):
    nn_ops.dropout(t, 1.1)
  with self.assertRaises(ValueError):
    nn_ops.dropout(t, [0.0, 1.0])
  with self.assertRaises(ValueError):
    nn_ops.dropout(t, array_ops.placeholder(dtypes.float64))
  with self.assertRaises(ValueError):
    nn_ops.dropout(t, array_ops.placeholder(dtypes.float32, shape=[2]))
def __call__(self, inputs, state, scope=None):
  """LSTM cell with layer normalization and recurrent dropout."""
  with vs.variable_scope(scope or type(self).__name__) as scope:  # LayerNormBasicLSTMCell
    # pylint: disable=unused-variable
    c, h = state
    args = array_ops.concat(1, [inputs, h])  # legacy (pre-TF-1.0) concat signature
    concat = self._linear(args)
    i, j, f, o = array_ops.split(1, 4, concat)  # legacy split signature
    if self._layer_norm:
      i = self._norm(i, "input")
      j = self._norm(j, "transform")
      f = self._norm(f, "forget")
      o = self._norm(o, "output")
    g = self._activation(j)
    if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
      g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
    new_c = (c * math_ops.sigmoid(f + self._forget_bias)
             + math_ops.sigmoid(i) * g)
    if self._layer_norm:
      new_c = self._norm(new_c, "state")
    new_h = self._activation(new_c) * math_ops.sigmoid(o)
    new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
    return new_h, new_state
def __call__(self, inputs, state, scope=None):
  if isinstance(self.state_size, tuple) != isinstance(self._zoneout_prob, tuple):
    raise TypeError('Subdivided states need subdivided zoneouts.')
  if isinstance(self.state_size, tuple) and len(tuple(self.state_size)) != len(
      tuple(self._zoneout_prob)):
    raise ValueError('State and zoneout need equally many parts.')
  output, new_state = self._cell(inputs, state, scope)
  if isinstance(self.state_size, tuple):
    if self._is_training:
      new_state = tuple(
          (1 - state_part_zoneout_prob) * dropout(
              new_state_part - state_part,
              (1 - state_part_zoneout_prob),
              seed=self._seed,
          ) + state_part
          for new_state_part, state_part, state_part_zoneout_prob
          in zip(new_state, state, self._zoneout_prob))
    else:
      new_state = tuple(
          state_part_zoneout_prob * state_part
          + (1 - state_part_zoneout_prob) * new_state_part
          for new_state_part, state_part, state_part_zoneout_prob
          in zip(new_state, state, self._zoneout_prob))
    new_state = rnn_cell_impl.LSTMStateTuple(new_state[0], new_state[1])
  else:
    raise ValueError('Only states that are tuples are supported')
  return output, new_state
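# Sketch of the zoneout update implemented above (NumPy, illustrative names).
# During training each state unit is either carried over from the previous
# step (with probability z) or replaced by the new value; at eval time the
# two are mixed deterministically with the same expectation.
import numpy as np

rng = np.random.default_rng(0)
z = 0.15                                  # zoneout probability
h_prev = rng.standard_normal(6)
h_new = rng.standard_normal(6)

carry = rng.random(6) < z                 # units "zoned out" this step
h_train = np.where(carry, h_prev, h_new)  # training: stochastic carry-over
h_eval = z * h_prev + (1 - z) * h_new     # eval: deterministic expectation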
def dropout(i, do_dropout, v):
  if not isinstance(do_dropout, bool) or do_dropout:
    return nn_ops.dropout(v, keep_prob=keep_prob,
                          seed=self._gen_seed(salt_prefix, i))
  else:
    return v
def testPartialShapedDropout(self):
  x_dim = 40 * 30
  y_dim = 3
  num_iter = 10
  for keep_prob in [0.1, 0.5, 0.8]:
    with self.test_session():
      t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
      # Set noise_shape=[None, 1] which means [x_dim, 1].
      dropout = nn_ops.dropout(t, keep_prob, noise_shape=[None, 1])
      self.assertEqual([x_dim, y_dim], dropout.get_shape())
      final_count = 0
      for _ in xrange(0, num_iter):
        value = dropout.eval()
        final_count += np.count_nonzero(value)
        # Verifies that there are only two values: 0 and 1/keep_prob.
        sorted_value = np.unique(np.sort(value))
        self.assertEqual(0, sorted_value[0])
        self.assertAllClose(1 / keep_prob, sorted_value[1])
      # Check that we are in the 15% error range.
      expected_count = x_dim * y_dim * keep_prob * num_iter
      rel_error = math.fabs(final_count - expected_count) / expected_count
      print(rel_error)
      self.assertTrue(rel_error < 0.15)
def testShapedDropout(self):
  # Runs dropout with a 0-1 tensor 10 times, sums the number of ones and
  # validates that it is producing approximately the right number of ones
  # over a large number of samples, based on the keep probability. This time
  # with shaped noise.
  x_dim = 40 * 30
  y_dim = 3
  num_iter = 10
  for keep_prob in [0.1, 0.5, 0.8]:
    t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
    dropout = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, 1])
    self.assertEqual([x_dim, y_dim], dropout.get_shape())
    final_count = 0
    for _ in xrange(0, num_iter):
      value = self.evaluate(dropout)
      final_count += np.count_nonzero(value)
      # Verifies that there are only two values: 0 and 1/keep_prob.
      sorted_value = np.unique(np.sort(value))
      self.assertEqual(0, sorted_value[0])
      self.assertAllClose(1 / keep_prob, sorted_value[1])
    # Check that we are in the 15% error range.
    expected_count = x_dim * y_dim * keep_prob * num_iter
    rel_error = math.fabs(final_count - expected_count) / expected_count
    print(rel_error)
    self.assertTrue(rel_error < 0.15)
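# Sketch of what noise_shape=[x_dim, 1] does in the test above (NumPy,
# illustrative): one Bernoulli draw per row is broadcast across the whole
# row, so each row is either entirely kept (and scaled by 1/keep_prob) or
# entirely zeroed.
import numpy as np

rng = np.random.default_rng(0)
x = np.ones((5, 4))
keep_prob = 0.5
row_mask = rng.random((5, 1)) < keep_prob     # noise_shape=[x_dim, 1]
out = np.where(row_mask, x / keep_prob, 0.0)  # rows are all-0 or all-2.0
print(out)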
def testDropoutPlaceholderKeepProb(self):
  # Runs dropout with a 0-1 tensor 10 times, sums the number of ones and
  # validates that it is producing approximately the right number of ones
  # over a large number of samples, based on the keep probability.
  x_dim = 40
  y_dim = 30
  num_iter = 10
  for keep_prob in [0.1, 0.5, 0.8]:
    with self.test_session():
      t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
      keep_prob_placeholder = array_ops.placeholder(dtypes.float32)
      dropout = nn_ops.dropout(t, keep_prob_placeholder)
      final_count = 0
      self.assertEqual([x_dim, y_dim], dropout.get_shape())
      for _ in xrange(0, num_iter):
        value = dropout.eval(feed_dict={keep_prob_placeholder: keep_prob})
        final_count += np.count_nonzero(value)
        # Verifies that there are only two values: 0 and 1/keep_prob.
        sorted_value = np.unique(np.sort(value))
        self.assertEqual(0, sorted_value[0])
        self.assertAllClose(1 / keep_prob, sorted_value[1])
      # Check that we are in the 15% error range.
      expected_count = x_dim * y_dim * keep_prob * num_iter
      rel_error = math.fabs(final_count - expected_count) / expected_count
      print(rel_error)
      self.assertTrue(rel_error < 0.15)
def testNoDropoutFast(self):
  x = array_ops.zeros((5,))
  y = nn_ops.dropout(x, keep_prob=1)
  self.assertTrue(x is y)
  y = nn_ops.dropout_v2(x, rate=0)
  self.assertTrue(x is y)
def call(self, inputs, state):
  """LSTM cell with layer normalization and recurrent dropout."""
  c, h = state
  args = array_ops.concat([inputs, h], 1)
  concat = self._linear(args)
  dtype = args.dtype
  i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
  if self._layer_norm:
    i = self._norm(i, "input", dtype=dtype)
    j = self._norm(j, "transform", dtype=dtype)
    f = self._norm(f, "forget", dtype=dtype)
    o = self._norm(o, "output", dtype=dtype)
  g = self._activation(j)
  if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
  new_c = (c * math_ops.sigmoid(f + self._forget_bias)
           + math_ops.sigmoid(i) * g)
  # if self._layer_norm:
  #   new_c = self._norm(new_c, "state", dtype=dtype)
  new_h = self._activation(new_c) * math_ops.sigmoid(o)
  new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
  return new_h, new_state
def call(self, inputs, state):
  """2D convolutional LSTM cell with (optional) normalization and recurrent dropout."""
  c, h = state
  args = array_ops.concat([inputs, h], -1)
  concat = self._conv2d(args)
  if self._normalizer_fn and not self._separate_norms:
    concat = self._norm(concat, "input_transform_forget_output")
  i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=-1)
  if self._normalizer_fn and self._separate_norms:
    i = self._norm(i, "input")
    j = self._norm(j, "transform")
    f = self._norm(f, "forget")
    o = self._norm(o, "output")
  g = self._activation_fn(j)
  if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
  new_c = (c * math_ops.sigmoid(f + self._forget_bias)
           + math_ops.sigmoid(i) * g)
  if self._normalizer_fn:
    new_c = self._norm(new_c, "state")
  new_h = self._activation_fn(new_c) * math_ops.sigmoid(o)
  if self._skip_connection:
    new_h = array_ops.concat([new_h, inputs], axis=-1)
  new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
  return new_h, new_state
def body(i, prev_c, prev_h, actions, log_probs):
  # pylint: disable=g-long-lambda
  signal = control_flow_ops.cond(
      math_ops.equal(i, 0),
      lambda: array_ops.tile(device_go_embedding,
                             [self.hparams.num_children, 1]),
      lambda: embedding_ops.embedding_lookup(device_embeddings,
                                             actions.read(i - 1)))
  if self.hparams.keep_prob is not None:
    signal = nn_ops.dropout(signal, rate=(1 - self.hparams.keep_prob))
  next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
  query = math_ops.matmul(next_h, attn_w_2)
  query = array_ops.reshape(
      query, [self.hparams.num_children, 1, self.hparams.hidden_size])
  query = math_ops.tanh(query + attn_mem)
  query = array_ops.reshape(query, [
      self.hparams.num_children * self.num_groups, self.hparams.hidden_size
  ])
  query = math_ops.matmul(query, attn_v)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups])
  query = nn_ops.softmax(query)
  query = array_ops.reshape(
      query, [self.hparams.num_children, self.num_groups, 1])
  query = math_ops.reduce_sum(attn_mem * query, axis=1)
  query = array_ops.concat([next_h, query], axis=1)
  logits = math_ops.matmul(query, device_softmax)
  logits /= self.hparams.temperature
  if self.hparams.tanh_constant > 0:
    logits = math_ops.tanh(logits) * self.hparams.tanh_constant
  if self.hparams.logits_std_noise > 0:
    num_in_logits = math_ops.cast(array_ops.size(logits),
                                  dtype=dtypes.float32)
    avg_norm = math_ops.divide(linalg_ops.norm(logits),
                               math_ops.sqrt(num_in_logits))
    logits_noise = random_ops.random_normal(
        array_ops.shape(logits),
        stddev=self.hparams.logits_std_noise * avg_norm)
    logits = control_flow_ops.cond(
        self.global_step > self.hparams.stop_noise_step,
        lambda: logits,
        lambda: logits + logits_noise)
  if mode == "sample":
    next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
  elif mode == "greedy":
    next_y = math_ops.argmax(logits, 1)
  elif mode == "target":
    next_y = array_ops.slice(y, [0, i], [-1, 1])
  else:
    raise NotImplementedError
  next_y = math_ops.cast(next_y, dtypes.int32)
  next_y = array_ops.reshape(next_y, [self.hparams.num_children])
  actions = actions.write(i, next_y)
  log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=next_y)
  return i + 1, next_c, next_h, actions, log_probs
def _ragged_nn_dropout_v1(x, keep_prob=None, noise_shape=None, seed=None,
                          name=None, rate=None):
  if noise_shape is not None:
    raise ValueError('noise_shape is not supported yet for RaggedTensor x')
  with ops.name_scope(name, 'RaggedNNDropout', [x, rate]):
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x')
    return x.with_flat_values(
        nn_ops.dropout(x.flat_values, keep_prob=keep_prob, seed=seed,
                       rate=rate))
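# Sketch (pure Python/NumPy, illustrative) of the flat-values trick used
# above: a ragged tensor stores all values in one flat array plus row splits,
# so an elementwise op like dropout can run on the flat values and the ragged
# structure is reattached unchanged.
import numpy as np

rng = np.random.default_rng(0)
flat_values = np.array([1., 2., 3., 4., 5.])
row_splits = np.array([0, 2, 2, 5])  # rows: [1, 2], [], [3, 4, 5]

keep_prob = 0.5
new_flat = np.where(rng.random(flat_values.shape) < keep_prob,
                    flat_values / keep_prob, 0.0)
rows = [new_flat[row_splits[i]:row_splits[i + 1]]
        for i in range(len(row_splits) - 1)]  # same ragged structure
print(rows)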
def testShapedDropoutUnknownShape(self):
  x_dim = 40
  y_dim = 30
  keep_prob = 0.5
  x = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
  dropout_x = nn_ops.dropout(
      x, keep_prob, noise_shape=array_ops.placeholder(dtypes.int32))
  self.assertEqual(x.get_shape(), dropout_x.get_shape())
def body(i, prev_c, prev_h, actions, log_probs):
  # pylint: disable=g-long-lambda
  signal = control_flow_ops.cond(
      math_ops.equal(i, 0),
      lambda: array_ops.tile(device_go_embedding,
                             [self.hparams.num_children, 1]),
      lambda: embedding_ops.embedding_lookup(device_embeddings,
                                             actions.read(i - 1)))
  if self.hparams.keep_prob is not None:
    signal = nn_ops.dropout(signal, self.hparams.keep_prob)
  next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
  query = math_ops.matmul(next_h, attn_w_2)
  query = array_ops.reshape(
      query, [self.hparams.num_children, 1, self.hparams.hidden_size])
  query = math_ops.tanh(query + attn_mem)
  query = array_ops.reshape(query, [
      self.hparams.num_children * self.num_groups, self.hparams.hidden_size
  ])
  query = math_ops.matmul(query, attn_v)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups])
  query = nn_ops.softmax(query)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups, 1])
  query = math_ops.reduce_sum(attn_mem * query, axis=1)
  query = array_ops.concat([next_h, query], axis=1)
  logits = math_ops.matmul(query, device_softmax)
  logits /= self.hparams.temperature
  if self.hparams.tanh_constant > 0:
    logits = math_ops.tanh(logits) * self.hparams.tanh_constant
  if self.hparams.logits_std_noise > 0:
    num_in_logits = math_ops.cast(array_ops.size(logits),
                                  dtype=dtypes.float32)
    avg_norm = math_ops.divide(linalg_ops.norm(logits),
                               math_ops.sqrt(num_in_logits))
    logits_noise = random_ops.random_normal(
        array_ops.shape(logits),
        stddev=self.hparams.logits_std_noise * avg_norm)
    logits = control_flow_ops.cond(
        self.global_step > self.hparams.stop_noise_step,
        lambda: logits,
        lambda: logits + logits_noise)
  if mode == "sample":
    next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
  elif mode == "greedy":
    next_y = math_ops.argmax(logits, 1)
  elif mode == "target":
    next_y = array_ops.slice(y, [0, i], [-1, 1])
  else:
    raise NotImplementedError
  next_y = math_ops.to_int32(next_y)
  next_y = array_ops.reshape(next_y, [self.hparams.num_children])
  actions = actions.write(i, next_y)
  log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=next_y)
  return i + 1, next_c, next_h, actions, log_probs
def call(self, inputs, state):
  """Gated RNN update: h_t = theta * tanh(h_{t-1}) + n * tanh(W x_t).

  (The original docstring described the basic RNN, which does not match this
  body.) Dropout with keep prob `self.p` is applied to the input path and
  with `self.q` to the gate inputs.
  """
  h0_pr = nn_ops.dropout(inputs, self.p)        # [bs, hs]
  h0_pr_gate = nn_ops.dropout(inputs, self.q)   # [bs, hs]
  h1_l_pr_gate = nn_ops.dropout(state, self.q)  # [bs, hs]
  theta_1 = math_ops.add(
      math_ops.matmul(h1_l_pr_gate, self._weights_U_theta),
      math_ops.matmul(h0_pr_gate, self._weights_V_theta))  # [bs, hs]
  theta_1 = math_ops.add(theta_1, self._bias_theta)        # [bs, hs]
  n_1 = math_ops.add(
      math_ops.matmul(h1_l_pr_gate, self._weights_U_n),
      math_ops.matmul(h0_pr_gate, self._weights_V_n))      # [bs, hs]
  n_1 = math_ops.add(n_1, self._bias_n)                    # [bs, hs]
  h1_cur = math_ops.add(
      math_ops.multiply(theta_1, math_ops.tanh(state)),
      math_ops.multiply(n_1, math_ops.tanh(math_ops.matmul(h0_pr, self._W))))
  return h1_cur, h1_cur
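# Sketch (NumPy, illustrative names) of the update computed above:
# h_t = theta * tanh(h_{t-1}) + n * tanh(W x_t), where theta and n are affine
# functions of the (dropout-masked) input and previous state.
import numpy as np

rng = np.random.default_rng(0)
bs, hs = 2, 3
x, h = rng.standard_normal((bs, hs)), rng.standard_normal((bs, hs))
U_t, V_t, U_n, V_n, W = (rng.standard_normal((hs, hs)) for _ in range(5))
b_t, b_n = np.zeros(hs), np.zeros(hs)

theta = h @ U_t + x @ V_t + b_t
n = h @ U_n + x @ V_n + b_n
h_new = theta * np.tanh(h) + n * np.tanh(x @ W)
print(h_new.shape)  # (2, 3)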
def testShapedDropoutCorrelation(self):
  # Runs a shaped dropout and tests that the correlations are correct.
  x_dim = 40
  y_dim = 30
  num_iter = 10
  for keep_prob in [0.1, 0.5, 0.8]:
    t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
    dropout = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, 1])
    self.assertEqual([x_dim, y_dim], dropout.get_shape())
    for _ in xrange(0, num_iter):
      value = self.evaluate(dropout)
      # Verifies that each row has only one type of activation along y.
      for i in xrange(x_dim):
        sorted_value = np.unique(np.sort(value[i, :]))
        self.assertEqual(sorted_value.size, 1)
def call(self, inputs, state, time):
  """LSTM cell with layer normalization and recurrent dropout."""
  state_index_in_group = tf.mod(time, self._group_size)
  group_index = tf.floor_div(time, self._group_size)
  replicate_index = tf.mod(group_index, self._num_replicates)
  c, h = state
  args = array_ops.concat([inputs, h], -1)
  concat = self._linear(args)
  dtype = args.dtype
  i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=-1)
  if self._layer_norm:
    i = self._norm(i, "input", dtype=dtype)
    j = self._norm(j, "transform", dtype=dtype)
    f = self._norm(f, "forget", dtype=dtype)
    o = self._norm(o, "output", dtype=dtype)
  g = self._activation(j)
  if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
    g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
  # (i, g, f, o) = (tf.expand_dims(val, -1) for val in (i, g, f, o))
  new_c = (c * math_ops.sigmoid(f + self._forget_bias)
           + math_ops.sigmoid(i) * g)
  if self._layer_norm:
    new_c = self._norm(new_c, "state", dtype=dtype)
  new_h = self._activation(new_c) * math_ops.sigmoid(o)
  new_h_current = tf.gather(new_h, replicate_index, axis=1)
  # Here we reset the correct state (but only if we reached the end of the
  # group).
  tmp = 1 - tf.scatter_nd(
      tf.expand_dims(tf.expand_dims(replicate_index, 0), 0),
      tf.constant([1.0]),
      tf.constant([self._num_replicates]))
  reset_mask = tf.expand_dims(tf.expand_dims(tmp, 0), -1)
  reset_flag = tf.equal(state_index_in_group + 1, self._group_size)
  new_c_reset = tf.cond(reset_flag, lambda: new_c * reset_mask, lambda: new_c)
  new_h_reset = tf.cond(reset_flag, lambda: new_h * reset_mask, lambda: new_h)
  new_state = rnn_cell_impl.LSTMStateTuple(new_c_reset, new_h_reset)
  return (new_h_current, new_h), new_state
def testShapedDropoutShapeError(self):
  # Runs shaped dropout and verifies an error is thrown on misshapen noise.
  x_dim = 40
  y_dim = 30
  keep_prob = 0.5
  t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
  with self.assertRaises(ValueError):
    _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, y_dim + 10])
  with self.assertRaises(ValueError):
    _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, y_dim, 5])
  with self.assertRaises(ValueError):
    _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim + 3])
  with self.assertRaises(ValueError):
    _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim])
  # Test that broadcasting proceeds.
  _ = nn_ops.dropout(t, keep_prob, noise_shape=[y_dim])
  _ = nn_ops.dropout(t, keep_prob, noise_shape=[1, y_dim])
  _ = nn_ops.dropout(t, keep_prob, noise_shape=[x_dim, 1])
  _ = nn_ops.dropout(t, keep_prob, noise_shape=[1, 1])
def __init__(self, num_units, forget_bias=1.0, state_keep_prob=1.0,
             state_is_tuple=True, activation=None, reuse=None):
  """Initialize the basic LSTM cell.

  Args:
    num_units: int, The number of units in the LSTM cell.
    forget_bias: float, The bias added to forget gates (see above).
      Must set to `0.0` manually when restoring from CudnnLSTM-trained
      checkpoints.
    state_keep_prob: float in [0, 1], keep probability used to build a
      fixed dropout mask over the recurrent weights.
    state_is_tuple: If True, accepted and returned states are 2-tuples of
      the `c_state` and `m_state`. If False, they are concatenated along
      the column axis. The latter behavior will soon be deprecated.
    activation: Activation function of the inner states. Default: `tanh`.
    reuse: (optional) Python boolean describing whether to reuse variables
      in an existing scope. If not `True`, and the existing scope already
      has the given variables, an error is raised. When restoring from
      CudnnLSTM-trained checkpoints, must use CudnnCompatibleLSTMCell
      instead.
  """
  super(BasicLSTMCell, self).__init__(_reuse=reuse)
  if not state_is_tuple:
    logging.warn(
        "%s: Using a concatenated state is slower and will soon be "
        "deprecated. Use state_is_tuple=True.", self)
  if not (state_keep_prob >= 0.0 and state_keep_prob <= 1.0):
    raise ValueError(
        "state_keep_prob is expecting value in range 0 to 1: %f" %
        state_keep_prob)
  self._num_units = num_units
  self._forget_bias = forget_bias
  self._state_keep_prob = state_keep_prob
  self._state_is_tuple = state_is_tuple
  self._activation = activation or math_ops.tanh
  self._linear = None
  # Create mask for recurrent weights.
  self._mask_tensor = nn_ops.dropout(
      array_ops.ones([num_units, 4 * num_units]),
      keep_prob=state_keep_prob)
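# Sketch (NumPy, illustrative) of the recurrent-weight dropout the cell above
# prepares: a single fixed 0-or-(1/keep_prob) mask is sampled once and applied
# to the recurrent weight matrix at every step (DropConnect-style), rather
# than re-sampling a mask per step.
import numpy as np

rng = np.random.default_rng(0)
num_units, keep_prob = 4, 0.75
W_rec = rng.standard_normal((num_units, 4 * num_units))
mask = np.where(rng.random(W_rec.shape) < keep_prob, 1.0 / keep_prob, 0.0)

h = rng.standard_normal((1, num_units))
pre_act = h @ (W_rec * mask)  # same mask reused across all time steps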
def __call__(self, inputs, state, scope=None):
  with _checked_scope(self, scope or "ran_cell", reuse=self._reuse):
    with vs.variable_scope("gates"):
      c, h = state
      gates = tf.nn.sigmoid(linear(
          [inputs, h], 2 * self._num_units, True,
          normalize=self._normalize,
          kernel_initializer=tf.orthogonal_initializer()))
      i, f = array_ops.split(value=gates, num_or_size_splits=2, axis=1)
    with vs.variable_scope("candidate"):
      content = linear([inputs], self._num_units, True,
                       normalize=self._normalize)
    new_c = i * content + f * c
    # The RAN output is the activated *updated* content cell (h_t = g(c_t));
    # the original line applied the activation to the stale `c`.
    new_h = self._activation(new_c)
    new_h = tf.cond(self._is_training,
                    lambda: nn_ops.dropout(new_h, self._keep_prob),
                    lambda: new_h)
    new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
    output = new_h
    return output, new_state
def __call__(self, inputs, state, scope=None):
  """Run the cell with the declared zoneouts."""
  # Compute output and new state as before.
  output, new_state = self._cell(inputs, state, scope)
  # If either hidden-state or memory-cell zoneout is applied, split the
  # state and process each part.
  if self._has_hidden_state_zoneout or self._has_memory_cell_zoneout:
    # split state
    c_old, m_old = state
    c_new, m_new = new_state
    # apply zoneout to memory cell and hidden state
    c_and_m = []
    for s_old, s_new, p, has_zoneout in [
        (c_old, c_new, self._memory_cell_keep_prob,
         self._has_memory_cell_zoneout),
        (m_old, m_new, self._hidden_state_keep_prob,
         self._has_hidden_state_zoneout)]:
      if has_zoneout:
        if self._is_training:
          # dropout scales kept values by 1/p, so multiplying by p turns
          # the result back into a plain Bernoulli(p) 0/1 mask; sampling
          # via random_ops directly would be cleaner (see the dropout
          # implementation for how).
          mask = nn_ops.dropout(
              array_ops.ones_like(s_new), p, seed=self._seed) * p
          s = ((1. - mask) * s_old) + (mask * s_new)
        else:
          s = ((1. - p) * s_old) + (p * s_new)
      else:
        s = s_new
      c_and_m.append(s)
    # package final results
    new_state = LSTMStateTuple(*c_and_m)
    output = new_state.h
  return output, new_state
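# Sketch (NumPy, illustrative) of the mask trick used above: inverted dropout
# on a tensor of ones yields values in {0, 1/p}; multiplying by p recovers a
# plain Bernoulli(p) 0/1 mask.
import numpy as np

rng = np.random.default_rng(0)
p = 0.8
ones = np.ones(10)
dropped = np.where(rng.random(10) < p, ones / p, 0.0)  # values in {0, 1/p}
mask = dropped * p                                     # values in {0, 1}
print(mask)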
def testPartialShapedDropout(self):
  x_dim = 40 * 30
  y_dim = 3
  num_iter = 10
  for keep_prob in [0.1, 0.5, 0.8]:
    t = constant_op.constant(1.0, shape=[x_dim, y_dim], dtype=dtypes.float32)
    # Set noise_shape=[None, 1] which means [x_dim, 1].
    dropout = nn_ops.dropout(t, keep_prob, noise_shape=[None, 1])
    self.assertEqual([x_dim, y_dim], dropout.get_shape())
    final_count = 0
    for _ in xrange(0, num_iter):
      value = self.evaluate(dropout)
      final_count += np.count_nonzero(value)
      # Verifies that there are only two values: 0 and 1/keep_prob.
      sorted_value = np.unique(np.sort(value))
      self.assertEqual(0, sorted_value[0])
      self.assertAllClose(1 / keep_prob, sorted_value[1])
    # Check that we are in the 15% error range.
    expected_count = x_dim * y_dim * keep_prob * num_iter
    rel_error = math.fabs(final_count - expected_count) / expected_count
    print(rel_error)
    self.assertTrue(rel_error < 0.15)
def dropout(i, v):
  return nn_ops.dropout(v, keep_prob=keep_prob,
                        seed=self._gen_seed(salt_prefix, i))
def testNoDropoutFast(self):
  x = array_ops.zeros((5,))
  for p in 1, constant_op.constant(1.0):
    y = nn_ops.dropout(x, keep_prob=p)
    self.assertTrue(x is y)
def func():
  return nn_ops.dropout(self._m_2_by_2, rate=rate, noise_shape=noise_shape)
def testDropoutWithIntegerInputs(self):
  x = constant_op.constant([1, 1, 1, 1, 1])
  with self.assertRaises(ValueError):
    _ = nn_ops.dropout(x, 0.5)