def spatial_dropout(self, inputs, keep_prob):
    """Apply dropout to ``inputs`` with a mask broadcast along axis 1.

    The noise shape handed to ``tf.nn.dropout`` is
    ``[shape(inputs, 0), 1, shape(inputs, -1)]``.

    :param inputs: tensor to apply dropout to; the three-element noise
        shape implies rank 3 (presumably [batch_size, num_steps,
        emb_size] -- confirm against callers)
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``;
        when it is not below 1 the input is returned untouched
    :return: ``inputs`` itself, or the dropped-out tensor
    """
    # Nothing to drop: hand the input back without adding any ops.
    if not keep_prob < 1:
        return inputs

    mask_shape = tf.stack([shape(inputs, 0), 1, shape(inputs, -1)])
    return tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)
def loss_layer(self, project_logits, lengths, name=None):
    """
    calculate crf loss
    :param project_logits: unary tag scores, [batch_size, num_steps, num_tags]
    :param lengths: forwarded to crf_log_likelihood as sequence_lengths
    :param name: variable scope name; "crf_loss" is used when it is empty
    :return: tuple of (scalar loss, transition params returned by
        crf_log_likelihood)
    """
    with tf.variable_scope(name or "crf_loss"):
        # Tag-to-tag transition scores, created in this scope.
        trans = tf.get_variable(
            "transitions",
            shape=[self.num_tags, self.num_tags],
            initializer=self.initializer)
        log_likelihood, trans = crf_log_likelihood(
            inputs=project_logits,
            tag_indices=self.labels_ids,
            transition_params=trans,
            sequence_lengths=lengths)
        # The loss is the mean negative log-likelihood.
        return tf.reduce_mean(-log_likelihood), trans
def biLSTM_layer(self, lstm_inputs, lstm_dim, lengths, num_layers, keep_prob=1.):
    """Run a stack of bidirectional LSTM layers over ``lstm_inputs``.

    Every layer concatenates its forward and backward outputs and applies
    dropout. From the second layer on, the result is blended with that
    layer's input through a sigmoid highway gate.

    :param lstm_inputs: [batch_size, num_steps, emb_size]
    :param lstm_dim: size handed to each CustomLSTMCell
    :param lengths: passed to bidirectional_dynamic_rnn as sequence_length
    :param num_layers: number of stacked layers; with 0 the input is
        returned unchanged
    :param keep_prob: forwarded to the cells and to the output dropout
    :return: [batch_size, num_steps, 2*lstm_dim]
    """
    batch_size = shape(lstm_inputs, 0)

    def tiled_initial_state(cell):
        # Repeat the cell's initial (c, h) state batch_size times along axis 0.
        return tf.contrib.rnn.LSTMStateTuple(
            tf.tile(cell.initial_state.c, [batch_size, 1]),
            tf.tile(cell.initial_state.h, [batch_size, 1]))

    layer_inputs = lstm_inputs
    with tf.variable_scope("char_BiLSTM"):
        for depth in range(num_layers):
            with tf.variable_scope("layer_{}".format(depth)):
                with tf.variable_scope("forward"):
                    fw_cell = CustomLSTMCell(lstm_dim, batch_size, keep_prob)
                with tf.variable_scope("backward"):
                    bw_cell = CustomLSTMCell(lstm_dim, batch_size, keep_prob)

                fw_state = tiled_initial_state(fw_cell)
                bw_state = tiled_initial_state(bw_cell)

                (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fw_cell,
                    cell_bw=bw_cell,
                    inputs=layer_inputs,
                    sequence_length=lengths,
                    initial_state_fw=fw_state,
                    initial_state_bw=bw_state)

                # [num_sentences, max_sentence_length, emb]
                layer_outputs = tf.concat([fw_out, bw_out], 2)
                layer_outputs = tf.nn.dropout(layer_outputs, keep_prob)

                # The first layer has no highway connection.
                if layer > 0 if False else depth > 0:
                    # [num_sentences, max_sentence_length, emb]
                    gate = tf.sigmoid(
                        projection(layer_outputs, shape(layer_outputs, 2)))
                    transformed = gate * layer_outputs
                    carried = (1 - gate) * layer_inputs
                    layer_outputs = transformed + carried

                layer_inputs = layer_outputs
    return layer_inputs
def project_layer(self, lstm_outputs):
    """
    project lstm outputs to tag scores with one affine layer
    (no intermediate hidden layer is applied)
    :param lstm_outputs: [batch_size, num_steps, emb_size]; reshaped
        with a last dimension of 2 * self.lstm_dim
    :return: [batch_size, num_steps, num_tags]
    """
    num_steps = shape(lstm_outputs, 1)
    num_tags = len(self.label2id)
    feature_dim = self.lstm_dim * 2

    with tf.variable_scope("project"):
        # Merge the batch and step axes so one matmul scores every step.
        flat_outputs = tf.reshape(lstm_outputs, shape=[-1, feature_dim])

        with tf.variable_scope("logits"):
            weights = tf.get_variable(
                "W",
                shape=[feature_dim, num_tags],
                dtype=tf.float32,
                initializer=self.initializer)
            bias = tf.get_variable(
                "b",
                shape=[num_tags],
                dtype=tf.float32,
                initializer=tf.zeros_initializer())
            scores = tf.nn.xw_plus_b(flat_outputs, weights, bias)

        # Restore the [batch, step, tag] layout.
        return tf.reshape(scores, [-1, num_steps, num_tags])
def layer_norm(self, inputs, epsilon=1e-6):
    """Normalize ``inputs`` over the last axis, then scale and shift.

    Mean and variance are taken over the last axis. The normalized
    values are multiplied by a "scale" variable (initialized to ones)
    and shifted by an "offset" variable (initialized to zeros), both of
    length ``shape(inputs, -1)``.

    :param inputs: tensor to normalize
    :param epsilon: added to the variance before the reciprocal square root
    :return: tensor produced by ``normalized * scale + offset``
    """
    with tf.variable_scope("layer_norm", values=[inputs]):
        num_channels = shape(inputs, -1)
        gain = tf.get_variable(
            "scale",
            shape=[num_channels],
            initializer=tf.ones_initializer())
        shift = tf.get_variable(
            "offset",
            shape=[num_channels],
            initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(inputs, axis=-1, keep_dims=True)
        squared_dev = tf.square(inputs - mean)
        variance = tf.reduce_mean(squared_dev, axis=-1, keep_dims=True)

        centered = inputs - mean
        inv_std = tf.rsqrt(variance + epsilon)
        normalized = centered * inv_std
        return normalized * gain + shift