import sugartensor as tf  # sugartensor re-exports and extends the TensorFlow namespace

# `Hp` is the repo's hyperparameter container (Hp.bs = batch size), assumed
# to be defined elsewhere.


def sg_quasi_conv1d(tensor, opt):
    opt += tf.sg_opt(is_enc=False)

    # Split into H and H_zfo
    H = tensor[:Hp.bs]
    H_z = tensor[Hp.bs:2 * Hp.bs]
    H_f = tensor[2 * Hp.bs:3 * Hp.bs]
    H_o = tensor[3 * Hp.bs:]
    if opt.is_enc:
        H_z, H_f, H_o = 0, 0, 0

    # Convolution and merging
    with tf.sg_context(act="linear", causal=(not opt.is_enc), bn=opt.is_enc, ln=(not opt.is_enc)):
        Z = H.sg_aconv1d() + H_z  # (16, 150, 320)
        F = H.sg_aconv1d() + H_f  # (16, 150, 320)
        O = H.sg_aconv1d() + H_o  # (16, 150, 320)

    # Activation
    Z = Z.sg_bypass(act="tanh")     # (16, 150, 320)
    F = F.sg_bypass(act="sigmoid")  # (16, 150, 320)
    O = O.sg_bypass(act="sigmoid")  # (16, 150, 320)

    # Masking: padded time steps are assumed to be all-zero embeddings,
    # so the first channel alone identifies them
    M = tf.sign(tf.abs(H))[:, :, :1]  # (16, 150, 1) float32. 0 or 1
    Z *= M  # broadcasting
    F *= M  # broadcasting
    O *= M  # broadcasting

    # Concat
    ZFO = tf.concat(axis=0, values=[Z, F, O])

    return ZFO  # (16*3, 150, 320)
def rnn_classify(x, num_classes, is_test=False):
    with tf.sg_context(name='rnn_classify'):
        # stacked forward and backward LSTM cells
        fw_cell = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(is_test) for _ in range(num_blocks)], state_is_tuple=True)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(is_test) for _ in range(num_blocks)], state_is_tuple=True)

        # true sequence lengths: a step counts if any embedding channel is non-zero
        words_used_in_sent = tf.sign(tf.reduce_max(tf.abs(x), reduction_indices=2))
        length = tf.cast(tf.reduce_sum(words_used_in_sent, reduction_indices=1), tf.int32)

        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, x, dtype=tf.float32, sequence_length=length)

        # per-step class scores from the concatenated fw/bw outputs
        output = tf.concat(outputs, 2).sg_reshape(shape=[-1, 2 * latent_dim])
        prediction = output.sg_dense(dim=num_classes, name='dense')
        res = tf.reshape(prediction, [x.get_shape().as_list()[0], -1, num_classes])

    return res
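# `lstm_cell`, `num_blocks`, and `latent_dim` are assumed to be defined
# elsewhere in the repo. A plausible helper matching the call signature
# above might look like this (hypothetical sketch, not the source's code):
def lstm_cell(is_test):
    cell = tf.nn.rnn_cell.BasicLSTMCell(latent_dim, state_is_tuple=True)
    if not is_test:
        # dropout only while training
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.5)
    return cell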
def sg_quasi_rnn(tensor, opt):
    # Split
    if opt.att:
        H, Z, F, O = tf.split(tensor, 4, axis=0)  # (16, 150, 320) for all
    else:
        Z, F, O = tf.split(tensor, 3, axis=0)  # (16, 150, 320) for all

    # step func
    def step(z, f, o, c):
        '''Runs fo-pooling at each time step.'''
        c = f * c + (1 - f) * z

        if opt.att:  # attention
            a = tf.nn.softmax(tf.einsum("ijk,ik->ij", H, c))  # alpha. (16, 150)
            k = (a.sg_expand_dims() * H).sg_sum(axis=1)  # attentional sum. (16, 320)
            h = o * (k.sg_dense(act="linear") + c.sg_dense(act="linear"))
        else:
            h = o * c

        return h, c  # hidden states, (new) cell memories

    # Do rnn loop
    c, hs = 0, []
    timesteps = tensor.get_shape().as_list()[1]
    for t in range(timesteps):
        z = Z[:, t, :]  # (16, 320)
        f = F[:, t, :]  # (16, 320)
        o = O[:, t, :]  # (16, 320)

        # apply step function
        h, c = step(z, f, o, c)  # (16, 320), (16, 320)

        # save result
        hs.append(h.sg_expand_dims(axis=1))

    # Concat to return
    H = tf.concat(hs, 1)  # (16, 150, 320)
    seqlen = tf.to_int32(tf.reduce_sum(tf.sign(tf.abs(tf.reduce_sum(H, axis=-1))), 1))  # (16,) int32
    h = tf.reverse_sequence(input=H, seq_lengths=seqlen, seq_dim=1)[:, 0, :]  # last hidden state vector

    if opt.is_enc:
        # broadcast the last hidden state over time, once per gate
        H_z = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)), [1, timesteps, 1])
        H_f = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)), [1, timesteps, 1])
        H_o = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)), [1, timesteps, 1])
        concatenated = tf.concat([H, H_z, H_f, H_o], 0)  # (16*4, 150, 320)
        return concatenated
    else:
        return H  # (16, 150, 320)
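# How the two pieces compose (hypothetical sketch, not from the source):
# one QRNN encoder layer first computes the Z/F/O gate pre-activations with
# the sg_quasi_conv1d variant above, then runs fo-pooling over time with
# sg_quasi_rnn. `emb_x` is assumed to be a (batch, time, emb) tensor whose
# leading dimension equals the batch size; size=3 is an assumed conv window.
def qrnn_encoder_layer(emb_x):
    opt = tf.sg_opt(is_enc=True, att=False, size=3)
    zfo = sg_quasi_conv1d(emb_x, opt)  # (batch*3, time, emb) gate pre-activations
    return sg_quasi_rnn(zfo, opt)      # [H, H_z, H_f, H_o] stacked: (batch*4, time, emb)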
def ner_cost(tensor, opt):
    # label 0 marks padding, so targets are shifted to zero-based classes
    one_hot_labels = tf.one_hot(opt.target - 1, opt.num_classes, dtype=tf.float32)
    cross_entropy = one_hot_labels * tf.log(tensor)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)

    # zero out the loss at padded time steps (target == 0)
    mask = tf.sign(tf.abs(opt.target))
    cross_entropy *= tf.cast(mask, tf.float32)
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)

    # average over the true (unpadded) sequence length
    length = tf.cast(tf.reduce_sum(tf.sign(opt.target), reduction_indices=1), tf.int32)
    cross_entropy /= tf.cast(length, tf.float32)

    out = tf.reduce_mean(cross_entropy, name='ner_cost')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def ner_cost(tensor, opt):
    one_hot_labels = tf.one_hot(opt.target - 1, opt.num_classes, dtype=tf.float32)
    cross_entropy = one_hot_labels * tf.log(tensor)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)

    # variant: the padding mask is derived from the one-hot labels
    # (target == 0 one-hot encodes to an all-zero row) rather than from
    # the raw targets
    mask = tf.sign(tf.reduce_max(tf.abs(one_hot_labels), reduction_indices=2))
    cross_entropy *= tf.cast(mask, tf.float32)
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)

    length = tf.cast(tf.reduce_sum(tf.sign(opt.target), reduction_indices=1), tf.int32)
    cross_entropy /= tf.cast(length, tf.float32)

    out = tf.reduce_mean(cross_entropy, name='ner_cost')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
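# Quick smoke test (hypothetical, not from the source) for either ner_cost
# variant above: with uniform probabilities over 5 classes, every unpadded
# step contributes -log(0.2), so the cost should come out to about 1.609.
import numpy as np

probs = tf.constant(np.full((2, 4, 5), 0.2, dtype=np.float32))   # (batch, time, classes)
targets = tf.constant(np.array([[1, 2, 0, 0],
                                [3, 1, 4, 2]], dtype=np.int32))  # 0 = padding
cost = ner_cost(probs, tf.sg_opt(target=targets, num_classes=5, name='dummy'))

with tf.Session() as sess:
    print(sess.run(cost))  # ~1.609 == -log(0.2)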
def sg_quasi_conv1d(tensor, opt):
    '''
    Args:
      tensor: A 3-D tensor of either [batch size, time steps, embedding size]
        for original X or [batch size * 4, time steps, embedding size]
        for the others.
    '''
    opt += tf.sg_opt(is_enc=False)

    # Split into H and H_zfo
    H = tensor[:Hp.batch_size]
    H_z = tensor[Hp.batch_size:2 * Hp.batch_size]
    H_f = tensor[2 * Hp.batch_size:3 * Hp.batch_size]
    H_o = tensor[3 * Hp.batch_size:]
    if opt.is_enc:
        H_z, H_f, H_o = 0, 0, 0

    # Convolution and merging
    with tf.sg_context(size=opt.size, act="linear", causal=(not opt.is_enc)):
        Z = H.sg_aconv1d() + H_z  # (16, 150, 320)
        F = H.sg_aconv1d() + H_f  # (16, 150, 320)
        O = H.sg_aconv1d() + H_o  # (16, 150, 320)

    # Activation
    with tf.sg_context(ln=True):
        Z = Z.sg_bypass(act="tanh")     # (16, 150, 320)
        F = F.sg_bypass(act="sigmoid")  # (16, 150, 320)
        O = O.sg_bypass(act="sigmoid")  # (16, 150, 320)

    # Masking
    M = tf.sign(tf.abs(tf.reduce_sum(H, axis=-1, keep_dims=True)))  # (16, 150, 1) float32. 0 or 1
    Z *= M  # broadcasting
    F *= M  # broadcasting
    O *= M  # broadcasting

    # Concat
    ZFO = tf.concat([Z, F, O], 0)

    return ZFO  # (16*3, 150, 320)
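# A decoder-side counterpart (hypothetical sketch, not from the source): the
# decoder input is stacked with the encoder's broadcast H_z/H_f/H_o so the
# causal convolution can merge them, and attention in sg_quasi_rnn then
# attends over the encoder hidden states. `enc_out` is assumed to be the
# (batch*4, time, emb) output of an is_enc=True sg_quasi_rnn call; size=3 is
# an assumed conv window.
def qrnn_decoder_layer(dec_in, enc_out):
    opt = tf.sg_opt(is_enc=False, att=True, size=3)
    h_enc = enc_out[:Hp.batch_size]   # encoder states to attend over
    h_zfo = enc_out[Hp.batch_size:]   # per-gate broadcasts of the last encoder state
    zfo = sg_quasi_conv1d(tf.concat([dec_in, h_zfo], 0), opt)  # (batch*3, time, emb)
    return sg_quasi_rnn(tf.concat([h_enc, zfo], 0), opt)       # (batch, time, emb)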