def func_point_logits(dec_h, enc_e, enc_len):
    '''
    Score every encoder position against one decoder state.

    Args:
        dec_h   : shape(b_sz, h_dec_sz)
        enc_e   : shape(b_sz, tstp_enc, dec_emb_sz)
        enc_len : shape(b_sz,)
    Returns:
        shape(b_sz, tstp_enc) logits; positions where the mask built from
        enc_len is False are replaced by small_num.
    '''
    # NOTE(review): tf.select, tf.concat(dim, values) and the dim= /
    # squeeze_dims= keywords look like pre-1.0 TensorFlow signatures --
    # confirm the targeted TF version before porting.

    # Repeat the decoder state once per encoder step.
    tiled_h = tf.expand_dims(dec_h, dim=1)  # shape(b_sz, 1, h_dec_sz)
    tiled_h = tf.tile(tiled_h, [1, tstp_enc, 1])  # shape(b_sz, tstp_enc, h_dec_sz)

    # shape(b_sz, tstp_enc, h_dec_sz + dec_emb_sz)
    joint = tf.concat(2, [tiled_h, enc_e])

    # shape(b_sz, tstp_enc, h_dec_sz)
    hidden = TfUtils.last_dim_linear(
        joint, output_size=h_dec_sz, bias=False, scope='Ptr_W')
    # shape(b_sz, tstp_enc, 1)
    score = TfUtils.last_dim_linear(
        tf.tanh(hidden), output_size=1, bias=False, scope='Ptr_V')
    logits = tf.squeeze(score, squeeze_dims=[2])  # shape(b_sz, tstp_enc)

    valid = TfUtils.mkMask(enc_len, maxLen=tstp_enc)  # shape(b_sz, tstp_enc)
    filler = tf.ones_like(logits) * small_num
    return tf.select(valid, logits, filler)  # shape(b_sz, tstp_enc)
def basic_lstm_model(inputs):
    """
    Stacked LSTM classifier: run `inputs` through rnn_numLayers LSTM
    layers, average the last layer's outputs over time, feed the average
    through fnn_numLayers tanh layers and project to class_num logits.

    Args:
        inputs: shape(b_sz, tstp, emb_sz) -- per the original shape comments
    Returns:
        logits produced by the 'fnn_softmax' linear layer
    Raises:
        ValueError: if config.rnn_numLayers is smaller than 1 (previously
            this surfaced as a NameError on `outputs`).
    """
    # print() with one argument behaves the same on Python 2 and 3.
    print("Loading basic lstm model..")

    if self.config.rnn_numLayers < 1:
        raise ValueError('rnn_numLayers should be at least 1, but get %d'
                         % self.config.rnn_numLayers)

    for i in range(self.config.rnn_numLayers):
        with tf.variable_scope('rnnLayer' + str(i)):
            lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            outputs, _ = tf.nn.dynamic_rnn(
                lstm_cell, inputs, self.ph_seqLen,  # (b_sz, tstp, h_sz)
                dtype=tf.float32, swap_memory=True,
                scope='basic_lstm_model_layer-' + str(i))
            inputs = outputs  # b_sz, tstp, h_sz

    # The mask the original built here was never used; reduce_avg is given
    # the sequence lengths directly.
    aggregate_state = TfUtils.reduce_avg(
        outputs, self.ph_seqLen, dim=1)  # b_sz, h_sz

    inputs = tf.reshape(aggregate_state, [-1, self.config.hidden_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.hidden_size,
                                bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)

    logits = TfUtils.linear(inputs, self.config.class_num,
                            bias=True, scope='fnn_softmax')
    return logits
def basic_cnn_model(inputs):
    """
    Multi-width 1-D CNN classifier: stack cnn_numLayers convolution layers
    (one conv + ReLU per filter size, concatenated on the channel axis),
    max-pool over time, then apply fnn_numLayers tanh layers and a final
    projection to class_num logits.

    Args:
        inputs: shape(b_sz, tstp, emb_sz) -- per the original shape comments
    Returns:
        logits produced by the 'fnn_softmax' linear layer
    """
    in_channel = self.config.embed_size
    filter_sizes = self.config.filter_sizes
    out_channel = self.config.num_filters

    features = inputs
    for layer in range(self.config.cnn_numLayers):
        # variable_scope, not name_scope: tf.get_variable ignores name
        # scopes, so with name_scope every layer asked for the same
        # "conv-maxpool-N/W" and a second layer could not be created.
        # NOTE(review): this prefixes the conv variable names with
        # "conv-layer-<n>/"; checkpoints saved by the old single-layer
        # graph will need their variable names remapped.
        with tf.variable_scope("conv-layer-" + str(layer)):
            conv_outputs = []
            for filter_size in filter_sizes:
                with tf.variable_scope("conv-maxpool-%d" % filter_size):
                    # Convolution Layer
                    filter_shape = [filter_size, in_channel, out_channel]
                    W = tf.get_variable(name='W', shape=filter_shape)
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                        features, W, stride=1, padding="SAME", name="conv")
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    conv_outputs.append(h)
            # b_sz, tstp, out_channel*len(filter_sizes)
            features = tf.concat(axis=2, values=conv_outputs)
            in_channel = out_channel * len(filter_sizes)

    # Maxpooling: positions where the mask is False are multiplied by 0
    # before the max over the time axis.
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
    pooled = tf.reduce_max(
        features * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
        [1])  # (b_sz, out_channel*len(filter_sizes))

    inputs = tf.reshape(pooled,
                        shape=[b_sz, out_channel * len(filter_sizes)])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size,
                                bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)

    logits = TfUtils.linear(inputs, self.config.class_num,
                            bias=True, scope='fnn_softmax')
    return logits
def average_sentence_as_vector(fetch_output, lengths):
    """
    Average the word vectors of every sentence along the word axis.

    Args:
        fetch_output: shape=(batch_size, num_sentence, len_sentence, embed_size)
        lengths: shape=(batch_size, num_sentence)
    Returns:
        shape=(batch_size, num_sentence, embed_size)
    """
    len_sentence = tf.shape(fetch_output)[-2]
    # (batch_size, num_sentence, len_sentence)
    word_mask = TfUtils.mkMask(lengths, len_sentence)
    # Mask and lengths both get a trailing axis before being handed to
    # reduce_avg, which reduces over axis -2.
    return TfUtils.reduce_avg(
        fetch_output,
        tf.expand_dims(word_mask, -1),
        tf.expand_dims(lengths, -1),
        -2)  # (batch_size, num_sentence, embed_size)
def basic_cbow_model(inputs):
    """
    Bag-of-words classifier: average `inputs` over time, apply
    fnn_numLayers tanh layers and project to class_num logits.

    Args:
        inputs: shape(b_sz, tstp, emb_sz) -- per the original shape comments
    Returns:
        logits produced by the 'fnn_softmax' linear layer
    """
    # The mask the original built here was never read; reduce_avg is given
    # the sequence lengths directly, so the dead ops are dropped.
    aggregate_state = TfUtils.reduce_avg(
        inputs, self.ph_seqLen, dim=1)  # b_sz, emb_sz

    inputs = tf.reshape(aggregate_state, [-1, self.config.embed_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size,
                                bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)

    logits = TfUtils.linear(inputs, self.config.class_num,
                            bias=True, scope='fnn_softmax')
    return logits
def lstm_sentence_rep(input):
    """
    Encode every sentence with a bidirectional LSTM and average its
    outputs over the word axis.

    Args:
        input: reshaped here to (b_sz*tstps_en, len_sen, emb_sz)
    Returns:
        (output, None, None) where output has
        shape(b_sz, tstps_en, h_sz*2)
    """
    with tf.variable_scope('lstm_sentence_rep_scope'):
        # Fold the sentence axis into the batch axis.
        flat_sents = tf.reshape(
            input,
            shape=[b_sz * tstps_en, -1, emb_sz])  # (b_sz*tstps_en, len_sen, emb_sz)
        flat_lens = tf.reshape(self.ph_input_encoder_sentence_len,
                               shape=[-1])  # (b_sz*tstps_en)

        # The same cell object is passed for both directions.
        cell = tf.nn.rnn_cell.BasicLSTMCell(h_sz)
        # tup(shape(b_sz*tstp_enc, len_sen, h_sz))
        fw_bw_out, _ = tf.nn.bidirectional_dynamic_rnn(
            cell, cell, flat_sents, flat_lens,
            dtype=tf.float32, swap_memory=True, time_major=False,
            scope='sentence_encode')

        both_dirs = tf.concat(2, fw_bw_out)  # (b_sz*tstps_en, len_sen, h_sz*2)
        sent_vec = TfUtils.reduce_avg(
            both_dirs, flat_lens, dim=1)  # shape(b_sz*tstps_en, h_sz*2)

        # Unfold back to one row of sentence vectors per batch entry.
        output = tf.reshape(
            sent_vec,
            shape=[b_sz, tstps_en, 2 * h_sz])  # (b_sz, tstps_en, h_sz*2)
    return output, None, None
def get_dec_in():
    """
    Build the decoder inputs: the encoder inputs gathered by order_index,
    shifted one step to the right with the bos vector in front.

    Returns:
        shape(b_sz, tstp_dec, s_emb_sz)
    """
    # shape(b_sz, tstp_dec, s_emb_sz)
    ordered = TfUtils.batch_embed_lookup(encoder_inputs, order_index)
    # shape(b_sz, s_emb_sz) -> shape(b_sz, 1, s_emb_sz)
    bos_step = tf.expand_dims(get_bos(s_emb_sz), 1)
    # shape(b_sz, tstp_dec+1, s_emb_sz)
    with_bos = tf.concat(1, [bos_step, ordered])
    # Drop the last step so the length is tstp_dec again.
    return with_bos[:, :-1, :]
def get_initial_state(hidden_sz):
    '''
    Build the initial LSTM state from the length-averaged encoder inputs.

    Args:
        hidden_sz: must be a python determined number
    Returns:
        LSTMStateTuple whose first field is the linear projection of the
        averaged inputs and whose second field is all zeros.
    '''
    mean_input = TfUtils.reduce_avg(
        encoder_inputs, enc_lengths, dim=1)  # shape(b_sz, s_emb_sz)
    projected = TfUtils.linear(
        mean_input, hidden_sz, bias=False,
        scope='initial_transformation')  # shape(b_sz, hidden_sz)
    zeros = tf.zeros_like(projected)
    return rnn_cell.LSTMStateTuple(projected, zeros)
def fetch_input(self, embedding, seqIds, scope):
    '''
    Look up embeddings for a batch of id sequences, optionally pass them
    through a width-3 convolution + ReLU, then apply dropout.

    Args:
        embedding: embedding matrix to lookup from
        seqIds: sequence ids
        scope: value formatted into the variable scope name
            'cnn_after_embed_%s' of the optional convolution
    Returns:
        output: shape(b_sz, maxSeqLen, fetch_h_sz)
    '''
    embedded = tf.nn.embedding_lookup(embedding, seqIds)  # shape(b_sz, tstp, emb_sz)

    if self.config.cnn_after_embed:
        emb_sz = self.config.embed_size
        with tf.variable_scope('cnn_after_embed_%s' % scope):
            kernel = tf.get_variable(name='W', shape=[3, emb_sz, emb_sz])
            offset = tf.get_variable(name='b', shape=[emb_sz])
            conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                embedded, kernel, stride=1, padding="SAME", name="conv")
            # Channel count is unchanged: shape(b_sz, tstp, emb_sz)
            embedded = tf.nn.relu(tf.nn.bias_add(conv, offset), name="relu")

    return TfUtils.Dropout(embedded, self.config.dropout,
                           train=self.ph_train)
def add_loss_op(self, logits, sparse_label, dec_lengths):
    '''
    Build the training and validation losses.

    Returns:
        (train_loss, valid_loss): valid_loss is the sequence loss alone;
        train_loss adds the L2 penalty, scaled by config.reg, over every
        trainable variable except self.embedding.
    '''
    l2_terms = []
    for var in tf.trainable_variables():
        if var != self.embedding:
            l2_terms.append(tf.nn.l2_loss(var))
    reg_loss = tf.add_n(l2_terms) * self.config.reg

    valid_loss = TfUtils.seq_loss(logits, sparse_label, dec_lengths)
    return reg_loss + valid_loss, valid_loss
def add_loss_op(logits, title_label, content_label, tit_len, content_len):
    '''
    Combine the title and content sequence losses with an L2 penalty.

    Args:
        logits: pair (title_logits, content_logits)
    Returns:
        loss: mean of the summed sequence losses plus config.reg times the
        L2 penalty over every trainable variable except self.embed_w
    '''
    title_logits, content_logits = logits
    title_loss = TfUtils.seq_loss(title_logits, title_label, tit_len)
    content_loss = TfUtils.seq_loss(content_logits, content_label,
                                    content_len)
    loss = tf.reduce_mean(title_loss + content_loss)

    excluded = [self.embed_w]
    l2_terms = []
    for var in tf.trainable_variables():
        if var not in excluded:
            l2_terms.append(tf.nn.l2_loss(var))
    reg_loss = tf.add_n(l2_terms)

    return loss + self.config.reg * reg_loss
def Dense(output_for_title, output_for_content):
    '''
    Get the logits for final classification by running the title and the
    content representations through two parallel stacks of linear layers
    (ReLU after every layer except the last).

    Args:
        output_for_title: flattened here to (-1, batch_dim)
        output_for_content: flattened here to (-1, batch_dim)
    Returns:
        tuple (title_logits, content_logits), each reshaped to
        (batch_size, -1, class_num)
    Raises:
        ValueError: if the last entry of config.dense_hidden is not
            config.class_num
    '''
    batch_size = tf.shape(output_for_title)[0]
    cfg = self.config
    batch_dim = (2 * cfg.embed_size
                 + cfg.num_filters * len(cfg.filter_sizes) * 3)
    print(batch_dim)

    title_act = tf.reshape(output_for_title, [-1, batch_dim])
    content_act = tf.reshape(output_for_content, [-1, batch_dim])

    if cfg.dense_hidden[-1] != cfg.class_num:
        raise ValueError(
            'last hidden layer should be %d, but get %d' %
            (cfg.class_num, cfg.dense_hidden[-1]))

    last_layer = len(cfg.dense_hidden) - 1
    # Title and content layers are created alternately, layer by layer,
    # so the variable creation order matches the original graph.
    for i, hid_num in enumerate(cfg.dense_hidden):
        apply_relu = i < last_layer

        title_act = TfUtils.linear(title_act, output_size=hid_num,
                                   bias=True,
                                   scope='dense-tit-layer-%d' % i)
        if apply_relu:
            title_act = tf.nn.relu(title_act)

        content_act = TfUtils.linear(content_act, output_size=hid_num,
                                     bias=True,
                                     scope='dense-con-layer-%d' % i)
        if apply_relu:
            content_act = tf.nn.relu(content_act)

    out_shape = [batch_size, -1, cfg.class_num]
    title_logits = tf.reshape(title_act, out_shape)
    content_logits = tf.reshape(content_act, out_shape)
    return (title_logits, content_logits)
def domain_layer(output, seq_len):
    """
    Two-way linear classifier on the length-averaged `output`.

    Args:
        output: averaged over axis 1 by TfUtils.reduce_avg; its last
            dimension must match 2 * lstm_dim for the matmul
        seq_len: lengths handed to TfUtils.reduce_avg
    Returns:
        logits with 2 columns
    """
    weight_init = tf.truncated_normal_initializer(
        stddev=1.0 / math.sqrt(float(2)))
    W_classifier = tf.get_variable(shape=[2 * lstm_dim, 2],
                                   initializer=weight_init,
                                   name='W_classifier')
    # NOTE(review): name='class_bias' is given to tf.zeros, not to
    # tf.Variable, so the variable itself gets an auto-generated name --
    # confirm whether that is intended before relying on it in checkpoints.
    bias = tf.Variable(tf.zeros([2], name='class_bias'))

    pooled = TfUtils.reduce_avg(output, seq_len, 1)
    return tf.matmul(pooled, W_classifier) + bias
def basic_cbow_model(inputs):
    """
    Bag-of-words classifier: average `inputs` over time, apply
    fnn_numLayers tanh layers and project to class_num logits.

    Args:
        inputs: shape(b_sz, tstp, emb_sz) -- per the original shape comments
    Returns:
        logits produced by the 'fnn_softmax' linear layer
    """
    # The mask the original built here was never read; reduce_avg is given
    # the sequence lengths directly, so the dead ops are dropped.
    aggregate_state = TfUtils.reduce_avg(
        inputs, self.ph_seqLen, dim=1)  # b_sz, emb_sz

    inputs = tf.reshape(aggregate_state, [-1, self.config.embed_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size,
                                bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)

    logits = TfUtils.linear(inputs, self.config.class_num,
                            bias=True, scope='fnn_softmax')
    return logits
def func_point_logits(dec_h, enc_ptr, enc_len):
    '''
    Score every encoder position against every decoder step.

    Args:
        dec_h   : shape(b_sz, tstp_dec, h_dec_sz)
        enc_ptr : shape(b_sz, tstp_dec, tstp_enc, Ptr_sz)
        enc_len : shape(b_sz,)
    Returns:
        shape(b_sz, tstp_dec, tstp_enc) logits; positions where the mask
        built from enc_len is False are replaced by small_num.
    '''
    # Repeat each decoder state once per encoder step.
    tiled_h = tf.expand_dims(dec_h, axis=2)  # shape(b_sz, tstp_dec, 1, h_dec_sz)
    # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz)
    tiled_h = tf.tile(tiled_h, [1, 1, tstp_enc, 1])

    # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz + Ptr_sz)
    joint = tf.concat(axis=3, values=[tiled_h, enc_ptr])

    # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz)
    hidden = TfUtils.last_dim_linear(
        joint, output_size=h_dec_sz, bias=False, scope='Ptr_W')
    # shape(b_sz, tstp_dec, tstp_enc, 1)
    score = TfUtils.last_dim_linear(
        tf.tanh(hidden), output_size=1, bias=False, scope='Ptr_V')
    logits = tf.squeeze(score, axis=[3])  # shape(b_sz, tstp_dec, tstp_enc)

    # Give every decoder step its own copy of the encoder length.
    step_len = tf.expand_dims(enc_len, 1)  # shape(b_sz, 1)
    step_len = tf.tile(step_len, [1, tstp_dec])  # shape(b_sz, tstp_dec)
    # shape(b_sz, tstp_dec, tstp_enc)
    valid = TfUtils.mkMask(step_len, maxLen=tstp_enc)

    filler = tf.ones_like(logits) * small_num
    return tf.where(valid, logits, filler)
def basic_lstm_model(inputs):
    """
    Stacked LSTM classifier: run `inputs` through rnn_numLayers LSTM
    layers, average the last layer's outputs over time, feed the average
    through fnn_numLayers tanh layers and project to class_num logits.

    Args:
        inputs: shape(b_sz, tstp, emb_sz) -- per the original shape comments
    Returns:
        logits produced by the 'fnn_softmax' linear layer
    Raises:
        ValueError: if config.rnn_numLayers is smaller than 1 (previously
            this surfaced as a NameError on `outputs`).
    """
    # print() with one argument behaves the same on Python 2 and 3.
    print("Loading basic lstm model..")

    if self.config.rnn_numLayers < 1:
        raise ValueError('rnn_numLayers should be at least 1, but get %d'
                         % self.config.rnn_numLayers)

    for i in range(self.config.rnn_numLayers):
        with tf.variable_scope('rnnLayer' + str(i)):
            lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            outputs, _ = tf.nn.dynamic_rnn(
                lstm_cell, inputs, self.ph_seqLen,  # (b_sz, tstp, h_sz)
                dtype=tf.float32, swap_memory=True,
                scope='basic_lstm_model_layer-' + str(i))
            inputs = outputs  # b_sz, tstp, h_sz

    # The mask the original built here was never used; reduce_avg is given
    # the sequence lengths directly.
    aggregate_state = TfUtils.reduce_avg(
        outputs, self.ph_seqLen, dim=1)  # b_sz, h_sz

    inputs = tf.reshape(aggregate_state, [-1, self.config.hidden_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.hidden_size,
                                bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)

    logits = TfUtils.linear(inputs, self.config.class_num,
                            bias=True, scope='fnn_softmax')
    return logits
def basic_cnn_model(inputs):
    """
    Multi-width 1-D CNN classifier: stack cnn_numLayers convolution layers
    (one conv + ReLU per filter size, concatenated on the channel axis),
    max-pool over time, then apply fnn_numLayers tanh layers and a final
    projection to class_num logits.

    Args:
        inputs: shape(b_sz, tstp, emb_sz) -- per the original shape comments
    Returns:
        logits produced by the 'fnn_softmax' linear layer
    """
    in_channel = self.config.embed_size
    filter_sizes = self.config.filter_sizes
    out_channel = self.config.num_filters

    features = inputs
    for layer in range(self.config.cnn_numLayers):
        # variable_scope, not name_scope: tf.get_variable ignores name
        # scopes, so with name_scope every layer asked for the same
        # "conv-maxpool-N/W" and a second layer could not be created.
        # NOTE(review): this prefixes the conv variable names with
        # "conv-layer-<n>/"; checkpoints saved by the old single-layer
        # graph will need their variable names remapped.
        with tf.variable_scope("conv-layer-" + str(layer)):
            conv_outputs = []
            for filter_size in filter_sizes:
                with tf.variable_scope("conv-maxpool-%d" % filter_size):
                    # Convolution Layer
                    filter_shape = [filter_size, in_channel, out_channel]
                    W = tf.get_variable(name='W', shape=filter_shape)
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                        features, W, stride=1, padding="SAME", name="conv")
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    conv_outputs.append(h)
            # b_sz, tstp, out_channel*len(filter_sizes)
            features = tf.concat(axis=2, values=conv_outputs)
            in_channel = out_channel * len(filter_sizes)

    # Maxpooling: positions where the mask is False are multiplied by 0
    # before the max over the time axis.
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
    pooled = tf.reduce_max(
        features * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
        [1])  # (b_sz, out_channel*len(filter_sizes))

    inputs = tf.reshape(pooled,
                        shape=[b_sz, out_channel * len(filter_sizes)])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size,
                                bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)

    logits = TfUtils.linear(inputs, self.config.class_num,
                            bias=True, scope='fnn_softmax')
    return logits
def snt_encoder_cnn(self, seqInput, seqLen):
    '''
    CNN encoder: cnn_numLayers layers of multi-width 1-D convolutions
    followed by a masked mean over the time axis.

    Args:
        seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
        seqLen: length for each sequence in the batch
    Returns:
        (snt_enc, features): snt_enc has
        shape(b_sz, num_filters*len(filter_sizes)); features is the
        per-step output of the last conv layer.
    '''
    dyn_shape = tf.shape(seqInput)
    b_sz, tstp = dyn_shape[0], dyn_shape[1]

    widths = self.config.filter_sizes
    out_channel = self.config.num_filters
    in_channel = self.config.embed_size

    features = seqInput
    for layer in range(self.config.cnn_numLayers):
        with tf.variable_scope("conv-layer-" + str(layer)):
            branches = []
            for width in widths:
                with tf.variable_scope("conv-maxpool-%d" % width):
                    # Convolution Layer
                    W = tf.get_variable(
                        name='W', shape=[width, in_channel, out_channel])
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                        features, W, stride=1, padding="SAME", name="conv")
                    # Apply nonlinearity
                    branches.append(
                        tf.nn.relu(tf.nn.bias_add(conv, b), name="relu"))
            # b_sz, tstp, out_channel*len(filter_sizes)
            features = tf.concat(axis=2, values=branches)
            in_channel = out_channel * len(widths)

    mask = TfUtils.mkMask(seqLen, tstp)  # b_sz, tstp
    weights = tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2)
    # NOTE(review): reduce_mean divides by the padded length tstp, not by
    # seqLen, so the result depends on how far the batch is padded --
    # confirm this is intended.
    pooled = tf.reduce_mean(features * weights, [1])

    # size (b_sz, out_channel*len(filter_sizes))
    snt_enc = tf.reshape(pooled, shape=[b_sz, out_channel * len(widths)])
    return snt_enc, features
def loop_fn(time, cell_output, cell_state, hit_mask):
    """
    Per-step callback of the pointer decoder loop.

    Args:
        time: current step, compared against dec_lengths
        cell_output: shape(b_sz, h_dec_sz) ==> d; None on the first call
        cell_state: tup(shape(b_sz, h_dec_sz))
        hit_mask: shape(b_sz, tstp_enc), encoder positions already picked
    Returns:
        (elements_finished, next_input, next_cell_state, next_hit_mask,
        next_idx)
    """
    if cell_output is None:  # time == 0
        # First call: start from init_state / bos with nothing picked yet.
        first_idx = tf.zeros(shape=[b_sz], dtype=tf.int32)  # shape(b_sz,)
        none_finished = tf.zeros(shape=[b_sz], dtype=tf.bool,
                                 name='elem_finished')
        empty_mask = tf.zeros(shape=[b_sz, tstp_enc], dtype=tf.bool,
                              name='hit_mask')
        return (none_finished, bos, init_state, empty_mask, first_idx)

    # shape(b_sz, tstp_enc, dec_emb_sz)
    encoder_e = enc(cell_output, encoder_inputs, enc_lengths)
    picked = func_point_idx(cell_output, encoder_e, enc_lengths,
                            hit_mask)  # shape(b_sz,)

    # Mark the position that was just picked.
    picked_mask = tf.one_hot(picked, on_value=True, off_value=False,
                             depth=tstp_enc,
                             dtype=tf.bool)  # shape(b_sz, tstp_enc)
    updated_mask = tf.logical_or(hit_mask, picked_mask,
                                 name='next_hit_mask')  # shape(b_sz, tstp_enc)

    # The picked encoder input becomes the next decoder input.
    step_input = TfUtils.batch_embed_lookup(
        encoder_inputs, picked)  # shape(b_sz, s_emb_sz)
    finished = (time >= dec_lengths)  # shape(b_sz,)
    return (finished, step_input, cell_state, updated_mask, picked)
def snt_encoder_cbow(self, seqInput, seqLen):
    '''
    Take the average word representation as sentence representation

    Args:
        seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
        seqLen: length for each sequence in the batch
    Returns:
        output: shape(b_sz, dim_h)
    '''
    # Averaging over axis 1 (the word axis) is delegated to TfUtils.
    return TfUtils.reduce_avg(seqInput, seqLen, dim=1)  # b_sz, emb_sz
def snt_encoder_lstm_avg(self, seqInput, seqLen):
    '''
    Take the average of output as sentence representation

    Args:
        seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
        seqLen: length for each sequence in the batch
    Returns:
        output: shape(b_sz, dim_h)
    '''
    cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
    # Only the per-step outputs are used; the final state is discarded.
    step_outputs, _ = tf.nn.dynamic_rnn(cell=cell,
                                        inputs=seqInput,
                                        sequence_length=seqLen,
                                        dtype=tf.float32,
                                        swap_memory=True,
                                        scope='snt_enc')
    return TfUtils.reduce_avg(step_outputs, lengths=seqLen, dim=1)
def enc(dec_h, in_x, lengths, fake_call=False):
    '''
    Encode the inputs with a bidirectional LSTM conditioned on the decoder
    state, add self-attention, and concatenate [in_x, enc_out, enc_out_hat]
    on the last axis.

    Args:
        dec_h: decoder state handed to func_enc_input
        in_x: shape(b_sz, tstp_enc, enc_emb_sz)
        lengths: sequence lengths for the encoder RNN
        fake_call: when True, build no ops and only return the size
            s_emb_sz + h_enc_sz * 4
    Returns:
        shape(b_sz, tstp_enc, dec_emb_sz), or an int size if fake_call.
    Raises:
        ValueError: if the last dimension of in_x or of the encoder output
            is not statically known.
    '''
    def func_f(in_x, in_h, in_h_hat, fake_call=False):
        if fake_call:
            return s_emb_sz + h_enc_sz * 4
        # as_list() yields None for an unknown dimension. The original
        # int(...) cast failed on unknown dimensions before the check ran,
        # and `assert ValueError(...)` could never raise.
        in_x_sz = in_x.get_shape().as_list()[-1]
        in_h_sz = in_h.get_shape().as_list()[-1]
        if in_x_sz is None:
            raise ValueError('last dimension of the first' +
                             ' arg should be known, while got %s' %
                             (str(type(in_x_sz))))
        if in_h_sz is None:
            raise ValueError('last dimension of the second' +
                             ' arg should be known, while got %s' %
                             (str(type(in_h_sz))))
        res = tf.concat(2, [in_x, in_h, in_h_hat])
        return res

    if fake_call:
        return func_f(None, None, None, fake_call=True)

    inputs = func_enc_input(dec_h, in_x)
    lstm_out, _ = tf.nn.bidirectional_dynamic_rnn(cell_enc,
                                                  cell_enc,
                                                  inputs,
                                                  lengths,
                                                  swap_memory=True,
                                                  dtype=tf.float32,
                                                  scope='sent_encoder')
    enc_out = tf.concat(2, lstm_out)  # shape(b_sz, tstp_enc, h_enc_sz*2)
    # Reshape with the known sizes so the last dimension is static.
    enc_out = tf.reshape(enc_out, [b_sz, tstp_enc, h_enc_sz * 2])
    enc_out_hat = TfUtils.self_attn(enc_out, lengths)
    res = func_f(in_x, enc_out, enc_out_hat)
    return res  # shape(b_sz, tstp_enc, dec_emb_sz)
def attend(enc_h, enc_len):
    '''
    Apply self-attention over the encoder axis separately for every
    (batch, decoder step) pair.

    Args:
        enc_h: shape(b_sz, tstp_dec, tstp_enc, h_enc_sz*2)
        enc_len: shape(b_sz)
    Returns:
        h_hat: shape(b_sz, tstp_dec, tstp_enc, h_enc_sz*2)
    '''
    # Repeat each length once per decoder step, then flatten to match the
    # folded batch axis below.
    enc_len = tf.expand_dims(enc_len, 1)  # shape(b_sz, 1)
    attn_enc_len = tf.tile(enc_len, [1, tstp_dec])
    attn_enc_len = tf.reshape(attn_enc_len, [b_sz * tstp_dec])

    # Builtin int replaces np.int, which was only an alias for it and has
    # been removed from recent NumPy releases.
    in_feat_sz = int(enc_h.get_shape()[-1])
    # shape(b_sz*tstp_dec, tstp_enc, h_enc_sz*2)
    attn_enc_h = tf.reshape(enc_h, [b_sz * tstp_dec, tstp_enc, in_feat_sz])

    # shape(b_sz*tstp_dec, tstp_enc, h_enc_sz*2)
    attn_out = TfUtils.self_attn(attn_enc_h, attn_enc_len)

    out_feat_sz = int(attn_out.get_shape()[-1])
    # shape(b_sz, tstp_dec, tstp_enc, h_enc_sz*2)
    h_hat = tf.reshape(attn_out, [b_sz, tstp_dec, tstp_enc, out_feat_sz])
    return h_hat