def basic_lstm_model(inputs):
    """Stacked LSTM encoder, masked average over time, then an FNN classifier."""
    print("Loading basic lstm model..")
    for i in range(self.config.rnn_numLayers):
        with tf.variable_scope('rnnLayer' + str(i)):
            # rnn_cell is presumably tf.nn.rnn_cell (TF1-era import).
            lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            outputs, _ = tf.nn.dynamic_rnn(
                lstm_cell, inputs, self.ph_seqLen,  # outputs: (b_sz, tstp, h_sz)
                dtype=tf.float32, swap_memory=True,
                scope='basic_lstm_model_layer-' + str(i))
            inputs = outputs  # (b_sz, tstp, h_sz)
    # `tstp` comes from the enclosing scope. The mask is apparently unused:
    # reduce_avg takes the lengths directly.
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # (b_sz, tstp)
    mask = tf.expand_dims(mask, axis=2)  # (b_sz, tstp, 1)
    aggregate_state = TfUtils.reduce_avg(outputs, self.ph_seqLen, dim=1)  # (b_sz, h_sz)
    inputs = tf.reshape(aggregate_state, [-1, self.config.hidden_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.hidden_size, bias=True,
                                scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    logits = TfUtils.linear(inputs, self.config.class_num, bias=True,
                            scope='fnn_softmax')
    return logits
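# `TfUtils.reduce_avg` is not defined in this section. From its call sites
# (a tensor, the sequence lengths, and the axis to average over) it is
# presumably a length-masked mean; the four-argument call in
# average_sentence_as_vector below presumably maps to a variant that also
# takes an explicit mask. A minimal sketch under that assumption:
import tensorflow as tf  # import assumed by all snippets in this section

def reduce_avg(tensor, lengths, dim):
    """Hypothetical sketch, not the actual TfUtils implementation: mean over
    `dim` of a (batch, max_len, feature) tensor, counting only the first
    `lengths` steps of each sequence."""
    max_len = tf.shape(tensor)[dim]
    mask = tf.sequence_mask(lengths, max_len, dtype=tensor.dtype)  # (b_sz, max_len)
    summed = tf.reduce_sum(tensor * tf.expand_dims(mask, -1), axis=dim)  # (b_sz, h_sz)
    count = tf.cast(tf.expand_dims(lengths, -1), tensor.dtype)  # (b_sz, 1)
    return summed / tf.maximum(count, tf.ones_like(count))  # guard empty sequences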
def func_point_logits(dec_h, enc_e, enc_len):
    '''
    Pointer-network attention scores for a single decoder step.

    Args:
        dec_h   : shape (b_sz, h_dec_sz)
        enc_e   : shape (b_sz, tstp_enc, dec_emb_sz)
        enc_len : shape (b_sz,)
    '''
    # `tstp_enc`, `h_dec_sz` and `small_num` come from the enclosing scope;
    # `small_num` is presumably a large-magnitude negative constant so that
    # padded positions vanish after a softmax.
    dec_h_ex = tf.expand_dims(dec_h, axis=1)  # (b_sz, 1, h_dec_sz)
    dec_h_ex = tf.tile(dec_h_ex, [1, tstp_enc, 1])  # (b_sz, tstp_enc, h_dec_sz)
    linear_concat = tf.concat(
        axis=2, values=[dec_h_ex, enc_e])  # (b_sz, tstp_enc, h_dec_sz + dec_emb_sz)
    point_linear = TfUtils.last_dim_linear(  # (b_sz, tstp_enc, h_dec_sz)
        linear_concat, output_size=h_dec_sz, bias=False, scope='Ptr_W')
    point_v = TfUtils.last_dim_linear(  # (b_sz, tstp_enc, 1)
        tf.tanh(point_linear), output_size=1, bias=False, scope='Ptr_V')
    point_logits = tf.squeeze(point_v, axis=[2])  # (b_sz, tstp_enc)
    mask = TfUtils.mkMask(enc_len, maxLen=tstp_enc)  # (b_sz, tstp_enc)
    point_logits = tf.where(mask, point_logits,
                            tf.ones_like(point_logits) * small_num)  # (b_sz, tstp_enc)
    return point_logits
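# `TfUtils.mkMask` is used throughout; the commented-out tf.sequence_mask
# line in basic_cnn_model below suggests it is essentially a boolean
# sequence mask. A hedged sketch, not the actual implementation:
def mkMask(lengths, maxLen):
    """Hypothetical sketch: True at positions < length, False at padding.
    Output shape: lengths.shape + (maxLen,), dtype tf.bool."""
    return tf.sequence_mask(lengths, maxLen)

# e.g. mkMask([2, 3], maxLen=4) would evaluate to
#   [[True, True, False, False],
#    [True, True, True,  False]]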
def basic_cnn_model(inputs):
    """Stacked 1-D conv layers, masked max pooling over time, FNN classifier."""
    in_channel = self.config.embed_size
    filter_sizes = self.config.filter_sizes
    out_channel = self.config.num_filters
    input = inputs
    for layer in range(self.config.cnn_numLayers):
        # variable_scope (not name_scope) so stacked layers get distinct
        # weights, as in snt_encoder_cnn below.
        with tf.variable_scope("conv-layer-" + str(layer)):
            conv_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.variable_scope("conv-maxpool-%d" % filter_size):
                    # Convolution layer
                    filter_shape = [filter_size, in_channel, out_channel]
                    W = tf.get_variable(name='W', shape=filter_shape)
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(input, W, stride=1, padding="SAME",
                                        name="conv")  # (b_sz, tstp, out_channel)
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    conv_outputs.append(h)
            input = tf.concat(axis=2, values=conv_outputs)  # (b_sz, tstp, out_channel*len(filter_sizes))
            in_channel = out_channel * len(filter_sizes)
    # Max pooling over the valid time steps; `b_sz` and `tstp` come from the
    # enclosing scope.
    # mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32)  # (b_sz, tstp)
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # (b_sz, tstp)
    pooled = tf.reduce_max(
        input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
        [1])  # (b_sz, out_channel*len(filter_sizes))
    inputs = tf.reshape(pooled, shape=[b_sz, out_channel * len(filter_sizes)])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size, bias=True,
                                scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    logits = TfUtils.linear(inputs, self.config.class_num, bias=True,
                            scope='fnn_softmax')
    return logits
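# `TfUtils.linear` serves as the fully connected layer in all of the
# classifier heads above. A minimal sketch, assuming it is a plain affine
# map whose variables live under `scope` (not the actual implementation):
def linear(inputs, output_size, bias=True, scope=None):
    """Hypothetical sketch: y = inputs @ W (+ b) for 2-D `inputs`."""
    in_size = inputs.get_shape()[-1].value
    with tf.variable_scope(scope or 'linear'):
        W = tf.get_variable('W', shape=[in_size, output_size])
        y = tf.matmul(inputs, W)
        if bias:
            b = tf.get_variable('b', shape=[output_size],
                                initializer=tf.zeros_initializer())
            y = y + b
    return y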
def average_sentence_as_vector(fetch_output, lengths):
    """
    Average token embeddings into one vector per sentence.

    Args:
        fetch_output: shape (batch_size, num_sentence, len_sentence, embed_size)
        lengths:      shape (batch_size, num_sentence)
    Returns:
        avg: shape (batch_size, num_sentence, embed_size)
    """
    mask = TfUtils.mkMask(
        lengths, tf.shape(fetch_output)[-2])  # (batch_size, num_sentence, len_sentence)
    # Note: this four-argument form of reduce_avg passes an explicit mask,
    # unlike the three-argument calls elsewhere in this section.
    avg = TfUtils.reduce_avg(fetch_output, tf.expand_dims(mask, -1),
                             tf.expand_dims(lengths, -1),
                             -2)  # (batch_size, num_sentence, embed_size)
    return avg
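# Shape check for average_sentence_as_vector (illustrative values only, and
# assuming the four-argument TfUtils.reduce_avg form used above):
fetch_output = tf.zeros([2, 3, 5, 8])          # (batch, num_sentence, len_sentence, embed)
lengths = tf.constant([[5, 3, 1], [2, 4, 1]])  # valid tokens per sentence
snt_vec = average_sentence_as_vector(fetch_output, lengths)
# snt_vec has shape (2, 3, 8): one averaged embedding per sentence.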
def snt_encoder_cnn(self, seqInput, seqLen):
    '''
    CNN encoder

    Args:
        seqInput: encoder input, shape (b_sz, maxSeqLen, dim_x)
        seqLen: length for each sequence in the batch
    Returns:
        snt_enc: shape (b_sz, num_filters * len(filter_sizes))
        input:   final feature map, shape (b_sz, tstp, num_filters * len(filter_sizes))
    '''
    input_shape = tf.shape(seqInput)
    b_sz = input_shape[0]
    tstp = input_shape[1]

    in_channel = self.config.embed_size
    filter_sizes = self.config.filter_sizes
    out_channel = self.config.num_filters
    input = seqInput
    for layer in range(self.config.cnn_numLayers):
        with tf.variable_scope("conv-layer-" + str(layer)):
            conv_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.variable_scope("conv-maxpool-%d" % filter_size):
                    # Convolution layer
                    filter_shape = [filter_size, in_channel, out_channel]
                    W = tf.get_variable(name='W', shape=filter_shape)
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(input, W, stride=1, padding="SAME",
                                        name="conv")  # (b_sz, tstp, out_channel)
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    conv_outputs.append(h)
            input = tf.concat(axis=2, values=conv_outputs)  # (b_sz, tstp, out_channel*len(filter_sizes))
            in_channel = out_channel * len(filter_sizes)
    mask = TfUtils.mkMask(seqLen, tstp)  # (b_sz, tstp)
    # Note: reduce_mean divides by the padded length tstp rather than by
    # seqLen, so this is a masked sum scaled by 1/tstp, not a true
    # per-sequence mean.
    pooled = tf.reduce_mean(
        input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
        [1])  # (b_sz, out_channel*len(filter_sizes))
    snt_enc = tf.reshape(pooled,
                         shape=[b_sz, out_channel * len(filter_sizes)])
    return snt_enc, input
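# A hedged usage sketch for snt_encoder_cnn; the config fields are inferred
# from how they are read above, and all values are illustrative:
class _Config(object):  # hypothetical stand-in for self.config
    embed_size = 8
    filter_sizes = [3, 4, 5]
    num_filters = 16
    cnn_numLayers = 1

# Inside the model class, with self.config = _Config():
#   seqInput = tf.zeros([2, 7, 8])         # (b_sz, maxSeqLen, embed_size)
#   seqLen = tf.constant([7, 4])
#   snt_enc, feat = self.snt_encoder_cnn(seqInput, seqLen)
#   # snt_enc: (2, 16 * 3) == (2, 48); feat: (2, 7, 48)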
def basic_cbow_model(inputs):
    """CBOW baseline: average the word embeddings, then an FNN classifier."""
    # `tstp` comes from the enclosing scope. The mask is apparently unused:
    # reduce_avg takes the lengths directly.
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # (b_sz, tstp)
    mask = tf.expand_dims(mask, axis=2)  # (b_sz, tstp, 1)
    aggregate_state = TfUtils.reduce_avg(inputs, self.ph_seqLen, dim=1)  # (b_sz, emb_sz)
    inputs = tf.reshape(aggregate_state, [-1, self.config.embed_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size, bias=True,
                                scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    logits = TfUtils.linear(inputs, self.config.class_num, bias=True,
                            scope='fnn_softmax')
    return logits
def func_point_logits(dec_h, enc_ptr, enc_len):
    '''
    Pointer-network attention scores for all decoder steps at once.

    Args:
        dec_h   : shape (b_sz, tstp_dec, h_dec_sz)
        enc_ptr : shape (b_sz, tstp_dec, tstp_enc, Ptr_sz)
        enc_len : shape (b_sz,)
    '''
    # `tstp_dec`, `tstp_enc`, `h_dec_sz` and `small_num` come from the
    # enclosing scope.
    dec_h_ex = tf.expand_dims(dec_h, axis=2)  # (b_sz, tstp_dec, 1, h_dec_sz)
    dec_h_ex = tf.tile(dec_h_ex,
                       [1, 1, tstp_enc, 1])  # (b_sz, tstp_dec, tstp_enc, h_dec_sz)
    linear_concat = tf.concat(
        axis=3, values=[dec_h_ex, enc_ptr])  # (b_sz, tstp_dec, tstp_enc, h_dec_sz + Ptr_sz)
    point_linear = TfUtils.last_dim_linear(  # (b_sz, tstp_dec, tstp_enc, h_dec_sz)
        linear_concat, output_size=h_dec_sz, bias=False, scope='Ptr_W')
    point_v = TfUtils.last_dim_linear(  # (b_sz, tstp_dec, tstp_enc, 1)
        tf.tanh(point_linear), output_size=1, bias=False, scope='Ptr_V')
    point_logits = tf.squeeze(point_v, axis=[3])  # (b_sz, tstp_dec, tstp_enc)

    # Mask out padded encoder positions at every decoder step.
    enc_len = tf.expand_dims(enc_len, 1)       # (b_sz, 1)
    enc_len = tf.tile(enc_len, [1, tstp_dec])  # (b_sz, tstp_dec)
    mask = TfUtils.mkMask(enc_len, maxLen=tstp_enc)  # (b_sz, tstp_dec, tstp_enc)
    point_logits = tf.where(mask, point_logits,
                            tf.ones_like(point_logits) * small_num)  # (b_sz, tstp_dec, tstp_enc)
    return point_logits
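# Both pointer heads rely on `TfUtils.last_dim_linear`, a dense layer applied
# along the last axis of an N-D tensor. A sketch under the assumption that it
# flattens the leading dimensions, applies an affine map, and reshapes back
# (not the actual TfUtils implementation):
def last_dim_linear(inputs, output_size, bias=True, scope=None):
    """Hypothetical sketch: (..., in_size) -> (..., output_size)."""
    in_size = inputs.get_shape()[-1].value
    lead_shape = tf.shape(inputs)[:-1]
    flat = tf.reshape(inputs, [-1, in_size])  # (prod(lead_shape), in_size)
    with tf.variable_scope(scope or 'last_dim_linear'):
        W = tf.get_variable('W', shape=[in_size, output_size])
        out = tf.matmul(flat, W)
        if bias:
            b = tf.get_variable('b', shape=[output_size],
                                initializer=tf.zeros_initializer())
            out = out + b
    out_shape = tf.concat(axis=0, values=[lead_shape, [output_size]])
    return tf.reshape(out, out_shape)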