# Note: these model builders are nested inside a class's graph-construction
# method, so `self`, `TfUtils`, and `rnn_cell` (assumed to be something like
# `from tensorflow.contrib import rnn as rnn_cell`) come from the enclosing scope.
def basic_lstm_model(inputs):
    print("Loading basic lstm model..")
    tstp = tf.shape(inputs)[1]  # dynamic number of timesteps
    for i in range(self.config.rnn_numLayers):
        with tf.variable_scope('rnnLayer' + str(i)):
            lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            outputs, _ = tf.nn.dynamic_rnn(
                lstm_cell, inputs, self.ph_seqLen,  # (b_sz, tstp, h_sz)
                dtype=tf.float32, swap_memory=True,
                scope='basic_lstm_model_layer-' + str(i))
            inputs = outputs  # (b_sz, tstp, h_sz)
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # (b_sz, tstp); unused below
    mask = tf.expand_dims(mask, axis=2)          # (b_sz, tstp, 1)
    # Average the outputs over the valid timesteps only.
    aggregate_state = TfUtils.reduce_avg(outputs, self.ph_seqLen, dim=1)  # (b_sz, h_sz)
    inputs = tf.reshape(aggregate_state, [-1, self.config.hidden_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.hidden_size, bias=True,
                                scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    logits = TfUtils.linear(inputs, self.config.class_num, bias=True,
                            scope='fnn_softmax')
    return logits
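
# The layer loop above can equivalently be expressed with TF 1.x's
# MultiRNNCell; a minimal sketch under the same config names (this is an
# illustrative alternative, not the source's code):
def stacked_lstm_sketch(inputs):
    cells = [rnn_cell.BasicLSTMCell(self.config.hidden_size)
             for _ in range(self.config.rnn_numLayers)]
    outputs, _ = tf.nn.dynamic_rnn(rnn_cell.MultiRNNCell(cells), inputs,
                                   self.ph_seqLen, dtype=tf.float32,
                                   swap_memory=True, scope='stacked_lstm')
    return outputs  # (b_sz, tstp, hidden_size)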
def basic_cnn_model(inputs):
    in_channel = self.config.embed_size
    filter_sizes = self.config.filter_sizes
    out_channel = self.config.num_filters
    b_sz = tf.shape(inputs)[0]   # dynamic batch size
    tstp = tf.shape(inputs)[1]   # dynamic number of timesteps
    conv_in = inputs
    for layer in range(self.config.cnn_numLayers):
        with tf.name_scope("conv-layer-" + str(layer)):
            conv_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                # Include the layer index in the variable scope so stacked
                # layers don't collide on the same 'W' and 'b' variables.
                with tf.variable_scope("conv-maxpool-%d-%d" % (layer, filter_size)):
                    # Convolution layer
                    filter_shape = [filter_size, in_channel, out_channel]
                    W = tf.get_variable(name='W', shape=filter_shape)
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(  # (b_sz, tstp, out_channel)
                        conv_in, W, stride=1, padding="SAME", name="conv")
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    conv_outputs.append(h)
            conv_in = tf.concat(axis=2, values=conv_outputs)  # (b_sz, tstp, out_channel*len(filter_sizes))
            in_channel = out_channel * len(filter_sizes)
    # Max-pooling over valid timesteps: zero-masking is safe here because
    # the ReLU guarantees non-negative activations.
    # mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32)  # (b_sz, tstp)
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # (b_sz, tstp)
    pooled = tf.reduce_max(
        conv_in * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
        [1])  # (b_sz, out_channel*len(filter_sizes))
    inputs = tf.reshape(pooled, shape=[b_sz, out_channel * len(filter_sizes)])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size, bias=True,
                                scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    logits = TfUtils.linear(inputs, self.config.class_num, bias=True,
                            scope='fnn_softmax')
    return logits
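
# Note on the masked max-pool above: multiplying by the mask zeroes padded
# positions, which only works because the ReLU keeps activations >= 0. With
# signed features, the general form pushes padded positions to -inf instead.
# A minimal sketch, assuming `feats` is (b_sz, tstp, ch) and `mask` is the
# boolean (b_sz, tstp) mask from TfUtils.mkMask (names here are illustrative):
def masked_max_pool(feats, mask):
    neg_inf = tf.float32.min
    additive = (1.0 - tf.cast(mask, tf.float32)) * neg_inf  # (b_sz, tstp)
    return tf.reduce_max(feats + tf.expand_dims(additive, 2), axis=1)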
def basic_cbow_model(inputs):
    tstp = tf.shape(inputs)[1]  # dynamic number of timesteps
    mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # (b_sz, tstp); unused below
    mask = tf.expand_dims(mask, axis=2)          # (b_sz, tstp, 1)
    # CBOW: average the embeddings over the valid timesteps.
    aggregate_state = TfUtils.reduce_avg(inputs, self.ph_seqLen, dim=1)  # (b_sz, emb_sz)
    inputs = tf.reshape(aggregate_state, [-1, self.config.embed_size])
    for i in range(self.config.fnn_numLayers):
        inputs = TfUtils.linear(inputs, self.config.embed_size, bias=True,
                                scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    logits = TfUtils.linear(inputs, self.config.class_num, bias=True,
                            scope='fnn_softmax')
    return logits
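
# The models above all rely on TfUtils.mkMask, TfUtils.reduce_avg, and
# TfUtils.linear, which are defined elsewhere in the repo. A minimal sketch
# of the assumed behavior, written against TF 1.x (the exact signatures in
# TfUtils may differ):
def mkMask(seq_len, max_len):
    # Boolean mask, shape (b_sz, max_len): True at valid timesteps.
    return tf.sequence_mask(seq_len, maxlen=max_len)

def reduce_avg(tensor, seq_len, dim=1):
    # Mean over valid timesteps only; padded positions contribute zero.
    mask = tf.cast(tf.sequence_mask(seq_len, maxlen=tf.shape(tensor)[dim]),
                   tensor.dtype)                          # (b_sz, tstp)
    summed = tf.reduce_sum(tensor * tf.expand_dims(mask, -1), axis=dim)
    denom = tf.maximum(tf.cast(tf.expand_dims(seq_len, -1), tensor.dtype), 1.0)
    return summed / denom                                 # (b_sz, feat_sz)

def linear(inputs, output_size, bias=True, scope=None):
    # Fully connected layer: inputs @ W (+ b), variables created under scope.
    with tf.variable_scope(scope or 'linear'):
        in_size = int(inputs.get_shape()[-1])
        W = tf.get_variable('W', shape=[in_size, output_size])
        out = tf.matmul(inputs, W)
        if bias:
            out += tf.get_variable('b', shape=[output_size],
                                   initializer=tf.zeros_initializer())
        return out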
def Dense(output_for_title, output_for_content):
    '''Get the logits for the final classification.

    Args:
        output_for_title:   shape (b_sz, seq_title, rep_sz)
        output_for_content: shape (b_sz, seq_content, rep_sz)
    Returns:
        A pair of logits tensors, shaped (b_sz, seq_title, class_num)
        and (b_sz, seq_content, class_num).
    '''
    batch_size = tf.shape(output_for_title)[0]
    # batch_dim = self.config.embed_size + self.config.num_filters * len(self.config.filter_sizes) * 3
    batch_dim = (2 * self.config.embed_size +
                 self.config.num_filters * len(self.config.filter_sizes) * 3)
    print(batch_dim)
    loop_input_title = tf.reshape(output_for_title, [-1, batch_dim])
    loop_input_content = tf.reshape(output_for_content, [-1, batch_dim])
    if self.config.dense_hidden[-1] != self.config.class_num:
        raise ValueError('last hidden layer should be %d, but got %d' %
                         (self.config.class_num, self.config.dense_hidden[-1]))
    for i, hid_num in enumerate(self.config.dense_hidden):
        loop_input_title = TfUtils.linear(loop_input_title, output_size=hid_num,
                                          bias=True, scope='dense-tit-layer-%d' % i)
        if i < len(self.config.dense_hidden) - 1:
            loop_input_title = tf.nn.relu(loop_input_title)
        loop_input_content = TfUtils.linear(loop_input_content, output_size=hid_num,
                                            bias=True, scope='dense-con-layer-%d' % i)
        if i < len(self.config.dense_hidden) - 1:
            loop_input_content = tf.nn.relu(loop_input_content)
    logits = (tf.reshape(loop_input_title,
                         [batch_size, -1, self.config.class_num]),
              tf.reshape(loop_input_content,
                         [batch_size, -1, self.config.class_num]))
    return logits
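
# Usage sketch for Dense (shapes only; the input tensors here are
# hypothetical placeholders, not names from the source):
#   title_repr:   (b_sz, seq_title,   rep_sz)
#   content_repr: (b_sz, seq_content, rep_sz)
#   title_logits, content_logits = Dense(title_repr, content_repr)
#   -> (b_sz, seq_title, class_num), (b_sz, seq_content, class_num),
#      provided self.config.dense_hidden ends with class_num.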
def get_initial_state(hidden_sz):
    '''Build the decoder's initial LSTM state from the averaged encoder input.

    Args:
        hidden_sz: must be a statically known Python int
    '''
    # encoder_inputs and enc_lengths come from the enclosing scope.
    avg_in_x = TfUtils.reduce_avg(encoder_inputs,  # (b_sz, s_emb_sz)
                                  enc_lengths, dim=1)
    state = TfUtils.linear(avg_in_x, hidden_sz,  # (b_sz, hidden_sz)
                           bias=False, scope='initial_transformation')
    state = rnn_cell.LSTMStateTuple(state, tf.zeros_like(state))
    return state
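
# A sketch of how this initial state would typically be consumed: as the
# initial_state of a decoder cell with the same hidden_sz. decoder_inputs
# and dec_lengths are illustrative assumptions, not shown in the source:
def run_decoder_sketch(hidden_sz, decoder_inputs, dec_lengths):
    decoder_cell = rnn_cell.BasicLSTMCell(hidden_sz)
    init_state = get_initial_state(hidden_sz)  # LSTMStateTuple(c, h)
    dec_outputs, dec_state = tf.nn.dynamic_rnn(
        decoder_cell, decoder_inputs, sequence_length=dec_lengths,
        initial_state=init_state, scope='decoder')
    return dec_outputs, dec_state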