def add_prediction_op(self):
    print("***Building network with ReLU activation***")
    x = self.add_embedding()
    with tf.variable_scope("layer_connections"):
        with tf.variable_scope("layer_1"):
            w1 = xavier_initializer(
                (self.config.num_features_types * self.config.embedding_dim,
                 self.config.hidden_size), "w1")
            b1 = xavier_initializer((self.config.hidden_size,), "bias1")

            # For visualization: track the pre-activation distribution and the
            # fraction of units that ReLU will zero out.
            preactivations = tf.add(tf.matmul(x, w1), b1, name="preactivations")
            tf.summary.histogram("preactivations", preactivations)
            non_positive_activation_fraction = tf.reduce_mean(
                tf.cast(tf.less_equal(preactivations, 0), tf.float32))
            tf.summary.scalar("non_positive_activations_fraction",
                              non_positive_activation_fraction)

            h1 = tf.nn.dropout(tf.nn.relu(preactivations),
                               keep_prob=self.dropout_placeholder,
                               name="output_activations")
        with tf.variable_scope("layer_2"):
            w2 = xavier_initializer(
                (self.config.hidden_size, self.config.num_classes), "w2")
            b2 = xavier_initializer((self.config.num_classes,), "bias2")
    with tf.variable_scope("predictions"):
        predictions = tf.add(tf.matmul(h1, w2), b2, name="prediction_logits")
    return predictions
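# The prediction ops in this file call a helper `xavier_initializer(shape, name)`
# that is defined elsewhere in the repo. Below is a minimal sketch of what it is
# assumed to do (create a trainable variable with Xavier/Glorot initialization);
# this is an illustrative reconstruction, not necessarily the repo's implementation.
import tensorflow as tf

def xavier_initializer(shape, name):
    # Hypothetical helper: trainable variable of `shape`, Glorot-initialized.
    return tf.get_variable(name, shape=shape, dtype=tf.float32,
                           initializer=tf.contrib.layers.xavier_initializer())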
def add_prediction_op(self):
    print("***Building char-CNN + word-CNN network***")
    (word_context_embeddings, word_context_embeddings_expanded,
     char_context_embeddings, char_context_embeddings_expanded) = self.add_embedding()

    # Step 1: CNN over characters.
    pooled_char_outputs = []
    for i, char_filter_size in enumerate(self.config.char_filter_sizes):
        with tf.variable_scope("char-conv-maxpool-%s" % char_filter_size):
            # Convolution layer. conv3d filter shape is
            # [depth, height, width, in_channels, out_channels].
            filter_shape = [1, char_filter_size, self.config.char_embedding_dim,
                            1, self.config.char_num_filters]
            # TODO: try Xavier initialization as well:
            # filter = random_truncated_normal_initializer(filter_shape, "filter", stddev=0.1)
            filter = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                 name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.config.char_num_filters]),
                            name="conv_bias")
            conv = tf.nn.conv3d(char_context_embeddings_expanded,
                                filter,
                                strides=self.config.char_stride,
                                padding="VALID",
                                name="conv")
            print("conv shape:", conv.get_shape().as_list())
            h = tf.nn.tanh(tf.nn.bias_add(conv, b),
                           name="tanh")  # [B, T, new_H, new_W, out_c]
            # h_batch_norm = tf.contrib.layers.batch_norm(
            #     h, center=True, scale=True,
            #     is_training=self.is_training, scope='bn')

            # Fold the time axis into the batch axis so each word's character
            # features can be max-pooled with 2-D max_pool.
            h_shape = h.get_shape().as_list()
            h_4d = tf.reshape(h, [-1, h_shape[2], h_shape[3], h_shape[4]],
                              "char_4d_h")
            pooled = tf.nn.max_pool(
                h_4d,
                # ksize[2] = 1 because the VALID conv spans the full embedding
                # width, so the output width is already 1; pool the full height.
                ksize=[1, h_4d.get_shape().as_list()[1], 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            # Restore the time axis: [B, T, pooled_H, pooled_W, out_c].
            pooled_shape = pooled.get_shape().as_list()
            pooled_5d = tf.reshape(pooled,
                                   [-1, h_shape[1], pooled_shape[1],
                                    pooled_shape[2], pooled_shape[3]])
            pooled_char_outputs.append(pooled_5d)

    char_num_filters_total = self.config.char_num_filters * len(
        self.config.char_filter_sizes)
    self.h_pool_char = tf.concat(
        pooled_char_outputs,
        4)  # concat across all output channels: [B, T, o_h, o_w, o_c]
    self.h_pool_char_flat = tf.reshape(
        self.h_pool_char,
        [-1, self.config.max_seq_len, char_num_filters_total])  # [B, T, num_features]

    # Step 2: highway layer(s) over the char-CNN output.
    if self.config.use_highway_layer:
        print("***Adding Highway Layer on top of char CNN***")
        curr_input = self.h_pool_char_flat
        for i in range(self.config.num_highway_layers):
            curr_input_2d = tf.reshape(curr_input, [-1, char_num_filters_total])
            with tf.variable_scope("highway_layer_" + str(i + 1)):
                # Transform gate; use a negative bias so the block starts close
                # to the identity (ref: paper, blogs).
                with tf.variable_scope("transform_gate"):
                    W_T = xavier_initializer(
                        (char_num_filters_total, char_num_filters_total), "W_T")
                    # b_T = xavier_initializer((char_num_filters_total,), "bias_T")
                    b_T = tf.Variable(
                        tf.constant(-2., shape=[char_num_filters_total]),
                        name="bias_T")
                    activations_T = tf.nn.sigmoid(
                        tf.nn.xw_plus_b(curr_input_2d, W_T, b_T,
                                        name="transform_activations"))
                    print("transform activations shape: {}".format(
                        activations_T.get_shape().as_list()))
                with tf.variable_scope("output_gate"):
                    W = xavier_initializer(
                        (char_num_filters_total, char_num_filters_total), "W")
                    b = xavier_initializer((char_num_filters_total,), "bias")
                    activations_output = tf.nn.relu(
                        tf.nn.xw_plus_b(curr_input_2d, W, b,
                                        name="out_activations"))
                    print("output activations shape: {}".format(
                        activations_output.get_shape().as_list()))
                activations_carry = 1. - activations_T
                highway_output = (activations_T * activations_output +
                                  activations_carry * curr_input_2d)
                curr_input = highway_output
        self.highway_output = tf.reshape(
            highway_output,
            [-1, self.config.max_seq_len, char_num_filters_total],
            name="highway_output")

    if self.config.use_highway_layer:
        char_word_context_embeddings = self.highway_output
    else:
        char_word_context_embeddings = self.h_pool_char_flat

    # Step 3: concatenate word and char-CNN features per time step.
    self.context_embeddings = tf.expand_dims(
        tf.concat([word_context_embeddings, char_word_context_embeddings], 2), -1)
    feature_vec_len = self.context_embeddings.get_shape().as_list()[2]

    # Step 4: CNN over words.
    pooled_word_outputs = []
    for i, word_filter_size in enumerate(self.config.word_filter_sizes):
        with tf.variable_scope("word-conv-maxpool-%s" % word_filter_size):
            # Convolution layer. conv2d filter shape is [H, W, in_c, out_c].
            filter_shape = [word_filter_size, feature_vec_len, 1,
                            self.config.word_num_filters]
            # TODO: try Xavier initialization as well:
            # filter = random_truncated_normal_initializer(filter_shape, "filter", stddev=0.1)
            filter = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                 name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.config.word_num_filters]),
                            name="conv_bias")
            conv = tf.nn.conv2d(self.context_embeddings,
                                filter,
                                strides=self.config.word_stride,
                                padding="VALID",
                                name="conv")
            print("conv shape:", conv.get_shape().as_list())
            h = tf.nn.relu(tf.nn.bias_add(conv, b),
                           name="relu")  # [B, new_H, new_W, out_c]
            # h_batch_norm = tf.contrib.layers.batch_norm(
            #     h, center=True, scale=True,
            #     is_training=self.is_training, scope='bn')
            pooled = tf.nn.max_pool(
                h,
                # ksize[2] = 1 because the VALID conv spans the full feature
                # width, so the output width is already 1; pool the full height.
                ksize=[1, h.get_shape().as_list()[1], 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_word_outputs.append(pooled)

    # Combine all the pooled features.
    word_num_filters_total = self.config.word_num_filters * len(
        self.config.word_filter_sizes)
    self.h_pool_word = tf.concat(
        pooled_word_outputs,
        3)  # concat across all output channels: [B, T, out_H, out_d]
    self.h_pool_word_2d = tf.reshape(
        self.h_pool_word, [-1, word_num_filters_total])  # [B, num_features]
    activations = self.h_pool_word_2d
    feature_vec_len = word_num_filters_total

    # Step 5: fully connected layer + dropout.
    if self.config.use_fc_layer:
        with tf.variable_scope("fc_layer"):
            W = xavier_initializer((feature_vec_len, self.config.fc_layer_dim), "W")
            b = xavier_initializer((self.config.fc_layer_dim,), "bias")
            activations = tf.nn.dropout(
                tf.nn.relu(tf.nn.xw_plus_b(self.h_pool_word_2d, W, b,
                                           name="fc_activations")),
                keep_prob=self.dropout_placeholder_fc)
            feature_vec_len = activations.get_shape().as_list()[1]

    # Step 6: output layer producing the final (unnormalized) class scores.
    with tf.variable_scope("output_layer"):
        W1 = xavier_initializer((feature_vec_len, self.config.num_classes), "W")
        b1 = xavier_initializer((self.config.num_classes,), "bias")
        predictions = tf.nn.xw_plus_b(
            tf.nn.dropout(activations, keep_prob=self.dropout_placeholder_word),
            W1, b1, name="prediction_logits")
    return predictions
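# For reference, the highway block in Step 2 implements (Srivastava et al., 2015):
#     y = T(x) * H(x) + (1 - T(x)) * x
# where T(x) = sigmoid(x W_T + b_T) is the transform gate (bias initialized
# negative so the block starts near the identity) and H(x) = relu(x W + b) is
# the candidate transform. A compact standalone sketch, assuming 2-D inputs of
# shape [batch, dim]; the function name and scope below are illustrative only.

def highway_block(x, dim, scope="highway_demo"):
    with tf.variable_scope(scope):
        W_T = xavier_initializer((dim, dim), "W_T")
        b_T = tf.Variable(tf.constant(-2., shape=[dim]), name="bias_T")
        W = xavier_initializer((dim, dim), "W")
        b_H = xavier_initializer((dim,), "bias")
        T = tf.nn.sigmoid(tf.nn.xw_plus_b(x, W_T, b_T))  # transform gate
        H = tf.nn.relu(tf.nn.xw_plus_b(x, W, b_H))       # candidate transform
        return T * H + (1. - T) * x                      # blend transform and carry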