def __init__(self, is_trainable=True):
    self.model_name = ModelName.resnet_clf
    # Base ResNet is always built as trainable here; `is_trainable` only
    # controls the classifier head defined below.
    super(ResnetClf, self).__init__(model_name=self.model_name,
                                    is_trainable=True,
                                    train_last_block=False)
    # self.base_image_model = Resnet(restore=False, is_trainable=False, train_last_block=False)
    self.resnet_representation = self.avg_pool_representation

    activation = tf.nn.relu
    self.initializer_type = 'normal' if activation == tf.nn.relu else 'xavier'
    dim_D = 2048

    with tf.variable_scope(self.model_name.name):
        # FC1 [dim_D] -> [512]
        with tf.variable_scope('image_fc1'):
            W_fc1 = weight_variable(is_trainable=is_trainable,
                                    shape=[dim_D, 512],
                                    initializer_type=self.initializer_type,
                                    name='W_fc1')
            b_fc1 = bias_variable(is_trainable=is_trainable,
                                  shape=[512],
                                  name='b_fc1')
            if use_batch_norm:
                h_fc1 = batch_norm_dense_activation(
                    inputs=tf.matmul(self.resnet_representation, W_fc1) + b_fc1,
                    is_training=self.train_mode,
                    activation=activation,
                    is_trainable=is_trainable)
            else:
                h_fc1 = activation(
                    tf.matmul(self.resnet_representation, W_fc1) + b_fc1)
            h_fc1_drop = tf.nn.dropout(h_fc1, self.dropout_keep_prob)

        # Softmax
        with tf.variable_scope('softmax'):
            W_softmax = weight_variable(is_trainable=is_trainable,
                                        shape=[512, NUM_CLASSES],
                                        initializer_type=self.initializer_type)
            b_softmax = bias_variable(is_trainable=is_trainable,
                                      shape=[NUM_CLASSES])
            self.scores = tf.matmul(h_fc1_drop, W_softmax) + b_softmax

        with tf.variable_scope('optimization'):
            # Finalize the predictions, the optimizing function, loss/accuracy stats etc.
            self._set_predictions_optimizer_and_loss()
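# The classifier head above relies on variable-creation helpers (`weight_variable`,
# `bias_variable`) defined elsewhere in this repo. A minimal sketch of the assumed
# behaviour follows; the names carry a `_sketch` suffix so they do not shadow the
# real helpers, and the initializer defaults here are assumptions, not repo facts.
def weight_variable_sketch(is_trainable, shape, initializer_type='normal',
                           name='weights', stddev=0.1):
    """Weight variable with either a truncated-normal or Xavier initializer."""
    if initializer_type == 'xavier':
        initializer = tf.contrib.layers.xavier_initializer()
    else:
        initializer = tf.truncated_normal_initializer(stddev=stddev)
    return tf.get_variable(name, shape=shape, initializer=initializer,
                           trainable=is_trainable)


def bias_variable_sketch(is_trainable, shape, name='biases'):
    """Bias variable initialized to a small constant."""
    return tf.get_variable(name, shape=shape,
                           initializer=tf.constant_initializer(0.1),
                           trainable=is_trainable)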
def retrieveBiases(sess, shape, layer_name):
    """Fetch the current values of an already-created bias variable via variable reuse."""
    bias_values = None
    with tf.name_scope(layer_name):
        with tf.variable_scope('biases', reuse=True):
            # reuse=True makes bias_variable return the existing variable
            # instead of creating a new one.
            biases = bias_variable(shape, layer_name)
            bias_values = sess.run(biases)
    return bias_values
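# Illustrative usage of retrieveBiases; the layer name and shape below are
# hypothetical and should match whatever scopes the built graph actually contains.
def _example_dump_biases():
    """Read back the biases of a previously built layer inside a session."""
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        bias_values = retrieveBiases(sess, shape=[64], layer_name='image_conv1')
        print(bias_values.shape)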
def __init__(self,
             is_primary_model=True,
             is_trainable=True,
             train_last_layers=False):
    model_name = ModelName.image_cnn_v2
    # with tf.variable_scope(model_name.name):
    super(ImageCNNV2, self).__init__(model_name=model_name,
                                     is_primary_model=is_primary_model)
    # Spatial size after five 2x2 pooling stages (see the shape-trace sketch
    # after this constructor).
    last_pool_image_dim = int(IMAGE_SIZE / 32)

    # MODEL CONFIG:
    self.config = {
        'USE_AVG_POOLING': True,
        'IMAGE_SIZE': IMAGE_SIZE,
        'activation': tf.nn.relu,
    }
    activation = self.config['activation']

    # MODEL DEFINITION
    # ================================================================================
    # conv1
    # padding='SAME'
    # [IMG,IMG,3] -> [IMG,IMG,32]
    with tf.variable_scope('image_conv1'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv1 = weight_variable(is_trainable=is_trainable, shape=[7, 7, 3, 32])
        b_conv1 = bias_variable(is_trainable=is_trainable, shape=[32])
        if use_batch_norm:
            self.h_conv1 = batch_norm_conv_activation(
                is_trainable=is_trainable,
                inputs=conv2d(x=self.x_image, W=W_conv1) + b_conv1,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv1 = activation(conv2d(x=self.x_image, W=W_conv1) + b_conv1)

    # conv1-pool1
    # [IMG,IMG,32] -> [IMG/2,IMG/2,32]
    # ksize=[1,2,2,1], strides=[1,2,2,1]
    with tf.variable_scope('image_pool1'):
        self.h_pool1 = max_pool(self.h_conv1)

    # # norm
    # with tf.variable_scope('image_norm1'):
    #     h_norm1 = lr_norm(self.h_pool1, 4)

    # conv2
    # [IMG/2,IMG/2,32] -> [IMG/2,IMG/2,64]
    with tf.variable_scope('image_conv2'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv2 = weight_variable(is_trainable=is_trainable, shape=[5, 5, 32, 64])
        b_conv2 = bias_variable(is_trainable=is_trainable, shape=[64])
        if use_batch_norm:
            self.h_conv2 = batch_norm_conv_activation(
                is_trainable=is_trainable,
                inputs=conv2d(x=self.h_pool1, W=W_conv2) + b_conv2,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv2 = activation(conv2d(x=self.h_pool1, W=W_conv2) + b_conv2)

    # conv3
    # [IMG/2,IMG/2,64] -> [IMG/2,IMG/2,64]
    with tf.variable_scope('image_conv3'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv3 = weight_variable(is_trainable=is_trainable, shape=[5, 5, 64, 64])
        b_conv3 = bias_variable(is_trainable=is_trainable, shape=[64])
        if use_batch_norm:
            self.h_conv3 = batch_norm_conv_activation(
                is_trainable=is_trainable,
                inputs=conv2d(x=self.h_conv2, W=W_conv3) + b_conv3,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv3 = activation(conv2d(x=self.h_conv2, W=W_conv3) + b_conv3)

    # conv3-pool2
    # [IMG/2,IMG/2,64] -> [IMG/4,IMG/4,64]
    with tf.variable_scope('image_pool2'):
        self.h_pool2 = max_pool(self.h_conv3)

    # conv4
    # [IMG/4,IMG/4,64] -> [IMG/4,IMG/4,128]
    with tf.variable_scope('image_conv4'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv4 = weight_variable(is_trainable=is_trainable, shape=[3, 3, 64, 128])
        b_conv4 = bias_variable(is_trainable=is_trainable, shape=[128])
        if use_batch_norm:
            self.h_conv4 = batch_norm_conv_activation(
                is_trainable=is_trainable,
                inputs=conv2d(x=self.h_pool2, W=W_conv4) + b_conv4,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv4 = activation(conv2d(x=self.h_pool2, W=W_conv4) + b_conv4)

    # conv5
    # [IMG/4,IMG/4,128] -> [IMG/4,IMG/4,128]
    with tf.variable_scope('image_conv5'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv5 = weight_variable(is_trainable=is_trainable, shape=[3, 3, 128, 128])
        b_conv5 = bias_variable(is_trainable=is_trainable, shape=[128])
        if use_batch_norm:
            self.h_conv5 = batch_norm_conv_activation(
                is_trainable=is_trainable,
                inputs=conv2d(x=self.h_conv4, W=W_conv5) + b_conv5,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv5 = activation(conv2d(x=self.h_conv4, W=W_conv5) + b_conv5)

    # conv5-pool3
    # [IMG/4,IMG/4,128] -> [IMG/8,IMG/8,128]
    with tf.variable_scope('image_pool3'):
        self.h_pool3 = max_pool(self.h_conv5)

    last_layers_trainable = is_trainable or train_last_layers

    # conv6
    # [IMG/8,IMG/8,128] -> [IMG/8,IMG/8,256]
    with tf.variable_scope('image_conv6'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv6 = weight_variable(is_trainable=is_trainable, shape=[3, 3, 128, 256])
        b_conv6 = bias_variable(is_trainable=is_trainable, shape=[256])
        if use_batch_norm:
            self.h_conv6 = batch_norm_conv_activation(
                is_trainable=last_layers_trainable,
                inputs=conv2d(x=self.h_pool3, W=W_conv6) + b_conv6,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv6 = activation(conv2d(x=self.h_pool3, W=W_conv6) + b_conv6)

    # conv7
    # [IMG/8,IMG/8,256] -> [IMG/8,IMG/8,256]
    with tf.variable_scope('image_conv7'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv7 = weight_variable(is_trainable=is_trainable, shape=[3, 3, 256, 256])
        b_conv7 = bias_variable(is_trainable=is_trainable, shape=[256])
        if use_batch_norm:
            self.h_conv7 = batch_norm_conv_activation(
                is_trainable=last_layers_trainable,
                inputs=conv2d(x=self.h_conv6, W=W_conv7) + b_conv7,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv7 = activation(conv2d(x=self.h_conv6, W=W_conv7) + b_conv7)

    # conv7-pool4
    # [IMG/8,IMG/8,256] -> [IMG/16,IMG/16,256]
    with tf.variable_scope('image_pool4'):
        self.h_pool4 = max_pool(self.h_conv7)

    # conv8
    # [IMG/16,IMG/16,256] -> [IMG/16,IMG/16,512]
    with tf.variable_scope('image_conv8'):
        # [filter_size, filter_size, channel_size, num_filters]
        W_conv8 = weight_variable(is_trainable=last_layers_trainable,
                                  shape=[3, 3, 256, 512])
        b_conv8 = bias_variable(is_trainable=last_layers_trainable, shape=[512])
        if use_batch_norm:
            self.h_conv8 = batch_norm_conv_activation(
                is_trainable=last_layers_trainable,
                inputs=conv2d(x=self.h_pool4, W=W_conv8) + b_conv8,
                is_training=self.train_mode,
                activation=activation)
        else:
            self.h_conv8 = activation(conv2d(x=self.h_pool4, W=W_conv8) + b_conv8)

    if self.config['USE_AVG_POOLING']:
        # conv8-avgPool
        # [IMG/16, IMG/16, 512] -> [512]
        with tf.variable_scope('image_avg_pool'):
            self.h_pool5_flat = tf.reduce_mean(self.h_conv8,
                                               reduction_indices=[1, 2],
                                               name="avg_pool")
    else:
        # conv8-pool5
        # [IMG/16,IMG/16,512] -> [IMG/32,IMG/32,512]
        with tf.variable_scope('image_pool5'):
            self.h_pool5 = max_pool(self.h_conv8)
            # Flatten last pool layer
            self.h_pool5_flat = tf.reshape(
                self.h_pool5,
                shape=[-1, last_pool_image_dim * last_pool_image_dim * 512],
                name='h_pool5_flat')

    if not self.config['USE_AVG_POOLING']:
        # FC0 [image_dim*image_dim*512] -> [512]
        with tf.variable_scope('image_fc0'):
            W_fc0 = weight_variable(
                is_trainable=is_trainable,
                shape=[last_pool_image_dim * last_pool_image_dim * 512, 512],
                name='W_fc0')
            b_fc0 = bias_variable(is_trainable=is_trainable, shape=[512], name='b_fc0')
            if use_batch_norm:
                self.h_fc0 = batch_norm_dense_activation(
                    inputs=tf.nn.xw_plus_b(x=self.h_pool5_flat,
                                           weights=W_fc0,
                                           biases=b_fc0),
                    is_training=self.train_mode,
                    activation=activation,
                    is_trainable=is_trainable)
            else:
                self.h_fc0 = activation(tf.matmul(self.h_pool5_flat, W_fc0) + b_fc0)
            self.h_fc0_drop = tf.nn.dropout(self.h_fc0, self.dropout_keep_prob)
        last_layer = self.h_fc0_drop
    else:
        last_layer = self.h_pool5_flat

    if is_trainable:
        # FC1 [512] -> [256]
        with tf.variable_scope('image_fc1'):
            W_fc1 = weight_variable(is_trainable=is_trainable, shape=[512, 256])
            b_fc1 = bias_variable(is_trainable=is_trainable, shape=[256])
            if use_batch_norm:
                self.h_fc1 = batch_norm_dense_activation(
                    inputs=tf.nn.xw_plus_b(x=last_layer, weights=W_fc1, biases=b_fc1),
                    is_training=self.train_mode,
                    activation=activation,
                    is_trainable=is_trainable)
            else:
                self.h_fc1 = tf.nn.relu(tf.matmul(last_layer, W_fc1) + b_fc1)
            # dropout
            self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.dropout_keep_prob)

        # Softmax
        with tf.variable_scope('softmax'):
            self.W_softmax = weight_variable(is_trainable=is_trainable,
                                             shape=[256, NUM_CLASSES])
            self.b_softmax = bias_variable(is_trainable=is_trainable,
                                           shape=[NUM_CLASSES])
            self.probabilities = tf.nn.softmax(
                tf.matmul(self.h_fc1_drop, self.W_softmax) + self.b_softmax)

    # Finalize the predictions, the optimizing function, loss/accuracy stats etc.
    if self.is_primary_model:
        print("%s is a primary model, making optimizations" % self.model_name.name)
        self._set_predictions_optimizer_and_loss()
    else:
        print("%s not primary model, skipping optimizations" % self.model_name.name)
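# Sanity check for the shape comments above: each max-pool stage halves the
# spatial resolution, so the max-pool branch (pool1..pool5) divides IMAGE_SIZE
# by 32, matching last_pool_image_dim = int(IMAGE_SIZE / 32). A small sketch,
# assuming IMAGE_SIZE is a multiple of 32 (e.g. 224):
def _trace_spatial_dims(image_size=224, num_pools=5):
    """Return the spatial size after each 2x2/stride-2 pooling stage."""
    dims = [image_size]
    for _ in range(num_pools):
        dims.append(dims[-1] // 2)
    return dims  # e.g. [224, 112, 56, 28, 14, 7]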
def __init__(self, is_trainable=True):
    super(ImageCNN, self).__init__(model_name=ModelName.image_cnn)

    # MODEL DEFINITION
    # ================================================================================
    # conv1
    # padding='SAME'
    with tf.variable_scope('image_conv1'):
        # [filter_size, filter_size, channel_size, num_filters]
        self.W_conv1 = weight_variable(is_trainable=is_trainable,
                                       shape=[7, 7, 3, 64],
                                       stddev=5e-2)
        self.b_conv1 = bias_variable(is_trainable=is_trainable, shape=[64])
        self.h_conv1 = tf.nn.relu(
            conv2d(self.x_image, self.W_conv1, is_training=self.train_mode)
            + self.b_conv1)

    # [128,128,64] -> [64,64,64]
    # ksize=[1,2,2,1], strides=[1,2,2,1]
    with tf.variable_scope('image_pool1'):
        self.h_pool1 = max_pool(self.h_conv1)

    # conv2
    with tf.variable_scope('image_conv2'):
        self.W_conv2 = weight_variable(is_trainable=is_trainable,
                                       shape=[7, 7, 64, 128],
                                       stddev=5e-2)
        self.b_conv2 = bias_variable(is_trainable=is_trainable, shape=[128])
        self.h_conv2 = tf.nn.relu(
            conv2d(self.h_pool1, self.W_conv2, isnorm=False,
                   is_training=self.train_mode) + self.b_conv2)

    # [64,64,128] -> [32,32,128]
    with tf.variable_scope('image_pool2'):
        self.h_pool2 = max_pool(self.h_conv2)

    # conv3 [32,32,128] -> [16,16,256]
    with tf.variable_scope('image_conv3'):
        self.W_conv3 = weight_variable(is_trainable=is_trainable,
                                       shape=[5, 5, 128, 256],
                                       stddev=5e-2)
        self.b_conv3 = bias_variable(is_trainable=is_trainable, shape=[256])
        self.h_conv3 = tf.nn.relu(
            conv2d(self.h_pool2, self.W_conv3, strides=[1, 2, 2, 1],
                   isnorm=False, is_training=self.train_mode) + self.b_conv3)

    # [16,16,256] -> [8,8,256]
    with tf.variable_scope('image_pool3'):
        self.h_pool3 = max_pool(self.h_conv3)

    # norm3
    with tf.variable_scope('image_norm3'):
        self.h_norm3 = lr_norm(self.h_pool3, 4)

    # conv4 [8,8,256] -> [2,2,512]
    with tf.variable_scope('image_conv4'):
        self.W_conv4 = weight_variable(is_trainable=is_trainable,
                                       shape=[5, 5, 256, 512],
                                       stddev=5e-2)
        self.b_conv4 = bias_variable(is_trainable=is_trainable, shape=[512])
        self.h_conv4 = tf.nn.relu(
            conv2d(self.h_norm3, self.W_conv4, strides=[1, 4, 4, 1],
                   isnorm=False, is_training=self.train_mode) + self.b_conv4)

    # [2,2,512] -> [1,1,512]
    with tf.variable_scope('image_pool4'):
        self.h_pool4 = max_pool(self.h_conv4)

    # fc
    with tf.variable_scope('image_fc1'):
        self.W_fc1 = weight_variable(is_trainable=is_trainable,
                                     shape=[512, 256],
                                     stddev=0.04)
        self.b_fc1 = bias_variable(is_trainable=is_trainable, shape=[256])
        self.h_pool4_flat = tf.reshape(self.h_pool4, [-1, 512])
        self.h_fc1 = tf.nn.relu(
            tf.matmul(self.h_pool4_flat, self.W_fc1) + self.b_fc1)
        # dropout
        self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.dropout_keep_prob)

    # softmax
    with tf.variable_scope('softmax'):
        self.W_softmax = weight_variable(is_trainable=is_trainable,
                                         shape=[256, NUM_CLASSES],
                                         stddev=0.01)
        self.b_softmax = bias_variable(is_trainable=is_trainable,
                                       shape=[NUM_CLASSES])
        self.probabilities = tf.nn.softmax(
            tf.matmul(self.h_fc1_drop, self.W_softmax) + self.b_softmax)

    # Finalize the predictions, the optimizing function, loss/accuracy stats etc.
    self._set_predictions_optimizer_and_loss()
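# `lr_norm(x, 4)` above is assumed to apply local response normalization with a
# depth radius of 4. A minimal sketch using tf.nn.lrn; the bias/alpha/beta values
# here are the common CIFAR-10-example defaults, not necessarily this repo's:
def lr_norm_sketch(x, depth_radius):
    """Local response normalization across neighbouring channels."""
    return tf.nn.lrn(x, depth_radius=depth_radius, bias=1.0,
                     alpha=0.001 / 9.0, beta=0.75)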
def __init__(self,
             is_primary_model,
             is_trainable,
             filter_sizes=(1, 2, 3),
             num_filters=128,
             activation=tf.nn.tanh):
    """Define the TF elements for HieCoAtt's text representation.

    Parameters
    ----------
    :param filter_sizes: tuple, containing the different filter sizes for convolution
    :param num_filters: int, number of filters of each size
    :param activation: activation function from tf.nn (only tf.nn.relu, tf.nn.tanh supported)
    :param is_primary_model: bool, whether this model's output is used to perform the task
    :param is_trainable: bool, whether parameters can be updated during training
    """
    assert activation == tf.nn.relu or activation == tf.nn.tanh
    self.activation = activation
    self.initializer_type = 'xavier' if self.activation == tf.nn.tanh else 'normal'

    with tf.variable_scope(ModelName.hie_text.name):
        super(HieText, self).__init__(model_name=ModelName.hie_text,
                                      is_primary_model=is_primary_model,
                                      is_trainable=is_trainable)

        # Convert num_filters to a list with one entry per filter size
        if isinstance(num_filters, int):
            num_filters = [num_filters] * len(filter_sizes)
        # The dimension of the phrase-level features must match the dimension of
        # the word-level features, so num_filters has to match EMBED_SIZES.
        assert num_filters == EMBED_SIZES

        self.config = {
            'filter_sizes': filter_sizes,
            'num_filters': num_filters,
            'activation': 'relu' if self.activation == tf.nn.relu else 'tanh'
        }

        # Shape of self.word_level: [BATCH_SIZE, TEXT_LENGTH, EMBED_SIZES]
        self.word_level = self.seq_embedded

        # Convolution for phrase level
        self.conv_output = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, EMBED_SIZES, 1, num_filters[i]]
                W = weight_variable(is_trainable=is_trainable,
                                    shape=filter_shape,
                                    initializer_type=self.initializer_type,
                                    name='W%d' % i)
                # (h+2p-f)/s + 1 = h with s=1: same-height padding
                # (see the worked example after this constructor)
                twopadding = filter_size - 1
                top_padding = twopadding // 2
                bottom_padding = twopadding - top_padding
                self.x_padded = tf.pad(
                    self.x_text,
                    [[0, 0], [top_padding, bottom_padding], [0, 0], [0, 0]])
                conv = tf.nn.conv2d(self.x_padded, W, strides=[1, 1, 1, 1],
                                    padding='VALID', name="conv")
                bn_conv = batch_norm(conv, num_filters[i], self.phase_train)
                h = self.activation(bn_conv, name="activation")
                self.conv_output.append(h)

        # Shape of full_conv_output: [BATCH_SIZE, TEXT_LENGTH, len(filter_sizes), num_filters]
        full_conv_output = tf.concat(self.conv_output, 2)
        # Phrase-level output shape: [BATCH_SIZE, TEXT_LENGTH, num_filters]
        self.phase_level = tf.reduce_max(full_conv_output, 2)

        # Sentence level
        lstm_cell = rnn.BasicLSTMCell(EMBED_SIZES)
        if self.dropout_keep_prob is not None:
            lstm_cell = rnn.DropoutWrapper(lstm_cell,
                                           output_keep_prob=self.dropout_keep_prob)
        self.lstm_outputs, states = tf.nn.dynamic_rnn(lstm_cell,
                                                      self.phase_level,
                                                      dtype=tf.float32)
        # Sentence-level text output: [BATCH_SIZE, TEXT_LENGTH, num_filters]
        self.sentence_level = tf.concat(self.lstm_outputs, 1)

        # Concatenate the different levels.
        # We tried the hierarchical approach of Lu et al., but it gave inferior results.
        self.final_text_embedding_spatial = tf.concat(
            values=(self.word_level, self.phase_level, self.sentence_level),
            axis=-1)
        self.final_text_embedding = tf.reduce_mean(
            self.final_text_embedding_spatial, axis=1)

        # Add dropout
        with tf.variable_scope("dropout"):
            self.final_text_embedding = tf.nn.dropout(self.final_text_embedding,
                                                      self.dropout_keep_prob)
        self.final_embedding_dimension = self.final_text_embedding.shape[1].value

        if is_trainable:
            # Final (unnormalized) scores and predictions
            with tf.variable_scope("output"):
                W_o = weight_variable(
                    is_trainable=is_trainable,
                    shape=[self.final_embedding_dimension, NUM_CLASSES],
                    name='W_o',
                    initializer_type='xavier' if USE_MULTILABEL else 'normal')
                b_o = bias_variable(is_trainable=is_trainable,
                                    shape=[NUM_CLASSES],
                                    name='b_o')
                self.scores = tf.nn.xw_plus_b(self.final_text_embedding,
                                              W_o, b_o, name="scores")

        if self.is_primary_model:
            self._set_predictions_optimizer_and_loss()
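# Worked example of the "same height" padding used in the phrase-level
# convolutions above: with stride 1 the output length is h + top + bottom - f + 1,
# which stays equal to h exactly when top + bottom = f - 1 (= twopadding).
def _same_height_padding(filter_size):
    """Return (top, bottom) padding that preserves the sequence length."""
    twopadding = filter_size - 1
    top = twopadding // 2
    bottom = twopadding - top
    return top, bottom

# filter_size=1 -> (0, 0); filter_size=2 -> (0, 1); filter_size=3 -> (1, 1)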
def __init__(self,
             filter_sizes=(1, 2, 3),
             num_filters=(128, 256, 256),
             activation=tf.nn.relu,
             is_primary_model=True,
             is_trainable=True,
             embed_size_multiplier=1.):
    """Create an embedding matrix, a lookup procedure on it for text sequence input,
    convolution using multiple filter sizes, and final optimizations if is_primary_model=True.

    Parameters
    ----------
    :param filter_sizes: tuple, containing the different filter sizes for convolution
    :param num_filters: int or tuple of the same length as filter_sizes, denoting the
        number of filters of each size. If int, the same number of filters is used
        for every size.
    :param activation: activation function from tf.nn (only tf.nn.relu, tf.nn.tanh supported)
    :param is_primary_model: bool, whether this model's output is used to perform the task
    :param is_trainable: bool, whether parameters can be updated during training
    :param embed_size_multiplier: float, factor with which to multiply the default embedding size
    """
    with tf.variable_scope(ModelName.text_cnn.name):
        super(TextCNN, self).__init__(model_name=ModelName.text_cnn,
                                      is_primary_model=is_primary_model,
                                      is_trainable=is_trainable,
                                      embed_size_multiplier=embed_size_multiplier)

        # Convert num_filters to a list with one entry per filter size
        if isinstance(num_filters, int):
            num_filters = [num_filters] * len(filter_sizes)
        assert len(num_filters) == len(filter_sizes)
        assert activation == tf.nn.relu or activation == tf.nn.tanh

        self.config = {
            'filter_sizes': filter_sizes,
            'num_filters': num_filters,
            'activation': 'relu' if activation == tf.nn.relu else 'tanh'
        }

        # convolution + maxpool
        self.pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.embedding_size, 1, num_filters[i]]
                initializer_type = "normal" if activation == tf.nn.relu else "xavier"
                W_o = weight_variable(is_trainable=is_trainable,
                                      shape=filter_shape,
                                      initializer_type=initializer_type,
                                      name='W%d' % i)
                # Pad to prevent dimension reduction:
                # (h+2p-f)/s + 1 = h with s=1 gives total padding f - 1
                twopadding = filter_size - 1
                top_padding = twopadding // 2
                bottom_padding = twopadding - top_padding
                self.x_padded = tf.pad(
                    self.x_text,
                    [[0, 0], [top_padding, bottom_padding], [0, 0], [0, 0]])
                # Convolution + batch norm + activation
                conv = tf.nn.conv2d(self.x_padded, W_o, strides=[1, 1, 1, 1],
                                    padding='VALID', name="conv")
                bn_conv = batch_norm(conv, num_filters[i], self.phase_train)
                h = activation(bn_conv, name="activation")
                # Max-pooling over the outputs
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, TEXT_LENTH, 1, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        name="pool")
                self.pooled_outputs.append(pooled)

        # Combine all the pooled features
        self.final_embedding_dimension = sum(num_filters)
        self.h_pool = tf.concat(self.pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool,
                                      [-1, self.final_embedding_dimension])

        # Add dropout
        with tf.variable_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        if is_trainable:
            # Final (unnormalized) scores and predictions
            with tf.variable_scope("output"):
                W_o = weight_variable(
                    is_trainable=is_trainable,
                    shape=[self.final_embedding_dimension, NUM_CLASSES],
                    name='W_o',
                    initializer_type='xavier' if USE_MULTILABEL else 'normal')
                b_o = bias_variable(is_trainable=is_trainable,
                                    shape=[NUM_CLASSES],
                                    name='b_o')
                self.scores = tf.nn.xw_plus_b(self.h_drop, W_o, b_o,
                                              name="scores")  # unnormalized scores

        if self.is_primary_model:
            self._set_predictions_optimizer_and_loss()
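# Both HieText and TextCNN call batch_norm(conv, n_out, phase_train). A minimal
# sketch of such a helper, assuming phase_train is a boolean tensor that selects
# batch statistics during training and moving averages at inference; the repo's
# actual implementation may differ (e.g. in its decay value), so this is only
# illustrative and deliberately named with a _sketch suffix.
def batch_norm_sketch(x, n_out, phase_train, decay=0.99, eps=1e-3):
    """Batch normalization over the batch and spatial axes of a conv output."""
    beta = tf.Variable(tf.zeros([n_out]), name='beta')
    gamma = tf.Variable(tf.ones([n_out]), name='gamma')
    batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
    ema = tf.train.ExponentialMovingAverage(decay=decay)

    def mean_var_with_update():
        ema_apply_op = ema.apply([batch_mean, batch_var])
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)

    mean, var = tf.cond(phase_train,
                        mean_var_with_update,
                        lambda: (ema.average(batch_mean), ema.average(batch_var)))
    return tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)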