def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
  """
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=self.train_inception,
      is_training=self.is_training())
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # Map inception output into embedding space.
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  self.image_embeddings = image_embeddings
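# A minimal sketch of how the self.inception_variables collected above is
# typically consumed: restoring the pre-trained InceptionV3 weights before
# training the rest of the model. The config field
# `inception_checkpoint_file` is an assumed name, not defined above.
def setup_inception_initializer(self):
  """Sets up the function to restore Inception variables from checkpoint."""
  if self.mode != "inference":
    # Restore Inception variables only; the rest of the model is trained
    # from scratch.
    saver = tf.train.Saver(self.inception_variables)

    def restore_fn(sess):
      tf.logging.info("Restoring Inception variables from checkpoint %s",
                      self.config.inception_checkpoint_file)
      saver.restore(sess, self.config.inception_checkpoint_file)

    self.init_fn = restore_fn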
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
  """
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=self.train_inception,
      is_training=self.is_training())
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # Map inception output into embedding space.
  # Hint: the pre-trained InceptionV3 model already flattens its final
  # output, so we only need to map this 1-D vector to a fixed length
  # (embedding_size).
  with tf.variable_scope("image_embedding") as scope:
    # use_bias=False mirrors biases_initializer=None in the
    # tf.contrib.layers.fully_connected variants; passing
    # bias_initializer=None to tf.layers.dense would NOT disable the bias.
    image_embeddings = tf.layers.dense(
        inputs=inception_output,
        units=self.config.embedding_size,
        activation=None,
        kernel_initializer=self.initializer,
        use_bias=False,
        name=scope.name)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  self.image_embeddings = image_embeddings
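# Why the bare tf.constant above: it bakes the embedding size into the graph
# itself, so inference code that loads only the serialized GraphDef can
# recover it without the original config object. A minimal sketch of reading
# it back (the function name and `sess` argument are illustrative, not part
# of the model class):
def read_embedding_size(sess):
  """Fetches the embedding size constant stored in the default graph."""
  graph = tf.get_default_graph()
  return sess.run(graph.get_tensor_by_name("embedding_size:0"))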
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings.

  Builds the InceptionV3 image-encoding subnetwork and produces the image
  embedding features.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
  """
  # Get the model output, shape (batch, 2048).
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=self.train_inception,
      is_training=self.is_training())
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # Map the inception output into embedding space, turning the image
  # features into a 512-dimensional vector representation, shape (batch, 512).
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  self.image_embeddings = image_embeddings
def build_image_embeddings(self):
  """Maps InceptionV3 output to image embeddings (minimal variant)."""
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=self.train_inception,
      is_training=self.is_training())
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)
  tf.constant(self.config.embedding_size, name="embedding_size")
  self.image_embeddings = image_embeddings
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
  """
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=self.train_inception,
      is_training=self.is_training())

  # Map inception output onto embedding space.
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.sentence_embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  if self.mode == "train":
    # To avoid overfitting, apply dropout to all fully connected layers.
    image_embeddings = tf.nn.dropout(
        image_embeddings, self.config.dropout_keep_prob_encoder)

  # Save the embedding size in the graph.
  tf.constant(self.config.sentence_embedding_size,
              name="image_embedding_size")

  self.image_embeddings = image_embeddings
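# The variant above gates dropout on self.mode while the inception call uses
# self.is_training(); in these im2txt-style models the two are linked. A
# minimal sketch of the helper, assuming mode is one of "train", "eval",
# "inference":
def is_training(self):
  """Returns true if the model is built for training mode."""
  return self.mode == "train"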
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
  """
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=self.train_inception,
      is_training=self.is_training())
  # The variables live under the "InceptionV3" scope created by
  # image_embedding.inception_v3; collecting under "InceptionV4" would
  # return an empty list.
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # Map inception output into embedding space.
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  self.image_embeddings = image_embeddings
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings in
  visual semantic joint space and RNN prediction space.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
    self.rnn_image_embeddings
  """
  # Reshape 5D image tensor.
  images = tf.reshape(
      self.images,
      [-1, self.config.image_height, self.config.image_height, 3])

  inception_output = image_embedding.inception_v3(
      images,
      trainable=self.train_inception,
      is_training=self.is_training())
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # Map inception output into embedding space.
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)
  with tf.variable_scope("rnn_image_embedding") as scope:
    rnn_image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  self.image_embeddings = tf.reshape(
      image_embeddings,
      [tf.shape(self.images)[0], -1, self.config.embedding_size])
  self.rnn_image_embeddings = tf.reshape(
      rnn_image_embeddings,
      [tf.shape(self.images)[0], -1, self.config.embedding_size])
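# Shape walkthrough for the variant above, assuming self.images holds a
# batch of image *sets* of shape [batch, num_images, height, height, 3]
# (num_images is inferred from the reshape logic, not stated above). A
# runnable numpy sketch of the reshape contract:
import numpy as np

batch, num_images, height, embedding_size = 2, 5, 299, 512
images = np.zeros([batch, num_images, height, height, 3], np.float32)
flat = images.reshape([-1, height, height, 3])  # 4D input for Inception
assert flat.shape == (batch * num_images, height, height, 3)
# inception_v3 pools to a 2048-d vector per image; the fully connected
# layer then maps it to embedding_size.
embeddings = np.zeros([flat.shape[0], embedding_size], np.float32)
grouped = embeddings.reshape([batch, -1, embedding_size])  # regroup per set
assert grouped.shape == (batch, num_images, embedding_size)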
def testTrainableFalseIsTrainingTrue(self):
  embeddings = image_embedding.inception_v3(
      self._images, trainable=False, is_training=True)
  self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())

  self._verifyParameterCounts()
  self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES)
  self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES)
  self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)
  self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES)
  self._assertCollectionSize(0, tf.GraphKeys.LOSSES)
  self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)
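# A minimal sketch of the fixtures the test above relies on, assuming the
# im2txt-style test class layout (the names match the calls above, but the
# exact setup is an assumption; _verifyParameterCounts, which checks exact
# variable counts, is omitted here):
class InceptionV3Test(tf.test.TestCase):

  def setUp(self):
    super(InceptionV3Test, self).setUp()
    self._batch_size = 4
    # InceptionV3 expects 299x299x3 inputs.
    self._images = tf.placeholder(
        tf.float32, [self._batch_size, 299, 299, 3])

  def _assertCollectionSize(self, expected_size, collection):
    actual_size = len(tf.get_collection(collection))
    self.assertEqual(
        expected_size, actual_size,
        "Collection %s has size %d, expected %d." %
        (collection, actual_size, expected_size))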
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
  """
  if self.cnn_model == 'InceptionV3':
    # Image embedding by InceptionV3.
    cnn_output = image_embedding.inception_v3(
        self.images,
        trainable=self.train_cnn_model,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")
  elif self.cnn_model == 'VGG19':
    # Image embedding by VGG19.
    cnn_output = image_embedding.vgg_19(
        self.images,
        trainable=self.train_cnn_model,
        is_training=self.is_training())
    self.vgg19_variables = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope="vgg_19")
  else:
    raise ValueError(
        'unknown cnn model {0} for image embedding'.format(self.cnn_model))

  # Map the inception/vgg output into embedding space. The fully connected
  # layer adapts to whichever feature size the chosen CNN produces.
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=cnn_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  self.image_embeddings = image_embeddings
def read_data(session, im_filenames, source_path, target_path, max_size=None):
  """Read data from source and target files and put into buckets.

  Args:
    im_filenames: text file containing all image paths.
    source_path: path to the files with token-ids for the source language.
    target_path: path to the file with token-ids for the target language;
      it must be aligned with the source file: the n-th line contains the
      desired output for the n-th line of source_path.
    max_size: maximum number of lines to read; all others will be ignored.
      If 0 or None, data files are read completely (no limit).

  Returns:
    data_set: a list of length len(_buckets); data_set[n] contains a list of
      (im, target) pairs read from the provided data files that fit into the
      n-th bucket, i.e., such that len(source) < _buckets[n][0] and
      len(target) < _buckets[n][1]; source and target are lists of token-ids.
  """
  data_set = [[] for _ in _buckets]
  im_placeholder = tf.placeholder(shape=[1, 299, 299, 3], dtype=tf.float32)
  inception_output_tensor = image_embedding.inception_v3(
      im_placeholder,
      trainable=False,
      is_training=image_embedding.is_training(gConfig['mode']))
  with tf.gfile.GFile(im_filenames, mode="r") as im_file:
    with tf.gfile.GFile(source_path, mode="r") as source_file:
      with tf.gfile.GFile(target_path, mode="r") as target_file:
        im, source, target = (im_file.readline(), source_file.readline(),
                              target_file.readline())
        counter = 0
        # NOTE: the image-loading tensor is built once, from the first
        # filename; the loop below re-runs this same tensor each iteration.
        image_tensor = load_jpeg_with_tensorflow.get_image_tensor(
            im.rstrip())
        tf.global_variables_initializer().run()
        coordinator = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coordinator)
        while source and target and (not max_size or counter < max_size):
          counter += 1
          if counter % 100000 == 0:
            print(" reading data line %d" % counter)
            sys.stdout.flush()
          # Extract the image feature vector.
          image = session.run(image_tensor)
          inception_output = session.run(
              inception_output_tensor, feed_dict={im_placeholder: image})
          source_ids = [int(x) for x in source.split()]
          target_ids = [int(x) for x in target.split()]
          target_ids.append(data_utils.EOS_ID)
          # Place the pair into the smallest bucket that fits both lengths.
          for bucket_id, (source_size, target_size) in enumerate(_buckets):
            if (len(source_ids) < source_size and
                len(target_ids) < target_size):
              data_set[bucket_id].append([inception_output, target_ids])
              break
          im, source, target = (im_file.readline(), source_file.readline(),
                                target_file.readline())
        coordinator.request_stop()
        coordinator.join(threads)
  return data_set
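# A minimal usage sketch for read_data above. The file paths are
# placeholders, and _buckets, gConfig, and the helper modules are assumed to
# be configured as in the surrounding project:
with tf.Session() as sess:
  data_set = read_data(
      sess,
      im_filenames="data/train_images.txt",
      source_path="data/train_source.ids",
      target_path="data/train_target.ids",
      max_size=10000)
  for bucket_id, bucket in enumerate(data_set):
    print("bucket %d: %d (image_features, target_ids) pairs" %
          (bucket_id, len(bucket)))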