def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings.

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
    """
    inception_output = image_embedding.inception_v3(
        self.images,
        trainable=self.train_inception,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space.
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")

    self.image_embeddings = image_embeddings
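# Because the layer above uses activation_fn=None and biases_initializer=None, the
# mapping is a single learned matrix multiply. A minimal NumPy sketch of the shape
# arithmetic, assuming the usual 2048-dim pooled InceptionV3 feature and a 512-dim
# embedding (illustrative values, not read from self.config):
import numpy as np

batch_size, inception_dim, embedding_size = 32, 2048, 512

# Stand-ins for the pooled InceptionV3 output and the learned projection weights.
inception_output = np.random.rand(batch_size, inception_dim).astype(np.float32)
weights = np.random.rand(inception_dim, embedding_size).astype(np.float32)

# With no activation and no bias, fully_connected reduces to a plain matmul.
image_embeddings = inception_output @ weights
print(image_embeddings.shape)  # (32, 512)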
Example #2
    def build_image_embeddings(self):
        """Builds the image model subgraph and generates image embeddings.

        Inputs:
          self.images

        Outputs:
          self.image_embeddings
        """
        inception_output = image_embedding.inception_v3(
            self.images,
            trainable=self.train_inception,
            is_training=self.is_training())
        self.inception_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

        # Map inception output into embedding space.
        # Hint: the pre-trained InceptionV3 model already flattens its final output,
        # so we only need to map this one-dimensional vector to a fixed length (embedding_size).
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.layers.dense(
                inputs=inception_output,
                units=self.config.embedding_size,
                activation=None,
                kernel_initializer=self.initializer,
                use_bias=False,  # no bias, matching biases_initializer=None in the contrib layer
                name=scope.name)

        # Save the embedding size in the graph.
        tf.constant(self.config.embedding_size, name="embedding_size")

        self.image_embeddings = image_embeddings
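# For reference, a hedged sketch of the same projection written against tf.keras
# (TF 2.x). use_bias=False mirrors biases_initializer=None in the contrib layer;
# the batch size, feature size, and layer name below are illustrative only.
import tensorflow as tf

embedding_size = 512  # assumed value for self.config.embedding_size

embed = tf.keras.layers.Dense(
    units=embedding_size,
    activation=None,
    use_bias=False,
    name="image_embedding")

inception_output = tf.zeros([8, 2048])  # dummy pooled InceptionV3 features
image_embeddings = embed(inception_output)
print(image_embeddings.shape)  # (8, 512)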
Example #3
    def build_image_embeddings(self):
        """Builds the image model subgraph and generates image embeddings.
    建立图片编码模型子网络InceptionV3,生成图片embedding特征

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
    """
        # Get the model output, shape (batch, 2048).
        inception_output = image_embedding.inception_v3(
            self.images,
            trainable=self.train_inception,
            is_training=self.is_training())

        self.inception_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")
        # Map the Inception output into embedding space, converting the image
        # features into a 512-dim vector representation, shape (batch, 512).
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=inception_output,
                num_outputs=self.config.embedding_size,
                activation_fn=None,
                weights_initializer=self.initializer,
                biases_initializer=None,
                scope=scope)

        # Save the embedding size in the graph.
        tf.constant(self.config.embedding_size, name="embedding_size")

        self.image_embeddings = image_embeddings
Example #4
    def build_image_embeddings(self):
        inception_output = image_embedding.inception_v3(
            self.images, trainable=self.train_inception, is_training=self.is_training())
        self.inception_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")
        # Map inception output into embedding space.
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=inception_output, num_outputs=self.config.embedding_size,
                activation_fn=None, weights_initializer=self.initializer,
                biases_initializer=None, scope=scope)
        # Save the embedding size in the graph.
        tf.constant(self.config.embedding_size, name="embedding_size")
        self.image_embeddings = image_embeddings
Example #5
    def build_image_embeddings(self):
        """Builds the image model subgraph and generates image embeddings.

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
    """
        inception_output = image_embedding.inception_v3(
            self.images,
            trainable=self.train_inception,
            is_training=self.is_training())

        # Map inception output onto embedding space.
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=inception_output,
                num_outputs=self.config.sentence_embedding_size,
                activation_fn=None,
                weights_initializer=self.initializer,
                biases_initializer=None,
                scope=scope)

        if self.mode == "train":
            # To avoid overfitting, apply dropout to all fully connected layers.
            image_embeddings = tf.nn.dropout(
                image_embeddings, self.config.dropout_keep_prob_encoder)

        # Save the embedding size in the graph.
        tf.constant(self.config.sentence_embedding_size,
                    name="image_embedding_size")

        self.image_embeddings = image_embeddings
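# The train-only dropout above can be illustrated standalone. A minimal TF 2.x
# sketch; the keep probability and shapes are placeholders, not values taken
# from the original config.
import tensorflow as tf

def apply_encoder_dropout(image_embeddings, mode, keep_prob=0.7):
    """Apply dropout to the embeddings only while training."""
    if mode == "train":
        # tf.nn.dropout in TF 2.x expects a drop rate rather than a keep probability.
        return tf.nn.dropout(image_embeddings, rate=1.0 - keep_prob)
    return image_embeddings

embeddings = tf.ones([4, 512])
print(apply_encoder_dropout(embeddings, mode="train").shape)  # (4, 512)
print(apply_encoder_dropout(embeddings, mode="eval").shape)   # (4, 512)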
    def build_image_embeddings(self):
        """Builds the image model subgraph and generates image embeddings.

        Inputs:
          self.images

        Outputs:
          self.image_embeddings
        """
        inception_output = image_embedding.inception_v3(
            self.images,
            trainable=self.train_inception,
            is_training=self.is_training())
        self.inception_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV4")

        # Map inception output into embedding space.
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=inception_output,
                num_outputs=self.config.embedding_size,
                activation_fn=None,
                weights_initializer=self.initializer,
                biases_initializer=None,
                scope=scope)

        # Save the embedding size in the graph.
        tf.constant(self.config.embedding_size, name="embedding_size")

        self.image_embeddings = image_embeddings
    def build_image_embeddings(self):
        """Builds the image model subgraph and generates image embeddings
      in visual semantic joint space and RNN prediction space.

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
      self.rnn_image_embeddings
    """

        # Reshape 5D image tensor.
        images = tf.reshape(
            self.images,
            [-1, self.config.image_height, self.config.image_height, 3])

        inception_output = image_embedding.inception_v3(
            images,
            trainable=self.train_inception,
            is_training=self.is_training())
        self.inception_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

        # Map inception output into embedding space.
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=inception_output,
                num_outputs=self.config.embedding_size,
                activation_fn=None,
                weights_initializer=self.initializer,
                biases_initializer=None,
                scope=scope)

        with tf.variable_scope("rnn_image_embedding") as scope:
            rnn_image_embeddings = tf.contrib.layers.fully_connected(
                inputs=inception_output,
                num_outputs=self.config.embedding_size,
                activation_fn=None,
                weights_initializer=self.initializer,
                biases_initializer=None,
                scope=scope)

        # Save the embedding size in the graph.
        tf.constant(self.config.embedding_size, name="embedding_size")
        self.image_embeddings = tf.reshape(
            image_embeddings,
            [tf.shape(self.images)[0], -1, self.config.embedding_size])

        self.rnn_image_embeddings = tf.reshape(
            rnn_image_embeddings,
            [tf.shape(self.images)[0], -1, self.config.embedding_size])
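# The example above collapses a 5-D batch of image sets into a 4-D batch for
# Inception and then restores the per-set grouping on the embeddings. A NumPy
# sketch of just the shape bookkeeping; the sizes below are illustrative.
import numpy as np

batch, images_per_set, height, embedding_size = 2, 5, 299, 512

images_5d = np.zeros([batch, images_per_set, height, height, 3], dtype=np.float32)

# Collapse the first two axes so the CNN sees an ordinary 4-D image batch.
images_4d = images_5d.reshape([-1, height, height, 3])
print(images_4d.shape)  # (10, 299, 299, 3)

# After the CNN and embedding layer there is one row per image ...
flat_embeddings = np.zeros([images_4d.shape[0], embedding_size], dtype=np.float32)

# ... which is regrouped into one row of embeddings per image in each set.
grouped_embeddings = flat_embeddings.reshape([batch, -1, embedding_size])
print(grouped_embeddings.shape)  # (2, 5, 512)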
Example #8
    def testTrainableFalseIsTrainingTrue(self):
        embeddings = image_embedding.inception_v3(self._images,
                                                  trainable=False,
                                                  is_training=True)
        self.assertEqual([self._batch_size, 2048],
                         embeddings.get_shape().as_list())

        self._verifyParameterCounts()
        self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES)
        self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES)
        self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)
        self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES)
        self._assertCollectionSize(0, tf.GraphKeys.LOSSES)
        self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)
    def build_image_embeddings(self):
        """Builds the image model subgraph and generates image embeddings.

        Inputs:
          self.images

        Outputs:
          self.image_embeddings
        """

        if self.cnn_model == 'InceptionV3':
            # image embedding by inception_v3
            cnn_output = image_embedding.inception_v3(
                self.images,
                trainable=self.train_cnn_model,
                is_training=self.is_training())
            self.inception_variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")
        elif self.cnn_model == 'VGG19':
            # image embedding by vgg19
            cnn_output = image_embedding.vgg_19(self.images,
                                                trainable=self.train_cnn_model,
                                                is_training=self.is_training())
            self.vgg19_variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope="vgg_19")
        else:
            raise ValueError('unknown cnn model {0} for image embedding'.format(
                self.cnn_model))

        # Map inception/vgg output into embedding space.
        with tf.variable_scope("image_embedding") as scope:
            image_embeddings = tf.contrib.layers.fully_connected(
                inputs=cnn_output,
                num_outputs=self.config.embedding_size,
                activation_fn=None,
                weights_initializer=self.initializer,
                biases_initializer=None,
                scope=scope)

        # Save the embedding size in the graph.
        tf.constant(self.config.embedding_size, name="embedding_size")

        self.image_embeddings = image_embeddings
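# A sketch of table-driven backbone selection as an alternative to the if/elif
# chain above. Stand-in functions are used so the snippet runs on its own; in
# the real model they would be image_embedding.inception_v3 and
# image_embedding.vgg_19, and the error would abort model construction.
def fake_inception_v3(images, trainable, is_training):
    return "inception_features"

def fake_vgg_19(images, trainable, is_training):
    return "vgg_features"

CNN_BACKBONES = {
    "InceptionV3": fake_inception_v3,
    "VGG19": fake_vgg_19,
}

def cnn_features(cnn_model, images, trainable, is_training):
    try:
        backbone = CNN_BACKBONES[cnn_model]
    except KeyError:
        raise ValueError(
            "unknown cnn model {0} for image embedding".format(cnn_model))
    return backbone(images, trainable=trainable, is_training=is_training)

print(cnn_features("VGG19", images=None, trainable=False, is_training=False))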
Example #10
def read_data(session, im_filenames, source_path, target_path, max_size=None):
    """Read data from source and target files and put into buckets.

    Args:
      session: TensorFlow session used to run the image pipeline and the
        Inception graph.
      im_filenames: text file containing all image paths.
      source_path: path to the file with token-ids for the source language.
      target_path: path to the file with token-ids for the target language;
        it must be aligned with the source file: the n-th line contains the
        desired output for the n-th line of source_path.
      max_size: maximum number of lines to read; all others will be ignored.
        If 0 or None, the data files are read completely (no limit).

    Returns:
      data_set: a list of length len(_buckets); data_set[n] contains a list of
        (im, target) pairs read from the provided data files that fit into the
        n-th bucket, i.e., such that len(source) < _buckets[n][0] and
        len(target) < _buckets[n][1]; source and target are lists of token-ids.
    """
    data_set = [[] for _ in _buckets]
    im_placeholder = tf.placeholder(shape=[1, 299, 299, 3], dtype=tf.float32)
    inception_output_tensor = image_embedding.inception_v3(
        im_placeholder,
        trainable=False,
        is_training=image_embedding.is_training(gConfig['mode']))

    with tf.gfile.GFile(im_filenames, mode="r") as im_file:
        with tf.gfile.GFile(source_path, mode="r") as source_file:
            with tf.gfile.GFile(target_path, mode="r") as target_file:
                im = im_file.readline()
                source = source_file.readline()
                target = target_file.readline()
                counter = 0
                image_tensor = load_jpeg_with_tensorflow.get_image_tensor(
                    im.rstrip())
                tf.global_variables_initializer().run()
                coordinator = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(coord=coordinator)

                while source and target and (not max_size
                                             or counter < max_size):
                    counter += 1
                    if counter % 100000 == 0:
                        print("  reading data line %d" % counter)
                        sys.stdout.flush()

                    # extract image vector
                    image = session.run(image_tensor)
                    inception_output = session.run(
                        inception_output_tensor,
                        feed_dict={im_placeholder: image})
                    source_ids = [int(x) for x in source.split()]
                    target_ids = [int(x) for x in target.split()]
                    target_ids.append(data_utils.EOS_ID)
                    for bucket_id, (source_size,
                                    target_size) in enumerate(_buckets):
                        if len(source_ids) < source_size and len(
                                target_ids) < target_size:
                            data_set[bucket_id].append(
                                [inception_output, target_ids])
                            break
                    im = im_file.readline()
                    source = source_file.readline()
                    target = target_file.readline()
    coordinator.request_stop()
    coordinator.join(threads)
    return data_set
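# The bucket-assignment rule in read_data (append a pair to the first bucket
# whose limits both exceed the sequence lengths) in isolation. The bucket
# shapes below are illustrative; the real values come from the module-level
# _buckets.
_buckets = [(5, 10), (10, 15), (20, 25)]

def assign_to_bucket(source_ids, target_ids, buckets=_buckets):
    """Return the index of the first bucket both sequences fit into, else None."""
    for bucket_id, (source_size, target_size) in enumerate(buckets):
        if len(source_ids) < source_size and len(target_ids) < target_size:
            return bucket_id
    return None

print(assign_to_bucket([1, 2, 3], [4, 5, 6, 7]))          # 0
print(assign_to_bucket(list(range(8)), list(range(12))))  # 1
print(assign_to_bucket(list(range(30)), list(range(3))))  # None (too long for any bucket)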