Beispiel #1
0
    def baseline(self, vecQuestions, questionDim, images, imageDim, hDim):
        """Build the baseline model's memory from question and/or image features.

        With ``config.baselineAtt`` set, the question and the images are both
        projected to ``hDim`` and the memory is refined by
        ``config.baselineAttNumLayers`` calls to ``self.baselineAttLayer``.
        Otherwise the images are passed through ``ops.linearizeFeatures`` and
        the memory is the question vector, the linearized images, or their
        concatenation, depending on ``config.baselineLSTM`` and
        ``config.baselineCNN``.

        Args:
            vecQuestions: question representation tensor.
            questionDim: dimension of ``vecQuestions`` (used as the input size
                of the question projection and in the reported ``memDim``).
            images: image feature tensor.
            imageDim: feature dimension of ``images`` on entry.
            hDim: hidden dimension used by the attention variant.

        Returns:
            A ``(memory, memDim)`` tuple: the memory tensor and the dimension
            reported for it.
        """
        with tf.variable_scope("baseline"):
            if config.baselineAtt:
                # NOTE(review): this branch calls self.linear while the other
                # branch goes through the ops module -- confirm the enclosing
                # class really defines a `linear` method.
                memory = self.linear(vecQuestions,
                                     questionDim,
                                     hDim,
                                     name="qProj")
                images = self.linear(images, imageDim, hDim, name="iProj")

                # Each layer consumes the previous memory and produces the next.
                for i in range(config.baselineAttNumLayers):
                    memory = self.baselineAttLayer(images,
                                                   memory,
                                                   hDim,
                                                   hDim,
                                                   name="baseline%d" % i)
                memDim = hDim
            else:
                # Rebind imageDim to the dimension returned alongside the
                # linearized images. Previously that value was stored in an
                # unused local, so memDim below reported the pre-linearization
                # size instead of the size of the tensor actually returned.
                images, imageDim = ops.linearizeFeatures(
                    images,
                    self.H,
                    self.W,
                    imageDim,
                    projDim=config.baselineProjDim)
                if config.baselineLSTM and config.baselineCNN:
                    memory = tf.concat([vecQuestions, images], axis=-1)
                    memDim = questionDim + imageDim
                elif config.baselineLSTM:
                    memory = vecQuestions
                    memDim = questionDim
                else:  # config.baselineCNN
                    memory = images
                    memDim = imageDim

        return memory, memDim
Beispiel #2
0
    def outputOp(self, memory, vecQuestions, images, imageInDim):
        """Assemble the feature vector handed to the output stage.

        Starts from ``memory`` (reported with dimension ``config.memDim``) and
        optionally merges in a projection of the question
        (``config.outQuestion``) and a projection of the linearized image
        features (``config.outImage``).

        Args:
            memory: memory tensor used as the base features.
            vecQuestions: question representation tensor.
            images: image feature tensor.
            imageInDim: not read by this method; ``self.imageInDim`` is used
                instead.

        Returns:
            A ``(features, dim)`` tuple: the combined tensor and the dimension
            reported for it.
        """
        with tf.variable_scope("outputUnit"):
            outFeatures, outDim = memory, config.memDim

            if config.outQuestion:
                # Project the question from ctrlDim to memDim before merging.
                questionProj = ops.linear(
                    vecQuestions, config.ctrlDim, config.memDim,
                    name="outQuestion")
                outFeatures, outDim = ops.concat(
                    outFeatures, questionProj, config.memDim,
                    mul=config.outQuestionMul)

            if config.outImage:
                # NOTE(review): the imageInDim argument is ignored in favour of
                # self.imageInDim -- confirm callers always pass the same value.
                flatImages, _ = ops.linearizeFeatures(
                    images, self.H, self.W, self.imageInDim,
                    outputDim=config.outImageDim)
                # NOTE(review): the input size given here is config.memDim,
                # while the dimension returned by linearizeFeatures is
                # discarded -- presumably relies on the two being equal; verify.
                imageProj = ops.linear(
                    flatImages, config.memDim, config.outImageDim,
                    name="outImage")
                outFeatures = tf.concat([outFeatures, imageProj], axis=-1)
                outDim += config.outImageDim

        return outFeatures, outDim
    def baseline(self, vecQuestions, questionDim, images, imageDim, hDim):
        """Build the baseline model's memory from question and/or image features.

        Without ``config.baselineAtt`` the images are first reduced -- by an
        attention summary when ``config.imageObjects`` is set, otherwise via
        ``ops.linearizeFeatures`` -- and the memory is then the question
        vector, the reduced images, or their concatenation, depending on
        ``config.baselineLSTM`` and ``config.baselineCNN``. With
        ``config.baselineAtt`` both inputs are projected to ``hDim`` and the
        memory is refined by ``config.baselineAttNumLayers`` calls to
        ``self.baselineAttLayer``.

        Args:
            vecQuestions: question representation tensor.
            questionDim: dimension of ``vecQuestions``.
            images: image feature tensor.
            imageDim: feature dimension of ``images`` on entry.
            hDim: hidden dimension used by the attention variant.

        Returns:
            A ``(memory, memDim)`` tuple: the memory tensor and the dimension
            reported for it.
        """
        with tf.variable_scope("baseline"):
            if not config.baselineAtt:
                if config.imageObjects:
                    # Trainable vector of size imageDim, multiplied with the
                    # images to obtain the attention inputs.
                    objectWeights = tf.get_variable(
                        "cff",
                        shape=(imageDim, ),
                        initializer=tf.random_normal_initializer())
                    scores, scoresDim = ops.mul(images, objectWeights,
                                                imageDim)
                    objectAtt = ops.inter2att(scores,
                                              scoresDim,
                                              mask=self.imagesObjectNum)
                    # imageDim is left unchanged on this path.
                    images = ops.att2Smry(objectAtt, images)
                else:
                    # imageDim is rebound to the dimension returned with the
                    # linearized features.
                    images, imageDim = ops.linearizeFeatures(
                        images,
                        self.H,
                        self.W,
                        imageDim,
                        projDim=config.baselineProjDim)

                if not config.baselineLSTM:
                    # Image-only memory (the config.baselineCNN case).
                    memory, memDim = images, imageDim
                elif config.baselineCNN:
                    memory = tf.concat([vecQuestions, images], axis=-1)
                    memDim = questionDim + imageDim
                else:
                    # Question-only memory.
                    memory, memDim = vecQuestions, questionDim
            else:
                memory = ops.linear(vecQuestions, questionDim, hDim,
                                    name="qProj")
                images = ops.linear(images, imageDim, hDim, name="iProj")

                # Each layer consumes the previous memory and produces the next.
                for layerIdx in range(config.baselineAttNumLayers):
                    memory = self.baselineAttLayer(images,
                                                   memory,
                                                   hDim,
                                                   hDim,
                                                   name="baseline%d" % layerIdx)
                memDim = hDim

        return memory, memDim