Ejemplo n.º 1
0
    def stem(self, images, inDim, outDim, addLoc=None):
        """Build the "stem" sub-graph that turns image features into a flat KB.

        All ops are created under the ``"stem"`` variable scope.

        Args:
            images: input image-feature tensor (presumably laid out as
                (batch, H, W, inDim) -- TODO confirm against the caller).
            inDim: feature dimension of ``images``.
            outDim: feature dimension of the returned tensor.
            addLoc: whether to augment ``images`` with location information
                via ``ops.addLocation`` before the CNN. ``None`` (default)
                falls back to ``config.locationAware``. Only consulted on the
                CNN path, i.e. when ``config.stemLinear`` is false.

        Returns:
            The stem features reshaped to ``(self.batchSize, -1, outDim)``.
        """
        with tf.variable_scope("stem"):
            if addLoc is None:
                addLoc = config.locationAware

            if config.stemLinear:
                features = ops.linear(images, inDim, outDim)
            else:
                # Layer widths: input, (stemNumLayers - 1) hidden layers of
                # config.stemDim, then the output width.
                dims = [inDim] + ([config.stemDim] *
                                  (config.stemNumLayers - 1)) + [outDim]

                if addLoc:
                    images, inDim = ops.addLocation(
                        images,
                        inDim,
                        config.locationDim,
                        h=self.H,
                        w=self.W,
                        locType=config.locationType)
                    # addLocation returns the updated input width; the first
                    # CNN layer must consume that instead of the original.
                    dims[0] = inDim

                features = ops.CNNLayer(
                    images,
                    dims,
                    batchNorm=self.batchNorm if config.stemBN else None,
                    dropout=self.dropouts["stem"],
                    kernelSizes=config.stemKernelSizes,
                    strides=config.stemStrideSizes)

                if config.stemGridRnn:
                    # The grid dimensions live on the instance (as used for
                    # addLocation above); bare H / W are not defined in this
                    # scope and would raise NameError.
                    features = ops.multigridRNNLayer(
                        features, self.H, self.W, outDim)

            # flatten the 2d images into a 1d KB
            features = tf.reshape(features, (self.batchSize, -1, outDim))

        return features
Ejemplo n.º 2
0
    def stem(self, images, inDim, outDim, addLoc=None):
        """Build the "stem" sub-graph that preprocesses image features.

        All ops are created under the ``"stem"`` variable scope. Two paths
        exist:

        * ``config.imageObjects`` set: the input is passed through an optional
          linear layer (``config.stemLinear``) or fully connected stack
          (``config.stemDeep``), then an optional activation
          (``config.stemAct``), and returned immediately without reshaping.
        * otherwise: the input goes through a linear layer or a CNN
          (optionally with location information and a grid RNN) and is
          reshaped to ``(self.batchSize, -1, outDim)`` unless the legacy
          baseline is in use.

        Args:
            images: input image-feature tensor (layout not visible here --
                TODO confirm against the caller).
            inDim: feature dimension of ``images``.
            outDim: requested output feature dimension.
            addLoc: whether to augment ``images`` with location information
                via ``ops.addLocation`` before the CNN. ``None`` (default)
                falls back to ``config.locationAware``. Only consulted on the
                CNN path.

        Returns:
            A ``(features, dim)`` tuple: the stem output tensor and the
            feature dimension reported for it.
        """
        with tf.variable_scope("stem"):
            if config.stemNormalize:
                images = tf.nn.l2_normalize(images, dim=-1)

            if config.imageObjects:
                features, dim = images, inDim
                if config.stemLinear:
                    features = ops.linear(images,
                                          inDim,
                                          outDim,
                                          dropout=self.dropouts["stem"])
                    dim = outDim
                elif config.stemDeep:
                    dims = [inDim] + config.stemDims + [outDim]
                    features = ops.FCLayer(features,
                                           dims,
                                           dropout=self.dropouts["stem"])
                    # NOTE(review): dims ends in outDim, yet `dim` stays at
                    # inDim on this branch -- confirm whether the reported
                    # dimension should be outDim here.

                if config.stemAct != "NON":
                    features = ops.actF(config.stemAct)(features)

                return features, dim

            if addLoc is None:
                addLoc = config.locationAware

            if config.stemLinear:
                features = ops.linear(images, inDim, outDim)
            elif config.stemNumLayers == 0:
                # No stem layers: pass the input through unchanged. Without
                # this assignment `features` would be unbound below.
                features = images
                outDim = inDim
            else:
                # Layer widths: input, (stemNumLayers - 1) hidden layers of
                # config.stemDim, then the output width.
                dims = [inDim] + ([config.stemDim] *
                                  (config.stemNumLayers - 1)) + [outDim]

                if addLoc:
                    images, inDim = ops.addLocation(
                        images,
                        inDim,
                        config.locationDim,
                        h=self.H,
                        w=self.W,
                        locType=config.locationType)
                    # addLocation returns the updated input width; the first
                    # CNN layer must consume that instead of the original.
                    dims[0] = inDim

                features = ops.CNNLayer(
                    images,
                    dims,
                    batchNorm=self.batchNorm if config.stemBN else None,
                    dropout=self.dropouts["stem"],
                    kernelSizes=config.stemKernelSizes,
                    strides=config.stemStrideSizes)

                if config.stemGridRnn:
                    # The grid dimensions live on the instance (as used for
                    # addLocation above); bare H / W are not defined in this
                    # scope and would raise NameError.
                    features = ops.multigridRNNLayer(
                        features, self.H, self.W, outDim)

            # Flatten the spatial grid unless the legacy baseline is in use.
            if config.baselineNew or (not config.useBaseline):
                features = tf.reshape(features, (self.batchSize, -1, outDim))

        return features, outDim