def stem(self, images, inDim, outDim, addLoc=None):
    """Transform the input images into the flattened knowledge base.

    Inside the "stem" variable scope the images go through either a single
    linear projection (``config.stemLinear``) or a CNN stack built by
    ``ops.CNNLayer``, optionally preceded by ``ops.addLocation`` and
    optionally followed by ``ops.multigridRNNLayer``. The result is reshaped
    to ``(self.batchSize, -1, outDim)``.

    Args:
        images: input image features; presumably a 2D grid of
            ``self.H`` x ``self.W`` cells with ``inDim`` channels
            -- TODO confirm against the caller.
        inDim: channel dimension of ``images``.
        outDim: channel dimension of the returned features.
        addLoc: whether to add location information to the images before
            the CNN. ``None`` falls back to ``config.locationAware``.

    Returns:
        The features reshaped to ``(self.batchSize, -1, outDim)``.
    """
    with tf.variable_scope("stem"):
        if addLoc is None:
            addLoc = config.locationAware

        if config.stemLinear:
            features = ops.linear(images, inDim, outDim)
        else:
            # Layer widths: input, (stemNumLayers - 1) hidden layers, output.
            dims = [inDim] + ([config.stemDim] * (config.stemNumLayers - 1)) + [outDim]

            if addLoc:
                # addLocation returns the augmented images together with
                # their new channel count, which becomes the CNN input width.
                images, inDim = ops.addLocation(
                    images, inDim, config.locationDim,
                    h=self.H, w=self.W, locType=config.locationType)
                dims[0] = inDim

            features = ops.CNNLayer(
                images, dims,
                batchNorm=self.batchNorm if config.stemBN else None,
                dropout=self.dropouts["stem"],
                kernelSizes=config.stemKernelSizes,
                strides=config.stemStrideSizes)

            if config.stemGridRnn:
                # Use the instance grid size: bare H / W are not defined in
                # this scope and raised NameError when this option was on.
                features = ops.multigridRNNLayer(
                    features, self.H, self.W, outDim)

        # flatten the 2d images into a 1d KB
        features = tf.reshape(features, (self.batchSize, -1, outDim))

    return features
def stem(self, images, inDim, outDim, addLoc=None):
    """Transform the input images into knowledge-base features.

    Two paths are handled inside the "stem" variable scope:

    * ``config.imageObjects``: the images are optionally passed through a
      linear layer (``config.stemLinear``) or an FC stack
      (``config.stemDeep``), then through the activation named by
      ``config.stemAct`` unless it is ``"NON"``. Returns early, without
      reshaping.
    * otherwise: a linear projection or a CNN stack (optionally with
      location information and a multigrid RNN layer), followed by a
      reshape to ``(self.batchSize, -1, outDim)`` when
      ``config.baselineNew or not config.useBaseline``.

    Args:
        images: input image features; presumably a grid of
            ``self.H`` x ``self.W`` cells on the CNN path
            -- TODO confirm against the caller.
        inDim: channel dimension of ``images``.
        outDim: requested channel dimension of the output features.
        addLoc: whether to add location information before the CNN.
            ``None`` falls back to ``config.locationAware``.

    Returns:
        A ``(features, dim)`` tuple where ``dim`` is the feature dimension
        reported to the caller.
    """
    with tf.variable_scope("stem"):
        if config.stemNormalize:
            images = tf.nn.l2_normalize(images, dim=-1)

        if config.imageObjects:  # VQA ??? or config.useBaseline:
            features, dim = images, inDim
            if config.stemLinear:
                features = ops.linear(
                    images, inDim, outDim, dropout=self.dropouts["stem"])
                dim = outDim
            elif config.stemDeep:
                # NOTE(review): dim stays inDim here although dims ends in
                # outDim -- confirm whether dim should become outDim.
                dims = [inDim] + config.stemDims + [outDim]
                features = ops.FCLayer(
                    features, dims, dropout=self.dropouts["stem"])

            if config.stemAct != "NON":
                features = ops.actF(config.stemAct)(features)

            return features, dim

        if addLoc is None:
            addLoc = config.locationAware

        if config.stemLinear:
            features = ops.linear(images, inDim, outDim)
        elif config.stemNumLayers == 0:
            # No stem layers: pass the images through unchanged. Previously
            # `features` was never bound on this path, so the code below
            # raised UnboundLocalError.
            features = images
            outDim = inDim
        else:
            # Layer widths: input, (stemNumLayers - 1) hidden layers, output.
            dims = [inDim] + ([config.stemDim] * (config.stemNumLayers - 1)) + [outDim]

            if addLoc:
                # addLocation returns the augmented images together with
                # their new channel count, which becomes the CNN input width.
                images, inDim = ops.addLocation(
                    images, inDim, config.locationDim,
                    h=self.H, w=self.W, locType=config.locationType)
                dims[0] = inDim

            features = ops.CNNLayer(
                images, dims,
                batchNorm=self.batchNorm if config.stemBN else None,
                dropout=self.dropouts["stem"],
                kernelSizes=config.stemKernelSizes,
                strides=config.stemStrideSizes)

            if config.stemGridRnn:
                # Use the instance grid size: bare H / W are not defined in
                # this scope and raised NameError when this option was on.
                features = ops.multigridRNNLayer(
                    features, self.H, self.W, outDim)

        if config.baselineNew or (not config.useBaseline):
            # Flatten the spatial grid into a single sequence axis.
            features = tf.reshape(features, (self.batchSize, -1, outDim))

    return features, outDim