def baselineAttLayer(self, images, memory, inDim, hDim, name="", reuse=None):
    """One baseline attention hop: attend over images given memory, update memory.

    Computes image/memory interactions, converts them to an attention
    distribution over image positions, summarizes the images under that
    distribution, and adds the summary to the memory (residual update).

    Args:
        images: image features attended over.
        memory: current memory vector conditioning the attention.
        inDim: dimension of the inputs fed to ops.mul.
        hDim: hidden dimension the interactions are projected to.
        name: suffix for the variable scope.
        reuse: passed through to tf.variable_scope.

    Returns:
        The updated memory (memory + attended image summary).
    """
    with tf.variable_scope("attLayer" + name, reuse=reuse):
        # Interactions between every image position and the memory vector,
        # projected to hDim; projection weights are NOT shared between the
        # two inputs. Interaction mode comes from config.
        interactions, _ = ops.mul(
            images, memory, inDim,
            proj={"dim": hDim, "shared": False},
            interMod=config.baselineAttType)

        # Attention distribution over image positions and its weighted summary.
        attDist = ops.inter2att(interactions, hDim)
        attSummary = ops.att2Smry(attDist, images)

        # Residual update of the memory with the attended summary.
        return memory + attSummary
def baseline(self, vecQuestions, questionDim, images, imageDim, hDim):
    """Baseline model combining the question vector with image features.

    Depending on config, either (a) runs a stack of question-conditioned
    attention layers over the image, or (b) pools/linearizes the image and
    selects which modalities (question / image / both) form the output.

    Args:
        vecQuestions: question vectors, dimension questionDim.
        questionDim: dimension of the question vectors.
        images: image features, dimension imageDim.
        imageDim: dimension of the image features.
        hDim: hidden dimension for the attention variant.

    Returns:
        (memory, memDim): the baseline memory and its dimension.
    """
    with tf.variable_scope("baseline"):
        if config.baselineAtt:
            # Project both modalities into a shared hDim space, then refine
            # the memory with several attention hops over the image.
            memory = ops.linear(vecQuestions, questionDim, hDim, name="qProj")
            images = ops.linear(images, imageDim, hDim, name="iProj")
            for layer in range(config.baselineAttNumLayers):
                memory = self.baselineAttLayer(
                    images, memory, hDim, hDim, name="baseline%d" % layer)
            return memory, hDim

        if config.imageObjects:
            # Attend over object features using a learned query vector.
            cff = tf.get_variable(
                "cff", shape=(imageDim, ),
                initializer=tf.random_normal_initializer())
            interactions, hDim = ops.mul(images, cff, imageDim)
            attention = ops.inter2att(interactions, hDim,
                                      mask=self.imagesObjectNum)
            images = ops.att2Smry(attention, images)
        else:
            # Flatten the spatial feature map into a single vector.
            images, imageDim = ops.linearizeFeatures(
                images, self.H, self.W, imageDim,
                projDim=config.baselineProjDim)

        # Choose which modalities make up the output memory.
        if config.baselineLSTM and config.baselineCNN:
            memory = tf.concat([vecQuestions, images], axis=-1)
            memDim = questionDim + imageDim
        elif config.baselineLSTM:
            memory, memDim = vecQuestions, questionDim
        else:  # config.baselineCNN
            memory, memDim = images, imageDim

        return memory, memDim
def write(self, memory, info, control, contControl=None, name="", reuse=None):
    """MAC write unit: integrate newly retrieved info into the memory state.

    Optionally projects/activates the info, computes self-attention over
    previous (control, memory) pairs, merges the chosen inputs, projects
    back to memory dimension, and applies an optional gate and batch norm.

    Args:
        memory: previous memory state (config.memDim features).
        info: new information from the read unit (config.memDim features).
        control: current control state (config.ctrlDim features).
        contControl: contextual control, used when writeSelfAttMod == "CONT".
        name: suffix for the variable scope.
        reuse: passed through to tf.variable_scope.

    Returns:
        The new memory state (config.memDim features).
    """
    with tf.variable_scope("write" + name, reuse=reuse):
        # optionally project info
        if config.writeInfoProj:
            info = ops.linear(info, config.memDim, config.memDim, name="info")

        # optional info nonlinearity
        info = ops.activations[config.writeInfoAct](info)

        # compute self-attention vector based on previous controls and memories
        if config.writeSelfAtt:
            print("using self attention")
            selfControl = control
            if config.writeSelfAttMod == "CONT":
                selfControl = contControl
            selfControl = ops.linear(selfControl, config.ctrlDim,
                                     config.ctrlDim, name="ctrlProj")

            # interactions of the current control with all previous controls
            interactions = self.controls * tf.expand_dims(selfControl, axis=1)

            attention = ops.inter2att(interactions, config.ctrlDim,
                                      name="selfAttention")
            self.attentions["self"].append(attention)
            # summarize previous memories under the self-attention distribution
            selfSmry = ops.att2Smry(attention, self.memories)

        # get write unit inputs: previous memory, the new info,
        # optionally self-attention / control
        newMemory, dim = memory, config.memDim
        if config.writeInputs == "INFO":
            newMemory = info
        elif config.writeInputs == "SUM":
            newMemory += info
        elif config.writeInputs == "BOTH":
            newMemory, dim = ops.concat(newMemory, info, dim,
                                        mul=config.writeConcatMul)
        # else: MEM (keep previous memory unchanged as input)

        if config.writeSelfAtt:
            newMemory = tf.concat([newMemory, selfSmry], axis=-1)
            dim += config.memDim

        if config.writeMergeCtrl:
            newMemory = tf.concat([newMemory, control], axis=-1)
            # BUGFIX: `control` carries config.ctrlDim features (see the gate
            # projection below), not config.memDim. The old `dim +=
            # config.memDim` made the projection below receive a wrong input
            # dimension whenever ctrlDim != memDim.
            dim += config.ctrlDim

        # project memory back to memory dimension
        if config.writeMemProj or (dim != config.memDim):
            newMemory = ops.linear(newMemory, dim, config.memDim,
                                   name="newMemory")

        # optional memory nonlinearity
        newMemory = ops.activations[config.writeMemAct](newMemory)

        # write unit gate: interpolate between old and candidate memory
        if config.writeGate:
            gateDim = config.memDim
            if config.writeGateShared:
                gateDim = 1  # one shared gate scalar instead of per-feature

            z = tf.sigmoid(
                ops.linear(control, config.ctrlDim, gateDim, name="gate",
                           bias=config.writeGateBias))

            self.attentions["gate"].append(z)

            newMemory = newMemory * z + memory * (1 - z)

        # optional batch normalization
        if config.memoryBN:
            newMemory = tf.contrib.layers.batch_norm(
                newMemory,
                decay=config.bnDecay,
                center=config.bnCenter,
                scale=config.bnScale,
                is_training=self.train,
                updates_collections=None)

    return newMemory
def read(self, knowledgeBase, memory, control, name="", reuse=None):
    """MAC read unit: retrieve information from the knowledge base.

    Computes interactions between the knowledge base and the memory,
    optionally modulates them with the control state, turns the result
    into an attention distribution over knowledge-base elements, and
    returns the attended (snippet-level) summary.

    Args:
        knowledgeBase: knowledge-base elements (config.memDim features).
        memory: current memory state (config.memDim features).
        control: current control state (config.ctrlDim features).
        name: suffix for the variable scope.
        reuse: passed through to tf.variable_scope.

    Returns:
        The retrieved information vector.
    """
    with tf.variable_scope("read" + name, reuse=reuse):
        dim = config.memDim

        ## memory dropout
        if config.memoryVariationalDropout:
            memory = ops.applyVarDpMask(memory, self.memDpMask,
                                        self.dropouts["memory"])
        else:
            memory = tf.nn.dropout(memory, self.dropouts["memory"])

        ## Step 1: knowledge base / memory interactions
        # parameters for knowledge base and memory projection
        proj = None
        if config.readProjInputs:
            proj = {
                "dim": config.attDim,
                "shared": config.readProjShared,
                "dropout": self.dropouts["read"]
            }
            dim = config.attDim

        # parameters for concatenating knowledge base elements
        concat = {"x": config.readMemConcatKB,
                  "proj": config.readMemConcatProj}

        # compute interactions between knowledge base and memory
        interactions, interDim = ops.mul(
            x=knowledgeBase, y=memory, dim=config.memDim,
            proj=proj, concat=concat,
            interMod=config.readMemAttType, name="memInter")

        # ops.mul stores the projected KB back into the proj dict
        projectedKB = proj.get("x") if proj else None

        # project memory interactions back to hidden dimension
        if config.readMemProj:
            interactions = ops.linear(interactions, interDim, dim,
                                      act=config.readMemAct, name="memKbProj")
        else:
            dim = interDim

        ## Step 2: compute interactions with control
        if config.readCtrl:
            if config.ctrlDim != dim:
                # BUGFIX: the original referenced a bare `ctrlDim`, which is
                # undefined in this scope (NameError); the value lives on
                # config, as in every other use in this unit.
                control = ops.linear(control, config.ctrlDim, dim,
                                     name="ctrlProj")

            interactions, interDim = ops.mul(
                interactions, control, dim,
                interMod=config.readCtrlAttType,
                concat={"x": config.readCtrlConcatInter},
                name="ctrlInter")

            # optionally concatenate knowledge base elements
            if config.readCtrlConcatKB:
                if config.readCtrlConcatProj:
                    addedInp, addedDim = projectedKB, config.attDim
                else:
                    addedInp, addedDim = knowledgeBase, config.memDim
                interactions = tf.concat([interactions, addedInp], axis=-1)
                dim += addedDim

            # optional nonlinearity
            interactions = ops.activations[config.readCtrlAct](interactions)

        ## Step 3: sum attentions up over the knowledge base
        # transform vectors to attention distribution
        # attention = ops.inter2att_mask(interactions, dim,
        #     self.snippets_lengths, dropout=self.dropouts["read"])
        attention = ops.inter2att(interactions, dim,
                                  dropout=self.dropouts["read"])
        self.attentions["kb"].append(attention)

        # optionally use projected knowledge base instead of original
        if config.readSmryKBProj:
            knowledgeBase = projectedKB

        # sum up the knowledge base according to the distribution,
        # with snippet-level attention
        # information = ops.att2Smry(attention, knowledgeBase)
        information = ops.att2Smry_snippet_level_att(
            attention, knowledgeBase, self.snippets_lengths)

    return information