def classifier(self, features, inDim, aEmbeddings=None):
    with tf.variable_scope("classifier"):
        outDim = config.answerWordsNum
        dims = [inDim] + config.outClassifierDims + [outDim]
        if config.answerMod != "NON":
            dims[-1] = config.wrdEmbDim

        # MLP over the final features to produce answer logits
        logits = ops.FCLayer(
            features, dims,
            batchNorm=self.batchNorm if config.outputBN else None,
            dropout=self.dropouts["output"])

        if config.answerMod != "NON":
            # score answers by interacting the projected features with the answer embeddings
            logits = tf.nn.dropout(logits, self.dropouts["output"])
            interactions = ops.mul(aEmbeddings, logits, dims[-1], interMod=config.answerMod)
            logits = ops.inter2logits(interactions, dims[-1], sumMod="SUM")
            logits += ops.getBias((outDim, ), "ans")  # fixed: was the undefined name outputDim

            # answersWeights = tf.transpose(aEmbeddings)
            # if config.answerMod == "BL":
            #     Wans = ops.getWeight((dims[-1], config.wrdEmbDim), "ans")
            #     logits = tf.matmul(logits, Wans)
            # elif config.answerMod == "DIAG":
            #     Wans = ops.getWeight((config.wrdEmbDim, ), "ans")
            #     logits = logits * Wans
            # logits = tf.matmul(logits, answersWeights)

    return logits
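
# Illustrative sketch (not from the codebase): when answerMod is a multiplicative
# interaction reduced with sumMod="SUM", the block above effectively scores each
# candidate answer by a dot product between the projected features and that answer's
# embedding, plus a per-answer bias. demoAnswerLogits is a hypothetical NumPy stand-in
# for ops.mul followed by ops.inter2logits; it assumes dense 2-D inputs.
def demoAnswerLogits(features, answerEmb, bias):
    import numpy as np
    # features: [batch, embDim], answerEmb: [numAnswers, embDim], bias: [numAnswers]
    interactions = features[:, None, :] * answerEmb[None, :, :]   # [batch, numAnswers, embDim]
    return np.sum(interactions, axis=-1) + bias                   # [batch, numAnswers]
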
def baselineAttLayer(self, images, memory, inDim, hDim, name="", reuse=None):
    with tf.variable_scope("attLayer" + name, reuse=reuse):
        # projImages = ops.linear(images, inDim, hDim, name = "projImage")
        # projMemory = tf.expand_dims(ops.linear(memory, inDim, hDim, name = "projMemory"), axis = -2)
        # if config.saMultiplicative:
        #     interactions = projImages * projMemory
        # else:
        #     interactions = tf.tanh(projImages + projMemory)

        interactions, _ = ops.mul(images, memory, inDim,
                                  proj={"dim": hDim, "shared": False},
                                  interMod=config.baselineAttType)
        attention = ops.inter2att(interactions, hDim)
        summary = ops.att2Smry(attention, images)
        newMemory = memory + summary

    return newMemory
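
# Illustrative sketch (not from the codebase): one attention hop of the kind
# baselineAttLayer performs, written with plain NumPy in place of ops.mul,
# ops.inter2att and ops.att2Smry. demoAttHop and its arguments are hypothetical.
def demoAttHop(regions, memory):
    import numpy as np
    # regions: [numRegions, dim], memory: [dim]
    scores = regions @ memory                            # interaction score per region
    attention = np.exp(scores - scores.max())
    attention /= attention.sum()                         # softmax over regions
    summary = attention @ regions                        # attention-weighted image summary
    return memory + summary                              # residual memory update
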
def classifier(self, features, inDim, choices=None, choicesNums=None):
    with tf.variable_scope("classifier"):
        outDim = config.answerWordsNum
        dims = [inDim] + config.outClassifierDims + [outDim]
        if config.answerMod != "NON":
            dims[-1] = config.wrdAEmbDim

        logits = ops.FCLayer(
            features, dims,
            batchNorm=self.batchNorm if config.outputBN else None,
            dropout=self.dropouts["output"])

        if config.answerMod != "NON":
            logits = ops.gatedAct(config.outAct, gate=config.outGate)(logits)
            logits = tf.nn.dropout(logits, self.dropouts["output"])
            concat = {"x": config.answerBias}
            interactions, interDim = ops.mul(choices, logits, dims[-1],
                                             interMod=config.answerMod, concat=concat)
            logits = ops.inter2logits(interactions, interDim, sumMod=config.answerSumMod)
            if config.ansFormat == "oe":
                logits += ops.getBias((outDim, ), "ans")
            else:
                logits = ops.expMask(logits, choicesNums)

    return logits
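
# Illustrative sketch (not from the codebase): in the multiple-choice branch above,
# ops.expMask suppresses logits of padded choice slots so a later softmax ignores them.
# demoExpMask is a hypothetical NumPy equivalent, assuming a fixed maximum number of
# choices per example.
def demoExpMask(logits, choicesNums):
    import numpy as np
    # logits: [batch, maxChoices], choicesNums: [batch] count of valid choices per example
    maxChoices = logits.shape[1]
    valid = np.arange(maxChoices)[None, :] < np.asarray(choicesNums)[:, None]
    return np.where(valid, logits, -1e30)                # padded slots get a very low score
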
def baseline(self, vecQuestions, questionDim, images, imageDim, hDim):
    with tf.variable_scope("baseline"):
        if config.baselineAtt:
            # project question and image to a shared dimension and run attention layers
            memory = ops.linear(vecQuestions, questionDim, hDim, name="qProj")
            images = ops.linear(images, imageDim, hDim, name="iProj")

            for i in range(config.baselineAttNumLayers):
                memory = self.baselineAttLayer(images, memory, hDim, hDim,
                                               name="baseline%d" % i)
            memDim = hDim
        else:
            if config.imageObjects:
                # attend over object features with a learned query vector
                cff = tf.get_variable("cff", shape=(imageDim, ),
                                      initializer=tf.random_normal_initializer())
                interactions, hDim = ops.mul(images, cff, imageDim)
                attention = ops.inter2att(interactions, hDim, mask=self.imagesObjectNum)
                images = ops.att2Smry(attention, images)
            else:
                # flatten the spatial feature map into a single vector
                images, imageDim = ops.linearizeFeatures(images, self.H, self.W, imageDim,
                                                         projDim=config.baselineProjDim)

            if config.baselineLSTM and config.baselineCNN:
                memory = tf.concat([vecQuestions, images], axis=-1)
                memDim = questionDim + imageDim
            elif config.baselineLSTM:
                memory = vecQuestions
                memDim = questionDim
            else:  # config.baselineCNN
                memory = images
                memDim = imageDim

    return memory, memDim
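
# Illustrative sketch (not from the codebase): in the imageObjects branch above, the
# learned vector cff acts as a query that attends over a variable-length object list,
# with padded positions masked, yielding a single image summary. demoObjectSummary and
# its arguments are hypothetical NumPy stand-ins for ops.mul / ops.inter2att / ops.att2Smry.
def demoObjectSummary(objects, query, numObjects):
    import numpy as np
    # objects: [maxObjects, dim], query: [dim], numObjects: valid objects in this example
    scores = objects @ query
    scores[numObjects:] = -1e30                          # mask padded object slots
    attention = np.exp(scores - scores.max())
    attention /= attention.sum()
    return attention @ objects                           # [dim] summary of the image
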
def memAutoEnc(self, newMemory, info, control, name="", reuse=None):
    with tf.variable_scope("memAutoEnc" + name, reuse=reuse):
        # inputs to auto encoder
        features = info if config.autoEncMemInputs == "INFO" else newMemory
        features = ops.linear(features, config.memDim, config.ctrlDim,
                              act=config.autoEncMemAct, name="aeMem")

        # reconstruct control
        if config.autoEncMemLoss == "CONT":
            loss = tf.reduce_mean(tf.squared_difference(control, features))
        else:
            interactions, dim = ops.mul(self.questionCntxWords, features, config.ctrlDim,
                                        concat={"x": config.autoEncMemCnct},
                                        mulBias=config.mulBias, name="aeMem")
            logits = ops.inter2logits(interactions, dim)
            logits = self.expMask(logits, self.questionLengths)

            # reconstruct word attentions
            if config.autoEncMemLoss == "PROB":
                loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.attentions["question"][-1], logits=logits))

            # reconstruct control through words attentions
            else:
                attention = tf.nn.softmax(logits)
                summary = ops.att2Smry(attention, self.questionCntxWords)
                loss = tf.reduce_mean(tf.squared_difference(control, summary))

    return loss
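
# Illustrative sketch (not from the codebase): for autoEncMemLoss == "PROB", the memory
# is decoded back into logits over the question words and compared, via softmax
# cross-entropy, with the word attention the model actually used. demoAttentionReconLoss
# is a hypothetical NumPy version of that loss for a single example.
def demoAttentionReconLoss(reconLogits, targetAttention):
    import numpy as np
    # reconLogits, targetAttention: [numWords]; targetAttention is a distribution
    logProbs = reconLogits - reconLogits.max()
    logProbs = logProbs - np.log(np.exp(logProbs).sum())  # log-softmax
    return -np.sum(targetAttention * logProbs)            # cross-entropy
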
def read(self, knowledgeBase, memory, control, name="", reuse=None):
    with tf.variable_scope("read" + name, reuse=reuse):
        dim = config.memDim

        ## memory dropout
        if config.memoryVariationalDropout:
            memory = ops.applyVarDpMask(memory, self.memDpMask, self.dropouts["memory"])
        else:
            memory = tf.nn.dropout(memory, self.dropouts["memory"])

        ## Step 1: knowledge base / memory interactions
        # parameters for knowledge base and memory projection
        proj = None
        if config.readProjInputs:
            proj = {"dim": config.attDim, "shared": config.readProjShared,
                    "dropout": self.dropouts["read"]}
            dim = config.attDim

        # parameters for concatenating knowledge base elements
        concat = {"x": config.readMemConcatKB, "proj": config.readMemConcatProj}

        # compute interactions between knowledge base and memory
        interactions, interDim = ops.mul(x=knowledgeBase, y=memory, dim=config.memDim,
                                         proj=proj, concat=concat,
                                         interMod=config.readMemAttType, name="memInter")

        projectedKB = proj.get("x") if proj else None

        # project memory interactions back to hidden dimension
        if config.readMemProj:
            interactions = ops.linear(interactions, interDim, dim, act=config.readMemAct,
                                      name="memKbProj")
        else:
            dim = interDim

        ## Step 2: compute interactions with control
        if config.readCtrl:
            # compute interactions with control
            if config.ctrlDim != dim:
                control = ops.linear(control, config.ctrlDim, dim, name="ctrlProj")  # fixed: ctrlDim was undefined

            interactions, interDim = ops.mul(interactions, control, dim,
                                             interMod=config.readCtrlAttType,
                                             concat={"x": config.readCtrlConcatInter},
                                             name="ctrlInter")

            # optionally concatenate knowledge base elements
            if config.readCtrlConcatKB:
                if config.readCtrlConcatProj:
                    addedInp, addedDim = projectedKB, config.attDim
                else:
                    addedInp, addedDim = knowledgeBase, config.memDim
                interactions = tf.concat([interactions, addedInp], axis=-1)
                dim += addedDim

            # optional nonlinearity
            interactions = ops.activations[config.readCtrlAct](interactions)

        ## Step 3: sum attentions up over the knowledge base
        # transform vectors to attention distribution
        # attention = ops.inter2att_mask(interactions, dim, self.snippets_lengths, dropout = self.dropouts["read"])
        attention = ops.inter2att(interactions, dim, dropout=self.dropouts["read"])

        self.attentions["kb"].append(attention)

        # optionally use projected knowledge base instead of original
        if config.readSmryKBProj:
            knowledgeBase = projectedKB

        # sum up the knowledge base according to the distribution
        # information = ops.att2Smry(attention, knowledgeBase)
        # adding snippet-level attention
        information = ops.att2Smry_snippet_level_att(attention, knowledgeBase,
                                                     self.snippets_lengths)

    return information
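
# Illustrative sketch (not from the codebase): the read unit above reduced to its three
# steps, using plain NumPy and simple elementwise interactions (no projections,
# concatenations, or snippet-level attention). demoRead and the score vector w are
# hypothetical.
def demoRead(knowledgeBase, memory, control, w):
    import numpy as np
    # knowledgeBase: [numElems, dim], memory: [dim], control: [dim], w: [dim]
    interactions = knowledgeBase * memory                # Step 1: KB / memory interactions
    interactions = interactions * control                # Step 2: modulate by control
    scores = interactions @ w                            # Step 3: scores over KB elements
    attention = np.exp(scores - scores.max())
    attention /= attention.sum()
    return attention @ knowledgeBase                     # retrieved information
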