def embedFact(self, factIdx):
    '''Embed one fact and append the result to `self.factEmbeds`.

    Fact 0 is the caption; fact k (k > 0) is the question-answer pair
    stored at index k - 1 of the question/answer token lists.

    Arguments:
        factIdx: non-negative index of the fact to embed.

    Raises:
        ValueError: if `factIdx` is negative. Without this check neither
            branch would run and the method would fail later with an
            unrelated-looking UnboundLocalError.

    Side effects:
        Appends the tuple (RNN output, RNN states) returned by
        `utils.dynamicRNN` to `self.factEmbeds`.
    '''
    if factIdx == 0:
        # Caption: `self.captionEmbed` is fed to the RNN as is (no
        # `self.wordEmbed` lookup happens on this path).
        seq, seqLens = self.captionEmbed, self.captionLens
    elif factIdx > 0:
        # QA pair: rounds are stored 0-based, facts are shifted by one
        # because fact 0 is reserved for the caption.
        roundIdx = factIdx - 1
        quesTokens, quesLens = \
            self.questionTokens[roundIdx], self.questionLens[roundIdx]
        ansTokens, ansLens = \
            self.answerTokens[roundIdx], self.answerLengths[roundIdx]
        # Join question and answer tokens into one right-padded sequence
        # before embedding them.
        qaTokens = utils.concatPaddedSequences(
            quesTokens, quesLens, ansTokens, ansLens, padding='right')
        seq = self.wordEmbed(qaTokens)
        seqLens = quesLens + ansLens
    else:
        raise ValueError(
            'factIdx must be a non-negative fact index, got %r' % (factIdx,))

    # Both kinds of fact go through the same fact RNN.
    factEmbed, factRNNstates = utils.dynamicRNN(
        self.factRNN, seq, seqLens, returnStates=True)
    self.factEmbeds.append((factEmbed, factRNNstates))
def embedFact(self, factIdx):
    '''Embed one fact and append the result to `self.factEmbeds`.

    Fact 0 is the caption; fact k (k > 0) is the question-answer pair
    stored at index k - 1 of the question/answer token lists.

    Arguments:
        factIdx: non-negative index of the fact to embed.

    Raises:
        ValueError: if `factIdx` is negative (previously this surfaced as
            an UnboundLocalError because neither branch assigned the
            result variables).

    Side effects:
        Appends the tuple (RNN output, RNN states) returned by
        `utils.dynamicRNN` to `self.factEmbeds`.
    '''
    if factIdx < 0:
        raise ValueError(
            'factIdx must be a non-negative fact index, got %r' % (factIdx,))

    if factIdx == 0:
        # Caption
        rnnInput, rnnLens = self.captionEmbed, self.captionLens
    else:
        # QA pairs (stored 0-based, hence the shift by one)
        quesTokens, quesLens = \
            self.questionTokens[factIdx - 1], self.questionLens[factIdx - 1]
        ansTokens, ansLens = \
            self.answerTokens[factIdx - 1], self.answerLengths[factIdx - 1]
        # Author's note: concatenates the non-zero (q, a) tokens and pads
        # with 0 up to the maximum length -- TODO confirm against
        # utils.concatPaddedSequences.
        qaTokens = utils.concatPaddedSequences(
            quesTokens, quesLens, ansTokens, ansLens, padding='right')
        rnnInput = self.wordEmbed(qaTokens)
        rnnLens = quesLens + ansLens

    # Author's note (unverified here): states hold hidden and cell states
    # for two layers (e.g. 2*2*20*512); factEmbed is the RNN output of one
    # layer (e.g. 20*512) -- TODO confirm shapes against the model config.
    factEmbed, factRNNstates = utils.dynamicRNN(
        self.factRNN, rnnInput, rnnLens, returnStates=True)
    self.factEmbeds.append((factEmbed, factRNNstates))
def embedFact(self, factIdx, debug):
    '''Embed one fact and append the result to `self.factEmbeds`.

    When `self.isLoaded` is false, fact 0 is the caption and fact k
    (k > 0) is the question-answer pair at index k - 1. When
    `self.isLoaded` is true there is no caption path: fact k maps directly
    to the question-answer pair at index k.

    Arguments:
        factIdx: index of the fact to embed.
        debug: when truthy, print the question tokens (QA path only) and
            the resulting embedding and RNN states.

    Side effects:
        Appends the tuple (RNN output, RNN states) returned by
        `utils.dynamicRNN` to `self.factEmbeds`.
    '''
    if factIdx == 0 and not self.isLoaded:
        # Caption
        seq, seqLens = self.captionEmbed, self.captionLens
        factEmbed, factRNNstates = utils.dynamicRNN(
            self.factRNN, seq, seqLens, returnStates=True)
    else:
        # QA pairs
        idx = factIdx if self.isLoaded else factIdx - 1
        quesTokens, quesLens = \
            self.questionTokens[idx], self.questionLens[idx]
        if debug:
            print("quesTokens", quesTokens)
        ansTokens, ansLens = \
            self.answerTokens[idx], self.answerLengths[idx]
        qaTokens = utils.concatPaddedSequences(
            quesTokens, quesLens, ansTokens, ansLens, padding='right')
        qa = self.wordEmbed(qaTokens)
        qaLens = quesLens + ansLens
        # One RNN pass is enough: it yields both the output and the
        # states, so no separate returnStates=False pass is run.
        factEmbed, factRNNstates = utils.dynamicRNN(
            self.factRNN, qa, qaLens, returnStates=True)

    if debug:
        print("Fact", factEmbed, factRNNstates)
    self.factEmbeds.append((factEmbed, factRNNstates))
def embedAnswer(self, aIdx):
    '''Run the answer RNN over the answer at index `aIdx`.

    Reads the already-embedded answer and its lengths, passes them through
    `self.ansRNN` via `utils.dynamicRNN`, and appends the resulting
    (output, states) tuple to `self.answerRNNStates`.

    Arguments:
        aIdx: index into `self.answerEmbeds` / `self.answerLengths`.
    '''
    embedded, lengths = self.answerEmbeds[aIdx], self.answerLengths[aIdx]
    rnnOutput, rnnStates = utils.dynamicRNN(
        self.ansRNN, embedded, lengths, returnStates=True)
    self.answerRNNStates.append((rnnOutput, rnnStates))
def embedQuestion(self, qIdx):
    '''Run the question RNN over the question at index `qIdx`.

    Reads the already-embedded question and its lengths, passes them
    through `self.quesRNN` via `utils.dynamicRNN`, and appends the
    resulting (output, states) tuple to `self.questionRNNStates`.

    Arguments:
        qIdx: index into `self.questionEmbeds` / `self.questionLens`.
    '''
    embedded, lengths = self.questionEmbeds[qIdx], self.questionLens[qIdx]
    # Author's note on the states: "2[1,20,512]" -- presumably two tensors
    # of that shape; TODO confirm.
    rnnOutput, rnnStates = utils.dynamicRNN(
        self.quesRNN, embedded, lengths, returnStates=True)
    self.questionRNNStates.append((rnnOutput, rnnStates))
def embedQuestion(self, qIdx):
    '''Run the question RNN over the question at index `qIdx`.

    When `self.useIm == 'early'`, the image embedding is repeated along
    dimension 1 of the question embedding and concatenated onto it along
    dimension 2 before the RNN is applied. The (output, states) tuple from
    `utils.dynamicRNN` is appended to `self.questionRNNStates`.

    Arguments:
        qIdx: index into `self.questionEmbeds` / `self.questionLens`.
    '''
    rnnInput, lengths = self.questionEmbeds[qIdx], self.questionLens[qIdx]

    if self.useIm == 'early':
        # Tile the image embedding so every position along dim 1 of the
        # question gets a copy, then fuse along the last of three dims.
        tiledImage = self.imageEmbed.unsqueeze(1).repeat(
            1, rnnInput.size(1), 1)
        rnnInput = torch.cat([rnnInput, tiledImage], 2)

    rnnOutput, rnnStates = utils.dynamicRNN(
        self.quesRNN, rnnInput, lengths, returnStates=True)
    self.questionRNNStates.append((rnnOutput, rnnStates))
def embedFact(self, factIdx):
    """Embed one fact and append the result to `self.factEmbeds`.

    Fact 0 is derived from the image embedding; fact k (k > 0) is the
    question / answer / second-answer triplet stored at index k - 1.

    Arguments:
        factIdx: non-negative index of the fact to embed.

    Raises:
        ValueError: if `factIdx` is negative. Without this check neither
            branch would run and the method would fail with an
            UnboundLocalError instead.

    Side effects:
        Appends an (embedding, states) tuple to `self.factEmbeds`.
    """
    if factIdx == 0:
        # Image: no RNN pass here; the embedding and the two state entries
        # come from the `im2hids` / `im2states` projections of the image.
        factEmbed = self.im2hids(self.imageEmbed)
        factRNNstates = [
            self.im2states[0](self.imageEmbed),
            self.im2states[1](self.imageEmbed),
        ]
    elif factIdx > 0:
        # QAA triplets are stored 0-based, facts are shifted by one because
        # fact 0 is reserved for the image.
        roundIdx = factIdx - 1
        quesTokens, quesLens = (
            self.questionTokens[roundIdx],
            self.questionLens[roundIdx],
        )
        ansTokens, ansLens = (
            self.answerTokens[roundIdx],
            self.answerLengths[roundIdx],
        )
        ansTokens2, ansLens2 = (
            self.answerTokens2[roundIdx],
            self.answerLengths2[roundIdx],
        )
        # Build question+answer first, then append the second answer to it.
        qaTokens = utils.concatPaddedSequences(
            quesTokens, quesLens, ansTokens, ansLens, padding="right"
        )
        qaaTokens = utils.concatPaddedSequences(
            qaTokens, quesLens + ansLens, ansTokens2, ansLens2, padding="right"
        )
        qaa = self.wordEmbed(qaaTokens)
        qaaLens = quesLens + ansLens + ansLens2
        factEmbed, factRNNstates = utils.dynamicRNN(
            self.factRNN, qaa, qaaLens, returnStates=True
        )
    else:
        raise ValueError(
            "factIdx must be a non-negative fact index, got %r" % (factIdx,)
        )

    self.factEmbeds.append((factEmbed, factRNNstates))