def __call__(self, inp, output_prev, _img):
    """Compute the image and text predictions from one shared encoding.

    `inp` is embedded with `self.Embed` and passed through `self.Shared`;
    the result feeds both `self.Visual` and (via `last`) `self.LM`.

    Args:
        inp: Input handed to `self.Embed`.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.LM` as its second argument.
        _img: Not used by this method.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    hidden = self.Shared(self.Embed(inp))
    img_pred = self.Visual(hidden)

    # The text path sees only `last(hidden)` -- presumably the final
    # step of the shared encoding; confirm against the `last` helper.
    lm_out = self.LM(last(hidden), self.Embed(output_prev))
    txt_pred = softmax3d(self.Embed.unembed(self.ToTxt(lm_out)))
    return (img_pred, txt_pred)
def __call__(self, inp, output_prev, img):
    """Compute the image prediction from `inp` and the text prediction from `img`.

    The two paths are independent here: the image prediction uses only the
    embedded `inp`, while `self.LM` receives `self.FromImg(img)` together
    with the embedded `output_prev`.

    Args:
        inp: Input handed to `self.Embed` for the visual path.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.LM` as its second argument.
        img: Image input passed through `self.FromImg` before `self.LM`.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    img_pred = self.Visual(self.Embed(inp))

    from_img = self.FromImg(img)
    prev_emb = self.Embed(output_prev)
    projected = self.ToTxt(self.LM(from_img, prev_emb))
    txt_pred = softmax3d(self.Embed.unembed(projected))
    return (img_pred, txt_pred)
def __call__(self, inp, output_prev, _img):
    """Compute both predictions from the representation made by `self.Visual.encode`.

    The encoded representation is used twice: once for the image
    prediction (`ToImg` then `visual_activation`) and once as the first
    argument to `self.LM` for the text prediction.

    Args:
        inp: Input handed to `self.Embed`.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.LM` as its second argument.
        _img: Not used by this method.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    visual = self.Visual
    rep = visual.encode(self.Embed(inp))
    img_pred = visual.visual_activation(visual.ToImg(rep))

    lm_out = self.LM(rep, self.Embed(output_prev))
    txt_pred = softmax3d(self.Embed.unembed(self.ToTxt(lm_out)))
    return (img_pred, txt_pred)
def __call__(self, prev):
    """Return the `softmax3d` of the wrapped network's text output for `prev`.

    `prev` is passed through `self.network.Shared`, then
    `self.network.EncodeLM`, then `self.network.ToTxt`.

    Args:
        prev: Input handed to `self.network.Shared`.

    Returns:
        The result of `softmax3d` applied to the `ToTxt` output.
    """
    net = self.network
    encoded = net.EncodeLM(net.Shared(prev))
    return softmax3d(net.ToTxt(encoded))
def __call__(self, out_prev):
    """Return the `softmax3d` of `self.ToTxt(self.Decode(out_prev))`.

    Args:
        out_prev: Input handed to `self.Decode`.

    Returns:
        The result of `softmax3d` applied to the `ToTxt` output.
    """
    decoded = self.Decode(out_prev)
    projected = self.ToTxt(decoded)
    return softmax3d(projected)
def __call__(self, inp, output_prev, _img):
    """Compute the image and text predictions on two independent paths.

    The image prediction depends only on `inp`; the text prediction
    depends only on `output_prev`. Both go through `self.Embed` first.

    Args:
        inp: Input handed to `self.Embed` for the visual path.
        output_prev: Previous output; embedded and passed to `self.LM`.
        _img: Not used by this method.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    img_pred = self.Visual(self.Embed(inp))

    lm_out = self.LM(self.Embed(output_prev))
    txt_pred = softmax3d(self.ToTxt(lm_out))
    return (img_pred, txt_pred)
def __call__(self, inp, output_prev, _img):
    """Compute the image prediction plus a placeholder text prediction.

    Only the visual path uses `inp`. The text prediction is just
    `softmax3d(self.OH(output_prev))`; it does not go through a language
    model.

    Args:
        inp: Input handed to `self.Embed` for the visual path.
        output_prev: Previous output, passed directly to `self.OH`.
        _img: Not used by this method.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    img_pred = self.Visual(self.Embed(inp))
    # Fake output prediction.
    txt_pred = softmax3d(self.OH(output_prev))
    return (img_pred, txt_pred)
def __call__(self, inp, output_prev, _img):
    """Compute the image and text predictions from one encoded representation.

    `self.Visual.encode` produces the representation from the embedded
    `inp`. It is mapped to the image prediction by `self.Visual.ToImg` and
    `self.Visual.visual_activation`, and is also the first argument to
    `self.LM` on the text path.

    Args:
        inp: Input handed to `self.Embed`.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.LM` as its second argument.
        _img: Not used by this method.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    encoded = self.Visual.encode(self.Embed(inp))

    to_img = self.Visual.ToImg(encoded)
    img_pred = self.Visual.visual_activation(to_img)

    prev_emb = self.Embed(output_prev)
    projected = self.ToTxt(self.LM(encoded, prev_emb))
    txt_pred = softmax3d(self.Embed.unembed(projected))
    return (img_pred, txt_pred)
def __call__(self, inp, output_prev, _img):
    """Compute the image and text predictions on top of `self.Shared`.

    The embedded `inp` goes through `self.Shared` once. `self.Visual`
    consumes that output directly, and `self.LM` consumes `last(...)` of
    it together with the embedded `output_prev`.

    Args:
        inp: Input handed to `self.Embed`.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.LM` as its second argument.
        _img: Not used by this method.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    embed = self.Embed
    shared_out = self.Shared(embed(inp))
    img_pred = self.Visual(shared_out)

    # NOTE(review): `last` presumably selects the final step of the
    # shared output -- confirm against its definition.
    summary = last(shared_out)
    projected = self.ToTxt(self.LM(summary, embed(output_prev)))
    txt_pred = softmax3d(embed.unembed(projected))
    return (img_pred, txt_pred)
def __call__(self, inp, out_prev, img):
    """Compute the image output from `inp` and the text output from `img`.

    Image path: `Embed` -> `Encode` -> `last` -> `ToVis` ->
    `visual_activation`. Text path: `img` goes through `FromVis` and
    `visual_activation`, and is passed to `self.Decode` together with the
    embedded `out_prev`; `PredictT`, `Embed.unembed` and `softmax3d`
    follow.

    Args:
        inp: Input handed to `self.Embed` for the image path.
        out_prev: Previous output; embedded with `self.Embed` and given to
            `self.Decode` as its second argument.
        img: Image input handed to `self.FromVis` for the text path.

    Returns:
        The tuple `(img_out, txt_out)`.
    """
    # Image path.
    encoded = self.Encode(self.Embed(inp))
    img_out = self.visual_activation(self.ToVis(last(encoded)))

    # Text path.
    from_vis = self.visual_activation(self.FromVis(img))
    decoded = self.Decode(from_vis, self.Embed(out_prev))
    txt_out = softmax3d(self.Embed.unembed(self.PredictT(decoded)))
    return (img_out, txt_out)
def __call__(self, inp, out_prev, _img):
    """Compute the image and text outputs from `last(Encode(Embed(inp)))`.

    The same representation is passed to `self.DecodeV` for the image
    output and to `self.DecodeT` (with the embedded `out_prev`) for the
    text output.

    Args:
        inp: Input handed to `self.Embed`.
        out_prev: Previous output; embedded with `self.Embed` and given to
            `self.DecodeT` as its second argument.
        _img: Not used by this method.

    Returns:
        The tuple `(img, txt)`.
    """
    encoded = self.Encode(self.Embed(inp))
    rep = last(encoded)

    img = self.visual_activation(self.DecodeV(rep))

    decoded_txt = self.DecodeT(rep, self.Embed(out_prev))
    txt = softmax3d(self.Embed.unembed(self.PredictT(decoded_txt)))
    return (img, txt)
def __call__(self, inp, output_prev, _img):
    """Compute the image and text predictions from the `self.Joint` encoding.

    The embedded `inp` is passed through `self.Joint`; that output feeds
    `self.Visual` and is also the first argument to `self.Textual`.

    Args:
        inp: Input handed to `self.Embed`, then `self.Joint`.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.Textual` as its second argument.
        _img: Forwarded unchanged to `self.Textual` as its third argument.

    Returns:
        The tuple `(img, txt)`.
    """
    joint_out = self.Joint(self.Embed(inp))
    prev_emb = self.Embed(output_prev)

    visual_out = self.Visual(joint_out)
    img = self.visual_activation(visual_out)

    # NOTE: despite the underscore prefix, `_img` is used here -- it is
    # handed on to `self.Textual`.
    textual_out = self.Textual(joint_out, prev_emb, _img)
    txt = softmax3d(self.Embed.unembed(textual_out))
    return (img, txt)
def __call__(self, inp, output_prev, img):
    """Compute the image and text predictions from the embedded `inp`.

    The embedded `inp` feeds `self.Visual` and is also the first argument
    to `self.Textual`, which additionally receives the embedded
    `output_prev` and `img`.

    Args:
        inp: Input handed to `self.Embed`.
        output_prev: Previous output; embedded with `self.Embed` and given
            to `self.Textual` as its second argument.
        img: Passed unchanged to `self.Textual` as its third argument.

    Returns:
        The tuple `(img_pred, txt_pred)`.
    """
    embedded_inp = self.Embed(inp)
    embedded_prev = self.Embed(output_prev)

    visual_out = self.Visual(embedded_inp)
    img_pred = self.visual_activation(visual_out)

    textual_out = self.Textual(embedded_inp, embedded_prev, img)
    txt_pred = softmax3d(self.Embed.unembed(textual_out))
    return (img_pred, txt_pred)