def rec(self, mem_tm1, h_tm1, *args):
    """One step of a memory-augmented decoder recurrence.

    Summarizes the soft memory, attends over both the input encoding and the
    memory summary, advances the core RNN state, and then applies an
    erase/write/interpolate update to the memory followed by a softmax
    renormalization over the vocabulary axis.

    Args (shapes per the original annotations — TODO confirm against callers):
        mem_tm1: f(batsize, outseqlen, outvocsize) — previous soft memory.
        h_tm1:   f(batsize, thinkerdim) — previous controller state.
        *args:   trailing recurrent states, with the (non-sequence) input
                 encoding inpenc f(batsize, inplen, inpencdim) as the LAST
                 element.

    Returns:
        (mem_t, mem_t, h_t, *states_t) — mem_t is returned twice, presumably
        once as an output and once as a carried state; verify against the
        enclosing scan setup.
    """
    inpenc = args[-1]
    states_tm1 = args[:-1]
    batsize = inpenc.shape[0]
    # mem_tm1: f(batsize, outseqlen, outvocsize)
    # h_tm1: f(batsize, thinkerdim)
    # inpenc: f(batsize, inplen, inpencdim)

    # --- summarize memory ---
    mem_tm1_sam = self._memsample(mem_tm1)  # sample from mem
    mem_tm1_embsum = T.dot(
        mem_tm1_sam, self._memembmat)  # f(batsize, outseqlen, memembdim)
    mem_tm1_sum = self._memencode(
        mem_tm1_embsum)  # f(batsize, outseqlen, memsumdim)
    if self._memposvecs is not None:
        # broadcast fixed position vectors over the batch and append them
        # to each memory-summary slot along the feature axis
        memposvecs = T.repeat(self._memposvecs.dimadd(0), batsize, axis=0)
        mem_tm1_sum = T.concatenate([mem_tm1_sum, memposvecs], axis=2)

    # --- input and memory read attentions ---
    inp_ctx_t = self._get_inp_ctx(h_tm1, inpenc)  # (batsize, inpencdim)
    mem_ctx_t = self._get_mem_ctx(h_tm1, mem_tm1_sum)  # (batsize, memsumdim)

    # --- update thinker state ---
    i_t = T.concatenate([inp_ctx_t, mem_ctx_t], axis=1)
    rnuret = self._core.rec(i_t, *states_tm1)
    h_t = rnuret[0]
    states_t = rnuret[1:]

    # --- memory change interface ---
    mem_t_addr = self._get_addr_weights(
        h_t, mem_tm1_sum)  # float-(batsize, outseqlen)
    mem_t_write = self._get_write_weights(h_t)  # (batsize, memvocsize)
    e_t = self._get_erase(h_t)  # (0..1)-(batsize,)
    c_t = self._get_change(h_t)  # (0..1)-(batsize,)

    # --- memory change ---
    # erase where we addressed: subtract the addressed slice, scaled by e_t
    can_mem_t = mem_tm1 - T.batched_dot(
        e_t, mem_tm1 * mem_t_addr.dimshuffle(0, 1, 'x'))  # erase where we addressed
    # write new value: outer product of address weights and write vector
    can_mem_t = can_mem_t + T.batched_tensordot(
        mem_t_addr, mem_t_write, axes=0)  # write new value
    # interpolate between old and candidate memory, gated per-example by c_t
    mem_t = T.batched_dot(1 - c_t, mem_tm1) + T.batched_dot(
        c_t, can_mem_t)  # interpolate between old and new value
    mem_t = T.softmax(mem_t)  # normalize to probabilities
    return (mem_t, mem_t, h_t) + tuple(states_t)
def apply(self, sp, o):
    """Score subject-relation pairs against candidate objects.

    sp[:, 0] indexes entity embeddings (via self.A) and sp[:, 1] indexes
    relation matrices (via self.R); the subject embedding is transformed by
    its relation matrix and scored against the object embedding.
    """
    subj_emb = self.A(sp[:, 0])
    rel_mats = self.R[sp[:, 1], :, :]
    transformed = T.batched_dot(subj_emb, rel_mats)
    obj_emb = self.A(o)
    return self.scorer(transformed, obj_emb)
def apply(self, l, r):
    """Row-wise dot product of l and r, each f32^(batsize, dim)."""
    scores = T.batched_dot(l, r)
    return scores
def apply(self, l, r):
    """Row-wise cosine similarity of l and r, each f32^(batsize, dim).

    A small epsilon guards against division by zero for zero-norm rows.
    """
    numerator = T.batched_dot(l, r)
    denominator = l.norm(2, axis=1) * r.norm(2, axis=1) + 1e-6
    return numerator / denominator
def apply(self, l, r):
    """Bilinear score between batches l and r through the weight self.W."""
    # project l through W; per the original note this yields (batsize, rdim)
    # after the transpose — TODO confirm W's orientation against its init
    projected = T.dot(self.W, l.T)
    scores = T.batched_dot(projected.T, r)
    return scores
def getscores(self, criterion, data):
    """Score each row of data against the matching criterion row."""
    scores = T.batched_dot(data, criterion)
    return scores
def apply(self, l, r):
    """Cosine similarity per batch row; l and r are f32^(batsize, dim)."""
    dot_lr = T.batched_dot(l, r)
    norm_l = l.norm(2, axis=1)
    norm_r = r.norm(2, axis=1)
    # epsilon keeps the division safe when a row has zero norm
    denom = norm_l * norm_r + 1e-6
    return dot_lr / denom
def apply(self, criterion, data):
    """Batched dot of each data row with its criterion row."""
    result = T.batched_dot(data, criterion)
    return result
def rec(x_t, att_t, acc):
    """Fold one attention-weighted element into the accumulator.

    x_t: (batsize, elem_dim); att_t: (batsize,); acc: (batsize, elem_dim).
    Uses += deliberately so a mutable acc is updated in place.
    """
    weighted = T.batched_dot(x_t, att_t)
    acc += weighted
    return acc  # (batsize, elem_dim)
def rec(x_t, att_t, acc):
    """Accumulate att_t-weighted x_t into acc.

    Shapes: x_t (batsize, elem_dim), att_t (batsize,), acc (batsize, elem_dim).
    The augmented assignment is kept so an array-like acc mutates in place.
    """
    contribution = T.batched_dot(x_t, att_t)
    acc += contribution
    return acc  # (batsize, elem_dim)
def getscores(self, crit, data):
    """Attention scores of each data position against the criterion.

    crit: (batsize, crit_dim); data: (batsize, seqlen, datadim).
    """
    projected = T.dot(data, self.W)  # (batsize, seqlen, memdim)
    return T.batched_dot(projected, crit)
def apply(self, l, r):
    """Dropout on l, then row-wise dot with r; both f32^(batsize, dim)."""
    dropped = self.dropout(l)
    return T.batched_dot(dropped, r)