def apply(self, inp):
    """Highway-style combination: gated mix of the parent transform and a skip path.

    The skip path is the raw input, projected through ``W_t`` when that
    projection matrix is present (presumably to match dimensions — confirm).
    """
    gate = self.carry_activation(T.dot(inp, self.W_c) + self.b_c)
    transformed = super(ForwardHighway, self).apply(inp)
    skip = inp if self.W_t is None else T.dot(inp, self.W_t)
    return transformed * gate + (1 - gate) * skip
def getscores(self, crit, data):
    """Additive (tanh) attention scores of each data position against a criterion.

    :param crit: (batsize, critdim) criterion vectors
    :param data: (batsize, seqlen, datadim) sequence to score
    :return: unnormalized scores, per position
    """
    proj_data = T.dot(data, self.W)                      # (batsize, seqlen, attdim)
    proj_crit = T.dot(crit, self.U).dimshuffle(0, "x", 1)  # (batsize, 1, attdim) for broadcasting
    return T.dot(T.tanh(proj_data + proj_crit), self.V)
def apply(self, criterion):
    """Match a batch of criteria against transformed memory rows.

    :param criterion: (batsize, indim)
    :return: (batsize, mem_size) dot-product match scores

    self.memblock.innervar is presumed (mem_size, mem_dim) and self.W
    (mem_dim, indim) — TODO confirm against block construction.
    """
    # Removed a dead, commented-out T.scan implementation of the same
    # computation; the single matrix product below replaces it entirely.
    memdot = T.dot(self.memblock.innervar, self.W)  # (mem_size, indim)
    return T.dot(criterion, memdot.T)  # (batsize, mem_size)
def rec(x_t, crit):
    # Score one element against the criterion with an optional tanh/sigmoid MLP.
    merged = self._get_combo(x_t, crit)       # (batsize, crit_dim + datadim)
    hidden = T.dot(merged, self.W)            # (batsize, innerdim)
    if self.nonlinearities:
        hidden = T.tanh(hidden)
    score = T.dot(hidden, self.U)             # (batsize, )
    return T.nnet.sigmoid(score) if self.nonlinearities else score
def rec(self, x_t, h_tm1):
    """Vanilla RNN step: h_t = act(x_t·w + h_tm1·u + b).

    :param x_t: (batsize, dim) input for this step
    :param h_tm1: (batsize, innerdim) previous state
    :return: [output, new state] — same tensor twice
    """
    preact = T.dot(x_t, self.w) + T.dot(h_tm1, self.u) + self.b  # (batsize, innerdim)
    h = self.outpactivation(preact)
    return [h, h]
def rec(self, x_t, h_tm1):
    """Vanilla RNN step with dropout on input and recurrent state.

    :param x_t: (batsize, dim) input (replaced by 0 when self.noinput)
    :param h_tm1: (batsize, innerdim) previous state
    :return: [output, new state]
    """
    x_in = 0 if self.noinput else self.dropout_in(x_t)
    h_in = self.dropout_h(h_tm1)
    preact = T.dot(x_in, self.w) + T.dot(h_in, self.u) + self.b  # (batsize, innerdim)
    h = self.outpactivation(preact)
    return [h, h]
def rec(self, x_t, y_tm1, c_tm1):
    """Peephole LSTM step.

    :param x_t: (batsize, dim) input
    :param y_tm1: previous output
    :param c_tm1: previous cell state
    :return: [y_t, y_t, c_t]
    """
    def gate(peep_src, p, b, w, r):
        # peephole gate: cell·p + b + x_t·w + y_tm1·r
        return self.gateactivation(peep_src * p + b + T.dot(x_t, w) + T.dot(y_tm1, r))

    fgate = gate(c_tm1, self.pf, self.bf, self.wf, self.rf)
    igate = gate(c_tm1, self.pi, self.bi, self.wi, self.ri)
    candidate = self.outpactivation(T.dot(x_t, self.w) + T.dot(y_tm1, self.r) + self.b)
    c_t = c_tm1 * fgate + candidate * igate
    ogate = gate(c_t, self.po, self.bo, self.wo, self.ro)  # output gate peeks at the NEW cell
    y_t = ogate * self.outpactivation(c_t)
    return [y_t, y_t, c_t]
def setUp(self):
    """Build a small embed→project→softmax stack over an int32 index vector."""
    dim = 50
    self.vocabsize = 2000
    self.O = param((dim, self.vocabsize)).uniform()
    self.W = VectorEmbed(indim=self.vocabsize, dim=dim)  # reuse dim instead of a duplicated literal
    # BUG FIX: self.O is (dim, vocabsize) and the embedded x is (batsize, dim),
    # so the product must be x·O (matching the apply() blocks elsewhere in this
    # file that compute T.dot(emb, self.O)); T.dot(self.O, x) has incompatible
    # dimensions. Also dropped the unused local `data`.
    self.out = stack(self.W,
                     asblock(lambda x: T.dot(x, self.O)),
                     Softmax())(Input(ndim=1, dtype="int32"))
def setUp(self):
    """Build a small embed→project→softmax stack over an int32 index vector."""
    dim = 50
    self.vocabsize = 2000
    self.O = param((dim, self.vocabsize)).uniform()
    self.W = VectorEmbed(indim=self.vocabsize, dim=dim)  # reuse dim instead of a duplicated literal
    # BUG FIX: self.O is (dim, vocabsize) and the embedded x is (batsize, dim),
    # so the product must be x·O (matching the apply() blocks elsewhere in this
    # file that compute T.dot(emb, self.O)); T.dot(self.O, x) has incompatible
    # dimensions. Also dropped the unused local `data`.
    self.out = stack(self.W,
                     asblock(lambda x: T.dot(x, self.O)),
                     Softmax())(Input(ndim=1, dtype="int32"))
def rec(self, x_t, h_tm1):
    """Gated recurrence with separate hidden/input filter gates.

    :param x_t: input values (nb_samples, nb_feats) for this recurrence step
    :param h_tm1: previous states (nb_samples, out_dim)
    :return: [new state, new state] — (nb_samples, out_dim)
    """
    def gate(u, w, b):
        return self.gateactivation(T.dot(h_tm1, u) + T.dot(x_t, w) + b)

    mix_gate = gate(self.um, self.wm, self.bm)     # interpolation between old state and candidate
    hfilter = gate(self.uhf, self.whf, self.bhf)   # filters the previous state
    ifilter = gate(self.uif, self.wif, self.bif)   # filters the input
    candidate = self.outpactivation(
        T.dot(h_tm1 * hfilter, self.u) + T.dot(x_t * ifilter, self.w) + self.b)
    h = mix_gate * h_tm1 + (1 - mix_gate) * candidate
    return [h, h]
def rec(self, x_t, y_tm1, c_tm1):
    """Peephole LSTM recurrence step; returns [y_t, y_t, c_t]."""
    # forget and input gates peek at the previous cell state
    forget = self.gateactivation(
        c_tm1 * self.pf + self.bf + T.dot(x_t, self.wf) + T.dot(y_tm1, self.rf))
    write = self.gateactivation(
        c_tm1 * self.pi + self.bi + T.dot(x_t, self.wi) + T.dot(y_tm1, self.ri))
    candidate = self.outpactivation(
        T.dot(x_t, self.w) + T.dot(y_tm1, self.r) + self.b)
    c_t = c_tm1 * forget + candidate * write
    # output gate peeks at the updated cell state
    expose = self.gateactivation(
        c_t * self.po + self.bo + T.dot(x_t, self.wo) + T.dot(y_tm1, self.ro))
    y_t = expose * self.outpactivation(c_t)
    return [y_t, y_t, c_t]
def rec(self, x_t, h_tm1):
    """Gated recurrence with hidden- and input-filter gates.

    :param x_t: input values (nb_samples, nb_feats) for this recurrence step
    :param h_tm1: previous states (nb_samples, out_dim)
    :return: [new state, new state] — (nb_samples, out_dim)
    """
    mgate = self.gateactivation(T.dot(h_tm1, self.um) + T.dot(x_t, self.wm) + self.bm)
    hfgate = self.gateactivation(T.dot(h_tm1, self.uhf) + T.dot(x_t, self.whf) + self.bhf)
    ifgate = self.gateactivation(T.dot(h_tm1, self.uif) + T.dot(x_t, self.wif) + self.bif)
    # candidate state from the filtered previous state and filtered input
    canh = self.outpactivation(
        T.dot(h_tm1 * hfgate, self.u) + T.dot(x_t * ifgate, self.w) + self.b)
    h_t = mgate * h_tm1 + (1 - mgate) * canh
    return [h_t, h_t]
def rec(self, mem_tm1, h_tm1, *args):
    """One step of a memory-rewriting controller.

    :param mem_tm1: f(batsize, outseqlen, outvocsize) — previous memory state
    :param h_tm1: f(batsize, thinkerdim) — previous controller state
    :param args: trailing RNU states, with the input encoding as the LAST
        element: inpenc f(batsize, inplen, inpencdim)
    :return: (mem_t, mem_t, h_t, *states_t)

    NOTE(review): shape comments below are the author's annotations;
    they are not all verifiable from this block alone.
    """
    inpenc = args[-1]
    states_tm1 = args[:-1]
    batsize = inpenc.shape[0]
    # mem_tm1: f(batsize, outseqlen, outvocsize)
    # h_tm1: f(batsize, thinkerdim)
    # inpenc: f(batsize, inplen, inpencdim)
    # summarize memory
    mem_tm1_sam = self._memsample(mem_tm1)  # sample from mem
    mem_tm1_embsum = T.dot(mem_tm1_sam, self._memembmat)  # f(batsize, outseqlen, memembdim)
    mem_tm1_sum = self._memencode(mem_tm1_embsum)  # f(batsize, outseqlen, memsumdim)
    if self._memposvecs is not None:
        # append (broadcast) position vectors to every batch element's memory summary
        memposvecs = T.repeat(self._memposvecs.dimadd(0), batsize, axis=0)
        mem_tm1_sum = T.concatenate([mem_tm1_sum, memposvecs], axis=2)
    # input and memory read attentions
    inp_ctx_t = self._get_inp_ctx(h_tm1, inpenc)  # (batsize, inpencdim)
    mem_ctx_t = self._get_mem_ctx(h_tm1, mem_tm1_sum)  # (batsize, memsumdim)
    # update thinker state
    i_t = T.concatenate([inp_ctx_t, mem_ctx_t], axis=1)
    rnuret = self._core.rec(i_t, *states_tm1)
    h_t = rnuret[0]
    states_t = rnuret[1:]
    # memory change interface
    mem_t_addr = self._get_addr_weights(h_t, mem_tm1_sum)  # float-(batsize, outseqlen)
    mem_t_write = self._get_write_weights(h_t)  # (batsize, memvocsize)
    e_t = self._get_erase(h_t)  # (0..1)-(batsize,)
    c_t = self._get_change(h_t)  # (0..1)-(batsize,)
    # memory change
    can_mem_t = mem_tm1 - T.batched_dot(
        e_t, mem_tm1 * mem_t_addr.dimshuffle(0, 1, 'x'))  # erase where we addressed
    can_mem_t = can_mem_t + T.batched_tensordot(
        mem_t_addr, mem_t_write, axes=0)  # write new value
    mem_t = T.batched_dot(1 - c_t, mem_tm1) + T.batched_dot(
        c_t, can_mem_t)  # interpolate between old and new value
    mem_t = T.softmax(mem_t)  # normalize to probabilities
    return (mem_t, mem_t, h_t) + tuple(states_t)
def rec(self, x_t, y_tm1, c_tm1):
    """Peephole LSTM step with dropout on input and on the previous cell state.

    :return: [y_t, y_t, c_t]
    """
    x_in = 0 if self.noinput else self.dropout_in(x_t)
    c_prev = self.dropout_h(c_tm1)  # dropped cell is used for gates AND the forget path
    fgate = self.gateactivation(
        c_prev * self.pf + self.bf + T.dot(x_in, self.wf) + T.dot(y_tm1, self.rf))
    igate = self.gateactivation(
        c_prev * self.pi + self.bi + T.dot(x_in, self.wi) + T.dot(y_tm1, self.ri))
    candidate = self.outpactivation(
        T.dot(x_in, self.w) + T.dot(y_tm1, self.r) + self.b)
    c_t = c_prev * fgate + candidate * igate
    ogate = self.gateactivation(
        c_t * self.po + self.bo + T.dot(x_in, self.wo) + T.dot(y_tm1, self.ro))
    y_t = ogate * self.outpactivation(c_t)
    return [y_t, y_t, c_t]
def rec(self, x_t, h_tm1):
    """Gated recurrence with dropout on input and on the recurrent state.

    :param x_t: input values (nb_samples, nb_feats) for this recurrence step
    :param h_tm1: previous states (nb_samples, out_dim)
    :return: [new state, new state] — (nb_samples, out_dim)
    """
    x_in = 0 if self.noinput else self.dropout_in(x_t)
    h_drop = self.dropout_h(h_tm1)
    mgate = self.gateactivation(T.dot(h_drop, self.um) + T.dot(x_in, self.wm) + self.bm)
    hfgate = self.gateactivation(T.dot(h_drop, self.uhf) + T.dot(x_in, self.whf) + self.bhf)
    candidate = self.outpactivation(
        T.dot(h_drop * hfgate, self.u) + T.dot(x_in, self.w) + self.b)
    # note: the interpolation deliberately uses the UN-dropped previous state
    h_t = mgate * h_tm1 + (1 - mgate) * candidate
    return [h_t, h_t]
def rec(x_t):
    # project a single timestep through the closed-over weight matrix W
    projected = T.dot(x_t, W)
    return projected
def rec(x_t, crit):
    """Score one memory element against the batch criterion.

    x_t: (mem_dim), crit: (batsize, crit_dim) → (batsize, ).
    """
    joined = self._get_combo(x_t, crit)      # (batsize, crit_dim + datadim)
    hidden = T.tanh(T.dot(joined, self.W))   # (batsize, outdim)
    return T.dot(hidden, self.U)             # (batsize, )
def apply(self, inptensor):
    """Linear map of the input through the stored weight matrix."""
    projected = T.dot(inptensor, self.W)
    return projected
def apply(self, l, r):
    """Bilinear score of l against r, per batch element: l·W paired with r.

    l, r: (batsize, dims); returns one score per batch element.
    """
    projected = T.dot(self.W, l.T)           # author's note: (batsize, rdim) — confirm
    scores = T.batched_dot(projected.T, r)
    return scores
def apply(self, inptensor):
    """Embed indices, project to vocabulary logits (exposed as "out"), return softmax probs."""
    embedded = self.W(inptensor)
    logits = T.dot(embedded, self.O)
    logits.output_as("out")  # register the pre-softmax logits as an extra named output
    return Softmax()(logits)
def apply(self, criterion):
    """Match a transformed criterion against transformed memory rows by inner product."""
    mem_proj = T.dot(self.memblock.innervar, self.W)
    crit_proj = T.dot(criterion, self.U)
    return T.dot(crit_proj, mem_proj.T)
def apply(self, l, r):
    """Concatenate l and r on the feature axis and pass through a two-layer linear scorer."""
    pair = T.concatenate([l, r], axis=1)
    hidden = T.dot(pair, self.W)
    return T.dot(hidden, self.U)
def rec(x_t, crit):
    """Score batched elements against batched criteria.

    x_t: (batsize, elem_dim), crit: (batsize, crit_dim) → (batsize, ).
    """
    joined = T.concatenate([x_t, crit], axis=1)
    projected = T.dot(joined, self.W)    # (batsize, innerdim)
    return T.sum(projected, axis=1)      # (batsize, )
def apply(self, x):
    """Linear map; also registers the row-sums ("b") and grand total ("c") as extra outputs."""
    out = T.dot(x, self.W)
    row_sums = T.sum(out, axis=1)
    grand_total = T.sum(row_sums, axis=0)
    out.push_extra_outs({"b": row_sums, "c": grand_total})
    return out
def apply(self, x):
    """Lazily create a (in_features, 2) parameter from the input's known shape and apply it."""
    self.W = param((x.kshape[1], 2), name="test_param").uniform()
    result = T.dot(x, self.W)
    result.kshape = (x.kshape[0], 2)  # propagate the known output shape
    return result
def apply(self, x):
    # Declare/resize the weight's first dimension to match the input's known
    # feature size. NOTE(review): item-assignment into `.shape` only works if
    # self.W is a project param wrapper with a mutable declared shape — a plain
    # numpy/theano shape is not assignable; confirm against the param class.
    self.W.shape[0] = x.kshape[1]
    a = T.dot(x, self.W)
    return a
def setUp(self):
    """Wrap a plain dot product in a block and run it once on random data."""
    weights = param((10, 20)).uniform()
    block = asblock(lambda inp: T.dot(inp, weights))
    inputdata = np.random.random((100, 10))
    self.outvals = block.predict(inputdata)
def apply(self, x, mask=None, weights=None):
    """Summarize the sequence via the parent class, then squash to a tanh score."""
    summary = super(SimpleSeq2Bool, self).apply(x, mask=mask, weights=weights)
    score = T.dot(summary, self.summ)
    return T.tanh(score)
def apply(self, criterion):
    """Dot-product match: criterion (batsize, encdim) vs memory rows (memsize, encdim)."""
    memory = self.memblock.innervar
    return T.dot(criterion, memory.T)  # (batsize, memsize)
def apply(self, inp):
    """Affine transform: inp·W + b."""
    affine = T.dot(inp, self.W) + self.b
    return affine
def rec(x_t):
    # embed the symbol, project through W, normalize with the closed-over softmax
    projected = T.dot(E[x_t], W)
    return sm(projected)
def apply(self, criterion):
    """Score criteria against memory rows by inner product.

    criterion: (batsize, encdim); memblock.innervar: (memsize, encdim).
    """
    memory_t = self.memblock.innervar.T
    scores = T.dot(criterion, memory_t)
    return scores
def rec(self, x_t, h_tm1):
    """Vanilla RNN recurrence; emits the new state twice (as output and as state).

    x_t: (batsize, dim), h_tm1: (batsize, innerdim).
    """
    h_new = self.outpactivation(
        T.dot(x_t, self.w) + T.dot(h_tm1, self.u) + self.b)
    return [h_new, h_new]
def rec(self, x_t, m_tm1, mem_tm1, h_tm1): """ :param x_t: current input vector: (batsize, inp_dim) :param h_tm1: previous state vector: (batsize, state_dim) :param m_tm1: previous memory content vector: (batsize, state_dim) :param mem_tm1: previous memory state: (batsize, mem_size, state_dim) :return: (y_t, h_t, m_t, mem_t) """ # read memory memory_addr_gate1 = self.gateactivation( T.dot(h_tm1, self.uma) + T.dot(x_t, self.wma) + T.dot(m_tm1, self.mma) + self.bma) memory_addr_gate2 = self.gateactivation( T.dot(h_tm1, self.uma2) + T.dot(x_t, self.wma2) + T.dot(m_tm1, self.mma2) + self.bma2) memaddrcan = memory_addr_gate1 * h_tm1 + (1 - memory_addr_gate1) * m_tm1 memaddr = memory_addr_gate2 * memaddrcan + ( 1 - memory_addr_gate2 ) * x_t # TODO: ERROR HERE: x_t shape incompatible with internal shapes memsel = self.attgen(memaddr, mem_tm1) m_t = self.attcon(mem_tm1, memsel) # update inner stuff state_filter_gate = self.gateactivation( T.dot(h_tm1, self.usf) + T.dot(x_t, self.wsf) + T.dot(m_t, self.msf) + self.bsf) memory_filter_gate = self.gateactivation( T.dot(h_tm1, self.umf) + T.dot(x_t, self.wmf) + T.dot(m_t, self.mmf) + self.bmf) input_filter_gate = self.gateactivation( T.dot(h_tm1, self.uif) + T.dot(x_t, self.wif) + T.dot(m_t, self.mif) + self.bif) update_gate = self.gateactivation( T.dot(h_tm1, self.uug) + T.dot(x_t, self.wug) + T.dot(m_t, self.mug) + self.bug) # compute new state h_tm1_filtered = T.dot(state_filter_gate * h_tm1, self.u) x_t_filtered = T.dot(input_filter_gate * x_t, self.w) m_t_filtered = T.dot(memory_filter_gate * m_t, self.m) h_t_can = self.outpactivation(h_tm1_filtered + x_t_filtered + m_t_filtered + self.b) h_t = update_gate * h_tm1 + (1 - update_gate) * h_t_can # write memory memory_write_filter = self.gateactivation( T.dot(h_tm1, self.uwf) + T.dot(x_t, self.wwf) + T.dot(m_t, self.mwf) + self.bwf) # (batsize, state_dim) if self.discrete: # memsel: (batsize, mem_size) memseln = T.zeros_like(memsel) memsel = T.argmax(memsel, axis=1) 
memseln[T.arange(memsel.shape[0]), memsel] = 1.0 # TODO: doesn't work memsel = memseln memwritesel = T.batched_tensordot( memsel, memory_write_filter, axes=0) # (batsize, mem_size, state_dim) h_t_rep = h_t.reshape( (h_t.shape[0], 1, h_t.shape[1])).repeat(mem_tm1.shape[1], axis=1) mem_t = memwritesel * mem_tm1 + (1 - memwritesel) * h_t_rep return [h_t, m_t, mem_t, h_t]
def apply(self, inptensor):
    """Embed indices, map to vocabulary logits, and return softmax probabilities."""
    vectors = self.W(inptensor)
    logits = T.dot(vectors, self.O)
    return Softmax()(logits)
def rec(self, x_t, m_tm1, mem_tm1, h_tm1): """ :param x_t: current input vector: (batsize, inp_dim) :param h_tm1: previous state vector: (batsize, state_dim) :param m_tm1: previous memory content vector: (batsize, state_dim) :param mem_tm1: previous memory state: (batsize, mem_size, state_dim) :return: (y_t, h_t, m_t, mem_t) """ # read memory memory_addr_gate1 = self.gateactivation(T.dot(h_tm1, self.uma) + T.dot(x_t, self.wma) + T.dot(m_tm1, self.mma) + self.bma) memory_addr_gate2 = self.gateactivation(T.dot(h_tm1, self.uma2) + T.dot(x_t, self.wma2) + T.dot(m_tm1, self.mma2) + self.bma2) memaddrcan = memory_addr_gate1 * h_tm1 + (1 - memory_addr_gate1) * m_tm1 memaddr = memory_addr_gate2 * memaddrcan + (1 - memory_addr_gate2) * x_t # TODO: ERROR HERE: x_t shape incompatible with internal shapes memsel = self.attgen(memaddr, mem_tm1) m_t = self.attcon(mem_tm1, memsel) # update inner stuff state_filter_gate = self.gateactivation(T.dot(h_tm1, self.usf) + T.dot(x_t, self.wsf) + T.dot(m_t, self.msf) + self.bsf) memory_filter_gate = self.gateactivation(T.dot(h_tm1, self.umf) + T.dot(x_t, self.wmf) + T.dot(m_t, self.mmf) + self.bmf) input_filter_gate = self.gateactivation(T.dot(h_tm1, self.uif) + T.dot(x_t, self.wif) + T.dot(m_t, self.mif) + self.bif) update_gate = self.gateactivation(T.dot(h_tm1, self.uug) + T.dot(x_t, self.wug) + T.dot(m_t, self.mug) + self.bug) # compute new state h_tm1_filtered = T.dot(state_filter_gate * h_tm1, self.u) x_t_filtered = T.dot(input_filter_gate * x_t, self.w) m_t_filtered = T.dot(memory_filter_gate * m_t, self.m) h_t_can = self.outpactivation(h_tm1_filtered + x_t_filtered + m_t_filtered + self.b) h_t = update_gate * h_tm1 + (1 - update_gate) * h_t_can # write memory memory_write_filter= self.gateactivation(T.dot(h_tm1, self.uwf) + T.dot(x_t, self.wwf) + T.dot(m_t, self.mwf) + self.bwf) # (batsize, state_dim) if self.discrete: # memsel: (batsize, mem_size) memseln = T.zeros_like(memsel) memsel = T.argmax(memsel, axis=1) 
memseln[T.arange(memsel.shape[0]), memsel] = 1.0 # TODO: doesn't work memsel = memseln memwritesel = T.batched_tensordot(memsel, memory_write_filter, axes=0) # (batsize, mem_size, state_dim) h_t_rep = h_t.reshape((h_t.shape[0], 1, h_t.shape[1])).repeat(mem_tm1.shape[1], axis=1) mem_t = memwritesel * mem_tm1 + (1 - memwritesel) * h_t_rep return [h_t, m_t, mem_t, h_t]
def apply(self, x):
    """Two-stage linear reduction followed by a sigmoid squash; returns (batsize,)."""
    hidden = T.dot(x, self.block)
    score = T.dot(hidden, self.agg)  # (batsize,)
    return T.nnet.sigmoid(score)
def rec(x_t, crit):
    """Score batched elements (batsize, elem_dim) against criteria (batsize, crit_dim)."""
    # project the concatenated pair and collapse the inner dimension
    return T.sum(T.dot(T.concatenate([x_t, crit], axis=1), self.W), axis=1)  # (batsize, )
def apply(self, datas):
    """Affine map; note only the first element/row of the stored bias is applied."""
    bias = self.b[0]
    return T.dot(datas, self.w) + bias
def apply(self, x, mask=None):
    """Encode the sequence, squash the encoding to a sigmoid score, and propagate the mask."""
    encoding, out_mask = self.enc(x, mask=mask)
    score = T.nnet.sigmoid(T.dot(encoding, self.summ))
    return score, out_mask