def getscores(self, criterion, data):
    """Score every sequence element of ``data`` against ``criterion``.

    Shapes, per the original annotations:
      criterion: (batsize, crit_dim)
      data:      (batsize, seqlen, elem_dim)
      returns:   (batsize, seqlen)

    NOTE(review): the original comment described the result as "softmaxed on
    seqlen", but no softmax is applied inside this method -- presumably the
    caller normalises; confirm.
    """
    def score_step(elem_t, crit):
        # elem_t: (batsize, elem_dim), crit: (batsize, crit_dim)
        joined = T.concatenate([elem_t, crit], axis=1)
        projected = T.dot(joined, self.W)       # (batsize, innerdim)
        return T.sum(projected, axis=1)         # (batsize, )

    # scan walks the leading axis, so move seqlen to the front first
    time_major = data.dimswap(1, 0)
    per_step, _ = T.scan(fn=score_step,
                         sequences=time_major,
                         non_sequences=criterion)   # (seqlen, batsize)
    return per_step.dimswap(1, 0)
def apply(self, steps):
    """Run ``self.block.rec`` for ``steps`` iterations over an all-zero input.

    The scan is driven by a zero tensor of shape (steps, 1, 1) and by the
    initial states obtained from ``self.block.get_init_info(1)``.

    Returns element 0 of the scan result, sliced with ``[:, 0, :]``
    (i.e. with the middle, size-one axis removed).
    """
    init_states = self.block.get_init_info(1)
    dummy_input = T.zeros((steps, 1, 1))
    scanned = T.scan(self.block.rec,
                     sequences=dummy_input,
                     outputs_info=[None] + init_states)
    head = scanned[0]
    return head[:, 0, :]
def recembed(self, x):
    """Look up rows of ``self.E`` for the indices in ``x``, one step at a time.

    ``x`` is transposed so the scan iterates over its second axis; the result
    is shuffled back with ``dimshuffle(1, 0, 2)`` so the original leading axis
    comes first again.
    """
    table = self.E

    def lookup(idx_t):
        return table[idx_t]

    by_step = x.dimshuffle(1, 0)
    scanned = T.scan(fn=lookup, sequences=by_step, outputs_info=None)
    return scanned[0].dimshuffle(1, 0, 2)
def innerapply(self, l, r):
    """Score two aligned sequences step by step and aggregate the scores.

    ``l`` and ``r`` have their first two axes swapped so that the scan walks
    them in lockstep along what was axis 1; ``self.rec`` is applied to each
    pair of slices. The per-step scores are swapped back and reduced by
    ``self.agg``.

    # assumes axis 0 is batch and axis 1 is time for both inputs -- TODO confirm

    :param l: left sequence tensor
    :param r: right sequence tensor
    :return: whatever ``self.agg`` returns for the (swapped-back) scores
    """
    left_steps = l.dimswap(1, 0)
    right_steps = r.dimswap(1, 0)
    scores, _ = T.scan(self.rec, sequences=[left_steps, right_steps])
    scores = scores.dimswap(1, 0)
    # A leftover debugging ``print ret.ndim`` used to sit here; it wrote to
    # stdout on every graph construction and was Python-2-only syntax.
    return self.agg(scores)
def apply(self, se, initstates=None):
    """Run ``self.rec`` over ``se`` and return the first scan output.

    :param se: input sequence; axes 0 and 1 are swapped before scanning
    :param initstates: passed to ``self.get_init_info`` when given; otherwise
        the size of axis 1 of the swapped sequence is passed instead
    :return: first scan output with axes 0 and 1 swapped back
    """
    seq = se.dimswap(1, 0)
    if initstates is not None:
        init_arg = initstates
    else:
        init_arg = seq.shape[1]
    init_info = self.get_init_info(init_arg)
    scanned, _ = T.scan(fn=self.rec,
                        sequences=seq,
                        outputs_info=[None] + init_info)
    return scanned[0].dimswap(1, 0)
def apply(self, criterion):
    """Score every memory cell against ``criterion``.

    Shapes, per the original annotations:
      criterion: (batsize, crit_dim)
      memory:    (mem_size, mem_dim)  -- read from ``self.memblock.innervar``
      returns:   (batsize, mem_size)
    """
    def score_cell(mem_t, crit):
        # mem_t: (mem_dim), crit: (batsize, crit_dim)
        merged = self._get_combo(mem_t, crit)       # (batsize, crit_dim + datadim)
        hidden = T.tanh(T.dot(merged, self.W))      # (batsize, outdim)
        return T.dot(hidden, self.U)                # (batsize, )

    per_cell, _ = T.scan(fn=score_cell,
                         sequences=self.memblock.innervar,
                         non_sequences=criterion)   # (memsize, batsize)
    return per_cell.dimswap(1, 0)
def test_update_propagation_through_scan(self):
    """Updates pushed onto a scan input must show up on the scan output."""
    def increment(ix):
        return ix + 1

    source = Input(ndim=2, dtype="float32")
    source.push_updates({"a": "b"})
    scanned, _ = T.scan(increment, source)
    self.assertEqual(scanned.allupdates, source.allupdates)
def recout(self, x):
    """Multiply every step of ``x`` by ``self.W`` inside a scan.

    ``x`` is shuffled with ``dimshuffle(1, 0, 2)`` so the scan iterates over
    its second axis; the result is shuffled back the same way.
    """
    weights = self.W

    def project(step):
        return T.dot(step, weights)

    by_step = x.dimshuffle(1, 0, 2)
    scanned = T.scan(fn=project, sequences=by_step, outputs_info=None)
    return scanned[0].dimshuffle(1, 0, 2)
def recret(self, x):
    """Apply ``T.nnet.softmax`` to every step of ``x`` inside a scan.

    ``x`` is shuffled with ``dimshuffle(1, 0, 2)`` so the scan iterates over
    its second axis; the result is shuffled back the same way.
    """
    softmax = T.nnet.softmax

    def normalise(step):
        return softmax(step)

    by_step = x.dimshuffle(1, 0, 2)
    scanned = T.scan(fn=normalise, sequences=by_step, outputs_info=None)
    return scanned[0].dimshuffle(1, 0, 2)
def apply(self, data, weights):
    """Accumulate ``T.batched_dot(x_t, att_t)`` over the sequence axis.

    Shapes, per the original annotations:
      data:    (batsize, seqlen, elem_dim)
      weights: transposed before scanning, so each step sees a (batsize, ) slice
      returns: the accumulator after the last step, (batsize, elem_dim)
    """
    def accumulate(x_t, att_t, acc):
        # x_t: (batsize, elem_dim), att_t: (batsize, ), acc: (batsize, elem_dim)
        acc += T.batched_dot(x_t, att_t)
        return acc

    start = T.zeros((data.shape[0], data.shape[2]))
    running, _ = T.scan(fn=accumulate,
                        sequences=[data.dimswap(1, 0), weights.T],
                        outputs_info=start)
    # only the final accumulator value is of interest
    return running[-1, :, :]
def apply(self, criterion, data):
    """Compute a sigmoid gate value for every sequence element.

    Shapes, per the original annotations:
      criterion: (batsize, crit_dim)
      data:      (batsize, seqlen, datadim)

    Each step combines the element with the criterion via ``self._get_combo``,
    projects through ``self.W`` with tanh, then through ``self.U`` with sigmoid.
    The scan output has its first two axes swapped back before returning.
    """
    def gate(x_t, crit):
        merged = self._get_combo(x_t, crit)         # (batsize, crit_dim + datadim)
        hidden = T.tanh(T.dot(merged, self.W))      # (batsize, innerdim)
        return T.nnet.sigmoid(T.dot(hidden, self.U))    # (batsize, )

    gates, _ = T.scan(fn=gate,
                      sequences=data.dimswap(1, 0),
                      non_sequences=criterion)
    return gates.dimswap(1, 0)
def apply(self, context, seq, context_0=None, initstates=None,
          mask=None, encmask=None, startsymemb=None, **kw):
    """Decode ``seq`` conditioned on ``context``.

    Shapes, per the original annotations:
      context: (batsize, enc.innerdim)
      seq:     integer ids, (batsize, seqlen)
      returns: (batsize, seqlen, vocabsize)

    ``seq[:, 1:]`` is embedded step by step, and the embedding returned by
    ``self._get_seq_emb_t0`` is prepended as the first time step before the
    recurrent scan runs.

    :param initstates: ``None`` (use the batch size), or a sequence of states;
        a sequence shorter than ``self.numstates`` is left-padded with the
        batch size
    :param mask: ``"auto"`` derives the mask from ``seq > 0``; any other value
        is handed to ``self.applymask`` unchanged
    """
    if initstates is None:
        initstates = seq.shape[0]
    elif issequence(initstates):
        missing = self.numstates - len(initstates)
        if missing > 0:
            # fill up with batsizes for lower layers
            initstates = [seq.shape[0]] * missing + initstates
    init_info, nonseq = self.get_init_info(context, initstates,
                                           ctx_0=context_0, encmask=encmask)

    embedder = self.embedder

    def embed_step(ids_t):
        return embedder(ids_t)

    tail_ids = seq[:, 1:].dimswap(1, 0)
    tail_emb = T.scan(fn=embed_step, sequences=tail_ids)
    tail_emb = tail_emb.dimswap(1, 0)
    first_emb = self._get_seq_emb_t0(tail_emb.shape[0], startsymemb=startsymemb)
    full_emb = T.concatenate([first_emb.dimshuffle(0, "x", 1), tail_emb], axis=1)

    scanned = T.scan(fn=self.rec,
                     sequences=full_emb.dimswap(1, 0),
                     outputs_info=[None] + init_info,
                     non_sequences=nonseq)
    ret = scanned[0].dimswap(1, 0)
    if mask == "auto":
        mask = (seq > 0).astype("int32")
    return self.applymask(ret, mask)
def apply(self, x):
    """Embed, project and softmax every step of the index matrix ``x``.

    Per step: rows of ``self.E`` are looked up, multiplied by ``self.W`` and
    passed through a ``Softmax`` block. ``x`` is transposed for the scan and
    the result is shuffled back with ``dimshuffle(1, 0, 2)``.
    """
    emb_table = self.E
    out_proj = self.W
    normalise = Softmax()

    def step(idx_t):
        embedded = emb_table[idx_t]
        return normalise(T.dot(embedded, out_proj))

    probs, _ = T.scan(fn=step, sequences=x.dimshuffle(1, 0), outputs_info=None)
    return probs.dimshuffle(1, 0, 2)
def apply(self, seq):
    """Concatenate a word-level embedding with a character-level encoding.

    Per the original annotations the last axis of ``seq`` holds the Glove
    index in column 0 and character ids in the remaining columns.

      2-D input (batsize, 1+maxwordlen)         -> (batsize, embdim + encdim)
      3-D input (batsize, seqlen, 1+maxwordlen) -> (batsize, seqlen, embdim + encdim)

    Any other rank falls through both branches and returns ``None``.
    """
    rank = seq.ndim
    if rank == 2:
        word_emb = self.glove(seq[:, 0])        # (batsize, embdim)
        char_enc = self.enc(seq[:, 1:])         # (batsize, encdim)
        return T.concatenate([word_emb, char_enc], axis=1)
    elif rank == 3:
        word_emb = self.glove(seq[:, :, 0])
        # encode the characters of each position in a scan over seqlen
        char_steps = seq[:, :, 1:].dimswap(1, 0)
        scanned, _ = T.scan(fn=self.recenc, sequences=char_steps, outputs_info=None)
        char_enc = scanned.dimswap(1, 0)
        return T.concatenate([word_emb, char_enc], axis=2)
def apply(self, x):
    """Apply ``self.rec`` to each row of ``x`` and softmax the result.

    ``x`` is annotated as (batsize, 4); only columns 0, 1 and 2 are used.
    The scan iterates over those three columns in lockstep, with ``self.xes``,
    ``self.yes`` and ``self.divmul`` passed as non-sequences. The original
    annotates the scan output as (batsize, outdim).

    NOTE: the original carried a commented-out, scan-free variant that tiled
    the inputs and called ``self.rec`` once; it was never active code.
    """
    columns = [x[:, 0], x[:, 1], x[:, 2]]
    constants = [self.xes, self.yes, self.divmul]
    scores, _ = T.scan(fn=self.rec,
                       sequences=columns,
                       non_sequences=constants,
                       outputs_info=None)
    return Softmax()(scores)
def apply(self, context, seq, context_0=None, **kw):
    """Decode ``seq`` conditioned on ``context`` through ``self.recwrap``.

    Shapes, per the original annotations:
      context: (batsize, enc.innerdim)
      seq:     integer ids, (batsize, seqlen)
      returns: (batsize, seqlen, vocabsize)

    Initial states come from ``self.init_states`` when that is set, otherwise
    ``self.block.get_init_info`` is called with the batch size.
    """
    sequences = [seq.dimswap(1, 0)]     # (seqlen, batsize)
    context_0 = self._get_ctx_t0(context, context_0)
    if self.init_states is None:
        state_arg = seq.shape[0]
    else:
        state_arg = self.init_states
    init_info = self.block.get_init_info(state_arg)
    # the scan carries context, context_0 and a literal 0 alongside the block states
    carried = [None, context, context_0, 0] + init_info
    scanned, _ = T.scan(fn=self.recwrap, sequences=sequences, outputs_info=carried)
    return scanned[0].dimswap(1, 0)
def apply(self, data, weights):
    """Sum ``T.batched_dot(x_t, att_t)`` across all time steps of ``data``.

    Shapes, per the original annotations:
      data:    (batsize, seqlen, elem_dim)
      weights: transposed before scanning, so each step sees a (batsize, ) slice
      returns: (batsize, elem_dim), the accumulator after the final step
    """
    def add_weighted(x_t, att_t, acc):
        # x_t: (batsize, elem_dim), att_t: (batsize, ), acc: (batsize, elem_dim)
        acc += T.batched_dot(x_t, att_t)
        return acc

    zero_acc = T.zeros((data.shape[0], data.shape[2]))
    time_major = data.dimswap(1, 0)
    history, _ = T.scan(fn=add_weighted,
                        sequences=[time_major, weights.T],
                        outputs_info=zero_acc)
    return history[-1, :, :]
def innerapply(self, x, mask=None, initstates=None):
    """Run the recurrence over ``x``, optionally with a mask.

    :param x: 3-D input; axes 0 and 1 are swapped so the scan sees
        (seq_len, batsize, dim)
    :param mask: optional 2-D mask; when given, ``self.recwmask`` is scanned
        over (input, mask) pairs instead of ``self.rec`` over the input alone
    :param initstates: optional sequence of initial states; when omitted the
        batch size is passed to ``self.get_init_info``
    :return: triple of (first output sliced at ``[:, -1, :]``, first output,
        list of the remaining outputs), all with axes 0 and 1 swapped back
    """
    assert x.ndim == 3 and (mask is None or mask.ndim == 2)
    if initstates is not None:
        info_arg = initstates
        assert issequence(info_arg)
    else:
        info_arg = x.shape[0]       # batsize
    steps = x.dimswap(1, 0)
    init_info = self.get_init_info(info_arg)
    if mask is None:
        step_fn, seqs = self.rec, steps
    else:
        step_fn, seqs = self.recwmask, [steps, mask.dimswap(1, 0)]
    scanned, _ = T.scan(fn=step_fn,
                        sequences=seqs,
                        outputs_info=[None] + init_info,
                        go_backwards=self._reverse)
    swapped = [out.dimswap(1, 0) for out in scanned]
    return swapped[0][:, -1, :], swapped[0], swapped[1:]
def apply(self, seq, weights=None):
    """Scan ``self.recwrap`` over ``seq`` together with per-step weights.

    Shapes, per the original annotations:
      seq:     (batsize, seqlen, dim)
      weights: (batsize, seqlen)

    When ``weights`` is omitted, an all-ones (seqlen, batsize) tensor is used.
    When it is supplied, ``self._weighted`` is set to ``True`` as a side effect.
    The scan outputs are post-processed by ``self._get_apply_outputs``.
    """
    inp = seq.dimswap(1, 0)     # (seqlen, batsize, dim)
    if weights is not None:
        self._weighted = True
        step_weights = weights.dimswap(1, 0)
    else:
        step_weights = T.ones((inp.shape[0], inp.shape[1]))     # (seqlen, batsize)
    init_info = self.block.get_init_info(seq.shape[0])
    scanned, _ = T.scan(fn=self.recwrap,
                        sequences=[inp, step_weights],
                        outputs_info=[None] + init_info,
                        go_backwards=self._reverse)
    return self._get_apply_outputs(scanned)
def innerapply(self, x, mask=None, initstates=None):
    """Scan the recurrent step over ``x`` and return final, full and state outputs.

    :param x: 3-D input, swapped to (seq_len, batsize, dim) for the scan
    :param mask: optional 2-D mask; selects ``self.recwmask`` as the step
        function and is scanned alongside the input
    :param initstates: optional sequence of initial states; defaults to
        passing the batch size to ``self.get_init_info``
    :return: ``(outputs[0][:, -1, :], outputs[0], outputs[1:])`` where every
        scan output has had axes 0 and 1 swapped back
    """
    assert x.ndim == 3 and (mask is None or mask.ndim == 2)
    if initstates is None:
        state_arg = x.shape[0]      # batsize
    else:
        state_arg = initstates
        assert issequence(state_arg)
    time_major = x.dimswap(1, 0)
    carried = [None] + self.get_init_info(state_arg)
    if mask is not None:
        results, _ = T.scan(fn=self.recwmask,
                            sequences=[time_major, mask.dimswap(1, 0)],
                            outputs_info=carried,
                            go_backwards=self._reverse)
    else:
        results, _ = T.scan(fn=self.rec,
                            sequences=time_major,
                            outputs_info=carried,
                            go_backwards=self._reverse)
    batch_major = [res.dimswap(1, 0) for res in results]
    main = batch_major[0]
    return main[:, -1, :], main, batch_major[1:]
def getscores(self, criterion, data):
    """Produce one score per (batch item, sequence position).

    Shapes, per the original annotations:
      criterion: (batsize, crit_dim)
      data:      (batsize, seqlen, elem_dim)
      returns:   (batsize, seqlen)

    NOTE(review): the original comment said the result is "softmaxed on
    seqlen"; this method itself applies no softmax -- confirm where the
    normalisation happens.
    """
    def linear_score(x_t, crit):
        # x_t is (batsize, elem_dim), crit is (batsize, crit_dim)
        features = T.concatenate([x_t, crit], axis=1)
        inner = T.dot(features, self.W)     # (batsize, innerdim)
        return T.sum(inner, axis=1)         # (batsize, )

    scores, _ = T.scan(fn=linear_score,
                       sequences=data.dimswap(1, 0),
                       non_sequences=criterion)     # (seqlen, batsize)
    return scores.dimswap(1, 0)
def getscores(self, criterion, data):
    """Score each sequence element against ``criterion`` with a two-layer net.

    Shapes, per the original annotations:
      criterion: (batsize, crit_dim)
      data:      (batsize, seqlen, datadim)
      returns:   (batsize, seqlen)

    When ``self.nonlinearities`` is truthy, tanh follows the ``self.W``
    projection and sigmoid follows the ``self.U`` projection; otherwise both
    layers are purely linear.
    """
    def score_step(x_t, crit):
        merged = self._get_combo(x_t, crit)     # (batsize, crit_dim + datadim)
        hidden = T.dot(merged, self.W)          # (batsize, innerdim)
        if self.nonlinearities:
            hidden = T.tanh(hidden)
        score = T.dot(hidden, self.U)           # (batsize, )
        if self.nonlinearities:
            score = T.nnet.sigmoid(score)
        return score

    time_major = data.dimswap(1, 0)
    scores, _ = T.scan(fn=score_step, sequences=time_major, non_sequences=criterion)
    return scores.dimswap(1, 0)
def apply(self, x, initstates=None):
    """Run ``self.rec`` over ``x`` and return the first output sequence.

    :param x: input, swapped to (seq_len, batsize, dim) for the scan
    :param initstates: optional sequence of initial states; when omitted the
        batch size (``x.shape[0]``) is passed to ``self.get_init_info``
    :return: first scan output, swapped back to (batsize, seqlen, dim)
    """
    if initstates is not None:
        state_arg = initstates
        assert issequence(state_arg)
    else:
        state_arg = x.shape[0]      # batsize
    time_major = x.dimswap(1, 0)
    init_info = self.get_init_info(state_arg)
    scanned, _ = T.scan(fn=self.rec,
                        sequences=time_major,
                        outputs_info=[None] + init_info,
                        go_backwards=self._reverse)
    return scanned[0].dimswap(1, 0)
def apply(self, seq):
    """Combine Glove embeddings with character-encoder outputs.

    Column 0 of the last axis is annotated as the Glove index, the remaining
    columns as character ids.

      rank 2: (batsize, 1+maxwordlen)         -> (batsize, embdim + encdim)
      rank 3: (batsize, seqlen, 1+maxwordlen) -> (batsize, seqlen, embdim + encdim)

    Inputs of any other rank are not handled and yield ``None``.
    """
    if seq.ndim == 2:
        glove_part = self.glove(seq[:, 0])      # (batsize, embdim)
        char_part = self.enc(seq[:, 1:])        # (batsize, encdim)
        return T.concatenate([glove_part, char_part], axis=1)
    elif seq.ndim == 3:
        glove_part = self.glove(seq[:, :, 0])
        chars_by_step = seq[:, :, 1:].dimswap(1, 0)
        encoded, _ = T.scan(fn=self.recenc,
                            sequences=chars_by_step,
                            outputs_info=None)
        char_part = encoded.dimswap(1, 0)
        return T.concatenate([glove_part, char_part], axis=2)
def apply(self, context, seq, context_0=None, initstates=None,
          mask=None, encmask=None, **kw):
    """Decode ``seq`` conditioned on ``context``.

    Shapes, per the original annotations:
      context: (batsize, enc.innerdim)
      seq:     integer ids, (batsize, seqlen)
      returns: (batsize, seqlen, vocabsize)

    :param initstates: ``None`` (use the batch size), or a sequence of states;
        a sequence shorter than ``self.numstates`` is left-padded with the
        batch size
    :param mask: ``"auto"`` derives the mask from ``seq > 0``; any other value
        is handed to ``self.applymask`` unchanged
    """
    if initstates is None:
        initstates = seq.shape[0]
    elif issequence(initstates):
        shortfall = self.numstates - len(initstates)
        if shortfall > 0:
            # fill up with batsizes for lower layers
            initstates = [seq.shape[0]] * shortfall + initstates
    init_info, nonseq = self.get_init_info(context, initstates,
                                           ctx_0=context_0, encmask=encmask)
    scanned, _ = T.scan(fn=self.rec,
                        sequences=seq.dimswap(1, 0),
                        outputs_info=[None] + init_info,
                        non_sequences=nonseq)
    ret = scanned[0].dimswap(1, 0)
    if mask == "auto":
        mask = (seq > 0).astype("int32")
    return self.applymask(ret, mask)
def apply(self, x, mask=None):
    """Encode ``x`` directly, or scan ``self.outerrec`` over higher-rank input.

    The minimum rank is 3 when ``self.enc.embedder`` is ``None`` and 2
    otherwise; the mask is expected to have one axis fewer than ``x`` in the
    former case and the same rank in the latter.

    :param mask: optional; defaults to all ones over the leading mask axes
    :return: ``self.enc(x, mask=mask)`` at the minimum rank, otherwise the
        result of scanning ``self.outerrec`` over ``x`` and ``mask``
    :raises Exception: when ``x`` has fewer than the minimum number of axes
    """
    has_embedder = self.enc.embedder is not None
    if has_embedder:
        mindim, maskdim = 2, x.ndim
    else:
        mindim, maskdim = 3, x.ndim - 1

    if mask is None:
        mask = T.ones(x.shape[:maskdim])
    else:
        assert mask.ndim == maskdim

    if x.ndim == mindim:
        return self.enc(x, mask=mask)
    if x.ndim > mindim:
        # iterate over the leading axis and recurse through outerrec
        return T.scan(fn=self.outerrec, sequences=[x, mask], outputs_info=None)
    raise Exception("cannot have less than {} dims".format(mindim))
def apply(self, inpseq):    # int-(batsize, seqlen)
    """Encode ``inpseq`` and run ``self.rec`` for ``self._nsteps`` steps.

    An initial memory ``mem_0`` and an initial state ``h_0`` are built and
    passed, together with the core's initial states, as the recurrent outputs
    of the scan; the (optionally position-augmented) encoding is passed as a
    non-sequence.

    Returns a pair: the last step of the first scan output, and the full
    first scan output.

    NOTE(review): this method was recovered from a copy with its indentation
    flattened. The nesting inside the ``for ss in core_state_spec`` loop below
    is a reconstruction -- confirm against the upstream source.
    """
    inpenco = self._inpencoder(inpseq)  # may carry mask, based on encoder's embedder
    batsize = inpenco.shape[0]
    outvocsize = self._memembmat.shape[0]
    # initial memory: 0.95 in the first vocabulary slot, 0.05 elsewhere,
    # then passed through softmax
    mem_0 = T.concatenate([
        T.ones((batsize, self._memlen, 1), dtype="float32") * 0.95,
        T.ones((batsize, self._memlen, outvocsize - 1), dtype="float32") * 0.05,
    ], axis=2)  # (batsize, outseqlen, outvocsize)
    mem_0 = T.softmax(mem_0)
    core_init_states = self._core.get_init_info(batsize)
    core_state_spec = self._core.get_statespec(flat=False)
    assert (len(core_state_spec) == len(core_init_states))
    h_0 = None  # take last output of core states as initial state
    c = 0       # running index into core_init_states
    for ss in core_state_spec:
        h_0_isout = False
        for sss in ss:
            if sss[0] == "output":
                h_0_isout = True
                h_0 = core_init_states[c]
            # until an "output" entry has been seen in this group, fall back
            # to the current state
            if not h_0_isout:
                h_0 = core_init_states[c]
            c += 1
    if self._inp_pos_repr is not None:
        # append position vectors (repeated over the batch) to the encoding
        inpposvecs = self._inp_pos_repr(inpseq.shape[1])
        inpposvecs = T.repeat(inpposvecs.dimadd(0), batsize, axis=0)
        inpenc = T.concatenate([inpenco, inpposvecs], axis=2)
        inpenc.mask = inpenco.mask  # carry the encoder's mask over to the concatenated tensor
    else:
        inpenc = inpenco
    outputs = T.scan(fn=self.rec, outputs_info=[None, mem_0, h_0] + core_init_states, n_steps=self._nsteps, non_sequences=inpenc)
    ret = outputs[0]
    ret.push_extra_outs({"mem_0": mem_0, "h_0": h_0})  # DEBUGGING
    return ret[-1], ret
def apply(self, ctx, seq, initstates=None, mask=None, ctxmask=None, **kw):
    """Embed ``seq`` and decode it with ``self.inner_rec``.

    Shapes, per the original annotations:
      ctx:     (batsize, enc.innerdim)
      seq:     integer ids, (batsize, seqlen)
      returns: (batsize, seqlen, vocabsize)

    :param mask: when ``None``, the mask attached to the embedded sequence is
        used; the chosen mask is attached to the returned tensor as ``.mask``
    """
    batsize = seq.shape[0]
    init_info, nonseqs = self.get_inits(initstates, batsize, ctx, ctxmask)
    embedded = self.embedder(seq)       # (batsize, seqlen, embdim)
    if mask is None:
        mask = embedded.mask
    scanned = T.scan(fn=self.inner_rec,
                     sequences=embedded.dimswap(1, 0),
                     outputs_info=[None] + init_info,
                     non_sequences=nonseqs)
    result = scanned[0].dimswap(1, 0)
    result.mask = mask
    return result
def apply(self, x):
    """Scan ``self.rec`` over ``x`` with one non-recurrent output slot."""
    return T.scan(self.rec, sequences=x, outputs_info=[None])
def apply(self):
    """Run ``self.rec`` for five steps with no input sequence."""
    return T.scan(self.rec, sequences=None, outputs_info=[None], n_steps=5)
def apply(self, seq):
    """Apply ``self.rec`` to every time step of ``seq``.

    ``seq`` is annotated as (batsize, seqlen, ...); its first two axes are
    swapped for the scan and swapped back on the way out.
    """
    time_major = seq.dimswap(1, 0)
    scanned, _ = T.scan(self.rec, sequences=time_major)
    return scanned.dimswap(1, 0)
def recurnonreclayer(cls, x, layer):
    """Apply ``layer`` to every step of ``x`` via a scan.

    The step function is obtained from ``cls.dummyrec(layer)``; ``x`` has its
    first two axes swapped for the scan and the result is swapped back.
    """
    step_fn = cls.dummyrec(layer)
    time_major = x.dimswap(1, 0)
    scanned, _ = T.scan(fn=step_fn, sequences=time_major, outputs_info=None)
    return scanned.dimswap(1, 0)
def apply(self, x, mask=None):
    """Scan ``self.rec`` over ``x`` and ``mask`` in lockstep.

    Annotated shapes: ``x`` is (batsize, seqlen, dim), ``mask`` is
    (batsize, seqlen). A missing mask is replaced by an all-zero one.

    NOTE(review): the scan result is discarded and the method returns
    ``None`` -- confirm this is intentional.
    """
    if mask is None:
        mask = T.zeros((x.shape[0], x.shape[1]))
    steps = [x.dimswap(1, 0), mask.dimswap(1, 0)]
    T.scan(fn=self.rec, sequences=steps)
def apply(self, x):
    """Scan ``self.f`` over ``x`` and return the scan result unchanged."""
    return T.scan(fn=self.f, sequences=[x])
def apply(self, seq):
    """Apply ``self.rec`` to every time step of ``seq``.

    ``seq`` is annotated as (batsize, seqlen, ...); its first two axes are
    swapped for the scan and swapped back on the way out. The scan result is
    used directly, without unpacking.
    """
    time_major = seq.dimswap(1, 0)
    scanned = T.scan(self.rec, sequences=time_major)
    return scanned.dimswap(1, 0)