def apply(self, x):
    """Re-cut the inner encoder's two-row output into left/right parts.

    x -> self.inner -> res: (batsize, 2, encdim). In "multic" mode row 0
    keeps the top half of res[:, 0] and row 1 the bottom half of res[:, 1];
    for a bidirectional inner encoder the forward/backward quarters are
    re-interleaved instead. Returns (batsize, 2, decdim).
    """
    res = self.inner(x)  # (batsize, 2, encdim)
    if self.mode == "multic":
        # take top half of first and bottom half of second
        if not self.inner.bidir:
            mid = res.shape[2] / 2
            ret = T.concatenate([res[:, 0:1, :mid], res[:, 1:2, mid:]], axis=1)
        else:
            # bidir: each direction contributes half the features, so the
            # halves themselves split into quarters
            quarts = res.shape[2] / 4
            ret = T.concatenate([
                T.concatenate([
                    res[:, 0:1, :quarts],
                    res[:, 0:1, 2 * quarts:3 * quarts]
                ], axis=2),
                T.concatenate([
                    res[:, 1:2, quarts:2 * quarts],
                    res[:, 1:2, 3 * quarts:]
                ], axis=2)
            ], axis=1)
    else:  # return as is
        ret = res
    # NOTE: removed leftover debug print ("NDIM MULTILEFTBLOCK !!!...")
    # that spammed stdout on every graph construction
    return ret  # (batsize, 2, decdim)
def apply(self, seq):
    """Embed the Glove id (column 0) and encode the char ids (columns 1:).

    Accepts a single-word batch (batsize, 1+maxwordlen) or a word-sequence
    batch (batsize, seqlen, 1+maxwordlen).
    """
    if seq.ndim == 2:  # (batsize, 1+maxwordlen)
        parts = [self.glove(seq[:, 0]), self.enc(seq[:, 1:])]
        return T.concatenate(parts, axis=1)  # (batsize, embdim + encdim)
    elif seq.ndim == 3:  # (batsize, seqlen, 1+maxwordlen)
        wordembs = self.glove(seq[:, :, 0])
        perstep, _ = T.scan(fn=self.recenc,
                            sequences=seq[:, :, 1:].dimswap(1, 0),
                            outputs_info=None)
        charencs = perstep.dimswap(1, 0)
        return T.concatenate([wordembs, charencs], axis=2)  # (batsize, seqlen, embdim + encdim)
def innerapply(self, seq, mask=None, initstates=None):
    """Run forward and reverse RNNs over seq and merge their outputs.

    initstates, when given, is a flat list: the first self.fwd.numstates
    entries seed the forward RNN, the remainder the reverse one.
    Returns (finalout, out, states): finals concatenated on axis 1,
    per-step outputs concatenated on the feature axis (reverse output
    re-reversed in time), and the two RNNs' state lists appended.
    """
    # split the flat initstates list between the two directions
    initstatesfwd = initstates[:self.fwd.numstates] if initstates is not None else initstates
    initstates = initstates[self.fwd.numstates:] if initstates is not None else initstates
    assert(initstates is None or len(initstates) == self.rew.numstates)
    initstatesrew = initstates
    fwdfinal, fwdout, fwdstates = self.fwd.innerapply(seq, mask=mask, initstates=initstatesfwd)  # (batsize, seqlen, innerdim)
    rewfinal, rewout, rewstates = self.rew.innerapply(seq, mask=mask, initstates=initstatesrew)  # TODO: reverse?
    # concatenate: fwdout, rewout: (batsize, seqlen, feats) ==> (batsize, seqlen, feats_fwd+feats_rew)
    finalout = T.concatenate([fwdfinal, rewfinal], axis=1)
    # reverse the backward outputs in time so steps align with fwdout
    out = T.concatenate([fwdout, rewout.reverse(1)], axis=2)
    return finalout, out, fwdstates+rewstates
def rec(self, x_t, ctx_tm1, t, *args):  # x_t: (batsize), context: (batsize, enc.innerdim)
    """One decoder step with attention.

    args layout (positional, fixed by the scan wiring): recurrent states
    first, then encmask at args[-2] and the full encoding ctx at args[-1].
    Returns [y_t, ctx_t, t] + new states, matching the outputs_info order.
    """
    states_tm1 = args[:-2]
    ctx = args[-1]
    encmask = args[-2]
    x_t_emb = self.embedder(x_t)  # i_t: (batsize, embdim)
    # do inconcat: feed previous context alongside the token embedding
    i_t = T.concatenate([x_t_emb, ctx_tm1], axis=1) if self.inconcat else x_t_emb
    rnuret = self.block.rec(i_t, *states_tm1)
    t += 1  # advance step counter carried through the scan
    h_t = rnuret[0]
    states_t = rnuret[1:]
    ctx_t = self._get_ctx_t(ctx, states_t, encmask)  # get context with attention
    # outconcat: output layer sees hidden state plus fresh context
    _y_t = T.concatenate([h_t, ctx_t], axis=1) if self.outconcat else h_t
    y_t = self.softmaxoutblock(_y_t)
    return [y_t, ctx_t, t] + states_t
def rec(self, mem_tm1, h_tm1, *args):
    """One step of the memory-writing controller.

    mem_tm1: f(batsize, outseqlen, outvocsize) soft memory of symbol
    distributions; h_tm1: f(batsize, thinkerdim) controller state;
    args holds the core's recurrent states, with the (non-sequence)
    input encoding inpenc f(batsize, inplen, inpencdim) at args[-1].
    Returns (mem_t, mem_t, h_t) + new core states — mem_t twice so the
    scan both outputs and recurs over it.
    """
    inpenc = args[-1]
    states_tm1 = args[:-1]
    batsize = inpenc.shape[0]
    # mem_tm1: f(batsize, outseqlen, outvocsize)
    # h_tm1: f(batsize, thinkerdim)
    # inpenc: f(batsize, inplen, inpencdim)
    # summarize memory
    mem_tm1_sam = self._memsample(mem_tm1)  # sample from mem
    mem_tm1_embsum = T.dot(
        mem_tm1_sam, self._memembmat)  # f(batsize, outseqlen, memembdim)
    mem_tm1_sum = self._memencode(
        mem_tm1_embsum)  # f(batsize, outseqlen, memsumdim)
    if self._memposvecs is not None:
        # append fixed position vectors so addressing can be position-aware
        memposvecs = T.repeat(self._memposvecs.dimadd(0), batsize, axis=0)
        mem_tm1_sum = T.concatenate([mem_tm1_sum, memposvecs], axis=2)
    # input and memory read attentions
    inp_ctx_t = self._get_inp_ctx(h_tm1, inpenc)  # (batsize, inpencdim)
    mem_ctx_t = self._get_mem_ctx(h_tm1, mem_tm1_sum)  # (batsize, memsumdim)
    # update thinker state
    i_t = T.concatenate([inp_ctx_t, mem_ctx_t], axis=1)
    rnuret = self._core.rec(i_t, *states_tm1)
    h_t = rnuret[0]
    states_t = rnuret[1:]
    # memory change interface
    mem_t_addr = self._get_addr_weights(
        h_t, mem_tm1_sum)  # float-(batsize, outseqlen)
    mem_t_write = self._get_write_weights(h_t)  # (batsize, memvocsize)
    e_t = self._get_erase(h_t)  # (0..1)-(batsize,)
    c_t = self._get_change(h_t)  # (0..1)-(batsize,)
    # memory change
    can_mem_t = mem_tm1 - T.batched_dot(
        e_t, mem_tm1 * mem_t_addr.dimshuffle(0, 1, 'x'))  # erase where we addressed
    can_mem_t = can_mem_t + T.batched_tensordot(
        mem_t_addr, mem_t_write, axes=0)  # write new value
    mem_t = T.batched_dot(1 - c_t, mem_tm1) + T.batched_dot(
        c_t, can_mem_t)  # interpolate between old and new value
    mem_t = T.softmax(mem_t)  # normalize to probabilities
    return (mem_t, mem_t, h_t) + tuple(states_t)
def apply(self, x):  # x: (batsize, seqlen, 2)
    """Join word embeddings (col 0) with position embeddings (col 1);
    the word embedding's mask is propagated to the result."""
    words = self.baseemb(x[:, :, 0])
    positions = self.pemb(x[:, :, 1])
    combined = T.concatenate([words, positions], axis=2)  # (batsize, seqlen, wembdim+pembdim)
    combined.mask = words.mask
    return combined
def _get_combo(self, x_t, crit):
    """Pair one memory vector with every criterion row.

    x_t: (mem_dim), crit: (batsize, crit_dim) -> (batsize, mem_dim + crit_dim).
    """
    column = x_t.reshape((x_t.shape[0], 1))
    # tile the single vector across the batch; TODO <-- TOO SLOW BECAUSE OF THIS
    tiled = T.repeat(column, crit.shape[0], axis=1).T  # (batsize, mem_dim)
    return T.concatenate([tiled, crit], axis=1)
def apply(self, seq):
    """seq holds a Glove id in column 0 and char ids in the remaining
    columns; works on a word batch (2D) or a word-sequence batch (3D)."""
    if seq.ndim == 2:
        glovevec = self.glove(seq[:, 0])  # (batsize, embdim)
        charvec = self.enc(seq[:, 1:])    # (batsize, encdim)
        return T.concatenate([glovevec, charvec], axis=1)  # (batsize, embdim + encdim)
    elif seq.ndim == 3:  # (batsize, seqlen, 1+maxwordlen)
        glovevec = self.glove(seq[:, :, 0])
        chars = seq[:, :, 1:].dimswap(1, 0)
        scanout, _ = T.scan(fn=self.recenc, sequences=chars, outputs_info=None)
        return T.concatenate([glovevec, scanout.dimswap(1, 0)], axis=2)
def apply(self, x):
    """Encode the type-word prefix and subject-char suffix of x separately
    and concatenate them subject-first."""
    typemb = self.typenc(x[:, :self.typelen])
    subemb = self.subjenc(x[:, self.typelen:])
    return T.concatenate([subemb, typemb], axis=1)
def applymask(cls, xseq, maskseq=None):
    """Where maskseq is 0, replace xseq's distribution with a one-hot on
    output symbol 0; where it is 1, keep xseq unchanged."""
    if maskseq is None:
        return xseq
    stacked = T.tensordot(maskseq, T.ones((xseq.shape[2],)), 0)  # f32^(batsize, seqlen, outdim)
    zeroprobs = T.concatenate(
        [T.ones((xseq.shape[0], xseq.shape[1], 1)),
         T.zeros((xseq.shape[0], xseq.shape[1], xseq.shape[2] - 1))],
        axis=2)  # all probability mass on symbol 0
    return xseq * stacked + zeroprobs * (1.0 - stacked)
def apply(self, inpseq, outseq, maskseq=None):
    """Embed both sequences, feed the joined embeddings through the
    transducer block, then apply the output mask."""
    joint = T.concatenate([self.inpemb(inpseq), self.outemb(outseq)],
                          axis=2)  # (batsize, seqlen, inpembdim+outembdim)
    transduced = self.block(joint)
    return SeqTransducer.applymask(transduced, maskseq=maskseq)
def apply(self, x):
    """Split x into subject/predicate encodings using complementary
    outerpolation weights; returns them stacked on axis 1."""
    weights, mask = self.tosca(x)
    subj = self.subjenc(x, weights=weights)[:, np.newaxis, :]
    pred = self.predenc(x, weights=(1 - weights))[:, np.newaxis, :]
    return T.concatenate([self.subjmd(subj), self.predmd(pred)], axis=1)
def inner_rec(self, x_t_emb, *args):  # x_t_emb: (batsize, embdim)
    """One decoder step on a pre-embedded token.

    args layout: recurrent states first, encmask at args[-2], the full
    input encoding ctx at args[-1]. Context is computed from the LAST
    previous state before the RNN update (unlike the ctx_tm1-carrying
    variant). Returns [y_t] + new states.
    """
    states_tm1 = args[:-2]
    ctx = args[-1]  # (batsize, inseqlen, inencdim)
    encmask = args[-2]
    # x_t_emb = self.embedder(x_t)  # i_t: (batsize, embdim)
    # compute current context
    ctx_t = self._get_ctx_t(ctx, states_tm1[-1], encmask)  # TODO: might not work with LSTM
    # do inconcat
    i_t = T.concatenate([x_t_emb, ctx_t], axis=1) if self.inconcat else x_t_emb
    # expose i_t as an extra output for inspection/debugging
    i_t.push_extra_outs({"i_t": i_t})
    rnuret = self.block.rec(i_t, *states_tm1)
    h_t = rnuret[0]
    states_t = rnuret[1:]
    _y_t = T.concatenate([h_t, ctx_t], axis=1) if self.outconcat else h_t
    y_t = self.softmaxoutblock(_y_t)
    return [y_t] + states_t
def apply(self, x):
    """Encode x, transform, and split the feature axis into a 2-row output.

    For a non-bidir inner encoder the features are halved; for a bidir
    one the forward/backward quarters are re-interleaved.
    Returns (batsize, 2, decdim).
    """
    res = self.inner(x)
    res = self.trans(res)
    res = res.dimshuffle(0, "x", 1)  # (batsize, 1, q_enc_dim)
    if not self.inner.bidir:
        mid = res.shape[2] / 2
        ret = T.concatenate([res[:, :, :mid], res[:, :, mid:]], axis=1)
    else:
        # BUGFIX: was res.shape[2] / 2, which made the 2*quart: and
        # 3*quart: slices empty; a bidir encoding splits into four
        # quarters (cf. the sibling multic apply using shape[2] / 4)
        quart = res.shape[2] / 4
        ret = T.concatenate([
            T.concatenate(
                [res[:, :, :quart], res[:, :, 2 * quart:3 * quart]],
                axis=2),
            T.concatenate(
                [res[:, :, quart:2 * quart], res[:, :, 3 * quart:]],
                axis=2)
        ], axis=1)
    return ret  # (batsize, 2, decdim)
def apply(self, x):
    """x: (batsize, seqlen, 1+maxwordlen); col 0 word ids, cols 1: char ids.
    Builds masked word vectors, outerpolates them into left/right encodings
    and returns them stacked: (batsize, 2, decdim)."""
    # word vectors and mask
    wordids = x[:, :, 0]
    charvecs = EncLastDim(self.charenc)(x[:, :, 1:])
    wordvecs = T.concatenate([charvecs, self.wordemb(wordids)], axis=2)
    wordvecs.mask = T.neq(wordids, self.maskid)
    # do outerpolation: complementary weights for the two encoders
    weights, mask = self.outerpol(wordvecs)
    left = self.leftenc(wordvecs, weights=weights).dimshuffle(0, 'x', 1)
    right = self.rightenc(wordvecs, weights=(1 - weights)).dimshuffle(0, 'x', 1)
    return T.concatenate([self.leftlin(left), self.rightlin(right)], axis=1)
def apply(self, seq, weights=None, mask=None):
    """Run the attention encoder and content encoder in parallel and stack
    their per-step outputs on a new axis 2 (content first).
    Returns (content final, stacked outputs, content states)."""
    a_final, a_all, a_states = self.enc_a(seq, weights=weights, mask=mask)
    c_final, c_all, c_states = self.enc_c(seq, weights=weights, mask=mask)
    outmask = c_all.mask
    stacked = T.concatenate([c_all.dimshuffle(0, 1, "x", 2),
                             a_all.dimshuffle(0, 1, "x", 2)], axis=2)
    stacked.mask = outmask
    return c_final, stacked, c_states
def applymask(cls, xseq, maskseq):
    """Force masked-out steps of xseq to a one-hot distribution on symbol 0;
    unmasked steps pass through unchanged."""
    if maskseq is None:
        return xseq
    outdim = xseq.shape[2]
    mask3d = T.tensordot(maskseq, T.ones((outdim,)), 0)  # maskseq broadcast over outdim
    onehot0 = T.concatenate(
        [T.ones((xseq.shape[0], xseq.shape[1], 1)),
         T.zeros((xseq.shape[0], xseq.shape[1], outdim - 1))],
        axis=2)  # 100% probability on output symbol 0
    return xseq * mask3d + onehot0 * (1.0 - mask3d)
def innerapply(self, seq, mask=None, initstates=None):
    """Bidirectional inner application: run forward and reverse RNNs and
    merge finals, per-step outputs, and per-layer states.

    initstates is a flat list split between the two directions (forward
    first). Unlike the sibling variant that appends the state lists, this
    one concatenates matching fwd/rew states on their feature axis.
    """
    initstatesfwd = initstates[:self.fwd.
                               numstates] if initstates is not None else initstates
    initstates = initstates[
        self.fwd.numstates:] if initstates is not None else initstates
    assert (initstates is None or len(initstates) == self.rew.numstates)
    initstatesrew = initstates
    fwdfinal, fwdout, fwdstates = self.fwd.innerapply(
        seq, mask=mask, initstates=initstatesfwd)  # (batsize, seqlen, innerdim)
    rewfinal, rewout, rewstates = self.rew.innerapply(
        seq, mask=mask, initstates=initstatesrew)  # TODO: reverse?
    # concatenate: fwdout, rewout: (batsize, seqlen, feats) ==> (batsize, seqlen, feats_fwd+feats_rew)
    finalout = T.concatenate([fwdfinal, rewfinal], axis=1)
    # re-reverse the backward outputs in time so steps align with fwdout
    out = T.concatenate([fwdout, rewout.reverse(1)], axis=2)
    states = []
    for fwdstate, rewstate in zip(fwdstates, rewstates):
        states.append(T.concatenate(
            [fwdstate, rewstate],
            axis=2))  # for taking both final states, we need not reverse
    return finalout, out, states
def apply(self, x, mask=None, weights=None):
    """Self-attentive pooling: the first self.numouts feature columns of the
    encoding act as attention logits over the remaining columns.

    Returns (batsize, numouts*lastdim) in "concat" mode or
    (batsize, numouts, lastdim) in "seq" mode.
    """
    ret = self.enc(x, mask=mask, weights=weights)  # (batsize, seqlen, lastdim)
    outs = []
    # apply mask (SeqEncoder should attach mask to outvar if all_outputs());
    # FIX: fall back to the explicitly passed mask instead of crashing when
    # the encoder attached none (consistent with the bidir variant)
    mask = ret.mask if getattr(ret, "mask", None) is not None else mask
    for i in range(self.numouts):
        selfweights = Softmax()(ret[:, :, i])  # (batsize, seqlen)
        if mask is not None:
            selfweights *= mask  # apply mask
        selfweights = selfweights / T.sum(selfweights, axis=1).dimshuffle(
            0, "x")  # renormalize
        weightedstates = ret[:, :, self.numouts:] * selfweights.dimshuffle(
            0, 1, "x")
        out = T.sum(weightedstates, axis=1)  # (batsize, lastdim)
        outs.append(out)
    if self.mode == "concat":
        ret = T.concatenate(outs, axis=1)
    elif self.mode == "seq":
        outs = [out.dimshuffle(0, "x", 1) for out in outs]
        ret = T.concatenate(outs, axis=1)
    return ret
def apply(self, inpseq):  # int-(batsize, seqlen)
    """Encode the input, build an initial soft memory, and iterate the
    memory-writing rec for self._nsteps; returns (final mem, all mems)."""
    inpenco = self._inpencoder(
        inpseq)  # may carry mask, based on encoder's embedder
    batsize = inpenco.shape[0]
    outvocsize = self._memembmat.shape[0]
    # initial memory: most mass on symbol 0, a little everywhere else
    mem_0 = T.concatenate([
        T.ones((batsize, self._memlen, 1), dtype="float32") * 0.95,
        T.ones((batsize, self._memlen, outvocsize - 1), dtype="float32") * 0.05,
    ], axis=2)  # (batsize, outseqlen, outvocsize)
    mem_0 = T.softmax(mem_0)
    core_init_states = self._core.get_init_info(batsize)
    core_state_spec = self._core.get_statespec(flat=False)
    assert (len(core_state_spec) == len(core_init_states))
    h_0 = None  # take last output of core states as initial state
    c = 0
    # NOTE(review): this loop appears to always leave h_0 as the last
    # init state regardless of the "output" tag (both branches assign
    # core_init_states[c]) — confirm intended selection logic
    for ss in core_state_spec:
        h_0_isout = False
        for sss in ss:
            if sss[0] == "output":
                h_0_isout = True
                h_0 = core_init_states[c]
            if not h_0_isout:
                h_0 = core_init_states[c]
            c += 1
    if self._inp_pos_repr is not None:
        # append per-position vectors to the input encoding
        inpposvecs = self._inp_pos_repr(inpseq.shape[1])
        inpposvecs = T.repeat(inpposvecs.dimadd(0), batsize, axis=0)
        inpenc = T.concatenate([inpenco, inpposvecs], axis=2)
        inpenc.mask = inpenco.mask
    else:
        inpenc = inpenco
    outputs = T.scan(fn=self.rec,
                     outputs_info=[None, mem_0, h_0] + core_init_states,
                     n_steps=self._nsteps,
                     non_sequences=inpenc)
    ret = outputs[0]
    ret.push_extra_outs({"mem_0": mem_0, "h_0": h_0})  # DEBUGGING
    return ret[-1], ret
def apply(self, x, mask=None, weights=None): ret = self.enc(x, mask=mask, weights=weights) # (batsize, seqlen, lastdim) outs = [] # apply mask (SeqEncoder should attach mask to outvar if all_outputs() mask = mask if mask is not None else ret.mask if hasattr( ret, "mask") else None if self.bidir: mid = ret.shape[2] / 2 ret1 = ret[:, :, :mid] ret2 = ret[:, :, mid:] ret = ret1 for i in range(self.numouts): selfweights = ret[:, :, i] # (batsize, seqlen) if self.bidir: selfweights += ret2[:, :, i] selfweights = Softmax()(selfweights) if mask is not None: selfweights *= mask # apply mask selfweights = selfweights / T.sum(selfweights, axis=1).dimshuffle( 0, "x") # renormalize weightedstates = ret[:, :, self.numouts:] * selfweights.dimshuffle( 0, 1, "x") if self.bidir: weightedstates2 = ret2[:, :, self.numouts:] * selfweights.dimshuffle( 0, 1, "x") weightedstates = T.concatenate( [weightedstates, weightedstates2], axis=2) out = T.sum(weightedstates, axis=1) # (batsize, lastdim) outs.append(out) if self.mode == "concat": ret = T.concatenate(outs, axis=1) elif self.mode == "seq": outs = [out.dimshuffle(0, "x", 1) for out in outs] ret = T.concatenate(outs, axis=1) return ret
def apply(self, x):
    """Two-level encoder: level-1 (chars) vectors, optionally joined with
    level-2 embeddings, are fed to the level-2 sequence encoder."""
    if self.l2emb is not None:
        charencs = EncLastDim(self.l1enc)(x[:, :, 1:])
        wordids = x[:, :, 0]
        assert (wordids.ndim == 2)
        wordvecs = T.concatenate([charencs, self.l2emb(wordids)], axis=2)
        wmask = None
        if self.maskid is not None:
            wmask = T.neq(wordids, self.maskid)
    else:
        wordvecs = EncLastDim(self.l1enc)(x)
        wmask = T.gt(T.sum(T.eq(x, self.maskid), axis=2), 0)
    wordvecs.mask = wmask
    return self.l2enc(wordvecs)  # , wmask  # mask for debug
def apply(self, *args):
    """Each positional arg is an (args, kwargs) spec for the matching block;
    block outputs are concatenated along self.axis."""
    outputs = []
    for block, arg in zip(self.blocks, args):
        if self.argfun is not None:
            arglist, argdic = self.argfun(arg)
        elif issequence(arg):
            assert (0 < len(arg) < 3)
            arglist = arg[0]
            argdic = {} if len(arg) == 1 else arg[1]
        elif isinstance(arg, (Var, Val)):
            arglist, argdic = [arg], {}
        else:
            raise Exception("something wrong with concat's arguments: " + str(args))
        outputs.append(block(*arglist, **argdic))
    return T.concatenate(outputs, axis=self.axis)
def apply(
        self,
        context,
        seq,
        context_0=None,
        initstates=None,
        mask=None,
        encmask=None,
        startsymemb=None,
        **kw
):  # context: (batsize, enc.innerdim), seq: idxs-(batsize, seqlen)
    """Teacher-forced decoding over seq given an encoder context.

    initstates may be None (batch size used), or a partial list that is
    padded at the front with batch sizes for the lower layers. Returns
    symbol probabilities (batsize, seqlen, vocabsize), masked if requested.
    """
    if initstates is None:
        initstates = seq.shape[0]
    elif issequence(initstates):
        if len(
                initstates
        ) < self.numstates:  # fill up with batsizes for lower layers
            initstates = [seq.shape[0]] * (self.numstates -
                                           len(initstates)) + initstates
    init_info, nonseq = self.get_init_info(
        context, initstates, ctx_0=context_0,
        encmask=encmask)  # sets init states to provided ones
    embedder = self.embedder

    def recemb(x):
        return embedder(x)

    # embed all target tokens except the first (shifted teacher forcing)
    seq_emb = T.scan(fn=recemb, sequences=seq[:, 1:].dimswap(1, 0))
    seq_emb = seq_emb.dimswap(1, 0)
    # prepend the start-symbol embedding as step 0
    seq_emb_t0 = self._get_seq_emb_t0(seq_emb.shape[0],
                                      startsymemb=startsymemb)
    seq_emb = T.concatenate([seq_emb_t0.dimshuffle(0, "x", 1), seq_emb],
                            axis=1)
    outputs = T.scan(fn=self.rec,
                     sequences=seq_emb.dimswap(1, 0),
                     outputs_info=[None] + init_info,
                     non_sequences=nonseq)
    ret = outputs[0].dimswap(
        1, 0
    )  # returns probabilities of symbols --> (batsize, seqlen, vocabsize)
    if mask == "auto":
        # derive mask from non-zero token ids
        mask = (seq > 0).astype("int32")
    ret = self.applymask(ret, mask)
    return ret
def apply(self, *args):
    """Dispatch each positional argument to its block and concatenate the
    results along self.axis."""
    def _split(arg):
        # normalize one argument spec into (positional list, keyword dict)
        if self.argfun is not None:
            return self.argfun(arg)
        if issequence(arg):
            assert (len(arg) in (1, 2))
            return arg[0], (arg[1] if len(arg) == 2 else {})
        if isinstance(arg, (Var, Val)):
            return [arg], {}
        raise Exception("something wrong with concat's arguments: " + str(args))

    results = []
    for block, arg in zip(self.blocks, args):
        pos, kw = _split(arg)
        results.append(block(*pos, **kw))
    return T.concatenate(results, axis=self.axis)
def _get_emb(self, inpseq, outseq):
    """Embed input and output sequences and join them on the feature axis."""
    inpart = self.inpemb(inpseq)   # (batsize, seqlen, inpembdim)
    outpart = self.outemb(outseq)  # (batsize, seqlen, outembdim)
    lastaxis = inpart.ndim - 1
    return T.concatenate([inpart, outpart], axis=lastaxis)
def apply(self, x, mask=None):
    """Encode columns 1: and embed the id in column 0, then concatenate."""
    embedded = self.emb(x[:, 0])
    encoded = self.enc(x[:, 1:], mask=mask)
    return T.concatenate([encoded, embedded], axis=1)  # (?, encdim+embdim)
def _get_emb(self, inpseq, outseq):
    """Concatenate the two embeddings on their last dimension."""
    a = self.inpemb(inpseq)
    b = self.outemb(outseq)
    return T.concatenate([a, b], axis=a.ndim - 1)  # (batsize, seqlen, inpembdim+outembdim)
def apply(self, idxs):  # idxs: (batsize,) word ids
    """Join the pretrained Glove vector with the trainable embedding."""
    pretrained = self.glove(idxs)  # (batsize, embdim)
    trainable = self.emb(idxs)     # (batsize, outdim)
    return T.concatenate([pretrained, trainable], axis=1)  # (batsize, outdim+embdim)
def _get_g_t(self, h_t, ctx_t):
    """Output-concat: return [h_t; ctx_t] when enabled, else h_t as-is."""
    if self.outconcat:
        return T.concatenate([h_t, ctx_t], axis=1)
    return h_t
def apply(self, l, r):
    """Score a pair: concatenate, project through W, then through U."""
    joined = T.concatenate([l, r], axis=1)
    hidden = T.dot(joined, self.W)
    return T.dot(hidden, self.U)
def rec(x_t, crit):
    """x_t: (batsize, elem_dim), crit: (batsize, crit_dim) -> (batsize,)."""
    projected = T.dot(T.concatenate([x_t, crit], axis=1), self.W)  # (batsize, innerdim)
    return T.sum(projected, axis=1)
def apply(self, x):  # x: (batsize, 2, syms)
    """Encode subject symbols (row 0) and the offset predicate id (row 1)
    and stack the two encodings on axis 1."""
    subj = self.subjenc(x[:, 0, :]).dimshuffle(0, "x", 1)
    pred = self.predenc(x[:, 1, 0] - self.offset).dimshuffle(0, "x", 1)
    return T.concatenate([subj, pred], axis=1)
def apply(self, x, mask=None):
    """Concatenate the full-sequence encoding with the embedding of the
    first symbol."""
    encoded = self.enc(x, mask=mask)
    firstemb = self.emb(x[:, 0])
    return T.concatenate([encoded, firstemb], axis=1)  # (?, encdim+embdim)
def rec(x_t, crit):
    """Score each element against the criterion; returns (batsize,)."""
    combo = T.concatenate([x_t, crit], axis=1)  # (batsize, elem_dim+crit_dim)
    return T.sum(T.dot(combo, self.W), axis=1)
def apply(self, x):
    """Augment the base representation of x with an embedding of its adb
    lookup; the mask is derived from x."""
    augmented = T.concatenate([self.base(x), self.augment(self.adb[x])], axis=1)
    self._maskfrom(augmented, x)
    return augmented
def apply(self, seq, init_states=None):
    """Run forward and reverse encoders and join their per-step features."""
    forward = self.fwd(seq)
    backward = self.rew(seq)
    # (batsize, seqlen, feats) each ==> (batsize, seqlen, feats_fwd+feats_rew)
    return T.concatenate([forward, backward], axis=2)
def _get_combo(self, x_t, crit):
    """Pair a single memory vector with every criterion row.
    x_t: (mem_dim), crit: (batsize, crit_dim) -> (batsize, mem_dim+crit_dim)."""
    asrow = x_t.reshape((x_t.shape[0], 1))
    # replicate across the batch; TODO <-- TOO SLOW BECAUSE OF THIS
    batched = T.repeat(asrow, crit.shape[0], axis=1).T  # (batsize, mem_dim)
    return T.concatenate([batched, crit], axis=1)
def _get_j_t(self, i_t, ctx_tm1):
    """Input-concat: append the previous context to the input when enabled."""
    if not self.inconcat:
        return i_t
    return T.concatenate([i_t, ctx_tm1], axis=1)
def apply(self, seq, clas):  # seq: idx^(batsize, seqlen), clas: idx^(batsize,)
    """Broadcast the class embedding over every time step of the embedded
    sequence and run the transducer on the joined features."""
    tokens = self.wemb(seq)   # (batsize, seqlen, wembdim)
    classes = self.cemb(clas)  # (batsize, cembdim)
    classes = classes.dimshuffle(0, 'x', 1).repeat(tokens.shape[1], axis=1)
    return self.transducer(T.concatenate([tokens, classes], axis=2))
def _get_combo(self, x_t, crit):
    """Simple feature concatenation of element and criterion."""
    combined = T.concatenate([x_t, crit], axis=1)
    return combined
def apply(self, seq):
    """seq: (batsize, 1+maxwordlen); col 0 = Glove id, cols 1: = char ids."""
    wordemb = self.glove(seq[:, 0])  # (batsize, embdim)
    charenc = self.enc(seq[:, 1:])   # (batsize, encdim)
    return T.concatenate([wordemb, charenc], axis=1)  # (batsize, embdim + encdim)